Files
appRobotVideoControls/programs/2_estimate_camera_pose_from_aruco_json.py
chk 5a7176920a Phase 1 abgeschlossen: Positionen werden erkannt.
Positionen aus den Merkern heraus erkennbar. Viele Bilder gleichzeitig verarbeitbar.
2026-05-25 22:16:11 +02:00

651 lines
14 KiB
Python

#!/usr/bin/env python3
"""
2_estimate_camera_pose_from_aruco_json.py
Berechnet die Kameraposition im Maschinen-/Board-Koordinatensystem
aus:
1. einer ArUco-Detections-JSON
2. robots.json mit bekannten Marker-Positionen
NEU:
- Marker-Orientierungen unterstützt
- Default: Board-Marker zeigen nach +Z
- Qualitätsbewertung erweitert
- Speichert ALLE erkannten Marker
- Speichert auch fehlgeschlagene Lösungen
- Bewertet Kamerageometrie
- Bewertet Markerabdeckung
- Bewertet Sichtwinkel
- Bewertet Markeranzahl
- Speichert vollständige Rohdaten
Benötigt:
pip install opencv-python numpy
"""
import argparse
import json
import math
from pathlib import Path
import cv2
import numpy as np
# ============================================================
# Hilfsfunktionen
# ============================================================
def normalize(v):
n = np.linalg.norm(v)
if n < 1e-9:
return v
return v / n
def rotation_matrix_from_axes(x_axis, y_axis, z_axis):
R = np.column_stack([
normalize(x_axis),
normalize(y_axis),
normalize(z_axis)
])
return R.astype(np.float32)
def rvec_tvec_to_camera_pose(rvec, tvec):
"""
OpenCV:
X_cam = R * X_world + t
Kamera im Weltkoordinatensystem:
C = -R^T * t
"""
R_cw, _ = cv2.Rodrigues(rvec)
R_wc = R_cw.T
cam_pos = -R_wc @ tvec
return R_wc, cam_pos.reshape(3)
def rotation_matrix_to_euler_zyx(R):
yaw = math.degrees(math.atan2(R[1, 0], R[0, 0]))
sp = math.sqrt(R[2, 1] ** 2 + R[2, 2] ** 2)
pitch = math.degrees(math.atan2(-R[2, 0], sp))
roll = math.degrees(math.atan2(R[2, 1], R[2, 2]))
return roll, pitch, yaw
# ============================================================
# Marker-Orientierung
# ============================================================
def get_marker_rotation(marker):
# Explizite Rotation vorhanden?
if "rotation_matrix" in marker:
return np.array(
marker["rotation_matrix"],
dtype=np.float32
)
# Default:
# Marker zeigt nach +Z
#
# x = rechts
# y = oben
# z = aus Board heraus (+Z)
x_axis = np.array([1, 0, 0], dtype=np.float32)
y_axis = np.array([0, 1, 0], dtype=np.float32)
z_axis = np.array([0, 0, 1], dtype=np.float32)
return rotation_matrix_from_axes(
x_axis,
y_axis,
z_axis
)
# ============================================================
# Marker Lookup
# ============================================================
def build_marker_lookup(robot_data):
marker_lookup = {}
for marker in robot_data.get("Marker", []):
marker_id = int(marker.get("id", -1))
if marker_id < 0:
continue
# Nur absolute Marker verwenden
if "position" not in marker:
continue
pos = marker["position"]
if pos is None:
continue
if len(pos) != 3:
continue
rotation = get_marker_rotation(marker)
marker_lookup[marker_id] = {
"position": np.array(
pos,
dtype=np.float32
),
"rotation": rotation,
"on": marker.get("on", "unknown")
}
return marker_lookup
# ============================================================
# Marker-Eckpunkte
# ============================================================
def build_marker_object_points(
marker_center_world,
marker_rotation,
marker_size_m):
half = marker_size_m / 2.0
# OpenCV Corner Reihenfolge
local = np.array([
[-half, half, 0.0],
[ half, half, 0.0],
[ half, -half, 0.0],
[-half, -half, 0.0],
], dtype=np.float32)
rotated = (marker_rotation @ local.T).T
return rotated + marker_center_world.reshape(1, 3)
# ============================================================
# Qualitätsmetriken
# ============================================================
def compute_marker_spread(points_3d):
if len(points_3d) < 2:
return 0.0
mins = np.min(points_3d, axis=0)
maxs = np.max(points_3d, axis=0)
diag = np.linalg.norm(maxs - mins)
return float(diag)
def compute_viewing_angles(
camera_position,
marker_lookup,
used_markers):
results = []
for marker_id in used_markers:
marker = marker_lookup[marker_id]
pos = marker["position"]
R = marker["rotation"]
normal = R[:, 2]
to_camera = normalize(camera_position - pos)
dot = np.clip(
np.dot(normal, to_camera),
-1.0,
1.0
)
angle = math.degrees(math.acos(dot))
results.append(angle)
if len(results) == 0:
return {
"mean": None,
"max": None
}
return {
"mean": float(np.mean(results)),
"max": float(np.max(results))
}
def compute_pose_quality(
rms,
max_err,
num_markers,
marker_spread,
viewing_angle_mean):
score = 100.0
# Reprojection Error
score -= rms * 8.0
# Max Error
score -= max_err * 2.0
# Wenige Marker
if num_markers < 2:
score -= 50
elif num_markers < 4:
score -= 25
elif num_markers < 6:
score -= 10
# Schlechte räumliche Verteilung
if marker_spread < 0.10:
score -= 30
elif marker_spread < 0.25:
score -= 15
# Schlechter Blickwinkel
if viewing_angle_mean is not None:
if viewing_angle_mean > 70:
score -= 25
elif viewing_angle_mean > 50:
score -= 10
score = max(0.0, min(100.0, score))
return float(score)
# ============================================================
# Main
# ============================================================
def main():
parser = argparse.ArgumentParser()
parser.add_argument(
"--detections",
required=True
)
parser.add_argument(
"--robots",
required=True
)
parser.add_argument(
"--min-confidence",
type=float,
default=0.5
)
parser.add_argument(
"--max-reprojection-error",
type=float,
default=3.0
)
args = parser.parse_args()
# ============================================================
# JSON laden
# ============================================================
with open(args.detections, "r", encoding="utf-8") as f:
detection_data = json.load(f)
with open(args.robots, "r", encoding="utf-8") as f:
robot_data = json.load(f)
# ============================================================
# Kamera
# ============================================================
K = np.array(
detection_data["camera"]["camera_matrix"],
dtype=np.float32
)
D = np.array(
detection_data["camera"]["distortion_coefficients"],
dtype=np.float32
).reshape(-1, 1)
# ============================================================
# Marker laden
# ============================================================
known_markers = build_marker_lookup(robot_data)
# ============================================================
# Punktlisten
# ============================================================
object_points = []
image_points = []
used_markers = []
rejected_markers = []
detections = detection_data["detections"]
for det in detections:
marker_id = int(det["marker_id"])
confidence = float(
det.get("confidence", 1.0)
)
reason = None
if confidence < args.min_confidence:
reason = "low_confidence"
elif marker_id not in known_markers:
reason = "unknown_marker"
if reason is not None:
rejected_markers.append({
"marker_id": marker_id,
"reason": reason,
"confidence": confidence
})
continue
marker_info = known_markers[marker_id]
marker_center_world = marker_info["position"]
marker_rotation = marker_info["rotation"]
marker_size = float(
det["marker_size_m"]
)
obj_pts = build_marker_object_points(
marker_center_world,
marker_rotation,
marker_size
)
img_pts = np.array(
det["image_points_px"],
dtype=np.float32
)
object_points.append(obj_pts)
image_points.append(img_pts)
used_markers.append(marker_id)
# ============================================================
# Ausgabe vorbereiten
# ============================================================
out = {
"success": False,
"camera_pose": None,
"quality": {},
"used_markers": [],
"rejected_markers": rejected_markers,
"all_detected_markers": [
int(d["marker_id"])
for d in detections
]
}
# ============================================================
# Zu wenige Marker
# ============================================================
if len(object_points) == 0:
out["quality"] = {
"error": "no_known_markers",
"num_detected_markers": len(detections),
"num_used_markers": 0
}
out_file = Path(args.detections).with_suffix(
".camera_pose.json"
)
with open(out_file, "w", encoding="utf-8") as f:
json.dump(out, f, indent=2)
print("Keine bekannten Marker gefunden.")
print(out_file)
return
# ============================================================
# solvePnP
# ============================================================
object_points = np.concatenate(
object_points,
axis=0
)
image_points = np.concatenate(
image_points,
axis=0
)
success, rvec, tvec = cv2.solvePnP(
object_points,
image_points,
K,
D,
flags=cv2.SOLVEPNP_ITERATIVE
)
if not success:
out["quality"] = {
"error": "solvepnp_failed",
"num_used_markers": len(set(used_markers))
}
out_file = Path(args.detections).with_suffix(
".camera_pose.json"
)
with open(out_file, "w", encoding="utf-8") as f:
json.dump(out, f, indent=2)
print("solvePnP fehlgeschlagen")
return
# ============================================================
# Kamera Pose
# ============================================================
R_wc, cam_pos = rvec_tvec_to_camera_pose(
rvec,
tvec
)
roll, pitch, yaw = rotation_matrix_to_euler_zyx(
R_wc
)
# ============================================================
# Reprojektion
# ============================================================
projected, _ = cv2.projectPoints(
object_points,
rvec,
tvec,
K,
D
)
projected = projected.reshape(-1, 2)
reproj_error = np.linalg.norm(
projected - image_points,
axis=1
)
rms = float(
np.sqrt(np.mean(reproj_error ** 2))
)
max_err = float(
np.max(reproj_error)
)
# ============================================================
# Qualität
# ============================================================
marker_positions = np.array([
known_markers[mid]["position"]
for mid in set(used_markers)
])
marker_spread = compute_marker_spread(
marker_positions
)
viewing = compute_viewing_angles(
cam_pos,
known_markers,
set(used_markers)
)
quality_score = compute_pose_quality(
rms,
max_err,
len(set(used_markers)),
marker_spread,
viewing["mean"]
)
# ============================================================
# Ausgabe
# ============================================================
print()
print("==================================================")
print("KAMERA-POSE")
print("==================================================")
print()
print("Position [m]")
print(f"X = {cam_pos[0]:.6f}")
print(f"Y = {cam_pos[1]:.6f}")
print(f"Z = {cam_pos[2]:.6f}")
print()
print("Orientation [deg]")
print(f"Roll = {roll:.3f}")
print(f"Pitch = {pitch:.3f}")
print(f"Yaw = {yaw:.3f}")
print()
print("Reprojection")
print(f"RMS = {rms:.3f}px")
print(f"MAX = {max_err:.3f}px")
print()
print("Quality")
print(f"Score = {quality_score:.1f}/100")
print(f"Marker Spread = {marker_spread:.3f}m")
if viewing["mean"] is not None:
print(
f"Mean Viewing Angle = "
f"{viewing['mean']:.1f}deg"
)
# ============================================================
# JSON speichern
# ============================================================
out["success"] = True
out["camera_pose"] = {
"position_m": {
"x": float(cam_pos[0]),
"y": float(cam_pos[1]),
"z": float(cam_pos[2]),
},
"orientation_deg": {
"roll": float(roll),
"pitch": float(pitch),
"yaw": float(yaw),
},
"rotation_matrix_world_from_camera": (
R_wc.tolist()
)
}
out["quality"] = {
"pose_quality_score": quality_score,
"reprojection_rms_px": rms,
"reprojection_max_px": max_err,
"num_detected_markers": len(detections),
"num_used_markers": len(set(used_markers)),
"marker_spread_m": marker_spread,
"mean_viewing_angle_deg": viewing["mean"],
"max_viewing_angle_deg": viewing["max"],
"pose_stable":
max_err <= args.max_reprojection_error
}
out["used_markers"] = sorted(
list(set(used_markers))
)
out_file = Path(args.detections).with_suffix(
".camera_pose.json"
)
with open(out_file, "w", encoding="utf-8") as f:
json.dump(out, f, indent=2)
print()
print("Gespeichert:")
print(out_file)
if __name__ == "__main__":
main()