#!/usr/bin/env python3 """ estimate_camera_pose_from_aruco_json.py Berechnet die Kameraposition im Maschinen-/Board-Koordinatensystem aus: 1. einer ArUco-Detections-JSON 2. robots.json mit bekannten Marker-Positionen Das Script verwendet ausschließlich bekannte Marker und bestimmt daraus die Kamera-Extrinsics mittels solvePnP. Ergebnis: - Kamera-Position im Weltkoordinatensystem - Kamera-Orientierung (Roll/Pitch/Yaw) - optionale Reprojektion zur Qualitätskontrolle Benötigt: pip install opencv-python numpy Beispiel: python 2_estimate_camera_pose_from_aruco_json.py \ --detections detection.json \ --robots robots.json """ import argparse import json import math from pathlib import Path import cv2 import numpy as np # ============================================================ # Hilfsfunktionen # ============================================================ def rvec_tvec_to_camera_pose(rvec, tvec): """ OpenCV liefert: X_cam = R * X_world + t Gesucht: Kamera-Pose im Weltkoordinatensystem => R_wc = R^T => C = -R^T * t """ R_cw, _ = cv2.Rodrigues(rvec) R_wc = R_cw.T cam_pos = -R_wc @ tvec return R_wc, cam_pos.reshape(3) def rotation_matrix_to_euler_zyx(R): """ Euler ZYX: yaw(Z), pitch(Y), roll(X) """ yaw = math.degrees(math.atan2(R[1, 0], R[0, 0])) sp = math.sqrt(R[2, 1] ** 2 + R[2, 2] ** 2) pitch = math.degrees(math.atan2(-R[2, 0], sp)) roll = math.degrees(math.atan2(R[2, 1], R[2, 2])) return roll, pitch, yaw def build_marker_lookup(robot_data): """ Liest nur Marker mit ABSOLUTER Position. Unterstützt: "position" -> absolute Weltposition [m] "relPos" -> wird aktuell ignoriert """ marker_lookup = {} for marker in robot_data.get("Marker", []): marker_id = int(marker.get("id", -1)) # negative IDs ignorieren if marker_id < 0: continue # Nur absolute Weltpositionen verwenden if "position" not in marker: continue pos = marker["position"] if pos is None: continue if len(pos) != 3: continue marker_lookup[marker_id] = np.array( pos, dtype=np.float32 ) return marker_lookup def build_marker_object_points(marker_center_world, marker_size_m): """ Baut die 3D-Eckpunkte eines Markers auf. Wichtig: Die Corner-Reihenfolge MUSS zur OpenCV-ArUco-Reihenfolge passen. Reihenfolge: top-left top-right bottom-right bottom-left """ half = marker_size_m / 2.0 local = np.array([ [-half, half, 0.0], [ half, half, 0.0], [ half, -half, 0.0], [-half, -half, 0.0], ], dtype=np.float32) return local + marker_center_world.reshape(1, 3) # ============================================================ # Main # ============================================================ def main(): parser = argparse.ArgumentParser() parser.add_argument( "--detections", required=True, help="ArUco detection JSON" ) parser.add_argument( "--robots", required=True, help="robots.json" ) parser.add_argument( "--min-confidence", type=float, default=0.5, help="Minimale Marker-Confidence" ) parser.add_argument( "--max-reprojection-error", type=float, default=3.0, help="Maximal erlaubter Reprojektionsfehler in Pixel" ) args = parser.parse_args() # ============================================================ # JSON laden # ============================================================ with open(args.detections, "r", encoding="utf-8") as f: detection_data = json.load(f) with open(args.robots, "r", encoding="utf-8") as f: robot_data = json.load(f) # ============================================================ # Kamera-Parameter # ============================================================ K = np.array( detection_data["camera"]["camera_matrix"], dtype=np.float32 ) D = np.array( detection_data["camera"]["distortion_coefficients"], dtype=np.float32 ).reshape(-1, 1) # ============================================================ # Bekannte Marker # ============================================================ known_markers = build_marker_lookup(robot_data) # ============================================================ # 2D/3D Punktlisten aufbauen # ============================================================ object_points = [] image_points = [] used_markers = [] detections = detection_data["detections"] for det in detections: marker_id = int(det["marker_id"]) confidence = float(det.get("confidence", 1.0)) if confidence < args.min_confidence: continue if marker_id not in known_markers: continue marker_center_world = known_markers[marker_id] marker_size = float(det["marker_size_m"]) obj_pts = build_marker_object_points( marker_center_world, marker_size ) img_pts = np.array( det["image_points_px"], dtype=np.float32 ) object_points.append(obj_pts) image_points.append(img_pts) used_markers.append(marker_id) if len(object_points) == 0: raise RuntimeError( "Keine bekannten Marker gefunden." ) object_points = np.concatenate(object_points, axis=0) image_points = np.concatenate(image_points, axis=0) print() print("==================================================") print("Bekannte Marker verwendet:") print(sorted(set(used_markers))) print("==================================================") print() # ============================================================ # solvePnP # ============================================================ success, rvec, tvec = cv2.solvePnP( object_points, image_points, K, D, flags=cv2.SOLVEPNP_ITERATIVE ) if not success: raise RuntimeError("solvePnP fehlgeschlagen") # ============================================================ # Kamera-Pose berechnen # ============================================================ R_wc, cam_pos = rvec_tvec_to_camera_pose( rvec, tvec ) roll, pitch, yaw = rotation_matrix_to_euler_zyx(R_wc) # ============================================================ # Reprojektionsfehler # ============================================================ projected, _ = cv2.projectPoints( object_points, rvec, tvec, K, D ) projected = projected.reshape(-1, 2) reproj_error = np.linalg.norm( projected - image_points, axis=1 ) rms = np.sqrt(np.mean(reproj_error ** 2)) max_err = np.max(reproj_error) # ============================================================ # Ausgabe # ============================================================ print("==================================================") print("KAMERA-POSE") print("==================================================") print() print("Position [m]") print(f" X = {cam_pos[0]: .6f}") print(f" Y = {cam_pos[1]: .6f}") print(f" Z = {cam_pos[2]: .6f}") print() print("Orientation [deg]") print(f" Roll = {roll: .3f}") print(f" Pitch = {pitch: .3f}") print(f" Yaw = {yaw: .3f}") print() print("Reprojection Error") print(f" RMS = {rms:.3f} px") print(f" MAX = {max_err:.3f} px") print() if max_err > args.max_reprojection_error: print("[WARNUNG] Reprojektionsfehler relativ hoch") else: print("[OK] Pose stabil") print() # ============================================================ # JSON speichern # ============================================================ out = { "camera_pose": { "position_m": { "x": float(cam_pos[0]), "y": float(cam_pos[1]), "z": float(cam_pos[2]), }, "orientation_deg": { "roll": float(roll), "pitch": float(pitch), "yaw": float(yaw), } }, "quality": { "reprojection_rms_px": float(rms), "reprojection_max_px": float(max_err), "num_markers_used": len(set(used_markers)), "markers_used": sorted(set(used_markers)) } } out_file = Path(args.detections).with_suffix(".camera_pose.json") with open(out_file, "w", encoding="utf-8") as f: json.dump(out, f, indent=2) print(f"Pose gespeichert in:") print(out_file) if __name__ == "__main__": main()