Files
appRobotVideoControls/programs/2_estimate_camera_pose_from_aruco_json.py
chk f37097ea96 2 Camera Detection
Checke wo sich im Bild die Kamera befindet
2026-05-25 16:50:48 +02:00

383 lines
8.9 KiB
Python

#!/usr/bin/env python3
"""
estimate_camera_pose_from_aruco_json.py
Berechnet die Kameraposition im Maschinen-/Board-Koordinatensystem
aus:
1. einer ArUco-Detections-JSON
2. robots.json mit bekannten Marker-Positionen
Das Script verwendet ausschließlich bekannte Marker
und bestimmt daraus die Kamera-Extrinsics mittels solvePnP.
Ergebnis:
- Kamera-Position im Weltkoordinatensystem
- Kamera-Orientierung (Roll/Pitch/Yaw)
- optionale Reprojektion zur Qualitätskontrolle
Benötigt:
pip install opencv-python numpy
Beispiel:
python 2_estimate_camera_pose_from_aruco_json.py \
--detections detection.json \
--robots robots.json
"""
import argparse
import json
import math
from pathlib import Path
import cv2
import numpy as np
# ============================================================
# Hilfsfunktionen
# ============================================================
def rvec_tvec_to_camera_pose(rvec, tvec):
"""
OpenCV liefert:
X_cam = R * X_world + t
Gesucht:
Kamera-Pose im Weltkoordinatensystem
=> R_wc = R^T
=> C = -R^T * t
"""
R_cw, _ = cv2.Rodrigues(rvec)
R_wc = R_cw.T
cam_pos = -R_wc @ tvec
return R_wc, cam_pos.reshape(3)
def rotation_matrix_to_euler_zyx(R):
"""
Euler ZYX:
yaw(Z), pitch(Y), roll(X)
"""
yaw = math.degrees(math.atan2(R[1, 0], R[0, 0]))
sp = math.sqrt(R[2, 1] ** 2 + R[2, 2] ** 2)
pitch = math.degrees(math.atan2(-R[2, 0], sp))
roll = math.degrees(math.atan2(R[2, 1], R[2, 2]))
return roll, pitch, yaw
def build_marker_lookup(robot_data):
"""
Liest nur Marker mit ABSOLUTER Position.
Unterstützt:
"position" -> absolute Weltposition [m]
"relPos" -> wird aktuell ignoriert
"""
marker_lookup = {}
for marker in robot_data.get("Marker", []):
marker_id = int(marker.get("id", -1))
# negative IDs ignorieren
if marker_id < 0:
continue
# Nur absolute Weltpositionen verwenden
if "position" not in marker:
continue
pos = marker["position"]
if pos is None:
continue
if len(pos) != 3:
continue
marker_lookup[marker_id] = np.array(
pos,
dtype=np.float32
)
return marker_lookup
def build_marker_object_points(marker_center_world, marker_size_m):
"""
Baut die 3D-Eckpunkte eines Markers auf.
Wichtig:
Die Corner-Reihenfolge MUSS zur OpenCV-ArUco-Reihenfolge passen.
Reihenfolge:
top-left
top-right
bottom-right
bottom-left
"""
half = marker_size_m / 2.0
local = np.array([
[-half, half, 0.0],
[ half, half, 0.0],
[ half, -half, 0.0],
[-half, -half, 0.0],
], dtype=np.float32)
return local + marker_center_world.reshape(1, 3)
# ============================================================
# Main
# ============================================================
def main():
parser = argparse.ArgumentParser()
parser.add_argument(
"--detections",
required=True,
help="ArUco detection JSON"
)
parser.add_argument(
"--robots",
required=True,
help="robots.json"
)
parser.add_argument(
"--min-confidence",
type=float,
default=0.5,
help="Minimale Marker-Confidence"
)
parser.add_argument(
"--max-reprojection-error",
type=float,
default=3.0,
help="Maximal erlaubter Reprojektionsfehler in Pixel"
)
args = parser.parse_args()
# ============================================================
# JSON laden
# ============================================================
with open(args.detections, "r", encoding="utf-8") as f:
detection_data = json.load(f)
with open(args.robots, "r", encoding="utf-8") as f:
robot_data = json.load(f)
# ============================================================
# Kamera-Parameter
# ============================================================
K = np.array(
detection_data["camera"]["camera_matrix"],
dtype=np.float32
)
D = np.array(
detection_data["camera"]["distortion_coefficients"],
dtype=np.float32
).reshape(-1, 1)
# ============================================================
# Bekannte Marker
# ============================================================
known_markers = build_marker_lookup(robot_data)
# ============================================================
# 2D/3D Punktlisten aufbauen
# ============================================================
object_points = []
image_points = []
used_markers = []
detections = detection_data["detections"]
for det in detections:
marker_id = int(det["marker_id"])
confidence = float(det.get("confidence", 1.0))
if confidence < args.min_confidence:
continue
if marker_id not in known_markers:
continue
marker_center_world = known_markers[marker_id]
marker_size = float(det["marker_size_m"])
obj_pts = build_marker_object_points(
marker_center_world,
marker_size
)
img_pts = np.array(
det["image_points_px"],
dtype=np.float32
)
object_points.append(obj_pts)
image_points.append(img_pts)
used_markers.append(marker_id)
if len(object_points) == 0:
raise RuntimeError(
"Keine bekannten Marker gefunden."
)
object_points = np.concatenate(object_points, axis=0)
image_points = np.concatenate(image_points, axis=0)
print()
print("==================================================")
print("Bekannte Marker verwendet:")
print(sorted(set(used_markers)))
print("==================================================")
print()
# ============================================================
# solvePnP
# ============================================================
success, rvec, tvec = cv2.solvePnP(
object_points,
image_points,
K,
D,
flags=cv2.SOLVEPNP_ITERATIVE
)
if not success:
raise RuntimeError("solvePnP fehlgeschlagen")
# ============================================================
# Kamera-Pose berechnen
# ============================================================
R_wc, cam_pos = rvec_tvec_to_camera_pose(
rvec,
tvec
)
roll, pitch, yaw = rotation_matrix_to_euler_zyx(R_wc)
# ============================================================
# Reprojektionsfehler
# ============================================================
projected, _ = cv2.projectPoints(
object_points,
rvec,
tvec,
K,
D
)
projected = projected.reshape(-1, 2)
reproj_error = np.linalg.norm(
projected - image_points,
axis=1
)
rms = np.sqrt(np.mean(reproj_error ** 2))
max_err = np.max(reproj_error)
# ============================================================
# Ausgabe
# ============================================================
print("==================================================")
print("KAMERA-POSE")
print("==================================================")
print()
print("Position [m]")
print(f" X = {cam_pos[0]: .6f}")
print(f" Y = {cam_pos[1]: .6f}")
print(f" Z = {cam_pos[2]: .6f}")
print()
print("Orientation [deg]")
print(f" Roll = {roll: .3f}")
print(f" Pitch = {pitch: .3f}")
print(f" Yaw = {yaw: .3f}")
print()
print("Reprojection Error")
print(f" RMS = {rms:.3f} px")
print(f" MAX = {max_err:.3f} px")
print()
if max_err > args.max_reprojection_error:
print("[WARNUNG] Reprojektionsfehler relativ hoch")
else:
print("[OK] Pose stabil")
print()
# ============================================================
# JSON speichern
# ============================================================
out = {
"camera_pose": {
"position_m": {
"x": float(cam_pos[0]),
"y": float(cam_pos[1]),
"z": float(cam_pos[2]),
},
"orientation_deg": {
"roll": float(roll),
"pitch": float(pitch),
"yaw": float(yaw),
}
},
"quality": {
"reprojection_rms_px": float(rms),
"reprojection_max_px": float(max_err),
"num_markers_used": len(set(used_markers)),
"markers_used": sorted(set(used_markers))
}
}
out_file = Path(args.detections).with_suffix(".camera_pose.json")
with open(out_file, "w", encoding="utf-8") as f:
json.dump(out, f, indent=2)
print(f"Pose gespeichert in:")
print(out_file)
if __name__ == "__main__":
main()