appRobotVideoControls/programs/2_estimate_camera_pose_from_aruco_json.py

#!/usr/bin/env python3
"""
estimate_camera_pose_from_aruco_json.py

Berechnet die Kameraposition im Maschinen-/Board-Koordinatensystem
aus:
  1. einer ArUco-Detections-JSON
  2. robots.json mit bekannten Marker-Positionen

Das Script verwendet ausschließlich bekannte Marker
und bestimmt daraus die Kamera-Extrinsics mittels solvePnP.

Ergebnis:
- Kamera-Position im Weltkoordinatensystem
- Kamera-Orientierung (Roll/Pitch/Yaw)
- optionale Reprojektion zur Qualitätskontrolle

Benötigt:
    pip install opencv-python numpy

Beispiel:
    python 2_estimate_camera_pose_from_aruco_json.py \
        --detections detection.json \
        --robots robots.json
"""

import argparse
import json
import math
from pathlib import Path

import cv2
import numpy as np


# ============================================================
# Hilfsfunktionen
# ============================================================

def rvec_tvec_to_camera_pose(rvec, tvec):
    """
    OpenCV liefert:
        X_cam = R * X_world + t

    Gesucht:
        Kamera-Pose im Weltkoordinatensystem

    => R_wc = R^T
    => C = -R^T * t
    """

    R_cw, _ = cv2.Rodrigues(rvec)

    R_wc = R_cw.T
    cam_pos = -R_wc @ tvec

    return R_wc, cam_pos.reshape(3)


def rotation_matrix_to_euler_zyx(R):
    """
    Euler ZYX:
        yaw(Z), pitch(Y), roll(X)
    """

    yaw = math.degrees(math.atan2(R[1, 0], R[0, 0]))

    sp = math.sqrt(R[2, 1] ** 2 + R[2, 2] ** 2)

    pitch = math.degrees(math.atan2(-R[2, 0], sp))

    roll = math.degrees(math.atan2(R[2, 1], R[2, 2]))

    return roll, pitch, yaw


def build_marker_lookup(robot_data):
    """
    Liest nur Marker mit ABSOLUTER Position.

    Unterstützt:
        "position"  -> absolute Weltposition [m]
        "relPos"    -> wird aktuell ignoriert
    """

    marker_lookup = {}

    for marker in robot_data.get("Marker", []):

        marker_id = int(marker.get("id", -1))

        # negative IDs ignorieren
        if marker_id < 0:
            continue

        # Nur absolute Weltpositionen verwenden
        if "position" not in marker:
            continue

        pos = marker["position"]

        if pos is None:
            continue

        if len(pos) != 3:
            continue

        marker_lookup[marker_id] = np.array(
            pos,
            dtype=np.float32
        )

    return marker_lookup

def build_marker_object_points(marker_center_world, marker_size_m):
    """
    Baut die 3D-Eckpunkte eines Markers auf.

    Wichtig:
    Die Corner-Reihenfolge MUSS zur OpenCV-ArUco-Reihenfolge passen.

    Reihenfolge:
        top-left
        top-right
        bottom-right
        bottom-left
    """

    half = marker_size_m / 2.0

    local = np.array([
        [-half,  half, 0.0],
        [ half,  half, 0.0],
        [ half, -half, 0.0],
        [-half, -half, 0.0],
    ], dtype=np.float32)

    return local + marker_center_world.reshape(1, 3)


# ============================================================
# Main
# ============================================================

def main():

    parser = argparse.ArgumentParser()

    parser.add_argument(
        "--detections",
        required=True,
        help="ArUco detection JSON"
    )

    parser.add_argument(
        "--robots",
        required=True,
        help="robots.json"
    )

    parser.add_argument(
        "--min-confidence",
        type=float,
        default=0.5,
        help="Minimale Marker-Confidence"
    )

    parser.add_argument(
        "--max-reprojection-error",
        type=float,
        default=3.0,
        help="Maximal erlaubter Reprojektionsfehler in Pixel"
    )

    args = parser.parse_args()

    # ============================================================
    # JSON laden
    # ============================================================

    with open(args.detections, "r", encoding="utf-8") as f:
        detection_data = json.load(f)

    with open(args.robots, "r", encoding="utf-8") as f:
        robot_data = json.load(f)

    # ============================================================
    # Kamera-Parameter
    # ============================================================

    K = np.array(
        detection_data["camera"]["camera_matrix"],
        dtype=np.float32
    )

    D = np.array(
        detection_data["camera"]["distortion_coefficients"],
        dtype=np.float32
    ).reshape(-1, 1)

    # ============================================================
    # Bekannte Marker
    # ============================================================

    known_markers = build_marker_lookup(robot_data)

    # ============================================================
    # 2D/3D Punktlisten aufbauen
    # ============================================================

    object_points = []
    image_points = []

    used_markers = []

    detections = detection_data["detections"]

    for det in detections:

        marker_id = int(det["marker_id"])

        confidence = float(det.get("confidence", 1.0))

        if confidence < args.min_confidence:
            continue

        if marker_id not in known_markers:
            continue

        marker_center_world = known_markers[marker_id]

        marker_size = float(det["marker_size_m"])

        obj_pts = build_marker_object_points(
            marker_center_world,
            marker_size
        )

        img_pts = np.array(
            det["image_points_px"],
            dtype=np.float32
        )

        object_points.append(obj_pts)
        image_points.append(img_pts)

        used_markers.append(marker_id)

    if len(object_points) == 0:
        raise RuntimeError(
            "Keine bekannten Marker gefunden."
        )

    object_points = np.concatenate(object_points, axis=0)
    image_points = np.concatenate(image_points, axis=0)

    print()
    print("==================================================")
    print("Bekannte Marker verwendet:")
    print(sorted(set(used_markers)))
    print("==================================================")
    print()

    # ============================================================
    # solvePnP
    # ============================================================

    success, rvec, tvec = cv2.solvePnP(
        object_points,
        image_points,
        K,
        D,
        flags=cv2.SOLVEPNP_ITERATIVE
    )

    if not success:
        raise RuntimeError("solvePnP fehlgeschlagen")

    # ============================================================
    # Kamera-Pose berechnen
    # ============================================================

    R_wc, cam_pos = rvec_tvec_to_camera_pose(
        rvec,
        tvec
    )

    roll, pitch, yaw = rotation_matrix_to_euler_zyx(R_wc)

    # ============================================================
    # Reprojektionsfehler
    # ============================================================

    projected, _ = cv2.projectPoints(
        object_points,
        rvec,
        tvec,
        K,
        D
    )

    projected = projected.reshape(-1, 2)

    reproj_error = np.linalg.norm(
        projected - image_points,
        axis=1
    )

    rms = np.sqrt(np.mean(reproj_error ** 2))
    max_err = np.max(reproj_error)

    # ============================================================
    # Ausgabe
    # ============================================================

    print("==================================================")
    print("KAMERA-POSE")
    print("==================================================")
    print()

    print("Position [m]")
    print(f"  X = {cam_pos[0]: .6f}")
    print(f"  Y = {cam_pos[1]: .6f}")
    print(f"  Z = {cam_pos[2]: .6f}")

    print()

    print("Orientation [deg]")
    print(f"  Roll  = {roll: .3f}")
    print(f"  Pitch = {pitch: .3f}")
    print(f"  Yaw   = {yaw: .3f}")

    print()

    print("Reprojection Error")
    print(f"  RMS = {rms:.3f} px")
    print(f"  MAX = {max_err:.3f} px")

    print()

    if max_err > args.max_reprojection_error:
        print("[WARNUNG] Reprojektionsfehler relativ hoch")
    else:
        print("[OK] Pose stabil")

    print()

    # ============================================================
    # JSON speichern
    # ============================================================

    out = {
        "camera_pose": {
            "position_m": {
                "x": float(cam_pos[0]),
                "y": float(cam_pos[1]),
                "z": float(cam_pos[2]),
            },
            "orientation_deg": {
                "roll": float(roll),
                "pitch": float(pitch),
                "yaw": float(yaw),
            }
        },
        "quality": {
            "reprojection_rms_px": float(rms),
            "reprojection_max_px": float(max_err),
            "num_markers_used": len(set(used_markers)),
            "markers_used": sorted(set(used_markers))
        }
    }

    out_file = Path(args.detections).with_suffix(".camera_pose.json")

    with open(out_file, "w", encoding="utf-8") as f:
        json.dump(out, f, indent=2)

    print(f"Pose gespeichert in:")
    print(out_file)


if __name__ == "__main__":
    main()