appRobotVideoControls/programs/2_estimate_camera_pose_from_aruco_json.py

#!/usr/bin/env python3
"""
2_estimate_camera_pose_from_aruco_json.py

Berechnet die Kameraposition im Maschinen-/Board-Koordinatensystem
aus:
  1. einer ArUco-Detections-JSON
  2. robots.json mit bekannten Marker-Positionen

NEU:
- Marker-Orientierungen unterstützt
- Default: Board-Marker zeigen nach +Z
- Qualitätsbewertung erweitert
- Speichert ALLE erkannten Marker
- Speichert auch fehlgeschlagene Lösungen
- Bewertet Kamerageometrie
- Bewertet Markerabdeckung
- Bewertet Sichtwinkel
- Bewertet Markeranzahl
- Speichert vollständige Rohdaten

Benötigt:
    pip install opencv-python numpy
"""

import argparse
import json
import math
from pathlib import Path

import cv2
import numpy as np


# ============================================================
# Hilfsfunktionen
# ============================================================

def normalize(v):
    n = np.linalg.norm(v)

    if n < 1e-9:
        return v

    return v / n


def rotation_matrix_from_axes(x_axis, y_axis, z_axis):

    R = np.column_stack([
        normalize(x_axis),
        normalize(y_axis),
        normalize(z_axis)
    ])

    return R.astype(np.float32)


def rvec_tvec_to_camera_pose(rvec, tvec):
    """
    OpenCV:
        X_cam = R * X_world + t

    Kamera im Weltkoordinatensystem:
        C = -R^T * t
    """

    R_cw, _ = cv2.Rodrigues(rvec)

    R_wc = R_cw.T

    cam_pos = -R_wc @ tvec

    return R_wc, cam_pos.reshape(3)


def rotation_matrix_to_euler_zyx(R):

    yaw = math.degrees(math.atan2(R[1, 0], R[0, 0]))

    sp = math.sqrt(R[2, 1] ** 2 + R[2, 2] ** 2)

    pitch = math.degrees(math.atan2(-R[2, 0], sp))

    roll = math.degrees(math.atan2(R[2, 1], R[2, 2]))

    return roll, pitch, yaw


# ============================================================
# Marker-Orientierung
# ============================================================

def get_marker_rotation(marker):

    # Explizite Rotation vorhanden?
    if "rotation_matrix" in marker:
        return np.array(
            marker["rotation_matrix"],
            dtype=np.float32
        )

    # Default:
    # Marker zeigt nach +Z
    #
    # x = rechts
    # y = oben
    # z = aus Board heraus (+Z)

    x_axis = np.array([1, 0, 0], dtype=np.float32)
    y_axis = np.array([0, 1, 0], dtype=np.float32)
    z_axis = np.array([0, 0, 1], dtype=np.float32)

    return rotation_matrix_from_axes(
        x_axis,
        y_axis,
        z_axis
    )


# ============================================================
# Marker Lookup
# ============================================================

def build_marker_lookup(robot_data):

    marker_lookup = {}

    for marker in robot_data.get("Marker", []):

        marker_id = int(marker.get("id", -1))

        if marker_id < 0:
            continue

        # Nur absolute Marker verwenden
        if "position" not in marker:
            continue

        pos = marker["position"]

        if pos is None:
            continue

        if len(pos) != 3:
            continue

        rotation = get_marker_rotation(marker)

        marker_lookup[marker_id] = {
            "position": np.array(
                pos,
                dtype=np.float32
            ),
            "rotation": rotation,
            "on": marker.get("on", "unknown")
        }

    return marker_lookup


# ============================================================
# Marker-Eckpunkte
# ============================================================

def build_marker_object_points(
        marker_center_world,
        marker_rotation,
        marker_size_m):

    half = marker_size_m / 2.0

    # OpenCV Corner Reihenfolge
    local = np.array([
        [-half,  half, 0.0],
        [ half,  half, 0.0],
        [ half, -half, 0.0],
        [-half, -half, 0.0],
    ], dtype=np.float32)

    rotated = (marker_rotation @ local.T).T

    return rotated + marker_center_world.reshape(1, 3)


# ============================================================
# Qualitätsmetriken
# ============================================================

def compute_marker_spread(points_3d):

    if len(points_3d) < 2:
        return 0.0

    mins = np.min(points_3d, axis=0)
    maxs = np.max(points_3d, axis=0)

    diag = np.linalg.norm(maxs - mins)

    return float(diag)


def compute_viewing_angles(
        camera_position,
        marker_lookup,
        used_markers):

    results = []

    for marker_id in used_markers:

        marker = marker_lookup[marker_id]

        pos = marker["position"]

        R = marker["rotation"]

        normal = R[:, 2]

        to_camera = normalize(camera_position - pos)

        dot = np.clip(
            np.dot(normal, to_camera),
            -1.0,
            1.0
        )

        angle = math.degrees(math.acos(dot))

        results.append(angle)

    if len(results) == 0:
        return {
            "mean": None,
            "max": None
        }

    return {
        "mean": float(np.mean(results)),
        "max": float(np.max(results))
    }


def compute_pose_quality(
        rms,
        max_err,
        num_markers,
        marker_spread,
        viewing_angle_mean):

    score = 100.0

    # Reprojection Error
    score -= rms * 8.0

    # Max Error
    score -= max_err * 2.0

    # Wenige Marker
    if num_markers < 2:
        score -= 50

    elif num_markers < 4:
        score -= 25

    elif num_markers < 6:
        score -= 10

    # Schlechte räumliche Verteilung
    if marker_spread < 0.10:
        score -= 30

    elif marker_spread < 0.25:
        score -= 15

    # Schlechter Blickwinkel
    if viewing_angle_mean is not None:

        if viewing_angle_mean > 70:
            score -= 25

        elif viewing_angle_mean > 50:
            score -= 10

    score = max(0.0, min(100.0, score))

    return float(score)


# ============================================================
# Main
# ============================================================

def main():

    parser = argparse.ArgumentParser()

    parser.add_argument(
        "--detections",
        required=True
    )

    parser.add_argument(
        "--robots",
        required=True
    )

    parser.add_argument(
        "--min-confidence",
        type=float,
        default=0.5
    )

    parser.add_argument(
        "--max-reprojection-error",
        type=float,
        default=3.0
    )

    args = parser.parse_args()

    # ============================================================
    # JSON laden
    # ============================================================

    with open(args.detections, "r", encoding="utf-8") as f:
        detection_data = json.load(f)

    with open(args.robots, "r", encoding="utf-8") as f:
        robot_data = json.load(f)

    # ============================================================
    # Kamera
    # ============================================================

    K = np.array(
        detection_data["camera"]["camera_matrix"],
        dtype=np.float32
    )

    D = np.array(
        detection_data["camera"]["distortion_coefficients"],
        dtype=np.float32
    ).reshape(-1, 1)

    # ============================================================
    # Marker laden
    # ============================================================

    known_markers = build_marker_lookup(robot_data)

    # ============================================================
    # Punktlisten
    # ============================================================

    object_points = []
    image_points = []

    used_markers = []
    rejected_markers = []

    detections = detection_data["detections"]

    for det in detections:

        marker_id = int(det["marker_id"])

        confidence = float(
            det.get("confidence", 1.0)
        )

        reason = None

        if confidence < args.min_confidence:
            reason = "low_confidence"

        elif marker_id not in known_markers:
            reason = "unknown_marker"

        if reason is not None:

            rejected_markers.append({
                "marker_id": marker_id,
                "reason": reason,
                "confidence": confidence
            })

            continue

        marker_info = known_markers[marker_id]

        marker_center_world = marker_info["position"]

        marker_rotation = marker_info["rotation"]

        marker_size = float(
            det["marker_size_m"]
        )

        obj_pts = build_marker_object_points(
            marker_center_world,
            marker_rotation,
            marker_size
        )

        img_pts = np.array(
            det["image_points_px"],
            dtype=np.float32
        )

        object_points.append(obj_pts)
        image_points.append(img_pts)

        used_markers.append(marker_id)

    # ============================================================
    # Ausgabe vorbereiten
    # ============================================================

    out = {
        "success": False,
        "camera_pose": None,
        "quality": {},
        "used_markers": [],
        "rejected_markers": rejected_markers,
        "all_detected_markers": [
            int(d["marker_id"])
            for d in detections
        ]
    }

    # ============================================================
    # Zu wenige Marker
    # ============================================================

    if len(object_points) == 0:

        out["quality"] = {
            "error": "no_known_markers",
            "num_detected_markers": len(detections),
            "num_used_markers": 0
        }

        out_file = Path(args.detections).with_suffix(
            ".camera_pose.json"
        )

        with open(out_file, "w", encoding="utf-8") as f:
            json.dump(out, f, indent=2)

        print("Keine bekannten Marker gefunden.")
        print(out_file)

        return

    # ============================================================
    # solvePnP
    # ============================================================

    object_points = np.concatenate(
        object_points,
        axis=0
    )

    image_points = np.concatenate(
        image_points,
        axis=0
    )

    success, rvec, tvec = cv2.solvePnP(
        object_points,
        image_points,
        K,
        D,
        flags=cv2.SOLVEPNP_ITERATIVE
    )

    if not success:

        out["quality"] = {
            "error": "solvepnp_failed",
            "num_used_markers": len(set(used_markers))
        }

        out_file = Path(args.detections).with_suffix(
            ".camera_pose.json"
        )

        with open(out_file, "w", encoding="utf-8") as f:
            json.dump(out, f, indent=2)

        print("solvePnP fehlgeschlagen")
        return

    # ============================================================
    # Kamera Pose
    # ============================================================

    R_wc, cam_pos = rvec_tvec_to_camera_pose(
        rvec,
        tvec
    )

    roll, pitch, yaw = rotation_matrix_to_euler_zyx(
        R_wc
    )

    # ============================================================
    # Reprojektion
    # ============================================================

    projected, _ = cv2.projectPoints(
        object_points,
        rvec,
        tvec,
        K,
        D
    )

    projected = projected.reshape(-1, 2)

    reproj_error = np.linalg.norm(
        projected - image_points,
        axis=1
    )

    rms = float(
        np.sqrt(np.mean(reproj_error ** 2))
    )

    max_err = float(
        np.max(reproj_error)
    )

    # ============================================================
    # Qualität
    # ============================================================

    marker_positions = np.array([
        known_markers[mid]["position"]
        for mid in set(used_markers)
    ])

    marker_spread = compute_marker_spread(
        marker_positions
    )

    viewing = compute_viewing_angles(
        cam_pos,
        known_markers,
        set(used_markers)
    )

    quality_score = compute_pose_quality(
        rms,
        max_err,
        len(set(used_markers)),
        marker_spread,
        viewing["mean"]
    )

    # ============================================================
    # Ausgabe
    # ============================================================

    print()
    print("==================================================")
    print("KAMERA-POSE")
    print("==================================================")

    print()
    print("Position [m]")
    print(f"X = {cam_pos[0]:.6f}")
    print(f"Y = {cam_pos[1]:.6f}")
    print(f"Z = {cam_pos[2]:.6f}")

    print()
    print("Orientation [deg]")
    print(f"Roll  = {roll:.3f}")
    print(f"Pitch = {pitch:.3f}")
    print(f"Yaw   = {yaw:.3f}")

    print()
    print("Reprojection")
    print(f"RMS = {rms:.3f}px")
    print(f"MAX = {max_err:.3f}px")

    print()
    print("Quality")
    print(f"Score = {quality_score:.1f}/100")
    print(f"Marker Spread = {marker_spread:.3f}m")

    if viewing["mean"] is not None:
        print(
            f"Mean Viewing Angle = "
            f"{viewing['mean']:.1f}deg"
        )

    # ============================================================
    # JSON speichern
    # ============================================================

    out["success"] = True

    out["camera_pose"] = {
        "position_m": {
            "x": float(cam_pos[0]),
            "y": float(cam_pos[1]),
            "z": float(cam_pos[2]),
        },
        "orientation_deg": {
            "roll": float(roll),
            "pitch": float(pitch),
            "yaw": float(yaw),
        },
        "rotation_matrix_world_from_camera": (
            R_wc.tolist()
        )
    }

    out["quality"] = {
        "pose_quality_score": quality_score,
        "reprojection_rms_px": rms,
        "reprojection_max_px": max_err,
        "num_detected_markers": len(detections),
        "num_used_markers": len(set(used_markers)),
        "marker_spread_m": marker_spread,
        "mean_viewing_angle_deg": viewing["mean"],
        "max_viewing_angle_deg": viewing["max"],
        "pose_stable":
            max_err <= args.max_reprojection_error
    }

    out["used_markers"] = sorted(
        list(set(used_markers))
    )

    out_file = Path(args.detections).with_suffix(
        ".camera_pose.json"
    )

    with open(out_file, "w", encoding="utf-8") as f:
        json.dump(out, f, indent=2)

    print()
    print("Gespeichert:")
    print(out_file)


if __name__ == "__main__":
    main()