#!/usr/bin/env python3
"""
4_robotState_estimation_v2.py

Sequential robot-state estimation from optimized 3D ArUco marker positions.

Mathematical idea
-----------------
This script estimates a robot state q from already optimized marker positions p_i^obs.

The robot state consists of:
  - a world pose for the root link (translation + rotation)
  - one scalar variable for each actuated joint

Forward kinematics predicts each marker position:
  p_i^pred(q) = T_world(link(i); q) * p_i^local

Instead of solving the whole robot at once, we solve it sequentially by kinematic prefix:
  stage 0: root link
  stage 1: root + first child layer
  stage 2: root + first three connected elements ...
  ...
Each stage reuses the previous solution as initialization and only activates the
joint variables that belong to the current kinematic prefix.

The residual minimized at each stage is:
  sum_i w_i ||p_i^pred(q) - p_i^obs||^2
  + weak priors on the active joint values
  + weak prior on the root pose

This is intentionally conservative and debuggable. It is designed so that:
  - early links can be resolved first,
  - later links only refine an already plausible state,
  - one visible marker on an early rigid prefix can already fix a lot of state,
  - ambiguous branches can later be resolved by adding more links.

Optional future extension:
  raw detections + camera poses can be added later as visibility / normal cues,
  but this script intentionally works directly from aruco_positions_optimized*.json.

Dependencies
------------
numpy, scipy (optional but strongly recommended)
"""

from __future__ import annotations

import argparse
import json
import math
import os
import sys
import time
from dataclasses import dataclass
from typing import Any, Dict, List, Optional, Tuple

import numpy as np


# =============================================================================
# JSON / path helpers
# =============================================================================

def resolve_path(path: str) -> str:
    path = os.path.expanduser(path)
    if os.path.isabs(path):
        return path
    return os.path.abspath(path)


def load_json(path: str) -> Any:
    with open(resolve_path(path), "r", encoding="utf-8") as f:
        return json.load(f)


def save_json(path: str, data: Any) -> None:
    with open(resolve_path(path), "w", encoding="utf-8") as f:
        json.dump(data, f, indent=2)


# =============================================================================
# Small math helpers
# =============================================================================

def safe_norm(v: np.ndarray, eps: float = 1e-12) -> float:
    return float(np.linalg.norm(v) + eps)


def normalize(v: np.ndarray, eps: float = 1e-12) -> np.ndarray:
    v = np.asarray(v, dtype=np.float64)
    return v / safe_norm(v, eps)


def get_length_scale(robot_data: Dict[str, Any]) -> float:
    units = robot_data.get("units", {}) or {}
    length_unit = str(units.get("length", "")).strip().lower()
    if length_unit in ("mm", "millimeter", "millimeters"):
        return 1.0 / 1000.0
    if length_unit in ("cm", "centimeter", "centimeters"):
        return 1.0 / 100.0
    return 1.0


def rotation_matrix_xyz(rx: float, ry: float, rz: float, degrees: bool = False) -> np.ndarray:
    """Rotation order: X then Y then Z."""
    if degrees:
        rx, ry, rz = math.radians(rx), math.radians(ry), math.radians(rz)

    cx, sx = math.cos(rx), math.sin(rx)
    cy, sy = math.cos(ry), math.sin(ry)
    cz, sz = math.cos(rz), math.sin(rz)

    rx_m = np.array([[1.0, 0.0, 0.0],
                     [0.0, cx, -sx],
                     [0.0, sx, cx]], dtype=np.float64)
    ry_m = np.array([[cy, 0.0, sy],
                     [0.0, 1.0, 0.0],
                     [-sy, 0.0, cy]], dtype=np.float64)
    rz_m = np.array([[cz, -sz, 0.0],
                     [sz, cz, 0.0],
                     [0.0, 0.0, 1.0]], dtype=np.float64)
    return rz_m @ ry_m @ rx_m


def axis_angle_rotation(axis: np.ndarray, angle_rad: float) -> np.ndarray:
    axis = normalize(axis)
    x, y, z = axis
    c = math.cos(angle_rad)
    s = math.sin(angle_rad)
    C = 1.0 - c
    return np.array([
        [c + x * x * C, x * y * C - z * s, x * z * C + y * s],
        [y * x * C + z * s, c + y * y * C, y * z * C - x * s],
        [z * x * C - y * s, z * y * C + x * s, c + z * z * C]
    ], dtype=np.float64)


def make_transform(R: Optional[np.ndarray] = None, t: Optional[np.ndarray] = None) -> np.ndarray:
    T = np.eye(4, dtype=np.float64)
    if R is not None:
        T[:3, :3] = np.asarray(R, dtype=np.float64)
    if t is not None:
        T[:3, 3] = np.asarray(t, dtype=np.float64).reshape(3)
    return T


def transform_point(T: np.ndarray, p: np.ndarray) -> np.ndarray:
    p4 = np.ones(4, dtype=np.float64)
    p4[:3] = np.asarray(p, dtype=np.float64).reshape(3)
    return (T @ p4)[:3]


def matrix_to_euler_xyz(R: np.ndarray) -> np.ndarray:
    """Return XYZ Euler angles in degrees."""
    R = np.asarray(R, dtype=np.float64)
    sy = math.sqrt(R[0, 0] * R[0, 0] + R[1, 0] * R[1, 0])
    singular = sy < 1e-9

    if not singular:
        x = math.atan2(R[2, 1], R[2, 2])
        y = math.atan2(-R[2, 0], sy)
        z = math.atan2(R[1, 0], R[0, 0])
    else:
        x = math.atan2(-R[1, 2], R[1, 1])
        y = math.atan2(-R[2, 0], sy)
        z = 0.0
    return np.degrees([x, y, z])


def wrap_angle_rad(a: float) -> float:
    return (a + math.pi) % (2.0 * math.pi) - math.pi


def kabsch(P: np.ndarray, Q: np.ndarray, W: Optional[np.ndarray] = None) -> Tuple[np.ndarray, np.ndarray]:
    """Find R, t such that R @ P + t ≈ Q."""
    P = np.asarray(P, dtype=np.float64)
    Q = np.asarray(Q, dtype=np.float64)
    assert P.shape == Q.shape and P.shape[1] == 3

    if W is None:
        W = np.ones(len(P), dtype=np.float64)
    W = np.asarray(W, dtype=np.float64).reshape(-1)
    W = np.maximum(W, 1e-12)
    W = W / np.sum(W)

    p_cent = np.sum(P * W[:, None], axis=0)
    q_cent = np.sum(Q * W[:, None], axis=0)

    P0 = P - p_cent
    Q0 = Q - q_cent
    H = (P0 * W[:, None]).T @ Q0

    U, S, Vt = np.linalg.svd(H)
    R = Vt.T @ U.T
    if np.linalg.det(R) < 0:
        Vt[-1, :] *= -1.0
        R = Vt.T @ U.T
    t = q_cent - R @ p_cent
    return R, t


# =============================================================================
# Data structures
# =============================================================================

@dataclass
class MarkerInfo:
    marker_id: int
    link_name: str
    local_pos_m: np.ndarray
    size: Optional[float] = None
    normal: Optional[np.ndarray] = None
    name: str = ""


@dataclass
class LinkInfo:
    name: str
    parent: Optional[str]
    joint_type: Optional[str]
    joint_axis: Optional[np.ndarray]
    joint_origin_m: np.ndarray
    joint_rotation_deg: np.ndarray
    joint_variable: Optional[str]
    mount_position_m: np.ndarray
    mount_rotation_deg: np.ndarray
    markers: List[MarkerInfo]


@dataclass
class StageResult:
    depth: int
    active_links: List[str]
    active_joint_vars: List[str]
    mean_error_m: Optional[float]
    rms_error_m: Optional[float]
    worst_error_m: Optional[float]
    num_markers_used: int
    optimizer_info: Dict[str, Any]


@dataclass
class EstimationConfig:
    root_pose_prior_weight: float = 0.1
    joint_prior_weight: float = 0.05
    marker_base_weight: float = 1.0
    robust_loss: str = "soft_l1"
    max_iterations: int = 120
    include_joint_prior: bool = True
    include_root_prior: bool = True
    sequential: bool = True
    show_stage_reports: bool = True


# =============================================================================
# Robot parsing
# =============================================================================

def parse_robot(robot_data: Dict[str, Any]) -> Tuple[Dict[str, LinkInfo], Dict[int, MarkerInfo], List[str]]:
    length_scale = get_length_scale(robot_data)
    links = robot_data.get("links", {}) or {}

    link_infos: Dict[str, LinkInfo] = {}
    marker_by_id: Dict[int, MarkerInfo] = {}
    issues: List[str] = []

    for link_name, link_data in links.items():
        parent = link_data.get("parent", None)

        joint = link_data.get("jointToParent", {}) or {}
        joint_type = joint.get("type", None)

        axis = joint.get("axis", None)
        if axis is not None:
            axis = np.asarray(axis, dtype=np.float64)
            if safe_norm(axis) > 1e-12:
                axis = normalize(axis)
            else:
                axis = None

        joint_origin = np.asarray(joint.get("origin", [0, 0, 0]), dtype=np.float64) * float(length_scale)
        joint_rotation_deg = np.asarray(joint.get("rotation", [0, 0, 0]), dtype=np.float64)
        joint_variable = joint.get("variable", None)

        mount_position = np.asarray(link_data.get("mountPosition", [0, 0, 0]), dtype=np.float64) * float(length_scale)
        mount_rotation_deg = np.asarray(link_data.get("mountRotation", [0, 0, 0]), dtype=np.float64)

        markers_data = link_data.get("markers", []) or []
        markers: List[MarkerInfo] = []
        for idx, m in enumerate(markers_data):
            marker_id = int(m.get("id", -1))
            pos = m.get("position", None)

            if marker_id < 0 or pos is None or len(pos) != 3:
                issues.append(f"[WARN] link='{link_name}': skipped invalid marker entry at index {idx}")
                continue

            if marker_id in marker_by_id:
                issues.append(
                    f"[WARN] duplicate marker ID {marker_id} in link '{link_name}' "
                    f"(already in '{marker_by_id[marker_id].link_name}'); using first occurrence"
                )
                continue

            normal = None
            if "normal" in m and isinstance(m["normal"], list) and len(m["normal"]) == 3:
                normal = np.asarray(m["normal"], dtype=np.float64)

            marker = MarkerInfo(
                marker_id=marker_id,
                link_name=link_name,
                local_pos_m=np.asarray(pos, dtype=np.float64) * float(length_scale),
                size=m.get("size", None),
                normal=normal,
                name=str(m.get("name", f"marker_{marker_id}")),
            )
            markers.append(marker)
            marker_by_id[marker_id] = marker

        link_infos[link_name] = LinkInfo(
            name=link_name,
            parent=parent,
            joint_type=joint_type,
            joint_axis=axis,
            joint_origin_m=joint_origin,
            joint_rotation_deg=joint_rotation_deg,
            joint_variable=joint_variable,
            mount_position_m=mount_position,
            mount_rotation_deg=mount_rotation_deg,
            markers=markers,
        )

    return link_infos, marker_by_id, issues


def topological_links(link_infos: Dict[str, LinkInfo]) -> List[str]:
    children = {name: [] for name in link_infos}
    roots = []
    for name, info in link_infos.items():
        if info.parent is None:
            roots.append(name)
        else:
            children.setdefault(info.parent, []).append(name)

    order: List[str] = []
    queue = list(roots)
    seen = set()

    while queue:
        cur = queue.pop(0)
        if cur in seen:
            continue
        seen.add(cur)
        order.append(cur)
        for ch in children.get(cur, []):
            queue.append(ch)

    for name in link_infos:
        if name not in seen:
            order.append(name)
    return order


def compute_link_depths(link_infos: Dict[str, LinkInfo]) -> Dict[str, int]:
    depths: Dict[str, int] = {}

    def depth_of(name: str) -> int:
        if name in depths:
            return depths[name]
        info = link_infos[name]
        if info.parent is None:
            depths[name] = 0
        else:
            depths[name] = depth_of(info.parent) + 1
        return depths[name]

    for name in link_infos:
        depth_of(name)
    return depths


# =============================================================================
# Marker weights
# =============================================================================

def marker_weight(marker_info: MarkerInfo, base_weight: float = 1.0, ref_size: float = 25.0) -> float:
    """
    Weight the marker residuals lightly by marker size.
    Larger markers tend to be more stable in image space.
    """
    w = base_weight
    if marker_info.size is not None:
        try:
            size = float(marker_info.size)
            if size > 0:
                w *= max(0.35, math.sqrt(size / ref_size))
        except Exception:
            pass
    return w


# =============================================================================
# Forward kinematics
# =============================================================================

def link_static_transform(link: LinkInfo) -> np.ndarray:
    Rm = rotation_matrix_xyz(*link.mount_rotation_deg, degrees=True)
    return make_transform(Rm, link.mount_position_m)


def joint_motion_transform(link: LinkInfo, q_value: float) -> np.ndarray:
    joint_type = (link.joint_type or "").strip().lower()
    if link.joint_axis is None:
        return np.eye(4, dtype=np.float64)

    axis = normalize(link.joint_axis)
    if joint_type == "linear":
        return make_transform(np.eye(3), axis * float(q_value))
    if joint_type == "revolute":
        return make_transform(axis_angle_rotation(axis, float(q_value)), np.zeros(3))
    return np.eye(4, dtype=np.float64)


def world_to_link_transform(
    link_name: str,
    link_infos: Dict[str, LinkInfo],
    state: Dict[str, Any],
    cache: Dict[str, np.ndarray],
) -> np.ndarray:
    if link_name in cache:
        return cache[link_name]

    link = link_infos[link_name]
    if link.parent is None:
        T = make_transform(state["root_R"], state["root_t"])
        cache[link_name] = T
        return T

    parent_T = world_to_link_transform(link.parent, link_infos, state, cache)

    # Convention:
    # parent frame -> joint origin -> joint motion -> static joint rotation -> child mount transform
    T = parent_T @ make_transform(np.eye(3), link.joint_origin_m)

    q = state["joint_values"].get(link.joint_variable, state["joint_defaults"].get(link.joint_variable, 0.0))
    T = T @ joint_motion_transform(link, q)

    R_static = rotation_matrix_xyz(*link.joint_rotation_deg, degrees=True)
    T = T @ make_transform(R_static, np.zeros(3))

    T = T @ link_static_transform(link)

    cache[link_name] = T
    return T


def predict_marker_positions(
    link_infos: Dict[str, LinkInfo],
    state: Dict[str, Any],
) -> Dict[int, np.ndarray]:
    pred: Dict[int, np.ndarray] = {}
    cache: Dict[str, np.ndarray] = {}
    for link_name, link in link_infos.items():
        T = world_to_link_transform(link_name, link_infos, state, cache)
        for marker in link.markers:
            pred[marker.marker_id] = transform_point(T, marker.local_pos_m)
    return pred


# =============================================================================
# Initial parameters
# =============================================================================

def infer_joint_defaults(robot_data: Dict[str, Any], link_infos: Dict[str, LinkInfo]) -> Dict[str, float]:
    defaults_raw = robot_data.get("defaultPosition", {}) or {}
    out: Dict[str, float] = {}

    for link in link_infos.values():
        var = link.joint_variable
        if not var:
            continue
        raw_val = defaults_raw.get(var, 0.0)
        joint_type = (link.joint_type or "").strip().lower()
        if joint_type == "linear":
            out[var] = float(raw_val) * get_length_scale(robot_data)
        elif joint_type == "revolute":
            out[var] = math.radians(float(raw_val))
        else:
            out[var] = float(raw_val)
    return out


def initial_root_pose(
    link_infos: Dict[str, LinkInfo],
    observed_markers: Dict[int, Dict[str, Any]],
) -> Tuple[np.ndarray, np.ndarray]:
    roots = [name for name, info in link_infos.items() if info.parent is None]
    if not roots:
        return np.eye(3, dtype=np.float64), np.zeros(3, dtype=np.float64)

    root = roots[0]
    root_info = link_infos[root]

    P = []
    Q = []
    W = []

    for marker in root_info.markers:
        if marker.marker_id in observed_markers:
            obs = np.asarray(observed_markers[marker.marker_id]["position_m"], dtype=np.float64)
            P.append(marker.local_pos_m)
            Q.append(obs)
            W.append(marker_weight(marker, base_weight=1.0))

    if len(P) >= 3:
        return kabsch(np.asarray(P), np.asarray(Q), np.asarray(W))

    return np.eye(3, dtype=np.float64), np.zeros(3, dtype=np.float64)


def build_initial_state(
    robot_data: Dict[str, Any],
    link_infos: Dict[str, LinkInfo],
    observed_markers: Dict[int, Dict[str, Any]],
) -> Dict[str, Any]:
    root_R, root_t = initial_root_pose(link_infos, observed_markers)
    joint_defaults = infer_joint_defaults(robot_data, link_infos)

    return {
        "root_R": root_R,
        "root_t": root_t,
        "joint_defaults": joint_defaults,
        "joint_values": dict(joint_defaults),
    }


# =============================================================================
# Stage helpers
# =============================================================================

def active_links_for_depth(link_infos: Dict[str, LinkInfo], depths: Dict[str, int], max_depth: int) -> List[str]:
    ordered = topological_links(link_infos)
    return [name for name in ordered if depths.get(name, 0) <= max_depth]


def active_joint_vars_for_links(link_infos: Dict[str, LinkInfo], active_links: List[str]) -> List[str]:
    vars_out: List[str] = []
    seen = set()
    for name in active_links:
        link = link_infos[name]
        if not link.joint_variable:
            continue
        if link.joint_variable in seen:
            continue
        seen.add(link.joint_variable)
        vars_out.append(link.joint_variable)
    return vars_out


def active_observations_for_links(
    link_infos: Dict[str, LinkInfo],
    observed_markers: Dict[int, Dict[str, Any]],
    active_links: List[str],
) -> Dict[int, Dict[str, Any]]:
    allowed_links = set(active_links)
    out: Dict[int, Dict[str, Any]] = {}
    for marker_id, obs in observed_markers.items():
        link_name = obs.get("link", None)
        if link_name in allowed_links:
            out[marker_id] = obs
    return out


def stage_variables_to_vector(
    state: Dict[str, Any],
    active_joint_vars: List[str],
) -> np.ndarray:
    try:
        from scipy.spatial.transform import Rotation
        root_rvec = Rotation.from_matrix(state["root_R"]).as_rotvec()
    except Exception:
        root_rvec = np.zeros(3, dtype=np.float64)

    root_t = np.asarray(state["root_t"], dtype=np.float64).reshape(3)
    values = list(root_t) + list(np.asarray(root_rvec, dtype=np.float64))
    for var in active_joint_vars:
        values.append(float(state["joint_values"].get(var, state["joint_defaults"].get(var, 0.0))))
    return np.asarray(values, dtype=np.float64)


def vector_to_stage_state(
    x: np.ndarray,
    template_state: Dict[str, Any],
    active_joint_vars: List[str],
) -> Dict[str, Any]:
    try:
        from scipy.spatial.transform import Rotation
    except Exception:
        Rotation = None

    x = np.asarray(x, dtype=np.float64).reshape(-1)

    out = {
        "root_t": x[0:3].copy(),
        "root_R": template_state["root_R"].copy(),
        "joint_defaults": dict(template_state["joint_defaults"]),
        "joint_values": dict(template_state["joint_values"]),
    }

    root_rvec = x[3:6].copy()
    if Rotation is not None:
        out["root_R"] = Rotation.from_rotvec(root_rvec).as_matrix()
    else:
        angle = safe_norm(root_rvec)
        if angle < 1e-12:
            out["root_R"] = np.eye(3, dtype=np.float64)
        else:
            out["root_R"] = axis_angle_rotation(root_rvec / angle, angle)

    idx = 6
    for var in active_joint_vars:
        if idx >= len(x):
            break
        out["joint_values"][var] = float(x[idx])
        idx += 1

    return out


def residuals_stage(
    x: np.ndarray,
    link_infos: Dict[str, LinkInfo],
    observed_markers: Dict[int, Dict[str, Any]],
    template_state: Dict[str, Any],
    active_joint_vars: List[str],
    cfg: EstimationConfig,
) -> np.ndarray:
    state = vector_to_stage_state(x, template_state, active_joint_vars)
    pred = predict_marker_positions(link_infos, state)

    res: List[float] = []

    # Marker residuals
    for marker_id, obs in observed_markers.items():
        if marker_id not in pred:
            continue

        link_name = obs.get("link", None)
        marker = None
        if link_name is not None and link_name in link_infos:
            for m in link_infos[link_name].markers:
                if m.marker_id == marker_id:
                    marker = m
                    break
        if marker is None:
            continue

        w = marker_weight(marker, base_weight=cfg.marker_base_weight)
        sqrt_w = math.sqrt(max(1e-12, w))

        p_obs = np.asarray(obs["position_m"], dtype=np.float64)
        p_pred = pred[marker_id]
        d = (p_pred - p_obs) * sqrt_w
        res.extend(d.tolist())

    # Root prior
    if cfg.include_root_prior:
        root_t_prior = np.asarray(template_state["root_t"], dtype=np.float64)
        try:
            from scipy.spatial.transform import Rotation
            root_rvec_prior = Rotation.from_matrix(template_state["root_R"]).as_rotvec()
        except Exception:
            root_rvec_prior = np.zeros(3, dtype=np.float64)

        root_t = x[0:3]
        root_rvec = x[3:6]
        w = math.sqrt(max(1e-12, cfg.root_pose_prior_weight))
        res.extend(((root_t - root_t_prior) * w).tolist())
        res.extend(((root_rvec - root_rvec_prior) * w).tolist())

    # Joint priors
    if cfg.include_joint_prior:
        idx = 6
        w = math.sqrt(max(1e-12, cfg.joint_prior_weight))
        for var in active_joint_vars:
            if idx >= len(x):
                break
            q = x[idx]
            q0 = template_state["joint_defaults"].get(var, 0.0)
            # Revolute angles should not wrap too aggressively during optimization,
            # but the prior itself should be short-arc.
            if abs(q0) <= math.pi * 2.5:
                dq = wrap_angle_rad(float(q - q0))
            else:
                dq = float(q - q0)
            res.append(dq * w)
            idx += 1

    return np.asarray(res, dtype=np.float64)


def optimize_stage(
    link_infos: Dict[str, LinkInfo],
    observed_markers: Dict[int, Dict[str, Any]],
    template_state: Dict[str, Any],
    active_joint_vars: List[str],
    cfg: EstimationConfig,
) -> Tuple[Dict[str, Any], Dict[str, Any]]:
    try:
        from scipy.optimize import least_squares
    except ImportError:
        print("[WARN] scipy not available; returning template state for this stage.")
        return template_state, {"success": False, "message": "scipy unavailable"}

    x0 = stage_variables_to_vector(template_state, active_joint_vars)

    print(f"[INFO] Stage variables: {len(x0)} (root pose + {len(active_joint_vars)} active joints)")

    result = least_squares(
        lambda x: residuals_stage(x, link_infos, observed_markers, template_state, active_joint_vars, cfg),
        x0,
        loss=cfg.robust_loss,
        f_scale=1.0,
        max_nfev=cfg.max_iterations,
        verbose=1,
    )

    final_state = vector_to_stage_state(result.x, template_state, active_joint_vars)
    opt_info = {
        "cost": float(np.sum(result.fun ** 2)),
        "success": bool(result.success),
        "status": int(result.status),
        "message": str(result.message),
        "nfev": int(result.nfev),
        "njev": int(getattr(result, "njev", -1)),
    }
    final_state["_optimizer"] = opt_info
    return final_state, opt_info


def compute_error_stats(
    link_infos: Dict[str, LinkInfo],
    state: Dict[str, Any],
    observed_markers: Dict[int, Dict[str, Any]],
) -> Tuple[List[Dict[str, Any]], Dict[str, Any]]:
    pred = predict_marker_positions(link_infos, state)

    marker_reports: List[Dict[str, Any]] = []
    errors = []

    marker_to_link = {}
    marker_meta = {}
    for link in link_infos.values():
        for m in link.markers:
            marker_to_link[m.marker_id] = link.name
            marker_meta[m.marker_id] = m

    for marker_id, obs in observed_markers.items():
        if marker_id not in pred:
            continue

        p_obs = np.asarray(obs["position_m"], dtype=np.float64)
        p_pred = pred[marker_id]
        err = p_pred - p_obs
        err_norm = float(np.linalg.norm(err))
        errors.append(err_norm)

        m = marker_meta.get(marker_id)
        marker_reports.append(
            {
                "marker_id": int(marker_id),
                "link": marker_to_link.get(marker_id, "unknown"),
                "observed_position_m": [float(x) for x in p_obs],
                "predicted_position_m": [float(x) for x in p_pred],
                "error_m": [float(x) for x in err],
                "error_norm_m": err_norm,
                "error_norm_mm": err_norm * 1000.0,
                "marker_size": None if m is None else m.size,
            }
        )

    if errors:
        arr = np.asarray(errors, dtype=np.float64)
        stats = {
            "num_markers_used": int(len(errors)),
            "mean_error_m": float(np.mean(arr)),
            "median_error_m": float(np.median(arr)),
            "rms_error_m": float(np.sqrt(np.mean(arr ** 2))),
            "worst_error_m": float(np.max(arr)),
            "p80_error_m": float(np.quantile(arr, 0.80)),
            "p90_error_m": float(np.quantile(arr, 0.90)),
        }
    else:
        stats = {
            "num_markers_used": 0,
            "mean_error_m": None,
            "median_error_m": None,
            "rms_error_m": None,
            "worst_error_m": None,
            "p80_error_m": None,
            "p90_error_m": None,
        }

    return marker_reports, stats


def print_stage_stats(stage_idx: int, depth: int, stage_result: StageResult) -> None:
    print(
        f"[INFO] Stage {stage_idx} (depth={depth}) | "
        f"active_links={len(stage_result.active_links)} | "
        f"active_joint_vars={len(stage_result.active_joint_vars)} | "
        f"markers={stage_result.num_markers_used}"
    )
    if stage_result.mean_error_m is not None:
        print(
            f"[INFO] Stage {stage_idx} error: "
            f"mean={stage_result.mean_error_m*1000.0:.2f} mm | "
            f"rms={stage_result.rms_error_m*1000.0:.2f} mm | "
            f"worst={stage_result.worst_error_m*1000.0:.2f} mm"
        )
    if stage_result.optimizer_info:
        print(
            f"[INFO] Stage {stage_idx} optimizer: "
            f"success={stage_result.optimizer_info.get('success', False)} | "
            f"nfev={stage_result.optimizer_info.get('nfev', -1)} | "
            f"cost={stage_result.optimizer_info.get('cost', 0.0):.6f}"
        )


def sequential_optimize_state(
    robot_data: Dict[str, Any],
    link_infos: Dict[str, LinkInfo],
    observed_markers: Dict[int, Dict[str, Any]],
    initial_state: Dict[str, Any],
    cfg: EstimationConfig,
) -> Tuple[Dict[str, Any], List[StageResult]]:
    """
    Sequential prefix optimization:
      - solve links up to depth 0,
      - then depth 1,
      - then depth 2, ...
    Each new stage starts from the previous state and only activates the
    joint variables that belong to the current prefix.
    """
    depths = compute_link_depths(link_infos)
    max_depth = max(depths.values()) if depths else 0

    current_state = {
        "root_R": initial_state["root_R"].copy(),
        "root_t": initial_state["root_t"].copy(),
        "joint_defaults": dict(initial_state["joint_defaults"]),
        "joint_values": dict(initial_state["joint_values"]),
    }

    stage_results: List[StageResult] = []

    for depth in range(0, max_depth + 1):
        active_links = active_links_for_depth(link_infos, depths, depth)
        active_joint_vars = active_joint_vars_for_links(link_infos, active_links)
        active_obs = active_observations_for_links(link_infos, observed_markers, active_links)

        # If a stage introduces no observations, we still may want the chain prefix,
        # but there is nothing to fit against. In that case just keep the current state.
        if len(active_obs) == 0:
            stage_state = current_state
            stage_res = StageResult(
                depth=depth,
                active_links=active_links,
                active_joint_vars=active_joint_vars,
                mean_error_m=None,
                rms_error_m=None,
                worst_error_m=None,
                num_markers_used=0,
                optimizer_info={"skipped": True, "reason": "no active observations"},
            )
            stage_results.append(stage_res)
            if cfg.show_stage_reports:
                print_stage_stats(len(stage_results) - 1, depth, stage_res)
            continue

        # Optimize only the active prefix variables.
        template_state = current_state
        optimized_state, opt_info = optimize_stage(
            link_infos=link_infos,
            observed_markers=active_obs,
            template_state=template_state,
            active_joint_vars=active_joint_vars,
            cfg=cfg,
        )

        # Merge optimized active values into the running state.
        current_state["root_R"] = optimized_state["root_R"].copy()
        current_state["root_t"] = optimized_state["root_t"].copy()
        current_state["joint_values"].update(optimized_state["joint_values"])

        # Evaluate the active prefix after the stage.
        marker_reports, stats = compute_error_stats(link_infos, current_state, active_obs)

        stage_res = StageResult(
            depth=depth,
            active_links=active_links,
            active_joint_vars=active_joint_vars,
            mean_error_m=stats["mean_error_m"],
            rms_error_m=stats["rms_error_m"],
            worst_error_m=stats["worst_error_m"],
            num_markers_used=stats["num_markers_used"],
            optimizer_info=opt_info,
        )
        stage_results.append(stage_res)

        if cfg.show_stage_reports:
            print_stage_stats(len(stage_results) - 1, depth, stage_res)

    if stage_results:
        current_state["_optimizer"] = stage_results[-1].optimizer_info
    return current_state, stage_results


# =============================================================================
# Output
# =============================================================================

def state_to_json(
    robot_data: Dict[str, Any],
    link_infos: Dict[str, LinkInfo],
    state: Dict[str, Any],
    observed_markers: Dict[int, Dict[str, Any]],
    marker_reports: List[Dict[str, Any]],
    stats: Dict[str, Any],
    input_file: str,
    robot_file: str,
    stage_results: List[StageResult],
) -> Dict[str, Any]:
    movements = {}
    for link_name in topological_links(link_infos):
        link = link_infos[link_name]
        if not link.joint_variable:
            continue

        q = state["joint_values"].get(link.joint_variable, state["joint_defaults"].get(link.joint_variable, 0.0))
        joint_type = (link.joint_type or "").strip().lower()

        if joint_type == "revolute":
            movements[link.joint_variable] = {
                "value_rad": float(q),
                "value_deg": float(math.degrees(q)),
                "joint_type": joint_type,
                "link": link_name,
            }
        elif joint_type == "linear":
            movements[link.joint_variable] = {
                "value_m": float(q),
                "value_mm": float(q * 1000.0),
                "joint_type": joint_type,
                "link": link_name,
            }
        else:
            movements[link.joint_variable] = {
                "value": float(q),
                "joint_type": joint_type,
                "link": link_name,
            }

    link_pose_entries = []
    cache: Dict[str, np.ndarray] = {}
    for link_name in topological_links(link_infos):
        T = world_to_link_transform(link_name, link_infos, state, cache)
        R = T[:3, :3]
        t = T[:3, 3]
        link = link_infos[link_name]
        num_used = sum(1 for m in link.markers if m.marker_id in observed_markers)
        link_pose_entries.append(
            {
                "link": link_name,
                "parent": link.parent,
                "position_m": [float(x) for x in t],
                "rotation_matrix": [[float(v) for v in row] for row in R],
                "euler_xyz_deg": [float(x) for x in matrix_to_euler_xyz(R)],
                "num_observed_markers": int(num_used),
                "num_markers_total": int(len(link.markers)),
            }
        )

    root_link = next((name for name, info in link_infos.items() if info.parent is None), None)
    root_T = make_transform(state["root_R"], state["root_t"])

    out = {
        "schema_version": "2.0",
        "created_utc": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
        "source": {
            "marker_positions_file": input_file,
            "robot_file": robot_file,
        },
        "summary": {
            "num_links": int(len(link_infos)),
            "num_observed_markers": int(len(observed_markers)),
            "num_link_markers": int(sum(len(li.markers) for li in link_infos.values())),
            "root_link": root_link,
            "optimizer": state.get("_optimizer", {}),
            "fit_stats": stats,
            "stages": [
                {
                    "depth": int(s.depth),
                    "active_links": s.active_links,
                    "active_joint_vars": s.active_joint_vars,
                    "mean_error_m": s.mean_error_m,
                    "rms_error_m": s.rms_error_m,
                    "worst_error_m": s.worst_error_m,
                    "num_markers_used": int(s.num_markers_used),
                    "optimizer_info": s.optimizer_info,
                }
                for s in stage_results
            ],
        },
        "world_pose": {
            "root_translation_m": [float(x) for x in root_T[:3, 3]],
            "root_rotation_matrix": [[float(v) for v in row] for row in root_T[:3, :3]],
            "root_euler_xyz_deg": [float(x) for x in matrix_to_euler_xyz(root_T[:3, :3])],
        },
        "movements": movements,
        "links": link_pose_entries,
        "markers": marker_reports,
    }

    return out


# =============================================================================
# Main
# =============================================================================

def main() -> None:
    parser = argparse.ArgumentParser(
        description="Sequential robot state estimation from optimized ArUco marker positions"
    )
    parser.add_argument(
        "optimized_markers",
        help="aruco_positions_optimized*.json"
    )
    parser.add_argument(
        "-robot", "--robot",
        required=True,
        help="robot.json"
    )
    parser.add_argument(
        "-out", "--out",
        default=None,
        help="Output robot_state.json path"
    )
    parser.add_argument(
        "--maxIterations",
        type=int,
        default=120,
        help="Maximum least-squares iterations per stage"
    )
    parser.add_argument(
        "--jointPriorWeight",
        type=float,
        default=0.05,
        help="Prior weight for joint variables"
    )
    parser.add_argument(
        "--rootPosePriorWeight",
        type=float,
        default=0.1,
        help="Prior weight for root pose"
    )
    parser.add_argument(
        "--markerBaseWeight",
        type=float,
        default=1.0,
        help="Base weight for marker residuals"
    )
    parser.add_argument(
        "--noJointPrior",
        action="store_true",
        help="Disable joint priors"
    )
    parser.add_argument(
        "--noRootPrior",
        action="store_true",
        help="Disable root pose prior"
    )
    parser.add_argument(
        "--noSequential",
        action="store_true",
        help="Disable sequential prefix optimization and do one global pass"
    )

    args = parser.parse_args()

    print("[STEP 1] Load robot.json and optimized marker positions...")
    robot_data = load_json(args.robot)
    link_infos, marker_by_id, issues = parse_robot(robot_data)
    observed_markers = load_json(args.optimized_markers)
    markers = observed_markers.get("markers", []) if isinstance(observed_markers, dict) else []

    observed_map: Dict[int, Dict[str, Any]] = {}
    for m in markers:
        marker_id = int(m["marker_id"])
        observed_map[marker_id] = m

    for issue in issues:
        print(issue)

    print(f"[INFO] Links: {len(link_infos)}")
    print(f"[INFO] Known robot markers: {len(marker_by_id)}")
    print(f"[INFO] Observed optimized markers: {len(observed_map)}")

    print("\n[STEP 2] Build initial robot state...")
    initial_state = build_initial_state(robot_data, link_infos, observed_map)
    print(f"[INFO] Initial root translation (m): {initial_state['root_t']}")
    print(f"[INFO] Initial joint defaults: {initial_state['joint_defaults']}")

    cfg = EstimationConfig(
        root_pose_prior_weight=float(args.rootPosePriorWeight),
        joint_prior_weight=float(args.jointPriorWeight),
        marker_base_weight=float(args.markerBaseWeight),
        robust_loss="soft_l1",
        max_iterations=int(args.maxIterations),
        include_joint_prior=not args.noJointPrior,
        include_root_prior=not args.noRootPrior,
        sequential=not args.noSequential,
        show_stage_reports=True,
    )

    print("\n[STEP 3] Estimate robot state...")
    if cfg.sequential:
        final_state, stage_results = sequential_optimize_state(
            robot_data=robot_data,
            link_infos=link_infos,
            observed_markers=observed_map,
            initial_state=initial_state,
            cfg=cfg,
        )
    else:
        # Fallback: single global stage over the entire kinematic tree.
        active_links = topological_links(link_infos)
        active_joint_vars = active_joint_vars_for_links(link_infos, active_links)
        final_state, opt_info = optimize_stage(
            link_infos=link_infos,
            observed_markers=observed_map,
            template_state=initial_state,
            active_joint_vars=active_joint_vars,
            cfg=cfg,
        )
        final_state["_optimizer"] = opt_info
        stage_results = []

    print("\n[STEP 4] Build report and save robot_state.json...")
    marker_reports, stats = compute_error_stats(link_infos, final_state, observed_map)

    out_data = state_to_json(
        robot_data=robot_data,
        link_infos=link_infos,
        state=final_state,
        observed_markers=observed_map,
        marker_reports=marker_reports,
        stats=stats,
        input_file=args.optimized_markers,
        robot_file=args.robot,
        stage_results=stage_results,
    )

    out_path = args.out
    if out_path is None:
        out_dir = os.path.dirname(args.optimized_markers) or "."
        out_path = os.path.join(out_dir, "robot_state.json")

    save_json(out_path, out_data)

    print(f"[INFO] Saved to {out_path}")
    if stats["mean_error_m"] is not None:
        print(f"[INFO] Mean marker error: {stats['mean_error_m']*1000.0:.2f} mm")
        print(f"[INFO] RMS marker error : {stats['rms_error_m']*1000.0:.2f} mm")
        print(f"[INFO] Worst marker error: {stats['worst_error_m']*1000.0:.2f} mm")
    else:
        print("[INFO] No marker error statistics available.")


if __name__ == "__main__":
    main()