Pipeline mit KAmera-Position

This commit is contained in:
chk
2026-05-28 16:28:17 +02:00
parent 1bd0d35567
commit 8f387f0e0d
18 changed files with 28805 additions and 29 deletions

View File

@@ -0,0 +1,587 @@
#!/usr/bin/env python3
import argparse
import json
import os
import hashlib
import time
import uuid
from typing import Dict, Any
import cv2
import numpy as np
# ------------------------------------------------------------
# Utilities
# ------------------------------------------------------------
def load_intrinsics_npz(npz_path: str):
data = np.load(npz_path)
for k in ('camera_matrix', 'mtx', 'K'):
if k in data:
K = data[k].astype(np.float32)
break
else:
raise KeyError('Camera matrix not found in npz')
for k in ('dist_coeffs', 'dist', 'D'):
if k in data:
D = data[k].astype(np.float32).reshape(-1, 1)
break
else:
D = np.zeros((5, 1), dtype=np.float32)
return K, D
# ------------------------------------------------------------
def load_robot_vision_config(robot_json_path: str):
with open(robot_json_path, 'r', encoding='utf-8') as f:
robot = json.load(f)
vision_config = robot.get('vision_config', {})
marker_type = vision_config.get('MarkerType', 'DICT_4X4_250')
marker_size = float(vision_config.get('MarkerSize', 0.025))
return {
'MarkerType': marker_type,
'MarkerSize': marker_size
}
# ------------------------------------------------------------
def get_aruco_detector(dict_name: str):
mapping = {
'DICT_4X4_250': cv2.aruco.DICT_4X4_250,
'DICT_5X5_100': cv2.aruco.DICT_5X5_100,
'DICT_6X6_250': cv2.aruco.DICT_6X6_250,
'DICT_ARUCO_ORIGINAL': cv2.aruco.DICT_ARUCO_ORIGINAL,
}
dict_id = mapping.get(dict_name, cv2.aruco.DICT_4X4_250)
dictionary = cv2.aruco.getPredefinedDictionary(dict_id)
try:
params = cv2.aruco.DetectorParameters()
except Exception:
params = cv2.aruco.DetectorParameters_create()
try:
detector = cv2.aruco.ArucoDetector(dictionary, params)
return detector, None
except Exception:
return None, (dictionary, params)
# ------------------------------------------------------------
def detect_markers(image, detector_tuple):
detector, fallback = detector_tuple
if detector is not None:
corners, ids, rejected = detector.detectMarkers(image)
else:
dictionary, params = fallback
corners, ids, rejected = cv2.aruco.detectMarkers(
image,
dictionary,
parameters=params
)
return corners, ids, rejected
# ------------------------------------------------------------
def hash_file(path):
sha = hashlib.sha256()
with open(path, 'rb') as f:
while True:
chunk = f.read(1024 * 1024)
if not chunk:
break
sha.update(chunk)
return sha.hexdigest()
# ------------------------------------------------------------
def polygon_mask(shape, polygon):
mask = np.zeros(shape, dtype=np.uint8)
cv2.fillConvexPoly(
mask,
polygon.astype(np.int32),
255
)
return mask
# ------------------------------------------------------------
def shrink_polygon(points, scale=0.80):
center = np.mean(points, axis=0)
shrunk = center + (points - center) * scale
return shrunk.astype(np.float32)
# ------------------------------------------------------------
def compute_sharpness(gray_image, polygon):
shrunk = shrink_polygon(polygon, scale=0.80)
mask = polygon_mask(gray_image.shape, shrunk)
pixels = gray_image[mask == 255]
if pixels.size == 0:
return 0.0
temp = np.zeros_like(gray_image)
temp[mask == 255] = gray_image[mask == 255]
lap = cv2.Laplacian(temp, cv2.CV_64F)
values = lap[mask == 255]
if values.size == 0:
return 0.0
return float(values.var())
# ------------------------------------------------------------
def compute_contrast(gray_image, polygon):
shrunk = shrink_polygon(polygon, scale=0.80)
mask = polygon_mask(gray_image.shape, shrunk)
pixels = gray_image[mask == 255]
if pixels.size == 0:
return {
'p05': 0.0,
'p95': 0.0,
'dynamic_range': 0.0,
'mean_gray': 0.0,
'std_gray': 0.0
}
p05 = float(np.percentile(pixels, 5))
p95 = float(np.percentile(pixels, 95))
return {
'p05': p05,
'p95': p95,
'dynamic_range': float(p95 - p05),
'mean_gray': float(np.mean(pixels)),
'std_gray': float(np.std(pixels))
}
# ------------------------------------------------------------
def compute_edge_ratio(corners):
edge_lengths = []
for k in range(4):
p1 = corners[k]
p2 = corners[(k + 1) % 4]
edge_lengths.append(
float(np.linalg.norm(p1 - p2))
)
edge_ratio = (
max(edge_lengths) /
max(1e-6, min(edge_lengths))
)
return edge_ratio, edge_lengths
# ------------------------------------------------------------
def compute_geometry_metrics(center, corners, width, height):
image_center = np.array(
[width / 2.0, height / 2.0],
dtype=np.float32
)
dist_center = np.linalg.norm(center - image_center)
max_dist = np.linalg.norm(image_center)
distance_center_norm = float(
dist_center / max(1e-6, max_dist)
)
min_x = np.min(corners[:, 0])
max_x = np.max(corners[:, 0])
min_y = np.min(corners[:, 1])
max_y = np.max(corners[:, 1])
border_distance_px = float(min(
min_x,
min_y,
width - max_x,
height - max_y
))
return {
'distance_to_center_norm': distance_center_norm,
'distance_to_border_px': border_distance_px
}
# ------------------------------------------------------------
def compute_confidence(
area_px,
sharpness,
edge_ratio,
dynamic_range,
border_distance_px
):
score = 1.0
# area
score *= min(1.0, area_px / 1500.0)
# sharpness
score *= min(1.0, sharpness / 120.0)
# edge distortion
score *= 1.0 / max(1.0, edge_ratio)
# contrast
score *= min(1.0, dynamic_range / 80.0)
# border distance
score *= min(1.0, max(0.0, border_distance_px) / 50.0)
score = max(0.0, min(1.0, score))
return float(score)
# ------------------------------------------------------------
def main():
parser = argparse.ArgumentParser()
parser.add_argument(
'-i',
'--image',
required=True
)
parser.add_argument(
'-npz',
'--intrinsics',
required=True
)
parser.add_argument(
'-robot',
'--robot',
required=True
)
parser.add_argument(
'-cameraId',
'--cameraId',
required=True,
type=str
)
parser.add_argument(
'-outDir',
'--outDir',
required=True
)
args = parser.parse_args()
os.makedirs(args.outDir, exist_ok=True)
# --------------------------------------------------------
# Load robot vision config
# --------------------------------------------------------
vision_config = load_robot_vision_config(args.robot)
marker_type = vision_config['MarkerType']
marker_size = vision_config['MarkerSize']
# --------------------------------------------------------
# Load image
# --------------------------------------------------------
image = cv2.imread(args.image)
if image is None:
raise RuntimeError(f'Cannot read image: {args.image}')
gray = cv2.cvtColor(
image,
cv2.COLOR_BGR2GRAY
)
height, width = gray.shape[:2]
# --------------------------------------------------------
# Intrinsics
# --------------------------------------------------------
K, D = load_intrinsics_npz(args.intrinsics)
# --------------------------------------------------------
# Detection
# --------------------------------------------------------
detector_tuple = get_aruco_detector(marker_type)
corners_list, ids, rejected = detect_markers(
gray,
detector_tuple
)
detections = []
# --------------------------------------------------------
# Valid detections
# --------------------------------------------------------
if ids is not None:
ids = ids.flatten().tolist()
for i, marker_id in enumerate(ids):
corners = corners_list[i].reshape((4, 2)).astype(np.float32)
center = corners.mean(axis=0)
area_px = float(
cv2.contourArea(corners)
)
perimeter_px = float(
cv2.arcLength(corners, True)
)
edge_ratio, edge_lengths = compute_edge_ratio(corners)
sharpness = compute_sharpness(
gray,
corners
)
contrast = compute_contrast(
gray,
corners
)
geometry = compute_geometry_metrics(
center,
corners,
width,
height
)
confidence = compute_confidence(
area_px=area_px,
sharpness=sharpness,
edge_ratio=edge_ratio,
dynamic_range=contrast['dynamic_range'],
border_distance_px=geometry['distance_to_border_px']
)
detection = {
'observation_id': str(uuid.uuid4()),
'type': 'aruco',
'marker_id': int(marker_id),
'marker_size_m': marker_size,
'image_points_px': corners.tolist(),
'center_px': center.tolist(),
'quality': {
'area_px': area_px,
'perimeter_px': perimeter_px,
'sharpness': {
'laplacian_var': sharpness
},
'contrast': contrast,
'geometry': geometry,
'edge_ratio': edge_ratio,
'edge_lengths_px': edge_lengths
},
'confidence': confidence
}
detections.append(detection)
# --------------------------------------------------------
# Rejected candidates
# --------------------------------------------------------
rejected_candidates = []
if rejected is not None:
for candidate in rejected:
pts = candidate.reshape((-1, 2)).astype(np.float32)
center = pts.mean(axis=0)
area_px = float(
cv2.contourArea(pts)
)
rejected_candidates.append({
'image_points_px': pts.tolist(),
'center_px': center.tolist(),
'area_px': area_px
})
# --------------------------------------------------------
# Final output
# --------------------------------------------------------
output = {
'schema_version': '1.0',
'created_utc': time.strftime(
'%Y-%m-%dT%H:%M:%SZ',
time.gmtime()
),
'vision_config': {
'MarkerType': marker_type,
'MarkerSize': marker_size
},
'camera': {
'camera_id': args.cameraId,
'intrinsics_file': os.path.abspath(args.intrinsics),
'camera_matrix': K.tolist(),
'distortion_coefficients': D.reshape(-1).tolist()
},
'image': {
'image_file': os.path.abspath(args.image),
'image_sha256': hash_file(args.image),
'width_px': int(width),
'height_px': int(height)
},
'aruco': {
'dictionary': marker_type,
'num_detected_markers': len(detections),
'num_rejected_candidates': len(rejected_candidates)
},
'detections': detections,
'rejected_candidates': rejected_candidates
}
# --------------------------------------------------------
# Output path
# --------------------------------------------------------
input_filename = os.path.basename(args.image)
input_base = os.path.splitext(input_filename)[0]
out_json = os.path.join(
args.outDir,
f'{input_base}_aruco_detection.json'
)
# --------------------------------------------------------
# Save JSON
# --------------------------------------------------------
with open(out_json, 'w', encoding='utf-8') as f:
json.dump(
output,
f,
indent=2
)
print(f'Saved: {out_json}')
# ------------------------------------------------------------
if __name__ == '__main__':
main()

View File

@@ -0,0 +1,562 @@
#!/usr/bin/env python3
"""
Step 2 of the vision pipeline
=============================
Overall workflow
----------------
1) Detect ArUco markers in each image and store 2D corners per marker.
2) Use the Board markers from robot.json as fixed world references.
3) Estimate the camera pose for each image with a Perspective-n-Point (PnP)
solve from the detected 2D corners and the known 3D Board marker corners.
4) Use that camera pose later as the fixed extrinsic input for articulated
bundle adjustment of the robot joints.
Mathematical methods
--------------------
- Homogeneous coordinates and rigid transforms in SE(3)
- ArUco marker geometry on a known board frame
- Perspective projection with camera intrinsics K and distortion D
- Robust pose estimation with RANSAC-PnP
- Optional nonlinear refinement by minimizing reprojection error
Conventions
-----------
- robot.json defines the Board frame as the world frame.
- The camera pose reported by this script is the pose of the CAMERA in the
Board/world frame.
- The OpenCV rvec/tvec output is the object pose in the camera frame.
We invert that transform to obtain world_T_camera.
Expected inputs
---------------
- robot.json containing Board marker positions in millimeters.
- ArUco detection JSON(s) from detect_aruco_observations.py.
Important:
The detection JSON already contains the camera intrinsics and distortion
coefficients copied from the calibration file during Step 1.
Therefore Step 2 no longer requires the original .npz file.
The detection JSON becomes the self-contained handover format between
pipeline stages.
Output
------
A JSON file per input detection file containing:
- camera pose (world_T_camera)
- camera pose inverse (camera_T_world)
- per-marker and global reprojection errors
- number of inlier correspondences
- list of used marker IDs
"""
from __future__ import annotations
import argparse
import json
import math
import os
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple
import cv2
import numpy as np
# ----------------------------------------------------------------------------
# Helpers
# ----------------------------------------------------------------------------
def load_robot_json(robot_json_path: str) -> Dict[str, Any]:
with open(robot_json_path, "r", encoding="utf-8") as f:
return json.load(f)
def load_intrinsics_from_detection_json(detection_json: Dict[str, Any]) -> Tuple[np.ndarray, np.ndarray]:
cam = detection_json.get("camera", {}) or {}
if "camera_matrix" not in cam:
raise KeyError("camera_matrix missing in detection JSON")
K = np.asarray(cam["camera_matrix"], dtype=np.float64)
D = np.asarray(
cam.get("distortion_coefficients", [0, 0, 0, 0, 0]),
dtype=np.float64,
).reshape(-1, 1)
return K, D
def as_float3(v: Any) -> np.ndarray:
arr = np.asarray(v, dtype=np.float64).reshape(-1)
if arr.size < 3:
arr = np.pad(arr, (0, 3 - arr.size))
return arr[:3]
def normalize(v: np.ndarray, eps: float = 1e-12) -> np.ndarray:
n = float(np.linalg.norm(v))
if n < eps:
return v * 0.0
return v / n
def rotation_from_normal(normal: np.ndarray) -> np.ndarray:
"""Return a 3x3 rotation matrix whose local +Z aligns with the given normal.
The local x/y axes are chosen deterministically by projecting a stable world
reference axis into the tangent plane.
"""
z_axis = normalize(normal)
if np.linalg.norm(z_axis) < 1e-12:
raise ValueError("Degenerate normal vector")
ref = np.array([1.0, 0.0, 0.0], dtype=np.float64)
if abs(float(np.dot(ref, z_axis))) > 0.9:
ref = np.array([0.0, 1.0, 0.0], dtype=np.float64)
x_axis = ref - np.dot(ref, z_axis) * z_axis
x_axis = normalize(x_axis)
if np.linalg.norm(x_axis) < 1e-12:
ref = np.array([0.0, 1.0, 0.0], dtype=np.float64)
x_axis = ref - np.dot(ref, z_axis) * z_axis
x_axis = normalize(x_axis)
y_axis = np.cross(z_axis, x_axis)
y_axis = normalize(y_axis)
# Columns are the basis vectors of the local frame in world coordinates.
R = np.column_stack([x_axis, y_axis, z_axis])
return R
def rodrigues_to_matrix(rvec: np.ndarray) -> np.ndarray:
R, _ = cv2.Rodrigues(rvec.reshape(3, 1))
return R.astype(np.float64)
def matrix_to_rodrigues(R: np.ndarray) -> np.ndarray:
rvec, _ = cv2.Rodrigues(R.astype(np.float64))
return rvec.reshape(3)
def invert_rigid_transform(R: np.ndarray, t: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
"""Invert p_cam = R * p_world + t."""
R_inv = R.T
t_inv = -R_inv @ t.reshape(3)
return R_inv, t_inv
def make_homogeneous(R: np.ndarray, t: np.ndarray) -> np.ndarray:
T = np.eye(4, dtype=np.float64)
T[:3, :3] = R
T[:3, 3] = t.reshape(3)
return T
def project_points(points_3d: np.ndarray, rvec: np.ndarray, tvec: np.ndarray, K: np.ndarray, D: np.ndarray) -> np.ndarray:
pts = points_3d.reshape(-1, 1, 3).astype(np.float64)
img_pts, _ = cv2.projectPoints(pts, rvec.reshape(3, 1), tvec.reshape(3, 1), K, D)
return img_pts.reshape(-1, 2)
def rms_error(errors_px: np.ndarray) -> float:
if errors_px.size == 0:
return float("nan")
return float(np.sqrt(np.mean(np.sum(errors_px ** 2, axis=1))))
def safe_confidence(det: Dict[str, Any]) -> float:
try:
c = float(det.get("confidence", 1.0))
except Exception:
c = 1.0
return max(0.0, min(1.0, c))
# ----------------------------------------------------------------------------
# Robot board model
# ----------------------------------------------------------------------------
@dataclass
class BoardMarker:
marker_id: int
center_world_mm: np.ndarray
normal_world: np.ndarray
size_mm: float
rotation_world: np.ndarray
def corner_points_world_mm(self) -> np.ndarray:
"""Return the 4 corner points in board/world coordinates.
Corner order is OpenCV/ArUco compatible:
top-left, top-right, bottom-right, bottom-left in the marker local frame.
"""
s = float(self.size_mm)
half = s * 0.5
# Local marker corners in the marker's own frame, z=0.
corners_local = np.array([
[-half, +half, 0.0],
[+half, +half, 0.0],
[+half, -half, 0.0],
[-half, -half, 0.0],
], dtype=np.float64)
corners_world = (self.rotation_world @ corners_local.T).T + self.center_world_mm.reshape(1, 3)
return corners_world
def extract_board_markers(robot: Dict[str, Any]) -> Dict[int, BoardMarker]:
links = robot.get("links", {})
if "Board" not in links:
raise KeyError("robot.json must contain links.Board for the world reference frame")
board = links["Board"]
marker_defaults = robot.get("renderingInfo", {}).get("markerDefaults", {}) or {}
default_size_mm = float(marker_defaults.get("size", 25.0))
markers: Dict[int, BoardMarker] = {}
for m in board.get("markers", []):
if not isinstance(m, dict):
continue
if "id" not in m or "position" not in m:
continue
marker_id = int(m["id"])
center = as_float3(m["position"])
normal = as_float3(m.get("normal", [0, 0, 1]))
size_mm = float(m.get("size", default_size_mm))
# If a spin is present in the future, it can be added here.
# For the current Board markers it is not needed.
R = rotation_from_normal(normal)
markers[marker_id] = BoardMarker(
marker_id=marker_id,
center_world_mm=center,
normal_world=normalize(normal),
size_mm=size_mm,
rotation_world=R,
)
if not markers:
raise ValueError("No Board markers found in robot.json")
return markers
# ----------------------------------------------------------------------------
# Observation loading
# ----------------------------------------------------------------------------
def load_detection_json(path: str) -> Dict[str, Any]:
with open(path, "r", encoding="utf-8") as f:
return json.load(f)
def build_pnp_correspondences(
detections: Sequence[Dict[str, Any]],
board_markers: Dict[int, BoardMarker],
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, List[int]]:
"""Build object/image correspondences from all detected Board markers.
Returns:
object_points_mm: (N, 3)
image_points_px: (N, 2)
weights: (N,)
used_marker_ids: list of marker ids that contributed at least one corner
"""
object_points: List[np.ndarray] = []
image_points: List[np.ndarray] = []
weights: List[np.ndarray] = []
used_marker_ids: List[int] = []
for det in detections:
if str(det.get("type", "")).lower() != "aruco":
continue
try:
marker_id = int(det["marker_id"])
except Exception:
continue
if marker_id not in board_markers:
continue
corners_px = np.asarray(det.get("image_points_px", []), dtype=np.float64)
if corners_px.shape != (4, 2):
continue
marker = board_markers[marker_id]
corners_world_mm = marker.corner_points_world_mm()
object_points.append(corners_world_mm)
image_points.append(corners_px)
conf = safe_confidence(det)
# Repeat the marker confidence for all 4 corners.
weights.append(np.full((4,), conf, dtype=np.float64))
used_marker_ids.append(marker_id)
if not object_points:
raise ValueError("No usable Board marker correspondences found in detections")
obj = np.concatenate(object_points, axis=0).astype(np.float64)
img = np.concatenate(image_points, axis=0).astype(np.float64)
w = np.concatenate(weights, axis=0).astype(np.float64)
return obj, img, w, sorted(set(used_marker_ids))
# ----------------------------------------------------------------------------
# Pose estimation
# ----------------------------------------------------------------------------
def pnp_initial_pose(object_points_mm: np.ndarray, image_points_px: np.ndarray, K: np.ndarray, D: np.ndarray) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
"""Robust initial pose estimate using RANSAC-PnP when possible."""
obj = object_points_mm.reshape(-1, 1, 3).astype(np.float64)
img = image_points_px.reshape(-1, 1, 2).astype(np.float64)
# Prefer RANSAC for robustness.
ok, rvec, tvec, inliers = cv2.solvePnPRansac(
obj,
img,
K,
D,
iterationsCount=200,
reprojectionError=4.0,
confidence=0.999,
flags=cv2.SOLVEPNP_EPNP,
)
if not ok or rvec is None or tvec is None:
# Fallback to a direct solve.
ok, rvec, tvec = cv2.solvePnP(
obj,
img,
K,
D,
flags=cv2.SOLVEPNP_ITERATIVE,
)
if not ok:
raise RuntimeError("cv2.solvePnP failed")
inliers = np.arange(len(object_points_mm), dtype=np.int32).reshape(-1, 1)
rvec = rvec.reshape(3)
tvec = tvec.reshape(3)
if inliers is None:
inliers = np.arange(len(object_points_mm), dtype=np.int32).reshape(-1, 1)
return rvec, tvec, inliers
def refine_pose_with_weights(
rvec_init: np.ndarray,
tvec_init: np.ndarray,
object_points_mm: np.ndarray,
image_points_px: np.ndarray,
weights: np.ndarray,
K: np.ndarray,
D: np.ndarray,
) -> Tuple[np.ndarray, np.ndarray]:
"""Optional weighted nonlinear refinement.
Uses OpenCV's LM refinement if available; otherwise falls back to the initial pose.
"""
rvec = rvec_init.reshape(3, 1).astype(np.float64)
tvec = tvec_init.reshape(3, 1).astype(np.float64)
# Build a diagonal weighting by repeating stronger correspondences more often is not ideal.
# Instead, use a conservative weighted refinement via OpenCV if present.
if hasattr(cv2, "solvePnPRefineLM"):
try:
cv2.solvePnPRefineLM(
object_points_mm.reshape(-1, 1, 3).astype(np.float64),
image_points_px.reshape(-1, 1, 2).astype(np.float64),
K,
D,
rvec,
tvec,
)
return rvec.reshape(3), tvec.reshape(3)
except Exception:
pass
return rvec_init.reshape(3), tvec_init.reshape(3)
def compute_reprojection_statistics(
object_points_mm: np.ndarray,
image_points_px: np.ndarray,
rvec: np.ndarray,
tvec: np.ndarray,
K: np.ndarray,
D: np.ndarray,
) -> Dict[str, Any]:
pred = project_points(object_points_mm, rvec, tvec, K, D)
residual = image_points_px - pred
err = np.linalg.norm(residual, axis=1)
return {
"rmse_px": float(np.sqrt(np.mean(err ** 2))) if err.size else float("nan"),
"mean_px": float(np.mean(err)) if err.size else float("nan"),
"median_px": float(np.median(err)) if err.size else float("nan"),
"max_px": float(np.max(err)) if err.size else float("nan"),
"per_corner_errors_px": err.tolist(),
"per_corner_residuals_px": residual.tolist(),
}
def pose_to_camera_in_world(rvec: np.ndarray, tvec: np.ndarray) -> Dict[str, Any]:
"""Convert object-to-camera pose into camera pose in world coordinates."""
R_wc_obj = rodrigues_to_matrix(rvec)
t_wc_obj = tvec.reshape(3)
# object/world -> camera is: p_cam = R * p_world + t
# therefore camera in world is inverse transform.
R_cw, t_cw = invert_rigid_transform(R_wc_obj, t_wc_obj)
T_world_camera = make_homogeneous(R_cw, t_cw)
T_camera_world = make_homogeneous(R_wc_obj, t_wc_obj)
return {
"rvec_world_to_camera": rvec.reshape(3).tolist(),
"tvec_world_to_camera_mm": tvec.reshape(3).tolist(),
"R_world_to_camera": R_wc_obj.tolist(),
"T_camera_world": T_camera_world.tolist(),
"R_camera_to_world": R_cw.tolist(),
"t_camera_in_world_mm": t_cw.tolist(),
"T_world_camera": T_world_camera.tolist(),
}
def estimate_camera_pose_from_detection(
detection_json: Dict[str, Any],
robot: Dict[str, Any],
) -> Dict[str, Any]:
board_markers = extract_board_markers(robot)
K, D = load_intrinsics_from_detection_json(detection_json)
detections = detection_json.get("detections", [])
object_points_mm, image_points_px, weights, used_marker_ids = build_pnp_correspondences(detections, board_markers)
rvec_init, tvec_init, inliers = pnp_initial_pose(object_points_mm, image_points_px, K, D)
rvec, tvec = refine_pose_with_weights(rvec_init, tvec_init, object_points_mm, image_points_px, weights, K, D)
stats = compute_reprojection_statistics(object_points_mm, image_points_px, rvec, tvec, K, D)
pose = pose_to_camera_in_world(rvec, tvec)
# Add a few diagnostics useful for checking against the real world.
camera_pose = {
**pose,
"statistics": {
**stats,
"num_correspondences": int(len(object_points_mm)),
"num_inliers": int(len(inliers)) if inliers is not None else int(len(object_points_mm)),
"used_marker_ids": used_marker_ids,
},
"input": {
"detection_image_file": detection_json.get("image", {}).get("image_file"),
"camera_id": detection_json.get("camera", {}).get("camera_id"),
"marker_dictionary": detection_json.get("vision_config", {}).get("MarkerType"),
},
}
return camera_pose
# ----------------------------------------------------------------------------
# Main
# ----------------------------------------------------------------------------
def main() -> None:
parser = argparse.ArgumentParser(description="Estimate camera pose from Board ArUco markers using PnP.")
parser.add_argument("--robot", required=True, help="Path to robot.json")
parser.add_argument(
"--detections",
required=True,
nargs="+",
help="One or more detection JSON files created by detect_aruco_observations.py",
)
parser.add_argument("--outdir", required=True, help="Directory for the pose JSON outputs")
parser.add_argument("--write-summary", action="store_true", help="Write one combined summary JSON as well")
args = parser.parse_args()
os.makedirs(args.outdir, exist_ok=True)
robot = load_robot_json(args.robot)
summary: Dict[str, Any] = {
"schema_version": "1.0",
"algorithm": "board_pnp_camera_pose",
"robot_file": os.path.abspath(args.robot),
"intrinsics_source": "embedded_in_detection_json",
"results": [],
}
for det_path in args.detections:
detection_json = load_detection_json(det_path)
pose = estimate_camera_pose_from_detection(detection_json, robot)
base = Path(det_path).stem
out_path = Path(args.outdir) / f"{base}_camera_pose.json"
payload = {
"schema_version": "1.0",
"created_utc": __import__("time").strftime("%Y-%m-%dT%H:%M:%SZ", __import__("time").gmtime()),
"source_detection_file": os.path.abspath(det_path),
"camera_pose": pose,
}
with open(out_path, "w", encoding="utf-8") as f:
json.dump(payload, f, indent=2)
summary["results"].append({
"detection_file": os.path.abspath(det_path),
"output_file": str(out_path),
"rmse_px": pose["statistics"]["rmse_px"],
"median_px": pose["statistics"]["median_px"],
"num_correspondences": pose["statistics"]["num_correspondences"],
"used_marker_ids": pose["statistics"]["used_marker_ids"],
})
print(f"Saved: {out_path}")
print(f" RMSE: {pose['statistics']['rmse_px']:.3f} px")
print(f" Median: {pose['statistics']['median_px']:.3f} px")
print(f" Used markers: {pose['statistics']['used_marker_ids']}")
if args.write_summary:
summary_path = Path(args.outdir) / "camera_pose_summary.json"
with open(summary_path, "w", encoding="utf-8") as f:
json.dump(summary, f, indent=2)
print(f"Saved summary: {summary_path}")
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,47 @@
{
"schema_version": "1.0",
"algorithm": "board_pnp_camera_pose",
"robot_file": "C:\\Users\\kech\\SynologyDrive\\2026-AppServer-AppRobot\\appRobotRendering\\robot.json",
"intrinsics_source": "embedded_in_detection_json",
"results": [
{
"detection_file": "C:\\Users\\kech\\SynologyDrive\\2026-AppServer-AppRobot\\appRobotRendering\\pipeline\\render_1a_aruco_detection.json",
"output_file": "render_1a_aruco_detection_camera_pose.json",
"rmse_px": 53.69444650385422,
"median_px": 32.08915387366267,
"num_correspondences": 32,
"used_marker_ids": [
205,
206,
207,
208,
210,
211,
215,
217
]
},
{
"detection_file": "C:\\Users\\kech\\SynologyDrive\\2026-AppServer-AppRobot\\appRobotRendering\\pipeline\\render_1b_aruco_detection.json",
"output_file": "render_1b_aruco_detection_camera_pose.json",
"rmse_px": 0.9028284747812563,
"median_px": 0.87428115526563,
"num_correspondences": 8,
"used_marker_ids": [
210,
215
]
},
{
"detection_file": "C:\\Users\\kech\\SynologyDrive\\2026-AppServer-AppRobot\\appRobotRendering\\pipeline\\render_1c_aruco_detection.json",
"output_file": "render_1c_aruco_detection_camera_pose.json",
"rmse_px": 1.013497154241286,
"median_px": 0.98147711476875,
"num_correspondences": 8,
"used_marker_ids": [
210,
214
]
}
]
}

BIN
pipeline/render.npz Normal file

Binary file not shown.

BIN
pipeline/render_1a.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.3 MiB

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,295 @@
{
"schema_version": "1.0",
"created_utc": "2026-05-28T14:19:28Z",
"source_detection_file": "C:\\Users\\kech\\SynologyDrive\\2026-AppServer-AppRobot\\appRobotRendering\\pipeline\\render_1a_aruco_detection.json",
"camera_pose": {
"rvec_world_to_camera": [
-2.305496407968605,
1.065207351740574,
0.34998974882711126
],
"tvec_world_to_camera_mm": [
-315.2306259816743,
224.77488089057,
1383.92954200185
],
"R_world_to_camera": [
[
0.6485090690393105,
-0.7612056883925573,
0.0013738022347707657
],
[
-0.6120529322947699,
-0.5203636712935098,
0.5954937931392015
],
[
-0.45257838596550426,
-0.38702396509357573,
-0.8033587336925552
]
],
"T_camera_world": [
[
0.6485090690393105,
-0.7612056883925573,
0.0013738022347707657,
-315.2306259816743
],
[
-0.6120529322947699,
-0.5203636712935098,
0.5954937931392015,
224.77488089057
],
[
-0.45257838596550426,
-0.38702396509357573,
-0.8033587336925552,
1383.92954200185
],
[
0.0,
0.0,
0.0,
1.0
]
],
"R_camera_to_world": [
[
0.6485090690393105,
-0.6120529322947699,
-0.45257838596550426
],
[
-0.7612056883925573,
-0.5203636712935098,
-0.38702396509357573
],
[
0.0013738022347707657,
0.5954937931392015,
-0.8033587336925552
]
],
"t_camera_in_world_mm": [
968.3406431525125,
412.6232353376735,
978.3729024968281
],
"T_world_camera": [
[
0.6485090690393105,
-0.6120529322947699,
-0.45257838596550426,
968.3406431525125
],
[
-0.7612056883925573,
-0.5203636712935098,
-0.38702396509357573,
412.6232353376735
],
[
0.0013738022347707657,
0.5954937931392015,
-0.8033587336925552,
978.3729024968281
],
[
0.0,
0.0,
0.0,
1.0
]
],
"statistics": {
"rmse_px": 53.69444650385422,
"mean_px": 43.49018862623262,
"median_px": 32.08915387366267,
"max_px": 131.0132247190388,
"per_corner_errors_px": [
125.9940054900667,
105.07352467385594,
109.87324347189022,
131.0132247190388,
21.010672127270144,
30.17780962535308,
29.741870343336966,
21.53517675057069,
30.674698289199373,
37.37789632268799,
33.503609458125965,
26.755846339359543,
52.6597777482743,
55.943114202091685,
56.41293588599435,
53.3845318388676,
24.140302976164055,
12.501614463984566,
17.843775799913118,
27.851972943047976,
19.55527279482321,
15.006143006879194,
8.168997067942962,
18.41951241518959,
26.644168238518596,
43.73515602327787,
39.76763972637787,
23.72433018345239,
42.69531998291903,
60.42053547730011,
53.73185743455883,
36.347500219111076
],
"per_corner_residuals_px": [
[
-111.43384038596344,
58.79616174774151
],
[
-94.21398419609403,
46.520648848501196
],
[
-93.31959118739167,
57.996409644488836
],
[
-110.44277538824701,
70.4759421066691
],
[
19.119375834118955,
-8.711934971827702
],
[
26.07607978564579,
-15.190070993800134
],
[
28.267108793747752,
-9.249292511446242
],
[
21.386189257435035,
-2.528783645053636
],
[
14.391649857517677,
-27.089066604665845
],
[
22.623064650364768,
-29.75405987987233
],
[
20.48965174053592,
-26.507848239232487
],
[
14.349578701348491,
-22.58240253890682
],
[
42.68032051979753,
-30.845460489754316
],
[
45.092250608114796,
-33.11073786135313
],
[
46.99580232025039,
-31.20599140474883
],
[
45.653152604068396,
-27.671246754794254
],
[
16.64076508745518,
-17.488257920248316
],
[
7.241353209464364,
-10.190837448506784
],
[
7.5129531818588475,
-16.185050796483836
],
[
16.94668756026067,
-22.102990240127752
],
[
19.553105326365085,
0.2911463137773467
],
[
11.302035895770928,
9.871591186511665
],
[
7.972569331990371,
1.780632287325858
],
[
17.27663470149446,
-6.38720056084361
],
[
-23.527458763101663,
12.504814483751204
],
[
-37.370264800659925,
22.720633378253694
],
[
-37.18919281759213,
14.086486679897916
],
[
-23.33139615759319,
4.299976277948048
],
[
-18.10794101712986,
38.66513701726649
],
[
-31.863312707890373,
51.33585891599432
],
[
-35.3592943065546,
40.45779046747623
],
[
-21.569770185610196,
29.255525739904954
]
],
"num_correspondences": 32,
"num_inliers": 12,
"used_marker_ids": [
205,
206,
207,
208,
210,
211,
215,
217
]
},
"input": {
"detection_image_file": "C:\\Users\\kech\\SynologyDrive\\2026-AppServer-AppRobot\\appRobotRendering\\pipeline\\render_1a.png",
"camera_id": "cam1",
"marker_dictionary": "DICT_4X4_250"
}
}
}

BIN
pipeline/render_1b.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.1 MiB

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,169 @@
{
"schema_version": "1.0",
"created_utc": "2026-05-28T14:19:28Z",
"source_detection_file": "C:\\Users\\kech\\SynologyDrive\\2026-AppServer-AppRobot\\appRobotRendering\\pipeline\\render_1b_aruco_detection.json",
"camera_pose": {
"rvec_world_to_camera": [
-3.4928668596759627,
-0.11465199431472019,
0.06891346455898573
],
"tvec_world_to_camera_mm": [
-218.99274893021345,
-66.7998001007427,
991.433880749304
],
"R_world_to_camera": [
[
0.99716158725115,
0.07035390512768569,
-0.026815982996183867
],
[
0.0566912850549916,
-0.9359657619679411,
-0.3474970368543954
],
[
-0.04954661552094863,
0.34499046429873387,
-0.9372975581070098
]
],
"T_camera_world": [
[
0.99716158725115,
0.07035390512768569,
-0.026815982996183867,
-218.99274893021345
],
[
0.0566912850549916,
-0.9359657619679411,
-0.3474970368543954,
-66.7998001007427
],
[
-0.04954661552094863,
0.34499046429873387,
-0.9372975581070098,
991.433880749304
],
[
0.0,
0.0,
0.0,
1.0
]
],
"R_camera_to_world": [
[
0.99716158725115,
0.0566912850549916,
-0.04954661552094863
],
[
0.07035390512768569,
-0.9359657619679411,
0.34499046429873387
],
[
-0.026815982996183867,
-0.3474970368543954,
-0.9372975581070098
]
],
"t_camera_in_world_mm": [
271.2803169327997,
-389.1505655599084,
900.1833170218047
],
"T_world_camera": [
[
0.99716158725115,
0.0566912850549916,
-0.04954661552094863,
271.2803169327997
],
[
0.07035390512768569,
-0.9359657619679411,
0.34499046429873387,
-389.1505655599084
],
[
-0.026815982996183867,
-0.3474970368543954,
-0.9372975581070098,
900.1833170218047
],
[
0.0,
0.0,
0.0,
1.0
]
],
"statistics": {
"rmse_px": 0.9028284747812563,
"mean_px": 0.7517926037815363,
"median_px": 0.87428115526563,
"max_px": 1.5028790321047885,
"per_corner_errors_px": [
0.11338351412627667,
0.7806424610163631,
0.13345378642964098,
1.1182055161944655,
1.5028790321047885,
0.967919849514897,
1.1773769846431092,
0.22047968622274988
],
"per_corner_residuals_px": [
[
-0.08913552529247681,
-0.07007623995662016
],
[
-0.7008974849087508,
-0.343722806328401
],
[
-0.021934401844987406,
0.13163888152104164
],
[
1.012165401384209,
0.4752944105377992
],
[
1.4812157895764813,
0.254254144213121
],
[
-0.9440788085447593,
0.21350418811266536
],
[
-0.7765629048541882,
-0.8849670156405409
],
[
0.03208204880576204,
0.2181330653094733
]
],
"num_correspondences": 8,
"num_inliers": 5,
"used_marker_ids": [
210,
215
]
},
"input": {
"detection_image_file": "C:\\Users\\kech\\SynologyDrive\\2026-AppServer-AppRobot\\appRobotRendering\\pipeline\\render_1b.png",
"camera_id": "cam1",
"marker_dictionary": "DICT_4X4_250"
}
}
}

BIN
pipeline/render_1c.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.3 MiB

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,169 @@
{
"schema_version": "1.0",
"created_utc": "2026-05-28T14:19:28Z",
"source_detection_file": "C:\\Users\\kech\\SynologyDrive\\2026-AppServer-AppRobot\\appRobotRendering\\pipeline\\render_1c_aruco_detection.json",
"camera_pose": {
"rvec_world_to_camera": [
2.263468911707335,
0.4185279076232256,
-0.15618269854175848
],
"tvec_world_to_camera_mm": [
-131.52071411361564,
6.261008191633575,
845.2175730094486
],
"R_world_to_camera": [
[
0.9373310430689994,
0.34765394455176385,
0.023393386603497504
],
[
0.24733770712755493,
-0.6165675091799039,
-0.7474413456964855
],
[
-0.24542733004306072,
0.7063860427990407,
-0.6639157960213375
]
],
"T_camera_world": [
[
0.9373310430689994,
0.34765394455176385,
0.023393386603497504,
-131.52071411361564
],
[
0.24733770712755493,
-0.6165675091799039,
-0.7474413456964855,
6.261008191633575
],
[
-0.24542733004306072,
0.7063860427990407,
-0.6639157960213375,
845.2175730094486
],
[
0.0,
0.0,
0.0,
1.0
]
],
"R_camera_to_world": [
[
0.9373310430689994,
0.24733770712755493,
-0.24542733004306072
],
[
0.34765394455176385,
-0.6165675091799039,
0.7063860427990407
],
[
0.023393386603497504,
-0.7474413456964855,
-0.6639157960213375
]
],
"t_camera_in_world_mm": [
329.1693569840543,
-547.46586742482,
568.9097490955903
],
"T_world_camera": [
[
0.9373310430689994,
0.24733770712755493,
-0.24542733004306072,
329.1693569840543
],
[
0.34765394455176385,
-0.6165675091799039,
0.7063860427990407,
-547.46586742482
],
[
0.023393386603497504,
-0.7474413456964855,
-0.6639157960213375,
568.9097490955903
],
[
0.0,
0.0,
0.0,
1.0
]
],
"statistics": {
"rmse_px": 1.013497154241286,
"mean_px": 0.959948878595011,
"median_px": 0.98147711476875,
"max_px": 1.4670431124238368,
"per_corner_errors_px": [
0.7453912348426285,
0.7217726973711857,
1.0945509565073668,
0.3852000087970229,
0.8849242405465457,
1.4670431124238368,
1.0780299889909544,
1.3026787892805474
],
"per_corner_residuals_px": [
[
-0.14935206506288523,
-0.7302753272853124
],
[
-0.6208469528596652,
-0.3681098854898437
],
[
0.5489523368783011,
0.9469388196853288
],
[
0.3286776763773105,
0.20087317349123168
],
[
0.6565456329204267,
0.5933285290629442
],
[
-1.440736791251254,
0.27657366477171763
],
[
-0.6545732662175396,
-0.8565526815772273
],
[
1.3003287403698778,
-0.07821249906197636
]
],
"num_correspondences": 8,
"num_inliers": 8,
"used_marker_ids": [
210,
214
]
},
"input": {
"detection_image_file": "C:\\Users\\kech\\SynologyDrive\\2026-AppServer-AppRobot\\appRobotRendering\\pipeline\\render_1c.png",
"camera_id": "cam1",
"marker_dictionary": "DICT_4X4_250"
}
}
}

6
pipeline/run.bat Normal file
View File

@@ -0,0 +1,6 @@
python3 1_detect_aruco_observations.py --image render_1a.png -npz render.npz -robot ../robot.json -cameraId cam1 -outDir .
python3 1_detect_aruco_observations.py --image render_1b.png -npz render.npz -robot ../robot.json -cameraId cam1 -outDir .
python3 1_detect_aruco_observations.py --image render_1c.png -npz render.npz -robot ../robot.json -cameraId cam1 -outDir .
python3 2_KameraPosition.py --robot ../robot.json --detections render_1a_aruco_detection.json render_1b_aruco_detection.json render_1c_aruco_detection.json --outdir . --write-summary

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.2 MiB

After

Width:  |  Height:  |  Size: 1.3 MiB

View File

@@ -649,11 +649,6 @@ for link_name, link_info in links_def.items():
else:
marker_obj.data.materials[0] = marker_mat
print("Marker:", marker_name)
print("WORLD POS:", marker_obj.matrix_world.translation)
print("LOCAL POS:", marker_obj.location)
# --------------------------------------------------------
# --------------------------------------------------------
# BACKING PLATE (white PLA behind marker)

View File

@@ -16,8 +16,8 @@
"renderingInfo": {
"width": 1280,
"height": 720,
"cameraPosition": [-150, -400, 800],
"cameraTarget": [230, -180, 180],
"cameraPosition": [370, -600, 500],
"cameraTarget": [210, -180, 180],
"cameraUpVector": [0, 0, 1],
"lightPosition": [-500, -500, 500],
"lightTarget": [0, 0, 0],
@@ -78,13 +78,13 @@
}
},
"defaultPosition": {
"x": 150,
"x": 100,
"y": 30,
"z": -40,
"a": 260,
"b": 0,
"c": 0,
"e": 0
"z": -30,
"a": -120,
"b": 22,
"c": 91,
"e": 10
},
"recognized": {
"x": null,
@@ -117,15 +117,15 @@
"color": [0.85, 0.20, 0.20]
},
"markers":[
{"id":205,"position":[800, -90, 0.3], "normal":[0,0,1]},
{"id":206,"position":[600, 0, 0.3], "normal":[0,0,1]},
{"id":207,"position":[800, 0, 0.3], "normal":[0,0,1]},
{"id":208,"position":[500, -90, 0.3], "normal":[0,0,1]},
{"id":210,"position":[0, 0.0, 0.3], "normal":[0,0,1]},
{"id":211,"position":[200, 0.0, 0.3], "normal":[0,0,1]},
{"id":214,"position":[400, 0.0, 0.3], "normal":[0,0,1]},
{"id":215,"position":[200, -90, 0.3], "normal":[0,0,1]},
{"id":217,"position":[600, -90, 0.3], "normal":[0,0,1]}
{"id":210,"position":[20, -20, 0.3], "normal":[0,0,1]},
{"id":211,"position":[250, -10, 0.3], "normal":[0,0,1]},
{"id":215,"position":[250, -90, 0.3], "normal":[0,0,1]},
{"id":214,"position":[350, -10, 0.3], "normal":[0,0,1]},
{"id":208,"position":[350, -90, 0.3], "normal":[0,0,1]},
{"id":206,"position":[650, -10, 0.3], "normal":[0,0,1]},
{"id":205,"position":[750, -90, 0.3], "normal":[0,0,1]},
{"id":207,"position":[750, -10, 0.3], "normal":[0,0,1]},
{"id":217,"position":[650, -90, 0.3], "normal":[0,0,1]}
],
"model": [
{
@@ -330,13 +330,14 @@
],
"markers":[
{"id":228, "position":[-24.75, -112, 24.75],"normal":[-1,0,1], "relPosSource":["Fusion","Fusion","Fusion"]},
{"id": 122, "name": "aruco_122", "position":[-24.75, -182, 24.75],"normal":[-1,0,1], "relPosSource":["Fusion","Fusion","Fusion"]},
{"id": 122, "name": "aruco_122", "position":[-35,-112,0], "normal":[-1,0,0], "relPosSource":["Fusion","Fusion","Fusion"]},
{"id": 124, "name": "aruco_124", "position":[-35,-219,0], "normal":[-1,0,0], "relPosSource":["Fusion","Fusion","Fusion"]},
{"id":223,"name": "aruco_223", "position":[-28.67,-112,-20.08], "normal":[-28.67,0,-20.08], "relPosSource":["Fusion","Fusion","Fusion"]},
{"id":218,"name": "aruco_218", "position":[35,-112,0], "normal":[1,0,0], "relPosSource":["Fusion","Fusion","Fusion"]},
{"id":219, "name": "aruco_219", "position":[35,-219,0], "normal":[1,0,0], "relPosSource":["Fusion","Fusion","Fusion"]}
{"id":124, "position":[24.75, -112, -24.75],"normal":[1,0,-1]},
{"id":122, "name": "aruco_122", "position":[-35,-112,0], "normal":[-1,0,0]},
{"id":218, "name": "aruco_218", "position":[35,-112,0], "normal":[1,0,0]},
{"id":122, "name": "aruco_122", "position":[0, -182, 30],"normal":[0,0,1]},
{"id":101, "name": "aruco_122", "position":[ 24.75, -182, -24.75],"normal":[ 1,0,-1]},
{"id":102, "name": "aruco_122", "position":[-24.75, -182, -24.75],"normal":[-1,0,-1]},
{"id":124, "name": "aruco_124", "position":[-35,-219,0], "normal":[-1,0,0]},
{"id":219, "name": "aruco_219", "position":[35,-219,0], "normal":[1,0,0]}
]
},
@@ -383,6 +384,62 @@
"radius": 7,
"color": [0.95, 0.20, 0.20]
}
},
"FingerA": {
"parent": "Palm",
"size": [80, 60, 20],
"mountPosition": [0, 0, 0],
"mountRotation": [0, 0, 0],
"jointToParent": {
"name": "Slider",
"type": "linear",
"axis": [1, 0, 0],
"origin": [4, -35,0],
"rotation": [0, 0, 0],
"variable": "e"
},
"skeleton": {
"from": [0, 0,0],
"to": [0, -60, 0],
"radius": 4,
"color": [0.20, 0.80, 0.20]
},
"model": [
{
"stlFile": "surfaces/Finger.stl",
"originOfModel": [24,0,-9.1],
"rotationOfModelDegree": [90, -90,0],
"material": "defaultPlastic"
}
]
},
"FingerB": {
"parent": "Palm",
"size": [80, 60, 20],
"mountPosition": [0, 0, 0],
"mountRotation": [0, 0, 0],
"jointToParent": {
"name": "Slider",
"type": "linear",
"axis": [-1, 0, 0],
"origin": [-4, -35,0],
"rotation": [0, 0, 0],
"variable": "e"
},
"skeleton": {
"from": [0, 0,0],
"to": [0, -60, 0],
"radius": 4,
"color": [0.20, 0.80, 0.20]
},
"model": [
{
"stlFile": "surfaces/Finger.stl",
"originOfModel": [-24,0,9.1],
"rotationOfModelDegree": [90, 90,0],
"material": "defaultPlastic"
}
]
}
}
}

BIN
surfaces/Finger.stl Normal file

Binary file not shown.