Claude: über nacht arbeiten. Pipeline verbessern

This commit is contained in:
chk
2026-06-02 06:04:41 +02:00
parent e5b41e9110
commit 30e97eb4bd
252 changed files with 125356 additions and 52694 deletions

View File

@@ -0,0 +1,41 @@
scene,method,mean_abs_deg,max_abs_deg,mean_abs_mm,max_abs_mm
Scene11,sequential_vector,0.21615071495813823,0.8314190755566813,0.008262698768396692,0.013081401253181468
Scene11,sequential_fk,0.38749059657140494,0.7435712942532007,0.15624107163785528,0.2994007420225291
Scene11,global_ba,0.24076193032735205,0.7758696886111238,0.05602993259134115,0.060046841579868726
Scene11,hybrid,0.24076193032735205,0.7758696886111238,0.05602993259134115,0.060046841579868726
Scene12,sequential_vector,0.1394358162818984,0.2923307824906374,0.05495893424309628,0.05495893424309628
Scene12,sequential_fk,0.2887046847064975,0.700084532189635,0.05495893424309628,0.05495893424309628
Scene12,global_ba,0.23421405631437628,0.6623398298570464,0.06225340961087511,0.06225340961087511
Scene12,hybrid,0.23421405631437628,0.6623398298570464,0.06225340961087511,0.06225340961087511
Scene4,sequential_vector,0.1689903806559073,0.37177692028527076,0.12196674018700904,0.12203581131754504
Scene4,sequential_fk,0.2052561066616761,0.40043968827325216,0.20664534471432816,0.2912548781111113
Scene4,global_ba,0.12764613973571387,0.21003754767065175,0.004541214773356117,0.007785849528890054
Scene4,hybrid,0.12764613973571387,0.21003754767065175,0.004541214773356117,0.007785849528890054
Scene5,sequential_vector,0.24751400376691776,0.5125580870892463,0.09353406132425324,0.1507674786526909
Scene5,sequential_fk,0.37528621184521854,0.6967000968706998,0.12625134900152357,0.21620205400723158
Scene5,global_ba,0.09060360042167304,0.1434928216381195,0.014419220965278257,0.020356040652554697
Scene5,hybrid,0.09060360042167304,0.1434928216381195,0.014419220965278257,0.020356040652554697
Scene6,sequential_vector,0.3720210667917684,0.7648573568365009,0.3613204413461384,0.712110322655052
Scene6,sequential_fk,0.7316454589650221,1.7237275472800775,0.4305546445989883,0.8505787291607518
Scene6,global_ba,0.3541493232707921,1.1662562506099334,0.14890611576813906,0.2013600383222438
Scene6,hybrid,0.3541493232707921,1.1662562506099334,0.14890611576813906,0.2013600383222438
Scene7,sequential_vector,0.5815188036541258,1.7172960028987632,0.17909586175084868,0.313891394909561
Scene7,sequential_fk,0.6459864657291178,1.3044728301107398,0.05085403969021218,0.05740775078828797
Scene7,global_ba,0.5322101800604117,1.5679743083230164,0.32619525048234,0.39010299334816345
Scene7,hybrid,0.5322101800604117,1.5679743083230164,0.32619525048234,0.39010299334816345
Scene8,sequential_vector,0.2597533931518797,0.6380771626184014,0.029322037687468328,0.032765746060491985
Scene8,sequential_fk,0.45929803469902597,0.9239031754049449,0.05204031166694967,0.07820229401945467
Scene8,global_ba,0.3073474311985535,0.6645055768230179,0.13313561563484289,0.19702884143382704
Scene8,hybrid,0.3073474311985535,0.6645055768230179,0.13313561563484289,0.19702884143382704
Scene9,sequential_vector,0.3923356692953973,1.4948777416184953,0.14356583700798886,0.2574687935528708
Scene9,sequential_fk,0.5281710000926637,1.3165046155579603,0.2919599619770281,0.5542570434909493
Scene9,global_ba,0.13197745617665077,0.19531145821179052,0.09420136800160872,0.11355612175911745
Scene9,hybrid,0.13197745617665077,0.19531145821179052,0.09420136800160872,0.11355612175911745
Scene9a,sequential_vector,0.34522769828549826,1.0414123806210682,0.2061424863522392,0.38298669630472126
Scene9a,sequential_fk,0.5338944077748919,1.8379242756954568,0.17538586419238023,0.3214734519850033
Scene9a,global_ba,0.380374450420976,1.1839749264013335,0.1337460792771763,0.145277731665594
Scene9a,hybrid,0.380374450420976,1.1839749264013335,0.1337460792771763,0.145277731665594
Scene9b,sequential_vector,0.4318049021823242,0.7961252335628615,0.24164404042724819,0.4507708686551588
Scene9b,sequential_fk,0.1849035221198619,0.44040456936312466,0.035433954208512475,0.038350696217687386
Scene9b,global_ba,0.13080989543460078,0.3566984466850158,0.05474286758364233,0.06138719680519422
Scene9b,hybrid,0.13080989543460078,0.3566984466850158,0.05474286758364233,0.06138719680519422
1 scene method mean_abs_deg max_abs_deg mean_abs_mm max_abs_mm
2 Scene11 sequential_vector 0.21615071495813823 0.8314190755566813 0.008262698768396692 0.013081401253181468
3 Scene11 sequential_fk 0.38749059657140494 0.7435712942532007 0.15624107163785528 0.2994007420225291
4 Scene11 global_ba 0.24076193032735205 0.7758696886111238 0.05602993259134115 0.060046841579868726
5 Scene11 hybrid 0.24076193032735205 0.7758696886111238 0.05602993259134115 0.060046841579868726
6 Scene12 sequential_vector 0.1394358162818984 0.2923307824906374 0.05495893424309628 0.05495893424309628
7 Scene12 sequential_fk 0.2887046847064975 0.700084532189635 0.05495893424309628 0.05495893424309628
8 Scene12 global_ba 0.23421405631437628 0.6623398298570464 0.06225340961087511 0.06225340961087511
9 Scene12 hybrid 0.23421405631437628 0.6623398298570464 0.06225340961087511 0.06225340961087511
10 Scene4 sequential_vector 0.1689903806559073 0.37177692028527076 0.12196674018700904 0.12203581131754504
11 Scene4 sequential_fk 0.2052561066616761 0.40043968827325216 0.20664534471432816 0.2912548781111113
12 Scene4 global_ba 0.12764613973571387 0.21003754767065175 0.004541214773356117 0.007785849528890054
13 Scene4 hybrid 0.12764613973571387 0.21003754767065175 0.004541214773356117 0.007785849528890054
14 Scene5 sequential_vector 0.24751400376691776 0.5125580870892463 0.09353406132425324 0.1507674786526909
15 Scene5 sequential_fk 0.37528621184521854 0.6967000968706998 0.12625134900152357 0.21620205400723158
16 Scene5 global_ba 0.09060360042167304 0.1434928216381195 0.014419220965278257 0.020356040652554697
17 Scene5 hybrid 0.09060360042167304 0.1434928216381195 0.014419220965278257 0.020356040652554697
18 Scene6 sequential_vector 0.3720210667917684 0.7648573568365009 0.3613204413461384 0.712110322655052
19 Scene6 sequential_fk 0.7316454589650221 1.7237275472800775 0.4305546445989883 0.8505787291607518
20 Scene6 global_ba 0.3541493232707921 1.1662562506099334 0.14890611576813906 0.2013600383222438
21 Scene6 hybrid 0.3541493232707921 1.1662562506099334 0.14890611576813906 0.2013600383222438
22 Scene7 sequential_vector 0.5815188036541258 1.7172960028987632 0.17909586175084868 0.313891394909561
23 Scene7 sequential_fk 0.6459864657291178 1.3044728301107398 0.05085403969021218 0.05740775078828797
24 Scene7 global_ba 0.5322101800604117 1.5679743083230164 0.32619525048234 0.39010299334816345
25 Scene7 hybrid 0.5322101800604117 1.5679743083230164 0.32619525048234 0.39010299334816345
26 Scene8 sequential_vector 0.2597533931518797 0.6380771626184014 0.029322037687468328 0.032765746060491985
27 Scene8 sequential_fk 0.45929803469902597 0.9239031754049449 0.05204031166694967 0.07820229401945467
28 Scene8 global_ba 0.3073474311985535 0.6645055768230179 0.13313561563484289 0.19702884143382704
29 Scene8 hybrid 0.3073474311985535 0.6645055768230179 0.13313561563484289 0.19702884143382704
30 Scene9 sequential_vector 0.3923356692953973 1.4948777416184953 0.14356583700798886 0.2574687935528708
31 Scene9 sequential_fk 0.5281710000926637 1.3165046155579603 0.2919599619770281 0.5542570434909493
32 Scene9 global_ba 0.13197745617665077 0.19531145821179052 0.09420136800160872 0.11355612175911745
33 Scene9 hybrid 0.13197745617665077 0.19531145821179052 0.09420136800160872 0.11355612175911745
34 Scene9a sequential_vector 0.34522769828549826 1.0414123806210682 0.2061424863522392 0.38298669630472126
35 Scene9a sequential_fk 0.5338944077748919 1.8379242756954568 0.17538586419238023 0.3214734519850033
36 Scene9a global_ba 0.380374450420976 1.1839749264013335 0.1337460792771763 0.145277731665594
37 Scene9a hybrid 0.380374450420976 1.1839749264013335 0.1337460792771763 0.145277731665594
38 Scene9b sequential_vector 0.4318049021823242 0.7961252335628615 0.24164404042724819 0.4507708686551588
39 Scene9b sequential_fk 0.1849035221198619 0.44040456936312466 0.035433954208512475 0.038350696217687386
40 Scene9b global_ba 0.13080989543460078 0.3566984466850158 0.05474286758364233 0.06138719680519422
41 Scene9b hybrid 0.13080989543460078 0.3566984466850158 0.05474286758364233 0.06138719680519422

View File

@@ -0,0 +1,339 @@
{
"observation": "corner_pose",
"matrix": {
"Scene11": {
"sequential_vector": {
"n_joints": 7,
"mean_abs_deg": 0.21615071495813823,
"max_abs_deg": 0.8314190755566813,
"mean_abs_mm": 0.008262698768396692,
"max_abs_mm": 0.013081401253181468
},
"sequential_fk": {
"n_joints": 7,
"mean_abs_deg": 0.38749059657140494,
"max_abs_deg": 0.7435712942532007,
"mean_abs_mm": 0.15624107163785528,
"max_abs_mm": 0.2994007420225291
},
"global_ba": {
"n_joints": 7,
"mean_abs_deg": 0.24076193032735205,
"max_abs_deg": 0.7758696886111238,
"mean_abs_mm": 0.05602993259134115,
"max_abs_mm": 0.060046841579868726
},
"hybrid": {
"n_joints": 7,
"mean_abs_deg": 0.24076193032735205,
"max_abs_deg": 0.7758696886111238,
"mean_abs_mm": 0.05602993259134115,
"max_abs_mm": 0.060046841579868726
}
},
"Scene12": {
"sequential_vector": {
"n_joints": 7,
"mean_abs_deg": 0.1394358162818984,
"max_abs_deg": 0.2923307824906374,
"mean_abs_mm": 0.05495893424309628,
"max_abs_mm": 0.05495893424309628
},
"sequential_fk": {
"n_joints": 7,
"mean_abs_deg": 0.2887046847064975,
"max_abs_deg": 0.700084532189635,
"mean_abs_mm": 0.05495893424309628,
"max_abs_mm": 0.05495893424309628
},
"global_ba": {
"n_joints": 7,
"mean_abs_deg": 0.23421405631437628,
"max_abs_deg": 0.6623398298570464,
"mean_abs_mm": 0.06225340961087511,
"max_abs_mm": 0.06225340961087511
},
"hybrid": {
"n_joints": 7,
"mean_abs_deg": 0.23421405631437628,
"max_abs_deg": 0.6623398298570464,
"mean_abs_mm": 0.06225340961087511,
"max_abs_mm": 0.06225340961087511
}
},
"Scene4": {
"sequential_vector": {
"n_joints": 7,
"mean_abs_deg": 0.1689903806559073,
"max_abs_deg": 0.37177692028527076,
"mean_abs_mm": 0.12196674018700904,
"max_abs_mm": 0.12203581131754504
},
"sequential_fk": {
"n_joints": 7,
"mean_abs_deg": 0.2052561066616761,
"max_abs_deg": 0.40043968827325216,
"mean_abs_mm": 0.20664534471432816,
"max_abs_mm": 0.2912548781111113
},
"global_ba": {
"n_joints": 7,
"mean_abs_deg": 0.12764613973571387,
"max_abs_deg": 0.21003754767065175,
"mean_abs_mm": 0.004541214773356117,
"max_abs_mm": 0.007785849528890054
},
"hybrid": {
"n_joints": 7,
"mean_abs_deg": 0.12764613973571387,
"max_abs_deg": 0.21003754767065175,
"mean_abs_mm": 0.004541214773356117,
"max_abs_mm": 0.007785849528890054
}
},
"Scene5": {
"sequential_vector": {
"n_joints": 7,
"mean_abs_deg": 0.24751400376691776,
"max_abs_deg": 0.5125580870892463,
"mean_abs_mm": 0.09353406132425324,
"max_abs_mm": 0.1507674786526909
},
"sequential_fk": {
"n_joints": 7,
"mean_abs_deg": 0.37528621184521854,
"max_abs_deg": 0.6967000968706998,
"mean_abs_mm": 0.12625134900152357,
"max_abs_mm": 0.21620205400723158
},
"global_ba": {
"n_joints": 7,
"mean_abs_deg": 0.09060360042167304,
"max_abs_deg": 0.1434928216381195,
"mean_abs_mm": 0.014419220965278257,
"max_abs_mm": 0.020356040652554697
},
"hybrid": {
"n_joints": 7,
"mean_abs_deg": 0.09060360042167304,
"max_abs_deg": 0.1434928216381195,
"mean_abs_mm": 0.014419220965278257,
"max_abs_mm": 0.020356040652554697
}
},
"Scene6": {
"sequential_vector": {
"n_joints": 7,
"mean_abs_deg": 0.3720210667917684,
"max_abs_deg": 0.7648573568365009,
"mean_abs_mm": 0.3613204413461384,
"max_abs_mm": 0.712110322655052
},
"sequential_fk": {
"n_joints": 7,
"mean_abs_deg": 0.7316454589650221,
"max_abs_deg": 1.7237275472800775,
"mean_abs_mm": 0.4305546445989883,
"max_abs_mm": 0.8505787291607518
},
"global_ba": {
"n_joints": 7,
"mean_abs_deg": 0.3541493232707921,
"max_abs_deg": 1.1662562506099334,
"mean_abs_mm": 0.14890611576813906,
"max_abs_mm": 0.2013600383222438
},
"hybrid": {
"n_joints": 7,
"mean_abs_deg": 0.3541493232707921,
"max_abs_deg": 1.1662562506099334,
"mean_abs_mm": 0.14890611576813906,
"max_abs_mm": 0.2013600383222438
}
},
"Scene7": {
"sequential_vector": {
"n_joints": 7,
"mean_abs_deg": 0.5815188036541258,
"max_abs_deg": 1.7172960028987632,
"mean_abs_mm": 0.17909586175084868,
"max_abs_mm": 0.313891394909561
},
"sequential_fk": {
"n_joints": 7,
"mean_abs_deg": 0.6459864657291178,
"max_abs_deg": 1.3044728301107398,
"mean_abs_mm": 0.05085403969021218,
"max_abs_mm": 0.05740775078828797
},
"global_ba": {
"n_joints": 7,
"mean_abs_deg": 0.5322101800604117,
"max_abs_deg": 1.5679743083230164,
"mean_abs_mm": 0.32619525048234,
"max_abs_mm": 0.39010299334816345
},
"hybrid": {
"n_joints": 7,
"mean_abs_deg": 0.5322101800604117,
"max_abs_deg": 1.5679743083230164,
"mean_abs_mm": 0.32619525048234,
"max_abs_mm": 0.39010299334816345
}
},
"Scene8": {
"sequential_vector": {
"n_joints": 7,
"mean_abs_deg": 0.2597533931518797,
"max_abs_deg": 0.6380771626184014,
"mean_abs_mm": 0.029322037687468328,
"max_abs_mm": 0.032765746060491985
},
"sequential_fk": {
"n_joints": 7,
"mean_abs_deg": 0.45929803469902597,
"max_abs_deg": 0.9239031754049449,
"mean_abs_mm": 0.05204031166694967,
"max_abs_mm": 0.07820229401945467
},
"global_ba": {
"n_joints": 7,
"mean_abs_deg": 0.3073474311985535,
"max_abs_deg": 0.6645055768230179,
"mean_abs_mm": 0.13313561563484289,
"max_abs_mm": 0.19702884143382704
},
"hybrid": {
"n_joints": 7,
"mean_abs_deg": 0.3073474311985535,
"max_abs_deg": 0.6645055768230179,
"mean_abs_mm": 0.13313561563484289,
"max_abs_mm": 0.19702884143382704
}
},
"Scene9": {
"sequential_vector": {
"n_joints": 7,
"mean_abs_deg": 0.3923356692953973,
"max_abs_deg": 1.4948777416184953,
"mean_abs_mm": 0.14356583700798886,
"max_abs_mm": 0.2574687935528708
},
"sequential_fk": {
"n_joints": 7,
"mean_abs_deg": 0.5281710000926637,
"max_abs_deg": 1.3165046155579603,
"mean_abs_mm": 0.2919599619770281,
"max_abs_mm": 0.5542570434909493
},
"global_ba": {
"n_joints": 7,
"mean_abs_deg": 0.13197745617665077,
"max_abs_deg": 0.19531145821179052,
"mean_abs_mm": 0.09420136800160872,
"max_abs_mm": 0.11355612175911745
},
"hybrid": {
"n_joints": 7,
"mean_abs_deg": 0.13197745617665077,
"max_abs_deg": 0.19531145821179052,
"mean_abs_mm": 0.09420136800160872,
"max_abs_mm": 0.11355612175911745
}
},
"Scene9a": {
"sequential_vector": {
"n_joints": 7,
"mean_abs_deg": 0.34522769828549826,
"max_abs_deg": 1.0414123806210682,
"mean_abs_mm": 0.2061424863522392,
"max_abs_mm": 0.38298669630472126
},
"sequential_fk": {
"n_joints": 7,
"mean_abs_deg": 0.5338944077748919,
"max_abs_deg": 1.8379242756954568,
"mean_abs_mm": 0.17538586419238023,
"max_abs_mm": 0.3214734519850033
},
"global_ba": {
"n_joints": 7,
"mean_abs_deg": 0.380374450420976,
"max_abs_deg": 1.1839749264013335,
"mean_abs_mm": 0.1337460792771763,
"max_abs_mm": 0.145277731665594
},
"hybrid": {
"n_joints": 7,
"mean_abs_deg": 0.380374450420976,
"max_abs_deg": 1.1839749264013335,
"mean_abs_mm": 0.1337460792771763,
"max_abs_mm": 0.145277731665594
}
},
"Scene9b": {
"sequential_vector": {
"n_joints": 7,
"mean_abs_deg": 0.4318049021823242,
"max_abs_deg": 0.7961252335628615,
"mean_abs_mm": 0.24164404042724819,
"max_abs_mm": 0.4507708686551588
},
"sequential_fk": {
"n_joints": 7,
"mean_abs_deg": 0.1849035221198619,
"max_abs_deg": 0.44040456936312466,
"mean_abs_mm": 0.035433954208512475,
"max_abs_mm": 0.038350696217687386
},
"global_ba": {
"n_joints": 7,
"mean_abs_deg": 0.13080989543460078,
"max_abs_deg": 0.3566984466850158,
"mean_abs_mm": 0.05474286758364233,
"max_abs_mm": 0.06138719680519422
},
"hybrid": {
"n_joints": 7,
"mean_abs_deg": 0.13080989543460078,
"max_abs_deg": 0.3566984466850158,
"mean_abs_mm": 0.05474286758364233,
"max_abs_mm": 0.06138719680519422
}
}
},
"aggregate": {
"sequential_vector": {
"mean_deg": 0.31547524490238554,
"std_deg": 0.1281946335848945,
"worst_deg": 1.7172960028987632,
"mean_mm": 0.1439813139094687,
"worst_mm": 0.712110322655052,
"n_scenes": 10
},
"sequential_fk": {
"mean_deg": 0.43406364891653804,
"std_deg": 0.17149696414532492,
"worst_deg": 1.8379242756954568,
"mean_mm": 0.15803254759308744,
"worst_mm": 0.8505787291607518,
"n_scenes": 10
},
"global_ba": {
"mean_deg": 0.25300944633611,
"std_deg": 0.13398997870322718,
"worst_deg": 1.5679743083230164,
"mean_mm": 0.10281710746886,
"worst_mm": 0.39010299334816345,
"n_scenes": 10
},
"hybrid": {
"mean_deg": 0.25300944633611,
"std_deg": 0.13398997870322718,
"worst_deg": 1.5679743083230164,
"mean_mm": 0.10281710746886,
"worst_mm": 0.39010299334816345,
"n_scenes": 10
}
}
}

116
benchmark/eval_pose.py Normal file
View File

@@ -0,0 +1,116 @@
#!/usr/bin/env python3
"""
eval_pose.py
============
Compare estimated joint angles (robot_state.json) against ground truth
(simulation/SceneX/pose.json -> "position").
Per-joint error:
revolute (y,z,a,b,c): angular error in degrees, wrap-aware (179 vs -179 = 2deg)
linear (x,e): error in millimetres
Prints a table and optionally writes a JSON summary. Returns nonzero if any
observable joint exceeds a tolerance (for scripted regression checks).
"""
from __future__ import annotations
import argparse
import json
import sys
from typing import Any, Dict
# Joints treated as linear: their error is a plain absolute difference, reported in "mm".
LINEAR = {"x", "e"}
# Every joint that is evaluated, in reporting order. Joints not in LINEAR are
# treated as angles and compared wrap-aware in "deg".
JOINTS = ["x", "y", "z", "a", "b", "c", "e"]
def load_estimate(path: str) -> Dict[str, Dict[str, Any]]:
    """Load the estimated joint values from a robot-state JSON file.

    The file is expected to hold a top-level ``"movements"`` mapping keyed by
    joint name. Each entry may store its value under ``"value"``,
    ``"value_mm"`` or ``"value_deg"`` (historical schemas); the first of those
    keys holding a non-null value wins.

    Args:
        path: Path to the estimate file (e.g. ``robot_state.json``).

    Returns:
        A dict with one entry per joint in ``JOINTS``, each of the form
        ``{"value": float, "observable": bool, "n_markers": int}``. A joint
        that is missing, null, or has no value is reported with value ``0.0``,
        ``observable`` defaulting to ``True`` and ``n_markers`` to ``-1``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # Use a context manager so the handle is closed deterministically.
    with open(path, "r", encoding="utf-8") as fh:
        data = json.load(fh)
    movements = data.get("movements") or {}

    out: Dict[str, Dict[str, Any]] = {}
    for joint in JOINTS:
        # ``or {}`` also covers an explicit JSON null for the joint entry.
        entry = movements.get(joint) or {}
        # Tolerate several historical schemas; skip keys that are present but null.
        value = next(
            (entry[key] for key in ("value", "value_mm", "value_deg")
             if entry.get(key) is not None),
            None,
        )
        n_markers = entry.get("n_markers")
        out[joint] = {
            "value": float(value) if value is not None else 0.0,
            "observable": bool(entry.get("observable", True)),
            "n_markers": int(n_markers) if n_markers is not None else -1,
        }
    return out
def load_gt(path: str) -> Dict[str, float]:
    """Load the ground-truth joint values from a pose JSON file.

    The joint values are read from the top-level ``"position"`` mapping when
    present; otherwise the top-level object itself is treated as the mapping.

    Args:
        path: Path to the ground-truth file (e.g. ``simulation/SceneX/pose.json``).

    Returns:
        A dict mapping each joint of ``JOINTS`` that appears in the file to
        its value as a float. Joints absent from the file are omitted.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # Use a context manager so the handle is closed deterministically.
    with open(path, "r", encoding="utf-8") as fh:
        data = json.load(fh)
    position = data.get("position", data)
    return {joint: float(position[joint]) for joint in JOINTS if joint in position}
def joint_error(v: str, est: float, gt: float) -> float:
    """Return the absolute error between an estimated and a true joint value.

    Args:
        v: Joint name; membership in ``LINEAR`` selects the error model.
        est: Estimated joint value.
        gt: Ground-truth joint value.

    Returns:
        For joints in ``LINEAR`` the plain absolute difference. For all other
        joints the wrap-aware angular difference in degrees, so that e.g.
        179 vs -179 yields 2 rather than 358.
    """
    delta = est - gt
    if v not in LINEAR:
        # Shift by half a turn, reduce modulo a full turn, shift back: this
        # folds the difference onto the shortest way around the circle.
        folded = (delta + 180.0) % 360.0
        return abs(folded - 180.0)
    return abs(delta)
def evaluate(estimate_path: str, gt_path: str) -> Dict[str, Any]:
    """Compare an estimate file against a ground-truth file, joint by joint.

    Args:
        estimate_path: Path to the estimate file read by ``load_estimate``.
        gt_path: Path to the ground-truth file read by ``load_gt``.

    Returns:
        ``{"rows": [...], "summary": {...}}``. Each row describes one joint
        that has a ground-truth value (joint, estimate, gt, error, unit,
        observable, n_markers). The summary holds the row count plus mean and
        max absolute error, separately for angular ("deg") and linear ("mm")
        joints; only joints flagged observable contribute, and a statistic is
        ``None`` when no joint of that kind contributed.
    """
    estimates = load_estimate(estimate_path)
    truth = load_gt(gt_path)
    # Used for a joint the estimate does not list at all.
    fallback = {"value": 0.0, "observable": False, "n_markers": -1}

    rows = []
    for joint in (name for name in JOINTS if name in truth):
        entry = estimates.get(joint, fallback)
        rows.append({
            "joint": joint,
            "estimate": entry["value"],
            "gt": truth[joint],
            "error": joint_error(joint, entry["value"], truth[joint]),
            "unit": "mm" if joint in LINEAR else "deg",
            "observable": entry["observable"],
            "n_markers": entry["n_markers"],
        })

    # Only observable joints enter the statistics; the unit tells the kinds apart.
    observed = [row for row in rows if row["observable"]]
    angular = [row["error"] for row in observed if row["unit"] == "deg"]
    linear = [row["error"] for row in observed if row["unit"] == "mm"]

    def average(values):
        return (sum(values) / len(values)) if values else None

    def peak(values):
        return max(values) if values else None

    return {
        "rows": rows,
        "summary": {
            "n_joints": len(rows),
            "mean_abs_deg": average(angular),
            "max_abs_deg": peak(angular),
            "mean_abs_mm": average(linear),
            "max_abs_mm": peak(linear),
        },
    }
def main() -> int:
    """Command-line entry point: print the per-joint error table and summary.

    Parses the command line (estimate file, ground-truth file, optional
    ``--out`` JSON path and the ``--tolDeg`` / ``--tolMm`` tolerances), prints
    one table row per evaluated joint followed by the mean/max summary, and
    optionally writes the full result as JSON.

    Returns:
        ``1`` if any observable joint's error exceeds its tolerance
        (``--tolMm`` for joints in ``LINEAR``, ``--tolDeg`` otherwise),
        else ``0``. Intended for use as the process exit code.
    """
    ap = argparse.ArgumentParser(description="Evaluate estimated joint angles vs ground truth")
    ap.add_argument("estimate", help="robot_state.json")
    ap.add_argument("gt", help="simulation/SceneX/pose.json")
    ap.add_argument("--out", default=None)
    ap.add_argument("--tolDeg", type=float, default=2.0)
    ap.add_argument("--tolMm", type=float, default=3.0)
    args = ap.parse_args()

    res = evaluate(args.estimate, args.gt)

    print(f"{'joint':>6} | {'est':>9} | {'gt':>9} | {'error':>9} | obs | nMk")
    print("-" * 58)
    for r in res["rows"]:
        # "U" marks joints the estimator flagged as unobservable.
        flag = " " if r["observable"] else "U"
        print(f"{r['joint']:>6} | {r['estimate']:9.2f} | {r['gt']:9.2f} | "
              f"{r['error']:7.2f}{r['unit']:>2} | {flag:>3} | {r['n_markers']:>3}")

    s = res["summary"]
    print("-" * 58)
    # A statistic is None when no observable joint of that kind exists.
    md, xd, mm, xm = (
        f"{s[key]:.2f}" if s[key] is not None else "-"
        for key in ("mean_abs_deg", "max_abs_deg", "mean_abs_mm", "max_abs_mm")
    )
    print(f"angles: mean {md}deg / max {xd}deg | linear: mean {mm}mm / max {xm}mm")

    if args.out:
        # Context manager guarantees the JSON is flushed and the handle closed.
        with open(args.out, "w", encoding="utf-8") as fh:
            json.dump(res, fh, indent=2)
        print(f"[INFO] wrote {args.out}")

    over = [r for r in res["rows"] if r["observable"] and
            r["error"] > (args.tolMm if r["joint"] in LINEAR else args.tolDeg)]
    return 1 if over else 0


if __name__ == "__main__":
    sys.exit(main())

183
benchmark/run_benchmark.py Normal file
View File

@@ -0,0 +1,183 @@
#!/usr/bin/env python3
"""
run_benchmark.py
================
Pose-estimation benchmark over many scenes x methods against ground truth.
For each scene it ensures corner marker poses exist (pipeline step 3b), then
runs each pose-estimation method and compares the estimated joint angles to
simulation/SceneX/pose.json. Produces a matrix (mean/max joint error) and a
per-method aggregate including the spread across scenes — the actual measure
of STABILITY.
Usage:
python benchmark/run_benchmark.py
python benchmark/run_benchmark.py --scenes 4 5 8 --methods hybrid global_ba
python benchmark/run_benchmark.py --observation center_point # aruco_positions_initial.json
"""
from __future__ import annotations
import argparse
import json
import os
import subprocess
import sys
from statistics import mean, pstdev
from typing import Any, Dict, List, Optional
# Parent of the directory that contains this script; all data/ and pipeline/
# paths below are resolved against it, and sub-commands run with it as cwd.
ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
# Interpreter used to launch the sub-scripts: the same one running this script.
PY = sys.executable
# Methods benchmarked by default; each name is passed to pose_estimation.py via --method.
ALL_METHODS = ["sequential_vector", "sequential_fk", "global_ba", "hybrid"]
def run(cmd: List[str]) -> bool:
    """Execute a command from ``ROOT`` and report whether it succeeded.

    Output of the command is captured rather than shown. On a nonzero exit
    status a warning with the command line and the first 300 characters of
    its stderr is printed.

    Args:
        cmd: Command and arguments, passed to ``subprocess.run`` as a list.

    Returns:
        ``True`` if the command exited with status 0, otherwise ``False``.
    """
    proc = subprocess.run(cmd, cwd=ROOT, capture_output=True, text=True)
    succeeded = proc.returncode == 0
    if not succeeded:
        print(f"[WARN] command failed: {' '.join(cmd)}\n{proc.stderr.strip()[:300]}")
    return succeeded
def discover_scenes() -> List[str]:
    """List the scenes that are ready to be benchmarked.

    A scene is ready when all of the following hold:
      * ``data/simulation/<scene>`` exists and its name starts with ``Scene``,
      * it contains a ground-truth ``pose.json``,
      * ``data/evaluations/<scene>`` is a directory holding at least one
        ``*_aruco_detection.json`` file.

    Returns:
        The ready scene names in lexicographic order (so ``Scene11`` sorts
        before ``Scene4``).

    Raises:
        OSError: If the simulation directory cannot be listed.
    """
    # Imported once per call instead of on every matching loop iteration.
    import glob

    sim = os.path.join(ROOT, "data", "simulation")
    ready: List[str] = []
    for name in sorted(os.listdir(sim)):
        if not name.startswith("Scene"):
            continue
        if not os.path.exists(os.path.join(sim, name, "pose.json")):
            continue
        ev = os.path.join(ROOT, "data", "evaluations", name)
        if not os.path.isdir(ev):
            continue
        if glob.glob(os.path.join(ev, "*_aruco_detection.json")):
            ready.append(name)
    return ready
def ensure_marker_poses(scene: str, robot_path: str) -> Optional[str]:
    """Return the scene's corner marker poses file, generating it if missing.

    When ``aruco_marker_poses.json`` is not yet present in the scene's
    evaluation directory, pipeline step 3b is launched to produce it.

    Args:
        scene: Scene name, i.e. the sub-directory of ``data/evaluations``.
        robot_path: Path to the robot description forwarded via ``--robot``.

    Returns:
        Path to ``aruco_marker_poses.json``, or ``None`` when the file is
        missing and step 3b failed or did not produce it.
    """
    eval_dir = os.path.join(ROOT, "data", "evaluations", scene)
    mp = os.path.join(eval_dir, "aruco_marker_poses.json")
    if os.path.exists(mp):
        return mp

    generated = run([PY, "pipeline/3b_corner_marker_poses.py", "--evalDir", eval_dir, "--robot", robot_path])
    # A zero exit status alone does not prove the file was written; verify it
    # so callers never receive a path that does not exist.
    if not generated or not os.path.exists(mp):
        return None
    return mp
def center_input(scene: str) -> Optional[str]:
    """Locate the scene's center-point observation file.

    Args:
        scene: Scene name, i.e. the sub-directory of ``data/evaluations``.

    Returns:
        Path to ``aruco_positions_initial.json`` inside the scene's evaluation
        directory if it exists, otherwise ``None``.
    """
    candidate = os.path.join(ROOT, "data", "evaluations", scene, "aruco_positions_initial.json")
    if os.path.exists(candidate):
        return candidate
    return None
def eval_one(scene: str, method: str, markers_path: str, robot_path: str) -> Optional[Dict[str, Any]]:
    """Run one pose-estimation method on one scene and evaluate the result.

    The estimate is written to ``bench_rs_<method>.json`` and the evaluation
    to ``bench_ev_<method>.json`` inside the scene's evaluation directory.

    Args:
        scene: Scene name (sub-directory of ``data/evaluations`` and
            ``data/simulation``).
        method: Method name forwarded to ``pose_estimation.py --method``.
        markers_path: Marker observation file given to the estimator.
        robot_path: Robot description forwarded via ``-robot``.

    Returns:
        The parsed evaluation JSON written by ``eval_pose.py``, or ``None``
        if the estimator failed or no evaluation file was produced.
    """
    eval_dir = os.path.join(ROOT, "data", "evaluations", scene)
    rs = os.path.join(eval_dir, f"bench_rs_{method}.json")
    ev = os.path.join(eval_dir, f"bench_ev_{method}.json")
    gt = os.path.join(ROOT, "data", "simulation", scene, "pose.json")

    # Drop artefacts of earlier runs first. The evaluation's exit status is
    # ignored below, so a leftover file would otherwise be mistaken for the
    # result of this run whenever the current run fails to write a new one.
    for stale in (rs, ev):
        try:
            os.remove(stale)
        except FileNotFoundError:
            pass

    if not run([PY, "pipeline/pose_estimation.py", markers_path, "-robot", robot_path,
                "--method", method, "-out", rs]):
        return None

    # eval_pose returns nonzero when over tolerance; that's fine, we read the JSON
    run([PY, "benchmark/eval_pose.py", rs, gt, "--out", ev, "--tolDeg", "999", "--tolMm", "999"])
    if not os.path.exists(ev):
        return None
    with open(ev, "r", encoding="utf-8") as fh:
        return json.load(fh)
def _metric_values(matrix: Dict[str, Dict[str, Any]], method: str, key: str) -> List[float]:
    """Collect the non-missing values of one summary metric for a method across scenes."""
    values = []
    for per_method in matrix.values():
        summary = per_method.get(method)
        if summary and summary.get(key) is not None:
            values.append(summary[key])
    return values


def _aggregate(matrix: Dict[str, Dict[str, Any]], method: str) -> Dict[str, Any]:
    """Aggregate one method's per-scene summaries (mean, spread, worst case)."""
    degs = _metric_values(matrix, method, "mean_abs_deg")
    max_degs = _metric_values(matrix, method, "max_abs_deg")
    mms = _metric_values(matrix, method, "mean_abs_mm")
    max_mms = _metric_values(matrix, method, "max_abs_mm")
    return {
        "mean_deg": mean(degs) if degs else None,
        # Population std-dev of the per-scene means; needs at least two scenes.
        "std_deg": pstdev(degs) if len(degs) > 1 else 0.0,
        "worst_deg": max(max_degs) if max_degs else None,
        "mean_mm": mean(mms) if mms else None,
        "worst_mm": max(max_mms) if max_mms else None,
        "n_scenes": len(degs),
    }


def _print_matrix(scenes: List[str], methods: List[str], matrix: Dict[str, Dict[str, Any]]) -> None:
    """Print the scene x method table of mean(max) angular / mean linear error."""
    def fmt(s, key):
        return f"{s[key]:5.2f}" if (s and s.get(key) is not None) else " - "

    print("\n" + "=" * 78)
    print("POSE ERROR — mean angle [deg] (max) / mean linear [mm] (max)")
    print("=" * 78)
    header = f"{'scene':>8} | " + " | ".join(f"{m[:12]:>16}" for m in methods)
    print(header)
    print("-" * len(header))
    for scene in scenes:
        if scene not in matrix:
            continue
        cells = []
        for m in methods:
            s = matrix[scene][m]
            if s:
                cells.append(f"{fmt(s,'mean_abs_deg')}({fmt(s,'max_abs_deg').strip()})/{fmt(s,'mean_abs_mm').strip()}")
            else:
                cells.append(" FAIL ")
        print(f"{scene:>8} | " + " | ".join(f"{c:>16}" for c in cells))


def _print_aggregate(methods: List[str], matrix: Dict[str, Dict[str, Any]]) -> Dict[str, Any]:
    """Print the per-method aggregate table and return the aggregates by method."""
    def num(x):
        return f"{x:9.3f}" if x is not None else " - "

    print("\n" + "=" * 78)
    print("PER-METHOD AGGREGATE across scenes (lower = better, std = instability)")
    print("=" * 78)
    print(f"{'method':>16} | {'mean deg':>9} | {'std deg':>8} | {'worst deg':>9} | {'mean mm':>8} | {'worst mm':>8}")
    print("-" * 78)
    agg: Dict[str, Any] = {}
    for m in methods:
        a = _aggregate(matrix, m)
        agg[m] = a
        print(f"{m:>16} | {num(a['mean_deg'])} | {a['std_deg']:8.3f} | {num(a['worst_deg'])} | "
              f"{num(a['mean_mm'])} | {num(a['worst_mm'])}")
    return agg


def _write_csv(path: str, methods: List[str], matrix: Dict[str, Dict[str, Any]]) -> None:
    """Write one CSV row per (scene, method) that produced a summary."""
    keys = ("mean_abs_deg", "max_abs_deg", "mean_abs_mm", "max_abs_mm")
    with open(path, "w", encoding="utf-8") as fh:
        fh.write("scene,method,mean_abs_deg,max_abs_deg,mean_abs_mm,max_abs_mm\n")
        for scene, per_method in matrix.items():
            for m in methods:
                s = per_method.get(m)
                if not s:
                    continue
                # A metric that is present but None (no observable joint of that
                # kind) becomes an empty cell instead of the literal "None".
                cells = ["" if s.get(k) is None else str(s[k]) for k in keys]
                fh.write(",".join([scene, m] + cells) + "\n")


def main() -> None:
    """Command-line entry point: benchmark every method on every ready scene.

    For each selected scene the observation input is resolved (corner marker
    poses, generated on demand, or the center-point file), every requested
    method is run and evaluated, and the results are printed as a scene x
    method matrix plus a per-method aggregate. The matrix and aggregate are
    written to the ``--out`` JSON file and the per-scene summaries to the
    ``--csv`` file.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--scenes", nargs="*", default=None, help="scene names or numbers (default: all ready)")
    ap.add_argument("--methods", nargs="*", default=ALL_METHODS)
    ap.add_argument("--robot", default=os.path.join(ROOT, "data", "robot", "robot.json"))
    ap.add_argument("--observation", choices=["corner_pose", "center_point"], default="corner_pose")
    ap.add_argument("--out", default=os.path.join(ROOT, "benchmark", "benchmark_results.json"))
    ap.add_argument("--csv", default=os.path.join(ROOT, "benchmark", "benchmark_results.csv"))
    args = ap.parse_args()

    scenes = discover_scenes()
    if args.scenes:
        # Accept both "Scene4" and the bare "4".
        want = {s if s.startswith("Scene") else f"Scene{s}" for s in args.scenes}
        missing = sorted(want.difference(scenes))
        if missing:
            # Make a typo or an unprepared scene visible instead of dropping it silently.
            print(f"[WARN] requested scenes not found or not ready: {missing}")
        scenes = [s for s in scenes if s in want]
    print(f"[INFO] scenes: {scenes}")
    print(f"[INFO] methods: {args.methods} | observation: {args.observation}\n")

    matrix: Dict[str, Dict[str, Any]] = {}
    for scene in scenes:
        if args.observation == "corner_pose":
            mpath = ensure_marker_poses(scene, args.robot)
        else:
            mpath = center_input(scene)
        if not mpath:
            print(f"[WARN] {scene}: no {args.observation} input, skipping")
            continue
        matrix[scene] = {}
        for m in args.methods:
            ev = eval_one(scene, m, mpath, args.robot)
            # None marks a failed run; it is shown as FAIL and left out of the aggregates.
            matrix[scene][m] = ev["summary"] if ev else None

    _print_matrix(scenes, args.methods, matrix)
    # Per-method aggregate (stability = spread across scenes).
    agg = _print_aggregate(args.methods, matrix)

    with open(args.out, "w", encoding="utf-8") as fh:
        json.dump({"observation": args.observation, "matrix": matrix, "aggregate": agg},
                  fh, indent=2)
    _write_csv(args.csv, args.methods, matrix)
    print(f"\n[INFO] wrote {args.out} and {args.csv}")


if __name__ == "__main__":
    main()