From d51b3ecafa44ac6c76e693b0de32d221f416d58a Mon Sep 17 00:00:00 2001 From: Logic Date: Sun, 5 Apr 2026 00:39:04 +0800 Subject: [PATCH] docs: finalize phase1 report and launch phase2 attnres suite --- .../final_report.md | 63 +++++++++++++++++++ .../2026-04-04-imf-horizon-grid/status.json | 42 +++++++------ .../manifest.json | 20 ++++++ .../notes.md | 9 +++ .../status.json | 32 ++++++++++ 5 files changed, 146 insertions(+), 20 deletions(-) create mode 100644 experiment_suites/2026-04-04-imf-horizon-grid/final_report.md create mode 100644 experiment_suites/2026-04-05-full-attnres-vision-phase2/manifest.json create mode 100644 experiment_suites/2026-04-05-full-attnres-vision-phase2/notes.md create mode 100644 experiment_suites/2026-04-05-full-attnres-vision-phase2/status.json diff --git a/experiment_suites/2026-04-04-imf-horizon-grid/final_report.md b/experiment_suites/2026-04-04-imf-horizon-grid/final_report.md new file mode 100644 index 0000000..de0a215 --- /dev/null +++ b/experiment_suites/2026-04-04-imf-horizon-grid/final_report.md @@ -0,0 +1,63 @@ +# Phase-1 Final Report and Phase-2 Handoff + +- Finalized: 2026-04-05 00:34:20 CST +- Scope: IMF AttnRes policy horizon/action-step grid on `sim_transfer` +- Fixed setup: `n_emb=384`, `n_layer=12`, batch size `80`, learning rate `2.5e-4`, `max_steps=50k`, rollout every 5 epochs with 5 episodes, 3 cameras `[r_vis, top, front]`. 
+- Main metric: `checkpoints/vla_model_best.pt` 中记录的训练期最大 `rollout_avg_reward`。 + +## Final leaderboard + +| Rank | Run ID | pred_horizon | executed action steps | Best avg_reward | Best step | Final loss | +|---:|---|---:|---:|---:|---:|---:| +| 1 | `ph16_ex8` | 16 | 8 | **610.8** | 21874 | 0.0034 | +| 2 | `ph16_ex16` | 16 | 16 | 561.2 | 48124 | 0.0045 | +| 3 | `ph32_ex32` | 32 | 32 | 513.2 | 43749 | 0.0040 | +| 4 | `ph8_ex8` | 8 | 8 | 415.6 | 48124 | 0.0070 | +| 5 | `ph32_ex8` | 32 | 8 | 361.6 | 43749 | 0.0048 | +| 6 | `ph32_ex16` | 32 | 16 | 239.6 | 48124 | 0.0038 | + +## Final conclusions + +1. **最佳组合是 `pred_horizon=16` + `num_action_steps=8`**,最佳平均奖励为 **610.8**,出现在 **step 21874**。 +2. 在 `pred_horizon=16` 下,执行 8 步优于执行 16 步,优势约 **+8.8%**(610.8 vs 561.2)。 +3. `pred_horizon=32` 时,对执行步长非常敏感:`32/32` 明显优于 `32/8` 和 `32/16`;特别是 `32/16` 退化最明显。 +4. 更长的预测窗口并不会自动带来更高 reward;**预测窗口与实际执行窗口的匹配关系** 是关键。 +5. 最佳 checkpoint 并不在训练结束时出现,而是在 50k 训练中较早的 **21.9k step** 出现,说明 rollout 验证比仅看 train loss 更重要。 +6. 
因而 Phase-2 的比较基线固定为 **`ph16_ex8`**。 + +## Recommended baseline for follow-up experiments + +- Baseline run: `ph16_ex8` +- Baseline best checkpoint: `step 21874` +- Baseline best avg_reward: `610.8` +- Baseline run dir: `/home/droid/roboimi_suite_20260404/runs/imf-p1-ph16-ex08-emb384-l12-ms50k-5880g1-20260404-131223` + +## Phase-2 target: full-AttnRes vision backbone + +本阶段按你的要求,不再只是 IMF head 中使用 AttnRes,而是把**之前视觉 ResNet 主干中的残差单元全部替换为 AttnRes 残差单元**。当前实现保留了 ResNet 风格的 stage / downsample 宏观结构,但视觉残差 trunk 已切换到 AttnRes: + +- implementation: `roboimi/vla/models/backbones/attnres_resnet2d.py` +- wiring: `roboimi/vla/models/backbones/resnet_diffusion.py` +- config: `roboimi/vla/conf/backbone/resnet_diffusion.yaml` + +相关代码已提交: + +- `a780068` — headless rollout 修复 + Phase-1 汇总 +- `2033169` — full-AttnRes vision backbone + +## Phase-2 launch status (observed on 2026-04-05 00:36 CST) + +- Run: `imf-p2-full-attnres-vision-ph16-ex08-emb384-l12-b40-lr1p25e4-ms50k-l20g3-20260405-002424` +- Host: `100.119.99.14`, GPU `3` +- Config anchor: `pred_horizon=16`, `num_action_steps=8` +- Vision backbone: `attnres_resnet` +- Because batch size `80` OOMed on both local 5090 and remote L20, Phase-2 currently uses: + - batch size: `40` + - learning rate: `1.25e-4` +- Latest confirmed progress: **step 1300** +- First rollout has **not happened yet** at this observation point. 
+- SwanLab: https://swanlab.cn/@game-loader/roboimi-vla/runs/xy7fjdmn0stdr19eu3gub + +## Next action + +继续监控 Phase-2 full-AttnRes 训练,待其完成后直接与 Phase-1 baseline `610.8` 做对比,判断“视觉主干全部替换为 AttnRes”是否优于“仅 IMF 中使用 AttnRes”。注意:Phase-2 使用 batch size `40`、learning rate `1.25e-4`,与 Phase-1 baseline 的 batch size `80`、learning rate `2.5e-4` 不同,对比结论需考虑这一训练配置差异。 diff --git a/experiment_suites/2026-04-04-imf-horizon-grid/status.json b/experiment_suites/2026-04-04-imf-horizon-grid/status.json index 3034e43..e7f0bc8 100644 --- a/experiment_suites/2026-04-04-imf-horizon-grid/status.json +++ b/experiment_suites/2026-04-04-imf-horizon-grid/status.json @@ -1,6 +1,6 @@ { "suite_name": "2026-04-04-imf-horizon-grid", - "updated_at": "2026-04-05 00:07:39", + "updated_at": "2026-04-05 00:34:20", "phase": "phase1_completed", "provisioning": { "100.119.99.14": { @@ -17,7 +17,7 @@ }, "runs": { "ph8_ex8": { - "status": "running", + "status": "finished", "host": "100.73.14.65", "gpu": 0, "run_name": "imf-p1-ph08-ex08-emb384-l12-ms50k-5880g0-20260404-131223", @@ -30,15 +30,15 @@ "pid": 938714, "launch_log": "experiment_suite_launch_logs/imf-p1-ph08-ex08-emb384-l12-ms50k-5880g0-20260404-131223.restartfix-20260404-143827.log", "latest_step": 50000, - "latest_log_sync": "2026-04-05 00:07:39", + "latest_log_sync": "2026-04-05 00:34:20", "swanlab_url": "https://swanlab.cn/@game-loader/roboimi-vla/runs/i5syc57b6zq7rbkrtqy7b", - "process_running": true, + "process_running": false, "best_step": 48124, "best_rollout_avg_reward": 415.6, "final_loss": 0.007008877582848072 }, "ph16_ex8": { - "status": "running", + "status": "finished", "host": "100.73.14.65", "gpu": 1, "run_name": "imf-p1-ph16-ex08-emb384-l12-ms50k-5880g1-20260404-131223", @@ -51,15 +51,15 @@ "pid": 938717, "launch_log": "experiment_suite_launch_logs/imf-p1-ph16-ex08-emb384-l12-ms50k-5880g1-20260404-131223.restartfix-20260404-143827.log", "latest_step": 50000, - "latest_log_sync": "2026-04-05 00:07:39", + "latest_log_sync": "2026-04-05 00:34:20", "swanlab_url": "https://swanlab.cn/@game-loader/roboimi-vla/runs/4rusbrpfxmw4ffii1ul5w", - "process_running": true, + 
"process_running": false, "best_step": 21874, "best_rollout_avg_reward": 610.8, "final_loss": 0.0034315965604037046 }, "ph16_ex16": { - "status": "running", + "status": "finished", "host": "100.119.99.14", "gpu": 0, "run_name": "imf-p1-ph16-ex16-emb384-l12-ms50k-l20g0-20260404-131223", @@ -71,16 +71,16 @@ "num_action_steps": 16, "pid": 90169, "launch_log": "experiment_suite_launch_logs/imf-p1-ph16-ex16-emb384-l12-ms50k-l20g0-20260404-131223.restartfix-20260404-143827.log", - "latest_log_sync": "2026-04-05 00:07:39", + "latest_log_sync": "2026-04-05 00:34:20", "latest_step": 50000, "swanlab_url": "https://swanlab.cn/@game-loader/roboimi-vla/runs/wwm232k6190gexnze8mg6", - "process_running": true, + "process_running": false, "best_step": 48124, "best_rollout_avg_reward": 561.2, "final_loss": 0.004544622730463743 }, "ph32_ex8": { - "status": "running", + "status": "finished", "host": "100.119.99.14", "gpu": 1, "run_name": "imf-p1-ph32-ex08-emb384-l12-ms50k-l20g1-20260404-131223", @@ -92,16 +92,16 @@ "num_action_steps": 8, "pid": 90173, "launch_log": "experiment_suite_launch_logs/imf-p1-ph32-ex08-emb384-l12-ms50k-l20g1-20260404-131223.restartfix-20260404-143827.log", - "latest_log_sync": "2026-04-05 00:07:39", + "latest_log_sync": "2026-04-05 00:34:20", "latest_step": 50000, "swanlab_url": "https://swanlab.cn/@game-loader/roboimi-vla/runs/o5y2xjb2rsb3lmfcuhy4p", - "process_running": true, + "process_running": false, "best_step": 43749, "best_rollout_avg_reward": 361.6, "final_loss": 0.004788532387465239 }, "ph32_ex16": { - "status": "running", + "status": "finished", "host": "100.119.99.14", "gpu": 2, "run_name": "imf-p1-ph32-ex16-emb384-l12-ms50k-l20g2-20260404-131223", @@ -113,10 +113,10 @@ "num_action_steps": 16, "pid": 90175, "launch_log": "experiment_suite_launch_logs/imf-p1-ph32-ex16-emb384-l12-ms50k-l20g2-20260404-131223.restartfix-20260404-143827.log", - "latest_log_sync": "2026-04-05 00:07:39", + "latest_log_sync": "2026-04-05 00:34:20", "latest_step": 50000, 
"swanlab_url": "https://swanlab.cn/@game-loader/roboimi-vla/runs/54cjpgba9eqsopdm0l8d3", - "process_running": true, + "process_running": false, "best_step": 48124, "best_rollout_avg_reward": 239.6, "final_loss": 0.0038348555099219084 @@ -135,7 +135,7 @@ "pid": 1437836, "launch_log": "experiment_suites/2026-04-04-imf-horizon-grid/launch_logs/imf-p1-ph32-ex32-emb384-l12-ms50k-5090-20260404-131223.launch.log", "latest_step": 49900, - "latest_log_sync": "2026-04-05 00:07:39", + "latest_log_sync": "2026-04-05 00:34:20", "swanlab_url": "https://swanlab.cn/@game-loader/roboimi-vla/runs/ajs2m218jd260hawhy5ns", "process_running": false, "latest_rollout_avg_reward": 513.2, @@ -145,10 +145,12 @@ } }, "monitor": { - "state": "running", - "pid_local": 1443268, + "state": "stopped", + "pid_local": null, "log_path": "experiment_suites/2026-04-04-imf-horizon-grid/monitor_logs/status-sync-20260404-131223.log", - "interval_seconds": 300 + "interval_seconds": 300, + "stopped_at": "2026-04-05 00:34:20", + "stop_reason": "phase1 suite finalized after all six runs completed" }, "debug": { "remote_rollout_failure_20260404": { diff --git a/experiment_suites/2026-04-05-full-attnres-vision-phase2/manifest.json b/experiment_suites/2026-04-05-full-attnres-vision-phase2/manifest.json new file mode 100644 index 0000000..2be19e9 --- /dev/null +++ b/experiment_suites/2026-04-05-full-attnres-vision-phase2/manifest.json @@ -0,0 +1,20 @@ +{ + "suite_name": "2026-04-05-full-attnres-vision-phase2", + "created_at": "2026-04-05 00:12:14", + "phase": "phase2_running", + "baseline_reference": { + "run_id": "ph16_ex8", + "best_rollout_avg_reward": 610.8, + "best_step": 21874, + "run_dir": "/home/droid/roboimi_suite_20260404/runs/imf-p1-ph16-ex08-emb384-l12-ms50k-5880g1-20260404-131223" + }, + "candidate": { + "run_name": "imf-p2-full-attnres-vision-ph16-ex08-emb384-l12-ms50k-20260405-001214", + "host": "local", + "gpu": 0, + "pred_horizon": 16, + "num_action_steps": 8, + "vision_backbone_mode": 
"attnres_resnet", + "notes": "Full-AttnRes vision backbone replacing ResNet residual units; IMF head unchanged." + } +} diff --git a/experiment_suites/2026-04-05-full-attnres-vision-phase2/notes.md b/experiment_suites/2026-04-05-full-attnres-vision-phase2/notes.md new file mode 100644 index 0000000..d6bb4dd --- /dev/null +++ b/experiment_suites/2026-04-05-full-attnres-vision-phase2/notes.md @@ -0,0 +1,9 @@ +# Full-AttnRes Vision Phase-2 + +- Created: 2026-04-05 00:12:14 +- Baseline reference: ph16_ex8 best avg_reward=610.8 +- Candidate run: imf-p2-full-attnres-vision-ph16-ex08-emb384-l12-ms50k-20260405-001214 +- 2026-04-05 00:23:03: batch=80 OOM on both 5090 and L20; using validated fallback batch=40, lr=1.25e-4 on remote L20 GPU3. +- 2026-04-05 00:24:24: launching candidate imf-p2-full-attnres-vision-ph16-ex08-emb384-l12-b40-lr1p25e4-ms50k-l20g3-20260405-002424 on 100.119.99.14 GPU3 with batch=40 lr=1.25e-4. +- 2026-04-05 00:27:17: remote phase2 run is active on 100.119.99.14 GPU3, validated at least to step 200. SwanLab: https://swanlab.cn/@game-loader/roboimi-vla/runs/xy7fjdmn0stdr19eu3gub +- 2026-04-05 00:36:54: latest confirmed progress is step 1300 on 100.119.99.14 GPU3; first rollout not reached yet. 
diff --git a/experiment_suites/2026-04-05-full-attnres-vision-phase2/status.json b/experiment_suites/2026-04-05-full-attnres-vision-phase2/status.json new file mode 100644 index 0000000..18b855f --- /dev/null +++ b/experiment_suites/2026-04-05-full-attnres-vision-phase2/status.json @@ -0,0 +1,32 @@ +{ + "suite_name": "2026-04-05-full-attnres-vision-phase2", + "updated_at": "2026-04-05 00:36:54", + "phase": "phase2_running", + "baseline_reference": { + "run_id": "ph16_ex8", + "best_rollout_avg_reward": 610.8, + "best_step": 21874, + "run_dir": "/home/droid/roboimi_suite_20260404/runs/imf-p1-ph16-ex08-emb384-l12-ms50k-5880g1-20260404-131223" + }, + "candidate": { + "run_name": "imf-p2-full-attnres-vision-ph16-ex08-emb384-l12-b40-lr1p25e4-ms50k-l20g3-20260405-002424", + "host": "100.119.99.14", + "gpu": 3, + "pred_horizon": 16, + "num_action_steps": 8, + "vision_backbone_mode": "attnres_resnet", + "notes": "Full-AttnRes vision backbone replacing ResNet residual units; IMF head unchanged.", + "status": "running", + "run_dir": "/home/droid/roboimi_suite_20260404/runs/imf-p2-full-attnres-vision-ph16-ex08-emb384-l12-b40-lr1p25e4-ms50k-l20g3-20260405-002424", + "log_path": "/home/droid/roboimi_suite_20260404/runs/imf-p2-full-attnres-vision-ph16-ex08-emb384-l12-b40-lr1p25e4-ms50k-l20g3-20260405-002424/train_vla.log", + "pid": 151187, + "batch_size": 40, + "lr": 0.000125, + "num_workers": 12, + "launch_log": "/home/droid/roboimi_suite_20260404/experiment_suite_launch_logs/imf-p2-full-attnres-vision-ph16-ex08-emb384-l12-b40-lr1p25e4-ms50k-l20g3-20260405-002424.launch.log", + "note": "Local 5090 and remote L20 both OOM at batch=80; switched to batch=40 and linearly scaled lr to 1.25e-4 after smoke validation on L20.", + "swanlab_url": "https://swanlab.cn/@game-loader/roboimi-vla/runs/xy7fjdmn0stdr19eu3gub", + "latest_step": 1300, + "latest_log_sync": "2026-04-05 00:36:54" + } +}