docs: finalize phase1 report and launch phase2 attnres suite

This commit is contained in:
Logic
2026-04-05 00:39:04 +08:00
parent 2033169840
commit d51b3ecafa
5 changed files with 146 additions and 20 deletions

View File

@@ -0,0 +1,20 @@
{
"suite_name": "2026-04-05-full-attnres-vision-phase2",
"created_at": "2026-04-05 00:12:14",
"phase": "phase2_running",
"baseline_reference": {
"run_id": "ph16_ex8",
"best_rollout_avg_reward": 610.8,
"best_step": 21874,
"run_dir": "/home/droid/roboimi_suite_20260404/runs/imf-p1-ph16-ex08-emb384-l12-ms50k-5880g1-20260404-131223"
},
"candidate": {
"run_name": "imf-p2-full-attnres-vision-ph16-ex08-emb384-l12-ms50k-20260405-001214",
"host": "local",
"gpu": 0,
"pred_horizon": 16,
"num_action_steps": 8,
"vision_backbone_mode": "attnres_resnet",
"notes": "Full-AttnRes vision backbone replacing ResNet residual units; IMF head unchanged."
}
}

View File

@@ -0,0 +1,9 @@
# Full-AttnRes Vision Phase-2
- Created: 2026-04-05 00:12:14
- Baseline reference: ph16_ex8 best avg_reward=610.8
- Candidate run (as originally planned; superseded by the batch=40 fallback run launched below): imf-p2-full-attnres-vision-ph16-ex08-emb384-l12-ms50k-20260405-001214
- 2026-04-05 00:23:03: batch=80 ran out of memory (OOM) on both the local 5090 and the remote L20; falling back to the validated configuration batch=40, lr=1.25e-4 on remote L20 GPU3.
- 2026-04-05 00:24:24: launching candidate imf-p2-full-attnres-vision-ph16-ex08-emb384-l12-b40-lr1p25e4-ms50k-l20g3-20260405-002424 on 100.119.99.14 GPU3 with batch=40 lr=1.25e-4.
- 2026-04-05 00:27:17: remote phase2 run is active on 100.119.99.14 GPU3 and has been confirmed to reach at least step 200. SwanLab: https://swanlab.cn/@game-loader/roboimi-vla/runs/xy7fjdmn0stdr19eu3gub
- 2026-04-05 00:36:54: latest confirmed progress is step 1300 on 100.119.99.14 GPU3; first rollout not reached yet.

View File

@@ -0,0 +1,32 @@
{
"suite_name": "2026-04-05-full-attnres-vision-phase2",
"updated_at": "2026-04-05 00:36:54",
"phase": "phase2_running",
"baseline_reference": {
"run_id": "ph16_ex8",
"best_rollout_avg_reward": 610.8,
"best_step": 21874,
"run_dir": "/home/droid/roboimi_suite_20260404/runs/imf-p1-ph16-ex08-emb384-l12-ms50k-5880g1-20260404-131223"
},
"candidate": {
"run_name": "imf-p2-full-attnres-vision-ph16-ex08-emb384-l12-b40-lr1p25e4-ms50k-l20g3-20260405-002424",
"host": "100.119.99.14",
"gpu": 3,
"pred_horizon": 16,
"num_action_steps": 8,
"vision_backbone_mode": "attnres_resnet",
"notes": "Full-AttnRes vision backbone replacing ResNet residual units; IMF head unchanged.",
"status": "running",
"run_dir": "/home/droid/roboimi_suite_20260404/runs/imf-p2-full-attnres-vision-ph16-ex08-emb384-l12-b40-lr1p25e4-ms50k-l20g3-20260405-002424",
"log_path": "/home/droid/roboimi_suite_20260404/runs/imf-p2-full-attnres-vision-ph16-ex08-emb384-l12-b40-lr1p25e4-ms50k-l20g3-20260405-002424/train_vla.log",
"pid": 151187,
"batch_size": 40,
"lr": 0.000125,
"num_workers": 12,
"launch_log": "/home/droid/roboimi_suite_20260404/experiment_suite_launch_logs/imf-p2-full-attnres-vision-ph16-ex08-emb384-l12-b40-lr1p25e4-ms50k-l20g3-20260405-002424.launch.log",
"note": "Local 5090 and remote L20 both OOM at batch=80; switched to batch=40 and linearly scaled lr to 1.25e-4 after smoke validation on L20.",
"swanlab_url": "https://swanlab.cn/@game-loader/roboimi-vla/runs/xy7fjdmn0stdr19eu3gub",
"latest_step": 1300,
"latest_log_sync": "2026-04-05 00:36:54"
}
}