feat: add vision transfer backbones and IMF variants

This commit is contained in:
Logic
2026-04-09 14:02:24 +08:00
parent d51b3ecafa
commit ff7c9c1f2a
58 changed files with 2788 additions and 26 deletions

View File

@@ -0,0 +1,8 @@
# CHECKLIST
- [x] Confirm camera mapping (`right` -> `r_vis`)
- [x] Create front+r_vis run contract
- [x] Remote smoke test passes
- [x] Launch 50k run on remote GPU1
- [x] Record PID, log path, and SwanLab URL
- [x] Report status back to user

View File

@@ -0,0 +1,23 @@
# PLAN
## Goal
Train a 50k-step IMF baseline with the original ResNet vision backbone, using `front` + `r_vis` cameras only.
## Fixed comparison contract
- Same hyperparameters as the active top/front and front-only runs
- Agent: `resnet_imf_attnres`
- Vision backbone mode: `resnet`
- `pred_horizon=16`, `num_action_steps=8`
- `n_emb=384`, `n_layer=12`, `n_head=1`, `n_kv_head=1`
- `inference_steps=1`
- `batch_size=80`, `lr=2.5e-4`, cosine LR schedule with 2000 warmup steps
- dataset: `/home/droid/sim_dataset/sim_transfer`
- cameras: `[r_vis, front]`
- headless rollout evaluation every 5 epochs, 5 episodes per evaluation
## Important dimension override
- The two-camera visual conditioning dimension is `64*2 + 16 = 144`, so set `agent.num_cams=2` and `agent.head.cond_dim=144`.
## Resource plan
- Host: `100.119.99.14`
- GPU: `1`

View File

@@ -0,0 +1,6 @@
# Notes
- 2026-04-05 10:20:09: remote 2-step smoke test passed on `100.119.99.14` GPU1 with `r_vis + front`, `batch_size=80`, no OOM.
- 2026-04-05 10:20:49: launched main run `imf-resnet-frontrvis-2cam-ph16-ex08-emb384-l12-ms50k-l20g1-20260405-102029`.
- 2026-04-05 10:22:03: confirmed training is stable through step 200, latest loss 0.3321.
- SwanLab: https://swanlab.cn/@game-loader/roboimi-vla/runs/3fyzjfdcbiq7frtbqv6ss

View File

@@ -0,0 +1,55 @@
{
"suite_name": "2026-04-05-front-rvis-resnet-2cam",
"updated_at": "2026-04-05 10:22:03",
"phase": "running",
"interpretation": {
"right_camera_name": "r_vis"
},
"baseline_reference": {
"source_run": "imf-resnet-topfront-2cam-ph16-ex08-emb384-l12-ms50k-5090-20260405-085023",
"notes": "Same hyperparameters as the active top/front run, replacing top with r_vis."
},
"smoke_test": {
"status": "passed",
"host": "100.119.99.14",
"gpu": 1,
"run_dir": "/home/droid/roboimi_suite_20260404/runs/smoke-frontrvis-resnet-ph16-ex08-20260405-102001",
"batch_size": 80,
"max_steps": 2,
"note": "2-step remote CUDA smoke passed on L20 GPU1 without OOM."
},
"main_run": {
"status": "running",
"host": "100.119.99.14",
"gpu": 1,
"launch_pid": 159910,
"pid": 159913,
"run_name": "imf-resnet-frontrvis-2cam-ph16-ex08-emb384-l12-ms50k-l20g1-20260405-102029",
"run_dir": "/home/droid/roboimi_suite_20260404/runs/imf-resnet-frontrvis-2cam-ph16-ex08-emb384-l12-ms50k-l20g1-20260405-102029",
"log_path": "/home/droid/roboimi_suite_20260404/runs/imf-resnet-frontrvis-2cam-ph16-ex08-emb384-l12-ms50k-l20g1-20260405-102029/train_vla.log",
"launch_log": "/home/droid/roboimi_suite_20260404/experiment_suite_launch_logs/imf-resnet-frontrvis-2cam-ph16-ex08-emb384-l12-ms50k-l20g1-20260405-102029.launch.log",
"dataset_dir": "/home/droid/sim_dataset/sim_transfer",
"camera_names": [
"r_vis",
"front"
],
"pred_horizon": 16,
"num_action_steps": 8,
"head_cond_dim": 144,
"head_n_emb": 384,
"head_n_layer": 12,
"vision_backbone_mode": "resnet",
"pretrained_backbone_weights": null,
"freeze_backbone": false,
"batch_size": 80,
"lr": 0.00025,
"num_workers": 12,
"max_steps": 50000,
"rollout_val_freq_epochs": 5,
"rollout_num_episodes": 5,
"swanlab_url": "https://swanlab.cn/@game-loader/roboimi-vla/runs/3fyzjfdcbiq7frtbqv6ss",
"latest_step": 200,
"latest_loss": 0.3321,
"process_running": true
}
}