feat: add vision transfer backbones and IMF variants

This commit is contained in:
Logic
2026-04-09 14:02:24 +08:00
parent d51b3ecafa
commit ff7c9c1f2a
58 changed files with 2788 additions and 26 deletions

View File

@@ -0,0 +1,8 @@
# CHECKLIST
- [x] Confirm baseline hyperparameters from trusted prior run
- [x] Confirm local GPU availability
- [x] Smoke test with `top/front` cameras only
- [x] Launch 50k run
- [x] Record pid / run dir / log path / SwanLab URL
- [x] Report status back to user

View File

@@ -0,0 +1,30 @@
# PLAN
## Goal
Train a 50k-step IMF baseline that keeps the original ResNet vision backbone (i.e., the vision backbone is not replaced by the full-AttnRes variant), using only the `top` and `front` cameras for image conditioning.
## Fixed comparison contract
- Agent: `resnet_imf_attnres`
- Vision backbone mode: `resnet`
- `pred_horizon=16`
- `num_action_steps=8`
- `n_emb=384`, `n_layer=12`, `n_head=1`, `n_kv_head=1`
- `inference_steps=1`
- `batch_size=80`, `lr=2.5e-4`, cosine scheduler, warmup 2000
- dataset: `/home/droid/project/diana_sim/sim_transfer`
- cameras: `[top, front]` only
- training budget: `max_steps=50000`
- rollout validation: every 5 epochs, 5 episodes, headless
## Resource plan
- Host: local
- GPU: RTX 5090 (GPU 0)
## Execution path
1. Run a short 2-step smoke test on the GPU with the exact 2-camera config.
2. If smoke passes, launch the 50k main run with durable log redirection.
3. Record run name, pid, log path, and SwanLab URL into suite status.
## Fallbacks
- If `batch_size=80` runs out of GPU memory (OOM), fall back to `batch_size=64` with the learning rate scaled linearly to `lr=2.0e-4` (2.5e-4 × 64/80).
- If dataloader startup is unstable, reduce `num_workers` from 12 to 8.

View File

@@ -0,0 +1,5 @@
# Notes
- 2026-04-05 08:50:04: 2-step smoke test passed locally on RTX 5090 with `top/front` cameras, batch=80, no OOM.
- 2026-04-05 08:50:42: launched main run `imf-resnet-topfront-2cam-ph16-ex08-emb384-l12-ms50k-5090-20260405-085023` on local GPU0.
- SwanLab: https://swanlab.cn/@game-loader/roboimi-vla/runs/vi77mn5dwd19z4nttxab8

View File

@@ -0,0 +1,51 @@
{
"suite_name": "2026-04-05-top-front-resnet-2cam",
"updated_at": "2026-04-05 08:52:12",
"phase": "running",
"baseline_reference": {
"source_run": "imf-p1-ph16-ex08-emb384-l12-ms50k-5880g1-20260404-131223",
"best_rollout_avg_reward": 610.8,
"best_step": 21874,
"notes": "Same IMF baseline as Phase-1 best, but switch cameras from [r_vis, top, front] to [top, front] and keep the original ResNet vision backbone."
},
"smoke_test": {
"status": "passed",
"run_dir": "/home/droid/project/roboimi/.worktrees/feat-imf-attnres-policy/runs/smoke-topfront-resnet-ph16-ex08-20260405-085000",
"batch_size": 80,
"num_workers": 4,
"max_steps": 2,
"note": "2-step local CUDA smoke passed without OOM using top/front only."
},
"main_run": {
"status": "running",
"host": "local",
"gpu": 0,
"pid": 1693348,
"run_name": "imf-resnet-topfront-2cam-ph16-ex08-emb384-l12-ms50k-5090-20260405-085023",
"run_dir": "/home/droid/project/roboimi/.worktrees/feat-imf-attnres-policy/runs/imf-resnet-topfront-2cam-ph16-ex08-emb384-l12-ms50k-5090-20260405-085023",
"log_path": "/home/droid/project/roboimi/.worktrees/feat-imf-attnres-policy/runs/imf-resnet-topfront-2cam-ph16-ex08-emb384-l12-ms50k-5090-20260405-085023/train_vla.log",
"launch_log": "/home/droid/project/roboimi/.worktrees/feat-imf-attnres-policy/experiment_suites/2026-04-05-top-front-resnet-2cam/launch_logs/imf-resnet-topfront-2cam-ph16-ex08-emb384-l12-ms50k-5090-20260405-085023.launch.log",
"dataset_dir": "/home/droid/project/diana_sim/sim_transfer",
"camera_names": [
"top",
"front"
],
"pred_horizon": 16,
"num_action_steps": 8,
"head_n_emb": 384,
"head_n_layer": 12,
"vision_backbone_mode": "resnet",
"pretrained_backbone_weights": null,
"freeze_backbone": false,
"batch_size": 80,
"lr": 0.00025,
"num_workers": 12,
"max_steps": 50000,
"rollout_val_freq_epochs": 5,
"rollout_num_episodes": 5,
"swanlab_url": "https://swanlab.cn/@game-loader/roboimi-vla/runs/vi77mn5dwd19z4nttxab8",
"latest_step": 500,
"latest_loss": 0.0978,
"process_running": true
}
}