From a78006808a8bbc8c69ac6a869ae6b66b3b303698 Mon Sep 17 00:00:00 2001
From: Logic <Logic@gitea.ideaopen.cn>
Date: Sat, 4 Apr 2026 23:47:15 +0800
Subject: [PATCH] fix: stabilize headless rollout and summarize phase1 grid

---
 ...4-imf-horizon-grid-and-attnres-ablation.md |  68 ++++++++
 .../leaderboard.csv                           |   7 +
 .../2026-04-04-imf-horizon-grid/manifest.json | 115 ++++++++++++
 .../2026-04-04-imf-horizon-grid/notes.md      |  20 +++
 .../phase1_summary.md                         |  38 ++++
 .../2026-04-04-imf-horizon-grid/status.json   | 165 ++++++++++++++++++
 roboimi/demos/vla_scripts/eval_vla.py         |   8 +-
 tests/test_eval_vla_headless_import.py        |  26 +++
 8 files changed, 446 insertions(+), 1 deletion(-)
 create mode 100644 docs/superpowers/plans/2026-04-04-imf-horizon-grid-and-attnres-ablation.md
 create mode 100644 experiment_suites/2026-04-04-imf-horizon-grid/leaderboard.csv
 create mode 100644 experiment_suites/2026-04-04-imf-horizon-grid/manifest.json
 create mode 100644 experiment_suites/2026-04-04-imf-horizon-grid/notes.md
 create mode 100644 experiment_suites/2026-04-04-imf-horizon-grid/phase1_summary.md
 create mode 100644 experiment_suites/2026-04-04-imf-horizon-grid/status.json
 create mode 100644 tests/test_eval_vla_headless_import.py

diff --git a/docs/superpowers/plans/2026-04-04-imf-horizon-grid-and-attnres-ablation.md b/docs/superpowers/plans/2026-04-04-imf-horizon-grid-and-attnres-ablation.md
new file mode 100644
index 0000000..69e088d
--- /dev/null
+++ b/docs/superpowers/plans/2026-04-04-imf-horizon-grid-and-attnres-ablation.md
@@ -0,0 +1,68 @@
+# IMF Horizon Grid and AttnRes Ablation Implementation Plan
+
+> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
+
+**Goal:** Run a 6-run Phase-1 IMF horizon/action-step experiment grid across available GPUs, monitor progress and collect best rollout metrics, then use the best horizon setting for a Phase-2 visual-attnres ablation.
+
+**Architecture:** Use the current IMF training code as-is for Phase-1 by sweeping explicit `(pred_horizon, num_action_steps)` overrides while keeping emb=384, layer=12, and max_steps=50k fixed. Maintain a local experiment suite directory with a manifest and machine-readable status snapshots so progress can be resumed and summarized across turns. After Phase-1 completes, compare the current head-only attnres setup against a variant that also adds attnres into the visual ResNet path.
+
+**Tech Stack:** Python, Hydra/OmegaConf, PyTorch, SSH/Tailscale, JSON/CSV status files, SwanLab.
+
+---
+
+### Task 1: Prepare the experiment suite manifest and state tracking
+
+**Files:**
+- Create: `experiment_suites/2026-04-04-imf-horizon-grid/manifest.json`
+- Create: `experiment_suites/2026-04-04-imf-horizon-grid/status.json`
+- Create: `experiment_suites/2026-04-04-imf-horizon-grid/notes.md`
+
+- [ ] Define the 6 legal Phase-1 combinations: `(8,8)`, `(16,8)`, `(16,16)`, `(32,8)`, `(32,16)`, `(32,32)`.
+- [ ] Record for each run: name, host, GPU slot, command, log path, SwanLab run name, and completion criteria.
+- [ ] Define the comparison metric as the maximum rollout average reward seen during training (`max avg_reward`), preferably read from the best-checkpoint metadata and cross-checked against logs.
+- [ ] Keep `status.json` updated with per-run state: queued / running / finished / failed plus latest parsed progress.
+
+### Task 2: Prepare the remote 8-GPU execution target
+
+**Files:**
+- Remote working directory under `/home/droid/`
+- Reuse or create a synced code directory for this suite
+
+- [ ] Verify the remote dataset path and environment path.
+- [ ] Verify GPU availability and reserve 6 GPUs for Phase-1 launches.
+- [ ] Sync the required code to a dedicated remote suite directory.
+- [ ] Record exact remote paths back into the local suite manifest.
+
+### Task 3: Launch the 6 Phase-1 experiments in parallel
+
+**Files:**
+- Reuse: `roboimi/demos/vla_scripts/train_vla.py`
+- Modify only local suite tracking files unless a launch bug is discovered
+
+- [ ] Launch 6 runs concurrently with fixed settings: IMF, emb=384, layer=12, max_steps=50k.
+- [ ] Keep all other relevant training hyperparameters aligned to the current strong baseline unless a concrete blocker appears.
+- [ ] Assign one GPU per run on the 8xL20 host.
+- [ ] Capture PID, log path, and SwanLab URL for each run in `status.json`.
+
+### Task 4: Monitor and summarize Phase-1 until all 6 finish
+
+**Files:**
+- Update: `experiment_suites/2026-04-04-imf-horizon-grid/status.json`
+- Update: `experiment_suites/2026-04-04-imf-horizon-grid/notes.md`
+
+- [ ] Periodically parse each run’s log/checkpoints to extract latest step, latest rollout reward, and best rollout reward so far.
+- [ ] Keep a resumable local summary so progress can be continued in later turns without rediscovery.
+- [ ] After all 6 runs finish, rank them by `max avg_reward` and write a compact Phase-1 summary.
+
+### Task 5: Prepare the Phase-2 visual-attnres ablation
+
+**Files:**
+- Likely modify: vision backbone implementation and config files (to be confirmed after code inspection)
+- Add/update targeted tests for the visual backbone path if code changes are needed
+
+- [ ] Use the best Phase-1 `(pred_horizon, num_action_steps)` combination as the fixed rollout setting for Phase-2.
+- [ ] Compare:
+  1. current setup: attnres only in the IMF head
+  2. ablation setup: attnres in both IMF head and visual encoder path
+- [ ] Keep the rest of the training settings fixed.
+- [ ] Launch and monitor the Phase-2 pair after Phase-1 summary is complete.
diff --git a/experiment_suites/2026-04-04-imf-horizon-grid/leaderboard.csv b/experiment_suites/2026-04-04-imf-horizon-grid/leaderboard.csv
new file mode 100644
index 0000000..908eff0
--- /dev/null
+++ b/experiment_suites/2026-04-04-imf-horizon-grid/leaderboard.csv
@@ -0,0 +1,7 @@
+rank,run_id,status,pred_horizon,num_action_steps,best_rollout_avg_reward,best_step,final_step,final_loss,host,run_dir
+1,ph16_ex8,finished,16,8,610.8,21874,50000,0.0034315965604037046,100.73.14.65,/home/droid/roboimi_suite_20260404/runs/imf-p1-ph16-ex08-emb384-l12-ms50k-5880g1-20260404-131223
+2,ph16_ex16,finished,16,16,561.2,48124,50000,0.004544622730463743,100.119.99.14,/home/droid/roboimi_suite_20260404/runs/imf-p1-ph16-ex16-emb384-l12-ms50k-l20g0-20260404-131223
+3,ph32_ex32,finished,32,32,513.2,43749,50000,0.003953303210437298,local,/home/droid/project/roboimi/.worktrees/feat-imf-attnres-policy/runs/imf-p1-ph32-ex32-emb384-l12-ms50k-5090-20260404-131223
+4,ph8_ex8,finished,8,8,415.6,48124,50000,0.007008877582848072,100.73.14.65,/home/droid/roboimi_suite_20260404/runs/imf-p1-ph08-ex08-emb384-l12-ms50k-5880g0-20260404-131223
+5,ph32_ex8,finished,32,8,361.6,43749,50000,0.004788532387465239,100.119.99.14,/home/droid/roboimi_suite_20260404/runs/imf-p1-ph32-ex08-emb384-l12-ms50k-l20g1-20260404-131223
+6,ph32_ex16,finished,32,16,239.6,48124,50000,0.0038348555099219084,100.119.99.14,/home/droid/roboimi_suite_20260404/runs/imf-p1-ph32-ex16-emb384-l12-ms50k-l20g2-20260404-131223
diff --git a/experiment_suites/2026-04-04-imf-horizon-grid/manifest.json b/experiment_suites/2026-04-04-imf-horizon-grid/manifest.json
new file mode 100644
index 0000000..862f384
--- /dev/null
+++ b/experiment_suites/2026-04-04-imf-horizon-grid/manifest.json
@@ -0,0 +1,115 @@
+{
+  "suite_name": "2026-04-04-imf-horizon-grid",
+  "created_at": "2026-04-04 13:19:52",
+  "updated_at": "2026-04-04 13:19:52",
+  "phase": "phase1_launching",
+  "metric": "max_avg_reward",
+  "baseline": {
+    "agent": "resnet_imf_attnres",
+    "batch_size": 80,
+    "lr": 0.00025,
+    "num_workers": 12,
+    "max_steps": 50000,
+    "rollout_val_freq_epochs": 5,
+    "rollout_num_episodes": 5,
+    "val_split": 0.0,
+    "seed": 42,
+    "scheduler_type": "cosine",
+    "warmup_steps": 2000,
+    "min_lr": 1e-06,
+    "weight_decay": 1e-05,
+    "grad_clip": 1.0,
+    "inference_steps": 1,
+    "embed_dim": 384,
+    "n_layer": 12,
+    "n_head": 1,
+    "n_kv_head": 1,
+    "freeze_backbone": false,
+    "pretrained_backbone_weights": null,
+    "camera_names": [
+      "r_vis",
+      "top",
+      "front"
+    ]
+  },
+  "runs": [
+    {
+      "id": "ph8_ex8",
+      "pred_horizon": 8,
+      "num_action_steps": 8,
+      "host": "100.73.14.65",
+      "host_label": "tailnet-5880",
+      "gpu": 0,
+      "workdir": "/home/droid/roboimi_suite_20260404",
+      "python": "/home/droid/miniforge3/envs/roboimi/bin/python",
+      "dataset_dir": "/home/droid/sim_dataset/sim_transfer",
+      "run_name": "imf-p1-ph08-ex08-emb384-l12-ms50k-5880g0-20260404-131223",
+      "launch_state": "ready"
+    },
+    {
+      "id": "ph16_ex8",
+      "pred_horizon": 16,
+      "num_action_steps": 8,
+      "host": "100.73.14.65",
+      "host_label": "tailnet-5880",
+      "gpu": 1,
+      "workdir": "/home/droid/roboimi_suite_20260404",
+      "python": "/home/droid/miniforge3/envs/roboimi/bin/python",
+      "dataset_dir": "/home/droid/sim_dataset/sim_transfer",
+      "run_name": "imf-p1-ph16-ex08-emb384-l12-ms50k-5880g1-20260404-131223",
+      "launch_state": "ready"
+    },
+    {
+      "id": "ph16_ex16",
+      "pred_horizon": 16,
+      "num_action_steps": 16,
+      "host": "100.119.99.14",
+      "host_label": "tailnet-l20",
+      "gpu": 0,
+      "workdir": "/home/droid/roboimi_suite_20260404",
+      "python": "/home/droid/miniforge3/envs/roboimi/bin/python",
+      "dataset_dir": "/home/droid/sim_dataset/sim_transfer",
+      "run_name": "imf-p1-ph16-ex16-emb384-l12-ms50k-l20g0-20260404-131223",
+      "launch_state": "provisioning_required"
+    },
+    {
+      "id": "ph32_ex8",
+      "pred_horizon": 32,
+      "num_action_steps": 8,
+      "host": "100.119.99.14",
+      "host_label": "tailnet-l20",
+      "gpu": 1,
+      "workdir": "/home/droid/roboimi_suite_20260404",
+      "python": "/home/droid/miniforge3/envs/roboimi/bin/python",
+      "dataset_dir": "/home/droid/sim_dataset/sim_transfer",
+      "run_name": "imf-p1-ph32-ex08-emb384-l12-ms50k-l20g1-20260404-131223",
+      "launch_state": "provisioning_required"
+    },
+    {
+      "id": "ph32_ex16",
+      "pred_horizon": 32,
+      "num_action_steps": 16,
+      "host": "100.119.99.14",
+      "host_label": "tailnet-l20",
+      "gpu": 2,
+      "workdir": "/home/droid/roboimi_suite_20260404",
+      "python": "/home/droid/miniforge3/envs/roboimi/bin/python",
+      "dataset_dir": "/home/droid/sim_dataset/sim_transfer",
+      "run_name": "imf-p1-ph32-ex16-emb384-l12-ms50k-l20g2-20260404-131223",
+      "launch_state": "provisioning_required"
+    },
+    {
+      "id": "ph32_ex32",
+      "pred_horizon": 32,
+      "num_action_steps": 32,
+      "host": "local",
+      "host_label": "local-5090",
+      "gpu": 0,
+      "workdir": "/home/droid/project/roboimi/.worktrees/feat-imf-attnres-policy",
+      "python": "/home/droid/.conda/envs/roboimi/bin/python",
+      "dataset_dir": "/home/droid/project/diana_sim/sim_transfer",
+      "run_name": "imf-p1-ph32-ex32-emb384-l12-ms50k-5090-20260404-131223",
+      "launch_state": "ready"
+    }
+  ]
+}
diff --git a/experiment_suites/2026-04-04-imf-horizon-grid/notes.md b/experiment_suites/2026-04-04-imf-horizon-grid/notes.md
new file mode 100644
index 0000000..e30da26
--- /dev/null
+++ b/experiment_suites/2026-04-04-imf-horizon-grid/notes.md
@@ -0,0 +1,20 @@
+# IMF Horizon Grid Suite Notes
+
+- Created: 2026-04-04 13:19:52
+- Phase-1 matrix: (8,8), (16,8), (16,16), (32,8), (32,16), (32,32)
+- Fixed baseline: IMF AttnRes, n_emb=384, n_layer=12, batch_size=80, lr=2.5e-4, max_steps=50k, rollout every 5 epochs with 5 episodes.
+- Host allocation:
+  - local RTX 5090: ph32_ex32
+  - 100.73.14.65 RTX 5880 GPU0: ph8_ex8
+  - 100.73.14.65 RTX 5880 GPU1: ph16_ex8
+  - 100.119.99.14 L20 GPU0: ph16_ex16
+  - 100.119.99.14 L20 GPU1: ph32_ex8
+  - 100.119.99.14 L20 GPU2: ph32_ex16
+- 100.119.99.14 still needs env + dataset + swanlab credential copy before launch.
+
+- 2026-04-04 13:23:43: launched local ph32_ex32 (pid 1437836), remote 100.73 ph8_ex8 (pid 931824), ph16_ex8 (pid 931826); started 100.119 bootstrap (local pid 1437837).
+- 2026-04-04 13:25:43: first status sync — local ph32_ex32 step≈500; remote ph8_ex8 step≈400; remote ph16_ex8 step≈400.
+- 2026-04-04 13:27:41: second status sync — 100.119 bootstrap finished env copy and entered dataset copy; local ph32_ex32 step≈900; remote ph8_ex8 step≈800; remote ph16_ex8 step≈800.
+- 2026-04-04 13:35:31: 100.119 bootstrap data/env copy finished. The original validation command hit a quoting bug, so torch+mujoco+swanlab were revalidated manually; ph16_ex16/ph32_ex8/ph32_ex16 were then launched with pids 81129/81130/81131.
+- 2026-04-04 13:37:36: all 6 Phase-1 runs are now up. SwanLab links recorded in status.json; latest observed steps ~ local 900 / 5880 runs 800 / L20 runs 100.
+- 2026-04-04 14:41:08: diagnosed the remote first-rollout crash: eval_vla.py imported raw_action_trajectory_viewer at module import time, which pulled in mujoco before MUJOCO_GL=egl was set. Added regression test tests/test_eval_vla_headless_import.py, changed the import to lazy-load, verified 20-step headless eval on 5880 and L20, then resumed the 5 failed runs from step 4374. Current resumed pids: ph8_ex8=938714, ph16_ex8=938717, ph16_ex16=90169, ph32_ex8=90173, ph32_ex16=90175.
diff --git a/experiment_suites/2026-04-04-imf-horizon-grid/phase1_summary.md b/experiment_suites/2026-04-04-imf-horizon-grid/phase1_summary.md
new file mode 100644
index 0000000..d8caafd
--- /dev/null
+++ b/experiment_suites/2026-04-04-imf-horizon-grid/phase1_summary.md
@@ -0,0 +1,38 @@
+# Phase-1 IMF Horizon Grid Summary
+
+- Generated: 2026-04-04 23:43:38
+- Fixed baseline: IMF AttnRes head, n_emb=384, n_layer=12, batch_size=80, lr=2.5e-4, max_steps=50k, rollout every 5 epochs with 5 episodes, 3 cameras `[r_vis, top, front]`.
+- Primary metric: `checkpoints/vla_model_best.pt -> rollout_avg_reward` (max training-time rollout average reward).
+
+## Ranked results
+
+| Rank | Run ID | pred_horizon | num_action_steps | Best avg_reward | Best step | Final loss | Host |
+|---:|---|---:|---:|---:|---:|---:|---|
+| 1 | `ph16_ex8` | 16 | 8 | 610.8 | 21874 | 0.0034 | 100.73.14.65 |
+| 2 | `ph16_ex16` | 16 | 16 | 561.2 | 48124 | 0.0045 | 100.119.99.14 |
+| 3 | `ph32_ex32` | 32 | 32 | 513.2 | 43749 | 0.0040 | local |
+| 4 | `ph8_ex8` | 8 | 8 | 415.6 | 48124 | 0.0070 | 100.73.14.65 |
+| 5 | `ph32_ex8` | 32 | 8 | 361.6 | 43749 | 0.0048 | 100.119.99.14 |
+| 6 | `ph32_ex16` | 32 | 16 | 239.6 | 48124 | 0.0038 | 100.119.99.14 |
+
+## Main observations
+
+- Best overall setting was **`pred_horizon=16`, `num_action_steps=8`** with **max avg_reward = 610.8** at step **21874**.
+- Comparing horizon 16: executing 8 steps outperformed executing 16 steps (`ph16_ex8` > `ph16_ex16`).
+- Comparing horizon 32: executing the full 32-step chunk was much better than executing 16 or 8 steps (`ph32_ex32` > `ph32_ex8` > `ph32_ex16`).
+- Short horizon 8 with 8-step execution (`ph8_ex8`, 415.6) landed mid-table: ahead of the partial-execution horizon-32 runs (`ph32_ex8`, `ph32_ex16`) but clearly below the 16/8, 16/16, and 32/32 settings.
+- In this sweep, increasing prediction horizon helped only when the executed chunk length matched a good control cadence; mismatch could hurt a lot (especially `ph32_ex16`).
+
+## Raw results
+
+- `ph16_ex8`: best avg_reward=610.8 @ step 21874, final_loss=0.0034, run_dir=`/home/droid/roboimi_suite_20260404/runs/imf-p1-ph16-ex08-emb384-l12-ms50k-5880g1-20260404-131223`
+- `ph16_ex16`: best avg_reward=561.2 @ step 48124, final_loss=0.0045, run_dir=`/home/droid/roboimi_suite_20260404/runs/imf-p1-ph16-ex16-emb384-l12-ms50k-l20g0-20260404-131223`
+- `ph32_ex32`: best avg_reward=513.2 @ step 43749, final_loss=0.0040, run_dir=`/home/droid/project/roboimi/.worktrees/feat-imf-attnres-policy/runs/imf-p1-ph32-ex32-emb384-l12-ms50k-5090-20260404-131223`
+- `ph8_ex8`: best avg_reward=415.6 @ step 48124, final_loss=0.0070, run_dir=`/home/droid/roboimi_suite_20260404/runs/imf-p1-ph08-ex08-emb384-l12-ms50k-5880g0-20260404-131223`
+- `ph32_ex8`: best avg_reward=361.6 @ step 43749, final_loss=0.0048, run_dir=`/home/droid/roboimi_suite_20260404/runs/imf-p1-ph32-ex08-emb384-l12-ms50k-l20g1-20260404-131223`
+- `ph32_ex16`: best avg_reward=239.6 @ step 48124, final_loss=0.0038, run_dir=`/home/droid/roboimi_suite_20260404/runs/imf-p1-ph32-ex16-emb384-l12-ms50k-l20g2-20260404-131223`
+
+## Recommendation for Phase-2 anchor
+
+- Use **`pred_horizon=16`, `num_action_steps=8`** as the strongest Phase-1 baseline if the goal is purely maximizing rollout reward.
+- If Phase-2 needs a more conservative action execution budget, the same setting applies: `ph16_ex8` is also the strongest run that does not execute a full 32-step chunk, so it remains a good comparison anchor.
diff --git a/experiment_suites/2026-04-04-imf-horizon-grid/status.json b/experiment_suites/2026-04-04-imf-horizon-grid/status.json
new file mode 100644
index 0000000..0cae42c
--- /dev/null
+++ b/experiment_suites/2026-04-04-imf-horizon-grid/status.json
@@ -0,0 +1,165 @@
+{
+  "suite_name": "2026-04-04-imf-horizon-grid",
+  "updated_at": "2026-04-04 23:46:01",
+  "phase": "phase1_completed",
+  "provisioning": {
+    "100.119.99.14": {
+      "state": "completed_manual_launch",
+      "bootstrap_pid_local": 1437837,
+      "log_path": "experiment_suites/2026-04-04-imf-horizon-grid/provision_logs/100.119.99.14-bootstrap-20260404-131223.log",
+      "env_copy": "completed",
+      "dataset_copy": "completed",
+      "launch_watcher_pid_local": null,
+      "launch_watcher_log": "experiment_suites/2026-04-04-imf-horizon-grid/launch_logs/100.119.99.14-launch-watcher-20260404-131223.log",
+      "swanlab_copy": "completed",
+      "bootstrap_validation_note": "initial validation command had a quoting bug; manual validation passed and launches were started successfully"
+    }
+  },
+  "runs": {
+    "ph8_ex8": {
+      "status": "finished",
+      "host": "100.73.14.65",
+      "gpu": 0,
+      "run_name": "imf-p1-ph08-ex08-emb384-l12-ms50k-5880g0-20260404-131223",
+      "workdir": "/home/droid/roboimi_suite_20260404",
+      "dataset_dir": "/home/droid/sim_dataset/sim_transfer",
+      "log_path": "/home/droid/roboimi_suite_20260404/runs/imf-p1-ph08-ex08-emb384-l12-ms50k-5880g0-20260404-131223/train_vla.log",
+      "run_dir": "/home/droid/roboimi_suite_20260404/runs/imf-p1-ph08-ex08-emb384-l12-ms50k-5880g0-20260404-131223",
+      "pred_horizon": 8,
+      "num_action_steps": 8,
+      "pid": 938714,
+      "launch_log": "experiment_suite_launch_logs/imf-p1-ph08-ex08-emb384-l12-ms50k-5880g0-20260404-131223.restartfix-20260404-143827.log",
+      "latest_step": 50000,
+      "latest_log_sync": "2026-04-04 23:42:34",
+      "swanlab_url": "https://swanlab.cn/@game-loader/roboimi-vla/runs/i5syc57b6zq7rbkrtqy7b",
+      "process_running": false,
+      "best_step": 48124,
+      "best_rollout_avg_reward": 415.6,
+      "final_loss": 0.007008877582848072
+    },
+    "ph16_ex8": {
+      "status": "finished",
+      "host": "100.73.14.65",
+      "gpu": 1,
+      "run_name": "imf-p1-ph16-ex08-emb384-l12-ms50k-5880g1-20260404-131223",
+      "workdir": "/home/droid/roboimi_suite_20260404",
+      "dataset_dir": "/home/droid/sim_dataset/sim_transfer",
+      "log_path": "/home/droid/roboimi_suite_20260404/runs/imf-p1-ph16-ex08-emb384-l12-ms50k-5880g1-20260404-131223/train_vla.log",
+      "run_dir": "/home/droid/roboimi_suite_20260404/runs/imf-p1-ph16-ex08-emb384-l12-ms50k-5880g1-20260404-131223",
+      "pred_horizon": 16,
+      "num_action_steps": 8,
+      "pid": 938717,
+      "launch_log": "experiment_suite_launch_logs/imf-p1-ph16-ex08-emb384-l12-ms50k-5880g1-20260404-131223.restartfix-20260404-143827.log",
+      "latest_step": 50000,
+      "latest_log_sync": "2026-04-04 23:42:34",
+      "swanlab_url": "https://swanlab.cn/@game-loader/roboimi-vla/runs/4rusbrpfxmw4ffii1ul5w",
+      "process_running": false,
+      "best_step": 21874,
+      "best_rollout_avg_reward": 610.8,
+      "final_loss": 0.0034315965604037046
+    },
+    "ph16_ex16": {
+      "status": "finished",
+      "host": "100.119.99.14",
+      "gpu": 0,
+      "run_name": "imf-p1-ph16-ex16-emb384-l12-ms50k-l20g0-20260404-131223",
+      "workdir": "/home/droid/roboimi_suite_20260404",
+      "dataset_dir": "/home/droid/sim_dataset/sim_transfer",
+      "log_path": "/home/droid/roboimi_suite_20260404/runs/imf-p1-ph16-ex16-emb384-l12-ms50k-l20g0-20260404-131223/train_vla.log",
+      "run_dir": "/home/droid/roboimi_suite_20260404/runs/imf-p1-ph16-ex16-emb384-l12-ms50k-l20g0-20260404-131223",
+      "pred_horizon": 16,
+      "num_action_steps": 16,
+      "pid": 90169,
+      "launch_log": "experiment_suite_launch_logs/imf-p1-ph16-ex16-emb384-l12-ms50k-l20g0-20260404-131223.restartfix-20260404-143827.log",
+      "latest_log_sync": "2026-04-04 23:42:34",
+      "latest_step": 50000,
+      "swanlab_url": "https://swanlab.cn/@game-loader/roboimi-vla/runs/wwm232k6190gexnze8mg6",
+      "process_running": false,
+      "best_step": 48124,
+      "best_rollout_avg_reward": 561.2,
+      "final_loss": 0.004544622730463743
+    },
+    "ph32_ex8": {
+      "status": "finished",
+      "host": "100.119.99.14",
+      "gpu": 1,
+      "run_name": "imf-p1-ph32-ex08-emb384-l12-ms50k-l20g1-20260404-131223",
+      "workdir": "/home/droid/roboimi_suite_20260404",
+      "dataset_dir": "/home/droid/sim_dataset/sim_transfer",
+      "log_path": "/home/droid/roboimi_suite_20260404/runs/imf-p1-ph32-ex08-emb384-l12-ms50k-l20g1-20260404-131223/train_vla.log",
+      "run_dir": "/home/droid/roboimi_suite_20260404/runs/imf-p1-ph32-ex08-emb384-l12-ms50k-l20g1-20260404-131223",
+      "pred_horizon": 32,
+      "num_action_steps": 8,
+      "pid": 90173,
+      "launch_log": "experiment_suite_launch_logs/imf-p1-ph32-ex08-emb384-l12-ms50k-l20g1-20260404-131223.restartfix-20260404-143827.log",
+      "latest_log_sync": "2026-04-04 23:42:34",
+      "latest_step": 50000,
+      "swanlab_url": "https://swanlab.cn/@game-loader/roboimi-vla/runs/o5y2xjb2rsb3lmfcuhy4p",
+      "process_running": false,
+      "best_step": 43749,
+      "best_rollout_avg_reward": 361.6,
+      "final_loss": 0.004788532387465239
+    },
+    "ph32_ex16": {
+      "status": "finished",
+      "host": "100.119.99.14",
+      "gpu": 2,
+      "run_name": "imf-p1-ph32-ex16-emb384-l12-ms50k-l20g2-20260404-131223",
+      "workdir": "/home/droid/roboimi_suite_20260404",
+      "dataset_dir": "/home/droid/sim_dataset/sim_transfer",
+      "log_path": "/home/droid/roboimi_suite_20260404/runs/imf-p1-ph32-ex16-emb384-l12-ms50k-l20g2-20260404-131223/train_vla.log",
+      "run_dir": "/home/droid/roboimi_suite_20260404/runs/imf-p1-ph32-ex16-emb384-l12-ms50k-l20g2-20260404-131223",
+      "pred_horizon": 32,
+      "num_action_steps": 16,
+      "pid": 90175,
+      "launch_log": "experiment_suite_launch_logs/imf-p1-ph32-ex16-emb384-l12-ms50k-l20g2-20260404-131223.restartfix-20260404-143827.log",
+      "latest_log_sync": "2026-04-04 23:42:34",
+      "latest_step": 50000,
+      "swanlab_url": "https://swanlab.cn/@game-loader/roboimi-vla/runs/54cjpgba9eqsopdm0l8d3",
+      "process_running": false,
+      "best_step": 48124,
+      "best_rollout_avg_reward": 239.6,
+      "final_loss": 0.0038348555099219084
+    },
+    "ph32_ex32": {
+      "status": "finished",
+      "host": "local",
+      "gpu": 0,
+      "run_name": "imf-p1-ph32-ex32-emb384-l12-ms50k-5090-20260404-131223",
+      "workdir": "/home/droid/project/roboimi/.worktrees/feat-imf-attnres-policy",
+      "dataset_dir": "/home/droid/project/diana_sim/sim_transfer",
+      "log_path": "/home/droid/project/roboimi/.worktrees/feat-imf-attnres-policy/runs/imf-p1-ph32-ex32-emb384-l12-ms50k-5090-20260404-131223/train_vla.log",
+      "run_dir": "/home/droid/project/roboimi/.worktrees/feat-imf-attnres-policy/runs/imf-p1-ph32-ex32-emb384-l12-ms50k-5090-20260404-131223",
+      "pred_horizon": 32,
+      "num_action_steps": 32,
+      "pid": 1437836,
+      "launch_log": "experiment_suites/2026-04-04-imf-horizon-grid/launch_logs/imf-p1-ph32-ex32-emb384-l12-ms50k-5090-20260404-131223.launch.log",
+      "latest_step": 50000,
+      "latest_log_sync": "2026-04-04 23:42:34",
+      "swanlab_url": "https://swanlab.cn/@game-loader/roboimi-vla/runs/ajs2m218jd260hawhy5ns",
+      "process_running": false,
+      "latest_rollout_avg_reward": 513.2,
+      "best_rollout_avg_reward": 513.2,
+      "best_step": 43749,
+      "final_loss": 0.003953303210437298
+    }
+  },
+  "monitor": {
+    "state": "running",
+    "pid_local": 1443268,
+    "log_path": "experiment_suites/2026-04-04-imf-horizon-grid/monitor_logs/status-sync-20260404-131223.log",
+    "interval_seconds": 300
+  },
+  "debug": {
+    "remote_rollout_failure_20260404": {
+      "root_cause": "eval_vla.py imported raw_action_trajectory_viewer at module import time, which imported mujoco before MUJOCO_GL=egl was set; remote headless rollout then fell back to GLFW/X11 and crashed with mujoco.FatalError: gladLoadGL error during env.reset()->mj.Renderer(...)",
+      "fixed_file": "roboimi/demos/vla_scripts/eval_vla.py",
+      "verification": {
+        "pytest": "tests/test_eval_vla_headless_import.py passed",
+        "remote_eval_5880": "1 episode x 20 steps headless eval passed",
+        "remote_eval_l20": "1 episode x 20 steps headless eval passed"
+      }
+    }
+  },
+  "phase1_summary_md": "/home/droid/project/roboimi/.worktrees/feat-imf-attnres-policy/experiment_suites/2026-04-04-imf-horizon-grid/phase1_summary.md"
+}
diff --git a/roboimi/demos/vla_scripts/eval_vla.py b/roboimi/demos/vla_scripts/eval_vla.py
index a8003dd..5b5af41 100644
--- a/roboimi/demos/vla_scripts/eval_vla.py
+++ b/roboimi/demos/vla_scripts/eval_vla.py
@@ -26,7 +26,6 @@ from hydra.utils import instantiate
 from einops import rearrange
 
 from roboimi.utils.act_ex_utils import sample_transfer_pose
-from roboimi.utils.raw_action_trajectory_viewer import build_trajectory_capsule_markers
 from roboimi.vla.eval_utils import execute_policy_action
 
 sys.path.append(os.getcwd())
@@ -362,6 +361,13 @@ def _save_rollout_trajectory_image(
     if output_path is None or camera_name is None:
         return None
 
+    # IMPORTANT:
+    # Keep this import lazy so headless rollout can set MUJOCO_GL=egl before
+    # anything imports mujoco. Importing this helper at module import time would
+    # pull in mujoco too early on remote headless hosts and make rollout fail
+    # with gladLoadGL / missing DISPLAY errors.
+    from roboimi.utils.raw_action_trajectory_viewer import build_trajectory_capsule_markers
+
     output_path = str(output_path)
     Path(output_path).parent.mkdir(parents=True, exist_ok=True)
 
diff --git a/tests/test_eval_vla_headless_import.py b/tests/test_eval_vla_headless_import.py
new file mode 100644
index 0000000..e9d4763
--- /dev/null
+++ b/tests/test_eval_vla_headless_import.py
@@ -0,0 +1,26 @@
+import json
+import os
+import subprocess
+import sys
+
+
+def test_eval_vla_import_does_not_import_mujoco_early_when_headless_backend_not_set():
+    env = os.environ.copy()
+    env.pop('MUJOCO_GL', None)
+    proc = subprocess.run(
+        [
+            sys.executable,
+            '-c',
+            (
+                'import json, sys; '
+                'from roboimi.demos.vla_scripts import eval_vla; '
+                'print(json.dumps({"mujoco_in_sys_modules": "mujoco" in sys.modules}))'
+            ),
+        ],
+        capture_output=True,
+        text=True,
+        env=env,
+        check=True,
+    )
+    payload = json.loads(proc.stdout.strip())
+    assert payload['mujoco_in_sys_modules'] is False