From 08c1950c6df7e08a0c158ca9a9cad6a1d617de8c Mon Sep 17 00:00:00 2001
From: Logic <logic@gitea.ideaopen.cn>
Date: Sat, 14 Mar 2026 12:25:44 +0800
Subject: [PATCH] chore(pusht): add 5090 repro docs and uv setup

---
 AGENTS.md                                     |  68 +++++++++++
 PUSHT_REPRO_5090.md                           | 108 ++++++++++++++++++
 .../env_runner/pusht_image_runner.py          |   7 +-
 diffusion_policy/gym_util/sync_vector_env.py  |  37 +++++-
 requirements-pusht-5090.txt                   |  38 ++++++
 setup_uv_pusht_5090.sh                        |  20 ++++
 6 files changed, 270 insertions(+), 8 deletions(-)
 create mode 100644 AGENTS.md
 create mode 100644 PUSHT_REPRO_5090.md
 create mode 100644 requirements-pusht-5090.txt
 create mode 100755 setup_uv_pusht_5090.sh

diff --git a/AGENTS.md b/AGENTS.md
new file mode 100644
index 0000000..0f022be
--- /dev/null
+++ b/AGENTS.md
@@ -0,0 +1,68 @@
+# Agent Notes
+
+## Purpose
+`~/diffusion_policy` is the Diffusion Policy training repo. The main workflow here is Hydra-driven training via `train.py`, with the canonical PushT image experiment configured by `image_pusht_diffusion_policy_cnn.yaml`.
+
+## Top Level
+- `diffusion_policy/`: core code, configs, datasets, env runners, workspaces.
+- `data/`: local datasets, outputs, checkpoints, run logs.
+- `train.py`: main training entrypoint.
+- `eval.py`: checkpoint evaluation entrypoint.
+- `image_pusht_diffusion_policy_cnn.yaml`: canonical single-seed PushT image config from the README path.
+- `.venv/`: local `uv`-managed virtualenv.
+- `.uv-cache/`, `.uv-python/`: local `uv` cache and Python install state.
+- `README.md`: upstream instructions and canonical commands.
+
+## Canonical PushT Image Path
+- Entrypoint: `python train.py --config-dir=. --config-name=image_pusht_diffusion_policy_cnn.yaml`
+- Dataset path in config: `data/pusht/pusht_cchi_v7_replay.zarr`
+- README canonical device override: `training.device=cuda:0`
+
+## Data
+- PushT archive currently present at `data/pusht.zip`
+- Unpacked dataset used by training: `data/pusht/pusht_cchi_v7_replay.zarr`
+
+## Local Compatibility Adjustments
+- `diffusion_policy/env_runner/pusht_image_runner.py` now uses `SyncVectorEnv` instead of `AsyncVectorEnv`.
+  Reason: avoid shared-memory and semaphore failures on this host/session.
+- `diffusion_policy/gym_util/sync_vector_env.py` has local compatibility changes:
+  - added `reset_async`
+  - seeded `reset_wait`
+  - updated `concatenate(...)` call order for the current `gym` API
+
+## Environment Expectations
+- Use the local `uv` env at `.venv`
+- Verified local Python: `3.9.25`
+- Verified local Torch stack: `torch 2.8.0+cu128`, `torchvision 0.23.0+cu128`
+- Other key installed versions verified in `.venv`:
+  - `gym 0.23.1`
+  - `hydra-core 1.2.0`
+  - `diffusers 0.11.1`
+  - `huggingface_hub 0.10.1`
+  - `wandb 0.13.3`
+  - `zarr 2.12.0`
+  - `numcodecs 0.10.2`
+  - `av 14.0.1`
+- Important note: when these notes were written, the working shell reported `torch.cuda.is_available() == False`, so always verify CUDA access in the current session before assuming the GPU is usable.
+
+## Logging And Outputs
+- Hydra run outputs: `data/outputs/...`
+- Per-run files to check first:
+  - `.hydra/overrides.yaml`
+  - `logs.json.txt`
+  - `train.log`
+  - `checkpoints/latest.ckpt`
+- Extra launcher logs may live under `data/run_logs/`
+
+## Practical Guidance
+- Inspect with `rg`, `sed`, and existing Hydra output folders before changing code.
+- Prefer config overrides before code edits.
+- On this host, start from these conservative overrides until the defaults have been revalidated:
+  - `logging.mode=offline`
+  - `dataloader.num_workers=0`
+  - `val_dataloader.num_workers=0`
+  - `task.env_runner.n_envs=1`
+  - `task.env_runner.n_test_vis=0`
+  - `task.env_runner.n_train_vis=0`
+- If a run fails, inspect `.hydra/overrides.yaml`, then `logs.json.txt`, then `train.log`.
+- Avoid driver or system changes unless the repo-local path is clearly blocked.
diff --git a/PUSHT_REPRO_5090.md b/PUSHT_REPRO_5090.md
new file mode 100644
index 0000000..99c3eb2
--- /dev/null
+++ b/PUSHT_REPRO_5090.md
@@ -0,0 +1,108 @@
+# PushT Repro On 5090
+
+## Goal
+Reproduce the canonical single-seed image PushT experiment from this repo in `~/diffusion_policy` using `image_pusht_diffusion_policy_cnn.yaml`.
+
+## Current Verified Local Setup
+- Virtualenv: `./.venv` managed with `uv`
+- Python: `3.9.25`
+- Torch stack: `torch 2.8.0+cu128`, `torchvision 0.23.0+cu128`
+- Version strategy used here:
+  - newer Torch/CUDA stack for current 5090-class hardware support
+  - keep older repo-era packages where they are still required by the code
+- Verified key pins in `.venv`:
+  - `numpy 1.26.4`
+  - `gym 0.23.1`
+  - `hydra-core 1.2.0`
+  - `diffusers 0.11.1`
+  - `huggingface_hub 0.10.1`
+  - `wandb 0.13.3`
+  - `zarr 2.12.0`
+  - `numcodecs 0.10.2`
+  - `av 14.0.1`
+  - `robomimic 0.2.0`
+
+## Dataset
+- README source: `https://diffusion-policy.cs.columbia.edu/data/training/pusht.zip`
+- Local archive currently present: `data/pusht.zip`
+- Unpacked dataset used by the config: `data/pusht/pusht_cchi_v7_replay.zarr`
+
+## Repo-Local Code Adjustments
+- `diffusion_policy/env_runner/pusht_image_runner.py`
+  - switched PushT image evaluation from `AsyncVectorEnv` to `SyncVectorEnv`
+- `diffusion_policy/gym_util/sync_vector_env.py`
+  - added `reset_async`
+  - added seeded `reset_wait`
+  - updated `concatenate(...)` call order for current `gym`
+
+These changes were needed to keep PushT evaluation working without the async shared-memory path.
+
+## Validated GPU Smoke Command
+This command was verified by the run in `data/outputs/gpu_smoke2___pusht_gpu_smoke`, whose output directory contains `logs.json.txt` plus checkpoints:
+
+```bash
+.venv/bin/python train.py \
+  --config-dir=. \
+  --config-name=image_pusht_diffusion_policy_cnn.yaml \
+  training.seed=42 \
+  training.device=cuda:0 \
+  logging.mode=offline \
+  dataloader.num_workers=0 \
+  val_dataloader.num_workers=0 \
+  task.env_runner.n_envs=1 \
+  training.debug=true \
+  task.env_runner.n_test=2 \
+  task.env_runner.n_test_vis=0 \
+  task.env_runner.n_train=1 \
+  task.env_runner.n_train_vis=0 \
+  task.env_runner.max_steps=20
+```
+
+## Practical Full Training Command Used Here
+This matches the longer GPU run under `data/outputs/2026.03.13/15.37.00_train_diffusion_unet_hybrid_pusht_image_gpu_seed42`:
+
+```bash
+.venv/bin/python train.py \
+  --config-dir=. \
+  --config-name=image_pusht_diffusion_policy_cnn.yaml \
+  training.seed=42 \
+  training.device=cuda:0 \
+  logging.mode=offline \
+  dataloader.num_workers=0 \
+  val_dataloader.num_workers=0 \
+  task.env_runner.n_envs=1 \
+  task.env_runner.n_test_vis=0 \
+  task.env_runner.n_train_vis=0 \
+  hydra.run.dir=data/outputs/2026.03.13/15.37.00_train_diffusion_unet_hybrid_pusht_image_gpu_seed42
+```
+
+## Why These Overrides Were Used
+- `logging.mode=offline`
+  - avoids needing a W&B login and still leaves local run metadata in the output dir
+- `dataloader.num_workers=0` and `val_dataloader.num_workers=0`
+  - avoids extra multiprocessing on this host
+- `task.env_runner.n_envs=1`
+  - keeps PushT eval to a single environment on the serial `SyncVectorEnv` path (the runner now uses `SyncVectorEnv` regardless of `n_envs`)
+- `task.env_runner.n_test_vis=0` and `task.env_runner.n_train_vis=0`
+  - avoids video-writing issues on this stack
+  - one earlier GPU run with default vis settings logged libav/libx264 `profile=high` errors in `data/outputs/_train_diffusion_unet_hybrid_pusht_image_gpu_seed42/train.log`
+
+## Output Locations
+- Smoke run:
+  - `data/outputs/gpu_smoke2___pusht_gpu_smoke`
+- Longer GPU run:
+  - `data/outputs/2026.03.13/15.37.00_train_diffusion_unet_hybrid_pusht_image_gpu_seed42`
+- Files to inspect inside a run:
+  - `.hydra/overrides.yaml`
+  - `logs.json.txt`
+  - `train.log`
+  - `checkpoints/latest.ckpt`
+
+## Known Caveats
+- The default config is still tuned for the older setup, so these defaults differ from the overrides used above:
+  - `logging.mode=online`
+  - `dataloader.num_workers=8`
+  - `task.env_runner.n_envs=null`
+  - `task.env_runner.n_test_vis=4`
+  - `task.env_runner.n_train_vis=2`
+- When this was written, `torch.cuda.is_available()` reported `False` in the working shell even though the repo contains validated GPU smoke/full run artifacts. Re-check device visibility in the current session before restarting a GPU run.
diff --git a/diffusion_policy/env_runner/pusht_image_runner.py b/diffusion_policy/env_runner/pusht_image_runner.py
index f65c06a..82187b6 100644
--- a/diffusion_policy/env_runner/pusht_image_runner.py
+++ b/diffusion_policy/env_runner/pusht_image_runner.py
@@ -8,8 +8,7 @@ import dill
 import math
 import wandb.sdk.data_types.video as wv
 from diffusion_policy.env.pusht.pusht_image_env import PushTImageEnv
-from diffusion_policy.gym_util.async_vector_env import AsyncVectorEnv
-# from diffusion_policy.gym_util.sync_vector_env import SyncVectorEnv
+from diffusion_policy.gym_util.sync_vector_env import SyncVectorEnv
 from diffusion_policy.gym_util.multistep_wrapper import MultiStepWrapper
 from diffusion_policy.gym_util.video_recording_wrapper import VideoRecordingWrapper, VideoRecorder
 
@@ -121,7 +120,9 @@ class PushTImageRunner(BaseImageRunner):
             env_prefixs.append('test/')
             env_init_fn_dills.append(dill.dumps(init_fn))
 
-        env = AsyncVectorEnv(env_fns)
+        # This environment can run without multiprocessing, which avoids
+        # shared-memory and semaphore restrictions on some machines.
+        env = SyncVectorEnv(env_fns)
 
         # test env
         # env.reset(seed=env_seeds)
diff --git a/diffusion_policy/gym_util/sync_vector_env.py b/diffusion_policy/gym_util/sync_vector_env.py
index c85a682..260e1c1 100644
--- a/diffusion_policy/gym_util/sync_vector_env.py
+++ b/diffusion_policy/gym_util/sync_vector_env.py
@@ -60,17 +60,57 @@ class SyncVectorEnv(VectorEnv):
         for env, seed in zip(self.envs, seeds):
             env.seed(seed)
 
-    def reset_wait(self):
+    def _normalize_reset_seeds(self, seed):
+        """Expand ``seed`` into a list with one entry per sub-environment.
+
+        ``None`` yields all-``None`` (no reseeding), an ``int`` yields
+        ``seed + i`` for env ``i``, and any other iterable is used as-is
+        after checking that its length equals ``self.num_envs``.
+        """
+        if seed is None:
+            return [None for _ in range(self.num_envs)]
+        if isinstance(seed, int):
+            return [seed + i for i in range(self.num_envs)]
+        seeds = list(seed)
+        assert len(seeds) == self.num_envs
+        return seeds
+
+    def reset_async(self, seed=None, return_info=False, options=None):
+        """Stage reset arguments; the reset itself runs in ``reset_wait``."""
+        self._reset_seeds = self._normalize_reset_seeds(seed)
+        self._reset_return_info = return_info
+        self._reset_options = options
+
+    def reset_wait(self, seed=None, return_info=False, options=None):
+        """Reset every sub-environment and return the batched observations.
+
+        Seeds staged by ``reset_async`` take precedence over ``seed``. With
+        ``return_info=True`` the result is ``(observations, infos)`` where
+        ``infos`` holds one empty dict per sub-environment.
+        """
+        # Consume the staged seeds so a later reset_wait() that was not
+        # preceded by reset_async() does not silently reuse stale seeds.
+        seeds = getattr(self, '_reset_seeds', None)
+        self._reset_seeds = None
+        if seeds is None:
+            seeds = self._normalize_reset_seeds(seed)
         self._dones[:] = False
         observations = []
-        for env in self.envs:
+        infos = []
+        for env, seed_i in zip(self.envs, seeds):
+            if seed_i is not None:
+                env.seed(seed_i)
             observation = env.reset()
             observations.append(observation)
+            infos.append({})
         self.observations = concatenate(
-            observations, self.observations, self.single_observation_space
+            self.single_observation_space, observations, self.observations
         )
 
-        return deepcopy(self.observations) if self.copy else self.observations
+        obs = deepcopy(self.observations) if self.copy else self.observations
+        if return_info:
+            return obs, infos
+        return obs
 
     def step_async(self, actions):
         self._actions = actions
@@ -84,7 +124,7 @@ class SyncVectorEnv(VectorEnv):
             observations.append(observation)
             infos.append(info)
         self.observations = concatenate(
-            observations, self.observations, self.single_observation_space
+            self.single_observation_space, observations, self.observations
         )
 
         return (
diff --git a/requirements-pusht-5090.txt b/requirements-pusht-5090.txt
new file mode 100644
index 0000000..9bf8f9d
--- /dev/null
+++ b/requirements-pusht-5090.txt
@@ -0,0 +1,38 @@
+# Direct package pins for the canonical PushT image workflow on the 5090 host.
+# Torch/TorchVision/Torchaudio are installed separately from the cu128 index in setup_uv_pusht_5090.sh.
+
+numpy==1.26.4
+scipy==1.11.4
+numba==0.59.1
+llvmlite==0.42.0
+cffi==1.15.1
+cython==0.29.32
+h5py==3.8.0
+pandas==2.2.3
+zarr==2.12.0
+numcodecs==0.10.2
+hydra-core==1.2.0
+einops==0.4.1
+tqdm==4.64.1
+dill==0.3.5.1
+scikit-video==1.1.11
+scikit-image==0.19.3
+gym==0.23.1
+pymunk==6.2.1
+wandb==0.13.3
+threadpoolctl==3.1.0
+shapely==1.8.5.post1
+imageio==2.22.0
+imageio-ffmpeg==0.4.7
+termcolor==2.0.1
+tensorboard==2.10.1
+tensorboardx==2.5.1
+psutil==7.2.2
+click==8.1.8
+boto3==1.24.96
+diffusers==0.11.1
+huggingface-hub==0.10.1
+av==14.0.1
+pygame==2.5.2
+robomimic==0.2.0
+opencv-python-headless==4.10.0.84
diff --git a/setup_uv_pusht_5090.sh b/setup_uv_pusht_5090.sh
new file mode 100755
index 0000000..d32aefe
--- /dev/null
+++ b/setup_uv_pusht_5090.sh
@@ -0,0 +1,39 @@
+#!/usr/bin/env bash
+#
+# Create the repo-local uv virtualenv (.venv) for the PushT image workflow:
+# installs the cu128 Torch stack, the pins in requirements-pusht-5090.txt,
+# and this repo in editable mode.
+#
+# Usage: ./setup_uv_pusht_5090.sh
+# Optional env: UV_CACHE_DIR, UV_PYTHON_INSTALL_DIR (default to repo-local dirs).
+set -euo pipefail
+
+# Resolve the repo root from the script's own location so it works from any cwd.
+ROOT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
+cd -- "$ROOT_DIR"
+
+# Fail early with a clear message rather than a bare "command not found".
+if ! command -v uv >/dev/null 2>&1; then
+  printf 'error: uv is required but was not found on PATH\n' >&2
+  exit 1
+fi
+
+# Keep uv's cache and managed Python installs inside the repo unless overridden.
+export UV_CACHE_DIR="${UV_CACHE_DIR:-$ROOT_DIR/.uv-cache}"
+export UV_PYTHON_INSTALL_DIR="${UV_PYTHON_INSTALL_DIR:-$ROOT_DIR/.uv-python}"
+
+uv venv --python 3.9 .venv
+# shellcheck disable=SC1091 # activate is generated by `uv venv` at runtime
+source .venv/bin/activate
+
+# Every install names the venv interpreter explicitly so none of them depends
+# on the activation above having succeeded in selecting the environment.
+uv pip install --python .venv/bin/python --upgrade pip wheel setuptools==80.9.0
+# Torch wheels come from the PyTorch cu128 index, not PyPI.
+uv pip install --python .venv/bin/python \
+  --index-url https://download.pytorch.org/whl/cu128 \
+  torch==2.8.0+cu128 torchvision==0.23.0+cu128 torchaudio==2.8.0+cu128
+uv pip install --python .venv/bin/python -r requirements-pusht-5090.txt
+uv pip install --python .venv/bin/python -e .
+
+echo "uv environment ready at $ROOT_DIR/.venv"