diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..23dbd4e --- /dev/null +++ b/.gitignore @@ -0,0 +1,140 @@ +bin +logs +wandb +outputs +data +data_local +.vscode +_wandb + +**/.DS_Store + +fuse.cfg + +*.ai + +# Generation results +results/ + +ray/auth.json + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ diff --git a/README.md b/README.md new file mode 100644 index 0000000..bb5e57a --- /dev/null +++ b/README.md @@ -0,0 +1,374 @@ +# Diffusion Policy + +[[Project page]](https://diffusion-policy.cs.columbia.edu/) +[[Paper]](https://diffusion-policy.cs.columbia.edu/#paper) +[[Data]](https://diffusion-policy.cs.columbia.edu/data/) +[[Colab (state)]](https://colab.research.google.com/drive/1gxdkgRVfM55zihY9TFLja97cSVZOZq2B?usp=sharing) +[[Colab (vision)]](https://colab.research.google.com/drive/18GIHeOQ5DyjMN8iIRZL2EKZ0745NLIpg?usp=sharing) + + +[Cheng Chi](http://cheng-chi.github.io/)1, +[Siyuan Feng](https://www.cs.cmu.edu/~sfeng/)2, +[Yilun Du](https://yilundu.github.io/)3, +[Zhenjia Xu](https://yilundu.github.io/)1, +[Eric Cousineau](https://www.eacousineau.com/)2, +[Benjamin Burchfiel](http://www.benburchfiel.com/)2, +[Shuran Song](https://www.cs.columbia.edu/~shurans/)1 + +1Columbia University, +2Toyota Research Institute, +3MIT + +drawing +drawing + +## πŸ› Try it out! +Our self-contained Google Colab notebooks is the easiest way to play with Diffusion Policy. We provide separate notebooks for [state-based environment](https://colab.research.google.com/drive/1gxdkgRVfM55zihY9TFLja97cSVZOZq2B?usp=sharing) and [vision-based environment](https://colab.research.google.com/drive/18GIHeOQ5DyjMN8iIRZL2EKZ0745NLIpg?usp=sharing). + +## 🧾 Checkout our experiment logs! +For each experiment used to generate Table I,II and IV in the [paper](https://diffusion-policy.cs.columbia.edu/#paper), we provide: +1. A `config.yaml` that contains all parameters needed to reproduce the experiment. +2. 
Detailed training/eval `logs.json.txt` for every training step. +3. Checkpoints for the best `epoch=*-test_mean_score=*.ckpt` and last `latest.ckpt` epoch of each run. + +Experiment logs are hosted on our website as nested directories in format: +`https://diffusion-policy.cs.columbia.edu/data/experiments////` + +Within each experiment directory you may find: +``` +. +β”œβ”€β”€ config.yaml +β”œβ”€β”€ metrics +β”‚Β Β  └── logs.json.txt +β”œβ”€β”€ train_0 +β”‚Β Β  β”œβ”€β”€ checkpoints +β”‚Β Β  β”‚Β Β  β”œβ”€β”€ epoch=0300-test_mean_score=1.000.ckpt +β”‚Β Β  β”‚Β Β  └── latest.ckpt +β”‚Β Β  └── logs.json.txt +β”œβ”€β”€ train_1 +β”‚Β Β  β”œβ”€β”€ checkpoints +β”‚Β Β  β”‚Β Β  β”œβ”€β”€ epoch=0250-test_mean_score=1.000.ckpt +β”‚Β Β  β”‚Β Β  └── latest.ckpt +β”‚Β Β  └── logs.json.txt +└── train_2 + β”œβ”€β”€ checkpoints + β”‚Β Β  β”œβ”€β”€ epoch=0250-test_mean_score=1.000.ckpt + β”‚Β Β  └── latest.ckpt + └── logs.json.txt +``` +The `metrics/logs.json.txt` file aggregates evaluation metrics from all 3 training runs every 50 epochs using `multirun_metrics.py`. The numbers reported in the paper correspond to `max` and `k_min_train_loss` aggregation keys. + +To download all files in a subdirectory, use: + +```console +$ wget --recursive --no-parent --no-host-directories --relative --reject="index.html*" https://diffusion-policy.cs.columbia.edu/data/experiments/low_dim/square_ph/diffusion_policy_cnn/ +``` + +## πŸ› οΈ Installation +### πŸ–₯️ Simulation +To reproduce our simulation benchmark results, install our conda environment on a Linux machine with Nvidia GPU. 
On Ubuntu 20.04 you need to install the following apt packages for mujoco: +```console +sudo apt install -y libosmesa6-dev libgl1-mesa-glx libglfw3 patchelf +``` + +We recommend [Mambaforge](https://github.com/conda-forge/miniforge#mambaforge) instead of the standard anaconda distribution for faster installation: +```console +mamba env create -f conda_environment.yaml +``` + +but you can use conda as well: +```console +conda env create -f conda_environment.yaml +``` + +The `conda_environment_macos.yaml` file is only for development on MacOS and does not have full support for benchmarks. + +### 🦾 Real Robot +Hardware (for Push-T): +* 1x [UR5-CB3](https://www.universal-robots.com/cb3) or [UR5e](https://www.universal-robots.com/products/ur5-robot/) ([RTDE Interface](https://www.universal-robots.com/articles/ur/interface-communication/real-time-data-exchange-rtde-guide/) is required) +* 2x [RealSense D415](https://www.intelrealsense.com/depth-camera-d415/) +* 1x [3Dconnexion SpaceMouse](https://3dconnexion.com/us/product/spacemouse-wireless/) (for teleop) +* 1x [Millibar Robotics Manual Tool Changer](https://www.millibar.com/manual-tool-changer/) (only need robot side) +* 1x 3D printed [End effector](https://cad.onshape.com/documents/a818888644a15afa6cc68ee5/w/2885b48b018cda84f425beca/e/3e8771c2124cee024edd2fed?renderMode=0&uiState=63ffcba6631ca919895e64e5) +* 1x 3D printed [T-block](https://cad.onshape.com/documents/f1140134e38f6ed6902648d5/w/a78cf81827600e4ff4058d03/e/f35f57fb7589f72e05c76caf?renderMode=0&uiState=63ffcbc9af4a881b344898ee) +* USB-C cables and screws for RealSense + +Software: +* Ubuntu 20.04.3 (tested) +* Mujoco dependencies: +`sudo apt install libosmesa6-dev libgl1-mesa-glx libglfw3 patchelf` +* [RealSense SDK](https://github.com/IntelRealSense/librealsense/blob/master/doc/distribution_linux.md) +* Spacemouse dependencies: +`sudo apt install libspnav-dev spacenavd; sudo systemctl start spacenavd` +* Conda environment `mamba env create -f 
conda_environment_real.yaml` + +## πŸ–₯️ Reproducing Simulation Benchmark Results +### Download Training Data +Under the repo root, create data subdirectory: +```console +[diffusion_policy]$ mkdir data && cd data +``` + +Download the corresponding zip file from [https://diffusion-policy.cs.columbia.edu/data/training/](https://diffusion-policy.cs.columbia.edu/data/training/) +```console +[data]$ wget https://diffusion-policy.cs.columbia.edu/data/training/pusht.zip +``` + +Extract training data: +```console +[data]$ unzip pusht.zip && rm -f pusht.zip && cd .. +``` + +Grab config file for the corresponding experiment: +```console +[diffusion_policy]$ wget -O image_pusht_diffusion_policy_cnn.yaml https://diffusion-policy.cs.columbia.edu/data/experiments/image/pusht/diffusion_policy_cnn/config.yaml +``` + +### Running for a single seed +Activate conda environment and login to [wandb](https://wandb.ai) (if you haven't already). +```console +[diffusion_policy]$ conda activate robodiff +(robodiff)[diffusion_policy]$ wandb login +``` + +Launch training with seed 42 on GPU 0. +```console +(robodiff)[diffusion_policy]$ python train.py --config-dir=. --config-name=image_pusht_diffusion_policy_cnn.yaml training.seed=42 training.device=cuda:0 hydra.run.dir='data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name}' +``` + +This will create a directory in format `data/outputs/yyyy.mm.dd/hh.mm.ss__` where configs, logs and checkpoints are written to. The policy will be evaluated every 50 epochs with the success rate logged as `test/mean_score` on wandb, as well as videos for some rollouts. 
+```console +(robodiff)[diffusion_policy]$ tree data/outputs/2023.03.01/20.02.03_train_diffusion_unet_hybrid_pusht_image -I wandb +data/outputs/2023.03.01/20.02.03_train_diffusion_unet_hybrid_pusht_image +β”œβ”€β”€ checkpoints +β”‚ β”œβ”€β”€ epoch=0000-test_mean_score=0.134.ckpt +β”‚ └── latest.ckpt +β”œβ”€β”€ .hydra +β”‚ β”œβ”€β”€ config.yaml +β”‚ β”œβ”€β”€ hydra.yaml +β”‚ └── overrides.yaml +β”œβ”€β”€ logs.json.txt +β”œβ”€β”€ media +β”‚ β”œβ”€β”€ 2k5u6wli.mp4 +β”‚ β”œβ”€β”€ 2kvovxms.mp4 +β”‚ β”œβ”€β”€ 2pxd9f6b.mp4 +β”‚ β”œβ”€β”€ 2q5gjt5f.mp4 +β”‚ β”œβ”€β”€ 2sawbf6m.mp4 +β”‚ └── 538ubl79.mp4 +└── train.log + +3 directories, 13 files +``` + +### Running for multiple seeds +Launch local ray cluster. For large scale experiments, you might want to setup an [AWS cluster with autoscaling](https://docs.ray.io/en/master/cluster/vms/user-guides/launching-clusters/aws.html). All other commands remain the same. +```console +(robodiff)[diffusion_policy]$ export CUDA_VISIBLE_DEVICES=0,1,2 # select GPUs to be managed by the ray cluster +(robodiff)[diffusion_policy]$ ray start --head --num-gpus=3 +``` + +Launch a ray client which will start 3 training workers (3 seeds) and 1 metrics monitor worker. +```console +(robodiff)[diffusion_policy]$ python ray_train_multirun.py --config-dir=. --config-name=image_pusht_diffusion_policy_cnn.yaml --seeds=42,43,44 --monitor_key=test/mean_score -- multi_run.run_dir='data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name}' multi_run.wandb_name_base='${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name}' +``` + +In addition to the wandb log written by each training worker individually, the metrics monitor worker will log to wandb project `diffusion_policy_metrics` for the metrics aggregated from all 3 training runs. 
Local config, logs and checkpoints will be written to `data/outputs/yyyy.mm.dd/hh.mm.ss__` in a directory structure identical to our [training logs](https://diffusion-policy.cs.columbia.edu/data/experiments/): +```console +(robodiff)[diffusion_policy]$ tree data/outputs/2023.03.01/22.13.58_train_diffusion_unet_hybrid_pusht_image -I 'wandb|media' +data/outputs/2023.03.01/22.13.58_train_diffusion_unet_hybrid_pusht_image +β”œβ”€β”€ config.yaml +β”œβ”€β”€ metrics +β”‚ β”œβ”€β”€ logs.json.txt +β”‚ β”œβ”€β”€ metrics.json +β”‚ └── metrics.log +β”œβ”€β”€ train_0 +β”‚ β”œβ”€β”€ checkpoints +β”‚ β”‚ β”œβ”€β”€ epoch=0000-test_mean_score=0.174.ckpt +β”‚ β”‚ └── latest.ckpt +β”‚ β”œβ”€β”€ logs.json.txt +β”‚ └── train.log +β”œβ”€β”€ train_1 +β”‚ β”œβ”€β”€ checkpoints +β”‚ β”‚ β”œβ”€β”€ epoch=0000-test_mean_score=0.131.ckpt +β”‚ β”‚ └── latest.ckpt +β”‚ β”œβ”€β”€ logs.json.txt +β”‚ └── train.log +└── train_2 + β”œβ”€β”€ checkpoints + β”‚ β”œβ”€β”€ epoch=0000-test_mean_score=0.105.ckpt + β”‚ └── latest.ckpt + β”œβ”€β”€ logs.json.txt + └── train.log + +7 directories, 16 files +``` + +## 🦾 Demo, Training and Eval on a Real Robot +Make sure your UR5 robot is running and accepting command from its network interface (emergency stop button within reach at all time), your RealSense cameras plugged in to your workstation (tested with `realsense-viewer`) and your SpaceMouse connected with the `spacenavd` deamon running (verify with `systemctl status spacenavd`). + +Start the demonstration collection script. Press "C" to start recording. Use SpaceMouse to move the robot. Press "S" to stop recording. +```console +(robodiff)[diffusion_policy]$ python demo_real_robot.py -o data/demo_pusht_real --robot_ip 192.168.0.204 +``` + +This should result in a demonstration dataset in `data/demo_pusht_real` with in the same structure as our example [real Push-T training dataset](https://diffusion-policy.cs.columbia.edu/data/training/pusht_real.zip). 
+ +To train a Diffusion Policy, launch training with config: +```console +(robodiff)[diffusion_policy]$ python train.py --config-name=train_diffusion_unet_real_image_workspace task.dataset_path=data/demo_pusht_real +``` +Edit [`diffusion_policy/config/task/real_pusht_image.yaml`](./diffusion_policy/config/task/real_pusht_image.yaml) if your camera setup is different. + +Assuming the training has finished and you have a checkpoint at `data/outputs/blah/checkpoints/latest.ckpt`, launch the evaluation script with: +```console +python eval_real_robot.py -i data/outputs/blah/checkpoints/latest.ckpt -o data/eval_pusht_real --robot_ip 192.168.0.204 +``` +Press "C" to start evaluation (handing control over to the policy). Press "S" to stop the current episode. + +## πŸ—ΊοΈ Codebase Tutorial +This codebase is structured under the requirement that: +1. implementing `N` tasks and `M` methods will only require `O(N+M)` amount of code instead of `O(N*M)` +2. while retaining maximum flexibility. + +To achieve this requirement, we +1. maintained a simple unified interface between tasks and methods and +2. made the implementation of the tasks and the methods independent of each other. + +These design decisions come at the cost of code repetition between the tasks and the methods. However, we believe that the benefit of being able to add/modify task/methods without affecting the remainder and being able understand a task/method by reading the code linearly outweighs the cost of copying and pasting 😊. + +### The Split +On the task side, we have: +* `Dataset`: adapts a (third-party) dataset to the interface. +* `EnvRunner`: executes a `Policy` that accepts the interface and produce logs and metrics. +* `config/task/.yaml`: contains all information needed to construct `Dataset` and `EnvRunner`. +* (optional) `Env`: an `gym==0.21.0` compatible class that encapsulates the task environment. 
+ +On the policy side, we have: +* `Policy`: implements inference according to the interface and part of the training process. +* `Workspace`: manages the life-cycle of training and evaluation (interleaved) of a method. +* `config/.yaml`: contains all information needed to construct `Policy` and `Workspace`. + +### The Interface +#### Low Dim +A [`LowdimPolicy`](./diffusion_policy/policy/base_lowdim_policy.py) takes observation dictionary: +- `"obs":` Tensor of shape `(B,To,Do)` + +and predicts action dictionary: +- `"action": ` Tensor of shape `(B,Ta,Da)` + +A [`LowdimDataset`](./diffusion_policy/dataset/base_dataset.py) returns a sample of dictionary: +- `"obs":` Tensor of shape `(To, Do)` +- `"action":` Tensor of shape `(Ta, Da)` + +Its `get_normalizer` method returns a [`LinearNormalizer`](./diffusion_policy/model/common/normalizer.py) with keys `"obs","action"`. + +The `Policy` handles normalization on GPU with its copy of the `LinearNormalizer`. The parameters of the `LinearNormalizer` is saved as part of the `Policy`'s weights checkpoint. + +#### Image +A [`ImagePolicy`](./diffusion_policy/policy/base_image_policy.py) takes observation dictionary: +- `"key0":` Tensor of shape `(B,To,*)` +- `"key1":` Tensor of shape e.g. `(B,To,H,W,3)` ([0,1] float32) + +and predicts action dictionary: +- `"action": ` Tensor of shape `(B,Ta,Da)` + +A [`ImageDataset`](./diffusion_policy/dataset/base_dataset.py) returns a sample of dictionary: +- `"obs":` Dict of + - `"key0":` Tensor of shape `(To, *)` + - `"key1":` Tensor fo shape `(To,H,W,3)` +- `"action":` Tensor of shape `(Ta, Da)` + +Its `get_normalizer` method returns a [`LinearNormalizer`](./diffusion_policy/model/common/normalizer.py) with keys `"key0","key1","action"`. 
+ +#### Example +``` +To = 3 +Ta = 4 +T = 6 +|o|o|o| +| | |a|a|a|a| +|o|o| +| |a|a|a|a|a| +| | | | |a|a| +``` +Terminology in the paper: `varname` in the codebase +- Observation Horizon: `To|n_obs_steps` +- Action Horizon: `Ta|n_action_steps` +- Prediction Horizon: `T|horizon` + +The classical (e.g. MDP) single step observation/action formulation is included as a special case where `To=1` and `Ta=1`. + +## πŸ”© Key Components +### `Workspace` +A `Workspace` object encapsulates all states and code needed to run an experiment. +* Inherits from [`BaseWorkspace`](./diffusion_policy/workspace/base_workspace.py). +* A single `OmegaConf` config object generated by `hydra` should contain all information needed to construct the Workspace object and running experiments. This config correspond to `config/.yaml` + hydra overrides. +* The `run` method contains the entire pipeline for the experiment. +* Checkpoints happen at the `Workspace` level. All training states implemented as object attributes are automatically saved by the `save_checkpoint` method. +* All other states for the experiment should be implemented as local variables in the `run` method. + +The entrypoint for training is `train.py` which uses `@hydra.main` decorator. Read [hydra](https://hydra.cc/)'s official documentation for command line arguments and config overrides. For example, the argument `task=` will replace the `task` subtree of the config with the content of `config/task/.yaml`, thereby selecting the task to run for this experiment. + +### `Dataset` +A `Dataset` object: +* Inherits from `torch.utils.data.Dataset`. +* Returns a sample conforming to [the interface](#the-interface) depending on whether the task has Low Dim or Image observations. +* Has a method `get_normalizer` that returns a `LinearNormalizer` conforming to [the interface](#the-interface). + +Normalization is a very common source of bugs during project development. 
It is sometimes helpful to print out the specific `scale` and `bias` vectors used for each key in the `LinearNormalizer`. + +Most of our implementations of `Dataset` use a combination of [`ReplayBuffer`](#replaybuffer) and [`SequenceSampler`](./diffusion_policy/common/sampler.py) to generate samples. Correctly handling padding at the beginning and the end of each demonstration episode according to `To` and `Ta` is important for good performance. Please read our [`SequenceSampler`](./diffusion_policy/common/sampler.py) before implementing your own sampling method. + +### `Policy` +A `Policy` object: +* Inherits from `BaseLowdimPolicy` or `BaseImagePolicy`. +* Has a method `predict_action` that given observation dict, predicts actions conforming to [the interface](#the-interface). +* Has a method `set_normalizer` that takes in a `LinearNormalizer` and handles observation/action normalization internally in the policy. +* (optional) Might have a method `compute_loss` that takes in a batch and returns the loss to be optimized. +* (optional) Usually each `Policy` class corresponds to a `Workspace` class due to the differences of training and evaluation process between methods. + +### `EnvRunner` +An `EnvRunner` object abstracts away the subtle differences between different task environments. +* Has a method `run` that takes a `Policy` object for evaluation, and returns a dict of logs and metrics. Each value should be compatible with `wandb.log`. + +To maximize evaluation speed, we usually vectorize environments using our modification of [`gym.vector.AsyncVectorEnv`](./diffusion_policy/gym_util/async_vector_env.py) which runs each individual environment in a separate process (workaround python GIL). + +⚠️ Since subprocesses are launched using `fork` on linux, you need to be especially careful for environments that create their OpenGL context during initialization (e.g. 
robosuite) which, once inherited by the child process memory space, often causes obscure bugs like segmentation fault. As a workaround, you can provide a `dummy_env_fn` that constructs an environment without initializing OpenGL. + +### `ReplayBuffer` +The [`ReplayBuffer`](./diffusion_policy/common/replay_buffer.py) is a key data structure for storing a demonstration dataset both in-memory and on-disk with chunking and compression. It makes heavy use of the [`zarr`](https://zarr.readthedocs.io/en/stable/index.html) format but also has a `numpy` backend for lower access overhead. + +On disk, it can be stored as a nested directory (e.g. `data/pusht_cchi_v7_replay.zarr`) or a zip file (e.g. `data/robomimic/datasets/square/mh/image_abs.hdf5.zarr.zip`). + +Due to the relatively small size of our datasets, it's often possible to store the entire image-based dataset in RAM with [`Jpeg2000` compression](./diffusion_policy/codecs/imagecodecs_numcodecs.py) which eliminates disk IO during training at the expense of increased CPU workload. + +Example: +``` +data/pusht_cchi_v7_replay.zarr + β”œβ”€β”€ data + β”‚ β”œβ”€β”€ action (25650, 2) float32 + β”‚ β”œβ”€β”€ img (25650, 96, 96, 3) float32 + β”‚ β”œβ”€β”€ keypoint (25650, 9, 2) float32 + β”‚ β”œβ”€β”€ n_contacts (25650, 1) float32 + β”‚ └── state (25650, 5) float32 + └── meta + └── episode_ends (206,) int64 +``` + +Each array in `data` stores one data field from all episodes concatenated along the first dimension (time). The `meta/episode_ends` array stores the end index for each episode along the first dimension. + +## 🩹 Adding a Task +Read and imitate: +* `diffusion_policy/dataset/pusht_image_dataset.py` +* `diffusion_policy/env_runner/pusht_image_runner.py` +* `diffusion_policy/config/task/pusht_image.yaml` + +Make sure that `shape_meta` corresponds to input and output shapes for your task. Make sure `env_runner._target_` and `dataset._target_` point to the new classes you have added. 
When training, add `task=` to `train.py`'s arguments. + +## 🩹 Adding a Method +Read and imitate: +* `diffusion_policy/workspace/train_diffusion_unet_image_workspace.py` +* `diffusion_policy/policy/diffusion_unet_image_policy.py` +* `diffusion_policy/config/train_diffusion_unet_image_workspace.yaml` + +Make sure your workspace yaml's `_target_` points to the new workspace class you created. diff --git a/conda_environment.yaml b/conda_environment.yaml new file mode 100644 index 0000000..97cbe9e --- /dev/null +++ b/conda_environment.yaml @@ -0,0 +1,63 @@ +name: robodiff +channels: + - pytorch + - pytorch3d + - nvidia + - conda-forge +dependencies: + - python=3.9 + - pip=22.2.2 + - cudatoolkit=11.6 + - pytorch=1.12.1 + - torchvision=0.13.1 + - pytorch3d=0.7.0 + - numpy=1.23.3 + - numba==0.56.4 + - scipy==1.9.1 + - py-opencv=4.6.0 + - cffi=1.15.1 + - ipykernel=6.16 + - matplotlib=3.6.1 + - zarr=2.12.0 + - numcodecs=0.10.2 + - h5py=3.7.0 + - hydra-core=1.2.0 + - einops=0.4.1 + - tqdm=4.64.1 + - dill=0.3.5.1 + - scikit-video=1.1.11 + - scikit-image=0.19.3 + - gym=0.21.0 + - pymunk=6.2.1 + - wandb=0.13.3 + - threadpoolctl=3.1.0 + - shapely=1.8.4 + - cython=0.29.32 + - imageio=2.22.0 + - imageio-ffmpeg=0.4.7 + - termcolor=2.0.1 + - tensorboard=2.10.1 + - tensorboardx=2.5.1 + - psutil=5.9.2 + - click=8.0.4 + - boto3=1.24.96 + - accelerate=0.13.2 + - datasets=2.6.1 + - diffusers=0.11.1 + - av=10.0.0 + - cmake=3.24.3 + # trick to force reinstall imagecodecs via pip + - imagecodecs==2022.8.8 + - pip: + - ray[default,tune]==2.2.0 + # requires mujoco py dependencies libosmesa6-dev libgl1-mesa-glx libglfw3 patchelf + - free-mujoco-py==2.1.6 + - pygame==2.1.2 + - pybullet-svl==3.1.6.4 + - robosuite @ https://github.com/cheng-chi/robosuite/archive/277ab9588ad7a4f4b55cf75508b44aa67ec171f0.tar.gz + - robomimic==0.2.0 + - pytorchvideo==0.1.5 + # pip package required for jpeg-xl + - imagecodecs==2022.9.26 + - r3m @ 
https://github.com/facebookresearch/r3m/archive/b2334e726887fa0206962d7984c69c5fb09cceab.tar.gz + - dm-control==1.0.9 diff --git a/conda_environment_macos.yaml b/conda_environment_macos.yaml new file mode 100644 index 0000000..0d26767 --- /dev/null +++ b/conda_environment_macos.yaml @@ -0,0 +1,55 @@ +name: robodiff +channels: + - pytorch + - conda-forge +dependencies: + - python=3.9 + - pip=22.2.2 + - pytorch=1.12.1 + - torchvision=0.13.1 + - numpy=1.23.3 + - numba==0.56.4 + - scipy==1.9.1 + - py-opencv=4.6.0 + - cffi=1.15.1 + - ipykernel=6.16 + - matplotlib=3.6.1 + - zarr=2.12.0 + - numcodecs=0.10.2 + - h5py=3.7.0 + - hydra-core=1.2.0 + - einops=0.4.1 + - tqdm=4.64.1 + - dill=0.3.5.1 + - scikit-video=1.1.11 + - scikit-image=0.19.3 + - gym=0.21.0 + - pymunk=6.2.1 + - wandb=0.13.3 + - threadpoolctl=3.1.0 + - shapely=1.8.4 + - cython=0.29.32 + - imageio=2.22.0 + - imageio-ffmpeg=0.4.7 + - termcolor=2.0.1 + - tensorboard=2.10.1 + - tensorboardx=2.5.1 + - psutil=5.9.2 + - click=8.0.4 + - boto3=1.24.96 + - accelerate=0.13.2 + - datasets=2.6.1 + - diffusers=0.11.1 + - av=10.0.0 + - cmake=3.24.3 + # trick to force reinstall imagecodecs via pip + - imagecodecs==2022.8.8 + - pip: + - ray[default,tune]==2.2.0 + - pygame==2.1.2 + - robomimic==0.2.0 + - pytorchvideo==0.1.5 + - atomics==1.0.2 + # No support for jpeg-xl for MacOS + - imagecodecs==2022.9.26 + - r3m @ https://github.com/facebookresearch/r3m/archive/b2334e726887fa0206962d7984c69c5fb09cceab.tar.gz diff --git a/conda_environment_real.yaml b/conda_environment_real.yaml new file mode 100644 index 0000000..1f84599 --- /dev/null +++ b/conda_environment_real.yaml @@ -0,0 +1,71 @@ +name: robodiff +channels: + - pytorch + - pytorch3d + - nvidia + - conda-forge +dependencies: + - python=3.9 + - pip=22.2.2 + - cudatoolkit=11.6 + - pytorch=1.12.1 + - torchvision=0.13.1 + - pytorch3d=0.7.0 + - numpy=1.23.3 + - numba==0.56.4 + - scipy==1.9.1 + - py-opencv=4.6.0 + - cffi=1.15.1 + - ipykernel=6.16 + - matplotlib=3.6.1 + - 
zarr=2.12.0 + - numcodecs=0.10.2 + - h5py=3.7.0 + - hydra-core=1.2.0 + - einops=0.4.1 + - tqdm=4.64.1 + - dill=0.3.5.1 + - scikit-video=1.1.11 + - scikit-image=0.19.3 + - gym=0.21.0 + - pymunk=6.2.1 + - wandb=0.13.3 + - threadpoolctl=3.1.0 + - shapely=1.8.4 + - cython=0.29.32 + - imageio=2.22.0 + - imageio-ffmpeg=0.4.7 + - termcolor=2.0.1 + - tensorboard=2.10.1 + - tensorboardx=2.5.1 + - psutil=5.9.2 + - click=8.0.4 + - boto3=1.24.96 + - accelerate=0.13.2 + - datasets=2.6.1 + - diffusers=0.11.1 + - av=10.0.0 + - cmake=3.24.3 + # trick to force reinstall imagecodecs via pip + - imagecodecs==2022.8.8 + - pip: + - ray[default,tune]==2.2.0 + # requires mujoco py dependencies libosmesa6-dev libgl1-mesa-glx libglfw3 patchelf + - free-mujoco-py==2.1.6 + - pygame==2.1.2 + - pybullet-svl==3.1.6.4 + - robosuite @ https://github.com/cheng-chi/robosuite/archive/277ab9588ad7a4f4b55cf75508b44aa67ec171f0.tar.gz + - robomimic==0.2.0 + - pytorchvideo==0.1.5 + # requires librealsense https://github.com/IntelRealSense/librealsense/blob/master/doc/distribution_linux.md + - pyrealsense2==2.51.1.4348 + # requires apt install libspnav-dev spacenavd; systemctl start spacenavd + - spnav @ https://github.com/cheng-chi/spnav/archive/c1c938ebe3cc542db4685e0d13850ff1abfdb943.tar.gz + - ur-rtde==1.5.5 + - atomics==1.0.2 + # pip package required for jpeg-xl + - imagecodecs==2022.9.26 + - r3m @ https://github.com/facebookresearch/r3m/archive/b2334e726887fa0206962d7984c69c5fb09cceab.tar.gz + - dm-control==1.0.9 + - pynput==1.7.6 + \ No newline at end of file diff --git a/demo_real_robot.py b/demo_real_robot.py new file mode 100644 index 0000000..846bade --- /dev/null +++ b/demo_real_robot.py @@ -0,0 +1,160 @@ +""" +Usage: +(robodiff)$ python demo_real_robot.py -o --robot_ip + +Robot movement: +Move your SpaceMouse to move the robot EEF (locked in xy plane). +Press SpaceMouse right button to unlock z axis. +Press SpaceMouse left button to enable rotation axes. 
+ +Recording control: +Click the opencv window (make sure it's in focus). +Press "C" to start recording. +Press "S" to stop recording. +Press "Q" to exit program. +Press "Backspace" to delete the previously recorded episode. +""" + +# %% +import time +from multiprocessing.managers import SharedMemoryManager +import click +import cv2 +import numpy as np +import scipy.spatial.transform as st +from diffusion_policy.real_world.real_env import RealEnv +from diffusion_policy.real_world.spacemouse_shared_memory import Spacemouse +from diffusion_policy.common.precise_sleep import precise_wait +from diffusion_policy.real_world.keystroke_counter import ( + KeystrokeCounter, Key, KeyCode +) + +@click.command() +@click.option('--output', '-o', required=True, help="Directory to save demonstration dataset.") +@click.option('--robot_ip', '-ri', required=True, help="UR5's IP address e.g. 192.168.0.204") +@click.option('--vis_camera_idx', default=0, type=int, help="Which RealSense camera to visualize.") +@click.option('--init_joints', '-j', is_flag=True, default=False, help="Whether to initialize robot joint configuration in the beginning.") +@click.option('--frequency', '-f', default=10, type=float, help="Control frequency in Hz.") +@click.option('--command_latency', '-cl', default=0.01, type=float, help="Latency between receiving SapceMouse command to executing on Robot in Sec.") +def main(output, robot_ip, vis_camera_idx, init_joints, frequency, command_latency): + dt = 1/frequency + with SharedMemoryManager() as shm_manager: + with KeystrokeCounter() as key_counter, \ + Spacemouse(shm_manager=shm_manager) as sm, \ + RealEnv( + output_dir=output, + robot_ip=robot_ip, + # recording resolution + obs_image_resolution=(1280,720), + frequency=frequency, + init_joints=init_joints, + enable_multi_cam_vis=True, + record_raw_video=True, + # number of threads per camera view for video recording (H.264) + thread_per_video=3, + # video recording quality, lower is better (but slower). 
+ video_crf=21, + shm_manager=shm_manager + ) as env: + cv2.setNumThreads(1) + + # realsense exposure + env.realsense.set_exposure(exposure=120, gain=0) + # realsense white balance + env.realsense.set_white_balance(white_balance=5900) + + time.sleep(1.0) + print('Ready!') + state = env.get_robot_state() + target_pose = state['TargetTCPPose'] + t_start = time.monotonic() + iter_idx = 0 + stop = False + is_recording = False + while not stop: + # calculate timing + t_cycle_end = t_start + (iter_idx + 1) * dt + t_sample = t_cycle_end - command_latency + t_command_target = t_cycle_end + dt + + # pump obs + obs = env.get_obs() + + # handle key presses + press_events = key_counter.get_press_events() + for key_stroke in press_events: + if key_stroke == KeyCode(char='q'): + # Exit program + stop = True + elif key_stroke == KeyCode(char='c'): + # Start recording + env.start_episode(t_start + (iter_idx + 2) * dt - time.monotonic() + time.time()) + key_counter.clear() + is_recording = True + print('Recording!') + elif key_stroke == KeyCode(char='s'): + # Stop recording + env.end_episode() + key_counter.clear() + is_recording = False + print('Stopped.') + elif key_stroke == Key.backspace: + # Delete the most recent recorded episode + if click.confirm('Are you sure to drop an episode?'): + env.drop_episode() + key_counter.clear() + is_recording = False + # delete + stage = key_counter[Key.space] + + # visualize + vis_img = obs[f'camera_{vis_camera_idx}'][-1,:,:,::-1].copy() + episode_id = env.replay_buffer.n_episodes + text = f'Episode: {episode_id}, Stage: {stage}' + if is_recording: + text += ', Recording!' 
+ cv2.putText( + vis_img, + text, + (10,30), + fontFace=cv2.FONT_HERSHEY_SIMPLEX, + fontScale=1, + thickness=2, + color=(255,255,255) + ) + + cv2.imshow('default', vis_img) + cv2.pollKey() + + precise_wait(t_sample) + # get teleop command + sm_state = sm.get_motion_state_transformed() + # print(sm_state) + dpos = sm_state[:3] * (env.max_pos_speed / frequency) + drot_xyz = sm_state[3:] * (env.max_rot_speed / frequency) + + if not sm.is_button_pressed(0): + # translation mode + drot_xyz[:] = 0 + else: + dpos[:] = 0 + if not sm.is_button_pressed(1): + # 2D translation mode + dpos[2] = 0 + + drot = st.Rotation.from_euler('xyz', drot_xyz) + target_pose[:3] += dpos + target_pose[3:] = (drot * st.Rotation.from_rotvec( + target_pose[3:])).as_rotvec() + + # execute teleop command + env.exec_actions( + actions=[target_pose], + timestamps=[t_command_target-time.monotonic()+time.time()], + stages=[stage]) + precise_wait(t_cycle_end) + iter_idx += 1 + +# %% +if __name__ == '__main__': + main() diff --git a/diffusion_policy/codecs/imagecodecs_numcodecs.py b/diffusion_policy/codecs/imagecodecs_numcodecs.py new file mode 100644 index 0000000..cf55b97 --- /dev/null +++ b/diffusion_policy/codecs/imagecodecs_numcodecs.py @@ -0,0 +1,1386 @@ + +# imagecodecs/numcodecs.py + +# Copyright (c) 2021-2022, Christoph Gohlke +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. 
Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +"""Additional numcodecs implemented using imagecodecs.""" + +__version__ = '2022.9.26' + +__all__ = ('register_codecs',) + +import numpy +from numcodecs.abc import Codec +from numcodecs.registry import register_codec, get_codec + +import imagecodecs + + +def protective_squeeze(x: numpy.ndarray): + """ + Squeeze dim only if it's not the last dim. 
+ Image dim expected to be *, H, W, C + """ + img_shape = x.shape[-3:] + if len(x.shape) > 3: + n_imgs = numpy.prod(x.shape[:-3]) + if n_imgs > 1: + img_shape = (-1,) + img_shape + return x.reshape(img_shape) + +def get_default_image_compressor(**kwargs): + if imagecodecs.JPEGXL: + # has JPEGXL + this_kwargs = { + 'effort': 3, + 'distance': 0.3, + # bug in libjxl, invalid codestream for non-lossless + # when decoding speed > 1 + 'decodingspeed': 1 + } + this_kwargs.update(kwargs) + return JpegXl(**this_kwargs) + else: + this_kwargs = { + 'level': 50 + } + this_kwargs.update(kwargs) + return Jpeg2k(**this_kwargs) + +class Aec(Codec): + """AEC codec for numcodecs.""" + + codec_id = 'imagecodecs_aec' + + def __init__( + self, bitspersample=None, flags=None, blocksize=None, rsi=None + ): + self.bitspersample = bitspersample + self.flags = flags + self.blocksize = blocksize + self.rsi = rsi + + def encode(self, buf): + return imagecodecs.aec_encode( + buf, + bitspersample=self.bitspersample, + flags=self.flags, + blocksize=self.blocksize, + rsi=self.rsi, + ) + + def decode(self, buf, out=None): + return imagecodecs.aec_decode( + buf, + bitspersample=self.bitspersample, + flags=self.flags, + blocksize=self.blocksize, + rsi=self.rsi, + out=_flat(out), + ) + + +class Apng(Codec): + """APNG codec for numcodecs.""" + + codec_id = 'imagecodecs_apng' + + def __init__(self, level=None, photometric=None, delay=None): + self.level = level + self.photometric = photometric + self.delay = delay + + def encode(self, buf): + buf = protective_squeeze(numpy.asarray(buf)) + return imagecodecs.apng_encode( + buf, + level=self.level, + photometric=self.photometric, + delay=self.delay, + ) + + def decode(self, buf, out=None): + return imagecodecs.apng_decode(buf, out=out) + + +class Avif(Codec): + """AVIF codec for numcodecs.""" + + codec_id = 'imagecodecs_avif' + + def __init__( + self, + level=None, + speed=None, + tilelog2=None, + bitspersample=None, + pixelformat=None, + 
numthreads=None, + index=None, + ): + self.level = level + self.speed = speed + self.tilelog2 = tilelog2 + self.bitspersample = bitspersample + self.pixelformat = pixelformat + self.numthreads = numthreads + self.index = index + + def encode(self, buf): + buf = protective_squeeze(numpy.asarray(buf)) + return imagecodecs.avif_encode( + buf, + level=self.level, + speed=self.speed, + tilelog2=self.tilelog2, + bitspersample=self.bitspersample, + pixelformat=self.pixelformat, + numthreads=self.numthreads, + ) + + def decode(self, buf, out=None): + return imagecodecs.avif_decode( + buf, index=self.index, numthreads=self.numthreads, out=out + ) + + +class Bitorder(Codec): + """Bitorder codec for numcodecs.""" + + codec_id = 'imagecodecs_bitorder' + + def encode(self, buf): + return imagecodecs.bitorder_encode(buf) + + def decode(self, buf, out=None): + return imagecodecs.bitorder_decode(buf, out=_flat(out)) + + +class Bitshuffle(Codec): + """Bitshuffle codec for numcodecs.""" + + codec_id = 'imagecodecs_bitshuffle' + + def __init__(self, itemsize=1, blocksize=0): + self.itemsize = itemsize + self.blocksize = blocksize + + def encode(self, buf): + return imagecodecs.bitshuffle_encode( + buf, itemsize=self.itemsize, blocksize=self.blocksize + ).tobytes() + + def decode(self, buf, out=None): + return imagecodecs.bitshuffle_decode( + buf, + itemsize=self.itemsize, + blocksize=self.blocksize, + out=_flat(out), + ) + + +class Blosc(Codec): + """Blosc codec for numcodecs.""" + + codec_id = 'imagecodecs_blosc' + + def __init__( + self, + level=None, + compressor=None, + typesize=None, + blocksize=None, + shuffle=None, + numthreads=None, + ): + self.level = level + self.compressor = compressor + self.typesize = typesize + self.blocksize = blocksize + self.shuffle = shuffle + self.numthreads = numthreads + + def encode(self, buf): + buf = protective_squeeze(numpy.asarray(buf)) + return imagecodecs.blosc_encode( + buf, + level=self.level, + compressor=self.compressor, + 
typesize=self.typesize, + blocksize=self.blocksize, + shuffle=self.shuffle, + numthreads=self.numthreads, + ) + + def decode(self, buf, out=None): + return imagecodecs.blosc_decode( + buf, numthreads=self.numthreads, out=_flat(out) + ) + + +class Blosc2(Codec): + """Blosc2 codec for numcodecs.""" + + codec_id = 'imagecodecs_blosc2' + + def __init__( + self, + level=None, + compressor=None, + typesize=None, + blocksize=None, + shuffle=None, + numthreads=None, + ): + self.level = level + self.compressor = compressor + self.typesize = typesize + self.blocksize = blocksize + self.shuffle = shuffle + self.numthreads = numthreads + + def encode(self, buf): + buf = protective_squeeze(numpy.asarray(buf)) + return imagecodecs.blosc2_encode( + buf, + level=self.level, + compressor=self.compressor, + typesize=self.typesize, + blocksize=self.blocksize, + shuffle=self.shuffle, + numthreads=self.numthreads, + ) + + def decode(self, buf, out=None): + return imagecodecs.blosc2_decode( + buf, numthreads=self.numthreads, out=_flat(out) + ) + + +class Brotli(Codec): + """Brotli codec for numcodecs.""" + + codec_id = 'imagecodecs_brotli' + + def __init__(self, level=None, mode=None, lgwin=None): + self.level = level + self.mode = mode + self.lgwin = lgwin + + def encode(self, buf): + return imagecodecs.brotli_encode( + buf, level=self.level, mode=self.mode, lgwin=self.lgwin + ) + + def decode(self, buf, out=None): + return imagecodecs.brotli_decode(buf, out=_flat(out)) + + +class ByteShuffle(Codec): + """ByteShuffle codec for numcodecs.""" + + codec_id = 'imagecodecs_byteshuffle' + + def __init__( + self, shape, dtype, axis=-1, dist=1, delta=False, reorder=False + ): + self.shape = tuple(shape) + self.dtype = numpy.dtype(dtype).str + self.axis = axis + self.dist = dist + self.delta = bool(delta) + self.reorder = bool(reorder) + + def encode(self, buf): + buf = protective_squeeze(numpy.asarray(buf)) + assert buf.shape == self.shape + assert buf.dtype == self.dtype + return 
imagecodecs.byteshuffle_encode( + buf, + axis=self.axis, + dist=self.dist, + delta=self.delta, + reorder=self.reorder, + ).tobytes() + + def decode(self, buf, out=None): + if not isinstance(buf, numpy.ndarray): + buf = numpy.frombuffer(buf, dtype=self.dtype).reshape(*self.shape) + return imagecodecs.byteshuffle_decode( + buf, + axis=self.axis, + dist=self.dist, + delta=self.delta, + reorder=self.reorder, + out=out, + ) + + +class Bz2(Codec): + """Bz2 codec for numcodecs.""" + + codec_id = 'imagecodecs_bz2' + + def __init__(self, level=None): + self.level = level + + def encode(self, buf): + return imagecodecs.bz2_encode(buf, level=self.level) + + def decode(self, buf, out=None): + return imagecodecs.bz2_decode(buf, out=_flat(out)) + + +class Cms(Codec): + """CMS codec for numcodecs.""" + + codec_id = 'imagecodecs_cms' + + def __init__(self, *args, **kwargs): + pass + + def encode(self, buf, out=None): + # return imagecodecs.cms_transform(buf) + raise NotImplementedError + + def decode(self, buf, out=None): + # return imagecodecs.cms_transform(buf) + raise NotImplementedError + + +class Deflate(Codec): + """Deflate codec for numcodecs.""" + + codec_id = 'imagecodecs_deflate' + + def __init__(self, level=None, raw=False): + self.level = level + self.raw = bool(raw) + + def encode(self, buf): + return imagecodecs.deflate_encode(buf, level=self.level, raw=self.raw) + + def decode(self, buf, out=None): + return imagecodecs.deflate_decode(buf, out=_flat(out), raw=self.raw) + + +class Delta(Codec): + """Delta codec for numcodecs.""" + + codec_id = 'imagecodecs_delta' + + def __init__(self, shape=None, dtype=None, axis=-1, dist=1): + self.shape = None if shape is None else tuple(shape) + self.dtype = None if dtype is None else numpy.dtype(dtype).str + self.axis = axis + self.dist = dist + + def encode(self, buf): + if self.shape is not None or self.dtype is not None: + buf = protective_squeeze(numpy.asarray(buf)) + assert buf.shape == self.shape + assert buf.dtype == 
self.dtype + return imagecodecs.delta_encode( + buf, axis=self.axis, dist=self.dist + ).tobytes() + + def decode(self, buf, out=None): + if self.shape is not None or self.dtype is not None: + buf = numpy.frombuffer(buf, dtype=self.dtype).reshape(*self.shape) + return imagecodecs.delta_decode( + buf, axis=self.axis, dist=self.dist, out=out + ) + + +class Float24(Codec): + """Float24 codec for numcodecs.""" + + codec_id = 'imagecodecs_float24' + + def __init__(self, byteorder=None, rounding=None): + self.byteorder = byteorder + self.rounding = rounding + + def encode(self, buf): + buf = protective_squeeze(numpy.asarray(buf)) + return imagecodecs.float24_encode( + buf, byteorder=self.byteorder, rounding=self.rounding + ) + + def decode(self, buf, out=None): + return imagecodecs.float24_decode( + buf, byteorder=self.byteorder, out=out + ) + + +class FloatPred(Codec): + """Floating Point Predictor codec for numcodecs.""" + + codec_id = 'imagecodecs_floatpred' + + def __init__(self, shape, dtype, axis=-1, dist=1): + self.shape = tuple(shape) + self.dtype = numpy.dtype(dtype).str + self.axis = axis + self.dist = dist + + def encode(self, buf): + buf = protective_squeeze(numpy.asarray(buf)) + assert buf.shape == self.shape + assert buf.dtype == self.dtype + return imagecodecs.floatpred_encode( + buf, axis=self.axis, dist=self.dist + ).tobytes() + + def decode(self, buf, out=None): + if not isinstance(buf, numpy.ndarray): + buf = numpy.frombuffer(buf, dtype=self.dtype).reshape(*self.shape) + return imagecodecs.floatpred_decode( + buf, axis=self.axis, dist=self.dist, out=out + ) + + +class Gif(Codec): + """GIF codec for numcodecs.""" + + codec_id = 'imagecodecs_gif' + + def encode(self, buf): + buf = protective_squeeze(numpy.asarray(buf)) + return imagecodecs.gif_encode(buf) + + def decode(self, buf, out=None): + return imagecodecs.gif_decode(buf, asrgb=False, out=out) + + +class Heif(Codec): + """HEIF codec for numcodecs.""" + + codec_id = 'imagecodecs_heif' + + def 
__init__( + self, + level=None, + bitspersample=None, + photometric=None, + compression=None, + numthreads=None, + index=None, + ): + self.level = level + self.bitspersample = bitspersample + self.photometric = photometric + self.compression = compression + self.numthreads = numthreads + self.index = index + + def encode(self, buf): + buf = protective_squeeze(numpy.asarray(buf)) + return imagecodecs.heif_encode( + buf, + level=self.level, + bitspersample=self.bitspersample, + photometric=self.photometric, + compression=self.compression, + numthreads=self.numthreads, + ) + + def decode(self, buf, out=None): + return imagecodecs.heif_decode( + buf, + index=self.index, + photometric=self.photometric, + numthreads=self.numthreads, + out=out, + ) + + +class Jetraw(Codec): + """Jetraw codec for numcodecs.""" + + codec_id = 'imagecodecs_jetraw' + + def __init__( + self, + shape, + identifier, + parameters=None, + verbosity=None, + errorbound=None, + ): + self.shape = shape + self.identifier = identifier + self.errorbound = errorbound + imagecodecs.jetraw_init(parameters, verbosity) + + def encode(self, buf): + return imagecodecs.jetraw_encode( + buf, identifier=self.identifier, errorbound=self.errorbound + ) + + def decode(self, buf, out=None): + if out is None: + out = numpy.empty(self.shape, numpy.uint16) + return imagecodecs.jetraw_decode(buf, out=out) + + +class Jpeg(Codec): + """JPEG codec for numcodecs.""" + + codec_id = 'imagecodecs_jpeg' + + def __init__( + self, + bitspersample=None, + tables=None, + header=None, + colorspace_data=None, + colorspace_jpeg=None, + level=None, + subsampling=None, + optimize=None, + smoothing=None, + ): + self.tables = tables + self.header = header + self.bitspersample = bitspersample + self.colorspace_data = colorspace_data + self.colorspace_jpeg = colorspace_jpeg + self.level = level + self.subsampling = subsampling + self.optimize = optimize + self.smoothing = smoothing + + def encode(self, buf): + buf = 
protective_squeeze(numpy.asarray(buf)) + return imagecodecs.jpeg_encode( + buf, + level=self.level, + colorspace=self.colorspace_data, + outcolorspace=self.colorspace_jpeg, + subsampling=self.subsampling, + optimize=self.optimize, + smoothing=self.smoothing, + ) + + def decode(self, buf, out=None): + out_shape = None + if out is not None: + out_shape = out.shape + out = protective_squeeze(out) + img = imagecodecs.jpeg_decode( + buf, + bitspersample=self.bitspersample, + tables=self.tables, + header=self.header, + colorspace=self.colorspace_jpeg, + outcolorspace=self.colorspace_data, + out=out, + ) + if out_shape is not None: + img = img.reshape(out_shape) + return img + + def get_config(self): + """Return dictionary holding configuration parameters.""" + config = dict(id=self.codec_id) + for key in self.__dict__: + if not key.startswith('_'): + value = getattr(self, key) + if value is not None and key in ('header', 'tables'): + import base64 + + value = base64.b64encode(value).decode() + config[key] = value + return config + + @classmethod + def from_config(cls, config): + """Instantiate codec from configuration object.""" + for key in ('header', 'tables'): + value = config.get(key, None) + if value is not None and isinstance(value, str): + import base64 + + config[key] = base64.b64decode(value.encode()) + return cls(**config) + + +class Jpeg2k(Codec): + """JPEG 2000 codec for numcodecs.""" + + codec_id = 'imagecodecs_jpeg2k' + + def __init__( + self, + level=None, + codecformat=None, + colorspace=None, + tile=None, + reversible=None, + bitspersample=None, + resolutions=None, + numthreads=None, + verbose=0, + ): + self.level = level + self.codecformat = codecformat + self.colorspace = colorspace + self.tile = None if tile is None else tuple(tile) + self.reversible = reversible + self.bitspersample = bitspersample + self.resolutions = resolutions + self.numthreads = numthreads + self.verbose = verbose + + def encode(self, buf): + buf = 
protective_squeeze(numpy.asarray(buf)) + return imagecodecs.jpeg2k_encode( + buf, + level=self.level, + codecformat=self.codecformat, + colorspace=self.colorspace, + tile=self.tile, + reversible=self.reversible, + bitspersample=self.bitspersample, + resolutions=self.resolutions, + numthreads=self.numthreads, + verbose=self.verbose, + ) + + def decode(self, buf, out=None): + return imagecodecs.jpeg2k_decode( + buf, verbose=self.verbose, numthreads=self.numthreads, out=out + ) + + +class JpegLs(Codec): + """JPEG LS codec for numcodecs.""" + + codec_id = 'imagecodecs_jpegls' + + def __init__(self, level=None): + self.level = level + + def encode(self, buf): + buf = protective_squeeze(numpy.asarray(buf)) + return imagecodecs.jpegls_encode(buf, level=self.level) + + def decode(self, buf, out=None): + return imagecodecs.jpegls_decode(buf, out=out) + + +class JpegXl(Codec): + """JPEG XL codec for numcodecs.""" + + codec_id = 'imagecodecs_jpegxl' + + def __init__( + self, + # encode + level=None, + effort=None, + distance=None, + lossless=None, + decodingspeed=None, + photometric=None, + planar=None, + usecontainer=None, + # decode + index=None, + keeporientation=None, + # both + numthreads=None, + ): + """ + Return JPEG XL image from numpy array. + Float must be in nominal range 0..1. + + Currently L, LA, RGB, RGBA images are supported in contig mode. + Extra channels are only supported for grayscale images in planar mode. + + Parameters + ---------- + level : Default to None, i.e. not overwriting lossess and decodingspeed options. + When < 0: Use lossless compression + When in [0,1,2,3,4]: Sets the decoding speed tier for the provided options. + Minimum is 0 (slowest to decode, best quality/density), and maximum + is 4 (fastest to decode, at the cost of some quality/density). + effort : Default to 3. + Sets encoder effort/speed level without affecting decoding speed. 
+ Valid values are, from faster to slower speed: 1:lightning 2:thunder + 3:falcon 4:cheetah 5:hare 6:wombat 7:squirrel 8:kitten 9:tortoise. + Speed: lightning, thunder, falcon, cheetah, hare, wombat, squirrel, kitten, tortoise + control the encoder effort in ascending order. + This also affects memory usage: using lower effort will typically reduce memory + consumption during encoding. + lightning and thunder are fast modes useful for lossless mode (modular). + falcon disables all of the following tools. + cheetah enables coefficient reordering, context clustering, and heuristics for selecting DCT sizes and quantization steps. + hare enables Gaborish filtering, chroma from luma, and an initial estimate of quantization steps. + wombat enables error diffusion quantization and full DCT size selection heuristics. + squirrel (default) enables dots, patches, and spline detection, and full context clustering. + kitten optimizes the adaptive quantization for a psychovisual metric. + tortoise enables a more thorough adaptive quantization search. + distance : Default to 1.0 + Sets the distance level for lossy compression: target max butteraugli distance, + lower = higher quality. Range: 0 .. 15. 0.0 = mathematically lossless + (however, use JxlEncoderSetFrameLossless instead to use true lossless, + as setting distance to 0 alone is not the only requirement). + 1.0 = visually lossless. Recommended range: 0.5 .. 3.0. + lossess : Default to False. + Use lossess encoding. + decodingspeed : Default to 0. + Duplicate to level. [0,4] + photometric : Return JxlColorSpace value. + Default logic is quite complicated but works most of the time. + Accepted value: + int: [-1,3] + str: ['RGB', + 'WHITEISZERO', 'MINISWHITE', + 'BLACKISZERO', 'MINISBLACK', 'GRAY', + 'XYB', 'KNOWN'] + planar : Enable multi-channel mode. + Default to false. + usecontainer : + Forces the encoder to use the box-based container format (BMFF) + even when not necessary. 
+ When using JxlEncoderUseBoxes, JxlEncoderStoreJPEGMetadata or + JxlEncoderSetCodestreamLevel with level 10, the encoder will + automatically also use the container format, it is not necessary + to use JxlEncoderUseContainer for those use cases. + By default this setting is disabled. + index : Selectively decode frames for animation. + Default to 0, decode all frames. + When set to > 0, decode that frame index only. + keeporientation : + Enables or disables preserving of as-in-bitstream pixeldata orientation. + Some images are encoded with an Orientation tag indicating that the + decoder must perform a rotation and/or mirroring to the encoded image data. + + If skip_reorientation is JXL_FALSE (the default): the decoder will apply + the transformation from the orientation setting, hence rendering the image + according to its specified intent. When producing a JxlBasicInfo, the decoder + will always set the orientation field to JXL_ORIENT_IDENTITY (matching the + returned pixel data) and also align xsize and ysize so that they correspond + to the width and the height of the returned pixel data. + + If skip_reorientation is JXL_TRUE: the decoder will skip applying the + transformation from the orientation setting, returning the image in + the as-in-bitstream pixeldata orientation. This may be faster to decode + since the decoder doesnt have to apply the transformation, but can + cause wrong display of the image if the orientation tag is not correctly + taken into account by the user. + + By default, this option is disabled, and the returned pixel data is + re-oriented according to the images Orientation setting. + threads : Default to 1. + If <= 0, use all cores. + If > 32, clipped to 32. 
+ """ + + self.level = level + self.effort = effort + self.distance = distance + self.lossless = bool(lossless) + self.decodingspeed = decodingspeed + self.photometric = photometric + self.planar = planar + self.usecontainer = usecontainer + self.index = index + self.keeporientation = keeporientation + self.numthreads = numthreads + + def encode(self, buf): + # TODO: only squeeze all but last dim + buf = protective_squeeze(numpy.asarray(buf)) + return imagecodecs.jpegxl_encode( + buf, + level=self.level, + effort=self.effort, + distance=self.distance, + lossless=self.lossless, + decodingspeed=self.decodingspeed, + photometric=self.photometric, + planar=self.planar, + usecontainer=self.usecontainer, + numthreads=self.numthreads, + ) + + def decode(self, buf, out=None): + return imagecodecs.jpegxl_decode( + buf, + index=self.index, + keeporientation=self.keeporientation, + numthreads=self.numthreads, + out=out, + ) + + +class JpegXr(Codec): + """JPEG XR codec for numcodecs.""" + + codec_id = 'imagecodecs_jpegxr' + + def __init__( + self, + level=None, + photometric=None, + hasalpha=None, + resolution=None, + fp2int=None, + ): + self.level = level + self.photometric = photometric + self.hasalpha = hasalpha + self.resolution = resolution + self.fp2int = fp2int + + def encode(self, buf): + buf = protective_squeeze(numpy.asarray(buf)) + return imagecodecs.jpegxr_encode( + buf, + level=self.level, + photometric=self.photometric, + hasalpha=self.hasalpha, + resolution=self.resolution, + ) + + def decode(self, buf, out=None): + return imagecodecs.jpegxr_decode(buf, fp2int=self.fp2int, out=out) + + +class Lerc(Codec): + """LERC codec for numcodecs.""" + + codec_id = 'imagecodecs_lerc' + + def __init__(self, level=None, version=None, planar=None): + self.level = level + self.version = version + self.planar = bool(planar) + # TODO: support mask? 
+ # self.mask = None + + def encode(self, buf): + buf = protective_squeeze(numpy.asarray(buf)) + return imagecodecs.lerc_encode( + buf, + level=self.level, + version=self.version, + planar=self.planar, + ) + + def decode(self, buf, out=None): + return imagecodecs.lerc_decode(buf, out=out) + + +class Ljpeg(Codec): + """LJPEG codec for numcodecs.""" + + codec_id = 'imagecodecs_ljpeg' + + def __init__(self, bitspersample=None): + self.bitspersample = bitspersample + + def encode(self, buf): + buf = protective_squeeze(numpy.asarray(buf)) + return imagecodecs.ljpeg_encode(buf, bitspersample=self.bitspersample) + + def decode(self, buf, out=None): + return imagecodecs.ljpeg_decode(buf, out=out) + + +class Lz4(Codec): + """LZ4 codec for numcodecs.""" + + codec_id = 'imagecodecs_lz4' + + def __init__(self, level=None, hc=False, header=True): + self.level = level + self.hc = hc + self.header = bool(header) + + def encode(self, buf): + return imagecodecs.lz4_encode( + buf, level=self.level, hc=self.hc, header=self.header + ) + + def decode(self, buf, out=None): + return imagecodecs.lz4_decode(buf, header=self.header, out=_flat(out)) + + +class Lz4f(Codec): + """LZ4F codec for numcodecs.""" + + codec_id = 'imagecodecs_lz4f' + + def __init__( + self, + level=None, + blocksizeid=False, + contentchecksum=None, + blockchecksum=None, + ): + self.level = level + self.blocksizeid = blocksizeid + self.contentchecksum = contentchecksum + self.blockchecksum = blockchecksum + + def encode(self, buf): + return imagecodecs.lz4f_encode( + buf, + level=self.level, + blocksizeid=self.blocksizeid, + contentchecksum=self.contentchecksum, + blockchecksum=self.blockchecksum, + ) + + def decode(self, buf, out=None): + return imagecodecs.lz4f_decode(buf, out=_flat(out)) + + +class Lzf(Codec): + """LZF codec for numcodecs.""" + + codec_id = 'imagecodecs_lzf' + + def __init__(self, header=True): + self.header = bool(header) + + def encode(self, buf): + return imagecodecs.lzf_encode(buf, 
header=self.header) + + def decode(self, buf, out=None): + return imagecodecs.lzf_decode(buf, header=self.header, out=_flat(out)) + + +class Lzma(Codec): + """LZMA codec for numcodecs.""" + + codec_id = 'imagecodecs_lzma' + + def __init__(self, level=None): + self.level = level + + def encode(self, buf): + return imagecodecs.lzma_encode(buf, level=self.level) + + def decode(self, buf, out=None): + return imagecodecs.lzma_decode(buf, out=_flat(out)) + + +class Lzw(Codec): + """LZW codec for numcodecs.""" + + codec_id = 'imagecodecs_lzw' + + def encode(self, buf): + return imagecodecs.lzw_encode(buf) + + def decode(self, buf, out=None): + return imagecodecs.lzw_decode(buf, out=_flat(out)) + + +class PackBits(Codec): + """PackBits codec for numcodecs.""" + + codec_id = 'imagecodecs_packbits' + + def __init__(self, axis=None): + self.axis = axis + + def encode(self, buf): + if not isinstance(buf, (bytes, bytearray)): + buf = protective_squeeze(numpy.asarray(buf)) + return imagecodecs.packbits_encode(buf, axis=self.axis) + + def decode(self, buf, out=None): + return imagecodecs.packbits_decode(buf, out=_flat(out)) + + +class Pglz(Codec): + """PGLZ codec for numcodecs.""" + + codec_id = 'imagecodecs_pglz' + + def __init__(self, header=True, strategy=None): + self.header = bool(header) + self.strategy = strategy + + def encode(self, buf): + return imagecodecs.pglz_encode( + buf, strategy=self.strategy, header=self.header + ) + + def decode(self, buf, out=None): + return imagecodecs.pglz_decode(buf, header=self.header, out=_flat(out)) + + +class Png(Codec): + """PNG codec for numcodecs.""" + + codec_id = 'imagecodecs_png' + + def __init__(self, level=None): + self.level = level + + def encode(self, buf): + buf = protective_squeeze(numpy.asarray(buf)) + return imagecodecs.png_encode(buf, level=self.level) + + def decode(self, buf, out=None): + return imagecodecs.png_decode(buf, out=out) + + +class Qoi(Codec): + """QOI codec for numcodecs.""" + + codec_id = 'imagecodecs_qoi' 
+ + def __init__(self): + pass + + def encode(self, buf): + buf = protective_squeeze(numpy.asarray(buf)) + return imagecodecs.qoi_encode(buf) + + def decode(self, buf, out=None): + return imagecodecs.qoi_decode(buf, out=out) + + +class Rgbe(Codec): + """RGBE codec for numcodecs.""" + + codec_id = 'imagecodecs_rgbe' + + def __init__(self, header=False, shape=None, rle=None): + if not header and shape is None: + raise ValueError('must specify data shape if no header') + if shape and shape[-1] != 3: + raise ValueError('invalid shape') + self.shape = shape + self.header = bool(header) + self.rle = None if rle is None else bool(rle) + + def encode(self, buf): + buf = protective_squeeze(numpy.asarray(buf)) + return imagecodecs.rgbe_encode(buf, header=self.header, rle=self.rle) + + def decode(self, buf, out=None): + if out is None and not self.header: + out = numpy.empty(self.shape, numpy.float32) + return imagecodecs.rgbe_decode( + buf, header=self.header, rle=self.rle, out=out + ) + + +class Rcomp(Codec): + """Rcomp codec for numcodecs.""" + + codec_id = 'imagecodecs_rcomp' + + def __init__(self, shape, dtype, nblock=None): + self.shape = tuple(shape) + self.dtype = numpy.dtype(dtype).str + self.nblock = nblock + + def encode(self, buf): + return imagecodecs.rcomp_encode(buf, nblock=self.nblock) + + def decode(self, buf, out=None): + return imagecodecs.rcomp_decode( + buf, + shape=self.shape, + dtype=self.dtype, + nblock=self.nblock, + out=out, + ) + + +class Snappy(Codec): + """Snappy codec for numcodecs.""" + + codec_id = 'imagecodecs_snappy' + + def encode(self, buf): + return imagecodecs.snappy_encode(buf) + + def decode(self, buf, out=None): + return imagecodecs.snappy_decode(buf, out=_flat(out)) + + +class Spng(Codec): + """SPNG codec for numcodecs.""" + + codec_id = 'imagecodecs_spng' + + def __init__(self, level=None): + self.level = level + + def encode(self, buf): + buf = protective_squeeze(numpy.asarray(buf)) + return imagecodecs.spng_encode(buf, 
level=self.level) + + def decode(self, buf, out=None): + return imagecodecs.spng_decode(buf, out=out) + + +class Tiff(Codec): + """TIFF codec for numcodecs.""" + + codec_id = 'imagecodecs_tiff' + + def __init__(self, index=None, asrgb=None, verbose=0): + self.index = index + self.asrgb = bool(asrgb) + self.verbose = verbose + + def encode(self, buf): + # TODO: not implemented + buf = protective_squeeze(numpy.asarray(buf)) + return imagecodecs.tiff_encode(buf) + + def decode(self, buf, out=None): + return imagecodecs.tiff_decode( + buf, + index=self.index, + asrgb=self.asrgb, + verbose=self.verbose, + out=out, + ) + + +class Webp(Codec): + """WebP codec for numcodecs.""" + + codec_id = 'imagecodecs_webp' + + def __init__(self, level=None, lossless=None, method=None, hasalpha=None): + self.level = level + self.hasalpha = bool(hasalpha) + self.method = method + self.lossless = lossless + + def encode(self, buf): + buf = protective_squeeze(numpy.asarray(buf)) + return imagecodecs.webp_encode( + buf, level=self.level, lossless=self.lossless, method=self.method + ) + + def decode(self, buf, out=None): + return imagecodecs.webp_decode(buf, hasalpha=self.hasalpha, out=out) + + +class Xor(Codec): + """XOR codec for numcodecs.""" + + codec_id = 'imagecodecs_xor' + + def __init__(self, shape=None, dtype=None, axis=-1): + self.shape = None if shape is None else tuple(shape) + self.dtype = None if dtype is None else numpy.dtype(dtype).str + self.axis = axis + + def encode(self, buf): + if self.shape is not None or self.dtype is not None: + buf = protective_squeeze(numpy.asarray(buf)) + assert buf.shape == self.shape + assert buf.dtype == self.dtype + return imagecodecs.xor_encode(buf, axis=self.axis).tobytes() + + def decode(self, buf, out=None): + if self.shape is not None or self.dtype is not None: + buf = numpy.frombuffer(buf, dtype=self.dtype).reshape(*self.shape) + return imagecodecs.xor_decode(buf, axis=self.axis, out=_flat(out)) + + +class Zfp(Codec): + """ZFP codec for 
numcodecs.""" + + codec_id = 'imagecodecs_zfp' + + def __init__( + self, + shape=None, + dtype=None, + strides=None, + level=None, + mode=None, + execution=None, + numthreads=None, + chunksize=None, + header=True, + ): + if header: + self.shape = None + self.dtype = None + self.strides = None + elif shape is None or dtype is None: + raise ValueError('invalid shape or dtype') + else: + self.shape = tuple(shape) + self.dtype = numpy.dtype(dtype).str + self.strides = None if strides is None else tuple(strides) + self.level = level + self.mode = mode + self.execution = execution + self.numthreads = numthreads + self.chunksize = chunksize + self.header = bool(header) + + def encode(self, buf): + buf = protective_squeeze(numpy.asarray(buf)) + if not self.header: + assert buf.shape == self.shape + assert buf.dtype == self.dtype + return imagecodecs.zfp_encode( + buf, + level=self.level, + mode=self.mode, + execution=self.execution, + header=self.header, + numthreads=self.numthreads, + chunksize=self.chunksize, + ) + + def decode(self, buf, out=None): + if self.header: + return imagecodecs.zfp_decode(buf, out=out) + return imagecodecs.zfp_decode( + buf, + shape=self.shape, + dtype=numpy.dtype(self.dtype), + strides=self.strides, + numthreads=self.numthreads, + out=out, + ) + + +class Zlib(Codec): + """Zlib codec for numcodecs.""" + + codec_id = 'imagecodecs_zlib' + + def __init__(self, level=None): + self.level = level + + def encode(self, buf): + return imagecodecs.zlib_encode(buf, level=self.level) + + def decode(self, buf, out=None): + return imagecodecs.zlib_decode(buf, out=_flat(out)) + + +class Zlibng(Codec): + """Zlibng codec for numcodecs.""" + + codec_id = 'imagecodecs_zlibng' + + def __init__(self, level=None): + self.level = level + + def encode(self, buf): + return imagecodecs.zlibng_encode(buf, level=self.level) + + def decode(self, buf, out=None): + return imagecodecs.zlibng_decode(buf, out=_flat(out)) + + +class Zopfli(Codec): + """Zopfli codec for 
numcodecs.""" + + codec_id = 'imagecodecs_zopfli' + + def encode(self, buf): + return imagecodecs.zopfli_encode(buf) + + def decode(self, buf, out=None): + return imagecodecs.zopfli_decode(buf, out=_flat(out)) + + +class Zstd(Codec): + """ZStandard codec for numcodecs.""" + + codec_id = 'imagecodecs_zstd' + + def __init__(self, level=None): + self.level = level + + def encode(self, buf): + return imagecodecs.zstd_encode(buf, level=self.level) + + def decode(self, buf, out=None): + return imagecodecs.zstd_decode(buf, out=_flat(out)) + + +def _flat(out): + """Return numpy array as contiguous view of bytes if possible.""" + if out is None: + return None + view = memoryview(out) + if view.readonly or not view.contiguous: + return None + return view.cast('B') + + +def register_codecs(codecs=None, force=False, verbose=True): + """Register codecs in this module with numcodecs.""" + for name, cls in globals().items(): + if not hasattr(cls, 'codec_id') or name == 'Codec': + continue + if codecs is not None and cls.codec_id not in codecs: + continue + try: + try: + get_codec({'id': cls.codec_id}) + except TypeError: + # registered, but failed + pass + except ValueError: + # not registered yet + pass + else: + if not force: + if verbose: + log_warning( + f'numcodec {cls.codec_id!r} already registered' + ) + continue + if verbose: + log_warning(f'replacing registered numcodec {cls.codec_id!r}') + register_codec(cls) + + +def log_warning(msg, *args, **kwargs): + """Log message with level WARNING.""" + import logging + + logging.getLogger(__name__).warning(msg, *args, **kwargs) diff --git a/diffusion_policy/common/checkpoint_util.py b/diffusion_policy/common/checkpoint_util.py new file mode 100644 index 0000000..fbb0837 --- /dev/null +++ b/diffusion_policy/common/checkpoint_util.py @@ -0,0 +1,59 @@ +from typing import Optional, Dict +import os + +class TopKCheckpointManager: + def __init__(self, + save_dir, + monitor_key: str, + mode='min', + k=1, + 
format_str='epoch={epoch:03d}-train_loss={train_loss:.3f}.ckpt' + ): + assert mode in ['max', 'min'] + assert k >= 0 + + self.save_dir = save_dir + self.monitor_key = monitor_key + self.mode = mode + self.k = k + self.format_str = format_str + self.path_value_map = dict() + + def get_ckpt_path(self, data: Dict[str, float]) -> Optional[str]: + if self.k == 0: + return None + + value = data[self.monitor_key] + ckpt_path = os.path.join( + self.save_dir, self.format_str.format(**data)) + + if len(self.path_value_map) < self.k: + # under-capacity + self.path_value_map[ckpt_path] = value + return ckpt_path + + # at capacity + sorted_map = sorted(self.path_value_map.items(), key=lambda x: x[1]) + min_path, min_value = sorted_map[0] + max_path, max_value = sorted_map[-1] + + delete_path = None + if self.mode == 'max': + if value > min_value: + delete_path = min_path + else: + if value < max_value: + delete_path = max_path + + if delete_path is None: + return None + else: + del self.path_value_map[delete_path] + self.path_value_map[ckpt_path] = value + + if not os.path.exists(self.save_dir): + os.mkdir(self.save_dir) + + if os.path.exists(delete_path): + os.remove(delete_path) + return ckpt_path diff --git a/diffusion_policy/common/cv2_util.py b/diffusion_policy/common/cv2_util.py new file mode 100644 index 0000000..1d749bc --- /dev/null +++ b/diffusion_policy/common/cv2_util.py @@ -0,0 +1,150 @@ +from typing import Tuple +import math +import cv2 +import numpy as np + +def draw_reticle(img, u, v, label_color): + """ + Draws a reticle (cross-hair) on the image at the given position on top of + the original image. + @param img (In/Out) uint8 3 channel image + @param u X coordinate (width) + @param v Y coordinate (height) + @param label_color tuple of 3 ints for RGB color used for drawing. + """ + # Cast to int. 
    u = int(u)
    v = int(v)

    white = (255, 255, 255)
    # three concentric rings: label_color, then white for contrast on any
    # background, then label_color again
    cv2.circle(img, (u, v), 10, label_color, 1)
    cv2.circle(img, (u, v), 11, white, 1)
    cv2.circle(img, (u, v), 12, label_color, 1)
    # four short white ticks just outside the center pixel (up/right/down/left)
    cv2.line(img, (u, v + 1), (u, v + 3), white, 1)
    cv2.line(img, (u + 1, v), (u + 3, v), white, 1)
    cv2.line(img, (u, v - 1), (u, v - 3), white, 1)
    cv2.line(img, (u - 1, v), (u - 3, v), white, 1)


def draw_text(
    img,
    *,
    text,
    uv_top_left,
    color=(255, 255, 255),
    fontScale=0.5,
    thickness=1,
    fontFace=cv2.FONT_HERSHEY_SIMPLEX,
    outline_color=(0, 0, 0),
    line_spacing=1.5,
):
    """
    Draws multiline with an outline.

    Each line of `text` is drawn below the previous one, starting at
    `uv_top_left` (x, y in pixels). When `outline_color` is not None, each
    line is first stroked at triple thickness in the outline color, then
    drawn again on top in `color`.
    """
    assert isinstance(text, str)

    uv_top_left = np.array(uv_top_left, dtype=float)
    assert uv_top_left.shape == (2,)

    for line in text.splitlines():
        (w, h), _ = cv2.getTextSize(
            text=line,
            fontFace=fontFace,
            fontScale=fontScale,
            thickness=thickness,
        )
        # cv2.putText's `org` is the *bottom-left* corner of the text,
        # so shift down by the glyph height
        uv_bottom_left_i = uv_top_left + [0, h]
        org = tuple(uv_bottom_left_i.astype(int))

        if outline_color is not None:
            # outline pass: same glyphs, 3x thickness, drawn first so the
            # fill pass covers its center
            cv2.putText(
                img,
                text=line,
                org=org,
                fontFace=fontFace,
                fontScale=fontScale,
                color=outline_color,
                thickness=thickness * 3,
                lineType=cv2.LINE_AA,
            )
        cv2.putText(
            img,
            text=line,
            org=org,
            fontFace=fontFace,
            fontScale=fontScale,
            color=color,
            thickness=thickness,
            lineType=cv2.LINE_AA,
        )

        # advance the cursor; spacing is proportional to this line's height
        uv_top_left += [0, h * line_spacing]


def get_image_transform(
        input_res: Tuple[int,int]=(1280,720),
        output_res: Tuple[int,int]=(640,480),
        bgr_to_rgb: bool=False):
    # Build a resize-then-center-crop closure mapping an (ih, iw, 3) image
    # to output_res, preserving aspect ratio.
    iw, ih = input_res
    ow, oh = output_res
    rw, rh = None, None
    # INTER_AREA for downscaling; switched to INTER_LINEAR below when the
    # output is larger than the input along the fitted axis (upscaling)
    interp_method = cv2.INTER_AREA

    if (iw/ih) >= (ow/oh):
        # input is wider
        rh = oh
        rw = math.ceil(rh / ih * iw)
        if oh > ih:
            interp_method = cv2.INTER_LINEAR
    else:
        rw = ow
        rh = math.ceil(rw / iw * ih)
        if ow > iw:
            interp_method = cv2.INTER_LINEAR

    # center-crop slices: resized image is >= output along the cropped axis
    w_slice_start = (rw - ow) // 2
    w_slice = slice(w_slice_start, w_slice_start + ow)
    h_slice_start = (rh - oh) // 2
    h_slice = 
slice(h_slice_start, h_slice_start + oh) + c_slice = slice(None) + if bgr_to_rgb: + c_slice = slice(None, None, -1) + + def transform(img: np.ndarray): + assert img.shape == ((ih,iw,3)) + # resize + img = cv2.resize(img, (rw, rh), interpolation=interp_method) + # crop + img = img[h_slice, w_slice, c_slice] + return img + return transform + +def optimal_row_cols( + n_cameras, + in_wh_ratio, + max_resolution=(1920, 1080) + ): + out_w, out_h = max_resolution + out_wh_ratio = out_w / out_h + + n_rows = np.arange(n_cameras,dtype=np.int64) + 1 + n_cols = np.ceil(n_cameras / n_rows).astype(np.int64) + cat_wh_ratio = in_wh_ratio * (n_cols / n_rows) + ratio_diff = np.abs(out_wh_ratio - cat_wh_ratio) + best_idx = np.argmin(ratio_diff) + best_n_row = n_rows[best_idx] + best_n_col = n_cols[best_idx] + best_cat_wh_ratio = cat_wh_ratio[best_idx] + + rw, rh = None, None + if best_cat_wh_ratio >= out_wh_ratio: + # cat is wider + rw = math.floor(out_w / best_n_col) + rh = math.floor(rw / in_wh_ratio) + else: + rh = math.floor(out_h / best_n_row) + rw = math.floor(rh * in_wh_ratio) + + # crop_resolution = (rw, rh) + return rw, rh, best_n_col, best_n_row diff --git a/diffusion_policy/common/env_util.py b/diffusion_policy/common/env_util.py new file mode 100644 index 0000000..a380f5a --- /dev/null +++ b/diffusion_policy/common/env_util.py @@ -0,0 +1,23 @@ +import cv2 +import numpy as np + + +def render_env_video(env, states, actions=None): + observations = states + imgs = list() + for i in range(len(observations)): + state = observations[i] + env.set_state(state) + if i == 0: + env.set_state(state) + img = env.render() + # draw action + if actions is not None: + action = actions[i] + coord = (action / 512 * 96).astype(np.int32) + cv2.drawMarker(img, coord, + color=(255,0,0), markerType=cv2.MARKER_CROSS, + markerSize=8, thickness=1) + imgs.append(img) + imgs = np.array(imgs) + return imgs diff --git a/diffusion_policy/common/json_logger.py b/diffusion_policy/common/json_logger.py new 
file mode 100644
index 0000000..ee9fb4a
--- /dev/null
+++ b/diffusion_policy/common/json_logger.py
@@ -0,0 +1,117 @@
from typing import Optional, Callable, Any, Sequence
import os
import copy
import json
import numbers
import pandas as pd


def read_json_log(path: str,
        required_keys: Sequence[str]=tuple(),
        **kwargs) -> pd.DataFrame:
    """
    Read json-per-line file, with potentially incomplete lines.
    kwargs passed to pd.read_json

    Lines are kept only if they contain at least one of `required_keys`
    as a substring (cheap pre-filter, not a JSON-level check). A trailing
    line without '\n' is treated as incomplete and dropped.
    """
    lines = list()
    with open(path, 'r') as f:
        while True:
            # one json per line
            line = f.readline()
            if len(line) == 0:
                # EOF
                break
            elif not line.endswith('\n'):
                # incomplete line
                break
            is_relevant = False
            for k in required_keys:
                if k in line:
                    is_relevant = True
                    break
            if is_relevant:
                lines.append(line)
    if len(lines) < 1:
        return pd.DataFrame()
    # wrap the kept lines into a single JSON array for pd.read_json
    json_buf = f'[{",".join([line for line in (line.strip() for line in lines) if line])}]'
    df = pd.read_json(json_buf, **kwargs)
    return df

class JsonLogger:
    # Append-mode json-per-line logger that survives abrupt termination:
    # on start() it truncates any half-written trailing line and recovers
    # the last complete record.
    def __init__(self, path: str,
            filter_fn: Optional[Callable[[str,Any],bool]]=None):
        # filter_fn(key, value) -> bool selects which items of each log
        # dict are written; default keeps numeric values only
        if filter_fn is None:
            filter_fn = lambda k,v: isinstance(v, numbers.Number)

        # default to append mode
        self.path = path
        self.filter_fn = filter_fn
        self.file = None
        self.last_log = None

    def start(self):
        """Open (or create) the log file and discard a trailing partial line."""
        # use line buffering
        try:
            self.file = file = open(self.path, 'r+', buffering=1)
        except FileNotFoundError:
            self.file = file = open(self.path, 'w+', buffering=1)

        # Move the pointer (similar to a cursor in a text editor) to the end of the file
        pos = file.seek(0, os.SEEK_END)

        # Read each character in the file one at a time from the last
        # character going backwards, searching for a newline character
        # If we find a new line, exit the search
        while pos > 0 and file.read(1) != "\n":
            pos -= 1
            file.seek(pos, os.SEEK_SET)
        # now the file pointer is at one past the last '\n'
        # and pos is at the last '\n'.
        last_line_end = file.tell()

        # find the start of second last line
        pos = max(0, pos-1)
        file.seek(pos, os.SEEK_SET)
        while pos > 0 and file.read(1) != "\n":
            pos -= 1
            file.seek(pos, os.SEEK_SET)
        # now the file pointer is at one past the second last '\n'
        last_line_start = file.tell()

        if last_line_start < last_line_end:
            # has last line of json — recover it as the last known log
            last_line = file.readline()
            self.last_log = json.loads(last_line)

        # remove the last incomplete line (everything after the final '\n')
        file.seek(last_line_end)
        file.truncate()

    def stop(self):
        # close the underlying file; start() must be called again to reuse
        self.file.close()
        self.file = None

    def __enter__(self):
        self.start()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.stop()

    def log(self, data: dict):
        """Write one filtered record as a single JSON line."""
        filtered_data = dict(
            filter(lambda x: self.filter_fn(*x), data.items()))
        # save current as last log
        self.last_log = filtered_data
        # coerce numpy/other numeric scalars to plain int/float so
        # json.dumps accepts them
        for k, v in filtered_data.items():
            if isinstance(v, numbers.Integral):
                filtered_data[k] = int(v)
            elif isinstance(v, numbers.Number):
                filtered_data[k] = float(v)
        buf = json.dumps(filtered_data)
        # ensure one line per json
        buf = buf.replace('\n','') + '\n'
        self.file.write(buf)

    def get_last_log(self):
        # deep copy so callers can't mutate the recovered/last record
        return copy.deepcopy(self.last_log)
diff --git a/diffusion_policy/common/nested_dict_util.py b/diffusion_policy/common/nested_dict_util.py
new file mode 100644
index 0000000..b186f70
--- /dev/null
+++ b/diffusion_policy/common/nested_dict_util.py
@@ -0,0 +1,32 @@
import functools

def nested_dict_map(f, x):
    """
    Map f over all leaf of nested dict x
    """

    if not isinstance(x, dict):
        return f(x)
    y = dict()
    for key, value in x.items():
        y[key] = nested_dict_map(f, value)
    return y

def nested_dict_reduce(f, x):
    """
    Map f over all values of nested dict x, and reduce to a single value
    """
    # NOTE(review): leaves are returned unchanged (f is only used to
    # reduce, not to map, despite the docstring) — confirm against callers
    if not isinstance(x, dict):
        return x

    reduced_values = list()
    for value in x.values():
        reduced_values.append(nested_dict_reduce(f, value))
    y = functools.reduce(f, reduced_values)
    return y


def nested_dict_check(f, x):
    # True iff predicate f holds for every leaf of nested dict x
    bool_dict = nested_dict_map(f, x)
    result = nested_dict_reduce(lambda x, y: x and y, bool_dict)
    return result
diff --git a/diffusion_policy/common/normalize_util.py b/diffusion_policy/common/normalize_util.py
new file mode 100644
index 0000000..d6fc7b6
--- /dev/null
+++ b/diffusion_policy/common/normalize_util.py
@@ -0,0 +1,223 @@
from diffusion_policy.model.common.normalizer import SingleFieldLinearNormalizer
from diffusion_policy.common.pytorch_util import dict_apply, dict_apply_reduce, dict_apply_split
import numpy as np


def get_range_normalizer_from_stat(stat, output_max=1, output_min=-1, range_eps=1e-7):
    # -1, 1 normalization
    # Linear map sending [min, max] per dimension onto [output_min, output_max].
    input_max = stat['max']
    input_min = stat['min']
    input_range = input_max - input_min
    # dimensions with (near-)zero range cannot be scaled meaningfully:
    # give them unit scale and center them on the output midpoint
    ignore_dim = input_range < range_eps
    input_range[ignore_dim] = output_max - output_min
    scale = (output_max - output_min) / input_range
    offset = output_min - scale * input_min
    offset[ignore_dim] = (output_max + output_min) / 2 - input_min[ignore_dim]

    return SingleFieldLinearNormalizer.create_manual(
        scale=scale,
        offset=offset,
        input_stats_dict=stat
    )

def get_image_range_normalizer():
    # Map image values from [0, 1] to [-1, 1]; stats describe a uniform
    # distribution on [0, 1] (std = sqrt(1/12))
    scale = np.array([2], dtype=np.float32)
    offset = np.array([-1], dtype=np.float32)
    stat = {
        'min': np.array([0], dtype=np.float32),
        'max': np.array([1], dtype=np.float32),
        'mean': np.array([0.5], dtype=np.float32),
        'std': np.array([np.sqrt(1/12)], dtype=np.float32)
    }
    return SingleFieldLinearNormalizer.create_manual(
        scale=scale,
        offset=offset,
        input_stats_dict=stat
    )

def get_identity_normalizer_from_stat(stat):
    # No-op normalizer (scale 1, offset 0) that still records input stats
    scale = np.ones_like(stat['min'])
    offset = np.zeros_like(stat['min'])
    return SingleFieldLinearNormalizer.create_manual(
        scale=scale,
        offset=offset,
        input_stats_dict=stat
    )

def robomimic_abs_action_normalizer_from_stat(stat, rotation_transformer):
    # Split the absolute-action stats into position / rotation / gripper
    # components and normalize each differently.
    # assumes action layout is [pos(3), rot(3), gripper(...)] — TODO confirm
    result = dict_apply_split(
        stat, lambda x: {
            'pos': x[...,:3],
            'rot': x[...,3:6],
'gripper': x[...,6:] + }) + + def get_pos_param_info(stat, output_max=1, output_min=-1, range_eps=1e-7): + # -1, 1 normalization + input_max = stat['max'] + input_min = stat['min'] + input_range = input_max - input_min + ignore_dim = input_range < range_eps + input_range[ignore_dim] = output_max - output_min + scale = (output_max - output_min) / input_range + offset = output_min - scale * input_min + offset[ignore_dim] = (output_max + output_min) / 2 - input_min[ignore_dim] + + return {'scale': scale, 'offset': offset}, stat + + def get_rot_param_info(stat): + example = rotation_transformer.forward(stat['mean']) + scale = np.ones_like(example) + offset = np.zeros_like(example) + info = { + 'max': np.ones_like(example), + 'min': np.full_like(example, -1), + 'mean': np.zeros_like(example), + 'std': np.ones_like(example) + } + return {'scale': scale, 'offset': offset}, info + + def get_gripper_param_info(stat): + example = stat['max'] + scale = np.ones_like(example) + offset = np.zeros_like(example) + info = { + 'max': np.ones_like(example), + 'min': np.full_like(example, -1), + 'mean': np.zeros_like(example), + 'std': np.ones_like(example) + } + return {'scale': scale, 'offset': offset}, info + + pos_param, pos_info = get_pos_param_info(result['pos']) + rot_param, rot_info = get_rot_param_info(result['rot']) + gripper_param, gripper_info = get_gripper_param_info(result['gripper']) + + param = dict_apply_reduce( + [pos_param, rot_param, gripper_param], + lambda x: np.concatenate(x,axis=-1)) + info = dict_apply_reduce( + [pos_info, rot_info, gripper_info], + lambda x: np.concatenate(x,axis=-1)) + + return SingleFieldLinearNormalizer.create_manual( + scale=param['scale'], + offset=param['offset'], + input_stats_dict=info + ) + + +def robomimic_abs_action_only_normalizer_from_stat(stat): + result = dict_apply_split( + stat, lambda x: { + 'pos': x[...,:3], + 'other': x[...,3:] + }) + + def get_pos_param_info(stat, output_max=1, output_min=-1, range_eps=1e-7): + # -1, 1 
normalization + input_max = stat['max'] + input_min = stat['min'] + input_range = input_max - input_min + ignore_dim = input_range < range_eps + input_range[ignore_dim] = output_max - output_min + scale = (output_max - output_min) / input_range + offset = output_min - scale * input_min + offset[ignore_dim] = (output_max + output_min) / 2 - input_min[ignore_dim] + + return {'scale': scale, 'offset': offset}, stat + + + def get_other_param_info(stat): + example = stat['max'] + scale = np.ones_like(example) + offset = np.zeros_like(example) + info = { + 'max': np.ones_like(example), + 'min': np.full_like(example, -1), + 'mean': np.zeros_like(example), + 'std': np.ones_like(example) + } + return {'scale': scale, 'offset': offset}, info + + pos_param, pos_info = get_pos_param_info(result['pos']) + other_param, other_info = get_other_param_info(result['other']) + + param = dict_apply_reduce( + [pos_param, other_param], + lambda x: np.concatenate(x,axis=-1)) + info = dict_apply_reduce( + [pos_info, other_info], + lambda x: np.concatenate(x,axis=-1)) + + return SingleFieldLinearNormalizer.create_manual( + scale=param['scale'], + offset=param['offset'], + input_stats_dict=info + ) + + +def robomimic_abs_action_only_dual_arm_normalizer_from_stat(stat): + Da = stat['max'].shape[-1] + Dah = Da // 2 + result = dict_apply_split( + stat, lambda x: { + 'pos0': x[...,:3], + 'other0': x[...,3:Dah], + 'pos1': x[...,Dah:Dah+3], + 'other1': x[...,Dah+3:] + }) + + def get_pos_param_info(stat, output_max=1, output_min=-1, range_eps=1e-7): + # -1, 1 normalization + input_max = stat['max'] + input_min = stat['min'] + input_range = input_max - input_min + ignore_dim = input_range < range_eps + input_range[ignore_dim] = output_max - output_min + scale = (output_max - output_min) / input_range + offset = output_min - scale * input_min + offset[ignore_dim] = (output_max + output_min) / 2 - input_min[ignore_dim] + + return {'scale': scale, 'offset': offset}, stat + + + def 
get_other_param_info(stat): + example = stat['max'] + scale = np.ones_like(example) + offset = np.zeros_like(example) + info = { + 'max': np.ones_like(example), + 'min': np.full_like(example, -1), + 'mean': np.zeros_like(example), + 'std': np.ones_like(example) + } + return {'scale': scale, 'offset': offset}, info + + pos0_param, pos0_info = get_pos_param_info(result['pos0']) + pos1_param, pos1_info = get_pos_param_info(result['pos1']) + other0_param, other0_info = get_other_param_info(result['other0']) + other1_param, other1_info = get_other_param_info(result['other1']) + + param = dict_apply_reduce( + [pos0_param, other0_param, pos1_param, other1_param], + lambda x: np.concatenate(x,axis=-1)) + info = dict_apply_reduce( + [pos0_info, other0_info, pos1_info, other1_info], + lambda x: np.concatenate(x,axis=-1)) + + return SingleFieldLinearNormalizer.create_manual( + scale=param['scale'], + offset=param['offset'], + input_stats_dict=info + ) + + +def array_to_stats(arr: np.ndarray): + stat = { + 'min': np.min(arr, axis=0), + 'max': np.max(arr, axis=0), + 'mean': np.mean(arr, axis=0), + 'std': np.std(arr, axis=0) + } + return stat diff --git a/diffusion_policy/common/pose_trajectory_interpolator.py b/diffusion_policy/common/pose_trajectory_interpolator.py new file mode 100644 index 0000000..063943f --- /dev/null +++ b/diffusion_policy/common/pose_trajectory_interpolator.py @@ -0,0 +1,208 @@ +from typing import Union +import numbers +import numpy as np +import scipy.interpolate as si +import scipy.spatial.transform as st + +def rotation_distance(a: st.Rotation, b: st.Rotation) -> float: + return (b * a.inv()).magnitude() + +def pose_distance(start_pose, end_pose): + start_pose = np.array(start_pose) + end_pose = np.array(end_pose) + start_pos = start_pose[:3] + end_pos = end_pose[:3] + start_rot = st.Rotation.from_rotvec(start_pose[3:]) + end_rot = st.Rotation.from_rotvec(end_pose[3:]) + pos_dist = np.linalg.norm(end_pos - start_pos) + rot_dist = 
rotation_distance(start_rot, end_rot) + return pos_dist, rot_dist + +class PoseTrajectoryInterpolator: + def __init__(self, times: np.ndarray, poses: np.ndarray): + assert len(times) >= 1 + assert len(poses) == len(times) + if not isinstance(times, np.ndarray): + times = np.array(times) + if not isinstance(poses, np.ndarray): + poses = np.array(poses) + + if len(times) == 1: + # special treatement for single step interpolation + self.single_step = True + self._times = times + self._poses = poses + else: + self.single_step = False + assert np.all(times[1:] >= times[:-1]) + + pos = poses[:,:3] + rot = st.Rotation.from_rotvec(poses[:,3:]) + + self.pos_interp = si.interp1d(times, pos, + axis=0, assume_sorted=True) + self.rot_interp = st.Slerp(times, rot) + + @property + def times(self) -> np.ndarray: + if self.single_step: + return self._times + else: + return self.pos_interp.x + + @property + def poses(self) -> np.ndarray: + if self.single_step: + return self._poses + else: + n = len(self.times) + poses = np.zeros((n, 6)) + poses[:,:3] = self.pos_interp.y + poses[:,3:] = self.rot_interp(self.times).as_rotvec() + return poses + + def trim(self, + start_t: float, end_t: float + ) -> "PoseTrajectoryInterpolator": + assert start_t <= end_t + times = self.times + should_keep = (start_t < times) & (times < end_t) + keep_times = times[should_keep] + all_times = np.concatenate([[start_t], keep_times, [end_t]]) + # remove duplicates, Slerp requries strictly increasing x + all_times = np.unique(all_times) + # interpolate + all_poses = self(all_times) + return PoseTrajectoryInterpolator(times=all_times, poses=all_poses) + + def drive_to_waypoint(self, + pose, time, curr_time, + max_pos_speed=np.inf, + max_rot_speed=np.inf + ) -> "PoseTrajectoryInterpolator": + assert(max_pos_speed > 0) + assert(max_rot_speed > 0) + time = max(time, curr_time) + + curr_pose = self(curr_time) + pos_dist, rot_dist = pose_distance(curr_pose, pose) + pos_min_duration = pos_dist / max_pos_speed + 
rot_min_duration = rot_dist / max_rot_speed + duration = time - curr_time + duration = max(duration, max(pos_min_duration, rot_min_duration)) + assert duration >= 0 + last_waypoint_time = curr_time + duration + + # insert new pose + trimmed_interp = self.trim(curr_time, curr_time) + times = np.append(trimmed_interp.times, [last_waypoint_time], axis=0) + poses = np.append(trimmed_interp.poses, [pose], axis=0) + + # create new interpolator + final_interp = PoseTrajectoryInterpolator(times, poses) + return final_interp + + def schedule_waypoint(self, + pose, time, + max_pos_speed=np.inf, + max_rot_speed=np.inf, + curr_time=None, + last_waypoint_time=None + ) -> "PoseTrajectoryInterpolator": + assert(max_pos_speed > 0) + assert(max_rot_speed > 0) + if last_waypoint_time is not None: + assert curr_time is not None + + # trim current interpolator to between curr_time and last_waypoint_time + start_time = self.times[0] + end_time = self.times[-1] + assert start_time <= end_time + + if curr_time is not None: + if time <= curr_time: + # if insert time is earlier than current time + # no effect should be done to the interpolator + return self + # now, curr_time < time + start_time = max(curr_time, start_time) + + if last_waypoint_time is not None: + # if last_waypoint_time is earlier than start_time + # use start_time + if time <= last_waypoint_time: + end_time = curr_time + else: + end_time = max(last_waypoint_time, curr_time) + else: + end_time = curr_time + + end_time = min(end_time, time) + start_time = min(start_time, end_time) + # end time should be the latest of all times except time + # after this we can assume order (proven by zhenjia, due to the 2 min operations) + + # Constraints: + # start_time <= end_time <= time (proven by zhenjia) + # curr_time <= start_time (proven by zhenjia) + # curr_time <= time (proven by zhenjia) + + # time can't change + # last_waypoint_time can't change + # curr_time can't change + assert start_time <= end_time + assert end_time <= 
time + if last_waypoint_time is not None: + if time <= last_waypoint_time: + assert end_time == curr_time + else: + assert end_time == max(last_waypoint_time, curr_time) + + if curr_time is not None: + assert curr_time <= start_time + assert curr_time <= time + + trimmed_interp = self.trim(start_time, end_time) + # after this, all waypoints in trimmed_interp is within start_time and end_time + # and is earlier than time + + # determine speed + duration = time - end_time + end_pose = trimmed_interp(end_time) + pos_dist, rot_dist = pose_distance(pose, end_pose) + pos_min_duration = pos_dist / max_pos_speed + rot_min_duration = rot_dist / max_rot_speed + duration = max(duration, max(pos_min_duration, rot_min_duration)) + assert duration >= 0 + last_waypoint_time = end_time + duration + + # insert new pose + times = np.append(trimmed_interp.times, [last_waypoint_time], axis=0) + poses = np.append(trimmed_interp.poses, [pose], axis=0) + + # create new interpolator + final_interp = PoseTrajectoryInterpolator(times, poses) + return final_interp + + + def __call__(self, t: Union[numbers.Number, np.ndarray]) -> np.ndarray: + is_single = False + if isinstance(t, numbers.Number): + is_single = True + t = np.array([t]) + + pose = np.zeros((len(t), 6)) + if self.single_step: + pose[:] = self._poses[0] + else: + start_time = self.times[0] + end_time = self.times[-1] + t = np.clip(t, start_time, end_time) + + pose = np.zeros((len(t), 6)) + pose[:,:3] = self.pos_interp(t) + pose[:,3:] = self.rot_interp(t).as_rotvec() + + if is_single: + pose = pose[0] + return pose diff --git a/diffusion_policy/common/precise_sleep.py b/diffusion_policy/common/precise_sleep.py new file mode 100644 index 0000000..83ef62c --- /dev/null +++ b/diffusion_policy/common/precise_sleep.py @@ -0,0 +1,25 @@ +import time + +def precise_sleep(dt: float, slack_time: float=0.001, time_func=time.monotonic): + """ + Use hybrid of time.sleep and spinning to minimize jitter. 
    Sleep dt - slack_time seconds first, then spin for the rest.
    """
    t_start = time_func()
    if dt > slack_time:
        time.sleep(dt - slack_time)
    t_end = t_start + dt
    # busy-wait the remaining slack_time for sub-millisecond accuracy
    while time_func() < t_end:
        pass
    return

def precise_wait(t_end: float, slack_time: float=0.001, time_func=time.monotonic):
    """
    Block until time_func() reaches the absolute deadline t_end.

    Like precise_sleep but takes a deadline instead of a duration:
    coarse time.sleep up to slack_time before the deadline, then spin.
    Returns immediately if the deadline has already passed.
    """
    t_start = time_func()
    t_wait = t_end - t_start
    if t_wait > 0:
        t_sleep = t_wait - slack_time
        if t_sleep > 0:
            time.sleep(t_sleep)
        # spin for the final slack_time to hit the deadline precisely
        while time_func() < t_end:
            pass
    return
diff --git a/diffusion_policy/common/pymunk_override.py b/diffusion_policy/common/pymunk_override.py
new file mode 100644
index 0000000..78ecaa2
--- /dev/null
+++ b/diffusion_policy/common/pymunk_override.py
@@ -0,0 +1,248 @@
# ----------------------------------------------------------------------------
# pymunk
# Copyright (c) 2007-2016 Victor Blomqvist
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
+# ---------------------------------------------------------------------------- + +"""This submodule contains helper functions to help with quick prototyping +using pymunk together with pygame. + +Intended to help with debugging and prototyping, not for actual production use +in a full application. The methods contained in this module is opinionated +about your coordinate system and not in any way optimized. +""" + +__docformat__ = "reStructuredText" + +__all__ = [ + "DrawOptions", + "get_mouse_pos", + "to_pygame", + "from_pygame", + "lighten", + "positive_y_is_up", +] + +from typing import List, Sequence, Tuple + +import pygame + +import numpy as np + +import pymunk +from pymunk.space_debug_draw_options import SpaceDebugColor +from pymunk.vec2d import Vec2d + +positive_y_is_up: bool = False +"""Make increasing values of y point upwards. + +When True:: + + y + ^ + | . (3, 3) + | + | . (2, 2) + | + +------ > x + +When False:: + + +------ > x + | + | . (2, 2) + | + | . (3, 3) + v + y + +""" + + +class DrawOptions(pymunk.SpaceDebugDrawOptions): + def __init__(self, surface: pygame.Surface) -> None: + """Draw a pymunk.Space on a pygame.Surface object. + + Typical usage:: + + >>> import pymunk + >>> surface = pygame.Surface((10,10)) + >>> space = pymunk.Space() + >>> options = pymunk.pygame_util.DrawOptions(surface) + >>> space.debug_draw(options) + + You can control the color of a shape by setting shape.color to the color + you want it drawn in:: + + >>> c = pymunk.Circle(None, 10) + >>> c.color = pygame.Color("pink") + + See pygame_util.demo.py for a full example + + Since pygame uses a coordiante system where y points down (in contrast + to many other cases), you either have to make the physics simulation + with Pymunk also behave in that way, or flip everything when you draw. + + The easiest is probably to just make the simulation behave the same + way as Pygame does. 
In that way all coordinates used are in the same + orientation and easy to reason about:: + + >>> space = pymunk.Space() + >>> space.gravity = (0, -1000) + >>> body = pymunk.Body() + >>> body.position = (0, 0) # will be positioned in the top left corner + >>> space.debug_draw(options) + + To flip the drawing its possible to set the module property + :py:data:`positive_y_is_up` to True. Then the pygame drawing will flip + the simulation upside down before drawing:: + + >>> positive_y_is_up = True + >>> body = pymunk.Body() + >>> body.position = (0, 0) + >>> # Body will be position in bottom left corner + + :Parameters: + surface : pygame.Surface + Surface that the objects will be drawn on + """ + self.surface = surface + super(DrawOptions, self).__init__() + + def draw_circle( + self, + pos: Vec2d, + angle: float, + radius: float, + outline_color: SpaceDebugColor, + fill_color: SpaceDebugColor, + ) -> None: + p = to_pygame(pos, self.surface) + + pygame.draw.circle(self.surface, fill_color.as_int(), p, round(radius), 0) + pygame.draw.circle(self.surface, light_color(fill_color).as_int(), p, round(radius-4), 0) + + circle_edge = pos + Vec2d(radius, 0).rotated(angle) + p2 = to_pygame(circle_edge, self.surface) + line_r = 2 if radius > 20 else 1 + # pygame.draw.lines(self.surface, outline_color.as_int(), False, [p, p2], line_r) + + def draw_segment(self, a: Vec2d, b: Vec2d, color: SpaceDebugColor) -> None: + p1 = to_pygame(a, self.surface) + p2 = to_pygame(b, self.surface) + + pygame.draw.aalines(self.surface, color.as_int(), False, [p1, p2]) + + def draw_fat_segment( + self, + a: Tuple[float, float], + b: Tuple[float, float], + radius: float, + outline_color: SpaceDebugColor, + fill_color: SpaceDebugColor, + ) -> None: + p1 = to_pygame(a, self.surface) + p2 = to_pygame(b, self.surface) + + r = round(max(1, radius * 2)) + pygame.draw.lines(self.surface, fill_color.as_int(), False, [p1, p2], r) + if r > 2: + orthog = [abs(p2[1] - p1[1]), abs(p2[0] - p1[0])] + if 
orthog[0] == 0 and orthog[1] == 0: + return + scale = radius / (orthog[0] * orthog[0] + orthog[1] * orthog[1]) ** 0.5 + orthog[0] = round(orthog[0] * scale) + orthog[1] = round(orthog[1] * scale) + points = [ + (p1[0] - orthog[0], p1[1] - orthog[1]), + (p1[0] + orthog[0], p1[1] + orthog[1]), + (p2[0] + orthog[0], p2[1] + orthog[1]), + (p2[0] - orthog[0], p2[1] - orthog[1]), + ] + pygame.draw.polygon(self.surface, fill_color.as_int(), points) + pygame.draw.circle( + self.surface, + fill_color.as_int(), + (round(p1[0]), round(p1[1])), + round(radius), + ) + pygame.draw.circle( + self.surface, + fill_color.as_int(), + (round(p2[0]), round(p2[1])), + round(radius), + ) + + def draw_polygon( + self, + verts: Sequence[Tuple[float, float]], + radius: float, + outline_color: SpaceDebugColor, + fill_color: SpaceDebugColor, + ) -> None: + ps = [to_pygame(v, self.surface) for v in verts] + ps += [ps[0]] + + radius = 2 + pygame.draw.polygon(self.surface, light_color(fill_color).as_int(), ps) + + if radius > 0: + for i in range(len(verts)): + a = verts[i] + b = verts[(i + 1) % len(verts)] + self.draw_fat_segment(a, b, radius, fill_color, fill_color) + + def draw_dot( + self, size: float, pos: Tuple[float, float], color: SpaceDebugColor + ) -> None: + p = to_pygame(pos, self.surface) + pygame.draw.circle(self.surface, color.as_int(), p, round(size), 0) + + +def get_mouse_pos(surface: pygame.Surface) -> Tuple[int, int]: + """Get position of the mouse pointer in pymunk coordinates.""" + p = pygame.mouse.get_pos() + return from_pygame(p, surface) + + +def to_pygame(p: Tuple[float, float], surface: pygame.Surface) -> Tuple[int, int]: + """Convenience method to convert pymunk coordinates to pygame surface + local coordinates. + + Note that in case positive_y_is_up is False, this function wont actually do + anything except converting the point to integers. 
+ """ + if positive_y_is_up: + return round(p[0]), surface.get_height() - round(p[1]) + else: + return round(p[0]), round(p[1]) + + +def from_pygame(p: Tuple[float, float], surface: pygame.Surface) -> Tuple[int, int]: + """Convenience method to convert pygame surface local coordinates to + pymunk coordinates + """ + return to_pygame(p, surface) + + +def light_color(color: SpaceDebugColor): + color = np.minimum(1.2 * np.float32([color.r, color.g, color.b, color.a]), np.float32([255])) + color = SpaceDebugColor(r=color[0], g=color[1], b=color[2], a=color[3]) + return color diff --git a/diffusion_policy/common/pymunk_util.py b/diffusion_policy/common/pymunk_util.py new file mode 100644 index 0000000..4245994 --- /dev/null +++ b/diffusion_policy/common/pymunk_util.py @@ -0,0 +1,52 @@ +import pygame +import pymunk +import pymunk.pygame_util +import numpy as np + +COLLTYPE_DEFAULT = 0 +COLLTYPE_MOUSE = 1 +COLLTYPE_BALL = 2 + +def get_body_type(static=False): + body_type = pymunk.Body.DYNAMIC + if static: + body_type = pymunk.Body.STATIC + return body_type + + +def create_rectangle(space, + pos_x,pos_y,width,height, + density=3,static=False): + body = pymunk.Body(body_type=get_body_type(static)) + body.position = (pos_x,pos_y) + shape = pymunk.Poly.create_box(body,(width,height)) + shape.density = density + space.add(body,shape) + return body, shape + + +def create_rectangle_bb(space, + left, bottom, right, top, + **kwargs): + pos_x = (left + right) / 2 + pos_y = (top + bottom) / 2 + height = top - bottom + width = right - left + return create_rectangle(space, pos_x, pos_y, width, height, **kwargs) + +def create_circle(space, pos_x, pos_y, radius, density=3, static=False): + body = pymunk.Body(body_type=get_body_type(static)) + body.position = (pos_x, pos_y) + shape = pymunk.Circle(body, radius=radius) + shape.density = density + shape.collision_type = COLLTYPE_BALL + space.add(body, shape) + return body, shape + +def get_body_state(body): + state = np.zeros(6, 
def dict_apply(
        x: Dict[str, torch.Tensor], 
        func: Callable[[torch.Tensor], torch.Tensor]
        ) -> Dict[str, torch.Tensor]:
    """Apply ``func`` to every non-dict value of ``x``, recursing into nested dicts.

    Returns a new dict with the same key structure; ``x`` is not modified.
    """
    return {
        key: dict_apply(value, func) if isinstance(value, dict) else func(value)
        for key, value in x.items()
    }
+ """ + if predicate(root_module): + return func(root_module) + + bn_list = [k.split('.') for k, m + in root_module.named_modules(remove_duplicate=True) + if predicate(m)] + for *parent, k in bn_list: + parent_module = root_module + if len(parent) > 0: + parent_module = root_module.get_submodule('.'.join(parent)) + if isinstance(parent_module, nn.Sequential): + src_module = parent_module[int(k)] + else: + src_module = getattr(parent_module, k) + tgt_module = func(src_module) + if isinstance(parent_module, nn.Sequential): + parent_module[int(k)] = tgt_module + else: + setattr(parent_module, k, tgt_module) + # verify that all BN are replaced + bn_list = [k.split('.') for k, m + in root_module.named_modules(remove_duplicate=True) + if predicate(m)] + assert len(bn_list) == 0 + return root_module + +def optimizer_to(optimizer, device): + for state in optimizer.state.values(): + for k, v in state.items(): + if isinstance(v, torch.Tensor): + state[k] = v.to(device=device) + return optimizer diff --git a/diffusion_policy/common/replay_buffer.py b/diffusion_policy/common/replay_buffer.py new file mode 100644 index 0000000..09dda9b --- /dev/null +++ b/diffusion_policy/common/replay_buffer.py @@ -0,0 +1,588 @@ +from typing import Union, Dict, Optional +import os +import math +import numbers +import zarr +import numcodecs +import numpy as np +from functools import cached_property + +def check_chunks_compatible(chunks: tuple, shape: tuple): + assert len(shape) == len(chunks) + for c in chunks: + assert isinstance(c, numbers.Integral) + assert c > 0 + +def rechunk_recompress_array(group, name, + chunks=None, chunk_length=None, + compressor=None, tmp_key='_temp'): + old_arr = group[name] + if chunks is None: + if chunk_length is not None: + chunks = (chunk_length,) + old_arr.chunks[1:] + else: + chunks = old_arr.chunks + check_chunks_compatible(chunks, old_arr.shape) + + if compressor is None: + compressor = old_arr.compressor + + if (chunks == old_arr.chunks) and (compressor == 
def get_optimal_chunks(shape, dtype, 
        target_chunk_bytes=2e6, 
        max_chunk_length=None):
    """Heuristically pick a zarr chunk shape close to ``target_chunk_bytes``.

    Only the leading (time) dimension is ever split across chunks; trailing
    dimensions are either kept whole or set to 1.

    Common input shapes:
        T,D
        T,N,D
        T,H,W,C
        T,N,H,W,C
    """
    itemsize = np.dtype(dtype).itemsize
    # reversed
    # work on the reversed shape so "innermost" dims come first
    rshape = list(shape[::-1])
    if max_chunk_length is not None:
        # cap the time dimension (last entry of rshape) if requested
        rshape[-1] = int(max_chunk_length)
    # split_idx: first reversed-dim index at which accumulating one more
    # whole dimension would exceed the byte budget
    split_idx = len(shape)-1
    for i in range(len(shape)-1):
        this_chunk_bytes = itemsize * np.prod(rshape[:i])
        next_chunk_bytes = itemsize * np.prod(rshape[:i+1])
        if this_chunk_bytes <= target_chunk_bytes \
            and next_chunk_bytes > target_chunk_bytes:
            split_idx = i

    # dims below the split are kept whole ...
    rchunks = rshape[:split_idx]
    item_chunk_bytes = itemsize * np.prod(rshape[:split_idx])
    this_max_chunk_length = rshape[split_idx]
    # ... the split dim is sized to just fill the remaining byte budget ...
    next_chunk_length = min(this_max_chunk_length, math.ceil(
                    target_chunk_bytes / item_chunk_bytes))
    rchunks.append(next_chunk_length)
    # ... and any dims above the split get chunk extent 1
    len_diff = len(shape) - len(rchunks)
    rchunks.extend([1] * len_diff)
    chunks = tuple(rchunks[::-1])
    # print(np.prod(chunks) * itemsize / target_chunk_bytes)
    return chunks
+ """ + assert('data' in root) + assert('meta' in root) + assert('episode_ends' in root['meta']) + for key, value in root['data'].items(): + assert(value.shape[0] == root['meta']['episode_ends'][-1]) + self.root = root + + # ============= create constructors =============== + @classmethod + def create_empty_zarr(cls, storage=None, root=None): + if root is None: + if storage is None: + storage = zarr.MemoryStore() + root = zarr.group(store=storage) + data = root.require_group('data', overwrite=False) + meta = root.require_group('meta', overwrite=False) + if 'episode_ends' not in meta: + episode_ends = meta.zeros('episode_ends', shape=(0,), dtype=np.int64, + compressor=None, overwrite=False) + return cls(root=root) + + @classmethod + def create_empty_numpy(cls): + root = { + 'data': dict(), + 'meta': { + 'episode_ends': np.zeros((0,), dtype=np.int64) + } + } + return cls(root=root) + + @classmethod + def create_from_group(cls, group, **kwargs): + if 'data' not in group: + # create from stratch + buffer = cls.create_empty_zarr(root=group, **kwargs) + else: + # already exist + buffer = cls(root=group, **kwargs) + return buffer + + @classmethod + def create_from_path(cls, zarr_path, mode='r', **kwargs): + """ + Open a on-disk zarr directly (for dataset larger than memory). + Slower. + """ + group = zarr.open(os.path.expanduser(zarr_path), mode) + return cls.create_from_group(group, **kwargs) + + # ============= copy constructors =============== + @classmethod + def copy_from_store(cls, src_store, store=None, keys=None, + chunks: Dict[str,tuple]=dict(), + compressors: Union[dict, str, numcodecs.abc.Codec]=dict(), + if_exists='replace', + **kwargs): + """ + Load to memory. 
+ """ + src_root = zarr.group(src_store) + root = None + if store is None: + # numpy backend + meta = dict() + for key, value in src_root['meta'].items(): + if len(value.shape) == 0: + meta[key] = np.array(value) + else: + meta[key] = value[:] + + if keys is None: + keys = src_root['data'].keys() + data = dict() + for key in keys: + arr = src_root['data'][key] + data[key] = arr[:] + + root = { + 'meta': meta, + 'data': data + } + else: + root = zarr.group(store=store) + # copy without recompression + n_copied, n_skipped, n_bytes_copied = zarr.copy_store(source=src_store, dest=store, + source_path='/meta', dest_path='/meta', if_exists=if_exists) + data_group = root.create_group('data', overwrite=True) + if keys is None: + keys = src_root['data'].keys() + for key in keys: + value = src_root['data'][key] + cks = cls._resolve_array_chunks( + chunks=chunks, key=key, array=value) + cpr = cls._resolve_array_compressor( + compressors=compressors, key=key, array=value) + if cks == value.chunks and cpr == value.compressor: + # copy without recompression + this_path = '/data/' + key + n_copied, n_skipped, n_bytes_copied = zarr.copy_store( + source=src_store, dest=store, + source_path=this_path, dest_path=this_path, + if_exists=if_exists + ) + else: + # copy with recompression + n_copied, n_skipped, n_bytes_copied = zarr.copy( + source=value, dest=data_group, name=key, + chunks=cks, compressor=cpr, if_exists=if_exists + ) + buffer = cls(root=root) + return buffer + + @classmethod + def copy_from_path(cls, zarr_path, backend=None, store=None, keys=None, + chunks: Dict[str,tuple]=dict(), + compressors: Union[dict, str, numcodecs.abc.Codec]=dict(), + if_exists='replace', + **kwargs): + """ + Copy a on-disk zarr to in-memory compressed. 
+ Recommended + """ + if backend == 'numpy': + print('backend argument is depreacted!') + store = None + group = zarr.open(os.path.expanduser(zarr_path), 'r') + return cls.copy_from_store(src_store=group.store, store=store, + keys=keys, chunks=chunks, compressors=compressors, + if_exists=if_exists, **kwargs) + + # ============= save methods =============== + def save_to_store(self, store, + chunks: Optional[Dict[str,tuple]]=dict(), + compressors: Union[str, numcodecs.abc.Codec, dict]=dict(), + if_exists='replace', + **kwargs): + + root = zarr.group(store) + if self.backend == 'zarr': + # recompression free copy + n_copied, n_skipped, n_bytes_copied = zarr.copy_store( + source=self.root.store, dest=store, + source_path='/meta', dest_path='/meta', if_exists=if_exists) + else: + meta_group = root.create_group('meta', overwrite=True) + # save meta, no chunking + for key, value in self.root['meta'].items(): + _ = meta_group.array( + name=key, + data=value, + shape=value.shape, + chunks=value.shape) + + # save data, chunk + data_group = root.create_group('data', overwrite=True) + for key, value in self.root['data'].items(): + cks = self._resolve_array_chunks( + chunks=chunks, key=key, array=value) + cpr = self._resolve_array_compressor( + compressors=compressors, key=key, array=value) + if isinstance(value, zarr.Array): + if cks == value.chunks and cpr == value.compressor: + # copy without recompression + this_path = '/data/' + key + n_copied, n_skipped, n_bytes_copied = zarr.copy_store( + source=self.root.store, dest=store, + source_path=this_path, dest_path=this_path, if_exists=if_exists) + else: + # copy with recompression + n_copied, n_skipped, n_bytes_copied = zarr.copy( + source=value, dest=data_group, name=key, + chunks=cks, compressor=cpr, if_exists=if_exists + ) + else: + # numpy + _ = data_group.array( + name=key, + data=value, + chunks=cks, + compressor=cpr + ) + return store + + def save_to_path(self, zarr_path, + chunks: Optional[Dict[str,tuple]]=dict(), + 
compressors: Union[str, numcodecs.abc.Codec, dict]=dict(), + if_exists='replace', + **kwargs): + store = zarr.DirectoryStore(os.path.expanduser(zarr_path)) + return self.save_to_store(store, chunks=chunks, + compressors=compressors, if_exists=if_exists, **kwargs) + + @staticmethod + def resolve_compressor(compressor='default'): + if compressor == 'default': + compressor = numcodecs.Blosc(cname='lz4', clevel=5, + shuffle=numcodecs.Blosc.NOSHUFFLE) + elif compressor == 'disk': + compressor = numcodecs.Blosc('zstd', clevel=5, + shuffle=numcodecs.Blosc.BITSHUFFLE) + return compressor + + @classmethod + def _resolve_array_compressor(cls, + compressors: Union[dict, str, numcodecs.abc.Codec], key, array): + # allows compressor to be explicitly set to None + cpr = 'nil' + if isinstance(compressors, dict): + if key in compressors: + cpr = cls.resolve_compressor(compressors[key]) + elif isinstance(array, zarr.Array): + cpr = array.compressor + else: + cpr = cls.resolve_compressor(compressors) + # backup default + if cpr == 'nil': + cpr = cls.resolve_compressor('default') + return cpr + + @classmethod + def _resolve_array_chunks(cls, + chunks: Union[dict, tuple], key, array): + cks = None + if isinstance(chunks, dict): + if key in chunks: + cks = chunks[key] + elif isinstance(array, zarr.Array): + cks = array.chunks + elif isinstance(chunks, tuple): + cks = chunks + else: + raise TypeError(f"Unsupported chunks type {type(chunks)}") + # backup default + if cks is None: + cks = get_optimal_chunks(shape=array.shape, dtype=array.dtype) + # check + check_chunks_compatible(chunks=cks, shape=array.shape) + return cks + + # ============= properties ================= + @cached_property + def data(self): + return self.root['data'] + + @cached_property + def meta(self): + return self.root['meta'] + + def update_meta(self, data): + # sanitize data + np_data = dict() + for key, value in data.items(): + if isinstance(value, np.ndarray): + np_data[key] = value + else: + arr = 
np.array(value) + if arr.dtype == object: + raise TypeError(f"Invalid value type {type(value)}") + np_data[key] = arr + + meta_group = self.meta + if self.backend == 'zarr': + for key, value in np_data.items(): + _ = meta_group.array( + name=key, + data=value, + shape=value.shape, + chunks=value.shape, + overwrite=True) + else: + meta_group.update(np_data) + + return meta_group + + @property + def episode_ends(self): + return self.meta['episode_ends'] + + def get_episode_idxs(self): + import numba + numba.jit(nopython=True) + def _get_episode_idxs(episode_ends): + result = np.zeros((episode_ends[-1],), dtype=np.int64) + for i in range(len(episode_ends)): + start = 0 + if i > 0: + start = episode_ends[i-1] + end = episode_ends[i] + for idx in range(start, end): + result[idx] = i + return result + return _get_episode_idxs(self.episode_ends) + + + @property + def backend(self): + backend = 'numpy' + if isinstance(self.root, zarr.Group): + backend = 'zarr' + return backend + + # =========== dict-like API ============== + def __repr__(self) -> str: + if self.backend == 'zarr': + return str(self.root.tree()) + else: + return super().__repr__() + + def keys(self): + return self.data.keys() + + def values(self): + return self.data.values() + + def items(self): + return self.data.items() + + def __getitem__(self, key): + return self.data[key] + + def __contains__(self, key): + return key in self.data + + # =========== our API ============== + @property + def n_steps(self): + if len(self.episode_ends) == 0: + return 0 + return self.episode_ends[-1] + + @property + def n_episodes(self): + return len(self.episode_ends) + + @property + def chunk_size(self): + if self.backend == 'zarr': + return next(iter(self.data.arrays()))[-1].chunks[0] + return None + + @property + def episode_lengths(self): + ends = self.episode_ends[:] + ends = np.insert(ends, 0, 0) + lengths = np.diff(ends) + return lengths + + def add_episode(self, + data: Dict[str, np.ndarray], + chunks: 
Optional[Dict[str,tuple]]=dict(), + compressors: Union[str, numcodecs.abc.Codec, dict]=dict()): + assert(len(data) > 0) + is_zarr = (self.backend == 'zarr') + + curr_len = self.n_steps + episode_length = None + for key, value in data.items(): + assert(len(value.shape) >= 1) + if episode_length is None: + episode_length = len(value) + else: + assert(episode_length == len(value)) + new_len = curr_len + episode_length + + for key, value in data.items(): + new_shape = (new_len,) + value.shape[1:] + # create array + if key not in self.data: + if is_zarr: + cks = self._resolve_array_chunks( + chunks=chunks, key=key, array=value) + cpr = self._resolve_array_compressor( + compressors=compressors, key=key, array=value) + arr = self.data.zeros(name=key, + shape=new_shape, + chunks=cks, + dtype=value.dtype, + compressor=cpr) + else: + # copy data to prevent modify + arr = np.zeros(shape=new_shape, dtype=value.dtype) + self.data[key] = arr + else: + arr = self.data[key] + assert(value.shape[1:] == arr.shape[1:]) + # same method for both zarr and numpy + if is_zarr: + arr.resize(new_shape) + else: + arr.resize(new_shape, refcheck=False) + # copy data + arr[-value.shape[0]:] = value + + # append to episode ends + episode_ends = self.episode_ends + if is_zarr: + episode_ends.resize(episode_ends.shape[0] + 1) + else: + episode_ends.resize(episode_ends.shape[0] + 1, refcheck=False) + episode_ends[-1] = new_len + + # rechunk + if is_zarr: + if episode_ends.chunks[0] < episode_ends.shape[0]: + rechunk_recompress_array(self.meta, 'episode_ends', + chunk_length=int(episode_ends.shape[0] * 1.5)) + + def drop_episode(self): + is_zarr = (self.backend == 'zarr') + episode_ends = self.episode_ends[:].copy() + assert(len(episode_ends) > 0) + start_idx = 0 + if len(episode_ends) > 1: + start_idx = episode_ends[-2] + for key, value in self.data.items(): + new_shape = (start_idx,) + value.shape[1:] + if is_zarr: + value.resize(new_shape) + else: + value.resize(new_shape, refcheck=False) + if 
is_zarr: + self.episode_ends.resize(len(episode_ends)-1) + else: + self.episode_ends.resize(len(episode_ends)-1, refcheck=False) + + def pop_episode(self): + assert(self.n_episodes > 0) + episode = self.get_episode(self.n_episodes-1, copy=True) + self.drop_episode() + return episode + + def extend(self, data): + self.add_episode(data) + + def get_episode(self, idx, copy=False): + idx = list(range(len(self.episode_ends)))[idx] + start_idx = 0 + if idx > 0: + start_idx = self.episode_ends[idx-1] + end_idx = self.episode_ends[idx] + result = self.get_steps_slice(start_idx, end_idx, copy=copy) + return result + + def get_episode_slice(self, idx): + start_idx = 0 + if idx > 0: + start_idx = self.episode_ends[idx-1] + end_idx = self.episode_ends[idx] + return slice(start_idx, end_idx) + + def get_steps_slice(self, start, stop, step=None, copy=False): + _slice = slice(start, stop, step) + + result = dict() + for key, value in self.data.items(): + x = value[_slice] + if copy and isinstance(value, np.ndarray): + x = x.copy() + result[key] = x + return result + + # =========== chunking ============= + def get_chunks(self) -> dict: + assert self.backend == 'zarr' + chunks = dict() + for key, value in self.data.items(): + chunks[key] = value.chunks + return chunks + + def set_chunks(self, chunks: dict): + assert self.backend == 'zarr' + for key, value in chunks.items(): + if key in self.data: + arr = self.data[key] + if value != arr.chunks: + check_chunks_compatible(chunks=value, shape=arr.shape) + rechunk_recompress_array(self.data, key, chunks=value) + + def get_compressors(self) -> dict: + assert self.backend == 'zarr' + compressors = dict() + for key, value in self.data.items(): + compressors[key] = value.compressor + return compressors + + def set_compressors(self, compressors: dict): + assert self.backend == 'zarr' + for key, value in compressors.items(): + if key in self.data: + arr = self.data[key] + compressor = self.resolve_compressor(value) + if compressor != 
arr.compressor: + rechunk_recompress_array(self.data, key, compressor=compressor) diff --git a/diffusion_policy/common/robomimic_config_util.py b/diffusion_policy/common/robomimic_config_util.py new file mode 100644 index 0000000..85768bb --- /dev/null +++ b/diffusion_policy/common/robomimic_config_util.py @@ -0,0 +1,47 @@ +from omegaconf import OmegaConf +from robomimic.config import config_factory +import robomimic.scripts.generate_paper_configs as gpc +from robomimic.scripts.generate_paper_configs import ( + modify_config_for_default_image_exp, + modify_config_for_default_low_dim_exp, + modify_config_for_dataset, +) + +def get_robomimic_config( + algo_name='bc_rnn', + hdf5_type='low_dim', + task_name='square', + dataset_type='ph' + ): + base_dataset_dir = '/tmp/null' + filter_key = None + + # decide whether to use low-dim or image training defaults + modifier_for_obs = modify_config_for_default_image_exp + if hdf5_type in ["low_dim", "low_dim_sparse", "low_dim_dense"]: + modifier_for_obs = modify_config_for_default_low_dim_exp + + algo_config_name = "bc" if algo_name == "bc_rnn" else algo_name + config = config_factory(algo_name=algo_config_name) + # turn into default config for observation modalities (e.g.: low-dim or rgb) + config = modifier_for_obs(config) + # add in config based on the dataset + config = modify_config_for_dataset( + config=config, + task_name=task_name, + dataset_type=dataset_type, + hdf5_type=hdf5_type, + base_dataset_dir=base_dataset_dir, + filter_key=filter_key, + ) + # add in algo hypers based on dataset + algo_config_modifier = getattr(gpc, f'modify_{algo_name}_config_for_dataset') + config = algo_config_modifier( + config=config, + task_name=task_name, + dataset_type=dataset_type, + hdf5_type=hdf5_type, + ) + return config + + diff --git a/diffusion_policy/common/robomimic_util.py b/diffusion_policy/common/robomimic_util.py new file mode 100644 index 0000000..24f25fb --- /dev/null +++ b/diffusion_policy/common/robomimic_util.py @@ -0,0 
+1,177 @@ +import numpy as np +import copy + +import h5py +import robomimic.utils.obs_utils as ObsUtils +import robomimic.utils.file_utils as FileUtils +import robomimic.utils.env_utils as EnvUtils +from scipy.spatial.transform import Rotation + +from robomimic.config import config_factory + + +class RobomimicAbsoluteActionConverter: + def __init__(self, dataset_path, algo_name='bc'): + # default BC config + config = config_factory(algo_name=algo_name) + + # read config to set up metadata for observation modalities (e.g. detecting rgb observations) + # must ran before create dataset + ObsUtils.initialize_obs_utils_with_config(config) + + env_meta = FileUtils.get_env_metadata_from_dataset(dataset_path) + abs_env_meta = copy.deepcopy(env_meta) + abs_env_meta['env_kwargs']['controller_configs']['control_delta'] = False + + env = EnvUtils.create_env_from_metadata( + env_meta=env_meta, + render=False, + render_offscreen=False, + use_image_obs=False, + ) + assert len(env.env.robots) in (1, 2) + + abs_env = EnvUtils.create_env_from_metadata( + env_meta=abs_env_meta, + render=False, + render_offscreen=False, + use_image_obs=False, + ) + assert not abs_env.env.robots[0].controller.use_delta + + self.env = env + self.abs_env = abs_env + self.file = h5py.File(dataset_path, 'r') + + def __len__(self): + return len(self.file['data']) + + def convert_actions(self, + states: np.ndarray, + actions: np.ndarray) -> np.ndarray: + """ + Given state and delta action sequence + generate equivalent goal position and orientation for each step + keep the original gripper action intact. 
+ """ + # in case of multi robot + # reshape (N,14) to (N,2,7) + # or (N,7) to (N,1,7) + stacked_actions = actions.reshape(*actions.shape[:-1],-1,7) + + env = self.env + # generate abs actions + action_goal_pos = np.zeros( + stacked_actions.shape[:-1]+(3,), + dtype=stacked_actions.dtype) + action_goal_ori = np.zeros( + stacked_actions.shape[:-1]+(3,), + dtype=stacked_actions.dtype) + action_gripper = stacked_actions[...,[-1]] + for i in range(len(states)): + _ = env.reset_to({'states': states[i]}) + + # taken from robot_env.py L#454 + for idx, robot in enumerate(env.env.robots): + # run controller goal generator + robot.control(stacked_actions[i,idx], policy_step=True) + + # read pos and ori from robots + controller = robot.controller + action_goal_pos[i,idx] = controller.goal_pos + action_goal_ori[i,idx] = Rotation.from_matrix( + controller.goal_ori).as_rotvec() + + stacked_abs_actions = np.concatenate([ + action_goal_pos, + action_goal_ori, + action_gripper + ], axis=-1) + abs_actions = stacked_abs_actions.reshape(actions.shape) + return abs_actions + + def convert_idx(self, idx): + file = self.file + demo = file[f'data/demo_{idx}'] + # input + states = demo['states'][:] + actions = demo['actions'][:] + + # generate abs actions + abs_actions = self.convert_actions(states, actions) + return abs_actions + + def convert_and_eval_idx(self, idx): + env = self.env + abs_env = self.abs_env + file = self.file + # first step have high error for some reason, not representative + eval_skip_steps = 1 + + demo = file[f'data/demo_{idx}'] + # input + states = demo['states'][:] + actions = demo['actions'][:] + + # generate abs actions + abs_actions = self.convert_actions(states, actions) + + # verify + robot0_eef_pos = demo['obs']['robot0_eef_pos'][:] + robot0_eef_quat = demo['obs']['robot0_eef_quat'][:] + + delta_error_info = self.evaluate_rollout_error( + env, states, actions, robot0_eef_pos, robot0_eef_quat, + metric_skip_steps=eval_skip_steps) + abs_error_info = 
self.evaluate_rollout_error( + abs_env, states, abs_actions, robot0_eef_pos, robot0_eef_quat, + metric_skip_steps=eval_skip_steps) + + info = { + 'delta_max_error': delta_error_info, + 'abs_max_error': abs_error_info + } + return abs_actions, info + + @staticmethod + def evaluate_rollout_error(env, + states, actions, + robot0_eef_pos, + robot0_eef_quat, + metric_skip_steps=1): + # first step have high error for some reason, not representative + + # evaluate abs actions + rollout_next_states = list() + rollout_next_eef_pos = list() + rollout_next_eef_quat = list() + obs = env.reset_to({'states': states[0]}) + for i in range(len(states)): + obs = env.reset_to({'states': states[i]}) + obs, reward, done, info = env.step(actions[i]) + obs = env.get_observation() + rollout_next_states.append(env.get_state()['states']) + rollout_next_eef_pos.append(obs['robot0_eef_pos']) + rollout_next_eef_quat.append(obs['robot0_eef_quat']) + rollout_next_states = np.array(rollout_next_states) + rollout_next_eef_pos = np.array(rollout_next_eef_pos) + rollout_next_eef_quat = np.array(rollout_next_eef_quat) + + next_state_diff = states[1:] - rollout_next_states[:-1] + max_next_state_diff = np.max(np.abs(next_state_diff[metric_skip_steps:])) + + next_eef_pos_diff = robot0_eef_pos[1:] - rollout_next_eef_pos[:-1] + next_eef_pos_dist = np.linalg.norm(next_eef_pos_diff, axis=-1) + max_next_eef_pos_dist = next_eef_pos_dist[metric_skip_steps:].max() + + next_eef_rot_diff = Rotation.from_quat(robot0_eef_quat[1:]) \ + * Rotation.from_quat(rollout_next_eef_quat[:-1]).inv() + next_eef_rot_dist = next_eef_rot_diff.magnitude() + max_next_eef_rot_dist = next_eef_rot_dist[metric_skip_steps:].max() + + info = { + 'state': max_next_state_diff, + 'pos': max_next_eef_pos_dist, + 'rot': max_next_eef_rot_dist + } + return info diff --git a/diffusion_policy/common/sampler.py b/diffusion_policy/common/sampler.py new file mode 100644 index 0000000..4accaa0 --- /dev/null +++ b/diffusion_policy/common/sampler.py @@ 
@numba.jit(nopython=True)
def create_indices(
        episode_ends: np.ndarray, sequence_length: int,
        episode_mask: np.ndarray,
        pad_before: int = 0, pad_after: int = 0,
        debug: bool = True) -> np.ndarray:
    """Build an (N, 4) index table of all sampleable (padded) windows.

    Each row is (buffer_start_idx, buffer_end_idx, sample_start_idx,
    sample_end_idx): the slice of the replay buffer to read, and where that
    slice lands inside the fixed ``sequence_length`` output window.
    Episodes whose ``episode_mask`` entry is False are skipped.
    """
    # BUG FIX: this was a bare `==` comparison whose result was discarded;
    # it was clearly intended to validate the mask length.
    assert episode_mask.shape == episode_ends.shape
    # clamp padding into [0, sequence_length - 1]
    pad_before = min(max(pad_before, 0), sequence_length-1)
    pad_after = min(max(pad_after, 0), sequence_length-1)

    indices = list()
    for i in range(len(episode_ends)):
        if not episode_mask[i]:
            # skip episode
            continue
        start_idx = 0
        if i > 0:
            start_idx = episode_ends[i-1]
        end_idx = episode_ends[i]
        episode_length = end_idx - start_idx

        # window start may extend pad_before steps before the episode and
        # pad_after steps past its end
        min_start = -pad_before
        max_start = episode_length - sequence_length + pad_after

        # range stops one idx before end
        for idx in range(min_start, max_start+1):
            buffer_start_idx = max(idx, 0) + start_idx
            buffer_end_idx = min(idx+sequence_length, episode_length) + start_idx
            start_offset = buffer_start_idx - (idx+start_idx)
            end_offset = (idx+sequence_length+start_idx) - buffer_end_idx
            sample_start_idx = 0 + start_offset
            sample_end_idx = sequence_length - end_offset
            if debug:
                assert(start_offset >= 0)
                assert(end_offset >= 0)
                assert (sample_end_idx - sample_start_idx) == (buffer_end_idx - buffer_start_idx)
            indices.append([
                buffer_start_idx, buffer_end_idx,
                sample_start_idx, sample_end_idx])
    indices = np.array(indices)
    return indices
class SequenceSampler:
    """Samples fixed-length, optionally padded windows from a ReplayBuffer."""

    def __init__(self,
        replay_buffer: ReplayBuffer,
        sequence_length: int,
        pad_before: int = 0,
        pad_after: int = 0,
        keys=None,
        key_first_k=None,
        episode_mask: Optional[np.ndarray] = None,
        ):
        """
        replay_buffer: source of episode data.
        sequence_length: length of every returned window.
        pad_before / pad_after: how far a window may extend past episode
            boundaries; out-of-episode steps are filled by edge replication.
        keys: data keys to sample (default: all keys in the buffer).
        key_first_k: dict str -> int. Only load the first k steps of these
            keys per window (to improve perf); the rest is left as NaN.
        episode_mask: boolean mask selecting which episodes may be sampled.
        """
        super().__init__()
        assert(sequence_length >= 1)
        if keys is None:
            keys = list(replay_buffer.keys())
        if key_first_k is None:
            # BUG FIX: was a mutable default argument (`key_first_k=dict()`)
            key_first_k = dict()

        episode_ends = replay_buffer.episode_ends[:]
        if episode_mask is None:
            episode_mask = np.ones(episode_ends.shape, dtype=bool)

        if np.any(episode_mask):
            indices = create_indices(episode_ends,
                sequence_length=sequence_length,
                pad_before=pad_before,
                pad_after=pad_after,
                episode_mask=episode_mask
                )
        else:
            # no episode selected: empty index table
            indices = np.zeros((0, 4), dtype=np.int64)

        # each row: (buffer_start_idx, buffer_end_idx, sample_start_idx, sample_end_idx)
        self.indices = indices
        self.keys = list(keys)  # prevent OmegaConf list performance problem
        self.sequence_length = sequence_length
        self.replay_buffer = replay_buffer
        self.key_first_k = key_first_k

    def __len__(self):
        return len(self.indices)

    def sample_sequence(self, idx):
        """Return dict key -> (sequence_length, ...) array for window ``idx``."""
        buffer_start_idx, buffer_end_idx, sample_start_idx, sample_end_idx \
            = self.indices[idx]
        result = dict()
        for key in self.keys:
            input_arr = self.replay_buffer[key]
            # performance optimization, avoid small allocation if possible
            if key not in self.key_first_k:
                sample = input_arr[buffer_start_idx:buffer_end_idx]
            else:
                # performance optimization, only load used obs steps
                n_data = buffer_end_idx - buffer_start_idx
                k_data = min(self.key_first_k[key], n_data)
                # fill value with NaN to catch bugs:
                # the non-loaded region should never be used
                sample = np.full((n_data,) + input_arr.shape[1:],
                    fill_value=np.nan, dtype=input_arr.dtype)
                # BUG FIX: removed a leftover `try/except: pdb.set_trace()`
                # debugging trap; failures now propagate normally.
                sample[:k_data] = input_arr[buffer_start_idx:buffer_start_idx + k_data]
            data = sample
            if (sample_start_idx > 0) or (sample_end_idx < self.sequence_length):
                # window extends past the episode boundary: allocate the
                # full-length output and replicate the edge values
                data = np.zeros(
                    shape=(self.sequence_length,) + input_arr.shape[1:],
                    dtype=input_arr.dtype)
                if sample_start_idx > 0:
                    data[:sample_start_idx] = sample[0]
                if sample_end_idx < self.sequence_length:
                    data[sample_end_idx:] = sample[-1]
                data[sample_start_idx:sample_end_idx] = sample
            result[key] = data
        return result
+ """ + local_idxs = list() + global_idxs = list() + for local_idx, ts in enumerate(timestamps): + # add eps * dt to timestamps so that when ts == start_time + k * dt + # is always recorded as kth element (avoiding floating point errors) + global_idx = math.floor((ts - start_time) / dt + eps) + if (not allow_negative) and (global_idx < 0): + continue + if next_global_idx is None: + next_global_idx = global_idx + + n_repeats = max(0, global_idx - next_global_idx + 1) + for i in range(n_repeats): + local_idxs.append(local_idx) + global_idxs.append(next_global_idx + i) + next_global_idx += n_repeats + return local_idxs, global_idxs, next_global_idx + + +def align_timestamps( + timestamps: List[float], + target_global_idxs: List[int], + start_time: float, + dt: float, + eps:float=1e-5): + if isinstance(target_global_idxs, np.ndarray): + target_global_idxs = target_global_idxs.tolist() + assert len(target_global_idxs) > 0 + + local_idxs, global_idxs, _ = get_accumulate_timestamp_idxs( + timestamps=timestamps, + start_time=start_time, + dt=dt, + eps=eps, + next_global_idx=target_global_idxs[0], + allow_negative=True + ) + if len(global_idxs) > len(target_global_idxs): + # if more steps avaliable, truncate + global_idxs = global_idxs[:len(target_global_idxs)] + local_idxs = local_idxs[:len(target_global_idxs)] + + if len(global_idxs) == 0: + import pdb; pdb.set_trace() + + for i in range(len(target_global_idxs) - len(global_idxs)): + # if missing, repeat + local_idxs.append(len(timestamps)-1) + global_idxs.append(global_idxs[-1] + 1) + assert global_idxs == target_global_idxs + assert len(local_idxs) == len(global_idxs) + return local_idxs + + +class TimestampObsAccumulator: + def __init__(self, + start_time: float, + dt: float, + eps: float=1e-5): + self.start_time = start_time + self.dt = dt + self.eps = eps + self.obs_buffer = dict() + self.timestamp_buffer = None + self.next_global_idx = 0 + + def __len__(self): + return self.next_global_idx + + @property + def 
class TimestampObsAccumulator:
    """
    Accumulate observation dicts onto a fixed-rate time grid.

    Each put() snaps incoming timestamps onto a grid of period `dt`
    starting at `start_time` (via get_accumulate_timestamp_idxs) and
    writes the corresponding samples into lazily grown buffers.
    """

    def __init__(self,
            start_time: float,
            dt: float,
            eps: float=1e-5):
        # origin of the global time grid
        self.start_time = start_time
        # grid period in seconds
        self.dt = dt
        # tolerance for snapping timestamps to grid indices
        self.eps = eps
        # key -> (capacity, *feature) arrays; allocated on first put()
        self.obs_buffer = dict()
        # (capacity,) float64 of actually received timestamps; None until first put()
        self.timestamp_buffer = None
        # next unwritten global grid index == number of valid entries
        self.next_global_idx = 0

    def __len__(self):
        # number of grid slots filled so far
        return self.next_global_idx

    @property
    def data(self):
        """Dict of accumulated observations, truncated to the valid length."""
        if self.timestamp_buffer is None:
            return dict()
        result = dict()
        for key, value in self.obs_buffer.items():
            result[key] = value[:len(self)]
        return result

    @property
    def actual_timestamps(self):
        """Timestamps as actually received (one per filled grid slot)."""
        if self.timestamp_buffer is None:
            return np.array([])
        return self.timestamp_buffer[:len(self)]

    @property
    def timestamps(self):
        """Idealized grid timestamps: start_time + k * dt."""
        if self.timestamp_buffer is None:
            return np.array([])
        return self.start_time + np.arange(len(self)) * self.dt

    def put(self, data: Dict[str, np.ndarray], timestamps: np.ndarray):
        """
        data:
            key: T,*
        """
        # map incoming timestamps to grid slots, continuing from the
        # previous call (dropped frames repeat the prior sample)
        local_idxs, global_idxs, self.next_global_idx = get_accumulate_timestamp_idxs(
            timestamps=timestamps,
            start_time=self.start_time,
            dt=self.dt,
            eps=self.eps,
            next_global_idx=self.next_global_idx
        )

        if len(global_idxs) > 0:
            if self.timestamp_buffer is None:
                # first allocation, sized to this batch; grown below if needed
                self.obs_buffer = dict()
                for key, value in data.items():
                    self.obs_buffer[key] = np.zeros_like(value)
                self.timestamp_buffer = np.zeros(
                    (len(timestamps),), dtype=np.float64)

            this_max_size = global_idxs[-1] + 1
            if this_max_size > len(self.timestamp_buffer):
                # reallocate with at least 2x growth to amortize copies
                new_size = max(this_max_size, len(self.timestamp_buffer) * 2)
                for key in list(self.obs_buffer.keys()):
                    new_shape = (new_size,) + self.obs_buffer[key].shape[1:]
                    # NOTE(review): np.resize pads by repeating data, but the
                    # region beyond len(self) is never exposed by `data` anyway
                    self.obs_buffer[key] = np.resize(self.obs_buffer[key], new_shape)
                self.timestamp_buffer = np.resize(self.timestamp_buffer, (new_size))

            # write data
            for key, value in self.obs_buffer.items():
                value[global_idxs] = data[key][local_idxs]
            self.timestamp_buffer[global_idxs] = timestamps[local_idxs]
+ """ + self.start_time = start_time + self.dt = dt + self.eps = eps + self.action_buffer = None + self.timestamp_buffer = None + self.size = 0 + + def __len__(self): + return self.size + + @property + def actions(self): + if self.action_buffer is None: + return np.array([]) + return self.action_buffer[:len(self)] + + @property + def actual_timestamps(self): + if self.timestamp_buffer is None: + return np.array([]) + return self.timestamp_buffer[:len(self)] + + @property + def timestamps(self): + if self.timestamp_buffer is None: + return np.array([]) + return self.start_time + np.arange(len(self)) * self.dt + + def put(self, actions: np.ndarray, timestamps: np.ndarray): + """ + Note: timestamps is the time when the action will be issued, + not when the action will be completed (target_timestamp) + """ + + local_idxs, global_idxs, _ = get_accumulate_timestamp_idxs( + timestamps=timestamps, + start_time=self.start_time, + dt=self.dt, + eps=self.eps, + # allows overwriting previous actions + next_global_idx=None + ) + + if len(global_idxs) > 0: + if self.timestamp_buffer is None: + # first allocation + self.action_buffer = np.zeros_like(actions) + self.timestamp_buffer = np.zeros((len(actions),), dtype=np.float64) + + this_max_size = global_idxs[-1] + 1 + if this_max_size > len(self.timestamp_buffer): + # reallocate + new_size = max(this_max_size, len(self.timestamp_buffer) * 2) + new_shape = (new_size,) + self.action_buffer.shape[1:] + self.action_buffer = np.resize(self.action_buffer, new_shape) + self.timestamp_buffer = np.resize(self.timestamp_buffer, (new_size,)) + + # potentally rewrite old data (as expected) + self.action_buffer[global_idxs] = actions[local_idxs] + self.timestamp_buffer[global_idxs] = timestamps[local_idxs] + self.size = max(self.size, this_max_size) diff --git a/diffusion_policy/config/task/blockpush_lowdim_seed.yaml b/diffusion_policy/config/task/blockpush_lowdim_seed.yaml new file mode 100644 index 0000000..aa1e636 --- /dev/null +++ 
b/diffusion_policy/config/task/blockpush_lowdim_seed.yaml @@ -0,0 +1,34 @@ +name: blockpush_lowdim_seed + +obs_dim: 16 +action_dim: 2 +keypoint_dim: 2 +obs_eef_target: True + +env_runner: + _target_: diffusion_policy.env_runner.blockpush_lowdim_runner.BlockPushLowdimRunner + n_train: 6 + n_train_vis: 2 + train_start_seed: 0 + n_test: 50 + n_test_vis: 4 + test_start_seed: 100000 + max_steps: 350 + n_obs_steps: ${n_obs_steps} + n_action_steps: ${n_action_steps} + fps: 5 + past_action: ${past_action_visible} + abs_action: False + obs_eef_target: ${task.obs_eef_target} + n_envs: null + +dataset: + _target_: diffusion_policy.dataset.blockpush_lowdim_dataset.BlockPushLowdimDataset + zarr_path: data/block_pushing/multimodal_push_seed.zarr + horizon: ${horizon} + pad_before: ${eval:'${n_obs_steps}-1'} + pad_after: ${eval:'${n_action_steps}-1'} + obs_eef_target: ${task.obs_eef_target} + use_manual_normalizer: False + seed: 42 + val_ratio: 0.02 diff --git a/diffusion_policy/config/task/blockpush_lowdim_seed_abs.yaml b/diffusion_policy/config/task/blockpush_lowdim_seed_abs.yaml new file mode 100644 index 0000000..0649121 --- /dev/null +++ b/diffusion_policy/config/task/blockpush_lowdim_seed_abs.yaml @@ -0,0 +1,34 @@ +name: blockpush_lowdim_seed_abs + +obs_dim: 16 +action_dim: 2 +keypoint_dim: 2 +obs_eef_target: True + +env_runner: + _target_: diffusion_policy.env_runner.blockpush_lowdim_runner.BlockPushLowdimRunner + n_train: 6 + n_train_vis: 2 + train_start_seed: 0 + n_test: 50 + n_test_vis: 4 + test_start_seed: 100000 + max_steps: 350 + n_obs_steps: ${n_obs_steps} + n_action_steps: ${n_action_steps} + fps: 5 + past_action: ${past_action_visible} + abs_action: True + obs_eef_target: ${task.obs_eef_target} + n_envs: null + +dataset: + _target_: diffusion_policy.dataset.blockpush_lowdim_dataset.BlockPushLowdimDataset + zarr_path: data/block_pushing/multimodal_push_seed_abs.zarr + horizon: ${horizon} + pad_before: ${eval:'${n_obs_steps}-1'} + pad_after: 
${eval:'${n_action_steps}-1'} + obs_eef_target: ${task.obs_eef_target} + use_manual_normalizer: False + seed: 42 + val_ratio: 0.02 diff --git a/diffusion_policy/config/task/can_image.yaml b/diffusion_policy/config/task/can_image.yaml new file mode 100644 index 0000000..10158bb --- /dev/null +++ b/diffusion_policy/config/task/can_image.yaml @@ -0,0 +1,64 @@ +name: can_image + +shape_meta: &shape_meta + # acceptable types: rgb, low_dim + obs: + agentview_image: + shape: [3, 84, 84] + type: rgb + robot0_eye_in_hand_image: + shape: [3, 84, 84] + type: rgb + robot0_eef_pos: + shape: [3] + # type default: low_dim + robot0_eef_quat: + shape: [4] + robot0_gripper_qpos: + shape: [2] + action: + shape: [7] + +task_name: &task_name can +dataset_type: &dataset_type ph +dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/image.hdf5 +abs_action: &abs_action False + +env_runner: + _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner + dataset_path: *dataset_path + shape_meta: *shape_meta + # costs 1GB per env + n_train: 6 + n_train_vis: 2 + train_start_idx: 0 + n_test: 50 + n_test_vis: 4 + test_start_seed: 100000 + # use python's eval function as resolver, single-quoted string as argument + max_steps: ${eval:'500 if "${task.dataset_type}" == "mh" else 400'} + n_obs_steps: ${n_obs_steps} + n_action_steps: ${n_action_steps} + render_obs_key: 'agentview_image' + fps: 10 + crf: 22 + past_action: ${past_action_visible} + abs_action: *abs_action + tqdm_interval_sec: 1.0 + n_envs: 28 +# evaluation at this config requires a 16 core 64GB instance. 
+ +dataset: + _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset + shape_meta: *shape_meta + dataset_path: *dataset_path + horizon: ${horizon} + pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} + pad_after: ${eval:'${n_action_steps}-1'} + n_obs_steps: ${dataset_obs_steps} + abs_action: *abs_action + rotation_rep: 'rotation_6d' + use_legacy_normalizer: False + use_cache: True + seed: 42 + val_ratio: 0.02 diff --git a/diffusion_policy/config/task/can_image_abs.yaml b/diffusion_policy/config/task/can_image_abs.yaml new file mode 100644 index 0000000..eee34dc --- /dev/null +++ b/diffusion_policy/config/task/can_image_abs.yaml @@ -0,0 +1,64 @@ +name: can_image + +shape_meta: &shape_meta + # acceptable types: rgb, low_dim + obs: + agentview_image: + shape: [3, 84, 84] + type: rgb + robot0_eye_in_hand_image: + shape: [3, 84, 84] + type: rgb + robot0_eef_pos: + shape: [3] + # type default: low_dim + robot0_eef_quat: + shape: [4] + robot0_gripper_qpos: + shape: [2] + action: + shape: [10] + +task_name: &task_name can +dataset_type: &dataset_type ph +dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/image_abs.hdf5 +abs_action: &abs_action True + +env_runner: + _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner + dataset_path: *dataset_path + shape_meta: *shape_meta + # costs 1GB per env + n_train: 6 + n_train_vis: 2 + train_start_idx: 0 + n_test: 50 + n_test_vis: 4 + test_start_seed: 100000 + # use python's eval function as resolver, single-quoted string as argument + max_steps: ${eval:'500 if "${task.dataset_type}" == "mh" else 400'} + n_obs_steps: ${n_obs_steps} + n_action_steps: ${n_action_steps} + render_obs_key: 'agentview_image' + fps: 10 + crf: 22 + past_action: ${past_action_visible} + abs_action: *abs_action + tqdm_interval_sec: 1.0 + n_envs: 28 +# evaluation at this config requires a 16 core 64GB instance. 
+ +dataset: + _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset + shape_meta: *shape_meta + dataset_path: *dataset_path + horizon: ${horizon} + pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} + pad_after: ${eval:'${n_action_steps}-1'} + n_obs_steps: ${dataset_obs_steps} + abs_action: *abs_action + rotation_rep: 'rotation_6d' + use_legacy_normalizer: False + use_cache: True + seed: 42 + val_ratio: 0.02 diff --git a/diffusion_policy/config/task/can_lowdim.yaml b/diffusion_policy/config/task/can_lowdim.yaml new file mode 100644 index 0000000..ba8d20f --- /dev/null +++ b/diffusion_policy/config/task/can_lowdim.yaml @@ -0,0 +1,45 @@ +name: can_lowdim + +obs_dim: 23 +action_dim: 7 +keypoint_dim: 3 + +obs_keys: &obs_keys ['object', 'robot0_eef_pos', 'robot0_eef_quat', 'robot0_gripper_qpos'] +task_name: &task_name can +dataset_type: &dataset_type ph +dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/low_dim.hdf5 +abs_action: &abs_action False + +env_runner: + _target_: diffusion_policy.env_runner.robomimic_lowdim_runner.RobomimicLowdimRunner + dataset_path: *dataset_path + obs_keys: *obs_keys + n_train: 6 + n_train_vis: 2 + train_start_idx: 0 + n_test: 50 + n_test_vis: 4 + test_start_seed: 100000 + # use python's eval function as resolver, single-quoted string as argument + max_steps: ${eval:'500 if "${task.dataset_type}" == "mh" else 400'} + n_obs_steps: ${n_obs_steps} + n_action_steps: ${n_action_steps} + n_latency_steps: ${n_latency_steps} + render_hw: [128,128] + fps: 10 + crf: 22 + past_action: ${past_action_visible} + abs_action: *abs_action + n_envs: 28 + +dataset: + _target_: diffusion_policy.dataset.robomimic_replay_lowdim_dataset.RobomimicReplayLowdimDataset + dataset_path: *dataset_path + horizon: ${horizon} + pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} + pad_after: ${eval:'${n_action_steps}-1'} + obs_keys: *obs_keys + abs_action: *abs_action + 
use_legacy_normalizer: False + seed: 42 + val_ratio: 0.02 diff --git a/diffusion_policy/config/task/can_lowdim_abs.yaml b/diffusion_policy/config/task/can_lowdim_abs.yaml new file mode 100644 index 0000000..a9f723b --- /dev/null +++ b/diffusion_policy/config/task/can_lowdim_abs.yaml @@ -0,0 +1,46 @@ +name: can_lowdim + +obs_dim: 23 +action_dim: 10 +keypoint_dim: 3 + +obs_keys: &obs_keys ['object', 'robot0_eef_pos', 'robot0_eef_quat', 'robot0_gripper_qpos'] +task_name: &task_name can +dataset_type: &dataset_type ph +dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/low_dim_abs.hdf5 +abs_action: &abs_action True + +env_runner: + _target_: diffusion_policy.env_runner.robomimic_lowdim_runner.RobomimicLowdimRunner + dataset_path: *dataset_path + obs_keys: *obs_keys + n_train: 6 + n_train_vis: 2 + train_start_idx: 0 + n_test: 50 + n_test_vis: 4 + test_start_seed: 100000 + # use python's eval function as resolver, single-quoted string as argument + max_steps: ${eval:'500 if "${task.dataset_type}" == "mh" else 400'} + n_obs_steps: ${n_obs_steps} + n_action_steps: ${n_action_steps} + n_latency_steps: ${n_latency_steps} + render_hw: [128,128] + fps: 10 + crf: 22 + past_action: ${past_action_visible} + abs_action: *abs_action + n_envs: 28 + +dataset: + _target_: diffusion_policy.dataset.robomimic_replay_lowdim_dataset.RobomimicReplayLowdimDataset + dataset_path: *dataset_path + horizon: ${horizon} + pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} + pad_after: ${eval:'${n_action_steps}-1'} + obs_keys: *obs_keys + abs_action: *abs_action + use_legacy_normalizer: False + rotation_rep: rotation_6d + seed: 42 + val_ratio: 0.02 diff --git a/diffusion_policy/config/task/kitchen_lowdim.yaml b/diffusion_policy/config/task/kitchen_lowdim.yaml new file mode 100644 index 0000000..a57d5d8 --- /dev/null +++ b/diffusion_policy/config/task/kitchen_lowdim.yaml @@ -0,0 +1,33 @@ +name: kitchen_lowdim + +obs_dim: 60 +action_dim: 9 +keypoint_dim: 
3 + +dataset_dir: &dataset_dir data/kitchen + +env_runner: + _target_: diffusion_policy.env_runner.kitchen_lowdim_runner.KitchenLowdimRunner + dataset_dir: *dataset_dir + n_train: 6 + n_train_vis: 2 + train_start_seed: 0 + n_test: 50 + n_test_vis: 4 + test_start_seed: 100000 + max_steps: 280 + n_obs_steps: ${n_obs_steps} + n_action_steps: ${n_action_steps} + render_hw: [240, 360] + fps: 12.5 + past_action: ${past_action_visible} + n_envs: null + +dataset: + _target_: diffusion_policy.dataset.kitchen_lowdim_dataset.KitchenLowdimDataset + dataset_dir: *dataset_dir + horizon: ${horizon} + pad_before: ${eval:'${n_obs_steps}-1'} + pad_after: ${eval:'${n_action_steps}-1'} + seed: 42 + val_ratio: 0.02 diff --git a/diffusion_policy/config/task/kitchen_lowdim_abs.yaml b/diffusion_policy/config/task/kitchen_lowdim_abs.yaml new file mode 100644 index 0000000..ecd0691 --- /dev/null +++ b/diffusion_policy/config/task/kitchen_lowdim_abs.yaml @@ -0,0 +1,38 @@ +name: kitchen_lowdim + +obs_dim: 60 +action_dim: 9 +keypoint_dim: 3 + +abs_action: True +robot_noise_ratio: 0.1 + +env_runner: + _target_: diffusion_policy.env_runner.kitchen_lowdim_runner.KitchenLowdimRunner + dataset_dir: data/kitchen + n_train: 6 + n_train_vis: 2 + train_start_seed: 0 + n_test: 50 + n_test_vis: 4 + test_start_seed: 100000 + max_steps: 280 + n_obs_steps: ${n_obs_steps} + n_action_steps: ${n_action_steps} + render_hw: [240, 360] + fps: 12.5 + past_action: ${past_action_visible} + abs_action: ${task.abs_action} + robot_noise_ratio: ${task.robot_noise_ratio} + n_envs: null + +dataset: + _target_: diffusion_policy.dataset.kitchen_mjl_lowdim_dataset.KitchenMjlLowdimDataset + dataset_dir: data/kitchen/kitchen_demos_multitask + horizon: ${horizon} + pad_before: ${eval:'${n_obs_steps}-1'} + pad_after: ${eval:'${n_action_steps}-1'} + abs_action: ${task.abs_action} + robot_noise_ratio: ${task.robot_noise_ratio} + seed: 42 + val_ratio: 0.02 diff --git a/diffusion_policy/config/task/lift_image.yaml 
b/diffusion_policy/config/task/lift_image.yaml new file mode 100644 index 0000000..8ddde45 --- /dev/null +++ b/diffusion_policy/config/task/lift_image.yaml @@ -0,0 +1,64 @@ +name: lift_image + +shape_meta: &shape_meta + # acceptable types: rgb, low_dim + obs: + agentview_image: + shape: [3, 84, 84] + type: rgb + robot0_eye_in_hand_image: + shape: [3, 84, 84] + type: rgb + robot0_eef_pos: + shape: [3] + # type default: low_dim + robot0_eef_quat: + shape: [4] + robot0_gripper_qpos: + shape: [2] + action: + shape: [7] + +task_name: &task_name lift +dataset_type: &dataset_type ph +dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/image.hdf5 +abs_action: &abs_action False + +env_runner: + _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner + dataset_path: *dataset_path + shape_meta: *shape_meta + # costs 1GB per env + n_train: 6 + n_train_vis: 1 + train_start_idx: 0 + n_test: 50 + n_test_vis: 3 + test_start_seed: 100000 + # use python's eval function as resolver, single-quoted string as argument + max_steps: ${eval:'500 if "${task.dataset_type}" == "mh" else 400'} + n_obs_steps: ${n_obs_steps} + n_action_steps: ${n_action_steps} + render_obs_key: 'agentview_image' + fps: 10 + crf: 22 + past_action: ${past_action_visible} + abs_action: *abs_action + tqdm_interval_sec: 1.0 + n_envs: 28 +# evaluation at this config requires a 16 core 64GB instance. 
+ +dataset: + _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset + shape_meta: *shape_meta + dataset_path: *dataset_path + horizon: ${horizon} + pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} + pad_after: ${eval:'${n_action_steps}-1'} + n_obs_steps: ${dataset_obs_steps} + abs_action: *abs_action + rotation_rep: 'rotation_6d' + use_legacy_normalizer: False + use_cache: True + seed: 42 + val_ratio: 0.02 diff --git a/diffusion_policy/config/task/lift_image_abs.yaml b/diffusion_policy/config/task/lift_image_abs.yaml new file mode 100644 index 0000000..b25002f --- /dev/null +++ b/diffusion_policy/config/task/lift_image_abs.yaml @@ -0,0 +1,63 @@ +name: lift_image + +shape_meta: &shape_meta + # acceptable types: rgb, low_dim + obs: + agentview_image: + shape: [3, 84, 84] + type: rgb + robot0_eye_in_hand_image: + shape: [3, 84, 84] + type: rgb + robot0_eef_pos: + shape: [3] + # type default: low_dim + robot0_eef_quat: + shape: [4] + robot0_gripper_qpos: + shape: [2] + action: + shape: [10] + +task_name: &task_name lift +dataset_type: &dataset_type ph +dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/image_abs.hdf5 +abs_action: &abs_action True + +env_runner: + _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner + dataset_path: *dataset_path + shape_meta: *shape_meta + n_train: 6 + n_train_vis: 2 + train_start_idx: 0 + n_test: 50 + n_test_vis: 4 + test_start_seed: 100000 + # use python's eval function as resolver, single-quoted string as argument + max_steps: ${eval:'500 if "${task.dataset_type}" == "mh" else 400'} + n_obs_steps: ${n_obs_steps} + n_action_steps: ${n_action_steps} + render_obs_key: 'agentview_image' + fps: 10 + crf: 22 + past_action: ${past_action_visible} + abs_action: *abs_action + tqdm_interval_sec: 1.0 + n_envs: 28 +# evaluation at this config requires a 16 core 64GB instance. 
+ +dataset: + _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset + shape_meta: *shape_meta + dataset_path: *dataset_path + horizon: ${horizon} + pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} + pad_after: ${eval:'${n_action_steps}-1'} + n_obs_steps: ${dataset_obs_steps} + abs_action: *abs_action + rotation_rep: 'rotation_6d' + use_legacy_normalizer: False + use_cache: True + seed: 42 + val_ratio: 0.02 diff --git a/diffusion_policy/config/task/lift_lowdim.yaml b/diffusion_policy/config/task/lift_lowdim.yaml new file mode 100644 index 0000000..5b1b2d2 --- /dev/null +++ b/diffusion_policy/config/task/lift_lowdim.yaml @@ -0,0 +1,46 @@ +name: lift_lowdim + +obs_dim: 19 +action_dim: 7 +keypoint_dim: 3 + +obs_keys: &obs_keys ['object', 'robot0_eef_pos', 'robot0_eef_quat', 'robot0_gripper_qpos'] +task_name: &task_name lift +dataset_type: &dataset_type ph +dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/low_dim.hdf5 +abs_action: &abs_action False + +env_runner: + _target_: diffusion_policy.env_runner.robomimic_lowdim_runner.RobomimicLowdimRunner + dataset_path: *dataset_path + obs_keys: *obs_keys + n_train: 6 + n_train_vis: 2 + train_start_idx: 0 + n_test: 50 + n_test_vis: 4 + test_start_seed: 100000 + # use python's eval function as resolver, single-quoted string as argument + max_steps: ${eval:'500 if "${task.dataset_type}" == "mh" else 400'} + n_obs_steps: ${n_obs_steps} + n_action_steps: ${n_action_steps} + n_latency_steps: ${n_latency_steps} + render_hw: [128,128] + fps: 10 + crf: 22 + past_action: ${past_action_visible} + abs_action: *abs_action + tqdm_interval_sec: 1.0 + n_envs: 28 + +dataset: + _target_: diffusion_policy.dataset.robomimic_replay_lowdim_dataset.RobomimicReplayLowdimDataset + dataset_path: *dataset_path + horizon: ${horizon} + pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} + pad_after: ${eval:'${n_action_steps}-1'} + obs_keys: *obs_keys + 
abs_action: *abs_action + use_legacy_normalizer: False + seed: 42 + val_ratio: 0.02 diff --git a/diffusion_policy/config/task/lift_lowdim_abs.yaml b/diffusion_policy/config/task/lift_lowdim_abs.yaml new file mode 100644 index 0000000..fe9d17b --- /dev/null +++ b/diffusion_policy/config/task/lift_lowdim_abs.yaml @@ -0,0 +1,47 @@ +name: lift_lowdim + +obs_dim: 19 +action_dim: 10 +keypoint_dim: 3 + +obs_keys: &obs_keys ['object', 'robot0_eef_pos', 'robot0_eef_quat', 'robot0_gripper_qpos'] +task_name: &task_name lift +dataset_type: &dataset_type ph +dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/low_dim_abs.hdf5 +abs_action: &abs_action True + +env_runner: + _target_: diffusion_policy.env_runner.robomimic_lowdim_runner.RobomimicLowdimRunner + dataset_path: *dataset_path + obs_keys: *obs_keys + n_train: 6 + n_train_vis: 2 + train_start_idx: 0 + n_test: 50 + n_test_vis: 3 + test_start_seed: 100000 + # use python's eval function as resolver, single-quoted string as argument + max_steps: ${eval:'500 if "${task.dataset_type}" == "mh" else 400'} + n_obs_steps: ${n_obs_steps} + n_action_steps: ${n_action_steps} + n_latency_steps: ${n_latency_steps} + render_hw: [128,128] + fps: 10 + crf: 22 + past_action: ${past_action_visible} + abs_action: *abs_action + tqdm_interval_sec: 1.0 + n_envs: 28 + +dataset: + _target_: diffusion_policy.dataset.robomimic_replay_lowdim_dataset.RobomimicReplayLowdimDataset + dataset_path: *dataset_path + horizon: ${horizon} + pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} + pad_after: ${eval:'${n_action_steps}-1'} + obs_keys: *obs_keys + abs_action: *abs_action + use_legacy_normalizer: False + rotation_rep: rotation_6d + seed: 42 + val_ratio: 0.02 diff --git a/diffusion_policy/config/task/pusht_image.yaml b/diffusion_policy/config/task/pusht_image.yaml new file mode 100644 index 0000000..36b5dd3 --- /dev/null +++ b/diffusion_policy/config/task/pusht_image.yaml @@ -0,0 +1,40 @@ +name: pusht_image 
+ +image_shape: &image_shape [3, 96, 96] +shape_meta: &shape_meta + # acceptable types: rgb, low_dim + obs: + image: + shape: *image_shape + type: rgb + agent_pos: + shape: [2] + type: low_dim + action: + shape: [2] + +env_runner: + _target_: diffusion_policy.env_runner.pusht_image_runner.PushTImageRunner + n_train: 6 + n_train_vis: 2 + train_start_seed: 0 + n_test: 50 + n_test_vis: 4 + legacy_test: True + test_start_seed: 100000 + max_steps: 300 + n_obs_steps: ${n_obs_steps} + n_action_steps: ${n_action_steps} + fps: 10 + past_action: ${past_action_visible} + n_envs: null + +dataset: + _target_: diffusion_policy.dataset.pusht_image_dataset.PushTImageDataset + zarr_path: data/pusht/pusht_cchi_v7_replay.zarr + horizon: ${horizon} + pad_before: ${eval:'${n_obs_steps}-1'} + pad_after: ${eval:'${n_action_steps}-1'} + seed: 42 + val_ratio: 0.02 + max_train_episodes: 90 diff --git a/diffusion_policy/config/task/pusht_lowdim.yaml b/diffusion_policy/config/task/pusht_lowdim.yaml new file mode 100644 index 0000000..3ec7eb5 --- /dev/null +++ b/diffusion_policy/config/task/pusht_lowdim.yaml @@ -0,0 +1,34 @@ +name: pusht_lowdim + +obs_dim: 20 # 9*2 keypoints + 2 state +action_dim: 2 +keypoint_dim: 2 + +env_runner: + _target_: diffusion_policy.env_runner.pusht_keypoints_runner.PushTKeypointsRunner + keypoint_visible_rate: ${keypoint_visible_rate} + n_train: 6 + n_train_vis: 2 + train_start_seed: 0 + n_test: 50 + n_test_vis: 4 + legacy_test: True + test_start_seed: 100000 + max_steps: 300 + n_obs_steps: ${n_obs_steps} + n_action_steps: ${n_action_steps} + n_latency_steps: ${n_latency_steps} + fps: 10 + agent_keypoints: False + past_action: ${past_action_visible} + n_envs: null + +dataset: + _target_: diffusion_policy.dataset.pusht_dataset.PushTLowdimDataset + zarr_path: data/pusht/pusht_cchi_v7_replay.zarr + horizon: ${horizon} + pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} + pad_after: ${eval:'${n_action_steps}-1'} + seed: 42 + val_ratio: 0.02 + max_train_episodes: 
90 diff --git a/diffusion_policy/config/task/real_pusht_image.yaml b/diffusion_policy/config/task/real_pusht_image.yaml new file mode 100644 index 0000000..a3f7c3f --- /dev/null +++ b/diffusion_policy/config/task/real_pusht_image.yaml @@ -0,0 +1,47 @@ +name: real_image + +image_shape: [3, 240, 320] +dataset_path: data/pusht_real/real_pusht_20230105 + +shape_meta: &shape_meta + # acceptable types: rgb, low_dim + obs: + # camera_0: + # shape: ${task.image_shape} + # type: rgb + camera_1: + shape: ${task.image_shape} + type: rgb + # camera_2: + # shape: ${task.image_shape} + # type: rgb + camera_3: + shape: ${task.image_shape} + type: rgb + # camera_4: + # shape: ${task.image_shape} + # type: rgb + robot_eef_pose: + shape: [2] + type: low_dim + action: + shape: [2] + +env_runner: + _target_: diffusion_policy.env_runner.real_pusht_image_runner.RealPushTImageRunner + +dataset: + _target_: diffusion_policy.dataset.real_pusht_image_dataset.RealPushTImageDataset + shape_meta: *shape_meta + dataset_path: ${task.dataset_path} + horizon: ${horizon} + pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} + pad_after: ${eval:'${n_action_steps}-1'} + n_obs_steps: ${dataset_obs_steps} + n_latency_steps: ${n_latency_steps} + use_cache: True + seed: 42 + val_ratio: 0.00 + max_train_episodes: null + delta_action: False + diff --git a/diffusion_policy/config/task/square_image.yaml b/diffusion_policy/config/task/square_image.yaml new file mode 100644 index 0000000..827151b --- /dev/null +++ b/diffusion_policy/config/task/square_image.yaml @@ -0,0 +1,64 @@ +name: square_image + +shape_meta: &shape_meta + # acceptable types: rgb, low_dim + obs: + agentview_image: + shape: [3, 84, 84] + type: rgb + robot0_eye_in_hand_image: + shape: [3, 84, 84] + type: rgb + robot0_eef_pos: + shape: [3] + # type default: low_dim + robot0_eef_quat: + shape: [4] + robot0_gripper_qpos: + shape: [2] + action: + shape: [7] + +task_name: &task_name square +dataset_type: &dataset_type ph +dataset_path: 
&dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/image.hdf5 +abs_action: &abs_action False + +env_runner: + _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner + dataset_path: *dataset_path + shape_meta: *shape_meta + # costs 1GB per env + n_train: 6 + n_train_vis: 2 + train_start_idx: 0 + n_test: 50 + n_test_vis: 4 + test_start_seed: 100000 + # use python's eval function as resolver, single-quoted string as argument + max_steps: ${eval:'500 if "${task.dataset_type}" == "mh" else 400'} + n_obs_steps: ${n_obs_steps} + n_action_steps: ${n_action_steps} + render_obs_key: 'agentview_image' + fps: 10 + crf: 22 + past_action: ${past_action_visible} + abs_action: *abs_action + tqdm_interval_sec: 1.0 + n_envs: 28 +# evaluation at this config requires a 16 core 64GB instance. + +dataset: + _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset + shape_meta: *shape_meta + dataset_path: *dataset_path + horizon: ${horizon} + pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} + pad_after: ${eval:'${n_action_steps}-1'} + n_obs_steps: ${dataset_obs_steps} + abs_action: *abs_action + rotation_rep: 'rotation_6d' + use_legacy_normalizer: False + use_cache: True + seed: 42 + val_ratio: 0.02 diff --git a/diffusion_policy/config/task/square_image_abs.yaml b/diffusion_policy/config/task/square_image_abs.yaml new file mode 100644 index 0000000..d27a916 --- /dev/null +++ b/diffusion_policy/config/task/square_image_abs.yaml @@ -0,0 +1,64 @@ +name: square_image + +shape_meta: &shape_meta + # acceptable types: rgb, low_dim + obs: + agentview_image: + shape: [3, 84, 84] + type: rgb + robot0_eye_in_hand_image: + shape: [3, 84, 84] + type: rgb + robot0_eef_pos: + shape: [3] + # type default: low_dim + robot0_eef_quat: + shape: [4] + robot0_gripper_qpos: + shape: [2] + action: + shape: [10] + +task_name: &task_name square +dataset_type: &dataset_type ph +dataset_path: &dataset_path 
data/robomimic/datasets/${task.task_name}/${task.dataset_type}/image_abs.hdf5 +abs_action: &abs_action True + +env_runner: + _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner + dataset_path: *dataset_path + shape_meta: *shape_meta + # costs 1GB per env + n_train: 6 + n_train_vis: 2 + train_start_idx: 0 + n_test: 50 + n_test_vis: 4 + test_start_seed: 100000 + # use python's eval function as resolver, single-quoted string as argument + max_steps: ${eval:'500 if "${task.dataset_type}" == "mh" else 400'} + n_obs_steps: ${n_obs_steps} + n_action_steps: ${n_action_steps} + render_obs_key: 'agentview_image' + fps: 10 + crf: 22 + past_action: ${past_action_visible} + abs_action: *abs_action + tqdm_interval_sec: 1.0 + n_envs: 28 +# evaluation at this config requires a 16 core 64GB instance. + +dataset: + _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset + shape_meta: *shape_meta + dataset_path: *dataset_path + horizon: ${horizon} + pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} + pad_after: ${eval:'${n_action_steps}-1'} + n_obs_steps: ${dataset_obs_steps} + abs_action: *abs_action + rotation_rep: 'rotation_6d' + use_legacy_normalizer: False + use_cache: True + seed: 42 + val_ratio: 0.02 diff --git a/diffusion_policy/config/task/square_lowdim.yaml b/diffusion_policy/config/task/square_lowdim.yaml new file mode 100644 index 0000000..f93e062 --- /dev/null +++ b/diffusion_policy/config/task/square_lowdim.yaml @@ -0,0 +1,46 @@ +name: square_lowdim + +obs_dim: 23 +action_dim: 7 +keypoint_dim: 3 + +obs_keys: &obs_keys ['object', 'robot0_eef_pos', 'robot0_eef_quat', 'robot0_gripper_qpos'] +task_name: &task_name square +dataset_type: &dataset_type ph +dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/low_dim.hdf5 +abs_action: &abs_action False + +env_runner: + _target_: diffusion_policy.env_runner.robomimic_lowdim_runner.RobomimicLowdimRunner + 
dataset_path: *dataset_path + obs_keys: *obs_keys + n_train: 6 + n_train_vis: 2 + train_start_idx: 0 + n_test: 50 + n_test_vis: 4 + test_start_seed: 100000 + # use python's eval function as resolver, single-quoted string as argument + max_steps: ${eval:'500 if "${task.dataset_type}" == "mh" else 400'} + n_obs_steps: ${n_obs_steps} + n_action_steps: ${n_action_steps} + n_latency_steps: ${n_latency_steps} + render_hw: [128,128] + fps: 10 + crf: 22 + past_action: ${past_action_visible} + abs_action: *abs_action + n_envs: 28 + +dataset: + _target_: diffusion_policy.dataset.robomimic_replay_lowdim_dataset.RobomimicReplayLowdimDataset + dataset_path: *dataset_path + horizon: ${horizon} + pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} + pad_after: ${eval:'${n_action_steps}-1'} + obs_keys: *obs_keys + abs_action: *abs_action + use_legacy_normalizer: False + seed: 42 + val_ratio: 0.02 + max_train_episodes: null diff --git a/diffusion_policy/config/task/square_lowdim_abs.yaml b/diffusion_policy/config/task/square_lowdim_abs.yaml new file mode 100644 index 0000000..f69b3c8 --- /dev/null +++ b/diffusion_policy/config/task/square_lowdim_abs.yaml @@ -0,0 +1,47 @@ +name: square_lowdim + +obs_dim: 23 +action_dim: 10 +keypoint_dim: 3 + +obs_keys: &obs_keys ['object', 'robot0_eef_pos', 'robot0_eef_quat', 'robot0_gripper_qpos'] +task_name: &task_name square +dataset_type: &dataset_type ph +abs_action: &abs_action True +dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/low_dim_abs.hdf5 + + +env_runner: + _target_: diffusion_policy.env_runner.robomimic_lowdim_runner.RobomimicLowdimRunner + dataset_path: *dataset_path + obs_keys: *obs_keys + n_train: 6 + n_train_vis: 2 + train_start_idx: 0 + n_test: 50 + n_test_vis: 4 + test_start_seed: 100000 + # use python's eval function as resolver, single-quoted string as argument + max_steps: ${eval:'500 if "${task.dataset_type}" == "mh" else 400'} + n_obs_steps: ${n_obs_steps} + n_action_steps: 
${n_action_steps} + n_latency_steps: ${n_latency_steps} + render_hw: [128,128] + fps: 10 + crf: 22 + past_action: ${past_action_visible} + abs_action: *abs_action + n_envs: 28 + +dataset: + _target_: diffusion_policy.dataset.robomimic_replay_lowdim_dataset.RobomimicReplayLowdimDataset + dataset_path: *dataset_path + horizon: ${horizon} + pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} + pad_after: ${eval:'${n_action_steps}-1'} + obs_keys: *obs_keys + abs_action: *abs_action + use_legacy_normalizer: False + seed: 42 + val_ratio: 0.02 + max_train_episodes: null diff --git a/diffusion_policy/config/task/tool_hang_image.yaml b/diffusion_policy/config/task/tool_hang_image.yaml new file mode 100644 index 0000000..8a8b298 --- /dev/null +++ b/diffusion_policy/config/task/tool_hang_image.yaml @@ -0,0 +1,63 @@ +name: tool_hang_image + +shape_meta: &shape_meta + # acceptable types: rgb, low_dim + obs: + sideview_image: + shape: [3, 240, 240] + type: rgb + robot0_eye_in_hand_image: + shape: [3, 240, 240] + type: rgb + robot0_eef_pos: + shape: [3] + # type default: low_dim + robot0_eef_quat: + shape: [4] + robot0_gripper_qpos: + shape: [2] + action: + shape: [7] + +task_name: &task_name tool_hang +dataset_type: &dataset_type ph +dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/image.hdf5 +abs_action: &abs_action False + +env_runner: + _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner + dataset_path: *dataset_path + shape_meta: *shape_meta + # costs 1GB per env + n_train: 6 + n_train_vis: 2 + train_start_idx: 0 + n_test: 50 + n_test_vis: 4 + test_start_seed: 100000 + max_steps: 700 + n_obs_steps: ${n_obs_steps} + n_action_steps: ${n_action_steps} + render_obs_key: 'sideview_image' + fps: 10 + crf: 22 + past_action: ${past_action_visible} + abs_action: *abs_action + tqdm_interval_sec: 1.0 + n_envs: 28 +# evaluation at this config requires a 16 core 64GB instance. 
+ +dataset: + _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset + shape_meta: *shape_meta + dataset_path: *dataset_path + horizon: ${horizon} + pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} + pad_after: ${eval:'${n_action_steps}-1'} + n_obs_steps: ${dataset_obs_steps} + abs_action: *abs_action + rotation_rep: 'rotation_6d' + use_legacy_normalizer: False + use_cache: True + seed: 42 + val_ratio: 0.02 diff --git a/diffusion_policy/config/task/tool_hang_image_abs.yaml b/diffusion_policy/config/task/tool_hang_image_abs.yaml new file mode 100644 index 0000000..068bdc9 --- /dev/null +++ b/diffusion_policy/config/task/tool_hang_image_abs.yaml @@ -0,0 +1,63 @@ +name: tool_hang_image_abs + +shape_meta: &shape_meta + # acceptable types: rgb, low_dim + obs: + sideview_image: + shape: [3, 240, 240] + type: rgb + robot0_eye_in_hand_image: + shape: [3, 240, 240] + type: rgb + robot0_eef_pos: + shape: [3] + # type default: low_dim + robot0_eef_quat: + shape: [4] + robot0_gripper_qpos: + shape: [2] + action: + shape: [10] + +task_name: &task_name tool_hang +dataset_type: &dataset_type ph +dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/image_abs.hdf5 +abs_action: &abs_action True + +env_runner: + _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner + dataset_path: *dataset_path + shape_meta: *shape_meta + # costs 1GB per env + n_train: 6 + n_train_vis: 2 + train_start_idx: 0 + n_test: 50 + n_test_vis: 4 + test_start_seed: 100000 + max_steps: 700 + n_obs_steps: ${n_obs_steps} + n_action_steps: ${n_action_steps} + render_obs_key: 'sideview_image' + fps: 10 + crf: 22 + past_action: ${past_action_visible} + abs_action: *abs_action + tqdm_interval_sec: 1.0 + n_envs: 28 +# evaluation at this config requires a 16 core 64GB instance. 
+ +dataset: + _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset + shape_meta: *shape_meta + dataset_path: *dataset_path + horizon: ${horizon} + pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} + pad_after: ${eval:'${n_action_steps}-1'} + n_obs_steps: ${dataset_obs_steps} + abs_action: *abs_action + rotation_rep: 'rotation_6d' + use_legacy_normalizer: False + use_cache: True + seed: 42 + val_ratio: 0.02 diff --git a/diffusion_policy/config/task/tool_hang_lowdim.yaml b/diffusion_policy/config/task/tool_hang_lowdim.yaml new file mode 100644 index 0000000..3826439 --- /dev/null +++ b/diffusion_policy/config/task/tool_hang_lowdim.yaml @@ -0,0 +1,45 @@ +name: tool_hang_lowdim + +obs_dim: 53 +action_dim: 7 +keypoint_dim: 3 + +obs_keys: &obs_keys ['object', 'robot0_eef_pos', 'robot0_eef_quat', 'robot0_gripper_qpos'] +task_name: &task_name tool_hang +dataset_type: &dataset_type ph +dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/low_dim.hdf5 +abs_action: &abs_action False + +env_runner: + _target_: diffusion_policy.env_runner.robomimic_lowdim_runner.RobomimicLowdimRunner + dataset_path: *dataset_path + obs_keys: *obs_keys + n_train: 6 + n_train_vis: 2 + train_start_idx: 0 + n_test: 50 + n_test_vis: 4 + test_start_seed: 100000 + max_steps: 700 + n_obs_steps: ${n_obs_steps} + n_action_steps: ${n_action_steps} + n_latency_steps: ${n_latency_steps} + render_hw: [128,128] + fps: 10 + crf: 22 + past_action: ${past_action_visible} + abs_action: *abs_action + n_envs: 28 +# seed 42 will crash MuJoCo for some reason. 
+ +dataset: + _target_: diffusion_policy.dataset.robomimic_replay_lowdim_dataset.RobomimicReplayLowdimDataset + dataset_path: *dataset_path + horizon: ${horizon} + pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} + pad_after: ${eval:'${n_action_steps}-1'} + obs_keys: *obs_keys + abs_action: *abs_action + use_legacy_normalizer: False + seed: 42 + val_ratio: 0.02 diff --git a/diffusion_policy/config/task/tool_hang_lowdim_abs.yaml b/diffusion_policy/config/task/tool_hang_lowdim_abs.yaml new file mode 100644 index 0000000..bfdb34d --- /dev/null +++ b/diffusion_policy/config/task/tool_hang_lowdim_abs.yaml @@ -0,0 +1,46 @@ +name: tool_hang_lowdim + +obs_dim: 53 +action_dim: 10 +keypoint_dim: 3 + +obs_keys: &obs_keys ['object', 'robot0_eef_pos', 'robot0_eef_quat', 'robot0_gripper_qpos'] +task_name: &task_name tool_hang +dataset_type: &dataset_type ph +dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/low_dim_abs.hdf5 +abs_action: &abs_action True + +env_runner: + _target_: diffusion_policy.env_runner.robomimic_lowdim_runner.RobomimicLowdimRunner + dataset_path: *dataset_path + obs_keys: *obs_keys + n_train: 6 + n_train_vis: 2 + train_start_idx: 0 + n_test: 50 + n_test_vis: 4 + test_start_seed: 100000 + max_steps: 700 + n_obs_steps: ${n_obs_steps} + n_action_steps: ${n_action_steps} + n_latency_steps: ${n_latency_steps} + render_hw: [128,128] + fps: 10 + crf: 22 + past_action: ${past_action_visible} + abs_action: *abs_action + n_envs: 28 +# seed 42 will crash MuJoCo for some reason. 
+ +dataset: + _target_: diffusion_policy.dataset.robomimic_replay_lowdim_dataset.RobomimicReplayLowdimDataset + dataset_path: *dataset_path + horizon: ${horizon} + pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} + pad_after: ${eval:'${n_action_steps}-1'} + obs_keys: *obs_keys + abs_action: *abs_action + use_legacy_normalizer: False + rotation_rep: rotation_6d + seed: 42 + val_ratio: 0.02 diff --git a/diffusion_policy/config/task/transport_image.yaml b/diffusion_policy/config/task/transport_image.yaml new file mode 100644 index 0000000..c9c24cc --- /dev/null +++ b/diffusion_policy/config/task/transport_image.yaml @@ -0,0 +1,75 @@ +name: transport_image + +shape_meta: &shape_meta + # acceptable types: rgb, low_dim + obs: + shouldercamera0_image: + shape: [3, 84, 84] + type: rgb + robot0_eye_in_hand_image: + shape: [3, 84, 84] + type: rgb + robot0_eef_pos: + shape: [3] + # type default: low_dim + robot0_eef_quat: + shape: [4] + robot0_gripper_qpos: + shape: [2] + shouldercamera1_image: + shape: [3, 84, 84] + type: rgb + robot1_eye_in_hand_image: + shape: [3, 84, 84] + type: rgb + robot1_eef_pos: + shape: [3] + # type default: low_dim + robot1_eef_quat: + shape: [4] + robot1_gripper_qpos: + shape: [2] + action: + shape: [14] + +task_name: &task_name transport +dataset_type: &dataset_type ph +dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/image.hdf5 +abs_action: &abs_action False + +env_runner: + _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner + dataset_path: *dataset_path + shape_meta: *shape_meta + n_train: 6 + n_train_vis: 2 + train_start_idx: 0 + n_test: 50 + n_test_vis: 4 + test_start_seed: 100000 + max_steps: 700 + n_obs_steps: ${n_obs_steps} + n_action_steps: ${n_action_steps} + render_obs_key: 'shouldercamera0_image' + fps: 10 + crf: 22 + past_action: ${past_action_visible} + abs_action: *abs_action + tqdm_interval_sec: 1.0 + n_envs: 28 +# evaluation at this config requires a 
16 core 64GB instance. + +dataset: + _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset + shape_meta: *shape_meta + dataset_path: *dataset_path + horizon: ${horizon} + pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} + pad_after: ${eval:'${n_action_steps}-1'} + n_obs_steps: ${dataset_obs_steps} + abs_action: *abs_action + rotation_rep: 'rotation_6d' + use_legacy_normalizer: False + use_cache: True + seed: 42 + val_ratio: 0.02 diff --git a/diffusion_policy/config/task/transport_image_abs.yaml b/diffusion_policy/config/task/transport_image_abs.yaml new file mode 100644 index 0000000..74e76c6 --- /dev/null +++ b/diffusion_policy/config/task/transport_image_abs.yaml @@ -0,0 +1,75 @@ +name: transport_image + +shape_meta: &shape_meta + # acceptable types: rgb, low_dim + obs: + shouldercamera0_image: + shape: [3, 84, 84] + type: rgb + robot0_eye_in_hand_image: + shape: [3, 84, 84] + type: rgb + robot0_eef_pos: + shape: [3] + # type default: low_dim + robot0_eef_quat: + shape: [4] + robot0_gripper_qpos: + shape: [2] + shouldercamera1_image: + shape: [3, 84, 84] + type: rgb + robot1_eye_in_hand_image: + shape: [3, 84, 84] + type: rgb + robot1_eef_pos: + shape: [3] + # type default: low_dim + robot1_eef_quat: + shape: [4] + robot1_gripper_qpos: + shape: [2] + action: + shape: [20] + +task_name: &task_name transport +dataset_type: &dataset_type ph +dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/image_abs.hdf5 +abs_action: &abs_action True + +env_runner: + _target_: diffusion_policy.env_runner.robomimic_image_runner.RobomimicImageRunner + dataset_path: *dataset_path + shape_meta: *shape_meta + n_train: 6 + n_train_vis: 2 + train_start_idx: 0 + n_test: 50 + n_test_vis: 4 + test_start_seed: 100000 + max_steps: 700 + n_obs_steps: ${n_obs_steps} + n_action_steps: ${n_action_steps} + render_obs_key: 'shouldercamera0_image' + fps: 10 + crf: 22 + past_action: ${past_action_visible} + 
abs_action: *abs_action + tqdm_interval_sec: 1.0 + n_envs: 28 +# evaluation at this config requires a 16 core 64GB instance. + +dataset: + _target_: diffusion_policy.dataset.robomimic_replay_image_dataset.RobomimicReplayImageDataset + shape_meta: *shape_meta + dataset_path: *dataset_path + horizon: ${horizon} + pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} + pad_after: ${eval:'${n_action_steps}-1'} + n_obs_steps: ${dataset_obs_steps} + abs_action: *abs_action + rotation_rep: 'rotation_6d' + use_legacy_normalizer: False + use_cache: True + seed: 42 + val_ratio: 0.02 diff --git a/diffusion_policy/config/task/transport_lowdim.yaml b/diffusion_policy/config/task/transport_lowdim.yaml new file mode 100644 index 0000000..99e279c --- /dev/null +++ b/diffusion_policy/config/task/transport_lowdim.yaml @@ -0,0 +1,49 @@ +name: transport_lowdim + +obs_dim: 59 # 41+(3+4+2)*2 +action_dim: 14 # 7*2 +keypoint_dim: 3 + +obs_keys: &obs_keys [ + 'object', + 'robot0_eef_pos', 'robot0_eef_quat', 'robot0_gripper_qpos', + 'robot1_eef_pos', 'robot1_eef_quat', 'robot1_gripper_qpos' +] +task_name: &task_name transport +dataset_type: &dataset_type ph +dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/low_dim.hdf5 +abs_action: &abs_action False + +env_runner: + _target_: diffusion_policy.env_runner.robomimic_lowdim_runner.RobomimicLowdimRunner + dataset_path: *dataset_path + obs_keys: *obs_keys + n_train: 6 + n_train_vis: 2 + train_start_idx: 0 + n_test: 50 + n_test_vis: 5 + test_start_seed: 100000 + max_steps: 700 + n_obs_steps: ${n_obs_steps} + n_action_steps: ${n_action_steps} + n_latency_steps: ${n_latency_steps} + render_hw: [128,128] + fps: 10 + crf: 22 + past_action: ${past_action_visible} + abs_action: *abs_action + n_envs: 28 +# evaluation at this config requires a 16 core 64GB instance. 
+ +dataset: + _target_: diffusion_policy.dataset.robomimic_replay_lowdim_dataset.RobomimicReplayLowdimDataset + dataset_path: *dataset_path + horizon: ${horizon} + pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} + pad_after: ${eval:'${n_action_steps}-1'} + obs_keys: *obs_keys + abs_action: *abs_action + use_legacy_normalizer: False + seed: 42 + val_ratio: 0.02 diff --git a/diffusion_policy/config/task/transport_lowdim_abs.yaml b/diffusion_policy/config/task/transport_lowdim_abs.yaml new file mode 100644 index 0000000..7cc3014 --- /dev/null +++ b/diffusion_policy/config/task/transport_lowdim_abs.yaml @@ -0,0 +1,49 @@ +name: transport_lowdim + +obs_dim: 59 # 41+(3+4+2)*2 +action_dim: 20 # 10*2 +keypoint_dim: 3 + +obs_keys: &obs_keys [ + 'object', + 'robot0_eef_pos', 'robot0_eef_quat', 'robot0_gripper_qpos', + 'robot1_eef_pos', 'robot1_eef_quat', 'robot1_gripper_qpos' +] +task_name: &task_name transport +dataset_type: &dataset_type ph +dataset_path: &dataset_path data/robomimic/datasets/${task.task_name}/${task.dataset_type}/low_dim_abs.hdf5 +abs_action: &abs_action True + +env_runner: + _target_: diffusion_policy.env_runner.robomimic_lowdim_runner.RobomimicLowdimRunner + dataset_path: *dataset_path + obs_keys: *obs_keys + n_train: 6 + n_train_vis: 2 + train_start_idx: 0 + n_test: 50 + n_test_vis: 4 + test_start_seed: 100000 + max_steps: 700 + n_obs_steps: ${n_obs_steps} + n_action_steps: ${n_action_steps} + n_latency_steps: ${n_latency_steps} + render_hw: [128,128] + fps: 10 + crf: 22 + past_action: ${past_action_visible} + abs_action: *abs_action + n_envs: 28 +# evaluation at this config requires a 16 core 64GB instance. 
+ +dataset: + _target_: diffusion_policy.dataset.robomimic_replay_lowdim_dataset.RobomimicReplayLowdimDataset + dataset_path: *dataset_path + horizon: ${horizon} + pad_before: ${eval:'${n_obs_steps}-1+${n_latency_steps}'} + pad_after: ${eval:'${n_action_steps}-1'} + obs_keys: *obs_keys + abs_action: *abs_action + use_legacy_normalizer: False + seed: 42 + val_ratio: 0.02 diff --git a/diffusion_policy/config/train_bet_lowdim_workspace.yaml b/diffusion_policy/config/train_bet_lowdim_workspace.yaml new file mode 100644 index 0000000..63a3436 --- /dev/null +++ b/diffusion_policy/config/train_bet_lowdim_workspace.yaml @@ -0,0 +1,132 @@ +defaults: + - _self_ + - task: blockpush_lowdim_seed + +name: train_bet_lowdim +_target_: diffusion_policy.workspace.train_bet_lowdim_workspace.TrainBETLowdimWorkspace + +obs_dim: ${task.obs_dim} +action_dim: ${task.action_dim} +keypoint_dim: ${task.keypoint_dim} +task_name: ${task.name} +exp_name: "default" + +horizon: 3 +n_obs_steps: 3 +n_action_steps: 1 +n_latency_steps: 0 +past_action_visible: False +keypoint_visible_rate: 1.0 +obs_as_local_cond: False +obs_as_global_cond: False +pred_action_steps_only: False + +policy: + _target_: diffusion_policy.policy.bet_lowdim_policy.BETLowdimPolicy + + action_ae: + _target_: diffusion_policy.model.bet.action_ae.discretizers.k_means.KMeansDiscretizer + num_bins: 24 + action_dim: ${action_dim} + predict_offsets: True + + obs_encoding_net: + _target_: torch.nn.Identity + output_dim: ${obs_dim} + + state_prior: + _target_: diffusion_policy.model.bet.latent_generators.mingpt.MinGPT + + discrete_input: false + input_dim: ${obs_dim} + + vocab_size: ${policy.action_ae.num_bins} + + # Architecture details + n_layer: 4 + n_head: 4 + n_embd: 72 + + block_size: ${horizon} # Length of history/context + predict_offsets: True + offset_loss_scale: 1000.0 # actions are very small + focal_loss_gamma: 2.0 + action_dim: ${action_dim} + + horizon: ${horizon} + n_obs_steps: ${n_obs_steps} + n_action_steps: 
${n_action_steps} + +dataloader: + batch_size: 256 + num_workers: 1 + shuffle: True + pin_memory: True + persistent_workers: False + +val_dataloader: + batch_size: 256 + num_workers: 1 + shuffle: False + pin_memory: True + persistent_workers: False + +optimizer: + learning_rate: 0.0001 # 1e-4 + weight_decay: 0.1 + betas: [0.9, 0.95] + +training: + device: "cuda:0" + seed: 42 + debug: False + resume: True + # optimization + lr_scheduler: cosine + lr_warmup_steps: 500 + num_epochs: 5000 + gradient_accumulate_every: 1 + grad_norm_clip: 1.0 + enable_normalizer: True + # training loop control + # in epochs + rollout_every: 50 + checkpoint_every: 50 + val_every: 1 + sample_every: 5 + # steps per epoch + max_train_steps: null + max_val_steps: null + # misc + tqdm_interval_sec: 1.0 + +logging: + project: diffusion_policy_debug + resume: True + mode: online + name: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} + tags: ["${name}", "${task_name}", "${exp_name}"] + id: null + group: null + +checkpoint: + topk: + monitor_key: test_mean_score + mode: max + k: 5 + format_str: 'epoch={epoch:04d}-test_mean_score={test_mean_score:.3f}.ckpt' + save_last_ckpt: True + save_last_snapshot: False + +multi_run: + run_dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + wandb_name_base: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} + +hydra: + job: + override_dirname: ${name} + run: + dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + sweep: + dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + subdir: ${hydra.job.num} diff --git a/diffusion_policy/config/train_diffusion_transformer_hybrid_workspace.yaml b/diffusion_policy/config/train_diffusion_transformer_hybrid_workspace.yaml new file mode 100644 index 0000000..0a90039 --- /dev/null +++ b/diffusion_policy/config/train_diffusion_transformer_hybrid_workspace.yaml @@ -0,0 +1,141 @@ +defaults: + - _self_ + - task: lift_image_abs + +name: train_diffusion_transformer_hybrid 
+_target_: diffusion_policy.workspace.train_diffusion_transformer_hybrid_workspace.TrainDiffusionTransformerHybridWorkspace + +task_name: ${task.name} +shape_meta: ${task.shape_meta} +exp_name: "default" + +horizon: 10 +n_obs_steps: 2 +n_action_steps: 8 +n_latency_steps: 0 +dataset_obs_steps: ${n_obs_steps} +past_action_visible: False +keypoint_visible_rate: 1.0 +obs_as_cond: True + +policy: + _target_: diffusion_policy.policy.diffusion_transformer_hybrid_image_policy.DiffusionTransformerHybridImagePolicy + + shape_meta: ${shape_meta} + + noise_scheduler: + _target_: diffusers.schedulers.scheduling_ddpm.DDPMScheduler + num_train_timesteps: 100 + beta_start: 0.0001 + beta_end: 0.02 + beta_schedule: squaredcos_cap_v2 + variance_type: fixed_small # Yilun's paper uses fixed_small_log instead, but easy to cause Nan + clip_sample: True # required when predict_epsilon=False + prediction_type: epsilon # or sample + + horizon: ${horizon} + n_action_steps: ${eval:'${n_action_steps}+${n_latency_steps}'} + n_obs_steps: ${n_obs_steps} + num_inference_steps: 100 + + crop_shape: [76, 76] + obs_encoder_group_norm: True + eval_fixed_crop: True + + n_layer: 8 + n_cond_layers: 0 # >0: use transformer encoder for cond, otherwise use MLP + n_head: 4 + n_emb: 256 + p_drop_emb: 0.0 + p_drop_attn: 0.3 + causal_attn: True + time_as_cond: True # if false, use BERT like encoder only arch, time as input + obs_as_cond: ${obs_as_cond} + + # scheduler.step params + # predict_epsilon: True + +ema: + _target_: diffusion_policy.model.diffusion.ema_model.EMAModel + update_after_step: 0 + inv_gamma: 1.0 + power: 0.75 + min_value: 0.0 + max_value: 0.9999 + +dataloader: + batch_size: 64 + num_workers: 8 + shuffle: True + pin_memory: True + persistent_workers: False + +val_dataloader: + batch_size: 64 + num_workers: 8 + shuffle: False + pin_memory: True + persistent_workers: False + +optimizer: + transformer_weight_decay: 1.0e-3 + obs_encoder_weight_decay: 1.0e-6 + learning_rate: 1.0e-4 + betas: [0.9, 
0.95] + +training: + device: "cuda:0" + seed: 42 + debug: False + resume: True + # optimization + lr_scheduler: cosine + # Transformer needs LR warmup + lr_warmup_steps: 1000 + num_epochs: 3050 + gradient_accumulate_every: 1 + # EMA destroys performance when used with BatchNorm + # replace BatchNorm with GroupNorm. + use_ema: True + # training loop control + # in epochs + rollout_every: 50 + checkpoint_every: 50 + val_every: 1 + sample_every: 5 + # steps per epoch + max_train_steps: null + max_val_steps: null + # misc + tqdm_interval_sec: 1.0 + +logging: + project: diffusion_policy_debug + resume: True + mode: online + name: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} + tags: ["${name}", "${task_name}", "${exp_name}"] + id: null + group: null + +checkpoint: + topk: + monitor_key: test_mean_score + mode: max + k: 5 + format_str: 'epoch={epoch:04d}-test_mean_score={test_mean_score:.3f}.ckpt' + save_last_ckpt: True + save_last_snapshot: False + +multi_run: + run_dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + wandb_name_base: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} + +hydra: + job: + override_dirname: ${name} + run: + dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + sweep: + dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + subdir: ${hydra.job.num} diff --git a/diffusion_policy/config/train_diffusion_transformer_lowdim_kitchen_workspace.yaml b/diffusion_policy/config/train_diffusion_transformer_lowdim_kitchen_workspace.yaml new file mode 100644 index 0000000..937f230 --- /dev/null +++ b/diffusion_policy/config/train_diffusion_transformer_lowdim_kitchen_workspace.yaml @@ -0,0 +1,146 @@ +defaults: + - _self_ + - task: kitchen_lowdim_abs + +name: train_diffusion_transformer_lowdim +_target_: diffusion_policy.workspace.train_diffusion_transformer_lowdim_workspace.TrainDiffusionTransformerLowdimWorkspace + +obs_dim: ${task.obs_dim} +action_dim: ${task.action_dim} +task_name: ${task.name} 
+exp_name: "default" + +horizon: 16 +n_obs_steps: 4 +n_action_steps: 8 +n_latency_steps: 0 +past_action_visible: False +keypoint_visible_rate: 1.0 +obs_as_cond: True +pred_action_steps_only: False + +policy: + _target_: diffusion_policy.policy.diffusion_transformer_lowdim_policy.DiffusionTransformerLowdimPolicy + + model: + _target_: diffusion_policy.model.diffusion.transformer_for_diffusion.TransformerForDiffusion + input_dim: ${eval:'${action_dim} if ${obs_as_cond} else ${obs_dim} + ${action_dim}'} + output_dim: ${policy.model.input_dim} + horizon: ${horizon} + n_obs_steps: ${n_obs_steps} + cond_dim: ${eval:'${obs_dim} if ${obs_as_cond} else 0'} + + n_layer: 8 + n_head: 4 + n_emb: 768 + p_drop_emb: 0.0 + p_drop_attn: 0.1 + + causal_attn: True + time_as_cond: True # if false, use BERT like encoder only arch, time as input + obs_as_cond: ${obs_as_cond} + n_cond_layers: 0 # >0: use transformer encoder for cond, otherwise use MLP + + noise_scheduler: + _target_: diffusers.schedulers.scheduling_ddpm.DDPMScheduler + num_train_timesteps: 100 + beta_start: 0.0001 + beta_end: 0.02 + beta_schedule: squaredcos_cap_v2 + variance_type: fixed_small # Yilun's paper uses fixed_small_log instead, but easy to cause Nan + clip_sample: True # required when predict_epsilon=False + prediction_type: epsilon # or sample + + horizon: ${horizon} + obs_dim: ${obs_dim} + action_dim: ${action_dim} + n_action_steps: ${n_action_steps} + n_obs_steps: ${n_obs_steps} + num_inference_steps: 100 + obs_as_cond: ${obs_as_cond} + pred_action_steps_only: ${pred_action_steps_only} + + # scheduler.step params + # predict_epsilon: True + +ema: + _target_: diffusion_policy.model.diffusion.ema_model.EMAModel + update_after_step: 0 + inv_gamma: 1.0 + power: 0.75 + min_value: 0.0 + max_value: 0.9999 + +dataloader: + batch_size: 256 + num_workers: 1 + shuffle: True + pin_memory: True + persistent_workers: False + +val_dataloader: + batch_size: 256 + num_workers: 1 + shuffle: False + pin_memory: True + 
persistent_workers: False + +optimizer: + learning_rate: 1.0e-4 + weight_decay: 1.0e-3 + betas: [0.9, 0.95] + +training: + device: "cuda:0" + seed: 42 + debug: False + resume: True + # optimization + lr_scheduler: cosine + # Transformer needs LR warmup + lr_warmup_steps: 1000 + num_epochs: 5000 + gradient_accumulate_every: 1 + use_ema: True + # training loop control + # in epochs + rollout_every: 50 + checkpoint_every: 50 + val_every: 1 + sample_every: 5 + # steps per epoch + max_train_steps: null + max_val_steps: null + # misc + tqdm_interval_sec: 1.0 + +logging: + project: diffusion_policy_debug + resume: True + mode: online + name: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} + tags: ["${name}", "${task_name}", "${exp_name}"] + id: null + group: null + +checkpoint: + topk: + monitor_key: test_mean_score + mode: max + k: 5 + format_str: 'epoch={epoch:04d}-test_mean_score={test_mean_score:.3f}.ckpt' + save_last_ckpt: True + save_last_snapshot: False + +multi_run: + run_dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + wandb_name_base: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} + +hydra: + job: + override_dirname: ${name} + run: + dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + sweep: + dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + subdir: ${hydra.job.num} diff --git a/diffusion_policy/config/train_diffusion_transformer_lowdim_pusht_workspace.yaml b/diffusion_policy/config/train_diffusion_transformer_lowdim_pusht_workspace.yaml new file mode 100644 index 0000000..ffedd86 --- /dev/null +++ b/diffusion_policy/config/train_diffusion_transformer_lowdim_pusht_workspace.yaml @@ -0,0 +1,146 @@ +defaults: + - _self_ + - task: pusht_lowdim + +name: train_diffusion_transformer_lowdim +_target_: diffusion_policy.workspace.train_diffusion_transformer_lowdim_workspace.TrainDiffusionTransformerLowdimWorkspace + +obs_dim: ${task.obs_dim} +action_dim: ${task.action_dim} +task_name: ${task.name} 
+exp_name: "default" + +horizon: 16 +n_obs_steps: 2 +n_action_steps: 8 +n_latency_steps: 0 +past_action_visible: False +keypoint_visible_rate: 1.0 +obs_as_cond: True +pred_action_steps_only: False + +policy: + _target_: diffusion_policy.policy.diffusion_transformer_lowdim_policy.DiffusionTransformerLowdimPolicy + + model: + _target_: diffusion_policy.model.diffusion.transformer_for_diffusion.TransformerForDiffusion + input_dim: ${eval:'${action_dim} if ${obs_as_cond} else ${obs_dim} + ${action_dim}'} + output_dim: ${policy.model.input_dim} + horizon: ${horizon} + n_obs_steps: ${n_obs_steps} + cond_dim: ${eval:'${obs_dim} if ${obs_as_cond} else 0'} + + n_layer: 8 + n_head: 4 + n_emb: 256 + p_drop_emb: 0.0 + p_drop_attn: 0.01 + + causal_attn: True + time_as_cond: True # if false, use BERT like encoder only arch, time as input + obs_as_cond: ${obs_as_cond} + n_cond_layers: 0 # >0: use transformer encoder for cond, otherwise use MLP + + noise_scheduler: + _target_: diffusers.schedulers.scheduling_ddpm.DDPMScheduler + num_train_timesteps: 100 + beta_start: 0.0001 + beta_end: 0.02 + beta_schedule: squaredcos_cap_v2 + variance_type: fixed_small # Yilun's paper uses fixed_small_log instead, but easy to cause Nan + clip_sample: True # required when predict_epsilon=False + prediction_type: epsilon # or sample + + horizon: ${horizon} + obs_dim: ${obs_dim} + action_dim: ${action_dim} + n_action_steps: ${n_action_steps} + n_obs_steps: ${n_obs_steps} + num_inference_steps: 100 + obs_as_cond: ${obs_as_cond} + pred_action_steps_only: ${pred_action_steps_only} + + # scheduler.step params + # predict_epsilon: True + +ema: + _target_: diffusion_policy.model.diffusion.ema_model.EMAModel + update_after_step: 0 + inv_gamma: 1.0 + power: 0.75 + min_value: 0.0 + max_value: 0.9999 + +dataloader: + batch_size: 256 + num_workers: 1 + shuffle: True + pin_memory: True + persistent_workers: False + +val_dataloader: + batch_size: 256 + num_workers: 1 + shuffle: False + pin_memory: True + 
persistent_workers: False + +optimizer: + learning_rate: 1.0e-4 + weight_decay: 1.0e-1 + betas: [0.9, 0.95] + +training: + device: "cuda:0" + seed: 42 + debug: False + resume: True + # optimization + lr_scheduler: cosine + # Transformer needs LR warmup + lr_warmup_steps: 1000 + num_epochs: 8000 + gradient_accumulate_every: 1 + use_ema: True + # training loop control + # in epochs + rollout_every: 50 + checkpoint_every: 50 + val_every: 1 + sample_every: 5 + # steps per epoch + max_train_steps: null + max_val_steps: null + # misc + tqdm_interval_sec: 1.0 + +logging: + project: diffusion_policy_debug + resume: True + mode: online + name: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} + tags: ["${name}", "${task_name}", "${exp_name}"] + id: null + group: null + +checkpoint: + topk: + monitor_key: test_mean_score + mode: max + k: 5 + format_str: 'epoch={epoch:04d}-test_mean_score={test_mean_score:.3f}.ckpt' + save_last_ckpt: True + save_last_snapshot: False + +multi_run: + run_dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + wandb_name_base: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} + +hydra: + job: + override_dirname: ${name} + run: + dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + sweep: + dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + subdir: ${hydra.job.num} diff --git a/diffusion_policy/config/train_diffusion_transformer_lowdim_workspace.yaml b/diffusion_policy/config/train_diffusion_transformer_lowdim_workspace.yaml new file mode 100644 index 0000000..5f91d37 --- /dev/null +++ b/diffusion_policy/config/train_diffusion_transformer_lowdim_workspace.yaml @@ -0,0 +1,146 @@ +defaults: + - _self_ + - task: blockpush_lowdim_seed + +name: train_diffusion_transformer_lowdim +_target_: diffusion_policy.workspace.train_diffusion_transformer_lowdim_workspace.TrainDiffusionTransformerLowdimWorkspace + +obs_dim: ${task.obs_dim} +action_dim: ${task.action_dim} +task_name: ${task.name} +exp_name: 
"default" + +horizon: 5 +n_obs_steps: 3 +n_action_steps: 1 +n_latency_steps: 0 +past_action_visible: False +keypoint_visible_rate: 1.0 +obs_as_cond: True +pred_action_steps_only: False + +policy: + _target_: diffusion_policy.policy.diffusion_transformer_lowdim_policy.DiffusionTransformerLowdimPolicy + + model: + _target_: diffusion_policy.model.diffusion.transformer_for_diffusion.TransformerForDiffusion + input_dim: ${eval:'${action_dim} if ${obs_as_cond} else ${obs_dim} + ${action_dim}'} + output_dim: ${policy.model.input_dim} + horizon: ${horizon} + n_obs_steps: ${n_obs_steps} + cond_dim: ${eval:'${obs_dim} if ${obs_as_cond} else 0'} + + n_layer: 8 + n_head: 4 + n_emb: 256 + p_drop_emb: 0.0 + p_drop_attn: 0.3 + + causal_attn: True + time_as_cond: True # if false, use BERT like encoder only arch, time as input + obs_as_cond: ${obs_as_cond} + n_cond_layers: 0 # >0: use transformer encoder for cond, otherwise use MLP + + noise_scheduler: + _target_: diffusers.schedulers.scheduling_ddpm.DDPMScheduler + num_train_timesteps: 100 + beta_start: 0.0001 + beta_end: 0.02 + beta_schedule: squaredcos_cap_v2 + variance_type: fixed_small # Yilun's paper uses fixed_small_log instead, but easy to cause Nan + clip_sample: True # required when predict_epsilon=False + prediction_type: epsilon # or sample + + horizon: ${horizon} + obs_dim: ${obs_dim} + action_dim: ${action_dim} + n_action_steps: ${n_action_steps} + n_obs_steps: ${n_obs_steps} + num_inference_steps: 100 + obs_as_cond: ${obs_as_cond} + pred_action_steps_only: ${pred_action_steps_only} + + # scheduler.step params + # predict_epsilon: True + +ema: + _target_: diffusion_policy.model.diffusion.ema_model.EMAModel + update_after_step: 0 + inv_gamma: 1.0 + power: 0.75 + min_value: 0.0 + max_value: 0.9999 + +dataloader: + batch_size: 256 + num_workers: 1 + shuffle: True + pin_memory: True + persistent_workers: False + +val_dataloader: + batch_size: 256 + num_workers: 1 + shuffle: False + pin_memory: True + persistent_workers: 
False + +optimizer: + learning_rate: 1.0e-4 + weight_decay: 1.0e-3 + betas: [0.9, 0.95] + +training: + device: "cuda:0" + seed: 42 + debug: False + resume: True + # optimization + lr_scheduler: cosine + # Transformer needs LR warmup + lr_warmup_steps: 1000 + num_epochs: 5000 + gradient_accumulate_every: 1 + use_ema: True + # training loop control + # in epochs + rollout_every: 50 + checkpoint_every: 50 + val_every: 1 + sample_every: 5 + # steps per epoch + max_train_steps: null + max_val_steps: null + # misc + tqdm_interval_sec: 1.0 + +logging: + project: diffusion_policy_debug + resume: True + mode: online + name: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} + tags: ["${name}", "${task_name}", "${exp_name}"] + id: null + group: null + +checkpoint: + topk: + monitor_key: test_mean_score + mode: max + k: 5 + format_str: 'epoch={epoch:04d}-test_mean_score={test_mean_score:.3f}.ckpt' + save_last_ckpt: True + save_last_snapshot: False + +multi_run: + run_dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + wandb_name_base: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} + +hydra: + job: + override_dirname: ${name} + run: + dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + sweep: + dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + subdir: ${hydra.job.num} diff --git a/diffusion_policy/config/train_diffusion_transformer_real_hybrid_workspace.yaml b/diffusion_policy/config/train_diffusion_transformer_real_hybrid_workspace.yaml new file mode 100644 index 0000000..69bfad0 --- /dev/null +++ b/diffusion_policy/config/train_diffusion_transformer_real_hybrid_workspace.yaml @@ -0,0 +1,144 @@ +defaults: + - _self_ + - task: real_pusht_image + +name: train_diffusion_transformer_hybrid +_target_: diffusion_policy.workspace.train_diffusion_transformer_hybrid_workspace.TrainDiffusionTransformerHybridWorkspace + +task_name: ${task.name} +shape_meta: ${task.shape_meta} +exp_name: "default" + +horizon: 16 +n_obs_steps: 2 
+n_action_steps: 8 +n_latency_steps: 0 +dataset_obs_steps: ${n_obs_steps} +past_action_visible: False +keypoint_visible_rate: 1.0 +obs_as_cond: True + +policy: + _target_: diffusion_policy.policy.diffusion_transformer_hybrid_image_policy.DiffusionTransformerHybridImagePolicy + + shape_meta: ${shape_meta} + + noise_scheduler: + _target_: diffusers.schedulers.scheduling_ddim.DDIMScheduler + num_train_timesteps: 100 + beta_start: 0.0001 + beta_end: 0.02 + # beta_schedule is important + # this is the best we found + beta_schedule: squaredcos_cap_v2 + clip_sample: True + set_alpha_to_one: True + steps_offset: 0 + prediction_type: epsilon # or sample + + horizon: ${horizon} + n_action_steps: ${eval:'${n_action_steps}+${n_latency_steps}'} + n_obs_steps: ${n_obs_steps} + num_inference_steps: 8 + + crop_shape: [216, 288] # ch, cw 320x240 90% + obs_encoder_group_norm: True + eval_fixed_crop: True + + n_layer: 8 + n_cond_layers: 0 # >0: use transformer encoder for cond, otherwise use MLP + n_head: 4 + n_emb: 256 + p_drop_emb: 0.0 + p_drop_attn: 0.3 + causal_attn: True + time_as_cond: True # if false, use BERT like encoder only arch, time as input + obs_as_cond: ${obs_as_cond} + + # scheduler.step params + # predict_epsilon: True + +ema: + _target_: diffusion_policy.model.diffusion.ema_model.EMAModel + update_after_step: 0 + inv_gamma: 1.0 + power: 0.75 + min_value: 0.0 + max_value: 0.9999 + +dataloader: + batch_size: 64 + num_workers: 8 + shuffle: True + pin_memory: True + persistent_workers: True + +val_dataloader: + batch_size: 64 + num_workers: 8 + shuffle: False + pin_memory: True + persistent_workers: True + +optimizer: + transformer_weight_decay: 1.0e-3 + obs_encoder_weight_decay: 1.0e-6 + learning_rate: 1.0e-4 + betas: [0.9, 0.95] + +training: + device: "cuda:0" + seed: 42 + debug: False + resume: True + # optimization + lr_scheduler: cosine + # Transformer needs LR warmup + lr_warmup_steps: 500 + num_epochs: 600 + gradient_accumulate_every: 1 + # EMA destroys 
performance when used with BatchNorm + # replace BatchNorm with GroupNorm. + use_ema: True + # training loop control + # in epochs + rollout_every: 50 + checkpoint_every: 50 + val_every: 1 + sample_every: 5 + # steps per epoch + max_train_steps: null + max_val_steps: null + # misc + tqdm_interval_sec: 1.0 + +logging: + project: diffusion_policy_debug + resume: True + mode: online + name: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} + tags: ["${name}", "${task_name}", "${exp_name}"] + id: null + group: null + +checkpoint: + topk: + monitor_key: train_loss + mode: min + k: 5 + format_str: 'epoch={epoch:04d}-train_loss={train_loss:.3f}.ckpt' + save_last_ckpt: True + save_last_snapshot: False + +multi_run: + run_dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + wandb_name_base: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} + +hydra: + job: + override_dirname: ${name} + run: + dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + sweep: + dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + subdir: ${hydra.job.num} diff --git a/diffusion_policy/config/train_diffusion_unet_ddim_hybrid_workspace.yaml b/diffusion_policy/config/train_diffusion_unet_ddim_hybrid_workspace.yaml new file mode 100644 index 0000000..414cbba --- /dev/null +++ b/diffusion_policy/config/train_diffusion_unet_ddim_hybrid_workspace.yaml @@ -0,0 +1,140 @@ +defaults: + - _self_ + - task: lift_image_abs + +name: train_diffusion_unet_hybrid +_target_: diffusion_policy.workspace.train_diffusion_unet_hybrid_workspace.TrainDiffusionUnetHybridWorkspace + +task_name: ${task.name} +shape_meta: ${task.shape_meta} +exp_name: "default" + +horizon: 16 +n_obs_steps: 2 +n_action_steps: 8 +n_latency_steps: 0 +dataset_obs_steps: ${n_obs_steps} +past_action_visible: False +keypoint_visible_rate: 1.0 +obs_as_global_cond: True + +policy: + _target_: diffusion_policy.policy.diffusion_unet_hybrid_image_policy.DiffusionUnetHybridImagePolicy + + shape_meta: 
${shape_meta} + + noise_scheduler: + _target_: diffusers.schedulers.scheduling_ddim.DDIMScheduler + num_train_timesteps: 100 + beta_start: 0.0001 + beta_end: 0.02 + # beta_schedule is important + # this is the best we found + beta_schedule: squaredcos_cap_v2 + clip_sample: True + set_alpha_to_one: True + steps_offset: 0 + prediction_type: epsilon # or sample + + horizon: ${horizon} + n_action_steps: ${eval:'${n_action_steps}+${n_latency_steps}'} + n_obs_steps: ${n_obs_steps} + num_inference_steps: 8 + obs_as_global_cond: ${obs_as_global_cond} + # crop_shape: [76, 76] + crop_shape: null + diffusion_step_embed_dim: 128 + down_dims: [256,512,1024] + kernel_size: 5 + n_groups: 8 + cond_predict_scale: True + obs_encoder_group_norm: True + eval_fixed_crop: True + + # scheduler.step params + # predict_epsilon: True + +ema: + _target_: diffusion_policy.model.diffusion.ema_model.EMAModel + update_after_step: 0 + inv_gamma: 1.0 + power: 0.75 + min_value: 0.0 + max_value: 0.9999 + +dataloader: + batch_size: 64 + num_workers: 8 + shuffle: True + pin_memory: True + persistent_workers: False + +val_dataloader: + batch_size: 64 + num_workers: 8 + shuffle: False + pin_memory: True + persistent_workers: False + +optimizer: + _target_: torch.optim.AdamW + lr: 1.0e-4 + betas: [0.95, 0.999] + eps: 1.0e-8 + weight_decay: 1.0e-6 + +training: + device: "cuda:0" + seed: 42 + debug: False + resume: True + # optimization + lr_scheduler: cosine + lr_warmup_steps: 500 + num_epochs: 3000 + gradient_accumulate_every: 1 + # EMA destroys performance when used with BatchNorm + # replace BatchNorm with GroupNorm. 
+ use_ema: True + # training loop control + # in epochs + rollout_every: 50 + checkpoint_every: 50 + val_every: 1 + sample_every: 5 + # steps per epoch + max_train_steps: null + max_val_steps: null + # misc + tqdm_interval_sec: 1.0 + +logging: + project: diffusion_policy_debug + resume: True + mode: online + name: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} + tags: ["${name}", "${task_name}", "${exp_name}"] + id: null + group: null + +checkpoint: + topk: + monitor_key: test_mean_score + mode: max + k: 5 + format_str: 'epoch={epoch:04d}-test_mean_score={test_mean_score:.3f}.ckpt' + save_last_ckpt: True + save_last_snapshot: False + +multi_run: + run_dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + wandb_name_base: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} + +hydra: + job: + override_dirname: ${name} + run: + dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + sweep: + dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + subdir: ${hydra.job.num} diff --git a/diffusion_policy/config/train_diffusion_unet_ddim_lowdim_workspace.yaml b/diffusion_policy/config/train_diffusion_unet_ddim_lowdim_workspace.yaml new file mode 100644 index 0000000..90cd2f8 --- /dev/null +++ b/diffusion_policy/config/train_diffusion_unet_ddim_lowdim_workspace.yaml @@ -0,0 +1,146 @@ +defaults: + - _self_ + - task: pusht_lowdim + +name: train_diffusion_unet_lowdim +_target_: diffusion_policy.workspace.train_diffusion_unet_lowdim_workspace.TrainDiffusionUnetLowdimWorkspace + +obs_dim: ${task.obs_dim} +action_dim: ${task.action_dim} +keypoint_dim: ${task.keypoint_dim} +task_name: ${task.name} +exp_name: "default" + +horizon: 16 +n_obs_steps: 2 +n_action_steps: 8 +n_latency_steps: 0 +past_action_visible: False +keypoint_visible_rate: 1.0 +obs_as_local_cond: False +obs_as_global_cond: False +pred_action_steps_only: False + +policy: + _target_: diffusion_policy.policy.diffusion_unet_lowdim_policy.DiffusionUnetLowdimPolicy + + model: 
+ _target_: diffusion_policy.model.diffusion.conditional_unet1d.ConditionalUnet1D + input_dim: "${eval: ${task.action_dim} if ${obs_as_local_cond} or ${obs_as_global_cond} else ${task.obs_dim} + ${task.action_dim}}" + local_cond_dim: "${eval: ${task.obs_dim} if ${obs_as_local_cond} else None}" + global_cond_dim: "${eval: ${task.obs_dim}*${n_obs_steps} if ${obs_as_global_cond} else None}" + diffusion_step_embed_dim: 256 + down_dims: [256, 512, 1024] + kernel_size: 5 + n_groups: 8 + cond_predict_scale: True + + noise_scheduler: + _target_: diffusers.schedulers.scheduling_ddim.DDIMScheduler + num_train_timesteps: 100 + beta_start: 0.0001 + beta_end: 0.02 + # beta_schedule is important + # this is the best we found + beta_schedule: squaredcos_cap_v2 + clip_sample: True + set_alpha_to_one: True + steps_offset: 0 + prediction_type: epsilon # or sample + + horizon: ${horizon} + obs_dim: ${obs_dim} + action_dim: ${action_dim} + n_action_steps: ${eval:'${n_action_steps}+${n_latency_steps}'} + n_obs_steps: ${n_obs_steps} + num_inference_steps: 8 + obs_as_local_cond: ${obs_as_local_cond} + obs_as_global_cond: ${obs_as_global_cond} + pred_action_steps_only: ${pred_action_steps_only} + oa_step_convention: True + + # scheduler.step params + # predict_epsilon: True + +ema: + _target_: diffusion_policy.model.diffusion.ema_model.EMAModel + update_after_step: 0 + inv_gamma: 1.0 + power: 0.75 + min_value: 0.0 + max_value: 0.9999 + +dataloader: + batch_size: 256 + num_workers: 1 + shuffle: True + pin_memory: True + persistent_workers: False + +val_dataloader: + batch_size: 256 + num_workers: 1 + shuffle: False + pin_memory: True + persistent_workers: False + +optimizer: + _target_: torch.optim.AdamW + lr: 1.0e-4 + betas: [0.95, 0.999] + eps: 1.0e-8 + weight_decay: 1.0e-6 + +training: + device: "cuda:0" + seed: 42 + debug: False + resume: True + # optimization + lr_scheduler: cosine + lr_warmup_steps: 500 + num_epochs: 8000 + gradient_accumulate_every: 1 + use_ema: True + # training 
loop control + # in epochs + rollout_every: 50 + checkpoint_every: 50 + val_every: 1 + sample_every: 5 + # steps per epoch + max_train_steps: null + max_val_steps: null + # misc + tqdm_interval_sec: 1.0 + +logging: + project: diffusion_policy_debug + resume: True + mode: online + name: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} + tags: ["${name}", "${task_name}", "${exp_name}"] + id: null + group: null + +checkpoint: + topk: + monitor_key: test_mean_score + mode: max + k: 5 + format_str: 'epoch={epoch:04d}-test_mean_score={test_mean_score:.3f}.ckpt' + save_last_ckpt: True + save_last_snapshot: False + +multi_run: + run_dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + wandb_name_base: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} + +hydra: + job: + override_dirname: ${name} + run: + dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + sweep: + dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + subdir: ${hydra.job.num} diff --git a/diffusion_policy/config/train_diffusion_unet_hybrid_workspace.yaml b/diffusion_policy/config/train_diffusion_unet_hybrid_workspace.yaml new file mode 100644 index 0000000..9f9b1a6 --- /dev/null +++ b/diffusion_policy/config/train_diffusion_unet_hybrid_workspace.yaml @@ -0,0 +1,137 @@ +defaults: + - _self_ + - task: lift_image_abs + +name: train_diffusion_unet_hybrid +_target_: diffusion_policy.workspace.train_diffusion_unet_hybrid_workspace.TrainDiffusionUnetHybridWorkspace + +task_name: ${task.name} +shape_meta: ${task.shape_meta} +exp_name: "default" + +horizon: 16 +n_obs_steps: 2 +n_action_steps: 8 +n_latency_steps: 0 +dataset_obs_steps: ${n_obs_steps} +past_action_visible: False +keypoint_visible_rate: 1.0 +obs_as_global_cond: True + +policy: + _target_: diffusion_policy.policy.diffusion_unet_hybrid_image_policy.DiffusionUnetHybridImagePolicy + + shape_meta: ${shape_meta} + + noise_scheduler: + _target_: diffusers.schedulers.scheduling_ddpm.DDPMScheduler + 
num_train_timesteps: 100 + beta_start: 0.0001 + beta_end: 0.02 + beta_schedule: squaredcos_cap_v2 + variance_type: fixed_small # Yilun's paper uses fixed_small_log instead, but it easily causes NaN + clip_sample: True # required when predict_epsilon=False + prediction_type: epsilon # or sample + + horizon: ${horizon} + n_action_steps: ${eval:'${n_action_steps}+${n_latency_steps}'} + n_obs_steps: ${n_obs_steps} + num_inference_steps: 100 + obs_as_global_cond: ${obs_as_global_cond} + crop_shape: [76, 76] + # crop_shape: null + diffusion_step_embed_dim: 128 + down_dims: [512, 1024, 2048] + kernel_size: 5 + n_groups: 8 + cond_predict_scale: True + obs_encoder_group_norm: True + eval_fixed_crop: True + + # scheduler.step params + # predict_epsilon: True + +ema: + _target_: diffusion_policy.model.diffusion.ema_model.EMAModel + update_after_step: 0 + inv_gamma: 1.0 + power: 0.75 + min_value: 0.0 + max_value: 0.9999 + +dataloader: + batch_size: 64 + num_workers: 8 + shuffle: True + pin_memory: True + persistent_workers: False + +val_dataloader: + batch_size: 64 + num_workers: 8 + shuffle: False + pin_memory: True + persistent_workers: False + +optimizer: + _target_: torch.optim.AdamW + lr: 1.0e-4 + betas: [0.95, 0.999] + eps: 1.0e-8 + weight_decay: 1.0e-6 + +training: + device: "cuda:0" + seed: 42 + debug: False + resume: True + # optimization + lr_scheduler: cosine + lr_warmup_steps: 500 + num_epochs: 3050 + gradient_accumulate_every: 1 + # EMA destroys performance when used with BatchNorm + # replace BatchNorm with GroupNorm. 
+ use_ema: True + # training loop control + # in epochs + rollout_every: 50 + checkpoint_every: 50 + val_every: 1 + sample_every: 5 + # steps per epoch + max_train_steps: null + max_val_steps: null + # misc + tqdm_interval_sec: 1.0 + +logging: + project: diffusion_policy_debug + resume: True + mode: online + name: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} + tags: ["${name}", "${task_name}", "${exp_name}"] + id: null + group: null + +checkpoint: + topk: + monitor_key: test_mean_score + mode: max + k: 5 + format_str: 'epoch={epoch:04d}-test_mean_score={test_mean_score:.3f}.ckpt' + save_last_ckpt: True + save_last_snapshot: False + +multi_run: + run_dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + wandb_name_base: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} + +hydra: + job: + override_dirname: ${name} + run: + dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + sweep: + dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + subdir: ${hydra.job.num} diff --git a/diffusion_policy/config/train_diffusion_unet_image_pretrained_workspace.yaml b/diffusion_policy/config/train_diffusion_unet_image_pretrained_workspace.yaml new file mode 100644 index 0000000..122bea3 --- /dev/null +++ b/diffusion_policy/config/train_diffusion_unet_image_pretrained_workspace.yaml @@ -0,0 +1,149 @@ +defaults: + - _self_ + - task: lift_image_abs + +name: train_diffusion_unet_image +_target_: diffusion_policy.workspace.train_diffusion_unet_image_workspace.TrainDiffusionUnetImageWorkspace + +task_name: ${task.name} +shape_meta: ${task.shape_meta} +exp_name: "default" + +horizon: 16 +n_obs_steps: 2 +n_action_steps: 8 +n_latency_steps: 0 +dataset_obs_steps: ${n_obs_steps} +past_action_visible: False +keypoint_visible_rate: 1.0 +obs_as_global_cond: True + +policy: + _target_: diffusion_policy.policy.diffusion_unet_image_policy.DiffusionUnetImagePolicy + + shape_meta: ${shape_meta} + + noise_scheduler: + _target_: 
diffusers.schedulers.scheduling_ddpm.DDPMScheduler + num_train_timesteps: 100 + beta_start: 0.0001 + beta_end: 0.02 + beta_schedule: squaredcos_cap_v2 + variance_type: fixed_small # Yilun's paper uses fixed_small_log instead, but easy to cause Nan + clip_sample: True # required when predict_epsilon=False + prediction_type: epsilon # or sample + + obs_encoder: + _target_: diffusion_policy.model.vision.multi_image_obs_encoder.MultiImageObsEncoder + shape_meta: ${shape_meta} + rgb_model: + _target_: diffusion_policy.model.vision.model_getter.get_resnet + name: resnet18 + weights: IMAGENET1K_V1 # or r3m + resize_shape: [256, 256] + crop_shape: [224, 224] + random_crop: False + use_group_norm: False + share_rgb_model: True + imagenet_norm: True + + horizon: ${horizon} + n_action_steps: ${eval:'${n_action_steps}+${n_latency_steps}'} + n_obs_steps: ${n_obs_steps} + num_inference_steps: 100 + obs_as_global_cond: ${obs_as_global_cond} + # crop_shape: null + diffusion_step_embed_dim: 128 + down_dims: [512, 1024, 2048] + kernel_size: 5 + n_groups: 8 + cond_predict_scale: True + + # scheduler.step params + # predict_epsilon: True + +ema: + _target_: diffusion_policy.model.diffusion.ema_model.EMAModel + update_after_step: 0 + inv_gamma: 1.0 + power: 0.75 + min_value: 0.0 + max_value: 0.9999 + +dataloader: + batch_size: 64 + num_workers: 4 + shuffle: True + pin_memory: True + persistent_workers: False + +val_dataloader: + batch_size: 64 + num_workers: 4 + shuffle: False + pin_memory: True + persistent_workers: False + +optimizer: + _target_: torch.optim.AdamW + lr: 1.0e-4 + betas: [0.95, 0.999] + eps: 1.0e-8 + weight_decay: 1.0e-6 + +training: + device: "cuda:0" + seed: 42 + debug: False + resume: True + # optimization + lr_scheduler: cosine + lr_warmup_steps: 500 + num_epochs: 8000 + gradient_accumulate_every: 1 + # EMA destroys performance when used with BatchNorm + # replace BatchNorm with GroupNorm. 
+ use_ema: True + freeze_encoder: True + # training loop control + # in epochs + rollout_every: 50 + checkpoint_every: 50 + val_every: 1 + sample_every: 5 + # steps per epoch + max_train_steps: null + max_val_steps: null + # misc + tqdm_interval_sec: 1.0 + +logging: + project: diffusion_policy_debug + resume: True + mode: online + name: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} + tags: ["${name}", "${task_name}", "${exp_name}"] + id: null + group: null + +checkpoint: + topk: + monitor_key: test_mean_score + mode: max + k: 5 + format_str: 'epoch={epoch:04d}-test_mean_score={test_mean_score:.3f}.ckpt' + save_last_ckpt: True + save_last_snapshot: False + +multi_run: + run_dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + wandb_name_base: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} + +hydra: + job: + override_dirname: ${name} + run: + dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + sweep: + dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + subdir: ${hydra.job.num} diff --git a/diffusion_policy/config/train_diffusion_unet_image_workspace.yaml b/diffusion_policy/config/train_diffusion_unet_image_workspace.yaml new file mode 100644 index 0000000..f5636e3 --- /dev/null +++ b/diffusion_policy/config/train_diffusion_unet_image_workspace.yaml @@ -0,0 +1,150 @@ +defaults: + - _self_ + - task: lift_image_abs + +name: train_diffusion_unet_image +_target_: diffusion_policy.workspace.train_diffusion_unet_image_workspace.TrainDiffusionUnetImageWorkspace + +task_name: ${task.name} +shape_meta: ${task.shape_meta} +exp_name: "default" + +horizon: 16 +n_obs_steps: 2 +n_action_steps: 8 +n_latency_steps: 0 +dataset_obs_steps: ${n_obs_steps} +past_action_visible: False +keypoint_visible_rate: 1.0 +obs_as_global_cond: True + +policy: + _target_: diffusion_policy.policy.diffusion_unet_image_policy.DiffusionUnetImagePolicy + + shape_meta: ${shape_meta} + + noise_scheduler: + _target_: 
diffusers.schedulers.scheduling_ddpm.DDPMScheduler + num_train_timesteps: 100 + beta_start: 0.0001 + beta_end: 0.02 + beta_schedule: squaredcos_cap_v2 + variance_type: fixed_small # Yilun's paper uses fixed_small_log instead, but easy to cause Nan + clip_sample: True # required when predict_epsilon=False + prediction_type: epsilon # or sample + + obs_encoder: + _target_: diffusion_policy.model.vision.multi_image_obs_encoder.MultiImageObsEncoder + shape_meta: ${shape_meta} + rgb_model: + _target_: diffusion_policy.model.vision.model_getter.get_resnet + name: resnet18 + weights: null + resize_shape: null + crop_shape: [76, 76] + # constant center crop + random_crop: True + use_group_norm: True + share_rgb_model: False + imagenet_norm: True + + horizon: ${horizon} + n_action_steps: ${eval:'${n_action_steps}+${n_latency_steps}'} + n_obs_steps: ${n_obs_steps} + num_inference_steps: 100 + obs_as_global_cond: ${obs_as_global_cond} + # crop_shape: null + diffusion_step_embed_dim: 128 + down_dims: [512, 1024, 2048] + kernel_size: 5 + n_groups: 8 + cond_predict_scale: True + + # scheduler.step params + # predict_epsilon: True + +ema: + _target_: diffusion_policy.model.diffusion.ema_model.EMAModel + update_after_step: 0 + inv_gamma: 1.0 + power: 0.75 + min_value: 0.0 + max_value: 0.9999 + +dataloader: + batch_size: 64 + num_workers: 4 + shuffle: True + pin_memory: True + persistent_workers: False + +val_dataloader: + batch_size: 64 + num_workers: 4 + shuffle: False + pin_memory: True + persistent_workers: False + +optimizer: + _target_: torch.optim.AdamW + lr: 1.0e-4 + betas: [0.95, 0.999] + eps: 1.0e-8 + weight_decay: 1.0e-6 + +training: + device: "cuda:0" + seed: 42 + debug: False + resume: True + # optimization + lr_scheduler: cosine + lr_warmup_steps: 500 + num_epochs: 8000 + gradient_accumulate_every: 1 + # EMA destroys performance when used with BatchNorm + # replace BatchNorm with GroupNorm. 
+ use_ema: True + freeze_encoder: False + # training loop control + # in epochs + rollout_every: 50 + checkpoint_every: 50 + val_every: 1 + sample_every: 5 + # steps per epoch + max_train_steps: null + max_val_steps: null + # misc + tqdm_interval_sec: 1.0 + +logging: + project: diffusion_policy_debug + resume: True + mode: online + name: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} + tags: ["${name}", "${task_name}", "${exp_name}"] + id: null + group: null + +checkpoint: + topk: + monitor_key: test_mean_score + mode: max + k: 5 + format_str: 'epoch={epoch:04d}-test_mean_score={test_mean_score:.3f}.ckpt' + save_last_ckpt: True + save_last_snapshot: False + +multi_run: + run_dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + wandb_name_base: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} + +hydra: + job: + override_dirname: ${name} + run: + dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + sweep: + dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + subdir: ${hydra.job.num} diff --git a/diffusion_policy/config/train_diffusion_unet_lowdim_workspace.yaml b/diffusion_policy/config/train_diffusion_unet_lowdim_workspace.yaml new file mode 100644 index 0000000..7b4d35f --- /dev/null +++ b/diffusion_policy/config/train_diffusion_unet_lowdim_workspace.yaml @@ -0,0 +1,143 @@ +defaults: + - _self_ + - task: pusht_lowdim + +name: train_diffusion_unet_lowdim +_target_: diffusion_policy.workspace.train_diffusion_unet_lowdim_workspace.TrainDiffusionUnetLowdimWorkspace + +obs_dim: ${task.obs_dim} +action_dim: ${task.action_dim} +keypoint_dim: ${task.keypoint_dim} +task_name: ${task.name} +exp_name: "default" + +horizon: 16 +n_obs_steps: 2 +n_action_steps: 8 +n_latency_steps: 0 +past_action_visible: False +keypoint_visible_rate: 1.0 +obs_as_local_cond: False +obs_as_global_cond: True +pred_action_steps_only: False + +policy: + _target_: diffusion_policy.policy.diffusion_unet_lowdim_policy.DiffusionUnetLowdimPolicy + + 
model: + _target_: diffusion_policy.model.diffusion.conditional_unet1d.ConditionalUnet1D + input_dim: "${eval: ${task.action_dim} if ${obs_as_local_cond} or ${obs_as_global_cond} else ${task.obs_dim} + ${task.action_dim}}" + local_cond_dim: "${eval: ${task.obs_dim} if ${obs_as_local_cond} else None}" + global_cond_dim: "${eval: ${task.obs_dim}*${n_obs_steps} if ${obs_as_global_cond} else None}" + diffusion_step_embed_dim: 256 + down_dims: [256, 512, 1024] + kernel_size: 5 + n_groups: 8 + cond_predict_scale: True + + noise_scheduler: + _target_: diffusers.schedulers.scheduling_ddpm.DDPMScheduler + num_train_timesteps: 100 + beta_start: 0.0001 + beta_end: 0.02 + beta_schedule: squaredcos_cap_v2 + variance_type: fixed_small # Yilun's paper uses fixed_small_log instead, but easy to cause Nan + clip_sample: True # required when predict_epsilon=False + prediction_type: epsilon # or sample + + horizon: ${horizon} + obs_dim: ${obs_dim} + action_dim: ${action_dim} + n_action_steps: ${eval:'${n_action_steps}+${n_latency_steps}'} + n_obs_steps: ${n_obs_steps} + num_inference_steps: 100 + obs_as_local_cond: ${obs_as_local_cond} + obs_as_global_cond: ${obs_as_global_cond} + pred_action_steps_only: ${pred_action_steps_only} + oa_step_convention: True + + # scheduler.step params + # predict_epsilon: True + +ema: + _target_: diffusion_policy.model.diffusion.ema_model.EMAModel + update_after_step: 0 + inv_gamma: 1.0 + power: 0.75 + min_value: 0.0 + max_value: 0.9999 + +dataloader: + batch_size: 256 + num_workers: 1 + shuffle: True + pin_memory: True + persistent_workers: False + +val_dataloader: + batch_size: 256 + num_workers: 1 + shuffle: False + pin_memory: True + persistent_workers: False + +optimizer: + _target_: torch.optim.AdamW + lr: 1.0e-4 + betas: [0.95, 0.999] + eps: 1.0e-8 + weight_decay: 1.0e-6 + +training: + device: "cuda:0" + seed: 42 + debug: False + resume: True + # optimization + lr_scheduler: cosine + lr_warmup_steps: 500 + num_epochs: 5000 + 
gradient_accumulate_every: 1 + use_ema: True + # training loop control + # in epochs + rollout_every: 50 + checkpoint_every: 50 + val_every: 1 + sample_every: 5 + # steps per epoch + max_train_steps: null + max_val_steps: null + # misc + tqdm_interval_sec: 1.0 + +logging: + project: diffusion_policy_debug + resume: True + mode: online + name: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} + tags: ["${name}", "${task_name}", "${exp_name}"] + id: null + group: null + +checkpoint: + topk: + monitor_key: test_mean_score + mode: max + k: 5 + format_str: 'epoch={epoch:04d}-test_mean_score={test_mean_score:.3f}.ckpt' + save_last_ckpt: True + save_last_snapshot: False + +multi_run: + run_dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + wandb_name_base: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} + +hydra: + job: + override_dirname: ${name} + run: + dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + sweep: + dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + subdir: ${hydra.job.num} diff --git a/diffusion_policy/config/train_diffusion_unet_real_hybrid_workspace.yaml b/diffusion_policy/config/train_diffusion_unet_real_hybrid_workspace.yaml new file mode 100644 index 0000000..5b32366 --- /dev/null +++ b/diffusion_policy/config/train_diffusion_unet_real_hybrid_workspace.yaml @@ -0,0 +1,141 @@ +defaults: + - _self_ + - task: real_pusht_image + +name: train_diffusion_unet_hybrid +_target_: diffusion_policy.workspace.train_diffusion_unet_hybrid_workspace.TrainDiffusionUnetHybridWorkspace + +task_name: ${task.name} +shape_meta: ${task.shape_meta} +exp_name: "default" + +horizon: 16 +n_obs_steps: 2 +n_action_steps: 8 +n_latency_steps: 0 +dataset_obs_steps: ${n_obs_steps} +past_action_visible: False +keypoint_visible_rate: 1.0 +obs_as_global_cond: True + +policy: + _target_: diffusion_policy.policy.diffusion_unet_hybrid_image_policy.DiffusionUnetHybridImagePolicy + + shape_meta: ${shape_meta} + + noise_scheduler: + 
_target_: diffusers.schedulers.scheduling_ddim.DDIMScheduler + num_train_timesteps: 100 + beta_start: 0.0001 + beta_end: 0.02 + # beta_schedule is important + # this is the best we found + beta_schedule: squaredcos_cap_v2 + clip_sample: True + set_alpha_to_one: True + steps_offset: 0 + prediction_type: epsilon # or sample + + horizon: ${horizon} + n_action_steps: ${eval:'${n_action_steps}+${n_latency_steps}'} + n_obs_steps: ${n_obs_steps} + num_inference_steps: 8 + obs_as_global_cond: ${obs_as_global_cond} + # crop_shape: [76, 76] # 84x84 90% + crop_shape: [216, 288] # ch, cw 320x240 90% + # crop_shape: null + diffusion_step_embed_dim: 128 + down_dims: [256,512,1024] + kernel_size: 5 + n_groups: 8 + cond_predict_scale: True + obs_encoder_group_norm: True + eval_fixed_crop: True + + # scheduler.step params + # predict_epsilon: True + +ema: + _target_: diffusion_policy.model.diffusion.ema_model.EMAModel + update_after_step: 0 + inv_gamma: 1.0 + power: 0.75 + min_value: 0.0 + max_value: 0.9999 + +dataloader: + batch_size: 64 + num_workers: 8 + shuffle: True + pin_memory: True + persistent_workers: True + +val_dataloader: + batch_size: 64 + num_workers: 8 + shuffle: False + pin_memory: True + persistent_workers: True + +optimizer: + _target_: torch.optim.AdamW + lr: 1.0e-4 + betas: [0.95, 0.999] + eps: 1.0e-8 + weight_decay: 1.0e-6 + +training: + device: "cuda:0" + seed: 42 + debug: False + resume: True + # optimization + lr_scheduler: cosine + lr_warmup_steps: 500 + num_epochs: 600 + gradient_accumulate_every: 1 + # EMA destroys performance when used with BatchNorm + # replace BatchNorm with GroupNorm. 
+ use_ema: True + # training loop control + # in epochs + rollout_every: 50 + checkpoint_every: 50 + val_every: 1 + sample_every: 5 + # steps per epoch + max_train_steps: null + max_val_steps: null + # misc + tqdm_interval_sec: 1.0 + +logging: + project: diffusion_policy_debug + resume: True + mode: online + name: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} + tags: ["${name}", "${task_name}", "${exp_name}"] + id: null + group: null + +checkpoint: + topk: + monitor_key: train_loss + mode: min + k: 5 + format_str: 'epoch={epoch:04d}-train_loss={train_loss:.3f}.ckpt' + save_last_ckpt: True + save_last_snapshot: False + +multi_run: + run_dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + wandb_name_base: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} + +hydra: + job: + override_dirname: ${name} + run: + dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + sweep: + dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + subdir: ${hydra.job.num} diff --git a/diffusion_policy/config/train_diffusion_unet_real_image_workspace.yaml b/diffusion_policy/config/train_diffusion_unet_real_image_workspace.yaml new file mode 100644 index 0000000..f6187ef --- /dev/null +++ b/diffusion_policy/config/train_diffusion_unet_real_image_workspace.yaml @@ -0,0 +1,152 @@ +defaults: + - _self_ + - task: real_pusht_image + +name: train_diffusion_unet_image +_target_: diffusion_policy.workspace.train_diffusion_unet_image_workspace.TrainDiffusionUnetImageWorkspace + +task_name: ${task.name} +shape_meta: ${task.shape_meta} +exp_name: "default" + +horizon: 16 +n_obs_steps: 2 +n_action_steps: 8 +n_latency_steps: 0 +dataset_obs_steps: ${n_obs_steps} +past_action_visible: False +keypoint_visible_rate: 1.0 +obs_as_global_cond: True + +policy: + _target_: diffusion_policy.policy.diffusion_unet_image_policy.DiffusionUnetImagePolicy + + shape_meta: ${shape_meta} + + noise_scheduler: + _target_: diffusers.schedulers.scheduling_ddim.DDIMScheduler + 
num_train_timesteps: 100 + beta_start: 0.0001 + beta_end: 0.02 + # beta_schedule is important + # this is the best we found + beta_schedule: squaredcos_cap_v2 + clip_sample: True + set_alpha_to_one: True + steps_offset: 0 + prediction_type: epsilon # or sample + + obs_encoder: + _target_: diffusion_policy.model.vision.multi_image_obs_encoder.MultiImageObsEncoder + shape_meta: ${shape_meta} + rgb_model: + _target_: diffusion_policy.model.vision.model_getter.get_resnet + name: resnet18 + weights: null + resize_shape: [240, 320] + crop_shape: [216, 288] # ch, cw 240x320 90% + random_crop: True + use_group_norm: True + share_rgb_model: False + imagenet_norm: True + + horizon: ${horizon} + n_action_steps: ${eval:'${n_action_steps}+${n_latency_steps}'} + n_obs_steps: ${n_obs_steps} + num_inference_steps: 100 + obs_as_global_cond: ${obs_as_global_cond} + # crop_shape: null + diffusion_step_embed_dim: 128 + down_dims: [512, 1024, 2048] + kernel_size: 5 + n_groups: 8 + cond_predict_scale: True + + # scheduler.step params + # predict_epsilon: True + +ema: + _target_: diffusion_policy.model.diffusion.ema_model.EMAModel + update_after_step: 0 + inv_gamma: 1.0 + power: 0.75 + min_value: 0.0 + max_value: 0.9999 + +dataloader: + batch_size: 64 + num_workers: 8 + shuffle: True + pin_memory: True + persistent_workers: True + +val_dataloader: + batch_size: 64 + num_workers: 8 + shuffle: False + pin_memory: True + persistent_workers: True + +optimizer: + _target_: torch.optim.AdamW + lr: 1.0e-4 + betas: [0.95, 0.999] + eps: 1.0e-8 + weight_decay: 1.0e-6 + +training: + device: "cuda:0" + seed: 42 + debug: False + resume: True + # optimization + lr_scheduler: cosine + lr_warmup_steps: 500 + num_epochs: 600 + gradient_accumulate_every: 1 + # EMA destroys performance when used with BatchNorm + # replace BatchNorm with GroupNorm. 
+ use_ema: True + freeze_encoder: False + # training loop control + # in epochs + rollout_every: 50 + checkpoint_every: 50 + val_every: 1 + sample_every: 5 + # steps per epoch + max_train_steps: null + max_val_steps: null + # misc + tqdm_interval_sec: 1.0 + +logging: + project: diffusion_policy_debug + resume: True + mode: online + name: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} + tags: ["${name}", "${task_name}", "${exp_name}"] + id: null + group: null + +checkpoint: + topk: + monitor_key: train_loss + mode: min + k: 5 + format_str: 'epoch={epoch:04d}-train_loss={train_loss:.3f}.ckpt' + save_last_ckpt: True + save_last_snapshot: False + +multi_run: + run_dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + wandb_name_base: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} + +hydra: + job: + override_dirname: ${name} + run: + dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + sweep: + dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + subdir: ${hydra.job.num} diff --git a/diffusion_policy/config/train_diffusion_unet_real_pretrained_workspace.yaml b/diffusion_policy/config/train_diffusion_unet_real_pretrained_workspace.yaml new file mode 100644 index 0000000..f129a1e --- /dev/null +++ b/diffusion_policy/config/train_diffusion_unet_real_pretrained_workspace.yaml @@ -0,0 +1,152 @@ +defaults: + - _self_ + - task: real_pusht_image + +name: train_diffusion_unet_image +_target_: diffusion_policy.workspace.train_diffusion_unet_image_workspace.TrainDiffusionUnetImageWorkspace + +task_name: ${task.name} +shape_meta: ${task.shape_meta} +exp_name: "default" + +horizon: 16 +n_obs_steps: 2 +n_action_steps: 8 +n_latency_steps: 0 +dataset_obs_steps: ${n_obs_steps} +past_action_visible: False +keypoint_visible_rate: 1.0 +obs_as_global_cond: True + +policy: + _target_: diffusion_policy.policy.diffusion_unet_image_policy.DiffusionUnetImagePolicy + + shape_meta: ${shape_meta} + + noise_scheduler: + _target_: 
diffusers.schedulers.scheduling_ddim.DDIMScheduler + num_train_timesteps: 100 + beta_start: 0.0001 + beta_end: 0.02 + # beta_schedule is important + # this is the best we found + beta_schedule: squaredcos_cap_v2 + clip_sample: True + set_alpha_to_one: True + steps_offset: 0 + prediction_type: epsilon # or sample + + obs_encoder: + _target_: diffusion_policy.model.vision.multi_image_obs_encoder.MultiImageObsEncoder + shape_meta: ${shape_meta} + rgb_model: + _target_: diffusion_policy.model.vision.model_getter.get_resnet + name: resnet18 + weights: IMAGENET1K_V1 # or r3m + resize_shape: [224,224] + crop_shape: null + random_crop: False + use_group_norm: False + share_rgb_model: True + imagenet_norm: True + + horizon: ${horizon} + n_action_steps: ${eval:'${n_action_steps}+${n_latency_steps}'} + n_obs_steps: ${n_obs_steps} + num_inference_steps: 100 + obs_as_global_cond: ${obs_as_global_cond} + # crop_shape: null + diffusion_step_embed_dim: 128 + down_dims: [512, 1024, 2048] + kernel_size: 5 + n_groups: 8 + cond_predict_scale: True + + # scheduler.step params + # predict_epsilon: True + +ema: + _target_: diffusion_policy.model.diffusion.ema_model.EMAModel + update_after_step: 0 + inv_gamma: 1.0 + power: 0.75 + min_value: 0.0 + max_value: 0.9999 + +dataloader: + batch_size: 64 + num_workers: 8 + shuffle: True + pin_memory: True + persistent_workers: True + +val_dataloader: + batch_size: 64 + num_workers: 8 + shuffle: False + pin_memory: True + persistent_workers: True + +optimizer: + _target_: torch.optim.AdamW + lr: 1.0e-4 + betas: [0.95, 0.999] + eps: 1.0e-8 + weight_decay: 1.0e-6 + +training: + device: "cuda:0" + seed: 42 + debug: False + resume: True + # optimization + lr_scheduler: cosine + lr_warmup_steps: 500 + num_epochs: 600 + gradient_accumulate_every: 1 + # EMA destroys performance when used with BatchNorm + # replace BatchNorm with GroupNorm. 
+ use_ema: True + freeze_encoder: True + # training loop control + # in epochs + rollout_every: 50 + checkpoint_every: 50 + val_every: 1 + sample_every: 5 + # steps per epoch + max_train_steps: null + max_val_steps: null + # misc + tqdm_interval_sec: 1.0 + +logging: + project: diffusion_policy_debug + resume: True + mode: online + name: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} + tags: ["${name}", "${task_name}", "${exp_name}"] + id: null + group: null + +checkpoint: + topk: + monitor_key: train_loss + mode: min + k: 5 + format_str: 'epoch={epoch:04d}-train_loss={train_loss:.3f}.ckpt' + save_last_ckpt: True + save_last_snapshot: False + +multi_run: + run_dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + wandb_name_base: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} + +hydra: + job: + override_dirname: ${name} + run: + dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + sweep: + dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + subdir: ${hydra.job.num} diff --git a/diffusion_policy/config/train_diffusion_unet_video_workspace.yaml b/diffusion_policy/config/train_diffusion_unet_video_workspace.yaml new file mode 100644 index 0000000..6abe7b6 --- /dev/null +++ b/diffusion_policy/config/train_diffusion_unet_video_workspace.yaml @@ -0,0 +1,128 @@ +defaults: + - _self_ + - task: lift_image_abs + +name: train_diffusion_unet_video +_target_: diffusion_policy.workspace.train_diffusion_unet_video_workspace.TrainDiffusionUnetVideoWorkspace + +task_name: ${task.name} +shape_meta: ${task.shape_meta} +exp_name: "default" + +horizon: 16 +n_obs_steps: 4 +n_action_steps: 8 +past_action_visible: False +keypoint_visible_rate: 1.0 +lowdim_as_global_cond: True + +policy: + _target_: diffusion_policy.policy.diffusion_unet_video_policy.DiffusionUnetVideoPolicy + + shape_meta: ${shape_meta} + + noise_scheduler: + _target_: diffusers.schedulers.scheduling_ddpm.DDPMScheduler + num_train_timesteps: 100 + beta_start: 0.0001 
+ beta_end: 0.02 + beta_schedule: squaredcos_cap_v2 + variance_type: fixed_small # Yilun's paper uses fixed_small_log instead, but easy to cause Nan + clip_sample: True # required when predict_epsilon=False + prediction_type: epsilon # or sample + + rgb_net: + _target_: diffusion_policy.model.obs_encoder.video_core.VideoCore + + backbone: + _target_: diffusion_policy.model.obs_encoder.video_core.VideoResNet + + norm_groups: 8 + input_channel: 3 + model_depth: 50 # ResNet 50 (18,34 not yet avaliable) + + pool: + _target_: diffusion_policy.model.ibc.global_avgpool.GlobalAvgpool + + dim: [2,3,4] + + horizon: ${horizon} + n_action_steps: ${n_action_steps} + n_obs_steps: ${n_obs_steps} + num_inference_steps: 100 + lowdim_as_global_cond: ${lowdim_as_global_cond} + diffusion_step_embed_dim: 128 + down_dims: [512, 1024, 2048] + kernel_size: 5 + n_groups: 8 + cond_predict_scale: True + + # TemporalAggregator Parameters + channel_mults: [1,1] + n_blocks_per_level: 1 + ta_kernel_size: 3 + ta_n_groups: 1 + + # scheduler.step params + # predict_epsilon: True + +ema: + _target_: diffusion_policy.model.diffusion.ema_model.EMAModel + update_after_step: 0 + inv_gamma: 1.0 + power: 0.75 + min_value: 0.0 + max_value: 0.9999 + +dataloader: + batch_size: 32 + num_workers: 1 + shuffle: True + pin_memory: True + +optimizer: + _target_: torch.optim.AdamW + lr: 0.0001 # 1e-4 + betas: [0.95, 0.999] + eps: 0.00000001 # 1e-8 + weight_decay: 0.000001 # 1e-6 + +training: + device: "cuda:0" + lr_scheduler: cosine + lr_warmup_steps: 500 + num_epochs: 1500 + gradient_accumulate_every: 1 + eval_every: 5000 + eval_first: False + val_every: 300 + use_ema: False + tqdm_interval_sec: 1.0 + seed: 42 + +logging: + project: diffusion_policy_debug + resume: True + mode: online + name: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} + tags: ["${name}", "${task_name}", "${exp_name}"] + id: null + group: null + +checkpoint: + topk: + monitor_key: test_score + mode: max + k: 5 + format_str: 
'epoch={epoch:03d}-test_score={test_score:.3f}.ckpt' + save_last_ckpt: False + save_last_snapshot: False + +hydra: + job: + override_dirname: ${name} + run: + dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + sweep: + dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + subdir: ${hydra.job.num} diff --git a/diffusion_policy/config/train_ibc_dfo_hybrid_workspace.yaml b/diffusion_policy/config/train_ibc_dfo_hybrid_workspace.yaml new file mode 100644 index 0000000..b6f5d47 --- /dev/null +++ b/diffusion_policy/config/train_ibc_dfo_hybrid_workspace.yaml @@ -0,0 +1,111 @@ +defaults: + - _self_ + - task: pusht_image + +name: train_ibc_dfo_hybrid +_target_: diffusion_policy.workspace.train_ibc_dfo_hybrid_workspace.TrainIbcDfoHybridWorkspace + +task_name: ${task.name} +shape_meta: ${task.shape_meta} +exp_name: "default" + +horizon: 2 +n_obs_steps: 2 +n_action_steps: 1 +n_latency_steps: 0 +dataset_obs_steps: ${n_obs_steps} +past_action_visible: False +keypoint_visible_rate: 1.0 + +policy: + _target_: diffusion_policy.policy.ibc_dfo_hybrid_image_policy.IbcDfoHybridImagePolicy + + shape_meta: ${shape_meta} + + horizon: ${horizon} + n_action_steps: ${eval:'${n_action_steps}+${n_latency_steps}'} + n_obs_steps: ${n_obs_steps} + dropout: 0.1 + train_n_neg: 1024 + pred_n_iter: 5 + pred_n_samples: 1024 + kevin_inference: False + andy_train: False + obs_encoder_group_norm: True + eval_fixed_crop: True + crop_shape: [84, 84] + +dataloader: + batch_size: 128 + num_workers: 8 + shuffle: True + pin_memory: True + persistent_workers: False + +val_dataloader: + batch_size: 128 + num_workers: 8 + shuffle: False + pin_memory: True + persistent_workers: False + +optimizer: + _target_: torch.optim.AdamW + lr: 1.0e-4 + betas: [0.95, 0.999] + eps: 1.0e-8 + weight_decay: 1.0e-6 + +training: + device: "cuda:0" + seed: 42 + debug: False + resume: True + # optimization + lr_scheduler: cosine + lr_warmup_steps: 500 + num_epochs: 3050 + 
gradient_accumulate_every: 1 + # training loop control + # in epochs + rollout_every: 50 + checkpoint_every: 50 + val_every: 1 + sample_every: 5 + sample_max_batch: 128 + # steps per epoch + max_train_steps: null + max_val_steps: null + # misc + tqdm_interval_sec: 1.0 + +logging: + project: diffusion_policy_debug + resume: True + mode: online + name: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} + tags: ["${name}", "${task_name}", "${exp_name}"] + id: null + group: null + +checkpoint: + topk: + monitor_key: test_mean_score + mode: max + k: 5 + format_str: 'epoch={epoch:04d}-test_mean_score={test_mean_score:.3f}.ckpt' + save_last_ckpt: True + save_last_snapshot: False + +multi_run: + run_dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + wandb_name_base: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} + +hydra: + job: + override_dirname: ${name} + run: + dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + sweep: + dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + subdir: ${hydra.job.num} diff --git a/diffusion_policy/config/train_ibc_dfo_lowdim_workspace.yaml b/diffusion_policy/config/train_ibc_dfo_lowdim_workspace.yaml new file mode 100644 index 0000000..9e73a93 --- /dev/null +++ b/diffusion_policy/config/train_ibc_dfo_lowdim_workspace.yaml @@ -0,0 +1,109 @@ +defaults: + - _self_ + - task: pusht_lowdim + +name: train_ibc_dfo_lowdim +_target_: diffusion_policy.workspace.train_ibc_dfo_lowdim_workspace.TrainIbcDfoLowdimWorkspace + +obs_dim: ${task.obs_dim} +action_dim: ${task.action_dim} +keypoint_dim: ${task.keypoint_dim} +task_name: ${task.name} +exp_name: "default" + +horizon: 2 +n_obs_steps: 2 +n_action_steps: 1 +n_latency_steps: 0 +past_action_visible: False +keypoint_visible_rate: 1.0 + +policy: + _target_: diffusion_policy.policy.ibc_dfo_lowdim_policy.IbcDfoLowdimPolicy + + horizon: ${horizon} + obs_dim: ${obs_dim} + action_dim: ${action_dim} + n_action_steps: 
${eval:'${n_action_steps}+${n_latency_steps}'} + n_obs_steps: ${n_obs_steps} + dropout: 0.1 + train_n_neg: 1024 + pred_n_iter: 5 + pred_n_samples: 1024 + kevin_inference: False + andy_train: False + +dataloader: + batch_size: 256 + num_workers: 1 + shuffle: True + pin_memory: True + persistent_workers: False + +val_dataloader: + batch_size: 256 + num_workers: 1 + shuffle: False + pin_memory: True + persistent_workers: False + +optimizer: + _target_: torch.optim.AdamW + lr: 1.0e-4 + betas: [0.95, 0.999] + eps: 1.0e-8 + weight_decay: 1.0e-6 + +training: + device: "cuda:0" + seed: 42 + debug: False + resume: True + # optimization + lr_scheduler: cosine + lr_warmup_steps: 500 + num_epochs: 5000 + gradient_accumulate_every: 1 + # training loop control + # in epochs + rollout_every: 50 + checkpoint_every: 50 + val_every: 1 + sample_every: 5 + sample_max_batch: 128 + # steps per epoch + max_train_steps: null + max_val_steps: null + # misc + tqdm_interval_sec: 1.0 + +logging: + project: diffusion_policy_debug + resume: True + mode: online + name: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} + tags: ["${name}", "${task_name}", "${exp_name}"] + id: null + group: null + +checkpoint: + topk: + monitor_key: test_mean_score + mode: max + k: 5 + format_str: 'epoch={epoch:04d}-test_mean_score={test_mean_score:.3f}.ckpt' + save_last_ckpt: True + save_last_snapshot: False + +multi_run: + run_dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + wandb_name_base: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} + +hydra: + job: + override_dirname: ${name} + run: + dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + sweep: + dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + subdir: ${hydra.job.num} diff --git a/diffusion_policy/config/train_ibc_dfo_real_hybrid_workspace.yaml b/diffusion_policy/config/train_ibc_dfo_real_hybrid_workspace.yaml new file mode 100644 index 0000000..8255dc1 --- /dev/null +++ 
b/diffusion_policy/config/train_ibc_dfo_real_hybrid_workspace.yaml @@ -0,0 +1,111 @@ +defaults: + - _self_ + - task: real_pusht_image + +name: train_ibc_dfo_hybrid +_target_: diffusion_policy.workspace.train_ibc_dfo_hybrid_workspace.TrainIbcDfoHybridWorkspace + +task_name: ${task.name} +shape_meta: ${task.shape_meta} +exp_name: "default" + +horizon: 2 +n_obs_steps: 2 +n_action_steps: 1 +n_latency_steps: 1 +dataset_obs_steps: ${n_obs_steps} +past_action_visible: False +keypoint_visible_rate: 1.0 + +policy: + _target_: diffusion_policy.policy.ibc_dfo_hybrid_image_policy.IbcDfoHybridImagePolicy + + shape_meta: ${shape_meta} + + horizon: ${horizon} + n_action_steps: ${n_action_steps} + n_obs_steps: ${n_obs_steps} + dropout: 0.1 + train_n_neg: 256 + pred_n_iter: 3 + pred_n_samples: 1024 + kevin_inference: False + andy_train: False + obs_encoder_group_norm: True + eval_fixed_crop: True + crop_shape: [216, 288] # ch, cw 320x240 90% + +dataloader: + batch_size: 128 + num_workers: 8 + shuffle: True + pin_memory: True + persistent_workers: False + +val_dataloader: + batch_size: 128 + num_workers: 1 + shuffle: False + pin_memory: True + persistent_workers: False + +optimizer: + _target_: torch.optim.AdamW + lr: 1.0e-4 + betas: [0.95, 0.999] + eps: 1.0e-8 + weight_decay: 1.0e-6 + +training: + device: "cuda:0" + seed: 42 + debug: False + resume: True + # optimization + lr_scheduler: cosine + lr_warmup_steps: 500 + num_epochs: 1000 + gradient_accumulate_every: 1 + # training loop control + # in epochs + rollout_every: 50 + checkpoint_every: 5 + val_every: 1 + sample_every: 5 + sample_max_batch: 128 + # steps per epoch + max_train_steps: null + max_val_steps: null + # misc + tqdm_interval_sec: 1.0 + +logging: + project: diffusion_policy_debug + resume: True + mode: online + name: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} + tags: ["${name}", "${task_name}", "${exp_name}"] + id: null + group: null + +checkpoint: + topk: + monitor_key: train_action_mse_error + mode: min + k: 5 + 
format_str: 'epoch={epoch:04d}-train_action_mse_error={train_action_mse_error:.3f}.ckpt' + save_last_ckpt: True + save_last_snapshot: False + +multi_run: + run_dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + wandb_name_base: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} + +hydra: + job: + override_dirname: ${name} + run: + dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + sweep: + dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + subdir: ${hydra.job.num} diff --git a/diffusion_policy/config/train_robomimic_image_workspace.yaml b/diffusion_policy/config/train_robomimic_image_workspace.yaml new file mode 100644 index 0000000..4cea6ab --- /dev/null +++ b/diffusion_policy/config/train_robomimic_image_workspace.yaml @@ -0,0 +1,92 @@ +defaults: + - _self_ + - task: lift_image + +name: train_robomimic_image +_target_: diffusion_policy.workspace.train_robomimic_image_workspace.TrainRobomimicImageWorkspace + +task_name: ${task.name} +shape_meta: ${task.shape_meta} +exp_name: "default" + +horizon: &horizon 10 +n_obs_steps: 1 +n_action_steps: 1 +n_latency_steps: 0 +dataset_obs_steps: *horizon +past_action_visible: False +keypoint_visible_rate: 1.0 + +policy: + _target_: diffusion_policy.policy.robomimic_image_policy.RobomimicImagePolicy + shape_meta: ${shape_meta} + algo_name: bc_rnn + obs_type: image + # oc.select resolver: key, default + task_name: ${oc.select:task.task_name,lift} + dataset_type: ${oc.select:task.dataset_type,ph} + crop_shape: [76,76] + +dataloader: + batch_size: 64 + num_workers: 16 + shuffle: True + pin_memory: True + persistent_workers: False + +val_dataloader: + batch_size: 64 + num_workers: 16 + shuffle: False + pin_memory: True + persistent_workers: False + +training: + device: "cuda:0" + seed: 42 + debug: False + resume: True + # optimization + num_epochs: 3050 + # training loop control + # in epochs + rollout_every: 50 + checkpoint_every: 50 + val_every: 1 + sample_every: 5 + # 
steps per epoch + max_train_steps: null + max_val_steps: null + # misc + tqdm_interval_sec: 1.0 + +logging: + project: diffusion_policy_debug + resume: True + mode: online + name: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} + tags: ["${name}", "${task_name}", "${exp_name}"] + id: null + group: null + +checkpoint: + topk: + monitor_key: test_mean_score + mode: max + k: 5 + format_str: 'epoch={epoch:04d}-test_mean_score={test_mean_score:.3f}.ckpt' + save_last_ckpt: True + save_last_snapshot: False + +multi_run: + run_dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + wandb_name_base: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} + +hydra: + job: + override_dirname: ${name} + run: + dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + sweep: + dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + subdir: ${hydra.job.num} diff --git a/diffusion_policy/config/train_robomimic_lowdim_workspace.yaml b/diffusion_policy/config/train_robomimic_lowdim_workspace.yaml new file mode 100644 index 0000000..a657d1b --- /dev/null +++ b/diffusion_policy/config/train_robomimic_lowdim_workspace.yaml @@ -0,0 +1,92 @@ +defaults: + - _self_ + - task: pusht_lowdim + +name: train_robomimic_lowdim +_target_: diffusion_policy.workspace.train_robomimic_lowdim_workspace.TrainRobomimicLowdimWorkspace + +obs_dim: ${task.obs_dim} +action_dim: ${task.action_dim} +transition_dim: "${eval: ${task.obs_dim} + ${task.action_dim}}" +task_name: ${task.name} +exp_name: "default" + +horizon: 10 +n_obs_steps: 1 +n_action_steps: 1 +n_latency_steps: 0 +past_action_visible: False +keypoint_visible_rate: 1.0 + +policy: + _target_: diffusion_policy.policy.robomimic_lowdim_policy.RobomimicLowdimPolicy + action_dim: ${action_dim} + obs_dim: ${obs_dim} + algo_name: bc_rnn + obs_type: low_dim + # oc.select resolver: key, default + task_name: ${oc.select:task.task_name,lift} + dataset_type: ${oc.select:task.dataset_type,ph} + +dataloader: + batch_size: 256 
+ num_workers: 1 + shuffle: True + pin_memory: True + persistent_workers: False + +val_dataloader: + batch_size: 256 + num_workers: 1 + shuffle: False + pin_memory: True + persistent_workers: False + +training: + device: "cuda:0" + seed: 42 + debug: False + resume: True + # optimization + num_epochs: 5000 + # training loop control + # in epochs + rollout_every: 50 + checkpoint_every: 50 + val_every: 1 + # steps per epoch + max_train_steps: null + max_val_steps: null + # misc + tqdm_interval_sec: 1.0 + +logging: + project: diffusion_policy_debug + resume: True + mode: online + name: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} + tags: ["${name}", "${task_name}", "${exp_name}"] + id: null + group: null + +checkpoint: + topk: + monitor_key: test_mean_score + mode: max + k: 5 + format_str: 'epoch={epoch:04d}-test_mean_score={test_mean_score:.3f}.ckpt' + save_last_ckpt: True + save_last_snapshot: False + +multi_run: + run_dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + wandb_name_base: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} + +hydra: + job: + override_dirname: ${name} + run: + dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + sweep: + dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + subdir: ${hydra.job.num} diff --git a/diffusion_policy/config/train_robomimic_real_image_workspace.yaml b/diffusion_policy/config/train_robomimic_real_image_workspace.yaml new file mode 100644 index 0000000..983976c --- /dev/null +++ b/diffusion_policy/config/train_robomimic_real_image_workspace.yaml @@ -0,0 +1,92 @@ +defaults: + - _self_ + - task: real_pusht_image + +name: train_robomimic_image +_target_: diffusion_policy.workspace.train_robomimic_image_workspace.TrainRobomimicImageWorkspace + +task_name: ${task.name} +shape_meta: ${task.shape_meta} +exp_name: "default" + +horizon: &horizon 10 +n_obs_steps: 1 +n_action_steps: 1 +n_latency_steps: 1 +dataset_obs_steps: *horizon +past_action_visible: False 
+keypoint_visible_rate: 1.0 + +policy: + _target_: diffusion_policy.policy.robomimic_image_policy.RobomimicImagePolicy + shape_meta: ${shape_meta} + algo_name: bc_rnn + obs_type: image + # oc.select resolver: key, default + task_name: ${oc.select:task.task_name,tool_hang} + dataset_type: ${oc.select:task.dataset_type,ph} + crop_shape: [216, 288] # ch, cw 320x240 90% + +dataloader: + batch_size: 32 + num_workers: 8 + shuffle: True + pin_memory: True + persistent_workers: True + +val_dataloader: + batch_size: 32 + num_workers: 1 + shuffle: False + pin_memory: True + persistent_workers: False + +training: + device: "cuda:0" + seed: 42 + debug: False + resume: True + # optimization + num_epochs: 1000 + # training loop control + # in epochs + rollout_every: 50 + checkpoint_every: 50 + val_every: 1 + sample_every: 5 + # steps per epoch + max_train_steps: null + max_val_steps: null + # misc + tqdm_interval_sec: 1.0 + +logging: + project: diffusion_policy_debug + resume: True + mode: online + name: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} + tags: ["${name}", "${task_name}", "${exp_name}"] + id: null + group: null + +checkpoint: + topk: + monitor_key: train_loss + mode: min + k: 5 + format_str: 'epoch={epoch:04d}-train_loss={train_loss:.3f}.ckpt' + save_last_ckpt: True + save_last_snapshot: False + +multi_run: + run_dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + wandb_name_base: ${now:%Y.%m.%d-%H.%M.%S}_${name}_${task_name} + +hydra: + job: + override_dirname: ${name} + run: + dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + sweep: + dir: data/outputs/${now:%Y.%m.%d}/${now:%H.%M.%S}_${name}_${task_name} + subdir: ${hydra.job.num} diff --git a/diffusion_policy/dataset/base_dataset.py b/diffusion_policy/dataset/base_dataset.py new file mode 100644 index 0000000..72a39e8 --- /dev/null +++ b/diffusion_policy/dataset/base_dataset.py @@ -0,0 +1,51 @@ +from typing import Dict + +import torch +import torch.nn +from 
diffusion_policy.model.common.normalizer import LinearNormalizer + +class BaseLowdimDataset(torch.utils.data.Dataset): + def get_validation_dataset(self) -> 'BaseLowdimDataset': + # return an empty dataset by default + return BaseLowdimDataset() + + def get_normalizer(self, **kwargs) -> LinearNormalizer: + raise NotImplementedError() + + def get_all_actions(self) -> torch.Tensor: + raise NotImplementedError() + + def __len__(self) -> int: + return 0 + + def __getitem__(self, idx: int) -> Dict[str, torch.Tensor]: + """ + output: + obs: T, Do + action: T, Da + """ + raise NotImplementedError() + + +class BaseImageDataset(torch.utils.data.Dataset): + def get_validation_dataset(self) -> 'BaseLowdimDataset': + # return an empty dataset by default + return BaseImageDataset() + + def get_normalizer(self, **kwargs) -> LinearNormalizer: + raise NotImplementedError() + + def get_all_actions(self) -> torch.Tensor: + raise NotImplementedError() + + def __len__(self) -> int: + return 0 + + def __getitem__(self, idx: int) -> Dict[str, torch.Tensor]: + """ + output: + obs: + key: T, * + action: T, Da + """ + raise NotImplementedError() diff --git a/diffusion_policy/dataset/blockpush_lowdim_dataset.py b/diffusion_policy/dataset/blockpush_lowdim_dataset.py new file mode 100644 index 0000000..86242c2 --- /dev/null +++ b/diffusion_policy/dataset/blockpush_lowdim_dataset.py @@ -0,0 +1,126 @@ +from typing import Dict +import torch +import numpy as np +import copy +from diffusion_policy.common.pytorch_util import dict_apply +from diffusion_policy.common.replay_buffer import ReplayBuffer +from diffusion_policy.common.sampler import SequenceSampler, get_val_mask +from diffusion_policy.model.common.normalizer import LinearNormalizer, SingleFieldLinearNormalizer +from diffusion_policy.dataset.base_dataset import BaseLowdimDataset + +class BlockPushLowdimDataset(BaseLowdimDataset): + def __init__(self, + zarr_path, + horizon=1, + pad_before=0, + pad_after=0, + obs_key='obs', + 
action_key='action', + obs_eef_target=True, + use_manual_normalizer=False, + seed=42, + val_ratio=0.0 + ): + super().__init__() + self.replay_buffer = ReplayBuffer.copy_from_path( + zarr_path, keys=[obs_key, action_key]) + + val_mask = get_val_mask( + n_episodes=self.replay_buffer.n_episodes, + val_ratio=val_ratio, + seed=seed) + train_mask = ~val_mask + self.sampler = SequenceSampler( + replay_buffer=self.replay_buffer, + sequence_length=horizon, + pad_before=pad_before, + pad_after=pad_after, + episode_mask=train_mask) + self.obs_key = obs_key + self.action_key = action_key + self.obs_eef_target = obs_eef_target + self.use_manual_normalizer = use_manual_normalizer + self.train_mask = train_mask + self.horizon = horizon + self.pad_before = pad_before + self.pad_after = pad_after + + def get_validation_dataset(self): + val_set = copy.copy(self) + val_set.sampler = SequenceSampler( + replay_buffer=self.replay_buffer, + sequence_length=self.horizon, + pad_before=self.pad_before, + pad_after=self.pad_after, + episode_mask=~self.train_mask + ) + val_set.train_mask = ~self.train_mask + return val_set + + def get_normalizer(self, mode='limits', **kwargs): + data = self._sample_to_data(self.replay_buffer) + + normalizer = LinearNormalizer() + if not self.use_manual_normalizer: + normalizer.fit(data=data, last_n_dims=1, mode=mode, **kwargs) + else: + x = data['obs'] + stat = { + 'max': np.max(x, axis=0), + 'min': np.min(x, axis=0), + 'mean': np.mean(x, axis=0), + 'std': np.std(x, axis=0) + } + + is_x = np.zeros(stat['max'].shape, dtype=bool) + is_y = np.zeros_like(is_x) + is_x[[0,3,6,8,10,13]] = True + is_y[[1,4,7,9,11,14]] = True + is_rot = ~(is_x|is_y) + + def normalizer_with_masks(stat, masks): + global_scale = np.ones_like(stat['max']) + global_offset = np.zeros_like(stat['max']) + for mask in masks: + output_max = 1 + output_min = -1 + input_max = stat['max'][mask].max() + input_min = stat['min'][mask].min() + input_range = input_max - input_min + scale = (output_max 
class KitchenLowdimDataset(BaseLowdimDataset):
    """Franka-kitchen lowdim dataset loaded from pre-extracted .npy sequence files.

    Episodes are stored zero-padded to a common length; `existence_mask.npy`
    records how many steps of each episode are real.
    """

    def __init__(self,
            dataset_dir,
            horizon=1,
            pad_before=0,
            pad_after=0,
            seed=42,
            val_ratio=0.0
        ):
        super().__init__()

        root = pathlib.Path(dataset_dir)
        obs_seq = np.load(root / "observations_seq.npy")
        act_seq = np.load(root / "actions_seq.npy")
        exist_mask = np.load(root / "existence_mask.npy")

        # Trim the padding off every episode and load it into the replay buffer.
        self.replay_buffer = ReplayBuffer.create_empty_numpy()
        for episode_obs, episode_act, episode_mask in zip(obs_seq, act_seq, exist_mask):
            n_valid = int(episode_mask.sum())
            self.replay_buffer.add_episode({
                'obs': episode_obs[:n_valid].astype(np.float32),
                'action': episode_act[:n_valid].astype(np.float32),
            })

        val_mask = get_val_mask(
            n_episodes=self.replay_buffer.n_episodes,
            val_ratio=val_ratio,
            seed=seed)
        train_mask = ~val_mask
        self.sampler = SequenceSampler(
            replay_buffer=self.replay_buffer,
            sequence_length=horizon,
            pad_before=pad_before,
            pad_after=pad_after,
            episode_mask=train_mask)

        self.train_mask = train_mask
        self.horizon = horizon
        self.pad_before = pad_before
        self.pad_after = pad_after

    def get_validation_dataset(self):
        """Return a shallow copy of self whose sampler walks the held-out episodes."""
        val_set = copy.copy(self)
        val_set.sampler = SequenceSampler(
            replay_buffer=self.replay_buffer,
            sequence_length=self.horizon,
            pad_before=self.pad_before,
            pad_after=self.pad_after,
            episode_mask=~self.train_mask)
        val_set.train_mask = ~self.train_mask
        return val_set

    def get_normalizer(self, mode='limits', **kwargs):
        """Fit a LinearNormalizer on the full obs/action arrays."""
        # to prevent blowing up dims that barely change
        kwargs.setdefault('range_eps', 5e-2)
        normalizer = LinearNormalizer()
        normalizer.fit(
            data={
                'obs': self.replay_buffer['obs'],
                'action': self.replay_buffer['action'],
            },
            last_n_dims=1, mode=mode, **kwargs)
        return normalizer

    def get_all_actions(self) -> torch.Tensor:
        return torch.from_numpy(self.replay_buffer['action'])

    def __len__(self) -> int:
        return len(self.sampler)

    def __getitem__(self, idx: int) -> Dict[str, torch.Tensor]:
        sequence = self.sampler.sample_sequence(idx)
        return dict_apply(sequence, torch.from_numpy)
class KitchenMjlLowdimDataset(BaseLowdimDataset):
    """Franka-kitchen lowdim dataset parsed directly from relay-policy .mjl logs."""

    def __init__(self,
            dataset_dir,
            horizon=1,
            pad_before=0,
            pad_after=0,
            abs_action=True,
            robot_noise_ratio=0.0,
            seed=42,
            val_ratio=0.0
        ):
        super().__init__()

        if not abs_action:
            raise NotImplementedError()

        # Per-dimension noise amplitude for the 30 robot observation dims.
        robot_pos_noise_amp = np.array(
            [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,
             0.1, 0.005, 0.005, 0.0005, 0.0005, 0.0005, 0.0005, 0.0005,
             0.0005, 0.005, 0.005, 0.005, 0.1, 0.1, 0.1, 0.005,
             0.005, 0.005, 0.1, 0.1, 0.1, 0.005], dtype=np.float32)
        rng = np.random.default_rng(seed=seed)

        self.replay_buffer = ReplayBuffer.create_empty_numpy()
        mjl_paths = list(pathlib.Path(dataset_dir).glob('*/*.mjl'))
        for file_idx, mjl_path in enumerate(tqdm(mjl_paths)):
            try:
                parsed = parse_mjl_logs(str(mjl_path.absolute()), skipamount=40)
                qpos = parsed['qpos'].astype(np.float32)
                n_steps = len(qpos)
                # 9 robot joint dims + last 21 env dims + 30 zero-padded dims.
                obs = np.concatenate([
                    qpos[:, :9],
                    qpos[:, -21:],
                    np.zeros((n_steps, 30), dtype=np.float32)
                ], axis=-1)
                if robot_noise_ratio > 0:
                    # add observation noise to match real robot
                    noise = robot_noise_ratio * robot_pos_noise_amp * rng.uniform(
                        low=-1., high=1., size=(n_steps, 30))
                    obs[:, :30] += noise
                self.replay_buffer.add_episode({
                    'obs': obs,
                    'action': parsed['ctrl'].astype(np.float32)
                })
            except Exception as e:
                # best-effort loading: report the bad log and keep going
                print(file_idx, e)

        val_mask = get_val_mask(
            n_episodes=self.replay_buffer.n_episodes,
            val_ratio=val_ratio,
            seed=seed)
        train_mask = ~val_mask
        self.sampler = SequenceSampler(
            replay_buffer=self.replay_buffer,
            sequence_length=horizon,
            pad_before=pad_before,
            pad_after=pad_after,
            episode_mask=train_mask)

        self.train_mask = train_mask
        self.horizon = horizon
        self.pad_before = pad_before
        self.pad_after = pad_after

    def get_validation_dataset(self):
        """Return a shallow copy of self whose sampler walks the held-out episodes."""
        val_set = copy.copy(self)
        val_set.sampler = SequenceSampler(
            replay_buffer=self.replay_buffer,
            sequence_length=self.horizon,
            pad_before=self.pad_before,
            pad_after=self.pad_after,
            episode_mask=~self.train_mask)
        val_set.train_mask = ~self.train_mask
        return val_set

    def get_normalizer(self, mode='limits', **kwargs):
        """Fit a LinearNormalizer on the full obs/action arrays."""
        # to prevent blowing up dims that barely change
        kwargs.setdefault('range_eps', 5e-2)
        normalizer = LinearNormalizer()
        normalizer.fit(
            data={
                'obs': self.replay_buffer['obs'],
                'action': self.replay_buffer['action'],
            },
            last_n_dims=1, mode=mode, **kwargs)
        return normalizer

    def get_all_actions(self) -> torch.Tensor:
        return torch.from_numpy(self.replay_buffer['action'])

    def __len__(self) -> int:
        return len(self.sampler)

    def __getitem__(self, idx: int) -> Dict[str, torch.Tensor]:
        sequence = self.sampler.sample_sequence(idx)
        return dict_apply(sequence, torch.from_numpy)
class PushTLowdimDataset(BaseLowdimDataset):
    """Push-T lowdim dataset: flattened keypoints + agent position from a zarr store."""

    def __init__(self,
            zarr_path,
            horizon=1,
            pad_before=0,
            pad_after=0,
            obs_key='keypoint',
            state_key='state',
            action_key='action',
            seed=42,
            val_ratio=0.0,
            max_train_episodes=None
        ):
        super().__init__()
        self.replay_buffer = ReplayBuffer.copy_from_path(
            zarr_path, keys=[obs_key, state_key, action_key])

        val_mask = get_val_mask(
            n_episodes=self.replay_buffer.n_episodes,
            val_ratio=val_ratio,
            seed=seed)
        train_mask = ~val_mask
        # optionally cap the number of training episodes (data-efficiency ablations)
        train_mask = downsample_mask(
            mask=train_mask,
            max_n=max_train_episodes,
            seed=seed)

        self.sampler = SequenceSampler(
            replay_buffer=self.replay_buffer,
            sequence_length=horizon,
            pad_before=pad_before,
            pad_after=pad_after,
            episode_mask=train_mask
        )
        self.obs_key = obs_key
        self.state_key = state_key
        self.action_key = action_key
        self.train_mask = train_mask
        self.horizon = horizon
        self.pad_before = pad_before
        self.pad_after = pad_after

    def get_validation_dataset(self):
        """Return a shallow copy of self whose sampler walks the held-out episodes."""
        val_set = copy.copy(self)
        val_set.sampler = SequenceSampler(
            replay_buffer=self.replay_buffer,
            sequence_length=self.horizon,
            pad_before=self.pad_before,
            pad_after=self.pad_after,
            episode_mask=~self.train_mask
        )
        val_set.train_mask = ~self.train_mask
        return val_set

    def get_normalizer(self, mode='limits', **kwargs):
        """Fit a LinearNormalizer on the full obs/action arrays."""
        data = self._sample_to_data(self.replay_buffer)
        normalizer = LinearNormalizer()
        normalizer.fit(data=data, last_n_dims=1, mode=mode, **kwargs)
        return normalizer

    def get_all_actions(self) -> torch.Tensor:
        return torch.from_numpy(self.replay_buffer[self.action_key])

    def __len__(self) -> int:
        return len(self.sampler)

    def _sample_to_data(self, sample):
        # obs = [flattened keypoints | agent xy]
        keypoint = sample[self.obs_key]
        state = sample[self.state_key]
        agent_pos = state[:, :2]
        obs = np.concatenate([
            keypoint.reshape(keypoint.shape[0], -1),
            agent_pos], axis=-1)

        data = {
            'obs': obs,  # T, D_o
            'action': sample[self.action_key],  # T, D_a
        }
        return data

    def __getitem__(self, idx: int) -> Dict[str, torch.Tensor]:
        sample = self.sampler.sample_sequence(idx)
        data = self._sample_to_data(sample)

        torch_data = dict_apply(data, torch.from_numpy)
        return torch_data


class PushTImageDataset(BaseImageDataset):
    """Push-T image dataset: RGB frames + agent position from a zarr store."""

    def __init__(self,
            zarr_path,
            horizon=1,
            pad_before=0,
            pad_after=0,
            seed=42,
            val_ratio=0.0,
            max_train_episodes=None
        ):

        super().__init__()
        self.replay_buffer = ReplayBuffer.copy_from_path(
            zarr_path, keys=['img', 'state', 'action'])
        val_mask = get_val_mask(
            n_episodes=self.replay_buffer.n_episodes,
            val_ratio=val_ratio,
            seed=seed)
        train_mask = ~val_mask
        # optionally cap the number of training episodes (data-efficiency ablations)
        train_mask = downsample_mask(
            mask=train_mask,
            max_n=max_train_episodes,
            seed=seed)

        self.sampler = SequenceSampler(
            replay_buffer=self.replay_buffer,
            sequence_length=horizon,
            pad_before=pad_before,
            pad_after=pad_after,
            episode_mask=train_mask)
        self.train_mask = train_mask
        self.horizon = horizon
        self.pad_before = pad_before
        self.pad_after = pad_after

    def get_validation_dataset(self):
        """Return a shallow copy of self whose sampler walks the held-out episodes."""
        val_set = copy.copy(self)
        val_set.sampler = SequenceSampler(
            replay_buffer=self.replay_buffer,
            sequence_length=self.horizon,
            pad_before=self.pad_before,
            pad_after=self.pad_after,
            episode_mask=~self.train_mask
        )
        val_set.train_mask = ~self.train_mask
        return val_set

    def get_normalizer(self, mode='limits', **kwargs):
        """Fit normalizers for action/agent_pos; images use the fixed [0,1] range normalizer."""
        data = {
            'action': self.replay_buffer['action'],
            'agent_pos': self.replay_buffer['state'][..., :2]
        }
        normalizer = LinearNormalizer()
        normalizer.fit(data=data, last_n_dims=1, mode=mode, **kwargs)
        normalizer['image'] = get_image_range_normalizer()
        return normalizer

    def __len__(self) -> int:
        return len(self.sampler)

    def _sample_to_data(self, sample):
        agent_pos = sample['state'][:, :2].astype(np.float32)  # (agent_posx2, block_posex3)
        # FIX: cast to float32 BEFORE dividing. `uint8 / 255` promotes to
        # float64, doubling memory and producing float64 tensors, which is
        # inconsistent with the float32 convention used by the other image
        # datasets in this package (cf. RealPushTImageDataset.__getitem__).
        image = np.moveaxis(sample['img'], -1, 1).astype(np.float32) / 255.

        data = {
            'obs': {
                'image': image,  # T, 3, 96, 96
                'agent_pos': agent_pos,  # T, 2
            },
            'action': sample['action'].astype(np.float32)  # T, 2
        }
        return data

    def __getitem__(self, idx: int) -> Dict[str, torch.Tensor]:
        sample = self.sampler.sample_sequence(idx)
        data = self._sample_to_data(sample)
        torch_data = dict_apply(data, torch.from_numpy)
        return torch_data


def test():
    """Smoke test: load a local zarr and build the dataset (requires local data)."""
    import os
    zarr_path = os.path.expanduser('~/dev/diffusion_policy/data/pusht/pusht_cchi_v7_replay.zarr')
    dataset = PushTImageDataset(zarr_path, horizon=16)
class RealPushTImageDataset(BaseImageDataset):
    """Real-robot Push-T image dataset backed by an (optionally disk-cached) zarr ReplayBuffer."""

    def __init__(self,
            shape_meta: dict,
            dataset_path: str,
            horizon=1,
            pad_before=0,
            pad_after=0,
            n_obs_steps=None,
            n_latency_steps=0,
            use_cache=False,
            seed=42,
            val_ratio=0.0,
            max_train_episodes=None,
            delta_action=False,
        ):
        assert os.path.isdir(dataset_path)

        replay_buffer = None
        if use_cache:
            # fingerprint shape_meta so different obs configs get separate caches
            shape_meta_json = json.dumps(OmegaConf.to_container(shape_meta), sort_keys=True)
            shape_meta_hash = hashlib.md5(shape_meta_json.encode('utf-8')).hexdigest()
            cache_zarr_path = os.path.join(dataset_path, shape_meta_hash + '.zarr.zip')
            cache_lock_path = cache_zarr_path + '.lock'
            print('Acquiring lock on cache.')
            with FileLock(cache_lock_path):
                if not os.path.exists(cache_zarr_path):
                    # cache does not exist
                    try:
                        print('Cache does not exist. Creating!')
                        replay_buffer = _get_replay_buffer(
                            dataset_path=dataset_path,
                            shape_meta=shape_meta,
                            store=zarr.MemoryStore()
                        )
                        print('Saving cache to disk.')
                        with zarr.ZipStore(cache_zarr_path) as zip_store:
                            replay_buffer.save_to_store(
                                store=zip_store
                            )
                    except Exception:
                        # FIX: the cache is a single .zarr.zip FILE (zarr.ZipStore);
                        # shutil.rmtree raises NotADirectoryError on files, which
                        # would mask the original exception and leave a partial
                        # cache behind. Remove whichever form actually exists.
                        if os.path.isdir(cache_zarr_path):
                            shutil.rmtree(cache_zarr_path)
                        elif os.path.exists(cache_zarr_path):
                            os.remove(cache_zarr_path)
                        raise
                else:
                    print('Loading cached ReplayBuffer from Disk.')
                    with zarr.ZipStore(cache_zarr_path, mode='r') as zip_store:
                        replay_buffer = ReplayBuffer.copy_from_store(
                            src_store=zip_store, store=zarr.MemoryStore())
                    print('Loaded!')
        else:
            replay_buffer = _get_replay_buffer(
                dataset_path=dataset_path,
                shape_meta=shape_meta,
                store=zarr.MemoryStore()
            )

        if delta_action:
            # replace action as relative to previous frame
            actions = replay_buffer['action'][:]
            # support positions only at this time
            assert actions.shape[1] <= 3
            actions_diff = np.zeros_like(actions)
            episode_ends = replay_buffer.episode_ends[:]
            for i in range(len(episode_ends)):
                start = 0
                if i > 0:
                    start = episode_ends[i-1]
                end = episode_ends[i]
                # delta action is the difference between the previous desired
                # position and the current; it is scheduled at the previous
                # timestep for the current timestep to stay consistent with
                # positional mode
                actions_diff[start+1:end] = np.diff(actions[start:end], axis=0)
            replay_buffer['action'][:] = actions_diff

        rgb_keys = list()
        lowdim_keys = list()
        obs_shape_meta = shape_meta['obs']
        for key, attr in obs_shape_meta.items():
            obs_type = attr.get('type', 'low_dim')
            if obs_type == 'rgb':
                rgb_keys.append(key)
            elif obs_type == 'low_dim':
                lowdim_keys.append(key)

        key_first_k = dict()
        if n_obs_steps is not None:
            # only take first k obs from images
            for key in rgb_keys + lowdim_keys:
                key_first_k[key] = n_obs_steps

        val_mask = get_val_mask(
            n_episodes=replay_buffer.n_episodes,
            val_ratio=val_ratio,
            seed=seed)
        train_mask = ~val_mask
        train_mask = downsample_mask(
            mask=train_mask,
            max_n=max_train_episodes,
            seed=seed)

        sampler = SequenceSampler(
            replay_buffer=replay_buffer,
            sequence_length=horizon+n_latency_steps,
            pad_before=pad_before,
            pad_after=pad_after,
            episode_mask=train_mask,
            key_first_k=key_first_k)

        self.replay_buffer = replay_buffer
        self.sampler = sampler
        self.shape_meta = shape_meta
        self.rgb_keys = rgb_keys
        self.lowdim_keys = lowdim_keys
        self.n_obs_steps = n_obs_steps
        self.val_mask = val_mask
        self.horizon = horizon
        self.n_latency_steps = n_latency_steps
        self.pad_before = pad_before
        self.pad_after = pad_after

    def get_validation_dataset(self):
        """Return a shallow copy of self whose sampler walks the held-out episodes."""
        val_set = copy.copy(self)
        val_set.sampler = SequenceSampler(
            replay_buffer=self.replay_buffer,
            sequence_length=self.horizon+self.n_latency_steps,
            pad_before=self.pad_before,
            pad_after=self.pad_after,
            episode_mask=self.val_mask
        )
        val_set.val_mask = ~self.val_mask
        return val_set

    def get_normalizer(self, **kwargs) -> LinearNormalizer:
        """Fit per-key normalizers; images use the fixed [0,1] range normalizer."""
        normalizer = LinearNormalizer()

        # action
        normalizer['action'] = SingleFieldLinearNormalizer.create_fit(
            self.replay_buffer['action'])

        # obs
        for key in self.lowdim_keys:
            normalizer[key] = SingleFieldLinearNormalizer.create_fit(
                self.replay_buffer[key])

        # image
        for key in self.rgb_keys:
            normalizer[key] = get_image_range_normalizer()
        return normalizer

    def get_all_actions(self) -> torch.Tensor:
        return torch.from_numpy(self.replay_buffer['action'])

    def __len__(self):
        return len(self.sampler)

    def __getitem__(self, idx: int) -> Dict[str, torch.Tensor]:
        # avoid BLAS thread oversubscription inside DataLoader workers
        threadpool_limits(1)
        data = self.sampler.sample_sequence(idx)

        # to save RAM, only return first n_obs_steps of OBS
        # since the rest will be discarded anyway.
        # when self.n_obs_steps is None this slice does nothing (takes all)
        T_slice = slice(self.n_obs_steps)

        obs_dict = dict()
        for key in self.rgb_keys:
            # move channel last to channel first: T,H,W,C -> T,C,H,W
            # convert uint8 image to float32 in [0,1]
            obs_dict[key] = np.moveaxis(data[key][T_slice], -1, 1
                ).astype(np.float32) / 255.
            # save ram
            del data[key]
        for key in self.lowdim_keys:
            obs_dict[key] = data[key][T_slice].astype(np.float32)
            # save ram
            del data[key]

        action = data['action'].astype(np.float32)
        # handle latency by dropping first n_latency_steps actions;
        # observations are already taken care of by T_slice
        if self.n_latency_steps > 0:
            action = action[self.n_latency_steps:]

        torch_data = {
            'obs': dict_apply(obs_dict, torch.from_numpy),
            'action': torch.from_numpy(action)
        }
        return torch_data


def zarr_resize_index_last_dim(zarr_arr, idxs):
    """Shrink the last dim of a zarr array in place to the selected indices."""
    values = zarr_arr[:]
    values = values[..., idxs]
    zarr_arr.resize(zarr_arr.shape[:-1] + (len(idxs),))
    zarr_arr[:] = values
    return zarr_arr


def _get_replay_buffer(dataset_path, shape_meta, store):
    """Build an in-memory ReplayBuffer from a raw real-robot dataset directory."""
    # parse shape meta
    rgb_keys = list()
    lowdim_keys = list()
    out_resolutions = dict()
    lowdim_shapes = dict()
    obs_shape_meta = shape_meta['obs']
    for key, attr in obs_shape_meta.items():
        obs_type = attr.get('type', 'low_dim')
        shape = tuple(attr.get('shape'))
        if obs_type == 'rgb':
            rgb_keys.append(key)
            c, h, w = shape
            out_resolutions[key] = (w, h)
        elif obs_type == 'low_dim':
            lowdim_keys.append(key)
            lowdim_shapes[key] = tuple(shape)
            if 'pose' in key:
                assert tuple(shape) in [(2,), (6,)]

    action_shape = tuple(shape_meta['action']['shape'])
    assert action_shape in [(2,), (6,)]

    # load data; single-threaded cv2/BLAS since conversion already parallelizes
    cv2.setNumThreads(1)
    with threadpool_limits(1):
        replay_buffer = real_data_to_replay_buffer(
            dataset_path=dataset_path,
            out_store=store,
            out_resolutions=out_resolutions,
            lowdim_keys=lowdim_keys + ['action'],
            image_keys=rgb_keys
        )

    # transform lowdim dimensions
    if action_shape == (2,):
        # 2D action space, only controls X and Y
        zarr_arr = replay_buffer['action']
        zarr_resize_index_last_dim(zarr_arr, idxs=[0, 1])

    for key, shape in lowdim_shapes.items():
        if 'pose' in key and shape == (2,):
            # only take X and Y
            zarr_arr = replay_buffer[key]
            zarr_resize_index_last_dim(zarr_arr, idxs=[0, 1])

    return replay_buffer


def test():
    """Smoke test: instantiate from the hydra config and plot action velocities."""
    import hydra
    from omegaconf import OmegaConf
    OmegaConf.register_new_resolver("eval", eval, replace=True)

    with hydra.initialize('../diffusion_policy/config'):
        cfg = hydra.compose('train_robomimic_real_image_workspace')
        OmegaConf.resolve(cfg)
        dataset = hydra.utils.instantiate(cfg.task.dataset)

    from matplotlib import pyplot as plt
    normalizer = dataset.get_normalizer()
    nactions = normalizer['action'].normalize(dataset.replay_buffer['action'][:])
    dists = np.linalg.norm(np.diff(nactions, axis=0), axis=-1)
    _ = plt.hist(dists, bins=100); plt.title('real action velocity')
# register Jpeg2k et al. so cached zarr image chunks can be (de)compressed
register_codecs()

class RobomimicReplayImageDataset(BaseImageDataset):
    """Robomimic image dataset converted from hdf5 into an in-memory zarr ReplayBuffer."""

    def __init__(self,
            shape_meta: dict,
            dataset_path: str,
            horizon=1,
            pad_before=0,
            pad_after=0,
            n_obs_steps=None,
            abs_action=False,
            rotation_rep='rotation_6d', # ignored when abs_action=False
            use_legacy_normalizer=False,
            use_cache=False,
            seed=42,
            val_ratio=0.0
        ):
        rotation_transformer = RotationTransformer(
            from_rep='axis_angle', to_rep=rotation_rep)

        replay_buffer = None
        if use_cache:
            cache_zarr_path = dataset_path + '.zarr.zip'
            cache_lock_path = cache_zarr_path + '.lock'
            print('Acquiring lock on cache.')
            with FileLock(cache_lock_path):
                if not os.path.exists(cache_zarr_path):
                    # cache does not exist
                    try:
                        print('Cache does not exist. Creating!')
                        replay_buffer = _convert_robomimic_to_replay(
                            store=zarr.MemoryStore(),
                            shape_meta=shape_meta,
                            dataset_path=dataset_path,
                            abs_action=abs_action,
                            rotation_transformer=rotation_transformer)
                        print('Saving cache to disk.')
                        with zarr.ZipStore(cache_zarr_path) as zip_store:
                            replay_buffer.save_to_store(
                                store=zip_store
                            )
                    except Exception:
                        # FIX: the cache is a single .zarr.zip FILE (zarr.ZipStore);
                        # shutil.rmtree raises NotADirectoryError on files, which
                        # would mask the original exception and leave a partial
                        # cache behind. Remove whichever form actually exists.
                        if os.path.isdir(cache_zarr_path):
                            shutil.rmtree(cache_zarr_path)
                        elif os.path.exists(cache_zarr_path):
                            os.remove(cache_zarr_path)
                        raise
                else:
                    print('Loading cached ReplayBuffer from Disk.')
                    with zarr.ZipStore(cache_zarr_path, mode='r') as zip_store:
                        replay_buffer = ReplayBuffer.copy_from_store(
                            src_store=zip_store, store=zarr.MemoryStore())
                    print('Loaded!')
        else:
            replay_buffer = _convert_robomimic_to_replay(
                store=zarr.MemoryStore(),
                shape_meta=shape_meta,
                dataset_path=dataset_path,
                abs_action=abs_action,
                rotation_transformer=rotation_transformer)

        rgb_keys = list()
        lowdim_keys = list()
        obs_shape_meta = shape_meta['obs']
        for key, attr in obs_shape_meta.items():
            obs_type = attr.get('type', 'low_dim')
            if obs_type == 'rgb':
                rgb_keys.append(key)
            elif obs_type == 'low_dim':
                lowdim_keys.append(key)

        key_first_k = dict()
        if n_obs_steps is not None:
            # only take first k obs from images
            for key in rgb_keys + lowdim_keys:
                key_first_k[key] = n_obs_steps

        val_mask = get_val_mask(
            n_episodes=replay_buffer.n_episodes,
            val_ratio=val_ratio,
            seed=seed)
        train_mask = ~val_mask
        sampler = SequenceSampler(
            replay_buffer=replay_buffer,
            sequence_length=horizon,
            pad_before=pad_before,
            pad_after=pad_after,
            episode_mask=train_mask,
            key_first_k=key_first_k)

        self.replay_buffer = replay_buffer
        self.sampler = sampler
        self.shape_meta = shape_meta
        self.rgb_keys = rgb_keys
        self.lowdim_keys = lowdim_keys
        self.abs_action = abs_action
        self.n_obs_steps = n_obs_steps
        self.train_mask = train_mask
        self.horizon = horizon
        self.pad_before = pad_before
        self.pad_after = pad_after
        self.use_legacy_normalizer = use_legacy_normalizer

    def get_validation_dataset(self):
        """Return a shallow copy of self whose sampler walks the held-out episodes."""
        val_set = copy.copy(self)
        val_set.sampler = SequenceSampler(
            replay_buffer=self.replay_buffer,
            sequence_length=self.horizon,
            pad_before=self.pad_before,
            pad_after=self.pad_after,
            episode_mask=~self.train_mask
        )
        val_set.train_mask = ~self.train_mask
        return val_set

    def get_normalizer(self, **kwargs) -> LinearNormalizer:
        """Build per-key normalizers; abs-action uses robomimic-specific stats."""
        normalizer = LinearNormalizer()

        # action
        stat = array_to_stats(self.replay_buffer['action'])
        if self.abs_action:
            if stat['mean'].shape[-1] > 10:
                # dual arm
                this_normalizer = robomimic_abs_action_only_dual_arm_normalizer_from_stat(stat)
            else:
                this_normalizer = robomimic_abs_action_only_normalizer_from_stat(stat)

            if self.use_legacy_normalizer:
                this_normalizer = normalizer_from_stat(stat)
        else:
            # delta actions are already normalized
            this_normalizer = get_identity_normalizer_from_stat(stat)
        normalizer['action'] = this_normalizer

        # obs
        for key in self.lowdim_keys:
            stat = array_to_stats(self.replay_buffer[key])

            # NOTE: 'pos' is checked first, so '*qpos' keys also land in the
            # range-normalizer branch (same result as the explicit qpos branch)
            if key.endswith('pos'):
                this_normalizer = get_range_normalizer_from_stat(stat)
            elif key.endswith('quat'):
                # quaternion is in [-1,1] already
                this_normalizer = get_identity_normalizer_from_stat(stat)
            elif key.endswith('qpos'):
                this_normalizer = get_range_normalizer_from_stat(stat)
            else:
                raise RuntimeError('unsupported')
            normalizer[key] = this_normalizer

        # image
        for key in self.rgb_keys:
            normalizer[key] = get_image_range_normalizer()
        return normalizer

    def get_all_actions(self) -> torch.Tensor:
        return torch.from_numpy(self.replay_buffer['action'])

    def __len__(self):
        return len(self.sampler)

    def __getitem__(self, idx: int) -> Dict[str, torch.Tensor]:
        # avoid BLAS thread oversubscription inside DataLoader workers
        threadpool_limits(1)
        data = self.sampler.sample_sequence(idx)

        # to save RAM, only return first n_obs_steps of OBS
        # since the rest will be discarded anyway.
        # when self.n_obs_steps is None this slice does nothing (takes all)
        T_slice = slice(self.n_obs_steps)

        obs_dict = dict()
        for key in self.rgb_keys:
            # move channel last to channel first: T,H,W,C -> T,C,H,W
            # convert uint8 image to float32 in [0,1]
            obs_dict[key] = np.moveaxis(data[key][T_slice], -1, 1
                ).astype(np.float32) / 255.
            del data[key]
        for key in self.lowdim_keys:
            obs_dict[key] = data[key][T_slice].astype(np.float32)
            del data[key]

        torch_data = {
            'obs': dict_apply(obs_dict, torch.from_numpy),
            'action': torch.from_numpy(data['action'].astype(np.float32))
        }
        return torch_data


def _convert_actions(raw_actions, abs_action, rotation_transformer):
    """Convert raw robomimic actions; for abs_action, re-encode the rotation part."""
    actions = raw_actions
    if abs_action:
        is_dual_arm = False
        if raw_actions.shape[-1] == 14:
            # dual arm: split into two 7-dof arms
            raw_actions = raw_actions.reshape(-1, 2, 7)
            is_dual_arm = True

        pos = raw_actions[..., :3]
        rot = raw_actions[..., 3:6]
        gripper = raw_actions[..., 6:]
        rot = rotation_transformer.forward(rot)
        raw_actions = np.concatenate([
            pos, rot, gripper
        ], axis=-1).astype(np.float32)

        if is_dual_arm:
            raw_actions = raw_actions.reshape(-1, 20)
        actions = raw_actions
    return actions


def _convert_robomimic_to_replay(store, shape_meta, dataset_path, abs_action, rotation_transformer,
        n_workers=None, max_inflight_tasks=None):
    """Copy a robomimic hdf5 dataset into a zarr ReplayBuffer, jpeg2k-compressing images."""
    if n_workers is None:
        n_workers = multiprocessing.cpu_count()
    if max_inflight_tasks is None:
        max_inflight_tasks = n_workers * 5

    # parse shape_meta
    rgb_keys = list()
    lowdim_keys = list()
    obs_shape_meta = shape_meta['obs']
    for key, attr in obs_shape_meta.items():
        obs_type = attr.get('type', 'low_dim')
        if obs_type == 'rgb':
            rgb_keys.append(key)
        elif obs_type == 'low_dim':
            lowdim_keys.append(key)

    root = zarr.group(store)
    data_group = root.require_group('data', overwrite=True)
    meta_group = root.require_group('meta', overwrite=True)

    with h5py.File(dataset_path) as file:
        # count total steps
        demos = file['data']
        episode_ends = list()
        prev_end = 0
        for i in range(len(demos)):
            demo = demos[f'demo_{i}']
            episode_length = demo['actions'].shape[0]
            episode_end = prev_end + episode_length
            prev_end = episode_end
            episode_ends.append(episode_end)
        n_steps = episode_ends[-1]
        episode_starts = [0] + episode_ends[:-1]
        _ = meta_group.array('episode_ends', episode_ends,
            dtype=np.int64, compressor=None, overwrite=True)

        # save lowdim data, uncompressed, one chunk per key
        for key in tqdm(lowdim_keys + ['action'], desc="Loading lowdim data"):
            data_key = 'obs/' + key
            if key == 'action':
                data_key = 'actions'
            this_data = list()
            for i in range(len(demos)):
                demo = demos[f'demo_{i}']
                this_data.append(demo[data_key][:].astype(np.float32))
            this_data = np.concatenate(this_data, axis=0)
            if key == 'action':
                this_data = _convert_actions(
                    raw_actions=this_data,
                    abs_action=abs_action,
                    rotation_transformer=rotation_transformer
                )
                assert this_data.shape == (n_steps,) + tuple(shape_meta['action']['shape'])
            else:
                assert this_data.shape == (n_steps,) + tuple(shape_meta['obs'][key]['shape'])
            _ = data_group.array(
                name=key,
                data=this_data,
                shape=this_data.shape,
                chunks=this_data.shape,
                compressor=None,
                dtype=this_data.dtype
            )

        def img_copy(zarr_arr, zarr_idx, hdf5_arr, hdf5_idx):
            # returns False (instead of raising) so the caller can fail the
            # whole conversion with a clear error
            try:
                zarr_arr[zarr_idx] = hdf5_arr[hdf5_idx]
                # make sure we can successfully decode
                _ = zarr_arr[zarr_idx]
                return True
            except Exception:
                return False

        with tqdm(total=n_steps*len(rgb_keys), desc="Loading image data", mininterval=1.0) as pbar:
            # one chunk per thread, therefore no synchronization needed
            with concurrent.futures.ThreadPoolExecutor(max_workers=n_workers) as executor:
                futures = set()
                for key in rgb_keys:
                    data_key = 'obs/' + key
                    shape = tuple(shape_meta['obs'][key]['shape'])
                    c, h, w = shape
                    this_compressor = Jpeg2k(level=50)
                    img_arr = data_group.require_dataset(
                        name=key,
                        shape=(n_steps, h, w, c),
                        chunks=(1, h, w, c),
                        compressor=this_compressor,
                        dtype=np.uint8
                    )
                    for episode_idx in range(len(demos)):
                        demo = demos[f'demo_{episode_idx}']
                        hdf5_arr = demo['obs'][key]
                        for hdf5_idx in range(hdf5_arr.shape[0]):
                            if len(futures) >= max_inflight_tasks:
                                # limit number of inflight tasks
                                completed, futures = concurrent.futures.wait(futures,
                                    return_when=concurrent.futures.FIRST_COMPLETED)
                                for f in completed:
                                    if not f.result():
                                        raise RuntimeError('Failed to encode image!')
                                pbar.update(len(completed))

                            zarr_idx = episode_starts[episode_idx] + hdf5_idx
                            futures.add(
                                executor.submit(img_copy,
                                    img_arr, zarr_idx, hdf5_arr, hdf5_idx))
                completed, futures = concurrent.futures.wait(futures)
                for f in completed:
                    if not f.result():
                        raise RuntimeError('Failed to encode image!')
                pbar.update(len(completed))

    replay_buffer = ReplayBuffer(root)
    return replay_buffer


def normalizer_from_stat(stat):
    """Legacy normalizer: symmetric scaling by the largest absolute value, no offset."""
    max_abs = np.maximum(stat['max'].max(), np.abs(stat['min']).max())
    scale = np.full_like(stat['max'], fill_value=1/max_abs)
    offset = np.zeros_like(stat['max'])
    return SingleFieldLinearNormalizer.create_manual(
        scale=scale,
        offset=offset,
        input_stats_dict=stat
    )
ReplayBuffer +from diffusion_policy.common.sampler import ( + SequenceSampler, get_val_mask, downsample_mask) +from diffusion_policy.common.normalize_util import ( + robomimic_abs_action_only_normalizer_from_stat, + robomimic_abs_action_only_dual_arm_normalizer_from_stat, + get_identity_normalizer_from_stat, + array_to_stats +) + +class RobomimicReplayLowdimDataset(BaseLowdimDataset): + def __init__(self, + dataset_path: str, + horizon=1, + pad_before=0, + pad_after=0, + obs_keys: List[str]=[ + 'object', + 'robot0_eef_pos', + 'robot0_eef_quat', + 'robot0_gripper_qpos'], + abs_action=False, + rotation_rep='rotation_6d', + use_legacy_normalizer=False, + seed=42, + val_ratio=0.0, + max_train_episodes=None + ): + obs_keys = list(obs_keys) + rotation_transformer = RotationTransformer( + from_rep='axis_angle', to_rep=rotation_rep) + + replay_buffer = ReplayBuffer.create_empty_numpy() + with h5py.File(dataset_path) as file: + demos = file['data'] + for i in tqdm(range(len(demos)), desc="Loading hdf5 to ReplayBuffer"): + demo = demos[f'demo_{i}'] + episode = _data_to_obs( + raw_obs=demo['obs'], + raw_actions=demo['actions'][:].astype(np.float32), + obs_keys=obs_keys, + abs_action=abs_action, + rotation_transformer=rotation_transformer) + replay_buffer.add_episode(episode) + + val_mask = get_val_mask( + n_episodes=replay_buffer.n_episodes, + val_ratio=val_ratio, + seed=seed) + train_mask = ~val_mask + train_mask = downsample_mask( + mask=train_mask, + max_n=max_train_episodes, + seed=seed) + + sampler = SequenceSampler( + replay_buffer=replay_buffer, + sequence_length=horizon, + pad_before=pad_before, + pad_after=pad_after, + episode_mask=train_mask) + + self.replay_buffer = replay_buffer + self.sampler = sampler + self.abs_action = abs_action + self.train_mask = train_mask + self.horizon = horizon + self.pad_before = pad_before + self.pad_after = pad_after + self.use_legacy_normalizer = use_legacy_normalizer + + def get_validation_dataset(self): + val_set = copy.copy(self) 
+ val_set.sampler = SequenceSampler( + replay_buffer=self.replay_buffer, + sequence_length=self.horizon, + pad_before=self.pad_before, + pad_after=self.pad_after, + episode_mask=~self.train_mask + ) + val_set.train_mask = ~self.train_mask + return val_set + + def get_normalizer(self, **kwargs) -> LinearNormalizer: + normalizer = LinearNormalizer() + + # action + stat = array_to_stats(self.replay_buffer['action']) + if self.abs_action: + if stat['mean'].shape[-1] > 10: + # dual arm + this_normalizer = robomimic_abs_action_only_dual_arm_normalizer_from_stat(stat) + else: + this_normalizer = robomimic_abs_action_only_normalizer_from_stat(stat) + + if self.use_legacy_normalizer: + this_normalizer = normalizer_from_stat(stat) + else: + # already normalized + this_normalizer = get_identity_normalizer_from_stat(stat) + normalizer['action'] = this_normalizer + + # aggregate obs stats + obs_stat = array_to_stats(self.replay_buffer['obs']) + + + normalizer['obs'] = normalizer_from_stat(obs_stat) + return normalizer + + def get_all_actions(self) -> torch.Tensor: + return torch.from_numpy(self.replay_buffer['action']) + + def __len__(self): + return len(self.sampler) + + def __getitem__(self, idx: int) -> Dict[str, torch.Tensor]: + data = self.sampler.sample_sequence(idx) + torch_data = dict_apply(data, torch.from_numpy) + return torch_data + +def normalizer_from_stat(stat): + max_abs = np.maximum(stat['max'].max(), np.abs(stat['min']).max()) + scale = np.full_like(stat['max'], fill_value=1/max_abs) + offset = np.zeros_like(stat['max']) + return SingleFieldLinearNormalizer.create_manual( + scale=scale, + offset=offset, + input_stats_dict=stat + ) + +def _data_to_obs(raw_obs, raw_actions, obs_keys, abs_action, rotation_transformer): + obs = np.concatenate([ + raw_obs[key] for key in obs_keys + ], axis=-1).astype(np.float32) + + if abs_action: + is_dual_arm = False + if raw_actions.shape[-1] == 14: + # dual arm + raw_actions = raw_actions.reshape(-1,2,7) + is_dual_arm = True + + 
pos = raw_actions[...,:3] + rot = raw_actions[...,3:6] + gripper = raw_actions[...,6:] + rot = rotation_transformer.forward(rot) + raw_actions = np.concatenate([ + pos, rot, gripper + ], axis=-1).astype(np.float32) + + if is_dual_arm: + raw_actions = raw_actions.reshape(-1,20) + + data = { + 'obs': obs, + 'action': raw_actions + } + return data diff --git a/diffusion_policy/env/block_pushing/assets/block.urdf b/diffusion_policy/env/block_pushing/assets/block.urdf new file mode 100644 index 0000000..10b44dd --- /dev/null +++ b/diffusion_policy/env/block_pushing/assets/block.urdf @@ -0,0 +1,31 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/diffusion_policy/env/block_pushing/assets/block2.urdf b/diffusion_policy/env/block_pushing/assets/block2.urdf new file mode 100644 index 0000000..36c081b --- /dev/null +++ b/diffusion_policy/env/block_pushing/assets/block2.urdf @@ -0,0 +1,31 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/diffusion_policy/env/block_pushing/assets/blocks/blue_cube.urdf b/diffusion_policy/env/block_pushing/assets/blocks/blue_cube.urdf new file mode 100644 index 0000000..089e804 --- /dev/null +++ b/diffusion_policy/env/block_pushing/assets/blocks/blue_cube.urdf @@ -0,0 +1,30 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/diffusion_policy/env/block_pushing/assets/blocks/cube.obj b/diffusion_policy/env/block_pushing/assets/blocks/cube.obj new file mode 100644 index 0000000..bc1d007 --- /dev/null +++ b/diffusion_policy/env/block_pushing/assets/blocks/cube.obj @@ -0,0 +1,446 @@ +# Blender v2.92.0 OBJ File: '' +# www.blender.org +mtllib cube.mtl +o square_blue_block_Cube.001 +v 0.000000 0.000000 -0.000000 +v 0.000000 0.038100 -0.000000 +v -0.014865 0.000000 0.018106 +v -0.016277 0.002032 0.019826 +v -0.015863 0.000595 0.019322 +v -0.019826 0.002032 0.016277 +v -0.018106 0.000000 0.014865 +v -0.019322 0.000595 0.015863 +v -0.018052 0.002032 0.019351 +v -0.016494 
0.000000 0.017681 +v -0.017595 0.000595 0.018862 +v -0.019351 0.002032 0.018052 +v -0.017681 0.000000 0.016494 +v -0.018862 0.000595 0.017595 +v -0.018106 0.038100 0.014865 +v -0.019826 0.036068 0.016277 +v -0.019322 0.037505 0.015863 +v -0.016277 0.036068 0.019826 +v -0.014865 0.038100 0.018106 +v -0.015863 0.037505 0.019322 +v -0.017681 0.038100 0.016494 +v -0.019351 0.036068 0.018052 +v -0.018862 0.037505 0.017595 +v -0.016494 0.038100 0.017681 +v -0.018052 0.036068 0.019351 +v -0.017595 0.037505 0.018862 +v -0.018106 0.000000 -0.014865 +v -0.019826 0.002032 -0.016277 +v -0.019322 0.000595 -0.015863 +v -0.016277 0.002032 -0.019826 +v -0.014865 0.000000 -0.018106 +v -0.015863 0.000595 -0.019322 +v -0.017681 0.000000 -0.016494 +v -0.019351 0.002032 -0.018052 +v -0.018862 0.000595 -0.017595 +v -0.016494 0.000000 -0.017681 +v -0.018052 0.002032 -0.019351 +v -0.017595 0.000595 -0.018862 +v -0.014865 0.038100 -0.018106 +v -0.016277 0.036068 -0.019826 +v -0.015863 0.037505 -0.019322 +v -0.019826 0.036068 -0.016277 +v -0.018106 0.038100 -0.014865 +v -0.019322 0.037505 -0.015863 +v -0.018052 0.036068 -0.019351 +v -0.016494 0.038100 -0.017681 +v -0.017595 0.037505 -0.018862 +v -0.019351 0.036068 -0.018052 +v -0.017681 0.038100 -0.016494 +v -0.018862 0.037505 -0.017595 +v 0.018106 0.000000 0.014865 +v 0.019826 0.002032 0.016277 +v 0.019322 0.000595 0.015863 +v 0.016277 0.002032 0.019826 +v 0.014865 0.000000 0.018106 +v 0.015863 0.000595 0.019322 +v 0.017681 0.000000 0.016494 +v 0.019351 0.002032 0.018052 +v 0.018862 0.000595 0.017595 +v 0.016494 0.000000 0.017681 +v 0.018052 0.002032 0.019351 +v 0.017595 0.000595 0.018862 +v 0.014865 0.038100 0.018106 +v 0.016277 0.036068 0.019826 +v 0.015863 0.037505 0.019322 +v 0.019826 0.036068 0.016277 +v 0.018106 0.038100 0.014865 +v 0.019322 0.037505 0.015863 +v 0.018052 0.036068 0.019351 +v 0.016494 0.038100 0.017681 +v 0.017595 0.037505 0.018862 +v 0.019351 0.036068 0.018052 +v 0.017681 0.038100 0.016494 +v 0.018862 0.037505 
0.017595 +v 0.014865 0.000000 -0.018106 +v 0.016277 0.002032 -0.019826 +v 0.015863 0.000595 -0.019322 +v 0.019826 0.002032 -0.016277 +v 0.018106 0.000000 -0.014865 +v 0.019322 0.000595 -0.015863 +v 0.016494 0.000000 -0.017681 +v 0.018052 0.002032 -0.019351 +v 0.017595 0.000595 -0.018862 +v 0.017681 0.000000 -0.016494 +v 0.019351 0.002032 -0.018052 +v 0.018862 0.000595 -0.017595 +v 0.018106 0.038100 -0.014865 +v 0.019826 0.036068 -0.016277 +v 0.019322 0.037505 -0.015863 +v 0.016277 0.036068 -0.019826 +v 0.014865 0.038100 -0.018106 +v 0.015863 0.037505 -0.019322 +v 0.019351 0.036068 -0.018052 +v 0.017681 0.038100 -0.016494 +v 0.018862 0.037505 -0.017595 +v 0.018052 0.036068 -0.019351 +v 0.016494 0.038100 -0.017681 +v 0.017595 0.037505 -0.018862 +vt 0.811987 0.285513 +vt 0.811986 0.268118 +vt 0.822561 0.276819 +vt 0.831255 0.287393 +vt 0.813860 0.287394 +vt 0.836073 0.319870 +vt 0.856069 0.319870 +vt 0.856069 0.320950 +vt 0.836073 0.320950 +vt 0.813867 0.266245 +vt 0.831262 0.266244 +vt 0.832202 0.266496 +vt 0.832884 0.267184 +vt 0.856069 0.297507 +vt 0.836074 0.297507 +vt 0.836074 0.278381 +vt 0.856070 0.278381 +vt 0.812238 0.267178 +vt 0.812926 0.266496 +vt 0.856070 0.275143 +vt 0.836074 0.275143 +vt 0.836074 0.256018 +vt 0.856070 0.256018 +vt 0.833135 0.268125 +vt 0.833136 0.285520 +vt 0.833136 0.260613 +vt 0.832884 0.261553 +vt 0.822561 0.251912 +vt 0.836074 0.342234 +vt 0.856070 0.342234 +vt 0.856070 0.343313 +vt 0.836074 0.343313 +vt 0.832196 0.287142 +vt 0.831262 0.241337 +vt 0.832202 0.241589 +vt 0.832884 0.286460 +vt 0.812238 0.286454 +vt 0.811986 0.243211 +vt 0.812238 0.242271 +vt 0.836073 0.300745 +vt 0.856069 0.300745 +vt 0.836073 0.298586 +vt 0.856069 0.298586 +vt 0.856069 0.299665 +vt 0.836073 0.299665 +vt 0.856070 0.276222 +vt 0.836074 0.276222 +vt 0.836074 0.277302 +vt 0.856070 0.277302 +vt 0.836074 0.253859 +vt 0.856070 0.253859 +vt 0.856070 0.254938 +vt 0.836074 0.254938 +vt 0.812920 0.287142 +vt 0.856069 0.322029 +vt 0.836073 0.322029 +vt 0.813860 
0.262487 +vt 0.812920 0.262235 +vt 0.836073 0.323108 +vt 0.856069 0.323108 +vt 0.832884 0.242277 +vt 0.833135 0.243218 +vt 0.812238 0.261547 +vt 0.811987 0.260606 +vt 0.813868 0.241338 +vt 0.812926 0.241589 +vt 0.831255 0.262486 +vt 0.832196 0.262235 +vt 0.811196 0.267507 +vt 0.811506 0.266500 +vt 0.810258 0.267085 +vt 0.810726 0.265896 +vt 0.811523 0.262225 +vt 0.811189 0.261224 +vt 0.810732 0.262868 +vt 0.810258 0.261658 +vt 0.812248 0.265781 +vt 0.811605 0.264990 +vt 0.812242 0.262967 +vt 0.811638 0.263747 +vt 0.813249 0.265447 +vt 0.812815 0.264516 +vt 0.813249 0.263277 +vt 0.812827 0.264215 +vt 0.833926 0.261224 +vt 0.833616 0.262231 +vt 0.834864 0.261646 +vt 0.834396 0.262835 +vt 0.833599 0.266506 +vt 0.833933 0.267507 +vt 0.834390 0.265863 +vt 0.834864 0.267073 +vt 0.832874 0.262950 +vt 0.833517 0.263741 +vt 0.832880 0.265764 +vt 0.833484 0.264984 +vt 0.831873 0.263284 +vt 0.832307 0.264215 +vt 0.831873 0.265453 +vt 0.832295 0.264516 +vt 0.831873 0.240547 +vt 0.832880 0.240857 +vt 0.832295 0.239609 +vt 0.833484 0.240077 +vt 0.832874 0.287857 +vt 0.831873 0.288191 +vt 0.833517 0.288648 +vt 0.832307 0.289122 +vt 0.833599 0.241599 +vt 0.834390 0.240956 +vt 0.833616 0.287138 +vt 0.834396 0.287742 +vt 0.833933 0.242600 +vt 0.834864 0.242166 +vt 0.833926 0.286131 +vt 0.834864 0.286553 +vt 0.811196 0.242600 +vt 0.811506 0.241593 +vt 0.810258 0.242178 +vt 0.810726 0.240989 +vt 0.811523 0.287132 +vt 0.811189 0.286131 +vt 0.810732 0.287775 +vt 0.810258 0.286565 +vt 0.812248 0.240874 +vt 0.811605 0.240083 +vt 0.812242 0.287874 +vt 0.811638 0.288654 +vt 0.813249 0.240540 +vt 0.812815 0.239609 +vt 0.813249 0.288184 +vt 0.812827 0.289122 +vn 0.0000 -1.0000 0.0000 +vn 0.9739 0.1816 -0.1363 +vn 0.9739 -0.1816 -0.1363 +vn 0.8444 -0.2052 -0.4948 +vn 0.8444 0.2052 -0.4948 +vn 0.1363 -0.1816 0.9739 +vn 0.1363 0.1816 0.9739 +vn -0.1363 0.1816 0.9739 +vn -0.1363 -0.1816 0.9739 +vn -0.9739 -0.1816 0.1362 +vn -0.9739 0.1816 0.1362 +vn -0.9739 0.1816 -0.1362 +vn -0.9739 -0.1816 
-0.1362 +vn -0.0000 1.0000 -0.0000 +vn -0.1363 0.1816 -0.9739 +vn -0.1363 -0.1816 -0.9739 +vn -0.4948 -0.2052 -0.8444 +vn -0.4948 0.2052 -0.8444 +vn 0.9739 0.1816 0.1363 +vn 0.9739 -0.1816 0.1363 +vn 0.4948 0.2052 0.8444 +vn 0.4948 -0.2052 0.8444 +vn 0.8444 -0.2052 0.4948 +vn 0.8444 0.2052 0.4948 +vn -0.8444 -0.2052 0.4948 +vn -0.8444 0.2052 0.4948 +vn -0.4948 0.2052 0.8444 +vn -0.4948 -0.2052 0.8444 +vn -0.8444 -0.2052 -0.4948 +vn -0.8444 0.2052 -0.4948 +vn 0.4948 -0.2052 -0.8444 +vn 0.4948 0.2052 -0.8444 +vn 0.1363 0.1816 -0.9739 +vn 0.1363 -0.1816 -0.9739 +vn -0.0965 -0.9775 0.1874 +vn -0.0187 -0.9731 0.2298 +vn -0.0935 -0.6743 0.7325 +vn -0.3529 -0.6994 0.6215 +vn -0.0187 0.9731 0.2298 +vn -0.0965 0.9775 0.1874 +vn -0.3529 0.6994 0.6215 +vn -0.0935 0.6743 0.7325 +vn -0.1874 -0.9775 0.0965 +vn -0.6215 -0.6994 0.3529 +vn -0.1874 0.9775 0.0965 +vn -0.6215 0.6994 0.3529 +vn -0.2298 -0.9731 0.0187 +vn -0.7325 -0.6743 0.0935 +vn -0.2298 0.9731 0.0187 +vn -0.7325 0.6743 0.0935 +vn -0.0965 0.9775 -0.1874 +vn -0.0187 0.9731 -0.2298 +vn -0.0935 0.6743 -0.7325 +vn -0.3529 0.6994 -0.6215 +vn -0.0187 -0.9731 -0.2298 +vn -0.0965 -0.9775 -0.1874 +vn -0.3529 -0.6994 -0.6215 +vn -0.0935 -0.6743 -0.7325 +vn -0.1874 0.9775 -0.0965 +vn -0.6215 0.6994 -0.3529 +vn -0.1874 -0.9775 -0.0965 +vn -0.6215 -0.6994 -0.3529 +vn -0.2298 0.9731 -0.0187 +vn -0.7325 0.6743 -0.0935 +vn -0.2298 -0.9731 -0.0187 +vn -0.7325 -0.6743 -0.0935 +vn 0.1874 0.9775 -0.0965 +vn 0.2298 0.9731 -0.0187 +vn 0.7325 0.6743 -0.0935 +vn 0.6215 0.6994 -0.3529 +vn 0.2298 -0.9731 -0.0187 +vn 0.1874 -0.9775 -0.0965 +vn 0.6215 -0.6994 -0.3529 +vn 0.7325 -0.6743 -0.0935 +vn 0.0965 0.9775 -0.1874 +vn 0.3529 0.6994 -0.6215 +vn 0.0965 -0.9775 -0.1874 +vn 0.3529 -0.6994 -0.6215 +vn 0.0187 0.9731 -0.2298 +vn 0.0935 0.6743 -0.7325 +vn 0.0187 -0.9731 -0.2298 +vn 0.0935 -0.6743 -0.7325 +vn 0.0965 0.9775 0.1874 +vn 0.0187 0.9731 0.2298 +vn 0.0935 0.6743 0.7325 +vn 0.3529 0.6994 0.6215 +vn 0.0187 -0.9731 0.2298 +vn 0.0965 -0.9775 
0.1874 +vn 0.3529 -0.6994 0.6215 +vn 0.0935 -0.6743 0.7325 +vn 0.1874 0.9775 0.0965 +vn 0.6215 0.6994 0.3529 +vn 0.1874 -0.9775 0.0965 +vn 0.6215 -0.6994 0.3529 +vn 0.2298 0.9731 0.0187 +vn 0.7325 0.6743 0.0935 +vn 0.2298 -0.9731 0.0187 +vn 0.7325 -0.6743 0.0935 +usemtl toybox.001 +s 1 +f 55/1/1 3/2/1 1/3/1 +f 79/4/1 51/5/1 1/3/1 +f 88/6/2 78/7/3 85/8/4 93/9/5 +f 7/10/1 27/11/1 1/3/1 +f 33/12/1 36/13/1 1/3/1 +f 54/14/6 64/15/7 18/16/8 4/17/9 +f 10/18/1 13/19/1 1/3/1 +f 6/20/10 16/21/11 42/22/12 28/23/13 +f 31/24/1 75/25/1 1/3/1 +f 39/26/14 46/27/14 2/28/14 +f 40/29/15 30/30/16 37/31/17 45/32/18 +f 84/33/1 79/4/1 1/3/1 +f 87/34/14 94/35/14 2/28/14 +f 75/25/1 81/36/1 1/3/1 +f 60/37/1 55/1/1 1/3/1 +f 63/38/14 70/39/14 2/28/14 +f 78/7/3 88/6/2 66/40/19 52/41/20 +f 69/42/21 61/43/22 58/44/23 72/45/24 +f 13/19/1 7/10/1 1/3/1 +f 12/46/25 22/47/26 16/21/11 6/20/10 +f 81/36/1 84/33/1 1/3/1 +f 4/17/9 18/16/8 25/48/27 9/49/28 +f 45/50/18 37/51/17 34/52/29 48/53/30 +f 72/45/24 58/44/23 52/41/20 66/40/19 +f 57/54/1 60/37/1 1/3/1 +f 93/9/5 85/8/4 82/55/31 96/56/32 +f 9/49/28 25/48/27 22/47/26 12/46/25 +f 48/53/30 34/52/29 28/23/13 42/22/12 +f 36/13/1 31/24/1 1/3/1 +f 15/57/14 21/58/14 2/28/14 +f 30/30/16 40/29/15 90/59/33 76/60/34 +f 64/15/7 54/14/6 61/43/22 69/42/21 +f 97/61/14 91/62/14 2/28/14 +f 51/5/1 57/54/1 1/3/1 +f 24/63/14 19/64/14 2/28/14 +f 67/65/14 87/34/14 2/28/14 +f 73/66/14 67/65/14 2/28/14 +f 21/58/14 24/63/14 2/28/14 +f 43/67/14 15/57/14 2/28/14 +f 70/39/14 73/66/14 2/28/14 +f 19/64/14 63/38/14 2/28/14 +f 46/27/14 49/68/14 2/28/14 +f 49/68/14 43/67/14 2/28/14 +f 91/62/14 39/26/14 2/28/14 +f 96/56/32 82/55/31 76/60/34 90/59/33 +f 94/35/14 97/61/14 2/28/14 +f 27/11/1 33/12/1 1/3/1 +f 10/18/35 3/2/36 5/69/37 11/70/38 +f 11/70/38 5/69/37 4/71/9 9/72/28 +f 19/64/39 24/63/40 26/73/41 20/74/42 +f 20/74/42 26/73/41 25/75/27 18/76/8 +f 13/19/43 10/18/35 11/70/38 14/77/44 +f 14/77/44 11/70/38 9/72/28 12/78/25 +f 24/63/40 21/58/45 23/79/46 26/73/41 +f 26/73/41 23/79/46 
22/80/26 25/75/27 +f 7/10/47 13/19/43 14/77/44 8/81/48 +f 8/81/48 14/77/44 12/78/25 6/82/10 +f 21/58/45 15/57/49 17/83/50 23/79/46 +f 23/79/46 17/83/50 16/84/11 22/80/26 +f 46/27/51 39/26/52 41/85/53 47/86/54 +f 47/86/54 41/85/53 40/87/15 45/88/18 +f 31/24/55 36/13/56 38/89/57 32/90/58 +f 32/90/58 38/89/57 37/91/17 30/92/16 +f 49/68/59 46/27/51 47/86/54 50/93/60 +f 50/93/60 47/86/54 45/88/18 48/94/30 +f 36/13/56 33/12/61 35/95/62 38/89/57 +f 38/89/57 35/95/62 34/96/29 37/91/17 +f 43/67/63 49/68/59 50/93/60 44/97/64 +f 44/97/64 50/93/60 48/94/30 42/98/12 +f 33/12/61 27/11/65 29/99/66 35/95/62 +f 35/95/62 29/99/66 28/100/13 34/96/29 +f 94/35/67 87/34/68 89/101/69 95/102/70 +f 95/102/70 89/101/69 88/103/2 93/104/5 +f 79/4/71 84/33/72 86/105/73 80/106/74 +f 80/106/74 86/105/73 85/107/4 78/108/3 +f 97/61/75 94/35/67 95/102/70 98/109/76 +f 98/109/76 95/102/70 93/104/5 96/110/32 +f 84/33/72 81/36/77 83/111/78 86/105/73 +f 86/105/73 83/111/78 82/112/31 85/107/4 +f 91/62/79 97/61/75 98/109/76 92/113/80 +f 92/113/80 98/109/76 96/110/32 90/114/33 +f 81/36/77 75/25/81 77/115/82 83/111/78 +f 83/111/78 77/115/82 76/116/34 82/112/31 +f 70/39/83 63/38/84 65/117/85 71/118/86 +f 71/118/86 65/117/85 64/119/7 69/120/21 +f 55/1/87 60/37/88 62/121/89 56/122/90 +f 56/122/90 62/121/89 61/123/22 54/124/6 +f 73/66/91 70/39/83 71/118/86 74/125/92 +f 74/125/92 71/118/86 69/120/21 72/126/24 +f 60/37/88 57/54/93 59/127/94 62/121/89 +f 62/121/89 59/127/94 58/128/23 61/123/22 +f 67/65/95 73/66/91 74/125/92 68/129/96 +f 68/129/96 74/125/92 72/126/24 66/130/19 +f 57/54/93 51/5/97 53/131/98 59/127/94 +f 59/127/94 53/131/98 52/132/20 58/128/23 +f 6/82/10 28/100/13 29/99/66 8/81/48 +f 8/81/48 29/99/66 27/11/65 7/10/47 +f 30/92/16 76/116/34 77/115/82 32/90/58 +f 32/90/58 77/115/82 75/25/81 31/24/55 +f 78/108/3 52/132/20 53/131/98 80/106/74 +f 80/106/74 53/131/98 51/5/97 79/4/71 +f 54/124/6 4/71/9 5/69/37 56/122/90 +f 56/122/90 5/69/37 3/2/36 55/1/87 +f 15/57/49 43/67/63 44/97/64 17/83/50 +f 17/83/50 
44/97/64 42/98/12 16/84/11 +f 63/38/84 19/64/39 20/74/42 65/117/85 +f 65/117/85 20/74/42 18/76/8 64/119/7 +f 66/130/19 88/103/2 89/101/69 68/129/96 +f 68/129/96 89/101/69 87/34/68 67/65/95 +f 90/114/33 40/87/15 41/85/53 92/113/80 +f 92/113/80 41/85/53 39/26/52 91/62/79 +f 3/2/1 10/18/1 1/3/1 diff --git a/diffusion_policy/env/block_pushing/assets/blocks/green_star.urdf b/diffusion_policy/env/block_pushing/assets/blocks/green_star.urdf new file mode 100644 index 0000000..d1c303a --- /dev/null +++ b/diffusion_policy/env/block_pushing/assets/blocks/green_star.urdf @@ -0,0 +1,30 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/diffusion_policy/env/block_pushing/assets/blocks/moon.obj b/diffusion_policy/env/block_pushing/assets/blocks/moon.obj new file mode 100644 index 0000000..2a862c6 --- /dev/null +++ b/diffusion_policy/env/block_pushing/assets/blocks/moon.obj @@ -0,0 +1,446 @@ +# Blender v2.92.0 OBJ File: '' +# www.blender.org +mtllib block.mtl +o moon_red_block_Cylinder.012 +v -0.009181 0.000000 -0.006901 +v -0.012205 0.002032 -0.009549 +v -0.011319 0.000595 -0.008773 +v -0.021856 0.000000 -0.011560 +v -0.021140 0.002032 -0.012614 +v -0.021633 0.000595 -0.012305 +v -0.021140 0.036068 -0.012614 +v -0.021856 0.038100 -0.011560 +v -0.021633 0.037505 -0.012305 +v -0.014039 0.002032 0.009151 +v -0.012620 0.000000 0.008341 +v -0.013624 0.000595 0.008914 +v -0.012620 0.038100 0.008341 +v -0.014039 0.036068 0.009151 +v -0.013624 0.037505 0.008914 +v -0.024317 0.002032 -0.000986 +v -0.021966 0.000000 -0.000780 +v -0.023628 0.000595 -0.000925 +v -0.012205 0.036068 -0.009549 +v -0.009181 0.038100 -0.006901 +v -0.011319 0.037505 -0.008773 +v -0.021966 0.038100 -0.000780 +v -0.024317 0.036068 -0.000986 +v -0.023628 0.037505 -0.000925 +v -0.023768 0.000000 -0.007207 +v -0.026243 0.002032 -0.008076 +v -0.025518 0.000595 -0.007821 +v -0.023815 0.000000 -0.010019 +v -0.026267 0.002032 -0.011168 +v -0.025549 0.000595 -0.010832 +v -0.024498 0.002032 
-0.012861 +v -0.023166 0.000000 -0.011548 +v -0.023825 0.000595 -0.012476 +v -0.026243 0.036068 -0.008076 +v -0.023768 0.038100 -0.007207 +v -0.025518 0.037505 -0.007821 +v -0.023166 0.038100 -0.011548 +v -0.024498 0.036068 -0.012861 +v -0.023825 0.037505 -0.012476 +v -0.026267 0.036068 -0.011168 +v -0.023815 0.038100 -0.010019 +v -0.025549 0.037505 -0.010832 +v -0.000000 0.038100 0.001138 +v -0.000000 0.000000 0.001138 +v -0.000000 0.000000 -0.006070 +v -0.000000 0.002032 -0.008427 +v -0.000000 0.000595 -0.007737 +v -0.000000 0.036068 -0.008427 +v -0.000000 0.038100 -0.006070 +v -0.000000 0.037505 -0.007737 +v 0.009181 0.000000 -0.006901 +v 0.012205 0.002032 -0.009549 +v 0.011319 0.000595 -0.008773 +v 0.021856 0.000000 -0.011560 +v 0.021140 0.002032 -0.012614 +v 0.021633 0.000595 -0.012305 +v 0.021140 0.036068 -0.012614 +v 0.021856 0.038100 -0.011560 +v 0.021633 0.037505 -0.012305 +v -0.000000 0.002032 0.012861 +v -0.000000 0.000000 0.011641 +v -0.000000 0.000595 0.012504 +v -0.000000 0.038100 0.011640 +v -0.000000 0.036068 0.012861 +v -0.000000 0.037505 0.012503 +v 0.014039 0.002032 0.009151 +v 0.012620 0.000000 0.008341 +v 0.013624 0.000595 0.008914 +v 0.012620 0.038100 0.008341 +v 0.014039 0.036068 0.009151 +v 0.013624 0.037505 0.008914 +v 0.024317 0.002032 -0.000986 +v 0.021966 0.000000 -0.000780 +v 0.023628 0.000595 -0.000925 +v 0.012205 0.036068 -0.009549 +v 0.009181 0.038100 -0.006901 +v 0.011319 0.037505 -0.008773 +v 0.021966 0.038100 -0.000780 +v 0.024317 0.036068 -0.000986 +v 0.023628 0.037505 -0.000925 +v 0.023768 0.000000 -0.007207 +v 0.026243 0.002032 -0.008076 +v 0.025518 0.000595 -0.007821 +v 0.023815 0.000000 -0.010019 +v 0.026267 0.002032 -0.011168 +v 0.025549 0.000595 -0.010832 +v 0.024498 0.002032 -0.012861 +v 0.023166 0.000000 -0.011548 +v 0.023825 0.000595 -0.012476 +v 0.026243 0.036068 -0.008076 +v 0.023768 0.038100 -0.007207 +v 0.025518 0.037505 -0.007821 +v 0.023166 0.038100 -0.011548 +v 0.024498 0.036068 -0.012861 +v 0.023825 0.037505 
-0.012476 +v 0.026267 0.036068 -0.011168 +v 0.023815 0.038100 -0.010019 +v 0.025549 0.037505 -0.010832 +vt 0.710579 0.167405 +vt 0.710864 0.161788 +vt 0.714864 0.161788 +vt 0.686288 0.163021 +vt 0.706306 0.163021 +vt 0.706306 0.165002 +vt 0.686288 0.165002 +vt 0.708556 0.175784 +vt 0.708475 0.175088 +vt 0.721180 0.161788 +vt 0.719512 0.169043 +vt 0.714650 0.174731 +vt 0.711007 0.176048 +vt 0.720186 0.145021 +vt 0.718589 0.144931 +vt 0.714732 0.130671 +vt 0.709411 0.176139 +vt 0.721121 0.143971 +vt 0.721041 0.144667 +vt 0.686288 0.138411 +vt 0.706306 0.138411 +vt 0.706306 0.146951 +vt 0.686288 0.146951 +vt 0.706306 0.155441 +vt 0.686288 0.155441 +vt 0.706306 0.159762 +vt 0.686288 0.159762 +vt 0.714946 0.143614 +vt 0.686288 0.177766 +vt 0.686288 0.170557 +vt 0.706306 0.170557 +vt 0.706306 0.177766 +vt 0.686288 0.161581 +vt 0.706306 0.161581 +vt 0.718732 0.130671 +vt 0.719018 0.136287 +vt 0.710084 0.137925 +vt 0.708416 0.130671 +vt 0.709447 0.168778 +vt 0.709791 0.161788 +vt 0.708408 0.169256 +vt 0.708838 0.161788 +vt 0.707932 0.175020 +vt 0.707041 0.174762 +vt 0.722722 0.161788 +vt 0.720733 0.170191 +vt 0.719979 0.169645 +vt 0.721789 0.161788 +vt 0.708863 0.139073 +vt 0.706874 0.130671 +vt 0.707807 0.130671 +vt 0.709618 0.138527 +vt 0.715051 0.176717 +vt 0.714702 0.175778 +vt 0.719806 0.130670 +vt 0.720149 0.137661 +vt 0.720758 0.130670 +vt 0.721188 0.138138 +vt 0.721665 0.143903 +vt 0.722555 0.143645 +vt 0.714546 0.145599 +vt 0.714894 0.144661 +vt 0.721564 0.145099 +vt 0.720661 0.146046 +vt 0.722312 0.145803 +vt 0.721098 0.146965 +vt 0.708935 0.177163 +vt 0.708032 0.176217 +vt 0.708498 0.178082 +vt 0.707284 0.176921 +vt 0.718902 0.146070 +vt 0.719002 0.147136 +vt 0.710695 0.177187 +vt 0.710594 0.178254 +vt 0.710579 0.156171 +vt 0.686288 0.192510 +vt 0.686288 0.190530 +vt 0.706306 0.190530 +vt 0.706306 0.192510 +vt 0.708556 0.147792 +vt 0.708475 0.148488 +vt 0.719512 0.154534 +vt 0.714650 0.148845 +vt 0.711007 0.147528 +vt 0.720186 0.116320 +vt 0.718589 0.116410 +vt 
0.709411 0.147437 +vt 0.721121 0.117370 +vt 0.721041 0.116674 +vt 0.686288 0.129870 +vt 0.706306 0.129870 +vt 0.686288 0.121380 +vt 0.706306 0.121380 +vt 0.686288 0.117059 +vt 0.706306 0.117059 +vt 0.714946 0.117727 +vt 0.706306 0.184974 +vt 0.686288 0.184974 +vt 0.706306 0.193950 +vt 0.686288 0.193950 +vt 0.719018 0.125054 +vt 0.710084 0.123416 +vt 0.706306 0.115240 +vt 0.686288 0.115240 +vt 0.709447 0.154798 +vt 0.708408 0.154321 +vt 0.707932 0.148556 +vt 0.707041 0.148814 +vt 0.719979 0.153931 +vt 0.720733 0.153385 +vt 0.708863 0.122268 +vt 0.709618 0.122814 +vt 0.714702 0.147798 +vt 0.715051 0.146860 +vt 0.720149 0.123680 +vt 0.721188 0.123203 +vt 0.721665 0.117438 +vt 0.722555 0.117696 +vt 0.714546 0.115742 +vt 0.714894 0.116680 +vt 0.720661 0.115295 +vt 0.721564 0.116241 +vt 0.721098 0.114376 +vt 0.722312 0.115538 +vt 0.708032 0.147359 +vt 0.708935 0.146413 +vt 0.707284 0.146655 +vt 0.708498 0.145494 +vt 0.718901 0.115271 +vt 0.719002 0.114205 +vt 0.710695 0.146389 +vt 0.710595 0.145323 +vn 0.0000 -1.0000 -0.0000 +vn -0.3609 -0.1941 -0.9122 +vn -0.3609 0.1941 -0.9122 +vn 0.2092 0.1359 -0.9684 +vn 0.2092 -0.1359 -0.9684 +vn 0.0000 1.0000 -0.0000 +vn -0.0000 -0.1176 0.9931 +vn -0.0000 0.1176 0.9931 +vn -0.5010 0.1369 0.8546 +vn -0.5010 -0.1369 0.8546 +vn -0.8572 0.1847 0.4806 +vn -0.8572 -0.1847 0.4806 +vn -0.9678 0.2183 0.1257 +vn -0.9678 -0.2183 0.1257 +vn 0.0000 -0.2255 -0.9742 +vn 0.2279 -0.1771 -0.9575 +vn 0.2279 0.1771 -0.9575 +vn 0.0000 0.2255 -0.9742 +vn -0.8945 -0.2361 -0.3797 +vn -0.8945 0.2361 -0.3797 +vn 0.0000 -0.9870 -0.1606 +vn 0.0247 -0.9852 -0.1694 +vn 0.1624 -0.6740 -0.7206 +vn 0.0000 -0.7551 -0.6556 +vn 0.0583 -0.9460 -0.3190 +vn 0.1517 -0.5680 -0.8089 +vn -0.4024 -0.5836 0.7053 +vn 0.0000 -0.5393 0.8421 +vn -0.1318 -0.9470 0.2929 +vn 0.0000 -0.9390 0.3440 +vn 0.0000 0.5393 0.8421 +vn -0.4024 0.5836 0.7053 +vn 0.0000 0.9390 0.3440 +vn -0.1318 0.9470 0.2929 +vn -0.6387 -0.6655 0.3863 +vn -0.2030 -0.9669 0.1548 +vn 0.0247 0.9852 -0.1694 +vn 
-0.0000 0.9870 -0.1606 +vn 0.0000 0.7551 -0.6556 +vn 0.1624 0.6740 -0.7206 +vn 0.0583 0.9460 -0.3190 +vn 0.1517 0.5680 -0.8089 +vn -0.6387 0.6655 0.3863 +vn -0.2030 0.9669 0.1548 +vn -0.1804 0.9821 -0.0543 +vn -0.0951 0.9531 -0.2874 +vn -0.2234 0.6362 -0.7384 +vn -0.6431 0.7203 -0.2600 +vn -0.0951 -0.9531 -0.2874 +vn -0.1804 -0.9821 -0.0543 +vn -0.6431 -0.7203 -0.2600 +vn -0.2234 -0.6362 -0.7384 +vn -0.1858 0.9820 0.0343 +vn -0.6801 0.7265 0.0987 +vn -0.1858 -0.9820 0.0343 +vn -0.6801 -0.7265 0.0987 +vn 0.3609 -0.1941 -0.9122 +vn -0.2092 -0.1359 -0.9684 +vn -0.2092 0.1359 -0.9684 +vn 0.3609 0.1941 -0.9122 +vn 0.5010 -0.1369 0.8546 +vn 0.5010 0.1369 0.8546 +vn 0.8572 -0.1847 0.4806 +vn 0.8572 0.1847 0.4806 +vn 0.9678 -0.2183 0.1257 +vn 0.9678 0.2183 0.1257 +vn -0.2279 0.1771 -0.9575 +vn -0.2279 -0.1771 -0.9575 +vn 0.8945 0.2361 -0.3797 +vn 0.8945 -0.2361 -0.3797 +vn -0.1624 -0.6740 -0.7206 +vn -0.0247 -0.9852 -0.1694 +vn -0.1517 -0.5680 -0.8089 +vn -0.0583 -0.9460 -0.3190 +vn 0.4024 -0.5836 0.7053 +vn 0.1318 -0.9470 0.2929 +vn 0.4024 0.5836 0.7053 +vn 0.1318 0.9470 0.2929 +vn 0.6387 -0.6655 0.3863 +vn 0.2030 -0.9669 0.1548 +vn -0.0247 0.9852 -0.1694 +vn -0.1624 0.6740 -0.7206 +vn -0.0583 0.9460 -0.3190 +vn -0.1517 0.5680 -0.8089 +vn 0.6387 0.6655 0.3863 +vn 0.2030 0.9669 0.1548 +vn 0.1804 0.9821 -0.0543 +vn 0.6431 0.7203 -0.2600 +vn 0.2234 0.6362 -0.7384 +vn 0.0951 0.9531 -0.2874 +vn 0.0951 -0.9531 -0.2874 +vn 0.2234 -0.6362 -0.7384 +vn 0.6431 -0.7203 -0.2600 +vn 0.1804 -0.9821 -0.0543 +vn 0.1858 0.9820 0.0343 +vn 0.6801 0.7265 0.0987 +vn 0.6801 -0.7265 0.0987 +vn 0.1858 -0.9820 0.0343 +usemtl toybox +s 1 +f 1/1/1 45/2/1 44/3/1 +f 31/4/2 38/5/3 7/6/4 5/7/5 +f 32/8/1 4/9/1 44/3/1 +f 61/10/1 11/11/1 44/3/1 +f 11/11/1 17/12/1 44/3/1 +f 17/12/1 25/13/1 44/3/1 +f 41/14/6 35/15/6 43/16/6 +f 28/17/1 32/8/1 44/3/1 +f 8/18/6 37/19/6 43/16/6 +f 37/19/6 41/14/6 43/16/6 +f 25/13/1 28/17/1 44/3/1 +f 60/20/7 64/21/8 14/22/9 10/23/10 +f 10/23/10 14/22/9 23/24/11 16/25/12 +f 
16/25/12 23/24/11 34/26/13 26/27/14 +f 35/15/6 22/28/6 43/16/6 +f 4/9/1 1/1/1 44/3/1 +f 46/29/15 2/30/16 19/31/17 48/32/18 +f 38/5/3 31/4/2 29/33/19 40/34/20 +f 49/35/6 20/36/6 43/16/6 +f 13/37/6 63/38/6 43/16/6 +f 2/30/16 5/7/5 7/6/4 19/31/17 +f 40/34/20 29/33/19 26/27/14 34/26/13 +f 20/36/6 8/18/6 43/16/6 +f 22/28/6 13/37/6 43/16/6 +f 45/2/21 1/1/22 3/39/23 47/40/24 +f 47/40/24 3/39/23 2/41/16 46/42/15 +f 1/1/22 4/9/25 6/43/26 3/39/23 +f 3/39/23 6/43/26 5/44/5 2/41/16 +f 60/45/7 10/46/10 12/47/27 62/48/28 +f 62/48/28 12/47/27 11/11/29 61/10/30 +f 14/49/9 64/50/8 65/51/31 15/52/32 +f 15/52/32 65/51/31 63/38/33 13/37/34 +f 10/46/10 16/53/12 18/54/35 12/47/27 +f 12/47/27 18/54/35 17/12/36 11/11/29 +f 20/36/37 49/35/38 50/55/39 21/56/40 +f 21/56/40 50/55/39 48/57/18 19/58/17 +f 8/18/41 20/36/37 21/56/40 9/59/42 +f 9/59/42 21/56/40 19/58/17 7/60/4 +f 23/61/11 14/49/9 15/52/32 24/62/43 +f 24/62/43 15/52/32 13/37/34 22/28/44 +f 41/14/45 37/19/46 39/63/47 42/64/48 +f 42/64/48 39/63/47 38/65/3 40/66/20 +f 32/8/49 28/17/50 30/67/51 33/68/52 +f 33/68/52 30/67/51 29/69/19 31/70/2 +f 35/15/53 41/14/45 42/64/48 36/71/54 +f 36/71/54 42/64/48 40/66/20 34/72/13 +f 28/17/50 25/13/55 27/73/56 30/67/51 +f 30/67/51 27/73/56 26/74/14 29/69/19 +f 25/13/55 17/12/36 18/54/35 27/73/56 +f 27/73/56 18/54/35 16/53/12 26/74/14 +f 22/28/44 35/15/53 36/71/54 24/62/43 +f 24/62/43 36/71/54 34/72/13 23/61/11 +f 4/9/25 32/8/49 33/68/52 6/43/26 +f 6/43/26 33/68/52 31/70/2 5/44/5 +f 37/19/46 8/18/41 9/59/42 39/63/47 +f 39/63/47 9/59/42 7/60/4 38/65/3 +f 51/75/1 44/3/1 45/2/1 +f 87/76/57 55/77/58 57/78/59 94/79/60 +f 88/80/1 44/3/1 54/81/1 +f 61/10/1 44/3/1 67/82/1 +f 67/82/1 44/3/1 73/83/1 +f 73/83/1 44/3/1 81/84/1 +f 97/85/6 43/16/6 91/86/6 +f 84/87/1 44/3/1 88/80/1 +f 58/88/6 43/16/6 93/89/6 +f 93/89/6 43/16/6 97/85/6 +f 81/84/1 44/3/1 84/87/1 +f 60/20/7 66/90/61 70/91/62 64/21/8 +f 66/90/61 72/92/63 79/93/64 70/91/62 +f 72/92/63 82/94/65 90/95/66 79/93/64 +f 91/86/6 43/16/6 78/96/6 +f 54/81/1 
44/3/1 51/75/1 +f 46/29/15 48/32/18 75/97/67 52/98/68 +f 94/79/60 96/99/69 85/100/70 87/76/57 +f 49/35/6 43/16/6 76/101/6 +f 69/102/6 43/16/6 63/38/6 +f 52/98/68 75/97/67 57/78/59 55/77/58 +f 96/103/69 90/95/66 82/94/65 85/104/70 +f 76/101/6 43/16/6 58/88/6 +f 78/96/6 43/16/6 69/102/6 +f 45/2/21 47/40/24 53/105/71 51/75/72 +f 47/40/24 46/42/15 52/106/68 53/105/71 +f 51/75/72 53/105/71 56/107/73 54/81/74 +f 53/105/71 52/106/68 55/108/58 56/107/73 +f 60/45/7 62/48/28 68/109/75 66/110/61 +f 62/48/28 61/10/30 67/82/76 68/109/75 +f 70/111/62 71/112/77 65/51/31 64/50/8 +f 71/112/77 69/102/78 63/38/33 65/51/31 +f 66/110/61 68/109/75 74/113/79 72/114/63 +f 68/109/75 67/82/76 73/83/80 74/113/79 +f 76/101/81 77/115/82 50/55/39 49/35/38 +f 77/115/82 75/116/67 48/57/18 50/55/39 +f 58/88/83 59/117/84 77/115/82 76/101/81 +f 59/117/84 57/118/59 75/116/67 77/115/82 +f 79/119/64 80/120/85 71/112/77 70/111/62 +f 80/120/85 78/96/86 69/102/78 71/112/77 +f 97/85/87 98/121/88 95/122/89 93/89/90 +f 98/121/88 96/123/69 94/124/60 95/122/89 +f 88/80/91 89/125/92 86/126/93 84/87/94 +f 89/125/92 87/127/57 85/128/70 86/126/93 +f 91/86/95 92/129/96 98/121/88 97/85/87 +f 92/129/96 90/130/66 96/123/69 98/121/88 +f 84/87/94 86/126/93 83/131/97 81/84/98 +f 86/126/93 85/128/70 82/132/65 83/131/97 +f 81/84/98 83/131/97 74/113/79 73/83/80 +f 83/131/97 82/132/65 72/114/63 74/113/79 +f 78/96/86 80/120/85 92/129/96 91/86/95 +f 80/120/85 79/119/64 90/130/66 92/129/96 +f 54/81/74 56/107/73 89/125/92 88/80/91 +f 56/107/73 55/108/58 87/127/57 89/125/92 +f 93/89/90 95/122/89 59/117/84 58/88/83 +f 95/122/89 94/124/60 57/118/59 59/117/84 diff --git a/diffusion_policy/env/block_pushing/assets/blocks/pentagon.obj b/diffusion_policy/env/block_pushing/assets/blocks/pentagon.obj new file mode 100644 index 0000000..7054e41 --- /dev/null +++ b/diffusion_policy/env/block_pushing/assets/blocks/pentagon.obj @@ -0,0 +1,419 @@ +# Blender v2.92.0 OBJ File: '' +# www.blender.org +mtllib pentagon.mtl +o 
pentagon_yellow_block_Cube.003 +v -0.000000 0.000000 -0.001329 +v -0.000000 0.038100 -0.001329 +v -0.010789 0.000000 0.015873 +v -0.011940 0.002032 0.017708 +v -0.011603 0.000595 0.017170 +v -0.014201 0.002032 0.016092 +v -0.012833 0.000000 0.014413 +v -0.013800 0.000595 0.015600 +v -0.013310 0.002032 0.017235 +v -0.012029 0.000000 0.015448 +v -0.012935 0.000595 0.016711 +v -0.012833 0.038100 0.014413 +v -0.014201 0.036068 0.016092 +v -0.013800 0.037505 0.015600 +v -0.011940 0.036068 0.017708 +v -0.010789 0.038100 0.015873 +v -0.011603 0.037505 0.017170 +v -0.012029 0.038100 0.015448 +v -0.013310 0.036068 0.017235 +v -0.012935 0.037505 0.016711 +v -0.020074 0.000000 -0.006755 +v -0.022213 0.002032 -0.007333 +v -0.021586 0.000595 -0.007164 +v -0.021345 0.002032 -0.009898 +v -0.019288 0.000000 -0.009072 +v -0.020743 0.000595 -0.009656 +v -0.022188 0.002032 -0.008754 +v -0.020054 0.000000 -0.008040 +v -0.021563 0.000595 -0.008545 +v -0.019288 0.038100 -0.009072 +v -0.021345 0.036068 -0.009898 +v -0.020743 0.037505 -0.009656 +v -0.022213 0.036068 -0.007333 +v -0.020074 0.038100 -0.006755 +v -0.021586 0.037505 -0.007164 +v -0.022188 0.036068 -0.008754 +v -0.020054 0.038100 -0.008040 +v -0.021563 0.037505 -0.008545 +v 0.012833 0.000000 0.014413 +v 0.014201 0.002032 0.016092 +v 0.013800 0.000595 0.015600 +v 0.011940 0.002032 0.017708 +v 0.010789 0.000000 0.015873 +v 0.011603 0.000595 0.017170 +v 0.012029 0.000000 0.015448 +v 0.013310 0.002032 0.017235 +v 0.012935 0.000595 0.016711 +v 0.010789 0.038100 0.015873 +v 0.011940 0.036068 0.017708 +v 0.011603 0.037505 0.017170 +v 0.014201 0.036068 0.016092 +v 0.012833 0.038100 0.014413 +v 0.013800 0.037505 0.015600 +v 0.013310 0.036068 0.017235 +v 0.012029 0.038100 0.015448 +v 0.012935 0.037505 0.016711 +v 0.019288 0.000000 -0.009072 +v 0.021345 0.002032 -0.009898 +v 0.020743 0.000595 -0.009656 +v 0.022213 0.002032 -0.007333 +v 0.020074 0.000000 -0.006755 +v 0.021586 0.000595 -0.007164 +v 0.022188 0.002032 -0.008754 +v 0.020054 
0.000000 -0.008040 +v 0.021563 0.000595 -0.008545 +v 0.020074 0.038100 -0.006755 +v 0.022213 0.036068 -0.007333 +v 0.021586 0.037505 -0.007164 +v 0.021345 0.036068 -0.009898 +v 0.019288 0.038100 -0.009072 +v 0.020743 0.037505 -0.009656 +v 0.022188 0.036068 -0.008754 +v 0.020054 0.038100 -0.008040 +v 0.021563 0.037505 -0.008545 +v 0.001283 0.038100 -0.021099 +v 0.001420 0.036068 -0.023214 +v 0.001380 0.037505 -0.022595 +v -0.001420 0.036068 -0.023214 +v -0.001283 0.038100 -0.021099 +v -0.001380 0.037505 -0.022595 +v 0.000000 0.036068 -0.023607 +v -0.000000 0.038100 -0.021456 +v -0.000000 0.037505 -0.022977 +v -0.001283 0.000000 -0.021099 +v -0.001420 0.002032 -0.023214 +v -0.001380 0.000595 -0.022595 +v 0.001420 0.002032 -0.023214 +v 0.001283 0.000000 -0.021099 +v 0.001380 0.000595 -0.022595 +v -0.000000 0.000000 -0.021456 +v 0.000000 0.002032 -0.023607 +v -0.000000 0.000595 -0.022977 +vt 0.718822 0.029303 +vt 0.730852 0.033346 +vt 0.721614 0.040891 +vt 0.709480 0.040139 +vt 0.717419 0.029729 +vt 0.717181 0.051685 +vt 0.709481 0.041565 +vt 0.686374 0.033371 +vt 0.706477 0.033371 +vt 0.706477 0.034211 +vt 0.686374 0.034211 +vt 0.731711 0.034535 +vt 0.731731 0.047644 +vt 0.732155 0.013538 +vt 0.732386 0.014251 +vt 0.720023 0.014212 +vt 0.724218 0.025374 +vt 0.723581 0.025796 +vt 0.731459 0.033813 +vt 0.686374 0.065488 +vt 0.706477 0.065488 +vt 0.706477 0.066344 +vt 0.686374 0.066344 +vt 0.730865 0.048779 +vt 0.718604 0.052165 +vt 0.717826 0.052123 +vt 0.710784 0.021757 +vt 0.710178 0.021290 +vt 0.706477 0.050528 +vt 0.686374 0.050528 +vt 0.686374 0.049672 +vt 0.706477 0.049672 +vt 0.709250 0.040852 +vt 0.718055 0.029306 +vt 0.686374 0.051384 +vt 0.706477 0.051384 +vt 0.686374 0.035050 +vt 0.706477 0.035050 +vt 0.686374 0.019217 +vt 0.706477 0.019217 +vt 0.731482 0.048351 +vt 0.709906 0.007458 +vt 0.710155 0.006751 +vt 0.686374 0.003322 +vt 0.706477 0.003322 +vt 0.706477 0.017476 +vt 0.686374 0.017476 +vt 0.706477 0.081821 +vt 0.686374 0.081821 +vt 0.686374 0.067199 
+vt 0.706477 0.067199 +vt 0.723811 0.002980 +vt 0.724455 0.003418 +vt 0.706477 0.082661 +vt 0.686374 0.082661 +vt 0.686374 0.002483 +vt 0.706477 0.002483 +vt 0.732157 0.014964 +vt 0.723033 0.002938 +vt 0.722814 0.025800 +vt 0.686374 0.018347 +vt 0.706477 0.018347 +vt 0.709926 0.020568 +vt 0.710771 0.006324 +vt 0.733509 0.048629 +vt 0.733072 0.049555 +vt 0.732273 0.048949 +vt 0.732572 0.048190 +vt 0.732296 0.050225 +vt 0.731620 0.049441 +vt 0.734168 0.013254 +vt 0.734379 0.014256 +vt 0.733377 0.014254 +vt 0.733156 0.013468 +vt 0.734166 0.015257 +vt 0.733156 0.015039 +vt 0.707468 0.041849 +vt 0.707257 0.040847 +vt 0.708259 0.040849 +vt 0.708480 0.041634 +vt 0.707471 0.039846 +vt 0.708480 0.040063 +vt 0.708128 0.006474 +vt 0.708564 0.005547 +vt 0.709364 0.006153 +vt 0.709064 0.006913 +vt 0.709341 0.004877 +vt 0.710016 0.005662 +vt 0.731599 0.032714 +vt 0.732251 0.033244 +vt 0.732301 0.031933 +vt 0.733073 0.032652 +vt 0.723869 0.026728 +vt 0.723033 0.026764 +vt 0.724166 0.027687 +vt 0.723129 0.027790 +vt 0.732546 0.034029 +vt 0.733509 0.033612 +vt 0.724576 0.026281 +vt 0.725089 0.027190 +vt 0.710038 0.022388 +vt 0.709386 0.021859 +vt 0.709336 0.023170 +vt 0.708563 0.022451 +vt 0.717767 0.028375 +vt 0.718604 0.028339 +vt 0.717470 0.027416 +vt 0.718508 0.027313 +vt 0.709090 0.021074 +vt 0.708128 0.021490 +vt 0.717060 0.028822 +vt 0.716547 0.027913 +vt 0.723265 0.002005 +vt 0.724118 0.002071 +vt 0.723385 0.000970 +vt 0.724440 0.001117 +vt 0.717519 0.053032 +vt 0.716801 0.052568 +vt 0.717197 0.053986 +vt 0.716269 0.053464 +vt 0.724836 0.002535 +vt 0.725367 0.001639 +vt 0.718372 0.053098 +vt 0.718251 0.054133 +vn 0.0000 -1.0000 0.0000 +vn -0.6839 0.1913 -0.7040 +vn -0.6839 -0.1913 -0.7040 +vn -0.9271 -0.2054 -0.3134 +vn -0.9271 0.2054 -0.3134 +vn 0.0000 1.0000 -0.0000 +vn 0.1696 0.1897 0.9671 +vn 0.1696 -0.1897 0.9671 +vn 0.5696 -0.2019 0.7968 +vn 0.5696 0.2019 0.7968 +vn -0.5696 -0.2019 0.7968 +vn -0.5696 0.2019 0.7968 +vn -0.8602 0.1898 0.4734 +vn -0.8602 -0.1898 0.4734 
+vn -0.1696 0.1897 0.9671 +vn -0.1696 -0.1897 0.9671 +vn -0.9708 0.1916 0.1446 +vn -0.9708 -0.1916 0.1446 +vn -0.4034 0.1878 -0.8955 +vn -0.4034 -0.1878 -0.8955 +vn 0.6839 0.1913 -0.7040 +vn 0.6839 -0.1913 -0.7040 +vn 0.4034 -0.1878 -0.8955 +vn 0.4034 0.1878 -0.8955 +vn 0.9708 -0.1916 0.1446 +vn 0.9708 0.1916 0.1446 +vn 0.8602 0.1898 0.4734 +vn 0.8602 -0.1898 0.4734 +vn 0.9271 -0.2054 -0.3134 +vn 0.9271 0.2054 -0.3134 +vn 0.0000 0.1985 -0.9801 +vn 0.0000 -0.1985 -0.9801 +vn -0.6793 -0.6969 -0.2300 +vn -0.7180 -0.6862 0.1166 +vn -0.2003 -0.9774 -0.0681 +vn -0.2163 -0.9753 0.0454 +vn -0.5001 -0.6857 -0.5289 +vn -0.1444 -0.9752 -0.1679 +vn -0.6793 0.6969 -0.2300 +vn -0.5001 0.6857 -0.5289 +vn -0.2003 0.9774 -0.0681 +vn -0.1444 0.9752 -0.1679 +vn -0.7180 0.6862 0.1166 +vn -0.2163 0.9753 0.0454 +vn 0.6793 -0.6969 -0.2300 +vn 0.5001 -0.6857 -0.5289 +vn 0.2003 -0.9774 -0.0681 +vn 0.1444 -0.9752 -0.1679 +vn 0.7180 -0.6862 0.1166 +vn 0.2163 -0.9753 0.0454 +vn 0.6793 0.6969 -0.2300 +vn 0.7180 0.6862 0.1166 +vn 0.2003 0.9774 -0.0681 +vn 0.2163 0.9753 0.0454 +vn 0.5001 0.6857 -0.5289 +vn 0.1444 0.9752 -0.1679 +vn -0.1241 -0.9769 0.1739 +vn -0.0267 -0.9752 0.2199 +vn -0.1178 -0.6841 0.7198 +vn -0.4189 -0.6935 0.5862 +vn -0.0267 0.9752 0.2199 +vn -0.1242 0.9769 0.1739 +vn -0.4189 0.6935 0.5862 +vn -0.1178 0.6841 0.7198 +vn -0.1992 -0.9752 0.0965 +vn -0.6427 -0.6843 0.3444 +vn -0.1992 0.9752 0.0965 +vn -0.6427 0.6843 0.3444 +vn 0.1242 0.9769 0.1739 +vn 0.0267 0.9752 0.2199 +vn 0.1178 0.6841 0.7198 +vn 0.4189 0.6935 0.5862 +vn 0.0267 -0.9752 0.2199 +vn 0.1241 -0.9769 0.1739 +vn 0.4189 -0.6935 0.5862 +vn 0.1178 -0.6841 0.7198 +vn 0.1992 0.9752 0.0965 +vn 0.6427 0.6843 0.3444 +vn 0.1992 -0.9752 0.0965 +vn 0.6427 -0.6843 0.3444 +vn -0.0000 0.9764 -0.2159 +vn 0.1005 0.9750 -0.1981 +vn 0.3067 0.6821 -0.6638 +vn -0.0000 0.6900 -0.7238 +vn 0.1005 -0.9750 -0.1981 +vn 0.0000 -0.9764 -0.2159 +vn 0.0000 -0.6900 -0.7238 +vn 0.3067 -0.6821 -0.6638 +vn -0.1005 0.9750 -0.1981 +vn -0.3067 0.6821 
-0.6638 +vn -0.1005 -0.9750 -0.1981 +vn -0.3067 -0.6821 -0.6638 +usemtl toybox +s 1 +f 43/1/1 3/2/1 1/3/1 +f 61/4/1 39/5/1 1/3/1 +f 88/6/1 57/7/1 1/3/1 +f 31/8/2 24/9/3 27/10/4 36/11/5 +f 7/12/1 21/13/1 1/3/1 +f 30/14/6 37/15/6 2/16/6 +f 12/17/6 18/18/6 2/16/6 +f 10/19/1 7/12/1 1/3/1 +f 49/20/7 42/21/8 46/22/9 54/23/10 +f 25/24/1 84/25/1 1/3/1 +f 90/26/1 88/6/1 1/3/1 +f 3/2/1 10/19/1 1/3/1 +f 48/27/6 55/28/6 2/16/6 +f 9/29/11 19/30/12 13/31/13 6/32/14 +f 64/33/1 61/4/1 1/3/1 +f 39/5/1 45/34/1 1/3/1 +f 42/21/8 49/20/7 15/35/15 4/36/16 +f 6/32/14 13/31/13 33/37/17 22/38/18 +f 24/9/3 31/8/2 78/39/19 85/40/20 +f 21/13/1 28/41/1 1/3/1 +f 66/42/6 73/43/6 2/16/6 +f 69/44/21 58/45/22 87/46/23 76/47/24 +f 45/34/1 43/1/1 1/3/1 +f 60/48/25 67/49/26 51/50/27 40/51/28 +f 82/52/6 79/53/6 2/16/6 +f 84/25/1 90/26/1 1/3/1 +f 67/49/26 60/48/25 63/54/29 72/55/30 +f 28/41/1 25/24/1 1/3/1 +f 72/56/30 63/57/29 58/45/22 69/44/21 +f 37/15/6 34/58/6 2/16/6 +f 79/53/6 30/14/6 2/16/6 +f 75/59/6 82/52/6 2/16/6 +f 18/18/6 16/60/6 2/16/6 +f 54/23/10 46/22/9 40/51/28 51/50/27 +f 81/61/31 91/62/32 85/40/20 78/39/19 +f 16/60/6 48/27/6 2/16/6 +f 52/63/6 66/42/6 2/16/6 +f 36/11/5 27/10/4 22/38/18 33/37/17 +f 73/43/6 70/64/6 2/16/6 +f 57/7/1 64/33/1 1/3/1 +f 70/64/6 75/59/6 2/16/6 +f 34/58/6 12/17/6 2/16/6 +f 76/47/24 87/46/23 91/62/32 81/61/31 +f 55/28/6 52/63/6 2/16/6 +f 22/65/18 27/66/4 29/67/33 23/68/34 +f 23/68/34 29/67/33 28/41/35 21/13/36 +f 27/66/4 24/69/3 26/70/37 29/67/33 +f 29/67/33 26/70/37 25/24/38 28/41/35 +f 31/71/2 36/72/5 38/73/39 32/74/40 +f 32/74/40 38/73/39 37/15/41 30/14/42 +f 36/72/5 33/75/17 35/76/43 38/73/39 +f 38/73/39 35/76/43 34/58/44 37/15/41 +f 58/77/22 63/78/29 65/79/45 59/80/46 +f 59/80/46 65/79/45 64/33/47 57/7/48 +f 63/78/29 60/81/25 62/82/49 65/79/45 +f 65/79/45 62/82/49 61/4/50 64/33/47 +f 67/83/26 72/84/30 74/85/51 68/86/52 +f 68/86/52 74/85/51 73/43/53 66/42/54 +f 72/84/30 69/87/21 71/88/55 74/85/51 +f 74/85/51 71/88/55 70/64/56 73/43/53 +f 10/19/57 3/2/58 5/89/59 
11/90/60 +f 11/90/60 5/89/59 4/91/16 9/92/11 +f 16/60/61 18/18/62 20/93/63 17/94/64 +f 17/94/64 20/93/63 19/95/12 15/96/15 +f 7/12/65 10/19/57 11/90/60 8/97/66 +f 8/97/66 11/90/60 9/92/11 6/98/14 +f 18/18/62 12/17/67 14/99/68 20/93/63 +f 20/93/63 14/99/68 13/100/13 19/95/12 +f 55/28/69 48/27/70 50/101/71 56/102/72 +f 56/102/72 50/101/71 49/103/7 54/104/10 +f 43/1/73 45/34/74 47/105/75 44/106/76 +f 44/106/76 47/105/75 46/107/9 42/108/8 +f 52/63/77 55/28/69 56/102/72 53/109/78 +f 53/109/78 56/102/72 54/104/10 51/110/27 +f 45/34/74 39/5/79 41/111/80 47/105/75 +f 47/105/75 41/111/80 40/112/28 46/107/9 +f 82/52/81 75/59/82 77/113/83 83/114/84 +f 83/114/84 77/113/83 76/115/24 81/116/31 +f 88/6/85 90/26/86 92/117/87 89/118/88 +f 89/118/88 92/117/87 91/119/32 87/120/23 +f 79/53/89 82/52/81 83/114/84 80/121/90 +f 80/121/90 83/114/84 81/116/31 78/122/19 +f 90/26/86 84/25/91 86/123/92 92/117/87 +f 92/117/87 86/123/92 85/124/20 91/119/32 +f 6/98/14 22/65/18 23/68/34 8/97/66 +f 8/97/66 23/68/34 21/13/36 7/12/65 +f 60/81/25 40/112/28 41/111/80 62/82/49 +f 62/82/49 41/111/80 39/5/79 61/4/50 +f 42/108/8 4/91/16 5/89/59 44/106/76 +f 44/106/76 5/89/59 3/2/58 43/1/73 +f 12/17/67 34/58/44 35/76/43 14/99/68 +f 14/99/68 35/76/43 33/75/17 13/100/13 +f 48/27/70 16/60/61 17/94/64 50/101/71 +f 50/101/71 17/94/64 15/96/15 49/103/7 +f 51/110/27 67/83/26 68/86/52 53/109/78 +f 53/109/78 68/86/52 66/42/54 52/63/77 +f 84/25/91 25/24/38 26/70/37 86/123/92 +f 86/123/92 26/70/37 24/69/3 85/124/20 +f 30/14/42 79/53/89 80/121/90 32/74/40 +f 32/74/40 80/121/90 78/122/19 31/71/2 +f 87/120/23 58/77/22 59/80/46 89/118/88 +f 89/118/88 59/80/46 57/7/48 88/6/85 +f 69/87/21 76/115/24 77/113/83 71/88/55 +f 71/88/55 77/113/83 75/59/82 70/64/56 +f 4/36/16 15/35/15 19/30/12 9/29/11 diff --git a/diffusion_policy/env/block_pushing/assets/blocks/red_moon.urdf b/diffusion_policy/env/block_pushing/assets/blocks/red_moon.urdf new file mode 100644 index 0000000..36c2b5a --- /dev/null +++ 
b/diffusion_policy/env/block_pushing/assets/blocks/red_moon.urdf @@ -0,0 +1,30 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/diffusion_policy/env/block_pushing/assets/blocks/star.obj b/diffusion_policy/env/block_pushing/assets/blocks/star.obj new file mode 100644 index 0000000..1c1d696 --- /dev/null +++ b/diffusion_policy/env/block_pushing/assets/blocks/star.obj @@ -0,0 +1,689 @@ +# Blender v2.92.0 OBJ File: '' +# www.blender.org +mtllib star.mtl +o star_green_block_star0_block +v -0.000030 0.000000 0.001549 +v -0.000030 0.038100 0.001549 +v 0.006429 0.000000 -0.009092 +v 0.007380 0.002032 -0.010659 +v 0.007101 0.000595 -0.010200 +v 0.009909 0.002032 -0.008940 +v 0.008636 0.000000 -0.007597 +v 0.009536 0.000595 -0.008547 +v 0.008636 0.038100 -0.007597 +v 0.009909 0.036068 -0.008940 +v 0.009536 0.037505 -0.008547 +v 0.007380 0.036068 -0.010659 +v 0.006429 0.038100 -0.009092 +v 0.007101 0.037505 -0.010200 +v -0.006436 0.038100 -0.009091 +v -0.007380 0.036068 -0.010659 +v -0.007103 0.037505 -0.010200 +v -0.009909 0.036068 -0.008940 +v -0.008642 0.038100 -0.007595 +v -0.009538 0.037505 -0.008546 +v -0.008642 0.000000 -0.007595 +v -0.009909 0.002032 -0.008940 +v -0.009538 0.000595 -0.008546 +v -0.007380 0.002032 -0.010659 +v -0.006436 0.000000 -0.009091 +v -0.007103 0.000595 -0.010200 +v -0.011672 0.000000 0.006854 +v -0.013354 0.002032 0.007620 +v -0.012861 0.000595 0.007396 +v -0.014336 0.002032 0.004714 +v -0.012526 0.000000 0.004313 +v -0.013806 0.000595 0.004596 +v -0.012526 0.038100 0.004313 +v -0.014336 0.036068 0.004714 +v -0.013806 0.037505 0.004596 +v -0.013354 0.036068 0.007620 +v -0.011672 0.038100 0.006854 +v -0.012861 0.037505 0.007396 +v 0.012521 0.000000 0.004314 +v 0.014336 0.002032 0.004714 +v 0.013805 0.000595 0.004597 +v 0.013354 0.002032 0.007620 +v 0.011668 0.000000 0.006855 +v 0.012860 0.000595 0.007396 +v 0.011668 0.038100 0.006855 +v 0.013354 0.036068 0.007620 +v 0.012860 0.037505 0.007396 +v 0.014336 0.036068 
0.004714 +v 0.012521 0.038100 0.004314 +v 0.013805 0.037505 0.004597 +v -0.010002 0.000000 0.018088 +v -0.011405 0.002032 0.020414 +v -0.010994 0.000595 0.019733 +v -0.013985 0.002032 0.018583 +v -0.012256 0.000000 0.016473 +v -0.013478 0.000595 0.017965 +v -0.013300 0.002032 0.020351 +v -0.011669 0.000000 0.018040 +v -0.012822 0.000595 0.019674 +v -0.012256 0.038100 0.016473 +v -0.013985 0.036068 0.018583 +v -0.013478 0.037505 0.017965 +v -0.011405 0.036068 0.020414 +v -0.010002 0.038100 0.018088 +v -0.010994 0.037505 0.019733 +v -0.011669 0.038100 0.018040 +v -0.013300 0.036068 0.020351 +v -0.012822 0.037505 0.019674 +v -0.018964 0.000000 -0.003158 +v -0.021672 0.002032 -0.003831 +v -0.020879 0.000595 -0.003634 +v -0.020696 0.002032 -0.006698 +v -0.018097 0.000000 -0.005661 +v -0.019935 0.000595 -0.006394 +v -0.019426 0.000000 -0.004715 +v -0.022190 0.002032 -0.005607 +v -0.021380 0.000595 -0.005346 +v -0.018097 0.038100 -0.005661 +v -0.020696 0.036068 -0.006698 +v -0.019935 0.037505 -0.006394 +v -0.021672 0.036068 -0.003831 +v -0.018964 0.038100 -0.003158 +v -0.020879 0.037505 -0.003634 +v -0.022190 0.036068 -0.005607 +v -0.019426 0.038100 -0.004715 +v -0.021380 0.037505 -0.005346 +v 0.012251 0.000000 0.016476 +v 0.013985 0.002032 0.018583 +v 0.013477 0.000595 0.017966 +v 0.011405 0.002032 0.020414 +v 0.009996 0.000000 0.018089 +v 0.010992 0.000595 0.019733 +v 0.011663 0.000000 0.018043 +v 0.013300 0.002032 0.020351 +v 0.012821 0.000595 0.019675 +v 0.009996 0.038100 0.018089 +v 0.011405 0.036068 0.020414 +v 0.010992 0.037505 0.019733 +v 0.013985 0.036068 0.018583 +v 0.012251 0.038100 0.016476 +v 0.013477 0.037505 0.017966 +v 0.013300 0.036068 0.020351 +v 0.011663 0.038100 0.018043 +v 0.012821 0.037505 0.019675 +v 0.018094 0.000000 -0.005663 +v 0.020696 0.002032 -0.006698 +v 0.019934 0.000595 -0.006395 +v 0.021672 0.002032 -0.003831 +v 0.018961 0.000000 -0.003159 +v 0.020878 0.000595 -0.003635 +v 0.019423 0.000000 -0.004716 +v 0.022190 0.002032 -0.005607 +v 
0.021380 0.000595 -0.005346 +v 0.018961 0.038100 -0.003159 +v 0.021672 0.036068 -0.003831 +v 0.020878 0.037505 -0.003635 +v 0.020696 0.036068 -0.006698 +v 0.018094 0.038100 -0.005663 +v 0.019934 0.037505 -0.006395 +v 0.022190 0.036068 -0.005607 +v 0.019423 0.038100 -0.004716 +v 0.021380 0.037505 -0.005346 +v 0.001426 0.038100 -0.016730 +v 0.001638 0.036068 -0.019383 +v 0.001576 0.037505 -0.018606 +v -0.001638 0.036068 -0.019383 +v -0.001434 0.038100 -0.016729 +v -0.001578 0.037505 -0.018606 +v 0.000000 0.036068 -0.020414 +v -0.000004 0.038100 -0.017644 +v -0.000001 0.037505 -0.019603 +v -0.001434 0.000000 -0.016729 +v -0.001638 0.002032 -0.019383 +v -0.001578 0.000595 -0.018606 +v 0.001638 0.002032 -0.019383 +v 0.001426 0.000000 -0.016730 +v 0.001576 0.000595 -0.018606 +v -0.000004 0.000000 -0.017644 +v 0.000000 0.002032 -0.020414 +v -0.000001 0.000595 -0.019603 +v 0.001733 0.000000 0.015081 +v 0.001984 0.002032 0.017005 +v 0.001910 0.000595 0.016441 +v -0.001984 0.002032 0.017005 +v -0.001741 0.000000 0.015080 +v -0.001912 0.000595 0.016441 +v -0.001741 0.038100 0.015080 +v -0.001984 0.036068 0.017005 +v -0.001912 0.037505 0.016441 +v 0.001984 0.036068 0.017005 +v 0.001733 0.038100 0.015081 +v 0.001910 0.037505 0.016441 +vt 0.884326 0.143054 +vt 0.881637 0.147199 +vt 0.874485 0.145636 +vt 0.870501 0.139220 +vt 0.873534 0.134807 +vt 0.837942 0.112414 +vt 0.837942 0.131533 +vt 0.832314 0.131533 +vt 0.832314 0.112414 +vt 0.883572 0.123603 +vt 0.883611 0.122735 +vt 0.892897 0.129050 +vt 0.881240 0.148682 +vt 0.881539 0.153913 +vt 0.898370 0.134009 +vt 0.897030 0.139070 +vt 0.873770 0.153188 +vt 0.868788 0.154800 +vt 0.903052 0.129620 +vt 0.899208 0.132723 +vt 0.862147 0.112414 +vt 0.862147 0.131533 +vt 0.855821 0.131533 +vt 0.855821 0.112414 +vt 0.830184 0.112414 +vt 0.830184 0.131533 +vt 0.824015 0.131533 +vt 0.824015 0.112414 +vt 0.874942 0.134807 +vt 0.877932 0.139104 +vt 0.880430 0.154782 +vt 0.875317 0.153178 +vt 0.866663 0.147297 +vt 0.863946 0.143378 +vt 
0.884646 0.135960 +vt 0.883837 0.135640 +vt 0.874236 0.134337 +vt 0.813888 0.131533 +vt 0.813888 0.112414 +vt 0.814927 0.112414 +vt 0.814927 0.131533 +vt 0.881273 0.154719 +vt 0.863186 0.112414 +vt 0.863186 0.131533 +vt 0.867919 0.154747 +vt 0.867585 0.153944 +vt 0.845799 0.112414 +vt 0.845799 0.131533 +vt 0.840171 0.131533 +vt 0.840171 0.112414 +vt 0.903484 0.128858 +vt 0.883832 0.141627 +vt 0.884500 0.142196 +vt 0.864368 0.141965 +vt 0.869278 0.140166 +vt 0.854097 0.112414 +vt 0.854097 0.131533 +vt 0.847929 0.131533 +vt 0.847929 0.112414 +vt 0.896527 0.139753 +vt 0.867312 0.149178 +vt 0.822292 0.112414 +vt 0.822292 0.131533 +vt 0.872132 0.131533 +vt 0.872132 0.112414 +vt 0.877999 0.112414 +vt 0.877999 0.131533 +vt 0.864225 0.131533 +vt 0.864225 0.112414 +vt 0.870414 0.112414 +vt 0.870414 0.131533 +vt 0.815966 0.112414 +vt 0.815966 0.131533 +vt 0.891348 0.136441 +vt 0.889875 0.135971 +vt 0.846864 0.131533 +vt 0.846864 0.112414 +vt 0.831249 0.131533 +vt 0.831249 0.112414 +vt 0.879159 0.140026 +vt 0.896695 0.118860 +vt 0.898202 0.123877 +vt 0.895355 0.118422 +vt 0.896169 0.118193 +vt 0.863717 0.142542 +vt 0.889642 0.122199 +vt 0.891100 0.121679 +vt 0.899083 0.125135 +vt 0.805981 0.112414 +vt 0.805981 0.131533 +vt 0.800114 0.131533 +vt 0.800114 0.112414 +vt 0.807699 0.112414 +vt 0.807699 0.131533 +vt 0.884410 0.122387 +vt 0.895707 0.139551 +vt 0.879086 0.112414 +vt 0.879086 0.131533 +vt 0.883766 0.134776 +vt 0.884979 0.130164 +vt 0.903027 0.128111 +vt 0.799027 0.131533 +vt 0.799027 0.112414 +vt 0.884945 0.128175 +vt 0.882437 0.147351 +vt 0.881976 0.149022 +vt 0.883261 0.147612 +vt 0.882837 0.149234 +vt 0.898778 0.123306 +vt 0.899783 0.124719 +vt 0.899515 0.122813 +vt 0.900468 0.124192 +vt 0.899922 0.133113 +vt 0.898965 0.134558 +vt 0.900624 0.133616 +vt 0.899718 0.135026 +vt 0.878303 0.138385 +vt 0.879696 0.139416 +vt 0.878850 0.137688 +vt 0.880184 0.138702 +vt 0.889274 0.121462 +vt 0.890920 0.120885 +vt 0.889004 0.120636 +vt 0.890592 0.120060 +vt 0.875420 0.153985 
+vt 0.873676 0.154006 +vt 0.875394 0.154873 +vt 0.873704 0.154874 +vt 0.891195 0.137238 +vt 0.889532 0.136718 +vt 0.890894 0.138073 +vt 0.889289 0.137551 +vt 0.868700 0.139582 +vt 0.870085 0.138524 +vt 0.868187 0.138882 +vt 0.869517 0.137842 +vt 0.862942 0.143128 +vt 0.862743 0.142252 +vt 0.862000 0.143160 +vt 0.861880 0.141993 +vt 0.883020 0.136246 +vt 0.882908 0.135355 +vt 0.882298 0.136785 +vt 0.882012 0.135647 +vt 0.863391 0.141636 +vt 0.862626 0.141085 +vt 0.883840 0.136603 +vt 0.883310 0.137383 +vt 0.897417 0.140057 +vt 0.896867 0.140740 +vt 0.898025 0.140801 +vt 0.897174 0.141610 +vt 0.874217 0.133293 +vt 0.874968 0.133747 +vt 0.874208 0.132371 +vt 0.875284 0.132840 +vt 0.896006 0.140567 +vt 0.896002 0.141529 +vt 0.873468 0.133750 +vt 0.873134 0.132848 +vt 0.895621 0.117396 +vt 0.896476 0.117194 +vt 0.895584 0.116434 +vt 0.896753 0.116313 +vt 0.881903 0.155553 +vt 0.881031 0.155656 +vt 0.882465 0.156286 +vt 0.881325 0.156572 +vt 0.897048 0.117859 +vt 0.897631 0.117095 +vt 0.882214 0.154732 +vt 0.883023 0.155251 +vt 0.882691 0.123051 +vt 0.882773 0.122155 +vt 0.881783 0.122789 +vt 0.882031 0.121638 +vt 0.867329 0.155578 +vt 0.866946 0.154764 +vt 0.866810 0.156317 +vt 0.866182 0.155321 +vt 0.883582 0.121769 +vt 0.883025 0.121005 +vt 0.868222 0.155664 +vt 0.867960 0.156572 +vt 0.904030 0.128049 +vt 0.904478 0.128840 +vt 0.904906 0.127731 +vt 0.905368 0.128825 +vt 0.885439 0.141872 +vt 0.885289 0.142769 +vt 0.886281 0.141582 +vt 0.886221 0.142768 +vt 0.904056 0.129646 +vt 0.904943 0.129934 +vt 0.884767 0.141260 +vt 0.885502 0.140686 +vt 0.865721 0.149838 +vt 0.865007 0.147749 +vt 0.865827 0.147457 +vt 0.866565 0.149562 +vt 0.883268 0.130306 +vt 0.883225 0.128099 +vt 0.884112 0.128070 +vt 0.884138 0.130300 +vn 0.0000 -1.0000 0.0000 +vn 0.1760 -0.1821 0.9674 +vn 0.1760 0.1821 0.9674 +vn 0.1373 0.1995 0.9702 +vn 0.1373 -0.1995 0.9702 +vn -0.0000 1.0000 -0.0000 +vn 0.8706 -0.1992 0.4499 +vn 0.8706 0.1992 0.4499 +vn 0.8522 0.1751 0.4930 +vn 0.8522 -0.1751 0.4930 +vn 
-0.9656 -0.1964 0.1706 +vn -0.9656 0.1964 0.1706 +vn -0.9767 0.1745 0.1252 +vn -0.9767 -0.1745 0.1252 +vn -0.4160 0.1947 -0.8883 +vn -0.4160 -0.1947 -0.8883 +vn -0.9185 -0.2428 -0.3119 +vn -0.9185 0.2428 -0.3119 +vn 0.9185 -0.2429 -0.3119 +vn 0.9185 0.2429 -0.3119 +vn -0.1372 -0.1993 0.9703 +vn -0.1372 0.1993 0.9703 +vn -0.1761 0.1819 0.9674 +vn -0.1761 -0.1819 0.9674 +vn 0.9766 -0.1749 0.1252 +vn 0.9766 0.1749 0.1252 +vn 0.9655 0.1968 0.1705 +vn 0.9655 -0.1968 0.1705 +vn -0.8523 -0.1748 0.4930 +vn -0.8523 0.1748 0.4930 +vn 0.7051 0.1756 -0.6870 +vn 0.7051 -0.1756 -0.6870 +vn 0.6736 -0.1964 -0.7125 +vn 0.6736 0.1964 -0.7125 +vn 0.4161 0.1947 -0.8883 +vn 0.4161 -0.1947 -0.8883 +vn 0.3770 -0.1709 -0.9103 +vn 0.3770 0.1709 -0.9103 +vn -0.8706 -0.1990 0.4499 +vn -0.8706 0.1990 0.4499 +vn 0.5621 0.2389 0.7918 +vn 0.5621 -0.2389 0.7918 +vn -0.5620 0.2388 0.7919 +vn -0.5620 -0.2388 0.7919 +vn -0.7052 -0.1752 -0.6870 +vn -0.7052 0.1752 -0.6870 +vn -0.6736 0.1961 -0.7126 +vn -0.6736 -0.1961 -0.7126 +vn -0.3770 -0.1709 -0.9103 +vn -0.3770 0.1709 -0.9103 +vn 0.0001 -0.2357 -0.9718 +vn 0.0001 0.2357 -0.9718 +vn 0.0822 -0.9807 -0.1776 +vn 0.1339 -0.9813 -0.1382 +vn 0.5148 -0.6952 -0.5016 +vn 0.2776 -0.6869 -0.6716 +vn 0.1339 0.9813 -0.1382 +vn 0.0822 0.9807 -0.1776 +vn 0.2776 0.6869 -0.6716 +vn 0.5148 0.6952 -0.5016 +vn -0.0822 0.9807 -0.1777 +vn -0.1341 0.9812 -0.1385 +vn -0.5153 0.6946 -0.5020 +vn -0.2777 0.6868 -0.6717 +vn -0.1341 -0.9812 -0.1385 +vn -0.0822 -0.9807 -0.1777 +vn -0.2777 -0.6868 -0.6717 +vn -0.5153 -0.6946 -0.5020 +vn 0.1701 0.9813 0.0905 +vn 0.1906 0.9812 0.0307 +vn 0.7146 0.6935 0.0914 +vn 0.6235 0.6939 0.3603 +vn 0.1906 -0.9812 0.0307 +vn 0.1701 -0.9813 0.0905 +vn 0.6235 -0.6939 0.3603 +vn 0.7146 -0.6935 0.0914 +vn -0.1911 0.9811 0.0308 +vn -0.1703 0.9812 0.0907 +vn -0.6239 0.6934 0.3605 +vn -0.7153 0.6928 0.0914 +vn -0.1703 -0.9812 0.0907 +vn -0.1911 -0.9811 0.0308 +vn -0.7153 -0.6928 0.0914 +vn -0.6239 -0.6934 0.3605 +vn -0.1140 -0.9807 0.1590 +vn 0.0574 
-0.9732 0.2225 +vn 0.1230 -0.6920 0.7113 +vn -0.3997 -0.7242 0.5619 +vn 0.0574 0.9732 0.2225 +vn -0.1140 0.9807 0.1590 +vn -0.3997 0.7242 0.5619 +vn 0.1230 0.6920 0.7113 +vn -0.2413 -0.9704 0.0099 +vn -0.7222 -0.6844 0.1004 +vn -0.2413 0.9704 0.0099 +vn -0.7222 0.6844 0.1004 +vn -0.1821 0.9811 -0.0651 +vn -0.0726 0.9686 -0.2377 +vn -0.2844 0.6806 -0.6752 +vn -0.6483 0.7279 -0.2233 +vn -0.0726 -0.9686 -0.2377 +vn -0.1821 -0.9811 -0.0651 +vn -0.6483 -0.7279 -0.2233 +vn -0.2844 -0.6806 -0.6752 +vn -0.1966 0.9707 0.1379 +vn -0.6321 0.6875 0.3576 +vn -0.1966 -0.9707 0.1379 +vn -0.6321 -0.6875 0.3576 +vn 0.1820 0.9811 -0.0652 +vn 0.1964 0.9708 0.1377 +vn 0.6318 0.6878 0.3574 +vn 0.6481 0.7280 -0.2234 +vn 0.1964 -0.9708 0.1377 +vn 0.1820 -0.9811 -0.0652 +vn 0.6481 -0.7280 -0.2234 +vn 0.6318 -0.6878 0.3574 +vn 0.0726 0.9686 -0.2378 +vn 0.2843 0.6806 -0.6752 +vn 0.0726 -0.9686 -0.2378 +vn 0.2843 -0.6806 -0.6752 +vn 0.1137 0.9807 0.1592 +vn -0.0576 0.9731 0.2229 +vn -0.1231 0.6917 0.7116 +vn 0.3994 0.7243 0.5620 +vn -0.0576 -0.9731 0.2229 +vn 0.1137 -0.9807 0.1592 +vn 0.3994 -0.7243 0.5620 +vn -0.1231 -0.6917 0.7116 +vn 0.2406 0.9706 0.0100 +vn 0.7217 0.6849 0.1003 +vn 0.2407 -0.9706 0.0100 +vn 0.7217 -0.6849 0.1003 +vn -0.0002 0.9803 -0.1975 +vn 0.1845 0.9711 -0.1512 +vn 0.5181 0.6852 -0.5119 +vn -0.0002 0.7214 -0.6926 +vn 0.1845 -0.9711 -0.1512 +vn -0.0002 -0.9803 -0.1975 +vn -0.0002 -0.7214 -0.6926 +vn 0.5181 -0.6852 -0.5119 +vn -0.1850 0.9710 -0.1515 +vn -0.5185 0.6847 -0.5122 +vn -0.1850 -0.9710 -0.1515 +vn -0.5185 -0.6847 -0.5122 +vn 0.1263 -0.7034 0.6995 +vn -0.1265 -0.7031 0.6997 +vn 0.0277 -0.9822 0.1859 +vn -0.0277 -0.9821 0.1861 +vn -0.1265 0.7031 0.6997 +vn 0.1263 0.7034 0.6995 +vn -0.0277 0.9821 0.1861 +vn 0.0277 0.9822 0.1859 +usemtl toybox.002 +s 1 +f 136/1/1 3/2/1 1/3/1 +f 31/4/1 69/5/1 1/3/1 +f 144/6/2 148/7/3 63/8/4 52/9/5 +f 96/10/6 103/11/6 2/12/6 +f 7/13/1 105/14/1 1/3/1 +f 19/15/6 78/16/6 2/12/6 +f 43/17/1 87/18/1 1/3/1 +f 127/19/6 15/20/6 2/12/6 +f 
15/20/6 19/15/6 2/12/6 +f 108/21/7 115/22/8 48/23/9 40/24/10 +f 54/25/11 61/26/12 36/27/13 28/28/14 +f 73/29/1 21/30/1 1/3/1 +f 109/31/1 39/32/1 1/3/1 +f 1/3/1 145/33/1 51/34/1 +f 60/35/6 66/36/6 2/12/6 +f 75/37/1 73/29/1 1/3/1 +f 79/38/15 72/39/16 76/40/17 84/41/18 +f 111/42/1 109/31/1 1/3/1 +f 115/22/8 108/21/7 112/43/19 120/44/20 +f 93/45/1 91/46/1 1/3/1 +f 90/47/21 97/48/22 150/49/23 142/50/24 +f 130/51/6 127/19/6 2/12/6 +f 132/52/1 138/53/1 1/3/1 +f 55/54/1 27/55/1 1/3/1 +f 42/56/25 46/57/26 99/58/27 88/59/28 +f 78/16/6 85/60/6 2/12/6 +f 91/46/1 141/61/1 1/3/1 +f 69/5/1 75/37/1 1/3/1 +f 30/62/29 28/28/14 36/27/13 34/63/30 +f 12/64/31 4/65/32 135/66/33 124/67/34 +f 117/68/35 106/69/36 6/70/37 10/71/38 +f 84/41/18 76/40/17 70/72/39 81/73/40 +f 33/74/6 37/75/6 2/12/6 +f 42/56/25 40/24/10 48/23/9 46/57/26 +f 102/76/41 94/77/42 88/59/28 99/58/27 +f 31/4/1 1/3/1 27/55/1 +f 52/9/5 63/8/4 67/78/43 57/79/44 +f 25/80/1 132/52/1 1/3/1 +f 118/81/6 9/82/6 2/12/6 +f 114/83/6 121/84/6 2/12/6 +f 58/85/1 55/54/1 1/3/1 +f 148/7/3 144/6/2 142/50/24 150/49/23 +f 57/79/44 67/78/43 61/26/12 54/25/11 +f 45/86/6 49/87/6 2/12/6 +f 120/44/20 112/43/19 106/69/36 117/68/35 +f 138/53/1 136/1/1 1/3/1 +f 9/82/6 13/88/6 2/12/6 +f 30/62/29 34/63/30 81/73/40 70/72/39 +f 39/32/1 43/17/1 1/3/1 +f 51/34/1 58/85/1 1/3/1 +f 24/89/45 16/90/46 126/91/47 133/92/48 +f 24/89/45 22/93/49 18/94/50 16/90/46 +f 87/18/1 93/45/1 1/3/1 +f 103/11/6 100/95/6 2/12/6 +f 105/14/1 111/42/1 1/3/1 +f 82/96/6 33/74/6 2/12/6 +f 37/75/6 60/35/6 2/12/6 +f 72/39/16 79/38/15 18/94/50 22/93/49 +f 121/84/6 118/81/6 2/12/6 +f 100/95/6 45/86/6 2/12/6 +f 124/67/34 135/66/33 139/97/51 129/98/52 +f 25/80/1 1/3/1 21/30/1 +f 64/99/6 147/100/6 2/12/6 +f 123/101/6 130/51/6 2/12/6 +f 66/36/6 64/99/6 2/12/6 +f 129/102/52 139/103/51 133/92/48 126/91/47 +f 49/87/6 114/83/6 2/12/6 +f 13/88/6 123/101/6 2/12/6 +f 85/60/6 82/96/6 2/12/6 +f 2/12/6 151/104/6 96/10/6 +f 3/2/1 7/13/1 1/3/1 +f 147/100/6 151/104/6 2/12/6 +f 141/61/1 145/33/1 1/3/1 
+f 97/48/22 90/47/21 94/77/42 102/76/41 +f 7/13/53 3/2/54 5/105/55 8/106/56 +f 8/106/56 5/105/55 4/107/32 6/108/37 +f 13/88/57 9/82/58 11/109/59 14/110/60 +f 14/110/60 11/109/59 10/111/38 12/112/31 +f 19/15/61 15/20/62 17/113/63 20/114/64 +f 20/114/64 17/113/63 16/115/46 18/116/50 +f 25/80/65 21/30/66 23/117/67 26/118/68 +f 26/118/68 23/117/67 22/119/49 24/120/45 +f 49/87/69 45/86/70 47/121/71 50/122/72 +f 50/122/72 47/121/71 46/123/26 48/124/9 +f 43/17/73 39/32/74 41/125/75 44/126/76 +f 44/126/76 41/125/75 40/127/10 42/128/25 +f 37/75/77 33/74/78 35/129/79 38/130/80 +f 38/130/80 35/129/79 34/131/30 36/132/13 +f 31/4/81 27/55/82 29/133/83 32/134/84 +f 32/134/84 29/133/83 28/135/14 30/136/29 +f 58/85/85 51/34/86 53/137/87 59/138/88 +f 59/138/88 53/137/87 52/139/5 57/140/44 +f 64/99/89 66/36/90 68/141/91 65/142/92 +f 65/142/92 68/141/91 67/143/43 63/144/4 +f 55/54/93 58/85/85 59/138/88 56/145/94 +f 56/145/94 59/138/88 57/140/44 54/146/11 +f 66/36/90 60/35/95 62/147/96 68/141/91 +f 68/141/91 62/147/96 61/148/12 67/143/43 +f 85/60/97 78/16/98 80/149/99 86/150/100 +f 86/150/100 80/149/99 79/151/15 84/152/18 +f 73/29/101 75/37/102 77/153/103 74/154/104 +f 74/154/104 77/153/103 76/155/17 72/156/16 +f 82/96/105 85/60/97 86/150/100 83/157/106 +f 83/157/106 86/150/100 84/152/18 81/158/40 +f 75/37/102 69/5/107 71/159/108 77/153/103 +f 77/153/103 71/159/108 70/160/39 76/155/17 +f 121/84/109 114/83/110 116/161/111 122/162/112 +f 122/162/112 116/161/111 115/163/8 120/164/20 +f 109/31/113 111/42/114 113/165/115 110/166/116 +f 110/166/116 113/165/115 112/167/19 108/168/7 +f 118/81/117 121/84/109 122/162/112 119/169/118 +f 119/169/118 122/162/112 120/164/20 117/170/35 +f 111/42/114 105/14/119 107/171/120 113/165/115 +f 113/165/115 107/171/120 106/172/36 112/167/19 +f 103/11/121 96/10/122 98/173/123 104/174/124 +f 104/174/124 98/173/123 97/175/22 102/176/41 +f 91/46/125 93/45/126 95/177/127 92/178/128 +f 92/178/128 95/177/127 94/179/42 90/180/21 +f 100/95/129 103/11/121 104/174/124 
101/181/130 +f 101/181/130 104/174/124 102/176/41 99/182/27 +f 93/45/126 87/18/131 89/183/132 95/177/127 +f 95/177/127 89/183/132 88/184/28 94/179/42 +f 130/51/133 123/101/134 125/185/135 131/186/136 +f 131/186/136 125/185/135 124/187/34 129/188/52 +f 136/1/137 138/53/138 140/189/139 137/190/140 +f 137/190/140 140/189/139 139/191/51 135/192/33 +f 127/19/141 130/51/133 131/186/136 128/193/142 +f 128/193/142 131/186/136 129/188/52 126/194/47 +f 138/53/138 132/52/143 134/195/144 140/189/139 +f 140/189/139 134/195/144 133/196/48 139/191/51 +f 27/55/82 55/54/93 56/145/94 29/133/83 +f 29/133/83 56/145/94 54/146/11 28/135/14 +f 60/35/95 37/75/77 38/130/80 62/147/96 +f 62/147/96 38/130/80 36/132/13 61/148/12 +f 69/5/107 31/4/81 32/134/84 71/159/108 +f 71/159/108 32/134/84 30/136/29 70/160/39 +f 33/74/78 82/96/105 83/157/106 35/129/79 +f 35/129/79 83/157/106 81/158/40 34/131/30 +f 21/30/66 73/29/101 74/154/104 23/117/67 +f 23/117/67 74/154/104 72/156/16 22/119/49 +f 78/16/98 19/15/61 20/114/64 80/149/99 +f 80/149/99 20/114/64 18/116/50 79/151/15 +f 87/18/131 43/17/73 44/126/76 89/183/132 +f 89/183/132 44/126/76 42/128/25 88/184/28 +f 45/86/70 100/95/129 101/181/130 47/121/71 +f 47/121/71 101/181/130 99/182/27 46/123/26 +f 9/82/58 118/81/117 119/169/118 11/109/59 +f 11/109/59 119/169/118 117/170/35 10/111/38 +f 105/14/119 7/13/53 8/106/56 107/171/120 +f 107/171/120 8/106/56 6/108/37 106/172/36 +f 39/32/74 109/31/113 110/166/116 41/125/75 +f 41/125/75 110/166/116 108/168/7 40/127/10 +f 114/83/110 49/87/69 50/122/72 116/161/111 +f 116/161/111 50/122/72 48/124/9 115/163/8 +f 123/101/134 13/88/57 14/110/60 125/185/135 +f 125/185/135 14/110/60 12/112/31 124/187/34 +f 3/2/54 136/1/137 137/190/140 5/105/55 +f 5/105/55 137/190/140 135/192/33 4/107/32 +f 132/52/143 25/80/65 26/118/68 134/195/144 +f 134/195/144 26/118/68 24/120/45 133/196/48 +f 15/20/62 127/19/141 128/193/142 17/113/63 +f 17/113/63 128/193/142 126/194/47 16/115/46 +f 142/197/24 144/198/2 146/199/145 143/200/146 +f 
143/200/146 146/199/145 145/33/147 141/61/148 +f 148/201/3 150/202/23 152/203/149 149/204/150 +f 149/204/150 152/203/149 151/104/151 147/100/152 +f 141/61/148 91/46/125 92/178/128 143/200/146 +f 143/200/146 92/178/128 90/180/21 142/197/24 +f 96/10/122 151/104/151 152/203/149 98/173/123 +f 98/173/123 152/203/149 150/202/23 97/175/22 +f 51/34/86 145/33/147 146/199/145 53/137/87 +f 53/137/87 146/199/145 144/198/2 52/139/5 +f 147/100/152 64/99/89 65/142/92 149/204/150 +f 149/204/150 65/142/92 63/144/4 148/201/3 +f 12/64/31 10/71/38 6/70/37 4/65/32 diff --git a/diffusion_policy/env/block_pushing/assets/blocks/yellow_pentagon.urdf b/diffusion_policy/env/block_pushing/assets/blocks/yellow_pentagon.urdf new file mode 100644 index 0000000..4f5bdf6 --- /dev/null +++ b/diffusion_policy/env/block_pushing/assets/blocks/yellow_pentagon.urdf @@ -0,0 +1,30 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/diffusion_policy/env/block_pushing/assets/insert.urdf b/diffusion_policy/env/block_pushing/assets/insert.urdf new file mode 100644 index 0000000..d912ff1 --- /dev/null +++ b/diffusion_policy/env/block_pushing/assets/insert.urdf @@ -0,0 +1,66 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/diffusion_policy/env/block_pushing/assets/plane.obj b/diffusion_policy/env/block_pushing/assets/plane.obj new file mode 100644 index 0000000..6062095 --- /dev/null +++ b/diffusion_policy/env/block_pushing/assets/plane.obj @@ -0,0 +1,18 @@ +# Blender v2.66 (sub 1) OBJ File: '' +# www.blender.org +mtllib plane.mtl +o Plane +v 15.000000 -15.000000 0.000000 +v 15.000000 15.000000 0.000000 +v -15.000000 15.000000 0.000000 +v -15.000000 -15.000000 0.000000 + +vt 15.000000 0.000000 +vt 15.000000 15.000000 +vt 0.000000 15.000000 +vt 0.000000 0.000000 + +usemtl Material +s off +f 1/1 2/2 3/3 +f 1/1 3/3 4/4 diff --git a/diffusion_policy/env/block_pushing/assets/suction/base.obj 
b/diffusion_policy/env/block_pushing/assets/suction/base.obj new file mode 100644 index 0000000..bdf1bb3 --- /dev/null +++ b/diffusion_policy/env/block_pushing/assets/suction/base.obj @@ -0,0 +1,396 @@ +# Object Export From Tinkercad Server 2015 + +mtllib obj.mtl + +o obj_0 +v 7.413 37.27 25 +v 7.413 37.27 0 +v 33.513 17.913 25 +v 35.107 14.542 25 +v 35.107 14.542 0 +v 33.513 17.913 0 +v -31.596 21.112 25 +v -31.596 21.112 0 +v -33.513 17.913 0 +v -33.513 17.913 25 +v 3.725 37.817 0 +v 3.725 37.817 25 +v -29.374 24.107 25 +v -29.374 24.107 0 +v 0 38 0 +v 11.031 36.364 25 +v 11.031 36.364 0 +v 14.542 35.107 25 +v 14.542 35.107 0 +v -37.27 -7.413 25 +v 17.913 33.513 25 +v -37.27 -7.413 0 +v -11.031 -36.364 25 +v 17.913 33.513 0 +v -36.364 -11.031 0 +v -14.542 -35.107 25 +v -26.87 26.87 25 +v -36.364 -11.031 25 +v -14.542 -35.107 0 +v -26.87 26.87 0 +v -3.725 37.817 0 +v -11.031 -36.364 0 +v 24.107 -29.374 25 +v -17.913 -33.513 25 +v -17.913 -33.513 0 +v 21.112 -31.596 25 +v 21.112 -31.596 0 +v 24.107 -29.374 0 +v -3.725 37.817 25 +v 0 38 25 +v 17.913 -33.513 25 +v 17.913 -33.513 0 +v 37.817 -3.725 25 +v -37.817 -3.725 25 +v -37.817 -3.725 0 +v 37.817 -3.725 0 +v 38 0 0 +v -21.112 -31.596 25 +v 38 0 25 +v 14.542 -35.107 25 +v -21.112 -31.596 0 +v 36.364 11.031 25 +v 14.542 -35.107 0 +v 36.364 11.031 0 +v 37.27 -7.413 25 +v 37.27 -7.413 0 +v -24.107 29.374 25 +v 21.112 31.596 25 +v -24.107 29.374 0 +v 36.364 -11.031 25 +v 21.112 31.596 0 +v 36.364 -11.031 0 +v -38 0 25 +v 11.031 -36.364 25 +v 11.031 -36.364 0 +v -21.112 31.596 25 +v -21.112 31.596 0 +v 7.413 -37.27 25 +v 7.413 -37.27 0 +v -17.913 33.513 25 +v -17.913 33.513 0 +v 24.107 29.374 0 +v -38 0 0 +v 26.87 26.87 0 +v 35.107 -14.542 25 +v 29.374 24.107 0 +v 35.107 -14.542 0 +v 31.596 21.112 0 +v 3.725 -37.817 25 +v 37.27 7.413 25 +v 3.725 -37.817 0 +v 24.107 29.374 25 +v -24.107 -29.374 25 +v -14.542 35.107 25 +v -24.107 -29.374 0 +v -14.542 35.107 0 +v 37.27 7.413 0 +v 33.513 -17.913 25 +v 37.817 3.725 0 +v 
33.513 -17.913 0 +v -11.031 36.364 25 +v -11.031 36.364 0 +v -7.413 37.27 25 +v 26.87 26.87 25 +v -7.413 37.27 0 +v -26.87 -26.87 25 +v -26.87 -26.87 0 +v 37.817 3.725 25 +v 0 -38 25 +v 31.596 -21.112 25 +v 0 -38 0 +v 31.596 -21.112 0 +v 29.374 24.107 25 +v -29.374 -24.107 25 +v -29.374 -24.107 0 +v 29.374 -24.107 25 +v 31.596 21.112 25 +v 29.374 -24.107 0 +v -3.725 -37.817 25 +v -31.596 -21.112 25 +v -3.725 -37.817 0 +v -31.596 -21.112 0 +v -37.817 3.725 25 +v -37.817 3.725 0 +v 26.87 -26.87 25 +v 26.87 -26.87 0 +v -37.27 7.413 0 +v -37.27 7.413 25 +v -7.413 -37.27 25 +v -33.513 -17.913 25 +v -33.513 -17.913 0 +v -7.413 -37.27 0 +v -36.364 11.031 25 +v -36.364 11.031 0 +v -35.107 -14.542 25 +v -35.107 -14.542 0 +v -35.107 14.542 25 +v -35.107 14.542 0 +# 128 vertices + +g group_0_2829873 + +usemtl color_2829873 +s 0 + +f 3 4 5 +f 3 5 6 +f 7 8 9 +f 7 9 10 +f 1 2 11 +f 1 11 12 +f 16 17 2 +f 16 2 1 +f 13 14 8 +f 13 8 7 +f 18 19 17 +f 18 17 16 +f 23 26 29 +f 23 29 32 +f 26 34 35 +f 26 35 29 +f 33 36 37 +f 33 37 38 +f 15 31 39 +f 15 39 40 +f 20 22 25 +f 20 25 28 +f 36 41 42 +f 36 42 37 +f 44 45 22 +f 44 22 20 +f 21 24 19 +f 21 19 18 +f 43 46 47 +f 43 47 49 +f 27 30 14 +f 27 14 13 +f 41 50 53 +f 41 53 42 +f 43 55 56 +f 43 56 46 +f 34 48 51 +f 34 51 35 +f 4 52 54 +f 4 54 5 +f 50 64 65 +f 50 65 53 +f 57 59 30 +f 57 30 27 +f 55 60 62 +f 55 62 56 +f 66 67 59 +f 66 59 57 +f 64 68 69 +f 64 69 65 +f 58 61 24 +f 58 24 21 +f 63 73 45 +f 63 45 44 +f 70 71 67 +f 70 67 66 +f 60 75 77 +f 60 77 62 +f 82 72 61 +f 82 61 58 +f 68 79 81 +f 68 81 69 +f 48 83 85 +f 48 85 51 +f 84 86 71 +f 84 71 70 +f 75 88 90 +f 75 90 77 +f 91 92 86 +f 91 86 84 +f 15 11 2 +f 15 2 17 +f 15 17 19 +f 15 19 24 +f 15 24 61 +f 15 61 72 +f 15 72 74 +f 15 74 76 +f 15 76 78 +f 15 78 6 +f 15 6 5 +f 15 5 54 +f 15 54 87 +f 15 87 89 +f 15 89 47 +f 52 80 87 +f 52 87 54 +f 94 74 72 +f 94 72 82 +f 93 95 92 +f 93 92 91 +f 83 96 97 +f 83 97 85 +f 80 98 89 +f 80 89 87 +f 79 99 101 +f 79 101 81 +f 88 100 102 +f 88 102 90 +f 
94 103 76 +f 94 76 74 +f 39 31 95 +f 39 95 93 +f 104 105 97 +f 104 97 96 +f 98 49 47 +f 98 47 89 +f 100 106 108 +f 100 108 102 +f 103 107 78 +f 103 78 76 +f 101 99 109 +f 101 109 111 +f 110 112 105 +f 110 105 104 +f 113 114 73 +f 113 73 63 +f 106 115 116 +f 106 116 108 +f 107 3 6 +f 107 6 78 +f 118 117 114 +f 118 114 113 +f 120 121 112 +f 120 112 110 +f 109 119 122 +f 109 122 111 +f 123 124 117 +f 123 117 118 +f 125 126 121 +f 125 121 120 +f 127 128 124 +f 127 124 123 +f 115 33 38 +f 115 38 116 +f 10 9 128 +f 10 128 127 +f 28 25 126 +f 28 126 125 +f 119 23 32 +f 119 32 122 +f 73 114 117 +f 73 117 124 +f 73 124 128 +f 73 128 9 +f 73 9 8 +f 73 8 14 +f 73 14 30 +f 73 30 59 +f 73 59 67 +f 73 67 71 +f 73 71 86 +f 73 86 92 +f 73 92 95 +f 73 95 31 +f 73 31 15 +f 81 101 15 +f 47 46 15 +f 46 56 15 +f 56 62 15 +f 62 77 15 +f 77 90 15 +f 90 102 15 +f 102 108 15 +f 108 116 15 +f 116 38 15 +f 38 37 15 +f 37 42 15 +f 42 53 15 +f 53 65 15 +f 69 15 65 +f 81 15 69 +f 45 73 15 +f 101 111 15 +f 111 122 15 +f 122 32 15 +f 32 29 15 +f 29 35 15 +f 35 51 15 +f 51 85 15 +f 85 97 15 +f 97 105 15 +f 105 112 15 +f 112 121 15 +f 121 126 15 +f 126 25 15 +f 22 15 25 +f 45 15 22 +f 49 98 80 +f 49 80 52 +f 49 52 4 +f 49 4 3 +f 49 3 107 +f 49 107 103 +f 49 103 94 +f 49 94 82 +f 49 82 58 +f 49 58 21 +f 49 21 18 +f 49 18 16 +f 49 16 1 +f 49 1 12 +f 49 12 40 +f 40 39 93 +f 40 93 91 +f 40 91 84 +f 40 84 70 +f 40 70 66 +f 40 66 57 +f 40 57 27 +f 40 27 13 +f 40 13 7 +f 40 7 10 +f 40 10 127 +f 40 127 123 +f 40 123 118 +f 40 118 113 +f 63 40 113 +f 68 40 79 +f 64 40 68 +f 50 40 64 +f 41 40 50 +f 36 40 41 +f 33 40 36 +f 115 40 33 +f 106 40 115 +f 100 40 106 +f 88 40 100 +f 75 40 88 +f 60 40 75 +f 55 40 60 +f 43 40 55 +f 49 40 43 +f 99 79 40 +f 20 40 44 +f 28 40 20 +f 125 40 28 +f 120 40 125 +f 110 40 120 +f 104 40 110 +f 96 40 104 +f 83 40 96 +f 48 40 83 +f 34 40 48 +f 26 40 34 +f 23 40 26 +f 119 40 23 +f 109 40 119 +f 99 40 109 +f 63 44 40 +f 12 11 15 +f 12 15 40 +# 252 faces + + #end of obj_0 + diff 
--git a/diffusion_policy/env/block_pushing/assets/suction/cylinder.urdf b/diffusion_policy/env/block_pushing/assets/suction/cylinder.urdf new file mode 100644 index 0000000..d5779f3 --- /dev/null +++ b/diffusion_policy/env/block_pushing/assets/suction/cylinder.urdf @@ -0,0 +1,98 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/diffusion_policy/env/block_pushing/assets/suction/cylinder_real.urdf b/diffusion_policy/env/block_pushing/assets/suction/cylinder_real.urdf new file mode 100644 index 0000000..b4bb8b7 --- /dev/null +++ b/diffusion_policy/env/block_pushing/assets/suction/cylinder_real.urdf @@ -0,0 +1,98 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/diffusion_policy/env/block_pushing/assets/suction/head.obj b/diffusion_policy/env/block_pushing/assets/suction/head.obj new file mode 100644 index 0000000..49cefdd --- /dev/null +++ b/diffusion_policy/env/block_pushing/assets/suction/head.obj @@ -0,0 +1,396 @@ +# Object Export From Tinkercad Server 2015 + +mtllib obj.mtl + +o obj_0 +v 0.001 5.001 0 +v 0.492 4.977 0 +v 0.977 4.905 0 +v 1.453 4.786 0 +v 1.915 4.621 0 +v 2.358 4.411 0 +v 2.779 4.159 0 +v 3.173 3.866 0 +v 3.537 3.537 0 +v 3.866 3.173 0 +v 4.159 2.779 0 +v 4.411 2.358 0 +v 4.621 1.915 0 +v 4.786 1.453 0 +v 4.905 0.977 0 +v 4.977 0.492 0 +v 5.001 0.001 0 +v -2.776 4.159 0 +v 4.159 -2.776 30 +v -2.356 4.411 0 +v -1.912 4.621 0 +v -1.45 4.786 0 +v -0.974 4.905 0 +v -0.489 4.977 0 +v 4.411 -2.356 30 +v 4.621 -1.912 30 +v 4.786 -1.45 30 +v -4.999 0.001 0 +v -4.974 0.492 0 +v -4.903 0.977 0 +v -4.783 1.453 0 +v -4.618 1.915 0 +v -4.408 2.358 0 +v 4.905 -0.974 30 +v -4.156 2.779 0 +v -3.864 3.173 0 +v -3.534 3.537 0 +v -3.171 3.866 0 +v 4.977 -0.489 30 +v 4.977 -0.489 0 +v 4.905 -0.974 0 +v 4.786 -1.45 0 +v 4.621 -1.912 0 +v 4.411 -2.356 0 
+v 4.159 -2.776 0 +v 0.001 -4.999 30 +v 3.866 -3.171 0 +v 3.537 -3.534 0 +v 3.173 -3.864 0 +v 2.779 -4.156 0 +v 2.358 -4.408 0 +v 1.915 -4.618 0 +v 0.492 -4.974 30 +v 1.453 -4.783 0 +v 0.977 -4.902 0 +v 0.492 -4.974 0 +v 0.001 -4.999 0 +v 0.977 -4.902 30 +v 1.453 -4.783 30 +v 1.915 -4.618 30 +v -3.171 -3.864 0 +v -3.534 -3.534 0 +v -3.864 -3.171 0 +v -4.156 -2.776 0 +v -4.408 -2.356 0 +v -4.618 -1.912 0 +v -4.783 -1.45 0 +v -4.903 -0.974 0 +v -4.974 -0.489 0 +v 2.358 -4.408 30 +v 2.779 -4.156 30 +v -0.489 -4.974 0 +v -0.974 -4.902 0 +v -1.45 -4.783 0 +v -1.912 -4.618 0 +v -2.356 -4.408 0 +v -2.776 -4.156 0 +v 3.173 -3.864 30 +v 3.537 -3.534 30 +v 0.001 5.001 30 +v 2.779 4.159 30 +v 2.358 4.411 30 +v 1.915 4.621 30 +v 3.866 -3.171 30 +v 1.453 4.786 30 +v 0.977 4.905 30 +v 0.492 4.977 30 +v 5.001 0.001 30 +v 4.977 0.492 30 +v 4.905 0.977 30 +v 4.786 1.453 30 +v 4.621 1.915 30 +v 4.411 2.358 30 +v 4.159 2.779 30 +v 3.866 3.173 30 +v 3.537 3.537 30 +v 3.173 3.866 30 +v -0.489 4.977 30 +v -0.974 4.905 30 +v -1.45 4.786 30 +v -1.912 4.621 30 +v -2.356 4.411 30 +v -2.776 4.159 30 +v -3.171 3.866 30 +v -3.534 3.537 30 +v -3.864 3.173 30 +v -4.156 2.779 30 +v -4.408 2.358 30 +v -4.618 1.915 30 +v -4.783 1.453 30 +v -4.903 0.977 30 +v -4.974 0.492 30 +v -4.999 0.001 30 +v -4.974 -0.489 30 +v -4.903 -0.974 30 +v -4.783 -1.45 30 +v -4.618 -1.912 30 +v -4.408 -2.356 30 +v -4.156 -2.776 30 +v -3.864 -3.171 30 +v -3.534 -3.534 30 +v -3.171 -3.864 30 +v -0.489 -4.974 30 +v -2.776 -4.156 30 +v -2.356 -4.408 30 +v -1.912 -4.618 30 +v -1.45 -4.783 30 +v -0.974 -4.902 30 +# 128 vertices + +g group_0_16089887 + +usemtl color_16089887 +s 0 + +f 2 3 1 +f 1 3 4 +f 1 4 5 +f 1 5 6 +f 1 6 7 +f 1 7 8 +f 9 10 8 +f 8 10 11 +f 8 11 12 +f 8 12 13 +f 8 13 14 +f 8 14 15 +f 8 15 16 +f 8 16 17 +f 8 17 40 +f 22 8 21 +f 20 21 8 +f 23 8 22 +f 24 8 23 +f 1 8 24 +f 18 20 8 +f 31 8 30 +f 32 8 31 +f 33 8 32 +f 35 8 33 +f 29 30 8 +f 36 8 35 +f 37 8 36 +f 38 8 37 +f 18 8 38 +f 28 29 8 +f 44 8 43 +f 42 43 8 +f 
41 42 8 +f 45 8 44 +f 47 8 45 +f 50 8 49 +f 51 8 50 +f 48 49 8 +f 52 8 51 +f 54 8 52 +f 55 8 54 +f 56 8 55 +f 57 8 56 +f 72 8 57 +f 47 48 8 +f 40 41 8 +f 64 8 63 +f 65 8 64 +f 66 8 65 +f 62 63 8 +f 67 8 66 +f 68 8 67 +f 69 8 68 +f 28 8 69 +f 61 62 8 +f 72 73 8 +f 75 8 74 +f 73 74 8 +f 76 8 75 +f 77 8 76 +f 61 8 77 +f 98 82 80 +f 80 83 85 +f 80 85 86 +f 80 86 87 +f 19 45 44 +f 19 44 25 +f 43 26 44 +f 43 42 27 +f 42 41 27 +f 89 90 88 +f 88 90 91 +f 88 91 92 +f 88 92 93 +f 88 93 94 +f 88 94 95 +f 96 97 88 +f 97 81 88 +f 82 88 81 +f 95 96 88 +f 114 115 82 +f 25 44 26 +f 34 27 41 +f 43 27 26 +f 34 41 40 +f 99 82 98 +f 100 82 99 +f 101 82 100 +f 102 82 101 +f 103 82 102 +f 83 80 82 +f 55 58 53 +f 34 40 39 +f 58 55 54 +f 82 103 104 +f 82 104 105 +f 82 105 106 +f 82 106 107 +f 82 107 108 +f 82 108 109 +f 82 109 110 +f 82 110 111 +f 82 111 112 +f 82 112 113 +f 82 113 114 +f 56 53 57 +f 53 56 55 +f 39 88 82 +f 19 25 82 +f 25 26 82 +f 27 82 26 +f 34 82 27 +f 39 82 34 +f 57 53 46 +f 70 82 60 +f 59 60 82 +f 58 59 82 +f 71 82 70 +f 53 58 82 +f 78 82 71 +f 79 82 78 +f 84 82 79 +f 19 82 84 +f 46 53 82 +f 122 124 82 +f 71 50 78 +f 54 59 58 +f 5 83 82 +f 71 51 50 +f 48 78 49 +f 59 54 60 +f 52 60 54 +f 5 85 83 +f 49 78 50 +f 52 51 60 +f 70 60 51 +f 116 82 115 +f 117 82 116 +f 118 82 117 +f 119 82 118 +f 120 82 119 +f 121 82 120 +f 122 82 121 +f 85 4 3 +f 71 70 51 +f 46 82 123 +f 125 126 82 +f 123 82 128 +f 127 82 126 +f 128 82 127 +f 124 125 82 +f 48 79 78 +f 24 98 80 +f 48 47 79 +f 84 79 47 +f 16 89 88 +f 84 47 19 +f 16 90 89 +f 95 94 11 +f 14 91 90 +f 82 7 6 +f 92 91 13 +f 12 92 13 +f 80 1 24 +f 8 97 96 +f 76 75 125 +f 57 46 72 +f 126 75 127 +f 72 123 128 +f 46 123 72 +f 81 97 7 +f 7 82 81 +f 69 68 114 +f 85 3 86 +f 115 67 116 +f 117 116 66 +f 86 3 87 +f 2 87 3 +f 37 105 104 +f 116 67 66 +f 2 80 87 +f 121 120 62 +f 66 65 118 +f 66 118 117 +f 121 62 122 +f 61 122 62 +f 65 119 118 +f 120 119 63 +f 107 35 33 +f 115 114 68 +f 122 61 124 +f 120 63 62 +f 68 67 115 +f 14 13 91 +f 77 124 
61 +f 113 29 28 +f 77 76 124 +f 125 124 76 +f 12 93 92 +f 126 125 75 +f 119 65 64 +f 113 28 114 +f 69 114 28 +f 127 75 74 +f 112 111 29 +f 94 93 12 +f 74 73 127 +f 64 63 119 +f 128 73 72 +f 112 29 113 +f 104 18 38 +f 104 38 37 +f 98 24 23 +f 8 7 97 +f 35 107 106 +f 40 88 39 +f 110 109 31 +f 101 21 20 +f 128 127 73 +f 98 23 99 +f 23 100 99 +f 6 5 82 +f 101 20 102 +f 85 5 4 +f 20 18 103 +f 20 103 102 +f 18 104 103 +f 1 80 2 +f 37 106 105 +f 107 33 108 +f 33 109 108 +f 88 17 16 +f 15 90 16 +f 15 14 90 +f 94 12 11 +f 10 95 11 +f 95 10 96 +f 9 96 10 +f 9 8 96 +f 40 17 88 +f 100 22 101 +f 109 33 32 +f 32 31 109 +f 111 110 30 +f 31 30 110 +f 111 30 29 +f 100 23 22 +f 21 101 22 +f 37 36 106 +f 36 35 106 +f 19 47 45 +# 252 faces + + #end of obj_0 + diff --git a/diffusion_policy/env/block_pushing/assets/suction/mid.obj b/diffusion_policy/env/block_pushing/assets/suction/mid.obj new file mode 100644 index 0000000..fdf238e --- /dev/null +++ b/diffusion_policy/env/block_pushing/assets/suction/mid.obj @@ -0,0 +1,2134 @@ +# Object Export From Tinkercad Server 2015 + +mtllib obj.mtl + +o obj_0 +v 24.107 -29.374 0 +v 21.112 -31.596 0 +v 30.297 8.013 24.448 +v 17.913 -33.513 0 +v 26.312 25.97 20.313 +v 14.542 -35.107 0 +v 31.366 0 24.448 +v 29 0 24.7348 +v -29.4178 -24.048 15.875 +v -29.3593 -24.1232 15.875 +v -17.841 30.499 22.151 +v 23.821 -23.511 23.561 +v -9.235 34.018 22.151 +v 34.5141 -15.7959 17 +v 35.107 -14.542 17 +v 35.4949 -13.4585 17 +v 35.0704 -14.4701 17 +v 35.107 -14.542 16.5646 +v 19.086 -32.629 18.172 +v 34.7754 -15.2433 16.808 +v 36.871 -9.751 18.172 +v 0 35.218 22.151 +v 23.0418 -29.8391 17 +v 19.2451 -32.7147 17 +v 21.112 -31.596 17 +v 24.107 -29.374 17 +v 0 33.25 23.561 +v 27.224 26.87 15.875 +v -27.224 -26.87 15.875 +v -20.1874 32.1501 16.1531 +v -20.4974 31.9643 15.875 +v 38.172 0 18.172 +v 18.605 -31.807 20.313 +v -9.3502 36.7849 15.875 +v 35.943 -9.506 20.313 +v -9.965 36.705 15.875 +v 37.21 0 20.313 +v 17.84 -30.499 22.151 +v -10.342 36.5365 15.9699 +v 
-10.4325 36.5139 15.875 +v 27.751 -8.418 24.75 +v 26.792 -11.098 24.75 +v 26.792 -11.098 24.6977 +v 27.751 -8.418 24.7164 +v 34.465 -9.115 22.151 +v 35.681 0 22.151 +v 25.576 -13.67 24.7131 +v 22.179 -21.891 24.448 +v 33.687 0 23.561 +v 27.0371 26.6856 14.025 +v -27.0371 -26.6856 14.025 +v 37.188 -9.835 15.875 +v -32.6246 -19.3954 13.578 +v 20.1874 -32.1501 16.1531 +v 20.4974 -31.9643 15.875 +v 26.87 26.87 14.3533 +v 38.5 0 15.875 +v -26.87 -26.87 14.3533 +v -31.596 -21.112 15.2913 +v -32.4905 -19.6194 13.8015 +v 25.576 -13.67 24.75 +v -15.683 -26.811 24.448 +v 25.1718 -14.3444 24.7268 +v 24.6473 -15.2195 24.7117 +v -19.1645 32.763 14.6776 +v -36.871 -9.751 18.172 +v 24.113 -16.111 24.6963 +v -38.172 0 18.172 +v -35.4943 -13.4603 17 +v -35.107 -14.542 17 +v -35.1653 -14.2444 17 +v -35.3852 -13.7649 16.8777 +v -13.1265 13.1265 35.747 +v -18.625 18.625 24.75 +v -13.1265 -13.1265 35.747 +v 25.23 24.903 22.151 +v -9.9461 36.6357 15.3646 +v -9.4809 36.7522 15.7634 +v -18.625 -18.625 24.75 +v 16.844 -28.795 23.561 +v -34.5149 -15.7941 17 +v -35.107 -14.542 16.5651 +v -13 -13 35.747 +v -13 -10 35.747 +v -13 10 35.747 +v -13 13 35.747 +v -13.671 25.576 24.75 +v -16.112 24.113 24.75 +v -18.397 22.417 24.75 +v -20.506 20.506 24.75 +v -22.417 18.397 24.75 +v -24.113 16.112 24.75 +v -25.576 13.671 24.75 +v -35.943 -9.506 20.313 +v -26.793 11.098 24.75 +v -27.751 8.418 24.75 +v -37.211 0 20.313 +v -28.443 5.658 24.75 +v -28.86 2.843 24.75 +v 24.113 -16.111 24.75 +v -29 0 24.75 +v 36.6961 -9.7048 13.1747 +v 36.523 -10.3962 13.4177 +v 36.4086 -10.8531 13.578 +v -8.719 32.117 23.561 +v -28.86 -2.842 24.75 +v -28.443 -5.658 24.75 +v -27.751 -8.418 24.75 +v -26.793 -11.098 24.75 +v -25.576 -13.67 24.75 +v -24.113 -16.111 24.75 +v -22.417 -18.397 24.75 +v -20.506 -20.506 24.75 +v 36.871 -9.751 13.578 +v 22.417 -18.397 24.6885 +v -34.465 -9.115 22.151 +v -18.397 -22.417 24.75 +v -16.112 -24.113 24.75 +v -13.671 -25.576 24.75 +v -35.681 0 22.151 +v 37.9544 -0.9279 13.3691 +v -27.9473 
25.6813 17 +v 37.8998 -2.0405 13.578 +v 19.1645 -32.763 14.6776 +v -28.6531 24.9024 16.4377 +v -8.118 29.904 24.448 +v 37.0837 -8.1571 13.578 +v -33.688 0 23.561 +v 38.172 0 13.578 +v 0 30.959 24.448 +v 22.417 -18.397 24.75 +v -25.23 -24.903 22.151 +v 20.6333 -20.3655 24.7131 +v 20.506 -20.506 24.7087 +v 0 29 24.6886 +v 38 0 13.1954 +v -38.5 0 15.875 +v -37.188 -9.835 15.875 +v 13 13 35.747 +v 15.683 -26.811 24.448 +v 11.098 26.793 24.75 +v 8.418 27.751 24.75 +v 5.658 28.443 24.75 +v 35.3852 -13.7649 14.8723 +v 2.842 28.86 24.75 +v 35.107 -14.542 15.1849 +v 0 29 24.75 +v 17.913 -33.513 17 +v 19.107 -32.7975 17 +v 18.2144 -33.3324 15.875 +v -2.843 28.86 24.75 +v -5.658 28.443 24.75 +v -8.418 27.751 24.75 +v -11.098 26.793 24.75 +v 28.6531 24.9024 15.3123 +v 19.25 -32.909 15.875 +v 20.506 -20.506 24.75 +v 36.364 -11.031 13.7743 +v 2.2501 28.8892 24.6663 +v 19.5525 -21.3699 24.6948 +v 18.397 -22.417 24.6779 +v 2.842 28.86 24.6604 +v 37.817 -3.725 14.5724 +v 37.3366 -6.964 14.2345 +v 37.27 -7.413 14.1876 +v 37.2447 -7.5139 14.105 +v 5.658 28.443 24.6667 +v 18.397 -22.417 24.75 +v 16.111 -24.113 24.6813 +v 7.5897 27.9587 24.6954 +v 8.418 27.751 24.6804 +v 18.3829 -33.2314 15.6624 +v 15.683 26.811 24.448 +v 16.111 -24.113 24.75 +v 33.058 -18.838 18.172 +v 14.6252 -25.0035 24.7044 +v -28.6531 -24.9024 15.3123 +v 13.67 -25.576 24.6836 +v 9.5463 27.3477 24.6737 +v 33.342 -19 15.875 +v 11.098 26.793 24.6646 +v 13.67 -25.576 24.75 +v 14.4322 -34.7087 17 +v 14.542 -35.107 17 +v -23.821 -23.511 23.561 +v 13 10 78 +v 13 -10 78 +v 13 -10 35.747 +v 13 10 35.747 +v 13 13 78 +v -36.6961 -9.7048 13.1747 +v -36.9012 -8.8859 13.3881 +v -37.0837 -8.1571 13.578 +v 13 -13 78 +v 13 -13 35.747 +v -38 0 0 +v -37.817 3.725 0 +v -37.27 7.413 0 +v -36.364 11.031 0 +v -35.107 14.542 0 +v -33.513 17.913 0 +v -31.596 21.112 0 +v -29.374 24.107 0 +v 32.9839 -18.7959 13.3878 +v 32.787 -19.1245 13.492 +v -37.8998 -2.0405 13.578 +v -38 0 13.195 +v 33.058 -18.838 13.578 +v -37.817 -3.725 0 +v -36.871 
-9.751 13.578 +v -29.374 -24.107 0 +v -31.596 -21.112 0 +v -33.513 -17.913 0 +v -35.107 -14.542 0 +v -36.364 -11.031 0 +v -37.27 -7.413 0 +v -12.9343 25.9244 24.6781 +v -11.098 26.793 24.6646 +v -29 0 24.7348 +v -28.9728 0.5514 24.7292 +v 23.821 23.511 23.561 +v -28.86 2.843 24.7058 +v -38.172 0 13.578 +v 33.29 -18.2852 13.578 +v -26.87 26.87 0 +v -8.418 27.751 24.6804 +v -26.4007 27.2953 17 +v -26.992 26.641 18.172 +v -7.5897 27.9587 24.6954 +v -5.658 28.443 24.6667 +v 33.513 -17.913 14.0141 +v -36.4086 -10.8531 13.578 +v 33.3822 -18.1313 13.7583 +v -26.312 25.97 20.313 +v -5.3976 28.4816 24.6661 +v -2.843 28.86 24.6604 +v 29.374 24.107 0 +v 31.596 21.112 0 +v 33.513 17.913 0 +v 35.107 14.542 0 +v 36.364 11.031 0 +v 37.27 7.413 0 +v 37.817 3.725 0 +v 38 0 0 +v -29.374 24.107 17 +v -31.596 21.112 17 +v -37.7626 -4.0915 14.5336 +v -30.0813 22.9362 17 +v -37.817 -3.725 14.5719 +v 30.297 -8.013 24.448 +v -37.8341 -3.3776 14.3669 +v -13.671 -25.576 24.6835 +v 37.817 -3.725 0 +v 37.27 -7.413 0 +v 36.364 -11.031 0 +v 35.107 -14.542 0 +v 33.513 -17.913 0 +v 31.596 -21.112 0 +v 29.374 -24.107 0 +v -13 13 78 +v -22.179 -21.891 24.448 +v 14.542 35.107 0 +v 17.913 33.513 0 +v 21.112 31.596 0 +v 24.107 29.374 0 +v -10 10 35.747 +v -10 -10 35.747 +v 10 -10 35.747 +v 10 10 35.747 +v -10 10 78 +v -10 -10 78 +v 26.87 26.87 0 +v -31.921 20.5697 17 +v 10 10 78 +v 10 -10 78 +v -31.596 21.112 16.4587 +v 22.179 21.891 24.448 +v -37.27 -7.413 14.1873 +v 32.54 -8.606 23.561 +v 11.031 -36.364 0 +v -25.0698 28.5015 15.875 +v 7.413 -37.27 0 +v -27.224 26.87 15.875 +v 3.725 -37.817 0 +v -32.225 18.364 20.313 +v 0 -38 0 +v -3.725 -37.817 0 +v -7.413 -37.27 0 +v -11.031 -36.364 0 +v -30.901 17.609 22.151 +v -34.7754 -15.2433 14.942 +v 13.67 25.576 24.6836 +v -35.107 -14.542 15.1854 +v 8.719 -32.117 23.561 +v -36.0809 -11.8217 14.0923 +v -36.364 -11.031 13.7744 +v 13.67 25.576 24.75 +v 13 10 0 +v 13 13 0 +v 26.87 -26.87 0 +v -16.112 -24.113 24.6813 +v 29.374 -24.107 17 +v 31.596 -21.112 17 +v 
30.0813 -22.9362 17 +v -14.6261 -25.0036 24.7044 +v -29.174 16.625 23.561 +v 8.118 -29.904 24.448 +v -33.058 -18.838 18.172 +v 31.921 -20.5697 17 +v 0 -30.959 24.448 +v 31.596 -21.112 16.4587 +v -25.6086 28.0132 15.419 +v -33.342 -19 15.875 +v 13 -13 0 +v 13 -10 0 +v 0 -29 24.6886 +v -27.0371 26.6856 14.025 +v -26.87 26.87 14.3533 +v 14.6252 25.0035 24.7044 +v 32.225 -18.364 20.313 +v -29.4178 24.048 15.875 +v 0 -38 17 +v 3.725 -37.817 17 +v 7.413 -37.27 17 +v 4.8389 -37.2129 17 +v 16.111 24.113 24.6813 +v 0 -37.8413 17 +v -29.3593 24.1232 15.875 +v 30.901 -17.609 22.151 +v -27.164 15.479 24.448 +v 9.88 -36.393 18.172 +v 0 -37.676 18.172 +v 16.111 24.113 24.75 +v 9.9234 -36.5522 17 +v 9.9461 -36.6357 16.3854 +v -16.2025 -24.0458 24.6812 +v 29.174 -16.625 23.561 +v 11.031 -36.364 17 +v -18.397 -22.417 24.6779 +v 33.058 18.838 18.172 +v -33.1638 -18.4957 13.4995 +v 18.397 22.417 24.6779 +v -33.29 -18.2852 13.578 +v 16.2014 24.0459 24.6812 +v -33.058 -18.838 13.578 +v -28.443 5.658 24.7109 +v 33.342 19 15.875 +v -32.9839 -18.7959 13.3878 +v 9.631 -35.476 20.313 +v 18.397 22.417 24.75 +v -27.164 -15.479 24.448 +v 29.4178 -24.048 15.875 +v 29.3593 -24.1232 15.875 +v 0 -36.727 20.313 +v -33.513 -17.913 14.0143 +v -32.6246 19.3954 13.578 +v -25.23 24.903 22.151 +v 20.506 20.506 24.7087 +v 9.235 -34.018 22.151 +v -20.506 -20.506 24.7087 +v 27.164 15.479 24.448 +v 0 -35.218 22.151 +v -31.6197 21.0725 15.252 +v -31.596 21.112 15.2914 +v 33.1638 18.4957 13.4995 +v 33.29 18.2852 13.578 +v 33.058 18.838 13.578 +v -14.542 -35.107 0 +v -17.913 -33.513 0 +v -21.112 -31.596 0 +v -24.107 -29.374 0 +v 0 -33.25 23.561 +v 32.9839 18.7959 13.3878 +v 20.506 20.506 24.75 +v -21.5138 -19.3937 24.7009 +v -22.417 -18.397 24.6885 +v -20.6333 -20.3655 24.7131 +v 32.6246 -19.3954 13.578 +v 21.5138 19.3937 24.7009 +v 22.417 18.397 24.6885 +v 33.513 17.913 14.0143 +v 20.6333 20.3655 24.7131 +v -32.54 8.606 23.561 +v 9.3502 -36.7849 15.875 +v -24.113 -16.111 24.6964 +v 31.596 -21.112 15.2914 +v 
31.6197 -21.0725 15.252 +v 9.965 -36.705 15.875 +v -24.107 -29.374 17 +v -21.112 -31.596 17 +v 10.342 -36.5365 15.9699 +v -23.2383 -29.6906 17 +v 10.4325 -36.5139 15.875 +v 22.417 18.397 24.75 +v -19.086 -32.629 18.172 +v 36.871 9.751 18.172 +v -19.2451 -32.7147 17 +v -25.172 -14.344 24.7269 +v 35.4943 13.4603 17 +v -25.576 -13.67 24.7132 +v 35.107 14.542 17 +v 35.1653 14.2444 17 +v -30.297 8.013 24.448 +v 35.3852 13.7649 16.8777 +v 24.113 16.112 24.6962 +v -31.366 0 24.448 +v -18.605 -31.807 20.313 +v 34.5149 15.7941 17 +v 24.113 16.112 24.75 +v 19.25 32.909 15.875 +v 35.107 14.542 16.5651 +v 25.1722 14.3447 24.7267 +v 25.576 13.671 24.7131 +v 19.107 32.7975 17 +v 17.913 33.513 17 +v 18.3829 33.2314 16.0876 +v -25.6732 -13.4645 24.712 +v -26.793 -11.098 24.6976 +v -17.841 -30.499 22.151 +v 35.943 9.506 20.313 +v 25.576 13.671 24.75 +v -33.058 18.838 18.172 +v 18.2144 33.3324 15.875 +v -33.342 19 15.875 +v -27.751 8.418 24.7164 +v -28.0047 7.4061 24.7357 +v -27.8234 8.1291 24.7219 +v 25.6751 13.4612 24.7118 +v -27.751 -8.418 24.7164 +v 34.465 9.115 22.151 +v 26.792 11.098 24.6977 +v -20.4974 -31.9643 15.875 +v -28.1502 -6.8258 24.7275 +v -28.443 -5.658 24.7109 +v -28.0047 -7.4061 24.7357 +v 26.792 11.098 24.75 +v 27.751 8.418 24.7164 +v 9.9461 -36.6357 15.3646 +v 9.4809 -36.7522 15.7634 +v 27.751 8.418 24.75 +v -28.86 -2.842 24.7059 +v 28.1502 6.8258 24.7275 +v 28.443 5.658 24.7109 +v -32.9839 18.7959 13.3878 +v -32.787 19.1245 13.492 +v 28.0047 7.4061 24.7357 +v -33.058 18.838 13.578 +v -13 10 78 +v 37.188 9.835 15.875 +v -13 -10 78 +v 28.443 5.658 24.75 +v 14.542 35.107 17 +v -23.821 23.511 23.561 +v 14.6609 34.6154 17 +v -13 -13 78 +v 28.86 2.843 24.7058 +v -8.719 -32.117 23.561 +v 28.86 2.843 24.75 +v -33.29 18.2852 13.578 +v 29 0 24.75 +v 0 38 0 +v 3.725 37.817 0 +v 7.413 37.27 0 +v 11.031 36.364 0 +v -3.725 37.817 0 +v -8.118 -29.904 24.448 +v 28.9728 -0.5513 24.7292 +v -11.031 36.364 0 +v -7.413 37.27 0 +v 28.86 -2.842 24.7059 +v -19.1645 -32.763 14.6776 +v 
-20.1874 -32.1501 15.5969 +v -33.513 17.913 14.0141 +v -33.3822 18.1313 13.7583 +v -26.793 11.098 24.6976 +v -34.5141 15.7959 17 +v -35.107 14.542 17 +v -35.4949 13.4585 17 +v -35.0704 14.4701 17 +v -35.107 14.542 16.5646 +v -34.7754 15.2433 16.808 +v -14.542 -35.107 17 +v -36.871 9.751 18.172 +v -14.6609 -34.6154 17 +v -13 13 0 +v 24.107 29.374 17 +v 21.112 31.596 17 +v 23.2383 29.6906 17 +v 10 10 0 +v 10 -10 0 +v -10 -10 0 +v -10 10 0 +v -13 -10 0 +v -13 10 0 +v -11.031 -36.364 17 +v 19.086 32.629 18.172 +v -9.88 -36.393 18.172 +v 19.2451 32.7147 17 +v 27.9473 -25.6813 17 +v -22.179 21.891 24.448 +v -9.9234 -36.5522 17 +v 28.6531 -24.9024 16.4377 +v -9.9461 -36.6357 16.3854 +v -35.943 9.506 20.313 +v -7.413 -37.27 17 +v -3.725 -37.817 17 +v -5.0844 -37.1806 17 +v 36.6961 9.7048 13.1747 +v 36.9011 8.8863 13.3882 +v 18.605 31.807 20.313 +v 37.0837 8.1571 13.578 +v 37.8998 2.0405 13.578 +v -25.576 13.671 24.7132 +v -34.465 9.115 22.151 +v 36.871 9.751 13.578 +v -13 -13 0 +v -19.25 -32.909 15.875 +v -9.631 -35.476 20.313 +v -19.107 -32.7975 17 +v -17.913 -33.513 17 +v 17.84 30.499 22.151 +v -16.844 28.795 23.561 +v -18.3829 -33.2314 16.0876 +v -9.235 -34.018 22.151 +v -18.2144 -33.3324 15.875 +v 11.031 36.364 17 +v -37.188 9.835 15.875 +v 20.4974 31.9643 15.875 +v 9.88 36.393 18.172 +v 36.4086 10.8531 13.578 +v 9.9234 36.5522 17 +v -10.4325 -36.5139 15.875 +v 9.9461 36.6357 16.3854 +v -24.113 16.112 24.6963 +v -9.965 -36.705 15.875 +v -25.1725 14.3443 24.7268 +v -24.6486 15.2183 24.7117 +v 37.7626 4.0915 14.5336 +v 7.413 37.27 17 +v 37.817 3.725 14.5719 +v 3.725 37.817 17 +v 0 38 17 +v 0 37.8413 17 +v 5.0844 37.1806 17 +v -9.4809 -36.7522 15.9866 +v -9.3502 -36.7849 15.875 +v 37.8341 3.3776 14.3669 +v 28.86 -2.842 24.75 +v 0 37.676 18.172 +v 28.443 -5.658 24.7109 +v 27.164 -15.479 24.448 +v 9.631 35.476 20.313 +v 37.27 7.413 14.1873 +v -24.107 29.374 0 +v -21.112 31.596 0 +v -17.913 33.513 0 +v -14.542 35.107 0 +v 0 36.727 20.313 +v 9.235 34.018 22.151 +v 19.1645 
32.763 14.6776 +v 20.1874 32.1501 15.5969 +v -22.417 18.397 24.6885 +v 35.107 14.542 15.1854 +v 34.7754 15.2433 14.942 +v 36.0809 11.8217 14.0923 +v 36.364 11.031 13.7744 +v -15.683 26.811 24.448 +v 10.4325 36.5139 15.875 +v -36.6961 9.7048 13.1747 +v -36.523 10.3962 13.4177 +v -36.4086 10.8531 13.578 +v 9.965 36.705 15.875 +v 26.4007 -27.2953 17 +v -26.87 -26.87 0 +v -36.871 9.751 13.578 +v 26.992 -26.641 18.172 +v 9.4809 36.7522 15.9866 +v 9.3502 36.7849 15.875 +v 31.921 20.5696 17 +v 31.596 21.112 17 +v 29.374 24.107 17 +v 30.232 22.7423 17 +v -37.9544 0.9279 13.3691 +v 31.596 21.112 16.4586 +v -37.8998 2.0405 13.578 +v 31.6197 21.0725 16.498 +v 26.312 -25.97 20.313 +v -20.6333 20.3655 24.7131 +v -20.506 20.506 24.7087 +v -37.0837 8.1571 13.578 +v 13.1265 13.1265 35.747 +v 13.1265 -13.1265 35.747 +v 18.625 18.625 24.75 +v 18.625 -18.625 24.75 +v -9.9461 -36.6357 15.3646 +v -10.342 -36.5365 15.7801 +v 25.0698 -28.5015 15.875 +v 32.225 18.364 20.313 +v 27.224 -26.87 15.875 +v -19.5525 21.3699 24.6948 +v -18.397 22.417 24.6779 +v -35.3852 13.7649 14.8723 +v -35.107 14.542 15.1849 +v 30.901 17.609 22.151 +v -14.4322 34.7087 17 +v -14.542 35.107 17 +v 0 -29 24.75 +v 2.842 -28.86 24.75 +v 5.658 -28.443 24.75 +v 8.418 -27.751 24.75 +v 11.098 -26.792 24.75 +v -16.112 24.113 24.6813 +v 28.443 -5.658 24.75 +v -11.098 -26.792 24.75 +v -8.418 -27.751 24.75 +v -5.658 -28.443 24.75 +v -36.364 11.031 13.7743 +v -2.843 -28.86 24.75 +v 29.174 16.625 23.561 +v -14.6261 25.0036 24.7044 +v -13.671 25.576 24.6836 +v -37.817 3.725 14.5724 +v -37.3366 6.964 14.2345 +v -37.27 7.413 14.1876 +v 25.6086 -28.0132 15.419 +v -37.2447 7.5139 14.105 +v 27.0371 -26.6856 14.025 +v 26.87 -26.87 14.3533 +v 12.9343 -25.9238 24.6782 +v 11.098 -26.792 24.6647 +v 9.9461 36.6357 15.3646 +v 10.342 36.5365 15.7801 +v 29.3593 24.1232 15.875 +v 29.4178 24.048 15.875 +v 16.844 28.795 23.561 +v 8.418 -27.751 24.6804 +v 27.8234 -8.1291 24.7219 +v 28.0047 -7.4061 24.7357 +v -30.297 -8.013 24.448 +v 7.5897 
-27.9587 24.6954 +v 5.658 -28.443 24.6667 +v 25.6086 28.0132 16.331 +v 8.719 32.117 23.561 +v 25.0698 28.5015 15.875 +v 5.3977 -28.4816 24.6661 +v 2.842 -28.86 24.6604 +v -17.913 33.513 17 +v -19.107 32.7975 17 +v -18.2144 33.3324 15.875 +v -16.844 -28.795 23.561 +v 8.118 29.904 24.448 +v -32.54 -8.606 23.561 +v -19.25 32.909 15.875 +v -2.2503 -28.8892 24.6663 +v -2.843 -28.86 24.6604 +v -30.232 -22.7423 17 +v -31.921 -20.5696 17 +v -31.596 -21.112 17 +v -29.374 -24.107 17 +v -5.658 -28.443 24.6667 +v -25.6086 -28.0132 16.331 +v -31.596 -21.112 16.4586 +v -31.6197 -21.0725 16.498 +v -3.725 37.817 17 +v -7.413 37.27 17 +v -4.8389 37.2129 17 +v -25.0698 -28.5015 15.875 +v -7.5897 -27.9587 24.6954 +v -8.418 -27.751 24.6804 +v 25.23 -24.903 22.151 +v -18.3829 33.2314 15.6624 +v -9.88 36.393 18.172 +v -9.9234 36.5522 17 +v -9.9461 36.6357 16.3854 +v 32.6246 19.3954 13.578 +v -9.5447 -27.3478 24.6738 +v 27.9472 25.6814 17 +v -11.098 -26.792 24.6647 +v -32.225 -18.364 20.313 +v 26.992 26.641 18.172 +v -11.031 36.364 17 +v 31.596 21.112 15.2913 +v 32.4905 19.6194 13.8015 +v -30.901 -17.609 22.151 +v 26.4004 27.2956 17 +v -9.631 35.476 20.313 +v -19.086 32.629 18.172 +v -29.174 -16.625 23.561 +v -23.0418 29.8391 17 +v -19.2451 32.7147 17 +v -21.112 31.596 17 +v -24.107 29.374 17 +v -27.9472 -25.6814 17 +v 32.54 8.606 23.561 +v -26.992 -26.641 18.172 +v -18.605 31.807 20.313 +v -26.4004 -27.2956 17 +v -26.312 -25.97 20.313 +# 698 vertices + +g group_0_40919 + +usemtl color_40919 +s 0 + +f 139 190 189 +f 186 189 190 +f 139 86 190 +f 260 190 86 +f 266 269 270 +f 274 270 269 +f 85 448 86 +f 260 86 448 +f 448 270 260 +f 190 260 270 +f 190 270 274 +f 190 274 186 +f 494 485 492 +f 489 492 485 +f 485 299 489 +f 298 489 299 +# 16 faces + +g group_0_4634441 + +usemtl color_4634441 +s 0 + +f 186 187 188 +f 188 189 186 +f 269 268 274 +f 275 274 268 +f 186 274 275 +f 186 275 187 +f 489 298 490 +f 315 490 298 +# 8 faces + +g group_0_8273816 + +usemtl color_8273816 +s 0 + +f 267 266 271 
+f 270 271 266 +f 84 448 85 +f 450 448 84 +f 271 270 448 +f 271 448 450 +f 492 491 493 +f 492 493 494 +# 8 faces + +g group_0_15277357 + +usemtl color_15277357 +s 0 + +f 188 187 194 +f 194 195 188 +f 268 267 275 +f 271 275 267 +f 83 455 84 +f 450 84 455 +f 271 455 275 +f 450 455 271 +f 455 194 275 +f 187 275 194 +f 493 491 516 +f 314 516 491 +f 314 491 490 +f 314 490 315 +f 455 195 194 +f 83 195 455 +# 16 faces + +g group_0_16089887 + +usemtl color_16089887 +s 0 + +f 7 456 8 +f 16 17 15 +f 14 15 17 +f 15 18 16 +f 15 14 20 +f 15 20 18 +f 17 16 21 +f 24 25 23 +f 23 25 26 +f 18 52 16 +f 20 180 18 +f 52 18 180 +f 24 54 25 +f 691 30 31 +f 21 175 17 +f 14 17 175 +f 24 23 19 +f 180 20 14 +f 19 156 24 +f 21 52 32 +f 469 78 34 +f 175 21 35 +f 21 32 37 +f 21 37 35 +f 468 681 40 +f 40 681 39 +f 36 40 39 +f 43 44 42 +f 41 42 44 +f 40 77 468 +f 35 46 45 +f 9 59 211 +f 211 574 51 +f 35 37 46 +f 556 555 65 +f 46 49 45 +f 2 1 25 +f 25 54 55 +f 21 16 52 +f 59 60 212 +f 57 32 52 +f 26 25 1 +f 124 2 55 +f 212 211 59 +f 644 28 56 +f 50 56 28 +f 38 12 80 +f 51 58 29 +f 61 47 42 +f 59 313 60 +f 53 212 60 +f 47 43 42 +f 691 31 555 +f 68 137 66 +f 692 691 554 +f 69 70 71 +f 244 136 253 +f 65 555 31 +f 69 72 70 +f 69 71 66 +f 66 71 308 +f 76 598 5 +f 73 79 75 +f 64 67 48 +f 78 469 77 +f 66 138 69 +f 74 79 73 +f 70 81 71 +f 82 81 70 +f 78 77 36 +f 72 82 70 +f 73 75 84 +f 85 73 84 +f 86 73 85 +f 83 84 75 +f 71 81 308 +f 81 82 313 +f 81 313 308 +f 80 12 48 +f 90 74 89 +f 89 74 88 +f 91 74 90 +f 87 88 74 +f 34 78 36 +f 61 100 63 +f 63 47 61 +f 163 164 253 +f 469 468 77 +f 94 97 68 +f 95 79 93 +f 93 79 92 +f 96 79 95 +f 98 79 96 +f 99 79 98 +f 101 79 99 +f 79 74 92 +f 91 92 74 +f 77 40 36 +f 64 63 100 +f 67 64 100 +f 103 255 102 +f 104 255 103 +f 255 254 102 +f 115 48 67 +f 66 94 68 +f 146 256 144 +f 144 256 158 +f 22 13 105 +f 134 160 48 +f 133 134 48 +f 108 79 107 +f 106 107 79 +f 109 79 108 +f 110 79 109 +f 111 79 110 +f 112 79 111 +f 113 79 112 +f 117 79 113 +f 101 106 79 +f 103 102 114 +f 
48 115 133 +f 22 105 27 +f 97 94 116 +f 127 102 254 +f 79 117 118 +f 79 118 119 +f 114 104 103 +f 116 120 97 +f 695 656 693 +f 124 4 2 +f 80 48 140 +f 163 253 123 +f 121 123 253 +f 105 126 27 +f 254 253 164 +f 165 166 254 +f 27 126 130 +f 100 131 67 +f 25 55 2 +f 115 67 131 +f 652 120 116 +f 123 121 129 +f 122 245 248 +f 102 127 114 +f 122 125 245 +f 422 228 248 +f 217 218 126 +f 218 226 126 +f 66 137 138 +f 72 69 138 +f 48 160 140 +f 591 74 73 +f 86 139 591 +f 86 591 73 +f 80 359 38 +f 121 136 129 +f 132 419 650 +f 140 294 80 +f 145 74 143 +f 142 143 74 +f 147 74 145 +f 141 142 74 +f 237 272 50 +f 148 4 150 +f 148 150 149 +f 153 74 152 +f 154 74 153 +f 87 74 154 +f 151 152 74 +f 147 151 74 +f 156 19 149 +f 183 149 19 +f 155 237 50 +f 146 144 52 +f 150 156 149 +f 313 82 138 +f 82 72 138 +f 52 180 146 +f 157 134 133 +f 115 131 133 +f 255 104 158 +f 157 133 131 +f 50 28 155 +f 651 130 159 +f 160 161 140 +f 159 162 651 +f 162 167 651 +f 172 156 150 +f 114 52 104 +f 158 104 52 +f 144 158 52 +f 145 162 159 +f 135 147 159 +f 145 159 147 +f 51 177 211 +f 55 54 156 +f 165 254 164 +f 156 124 55 +f 171 179 651 +f 127 254 166 +f 24 156 54 +f 170 651 167 +f 129 57 123 +f 161 160 168 +f 157 168 160 +f 163 123 57 +f 157 160 134 +f 164 163 57 +f 140 161 169 +f 114 127 166 +f 143 167 145 +f 145 167 162 +f 10 29 693 +f 184 6 4 +f 179 173 651 +f 169 176 140 +f 124 172 4 +f 150 4 172 +f 174 169 168 +f 57 52 164 +f 165 164 52 +f 114 166 52 +f 58 51 574 +f 161 168 169 +f 165 52 166 +f 142 171 170 +f 142 170 143 +f 143 170 167 +f 176 178 140 +f 216 215 191 +f 156 172 124 +f 179 181 173 +f 177 29 10 +f 141 181 179 +f 171 142 179 +f 141 179 142 +f 184 148 183 +f 149 183 148 +f 177 10 211 +f 182 178 176 +f 176 174 182 +f 319 326 173 +f 177 51 29 +f 14 175 180 +f 176 169 174 +f 141 292 181 +f 35 320 175 +f 181 292 173 +f 193 216 192 +f 191 192 216 +f 349 33 38 +f 216 278 209 +f 185 132 650 +f 198 494 197 +f 205 204 208 +f 196 209 207 +f 4 148 184 +f 224 204 257 +f 192 191 210 +f 204 224 208 
+f 197 494 196 +f 493 516 214 +f 214 215 493 +f 216 493 215 +f 583 197 207 +f 222 346 403 +f 426 403 346 +f 154 218 217 +f 621 87 217 +f 154 217 87 +f 257 256 231 +f 99 222 220 +f 219 101 220 +f 99 220 101 +f 231 233 257 +f 193 192 210 +f 635 221 76 +f 223 207 206 +f 387 378 180 +f 233 231 180 +f 208 233 180 +f 153 226 154 +f 218 154 226 +f 689 227 228 +f 208 224 233 +f 283 227 281 +f 229 230 126 +f 235 126 230 +f 126 226 229 +f 191 215 232 +f 152 229 153 +f 293 295 214 +f 635 173 221 +f 152 230 229 +f 226 153 229 +f 62 261 185 +f 122 248 228 +f 277 173 344 +f 231 146 180 +f 224 257 233 +f 301 336 62 +f 130 235 236 +f 232 210 191 +f 130 126 235 +f 206 207 209 +f 151 236 235 +f 151 235 152 +f 319 173 292 +f 62 305 301 +f 235 230 152 +f 228 285 234 +f 246 248 245 +f 249 209 247 +f 228 422 285 +f 130 236 135 +f 239 299 238 +f 240 299 239 +f 241 298 240 +f 242 298 241 +f 253 315 244 +f 243 298 242 +f 244 298 243 +f 249 251 209 +f 62 252 305 +f 245 276 246 +f 135 236 151 +f 135 151 147 +f 125 328 245 +f 266 267 268 +f 266 268 269 +f 265 299 264 +f 263 264 299 +f 137 249 138 +f 246 273 248 +f 247 138 249 +f 237 238 299 +f 8 467 7 +f 7 467 250 +f 246 276 273 +f 249 137 251 +f 223 251 137 +f 467 470 250 +f 193 278 216 +f 248 273 422 +f 261 62 336 +f 247 209 278 +f 251 206 209 +f 221 173 277 +f 276 424 273 +f 228 227 283 +f 193 210 138 +f 193 138 278 +f 247 278 138 +f 344 342 277 +f 45 49 279 +f 125 122 283 +f 283 122 228 +f 326 344 173 +f 223 206 251 +f 279 250 337 +f 279 7 250 +f 279 49 7 +f 215 214 296 +f 295 296 214 +f 297 292 141 +f 296 232 215 +f 299 240 298 +f 254 315 253 +f 234 285 290 +f 255 315 254 +f 1 314 300 +f 257 314 256 +f 210 232 296 +f 210 296 295 +f 259 300 314 +f 303 304 302 +f 285 514 290 +f 2 314 1 +f 259 352 302 +f 303 302 311 +f 293 291 313 +f 138 293 313 +f 140 307 294 +f 314 2 4 +f 293 138 295 +f 210 295 138 +f 304 175 576 +f 303 309 304 +f 303 311 309 +f 312 554 318 +f 261 336 339 +f 304 309 175 +f 256 315 255 +f 258 259 314 +f 257 258 314 +f 315 
256 314 +f 298 244 315 +f 645 646 310 +f 283 281 312 +f 311 180 309 +f 309 180 175 +f 640 641 307 +f 310 307 645 +f 318 225 317 +f 629 630 307 +f 688 185 261 +f 317 283 318 +f 312 318 283 +f 307 140 629 +f 261 351 688 +f 276 245 321 +f 324 325 323 +f 338 334 324 +f 325 327 323 +f 322 323 327 +f 305 118 301 +f 679 66 308 +f 119 118 305 +f 292 297 319 +f 119 305 252 +f 333 319 297 +f 424 276 321 +f 66 679 94 +f 331 332 325 +f 327 325 332 +f 121 253 136 +f 319 333 326 +f 325 324 334 +f 125 283 328 +f 320 35 45 +f 338 324 335 +f 255 158 256 +f 320 45 329 +f 146 231 256 +f 334 331 325 +f 212 348 213 +f 212 53 348 +f 379 361 277 +f 377 375 261 +f 279 337 329 +f 184 183 338 +f 334 338 183 +f 360 261 339 +f 329 45 279 +f 291 214 355 +f 213 355 214 +f 293 214 291 +f 343 213 341 +f 355 213 343 +f 335 391 338 +f 334 183 331 +f 19 331 183 +f 341 345 343 +f 222 99 346 +f 98 346 99 +f 339 336 117 +f 118 117 336 +f 118 336 301 +f 445 202 444 +f 332 331 349 +f 213 348 341 +f 344 350 342 +f 311 302 352 +f 348 345 341 +f 326 333 344 +f 350 344 333 +f 259 302 353 +f 349 354 332 +f 347 579 340 +f 364 203 202 +f 348 53 345 +f 328 317 203 +f 352 180 311 +f 379 380 361 +f 687 228 234 +f 33 349 331 +f 364 202 363 +f 382 379 277 +f 375 351 261 +f 331 19 33 +f 277 342 358 +f 277 358 382 +f 354 349 359 +f 345 313 343 +f 291 355 313 +f 343 313 355 +f 261 360 377 +f 345 60 313 +f 359 362 354 +f 345 53 60 +f 113 360 117 +f 234 290 357 +f 245 203 321 +f 364 321 203 +f 349 38 359 +f 117 360 339 +f 365 366 239 +f 381 239 366 +f 9 313 59 +f 225 203 317 +f 245 328 203 +f 294 362 359 +f 357 290 306 +f 257 204 258 +f 564 240 381 +f 372 362 294 +f 398 400 351 +f 367 366 365 +f 365 239 373 +f 204 205 258 +f 294 359 80 +f 321 364 424 +f 363 424 364 +f 342 350 358 +f 373 367 365 +f 375 376 351 +f 372 294 307 +f 374 358 350 +f 310 372 307 +f 283 317 328 +f 258 205 378 +f 112 376 375 +f 377 113 375 +f 112 375 113 +f 113 377 360 +f 383 306 290 +f 384 335 324 +f 208 378 205 +f 367 347 366 +f 381 366 347 +f 
347 564 381 +f 453 357 306 +f 378 387 258 +f 386 258 387 +f 335 384 388 +f 352 259 386 +f 258 386 259 +f 330 453 306 +f 184 338 280 +f 400 417 351 +f 403 306 383 +f 338 391 393 +f 338 393 280 +f 390 392 389 +f 227 692 281 +f 351 376 385 +f 370 432 390 +f 388 393 391 +f 380 379 394 +f 374 394 379 +f 379 382 374 +f 390 389 370 +f 208 180 378 +f 306 403 330 +f 367 683 347 +f 111 385 112 +f 376 112 385 +f 374 382 358 +f 386 387 180 +f 398 351 385 +f 390 397 392 +f 388 391 335 +f 352 386 180 +f 417 418 639 +f 396 32 57 +f 361 412 413 +f 413 428 361 +f 639 351 417 +f 399 401 402 +f 397 395 392 +f 318 554 225 +f 432 517 397 +f 401 399 404 +f 110 398 111 +f 412 361 405 +f 184 280 6 +f 312 281 554 +f 399 402 396 +f 402 340 396 +f 380 405 361 +f 383 406 403 +f 110 400 398 +f 385 111 398 +f 396 449 399 +f 323 284 324 +f 401 408 402 +f 380 394 405 +f 409 405 394 +f 323 286 284 +f 411 408 401 +f 3 361 428 +f 401 404 411 +f 402 408 340 +f 281 692 554 +f 416 423 415 +f 414 416 415 +f 282 324 284 +f 428 431 3 +f 411 347 408 +f 408 347 340 +f 426 427 403 +f 330 403 427 +f 418 429 639 +f 529 496 454 +f 109 418 417 +f 109 417 110 +f 420 37 32 +f 219 220 406 +f 220 403 406 +f 536 537 330 +f 110 417 400 +f 421 413 412 +f 410 416 414 +f 220 222 403 +f 405 409 412 +f 396 420 32 +f 421 412 409 +f 439 384 282 +f 410 423 416 +f 425 475 330 +f 475 513 330 +f 273 424 422 +f 435 639 429 +f 37 420 430 +f 286 323 322 +f 418 109 429 +f 108 429 109 +f 330 427 425 +f 397 390 432 +f 476 422 424 +f 406 433 434 +f 430 46 37 +f 3 437 446 +f 406 639 433 +f 96 425 427 +f 96 427 426 +f 96 426 98 +f 431 428 436 +f 421 436 428 +f 421 428 413 +f 504 285 422 +f 439 282 438 +f 426 346 98 +f 514 285 504 +f 107 434 433 +f 433 435 107 +f 324 282 384 +f 107 435 108 +f 7 3 442 +f 439 438 388 +f 435 429 108 +f 436 440 437 +f 436 437 431 +f 434 441 406 +f 438 282 280 +f 106 441 434 +f 106 434 107 +f 696 234 357 +f 438 393 388 +f 7 442 443 +f 443 456 7 +f 384 439 388 +f 441 219 406 +f 201 444 202 +f 356 363 202 +f 356 
202 445 +f 263 262 415 +f 357 453 11 +f 446 442 3 +f 447 445 444 +f 441 106 219 +f 101 219 106 +f 415 423 263 +f 396 57 449 +f 393 438 280 +f 451 443 442 +f 347 411 449 +f 440 451 446 +f 442 446 451 +f 415 452 454 +f 414 415 454 +f 57 547 540 +f 540 449 57 +f 446 437 440 +f 356 445 447 +f 456 443 458 +f 451 458 443 +f 404 399 449 +f 444 201 459 +f 473 474 201 +f 452 526 454 +f 458 460 8 +f 458 8 456 +f 237 299 272 +f 262 263 299 +f 272 299 265 +f 449 411 404 +f 444 459 447 +f 638 637 250 +f 363 356 424 +f 447 424 356 +f 457 310 466 +f 330 500 453 +f 457 372 310 +f 470 550 250 +f 638 250 550 +f 370 472 432 +f 463 299 462 +f 462 299 461 +f 389 371 370 +f 262 299 464 +f 512 136 243 +f 464 299 463 +f 485 461 299 +f 527 424 603 +f 447 474 424 +f 330 537 500 +f 473 424 474 +f 459 201 474 +f 459 474 447 +f 465 485 469 +f 536 330 513 +f 478 479 477 +f 476 477 479 +f 576 175 320 +f 477 480 478 +f 95 475 425 +f 95 425 96 +f 480 477 481 +f 476 481 477 +f 262 452 415 +f 11 696 357 +f 479 478 483 +f 316 654 310 +f 466 310 654 +f 461 485 465 +f 487 488 486 +f 587 320 329 +f 489 490 491 +f 489 491 492 +f 654 655 466 +f 264 528 487 +f 480 527 478 +f 505 501 495 +f 481 424 480 +f 527 480 424 +f 453 522 11 +f 482 495 484 +f 395 497 518 +f 483 422 479 +f 476 479 422 +f 286 314 284 +f 282 284 314 +f 6 280 314 +f 280 282 314 +f 487 498 488 +f 499 302 304 +f 395 518 407 +f 424 481 476 +f 501 484 495 +f 244 243 136 +f 499 502 302 +f 565 566 240 +f 505 495 503 +f 576 499 304 +f 410 496 498 +f 498 496 488 +f 4 6 314 +f 497 484 501 +f 414 454 496 +f 322 327 506 +f 505 506 507 +f 507 506 327 +f 501 505 507 +f 410 414 496 +f 287 516 286 +f 353 302 502 +f 422 483 504 +f 511 242 509 +f 508 509 242 +f 508 242 241 +f 483 68 97 +f 503 545 505 +f 545 546 505 +f 370 516 369 +f 507 327 332 +f 368 516 289 +f 483 97 504 +f 552 496 529 +f 332 497 507 +f 501 507 497 +f 504 120 514 +f 513 95 93 +f 504 97 120 +f 509 508 515 +f 288 289 516 +f 287 288 516 +f 314 286 516 +f 395 397 517 +f 496 552 510 +f 95 
513 475 +f 484 519 520 +f 484 520 482 +f 530 508 241 +f 482 520 369 +f 520 519 523 +f 523 525 520 +f 497 354 518 +f 497 332 354 +f 497 395 484 +f 519 484 395 +f 517 519 395 +f 290 514 383 +f 136 512 129 +f 559 521 510 +f 514 120 128 +f 524 419 407 +f 599 502 499 +f 664 465 469 +f 517 523 519 +f 522 453 500 +f 514 128 383 +f 511 509 515 +f 518 524 407 +f 664 461 465 +f 128 406 383 +f 518 362 524 +f 369 520 525 +f 525 471 369 +f 539 531 526 +f 517 525 523 +f 518 354 362 +f 239 381 240 +f 599 353 502 +f 524 457 419 +f 483 478 527 +f 498 487 528 +f 563 565 240 +f 524 372 457 +f 527 68 483 +f 264 487 486 +f 524 362 372 +f 482 369 368 +f 137 68 527 +f 531 454 526 +f 495 289 532 +f 532 503 495 +f 515 508 530 +f 528 410 498 +f 353 627 259 +f 539 526 533 +f 495 482 289 +f 471 472 370 +f 370 369 471 +f 500 537 534 +f 503 532 535 +f 529 454 531 +f 199 617 200 +f 472 471 517 +f 603 473 200 +f 540 243 538 +f 242 553 243 +f 542 543 541 +f 539 541 544 +f 544 541 543 +f 531 539 544 +f 535 546 545 +f 540 547 243 +f 57 129 547 +f 548 470 467 +f 8 460 467 +f 548 467 460 +f 533 577 539 +f 92 534 537 +f 92 537 536 +f 92 536 93 +f 517 432 472 +f 544 543 549 +f 531 544 529 +f 93 536 513 +f 549 529 544 +f 535 545 503 +f 449 540 538 +f 471 525 517 +f 242 511 553 +f 538 243 553 +f 556 557 485 +f 555 556 485 +f 469 485 468 +f 547 512 243 +f 569 199 198 +f 529 558 552 +f 423 560 263 +f 337 250 551 +f 225 554 485 +f 529 549 558 +f 200 494 199 +f 511 515 449 +f 522 500 567 +f 553 511 449 +f 202 203 485 +f 538 553 449 +f 552 22 559 +f 201 200 473 +f 589 600 500 +f 588 589 500 +f 129 512 547 +f 552 558 22 +f 623 198 197 +f 554 555 485 +f 557 468 485 +f 528 264 561 +f 560 561 264 +f 560 264 263 +f 200 201 485 +f 563 240 564 +f 196 207 197 +f 534 562 500 +f 265 264 486 +f 559 27 643 +f 562 534 91 +f 241 240 566 +f 22 27 559 +f 622 623 197 +f 566 530 241 +f 92 91 534 +f 561 560 410 +f 595 289 288 +f 526 464 568 +f 526 568 533 +f 105 567 126 +f 515 530 566 +f 515 566 565 +f 482 368 289 +f 570 199 569 
+f 571 199 570 +f 452 262 464 +f 526 452 464 +f 201 202 485 +f 203 225 485 +f 485 494 200 +f 493 196 494 +f 12 337 551 +f 602 200 617 +f 572 533 568 +f 286 322 287 +f 561 410 528 +f 211 516 574 +f 563 564 347 +f 449 563 347 +f 288 287 505 +f 570 569 575 +f 560 423 410 +f 573 597 599 +f 23 573 576 +f 505 546 288 +f 563 449 565 +f 515 565 449 +f 624 626 198 +f 577 578 539 +f 576 573 599 +f 572 578 577 +f 581 582 580 +f 579 580 582 +f 575 571 570 +f 371 574 516 +f 559 510 552 +f 581 580 584 +f 584 634 581 +f 622 197 585 +f 585 197 583 +f 600 601 567 +f 287 506 505 +f 580 579 586 +f 580 586 584 +f 572 631 578 +f 600 567 500 +f 572 577 533 +f 569 198 590 +f 626 590 198 +f 19 576 587 +f 500 562 588 +f 347 584 586 +f 506 287 322 +f 213 214 516 +f 493 216 209 +f 587 576 320 +f 541 463 462 +f 589 588 90 +f 562 91 588 +f 680 340 582 +f 579 582 340 +f 90 588 91 +f 594 593 592 +f 591 592 593 +f 199 494 198 +f 585 583 223 +f 596 289 595 +f 532 289 596 +f 593 74 591 +f 569 590 575 +f 586 579 347 +f 620 621 567 +f 79 594 75 +f 596 595 535 +f 396 340 598 +f 139 189 591 +f 592 591 189 +f 592 189 188 +f 592 188 195 +f 209 196 493 +f 212 213 516 +f 368 369 516 +f 370 371 516 +f 516 211 212 +f 598 420 396 +f 532 596 535 +f 583 207 223 +f 462 461 541 +f 499 576 599 +f 567 601 612 +f 199 571 617 +f 595 288 546 +f 440 593 451 +f 436 593 440 +f 458 451 593 +f 421 593 436 +f 409 593 421 +f 394 593 409 +f 374 593 394 +f 350 333 593 +f 460 458 593 +f 603 200 602 +f 89 601 600 +f 89 600 90 +f 595 546 535 +f 90 600 589 +f 539 463 541 +f 592 75 594 +f 606 647 605 +f 648 605 647 +f 83 75 195 +f 592 195 75 +f 74 593 333 +f 141 74 297 +f 333 297 74 +f 374 350 593 +f 603 602 527 +f 598 604 420 +f 607 608 79 +f 608 609 79 +f 79 609 610 +f 79 610 611 +f 79 611 182 +f 473 603 424 +f 601 89 612 +f 61 593 100 +f 42 593 61 +f 131 100 594 +f 41 593 42 +f 613 593 41 +f 548 593 613 +f 460 593 548 +f 594 100 593 +f 157 131 594 +f 616 79 615 +f 614 615 79 +f 607 79 618 +f 618 79 616 +f 119 614 79 +f 598 76 
604 +f 88 612 89 +f 300 259 627 +f 621 217 567 +f 182 174 79 +f 168 594 174 +f 79 174 594 +f 594 168 157 +f 617 571 527 +f 527 571 575 +f 567 612 620 +f 602 617 527 +f 621 620 87 +f 619 604 221 +f 685 642 486 +f 624 198 623 +f 76 221 604 +f 87 620 88 +f 612 88 620 +f 496 680 488 +f 126 567 217 +f 223 137 585 +f 361 694 619 +f 625 599 597 +f 221 277 619 +f 622 585 137 +f 178 629 140 +f 300 627 628 +f 619 277 361 +f 623 622 137 +f 575 590 626 +f 627 599 628 +f 631 464 463 +f 539 578 463 +f 631 463 578 +f 630 636 307 +f 625 628 599 +f 632 464 631 +f 568 464 632 +f 419 457 650 +f 510 521 5 +f 611 630 629 +f 611 629 182 +f 137 527 623 +f 624 623 527 +f 575 626 527 +f 457 466 650 +f 353 599 627 +f 521 76 5 +f 632 631 572 +f 629 178 182 +f 624 527 626 +f 548 613 550 +f 548 550 470 +f 634 584 347 +f 640 307 636 +f 392 697 389 +f 521 635 76 +f 581 237 633 +f 568 632 572 +f 630 611 636 +f 681 468 557 +f 667 389 661 +f 661 389 697 +f 461 542 541 +f 610 636 611 +f 347 682 634 +f 697 392 695 +f 395 695 392 +f 645 307 641 +f 128 639 406 +f 695 395 407 +f 641 640 609 +f 610 609 640 +f 610 640 636 +f 559 643 521 +f 435 433 639 +f 637 44 551 +f 44 43 551 +f 606 681 557 +f 551 250 637 +f 644 642 28 +f 646 645 608 +f 609 608 645 +f 329 670 587 +f 609 645 641 +f 647 556 649 +f 647 649 648 +f 310 646 316 +f 329 337 670 +f 608 607 316 +f 608 316 646 +f 653 687 648 +f 652 128 120 +f 649 653 648 +f 56 272 265 +f 30 690 653 +f 64 48 551 +f 652 639 128 +f 643 130 651 +f 655 660 466 +f 671 653 649 +f 265 644 56 +f 351 639 652 +f 670 337 12 +f 618 655 654 +f 316 607 654 +f 618 654 607 +f 12 551 48 +f 643 27 130 +f 642 644 486 +f 31 30 653 +f 170 171 651 +f 675 373 238 +f 26 573 23 +f 653 65 31 +f 466 668 669 +f 597 573 26 +f 657 658 656 +f 658 659 656 +f 656 659 693 +f 239 238 373 +f 660 668 466 +f 135 159 130 +f 576 19 23 +f 658 662 659 +f 644 265 486 +f 662 658 663 +f 657 663 658 +f 587 33 19 +f 665 666 664 +f 681 673 665 +f 666 543 664 +f 542 664 543 +f 655 618 660 +f 616 660 618 +f 634 
682 237 +f 469 34 665 +f 34 674 665 +f 313 662 663 +f 669 676 466 +f 676 62 466 +f 665 664 469 +f 556 647 557 +f 667 661 29 +f 635 521 643 +f 542 461 664 +f 33 670 38 +f 676 678 62 +f 657 308 313 +f 649 556 671 +f 65 671 556 +f 635 643 651 +f 672 549 666 +f 543 666 549 +f 389 667 371 +f 606 557 647 +f 695 308 656 +f 656 308 657 +f 669 668 615 +f 616 615 668 +f 651 173 635 +f 670 33 587 +f 666 665 673 +f 616 668 660 +f 663 657 313 +f 681 665 674 +f 674 39 681 +f 56 50 272 +f 683 675 238 +f 682 683 238 +f 653 671 65 +f 581 677 582 +f 633 677 581 +f 373 675 367 +f 673 672 666 +f 678 676 614 +f 615 614 676 +f 615 676 669 +f 698 679 308 +f 680 582 677 +f 677 28 680 +f 36 674 34 +f 633 237 155 +f 252 62 678 +f 667 58 371 +f 670 12 38 +f 606 605 681 +f 673 681 605 +f 678 614 252 +f 238 237 682 +f 581 634 237 +f 695 698 308 +f 633 28 677 +f 574 371 58 +f 119 252 614 +f 11 13 686 +f 633 155 28 +f 13 522 105 +f 58 667 29 +f 687 672 605 +f 673 605 672 +f 488 685 486 +f 11 522 13 +f 94 684 116 +f 36 39 674 +f 682 347 683 +f 567 105 522 +f 300 628 1 +f 679 684 94 +f 549 672 686 +f 605 648 687 +f 679 698 132 +f 62 185 650 +f 367 675 683 +f 679 132 684 +f 684 688 116 +f 625 1 628 +f 604 430 420 +f 488 680 685 +f 28 685 680 +f 686 558 549 +f 116 688 652 +f 62 650 466 +f 690 691 689 +f 691 692 689 +f 689 692 227 +f 185 688 684 +f 642 685 28 +f 132 185 684 +f 555 554 691 +f 351 652 688 +f 340 680 5 +f 597 26 1 +f 690 30 691 +f 29 695 693 +f 46 430 694 +f 510 5 680 +f 211 10 659 +f 693 659 10 +f 659 662 9 +f 659 9 211 +f 604 619 430 +f 558 686 13 +f 662 313 9 +f 340 5 598 +f 228 687 689 +f 690 689 687 +f 653 690 687 +f 694 430 619 +f 680 496 510 +f 29 697 695 +f 13 22 558 +f 672 696 686 +f 625 597 1 +f 637 638 41 +f 44 637 41 +f 696 672 687 +f 638 550 613 +f 613 41 638 +f 29 661 697 +f 63 64 551 +f 687 234 696 +f 47 63 551 +f 3 431 437 +f 47 551 43 +f 686 696 11 +f 695 407 698 +f 694 49 46 +f 698 407 419 +f 3 49 694 +f 419 132 698 +f 694 361 3 +f 49 3 7 +# 1344 faces + + #end of 
obj_0 + diff --git a/diffusion_policy/env/block_pushing/assets/suction/suction-base.urdf b/diffusion_policy/env/block_pushing/assets/suction/suction-base.urdf new file mode 100644 index 0000000..fa5abde --- /dev/null +++ b/diffusion_policy/env/block_pushing/assets/suction/suction-base.urdf @@ -0,0 +1,69 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/diffusion_policy/env/block_pushing/assets/suction/suction-head-long.urdf b/diffusion_policy/env/block_pushing/assets/suction/suction-head-long.urdf new file mode 100644 index 0000000..b743b89 --- /dev/null +++ b/diffusion_policy/env/block_pushing/assets/suction/suction-head-long.urdf @@ -0,0 +1,101 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/diffusion_policy/env/block_pushing/assets/suction/suction-head.urdf b/diffusion_policy/env/block_pushing/assets/suction/suction-head.urdf new file mode 100644 index 0000000..38e89cb --- /dev/null +++ b/diffusion_policy/env/block_pushing/assets/suction/suction-head.urdf @@ -0,0 +1,70 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/diffusion_policy/env/block_pushing/assets/suction/tip.obj b/diffusion_policy/env/block_pushing/assets/suction/tip.obj new file mode 100644 index 0000000..5c9b5e1 --- /dev/null +++ b/diffusion_policy/env/block_pushing/assets/suction/tip.obj @@ -0,0 +1,4182 @@ +# Object Export From Tinkercad Server 2015 + +mtllib obj.mtl + +o obj_0 +v -7.73 6.3469 14.497 +v -8.315 5.5589 14.497 +v -8.819 4.7169 14.497 +v -9.239 3.8299 14.497 +v -9.57 2.9059 14.497 +v -9.808 1.9539 14.497 +v -9.952 0.9829 14.497 +v -10 0.0029 14.497 +v -9.952 -0.9771 14.497 +v -9.808 -1.9481 14.497 +v -9.57 -2.9001 14.497 +v -9.239 -3.8241 14.497 +v -8.819 -4.7111 14.497 +v 
-8.315 -5.5531 14.497 +v -7.73 -6.3411 14.497 +v 5.752 -3.3181 8.031 +v 5.913 -3.4111 7.997 +v 1.6906 6.3125 9.8999 +v 4.696 4.6989 5.963 +v 1.6906 6.3125 9.8999 +v 3.321 -5.7491 4.031 +v -6.224 3.5959 9.863 +v 3.321 -5.7491 9.963 +v 3.414 -5.9101 3.997 +v 4.619 4.6219 5.8999 +v 0 -9.9971 14.497 +v 0.98 -9.9491 14.497 +v 1.951 -9.8051 14.497 +v 2.903 -9.5671 14.497 +v 3.827 -9.2361 14.497 +v 4.3254 -9.0001 14.497 +v -4.3254 -9.0001 14.497 +v -3.827 -9.2361 14.497 +v -2.903 -9.5671 14.497 +v 1.86 6.9439 9.863 +v -1.951 -9.8051 14.497 +v -0.98 -9.9491 14.497 +v -6.503 -7.583 14.497 +v 6.3422 -7.7249 14.495 +v 3.2663 -5.6544 4.0941 +v 5.5544 -8.3098 14.495 +v -6.352 3.6699 9.704 +v 3.2661 -5.6545 4.0941 +v 4.7127 -8.8136 14.495 +v 4.828 4.8309 5.997 +v 4.3187 -9.0001 14.495 +v -3.7724 5.2718 4.0942 +v 1.898 7.0879 9.704 +v 3.414 -5.9101 9.997 +v 5.6573 -3.2632 8.0941 +v 5.6572 -3.2634 8.0941 +v 4.959 4.9619 5.963 +v -6.451 3.7269 9.497 +v 3.506 -6.0711 9.963 +v 1.928 7.1979 9.497 +v 5.081 5.0839 5.863 +v 2.6786 -4.6364 6.997 +v 1.3864 -5.1721 6.997 +v 3.2661 -5.6545 9.8999 +v 3.2663 -5.6544 9.8999 +v 3.7879 -3.785 6.997 +v -4.3187 -9.0001 14.495 +v -4.7127 -8.8136 14.495 +v -5.5544 -8.3098 14.495 +v -6.3422 -7.7249 14.495 +v -6.503 -7.5792 14.495 +v 5.081 5.0839 4.131 +v 0 5.36 6.997 +v 1.3864 5.1771 6.997 +v 7.069 7.0718 14.495 +v 4.959 4.9619 4.031 +v 2.6748 -5.8996 5.8999 +v 0.975 4.9069 0.999 +v 1.451 4.7879 0.999 +v 1.913 4.6219 0.999 +v 2.357 4.4119 0.999 +v 2.778 4.1599 0.999 +v 0 5.0029 0.999 +v 0.49 4.9789 0.999 +v -1.3864 5.1771 6.997 +v 3.172 3.8679 0.999 +v 3.535 3.5379 0.999 +v 3.865 3.1749 0.999 +v 4.157 2.7809 0.999 +v 0.8324 3.1095 0.999 +v 0 3.2193 0.999 +v 5.1743 1.3893 6.997 +v -2.778 4.1599 0.999 +v -2.357 4.4119 0.999 +v -1.914 4.6219 0.999 +v -1.452 4.7879 0.999 +v 5.3571 0.0029 6.997 +v -0.976 4.9069 0.999 +v -0.49 4.9789 0.999 +v -3.172 3.8679 0.999 +v 4.6393 2.6814 6.997 +v 0.49 4.9789 2.7016 +v 0 5.0029 2.6637 +v 0.975 4.9069 2.6941 +v 1.2927 
4.8274 2.6591 +v 4.828 4.8309 3.997 +v 2.2842 -6.0614 4.0939 +v -4.9704 1.5106 11.1312 +v -5.0256 1.3493 11.1413 +v 5.0251 1.3493 11.1412 +v -5.1562 0.5106 11.1223 +v -5.0863 1.0146 11.1257 +v 3.593 -6.2201 5.863 +v 0.5554 2.0757 0 +v 0 2.1489 0 +v 0 5.2078 11.139 +v 4.696 4.6989 4.031 +v -4.828 4.8309 7.997 +v 4.409 2.3599 0.999 +v 4.619 1.9159 0.999 +v 1.6082 2.7883 0.999 +v 1.451 4.7879 2.6816 +v 4.7851 1.9849 11.121 +v 1.913 4.6219 2.7036 +v 4.9698 1.5106 11.1311 +v 2.357 4.4119 2.6809 +v 2.4972 4.328 2.6584 +v 3.7731 -5.2654 8.0941 +v 4.1127 5.0106 4.0942 +v 4.619 4.6219 4.0941 +v -1.0117 5.0889 11.1255 +v -4.697 4.6989 8.031 +v 1.073 1.8614 0 +v -0.5077 5.1589 11.1222 +v -5.318 5.3209 4.738 +v 3.506 -6.0711 8.031 +v -5.335 5.3379 4.997 +v -4.6192 4.6217 8.0942 +v -4.6191 4.6219 8.0942 +v 3.414 -5.9101 7.997 +v 4.785 1.4539 0.999 +v 4.904 0.9779 0.999 +v 4.976 0.4929 0.999 +v 2.2742 2.2771 0.999 +v 3.772 -6.5311 4.997 +v 2.778 4.1599 2.6938 +v -5.267 5.2699 4.497 +v 2.8644 4.0959 2.6955 +v 3.172 3.8679 2.7016 +v 3.535 3.5379 2.6632 +v -5.187 5.1889 4.29 +v 6.415 1.7219 9.963 +v 3.321 -5.7491 8.031 +v 1.5174 1.5203 0 +v -5.0077 4.1155 8.0942 +v -5.187 5.1889 5.704 +v 3.593 6.2259 5.863 +v 3.2663 -5.6544 8.0941 +v 6.595 1.7699 9.997 +v 3.2661 -5.6545 8.0941 +v -5.267 5.2699 5.497 +v 3.772 6.5369 4.997 +v 6.774 1.8179 9.963 +v 3.667 6.3549 5.704 +v 5 0.0029 0.999 +v 2.7854 1.6111 0.999 +v 4.976 -0.4871 0.999 +v 4.904 -0.9731 0.999 +v -5.318 5.3209 5.256 +v 3.865 3.1749 2.7016 +v 3.667 -6.3491 5.704 +v 4.157 2.7809 2.6938 +v 4.3251 2.5 2.6584 +v 6.3933 1.0597 9.8999 +v 3.724 -6.4481 5.497 +v 6.3096 1.6935 9.8999 +v 6.3096 1.6935 9.8999 +v 2.6748 -5.8996 9.8999 +v 1.8585 1.0759 0 +v 6.941 1.8629 9.863 +v 3.76 -6.5101 5.256 +v -4.1127 5.0106 5.8998 +v 3.414 5.9159 7.997 +v 7.085 1.9009 9.704 +v 2.2842 -6.0614 8.0939 +v 3.724 6.4539 5.497 +v 3.1066 0.8353 0.999 +v 3.76 -6.5101 4.738 +v -4.697 4.6989 5.963 +v 4.785 -1.4491 0.999 +v 4.619 -1.9111 0.999 +v 3.321 5.7549 
8.031 +v 4.409 2.3599 2.6809 +v 3.724 -6.4481 4.497 +v 4.619 1.9159 2.7036 +v 3.76 6.5159 5.256 +v -4.6191 4.6219 5.8998 +v 4.785 1.4539 2.6816 +v -4.6192 4.6217 5.8998 +v 4.8246 1.2956 2.6591 +v -4.828 4.8309 5.997 +v 3.772 -6.5311 8.997 +v 3.667 -6.3491 4.29 +v 3.593 -6.2201 9.863 +v 2.0728 0.5583 0 +v 3.2661 5.6602 8.0941 +v 3.2663 5.6601 8.0941 +v 3.593 -6.2201 4.131 +v -4.959 4.9619 5.963 +v 3.667 -6.3491 9.704 +v 3.76 6.5159 4.738 +v -5.082 5.0839 5.863 +v 3.724 -6.4481 9.497 +v 3.2164 0.0029 0.999 +v 4.409 -2.3541 0.999 +v 3.724 6.4539 4.497 +v 4.157 -2.7751 0.999 +v 3.865 -3.1691 0.999 +v -6.074 3.5089 8.031 +v 4.904 0.9779 2.6941 +v 4.976 0.4929 2.7016 +v 5 0.0029 2.6637 +v -5.082 5.0839 4.131 +v 3.76 -6.5101 9.256 +v 3.667 6.3549 4.29 +v -4.959 4.9619 4.031 +v 2.6746 5.9051 8.0941 +v -4.828 4.8309 3.997 +v 3.593 6.2259 4.131 +v 2.1461 0.0029 0 +v 3.76 -6.5101 8.738 +v 3.724 -6.4481 8.497 +v 3.216 -0.0001 0.999 +v 3.535 -3.5331 0.999 +v 3.172 -3.8621 0.999 +v -4.697 4.6989 4.031 +v 2.778 -4.1541 0.999 +v 3.1066 -0.8296 0.999 +v 4.9999 -0.0001 2.6639 +v 4.976 -0.4871 2.7016 +v 4.904 -0.9731 2.6942 +v 4.8248 -1.2899 2.6593 +v 3.667 -6.3491 8.29 +v 7.195 1.9309 9.497 +v 1.953 7.2909 8.997 +v 3.593 -6.2201 8.131 +v -4.6192 4.6217 4.0942 +v -4.6191 4.6219 4.0942 +v 2.0728 -0.5525 0 +v -6.0642 2.2871 9.9001 +v 2.357 -4.4071 0.999 +v 1.913 -4.6171 0.999 +v 2.7854 -1.6054 0.999 +v 4.785 -1.4491 2.682 +v 4.619 -1.9111 2.704 +v 4.409 -2.3541 2.6809 +v 1.946 7.2669 9.256 +v -6.534 3.7749 8.997 +v 4.3251 -2.4943 2.6584 +v 1.0536 -6.3912 5.9001 +v -5.0077 4.1155 4.0942 +v 1.6906 -6.3075 5.9001 +v 1.6907 -6.3074 5.9001 +v 1.719 -6.4131 5.963 +v 1.8585 -1.0702 0 +v -6.513 3.7629 9.256 +v 1.767 -6.5921 5.997 +v 1.946 7.2669 8.738 +v 0.49 -4.9731 0.999 +v 2.2742 -2.2714 0.999 +v 1.451 -4.7821 0.999 +v 0.975 -4.9011 0.999 +v 4.157 -2.7751 2.6938 +v 1.815 -6.7711 5.963 +v 4.093 -2.8615 2.6955 +v 1.928 7.1979 8.497 +v 3.865 -3.1691 2.7016 +v 3.5355 -3.5326 2.6639 +v 1.86 
-6.9381 5.863 +v -6.513 3.7629 8.738 +v 1.953 -7.2851 4.997 +v 1.898 -7.0821 5.704 +v 1.5174 -1.5146 0 +v 1.898 7.0879 8.29 +v -6.451 3.7269 8.497 +v 1.898 7.0879 8.29 +v 1.898 7.0879 8.29 +v 1.928 -7.1921 5.497 +v 3.7746 5.2698 5.9 +v 1.86 6.9439 8.131 +v 1.898 7.0879 8.29 +v -6.352 3.6699 8.29 +v 0 -4.9971 0.999 +v 1.6082 -2.7826 0.999 +v 3.2663 5.6601 5.8999 +v 1.946 -7.2611 5.256 +v -0.49 -4.9731 0.999 +v -0.976 -4.9011 0.999 +v 3.2661 5.6602 5.8999 +v 3.321 5.7549 5.963 +v 3.535 -3.5331 2.664 +v 3.172 -3.8621 2.7016 +v 1.815 6.7769 8.031 +v 2.778 -4.1541 2.6938 +v 2.4975 -4.3227 2.6589 +v -6.224 3.5959 8.131 +v 1.719 -6.4131 9.963 +v 3.414 5.9159 5.997 +v 1.898 -7.0821 4.29 +v 1.767 -6.5921 9.997 +v 3.506 6.0759 5.963 +v 1.073 -1.8556 0 +v 1.86 -6.9381 4.131 +v 1.815 -6.7711 9.963 +v 0.8324 -3.1042 0.999 +v -5.2683 3.7759 5.8999 +v -1.452 -4.7821 0.999 +v -1.914 -4.6171 0.999 +v 1.815 -6.7711 4.031 +v 2.357 -4.4071 2.6817 +v 1.913 -4.6171 2.7042 +v 1.451 -4.7821 2.6811 +v 1.953 -7.2851 8.997 +v 1.2926 -4.8218 2.6585 +v 3.506 6.0759 4.031 +v -5.752 3.3239 5.963 +v 1.767 -6.5921 3.997 +v 0 6.6449 9.963 +v 1.0536 -6.3912 9.9001 +v 1.6906 -6.3075 9.9001 +v -5.6572 3.2692 5.8999 +v -5.6573 3.269 5.8999 +v 1.6907 -6.3074 9.9001 +v 0.5554 -2.0702 0 +v 3.414 5.9159 3.997 +v 1.719 -6.4131 4.031 +v 1.86 -6.9381 9.863 +v -5.913 3.4169 5.997 +v 0 6.8299 9.997 +v 1.898 -7.0821 9.704 +v -6.074 3.5089 5.963 +v 3.321 5.7549 4.031 +v 0 7.0159 9.963 +v 0 -3.2136 0.999 +v -2.357 -4.4071 0.999 +v -2.778 -4.1541 0.999 +v -3.172 -3.8621 0.999 +v 0.975 -4.9011 2.6936 +v 1.928 -7.1921 9.497 +v 0.49 -4.9731 2.7013 +v 0 -4.9971 2.6637 +v 0 7.1889 9.863 +v 3.2661 5.6602 4.0941 +v 3.2663 5.6601 4.0941 +v 1.6907 -6.3074 4.0939 +v 1.6906 -6.3075 4.0939 +v 1.946 -7.2611 9.256 +v -6.416 1.7219 9.963 +v 0 7.3379 9.704 +v 1.946 -7.2611 4.738 +v 0 -2.1432 0 +v 1.928 -7.1921 4.497 +v -6.595 1.7699 9.997 +v 0 7.4519 9.497 +v -0.003 -3.2132 0.999 +v -3.536 -3.5331 0.999 +v -3.865 -3.1691 0.999 +v 
-4.157 -2.7751 0.999 +v 1.898 -7.0821 8.29 +v -0.8324 -3.1042 0.999 +v -0.003 -4.997 2.6639 +v -6.774 1.8179 9.963 +v -0.49 -4.9731 2.7013 +v -0.976 -4.9011 2.6937 +v -1.2926 -4.822 2.6587 +v 0 -6.6391 5.963 +v 1.86 -6.9381 8.131 +v -6.074 3.5089 4.031 +v 0 -6.5298 5.8998 +v 2.2813 6.068 5.8999 +v -6.3103 1.6935 9.9001 +v -6.3103 1.6934 9.9001 +v -5.913 3.4169 3.997 +v 0 -6.8251 5.997 +v 0.6335 6.452 9.8999 +v -0.5554 -2.0702 0 +v 1.815 -6.7711 8.031 +v -6.941 1.8629 9.863 +v 0 -7.0101 5.963 +v -4.41 -2.3541 0.999 +v -4.62 -1.9111 0.999 +v -1.6082 -2.7826 0.999 +v 0 6.5356 9.8998 +v 1.767 -6.5921 7.997 +v -1.452 -4.7821 2.6815 +v 0 -7.1831 5.863 +v -1.914 -4.6171 2.7046 +v -7.085 1.9009 9.704 +v -2.357 -4.4071 2.6817 +v -5.752 3.3239 4.031 +v -2.4975 -4.3227 2.6589 +v 7.545 0.0029 8.997 +v 0 -7.5421 4.997 +v 1.719 -6.4131 8.031 +v -7.195 1.9309 9.497 +v 0 -7.3321 5.704 +v 7.52 0.0029 9.256 +v -5.6572 3.2692 4.0941 +v -5.6573 3.269 4.0941 +v -1.073 -1.8556 0 +v 0 -7.4461 5.497 +v -6.0642 2.2871 5.9001 +v 0 -7.5181 5.256 +v -4.976 -0.4871 0.999 +v -2.2742 -2.2714 0.999 +v -4.785 -1.4491 0.999 +v -4.904 -0.9731 0.999 +v -2.778 -4.1541 2.6938 +v -3.172 -3.8621 2.7016 +v -3.536 -3.5331 2.6645 +v 1.6907 -6.3074 8.0939 +v 1.6906 -6.3075 8.0939 +v 0 -7.0101 4.031 +v -5.9025 2.6776 4.0941 +v 1.946 -7.2611 8.738 +v 2.6746 5.9051 4.0941 +v -2.6786 4.6421 6.997 +v -6.224 3.5959 5.863 +v 0 -6.8251 3.997 +v -1.5174 -1.5146 0 +v 1.928 -7.1921 8.497 +v -6.534 3.7749 4.997 +v -5.205 0.0029 11.139 +v -6.352 3.6699 5.704 +v 3.7879 3.7907 6.997 +v -5 0.0029 0.999 +v -4.976 0.4929 0.999 +v -4.904 0.9779 0.999 +v 7.52 0.0029 8.738 +v 0 -6.6391 4.031 +v -2.7854 -1.6054 0.999 +v -6.451 3.7269 5.497 +v 0 -6.6391 9.963 +v 7.449 0.0029 8.497 +v -6.513 3.7629 5.256 +v -3.865 -3.1691 2.7016 +v -5.0863 -1.0089 11.1257 +v -4.157 -2.7751 2.6938 +v -5.1562 -0.5049 11.1223 +v 0 -6.8251 9.997 +v -4.3256 -2.4946 2.6589 +v -7.071 7.0739 14.497 +v 0 -6.5298 4.0942 +v 0.6333 -6.4466 4.0942 +v 0 -7.0101 
9.963 +v -1.8585 -1.0702 0 +v -6.513 3.7629 4.738 +v 0 -7.5181 4.738 +v -6.451 3.7269 4.497 +v 0 -7.4461 4.497 +v 7.335 0.0029 8.29 +v 0 -6.5298 9.8998 +v -4.785 1.4539 0.999 +v -4.62 1.9159 0.999 +v -5.913 3.4169 7.997 +v -3.1071 -0.8296 0.999 +v -6.352 3.6699 4.29 +v 0 -7.3321 4.29 +v 7.186 0.0029 8.131 +v -6.224 3.5959 4.131 +v 0 -7.1831 4.131 +v -4.41 -2.3541 2.6817 +v 0 -7.1831 9.863 +v -4.62 -1.9111 2.7046 +v -4.785 -1.4491 2.6815 +v 7.013 0.0029 8.031 +v -4.8248 -1.2898 2.6587 +v -5.752 3.3239 8.031 +v 0 -7.1831 9.863 +v 0 -7.1831 9.863 +v 0 -7.3321 9.704 +v 0 -7.1831 9.863 +v -1.3465 5.0279 11.1412 +v -2.0731 -0.5525 0 +v 0 -7.4461 9.497 +v -5.6572 3.2692 8.0941 +v -5.6573 3.269 8.0941 +v -4.41 2.3599 0.999 +v -4.157 2.7809 0.999 +v -3.865 3.1749 0.999 +v -3.2164 0.0029 0.999 +v -3.216 -0.0001 0.999 +v -1.9821 4.7879 11.121 +v -4.9999 -0.0001 2.6639 +v -5 0.0029 2.6637 +v -1.5077 4.9727 11.1311 +v 0 -7.0101 8.031 +v -5.9025 2.6776 8.0941 +v -4.904 -0.9731 2.6937 +v -4.976 -0.4871 2.7013 +v 1.719 6.4179 5.963 +v -2.1461 0.0029 0 +v 0 -6.8251 7.997 +v -3.1071 0.8353 0.999 +v 1.767 6.5979 7.997 +v -3.536 3.5379 0.999 +v -4.976 0.4929 2.7013 +v 1.6906 6.3125 5.8999 +v -4.904 0.9779 2.6936 +v 1.6906 6.3125 5.8999 +v -4.8246 1.2954 2.6585 +v 1.767 6.5979 5.997 +v 6.595 -1.7641 7.997 +v 1.815 6.7769 5.963 +v 1.719 6.4179 8.031 +v -2.0731 0.5583 0 +v -6.4495 -0.6304 4.0942 +v 1.86 6.9439 5.863 +v -6.3103 1.6934 5.9001 +v -6.3103 1.6935 5.9001 +v 6.415 -1.7161 8.031 +v -6.416 1.7219 5.963 +v -2.7854 1.6111 0.999 +v -6.394 -1.0508 5.9001 +v -4.785 1.4539 2.6811 +v -4.62 1.9159 2.7042 +v -6.595 1.7699 5.997 +v 1.0569 6.3961 8.0941 +v 1.6906 6.3125 8.0941 +v -6.3103 -1.6877 5.9001 +v 1.6906 6.3125 8.0941 +v -4.41 2.3599 2.6817 +v -6.3103 -1.6878 5.9001 +v -6.416 -1.7161 5.963 +v -4.3256 2.5003 2.6589 +v 6.3933 -1.054 8.0941 +v 1.953 7.2909 4.997 +v 6.3096 -1.6877 8.0941 +v 6.3096 -1.6878 8.0941 +v -6.774 1.8179 5.963 +v 1.898 7.0879 5.704 +v 0 7.5479 8.997 +v -6.595 
-1.7641 5.997 +v -1.8585 1.0759 0 +v -6.941 1.8629 5.863 +v 0 7.5229 9.256 +v 1.928 7.1979 5.497 +v -6.774 -1.8121 5.963 +v -7.288 1.9559 8.997 +v -7.288 1.9559 4.997 +v 1.946 7.2669 5.256 +v 4.6393 -2.6757 6.997 +v 0 7.5229 8.738 +v -6.941 -1.8571 5.863 +v -2.2742 2.2771 0.999 +v 0 7.4519 8.497 +v -7.085 1.9009 5.704 +v -4.157 2.7809 2.6938 +v -7.288 -1.9501 4.997 +v -3.865 3.1749 2.7016 +v -3.536 3.5379 2.664 +v -7.085 -1.8951 5.704 +v -3.5355 3.5383 2.6639 +v 0 7.3379 8.29 +v 1.946 7.2669 4.738 +v -7.195 1.9309 5.497 +v -1.5174 1.5203 0 +v -7.195 -1.9251 5.497 +v -7.264 1.9489 9.256 +v 0 7.1889 8.131 +v 1.928 7.1979 4.497 +v -7.264 1.9489 5.256 +v -7.264 -1.9441 5.256 +v -1.6082 2.7883 0.999 +v -2.778 4.1599 2.6938 +v -2.8644 4.0959 2.6955 +v -2.4972 4.328 2.6584 +v 0 7.0159 8.031 +v 1.898 7.0879 4.29 +v 4.5804 2.4512 11.1312 +v 4.5055 2.6042 11.1412 +v -7.085 1.9009 8.29 +v -3.172 3.8679 2.7016 +v -7.085 1.9009 4.29 +v 1.898 7.0879 4.29 +v 1.898 7.0879 4.29 +v -7.085 1.9009 8.29 +v -1.073 1.8614 0 +v -7.085 -1.8951 4.29 +v -7.085 1.9009 4.29 +v 0 -6.6391 8.031 +v -7.085 1.9009 4.29 +v -7.085 1.9009 8.29 +v -7.085 1.9009 8.29 +v -6.941 1.8629 4.131 +v -7.085 1.9009 4.29 +v 4.3118 2.884 11.1255 +v -6.941 1.8629 8.131 +v -7.085 1.9009 8.29 +v -6.941 -1.8571 4.131 +v 4.0048 3.2896 11.1222 +v -6.774 1.8179 4.031 +v -0.8324 3.1095 0.999 +v -2.357 4.4119 2.6809 +v -6.774 1.8179 8.031 +v 0 -6.5298 8.0942 +v -1.914 4.6219 2.704 +v 0.6333 -6.4466 8.0942 +v -1.452 4.7879 2.682 +v -6.774 -1.8121 4.031 +v -1.2928 4.8277 2.6593 +v -0.5554 2.0757 0 +v -6.595 1.7699 3.997 +v -6.595 -1.7641 3.997 +v 0 -7.4461 8.497 +v -0.003 3.2189 0.999 +v 6.223 3.5959 9.863 +v -0.003 5.0027 2.6639 +v 0 -7.3321 8.29 +v -6.416 1.7219 4.031 +v -0.976 4.9069 2.6942 +v -6.416 -1.7161 4.031 +v 6.352 3.6699 9.704 +v -0.49 4.9789 2.7016 +v 0 -7.1831 8.131 +v -6.394 1.0565 4.0939 +v 6.451 3.7269 9.497 +v -6.3103 1.6934 4.0939 +v -6.3103 1.6935 4.0939 +v 1.86 6.9439 4.131 +v 1.898 7.0879 4.29 +v 8.3126 
5.5573 14.495 +v 8.8165 4.7155 14.495 +v -7.264 1.9489 8.738 +v 9.2364 3.8288 14.495 +v -6.3103 -1.6878 4.0939 +v -6.3103 -1.6877 4.0939 +v 9.5663 2.905 14.495 +v -7.264 1.9489 4.738 +v 9.8052 1.9533 14.495 +v 9.9492 0.9826 14.495 +v 9.9971 0.0029 14.495 +v -7.195 1.9309 8.497 +v -7.195 1.9309 4.497 +v -7.264 -1.9441 4.738 +v 7.7278 6.345 14.495 +v -7.195 -1.9251 4.497 +v -6.4495 0.6361 5.8998 +v -5.752 -3.3181 9.963 +v -5.9025 -2.6719 5.8999 +v -6.642 0.0029 9.963 +v 9.9492 -0.9769 14.495 +v 9.8052 -1.9476 14.495 +v 9.5663 -2.8993 14.495 +v 9.2364 -3.823 14.495 +v -6.642 0.0029 5.963 +v 8.8165 -4.7098 14.495 +v 8.3126 -5.5516 14.495 +v 7.7278 -6.3393 14.495 +v -5.913 -3.4111 9.997 +v -6.5327 0.0029 5.8998 +v -6.503 7.5887 14.497 +v 0 10 14.495 +v 0.9797 9.952 14.495 +v 1.9504 9.8081 14.495 +v -6.074 -3.5041 9.963 +v 2.9022 9.5691 14.495 +v -6.828 0.0029 9.997 +v 3.8259 9.2392 14.495 +v -1.719 6.4179 9.963 +v 4.7127 8.8193 14.495 +v -6.0642 -2.2814 4.0939 +v 5.5544 8.3155 14.495 +v -6.828 0.0029 5.997 +v -7.013 0.0029 9.963 +v -7.013 0.0029 5.963 +v -1.767 6.5979 9.997 +v 6.3422 7.7307 14.495 +v 1.815 6.7769 4.031 +v -7.186 0.0029 5.863 +v -5.6573 -3.2632 9.8999 +v -5.6572 -3.2634 9.8999 +v -6.503 7.5849 14.495 +v -6.3422 7.7307 14.495 +v -1.815 6.7769 9.963 +v -5.5544 8.3155 14.495 +v -7.545 0.0029 4.997 +v -6.4495 0.6361 9.8998 +v -4.7127 8.8193 14.495 +v -3.8259 9.2392 14.495 +v -2.9022 9.5691 14.495 +v -1.9504 9.8081 14.495 +v -7.335 0.0029 5.704 +v -0.9797 9.952 14.495 +v -6.5327 0.0029 9.8998 +v -1.0569 6.3961 9.8999 +v -5.752 -3.3181 5.963 +v -7.449 0.0029 5.497 +v 5.752 3.3239 9.963 +v -1.6906 6.3125 9.8999 +v -1.6906 6.3125 9.8999 +v -7.521 0.0029 5.256 +v -5.6573 -3.2632 5.8999 +v -5.6572 -3.2634 5.8999 +v -6.224 -3.5901 9.863 +v 5.913 3.4169 9.997 +v -1.86 6.9439 9.863 +v -5.913 -3.4111 5.997 +v -6.352 -3.6641 9.704 +v -1.899 7.0879 9.704 +v -7.186 0.0029 9.863 +v 6.073 3.5089 9.963 +v -6.074 -3.5041 5.963 +v -7.013 0.0029 4.031 +v -6.451 -3.7211 9.497 
+v -7.186 0.0029 9.863 +v -7.186 0.0029 9.863 +v -1.928 7.1979 9.497 +v -7.335 0.0029 9.704 +v -7.186 0.0029 9.863 +v -6.828 0.0029 3.997 +v 5.6573 3.269 9.8999 +v 5.6572 3.2692 9.8999 +v -7.545 0.0029 8.997 +v -7.521 0.0029 9.256 +v -7.449 0.0029 9.497 +v -6.074 -3.5041 4.031 +v -6.642 0.0029 4.031 +v -5.913 -3.4111 3.997 +v -7.521 0.0029 8.738 +v -6.5327 0.0029 4.0942 +v 5.9022 2.6774 9.8999 +v -5.752 -3.3181 4.031 +v -7.521 0.0029 4.738 +v -0.6333 -6.4466 9.8998 +v -7.449 0.0029 4.497 +v 7.288 1.9559 8.997 +v -5.6572 -3.2634 4.0941 +v -5.6573 -3.2632 4.0941 +v -7.335 0.0029 4.29 +v 7.264 1.9489 9.256 +v -7.186 0.0029 4.131 +v 2.6786 4.6421 6.997 +v 1.767 6.5979 3.997 +v 7.264 1.9489 8.738 +v 7.195 1.9309 8.497 +v 6.4491 -0.6307 5.8999 +v -5.269 -3.7694 4.0942 +v 1.719 6.4179 4.031 +v 0 -7.5421 8.997 +v 7.085 1.9009 8.29 +v 0 -7.5181 9.256 +v -3.7879 3.7907 6.997 +v 6.941 1.8629 8.131 +v -5.0256 -1.3436 11.1413 +v 0 -7.5181 8.738 +v 6.774 1.8179 8.031 +v 1.0569 6.3961 4.0941 +v 1.6906 6.3125 4.0941 +v 1.6906 6.3125 4.0941 +v -2.4483 4.5832 11.1312 +v -2.6013 4.5083 11.1412 +v -4.9704 -1.5049 11.1312 +v 0 -5.3543 6.997 +v -3.2868 4.0077 11.1222 +v -2.8811 4.3147 11.1255 +v -7.071 -7.0681 14.497 +v 7.069 -7.0661 14.495 +v -6.224 -3.5901 5.863 +v -1.719 -6.4131 9.963 +v 0 6.8299 7.997 +v -6.595 1.7699 7.997 +v 1.0118 -5.0835 11.1257 +v 1.3464 -5.0228 11.1413 +v 0.5077 -5.1533 11.1223 +v 6.415 -1.7161 5.963 +v 0 -5.2021 11.139 +v 0.6335 6.452 5.8999 +v -1.767 -6.5921 9.997 +v 6.3096 -1.6878 5.8999 +v 6.3096 -1.6877 5.8999 +v 0 6.6449 8.031 +v 6.827 0.0029 7.997 +v -1.815 -6.7711 9.963 +v -6.416 1.7219 8.031 +v 0 6.5356 5.8998 +v 6.595 -1.7641 5.997 +v 0 6.6449 5.963 +v -1.953 -7.2851 8.997 +v 6.774 -1.8121 5.963 +v 0 6.5356 8.0942 +v 6.642 0.0029 8.031 +v 0 6.8299 5.997 +v -6.394 1.0565 8.0939 +v -1.6907 -6.3074 9.9001 +v -1.6906 -6.3075 9.9001 +v 6.941 -1.8571 5.863 +v -6.534 -3.7701 4.997 +v -6.3103 1.6934 8.0939 +v -1.86 -6.9381 9.863 +v -6.3103 1.6935 8.0939 +v 
6.4491 0.6364 8.0941 +v 0 7.0159 5.963 +v 6.5327 0.0029 8.0942 +v 7.288 -1.9501 4.997 +v -1.899 -7.0821 9.704 +v 0 7.1889 5.863 +v 7.085 -1.8951 5.704 +v 0 7.5479 4.997 +v -1.928 -7.1921 9.497 +v 7.195 -1.9251 5.497 +v 0 7.3379 5.704 +v -7.013 0.0029 8.031 +v 7.264 -1.9441 5.256 +v -0.6335 6.452 8.0941 +v -1.947 -7.2611 9.256 +v -6.352 -3.6641 5.704 +v -1.953 7.2909 8.997 +v -6.451 -3.7211 5.497 +v 2.4484 -4.5775 11.1312 +v 5.1743 -1.3836 6.997 +v 2.6013 -4.5026 11.1412 +v 1.9822 -4.7824 11.1211 +v 7.264 -1.9441 4.738 +v 1.5077 -4.9675 11.1312 +v -1.899 -7.0821 8.29 +v -1.947 7.2669 9.256 +v -6.513 -3.7571 5.256 +v 7.195 -1.9251 4.497 +v -1.86 -6.9381 8.131 +v -6.513 -3.7571 4.738 +v -1.947 7.2669 8.738 +v -1.815 -6.7711 8.031 +v -6.451 -3.7211 4.497 +v 7.085 -1.8951 4.29 +v -7.449 0.0029 8.497 +v -1.928 7.1979 8.497 +v 7.085 -1.8952 4.29 +v 7.085 -1.8952 4.29 +v -6.352 -3.6641 4.29 +v 6.941 -1.8571 4.131 +v -1.767 -6.5921 7.997 +v 3.6803 3.6832 11.1389 +v 7.085 -1.8952 4.29 +v -7.335 0.0029 8.29 +v -6.224 -3.5901 4.131 +v 6.774 -1.8121 4.031 +v 3.6803 -3.6775 11.1389 +v -1.899 7.0879 8.29 +v 6.595 -1.7641 3.997 +v 3.2868 -4.002 11.1222 +v -7.186 0.0029 8.131 +v 2.8811 -4.309 11.1255 +v -1.719 -6.4131 8.031 +v 2.8811 4.3147 11.1255 +v -1.86 6.9439 8.131 +v -0.6333 -6.4466 5.8998 +v 3.2868 4.0077 11.1222 +v -1.0536 -6.3912 8.0939 +v 6.415 -1.7161 4.031 +v 0 7.4519 5.497 +v -1.6906 -6.3075 8.0939 +v -1.815 6.7769 8.031 +v -1.6907 -6.3074 8.0939 +v 0 7.5229 5.256 +v 6.3933 -1.054 4.0941 +v 6.3096 -1.6877 4.0941 +v 6.3096 -1.6878 4.0941 +v -1.947 -7.2611 8.738 +v -1.928 -7.1921 8.497 +v 4.3118 -2.8782 11.1255 +v 4.5055 -2.5985 11.1412 +v 4.0048 -3.2839 11.1222 +v 0 7.5229 4.738 +v 0 7.4519 4.497 +v 5.186 5.1889 9.704 +v 0 7.3379 4.29 +v 5.267 5.2699 9.497 +v 0 7.1889 4.131 +v 4.9698 -1.5049 11.1311 +v 5.0251 -1.3436 11.1412 +v 4.7851 -1.9792 11.121 +v 4.5804 -2.4455 11.1312 +v -2.2842 -6.0614 9.9001 +v 0 7.0159 4.031 +v 0 6.8299 3.997 +v 5.156 -0.5049 11.1222 +v 5.086 
-1.0089 11.1255 +v -2.6767 -5.8989 8.0942 +v -3.321 -5.7491 9.963 +v -6.416 -1.7161 9.963 +v 6.223 -3.5901 5.863 +v -3.414 -5.9101 9.997 +v -6.595 -1.7641 9.997 +v 6.534 -3.7701 4.997 +v -3.507 -6.0711 9.963 +v 6.352 -3.6641 5.704 +v -6.774 -1.8121 9.963 +v 6.451 -3.7211 5.497 +v -3.7709 -5.2674 9.8997 +v 6.513 -3.7571 5.256 +v 0 6.6449 4.031 +v -3.2666 -5.6543 9.8998 +v -3.2664 -5.6545 9.8998 +v 4.696 4.6989 9.963 +v -6.394 -1.0508 9.9001 +v -6.3103 -1.6877 9.9001 +v -6.3103 -1.6878 9.9001 +v 6.513 -3.7571 4.738 +v 4.828 4.8309 9.997 +v 0 6.5356 4.0942 +v -6.941 -1.8571 9.863 +v 4.959 4.9619 9.963 +v 6.451 -3.7211 4.497 +v 6.223 3.5959 5.863 +v -3.593 6.2259 9.863 +v -7.085 -1.8951 9.704 +v 5.0077 4.1155 9.8998 +v 6.352 -3.6641 4.29 +v -3.668 6.3549 9.704 +v 4.619 4.6219 9.8999 +v 6.223 -3.5901 4.131 +v -3.725 6.4539 9.497 +v -7.195 -1.9251 9.497 +v 5.081 5.0839 9.863 +v -3.507 -6.0711 8.031 +v 6.534 3.7749 8.997 +v 6.0651 -2.2784 5.8999 +v -1.6906 -6.3075 5.9001 +v -1.6907 -6.3074 5.9001 +v -1.719 -6.4131 5.963 +v -3.414 -5.9101 7.997 +v 6.513 3.7629 9.256 +v -1.767 -6.5921 5.997 +v -4.5804 -2.4455 11.1312 +v -4.5055 -2.5985 11.1412 +v -4.7853 -1.9793 11.1211 +v 6.513 3.7629 8.738 +v 5.9022 -2.6717 4.0941 +v -1.815 -6.7711 5.963 +v -3.321 -5.7491 8.031 +v 6.451 3.7269 8.497 +v 5.2677 -3.7709 5.8999 +v 6.352 3.6699 8.29 +v 6.534 3.7749 4.997 +v -1.86 -6.9381 5.863 +v 5.6572 -3.2634 5.8999 +v -2.6765 5.9044 9.8998 +v 5.6573 -3.2632 5.8999 +v 5.752 -3.3181 5.963 +v -3.2664 -5.6545 8.0942 +v -3.2666 -5.6543 8.0942 +v 6.223 3.5959 8.131 +v -3.321 5.7549 9.963 +v -1.953 -7.2851 4.997 +v 5.913 -3.4111 5.997 +v -3.6803 -3.6775 11.1389 +v -1.899 -7.0821 5.704 +v -3.414 5.9159 9.997 +v -4.0048 -3.2839 11.1222 +v -4.3118 -2.8782 11.1255 +v 6.073 -3.5041 5.963 +v -1.928 -7.1921 5.497 +v -4.6393 2.6814 6.997 +v -3.507 6.0759 9.963 +v -6.828 0.0029 7.997 +v -1.947 -7.2611 5.256 +v 6.073 -3.5041 4.031 +v 5.913 -3.4111 3.997 +v -2.8811 -4.309 11.1255 +v -3.2664 5.6602 9.8998 +v 
-2.6013 -4.5026 11.1412 +v -3.2666 5.66 9.8998 +v -3.2868 -4.002 11.1222 +v -6.642 0.0029 8.031 +v -1.899 -7.0821 4.29 +v 5.752 -3.3181 4.031 +v -1.86 -6.9381 4.131 +v -6.5327 0.0029 8.0942 +v -1.815 -6.7711 4.031 +v -1.5077 -4.9675 11.1312 +v -1.3464 -5.0228 11.1413 +v -1.9822 -4.7824 11.1211 +v -2.4484 -4.5775 11.1312 +v 5.6573 -3.2632 4.0941 +v 5.6572 -3.2634 4.0941 +v -1.767 -6.5921 3.997 +v -0.5077 -5.1533 11.1223 +v -1.0118 -5.0835 11.1257 +v 6.073 3.5089 8.031 +v -1.719 -6.4131 4.031 +v -3.773 -6.5311 8.997 +v -3.593 -6.2201 9.863 +v -3.6803 3.6832 11.1389 +v -6.4495 -0.6304 8.0942 +v -3.668 -6.3491 9.704 +v -1.0536 -6.3912 4.0939 +v -1.6906 -6.3075 4.0939 +v -1.6907 -6.3074 4.0939 +v -4.3118 2.884 11.1255 +v -7.288 -1.9501 8.997 +v -4.0048 3.2896 11.1222 +v 6.352 3.6699 5.704 +v -1.947 -7.2611 4.738 +v -1.928 -7.1921 4.497 +v 6.451 3.7269 5.497 +v 6.415 -1.7161 9.963 +v -7.264 -1.9441 9.256 +v 6.0651 2.2841 8.0941 +v -1.767 6.5979 7.997 +v 6.595 -1.7641 9.997 +v 6.513 3.7629 5.256 +v 6.774 -1.8121 9.963 +v -7.085 -1.8951 8.29 +v 6.595 1.7699 7.997 +v 6.513 3.7629 4.738 +v -2.2842 -6.0614 5.9001 +v -1.719 6.4179 8.031 +v 6.451 3.7269 4.497 +v 6.3096 -1.6878 9.8999 +v 6.3096 -1.6877 9.8999 +v -6.941 -1.8571 8.131 +v 6.352 3.6699 4.29 +v 6.415 1.7219 8.031 +v 6.941 -1.8571 9.863 +v 6.223 3.5959 4.131 +v 7.085 -1.8951 9.704 +v -1.6906 6.3125 8.0941 +v -6.774 -1.8121 8.031 +v -1.6906 6.3125 8.0941 +v -2.6767 -5.8989 4.0942 +v 6.3096 1.6935 8.0941 +v 6.3096 1.6935 8.0941 +v 7.195 -1.9251 9.497 +v -3.7709 -5.2674 5.8997 +v -3.321 -5.7491 5.963 +v -3.2664 -5.6545 5.8998 +v 4.1127 -5.0049 5.8998 +v -3.2666 -5.6543 5.8998 +v -3.414 -5.9101 5.997 +v 4.696 -4.6941 5.963 +v -3.507 -6.0711 5.963 +v 4.6189 -4.6164 5.8998 +v 4.6191 -4.6162 5.8998 +v -3.773 6.5369 8.997 +v -3.725 -6.4481 9.497 +v 4.828 -4.8251 5.997 +v 2.6013 4.5083 11.1412 +v -7.264 -1.9441 8.738 +v -3.76 -6.5101 9.256 +v -3.76 6.5159 9.256 +v 4.959 -4.9561 5.963 +v -7.195 -1.9251 8.497 +v 5.081 -5.0781 
5.863 +v 1.9821 4.7879 11.121 +v 2.4483 4.5832 11.1312 +v -3.76 6.5159 8.738 +v -3.725 6.4539 8.497 +v -3.76 -6.5101 8.738 +v 6.534 -3.7701 8.997 +v -3.725 -6.4481 8.497 +v 6.223 -3.5901 9.863 +v -3.668 6.3549 8.29 +v 5.0077 -4.1098 4.0942 +v 6.352 -3.6641 9.704 +v -3.668 -6.3491 8.29 +v 5.318 5.3209 8.738 +v -3.507 -6.0711 4.031 +v 5.081 -5.0781 4.131 +v -5.9025 -2.6719 9.8999 +v 5.335 5.3379 8.997 +v 6.451 -3.7211 9.497 +v 4.959 -4.9561 4.031 +v -3.593 -6.2201 8.131 +v -3.593 6.2259 8.131 +v -3.414 -5.9101 3.997 +v 5.267 5.2699 8.497 +v 6.513 -3.7571 9.256 +v 4.828 -4.8251 3.997 +v 5.186 5.1889 8.29 +v -4.697 -4.6941 9.963 +v -3.321 -5.7491 4.031 +v 4.696 -4.6941 4.031 +v 6.513 -3.7571 8.738 +v 5.318 5.3209 9.256 +v -4.828 -4.8251 9.997 +v -3.2664 -5.6545 4.0942 +v -2.2813 6.068 8.0941 +v -3.2666 -5.6543 4.0942 +v 6.451 -3.7211 8.497 +v 4.6191 -4.6162 4.0942 +v 4.6189 -4.6164 4.0942 +v -4.959 -4.9561 9.963 +v 6.352 -3.6641 8.29 +v 5.318 -5.3151 4.738 +v -5.0077 -4.1098 9.8998 +v 5.335 -5.3321 4.997 +v 6.223 -3.5901 8.131 +v 5.267 -5.2641 4.497 +v -4.6191 -4.6163 9.8997 +v -5.082 -5.0781 9.863 +v 5.186 -5.1841 4.29 +v -5.082 -5.0781 8.131 +v 5.186 -5.1841 5.704 +v 6.0651 -2.2784 9.8999 +v -4.959 -4.9561 8.031 +v 5.752 -3.3181 9.963 +v 5.267 -5.2641 5.497 +v 5.318 -5.3151 5.256 +v -3.507 6.0759 8.031 +v 5.913 -3.4111 9.997 +v 6.073 -3.5041 9.963 +v -3.593 -6.2201 5.863 +v 5.2677 -3.7709 9.8999 +v -3.773 -6.5311 4.997 +v -5.175 1.3893 6.997 +v -3.668 -6.3491 5.704 +v 5.6572 -3.2634 9.8999 +v 5.6573 -3.2632 9.8999 +v -6.595 -1.7641 7.997 +v -3.725 -6.4481 5.497 +v 5.6572 3.2692 5.8999 +v 5.6573 3.269 5.8999 +v -3.76 -6.5101 5.256 +v 5.752 3.3239 5.963 +v 6.073 -3.5041 8.031 +v -6.416 -1.7161 8.031 +v 5.913 3.4169 5.997 +v 5.081 5.0839 8.131 +v 4.959 4.9619 8.031 +v -3.76 -6.5101 4.738 +v 6.073 3.5089 5.963 +v -3.725 -6.4481 4.497 +v -6.3103 -1.6878 8.0939 +v -6.3103 -1.6877 8.0939 +v -3.668 -6.3491 4.29 +v -3.593 -6.2201 4.131 +v -6.0642 -2.2814 8.0939 +v -5.0077 
-4.1098 5.8998 +v 5.267 3.7775 4.094 +v -4.6191 -4.6163 5.8997 +v 3.2663 -5.6544 5.8999 +v 3.2661 -5.6545 5.8999 +v -4.697 -4.6941 5.963 +v 3.593 6.2259 9.863 +v 3.321 -5.7491 5.963 +v -4.1127 -5.0049 8.0942 +v -6.074 -3.5041 8.031 +v -5.187 5.1889 9.704 +v -4.828 -4.8251 5.997 +v 3.414 -5.9101 5.997 +v 3.667 6.3549 9.704 +v -5.267 5.2699 9.497 +v 3.724 6.4539 9.497 +v 3.506 -6.0711 5.963 +v -4.959 -4.9561 5.963 +v 6.073 3.5089 4.031 +v -5.082 -5.0781 5.863 +v 5.913 3.4169 3.997 +v -4.697 4.6989 9.963 +v 5.752 3.3239 4.031 +v 3.321 5.7549 9.963 +v -4.828 4.8309 9.997 +v 3.7731 -5.2654 4.0941 +v 5.6572 3.2692 4.0941 +v 5.6573 3.269 4.0941 +v 3.414 5.9159 9.997 +v -4.959 4.9619 9.963 +v -5.082 -5.0781 4.131 +v 3.506 -6.0711 4.031 +v 3.506 6.0759 9.963 +v -4.959 -4.9561 4.031 +v -4.1127 5.0106 9.8998 +v 5.9022 2.6774 5.8999 +v -4.828 -4.8251 3.997 +v 3.7746 5.2698 9.9 +v -6.534 -3.7701 8.997 +v -4.6191 4.6219 9.8998 +v 3.2663 5.6601 9.8999 +v -4.6192 4.6217 9.8998 +v 3.2661 5.6602 9.8999 +v -6.513 -3.7571 9.256 +v -4.697 -4.6941 4.031 +v -5.082 5.0839 9.863 +v -4.1127 -5.0049 4.0942 +v -6.513 -3.7571 8.738 +v -4.6191 -4.6163 4.0943 +v -6.451 -3.7211 8.497 +v 4.696 -4.6941 9.963 +v 5.267 3.7775 8.094 +v 5.913 3.4169 7.997 +v -6.352 -3.6641 8.29 +v 4.828 -4.8251 9.997 +v 5.752 3.3239 8.031 +v 4.959 -4.9561 9.963 +v -6.224 -3.5901 8.131 +v 5.6572 3.2692 8.0941 +v 4.1127 -5.0049 9.8998 +v 5.6573 3.269 8.0941 +v 4.6189 -4.6164 9.8998 +v 4.6191 -4.6162 9.8998 +v 5.081 -5.0781 9.863 +v -4.5055 2.6042 11.1412 +v 1.5077 4.9727 11.1311 +v 1.3465 5.0279 11.1412 +v -4.7853 1.985 11.1211 +v 0.5077 5.1589 11.1222 +v 1.0117 5.0889 11.1255 +v -4.5804 2.4512 11.1312 +v 5.0077 -4.1098 8.0942 +v 5.081 -5.0781 8.131 +v 6.0651 2.2841 4.0941 +v -5.3571 0.0029 6.997 +v 4.959 -4.9561 8.031 +v 6.3933 1.0597 5.8999 +v 4.828 -4.8251 7.997 +v -3.414 5.9159 7.997 +v 6.415 1.7219 5.963 +v -5.913 -3.4111 7.997 +v 6.3096 1.6935 5.8999 +v 6.3096 1.6935 5.8999 +v -5.318 -5.3151 8.738 +v 4.696 -4.6941 
8.031 +v -3.321 5.7549 8.031 +v -5.335 -5.3321 8.997 +v 6.595 1.7699 5.997 +v 4.828 4.8309 7.997 +v -5.267 -5.2641 8.497 +v -5.752 -3.3181 8.031 +v 6.774 1.8179 5.963 +v -3.2664 5.6602 8.0942 +v 4.6191 -4.6162 8.0942 +v -3.2666 5.66 8.0942 +v 4.6189 -4.6164 8.0942 +v -5.187 -5.1841 8.29 +v 6.941 1.8629 5.863 +v 5.318 -5.3151 8.738 +v 4.696 4.6989 8.031 +v -5.6572 -3.2634 8.0941 +v 5.335 -5.3321 8.997 +v -5.187 -5.1841 9.704 +v -5.6573 -3.2632 8.0941 +v 7.288 1.9559 4.997 +v -5.318 -5.3151 4.738 +v 5.267 -5.2641 8.497 +v 7.085 1.9009 5.704 +v 4.1127 5.0106 8.0942 +v -5.335 -5.3321 4.997 +v 4.619 4.6219 8.0941 +v -5.267 -5.2641 9.497 +v -5.267 -5.2641 4.497 +v -3.7724 5.2718 8.0942 +v 5.186 -5.1841 8.29 +v 7.195 1.9309 5.497 +v -5.318 -5.3151 9.256 +v -5.187 -5.1841 4.29 +v 5.186 -5.1841 9.704 +v 7.264 1.9489 5.256 +v 3.772 6.5369 8.997 +v -5.187 -5.1841 5.704 +v -5.269 -3.7694 8.0942 +v 5.267 -5.2641 9.497 +v 7.264 1.9489 4.738 +v -5.267 -5.2641 5.497 +v 7.195 1.9309 4.497 +v 5.318 -5.3151 9.256 +v 3.76 6.5159 9.256 +v -5.318 -5.3151 5.256 +v 7.085 1.9009 4.29 +v 6.941 1.8629 4.131 +v 3.76 6.5159 8.738 +v -5.318 5.3209 8.738 +v 6.774 1.8179 4.031 +v -5.335 5.3379 8.997 +v 3.724 6.4539 8.497 +v -5.267 5.2699 8.497 +v 6.595 1.7699 3.997 +v 3.667 6.3549 8.29 +v -5.187 5.1889 8.29 +v 3.593 6.2259 8.131 +v 6.415 1.7219 4.031 +v 5.205 0.0029 11.139 +v -5.318 5.3209 9.256 +v 5.086 1.0146 11.1255 +v 5.156 0.5106 11.1222 +v -5.175 -1.3836 6.997 +v 6.3096 1.6935 4.0941 +v 6.3096 1.6935 4.0941 +v 6.642 0.0029 9.963 +v 6.827 0.0029 9.997 +v 7.013 0.0029 9.963 +v 7.186 0.0029 9.863 +v -4.828 -4.8251 7.997 +v 7.335 0.0029 9.704 +v 6.5327 0.0029 5.8998 +v -5.082 5.0839 8.131 +v 6.642 0.0029 5.963 +v 7.449 0.0029 9.497 +v -4.697 -4.6941 8.031 +v -4.959 4.9619 8.031 +v 6.5327 0.0029 9.8998 +v 6.827 0.0029 5.997 +v -4.6191 -4.6163 8.0943 +v -4.6393 -2.6757 6.997 +v 7.013 0.0029 5.963 +v 6.4491 -0.6307 9.8999 +v -3.7879 -3.785 6.997 +v 7.186 0.0029 5.863 +v 3.506 6.0759 8.031 +v 
-2.6786 -4.6364 6.997 +v 7.545 0.0029 4.997 +v -1.3864 -5.1721 6.997 +v 7.335 0.0029 5.704 +v 7.288 -1.9501 8.997 +v 7.449 0.0029 5.497 +v 5.318 5.3209 4.738 +v 7.52 0.0029 5.256 +v 5.335 5.3379 4.997 +v 7.264 -1.9441 9.256 +v 5.267 5.2699 4.497 +v 5.186 5.1889 4.29 +v 7.264 -1.9441 8.738 +v 7.52 0.0029 4.738 +v 5.186 5.1889 5.704 +v 7.195 -1.9251 8.497 +v 5.267 5.2699 5.497 +v 7.449 0.0029 4.497 +v 5.318 5.3209 5.256 +v 7.085 -1.8951 8.29 +v 7.085 -1.8952 8.29 +v 7.085 -1.8952 8.29 +v 6.941 -1.8571 8.131 +v 7.085 -1.8952 8.29 +v 7.335 0.0029 4.29 +v 2.2813 6.068 9.8999 +v 6.774 -1.8121 8.031 +v 7.186 0.0029 4.131 +v 1.719 6.4179 9.963 +v 7.013 0.0029 4.031 +v 1.767 6.5979 9.997 +v 6.827 0.0029 3.997 +v 1.815 6.7769 9.963 +v 6.642 0.0029 4.031 +v 5.9022 -2.6717 8.0941 +v 6.4491 0.6364 4.0941 +v 6.5327 0.0029 4.0942 +v 5.0077 4.1155 5.8998 +v -5.752 3.3239 9.963 +v -5.913 3.4169 9.997 +v -6.074 3.5089 9.963 +v -0.6335 6.452 4.0941 +v -1.0569 6.3961 5.8999 +v -1.6906 6.3125 5.8999 +v -1.6906 6.3125 5.8999 +v -1.719 6.4179 5.963 +v -5.2683 3.7759 9.8999 +v -1.767 6.5979 5.997 +v -1.815 6.7769 5.963 +v -1.86 6.9439 5.863 +v -5.6572 3.2692 9.8999 +v -5.6573 3.269 9.8999 +v -1.953 7.2909 4.997 +v -1.899 7.0879 5.704 +v -1.928 7.1979 5.497 +v -1.947 7.2669 5.256 +v -1.947 7.2669 4.738 +v -1.928 7.1979 4.497 +v -1.899 7.0879 4.29 +v -1.86 6.9439 4.131 +v -1.815 6.7769 4.031 +v -1.767 6.5979 3.997 +v -1.719 6.4179 4.031 +v -1.6906 6.3125 4.0941 +v -1.6906 6.3125 4.0941 +v -3.593 6.2259 5.863 +v -3.773 6.5369 4.997 +v -3.668 6.3549 5.704 +v -3.725 6.4539 5.497 +v -3.76 6.5159 5.256 +v -3.76 6.5159 4.738 +v -3.725 6.4539 4.497 +v -3.668 6.3549 4.29 +v -3.593 6.2259 4.131 +v -2.6765 5.9044 5.8998 +v -2.2813 6.068 4.0941 +v -3.321 5.7549 5.963 +v -3.2664 5.6602 5.8998 +v -3.2666 5.66 5.8998 +v -3.414 5.9159 5.997 +v -3.507 6.0759 5.963 +v -3.507 6.0759 4.031 +v -3.414 5.9159 3.997 +v -3.321 5.7549 4.031 +v -3.2664 5.6602 4.0942 +v -3.2666 5.66 4.0942 +# 1390 vertices + +g 
group_0_undefined + +usemtl color_undefined +s 0 + +f 3 657 2 +f 4 657 3 +f 5 657 4 +f 6 657 5 +f 7 657 6 +f 10 657 9 +f 11 657 10 +f 27 28 26 +f 26 28 29 +f 26 29 30 +f 26 30 31 +f 31 32 33 +f 31 33 34 +f 31 34 36 +f 31 36 37 +f 31 37 26 +f 46 31 30 +f 30 818 46 +f 29 820 30 +f 50 16 51 +f 772 27 26 +f 50 1339 16 +f 1127 52 45 +f 46 62 31 +f 772 26 37 +f 59 173 23 +f 33 32 62 +f 59 23 60 +f 63 979 62 +f 979 978 62 +f 31 62 32 +f 21 43 40 +f 40 1159 21 +f 63 62 678 +f 46 678 62 +f 257 258 58 +f 1137 57 1138 +f 23 1193 60 +f 75 85 74 +f 76 116 75 +f 77 116 76 +f 74 85 73 +f 73 85 79 +f 259 1141 72 +f 81 116 77 +f 82 139 81 +f 83 139 82 +f 84 161 83 +f 86 611 78 +f 94 78 611 +f 611 598 94 +f 93 94 598 +f 78 79 86 +f 85 86 79 +f 91 93 598 +f 90 91 598 +f 95 88 569 +f 569 550 95 +f 78 98 79 +f 97 79 98 +f 99 73 79 +f 99 79 97 +f 99 100 73 +f 74 73 100 +f 76 122 77 +f 81 77 143 +f 141 143 77 +f 21 102 43 +f 592 627 576 +f 93 616 94 +f 619 613 94 +f 570 88 571 +f 85 109 110 +f 85 110 86 +f 97 753 99 +f 1279 171 105 +f 115 161 114 +f 114 161 84 +f 85 75 116 +f 117 74 100 +f 74 117 75 +f 119 75 117 +f 121 76 75 +f 121 75 119 +f 141 77 122 +f 121 122 76 +f 145 82 81 +f 145 83 82 +f 167 84 83 +f 172 120 105 +f 167 168 84 +f 109 85 128 +f 116 128 85 +f 112 125 124 +f 129 111 388 +f 100 755 117 +f 119 421 121 +f 137 182 136 +f 138 182 137 +f 136 182 115 +f 116 81 139 +f 126 129 691 +f 133 127 134 +f 81 143 144 +f 133 150 127 +f 81 144 145 +f 150 133 748 +f 172 105 171 +f 1275 1304 285 +f 1135 125 112 +f 139 149 128 +f 139 128 116 +f 134 748 133 +f 145 144 125 +f 147 1285 154 +f 141 349 143 +f 148 155 153 +f 153 123 148 +f 143 124 144 +f 160 209 138 +f 139 83 161 +f 162 228 160 +f 145 165 83 +f 165 167 83 +f 188 114 168 +f 114 84 168 +f 190 115 188 +f 156 1374 164 +f 193 195 136 +f 216 138 215 +f 171 169 147 +f 60 817 59 +f 235 236 162 +f 302 23 173 +f 171 147 172 +f 149 139 174 +f 161 174 139 +f 127 150 473 +f 166 283 170 +f 285 1304 298 +f 1160 167 1135 +f 170 291 176 +f 145 
125 165 +f 148 135 399 +f 170 283 291 +f 168 167 1160 +f 1304 178 298 +f 58 57 180 +f 276 140 176 +f 291 276 176 +f 155 148 180 +f 161 115 182 +f 177 184 192 +f 210 248 186 +f 186 233 185 +f 163 233 162 +f 114 188 115 +f 180 148 399 +f 136 115 193 +f 190 193 115 +f 194 192 184 +f 215 137 195 +f 137 136 195 +f 137 215 138 +f 160 138 217 +f 160 234 162 +f 163 162 236 +f 236 237 163 +f 200 174 182 +f 161 182 174 +f 1289 179 175 +f 190 188 1161 +f 202 201 187 +f 187 201 222 +f 188 168 1161 +f 199 332 335 +f 204 196 1385 +f 1160 1161 168 +f 204 1385 1370 +f 204 1370 207 +f 182 138 209 +f 212 248 210 +f 213 248 212 +f 202 187 1242 +f 335 344 205 +f 216 217 138 +f 208 205 344 +f 1150 269 274 +f 185 249 186 +f 250 251 186 +f 221 218 1386 +f 183 357 189 +f 182 209 225 +f 182 225 200 +f 198 189 304 +f 1202 1203 380 +f 1340 215 1283 +f 357 304 189 +f 195 1283 215 +f 215 1340 216 +f 209 160 228 +f 232 289 230 +f 230 265 229 +f 229 265 213 +f 228 162 233 +f 163 185 233 +f 160 217 234 +f 235 162 234 +f 185 163 237 +f 250 186 249 +f 210 254 212 +f 21 331 102 +f 227 364 238 +f 272 273 213 +f 233 244 228 +f 244 225 228 +f 209 228 225 +f 242 231 243 +f 47 243 231 +f 242 256 231 +f 241 382 131 +f 103 1201 245 +f 235 861 236 +f 236 862 237 +f 373 221 223 +f 247 289 246 +f 246 289 232 +f 233 186 248 +f 237 249 185 +f 206 532 157 +f 173 328 302 +f 206 211 560 +f 210 186 251 +f 268 212 254 +f 251 254 210 +f 23 305 49 +f 212 270 213 +f 257 58 255 +f 239 402 736 +f 309 54 49 +f 299 300 232 +f 72 58 258 +f 371 259 255 +f 257 255 259 +f 233 248 260 +f 233 260 244 +f 197 219 318 +f 258 257 259 +f 321 184 196 +f 227 226 426 +f 196 204 333 +f 258 259 72 +f 863 250 249 +f 204 207 336 +f 224 626 220 +f 863 249 237 +f 224 320 625 +f 261 400 53 +f 426 364 227 +f 320 674 625 +f 863 934 250 +f 248 213 265 +f 247 266 310 +f 267 310 266 +f 264 310 267 +f 212 268 270 +f 262 384 269 +f 272 213 270 +f 296 229 273 +f 229 213 273 +f 146 454 142 +f 297 230 296 +f 299 232 230 +f 221 465 218 +f 416 180 399 +f 
1273 281 271 +f 278 260 265 +f 248 265 260 +f 277 166 108 +f 108 274 277 +f 332 199 54 +f 191 542 181 +f 231 256 395 +f 205 199 335 +f 1271 280 275 +f 279 281 282 +f 283 166 277 +f 277 406 283 +f 279 271 281 +f 254 981 268 +f 268 981 270 +f 272 270 1059 +f 279 282 286 +f 279 286 285 +f 282 281 286 +f 1086 273 272 +f 19 45 295 +f 277 401 406 +f 265 230 289 +f 288 339 264 +f 156 164 437 +f 293 312 365 +f 375 738 294 +f 284 295 290 +f 294 290 295 +f 229 296 230 +f 297 299 230 +f 315 246 300 +f 246 232 300 +f 142 452 130 +f 246 315 247 +f 295 375 294 +f 266 319 267 +f 291 398 276 +f 267 343 264 +f 345 264 343 +f 287 301 594 +f 345 346 264 +f 287 1291 301 +f 306 507 303 +f 265 289 307 +f 265 307 278 +f 500 178 187 +f 198 304 308 +f 1295 214 301 +f 299 297 1159 +f 1087 1159 297 +f 500 187 510 +f 301 1291 1295 +f 203 198 308 +f 959 748 311 +f 1086 296 273 +f 1086 1087 296 +f 302 305 23 +f 297 296 1087 +f 49 305 309 +f 510 187 222 +f 305 450 309 +f 466 308 304 +f 152 1319 159 +f 310 264 339 +f 289 247 310 +f 304 463 466 +f 340 387 313 +f 292 293 365 +f 387 365 313 +f 314 203 308 +f 184 321 311 +f 316 247 315 +f 311 194 184 +f 220 211 1316 +f 266 247 317 +f 316 317 247 +f 220 67 224 +f 343 267 319 +f 317 319 266 +f 1345 22 367 +f 1316 67 220 +f 314 308 418 +f 275 629 544 +f 275 544 253 +f 275 280 629 +f 1335 334 323 +f 173 820 328 +f 327 326 321 +f 311 321 326 +f 328 325 302 +f 325 324 302 +f 102 350 317 +f 319 317 351 +f 317 316 102 +f 289 310 329 +f 289 329 307 +f 315 43 316 +f 691 323 665 +f 196 333 321 +f 54 309 332 +f 674 320 330 +f 333 204 336 +f 336 522 333 +f 665 323 334 +f 53 393 42 +f 341 387 340 +f 361 410 342 +f 342 387 341 +f 365 360 292 +f 336 207 423 +f 24 322 331 +f 207 151 423 +f 331 21 24 +f 288 264 346 +f 368 292 366 +f 346 366 288 +f 288 366 292 +f 240 263 537 +f 368 369 292 +f 337 124 349 +f 369 370 293 +f 335 480 344 +f 392 313 390 +f 245 376 103 +f 103 376 104 +f 351 317 350 +f 349 348 337 +f 208 344 352 +f 104 377 107 +f 351 350 331 +f 337 348 421 +f 
376 245 353 +f 352 219 208 +f 350 102 331 +f 276 355 140 +f 276 453 355 +f 449 448 345 +f 449 343 351 +f 140 355 183 +f 356 329 339 +f 310 339 329 +f 48 354 347 +f 378 373 223 +f 183 355 357 +f 318 219 352 +f 421 348 121 +f 319 351 343 +f 358 1345 367 +f 343 449 345 +f 318 352 745 +f 339 288 360 +f 455 463 357 +f 330 112 337 +f 304 357 463 +f 48 55 354 +f 363 436 362 +f 362 410 361 +f 288 292 360 +f 354 55 359 +f 293 292 369 +f 390 312 370 +f 312 293 370 +f 124 143 349 +f 364 372 238 +f 337 112 124 +f 340 396 341 +f 341 413 342 +f 414 342 413 +f 372 241 238 +f 55 252 359 +f 1343 353 245 +f 414 415 342 +f 252 240 541 +f 255 374 371 +f 377 104 376 +f 929 379 371 +f 371 379 259 +f 1202 380 111 +f 365 381 360 +f 381 356 360 +f 339 360 356 +f 372 382 241 +f 376 353 377 +f 147 169 1284 +f 366 346 448 +f 380 323 388 +f 259 379 262 +f 388 111 380 +f 366 448 368 +f 1285 147 1284 +f 448 992 368 +f 262 379 384 +f 372 492 382 +f 1286 158 154 +f 386 436 385 +f 385 436 363 +f 312 313 365 +f 383 367 22 +f 154 1285 1286 +f 131 389 135 +f 269 391 274 +f 312 390 313 +f 378 223 231 +f 269 384 391 +f 340 313 394 +f 392 394 313 +f 389 131 382 +f 323 691 388 +f 384 941 391 +f 413 341 396 +f 394 396 340 +f 498 389 382 +f 361 342 415 +f 382 492 498 +f 22 42 383 +f 338 35 347 +f 415 362 361 +f 398 453 276 +f 443 363 362 +f 378 231 395 +f 35 48 347 +f 42 393 383 +f 399 389 586 +f 443 446 363 +f 263 548 537 +f 274 391 401 +f 401 277 274 +f 263 271 548 +f 404 395 403 +f 405 381 387 +f 365 387 381 +f 403 395 256 +f 239 1293 402 +f 393 53 400 +f 399 586 603 +f 370 993 390 +f 1111 959 407 +f 397 736 402 +f 390 994 392 +f 408 291 283 +f 389 399 135 +f 261 564 400 +f 171 1279 169 +f 283 406 408 +f 321 407 327 +f 672 338 680 +f 387 342 410 +f 386 411 461 +f 412 461 411 +f 409 461 412 +f 338 672 334 +f 680 338 347 +f 417 603 58 +f 398 291 408 +f 180 416 58 +f 385 363 446 +f 608 395 615 +f 399 603 417 +f 417 416 399 +f 469 386 467 +f 378 395 608 +f 318 420 197 +f 347 354 702 +f 404 419 395 +f 615 395 
419 +f 197 420 226 +f 122 121 348 +f 748 959 150 +f 744 421 755 +f 349 122 348 +f 424 322 314 +f 425 405 410 +f 387 410 405 +f 314 418 424 +f 418 975 982 +f 426 226 420 +f 396 1084 413 +f 541 713 359 +f 10 9 442 +f 414 413 1180 +f 427 132 130 +f 364 426 614 +f 346 345 448 +f 201 738 222 +f 369 368 992 +f 433 499 432 +f 432 486 431 +f 429 423 151 +f 431 487 409 +f 410 362 436 +f 732 397 434 +f 151 156 429 +f 429 156 437 +f 106 683 428 +f 377 683 107 +f 440 567 561 +f 415 441 362 +f 441 443 362 +f 331 322 435 +f 440 561 437 +f 421 744 337 +f 467 385 446 +f 438 445 302 +f 385 467 386 +f 1 657 447 +f 122 349 141 +f 124 125 144 +f 411 386 470 +f 437 164 440 +f 322 424 435 +f 411 472 412 +f 445 305 302 +f 427 567 440 +f 494 409 412 +f 445 450 305 +f 290 738 284 +f 164 132 440 +f 448 449 435 +f 435 449 331 +f 440 132 427 +f 410 436 451 +f 410 451 425 +f 351 331 449 +f 25 19 284 +f 503 69 375 +f 69 773 68 +f 503 505 69 +f 324 770 457 +f 733 443 743 +f 901 444 690 +f 295 496 375 +f 130 452 427 +f 446 443 733 +f 634 452 454 +f 355 453 455 +f 324 438 302 +f 453 1000 455 +f 441 743 443 +f 1324 456 439 +f 455 357 355 +f 459 499 458 +f 458 499 433 +f 324 457 438 +f 452 142 454 +f 461 409 487 +f 436 386 461 +f 453 999 1000 +f 468 332 309 +f 431 490 432 +f 462 454 146 +f 502 504 432 +f 433 506 458 +f 491 686 478 +f 113 460 214 +f 521 459 520 +f 751 318 745 +f 585 462 591 +f 462 146 218 +f 469 470 386 +f 127 460 113 +f 466 418 308 +f 450 468 309 +f 494 412 472 +f 470 472 411 +f 468 474 475 +f 761 681 757 +f 756 757 684 +f 462 218 465 +f 460 127 473 +f 684 685 756 +f 450 474 468 +f 685 491 488 +f 332 468 476 +f 477 468 475 +f 477 476 468 +f 469 667 470 +f 467 446 734 +f 475 474 477 +f 465 221 373 +f 436 461 479 +f 436 479 451 +f 332 476 335 +f 733 734 446 +f 469 467 734 +f 480 335 476 +f 522 336 535 +f 388 691 129 +f 481 959 482 +f 336 423 535 +f 473 150 481 +f 482 473 481 +f 434 397 1317 +f 484 518 483 +f 485 518 484 +f 483 518 459 +f 487 431 486 +f 482 493 473 +f 488 756 685 +f 
434 1320 439 +f 489 490 431 +f 502 432 490 +f 462 639 454 +f 458 520 459 +f 456 1327 464 +f 639 462 585 +f 521 527 459 +f 493 482 959 +f 756 488 943 +f 465 591 462 +f 478 696 491 +f 505 773 69 +f 409 494 495 +f 503 375 496 +f 431 409 489 +f 495 489 409 +f 496 295 303 +f 202 738 201 +f 486 497 487 +f 497 479 487 +f 461 487 479 +f 588 287 594 +f 472 632 494 +f 495 494 512 +f 512 726 495 +f 486 432 499 +f 499 459 518 +f 501 550 485 +f 95 550 501 +f 433 432 504 +f 505 503 496 +f 520 458 506 +f 504 506 433 +f 1324 1327 456 +f 178 500 298 +f 505 496 773 +f 483 530 484 +f 593 594 301 +f 303 507 496 +f 484 553 485 +f 555 485 553 +f 489 495 726 +f 69 68 523 +f 507 783 496 +f 555 556 485 +f 407 515 1111 +f 526 222 69 +f 593 301 214 +f 403 530 404 +f 507 306 509 +f 486 499 511 +f 486 511 497 +f 514 1111 515 +f 502 621 504 +f 321 517 407 +f 798 509 513 +f 623 506 504 +f 490 726 502 +f 623 504 621 +f 513 509 152 +f 515 407 517 +f 321 333 517 +f 333 522 517 +f 509 306 152 +f 95 571 88 +f 525 519 529 +f 515 517 514 +f 651 529 519 +f 643 514 517 +f 510 766 500 +f 517 651 643 +f 526 69 524 +f 523 524 69 +f 483 459 527 +f 513 152 159 +f 553 484 530 +f 527 530 483 +f 528 525 529 +f 524 510 526 +f 222 526 510 +f 651 517 522 +f 510 524 523 +f 533 516 534 +f 527 404 530 +f 624 419 521 +f 516 533 531 +f 683 353 646 +f 539 511 518 +f 499 518 511 +f 353 358 646 +f 159 536 513 +f 423 540 535 +f 536 181 542 +f 541 359 252 +f 521 520 624 +f 623 520 506 +f 529 538 692 +f 623 624 520 +f 423 429 540 +f 536 159 181 +f 534 816 533 +f 533 816 531 +f 541 240 537 +f 419 404 527 +f 538 671 543 +f 518 485 550 +f 543 675 549 +f 671 675 543 +f 683 377 353 +f 570 572 88 +f 546 542 191 +f 452 634 545 +f 89 599 90 +f 427 452 545 +f 501 485 556 +f 548 271 551 +f 540 429 552 +f 191 157 546 +f 578 95 558 +f 556 558 501 +f 501 558 95 +f 393 711 383 +f 546 157 532 +f 571 95 578 +f 714 393 400 +f 256 242 555 +f 256 553 403 +f 552 429 437 +f 562 539 550 +f 518 550 539 +f 549 688 557 +f 551 279 559 +f 714 400 721 
+f 243 556 242 +f 530 403 553 +f 279 551 271 +f 206 560 532 +f 564 721 400 +f 437 561 552 +f 279 285 559 +f 553 256 555 +f 559 285 565 +f 596 641 592 +f 627 628 576 +f 555 242 556 +f 243 558 556 +f 285 298 565 +f 261 253 564 +f 568 563 697 +f 89 88 572 +f 599 89 572 +f 564 253 544 +f 560 211 566 +f 602 90 599 +f 427 545 567 +f 565 298 573 +f 602 604 90 +f 634 454 639 +f 211 220 566 +f 574 580 581 +f 831 582 577 +f 550 569 583 +f 550 583 562 +f 1389 572 1390 +f 579 585 587 +f 582 588 589 +f 582 589 577 +f 1390 570 47 +f 243 47 578 +f 579 639 585 +f 582 638 588 +f 571 47 570 +f 591 579 587 +f 579 591 590 +f 587 585 591 +f 577 589 594 +f 577 594 593 +f 500 766 573 +f 570 1390 572 +f 589 588 594 +f 573 298 500 +f 465 590 591 +f 578 47 571 +f 243 578 558 +f 88 89 569 +f 576 718 592 +f 389 498 586 +f 601 759 603 +f 590 465 373 +f 510 777 766 +f 91 90 604 +f 616 93 606 +f 606 93 91 +f 604 606 91 +f 595 709 605 +f 373 597 590 +f 601 603 586 +f 607 583 598 +f 569 598 583 +f 510 523 777 +f 597 378 608 +f 604 602 1380 +f 602 599 1389 +f 318 751 420 +f 572 1389 599 +f 593 214 600 +f 597 373 378 +f 1389 1380 602 +f 610 426 420 +f 600 214 460 +f 89 90 569 +f 598 569 90 +f 426 610 614 +f 613 98 78 +f 619 94 616 +f 460 767 600 +f 78 94 613 +f 723 617 609 +f 86 110 611 +f 110 607 611 +f 598 611 607 +f 364 620 372 +f 606 1369 616 +f 364 614 620 +f 716 608 615 +f 460 473 767 +f 179 618 612 +f 723 512 617 +f 537 548 813 +f 620 492 372 +f 612 175 179 +f 620 828 492 +f 619 616 1346 +f 780 473 493 +f 493 1111 796 +f 574 566 580 +f 220 580 566 +f 527 521 419 +f 872 574 625 +f 512 494 632 +f 626 574 581 +f 128 149 109 +f 149 174 109 +f 174 200 109 +f 110 109 200 +f 110 200 225 +f 110 225 244 +f 110 244 260 +f 110 260 278 +f 110 278 307 +f 110 307 329 +f 110 329 356 +f 110 356 381 +f 110 381 405 +f 425 607 405 +f 451 607 425 +f 479 607 451 +f 497 607 479 +f 511 607 497 +f 539 607 511 +f 562 607 539 +f 583 607 562 +f 110 405 607 +f 624 615 419 +f 624 623 615 +f 623 621 615 +f 472 470 632 +f 
512 632 617 +f 632 631 617 +f 631 667 617 +f 636 633 637 +f 635 633 636 +f 630 649 633 +f 928 622 736 +f 596 838 641 +f 627 592 641 +f 280 638 629 +f 574 626 625 +f 673 649 70 +f 731 639 579 +f 581 580 626 +f 582 831 638 +f 280 287 638 +f 287 588 638 +f 690 428 683 +f 147 154 694 +f 559 832 551 +f 656 1208 643 +f 158 701 154 +f 656 519 1208 +f 158 175 707 +f 522 669 651 +f 627 649 628 +f 648 633 649 +f 647 633 648 +f 637 633 647 +f 656 643 651 +f 358 663 646 +f 519 656 651 +f 667 631 470 +f 663 358 367 +f 659 658 39 +f 658 689 39 +f 664 39 666 +f 669 522 535 +f 677 931 676 +f 651 669 529 +f 529 669 538 +f 663 367 670 +f 669 671 538 +f 669 535 671 +f 224 625 626 +f 334 672 665 +f 850 1043 668 +f 580 220 626 +f 668 673 850 +f 175 612 707 +f 535 540 671 +f 367 383 670 +f 665 672 949 +f 671 540 675 +f 668 649 673 +f 676 644 677 +f 989 678 760 +f 679 760 678 +f 106 107 683 +f 657 678 447 +f 239 618 179 +f 540 552 675 +f 681 761 679 +f 622 618 239 +f 682 545 634 +f 491 685 686 +f 239 736 622 +f 549 675 688 +f 645 698 1299 +f 822 537 813 +f 675 552 688 +f 696 478 695 +f 478 126 695 +f 691 695 126 +f 645 692 698 +f 685 684 39 +f 689 687 39 +f 687 686 39 +f 686 685 39 +f 684 681 39 +f 679 39 681 +f 678 39 679 +f 693 563 557 +f 690 683 646 +f 695 691 665 +f 557 688 693 +f 552 561 688 +f 12 657 11 +f 8 657 7 +f 688 561 693 +f 706 893 670 +f 699 1299 698 +f 699 1134 1299 +f 8 7 428 +f 695 665 696 +f 697 563 693 +f 711 670 383 +f 561 567 693 +f 698 692 699 +f 393 715 711 +f 693 567 697 +f 568 697 682 +f 680 347 702 +f 568 682 554 +f 697 545 682 +f 444 9 428 +f 567 545 697 +f 706 711 712 +f 354 705 702 +f 670 711 706 +f 1236 704 710 +f 715 706 712 +f 605 716 609 +f 706 715 714 +f 8 428 9 +f 718 576 717 +f 716 605 709 +f 712 711 715 +f 608 709 597 +f 717 694 718 +f 705 359 713 +f 709 608 716 +f 715 393 714 +f 354 359 705 +f 490 489 726 +f 621 502 726 +f 541 822 713 +f 920 612 618 +f 723 716 615 +f 541 537 822 +f 730 765 438 +f 721 564 720 +f 615 621 723 +f 438 774 445 +f 694 154 
701 +f 438 765 774 +f 158 707 701 +f 544 720 564 +f 716 723 609 +f 779 450 445 +f 445 774 779 +f 575 118 727 +f 172 727 120 +f 779 795 450 +f 667 469 734 +f 680 954 672 +f 723 621 726 +f 576 575 717 +f 726 512 723 +f 727 694 717 +f 147 694 727 +f 554 729 640 +f 727 717 575 +f 172 147 727 +f 729 554 682 +f 772 730 457 +f 1003 720 719 +f 682 634 729 +f 457 730 438 +f 729 731 640 +f 831 1044 725 +f 731 642 640 +f 450 795 474 +f 639 731 729 +f 634 639 729 +f 735 642 731 +f 795 801 474 +f 731 579 735 +f 728 667 734 +f 743 728 733 +f 818 815 46 +f 735 584 642 +f 734 733 728 +f 28 27 768 +f 69 222 738 +f 737 595 584 +f 202 1242 738 +f 928 736 732 +f 690 646 901 +f 27 772 770 +f 584 735 737 +f 736 397 732 +f 579 590 735 +f 1 2 657 +f 735 590 737 +f 709 595 737 +f 737 597 709 +f 1167 722 724 +f 984 37 36 +f 737 590 597 +f 739 674 330 +f 740 732 434 +f 976 977 34 +f 8 9 657 +f 976 34 33 +f 64 967 63 +f 740 439 741 +f 100 99 754 +f 434 439 740 +f 762 952 66 +f 38 762 66 +f 969 66 952 +f 724 728 1170 +f 997 995 1 +f 447 989 1 +f 755 100 754 +f 529 645 528 +f 344 747 352 +f 529 692 645 +f 344 480 747 +f 1111 493 959 +f 538 703 692 +f 476 805 480 +f 481 150 959 +f 1201 4 1204 +f 538 543 703 +f 747 745 352 +f 2 1198 3 +f 997 1 989 +f 640 793 554 +f 447 678 989 +f 760 679 761 +f 739 330 337 +f 739 337 744 +f 745 864 751 +f 610 420 751 +f 744 881 739 +f 66 678 38 +f 751 865 610 +f 428 690 444 +f 464 471 749 +f 584 595 835 +f 902 750 442 +f 471 752 749 +f 724 722 609 +f 865 751 864 +f 614 825 620 +f 57 123 153 +f 722 605 609 +f 724 609 617 +f 155 180 57 +f 417 58 416 +f 759 58 603 +f 617 728 724 +f 631 632 470 +f 235 234 1341 +f 728 617 667 +f 902 903 750 +f 38 657 762 +f 684 757 681 +f 557 563 812 +f 759 601 854 +f 968 1168 761 +f 843 654 763 +f 601 586 854 +f 843 39 846 +f 763 39 843 +f 554 823 568 +f 757 968 761 +f 543 708 703 +f 846 39 848 +f 41 848 39 +f 44 817 41 +f 764 708 549 +f 543 549 708 +f 741 439 456 +f 818 30 820 +f 754 744 755 +f 767 808 600 +f 29 28 769 +f 820 29 769 
+f 768 769 28 +f 741 456 746 +f 770 768 27 +f 744 754 753 +f 746 456 464 +f 749 746 464 +f 781 68 773 +f 653 666 654 +f 496 783 773 +f 923 771 775 +f 776 775 771 +f 34 977 36 +f 783 507 788 +f 742 776 771 +f 767 473 780 +f 523 786 777 +f 778 752 471 +f 773 783 781 +f 979 63 967 +f 969 965 65 +f 65 66 969 +f 788 507 509 +f 784 864 745 +f 780 961 767 +f 782 957 785 +f 799 92 797 +f 549 557 764 +f 790 765 791 +f 787 797 1019 +f 785 957 887 +f 791 765 730 +f 509 798 788 +f 778 516 787 +f 769 768 325 +f 1350 783 788 +f 493 796 780 +f 324 325 768 +f 801 476 477 +f 796 794 780 +f 794 789 780 +f 799 797 787 +f 772 457 770 +f 766 858 573 +f 477 474 801 +f 798 513 802 +f 768 770 324 +f 787 531 799 +f 795 991 801 +f 719 544 629 +f 719 720 544 +f 476 801 805 +f 792 892 803 +f 118 120 727 +f 778 471 508 +f 804 532 560 +f 92 799 531 +f 847 840 593 +f 802 536 807 +f 508 516 778 +f 787 516 531 +f 480 805 811 +f 786 810 777 +f 593 600 847 +f 805 1045 811 +f 640 826 793 +f 480 811 747 +f 809 806 896 +f 764 557 812 +f 745 747 784 +f 747 811 784 +f 92 531 816 +f 815 817 44 +f 848 41 817 +f 534 1339 816 +f 44 46 815 +f 794 1111 789 +f 630 628 649 +f 652 666 653 +f 814 812 563 +f 173 59 815 +f 800 904 819 +f 814 568 823 +f 769 328 820 +f 325 328 769 +f 563 568 814 +f 1050 1199 664 +f 666 1051 664 +f 554 793 823 +f 815 59 817 +f 70 838 673 +f 818 173 815 +f 818 820 173 +f 70 641 838 +f 725 719 629 +f 828 620 825 +f 827 813 548 +f 831 725 638 +f 830 833 834 +f 829 826 642 +f 827 551 832 +f 640 642 826 +f 827 548 551 +f 830 824 833 +f 492 828 837 +f 839 830 834 +f 830 839 836 +f 834 833 839 +f 629 638 725 +f 584 835 829 +f 882 1277 647 +f 492 837 498 +f 642 584 829 +f 648 883 647 +f 875 876 649 +f 1251 835 841 +f 649 650 875 +f 877 875 650 +f 841 835 595 +f 877 650 878 +f 652 878 650 +f 866 867 653 +f 878 652 867 +f 577 840 831 +f 868 866 654 +f 841 595 605 +f 759 854 1307 +f 841 605 722 +f 857 859 1307 +f 577 593 840 +f 832 559 844 +f 600 808 847 +f 850 673 853 +f 565 851 844 +f 673 838 
853 +f 863 237 862 +f 846 848 1193 +f 374 852 371 +f 498 837 849 +f 926 371 852 +f 498 849 586 +f 857 1307 854 +f 371 926 929 +f 886 901 646 +f 60 1193 848 +f 817 60 848 +f 802 513 536 +f 559 565 844 +f 886 646 663 +f 858 1070 851 +f 838 916 853 +f 536 542 807 +f 851 573 858 +f 807 542 856 +f 855 1336 845 +f 853 1171 850 +f 854 586 849 +f 859 857 849 +f 857 854 849 +f 861 862 236 +f 565 573 851 +f 856 546 860 +f 862 855 863 +f 542 546 856 +f 855 862 861 +f 653 867 652 +f 653 654 866 +f 546 532 860 +f 442 444 901 +f 654 843 868 +f 860 532 804 +f 650 666 652 +f 934 980 251 +f 610 821 614 +f 821 610 865 +f 821 825 614 +f 804 560 869 +f 945 951 771 +f 1302 1305 1142 +f 560 566 869 +f 792 785 887 +f 869 566 870 +f 872 1362 870 +f 870 574 872 +f 766 777 1005 +f 637 647 1277 +f 574 870 566 +f 800 890 904 +f 777 1013 1005 +f 721 720 919 +f 882 647 883 +f 883 648 876 +f 649 876 648 +f 824 819 909 +f 1003 719 996 +f 909 914 824 +f 674 880 874 +f 839 917 836 +f 625 874 872 +f 833 914 839 +f 777 810 1013 +f 842 836 963 +f 877 1100 875 +f 877 878 1100 +f 867 1114 878 +f 625 674 874 +f 946 1305 884 +f 665 949 943 +f 884 1307 859 +f 863 855 934 +f 849 936 884 +f 827 1040 813 +f 657 38 678 +f 406 962 408 +f 806 894 896 +f 12 13 657 +f 13 14 657 +f 14 15 657 +f 800 809 890 +f 15 762 657 +f 899 898 885 +f 879 899 885 +f 444 442 9 +f 463 455 971 +f 1000 971 455 +f 889 663 670 +f 463 973 466 +f 886 663 889 +f 418 466 975 +f 887 892 792 +f 758 12 11 +f 488 491 943 +f 881 880 739 +f 894 806 803 +f 889 670 893 +f 10 750 11 +f 894 803 892 +f 965 898 967 +f 873 928 1080 +f 674 739 880 +f 98 906 97 +f 893 706 907 +f 899 967 898 +f 1346 906 619 +f 744 897 881 +f 911 702 705 +f 898 895 885 +f 753 754 99 +f 902 442 901 +f 744 753 897 +f 930 931 13 +f 809 896 890 +f 901 886 902 +f 932 930 12 +f 753 97 906 +f 960 954 680 +f 701 905 900 +f 903 902 886 +f 613 619 906 +f 897 753 906 +f 442 750 10 +f 680 702 960 +f 707 908 905 +f 904 890 1090 +f 819 904 909 +f 117 755 119 +f 421 119 755 +f 827 1052 
1040 +f 466 973 975 +f 956 14 931 +f 914 833 824 +f 706 714 907 +f 424 418 982 +f 838 913 916 +f 758 11 750 +f 911 705 915 +f 907 714 912 +f 66 65 678 +f 917 839 914 +f 986 424 982 +f 916 913 900 +f 435 424 986 +f 912 721 919 +f 836 917 963 +f 705 713 915 +f 915 713 918 +f 912 714 721 +f 922 732 740 +f 1046 918 822 +f 945 771 923 +f 64 678 65 +f 918 713 822 +f 622 873 871 +f 928 873 622 +f 925 926 924 +f 852 924 926 +f 952 762 15 +f 251 250 934 +f 956 15 14 +f 1080 922 1066 +f 1080 928 922 +f 922 928 732 +f 696 943 491 +f 757 756 966 +f 943 966 756 +f 13 12 930 +f 845 972 855 +f 1066 922 933 +f 12 758 932 +f 996 725 1044 +f 719 725 996 +f 922 740 933 +f 927 1294 936 +f 379 935 384 +f 855 972 934 +f 936 1294 1142 +f 379 929 935 +f 941 384 935 +f 932 1065 930 +f 696 665 943 +f 937 746 939 +f 944 942 945 +f 927 936 837 +f 1142 1305 947 +f 950 999 398 +f 758 750 903 +f 945 923 944 +f 907 912 700 +f 966 943 949 +f 936 1142 947 +f 946 884 936 +f 672 954 949 +f 746 749 939 +f 947 946 936 +f 790 879 765 +f 782 771 951 +f 885 765 879 +f 885 888 765 +f 953 401 391 +f 888 774 765 +f 955 952 15 +f 779 774 891 +f 955 15 956 +f 957 782 951 +f 1383 177 422 +f 1382 1383 422 +f 953 391 941 +f 13 931 14 +f 774 888 891 +f 422 177 748 +f 957 951 1047 +f 1112 953 941 +f 933 741 937 +f 401 953 958 +f 740 741 933 +f 957 1047 1049 +f 784 1054 864 +f 952 955 1091 +f 957 1049 887 +f 955 956 1091 +f 949 954 1155 +f 794 796 1111 +f 401 958 406 +f 741 746 937 +f 1056 865 864 +f 948 939 749 +f 930 676 931 +f 677 1091 956 +f 1099 1103 892 +f 962 406 958 +f 821 865 1061 +f 702 911 960 +f 931 677 956 +f 961 808 767 +f 842 964 845 +f 398 408 950 +f 408 962 950 +f 964 842 963 +f 965 967 64 +f 1110 950 962 +f 64 65 965 +f 837 828 927 +f 961 780 970 +f 1208 990 974 +f 968 966 949 +f 974 789 1208 +f 837 936 849 +f 895 969 1095 +f 913 694 900 +f 970 780 789 +f 969 895 965 +f 952 1095 969 +f 701 900 694 +f 701 707 905 +f 463 971 973 +f 895 898 965 +f 859 849 884 +f 972 845 964 +f 718 913 592 +f 879 790 
976 +f 973 971 1132 +f 1132 971 1131 +f 694 913 718 +f 738 430 284 +f 1059 270 981 +f 984 36 977 +f 974 970 789 +f 908 612 920 +f 978 976 33 +f 908 707 612 +f 903 1065 758 +f 33 62 978 +f 981 254 980 +f 254 251 980 +f 618 871 920 +f 980 972 981 +f 981 972 1059 +f 1048 1044 831 +f 972 980 934 +f 791 977 790 +f 976 978 879 +f 978 979 879 +f 976 790 977 +f 805 801 1041 +f 618 622 871 +f 899 879 979 +f 899 979 967 +f 805 1041 1045 +f 840 847 1009 +f 1204 3 1198 +f 808 1017 847 +f 1068 963 917 +f 772 37 983 +f 37 984 983 +f 992 993 369 +f 1024 808 961 +f 994 390 993 +f 987 1054 784 +f 772 983 730 +f 779 891 988 +f 779 988 795 +f 988 991 795 +f 1086 272 1059 +f 370 369 993 +f 974 990 970 +f 991 988 1236 +f 942 547 938 +f 983 984 730 +f 791 730 984 +f 61 938 547 +f 791 984 977 +f 913 596 592 +f 435 992 448 +f 838 596 913 +f 995 1198 2 +f 2 1 995 +f 994 993 986 +f 993 992 986 +f 435 986 992 +f 890 1092 1090 +f 904 1090 1094 +f 909 904 1094 +f 654 666 763 +f 649 666 650 +f 627 641 649 +f 41 678 44 +f 44 678 46 +f 39 678 41 +f 999 453 398 +f 997 1351 995 +f 1103 894 892 +f 1000 999 1128 +f 942 938 945 +f 1016 1015 1002 +f 919 720 1003 +f 1128 1131 1000 +f 1000 1131 971 +f 948 752 985 +f 766 1005 858 +f 948 749 752 +f 985 752 1010 +f 1007 1238 940 +f 1025 810 80 +f 778 1010 752 +f 924 1307 925 +f 1097 914 909 +f 1011 940 1258 +f 876 875 1015 +f 1015 1016 876 +f 1301 1016 1002 +f 787 1010 778 +f 787 1019 1010 +f 1009 847 1017 +f 92 87 797 +f 1068 1074 963 +f 994 1026 392 +f 1026 1082 394 +f 396 394 1082 +f 964 963 1074 +f 786 68 810 +f 1013 810 1025 +f 892 887 1099 +f 394 392 1026 +f 1060 1022 1020 +f 797 87 1028 +f 1013 1025 1023 +f 1067 1029 1022 +f 1019 797 1028 +f 1017 808 1024 +f 1022 1060 1067 +f 1027 1004 1019 +f 1028 1027 1019 +f 896 894 1104 +f 775 816 923 +f 923 547 944 +f 961 1115 1024 +f 547 923 816 +f 942 944 547 +f 1092 890 896 +f 96 1004 87 +f 1027 87 1004 +f 1027 1028 87 +f 1030 1031 1034 +f 1015 1100 1002 +f 858 1105 1070 +f 1012 1032 1031 +f 1002 1106 1006 +f 
1034 1031 1032 +f 1002 1102 1106 +f 1033 1038 61 +f 1039 938 61 +f 1202 111 659 +f 1122 970 990 +f 1036 945 938 +f 1023 1083 1013 +f 1039 61 1038 +f 666 668 1043 +f 1037 929 1035 +f 1151 1037 1035 +f 1038 1033 1036 +f 1038 1036 1039 +f 1039 1036 938 +f 1050 664 1051 +f 801 991 1041 +f 1236 1245 991 +f 1051 666 1043 +f 1172 996 1044 +f 1102 1002 1100 +f 39 763 666 +f 1036 1042 945 +f 1008 1107 1057 +f 991 1245 1041 +f 1171 853 916 +f 945 1042 951 +f 1047 951 1042 +f 1174 1043 850 +f 1203 660 1200 +f 1046 813 1040 +f 662 664 1199 +f 822 813 1046 +f 987 811 1045 +f 831 840 1048 +f 1048 840 1009 +f 811 987 784 +f 1107 1008 1006 +f 1050 1051 1330 +f 1180 413 1084 +f 1174 1176 1043 +f 64 63 678 +f 1052 827 832 +f 655 644 889 +f 1052 832 1053 +f 1020 1008 1057 +f 1056 864 1054 +f 1053 844 1058 +f 932 758 1065 +f 1057 1060 1020 +f 676 930 1065 +f 1061 865 1056 +f 972 1078 1059 +f 1097 1064 914 +f 914 1064 917 +f 903 886 1065 +f 676 1065 644 +f 821 1069 825 +f 1053 832 844 +f 821 1061 1069 +f 933 1062 1066 +f 1064 1068 917 +f 1058 851 1070 +f 644 1065 886 +f 825 921 828 +f 1058 844 851 +f 1069 921 825 +f 1067 1073 1029 +f 644 886 889 +f 927 828 921 +f 1062 933 937 +f 893 655 889 +f 1091 1095 952 +f 655 893 661 +f 1105 858 1005 +f 907 661 893 +f 1076 655 1081 +f 1083 1023 80 +f 1082 1084 396 +f 964 1078 972 +f 1077 1180 1084 +f 1082 1026 1077 +f 1084 1082 1077 +f 964 1074 1078 +f 1219 1013 1083 +f 925 1012 926 +f 1031 926 1012 +f 1031 1035 926 +f 1035 929 926 +f 1078 1087 1086 +f 1078 1086 1059 +f 1037 935 929 +f 935 1108 941 +f 907 700 661 +f 1091 677 644 +f 1162 908 1166 +f 644 1076 1091 +f 905 908 1162 +f 1095 1091 1076 +f 895 1095 1076 +f 999 1126 1128 +f 710 704 919 +f 966 968 757 +f 909 1094 1097 +f 1266 1062 1072 +f 1003 1177 710 +f 1071 982 975 +f 887 1049 1099 +f 1177 996 1172 +f 986 982 1077 +f 1166 908 920 +f 1100 878 1114 +f 1113 1114 867 +f 1077 982 1071 +f 704 700 912 +f 1077 1026 986 +f 871 1147 1140 +f 1026 994 986 +f 1147 873 1149 +f 704 912 919 +f 871 873 
1147 +f 873 1080 1149 +f 1103 1104 894 +f 919 1003 710 +f 958 1119 962 +f 896 1104 1092 +f 1003 996 1177 +f 1006 1106 1107 +f 1119 1110 962 +f 1062 937 1072 +f 326 959 311 +f 935 1037 1108 +f 1075 1072 939 +f 194 311 748 +f 1146 1036 1141 +f 937 939 1072 +f 998 910 1241 +f 1146 1042 1036 +f 192 194 748 +f 1075 939 948 +f 1047 1042 1150 +f 1109 1113 866 +f 760 761 1168 +f 867 866 1113 +f 1112 941 1108 +f 1100 1015 875 +f 1109 1102 1113 +f 1114 1113 1102 +f 1094 1090 189 +f 1116 958 953 +f 1102 1100 1114 +f 1269 1040 1052 +f 1116 953 1112 +f 198 1094 189 +f 1024 1115 1143 +f 1117 96 1118 +f 1097 203 1064 +f 1119 958 1116 +f 1118 1120 1117 +f 1157 900 905 +f 1119 1116 1263 +f 1115 970 1122 +f 1115 961 970 +f 24 1074 1068 +f 948 1124 1075 +f 950 1126 999 +f 1110 1126 950 +f 1127 1123 1225 +f 1103 1099 170 +f 911 1163 960 +f 1127 1225 1231 +f 1127 1231 910 +f 61 547 1205 +f 990 1130 1122 +f 183 1092 140 +f 1104 140 1092 +f 1129 1122 1130 +f 1092 183 1090 +f 189 1090 183 +f 170 176 1103 +f 1133 1129 1281 +f 176 1104 1103 +f 176 140 1104 +f 973 1063 975 +f 1159 40 299 +f 1132 1063 973 +f 1159 1087 1078 +f 1130 1281 1129 +f 1063 1071 975 +f 915 1144 1179 +f 61 57 1033 +f 1179 911 915 +f 58 72 57 +f 1148 1144 918 +f 1136 1302 1134 +f 1137 1033 57 +f 1134 699 692 +f 72 1138 57 +f 1138 72 1141 +f 918 1046 1148 +f 1136 1134 1139 +f 259 262 1141 +f 1205 51 16 +f 1138 1141 1137 +f 1137 1141 1033 +f 1030 1136 1139 +f 866 868 1109 +f 1141 1036 1033 +f 1139 1031 1030 +f 1109 868 1196 +f 644 655 1076 +f 1141 262 1146 +f 703 708 1145 +f 1146 262 269 +f 661 1088 1081 +f 703 1145 1139 +f 1232 1079 1055 +f 1079 1232 1240 +f 1240 1085 1079 +f 1042 1146 1150 +f 915 918 1144 +f 1146 269 1150 +f 1049 1047 108 +f 1150 108 1047 +f 1150 274 108 +f 1049 108 166 +f 1099 1049 166 +f 812 1255 1153 +f 166 170 1099 +f 1173 1175 989 +f 1107 1106 1190 +f 415 1182 441 +f 102 316 43 +f 300 43 315 +f 1074 21 1078 +f 21 1074 24 +f 1157 905 1162 +f 300 299 40 +f 1158 954 960 +f 40 43 300 +f 1207 190 1161 
+f 1158 1155 954 +f 743 441 1182 +f 1078 21 1159 +f 1158 960 1163 +f 1156 1161 1160 +f 1160 1135 1156 +f 1097 1094 198 +f 167 165 1135 +f 125 1135 165 +f 1206 1093 1089 +f 198 203 1097 +f 112 1156 1135 +f 1064 203 1165 +f 25 430 1342 +f 1165 1068 1064 +f 314 1165 203 +f 1342 1117 1120 +f 1165 24 1068 +f 1121 1211 17 +f 96 87 1169 +f 1165 314 322 +f 1118 96 1169 +f 1115 1122 1214 +f 1118 1169 1120 +f 949 1155 1168 +f 1214 1122 1224 +f 989 760 1168 +f 1168 968 949 +f 24 1165 322 +f 1174 850 1171 +f 1168 1173 989 +f 1122 1129 1133 +f 1122 1133 1224 +f 1120 1221 1123 +f 1168 1155 1173 +f 1170 1063 1167 +f 1057 1107 1197 +f 1123 1221 1225 +f 1154 1152 1272 +f 1330 1051 1176 +f 1176 1174 1157 +f 1174 1171 1157 +f 1176 1157 1330 +f 1175 1173 1155 +f 661 1081 655 +f 920 1140 1166 +f 700 1088 661 +f 920 871 1140 +f 1060 1252 1257 +f 1088 700 1096 +f 1067 1060 1257 +f 1261 1073 1067 +f 1163 911 1179 +f 1257 1261 1067 +f 1180 1182 414 +f 1171 900 1157 +f 1235 1055 1073 +f 916 900 1171 +f 1193 843 846 +f 1172 1044 1181 +f 843 1196 868 +f 415 414 1182 +f 948 985 1124 +f 985 1125 1124 +f 743 1182 1178 +f 1182 1180 1178 +f 1139 1134 692 +f 1189 1233 1222 +f 703 1139 692 +f 1181 1048 1183 +f 1109 1184 1102 +f 1044 1048 1181 +f 1192 1185 1189 +f 1145 708 1151 +f 1109 1196 1184 +f 1184 49 1188 +f 985 1186 1125 +f 1151 708 764 +f 700 704 1096 +f 1144 1148 42 +f 1186 985 1010 +f 1151 764 1153 +f 1102 1184 1188 +f 704 1236 1096 +f 1187 1183 1009 +f 1186 1019 1189 +f 764 812 1153 +f 1102 1188 1106 +f 1186 1010 1019 +f 69 738 375 +f 290 294 738 +f 1190 1106 1188 +f 1048 1009 1183 +f 1019 1004 1189 +f 786 523 68 +f 1222 1186 1189 +f 1197 1107 1190 +f 1187 1009 1017 +f 422 1083 80 +f 1189 1004 1194 +f 1191 1187 1017 +f 1196 843 1195 +f 80 810 68 +f 1023 1025 80 +f 1189 1194 1192 +f 885 895 1076 +f 1192 96 1185 +f 1193 1195 843 +f 1191 1017 1024 +f 885 1081 888 +f 1194 1004 96 +f 1191 1024 1143 +f 1076 1081 885 +f 1251 841 1164 +f 106 428 7 +f 1214 1143 1115 +f 1088 888 1081 +f 1290 92 742 
+f 92 816 742 +f 742 816 776 +f 6 107 7 +f 106 7 107 +f 891 888 1088 +f 1167 1164 722 +f 775 776 816 +f 107 6 104 +f 891 1096 988 +f 430 1185 96 +f 1244 1185 430 +f 1195 1184 1196 +f 1088 1096 891 +f 1192 1194 96 +f 1164 841 722 +f 1167 724 1170 +f 5 104 6 +f 1201 103 4 +f 660 1203 659 +f 3 1204 4 +f 1178 1170 728 +f 126 478 687 +f 687 478 686 +f 687 689 126 +f 129 126 689 +f 1172 1181 1220 +f 728 743 1178 +f 660 659 39 +f 658 111 689 +f 129 689 111 +f 658 659 111 +f 1202 659 1203 +f 1199 1200 662 +f 662 1200 660 +f 61 1227 1229 +f 1229 123 61 +f 662 660 39 +f 1069 1061 1098 +f 1050 1330 1199 +f 103 104 5 +f 1098 1101 1069 +f 5 4 103 +f 327 407 959 +f 1101 921 1069 +f 1227 61 1205 +f 1255 812 814 +f 326 327 959 +f 18 20 1200 +f 1356 245 1204 +f 16 1218 1205 +f 1200 20 1203 +f 921 1288 927 +f 1288 921 1101 +f 1234 1237 1299 +f 1355 1356 1198 +f 193 190 1207 +f 1130 990 1281 +f 1208 1281 990 +f 1208 789 1111 +f 1226 1083 422 +f 1035 1031 1145 +f 1139 1145 1031 +f 1209 1121 1093 +f 1145 1151 1035 +f 1156 1207 1161 +f 1083 1226 1219 +f 1093 1206 1209 +f 1037 1153 1108 +f 92 1210 87 +f 1151 1153 1037 +f 1072 1270 1266 +f 1096 1236 988 +f 1270 1072 1075 +f 1211 1121 1209 +f 1216 87 1215 +f 87 1216 1169 +f 1210 1213 1215 +f 1131 1128 1251 +f 1169 1213 1120 +f 1005 1212 1105 +f 1132 1131 1164 +f 1212 1013 1219 +f 1164 1167 1132 +f 1212 1005 1013 +f 1216 1213 1169 +f 1215 1213 1216 +f 1063 1132 1167 +f 1213 1297 1221 +f 1170 1071 1063 +f 1275 1124 1125 +f 1181 1217 1220 +f 1213 1221 1120 +f 1054 1217 1223 +f 17 1218 16 +f 1170 1178 1071 +f 1211 148 1218 +f 1178 1077 1071 +f 1223 1056 1054 +f 1222 1125 1186 +f 17 1211 1218 +f 1178 1180 1077 +f 1183 1223 1217 +f 57 61 123 +f 1181 1183 1217 +f 1061 1056 1230 +f 1228 1219 1226 +f 1218 123 1229 +f 1223 1230 1056 +f 1225 1303 1231 +f 1227 1205 1218 +f 1229 1227 1218 +f 1112 1259 1116 +f 1228 1247 1219 +f 1230 1098 1061 +f 1232 1055 1235 +f 1295 1105 1212 +f 1231 1303 1308 +f 1133 1299 1237 +f 1247 1228 422 +f 1237 1224 1133 +f 
1234 1256 1224 +f 1237 1234 1224 +f 1241 910 1231 +f 1187 1191 1230 +f 1233 1244 1242 +f 1126 1239 1246 +f 1245 1236 710 +f 1308 1241 1231 +f 1226 422 1228 +f 1248 1089 1085 +f 998 1241 1249 +f 1085 1240 1248 +f 1098 1191 1143 +f 829 1246 1239 +f 1041 1250 1045 +f 1101 1143 1214 +f 1249 1001 998 +f 1248 1206 1089 +f 1242 1244 430 +f 422 748 1247 +f 1041 1245 1250 +f 1128 1246 1251 +f 710 1177 1245 +f 829 835 1246 +f 1233 1185 1244 +f 1246 835 1251 +f 1060 1057 1252 +f 1245 1177 1250 +f 1253 1001 1249 +f 1185 1233 1189 +f 1131 1251 1164 +f 1249 1312 1253 +f 1045 1250 1220 +f 1252 1057 1197 +f 1253 1007 1001 +f 1288 1214 1224 +f 1254 1062 1266 +f 1112 1108 1255 +f 1254 1066 1062 +f 1238 1007 1253 +f 1108 1153 1255 +f 1267 1052 1053 +f 1140 1337 1166 +f 940 1238 1258 +f 1271 1058 1274 +f 1259 1112 1255 +f 1140 1147 35 +f 1255 814 1259 +f 48 1147 1149 +f 1274 1058 1070 +f 1172 1220 1250 +f 1172 1250 1177 +f 987 1217 1054 +f 55 1262 252 +f 1220 1217 987 +f 55 1149 1262 +f 1070 1105 1291 +f 1116 1259 1263 +f 1011 1260 1014 +f 1262 1149 1080 +f 1011 1258 1260 +f 1235 1073 1261 +f 1261 197 1235 +f 1264 1018 1014 +f 1259 823 1263 +f 1254 252 1262 +f 1264 1014 1260 +f 1045 1220 987 +f 1080 1066 1262 +f 1265 1018 1264 +f 1101 1098 1143 +f 1184 1195 1193 +f 1066 1254 1262 +f 23 1184 1193 +f 58 759 255 +f 1265 1021 1018 +f 23 49 1184 +f 793 826 1243 +f 374 255 759 +f 1188 54 1190 +f 1239 1243 826 +f 51 547 50 +f 1188 49 54 +f 51 1205 547 +f 826 829 1239 +f 153 155 57 +f 54 199 1190 +f 1268 1152 1021 +f 253 1269 1267 +f 1021 1265 1268 +f 1259 814 823 +f 793 1243 1263 +f 1052 1267 1269 +f 793 1263 823 +f 1110 1239 1126 +f 1187 1223 1183 +f 1272 1152 1268 +f 1239 1110 1243 +f 238 1240 227 +f 1187 1230 1223 +f 635 105 633 +f 1240 238 1248 +f 1126 1246 1128 +f 1230 1191 1098 +f 1271 1267 1053 +f 637 1277 636 +f 1248 241 1206 +f 238 241 1248 +f 641 70 649 +f 1263 1243 1119 +f 1206 241 131 +f 575 576 628 +f 1119 1243 1110 +f 1209 1206 131 +f 628 630 575 +f 118 575 630 +f 1053 1058 
1271 +f 633 120 630 +f 118 630 120 +f 195 193 1282 +f 1209 131 135 +f 1282 1283 195 +f 1101 1214 1288 +f 286 1275 285 +f 135 1211 1209 +f 1279 105 635 +f 120 633 105 +f 286 281 1273 +f 286 1273 1275 +f 1280 1279 636 +f 1274 1070 1291 +f 636 1277 1280 +f 1211 135 148 +f 666 649 668 +f 1218 148 123 +f 514 643 1111 +f 1197 1190 199 +f 1208 1111 643 +f 1278 53 1148 +f 1154 1276 1156 +f 882 1301 1277 +f 876 1016 883 +f 882 883 1301 +f 199 205 1197 +f 635 636 1279 +f 1148 1046 1278 +f 1234 1299 1256 +f 1252 1197 205 +f 261 1278 1269 +f 1277 1296 1280 +f 1299 1133 1281 +f 1257 1252 208 +f 1154 1272 1276 +f 1280 169 1279 +f 1207 1282 193 +f 662 39 664 +f 205 208 1252 +f 1256 1302 1298 +f 208 219 1257 +f 1288 1224 1294 +f 1278 1040 1269 +f 1261 1257 219 +f 1046 1040 1278 +f 1276 1340 1283 +f 1156 1276 1207 +f 1282 1207 1276 +f 997 989 1175 +f 1351 1355 995 +f 1283 1282 1276 +f 219 197 1261 +f 1266 263 240 +f 226 1235 197 +f 92 1290 1210 +f 1266 240 1254 +f 1235 226 1232 +f 1266 1270 263 +f 1210 1215 87 +f 1240 1232 227 +f 1270 271 263 +f 1290 1292 1210 +f 226 227 1232 +f 1213 1210 1292 +f 1270 1273 271 +f 1351 997 1175 +f 1292 1297 1213 +f 1293 239 179 +f 35 1337 1140 +f 179 1289 1293 +f 1294 1224 1256 +f 1288 1294 927 +f 1300 1221 1297 +f 48 35 1147 +f 1290 742 1292 +f 1284 169 1296 +f 771 1292 742 +f 169 1280 1296 +f 48 1149 55 +f 1291 1105 1295 +f 1298 1302 1142 +f 113 214 1295 +f 1300 1225 1221 +f 158 1287 175 +f 1294 1256 1298 +f 1287 158 1286 +f 1287 1289 175 +f 1298 1142 1294 +f 1016 1301 883 +f 1292 771 782 +f 1296 1301 1284 +f 1297 1292 782 +f 1284 1301 1002 +f 525 1281 519 +f 1208 519 1281 +f 1006 1285 1284 +f 1006 1008 1285 +f 1297 782 785 +f 1302 1256 1299 +f 252 1254 240 +f 1300 1297 785 +f 113 1295 1212 +f 1299 1281 645 +f 528 645 1281 +f 1273 1270 1075 +f 525 528 1281 +f 1296 1277 1301 +f 1273 1124 1275 +f 1300 785 792 +f 134 1247 748 +f 946 947 1305 +f 1300 792 1303 +f 1124 1273 1075 +f 113 1212 1219 +f 1286 1020 1287 +f 1303 1225 1300 +f 1299 1134 1302 +f 
113 1219 127 +f 1307 884 1305 +f 1308 1303 803 +f 792 803 1303 +f 1030 1034 1305 +f 1032 1305 1034 +f 1289 1029 1293 +f 1302 1030 1305 +f 1136 1030 1302 +f 1314 402 1293 +f 1318 1306 819 +f 925 1307 1012 +f 1309 397 402 +f 1306 800 819 +f 1307 1305 1012 +f 1032 1012 1305 +f 1318 1238 1306 +f 1307 852 759 +f 374 759 852 +f 1317 1320 434 +f 1219 1247 127 +f 924 852 1307 +f 1238 1253 1306 +f 1253 1312 1306 +f 206 157 1311 +f 1222 178 1304 +f 1311 1315 206 +f 1247 134 127 +f 1284 1002 1006 +f 1285 1008 1286 +f 1304 1275 1125 +f 1020 1286 1008 +f 1313 157 191 +f 803 806 1308 +f 1125 1222 1304 +f 1308 806 1310 +f 1241 1310 1249 +f 1001 1007 1321 +f 1287 1022 1289 +f 1323 1007 940 +f 1287 1020 1022 +f 1289 1022 1029 +f 1310 1241 1308 +f 1312 1310 809 +f 1344 1158 1163 +f 1314 1293 1029 +f 1233 187 178 +f 1310 806 809 +f 1029 1073 1314 +f 402 1314 1309 +f 1233 178 1222 +f 1249 1310 1312 +f 1073 1055 1314 +f 809 800 1312 +f 1055 1309 1314 +f 1312 800 1306 +f 430 738 1242 +f 1317 397 1309 +f 1018 1021 1316 +f 253 1267 275 +f 1233 1242 187 +f 1267 1271 275 +f 998 1001 1319 +f 1318 1258 1238 +f 1324 439 1320 +f 819 824 1318 +f 1001 1321 1319 +f 1318 824 1322 +f 1322 1260 1258 +f 287 280 1274 +f 1324 1325 1326 +f 1274 280 1271 +f 1324 1320 1325 +f 1089 1325 1320 +f 1162 1333 1157 +f 1318 1322 1258 +f 1324 1326 1328 +f 1324 1328 1327 +f 1329 1322 830 +f 1326 1325 1328 +f 1274 1291 287 +f 295 284 19 +f 1051 1043 1176 +f 464 1331 471 +f 295 45 303 +f 464 1327 1331 +f 306 303 52 +f 824 830 1322 +f 1157 1333 1330 +f 52 303 45 +f 1329 1260 1322 +f 1331 508 471 +f 306 52 56 +f 1264 1260 1329 +f 56 152 306 +f 830 836 1329 +f 1329 836 1332 +f 157 1313 1311 +f 1200 1199 18 +f 206 1315 211 +f 1332 1264 1329 +f 1315 1316 211 +f 508 16 516 +f 380 1203 20 +f 836 842 1332 +f 1179 1144 22 +f 16 1339 516 +f 224 71 320 +f 380 1333 323 +f 71 224 67 +f 101 320 71 +f 1264 1332 1265 +f 320 101 330 +f 1148 53 42 +f 18 1330 1333 +f 20 18 1333 +f 1317 1309 1079 +f 330 101 112 +f 1055 1079 1309 +f 1333 
380 20 +f 1079 1085 1317 +f 152 56 1319 +f 1334 1332 842 +f 1085 1320 1317 +f 1265 1332 1334 +f 159 1321 181 +f 1085 1089 1320 +f 1335 1333 1162 +f 1321 159 1319 +f 191 181 1323 +f 1335 1162 1166 +f 1334 1268 1265 +f 1321 1323 181 +f 1323 1313 191 +f 323 1333 1335 +f 1334 845 1336 +f 19 1123 45 +f 842 845 1334 +f 19 1120 1123 +f 45 1123 1127 +f 1268 1334 1336 +f 1337 334 1335 +f 56 52 910 +f 910 52 1127 +f 1339 534 516 +f 1337 338 334 +f 998 56 910 +f 1336 1272 1268 +f 1011 1313 940 +f 1313 1323 940 +f 1337 1335 1166 +f 261 53 1278 +f 1011 1311 1313 +f 35 338 1337 +f 1328 1093 1327 +f 1315 1311 1014 +f 1093 1328 1089 +f 1325 1089 1328 +f 1311 1011 1014 +f 1327 1121 1331 +f 1316 1315 1018 +f 1093 1121 1327 +f 17 508 1331 +f 1315 1014 1018 +f 1330 18 1199 +f 1338 1272 1336 +f 67 1316 1021 +f 547 816 1339 +f 1276 1272 1338 +f 1269 253 261 +f 508 17 16 +f 217 216 1341 +f 1340 1341 216 +f 1021 1152 67 +f 217 1341 234 +f 71 67 1152 +f 1339 50 547 +f 1276 1338 1340 +f 101 71 1154 +f 855 1338 1336 +f 1152 1154 71 +f 1156 101 1154 +f 112 101 1156 +f 861 235 1341 +f 1121 17 1331 +f 1341 1340 1338 +f 998 1319 56 +f 855 861 1338 +f 1341 1338 861 +f 1155 1351 1175 +f 1321 1007 1323 +f 781 1347 68 +f 96 1342 430 +f 25 284 430 +f 1117 1342 96 +f 1155 1343 1351 +f 19 1342 1120 +f 25 1342 19 +f 802 807 1354 +f 804 869 1357 +f 1155 1158 1343 +f 1359 856 860 +f 1343 1158 1344 +f 880 1364 874 +f 353 1343 1344 +f 353 1344 358 +f 358 1344 1345 +f 1345 1344 1163 +f 98 613 906 +f 1163 1179 1345 +f 906 1346 897 +f 80 68 1347 +f 1349 80 1348 +f 1347 1348 80 +f 1347 781 783 +f 783 1350 1347 +f 1348 1347 1350 +f 1352 1350 788 +f 1198 995 1355 +f 1348 1350 1349 +f 788 798 1352 +f 1352 798 1353 +f 1353 802 1354 +f 1353 798 802 +f 1201 1204 245 +f 1351 1343 1355 +f 1357 869 1361 +f 1356 1355 1343 +f 1354 807 1358 +f 1179 22 1345 +f 856 1359 1358 +f 856 1358 807 +f 22 1144 42 +f 1360 1359 860 +f 1374 1360 1357 +f 1360 804 1357 +f 860 804 1360 +f 1204 1198 1356 +f 870 1362 1361 +f 869 870 1361 +f 
1356 1343 245 +f 1362 872 1363 +f 874 1364 1363 +f 872 874 1363 +f 880 1365 1364 +f 1365 881 1366 +f 1365 880 881 +f 1366 881 897 +f 1346 1369 1367 +f 897 1346 1367 +f 1366 897 1367 +f 1368 1369 606 +f 1346 616 1369 +f 1369 1368 1367 +f 1349 1379 80 +f 1350 1352 1381 +f 1365 1378 1364 +f 1368 1380 1367 +f 1354 1358 1370 +f 1371 132 164 +f 1361 1371 1357 +f 1371 1361 1375 +f 1358 1372 1370 +f 1358 1359 1372 +f 1372 1359 1373 +f 1374 156 1373 +f 1373 1360 1374 +f 1373 1359 1360 +f 164 1374 1371 +f 1374 1357 1371 +f 1371 1375 132 +f 1375 130 132 +f 1376 142 130 +f 1376 130 1375 +f 1361 1362 1375 +f 1375 1362 1376 +f 1377 142 1376 +f 1376 1363 1377 +f 1362 1363 1376 +f 146 1377 1378 +f 1377 1364 1378 +f 1377 1363 1364 +f 422 80 1379 +f 1382 422 1379 +f 1379 1349 1350 +f 1350 1381 1379 +f 1387 1366 1367 +f 1380 1368 604 +f 606 604 1368 +f 1380 1389 1388 +f 1367 1380 1388 +f 192 748 177 +f 177 1381 184 +f 1379 1381 1382 +f 1383 1382 1381 +f 1383 1381 177 +f 1384 1352 1353 +f 1384 196 184 +f 1381 1352 1384 +f 184 1381 1384 +f 196 1384 1385 +f 1384 1353 1385 +f 1385 1354 1370 +f 1385 1353 1354 +f 1372 207 1370 +f 151 207 1372 +f 151 1373 156 +f 1373 151 1372 +f 142 1377 146 +f 1378 218 146 +f 1378 1386 218 +f 1378 1365 1386 +f 1386 1387 221 +f 1365 1366 1386 +f 1386 1366 1387 +f 1387 1367 1388 +f 223 1387 1388 +f 1388 231 223 +f 1390 1388 1389 +f 231 1388 47 +f 1390 47 1388 +f 1387 223 221 +# 2776 faces + + #end of obj_0 + diff --git a/diffusion_policy/env/block_pushing/assets/workspace.urdf b/diffusion_policy/env/block_pushing/assets/workspace.urdf new file mode 100644 index 0000000..0c888dc --- /dev/null +++ b/diffusion_policy/env/block_pushing/assets/workspace.urdf @@ -0,0 +1,30 @@ + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/diffusion_policy/env/block_pushing/assets/workspace_real.urdf b/diffusion_policy/env/block_pushing/assets/workspace_real.urdf new file mode 100644 index 0000000..2cc2d1d --- /dev/null +++ 
b/diffusion_policy/env/block_pushing/assets/workspace_real.urdf @@ -0,0 +1,29 @@ + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/diffusion_policy/env/block_pushing/assets/zone.obj b/diffusion_policy/env/block_pushing/assets/zone.obj new file mode 100644 index 0000000..b985550 --- /dev/null +++ b/diffusion_policy/env/block_pushing/assets/zone.obj @@ -0,0 +1,64 @@ +# Object Export From Tinkercad Server 2015 + +mtllib obj.mtl + +o obj_0 +v 10 -10 20 +v 10 -10 0 +v 10 10 0 +v 10 10 20 +v 9.002 9.003 20 +v 9.002 -9.002 20 +v -10 10 0 +v -10 10 20 +v -9.003 9.003 20 +v -9.003 9.003 0 +v 9.002 9.003 0 +v 9.002 -9.002 0 +v -9.003 -9.002 0 +v -9.003 -9.002 20 +v -10 -10 0 +v -10 -10 20 +# 16 vertices + +g group_0_15277357 + +usemtl color_15277357 +s 0 + +f 1 2 3 +f 1 3 4 +f 4 5 6 +f 4 6 1 +f 9 10 11 +f 9 11 5 +f 6 12 13 +f 6 13 14 +f 10 9 14 +f 10 14 13 +f 7 10 13 +f 7 13 15 +f 4 8 5 +f 9 5 8 +f 8 7 15 +f 8 15 16 +f 10 7 11 +f 3 11 7 +f 11 3 12 +f 2 12 3 +f 14 16 6 +f 1 6 16 +f 16 15 2 +f 16 2 1 +f 9 8 14 +f 16 14 8 +f 7 8 3 +f 4 3 8 +f 2 15 12 +f 13 12 15 +f 12 6 5 +f 12 5 11 +# 32 faces + + #end of obj_0 + diff --git a/diffusion_policy/env/block_pushing/assets/zone.urdf b/diffusion_policy/env/block_pushing/assets/zone.urdf new file mode 100644 index 0000000..7cc2395 --- /dev/null +++ b/diffusion_policy/env/block_pushing/assets/zone.urdf @@ -0,0 +1,23 @@ + + + + + + + + + + + + + + + + + + + + + + + diff --git a/diffusion_policy/env/block_pushing/assets/zone2.urdf b/diffusion_policy/env/block_pushing/assets/zone2.urdf new file mode 100644 index 0000000..672edaf --- /dev/null +++ b/diffusion_policy/env/block_pushing/assets/zone2.urdf @@ -0,0 +1,23 @@ + + + + + + + + + + + + + + + + + + + + + + + diff --git a/diffusion_policy/env/block_pushing/block_pushing.py b/diffusion_policy/env/block_pushing/block_pushing.py new file mode 100644 index 0000000..8cc52ba --- /dev/null +++ b/diffusion_policy/env/block_pushing/block_pushing.py @@ -0,0 +1,1092 @@ +# coding=utf-8 
+# Copyright 2022 The Reach ML Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Simple block environments for the XArm.""" + +import collections +import enum +import math +import time +from typing import Dict, List, Optional, Tuple, Union + +import gym +from gym import spaces +from gym.envs import registration +from diffusion_policy.env.block_pushing.utils import utils_pybullet +from diffusion_policy.env.block_pushing.utils import xarm_sim_robot +from diffusion_policy.env.block_pushing.utils.pose3d import Pose3d +from diffusion_policy.env.block_pushing.utils.utils_pybullet import ObjState +from diffusion_policy.env.block_pushing.utils.utils_pybullet import XarmState +import numpy as np +from scipy.spatial import transform +import pybullet +import pybullet_utils.bullet_client as bullet_client + +import matplotlib.pyplot as plt + +BLOCK_URDF_PATH = "third_party/py/envs/assets/block.urdf" +PLANE_URDF_PATH = "third_party/bullet/examples/pybullet/gym/pybullet_data/" "plane.urdf" +WORKSPACE_URDF_PATH = "third_party/py/envs/assets/workspace.urdf" +ZONE_URDF_PATH = "third_party/py/envs/assets/zone.urdf" +INSERT_URDF_PATH = "third_party/py/envs/assets/insert.urdf" + +EFFECTOR_HEIGHT = 0.06 +EFFECTOR_DOWN_ROTATION = transform.Rotation.from_rotvec([0, math.pi, 0]) + +WORKSPACE_BOUNDS = np.array(((0.15, -0.5), (0.7, 0.5))) + +# Min/max bounds calculated from oracle data using: +# ibc/environments/board2d_dataset_statistics.ipynb +# to calculate [mean - 3 * std, mean 
+ 3 * std] using the oracle data. +# pylint: disable=line-too-long +ACTION_MIN = np.array([-0.02547718, -0.02090043], np.float32) +ACTION_MAX = np.array([0.02869084, 0.04272365], np.float32) +EFFECTOR_TARGET_TRANSLATION_MIN = np.array( + [0.1774151772260666, -0.6287994794547558], np.float32 +) +EFFECTOR_TARGET_TRANSLATION_MAX = np.array( + [0.5654461532831192, 0.5441607423126698], np.float32 +) +EFFECTOR_TARGET_TO_BLOCK_TRANSLATION_MIN = np.array( + [-0.07369826920330524, -0.11395704373717308], np.float32 +) +EFFECTOR_TARGET_TO_BLOCK_TRANSLATION_MAX = np.array( + [0.10131562314927578, 0.19391131028532982], np.float32 +) +EFFECTOR_TARGET_TO_TARGET_TRANSLATION_MIN = np.array( + [-0.17813862301409245, -0.3309651017189026], np.float32 +) +EFFECTOR_TARGET_TO_TARGET_TRANSLATION_MAX = np.array( + [0.23726161383092403, 0.8404090404510498], np.float32 +) +BLOCK_ORIENTATION_COS_SIN_MIN = np.array( + [-2.0649861991405487, -0.6154364347457886], np.float32 +) +BLOCK_ORIENTATION_COS_SIN_MAX = np.array( + [1.6590178310871124, 1.8811014890670776], np.float32 +) +TARGET_ORIENTATION_COS_SIN_MIN = np.array( + [-1.0761439241468906, -0.8846937336493284], np.float32 +) +TARGET_ORIENTATION_COS_SIN_MAX = np.array( + [-0.8344330154359341, 0.8786859593819827], np.float32 +) + +# Hardcoded Pose joints to make sure we don't have surprises from using the +# IK solver on reset. 
The joint poses correspond to the Pose with: +# rotation = rotation3.Rotation3.from_axis_angle([0, 1, 0], math.pi) +# translation = np.array([0.3, -0.4, 0.07]) +INITIAL_JOINT_POSITIONS = np.array( + [ + -0.9254632489674508, + 0.6990770671568564, + -1.106629064060494, + 0.0006653351931553931, + 0.3987969742311386, + -4.063402065624296, + ] +) + +DEFAULT_CAMERA_POSE = (1.0, 0, 0.75) +DEFAULT_CAMERA_ORIENTATION = (np.pi / 4, np.pi, -np.pi / 2) +IMAGE_WIDTH = 320 +IMAGE_HEIGHT = 240 +CAMERA_INTRINSICS = ( + 0.803 * IMAGE_WIDTH, # fx + 0, + IMAGE_WIDTH / 2.0, # cx + 0, + 0.803 * IMAGE_WIDTH, # fy + IMAGE_HEIGHT / 2.0, # cy + 0, + 0, + 1, +) + +# "Realistic" visuals. +X_MIN_REAL = 0.15 +X_MAX_REAL = 0.6 +Y_MIN_REAL = -0.3048 +Y_MAX_REAL = 0.3048 +WORKSPACE_BOUNDS_REAL = np.array(((X_MIN_REAL, Y_MIN_REAL), (X_MAX_REAL, Y_MAX_REAL))) +WORKSPACE_URDF_PATH_REAL = "third_party/py/ibc/environments/assets/workspace_real.urdf" +CAMERA_POSE_REAL = (0.75, 0, 0.5) +CAMERA_ORIENTATION_REAL = (np.pi / 5, np.pi, -np.pi / 2) + +IMAGE_WIDTH_REAL = 320 +IMAGE_HEIGHT_REAL = 180 +CAMERA_INTRINSICS_REAL = ( + 0.803 * IMAGE_WIDTH_REAL, # fx + 0, + IMAGE_WIDTH_REAL / 2.0, # cx + 0, + 0.803 * IMAGE_WIDTH_REAL, # fy + IMAGE_HEIGHT_REAL / 2.0, # cy + 0, + 0, + 1, +) +# pylint: enable=line-too-long + + +def build_env_name(task, shared_memory, use_image_obs, use_normalized_env=False): + """Construct the env name from parameters.""" + if isinstance(task, str): + task = BlockTaskVariant[task] + env_name = "Block" + task.value + + if use_image_obs: + env_name = env_name + "Rgb" + if use_normalized_env: + env_name = env_name + "Normalized" + if shared_memory: + env_name = "Shared" + env_name + + env_name = env_name + "-v0" + + return env_name + + +class BlockTaskVariant(enum.Enum): + REACH = "Reach" + REACH_NORMALIZED = "ReachNormalized" + PUSH = "Push" + PUSH_NORMALIZED = "PushNormalized" + INSERT = "Insert" + + +def sleep_spin(sleep_time_sec): + """Spin wait sleep. 
Avoids time.sleep accuracy issues on Windows.""" + if sleep_time_sec <= 0: + return + t0 = time.perf_counter() + while time.perf_counter() - t0 < sleep_time_sec: + pass + + +class BlockPush(gym.Env): + """Simple XArm environment for block pushing.""" + + def __init__( + self, + control_frequency=10.0, + task=BlockTaskVariant.PUSH, + image_size=None, + shared_memory=False, + seed=None, + goal_dist_tolerance=0.01, + effector_height=None, + visuals_mode="default", + abs_action=False + ): + """Creates an env instance. + + Args: + control_frequency: Control frequency for the arm. Each env step will + advance the simulation by 1/control_frequency seconds. + task: enum for which task, see BlockTaskVariant enum. + image_size: Optional image size (height, width). If None, no image + observations will be used. + shared_memory: If True `pybullet.SHARED_MEMORY` is used to connect to + pybullet. Useful to debug. + seed: Optional seed for the environment. + goal_dist_tolerance: float, how far away from the goal to terminate. + effector_height: float, custom height for end effector. + visuals_mode: 'default' or 'real'. 
+ """ + # pybullet.connect(pybullet.GUI) + # pybullet.resetDebugVisualizerCamera( + # cameraDistance=1.5, + # cameraYaw=0, + # cameraPitch=-40, + # cameraTargetPosition=[0.55, -0.35, 0.2], + # ) + if visuals_mode != "default" and visuals_mode != "real": + raise ValueError("visuals_mode must be `real` or `default`.") + self._task = task + self._connection_mode = pybullet.DIRECT + if shared_memory: + self._connection_mode = pybullet.SHARED_MEMORY + + self.goal_dist_tolerance = goal_dist_tolerance + + self.effector_height = effector_height or EFFECTOR_HEIGHT + + self._visuals_mode = visuals_mode + if visuals_mode == "default": + self._camera_pose = DEFAULT_CAMERA_POSE + self._camera_orientation = DEFAULT_CAMERA_ORIENTATION + self.workspace_bounds = WORKSPACE_BOUNDS + self._image_size = image_size + self._camera_instrinsics = CAMERA_INTRINSICS + self._workspace_urdf_path = WORKSPACE_URDF_PATH + else: + self._camera_pose = CAMERA_POSE_REAL + self._camera_orientation = CAMERA_ORIENTATION_REAL + self.workspace_bounds = WORKSPACE_BOUNDS_REAL + self._image_size = image_size + self._camera_instrinsics = CAMERA_INTRINSICS_REAL + self._workspace_urdf_path = WORKSPACE_URDF_PATH_REAL + + self.action_space = spaces.Box(low=-0.1, high=0.1, shape=(2,)) # x, y + self.observation_space = self._create_observation_space(image_size) + + self._rng = np.random.RandomState(seed=seed) + self._block_ids = None + self._previous_state = None + self._robot = None + self._workspace_uid = None + self._target_id = None + self._target_pose = None + self._target_effector_pose = None + self._pybullet_client = None + self.reach_target_translation = None + self._setup_pybullet_scene() + self._saved_state = None + + assert isinstance(self._pybullet_client, bullet_client.BulletClient) + self._control_frequency = control_frequency + self._step_frequency = ( + 1 / self._pybullet_client.getPhysicsEngineParameters()["fixedTimeStep"] + ) + + self._last_loop_time = None + self._last_loop_frame_sleep_time = 
None + if self._step_frequency % self._control_frequency != 0: + raise ValueError( + "Control frequency should be a multiple of the " + "configured Bullet TimeStep." + ) + self._sim_steps_per_step = int(self._step_frequency / self._control_frequency) + + self.rendered_img = None + self._abs_action = abs_action + + # Use saved_state and restore to make reset safe as no simulation state has + # been updated at this state, but the assets are now loaded. + self.save_state() + self.reset() + + @property + def pybullet_client(self): + return self._pybullet_client + + @property + def robot(self): + return self._robot + + @property + def workspace_uid(self): + return self._workspace_uid + + @property + def target_effector_pose(self): + return self._target_effector_pose + + @property + def target_pose(self): + return self._target_pose + + @property + def control_frequency(self): + return self._control_frequency + + @property + def connection_mode(self): + return self._connection_mode + + def save_state(self): + self._saved_state = self._pybullet_client.saveState() + + def set_goal_dist_tolerance(self, val): + self.goal_dist_tolerance = val + + def get_control_frequency(self): + return self._control_frequency + + def compute_state(self): + return self._compute_state() + + def get_goal_translation(self): + """Return the translation component of the goal (2D).""" + if self._task == BlockTaskVariant.REACH: + return np.concatenate([self.reach_target_translation, [0]]) + else: + return self._target_pose.translation if self._target_pose else None + + def get_obj_ids(self): + return self._block_ids + + def _setup_workspace_and_robot(self, end_effector="suction"): + self._pybullet_client.resetSimulation() + self._pybullet_client.configureDebugVisualizer(pybullet.COV_ENABLE_GUI, 0) + self._pybullet_client.setPhysicsEngineParameter(enableFileCaching=0) + self._pybullet_client.setGravity(0, 0, -9.8) + + utils_pybullet.load_urdf( + self._pybullet_client, PLANE_URDF_PATH, 
basePosition=[0, 0, -0.001] + ) + self._workspace_uid = utils_pybullet.load_urdf( + self._pybullet_client, + self._workspace_urdf_path, + basePosition=[0.35, 0, 0.0], + ) + + self._robot = xarm_sim_robot.XArmSimRobot( + self._pybullet_client, + initial_joint_positions=INITIAL_JOINT_POSITIONS, + end_effector=end_effector, + color="white" if self._visuals_mode == "real" else "default", + ) + + def _setup_pybullet_scene(self): + self._pybullet_client = bullet_client.BulletClient(self._connection_mode) + + # Temporarily disable rendering to speed up loading URDFs. + pybullet.configureDebugVisualizer(pybullet.COV_ENABLE_RENDERING, 0) + + self._setup_workspace_and_robot() + + if self._task == BlockTaskVariant.INSERT: + target_urdf_path = INSERT_URDF_PATH + else: + target_urdf_path = ZONE_URDF_PATH + + self._target_id = utils_pybullet.load_urdf( + self._pybullet_client, target_urdf_path, useFixedBase=True + ) + self._block_ids = [ + utils_pybullet.load_urdf( + self._pybullet_client, BLOCK_URDF_PATH, useFixedBase=False + ) + ] + + # Re-enable rendering. + pybullet.configureDebugVisualizer(pybullet.COV_ENABLE_RENDERING, 1) + + self.step_simulation_to_stabilize() + + def step_simulation_to_stabilize(self, nsteps=100): + for _ in range(nsteps): + self._pybullet_client.stepSimulation() + + def seed(self, seed=None): + self._rng = np.random.RandomState(seed=seed) + + def _set_robot_target_effector_pose(self, pose): + self._target_effector_pose = pose + self._robot.set_target_effector_pose(pose) + + def reset(self, reset_poses=True): + workspace_center_x = 0.4 + + if reset_poses: + self._pybullet_client.restoreState(self._saved_state) + + rotation = transform.Rotation.from_rotvec([0, math.pi, 0]) + translation = np.array([0.3, -0.4, self.effector_height]) + starting_pose = Pose3d(rotation=rotation, translation=translation) + self._set_robot_target_effector_pose(starting_pose) + + # Reset block pose. 
+ block_x = workspace_center_x + self._rng.uniform(low=-0.1, high=0.1) + block_y = -0.2 + self._rng.uniform(low=-0.15, high=0.15) + block_translation = np.array([block_x, block_y, 0]) + block_sampled_angle = self._rng.uniform(math.pi) + block_rotation = transform.Rotation.from_rotvec([0, 0, block_sampled_angle]) + + self._pybullet_client.resetBasePositionAndOrientation( + self._block_ids[0], + block_translation.tolist(), + block_rotation.as_quat().tolist(), + ) + + # Reset target pose. + target_x = workspace_center_x + self._rng.uniform(low=-0.10, high=0.10) + target_y = 0.2 + self._rng.uniform(low=-0.15, high=0.15) + target_translation = np.array([target_x, target_y, 0.020]) + + target_sampled_angle = math.pi + self._rng.uniform( + low=-math.pi / 6, high=math.pi / 6 + ) + target_rotation = transform.Rotation.from_rotvec( + [0, 0, target_sampled_angle] + ) + + self._pybullet_client.resetBasePositionAndOrientation( + self._target_id, + target_translation.tolist(), + target_rotation.as_quat().tolist(), + ) + else: + ( + target_translation, + target_orientation_quat, + ) = self._pybullet_client.getBasePositionAndOrientation(self._target_id) + target_rotation = transform.Rotation.from_quat(target_orientation_quat) + target_translation = np.array(target_translation) + + self._target_pose = Pose3d( + rotation=target_rotation, translation=target_translation + ) + + if reset_poses: + self.step_simulation_to_stabilize() + + state = self._compute_state() + self._previous_state = state + + if self._task == BlockTaskVariant.REACH: + self._compute_reach_target(state) + + self._init_goal_distance = self._compute_goal_distance(state) + init_goal_eps = 1e-7 + assert self._init_goal_distance > init_goal_eps + self.best_fraction_reduced_goal_dist = 0.0 + + return state + + def _compute_goal_distance(self, state): + goal_translation = self.get_goal_translation() + if self._task != BlockTaskVariant.REACH: + goal_distance = np.linalg.norm( + state["block_translation"] - 
goal_translation[0:2] + ) + else: + goal_distance = np.linalg.norm( + state["effector_translation"] - goal_translation[0:2] + ) + return goal_distance + + def _compute_reach_target(self, state): + xy_block = state["block_translation"] + xy_target = state["target_translation"] + + xy_block_to_target = xy_target - xy_block + xy_dir_block_to_target = (xy_block_to_target) / np.linalg.norm( + xy_block_to_target + ) + self.reach_target_translation = xy_block + -1 * xy_dir_block_to_target * 0.05 + + def _compute_state(self): + effector_pose = self._robot.forward_kinematics() + block_position_and_orientation = ( + self._pybullet_client.getBasePositionAndOrientation(self._block_ids[0]) + ) + block_pose = Pose3d( + rotation=transform.Rotation.from_quat(block_position_and_orientation[1]), + translation=block_position_and_orientation[0], + ) + + def _yaw_from_pose(pose): + return np.array([pose.rotation.as_euler("xyz", degrees=False)[-1]]) + + obs = collections.OrderedDict( + block_translation=block_pose.translation[0:2], + block_orientation=_yaw_from_pose(block_pose), + effector_translation=effector_pose.translation[0:2], + effector_target_translation=self._target_effector_pose.translation[0:2], + target_translation=self._target_pose.translation[0:2], + target_orientation=_yaw_from_pose(self._target_pose), + ) + if self._image_size is not None: + obs["rgb"] = self._render_camera(self._image_size) + return obs + + def _step_robot_and_sim(self, action): + """Steps the robot and pybullet sim.""" + # Compute target_effector_pose by shifting the effector's pose by the + # action. 
+ if self._abs_action: + target_effector_translation = np.array([action[0], action[1], 0]) + else: + target_effector_translation = np.array( + self._target_effector_pose.translation + ) + np.array([action[0], action[1], 0]) + + target_effector_translation[0:2] = np.clip( + target_effector_translation[0:2], + self.workspace_bounds[0], + self.workspace_bounds[1], + ) + target_effector_translation[-1] = self.effector_height + target_effector_pose = Pose3d( + rotation=EFFECTOR_DOWN_ROTATION, translation=target_effector_translation + ) + + self._set_robot_target_effector_pose(target_effector_pose) + + # Update sleep time dynamically to stay near real-time. + frame_sleep_time = 0 + if self._connection_mode == pybullet.SHARED_MEMORY: + cur_time = time.time() + if self._last_loop_time is not None: + # Calculate the total, non-sleeping time from the previous frame, this + # includes the actual step as well as any compute that happens in the + # caller thread (model inference, etc). + compute_time = ( + cur_time + - self._last_loop_time + - self._last_loop_frame_sleep_time * self._sim_steps_per_step + ) + # Use this to calculate the current frame's total sleep time to ensure + # that env.step runs at policy rate. This is an estimate since the + # previous frame's compute time may not match the current frame. + total_sleep_time = max((1 / self._control_frequency) - compute_time, 0) + # Now spread this out over the inner sim steps. This doesn't change + # control in any way, but makes the animation appear smooth. + frame_sleep_time = total_sleep_time / self._sim_steps_per_step + else: + # No estimate of the previous frame's compute, assume it is zero. + frame_sleep_time = 1 / self._step_frequency + + # Cache end of this loop time, to compute sleep time on next iteration. 
+ self._last_loop_time = cur_time + self._last_loop_frame_sleep_time = frame_sleep_time + + for _ in range(self._sim_steps_per_step): + if self._connection_mode == pybullet.SHARED_MEMORY: + sleep_spin(frame_sleep_time) + self._pybullet_client.stepSimulation() + + def step(self, action): + self._step_robot_and_sim(action) + + state = self._compute_state() + + goal_distance = self._compute_goal_distance(state) + fraction_reduced_goal_distance = 1.0 - ( + goal_distance / self._init_goal_distance + ) + if fraction_reduced_goal_distance > self.best_fraction_reduced_goal_dist: + self.best_fraction_reduced_goal_dist = fraction_reduced_goal_distance + + done = False + reward = self.best_fraction_reduced_goal_dist + + # Terminate the episode if the block is close enough to the target. + if goal_distance < self.goal_dist_tolerance: + reward = 1.0 + done = True + + return state, reward, done, {} + + @property + def succeeded(self): + state = self._compute_state() + goal_distance = self._compute_goal_distance(state) + if goal_distance < self.goal_dist_tolerance: + return True + return False + + @property + def goal_distance(self): + state = self._compute_state() + return self._compute_goal_distance(state) + + def render(self, mode="rgb_array"): + if self._image_size is not None: + image_size = self._image_size + else: + # This allows rendering even for state-only obs, + # for visualization. + image_size = (IMAGE_HEIGHT, IMAGE_WIDTH) + + data = self._render_camera(image_size=(image_size[0], image_size[1])) + if mode == "human": + if self.rendered_img is None: + self.rendered_img = plt.imshow( + np.zeros((image_size[0], image_size[1], 4)) + ) + else: + self.rendered_img.set_data(data) + plt.draw() + plt.pause(0.00001) + return data + + def close(self): + self._pybullet_client.disconnect() + + def calc_camera_params(self, image_size): + # Mimic RealSense D415 camera parameters. + intrinsics = self._camera_instrinsics + + # Set default camera poses. 
+ front_position = self._camera_pose + front_rotation = self._camera_orientation + front_rotation = self._pybullet_client.getQuaternionFromEuler(front_rotation) + # Default camera configs. + zrange = (0.01, 10.0) + + # OpenGL camera settings. + lookdir = np.float32([0, 0, 1]).reshape(3, 1) + updir = np.float32([0, -1, 0]).reshape(3, 1) + rotation = self._pybullet_client.getMatrixFromQuaternion(front_rotation) + rotm = np.float32(rotation).reshape(3, 3) + lookdir = (rotm @ lookdir).reshape(-1) + updir = (rotm @ updir).reshape(-1) + lookat = front_position + lookdir + focal_len = intrinsics[0] + znear, zfar = zrange + viewm = self._pybullet_client.computeViewMatrix(front_position, lookat, updir) + fovh = (image_size[0] / 2) / focal_len + fovh = 180 * np.arctan(fovh) * 2 / np.pi + + # Notes: 1) FOV is vertical FOV 2) aspect must be float + aspect_ratio = image_size[1] / image_size[0] + projm = self._pybullet_client.computeProjectionMatrixFOV( + fovh, aspect_ratio, znear, zfar + ) + + return viewm, projm, front_position, lookat, updir + + def _render_camera(self, image_size): + """Render RGB image with RealSense configuration.""" + viewm, projm, _, _, _ = self.calc_camera_params(image_size) + + # Render with OpenGL camera settings. + _, _, color, _, _ = self._pybullet_client.getCameraImage( + width=image_size[1], + height=image_size[0], + viewMatrix=viewm, + projectionMatrix=projm, + flags=pybullet.ER_SEGMENTATION_MASK_OBJECT_AND_LINKINDEX, + renderer=pybullet.ER_BULLET_HARDWARE_OPENGL, + ) + + # Get color image. 
+ color_image_size = (image_size[0], image_size[1], 4)
+ color = np.array(color, dtype=np.uint8).reshape(color_image_size)
+ color = color[:, :, :3] # remove alpha channel
+
+ return color.astype(np.uint8)
+
+ def _create_observation_space(self, image_size):
+ pi2 = math.pi * 2
+
+ obs_dict = collections.OrderedDict(
+ block_translation=spaces.Box(low=-5, high=5, shape=(2,)), # x,y
+ block_orientation=spaces.Box(low=-pi2, high=pi2, shape=(1,)), # phi
+ effector_translation=spaces.Box(
+ low=self.workspace_bounds[0] - 0.1, # Small buffer for IK noise.
+ high=self.workspace_bounds[1] + 0.1,
+ ), # x,y
+ effector_target_translation=spaces.Box(
+ low=self.workspace_bounds[0] - 0.1, # Small buffer for IK noise.
+ high=self.workspace_bounds[1] + 0.1,
+ ), # x,y
+ target_translation=spaces.Box(low=-5, high=5, shape=(2,)), # x,y
+ target_orientation=spaces.Box(
+ low=-pi2,
+ high=pi2,
+ shape=(1,),
+ ), # theta
+ )
+ if image_size is not None:
+ obs_dict["rgb"] = spaces.Box(
+ low=0, high=255, shape=(image_size[0], image_size[1], 3), dtype=np.uint8
+ )
+ return spaces.Dict(obs_dict)
+
+ def get_pybullet_state(self):
+ """Save pybullet state of the scene.
+
+ Returns:
+ dict containing 'robots', 'robot_end_effectors', 'targets', 'objects',
+ each containing a list of ObjState. 
+ """ + state: Dict[str, List[ObjState]] = {} + + state["robots"] = [ + XarmState.get_bullet_state( + self._pybullet_client, + self.robot.xarm, + target_effector_pose=self._target_effector_pose, + goal_translation=self.get_goal_translation(), + ) + ] + + state["robot_end_effectors"] = [] + if self.robot.end_effector: + state["robot_end_effectors"].append( + ObjState.get_bullet_state( + self._pybullet_client, self.robot.end_effector + ) + ) + + state["targets"] = [] + if self._target_id: + state["targets"].append( + ObjState.get_bullet_state(self._pybullet_client, self._target_id) + ) + + state["objects"] = [] + for obj_id in self.get_obj_ids(): + state["objects"].append( + ObjState.get_bullet_state(self._pybullet_client, obj_id) + ) + + return state + + def set_pybullet_state(self, state): + """Restore pyullet state. + + WARNING: py_environment wrapper assumes environments aren't reset in their + constructor and will often reset the environment unintentionally. It is + always recommeneded that you call env.reset on the tfagents wrapper before + playback (replaying pybullet_state). + + Args: + state: dict containing 'robots', 'robot_end_effectors', 'targets', + 'objects', each containing a list of ObjState. + """ + + assert isinstance(state["robots"][0], XarmState) + xarm_state: XarmState = state["robots"][0] + xarm_state.set_bullet_state(self._pybullet_client, self.robot.xarm) + self._set_robot_target_effector_pose(xarm_state.target_effector_pose) + + def _set_state_safe(obj_state, obj_id): + if obj_state is not None: + assert obj_id is not None, "Cannot set state for missing object." 
+ obj_state.set_bullet_state(self._pybullet_client, obj_id) + else: + assert obj_id is None, f"No state found for obj_id {obj_id}" + + robot_end_effectors = state["robot_end_effectors"] + _set_state_safe( + None if not robot_end_effectors else robot_end_effectors[0], + self.robot.end_effector, + ) + + targets = state["targets"] + _set_state_safe(None if not targets else targets[0], self._target_id) + + obj_ids = self.get_obj_ids() + assert len(state["objects"]) == len(obj_ids), "State length mismatch" + for obj_state, obj_id in zip(state["objects"], obj_ids): + _set_state_safe(obj_state, obj_id) + + self.reset(reset_poses=False) + + +class BlockPushNormalized(gym.Env): + """Simple XArm environment for block pushing, normalized state and actions.""" + + def __init__( + self, + control_frequency=10.0, + task=BlockTaskVariant.PUSH_NORMALIZED, + image_size=None, + shared_memory=False, + seed=None, + ): + """Creates an env instance. + + Args: + control_frequency: Control frequency for the arm. Each env step will + advance the simulation by 1/control_frequency seconds. + task: enum for which task, see BlockTaskVariant enum. + image_size: Optional image size (height, width). If None, no image + observations will be used. + shared_memory: If True `pybullet.SHARED_MEMORY` is used to connect to + pybullet. Useful to debug. + seed: Optional seed for the environment. + """ + # Map normalized task to unnormalized task. 
+ if task == BlockTaskVariant.PUSH_NORMALIZED: + env_task = BlockTaskVariant.PUSH + elif task == BlockTaskVariant.REACH_NORMALIZED: + env_task = BlockTaskVariant.REACH + else: + raise ValueError("Unsupported task %s" % str(task)) + self._env = BlockPush( + control_frequency, env_task, image_size, shared_memory, seed + ) + self.action_space = spaces.Box(low=-1, high=1, shape=(2,)) + self.observation_space = spaces.Dict( + collections.OrderedDict( + effector_target_translation=spaces.Box(low=-1, high=1, shape=(2,)), + effector_target_to_block_translation=spaces.Box( + low=-1, high=1, shape=(2,) + ), + block_orientation_cos_sin=spaces.Box(low=-1, high=1, shape=(2,)), + effector_target_to_target_translation=spaces.Box( + low=-1, high=1, shape=(2,) + ), + target_orientation_cos_sin=spaces.Box(low=-1, high=1, shape=(2,)), + ) + ) + self.reset() + + def get_control_frequency(self): + return self._env.get_control_frequency() + + @property + def reach_target_translation(self): + return self._env.reach_target_translation + + def seed(self, seed=None): + self._env.seed(seed) + + def reset(self): + state = self._env.reset() + return self.calc_normalized_state(state) + + def step(self, action): + # The environment is normalized [mean-3*std, mean+3*std] -> [-1, 1]. 
+ action = np.clip(action, a_min=-1.0, a_max=1.0) + state, reward, done, info = self._env.step( + self.calc_unnormalized_action(action) + ) + state = self.calc_normalized_state(state) + reward = reward * 100 # Keep returns in [0, 100] + return state, reward, done, info + + def render(self, mode="rgb_array"): + return self._env.render(mode) + + def close(self): + self._env.close() + + @staticmethod + def _normalize(values, values_min, values_max): + offset = (values_max + values_min) * 0.5 + scale = (values_max - values_min) * 0.5 + return (values - offset) / scale # [min, max] -> [-1, 1] + + @staticmethod + def _unnormalize(values, values_min, values_max): + offset = (values_max + values_min) * 0.5 + scale = (values_max - values_min) * 0.5 + return values * scale + offset # [-1, 1] -> [min, max] + + @classmethod + def calc_normalized_action(cls, action): + return cls._normalize(action, ACTION_MIN, ACTION_MAX) + + @classmethod + def calc_unnormalized_action(cls, norm_action): + return cls._unnormalize(norm_action, ACTION_MIN, ACTION_MAX) + + @classmethod + def calc_normalized_state(cls, state): + + effector_target_translation = cls._normalize( + state["effector_target_translation"], + EFFECTOR_TARGET_TRANSLATION_MIN, + EFFECTOR_TARGET_TRANSLATION_MAX, + ) + + effector_target_to_block_translation = cls._normalize( + state["block_translation"] - state["effector_target_translation"], + EFFECTOR_TARGET_TO_BLOCK_TRANSLATION_MIN, + EFFECTOR_TARGET_TO_BLOCK_TRANSLATION_MAX, + ) + ori = state["block_orientation"][0] + block_orientation_cos_sin = cls._normalize( + np.array([math.cos(ori), math.sin(ori)], np.float32), + BLOCK_ORIENTATION_COS_SIN_MIN, + BLOCK_ORIENTATION_COS_SIN_MAX, + ) + + effector_target_to_target_translation = cls._normalize( + state["target_translation"] - state["effector_target_translation"], + EFFECTOR_TARGET_TO_TARGET_TRANSLATION_MIN, + EFFECTOR_TARGET_TO_TARGET_TRANSLATION_MAX, + ) + ori = state["target_orientation"][0] + target_orientation_cos_sin = 
cls._normalize(
+ np.array([math.cos(ori), math.sin(ori)], np.float32),
+ TARGET_ORIENTATION_COS_SIN_MIN,
+ TARGET_ORIENTATION_COS_SIN_MAX,
+ )
+
+ # Note: We do not include effector_translation in the normalized state.
+ # This means the unnormalized -> normalized mapping is not invertible.
+ return collections.OrderedDict(
+ effector_target_translation=effector_target_translation,
+ effector_target_to_block_translation=effector_target_to_block_translation,
+ block_orientation_cos_sin=block_orientation_cos_sin,
+ effector_target_to_target_translation=effector_target_to_target_translation,
+ target_orientation_cos_sin=target_orientation_cos_sin,
+ )
+
+ @classmethod
+ def calc_unnormalized_state(cls, norm_state):
+
+ effector_target_translation = cls._unnormalize(
+ norm_state["effector_target_translation"],
+ EFFECTOR_TARGET_TRANSLATION_MIN,
+ EFFECTOR_TARGET_TRANSLATION_MAX,
+ )
+ # Note: normalized state does not include effector_translation state, this
+ # means this component will be missing (and is marked nan). 
+ effector_translation = np.array([np.nan, np.nan], np.float32) + + effector_target_to_block_translation = cls._unnormalize( + norm_state["effector_target_to_block_translation"], + EFFECTOR_TARGET_TO_BLOCK_TRANSLATION_MIN, + EFFECTOR_TARGET_TO_BLOCK_TRANSLATION_MAX, + ) + block_translation = ( + effector_target_to_block_translation + effector_target_translation + ) + ori_cos_sin = cls._unnormalize( + norm_state["block_orientation_cos_sin"], + BLOCK_ORIENTATION_COS_SIN_MIN, + BLOCK_ORIENTATION_COS_SIN_MAX, + ) + block_orientation = np.array( + [math.atan2(ori_cos_sin[1], ori_cos_sin[0])], np.float32 + ) + + effector_target_to_target_translation = cls._unnormalize( + norm_state["effector_target_to_target_translation"], + EFFECTOR_TARGET_TO_TARGET_TRANSLATION_MIN, + EFFECTOR_TARGET_TO_TARGET_TRANSLATION_MAX, + ) + target_translation = ( + effector_target_to_target_translation + effector_target_translation + ) + ori_cos_sin = cls._unnormalize( + norm_state["target_orientation_cos_sin"], + TARGET_ORIENTATION_COS_SIN_MIN, + TARGET_ORIENTATION_COS_SIN_MAX, + ) + target_orientation = np.array( + [math.atan2(ori_cos_sin[1], ori_cos_sin[0])], np.float32 + ) + + return collections.OrderedDict( + block_translation=block_translation, + block_orientation=block_orientation, + effector_translation=effector_translation, + effector_target_translation=effector_target_translation, + target_translation=target_translation, + target_orientation=target_orientation, + ) + + def get_pybullet_state(self): + return self._env.get_pybullet_state() + + def set_pybullet_state(self, state): + return self._env.set_pybullet_state(state) + + @property + def pybullet_client(self): + return self._env.pybullet_client + + def calc_camera_params(self, image_size): + return self._env.calc_camera_params(image_size) + + def _compute_state(self): + return self.calc_normalized_state( + self._env._compute_state() + ) # pylint: disable=protected-access + + +# Make sure we only register once to allow us to reload 
the module in colab for +# debugging. +if "BlockPush-v0" in registration.registry.env_specs: + del registration.registry.env_specs["BlockInsert-v0"] + del registration.registry.env_specs["BlockPush-v0"] + del registration.registry.env_specs["BlockPushNormalized-v0"] + del registration.registry.env_specs["BlockPushRgbNormalized-v0"] + del registration.registry.env_specs["BlockReach-v0"] + del registration.registry.env_specs["BlockReachNormalized-v0"] + del registration.registry.env_specs["BlockReachRgbNormalized-v0"] + del registration.registry.env_specs["SharedBlockInsert-v0"] + del registration.registry.env_specs["SharedBlockPush-v0"] + del registration.registry.env_specs["SharedBlockReach-v0"] + +registration.register( + id="BlockInsert-v0", + entry_point=BlockPush, + kwargs=dict(task=BlockTaskVariant.INSERT), + max_episode_steps=50, +) +registration.register(id="BlockPush-v0", entry_point=BlockPush, max_episode_steps=100) +registration.register( + id="BlockPushNormalized-v0", + entry_point=BlockPushNormalized, + kwargs=dict(task=BlockTaskVariant.PUSH_NORMALIZED), + max_episode_steps=100, +) +registration.register( + id="BlockPushRgb-v0", + entry_point=BlockPush, + max_episode_steps=100, + kwargs=dict(image_size=(IMAGE_HEIGHT, IMAGE_WIDTH)), +) +registration.register( + id="BlockPushRgbNormalized-v0", + entry_point=BlockPushNormalized, + kwargs=dict( + task=BlockTaskVariant.PUSH_NORMALIZED, image_size=(IMAGE_HEIGHT, IMAGE_WIDTH) + ), + max_episode_steps=100, +) +registration.register( + id="BlockReach-v0", + entry_point=BlockPush, + kwargs=dict(task=BlockTaskVariant.REACH), + max_episode_steps=50, +) +registration.register( + id="BlockReachRgb-v0", + entry_point=BlockPush, + max_episode_steps=100, + kwargs=dict(task=BlockTaskVariant.REACH, image_size=(IMAGE_HEIGHT, IMAGE_WIDTH)), +) +registration.register( + id="BlockReachNormalized-v0", + entry_point=BlockPushNormalized, + kwargs=dict(task=BlockTaskVariant.REACH_NORMALIZED), + max_episode_steps=50, +) 
+registration.register( + id="BlockReachRgbNormalized-v0", + entry_point=BlockPushNormalized, + kwargs=dict( + task=BlockTaskVariant.REACH_NORMALIZED, image_size=(IMAGE_HEIGHT, IMAGE_WIDTH) + ), + max_episode_steps=50, +) + +registration.register( + id="SharedBlockInsert-v0", + entry_point=BlockPush, + kwargs=dict(task=BlockTaskVariant.INSERT, shared_memory=True), + max_episode_steps=50, +) +registration.register( + id="SharedBlockPush-v0", + entry_point=BlockPush, + kwargs=dict(shared_memory=True), + max_episode_steps=100, +) +registration.register( + id="SharedBlockPushNormalized-v0", + entry_point=BlockPushNormalized, + kwargs=dict(task=BlockTaskVariant.PUSH_NORMALIZED, shared_memory=True), + max_episode_steps=100, +) +registration.register( + id="SharedBlockReach-v0", + entry_point=BlockPush, + kwargs=dict(task=BlockTaskVariant.REACH, shared_memory=True), + max_episode_steps=50, +) diff --git a/diffusion_policy/env/block_pushing/block_pushing_discontinuous.py b/diffusion_policy/env/block_pushing/block_pushing_discontinuous.py new file mode 100644 index 0000000..06e9dc7 --- /dev/null +++ b/diffusion_policy/env/block_pushing/block_pushing_discontinuous.py @@ -0,0 +1,338 @@ +# coding=utf-8 +# Copyright 2022 The Reach ML Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Discontinuous block pushing.""" +import collections +import enum +import math +from typing import List, Optional + +from gym import spaces +from gym.envs import registration +from diffusion_policy.env.block_pushing import block_pushing +from diffusion_policy.env.block_pushing.utils import utils_pybullet +from diffusion_policy.env.block_pushing.utils.pose3d import Pose3d +import numpy as np +from scipy.spatial import transform +import pybullet +import pybullet_utils.bullet_client as bullet_client + +ZONE2_URDF_PATH = "third_party/py/envs/assets/zone2.urdf" + +MIN_TARGET_DIST = 0.15 +NUM_RESET_ATTEMPTS = 1000 + + +def build_env_name(task, shared_memory, use_image_obs): + """Construct the env name from parameters.""" + del task + env_name = "BlockPushDiscontinuous" + + if use_image_obs: + env_name = env_name + "Rgb" + + if shared_memory: + env_name = "Shared" + env_name + + env_name = env_name + "-v0" + + return env_name + + +class BlockTaskVariant(enum.Enum): + REACH = "Reach" + REACH_NORMALIZED = "ReachNormalized" + PUSH = "Push" + PUSH_NORMALIZED = "PushNormalized" + INSERT = "Insert" + + +# pytype: skip-file +class BlockPushDiscontinuous(block_pushing.BlockPush): + """Discontinuous block pushing.""" + + def __init__( + self, + control_frequency=10.0, + task=BlockTaskVariant.PUSH, + image_size=None, + shared_memory=False, + seed=None, + goal_dist_tolerance=0.04, + ): + super(BlockPushDiscontinuous, self).__init__( + control_frequency=control_frequency, + task=task, + image_size=image_size, + shared_memory=shared_memory, + seed=seed, + goal_dist_tolerance=goal_dist_tolerance, + ) + + @property + def target_poses(self): + return self._target_poses + + def get_goal_translation(self): + """Return the translation component of the goal (2D).""" + if self._target_poses: + return [i.translation for i in self._target_poses] + else: + return None + + def _setup_pybullet_scene(self): + self._pybullet_client = bullet_client.BulletClient(self._connection_mode) + + # 
Temporarily disable rendering to speed up loading URDFs. + pybullet.configureDebugVisualizer(pybullet.COV_ENABLE_RENDERING, 0) + + self._setup_workspace_and_robot() + target_urdf_path = block_pushing.ZONE_URDF_PATH + + self._target_ids = [] + for _ in [block_pushing.ZONE_URDF_PATH, ZONE2_URDF_PATH]: + self._target_ids.append( + utils_pybullet.load_urdf( + self._pybullet_client, target_urdf_path, useFixedBase=True + ) + ) + self._block_ids = [ + utils_pybullet.load_urdf( + self._pybullet_client, block_pushing.BLOCK_URDF_PATH, useFixedBase=False + ) + ] + + # Re-enable rendering. + pybullet.configureDebugVisualizer(pybullet.COV_ENABLE_RENDERING, 1) + + self.step_simulation_to_stabilize() + + def _reset_target_poses(self, workspace_center_x): + """Resets target poses.""" + self._target_poses = [None for _ in range(len(self._target_ids))] + + def _reset_target_pose(idx, avoid=None): + def _get_random_translation(): + # Choose x,y randomly. + target_x = workspace_center_x + self._rng.uniform(low=-0.10, high=0.10) + # Fix ys for this environment. + if idx == 0: + target_y = 0 + else: + target_y = 0.4 + target_translation = np.array([target_x, target_y, 0.020]) + return target_translation + + if avoid is None: + target_translation = _get_random_translation() + else: + # Reject targets too close to `avoid`. + for _ in range(NUM_RESET_ATTEMPTS): + target_translation = _get_random_translation() + dist = np.linalg.norm(target_translation[0] - avoid[0]) + if dist > MIN_TARGET_DIST: + break + target_sampled_angle = math.pi + self._rng.uniform( + low=-math.pi / 6, high=math.pi / 6 + ) + target_rotation = transform.Rotation.from_rotvec( + [0, 0, target_sampled_angle] + ) + self._pybullet_client.resetBasePositionAndOrientation( + self._target_ids[idx], + target_translation.tolist(), + target_rotation.as_quat().tolist(), + ) + self._target_poses[idx] = Pose3d( + rotation=target_rotation, translation=target_translation + ) + + try_idx = 0 + while True: + # Choose the first target. 
+        _reset_target_pose(0)
+        # Choose the second target, avoiding the first.
+        _reset_target_pose(1, avoid=self._target_poses[0].translation)
+        dist = np.linalg.norm(
+            self._target_poses[0].translation[0]
+            - self._target_poses[1].translation[0]
+        )
+        if dist > MIN_TARGET_DIST:
+            break
+        try_idx += 1
+        if try_idx >= NUM_RESET_ATTEMPTS:
+            raise ValueError("could not find matching target")
+    assert dist > MIN_TARGET_DIST
+
+    def reset(self):
+        # Restore the stabilized scene saved at setup time, then re-randomize.
+        self._pybullet_client.restoreState(self._saved_state)
+
+        # Start the effector pointing straight down at a fixed home position.
+        rotation = transform.Rotation.from_rotvec([0, math.pi, 0])
+        translation = np.array([0.3, -0.4, block_pushing.EFFECTOR_HEIGHT])
+        starting_pose = Pose3d(rotation=rotation, translation=translation)
+        self._set_robot_target_effector_pose(starting_pose)
+
+        workspace_center_x = 0.4
+
+        # Reset block pose.
+        block_x = workspace_center_x + self._rng.uniform(low=-0.1, high=0.1)
+        block_y = -0.2 + self._rng.uniform(low=-0.15, high=0.15)
+        block_translation = np.array([block_x, block_y, 0])
+        block_sampled_angle = self._rng.uniform(math.pi)  # NOTE(review): this is uniform(low=pi, high=1.0), i.e. angles in (1.0, pi] — confirm uniform(0, pi) wasn't intended.
+        block_rotation = transform.Rotation.from_rotvec([0, 0, block_sampled_angle])
+
+        self._pybullet_client.resetBasePositionAndOrientation(
+            self._block_ids[0],
+            block_translation.tolist(),
+            block_rotation.as_quat().tolist(),
+        )
+
+        # Reset target pose.
+        self._reset_target_poses(workspace_center_x)
+
+        self.step_simulation_to_stabilize()
+        state = self._compute_state()
+        self._previous_state = state
+        # Closest-approach trackers used by _get_reward / succeeded / goal distance.
+        self.min_dist_to_first_goal = np.inf
+        self.min_dist_to_second_goal = np.inf
+        self.steps = 0
+        return state
+
+    def _compute_goal_distance(self, state):
+        # Goal distance: mean of the closest approaches made to the two targets.
+ return np.mean([self.min_dist_to_first_goal, self.min_dist_to_second_goal]) + + def _compute_state(self): + effector_pose = self._robot.forward_kinematics() + block_position_and_orientation = ( + self._pybullet_client.getBasePositionAndOrientation(self._block_ids[0]) + ) + block_pose = Pose3d( + rotation=transform.Rotation.from_quat(block_position_and_orientation[1]), + translation=block_position_and_orientation[0], + ) + + def _yaw_from_pose(pose): + return np.array([pose.rotation.as_euler("xyz", degrees=False)[-1]]) + + obs = collections.OrderedDict( + block_translation=block_pose.translation[0:2], + block_orientation=_yaw_from_pose(block_pose), + effector_translation=effector_pose.translation[0:2], + effector_target_translation=self._target_effector_pose.translation[0:2], + target_translation=self._target_poses[0].translation[0:2], + target_orientation=_yaw_from_pose(self._target_poses[0]), + target2_translation=self._target_poses[1].translation[0:2], + target2_orientation=_yaw_from_pose(self._target_poses[1]), + ) + if self._image_size is not None: + obs["rgb"] = self._render_camera(self._image_size) + return obs + + def step(self, action): + self._step_robot_and_sim(action) + state = self._compute_state() + reward = self._get_reward(state) + done = False + if reward > 0.0: + done = True + # Cache so we can compute success. + self.state = state + return state, reward, done, {} + + def dist(self, state, target): + # Reward is 1. blocks is inside any target. + return np.linalg.norm( + state["block_translation"] - state["%s_translation" % target] + ) + + def _get_reward(self, state): + """Reward is 1.0 if agent hits both goals and stays at second.""" + # This also statefully updates these values. 
+        self.min_dist_to_first_goal = min(
+            self.dist(state, "target"), self.min_dist_to_first_goal
+        )
+        self.min_dist_to_second_goal = min(
+            self.dist(state, "target2"), self.min_dist_to_second_goal
+        )
+
+        def _reward(thresh):
+            reward_first = True if self.min_dist_to_first_goal < thresh else False
+            reward_second = True if self.min_dist_to_second_goal < thresh else False
+            return 1.0 if (reward_first and reward_second) else 0.0
+
+        reward = _reward(self.goal_dist_tolerance)
+        return reward
+
+    @property
+    def succeeded(self):
+        thresh = self.goal_dist_tolerance
+        hit_first = True if self.min_dist_to_first_goal < thresh else False
+        hit_second = True if self.min_dist_to_second_goal < thresh else False  # FIX: was min_dist_to_first_goal (copy-paste bug) — the second goal was never actually required.
+        current_distance_to_second = self.dist(self.state, "target2")
+        still_at_second = True if current_distance_to_second < thresh else False
+        return hit_first and hit_second and still_at_second
+
+    def _create_observation_space(self, image_size):
+        pi2 = math.pi * 2
+
+        obs_dict = collections.OrderedDict(
+            block_translation=spaces.Box(low=-5, high=5, shape=(2,)),  # x,y
+            block_orientation=spaces.Box(low=-pi2, high=pi2, shape=(1,)),  # phi
+            effector_translation=spaces.Box(
+                # Small buffer for IK noise.
+                low=block_pushing.WORKSPACE_BOUNDS[0] - 0.1,
+                high=block_pushing.WORKSPACE_BOUNDS[1] + 0.1,
+            ),  # x,y
+            effector_target_translation=spaces.Box(
+                # Small buffer for IK noise.
+ low=block_pushing.WORKSPACE_BOUNDS[0] - 0.1, + high=block_pushing.WORKSPACE_BOUNDS[1] + 0.1, + ), # x,y + target_translation=spaces.Box(low=-5, high=5, shape=(2,)), # x,y + target_orientation=spaces.Box( + low=-pi2, + high=pi2, + shape=(1,), + ), # theta + target2_translation=spaces.Box(low=-5, high=5, shape=(2,)), # x,y + target2_orientation=spaces.Box( + low=-pi2, + high=pi2, + shape=(1,), + ), # theta + ) + if image_size is not None: + obs_dict["rgb"] = spaces.Box( + low=0, high=255, shape=(image_size[0], image_size[1], 3), dtype=np.uint8 + ) + return spaces.Dict(obs_dict) + + +if "BlockPushDiscontinuous-v0" in registration.registry.env_specs: + del registration.registry.env_specs["BlockPushDiscontinuous-v0"] + +registration.register( + id="BlockPushDiscontinuous-v0", + entry_point=BlockPushDiscontinuous, + max_episode_steps=200, +) + +registration.register( + id="BlockPushDiscontinuousRgb-v0", + entry_point=BlockPushDiscontinuous, + max_episode_steps=200, + kwargs=dict(image_size=(block_pushing.IMAGE_HEIGHT, block_pushing.IMAGE_WIDTH)), +) diff --git a/diffusion_policy/env/block_pushing/block_pushing_multimodal.py b/diffusion_policy/env/block_pushing/block_pushing_multimodal.py new file mode 100644 index 0000000..3ca9212 --- /dev/null +++ b/diffusion_policy/env/block_pushing/block_pushing_multimodal.py @@ -0,0 +1,802 @@ +# coding=utf-8 +# Copyright 2022 The Reach ML Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Multimodal block environments for the XArm.""" + +import collections +import logging +import math +from typing import Dict, List, Optional, Union +import copy +import time + +from gym import spaces +from gym.envs import registration +from diffusion_policy.env.block_pushing import block_pushing +from diffusion_policy.env.block_pushing.utils import utils_pybullet +from diffusion_policy.env.block_pushing.utils.pose3d import Pose3d +from diffusion_policy.env.block_pushing.utils.utils_pybullet import ObjState +from diffusion_policy.env.block_pushing.utils.utils_pybullet import XarmState +import numpy as np +from scipy.spatial import transform +import pybullet +import pybullet_utils.bullet_client as bullet_client + +# pytype: skip-file +BLOCK2_URDF_PATH = "third_party/py/envs/assets/block2.urdf" +ZONE2_URDF_PATH = "third_party/py/envs/assets/zone2.urdf" + +# When resetting multiple targets, they should all be this far apart. +MIN_BLOCK_DIST = 0.1 +MIN_TARGET_DIST = 0.12 +# pylint: enable=line-too-long +NUM_RESET_ATTEMPTS = 1000 + +# Random movement of blocks +RANDOM_X_SHIFT = 0.1 +RANDOM_Y_SHIFT = 0.15 + +logging.basicConfig( + level="INFO", + format="%(asctime)s [%(levelname)s] %(message)s", + filemode="w", +) +logger = logging.getLogger() + + +def build_env_name(task, shared_memory, use_image_obs): + """Construct the env name from parameters.""" + del task + env_name = "BlockPushMultimodal" + + if use_image_obs: + env_name = env_name + "Rgb" + + if shared_memory: + env_name = "Shared" + env_name + + env_name = env_name + "-v0" + + return env_name + + +class BlockPushEventManager: + def __init__(self): + self.event_steps = { + 'REACH_0': -1, + 'REACH_1': -1, + 'TARGET_0_0': -1, + 'TARGET_0_1': -1, + 'TARGET_1_0': -1, + 'TARGET_1_1': -1 + } + + def reach(self, step, block_id): + key = f'REACH_{block_id}' + if self.event_steps[key] < 0: + self.event_steps[key] = step + + def target(self, step, block_id, target_id): + key = f'TARGET_{block_id}_{target_id}' + if 
self.event_steps[key] < 0: + self.event_steps[key] = step + + def reset(self): + for key in list(self.event_steps): + self.event_steps[key] = -1 + + def get_info(self): + return copy.deepcopy(self.event_steps) + +class BlockPushMultimodal(block_pushing.BlockPush): + """2 blocks, 2 targets.""" + + def __init__( + self, + control_frequency=10.0, + task=block_pushing.BlockTaskVariant.PUSH, + image_size=None, + shared_memory=False, + seed=None, + goal_dist_tolerance=0.05, + abs_action=False + ): + """Creates an env instance. + + Args: + control_frequency: Control frequency for the arm. Each env step will + advance the simulation by 1/control_frequency seconds. + task: enum for which task, see BlockTaskVariant enum. + image_size: Optional image size (height, width). If None, no image + observations will be used. + shared_memory: If True `pybullet.SHARED_MEMORY` is used to connect to + pybullet. Useful to debug. + seed: Optional seed for the environment. + goal_dist_tolerance: float, how far away from the goal to terminate. + """ + self._target_ids = None + self._target_poses = None + self._event_manager = BlockPushEventManager() + super(BlockPushMultimodal, self).__init__( + control_frequency=control_frequency, + task=task, + image_size=image_size, + shared_memory=shared_memory, + seed=seed, + goal_dist_tolerance=goal_dist_tolerance, + ) + self._init_distance = [-1.0, -1.0] + self._in_target = [[-1.0, -1.0], [-1.0, -1.0]] + self._first_move = [-1, -1] + self._step_num = 0 + self._abs_action = abs_action + + @property + def target_poses(self): + return self._target_poses + + def get_goal_translation(self): + """Return the translation component of the goal (2D).""" + if self._target_poses: + return [i.translation for i in self._target_poses] + else: + return None + + def _setup_pybullet_scene(self): + self._pybullet_client = bullet_client.BulletClient(self._connection_mode) + + # Temporarily disable rendering to speed up loading URDFs. 
+ pybullet.configureDebugVisualizer(pybullet.COV_ENABLE_RENDERING, 0) + + self._setup_workspace_and_robot() + + self._target_ids = [ + utils_pybullet.load_urdf(self._pybullet_client, i, useFixedBase=True) + for i in [block_pushing.ZONE_URDF_PATH, ZONE2_URDF_PATH] + ] + self._block_ids = [] + for i in [block_pushing.BLOCK_URDF_PATH, BLOCK2_URDF_PATH]: + self._block_ids.append( + utils_pybullet.load_urdf(self._pybullet_client, i, useFixedBase=False) + ) + + # Re-enable rendering. + pybullet.configureDebugVisualizer(pybullet.COV_ENABLE_RENDERING, 1) + + self.step_simulation_to_stabilize() + + def _reset_block_poses(self, workspace_center_x): + """Resets block poses.""" + + # Helper for choosing random block position. + def _reset_block_pose(idx, add=0.0, avoid=None): + def _get_random_translation(): + block_x = ( + workspace_center_x + + add + + self._rng.uniform(low=-RANDOM_X_SHIFT, high=RANDOM_X_SHIFT) + ) + block_y = -0.2 + self._rng.uniform( + low=-RANDOM_Y_SHIFT, high=RANDOM_Y_SHIFT + ) + block_translation = np.array([block_x, block_y, 0]) + return block_translation + + if avoid is None: + block_translation = _get_random_translation() + else: + # Reject targets too close to `avoid`. + for _ in range(NUM_RESET_ATTEMPTS): + block_translation = _get_random_translation() + dist = np.linalg.norm(block_translation[0] - avoid[0]) + # print('block inner try_idx %d, dist %.3f' % (try_idx, dist)) + if dist > MIN_BLOCK_DIST: + break + block_sampled_angle = self._rng.uniform(math.pi) + block_rotation = transform.Rotation.from_rotvec([0, 0, block_sampled_angle]) + self._pybullet_client.resetBasePositionAndOrientation( + self._block_ids[idx], + block_translation.tolist(), + block_rotation.as_quat().tolist(), + ) + return block_translation + + # Reject targets too close to `avoid`. + for _ in range(NUM_RESET_ATTEMPTS): + # Reset first block. + b0_translation = _reset_block_pose(0) + # Reset second block away from first block. 
+ b1_translation = _reset_block_pose(1, avoid=b0_translation) + dist = np.linalg.norm(b0_translation[0] - b1_translation[0]) + if dist > MIN_BLOCK_DIST: + break + else: + raise ValueError("could not find matching block") + assert dist > MIN_BLOCK_DIST + + def _reset_target_poses(self, workspace_center_x): + """Resets target poses.""" + + def _reset_target_pose(idx, add=0.0, avoid=None): + def _get_random_translation(): + # Choose x,y randomly. + target_x = ( + workspace_center_x + + add + + self._rng.uniform( + low=-0.05 * RANDOM_X_SHIFT, high=0.05 * RANDOM_X_SHIFT + ) + ) + target_y = 0.2 + self._rng.uniform( + low=-0.05 * RANDOM_Y_SHIFT, high=0.05 * RANDOM_Y_SHIFT + ) + target_translation = np.array([target_x, target_y, 0.020]) + return target_translation + + if avoid is None: + target_translation = _get_random_translation() + else: + # Reject targets too close to `avoid`. + for _ in range(NUM_RESET_ATTEMPTS): + target_translation = _get_random_translation() + dist = np.linalg.norm(target_translation[0] - avoid[0]) + # print('target inner try_idx %d, dist %.3f' % (try_idx, dist)) + if dist > MIN_TARGET_DIST: + break + target_sampled_angle = math.pi + self._rng.uniform( + low=-math.pi / 30, high=math.pi / 30 + ) + target_rotation = transform.Rotation.from_rotvec( + [0, 0, target_sampled_angle] + ) + self._pybullet_client.resetBasePositionAndOrientation( + self._target_ids[idx], + target_translation.tolist(), + target_rotation.as_quat().tolist(), + ) + self._target_poses[idx] = Pose3d( + rotation=target_rotation, translation=target_translation + ) + + if self._target_poses is None: + self._target_poses = [None for _ in range(len(self._target_ids))] + + for _ in range(NUM_RESET_ATTEMPTS): + # Choose the first target. + add = 0.12 * self._rng.choice([-1, 1]) + # Randomly flip the location of the targets. 
+ _reset_target_pose(0, add=add) + _reset_target_pose(1, add=-add, avoid=self._target_poses[0].translation) + dist = np.linalg.norm( + self._target_poses[0].translation[0] + - self._target_poses[1].translation[0] + ) + if dist > MIN_TARGET_DIST: + break + else: + raise ValueError("could not find matching target") + assert dist > MIN_TARGET_DIST + + def _reset_object_poses(self, workspace_center_x, workspace_center_y): + # Reset block poses. + self._reset_block_poses(workspace_center_x) + + # Reset target poses. + self._reset_target_poses(workspace_center_x) + + self._init_distance = [-1.0, -1.0] + self._in_target = [[-1.0, -1.0], [-1.0, -1.0]] + self._step_num = 0 + + def reset(self, reset_poses=True): + workspace_center_x = 0.4 + workspace_center_y = 0.0 + + if reset_poses: + self._pybullet_client.restoreState(self._saved_state) + + rotation = transform.Rotation.from_rotvec([0, math.pi, 0]) + translation = np.array([0.3, -0.4, block_pushing.EFFECTOR_HEIGHT]) + starting_pose = Pose3d(rotation=rotation, translation=translation) + self._set_robot_target_effector_pose(starting_pose) + self._reset_object_poses(workspace_center_x, workspace_center_y) + + # else: + self._target_poses = [ + self._get_target_pose(idx) for idx in self._target_ids + ] + + if reset_poses: + self.step_simulation_to_stabilize() + + state = self._compute_state() + self._previous_state = state + self._event_manager.reset() + return state + + def _get_target_pose(self, idx): + ( + target_translation, + target_orientation_quat, + ) = self._pybullet_client.getBasePositionAndOrientation(idx) + target_rotation = transform.Rotation.from_quat(target_orientation_quat) + target_translation = np.array(target_translation) + return Pose3d(rotation=target_rotation, translation=target_translation) + + def _compute_reach_target(self, state): + xy_block = state["block_translation"] + xy_target = state["target_translation"] + + xy_block_to_target = xy_target - xy_block + xy_dir_block_to_target = 
(xy_block_to_target) / np.linalg.norm( + xy_block_to_target + ) + self.reach_target_translation = xy_block + -1 * xy_dir_block_to_target * 0.05 + + def _compute_state(self): + effector_pose = self._robot.forward_kinematics() + + def _get_block_pose(idx): + block_position_and_orientation = ( + self._pybullet_client.getBasePositionAndOrientation( + self._block_ids[idx] + ) + ) + block_pose = Pose3d( + rotation=transform.Rotation.from_quat( + block_position_and_orientation[1] + ), + translation=block_position_and_orientation[0], + ) + return block_pose + + block_poses = [_get_block_pose(i) for i in range(len(self._block_ids))] + + def _yaw_from_pose(pose): + return np.array([pose.rotation.as_euler("xyz", degrees=False)[-1] % np.pi]) + + obs = collections.OrderedDict( + block_translation=block_poses[0].translation[0:2], + block_orientation=_yaw_from_pose(block_poses[0]), + block2_translation=block_poses[1].translation[0:2], + block2_orientation=_yaw_from_pose(block_poses[1]), + effector_translation=effector_pose.translation[0:2], + effector_target_translation=self._target_effector_pose.translation[0:2], + target_translation=self._target_poses[0].translation[0:2], + target_orientation=_yaw_from_pose(self._target_poses[0]), + target2_translation=self._target_poses[1].translation[0:2], + target2_orientation=_yaw_from_pose(self._target_poses[1]), + ) + + for i in range(2): + new_distance = np.linalg.norm( + block_poses[i].translation[0:2] + ) # + np.linalg.norm(_yaw_from_pose(block_poses[i])) + if self._init_distance[i] == -1: + self._init_distance[i] = new_distance + else: + if self._init_distance[i] != 100: + if np.abs(new_distance - self._init_distance[i]) > 1e-3: + logger.info(f"Block {i} moved on step {self._step_num}") + self._event_manager.reach(step=self._step_num, block_id=i) + self._init_distance[i] = 100 + + self._step_num += 1 + if self._image_size is not None: + obs["rgb"] = self._render_camera(self._image_size) + return obs + + def step(self, action): + 
self._step_robot_and_sim(action) + + state = self._compute_state() + done = False + reward = self._get_reward(state) + if reward >= 0.5: + # Terminate the episode if both blocks are close enough to the targets. + done = True + + info = self._event_manager.get_info() + return state, reward, done, info + + def _step_robot_and_sim(self, action): + """Steps the robot and pybullet sim.""" + # Compute target_effector_pose by shifting the effector's pose by the + # action. + if self._abs_action: + target_effector_translation = np.array([action[0], action[1], 0]) + else: + target_effector_translation = np.array( + self._target_effector_pose.translation + ) + np.array([action[0], action[1], 0]) + + target_effector_translation[0:2] = np.clip( + target_effector_translation[0:2], + self.workspace_bounds[0], + self.workspace_bounds[1], + ) + target_effector_translation[-1] = self.effector_height + target_effector_pose = Pose3d( + rotation=block_pushing.EFFECTOR_DOWN_ROTATION, translation=target_effector_translation + ) + + self._set_robot_target_effector_pose(target_effector_pose) + + # Update sleep time dynamically to stay near real-time. + frame_sleep_time = 0 + if self._connection_mode == pybullet.SHARED_MEMORY: + cur_time = time.time() + if self._last_loop_time is not None: + # Calculate the total, non-sleeping time from the previous frame, this + # includes the actual step as well as any compute that happens in the + # caller thread (model inference, etc). + compute_time = ( + cur_time + - self._last_loop_time + - self._last_loop_frame_sleep_time * self._sim_steps_per_step + ) + # Use this to calculate the current frame's total sleep time to ensure + # that env.step runs at policy rate. This is an estimate since the + # previous frame's compute time may not match the current frame. + total_sleep_time = max((1 / self._control_frequency) - compute_time, 0) + # Now spread this out over the inner sim steps. 
This doesn't change + # control in any way, but makes the animation appear smooth. + frame_sleep_time = total_sleep_time / self._sim_steps_per_step + else: + # No estimate of the previous frame's compute, assume it is zero. + frame_sleep_time = 1 / self._step_frequency + + # Cache end of this loop time, to compute sleep time on next iteration. + self._last_loop_time = cur_time + self._last_loop_frame_sleep_time = frame_sleep_time + + for _ in range(self._sim_steps_per_step): + if self._connection_mode == pybullet.SHARED_MEMORY: + block_pushing.sleep_spin(frame_sleep_time) + self._pybullet_client.stepSimulation() + + def _get_reward(self, state): + # Reward is 1. if both blocks are inside targets, but not the same target. + targets = ["target", "target2"] + + def _block_target_dist(block, target): + return np.linalg.norm( + state["%s_translation" % block] - state["%s_translation" % target] + ) + + def _closest_target(block): + # Distances to all targets. + dists = [_block_target_dist(block, t) for t in targets] + # Which is closest. + closest_target = targets[np.argmin(dists)] + closest_dist = np.min(dists) + # Is it in the closest target? 
+ in_target = closest_dist < self.goal_dist_tolerance + return closest_target, in_target + + blocks = ["block", "block2"] + + reward = 0.0 + + for t_i, t in enumerate(targets): + for b_i, b in enumerate(blocks): + if self._in_target[t_i][b_i] == -1: + dist = _block_target_dist(b, t) + if dist < self.goal_dist_tolerance: + self._in_target[t_i][b_i] = 0 + logger.info( + f"Block {b_i} entered target {t_i} on step {self._step_num}" + ) + self._event_manager.target(step=self._step_num, block_id=b_i, target_id=t_i) + reward += 0.49 + + b0_closest_target, b0_in_target = _closest_target("block") + b1_closest_target, b1_in_target = _closest_target("block2") + # reward = 0.0 + if b0_in_target and b1_in_target and (b0_closest_target != b1_closest_target): + reward = 0.51 + return reward + + def _compute_goal_distance(self, state): + blocks = ["block", "block2"] + + def _target_block_dist(target, block): + return np.linalg.norm( + state["%s_translation" % block] - state["%s_translation" % target] + ) + + def _closest_block_dist(target): + dists = [_target_block_dist(target, b) for b in blocks] + closest_dist = np.min(dists) + return closest_dist + + t0_closest_dist = _closest_block_dist("target") + t1_closest_dist = _closest_block_dist("target2") + return np.mean([t0_closest_dist, t1_closest_dist]) + + @property + def succeeded(self): + state = self._compute_state() + reward = self._get_reward(state) + if reward >= 0.5: + return True + return False + + def _create_observation_space(self, image_size): + pi2 = math.pi * 2 + + obs_dict = collections.OrderedDict( + block_translation=spaces.Box(low=-5, high=5, shape=(2,)), # x,y + block_orientation=spaces.Box(low=-pi2, high=pi2, shape=(1,)), # phi + block2_translation=spaces.Box(low=-5, high=5, shape=(2,)), # x,y + block2_orientation=spaces.Box(low=-pi2, high=pi2, shape=(1,)), # phi + effector_translation=spaces.Box( + low=block_pushing.WORKSPACE_BOUNDS[0] - 0.1, + high=block_pushing.WORKSPACE_BOUNDS[1] + 0.1, + ), # x,y + 
effector_target_translation=spaces.Box( + low=block_pushing.WORKSPACE_BOUNDS[0] - 0.1, + high=block_pushing.WORKSPACE_BOUNDS[1] + 0.1, + ), # x,y + target_translation=spaces.Box(low=-5, high=5, shape=(2,)), # x,y + target_orientation=spaces.Box( + low=-pi2, + high=pi2, + shape=(1,), + ), # theta + target2_translation=spaces.Box(low=-5, high=5, shape=(2,)), # x,y + target2_orientation=spaces.Box( + low=-pi2, + high=pi2, + shape=(1,), + ), # theta + ) + if image_size is not None: + obs_dict["rgb"] = spaces.Box( + low=0, high=255, shape=(image_size[0], image_size[1], 3), dtype=np.uint8 + ) + return spaces.Dict(obs_dict) + + def get_pybullet_state(self): + """Save pybullet state of the scene. + + Returns: + dict containing 'robots', 'robot_end_effectors', 'targets', 'objects', + each containing a list of ObjState. + """ + state: Dict[str, List[ObjState]] = {} + + state["robots"] = [ + XarmState.get_bullet_state( + self._pybullet_client, + self.robot.xarm, + target_effector_pose=self._target_effector_pose, + goal_translation=None, + ) + ] + + state["robot_end_effectors"] = [] + if self.robot.end_effector: + state["robot_end_effectors"].append( + ObjState.get_bullet_state( + self._pybullet_client, self.robot.end_effector + ) + ) + + state["targets"] = [] + if self._target_ids: + for target_id in self._target_ids: + state["targets"].append( + ObjState.get_bullet_state(self._pybullet_client, target_id) + ) + + state["objects"] = [] + for obj_id in self.get_obj_ids(): + state["objects"].append( + ObjState.get_bullet_state(self._pybullet_client, obj_id) + ) + + return state + + def set_pybullet_state(self, state): + """Restore pyullet state. + + WARNING: py_environment wrapper assumes environments aren't reset in their + constructor and will often reset the environment unintentionally. It is + always recommeneded that you call env.reset on the tfagents wrapper before + playback (replaying pybullet_state). 
+ + Args: + state: dict containing 'robots', 'robot_end_effectors', 'targets', + 'objects', each containing a list of ObjState. + """ + + assert isinstance(state["robots"][0], XarmState) + xarm_state: XarmState = state["robots"][0] + xarm_state.set_bullet_state(self._pybullet_client, self.robot.xarm) + self._set_robot_target_effector_pose(xarm_state.target_effector_pose) + + def _set_state_safe(obj_state, obj_id): + if obj_state is not None: + assert obj_id is not None, "Cannot set state for missing object." + obj_state.set_bullet_state(self._pybullet_client, obj_id) + else: + assert obj_id is None, f"No state found for obj_id {obj_id}" + + robot_end_effectors = state["robot_end_effectors"] + _set_state_safe( + None if not robot_end_effectors else robot_end_effectors[0], + self.robot.end_effector, + ) + + for target_state, target_id in zip(state["targets"], self._target_ids): + _set_state_safe(target_state, target_id) + + obj_ids = self.get_obj_ids() + assert len(state["objects"]) == len(obj_ids), "State length mismatch" + for obj_state, obj_id in zip(state["objects"], obj_ids): + _set_state_safe(obj_state, obj_id) + + self.reset(reset_poses=False) + + +class BlockPushHorizontalMultimodal(BlockPushMultimodal): + def _reset_object_poses(self, workspace_center_x, workspace_center_y): + # Reset block poses. + self._reset_block_poses(workspace_center_y) + + # Reset target poses. + self._reset_target_poses(workspace_center_y) + + def _reset_block_poses(self, workspace_center_y): + """Resets block poses.""" + + # Helper for choosing random block position. 
+ def _reset_block_pose(idx, add=0.0, avoid=None): + def _get_random_translation(): + block_x = 0.35 + 0.5 * self._rng.uniform( + low=-RANDOM_X_SHIFT, high=RANDOM_X_SHIFT + ) + block_y = ( + workspace_center_y + + add + + 0.5 * self._rng.uniform(low=-RANDOM_Y_SHIFT, high=RANDOM_Y_SHIFT) + ) + block_translation = np.array([block_x, block_y, 0]) + return block_translation + + if avoid is None: + block_translation = _get_random_translation() + else: + # Reject targets too close to `avoid`. + for _ in range(NUM_RESET_ATTEMPTS): + block_translation = _get_random_translation() + dist = np.linalg.norm(block_translation[0] - avoid[0]) + # print('block inner try_idx %d, dist %.3f' % (try_idx, dist)) + if dist > MIN_BLOCK_DIST: + break + block_sampled_angle = self._rng.uniform(math.pi) + block_rotation = transform.Rotation.from_rotvec([0, 0, block_sampled_angle]) + self._pybullet_client.resetBasePositionAndOrientation( + self._block_ids[idx], + block_translation.tolist(), + block_rotation.as_quat().tolist(), + ) + return block_translation + + # Reject targets too close to `avoid`. + for _ in range(NUM_RESET_ATTEMPTS): + # Reset first block. + add = 0.2 * self._rng.choice([-1, 1]) + b0_translation = _reset_block_pose(0, add=add) + # Reset second block away from first block. + b1_translation = _reset_block_pose(1, add=-add, avoid=b0_translation) + dist = np.linalg.norm(b0_translation[0] - b1_translation[0]) + if dist > MIN_BLOCK_DIST: + break + else: + raise ValueError("could not find matching block") + assert dist > MIN_BLOCK_DIST + + def _reset_target_poses(self, workspace_center_y): + """Resets target poses.""" + + def _reset_target_pose(idx, add=0.0, avoid=None): + def _get_random_translation(): + # Choose x,y randomly. 
+ target_x = 0.5 + self._rng.uniform( + low=-0.05 * RANDOM_X_SHIFT, high=0.05 * RANDOM_X_SHIFT + ) + target_y = ( + workspace_center_y + + add + + self._rng.uniform( + low=-0.05 * RANDOM_Y_SHIFT, high=0.05 * RANDOM_Y_SHIFT + ) + ) + target_translation = np.array([target_x, target_y, 0.020]) + return target_translation + + if avoid is None: + target_translation = _get_random_translation() + else: + # Reject targets too close to `avoid`. + for _ in range(NUM_RESET_ATTEMPTS): + target_translation = _get_random_translation() + dist = np.linalg.norm(target_translation[0] - avoid[0]) + # print('target inner try_idx %d, dist %.3f' % (try_idx, dist)) + if dist > MIN_TARGET_DIST: + break + target_sampled_angle = math.pi + self._rng.uniform( + low=-math.pi / 30, high=math.pi / 30 + ) + target_rotation = transform.Rotation.from_rotvec( + [0, 0, target_sampled_angle] + ) + self._pybullet_client.resetBasePositionAndOrientation( + self._target_ids[idx], + target_translation.tolist(), + target_rotation.as_quat().tolist(), + ) + self._target_poses[idx] = Pose3d( + rotation=target_rotation, translation=target_translation + ) + + if self._target_poses is None: + self._target_poses = [None for _ in range(len(self._target_ids))] + + for _ in range(NUM_RESET_ATTEMPTS): + # Choose the first target. + add = 0.2 * self._rng.choice([-1, 1]) + # Randomly flip the location of the targets. 
+ _reset_target_pose(0, add=add) + _reset_target_pose(1, add=-add, avoid=self._target_poses[0].translation) + dist = np.linalg.norm( + self._target_poses[0].translation[0] + - self._target_poses[1].translation[0] + ) + break + # if dist > MIN_TARGET_DIST: + # break + else: + raise ValueError("could not find matching target") + # assert dist > MIN_TARGET_DIST + + +if "BlockPushMultimodal-v0" in registration.registry.env_specs: + del registration.registry.env_specs["BlockPushMultimodal-v0"] + +registration.register( + id="BlockPushMultimodal-v0", entry_point=BlockPushMultimodal, max_episode_steps=350 +) + +registration.register( + id="BlockPushMultimodalFlipped-v0", + entry_point=BlockPushHorizontalMultimodal, + max_episode_steps=25, +) + +registration.register( + id="SharedBlockPushMultimodal-v0", + entry_point=BlockPushMultimodal, + kwargs=dict(shared_memory=True), + max_episode_steps=350, +) +registration.register( + id="BlockPushMultimodalRgb-v0", + entry_point=BlockPushMultimodal, + max_episode_steps=350, + kwargs=dict(image_size=(block_pushing.IMAGE_HEIGHT, block_pushing.IMAGE_WIDTH)), +) diff --git a/diffusion_policy/env/block_pushing/oracles/discontinuous_push_oracle.py b/diffusion_policy/env/block_pushing/oracles/discontinuous_push_oracle.py new file mode 100644 index 0000000..2fd074b --- /dev/null +++ b/diffusion_policy/env/block_pushing/oracles/discontinuous_push_oracle.py @@ -0,0 +1,70 @@ +# coding=utf-8 +# Copyright 2022 The Reach ML Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Pushes to first target, waits, then pushes to second target."""
+
+import diffusion_policy.env.block_pushing.oracles.oriented_push_oracle as oriented_push_oracle_module
+import numpy as np
+from tf_agents.trajectories import policy_step
+from tf_agents.trajectories import time_step as ts
+from tf_agents.typing import types
+
+# Only used for debug visualization.
+import pybullet  # pylint: disable=unused-import
+
+
+class DiscontinuousOrientedPushOracle(oriented_push_oracle_module.OrientedPushOracle):
+    """Pushes the block to the first target, pauses, then pushes to the second.
+
+    State on top of the base-class phase machine:
+      _countdown: steps remaining in the pause (zero action is emitted while > 0).
+      _wait: pause length in steps, fixed at construction.
+      _goal_dist_tolerance: block/target distance that counts as "reached".
+    """
+
+    def __init__(self, env, goal_tolerance=0.04, wait=0):
+        super(DiscontinuousOrientedPushOracle, self).__init__(env)
+        self._countdown = 0
+        self._wait = wait
+        self._goal_dist_tolerance = goal_tolerance
+
+    def reset(self):
+        # Restart the push state machine and cancel any pending pause.
+        self.phase = "move_to_pre_block"
+        self._countdown = 0
+
+    def _action(self, time_step, policy_state):
+        if time_step.is_first():
+            self.reset()
+            # Move to first target first.
+            self._current_target = "target"
+            self._has_switched = False
+
+        def _block_target_dist(block, target):
+            # Euclidean distance between two named translation observations.
+            dist = np.linalg.norm(
+                time_step.observation["%s_translation" % block]
+                - time_step.observation["%s_translation" % target]
+            )
+            return dist
+
+        d1 = _block_target_dist("block", "target")
+        if d1 < self._goal_dist_tolerance and not self._has_switched:
+            self._countdown = self._wait
+            # Block reached the first target: start the pause, then retarget the
+            # same block at "target2". `_has_switched` makes this a one-shot switch.
+            self._has_switched = True
+            self._current_target = "target2"
+
+        xy_delta = self._get_action_for_block_target(
+            time_step, block="block", target=self._current_target
+        )
+
+        if self._countdown > 0:
+            # While pausing, override the computed push with a zero action.
+            xy_delta = np.zeros_like(xy_delta)
+            self._countdown -= 1
+
+        return policy_step.PolicyStep(action=np.asarray(xy_delta, dtype=np.float32))
diff --git a/diffusion_policy/env/block_pushing/oracles/multimodal_push_oracle.py b/diffusion_policy/env/block_pushing/oracles/multimodal_push_oracle.py
new file mode 100644
index 0000000..29a6386
--- /dev/null
+++ b/diffusion_policy/env/block_pushing/oracles/multimodal_push_oracle.py
@@ -0,0 +1,187 @@
+# coding=utf-8
+# Copyright 2022 The Reach ML Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Oracle for multimodal pushing task."""
+import diffusion_policy.env.block_pushing.oracles.oriented_push_oracle as oriented_push_oracle_module
+import numpy as np
+from tf_agents.trajectories import policy_step
+from tf_agents.trajectories import time_step as ts
+from tf_agents.typing import types
+
+# Only used for debug visualization.
+import pybullet  # pylint: disable=unused-import
+
+
+class MultimodalOrientedPushOracle(oriented_push_oracle_module.OrientedPushOracle):
+    """Oracle for multimodal pushing task.
+
+    Pushes two blocks to two targets. The (block, target) pairing and order are
+    drawn at random per episode via `_choose_goal_order`, which is what makes
+    the demonstration distribution multimodal. Between the two pushes the
+    end-effector retraces: first preblock pose, then its episode-start origin.
+    """
+
+    def __init__(self, env, goal_dist_tolerance=0.04, action_noise_std=0.0):
+        super(MultimodalOrientedPushOracle, self).__init__(env)
+        self._goal_dist_tolerance = goal_dist_tolerance
+        self._action_noise_std = action_noise_std
+
+    def reset(self):
+        # `origin` and `first_preblock` are lazily captured during the episode.
+        self.origin = None
+        self.first_preblock = None
+        self.phase = "move_to_pre_block"
+
+    def _get_move_to_preblock(self, xy_pre_block, xy_ee):
+        max_step_velocity = 0.3
+        # Go 5 cm away from the block, on the line between the block and target.
+        xy_delta_to_preblock = xy_pre_block - xy_ee
+        diff = np.linalg.norm(xy_delta_to_preblock)
+        if diff < 0.001:
+            self.phase = "move_to_block"
+            # Remember the first preblock pose so we can retrace to it later.
+            if self.first_preblock is None:
+                self.first_preblock = np.copy(xy_pre_block)
+        xy_delta = xy_delta_to_preblock
+        return xy_delta, max_step_velocity
+
+    def _get_action_for_block_target(self, time_step, block="block", target="target"):
+        # Specifying this as velocity makes it independent of control frequency.
+        max_step_velocity = 0.35
+
+        info = self._get_action_info(time_step, block, target)
+
+        # Capture the end-effector position on the first call of the episode.
+        if self.origin is None:
+            self.origin = np.copy(info.xy_ee)
+
+        if self.phase == "move_to_pre_block":
+            xy_delta, max_step_velocity = self._get_move_to_preblock(
+                info.xy_pre_block, info.xy_ee
+            )
+
+        if self.phase == "return_to_first_preblock":
+            max_step_velocity = 0.3
+            if self.first_preblock is None:
+                self.first_preblock = self.origin
+            # Return to the first preblock.
+            xy_delta_to_origin = self.first_preblock - info.xy_ee
+            diff = np.linalg.norm(xy_delta_to_origin)
+            if diff < 0.001:
+                self.phase = "return_to_origin"
+            xy_delta = xy_delta_to_origin
+
+        if self.phase == "return_to_origin":
+            max_step_velocity = 0.3
+            # Head back to the episode-start end-effector position before
+            # starting the second push.
+            xy_delta_to_origin = self.origin - info.xy_ee
+            diff = np.linalg.norm(xy_delta_to_origin)
+            if diff < 0.001:
+                self.phase = "move_to_pre_block"
+            xy_delta = xy_delta_to_origin
+
+        if self.phase == "move_to_block":
+            xy_delta = self._get_move_to_block(
+                info.xy_delta_to_nexttoblock,
+                info.theta_threshold_to_orient,
+                info.theta_error,
+            )
+
+        if self.phase == "push_block":
+            xy_delta = self._get_push_block(
+                info.theta_error,
+                info.theta_threshold_to_orient,
+                info.xy_delta_to_touchingblock,
+            )
+
+        orient_circle_diameter = 0.025
+
+        if self.phase == "orient_block_left" or self.phase == "orient_block_right":
+            max_step_velocity = 0.15
+
+        if self.phase == "orient_block_left":
+            xy_delta = self._get_orient_block_left(
+                info.xy_dir_block_to_ee,
+                orient_circle_diameter,
+                info.xy_block,
+                info.xy_ee,
+                info.theta_error,
+                info.theta_threshold_flat_enough,
+            )
+
+        if self.phase == "orient_block_right":
+            xy_delta = self._get_orient_block_right(
+                info.xy_dir_block_to_ee,
+                orient_circle_diameter,
+                info.xy_block,
+                info.xy_ee,
+                info.theta_error,
+                info.theta_threshold_flat_enough,
+            )
+
+        if self._action_noise_std != 0.0:
+            xy_delta += self._np_random_state.randn(2) * self._action_noise_std
+
+        # Clamp the step so speed stays below max_step_velocity at the env's
+        # control frequency.
+        max_step_distance = max_step_velocity * (1 / self._env.get_control_frequency())
+        length = np.linalg.norm(xy_delta)
+        if length > max_step_distance:
+            xy_direction = xy_delta / length
+            xy_delta = xy_direction * max_step_distance
+        return xy_delta
+
+    def _choose_goal_order(self):
+        """Chooses block->target order for multimodal pushing."""
+        # Define all possible ((first_block, first_target),
+        # (second_block, second_target)).
+        possible_orders = [
+            (("block", "target"), ("block2", "target2")),
+            (("block", "target2"), ("block2", "target")),
+            (("block2", "target"), ("block", "target2")),
+            (("block2", "target2"), ("block", "target")),
+        ]
+        # import pdb; pdb.set_trace()
+        # result = random.choice(possible_orders)
+        # NOTE(review): reaches into the env's private RNG so episode sampling
+        # is reproducible under the env seed.
+        result = possible_orders[self._env._rng.choice(len(possible_orders))]
+        return result
+
+    def _action(self, time_step, policy_state):
+        if time_step.is_first():
+            self.reset()
+            (
+                (self._first_block, self._first_target),
+                (self._second_block, self._second_target),
+            ) = self._choose_goal_order()
+            self._current_block, self._current_target = (
+                self._first_block,
+                self._first_target,
+            )
+            self._has_switched = False
+
+        def _block_target_dist(block, target):
+            # Euclidean distance between two named translation observations.
+            dist = np.linalg.norm(
+                time_step.observation["%s_translation" % block]
+                - time_step.observation["%s_translation" % target]
+            )
+            return dist
+
+        if (
+            _block_target_dist(self._first_block, self._first_target)
+            < self._goal_dist_tolerance
+            and not self._has_switched
+        ):
+            # If first block has been pushed to first target, switch to second block.
+            self._current_block, self._current_target = (
+                self._second_block,
+                self._second_target,
+            )
+            self._has_switched = True
+            self.phase = "return_to_first_preblock"
+
+        xy_delta = self._get_action_for_block_target(
+            time_step, block=self._current_block, target=self._current_target
+        )
+
+        return policy_step.PolicyStep(action=np.asarray(xy_delta, dtype=np.float32))
diff --git a/diffusion_policy/env/block_pushing/oracles/oriented_push_oracle.py b/diffusion_policy/env/block_pushing/oracles/oriented_push_oracle.py
new file mode 100644
index 0000000..39c4c6a
--- /dev/null
+++ b/diffusion_policy/env/block_pushing/oracles/oriented_push_oracle.py
@@ -0,0 +1,258 @@
+# coding=utf-8
+# Copyright 2022 The Reach ML Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Oracle for pushing task which orients the block then pushes it."""
+
+import diffusion_policy.env.block_pushing.oracles.pushing_info as pushing_info_module
+import numpy as np
+from tf_agents.policies import py_policy
+from tf_agents.trajectories import policy_step
+from tf_agents.trajectories import time_step as ts
+from tf_agents.typing import types
+
+# Only used for debug visualization.
+import pybullet  # pylint: disable=unused-import
+
+
+class OrientedPushOracle(py_policy.PyPolicy):
+    """Oracle for pushing task which orients the block then pushes it.
+
+    Scripted state machine over `self.phase`:
+      move_to_pre_block -> move_to_block -> push_block, with detours to
+      orient_block_left / orient_block_right when the block's angular error
+      exceeds a threshold. Phase transitions are set as side effects of the
+      `_get_*` helpers; the xy action for the *current* step is still the one
+      computed in the phase that was active on entry.
+    """
+
+    def __init__(self, env, action_noise_std=0.0):
+        super(OrientedPushOracle, self).__init__(
+            env.time_step_spec(), env.action_spec()
+        )
+        self._env = env
+        # Fixed seed: action noise is reproducible across runs.
+        self._np_random_state = np.random.RandomState(0)
+        self.phase = "move_to_pre_block"
+        self._action_noise_std = action_noise_std
+
+    def reset(self):
+        self.phase = "move_to_pre_block"
+
+    def get_theta_from_vector(self, vector):
+        # Angle of a 2D vector, in radians.
+        return np.arctan2(vector[1], vector[0])
+
+    def theta_to_rotation2d(self, theta):
+        # 2x2 counter-clockwise rotation matrix.
+        r = np.array([[np.cos(theta), -np.sin(theta)], [np.sin(theta), np.cos(theta)]])
+        return r
+
+    def rotate(self, theta, xy_dir_block_to_ee):
+        rot_2d = self.theta_to_rotation2d(theta)
+        return rot_2d @ xy_dir_block_to_ee
+
+    def _get_action_info(self, time_step, block, target):
+        """Computes the geometric quantities the phase machine consumes."""
+        xy_block = time_step.observation["%s_translation" % block][:2]
+        theta_block = time_step.observation["%s_orientation" % block]
+        xy_target = time_step.observation["%s_translation" % target][:2]
+        xy_ee = time_step.observation["effector_target_translation"][:2]
+
+        xy_block_to_target = xy_target - xy_block
+        xy_dir_block_to_target = (xy_block_to_target) / np.linalg.norm(
+            xy_block_to_target
+        )
+        theta_to_target = self.get_theta_from_vector(xy_dir_block_to_target)
+
+        theta_error = theta_to_target - theta_block
+        # Block has 4-way symmetry.
+        while theta_error > np.pi / 4:
+            theta_error -= np.pi / 2.0
+        while theta_error < -np.pi / 4:
+            theta_error += np.pi / 2.0
+
+        # Waypoints on the target->block line, behind the block at 5/3/1 cm.
+        xy_pre_block = xy_block + -xy_dir_block_to_target * 0.05
+        xy_nexttoblock = xy_block + -xy_dir_block_to_target * 0.03
+        xy_touchingblock = xy_block + -xy_dir_block_to_target * 0.01
+        xy_delta_to_nexttoblock = xy_nexttoblock - xy_ee
+        xy_delta_to_touchingblock = xy_touchingblock - xy_ee
+
+        xy_block_to_ee = xy_ee - xy_block
+        xy_dir_block_to_ee = xy_block_to_ee / np.linalg.norm(xy_block_to_ee)
+
+        theta_threshold_to_orient = 0.2
+        theta_threshold_flat_enough = 0.03
+        return pushing_info_module.PushingInfo(
+            xy_block=xy_block,
+            xy_ee=xy_ee,
+            xy_pre_block=xy_pre_block,
+            xy_delta_to_nexttoblock=xy_delta_to_nexttoblock,
+            xy_delta_to_touchingblock=xy_delta_to_touchingblock,
+            xy_dir_block_to_ee=xy_dir_block_to_ee,
+            theta_threshold_to_orient=theta_threshold_to_orient,
+            theta_threshold_flat_enough=theta_threshold_flat_enough,
+            theta_error=theta_error,
+        )
+
+    def _get_move_to_preblock(self, xy_pre_block, xy_ee):
+        max_step_velocity = 0.3
+        # Go 5 cm away from the block, on the line between the block and target.
+        xy_delta_to_preblock = xy_pre_block - xy_ee
+        diff = np.linalg.norm(xy_delta_to_preblock)
+        if diff < 0.001:
+            self.phase = "move_to_block"
+        xy_delta = xy_delta_to_preblock
+        return xy_delta, max_step_velocity
+
+    def _get_move_to_block(
+        self, xy_delta_to_nexttoblock, theta_threshold_to_orient, theta_error
+    ):
+        diff = np.linalg.norm(xy_delta_to_nexttoblock)
+        if diff < 0.001:
+            self.phase = "push_block"
+        # If need to re-orient, then re-orient.
+        if theta_error > theta_threshold_to_orient:
+            self.phase = "orient_block_left"
+        if theta_error < -theta_threshold_to_orient:
+            self.phase = "orient_block_right"
+        # Otherwise, push into the block.
+        xy_delta = xy_delta_to_nexttoblock
+        return xy_delta
+
+    def _get_push_block(
+        self, theta_error, theta_threshold_to_orient, xy_delta_to_touchingblock
+    ):
+        # If need to reorient, go back to move_to_pre_block, move_to_block first.
+        if theta_error > theta_threshold_to_orient:
+            self.phase = "move_to_pre_block"
+        if theta_error < -theta_threshold_to_orient:
+            self.phase = "move_to_pre_block"
+        xy_delta = xy_delta_to_touchingblock
+        return xy_delta
+
+    def _get_orient_block_left(
+        self,
+        xy_dir_block_to_ee,
+        orient_circle_diameter,
+        xy_block,
+        xy_ee,
+        theta_error,
+        theta_threshold_flat_enough,
+    ):
+        # Step around a small circle centered on the block to twist it left.
+        xy_dir_block_to_ee = self.rotate(0.2, xy_dir_block_to_ee)
+        xy_block_to_ee = xy_dir_block_to_ee * orient_circle_diameter
+        xy_push_left_spot = xy_block + xy_block_to_ee
+        xy_delta = xy_push_left_spot - xy_ee
+        if theta_error < theta_threshold_flat_enough:
+            self.phase = "move_to_pre_block"
+        return xy_delta
+
+    def _get_orient_block_right(
+        self,
+        xy_dir_block_to_ee,
+        orient_circle_diameter,
+        xy_block,
+        xy_ee,
+        theta_error,
+        theta_threshold_flat_enough,
+    ):
+        # Mirror of _get_orient_block_left: rotate the opposite way.
+        xy_dir_block_to_ee = self.rotate(-0.2, xy_dir_block_to_ee)
+        xy_block_to_ee = xy_dir_block_to_ee * orient_circle_diameter
+        xy_push_left_spot = xy_block + xy_block_to_ee
+        xy_delta = xy_push_left_spot - xy_ee
+        if theta_error > -theta_threshold_flat_enough:
+            self.phase = "move_to_pre_block"
+        return xy_delta
+
+    def _get_action_for_block_target(self, time_step, block="block", target="target"):
+        # Specifying this as velocity makes it independent of control frequency.
+        max_step_velocity = 0.35
+        info = self._get_action_info(time_step, block, target)
+
+        if self.phase == "move_to_pre_block":
+            xy_delta, max_step_velocity = self._get_move_to_preblock(
+                info.xy_pre_block, info.xy_ee
+            )
+
+        if self.phase == "move_to_block":
+            xy_delta = self._get_move_to_block(
+                info.xy_delta_to_nexttoblock,
+                info.theta_threshold_to_orient,
+                info.theta_error,
+            )
+
+        if self.phase == "push_block":
+            xy_delta = self._get_push_block(
+                info.theta_error,
+                info.theta_threshold_to_orient,
+                info.xy_delta_to_touchingblock,
+            )
+
+        orient_circle_diameter = 0.025
+
+        if self.phase == "orient_block_left" or self.phase == "orient_block_right":
+            max_step_velocity = 0.15
+
+        if self.phase == "orient_block_left":
+            xy_delta = self._get_orient_block_left(
+                info.xy_dir_block_to_ee,
+                orient_circle_diameter,
+                info.xy_block,
+                info.xy_ee,
+                info.theta_error,
+                info.theta_threshold_flat_enough,
+            )
+
+        if self.phase == "orient_block_right":
+            xy_delta = self._get_orient_block_right(
+                info.xy_dir_block_to_ee,
+                orient_circle_diameter,
+                info.xy_block,
+                info.xy_ee,
+                info.theta_error,
+                info.theta_threshold_flat_enough,
+            )
+
+        if self._action_noise_std != 0.0:
+            xy_delta += self._np_random_state.randn(2) * self._action_noise_std
+
+        # Clamp per-step displacement to respect max_step_velocity.
+        max_step_distance = max_step_velocity * (1 / self._env.get_control_frequency())
+        length = np.linalg.norm(xy_delta)
+        if length > max_step_distance:
+            xy_direction = xy_delta / length
+            xy_delta = xy_direction * max_step_distance
+        return xy_delta
+
+    def _action(self, time_step, policy_state):
+        if time_step.is_first():
+            self.reset()
+        xy_delta = self._get_action_for_block_target(
+            time_step, block="block", target="target"
+        )
+        return policy_step.PolicyStep(action=np.asarray(xy_delta, dtype=np.float32))
+
+
+class OrientedPushNormalizedOracle(py_policy.PyPolicy):
+    """Oracle for pushing task which orients the block then pushes it.
+
+    Wrapper that runs OrientedPushOracle on unnormalized observations and
+    returns the action normalized back into the env's action space.
+    """
+
+    def __init__(self, env):
+        super(OrientedPushNormalizedOracle, self).__init__(
+            env.time_step_spec(), env.action_spec()
+        )
+        self._oracle = OrientedPushOracle(env)
+        self._env = env
+
+    def reset(self):
+        self._oracle.reset()
+
+    def _action(self, time_step, policy_state):
+        # Un-normalize the observation, query the raw oracle, re-normalize
+        # its action.
+        time_step = time_step._asdict()
+        time_step["observation"] = self._env.calc_unnormalized_state(
+            time_step["observation"]
+        )
+        step = self._oracle._action(
+            ts.TimeStep(**time_step), policy_state
+        )  # pylint: disable=protected-access
+        return policy_step.PolicyStep(
+            action=self._env.calc_normalized_action(step.action)
+        )
diff --git a/diffusion_policy/env/block_pushing/oracles/pushing_info.py b/diffusion_policy/env/block_pushing/oracles/pushing_info.py
new file mode 100644
index 0000000..0c01cbb
--- /dev/null
+++ b/diffusion_policy/env/block_pushing/oracles/pushing_info.py
@@ -0,0 +1,35 @@
+# coding=utf-8
+# Copyright 2022 The Reach ML Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+ +"""Dataclass holding info needed for pushing oracles.""" +import dataclasses +from typing import Any + + +@dataclasses.dataclass +class PushingInfo: + """Holds onto info necessary for pushing state machine.""" + + xy_block: Any = None + xy_ee: Any = None + xy_pre_block: Any = None + xy_delta_to_nexttoblock: Any = None + xy_delta_to_touchingblock: Any = None + xy_dir_block_to_ee: Any = None + theta_threshold_to_orient: Any = None + theta_threshold_flat_enough: Any = None + theta_error: Any = None + obstacle_poses: Any = None + distance_to_target: Any = None diff --git a/diffusion_policy/env/block_pushing/oracles/reach_oracle.py b/diffusion_policy/env/block_pushing/oracles/reach_oracle.py new file mode 100644 index 0000000..8b7746c --- /dev/null +++ b/diffusion_policy/env/block_pushing/oracles/reach_oracle.py @@ -0,0 +1,61 @@ +# coding=utf-8 +# Copyright 2022 The Reach ML Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Reach oracle.""" +import numpy as np +from tf_agents.policies import py_policy +from tf_agents.trajectories import policy_step +from tf_agents.trajectories import time_step as ts +from tf_agents.typing import types + +# Only used for debug visualization. 
+import pybullet  # pylint: disable=unused-import
+
+
+class ReachOracle(py_policy.PyPolicy):
+    """Oracle for moving to a specific spot relative to the block and target.
+
+    Emits a velocity-limited xy step toward the env-provided reach target,
+    optionally perturbed with Gaussian noise.
+    """
+
+    def __init__(self, env, block_pushing_oracles_action_std=0.0):
+        super(ReachOracle, self).__init__(env.time_step_spec(), env.action_spec())
+        self._env = env
+        # Fixed seed: action noise is reproducible across runs.
+        self._np_random_state = np.random.RandomState(0)
+        self._block_pushing_oracles_action_std = block_pushing_oracles_action_std
+
+    def _action(self, time_step, policy_state):
+
+        # Specifying this as velocity makes it independent of control frequency.
+        max_step_velocity = 0.2
+
+        xy_ee = time_step.observation["effector_target_translation"]
+
+        # This should be observable from block and target translation,
+        # but re-using the computation from the env so that it's only done once, and
+        # used for reward / completion computation.
+        xy_pre_block = self._env.reach_target_translation
+
+        xy_delta = xy_pre_block - xy_ee
+
+        if self._block_pushing_oracles_action_std != 0.0:
+            xy_delta += (
+                self._np_random_state.randn(2) * self._block_pushing_oracles_action_std
+            )
+
+        # Clamp per-step displacement to respect max_step_velocity.
+        max_step_distance = max_step_velocity * (1 / self._env.get_control_frequency())
+        length = np.linalg.norm(xy_delta)
+        if length > max_step_distance:
+            xy_direction = xy_delta / length
+            xy_delta = xy_direction * max_step_distance
+
+        return policy_step.PolicyStep(action=np.asarray(xy_delta, dtype=np.float32))
diff --git a/diffusion_policy/env/block_pushing/utils/pose3d.py b/diffusion_policy/env/block_pushing/utils/pose3d.py
new file mode 100644
index 0000000..a25beeb
--- /dev/null
+++ b/diffusion_policy/env/block_pushing/utils/pose3d.py
@@ -0,0 +1,70 @@
+# coding=utf-8
+# Copyright 2022 The Reach ML Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""A simple 6DOF pose container.
+"""
+
+import dataclasses
+import numpy as np
+from scipy.spatial import transform
+
+
+class NoCopyAsDict(object):
+    """Base class for dataclasses. Avoids a copy in the asdict() call."""
+
+    def asdict(self):
+        """Replacement for dataclasses.asdict.
+
+        TF Dataset does not handle dataclasses.asdict, which uses copy.deepcopy when
+        setting values in the output dict. This causes issues with tf.Dataset.
+        Instead, shallow copy contents.
+
+        Returns:
+          dict containing contents of dataclass.
+        """
+        return {k.name: getattr(self, k.name) for k in dataclasses.fields(self)}
+
+
+@dataclasses.dataclass
+class Pose3d(NoCopyAsDict):
+    """Simple container for translation and rotation."""
+
+    # rotation: scipy Rotation; translation: 3-vector ndarray.
+    rotation: transform.Rotation
+    translation: np.ndarray
+
+    @property
+    def vec7(self):
+        # 7-vector [x, y, z, qx, qy, qz, qw] (scipy scalar-last quaternion order).
+        return np.concatenate([self.translation, self.rotation.as_quat()])
+
+    def serialize(self):
+        # JSON-friendly dict of plain lists; inverse of deserialize().
+        return {
+            "rotation": self.rotation.as_quat().tolist(),
+            "translation": self.translation.tolist(),
+        }
+
+    @staticmethod
+    def deserialize(data):
+        # Rebuilds a Pose3d from serialize() output.
+        return Pose3d(
+            rotation=transform.Rotation.from_quat(data["rotation"]),
+            translation=np.array(data["translation"]),
+        )
+
+    def __eq__(self, other):
+        # Exact component-wise equality of quaternion and translation. Note that
+        # equivalent rotations with opposite quaternion signs compare unequal.
+        return np.array_equal(
+            self.rotation.as_quat(), other.rotation.as_quat()
+        ) and np.array_equal(self.translation, other.translation)
+
+    def __ne__(self, other):
+        # Python 3 derives __ne__ from __eq__; kept explicit for clarity.
+        return not self.__eq__(other)
diff --git a/diffusion_policy/env/block_pushing/utils/utils_pybullet.py b/diffusion_policy/env/block_pushing/utils/utils_pybullet.py
new file mode 100644
index
0000000..d9a664d --- /dev/null +++ b/diffusion_policy/env/block_pushing/utils/utils_pybullet.py @@ -0,0 +1,451 @@ +# coding=utf-8 +# Copyright 2022 The Reach ML Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Assortment of utilities to interact with bullet within g3.""" +import dataclasses +import datetime +import getpass +import gzip +import json +import os +import time +from typing import Any, Dict, List, Optional, Tuple + +from absl import logging +from diffusion_policy.env.block_pushing.utils.pose3d import Pose3d +import numpy as np +from scipy.spatial import transform +import six + + +import pybullet +import pybullet_data +import pybullet_utils.bullet_client as bullet_client + +Vec3 = Tuple[float, float, float] +Vec4 = Tuple[float, float, float, float] +PYBULLET_STATE_VERSION = 2 # Basic versioning of serialized pybullet state. + + +# Note about rotation_to_matrix and matrix_to_rotation below: +# The abstractions below allow us to use older versions of scipy. 
+def rotation_to_matrix(rotation): + if hasattr(rotation, "as_dcm"): + return rotation.as_dcm() + else: + assert hasattr(rotation, "as_matrix") + return rotation.as_matrix() + + +def matrix_to_rotation(matrix): + if hasattr(transform.Rotation, "from_dcm"): + return transform.Rotation.from_dcm(matrix) + else: + assert hasattr(transform.Rotation, "from_matrix") + return transform.Rotation.from_matrix(matrix) + + +def load_urdf(pybullet_client, file_path, *args, **kwargs): + """Loads the given URDF filepath.""" + + # Handles most general file open case. + try: + if os.path.exists(file_path): + return pybullet_client.loadURDF(file_path, *args, **kwargs) + except pybullet_client.error: + pass + + try: + import pathlib + asset_path = str(pathlib.Path(__file__).parent.parent.joinpath('assets')) + if file_path.startswith("third_party/py/envs/assets/"): + pybullet_client.setAdditionalSearchPath(asset_path) + file_path = file_path[len("third_party/py/envs/assets/") :] + if file_path.startswith( + "third_party/bullet/examples/pybullet/gym/pybullet_data/" + ): + pybullet_client.setAdditionalSearchPath(pybullet_data.getDataPath()) + file_path = file_path[55:] + # logging.info("Loading URDF %s", file_path) + return pybullet_client.loadURDF(file_path, *args, **kwargs) + except pybullet.error: + raise FileNotFoundError("Cannot load the URDF file {}".format(file_path)) + + +def add_visual_sphere(client, center=(0, 0, 0), radius=0.1, rgba=(0.5, 0.5, 0.5, 0.5)): + """Add a sphere to bullet scene (visual only, no physics). + + Args: + client: pybullet client (or pybullet library handle). + center: Center of sphere. + radius: Sphere radius. + rgba: rgba color of sphere. + + Returns: + Unique integer bullet id of constructed object. 
+ """ + vis_obj_id = client.createVisualShape( + client.GEOM_SPHERE, radius=radius, rgbaColor=rgba + ) + obj_id = client.createMultiBody( + baseCollisionShapeIndex=-1, baseVisualShapeIndex=vis_obj_id, basePosition=center + ) + return obj_id + + +def pybullet_mat_to_numpy_4x4(pybullet_matrix): + assert len(pybullet_matrix) == 16, "pybullet matrix should be len 16" + return np.transpose(np.reshape(np.array(pybullet_matrix, dtype=np.float64), (4, 4))) + + +def decompose_view_matrix(pybullet_view_matrix): + """Decompose view matrix into pos + quat format (assumes mat is rigid!).""" + # It would be MUCH better to use something from bullet, however pybullet does + # not expose all of the linear algebra library. + mat = pybullet_mat_to_numpy_4x4(pybullet_view_matrix) + + # View matrix is now: + # | R_11 R_12 R_13 t_1 | + # | R_21 R_22 R_23 t_2 | + # | R_31 R_32 R_33 t_3 | + # | 0 0 0 1 | + + # R is the inverse eye to target at orientation, and t is R * eye. + mat_view_to_world = np.linalg.inv(mat) + + # mat_view_to_world is the view to world transform, therefore the translation + # component of this matrix is simply the world space position (since mat * + # (0, 0, 0, 1)) is just copying the right column. + world_xyz_view = np.copy(mat_view_to_world[0:3, 3]) + + mat_view_to_world[0:3, 3] = 0 # Zero out the position change. 
+ world_quat_view = matrix_to_rotation(mat_view_to_world).as_quat() + + return world_xyz_view, world_quat_view + + +def world_obj_to_view(world_xyz_obj, world_quat_obj, camera_view, client): + """Transform object into view space.""" + world_xyz_view, world_quat_view = decompose_view_matrix(camera_view) + view_xyz_world, view_quat_world = client.invertTransform( + world_xyz_view, world_quat_view + ) + view_xyz_obj, view_quat_obj = client.multiplyTransforms( + view_xyz_world, view_quat_world, world_xyz_obj, world_quat_obj + ) + + return view_xyz_obj, view_quat_obj + + +def image_xy_to_view_ray(xy, cam_width, cam_height, proj_mat_inv): + """Calculate view-space ray from pixel location.""" + # Recall (from http://www.songho.ca/opengl/gl_projectionmatrix.html): + # xyzw_clip = M_proj * xyzw_eye, and + # xyz_ndc = xyzw_clip[0:3] / xwzw_clip[3]. + xyz_ndc = np.array( + [2.0 * xy[0] / cam_width - 1.0, -(2.0 * xy[1] / cam_height - 1.0), 0] + ) # in [-1, 1] + xyzw_clip = np.concatenate([xyz_ndc, [1]]) + xyzw_eye = proj_mat_inv @ xyzw_clip + origin = np.zeros(3) + vec = xyzw_eye[:3] / max(np.linalg.norm(xyzw_eye[:3]), 1e-6) + return origin, vec + + +def view_ray_to_world_ray(origin, vec, view_mat_inv): + """Transform view-space ray into world space.""" + origin = view_mat_inv @ np.concatenate([origin, [1]]) + vec = view_mat_inv @ np.concatenate([vec, [0]]) + + return origin[:3], vec[:3] + + +def ray_to_plane_test(ray_origin, ray_vec, plane_origin, plane_normal): + """Perform a ray-plane intersection test.""" + ln = np.dot(plane_normal, ray_vec) + if abs(ln) < np.finfo(np.float32).eps: + return None + + # Solve for the intersection fraction t. 
+ t = np.dot(plane_normal, plane_origin - ray_origin) / ln + if t >= 0: + return ray_origin + ray_vec * t + else: + return None + + +def get_workspace(env): + ( + workspace_origin, + workspace_quat, + ) = env.pybullet_client.getBasePositionAndOrientation(env.workspace_uid) + workspace_normal = rotation_to_matrix(transform.Rotation.from_quat(workspace_quat))[ + 2, 0:3 + ] + + return workspace_origin, workspace_normal + + +def reset_camera_pose(env, view_type): + """Reset camera pose to canonical frame.""" + p = env.pybullet_client + + if view_type == "POLICY": + camera_info = p.getDebugVisualizerCamera() + image_size = (camera_info[0], camera_info[1]) + + viewm, _, front_position, lookat, _ = env.calc_camera_params(image_size) + + euler = matrix_to_rotation(pybullet_mat_to_numpy_4x4(viewm)[0:3, 0:3]).as_euler( + "xyz", degrees=False + ) + pitch = euler[1] + yaw = -euler[2] + # The distance is a bit far away (the GL view has higher FOV). + distance = np.linalg.norm(front_position - lookat) * 0.6 + elif view_type == "TOP_DOWN": + workspace_origin, _ = get_workspace(env) + distance = 0.5 + lookat = workspace_origin + yaw = np.pi / 2 + # Note: pi/2 pitch results in gimble lock and pybullet doesn't support it. + pitch = -(np.pi / 2 - 1e-5) + else: + raise ValueError("unsupported view_type %s" % view_type) + p.resetDebugVisualizerCamera( + cameraDistance=distance, + cameraYaw=360 * yaw / (2.0 * np.pi), + cameraPitch=360 * pitch / (2.0 * np.pi), + cameraTargetPosition=lookat, + ) + + +def _lists_to_tuple(obj): + if isinstance(obj, list): + return tuple([_lists_to_tuple(v) for v in obj]) + else: + return obj + + +@dataclasses.dataclass +class ObjState: + """A container for storing pybullet object state.""" + + obj_id: int + + # base_pose: (xyz, quat). + base_pose: Tuple[Vec3, Vec4] + # base_vel: (vel, ang_vel). 
+ base_vel: Tuple[Vec3, Vec3] + joint_info: Any + joint_state: Any + + @staticmethod + def get_bullet_state(client, obj_id): + """Read Pybullet internal state.""" + base_pose = client.getBasePositionAndOrientation(obj_id) + base_vel = client.getBaseVelocity(obj_id) + + joint_info = [] + joint_state = [] + for i in range(client.getNumJoints(obj_id)): + joint_state.append(client.getJointState(obj_id, i)) + joint_info.append(ObjState._get_joint_info(client, obj_id, i)) + + return ObjState( + obj_id=obj_id, + base_pose=base_pose, + base_vel=base_vel, + joint_info=tuple(joint_info), + joint_state=tuple(joint_state), + ) + + @staticmethod + def _get_joint_info(client, obj_id, joint_index): + ji = client.getJointInfo(obj_id, joint_index) + return tuple([v if not isinstance(v, bytes) else v.decode("utf-8") for v in ji]) + + def set_bullet_state(self, client, obj_id): + """Hard set the current bullet state.""" + xyz, quat = self.base_pose + client.resetBasePositionAndOrientation(obj_id, xyz, quat) + vel, ang_vel = self.base_vel + client.resetBaseVelocity(obj_id, vel, ang_vel) + + njoints = client.getNumJoints(obj_id) + if njoints != len(self.joint_info) or njoints != len(self.joint_state): + raise ValueError("Incorrect number of joint info state pairs.") + + for i, (joint_info, joint_state) in enumerate( + zip(self.joint_info, self.joint_state) + ): + joint_index = joint_info[0] + if joint_index != i: + raise ValueError("Joint index mismatch.") + + # Check that the current joint we're trying to restore state for has the + # same info as the state joint. 
+ cur_joint_info = ObjState._get_joint_info(client, obj_id, joint_index) + if cur_joint_info != joint_info: + raise ValueError( + "joint_info mismatch %s vs %s (expected)" + % (str(cur_joint_info), str(joint_info)) + ) + joint_position = joint_state[0] + joint_velocity = joint_state[1] + client.resetJointState( + obj_id, i, targetValue=joint_position, targetVelocity=joint_velocity + ) + + def serialize(self): + return { + "obj_id": self.obj_id, + "base_pose": self.base_pose, + "base_vel": self.base_vel, + "joint_info": self.joint_info, + "joint_state": self.joint_state, + } + + @staticmethod + def deserialize(data): + return ObjState( + obj_id=_lists_to_tuple(data["obj_id"]), + base_pose=_lists_to_tuple(data["base_pose"]), + base_vel=_lists_to_tuple(data["base_vel"]), + joint_info=_lists_to_tuple(data["joint_info"]), + joint_state=_lists_to_tuple(data["joint_state"]), + ) + + +@dataclasses.dataclass +class XarmState(ObjState): + """A container for storing pybullet robot state.""" + + # The set point of the robot's controller. 
+ target_effector_pose: Pose3d + goal_translation: Optional[Vec3] + + @staticmethod + def get_bullet_state(client, obj_id, target_effector_pose, goal_translation): + if goal_translation is not None: + goal_translation = tuple(goal_translation.tolist()) + return XarmState( + **dataclasses.asdict(ObjState.get_bullet_state(client, obj_id)), + target_effector_pose=target_effector_pose, + goal_translation=goal_translation + ) + + def serialize(self): + data = ObjState.serialize(self) + data["target_effector_pose"] = self.target_effector_pose.serialize() + if self.goal_translation is not None: + data["goal_translation"] = self.goal_translation + else: + data["goal_translation"] = [] + return data + + @staticmethod + def deserialize(data): + goal_translation = ( + None + if not data["goal_translation"] + else _lists_to_tuple(data["goal_translation"]) + ) + return XarmState( + obj_id=data["obj_id"], + base_pose=_lists_to_tuple(data["base_pose"]), + base_vel=_lists_to_tuple(data["base_vel"]), + joint_info=_lists_to_tuple(data["joint_info"]), + joint_state=_lists_to_tuple(data["joint_state"]), + goal_translation=goal_translation, + target_effector_pose=Pose3d.deserialize(data["target_effector_pose"]), + ) + + +def _serialize_pybullet_state(pybullet_state): + """Convert data to POD types.""" + if isinstance(pybullet_state, list): + return [_serialize_pybullet_state(entry) for entry in pybullet_state] + elif isinstance(pybullet_state, dict): + assert "_serialized_obj_name" not in pybullet_state + return { + key: _serialize_pybullet_state(value) + for key, value in pybullet_state.items() + } + elif isinstance(pybullet_state, (XarmState, ObjState)): + return { + "_serialized_obj_name": type(pybullet_state).__name__, + "_serialized_data": pybullet_state.serialize(), + } + elif isinstance(pybullet_state, int): + return pybullet_state + else: + raise ValueError( + "Unhandled type for object %s, type %s" + % (str(pybullet_state), type(pybullet_state)) + ) + + +def 
_deserialize_pybullet_state(state): + """Parse data from POD types.""" + if isinstance(state, list): + return [_deserialize_pybullet_state(item) for item in state] + elif isinstance(state, dict): + if "_serialized_obj_name" in state: + if state["_serialized_obj_name"] == XarmState.__name__: + return XarmState.deserialize(state["_serialized_data"]) + elif state["_serialized_obj_name"] == ObjState.__name__: + return ObjState.deserialize(state["_serialized_data"]) + else: + raise ValueError("Unsupported: %s" % state["_serialized_obj_name"]) + else: + return { + key: _deserialize_pybullet_state(value) for key, value in state.items() + } + elif isinstance(state, int): + return state + else: + raise ValueError("Unhandled type for object %s" % str(state)) + + +def write_pybullet_state(filename, pybullet_state, task, actions=None): + """Serialize pybullet state to json file.""" + import torch + data = { + "pybullet_state": _serialize_pybullet_state(pybullet_state), + "state_version": PYBULLET_STATE_VERSION, + "ts_ms": int(time.mktime(datetime.datetime.now().timetuple())) * 1000, + "user": getpass.getuser(), + "task": task, + "actions": actions if actions is not None else [], + } + torch.save(data, filename) + + +def read_pybullet_state(filename): + """Deserialize pybullet state from json file.""" + import torch + data = torch.load(filename) + + assert isinstance(data, dict) + + if data["state_version"] != PYBULLET_STATE_VERSION: + raise ValueError( + "incompatible state data (version %d, expected %d)" + % (data["state_version"], PYBULLET_STATE_VERSION) + ) + + data["pybullet_state"] = _deserialize_pybullet_state(data["pybullet_state"]) + return data diff --git a/diffusion_policy/env/block_pushing/utils/xarm_sim_robot.py b/diffusion_policy/env/block_pushing/utils/xarm_sim_robot.py new file mode 100644 index 0000000..600dd7e --- /dev/null +++ b/diffusion_policy/env/block_pushing/utils/xarm_sim_robot.py @@ -0,0 +1,230 @@ +# coding=utf-8 +# Copyright 2022 The Reach ML Authors. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""XArm Robot Kinematics.""" +from diffusion_policy.env.block_pushing.utils import utils_pybullet +from diffusion_policy.env.block_pushing.utils.pose3d import Pose3d +import numpy as np +from scipy.spatial import transform +import pybullet + +XARM_URDF_PATH = ( + "third_party/bullet/examples/pybullet/gym/pybullet_data/" "xarm/xarm6_robot.urdf" +) +SUCTION_URDF_PATH = "third_party/py/envs/assets/suction/" "suction-head-long.urdf" +CYLINDER_URDF_PATH = "third_party/py/envs/assets/suction/" "cylinder.urdf" +CYLINDER_REAL_URDF_PATH = "third_party/py/envs/assets/suction/" "cylinder_real.urdf" +HOME_JOINT_POSITIONS = np.deg2rad([0, -20, -80, 0, 100, -30]) + + +class XArmSimRobot: + """A simulated PyBullet XArm robot, mostly for forward/inverse kinematics.""" + + def __init__( + self, + pybullet_client, + initial_joint_positions=HOME_JOINT_POSITIONS, + end_effector="none", + color="default", + ): + self._pybullet_client = pybullet_client + self.initial_joint_positions = initial_joint_positions + + if color == "default": + self.xarm = utils_pybullet.load_urdf( + pybullet_client, XARM_URDF_PATH, [0, 0, 0] + ) + else: + raise ValueError("Unrecognized xarm color %s" % color) + + # Get revolute joints of robot (skip fixed joints). 
+ joints = [] + joint_indices = [] + for i in range(self._pybullet_client.getNumJoints(self.xarm)): + joint_info = self._pybullet_client.getJointInfo(self.xarm, i) + if joint_info[2] == pybullet.JOINT_REVOLUTE: + joints.append(joint_info[0]) + joint_indices.append(i) + # Note examples in pybullet do this, but it is not clear what the + # benefits are. + self._pybullet_client.changeDynamics( + self.xarm, i, linearDamping=0, angularDamping=0 + ) + + self._n_joints = len(joints) + self._joints = tuple(joints) + self._joint_indices = tuple(joint_indices) + + # Move robot to home joint configuration + self.reset_joints(self.initial_joint_positions) + self.effector_link = 6 + + if ( + end_effector == "suction" + or end_effector == "cylinder" + or end_effector == "cylinder_real" + ): + self.end_effector = self._setup_end_effector(end_effector) + else: + if end_effector != "none": + raise ValueError('end_effector "%s" is not supported.' % end_effector) + self.end_effector = None + + def _setup_end_effector(self, end_effector): + """Adds a suction or cylinder end effector.""" + pose = self.forward_kinematics() + if end_effector == "suction": + body = utils_pybullet.load_urdf( + self._pybullet_client, + SUCTION_URDF_PATH, + pose.translation, + pose.rotation.as_quat(), + ) + elif end_effector == "cylinder": + body = utils_pybullet.load_urdf( + self._pybullet_client, + CYLINDER_URDF_PATH, + pose.translation, + pose.rotation.as_quat(), + ) + elif end_effector == "cylinder_real": + body = utils_pybullet.load_urdf( + self._pybullet_client, + CYLINDER_REAL_URDF_PATH, + pose.translation, + pose.rotation.as_quat(), + ) + else: + raise ValueError('end_effector "%s" is not supported.' 
% end_effector) + + constraint_id = self._pybullet_client.createConstraint( + parentBodyUniqueId=self.xarm, + parentLinkIndex=6, + childBodyUniqueId=body, + childLinkIndex=-1, + jointType=pybullet.JOINT_FIXED, + jointAxis=(0, 0, 0), + parentFramePosition=(0, 0, 0), + childFramePosition=(0, 0, 0), + ) + self._pybullet_client.changeConstraint(constraint_id, maxForce=50) + + return body + + def reset_joints(self, joint_values): + """Sets the position of the Robot's joints. + + *Note*: This should only be used at the start while not running the + simulation resetJointState overrides all physics simulation. + + Args: + joint_values: Iterable with desired joint positions. + """ + for i in range(self._n_joints): + self._pybullet_client.resetJointState( + self.xarm, self._joints[i], joint_values[i] + ) + + def get_joints_measured(self): + joint_states = self._pybullet_client.getJointStates( + self.xarm, self._joint_indices + ) + joint_positions = np.array([state[0] for state in joint_states]) + joint_velocities = np.array([state[1] for state in joint_states]) + joint_torques = np.array([state[3] for state in joint_states]) + return joint_positions, joint_velocities, joint_torques + + def get_joint_positions(self): + joint_states = self._pybullet_client.getJointStates( + self.xarm, self._joint_indices + ) + joint_positions = np.array([state[0] for state in joint_states]) + return joint_positions + + def forward_kinematics(self): + """Forward kinematics.""" + effector_state = self._pybullet_client.getLinkState( + self.xarm, self.effector_link + ) + return Pose3d( + translation=np.array(effector_state[0]), + rotation=transform.Rotation.from_quat(effector_state[1]), + ) + + def inverse_kinematics( + self, world_effector_pose, max_iterations=100, residual_threshold=1e-10 + ): + """Inverse kinematics. + + Args: + world_effector_pose: Target Pose3d for the robot's end effector. 
max_iterations: Maximum number of solver iterations; the IK solution
            is refined until the residual falls below residual_threshold or
            this iteration cap is reached. Default is 100 iterations.
+ rgba_color = list(rgba_color[0:3]) + [alpha] + self._pybullet_client.changeVisualShape( + self.xarm, linkIndex=i, rgbaColor=rgba_color + ) diff --git a/diffusion_policy/env/kitchen/__init__.py b/diffusion_policy/env/kitchen/__init__.py new file mode 100644 index 0000000..f553f35 --- /dev/null +++ b/diffusion_policy/env/kitchen/__init__.py @@ -0,0 +1,30 @@ +"""Environments using kitchen and Franka robot.""" +from gym.envs.registration import register + +register( + id="kitchen-microwave-kettle-light-slider-v0", + entry_point="diffusion_policy.env.kitchen.v0:KitchenMicrowaveKettleLightSliderV0", + max_episode_steps=280, + reward_threshold=1.0, +) + +register( + id="kitchen-microwave-kettle-burner-light-v0", + entry_point="diffusion_policy.env.kitchen.v0:KitchenMicrowaveKettleBottomBurnerLightV0", + max_episode_steps=280, + reward_threshold=1.0, +) + +register( + id="kitchen-kettle-microwave-light-slider-v0", + entry_point="diffusion_policy.env.kitchen.v0:KitchenKettleMicrowaveLightSliderV0", + max_episode_steps=280, + reward_threshold=1.0, +) + +register( + id="kitchen-all-v0", + entry_point="diffusion_policy.env.kitchen.v0:KitchenAllV0", + max_episode_steps=280, + reward_threshold=1.0, +) diff --git a/diffusion_policy/env/kitchen/base.py b/diffusion_policy/env/kitchen/base.py new file mode 100644 index 0000000..61c9650 --- /dev/null +++ b/diffusion_policy/env/kitchen/base.py @@ -0,0 +1,153 @@ +import sys +import os +# hack to import adept envs +ADEPT_DIR = os.path.join(os.path.dirname(__file__), 'relay_policy_learning', 'adept_envs') +sys.path.append(ADEPT_DIR) + +import logging +import numpy as np +import adept_envs +from adept_envs.franka.kitchen_multitask_v0 import KitchenTaskRelaxV1 + +OBS_ELEMENT_INDICES = { + "bottom burner": np.array([11, 12]), + "top burner": np.array([15, 16]), + "light switch": np.array([17, 18]), + "slide cabinet": np.array([19]), + "hinge cabinet": np.array([20, 21]), + "microwave": np.array([22]), + "kettle": np.array([23, 24, 25, 26, 
27, 28, 29]), +} +OBS_ELEMENT_GOALS = { + "bottom burner": np.array([-0.88, -0.01]), + "top burner": np.array([-0.92, -0.01]), + "light switch": np.array([-0.69, -0.05]), + "slide cabinet": np.array([0.37]), + "hinge cabinet": np.array([0.0, 1.45]), + "microwave": np.array([-0.75]), + "kettle": np.array([-0.23, 0.75, 1.62, 0.99, 0.0, 0.0, -0.06]), +} +BONUS_THRESH = 0.3 +logger = logging.getLogger() + + +class KitchenBase(KitchenTaskRelaxV1): + # A string of element names. The robot's task is then to modify each of + # these elements appropriately. + TASK_ELEMENTS = [] + ALL_TASKS = [ + "bottom burner", + "top burner", + "light switch", + "slide cabinet", + "hinge cabinet", + "microwave", + "kettle", + ] + REMOVE_TASKS_WHEN_COMPLETE = True + TERMINATE_ON_TASK_COMPLETE = True + TERMINATE_ON_WRONG_COMPLETE = False + COMPLETE_IN_ANY_ORDER = ( + True # This allows for the tasks to be completed in arbitrary order. + ) + + def __init__( + self, dataset_url=None, ref_max_score=None, ref_min_score=None, + use_abs_action=False, + **kwargs + ): + self.tasks_to_complete = list(self.TASK_ELEMENTS) + self.goal_masking = True + super(KitchenBase, self).__init__(use_abs_action=use_abs_action, **kwargs) + + def set_goal_masking(self, goal_masking=True): + """Sets goal masking for goal-conditioned approaches (like RPL).""" + self.goal_masking = goal_masking + + def _get_task_goal(self, task=None, actually_return_goal=False): + if task is None: + task = ["microwave", "kettle", "bottom burner", "light switch"] + new_goal = np.zeros_like(self.goal) + if self.goal_masking and not actually_return_goal: + return new_goal + for element in task: + element_idx = OBS_ELEMENT_INDICES[element] + element_goal = OBS_ELEMENT_GOALS[element] + new_goal[element_idx] = element_goal + + return new_goal + + def reset_model(self): + self.tasks_to_complete = list(self.TASK_ELEMENTS) + return super(KitchenBase, self).reset_model() + + def _get_reward_n_score(self, obs_dict): + reward_dict, score = 
super(KitchenBase, self)._get_reward_n_score(obs_dict) + reward = 0.0 + next_q_obs = obs_dict["qp"] + next_obj_obs = obs_dict["obj_qp"] + next_goal = self._get_task_goal( + task=self.TASK_ELEMENTS, actually_return_goal=True + ) # obs_dict['goal'] + idx_offset = len(next_q_obs) + completions = [] + all_completed_so_far = True + for element in self.tasks_to_complete: + element_idx = OBS_ELEMENT_INDICES[element] + distance = np.linalg.norm( + next_obj_obs[..., element_idx - idx_offset] - next_goal[element_idx] + ) + complete = distance < BONUS_THRESH + condition = ( + complete and all_completed_so_far + if not self.COMPLETE_IN_ANY_ORDER + else complete + ) + if condition: # element == self.tasks_to_complete[0]: + print("Task {} completed!".format(element)) + completions.append(element) + all_completed_so_far = all_completed_so_far and complete + if self.REMOVE_TASKS_WHEN_COMPLETE: + [self.tasks_to_complete.remove(element) for element in completions] + bonus = float(len(completions)) + reward_dict["bonus"] = bonus + reward_dict["r_total"] = bonus + score = bonus + return reward_dict, score + + def step(self, a, b=None): + obs, reward, done, env_info = super(KitchenBase, self).step(a, b=b) + if self.TERMINATE_ON_TASK_COMPLETE: + done = not self.tasks_to_complete + if self.TERMINATE_ON_WRONG_COMPLETE: + all_goal = self._get_task_goal(task=self.ALL_TASKS) + for wrong_task in list(set(self.ALL_TASKS) - set(self.TASK_ELEMENTS)): + element_idx = OBS_ELEMENT_INDICES[wrong_task] + distance = np.linalg.norm(obs[..., element_idx] - all_goal[element_idx]) + complete = distance < BONUS_THRESH + if complete: + done = True + break + env_info["completed_tasks"] = set(self.TASK_ELEMENTS) - set( + self.tasks_to_complete + ) + return obs, reward, done, env_info + + def get_goal(self): + """Loads goal state from dataset for goal-conditioned approaches (like RPL).""" + raise NotImplementedError + + def _split_data_into_seqs(self, data): + """Splits dataset object into list of sequence 
dicts.""" + seq_end_idxs = np.where(data["terminals"])[0] + start = 0 + seqs = [] + for end_idx in seq_end_idxs: + seqs.append( + dict( + states=data["observations"][start : end_idx + 1], + actions=data["actions"][start : end_idx + 1], + ) + ) + start = end_idx + 1 + return seqs diff --git a/diffusion_policy/env/kitchen/kitchen_lowdim_wrapper.py b/diffusion_policy/env/kitchen/kitchen_lowdim_wrapper.py new file mode 100644 index 0000000..c6908a4 --- /dev/null +++ b/diffusion_policy/env/kitchen/kitchen_lowdim_wrapper.py @@ -0,0 +1,49 @@ +from typing import List, Dict, Optional, Optional +import numpy as np +import gym +from gym.spaces import Box +from diffusion_policy.env.kitchen.base import KitchenBase + +class KitchenLowdimWrapper(gym.Env): + def __init__(self, + env: KitchenBase, + init_qpos: Optional[np.ndarray]=None, + init_qvel: Optional[np.ndarray]=None, + render_hw = (240,360) + ): + self.env = env + self.init_qpos = init_qpos + self.init_qvel = init_qvel + self.render_hw = render_hw + + @property + def action_space(self): + return self.env.action_space + + @property + def observation_space(self): + return self.env.observation_space + + def seed(self, seed=None): + return self.env.seed(seed) + + def reset(self): + if self.init_qpos is not None: + # reset anyway to be safe, not very expensive + _ = self.env.reset() + # start from known state + self.env.set_state(self.init_qpos, self.init_qvel) + obs = self.env._get_obs() + return obs + # obs, _, _, _ = self.env.step(np.zeros_like( + # self.action_space.sample())) + # return obs + else: + return self.env.reset() + + def render(self, mode='rgb_array'): + h, w = self.render_hw + return self.env.render(mode=mode, width=w, height=h) + + def step(self, a): + return self.env.step(a) diff --git a/diffusion_policy/env/kitchen/kitchen_util.py b/diffusion_policy/env/kitchen/kitchen_util.py new file mode 100644 index 0000000..59852c1 --- /dev/null +++ b/diffusion_policy/env/kitchen/kitchen_util.py @@ -0,0 +1,51 @@ +import 
struct +import numpy as np + +def parse_mjl_logs(read_filename, skipamount): + with open(read_filename, mode='rb') as file: + fileContent = file.read() + headers = struct.unpack('iiiiiii', fileContent[:28]) + nq = headers[0] + nv = headers[1] + nu = headers[2] + nmocap = headers[3] + nsensordata = headers[4] + nuserdata = headers[5] + name_len = headers[6] + name = struct.unpack(str(name_len) + 's', fileContent[28:28+name_len])[0] + rem_size = len(fileContent[28 + name_len:]) + num_floats = int(rem_size/4) + dat = np.asarray(struct.unpack(str(num_floats) + 'f', fileContent[28+name_len:])) + recsz = 1 + nq + nv + nu + 7*nmocap + nsensordata + nuserdata + if rem_size % recsz != 0: + print("ERROR") + else: + dat = np.reshape(dat, (int(len(dat)/recsz), recsz)) + dat = dat.T + + time = dat[0,:][::skipamount] - 0*dat[0, 0] + qpos = dat[1:nq + 1, :].T[::skipamount, :] + qvel = dat[nq+1:nq+nv+1,:].T[::skipamount, :] + ctrl = dat[nq+nv+1:nq+nv+nu+1,:].T[::skipamount,:] + mocap_pos = dat[nq+nv+nu+1:nq+nv+nu+3*nmocap+1,:].T[::skipamount, :] + mocap_quat = dat[nq+nv+nu+3*nmocap+1:nq+nv+nu+7*nmocap+1,:].T[::skipamount, :] + sensordata = dat[nq+nv+nu+7*nmocap+1:nq+nv+nu+7*nmocap+nsensordata+1,:].T[::skipamount,:] + userdata = dat[nq+nv+nu+7*nmocap+nsensordata+1:,:].T[::skipamount,:] + + data = dict(nq=nq, + nv=nv, + nu=nu, + nmocap=nmocap, + nsensordata=nsensordata, + name=name, + time=time, + qpos=qpos, + qvel=qvel, + ctrl=ctrl, + mocap_pos=mocap_pos, + mocap_quat=mocap_quat, + sensordata=sensordata, + userdata=userdata, + logName = read_filename + ) + return data diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_envs/.pylintrc b/diffusion_policy/env/kitchen/relay_policy_learning/adept_envs/.pylintrc new file mode 100644 index 0000000..9cda412 --- /dev/null +++ b/diffusion_policy/env/kitchen/relay_policy_learning/adept_envs/.pylintrc @@ -0,0 +1,433 @@ +[MASTER] + +# A comma-separated list of package or module names from where C extensions may +# be loaded. 
Extensions are loading into the active Python interpreter and may +# run arbitrary code. +extension-pkg-whitelist= + +# Add files or directories to the blacklist. They should be base names, not +# paths. +ignore=CVS + +# Add files or directories matching the regex patterns to the blacklist. The +# regex matches against base names, not paths. +ignore-patterns= + +# Python code to execute, usually for sys.path manipulation such as +# pygtk.require(). +#init-hook= + +# Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the +# number of processors available to use. +jobs=1 + +# Control the amount of potential inferred values when inferring a single +# object. This can help the performance when dealing with large functions or +# complex, nested conditions. +limit-inference-results=100 + +# List of plugins (as comma separated values of python modules names) to load, +# usually to register additional checkers. +load-plugins= + +# Pickle collected data for later comparisons. +persistent=yes + +# Specify a configuration file. +#rcfile= + +# When enabled, pylint would attempt to guess common misconfiguration and emit +# user-friendly hints instead of false-positive error messages. +suggestion-mode=yes + +# Allow loading of arbitrary C extensions. Extensions are imported into the +# active Python interpreter and may run arbitrary code. +unsafe-load-any-extension=no + + +[MESSAGES CONTROL] + +# Only show warnings with the listed confidence levels. Leave empty to show +# all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED. +confidence= + +# Disable the message, report, category or checker with the given id(s). You +# can either give multiple identifiers separated by comma (,) or put this +# option multiple times (only on the command line, not in the configuration +# file where it should appear only once). You can also use "--disable=all" to +# disable everything first and then reenable specific checks. 
For example, if +# you want to run only the similarities checker, you can use "--disable=all +# --enable=similarities". If you want to run only the classes checker, but have +# no Warning level messages displayed, use "--disable=all --enable=classes +# --disable=W". +disable=relative-beyond-top-level + + +[REPORTS] + +# Python expression which should return a note less than 10 (10 is the highest +# note). You have access to the variables errors warning, statement which +# respectively contain the number of errors / warnings messages and the total +# number of statements analyzed. This is used by the global evaluation report +# (RP0004). +evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) + +# Template used to display messages. This is a python new-style format string +# used to format the message information. See doc for all details. +#msg-template= + +# Set the output format. Available formats are text, parseable, colorized, json +# and msvs (visual studio). You can also give a reporter class, e.g. +# mypackage.mymodule.MyReporterClass. +output-format=text + +# Tells whether to display a full report or only the messages. +reports=no + +# Activate the evaluation score. +score=yes + + +[REFACTORING] + +# Maximum number of nested blocks for function / method body +max-nested-blocks=5 + +# Complete name of functions that never returns. When checking for +# inconsistent-return-statements if a never returning function is called then +# it will be considered as an explicit return statement and no message will be +# printed. +never-returning-functions=sys.exit + + +[LOGGING] + +# Format style used to check logging format string. `old` means using % +# formatting, while `new` is for `{}` formatting. +logging-format-style=old + +# Logging modules to check that the string format arguments are in logging +# function parameter format. 
+logging-modules=logging + + +[VARIABLES] + +# List of additional names supposed to be defined in builtins. Remember that +# you should avoid defining new builtins when possible. +additional-builtins= + +# Tells whether unused global variables should be treated as a violation. +allow-global-unused-variables=yes + +# List of strings which can identify a callback function by name. A callback +# name must start or end with one of those strings. +callbacks=cb_, + _cb + +# A regular expression matching the name of dummy variables (i.e. expected to +# not be used). +dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_ + +# Argument names that match this expression will be ignored. Default to name +# with leading underscore. +ignored-argument-names=_.*|^ignored_|^unused_ + +# Tells whether we should check for unused import in __init__ files. +init-import=no + +# List of qualified module names which can have objects that can redefine +# builtins. +redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io + + +[FORMAT] + +# Expected format of line ending, e.g. empty (any line ending), LF or CRLF. +expected-line-ending-format= + +# Regexp for a line that is allowed to be longer than the limit. +ignore-long-lines=^\s*(# )??$ + +# Number of spaces of indent required inside a hanging or continued line. +indent-after-paren=4 + +# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 +# tab). +indent-string=' ' + +# Maximum number of characters on a single line. +max-line-length=80 + +# Maximum number of lines in a module +max-module-lines=99999 + +# List of optional constructs for which whitespace checking is disabled. `dict- +# separator` is used to allow tabulation in dicts, etc.: {1 : 1,\n222: 2}. +# `trailing-comma` allows a space between comma and closing bracket: (a, ). +# `empty-line` allows space-only lines. 
+no-space-check=trailing-comma, + dict-separator + +# Allow the body of a class to be on the same line as the declaration if body +# contains single statement. +single-line-class-stmt=no + +# Allow the body of an if to be on the same line as the test if there is no +# else. +single-line-if-stmt=no + + +[TYPECHECK] + +# List of decorators that produce context managers, such as +# contextlib.contextmanager. Add to this list to register other decorators that +# produce valid context managers. +contextmanager-decorators=contextlib.contextmanager + +# List of members which are set dynamically and missed by pylint inference +# system, and so shouldn't trigger E1101 when accessed. Python regular +# expressions are accepted. +generated-members= + +# Tells whether missing members accessed in mixin class should be ignored. A +# mixin class is detected if its name ends with "mixin" (case insensitive). +ignore-mixin-members=yes + +# Tells whether to warn about missing members when the owner of the attribute +# is inferred to be None. +ignore-none=yes + +# This flag controls whether pylint should warn about no-member and similar +# checks whenever an opaque object is returned when inferring. The inference +# can return multiple potential results while evaluating a Python object, but +# some branches might not be evaluated, which results in partial inference. In +# that case, it might be useful to still emit no-member and other checks for +# the rest of the inferred objects. +ignore-on-opaque-inference=yes + +# List of class names for which member attributes should not be checked (useful +# for classes with dynamically set attributes). This supports the use of +# qualified names. +ignored-classes=optparse.Values,thread._local,_thread._local + +# List of module names for which member attributes should not be checked +# (useful for modules/projects where namespaces are manipulated during runtime +# and thus existing member attributes cannot be deduced by static analysis. 
It +# supports qualified module names, as well as Unix pattern matching. +ignored-modules= + +# Show a hint with possible names when a member name was not found. The aspect +# of finding the hint is based on edit distance. +missing-member-hint=yes + +# The minimum edit distance a name should have in order to be considered a +# similar match for a missing member name. +missing-member-hint-distance=1 + +# The total number of similar names that should be taken in consideration when +# showing a hint for a missing member. +missing-member-max-choices=1 + + +[SIMILARITIES] + +# Ignore comments when computing similarities. +ignore-comments=yes + +# Ignore docstrings when computing similarities. +ignore-docstrings=yes + +# Ignore imports when computing similarities. +ignore-imports=no + +# Minimum lines number of a similarity. +min-similarity-lines=4 + + +[BASIC] + +# Naming style matching correct argument names +argument-naming-style=snake_case + +# Regular expression matching correct argument names. Overrides argument- +# naming-style +argument-rgx=^[a-z][a-z0-9_]*$ + +# Naming style matching correct attribute names +attr-naming-style=snake_case + +# Regular expression matching correct attribute names. Overrides attr-naming- +# style +attr-rgx=^_{0,2}[a-z][a-z0-9_]*$ + +# Bad variable names which should always be refused, separated by a comma +bad-names= + +# Naming style matching correct class attribute names +class-attribute-naming-style=any + +# Regular expression matching correct class attribute names. Overrides class- +# attribute-naming-style +class-attribute-rgx=^(_?[A-Z][A-Z0-9_]*|__[a-z0-9_]+__|_?[a-z][a-z0-9_]*)$ + +# Naming style matching correct class names +class-naming-style=PascalCase + +# Regular expression matching correct class names. Overrides class-naming-style +class-rgx=^_?[A-Z][a-zA-Z0-9]*$ + +# Naming style matching correct constant names +const-naming-style=UPPER_CASE + +# Regular expression matching correct constant names. 
Overrides const-naming- +# style +const-rgx=^(_?[A-Z][A-Z0-9_]*|__[a-z0-9_]+__|_?[a-z][a-z0-9_]*)$ + +# Minimum line length for functions/classes that require docstrings, shorter +# ones are exempt. +docstring-min-length=10 + +# Naming style matching correct function names +function-naming-style=snake_case + +# Regular expression matching correct function names. Overrides function- +# naming-style +function-rgx=^(?:(?PsetUp|tearDown|setUpModule|tearDownModule)|(?P_?[A-Z][a-zA-Z0-9]*)|(?P_?[a-z][a-z0-9_]*))$ + +# Good variable names which should always be accepted, separated by a comma +good-names=main, + _ + +# Include a hint for the correct naming format with invalid-name +include-naming-hint=no + +# Naming style matching correct inline iteration names +inlinevar-naming-style=any + +# Regular expression matching correct inline iteration names. Overrides +# inlinevar-naming-style +inlinevar-rgx=^[a-z][a-z0-9_]*$ + +# Naming style matching correct method names +method-naming-style=snake_case + +# Regular expression matching correct method names. Overrides method-naming- +# style +method-rgx=(?x)^(?:(?P_[a-z0-9_]+__|runTest|setUp|tearDown|setUpTestCase|tearDownTestCase|setupSelf|tearDownClass|setUpClass|(test|assert)_*[A-Z0-9][a-zA-Z0-9_]*|next)|(?P_{0,2}[A-Z][a-zA-Z0-9_]*)|(?P_{0,2}[a-z][a-z0-9_]*))$ + +# Naming style matching correct module names +module-naming-style=snake_case + +# Regular expression matching correct module names. Overrides module-naming- +# style +module-rgx=^(_?[a-z][a-z0-9_]*)|__init__|PRESUBMIT|PRESUBMIT_unittest$ + +# Colon-delimited sets of names that determine each other's naming style when +# the name regexes allow several styles. +name-group=function:method + +# Regular expression which should only match function or class names that do +# not require a docstring. +no-docstring-rgx=(__.*__|main) + +# List of decorators that produce properties, such as abc.abstractproperty. 
Add +# to this list to register other decorators that produce valid properties. +property-classes=abc.abstractproperty,google3.pyglib.function_utils.cached.property + +# Naming style matching correct variable names +variable-naming-style=snake_case + +# Regular expression matching correct variable names. Overrides variable- +# naming-style +variable-rgx=^[a-z][a-z0-9_]*$ + + +[SPELLING] + +# Limits count of emitted suggestions for spelling mistakes. +max-spelling-suggestions=4 + +# Spelling dictionary name. Available dictionaries: none. To make it working +# install python-enchant package.. +spelling-dict= + +# List of comma separated words that should not be checked. +spelling-ignore-words= + +# A path to a file that contains private dictionary; one word per line. +spelling-private-dict-file= + +# Tells whether to store unknown words to indicated private dictionary in +# --spelling-private-dict-file option instead of raising a message. +spelling-store-unknown-words=no + + +[MISCELLANEOUS] + +# List of note tags to take in consideration, separated by a comma. +notes=FIXME, + XXX, + TODO + + +[IMPORTS] + +# Allow wildcard imports from modules that define __all__. +allow-wildcard-with-all=no + +# Analyse import fallback blocks. This can be used to support both Python 2 and +# 3 compatible code, which means that the block might have code that exists +# only in one or another interpreter, leading to false positives when analysed. +analyse-fallback-blocks=no + +# Deprecated modules which should not be used, separated by a comma. +deprecated-modules=optparse,tkinter.tix + +# Create a graph of external dependencies in the given file (report RP0402 must +# not be disabled). +ext-import-graph= + +# Create a graph of every (i.e. internal and external) dependencies in the +# given file (report RP0402 must not be disabled). +import-graph= + +# Create a graph of internal dependencies in the given file (report RP0402 must +# not be disabled). 
+int-import-graph= + +# Force import order to recognize a module as part of the standard +# compatibility libraries. +known-standard-library= + +# Force import order to recognize a module as part of a third party library. +known-third-party=enchant + + +[CLASSES] + +# List of method names used to declare (i.e. assign) instance attributes. +defining-attr-methods=__init__, + __new__, + setUp + +# List of member names, which should be excluded from the protected access +# warning. +exclude-protected=_asdict, + _fields, + _replace, + _source, + _make + +# List of valid names for the first argument in a class method. +valid-classmethod-first-arg=cls + +# List of valid names for the first argument in a metaclass class method. +valid-metaclass-classmethod-first-arg=cls + + +[EXCEPTIONS] + +# Exceptions that will emit a warning when being caught. Defaults to +# "Exception". +overgeneral-exceptions=Exception diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_envs/.style.yapf b/diffusion_policy/env/kitchen/relay_policy_learning/adept_envs/.style.yapf new file mode 100644 index 0000000..29f83ff --- /dev/null +++ b/diffusion_policy/env/kitchen/relay_policy_learning/adept_envs/.style.yapf @@ -0,0 +1,323 @@ +[style] +# Align closing bracket with visual indentation. +align_closing_bracket_with_visual_indent=False + +# Allow dictionary keys to exist on multiple lines. For example: +# +# x = { +# ('this is the first element of a tuple', +# 'this is the second element of a tuple'): +# value, +# } +allow_multiline_dictionary_keys=False + +# Allow lambdas to be formatted on more than one line. +allow_multiline_lambdas=False + +# Allow splitting before a default / named assignment in an argument list. +allow_split_before_default_or_named_assigns=True + +# Allow splits before the dictionary value. +allow_split_before_dict_value=True + +# Let spacing indicate operator precedence. 
For example: +# +# a = 1 * 2 + 3 / 4 +# b = 1 / 2 - 3 * 4 +# c = (1 + 2) * (3 - 4) +# d = (1 - 2) / (3 + 4) +# e = 1 * 2 - 3 +# f = 1 + 2 + 3 + 4 +# +# will be formatted as follows to indicate precedence: +# +# a = 1*2 + 3/4 +# b = 1/2 - 3*4 +# c = (1+2) * (3-4) +# d = (1-2) / (3+4) +# e = 1*2 - 3 +# f = 1 + 2 + 3 + 4 +# +arithmetic_precedence_indication=False + +# Number of blank lines surrounding top-level function and class +# definitions. +blank_lines_around_top_level_definition=2 + +# Insert a blank line before a class-level docstring. +blank_line_before_class_docstring=False + +# Insert a blank line before a module docstring. +blank_line_before_module_docstring=False + +# Insert a blank line before a 'def' or 'class' immediately nested +# within another 'def' or 'class'. For example: +# +# class Foo: +# # <------ this blank line +# def method(): +# ... +blank_line_before_nested_class_or_def=True + +# Do not split consecutive brackets. Only relevant when +# dedent_closing_brackets is set. For example: +# +# call_func_that_takes_a_dict( +# { +# 'key1': 'value1', +# 'key2': 'value2', +# } +# ) +# +# would reformat to: +# +# call_func_that_takes_a_dict({ +# 'key1': 'value1', +# 'key2': 'value2', +# }) +coalesce_brackets=False + +# The column limit. +column_limit=80 + +# The style for continuation alignment. Possible values are: +# +# - SPACE: Use spaces for continuation alignment. This is default behavior. +# - FIXED: Use fixed number (CONTINUATION_INDENT_WIDTH) of columns +# (ie: CONTINUATION_INDENT_WIDTH/INDENT_WIDTH tabs) for continuation +# alignment. +# - LESS: Slightly left if cannot vertically align continuation lines with +# indent characters. +# - VALIGN-RIGHT: Vertically align continuation lines with indent +# characters. Slightly right (one more indent character) if cannot +# vertically align continuation lines with indent characters. +# +# For options FIXED, and VALIGN-RIGHT are only available when USE_TABS is +# enabled. 
+continuation_align_style=SPACE
+
+# Indent width used for line continuations.
+continuation_indent_width=4
+
+# Put closing brackets on a separate line, dedented, if the bracketed
+# expression can't fit in a single line. Applies to all kinds of brackets,
+# including function definitions and calls. For example:
+#
+#   config = {
+#       'key1': 'value1',
+#       'key2': 'value2',
+#   }        # <--- this bracket is dedented and on a separate line
+#
+#   time_series = self.remote_client.query_entity_counters(
+#       entity='dev3246.region1',
+#       key='dns.query_latency_tcp',
+#       transform=Transformation.AVERAGE(window=timedelta(seconds=60)),
+#       start_ts=now()-timedelta(days=3),
+#       end_ts=now(),
+#   )        # <--- this bracket is dedented and on a separate line
+dedent_closing_brackets=False
+
+# Disable the heuristic which places each list element on a separate line
+# if the list is comma-terminated.
+disable_ending_comma_heuristic=False
+
+# Place each dictionary entry onto its own line.
+each_dict_entry_on_separate_line=True
+
+# The regex for an i18n comment. The presence of this comment stops
+# reformatting of that line, because the comments are required to be
+# next to the string they translate.
+i18n_comment=#\..*
+
+# The i18n function call names. The presence of this function stops
+# reformatting on that line, because the string it has cannot be moved
+# away from the i18n comment.
+i18n_function_call=N_, _
+
+# Indent blank lines.
+indent_blank_lines=False
+
+# Indent the dictionary value if it cannot fit on the same line as the
+# dictionary key. For example:
+#
+#   config = {
+#       'key1':
+#           'value1',
+#       'key2': value1 +
+#               value2,
+#   }
+indent_dictionary_value=False
+
+# The number of columns to use for indentation.
+indent_width=4
+
+# Join short lines into one line. E.g., single line 'if' statements.
+join_multiple_lines=True
+
+# Do not include spaces around selected binary operators. For example:
+#
+#   1 + 2 * 3 - 4 / 5
+#
+# will be formatted as follows when configured with "*,/":
+#
+#   1 + 2*3 - 4/5
+#
+no_spaces_around_selected_binary_operators=
+
+# Use spaces around default or named assigns.
+spaces_around_default_or_named_assign=False
+
+# Use spaces around the power operator.
+spaces_around_power_operator=False
+
+# The number of spaces required before a trailing comment.
+# This can be a single value (representing the number of spaces
+# before each trailing comment) or list of values (representing
+# alignment column values; trailing comments within a block will
+# be aligned to the first column value that is greater than the maximum
+# line length within the block). For example:
+#
+# With spaces_before_comment=5:
+#
+#   1 + 1 # Adding values
+#
+# will be formatted as:
+#
+#   1 + 1     # Adding values <-- 5 spaces between the end of the statement and comment
+#
+# With spaces_before_comment=15, 20:
+#
+#   1 + 1 # Adding values
+#   two + two # More adding
+#
+#   longer_statement # This is a longer statement
+#   short # This is a shorter statement
+#
+#   a_very_long_statement_that_extends_beyond_the_final_column # Comment
+#   short # This is a shorter statement
+#
+# will be formatted as:
+#
+#   1 + 1          # Adding values <-- end of line comments in block aligned to col 15
+#   two + two      # More adding
+#
+#   longer_statement    # This is a longer statement <-- end of line comments in block aligned to col 20
+#   short               # This is a shorter statement
+#
+#   a_very_long_statement_that_extends_beyond_the_final_column # Comment <-- the end of line comments are aligned based on the line length
+#   short # This is a shorter statement
+#
+spaces_before_comment=2
+
+# Insert a space between the ending comma and closing bracket of a list,
+# etc.
+space_between_ending_comma_and_closing_bracket=False
+
+# Split before arguments
+split_all_comma_separated_values=False
+
+# Split before arguments if the argument list is terminated by a
+# comma.
+split_arguments_when_comma_terminated=False + +# Set to True to prefer splitting before '&', '|' or '^' rather than +# after. +split_before_bitwise_operator=False + +# Split before the closing bracket if a list or dict literal doesn't fit on +# a single line. +split_before_closing_bracket=True + +# Split before a dictionary or set generator (comp_for). For example, note +# the split before the 'for': +# +# foo = { +# variable: 'Hello world, have a nice day!' +# for variable in bar if variable != 42 +# } +split_before_dict_set_generator=False + +# Split before the '.' if we need to split a longer expression: +# +# foo = ('This is a really long string: {}, {}, {}, {}'.format(a, b, c, d)) +# +# would reformat to something like: +# +# foo = ('This is a really long string: {}, {}, {}, {}' +# .format(a, b, c, d)) +split_before_dot=False + +# Split after the opening paren which surrounds an expression if it doesn't +# fit on a single line. +split_before_expression_after_opening_paren=False + +# If an argument / parameter list is going to be split, then split before +# the first argument. +split_before_first_argument=False + +# Set to True to prefer splitting before 'and' or 'or' rather than +# after. +split_before_logical_operator=False + +# Split named assignments onto individual lines. +split_before_named_assigns=True + +# Set to True to split list comprehensions and generators that have +# non-trivial expressions and multiple clauses before each of these +# clauses. For example: +# +# result = [ +# a_long_var + 100 for a_long_var in xrange(1000) +# if a_long_var % 10] +# +# would reformat to something like: +# +# result = [ +# a_long_var + 100 +# for a_long_var in xrange(1000) +# if a_long_var % 10] +split_complex_comprehension=True + +# The penalty for splitting right after the opening bracket. +split_penalty_after_opening_bracket=30 + +# The penalty for splitting the line after a unary operator. 
+split_penalty_after_unary_operator=10000 + +# The penalty for splitting right before an if expression. +split_penalty_before_if_expr=0 + +# The penalty of splitting the line around the '&', '|', and '^' +# operators. +split_penalty_bitwise_operator=300 + +# The penalty for splitting a list comprehension or generator +# expression. +split_penalty_comprehension=2100 + +# The penalty for characters over the column limit. +split_penalty_excess_character=7000 + +# The penalty incurred by adding a line split to the unwrapped line. The +# more line splits added the higher the penalty. +split_penalty_for_added_line_split=30 + +# The penalty of splitting a list of "import as" names. For example: +# +# from a_very_long_or_indented_module_name_yada_yad import (long_argument_1, +# long_argument_2, +# long_argument_3) +# +# would reformat to something like: +# +# from a_very_long_or_indented_module_name_yada_yad import ( +# long_argument_1, long_argument_2, long_argument_3) +split_penalty_import_names=0 + +# The penalty of splitting the line around the 'and' and 'or' +# operators. +split_penalty_logical_operator=300 + +# Use the Tab character for indentation. +use_tabs=False + diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_envs/adept_envs/__init__.py b/diffusion_policy/env/kitchen/relay_policy_learning/adept_envs/adept_envs/__init__.py new file mode 100644 index 0000000..5e92a7b --- /dev/null +++ b/diffusion_policy/env/kitchen/relay_policy_learning/adept_envs/adept_envs/__init__.py @@ -0,0 +1,19 @@ +#!/usr/bin/python +# +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import adept_envs.franka + +from adept_envs.utils.configurable import global_config diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_envs/adept_envs/base_robot.py b/diffusion_policy/env/kitchen/relay_policy_learning/adept_envs/adept_envs/base_robot.py new file mode 100644 index 0000000..5c6f30f --- /dev/null +++ b/diffusion_policy/env/kitchen/relay_policy_learning/adept_envs/adept_envs/base_robot.py @@ -0,0 +1,151 @@ +#!/usr/bin/python +# +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +from collections import deque + +class BaseRobot(object): + """Base class for all robot classes.""" + + def __init__(self, + n_jnt, + n_obj, + pos_bounds=None, + vel_bounds=None, + calibration_path=None, + is_hardware=False, + device_name=None, + overlay=False, + calibration_mode=False, + observation_cache_maxsize=5): + """Create a new robot. + Args: + n_jnt: The number of dofs in the robot. + n_obj: The number of dofs in the object. 
+ pos_bounds: (n_jnt, 2)-shape matrix denoting the min and max joint + position for each joint. + vel_bounds: (n_jnt, 2)-shape matrix denoting the min and max joint + velocity for each joint. + calibration_path: File path to the calibration configuration file to + use. + is_hardware: Whether to run on hardware or not. + device_name: The device path for the robot hardware. Only required + in legacy mode. + overlay: Whether to show a simulation overlay of the hardware. + calibration_mode: Start with motors disengaged. + """ + + assert n_jnt > 0 + assert n_obj >= 0 + + self._n_jnt = n_jnt + self._n_obj = n_obj + self._n_dofs = n_jnt + n_obj + + self._pos_bounds = None + if pos_bounds is not None: + pos_bounds = np.array(pos_bounds, dtype=np.float32) + assert pos_bounds.shape == (self._n_dofs, 2) + for low, high in pos_bounds: + assert low < high + self._pos_bounds = pos_bounds + self._vel_bounds = None + if vel_bounds is not None: + vel_bounds = np.array(vel_bounds, dtype=np.float32) + assert vel_bounds.shape == (self._n_dofs, 2) + for low, high in vel_bounds: + assert low < high + self._vel_bounds = vel_bounds + + self._is_hardware = is_hardware + self._device_name = device_name + self._calibration_path = calibration_path + self._overlay = overlay + self._calibration_mode = calibration_mode + self._observation_cache_maxsize = observation_cache_maxsize + + # Gets updated + self._observation_cache = deque([], maxlen=self._observation_cache_maxsize) + + + @property + def n_jnt(self): + return self._n_jnt + + @property + def n_obj(self): + return self._n_obj + + @property + def n_dofs(self): + return self._n_dofs + + @property + def pos_bounds(self): + return self._pos_bounds + + @property + def vel_bounds(self): + return self._vel_bounds + + @property + def is_hardware(self): + return self._is_hardware + + @property + def device_name(self): + return self._device_name + + @property + def calibration_path(self): + return self._calibration_path + + @property + def 
overlay(self): + return self._overlay + + @property + def has_obj(self): + return self._n_obj > 0 + + @property + def calibration_mode(self): + return self._calibration_mode + + @property + def observation_cache_maxsize(self): + return self._observation_cache_maxsize + + @property + def observation_cache(self): + return self._observation_cache + + + def clip_positions(self, positions): + """Clips the given joint positions to the position bounds. + + Args: + positions: The joint positions. + + Returns: + The bounded joint positions. + """ + if self.pos_bounds is None: + return positions + assert len(positions) == self.n_jnt or len(positions) == self.n_dofs + pos_bounds = self.pos_bounds[:len(positions)] + return np.clip(positions, pos_bounds[:, 0], pos_bounds[:, 1]) + diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_envs/adept_envs/franka/__init__.py b/diffusion_policy/env/kitchen/relay_policy_learning/adept_envs/adept_envs/franka/__init__.py new file mode 100644 index 0000000..528f344 --- /dev/null +++ b/diffusion_policy/env/kitchen/relay_policy_learning/adept_envs/adept_envs/franka/__init__.py @@ -0,0 +1,24 @@ +#!/usr/bin/python +# +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from gym.envs.registration import register + +# Relax the robot +register( + id='kitchen_relax-v1', + entry_point='adept_envs.franka.kitchen_multitask_v0:KitchenTaskRelaxV1', + max_episode_steps=280, +) \ No newline at end of file diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_envs/adept_envs/franka/assets/franka_kitchen_jntpos_act_ab.xml b/diffusion_policy/env/kitchen/relay_policy_learning/adept_envs/adept_envs/franka/assets/franka_kitchen_jntpos_act_ab.xml new file mode 100644 index 0000000..3441380 --- /dev/null +++ b/diffusion_policy/env/kitchen/relay_policy_learning/adept_envs/adept_envs/franka/assets/franka_kitchen_jntpos_act_ab.xml @@ -0,0 +1,94 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_envs/adept_envs/franka/kitchen_multitask_v0.py b/diffusion_policy/env/kitchen/relay_policy_learning/adept_envs/adept_envs/franka/kitchen_multitask_v0.py new file mode 100644 index 0000000..30bdd33 --- /dev/null +++ b/diffusion_policy/env/kitchen/relay_policy_learning/adept_envs/adept_envs/franka/kitchen_multitask_v0.py @@ -0,0 +1,221 @@ +""" Kitchen environment for long horizon manipulation """ +#!/usr/bin/python +# +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import numpy as np +from adept_envs import robot_env +from adept_envs.utils.configurable import configurable +from gym import spaces +from dm_control.mujoco import engine + +@configurable(pickleable=True) +class KitchenV0(robot_env.RobotEnv): + + CALIBRATION_PATHS = { + 'default': + os.path.join(os.path.dirname(__file__), 'robot/franka_config.xml') + } + # Converted to velocity actuation + ROBOTS = {'robot': 'adept_envs.franka.robot.franka_robot:Robot_VelAct'} + MODEl = os.path.join( + os.path.dirname(__file__), + '../franka/assets/franka_kitchen_jntpos_act_ab.xml') + N_DOF_ROBOT = 9 + N_DOF_OBJECT = 21 + + def __init__(self, + robot_params={}, frame_skip=40, + use_abs_action=False): + self.goal_concat = True + self.obs_dict = {} + self.robot_noise_ratio = 0.1 # 10% as per robot_config specs + self.goal = np.zeros((30,)) + self.use_abs_action = use_abs_action + if use_abs_action: + self.ROBOTS = {'robot': 'adept_envs.franka.robot.franka_robot:Robot_PosAct'} + + super().__init__( + self.MODEl, + robot=self.make_robot( + n_jnt=self.N_DOF_ROBOT, #root+robot_jnts + n_obj=self.N_DOF_OBJECT, + **robot_params), + frame_skip=frame_skip, + camera_settings=dict( + distance=4.5, + azimuth=-66, + elevation=-65, + ), + ) + self.init_qpos = self.sim.model.key_qpos[0].copy() + + # For the microwave kettle slide hinge + self.init_qpos = np.array([ 1.48388023e-01, -1.76848573e+00, 1.84390296e+00, -2.47685760e+00, + 2.60252026e-01, 7.12533105e-01, 1.59515394e+00, 4.79267505e-02, + 3.71350919e-02, -2.66279850e-04, -5.18043486e-05, 3.12877220e-05, + -4.51199853e-05, -3.90842156e-06, -4.22629655e-05, 6.28065475e-05, + 4.04984708e-05, 4.62730939e-04, -2.26906415e-04, -4.65501369e-04, + -6.44129196e-03, -1.77048263e-03, 1.08009684e-03, -2.69397440e-01, + 3.50383255e-01, 1.61944683e+00, 1.00618764e+00, 4.06395120e-03, + -6.62095997e-03, -2.68278933e-04]) + + self.init_qvel = self.sim.model.key_qvel[0].copy() + + self.act_mid = np.zeros(self.N_DOF_ROBOT) + self.act_amp = 2.0 
* np.ones(self.N_DOF_ROBOT) + + act_lower = -1*np.ones((self.N_DOF_ROBOT,)) + act_upper = 1*np.ones((self.N_DOF_ROBOT,)) + if use_abs_action: + act_lower = act_lower * 8. + act_upper = act_upper * 8. + self.act_amp = np.ones(self.N_DOF_ROBOT) + + self.action_space = spaces.Box(act_lower, act_upper) + + obs_upper = 8. * np.ones(self.obs_dim) + obs_lower = -obs_upper + self.observation_space = spaces.Box(obs_lower, obs_upper) + + def _get_reward_n_score(self, obs_dict): + raise NotImplementedError() + + def step(self, a, b=None): + if not self.use_abs_action: + a = np.clip(a, -1.0, 1.0) + + if not self.initializing: + a = self.act_mid + a * self.act_amp # mean center and scale + else: + self.goal = self._get_task_goal() # update goal if init + + self.robot.step( + self, a, step_duration=self.skip * self.model.opt.timestep) + + # observations + obs = self._get_obs() + + #rewards + reward_dict, score = self._get_reward_n_score(self.obs_dict) + + # termination + done = False + + # finalize step + env_info = { + 'time': self.obs_dict['t'], + 'obs_dict': self.obs_dict, + 'rewards': reward_dict, + 'score': score, + # don't render every frame + # 'images': np.asarray(self.render(mode='rgb_array')) + } + # self.render() + return obs, reward_dict['r_total'], done, env_info + + def _get_obs(self): + t, qp, qv, obj_qp, obj_qv = self.robot.get_obs( + self, robot_noise_ratio=self.robot_noise_ratio) + + self.obs_dict = {} + self.obs_dict['t'] = t + self.obs_dict['qp'] = qp + self.obs_dict['qv'] = qv + self.obs_dict['obj_qp'] = obj_qp + self.obs_dict['obj_qv'] = obj_qv + self.obs_dict['goal'] = self.goal + if self.goal_concat: + return np.concatenate([self.obs_dict['qp'], self.obs_dict['obj_qp'], self.obs_dict['goal']]) + + def reset_model(self): + reset_pos = self.init_qpos[:].copy() + reset_vel = self.init_qvel[:].copy() + self.robot.reset(self, reset_pos, reset_vel) + self.sim.forward() + self.goal = self._get_task_goal() #sample a new goal on reset + return self._get_obs() + + 
    def evaluate_success(self, paths):
        """Aggregate a batch of rollouts into a single scalar metric.

        Each ``path`` must carry ``env_infos`` with per-step ``score`` values
        and a ``rewards['bonus']`` sequence whose final entry marks success.
        NOTE(review): the fused value packs the (rounded) success percentage
        into the 1e6 digit range and the mean score into the low digits,
        signed by the mean score — callers decode it accordingly.
        """
        # score: mean per-rollout score, then mean over rollouts
        mean_score_per_rollout = np.zeros(shape=len(paths))
        for idx, path in enumerate(paths):
            mean_score_per_rollout[idx] = np.mean(path['env_infos']['score'])
        mean_score = np.mean(mean_score_per_rollout)

        # success percentage: a rollout succeeds if its final bonus is nonzero
        num_success = 0
        num_paths = len(paths)
        for path in paths:
            num_success += bool(path['env_infos']['rewards']['bonus'][-1])
        success_percentage = num_success * 100.0 / num_paths

        # fuse results
        return np.sign(mean_score) * (
            1e6 * round(success_percentage, 2) + abs(mean_score))

    def close_env(self):
        # Delegate cleanup to the robot (closes hardware/sim resources).
        self.robot.close()

    def set_goal(self, goal):
        # Overrides the goal returned by _get_task_goal on the next step.
        self.goal = goal

    def _get_task_goal(self):
        # Base behavior: goal is whatever was last set; subclasses may override.
        return self.goal

    # Only include goal
    @property
    def goal_space(self):
        # The goal occupies half of the (symmetric) observation box; reuse its
        # scalar limit to build a box of half the observation dimensionality.
        len_obs = self.observation_space.low.shape[0]
        env_lim = np.abs(self.observation_space.low[0])
        return spaces.Box(low=-env_lim, high=env_lim, shape=(len_obs//2,))

    def convert_to_active_observation(self, observation):
        # Identity hook; subclasses may filter the observation here.
        return observation

class KitchenTaskRelaxV1(KitchenV0):
    """Kitchen environment with proper camera and goal setup"""

    def __init__(self, use_abs_action=False):
        super(KitchenTaskRelaxV1, self).__init__(
            use_abs_action=use_abs_action)

    def _get_reward_n_score(self, obs_dict):
        # Relaxed task: no shaped reward — every component is zero.
        reward_dict = {}
        reward_dict['true_reward'] = 0.
        reward_dict['bonus'] = 0.
        reward_dict['r_total'] = 0.
        score = 0.
        return reward_dict, score

    def render(self, mode='human', width=1280, height=720, custom=True, **kwargs):
        # Custom path renders offscreen through a movable camera with sensible
        # kitchen-scene defaults; kwargs may override any camera parameter.
        if custom:
            camera = engine.MovableCamera(self.sim, height, width)
            if 'distance' not in kwargs:
                kwargs['distance'] = 2.2
            if 'lookat' not in kwargs:
                kwargs['lookat'] = [-0.2, .5, 2.]
+ if 'azimuth' not in kwargs: + kwargs['azimuth'] = 70 + if 'elevation' not in kwargs: + kwargs['elevation'] = -35 + camera.set_pose(**kwargs) + img = camera.render() + return img + else: + return super(KitchenTaskRelaxV1, self).render( + mode=mode, width=width, height=height, **kwargs) + diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_envs/adept_envs/franka/robot/franka_config.xml b/diffusion_policy/env/kitchen/relay_policy_learning/adept_envs/adept_envs/franka/robot/franka_config.xml new file mode 100644 index 0000000..aeb4f49 --- /dev/null +++ b/diffusion_policy/env/kitchen/relay_policy_learning/adept_envs/adept_envs/franka/robot/franka_config.xml @@ -0,0 +1,59 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_envs/adept_envs/franka/robot/franka_robot.py b/diffusion_policy/env/kitchen/relay_policy_learning/adept_envs/adept_envs/franka/robot/franka_robot.py new file mode 100644 index 0000000..087b9dd --- /dev/null +++ b/diffusion_policy/env/kitchen/relay_policy_learning/adept_envs/adept_envs/franka/robot/franka_robot.py @@ -0,0 +1,264 @@ +#!/usr/bin/python +# +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os, getpass +import numpy as np +from termcolor import cprint +import time +import copy +import click + +from adept_envs import base_robot +from adept_envs.utils.config import (get_config_root_node, read_config_from_node) + +# obervations structure +from collections import namedtuple +observation = namedtuple('observation', ['time', 'qpos_robot', 'qvel_robot', 'qpos_object', 'qvel_object']) + + + +franka_interface = '' + +class Robot(base_robot.BaseRobot): + + """ + Abstracts away the differences between the robot_simulation and robot_hardware + + """ + + def __init__(self, *args, **kwargs): + super(Robot, self).__init__(*args, **kwargs) + global franka_interface + + # Read robot configurations + self._read_specs_from_config(robot_configs=self.calibration_path) + + + # Robot: Handware + if self.is_hardware: + if franka_interface == '': + raise NotImplementedError() + from handware.franka import franka + + # initialize franka + self.franka_interface = franka() + franka_interface = self.franka_interface + cprint("Initializing %s Hardware (Status:%d)" % (self.robot_name, self.franka.okay(self.robot_hardware_dof)), 'white', 'on_grey') + else: + self.franka_interface = franka_interface + cprint("Reusing previours Franka session", 'white', 'on_grey') + + # Robot: Simulation + else: + self.robot_name = "Franka" + cprint("Initializing %s sim" % self.robot_name, 'white', 'on_grey') + + # Robot's time + self.time_start = time.time() + self.time = time.time()-self.time_start + self.time_render = -1 # time of rendering + + + # read specs from the calibration file + def _read_specs_from_config(self, robot_configs): + root, root_name = get_config_root_node(config_file_name=robot_configs) + self.robot_name = root_name[0] + self.robot_mode = np.zeros(self.n_dofs, dtype=int) + self.robot_mj_dof = np.zeros(self.n_dofs, dtype=int) + self.robot_hardware_dof = np.zeros(self.n_dofs, dtype=int) + self.robot_scale = np.zeros(self.n_dofs, dtype=float) + self.robot_offset = 
np.zeros(self.n_dofs, dtype=float) + self.robot_pos_bound = np.zeros([self.n_dofs, 2], dtype=float) + self.robot_vel_bound = np.zeros([self.n_dofs, 2], dtype=float) + self.robot_pos_noise_amp = np.zeros(self.n_dofs, dtype=float) + self.robot_vel_noise_amp = np.zeros(self.n_dofs, dtype=float) + + print("Reading configurations for %s" % self.robot_name) + for i in range(self.n_dofs): + self.robot_mode[i] = read_config_from_node(root, "qpos"+str(i), "mode", int) + self.robot_mj_dof[i] = read_config_from_node(root, "qpos"+str(i), "mj_dof", int) + self.robot_hardware_dof[i] = read_config_from_node(root, "qpos"+str(i), "hardware_dof", int) + self.robot_scale[i] = read_config_from_node(root, "qpos"+str(i), "scale", float) + self.robot_offset[i] = read_config_from_node(root, "qpos"+str(i), "offset", float) + self.robot_pos_bound[i] = read_config_from_node(root, "qpos"+str(i), "pos_bound", float) + self.robot_vel_bound[i] = read_config_from_node(root, "qpos"+str(i), "vel_bound", float) + self.robot_pos_noise_amp[i] = read_config_from_node(root, "qpos"+str(i), "pos_noise_amp", float) + self.robot_vel_noise_amp[i] = read_config_from_node(root, "qpos"+str(i), "vel_noise_amp", float) + + + # convert to hardware space + def _de_calib(self, qp_mj, qv_mj=None): + qp_ad = (qp_mj-self.robot_offset)/self.robot_scale + if qv_mj is not None: + qv_ad = qv_mj/self.robot_scale + return qp_ad, qv_ad + else: + return qp_ad + + # convert to mujoco space + def _calib(self, qp_ad, qv_ad): + qp_mj = qp_ad* self.robot_scale + self.robot_offset + qv_mj = qv_ad* self.robot_scale + return qp_mj, qv_mj + + + # refresh the observation cache + def _observation_cache_refresh(self, env): + for _ in range(self.observation_cache_maxsize): + self.get_obs(env, sim_mimic_hardware=False) + + # get past observation + def get_obs_from_cache(self, env, index=-1): + assert (index>=0 and index=-self.observation_cache_maxsize), \ + "cache index out of bound. 
(cache size is %2d)"%self.observation_cache_maxsize + obs = self.observation_cache[index] + if self.has_obj: + return obs.time, obs.qpos_robot, obs.qvel_robot, obs.qpos_object, obs.qvel_object + else: + return obs.time, obs.qpos_robot, obs.qvel_robot + + + # get observation + def get_obs(self, env, robot_noise_ratio=1, object_noise_ratio=1, sim_mimic_hardware=True): + if self.is_hardware: + raise NotImplementedError() + + else: + #Gather simulated observation + qp = env.sim.data.qpos[:self.n_jnt].copy() + qv = env.sim.data.qvel[:self.n_jnt].copy() + if self.has_obj: + qp_obj = env.sim.data.qpos[-self.n_obj:].copy() + qv_obj = env.sim.data.qvel[-self.n_obj:].copy() + else: + qp_obj = None + qv_obj = None + self.time = env.sim.data.time + + # Simulate observation noise + if not env.initializing: + qp += robot_noise_ratio*self.robot_pos_noise_amp[:self.n_jnt]*env.np_random.uniform(low=-1., high=1., size=self.n_jnt) + qv += robot_noise_ratio*self.robot_vel_noise_amp[:self.n_jnt]*env.np_random.uniform(low=-1., high=1., size=self.n_jnt) + if self.has_obj: + qp_obj += robot_noise_ratio*self.robot_pos_noise_amp[-self.n_obj:]*env.np_random.uniform(low=-1., high=1., size=self.n_obj) + qv_obj += robot_noise_ratio*self.robot_vel_noise_amp[-self.n_obj:]*env.np_random.uniform(low=-1., high=1., size=self.n_obj) + + # cache observations + obs = observation(time=self.time, qpos_robot=qp, qvel_robot=qv, qpos_object=qp_obj, qvel_object=qv_obj) + self.observation_cache.append(obs) + + if self.has_obj: + return obs.time, obs.qpos_robot, obs.qvel_robot, obs.qpos_object, obs.qvel_object + else: + return obs.time, obs.qpos_robot, obs.qvel_robot + + + # enforce position specs. 
    def ctrl_position_limits(self, ctrl_position):
        """Clip desired joint positions to the configured per-joint bounds."""
        ctrl_feasible_position = np.clip(ctrl_position, self.robot_pos_bound[:self.n_jnt, 0], self.robot_pos_bound[:self.n_jnt, 1])
        return ctrl_feasible_position


    # step the robot env
    def step(self, env, ctrl_desired, step_duration, sim_override=False):
        """Apply one control command for step_duration seconds; returns 1."""

        # Populate observation cache during startup
        if env.initializing:
            self._observation_cache_refresh(env)

        # enforce velocity limits (defined by the Robot_PosAct/Robot_VelAct
        # subclasses, which interpret ctrl_desired differently)
        ctrl_feasible = self.ctrl_velocity_limits(ctrl_desired, step_duration)

        # enforce position limits
        ctrl_feasible = self.ctrl_position_limits(ctrl_feasible)

        # Send controls to the robot
        if self.is_hardware and (not sim_override):
            raise NotImplementedError()
        else:
            # step_duration is converted into an integer number of sim frames
            env.do_simulation(ctrl_feasible, int(step_duration/env.sim.model.opt.timestep)) # render is folded in here

        # Update current robot state on the overlay
        if self.overlay:
            env.sim.data.qpos[self.n_jnt:2*self.n_jnt] = env.desired_pose.copy()
            env.sim.forward()

        # synchronize time (hardware only): sleep out the remainder of the
        # step so wall-clock time tracks sim time
        if self.is_hardware:
            time_now = (time.time()-self.time_start)
            time_left_in_step = step_duration - (time_now-self.time)
            if(time_left_in_step>0.0001):
                time.sleep(time_left_in_step)
        return 1


    def reset(self, env, reset_pose, reset_vel, overlay_mimic_reset_pose=True, sim_override=False):
        """Reset sim state to the given pose/velocity and refresh the obs cache.

        NOTE(review): overlay_mimic_reset_pose is currently unused; the
        overlay always mirrors env.desired_pose. clip_positions is defined
        on the base robot (not visible here) — presumably clips to pos
        bounds; confirm against base_robot.
        """
        reset_pose = self.clip_positions(reset_pose)

        if self.is_hardware:
            raise NotImplementedError()
        else:
            env.sim.reset()
            env.sim.data.qpos[:self.n_jnt] = reset_pose[:self.n_jnt].copy()
            env.sim.data.qvel[:self.n_jnt] = reset_vel[:self.n_jnt].copy()
            if self.has_obj:
                # Object DOFs live at the tail of qpos/qvel
                env.sim.data.qpos[-self.n_obj:] = reset_pose[-self.n_obj:].copy()
                env.sim.data.qvel[-self.n_obj:] = reset_vel[-self.n_obj:].copy()
            env.sim.forward()

        if self.overlay:
            env.sim.data.qpos[self.n_jnt:2*self.n_jnt] = env.desired_pose[:self.n_jnt].copy()
            env.sim.forward()

        # refresh observation cache before exit
        self._observation_cache_refresh(env)


    def close(self):
        # NOTE(review): hardware branch raises before the final cprint — the
        # "Closed" message is unreachable until hardware support is written.
        if self.is_hardware:
            cprint("Closing Franka hardware... ", 'white', 'on_grey', end='', flush=True)
            status = 0
            raise NotImplementedError()
            cprint("Closed (Status: {})".format(status), 'white', 'on_grey', flush=True)
        else:
            cprint("Closing Franka sim", 'white', 'on_grey', flush=True)


class Robot_PosAct(Robot):
    """Robot variant whose actions are absolute joint positions."""

    # enforce velocity specs.
    # ALERT: This depends on previous observation. This is not ideal as it breaks MDP assumptions. Be careful
    def ctrl_velocity_limits(self, ctrl_position, step_duration):
        # Convert the position command into an implied velocity relative to
        # the last cached observation, clip it, and integrate back.
        last_obs = self.observation_cache[-1]
        ctrl_desired_vel = (ctrl_position-last_obs.qpos_robot[:self.n_jnt])/step_duration

        ctrl_feasible_vel = np.clip(ctrl_desired_vel, self.robot_vel_bound[:self.n_jnt, 0], self.robot_vel_bound[:self.n_jnt, 1])
        ctrl_feasible_position = last_obs.qpos_robot[:self.n_jnt] + ctrl_feasible_vel*step_duration
        return ctrl_feasible_position


class Robot_VelAct(Robot):
    """Robot variant whose actions are joint velocities."""

    # enforce velocity sepcs.
    # ALERT: This depends on previous observation. This is not ideal as it breaks MDP addumptions.
Be careful + def ctrl_velocity_limits(self, ctrl_velocity, step_duration): + last_obs = self.observation_cache[-1] + + ctrl_feasible_vel = np.clip(ctrl_velocity, self.robot_vel_bound[:self.n_jnt, 0], self.robot_vel_bound[:self.n_jnt, 1]) + ctrl_feasible_position = last_obs.qpos_robot[:self.n_jnt] + ctrl_feasible_vel*step_duration + return ctrl_feasible_position + diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_envs/adept_envs/mujoco_env.py b/diffusion_policy/env/kitchen/relay_policy_learning/adept_envs/adept_envs/mujoco_env.py new file mode 100644 index 0000000..e8cd9ec --- /dev/null +++ b/diffusion_policy/env/kitchen/relay_policy_learning/adept_envs/adept_envs/mujoco_env.py @@ -0,0 +1,202 @@ +"""Base environment for MuJoCo-based environments.""" + +#!/usr/bin/python +# +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import collections +import os +import time +from typing import Dict, Optional + +import gym +from gym import spaces +from gym.utils import seeding +import numpy as np + +from adept_envs.simulation.sim_robot import MujocoSimRobot, RenderMode + +DEFAULT_RENDER_SIZE = 480 + +USE_DM_CONTROL = True + + +class MujocoEnv(gym.Env): + """Superclass for all MuJoCo environments.""" + + def __init__(self, + model_path: str, + frame_skip: int, + camera_settings: Optional[Dict] = None, + use_dm_backend: Optional[bool] = None, + ): + """Initializes a new MuJoCo environment. 
+ + Args: + model_path: The path to the MuJoCo XML file. + frame_skip: The number of simulation steps per environment step. On + hardware this influences the duration of each environment step. + camera_settings: Settings to initialize the simulation camera. This + can contain the keys `distance`, `azimuth`, and `elevation`. + use_dm_backend: A boolean to switch between mujoco-py and dm_control. + """ + self._seed() + if not os.path.isfile(model_path): + raise IOError( + '[MujocoEnv]: Model path does not exist: {}'.format(model_path)) + self.frame_skip = frame_skip + + self.sim_robot = MujocoSimRobot( + model_path, + use_dm_backend=use_dm_backend or USE_DM_CONTROL, + camera_settings=camera_settings) + self.sim = self.sim_robot.sim + self.model = self.sim_robot.model + self.data = self.sim_robot.data + + self.metadata = { + 'render.modes': ['human', 'rgb_array', 'depth_array'], + 'video.frames_per_second': int(np.round(1.0 / self.dt)) + } + self.mujoco_render_frames = False + + self.init_qpos = self.data.qpos.ravel().copy() + self.init_qvel = self.data.qvel.ravel().copy() + observation, _reward, done, _info = self.step(np.zeros(self.model.nu)) + assert not done + + bounds = self.model.actuator_ctrlrange.copy() + act_upper = bounds[:, 1] + act_lower = bounds[:, 0] + + # Define the action and observation spaces. + # HACK: MJRL is still using gym 0.9.x so we can't provide a dtype. 
+ try: + self.action_space = spaces.Box( + act_lower, act_upper, dtype=np.float32) + if isinstance(observation, collections.Mapping): + self.observation_space = spaces.Dict({ + k: spaces.Box(-np.inf, np.inf, shape=v.shape, dtype=np.float32) for k, v in observation.items()}) + else: + self.obs_dim = np.sum([o.size for o in observation]) if type(observation) is tuple else observation.size + self.observation_space = spaces.Box( + -np.inf, np.inf, observation.shape, dtype=np.float32) + + except TypeError: + # Fallback case for gym 0.9.x + self.action_space = spaces.Box(act_lower, act_upper) + assert not isinstance(observation, collections.Mapping), 'gym 0.9.x does not support dictionary observation.' + self.obs_dim = np.sum([o.size for o in observation]) if type(observation) is tuple else observation.size + self.observation_space = spaces.Box( + -np.inf, np.inf, observation.shape) + + def seed(self, seed=None): # Compatibility with new gym + return self._seed(seed) + + def _seed(self, seed=None): + self.np_random, seed = seeding.np_random(seed) + return [seed] + + # methods to override: + # ---------------------------- + + def reset_model(self): + """Reset the robot degrees of freedom (qpos and qvel). + + Implement this in each subclass. 
+ """ + raise NotImplementedError + + # ----------------------------- + + def reset(self): # compatibility with new gym + return self._reset() + + def _reset(self): + self.sim.reset() + self.sim.forward() + ob = self.reset_model() + return ob + + def set_state(self, qpos, qvel): + assert qpos.shape == (self.model.nq,) and qvel.shape == (self.model.nv,) + # we are directly manipulating mujoco state here + data = self.sim.data # MjData + for i in range(self.model.nq): + data.qpos[i] = qpos[i] + for i in range(self.model.nv): + data.qvel[i] = qvel[i] + # state = np.concatenate([self.data.qpos, self.data.qvel, self.data.act]) + # self.sim.set_state(state) + self.sim.forward() + + @property + def dt(self): + return self.model.opt.timestep * self.frame_skip + + def do_simulation(self, ctrl, n_frames): + for i in range(self.model.nu): + self.sim.data.ctrl[i] = ctrl[i] + + for _ in range(n_frames): + self.sim.step() + + # TODO(michaelahn): Remove this; render should be called separately. + if self.mujoco_render_frames is True: + self.mj_render() + + def render(self, + mode='human', + width=DEFAULT_RENDER_SIZE, + height=DEFAULT_RENDER_SIZE, + camera_id=-1): + """Renders the environment. + + Args: + mode: The type of rendering to use. + - 'human': Renders to a graphical window. + - 'rgb_array': Returns the RGB image as an np.ndarray. + - 'depth_array': Returns the depth image as an np.ndarray. + width: The width of the rendered image. This only affects offscreen + rendering. + height: The height of the rendered image. This only affects + offscreen rendering. + camera_id: The ID of the camera to use. By default, this is the free + camera. If specified, only affects offscreen rendering. 
+ """ + if mode == 'human': + self.sim_robot.renderer.render_to_window() + elif mode == 'rgb_array': + assert width and height + return self.sim_robot.renderer.render_offscreen( + width, height, mode=RenderMode.RGB, camera_id=camera_id) + elif mode == 'depth_array': + assert width and height + return self.sim_robot.renderer.render_offscreen( + width, height, mode=RenderMode.DEPTH, camera_id=camera_id) + else: + raise NotImplementedError(mode) + + def close(self): + self.sim_robot.close() + + def mj_render(self): + """Backwards compatibility with MJRL.""" + self.render(mode='human') + + def state_vector(self): + state = self.sim.get_state() + return np.concatenate([state.qpos.flat, state.qvel.flat]) \ No newline at end of file diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_envs/adept_envs/robot_env.py b/diffusion_policy/env/kitchen/relay_policy_learning/adept_envs/adept_envs/robot_env.py new file mode 100644 index 0000000..76f11fa --- /dev/null +++ b/diffusion_policy/env/kitchen/relay_policy_learning/adept_envs/adept_envs/robot_env.py @@ -0,0 +1,166 @@ +"""Base class for robotics environments.""" + +#!/usr/bin/python +# +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import importlib +import inspect +import os +from typing import Dict, Optional + +import numpy as np + + +from adept_envs import mujoco_env +from adept_envs.base_robot import BaseRobot +from adept_envs.utils.configurable import import_class_from_path +from adept_envs.utils.constants import MODELS_PATH + + +class RobotEnv(mujoco_env.MujocoEnv): + """Base environment for all adept robots.""" + + # Mapping of robot name to fully qualified class path. + # e.g. 'robot': 'adept_envs.dclaw.robot.Robot' + # Subclasses should override this to specify the Robot classes they support. + ROBOTS = {} + + # Mapping of device path to the calibration file to use. If the device path + # is not found, the 'default' key is used. + # This can be overriden by subclasses. + CALIBRATION_PATHS = {} + + def __init__(self, + model_path: str, + robot: BaseRobot, + frame_skip: int, + camera_settings: Optional[Dict] = None): + """Initializes a robotics environment. + + Args: + model_path: The path to the model to run. Relative paths will be + interpreted as relative to the 'adept_models' folder. + robot: The Robot object to use. + frame_skip: The number of simulation steps per environment step. On + hardware this influences the duration of each environment step. + camera_settings: Settings to initialize the simulation camera. This + can contain the keys `distance`, `azimuth`, and `elevation`. + """ + self._robot = robot + + # Initial pose for first step. 
+ self.desired_pose = np.zeros(self.n_jnt) + + if not model_path.startswith('/'): + model_path = os.path.abspath(os.path.join(MODELS_PATH, model_path)) + + self.remote_viz = None + + try: + from adept_envs.utils.remote_viz import RemoteViz + self.remote_viz = RemoteViz(model_path) + except ImportError: + pass + + + self._initializing = True + super(RobotEnv, self).__init__( + model_path, frame_skip, camera_settings=camera_settings) + self._initializing = False + + + @property + def robot(self): + return self._robot + + @property + def n_jnt(self): + return self._robot.n_jnt + + @property + def n_obj(self): + return self._robot.n_obj + + @property + def skip(self): + """Alias for frame_skip. Needed for MJRL.""" + return self.frame_skip + + @property + def initializing(self): + return self._initializing + + def close_env(self): + if self._robot is not None: + self._robot.close() + + def make_robot(self, + n_jnt, + n_obj=0, + is_hardware=False, + device_name=None, + legacy=False, + **kwargs): + """Creates a new robot for the environment. + + Args: + n_jnt: The number of joints in the robot. + n_obj: The number of object joints in the robot environment. + is_hardware: Whether to run on hardware or not. + device_name: The device path for the robot hardware. + legacy: If true, runs using direct dynamixel communication rather + than DDS. + kwargs: See BaseRobot for other parameters. + + Returns: + A Robot object. 
+ """ + if not self.ROBOTS: + raise NotImplementedError('Subclasses must override ROBOTS.') + + if is_hardware and not device_name: + raise ValueError('Must provide device name if running on hardware.') + + robot_name = 'dds_robot' if not legacy and is_hardware else 'robot' + if robot_name not in self.ROBOTS: + raise KeyError("Unsupported robot '{}', available: {}".format( + robot_name, list(self.ROBOTS.keys()))) + + cls = import_class_from_path(self.ROBOTS[robot_name]) + + calibration_path = None + if self.CALIBRATION_PATHS: + if not device_name: + calibration_name = 'default' + elif device_name not in self.CALIBRATION_PATHS: + print('Device "{}" not in CALIBRATION_PATHS; using default.' + .format(device_name)) + calibration_name = 'default' + else: + calibration_name = device_name + + calibration_path = self.CALIBRATION_PATHS[calibration_name] + if not os.path.isfile(calibration_path): + raise OSError('Could not find calibration file at: {}'.format( + calibration_path)) + + return cls( + n_jnt, + n_obj, + is_hardware=is_hardware, + device_name=device_name, + calibration_path=calibration_path, + **kwargs) diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_envs/adept_envs/simulation/module.py b/diffusion_policy/env/kitchen/relay_policy_learning/adept_envs/adept_envs/simulation/module.py new file mode 100644 index 0000000..a1284c7 --- /dev/null +++ b/diffusion_policy/env/kitchen/relay_policy_learning/adept_envs/adept_envs/simulation/module.py @@ -0,0 +1,126 @@ +#!/usr/bin/python +# +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Module for caching Python modules related to simulation.""" + +import sys + +_MUJOCO_PY_MODULE = None + +_DM_MUJOCO_MODULE = None +_DM_VIEWER_MODULE = None +_DM_RENDER_MODULE = None + +_GLFW_MODULE = None + + +def get_mujoco_py(): + """Returns the mujoco_py module.""" + global _MUJOCO_PY_MODULE + if _MUJOCO_PY_MODULE: + return _MUJOCO_PY_MODULE + try: + import mujoco_py + # Override the warning function. + from mujoco_py.builder import cymj + cymj.set_warning_callback(_mj_warning_fn) + except ImportError: + print( + 'Failed to import mujoco_py. Ensure that mujoco_py (using MuJoCo ' + 'v1.50) is installed.', + file=sys.stderr) + sys.exit(1) + _MUJOCO_PY_MODULE = mujoco_py + return mujoco_py + + +def get_mujoco_py_mjlib(): + """Returns the mujoco_py mjlib module.""" + + class MjlibDelegate: + """Wrapper that forwards mjlib calls.""" + + def __init__(self, lib): + self._lib = lib + + def __getattr__(self, name: str): + if name.startswith('mj'): + return getattr(self._lib, '_' + name) + raise AttributeError(name) + + return MjlibDelegate(get_mujoco_py().cymj) + + +def get_dm_mujoco(): + """Returns the DM Control mujoco module.""" + global _DM_MUJOCO_MODULE + if _DM_MUJOCO_MODULE: + return _DM_MUJOCO_MODULE + try: + from dm_control import mujoco + except ImportError: + print( + 'Failed to import dm_control.mujoco. 
Ensure that dm_control (using ' + 'MuJoCo v2.00) is installed.', + file=sys.stderr) + sys.exit(1) + _DM_MUJOCO_MODULE = mujoco + return mujoco + + +def get_dm_viewer(): + """Returns the DM Control viewer module.""" + global _DM_VIEWER_MODULE + if _DM_VIEWER_MODULE: + return _DM_VIEWER_MODULE + try: + from dm_control import viewer + except ImportError: + print( + 'Failed to import dm_control.viewer. Ensure that dm_control (using ' + 'MuJoCo v2.00) is installed.', + file=sys.stderr) + sys.exit(1) + _DM_VIEWER_MODULE = viewer + return viewer + + +def get_dm_render(): + """Returns the DM Control render module.""" + global _DM_RENDER_MODULE + if _DM_RENDER_MODULE: + return _DM_RENDER_MODULE + try: + try: + from dm_control import _render + render = _render + except ImportError: + print('Warning: DM Control is out of date.') + from dm_control import render + except ImportError: + print( + 'Failed to import dm_control.render. Ensure that dm_control (using ' + 'MuJoCo v2.00) is installed.', + file=sys.stderr) + sys.exit(1) + _DM_RENDER_MODULE = render + return render + + +def _mj_warning_fn(warn_data: bytes): + """Warning function override for mujoco_py.""" + print('WARNING: Mujoco simulation is unstable (has NaNs): {}'.format( + warn_data.decode())) diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_envs/adept_envs/simulation/renderer.py b/diffusion_policy/env/kitchen/relay_policy_learning/adept_envs/adept_envs/simulation/renderer.py new file mode 100644 index 0000000..28b9de2 --- /dev/null +++ b/diffusion_policy/env/kitchen/relay_policy_learning/adept_envs/adept_envs/simulation/renderer.py @@ -0,0 +1,293 @@ +#!/usr/bin/python +# +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Module for viewing Physics objects in the DM Control viewer.""" + +import abc +import enum +import sys +from typing import Dict, Optional + +import numpy as np + +from adept_envs.simulation import module + +# Default window dimensions. +DEFAULT_WINDOW_WIDTH = 1024 +DEFAULT_WINDOW_HEIGHT = 768 + +DEFAULT_WINDOW_TITLE = 'MuJoCo Viewer' + +_MAX_RENDERBUFFER_SIZE = 2048 + + +class RenderMode(enum.Enum): + """Rendering modes for offscreen rendering.""" + RGB = 0 + DEPTH = 1 + SEGMENTATION = 2 + + +class Renderer(abc.ABC): + """Base interface for rendering simulations.""" + + def __init__(self, camera_settings: Optional[Dict] = None): + self._camera_settings = camera_settings + + @abc.abstractmethod + def close(self): + """Cleans up any resources being used by the renderer.""" + + @abc.abstractmethod + def render_to_window(self): + """Renders the simulation to a window.""" + + @abc.abstractmethod + def render_offscreen(self, + width: int, + height: int, + mode: RenderMode = RenderMode.RGB, + camera_id: int = -1) -> np.ndarray: + """Renders the camera view as a NumPy array of pixels. + + Args: + width: The viewport width (pixels). + height: The viewport height (pixels). + mode: The rendering mode. + camera_id: The ID of the camera to render from. By default, uses + the free camera. + + Returns: + A NumPy array of the pixels. 
+ """ + + def _update_camera(self, camera): + """Updates the given camera to move to the initial settings.""" + if not self._camera_settings: + return + distance = self._camera_settings.get('distance') + azimuth = self._camera_settings.get('azimuth') + elevation = self._camera_settings.get('elevation') + lookat = self._camera_settings.get('lookat') + + if distance is not None: + camera.distance = distance + if azimuth is not None: + camera.azimuth = azimuth + if elevation is not None: + camera.elevation = elevation + if lookat is not None: + camera.lookat[:] = lookat + + +class MjPyRenderer(Renderer): + """Class for rendering mujoco_py simulations.""" + + def __init__(self, sim, **kwargs): + assert isinstance(sim, module.get_mujoco_py().MjSim), \ + 'MjPyRenderer takes a mujoco_py MjSim object.' + super().__init__(**kwargs) + self._sim = sim + self._onscreen_renderer = None + self._offscreen_renderer = None + + def render_to_window(self): + """Renders the simulation to a window.""" + if not self._onscreen_renderer: + self._onscreen_renderer = module.get_mujoco_py().MjViewer(self._sim) + self._update_camera(self._onscreen_renderer.cam) + + self._onscreen_renderer.render() + + def render_offscreen(self, + width: int, + height: int, + mode: RenderMode = RenderMode.RGB, + camera_id: int = -1) -> np.ndarray: + """Renders the camera view as a NumPy array of pixels. + + Args: + width: The viewport width (pixels). + height: The viewport height (pixels). + mode: The rendering mode. + camera_id: The ID of the camera to render from. By default, uses + the free camera. + + Returns: + A NumPy array of the pixels. + """ + if not self._offscreen_renderer: + self._offscreen_renderer = module.get_mujoco_py() \ + .MjRenderContextOffscreen(self._sim) + + # Update the camera configuration for the free-camera. 
+ if camera_id == -1: + self._update_camera(self._offscreen_renderer.cam) + + self._offscreen_renderer.render(width, height, camera_id) + if mode == RenderMode.RGB: + data = self._offscreen_renderer.read_pixels( + width, height, depth=False) + # Original image is upside-down, so flip it + return data[::-1, :, :] + elif mode == RenderMode.DEPTH: + data = self._offscreen_renderer.read_pixels( + width, height, depth=True)[1] + # Original image is upside-down, so flip it + return data[::-1, :] + else: + raise NotImplementedError(mode) + + def close(self): + """Cleans up any resources being used by the renderer.""" + + +class DMRenderer(Renderer): + """Class for rendering DM Control Physics objects.""" + + def __init__(self, physics, **kwargs): + assert isinstance(physics, module.get_dm_mujoco().Physics), \ + 'DMRenderer takes a DM Control Physics object.' + super().__init__(**kwargs) + self._physics = physics + self._window = None + + # Set the camera to lookat the center of the geoms. (mujoco_py does + # this automatically. + if 'lookat' not in self._camera_settings: + self._camera_settings['lookat'] = [ + np.median(self._physics.data.geom_xpos[:, i]) for i in range(3) + ] + + def render_to_window(self): + """Renders the Physics object to a window. + + The window continuously renders the Physics in a separate thread. + + This function is a no-op if the window was already created. + """ + if not self._window: + self._window = DMRenderWindow() + self._window.load_model(self._physics) + self._update_camera(self._window.camera) + self._window.run_frame() + + def render_offscreen(self, + width: int, + height: int, + mode: RenderMode = RenderMode.RGB, + camera_id: int = -1) -> np.ndarray: + """Renders the camera view as a NumPy array of pixels. + + Args: + width: The viewport width (pixels). + height: The viewport height (pixels). + mode: The rendering mode. + camera_id: The ID of the camera to render from. By default, uses + the free camera. 
+ + Returns: + A NumPy array of the pixels. + """ + mujoco = module.get_dm_mujoco() + # TODO(michaelahn): Consider caching the camera. + camera = mujoco.Camera( + physics=self._physics, + height=height, + width=width, + camera_id=camera_id) + + # Update the camera configuration for the free-camera. + if camera_id == -1: + self._update_camera( + camera._render_camera, # pylint: disable=protected-access + ) + + image = camera.render( + depth=(mode == RenderMode.DEPTH), + segmentation=(mode == RenderMode.SEGMENTATION)) + camera._scene.free() # pylint: disable=protected-access + return image + + def close(self): + """Cleans up any resources being used by the renderer.""" + if self._window: + self._window.close() + self._window = None + + +class DMRenderWindow: + """Class that encapsulates a graphical window.""" + + def __init__(self, + width: int = DEFAULT_WINDOW_WIDTH, + height: int = DEFAULT_WINDOW_HEIGHT, + title: str = DEFAULT_WINDOW_TITLE): + """Creates a graphical render window. + + Args: + width: The width of the window. + height: The height of the window. + title: The title of the window. 
+ """ + dmv = module.get_dm_viewer() + self._viewport = dmv.renderer.Viewport(width, height) + self._window = dmv.gui.RenderWindow(width, height, title) + self._viewer = dmv.viewer.Viewer(self._viewport, self._window.mouse, + self._window.keyboard) + self._draw_surface = None + self._renderer = dmv.renderer.NullRenderer() + + @property + def camera(self): + return self._viewer._camera._camera + + def close(self): + self._viewer.deinitialize() + self._renderer.release() + self._draw_surface.free() + self._window.close() + + def load_model(self, physics): + """Loads the given Physics object to render.""" + self._viewer.deinitialize() + + self._draw_surface = module.get_dm_render().Renderer( + max_width=_MAX_RENDERBUFFER_SIZE, max_height=_MAX_RENDERBUFFER_SIZE) + self._renderer = module.get_dm_viewer().renderer.OffScreenRenderer( + physics.model, self._draw_surface) + + self._viewer.initialize(physics, self._renderer, touchpad=False) + + def run_frame(self): + """Renders one frame of the simulation. + + NOTE: This is extremely slow at the moment. 
+ """ + glfw = module.get_dm_viewer().gui.glfw_gui.glfw + glfw_window = self._window._context.window + if glfw.window_should_close(glfw_window): + sys.exit(0) + + self._viewport.set_size(*self._window.shape) + self._viewer.render() + pixels = self._renderer.pixels + + with self._window._context.make_current() as ctx: + ctx.call(self._window._update_gui_on_render_thread, glfw_window, + pixels) + self._window._mouse.process_events() + self._window._keyboard.process_events() diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_envs/adept_envs/simulation/sim_robot.py b/diffusion_policy/env/kitchen/relay_policy_learning/adept_envs/adept_envs/simulation/sim_robot.py new file mode 100644 index 0000000..195d524 --- /dev/null +++ b/diffusion_policy/env/kitchen/relay_policy_learning/adept_envs/adept_envs/simulation/sim_robot.py @@ -0,0 +1,135 @@ +#!/usr/bin/python +# +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Module for loading MuJoCo models.""" + +import os +from typing import Dict, Optional + +from adept_envs.simulation import module +from adept_envs.simulation.renderer import DMRenderer, MjPyRenderer, RenderMode + + +class MujocoSimRobot: + """Class that encapsulates a MuJoCo simulation. + + This class exposes methods that are agnostic to the simulation backend. + Two backends are supported: + 1. mujoco_py - MuJoCo v1.50 + 2. 
dm_control - MuJoCo v2.00 + """ + + def __init__(self, + model_file: str, + use_dm_backend: bool = False, + camera_settings: Optional[Dict] = None): + """Initializes a new simulation. + + Args: + model_file: The MuJoCo XML model file to load. + use_dm_backend: If True, uses DM Control's Physics (MuJoCo v2.0) as + the backend for the simulation. Otherwise, uses mujoco_py (MuJoCo + v1.5) as the backend. + camera_settings: Settings to initialize the renderer's camera. This + can contain the keys `distance`, `azimuth`, and `elevation`. + """ + self._use_dm_backend = use_dm_backend + + if not os.path.isfile(model_file): + raise ValueError( + '[MujocoSimRobot] Invalid model file path: {}'.format( + model_file)) + + if self._use_dm_backend: + dm_mujoco = module.get_dm_mujoco() + if model_file.endswith('.mjb'): + self.sim = dm_mujoco.Physics.from_binary_path(model_file) + else: + self.sim = dm_mujoco.Physics.from_xml_path(model_file) + self.model = self.sim.model + self._patch_mjlib_accessors(self.model, self.sim.data) + self.renderer = DMRenderer( + self.sim, camera_settings=camera_settings) + else: # Use mujoco_py + mujoco_py = module.get_mujoco_py() + self.model = mujoco_py.load_model_from_path(model_file) + self.sim = mujoco_py.MjSim(self.model) + self.renderer = MjPyRenderer( + self.sim, camera_settings=camera_settings) + + self.data = self.sim.data + + def close(self): + """Cleans up any resources being used by the simulation.""" + self.renderer.close() + + def save_binary(self, path: str): + """Saves the loaded model to a binary .mjb file.""" + if os.path.exists(path): + raise ValueError( + '[MujocoSimRobot] Path already exists: {}'.format(path)) + if not path.endswith('.mjb'): + path = path + '.mjb' + if self._use_dm_backend: + self.model.save_binary(path) + else: + with open(path, 'wb') as f: + f.write(self.model.get_mjb()) + + def get_mjlib(self): + """Returns an object that exposes the low-level MuJoCo API.""" + if self._use_dm_backend: + return 
module.get_dm_mujoco().wrapper.mjbindings.mjlib + else: + return module.get_mujoco_py_mjlib() + + def _patch_mjlib_accessors(self, model, data): + """Adds accessors to the DM Control objects to support mujoco_py API.""" + assert self._use_dm_backend + mjlib = self.get_mjlib() + + def name2id(type_name, name): + obj_id = mjlib.mj_name2id(model.ptr, + mjlib.mju_str2Type(type_name.encode()), + name.encode()) + if obj_id < 0: + raise ValueError('No {} with name "{}" exists.'.format( + type_name, name)) + return obj_id + + if not hasattr(model, 'body_name2id'): + model.body_name2id = lambda name: name2id('body', name) + + if not hasattr(model, 'geom_name2id'): + model.geom_name2id = lambda name: name2id('geom', name) + + if not hasattr(model, 'site_name2id'): + model.site_name2id = lambda name: name2id('site', name) + + if not hasattr(model, 'joint_name2id'): + model.joint_name2id = lambda name: name2id('joint', name) + + if not hasattr(model, 'actuator_name2id'): + model.actuator_name2id = lambda name: name2id('actuator', name) + + if not hasattr(model, 'camera_name2id'): + model.camera_name2id = lambda name: name2id('camera', name) + + if not hasattr(data, 'body_xpos'): + data.body_xpos = data.xpos + + if not hasattr(data, 'body_xquat'): + data.body_xquat = data.xquat diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_envs/adept_envs/utils/config.py b/diffusion_policy/env/kitchen/relay_policy_learning/adept_envs/adept_envs/utils/config.py new file mode 100644 index 0000000..e6aea13 --- /dev/null +++ b/diffusion_policy/env/kitchen/relay_policy_learning/adept_envs/adept_envs/utils/config.py @@ -0,0 +1,99 @@ +#!/usr/bin/python +# +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +try: + import cElementTree as ET +except ImportError: + try: + # Python 2.5 need to import a different module + import xml.etree.cElementTree as ET + except ImportError: + exit_err("Failed to import cElementTree from any known place") + +CONFIG_XML_DATA = """ + + + + + +""" + + +# Read config from root +def read_config_from_node(root_node, parent_name, child_name, dtype=int): + # find parent + parent_node = root_node.find(parent_name) + if parent_node == None: + quit("Parent %s not found" % parent_name) + + # get child data + child_data = parent_node.get(child_name) + if child_data == None: + quit("Child %s not found" % child_name) + + config_val = np.array(child_data.split(), dtype=dtype) + return config_val + + +# get config frlom file or string +def get_config_root_node(config_file_name=None, config_file_data=None): + try: + # get root + if config_file_data is None: + config_file_content = open(config_file_name, "r") + config = ET.parse(config_file_content) + root_node = config.getroot() + else: + root_node = ET.fromstring(config_file_data) + + # get root data + root_data = root_node.get('name') + root_name = np.array(root_data.split(), dtype=str) + except: + quit("ERROR: Unable to process config file %s" % config_file_name) + + return root_node, root_name + + +# Read config from config_file +def read_config_from_xml(config_file_name, parent_name, child_name, dtype=int): + root_node, root_name = get_config_root_node( + config_file_name=config_file_name) + return read_config_from_node(root_node, parent_name, child_name, dtype) + + 
+# tests
+if __name__ == '__main__':
+    print("Read config and parse -------------------------")
+    root, root_name = get_config_root_node(config_file_data=CONFIG_XML_DATA)
+    print("Root:name \t", root_name)
+    print("limit:low \t", read_config_from_node(root, "limits", "low", float))
+    print("limit:high \t", read_config_from_node(root, "limits", "high", float))
+    print("scale:joint \t", read_config_from_node(root, "scale", "joint",
+                                                  float))
+    print("data:type \t", read_config_from_node(root, "data", "type", str))
+
+    # read straight from xml (dump the XML data as duh.xml for this test)
+    root, root_name = get_config_root_node(config_file_name="duh.xml")
+    print("Read from xml --------------------------------")
+    print("limit:low \t", read_config_from_xml("duh.xml", "limits", "low",
+                                               float))
+    print("limit:high \t",
+          read_config_from_xml("duh.xml", "limits", "high", float))
+    print("scale:joint \t",
+          read_config_from_xml("duh.xml", "scale", "joint", float))
+    print("data:type \t", read_config_from_xml("duh.xml", "data", "type", str))
diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_envs/adept_envs/utils/configurable.py b/diffusion_policy/env/kitchen/relay_policy_learning/adept_envs/adept_envs/utils/configurable.py
new file mode 100644
index 0000000..6685a50
--- /dev/null
+++ b/diffusion_policy/env/kitchen/relay_policy_learning/adept_envs/adept_envs/utils/configurable.py
@@ -0,0 +1,163 @@
+#!/usr/bin/python
+#
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and +# limitations under the License. + +import importlib +import inspect +import os + +from gym.envs.registration import registry as gym_registry + + +def import_class_from_path(class_path): + """Given 'path.to.module:object', imports and returns the object.""" + module_path, class_name = class_path.split(":") + module = importlib.import_module(module_path) + return getattr(module, class_name) + + +class ConfigCache(object): + """Configuration class to store constructor arguments. + + This is used to store parameters to pass to Gym environments at init time. + """ + + def __init__(self): + self._configs = {} + self._default_config = {} + + def set_default_config(self, config): + """Sets the default configuration used for all RobotEnv envs.""" + self._default_config = dict(config) + + def set_config(self, cls_or_env_id, config): + """Sets the configuration for the given environment within a context. + + Args: + cls_or_env_id (Class | str): A class type or Gym environment ID to + configure. + config (dict): The configuration parameters. + """ + config_key = self._get_config_key(cls_or_env_id) + self._configs[config_key] = dict(config) + + def get_config(self, cls_or_env_id): + """Returns the configuration for the given env name. + + Args: + cls_or_env_id (Class | str): A class type or Gym environment ID to + get the configuration of. 
+ """ + config_key = self._get_config_key(cls_or_env_id) + config = dict(self._default_config) + config.update(self._configs.get(config_key, {})) + return config + + def clear_config(self, cls_or_env_id): + """Clears the configuration for the given ID.""" + config_key = self._get_config_key(cls_or_env_id) + if config_key in self._configs: + del self._configs[config_key] + + def _get_config_key(self, cls_or_env_id): + if inspect.isclass(cls_or_env_id): + return cls_or_env_id + env_id = cls_or_env_id + assert isinstance(env_id, str) + if env_id not in gym_registry.env_specs: + raise ValueError("Unregistered environment name {}.".format(env_id)) + entry_point = gym_registry.env_specs[env_id]._entry_point + if callable(entry_point): + return entry_point + else: + return import_class_from_path(entry_point) + + +# Global robot config. +global_config = ConfigCache() + + +def configurable(config_id=None, pickleable=False, config_cache=global_config): + """Class decorator to allow injection of constructor arguments. + + This allows constructor arguments to be passed via ConfigCache. + Example usage: + + @configurable() + class A: + def __init__(b=None, c=2, d='Wow'): + ... + + global_config.set_config(A, {'b': 10, 'c': 20}) + a = A() # b=10, c=20, d='Wow' + a = A(b=30) # b=30, c=20, d='Wow' + + Args: + config_id: ID of the config to use. This defaults to the class type. + pickleable: Whether this class is pickleable. If true, causes the pickle + state to include the config and constructor arguments. + config_cache: The ConfigCache to use to read config data from. Uses + the global ConfigCache by default. + """ + def cls_decorator(cls): + assert inspect.isclass(cls) + + # Overwrite the class constructor to pass arguments from the config. + base_init = cls.__init__ + def __init__(self, *args, **kwargs): + + config = config_cache.get_config(config_id or type(self)) + # Allow kwargs to override the config. 
+ kwargs = {**config, **kwargs} + + # print('Initializing {} with params: {}'.format(type(self).__name__, + # kwargs)) + + if pickleable: + self._pkl_env_args = args + self._pkl_env_kwargs = kwargs + + base_init(self, *args, **kwargs) + cls.__init__ = __init__ + + # If the class is pickleable, overwrite the state methods to save + # the constructor arguments and config. + if pickleable: + # Use same pickle keys as gym.utils.ezpickle for backwards compat. + PKL_ARGS_KEY = '_ezpickle_args' + PKL_KWARGS_KEY = '_ezpickle_kwargs' + + def __getstate__(self): + return { + PKL_ARGS_KEY: self._pkl_env_args, + PKL_KWARGS_KEY: self._pkl_env_kwargs, + } + cls.__getstate__ = __getstate__ + + def __setstate__(self, data): + saved_args = data[PKL_ARGS_KEY] + saved_kwargs = data[PKL_KWARGS_KEY] + + # Override the saved state with the current config. + config = config_cache.get_config(config_id or type(self)) + # Allow kwargs to override the config. + kwargs = {**saved_kwargs, **config} + + inst = type(self)(*saved_args, **kwargs) + self.__dict__.update(inst.__dict__) + cls.__setstate__ = __setstate__ + + return cls + return cls_decorator diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_envs/adept_envs/utils/constants.py b/diffusion_policy/env/kitchen/relay_policy_learning/adept_envs/adept_envs/utils/constants.py new file mode 100644 index 0000000..9c63fb7 --- /dev/null +++ b/diffusion_policy/env/kitchen/relay_policy_learning/adept_envs/adept_envs/utils/constants.py @@ -0,0 +1,23 @@ +#!/usr/bin/python +# +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +ENVS_ROOT_PATH = os.path.abspath(os.path.join( + os.path.dirname(os.path.abspath(__file__)), + "../../")) + +MODELS_PATH = os.path.abspath(os.path.join(ENVS_ROOT_PATH, "../adept_models/")) diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_envs/adept_envs/utils/parse_demos.py b/diffusion_policy/env/kitchen/relay_policy_learning/adept_envs/adept_envs/utils/parse_demos.py new file mode 100644 index 0000000..01f9c36 --- /dev/null +++ b/diffusion_policy/env/kitchen/relay_policy_learning/adept_envs/adept_envs/utils/parse_demos.py @@ -0,0 +1,221 @@ +#!/usr/bin/python +# +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import click +import glob +import pickle +import numpy as np +from parse_mjl import parse_mjl_logs, viz_parsed_mjl_logs +from mjrl.utils.gym_env import GymEnv +import adept_envs +import time as timer +import skvideo.io +import gym + +# headless renderer +render_buffer = [] # rendering buffer + + +def viewer(env, + mode='initialize', + filename='video', + frame_size=(640, 480), + camera_id=0, + render=None): + if render == 'onscreen': + env.mj_render() + + elif render == 'offscreen': + + global render_buffer + if mode == 'initialize': + render_buffer = [] + mode = 'render' + + if mode == 'render': + curr_frame = env.render(mode='rgb_array') + render_buffer.append(curr_frame) + + if mode == 'save': + skvideo.io.vwrite(filename, np.asarray(render_buffer)) + print("\noffscreen buffer saved", filename) + + elif render == 'None': + pass + + else: + print("unknown render: ", render) + + +# view demos (physics ignored) +def render_demos(env, data, filename='demo_rendering.mp4', render=None): + FPS = 30 + render_skip = max(1, round(1. / \ + (FPS * env.sim.model.opt.timestep * env.frame_skip))) + t0 = timer.time() + + viewer(env, mode='initialize', render=render) + for i_frame in range(data['ctrl'].shape[0]): + env.sim.data.qpos[:] = data['qpos'][i_frame].copy() + env.sim.data.qvel[:] = data['qvel'][i_frame].copy() + env.sim.forward() + if i_frame % render_skip == 0: + viewer(env, mode='render', render=render) + print(i_frame, end=', ', flush=True) + + viewer(env, mode='save', filename=filename, render=render) + print("time taken = %f" % (timer.time() - t0)) + + +# playback demos and get data(physics respected) +def gather_training_data(env, data, filename='demo_playback.mp4', render=None): + env = env.env + FPS = 30 + render_skip = max(1, round(1. 
/ \ + (FPS * env.sim.model.opt.timestep * env.frame_skip))) + t0 = timer.time() + + # initialize + env.reset() + init_qpos = data['qpos'][0].copy() + init_qvel = data['qvel'][0].copy() + act_mid = env.act_mid + act_rng = env.act_amp + + # prepare env + env.sim.data.qpos[:] = init_qpos + env.sim.data.qvel[:] = init_qvel + env.sim.forward() + viewer(env, mode='initialize', render=render) + + # step the env and gather data + path_obs = None + for i_frame in range(data['ctrl'].shape[0] - 1): + # Reset every time step + # if i_frame % 1 == 0: + # qp = data['qpos'][i_frame].copy() + # qv = data['qvel'][i_frame].copy() + # env.sim.data.qpos[:] = qp + # env.sim.data.qvel[:] = qv + # env.sim.forward() + + obs = env._get_obs() + + # Construct the action + # ctrl = (data['qpos'][i_frame + 1][:9] - obs[:9]) / (env.skip * env.model.opt.timestep) + ctrl = (data['ctrl'][i_frame] - obs[:9])/(env.skip*env.model.opt.timestep) + act = (ctrl - act_mid) / act_rng + act = np.clip(act, -0.999, 0.999) + next_obs, reward, done, env_info = env.step(act) + if path_obs is None: + path_obs = obs + path_act = act + else: + path_obs = np.vstack((path_obs, obs)) + path_act = np.vstack((path_act, act)) + + # render when needed to maintain FPS + if i_frame % render_skip == 0: + viewer(env, mode='render', render=render) + print(i_frame, end=', ', flush=True) + + # finalize + if render: + viewer(env, mode='save', filename=filename, render=render) + + t1 = timer.time() + print("time taken = %f" % (t1 - t0)) + + # note that are one step away from + return path_obs, path_act, init_qpos, init_qvel + + +# MAIN ========================================================= +@click.command(help="parse tele-op demos") +@click.option('--env', '-e', type=str, help='gym env name', required=True) +@click.option( + '--demo_dir', + '-d', + type=str, + help='directory with tele-op logs', + required=True) +@click.option( + '--skip', + '-s', + type=int, + help='number of frames to skip (1:no skip)', + default=1) 
+@click.option('--graph', '-g', type=bool, help='plot logs', default=False) +@click.option('--save_logs', '-l', type=bool, help='save logs', default=False) +@click.option( + '--view', '-v', type=str, help='render/playback', default='render') +@click.option( + '--render', '-r', type=str, help='onscreen/offscreen', default='onscreen') +def main(env, demo_dir, skip, graph, save_logs, view, render): + + gym_env = gym.make(env) + paths = [] + print("Scanning demo_dir: " + demo_dir + "=========") + for ind, file in enumerate(glob.glob(demo_dir + "*.mjl")): + + # process logs + print("processing: " + file, end=': ') + + data = parse_mjl_logs(file, skip) + + print("log duration %0.2f" % (data['time'][-1] - data['time'][0])) + + # plot logs + if (graph): + print("plotting: " + file) + viz_parsed_mjl_logs(data) + + # save logs + if (save_logs): + pickle.dump(data, open(file[:-4] + ".pkl", 'wb')) + + # render logs to video + if view == 'render': + render_demos( + gym_env, + data, + filename=data['logName'][:-4] + '_demo_render.mp4', + render=render) + + # playback logs and gather data + elif view == 'playback': + try: + obs, act,init_qpos, init_qvel = gather_training_data(gym_env, data,\ + filename=data['logName'][:-4]+'_playback.mp4', render=render) + except Exception as e: + print(e) + continue + path = { + 'observations': obs, + 'actions': act, + 'goals': obs, + 'init_qpos': init_qpos, + 'init_qvel': init_qvel + } + paths.append(path) + # accept = input('accept demo?') + # if accept == 'n': + # continue + pickle.dump(path, open(demo_dir + env + str(ind) + "_path.pkl", 'wb')) + print(demo_dir + env + file + "_path.pkl") + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_envs/adept_envs/utils/quatmath.py b/diffusion_policy/env/kitchen/relay_policy_learning/adept_envs/adept_envs/utils/quatmath.py new file mode 100644 index 0000000..bae531a --- /dev/null +++ 
b/diffusion_policy/env/kitchen/relay_policy_learning/adept_envs/adept_envs/utils/quatmath.py @@ -0,0 +1,180 @@ +#!/usr/bin/python +# +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +# For testing whether a number is close to zero +_FLOAT_EPS = np.finfo(np.float64).eps +_EPS4 = _FLOAT_EPS * 4.0 + + +def mulQuat(qa, qb): + res = np.zeros(4) + res[0] = qa[0]*qb[0] - qa[1]*qb[1] - qa[2]*qb[2] - qa[3]*qb[3] + res[1] = qa[0]*qb[1] + qa[1]*qb[0] + qa[2]*qb[3] - qa[3]*qb[2] + res[2] = qa[0]*qb[2] - qa[1]*qb[3] + qa[2]*qb[0] + qa[3]*qb[1] + res[3] = qa[0]*qb[3] + qa[1]*qb[2] - qa[2]*qb[1] + qa[3]*qb[0] + return res + +def negQuat(quat): + return np.array([quat[0], -quat[1], -quat[2], -quat[3]]) + +def quat2Vel(quat, dt=1): + axis = quat[1:].copy() + sin_a_2 = np.sqrt(np.sum(axis**2)) + axis = axis/(sin_a_2+1e-8) + speed = 2*np.arctan2(sin_a_2, quat[0])/dt + return speed, axis + +def quatDiff2Vel(quat1, quat2, dt): + neg = negQuat(quat1) + diff = mulQuat(quat2, neg) + return quat2Vel(diff, dt) + + +def axis_angle2quat(axis, angle): + c = np.cos(angle/2) + s = np.sin(angle/2) + return np.array([c, s*axis[0], s*axis[1], s*axis[2]]) + +def euler2mat(euler): + """ Convert Euler Angles to Rotation Matrix. 
See rotation.py for notes """ + euler = np.asarray(euler, dtype=np.float64) + assert euler.shape[-1] == 3, "Invalid shaped euler {}".format(euler) + + ai, aj, ak = -euler[..., 2], -euler[..., 1], -euler[..., 0] + si, sj, sk = np.sin(ai), np.sin(aj), np.sin(ak) + ci, cj, ck = np.cos(ai), np.cos(aj), np.cos(ak) + cc, cs = ci * ck, ci * sk + sc, ss = si * ck, si * sk + + mat = np.empty(euler.shape[:-1] + (3, 3), dtype=np.float64) + mat[..., 2, 2] = cj * ck + mat[..., 2, 1] = sj * sc - cs + mat[..., 2, 0] = sj * cc + ss + mat[..., 1, 2] = cj * sk + mat[..., 1, 1] = sj * ss + cc + mat[..., 1, 0] = sj * cs - sc + mat[..., 0, 2] = -sj + mat[..., 0, 1] = cj * si + mat[..., 0, 0] = cj * ci + return mat + + +def euler2quat(euler): + """ Convert Euler Angles to Quaternions. See rotation.py for notes """ + euler = np.asarray(euler, dtype=np.float64) + assert euler.shape[-1] == 3, "Invalid shape euler {}".format(euler) + + ai, aj, ak = euler[..., 2] / 2, -euler[..., 1] / 2, euler[..., 0] / 2 + si, sj, sk = np.sin(ai), np.sin(aj), np.sin(ak) + ci, cj, ck = np.cos(ai), np.cos(aj), np.cos(ak) + cc, cs = ci * ck, ci * sk + sc, ss = si * ck, si * sk + + quat = np.empty(euler.shape[:-1] + (4,), dtype=np.float64) + quat[..., 0] = cj * cc + sj * ss + quat[..., 3] = cj * sc - sj * cs + quat[..., 2] = -(cj * ss + sj * cc) + quat[..., 1] = cj * cs - sj * sc + return quat + + +def mat2euler(mat): + """ Convert Rotation Matrix to Euler Angles. 
See rotation.py for notes """ + mat = np.asarray(mat, dtype=np.float64) + assert mat.shape[-2:] == (3, 3), "Invalid shape matrix {}".format(mat) + + cy = np.sqrt(mat[..., 2, 2] * mat[..., 2, 2] + mat[..., 1, 2] * mat[..., 1, 2]) + condition = cy > _EPS4 + euler = np.empty(mat.shape[:-1], dtype=np.float64) + euler[..., 2] = np.where(condition, + -np.arctan2(mat[..., 0, 1], mat[..., 0, 0]), + -np.arctan2(-mat[..., 1, 0], mat[..., 1, 1])) + euler[..., 1] = np.where(condition, + -np.arctan2(-mat[..., 0, 2], cy), + -np.arctan2(-mat[..., 0, 2], cy)) + euler[..., 0] = np.where(condition, + -np.arctan2(mat[..., 1, 2], mat[..., 2, 2]), + 0.0) + return euler + + +def mat2quat(mat): + """ Convert Rotation Matrix to Quaternion. See rotation.py for notes """ + mat = np.asarray(mat, dtype=np.float64) + assert mat.shape[-2:] == (3, 3), "Invalid shape matrix {}".format(mat) + + Qxx, Qyx, Qzx = mat[..., 0, 0], mat[..., 0, 1], mat[..., 0, 2] + Qxy, Qyy, Qzy = mat[..., 1, 0], mat[..., 1, 1], mat[..., 1, 2] + Qxz, Qyz, Qzz = mat[..., 2, 0], mat[..., 2, 1], mat[..., 2, 2] + # Fill only lower half of symmetric matrix + K = np.zeros(mat.shape[:-2] + (4, 4), dtype=np.float64) + K[..., 0, 0] = Qxx - Qyy - Qzz + K[..., 1, 0] = Qyx + Qxy + K[..., 1, 1] = Qyy - Qxx - Qzz + K[..., 2, 0] = Qzx + Qxz + K[..., 2, 1] = Qzy + Qyz + K[..., 2, 2] = Qzz - Qxx - Qyy + K[..., 3, 0] = Qyz - Qzy + K[..., 3, 1] = Qzx - Qxz + K[..., 3, 2] = Qxy - Qyx + K[..., 3, 3] = Qxx + Qyy + Qzz + K /= 3.0 + # TODO: vectorize this -- probably could be made faster + q = np.empty(K.shape[:-2] + (4,)) + it = np.nditer(q[..., 0], flags=['multi_index']) + while not it.finished: + # Use Hermitian eigenvectors, values for speed + vals, vecs = np.linalg.eigh(K[it.multi_index]) + # Select largest eigenvector, reorder to w,x,y,z quaternion + q[it.multi_index] = vecs[[3, 0, 1, 2], np.argmax(vals)] + # Prefer quaternion with positive w + # (q * -1 corresponds to same rotation as q) + if q[it.multi_index][0] < 0: + q[it.multi_index] 
*= -1 + it.iternext() + return q + + +def quat2euler(quat): + """ Convert Quaternion to Euler Angles. See rotation.py for notes """ + return mat2euler(quat2mat(quat)) + + +def quat2mat(quat): + """ Convert Quaternion to Euler Angles. See rotation.py for notes """ + quat = np.asarray(quat, dtype=np.float64) + assert quat.shape[-1] == 4, "Invalid shape quat {}".format(quat) + + w, x, y, z = quat[..., 0], quat[..., 1], quat[..., 2], quat[..., 3] + Nq = np.sum(quat * quat, axis=-1) + s = 2.0 / Nq + X, Y, Z = x * s, y * s, z * s + wX, wY, wZ = w * X, w * Y, w * Z + xX, xY, xZ = x * X, x * Y, x * Z + yY, yZ, zZ = y * Y, y * Z, z * Z + + mat = np.empty(quat.shape[:-1] + (3, 3), dtype=np.float64) + mat[..., 0, 0] = 1.0 - (yY + zZ) + mat[..., 0, 1] = xY - wZ + mat[..., 0, 2] = xZ + wY + mat[..., 1, 0] = xY + wZ + mat[..., 1, 1] = 1.0 - (xX + zZ) + mat[..., 1, 2] = yZ - wX + mat[..., 2, 0] = xZ - wY + mat[..., 2, 1] = yZ + wX + mat[..., 2, 2] = 1.0 - (xX + yY) + return np.where((Nq > _FLOAT_EPS)[..., np.newaxis, np.newaxis], mat, np.eye(3)) \ No newline at end of file diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/.gitignore b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/.gitignore new file mode 100644 index 0000000..b8e8667 --- /dev/null +++ b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/.gitignore @@ -0,0 +1,8 @@ +# General +.DS_Store +*.swp +*.profraw + +# Editors +.vscode +.idea diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/CONTRIBUTING.public.md b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/CONTRIBUTING.public.md new file mode 100644 index 0000000..db177d4 --- /dev/null +++ b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/CONTRIBUTING.public.md @@ -0,0 +1,28 @@ +# How to Contribute + +We'd love to accept your patches and contributions to this project. There are +just a few small guidelines you need to follow. 
+ +## Contributor License Agreement + +Contributions to this project must be accompanied by a Contributor License +Agreement. You (or your employer) retain the copyright to your contribution; +this simply gives us permission to use and redistribute your contributions as +part of the project. Head over to to see +your current agreements on file or to sign a new one. + +You generally only need to submit a CLA once, so if you've already submitted one +(even if it was for a different project), you probably don't need to do it +again. + +## Code reviews + +All submissions, including submissions by project members, require review. We +use GitHub pull requests for this purpose. Consult +[GitHub Help](https://help.github.com/articles/about-pull-requests/) for more +information on using pull requests. + +## Community Guidelines + +This project follows +[Google's Open Source Community Guidelines](https://opensource.google.com/conduct/). diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/LICENSE b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/LICENSE new file mode 100644 index 0000000..9a644b9 --- /dev/null +++ b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/LICENSE @@ -0,0 +1,203 @@ +Copyright 2019 The DSuite Authors. All rights reserved. + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/README.public.md b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/README.public.md new file mode 100644 index 0000000..da3fa5d --- /dev/null +++ b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/README.public.md @@ -0,0 +1,10 @@ +# D'Suite Scenes + +This repository is based on a collection of [MuJoCo](http://www.mujoco.org/) simulation +scenes and common assets for D'Suite environments. Based on code in the ROBEL suite +https://github.com/google-research/robel + +## Disclaimer + +This is not an official Google product. 
+ diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/__init__.py b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/assets/backwall_asset.xml b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/assets/backwall_asset.xml new file mode 100644 index 0000000..9e1e39d --- /dev/null +++ b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/assets/backwall_asset.xml @@ -0,0 +1,21 @@ + + + + + + + + + + + + + + + + + + + + + diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/assets/backwall_chain.xml b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/assets/backwall_chain.xml new file mode 100644 index 0000000..b76b0da --- /dev/null +++ b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/assets/backwall_chain.xml @@ -0,0 +1,16 @@ + + + + + + + + + + + + + + + + diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/assets/counters_asset.xml b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/assets/counters_asset.xml new file mode 100644 index 0000000..c3e28f8 --- /dev/null +++ b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/assets/counters_asset.xml @@ -0,0 +1,32 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/assets/counters_chain.xml b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/assets/counters_chain.xml new file mode 100644 index 0000000..83e1791 --- /dev/null +++ b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/assets/counters_chain.xml @@ -0,0 +1,48 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/assets/hingecabinet_asset.xml b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/assets/hingecabinet_asset.xml new file mode 100644 index 0000000..8202810 --- /dev/null +++ b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/assets/hingecabinet_asset.xml @@ -0,0 +1,25 @@ + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/assets/hingecabinet_chain.xml b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/assets/hingecabinet_chain.xml new file mode 100644 index 0000000..7f935d3 --- /dev/null +++ b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/assets/hingecabinet_chain.xml @@ -0,0 +1,49 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/assets/kettle_asset.xml b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/assets/kettle_asset.xml new file mode 100644 index 0000000..dbe8e9b --- /dev/null +++ b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/assets/kettle_asset.xml @@ -0,0 +1,23 @@ + + + + + + + + + + + + + + + + + + + + + + + diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/assets/kettle_chain.xml b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/assets/kettle_chain.xml new file mode 100644 index 0000000..fb5f224 --- /dev/null +++ b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/assets/kettle_chain.xml @@ -0,0 +1,20 @@ + + + + + + + + + + + + + + + + + + + + diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/assets/microwave_asset.xml 
b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/assets/microwave_asset.xml new file mode 100644 index 0000000..cc651ee --- /dev/null +++ b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/assets/microwave_asset.xml @@ -0,0 +1,31 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/assets/microwave_chain.xml b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/assets/microwave_chain.xml new file mode 100644 index 0000000..fd88ab3 --- /dev/null +++ b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/assets/microwave_chain.xml @@ -0,0 +1,28 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/assets/oven_asset.xml b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/assets/oven_asset.xml new file mode 100644 index 0000000..ef1184e --- /dev/null +++ b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/assets/oven_asset.xml @@ -0,0 +1,50 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/assets/oven_chain.xml b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/assets/oven_chain.xml new file mode 100644 index 0000000..f96f8c7 --- /dev/null +++ b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/assets/oven_chain.xml @@ -0,0 +1,115 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/assets/slidecabinet_asset.xml 
b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/assets/slidecabinet_asset.xml new file mode 100644 index 0000000..f0f370a --- /dev/null +++ b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/assets/slidecabinet_asset.xml @@ -0,0 +1,22 @@ + + + + + + + + + + + + + + + + + + + + + + diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/assets/slidecabinet_chain.xml b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/assets/slidecabinet_chain.xml new file mode 100644 index 0000000..5aa820e --- /dev/null +++ b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/assets/slidecabinet_chain.xml @@ -0,0 +1,28 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/counters.xml b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/counters.xml new file mode 100644 index 0000000..69fb889 --- /dev/null +++ b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/counters.xml @@ -0,0 +1,14 @@ + + + + + + + + + + + + + + diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/hingecabinet.xml b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/hingecabinet.xml new file mode 100644 index 0000000..89b8db4 --- /dev/null +++ b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/hingecabinet.xml @@ -0,0 +1,14 @@ + + + + + + + + + + + + + + diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/kettle.xml b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/kettle.xml new file mode 100644 index 0000000..a27e978 --- /dev/null +++ b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/kettle.xml @@ -0,0 +1,14 @@ + + + + + + + + + + + + + + diff --git 
a/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/kitchen.xml b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/kitchen.xml new file mode 100644 index 0000000..34813ca --- /dev/null +++ b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/kitchen.xml @@ -0,0 +1,38 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/burnerplate.stl b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/burnerplate.stl new file mode 100644 index 0000000..46740b5 Binary files /dev/null and b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/burnerplate.stl differ diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/burnerplate_mesh.stl b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/burnerplate_mesh.stl new file mode 100644 index 0000000..46740b5 Binary files /dev/null and b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/burnerplate_mesh.stl differ diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/cabinetbase.stl b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/cabinetbase.stl new file mode 100644 index 0000000..580a51c Binary files /dev/null and b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/cabinetbase.stl differ diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/cabinetdrawer.stl b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/cabinetdrawer.stl new file mode 100644 index 0000000..0932eeb Binary files /dev/null and b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/cabinetdrawer.stl differ diff --git 
a/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/cabinethandle.stl b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/cabinethandle.stl new file mode 100644 index 0000000..960cd39 Binary files /dev/null and b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/cabinethandle.stl differ diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/countertop.stl b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/countertop.stl new file mode 100644 index 0000000..16410d1 Binary files /dev/null and b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/countertop.stl differ diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/faucet.stl b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/faucet.stl new file mode 100644 index 0000000..55404af Binary files /dev/null and b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/faucet.stl differ diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/handle2.stl b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/handle2.stl new file mode 100644 index 0000000..09b7833 Binary files /dev/null and b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/handle2.stl differ diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/hingecabinet.stl b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/hingecabinet.stl new file mode 100644 index 0000000..6693df8 Binary files /dev/null and b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/hingecabinet.stl differ diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/hingedoor.stl 
b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/hingedoor.stl new file mode 100644 index 0000000..feecf23 Binary files /dev/null and b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/hingedoor.stl differ diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/hingehandle.stl b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/hingehandle.stl new file mode 100644 index 0000000..fb85521 Binary files /dev/null and b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/hingehandle.stl differ diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/hood.stl b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/hood.stl new file mode 100644 index 0000000..6c0e3ad Binary files /dev/null and b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/hood.stl differ diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/kettle.stl b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/kettle.stl new file mode 100644 index 0000000..0e8d9e5 Binary files /dev/null and b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/kettle.stl differ diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/kettlehandle.stl b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/kettlehandle.stl new file mode 100644 index 0000000..83baef3 Binary files /dev/null and b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/kettlehandle.stl differ diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/knob.stl b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/knob.stl new file mode 100644 index 0000000..90180b5 Binary 
files /dev/null and b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/knob.stl differ diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/lightswitch.stl b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/lightswitch.stl new file mode 100644 index 0000000..fa956c9 Binary files /dev/null and b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/lightswitch.stl differ diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/lightswitchbase.stl b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/lightswitchbase.stl new file mode 100644 index 0000000..e64b059 Binary files /dev/null and b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/lightswitchbase.stl differ diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/micro.stl b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/micro.stl new file mode 100644 index 0000000..6ed6802 Binary files /dev/null and b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/micro.stl differ diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/microbutton.stl b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/microbutton.stl new file mode 100644 index 0000000..2d7f1e3 Binary files /dev/null and b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/microbutton.stl differ diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/microdoor.stl b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/microdoor.stl new file mode 100644 index 0000000..fa8c548 Binary files /dev/null and b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/microdoor.stl 
differ diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/microefeet.stl b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/microefeet.stl new file mode 100644 index 0000000..98e7069 Binary files /dev/null and b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/microefeet.stl differ diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/microfeet.stl b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/microfeet.stl new file mode 100644 index 0000000..a516299 Binary files /dev/null and b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/microfeet.stl differ diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/microhandle.stl b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/microhandle.stl new file mode 100644 index 0000000..ed31a70 Binary files /dev/null and b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/microhandle.stl differ diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/microwindow.stl b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/microwindow.stl new file mode 100644 index 0000000..07d3c85 Binary files /dev/null and b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/microwindow.stl differ diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/oven.stl b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/oven.stl new file mode 100644 index 0000000..04d3b66 Binary files /dev/null and b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/oven.stl differ diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/ovenhandle.stl 
b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/ovenhandle.stl new file mode 100644 index 0000000..30250a7 Binary files /dev/null and b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/ovenhandle.stl differ diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/oventop.stl b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/oventop.stl new file mode 100644 index 0000000..fb6664d Binary files /dev/null and b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/oventop.stl differ diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/ovenwindow.stl b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/ovenwindow.stl new file mode 100644 index 0000000..f0205a5 Binary files /dev/null and b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/ovenwindow.stl differ diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/slidecabinet.stl b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/slidecabinet.stl new file mode 100644 index 0000000..6249a14 Binary files /dev/null and b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/slidecabinet.stl differ diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/slidedoor.stl b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/slidedoor.stl new file mode 100644 index 0000000..307d6c5 Binary files /dev/null and b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/slidedoor.stl differ diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/stoverim.stl b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/stoverim.stl new file mode 100644 index 
0000000..0f76bfc Binary files /dev/null and b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/stoverim.stl differ diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/tile.stl b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/tile.stl new file mode 100644 index 0000000..12639ce Binary files /dev/null and b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/tile.stl differ diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/wall.stl b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/wall.stl new file mode 100644 index 0000000..f5562e2 Binary files /dev/null and b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/meshes/wall.stl differ diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/microwave.xml b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/microwave.xml new file mode 100644 index 0000000..3946632 --- /dev/null +++ b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/microwave.xml @@ -0,0 +1,14 @@ + + + + + + + + + + + + + + diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/oven.xml b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/oven.xml new file mode 100644 index 0000000..6891385 --- /dev/null +++ b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/oven.xml @@ -0,0 +1,14 @@ + + + + + + + + + + + + + + diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/slidecabinet.xml b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/slidecabinet.xml new file mode 100644 index 0000000..78fa599 --- /dev/null +++ b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/slidecabinet.xml @@ -0,0 +1,14 @@ + + + + + + + + + + + + 
+ + diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/textures/marble1.png b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/textures/marble1.png new file mode 100644 index 0000000..a72c67e Binary files /dev/null and b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/textures/marble1.png differ diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/textures/metal1.png b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/textures/metal1.png new file mode 100644 index 0000000..f16a314 Binary files /dev/null and b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/textures/metal1.png differ diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/textures/tile1.png b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/textures/tile1.png new file mode 100644 index 0000000..3e859b4 Binary files /dev/null and b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/textures/tile1.png differ diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/textures/wood1.png b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/textures/wood1.png new file mode 100644 index 0000000..8d2b69b Binary files /dev/null and b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/kitchen/textures/wood1.png differ diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/scenes/basic_scene.xml b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/scenes/basic_scene.xml new file mode 100644 index 0000000..8d5356d --- /dev/null +++ b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/scenes/basic_scene.xml @@ -0,0 +1,26 @@ + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git 
a/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/scenes/textures/white_marble_tile.png b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/scenes/textures/white_marble_tile.png new file mode 100644 index 0000000..c3f397a Binary files /dev/null and b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/scenes/textures/white_marble_tile.png differ diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/scenes/textures/white_marble_tile2.png b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/scenes/textures/white_marble_tile2.png new file mode 100644 index 0000000..00033fc Binary files /dev/null and b/diffusion_policy/env/kitchen/relay_policy_learning/adept_models/scenes/textures/white_marble_tile2.png differ diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/LICENSE b/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/LICENSE new file mode 100644 index 0000000..261eeb9 --- /dev/null +++ b/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/README.md b/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/README.md new file mode 100644 index 0000000..d96eaf1 --- /dev/null +++ b/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/README.md @@ -0,0 +1,9 @@ +# franka +Franka panda mujoco models + + +# Environment + +franka_panda.xml | comming soon +:-------------------------:|:-------------------------: +![Alt text](franka_panda.png?raw=false "sawyer") | comming soon diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/assets/actuator0.xml b/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/assets/actuator0.xml new file mode 100644 index 0000000..86ee47c --- /dev/null +++ b/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/assets/actuator0.xml @@ -0,0 +1,17 @@ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/assets/actuator1.xml b/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/assets/actuator1.xml new file mode 100644 index 0000000..a8eda4e --- /dev/null +++ b/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/assets/actuator1.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git 
a/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/assets/assets.xml b/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/assets/assets.xml new file mode 100644 index 0000000..4f2cded --- /dev/null +++ b/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/assets/assets.xml @@ -0,0 +1,63 @@ + + + + + diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/assets/basic_scene.xml b/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/assets/basic_scene.xml new file mode 100644 index 0000000..4bb7e70 --- /dev/null +++ b/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/assets/basic_scene.xml @@ -0,0 +1,14 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/assets/chain0.xml b/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/assets/chain0.xml new file mode 100644 index 0000000..e2e53a7 --- /dev/null +++ b/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/assets/chain0.xml @@ -0,0 +1,103 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/assets/chain0_overlay.xml b/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/assets/chain0_overlay.xml new file mode 100644 index 0000000..e64f497 --- /dev/null +++ b/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/assets/chain0_overlay.xml @@ -0,0 +1,62 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/assets/chain1.xml 
b/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/assets/chain1.xml new file mode 100644 index 0000000..29a9524 --- /dev/null +++ b/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/assets/chain1.xml @@ -0,0 +1,61 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/assets/teleop_actuator.xml b/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/assets/teleop_actuator.xml new file mode 100644 index 0000000..e5e46db --- /dev/null +++ b/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/assets/teleop_actuator.xml @@ -0,0 +1,24 @@ + + + + + + + + + + + + + + + + + + + + + + diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/bi-franka_panda.xml b/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/bi-franka_panda.xml new file mode 100644 index 0000000..c307269 --- /dev/null +++ b/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/bi-franka_panda.xml @@ -0,0 +1,81 @@ + + + + + + + + + + + + + + + + + / + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/franka_panda.png b/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/franka_panda.png new file mode 100644 index 0000000..c34bec0 Binary files /dev/null and b/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/franka_panda.png differ diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/franka_panda.xml b/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/franka_panda.xml new file mode 100644 index 0000000..07c5193 --- /dev/null +++ 
b/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/franka_panda.xml @@ -0,0 +1,38 @@ + + + + + + + + + + + + + + + + + diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/franka_panda_teleop.xml b/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/franka_panda_teleop.xml new file mode 100644 index 0000000..cdbf8cd --- /dev/null +++ b/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/franka_panda_teleop.xml @@ -0,0 +1,54 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/collision/finger.stl b/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/collision/finger.stl new file mode 100644 index 0000000..3b87289 Binary files /dev/null and b/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/collision/finger.stl differ diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/collision/hand.stl b/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/collision/hand.stl new file mode 100644 index 0000000..4e82090 Binary files /dev/null and b/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/collision/hand.stl differ diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/collision/link0.stl b/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/collision/link0.stl new file mode 100644 index 0000000..def070c Binary files /dev/null and b/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/collision/link0.stl differ diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/collision/link1.stl b/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/collision/link1.stl new file mode 100644 index 0000000..426bcf2 
Binary files /dev/null and b/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/collision/link1.stl differ diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/collision/link2.stl b/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/collision/link2.stl new file mode 100644 index 0000000..b369f15 Binary files /dev/null and b/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/collision/link2.stl differ diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/collision/link3.stl b/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/collision/link3.stl new file mode 100644 index 0000000..25162ee Binary files /dev/null and b/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/collision/link3.stl differ diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/collision/link4.stl b/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/collision/link4.stl new file mode 100644 index 0000000..76c8c33 Binary files /dev/null and b/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/collision/link4.stl differ diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/collision/link5.stl b/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/collision/link5.stl new file mode 100644 index 0000000..3006a0b Binary files /dev/null and b/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/collision/link5.stl differ diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/collision/link6.stl b/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/collision/link6.stl new file mode 100644 index 0000000..2e9594a Binary files /dev/null and 
b/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/collision/link6.stl differ diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/collision/link7.stl b/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/collision/link7.stl new file mode 100644 index 0000000..0532d05 Binary files /dev/null and b/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/collision/link7.stl differ diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/visual/finger.stl b/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/visual/finger.stl new file mode 100644 index 0000000..2a5a256 Binary files /dev/null and b/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/visual/finger.stl differ diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/visual/hand.stl b/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/visual/hand.stl new file mode 100644 index 0000000..9ecd7f2 Binary files /dev/null and b/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/visual/hand.stl differ diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/visual/link0.stl b/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/visual/link0.stl new file mode 100644 index 0000000..bf71a18 Binary files /dev/null and b/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/visual/link0.stl differ diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/visual/link1.stl b/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/visual/link1.stl new file mode 100644 index 0000000..6289e56 Binary files /dev/null and b/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/visual/link1.stl differ 
diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/visual/link2.stl b/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/visual/link2.stl new file mode 100644 index 0000000..5580a80 Binary files /dev/null and b/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/visual/link2.stl differ diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/visual/link3.stl b/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/visual/link3.stl new file mode 100644 index 0000000..cdbe281 Binary files /dev/null and b/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/visual/link3.stl differ diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/visual/link4.stl b/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/visual/link4.stl new file mode 100644 index 0000000..df43017 Binary files /dev/null and b/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/visual/link4.stl differ diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/visual/link5.stl b/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/visual/link5.stl new file mode 100644 index 0000000..9cb5360 Binary files /dev/null and b/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/visual/link5.stl differ diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/visual/link6.stl b/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/visual/link6.stl new file mode 100644 index 0000000..d43652f Binary files /dev/null and b/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/visual/link6.stl differ diff --git a/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/visual/link7.stl 
b/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/visual/link7.stl new file mode 100644 index 0000000..6d369ed Binary files /dev/null and b/diffusion_policy/env/kitchen/relay_policy_learning/third_party/franka/meshes/visual/link7.stl differ diff --git a/diffusion_policy/env/kitchen/v0.py b/diffusion_policy/env/kitchen/v0.py new file mode 100644 index 0000000..d7a5b4e --- /dev/null +++ b/diffusion_policy/env/kitchen/v0.py @@ -0,0 +1,20 @@ +from diffusion_policy.env.kitchen.base import KitchenBase + + +class KitchenMicrowaveKettleBottomBurnerLightV0(KitchenBase): + TASK_ELEMENTS = ["microwave", "kettle", "bottom burner", "light switch"] + COMPLETE_IN_ANY_ORDER = False + + +class KitchenMicrowaveKettleLightSliderV0(KitchenBase): + TASK_ELEMENTS = ["microwave", "kettle", "light switch", "slide cabinet"] + COMPLETE_IN_ANY_ORDER = False + + +class KitchenKettleMicrowaveLightSliderV0(KitchenBase): + TASK_ELEMENTS = ["kettle", "microwave", "light switch", "slide cabinet"] + COMPLETE_IN_ANY_ORDER = False + + +class KitchenAllV0(KitchenBase): + TASK_ELEMENTS = KitchenBase.ALL_TASKS diff --git a/diffusion_policy/env/pusht/__init__.py b/diffusion_policy/env/pusht/__init__.py new file mode 100644 index 0000000..945de59 --- /dev/null +++ b/diffusion_policy/env/pusht/__init__.py @@ -0,0 +1,9 @@ +from gym.envs.registration import register +import diffusion_policy.env.pusht + +register( + id='pusht-keypoints-v0', + entry_point='envs.pusht.pusht_keypoints_env:PushTKeypointsEnv', + max_episode_steps=200, + reward_threshold=1.0 +) \ No newline at end of file diff --git a/diffusion_policy/env/pusht/pusht_env.py b/diffusion_policy/env/pusht/pusht_env.py new file mode 100644 index 0000000..45be598 --- /dev/null +++ b/diffusion_policy/env/pusht/pusht_env.py @@ -0,0 +1,367 @@ +import gym +from gym import spaces + +import collections +import numpy as np +import pygame +import pymunk +import pymunk.pygame_util +from pymunk.vec2d import Vec2d +import 
shapely.geometry as sg +import cv2 +import skimage.transform as st +from diffusion_policy.env.pusht.pymunk_override import DrawOptions + + +def pymunk_to_shapely(body, shapes): + geoms = list() + for shape in shapes: + if isinstance(shape, pymunk.shapes.Poly): + verts = [body.local_to_world(v) for v in shape.get_vertices()] + verts += [verts[0]] + geoms.append(sg.Polygon(verts)) + else: + raise RuntimeError(f'Unsupported shape type {type(shape)}') + geom = sg.MultiPolygon(geoms) + return geom + +class PushTEnv(gym.Env): + metadata = {"render.modes": ["human", "rgb_array"], "video.frames_per_second": 10} + reward_range = (0., 1.) + + def __init__(self, + legacy=False, + block_cog=None, damping=None, + render_action=True, + render_size=96, + reset_to_state=None + ): + self._seed = None + self.seed() + self.window_size = ws = 512 # The size of the PyGame window + self.render_size = render_size + self.sim_hz = 100 + # Local controller params. + self.k_p, self.k_v = 100, 20 # PD control.z + self.control_hz = self.metadata['video.frames_per_second'] + # legcay set_state for data compatiblity + self.legacy = legacy + + # agent_pos, block_pos, block_angle + self.observation_space = spaces.Box( + low=np.array([0,0,0,0,0], dtype=np.float64), + high=np.array([ws,ws,ws,ws,np.pi*2], dtype=np.float64), + shape=(5,), + dtype=np.float64 + ) + + # positional goal for agent + self.action_space = spaces.Box( + low=np.array([0,0], dtype=np.float64), + high=np.array([ws,ws], dtype=np.float64), + shape=(2,), + dtype=np.float64 + ) + + self.block_cog = block_cog + self.damping = damping + self.render_action = render_action + + """ + If human-rendering is used, `self.window` will be a reference + to the window that we draw to. `self.clock` will be a clock that is used + to ensure that the environment is rendered at the correct framerate in + human-mode. They will remain `None` until human-mode is used for the + first time. 
+ """ + self.window = None + self.clock = None + self.screen = None + + self.space = None + self.teleop = None + self.render_buffer = None + self.latest_action = None + self.reset_to_state = reset_to_state + + def reset(self): + seed = self._seed + self._setup() + if self.block_cog is not None: + self.block.center_of_gravity = self.block_cog + if self.damping is not None: + self.space.damping = self.damping + + # use legacy RandomState for compatiblity + state = self.reset_to_state + if state is None: + rs = np.random.RandomState(seed=seed) + state = np.array([ + rs.randint(50, 450), rs.randint(50, 450), + rs.randint(100, 400), rs.randint(100, 400), + rs.randn() * 2 * np.pi - np.pi + ]) + self._set_state(state) + + observation = self._get_obs() + return observation + + def step(self, action): + dt = 1.0 / self.sim_hz + self.n_contact_points = 0 + n_steps = self.sim_hz // self.control_hz + if action is not None: + self.latest_action = action + for i in range(n_steps): + # Step PD control. + # self.agent.velocity = self.k_p * (act - self.agent.position) # P control works too. + acceleration = self.k_p * (action - self.agent.position) + self.k_v * (Vec2d(0, 0) - self.agent.velocity) + self.agent.velocity += acceleration * dt + + # Step physics. 
+ self.space.step(dt) + + # compute reward + goal_body = self._get_goal_pose_body(self.goal_pose) + goal_geom = pymunk_to_shapely(goal_body, self.block.shapes) + block_geom = pymunk_to_shapely(self.block, self.block.shapes) + + intersection_area = goal_geom.intersection(block_geom).area + goal_area = goal_geom.area + coverage = intersection_area / goal_area + reward = np.clip(coverage / self.success_threshold, 0, 1) + done = coverage > self.success_threshold + + observation = self._get_obs() + info = self._get_info() + + return observation, reward, done, info + + def render(self, mode): + return self._render_frame(mode) + + def teleop_agent(self): + TeleopAgent = collections.namedtuple('TeleopAgent', ['act']) + def act(obs): + act = None + mouse_position = pymunk.pygame_util.from_pygame(Vec2d(*pygame.mouse.get_pos()), self.screen) + if self.teleop or (mouse_position - self.agent.position).length < 30: + self.teleop = True + act = mouse_position + return act + return TeleopAgent(act) + + def _get_obs(self): + obs = np.array( + tuple(self.agent.position) \ + + tuple(self.block.position) \ + + (self.block.angle % (2 * np.pi),)) + return obs + + def _get_goal_pose_body(self, pose): + mass = 1 + inertia = pymunk.moment_for_box(mass, (50, 100)) + body = pymunk.Body(mass, inertia) + # preserving the legacy assignment order for compatibility + # the order here dosn't matter somehow, maybe because CoM is aligned with body origin + body.position = pose[:2].tolist() + body.angle = pose[2] + return body + + def _get_info(self): + n_steps = self.sim_hz // self.control_hz + n_contact_points_per_step = int(np.ceil(self.n_contact_points / n_steps)) + info = { + 'pos_agent': np.array(self.agent.position), + 'vel_agent': np.array(self.agent.velocity), + 'block_pose': np.array(list(self.block.position) + [self.block.angle]), + 'goal_pose': self.goal_pose, + 'n_contacts': n_contact_points_per_step} + return info + + def _render_frame(self, mode): + + if self.window is None and mode == 
"human": + pygame.init() + pygame.display.init() + self.window = pygame.display.set_mode((self.window_size, self.window_size)) + if self.clock is None and mode == "human": + self.clock = pygame.time.Clock() + + canvas = pygame.Surface((self.window_size, self.window_size)) + canvas.fill((255, 255, 255)) + self.screen = canvas + + draw_options = DrawOptions(canvas) + + # Draw goal pose. + goal_body = self._get_goal_pose_body(self.goal_pose) + for shape in self.block.shapes: + goal_points = [pymunk.pygame_util.to_pygame(goal_body.local_to_world(v), draw_options.surface) for v in shape.get_vertices()] + goal_points += [goal_points[0]] + pygame.draw.polygon(canvas, self.goal_color, goal_points) + + # Draw agent and block. + self.space.debug_draw(draw_options) + + if mode == "human": + # The following line copies our drawings from `canvas` to the visible window + self.window.blit(canvas, canvas.get_rect()) + pygame.event.pump() + pygame.display.update() + + # the clock is aleady ticked during in step for "human" + + + img = np.transpose( + np.array(pygame.surfarray.pixels3d(canvas)), axes=(1, 0, 2) + ) + img = cv2.resize(img, (self.render_size, self.render_size)) + if self.render_action: + if self.render_action and (self.latest_action is not None): + action = np.array(self.latest_action) + coord = (action / 512 * 96).astype(np.int32) + marker_size = int(8/96*self.render_size) + thickness = int(1/96*self.render_size) + cv2.drawMarker(img, coord, + color=(255,0,0), markerType=cv2.MARKER_CROSS, + markerSize=marker_size, thickness=thickness) + return img + + + def close(self): + if self.window is not None: + pygame.display.quit() + pygame.quit() + + def seed(self, seed=None): + if seed is None: + seed = np.random.randint(0,25536) + self._seed = seed + self.np_random = np.random.default_rng(seed) + + def _handle_collision(self, arbiter, space, data): + self.n_contact_points += len(arbiter.contact_point_set.points) + + def _set_state(self, state): + if isinstance(state, 
np.ndarray): + state = state.tolist() + pos_agent = state[:2] + pos_block = state[2:4] + rot_block = state[4] + self.agent.position = pos_agent + # setting angle rotates with respect to center of mass + # therefore will modify the geometric position + # if not the same as CoM + # therefore should be modified first. + if self.legacy: + # for compatiblity with legacy data + self.block.position = pos_block + self.block.angle = rot_block + else: + self.block.angle = rot_block + self.block.position = pos_block + + # Run physics to take effect + self.space.step(1.0 / self.sim_hz) + + def _set_state_local(self, state_local): + agent_pos_local = state_local[:2] + block_pose_local = state_local[2:] + tf_img_obj = st.AffineTransform( + translation=self.goal_pose[:2], + rotation=self.goal_pose[2]) + tf_obj_new = st.AffineTransform( + translation=block_pose_local[:2], + rotation=block_pose_local[2] + ) + tf_img_new = st.AffineTransform( + matrix=tf_img_obj.params @ tf_obj_new.params + ) + agent_pos_new = tf_img_new(agent_pos_local) + new_state = np.array( + list(agent_pos_new[0]) + list(tf_img_new.translation) \ + + [tf_img_new.rotation]) + self._set_state(new_state) + return new_state + + def _setup(self): + self.space = pymunk.Space() + self.space.gravity = 0, 0 + self.space.damping = 0 + self.teleop = False + self.render_buffer = list() + + # Add walls. + walls = [ + self._add_segment((5, 506), (5, 5), 2), + self._add_segment((5, 5), (506, 5), 2), + self._add_segment((506, 5), (506, 506), 2), + self._add_segment((5, 506), (506, 506), 2) + ] + self.space.add(*walls) + + # Add agent, block, and goal zone. 
+ self.agent = self.add_circle((256, 400), 15) + self.block = self.add_tee((256, 300), 0) + self.goal_color = pygame.Color('LightGreen') + self.goal_pose = np.array([256,256,np.pi/4]) # x, y, theta (in radians) + + # Add collision handeling + self.collision_handeler = self.space.add_collision_handler(0, 0) + self.collision_handeler.post_solve = self._handle_collision + self.n_contact_points = 0 + + self.max_score = 50 * 100 + self.success_threshold = 0.95 # 95% coverage. + + def _add_segment(self, a, b, radius): + shape = pymunk.Segment(self.space.static_body, a, b, radius) + shape.color = pygame.Color('LightGray') # https://htmlcolorcodes.com/color-names + return shape + + def add_circle(self, position, radius): + body = pymunk.Body(body_type=pymunk.Body.KINEMATIC) + body.position = position + body.friction = 1 + shape = pymunk.Circle(body, radius) + shape.color = pygame.Color('RoyalBlue') + self.space.add(body, shape) + return body + + def add_box(self, position, height, width): + mass = 1 + inertia = pymunk.moment_for_box(mass, (height, width)) + body = pymunk.Body(mass, inertia) + body.position = position + shape = pymunk.Poly.create_box(body, (height, width)) + shape.color = pygame.Color('LightSlateGray') + self.space.add(body, shape) + return body + + def add_tee(self, position, angle, scale=30, color='LightSlateGray', mask=pymunk.ShapeFilter.ALL_MASKS()): + mass = 1 + length = 4 + vertices1 = [(-length*scale/2, scale), + ( length*scale/2, scale), + ( length*scale/2, 0), + (-length*scale/2, 0)] + inertia1 = pymunk.moment_for_poly(mass, vertices=vertices1) + vertices2 = [(-scale/2, scale), + (-scale/2, length*scale), + ( scale/2, length*scale), + ( scale/2, scale)] + inertia2 = pymunk.moment_for_poly(mass, vertices=vertices1) + body = pymunk.Body(mass, inertia1 + inertia2) + shape1 = pymunk.Poly(body, vertices1) + shape2 = pymunk.Poly(body, vertices2) + shape1.color = pygame.Color(color) + shape2.color = pygame.Color(color) + shape1.filter = 
pymunk.ShapeFilter(mask=mask) + shape2.filter = pymunk.ShapeFilter(mask=mask) + body.center_of_gravity = (shape1.center_of_gravity + shape2.center_of_gravity) / 2 + body.position = position + body.angle = angle + body.friction = 1 + self.space.add(body, shape1, shape2) + return body diff --git a/diffusion_policy/env/pusht/pusht_image_env.py b/diffusion_policy/env/pusht/pusht_image_env.py new file mode 100644 index 0000000..db6bcfb --- /dev/null +++ b/diffusion_policy/env/pusht/pusht_image_env.py @@ -0,0 +1,66 @@ +from gym import spaces +from diffusion_policy.env.pusht.pusht_env import PushTEnv +import numpy as np +import cv2 + +class PushTImageEnv(PushTEnv): + metadata = {"render.modes": ["rgb_array"], "video.frames_per_second": 10} + + def __init__(self, + legacy=False, + block_cog=None, + damping=None, + render_size=96): + super().__init__( + legacy=legacy, + block_cog=block_cog, + damping=damping, + render_size=render_size, + render_action=False) + ws = self.window_size + self.observation_space = spaces.Dict({ + 'image': spaces.Box( + low=0, + high=1, + shape=(3,render_size,render_size), + dtype=np.float32 + ), + 'agent_pos': spaces.Box( + low=0, + high=ws, + shape=(2,), + dtype=np.float32 + ) + }) + self.render_cache = None + + def _get_obs(self): + img = super()._render_frame(mode='rgb_array') + + agent_pos = np.array(self.agent.position) + img_obs = np.moveaxis(img.astype(np.float32) / 255, -1, 0) + obs = { + 'image': img_obs, + 'agent_pos': agent_pos + } + + # draw action + if self.latest_action is not None: + action = np.array(self.latest_action) + coord = (action / 512 * 96).astype(np.int32) + marker_size = int(8/96*self.render_size) + thickness = int(1/96*self.render_size) + cv2.drawMarker(img, coord, + color=(255,0,0), markerType=cv2.MARKER_CROSS, + markerSize=marker_size, thickness=thickness) + self.render_cache = img + + return obs + + def render(self, mode): + assert mode == 'rgb_array' + + if self.render_cache is None: + self._get_obs() + + return 
self.render_cache diff --git a/diffusion_policy/env/pusht/pusht_keypoints_env.py b/diffusion_policy/env/pusht/pusht_keypoints_env.py new file mode 100644 index 0000000..0efea6d --- /dev/null +++ b/diffusion_policy/env/pusht/pusht_keypoints_env.py @@ -0,0 +1,131 @@ +from typing import Dict, Sequence, Union, Optional +from gym import spaces +from diffusion_policy.env.pusht.pusht_env import PushTEnv +from diffusion_policy.env.pusht.pymunk_keypoint_manager import PymunkKeypointManager +import numpy as np + +class PushTKeypointsEnv(PushTEnv): + def __init__(self, + legacy=False, + block_cog=None, + damping=None, + render_size=96, + keypoint_visible_rate=1.0, + agent_keypoints=False, + draw_keypoints=False, + reset_to_state=None, + render_action=True, + local_keypoint_map: Dict[str, np.ndarray]=None, + color_map: Optional[Dict[str, np.ndarray]]=None): + super().__init__( + legacy=legacy, + block_cog=block_cog, + damping=damping, + render_size=render_size, + reset_to_state=reset_to_state, + render_action=render_action) + ws = self.window_size + + if local_keypoint_map is None: + # create default keypoint definiton + kp_kwargs = self.genenerate_keypoint_manager_params() + local_keypoint_map = kp_kwargs['local_keypoint_map'] + color_map = kp_kwargs['color_map'] + + # create observation spaces + Dblockkps = np.prod(local_keypoint_map['block'].shape) + Dagentkps = np.prod(local_keypoint_map['agent'].shape) + Dagentpos = 2 + + Do = Dblockkps + if agent_keypoints: + # blockkp + agnet_pos + Do += Dagentkps + else: + # blockkp + agnet_kp + Do += Dagentpos + # obs + obs_mask + Dobs = Do * 2 + + low = np.zeros((Dobs,), dtype=np.float64) + high = np.full_like(low, ws) + # mask range 0-1 + high[Do:] = 1. 
+ + # (block_kps+agent_kps, xy+confidence) + self.observation_space = spaces.Box( + low=low, + high=high, + shape=low.shape, + dtype=np.float64 + ) + + self.keypoint_visible_rate = keypoint_visible_rate + self.agent_keypoints = agent_keypoints + self.draw_keypoints = draw_keypoints + self.kp_manager = PymunkKeypointManager( + local_keypoint_map=local_keypoint_map, + color_map=color_map) + self.draw_kp_map = None + + @classmethod + def genenerate_keypoint_manager_params(cls): + env = PushTEnv() + kp_manager = PymunkKeypointManager.create_from_pusht_env(env) + kp_kwargs = kp_manager.kwargs + return kp_kwargs + + def _get_obs(self): + # get keypoints + obj_map = { + 'block': self.block + } + if self.agent_keypoints: + obj_map['agent'] = self.agent + + kp_map = self.kp_manager.get_keypoints_global( + pose_map=obj_map, is_obj=True) + # python dict guerentee order of keys and values + kps = np.concatenate(list(kp_map.values()), axis=0) + + # select keypoints to drop + n_kps = kps.shape[0] + visible_kps = self.np_random.random(size=(n_kps,)) < self.keypoint_visible_rate + kps_mask = np.repeat(visible_kps[:,None], 2, axis=1) + + # save keypoints for rendering + vis_kps = kps.copy() + vis_kps[~visible_kps] = 0 + draw_kp_map = { + 'block': vis_kps[:len(kp_map['block'])] + } + if self.agent_keypoints: + draw_kp_map['agent'] = vis_kps[len(kp_map['block']):] + self.draw_kp_map = draw_kp_map + + # construct obs + obs = kps.flatten() + obs_mask = kps_mask.flatten() + if not self.agent_keypoints: + # passing agent position when keypoints are not avaiable + agent_pos = np.array(self.agent.position) + obs = np.concatenate([ + obs, agent_pos + ]) + obs_mask = np.concatenate([ + obs_mask, np.ones((2,), dtype=bool) + ]) + + # obs, obs_mask + obs = np.concatenate([ + obs, obs_mask.astype(obs.dtype) + ], axis=0) + return obs + + + def _render_frame(self, mode): + img = super()._render_frame(mode) + if self.draw_keypoints: + self.kp_manager.draw_keypoints( + img, self.draw_kp_map, 
radius=int(img.shape[0]/96)) + return img diff --git a/diffusion_policy/env/pusht/pymunk_keypoint_manager.py b/diffusion_policy/env/pusht/pymunk_keypoint_manager.py new file mode 100644 index 0000000..2fd5530 --- /dev/null +++ b/diffusion_policy/env/pusht/pymunk_keypoint_manager.py @@ -0,0 +1,146 @@ +from typing import Dict, Sequence, Union, Optional +import numpy as np +import skimage.transform as st +import pymunk +import pygame +from matplotlib import cm +import cv2 +from diffusion_policy.env.pusht.pymunk_override import DrawOptions + + +def farthest_point_sampling(points: np.ndarray, n_points: int, init_idx: int): + """ + Naive O(N^2) + """ + assert(n_points >= 1) + chosen_points = [points[init_idx]] + for _ in range(n_points-1): + cpoints = np.array(chosen_points) + all_dists = np.linalg.norm(points[:,None,:] - cpoints[None,:,:], axis=-1) + min_dists = all_dists.min(axis=1) + next_idx = np.argmax(min_dists) + next_pt = points[next_idx] + chosen_points.append(next_pt) + result = np.array(chosen_points) + return result + + +class PymunkKeypointManager: + def __init__(self, + local_keypoint_map: Dict[str, np.ndarray], + color_map: Optional[Dict[str, np.ndarray]]=None): + """ + local_keypoint_map: + "": (N,2) floats in object local coordinate + """ + if color_map is None: + cmap = cm.get_cmap('tab10') + color_map = dict() + for i, key in enumerate(local_keypoint_map.keys()): + color_map[key] = (np.array(cmap.colors[i]) * 255).astype(np.uint8) + + self.local_keypoint_map = local_keypoint_map + self.color_map = color_map + + @property + def kwargs(self): + return { + 'local_keypoint_map': self.local_keypoint_map, + 'color_map': self.color_map + } + + @classmethod + def create_from_pusht_env(cls, env, n_block_kps=9, n_agent_kps=3, seed=0, **kwargs): + rng = np.random.default_rng(seed=seed) + local_keypoint_map = dict() + for name in ['block','agent']: + self = env + self.space = pymunk.Space() + if name == 'agent': + self.agent = obj = self.add_circle((256, 400), 15) 
+ n_kps = n_agent_kps + else: + self.block = obj = self.add_tee((256, 300), 0) + n_kps = n_block_kps + + self.screen = pygame.Surface((512,512)) + self.screen.fill(pygame.Color("white")) + draw_options = DrawOptions(self.screen) + self.space.debug_draw(draw_options) + # pygame.display.flip() + img = np.uint8(pygame.surfarray.array3d(self.screen).transpose(1, 0, 2)) + obj_mask = (img != np.array([255,255,255],dtype=np.uint8)).any(axis=-1) + + tf_img_obj = cls.get_tf_img_obj(obj) + xy_img = np.moveaxis(np.array(np.indices((512,512))), 0, -1)[:,:,::-1] + local_coord_img = tf_img_obj.inverse(xy_img.reshape(-1,2)).reshape(xy_img.shape) + obj_local_coords = local_coord_img[obj_mask] + + # furthest point sampling + init_idx = rng.choice(len(obj_local_coords)) + obj_local_kps = farthest_point_sampling(obj_local_coords, n_kps, init_idx) + small_shift = rng.uniform(0, 1, size=obj_local_kps.shape) + obj_local_kps += small_shift + + local_keypoint_map[name] = obj_local_kps + + return cls(local_keypoint_map=local_keypoint_map, **kwargs) + + @staticmethod + def get_tf_img(pose: Sequence): + pos = pose[:2] + rot = pose[2] + tf_img_obj = st.AffineTransform( + translation=pos, rotation=rot) + return tf_img_obj + + @classmethod + def get_tf_img_obj(cls, obj: pymunk.Body): + pose = tuple(obj.position) + (obj.angle,) + return cls.get_tf_img(pose) + + def get_keypoints_global(self, + pose_map: Dict[set, Union[Sequence, pymunk.Body]], + is_obj=False): + kp_map = dict() + for key, value in pose_map.items(): + if is_obj: + tf_img_obj = self.get_tf_img_obj(value) + else: + tf_img_obj = self.get_tf_img(value) + kp_local = self.local_keypoint_map[key] + kp_global = tf_img_obj(kp_local) + kp_map[key] = kp_global + return kp_map + + def draw_keypoints(self, img, kps_map, radius=1): + scale = np.array(img.shape[:2]) / np.array([512,512]) + for key, value in kps_map.items(): + color = self.color_map[key].tolist() + coords = (value * scale).astype(np.int32) + for coord in coords: + 
cv2.circle(img, coord, radius=radius, color=color, thickness=-1) + return img + + def draw_keypoints_pose(self, img, pose_map, is_obj=False, **kwargs): + kp_map = self.get_keypoints_global(pose_map, is_obj=is_obj) + return self.draw_keypoints(img, kps_map=kp_map, **kwargs) + + +def test(): + from diffusion_policy.environment.push_t_env import PushTEnv + from matplotlib import pyplot as plt + + env = PushTEnv(headless=True, obs_state=False, draw_action=False) + kp_manager = PymunkKeypointManager.create_from_pusht_env(env=env) + env.reset() + obj_map = { + 'block': env.block, + 'agent': env.agent + } + + obs = env.render() + img = obs.astype(np.uint8) + kp_manager.draw_keypoints_pose(img=img, pose_map=obj_map, is_obj=True) + + plt.imshow(img) diff --git a/diffusion_policy/env/pusht/pymunk_override.py b/diffusion_policy/env/pusht/pymunk_override.py new file mode 100644 index 0000000..78ecaa2 --- /dev/null +++ b/diffusion_policy/env/pusht/pymunk_override.py @@ -0,0 +1,248 @@ +# ---------------------------------------------------------------------------- +# pymunk +# Copyright (c) 2007-2016 Victor Blomqvist +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# ---------------------------------------------------------------------------- + +"""This submodule contains helper functions to help with quick prototyping +using pymunk together with pygame. + +Intended to help with debugging and prototyping, not for actual production use +in a full application. The methods contained in this module is opinionated +about your coordinate system and not in any way optimized. +""" + +__docformat__ = "reStructuredText" + +__all__ = [ + "DrawOptions", + "get_mouse_pos", + "to_pygame", + "from_pygame", + "lighten", + "positive_y_is_up", +] + +from typing import List, Sequence, Tuple + +import pygame + +import numpy as np + +import pymunk +from pymunk.space_debug_draw_options import SpaceDebugColor +from pymunk.vec2d import Vec2d + +positive_y_is_up: bool = False +"""Make increasing values of y point upwards. + +When True:: + + y + ^ + | . (3, 3) + | + | . (2, 2) + | + +------ > x + +When False:: + + +------ > x + | + | . (2, 2) + | + | . (3, 3) + v + y + +""" + + +class DrawOptions(pymunk.SpaceDebugDrawOptions): + def __init__(self, surface: pygame.Surface) -> None: + """Draw a pymunk.Space on a pygame.Surface object. 
+ + Typical usage:: + + >>> import pymunk + >>> surface = pygame.Surface((10,10)) + >>> space = pymunk.Space() + >>> options = pymunk.pygame_util.DrawOptions(surface) + >>> space.debug_draw(options) + + You can control the color of a shape by setting shape.color to the color + you want it drawn in:: + + >>> c = pymunk.Circle(None, 10) + >>> c.color = pygame.Color("pink") + + See pygame_util.demo.py for a full example + + Since pygame uses a coordiante system where y points down (in contrast + to many other cases), you either have to make the physics simulation + with Pymunk also behave in that way, or flip everything when you draw. + + The easiest is probably to just make the simulation behave the same + way as Pygame does. In that way all coordinates used are in the same + orientation and easy to reason about:: + + >>> space = pymunk.Space() + >>> space.gravity = (0, -1000) + >>> body = pymunk.Body() + >>> body.position = (0, 0) # will be positioned in the top left corner + >>> space.debug_draw(options) + + To flip the drawing its possible to set the module property + :py:data:`positive_y_is_up` to True. 
Then the pygame drawing will flip + the simulation upside down before drawing:: + + >>> positive_y_is_up = True + >>> body = pymunk.Body() + >>> body.position = (0, 0) + >>> # Body will be position in bottom left corner + + :Parameters: + surface : pygame.Surface + Surface that the objects will be drawn on + """ + self.surface = surface + super(DrawOptions, self).__init__() + + def draw_circle( + self, + pos: Vec2d, + angle: float, + radius: float, + outline_color: SpaceDebugColor, + fill_color: SpaceDebugColor, + ) -> None: + p = to_pygame(pos, self.surface) + + pygame.draw.circle(self.surface, fill_color.as_int(), p, round(radius), 0) + pygame.draw.circle(self.surface, light_color(fill_color).as_int(), p, round(radius-4), 0) + + circle_edge = pos + Vec2d(radius, 0).rotated(angle) + p2 = to_pygame(circle_edge, self.surface) + line_r = 2 if radius > 20 else 1 + # pygame.draw.lines(self.surface, outline_color.as_int(), False, [p, p2], line_r) + + def draw_segment(self, a: Vec2d, b: Vec2d, color: SpaceDebugColor) -> None: + p1 = to_pygame(a, self.surface) + p2 = to_pygame(b, self.surface) + + pygame.draw.aalines(self.surface, color.as_int(), False, [p1, p2]) + + def draw_fat_segment( + self, + a: Tuple[float, float], + b: Tuple[float, float], + radius: float, + outline_color: SpaceDebugColor, + fill_color: SpaceDebugColor, + ) -> None: + p1 = to_pygame(a, self.surface) + p2 = to_pygame(b, self.surface) + + r = round(max(1, radius * 2)) + pygame.draw.lines(self.surface, fill_color.as_int(), False, [p1, p2], r) + if r > 2: + orthog = [abs(p2[1] - p1[1]), abs(p2[0] - p1[0])] + if orthog[0] == 0 and orthog[1] == 0: + return + scale = radius / (orthog[0] * orthog[0] + orthog[1] * orthog[1]) ** 0.5 + orthog[0] = round(orthog[0] * scale) + orthog[1] = round(orthog[1] * scale) + points = [ + (p1[0] - orthog[0], p1[1] - orthog[1]), + (p1[0] + orthog[0], p1[1] + orthog[1]), + (p2[0] + orthog[0], p2[1] + orthog[1]), + (p2[0] - orthog[0], p2[1] - orthog[1]), + ] + 
pygame.draw.polygon(self.surface, fill_color.as_int(), points) + pygame.draw.circle( + self.surface, + fill_color.as_int(), + (round(p1[0]), round(p1[1])), + round(radius), + ) + pygame.draw.circle( + self.surface, + fill_color.as_int(), + (round(p2[0]), round(p2[1])), + round(radius), + ) + + def draw_polygon( + self, + verts: Sequence[Tuple[float, float]], + radius: float, + outline_color: SpaceDebugColor, + fill_color: SpaceDebugColor, + ) -> None: + ps = [to_pygame(v, self.surface) for v in verts] + ps += [ps[0]] + + radius = 2 + pygame.draw.polygon(self.surface, light_color(fill_color).as_int(), ps) + + if radius > 0: + for i in range(len(verts)): + a = verts[i] + b = verts[(i + 1) % len(verts)] + self.draw_fat_segment(a, b, radius, fill_color, fill_color) + + def draw_dot( + self, size: float, pos: Tuple[float, float], color: SpaceDebugColor + ) -> None: + p = to_pygame(pos, self.surface) + pygame.draw.circle(self.surface, color.as_int(), p, round(size), 0) + + +def get_mouse_pos(surface: pygame.Surface) -> Tuple[int, int]: + """Get position of the mouse pointer in pymunk coordinates.""" + p = pygame.mouse.get_pos() + return from_pygame(p, surface) + + +def to_pygame(p: Tuple[float, float], surface: pygame.Surface) -> Tuple[int, int]: + """Convenience method to convert pymunk coordinates to pygame surface + local coordinates. + + Note that in case positive_y_is_up is False, this function wont actually do + anything except converting the point to integers. 
+ """ + if positive_y_is_up: + return round(p[0]), surface.get_height() - round(p[1]) + else: + return round(p[0]), round(p[1]) + + +def from_pygame(p: Tuple[float, float], surface: pygame.Surface) -> Tuple[int, int]: + """Convenience method to convert pygame surface local coordinates to + pymunk coordinates + """ + return to_pygame(p, surface) + + +def light_color(color: SpaceDebugColor): + color = np.minimum(1.2 * np.float32([color.r, color.g, color.b, color.a]), np.float32([255])) + color = SpaceDebugColor(r=color[0], g=color[1], b=color[2], a=color[3]) + return color diff --git a/diffusion_policy/env/robomimic/robomimic_image_wrapper.py b/diffusion_policy/env/robomimic/robomimic_image_wrapper.py new file mode 100644 index 0000000..b8c2506 --- /dev/null +++ b/diffusion_policy/env/robomimic/robomimic_image_wrapper.py @@ -0,0 +1,164 @@ +from typing import List, Optional +from matplotlib.pyplot import fill +import numpy as np +import gym +from gym import spaces +from omegaconf import OmegaConf +from robomimic.envs.env_robosuite import EnvRobosuite + +class RobomimicImageWrapper(gym.Env): + def __init__(self, + env: EnvRobosuite, + shape_meta: dict, + init_state: Optional[np.ndarray]=None, + render_obs_key='agentview_image', + ): + + self.env = env + self.render_obs_key = render_obs_key + self.init_state = init_state + self.seed_state_map = dict() + self._seed = None + self.shape_meta = shape_meta + self.render_cache = None + self.has_reset_before = False + + # setup spaces + action_shape = shape_meta['action']['shape'] + action_space = spaces.Box( + low=-1, + high=1, + shape=action_shape, + dtype=np.float32 + ) + self.action_space = action_space + + observation_space = spaces.Dict() + for key, value in shape_meta['obs'].items(): + shape = value['shape'] + min_value, max_value = -1, 1 + if key.endswith('image'): + min_value, max_value = 0, 1 + elif key.endswith('quat'): + min_value, max_value = -1, 1 + elif key.endswith('qpos'): + min_value, max_value = -1, 1 + elif 
key.endswith('pos'): + # better range? + min_value, max_value = -1, 1 + else: + raise RuntimeError(f"Unsupported type {key}") + + this_space = spaces.Box( + low=min_value, + high=max_value, + shape=shape, + dtype=np.float32 + ) + observation_space[key] = this_space + self.observation_space = observation_space + + + def get_observation(self, raw_obs=None): + if raw_obs is None: + raw_obs = self.env.get_observation() + + self.render_cache = raw_obs[self.render_obs_key] + + obs = dict() + for key in self.observation_space.keys(): + obs[key] = raw_obs[key] + return obs + + def seed(self, seed=None): + np.random.seed(seed=seed) + self._seed = seed + + def reset(self): + if self.init_state is not None: + if not self.has_reset_before: + # the env must be fully reset at least once to ensure correct rendering + self.env.reset() + self.has_reset_before = True + + # always reset to the same state + # to be compatible with gym + raw_obs = self.env.reset_to({'states': self.init_state}) + elif self._seed is not None: + # reset to a specific seed + seed = self._seed + if seed in self.seed_state_map: + # env.reset is expensive, use cache + raw_obs = self.env.reset_to({'states': self.seed_state_map[seed]}) + else: + # robosuite's initializes all use numpy global random state + np.random.seed(seed=seed) + raw_obs = self.env.reset() + state = self.env.get_state()['states'] + self.seed_state_map[seed] = state + self._seed = None + else: + # random reset + raw_obs = self.env.reset() + + # return obs + obs = self.get_observation(raw_obs) + return obs + + def step(self, action): + raw_obs, reward, done, info = self.env.step(action) + obs = self.get_observation(raw_obs) + return obs, reward, done, info + + def render(self, mode='rgb_array'): + if self.render_cache is None: + raise RuntimeError('Must run reset or step before render.') + img = np.moveaxis(self.render_cache, 0, -1) + img = (img * 255).astype(np.uint8) + return img + + +def test(): + import os + from omegaconf import 
OmegaConf + cfg_path = os.path.expanduser('~/dev/diffusion_policy/diffusion_policy/config/task/lift_image.yaml') + cfg = OmegaConf.load(cfg_path) + shape_meta = cfg['shape_meta'] + + + import robomimic.utils.file_utils as FileUtils + import robomimic.utils.env_utils as EnvUtils + from matplotlib import pyplot as plt + + dataset_path = os.path.expanduser('~/dev/diffusion_policy/data/robomimic/datasets/square/ph/image.hdf5') + env_meta = FileUtils.get_env_metadata_from_dataset( + dataset_path) + + env = EnvUtils.create_env_from_metadata( + env_meta=env_meta, + render=False, + render_offscreen=False, + use_image_obs=True, + ) + + wrapper = RobomimicImageWrapper( + env=env, + shape_meta=shape_meta + ) + wrapper.seed(0) + obs = wrapper.reset() + img = wrapper.render() + plt.imshow(img) + + + # states = list() + # for _ in range(2): + # wrapper.seed(0) + # wrapper.reset() + # states.append(wrapper.env.get_state()['states']) + # assert np.allclose(states[0], states[1]) + + # img = wrapper.render() + # plt.imshow(img) + # wrapper.seed() + # states.append(wrapper.env.get_state()['states']) diff --git a/diffusion_policy/env/robomimic/robomimic_lowdim_wrapper.py b/diffusion_policy/env/robomimic/robomimic_lowdim_wrapper.py new file mode 100644 index 0000000..383ae32 --- /dev/null +++ b/diffusion_policy/env/robomimic/robomimic_lowdim_wrapper.py @@ -0,0 +1,133 @@ +from typing import List, Dict, Optional +import numpy as np +import gym +from gym.spaces import Box +from robomimic.envs.env_robosuite import EnvRobosuite + +class RobomimicLowdimWrapper(gym.Env): + def __init__(self, + env: EnvRobosuite, + obs_keys: List[str]=[ + 'object', + 'robot0_eef_pos', + 'robot0_eef_quat', + 'robot0_gripper_qpos'], + init_state: Optional[np.ndarray]=None, + render_hw=(256,256), + render_camera_name='agentview' + ): + + self.env = env + self.obs_keys = obs_keys + self.init_state = init_state + self.render_hw = render_hw + self.render_camera_name = render_camera_name + self.seed_state_map = 
dict() + self._seed = None + + # setup spaces + low = np.full(env.action_dimension, fill_value=-1) + high = np.full(env.action_dimension, fill_value=1) + self.action_space = Box( + low=low, + high=high, + shape=low.shape, + dtype=low.dtype + ) + obs_example = self.get_observation() + low = np.full_like(obs_example, fill_value=-1) + high = np.full_like(obs_example, fill_value=1) + self.observation_space = Box( + low=low, + high=high, + shape=low.shape, + dtype=low.dtype + ) + + def get_observation(self): + raw_obs = self.env.get_observation() + obs = np.concatenate([ + raw_obs[key] for key in self.obs_keys + ], axis=0) + return obs + + def seed(self, seed=None): + np.random.seed(seed=seed) + self._seed = seed + + def reset(self): + if self.init_state is not None: + # always reset to the same state + # to be compatible with gym + self.env.reset_to({'states': self.init_state}) + elif self._seed is not None: + # reset to a specific seed + seed = self._seed + if seed in self.seed_state_map: + # env.reset is expensive, use cache + self.env.reset_to({'states': self.seed_state_map[seed]}) + else: + # robosuite's initializes all use numpy global random state + np.random.seed(seed=seed) + self.env.reset() + state = self.env.get_state()['states'] + self.seed_state_map[seed] = state + self._seed = None + else: + # random reset + self.env.reset() + + # return obs + obs = self.get_observation() + return obs + + def step(self, action): + raw_obs, reward, done, info = self.env.step(action) + obs = np.concatenate([ + raw_obs[key] for key in self.obs_keys + ], axis=0) + return obs, reward, done, info + + def render(self, mode='rgb_array'): + h, w = self.render_hw + return self.env.render(mode=mode, + height=h, width=w, + camera_name=self.render_camera_name) + + +def test(): + import robomimic.utils.file_utils as FileUtils + import robomimic.utils.env_utils as EnvUtils + from matplotlib import pyplot as plt + + dataset_path = 
'/home/cchi/dev/diffusion_policy/data/robomimic/datasets/square/ph/low_dim.hdf5' + env_meta = FileUtils.get_env_metadata_from_dataset( + dataset_path) + + env = EnvUtils.create_env_from_metadata( + env_meta=env_meta, + render=False, + render_offscreen=False, + use_image_obs=False, + ) + wrapper = RobomimicLowdimWrapper( + env=env, + obs_keys=[ + 'object', + 'robot0_eef_pos', + 'robot0_eef_quat', + 'robot0_gripper_qpos' + ] + ) + + states = list() + for _ in range(2): + wrapper.seed(0) + wrapper.reset() + states.append(wrapper.env.get_state()['states']) + assert np.allclose(states[0], states[1]) + + img = wrapper.render() + plt.imshow(img) + # wrapper.seed() + # states.append(wrapper.env.get_state()['states']) diff --git a/diffusion_policy/env_runner/base_image_runner.py b/diffusion_policy/env_runner/base_image_runner.py new file mode 100644 index 0000000..0652007 --- /dev/null +++ b/diffusion_policy/env_runner/base_image_runner.py @@ -0,0 +1,9 @@ +from typing import Dict +from diffusion_policy.policy.base_image_policy import BaseImagePolicy + +class BaseImageRunner: + def __init__(self, output_dir): + self.output_dir = output_dir + + def run(self, policy: BaseImagePolicy) -> Dict: + raise NotImplementedError() diff --git a/diffusion_policy/env_runner/base_lowdim_runner.py b/diffusion_policy/env_runner/base_lowdim_runner.py new file mode 100644 index 0000000..45437ec --- /dev/null +++ b/diffusion_policy/env_runner/base_lowdim_runner.py @@ -0,0 +1,9 @@ +from typing import Dict +from diffusion_policy.policy.base_lowdim_policy import BaseLowdimPolicy + +class BaseLowdimRunner: + def __init__(self, output_dir): + self.output_dir = output_dir + + def run(self, policy: BaseLowdimPolicy) -> Dict: + raise NotImplementedError() diff --git a/diffusion_policy/env_runner/blockpush_lowdim_runner.py b/diffusion_policy/env_runner/blockpush_lowdim_runner.py new file mode 100644 index 0000000..b3f31bf --- /dev/null +++ b/diffusion_policy/env_runner/blockpush_lowdim_runner.py @@ -0,0 
import wandb
import numpy as np
import torch
import collections
import pathlib
import tqdm
import dill
import math
import wandb.sdk.data_types.video as wv
from diffusion_policy.env.block_pushing.block_pushing_multimodal import BlockPushMultimodal
from diffusion_policy.gym_util.async_vector_env import AsyncVectorEnv
from diffusion_policy.gym_util.sync_vector_env import SyncVectorEnv
from diffusion_policy.gym_util.multistep_wrapper import MultiStepWrapper
from diffusion_policy.gym_util.video_recording_wrapper import VideoRecordingWrapper, VideoRecorder
from gym.wrappers import FlattenObservation

from diffusion_policy.policy.base_lowdim_policy import BaseLowdimPolicy
from diffusion_policy.common.pytorch_util import dict_apply
from diffusion_policy.env_runner.base_lowdim_runner import BaseLowdimRunner


class BlockPushLowdimRunner(BaseLowdimRunner):
    """Evaluates a low-dim policy on BlockPushMultimodal over seeded train/test episodes.

    Runs n_train + n_test episodes in chunks of n_envs parallel workers,
    records videos for the first few of each split, and returns a dict of
    wandb-ready metrics.
    """

    def __init__(self,
            output_dir,
            n_train=10,
            n_train_vis=3,
            train_start_seed=0,
            n_test=22,
            n_test_vis=6,
            test_start_seed=10000,
            max_steps=200,
            n_obs_steps=8,
            n_action_steps=8,
            fps=5,
            crf=22,
            past_action=False,
            abs_action=False,
            obs_eef_target=True,
            tqdm_interval_sec=5.0,
            n_envs=None
        ):
        super().__init__(output_dir)

        if n_envs is None:
            n_envs = n_train + n_test

        task_fps = 10
        steps_per_render = max(10 // fps, 1)

        def env_fn():
            # NOTE(review): `seed` is a late-binding closure over the loop
            # variable defined below; env construction therefore sees the
            # last seed, and per-episode seeding is done by init_fn instead.
            # Confirm this is intended.
            return MultiStepWrapper(
                VideoRecordingWrapper(
                    FlattenObservation(
                        BlockPushMultimodal(
                            control_frequency=task_fps,
                            shared_memory=False,
                            seed=seed,
                            abs_action=abs_action
                        )
                    ),
                    video_recoder=VideoRecorder.create_h264(
                        fps=fps,
                        codec='h264',
                        input_pix_fmt='rgb24',
                        crf=crf,
                        thread_type='FRAME',
                        thread_count=1
                    ),
                    file_path=None,
                    steps_per_render=steps_per_render
                ),
                n_obs_steps=n_obs_steps,
                n_action_steps=n_action_steps,
                max_episode_steps=max_steps
            )

        env_fns = [env_fn] * n_envs
        env_seeds = list()
        env_prefixs = list()
        env_init_fn_dills = list()
        # train
        for i in range(n_train):
            seed = train_start_seed + i
            enable_render = i < n_train_vis

            def init_fn(env, seed=seed, enable_render=enable_render):
                # setup rendering
                # video_wrapper
                assert isinstance(env.env, VideoRecordingWrapper)
                env.env.video_recoder.stop()
                env.env.file_path = None
                if enable_render:
                    filename = pathlib.Path(output_dir).joinpath(
                        'media', wv.util.generate_id() + ".mp4")
                    filename.parent.mkdir(parents=False, exist_ok=True)
                    filename = str(filename)
                    env.env.file_path = filename

                # set seed
                assert isinstance(env, MultiStepWrapper)
                env.seed(seed)

            env_seeds.append(seed)
            env_prefixs.append('train/')
            env_init_fn_dills.append(dill.dumps(init_fn))

        # test
        for i in range(n_test):
            seed = test_start_seed + i
            enable_render = i < n_test_vis

            def init_fn(env, seed=seed, enable_render=enable_render):
                # setup rendering
                # video_wrapper
                assert isinstance(env.env, VideoRecordingWrapper)
                env.env.video_recoder.stop()
                env.env.file_path = None
                if enable_render:
                    filename = pathlib.Path(output_dir).joinpath(
                        'media', wv.util.generate_id() + ".mp4")
                    filename.parent.mkdir(parents=False, exist_ok=True)
                    filename = str(filename)
                    env.env.file_path = filename

                # set seed
                assert isinstance(env, MultiStepWrapper)
                env.seed(seed)

            env_seeds.append(seed)
            env_prefixs.append('test/')
            env_init_fn_dills.append(dill.dumps(init_fn))

        env = AsyncVectorEnv(env_fns)
        # env = SyncVectorEnv(env_fns)

        self.env = env
        self.env_fns = env_fns
        self.env_seeds = env_seeds
        self.env_prefixs = env_prefixs
        self.env_init_fn_dills = env_init_fn_dills
        self.fps = fps
        self.crf = crf
        self.n_obs_steps = n_obs_steps
        self.n_action_steps = n_action_steps
        self.past_action = past_action
        self.max_steps = max_steps
        self.tqdm_interval_sec = tqdm_interval_sec
        self.obs_eef_target = obs_eef_target

    def run(self, policy: BaseLowdimPolicy):
        """Roll out the policy over all episode inits and return a metrics dict."""
        device = policy.device
        dtype = policy.dtype
        env = self.env

        # plan for rollout
        n_envs = len(self.env_fns)
        n_inits = len(self.env_init_fn_dills)
        n_chunks = math.ceil(n_inits / n_envs)

        # allocate data
        all_video_paths = [None] * n_inits
        all_rewards = [None] * n_inits
        last_info = [None] * n_inits

        for chunk_idx in range(n_chunks):
            start = chunk_idx * n_envs
            end = min(n_inits, start + n_envs)
            this_global_slice = slice(start, end)
            this_n_active_envs = end - start
            this_local_slice = slice(0, this_n_active_envs)

            this_init_fns = self.env_init_fn_dills[this_global_slice]
            n_diff = n_envs - len(this_init_fns)
            if n_diff > 0:
                # pad the last chunk with a dummy init; its results are discarded
                this_init_fns.extend([self.env_init_fn_dills[0]] * n_diff)
            assert len(this_init_fns) == n_envs

            # init envs
            env.call_each('run_dill_function',
                args_list=[(x,) for x in this_init_fns])

            # start rollout
            obs = env.reset()
            past_action = None
            policy.reset()

            pbar = tqdm.tqdm(total=self.max_steps, desc=f"Eval BlockPushLowdimRunner {chunk_idx+1}/{n_chunks}",
                leave=False, mininterval=self.tqdm_interval_sec)
            done = False
            while not done:
                # create obs dict
                if not self.obs_eef_target:
                    # mask out the end-effector target dims when disabled
                    obs[..., 8:10] = 0
                np_obs_dict = {
                    'obs': obs.astype(np.float32)
                }
                if self.past_action and (past_action is not None):
                    # TODO: not tested
                    np_obs_dict['past_action'] = past_action[
                        :, -(self.n_obs_steps-1):].astype(np.float32)
                # device transfer
                obs_dict = dict_apply(np_obs_dict,
                    lambda x: torch.from_numpy(x).to(
                        device=device))

                # run policy
                with torch.no_grad():
                    action_dict = policy.predict_action(obs_dict)

                # device_transfer
                np_action_dict = dict_apply(action_dict,
                    lambda x: x.detach().to('cpu').numpy())

                action = np_action_dict['action']

                # step env
                obs, reward, done, info = env.step(action)
                done = np.all(done)
                past_action = action

                # update pbar
                pbar.update(action.shape[1])
            pbar.close()

            # collect data for this round
            all_video_paths[this_global_slice] = env.render()[this_local_slice]
            all_rewards[this_global_slice] = env.call('get_attr', 'reward')[this_local_slice]
            last_info[this_global_slice] = [dict((k, v[-1]) for k, v in x.items()) for x in info][this_local_slice]

        # log
        total_rewards = collections.defaultdict(list)
        total_p1 = collections.defaultdict(list)
        total_p2 = collections.defaultdict(list)
        prefix_event_counts = collections.defaultdict(lambda: collections.defaultdict(lambda: 0))
        prefix_counts = collections.defaultdict(lambda: 0)

        log_data = dict()
        # FIX: iterate all inits; using range(len(self.env_fns)) dropped
        # results (or raised IndexError) whenever n_envs != n_inits
        for i in range(n_inits):
            seed = self.env_seeds[i]
            prefix = self.env_prefixs[i]
            this_rewards = all_rewards[i]
            total_reward = np.unique(this_rewards).sum()  # (0, 0.49, 0.51)
            p1 = total_reward > 0.4
            p2 = total_reward > 0.9

            total_rewards[prefix].append(total_reward)
            total_p1[prefix].append(p1)
            total_p2[prefix].append(p2)
            log_data[prefix + f'sim_max_reward_{seed}'] = total_reward

            # aggregate event counts
            prefix_counts[prefix] += 1
            for key, value in last_info[i].items():
                delta_count = 1 if value > 0 else 0
                prefix_event_counts[prefix][key] += delta_count

            # visualize sim
            video_path = all_video_paths[i]
            if video_path is not None:
                sim_video = wandb.Video(video_path)
                log_data[prefix + f'sim_video_{seed}'] = sim_video

        # log aggregate metrics
        for prefix, value in total_rewards.items():
            name = prefix + 'mean_score'
            value = np.mean(value)
            log_data[name] = value
        for prefix, value in total_p1.items():
            name = prefix + 'p1'
            value = np.mean(value)
            log_data[name] = value
        for prefix, value in total_p2.items():
            name = prefix + 'p2'
            value = np.mean(value)
            log_data[name] = value

        # summarize probabilities
        for prefix, events in prefix_event_counts.items():
            prefix_count = prefix_counts[prefix]
            for event, count in events.items():
                prob = count / prefix_count
                key = prefix + event
                log_data[key] = prob

        return log_data
# --- diffusion_policy/env_runner/kitchen_lowdim_runner.py ---

import wandb
import numpy as np
import torch
import collections
import pathlib
import tqdm
import dill
import math
import logging
import wandb.sdk.data_types.video as wv
import gym
import gym.spaces
import multiprocessing as mp
from diffusion_policy.gym_util.async_vector_env import AsyncVectorEnv
from diffusion_policy.gym_util.sync_vector_env import SyncVectorEnv
from diffusion_policy.gym_util.multistep_wrapper import MultiStepWrapper
from diffusion_policy.gym_util.video_recording_wrapper import VideoRecordingWrapper, VideoRecorder

from diffusion_policy.policy.base_lowdim_policy import BaseLowdimPolicy
from diffusion_policy.common.pytorch_util import dict_apply
from diffusion_policy.env_runner.base_lowdim_runner import BaseLowdimRunner

module_logger = logging.getLogger(__name__)


class KitchenLowdimRunner(BaseLowdimRunner):
    """Evaluates a low-dim policy on the Franka Kitchen environment.

    Train episodes replay known initial conditions loaded from the dataset;
    test episodes use fresh seeded initializations. Returns wandb-ready
    metrics including per-task completion probabilities.
    """

    def __init__(self,
            output_dir,
            dataset_dir,
            n_train=10,
            n_train_vis=3,
            train_start_seed=0,
            n_test=22,
            n_test_vis=6,
            test_start_seed=10000,
            max_steps=280,
            n_obs_steps=2,
            n_action_steps=8,
            render_hw=(240,360),
            fps=12.5,
            crf=22,
            past_action=False,
            tqdm_interval_sec=5.0,
            abs_action=False,
            robot_noise_ratio=0.1,
            n_envs=None
        ):
        super().__init__(output_dir)

        if n_envs is None:
            n_envs = n_train + n_test

        task_fps = 12.5
        steps_per_render = int(max(task_fps // fps, 1))

        def env_fn():
            # import inside the worker to avoid OpenGL context issues with fork
            from diffusion_policy.env.kitchen.v0 import KitchenAllV0
            from diffusion_policy.env.kitchen.kitchen_lowdim_wrapper import KitchenLowdimWrapper
            env = KitchenAllV0(use_abs_action=abs_action)
            env.robot_noise_ratio = robot_noise_ratio
            return MultiStepWrapper(
                VideoRecordingWrapper(
                    KitchenLowdimWrapper(
                        env=env,
                        init_qpos=None,
                        init_qvel=None,
                        render_hw=tuple(render_hw)
                    ),
                    video_recoder=VideoRecorder.create_h264(
                        fps=fps,
                        codec='h264',
                        input_pix_fmt='rgb24',
                        crf=crf,
                        thread_type='FRAME',
                        thread_count=1
                    ),
                    file_path=None,
                    steps_per_render=steps_per_render
                ),
                n_obs_steps=n_obs_steps,
                n_action_steps=n_action_steps,
                max_episode_steps=max_steps
            )

        all_init_qpos = np.load(pathlib.Path(dataset_dir) / "all_init_qpos.npy")
        all_init_qvel = np.load(pathlib.Path(dataset_dir) / "all_init_qvel.npy")
        module_logger.info(f'Loaded {len(all_init_qpos)} known initial conditions.')

        env_fns = [env_fn] * n_envs
        env_seeds = list()
        env_prefixs = list()
        env_init_fn_dills = list()
        # train
        for i in range(n_train):
            seed = train_start_seed + i
            enable_render = i < n_train_vis
            init_qpos = None
            init_qvel = None
            if i < len(all_init_qpos):
                init_qpos = all_init_qpos[i]
                init_qvel = all_init_qvel[i]

            def init_fn(env, init_qpos=init_qpos, init_qvel=init_qvel, enable_render=enable_render):
                from diffusion_policy.env.kitchen.kitchen_lowdim_wrapper import KitchenLowdimWrapper
                # setup rendering
                # video_wrapper
                assert isinstance(env.env, VideoRecordingWrapper)
                env.env.video_recoder.stop()
                env.env.file_path = None
                if enable_render:
                    filename = pathlib.Path(output_dir).joinpath(
                        'media', wv.util.generate_id() + ".mp4")
                    filename.parent.mkdir(parents=False, exist_ok=True)
                    filename = str(filename)
                    env.env.file_path = filename

                # set initial condition
                # NOTE(review): train episodes do not call env.seed(); the
                # replayed qpos/qvel fully determine the initial condition.
                # Confirm this is intended.
                assert isinstance(env.env.env, KitchenLowdimWrapper)
                env.env.env.init_qpos = init_qpos
                env.env.env.init_qvel = init_qvel

            env_seeds.append(seed)
            env_prefixs.append('train/')
            env_init_fn_dills.append(dill.dumps(init_fn))

        # test
        for i in range(n_test):
            seed = test_start_seed + i
            enable_render = i < n_test_vis

            def init_fn(env, seed=seed, enable_render=enable_render):
                from diffusion_policy.env.kitchen.kitchen_lowdim_wrapper import KitchenLowdimWrapper
                # setup rendering
                # video_wrapper
                assert isinstance(env.env, VideoRecordingWrapper)
                env.env.video_recoder.stop()
                env.env.file_path = None
                if enable_render:
                    filename = pathlib.Path(output_dir).joinpath(
                        'media', wv.util.generate_id() + ".mp4")
                    filename.parent.mkdir(parents=False, exist_ok=True)
                    filename = str(filename)
                    env.env.file_path = filename

                # set initial condition
                assert isinstance(env.env.env, KitchenLowdimWrapper)
                env.env.env.init_qpos = None
                env.env.env.init_qvel = None

                # set seed
                assert isinstance(env, MultiStepWrapper)
                env.seed(seed)

            env_seeds.append(seed)
            env_prefixs.append('test/')
            env_init_fn_dills.append(dill.dumps(init_fn))

        def dummy_env_fn():
            # Avoid importing or using env in the main process
            # to prevent OpenGL context issue with fork.
            # Create a fake env whose sole purpose is to provide
            # obs/action spaces and metadata.
            env = gym.Env()
            env.observation_space = gym.spaces.Box(
                -8, 8, shape=(60,), dtype=np.float32)
            env.action_space = gym.spaces.Box(
                -8, 8, shape=(9,), dtype=np.float32)
            env.metadata = {
                'render.modes': ['human', 'rgb_array', 'depth_array'],
                'video.frames_per_second': 12
            }
            env = MultiStepWrapper(
                env=env,
                n_obs_steps=n_obs_steps,
                n_action_steps=n_action_steps,
                max_episode_steps=max_steps
            )
            return env

        env = AsyncVectorEnv(env_fns, dummy_env_fn=dummy_env_fn)
        # env = SyncVectorEnv(env_fns)

        self.env = env
        self.env_fns = env_fns
        self.env_seeds = env_seeds
        self.env_prefixs = env_prefixs
        self.env_init_fn_dills = env_init_fn_dills
        self.fps = fps
        self.crf = crf
        self.n_obs_steps = n_obs_steps
        self.n_action_steps = n_action_steps
        self.past_action = past_action
        self.max_steps = max_steps
        self.tqdm_interval_sec = tqdm_interval_sec

    def run(self, policy: BaseLowdimPolicy):
        """Roll out the policy over all episode inits and return a metrics dict."""
        device = policy.device
        dtype = policy.dtype
        env = self.env

        # plan for rollout
        n_envs = len(self.env_fns)
        n_inits = len(self.env_init_fn_dills)
        n_chunks = math.ceil(n_inits / n_envs)

        # allocate data
        all_video_paths = [None] * n_inits
        all_rewards = [None] * n_inits
        last_info = [None] * n_inits

        for chunk_idx in range(n_chunks):
            start = chunk_idx * n_envs
            end = min(n_inits, start + n_envs)
            this_global_slice = slice(start, end)
            this_n_active_envs = end - start
            this_local_slice = slice(0, this_n_active_envs)

            this_init_fns = self.env_init_fn_dills[this_global_slice]
            n_diff = n_envs - len(this_init_fns)
            if n_diff > 0:
                # pad the last chunk with a dummy init; its results are discarded
                this_init_fns.extend([self.env_init_fn_dills[0]] * n_diff)
            assert len(this_init_fns) == n_envs

            # init envs
            env.call_each('run_dill_function',
                args_list=[(x,) for x in this_init_fns])

            # start rollout
            obs = env.reset()
            past_action = None
            policy.reset()

            # FIX: desc said "Eval BlockPushLowdimRunner" (copy-paste)
            pbar = tqdm.tqdm(total=self.max_steps, desc=f"Eval KitchenLowdimRunner {chunk_idx+1}/{n_chunks}",
                leave=False, mininterval=self.tqdm_interval_sec)
            done = False
            while not done:
                # create obs dict
                np_obs_dict = {
                    'obs': obs.astype(np.float32)
                }
                if self.past_action and (past_action is not None):
                    # TODO: not tested
                    np_obs_dict['past_action'] = past_action[
                        :, -(self.n_obs_steps-1):].astype(np.float32)
                # device transfer
                obs_dict = dict_apply(np_obs_dict,
                    lambda x: torch.from_numpy(x).to(
                        device=device))

                # run policy
                with torch.no_grad():
                    action_dict = policy.predict_action(obs_dict)

                # device_transfer
                np_action_dict = dict_apply(action_dict,
                    lambda x: x.detach().to('cpu').numpy())

                action = np_action_dict['action']

                # step env
                obs, reward, done, info = env.step(action)
                done = np.all(done)
                past_action = action

                # update pbar
                pbar.update(action.shape[1])
            pbar.close()

            # collect data for this round
            all_video_paths[this_global_slice] = env.render()[this_local_slice]
            all_rewards[this_global_slice] = env.call('get_attr', 'reward')[this_local_slice]
            last_info[this_global_slice] = [dict((k, v[-1]) for k, v in x.items()) for x in info][this_local_slice]

        # reward is number of tasks completed, max 7
        # use info to record the order of task completion?
        # also report the probability of completing n tasks (different aggregation of reward).

        # log
        log_data = dict()
        prefix_total_reward_map = collections.defaultdict(list)
        prefix_n_completed_map = collections.defaultdict(list)
        # FIX: iterate all inits; using range(len(self.env_fns)) dropped
        # results (or raised IndexError) whenever n_envs != n_inits
        for i in range(n_inits):
            seed = self.env_seeds[i]
            prefix = self.env_prefixs[i]
            this_rewards = all_rewards[i]
            total_reward = np.sum(this_rewards) / 7
            prefix_total_reward_map[prefix].append(total_reward)

            n_completed_tasks = len(last_info[i]['completed_tasks'])
            prefix_n_completed_map[prefix].append(n_completed_tasks)

            # visualize sim
            video_path = all_video_paths[i]
            if video_path is not None:
                sim_video = wandb.Video(video_path)
                log_data[prefix + f'sim_video_{seed}'] = sim_video

        # log aggregate metrics
        for prefix, value in prefix_total_reward_map.items():
            name = prefix + 'mean_score'
            value = np.mean(value)
            log_data[name] = value
        for prefix, value in prefix_n_completed_map.items():
            n_completed = np.array(value)
            for i in range(7):
                n = i + 1
                p_n = np.mean(n_completed >= n)
                name = prefix + f'p_{n}'
                log_data[name] = p_n

        return log_data


# --- diffusion_policy/env_runner/pusht_image_runner.py ---

import wandb
import numpy as np
import torch
import collections
import pathlib
import tqdm
import dill
import math
import wandb.sdk.data_types.video as wv
from diffusion_policy.env.pusht.pusht_image_env import PushTImageEnv
from diffusion_policy.gym_util.async_vector_env import AsyncVectorEnv
# from diffusion_policy.gym_util.sync_vector_env import SyncVectorEnv
from diffusion_policy.gym_util.multistep_wrapper import MultiStepWrapper
from diffusion_policy.gym_util.video_recording_wrapper import VideoRecordingWrapper, VideoRecorder

from diffusion_policy.policy.base_image_policy import BaseImagePolicy
from diffusion_policy.common.pytorch_util import dict_apply
from diffusion_policy.env_runner.base_image_runner import BaseImageRunner


class PushTImageRunner(BaseImageRunner):
    """Evaluates an image-based policy on PushTImageEnv over seeded train/test episodes."""

    def __init__(self,
            output_dir,
            n_train=10,
            n_train_vis=3,
            train_start_seed=0,
            n_test=22,
            n_test_vis=6,
            legacy_test=False,
            test_start_seed=10000,
            max_steps=200,
            n_obs_steps=8,
            n_action_steps=8,
            fps=10,
            crf=22,
            render_size=96,
            past_action=False,
            tqdm_interval_sec=5.0,
            n_envs=None
        ):
        super().__init__(output_dir)
        if n_envs is None:
            n_envs = n_train + n_test

        steps_per_render = max(10 // fps, 1)

        def env_fn():
            return MultiStepWrapper(
                VideoRecordingWrapper(
                    PushTImageEnv(
                        legacy=legacy_test,
                        render_size=render_size
                    ),
                    video_recoder=VideoRecorder.create_h264(
                        fps=fps,
                        codec='h264',
                        input_pix_fmt='rgb24',
                        crf=crf,
                        thread_type='FRAME',
                        thread_count=1
                    ),
                    file_path=None,
                    steps_per_render=steps_per_render
                ),
                n_obs_steps=n_obs_steps,
                n_action_steps=n_action_steps,
                max_episode_steps=max_steps
            )

        env_fns = [env_fn] * n_envs
        env_seeds = list()
        env_prefixs = list()
        env_init_fn_dills = list()
        # train
        for i in range(n_train):
            seed = train_start_seed + i
            enable_render = i < n_train_vis

            def init_fn(env, seed=seed, enable_render=enable_render):
                # setup rendering
                # video_wrapper
                assert isinstance(env.env, VideoRecordingWrapper)
                env.env.video_recoder.stop()
                env.env.file_path = None
                if enable_render:
                    filename = pathlib.Path(output_dir).joinpath(
                        'media', wv.util.generate_id() + ".mp4")
                    filename.parent.mkdir(parents=False, exist_ok=True)
                    filename = str(filename)
                    env.env.file_path = filename

                # set seed
                assert isinstance(env, MultiStepWrapper)
                env.seed(seed)

            env_seeds.append(seed)
            env_prefixs.append('train/')
            env_init_fn_dills.append(dill.dumps(init_fn))

        # test
        for i in range(n_test):
            seed = test_start_seed + i
            enable_render = i < n_test_vis

            def init_fn(env, seed=seed, enable_render=enable_render):
                # setup rendering
                # video_wrapper
                assert isinstance(env.env, VideoRecordingWrapper)
                env.env.video_recoder.stop()
                env.env.file_path = None
                if enable_render:
                    filename = pathlib.Path(output_dir).joinpath(
                        'media', wv.util.generate_id() + ".mp4")
                    filename.parent.mkdir(parents=False, exist_ok=True)
                    filename = str(filename)
                    env.env.file_path = filename

                # set seed
                assert isinstance(env, MultiStepWrapper)
                env.seed(seed)

            env_seeds.append(seed)
            env_prefixs.append('test/')
            env_init_fn_dills.append(dill.dumps(init_fn))

        env = AsyncVectorEnv(env_fns)

        self.env = env
        self.env_fns = env_fns
        self.env_seeds = env_seeds
        self.env_prefixs = env_prefixs
        self.env_init_fn_dills = env_init_fn_dills
        self.fps = fps
        self.crf = crf
        self.n_obs_steps = n_obs_steps
        self.n_action_steps = n_action_steps
        self.past_action = past_action
        self.max_steps = max_steps
        self.tqdm_interval_sec = tqdm_interval_sec

    def run(self, policy: BaseImagePolicy):
        """Roll out the policy over all episode inits and return a metrics dict."""
        device = policy.device
        dtype = policy.dtype
        env = self.env

        # plan for rollout
        n_envs = len(self.env_fns)
        n_inits = len(self.env_init_fn_dills)
        n_chunks = math.ceil(n_inits / n_envs)

        # allocate data
        all_video_paths = [None] * n_inits
        all_rewards = [None] * n_inits

        for chunk_idx in range(n_chunks):
            start = chunk_idx * n_envs
            end = min(n_inits, start + n_envs)
            this_global_slice = slice(start, end)
            this_n_active_envs = end - start
            this_local_slice = slice(0, this_n_active_envs)

            this_init_fns = self.env_init_fn_dills[this_global_slice]
            n_diff = n_envs - len(this_init_fns)
            if n_diff > 0:
                # pad the last chunk with a dummy init; its results are discarded
                this_init_fns.extend([self.env_init_fn_dills[0]] * n_diff)
            assert len(this_init_fns) == n_envs

            # init envs
            env.call_each('run_dill_function',
                args_list=[(x,) for x in this_init_fns])

            # start rollout
            obs = env.reset()
            past_action = None
            policy.reset()

            pbar = tqdm.tqdm(total=self.max_steps, desc=f"Eval PushtImageRunner {chunk_idx+1}/{n_chunks}",
                leave=False, mininterval=self.tqdm_interval_sec)
            done = False
            while not done:
                # create obs dict
                np_obs_dict = dict(obs)
                if self.past_action and (past_action is not None):
                    # TODO: not tested
                    np_obs_dict['past_action'] = past_action[
                        :, -(self.n_obs_steps-1):].astype(np.float32)

                # device transfer
                obs_dict = dict_apply(np_obs_dict,
                    lambda x: torch.from_numpy(x).to(
                        device=device))

                # run policy
                with torch.no_grad():
                    action_dict = policy.predict_action(obs_dict)

                # device_transfer
                np_action_dict = dict_apply(action_dict,
                    lambda x: x.detach().to('cpu').numpy())

                action = np_action_dict['action']

                # step env
                obs, reward, done, info = env.step(action)
                done = np.all(done)
                past_action = action

                # update pbar
                pbar.update(action.shape[1])
            pbar.close()

            all_video_paths[this_global_slice] = env.render()[this_local_slice]
            all_rewards[this_global_slice] = env.call('get_attr', 'reward')[this_local_slice]
            # clear out video buffer
            _ = env.reset()

        # log
        max_rewards = collections.defaultdict(list)
        log_data = dict()
        # FIX: iterate all inits; using range(len(self.env_fns)) dropped
        # results (or raised IndexError) whenever n_envs != n_inits
        for i in range(n_inits):
            seed = self.env_seeds[i]
            prefix = self.env_prefixs[i]
            max_reward = np.max(all_rewards[i])
            max_rewards[prefix].append(max_reward)
            log_data[prefix + f'sim_max_reward_{seed}'] = max_reward

            # visualize sim
            video_path = all_video_paths[i]
            if video_path is not None:
                sim_video = wandb.Video(video_path)
                log_data[prefix + f'sim_video_{seed}'] = sim_video

        # log aggregate metrics
        for prefix, value in max_rewards.items():
            name = prefix + 'mean_score'
            value = np.mean(value)
            log_data[name] = value

        return log_data
b/diffusion_policy/env_runner/pusht_keypoints_runner.py new file mode 100644 index 0000000..9056c40 --- /dev/null +++ b/diffusion_policy/env_runner/pusht_keypoints_runner.py @@ -0,0 +1,265 @@ +import wandb +import numpy as np +import torch +import collections +import pathlib +import tqdm +import dill +import math +import wandb.sdk.data_types.video as wv +from diffusion_policy.env.pusht.pusht_keypoints_env import PushTKeypointsEnv +from diffusion_policy.gym_util.async_vector_env import AsyncVectorEnv +# from diffusion_policy.gym_util.sync_vector_env import SyncVectorEnv +from diffusion_policy.gym_util.multistep_wrapper import MultiStepWrapper +from diffusion_policy.gym_util.video_recording_wrapper import VideoRecordingWrapper, VideoRecorder + +from diffusion_policy.policy.base_lowdim_policy import BaseLowdimPolicy +from diffusion_policy.common.pytorch_util import dict_apply +from diffusion_policy.env_runner.base_lowdim_runner import BaseLowdimRunner + +class PushTKeypointsRunner(BaseLowdimRunner): + def __init__(self, + output_dir, + keypoint_visible_rate=1.0, + n_train=10, + n_train_vis=3, + train_start_seed=0, + n_test=22, + n_test_vis=6, + legacy_test=False, + test_start_seed=10000, + max_steps=200, + n_obs_steps=8, + n_action_steps=8, + n_latency_steps=0, + fps=10, + crf=22, + agent_keypoints=False, + past_action=False, + tqdm_interval_sec=5.0, + n_envs=None + ): + super().__init__(output_dir) + + if n_envs is None: + n_envs = n_train + n_test + + # handle latency step + # to mimic latency, we request n_latency_steps addtional steps + # of past observations, and the discard the last n_latency_steps + env_n_obs_steps = n_obs_steps + n_latency_steps + env_n_action_steps = n_action_steps + + # assert n_obs_steps <= n_action_steps + kp_kwargs = PushTKeypointsEnv.genenerate_keypoint_manager_params() + + def env_fn(): + return MultiStepWrapper( + VideoRecordingWrapper( + PushTKeypointsEnv( + legacy=legacy_test, + keypoint_visible_rate=keypoint_visible_rate, + 
agent_keypoints=agent_keypoints, + **kp_kwargs + ), + video_recoder=VideoRecorder.create_h264( + fps=fps, + codec='h264', + input_pix_fmt='rgb24', + crf=crf, + thread_type='FRAME', + thread_count=1 + ), + file_path=None, + ), + n_obs_steps=env_n_obs_steps, + n_action_steps=env_n_action_steps, + max_episode_steps=max_steps + ) + + env_fns = [env_fn] * n_envs + env_seeds = list() + env_prefixs = list() + env_init_fn_dills = list() + # train + for i in range(n_train): + seed = train_start_seed + i + enable_render = i < n_train_vis + + def init_fn(env, seed=seed, enable_render=enable_render): + # setup rendering + # video_wrapper + assert isinstance(env.env, VideoRecordingWrapper) + env.env.video_recoder.stop() + env.env.file_path = None + if enable_render: + filename = pathlib.Path(output_dir).joinpath( + 'media', wv.util.generate_id() + ".mp4") + filename.parent.mkdir(parents=False, exist_ok=True) + filename = str(filename) + env.env.file_path = filename + + # set seed + assert isinstance(env, MultiStepWrapper) + env.seed(seed) + + env_seeds.append(seed) + env_prefixs.append('train/') + env_init_fn_dills.append(dill.dumps(init_fn)) + + # test + for i in range(n_test): + seed = test_start_seed + i + enable_render = i < n_test_vis + + def init_fn(env, seed=seed, enable_render=enable_render): + # setup rendering + # video_wrapper + assert isinstance(env.env, VideoRecordingWrapper) + env.env.video_recoder.stop() + env.env.file_path = None + if enable_render: + filename = pathlib.Path(output_dir).joinpath( + 'media', wv.util.generate_id() + ".mp4") + filename.parent.mkdir(parents=False, exist_ok=True) + filename = str(filename) + env.env.file_path = filename + + # set seed + assert isinstance(env, MultiStepWrapper) + env.seed(seed) + + env_seeds.append(seed) + env_prefixs.append('test/') + env_init_fn_dills.append(dill.dumps(init_fn)) + + env = AsyncVectorEnv(env_fns) + + # test env + # env.reset(seed=env_seeds) + # x = env.step(env.action_space.sample()) + # imgs = 
env.call('render') + # import pdb; pdb.set_trace() + + self.env = env + self.env_fns = env_fns + self.env_seeds = env_seeds + self.env_prefixs = env_prefixs + self.env_init_fn_dills = env_init_fn_dills + self.fps = fps + self.crf = crf + self.agent_keypoints = agent_keypoints + self.n_obs_steps = n_obs_steps + self.n_action_steps = n_action_steps + self.n_latency_steps = n_latency_steps + self.past_action = past_action + self.max_steps = max_steps + self.tqdm_interval_sec = tqdm_interval_sec + + def run(self, policy: BaseLowdimPolicy): + device = policy.device + dtype = policy.dtype + + env = self.env + + # plan for rollout + n_envs = len(self.env_fns) + n_inits = len(self.env_init_fn_dills) + n_chunks = math.ceil(n_inits / n_envs) + + # allocate data + all_video_paths = [None] * n_inits + all_rewards = [None] * n_inits + + for chunk_idx in range(n_chunks): + start = chunk_idx * n_envs + end = min(n_inits, start + n_envs) + this_global_slice = slice(start, end) + this_n_active_envs = end - start + this_local_slice = slice(0,this_n_active_envs) + + this_init_fns = self.env_init_fn_dills[this_global_slice] + n_diff = n_envs - len(this_init_fns) + if n_diff > 0: + this_init_fns.extend([self.env_init_fn_dills[0]]*n_diff) + assert len(this_init_fns) == n_envs + + # init envs + env.call_each('run_dill_function', + args_list=[(x,) for x in this_init_fns]) + + # start rollout + obs = env.reset() + past_action = None + policy.reset() + + pbar = tqdm.tqdm(total=self.max_steps, desc=f"Eval PushtKeypointsRunner {chunk_idx+1}/{n_chunks}", + leave=False, mininterval=self.tqdm_interval_sec) + done = False + while not done: + Do = obs.shape[-1] // 2 + # create obs dict + np_obs_dict = { + # handle n_latency_steps by discarding the last n_latency_steps + 'obs': obs[...,:self.n_obs_steps,:Do].astype(np.float32), + 'obs_mask': obs[...,:self.n_obs_steps,Do:] > 0.5 + } + if self.past_action and (past_action is not None): + # TODO: not tested + np_obs_dict['past_action'] = past_action[ 
+ :,-(self.n_obs_steps-1):].astype(np.float32) + + # device transfer + obs_dict = dict_apply(np_obs_dict, + lambda x: torch.from_numpy(x).to( + device=device)) + + # run policy + with torch.no_grad(): + action_dict = policy.predict_action(obs_dict) + + # device_transfer + np_action_dict = dict_apply(action_dict, + lambda x: x.detach().to('cpu').numpy()) + + # handle latency_steps, we discard the first n_latency_steps actions + # to simulate latency + action = np_action_dict['action'][:,self.n_latency_steps:] + + # step env + obs, reward, done, info = env.step(action) + done = np.all(done) + past_action = action + + # update pbar + pbar.update(action.shape[1]) + pbar.close() + + # collect data for this round + all_video_paths[this_global_slice] = env.render()[this_local_slice] + all_rewards[this_global_slice] = env.call('get_attr', 'reward')[this_local_slice] + # import pdb; pdb.set_trace() + + # log + max_rewards = collections.defaultdict(list) + log_data = dict() + for i in range(len(self.env_fns)): + seed = self.env_seeds[i] + prefix = self.env_prefixs[i] + max_reward = np.max(all_rewards[i]) + max_rewards[prefix].append(max_reward) + log_data[prefix+f'sim_max_reward_{seed}'] = max_reward + + # visualize sim + video_path = all_video_paths[i] + if video_path is not None: + sim_video = wandb.Video(video_path) + log_data[prefix+f'sim_video_{seed}'] = sim_video + + # log aggregate metrics + for prefix, value in max_rewards.items(): + name = prefix+'mean_score' + value = np.mean(value) + log_data[name] = value + + return log_data diff --git a/diffusion_policy/env_runner/real_pusht_image_runner.py b/diffusion_policy/env_runner/real_pusht_image_runner.py new file mode 100644 index 0000000..3b58780 --- /dev/null +++ b/diffusion_policy/env_runner/real_pusht_image_runner.py @@ -0,0 +1,10 @@ +from diffusion_policy.policy.base_image_policy import BaseImagePolicy +from diffusion_policy.env_runner.base_image_runner import BaseImageRunner + +class 
def create_env(env_meta, shape_meta, enable_render=True):
    """Build a robomimic env from dataset metadata.

    Each observation key in ``shape_meta['obs']`` is registered with robomimic's
    ObsUtils under its declared modality (defaulting to ``'low_dim'``) before the
    env is constructed. Offscreen rendering and image observations are enabled
    together via ``enable_render``, so a cheap render-free instance can be built
    (used e.g. to initialize spaces without creating an OpenGL context).
    """
    modality_mapping = collections.defaultdict(list)
    for obs_key, obs_attr in shape_meta['obs'].items():
        modality = obs_attr.get('type', 'low_dim')
        modality_mapping[modality].append(obs_key)
    ObsUtils.initialize_obs_modality_mapping_from_dict(modality_mapping)

    return EnvUtils.create_env_from_metadata(
        env_meta=env_meta,
        render=False,
        render_offscreen=enable_render,
        use_image_obs=enable_render,
    )
number of steps. + """ + + def __init__(self, + output_dir, + dataset_path, + shape_meta:dict, + n_train=10, + n_train_vis=3, + train_start_idx=0, + n_test=22, + n_test_vis=6, + test_start_seed=10000, + max_steps=400, + n_obs_steps=2, + n_action_steps=8, + render_obs_key='agentview_image', + fps=10, + crf=22, + past_action=False, + abs_action=False, + tqdm_interval_sec=5.0, + n_envs=None + ): + super().__init__(output_dir) + + if n_envs is None: + n_envs = n_train + n_test + + # assert n_obs_steps <= n_action_steps + dataset_path = os.path.expanduser(dataset_path) + robosuite_fps = 20 + steps_per_render = max(robosuite_fps // fps, 1) + + # read from dataset + env_meta = FileUtils.get_env_metadata_from_dataset( + dataset_path) + # disable object state observation + env_meta['env_kwargs']['use_object_obs'] = False + + rotation_transformer = None + if abs_action: + env_meta['env_kwargs']['controller_configs']['control_delta'] = False + rotation_transformer = RotationTransformer('axis_angle', 'rotation_6d') + + def env_fn(): + robomimic_env = create_env( + env_meta=env_meta, + shape_meta=shape_meta + ) + # Robosuite's hard reset causes excessive memory consumption. + # Disabled to run more envs. 
+ # https://github.com/ARISE-Initiative/robosuite/blob/92abf5595eddb3a845cd1093703e5a3ccd01e77e/robosuite/environments/base.py#L247-L248 + robomimic_env.env.hard_reset = False + return MultiStepWrapper( + VideoRecordingWrapper( + RobomimicImageWrapper( + env=robomimic_env, + shape_meta=shape_meta, + init_state=None, + render_obs_key=render_obs_key + ), + video_recoder=VideoRecorder.create_h264( + fps=fps, + codec='h264', + input_pix_fmt='rgb24', + crf=crf, + thread_type='FRAME', + thread_count=1 + ), + file_path=None, + steps_per_render=steps_per_render + ), + n_obs_steps=n_obs_steps, + n_action_steps=n_action_steps, + max_episode_steps=max_steps + ) + + # For each process the OpenGL context can only be initialized once + # Since AsyncVectorEnv uses fork to create worker process, + # a separate env_fn that does not create OpenGL context (enable_render=False) + # is needed to initialize spaces. + def dummy_env_fn(): + robomimic_env = create_env( + env_meta=env_meta, + shape_meta=shape_meta, + enable_render=False + ) + return MultiStepWrapper( + VideoRecordingWrapper( + RobomimicImageWrapper( + env=robomimic_env, + shape_meta=shape_meta, + init_state=None, + render_obs_key=render_obs_key + ), + video_recoder=VideoRecorder.create_h264( + fps=fps, + codec='h264', + input_pix_fmt='rgb24', + crf=crf, + thread_type='FRAME', + thread_count=1 + ), + file_path=None, + steps_per_render=steps_per_render + ), + n_obs_steps=n_obs_steps, + n_action_steps=n_action_steps, + max_episode_steps=max_steps + ) + + env_fns = [env_fn] * n_envs + env_seeds = list() + env_prefixs = list() + env_init_fn_dills = list() + + # train + with h5py.File(dataset_path, 'r') as f: + for i in range(n_train): + train_idx = train_start_idx + i + enable_render = i < n_train_vis + init_state = f[f'data/demo_{train_idx}/states'][0] + + def init_fn(env, init_state=init_state, + enable_render=enable_render): + # setup rendering + # video_wrapper + assert isinstance(env.env, VideoRecordingWrapper) + 
env.env.video_recoder.stop() + env.env.file_path = None + if enable_render: + filename = pathlib.Path(output_dir).joinpath( + 'media', wv.util.generate_id() + ".mp4") + filename.parent.mkdir(parents=False, exist_ok=True) + filename = str(filename) + env.env.file_path = filename + + # switch to init_state reset + assert isinstance(env.env.env, RobomimicImageWrapper) + env.env.env.init_state = init_state + + env_seeds.append(train_idx) + env_prefixs.append('train/') + env_init_fn_dills.append(dill.dumps(init_fn)) + + # test + for i in range(n_test): + seed = test_start_seed + i + enable_render = i < n_test_vis + + def init_fn(env, seed=seed, + enable_render=enable_render): + # setup rendering + # video_wrapper + assert isinstance(env.env, VideoRecordingWrapper) + env.env.video_recoder.stop() + env.env.file_path = None + if enable_render: + filename = pathlib.Path(output_dir).joinpath( + 'media', wv.util.generate_id() + ".mp4") + filename.parent.mkdir(parents=False, exist_ok=True) + filename = str(filename) + env.env.file_path = filename + + # switch to seed reset + assert isinstance(env.env.env, RobomimicImageWrapper) + env.env.env.init_state = None + env.seed(seed) + + env_seeds.append(seed) + env_prefixs.append('test/') + env_init_fn_dills.append(dill.dumps(init_fn)) + + env = AsyncVectorEnv(env_fns, dummy_env_fn=dummy_env_fn) + # env = SyncVectorEnv(env_fns) + + + self.env_meta = env_meta + self.env = env + self.env_fns = env_fns + self.env_seeds = env_seeds + self.env_prefixs = env_prefixs + self.env_init_fn_dills = env_init_fn_dills + self.fps = fps + self.crf = crf + self.n_obs_steps = n_obs_steps + self.n_action_steps = n_action_steps + self.past_action = past_action + self.max_steps = max_steps + self.rotation_transformer = rotation_transformer + self.abs_action = abs_action + self.tqdm_interval_sec = tqdm_interval_sec + + def run(self, policy: BaseImagePolicy): + device = policy.device + dtype = policy.dtype + env = self.env + + # plan for rollout + n_envs = 
len(self.env_fns) + n_inits = len(self.env_init_fn_dills) + n_chunks = math.ceil(n_inits / n_envs) + + # allocate data + all_video_paths = [None] * n_inits + all_rewards = [None] * n_inits + + for chunk_idx in range(n_chunks): + start = chunk_idx * n_envs + end = min(n_inits, start + n_envs) + this_global_slice = slice(start, end) + this_n_active_envs = end - start + this_local_slice = slice(0,this_n_active_envs) + + this_init_fns = self.env_init_fn_dills[this_global_slice] + n_diff = n_envs - len(this_init_fns) + if n_diff > 0: + this_init_fns.extend([self.env_init_fn_dills[0]]*n_diff) + assert len(this_init_fns) == n_envs + + # init envs + env.call_each('run_dill_function', + args_list=[(x,) for x in this_init_fns]) + + # start rollout + obs = env.reset() + past_action = None + policy.reset() + + env_name = self.env_meta['env_name'] + pbar = tqdm.tqdm(total=self.max_steps, desc=f"Eval {env_name}Image {chunk_idx+1}/{n_chunks}", + leave=False, mininterval=self.tqdm_interval_sec) + + done = False + while not done: + # create obs dict + np_obs_dict = dict(obs) + if self.past_action and (past_action is not None): + # TODO: not tested + np_obs_dict['past_action'] = past_action[ + :,-(self.n_obs_steps-1):].astype(np.float32) + + # device transfer + obs_dict = dict_apply(np_obs_dict, + lambda x: torch.from_numpy(x).to( + device=device)) + + # run policy + with torch.no_grad(): + action_dict = policy.predict_action(obs_dict) + + # device_transfer + np_action_dict = dict_apply(action_dict, + lambda x: x.detach().to('cpu').numpy()) + + action = np_action_dict['action'] + if not np.all(np.isfinite(action)): + print(action) + raise RuntimeError("Nan or Inf action") + + # step env + env_action = action + if self.abs_action: + env_action = self.undo_transform_action(action) + + obs, reward, done, info = env.step(env_action) + done = np.all(done) + past_action = action + + # update pbar + pbar.update(action.shape[1]) + pbar.close() + + # collect data for this round + 
def undo_transform_action(self, action):
    """Convert a policy action back to the env-native rotation format.

    Per-arm input layout is ``[pos(3), rot_6d(6), gripper(1)]`` (10 dims); the
    6d rotation is mapped back through ``self.rotation_transformer.inverse``
    (constructed as axis_angle <-> rotation_6d, so the inverse yields 3 dims),
    giving a 7-dim per-arm output ``[pos(3), rot(3), gripper(1)]``.
    Dual-arm actions (last dim == 20) are split into two arms, converted, and
    re-joined to a 14-dim last axis; all leading axes are preserved.
    """
    in_shape = action.shape
    is_dual_arm = in_shape[-1] == 20
    if is_dual_arm:
        # fold the two arms into an extra axis so the split below is per-arm
        action = action.reshape(-1, 2, 10)

    rot_dim = action.shape[-1] - 4
    position = action[..., :3]
    rotation = self.rotation_transformer.inverse(action[..., 3:3 + rot_dim])
    grip = action[..., [-1]]
    restored = np.concatenate([position, rotation, grip], axis=-1)

    if is_dual_arm:
        restored = restored.reshape(*in_shape[:-1], 14)
    return restored
def create_env(env_meta, obs_keys):
    """Build a low-dim (state-only) robomimic env from dataset metadata.

    All ``obs_keys`` are registered as 'low_dim' modalities. Rendering is fully
    disabled: enabling render_offscreen is the only way to hide collision
    geometry, but it uses a lot of RAM, so low-dim rollouts run without it.
    """
    ObsUtils.initialize_obs_modality_mapping_from_dict({'low_dim': obs_keys})
    return EnvUtils.create_env_from_metadata(
        env_meta=env_meta,
        render=False,
        render_offscreen=False,
        use_image_obs=False,
    )
+ """ + + def __init__(self, + output_dir, + dataset_path, + obs_keys, + n_train=10, + n_train_vis=3, + train_start_idx=0, + n_test=22, + n_test_vis=6, + test_start_seed=10000, + max_steps=400, + n_obs_steps=2, + n_action_steps=8, + n_latency_steps=0, + render_hw=(256,256), + render_camera_name='agentview', + fps=10, + crf=22, + past_action=False, + abs_action=False, + tqdm_interval_sec=5.0, + n_envs=None + ): + """ + Assuming: + n_obs_steps=2 + n_latency_steps=3 + n_action_steps=4 + o: obs + i: inference + a: action + Batch t: + |o|o| | | | | | | + | |i|i|i| | | | | + | | | | |a|a|a|a| + Batch t+1 + | | | | |o|o| | | | | | | + | | | | | |i|i|i| | | | | + | | | | | | | | |a|a|a|a| + """ + + super().__init__(output_dir) + + if n_envs is None: + n_envs = n_train + n_test + + # handle latency step + # to mimic latency, we request n_latency_steps addtional steps + # of past observations, and the discard the last n_latency_steps + env_n_obs_steps = n_obs_steps + n_latency_steps + env_n_action_steps = n_action_steps + + # assert n_obs_steps <= n_action_steps + dataset_path = os.path.expanduser(dataset_path) + robosuite_fps = 20 + steps_per_render = max(robosuite_fps // fps, 1) + + # read from dataset + env_meta = FileUtils.get_env_metadata_from_dataset( + dataset_path) + rotation_transformer = None + if abs_action: + env_meta['env_kwargs']['controller_configs']['control_delta'] = False + rotation_transformer = RotationTransformer('axis_angle', 'rotation_6d') + + def env_fn(): + robomimic_env = create_env( + env_meta=env_meta, + obs_keys=obs_keys + ) + # hard reset doesn't influence lowdim env + # robomimic_env.env.hard_reset = False + return MultiStepWrapper( + VideoRecordingWrapper( + RobomimicLowdimWrapper( + env=robomimic_env, + obs_keys=obs_keys, + init_state=None, + render_hw=render_hw, + render_camera_name=render_camera_name + ), + video_recoder=VideoRecorder.create_h264( + fps=fps, + codec='h264', + input_pix_fmt='rgb24', + crf=crf, + thread_type='FRAME', + 
thread_count=1 + ), + file_path=None, + steps_per_render=steps_per_render + ), + n_obs_steps=env_n_obs_steps, + n_action_steps=env_n_action_steps, + max_episode_steps=max_steps + ) + + env_fns = [env_fn] * n_envs + env_seeds = list() + env_prefixs = list() + env_init_fn_dills = list() + + # train + with h5py.File(dataset_path, 'r') as f: + for i in range(n_train): + train_idx = train_start_idx + i + enable_render = i < n_train_vis + init_state = f[f'data/demo_{train_idx}/states'][0] + + def init_fn(env, init_state=init_state, + enable_render=enable_render): + # setup rendering + # video_wrapper + assert isinstance(env.env, VideoRecordingWrapper) + env.env.video_recoder.stop() + env.env.file_path = None + if enable_render: + filename = pathlib.Path(output_dir).joinpath( + 'media', wv.util.generate_id() + ".mp4") + filename.parent.mkdir(parents=False, exist_ok=True) + filename = str(filename) + env.env.file_path = filename + + # switch to init_state reset + assert isinstance(env.env.env, RobomimicLowdimWrapper) + env.env.env.init_state = init_state + + env_seeds.append(train_idx) + env_prefixs.append('train/') + env_init_fn_dills.append(dill.dumps(init_fn)) + + # test + for i in range(n_test): + seed = test_start_seed + i + enable_render = i < n_test_vis + + def init_fn(env, seed=seed, + enable_render=enable_render): + # setup rendering + # video_wrapper + assert isinstance(env.env, VideoRecordingWrapper) + env.env.video_recoder.stop() + env.env.file_path = None + if enable_render: + filename = pathlib.Path(output_dir).joinpath( + 'media', wv.util.generate_id() + ".mp4") + filename.parent.mkdir(parents=False, exist_ok=True) + filename = str(filename) + env.env.file_path = filename + + # switch to seed reset + assert isinstance(env.env.env, RobomimicLowdimWrapper) + env.env.env.init_state = None + env.seed(seed) + + env_seeds.append(seed) + env_prefixs.append('test/') + env_init_fn_dills.append(dill.dumps(init_fn)) + + env = AsyncVectorEnv(env_fns) + # env = 
SyncVectorEnv(env_fns) + + self.env_meta = env_meta + self.env = env + self.env_fns = env_fns + self.env_seeds = env_seeds + self.env_prefixs = env_prefixs + self.env_init_fn_dills = env_init_fn_dills + self.fps = fps + self.crf = crf + self.n_obs_steps = n_obs_steps + self.n_action_steps = n_action_steps + self.n_latency_steps = n_latency_steps + self.env_n_obs_steps = env_n_obs_steps + self.env_n_action_steps = env_n_action_steps + self.past_action = past_action + self.max_steps = max_steps + self.rotation_transformer = rotation_transformer + self.abs_action = abs_action + self.tqdm_interval_sec = tqdm_interval_sec + + def run(self, policy: BaseLowdimPolicy): + device = policy.device + dtype = policy.dtype + env = self.env + + # plan for rollout + n_envs = len(self.env_fns) + n_inits = len(self.env_init_fn_dills) + n_chunks = math.ceil(n_inits / n_envs) + + # allocate data + all_video_paths = [None] * n_inits + all_rewards = [None] * n_inits + + for chunk_idx in range(n_chunks): + start = chunk_idx * n_envs + end = min(n_inits, start + n_envs) + this_global_slice = slice(start, end) + this_n_active_envs = end - start + this_local_slice = slice(0,this_n_active_envs) + + this_init_fns = self.env_init_fn_dills[this_global_slice] + n_diff = n_envs - len(this_init_fns) + if n_diff > 0: + this_init_fns.extend([self.env_init_fn_dills[0]]*n_diff) + assert len(this_init_fns) == n_envs + + # init envs + env.call_each('run_dill_function', + args_list=[(x,) for x in this_init_fns]) + + # start rollout + obs = env.reset() + past_action = None + policy.reset() + + env_name = self.env_meta['env_name'] + pbar = tqdm.tqdm(total=self.max_steps, desc=f"Eval {env_name}Lowdim {chunk_idx+1}/{n_chunks}", + leave=False, mininterval=self.tqdm_interval_sec) + + done = False + while not done: + # create obs dict + np_obs_dict = { + # handle n_latency_steps by discarding the last n_latency_steps + 'obs': obs[:,:self.n_obs_steps].astype(np.float32) + } + if self.past_action and (past_action 
is not None): + # TODO: not tested + np_obs_dict['past_action'] = past_action[ + :,-(self.n_obs_steps-1):].astype(np.float32) + + # device transfer + obs_dict = dict_apply(np_obs_dict, + lambda x: torch.from_numpy(x).to( + device=device)) + + # run policy + with torch.no_grad(): + action_dict = policy.predict_action(obs_dict) + + # device_transfer + np_action_dict = dict_apply(action_dict, + lambda x: x.detach().to('cpu').numpy()) + + # handle latency_steps, we discard the first n_latency_steps actions + # to simulate latency + action = np_action_dict['action'][:,self.n_latency_steps:] + if not np.all(np.isfinite(action)): + print(action) + raise RuntimeError("Nan or Inf action") + + # step env + env_action = action + if self.abs_action: + env_action = self.undo_transform_action(action) + + obs, reward, done, info = env.step(env_action) + done = np.all(done) + past_action = action + + # update pbar + pbar.update(action.shape[1]) + pbar.close() + + # collect data for this round + all_video_paths[this_global_slice] = env.render()[this_local_slice] + all_rewards[this_global_slice] = env.call('get_attr', 'reward')[this_local_slice] + + # log + max_rewards = collections.defaultdict(list) + log_data = dict() + for i in range(len(self.env_fns)): + seed = self.env_seeds[i] + prefix = self.env_prefixs[i] + max_reward = np.max(all_rewards[i]) + max_rewards[prefix].append(max_reward) + log_data[prefix+f'sim_max_reward_{seed}'] = max_reward + + # visualize sim + video_path = all_video_paths[i] + if video_path is not None: + sim_video = wandb.Video(video_path) + log_data[prefix+f'sim_video_{seed}'] = sim_video + + # log aggregate metrics + for prefix, value in max_rewards.items(): + name = prefix+'mean_score' + value = np.mean(value) + log_data[name] = value + + return log_data + + def undo_transform_action(self, action): + raw_shape = action.shape + if raw_shape[-1] == 20: + # dual arm + action = action.reshape(-1,2,10) + + d_rot = action.shape[-1] - 4 + pos = action[...,:3] + 
rot = action[...,3:3+d_rot] + gripper = action[...,[-1]] + rot = self.rotation_transformer.inverse(rot) + uaction = np.concatenate([ + pos, rot, gripper + ], axis=-1) + + if raw_shape[-1] == 20: + # dual arm + uaction = uaction.reshape(*raw_shape[:-1], 14) + + return uaction diff --git a/diffusion_policy/gym_util/async_vector_env.py b/diffusion_policy/gym_util/async_vector_env.py new file mode 100644 index 0000000..dfb0f62 --- /dev/null +++ b/diffusion_policy/gym_util/async_vector_env.py @@ -0,0 +1,671 @@ +""" +Back ported methods: call, set_attr from v0.26 +Disabled auto-reset after done +Added render method. +""" + + +import numpy as np +import multiprocessing as mp +import time +import sys +from enum import Enum +from copy import deepcopy + +from gym import logger +from gym.vector.vector_env import VectorEnv +from gym.error import ( + AlreadyPendingCallError, + NoAsyncCallError, + ClosedEnvironmentError, + CustomSpaceError, +) +from gym.vector.utils import ( + create_shared_memory, + create_empty_array, + write_to_shared_memory, + read_from_shared_memory, + concatenate, + CloudpickleWrapper, + clear_mpi_env_vars, +) + +__all__ = ["AsyncVectorEnv"] + + +class AsyncState(Enum): + DEFAULT = "default" + WAITING_RESET = "reset" + WAITING_STEP = "step" + WAITING_CALL = "call" + + +class AsyncVectorEnv(VectorEnv): + """Vectorized environment that runs multiple environments in parallel. It + uses `multiprocessing` processes, and pipes for communication. + Parameters + ---------- + env_fns : iterable of callable + Functions that create the environments. + observation_space : `gym.spaces.Space` instance, optional + Observation space of a single environment. If `None`, then the + observation space of the first environment is taken. + action_space : `gym.spaces.Space` instance, optional + Action space of a single environment. If `None`, then the action space + of the first environment is taken. 
+ shared_memory : bool (default: `True`) + If `True`, then the observations from the worker processes are + communicated back through shared variables. This can improve the + efficiency if the observations are large (e.g. images). + copy : bool (default: `True`) + If `True`, then the `reset` and `step` methods return a copy of the + observations. + context : str, optional + Context for multiprocessing. If `None`, then the default context is used. + Only available in Python 3. + daemon : bool (default: `True`) + If `True`, then subprocesses have `daemon` flag turned on; that is, they + will quit if the head process quits. However, `daemon=True` prevents + subprocesses to spawn children, so for some environments you may want + to have it set to `False` + worker : function, optional + WARNING - advanced mode option! If set, then use that worker in a subprocess + instead of a default one. Can be useful to override some inner vector env + logic, for instance, how resets on done are handled. Provides high + degree of flexibility and a high chance to shoot yourself in the foot; thus, + if you are writing your own worker, it is recommended to start from the code + for `_worker` (or `_worker_shared_memory`) method below, and add changes + """ + + def __init__( + self, + env_fns, + dummy_env_fn=None, + observation_space=None, + action_space=None, + shared_memory=True, + copy=True, + context=None, + daemon=True, + worker=None, + ): + ctx = mp.get_context(context) + self.env_fns = env_fns + self.shared_memory = shared_memory + self.copy = copy + + # Added dummy_env_fn to fix OpenGL error in Mujoco + # disable any OpenGL rendering in dummy_env_fn, since it + # will conflict with OpenGL context in the forked child process + if dummy_env_fn is None: + dummy_env_fn = env_fns[0] + dummy_env = dummy_env_fn() + self.metadata = dummy_env.metadata + + if (observation_space is None) or (action_space is None): + observation_space = observation_space or dummy_env.observation_space + 
action_space = action_space or dummy_env.action_space + dummy_env.close() + del dummy_env + super(AsyncVectorEnv, self).__init__( + num_envs=len(env_fns), + observation_space=observation_space, + action_space=action_space, + ) + + if self.shared_memory: + try: + _obs_buffer = create_shared_memory( + self.single_observation_space, n=self.num_envs, ctx=ctx + ) + self.observations = read_from_shared_memory( + _obs_buffer, self.single_observation_space, n=self.num_envs + ) + except CustomSpaceError: + raise ValueError( + "Using `shared_memory=True` in `AsyncVectorEnv` " + "is incompatible with non-standard Gym observation spaces " + "(i.e. custom spaces inheriting from `gym.Space`), and is " + "only compatible with default Gym spaces (e.g. `Box`, " + "`Tuple`, `Dict`) for batching. Set `shared_memory=False` " + "if you use custom observation spaces." + ) + else: + _obs_buffer = None + self.observations = create_empty_array( + self.single_observation_space, n=self.num_envs, fn=np.zeros + ) + + self.parent_pipes, self.processes = [], [] + self.error_queue = ctx.Queue() + target = _worker_shared_memory if self.shared_memory else _worker + target = worker or target + with clear_mpi_env_vars(): + for idx, env_fn in enumerate(self.env_fns): + parent_pipe, child_pipe = ctx.Pipe() + process = ctx.Process( + target=target, + name="Worker<{0}>-{1}".format(type(self).__name__, idx), + args=( + idx, + CloudpickleWrapper(env_fn), + child_pipe, + parent_pipe, + _obs_buffer, + self.error_queue, + ), + ) + + self.parent_pipes.append(parent_pipe) + self.processes.append(process) + + process.daemon = daemon + process.start() + child_pipe.close() + + self._state = AsyncState.DEFAULT + self._check_observation_spaces() + + def seed(self, seeds=None): + self._assert_is_running() + if seeds is None: + seeds = [None for _ in range(self.num_envs)] + if isinstance(seeds, int): + seeds = [seeds + i for i in range(self.num_envs)] + assert len(seeds) == self.num_envs + + if self._state != 
AsyncState.DEFAULT: + raise AlreadyPendingCallError( + "Calling `seed` while waiting " + "for a pending call to `{0}` to complete.".format(self._state.value), + self._state.value, + ) + + for pipe, seed in zip(self.parent_pipes, seeds): + pipe.send(("seed", seed)) + _, successes = zip(*[pipe.recv() for pipe in self.parent_pipes]) + self._raise_if_errors(successes) + + def reset_async(self): + self._assert_is_running() + if self._state != AsyncState.DEFAULT: + raise AlreadyPendingCallError( + "Calling `reset_async` while waiting " + "for a pending call to `{0}` to complete".format(self._state.value), + self._state.value, + ) + + for pipe in self.parent_pipes: + pipe.send(("reset", None)) + self._state = AsyncState.WAITING_RESET + + def reset_wait(self, timeout=None): + """ + Parameters + ---------- + timeout : int or float, optional + Number of seconds before the call to `reset_wait` times out. If + `None`, the call to `reset_wait` never times out. + Returns + ------- + observations : sample from `observation_space` + A batch of observations from the vectorized environment. 
+ """ + self._assert_is_running() + if self._state != AsyncState.WAITING_RESET: + raise NoAsyncCallError( + "Calling `reset_wait` without any prior " "call to `reset_async`.", + AsyncState.WAITING_RESET.value, + ) + + if not self._poll(timeout): + self._state = AsyncState.DEFAULT + raise mp.TimeoutError( + "The call to `reset_wait` has timed out after " + "{0} second{1}.".format(timeout, "s" if timeout > 1 else "") + ) + + results, successes = zip(*[pipe.recv() for pipe in self.parent_pipes]) + self._raise_if_errors(successes) + self._state = AsyncState.DEFAULT + + if not self.shared_memory: + self.observations = concatenate( + results, self.observations, self.single_observation_space + ) + + return deepcopy(self.observations) if self.copy else self.observations + + def step_async(self, actions): + """ + Parameters + ---------- + actions : iterable of samples from `action_space` + List of actions. + """ + self._assert_is_running() + if self._state != AsyncState.DEFAULT: + raise AlreadyPendingCallError( + "Calling `step_async` while waiting " + "for a pending call to `{0}` to complete.".format(self._state.value), + self._state.value, + ) + + for pipe, action in zip(self.parent_pipes, actions): + pipe.send(("step", action)) + self._state = AsyncState.WAITING_STEP + + def step_wait(self, timeout=None): + """ + Parameters + ---------- + timeout : int or float, optional + Number of seconds before the call to `step_wait` times out. If + `None`, the call to `step_wait` never times out. + Returns + ------- + observations : sample from `observation_space` + A batch of observations from the vectorized environment. + rewards : `np.ndarray` instance (dtype `np.float_`) + A vector of rewards from the vectorized environment. + dones : `np.ndarray` instance (dtype `np.bool_`) + A vector whose entries indicate whether the episode has ended. + infos : list of dict + A list of auxiliary diagnostic information. 
+ """ + self._assert_is_running() + if self._state != AsyncState.WAITING_STEP: + raise NoAsyncCallError( + "Calling `step_wait` without any prior call " "to `step_async`.", + AsyncState.WAITING_STEP.value, + ) + + if not self._poll(timeout): + self._state = AsyncState.DEFAULT + raise mp.TimeoutError( + "The call to `step_wait` has timed out after " + "{0} second{1}.".format(timeout, "s" if timeout > 1 else "") + ) + + results, successes = zip(*[pipe.recv() for pipe in self.parent_pipes]) + self._raise_if_errors(successes) + self._state = AsyncState.DEFAULT + observations_list, rewards, dones, infos = zip(*results) + + if not self.shared_memory: + self.observations = concatenate( + observations_list, self.observations, self.single_observation_space + ) + + return ( + deepcopy(self.observations) if self.copy else self.observations, + np.array(rewards), + np.array(dones, dtype=np.bool_), + infos, + ) + + def close_extras(self, timeout=None, terminate=False): + """ + Parameters + ---------- + timeout : int or float, optional + Number of seconds before the call to `close` times out. If `None`, + the call to `close` never times out. If the call to `close` times + out, then all processes are terminated. + terminate : bool (default: `False`) + If `True`, then the `close` operation is forced and all processes + are terminated. 
+ """ + timeout = 0 if terminate else timeout + try: + if self._state != AsyncState.DEFAULT: + logger.warn( + "Calling `close` while waiting for a pending " + "call to `{0}` to complete.".format(self._state.value) + ) + function = getattr(self, "{0}_wait".format(self._state.value)) + function(timeout) + except mp.TimeoutError: + terminate = True + + if terminate: + for process in self.processes: + if process.is_alive(): + process.terminate() + else: + for pipe in self.parent_pipes: + if (pipe is not None) and (not pipe.closed): + pipe.send(("close", None)) + for pipe in self.parent_pipes: + if (pipe is not None) and (not pipe.closed): + pipe.recv() + + for pipe in self.parent_pipes: + if pipe is not None: + pipe.close() + for process in self.processes: + process.join() + + def _poll(self, timeout=None): + self._assert_is_running() + if timeout is None: + return True + end_time = time.perf_counter() + timeout + delta = None + for pipe in self.parent_pipes: + delta = max(end_time - time.perf_counter(), 0) + if pipe is None: + return False + if pipe.closed or (not pipe.poll(delta)): + return False + return True + + def _check_observation_spaces(self): + self._assert_is_running() + for pipe in self.parent_pipes: + pipe.send(("_check_observation_space", self.single_observation_space)) + same_spaces, successes = zip(*[pipe.recv() for pipe in self.parent_pipes]) + self._raise_if_errors(successes) + if not all(same_spaces): + raise RuntimeError( + "Some environments have an observation space " + "different from `{0}`. 
In order to batch observations, the " + "observation spaces from all environments must be " + "equal.".format(self.single_observation_space) + ) + + def _assert_is_running(self): + if self.closed: + raise ClosedEnvironmentError( + "Trying to operate on `{0}`, after a " + "call to `close()`.".format(type(self).__name__) + ) + + def _raise_if_errors(self, successes): + if all(successes): + return + + num_errors = self.num_envs - sum(successes) + assert num_errors > 0 + for _ in range(num_errors): + index, exctype, value = self.error_queue.get() + logger.error( + "Received the following error from Worker-{0}: " + "{1}: {2}".format(index, exctype.__name__, value) + ) + logger.error("Shutting down Worker-{0}.".format(index)) + self.parent_pipes[index].close() + self.parent_pipes[index] = None + + logger.error("Raising the last exception back to the main process.") + raise exctype(value) + + def call_async(self, name: str, *args, **kwargs): + """Calls the method with name asynchronously and apply args and kwargs to the method. + + Args: + name: Name of the method or property to call. + *args: Arguments to apply to the method call. + **kwargs: Keyword arguments to apply to the method call. + + Raises: + ClosedEnvironmentError: If the environment was closed (if :meth:`close` was previously called). + AlreadyPendingCallError: Calling `call_async` while waiting for a pending call to complete + """ + self._assert_is_running() + if self._state != AsyncState.DEFAULT: + raise AlreadyPendingCallError( + "Calling `call_async` while waiting " + f"for a pending call to `{self._state.value}` to complete.", + self._state.value, + ) + + for pipe in self.parent_pipes: + pipe.send(("_call", (name, args, kwargs))) + self._state = AsyncState.WAITING_CALL + + def call_wait(self, timeout = None) -> list: + """Calls all parent pipes and waits for the results. + + Args: + timeout: Number of seconds before the call to `step_wait` times out. 
+ If `None` (default), the call to `step_wait` never times out. + + Returns: + List of the results of the individual calls to the method or property for each environment. + + Raises: + NoAsyncCallError: Calling `call_wait` without any prior call to `call_async`. + TimeoutError: The call to `call_wait` has timed out after timeout second(s). + """ + self._assert_is_running() + if self._state != AsyncState.WAITING_CALL: + raise NoAsyncCallError( + "Calling `call_wait` without any prior call to `call_async`.", + AsyncState.WAITING_CALL.value, + ) + + if not self._poll(timeout): + self._state = AsyncState.DEFAULT + raise mp.TimeoutError( + f"The call to `call_wait` has timed out after {timeout} second(s)." + ) + + results, successes = zip(*[pipe.recv() for pipe in self.parent_pipes]) + self._raise_if_errors(successes) + self._state = AsyncState.DEFAULT + + return results + + def call(self, name: str, *args, **kwargs): + """Call a method, or get a property, from each parallel environment. + + Args: + name (str): Name of the method or property to call. + *args: Arguments to apply to the method call. + **kwargs: Keyword arguments to apply to the method call. + + Returns: + List of the results of the individual calls to the method or property for each environment. 
+ """ + self.call_async(name, *args, **kwargs) + return self.call_wait() + + + def call_each(self, name: str, + args_list: list=None, + kwargs_list: list=None, + timeout = None): + n_envs = len(self.parent_pipes) + if args_list is None: + args_list = [[]] * n_envs + assert len(args_list) == n_envs + + if kwargs_list is None: + kwargs_list = [dict()] * n_envs + assert len(kwargs_list) == n_envs + + # send + self._assert_is_running() + if self._state != AsyncState.DEFAULT: + raise AlreadyPendingCallError( + "Calling `call_async` while waiting " + f"for a pending call to `{self._state.value}` to complete.", + self._state.value, + ) + + for i, pipe in enumerate(self.parent_pipes): + pipe.send(("_call", (name, args_list[i], kwargs_list[i]))) + self._state = AsyncState.WAITING_CALL + + # receive + self._assert_is_running() + if self._state != AsyncState.WAITING_CALL: + raise NoAsyncCallError( + "Calling `call_wait` without any prior call to `call_async`.", + AsyncState.WAITING_CALL.value, + ) + + if not self._poll(timeout): + self._state = AsyncState.DEFAULT + raise mp.TimeoutError( + f"The call to `call_wait` has timed out after {timeout} second(s)." + ) + + results, successes = zip(*[pipe.recv() for pipe in self.parent_pipes]) + self._raise_if_errors(successes) + self._state = AsyncState.DEFAULT + + return results + + + def set_attr(self, name: str, values): + """Sets an attribute of the sub-environments. + + Args: + name: Name of the property to be set in each individual environment. + values: Values of the property to be set to. If ``values`` is a list or + tuple, then it corresponds to the values for each individual + environment, otherwise a single value is set for all environments. + + Raises: + ValueError: Values must be a list or tuple with length equal to the number of environments. + AlreadyPendingCallError: Calling `set_attr` while waiting for a pending call to complete. 
+ """ + self._assert_is_running() + if not isinstance(values, (list, tuple)): + values = [values for _ in range(self.num_envs)] + if len(values) != self.num_envs: + raise ValueError( + "Values must be a list or tuple with length equal to the " + f"number of environments. Got `{len(values)}` values for " + f"{self.num_envs} environments." + ) + + if self._state != AsyncState.DEFAULT: + raise AlreadyPendingCallError( + "Calling `set_attr` while waiting " + f"for a pending call to `{self._state.value}` to complete.", + self._state.value, + ) + + for pipe, value in zip(self.parent_pipes, values): + pipe.send(("_setattr", (name, value))) + _, successes = zip(*[pipe.recv() for pipe in self.parent_pipes]) + self._raise_if_errors(successes) + + def render(self, *args, **kwargs): + return self.call('render', *args, **kwargs) + + + +def _worker(index, env_fn, pipe, parent_pipe, shared_memory, error_queue): + assert shared_memory is None + env = env_fn() + parent_pipe.close() + try: + while True: + command, data = pipe.recv() + if command == "reset": + observation = env.reset() + pipe.send((observation, True)) + elif command == "step": + observation, reward, done, info = env.step(data) + # if done: + # observation = env.reset() + pipe.send(((observation, reward, done, info), True)) + elif command == "seed": + env.seed(data) + pipe.send((None, True)) + elif command == "close": + pipe.send((None, True)) + break + elif command == "_call": + name, args, kwargs = data + if name in ["reset", "step", "seed", "close"]: + raise ValueError( + f"Trying to call function `{name}` with " + f"`_call`. Use `{name}` directly instead." 
+ ) + function = getattr(env, name) + if callable(function): + pipe.send((function(*args, **kwargs), True)) + else: + pipe.send((function, True)) + elif command == "_setattr": + name, value = data + setattr(env, name, value) + pipe.send((None, True)) + + elif command == "_check_observation_space": + pipe.send((data == env.observation_space, True)) + else: + raise RuntimeError( + "Received unknown command `{0}`. Must " + "be one of {`reset`, `step`, `seed`, `close`, " + "`_check_observation_space`}.".format(command) + ) + except (KeyboardInterrupt, Exception): + error_queue.put((index,) + sys.exc_info()[:2]) + pipe.send((None, False)) + finally: + env.close() + + +def _worker_shared_memory(index, env_fn, pipe, parent_pipe, shared_memory, error_queue): + assert shared_memory is not None + env = env_fn() + observation_space = env.observation_space + parent_pipe.close() + try: + while True: + command, data = pipe.recv() + if command == "reset": + observation = env.reset() + write_to_shared_memory( + index, observation, shared_memory, observation_space + ) + pipe.send((None, True)) + elif command == "step": + observation, reward, done, info = env.step(data) + # if done: + # observation = env.reset() + write_to_shared_memory( + index, observation, shared_memory, observation_space + ) + pipe.send(((None, reward, done, info), True)) + elif command == "seed": + env.seed(data) + pipe.send((None, True)) + elif command == "close": + pipe.send((None, True)) + break + elif command == "_call": + name, args, kwargs = data + if name in ["reset", "step", "seed", "close"]: + raise ValueError( + f"Trying to call function `{name}` with " + f"`_call`. Use `{name}` directly instead." 
def stack_repeated(x, n):
    """Stack `n` copies of `x` along a new leading axis."""
    return np.repeat(np.expand_dims(x, axis=0), n, axis=0)

def repeated_box(box_space, n):
    """Repeat a `spaces.Box` `n` times along a new leading dimension."""
    return spaces.Box(
        low=stack_repeated(box_space.low, n),
        high=stack_repeated(box_space.high, n),
        shape=(n,) + box_space.shape,
        dtype=box_space.dtype
    )

def repeated_space(space, n):
    """Repeat a Box or Dict space `n` times (recursing into Dict values)."""
    if isinstance(space, spaces.Box):
        return repeated_box(space, n)
    elif isinstance(space, spaces.Dict):
        result_space = spaces.Dict()
        for key, value in space.items():
            result_space[key] = repeated_space(value, n)
        return result_space
    else:
        raise RuntimeError(f'Unsupported space type {type(space)}')

def take_last_n(x, n):
    """Return the last `n` items of `x` as an array (fewer if `x` is shorter)."""
    x = list(x)
    n = min(len(x), n)
    return np.array(x[-n:])

def dict_take_last_n(x, n):
    """Apply `take_last_n` to every value of dict `x`."""
    result = dict()
    for key, value in x.items():
        result[key] = take_last_n(value, n)
    return result

def aggregate(data, method='max'):
    """Reduce a sequence of per-step values to a scalar.

    For boolean data, 'max' is equivalent to `any` and 'min' to `all`.
    """
    if method == 'max':
        # equivalent to any
        return np.max(data)
    elif method == 'min':
        # equivalent to all
        return np.min(data)
    elif method == 'mean':
        return np.mean(data)
    elif method == 'sum':
        return np.sum(data)
    else:
        # BUGFIX: include the offending method in the error message.
        raise NotImplementedError(f"Unsupported aggregation method: {method!r}")

def stack_last_n_obs(all_obs, n_steps):
    """Stack the last `n_steps` observations into (n_steps,) + obs_shape.

    If fewer than `n_steps` observations are available, the earliest
    available observation is repeated to pad the front.
    """
    assert(len(all_obs) > 0)
    all_obs = list(all_obs)
    result = np.zeros((n_steps,) + all_obs[-1].shape,
        dtype=all_obs[-1].dtype)
    start_idx = -min(n_steps, len(all_obs))
    result[start_idx:] = np.array(all_obs[start_idx:])
    if n_steps > len(all_obs):
        # pad the front with the oldest available observation
        result[:start_idx] = result[start_idx]
    return result
self._add_info(info) + + observation = self._get_obs(self.n_obs_steps) + reward = aggregate(self.reward, self.reward_agg_method) + done = aggregate(self.done, 'max') + info = dict_take_last_n(self.info, self.n_obs_steps) + return observation, reward, done, info + + def _get_obs(self, n_steps=1): + """ + Output (n_steps,) + obs_shape + """ + assert(len(self.obs) > 0) + if isinstance(self.observation_space, spaces.Box): + return stack_last_n_obs(self.obs, n_steps) + elif isinstance(self.observation_space, spaces.Dict): + result = dict() + for key in self.observation_space.keys(): + result[key] = stack_last_n_obs( + [obs[key] for obs in self.obs], + n_steps + ) + return result + else: + raise RuntimeError('Unsupported space type') + + def _add_info(self, info): + for key, value in info.items(): + self.info[key].append(value) + + def get_rewards(self): + return self.reward + + def get_attr(self, name): + return getattr(self, name) + + def run_dill_function(self, dill_fn): + fn = dill.loads(dill_fn) + return fn(self) + + def get_infos(self): + result = dict() + for k, v in self.info.items(): + result[k] = list(v) + return result diff --git a/diffusion_policy/gym_util/sync_vector_env.py b/diffusion_policy/gym_util/sync_vector_env.py new file mode 100644 index 0000000..c85a682 --- /dev/null +++ b/diffusion_policy/gym_util/sync_vector_env.py @@ -0,0 +1,182 @@ +import numpy as np +from copy import deepcopy + +from gym import logger +from gym.vector.vector_env import VectorEnv +from gym.vector.utils import concatenate, create_empty_array + +__all__ = ["SyncVectorEnv"] + + +class SyncVectorEnv(VectorEnv): + """Vectorized environment that serially runs multiple environments. + Parameters + ---------- + env_fns : iterable of callable + Functions that create the environments. + observation_space : `gym.spaces.Space` instance, optional + Observation space of a single environment. If `None`, then the + observation space of the first environment is taken. 
+ action_space : `gym.spaces.Space` instance, optional + Action space of a single environment. If `None`, then the action space + of the first environment is taken. + copy : bool (default: `True`) + If `True`, then the `reset` and `step` methods return a copy of the + observations. + """ + + def __init__(self, env_fns, observation_space=None, action_space=None, copy=True): + self.env_fns = env_fns + self.envs = [env_fn() for env_fn in env_fns] + self.copy = copy + self.metadata = self.envs[0].metadata + + if (observation_space is None) or (action_space is None): + observation_space = observation_space or self.envs[0].observation_space + action_space = action_space or self.envs[0].action_space + super(SyncVectorEnv, self).__init__( + num_envs=len(env_fns), + observation_space=observation_space, + action_space=action_space, + ) + + self._check_observation_spaces() + self.observations = create_empty_array( + self.single_observation_space, n=self.num_envs, fn=np.zeros + ) + self._rewards = np.zeros((self.num_envs,), dtype=np.float64) + self._dones = np.zeros((self.num_envs,), dtype=np.bool_) + # self._rewards = [0] * self.num_envs + # self._dones = [False] * self.num_envs + self._actions = None + + def seed(self, seeds=None): + if seeds is None: + seeds = [None for _ in range(self.num_envs)] + if isinstance(seeds, int): + seeds = [seeds + i for i in range(self.num_envs)] + assert len(seeds) == self.num_envs + + for env, seed in zip(self.envs, seeds): + env.seed(seed) + + def reset_wait(self): + self._dones[:] = False + observations = [] + for env in self.envs: + observation = env.reset() + observations.append(observation) + self.observations = concatenate( + observations, self.observations, self.single_observation_space + ) + + return deepcopy(self.observations) if self.copy else self.observations + + def step_async(self, actions): + self._actions = actions + + def step_wait(self): + observations, infos = [], [] + for i, (env, action) in enumerate(zip(self.envs, 
self._actions)): + observation, self._rewards[i], self._dones[i], info = env.step(action) + # if self._dones[i]: + # observation = env.reset() + observations.append(observation) + infos.append(info) + self.observations = concatenate( + observations, self.observations, self.single_observation_space + ) + + return ( + deepcopy(self.observations) if self.copy else self.observations, + np.copy(self._rewards), + np.copy(self._dones), + infos, + ) + + def close_extras(self, **kwargs): + [env.close() for env in self.envs] + + def _check_observation_spaces(self): + for env in self.envs: + if not (env.observation_space == self.single_observation_space): + break + else: + return True + raise RuntimeError( + "Some environments have an observation space " + "different from `{0}`. In order to batch observations, the " + "observation spaces from all environments must be " + "equal.".format(self.single_observation_space) + ) + + def call(self, name, *args, **kwargs) -> tuple: + """Calls the method with name and applies args and kwargs. 
+ + Args: + name: The method name + *args: The method args + **kwargs: The method kwargs + + Returns: + Tuple of results + """ + results = [] + for env in self.envs: + function = getattr(env, name) + if callable(function): + results.append(function(*args, **kwargs)) + else: + results.append(function) + + return tuple(results) + + def call_each(self, name: str, + args_list: list=None, + kwargs_list: list=None): + n_envs = len(self.envs) + if args_list is None: + args_list = [[]] * n_envs + assert len(args_list) == n_envs + + if kwargs_list is None: + kwargs_list = [dict()] * n_envs + assert len(kwargs_list) == n_envs + + results = [] + for i, env in enumerate(self.envs): + function = getattr(env, name) + if callable(function): + results.append(function(*args_list[i], **kwargs_list[i])) + else: + results.append(function) + + return tuple(results) + + + def render(self, *args, **kwargs): + return self.call('render', *args, **kwargs) + + def set_attr(self, name: str, values): + """Sets an attribute of the sub-environments. + + Args: + name: The property name to change + values: Values of the property to be set to. If ``values`` is a list or + tuple, then it corresponds to the values for each individual + environment, otherwise, a single value is set for all environments. + + Raises: + ValueError: Values must be a list or tuple with length equal to the number of environments. + """ + if not isinstance(values, (list, tuple)): + values = [values for _ in range(self.num_envs)] + if len(values) != self.num_envs: + raise ValueError( + "Values must be a list or tuple with length equal to the " + f"number of environments. Got `{len(values)}` values for " + f"{self.num_envs} environments." 
class VideoRecordingWrapper(gym.Wrapper):
    """Streams rendered frames into a `VideoRecorder` while stepping.

    Recording is disabled entirely when `file_path` is None.
    """

    def __init__(self,
            env,
            video_recoder: VideoRecorder,
            mode='rgb_array',
            file_path=None,
            steps_per_render=1,
            **kwargs
        ):
        """
        When file_path is None, don't record.
        """
        super().__init__(env)

        self.mode = mode
        self.render_kwargs = kwargs
        self.steps_per_render = steps_per_render
        self.file_path = file_path
        self.video_recoder = video_recoder

        self.step_count = 0

    def reset(self, **kwargs):
        obs = super().reset(**kwargs)
        # NOTE(review): `frames` is never read by this wrapper; it looks like
        # leftover from VideoWrapper — confirm before removing.
        self.frames = list()
        self.step_count = 1
        self.video_recoder.stop()
        return obs

    def step(self, action):
        result = super().step(action)
        self.step_count += 1
        recording = self.file_path is not None
        if recording and ((self.step_count % self.steps_per_render) == 0):
            # Lazily open the output file on the first recorded frame.
            if not self.video_recoder.is_ready():
                self.video_recoder.start(self.file_path)

            frame = self.env.render(mode=self.mode, **self.render_kwargs)
            assert frame.dtype == np.uint8
            self.video_recoder.write_frame(frame)
        return result

    def render(self, mode='rgb_array', **kwargs):
        # Finalize the video and hand back its path instead of an image.
        if self.video_recoder.is_ready():
            self.video_recoder.stop()
        return self.file_path


class VideoWrapper(gym.Wrapper):
    """Collects rendered frames in memory; `render()` returns the list."""

    def __init__(self,
            env,
            mode='rgb_array',
            enabled=True,
            steps_per_render=1,
            **kwargs
        ):
        super().__init__(env)

        self.mode = mode
        self.enabled = enabled
        self.render_kwargs = kwargs
        self.steps_per_render = steps_per_render

        self.frames = list()
        self.step_count = 0

    def _capture(self):
        # Grab one frame from the wrapped env and store it.
        frame = self.env.render(mode=self.mode, **self.render_kwargs)
        assert frame.dtype == np.uint8
        self.frames.append(frame)

    def reset(self, **kwargs):
        obs = super().reset(**kwargs)
        self.frames = list()
        self.step_count = 1
        if self.enabled:
            self._capture()
        return obs

    def step(self, action):
        result = super().step(action)
        self.step_count += 1
        if self.enabled and ((self.step_count % self.steps_per_render) == 0):
            self._capture()
        return result

    def render(self, mode='rgb_array', **kwargs):
        return self.frames
+ """ + raise NotImplementedError + + @abc.abstractmethod + def decode_actions( + self, + latent_action_batch: Optional[torch.Tensor], + input_rep_batch: Optional[torch.Tensor] = None, + ) -> torch.Tensor: + """ + Given a discretized action, convert it to a continuous action. + + Inputs: + latent_action_batch (shape: ... x num_tokens): The discretized action + generated by the discretizer. + + Outputs: + continuous_action (shape: ... x action_dim): The continuous action. + """ + raise NotImplementedError + + @property + @abc.abstractmethod + def num_latents(self) -> Union[int, float]: + """ + Number of possible latents for this generator, useful for state priors that use softmax. + """ + return float("inf") diff --git a/diffusion_policy/model/bet/action_ae/discretizers/k_means.py b/diffusion_policy/model/bet/action_ae/discretizers/k_means.py new file mode 100644 index 0000000..534e588 --- /dev/null +++ b/diffusion_policy/model/bet/action_ae/discretizers/k_means.py @@ -0,0 +1,148 @@ +import torch +import numpy as np + +import tqdm + +from typing import Optional, Tuple, Union +from diffusion_policy.model.common.dict_of_tensor_mixin import DictOfTensorMixin + + +class KMeansDiscretizer(DictOfTensorMixin): + """ + Simplified and modified version of KMeans algorithm from sklearn. 
+ """ + + def __init__( + self, + action_dim: int, + num_bins: int = 100, + predict_offsets: bool = False, + ): + super().__init__() + self.n_bins = num_bins + self.action_dim = action_dim + self.predict_offsets = predict_offsets + + def fit_discretizer(self, input_actions: torch.Tensor) -> None: + assert ( + self.action_dim == input_actions.shape[-1] + ), f"Input action dimension {self.action_dim} does not match fitted model {input_actions.shape[-1]}" + + flattened_actions = input_actions.view(-1, self.action_dim) + cluster_centers = KMeansDiscretizer._kmeans( + flattened_actions, ncluster=self.n_bins + ) + self.params_dict['bin_centers'] = cluster_centers + + @property + def suggested_actions(self) -> torch.Tensor: + return self.params_dict['bin_centers'] + + @classmethod + def _kmeans(cls, x: torch.Tensor, ncluster: int = 512, niter: int = 50): + """ + Simple k-means clustering algorithm adapted from Karpathy's minGPT libary + https://github.com/karpathy/minGPT/blob/master/play_image.ipynb + """ + N, D = x.size() + c = x[torch.randperm(N)[:ncluster]] # init clusters at random + + pbar = tqdm.trange(niter) + pbar.set_description("K-means clustering") + for i in pbar: + # assign all pixels to the closest codebook element + a = ((x[:, None, :] - c[None, :, :]) ** 2).sum(-1).argmin(1) + # move each codebook element to be the mean of the pixels that assigned to it + c = torch.stack([x[a == k].mean(0) for k in range(ncluster)]) + # re-assign any poorly positioned codebook elements + nanix = torch.any(torch.isnan(c), dim=1) + ndead = nanix.sum().item() + if ndead: + tqdm.tqdm.write( + "done step %d/%d, re-initialized %d dead clusters" + % (i + 1, niter, ndead) + ) + c[nanix] = x[torch.randperm(N)[:ndead]] # re-init dead clusters + return c + + def encode_into_latent( + self, input_action: torch.Tensor, input_rep: Optional[torch.Tensor] = None + ) -> torch.Tensor: + """ + Given the input action, discretize it using the k-Means clustering algorithm. 
+ + Inputs: + input_action (shape: ... x action_dim): The input action to discretize. This can be in a batch, + and is generally assumed that the last dimnesion is the action dimension. + + Outputs: + discretized_action (shape: ... x num_tokens): The discretized action. + If self.predict_offsets is True, then the offsets are also returned. + """ + assert ( + input_action.shape[-1] == self.action_dim + ), "Input action dimension does not match fitted model" + + # flatten the input action + flattened_actions = input_action.view(-1, self.action_dim) + + # get the closest cluster center + closest_cluster_center = torch.argmin( + torch.sum( + (flattened_actions[:, None, :] - self.params_dict['bin_centers'][None, :, :]) ** 2, + dim=2, + ), + dim=1, + ) + # Reshape to the original shape + discretized_action = closest_cluster_center.view(input_action.shape[:-1] + (1,)) + + if self.predict_offsets: + # decode from latent and get the difference + reconstructed_action = self.decode_actions(discretized_action) + offsets = input_action - reconstructed_action + return (discretized_action, offsets) + else: + # return the one-hot vector + return discretized_action + + def decode_actions( + self, + latent_action_batch: torch.Tensor, + input_rep_batch: Optional[torch.Tensor] = None, + ) -> torch.Tensor: + """ + Given the latent action, reconstruct the original action. + + Inputs: + latent_action (shape: ... x 1): The latent action to reconstruct. This can be in a batch, + and is generally assumed that the last dimension is the action dimension. If the latent_action_batch + is a tuple, then it is assumed to be (discretized_action, offsets). + + Outputs: + reconstructed_action (shape: ... x action_dim): The reconstructed action. 
+ """ + offsets = None + if type(latent_action_batch) == tuple: + latent_action_batch, offsets = latent_action_batch + # get the closest cluster center + closest_cluster_center = self.params_dict['bin_centers'][latent_action_batch] + # Reshape to the original shape + reconstructed_action = closest_cluster_center.view( + latent_action_batch.shape[:-1] + (self.action_dim,) + ) + if offsets is not None: + reconstructed_action += offsets + return reconstructed_action + + @property + def discretized_space(self) -> int: + return self.n_bins + + @property + def latent_dim(self) -> int: + return 1 + + @property + def num_latents(self) -> int: + return self.n_bins diff --git a/diffusion_policy/model/bet/latent_generators/latent_generator.py b/diffusion_policy/model/bet/latent_generators/latent_generator.py new file mode 100644 index 0000000..072bdc8 --- /dev/null +++ b/diffusion_policy/model/bet/latent_generators/latent_generator.py @@ -0,0 +1,71 @@ +import abc +import torch +from typing import Tuple, Optional + +import diffusion_policy.model.bet.utils as utils + + +class AbstractLatentGenerator(abc.ABC, utils.SaveModule): + """ + Abstract class for a generative model that can generate latents given observation representations. + + In the probabilisitc sense, this model fits and samples from P(latent|observation) given some observation. + """ + + @abc.abstractmethod + def get_latent_and_loss( + self, + obs_rep: torch.Tensor, + target_latents: torch.Tensor, + seq_masks: Optional[torch.Tensor] = None, + ) -> Tuple[torch.Tensor, torch.Tensor]: + """ + Given a set of observation representation and generated latents, get the encoded latent and the loss. + + Inputs: + input_action: Batch of the actions taken in the multimodal demonstrations. + target_latents: Batch of the latents that the generator should learn to generate the actions from. + seq_masks: Batch of masks that indicate which timesteps are valid. + + Outputs: + latent: The sampled latent from the observation. 
+ loss: The loss of the latent generator. + """ + pass + + @abc.abstractmethod + def generate_latents( + self, seq_obses: torch.Tensor, seq_masks: torch.Tensor + ) -> torch.Tensor: + """ + Given a batch of sequences of observations, generate a batch of sequences of latents. + + Inputs: + seq_obses: Batch of sequences of observations, of shape seq x batch x dim, following the transformer convention. + seq_masks: Batch of sequences of masks, of shape seq x batch, following the transformer convention. + + Outputs: + seq_latents: Batch of sequences of latents of shape seq x batch x latent_dim. + """ + pass + + def get_optimizer( + self, weight_decay: float, learning_rate: float, betas: Tuple[float, float] + ) -> torch.optim.Optimizer: + """ + Default optimizer class. Override this if you want to use a different optimizer. + """ + return torch.optim.Adam( + self.parameters(), lr=learning_rate, weight_decay=weight_decay, betas=betas + ) + + +class LatentGeneratorDataParallel(torch.nn.DataParallel): + def get_latent_and_loss(self, *args, **kwargs): + return self.module.get_latent_and_loss(*args, **kwargs) # type: ignore + + def generate_latents(self, *args, **kwargs): + return self.module.generate_latents(*args, **kwargs) # type: ignore + + def get_optimizer(self, *args, **kwargs): + return self.module.get_optimizer(*args, **kwargs) # type: ignore diff --git a/diffusion_policy/model/bet/latent_generators/mingpt.py b/diffusion_policy/model/bet/latent_generators/mingpt.py new file mode 100644 index 0000000..4836b27 --- /dev/null +++ b/diffusion_policy/model/bet/latent_generators/mingpt.py @@ -0,0 +1,188 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +import einops +import diffusion_policy.model.bet.latent_generators.latent_generator as latent_generator + +import diffusion_policy.model.bet.libraries.mingpt.model as mingpt_model +import diffusion_policy.model.bet.libraries.mingpt.trainer as mingpt_trainer +from 
import torch
import torch.nn as nn
import torch.nn.functional as F
import einops
import diffusion_policy.model.bet.latent_generators.latent_generator as latent_generator

import diffusion_policy.model.bet.libraries.mingpt.model as mingpt_model
import diffusion_policy.model.bet.libraries.mingpt.trainer as mingpt_trainer
from diffusion_policy.model.bet.libraries.loss_fn import FocalLoss, soft_cross_entropy

from typing import Optional, Tuple


class MinGPT(latent_generator.AbstractLatentGenerator):
    """
    GPT-backed latent generator: predicts a discrete latent bin per timestep
    and, optionally, a continuous per-bin action offset.
    """

    def __init__(
        self,
        input_dim: int,
        n_layer: int = 12,
        n_head: int = 12,
        n_embd: int = 768,
        embd_pdrop: float = 0.1,
        resid_pdrop: float = 0.1,
        attn_pdrop: float = 0.1,
        block_size: int = 128,
        vocab_size: int = 50257,
        latent_dim: int = 768,  # Ignore, used for compatibility with other models.
        action_dim: int = 0,
        discrete_input: bool = False,
        predict_offsets: bool = False,
        offset_loss_scale: float = 1.0,
        focal_loss_gamma: float = 0.0,
        **kwargs
    ):
        super().__init__()
        self.input_size = input_dim
        self.n_layer = n_layer
        self.n_head = n_head
        self.n_embd = n_embd
        self.embd_pdrop = embd_pdrop
        self.resid_pdrop = resid_pdrop
        self.attn_pdrop = attn_pdrop
        self.block_size = block_size
        self.vocab_size = vocab_size
        self.action_dim = action_dim
        self.predict_offsets = predict_offsets
        self.offset_loss_scale = offset_loss_scale
        self.focal_loss_gamma = focal_loss_gamma
        for key, value in kwargs.items():
            setattr(self, key, value)

        # With offsets, the head emits one logit plus `action_dim` offset
        # values per bin, hence the widened output vocabulary.
        head_vocab = (
            self.vocab_size * (1 + self.action_dim)
            if self.predict_offsets
            else self.vocab_size
        )
        gpt_config = mingpt_model.GPTConfig(
            input_size=self.input_size,
            vocab_size=head_vocab,
            block_size=self.block_size,
            n_layer=n_layer,
            n_head=n_head,
            n_embd=n_embd,
            discrete_input=discrete_input,
            embd_pdrop=embd_pdrop,
            resid_pdrop=resid_pdrop,
            attn_pdrop=attn_pdrop,
        )
        self.model = mingpt_model.GPT(gpt_config)

    def get_latent_and_loss(
        self,
        obs_rep: torch.Tensor,
        target_latents: torch.Tensor,
        seq_masks: Optional[torch.Tensor] = None,
        return_loss_components: bool = False,
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        """Run the GPT on `obs_rep` (batch x seq x embd) and compute the
        classification (and optional offset) loss against `target_latents`.

        Although trained autoregressively, each observation simply predicts
        the latent at the same timestep; no transpose to seq-first is needed.
        """
        if self.predict_offsets:
            target_latents, target_offsets = target_latents

        # Soft targets are full class distributions (last dim == vocab_size);
        # vocab_size == 1 is excluded since the shapes would be ambiguous.
        is_soft_target = (target_latents.shape[-1] == self.vocab_size) and (
            self.vocab_size != 1
        )
        if is_soft_target:
            criterion = soft_cross_entropy
            target_latents = target_latents.view(-1, target_latents.size(-1))
        else:
            criterion = FocalLoss(gamma=self.focal_loss_gamma)
            target_latents = target_latents.view(-1)
            if self.vocab_size == 1:
                # unify k-means (target_class == 0) and GMM (target_prob == 1)
                target_latents = torch.zeros_like(target_latents)

        if not self.predict_offsets:
            logits, _ = self.model(obs_rep)
            loss = criterion(logits.view(-1, logits.size(-1)), target_latents)
            # ? N, T order? Does not affect loss and training (might affect
            # visualization).
            logits = einops.rearrange(logits, "batch seq classes -> seq batch classes")
            if return_loss_components:
                return logits, loss, {"class": loss, "total": loss}
            return logits, loss

        output, _ = self.model(obs_rep)
        logits = output[:, :, : self.vocab_size]
        offsets = output[:, :, self.vocab_size :]
        batch, seq = logits.shape[0], logits.shape[1]
        offsets = einops.rearrange(
            offsets,
            "N T (V A) -> (N T) V A",  # N = batch, T = seq
            V=self.vocab_size,
            A=self.action_dim,
        )
        # (optionally soft) cross entropy on the bin logits
        class_loss = criterion(logits.view(-1, logits.size(-1)), target_latents)
        # Offset loss is taken only at the target bin; with soft targets the
        # argmax bin is treated as the target.
        target_idx = (
            target_latents.argmax(dim=-1).view(-1)
            if is_soft_target
            else target_latents.view(-1)
        )
        selected_offsets = offsets[torch.arange(offsets.size(0)), target_idx]
        offset_loss = self.offset_loss_scale * F.mse_loss(
            selected_offsets, target_offsets.view(-1, self.action_dim)
        )
        loss = offset_loss + class_loss
        logits = einops.rearrange(logits, "batch seq classes -> seq batch classes")
        offsets = einops.rearrange(
            offsets,
            # ? N, T order? Does not affect loss and training (might affect
            # visualization).
            "(N T) V A -> T N V A",
            N=batch,
            T=seq,
        )
        if return_loss_components:
            return (
                (logits, offsets),
                loss,
                {"offset": offset_loss, "class": class_loss, "total": loss},
            )
        return (logits, offsets), loss

    def generate_latents(self, obs_rep: torch.Tensor) -> torch.Tensor:
        """Sample one latent bin (and optional offset) per (batch, seq) position."""
        batch, seq, embed = obs_rep.shape

        output, _ = self.model(obs_rep, None)
        if self.predict_offsets:
            logits = output[:, :, : self.vocab_size]
            offsets = einops.rearrange(
                output[:, :, self.vocab_size :],
                "N T (V A) -> (N T) V A",  # N = batch, T = seq
                V=self.vocab_size,
                A=self.action_dim,
            )
        else:
            logits = output
        probs = F.softmax(logits, dim=-1)
        batch, seq, choices = probs.shape
        # One multinomial draw per row (i.e. per (batch, seq) position).
        sampled_bins = torch.multinomial(probs.view(-1, choices), num_samples=1)
        sampled_bins = einops.rearrange(
            sampled_bins, "(batch seq) 1 -> batch seq 1", batch=batch, seq=seq
        )
        if not self.predict_offsets:
            return sampled_bins
        sampled_offsets = offsets[
            torch.arange(offsets.shape[0]), sampled_bins.flatten()
        ].view(batch, seq, self.action_dim)
        return (sampled_bins, sampled_offsets)

    def get_optimizer(
        self, weight_decay: float, learning_rate: float, betas: Tuple[float, float]
    ) -> torch.optim.Optimizer:
        """Delegate to minGPT's decay/no-decay AdamW configuration."""
        trainer_cfg = mingpt_trainer.TrainerConfig(
            weight_decay=weight_decay, learning_rate=learning_rate, betas=betas
        )
        return self.model.configure_optimizers(trainer_cfg)
import torch
import torch.nn as nn
import torch.nn.functional as F
import einops
import diffusion_policy.model.bet.latent_generators.latent_generator as latent_generator

from diffusion_policy.model.diffusion.transformer_for_diffusion import TransformerForDiffusion
from diffusion_policy.model.bet.libraries.loss_fn import FocalLoss, soft_cross_entropy

from typing import Optional, Tuple


class Transformer(latent_generator.AbstractLatentGenerator):
    """
    Latent generator backed by TransformerForDiffusion: for every timestep it
    predicts a discrete latent bin plus a continuous per-bin action offset.
    """

    def __init__(
        self,
        input_dim: int,
        num_bins: int,
        action_dim: int,
        horizon: int,
        focal_loss_gamma: float,
        offset_loss_scale: float,
        **kwargs
    ):
        super().__init__()
        # The head emits, per bin, one logit plus `action_dim` offset values.
        self.model = TransformerForDiffusion(
            input_dim=input_dim,
            output_dim=num_bins * (1 + action_dim),
            horizon=horizon,
            **kwargs
        )
        self.vocab_size = num_bins
        self.focal_loss_gamma = focal_loss_gamma
        self.offset_loss_scale = offset_loss_scale
        self.action_dim = action_dim

    def get_optimizer(self, **kwargs) -> torch.optim.Optimizer:
        """Delegate optimizer construction to the wrapped transformer."""
        return self.model.configure_optimizers(**kwargs)

    def get_latent_and_loss(self,
            obs_rep: torch.Tensor,
            target_latents: torch.Tensor,
            return_loss_components=True,
        ) -> Tuple[torch.Tensor, torch.Tensor]:
        """Compute focal classification loss on bins plus MSE on the offsets
        of the target bin."""
        target_latents, target_offsets = target_latents
        target_latents = target_latents.view(-1)
        criterion = FocalLoss(gamma=self.focal_loss_gamma)

        # The diffusion-style backbone expects a timestep; BET uses fixed t=0.
        timestep = torch.tensor(0, device=self.model.device)
        output = self.model(obs_rep, timestep)
        logits = output[:, :, : self.vocab_size]
        offsets = output[:, :, self.vocab_size :]
        batch, seq = logits.shape[0], logits.shape[1]
        offsets = einops.rearrange(
            offsets,
            "N T (V A) -> (N T) V A",  # N = batch, T = seq
            V=self.vocab_size,
            A=self.action_dim,
        )
        class_loss = criterion(logits.view(-1, logits.size(-1)), target_latents)
        # Offset loss is only calculated on the target class.
        selected_offsets = offsets[
            torch.arange(offsets.size(0)),
            target_latents.view(-1),
        ]
        offset_loss = self.offset_loss_scale * F.mse_loss(
            selected_offsets, target_offsets.view(-1, self.action_dim)
        )
        loss = offset_loss + class_loss
        logits = einops.rearrange(logits, "batch seq classes -> seq batch classes")
        offsets = einops.rearrange(
            offsets,
            # ? N, T order? Does not affect loss and training (might affect
            # visualization).
            "(N T) V A -> T N V A",
            N=batch,
            T=seq,
        )
        return (
            (logits, offsets),
            loss,
            {"offset": offset_loss, "class": class_loss, "total": loss},
        )

    def generate_latents(self, obs_rep: torch.Tensor) -> torch.Tensor:
        """Sample one (bin, offset) pair per (batch, seq) position."""
        timestep = torch.tensor(0, device=self.model.device)
        output = self.model(obs_rep, timestep)
        logits = output[:, :, : self.vocab_size]
        offsets = einops.rearrange(
            output[:, :, self.vocab_size :],
            "N T (V A) -> (N T) V A",  # N = batch, T = seq
            V=self.vocab_size,
            A=self.action_dim,
        )

        probs = F.softmax(logits, dim=-1)
        batch, seq, choices = probs.shape
        # One multinomial draw per row.
        sampled_bins = torch.multinomial(probs.view(-1, choices), num_samples=1)
        sampled_bins = einops.rearrange(
            sampled_bins, "(batch seq) 1 -> batch seq 1", batch=batch, seq=seq
        )
        sampled_offsets = offsets[
            torch.arange(offsets.shape[0]), sampled_bins.flatten()
        ].view(batch, seq, self.action_dim)
        return (sampled_bins, sampled_offsets)


# Reference: https://github.com/pytorch/pytorch/issues/11959
def soft_cross_entropy(
    input: torch.Tensor,
    target: torch.Tensor,
) -> torch.Tensor:
    """
    Cross entropy against a soft (distributional) target.

    Args:
        input: (batch_size, num_classes): tensor of raw logits
        target: (batch_size, num_classes): tensor of class probability; sum(target) == 1

    Returns:
        loss: scalar (kl_div with reduction="batchmean" averages over the batch)
    """
    log_probs = torch.log_softmax(input, dim=-1)
    # target is a distribution
    return F.kl_div(log_probs, target, reduction="batchmean")
# Focal loss implementation
# Source: https://github.com/AdeelH/pytorch-multi-class-focal-loss/blob/master/focal_loss.py
# MIT License
#
# Copyright (c) 2020 Adeel Hassan
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
class FocalLoss(nn.Module):
    """Focal Loss, as described in https://arxiv.org/abs/1708.02002.

    It is essentially an enhancement to cross entropy loss and is
    useful for classification tasks when there is a large class imbalance.
    x is expected to contain raw, unnormalized scores for each class.
    y is expected to contain class labels.

    Shape:
        - x: (batch_size, C) or (batch_size, C, d1, d2, ..., dK), K > 0.
        - y: (batch_size,) or (batch_size, d1, d2, ..., dK), K > 0.
    """

    def __init__(
        self,
        alpha: Optional[Tensor] = None,
        gamma: float = 0.0,
        reduction: str = "mean",
        ignore_index: int = -100,
    ):
        """Constructor.

        Args:
            alpha (Tensor, optional): Weights for each class. Defaults to None.
            gamma (float, optional): A constant, as described in the paper.
                Defaults to 0 (equivalent to plain cross entropy).
            reduction (str, optional): 'mean', 'sum' or 'none'.
                Defaults to 'mean'.
            ignore_index (int, optional): class label to ignore.
                Defaults to -100.

        Raises:
            ValueError: if `reduction` is not one of the accepted strings.
        """
        if reduction not in ("mean", "sum", "none"):
            raise ValueError('Reduction must be one of: "mean", "sum", "none".')

        super().__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.ignore_index = ignore_index
        self.reduction = reduction

        # NLLLoss carries the per-class alpha weights; reduction is applied
        # manually in forward() after the focal term is multiplied in.
        self.nll_loss = nn.NLLLoss(
            weight=alpha, reduction="none", ignore_index=ignore_index
        )

    def __repr__(self):
        arg_keys = ["alpha", "gamma", "ignore_index", "reduction"]
        arg_vals = [self.__dict__[k] for k in arg_keys]
        arg_strs = [f"{k}={v}" for k, v in zip(arg_keys, arg_vals)]
        arg_str = ", ".join(arg_strs)
        return f"{type(self).__name__}({arg_str})"

    def forward(self, x: Tensor, y: Tensor) -> Tensor:
        """Compute the focal loss of logits `x` against labels `y`."""
        if x.ndim > 2:
            # (N, C, d1, d2, ..., dK) --> (N * d1 * ... * dK, C)
            c = x.shape[1]
            x = x.permute(0, *range(2, x.ndim), 1).reshape(-1, c)
            # (N, d1, d2, ..., dK) --> (N * d1 * ... * dK,)
            y = y.view(-1)

        unignored_mask = y != self.ignore_index
        y = y[unignored_mask]
        if len(y) == 0:
            # All targets ignored: keep the original 0.0 float return so
            # existing callers' arithmetic is unchanged.
            return 0.0
        x = x[unignored_mask]

        # compute weighted cross entropy term: -alpha * log(pt)
        # (alpha is already part of self.nll_loss)
        log_p = F.log_softmax(x, dim=-1)
        ce = self.nll_loss(log_p, y)

        # get true class column from each row
        # FIX: allocate the index on x's device — a CPU arange used to index a
        # CUDA tensor fails; behavior on CPU is identical.
        all_rows = torch.arange(len(x), device=x.device)
        log_pt = log_p[all_rows, y]

        # compute focal term: (1 - pt)^gamma
        pt = log_pt.exp()
        focal_term = (1 - pt) ** self.gamma

        # the full loss: -alpha * ((1 - pt)^gamma) * log(pt)
        loss = focal_term * ce

        if self.reduction == "mean":
            loss = loss.mean()
        elif self.reduction == "sum":
            loss = loss.sum()

        return loss


def focal_loss(
    alpha: Optional[Sequence] = None,
    gamma: float = 0.0,
    reduction: str = "mean",
    ignore_index: int = -100,
    device="cpu",
    dtype=torch.float32,
) -> FocalLoss:
    """Factory function for FocalLoss.

    Args:
        alpha (Sequence, optional): Weights for each class. Will be converted
            to a Tensor if not None. Defaults to None.
        gamma (float, optional): A constant, as described in the paper.
            Defaults to 0.
        reduction (str, optional): 'mean', 'sum' or 'none'.
            Defaults to 'mean'.
        ignore_index (int, optional): class label to ignore.
            Defaults to -100.
        device (str, optional): Device to move alpha to. Defaults to 'cpu'.
        dtype (torch.dtype, optional): dtype to cast alpha to.
            Defaults to torch.float32.

    Returns:
        A FocalLoss object
    """
    if alpha is not None:
        if not isinstance(alpha, Tensor):
            alpha = torch.tensor(alpha)
        alpha = alpha.to(device=device, dtype=dtype)

    return FocalLoss(
        alpha=alpha, gamma=gamma, reduction=reduction, ignore_index=ignore_index
    )
"""
GPT model:
- the initial stem consists of a combination of token encoding and a positional encoding
- the meat of it is a uniform sequence of Transformer blocks
    - each Transformer is a sequential combination of a 1-hidden-layer MLP block and a self-attention block
    - all blocks feed into a central residual pathway similar to resnets
- the final decoder is a linear projection into a vanilla Softmax classifier
"""

import math
import logging

import torch
import torch.nn as nn
from torch.nn import functional as F

logger = logging.getLogger(__name__)


class GPTConfig:
    """base GPT config, params common to all GPT versions"""

    embd_pdrop = 0.1
    resid_pdrop = 0.1
    attn_pdrop = 0.1
    discrete_input = False
    input_size = 10
    n_embd = 768
    n_layer = 12

    def __init__(self, vocab_size, block_size, **kwargs):
        self.vocab_size = vocab_size
        self.block_size = block_size
        # Any extra kwarg becomes an attribute, overriding class defaults.
        for key, value in kwargs.items():
            setattr(self, key, value)


class GPT1Config(GPTConfig):
    """GPT-1 like network roughly 125M params"""

    n_layer = 12
    n_head = 12
    n_embd = 768


class CausalSelfAttention(nn.Module):
    """
    A vanilla multi-head masked self-attention layer with a projection at the
    end.  torch.nn.MultiheadAttention would also work; this explicit
    implementation shows there is nothing too scary inside.
    """

    def __init__(self, config):
        super().__init__()
        assert config.n_embd % config.n_head == 0
        # key, query, value projections for all heads (fused across heads)
        self.key = nn.Linear(config.n_embd, config.n_embd)
        self.query = nn.Linear(config.n_embd, config.n_embd)
        self.value = nn.Linear(config.n_embd, config.n_embd)
        # regularization
        self.attn_drop = nn.Dropout(config.attn_pdrop)
        self.resid_drop = nn.Dropout(config.resid_pdrop)
        # output projection
        self.proj = nn.Linear(config.n_embd, config.n_embd)
        # lower-triangular mask restricts attention to earlier positions
        causal = torch.tril(torch.ones(config.block_size, config.block_size))
        self.register_buffer(
            "mask", causal.view(1, 1, config.block_size, config.block_size)
        )
        self.n_head = config.n_head

    def forward(self, x):
        B, T, C = x.size()  # batch size, sequence length, embedding dim (n_embd)
        head_dim = C // self.n_head

        def split_heads(proj):
            # (B, T, C) -> (B, n_head, T, head_dim)
            return proj(x).view(B, T, self.n_head, head_dim).transpose(1, 2)

        k = split_heads(self.key)
        q = split_heads(self.query)
        v = split_heads(self.value)

        # causal self-attention: (B, nh, T, hs) x (B, nh, hs, T) -> (B, nh, T, T)
        att = (q @ k.transpose(-2, -1)) * (1.0 / math.sqrt(k.size(-1)))
        att = att.masked_fill(self.mask[:, :, :T, :T] == 0, float("-inf"))
        att = self.attn_drop(F.softmax(att, dim=-1))
        y = att @ v  # (B, nh, T, T) x (B, nh, T, hs) -> (B, nh, T, hs)
        # re-assemble all head outputs side by side
        y = y.transpose(1, 2).contiguous().view(B, T, C)

        # output projection
        return self.resid_drop(self.proj(y))
class Block(nn.Module):
    """an unassuming Transformer block: pre-LN self-attention + pre-LN MLP,
    both on a residual pathway"""

    def __init__(self, config):
        super().__init__()
        self.ln1 = nn.LayerNorm(config.n_embd)
        self.ln2 = nn.LayerNorm(config.n_embd)
        self.attn = CausalSelfAttention(config)
        self.mlp = nn.Sequential(
            nn.Linear(config.n_embd, 4 * config.n_embd),
            nn.GELU(),
            nn.Linear(4 * config.n_embd, config.n_embd),
            nn.Dropout(config.resid_pdrop),
        )

    def forward(self, x):
        x = x + self.attn(self.ln1(x))
        return x + self.mlp(self.ln2(x))


class GPT(nn.Module):
    """the full GPT language model, with a context size of block_size"""

    def __init__(self, config: GPTConfig):
        super().__init__()

        # input embedding stem: Embedding for discrete tokens, Linear for
        # continuous inputs
        if config.discrete_input:
            self.tok_emb = nn.Embedding(config.vocab_size, config.n_embd)
        else:
            self.tok_emb = nn.Linear(config.input_size, config.n_embd)
        self.discrete_input = config.discrete_input
        self.pos_emb = nn.Parameter(torch.zeros(1, config.block_size, config.n_embd))
        self.drop = nn.Dropout(config.embd_pdrop)
        # transformer trunk
        self.blocks = nn.Sequential(*[Block(config) for _ in range(config.n_layer)])
        # decoder head
        self.ln_f = nn.LayerNorm(config.n_embd)
        self.head = nn.Linear(config.n_embd, config.vocab_size, bias=False)

        self.block_size = config.block_size
        self.apply(self._init_weights)

        logger.info(
            "number of parameters: %e", sum(p.numel() for p in self.parameters())
        )

    def get_block_size(self):
        """Maximum sequence length the model can attend over."""
        return self.block_size

    def _init_weights(self, module):
        # GPT-2 style init: N(0, 0.02) weights, zero biases, unit LayerNorm.
        if isinstance(module, (nn.Linear, nn.Embedding)):
            torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
            if isinstance(module, nn.Linear) and module.bias is not None:
                torch.nn.init.zeros_(module.bias)
        elif isinstance(module, nn.LayerNorm):
            torch.nn.init.zeros_(module.bias)
            torch.nn.init.ones_(module.weight)
        elif isinstance(module, GPT):
            torch.nn.init.normal_(module.pos_emb, mean=0.0, std=0.02)

    def configure_optimizers(self, train_config):
        """
        Separate parameters into a weight-decayed bucket (Linear weights) and
        a non-decayed bucket (biases, LayerNorm/Embedding weights, pos_emb),
        then return an AdamW optimizer over the two groups.
        """
        decay = set()
        no_decay = set()
        whitelist_weight_modules = (torch.nn.Linear,)
        blacklist_weight_modules = (torch.nn.LayerNorm, torch.nn.Embedding)
        for module_name, module in self.named_modules():
            for param_name, _ in module.named_parameters():
                full_name = (
                    "%s.%s" % (module_name, param_name) if module_name else param_name
                )
                if param_name.endswith("bias"):
                    # all biases will not be decayed
                    no_decay.add(full_name)
                elif param_name.endswith("weight") and isinstance(
                    module, whitelist_weight_modules
                ):
                    # weights of whitelist modules will be weight decayed
                    decay.add(full_name)
                elif param_name.endswith("weight") and isinstance(
                    module, blacklist_weight_modules
                ):
                    # weights of blacklist modules will NOT be weight decayed
                    no_decay.add(full_name)

        # the root position embedding is never visited by the loop above
        no_decay.add("pos_emb")

        # validate that we considered every parameter exactly once
        param_dict = {pn: p for pn, p in self.named_parameters()}
        inter_params = decay & no_decay
        union_params = decay | no_decay
        assert (
            len(inter_params) == 0
        ), "parameters %s made it into both decay/no_decay sets!" % (str(inter_params),)
        assert (
            len(param_dict.keys() - union_params) == 0
        ), "parameters %s were not separated into either decay/no_decay set!" % (
            str(param_dict.keys() - union_params),
        )

        optim_groups = [
            {
                "params": [param_dict[pn] for pn in sorted(list(decay))],
                "weight_decay": train_config.weight_decay,
            },
            {
                "params": [param_dict[pn] for pn in sorted(list(no_decay))],
                "weight_decay": 0.0,
            },
        ]
        return torch.optim.AdamW(
            optim_groups, lr=train_config.learning_rate, betas=train_config.betas
        )

    def forward(self, idx, targets=None):
        if self.discrete_input:
            b, t = idx.size()
        else:
            b, t, dim = idx.size()
        assert t <= self.block_size, "Cannot forward, model block size is exhausted."

        # forward the GPT model
        token_embeddings = self.tok_emb(idx)  # each index maps to a (learnable) vector
        position_embeddings = self.pos_emb[:, :t, :]  # one per position
        x = self.drop(token_embeddings + position_embeddings)
        x = self.blocks(x)
        x = self.ln_f(x)
        logits = self.head(x)

        # if we are given some desired targets also calculate the loss
        loss = None
        if targets is not None:
            loss = F.cross_entropy(logits.view(-1, logits.size(-1)), targets.view(-1))

        return logits, loss
+""" + +import math +import logging + +from tqdm import tqdm +import numpy as np + +import torch +import torch.optim as optim +from torch.optim.lr_scheduler import LambdaLR +from torch.utils.data.dataloader import DataLoader + +logger = logging.getLogger(__name__) + + +class TrainerConfig: + # optimization parameters + max_epochs = 10 + batch_size = 64 + learning_rate = 3e-4 + betas = (0.9, 0.95) + grad_norm_clip = 1.0 + weight_decay = 0.1 # only applied on matmul weights + # learning rate decay params: linear warmup followed by cosine decay to 10% of original + lr_decay = False + warmup_tokens = 375e6 # these two numbers come from the GPT-3 paper, but may not be good defaults elsewhere + final_tokens = 260e9 # (at what point we reach 10% of original LR) + # checkpoint settings + ckpt_path = None + num_workers = 0 # for DataLoader + + def __init__(self, **kwargs): + for k, v in kwargs.items(): + setattr(self, k, v) + + +class Trainer: + def __init__(self, model, train_dataset, test_dataset, config): + self.model = model + self.train_dataset = train_dataset + self.test_dataset = test_dataset + self.config = config + + # take over whatever gpus are on the system + self.device = "cpu" + if torch.cuda.is_available(): + self.device = torch.cuda.current_device() + self.model = torch.nn.DataParallel(self.model).to(self.device) + + def save_checkpoint(self): + # DataParallel wrappers keep raw model object in .module attribute + raw_model = self.model.module if hasattr(self.model, "module") else self.model + logger.info("saving %s", self.config.ckpt_path) + torch.save(raw_model.state_dict(), self.config.ckpt_path) + + def train(self): + model, config = self.model, self.config + raw_model = model.module if hasattr(self.model, "module") else model + optimizer = raw_model.configure_optimizers(config) + + def run_epoch(loader, is_train): + model.train(is_train) + + losses = [] + pbar = ( + tqdm(enumerate(loader), total=len(loader)) + if is_train + else enumerate(loader) + ) + 
for it, (x, y) in pbar: + + # place data on the correct device + x = x.to(self.device) + y = y.to(self.device) + + # forward the model + with torch.set_grad_enabled(is_train): + logits, loss = model(x, y) + loss = ( + loss.mean() + ) # collapse all losses if they are scattered on multiple gpus + losses.append(loss.item()) + + if is_train: + + # backprop and update the parameters + model.zero_grad() + loss.backward() + torch.nn.utils.clip_grad_norm_( + model.parameters(), config.grad_norm_clip + ) + optimizer.step() + + # decay the learning rate based on our progress + if config.lr_decay: + self.tokens += ( + y >= 0 + ).sum() # number of tokens processed this step (i.e. label is not -100) + if self.tokens < config.warmup_tokens: + # linear warmup + lr_mult = float(self.tokens) / float( + max(1, config.warmup_tokens) + ) + else: + # cosine learning rate decay + progress = float( + self.tokens - config.warmup_tokens + ) / float( + max(1, config.final_tokens - config.warmup_tokens) + ) + lr_mult = max( + 0.1, 0.5 * (1.0 + math.cos(math.pi * progress)) + ) + lr = config.learning_rate * lr_mult + for param_group in optimizer.param_groups: + param_group["lr"] = lr + else: + lr = config.learning_rate + + # report progress + pbar.set_description( # type: ignore + f"epoch {epoch+1} iter {it}: train loss {loss.item():.5f}. 
lr {lr:e}" + ) + + if not is_train: + test_loss = float(np.mean(losses)) + logger.info("test loss: %f", test_loss) + return test_loss + + best_loss = float("inf") + self.tokens = 0 # counter used for learning rate decay + + train_loader = DataLoader( + self.train_dataset, + shuffle=True, + pin_memory=True, + batch_size=config.batch_size, + num_workers=config.num_workers, + ) + if self.test_dataset is not None: + test_loader = DataLoader( + self.test_dataset, + shuffle=True, + pin_memory=True, + batch_size=config.batch_size, + num_workers=config.num_workers, + ) + + for epoch in range(config.max_epochs): + run_epoch(train_loader, is_train=True) + if self.test_dataset is not None: + test_loss = run_epoch(test_loader, is_train=False) + + # supports early stopping based on the test loss, or just save always if no test set is provided + good_model = self.test_dataset is None or test_loss < best_loss + if self.config.ckpt_path is not None and good_model: + best_loss = test_loss + self.save_checkpoint() diff --git a/diffusion_policy/model/bet/libraries/mingpt/utils.py b/diffusion_policy/model/bet/libraries/mingpt/utils.py new file mode 100644 index 0000000..620b9d8 --- /dev/null +++ b/diffusion_policy/model/bet/libraries/mingpt/utils.py @@ -0,0 +1,52 @@ +import random +import numpy as np +import torch +import torch.nn as nn +from torch.nn import functional as F + + +def set_seed(seed): + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + torch.cuda.manual_seed_all(seed) + + +def top_k_logits(logits, k): + v, ix = torch.topk(logits, k) + out = logits.clone() + out[out < v[:, [-1]]] = -float("Inf") + return out + + +@torch.no_grad() +def sample(model, x, steps, temperature=1.0, sample=False, top_k=None): + """ + take a conditioning sequence of indices in x (of shape (b,t)) and predict the next token in + the sequence, feeding the predictions back into the model each time. 
Clearly the sampling + has quadratic complexity unlike an RNN that is only linear, and has a finite context window + of block_size, unlike an RNN that has an infinite context window. + """ + block_size = model.get_block_size() + model.eval() + for k in range(steps): + x_cond = ( + x if x.size(1) <= block_size else x[:, -block_size:] + ) # crop context if needed + logits, _ = model(x_cond) + # pluck the logits at the final step and scale by temperature + logits = logits[:, -1, :] / temperature + # optionally crop probabilities to only the top k options + if top_k is not None: + logits = top_k_logits(logits, top_k) + # apply softmax to convert to probabilities + probs = F.softmax(logits, dim=-1) + # sample from the distribution or take the most likely + if sample: + ix = torch.multinomial(probs, num_samples=1) + else: + _, ix = torch.topk(probs, k=1, dim=-1) + # append to the sequence and continue + x = torch.cat((x, ix), dim=1) + + return x diff --git a/diffusion_policy/model/bet/utils.py b/diffusion_policy/model/bet/utils.py new file mode 100644 index 0000000..e021a0b --- /dev/null +++ b/diffusion_policy/model/bet/utils.py @@ -0,0 +1,131 @@ +import os +import random +from collections import OrderedDict +from typing import List, Optional + +import einops +import numpy as np +import torch +import torch.nn as nn + +from torch.utils.data import random_split +import wandb + + +def mlp(input_dim, hidden_dim, output_dim, hidden_depth, output_mod=None): + if hidden_depth == 0: + mods = [nn.Linear(input_dim, output_dim)] + else: + mods = [nn.Linear(input_dim, hidden_dim), nn.ReLU(inplace=True)] + for i in range(hidden_depth - 1): + mods += [nn.Linear(hidden_dim, hidden_dim), nn.ReLU(inplace=True)] + mods.append(nn.Linear(hidden_dim, output_dim)) + if output_mod is not None: + mods.append(output_mod) + trunk = nn.Sequential(*mods) + return trunk + + +class eval_mode: + def __init__(self, *models, no_grad=False): + self.models = models + self.no_grad = no_grad + 
def shuffle_along_axis(a, axis):
    """Return a copy of ``a`` with entries independently permuted along ``axis``.

    Each 1-D lane along ``axis`` receives its own random permutation (unlike
    ``np.random.shuffle``, which applies one permutation to the whole axis).
    Uses the global NumPy RNG, so results follow ``np.random.seed``.
    """
    # Argsorting i.i.d. uniform noise yields an independent permutation per lane.
    random_keys = np.random.rand(*a.shape)
    perm = random_keys.argsort(axis=axis)
    return np.take_along_axis(a, perm, axis=axis)
def split_datasets(dataset, train_fraction=0.95, random_seed=42):
    """Deterministically split ``dataset`` into train and validation subsets.

    Args:
        dataset: any object supporting ``len()`` and indexing (a torch Dataset).
        train_fraction (float): fraction assigned to training; the count is
            truncated with ``int()`` and the remainder goes to validation.
        random_seed (int): seed for the split generator, making the partition
            reproducible across runs.

    Returns:
        tuple: ``(train_set, val_set)`` as ``torch.utils.data.Subset`` objects.
    """
    total = len(dataset)
    n_train = int(train_fraction * total)
    # A dedicated, seeded generator keeps the split independent of global RNG state.
    generator = torch.Generator().manual_seed(random_seed)
    train_set, val_set = random_split(
        dataset, [n_train, total - n_train], generator=generator
    )
    return train_set, val_set
pdb.set_trace() + dfs_add(out_dict, param_keys, value.clone()) + return out_dict + + self.params_dict = load_dict(state_dict, prefix + 'params_dict') + self.params_dict.requires_grad_(False) + return diff --git a/diffusion_policy/model/common/lr_scheduler.py b/diffusion_policy/model/common/lr_scheduler.py new file mode 100644 index 0000000..f4c0533 --- /dev/null +++ b/diffusion_policy/model/common/lr_scheduler.py @@ -0,0 +1,46 @@ +from diffusers.optimization import ( + Union, SchedulerType, Optional, + Optimizer, TYPE_TO_SCHEDULER_FUNCTION +) + +def get_scheduler( + name: Union[str, SchedulerType], + optimizer: Optimizer, + num_warmup_steps: Optional[int] = None, + num_training_steps: Optional[int] = None, + **kwargs +): + """ + Added kwargs vs diffuser's original implementation + + Unified API to get any scheduler from its name. + + Args: + name (`str` or `SchedulerType`): + The name of the scheduler to use. + optimizer (`torch.optim.Optimizer`): + The optimizer that will be used during training. + num_warmup_steps (`int`, *optional*): + The number of warmup steps to do. This is not required by all schedulers (hence the argument being + optional), the function will raise an error if it's unset and the scheduler type requires it. + num_training_steps (`int``, *optional*): + The number of training steps to do. This is not required by all schedulers (hence the argument being + optional), the function will raise an error if it's unset and the scheduler type requires it. 
class ModuleAttrMixin(nn.Module):
    """Mixin adding ``.device`` / ``.dtype`` convenience properties to modules.

    An empty dummy parameter is registered so the properties work even for
    modules that otherwise own no parameters of their own.
    """

    def __init__(self):
        super().__init__()
        # Guarantees parameters() is non-empty so the properties below
        # never raise StopIteration.
        self._dummy_variable = nn.Parameter()

    @property
    def device(self):
        """Device of the module's first parameter."""
        return next(iter(self.parameters())).device

    @property
    def dtype(self):
        """Dtype of the module's first parameter."""
        return next(iter(self.parameters())).dtype
zarr.Array], + last_n_dims=1, + dtype=torch.float32, + mode='limits', + output_max=1., + output_min=-1., + range_eps=1e-4, + fit_offset=True): + if isinstance(data, dict): + for key, value in data.items(): + self.params_dict[key] = _fit(value, + last_n_dims=last_n_dims, + dtype=dtype, + mode=mode, + output_max=output_max, + output_min=output_min, + range_eps=range_eps, + fit_offset=fit_offset) + else: + self.params_dict['_default'] = _fit(data, + last_n_dims=last_n_dims, + dtype=dtype, + mode=mode, + output_max=output_max, + output_min=output_min, + range_eps=range_eps, + fit_offset=fit_offset) + + def __call__(self, x: Union[Dict, torch.Tensor, np.ndarray]) -> torch.Tensor: + return self.normalize(x) + + def __getitem__(self, key: str): + return SingleFieldLinearNormalizer(self.params_dict[key]) + + def __setitem__(self, key: str , value: 'SingleFieldLinearNormalizer'): + self.params_dict[key] = value.params_dict + + def _normalize_impl(self, x, forward=True): + if isinstance(x, dict): + result = dict() + for key, value in x.items(): + params = self.params_dict[key] + result[key] = _normalize(value, params, forward=forward) + return result + else: + if '_default' not in self.params_dict: + raise RuntimeError("Not initialized") + params = self.params_dict['_default'] + return _normalize(x, params, forward=forward) + + def normalize(self, x: Union[Dict, torch.Tensor, np.ndarray]) -> torch.Tensor: + return self._normalize_impl(x, forward=True) + + def unnormalize(self, x: Union[Dict, torch.Tensor, np.ndarray]) -> torch.Tensor: + return self._normalize_impl(x, forward=False) + + def get_input_stats(self) -> Dict: + if len(self.params_dict) == 0: + raise RuntimeError("Not initialized") + if len(self.params_dict) == 1 and '_default' in self.params_dict: + return self.params_dict['_default']['input_stats'] + + result = dict() + for key, value in self.params_dict.items(): + if key != '_default': + result[key] = value['input_stats'] + return result + + + def 
get_output_stats(self, key='_default'): + input_stats = self.get_input_stats() + if 'min' in input_stats: + # no dict + return dict_apply(input_stats, self.normalize) + + result = dict() + for key, group in input_stats.items(): + this_dict = dict() + for name, value in group.items(): + this_dict[name] = self.normalize({key:value})[key] + result[key] = this_dict + return result + + +class SingleFieldLinearNormalizer(DictOfTensorMixin): + avaliable_modes = ['limits', 'gaussian'] + + @torch.no_grad() + def fit(self, + data: Union[torch.Tensor, np.ndarray, zarr.Array], + last_n_dims=1, + dtype=torch.float32, + mode='limits', + output_max=1., + output_min=-1., + range_eps=1e-4, + fit_offset=True): + self.params_dict = _fit(data, + last_n_dims=last_n_dims, + dtype=dtype, + mode=mode, + output_max=output_max, + output_min=output_min, + range_eps=range_eps, + fit_offset=fit_offset) + + @classmethod + def create_fit(cls, data: Union[torch.Tensor, np.ndarray, zarr.Array], **kwargs): + obj = cls() + obj.fit(data, **kwargs) + return obj + + @classmethod + def create_manual(cls, + scale: Union[torch.Tensor, np.ndarray], + offset: Union[torch.Tensor, np.ndarray], + input_stats_dict: Dict[str, Union[torch.Tensor, np.ndarray]]): + def to_tensor(x): + if not isinstance(x, torch.Tensor): + x = torch.from_numpy(x) + x = x.flatten() + return x + + # check + for x in [offset] + list(input_stats_dict.values()): + assert x.shape == scale.shape + assert x.dtype == scale.dtype + + params_dict = nn.ParameterDict({ + 'scale': to_tensor(scale), + 'offset': to_tensor(offset), + 'input_stats': nn.ParameterDict( + dict_apply(input_stats_dict, to_tensor)) + }) + return cls(params_dict) + + @classmethod + def create_identity(cls, dtype=torch.float32): + scale = torch.tensor([1], dtype=dtype) + offset = torch.tensor([0], dtype=dtype) + input_stats_dict = { + 'min': torch.tensor([-1], dtype=dtype), + 'max': torch.tensor([1], dtype=dtype), + 'mean': torch.tensor([0], dtype=dtype), + 'std': 
    def normalize(self, x: Union[torch.Tensor, np.ndarray]) -> torch.Tensor:
        """Map ``x`` into the normalized range using the fitted scale/offset."""
        return _normalize(x, self.params_dict, forward=True)
+ assert output_max > 0 + assert output_min < 0 + # unit abs + output_abs = min(abs(output_min), abs(output_max)) + input_abs = torch.maximum(torch.abs(input_min), torch.abs(input_max)) + ignore_dim = input_abs < range_eps + input_abs[ignore_dim] = output_abs + # don't scale constant channels + scale = output_abs / input_abs + offset = torch.zeros_like(input_mean) + elif mode == 'gaussian': + ignore_dim = input_std < range_eps + scale = input_std.clone() + scale[ignore_dim] = 1 + scale = 1 / scale + + if fit_offset: + offset = - input_mean * scale + else: + offset = torch.zeros_like(input_mean) + + # save + this_params = nn.ParameterDict({ + 'scale': scale, + 'offset': offset, + 'input_stats': nn.ParameterDict({ + 'min': input_min, + 'max': input_max, + 'mean': input_mean, + 'std': input_std + }) + }) + for p in this_params.parameters(): + p.requires_grad_(False) + return this_params + + +def _normalize(x, params, forward=True): + assert 'scale' in params + if isinstance(x, np.ndarray): + x = torch.from_numpy(x) + scale = params['scale'] + offset = params['offset'] + x = x.to(device=scale.device, dtype=scale.dtype) + src_shape = x.shape + x = x.reshape(-1, scale.shape[0]) + if forward: + x = x * scale + offset + else: + x = (x - offset) / scale + x = x.reshape(src_shape) + return x + + +def test(): + data = torch.zeros((100,10,9,2)).uniform_() + data[...,0,0] = 0 + + normalizer = SingleFieldLinearNormalizer() + normalizer.fit(data, mode='limits', last_n_dims=2) + datan = normalizer.normalize(data) + assert datan.shape == data.shape + assert np.allclose(datan.max(), 1.) + assert np.allclose(datan.min(), -1.) 
+ dataun = normalizer.unnormalize(datan) + assert torch.allclose(data, dataun, atol=1e-7) + + input_stats = normalizer.get_input_stats() + output_stats = normalizer.get_output_stats() + + normalizer = SingleFieldLinearNormalizer() + normalizer.fit(data, mode='limits', last_n_dims=1, fit_offset=False) + datan = normalizer.normalize(data) + assert datan.shape == data.shape + assert np.allclose(datan.max(), 1., atol=1e-3) + assert np.allclose(datan.min(), 0., atol=1e-3) + dataun = normalizer.unnormalize(datan) + assert torch.allclose(data, dataun, atol=1e-7) + + data = torch.zeros((100,10,9,2)).uniform_() + normalizer = SingleFieldLinearNormalizer() + normalizer.fit(data, mode='gaussian', last_n_dims=0) + datan = normalizer.normalize(data) + assert datan.shape == data.shape + assert np.allclose(datan.mean(), 0., atol=1e-3) + assert np.allclose(datan.std(), 1., atol=1e-3) + dataun = normalizer.unnormalize(datan) + assert torch.allclose(data, dataun, atol=1e-7) + + + # dict + data = torch.zeros((100,10,9,2)).uniform_() + data[...,0,0] = 0 + + normalizer = LinearNormalizer() + normalizer.fit(data, mode='limits', last_n_dims=2) + datan = normalizer.normalize(data) + assert datan.shape == data.shape + assert np.allclose(datan.max(), 1.) + assert np.allclose(datan.min(), -1.) 
class RotationTransformer:
    """Convert rotation parameterizations using pytorch3d.

    A rotation matrix is always used as the intermediate representation,
    so any pair of supported representations can be bridged. Instances are
    stateless apart from the pre-built conversion pipelines.
    """

    valid_reps = [
        'axis_angle',
        'euler_angles',
        'quaternion',
        'rotation_6d',
        'matrix'
    ]

    def __init__(self,
            from_rep='axis_angle',
            to_rep='rotation_6d',
            from_convention=None,
            to_convention=None):
        """
        Args:
            from_rep: source representation, one of ``valid_reps``.
            to_rep: target representation, one of ``valid_reps``.
            from_convention: Euler axis convention string (e.g. 'XYZ');
                required iff ``from_rep == 'euler_angles'``.
            to_convention: Euler axis convention string; required iff
                ``to_rep == 'euler_angles'``.
        """
        assert from_rep != to_rep
        assert from_rep in self.valid_reps
        assert to_rep in self.valid_reps
        if from_rep == 'euler_angles':
            assert from_convention is not None
        if to_rep == 'euler_angles':
            assert to_convention is not None

        forward_funcs = list()
        inverse_funcs = list()

        if from_rep != 'matrix':
            funcs = [
                getattr(pt, f'{from_rep}_to_matrix'),
                getattr(pt, f'matrix_to_{from_rep}')
            ]
            if from_convention is not None:
                # Fix: the keyword was misspelled 'convernsion', which made
                # pytorch3d's euler-angle converters raise TypeError whenever
                # a convention was supplied; the parameter is 'convention'.
                funcs = [functools.partial(func, convention=from_convention)
                    for func in funcs]
            forward_funcs.append(funcs[0])
            inverse_funcs.append(funcs[1])

        if to_rep != 'matrix':
            funcs = [
                getattr(pt, f'matrix_to_{to_rep}'),
                getattr(pt, f'{to_rep}_to_matrix')
            ]
            if to_convention is not None:
                # Same 'convernsion' -> 'convention' fix as above.
                funcs = [functools.partial(func, convention=to_convention)
                    for func in funcs]
            forward_funcs.append(funcs[0])
            inverse_funcs.append(funcs[1])

        # The inverse pipeline undoes the forward one, so it runs in reverse.
        inverse_funcs = inverse_funcs[::-1]

        self.forward_funcs = forward_funcs
        self.inverse_funcs = inverse_funcs

    @staticmethod
    def _apply_funcs(x: Union[np.ndarray, torch.Tensor], funcs: list) -> Union[np.ndarray, torch.Tensor]:
        """Run ``x`` through ``funcs`` in order; numpy in -> numpy out."""
        x_ = x
        if isinstance(x, np.ndarray):
            x_ = torch.from_numpy(x)
        x_: torch.Tensor
        for func in funcs:
            x_ = func(x_)
        y = x_
        if isinstance(x, np.ndarray):
            y = x_.numpy()
        return y

    def forward(self, x: Union[np.ndarray, torch.Tensor]
            ) -> Union[np.ndarray, torch.Tensor]:
        """Convert from ``from_rep`` to ``to_rep``."""
        return self._apply_funcs(x, self.forward_funcs)

    def inverse(self, x: Union[np.ndarray, torch.Tensor]
            ) -> Union[np.ndarray, torch.Tensor]:
        """Convert from ``to_rep`` back to ``from_rep``."""
        return self._apply_funcs(x, self.inverse_funcs)
def recursive_dict_list_tuple_apply(x, type_func_dict):
    """Recursively map leaf values of a nested dict/list/tuple structure.

    Args:
        x (dict or list or tuple): possibly nested structure of leaf values.
        type_func_dict (dict): mapping ``{leaf_type: function}``; the first
            matching type's function is applied to each leaf. Container
            types may not appear as keys — recursion handles them.

    Returns:
        A new structure of the same shape with transformed leaves.

    Raises:
        NotImplementedError: if a leaf's type matches no entry.
    """
    assert(list not in type_func_dict)
    assert(tuple not in type_func_dict)
    assert(dict not in type_func_dict)

    if isinstance(x, dict):
        # Preserve OrderedDict-ness of the input container.
        items = ((k, recursive_dict_list_tuple_apply(v, type_func_dict))
                 for k, v in x.items())
        if isinstance(x, collections.OrderedDict):
            return collections.OrderedDict(items)
        return dict(items)
    if isinstance(x, (list, tuple)):
        mapped = [recursive_dict_list_tuple_apply(v, type_func_dict) for v in x]
        return tuple(mapped) if isinstance(x, tuple) else mapped
    for leaf_type, func in type_func_dict.items():
        if isinstance(x, leaf_type):
            return func(x)
    raise NotImplementedError(
        'Cannot handle data type %s' % str(type(x)))
def clone(x):
    """Deep-copy every torch tensor / numpy array leaf in a nested structure.

    Tensors are copied with ``.clone()`` (stays on the autograd graph),
    arrays with ``.copy()``; ``None`` leaves pass through unchanged.

    Args:
        x (dict or list or tuple): a possibly nested structure.

    Returns:
        A new nested structure with copied leaves.
    """
    return recursive_dict_list_tuple_apply(
        x,
        {
            torch.Tensor: lambda t: t.clone(),
            np.ndarray: lambda arr: arr.copy(),
            type(None): lambda v: v,
        }
    )
def unsqueeze(x, dim):
    """Insert a size-1 dimension at ``dim`` for every tensor/array leaf.

    Args:
        x (dict or list or tuple): a possibly nested structure.
        dim (int): position of the new axis.

    Returns:
        A new nested structure with expanded leaves.
    """
    return recursive_dict_list_tuple_apply(
        x,
        {
            torch.Tensor: lambda t, d=dim: t.unsqueeze(dim=d),
            np.ndarray: lambda arr, d=dim: np.expand_dims(arr, axis=d),
            type(None): lambda v: v,
        }
    )
def to_device(x, device):
    """Move every torch tensor leaf of a nested structure to ``device``.

    Only tensors and ``None`` leaves are handled here; numpy arrays should
    be converted with ``to_tensor`` first.

    Args:
        x (dict or list or tuple): a possibly nested structure.
        device (torch.Device): destination device.

    Returns:
        A new nested structure with tensors on ``device``.
    """
    return recursive_dict_list_tuple_apply(
        x,
        {
            torch.Tensor: lambda t, d=device: t.to(d),
            type(None): lambda v: v,
        }
    )
def to_float(x):
    """Cast every tensor/array leaf to float32.

    Tensors use ``.float()``, numpy arrays use ``astype(np.float32)``;
    ``None`` leaves pass through unchanged.

    Args:
        x (dict or list or tuple): a possibly nested structure.

    Returns:
        A new nested structure with float32 leaves.
    """
    return recursive_dict_list_tuple_apply(
        x,
        {
            torch.Tensor: lambda t: t.float(),
            np.ndarray: lambda arr: arr.astype(np.float32),
            type(None): lambda v: v,
        }
    )
def to_one_hot_single(tensor, num_class):
    """One-hot encode an integer label tensor.

    Args:
        tensor (torch.Tensor): integer labels in ``[0, num_class)``.
        num_class (int): total number of classes.

    Returns:
        torch.Tensor: float tensor of shape ``tensor.shape + (num_class,)``
        with a 1 at each label's index.
    """
    out_shape = tensor.size() + (num_class,)
    one_hot = torch.zeros(out_shape).to(tensor.device)
    # Scatter a 1 into the class slot of each label along the last axis.
    one_hot.scatter_(-1, tensor.unsqueeze(-1), 1)
    return one_hot
+ + Args: + x (torch.Tensor): tensor to flatten + begin_axis (int): which axis to flatten from + + Returns: + y (torch.Tensor): flattened tensor + """ + fixed_size = x.size()[:begin_axis] + _s = list(fixed_size) + [-1] + return x.reshape(*_s) + + +def flatten(x, begin_axis=1): + """ + Flatten all tensors in nested dictionary or list or tuple, from @begin_axis onwards. + + Args: + x (dict or list or tuple): a possibly nested dictionary or list or tuple + begin_axis (int): which axis to flatten from + + Returns: + y (dict or list or tuple): new nested dict-list-tuple + """ + return recursive_dict_list_tuple_apply( + x, + { + torch.Tensor: lambda x, b=begin_axis: flatten_single(x, begin_axis=b), + } + ) + + +def reshape_dimensions_single(x, begin_axis, end_axis, target_dims): + """ + Reshape selected dimensions in a tensor to a target dimension. + + Args: + x (torch.Tensor): tensor to reshape + begin_axis (int): begin dimension + end_axis (int): end dimension + target_dims (tuple or list): target shape for the range of dimensions + (@begin_axis, @end_axis) + + Returns: + y (torch.Tensor): reshaped tensor + """ + assert(begin_axis <= end_axis) + assert(begin_axis >= 0) + assert(end_axis < len(x.shape)) + assert(isinstance(target_dims, (tuple, list))) + s = x.shape + final_s = [] + for i in range(len(s)): + if i == begin_axis: + final_s.extend(target_dims) + elif i < begin_axis or i > end_axis: + final_s.append(s[i]) + return x.reshape(*final_s) + + +def reshape_dimensions(x, begin_axis, end_axis, target_dims): + """ + Reshape selected dimensions for all tensors in nested dictionary or list or tuple + to a target dimension. 
+ + Args: + x (dict or list or tuple): a possibly nested dictionary or list or tuple + begin_axis (int): begin dimension + end_axis (int): end dimension + target_dims (tuple or list): target shape for the range of dimensions + (@begin_axis, @end_axis) + + Returns: + y (dict or list or tuple): new nested dict-list-tuple + """ + return recursive_dict_list_tuple_apply( + x, + { + torch.Tensor: lambda x, b=begin_axis, e=end_axis, t=target_dims: reshape_dimensions_single( + x, begin_axis=b, end_axis=e, target_dims=t), + np.ndarray: lambda x, b=begin_axis, e=end_axis, t=target_dims: reshape_dimensions_single( + x, begin_axis=b, end_axis=e, target_dims=t), + type(None): lambda x: x, + } + ) + + +def join_dimensions(x, begin_axis, end_axis): + """ + Joins all dimensions between dimensions (@begin_axis, @end_axis) into a flat dimension, for + all tensors in nested dictionary or list or tuple. + + Args: + x (dict or list or tuple): a possibly nested dictionary or list or tuple + begin_axis (int): begin dimension + end_axis (int): end dimension + + Returns: + y (dict or list or tuple): new nested dict-list-tuple + """ + return recursive_dict_list_tuple_apply( + x, + { + torch.Tensor: lambda x, b=begin_axis, e=end_axis: reshape_dimensions_single( + x, begin_axis=b, end_axis=e, target_dims=[-1]), + np.ndarray: lambda x, b=begin_axis, e=end_axis: reshape_dimensions_single( + x, begin_axis=b, end_axis=e, target_dims=[-1]), + type(None): lambda x: x, + } + ) + + +def expand_at_single(x, size, dim): + """ + Expand a tensor at a single dimension @dim by @size + + Args: + x (torch.Tensor): input tensor + size (int): size to expand + dim (int): dimension to expand + + Returns: + y (torch.Tensor): expanded tensor + """ + assert dim < x.ndimension() + assert x.shape[dim] == 1 + expand_dims = [-1] * x.ndimension() + expand_dims[dim] = size + return x.expand(*expand_dims) + + +def expand_at(x, size, dim): + """ + Expand all tensors in nested dictionary or list or tuple at a single + 
dimension @dim by @size. + + Args: + x (dict or list or tuple): a possibly nested dictionary or list or tuple + size (int): size to expand + dim (int): dimension to expand + + Returns: + y (dict or list or tuple): new nested dict-list-tuple + """ + return map_tensor(x, lambda t, s=size, d=dim: expand_at_single(t, s, d)) + + +def unsqueeze_expand_at(x, size, dim): + """ + Unsqueeze and expand a tensor at a dimension @dim by @size. + + Args: + x (dict or list or tuple): a possibly nested dictionary or list or tuple + size (int): size to expand + dim (int): dimension to unsqueeze and expand + + Returns: + y (dict or list or tuple): new nested dict-list-tuple + """ + x = unsqueeze(x, dim) + return expand_at(x, size, dim) + + +def repeat_by_expand_at(x, repeats, dim): + """ + Repeat a dimension by combining expand and reshape operations. + + Args: + x (dict or list or tuple): a possibly nested dictionary or list or tuple + repeats (int): number of times to repeat the target dimension + dim (int): dimension to repeat on + + Returns: + y (dict or list or tuple): new nested dict-list-tuple + """ + x = unsqueeze_expand_at(x, repeats, dim + 1) + return join_dimensions(x, dim, dim + 1) + + +def named_reduce_single(x, reduction, dim): + """ + Reduce tensor at a dimension by named reduction functions. 
+ + Args: + x (torch.Tensor): tensor to be reduced + reduction (str): one of ["sum", "max", "mean", "flatten"] + dim (int): dimension to be reduced (or begin axis for flatten) + + Returns: + y (torch.Tensor): reduced tensor + """ + assert x.ndimension() > dim + assert reduction in ["sum", "max", "mean", "flatten"] + if reduction == "flatten": + x = flatten(x, begin_axis=dim) + elif reduction == "max": + x = torch.max(x, dim=dim)[0] # [B, D] + elif reduction == "sum": + x = torch.sum(x, dim=dim) + else: + x = torch.mean(x, dim=dim) + return x + + +def named_reduce(x, reduction, dim): + """ + Reduces all tensors in nested dictionary or list or tuple at a dimension + using a named reduction function. + + Args: + x (dict or list or tuple): a possibly nested dictionary or list or tuple + reduction (str): one of ["sum", "max", "mean", "flatten"] + dim (int): dimension to be reduced (or begin axis for flatten) + + Returns: + y (dict or list or tuple): new nested dict-list-tuple + """ + return map_tensor(x, func=lambda t, r=reduction, d=dim: named_reduce_single(t, r, d)) + + +def gather_along_dim_with_dim_single(x, target_dim, source_dim, indices): + """ + This function indexes out a target dimension of a tensor in a structured way, + by allowing a different value to be selected for each member of a flat index + tensor (@indices) corresponding to a source dimension. This can be interpreted + as moving along the source dimension, using the corresponding index value + in @indices to select values for all other dimensions outside of the + source and target dimensions. A common use case is to gather values + in target dimension 1 for each batch member (target dimension 0). 
+ + Args: + x (torch.Tensor): tensor to gather values for + target_dim (int): dimension to gather values along + source_dim (int): dimension to hold constant and use for gathering values + from the other dimensions + indices (torch.Tensor): flat index tensor with same shape as tensor @x along + @source_dim + + Returns: + y (torch.Tensor): gathered tensor, with dimension @target_dim indexed out + """ + assert len(indices.shape) == 1 + assert x.shape[source_dim] == indices.shape[0] + + # unsqueeze in all dimensions except the source dimension + new_shape = [1] * x.ndimension() + new_shape[source_dim] = -1 + indices = indices.reshape(*new_shape) + + # repeat in all dimensions - but preserve shape of source dimension, + # and make sure target_dimension has singleton dimension + expand_shape = list(x.shape) + expand_shape[source_dim] = -1 + expand_shape[target_dim] = 1 + indices = indices.expand(*expand_shape) + + out = x.gather(dim=target_dim, index=indices) + return out.squeeze(target_dim) + + +def gather_along_dim_with_dim(x, target_dim, source_dim, indices): + """ + Apply @gather_along_dim_with_dim_single to all tensors in a nested + dictionary or list or tuple. + + Args: + x (dict or list or tuple): a possibly nested dictionary or list or tuple + target_dim (int): dimension to gather values along + source_dim (int): dimension to hold constant and use for gathering values + from the other dimensions + indices (torch.Tensor): flat index tensor with same shape as tensor @x along + @source_dim + + Returns: + y (dict or list or tuple): new nested dict-list-tuple + """ + return map_tensor(x, + lambda y, t=target_dim, s=source_dim, i=indices: gather_along_dim_with_dim_single(y, t, s, i)) + + +def gather_sequence_single(seq, indices): + """ + Given a tensor with leading dimensions [B, T, ...], gather an element from each sequence in + the batch given an index for each sequence. + + Args: + seq (torch.Tensor): tensor with leading dimensions [B, T, ...] 
+ indices (torch.Tensor): tensor indices of shape [B] + + Return: + y (torch.Tensor): indexed tensor of shape [B, ....] + """ + return gather_along_dim_with_dim_single(seq, target_dim=1, source_dim=0, indices=indices) + + +def gather_sequence(seq, indices): + """ + Given a nested dictionary or list or tuple, gathers an element from each sequence of the batch + for tensors with leading dimensions [B, T, ...]. + + Args: + seq (dict or list or tuple): a possibly nested dictionary or list or tuple with tensors + of leading dimensions [B, T, ...] + indices (torch.Tensor): tensor indices of shape [B] + + Returns: + y (dict or list or tuple): new nested dict-list-tuple with tensors of shape [B, ...] + """ + return gather_along_dim_with_dim(seq, target_dim=1, source_dim=0, indices=indices) + + +def pad_sequence_single(seq, padding, batched=False, pad_same=True, pad_values=None): + """ + Pad input tensor or array @seq in the time dimension (dimension 1). + + Args: + seq (np.ndarray or torch.Tensor): sequence to be padded + padding (tuple): begin and end padding, e.g. 
[1, 1] pads both begin and end of the sequence by 1 + batched (bool): if sequence has the batch dimension + pad_same (bool): if pad by duplicating + pad_values (scalar or (ndarray, Tensor)): values to be padded if not pad_same + + Returns: + padded sequence (np.ndarray or torch.Tensor) + """ + assert isinstance(seq, (np.ndarray, torch.Tensor)) + assert pad_same or pad_values is not None + if pad_values is not None: + assert isinstance(pad_values, float) + repeat_func = np.repeat if isinstance(seq, np.ndarray) else torch.repeat_interleave + concat_func = np.concatenate if isinstance(seq, np.ndarray) else torch.cat + ones_like_func = np.ones_like if isinstance(seq, np.ndarray) else torch.ones_like + seq_dim = 1 if batched else 0 + + begin_pad = [] + end_pad = [] + + if padding[0] > 0: + pad = seq[[0]] if pad_same else ones_like_func(seq[[0]]) * pad_values + begin_pad.append(repeat_func(pad, padding[0], seq_dim)) + if padding[1] > 0: + pad = seq[[-1]] if pad_same else ones_like_func(seq[[-1]]) * pad_values + end_pad.append(repeat_func(pad, padding[1], seq_dim)) + + return concat_func(begin_pad + [seq] + end_pad, seq_dim) + + +def pad_sequence(seq, padding, batched=False, pad_same=True, pad_values=None): + """ + Pad a nested dictionary or list or tuple of sequence tensors in the time dimension (dimension 1). + + Args: + seq (dict or list or tuple): a possibly nested dictionary or list or tuple with tensors + of leading dimensions [B, T, ...] + padding (tuple): begin and end padding, e.g. 
[1, 1] pads both begin and end of the sequence by 1 + batched (bool): if sequence has the batch dimension + pad_same (bool): if pad by duplicating + pad_values (scalar or (ndarray, Tensor)): values to be padded if not pad_same + + Returns: + padded sequence (dict or list or tuple) + """ + return recursive_dict_list_tuple_apply( + seq, + { + torch.Tensor: lambda x, p=padding, b=batched, ps=pad_same, pv=pad_values: + pad_sequence_single(x, p, b, ps, pv), + np.ndarray: lambda x, p=padding, b=batched, ps=pad_same, pv=pad_values: + pad_sequence_single(x, p, b, ps, pv), + type(None): lambda x: x, + } + ) + + +def assert_size_at_dim_single(x, size, dim, msg): + """ + Ensure that array or tensor @x has size @size in dim @dim. + + Args: + x (np.ndarray or torch.Tensor): input array or tensor + size (int): size that tensors should have at @dim + dim (int): dimension to check + msg (str): text to display if assertion fails + """ + assert x.shape[dim] == size, msg + + +def assert_size_at_dim(x, size, dim, msg): + """ + Ensure that arrays and tensors in nested dictionary or list or tuple have + size @size in dim @dim. + + Args: + x (dict or list or tuple): a possibly nested dictionary or list or tuple + size (int): size that tensors should have at @dim + dim (int): dimension to check + """ + map_tensor(x, lambda t, s=size, d=dim, m=msg: assert_size_at_dim_single(t, s, d, m)) + + +def get_shape(x): + """ + Get all shapes of arrays and tensors in nested dictionary or list or tuple. + + Args: + x (dict or list or tuple): a possibly nested dictionary or list or tuple + + Returns: + y (dict or list or tuple): new nested dict-list-tuple that contains each array or + tensor's shape + """ + return recursive_dict_list_tuple_apply( + x, + { + torch.Tensor: lambda x: x.shape, + np.ndarray: lambda x: x.shape, + type(None): lambda x: x, + } + ) + + +def list_of_flat_dict_to_dict_of_list(list_of_dict): + """ + Helper function to go from a list of flat dictionaries to a dictionary of lists. 
+ By "flat" we mean that none of the values are dictionaries, but are numpy arrays, + floats, etc. + + Args: + list_of_dict (list): list of flat dictionaries + + Returns: + dict_of_list (dict): dictionary of lists + """ + assert isinstance(list_of_dict, list) + dic = collections.OrderedDict() + for i in range(len(list_of_dict)): + for k in list_of_dict[i]: + if k not in dic: + dic[k] = [] + dic[k].append(list_of_dict[i][k]) + return dic + + +def flatten_nested_dict_list(d, parent_key='', sep='_', item_key=''): + """ + Flatten a nested dict or list to a list. + + For example, given a dict + { + a: 1 + b: { + c: 2 + } + c: 3 + } + + the function would return [(a, 1), (b_c, 2), (c, 3)] + + Args: + d (dict, list): a nested dict or list to be flattened + parent_key (str): recursion helper + sep (str): separator for nesting keys + item_key (str): recursion helper + Returns: + list: a list of (key, value) tuples + """ + items = [] + if isinstance(d, (tuple, list)): + new_key = parent_key + sep + item_key if len(parent_key) > 0 else item_key + for i, v in enumerate(d): + items.extend(flatten_nested_dict_list(v, new_key, sep=sep, item_key=str(i))) + return items + elif isinstance(d, dict): + new_key = parent_key + sep + item_key if len(parent_key) > 0 else item_key + for k, v in d.items(): + assert isinstance(k, str) + items.extend(flatten_nested_dict_list(v, new_key, sep=sep, item_key=k)) + return items + else: + new_key = parent_key + sep + item_key if len(parent_key) > 0 else item_key + return [(new_key, d)] + + +def time_distributed(inputs, op, activation=None, inputs_as_kwargs=False, inputs_as_args=False, **kwargs): + """ + Apply function @op to all tensors in nested dictionary or list or tuple @inputs in both the + batch (B) and time (T) dimension, where the tensors are expected to have shape [B, T, ...]. + Will do this by reshaping tensors to [B * T, ...], passing through the op, and then reshaping + outputs to [B, T, ...]. 
+ + Args: + inputs (list or tuple or dict): a possibly nested dictionary or list or tuple with tensors + of leading dimensions [B, T, ...] + op: a layer op that accepts inputs + activation: activation to apply at the output + inputs_as_kwargs (bool): whether to feed input as a kwargs dict to the op + inputs_as_args (bool) whether to feed input as a args list to the op + kwargs (dict): other kwargs to supply to the op + + Returns: + outputs (dict or list or tuple): new nested dict-list-tuple with tensors of leading dimension [B, T]. + """ + batch_size, seq_len = flatten_nested_dict_list(inputs)[0][1].shape[:2] + inputs = join_dimensions(inputs, 0, 1) + if inputs_as_kwargs: + outputs = op(**inputs, **kwargs) + elif inputs_as_args: + outputs = op(*inputs, **kwargs) + else: + outputs = op(inputs, **kwargs) + + if activation is not None: + outputs = map_tensor(outputs, activation) + outputs = reshape_dimensions(outputs, begin_axis=0, end_axis=0, target_dims=(batch_size, seq_len)) + return outputs diff --git a/diffusion_policy/model/diffusion/conditional_unet1d.py b/diffusion_policy/model/diffusion/conditional_unet1d.py new file mode 100644 index 0000000..0e17bbf --- /dev/null +++ b/diffusion_policy/model/diffusion/conditional_unet1d.py @@ -0,0 +1,238 @@ +from typing import Union +import logging +import torch +import torch.nn as nn +import einops +from einops.layers.torch import Rearrange + +from diffusion_policy.model.diffusion.conv1d_components import ( + Downsample1d, Upsample1d, Conv1dBlock) +from diffusion_policy.model.diffusion.positional_embedding import SinusoidalPosEmb + +logger = logging.getLogger(__name__) + +class ConditionalResidualBlock1D(nn.Module): + def __init__(self, + in_channels, + out_channels, + cond_dim, + kernel_size=3, + n_groups=8, + cond_predict_scale=False): + super().__init__() + + self.blocks = nn.ModuleList([ + Conv1dBlock(in_channels, out_channels, kernel_size, n_groups=n_groups), + Conv1dBlock(out_channels, out_channels, kernel_size, 
n_groups=n_groups), + ]) + + # FiLM modulation https://arxiv.org/abs/1709.07871 + # predicts per-channel scale and bias + cond_channels = out_channels + if cond_predict_scale: + cond_channels = out_channels * 2 + self.cond_predict_scale = cond_predict_scale + self.out_channels = out_channels + self.cond_encoder = nn.Sequential( + nn.Mish(), + nn.Linear(cond_dim, cond_channels), + Rearrange('batch t -> batch t 1'), + ) + + # make sure dimensions compatible + self.residual_conv = nn.Conv1d(in_channels, out_channels, 1) \ + if in_channels != out_channels else nn.Identity() + + def forward(self, x, cond): + ''' + x : [ batch_size x in_channels x horizon ] + cond : [ batch_size x cond_dim] + + returns: + out : [ batch_size x out_channels x horizon ] + ''' + out = self.blocks[0](x) + embed = self.cond_encoder(cond) + if self.cond_predict_scale: + embed = embed.reshape( + embed.shape[0], 2, self.out_channels, 1) + scale = embed[:,0,...] + bias = embed[:,1,...] + out = scale * out + bias + else: + out = out + embed + out = self.blocks[1](out) + out = out + self.residual_conv(x) + return out + + +class ConditionalUnet1D(nn.Module): + def __init__(self, + input_dim, + local_cond_dim=None, + global_cond_dim=None, + diffusion_step_embed_dim=256, + down_dims=[256,512,1024], + kernel_size=3, + n_groups=8, + cond_predict_scale=False + ): + super().__init__() + all_dims = [input_dim] + list(down_dims) + start_dim = down_dims[0] + + dsed = diffusion_step_embed_dim + diffusion_step_encoder = nn.Sequential( + SinusoidalPosEmb(dsed), + nn.Linear(dsed, dsed * 4), + nn.Mish(), + nn.Linear(dsed * 4, dsed), + ) + cond_dim = dsed + if global_cond_dim is not None: + cond_dim += global_cond_dim + + in_out = list(zip(all_dims[:-1], all_dims[1:])) + + local_cond_encoder = None + if local_cond_dim is not None: + _, dim_out = in_out[0] + dim_in = local_cond_dim + local_cond_encoder = nn.ModuleList([ + # down encoder + ConditionalResidualBlock1D( + dim_in, dim_out, cond_dim=cond_dim, + 
kernel_size=kernel_size, n_groups=n_groups, + cond_predict_scale=cond_predict_scale), + # up encoder + ConditionalResidualBlock1D( + dim_in, dim_out, cond_dim=cond_dim, + kernel_size=kernel_size, n_groups=n_groups, + cond_predict_scale=cond_predict_scale) + ]) + + mid_dim = all_dims[-1] + self.mid_modules = nn.ModuleList([ + ConditionalResidualBlock1D( + mid_dim, mid_dim, cond_dim=cond_dim, + kernel_size=kernel_size, n_groups=n_groups, + cond_predict_scale=cond_predict_scale + ), + ConditionalResidualBlock1D( + mid_dim, mid_dim, cond_dim=cond_dim, + kernel_size=kernel_size, n_groups=n_groups, + cond_predict_scale=cond_predict_scale + ), + ]) + + down_modules = nn.ModuleList([]) + for ind, (dim_in, dim_out) in enumerate(in_out): + is_last = ind >= (len(in_out) - 1) + down_modules.append(nn.ModuleList([ + ConditionalResidualBlock1D( + dim_in, dim_out, cond_dim=cond_dim, + kernel_size=kernel_size, n_groups=n_groups, + cond_predict_scale=cond_predict_scale), + ConditionalResidualBlock1D( + dim_out, dim_out, cond_dim=cond_dim, + kernel_size=kernel_size, n_groups=n_groups, + cond_predict_scale=cond_predict_scale), + Downsample1d(dim_out) if not is_last else nn.Identity() + ])) + + up_modules = nn.ModuleList([]) + for ind, (dim_in, dim_out) in enumerate(reversed(in_out[1:])): + is_last = ind >= (len(in_out) - 1) + up_modules.append(nn.ModuleList([ + ConditionalResidualBlock1D( + dim_out*2, dim_in, cond_dim=cond_dim, + kernel_size=kernel_size, n_groups=n_groups, + cond_predict_scale=cond_predict_scale), + ConditionalResidualBlock1D( + dim_in, dim_in, cond_dim=cond_dim, + kernel_size=kernel_size, n_groups=n_groups, + cond_predict_scale=cond_predict_scale), + Upsample1d(dim_in) if not is_last else nn.Identity() + ])) + + final_conv = nn.Sequential( + Conv1dBlock(start_dim, start_dim, kernel_size=kernel_size), + nn.Conv1d(start_dim, input_dim, 1), + ) + + self.diffusion_step_encoder = diffusion_step_encoder + self.local_cond_encoder = local_cond_encoder + self.up_modules = 
up_modules + self.down_modules = down_modules + self.final_conv = final_conv + + logger.info( + "number of parameters: %e", sum(p.numel() for p in self.parameters()) + ) + + def forward(self, + sample: torch.Tensor, + timestep: Union[torch.Tensor, float, int], + local_cond=None, global_cond=None, **kwargs): + """ + x: (B,T,input_dim) + timestep: (B,) or int, diffusion step + local_cond: (B,T,local_cond_dim) + global_cond: (B,global_cond_dim) + output: (B,T,input_dim) + """ + sample = einops.rearrange(sample, 'b h t -> b t h') + + # 1. time + timesteps = timestep + if not torch.is_tensor(timesteps): + # TODO: this requires sync between CPU and GPU. So try to pass timesteps as tensors if you can + timesteps = torch.tensor([timesteps], dtype=torch.long, device=sample.device) + elif torch.is_tensor(timesteps) and len(timesteps.shape) == 0: + timesteps = timesteps[None].to(sample.device) + # broadcast to batch dimension in a way that's compatible with ONNX/Core ML + timesteps = timesteps.expand(sample.shape[0]) + + global_feature = self.diffusion_step_encoder(timesteps) + + if global_cond is not None: + global_feature = torch.cat([ + global_feature, global_cond + ], axis=-1) + + # encode local features + h_local = list() + if local_cond is not None: + local_cond = einops.rearrange(local_cond, 'b h t -> b t h') + resnet, resnet2 = self.local_cond_encoder + x = resnet(local_cond, global_feature) + h_local.append(x) + x = resnet2(local_cond, global_feature) + h_local.append(x) + + x = sample + h = [] + for idx, (resnet, resnet2, downsample) in enumerate(self.down_modules): + x = resnet(x, global_feature) + if idx == 0 and len(h_local) > 0: + x = x + h_local[0] + x = resnet2(x, global_feature) + h.append(x) + x = downsample(x) + + for mid_module in self.mid_modules: + x = mid_module(x, global_feature) + + for idx, (resnet, resnet2, upsample) in enumerate(self.up_modules): + x = torch.cat((x, h.pop()), dim=1) + x = resnet(x, global_feature) + if idx == len(self.up_modules) 
and len(h_local) > 0: + x = x + h_local[1] + x = resnet2(x, global_feature) + x = upsample(x) + + x = self.final_conv(x) + + x = einops.rearrange(x, 'b t h -> b h t') + return x + diff --git a/diffusion_policy/model/diffusion/conv1d_components.py b/diffusion_policy/model/diffusion/conv1d_components.py new file mode 100644 index 0000000..1c4cfc9 --- /dev/null +++ b/diffusion_policy/model/diffusion/conv1d_components.py @@ -0,0 +1,46 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +# from einops.layers.torch import Rearrange + + +class Downsample1d(nn.Module): + def __init__(self, dim): + super().__init__() + self.conv = nn.Conv1d(dim, dim, 3, 2, 1) + + def forward(self, x): + return self.conv(x) + +class Upsample1d(nn.Module): + def __init__(self, dim): + super().__init__() + self.conv = nn.ConvTranspose1d(dim, dim, 4, 2, 1) + + def forward(self, x): + return self.conv(x) + +class Conv1dBlock(nn.Module): + ''' + Conv1d --> GroupNorm --> Mish + ''' + + def __init__(self, inp_channels, out_channels, kernel_size, n_groups=8): + super().__init__() + + self.block = nn.Sequential( + nn.Conv1d(inp_channels, out_channels, kernel_size, padding=kernel_size // 2), + # Rearrange('batch channels horizon -> batch channels 1 horizon'), + nn.GroupNorm(n_groups, out_channels), + # Rearrange('batch channels 1 horizon -> batch channels horizon'), + nn.Mish(), + ) + + def forward(self, x): + return self.block(x) + + +def test(): + cb = Conv1dBlock(256, 128, kernel_size=3) + x = torch.zeros((1,256,16)) + o = cb(x) diff --git a/diffusion_policy/model/diffusion/ema_model.py b/diffusion_policy/model/diffusion/ema_model.py new file mode 100644 index 0000000..d916c2b --- /dev/null +++ b/diffusion_policy/model/diffusion/ema_model.py @@ -0,0 +1,88 @@ +import copy +import torch +from torch.nn.modules.batchnorm import _BatchNorm + +class EMAModel: + """ + Exponential Moving Average of models weights + """ + + def __init__( + self, + model, + update_after_step=0, + 
inv_gamma=1.0, + power=2 / 3, + min_value=0.0, + max_value=0.9999 + ): + """ + @crowsonkb's notes on EMA Warmup: + If gamma=1 and power=1, implements a simple average. gamma=1, power=2/3 are good values for models you plan + to train for a million or more steps (reaches decay factor 0.999 at 31.6K steps, 0.9999 at 1M steps), + gamma=1, power=3/4 for models you plan to train for less (reaches decay factor 0.999 at 10K steps, 0.9999 + at 215.4k steps). + Args: + inv_gamma (float): Inverse multiplicative factor of EMA warmup. Default: 1. + power (float): Exponential factor of EMA warmup. Default: 2/3. + min_value (float): The minimum EMA decay rate. Default: 0. + """ + + self.averaged_model = model + self.averaged_model.eval() + self.averaged_model.requires_grad_(False) + + self.update_after_step = update_after_step + self.inv_gamma = inv_gamma + self.power = power + self.min_value = min_value + self.max_value = max_value + + self.decay = 0.0 + self.optimization_step = 0 + + def get_decay(self, optimization_step): + """ + Compute the decay factor for the exponential moving average. + """ + step = max(0, optimization_step - self.update_after_step - 1) + value = 1 - (1 + step / self.inv_gamma) ** -self.power + + if step <= 0: + return 0.0 + + return max(self.min_value, min(value, self.max_value)) + + @torch.no_grad() + def step(self, new_model): + self.decay = self.get_decay(self.optimization_step) + + # old_all_dataptrs = set() + # for param in new_model.parameters(): + # data_ptr = param.data_ptr() + # if data_ptr != 0: + # old_all_dataptrs.add(data_ptr) + + all_dataptrs = set() + for module, ema_module in zip(new_model.modules(), self.averaged_model.modules()): + for param, ema_param in zip(module.parameters(recurse=False), ema_module.parameters(recurse=False)): + # iterative over immediate parameters only. 
+ if isinstance(param, dict): + raise RuntimeError('Dict parameter not supported') + + # data_ptr = param.data_ptr() + # if data_ptr != 0: + # all_dataptrs.add(data_ptr) + + if isinstance(module, _BatchNorm): + # skip batchnorms + ema_param.copy_(param.to(dtype=ema_param.dtype).data) + elif not param.requires_grad: + ema_param.copy_(param.to(dtype=ema_param.dtype).data) + else: + ema_param.mul_(self.decay) + ema_param.add_(param.data.to(dtype=ema_param.dtype), alpha=1 - self.decay) + + # verify that iterating over module and then parameters is identical to parameters recursively. + # assert old_all_dataptrs == all_dataptrs + self.optimization_step += 1 diff --git a/diffusion_policy/model/diffusion/mask_generator.py b/diffusion_policy/model/diffusion/mask_generator.py new file mode 100644 index 0000000..fc5bb7d --- /dev/null +++ b/diffusion_policy/model/diffusion/mask_generator.py @@ -0,0 +1,221 @@ +from typing import Sequence, Optional +import torch +from torch import nn +from diffusion_policy.model.common.module_attr_mixin import ModuleAttrMixin + + +def get_intersection_slice_mask( + shape: tuple, + dim_slices: Sequence[slice], + device: Optional[torch.device]=None + ): + assert(len(shape) == len(dim_slices)) + mask = torch.zeros(size=shape, dtype=torch.bool, device=device) + mask[dim_slices] = True + return mask + + +def get_union_slice_mask( + shape: tuple, + dim_slices: Sequence[slice], + device: Optional[torch.device]=None + ): + assert(len(shape) == len(dim_slices)) + mask = torch.zeros(size=shape, dtype=torch.bool, device=device) + for i in range(len(dim_slices)): + this_slices = [slice(None)] * len(shape) + this_slices[i] = dim_slices[i] + mask[this_slices] = True + return mask + + +class DummyMaskGenerator(ModuleAttrMixin): + def __init__(self): + super().__init__() + + @torch.no_grad() + def forward(self, shape): + device = self.device + mask = torch.ones(size=shape, dtype=torch.bool, device=device) + return mask + + +class 
LowdimMaskGenerator(ModuleAttrMixin): + def __init__(self, + action_dim, obs_dim, + # obs mask setup + max_n_obs_steps=2, + fix_obs_steps=True, + # action mask + action_visible=False + ): + super().__init__() + self.action_dim = action_dim + self.obs_dim = obs_dim + self.max_n_obs_steps = max_n_obs_steps + self.fix_obs_steps = fix_obs_steps + self.action_visible = action_visible + + @torch.no_grad() + def forward(self, shape, seed=None): + device = self.device + B, T, D = shape + assert D == (self.action_dim + self.obs_dim) + + # create all tensors on this device + rng = torch.Generator(device=device) + if seed is not None: + rng = rng.manual_seed(seed) + + # generate dim mask + dim_mask = torch.zeros(size=shape, + dtype=torch.bool, device=device) + is_action_dim = dim_mask.clone() + is_action_dim[...,:self.action_dim] = True + is_obs_dim = ~is_action_dim + + # generate obs mask + if self.fix_obs_steps: + obs_steps = torch.full((B,), + fill_value=self.max_n_obs_steps, device=device) + else: + obs_steps = torch.randint( + low=1, high=self.max_n_obs_steps+1, + size=(B,), generator=rng, device=device) + + steps = torch.arange(0, T, device=device).reshape(1,T).expand(B,T) + obs_mask = (steps.T < obs_steps).T.reshape(B,T,1).expand(B,T,D) + obs_mask = obs_mask & is_obs_dim + + # generate action mask + if self.action_visible: + action_steps = torch.maximum( + obs_steps - 1, + torch.tensor(0, + dtype=obs_steps.dtype, + device=obs_steps.device)) + action_mask = (steps.T < action_steps).T.reshape(B,T,1).expand(B,T,D) + action_mask = action_mask & is_action_dim + + mask = obs_mask + if self.action_visible: + mask = mask | action_mask + + return mask + + +class KeypointMaskGenerator(ModuleAttrMixin): + def __init__(self, + # dimensions + action_dim, keypoint_dim, + # obs mask setup + max_n_obs_steps=2, fix_obs_steps=True, + # keypoint mask setup + keypoint_visible_rate=0.7, time_independent=False, + # action mask + action_visible=False, + context_dim=0, # dim for context + 
n_context_steps=1 + ): + super().__init__() + self.action_dim = action_dim + self.keypoint_dim = keypoint_dim + self.context_dim = context_dim + self.max_n_obs_steps = max_n_obs_steps + self.fix_obs_steps = fix_obs_steps + self.keypoint_visible_rate = keypoint_visible_rate + self.time_independent = time_independent + self.action_visible = action_visible + self.n_context_steps = n_context_steps + + @torch.no_grad() + def forward(self, shape, seed=None): + device = self.device + B, T, D = shape + all_keypoint_dims = D - self.action_dim - self.context_dim + n_keypoints = all_keypoint_dims // self.keypoint_dim + + # create all tensors on this device + rng = torch.Generator(device=device) + if seed is not None: + rng = rng.manual_seed(seed) + + # generate dim mask + dim_mask = torch.zeros(size=shape, + dtype=torch.bool, device=device) + is_action_dim = dim_mask.clone() + is_action_dim[...,:self.action_dim] = True + is_context_dim = dim_mask.clone() + if self.context_dim > 0: + is_context_dim[...,-self.context_dim:] = True + is_obs_dim = ~(is_action_dim | is_context_dim) + # assumption trajectory=cat([action, keypoints, context], dim=-1) + + # generate obs mask + if self.fix_obs_steps: + obs_steps = torch.full((B,), + fill_value=self.max_n_obs_steps, device=device) + else: + obs_steps = torch.randint( + low=1, high=self.max_n_obs_steps+1, + size=(B,), generator=rng, device=device) + + steps = torch.arange(0, T, device=device).reshape(1,T).expand(B,T) + obs_mask = (steps.T < obs_steps).T.reshape(B,T,1).expand(B,T,D) + obs_mask = obs_mask & is_obs_dim + + # generate action mask + if self.action_visible: + action_steps = torch.maximum( + obs_steps - 1, + torch.tensor(0, + dtype=obs_steps.dtype, + device=obs_steps.device)) + action_mask = (steps.T < action_steps).T.reshape(B,T,1).expand(B,T,D) + action_mask = action_mask & is_action_dim + + # generate keypoint mask + if self.time_independent: + visible_kps = torch.rand(size=(B, T, n_keypoints), + generator=rng, 
class SinusoidalPosEmb(nn.Module):
    """Sinusoidal embedding for scalar positions (e.g. diffusion timesteps).

    Maps a 1-D tensor of positions of shape (B,) to embeddings of shape
    (B, dim): the first dim//2 channels are sines and the last dim//2 are
    cosines, computed over a geometric ladder of frequencies from 1 down to
    1/10000 (the "Attention Is All You Need" scheme).
    """

    def __init__(self, dim):
        super().__init__()
        self.dim = dim

    def forward(self, x):
        half_dim = self.dim // 2
        # frequency ladder: exp(-log(10000) * k / (half_dim - 1)), k = 0..half_dim-1
        step = -math.log(10000) / (half_dim - 1)
        freqs = torch.exp(step * torch.arange(half_dim, device=x.device))
        angles = x[:, None] * freqs[None, :]
        return torch.cat((angles.sin(), angles.cos()), dim=-1)
n_cond_layers > 0: + encoder_layer = nn.TransformerEncoderLayer( + d_model=n_emb, + nhead=n_head, + dim_feedforward=4*n_emb, + dropout=p_drop_attn, + activation='gelu', + batch_first=True, + norm_first=True + ) + self.encoder = nn.TransformerEncoder( + encoder_layer=encoder_layer, + num_layers=n_cond_layers + ) + else: + self.encoder = nn.Sequential( + nn.Linear(n_emb, 4 * n_emb), + nn.Mish(), + nn.Linear(4 * n_emb, n_emb) + ) + # decoder + decoder_layer = nn.TransformerDecoderLayer( + d_model=n_emb, + nhead=n_head, + dim_feedforward=4*n_emb, + dropout=p_drop_attn, + activation='gelu', + batch_first=True, + norm_first=True # important for stability + ) + self.decoder = nn.TransformerDecoder( + decoder_layer=decoder_layer, + num_layers=n_layer + ) + else: + # encoder only BERT + encoder_only = True + + encoder_layer = nn.TransformerEncoderLayer( + d_model=n_emb, + nhead=n_head, + dim_feedforward=4*n_emb, + dropout=p_drop_attn, + activation='gelu', + batch_first=True, + norm_first=True + ) + self.encoder = nn.TransformerEncoder( + encoder_layer=encoder_layer, + num_layers=n_layer + ) + + # attention mask + if causal_attn: + # causal mask to ensure that attention is only applied to the left in the input sequence + # torch.nn.Transformer uses additive mask as opposed to multiplicative mask in minGPT + # therefore, the upper triangle should be -inf and others (including diag) should be 0. 
+ sz = T + mask = (torch.triu(torch.ones(sz, sz)) == 1).transpose(0, 1) + mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0)) + self.register_buffer("mask", mask) + + if time_as_cond and obs_as_cond: + S = T_cond + t, s = torch.meshgrid( + torch.arange(T), + torch.arange(S), + indexing='ij' + ) + mask = t >= (s-1) # add one dimension since time is the first token in cond + mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0)) + self.register_buffer('memory_mask', mask) + else: + self.memory_mask = None + else: + self.mask = None + self.memory_mask = None + + # decoder head + self.ln_f = nn.LayerNorm(n_emb) + self.head = nn.Linear(n_emb, output_dim) + + # constants + self.T = T + self.T_cond = T_cond + self.horizon = horizon + self.time_as_cond = time_as_cond + self.obs_as_cond = obs_as_cond + self.encoder_only = encoder_only + + # init + self.apply(self._init_weights) + logger.info( + "number of parameters: %e", sum(p.numel() for p in self.parameters()) + ) + + def _init_weights(self, module): + ignore_types = (nn.Dropout, + SinusoidalPosEmb, + nn.TransformerEncoderLayer, + nn.TransformerDecoderLayer, + nn.TransformerEncoder, + nn.TransformerDecoder, + nn.ModuleList, + nn.Mish, + nn.Sequential) + if isinstance(module, (nn.Linear, nn.Embedding)): + torch.nn.init.normal_(module.weight, mean=0.0, std=0.02) + if isinstance(module, nn.Linear) and module.bias is not None: + torch.nn.init.zeros_(module.bias) + elif isinstance(module, nn.MultiheadAttention): + weight_names = [ + 'in_proj_weight', 'q_proj_weight', 'k_proj_weight', 'v_proj_weight'] + for name in weight_names: + weight = getattr(module, name) + if weight is not None: + torch.nn.init.normal_(weight, mean=0.0, std=0.02) + + bias_names = ['in_proj_bias', 'bias_k', 'bias_v'] + for name in bias_names: + bias = getattr(module, name) + if bias is not None: + torch.nn.init.zeros_(bias) + elif isinstance(module, nn.LayerNorm): + 
torch.nn.init.zeros_(module.bias) + torch.nn.init.ones_(module.weight) + elif isinstance(module, TransformerForDiffusion): + torch.nn.init.normal_(module.pos_emb, mean=0.0, std=0.02) + if module.cond_obs_emb is not None: + torch.nn.init.normal_(module.cond_pos_emb, mean=0.0, std=0.02) + elif isinstance(module, ignore_types): + # no param + pass + else: + raise RuntimeError("Unaccounted module {}".format(module)) + + def get_optim_groups(self, weight_decay: float=1e-3): + """ + This long function is unfortunately doing something very simple and is being very defensive: + We are separating out all parameters of the model into two buckets: those that will experience + weight decay for regularization and those that won't (biases, and layernorm/embedding weights). + We are then returning the PyTorch optimizer object. + """ + + # separate out all parameters to those that will and won't experience regularizing weight decay + decay = set() + no_decay = set() + whitelist_weight_modules = (torch.nn.Linear, torch.nn.MultiheadAttention) + blacklist_weight_modules = (torch.nn.LayerNorm, torch.nn.Embedding) + for mn, m in self.named_modules(): + for pn, p in m.named_parameters(): + fpn = "%s.%s" % (mn, pn) if mn else pn # full param name + + if pn.endswith("bias"): + # all biases will not be decayed + no_decay.add(fpn) + elif pn.startswith("bias"): + # MultiheadAttention bias starts with "bias" + no_decay.add(fpn) + elif pn.endswith("weight") and isinstance(m, whitelist_weight_modules): + # weights of whitelist modules will be weight decayed + decay.add(fpn) + elif pn.endswith("weight") and isinstance(m, blacklist_weight_modules): + # weights of blacklist modules will NOT be weight decayed + no_decay.add(fpn) + + # special case the position embedding parameter in the root GPT module as not decayed + no_decay.add("pos_emb") + no_decay.add("_dummy_variable") + if self.cond_pos_emb is not None: + no_decay.add("cond_pos_emb") + + # validate that we considered every parameter + 
    def forward(self,
        sample: torch.Tensor,
        timestep: Union[torch.Tensor, float, int],
        cond: Optional[torch.Tensor]=None, **kwargs) -> torch.Tensor:
        """
        Predict the denoising output for a batch of noisy trajectories.

        sample: (B,T,input_dim) noisy input trajectory
        timestep: (B,) tensor or scalar int/float, diffusion step
        cond: (B,T',cond_dim) observation conditioning; only read when the
            model was built with obs_as_cond, ignored otherwise
        kwargs: accepted and ignored (keeps the call signature compatible
            with callers that pass extra scheduler arguments)
        output: (B,T,input_dim)
        """
        # 1. time: normalize `timestep` into a (B,) tensor on sample's device
        timesteps = timestep
        if not torch.is_tensor(timesteps):
            # TODO: this requires sync between CPU and GPU. So try to pass timesteps as tensors if you can
            timesteps = torch.tensor([timesteps], dtype=torch.long, device=sample.device)
        elif torch.is_tensor(timesteps) and len(timesteps.shape) == 0:
            # 0-d tensor -> 1-element tensor
            timesteps = timesteps[None].to(sample.device)
        # broadcast to batch dimension in a way that's compatible with ONNX/Core ML
        timesteps = timesteps.expand(sample.shape[0])
        time_emb = self.time_emb(timesteps).unsqueeze(1)
        # (B,1,n_emb)

        # process input: project trajectory features into the embedding space
        input_emb = self.input_emb(sample)

        if self.encoder_only:
            # BERT-style: prepend the time token to the input tokens and run
            # the whole sequence through a single encoder stack
            token_embeddings = torch.cat([time_emb, input_emb], dim=1)
            t = token_embeddings.shape[1]
            position_embeddings = self.pos_emb[
                :, :t, :
            ]  # each position maps to a (learnable) vector
            x = self.drop(token_embeddings + position_embeddings)
            # (B,T+1,n_emb)
            x = self.encoder(src=x, mask=self.mask)
            # (B,T+1,n_emb)
            # drop the leading time token so output aligns with the trajectory
            x = x[:,1:,:]
            # (B,T,n_emb)
        else:
            # encoder: build the conditioning memory from the time token plus
            # (optionally) the embedded observation tokens
            cond_embeddings = time_emb
            if self.obs_as_cond:
                cond_obs_emb = self.cond_obs_emb(cond)
                # (B,To,n_emb)
                cond_embeddings = torch.cat([cond_embeddings, cond_obs_emb], dim=1)
            tc = cond_embeddings.shape[1]
            position_embeddings = self.cond_pos_emb[
                :, :tc, :
            ]  # each position maps to a (learnable) vector
            x = self.drop(cond_embeddings + position_embeddings)
            x = self.encoder(x)
            memory = x
            # (B,T_cond,n_emb)

            # decoder: trajectory tokens self-attend (masked) and cross-attend
            # to the conditioning memory
            token_embeddings = input_emb
            t = token_embeddings.shape[1]
            position_embeddings = self.pos_emb[
                :, :t, :
            ]  # each position maps to a (learnable) vector
            x = self.drop(token_embeddings + position_embeddings)
            # (B,T,n_emb)
            x = self.decoder(
                tgt=x,
                memory=memory,
                tgt_mask=self.mask,
                memory_mask=self.memory_mask
            )
            # (B,T,n_emb)

        # head: final LayerNorm then linear projection to the output dim
        x = self.ln_f(x)
        x = self.head(x)
        # (B,T,n_out)
        return x
class CropRandomizer(nn.Module):
    """
    Randomly sample crops at input, and then average across crop features at output.

    In training mode, forward_in takes @num_crops random crops per image and
    folds them into the batch dimension; in eval mode it takes a deterministic
    center crop instead. forward_out undoes the batch expansion by averaging
    the per-crop features.
    """
    def __init__(
        self,
        input_shape,
        crop_height,
        crop_width,
        num_crops=1,
        pos_enc=False,
    ):
        """
        Args:
            input_shape (tuple, list): shape of input (not including batch dimension)
            crop_height (int): crop height
            crop_width (int): crop width
            num_crops (int): number of random crops to take
            pos_enc (bool): if True, add 2 channels to the output to encode the spatial
                location of the cropped pixels in the source image
        """
        super().__init__()

        assert len(input_shape) == 3 # (C, H, W)
        # crops must be strictly smaller than the source image
        assert crop_height < input_shape[1]
        assert crop_width < input_shape[2]

        self.input_shape = input_shape
        self.crop_height = crop_height
        self.crop_width = crop_width
        self.num_crops = num_crops
        self.pos_enc = pos_enc

    def output_shape_in(self, input_shape=None):
        """
        Function to compute output shape from inputs to this module. Corresponds to
        the @forward_in operation, where raw inputs (usually observation modalities)
        are passed in.

        Args:
            input_shape (iterable of int): shape of input. Does not include batch dimension.
                Some modules may not need this argument, if their output does not depend
                on the size of the input, or if they assume fixed size input.

        Returns:
            out_shape ([int]): list of integers corresponding to output shape
        """

        # outputs are shape (C, CH, CW), or maybe C + 2 if using position encoding, because
        # the number of crops are reshaped into the batch dimension, increasing the batch
        # size from B to B * N
        out_c = self.input_shape[0] + 2 if self.pos_enc else self.input_shape[0]
        return [out_c, self.crop_height, self.crop_width]

    def output_shape_out(self, input_shape=None):
        """
        Function to compute output shape from inputs to this module. Corresponds to
        the @forward_out operation, where processed inputs (usually encoded observation
        modalities) are passed in.

        Args:
            input_shape (iterable of int): shape of input. Does not include batch dimension.
                Some modules may not need this argument, if their output does not depend
                on the size of the input, or if they assume fixed size input.

        Returns:
            out_shape ([int]): list of integers corresponding to output shape
        """

        # since the forward_out operation splits [B * N, ...] -> [B, N, ...]
        # and then pools to result in [B, ...], only the batch dimension changes,
        # and so the other dimensions retain their shape.
        return list(input_shape)

    def forward_in(self, inputs):
        """
        Samples N random crops for each input in the batch, and then reshapes
        inputs to [B * N, ...].
        """
        assert len(inputs.shape) >= 3 # must have at least (C, H, W) dimensions
        if self.training:
            # generate random crops (randomization only during training)
            out, _ = sample_random_image_crops(
                images=inputs,
                crop_height=self.crop_height,
                crop_width=self.crop_width,
                num_crops=self.num_crops,
                pos_enc=self.pos_enc,
            )
            # [B, N, ...] -> [B * N, ...]
            return tu.join_dimensions(out, 0, 1)
        else:
            # take center crop during eval (deterministic)
            out = ttf.center_crop(img=inputs, output_size=(
                self.crop_height, self.crop_width))
            if self.num_crops > 1:
                # replicate the single center crop N times so the batch layout
                # matches the training-mode output
                B,C,H,W = out.shape
                out = out.unsqueeze(1).expand(B,self.num_crops,C,H,W).reshape(-1,C,H,W)
                # [B * N, ...]
            return out

    def forward_out(self, inputs):
        """
        Splits the outputs from shape [B * N, ...] -> [B, N, ...] and then average across N
        to result in shape [B, ...] to make sure the network output is consistent with
        what would have happened if there were no randomization.
        """
        if self.num_crops <= 1:
            # nothing was folded into the batch dimension
            return inputs
        else:
            batch_size = (inputs.shape[0] // self.num_crops)
            out = tu.reshape_dimensions(inputs, begin_axis=0, end_axis=0,
                target_dims=(batch_size, self.num_crops))
            # pool features across the N crops
            return out.mean(dim=1)

    def forward(self, inputs):
        return self.forward_in(inputs)

    def __repr__(self):
        """Pretty print network."""
        header = '{}'.format(str(self.__class__.__name__))
        msg = header + "(input_shape={}, crop_size=[{}, {}], num_crops={})".format(
            self.input_shape, self.crop_height, self.crop_width, self.num_crops)
        return msg
+ + crop_height (int): height of crop to take + + crop_width (int): width of crop to take + + Returns: + crops (torch.Tesnor): cropped images of shape [..., C, @crop_height, @crop_width] + """ + + # make sure length of input shapes is consistent + assert crop_indices.shape[-1] == 2 + ndim_im_shape = len(images.shape) + ndim_indices_shape = len(crop_indices.shape) + assert (ndim_im_shape == ndim_indices_shape + 1) or (ndim_im_shape == ndim_indices_shape + 2) + + # maybe pad so that @crop_indices is shape [..., N, 2] + is_padded = False + if ndim_im_shape == ndim_indices_shape + 2: + crop_indices = crop_indices.unsqueeze(-2) + is_padded = True + + # make sure leading dimensions between images and indices are consistent + assert images.shape[:-3] == crop_indices.shape[:-2] + + device = images.device + image_c, image_h, image_w = images.shape[-3:] + num_crops = crop_indices.shape[-2] + + # make sure @crop_indices are in valid range + assert (crop_indices[..., 0] >= 0).all().item() + assert (crop_indices[..., 0] < (image_h - crop_height)).all().item() + assert (crop_indices[..., 1] >= 0).all().item() + assert (crop_indices[..., 1] < (image_w - crop_width)).all().item() + + # convert each crop index (ch, cw) into a list of pixel indices that correspond to the entire window. + + # 2D index array with columns [0, 1, ..., CH - 1] and shape [CH, CW] + crop_ind_grid_h = torch.arange(crop_height).to(device) + crop_ind_grid_h = tu.unsqueeze_expand_at(crop_ind_grid_h, size=crop_width, dim=-1) + # 2D index array with rows [0, 1, ..., CW - 1] and shape [CH, CW] + crop_ind_grid_w = torch.arange(crop_width).to(device) + crop_ind_grid_w = tu.unsqueeze_expand_at(crop_ind_grid_w, size=crop_height, dim=0) + # combine into shape [CH, CW, 2] + crop_in_grid = torch.cat((crop_ind_grid_h.unsqueeze(-1), crop_ind_grid_w.unsqueeze(-1)), dim=-1) + + # Add above grid with the offset index of each sampled crop to get 2d indices for each crop. 
def sample_random_image_crops(images, crop_height, crop_width, num_crops, pos_enc=False):
    """
    For each image, randomly sample @num_crops crops of size (@crop_height, @crop_width), from
    @images.

    Args:
        images (torch.Tensor): batch of images of shape [..., C, H, W]

        crop_height (int): height of crop to take

        crop_width (int): width of crop to take

        num_crops (int): number of crops to sample

        pos_enc (bool): if True, also add 2 channels to the outputs that gives a spatial
            encoding of the original source pixel locations. This means that the
            output crops will contain information about where in the source image
            it was sampled from.

    Returns:
        crops (torch.Tensor): crops of shape (..., @num_crops, C, @crop_height, @crop_width)
            if @pos_enc is False, otherwise (..., @num_crops, C + 2, @crop_height, @crop_width)

        crop_inds (torch.Tensor): sampled crop indices of shape (..., N, 2)
    """
    device = images.device

    # maybe add 2 channels of spatial encoding to the source image
    source_im = images
    if pos_enc:
        # spatial encoding [y, x] in [0, 1]
        h, w = source_im.shape[-2:]
        # indexing='ij' makes the (rows, cols) layout explicit: it matches the
        # historical default and avoids the torch>=1.10 deprecation warning
        # (consistent with the meshgrid call in transformer_for_diffusion.py).
        pos_y, pos_x = torch.meshgrid(
            torch.arange(h), torch.arange(w), indexing='ij')
        pos_y = pos_y.float().to(device) / float(h)
        pos_x = pos_x.float().to(device) / float(w)
        position_enc = torch.stack((pos_y, pos_x)) # shape [C, H, W]

        # unsqueeze and expand to match leading dimensions -> shape [..., C, H, W]
        leading_shape = source_im.shape[:-3]
        position_enc = position_enc[(None,) * len(leading_shape)]
        position_enc = position_enc.expand(*leading_shape, -1, -1, -1)

        # concat across channel dimension with input
        source_im = torch.cat((source_im, position_enc), dim=-3)

    # make sure sample boundaries ensure crops are fully within the images
    image_c, image_h, image_w = source_im.shape[-3:]
    max_sample_h = image_h - crop_height
    max_sample_w = image_w - crop_width

    # Sample crop locations for all tensor dimensions up to the last 3, which are [C, H, W].
    # Each gets @num_crops samples - typically this will just be the batch dimension (B), so
    # we will sample [B, N] indices, but this supports having more than one leading dimension,
    # or possibly no leading dimension.
    #
    # Trick: sample in [0, 1) with rand, then re-scale to [0, M) and convert to long to get sampled ints
    crop_inds_h = (max_sample_h * torch.rand(*source_im.shape[:-3], num_crops).to(device)).long()
    crop_inds_w = (max_sample_w * torch.rand(*source_im.shape[:-3], num_crops).to(device)).long()
    crop_inds = torch.cat((crop_inds_h.unsqueeze(-1), crop_inds_w.unsqueeze(-1)), dim=-1) # shape [..., N, 2]

    crops = crop_image_from_indices(
        images=source_im,
        crop_indices=crop_inds,
        crop_height=crop_height,
        crop_width=crop_width,
    )

    return crops, crop_inds
class MultiImageObsEncoder(ModuleAttrMixin):
    """Encode a dict of image and low-dim observations into one flat feature vector.

    Every 'rgb' key runs through a per-key transform
    (resize -> random/center crop -> normalize) and a vision backbone; every
    'low_dim' key is passed through unchanged. All features are concatenated
    along the last dimension, with rgb keys first (in sorted-key order), then
    low_dim keys (sorted).
    """
    def __init__(self,
            shape_meta: dict,
            rgb_model: Union[nn.Module, Dict[str,nn.Module]],
            resize_shape: Union[Tuple[int,int], Dict[str,tuple], None]=None,
            crop_shape: Union[Tuple[int,int], Dict[str,tuple], None]=None,
            random_crop: bool=True,
            # replace BatchNorm with GroupNorm
            use_group_norm: bool=False,
            # use single rgb model for all rgb inputs
            share_rgb_model: bool=False,
            # renormalize rgb input with imagenet normalization
            # assuming input in [0,1]
            imagenet_norm: bool=False
        ):
        """
        Assumes rgb input: B,C,H,W
        Assumes low_dim input: B,D

        Args:
            shape_meta: dict with an 'obs' entry mapping each observation key
                to {'shape': ..., 'type': 'rgb'|'low_dim'} ('low_dim' default).
            rgb_model: a single backbone (deep-copied per key unless
                share_rgb_model), or a dict of per-key backbones.
            resize_shape: (h, w) or per-key dict; applied before cropping.
            crop_shape: (h, w) or per-key dict; random crop during training
                when random_crop, otherwise a deterministic center crop.
        """
        super().__init__()

        rgb_keys = list()
        low_dim_keys = list()
        key_model_map = nn.ModuleDict()
        key_transform_map = nn.ModuleDict()
        key_shape_map = dict()

        # handle sharing vision backbone
        if share_rgb_model:
            assert isinstance(rgb_model, nn.Module)
            key_model_map['rgb'] = rgb_model

        obs_shape_meta = shape_meta['obs']
        for key, attr in obs_shape_meta.items():
            shape = tuple(attr['shape'])
            # renamed from `type`: avoid shadowing the builtin
            obs_type = attr.get('type', 'low_dim')
            key_shape_map[key] = shape
            if obs_type == 'rgb':
                rgb_keys.append(key)
                # configure model for this key
                this_model = None
                if not share_rgb_model:
                    if isinstance(rgb_model, dict):
                        # have provided model for each key
                        this_model = rgb_model[key]
                    else:
                        assert isinstance(rgb_model, nn.Module)
                        # have a copy of the rgb model
                        this_model = copy.deepcopy(rgb_model)

                if this_model is not None:
                    if use_group_norm:
                        # swap BatchNorm2d for GroupNorm (batch-size independent
                        # statistics)
                        this_model = replace_submodules(
                            root_module=this_model,
                            predicate=lambda x: isinstance(x, nn.BatchNorm2d),
                            func=lambda x: nn.GroupNorm(
                                num_groups=x.num_features//16,
                                num_channels=x.num_features)
                        )
                    key_model_map[key] = this_model

                # configure resize
                input_shape = shape
                this_resizer = nn.Identity()
                if resize_shape is not None:
                    if isinstance(resize_shape, dict):
                        h, w = resize_shape[key]
                    else:
                        h, w = resize_shape
                    this_resizer = torchvision.transforms.Resize(
                        size=(h,w)
                    )
                    input_shape = (shape[0],h,w)

                # configure randomizer
                this_randomizer = nn.Identity()
                if crop_shape is not None:
                    if isinstance(crop_shape, dict):
                        h, w = crop_shape[key]
                    else:
                        h, w = crop_shape
                    if random_crop:
                        this_randomizer = CropRandomizer(
                            input_shape=input_shape,
                            crop_height=h,
                            crop_width=w,
                            num_crops=1,
                            pos_enc=False
                        )
                    else:
                        # BUGFIX: this used to assign to `this_normalizer`,
                        # which the normalizer setup below immediately
                        # overwrote with nn.Identity() -- so crop_shape was
                        # silently ignored whenever random_crop=False and the
                        # output spatial size was wrong.
                        this_randomizer = torchvision.transforms.CenterCrop(
                            size=(h,w)
                        )
                # configure normalizer
                this_normalizer = nn.Identity()
                if imagenet_norm:
                    this_normalizer = torchvision.transforms.Normalize(
                        mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

                this_transform = nn.Sequential(this_resizer, this_randomizer, this_normalizer)
                key_transform_map[key] = this_transform
            elif obs_type == 'low_dim':
                low_dim_keys.append(key)
            else:
                raise RuntimeError(f"Unsupported obs type: {obs_type}")
        # deterministic feature ordering regardless of dict insertion order
        rgb_keys = sorted(rgb_keys)
        low_dim_keys = sorted(low_dim_keys)

        self.shape_meta = shape_meta
        self.key_model_map = key_model_map
        self.key_transform_map = key_transform_map
        self.share_rgb_model = share_rgb_model
        self.rgb_keys = rgb_keys
        self.low_dim_keys = low_dim_keys
        self.key_shape_map = key_shape_map

    def forward(self, obs_dict):
        """Encode obs_dict (key -> batch-first tensor) into a (B, D_total) tensor.

        All entries must share the same batch size and match the shapes
        declared in shape_meta (asserted below).
        """
        batch_size = None
        features = list()
        # process rgb input
        if self.share_rgb_model:
            # pass all rgb obs through the single shared rgb model in one batch
            imgs = list()
            for key in self.rgb_keys:
                img = obs_dict[key]
                if batch_size is None:
                    batch_size = img.shape[0]
                else:
                    assert batch_size == img.shape[0]
                assert img.shape[1:] == self.key_shape_map[key]
                img = self.key_transform_map[key](img)
                imgs.append(img)
            # (N*B,C,H,W) -- N rgb keys stacked along the batch dimension
            imgs = torch.cat(imgs, dim=0)
            # (N*B,D)
            feature = self.key_model_map['rgb'](imgs)
            # (N,B,D)
            feature = feature.reshape(-1,batch_size,*feature.shape[1:])
            # (B,N,D)
            feature = torch.moveaxis(feature,0,1)
            # (B,N*D)
            feature = feature.reshape(batch_size,-1)
            features.append(feature)
        else:
            # run each rgb obs through its own model
            for key in self.rgb_keys:
                img = obs_dict[key]
                if batch_size is None:
                    batch_size = img.shape[0]
                else:
                    assert batch_size == img.shape[0]
                assert img.shape[1:] == self.key_shape_map[key]
                img = self.key_transform_map[key](img)
                feature = self.key_model_map[key](img)
                features.append(feature)

        # process lowdim input (passed through unchanged)
        for key in self.low_dim_keys:
            data = obs_dict[key]
            if batch_size is None:
                batch_size = data.shape[0]
            else:
                assert batch_size == data.shape[0]
            assert data.shape[1:] == self.key_shape_map[key]
            features.append(data)

        # concatenate all features
        result = torch.cat(features, dim=-1)
        return result

    @torch.no_grad()
    def output_shape(self):
        """Infer the encoder's output shape by running one zero-filled example."""
        example_obs_dict = dict()
        obs_shape_meta = self.shape_meta['obs']
        batch_size = 1
        for key, attr in obs_shape_meta.items():
            shape = tuple(attr['shape'])
            this_obs = torch.zeros(
                (batch_size,) + shape,
                dtype=self.dtype,
                device=self.device)
            example_obs_dict[key] = this_obs
        example_output = self.forward(example_obs_dict)
        output_shape = example_output.shape[1:]
        return output_shape
config/task/*_image.yaml + + def predict_action(self, obs_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]: + """ + obs_dict: + str: B,To,* + return: B,Ta,Da + """ + raise NotImplementedError() + + # reset state for stateful policies + def reset(self): + pass + + # ========== training =========== + # no standard training interface except setting normalizer + def set_normalizer(self, normalizer: LinearNormalizer): + raise NotImplementedError() diff --git a/diffusion_policy/policy/base_lowdim_policy.py b/diffusion_policy/policy/base_lowdim_policy.py new file mode 100644 index 0000000..8bbaafc --- /dev/null +++ b/diffusion_policy/policy/base_lowdim_policy.py @@ -0,0 +1,36 @@ +from typing import Dict +import torch +import torch.nn as nn +from diffusion_policy.model.common.module_attr_mixin import ModuleAttrMixin +from diffusion_policy.model.common.normalizer import LinearNormalizer + +class BaseLowdimPolicy(ModuleAttrMixin): + # ========= inference ============ + # also as self.device and self.dtype for inference device transfer + def predict_action(self, obs_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]: + """ + obs_dict: + obs: B,To,Do + return: + action: B,Ta,Da + To = 3 + Ta = 4 + T = 6 + |o|o|o| + | | |a|a|a|a| + |o|o| + | |a|a|a|a|a| + | | | | |a|a| + """ + raise NotImplementedError() + + # reset state for stateful policies + def reset(self): + pass + + # ========== training =========== + # no standard training interface except setting normalizer + def set_normalizer(self, normalizer: LinearNormalizer): + raise NotImplementedError() + + \ No newline at end of file diff --git a/diffusion_policy/policy/bet_lowdim_policy.py b/diffusion_policy/policy/bet_lowdim_policy.py new file mode 100644 index 0000000..8464a2e --- /dev/null +++ b/diffusion_policy/policy/bet_lowdim_policy.py @@ -0,0 +1,127 @@ +from typing import Dict, Tuple +import torch +import torch.nn as nn +from omegaconf import OmegaConf +import torch.nn.functional as F + +from 
diffusion_policy.model.common.normalizer import LinearNormalizer +from diffusion_policy.policy.base_lowdim_policy import BaseLowdimPolicy +from diffusion_policy.model.bet.action_ae.discretizers.k_means import KMeansDiscretizer +from diffusion_policy.model.bet.latent_generators.mingpt import MinGPT +from diffusion_policy.model.bet.utils import eval_mode + +class BETLowdimPolicy(BaseLowdimPolicy): + def __init__(self, + action_ae: KMeansDiscretizer, + obs_encoding_net: nn.Module, + state_prior: MinGPT, + horizon, + n_action_steps, + n_obs_steps): + super().__init__() + + self.normalizer = LinearNormalizer() + self.action_ae = action_ae + self.obs_encoding_net = obs_encoding_net + self.state_prior = state_prior + self.horizon = horizon + self.n_action_steps = n_action_steps + self.n_obs_steps = n_obs_steps + + # ========= inference ============ + def predict_action(self, obs_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]: + """ + obs_dict: must include "obs" key + result: must include "action" key + """ + assert 'obs' in obs_dict + assert 'past_action' not in obs_dict # not implemented yet + nobs = self.normalizer['obs'].normalize(obs_dict['obs']) + B, _, Do = nobs.shape + To = self.n_obs_steps + T = self.horizon + + # pad To to T + obs = torch.full((B,T,Do), -2, dtype=nobs.dtype, device=nobs.device) + obs[:,:To,:] = nobs[:,:To,:] + + # (B,T,Do) + enc_obs = self.obs_encoding_net(obs) + + # Sample latents from the prior + latents, offsets = self.state_prior.generate_latents(enc_obs) + + # un-descritize + naction_pred = self.action_ae.decode_actions( + latent_action_batch=(latents, offsets) + ) + # (B,T,Da) + + # un-normalize + action_pred = self.normalizer['action'].unnormalize(naction_pred) + + # get action + start = To - 1 + end = start + self.n_action_steps + action = action_pred[:,start:end] + result = { + 'action': action, + 'action_pred': action_pred + } + return result + + # ========= training ============ + def set_normalizer(self, normalizer: 
LinearNormalizer): + self.normalizer.load_state_dict(normalizer.state_dict()) + + def fit_action_ae(self, input_actions: torch.Tensor): + self.action_ae.fit_discretizer(input_actions=input_actions) + + def get_latents(self, latent_collection_loader): + training_latents = list() + with eval_mode(self.action_ae, self.obs_encoding_net, no_grad=True): + for observations, action, mask in latent_collection_loader: + obs, act = observations.to(self.device, non_blocking=True), action.to(self.device, non_blocking=True) + enc_obs = self.obs_encoding_net(obs) + latent = self.action_ae.encode_into_latent(act, enc_obs) + reconstructed_action = self.action_ae.decode_actions( + latent, + enc_obs, + ) + total_mse_loss += F.mse_loss(act, reconstructed_action, reduction="sum") + if type(latent) == tuple: + # serialize into tensor; assumes last dim is latent dim + detached_latents = tuple(x.detach() for x in latent) + training_latents.append(torch.cat(detached_latents, dim=-1)) + else: + training_latents.append(latent.detach()) + training_latents_tensor = torch.cat(training_latents, dim=0) + return training_latents_tensor + + def get_optimizer( + self, weight_decay: float, learning_rate: float, betas: Tuple[float, float] + ) -> torch.optim.Optimizer: + return self.state_prior.get_optimizer( + weight_decay=weight_decay, + learning_rate=learning_rate, + betas=tuple(betas)) + + def compute_loss(self, batch): + # normalize input + assert 'valid_mask' not in batch + nbatch = self.normalizer.normalize(batch) + obs = nbatch['obs'] + action = nbatch['action'] + + # mask out observations after n_obs_steps + To = self.n_obs_steps + obs[:,To:,:] = -2 # (normal obs range [-1,1]) + + enc_obs = self.obs_encoding_net(obs) + latent = self.action_ae.encode_into_latent(action, enc_obs) + _, loss, loss_components = self.state_prior.get_latent_and_loss( + obs_rep=enc_obs, + target_latents=latent, + return_loss_components=True, + ) + return loss, loss_components diff --git 
# ============================================================================
# diffusion_policy/policy/diffusion_transformer_hybrid_image_policy.py
# ============================================================================
from typing import Dict, Tuple
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
from einops import rearrange, reduce
from diffusers.schedulers.scheduling_ddpm import DDPMScheduler

from diffusion_policy.model.common.normalizer import LinearNormalizer
from diffusion_policy.policy.base_image_policy import BaseImagePolicy
from diffusion_policy.model.diffusion.transformer_for_diffusion import TransformerForDiffusion
from diffusion_policy.model.diffusion.mask_generator import LowdimMaskGenerator
from diffusion_policy.common.robomimic_config_util import get_robomimic_config
from robomimic.algo import algo_factory
from robomimic.algo.algo import PolicyAlgo
import robomimic.utils.obs_utils as ObsUtils
import robomimic.models.base_nets as rmbn
import diffusion_policy.model.vision.crop_randomizer as dmvc
from diffusion_policy.common.pytorch_util import dict_apply, replace_submodules


class DiffusionTransformerHybridImagePolicy(BaseImagePolicy):
    """Transformer diffusion policy with a robomimic image obs encoder."""

    def __init__(self,
            shape_meta: dict,
            noise_scheduler: DDPMScheduler,
            # task params
            horizon,
            n_action_steps,
            n_obs_steps,
            num_inference_steps=None,
            # image
            crop_shape=(76, 76),
            obs_encoder_group_norm=False,
            eval_fixed_crop=False,
            # arch
            n_layer=8,
            n_cond_layers=0,
            n_head=4,
            n_emb=256,
            p_drop_emb=0.0,
            p_drop_attn=0.3,
            causal_attn=True,
            time_as_cond=True,
            obs_as_cond=True,
            pred_action_steps_only=False,
            # parameters passed to step
            **kwargs):
        super().__init__()

        # parse shape_meta
        action_shape = shape_meta['action']['shape']
        assert len(action_shape) == 1
        action_dim = action_shape[0]
        obs_shape_meta = shape_meta['obs']
        obs_config = {
            'low_dim': [],
            'rgb': [],
            'depth': [],
            'scan': []
        }
        obs_key_shapes = dict()
        for key, attr in obs_shape_meta.items():
            shape = attr['shape']
            obs_key_shapes[key] = list(shape)

            type = attr.get('type', 'low_dim')
            if type == 'rgb':
                obs_config['rgb'].append(key)
            elif type == 'low_dim':
                obs_config['low_dim'].append(key)
            else:
                raise RuntimeError(f"Unsupported obs type: {type}")

        # get raw robomimic config; bc_rnn/square/ph is only used as a
        # template to instantiate the standard robomimic image encoder
        config = get_robomimic_config(
            algo_name='bc_rnn',
            hdf5_type='image',
            task_name='square',
            dataset_type='ph')

        with config.unlocked():
            # set config with shape_meta
            config.observation.modalities.obs = obs_config

            if crop_shape is None:
                for key, modality in config.observation.encoder.items():
                    if modality.obs_randomizer_class == 'CropRandomizer':
                        modality['obs_randomizer_class'] = None
            else:
                # set random crop parameter
                ch, cw = crop_shape
                for key, modality in config.observation.encoder.items():
                    if modality.obs_randomizer_class == 'CropRandomizer':
                        modality.obs_randomizer_kwargs.crop_height = ch
                        modality.obs_randomizer_kwargs.crop_width = cw

        # init global state
        ObsUtils.initialize_obs_utils_with_config(config)

        # load model
        policy: PolicyAlgo = algo_factory(
                algo_name=config.algo_name,
                config=config,
                obs_key_shapes=obs_key_shapes,
                ac_dim=action_dim,
                device='cpu',
            )

        obs_encoder = policy.nets['policy'].nets['encoder'].nets['obs']

        if obs_encoder_group_norm:
            # replace batch norm with group norm (batch norm is unstable
            # with the small EMA-averaged batches used at eval time)
            replace_submodules(
                root_module=obs_encoder,
                predicate=lambda x: isinstance(x, nn.BatchNorm2d),
                func=lambda x: nn.GroupNorm(
                    num_groups=x.num_features//16,
                    num_channels=x.num_features)
            )
            # obs_encoder.obs_nets['agentview_image'].nets[0].nets

        # obs_encoder.obs_randomizers['agentview_image']
        if eval_fixed_crop:
            replace_submodules(
                root_module=obs_encoder,
                predicate=lambda x: isinstance(x, rmbn.CropRandomizer),
                func=lambda x: dmvc.CropRandomizer(
                    input_shape=x.input_shape,
                    crop_height=x.crop_height,
                    crop_width=x.crop_width,
                    num_crops=x.num_crops,
                    pos_enc=x.pos_enc
                )
            )

        # create diffusion model
        obs_feature_dim = obs_encoder.output_shape()[0]
        input_dim = action_dim if obs_as_cond else (obs_feature_dim + action_dim)
        output_dim = input_dim
        cond_dim = obs_feature_dim if obs_as_cond else 0

        model = TransformerForDiffusion(
            input_dim=input_dim,
            output_dim=output_dim,
            horizon=horizon,
            n_obs_steps=n_obs_steps,
            cond_dim=cond_dim,
            n_layer=n_layer,
            n_head=n_head,
            n_emb=n_emb,
            p_drop_emb=p_drop_emb,
            p_drop_attn=p_drop_attn,
            causal_attn=causal_attn,
            time_as_cond=time_as_cond,
            obs_as_cond=obs_as_cond,
            n_cond_layers=n_cond_layers
        )

        self.obs_encoder = obs_encoder
        self.model = model
        self.noise_scheduler = noise_scheduler
        self.mask_generator = LowdimMaskGenerator(
            action_dim=action_dim,
            obs_dim=0 if (obs_as_cond) else obs_feature_dim,
            max_n_obs_steps=n_obs_steps,
            fix_obs_steps=True,
            action_visible=False
        )
        self.normalizer = LinearNormalizer()
        self.horizon = horizon
        self.obs_feature_dim = obs_feature_dim
        self.action_dim = action_dim
        self.n_action_steps = n_action_steps
        self.n_obs_steps = n_obs_steps
        self.obs_as_cond = obs_as_cond
        self.pred_action_steps_only = pred_action_steps_only
        self.kwargs = kwargs

        if num_inference_steps is None:
            num_inference_steps = noise_scheduler.config.num_train_timesteps
        self.num_inference_steps = num_inference_steps

    # ========= inference ============
    def conditional_sample(self,
            condition_data, condition_mask,
            cond=None, generator=None,
            # keyword arguments to scheduler.step
            **kwargs
            ):
        """Run reverse diffusion from pure noise, re-imposing the known
        entries (condition_mask) at every denoising step."""
        model = self.model
        scheduler = self.noise_scheduler

        trajectory = torch.randn(
            size=condition_data.shape,
            dtype=condition_data.dtype,
            device=condition_data.device,
            generator=generator)

        # set step values
        scheduler.set_timesteps(self.num_inference_steps)

        for t in scheduler.timesteps:
            # 1. apply conditioning
            trajectory[condition_mask] = condition_data[condition_mask]

            # 2. predict model output
            model_output = model(trajectory, t, cond)

            # 3. compute previous image: x_t -> x_t-1
            trajectory = scheduler.step(
                model_output, t, trajectory,
                generator=generator,
                **kwargs
                ).prev_sample

        # finally make sure conditioning is enforced
        trajectory[condition_mask] = condition_data[condition_mask]

        return trajectory


    def predict_action(self, obs_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
        """
        obs_dict: must include "obs" key
        result: must include "action" key
        """
        assert 'past_action' not in obs_dict # not implemented yet
        # normalize input
        nobs = self.normalizer.normalize(obs_dict)
        value = next(iter(nobs.values()))
        B, To = value.shape[:2]
        T = self.horizon
        Da = self.action_dim
        Do = self.obs_feature_dim
        To = self.n_obs_steps

        # build input
        device = self.device
        dtype = self.dtype

        # handle different ways of passing observation
        cond = None
        cond_data = None
        cond_mask = None
        if self.obs_as_cond:
            this_nobs = dict_apply(nobs, lambda x: x[:,:To,...].reshape(-1,*x.shape[2:]))
            nobs_features = self.obs_encoder(this_nobs)
            # reshape back to B, To, Do
            cond = nobs_features.reshape(B, To, -1)
            shape = (B, T, Da)
            if self.pred_action_steps_only:
                shape = (B, self.n_action_steps, Da)
            cond_data = torch.zeros(size=shape, device=device, dtype=dtype)
            cond_mask = torch.zeros_like(cond_data, dtype=torch.bool)
        else:
            # condition through impainting
            this_nobs = dict_apply(nobs, lambda x: x[:,:To,...].reshape(-1,*x.shape[2:]))
            nobs_features = self.obs_encoder(this_nobs)
            # BUG FIX: the encoder output has B*To rows (obs were sliced to
            # :To above), but the original reshaped to (B, T, -1), which
            # mismatches the (B, To, Do) slice assigned below. Use To, as the
            # sibling unet policy does.
            nobs_features = nobs_features.reshape(B, To, -1)
            shape = (B, T, Da+Do)
            cond_data = torch.zeros(size=shape, device=device, dtype=dtype)
            cond_mask = torch.zeros_like(cond_data, dtype=torch.bool)
            cond_data[:,:To,Da:] = nobs_features
            cond_mask[:,:To,Da:] = True

        # run sampling
        nsample = self.conditional_sample(
            cond_data,
            cond_mask,
            cond=cond,
            **self.kwargs)

        # unnormalize prediction
        naction_pred = nsample[...,:Da]
        action_pred = self.normalizer['action'].unnormalize(naction_pred)

        # get action
        if self.pred_action_steps_only:
            action = action_pred
        else:
            start = To - 1
            end = start + self.n_action_steps
            action = action_pred[:,start:end]

        result = {
            'action': action,
            'action_pred': action_pred
        }
        return result

    # ========= training ============
    def set_normalizer(self, normalizer: LinearNormalizer):
        self.normalizer.load_state_dict(normalizer.state_dict())

    def get_optimizer(
            self,
            transformer_weight_decay: float,
            obs_encoder_weight_decay: float,
            learning_rate: float,
            betas: Tuple[float, float]
        ) -> torch.optim.Optimizer:
        optim_groups = self.model.get_optim_groups(
            weight_decay=transformer_weight_decay)
        optim_groups.append({
            "params": self.obs_encoder.parameters(),
            "weight_decay": obs_encoder_weight_decay
        })
        optimizer = torch.optim.AdamW(
            optim_groups, lr=learning_rate, betas=betas
        )
        return optimizer

    def compute_loss(self, batch):
        # normalize input
        assert 'valid_mask' not in batch
        nobs = self.normalizer.normalize(batch['obs'])
        nactions = self.normalizer['action'].normalize(batch['action'])
        batch_size = nactions.shape[0]
        horizon = nactions.shape[1]
        To = self.n_obs_steps

        # handle different ways of passing observation
        cond = None
        trajectory = nactions
        if self.obs_as_cond:
            # reshape B, T, ... to B*T
            this_nobs = dict_apply(nobs,
                lambda x: x[:,:To,...].reshape(-1,*x.shape[2:]))
            nobs_features = self.obs_encoder(this_nobs)
            # reshape back to B, To, Do
            cond = nobs_features.reshape(batch_size, To, -1)
            if self.pred_action_steps_only:
                start = To - 1
                end = start + self.n_action_steps
                trajectory = nactions[:,start:end]
        else:
            # reshape B, T, ... to B*T
            this_nobs = dict_apply(nobs, lambda x: x.reshape(-1, *x.shape[2:]))
            nobs_features = self.obs_encoder(this_nobs)
            # reshape back to B, T, Do
            nobs_features = nobs_features.reshape(batch_size, horizon, -1)
            trajectory = torch.cat([nactions, nobs_features], dim=-1).detach()

        # generate impainting mask
        if self.pred_action_steps_only:
            condition_mask = torch.zeros_like(trajectory, dtype=torch.bool)
        else:
            condition_mask = self.mask_generator(trajectory.shape)

        # Sample noise that we'll add to the images
        noise = torch.randn(trajectory.shape, device=trajectory.device)
        bsz = trajectory.shape[0]
        # Sample a random timestep for each image
        timesteps = torch.randint(
            0, self.noise_scheduler.config.num_train_timesteps,
            (bsz,), device=trajectory.device
        ).long()
        # Add noise to the clean images according to the noise magnitude at each timestep
        # (this is the forward diffusion process)
        noisy_trajectory = self.noise_scheduler.add_noise(
            trajectory, noise, timesteps)

        # compute loss mask
        loss_mask = ~condition_mask

        # apply conditioning
        noisy_trajectory[condition_mask] = trajectory[condition_mask]

        # Predict the noise residual
        pred = self.model(noisy_trajectory, timesteps, cond)

        pred_type = self.noise_scheduler.config.prediction_type
        if pred_type == 'epsilon':
            target = noise
        elif pred_type == 'sample':
            target = trajectory
        else:
            raise ValueError(f"Unsupported prediction type {pred_type}")

        loss = F.mse_loss(pred, target, reduction='none')
        loss = loss * loss_mask.type(loss.dtype)
        loss = reduce(loss, 'b ... -> b (...)', 'mean')
        loss = loss.mean()
        return loss

# ============================================================================
# diffusion_policy/policy/diffusion_transformer_lowdim_policy.py
# ============================================================================
from typing import Dict, Tuple
import torch
import torch.nn as nn
import torch.nn.functional as F
from einops import rearrange, reduce
from diffusers.schedulers.scheduling_ddpm import DDPMScheduler

from diffusion_policy.model.common.normalizer import LinearNormalizer
from diffusion_policy.policy.base_lowdim_policy import BaseLowdimPolicy
from diffusion_policy.model.diffusion.transformer_for_diffusion import TransformerForDiffusion
from diffusion_policy.model.diffusion.mask_generator import LowdimMaskGenerator

class DiffusionTransformerLowdimPolicy(BaseLowdimPolicy):
    """Transformer diffusion policy on low-dimensional state observations."""

    def __init__(self,
            model: TransformerForDiffusion,
            noise_scheduler: DDPMScheduler,
            horizon,
            obs_dim,
            action_dim,
            n_action_steps,
            n_obs_steps,
            num_inference_steps=None,
            obs_as_cond=False,
            pred_action_steps_only=False,
            # parameters passed to step
            **kwargs):
        super().__init__()
        if pred_action_steps_only:
            assert obs_as_cond

        self.model = model
        self.noise_scheduler = noise_scheduler
        self.mask_generator = LowdimMaskGenerator(
            action_dim=action_dim,
            obs_dim=0 if (obs_as_cond) else obs_dim,
            max_n_obs_steps=n_obs_steps,
            fix_obs_steps=True,
            action_visible=False
        )
        self.normalizer = LinearNormalizer()
        self.horizon = horizon
        self.obs_dim = obs_dim
        self.action_dim = action_dim
        self.n_action_steps = n_action_steps
        self.n_obs_steps = n_obs_steps
        self.obs_as_cond = obs_as_cond
        self.pred_action_steps_only = pred_action_steps_only
        self.kwargs = kwargs

        if num_inference_steps is None:
            num_inference_steps = noise_scheduler.config.num_train_timesteps
        self.num_inference_steps = num_inference_steps
# ========= inference ============ + def conditional_sample(self, + condition_data, condition_mask, + cond=None, generator=None, + # keyword arguments to scheduler.step + **kwargs + ): + model = self.model + scheduler = self.noise_scheduler + + trajectory = torch.randn( + size=condition_data.shape, + dtype=condition_data.dtype, + device=condition_data.device, + generator=generator) + + # set step values + scheduler.set_timesteps(self.num_inference_steps) + + for t in scheduler.timesteps: + # 1. apply conditioning + trajectory[condition_mask] = condition_data[condition_mask] + + # 2. predict model output + model_output = model(trajectory, t, cond) + + # 3. compute previous image: x_t -> x_t-1 + trajectory = scheduler.step( + model_output, t, trajectory, + generator=generator, + **kwargs + ).prev_sample + + # finally make sure conditioning is enforced + trajectory[condition_mask] = condition_data[condition_mask] + + return trajectory + + + def predict_action(self, obs_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]: + """ + obs_dict: must include "obs" key + result: must include "action" key + """ + + assert 'obs' in obs_dict + assert 'past_action' not in obs_dict # not implemented yet + nobs = self.normalizer['obs'].normalize(obs_dict['obs']) + B, _, Do = nobs.shape + To = self.n_obs_steps + assert Do == self.obs_dim + T = self.horizon + Da = self.action_dim + + # build input + device = self.device + dtype = self.dtype + + # handle different ways of passing observation + cond = None + cond_data = None + cond_mask = None + if self.obs_as_cond: + cond = nobs[:,:To] + shape = (B, T, Da) + if self.pred_action_steps_only: + shape = (B, self.n_action_steps, Da) + cond_data = torch.zeros(size=shape, device=device, dtype=dtype) + cond_mask = torch.zeros_like(cond_data, dtype=torch.bool) + else: + # condition through impainting + shape = (B, T, Da+Do) + cond_data = torch.zeros(size=shape, device=device, dtype=dtype) + cond_mask = torch.zeros_like(cond_data, 
dtype=torch.bool) + cond_data[:,:To,Da:] = nobs[:,:To] + cond_mask[:,:To,Da:] = True + + # run sampling + nsample = self.conditional_sample( + cond_data, + cond_mask, + cond=cond, + **self.kwargs) + + # unnormalize prediction + naction_pred = nsample[...,:Da] + action_pred = self.normalizer['action'].unnormalize(naction_pred) + + # get action + if self.pred_action_steps_only: + action = action_pred + else: + start = To - 1 + end = start + self.n_action_steps + action = action_pred[:,start:end] + + result = { + 'action': action, + 'action_pred': action_pred + } + if not self.obs_as_cond: + nobs_pred = nsample[...,Da:] + obs_pred = self.normalizer['obs'].unnormalize(nobs_pred) + action_obs_pred = obs_pred[:,start:end] + result['action_obs_pred'] = action_obs_pred + result['obs_pred'] = obs_pred + return result + + # ========= training ============ + def set_normalizer(self, normalizer: LinearNormalizer): + self.normalizer.load_state_dict(normalizer.state_dict()) + + def get_optimizer( + self, weight_decay: float, learning_rate: float, betas: Tuple[float, float] + ) -> torch.optim.Optimizer: + return self.model.configure_optimizers( + weight_decay=weight_decay, + learning_rate=learning_rate, + betas=tuple(betas)) + + def compute_loss(self, batch): + # normalize input + assert 'valid_mask' not in batch + nbatch = self.normalizer.normalize(batch) + obs = nbatch['obs'] + action = nbatch['action'] + + # handle different ways of passing observation + cond = None + trajectory = action + if self.obs_as_cond: + cond = obs[:,:self.n_obs_steps,:] + if self.pred_action_steps_only: + To = self.n_obs_steps + start = To - 1 + end = start + self.n_action_steps + trajectory = action[:,start:end] + else: + trajectory = torch.cat([action, obs], dim=-1) + + # generate impainting mask + if self.pred_action_steps_only: + condition_mask = torch.zeros_like(trajectory, dtype=torch.bool) + else: + condition_mask = self.mask_generator(trajectory.shape) + + # Sample noise that we'll add to the 
images + noise = torch.randn(trajectory.shape, device=trajectory.device) + bsz = trajectory.shape[0] + # Sample a random timestep for each image + timesteps = torch.randint( + 0, self.noise_scheduler.config.num_train_timesteps, + (bsz,), device=trajectory.device + ).long() + # Add noise to the clean images according to the noise magnitude at each timestep + # (this is the forward diffusion process) + noisy_trajectory = self.noise_scheduler.add_noise( + trajectory, noise, timesteps) + + # compute loss mask + loss_mask = ~condition_mask + + # apply conditioning + noisy_trajectory[condition_mask] = trajectory[condition_mask] + + # Predict the noise residual + pred = self.model(noisy_trajectory, timesteps, cond) + + pred_type = self.noise_scheduler.config.prediction_type + if pred_type == 'epsilon': + target = noise + elif pred_type == 'sample': + target = trajectory + else: + raise ValueError(f"Unsupported prediction type {pred_type}") + + loss = F.mse_loss(pred, target, reduction='none') + loss = loss * loss_mask.type(loss.dtype) + loss = reduce(loss, 'b ... 
# ============================================================================
# diffusion_policy/policy/diffusion_unet_hybrid_image_policy.py
# ============================================================================
from typing import Dict
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
from einops import rearrange, reduce
from diffusers.schedulers.scheduling_ddpm import DDPMScheduler

from diffusion_policy.model.common.normalizer import LinearNormalizer
from diffusion_policy.policy.base_image_policy import BaseImagePolicy
from diffusion_policy.model.diffusion.conditional_unet1d import ConditionalUnet1D
from diffusion_policy.model.diffusion.mask_generator import LowdimMaskGenerator
from diffusion_policy.common.robomimic_config_util import get_robomimic_config
from robomimic.algo import algo_factory
from robomimic.algo.algo import PolicyAlgo
import robomimic.utils.obs_utils as ObsUtils
import robomimic.models.base_nets as rmbn
import diffusion_policy.model.vision.crop_randomizer as dmvc
from diffusion_policy.common.pytorch_util import dict_apply, replace_submodules


class DiffusionUnetHybridImagePolicy(BaseImagePolicy):
    """1D conditional U-Net diffusion policy with a robomimic image encoder."""

    def __init__(self,
            shape_meta: dict,
            noise_scheduler: DDPMScheduler,
            horizon,
            n_action_steps,
            n_obs_steps,
            num_inference_steps=None,
            obs_as_global_cond=True,
            crop_shape=(76, 76),
            diffusion_step_embed_dim=256,
            down_dims=(256,512,1024),
            kernel_size=5,
            n_groups=8,
            cond_predict_scale=True,
            obs_encoder_group_norm=False,
            eval_fixed_crop=False,
            # parameters passed to step
            **kwargs):
        super().__init__()

        # parse shape_meta
        action_shape = shape_meta['action']['shape']
        assert len(action_shape) == 1
        action_dim = action_shape[0]
        obs_shape_meta = shape_meta['obs']
        obs_config = {
            'low_dim': [],
            'rgb': [],
            'depth': [],
            'scan': []
        }
        obs_key_shapes = dict()
        for key, attr in obs_shape_meta.items():
            shape = attr['shape']
            obs_key_shapes[key] = list(shape)

            type = attr.get('type', 'low_dim')
            if type == 'rgb':
                obs_config['rgb'].append(key)
            elif type == 'low_dim':
                obs_config['low_dim'].append(key)
            else:
                raise RuntimeError(f"Unsupported obs type: {type}")

        # get raw robomimic config; bc_rnn/square/ph serves only as a
        # template for the standard robomimic image encoder
        config = get_robomimic_config(
            algo_name='bc_rnn',
            hdf5_type='image',
            task_name='square',
            dataset_type='ph')

        with config.unlocked():
            # set config with shape_meta
            config.observation.modalities.obs = obs_config

            if crop_shape is None:
                for key, modality in config.observation.encoder.items():
                    if modality.obs_randomizer_class == 'CropRandomizer':
                        modality['obs_randomizer_class'] = None
            else:
                # set random crop parameter
                ch, cw = crop_shape
                for key, modality in config.observation.encoder.items():
                    if modality.obs_randomizer_class == 'CropRandomizer':
                        modality.obs_randomizer_kwargs.crop_height = ch
                        modality.obs_randomizer_kwargs.crop_width = cw

        # init global state
        ObsUtils.initialize_obs_utils_with_config(config)

        # load model
        policy: PolicyAlgo = algo_factory(
                algo_name=config.algo_name,
                config=config,
                obs_key_shapes=obs_key_shapes,
                ac_dim=action_dim,
                device='cpu',
            )

        obs_encoder = policy.nets['policy'].nets['encoder'].nets['obs']

        if obs_encoder_group_norm:
            # replace batch norm with group norm
            replace_submodules(
                root_module=obs_encoder,
                predicate=lambda x: isinstance(x, nn.BatchNorm2d),
                func=lambda x: nn.GroupNorm(
                    num_groups=x.num_features//16,
                    num_channels=x.num_features)
            )
            # obs_encoder.obs_nets['agentview_image'].nets[0].nets

        # obs_encoder.obs_randomizers['agentview_image']
        if eval_fixed_crop:
            replace_submodules(
                root_module=obs_encoder,
                predicate=lambda x: isinstance(x, rmbn.CropRandomizer),
                func=lambda x: dmvc.CropRandomizer(
                    input_shape=x.input_shape,
                    crop_height=x.crop_height,
                    crop_width=x.crop_width,
                    num_crops=x.num_crops,
                    pos_enc=x.pos_enc
                )
            )

        # create diffusion model
        obs_feature_dim = obs_encoder.output_shape()[0]
        if obs_as_global_cond:
            input_dim = action_dim
            global_cond_dim = obs_feature_dim * n_obs_steps
        else:
            input_dim = action_dim + obs_feature_dim
            global_cond_dim = None

        model = ConditionalUnet1D(
            input_dim=input_dim,
            local_cond_dim=None,
            global_cond_dim=global_cond_dim,
            diffusion_step_embed_dim=diffusion_step_embed_dim,
            down_dims=down_dims,
            kernel_size=kernel_size,
            n_groups=n_groups,
            cond_predict_scale=cond_predict_scale
        )

        self.obs_encoder = obs_encoder
        self.model = model
        self.noise_scheduler = noise_scheduler
        self.mask_generator = LowdimMaskGenerator(
            action_dim=action_dim,
            obs_dim=0 if obs_as_global_cond else obs_feature_dim,
            max_n_obs_steps=n_obs_steps,
            fix_obs_steps=True,
            action_visible=False
        )
        self.normalizer = LinearNormalizer()
        self.horizon = horizon
        self.obs_feature_dim = obs_feature_dim
        self.action_dim = action_dim
        self.n_action_steps = n_action_steps
        self.n_obs_steps = n_obs_steps
        self.obs_as_global_cond = obs_as_global_cond
        self.kwargs = kwargs

        if num_inference_steps is None:
            num_inference_steps = noise_scheduler.config.num_train_timesteps
        self.num_inference_steps = num_inference_steps

        print("Diffusion params: %e" % sum(p.numel() for p in self.model.parameters()))
        print("Vision params: %e" % sum(p.numel() for p in self.obs_encoder.parameters()))

    # ========= inference ============
    def conditional_sample(self,
            condition_data, condition_mask,
            local_cond=None, global_cond=None,
            generator=None,
            # keyword arguments to scheduler.step
            **kwargs
            ):
        """Reverse-diffuse from Gaussian noise while clamping the entries
        selected by condition_mask to condition_data at every step."""
        scheduler = self.noise_scheduler

        trajectory = torch.randn(
            size=condition_data.shape,
            dtype=condition_data.dtype,
            device=condition_data.device,
            generator=generator)

        scheduler.set_timesteps(self.num_inference_steps)

        for timestep in scheduler.timesteps:
            # re-impose the known (conditioned) entries before each step
            trajectory[condition_mask] = condition_data[condition_mask]

            # denoise one step: x_t -> x_{t-1}
            model_output = self.model(trajectory, timestep,
                local_cond=local_cond, global_cond=global_cond)
            trajectory = scheduler.step(
                model_output, timestep, trajectory,
                generator=generator,
                **kwargs
                ).prev_sample

        # enforce conditioning on the final sample as well
        trajectory[condition_mask] = condition_data[condition_mask]
        return trajectory


    def predict_action(self, obs_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
        """
        obs_dict: must include "obs" key
        result: must include "action" key
        """
        assert 'past_action' not in obs_dict # not implemented yet
        # normalize input
        nobs = self.normalizer.normalize(obs_dict)
        value = next(iter(nobs.values()))
        B, To = value.shape[:2]
        T = self.horizon
        Da = self.action_dim
        Do = self.obs_feature_dim
        To = self.n_obs_steps

        device = self.device
        dtype = self.dtype

        # two conditioning modes: global feature vector, or inpainting
        local_cond = None
        global_cond = None
        if self.obs_as_global_cond:
            # condition through global feature
            this_nobs = dict_apply(nobs, lambda x: x[:,:To,...].reshape(-1,*x.shape[2:]))
            nobs_features = self.obs_encoder(this_nobs)
            # reshape back to B, Do
            global_cond = nobs_features.reshape(B, -1)
            # empty data for action
            cond_data = torch.zeros(size=(B, T, Da), device=device, dtype=dtype)
            cond_mask = torch.zeros_like(cond_data, dtype=torch.bool)
        else:
            # condition through impainting
            this_nobs = dict_apply(nobs, lambda x: x[:,:To,...].reshape(-1,*x.shape[2:]))
            nobs_features = self.obs_encoder(this_nobs)
            # reshape back to B, To, Do
            nobs_features = nobs_features.reshape(B, To, -1)
            cond_data = torch.zeros(size=(B, T, Da+Do), device=device, dtype=dtype)
            cond_mask = torch.zeros_like(cond_data, dtype=torch.bool)
            cond_data[:,:To,Da:] = nobs_features
            cond_mask[:,:To,Da:] = True

        # run sampling
        nsample = self.conditional_sample(
            cond_data,
            cond_mask,
            local_cond=local_cond,
            global_cond=global_cond,
            **self.kwargs)

        # unnormalize prediction
        naction_pred = nsample[...,:Da]
        action_pred = self.normalizer['action'].unnormalize(naction_pred)

        # pick the executed action window
        start = To - 1
        end = start + self.n_action_steps
        action = action_pred[:,start:end]

        result = {
            'action': action,
            'action_pred': action_pred
        }
        return result

    # ========= training ============
    def set_normalizer(self, normalizer: LinearNormalizer):
        self.normalizer.load_state_dict(normalizer.state_dict())

    def compute_loss(self, batch):
        """Standard DDPM training loss over the (possibly obs-augmented)
        action trajectory; conditioned entries are excluded from the loss."""
        assert 'valid_mask' not in batch
        nobs = self.normalizer.normalize(batch['obs'])
        nactions = self.normalizer['action'].normalize(batch['action'])
        batch_size = nactions.shape[0]
        horizon = nactions.shape[1]

        # build the diffusion target and its condition
        local_cond = None
        global_cond = None
        trajectory = nactions
        cond_data = trajectory
        if self.obs_as_global_cond:
            # reshape B, T, ... to B*T
            this_nobs = dict_apply(nobs,
                lambda x: x[:,:self.n_obs_steps,...].reshape(-1,*x.shape[2:]))
            nobs_features = self.obs_encoder(this_nobs)
            # reshape back to B, Do
            global_cond = nobs_features.reshape(batch_size, -1)
        else:
            # reshape B, T, ... to B*T
            this_nobs = dict_apply(nobs, lambda x: x.reshape(-1, *x.shape[2:]))
            nobs_features = self.obs_encoder(this_nobs)
            # reshape back to B, T, Do
            nobs_features = nobs_features.reshape(batch_size, horizon, -1)
            cond_data = torch.cat([nactions, nobs_features], dim=-1)
            trajectory = cond_data.detach()

        # generate impainting mask
        condition_mask = self.mask_generator(trajectory.shape)

        # forward diffusion: add noise at a random timestep per sample
        noise = torch.randn(trajectory.shape, device=trajectory.device)
        bsz = trajectory.shape[0]
        timesteps = torch.randint(
            0, self.noise_scheduler.config.num_train_timesteps,
            (bsz,), device=trajectory.device
        ).long()
        noisy_trajectory = self.noise_scheduler.add_noise(
            trajectory, noise, timesteps)

        # loss is only computed on non-conditioned entries
        loss_mask = ~condition_mask

        # apply conditioning
        noisy_trajectory[condition_mask] = cond_data[condition_mask]

        # predict the noise residual
        pred = self.model(noisy_trajectory, timesteps,
            local_cond=local_cond, global_cond=global_cond)

        pred_type = self.noise_scheduler.config.prediction_type
        if pred_type == 'epsilon':
            target = noise
        elif pred_type == 'sample':
            target = trajectory
        else:
            raise ValueError(f"Unsupported prediction type {pred_type}")

        loss = F.mse_loss(pred, target, reduction='none')
        loss = loss * loss_mask.type(loss.dtype)
        # NOTE(review): the source chunk is truncated mid-expression here;
        # the tail below mirrors the two identical loss blocks earlier in
        # this same diff (transformer hybrid/lowdim policies) — confirm
        # against the full file.
        loss = reduce(loss, 'b ... -> b (...)', 'mean')
        loss = loss.mean()
        return loss
-> b (...)', 'mean') + loss = loss.mean() + return loss diff --git a/diffusion_policy/policy/diffusion_unet_image_policy.py b/diffusion_policy/policy/diffusion_unet_image_policy.py new file mode 100644 index 0000000..2aaf876 --- /dev/null +++ b/diffusion_policy/policy/diffusion_unet_image_policy.py @@ -0,0 +1,259 @@ +from typing import Dict +import torch +import torch.nn as nn +import torch.nn.functional as F +from einops import rearrange, reduce +from diffusers.schedulers.scheduling_ddpm import DDPMScheduler + +from diffusion_policy.model.common.normalizer import LinearNormalizer +from diffusion_policy.policy.base_image_policy import BaseImagePolicy +from diffusion_policy.model.diffusion.conditional_unet1d import ConditionalUnet1D +from diffusion_policy.model.diffusion.mask_generator import LowdimMaskGenerator +from diffusion_policy.model.vision.multi_image_obs_encoder import MultiImageObsEncoder +from diffusion_policy.common.pytorch_util import dict_apply + +class DiffusionUnetImagePolicy(BaseImagePolicy): + def __init__(self, + shape_meta: dict, + noise_scheduler: DDPMScheduler, + obs_encoder: MultiImageObsEncoder, + horizon, + n_action_steps, + n_obs_steps, + num_inference_steps=None, + obs_as_global_cond=True, + diffusion_step_embed_dim=256, + down_dims=(256,512,1024), + kernel_size=5, + n_groups=8, + cond_predict_scale=True, + # parameters passed to step + **kwargs): + super().__init__() + + # parse shapes + action_shape = shape_meta['action']['shape'] + assert len(action_shape) == 1 + action_dim = action_shape[0] + # get feature dim + obs_feature_dim = obs_encoder.output_shape()[0] + + # create diffusion model + input_dim = action_dim + obs_feature_dim + global_cond_dim = None + if obs_as_global_cond: + input_dim = action_dim + global_cond_dim = obs_feature_dim * n_obs_steps + + model = ConditionalUnet1D( + input_dim=input_dim, + local_cond_dim=None, + global_cond_dim=global_cond_dim, + diffusion_step_embed_dim=diffusion_step_embed_dim, + 
down_dims=down_dims, + kernel_size=kernel_size, + n_groups=n_groups, + cond_predict_scale=cond_predict_scale + ) + + self.obs_encoder = obs_encoder + self.model = model + self.noise_scheduler = noise_scheduler + self.mask_generator = LowdimMaskGenerator( + action_dim=action_dim, + obs_dim=0 if obs_as_global_cond else obs_feature_dim, + max_n_obs_steps=n_obs_steps, + fix_obs_steps=True, + action_visible=False + ) + self.normalizer = LinearNormalizer() + self.horizon = horizon + self.obs_feature_dim = obs_feature_dim + self.action_dim = action_dim + self.n_action_steps = n_action_steps + self.n_obs_steps = n_obs_steps + self.obs_as_global_cond = obs_as_global_cond + self.kwargs = kwargs + + if num_inference_steps is None: + num_inference_steps = noise_scheduler.config.num_train_timesteps + self.num_inference_steps = num_inference_steps + + # ========= inference ============ + def conditional_sample(self, + condition_data, condition_mask, + local_cond=None, global_cond=None, + generator=None, + # keyword arguments to scheduler.step + **kwargs + ): + model = self.model + scheduler = self.noise_scheduler + + trajectory = torch.randn( + size=condition_data.shape, + dtype=condition_data.dtype, + device=condition_data.device, + generator=generator) + + # set step values + scheduler.set_timesteps(self.num_inference_steps) + + for t in scheduler.timesteps: + # 1. apply conditioning + trajectory[condition_mask] = condition_data[condition_mask] + + # 2. predict model output + model_output = model(trajectory, t, + local_cond=local_cond, global_cond=global_cond) + + # 3. 
compute previous image: x_t -> x_t-1 + trajectory = scheduler.step( + model_output, t, trajectory, + generator=generator, + **kwargs + ).prev_sample + + # finally make sure conditioning is enforced + trajectory[condition_mask] = condition_data[condition_mask] + + return trajectory + + + def predict_action(self, obs_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]: + """ + obs_dict: must include "obs" key + result: must include "action" key + """ + assert 'past_action' not in obs_dict # not implemented yet + # normalize input + nobs = self.normalizer.normalize(obs_dict) + value = next(iter(nobs.values())) + B, To = value.shape[:2] + T = self.horizon + Da = self.action_dim + Do = self.obs_feature_dim + To = self.n_obs_steps + + # build input + device = self.device + dtype = self.dtype + + # handle different ways of passing observation + local_cond = None + global_cond = None + if self.obs_as_global_cond: + # condition through global feature + this_nobs = dict_apply(nobs, lambda x: x[:,:To,...].reshape(-1,*x.shape[2:])) + nobs_features = self.obs_encoder(this_nobs) + # reshape back to B, Do + global_cond = nobs_features.reshape(B, -1) + # empty data for action + cond_data = torch.zeros(size=(B, T, Da), device=device, dtype=dtype) + cond_mask = torch.zeros_like(cond_data, dtype=torch.bool) + else: + # condition through impainting + this_nobs = dict_apply(nobs, lambda x: x[:,:To,...].reshape(-1,*x.shape[2:])) + nobs_features = self.obs_encoder(this_nobs) + # reshape back to B, T, Do + nobs_features = nobs_features.reshape(B, To, -1) + cond_data = torch.zeros(size=(B, T, Da+Do), device=device, dtype=dtype) + cond_mask = torch.zeros_like(cond_data, dtype=torch.bool) + cond_data[:,:To,Da:] = nobs_features + cond_mask[:,:To,Da:] = True + + # run sampling + nsample = self.conditional_sample( + cond_data, + cond_mask, + local_cond=local_cond, + global_cond=global_cond, + **self.kwargs) + + # unnormalize prediction + naction_pred = nsample[...,:Da] + action_pred = 
self.normalizer['action'].unnormalize(naction_pred) + + # get action + start = To - 1 + end = start + self.n_action_steps + action = action_pred[:,start:end] + + result = { + 'action': action, + 'action_pred': action_pred + } + return result + + # ========= training ============ + def set_normalizer(self, normalizer: LinearNormalizer): + self.normalizer.load_state_dict(normalizer.state_dict()) + + def compute_loss(self, batch): + # normalize input + assert 'valid_mask' not in batch + nobs = self.normalizer.normalize(batch['obs']) + nactions = self.normalizer['action'].normalize(batch['action']) + batch_size = nactions.shape[0] + horizon = nactions.shape[1] + + # handle different ways of passing observation + local_cond = None + global_cond = None + trajectory = nactions + cond_data = trajectory + if self.obs_as_global_cond: + # reshape B, T, ... to B*T + this_nobs = dict_apply(nobs, + lambda x: x[:,:self.n_obs_steps,...].reshape(-1,*x.shape[2:])) + nobs_features = self.obs_encoder(this_nobs) + # reshape back to B, Do + global_cond = nobs_features.reshape(batch_size, -1) + else: + # reshape B, T, ... 
to B*T + this_nobs = dict_apply(nobs, lambda x: x.reshape(-1, *x.shape[2:])) + nobs_features = self.obs_encoder(this_nobs) + # reshape back to B, T, Do + nobs_features = nobs_features.reshape(batch_size, horizon, -1) + cond_data = torch.cat([nactions, nobs_features], dim=-1) + trajectory = cond_data.detach() + + # generate impainting mask + condition_mask = self.mask_generator(trajectory.shape) + + # Sample noise that we'll add to the images + noise = torch.randn(trajectory.shape, device=trajectory.device) + bsz = trajectory.shape[0] + # Sample a random timestep for each image + timesteps = torch.randint( + 0, self.noise_scheduler.config.num_train_timesteps, + (bsz,), device=trajectory.device + ).long() + # Add noise to the clean images according to the noise magnitude at each timestep + # (this is the forward diffusion process) + noisy_trajectory = self.noise_scheduler.add_noise( + trajectory, noise, timesteps) + + # compute loss mask + loss_mask = ~condition_mask + + # apply conditioning + noisy_trajectory[condition_mask] = cond_data[condition_mask] + + # Predict the noise residual + pred = self.model(noisy_trajectory, timesteps, + local_cond=local_cond, global_cond=global_cond) + + pred_type = self.noise_scheduler.config.prediction_type + if pred_type == 'epsilon': + target = noise + elif pred_type == 'sample': + target = trajectory + else: + raise ValueError(f"Unsupported prediction type {pred_type}") + + loss = F.mse_loss(pred, target, reduction='none') + loss = loss * loss_mask.type(loss.dtype) + loss = reduce(loss, 'b ... 
-> b (...)', 'mean') + loss = loss.mean() + return loss diff --git a/diffusion_policy/policy/diffusion_unet_lowdim_policy.py b/diffusion_policy/policy/diffusion_unet_lowdim_policy.py new file mode 100644 index 0000000..c01ed7a --- /dev/null +++ b/diffusion_policy/policy/diffusion_unet_lowdim_policy.py @@ -0,0 +1,252 @@ +from typing import Dict +import torch +import torch.nn as nn +import torch.nn.functional as F +from einops import rearrange, reduce +from diffusers.schedulers.scheduling_ddpm import DDPMScheduler + +from diffusion_policy.model.common.normalizer import LinearNormalizer +from diffusion_policy.policy.base_lowdim_policy import BaseLowdimPolicy +from diffusion_policy.model.diffusion.conditional_unet1d import ConditionalUnet1D +from diffusion_policy.model.diffusion.mask_generator import LowdimMaskGenerator + +class DiffusionUnetLowdimPolicy(BaseLowdimPolicy): + def __init__(self, + model: ConditionalUnet1D, + noise_scheduler: DDPMScheduler, + horizon, + obs_dim, + action_dim, + n_action_steps, + n_obs_steps, + num_inference_steps=None, + obs_as_local_cond=False, + obs_as_global_cond=False, + pred_action_steps_only=False, + oa_step_convention=False, + # parameters passed to step + **kwargs): + super().__init__() + assert not (obs_as_local_cond and obs_as_global_cond) + if pred_action_steps_only: + assert obs_as_global_cond + self.model = model + self.noise_scheduler = noise_scheduler + self.mask_generator = LowdimMaskGenerator( + action_dim=action_dim, + obs_dim=0 if (obs_as_local_cond or obs_as_global_cond) else obs_dim, + max_n_obs_steps=n_obs_steps, + fix_obs_steps=True, + action_visible=False + ) + self.normalizer = LinearNormalizer() + self.horizon = horizon + self.obs_dim = obs_dim + self.action_dim = action_dim + self.n_action_steps = n_action_steps + self.n_obs_steps = n_obs_steps + self.obs_as_local_cond = obs_as_local_cond + self.obs_as_global_cond = obs_as_global_cond + self.pred_action_steps_only = pred_action_steps_only + 
        self.oa_step_convention = oa_step_convention
        self.kwargs = kwargs

        # default to the full training schedule when not specified
        if num_inference_steps is None:
            num_inference_steps = noise_scheduler.config.num_train_timesteps
        self.num_inference_steps = num_inference_steps

    # ========= inference ============
    def conditional_sample(self,
            condition_data, condition_mask,
            local_cond=None, global_cond=None,
            generator=None,
            # keyword arguments to scheduler.step
            **kwargs
            ):
        """Run the reverse diffusion process with inpainting-style conditioning.

        condition_data: tensor of values to pin into the trajectory.
        condition_mask: bool tensor of the same shape; True entries are
            overwritten with condition_data before every denoising step and
            once more at the end, so conditioning is always enforced.
        local_cond / global_cond: optional conditioning passed to the model.
        generator: optional torch.Generator for reproducible sampling.
        Returns the denoised trajectory, same shape as condition_data.
        """
        model = self.model
        scheduler = self.noise_scheduler

        # start from pure Gaussian noise
        trajectory = torch.randn(
            size=condition_data.shape,
            dtype=condition_data.dtype,
            device=condition_data.device,
            generator=generator)

        # set step values
        scheduler.set_timesteps(self.num_inference_steps)

        for t in scheduler.timesteps:
            # 1. apply conditioning
            trajectory[condition_mask] = condition_data[condition_mask]

            # 2. predict model output
            model_output = model(trajectory, t,
                local_cond=local_cond, global_cond=global_cond)

            # 3. compute previous image: x_t -> x_t-1
            trajectory = scheduler.step(
                model_output, t, trajectory,
                generator=generator,
                **kwargs
                ).prev_sample

        # finally make sure conditioning is enforced
        trajectory[condition_mask] = condition_data[condition_mask]

        return trajectory


    def predict_action(self, obs_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
        """
        obs_dict: must include "obs" key
        result: must include "action" key
        """

        assert 'obs' in obs_dict
        assert 'past_action' not in obs_dict # not implemented yet
        # normalize obs only; sampling happens in normalized action space
        nobs = self.normalizer['obs'].normalize(obs_dict['obs'])
        B, _, Do = nobs.shape
        To = self.n_obs_steps
        assert Do == self.obs_dim
        T = self.horizon
        Da = self.action_dim

        # build input
        device = self.device
        dtype = self.dtype

        # handle different ways of passing observation
        local_cond = None
        global_cond = None
        if self.obs_as_local_cond:
            # condition through local feature
            # all zero except first To timesteps
            local_cond = torch.zeros(size=(B,T,Do), device=device,
                dtype=dtype)
            local_cond[:,:To] = nobs[:,:To]
            shape = (B, T, Da)
            cond_data = torch.zeros(size=shape, device=device, dtype=dtype)
            cond_mask = torch.zeros_like(cond_data, dtype=torch.bool)
        elif self.obs_as_global_cond:
            # condition through global feature (first To obs steps, flattened)
            global_cond = nobs[:,:To].reshape(nobs.shape[0], -1)
            shape = (B, T, Da)
            if self.pred_action_steps_only:
                # model predicts only the executed action steps, not the horizon
                shape = (B, self.n_action_steps, Da)
            cond_data = torch.zeros(size=shape, device=device, dtype=dtype)
            cond_mask = torch.zeros_like(cond_data, dtype=torch.bool)
        else:
            # condition through impainting: observation dims are written into
            # the trajectory and pinned via cond_mask during sampling
            shape = (B, T, Da+Do)
            cond_data = torch.zeros(size=shape, device=device, dtype=dtype)
            cond_mask = torch.zeros_like(cond_data, dtype=torch.bool)
            cond_data[:,:To,Da:] = nobs[:,:To]
            cond_mask[:,:To,Da:] = True

        # run sampling
        nsample = self.conditional_sample(
            cond_data,
            cond_mask,
            local_cond=local_cond,
            global_cond=global_cond,
            **self.kwargs)

        # unnormalize prediction
        naction_pred = nsample[...,:Da]
        action_pred = self.normalizer['action'].unnormalize(naction_pred)

        # get action
        if self.pred_action_steps_only:
            # nsample already has exactly n_action_steps timesteps
            action = action_pred
        else:
            # NOTE(review): start = To - 1 under oa_step_convention presumably
            # means the action at step t is paired with the obs at step t
            # (execution starts one step earlier) -- confirm against the
            # dataset's obs/action alignment.
            start = To
            if self.oa_step_convention:
                start = To - 1
            end = start + self.n_action_steps
            action = action_pred[:,start:end]

        result = {
            'action': action,
            'action_pred': action_pred
        }
        if not (self.obs_as_local_cond or self.obs_as_global_cond):
            # impainting mode also reconstructs observations; expose them
            nobs_pred = nsample[...,Da:]
            obs_pred = self.normalizer['obs'].unnormalize(nobs_pred)
            action_obs_pred = obs_pred[:,start:end]
            result['action_obs_pred'] = action_obs_pred
            result['obs_pred'] = obs_pred
        return result

    # ========= training ============
    def set_normalizer(self, normalizer: LinearNormalizer):
        # copy fitted normalization parameters from an externally-fit normalizer
        self.normalizer.load_state_dict(normalizer.state_dict())

    def compute_loss(self, batch):
        # normalize input
        assert 'valid_mask' not in batch
        nbatch = self.normalizer.normalize(batch)
        obs = nbatch['obs']
        action = nbatch['action']

        # 
handle different ways of passing observation
        local_cond = None
        global_cond = None
        trajectory = action
        if self.obs_as_local_cond:
            # zero out observations after n_obs_steps
            # NOTE(review): local_cond aliases obs, so this slice assignment
            # zeroes nbatch['obs'] in place. Looks harmless here because obs
            # is not read again in this branch (assuming normalize() returned
            # a fresh tensor), but a defensive obs.clone() would be safer --
            # confirm.
            local_cond = obs
            local_cond[:,self.n_obs_steps:,:] = 0
        elif self.obs_as_global_cond:
            # global conditioning on the first n_obs_steps, flattened per batch
            global_cond = obs[:,:self.n_obs_steps,:].reshape(
                obs.shape[0], -1)
            if self.pred_action_steps_only:
                # supervise only the executed action steps
                To = self.n_obs_steps
                start = To
                if self.oa_step_convention:
                    start = To - 1
                end = start + self.n_action_steps
                trajectory = action[:,start:end]
        else:
            # impainting mode: diffuse over concatenated (action, obs)
            trajectory = torch.cat([action, obs], dim=-1)

        # generate impainting mask
        if self.pred_action_steps_only:
            # every timestep is predicted; nothing is inpainted
            condition_mask = torch.zeros_like(trajectory, dtype=torch.bool)
        else:
            condition_mask = self.mask_generator(trajectory.shape)

        # Sample noise that we'll add to the images
        noise = torch.randn(trajectory.shape, device=trajectory.device)
        bsz = trajectory.shape[0]
        # Sample a random timestep for each image
        timesteps = torch.randint(
            0, self.noise_scheduler.config.num_train_timesteps,
            (bsz,), device=trajectory.device
        ).long()
        # Add noise to the clean images according to the noise magnitude at each timestep
        # (this is the forward diffusion process)
        noisy_trajectory = self.noise_scheduler.add_noise(
            trajectory, noise, timesteps)

        # compute loss mask
        loss_mask = ~condition_mask

        # apply conditioning
        noisy_trajectory[condition_mask] = trajectory[condition_mask]

        # Predict the noise residual
        pred = self.model(noisy_trajectory, timesteps,
            local_cond=local_cond, global_cond=global_cond)

        # training target depends on the scheduler's parameterization
        pred_type = self.noise_scheduler.config.prediction_type
        if pred_type == 'epsilon':
            target = noise
        elif pred_type == 'sample':
            target = trajectory
        else:
            raise ValueError(f"Unsupported prediction type {pred_type}")

        # masked MSE: conditioned (inpainted) entries do not contribute
        loss = F.mse_loss(pred, target, reduction='none')
        loss = loss * loss_mask.type(loss.dtype)
        loss = reduce(loss, 'b ...
-> b (...)', 'mean') + loss = loss.mean() + return loss diff --git a/diffusion_policy/policy/diffusion_unet_video_policy.py b/diffusion_policy/policy/diffusion_unet_video_policy.py new file mode 100644 index 0000000..eed968a --- /dev/null +++ b/diffusion_policy/policy/diffusion_unet_video_policy.py @@ -0,0 +1,313 @@ +from typing import Dict +import copy +import torch +import torch.nn as nn +import torch.nn.functional as F +from einops import rearrange, reduce +from diffusers.schedulers.scheduling_ddpm import DDPMScheduler + +from diffusion_policy.model.common.normalizer import LinearNormalizer +from diffusion_policy.policy.base_image_policy import BaseImagePolicy +from diffusion_policy.model.diffusion.conditional_unet1d import ConditionalUnet1D +from diffusion_policy.model.diffusion.mask_generator import LowdimMaskGenerator +from diffusion_policy.model.common.shape_util import get_output_shape +from diffusion_policy.model.obs_encoder.temporal_aggregator import TemporalAggregator + + +class DiffusionUnetVideoPolicy(BaseImagePolicy): + def __init__(self, + shape_meta: dict, + noise_scheduler: DDPMScheduler, + rgb_net: nn.Module, # (B,T,C,H,W) -> (B,Do) + horizon, + n_action_steps, + n_obs_steps, + num_inference_steps=None, + lowdim_as_global_cond=True, + diffusion_step_embed_dim=256, + down_dims=(256,512,1024), + kernel_size=5, + n_groups=8, + cond_predict_scale=True, + # parameters for TemporalAggregator + channel_mults=(1,1), + n_blocks_per_level=1, + ta_kernel_size=3, + ta_n_groups=8, + # parameters passed to step + **kwargs): + super().__init__() + + # parse shape_meta + action_shape = shape_meta['action']['shape'] + assert len(action_shape) == 1 + action_dim = action_shape[0] + obs_shape_meta = shape_meta['obs'] + + rgb_nets_map = nn.ModuleDict() + rgb_feature_dims = list() + lowdim_keys = list() + lowdim_input_dims = list() + + for key, attr in obs_shape_meta.items(): + shape = tuple(attr['shape']) + type = attr.get('type', 'lowdim') + if type == 'rgb': + # 
assign network for each rgb input + if len(rgb_nets_map) == 0: + net = rgb_net + else: + net = copy.deepcopy(rgb_net) + rgb_nets_map[key] = net + + # video input with n_obs_steps timesteps + shape = (n_obs_steps,) + shape + # compute output shape + output_shape = get_output_shape(shape, net) + assert(len(output_shape) == 1) + rgb_feature_dims.append(output_shape[0]) + elif type == 'lowdim': + lowdim_keys.append(key) + assert(len(shape) == 1) + lowdim_input_dims.append(shape[0]) + + # the order decides concatenation order + # dict preserves insertion order + # rgb and then lowdim + self.rgb_nets_map = rgb_nets_map + self.lowdim_keys = lowdim_keys + self.lowdim_net = None + + # compute dimensions for diffusion + rgb_feature_dim = sum(rgb_feature_dims) + lowdim_input_dim = sum(lowdim_input_dims) + global_cond_dim = rgb_feature_dim + input_dim = action_dim + if lowdim_as_global_cond: + lowdim_net = TemporalAggregator( + in_channels=lowdim_input_dim, + channel_mults=channel_mults, + n_blocks_per_level=n_blocks_per_level, + kernel_size=ta_kernel_size, + n_groups=ta_n_groups + ) + self.lowdim_net = lowdim_net + lowdim_feature_shape = get_output_shape( + (n_obs_steps, lowdim_input_dim), lowdim_net) + assert len(lowdim_feature_shape) == 1 + global_cond_dim += lowdim_feature_shape[0] + else: + input_dim += lowdim_input_dim + + model = ConditionalUnet1D( + input_dim=input_dim, + local_cond_dim=None, + global_cond_dim=global_cond_dim, + diffusion_step_embed_dim=diffusion_step_embed_dim, + down_dims=down_dims, + kernel_size=kernel_size, + n_groups=n_groups, + cond_predict_scale=cond_predict_scale + ) + + self.model = model + self.noise_scheduler = noise_scheduler + self.mask_generator = LowdimMaskGenerator( + action_dim=action_dim, + obs_dim=0 if lowdim_as_global_cond else lowdim_input_dim, + max_n_obs_steps=n_obs_steps, + fix_obs_steps=True, + action_visible=False + ) + self.normalizer = LinearNormalizer() + self.horizon = horizon + self.action_dim = action_dim + 
self.lowdim_input_dim = lowdim_input_dim + self.n_action_steps = n_action_steps + self.n_obs_steps = n_obs_steps + self.lowdim_as_global_cond = lowdim_as_global_cond + self.kwargs = kwargs + + if num_inference_steps is None: + num_inference_steps = noise_scheduler.config.num_train_timesteps + self.num_inference_steps = num_inference_steps + + # ========= inference ============ + def conditional_sample(self, + condition_data, condition_mask, + local_cond=None, global_cond=None, + generator=None, + # keyword arguments to scheduler.step + **kwargs + ): + model = self.model + scheduler = self.noise_scheduler + + trajectory = torch.randn( + size=condition_data.shape, + dtype=condition_data.dtype, + device=condition_data.device, + generator=generator) + + # set step values + scheduler.set_timesteps(self.num_inference_steps) + + for t in scheduler.timesteps: + # 1. apply conditioning + trajectory[condition_mask] = condition_data[condition_mask] + + # 2. predict model output + model_output = model(trajectory, t, + local_cond=local_cond, global_cond=global_cond) + + # 3. 
compute previous image: x_t -> x_t-1 + trajectory = scheduler.step( + model_output, t, trajectory, + generator=generator, + **kwargs + ).prev_sample + + # finally make sure conditioning is enforced + trajectory[condition_mask] = condition_data[condition_mask] + + return trajectory + + + def predict_action(self, obs_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]: + """ + obs_dict: must include "obs" key + result: must include "action" key + """ + assert 'past_action' not in obs_dict # not implemented yet + # normalize input + nobs = self.normalizer.normalize(obs_dict) + value = next(iter(nobs.values())) + B, To = value.shape[:2] + T = self.horizon + Da = self.action_dim + To = self.n_obs_steps + + # build input + device = self.device + dtype = self.dtype + + # run encoder first + # python 3.6+ dict preserves order + rgb_features_map = dict() + for key, net in self.rgb_nets_map.items(): + rgb_features_map[key] = net(nobs[key][:,:self.n_obs_steps]) + rgb_feature = torch.cat(list(rgb_features_map.values()), dim=-1) + + lowdim_input = torch.cat([nobs[k] for k in self.lowdim_keys], dim=-1) + + # handle different ways of passing lowdim + global_cond = None + cond_data = None + cond_mask = None + if self.lowdim_as_global_cond: + lowdim_feature = self.lowdim_net(lowdim_input[:,:To]) + global_cond = torch.cat([rgb_feature, lowdim_feature], dim=-1) + # empty data for action + cond_data = torch.zeros(size=(B, T, Da), device=device, dtype=dtype) + cond_mask = torch.zeros_like(cond_data, dtype=torch.bool) + else: + global_cond = rgb_feature + cond_data = torch.zeros(size=(B, T, Da+self.lowdim_input_dim), device=device, dtype=dtype) + cond_mask = torch.zeros_like(cond_data, dtype=torch.bool) + cond_data[:,:To,Da:] = lowdim_input[:,:To] + cond_mask[:,:To,Da:] = True + + # run sampling + nsample = self.conditional_sample( + cond_data, + cond_mask, + local_cond=None, + global_cond=global_cond, + **self.kwargs) + + # unnormalize prediction + naction_pred = nsample[...,:Da] 
+ action_pred = self.normalizer['action'].unnormalize(naction_pred) + + # get action + start = To + end = start + self.n_action_steps + action = action_pred[:,start:end] + + result = { + 'action': action, + 'action_pred': action_pred + } + return result + + # ========= training ============ + def set_normalizer(self, normalizer: LinearNormalizer): + self.normalizer.load_state_dict(normalizer.state_dict()) + + def compute_loss(self, batch): + # normalize input + assert 'valid_mask' not in batch + nobs = self.normalizer.normalize(batch['obs']) + nactions = self.normalizer['action'].normalize(batch['action']) + + # run encoder first + # python 3.6+ dict preserves order + rgb_features_map = dict() + for key, net in self.rgb_nets_map.items(): + rgb_features_map[key] = net(nobs[key][:,self.n_obs_steps:]) + rgb_feature = torch.cat(list(rgb_features_map.values()), dim=-1) + + lowdim_input = torch.cat([nobs[k] for k in self.lowdim_keys], axis=-1) + + # handle different ways of passing lowdim + global_cond = None + trajectory = None + cond_data = None + if self.lowdim_as_global_cond: + lowdim_feature = self.lowdim_net(lowdim_input[:,:self.n_obs_steps]) + global_cond = torch.cat([rgb_feature, lowdim_feature], dim=-1) + trajectory = nactions + cond_data = nactions + else: + global_cond = rgb_feature + trajectory = torch.cat([nactions, lowdim_input], dim=-1) + cond_data = trajectory + + # generate impainting mask + condition_mask = self.mask_generator(trajectory.shape) + + # Sample noise that we'll add to the images + noise = torch.randn(trajectory.shape, device=trajectory.device) + bsz = trajectory.shape[0] + # Sample a random timestep for each image + timesteps = torch.randint( + 0, self.noise_scheduler.config.num_train_timesteps, + (bsz,), device=trajectory.device + ).long() + # Add noise to the clean images according to the noise magnitude at each timestep + # (this is the forward diffusion process) + noisy_trajectory = self.noise_scheduler.add_noise( + trajectory, noise, 
timesteps) + + # compute loss mask + loss_mask = ~condition_mask + + # apply conditioning + noisy_trajectory[condition_mask] = cond_data[condition_mask] + + # Predict the noise residual + pred = self.model(noisy_trajectory, timesteps, + local_cond=None, global_cond=global_cond) + + if self.kwargs.get('predict_epsilon', True): + # default for most methods + target = noise + else: + # DDPM also has + target = trajectory + + loss = F.mse_loss(pred, target, reduction='none') + loss = loss * loss_mask.type(loss.dtype) + loss = reduce(loss, 'b ... -> b (...)', 'mean') + loss = loss.mean() + return loss diff --git a/diffusion_policy/policy/ibc_dfo_hybrid_image_policy.py b/diffusion_policy/policy/ibc_dfo_hybrid_image_policy.py new file mode 100644 index 0000000..c10e7de --- /dev/null +++ b/diffusion_policy/policy/ibc_dfo_hybrid_image_policy.py @@ -0,0 +1,322 @@ +from typing import Dict, Tuple +import torch +import torch.nn as nn +import torch.nn.functional as F +from diffusion_policy.model.common.normalizer import LinearNormalizer +from diffusion_policy.policy.base_image_policy import BaseImagePolicy +from diffusion_policy.common.robomimic_config_util import get_robomimic_config +from robomimic.algo import algo_factory +from robomimic.algo.algo import PolicyAlgo +import robomimic.utils.obs_utils as ObsUtils +import robomimic.models.base_nets as rmbn +import diffusion_policy.model.vision.crop_randomizer as dmvc +from diffusion_policy.common.pytorch_util import dict_apply, replace_submodules + + +class IbcDfoHybridImagePolicy(BaseImagePolicy): + def __init__(self, + shape_meta: dict, + horizon, + n_action_steps, + n_obs_steps, + dropout=0.1, + train_n_neg=128, + pred_n_iter=5, + pred_n_samples=16384, + kevin_inference=False, + andy_train=False, + obs_encoder_group_norm=True, + eval_fixed_crop=True, + crop_shape=(76, 76), + ): + super().__init__() + + # parse shape_meta + action_shape = shape_meta['action']['shape'] + assert len(action_shape) == 1 + action_dim = 
action_shape[0] + obs_shape_meta = shape_meta['obs'] + obs_config = { + 'low_dim': [], + 'rgb': [], + 'depth': [], + 'scan': [] + } + obs_key_shapes = dict() + for key, attr in obs_shape_meta.items(): + shape = attr['shape'] + obs_key_shapes[key] = list(shape) + + type = attr.get('type', 'low_dim') + if type == 'rgb': + obs_config['rgb'].append(key) + elif type == 'low_dim': + obs_config['low_dim'].append(key) + else: + raise RuntimeError(f"Unsupported obs type: {type}") + + # get raw robomimic config + config = get_robomimic_config( + algo_name='bc_rnn', + hdf5_type='image', + task_name='square', + dataset_type='ph') + + with config.unlocked(): + # set config with shape_meta + config.observation.modalities.obs = obs_config + + if crop_shape is None: + for key, modality in config.observation.encoder.items(): + if modality.obs_randomizer_class == 'CropRandomizer': + modality['obs_randomizer_class'] = None + else: + # set random crop parameter + ch, cw = crop_shape + for key, modality in config.observation.encoder.items(): + if modality.obs_randomizer_class == 'CropRandomizer': + modality.obs_randomizer_kwargs.crop_height = ch + modality.obs_randomizer_kwargs.crop_width = cw + + # init global state + ObsUtils.initialize_obs_utils_with_config(config) + + # load model + policy: PolicyAlgo = algo_factory( + algo_name=config.algo_name, + config=config, + obs_key_shapes=obs_key_shapes, + ac_dim=action_dim, + device='cpu', + ) + + self.obs_encoder = obs_encoder = policy.nets['policy'].nets['encoder'].nets['obs'] + + if obs_encoder_group_norm: + # replace batch norm with group norm + replace_submodules( + root_module=obs_encoder, + predicate=lambda x: isinstance(x, nn.BatchNorm2d), + func=lambda x: nn.GroupNorm( + num_groups=x.num_features//16, + num_channels=x.num_features) + ) + # obs_encoder.obs_nets['agentview_image'].nets[0].nets + + # obs_encoder.obs_randomizers['agentview_image'] + if eval_fixed_crop: + replace_submodules( + root_module=obs_encoder, + 
predicate=lambda x: isinstance(x, rmbn.CropRandomizer), + func=lambda x: dmvc.CropRandomizer( + input_shape=x.input_shape, + crop_height=x.crop_height, + crop_width=x.crop_width, + num_crops=x.num_crops, + pos_enc=x.pos_enc + ) + ) + + obs_feature_dim = obs_encoder.output_shape()[0] + in_action_channels = action_dim * n_action_steps + in_obs_channels = obs_feature_dim * n_obs_steps + in_channels = in_action_channels + in_obs_channels + mid_channels = 1024 + out_channels = 1 + + self.dense0 = nn.Linear(in_features=in_channels, out_features=mid_channels) + self.drop0 = nn.Dropout(dropout) + self.dense1 = nn.Linear(in_features=mid_channels, out_features=mid_channels) + self.drop1 = nn.Dropout(dropout) + self.dense2 = nn.Linear(in_features=mid_channels, out_features=mid_channels) + self.drop2 = nn.Dropout(dropout) + self.dense3 = nn.Linear(in_features=mid_channels, out_features=mid_channels) + self.drop3 = nn.Dropout(dropout) + self.dense4 = nn.Linear(in_features=mid_channels, out_features=out_channels) + + self.normalizer = LinearNormalizer() + + self.train_n_neg = train_n_neg + self.pred_n_iter = pred_n_iter + self.pred_n_samples = pred_n_samples + self.obs_feature_dim = obs_feature_dim + self.action_dim = action_dim + self.n_obs_steps = n_obs_steps + self.n_action_steps = n_action_steps + self.horizon = horizon + self.kevin_inference = kevin_inference + self.andy_train = andy_train + + def forward(self, obs, action): + B, N, Ta, Da = action.shape + B, To, Do = obs.shape + s = obs.reshape(B,1,-1).expand(-1,N,-1) + x = torch.cat([s, action.reshape(B,N,-1)], dim=-1).reshape(B*N,-1) + x = self.drop0(torch.relu(self.dense0(x))) + x = self.drop1(torch.relu(self.dense1(x))) + x = self.drop2(torch.relu(self.dense2(x))) + x = self.drop3(torch.relu(self.dense3(x))) + x = self.dense4(x) + x = x.reshape(B,N) + return x + + # ========= inference ============ + def predict_action(self, obs_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]: + """ + obs_dict: must include 
"obs" key + result: must include "action" key + """ + assert 'past_action' not in obs_dict # not implemented yet + # normalize input + nobs = self.normalizer.normalize(obs_dict) + value = next(iter(nobs.values())) + B, To = value.shape[:2] + T = self.horizon + Ta = self.n_action_steps + Da = self.action_dim + Do = self.obs_feature_dim + To = self.n_obs_steps + + # build input + device = self.device + dtype = self.dtype + + # encode obs + # reshape B, T, ... to B*T + this_nobs = dict_apply(nobs, + lambda x: x[:,:To,...].reshape(-1,*x.shape[2:])) + nobs_features = self.obs_encoder(this_nobs) + # reshape back to B, To, Do + nobs_features = nobs_features.reshape(B,To,-1) + + # only take necessary obs + naction_stats = self.get_naction_stats() + + # first sample + action_dist = torch.distributions.Uniform( + low=naction_stats['min'], + high=naction_stats['max'] + ) + samples = action_dist.sample((B, self.pred_n_samples, Ta)).to( + dtype=dtype) + # (B, N, Ta, Da) + + if self.kevin_inference: + # kevin's implementation + noise_scale = 3e-2 + for i in range(self.pred_n_iter): + # Compute energies. + logits = self.forward(nobs_features, samples) + probs = F.softmax(logits, dim=-1) + + # Resample with replacement. + idxs = torch.multinomial(probs, self.pred_n_samples, replacement=True) + samples = samples[torch.arange(samples.size(0)).unsqueeze(-1), idxs] + + # Add noise and clip to target bounds. + samples = samples + torch.randn_like(samples) * noise_scale + samples = samples.clamp(min=naction_stats['min'], max=naction_stats['max']) + + # Return target with highest probability. + logits = self.forward(nobs_features, samples) + probs = F.softmax(logits, dim=-1) + best_idxs = probs.argmax(dim=-1) + acts_n = samples[torch.arange(samples.size(0)), best_idxs, :] + else: + # andy's implementation + zero = torch.tensor(0, device=self.device) + resample_std = torch.tensor(3e-2, device=self.device) + for i in range(self.pred_n_iter): + # Forward pass. 
+ logits = self.forward(nobs_features, samples) # (B, N) + prob = torch.softmax(logits, dim=-1) + + if i < (self.pred_n_iter - 1): + idxs = torch.multinomial(prob, self.pred_n_samples, replacement=True) + samples = samples[torch.arange(samples.size(0)).unsqueeze(-1), idxs] + samples += torch.normal(zero, resample_std, size=samples.shape, device=self.device) + + # Return one sample per x in batch. + idxs = torch.multinomial(prob, num_samples=1, replacement=True) + acts_n = samples[torch.arange(samples.size(0)).unsqueeze(-1), idxs].squeeze(1) + + action = self.normalizer['action'].unnormalize(acts_n) + result = { + 'action': action + } + return result + + # ========= training ============ + def set_normalizer(self, normalizer: LinearNormalizer): + self.normalizer.load_state_dict(normalizer.state_dict()) + + def compute_loss(self, batch): + # normalize input + assert 'valid_mask' not in batch + nobs = self.normalizer.normalize(batch['obs']) + naction = self.normalizer['action'].normalize(batch['action']) + + # shapes + Do = self.obs_feature_dim + Da = self.action_dim + To = self.n_obs_steps + Ta = self.n_action_steps + T = self.horizon + B = naction.shape[0] + + # encode obs + # reshape B, T, ... to B*T + this_nobs = dict_apply(nobs, + lambda x: x[:,:To,...].reshape(-1,*x.shape[2:])) + nobs_features = self.obs_encoder(this_nobs) + # reshape back to B, To, Do + nobs_features = nobs_features.reshape(B,To,-1) + + start = To - 1 + end = start + Ta + this_action = naction[:,start:end] + + # Small additive noise to true positives. 
+ this_action += torch.normal(mean=0, std=1e-4, + size=this_action.shape, + dtype=this_action.dtype, + device=this_action.device) + + # Sample negatives: (B, train_n_neg, Ta, Da) + naction_stats = self.get_naction_stats() + action_dist = torch.distributions.Uniform( + low=naction_stats['min'], + high=naction_stats['max'] + ) + samples = action_dist.sample((B, self.train_n_neg, Ta)).to( + dtype=this_action.dtype) + action_samples = torch.cat([ + this_action.unsqueeze(1), samples], dim=1) + # (B, train_n_neg+1, Ta, Da) + + if self.andy_train: + # Get onehot labels + labels = torch.zeros(action_samples.shape[:2], + dtype=this_action.dtype, device=this_action.device) + labels[:,0] = 1 + logits = self.forward(nobs_features, action_samples) + # (B, N) + logits = torch.log_softmax(logits, dim=-1) + loss = -torch.mean(torch.sum(logits * labels, axis=-1)) + else: + labels = torch.zeros((B,),dtype=torch.int64, device=this_action.device) + # training + logits = self.forward(nobs_features, action_samples) + loss = F.cross_entropy(logits, labels) + return loss + + def get_naction_stats(self): + Da = self.action_dim + naction_stats = self.normalizer['action'].get_output_stats() + repeated_stats = dict() + for key, value in naction_stats.items(): + assert len(value.shape) == 1 + n_repeats = Da // value.shape[0] + assert value.shape[0] * n_repeats == Da + repeated_stats[key] = value.repeat(n_repeats) + return repeated_stats diff --git a/diffusion_policy/policy/ibc_dfo_lowdim_policy.py b/diffusion_policy/policy/ibc_dfo_lowdim_policy.py new file mode 100644 index 0000000..046f384 --- /dev/null +++ b/diffusion_policy/policy/ibc_dfo_lowdim_policy.py @@ -0,0 +1,209 @@ +from typing import Dict, Tuple +import torch +import torch.nn as nn +import torch.nn.functional as F +from diffusion_policy.model.common.normalizer import LinearNormalizer +from diffusion_policy.policy.base_lowdim_policy import BaseLowdimPolicy + +class IbcDfoLowdimPolicy(BaseLowdimPolicy): + def __init__(self, + 
horizon, + obs_dim, + action_dim, + n_action_steps, + n_obs_steps, + dropout=0.1, + train_n_neg=128, + pred_n_iter=5, + pred_n_samples=16384, + kevin_inference=False, + andy_train=False + ): + super().__init__() + + in_action_channels = action_dim * n_action_steps + in_obs_channels = obs_dim * n_obs_steps + in_channels = in_action_channels + in_obs_channels + mid_channels = 1024 + out_channels = 1 + + self.dense0 = nn.Linear(in_features=in_channels, out_features=mid_channels) + self.drop0 = nn.Dropout(dropout) + self.dense1 = nn.Linear(in_features=mid_channels, out_features=mid_channels) + self.drop1 = nn.Dropout(dropout) + self.dense2 = nn.Linear(in_features=mid_channels, out_features=mid_channels) + self.drop2 = nn.Dropout(dropout) + self.dense3 = nn.Linear(in_features=mid_channels, out_features=mid_channels) + self.drop3 = nn.Dropout(dropout) + self.dense4 = nn.Linear(in_features=mid_channels, out_features=out_channels) + + self.normalizer = LinearNormalizer() + + self.train_n_neg = train_n_neg + self.pred_n_iter = pred_n_iter + self.pred_n_samples = pred_n_samples + self.obs_dim = obs_dim + self.action_dim = action_dim + self.n_obs_steps = n_obs_steps + self.n_action_steps = n_action_steps + self.horizon = horizon + self.kevin_inference = kevin_inference + self.andy_train = andy_train + + def forward(self, obs, action): + B, N, Ta, Da = action.shape + B, To, Do = obs.shape + s = obs.reshape(B,1,-1).expand(-1,N,-1) + x = torch.cat([s, action.reshape(B,N,-1)], dim=-1).reshape(B*N,-1) + x = self.drop0(torch.relu(self.dense0(x))) + x = self.drop1(torch.relu(self.dense1(x))) + x = self.drop2(torch.relu(self.dense2(x))) + x = self.drop3(torch.relu(self.dense3(x))) + x = self.dense4(x) + x = x.reshape(B,N) + return x + + # ========= inference ============ + def predict_action(self, obs_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]: + """ + obs_dict: must include "obs" key + result: must include "action" key + """ + + assert 'obs' in obs_dict + assert 
'past_action' not in obs_dict # not implemented yet + nobs = self.normalizer['obs'].normalize(obs_dict['obs']) + B, _, Do = nobs.shape + To = self.n_obs_steps + assert Do == self.obs_dim + T = self.horizon + Da = self.action_dim + Ta = self.n_action_steps + + # only take necessary obs + this_obs = nobs[:,:To] + naction_stats = self.get_naction_stats() + + # first sample + action_dist = torch.distributions.Uniform( + low=naction_stats['min'], + high=naction_stats['max'] + ) + samples = action_dist.sample((B, self.pred_n_samples, Ta)).to( + dtype=this_obs.dtype) + # (B, N, Ta, Da) + + if self.kevin_inference: + # kevin's implementation + noise_scale = 3e-2 + for i in range(self.pred_n_iter): + # Compute energies. + logits = self.forward(this_obs, samples) + probs = F.softmax(logits, dim=-1) + + # Resample with replacement. + idxs = torch.multinomial(probs, self.pred_n_samples, replacement=True) + samples = samples[torch.arange(samples.size(0)).unsqueeze(-1), idxs] + + # Add noise and clip to target bounds. + samples = samples + torch.randn_like(samples) * noise_scale + samples = samples.clamp(min=naction_stats['min'], max=naction_stats['max']) + + # Return target with highest probability. + logits = self.forward(this_obs, samples) + probs = F.softmax(logits, dim=-1) + best_idxs = probs.argmax(dim=-1) + acts_n = samples[torch.arange(samples.size(0)), best_idxs, :] + else: + # andy's implementation + zero = torch.tensor(0, device=self.device) + resample_std = torch.tensor(3e-2, device=self.device) + for i in range(self.pred_n_iter): + # Forward pass. + logits = self.forward(this_obs, samples) # (B, N) + prob = torch.softmax(logits, dim=-1) + + if i < (self.pred_n_iter - 1): + idxs = torch.multinomial(prob, self.pred_n_samples, replacement=True) + samples = samples[torch.arange(samples.size(0)).unsqueeze(-1), idxs] + samples += torch.normal(zero, resample_std, size=samples.shape, device=self.device) + + # Return one sample per x in batch. 
+ idxs = torch.multinomial(prob, num_samples=1, replacement=True) + acts_n = samples[torch.arange(samples.size(0)).unsqueeze(-1), idxs].squeeze(1) + + action = self.normalizer['action'].unnormalize(acts_n) + result = { + 'action': action + } + return result + + # ========= training ============ + def set_normalizer(self, normalizer: LinearNormalizer): + self.normalizer.load_state_dict(normalizer.state_dict()) + + def compute_loss(self, batch): + # normalize input + assert 'valid_mask' not in batch + nbatch = self.normalizer.normalize(batch) + nobs = nbatch['obs'] + naction = nbatch['action'] + + # shapes + Do = self.obs_dim + Da = self.action_dim + To = self.n_obs_steps + Ta = self.n_action_steps + T = self.horizon + B = naction.shape[0] + + this_obs = nobs[:,:To] + start = To - 1 + end = start + Ta + this_action = naction[:,start:end] + + # Small additive noise to true positives. + this_action += torch.normal(mean=0, std=1e-4, + size=this_action.shape, + dtype=this_action.dtype, + device=this_action.device) + + # Sample negatives: (B, train_n_neg, Ta, Da) + naction_stats = self.get_naction_stats() + action_dist = torch.distributions.Uniform( + low=naction_stats['min'], + high=naction_stats['max'] + ) + samples = action_dist.sample((B, self.train_n_neg, Ta)).to( + dtype=this_action.dtype) + action_samples = torch.cat([ + this_action.unsqueeze(1), samples], dim=1) + # (B, train_n_neg+1, Ta, Da) + + if self.andy_train: + # Get onehot labels + labels = torch.zeros(action_samples.shape[:2], + dtype=this_action.dtype, device=this_action.device) + labels[:,0] = 1 + logits = self.forward(this_obs, action_samples) + # (B, N) + logits = torch.log_softmax(logits, dim=-1) + loss = -torch.mean(torch.sum(logits * labels, axis=-1)) + else: + labels = torch.zeros((B,),dtype=torch.int64, device=this_action.device) + # training + logits = self.forward(this_obs, action_samples) + loss = F.cross_entropy(logits, labels) + return loss + + + def get_naction_stats(self): + Da = 
self.action_dim + naction_stats = self.normalizer['action'].get_output_stats() + repeated_stats = dict() + for key, value in naction_stats.items(): + assert len(value.shape) == 1 + n_repeats = Da // value.shape[0] + assert value.shape[0] * n_repeats == Da + repeated_stats[key] = value.repeat(n_repeats) + return repeated_stats diff --git a/diffusion_policy/policy/robomimic_image_policy.py b/diffusion_policy/policy/robomimic_image_policy.py new file mode 100644 index 0000000..c2584f5 --- /dev/null +++ b/diffusion_policy/policy/robomimic_image_policy.py @@ -0,0 +1,142 @@ +from typing import Dict +import torch +from diffusion_policy.model.common.normalizer import LinearNormalizer +from diffusion_policy.policy.base_image_policy import BaseImagePolicy +from diffusion_policy.common.pytorch_util import dict_apply + +from robomimic.algo import algo_factory +from robomimic.algo.algo import PolicyAlgo +import robomimic.utils.obs_utils as ObsUtils +from diffusion_policy.common.robomimic_config_util import get_robomimic_config + +class RobomimicImagePolicy(BaseImagePolicy): + def __init__(self, + shape_meta: dict, + algo_name='bc_rnn', + obs_type='image', + task_name='square', + dataset_type='ph', + crop_shape=(76,76) + ): + super().__init__() + + # parse shape_meta + action_shape = shape_meta['action']['shape'] + assert len(action_shape) == 1 + action_dim = action_shape[0] + obs_shape_meta = shape_meta['obs'] + obs_config = { + 'low_dim': [], + 'rgb': [], + 'depth': [], + 'scan': [] + } + obs_key_shapes = dict() + for key, attr in obs_shape_meta.items(): + shape = attr['shape'] + obs_key_shapes[key] = list(shape) + + type = attr.get('type', 'low_dim') + if type == 'rgb': + obs_config['rgb'].append(key) + elif type == 'low_dim': + obs_config['low_dim'].append(key) + else: + raise RuntimeError(f"Unsupported obs type: {type}") + + # get raw robomimic config + config = get_robomimic_config( + algo_name=algo_name, + hdf5_type=obs_type, + task_name=task_name, + 
dataset_type=dataset_type) + + + with config.unlocked(): + # set config with shape_meta + config.observation.modalities.obs = obs_config + + if crop_shape is None: + for key, modality in config.observation.encoder.items(): + if modality.obs_randomizer_class == 'CropRandomizer': + modality['obs_randomizer_class'] = None + else: + # set random crop parameter + ch, cw = crop_shape + for key, modality in config.observation.encoder.items(): + if modality.obs_randomizer_class == 'CropRandomizer': + modality.obs_randomizer_kwargs.crop_height = ch + modality.obs_randomizer_kwargs.crop_width = cw + + # init global state + ObsUtils.initialize_obs_utils_with_config(config) + + # load model + model: PolicyAlgo = algo_factory( + algo_name=config.algo_name, + config=config, + obs_key_shapes=obs_key_shapes, + ac_dim=action_dim, + device='cpu', + ) + + self.model = model + self.nets = model.nets + self.normalizer = LinearNormalizer() + self.config = config + + def to(self,*args,**kwargs): + device, dtype, non_blocking, convert_to_format = torch._C._nn._parse_to(*args, **kwargs) + if device is not None: + self.model.device = device + super().to(*args,**kwargs) + + # =========== inference ============= + def predict_action(self, obs_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]: + nobs_dict = self.normalizer(obs_dict) + robomimic_obs_dict = dict_apply(nobs_dict, lambda x: x[:,0,...]) + naction = self.model.get_action(robomimic_obs_dict) + action = self.normalizer['action'].unnormalize(naction) + # (B, Da) + result = { + 'action': action[:,None,:] # (B, 1, Da) + } + return result + + def reset(self): + self.model.reset() + + # =========== training ============== + def set_normalizer(self, normalizer: LinearNormalizer): + self.normalizer.load_state_dict(normalizer.state_dict()) + + def train_on_batch(self, batch, epoch, validate=False): + nobs = self.normalizer.normalize(batch['obs']) + nactions = self.normalizer['action'].normalize(batch['action']) + robomimic_batch = { + 
'obs': nobs, + 'actions': nactions + } + input_batch = self.model.process_batch_for_training( + robomimic_batch) + info = self.model.train_on_batch( + batch=input_batch, epoch=epoch, validate=validate) + # keys: losses, predictions + return info + + def on_epoch_end(self, epoch): + self.model.on_epoch_end(epoch) + + def get_optimizer(self): + return self.model.optimizers['policy'] + + +def test(): + import os + from omegaconf import OmegaConf + cfg_path = os.path.expanduser('~/dev/diffusion_policy/diffusion_policy/config/task/lift_image.yaml') + cfg = OmegaConf.load(cfg_path) + shape_meta = cfg.shape_meta + + policy = RobomimicImagePolicy(shape_meta=shape_meta) + diff --git a/diffusion_policy/policy/robomimic_lowdim_policy.py b/diffusion_policy/policy/robomimic_lowdim_policy.py new file mode 100644 index 0000000..0c7b9a5 --- /dev/null +++ b/diffusion_policy/policy/robomimic_lowdim_policy.py @@ -0,0 +1,87 @@ +from typing import Dict +import torch +from diffusion_policy.model.common.normalizer import LinearNormalizer +from diffusion_policy.policy.base_lowdim_policy import BaseLowdimPolicy + +from robomimic.algo import algo_factory +from robomimic.algo.algo import PolicyAlgo +import robomimic.utils.obs_utils as ObsUtils +from diffusion_policy.common.robomimic_config_util import get_robomimic_config + +class RobomimicLowdimPolicy(BaseLowdimPolicy): + def __init__(self, + action_dim, + obs_dim, + algo_name='bc_rnn', + obs_type='low_dim', + task_name='square', + dataset_type='ph', + ): + super().__init__() + # key for robomimic obs input + # previously this is 'object', 'robot0_eef_pos' etc + obs_key = 'obs' + + config = get_robomimic_config( + algo_name=algo_name, + hdf5_type=obs_type, + task_name=task_name, + dataset_type=dataset_type) + with config.unlocked(): + config.observation.modalities.obs.low_dim = [obs_key] + + ObsUtils.initialize_obs_utils_with_config(config) + model: PolicyAlgo = algo_factory( + algo_name=config.algo_name, + config=config, + 
obs_key_shapes={obs_key: [obs_dim]}, + ac_dim=action_dim, + device='cpu', + ) + self.model = model + self.nets = model.nets + self.normalizer = LinearNormalizer() + self.obs_key = obs_key + self.config = config + + def to(self,*args,**kwargs): + device, dtype, non_blocking, convert_to_format = torch._C._nn._parse_to(*args, **kwargs) + if device is not None: + self.model.device = device + super().to(*args,**kwargs) + + # =========== inference ============= + def predict_action(self, obs_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]: + obs = self.normalizer['obs'].normalize(obs_dict['obs']) + assert obs.shape[1] == 1 + robomimic_obs_dict = {self.obs_key: obs[:,0,:]} + naction = self.model.get_action(robomimic_obs_dict) + action = self.normalizer['action'].unnormalize(naction) + # (B, Da) + result = { + 'action': action[:,None,:] # (B, 1, Da) + } + return result + + def reset(self): + self.model.reset() + + # =========== training ============== + def set_normalizer(self, normalizer: LinearNormalizer): + self.normalizer.load_state_dict(normalizer.state_dict()) + + def train_on_batch(self, batch, epoch, validate=False): + nbatch = self.normalizer.normalize(batch) + robomimic_batch = { + 'obs': {self.obs_key: nbatch['obs']}, + 'actions': nbatch['action'] + } + input_batch = self.model.process_batch_for_training( + robomimic_batch) + info = self.model.train_on_batch( + batch=input_batch, epoch=epoch, validate=validate) + # keys: losses, predictions + return info + + def get_optimizer(self): + return self.model.optimizers['policy'] diff --git a/diffusion_policy/real_world/keystroke_counter.py b/diffusion_policy/real_world/keystroke_counter.py new file mode 100644 index 0000000..9014713 --- /dev/null +++ b/diffusion_policy/real_world/keystroke_counter.py @@ -0,0 +1,45 @@ +from pynput.keyboard import Key, KeyCode, Listener +from collections import defaultdict +from threading import Lock + +class KeystrokeCounter(Listener): + def __init__(self): + 
self.key_count_map = defaultdict(lambda:0) + self.key_press_list = list() + self.lock = Lock() + super().__init__(on_press=self.on_press, on_release=self.on_release) + + def on_press(self, key): + with self.lock: + self.key_count_map[key] += 1 + self.key_press_list.append(key) + + def on_release(self, key): + pass + + def clear(self): + with self.lock: + self.key_count_map = defaultdict(lambda:0) + self.key_press_list = list() + + def __getitem__(self, key): + with self.lock: + return self.key_count_map[key] + + def get_press_events(self): + with self.lock: + events = list(self.key_press_list) + self.key_press_list = list() + return events + +if __name__ == '__main__': + import time + with KeystrokeCounter() as counter: + try: + while True: + print('Space:', counter[Key.space]) + print('q:', counter[KeyCode(char='q')]) + time.sleep(1/60) + except KeyboardInterrupt: + events = counter.get_press_events() + print(events) diff --git a/diffusion_policy/real_world/multi_camera_visualizer.py b/diffusion_policy/real_world/multi_camera_visualizer.py new file mode 100644 index 0000000..84cbf99 --- /dev/null +++ b/diffusion_policy/real_world/multi_camera_visualizer.py @@ -0,0 +1,74 @@ +import time +import multiprocessing as mp +import numpy as np +import cv2 +from threadpoolctl import threadpool_limits +from diffusion_policy.real_world.multi_realsense import MultiRealsense + +class MultiCameraVisualizer(mp.Process): + def __init__(self, + realsense: MultiRealsense, + row, col, + window_name='Multi Cam Vis', + vis_fps=60, + fill_value=0, + rgb_to_bgr=True + ): + super().__init__() + self.row = row + self.col = col + self.window_name = window_name + self.vis_fps = vis_fps + self.fill_value = fill_value + self.rgb_to_bgr=rgb_to_bgr + self.realsense = realsense + # shared variables + self.stop_event = mp.Event() + + def start(self, wait=False): + super().start() + + def stop(self, wait=False): + self.stop_event.set() + if wait: + self.stop_wait() + + def start_wait(self): + pass 
+ + def stop_wait(self): + self.join() + + def run(self): + cv2.setNumThreads(1) + threadpool_limits(1) + channel_slice = slice(None) + if self.rgb_to_bgr: + channel_slice = slice(None,None,-1) + + vis_data = None + vis_img = None + while not self.stop_event.is_set(): + vis_data = self.realsense.get_vis(out=vis_data) + color = vis_data['color'] + N, H, W, C = color.shape + assert C == 3 + oh = H * self.row + ow = W * self.col + if vis_img is None: + vis_img = np.full((oh, ow, 3), + fill_value=self.fill_value, dtype=np.uint8) + for row in range(self.row): + for col in range(self.col): + idx = col + row * self.col + h_start = H * row + h_end = h_start + H + w_start = W * col + w_end = w_start + W + if idx < N: + # opencv uses bgr + vis_img[h_start:h_end,w_start:w_end + ] = color[idx,:,:,channel_slice] + cv2.imshow(self.window_name, vis_img) + cv2.pollKey() + time.sleep(1 / self.vis_fps) diff --git a/diffusion_policy/real_world/multi_realsense.py b/diffusion_policy/real_world/multi_realsense.py new file mode 100644 index 0000000..3b29ad8 --- /dev/null +++ b/diffusion_policy/real_world/multi_realsense.py @@ -0,0 +1,224 @@ +from typing import List, Optional, Union, Dict, Callable +import numbers +import time +import pathlib +from multiprocessing.managers import SharedMemoryManager +import numpy as np +import pyrealsense2 as rs +from diffusion_policy.real_world.single_realsense import SingleRealsense +from diffusion_policy.real_world.video_recorder import VideoRecorder + +class MultiRealsense: + def __init__(self, + serial_numbers: Optional[List[str]]=None, + shm_manager: Optional[SharedMemoryManager]=None, + resolution=(1280,720), + capture_fps=30, + put_fps=None, + put_downsample=True, + record_fps=None, + enable_color=True, + enable_depth=False, + enable_infrared=False, + get_max_k=30, + advanced_mode_config: Optional[Union[dict, List[dict]]]=None, + transform: Optional[Union[Callable[[Dict], Dict], List[Callable]]]=None, + vis_transform: 
Optional[Union[Callable[[Dict], Dict], List[Callable]]]=None, + recording_transform: Optional[Union[Callable[[Dict], Dict], List[Callable]]]=None, + video_recorder: Optional[Union[VideoRecorder, List[VideoRecorder]]]=None, + verbose=False + ): + if shm_manager is None: + shm_manager = SharedMemoryManager() + shm_manager.start() + if serial_numbers is None: + serial_numbers = SingleRealsense.get_connected_devices_serial() + n_cameras = len(serial_numbers) + + advanced_mode_config = repeat_to_list( + advanced_mode_config, n_cameras, dict) + transform = repeat_to_list( + transform, n_cameras, Callable) + vis_transform = repeat_to_list( + vis_transform, n_cameras, Callable) + recording_transform = repeat_to_list( + recording_transform, n_cameras, Callable) + + video_recorder = repeat_to_list( + video_recorder, n_cameras, VideoRecorder) + + cameras = dict() + for i, serial in enumerate(serial_numbers): + cameras[serial] = SingleRealsense( + shm_manager=shm_manager, + serial_number=serial, + resolution=resolution, + capture_fps=capture_fps, + put_fps=put_fps, + put_downsample=put_downsample, + record_fps=record_fps, + enable_color=enable_color, + enable_depth=enable_depth, + enable_infrared=enable_infrared, + get_max_k=get_max_k, + advanced_mode_config=advanced_mode_config[i], + transform=transform[i], + vis_transform=vis_transform[i], + recording_transform=recording_transform[i], + video_recorder=video_recorder[i], + verbose=verbose + ) + + self.cameras = cameras + self.shm_manager = shm_manager + + def __enter__(self): + self.start() + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.stop() + + @property + def n_cameras(self): + return len(self.cameras) + + @property + def is_ready(self): + is_ready = True + for camera in self.cameras.values(): + if not camera.is_ready: + is_ready = False + return is_ready + + def start(self, wait=True, put_start_time=None): + if put_start_time is None: + put_start_time = time.time() + for camera in 
self.cameras.values(): + camera.start(wait=False, put_start_time=put_start_time) + + if wait: + self.start_wait() + + def stop(self, wait=True): + for camera in self.cameras.values(): + camera.stop(wait=False) + + if wait: + self.stop_wait() + + def start_wait(self): + for camera in self.cameras.values(): + camera.start_wait() + + def stop_wait(self): + for camera in self.cameras.values(): + camera.join() + + def get(self, k=None, out=None) -> Dict[int, Dict[str, np.ndarray]]: + """ + Return order T,H,W,C + { + 0: { + 'rgb': (T,H,W,C), + 'timestamp': (T,) + }, + 1: ... + } + """ + if out is None: + out = dict() + for i, camera in enumerate(self.cameras.values()): + this_out = None + if i in out: + this_out = out[i] + this_out = camera.get(k=k, out=this_out) + out[i] = this_out + return out + + def get_vis(self, out=None): + results = list() + for i, camera in enumerate(self.cameras.values()): + this_out = None + if out is not None: + this_out = dict() + for key, v in out.items(): + # use the slicing trick to maintain the array + # when v is 1D + this_out[key] = v[i:i+1].reshape(v.shape[1:]) + this_out = camera.get_vis(out=this_out) + if out is None: + results.append(this_out) + if out is None: + out = dict() + for key in results[0].keys(): + out[key] = np.stack([x[key] for x in results]) + return out + + def set_color_option(self, option, value): + n_camera = len(self.cameras) + value = repeat_to_list(value, n_camera, numbers.Number) + for i, camera in enumerate(self.cameras.values()): + camera.set_color_option(option, value[i]) + + def set_exposure(self, exposure=None, gain=None): + """ + exposure: (1, 10000) 100us unit. 
(0.1 ms, 1/10000s) + gain: (0, 128) + """ + + if exposure is None and gain is None: + # auto exposure + self.set_color_option(rs.option.enable_auto_exposure, 1.0) + else: + # manual exposure + self.set_color_option(rs.option.enable_auto_exposure, 0.0) + if exposure is not None: + self.set_color_option(rs.option.exposure, exposure) + if gain is not None: + self.set_color_option(rs.option.gain, gain) + + def set_white_balance(self, white_balance=None): + if white_balance is None: + self.set_color_option(rs.option.enable_auto_white_balance, 1.0) + else: + self.set_color_option(rs.option.enable_auto_white_balance, 0.0) + self.set_color_option(rs.option.white_balance, white_balance) + + def get_intrinsics(self): + return np.array([c.get_intrinsics() for c in self.cameras.values()]) + + def get_depth_scale(self): + return np.array([c.get_depth_scale() for c in self.cameras.values()]) + + def start_recording(self, video_path: Union[str, List[str]], start_time: float): + if isinstance(video_path, str): + # directory + video_dir = pathlib.Path(video_path) + assert video_dir.parent.is_dir() + video_dir.mkdir(parents=True, exist_ok=True) + video_path = list() + for i in range(self.n_cameras): + video_path.append( + str(video_dir.joinpath(f'{i}.mp4').absolute())) + assert len(video_path) == self.n_cameras + + for i, camera in enumerate(self.cameras.values()): + camera.start_recording(video_path[i], start_time) + + def stop_recording(self): + for i, camera in enumerate(self.cameras.values()): + camera.stop_recording() + + def restart_put(self, start_time): + for camera in self.cameras.values(): + camera.restart_put(start_time) + + +def repeat_to_list(x, n: int, cls): + if x is None: + x = [None] * n + if isinstance(x, cls): + x = [x] * n + assert len(x) == n + return x diff --git a/diffusion_policy/real_world/real_data_conversion.py b/diffusion_policy/real_world/real_data_conversion.py new file mode 100644 index 0000000..a75e4cc --- /dev/null +++ 
def real_data_to_replay_buffer(
        dataset_path: str,
        out_store: Optional[zarr.ABSStore]=None,
        out_resolutions: Union[None, tuple, Dict[str,tuple]]=None, # (width, height)
        lowdim_keys: Optional[Sequence[str]]=None,
        image_keys: Optional[Sequence[str]]=None,
        lowdim_compressor: Optional[numcodecs.abc.Codec]=None,
        image_compressor: Optional[numcodecs.abc.Codec]=None,
        n_decoding_threads: int=multiprocessing.cpu_count(),
        n_encoding_threads: int=multiprocessing.cpu_count(),
        max_inflight_tasks: int=multiprocessing.cpu_count()*5,
        verify_read: bool=True
        ) -> ReplayBuffer:
    """Convert an on-disk demonstration dataset into a ReplayBuffer.

    Reads ``<dataset_path>/replay_buffer.zarr`` (low-dim data) and
    ``<dataset_path>/videos/<episode>/<camera>.mp4`` (images), decodes the
    videos and writes everything into ``out_store`` (an in-memory zarr
    store by default). Image frames are written concurrently, one chunk
    per frame, so no cross-thread synchronization is needed.

    It is recommended to call
        cv2.setNumThreads(1)
        threadpoolctl.threadpool_limits(1)
    before calling this function to avoid CPU oversubscription.

    out_resolutions:
        if None: use each video's native resolution
        if (width, height): apply to every camera
        if dict: per-camera, e.g. {'camera_0': (1280, 720)}
    image_keys: e.g. ['camera_0', 'camera_1']; when given, missing
        cameras raise RuntimeError instead of only printing a warning.
    verify_read: re-read each written frame to make sure it decodes.

    Raises RuntimeError when a frame fails to encode, AssertionError when
    the dataset layout is missing.
    """
    if out_store is None:
        out_store = zarr.MemoryStore()
    if n_decoding_threads <= 0:
        n_decoding_threads = multiprocessing.cpu_count()
    if n_encoding_threads <= 0:
        n_encoding_threads = multiprocessing.cpu_count()
    if image_compressor is None:
        image_compressor = Jpeg2k(level=50)

    # verify input layout
    # renamed from `input` to avoid shadowing the builtin
    in_path = pathlib.Path(os.path.expanduser(dataset_path))
    in_zarr_path = in_path.joinpath('replay_buffer.zarr')
    in_video_dir = in_path.joinpath('videos')
    assert in_zarr_path.is_dir()
    assert in_video_dir.is_dir()

    in_replay_buffer = ReplayBuffer.create_from_path(str(in_zarr_path.absolute()), mode='r')

    # save lowdim data to single chunk
    chunks_map = dict()
    compressor_map = dict()
    for key, value in in_replay_buffer.data.items():
        chunks_map[key] = value.shape
        compressor_map[key] = lowdim_compressor

    print('Loading lowdim data')
    out_replay_buffer = ReplayBuffer.copy_from_store(
        src_store=in_replay_buffer.root.store,
        store=out_store,
        keys=lowdim_keys,
        chunks=chunks_map,
        compressors=compressor_map
    )

    # worker function: write one frame (and optionally verify it decodes);
    # returns False on any failure so the main thread can abort once.
    def put_img(zarr_arr, zarr_idx, img):
        try:
            zarr_arr[zarr_idx] = img
            # make sure we can successfully decode
            if verify_read:
                _ = zarr_arr[zarr_idx]
            return True
        except Exception:
            return False

    n_cameras = 0
    camera_idxs = set()
    if image_keys is not None:
        n_cameras = len(image_keys)
        camera_idxs = set(int(x.split('_')[-1]) for x in image_keys)
    else:
        # estimate number of cameras from episode 0's video directory
        episode_video_dir = in_video_dir.joinpath(str(0))
        episode_video_paths = sorted(episode_video_dir.glob('*.mp4'), key=lambda x: int(x.stem))
        camera_idxs = set(int(x.stem) for x in episode_video_paths)
        n_cameras = len(episode_video_paths)

    n_steps = in_replay_buffer.n_steps
    episode_starts = in_replay_buffer.episode_ends[:] - in_replay_buffer.episode_lengths[:]
    episode_lengths = in_replay_buffer.episode_lengths
    timestamps = in_replay_buffer['timestamp'][:]
    # assumes a constant frame period — TODO confirm against recorder
    dt = timestamps[1] - timestamps[0]

    with tqdm(total=n_steps*n_cameras, desc="Loading image data", mininterval=1.0) as pbar:
        # one chunk per thread, therefore no synchronization needed
        with concurrent.futures.ThreadPoolExecutor(max_workers=n_encoding_threads) as executor:
            futures = set()
            for episode_idx, episode_length in enumerate(episode_lengths):
                episode_video_dir = in_video_dir.joinpath(str(episode_idx))
                episode_start = episode_starts[episode_idx]

                episode_video_paths = sorted(episode_video_dir.glob('*.mp4'), key=lambda x: int(x.stem))
                this_camera_idxs = set(int(x.stem) for x in episode_video_paths)
                if image_keys is None:
                    for i in this_camera_idxs - camera_idxs:
                        print(f"Unexpected camera {i} at episode {episode_idx}")
                for i in camera_idxs - this_camera_idxs:
                    print(f"Missing camera {i} at episode {episode_idx}")
                    # missing a requested camera is fatal; otherwise warn only
                    if image_keys is not None:
                        raise RuntimeError(f"Missing camera {i} at episode {episode_idx}")

                for video_path in episode_video_paths:
                    camera_idx = int(video_path.stem)
                    if image_keys is not None:
                        # if image_keys provided, skip not used cameras
                        if camera_idx not in camera_idxs:
                            continue

                    # read resolution from the video stream header
                    with av.open(str(video_path.absolute())) as container:
                        video = container.streams.video[0]
                        vcc = video.codec_context
                        this_res = (vcc.width, vcc.height)
                    in_img_res = this_res

                    arr_name = f'camera_{camera_idx}'
                    # figure out save resolution
                    out_img_res = in_img_res
                    if isinstance(out_resolutions, dict):
                        if arr_name in out_resolutions:
                            out_img_res = tuple(out_resolutions[arr_name])
                    elif out_resolutions is not None:
                        out_img_res = tuple(out_resolutions)

                    # allocate array (one chunk per frame so workers never share a chunk)
                    if arr_name not in out_replay_buffer:
                        ow, oh = out_img_res
                        _ = out_replay_buffer.data.require_dataset(
                            name=arr_name,
                            shape=(n_steps,oh,ow,3),
                            chunks=(1,oh,ow,3),
                            compressor=image_compressor,
                            dtype=np.uint8
                        )
                    arr = out_replay_buffer[arr_name]

                    image_tf = get_image_transform(
                        input_res=in_img_res, output_res=out_img_res, bgr_to_rgb=False)
                    for step_idx, frame in enumerate(read_video(
                            video_path=str(video_path),
                            dt=dt,
                            img_transform=image_tf,
                            thread_type='FRAME',
                            thread_count=n_decoding_threads
                            )):
                        if len(futures) >= max_inflight_tasks:
                            # limit number of inflight tasks to bound memory use
                            completed, futures = concurrent.futures.wait(futures,
                                return_when=concurrent.futures.FIRST_COMPLETED)
                            for f in completed:
                                if not f.result():
                                    raise RuntimeError('Failed to encode image!')
                            pbar.update(len(completed))

                        global_idx = episode_start + step_idx
                        futures.add(executor.submit(put_img, arr, global_idx, frame))

                        # videos may contain trailing frames past the episode end
                        if step_idx == (episode_length - 1):
                            break

            # drain the remaining in-flight writes
            completed, futures = concurrent.futures.wait(futures)
            for f in completed:
                if not f.result():
                    raise RuntimeError('Failed to encode image!')
            pbar.update(len(completed))
    return out_replay_buffer
# Mapping from RTDE receive-interface field names (left) to the
# observation-dict keys this environment exposes (right).
DEFAULT_OBS_KEY_MAP = {
    # robot
    'ActualTCPPose': 'robot_eef_pose',
    'ActualTCPSpeed': 'robot_eef_pose_vel',
    'ActualQ': 'robot_joint',
    'ActualQd': 'robot_joint_vel',
    # timestamps
    'step_idx': 'step_idx',
    'timestamp': 'timestamp'
}

class RealEnv:
    """Real-robot environment combining a UR robot (RTDE) and RealSense cameras.

    Provides a gym-like async API: `get_obs()` returns timestamp-aligned
    camera + robot observations, `exec_actions()` schedules end-effector
    waypoints, and `start_episode()`/`end_episode()`/`drop_episode()`
    manage on-disk recording (videos + a zarr replay buffer under
    `output_dir`). Use as a context manager or call `start()`/`stop()`.
    """
    def __init__(self,
            # required params
            output_dir,
            robot_ip,
            # env params
            frequency=10,
            n_obs_steps=2,
            # obs
            obs_image_resolution=(640,480),
            max_obs_buffer_size=30,
            camera_serial_numbers=None,
            obs_key_map=DEFAULT_OBS_KEY_MAP,
            obs_float32=False,
            # action
            max_pos_speed=0.25,
            max_rot_speed=0.6,
            # robot
            tcp_offset=0.13,
            init_joints=False,
            # video capture params
            video_capture_fps=30,
            video_capture_resolution=(1280,720),
            # saving params
            record_raw_video=True,
            thread_per_video=2,
            video_crf=21,
            # vis params
            enable_multi_cam_vis=True,
            multi_cam_vis_resolution=(1280,720),
            # shared memory
            shm_manager=None
            ):
        # the control frequency cannot exceed the camera frame rate,
        # otherwise some control steps would see no new frame
        assert frequency <= video_capture_fps
        output_dir = pathlib.Path(output_dir)
        assert output_dir.parent.is_dir()
        video_dir = output_dir.joinpath('videos')
        video_dir.mkdir(parents=True, exist_ok=True)
        zarr_path = str(output_dir.joinpath('replay_buffer.zarr').absolute())
        replay_buffer = ReplayBuffer.create_from_path(
            zarr_path=zarr_path, mode='a')

        if shm_manager is None:
            shm_manager = SharedMemoryManager()
            shm_manager.start()
        if camera_serial_numbers is None:
            camera_serial_numbers = SingleRealsense.get_connected_devices_serial()

        # transform applied to frames served as observations:
        # downscale to obs_image_resolution and convert BGR -> RGB
        color_tf = get_image_transform(
            input_res=video_capture_resolution,
            output_res=obs_image_resolution,
            # obs output rgb
            bgr_to_rgb=True)
        color_transform = color_tf
        if obs_float32:
            # optionally normalize uint8 [0,255] to float32 [0,1]
            color_transform = lambda x: color_tf(x).astype(np.float32) / 255

        def transform(data):
            data['color'] = color_transform(data['color'])
            return data

        # layout for the multi-camera visualizer window
        rw, rh, col, row = optimal_row_cols(
            n_cameras=len(camera_serial_numbers),
            in_wh_ratio=obs_image_resolution[0]/obs_image_resolution[1],
            max_resolution=multi_cam_vis_resolution
        )
        vis_color_transform = get_image_transform(
            input_res=video_capture_resolution,
            output_res=(rw,rh),
            bgr_to_rgb=False
        )
        def vis_transform(data):
            data['color'] = vis_color_transform(data['color'])
            return data

        # NOTE(review): 'transfrom' is a typo but purely local — left
        # unchanged here to keep this a documentation-only edit.
        recording_transfrom = None
        recording_fps = video_capture_fps
        recording_pix_fmt = 'bgr24'
        if not record_raw_video:
            # record the (downscaled, RGB) observation stream instead of
            # the raw camera stream
            recording_transfrom = transform
            recording_fps = frequency
            recording_pix_fmt = 'rgb24'

        video_recorder = VideoRecorder.create_h264(
            fps=recording_fps,
            codec='h264',
            input_pix_fmt=recording_pix_fmt,
            crf=video_crf,
            thread_type='FRAME',
            thread_count=thread_per_video)

        realsense = MultiRealsense(
            serial_numbers=camera_serial_numbers,
            shm_manager=shm_manager,
            resolution=video_capture_resolution,
            capture_fps=video_capture_fps,
            put_fps=video_capture_fps,
            # send every frame immediately after arrival
            # ignores put_fps
            put_downsample=False,
            record_fps=recording_fps,
            enable_color=True,
            enable_depth=False,
            enable_infrared=False,
            get_max_k=max_obs_buffer_size,
            transform=transform,
            vis_transform=vis_transform,
            recording_transform=recording_transfrom,
            video_recorder=video_recorder,
            verbose=False
            )

        multi_cam_vis = None
        if enable_multi_cam_vis:
            multi_cam_vis = MultiCameraVisualizer(
                realsense=realsense,
                row=row,
                col=col,
                rgb_to_bgr=False
            )

        # speed limits are specified per axis; scale by the diagonal of
        # the unit cube so the combined 3-axis speed stays within bounds
        cube_diag = np.linalg.norm([1,1,1])
        j_init = np.array([0,-90,-90,-90,90,0]) / 180 * np.pi
        if not init_joints:
            j_init = None

        robot = RTDEInterpolationController(
            shm_manager=shm_manager,
            robot_ip=robot_ip,
            frequency=125, # UR5 CB3 RTDE
            lookahead_time=0.1,
            gain=300,
            max_pos_speed=max_pos_speed*cube_diag,
            max_rot_speed=max_rot_speed*cube_diag,
            launch_timeout=3,
            tcp_offset_pose=[0,0,tcp_offset,0,0,0],
            payload_mass=None,
            payload_cog=None,
            joints_init=j_init,
            joints_init_speed=1.05,
            soft_real_time=False,
            verbose=False,
            receive_keys=None,
            get_max_k=max_obs_buffer_size
            )
        self.realsense = realsense
        self.robot = robot
        self.multi_cam_vis = multi_cam_vis
        self.video_capture_fps = video_capture_fps
        self.frequency = frequency
        self.n_obs_steps = n_obs_steps
        self.max_obs_buffer_size = max_obs_buffer_size
        self.max_pos_speed = max_pos_speed
        self.max_rot_speed = max_rot_speed
        self.obs_key_map = obs_key_map
        # recording
        self.output_dir = output_dir
        self.video_dir = video_dir
        self.replay_buffer = replay_buffer
        # temp memory buffers
        self.last_realsense_data = None
        # recording buffers (non-None only while an episode is recording)
        self.obs_accumulator = None
        self.action_accumulator = None
        self.stage_accumulator = None

        self.start_time = None

    # ======== start-stop API =============
    @property
    def is_ready(self):
        """True when both the camera pipeline and the robot controller are up."""
        return self.realsense.is_ready and self.robot.is_ready

    def start(self, wait=True):
        """Launch cameras, robot controller and (optionally) the visualizer."""
        self.realsense.start(wait=False)
        self.robot.start(wait=False)
        if self.multi_cam_vis is not None:
            self.multi_cam_vis.start(wait=False)
        if wait:
            self.start_wait()

    def stop(self, wait=True):
        """Finish any in-progress episode, then shut everything down."""
        self.end_episode()
        if self.multi_cam_vis is not None:
            self.multi_cam_vis.stop(wait=False)
        self.robot.stop(wait=False)
        self.realsense.stop(wait=False)
        if wait:
            self.stop_wait()

    def start_wait(self):
        """Block until all subsystems report ready."""
        self.realsense.start_wait()
        self.robot.start_wait()
        if self.multi_cam_vis is not None:
            self.multi_cam_vis.start_wait()

    def stop_wait(self):
        """Block until all subsystems have terminated."""
        self.robot.stop_wait()
        self.realsense.stop_wait()
        if self.multi_cam_vis is not None:
            self.multi_cam_vis.stop_wait()

    # ========= context manager ===========
    def __enter__(self):
        self.start()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.stop()

    # ========= async env API ===========
    def get_obs(self) -> dict:
        """Return the latest n_obs_steps observations, aligned on a common
        timestamp grid spaced 1/frequency apart (keys: camera_<i>,
        remapped robot keys, and 'timestamp')."""
        assert self.is_ready

        # get data
        # 30 Hz, camera_receive_timestamp
        # fetch enough camera frames to cover n_obs_steps control steps
        k = math.ceil(self.n_obs_steps * (self.video_capture_fps / self.frequency))
        self.last_realsense_data = self.realsense.get(
            k=k,
            out=self.last_realsense_data)

        # 125 hz, robot_receive_timestamp
        last_robot_data = self.robot.get_all_state()
        # both have more than n_obs_steps data

        # align camera obs timestamps: grid ends at the newest frame seen
        # by any camera and steps backwards by the control period
        dt = 1 / self.frequency
        last_timestamp = np.max([x['timestamp'][-1] for x in self.last_realsense_data.values()])
        obs_align_timestamps = last_timestamp - (np.arange(self.n_obs_steps)[::-1] * dt)

        camera_obs = dict()
        for camera_idx, value in self.last_realsense_data.items():
            this_timestamps = value['timestamp']
            this_idxs = list()
            for t in obs_align_timestamps:
                # pick the newest sample strictly before each grid point
                # (falls back to index 0 when none exists)
                is_before_idxs = np.nonzero(this_timestamps < t)[0]
                this_idx = 0
                if len(is_before_idxs) > 0:
                    this_idx = is_before_idxs[-1]
                this_idxs.append(this_idx)
            # remap key
            camera_obs[f'camera_{camera_idx}'] = value['color'][this_idxs]

        # align robot obs with the same nearest-before rule
        robot_timestamps = last_robot_data['robot_receive_timestamp']
        this_timestamps = robot_timestamps
        this_idxs = list()
        for t in obs_align_timestamps:
            is_before_idxs = np.nonzero(this_timestamps < t)[0]
            this_idx = 0
            if len(is_before_idxs) > 0:
                this_idx = is_before_idxs[-1]
            this_idxs.append(this_idx)

        robot_obs_raw = dict()
        for k, v in last_robot_data.items():
            if k in self.obs_key_map:
                robot_obs_raw[self.obs_key_map[k]] = v

        robot_obs = dict()
        for k, v in robot_obs_raw.items():
            robot_obs[k] = v[this_idxs]

        # accumulate obs (full-rate raw robot data, not the aligned subset)
        if self.obs_accumulator is not None:
            self.obs_accumulator.put(
                robot_obs_raw,
                robot_timestamps
            )

        # return obs
        obs_data = dict(camera_obs)
        obs_data.update(robot_obs)
        obs_data['timestamp'] = obs_align_timestamps
        return obs_data

    def exec_actions(self,
            actions: np.ndarray,
            timestamps: np.ndarray,
            stages: Optional[np.ndarray]=None):
        """Schedule end-effector pose waypoints.

        actions: (N,6) poses; timestamps: (N,) absolute wall-clock target
        times; stages: optional (N,) int labels recorded alongside.
        Waypoints whose target time is already in the past are dropped.
        """
        assert self.is_ready
        if not isinstance(actions, np.ndarray):
            actions = np.array(actions)
        if not isinstance(timestamps, np.ndarray):
            timestamps = np.array(timestamps)
        if stages is None:
            stages = np.zeros_like(timestamps, dtype=np.int64)
        elif not isinstance(stages, np.ndarray):
            stages = np.array(stages, dtype=np.int64)

        # convert action to pose
        receive_time = time.time()
        # only keep waypoints that are still in the future
        is_new = timestamps > receive_time
        new_actions = actions[is_new]
        new_timestamps = timestamps[is_new]
        new_stages = stages[is_new]

        # schedule waypoints
        for i in range(len(new_actions)):
            self.robot.schedule_waypoint(
                pose=new_actions[i],
                target_time=new_timestamps[i]
            )

        # record actions
        if self.action_accumulator is not None:
            self.action_accumulator.put(
                new_actions,
                new_timestamps
            )
        if self.stage_accumulator is not None:
            self.stage_accumulator.put(
                new_stages,
                new_timestamps
            )

    def get_robot_state(self):
        """Return the robot controller's most recent state sample."""
        return self.robot.get_state()

    # recording API
    def start_episode(self, start_time=None):
        "Start recording and return first obs"
        if start_time is None:
            start_time = time.time()
        self.start_time = start_time

        assert self.is_ready

        # prepare recording stuff
        episode_id = self.replay_buffer.n_episodes
        this_video_dir = self.video_dir.joinpath(str(episode_id))
        this_video_dir.mkdir(parents=True, exist_ok=True)
        n_cameras = self.realsense.n_cameras
        video_paths = list()
        for i in range(n_cameras):
            video_paths.append(
                str(this_video_dir.joinpath(f'{i}.mp4').absolute()))

        # start recording on realsense
        self.realsense.restart_put(start_time=start_time)
        self.realsense.start_recording(video_path=video_paths, start_time=start_time)

        # create accumulators that resample data onto the control-period grid
        self.obs_accumulator = TimestampObsAccumulator(
            start_time=start_time,
            dt=1/self.frequency
        )
        self.action_accumulator = TimestampActionAccumulator(
            start_time=start_time,
            dt=1/self.frequency
        )
        self.stage_accumulator = TimestampActionAccumulator(
            start_time=start_time,
            dt=1/self.frequency
        )
        print(f'Episode {episode_id} started!')

    def end_episode(self):
        "Stop recording"
        assert self.is_ready

        # stop video recorder
        self.realsense.stop_recording()

        if self.obs_accumulator is not None:
            # recording
            assert self.action_accumulator is not None
            assert self.stage_accumulator is not None

            # Since the only way to accumulate obs and action is by calling
            # get_obs and exec_actions, which will be in the same thread.
            # We don't need to worry new data come in here.
            obs_data = self.obs_accumulator.data
            obs_timestamps = self.obs_accumulator.timestamps

            actions = self.action_accumulator.actions
            action_timestamps = self.action_accumulator.timestamps
            stages = self.stage_accumulator.actions
            # truncate to the common prefix so obs and actions stay paired
            n_steps = min(len(obs_timestamps), len(action_timestamps))
            if n_steps > 0:
                episode = dict()
                episode['timestamp'] = obs_timestamps[:n_steps]
                episode['action'] = actions[:n_steps]
                episode['stage'] = stages[:n_steps]
                for key, value in obs_data.items():
                    episode[key] = value[:n_steps]
                self.replay_buffer.add_episode(episode, compressors='disk')
                episode_id = self.replay_buffer.n_episodes - 1
                print(f'Episode {episode_id} saved!')

            self.obs_accumulator = None
            self.action_accumulator = None
            self.stage_accumulator = None

    def drop_episode(self):
        """Finish the current episode, then delete it (buffer entry + videos)."""
        self.end_episode()
        self.replay_buffer.drop_episode()
        # after drop_episode, n_episodes equals the dropped episode's id
        episode_id = self.replay_buffer.n_episodes
        this_video_dir = self.video_dir.joinpath(str(episode_id))
        if this_video_dir.exists():
            shutil.rmtree(str(this_video_dir))
        print(f'Episode {episode_id} dropped!')
def get_real_obs_dict(
        env_obs: Dict[str, np.ndarray],
        shape_meta: dict,
        ) -> Dict[str, np.ndarray]:
    """Convert raw environment observations into the policy's input format.

    For each key in ``shape_meta['obs']``:
    - 'rgb' entries (T,H,W,C uint8 or float) are resized to the target
      (H,W), normalized to float32 [0,1] when uint8, and transposed to
      T,C,H,W.
    - 'low_dim' entries are passed through, except pose keys with shape
      (2,), which are reduced to their X,Y components.

    Keys present in env_obs but absent from shape_meta are dropped.
    """
    obs_dict_np = dict()
    obs_shape_meta = shape_meta['obs']
    for key, attr in obs_shape_meta.items():
        # renamed from `type` to avoid shadowing the builtin
        obs_type = attr.get('type', 'low_dim')
        shape = attr.get('shape')
        if obs_type == 'rgb':
            this_imgs_in = env_obs[key]
            t, hi, wi, ci = this_imgs_in.shape
            co, ho, wo = shape
            assert ci == co
            out_imgs = this_imgs_in
            # resize and/or normalize only when needed
            if (ho != hi) or (wo != wi) or (this_imgs_in.dtype == np.uint8):
                tf = get_image_transform(
                    input_res=(wi, hi),
                    output_res=(wo, ho),
                    bgr_to_rgb=False)
                out_imgs = np.stack([tf(x) for x in this_imgs_in])
                if this_imgs_in.dtype == np.uint8:
                    out_imgs = out_imgs.astype(np.float32) / 255
            # THWC to TCHW
            obs_dict_np[key] = np.moveaxis(out_imgs, -1, 1)
        elif obs_type == 'low_dim':
            this_data_in = env_obs[key]
            if 'pose' in key and shape == (2,):
                # take X,Y coordinates
                this_data_in = this_data_in[..., [0, 1]]
            obs_dict_np[key] = this_data_in
    return obs_dict_np


def get_real_obs_resolution(
        shape_meta: dict
        ) -> Tuple[int, int]:
    """Return the single (width, height) used by all rgb observations.

    Asserts that every rgb entry in shape_meta agrees; returns None when
    no rgb entry exists.
    """
    out_res = None
    obs_shape_meta = shape_meta['obs']
    for key, attr in obs_shape_meta.items():
        obs_type = attr.get('type', 'low_dim')
        shape = attr.get('shape')
        if obs_type == 'rgb':
            co, ho, wo = shape
            if out_res is None:
                out_res = (wo, ho)
            # all cameras must share one resolution
            assert out_res == (wo, ho)
    return out_res
"aux-param-colorcorrection8": "0.68457", + "aux-param-colorcorrection9": "0.930664", + "aux-param-depthclampmax": "65535", + "aux-param-depthclampmin": "0", + "aux-param-disparityshift": "0", + "controls-autoexposure-auto": "True", + "controls-autoexposure-manual": "33000", + "controls-color-autoexposure-auto": "True", + "controls-color-autoexposure-manual": "100", + "controls-color-backlight-compensation": "0", + "controls-color-brightness": "0", + "controls-color-contrast": "50", + "controls-color-gain": "100", + "controls-color-gamma": "300", + "controls-color-hue": "0", + "controls-color-power-line-frequency": "3", + "controls-color-saturation": "64", + "controls-color-sharpness": "50", + "controls-color-white-balance-auto": "True", + "controls-color-white-balance-manual": "4600", + "controls-depth-gain": "16", + "controls-depth-white-balance-auto": "False", + "controls-laserpower": "150", + "controls-laserstate": "on", + "ignoreSAD": "0", + "param-amplitude-factor": "0", + "param-autoexposure-setpoint": "400", + "param-censusenablereg-udiameter": "9", + "param-censusenablereg-vdiameter": "3", + "param-censususize": "9", + "param-censusvsize": "3", + "param-depthclampmax": "65535", + "param-depthclampmin": "0", + "param-depthunits": "1000", + "param-disableraucolor": "0", + "param-disablesadcolor": "0", + "param-disablesadnormalize": "0", + "param-disablesloleftcolor": "0", + "param-disableslorightcolor": "1", + "param-disparitymode": "0", + "param-disparityshift": "0", + "param-lambdaad": "751", + "param-lambdacensus": "6", + "param-leftrightthreshold": "10", + "param-maxscorethreshb": "2893", + "param-medianthreshold": "796", + "param-minscorethresha": "4", + "param-neighborthresh": "108", + "param-raumine": "6", + "param-rauminn": "3", + "param-rauminnssum": "7", + "param-raumins": "2", + "param-rauminw": "2", + "param-rauminwesum": "12", + "param-regioncolorthresholdb": "0.785714", + "param-regioncolorthresholdg": "0.565558", + 
"param-regioncolorthresholdr": "0.985323", + "param-regionshrinku": "3", + "param-regionshrinkv": "0", + "param-robbinsmonrodecrement": "25", + "param-robbinsmonroincrement": "2", + "param-rsmdiffthreshold": "1.65625", + "param-rsmrauslodiffthreshold": "0.71875", + "param-rsmremovethreshold": "0.809524", + "param-scanlineedgetaub": "13", + "param-scanlineedgetaug": "15", + "param-scanlineedgetaur": "30", + "param-scanlinep1": "155", + "param-scanlinep1onediscon": "160", + "param-scanlinep1twodiscon": "59", + "param-scanlinep2": "190", + "param-scanlinep2onediscon": "507", + "param-scanlinep2twodiscon": "493", + "param-secondpeakdelta": "647", + "param-texturecountthresh": "0", + "param-texturedifferencethresh": "1722", + "param-usersm": "1", + "param-zunits": "1000", + "stream-depth-format": "Z16", + "stream-fps": "30", + "stream-height": "480", + "stream-width": "640" +} diff --git a/diffusion_policy/real_world/realsense_config/435_high_accuracy_mode.json b/diffusion_policy/real_world/realsense_config/435_high_accuracy_mode.json new file mode 100644 index 0000000..8b88e5f --- /dev/null +++ b/diffusion_policy/real_world/realsense_config/435_high_accuracy_mode.json @@ -0,0 +1,94 @@ +{ + "aux-param-autoexposure-setpoint": "1536", + "aux-param-colorcorrection1": "0.298828", + "aux-param-colorcorrection10": "-0", + "aux-param-colorcorrection11": "-0", + "aux-param-colorcorrection12": "-0", + "aux-param-colorcorrection2": "0.293945", + "aux-param-colorcorrection3": "0.293945", + "aux-param-colorcorrection4": "0.114258", + "aux-param-colorcorrection5": "-0", + "aux-param-colorcorrection6": "-0", + "aux-param-colorcorrection7": "-0", + "aux-param-colorcorrection8": "-0", + "aux-param-colorcorrection9": "-0", + "aux-param-depthclampmax": "65536", + "aux-param-depthclampmin": "0", + "aux-param-disparityshift": "0", + "controls-autoexposure-auto": "True", + "controls-autoexposure-manual": "8500", + "controls-color-autoexposure-auto": "True", + 
"controls-color-autoexposure-manual": "100", + "controls-color-backlight-compensation": "0", + "controls-color-brightness": "0", + "controls-color-contrast": "50", + "controls-color-gain": "100", + "controls-color-gamma": "300", + "controls-color-hue": "0", + "controls-color-power-line-frequency": "3", + "controls-color-saturation": "64", + "controls-color-sharpness": "50", + "controls-color-white-balance-auto": "True", + "controls-color-white-balance-manual": "4600", + "controls-depth-gain": "16", + "controls-laserpower": "150", + "controls-laserstate": "on", + "ignoreSAD": "0", + "param-amplitude-factor": "0", + "param-autoexposure-setpoint": "1536", + "param-censusenablereg-udiameter": "9", + "param-censusenablereg-vdiameter": "9", + "param-censususize": "9", + "param-censusvsize": "9", + "param-depthclampmax": "65536", + "param-depthclampmin": "0", + "param-depthunits": "1000", + "param-disableraucolor": "0", + "param-disablesadcolor": "0", + "param-disablesadnormalize": "0", + "param-disablesloleftcolor": "0", + "param-disableslorightcolor": "1", + "param-disparitymode": "0", + "param-disparityshift": "0", + "param-lambdaad": "751", + "param-lambdacensus": "6", + "param-leftrightthreshold": "10", + "param-maxscorethreshb": "2893", + "param-medianthreshold": "796", + "param-minscorethresha": "4", + "param-neighborthresh": "108", + "param-raumine": "6", + "param-rauminn": "3", + "param-rauminnssum": "7", + "param-raumins": "2", + "param-rauminw": "2", + "param-rauminwesum": "12", + "param-regioncolorthresholdb": "0.785714", + "param-regioncolorthresholdg": "0.565558", + "param-regioncolorthresholdr": "0.985323", + "param-regionshrinku": "3", + "param-regionshrinkv": "0", + "param-robbinsmonrodecrement": "25", + "param-robbinsmonroincrement": "2", + "param-rsmdiffthreshold": "1.65625", + "param-rsmrauslodiffthreshold": "0.71875", + "param-rsmremovethreshold": "0.809524", + "param-scanlineedgetaub": "13", + "param-scanlineedgetaug": "15", + 
"param-scanlineedgetaur": "30", + "param-scanlinep1": "155", + "param-scanlinep1onediscon": "160", + "param-scanlinep1twodiscon": "59", + "param-scanlinep2": "190", + "param-scanlinep2onediscon": "507", + "param-scanlinep2twodiscon": "493", + "param-secondpeakdelta": "647", + "param-texturecountthresh": "0", + "param-texturedifferencethresh": "1722", + "param-usersm": "1", + "param-zunits": "1000", + "stream-depth-format": "Z16", + "stream-fps": "30", + "stream-height": "480", + "stream-width": "848" +} \ No newline at end of file diff --git a/diffusion_policy/real_world/rtde_interpolation_controller.py b/diffusion_policy/real_world/rtde_interpolation_controller.py new file mode 100644 index 0000000..c1bca0c --- /dev/null +++ b/diffusion_policy/real_world/rtde_interpolation_controller.py @@ -0,0 +1,361 @@ +import os +import time +import enum +import multiprocessing as mp +from multiprocessing.managers import SharedMemoryManager +import scipy.interpolate as si +import scipy.spatial.transform as st +import numpy as np +from rtde_control import RTDEControlInterface +from rtde_receive import RTDEReceiveInterface +from diffusion_policy.shared_memory.shared_memory_queue import ( + SharedMemoryQueue, Empty) +from diffusion_policy.shared_memory.shared_memory_ring_buffer import SharedMemoryRingBuffer +from diffusion_policy.common.pose_trajectory_interpolator import PoseTrajectoryInterpolator + +class Command(enum.Enum): + STOP = 0 + SERVOL = 1 + SCHEDULE_WAYPOINT = 2 + + +class RTDEInterpolationController(mp.Process): + """ + To ensure sending commnd to the robot with predictable latency + this controller need its seperate process (due to python GIL) + """ + + + def __init__(self, + shm_manager: SharedMemoryManager, + robot_ip, + frequency=125, + lookahead_time=0.1, + gain=300, + max_pos_speed=0.25, # 5% of max speed + max_rot_speed=0.16, # 5% of max speed + launch_timeout=3, + tcp_offset_pose=None, + payload_mass=None, + payload_cog=None, + joints_init=None, + 
joints_init_speed=1.05, + soft_real_time=False, + verbose=False, + receive_keys=None, + get_max_k=128, + ): + """ + frequency: CB2=125, UR3e=500 + lookahead_time: [0.03, 0.2]s smoothens the trajectory with this lookahead time + gain: [100, 2000] proportional gain for following target position + max_pos_speed: m/s + max_rot_speed: rad/s + tcp_offset_pose: 6d pose + payload_mass: float + payload_cog: 3d position, center of gravity + soft_real_time: enables round-robbin scheduling and real-time priority + requires running scripts/rtprio_setup.sh before hand. + + """ + # verify + assert 0 < frequency <= 500 + assert 0.03 <= lookahead_time <= 0.2 + assert 100 <= gain <= 2000 + assert 0 < max_pos_speed + assert 0 < max_rot_speed + if tcp_offset_pose is not None: + tcp_offset_pose = np.array(tcp_offset_pose) + assert tcp_offset_pose.shape == (6,) + if payload_mass is not None: + assert 0 <= payload_mass <= 5 + if payload_cog is not None: + payload_cog = np.array(payload_cog) + assert payload_cog.shape == (3,) + assert payload_mass is not None + if joints_init is not None: + joints_init = np.array(joints_init) + assert joints_init.shape == (6,) + + super().__init__(name="RTDEPositionalController") + self.robot_ip = robot_ip + self.frequency = frequency + self.lookahead_time = lookahead_time + self.gain = gain + self.max_pos_speed = max_pos_speed + self.max_rot_speed = max_rot_speed + self.launch_timeout = launch_timeout + self.tcp_offset_pose = tcp_offset_pose + self.payload_mass = payload_mass + self.payload_cog = payload_cog + self.joints_init = joints_init + self.joints_init_speed = joints_init_speed + self.soft_real_time = soft_real_time + self.verbose = verbose + + # build input queue + example = { + 'cmd': Command.SERVOL.value, + 'target_pose': np.zeros((6,), dtype=np.float64), + 'duration': 0.0, + 'target_time': 0.0 + } + input_queue = SharedMemoryQueue.create_from_examples( + shm_manager=shm_manager, + examples=example, + buffer_size=256 + ) + + # build ring buffer 
+ if receive_keys is None: + receive_keys = [ + 'ActualTCPPose', + 'ActualTCPSpeed', + 'ActualQ', + 'ActualQd', + + 'TargetTCPPose', + 'TargetTCPSpeed', + 'TargetQ', + 'TargetQd' + ] + rtde_r = RTDEReceiveInterface(hostname=robot_ip) + example = dict() + for key in receive_keys: + example[key] = np.array(getattr(rtde_r, 'get'+key)()) + example['robot_receive_timestamp'] = time.time() + ring_buffer = SharedMemoryRingBuffer.create_from_examples( + shm_manager=shm_manager, + examples=example, + get_max_k=get_max_k, + get_time_budget=0.2, + put_desired_frequency=frequency + ) + + self.ready_event = mp.Event() + self.input_queue = input_queue + self.ring_buffer = ring_buffer + self.receive_keys = receive_keys + + # ========= launch method =========== + def start(self, wait=True): + super().start() + if wait: + self.start_wait() + if self.verbose: + print(f"[RTDEPositionalController] Controller process spawned at {self.pid}") + + def stop(self, wait=True): + message = { + 'cmd': Command.STOP.value + } + self.input_queue.put(message) + if wait: + self.stop_wait() + + def start_wait(self): + self.ready_event.wait(self.launch_timeout) + assert self.is_alive() + + def stop_wait(self): + self.join() + + @property + def is_ready(self): + return self.ready_event.is_set() + + # ========= context manager =========== + def __enter__(self): + self.start() + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.stop() + + # ========= command methods ============ + def servoL(self, pose, duration=0.1): + """ + duration: desired time to reach pose + """ + assert self.is_alive() + assert(duration >= (1/self.frequency)) + pose = np.array(pose) + assert pose.shape == (6,) + + message = { + 'cmd': Command.SERVOL.value, + 'target_pose': pose, + 'duration': duration + } + self.input_queue.put(message) + + def schedule_waypoint(self, pose, target_time): + assert target_time > time.time() + pose = np.array(pose) + assert pose.shape == (6,) + + message = { + 'cmd': 
Command.SCHEDULE_WAYPOINT.value, + 'target_pose': pose, + 'target_time': target_time + } + self.input_queue.put(message) + + # ========= receive APIs ============= + def get_state(self, k=None, out=None): + if k is None: + return self.ring_buffer.get(out=out) + else: + return self.ring_buffer.get_last_k(k=k,out=out) + + def get_all_state(self): + return self.ring_buffer.get_all() + + # ========= main loop in process ============ + def run(self): + # enable soft real-time + if self.soft_real_time: + os.sched_setscheduler( + 0, os.SCHED_RR, os.sched_param(20)) + + # start rtde + robot_ip = self.robot_ip + rtde_c = RTDEControlInterface(hostname=robot_ip) + rtde_r = RTDEReceiveInterface(hostname=robot_ip) + + try: + if self.verbose: + print(f"[RTDEPositionalController] Connect to robot: {robot_ip}") + + # set parameters + if self.tcp_offset_pose is not None: + rtde_c.setTcp(self.tcp_offset_pose) + if self.payload_mass is not None: + if self.payload_cog is not None: + assert rtde_c.setPayload(self.payload_mass, self.payload_cog) + else: + assert rtde_c.setPayload(self.payload_mass) + + # init pose + if self.joints_init is not None: + assert rtde_c.moveJ(self.joints_init, self.joints_init_speed, 1.4) + + # main loop + dt = 1. 
/ self.frequency + curr_pose = rtde_r.getActualTCPPose() + # use monotonic time to make sure the control loop never go backward + curr_t = time.monotonic() + last_waypoint_time = curr_t + pose_interp = PoseTrajectoryInterpolator( + times=[curr_t], + poses=[curr_pose] + ) + + iter_idx = 0 + keep_running = True + while keep_running: + # start control iteration + t_start = rtde_c.initPeriod() + + # send command to robot + t_now = time.monotonic() + # diff = t_now - pose_interp.times[-1] + # if diff > 0: + # print('extrapolate', diff) + pose_command = pose_interp(t_now) + vel = 0.5 + acc = 0.5 + assert rtde_c.servoL(pose_command, + vel, acc, # dummy, not used by ur5 + dt, + self.lookahead_time, + self.gain) + + # update robot state + state = dict() + for key in self.receive_keys: + state[key] = np.array(getattr(rtde_r, 'get'+key)()) + state['robot_receive_timestamp'] = time.time() + self.ring_buffer.put(state) + + # fetch command from queue + try: + commands = self.input_queue.get_all() + n_cmd = len(commands['cmd']) + except Empty: + n_cmd = 0 + + # execute commands + for i in range(n_cmd): + command = dict() + for key, value in commands.items(): + command[key] = value[i] + cmd = command['cmd'] + + if cmd == Command.STOP.value: + keep_running = False + # stop immediately, ignore later commands + break + elif cmd == Command.SERVOL.value: + # since curr_pose always lag behind curr_target_pose + # if we start the next interpolation with curr_pose + # the command robot receive will have discontinouity + # and cause jittery robot behavior. 
+ target_pose = command['target_pose'] + duration = float(command['duration']) + curr_time = t_now + dt + t_insert = curr_time + duration + pose_interp = pose_interp.drive_to_waypoint( + pose=target_pose, + time=t_insert, + curr_time=curr_time, + max_pos_speed=self.max_pos_speed, + max_rot_speed=self.max_rot_speed + ) + last_waypoint_time = t_insert + if self.verbose: + print("[RTDEPositionalController] New pose target:{} duration:{}s".format( + target_pose, duration)) + elif cmd == Command.SCHEDULE_WAYPOINT.value: + target_pose = command['target_pose'] + target_time = float(command['target_time']) + # translate global time to monotonic time + target_time = time.monotonic() - time.time() + target_time + curr_time = t_now + dt + pose_interp = pose_interp.schedule_waypoint( + pose=target_pose, + time=target_time, + max_pos_speed=self.max_pos_speed, + max_rot_speed=self.max_rot_speed, + curr_time=curr_time, + last_waypoint_time=last_waypoint_time + ) + last_waypoint_time = target_time + else: + keep_running = False + break + + # regulate frequency + rtde_c.waitPeriod(t_start) + + # first loop successful, ready to receive command + if iter_idx == 0: + self.ready_event.set() + iter_idx += 1 + + if self.verbose: + print(f"[RTDEPositionalController] Actual frequency {1/(time.perf_counter() - t_start)}") + + finally: + # manditory cleanup + # decelerate + rtde_c.servoStop() + + # terminate + rtde_c.stopScript() + rtde_c.disconnect() + rtde_r.disconnect() + self.ready_event.set() + + if self.verbose: + print(f"[RTDEPositionalController] Disconnected from robot: {robot_ip}") diff --git a/diffusion_policy/real_world/single_realsense.py b/diffusion_policy/real_world/single_realsense.py new file mode 100644 index 0000000..7a8443b --- /dev/null +++ b/diffusion_policy/real_world/single_realsense.py @@ -0,0 +1,480 @@ +from typing import Optional, Callable, Dict +import os +import enum +import time +import json +import numpy as np +import pyrealsense2 as rs +import multiprocessing 
as mp +import cv2 +from threadpoolctl import threadpool_limits +from multiprocessing.managers import SharedMemoryManager +from diffusion_policy.common.timestamp_accumulator import get_accumulate_timestamp_idxs +from diffusion_policy.shared_memory.shared_ndarray import SharedNDArray +from diffusion_policy.shared_memory.shared_memory_ring_buffer import SharedMemoryRingBuffer +from diffusion_policy.shared_memory.shared_memory_queue import SharedMemoryQueue, Full, Empty +from diffusion_policy.real_world.video_recorder import VideoRecorder + +class Command(enum.Enum): + SET_COLOR_OPTION = 0 + SET_DEPTH_OPTION = 1 + START_RECORDING = 2 + STOP_RECORDING = 3 + RESTART_PUT = 4 + +class SingleRealsense(mp.Process): + MAX_PATH_LENGTH = 4096 # linux path has a limit of 4096 bytes + + def __init__( + self, + shm_manager: SharedMemoryManager, + serial_number, + resolution=(1280,720), + capture_fps=30, + put_fps=None, + put_downsample=True, + record_fps=None, + enable_color=True, + enable_depth=False, + enable_infrared=False, + get_max_k=30, + advanced_mode_config=None, + transform: Optional[Callable[[Dict], Dict]] = None, + vis_transform: Optional[Callable[[Dict], Dict]] = None, + recording_transform: Optional[Callable[[Dict], Dict]] = None, + video_recorder: Optional[VideoRecorder] = None, + verbose=False + ): + super().__init__() + + if put_fps is None: + put_fps = capture_fps + if record_fps is None: + record_fps = capture_fps + + # create ring buffer + resolution = tuple(resolution) + shape = resolution[::-1] + examples = dict() + if enable_color: + examples['color'] = np.empty( + shape=shape+(3,), dtype=np.uint8) + if enable_depth: + examples['depth'] = np.empty( + shape=shape, dtype=np.uint16) + if enable_infrared: + examples['infrared'] = np.empty( + shape=shape, dtype=np.uint8) + examples['camera_capture_timestamp'] = 0.0 + examples['camera_receive_timestamp'] = 0.0 + examples['timestamp'] = 0.0 + examples['step_idx'] = 0 + + vis_ring_buffer = 
SharedMemoryRingBuffer.create_from_examples( + shm_manager=shm_manager, + examples=examples if vis_transform is None + else vis_transform(dict(examples)), + get_max_k=1, + get_time_budget=0.2, + put_desired_frequency=capture_fps + ) + + ring_buffer = SharedMemoryRingBuffer.create_from_examples( + shm_manager=shm_manager, + examples=examples if transform is None + else transform(dict(examples)), + get_max_k=get_max_k, + get_time_budget=0.2, + put_desired_frequency=put_fps + ) + + # create command queue + examples = { + 'cmd': Command.SET_COLOR_OPTION.value, + 'option_enum': rs.option.exposure.value, + 'option_value': 0.0, + 'video_path': np.array('a'*self.MAX_PATH_LENGTH), + 'recording_start_time': 0.0, + 'put_start_time': 0.0 + } + + command_queue = SharedMemoryQueue.create_from_examples( + shm_manager=shm_manager, + examples=examples, + buffer_size=128 + ) + + # create shared array for intrinsics + intrinsics_array = SharedNDArray.create_from_shape( + mem_mgr=shm_manager, + shape=(7,), + dtype=np.float64) + intrinsics_array.get()[:] = 0 + + # create video recorder + if video_recorder is None: + # realsense uses bgr24 pixel format + # default thread_type to FRAEM + # i.e. each frame uses one core + # instead of all cores working on all frames. 
+ # this prevents CPU over-subpscription and + # improves performance significantly + video_recorder = VideoRecorder.create_h264( + fps=record_fps, + codec='h264', + input_pix_fmt='bgr24', + crf=18, + thread_type='FRAME', + thread_count=1) + + # copied variables + self.serial_number = serial_number + self.resolution = resolution + self.capture_fps = capture_fps + self.put_fps = put_fps + self.put_downsample = put_downsample + self.record_fps = record_fps + self.enable_color = enable_color + self.enable_depth = enable_depth + self.enable_infrared = enable_infrared + self.advanced_mode_config = advanced_mode_config + self.transform = transform + self.vis_transform = vis_transform + self.recording_transform = recording_transform + self.video_recorder = video_recorder + self.verbose = verbose + self.put_start_time = None + + # shared variables + self.stop_event = mp.Event() + self.ready_event = mp.Event() + self.ring_buffer = ring_buffer + self.vis_ring_buffer = vis_ring_buffer + self.command_queue = command_queue + self.intrinsics_array = intrinsics_array + + @staticmethod + def get_connected_devices_serial(): + serials = list() + for d in rs.context().devices: + if d.get_info(rs.camera_info.name).lower() != 'platform camera': + serial = d.get_info(rs.camera_info.serial_number) + product_line = d.get_info(rs.camera_info.product_line) + if product_line == 'D400': + # only works with D400 series + serials.append(serial) + serials = sorted(serials) + return serials + + # ========= context manager =========== + def __enter__(self): + self.start() + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.stop() + + # ========= user API =========== + def start(self, wait=True, put_start_time=None): + self.put_start_time = put_start_time + super().start() + if wait: + self.start_wait() + + def stop(self, wait=True): + self.stop_event.set() + if wait: + self.end_wait() + + def start_wait(self): + self.ready_event.wait() + + def end_wait(self): + self.join() + + 
@property + def is_ready(self): + return self.ready_event.is_set() + + def get(self, k=None, out=None): + if k is None: + return self.ring_buffer.get(out=out) + else: + return self.ring_buffer.get_last_k(k, out=out) + + def get_vis(self, out=None): + return self.vis_ring_buffer.get(out=out) + + # ========= user API =========== + def set_color_option(self, option: rs.option, value: float): + self.command_queue.put({ + 'cmd': Command.SET_COLOR_OPTION.value, + 'option_enum': option.value, + 'option_value': value + }) + + def set_exposure(self, exposure=None, gain=None): + """ + exposure: (1, 10000) 100us unit. (0.1 ms, 1/10000s) + gain: (0, 128) + """ + + if exposure is None and gain is None: + # auto exposure + self.set_color_option(rs.option.enable_auto_exposure, 1.0) + else: + # manual exposure + self.set_color_option(rs.option.enable_auto_exposure, 0.0) + if exposure is not None: + self.set_color_option(rs.option.exposure, exposure) + if gain is not None: + self.set_color_option(rs.option.gain, gain) + + def set_white_balance(self, white_balance=None): + if white_balance is None: + self.set_color_option(rs.option.enable_auto_white_balance, 1.0) + else: + self.set_color_option(rs.option.enable_auto_white_balance, 0.0) + self.set_color_option(rs.option.white_balance, white_balance) + + def get_intrinsics(self): + assert self.ready_event.is_set() + fx, fy, ppx, ppy = self.intrinsics_array.get()[:4] + mat = np.eye(3) + mat[0,0] = fx + mat[1,1] = fy + mat[0,2] = ppx + mat[1,2] = ppy + return mat + + def get_depth_scale(self): + assert self.ready_event.is_set() + scale = self.intrinsics_array.get()[-1] + return scale + + def start_recording(self, video_path: str, start_time: float=-1): + assert self.enable_color + + path_len = len(video_path.encode('utf-8')) + if path_len > self.MAX_PATH_LENGTH: + raise RuntimeError('video_path too long.') + self.command_queue.put({ + 'cmd': Command.START_RECORDING.value, + 'video_path': video_path, + 'recording_start_time': start_time 
+ }) + + def stop_recording(self): + self.command_queue.put({ + 'cmd': Command.STOP_RECORDING.value + }) + + def restart_put(self, start_time): + self.command_queue.put({ + 'cmd': Command.RESTART_PUT.value, + 'put_start_time': start_time + }) + + # ========= interval API =========== + def run(self): + # limit threads + threadpool_limits(1) + cv2.setNumThreads(1) + + w, h = self.resolution + fps = self.capture_fps + align = rs.align(rs.stream.color) + # Enable the streams from all the intel realsense devices + rs_config = rs.config() + if self.enable_color: + rs_config.enable_stream(rs.stream.color, + w, h, rs.format.bgr8, fps) + if self.enable_depth: + rs_config.enable_stream(rs.stream.depth, + w, h, rs.format.z16, fps) + if self.enable_infrared: + rs_config.enable_stream(rs.stream.infrared, + w, h, rs.format.y8, fps) + + try: + rs_config.enable_device(self.serial_number) + + # start pipeline + pipeline = rs.pipeline() + pipeline_profile = pipeline.start(rs_config) + + # report global time + # https://github.com/IntelRealSense/librealsense/pull/3909 + d = pipeline_profile.get_device().first_color_sensor() + d.set_option(rs.option.global_time_enabled, 1) + + # setup advanced mode + if self.advanced_mode_config is not None: + json_text = json.dumps(self.advanced_mode_config) + device = pipeline_profile.get_device() + advanced_mode = rs.rs400_advanced_mode(device) + advanced_mode.load_json(json_text) + + # get + color_stream = pipeline_profile.get_stream(rs.stream.color) + intr = color_stream.as_video_stream_profile().get_intrinsics() + order = ['fx', 'fy', 'ppx', 'ppy', 'height', 'width'] + for i, name in enumerate(order): + self.intrinsics_array.get()[i] = getattr(intr, name) + + if self.enable_depth: + depth_sensor = pipeline_profile.get_device().first_depth_sensor() + depth_scale = depth_sensor.get_depth_scale() + self.intrinsics_array.get()[-1] = depth_scale + + # one-time setup (intrinsics etc, ignore for now) + if self.verbose: + print(f'[SingleRealsense 
{self.serial_number}] Main loop started.') + + # put frequency regulation + put_idx = None + put_start_time = self.put_start_time + if put_start_time is None: + put_start_time = time.time() + + iter_idx = 0 + t_start = time.time() + while not self.stop_event.is_set(): + # wait for frames to come in + frameset = pipeline.wait_for_frames() + receive_time = time.time() + # align frames to color + frameset = align.process(frameset) + + # grab data + data = dict() + data['camera_receive_timestamp'] = receive_time + # realsense report in ms + data['camera_capture_timestamp'] = frameset.get_timestamp() / 1000 + if self.enable_color: + color_frame = frameset.get_color_frame() + data['color'] = np.asarray(color_frame.get_data()) + t = color_frame.get_timestamp() / 1000 + data['camera_capture_timestamp'] = t + # print('device', time.time() - t) + # print(color_frame.get_frame_timestamp_domain()) + if self.enable_depth: + data['depth'] = np.asarray( + frameset.get_depth_frame().get_data()) + if self.enable_infrared: + data['infrared'] = np.asarray( + frameset.get_infrared_frame().get_data()) + + # apply transform + put_data = data + if self.transform is not None: + put_data = self.transform(dict(data)) + + if self.put_downsample: + # put frequency regulation + local_idxs, global_idxs, put_idx \ + = get_accumulate_timestamp_idxs( + timestamps=[receive_time], + start_time=put_start_time, + dt=1/self.put_fps, + # this is non in first iteration + # and then replaced with a concrete number + next_global_idx=put_idx, + # continue to pump frames even if not started. + # start_time is simply used to align timestamps. 
+ allow_negative=True + ) + + for step_idx in global_idxs: + put_data['step_idx'] = step_idx + # put_data['timestamp'] = put_start_time + step_idx / self.put_fps + put_data['timestamp'] = receive_time + # print(step_idx, data['timestamp']) + self.ring_buffer.put(put_data, wait=False) + else: + step_idx = int((receive_time - put_start_time) * self.put_fps) + put_data['step_idx'] = step_idx + put_data['timestamp'] = receive_time + self.ring_buffer.put(put_data, wait=False) + + # signal ready + if iter_idx == 0: + self.ready_event.set() + + # put to vis + vis_data = data + if self.vis_transform == self.transform: + vis_data = put_data + elif self.vis_transform is not None: + vis_data = self.vis_transform(dict(data)) + self.vis_ring_buffer.put(vis_data, wait=False) + + # record frame + rec_data = data + if self.recording_transform == self.transform: + rec_data = put_data + elif self.recording_transform is not None: + rec_data = self.recording_transform(dict(data)) + + if self.video_recorder.is_ready(): + self.video_recorder.write_frame(rec_data['color'], + frame_time=receive_time) + + # perf + t_end = time.time() + duration = t_end - t_start + frequency = np.round(1 / duration, 1) + t_start = t_end + if self.verbose: + print(f'[SingleRealsense {self.serial_number}] FPS {frequency}') + + # fetch command from queue + try: + commands = self.command_queue.get_all() + n_cmd = len(commands['cmd']) + except Empty: + n_cmd = 0 + + # execute commands + for i in range(n_cmd): + command = dict() + for key, value in commands.items(): + command[key] = value[i] + cmd = command['cmd'] + if cmd == Command.SET_COLOR_OPTION.value: + sensor = pipeline_profile.get_device().first_color_sensor() + option = rs.option(command['option_enum']) + value = float(command['option_value']) + sensor.set_option(option, value) + # print('auto', sensor.get_option(rs.option.enable_auto_exposure)) + # print('exposure', sensor.get_option(rs.option.exposure)) + # print('gain', 
sensor.get_option(rs.option.gain)) + elif cmd == Command.SET_DEPTH_OPTION.value: + sensor = pipeline_profile.get_device().first_depth_sensor() + option = rs.option(command['option_enum']) + value = float(command['option_value']) + sensor.set_option(option, value) + elif cmd == Command.START_RECORDING.value: + video_path = str(command['video_path']) + start_time = command['recording_start_time'] + if start_time < 0: + start_time = None + self.video_recorder.start(video_path, start_time=start_time) + elif cmd == Command.STOP_RECORDING.value: + self.video_recorder.stop() + # stop need to flush all in-flight frames to disk, which might take longer than dt. + # soft-reset put to drop frames to prevent ring buffer overflow. + put_idx = None + elif cmd == Command.RESTART_PUT.value: + put_idx = None + put_start_time = command['put_start_time'] + # self.ring_buffer.clear() + + iter_idx += 1 + finally: + self.video_recorder.stop() + rs_config.disable_all_streams() + self.ready_event.set() + + if self.verbose: + print(f'[SingleRealsense {self.serial_number}] Exiting worker process.') diff --git a/diffusion_policy/real_world/spacemouse.py b/diffusion_policy/real_world/spacemouse.py new file mode 100644 index 0000000..ddf7c60 --- /dev/null +++ b/diffusion_policy/real_world/spacemouse.py @@ -0,0 +1,108 @@ +from spnav import spnav_open, spnav_poll_event, spnav_close, SpnavMotionEvent, SpnavButtonEvent +from threading import Thread, Event +from collections import defaultdict +import numpy as np +import time + + +class Spacemouse(Thread): + def __init__(self, max_value=500, deadzone=(0,0,0,0,0,0), dtype=np.float32): + """ + Continously listen to 3D connecion space naviagtor events + and update the latest state. 
+ + max_value: {300, 500} 300 for wired version and 500 for wireless + deadzone: [0,1], number or tuple, axis with value lower than this value will stay at 0 + + front + z + ^ _ + | (O) space mouse + | + *----->x right + y + """ + if np.issubdtype(type(deadzone), np.number): + deadzone = np.full(6, fill_value=deadzone, dtype=dtype) + else: + deadzone = np.array(deadzone, dtype=dtype) + assert (deadzone >= 0).all() + + super().__init__() + self.stop_event = Event() + self.max_value = max_value + self.dtype = dtype + self.deadzone = deadzone + self.motion_event = SpnavMotionEvent([0,0,0], [0,0,0], 0) + self.button_state = defaultdict(lambda: False) + self.tx_zup_spnav = np.array([ + [0,0,-1], + [1,0,0], + [0,1,0] + ], dtype=dtype) + + def get_motion_state(self): + me = self.motion_event + state = np.array(me.translation + me.rotation, + dtype=self.dtype) / self.max_value + is_dead = (-self.deadzone < state) & (state < self.deadzone) + state[is_dead] = 0 + return state + + def get_motion_state_transformed(self): + """ + Return in right-handed coordinate + z + *------>y right + | _ + | (O) space mouse + v + x + back + + """ + state = self.get_motion_state() + tf_state = np.zeros_like(state) + tf_state[:3] = self.tx_zup_spnav @ state[:3] + tf_state[3:] = self.tx_zup_spnav @ state[3:] + return tf_state + + def is_button_pressed(self, button_id): + return self.button_state[button_id] + + def stop(self): + self.stop_event.set() + self.join() + + def __enter__(self): + self.start() + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.stop() + + def run(self): + spnav_open() + try: + while not self.stop_event.is_set(): + event = spnav_poll_event() + if isinstance(event, SpnavMotionEvent): + self.motion_event = event + elif isinstance(event, SpnavButtonEvent): + self.button_state[event.bnum] = event.press + else: + time.sleep(1/200) + finally: + spnav_close() + + +def test(): + with Spacemouse(deadzone=0.3) as sm: + for i in range(2000): + # 
print(sm.get_motion_state()) + print(sm.get_motion_state_transformed()) + print(sm.is_button_pressed(0)) + time.sleep(1/100) + +if __name__ == '__main__': + test() diff --git a/diffusion_policy/real_world/spacemouse_shared_memory.py b/diffusion_policy/real_world/spacemouse_shared_memory.py new file mode 100644 index 0000000..163877f --- /dev/null +++ b/diffusion_policy/real_world/spacemouse_shared_memory.py @@ -0,0 +1,160 @@ +import multiprocessing as mp +import numpy as np +import time +from spnav import spnav_open, spnav_poll_event, spnav_close, SpnavMotionEvent, SpnavButtonEvent +from diffusion_policy.shared_memory.shared_memory_ring_buffer import SharedMemoryRingBuffer + +class Spacemouse(mp.Process): + def __init__(self, + shm_manager, + get_max_k=30, + frequency=200, + max_value=500, + deadzone=(0,0,0,0,0,0), + dtype=np.float32, + n_buttons=2, + ): + """ + Continously listen to 3D connecion space naviagtor events + and update the latest state. + + max_value: {300, 500} 300 for wired version and 500 for wireless + deadzone: [0,1], number or tuple, axis with value lower than this value will stay at 0 + + front + z + ^ _ + | (O) space mouse + | + *----->x right + y + """ + super().__init__() + if np.issubdtype(type(deadzone), np.number): + deadzone = np.full(6, fill_value=deadzone, dtype=dtype) + else: + deadzone = np.array(deadzone, dtype=dtype) + assert (deadzone >= 0).all() + + # copied varaibles + self.frequency = frequency + self.max_value = max_value + self.dtype = dtype + self.deadzone = deadzone + self.n_buttons = n_buttons + # self.motion_event = SpnavMotionEvent([0,0,0], [0,0,0], 0) + # self.button_state = defaultdict(lambda: False) + self.tx_zup_spnav = np.array([ + [0,0,-1], + [1,0,0], + [0,1,0] + ], dtype=dtype) + + example = { + # 3 translation, 3 rotation, 1 period + 'motion_event': np.zeros((7,), dtype=np.int64), + # left and right button + 'button_state': np.zeros((n_buttons,), dtype=bool), + 'receive_timestamp': time.time() + } + ring_buffer = 
SharedMemoryRingBuffer.create_from_examples( + shm_manager=shm_manager, + examples=example, + get_max_k=get_max_k, + get_time_budget=0.2, + put_desired_frequency=frequency + ) + + # shared variables + self.ready_event = mp.Event() + self.stop_event = mp.Event() + self.ring_buffer = ring_buffer + + # ======= get state APIs ========== + + def get_motion_state(self): + state = self.ring_buffer.get() + state = np.array(state['motion_event'][:6], + dtype=self.dtype) / self.max_value + is_dead = (-self.deadzone < state) & (state < self.deadzone) + state[is_dead] = 0 + return state + + def get_motion_state_transformed(self): + """ + Return in right-handed coordinate + z + *------>y right + | _ + | (O) space mouse + v + x + back + + """ + state = self.get_motion_state() + tf_state = np.zeros_like(state) + tf_state[:3] = self.tx_zup_spnav @ state[:3] + tf_state[3:] = self.tx_zup_spnav @ state[3:] + return tf_state + + def get_button_state(self): + state = self.ring_buffer.get() + return state['button_state'] + + def is_button_pressed(self, button_id): + return self.get_button_state()[button_id] + + #========== start stop API =========== + + def start(self, wait=True): + super().start() + if wait: + self.ready_event.wait() + + def stop(self, wait=True): + self.stop_event.set() + if wait: + self.join() + + def __enter__(self): + self.start() + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.stop() + + # ========= main loop ========== + def run(self): + spnav_open() + try: + motion_event = np.zeros((7,), dtype=np.int64) + button_state = np.zeros((self.n_buttons,), dtype=bool) + # send one message immediately so client can start reading + self.ring_buffer.put({ + 'motion_event': motion_event, + 'button_state': button_state, + 'receive_timestamp': time.time() + }) + self.ready_event.set() + + while not self.stop_event.is_set(): + event = spnav_poll_event() + receive_timestamp = time.time() + if isinstance(event, SpnavMotionEvent): + motion_event[:3] = 
event.translation + motion_event[3:6] = event.rotation + motion_event[6] = event.period + elif isinstance(event, SpnavButtonEvent): + button_state[event.bnum] = event.press + else: + # finish integrating this round of events + # before sending over + self.ring_buffer.put({ + 'motion_event': motion_event, + 'button_state': button_state, + 'receive_timestamp': receive_timestamp + }) + time.sleep(1/self.frequency) + finally: + spnav_close() diff --git a/diffusion_policy/real_world/video_recorder.py b/diffusion_policy/real_world/video_recorder.py new file mode 100644 index 0000000..28ea8e0 --- /dev/null +++ b/diffusion_policy/real_world/video_recorder.py @@ -0,0 +1,161 @@ +from typing import Optional, Callable, Generator +import numpy as np +import av +from diffusion_policy.common.timestamp_accumulator import get_accumulate_timestamp_idxs + +def read_video( + video_path: str, dt: float, + video_start_time: float=0.0, + start_time: float=0.0, + img_transform: Optional[Callable[[np.ndarray], np.ndarray]]=None, + thread_type: str="AUTO", + thread_count: int=0, + max_pad_frames: int=10 + ) -> Generator[np.ndarray, None, None]: + frame = None + with av.open(video_path) as container: + stream = container.streams.video[0] + stream.thread_type = thread_type + stream.thread_count = thread_count + next_global_idx = 0 + for frame_idx, frame in enumerate(container.decode(stream)): + # The presentation time in seconds for this frame. 
+ since_start = frame.time + frame_time = video_start_time + since_start + local_idxs, global_idxs, next_global_idx \ + = get_accumulate_timestamp_idxs( + # only one timestamp + timestamps=[frame_time], + start_time=start_time, + dt=dt, + next_global_idx=next_global_idx + ) + if len(global_idxs) > 0: + array = frame.to_ndarray(format='rgb24') + img = array + if img_transform is not None: + img = img_transform(array) + for global_idx in global_idxs: + yield img + # repeat last frame max_pad_frames times + array = frame.to_ndarray(format='rgb24') + img = array + if img_transform is not None: + img = img_transform(array) + for i in range(max_pad_frames): + yield img + +class VideoRecorder: + def __init__(self, + fps, + codec, + input_pix_fmt, + # options for codec + **kwargs + ): + """ + input_pix_fmt: rgb24, bgr24 see https://github.com/PyAV-Org/PyAV/blob/bc4eedd5fc474e0f25b22102b2771fe5a42bb1c7/av/video/frame.pyx#L352 + """ + + self.fps = fps + self.codec = codec + self.input_pix_fmt = input_pix_fmt + self.kwargs = kwargs + # runtime set + self._reset_state() + + def _reset_state(self): + self.container = None + self.stream = None + self.shape = None + self.dtype = None + self.start_time = None + self.next_global_idx = 0 + + @classmethod + def create_h264(cls, + fps, + codec='h264', + input_pix_fmt='rgb24', + output_pix_fmt='yuv420p', + crf=18, + profile='high', + **kwargs + ): + obj = cls( + fps=fps, + codec=codec, + input_pix_fmt=input_pix_fmt, + pix_fmt=output_pix_fmt, + options={ + 'crf': str(crf), + 'profile': profile + }, + **kwargs + ) + return obj + + + def __del__(self): + self.stop() + + def is_ready(self): + return self.stream is not None + + def start(self, file_path, start_time=None): + if self.is_ready(): + # if still recording, stop first and start anew. 
+ self.stop() + + self.container = av.open(file_path, mode='w') + self.stream = self.container.add_stream(self.codec, rate=self.fps) + codec_context = self.stream.codec_context + for k, v in self.kwargs.items(): + setattr(codec_context, k, v) + self.start_time = start_time + + def write_frame(self, img: np.ndarray, frame_time=None): + if not self.is_ready(): + raise RuntimeError('Must run start() before writing!') + + n_repeats = 1 + if self.start_time is not None: + local_idxs, global_idxs, self.next_global_idx \ + = get_accumulate_timestamp_idxs( + # only one timestamp + timestamps=[frame_time], + start_time=self.start_time, + dt=1/self.fps, + next_global_idx=self.next_global_idx + ) + # number of apperance means repeats + n_repeats = len(local_idxs) + + if self.shape is None: + self.shape = img.shape + self.dtype = img.dtype + h,w,c = img.shape + self.stream.width = w + self.stream.height = h + assert img.shape == self.shape + assert img.dtype == self.dtype + + frame = av.VideoFrame.from_ndarray( + img, format=self.input_pix_fmt) + for i in range(n_repeats): + for packet in self.stream.encode(frame): + self.container.mux(packet) + + def stop(self): + if not self.is_ready(): + return + + # Flush stream + for packet in self.stream.encode(): + self.container.mux(packet) + + # Close the file + self.container.close() + + # reset runtime parameters + self._reset_state() diff --git a/diffusion_policy/scripts/bet_blockpush_conversion.py b/diffusion_policy/scripts/bet_blockpush_conversion.py new file mode 100644 index 0000000..c23b487 --- /dev/null +++ b/diffusion_policy/scripts/bet_blockpush_conversion.py @@ -0,0 +1,46 @@ +if __name__ == "__main__": + import sys + import os + import pathlib + + ROOT_DIR = str(pathlib.Path(__file__).parent.parent.parent) + sys.path.append(ROOT_DIR) + + +import os +import click +import pathlib +import numpy as np +from diffusion_policy.common.replay_buffer import ReplayBuffer + +@click.command() +@click.option('-i', '--input', 
required=True, help='input dir contains npy files') +@click.option('-o', '--output', required=True, help='output zarr path') +@click.option('--abs_action', is_flag=True, default=False) +def main(input, output, abs_action): + data_directory = pathlib.Path(input) + observations = np.load( + data_directory / "multimodal_push_observations.npy" + ) + actions = np.load(data_directory / "multimodal_push_actions.npy") + masks = np.load(data_directory / "multimodal_push_masks.npy") + + buffer = ReplayBuffer.create_empty_numpy() + for i in range(len(masks)): + eps_len = int(masks[i].sum()) + obs = observations[i,:eps_len].astype(np.float32) + action = actions[i,:eps_len].astype(np.float32) + if abs_action: + prev_eef_target = obs[:,8:10] + next_eef_target = prev_eef_target + action + action = next_eef_target + data = { + 'obs': obs, + 'action': action + } + buffer.add_episode(data) + + buffer.save_to_path(zarr_path=output, chunk_length=-1) + +if __name__ == '__main__': + main() diff --git a/diffusion_policy/scripts/blockpush_abs_conversion.py b/diffusion_policy/scripts/blockpush_abs_conversion.py new file mode 100644 index 0000000..6c11e42 --- /dev/null +++ b/diffusion_policy/scripts/blockpush_abs_conversion.py @@ -0,0 +1,29 @@ +if __name__ == "__main__": + import sys + import os + import pathlib + + ROOT_DIR = str(pathlib.Path(__file__).parent.parent.parent) + sys.path.append(ROOT_DIR) + +import os +import click +import pathlib +from diffusion_policy.common.replay_buffer import ReplayBuffer + + +@click.command() +@click.option('-i', '--input', required=True) +@click.option('-o', '--output', required=True) +@click.option('-t', '--target_eef_idx', default=8, type=int) +def main(input, output, target_eef_idx): + buffer = ReplayBuffer.copy_from_path(input) + obs = buffer['obs'] + action = buffer['action'] + prev_eef_target = obs[:,target_eef_idx:target_eef_idx+action.shape[1]] + next_eef_target = prev_eef_target + action + action[:] = next_eef_target + 
buffer.save_to_path(zarr_path=output, chunk_length=-1) + +if __name__ == '__main__': + main() diff --git a/diffusion_policy/scripts/episode_lengths.py b/diffusion_policy/scripts/episode_lengths.py new file mode 100644 index 0000000..7d80336 --- /dev/null +++ b/diffusion_policy/scripts/episode_lengths.py @@ -0,0 +1,29 @@ +if __name__ == "__main__": + import sys + import os + import pathlib + + ROOT_DIR = str(pathlib.Path(__file__).parent.parent.parent) + sys.path.append(ROOT_DIR) + +import click +import numpy as np +import json +from diffusion_policy.common.replay_buffer import ReplayBuffer + +@click.command() +@click.option('--input', '-i', required=True) +@click.option('--dt', default=0.1, type=float) +def main(input, dt): + buffer = ReplayBuffer.create_from_path(input) + lenghts = buffer.episode_lengths + durations = lenghts * dt + result = { + 'duration/mean': np.mean(durations) + } + + text = json.dumps(result, indent=2) + print(text) + +if __name__ == '__main__': + main() diff --git a/diffusion_policy/scripts/generate_bet_blockpush.py b/diffusion_policy/scripts/generate_bet_blockpush.py new file mode 100644 index 0000000..e271227 --- /dev/null +++ b/diffusion_policy/scripts/generate_bet_blockpush.py @@ -0,0 +1,64 @@ +if __name__ == "__main__": + import sys + import os + import pathlib + + ROOT_DIR = str(pathlib.Path(__file__).parent.parent.parent) + sys.path.append(ROOT_DIR) + + +import os +import click +import pathlib +import numpy as np +from tqdm import tqdm +from diffusion_policy.common.replay_buffer import ReplayBuffer +from tf_agents.environments.wrappers import TimeLimit +from tf_agents.environments.gym_wrapper import GymWrapper +from tf_agents.trajectories.time_step import StepType +from diffusion_policy.env.block_pushing.block_pushing_multimodal import BlockPushMultimodal +from diffusion_policy.env.block_pushing.block_pushing import BlockPush +from diffusion_policy.env.block_pushing.oracles.multimodal_push_oracle import MultimodalOrientedPushOracle + 
@click.command()
@click.option('-o', '--output', required=True)
@click.option('-n', '--n_episodes', default=1000)
@click.option('-c', '--chunk_length', default=-1)
def main(output, n_episodes, chunk_length):
    """Roll out the scripted multimodal push oracle in BlockPushMultimodal
    and save all episodes (flattened obs + actions) to a zarr replay buffer.
    """

    buffer = ReplayBuffer.create_empty_numpy()
    # 350-step time limit bounds each oracle rollout.
    env = TimeLimit(GymWrapper(BlockPushMultimodal()), duration=350)
    for i in tqdm(range(n_episodes)):
        print(i)
        obs_history = list()
        action_history = list()

        # Seed with the episode index so the generated dataset is reproducible.
        env.seed(i)
        policy = MultimodalOrientedPushOracle(env)
        time_step = env.reset()
        policy_state = policy.get_initial_state(1)
        while True:
            action_step = policy.action(time_step, policy_state)
            # Flatten the observation dict into one vector; ordering follows
            # dict insertion order — presumably stable across steps, TODO confirm.
            obs = np.concatenate(list(time_step.observation.values()), axis=-1)
            action = action_step.action
            obs_history.append(obs)
            action_history.append(action)

            # 2 == StepType.LAST: final observation recorded, do not step again.
            if time_step.step_type == 2:
                break

            # state = env.wrapped_env().gym.get_pybullet_state()
            time_step = env.step(action)
        obs_history = np.array(obs_history)
        action_history = np.array(action_history)

        episode = {
            'obs': obs_history,
            'action': action_history
        }
        buffer.add_episode(episode)

    buffer.save_to_path(output, chunk_length=chunk_length)
@click.command()
@click.option('--input', '-i', required=True)
@click.option('--output', '-o', default=None)
@click.option('--resolution', '-r', default='640x480')
@click.option('--n_decoding_threads', '-nd', default=-1, type=int)
@click.option('--n_encoding_threads', '-ne', default=-1, type=int)
def main(input, output, resolution, n_decoding_threads, n_encoding_threads):
    """Convert a raw real-robot dataset (replay_buffer.zarr + videos/) into a
    resized replay buffer.

    Output defaults to <input>/<resolution>.zarr.zip; a ``.zip`` suffix selects
    a zarr ZipStore, anything else a DirectoryStore.
    """
    out_resolution = tuple(int(x) for x in resolution.split('x'))
    input = pathlib.Path(os.path.expanduser(input))
    in_zarr_path = input.joinpath('replay_buffer.zarr')
    in_video_dir = input.joinpath('videos')
    assert in_zarr_path.is_dir()
    assert in_video_dir.is_dir()
    if output is None:
        output = input.joinpath(resolution + '.zarr.zip')
    else:
        output = pathlib.Path(os.path.expanduser(output))

    if output.exists():
        # Fix: prompt previously read "Overrite?"
        click.confirm('Output path already exists! Overwrite?', abort=True)

    # Limit intra-library threading; parallelism is handled by the converter.
    cv2.setNumThreads(1)
    with threadpoolctl.threadpool_limits(1):
        replay_buffer = real_data_to_replay_buffer(
            dataset_path=str(input),
            out_resolutions=out_resolution,
            n_decoding_threads=n_decoding_threads,
            n_encoding_threads=n_encoding_threads
        )

    print('Saving to disk')
    if output.suffix == '.zip':
        with zarr.ZipStore(output) as zip_store:
            replay_buffer.save_to_store(
                store=zip_store
            )
    else:
        with zarr.DirectoryStore(output) as store:
            replay_buffer.save_to_store(
                store=store
            )
def get_mask_metrics(target_mask, mask):
    """Score a detected mask against the goal (target) mask.

    Args:
        target_mask: boolean ndarray, the goal region.
        mask: boolean ndarray of the same shape, the detected region.

    Returns:
        dict with:
          'iou': intersection-over-union of the two masks.
          'coverage': fraction of the target covered by ``mask``.
        Both are 0.0 when the corresponding denominator is empty
        (previously this divided by zero and produced NaN).
    """
    total = np.sum(target_mask)
    intersection = np.sum(target_mask & mask)
    union = np.sum(target_mask | mask)
    # Guard empty denominators: an empty union/target yields 0.0, not NaN.
    iou = (intersection / union) if union > 0 else 0.0
    coverage = (intersection / total) if total > 0 else 0.0
    result = {
        'iou': iou,
        'coverage': coverage
    }
    return result
@click.command()
@click.option(
    '--reference', '-r', required=True,
    help="Reference video whose last frame will define goal.")
@click.option(
    '--input', '-i', required=True,
    help='Dataset path to evaluate.')
@click.option(
    '--camera_idx', '-ci', default=0, type=int,
    help="Camera index to compute metrics")
@click.option('--n_workers', '-n', default=20, type=int)
def main(reference, input, camera_idx, n_workers):
    """Compute per-episode T-block IoU/coverage metrics against the goal pose
    (last frame of the reference video) and write aggregated + raw JSON.
    """
    # read last frame of the reference video to get target mask
    last_frame = None
    with av.open(reference) as container:
        stream = container.streams.video[0]
        for frame in tqdm(
                container.decode(stream),
                total=stream.frames):
            last_frame = frame

    last_img = last_frame.to_ndarray(format='rgb24')
    target_mask = get_t_mask(last_img)

    # locate the per-episode evaluation videos: videos/<episode_idx>/<camera_idx>.mp4
    episode_video_path_map = dict()
    input_dir = pathlib.Path(input)
    input_video_dir = input_dir.joinpath('videos')
    for vid_dir in input_video_dir.glob("*/"):
        episode_idx = int(vid_dir.stem)
        video_path = vid_dir.joinpath(f'{camera_idx}.mp4')
        if video_path.exists():
            episode_video_path_map[episode_idx] = str(video_path.absolute())

    episode_idxs = sorted(episode_video_path_map.keys())
    print(f"Found video for following episodes: {episode_idxs}")

    # score all episodes in parallel, one worker per video
    with mp.Pool(n_workers) as pool:
        args = [(episode_video_path_map[idx], target_mask)
                for idx in episode_idxs]
        results = pool.map(worker, args)
    episode_metric_map = dict(zip(episode_idxs, results))

    # aggregate: best frame ('max/') and final frame ('last/') per metric,
    # averaged over episodes
    agg_map = collections.defaultdict(list)
    for idx, metric in episode_metric_map.items():
        for key, value in metric.items():
            agg_map['max/' + key].append(np.max(value))
            agg_map['last/' + key].append(value[-1])

    final_metric = dict()
    for key, value in agg_map.items():
        final_metric[key] = np.mean(value)

    # save metrics
    print('Saving metrics!')
    with input_dir.joinpath('metrics_agg.json').open('w') as f:
        json.dump(final_metric, f, sort_keys=True, indent=2)

    with input_dir.joinpath('metrics_raw.json').open('w') as f:
        json.dump(episode_metric_map, f, sort_keys=True, indent=2)
    print('Done!')
@click.command()
@click.option(
    '--reference', '-r', required=True,
    help='Reference metrics_raw.json from demonstration dataset.'
)
@click.option(
    '--input', '-i', required=True,
    help='Data search path'
)
def main(reference, input):
    """Compute per-metric success rates: an episode succeeds on a metric when
    its final value is >= the worst (minimum) final value among the demos.
    """
    # compute the min last metric across all demo episodes
    with open(reference, 'r') as f:  # was json.load(open(...)) -- leaked handle
        demo_metrics = json.load(f)
    demo_min_metrics = collections.defaultdict(lambda: float('inf'))
    for episode_idx, metrics in demo_metrics.items():
        for key, value in metrics.items():
            last_value = value[-1]
            demo_min_metrics[key] = min(demo_min_metrics[key], last_value)
    print(demo_min_metrics)

    # find every metrics_raw.json under the search path
    name = 'metrics_raw.json'
    search_dir = pathlib.Path(input)
    success_rate_map = dict()
    for json_path in search_dir.glob('**/' + name):
        rel_path = json_path.relative_to(search_dir)
        rel_name = str(rel_path.parent)
        with json_path.open('r') as f:  # was left open
            this_metrics = json.load(f)
        metric_success_idxs = collections.defaultdict(list)
        metric_failure_idxs = collections.defaultdict(list)
        for episode_idx, metrics in this_metrics.items():
            for key, value in metrics.items():
                last_value = value[-1]
                demo_min = demo_min_metrics[key]
                if last_value >= demo_min:
                    # success
                    metric_success_idxs[key].append(episode_idx)
                else:
                    metric_failure_idxs[key].append(episode_idx)
                # touch both defaultdicts so the key exists even when there
                # are no successes (or no failures) at all
                _ = metric_success_idxs[key]
                _ = metric_failure_idxs[key]
        metric_success_rate = dict()
        n_episodes = len(this_metrics)
        for key, value in metric_success_idxs.items():
            metric_success_rate[key] = len(value) / n_episodes
        success_rate_map[rel_name] = metric_success_rate

    text = json.dumps(success_rate_map, indent=2)
    print(text)
def read_all_actions(hdf5_file, metric_skip_steps=1):
    """Concatenate the action arrays of every demo in a robomimic hdf5 file.

    Args:
        hdf5_file: open h5py.File following the robomimic layout
            (datasets at ``data/demo_<i>/actions``).
        metric_skip_steps: number of leading steps dropped from each episode
            before concatenation — presumably to align original and converted
            action streams for comparison; TODO confirm against the
            conversion script.

    Returns:
        One ndarray of shape (total_steps, action_dim) covering all demos.
    """
    n_demos = len(hdf5_file['data'])
    all_actions = list()
    for i in tqdm(range(n_demos)):
        actions = hdf5_file[f'data/demo_{i}/actions'][:]
        all_actions.append(actions[metric_skip_steps:])
    all_actions = np.concatenate(all_actions, axis=0)
    return all_actions
@click.command()
@click.option('-i', '--input', required=True, help='input hdf5 path')
@click.option('-o', '--output', required=True,
              help='second hdf5 path to compare against (must exist)')
def main(input, output):
    """Report the maximum position / rotation distance between the action
    streams of two robomimic hdf5 datasets (e.g. original vs converted).

    Note: the original -o help text claimed "Parent directory must exist",
    but the code requires the file itself to exist (it is read, not written).
    """
    # process inputs
    input = pathlib.Path(input).expanduser()
    assert input.is_file()
    output = pathlib.Path(output).expanduser()
    assert output.is_file()

    # context managers close the files deterministically
    # (previously both handles were leaked)
    with h5py.File(str(input), 'r') as input_file, \
            h5py.File(str(output), 'r') as output_file:
        input_all_actions = read_all_actions(input_file)
        output_all_actions = read_all_actions(output_file)

    # dims 0:3 are position; dims 3:6 are axis-angle (rotvec) rotation
    pos_dist = np.linalg.norm(
        input_all_actions[:, :3] - output_all_actions[:, :3], axis=-1)
    # rotation distance = magnitude of the relative rotation
    rot_dist = (Rotation.from_rotvec(input_all_actions[:, 3:6])
        * Rotation.from_rotvec(output_all_actions[:, 3:6]).inv()
        ).magnitude()

    print(f'max pos dist: {pos_dist.max()}')
    print(f'max rot dist: {rot_dist.max()}')
@click.command()
@click.option('-i', '--input', required=True, help='input hdf5 path')
@click.option('-o', '--output', required=True, help='output hdf5 path. Parent directory must exist')
@click.option('-e', '--eval_dir', default=None, help='directory to output evaluation metrics')
@click.option('-n', '--num_workers', default=None, type=int)
def main(input, output, eval_dir, num_workers):
    """Convert a robomimic dataset's actions to absolute actions in parallel,
    writing a patched copy of the hdf5 and optional conversion-error plots.
    """
    # process inputs
    input = pathlib.Path(input).expanduser()
    assert input.is_file()
    output = pathlib.Path(output).expanduser()
    assert output.parent.is_dir()
    assert not output.is_dir()

    do_eval = False
    if eval_dir is not None:
        eval_dir = pathlib.Path(eval_dir).expanduser()
        assert eval_dir.parent.exists()
        do_eval = True

    converter = RobomimicAbsoluteActionConverter(input)

    # convert every demo in parallel; each worker re-opens the dataset itself
    with multiprocessing.Pool(num_workers) as pool:
        results = pool.map(worker, [(input, i, do_eval) for i in range(len(converter))])

    # copy then patch in place: keeps all non-action data untouched
    print('Copying hdf5')
    shutil.copy(str(input), str(output))

    # modify action
    with h5py.File(output, 'r+') as out_file:
        for i in tqdm(range(len(converter)), desc="Writing to output"):
            abs_actions, info = results[i]
            demo = out_file[f'data/demo_{i}']
            demo['actions'][:] = abs_actions

    # save eval
    if do_eval:
        eval_dir.mkdir(parents=False, exist_ok=True)

        print("Writing error_stats.pkl")
        infos = [info for _, info in results]
        # close the pickle file deterministically
        # (was pickle.dump(infos, path.open('wb')) -- leaked handle)
        with eval_dir.joinpath('error_stats.pkl').open('wb') as f:
            pickle.dump(infos, f)

        print("Generating visualization")
        metrics = ['pos', 'rot']
        metrics_dicts = dict()
        for m in metrics:
            metrics_dicts[m] = collections.defaultdict(list)

        for i in range(len(infos)):
            info = infos[i]
            for k, v in info.items():
                for m in metrics:
                    metrics_dicts[m][k].append(v[m])

        from matplotlib import pyplot as plt
        plt.switch_backend('PDF')

        fig, ax = plt.subplots(1, len(metrics))
        for i in range(len(metrics)):
            axis = ax[i]
            data = metrics_dicts[metrics[i]]
            for key, value in data.items():
                axis.plot(value, label=key)
            axis.legend()
            axis.set_title(metrics[i])
        fig.set_size_inches(10, 4)
        fig.savefig(str(eval_dir.joinpath('error_stats.pdf')))
        fig.savefig(str(eval_dir.joinpath('error_stats.png')))
class SharedMemoryQueue:
    """
    A Lock-Free FIFO Shared Memory Data Structure.
    Stores a sequence of dict of numpy arrays.

    Coordination uses two monotonically increasing counters: slots are
    addressed as ``count % buffer_size``. NOTE(review): correctness appears
    to assume a single producer and a single consumer (no CAS on the
    counters) -- confirm before using with multiple writers or readers.
    """

    def __init__(self,
            shm_manager: SharedMemoryManager,
            array_specs: List[ArraySpec],
            buffer_size: int
        ):

        # create atomic counter
        # write_counter: total items ever put; read_counter: total items
        # ever consumed. qsize == write - read.
        write_counter = SharedAtomicCounter(shm_manager)
        read_counter = SharedAtomicCounter(shm_manager)

        # allocate shared memory
        # one shared array per spec, with a leading ring dimension
        shared_arrays = dict()
        for spec in array_specs:
            key = spec.name
            assert key not in shared_arrays
            array = SharedNDArray.create_from_shape(
                mem_mgr=shm_manager,
                shape=(buffer_size,) + tuple(spec.shape),
                dtype=spec.dtype)
            shared_arrays[key] = array

        self.buffer_size = buffer_size
        self.array_specs = array_specs
        self.write_counter = write_counter
        self.read_counter = read_counter
        self.shared_arrays = shared_arrays

    @classmethod
    def create_from_examples(cls,
            shm_manager: SharedMemoryManager,
            examples: Dict[str, Union[np.ndarray, numbers.Number]],
            buffer_size: int
        ):
        """Build a queue whose ArraySpecs are inferred from example values
        (ndarrays keep their shape/dtype; scalars become 0-d arrays)."""
        specs = list()
        for key, value in examples.items():
            shape = None
            dtype = None
            if isinstance(value, np.ndarray):
                shape = value.shape
                dtype = value.dtype
                # object arrays cannot live in flat shared memory
                assert dtype != np.dtype('O')
            elif isinstance(value, numbers.Number):
                shape = tuple()
                dtype = np.dtype(type(value))
            else:
                raise TypeError(f'Unsupported type {type(value)}')

            spec = ArraySpec(
                name=key,
                shape=shape,
                dtype=dtype
            )
            specs.append(spec)

        obj = cls(
            shm_manager=shm_manager,
            array_specs=specs,
            buffer_size=buffer_size
        )
        return obj

    def qsize(self):
        # number of items written but not yet consumed
        read_count = self.read_counter.load()
        write_count = self.write_counter.load()
        n_data = write_count - read_count
        return n_data

    def empty(self):
        n_data = self.qsize()
        return n_data <= 0

    def clear(self):
        # fast-forward the reader to the writer; pending items are dropped
        self.read_counter.store(self.write_counter.load())

    def put(self, data: Dict[str, Union[np.ndarray, numbers.Number]]):
        """Append one dict of arrays/scalars; raises queue.Full when the
        ring has no free slot."""
        read_count = self.read_counter.load()
        write_count = self.write_counter.load()
        n_data = write_count - read_count
        if n_data >= self.buffer_size:
            raise Full()

        next_idx = write_count % self.buffer_size

        # write to shared memory
        for key, value in data.items():
            arr: np.ndarray
            arr = self.shared_arrays[key].get()
            if isinstance(value, np.ndarray):
                arr[next_idx] = value
            else:
                # scalars are cast to the slot's dtype
                arr[next_idx] = np.array(value, dtype=arr.dtype)

        # update idx -- publishing the write must happen after the data copy
        self.write_counter.add(1)

    def get(self, out=None) -> Dict[str, np.ndarray]:
        """Pop the oldest item, copying it into `out` (allocated if None);
        raises queue.Empty when no item is available."""
        write_count = self.write_counter.load()
        read_count = self.read_counter.load()
        n_data = write_count - read_count
        if n_data <= 0:
            raise Empty()

        if out is None:
            out = self._allocate_empty()

        next_idx = read_count % self.buffer_size
        for key, value in self.shared_arrays.items():
            arr = value.get()
            np.copyto(out[key], arr[next_idx])

        # update idx -- frees the slot only after the copy is done
        self.read_counter.add(1)
        return out

    def get_k(self, k, out=None) -> Dict[str, np.ndarray]:
        """Pop exactly k oldest items (k must be <= qsize); arrays gain a
        leading axis of length k."""
        write_count = self.write_counter.load()
        read_count = self.read_counter.load()
        n_data = write_count - read_count
        if n_data <= 0:
            raise Empty()
        assert k <= n_data

        out = self._get_k_impl(k, read_count, out=out)
        self.read_counter.add(k)
        return out

    def get_all(self, out=None) -> Dict[str, np.ndarray]:
        """Pop every pending item at once."""
        write_count = self.write_counter.load()
        read_count = self.read_counter.load()
        n_data = write_count - read_count
        if n_data <= 0:
            raise Empty()

        out = self._get_k_impl(n_data, read_count, out=out)
        self.read_counter.add(n_data)
        return out

    def _get_k_impl(self, k, read_count, out=None) -> Dict[str, np.ndarray]:
        # Copy k items starting at read_count, handling ring wrap-around
        # with at most two contiguous slices per array.
        if out is None:
            out = self._allocate_empty(k)

        curr_idx = read_count % self.buffer_size
        for key, value in self.shared_arrays.items():
            arr = value.get()
            target = out[key]

            start = curr_idx
            end = min(start + k, self.buffer_size)
            target_start = 0
            target_end = (end - start)
            target[target_start: target_end] = arr[start:end]

            remainder = k - (end - start)
            if remainder > 0:
                # wrap around
                start = 0
                end = start + remainder
                target_start = target_end
                target_end = k
                target[target_start: target_end] = arr[start:end]

        return out

    def _allocate_empty(self, k=None):
        # Allocate local (non-shared) output arrays matching the specs,
        # optionally with a leading batch axis of length k.
        result = dict()
        for spec in self.array_specs:
            shape = spec.shape
            if k is not None:
                shape = (k,) + shape
            result[spec.name] = np.empty(
                shape=shape, dtype=spec.dtype)
        return result
+ """ + + def __init__(self, + shm_manager: SharedMemoryManager, + array_specs: List[ArraySpec], + get_max_k: int, + get_time_budget: float, + put_desired_frequency: float, + safety_margin: float=1.5 + ): + """ + shm_manager: Manages the life cycle of share memories + across processes. Remeber to run .start() before passing. + array_specs: Name, shape and type of arrays for a single time step. + get_max_k: The maxmum number of items can be queried at once. + get_time_budget: The maxmum amount of time spent copying data from + shared memory to local memory. Increase this number for larger arrays. + put_desired_frequency: The maximum frequency that .put() can be called. + This influces the buffer size. + """ + + # create atomic counter + counter = SharedAtomicCounter(shm_manager) + + # compute buffer size + # At any given moment, the past get_max_k items should never + # be touched (to be read freely). Assuming the reading is reading + # these k items, which takes maximum of get_time_budget seconds, + # we need enough empty slots to make sure put_desired_frequency Hz + # of put can be sustaied. 
+ buffer_size = int(np.ceil( + put_desired_frequency * get_time_budget + * safety_margin)) + get_max_k + + # allocate shared memory + shared_arrays = dict() + for spec in array_specs: + key = spec.name + assert key not in shared_arrays + array = SharedNDArray.create_from_shape( + mem_mgr=shm_manager, + shape=(buffer_size,) + tuple(spec.shape), + dtype=spec.dtype) + shared_arrays[key] = array + + # allocate timestamp array + timestamp_array = SharedNDArray.create_from_shape( + mem_mgr=shm_manager, + shape=(buffer_size,), + dtype=np.float64) + timestamp_array.get()[:] = -np.inf + + self.buffer_size = buffer_size + self.array_specs = array_specs + self.counter = counter + self.shared_arrays = shared_arrays + self.timestamp_array = timestamp_array + self.get_time_budget = get_time_budget + self.get_max_k = get_max_k + self.put_desired_frequency = put_desired_frequency + + + @property + def count(self): + return self.counter.load() + + @classmethod + def create_from_examples(cls, + shm_manager: SharedMemoryManager, + examples: Dict[str, Union[np.ndarray, numbers.Number]], + get_max_k: int=32, + get_time_budget: float=0.01, + put_desired_frequency: float=60 + ): + specs = list() + for key, value in examples.items(): + shape = None + dtype = None + if isinstance(value, np.ndarray): + shape = value.shape + dtype = value.dtype + assert dtype != np.dtype('O') + elif isinstance(value, numbers.Number): + shape = tuple() + dtype = np.dtype(type(value)) + else: + raise TypeError(f'Unsupported type {type(value)}') + + spec = ArraySpec( + name=key, + shape=shape, + dtype=dtype + ) + specs.append(spec) + + obj = cls( + shm_manager=shm_manager, + array_specs=specs, + get_max_k=get_max_k, + get_time_budget=get_time_budget, + put_desired_frequency=put_desired_frequency + ) + return obj + + def clear(self): + self.counter.store(0) + + def put(self, data: Dict[str, Union[np.ndarray, numbers.Number]], wait: bool=True): + count = self.counter.load() + next_idx = count % self.buffer_size + 
# Make sure the next self.get_max_k elements in the ring buffer have at least + # self.get_time_budget seconds untouched after written, so that + # get_last_k can safely read k elements from any count location. + # Sanity check: when get_max_k == 1, the element pointed by next_idx + # should be rewritten at minimum self.get_time_budget seconds later. + timestamp_lookahead_idx = (next_idx + self.get_max_k - 1) % self.buffer_size + old_timestamp = self.timestamp_array.get()[timestamp_lookahead_idx] + t = time.monotonic() + if (t - old_timestamp) < self.get_time_budget: + deltat = t - old_timestamp + if wait: + # sleep the remaining time to be safe + time.sleep(self.get_time_budget - deltat) + else: + # throw an error + past_iters = self.buffer_size - self.get_max_k + hz = past_iters / deltat + raise TimeoutError( + 'Put executed too fast {}items/{:.4f}s ~= {}Hz'.format( + past_iters, deltat,hz)) + + # write to shared memory + for key, value in data.items(): + arr: np.ndarray + arr = self.shared_arrays[key].get() + if isinstance(value, np.ndarray): + arr[next_idx] = value + else: + arr[next_idx] = np.array(value, dtype=arr.dtype) + + # update timestamp + self.timestamp_array.get()[next_idx] = time.monotonic() + self.counter.add(1) + + def _allocate_empty(self, k=None): + result = dict() + for spec in self.array_specs: + shape = spec.shape + if k is not None: + shape = (k,) + shape + result[spec.name] = np.empty( + shape=shape, dtype=spec.dtype) + return result + + def get(self, out=None) -> Dict[str, np.ndarray]: + if out is None: + out = self._allocate_empty() + start_time = time.monotonic() + count = self.counter.load() + curr_idx = (count - 1) % self.buffer_size + for key, value in self.shared_arrays.items(): + arr = value.get() + np.copyto(out[key], arr[curr_idx]) + end_time = time.monotonic() + dt = end_time - start_time + if dt > self.get_time_budget: + raise TimeoutError(f'Get time out {dt} vs {self.get_time_budget}') + return out + + def get_last_k(self, 
k:int, out=None) -> Dict[str, np.ndarray]: + assert k <= self.get_max_k + if out is None: + out = self._allocate_empty(k) + start_time = time.monotonic() + count = self.counter.load() + assert k <= count + curr_idx = (count - 1) % self.buffer_size + for key, value in self.shared_arrays.items(): + arr = value.get() + target = out[key] + + end = curr_idx + 1 + start = max(0, end - k) + target_end = k + target_start = target_end - (end - start) + target[target_start: target_end] = arr[start:end] + + remainder = k - (end - start) + if remainder > 0: + # wrap around + end = self.buffer_size + start = end - remainder + target_start = 0 + target_end = end - start + target[target_start: target_end] = arr[start:end] + end_time = time.monotonic() + dt = end_time - start_time + if dt > self.get_time_budget: + raise TimeoutError(f'Get time out {dt} vs {self.get_time_budget}') + return out + + def get_all(self) -> Dict[str, np.ndarray]: + k = min(self.count, self.get_max_k) + return self.get_last_k(k=k) diff --git a/diffusion_policy/shared_memory/shared_memory_util.py b/diffusion_policy/shared_memory/shared_memory_util.py new file mode 100644 index 0000000..24ee7df --- /dev/null +++ b/diffusion_policy/shared_memory/shared_memory_util.py @@ -0,0 +1,39 @@ +from typing import Tuple +from dataclasses import dataclass +import numpy as np +from multiprocessing.managers import SharedMemoryManager +from atomics import atomicview, MemoryOrder, UINT + +@dataclass +class ArraySpec: + name: str + shape: Tuple[int] + dtype: np.dtype + + +class SharedAtomicCounter: + def __init__(self, + shm_manager: SharedMemoryManager, + size :int=8 # 64bit int + ): + shm = shm_manager.SharedMemory(size=size) + self.shm = shm + self.size = size + self.store(0) # initialize + + @property + def buf(self): + return self.shm.buf[:self.size] + + def load(self) -> int: + with atomicview(buffer=self.buf, atype=UINT) as a: + value = a.load(order=MemoryOrder.ACQUIRE) + return value + + def store(self, value: int): 
class SharedAtomicCounter:
    """An unsigned integer counter stored in shared memory and updated with
    atomic operations, usable across processes.

    The ACQUIRE/RELEASE memory orders pair up so that data written to other
    shared arrays before a counter update is visible to any process that
    observes the new counter value.
    """
    def __init__(self,
            shm_manager: SharedMemoryManager,
            size :int=8 # 64bit int
        ):
        shm = shm_manager.SharedMemory(size=size)
        self.shm = shm
        self.size = size
        self.store(0) # initialize

    @property
    def buf(self):
        # view of exactly `size` bytes; SharedMemory may over-allocate
        return self.shm.buf[:self.size]

    def load(self) -> int:
        with atomicview(buffer=self.buf, atype=UINT) as a:
            value = a.load(order=MemoryOrder.ACQUIRE)
        return value

    def store(self, value: int):
        with atomicview(buffer=self.buf, atype=UINT) as a:
            a.store(value, order=MemoryOrder.RELEASE)

    def add(self, value: int):
        with atomicview(buffer=self.buf, atype=UINT) as a:
            a.add(value, order=MemoryOrder.ACQ_REL)
class SharedNDArray(Generic[SharedT]):
    """Class to keep track of and retrieve the data in a shared array.

    Attributes
    ----------
    shm
        SharedMemory object containing the data of the array.
    shape
        Shape of the NumPy array (read-only property backed by ``_shape``).
    dtype
        Type of the NumPy array. Anything that may be passed to the `dtype=`
        argument in `np.ndarray`.
    lock
        NOTE(review): declared below for typing purposes, but `__init__`
        takes no `lock` argument and never assigns it, so accessing `.lock`
        raises AttributeError — confirm whether lock support was
        intentionally removed.

    A SharedNDArray may be created directly from a preallocated shared
    memory object plus the dtype and shape of the array it represents:
    >>> from multiprocessing.shared_memory import SharedMemory
    >>> import numpy as np
    >>> x = np.array([1, 2, 3])
    >>> shm = SharedMemory(name="x", create=True, size=x.nbytes)
    >>> arr = SharedNDArray(shm, x.shape, x.dtype)
    >>> arr.get()[:] = x[:]  # copy x into the array
    >>> shm.close(); shm.unlink()

    Or using a SharedMemoryManager, from a shape or an existing array
    (note: the factory methods are `create_from_shape` / `create_from_array`):
    >>> from multiprocessing.managers import SharedMemoryManager
    >>> mem_mgr = SharedMemoryManager()
    >>> mem_mgr.start()
    >>> arr = SharedNDArray.create_from_shape(mem_mgr, x.shape, x.dtype)
    >>> arr.get()[:] = x[:]
    >>> arr = SharedNDArray.create_from_array(mem_mgr, x)  # one step
    >>> mem_mgr.shutdown()

    `SharedNDArray` does not subclass numpy.ndarray; it generates an
    ndarray on-the-fly in `get()`, so all element access and ndarray
    methods must go through `.get()` (e.g. ``arr.get().max()``).
    """

    shm: SharedMemory
    # shape: Tuple[int, ...]  # is a property
    dtype: np.dtype
    lock: Optional[multiprocessing.synchronize.Lock]

    def __init__(
        self, shm: SharedMemoryLike, shape: Tuple[int, ...], dtype: npt.DTypeLike):
        """Initialize a SharedNDArray from existing shared memory, shape, dtype.

        To create from a memory manager plus data or shape, use the
        `create_from_array()` or `create_from_shape()` classmethods.

        Parameters
        ----------
        shm
            `multiprocessing.shared_memory.SharedMemory` object, or the name
            of an existing block to attach to.
        shape
            Shape of the NumPy array to be represented in the shared memory.
        dtype
            Data type of the array; any valid argument to `np.dtype`.

        Raises
        ------
        AssertionError
            If the SharedMemory is smaller than shape x dtype itemsize.
        """
        if isinstance(shm, str):
            # attach to an existing block by name; never creates one here
            shm = SharedMemory(name=shm, create=False)
        dtype = np.dtype(dtype)  # Try to convert to dtype
        assert shm.size >= (dtype.itemsize * np.prod(shape))
        self.shm = shm
        self.dtype = dtype
        self._shape: Tuple[int, ...] = shape

    def __repr__(self):
        # Like numpy's ndarray repr
        cls_name = self.__class__.__name__
        nspaces = len(cls_name) + 1
        array_repr = str(self.get())
        array_repr = array_repr.replace("\n", "\n" + " " * nspaces)
        return f"{cls_name}({array_repr}, dtype={self.dtype})"

    @classmethod
    def create_from_array(
        cls, mem_mgr: SharedMemoryManager, arr: npt.NDArray[SharedT]
    ) -> SharedNDArray[SharedT]:
        """Create a SharedNDArray from a SharedMemoryManager and an existing
        numpy array, copying the array's contents in.

        Parameters
        ----------
        mem_mgr
            Running `multiprocessing.managers.SharedMemoryManager` instance
            from which to create the SharedMemory for the SharedNDArray.
        arr
            NumPy `ndarray` object to copy into the created SharedNDArray
            upon initialization.
        """
        # Simply use create_from_shape() and copy the data into it.
        shared_arr = cls.create_from_shape(mem_mgr, arr.shape, arr.dtype)
        shared_arr.get()[:] = arr[:]
        return shared_arr

    @classmethod
    def create_from_shape(
        cls, mem_mgr: SharedMemoryManager, shape: Tuple, dtype: npt.DTypeLike) -> SharedNDArray:
        """Create a SharedNDArray directly from a SharedMemoryManager.

        Parameters
        ----------
        mem_mgr
            SharedMemoryManager instance that has been started.
        shape
            Shape of the array.
        dtype
            Data type of the array; any valid argument to `np.dtype`.
        """
        dtype = np.dtype(dtype)  # Convert to dtype if possible
        shm = mem_mgr.SharedMemory(np.prod(shape) * dtype.itemsize)
        return cls(shm=shm, shape=shape, dtype=dtype)

    @property
    def shape(self) -> Tuple[int, ...]:
        return self._shape


    def get(self) -> npt.NDArray[SharedT]:
        """Get a numpy array with access to the shared memory"""
        return np.ndarray(self.shape, dtype=self.dtype, buffer=self.shm.buf)

    def __del__(self):
        # Detach this process's view; does NOT unlink the shared block
        # (the SharedMemoryManager owns and eventually frees it).
        self.shm.close()
hasattr(value, 'state_dict') and hasattr(value, 'load_state_dict'): + # modules, optimizers and samplers etc + if key not in exclude_keys: + if use_thread: + payload['state_dicts'][key] = _copy_to_cpu(value.state_dict()) + else: + payload['state_dicts'][key] = value.state_dict() + elif key in include_keys: + payload['pickles'][key] = dill.dumps(value) + if use_thread: + self._saving_thread = threading.Thread( + target=lambda : torch.save(payload, path.open('wb'), pickle_module=dill)) + self._saving_thread.start() + else: + torch.save(payload, path.open('wb'), pickle_module=dill) + return str(path.absolute()) + + def get_checkpoint_path(self, tag='latest'): + return pathlib.Path(self.output_dir).joinpath('checkpoints', f'{tag}.ckpt') + + def load_payload(self, payload, exclude_keys=None, include_keys=None, **kwargs): + if exclude_keys is None: + exclude_keys = tuple() + if include_keys is None: + include_keys = payload['pickles'].keys() + + for key, value in payload['state_dicts'].items(): + if key not in exclude_keys: + self.__dict__[key].load_state_dict(value, **kwargs) + for key in include_keys: + if key in payload['pickles']: + self.__dict__[key] = dill.loads(payload['pickles'][key]) + + def load_checkpoint(self, path=None, tag='latest', + exclude_keys=None, + include_keys=None, + **kwargs): + if path is None: + path = self.get_checkpoint_path(tag=tag) + else: + path = pathlib.Path(path) + payload = torch.load(path.open('rb'), pickle_module=dill, **kwargs) + self.load_payload(payload, + exclude_keys=exclude_keys, + include_keys=include_keys) + return payload + + @classmethod + def create_from_checkpoint(cls, path, + exclude_keys=None, + include_keys=None, + **kwargs): + payload = torch.load(open(path, 'rb'), pickle_module=dill) + instance = cls(payload['cfg']) + instance.load_payload( + payload=payload, + exclude_keys=exclude_keys, + include_keys=include_keys, + **kwargs) + return instance + + def save_snapshot(self, tag='latest'): + """ + Quick loading and 
saving for reserach, saves full state of the workspace. + + However, loading a snapshot assumes the code stays exactly the same. + Use save_checkpoint for long-term storage. + """ + path = pathlib.Path(self.output_dir).joinpath('snapshots', f'{tag}.pkl') + path.parent.mkdir(parents=False, exist_ok=True) + torch.save(self, path.open('wb'), pickle_module=dill) + return str(path.absolute()) + + @classmethod + def create_from_snapshot(cls, path): + return torch.load(open(path, 'rb'), pickle_module=dill) + + +def _copy_to_cpu(x): + if isinstance(x, torch.Tensor): + return x.detach().to('cpu') + elif isinstance(x, dict): + result = dict() + for k, v in x.items(): + result[k] = _copy_to_cpu(v) + return result + elif isinstance(x, list): + return [_copy_to_cpu(k) for k in x] + else: + return copy.deepcopy(x) diff --git a/diffusion_policy/workspace/train_bet_lowdim_workspace.py b/diffusion_policy/workspace/train_bet_lowdim_workspace.py new file mode 100644 index 0000000..c8909e1 --- /dev/null +++ b/diffusion_policy/workspace/train_bet_lowdim_workspace.py @@ -0,0 +1,293 @@ +if __name__ == "__main__": + import sys + import os + import pathlib + + ROOT_DIR = str(pathlib.Path(__file__).parent.parent.parent) + sys.path.append(ROOT_DIR) + os.chdir(ROOT_DIR) + +import os +import hydra +import torch +from omegaconf import OmegaConf +import pathlib +from torch.utils.data import DataLoader +import copy +import random +import wandb +import tqdm +import numpy as np +import shutil + +from diffusion_policy.common.pytorch_util import dict_apply, optimizer_to +from diffusion_policy.workspace.base_workspace import BaseWorkspace +from diffusion_policy.policy.bet_lowdim_policy import BETLowdimPolicy +from diffusion_policy.dataset.base_dataset import BaseLowdimDataset +from diffusion_policy.env_runner.base_lowdim_runner import BaseLowdimRunner +from diffusion_policy.common.checkpoint_util import TopKCheckpointManager +from diffusion_policy.model.common.normalizer import ( + LinearNormalizer, + 
SingleFieldLinearNormalizer +) +from diffusion_policy.common.json_logger import JsonLogger +from diffusers.training_utils import EMAModel + +OmegaConf.register_new_resolver("eval", eval, replace=True) + +# %% +class TrainBETLowdimWorkspace(BaseWorkspace): + include_keys = ['global_step', 'epoch'] + + def __init__(self, cfg: OmegaConf): + super().__init__(cfg) + + # set seed + seed = cfg.training.seed + torch.manual_seed(seed) + np.random.seed(seed) + random.seed(seed) + + # configure model + self.policy: BETLowdimPolicy + self.policy = hydra.utils.instantiate(cfg.policy) + + # configure training state + self.optimizer = self.policy.get_optimizer(**cfg.optimizer) + + self.global_step = 0 + self.epoch = 0 + + + def run(self): + cfg = copy.deepcopy(self.cfg) + OmegaConf.resolve(cfg) + + # resume training + if cfg.training.resume: + lastest_ckpt_path = self.get_checkpoint_path() + if lastest_ckpt_path.is_file(): + print(f"Resuming from checkpoint {lastest_ckpt_path}") + self.load_checkpoint(path=lastest_ckpt_path) + + # configure dataset + dataset: BaseLowdimDataset + dataset = hydra.utils.instantiate(cfg.task.dataset) + assert isinstance(dataset, BaseLowdimDataset) + train_dataloader = DataLoader(dataset, **cfg.dataloader) + + # configure validation dataset + val_dataset = dataset.get_validation_dataset() + val_dataloader = DataLoader(val_dataset, **cfg.val_dataloader) + + # set normalizer + normalizer = None + if cfg.training.enable_normalizer: + normalizer = dataset.get_normalizer() + else: + normalizer = LinearNormalizer() + normalizer['action'] = SingleFieldLinearNormalizer.create_identity() + normalizer['obs'] = SingleFieldLinearNormalizer.create_identity() + + self.policy.set_normalizer(normalizer) + + # fit action_ae (K-Means) + self.policy.fit_action_ae( + normalizer['action'].normalize( + dataset.get_all_actions())) + + # configure env runner + env_runner: BaseLowdimRunner + env_runner = hydra.utils.instantiate( + cfg.task.env_runner, + 
output_dir=self.output_dir) + assert isinstance(env_runner, BaseLowdimRunner) + + # configure logging + wandb_run = wandb.init( + dir=str(self.output_dir), + config=OmegaConf.to_container(cfg, resolve=True), + **cfg.logging + ) + wandb.config.update( + { + "output_dir": self.output_dir, + } + ) + + # configure checkpoint + topk_manager = TopKCheckpointManager( + save_dir=os.path.join(self.output_dir, 'checkpoints'), + **cfg.checkpoint.topk + ) + + # device transfer + device = torch.device(cfg.training.device) + self.policy.to(device) + optimizer_to(self.optimizer, device) + + # save batch for sampling + train_sampling_batch = None + + if cfg.training.debug: + cfg.training.num_epochs = 2 + cfg.training.max_train_steps = 3 + cfg.training.max_val_steps = 3 + cfg.training.rollout_every = 1 + cfg.training.checkpoint_every = 1 + cfg.training.val_every = 1 + cfg.training.sample_every = 1 + + # training loop + log_path = os.path.join(self.output_dir, 'logs.json.txt') + with JsonLogger(log_path) as json_logger: + for local_epoch_idx in range(cfg.training.num_epochs): + step_log = dict() + # ========= train for this epoch ========== + train_losses = list() + with tqdm.tqdm(train_dataloader, desc=f"Training epoch {self.epoch}", + leave=False, mininterval=cfg.training.tqdm_interval_sec) as tepoch: + for batch_idx, batch in enumerate(tepoch): + # device transfer + batch = dict_apply(batch, lambda x: x.to(device, non_blocking=True)) + if train_sampling_batch is None: + train_sampling_batch = batch + + # compute loss + raw_loss, loss_components = self.policy.compute_loss(batch) + loss = raw_loss / cfg.training.gradient_accumulate_every + loss.backward() + + # clip grad norm + torch.nn.utils.clip_grad_norm_( + self.policy.state_prior.parameters(), cfg.training.grad_norm_clip + ) + + # step optimizer + if self.global_step % cfg.training.gradient_accumulate_every == 0: + self.optimizer.step() + self.optimizer.zero_grad(set_to_none=True) + + # logging + raw_loss_cpu = raw_loss.item() 
+ tepoch.set_postfix(loss=raw_loss_cpu, refresh=False) + train_losses.append(raw_loss_cpu) + step_log = { + 'train_loss': raw_loss_cpu, + 'global_step': self.global_step, + 'epoch': self.epoch, + 'train_loss_offset': loss_components['offset'].item(), + 'train_loss_class': loss_components['class'].item() + } + + is_last_batch = (batch_idx == (len(train_dataloader)-1)) + if not is_last_batch: + # log of last step is combined with validation and rollout + wandb_run.log(step_log, step=self.global_step) + json_logger.log(step_log) + self.global_step += 1 + + if (cfg.training.max_train_steps is not None) \ + and batch_idx >= (cfg.training.max_train_steps-1): + break + + # at the end of each epoch + # replace train_loss with epoch average + train_loss = np.mean(train_losses) + step_log['train_loss'] = train_loss + + # ========= eval for this epoch ========== + self.policy.eval() + + # run rollout + if (self.epoch % cfg.training.rollout_every) == 0: + runner_log = env_runner.run(self.policy) + # log all + step_log.update(runner_log) + + # run validation + if (self.epoch % cfg.training.val_every) == 0: + with torch.no_grad(): + val_losses = list() + with tqdm.tqdm(val_dataloader, desc=f"Validation epoch {self.epoch}", + leave=False, mininterval=cfg.training.tqdm_interval_sec) as tepoch: + for batch_idx, batch in enumerate(tepoch): + batch = dict_apply(batch, lambda x: x.to(device, non_blocking=True)) + raw_loss, loss_components = self.policy.compute_loss(batch) + val_losses.append(raw_loss) + if (cfg.training.max_val_steps is not None) \ + and batch_idx >= (cfg.training.max_val_steps-1): + break + if len(val_losses) > 0: + val_loss = torch.mean(torch.tensor(val_losses)).item() + # log epoch average validation loss + step_log['val_loss'] = val_loss + + # run sample on a training batch + if (self.epoch % cfg.training.sample_every) == 0: + with torch.no_grad(): + # sample trajectory from training set, and evaluate difference + batch = train_sampling_batch + obs_dict = {'obs': 
batch['obs']} + gt_action = batch['action'] + + result = self.policy.predict_action(obs_dict) + if cfg.pred_action_steps_only: + pred_action = result['action'] + start = cfg.n_obs_steps - 1 + end = start + cfg.n_action_steps + gt_action = gt_action[:,start:end] + else: + pred_action = result['action_pred'] + mse = torch.nn.functional.mse_loss(pred_action, gt_action) + # log + step_log['train_action_mse_error'] = mse.item() + # release RAM + del batch + del obs_dict + del gt_action + del result + del pred_action + del mse + + # checkpoint + if (self.epoch % cfg.training.checkpoint_every) == 0: + # checkpointing + if cfg.checkpoint.save_last_ckpt: + self.save_checkpoint() + if cfg.checkpoint.save_last_snapshot: + self.save_snapshot() + + # sanitize metric names + metric_dict = dict() + for key, value in step_log.items(): + new_key = key.replace('/', '_') + metric_dict[new_key] = value + + # We can't copy the last checkpoint here + # since save_checkpoint uses threads. + # therefore at this point the file might have been empty! 
+ topk_ckpt_path = topk_manager.get_ckpt_path(metric_dict) + + if topk_ckpt_path is not None: + self.save_checkpoint(path=topk_ckpt_path) + # ========= eval end for this epoch ========== + self.policy.train() + + # end of epoch + # log of last step is combined with validation and rollout + wandb_run.log(step_log, step=self.global_step) + json_logger.log(step_log) + self.global_step += 1 + self.epoch += 1 + +@hydra.main( + version_base=None, + config_path=str(pathlib.Path(__file__).parent.parent.joinpath("config")), + config_name=pathlib.Path(__file__).stem) +def main(cfg): + workspace = TrainBETLowdimWorkspace(cfg) + workspace.run() + +if __name__ == "__main__": + main() diff --git a/diffusion_policy/workspace/train_diffusion_transformer_hybrid_workspace.py b/diffusion_policy/workspace/train_diffusion_transformer_hybrid_workspace.py new file mode 100644 index 0000000..38b3be1 --- /dev/null +++ b/diffusion_policy/workspace/train_diffusion_transformer_hybrid_workspace.py @@ -0,0 +1,295 @@ +if __name__ == "__main__": + import sys + import os + import pathlib + + ROOT_DIR = str(pathlib.Path(__file__).parent.parent.parent) + sys.path.append(ROOT_DIR) + os.chdir(ROOT_DIR) + +import os +import hydra +import torch +from omegaconf import OmegaConf +import pathlib +from torch.utils.data import DataLoader +import copy +import random +import wandb +import tqdm +import numpy as np +import shutil +from diffusion_policy.workspace.base_workspace import BaseWorkspace +from diffusion_policy.policy.diffusion_transformer_hybrid_image_policy import DiffusionTransformerHybridImagePolicy +from diffusion_policy.dataset.base_dataset import BaseImageDataset +from diffusion_policy.env_runner.base_image_runner import BaseImageRunner +from diffusion_policy.common.checkpoint_util import TopKCheckpointManager +from diffusion_policy.common.json_logger import JsonLogger +from diffusion_policy.common.pytorch_util import dict_apply, optimizer_to +from diffusion_policy.model.diffusion.ema_model 
import EMAModel +from diffusion_policy.model.common.lr_scheduler import get_scheduler + +OmegaConf.register_new_resolver("eval", eval, replace=True) + +class TrainDiffusionTransformerHybridWorkspace(BaseWorkspace): + include_keys = ['global_step', 'epoch'] + + def __init__(self, cfg: OmegaConf, output_dir=None): + super().__init__(cfg, output_dir=output_dir) + + # set seed + seed = cfg.training.seed + torch.manual_seed(seed) + np.random.seed(seed) + random.seed(seed) + + # configure model + self.model: DiffusionTransformerHybridImagePolicy = hydra.utils.instantiate(cfg.policy) + + self.ema_model: DiffusionTransformerHybridImagePolicy = None + if cfg.training.use_ema: + self.ema_model = copy.deepcopy(self.model) + + # configure training state + self.optimizer = self.model.get_optimizer(**cfg.optimizer) + + # configure training state + self.global_step = 0 + self.epoch = 0 + + def run(self): + cfg = copy.deepcopy(self.cfg) + + # resume training + if cfg.training.resume: + lastest_ckpt_path = self.get_checkpoint_path() + if lastest_ckpt_path.is_file(): + print(f"Resuming from checkpoint {lastest_ckpt_path}") + self.load_checkpoint(path=lastest_ckpt_path) + + # configure dataset + dataset: BaseImageDataset + dataset = hydra.utils.instantiate(cfg.task.dataset) + assert isinstance(dataset, BaseImageDataset) + train_dataloader = DataLoader(dataset, **cfg.dataloader) + normalizer = dataset.get_normalizer() + + # configure validation dataset + val_dataset = dataset.get_validation_dataset() + val_dataloader = DataLoader(val_dataset, **cfg.val_dataloader) + + self.model.set_normalizer(normalizer) + if cfg.training.use_ema: + self.ema_model.set_normalizer(normalizer) + + # configure lr scheduler + lr_scheduler = get_scheduler( + cfg.training.lr_scheduler, + optimizer=self.optimizer, + num_warmup_steps=cfg.training.lr_warmup_steps, + num_training_steps=( + len(train_dataloader) * cfg.training.num_epochs) \ + // cfg.training.gradient_accumulate_every, + # pytorch assumes 
stepping LRScheduler every epoch + # however huggingface diffusers steps it every batch + last_epoch=self.global_step-1 + ) + + # configure ema + ema: EMAModel = None + if cfg.training.use_ema: + ema = hydra.utils.instantiate( + cfg.ema, + model=self.ema_model) + + # configure env + env_runner: BaseImageRunner + env_runner = hydra.utils.instantiate( + cfg.task.env_runner, + output_dir=self.output_dir) + assert isinstance(env_runner, BaseImageRunner) + + # configure logging + wandb_run = wandb.init( + dir=str(self.output_dir), + config=OmegaConf.to_container(cfg, resolve=True), + **cfg.logging + ) + wandb.config.update( + { + "output_dir": self.output_dir, + } + ) + + # configure checkpoint + topk_manager = TopKCheckpointManager( + save_dir=os.path.join(self.output_dir, 'checkpoints'), + **cfg.checkpoint.topk + ) + + # device transfer + device = torch.device(cfg.training.device) + self.model.to(device) + if self.ema_model is not None: + self.ema_model.to(device) + optimizer_to(self.optimizer, device) + + # save batch for sampling + train_sampling_batch = None + + if cfg.training.debug: + cfg.training.num_epochs = 2 + cfg.training.max_train_steps = 3 + cfg.training.max_val_steps = 3 + cfg.training.rollout_every = 1 + cfg.training.checkpoint_every = 1 + cfg.training.val_every = 1 + cfg.training.sample_every = 1 + + # training loop + log_path = os.path.join(self.output_dir, 'logs.json.txt') + with JsonLogger(log_path) as json_logger: + for local_epoch_idx in range(cfg.training.num_epochs): + step_log = dict() + # ========= train for this epoch ========== + train_losses = list() + with tqdm.tqdm(train_dataloader, desc=f"Training epoch {self.epoch}", + leave=False, mininterval=cfg.training.tqdm_interval_sec) as tepoch: + for batch_idx, batch in enumerate(tepoch): + # device transfer + batch = dict_apply(batch, lambda x: x.to(device, non_blocking=True)) + if train_sampling_batch is None: + train_sampling_batch = batch + + # compute loss + raw_loss = 
self.model.compute_loss(batch) + loss = raw_loss / cfg.training.gradient_accumulate_every + loss.backward() + + # step optimizer + if self.global_step % cfg.training.gradient_accumulate_every == 0: + self.optimizer.step() + self.optimizer.zero_grad() + lr_scheduler.step() + + # update ema + if cfg.training.use_ema: + ema.step(self.model) + + # logging + raw_loss_cpu = raw_loss.item() + tepoch.set_postfix(loss=raw_loss_cpu, refresh=False) + train_losses.append(raw_loss_cpu) + step_log = { + 'train_loss': raw_loss_cpu, + 'global_step': self.global_step, + 'epoch': self.epoch, + 'lr': lr_scheduler.get_last_lr()[0] + } + + is_last_batch = (batch_idx == (len(train_dataloader)-1)) + if not is_last_batch: + # log of last step is combined with validation and rollout + wandb_run.log(step_log, step=self.global_step) + json_logger.log(step_log) + self.global_step += 1 + + if (cfg.training.max_train_steps is not None) \ + and batch_idx >= (cfg.training.max_train_steps-1): + break + + # at the end of each epoch + # replace train_loss with epoch average + train_loss = np.mean(train_losses) + step_log['train_loss'] = train_loss + + # ========= eval for this epoch ========== + policy = self.model + if cfg.training.use_ema: + policy = self.ema_model + policy.eval() + + # run rollout + if (self.epoch % cfg.training.rollout_every) == 0: + runner_log = env_runner.run(policy) + # log all + step_log.update(runner_log) + + # run validation + if (self.epoch % cfg.training.val_every) == 0: + with torch.no_grad(): + val_losses = list() + with tqdm.tqdm(val_dataloader, desc=f"Validation epoch {self.epoch}", + leave=False, mininterval=cfg.training.tqdm_interval_sec) as tepoch: + for batch_idx, batch in enumerate(tepoch): + batch = dict_apply(batch, lambda x: x.to(device, non_blocking=True)) + loss = self.model.compute_loss(batch) + val_losses.append(loss) + if (cfg.training.max_val_steps is not None) \ + and batch_idx >= (cfg.training.max_val_steps-1): + break + if len(val_losses) > 0: + 
val_loss = torch.mean(torch.tensor(val_losses)).item() + # log epoch average validation loss + step_log['val_loss'] = val_loss + + # run diffusion sampling on a training batch + if (self.epoch % cfg.training.sample_every) == 0: + with torch.no_grad(): + # sample trajectory from training set, and evaluate difference + batch = dict_apply(train_sampling_batch, lambda x: x.to(device, non_blocking=True)) + obs_dict = batch['obs'] + gt_action = batch['action'] + + result = policy.predict_action(obs_dict) + pred_action = result['action_pred'] + mse = torch.nn.functional.mse_loss(pred_action, gt_action) + step_log['train_action_mse_error'] = mse.item() + del batch + del obs_dict + del gt_action + del result + del pred_action + del mse + + # checkpoint + if (self.epoch % cfg.training.checkpoint_every) == 0: + # checkpointing + if cfg.checkpoint.save_last_ckpt: + self.save_checkpoint() + if cfg.checkpoint.save_last_snapshot: + self.save_snapshot() + + # sanitize metric names + metric_dict = dict() + for key, value in step_log.items(): + new_key = key.replace('/', '_') + metric_dict[new_key] = value + + # We can't copy the last checkpoint here + # since save_checkpoint uses threads. + # therefore at this point the file might have been empty! 
+ topk_ckpt_path = topk_manager.get_ckpt_path(metric_dict) + + if topk_ckpt_path is not None: + self.save_checkpoint(path=topk_ckpt_path) + # ========= eval end for this epoch ========== + policy.train() + + # end of epoch + # log of last step is combined with validation and rollout + wandb_run.log(step_log, step=self.global_step) + json_logger.log(step_log) + self.global_step += 1 + self.epoch += 1 + +@hydra.main( + version_base=None, + config_path=str(pathlib.Path(__file__).parent.parent.joinpath("config")), + config_name=pathlib.Path(__file__).stem) +def main(cfg): + workspace = TrainDiffusionTransformerHybridWorkspace(cfg) + workspace.run() + +if __name__ == "__main__": + main() diff --git a/diffusion_policy/workspace/train_diffusion_transformer_lowdim_workspace.py b/diffusion_policy/workspace/train_diffusion_transformer_lowdim_workspace.py new file mode 100644 index 0000000..cbfa54b --- /dev/null +++ b/diffusion_policy/workspace/train_diffusion_transformer_lowdim_workspace.py @@ -0,0 +1,303 @@ +if __name__ == "__main__": + import sys + import os + import pathlib + + ROOT_DIR = str(pathlib.Path(__file__).parent.parent.parent) + sys.path.append(ROOT_DIR) + os.chdir(ROOT_DIR) + +import os +import hydra +import torch +from omegaconf import OmegaConf +import pathlib +from torch.utils.data import DataLoader +import copy +import random +import wandb +import tqdm +import numpy as np +import shutil + +from diffusion_policy.common.pytorch_util import dict_apply, optimizer_to +from diffusion_policy.workspace.base_workspace import BaseWorkspace +from diffusion_policy.policy.diffusion_transformer_lowdim_policy import DiffusionTransformerLowdimPolicy +from diffusion_policy.dataset.base_dataset import BaseLowdimDataset +from diffusion_policy.env_runner.base_lowdim_runner import BaseLowdimRunner +from diffusion_policy.common.checkpoint_util import TopKCheckpointManager +from diffusion_policy.common.json_logger import JsonLogger +from diffusion_policy.model.common.lr_scheduler 
import get_scheduler +from diffusers.training_utils import EMAModel + +OmegaConf.register_new_resolver("eval", eval, replace=True) + +# %% +class TrainDiffusionTransformerLowdimWorkspace(BaseWorkspace): + include_keys = ['global_step', 'epoch'] + + def __init__(self, cfg: OmegaConf): + super().__init__(cfg) + + # set seed + seed = cfg.training.seed + torch.manual_seed(seed) + np.random.seed(seed) + random.seed(seed) + + # configure model + self.model: DiffusionTransformerLowdimPolicy + self.model = hydra.utils.instantiate(cfg.policy) + + self.ema_model: DiffusionTransformerLowdimPolicy = None + if cfg.training.use_ema: + self.ema_model = copy.deepcopy(self.model) + + # configure training state + self.optimizer = self.model.get_optimizer(**cfg.optimizer) + + self.global_step = 0 + self.epoch = 0 + + def run(self): + cfg = copy.deepcopy(self.cfg) + + # resume training + if cfg.training.resume: + lastest_ckpt_path = self.get_checkpoint_path() + if lastest_ckpt_path.is_file(): + print(f"Resuming from checkpoint {lastest_ckpt_path}") + self.load_checkpoint(path=lastest_ckpt_path) + + # configure dataset + dataset: BaseLowdimDataset + dataset = hydra.utils.instantiate(cfg.task.dataset) + assert isinstance(dataset, BaseLowdimDataset) + train_dataloader = DataLoader(dataset, **cfg.dataloader) + normalizer = dataset.get_normalizer() + + # configure validation dataset + val_dataset = dataset.get_validation_dataset() + val_dataloader = DataLoader(val_dataset, **cfg.val_dataloader) + + self.model.set_normalizer(normalizer) + if cfg.training.use_ema: + self.ema_model.set_normalizer(normalizer) + + # configure lr scheduler + lr_scheduler = get_scheduler( + cfg.training.lr_scheduler, + optimizer=self.optimizer, + num_warmup_steps=cfg.training.lr_warmup_steps, + num_training_steps=( + len(train_dataloader) * cfg.training.num_epochs) \ + // cfg.training.gradient_accumulate_every, + # pytorch assumes stepping LRScheduler every epoch + # however huggingface diffusers steps it every 
batch + last_epoch=self.global_step-1 + ) + + # configure ema + ema: EMAModel = None + if cfg.training.use_ema: + ema = hydra.utils.instantiate( + cfg.ema, + model=self.ema_model) + + # configure env runner + env_runner: BaseLowdimRunner + env_runner = hydra.utils.instantiate( + cfg.task.env_runner, + output_dir=self.output_dir) + assert isinstance(env_runner, BaseLowdimRunner) + + # configure logging + wandb_run = wandb.init( + dir=str(self.output_dir), + config=OmegaConf.to_container(cfg, resolve=True), + **cfg.logging + ) + wandb.config.update( + { + "output_dir": self.output_dir, + } + ) + + # configure checkpoint + topk_manager = TopKCheckpointManager( + save_dir=os.path.join(self.output_dir, 'checkpoints'), + **cfg.checkpoint.topk + ) + + # device transfer + device = torch.device(cfg.training.device) + self.model.to(device) + if self.ema_model is not None: + self.ema_model.to(device) + optimizer_to(self.optimizer, device) + + # save batch for sampling + train_sampling_batch = None + + if cfg.training.debug: + cfg.training.num_epochs = 2 + cfg.training.max_train_steps = 3 + cfg.training.max_val_steps = 3 + cfg.training.rollout_every = 1 + cfg.training.checkpoint_every = 1 + cfg.training.val_every = 1 + cfg.training.sample_every = 1 + + # training loop + log_path = os.path.join(self.output_dir, 'logs.json.txt') + with JsonLogger(log_path) as json_logger: + for local_epoch_idx in range(cfg.training.num_epochs): + step_log = dict() + # ========= train for this epoch ========== + train_losses = list() + with tqdm.tqdm(train_dataloader, desc=f"Training epoch {self.epoch}", + leave=False, mininterval=cfg.training.tqdm_interval_sec) as tepoch: + for batch_idx, batch in enumerate(tepoch): + # device transfer + batch = dict_apply(batch, lambda x: x.to(device, non_blocking=True)) + if train_sampling_batch is None: + train_sampling_batch = batch + + # compute loss + raw_loss = self.model.compute_loss(batch) + loss = raw_loss / cfg.training.gradient_accumulate_every + 
loss.backward() + + # step optimizer + if self.global_step % cfg.training.gradient_accumulate_every == 0: + self.optimizer.step() + self.optimizer.zero_grad() + lr_scheduler.step() + + # update ema + if cfg.training.use_ema: + ema.step(self.model) + + # logging + raw_loss_cpu = raw_loss.item() + tepoch.set_postfix(loss=raw_loss_cpu, refresh=False) + train_losses.append(raw_loss_cpu) + step_log = { + 'train_loss': raw_loss_cpu, + 'global_step': self.global_step, + 'epoch': self.epoch, + 'lr': lr_scheduler.get_last_lr()[0] + } + + is_last_batch = (batch_idx == (len(train_dataloader)-1)) + if not is_last_batch: + # log of last step is combined with validation and rollout + wandb_run.log(step_log, step=self.global_step) + json_logger.log(step_log) + self.global_step += 1 + + if (cfg.training.max_train_steps is not None) \ + and batch_idx >= (cfg.training.max_train_steps-1): + break + + # at the end of each epoch + # replace train_loss with epoch average + train_loss = np.mean(train_losses) + step_log['train_loss'] = train_loss + + # ========= eval for this epoch ========== + policy = self.model + if cfg.training.use_ema: + policy = self.ema_model + policy.eval() + + # run rollout + if (self.epoch % cfg.training.rollout_every) == 0: + runner_log = env_runner.run(policy) + # log all + step_log.update(runner_log) + + # run validation + if (self.epoch % cfg.training.val_every) == 0: + with torch.no_grad(): + val_losses = list() + with tqdm.tqdm(val_dataloader, desc=f"Validation epoch {self.epoch}", + leave=False, mininterval=cfg.training.tqdm_interval_sec) as tepoch: + for batch_idx, batch in enumerate(tepoch): + batch = dict_apply(batch, lambda x: x.to(device, non_blocking=True)) + loss = self.model.compute_loss(batch) + val_losses.append(loss) + if (cfg.training.max_val_steps is not None) \ + and batch_idx >= (cfg.training.max_val_steps-1): + break + if len(val_losses) > 0: + val_loss = torch.mean(torch.tensor(val_losses)).item() + # log epoch average validation loss + 
step_log['val_loss'] = val_loss + + # run diffusion sampling on a training batch + if (self.epoch % cfg.training.sample_every) == 0: + with torch.no_grad(): + # sample trajectory from training set, and evaluate difference + batch = dict_apply(train_sampling_batch, lambda x: x.to(device, non_blocking=True)) + obs_dict = {'obs': batch['obs']} + gt_action = batch['action'] + + result = policy.predict_action(obs_dict) + if cfg.pred_action_steps_only: + pred_action = result['action'] + start = cfg.n_obs_steps - 1 + end = start + cfg.n_action_steps + gt_action = gt_action[:,start:end] + else: + pred_action = result['action_pred'] + mse = torch.nn.functional.mse_loss(pred_action, gt_action) + step_log['train_action_mse_error'] = mse.item() + del batch + del obs_dict + del gt_action + del result + del pred_action + del mse + + # checkpoint + if (self.epoch % cfg.training.checkpoint_every) == 0: + # checkpointing + if cfg.checkpoint.save_last_ckpt: + self.save_checkpoint() + if cfg.checkpoint.save_last_snapshot: + self.save_snapshot() + + # sanitize metric names + metric_dict = dict() + for key, value in step_log.items(): + new_key = key.replace('/', '_') + metric_dict[new_key] = value + + # We can't copy the last checkpoint here + # since save_checkpoint uses threads. + # therefore at this point the file might have been empty! 
+ topk_ckpt_path = topk_manager.get_ckpt_path(metric_dict) + + if topk_ckpt_path is not None: + self.save_checkpoint(path=topk_ckpt_path) + # ========= eval end for this epoch ========== + policy.train() + + # end of epoch + # log of last step is combined with validation and rollout + wandb_run.log(step_log, step=self.global_step) + json_logger.log(step_log) + self.global_step += 1 + self.epoch += 1 + +@hydra.main( + version_base=None, + config_path=str(pathlib.Path(__file__).parent.parent.joinpath("config")), + config_name=pathlib.Path(__file__).stem) +def main(cfg): + workspace = TrainDiffusionTransformerLowdimWorkspace(cfg) + workspace.run() + +if __name__ == "__main__": + main() diff --git a/diffusion_policy/workspace/train_diffusion_unet_hybrid_workspace.py b/diffusion_policy/workspace/train_diffusion_unet_hybrid_workspace.py new file mode 100644 index 0000000..9219427 --- /dev/null +++ b/diffusion_policy/workspace/train_diffusion_unet_hybrid_workspace.py @@ -0,0 +1,296 @@ +if __name__ == "__main__": + import sys + import os + import pathlib + + ROOT_DIR = str(pathlib.Path(__file__).parent.parent.parent) + sys.path.append(ROOT_DIR) + os.chdir(ROOT_DIR) + +import os +import hydra +import torch +from omegaconf import OmegaConf +import pathlib +from torch.utils.data import DataLoader +import copy +import random +import wandb +import tqdm +import numpy as np +import shutil +from diffusion_policy.workspace.base_workspace import BaseWorkspace +from diffusion_policy.policy.diffusion_unet_hybrid_image_policy import DiffusionUnetHybridImagePolicy +from diffusion_policy.dataset.base_dataset import BaseImageDataset +from diffusion_policy.env_runner.base_image_runner import BaseImageRunner +from diffusion_policy.common.checkpoint_util import TopKCheckpointManager +from diffusion_policy.common.json_logger import JsonLogger +from diffusion_policy.common.pytorch_util import dict_apply, optimizer_to +from diffusion_policy.model.diffusion.ema_model import EMAModel +from 
OmegaConf.register_new_resolver("eval", eval, replace=True)

class TrainDiffusionUnetHybridWorkspace(BaseWorkspace):
    """Training workspace for a DiffusionUnetHybridImagePolicy.

    Owns the policy, an optional EMA copy, the optimizer and the
    ``global_step`` / ``epoch`` counters (persisted across resume via
    ``include_keys``). ``run()`` executes the full epoch loop:
    train -> rollout -> validation -> action-MSE sampling -> checkpoint.
    """
    include_keys = ['global_step', 'epoch']

    def __init__(self, cfg: OmegaConf, output_dir=None):
        """Build model, optional EMA clone and optimizer from the hydra cfg."""
        super().__init__(cfg, output_dir=output_dir)

        # set seed for torch, numpy and python's RNG
        seed = cfg.training.seed
        torch.manual_seed(seed)
        np.random.seed(seed)
        random.seed(seed)

        # configure model
        self.model: DiffusionUnetHybridImagePolicy = hydra.utils.instantiate(cfg.policy)

        # EMA copy is a deep copy so its weights evolve independently
        self.ema_model: DiffusionUnetHybridImagePolicy = None
        if cfg.training.use_ema:
            self.ema_model = copy.deepcopy(self.model)

        # configure training state
        self.optimizer = hydra.utils.instantiate(
            cfg.optimizer, params=self.model.parameters())

        # training progress counters (restored on resume via include_keys)
        self.global_step = 0
        self.epoch = 0

    def run(self):
        """Run the full training loop described in the class docstring."""
        cfg = copy.deepcopy(self.cfg)

        # resume training
        if cfg.training.resume:
            # NOTE(review): 'lastest' (sic) is a pre-existing local-name typo.
            lastest_ckpt_path = self.get_checkpoint_path()
            if lastest_ckpt_path.is_file():
                print(f"Resuming from checkpoint {lastest_ckpt_path}")
                self.load_checkpoint(path=lastest_ckpt_path)

        # configure dataset
        dataset: BaseImageDataset
        dataset = hydra.utils.instantiate(cfg.task.dataset)
        assert isinstance(dataset, BaseImageDataset)
        train_dataloader = DataLoader(dataset, **cfg.dataloader)
        normalizer = dataset.get_normalizer()

        # configure validation dataset
        val_dataset = dataset.get_validation_dataset()
        val_dataloader = DataLoader(val_dataset, **cfg.val_dataloader)

        # both live and EMA policy share the dataset's normalizer
        self.model.set_normalizer(normalizer)
        if cfg.training.use_ema:
            self.ema_model.set_normalizer(normalizer)

        # configure lr scheduler
        lr_scheduler = get_scheduler(
            cfg.training.lr_scheduler,
            optimizer=self.optimizer,
            num_warmup_steps=cfg.training.lr_warmup_steps,
            num_training_steps=(
                len(train_dataloader) * cfg.training.num_epochs) \
                    // cfg.training.gradient_accumulate_every,
            # pytorch assumes stepping LRScheduler every epoch
            # however huggingface diffusers steps it every batch
            last_epoch=self.global_step-1
        )

        # configure ema
        ema: EMAModel = None
        if cfg.training.use_ema:
            ema = hydra.utils.instantiate(
                cfg.ema,
                model=self.ema_model)

        # configure env
        env_runner: BaseImageRunner
        env_runner = hydra.utils.instantiate(
            cfg.task.env_runner,
            output_dir=self.output_dir)
        assert isinstance(env_runner, BaseImageRunner)

        # configure logging
        wandb_run = wandb.init(
            dir=str(self.output_dir),
            config=OmegaConf.to_container(cfg, resolve=True),
            **cfg.logging
        )
        wandb.config.update(
            {
                "output_dir": self.output_dir,
            }
        )

        # configure checkpoint (keeps the top-k checkpoints by metric)
        topk_manager = TopKCheckpointManager(
            save_dir=os.path.join(self.output_dir, 'checkpoints'),
            **cfg.checkpoint.topk
        )

        # device transfer
        device = torch.device(cfg.training.device)
        self.model.to(device)
        if self.ema_model is not None:
            self.ema_model.to(device)
        optimizer_to(self.optimizer, device)

        # save batch for sampling (filled with the first train batch below)
        train_sampling_batch = None

        # debug mode shrinks every cadence to make one full cycle run fast
        if cfg.training.debug:
            cfg.training.num_epochs = 2
            cfg.training.max_train_steps = 3
            cfg.training.max_val_steps = 3
            cfg.training.rollout_every = 1
            cfg.training.checkpoint_every = 1
            cfg.training.val_every = 1
            cfg.training.sample_every = 1

        # training loop
        log_path = os.path.join(self.output_dir, 'logs.json.txt')
        with JsonLogger(log_path) as json_logger:
            for local_epoch_idx in range(cfg.training.num_epochs):
                step_log = dict()
                # ========= train for this epoch ==========
                train_losses = list()
                with tqdm.tqdm(train_dataloader, desc=f"Training epoch {self.epoch}",
                        leave=False, mininterval=cfg.training.tqdm_interval_sec) as tepoch:
                    for batch_idx, batch in enumerate(tepoch):
                        # device transfer
                        batch = dict_apply(batch, lambda x: x.to(device, non_blocking=True))
                        if train_sampling_batch is None:
                            train_sampling_batch = batch

                        # compute loss (scaled for gradient accumulation)
                        raw_loss = self.model.compute_loss(batch)
                        loss = raw_loss / cfg.training.gradient_accumulate_every
                        loss.backward()

                        # step optimizer
                        # NOTE(review): at global_step 0 this fires before any
                        # accumulation (0 % k == 0) — confirm intended.
                        if self.global_step % cfg.training.gradient_accumulate_every == 0:
                            self.optimizer.step()
                            self.optimizer.zero_grad()
                            lr_scheduler.step()

                        # update ema
                        if cfg.training.use_ema:
                            ema.step(self.model)

                        # logging
                        raw_loss_cpu = raw_loss.item()
                        tepoch.set_postfix(loss=raw_loss_cpu, refresh=False)
                        train_losses.append(raw_loss_cpu)
                        step_log = {
                            'train_loss': raw_loss_cpu,
                            'global_step': self.global_step,
                            'epoch': self.epoch,
                            'lr': lr_scheduler.get_last_lr()[0]
                        }

                        is_last_batch = (batch_idx == (len(train_dataloader)-1))
                        if not is_last_batch:
                            # log of last step is combined with validation and rollout
                            wandb_run.log(step_log, step=self.global_step)
                            json_logger.log(step_log)
                            self.global_step += 1

                        if (cfg.training.max_train_steps is not None) \
                            and batch_idx >= (cfg.training.max_train_steps-1):
                            break

                # at the end of each epoch
                # replace train_loss with epoch average
                train_loss = np.mean(train_losses)
                step_log['train_loss'] = train_loss

                # ========= eval for this epoch ==========
                # evaluate with the EMA weights when EMA is enabled
                policy = self.model
                if cfg.training.use_ema:
                    policy = self.ema_model
                policy.eval()

                # run rollout
                if (self.epoch % cfg.training.rollout_every) == 0:
                    runner_log = env_runner.run(policy)
                    # log all
                    step_log.update(runner_log)

                # run validation
                if (self.epoch % cfg.training.val_every) == 0:
                    with torch.no_grad():
                        # NOTE(review): val_losses collects 0-dim tensors;
                        # torch.tensor(...) below copies them host-side.
                        val_losses = list()
                        with tqdm.tqdm(val_dataloader, desc=f"Validation epoch {self.epoch}",
                                leave=False, mininterval=cfg.training.tqdm_interval_sec) as tepoch:
                            for batch_idx, batch in enumerate(tepoch):
                                batch = dict_apply(batch, lambda x: x.to(device, non_blocking=True))
                                loss = self.model.compute_loss(batch)
                                val_losses.append(loss)
                                if (cfg.training.max_val_steps is not None) \
                                    and batch_idx >= (cfg.training.max_val_steps-1):
                                    break
                        if len(val_losses) > 0:
                            val_loss = torch.mean(torch.tensor(val_losses)).item()
                            # log epoch average validation loss
                            step_log['val_loss'] = val_loss

                # run diffusion sampling on a training batch
                if (self.epoch % cfg.training.sample_every) == 0:
                    with torch.no_grad():
                        # sample trajectory from training set, and evaluate difference
                        batch = dict_apply(train_sampling_batch, lambda x: x.to(device, non_blocking=True))
                        obs_dict = batch['obs']
                        gt_action = batch['action']

                        result = policy.predict_action(obs_dict)
                        pred_action = result['action_pred']
                        mse = torch.nn.functional.mse_loss(pred_action, gt_action)
                        step_log['train_action_mse_error'] = mse.item()
                        # free references so the sampling tensors can be collected
                        del batch
                        del obs_dict
                        del gt_action
                        del result
                        del pred_action
                        del mse

                # checkpoint
                if (self.epoch % cfg.training.checkpoint_every) == 0:
                    # checkpointing
                    if cfg.checkpoint.save_last_ckpt:
                        self.save_checkpoint()
                    if cfg.checkpoint.save_last_snapshot:
                        self.save_snapshot()

                    # sanitize metric names ('/' breaks checkpoint filenames)
                    metric_dict = dict()
                    for key, value in step_log.items():
                        new_key = key.replace('/', '_')
                        metric_dict[new_key] = value

                    # We can't copy the last checkpoint here
                    # since save_checkpoint uses threads.
                    # therefore at this point the file might have been empty!
                    topk_ckpt_path = topk_manager.get_ckpt_path(metric_dict)

                    if topk_ckpt_path is not None:
                        self.save_checkpoint(path=topk_ckpt_path)
                # ========= eval end for this epoch ==========
                policy.train()

                # end of epoch
                # log of last step is combined with validation and rollout
                wandb_run.log(step_log, step=self.global_step)
                json_logger.log(step_log)
                self.global_step += 1
                self.epoch += 1

@hydra.main(
    version_base=None,
    config_path=str(pathlib.Path(__file__).parent.parent.joinpath("config")),
    config_name=pathlib.Path(__file__).stem)
def main(cfg):
    """Hydra entry point: build the workspace from cfg and run training."""
    workspace = TrainDiffusionUnetHybridWorkspace(cfg)
    workspace.run()

if __name__ == "__main__":
    main()
OmegaConf.register_new_resolver("eval", eval, replace=True)

class TrainDiffusionUnetImageWorkspace(BaseWorkspace):
    """Training workspace for a DiffusionUnetImagePolicy.

    Same epoch loop as the other diffusion workspaces
    (train -> rollout -> validation -> sampling -> checkpoint), plus an
    option to freeze the observation encoder during training.
    """
    include_keys = ['global_step', 'epoch']

    def __init__(self, cfg: OmegaConf, output_dir=None):
        """Build model, optional EMA clone and optimizer from the hydra cfg."""
        super().__init__(cfg, output_dir=output_dir)

        # set seed for torch, numpy and python's RNG
        seed = cfg.training.seed
        torch.manual_seed(seed)
        np.random.seed(seed)
        random.seed(seed)

        # configure model
        self.model: DiffusionUnetImagePolicy = hydra.utils.instantiate(cfg.policy)

        # EMA copy is a deep copy so its weights evolve independently
        self.ema_model: DiffusionUnetImagePolicy = None
        if cfg.training.use_ema:
            self.ema_model = copy.deepcopy(self.model)

        # configure training state
        self.optimizer = hydra.utils.instantiate(
            cfg.optimizer, params=self.model.parameters())

        # training progress counters (restored on resume via include_keys)
        self.global_step = 0
        self.epoch = 0

    def run(self):
        """Run the full training loop described in the class docstring."""
        cfg = copy.deepcopy(self.cfg)

        # resume training
        if cfg.training.resume:
            # NOTE(review): 'lastest' (sic) is a pre-existing local-name typo.
            lastest_ckpt_path = self.get_checkpoint_path()
            if lastest_ckpt_path.is_file():
                print(f"Resuming from checkpoint {lastest_ckpt_path}")
                self.load_checkpoint(path=lastest_ckpt_path)

        # configure dataset
        dataset: BaseImageDataset
        dataset = hydra.utils.instantiate(cfg.task.dataset)
        assert isinstance(dataset, BaseImageDataset)
        train_dataloader = DataLoader(dataset, **cfg.dataloader)
        normalizer = dataset.get_normalizer()

        # configure validation dataset
        val_dataset = dataset.get_validation_dataset()
        val_dataloader = DataLoader(val_dataset, **cfg.val_dataloader)

        # both live and EMA policy share the dataset's normalizer
        self.model.set_normalizer(normalizer)
        if cfg.training.use_ema:
            self.ema_model.set_normalizer(normalizer)

        # configure lr scheduler
        lr_scheduler = get_scheduler(
            cfg.training.lr_scheduler,
            optimizer=self.optimizer,
            num_warmup_steps=cfg.training.lr_warmup_steps,
            num_training_steps=(
                len(train_dataloader) * cfg.training.num_epochs) \
                    // cfg.training.gradient_accumulate_every,
            # pytorch assumes stepping LRScheduler every epoch
            # however huggingface diffusers steps it every batch
            last_epoch=self.global_step-1
        )

        # configure ema
        ema: EMAModel = None
        if cfg.training.use_ema:
            ema = hydra.utils.instantiate(
                cfg.ema,
                model=self.ema_model)

        # configure env
        env_runner: BaseImageRunner
        env_runner = hydra.utils.instantiate(
            cfg.task.env_runner,
            output_dir=self.output_dir)
        assert isinstance(env_runner, BaseImageRunner)

        # configure logging
        wandb_run = wandb.init(
            dir=str(self.output_dir),
            config=OmegaConf.to_container(cfg, resolve=True),
            **cfg.logging
        )
        wandb.config.update(
            {
                "output_dir": self.output_dir,
            }
        )

        # configure checkpoint (keeps the top-k checkpoints by metric)
        topk_manager = TopKCheckpointManager(
            save_dir=os.path.join(self.output_dir, 'checkpoints'),
            **cfg.checkpoint.topk
        )

        # device transfer
        device = torch.device(cfg.training.device)
        self.model.to(device)
        if self.ema_model is not None:
            self.ema_model.to(device)
        optimizer_to(self.optimizer, device)

        # save batch for sampling (filled with the first train batch below)
        train_sampling_batch = None

        # debug mode shrinks every cadence to make one full cycle run fast
        if cfg.training.debug:
            cfg.training.num_epochs = 2
            cfg.training.max_train_steps = 3
            cfg.training.max_val_steps = 3
            cfg.training.rollout_every = 1
            cfg.training.checkpoint_every = 1
            cfg.training.val_every = 1
            cfg.training.sample_every = 1

        # training loop
        log_path = os.path.join(self.output_dir, 'logs.json.txt')
        with JsonLogger(log_path) as json_logger:
            for local_epoch_idx in range(cfg.training.num_epochs):
                step_log = dict()
                # ========= train for this epoch ==========
                # optionally freeze the vision encoder (no grads, eval-mode BN)
                if cfg.training.freeze_encoder:
                    self.model.obs_encoder.eval()
                    self.model.obs_encoder.requires_grad_(False)

                train_losses = list()
                with tqdm.tqdm(train_dataloader, desc=f"Training epoch {self.epoch}",
                        leave=False, mininterval=cfg.training.tqdm_interval_sec) as tepoch:
                    for batch_idx, batch in enumerate(tepoch):
                        # device transfer
                        batch = dict_apply(batch, lambda x: x.to(device, non_blocking=True))
                        if train_sampling_batch is None:
                            train_sampling_batch = batch

                        # compute loss (scaled for gradient accumulation)
                        raw_loss = self.model.compute_loss(batch)
                        loss = raw_loss / cfg.training.gradient_accumulate_every
                        loss.backward()

                        # step optimizer
                        # NOTE(review): at global_step 0 this fires before any
                        # accumulation (0 % k == 0) — confirm intended.
                        if self.global_step % cfg.training.gradient_accumulate_every == 0:
                            self.optimizer.step()
                            self.optimizer.zero_grad()
                            lr_scheduler.step()

                        # update ema
                        if cfg.training.use_ema:
                            ema.step(self.model)

                        # logging
                        raw_loss_cpu = raw_loss.item()
                        tepoch.set_postfix(loss=raw_loss_cpu, refresh=False)
                        train_losses.append(raw_loss_cpu)
                        step_log = {
                            'train_loss': raw_loss_cpu,
                            'global_step': self.global_step,
                            'epoch': self.epoch,
                            'lr': lr_scheduler.get_last_lr()[0]
                        }

                        is_last_batch = (batch_idx == (len(train_dataloader)-1))
                        if not is_last_batch:
                            # log of last step is combined with validation and rollout
                            wandb_run.log(step_log, step=self.global_step)
                            json_logger.log(step_log)
                            self.global_step += 1

                        if (cfg.training.max_train_steps is not None) \
                            and batch_idx >= (cfg.training.max_train_steps-1):
                            break

                # at the end of each epoch
                # replace train_loss with epoch average
                train_loss = np.mean(train_losses)
                step_log['train_loss'] = train_loss

                # ========= eval for this epoch ==========
                # evaluate with the EMA weights when EMA is enabled
                policy = self.model
                if cfg.training.use_ema:
                    policy = self.ema_model
                policy.eval()

                # run rollout
                if (self.epoch % cfg.training.rollout_every) == 0:
                    runner_log = env_runner.run(policy)
                    # log all
                    step_log.update(runner_log)

                # run validation
                if (self.epoch % cfg.training.val_every) == 0:
                    with torch.no_grad():
                        # NOTE(review): val_losses collects 0-dim tensors;
                        # torch.tensor(...) below copies them host-side.
                        val_losses = list()
                        with tqdm.tqdm(val_dataloader, desc=f"Validation epoch {self.epoch}",
                                leave=False, mininterval=cfg.training.tqdm_interval_sec) as tepoch:
                            for batch_idx, batch in enumerate(tepoch):
                                batch = dict_apply(batch, lambda x: x.to(device, non_blocking=True))
                                loss = self.model.compute_loss(batch)
                                val_losses.append(loss)
                                if (cfg.training.max_val_steps is not None) \
                                    and batch_idx >= (cfg.training.max_val_steps-1):
                                    break
                        if len(val_losses) > 0:
                            val_loss = torch.mean(torch.tensor(val_losses)).item()
                            # log epoch average validation loss
                            step_log['val_loss'] = val_loss

                # run diffusion sampling on a training batch
                if (self.epoch % cfg.training.sample_every) == 0:
                    with torch.no_grad():
                        # sample trajectory from training set, and evaluate difference
                        batch = dict_apply(train_sampling_batch, lambda x: x.to(device, non_blocking=True))
                        obs_dict = batch['obs']
                        gt_action = batch['action']

                        result = policy.predict_action(obs_dict)
                        pred_action = result['action_pred']
                        mse = torch.nn.functional.mse_loss(pred_action, gt_action)
                        step_log['train_action_mse_error'] = mse.item()
                        # free references so the sampling tensors can be collected
                        del batch
                        del obs_dict
                        del gt_action
                        del result
                        del pred_action
                        del mse

                # checkpoint
                if (self.epoch % cfg.training.checkpoint_every) == 0:
                    # checkpointing
                    if cfg.checkpoint.save_last_ckpt:
                        self.save_checkpoint()
                    if cfg.checkpoint.save_last_snapshot:
                        self.save_snapshot()

                    # sanitize metric names ('/' breaks checkpoint filenames)
                    metric_dict = dict()
                    for key, value in step_log.items():
                        new_key = key.replace('/', '_')
                        metric_dict[new_key] = value

                    # We can't copy the last checkpoint here
                    # since save_checkpoint uses threads.
                    # therefore at this point the file might have been empty!
                    topk_ckpt_path = topk_manager.get_ckpt_path(metric_dict)

                    if topk_ckpt_path is not None:
                        self.save_checkpoint(path=topk_ckpt_path)
                # ========= eval end for this epoch ==========
                policy.train()

                # end of epoch
                # log of last step is combined with validation and rollout
                wandb_run.log(step_log, step=self.global_step)
                json_logger.log(step_log)
                self.global_step += 1
                self.epoch += 1

@hydra.main(
    version_base=None,
    config_path=str(pathlib.Path(__file__).parent.parent.joinpath("config")),
    config_name=pathlib.Path(__file__).stem)
def main(cfg):
    """Hydra entry point: build the workspace from cfg and run training."""
    workspace = TrainDiffusionUnetImageWorkspace(cfg)
    workspace.run()

if __name__ == "__main__":
    main()
OmegaConf.register_new_resolver("eval", eval, replace=True)

# %%
class TrainDiffusionUnetLowdimWorkspace(BaseWorkspace):
    """Training workspace for a DiffusionUnetLowdimPolicy.

    Same epoch loop as the image workspaces, but low-dimensional
    observations and optional ``pred_action_steps_only`` evaluation.
    Note: EMAModel here comes from ``diffusers.training_utils`` (the other
    workspaces use the project-local EMA implementation).
    """
    include_keys = ['global_step', 'epoch']

    def __init__(self, cfg: OmegaConf, output_dir=None):
        """Build model, optional EMA clone and optimizer from the hydra cfg."""
        super().__init__(cfg, output_dir=output_dir)

        # set seed for torch, numpy and python's RNG
        seed = cfg.training.seed
        torch.manual_seed(seed)
        np.random.seed(seed)
        random.seed(seed)

        # configure model
        self.model: DiffusionUnetLowdimPolicy
        self.model = hydra.utils.instantiate(cfg.policy)

        # EMA copy is a deep copy so its weights evolve independently
        self.ema_model: DiffusionUnetLowdimPolicy = None
        if cfg.training.use_ema:
            self.ema_model = copy.deepcopy(self.model)

        # configure training state
        self.optimizer = hydra.utils.instantiate(
            cfg.optimizer, params=self.model.parameters())

        # training progress counters (restored on resume via include_keys)
        self.global_step = 0
        self.epoch = 0

    def run(self):
        """Run the full training loop described in the class docstring."""
        cfg = copy.deepcopy(self.cfg)

        # resume training
        if cfg.training.resume:
            # NOTE(review): 'lastest' (sic) is a pre-existing local-name typo.
            lastest_ckpt_path = self.get_checkpoint_path()
            if lastest_ckpt_path.is_file():
                print(f"Resuming from checkpoint {lastest_ckpt_path}")
                self.load_checkpoint(path=lastest_ckpt_path)

        # configure dataset
        dataset: BaseLowdimDataset
        dataset = hydra.utils.instantiate(cfg.task.dataset)
        assert isinstance(dataset, BaseLowdimDataset)
        train_dataloader = DataLoader(dataset, **cfg.dataloader)
        normalizer = dataset.get_normalizer()

        # configure validation dataset
        val_dataset = dataset.get_validation_dataset()
        val_dataloader = DataLoader(val_dataset, **cfg.val_dataloader)

        # both live and EMA policy share the dataset's normalizer
        self.model.set_normalizer(normalizer)
        if cfg.training.use_ema:
            self.ema_model.set_normalizer(normalizer)

        # configure lr scheduler
        lr_scheduler = get_scheduler(
            cfg.training.lr_scheduler,
            optimizer=self.optimizer,
            num_warmup_steps=cfg.training.lr_warmup_steps,
            num_training_steps=(
                len(train_dataloader) * cfg.training.num_epochs) \
                    // cfg.training.gradient_accumulate_every,
            # pytorch assumes stepping LRScheduler every epoch
            # however huggingface diffusers steps it every batch
            last_epoch=self.global_step-1
        )

        # configure ema
        ema: EMAModel = None
        if cfg.training.use_ema:
            ema = hydra.utils.instantiate(
                cfg.ema,
                model=self.ema_model)

        # configure env runner
        env_runner: BaseLowdimRunner
        env_runner = hydra.utils.instantiate(
            cfg.task.env_runner,
            output_dir=self.output_dir)
        assert isinstance(env_runner, BaseLowdimRunner)

        # configure logging
        wandb_run = wandb.init(
            dir=str(self.output_dir),
            config=OmegaConf.to_container(cfg, resolve=True),
            **cfg.logging
        )
        wandb.config.update(
            {
                "output_dir": self.output_dir,
            }
        )

        # configure checkpoint (keeps the top-k checkpoints by metric)
        topk_manager = TopKCheckpointManager(
            save_dir=os.path.join(self.output_dir, 'checkpoints'),
            **cfg.checkpoint.topk
        )

        # device transfer
        device = torch.device(cfg.training.device)
        self.model.to(device)
        if self.ema_model is not None:
            self.ema_model.to(device)
        optimizer_to(self.optimizer, device)

        # save batch for sampling (filled with the first train batch below)
        train_sampling_batch = None

        # debug mode shrinks every cadence to make one full cycle run fast
        if cfg.training.debug:
            cfg.training.num_epochs = 2
            cfg.training.max_train_steps = 3
            cfg.training.max_val_steps = 3
            cfg.training.rollout_every = 1
            cfg.training.checkpoint_every = 1
            cfg.training.val_every = 1
            cfg.training.sample_every = 1

        # training loop
        log_path = os.path.join(self.output_dir, 'logs.json.txt')
        with JsonLogger(log_path) as json_logger:
            for local_epoch_idx in range(cfg.training.num_epochs):
                step_log = dict()
                # ========= train for this epoch ==========
                train_losses = list()
                with tqdm.tqdm(train_dataloader, desc=f"Training epoch {self.epoch}",
                        leave=False, mininterval=cfg.training.tqdm_interval_sec) as tepoch:
                    for batch_idx, batch in enumerate(tepoch):
                        # device transfer
                        batch = dict_apply(batch, lambda x: x.to(device, non_blocking=True))
                        if train_sampling_batch is None:
                            train_sampling_batch = batch

                        # compute loss (scaled for gradient accumulation)
                        raw_loss = self.model.compute_loss(batch)
                        loss = raw_loss / cfg.training.gradient_accumulate_every
                        loss.backward()

                        # step optimizer
                        # NOTE(review): at global_step 0 this fires before any
                        # accumulation (0 % k == 0) — confirm intended.
                        if self.global_step % cfg.training.gradient_accumulate_every == 0:
                            self.optimizer.step()
                            self.optimizer.zero_grad()
                            lr_scheduler.step()

                        # update ema
                        if cfg.training.use_ema:
                            ema.step(self.model)

                        # logging
                        raw_loss_cpu = raw_loss.item()
                        tepoch.set_postfix(loss=raw_loss_cpu, refresh=False)
                        train_losses.append(raw_loss_cpu)
                        step_log = {
                            'train_loss': raw_loss_cpu,
                            'global_step': self.global_step,
                            'epoch': self.epoch,
                            'lr': lr_scheduler.get_last_lr()[0]
                        }

                        is_last_batch = (batch_idx == (len(train_dataloader)-1))
                        if not is_last_batch:
                            # log of last step is combined with validation and rollout
                            wandb_run.log(step_log, step=self.global_step)
                            json_logger.log(step_log)
                            self.global_step += 1

                        if (cfg.training.max_train_steps is not None) \
                            and batch_idx >= (cfg.training.max_train_steps-1):
                            break

                # at the end of each epoch
                # replace train_loss with epoch average
                train_loss = np.mean(train_losses)
                step_log['train_loss'] = train_loss

                # ========= eval for this epoch ==========
                # evaluate with the EMA weights when EMA is enabled
                policy = self.model
                if cfg.training.use_ema:
                    policy = self.ema_model
                policy.eval()

                # run rollout
                if (self.epoch % cfg.training.rollout_every) == 0:
                    runner_log = env_runner.run(policy)
                    # log all
                    step_log.update(runner_log)

                # run validation
                if (self.epoch % cfg.training.val_every) == 0:
                    with torch.no_grad():
                        # NOTE(review): val_losses collects 0-dim tensors;
                        # torch.tensor(...) below copies them host-side.
                        val_losses = list()
                        with tqdm.tqdm(val_dataloader, desc=f"Validation epoch {self.epoch}",
                                leave=False, mininterval=cfg.training.tqdm_interval_sec) as tepoch:
                            for batch_idx, batch in enumerate(tepoch):
                                batch = dict_apply(batch, lambda x: x.to(device, non_blocking=True))
                                loss = self.model.compute_loss(batch)
                                val_losses.append(loss)
                                if (cfg.training.max_val_steps is not None) \
                                    and batch_idx >= (cfg.training.max_val_steps-1):
                                    break
                        if len(val_losses) > 0:
                            val_loss = torch.mean(torch.tensor(val_losses)).item()
                            # log epoch average validation loss
                            step_log['val_loss'] = val_loss

                # run diffusion sampling on a training batch
                if (self.epoch % cfg.training.sample_every) == 0:
                    with torch.no_grad():
                        # sample trajectory from training set, and evaluate difference
                        # (train_sampling_batch was stored after the device
                        # transfer above, so no .to(device) is needed here)
                        batch = train_sampling_batch
                        obs_dict = {'obs': batch['obs']}
                        gt_action = batch['action']

                        result = policy.predict_action(obs_dict)
                        if cfg.pred_action_steps_only:
                            # compare only the executed action horizon
                            pred_action = result['action']
                            start = cfg.n_obs_steps - 1
                            end = start + cfg.n_action_steps
                            gt_action = gt_action[:,start:end]
                        else:
                            pred_action = result['action_pred']
                        mse = torch.nn.functional.mse_loss(pred_action, gt_action)
                        # log
                        step_log['train_action_mse_error'] = mse.item()
                        # release RAM
                        del batch
                        del obs_dict
                        del gt_action
                        del result
                        del pred_action
                        del mse

                # checkpoint
                if (self.epoch % cfg.training.checkpoint_every) == 0:
                    # checkpointing
                    if cfg.checkpoint.save_last_ckpt:
                        self.save_checkpoint()
                    if cfg.checkpoint.save_last_snapshot:
                        self.save_snapshot()

                    # sanitize metric names ('/' breaks checkpoint filenames)
                    metric_dict = dict()
                    for key, value in step_log.items():
                        new_key = key.replace('/', '_')
                        metric_dict[new_key] = value

                    # We can't copy the last checkpoint here
                    # since save_checkpoint uses threads.
                    # therefore at this point the file might have been empty!
                    topk_ckpt_path = topk_manager.get_ckpt_path(metric_dict)

                    if topk_ckpt_path is not None:
                        self.save_checkpoint(path=topk_ckpt_path)
                # ========= eval end for this epoch ==========
                policy.train()

                # end of epoch
                # log of last step is combined with validation and rollout
                wandb_run.log(step_log, step=self.global_step)
                json_logger.log(step_log)
                self.global_step += 1
                self.epoch += 1

@hydra.main(
    version_base=None,
    config_path=str(pathlib.Path(__file__).parent.parent.joinpath("config")),
    config_name=pathlib.Path(__file__).stem)
def main(cfg):
    """Hydra entry point: build the workspace from cfg and run training."""
    workspace = TrainDiffusionUnetLowdimWorkspace(cfg)
    workspace.run()

if __name__ == "__main__":
    main()
OmegaConf.register_new_resolver("eval", eval, replace=True)

class TrainDiffusionUnetVideoWorkspace(BaseWorkspace):
    """Training workspace for a DiffusionUnetVideoPolicy.

    Unlike the other workspaces this one is step-based rather than
    epoch-based: rollout eval (``eval_every``) and validation
    (``val_every``) are triggered by ``global_step`` inside the batch
    loop, there is no separate validation dataset (a single cached train
    batch is reused), and logging goes to wandb only (no JsonLogger).
    """
    include_keys = ['global_step', 'epoch']

    def __init__(self, cfg: OmegaConf, output_dir=None):
        """Build model, optional EMA clone and optimizer from the hydra cfg."""
        super().__init__(cfg, output_dir=output_dir)

        # set seed for torch, numpy and python's RNG
        seed = cfg.training.seed
        torch.manual_seed(seed)
        np.random.seed(seed)
        random.seed(seed)

        # configure model
        self.model: DiffusionUnetVideoPolicy = hydra.utils.instantiate(cfg.policy)

        # EMA copy is a deep copy so its weights evolve independently
        self.ema_model: DiffusionUnetVideoPolicy = None
        if cfg.training.use_ema:
            self.ema_model = copy.deepcopy(self.model)

        # configure training state
        self.optimizer = hydra.utils.instantiate(
            cfg.optimizer, params=self.model.parameters())

        # training progress counters (restored on resume via include_keys)
        self.global_step = 0
        self.epoch = 0

    def run(self):
        """Run the step-based training loop described in the class docstring."""
        cfg = copy.deepcopy(self.cfg)

        # resume training
        if cfg.training.resume:
            # NOTE(review): 'lastest' (sic) is a pre-existing local-name typo.
            lastest_ckpt_path = self.get_checkpoint_path()
            if lastest_ckpt_path.is_file():
                print(f"Resuming from checkpoint {lastest_ckpt_path}")
                self.load_checkpoint(path=lastest_ckpt_path)

        # configure dataset
        dataset: BaseImageDataset
        dataset = hydra.utils.instantiate(cfg.task.dataset)
        assert isinstance(dataset, BaseImageDataset)
        train_dataloader = DataLoader(dataset, **cfg.dataloader)
        normalizer = dataset.get_normalizer()

        # both live and EMA policy share the dataset's normalizer
        self.model.set_normalizer(normalizer)
        if cfg.training.use_ema:
            self.ema_model.set_normalizer(normalizer)

        # configure lr scheduler
        lr_scheduler = get_scheduler(
            cfg.training.lr_scheduler,
            optimizer=self.optimizer,
            num_warmup_steps=cfg.training.lr_warmup_steps,
            num_training_steps=(
                len(train_dataloader) * cfg.training.num_epochs) \
                    // cfg.training.gradient_accumulate_every,
            # pytorch assumes stepping LRScheduler every epoch
            # however huggingface diffusers steps it every batch
            last_epoch=self.global_step-1
        )

        # configure ema
        ema: EMAModel = None
        if cfg.training.use_ema:
            ema = hydra.utils.instantiate(
                cfg.ema,
                model=self.ema_model)

        # configure env
        env_runner: BaseImageRunner
        env_runner = hydra.utils.instantiate(
            cfg.task.env_runner,
            output_dir=self.output_dir)
        assert isinstance(env_runner, BaseImageRunner)

        # configure logging
        wandb_run = wandb.init(
            dir=str(self.output_dir),
            config=OmegaConf.to_container(cfg, resolve=True),
            **cfg.logging
        )
        wandb.config.update(
            {
                "output_dir": self.output_dir,
            }
        )

        # configure checkpoint (keeps the top-k checkpoints by metric)
        topk_manager = TopKCheckpointManager(
            save_dir=os.path.join(self.output_dir, 'checkpoints'),
            **cfg.checkpoint.topk
        )

        # device transfer
        device = torch.device(cfg.training.device)
        self.model.to(device)
        if self.ema_model is not None:
            self.ema_model.to(device)
        optimizer_to(self.optimizer, device)

        # save batch for validation (a single cached training batch)
        val_batch = next(iter(train_dataloader))

        # training loop
        for _ in range(cfg.training.num_epochs):
            with tqdm.tqdm(train_dataloader, desc=f"Training epoch {self.epoch}",
                    leave=False, mininterval=cfg.training.tqdm_interval_sec) as tepoch:
                for batch in tepoch:
                    # device transfer
                    batch = dict_apply(batch, lambda x: x.to(device, non_blocking=True))

                    # compute loss (scaled for gradient accumulation)
                    raw_loss = self.model.compute_loss(batch)
                    loss = raw_loss / cfg.training.gradient_accumulate_every
                    loss.backward()

                    # step optimizer
                    if self.global_step % cfg.training.gradient_accumulate_every == 0:
                        self.optimizer.step()
                        self.optimizer.zero_grad()
                        lr_scheduler.step()

                    # update ema
                    if cfg.training.use_ema:
                        ema.step(self.model)

                    # logging
                    raw_loss_cpu = raw_loss.item()
                    # tepoch.set_postfix(loss=raw_loss_cpu, refresh=False)
                    step_log = {
                        'train_loss': raw_loss_cpu,
                        'global_step': self.global_step,
                        'epoch': self.epoch,
                        'lr': lr_scheduler.get_last_lr()[0]
                    }

                    # eval (rollout + checkpointing, every eval_every steps;
                    # skipped at step 0 unless eval_first is set)
                    should_eval = self.global_step % cfg.training.eval_every == 0
                    if (not cfg.training.eval_first) and (self.global_step == 0):
                        should_eval = False
                    if should_eval:
                        policy = self.model
                        if cfg.training.use_ema:
                            policy = self.ema_model
                        policy.eval()
                        runner_log = env_runner.run(policy)
                        policy.train()
                        step_log.update(runner_log)

                        # checkpointing
                        if cfg.checkpoint.save_last_ckpt:
                            self.save_checkpoint()
                        if cfg.checkpoint.save_last_snapshot:
                            self.save_snapshot()

                        save_path = topk_manager.get_ckpt_path({
                            'epoch': self.epoch,
                            'test_score': runner_log['test/mean_score']
                        })
                        if save_path is not None:
                            self.save_checkpoint(path=save_path)

                    # validation (action MSE on the cached batch)
                    should_val = self.global_step % cfg.training.val_every == 0
                    if should_val:
                        policy = self.model
                        if cfg.training.use_ema:
                            policy = self.ema_model
                        policy.eval()
                        with torch.no_grad():
                            # sample trajectory from training set, and evaluate difference
                            # NOTE(review): this rebinds 'batch', shadowing the
                            # loop variable (then del'd) — confirm intended.
                            batch = dict_apply(val_batch, lambda x: x.to(device, non_blocking=True))
                            obs_dict = batch['obs']
                            gt_action = batch['action']

                            result = policy.predict_action(obs_dict)
                            pred_action = result['action_pred']
                            mse = torch.nn.functional.mse_loss(pred_action, gt_action)
                            step_log['train_action_mse_error'] = mse.item()
                            # free references so the sampling tensors can be collected
                            del batch
                            del obs_dict
                            del gt_action
                            del result
                            del pred_action
                            del mse
                        policy.train()

                    # log
                    wandb_run.log(step_log, step=self.global_step)

                    self.global_step += 1
            self.epoch += 1

@hydra.main(
    version_base=None,
    config_path=str(pathlib.Path(__file__).parent.parent.joinpath("config")),
    config_name=pathlib.Path(__file__).stem)
def main(cfg):
    """Hydra entry point: build the workspace from cfg and run training."""
    workspace = TrainDiffusionUnetVideoWorkspace(cfg)
    workspace.run()

if __name__ == "__main__":
    main()
class TrainIbcDfoHybridWorkspace(BaseWorkspace):
    """Training workspace for IbcDfoHybridImagePolicy.

    Epoch-based loop: each epoch trains over the dataloader, then (on the
    configured cadences) runs an environment rollout, a validation pass,
    an action-sampling MSE probe, and checkpointing. Metrics go to both
    wandb and a local JsonLogger.
    """

    # state attributes (beyond the model/optimizer) persisted in checkpoints
    include_keys = ['global_step', 'epoch']

    def __init__(self, cfg: OmegaConf, output_dir=None):
        """Build policy and optimizer from the Hydra config.

        Args:
            cfg: Full Hydra/OmegaConf training configuration.
            output_dir: Optional override for the run's output directory,
                forwarded to BaseWorkspace.
        """
        super().__init__(cfg, output_dir=output_dir)

        # set seed for torch / numpy / python RNGs so runs are reproducible
        seed = cfg.training.seed
        torch.manual_seed(seed)
        np.random.seed(seed)
        random.seed(seed)

        # configure model (instantiated entirely from cfg.policy)
        self.model: IbcDfoHybridImagePolicy = hydra.utils.instantiate(cfg.policy)

        # configure optimizer over the model's parameters
        self.optimizer = hydra.utils.instantiate(
            cfg.optimizer, params=self.model.parameters())

        # configure training state counters (restored on resume via include_keys)
        self.global_step = 0
        self.epoch = 0

    def run(self):
        """Execute the full training loop (resume, train, eval, checkpoint)."""
        # work on a copy so in-run mutations (e.g. debug overrides) never
        # touch the saved config
        cfg = copy.deepcopy(self.cfg)

        # resume training from the latest checkpoint if one exists
        if cfg.training.resume:
            latest_ckpt_path = self.get_checkpoint_path()
            if latest_ckpt_path.is_file():
                print(f"Resuming from checkpoint {latest_ckpt_path}")
                self.load_checkpoint(path=latest_ckpt_path)

        # configure dataset and dataloader
        dataset: BaseImageDataset
        dataset = hydra.utils.instantiate(cfg.task.dataset)
        assert isinstance(dataset, BaseImageDataset)
        train_dataloader = DataLoader(dataset, **cfg.dataloader)
        normalizer = dataset.get_normalizer()

        # configure validation dataset
        val_dataset = dataset.get_validation_dataset()
        val_dataloader = DataLoader(val_dataset, **cfg.val_dataloader)

        self.model.set_normalizer(normalizer)

        # configure lr scheduler
        lr_scheduler = get_scheduler(
            cfg.training.lr_scheduler,
            optimizer=self.optimizer,
            num_warmup_steps=cfg.training.lr_warmup_steps,
            num_training_steps=(
                len(train_dataloader) * cfg.training.num_epochs) \
                    // cfg.training.gradient_accumulate_every,
            # pytorch assumes stepping LRScheduler every epoch
            # however huggingface diffusers steps it every batch
            last_epoch=self.global_step-1
        )

        # configure environment runner used for evaluation rollouts
        env_runner: BaseImageRunner
        env_runner = hydra.utils.instantiate(
            cfg.task.env_runner,
            output_dir=self.output_dir)
        assert isinstance(env_runner, BaseImageRunner)

        # configure wandb logging; the resolved config is uploaded for provenance
        wandb_run = wandb.init(
            dir=str(self.output_dir),
            config=OmegaConf.to_container(cfg, resolve=True),
            **cfg.logging
        )
        wandb.config.update(
            {
                "output_dir": self.output_dir,
            }
        )

        # configure top-k checkpoint manager (keeps only the best-scoring ckpts)
        topk_manager = TopKCheckpointManager(
            save_dir=os.path.join(self.output_dir, 'checkpoints'),
            **cfg.checkpoint.topk
        )

        # device transfer for model and optimizer state
        device = torch.device(cfg.training.device)
        self.model.to(device)
        optimizer_to(self.optimizer, device)

        # first training batch is kept around for the periodic sampling probe
        train_sampling_batch = None

        # debug mode: shrink every cadence so the whole loop runs in seconds
        if cfg.training.debug:
            cfg.training.num_epochs = 2
            cfg.training.max_train_steps = 3
            cfg.training.max_val_steps = 3
            cfg.training.rollout_every = 1
            cfg.training.checkpoint_every = 1
            cfg.training.val_every = 1
            cfg.training.sample_every = 1

        # training loop
        log_path = os.path.join(self.output_dir, 'logs.json.txt')
        with JsonLogger(log_path) as json_logger:
            for local_epoch_idx in range(cfg.training.num_epochs):
                step_log = dict()
                # ========= train for this epoch ==========
                train_losses = list()
                with tqdm.tqdm(train_dataloader, desc=f"Training epoch {self.epoch}",
                        leave=False, mininterval=cfg.training.tqdm_interval_sec) as tepoch:
                    for batch_idx, batch in enumerate(tepoch):
                        # device transfer
                        batch = dict_apply(batch, lambda x: x.to(device, non_blocking=True))
                        if train_sampling_batch is None:
                            train_sampling_batch = batch

                        # compute loss, scaled for gradient accumulation
                        raw_loss = self.model.compute_loss(batch)
                        loss = raw_loss / cfg.training.gradient_accumulate_every
                        loss.backward()

                        # step optimizer once every gradient_accumulate_every steps
                        if self.global_step % cfg.training.gradient_accumulate_every == 0:
                            self.optimizer.step()
                            self.optimizer.zero_grad()
                            lr_scheduler.step()

                        # logging: unscaled loss is what gets reported
                        raw_loss_cpu = raw_loss.item()
                        tepoch.set_postfix(loss=raw_loss_cpu, refresh=False)
                        train_losses.append(raw_loss_cpu)
                        step_log = {
                            'train_loss': raw_loss_cpu,
                            'global_step': self.global_step,
                            'epoch': self.epoch,
                            'lr': lr_scheduler.get_last_lr()[0]
                        }

                        is_last_batch = (batch_idx == (len(train_dataloader)-1))
                        if not is_last_batch:
                            # log of last step is combined with validation and rollout
                            wandb_run.log(step_log, step=self.global_step)
                            json_logger.log(step_log)
                            self.global_step += 1

                        if (cfg.training.max_train_steps is not None) \
                            and batch_idx >= (cfg.training.max_train_steps-1):
                            break

                # at the end of each epoch
                # replace train_loss with epoch average
                train_loss = np.mean(train_losses)
                step_log['train_loss'] = train_loss

                # ========= eval for this epoch ==========
                policy = self.model
                policy.eval()

                # run rollout
                if (self.epoch % cfg.training.rollout_every) == 0:
                    runner_log = env_runner.run(policy)
                    # log all
                    step_log.update(runner_log)

                # run validation
                if (self.epoch % cfg.training.val_every) == 0:
                    with torch.no_grad():
                        val_losses = list()
                        with tqdm.tqdm(val_dataloader, desc=f"Validation epoch {self.epoch}",
                                leave=False, mininterval=cfg.training.tqdm_interval_sec) as tepoch:
                            for batch_idx, batch in enumerate(tepoch):
                                batch = dict_apply(batch, lambda x: x.to(device, non_blocking=True))
                                loss = self.model.compute_loss(batch)
                                val_losses.append(loss)
                                if (cfg.training.max_val_steps is not None) \
                                    and batch_idx >= (cfg.training.max_val_steps-1):
                                    break
                        if len(val_losses) > 0:
                            val_loss = torch.mean(torch.tensor(val_losses)).item()
                            # log epoch average validation loss
                            step_log['val_loss'] = val_loss

                # run action sampling on a training batch
                if (self.epoch % cfg.training.sample_every) == 0:
                    with torch.no_grad():
                        # sample trajectory from training set, and evaluate difference
                        n_samples = cfg.training.sample_max_batch
                        batch = dict_apply(train_sampling_batch,
                            lambda x: x.to(device, non_blocking=True))
                        obs_dict = dict_apply(batch['obs'], lambda x: x[:n_samples])
                        # BUGFIX: slice gt_action to the same n_samples as
                        # obs_dict (matching the lowdim workspace); otherwise
                        # mse_loss compares mismatched batch sizes whenever
                        # sample_max_batch < the dataloader batch size.
                        gt_action = batch['action'][:n_samples]

                        result = policy.predict_action(obs_dict)
                        pred_action = result['action']
                        # predicted actions cover only the executed horizon;
                        # align ground truth to the same window
                        start = cfg.n_obs_steps - 1
                        end = start + cfg.n_action_steps
                        gt_action = gt_action[:,start:end]
                        mse = torch.nn.functional.mse_loss(pred_action, gt_action)
                        # log
                        step_log['train_action_mse_error'] = mse.item()
                        # explicitly drop references to release GPU memory
                        del batch
                        del obs_dict
                        del gt_action
                        del result
                        del pred_action
                        del mse

                # checkpoint
                if (self.epoch % cfg.training.checkpoint_every) == 0:
                    # checkpointing
                    if cfg.checkpoint.save_last_ckpt:
                        self.save_checkpoint()
                    if cfg.checkpoint.save_last_snapshot:
                        self.save_snapshot()

                    # sanitize metric names ('/' conflicts with filesystem paths)
                    metric_dict = dict()
                    for key, value in step_log.items():
                        new_key = key.replace('/', '_')
                        metric_dict[new_key] = value

                    # We can't copy the last checkpoint here
                    # since save_checkpoint uses threads.
                    # therefore at this point the file might still be empty!
                    topk_ckpt_path = topk_manager.get_ckpt_path(metric_dict)

                    if topk_ckpt_path is not None:
                        self.save_checkpoint(path=topk_ckpt_path)
                # ========= eval end for this epoch ==========
                policy.train()

                # end of epoch
                # log of last step is combined with validation and rollout
                wandb_run.log(step_log, step=self.global_step)
                json_logger.log(step_log)
                self.global_step += 1
                self.epoch += 1
class TrainIbcDfoLowdimWorkspace(BaseWorkspace):
    """Training workspace for IbcDfoLowdimPolicy.

    Epoch-based loop mirroring the hybrid (image) IBC workspace, but for
    low-dimensional state observations: per-epoch training, then rollout,
    validation, sampling-MSE probe and checkpointing on their cadences.
    """

    # state attributes (beyond the model/optimizer) persisted in checkpoints
    include_keys = ['global_step', 'epoch']

    def __init__(self, cfg: OmegaConf, output_dir=None):
        """Build policy and optimizer from the Hydra config.

        Args:
            cfg: Full Hydra/OmegaConf training configuration.
            output_dir: Optional override for the run's output directory,
                forwarded to BaseWorkspace.
        """
        super().__init__(cfg, output_dir=output_dir)

        # set seed for torch / numpy / python RNGs so runs are reproducible
        seed = cfg.training.seed
        torch.manual_seed(seed)
        np.random.seed(seed)
        random.seed(seed)

        # configure model (instantiated entirely from cfg.policy)
        self.model: IbcDfoLowdimPolicy
        self.model = hydra.utils.instantiate(cfg.policy)

        # configure optimizer over the model's parameters
        self.optimizer = hydra.utils.instantiate(
            cfg.optimizer, params=self.model.parameters())

        # training state counters (restored on resume via include_keys)
        self.global_step = 0
        self.epoch = 0

    def run(self):
        """Execute the full training loop (resume, train, eval, checkpoint)."""
        # work on a copy so in-run mutations (e.g. debug overrides) never
        # touch the saved config
        cfg = copy.deepcopy(self.cfg)

        # resume training from the latest checkpoint if one exists
        if cfg.training.resume:
            lastest_ckpt_path = self.get_checkpoint_path()
            if lastest_ckpt_path.is_file():
                print(f"Resuming from checkpoint {lastest_ckpt_path}")
                self.load_checkpoint(path=lastest_ckpt_path)

        # configure dataset and dataloader
        dataset: BaseLowdimDataset
        dataset = hydra.utils.instantiate(cfg.task.dataset)
        assert isinstance(dataset, BaseLowdimDataset)
        train_dataloader = DataLoader(dataset, **cfg.dataloader)
        normalizer = dataset.get_normalizer()

        # configure validation dataset
        val_dataset = dataset.get_validation_dataset()
        val_dataloader = DataLoader(val_dataset, **cfg.val_dataloader)

        self.model.set_normalizer(normalizer)

        # configure lr scheduler
        lr_scheduler = get_scheduler(
            cfg.training.lr_scheduler,
            optimizer=self.optimizer,
            num_warmup_steps=cfg.training.lr_warmup_steps,
            num_training_steps=(
                len(train_dataloader) * cfg.training.num_epochs) \
                    // cfg.training.gradient_accumulate_every,
            # pytorch assumes stepping LRScheduler every epoch
            # however huggingface diffusers steps it every batch
            last_epoch=self.global_step-1
        )

        # configure env runner used for evaluation rollouts
        env_runner: BaseLowdimRunner
        env_runner = hydra.utils.instantiate(
            cfg.task.env_runner,
            output_dir=self.output_dir)
        assert isinstance(env_runner, BaseLowdimRunner)

        # configure wandb logging; the resolved config is uploaded for provenance
        wandb_run = wandb.init(
            dir=str(self.output_dir),
            config=OmegaConf.to_container(cfg, resolve=True),
            **cfg.logging
        )
        wandb.config.update(
            {
                "output_dir": self.output_dir,
            }
        )

        # configure top-k checkpoint manager (keeps only the best-scoring ckpts)
        topk_manager = TopKCheckpointManager(
            save_dir=os.path.join(self.output_dir, 'checkpoints'),
            **cfg.checkpoint.topk
        )

        # device transfer for model and optimizer state
        device = torch.device(cfg.training.device)
        self.model.to(device)
        optimizer_to(self.optimizer, device)

        # first training batch is kept around for the periodic sampling probe
        train_sampling_batch = None

        # debug mode: shrink every cadence so the whole loop runs in seconds
        if cfg.training.debug:
            cfg.training.num_epochs = 2
            cfg.training.max_train_steps = 3
            cfg.training.max_val_steps = 3
            cfg.training.rollout_every = 1
            cfg.training.checkpoint_every = 1
            cfg.training.val_every = 1
            cfg.training.sample_every = 1

        # training loop
        log_path = os.path.join(self.output_dir, 'logs.json.txt')
        with JsonLogger(log_path) as json_logger:
            for local_epoch_idx in range(cfg.training.num_epochs):
                step_log = dict()
                # ========= train for this epoch ==========
                train_losses = list()
                with tqdm.tqdm(train_dataloader, desc=f"Training epoch {self.epoch}",
                        leave=False, mininterval=cfg.training.tqdm_interval_sec) as tepoch:
                    for batch_idx, batch in enumerate(tepoch):
                        # device transfer
                        batch = dict_apply(batch, lambda x: x.to(device, non_blocking=True))
                        if train_sampling_batch is None:
                            train_sampling_batch = batch

                        # compute loss, scaled for gradient accumulation
                        raw_loss = self.model.compute_loss(batch)
                        loss = raw_loss / cfg.training.gradient_accumulate_every
                        loss.backward()

                        # step optimizer once every gradient_accumulate_every steps
                        if self.global_step % cfg.training.gradient_accumulate_every == 0:
                            self.optimizer.step()
                            self.optimizer.zero_grad()
                            lr_scheduler.step()

                        # logging: unscaled loss is what gets reported
                        raw_loss_cpu = raw_loss.item()
                        tepoch.set_postfix(loss=raw_loss_cpu, refresh=False)
                        train_losses.append(raw_loss_cpu)
                        step_log = {
                            'train_loss': raw_loss_cpu,
                            'global_step': self.global_step,
                            'epoch': self.epoch,
                            'lr': lr_scheduler.get_last_lr()[0]
                        }

                        is_last_batch = (batch_idx == (len(train_dataloader)-1))
                        if not is_last_batch:
                            # log of last step is combined with validation and rollout
                            wandb_run.log(step_log, step=self.global_step)
                            json_logger.log(step_log)
                            self.global_step += 1

                        if (cfg.training.max_train_steps is not None) \
                            and batch_idx >= (cfg.training.max_train_steps-1):
                            break

                # at the end of each epoch
                # replace train_loss with epoch average
                train_loss = np.mean(train_losses)
                step_log['train_loss'] = train_loss

                # ========= eval for this epoch ==========
                policy = self.model
                policy.eval()

                # run rollout
                if (self.epoch % cfg.training.rollout_every) == 0:
                    runner_log = env_runner.run(policy)
                    # log all
                    step_log.update(runner_log)

                # run validation
                if (self.epoch % cfg.training.val_every) == 0:
                    with torch.no_grad():
                        val_losses = list()
                        with tqdm.tqdm(val_dataloader, desc=f"Validation epoch {self.epoch}",
                                leave=False, mininterval=cfg.training.tqdm_interval_sec) as tepoch:
                            for batch_idx, batch in enumerate(tepoch):
                                batch = dict_apply(batch, lambda x: x.to(device, non_blocking=True))
                                loss = self.model.compute_loss(batch)
                                val_losses.append(loss)
                                if (cfg.training.max_val_steps is not None) \
                                    and batch_idx >= (cfg.training.max_val_steps-1):
                                    break
                        if len(val_losses) > 0:
                            val_loss = torch.mean(torch.tensor(val_losses)).item()
                            # log epoch average validation loss
                            step_log['val_loss'] = val_loss

                # run action sampling on a training batch
                if (self.epoch % cfg.training.sample_every) == 0:
                    with torch.no_grad():
                        # sample trajectory from training set, and evaluate difference
                        # (train_sampling_batch is already on the training device)
                        batch = train_sampling_batch
                        n_samples = cfg.training.sample_max_batch
                        obs_dict = {'obs': batch['obs'][:n_samples]}
                        gt_action = batch['action'][:n_samples]

                        result = policy.predict_action(obs_dict)
                        pred_action = result['action']
                        # predicted actions cover only the executed horizon;
                        # align ground truth to the same window
                        start = cfg.n_obs_steps - 1
                        end = start + cfg.n_action_steps
                        gt_action = gt_action[:,start:end]
                        mse = torch.nn.functional.mse_loss(pred_action, gt_action)
                        # log
                        step_log['train_action_mse_error'] = mse.item()
                        # explicitly drop references to release GPU memory
                        del batch
                        del obs_dict
                        del gt_action
                        del result
                        del pred_action
                        del mse

                # checkpoint
                if (self.epoch % cfg.training.checkpoint_every) == 0:
                    # checkpointing
                    if cfg.checkpoint.save_last_ckpt:
                        self.save_checkpoint()
                    if cfg.checkpoint.save_last_snapshot:
                        self.save_snapshot()

                    # sanitize metric names ('/' conflicts with filesystem paths)
                    metric_dict = dict()
                    for key, value in step_log.items():
                        new_key = key.replace('/', '_')
                        metric_dict[new_key] = value

                    # We can't copy the last checkpoint here
                    # since save_checkpoint uses threads.
                    # therefore at this point the file might have been empty!
                    topk_ckpt_path = topk_manager.get_ckpt_path(metric_dict)

                    if topk_ckpt_path is not None:
                        self.save_checkpoint(path=topk_ckpt_path)
                # ========= eval end for this epoch ==========
                policy.train()

                # end of epoch
                # log of last step is combined with validation and rollout
                wandb_run.log(step_log, step=self.global_step)
                json_logger.log(step_log)
                self.global_step += 1
                self.epoch += 1
class TrainRobomimicImageWorkspace(BaseWorkspace):
    """Training workspace for RobomimicImagePolicy.

    Unlike the diffusion/IBC workspaces, the robomimic policy owns its own
    optimizer: training and validation both go through
    ``model.train_on_batch`` and no optimizer/scheduler is built here.
    """

    # state attributes (beyond the model) persisted in checkpoints
    include_keys = ['global_step', 'epoch']

    def __init__(self, cfg: OmegaConf, output_dir=None):
        """Build the policy from the Hydra config.

        Args:
            cfg: Full Hydra/OmegaConf training configuration.
            output_dir: Optional override for the run's output directory,
                forwarded to BaseWorkspace.
        """
        super().__init__(cfg, output_dir=output_dir)

        # set seed for torch / numpy / python RNGs so runs are reproducible
        seed = cfg.training.seed
        torch.manual_seed(seed)
        np.random.seed(seed)
        random.seed(seed)

        # configure model (instantiated entirely from cfg.policy; the policy
        # internally manages its own optimizer)
        self.model: RobomimicImagePolicy = hydra.utils.instantiate(cfg.policy)

        # training state counters (restored on resume via include_keys)
        self.global_step = 0
        self.epoch = 0

    def run(self):
        """Execute the full training loop (resume, train, eval, checkpoint)."""
        # work on a copy so in-run mutations (e.g. debug overrides) never
        # touch the saved config
        cfg = copy.deepcopy(self.cfg)

        # resume training from the latest checkpoint if one exists
        if cfg.training.resume:
            lastest_ckpt_path = self.get_checkpoint_path()
            if lastest_ckpt_path.is_file():
                print(f"Resuming from checkpoint {lastest_ckpt_path}")
                self.load_checkpoint(path=lastest_ckpt_path)

        # configure dataset and dataloader
        dataset: BaseImageDataset
        dataset = hydra.utils.instantiate(cfg.task.dataset)
        assert isinstance(dataset, BaseImageDataset)
        train_dataloader = DataLoader(dataset, **cfg.dataloader)
        normalizer = dataset.get_normalizer()

        # configure validation dataset
        val_dataset = dataset.get_validation_dataset()
        val_dataloader = DataLoader(val_dataset, **cfg.val_dataloader)

        self.model.set_normalizer(normalizer)

        # configure environment runner used for evaluation rollouts
        env_runner: BaseImageRunner
        env_runner = hydra.utils.instantiate(
            cfg.task.env_runner,
            output_dir=self.output_dir)
        assert isinstance(env_runner, BaseImageRunner)

        # configure wandb logging; the resolved config is uploaded for provenance
        wandb_run = wandb.init(
            dir=str(self.output_dir),
            config=OmegaConf.to_container(cfg, resolve=True),
            **cfg.logging
        )
        wandb.config.update(
            {
                "output_dir": self.output_dir,
            }
        )

        # configure top-k checkpoint manager (keeps only the best-scoring ckpts)
        topk_manager = TopKCheckpointManager(
            save_dir=os.path.join(self.output_dir, 'checkpoints'),
            **cfg.checkpoint.topk
        )

        # device transfer (model only; the optimizer lives inside the policy)
        device = torch.device(cfg.training.device)
        self.model.to(device)

        # first training batch is kept around for the periodic sampling probe
        train_sampling_batch = None

        # debug mode: shrink every cadence so the whole loop runs in seconds
        if cfg.training.debug:
            cfg.training.num_epochs = 2
            cfg.training.max_train_steps = 3
            cfg.training.max_val_steps = 3
            cfg.training.rollout_every = 1
            cfg.training.checkpoint_every = 1
            cfg.training.val_every = 1
            cfg.training.sample_every = 1

        # training loop
        log_path = os.path.join(self.output_dir, 'logs.json.txt')
        with JsonLogger(log_path) as json_logger:
            for local_epoch_idx in range(cfg.training.num_epochs):
                step_log = dict()
                # ========= train for this epoch ==========
                train_losses = list()
                with tqdm.tqdm(train_dataloader, desc=f"Training epoch {self.epoch}",
                        leave=False, mininterval=cfg.training.tqdm_interval_sec) as tepoch:
                    for batch_idx, batch in enumerate(tepoch):
                        # device transfer
                        batch = dict_apply(batch, lambda x: x.to(device, non_blocking=True))
                        if train_sampling_batch is None:
                            train_sampling_batch = batch

                        # forward + backward + optimizer step all happen inside
                        # the policy's own train_on_batch
                        info = self.model.train_on_batch(batch, epoch=self.epoch)

                        # logging
                        loss_cpu = info['losses']['action_loss'].item()
                        tepoch.set_postfix(loss=loss_cpu, refresh=False)
                        train_losses.append(loss_cpu)
                        step_log = {
                            'train_loss': loss_cpu,
                            'global_step': self.global_step,
                            'epoch': self.epoch
                        }

                        is_last_batch = (batch_idx == (len(train_dataloader)-1))
                        if not is_last_batch:
                            # log of last step is combined with validation and rollout
                            wandb_run.log(step_log, step=self.global_step)
                            json_logger.log(step_log)
                            self.global_step += 1

                        if (cfg.training.max_train_steps is not None) \
                            and batch_idx >= (cfg.training.max_train_steps-1):
                            break

                # at the end of each epoch
                # replace train_loss with epoch average
                train_loss = np.mean(train_losses)
                step_log['train_loss'] = train_loss

                # ========= eval for this epoch ==========
                self.model.eval()

                # run rollout
                if (self.epoch % cfg.training.rollout_every) == 0:
                    runner_log = env_runner.run(self.model)
                    # log all
                    step_log.update(runner_log)

                # run validation (train_on_batch with validate=True skips the
                # optimizer update and only computes losses)
                if (self.epoch % cfg.training.val_every) == 0:
                    with torch.no_grad():
                        val_losses = list()
                        with tqdm.tqdm(val_dataloader, desc=f"Validation epoch {self.epoch}",
                                leave=False, mininterval=cfg.training.tqdm_interval_sec) as tepoch:
                            for batch_idx, batch in enumerate(tepoch):
                                batch = dict_apply(batch, lambda x: x.to(device, non_blocking=True))
                                info = self.model.train_on_batch(batch, epoch=self.epoch, validate=True)
                                loss = info['losses']['action_loss']
                                val_losses.append(loss)
                                if (cfg.training.max_val_steps is not None) \
                                    and batch_idx >= (cfg.training.max_val_steps-1):
                                    break
                        if len(val_losses) > 0:
                            val_loss = torch.mean(torch.tensor(val_losses)).item()
                            # log epoch average validation loss
                            step_log['val_loss'] = val_loss

                # run action sampling on a training batch
                if (self.epoch % cfg.training.sample_every) == 0:
                    with torch.no_grad():
                        # sample trajectory from training set, and evaluate difference
                        batch = dict_apply(train_sampling_batch, lambda x: x.to(device, non_blocking=True))
                        obs_dict = batch['obs']
                        gt_action = batch['action']
                        T = gt_action.shape[1]

                        # this policy is stateful and predicts one step at a
                        # time: reset it, then feed observations timestep by
                        # timestep (x[:,[i]] keeps the time dimension) and
                        # concatenate the per-step predictions
                        pred_actions = list()
                        self.model.reset()
                        for i in range(T):
                            result = self.model.predict_action(
                                dict_apply(obs_dict, lambda x: x[:,[i]])
                            )
                            pred_actions.append(result['action'])
                        pred_actions = torch.cat(pred_actions, dim=1)
                        mse = torch.nn.functional.mse_loss(pred_actions, gt_action)
                        step_log['train_action_mse_error'] = mse.item()
                        # explicitly drop references to release GPU memory
                        del batch
                        del obs_dict
                        del gt_action
                        del result
                        del pred_actions
                        del mse

                # checkpoint
                if (self.epoch % cfg.training.checkpoint_every) == 0:
                    # checkpointing
                    if cfg.checkpoint.save_last_ckpt:
                        self.save_checkpoint()
                    if cfg.checkpoint.save_last_snapshot:
                        self.save_snapshot()

                    # sanitize metric names ('/' conflicts with filesystem paths)
                    metric_dict = dict()
                    for key, value in step_log.items():
                        new_key = key.replace('/', '_')
                        metric_dict[new_key] = value

                    # We can't copy the last checkpoint here
                    # since save_checkpoint uses threads.
                    # therefore at this point the file might have been empty!
                    topk_ckpt_path = topk_manager.get_ckpt_path(metric_dict)

                    if topk_ckpt_path is not None:
                        self.save_checkpoint(path=topk_ckpt_path)
                # ========= eval end for this epoch ==========
                self.model.train()

                # end of epoch
                # log of last step is combined with validation and rollout
                wandb_run.log(step_log, step=self.global_step)
                json_logger.log(step_log)
                self.global_step += 1
                self.epoch += 1
class TrainRobomimicLowdimWorkspace(BaseWorkspace):
    """Training workspace for RobomimicLowdimPolicy.

    The robomimic policy owns its own optimizer, so training and validation
    both go through ``model.train_on_batch``; no optimizer/scheduler or
    sampling-MSE probe is configured here.
    """

    # state attributes (beyond the model) persisted in checkpoints
    include_keys = ['global_step', 'epoch']

    def __init__(self, cfg: OmegaConf, output_dir=None):
        """Build the policy from the Hydra config.

        Args:
            cfg: Full Hydra/OmegaConf training configuration.
            output_dir: Optional override for the run's output directory,
                forwarded to BaseWorkspace.

        Note: ``output_dir`` (default None) was added for consistency with
        every sibling workspace; existing single-argument callers are
        unaffected.
        """
        super().__init__(cfg, output_dir=output_dir)

        # set seed for torch / numpy / python RNGs so runs are reproducible
        seed = cfg.training.seed
        torch.manual_seed(seed)
        np.random.seed(seed)
        random.seed(seed)

        # configure model (instantiated entirely from cfg.policy; the policy
        # internally manages its own optimizer)
        self.model: RobomimicLowdimPolicy = hydra.utils.instantiate(cfg.policy)

        # training state counters (restored on resume via include_keys)
        self.global_step = 0
        self.epoch = 0

    def run(self):
        """Execute the full training loop (resume, train, eval, checkpoint)."""
        # work on a copy so in-run mutations (e.g. debug overrides) never
        # touch the saved config
        cfg = copy.deepcopy(self.cfg)

        # resume training from the latest checkpoint if one exists
        if cfg.training.resume:
            latest_ckpt_path = self.get_checkpoint_path()
            if latest_ckpt_path.is_file():
                print(f"Resuming from checkpoint {latest_ckpt_path}")
                self.load_checkpoint(path=latest_ckpt_path)

        # configure dataset and dataloader
        dataset: BaseLowdimDataset
        dataset = hydra.utils.instantiate(cfg.task.dataset)
        assert isinstance(dataset, BaseLowdimDataset)
        train_dataloader = DataLoader(dataset, **cfg.dataloader)
        normalizer = dataset.get_normalizer()

        # configure validation dataset
        val_dataset = dataset.get_validation_dataset()
        val_dataloader = DataLoader(val_dataset, **cfg.val_dataloader)

        self.model.set_normalizer(normalizer)

        # configure environment runner used for evaluation rollouts
        env_runner: BaseLowdimRunner
        env_runner = hydra.utils.instantiate(
            cfg.task.env_runner,
            output_dir=self.output_dir)
        assert isinstance(env_runner, BaseLowdimRunner)

        # configure wandb logging; the resolved config is uploaded for provenance
        wandb_run = wandb.init(
            dir=str(self.output_dir),
            config=OmegaConf.to_container(cfg, resolve=True),
            **cfg.logging
        )
        wandb.config.update(
            {
                "output_dir": self.output_dir,
            }
        )

        # configure top-k checkpoint manager (keeps only the best-scoring ckpts)
        topk_manager = TopKCheckpointManager(
            save_dir=os.path.join(self.output_dir, 'checkpoints'),
            **cfg.checkpoint.topk
        )

        # device transfer (model only; the optimizer lives inside the policy)
        device = torch.device(cfg.training.device)
        self.model.to(device)

        # debug mode: shrink every cadence so the whole loop runs in seconds
        if cfg.training.debug:
            cfg.training.num_epochs = 2
            cfg.training.max_train_steps = 3
            cfg.training.max_val_steps = 3
            cfg.training.rollout_every = 1
            cfg.training.checkpoint_every = 1
            cfg.training.val_every = 1

        # training loop
        log_path = os.path.join(self.output_dir, 'logs.json.txt')
        with JsonLogger(log_path) as json_logger:
            for local_epoch_idx in range(cfg.training.num_epochs):
                step_log = dict()
                # ========= train for this epoch ==========
                train_losses = list()
                with tqdm.tqdm(train_dataloader, desc=f"Training epoch {self.epoch}",
                        leave=False, mininterval=cfg.training.tqdm_interval_sec) as tepoch:
                    for batch_idx, batch in enumerate(tepoch):
                        # device transfer
                        batch = dict_apply(batch, lambda x: x.to(device, non_blocking=True))
                        # forward + backward + optimizer step all happen inside
                        # the policy's own train_on_batch
                        info = self.model.train_on_batch(batch, epoch=self.epoch)

                        # logging
                        loss_cpu = info['losses']['action_loss'].item()
                        tepoch.set_postfix(loss=loss_cpu, refresh=False)
                        train_losses.append(loss_cpu)
                        step_log = {
                            'train_loss': loss_cpu,
                            'global_step': self.global_step,
                            'epoch': self.epoch
                        }

                        is_last_batch = (batch_idx == (len(train_dataloader)-1))
                        if not is_last_batch:
                            # log of last step is combined with validation and rollout
                            wandb_run.log(step_log, step=self.global_step)
                            json_logger.log(step_log)
                            self.global_step += 1

                        if (cfg.training.max_train_steps is not None) \
                            and batch_idx >= (cfg.training.max_train_steps-1):
                            break

                # at the end of each epoch
                # replace train_loss with epoch average
                train_loss = np.mean(train_losses)
                step_log['train_loss'] = train_loss

                # ========= eval for this epoch ==========
                self.model.eval()

                # run rollout
                if (self.epoch % cfg.training.rollout_every) == 0:
                    runner_log = env_runner.run(self.model)
                    # log all
                    step_log.update(runner_log)

                # run validation (train_on_batch with validate=True skips the
                # optimizer update and only computes losses)
                if (self.epoch % cfg.training.val_every) == 0:
                    with torch.no_grad():
                        val_losses = list()
                        with tqdm.tqdm(val_dataloader, desc=f"Validation epoch {self.epoch}",
                                leave=False, mininterval=cfg.training.tqdm_interval_sec) as tepoch:
                            for batch_idx, batch in enumerate(tepoch):
                                batch = dict_apply(batch, lambda x: x.to(device, non_blocking=True))
                                info = self.model.train_on_batch(batch, epoch=self.epoch, validate=True)
                                loss = info['losses']['action_loss']
                                val_losses.append(loss)
                                if (cfg.training.max_val_steps is not None) \
                                    and batch_idx >= (cfg.training.max_val_steps-1):
                                    break
                        if len(val_losses) > 0:
                            val_loss = torch.mean(torch.tensor(val_losses)).item()
                            # log epoch average validation loss
                            step_log['val_loss'] = val_loss

                # checkpoint
                if (self.epoch % cfg.training.checkpoint_every) == 0:
                    # checkpointing
                    if cfg.checkpoint.save_last_ckpt:
                        self.save_checkpoint()
                    if cfg.checkpoint.save_last_snapshot:
                        self.save_snapshot()

                    # sanitize metric names ('/' conflicts with filesystem paths)
                    metric_dict = dict()
                    for key, value in step_log.items():
                        new_key = key.replace('/', '_')
                        metric_dict[new_key] = value

                    # We can't copy the last checkpoint here
                    # since save_checkpoint uses threads.
                    # therefore at this point the file might still be empty!
                    topk_ckpt_path = topk_manager.get_ckpt_path(metric_dict)

                    if topk_ckpt_path is not None:
                        self.save_checkpoint(path=topk_ckpt_path)
                # ========= eval end for this epoch ==========
                self.model.train()

                # end of epoch
                # log of last step is combined with validation and rollout
                wandb_run.log(step_log, step=self.global_step)
                json_logger.log(step_log)
                self.global_step += 1
                self.epoch += 1
+""" + +# %% +import time +from multiprocessing.managers import SharedMemoryManager +import click +import cv2 +import numpy as np +import torch +import dill +import hydra +import pathlib +import skvideo.io +from omegaconf import OmegaConf +import scipy.spatial.transform as st +from diffusion_policy.real_world.real_env import RealEnv +from diffusion_policy.real_world.spacemouse_shared_memory import Spacemouse +from diffusion_policy.common.precise_sleep import precise_wait +from diffusion_policy.real_world.real_inference_util import ( + get_real_obs_resolution, + get_real_obs_dict) +from diffusion_policy.common.pytorch_util import dict_apply +from diffusion_policy.workspace.base_workspace import BaseWorkspace +from diffusion_policy.policy.base_image_policy import BaseImagePolicy +from diffusion_policy.common.cv2_util import get_image_transform + + +OmegaConf.register_new_resolver("eval", eval, replace=True) + +@click.command() +@click.option('--input', '-i', required=True, help='Path to checkpoint') +@click.option('--output', '-o', required=True, help='Directory to save recording') +@click.option('--robot_ip', '-ri', required=True, help="UR5's IP address e.g. 
192.168.0.204") +@click.option('--match_dataset', '-m', default=None, help='Dataset used to overlay and adjust initial condition') +@click.option('--match_episode', '-me', default=None, type=int, help='Match specific episode from the match dataset') +@click.option('--vis_camera_idx', default=0, type=int, help="Which RealSense camera to visualize.") +@click.option('--init_joints', '-j', is_flag=True, default=False, help="Whether to initialize robot joint configuration in the beginning.") +@click.option('--steps_per_inference', '-si', default=6, type=int, help="Action horizon for inference.") +@click.option('--max_duration', '-md', default=60, help='Max duration for each epoch in seconds.') +@click.option('--frequency', '-f', default=10, type=float, help="Control frequency in Hz.") +@click.option('--command_latency', '-cl', default=0.01, type=float, help="Latency between receiving SapceMouse command to executing on Robot in Sec.") +def main(input, output, robot_ip, match_dataset, match_episode, + vis_camera_idx, init_joints, + steps_per_inference, max_duration, + frequency, command_latency): + # load match_dataset + match_camera_idx = 0 + episode_first_frame_map = dict() + if match_dataset is not None: + match_dir = pathlib.Path(match_dataset) + match_video_dir = match_dir.joinpath('videos') + for vid_dir in match_video_dir.glob("*/"): + episode_idx = int(vid_dir.stem) + match_video_path = vid_dir.joinpath(f'{match_camera_idx}.mp4') + if match_video_path.exists(): + frames = skvideo.io.vread( + str(match_video_path), num_frames=1) + episode_first_frame_map[episode_idx] = frames[0] + print(f"Loaded initial frame for {len(episode_first_frame_map)} episodes") + + # load checkpoint + ckpt_path = input + payload = torch.load(open(ckpt_path, 'rb'), pickle_module=dill) + cfg = payload['cfg'] + cls = hydra.utils.get_class(cfg._target_) + workspace = cls(cfg) + workspace: BaseWorkspace + workspace.load_payload(payload, exclude_keys=None, include_keys=None) + + # hacks for 
method-specific setup. + action_offset = 0 + delta_action = False + if 'diffusion' in cfg.name: + # diffusion model + policy: BaseImagePolicy + policy = workspace.model + if cfg.training.use_ema: + policy = workspace.ema_model + + device = torch.device('cuda') + policy.eval().to(device) + + # set inference params + policy.num_inference_steps = 16 # DDIM inference iterations + policy.n_action_steps = policy.horizon - policy.n_obs_steps + 1 + + elif 'robomimic' in cfg.name: + # BCRNN model + policy: BaseImagePolicy + policy = workspace.model + + device = torch.device('cuda') + policy.eval().to(device) + + # BCRNN always has action horizon of 1 + steps_per_inference = 1 + action_offset = cfg.n_latency_steps + delta_action = cfg.task.dataset.get('delta_action', False) + + elif 'ibc' in cfg.name: + policy: BaseImagePolicy + policy = workspace.model + policy.pred_n_iter = 5 + policy.pred_n_samples = 4096 + + device = torch.device('cuda') + policy.eval().to(device) + steps_per_inference = 1 + action_offset = 1 + delta_action = cfg.task.dataset.get('delta_action', False) + else: + raise RuntimeError("Unsupported policy type: ", cfg.name) + + # setup experiment + dt = 1/frequency + + obs_res = get_real_obs_resolution(cfg.task.shape_meta) + n_obs_steps = cfg.n_obs_steps + print("n_obs_steps: ", n_obs_steps) + print("steps_per_inference:", steps_per_inference) + print("action_offset:", action_offset) + + with SharedMemoryManager() as shm_manager: + with Spacemouse(shm_manager=shm_manager) as sm, RealEnv( + output_dir=output, + robot_ip=robot_ip, + frequency=frequency, + n_obs_steps=n_obs_steps, + obs_image_resolution=obs_res, + obs_float32=True, + init_joints=init_joints, + enable_multi_cam_vis=True, + record_raw_video=True, + # number of threads per camera view for video recording (H.264) + thread_per_video=3, + # video recording quality, lower is better (but slower). 
+ video_crf=21, + shm_manager=shm_manager) as env: + cv2.setNumThreads(1) + + # Should be the same as demo + # realsense exposure + env.realsense.set_exposure(exposure=120, gain=0) + # realsense white balance + env.realsense.set_white_balance(white_balance=5900) + + print("Waiting for realsense") + time.sleep(1.0) + + print("Warming up policy inference") + obs = env.get_obs() + with torch.no_grad(): + policy.reset() + obs_dict_np = get_real_obs_dict( + env_obs=obs, shape_meta=cfg.task.shape_meta) + obs_dict = dict_apply(obs_dict_np, + lambda x: torch.from_numpy(x).unsqueeze(0).to(device)) + result = policy.predict_action(obs_dict) + action = result['action'][0].detach().to('cpu').numpy() + assert action.shape[-1] == 2 + del result + + print('Ready!') + while True: + # ========= human control loop ========== + print("Human in control!") + state = env.get_robot_state() + target_pose = state['TargetTCPPose'] + t_start = time.monotonic() + iter_idx = 0 + while True: + # calculate timing + t_cycle_end = t_start + (iter_idx + 1) * dt + t_sample = t_cycle_end - command_latency + t_command_target = t_cycle_end + dt + + # pump obs + obs = env.get_obs() + + # visualize + episode_id = env.replay_buffer.n_episodes + vis_img = obs[f'camera_{vis_camera_idx}'][-1] + match_episode_id = episode_id + if match_episode is not None: + match_episode_id = match_episode + if match_episode_id in episode_first_frame_map: + match_img = episode_first_frame_map[match_episode_id] + ih, iw, _ = match_img.shape + oh, ow, _ = vis_img.shape + tf = get_image_transform( + input_res=(iw, ih), + output_res=(ow, oh), + bgr_to_rgb=False) + match_img = tf(match_img).astype(np.float32) / 255 + vis_img = np.minimum(vis_img, match_img) + + text = f'Episode: {episode_id}' + cv2.putText( + vis_img, + text, + (10,20), + fontFace=cv2.FONT_HERSHEY_SIMPLEX, + fontScale=0.5, + thickness=1, + color=(255,255,255) + ) + cv2.imshow('default', vis_img[...,::-1]) + key_stroke = cv2.pollKey() + if key_stroke == ord('q'): 
+ # Exit program + env.end_episode() + exit(0) + elif key_stroke == ord('c'): + # Exit human control loop + # hand control over to the policy + break + + precise_wait(t_sample) + # get teleop command + sm_state = sm.get_motion_state_transformed() + # print(sm_state) + dpos = sm_state[:3] * (env.max_pos_speed / frequency) + drot_xyz = sm_state[3:] * (env.max_rot_speed / frequency) + + if not sm.is_button_pressed(0): + # translation mode + drot_xyz[:] = 0 + else: + dpos[:] = 0 + if not sm.is_button_pressed(1): + # 2D translation mode + dpos[2] = 0 + + drot = st.Rotation.from_euler('xyz', drot_xyz) + target_pose[:3] += dpos + target_pose[3:] = (drot * st.Rotation.from_rotvec( + target_pose[3:])).as_rotvec() + # clip target pose + target_pose[:2] = np.clip(target_pose[:2], [0.25, -0.45], [0.77, 0.40]) + + # execute teleop command + env.exec_actions( + actions=[target_pose], + timestamps=[t_command_target-time.monotonic()+time.time()]) + precise_wait(t_cycle_end) + iter_idx += 1 + + # ========== policy control loop ============== + try: + # start episode + policy.reset() + start_delay = 1.0 + eval_t_start = time.time() + start_delay + t_start = time.monotonic() + start_delay + env.start_episode(eval_t_start) + # wait for 1/30 sec to get the closest frame actually + # reduces overall latency + frame_latency = 1/30 + precise_wait(eval_t_start - frame_latency, time_func=time.time) + print("Started!") + iter_idx = 0 + term_area_start_timestamp = float('inf') + perv_target_pose = None + while True: + # calculate timing + t_cycle_end = t_start + (iter_idx + steps_per_inference) * dt + + # get obs + print('get_obs') + obs = env.get_obs() + obs_timestamps = obs['timestamp'] + print(f'Obs latency {time.time() - obs_timestamps[-1]}') + + # run inference + with torch.no_grad(): + s = time.time() + obs_dict_np = get_real_obs_dict( + env_obs=obs, shape_meta=cfg.task.shape_meta) + obs_dict = dict_apply(obs_dict_np, + lambda x: torch.from_numpy(x).unsqueeze(0).to(device)) + result = 
policy.predict_action(obs_dict) + # this action starts from the first obs step + action = result['action'][0].detach().to('cpu').numpy() + print('Inference latency:', time.time() - s) + + # convert policy action to env actions + if delta_action: + assert len(action) == 1 + if perv_target_pose is None: + perv_target_pose = obs['robot_eef_pose'][-1] + this_target_pose = perv_target_pose.copy() + this_target_pose[[0,1]] += action[-1] + perv_target_pose = this_target_pose + this_target_poses = np.expand_dims(this_target_pose, axis=0) + else: + this_target_poses = np.zeros((len(action), len(target_pose)), dtype=np.float64) + this_target_poses[:] = target_pose + this_target_poses[:,[0,1]] = action + + # deal with timing + # the same step actions are always the target for + action_timestamps = (np.arange(len(action), dtype=np.float64) + action_offset + ) * dt + obs_timestamps[-1] + action_exec_latency = 0.01 + curr_time = time.time() + is_new = action_timestamps > (curr_time + action_exec_latency) + if np.sum(is_new) == 0: + # exceeded time budget, still do something + this_target_poses = this_target_poses[[-1]] + # schedule on next available step + next_step_idx = int(np.ceil((curr_time - eval_t_start) / dt)) + action_timestamp = eval_t_start + (next_step_idx) * dt + print('Over budget', action_timestamp - curr_time) + action_timestamps = np.array([action_timestamp]) + else: + this_target_poses = this_target_poses[is_new] + action_timestamps = action_timestamps[is_new] + + # clip actions + this_target_poses[:,:2] = np.clip( + this_target_poses[:,:2], [0.25, -0.45], [0.77, 0.40]) + + # execute actions + env.exec_actions( + actions=this_target_poses, + timestamps=action_timestamps + ) + print(f"Submitted {len(this_target_poses)} steps of actions.") + + # visualize + episode_id = env.replay_buffer.n_episodes + vis_img = obs[f'camera_{vis_camera_idx}'][-1] + text = 'Episode: {}, Time: {:.1f}'.format( + episode_id, time.monotonic() - t_start + ) + cv2.putText( + vis_img, + 
text, + (10,20), + fontFace=cv2.FONT_HERSHEY_SIMPLEX, + fontScale=0.5, + thickness=1, + color=(255,255,255) + ) + cv2.imshow('default', vis_img[...,::-1]) + + + key_stroke = cv2.pollKey() + if key_stroke == ord('s'): + # Stop episode + # Hand control back to human + env.end_episode() + print('Stopped.') + break + + # auto termination + terminate = False + if time.monotonic() - t_start > max_duration: + terminate = True + print('Terminated by the timeout!') + + term_pose = np.array([ 3.40948500e-01, 2.17721816e-01, 4.59076878e-02, 2.22014183e+00, -2.22184883e+00, -4.07186655e-04]) + curr_pose = obs['robot_eef_pose'][-1] + dist = np.linalg.norm((curr_pose - term_pose)[:2], axis=-1) + if dist < 0.03: + # in termination area + curr_timestamp = obs['timestamp'][-1] + if term_area_start_timestamp > curr_timestamp: + term_area_start_timestamp = curr_timestamp + else: + term_area_time = curr_timestamp - term_area_start_timestamp + if term_area_time > 0.5: + terminate = True + print('Terminated by the policy!') + else: + # out of the area + term_area_start_timestamp = float('inf') + + if terminate: + env.end_episode() + break + + # wait for execution + precise_wait(t_cycle_end - frame_latency) + iter_idx += steps_per_inference + + except KeyboardInterrupt: + print("Interrupted!") + # stop robot. 
def compute_metrics(log_df: pd.DataFrame, key: str,
        end_step: Optional[int]=None,
        k_min_loss: int=10,
        k_around_max: int=10,
        max_k_window: int=10,
        replace_slash: bool=True,
        ):
    """Compute summary statistics for one logged scalar column.

    Args:
        log_df: log dataframe (one row per logged step), as returned by
            read_json_log. Rows where `key` is null are ignored.
        key: column to summarize, e.g. 'test/mean_score'.
        end_step: if given, only consider rows before this index.
        k_min_loss: how many lowest-loss steps to average over.
        k_around_max: window width for the mean around the argmax.
        max_k_window: moving-average window for the max-of-window metric.
        replace_slash: replace '/' with '_' in the reported metric names.

    Returns:
        dict mapping '<aggregation>/<key>' -> value. Empty when `key` is
        absent or has no non-null entries. Loss-anchored metrics are only
        reported for loss columns actually present in `log_df`.
    """
    if key not in log_df:
        return dict()

    # prepare data
    if end_step is not None:
        log_df = log_df.iloc[:end_step]
    is_key = ~pd.isnull(log_df[key])
    is_key_idxs = is_key.index[is_key].to_numpy()
    if len(is_key_idxs) == 0:
        return dict()

    key_data = log_df[key][is_key].to_numpy()

    result = dict()
    log_key = key.replace('/', '_') if replace_slash else key

    # max over the whole series
    result['max/'+log_key] = np.max(key_data)

    # mean of a k_around_max-wide window centered (as well as possible)
    # on the argmax
    max_idx = np.argmax(key_data)
    end = min(max_idx + k_around_max // 2, len(key_data))
    start = max(end - k_around_max, 0)
    result['k_around_max/'+log_key] = np.mean(key_data[start:end])

    # maximum of the max_k_window-point moving average
    k_window_value = sn.uniform_filter1d(
        key_data, size=max_k_window, axis=0, mode='nearest')
    result['max_k_window/'+log_key] = np.max(k_window_value)

    # Loss-anchored metrics: the value of `key` at (or averaged over) the
    # lowest-loss steps. After validation was added to the workspace,
    # rollout happens at the last step of each epoch, where the reported
    # train_loss and val_loss are already the average for that epoch.
    # Guard against missing columns (e.g. logs produced before a loss
    # was recorded) instead of raising KeyError.
    for loss_name in ('train_loss', 'val_loss'):
        if loss_name not in log_df:
            continue
        loss = log_df[loss_name][is_key].to_numpy()
        # value of `key` at the single lowest-loss step
        result[f'min_{loss_name}/{log_key}'] = key_data[np.argmin(loss)]
        # mean of `key` over the k_min_loss lowest-loss steps
        min_loss_idxs = np.argsort(loss)[:k_min_loss]
        result[f'k_min_{loss_name}/{log_key}'] = np.mean(key_data[min_loss_idxs])

    # most recent value
    result['last/'+log_key] = key_data[-1]

    # global step of the last data point, for visualization alignment
    result['metric_global_step/'+log_key] = is_key_idxs[-1]
    return result
@click.command()
@click.option('--input', '-i', required=True, help='Root logging dir, contains train_* dirs')
@click.option('--key', '-k', multiple=True, default=['test/mean_score'])
@click.option('--interval', default=10, type=float)
@click.option('--replace_slash', default=True, type=bool)
@click.option('--index_key', '-ik', default='global_step')
@click.option('--use_wandb', '-w', is_flag=True, default=False)
@click.option('--project', default=None)
@click.option('--name', default=None)
@click.option('--id', default=None)
@click.option('--group', default=None)
def main(
        input,
        key,
        interval,
        replace_slash,
        index_key,
        use_wandb,
        # wandb args
        project,
        name,
        id,
        group):
    """Monitor a multirun directory and aggregate metrics across seeds.

    Waits for every train_*/logs.json.txt under `input` to exist, then
    polls them every `interval` seconds, computes aggregated metrics for
    each requested `key`, and appends updates to metrics/logs.json.txt
    (and optionally wandb). Runs forever; terminate externally.
    """
    root_dir = pathlib.Path(input)
    assert root_dir.is_dir()
    metrics_dir = root_dir.joinpath('metrics')
    metrics_dir.mkdir(exist_ok=True)

    # log both to file and to stdout
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s [%(levelname)s] %(message)s",
        handlers=[
            logging.FileHandler(str(metrics_dir.joinpath("metrics.log"))),
            logging.StreamHandler()
        ]
    )

    train_dirs = list(root_dir.glob('train_*'))
    log_files = [x.joinpath('logs.json.txt') for x in train_dirs]
    logging.info("Monitor waiting for log files!")
    while True:
        # wait for files to show up
        files_exist = True
        for log_file in log_files:
            if not log_file.is_file():
                files_exist = False
        if files_exist:
            break
        time.sleep(1.0)
    logging.info("All log files ready!")

    # init path
    metric_log_path = metrics_dir.joinpath('logs.json.txt')
    metric_path = metrics_dir.joinpath('metrics.json')
    config_path = root_dir.joinpath('config.yaml')

    # load config
    config = yaml.safe_load(config_path.open('r'))

    # init wandb
    wandb_run = None
    if use_wandb:
        wandb_kwargs = config['logging']
        if project is not None:
            wandb_kwargs['project'] = project
        if id is not None:
            wandb_kwargs['id'] = id
        if name is not None:
            wandb_kwargs['name'] = name
        if group is not None:
            wandb_kwargs['group'] = group
        wandb_kwargs['resume'] = True
        wandb_run = wandb.init(
            dir=str(metrics_dir),
            config=config,
            # auto-resume run, automatically load id
            # as long as using the same dir.
            # https://docs.wandb.ai/guides/track/advanced/resuming#resuming-guidance
            **wandb_kwargs
        )
        wandb.config.update(
            {
                "output_dir": str(root_dir),
            }
        )

    with JsonLogger(metric_log_path) as json_logger:
        last_log = json_logger.get_last_log()
        while True:
            # read json files
            log_dfs = [read_json_log(str(x), required_keys=key) for x in log_files]

            # previously logged data point
            last_log_idx = -1
            if last_log is not None:
                # NOTE(review): assumes the previously logged index_key is
                # still present in log 0 — TODO confirm for truncated logs
                last_log_idx = log_dfs[0].index[log_dfs[0][index_key] <= last_log[index_key]][-1]

            start_idx = last_log_idx + 1
            # last idx where we have a data point from all logs.
            # BUG FIX: min(*[...]) unpacks a single-element list into
            # min(int), which raises TypeError when there is exactly one
            # train_* dir; a generator expression handles any count.
            end_idx = min(len(x) for x in log_dfs)

            # log every position
            for this_idx in range(start_idx, end_idx):
                # compute metrics
                all_metrics = dict()
                global_step = log_dfs[0]['global_step'][this_idx]
                epoch = log_dfs[0]['epoch'][this_idx]
                all_metrics['global_step'] = global_step
                all_metrics['epoch'] = epoch
                for k in key:
                    metrics = compute_metrics_agg(
                        log_dfs=log_dfs, key=k, end_step=this_idx+1,
                        replace_slash=replace_slash)
                    all_metrics.update(metrics)

                # sanitize metrics to plain python numbers (JSON-safe)
                old_metrics = all_metrics
                all_metrics = dict()
                for k, v in old_metrics.items():
                    if isinstance(v, numbers.Integral):
                        all_metrics[k] = int(v)
                    elif isinstance(v, numbers.Number):
                        all_metrics[k] = float(v)

                # only emit when something actually changed
                has_update = all_metrics != last_log
                if has_update:
                    last_log = all_metrics
                    json_logger.log(all_metrics)

                    with metric_path.open('w') as f:
                        json.dump(all_metrics, f, sort_keys=True, indent=2)

                    if wandb_run is not None:
                        wandb_run.log(all_metrics, step=all_metrics[index_key])

                    logging.info(f"Metrics logged at step {all_metrics[index_key]}")

            time.sleep(interval)
def worker_fn(command_args, data_src=None, unbuffer_python=False, use_shell=False):
    """Execute a command as a subprocess inside a Ray worker.

    Args:
        command_args: command and arguments, as a list of strings.
        data_src: optional path to a shared data directory; symlinked as
            './data' in the worker's working directory before launch.
        unbuffer_python: set PYTHONUNBUFFERED for the child so its prints
            reach the driver without buffering latency.
        use_shell: join the args into one string and run through the shell.

    Returns:
        The subprocess return code (0 or -2/SIGINT are treated as success).

    Raises:
        RuntimeError: when the subprocess exits with any other code.
    """
    import os
    import subprocess
    import signal
    import time

    # make the shared data directory visible at the conventional location
    if data_src is not None:
        link_target = os.path.expanduser(data_src)
        link_path = os.path.join(os.getcwd(), 'data')
        try:
            os.symlink(src=link_target, dst=link_path)
        except FileExistsError:
            # a previous task on this worker already created it
            pass

    child_env = os.environ.copy()
    if unbuffer_python:
        # disable stdout/stderr buffering for a python child so there is
        # no delay between its print statements and us seeing the output
        child_env['PYTHONUNBUFFERED'] = 'TRUE'

    def _unblock_sigint():
        # Ray workers mask out Ctrl-C (SIGINT); re-enable it for the
        # child so it can be interrupted cleanly.
        import signal
        signal.pthread_sigmask(signal.SIG_UNBLOCK, {signal.SIGINT})

    launch_args = ' '.join(command_args) if use_shell else command_args

    # child stdout/stderr pass through to the ray worker, and from there
    # to the ray driver
    proc = subprocess.Popen(
        args=launch_args,
        env=child_env,
        preexec_fn=_unblock_sigint,
        shell=use_shell)

    # poll until the child exits, forwarding interrupts as SIGINT
    while proc.poll() is None:
        try:
            # sleep to ensure the monitor thread can acquire the GIL
            # and raise KeyboardInterrupt here
            time.sleep(0.01)
        except KeyboardInterrupt:
            proc.send_signal(signal.SIGINT)
            print('SIGINT sent to subprocess')
        except Exception as e:
            proc.terminate()
            raise e

    # -2 is the conventional return code after SIGINT; treat it as success
    if proc.returncode not in (0, -2):
        print("Failed execution!")
        raise RuntimeError("Failed execution.")
    return proc.returncode
+Start local ray cluster +(robodiff)$ export CUDA_VISIBLE_DEVICES=0,1,2 # select GPUs to be managed by the ray cluster +(robodiff)$ ray start --head --num-gpus=3 + +Training: +python ray_train_multirun.py --config-name=train_diffusion_unet_lowdim_workspace --seeds=42,43,44 --monitor_key=test/mean_score -- logger.mode=online training.eval_first=True +""" +import os +import ray +import click +import hydra +import yaml +import wandb +import pathlib +import collections +from pprint import pprint +from omegaconf import OmegaConf +from ray_exec import worker_fn +from ray.util.placement_group import ( + placement_group, +) +from ray.util.scheduling_strategies import PlacementGroupSchedulingStrategy + +OmegaConf.register_new_resolver("eval", eval, replace=True) + +@click.command() +@click.option('--config-name', '-cn', required=True, type=str) +@click.option('--config-dir', '-cd', default=None, type=str) +@click.option('--seeds', '-s', default='42,43,44', type=str) +@click.option('--monitor_key', '-k', multiple=True, default=['test/mean_score']) +@click.option('--ray_address', '-ra', default='auto') +@click.option('--num_cpus', '-nc', default=7, type=float) +@click.option('--num_gpus', '-ng', default=1, type=float) +@click.option('--max_retries', '-mr', default=0, type=int) +@click.option('--monitor_max_retires', default=3, type=int) +@click.option('--data_src', '-d', default='./data', type=str) +@click.option('--unbuffer_python', '-u', is_flag=True, default=False) +@click.option('--single_node', '-sn', is_flag=True, default=False, help='run all experiments on a single machine') +@click.argument('command_args', nargs=-1, type=str) +def main(config_name, config_dir, seeds, monitor_key, ray_address, + num_cpus, num_gpus, max_retries, monitor_max_retires, + data_src, unbuffer_python, + single_node, command_args): + # parse args + seeds = [int(x) for x in seeds.split(',')] + # expand path + if data_src is not None: + data_src = os.path.abspath(os.path.expanduser(data_src)) + + 
# initialize hydra + if config_dir is None: + config_path_abs = pathlib.Path(__file__).parent.joinpath( + 'diffusion_policy','config') + config_path_rel = str(config_path_abs.relative_to(pathlib.Path.cwd())) + else: + config_path_rel = config_dir + + run_command_args = list() + monitor_command_args = list() + with hydra.initialize( + version_base=None, + config_path=config_path_rel): + + # generate raw config + cfg = hydra.compose( + config_name=config_name, + overrides=command_args) + OmegaConf.resolve(cfg) + + # manually create output dir + output_dir = pathlib.Path(cfg.multi_run.run_dir) + output_dir.mkdir(parents=True, exist_ok=False) + config_path = output_dir.joinpath('config.yaml') + print(output_dir) + + # save current config + yaml.dump(OmegaConf.to_container(cfg, resolve=True), + config_path.open('w'), default_flow_style=False) + + # wandb + wandb_group_id = wandb.util.generate_id() + name_base = cfg.multi_run.wandb_name_base + + # create monitor command args + monitor_command_args = [ + 'python', + 'multirun_metrics.py', + '--input', str(output_dir), + '--use_wandb', + '--project', 'diffusion_policy_metrics', + '--group', wandb_group_id + ] + for k in monitor_key: + monitor_command_args.extend([ + '--key', k + ]) + + # generate command args + run_command_args = list() + for i, seed in enumerate(seeds): + test_start_seed = (seed + 1) * 100000 + this_output_dir = output_dir.joinpath(f'train_{i}') + this_output_dir.mkdir() + wandb_name = name_base + f'_train_{i}' + wandb_run_id = wandb_group_id + f'_train_{i}' + + this_command_args = [ + 'python', + 'train.py', + '--config-name='+config_name, + '--config-dir='+config_path_rel + ] + + this_command_args.extend(command_args) + this_command_args.extend([ + f'training.seed={seed}', + f'task.env_runner.test_start_seed={test_start_seed}', + f'logging.name={wandb_name}', + f'logging.id={wandb_run_id}', + f'logging.group={wandb_group_id}', + f'hydra.run.dir={this_output_dir}' + ]) + 
run_command_args.append(this_command_args) + + # init ray + root_dir = os.path.dirname(__file__) + runtime_env = { + 'working_dir': root_dir, + 'excludes': ['.git'], + 'pip': ['dm-control==1.0.9'] + } + ray.init( + address=ray_address, + runtime_env=runtime_env + ) + + # create resources for train + train_resources = dict() + + train_bundle = dict(train_resources) + train_bundle['CPU'] = num_cpus + train_bundle['GPU'] = num_gpus + + # create resources for monitor + monitor_resources = dict() + monitor_resources['CPU'] = 1 + + monitor_bundle = dict(monitor_resources) + + # aggregate bundle + bundle = collections.defaultdict(lambda:0) + n_train_bundles = 1 + if single_node: + n_train_bundles = len(seeds) + for _ in range(n_train_bundles): + for k, v in train_bundle.items(): + bundle[k] += v + for k, v in monitor_bundle.items(): + bundle[k] += v + bundle = dict(bundle) + + # create placement group + print("Creating placement group with resources:") + pprint(bundle) + pg = placement_group([bundle]) + + # run + task_name_map = dict() + task_refs = list() + for i, this_command_args in enumerate(run_command_args): + if single_node or i == (len(run_command_args) - 1): + print(f'Training worker {i} with placement group.') + ray.get(pg.ready()) + print("Placement Group created!") + worker_ray = ray.remote(worker_fn).options( + num_cpus=num_cpus, + num_gpus=num_gpus, + max_retries=max_retries, + resources=train_resources, + retry_exceptions=True, + scheduling_strategy=PlacementGroupSchedulingStrategy( + placement_group=pg) + ) + else: + print(f'Training worker {i} without placement group.') + worker_ray = ray.remote(worker_fn).options( + num_cpus=num_cpus, + num_gpus=num_gpus, + max_retries=max_retries, + resources=train_resources, + retry_exceptions=True, + ) + task_ref = worker_ray.remote( + this_command_args, data_src, unbuffer_python) + task_refs.append(task_ref) + task_name_map[task_ref] = f'train_{i}' + + # monitor worker is always packed on the same node + # as 
training worker 0 + ray.get(pg.ready()) + monitor_worker_ray = ray.remote(worker_fn).options( + num_cpus=1, + num_gpus=0, + max_retries=monitor_max_retires, + # resources=monitor_resources, + retry_exceptions=True, + scheduling_strategy=PlacementGroupSchedulingStrategy( + placement_group=pg) + ) + monitor_ref = monitor_worker_ray.remote( + monitor_command_args, data_src, unbuffer_python) + task_name_map[monitor_ref] = 'metrics' + + try: + # normal case + ready_refs = list() + rest_refs = task_refs + while len(ready_refs) < len(task_refs): + this_ready_refs, rest_refs = ray.wait(rest_refs, + num_returns=1, timeout=None, fetch_local=True) + cancel_other_tasks = False + for ref in this_ready_refs: + task_name = task_name_map[ref] + try: + result = ray.get(ref) + print(f"Task {task_name} finished with result: {result}") + except KeyboardInterrupt as e: + # skip to outer try catch + raise KeyboardInterrupt + except Exception as e: + print(f"Task {task_name} raised exception: {e}") + this_cancel_other_tasks = True + if isinstance(e, ray.exceptions.RayTaskError): + if isinstance(e.cause, ray.exceptions.TaskCancelledError): + this_cancel_other_tasks = False + cancel_other_tasks = cancel_other_tasks or this_cancel_other_tasks + ready_refs.append(ref) + if cancel_other_tasks: + print('Exception! 
Cancelling all other tasks.') + # cancel all other refs + for _ref in rest_refs: + ray.cancel(_ref, force=False) + print("Training tasks done.") + ray.cancel(monitor_ref, force=False) + except KeyboardInterrupt: + print('KeyboardInterrupt received in the driver.') + # a KeyboardInterrupt will be raised in worker + _ = [ray.cancel(x, force=False) for x in task_refs + [monitor_ref]] + print('KeyboardInterrupt sent to workers.') + except Exception as e: + # worker will be terminated + _ = [ray.cancel(x, force=True) for x in task_refs + [monitor_ref]] + raise e + + for ref in task_refs + [monitor_ref]: + task_name = task_name_map[ref] + try: + result = ray.get(ref) + print(f"Task {task_name} finished with result: {result}") + except KeyboardInterrupt as e: + # force kill everything. + print("Force killing all workers") + _ = [ray.cancel(x, force=True) for x in task_refs] + ray.cancel(monitor_ref, force=True) + except Exception as e: + print(f"Task {task_name} raised exception: {e}") + + +if __name__ == "__main__": + main() diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..ca8a70c --- /dev/null +++ b/setup.py @@ -0,0 +1,6 @@ +from setuptools import setup, find_packages + +setup( + name = 'diffusion_policy', + packages = find_packages(), +) diff --git a/tests/test_block_pushing.py b/tests/test_block_pushing.py new file mode 100644 index 0000000..2813e84 --- /dev/null +++ b/tests/test_block_pushing.py @@ -0,0 +1,44 @@ +import sys +import os + +ROOT_DIR = os.path.dirname(os.path.dirname(__file__)) +sys.path.append(ROOT_DIR) +os.chdir(ROOT_DIR) + +from diffusion_policy.env.block_pushing.block_pushing_multimodal import BlockPushMultimodal +from gym.wrappers import FlattenObservation +from diffusion_policy.gym_util.multistep_wrapper import MultiStepWrapper +from diffusion_policy.gym_util.video_wrapper import VideoWrapper + +def test(): + env = MultiStepWrapper( + VideoWrapper( + FlattenObservation( + BlockPushMultimodal() + ), + enabled=True, + 
def test():
    """Smoke-test BlockPushMultimodal through the training wrapper stack.

    Fixes over the original script: removes leftover `pdb.set_trace()`
    debug traps (they block non-interactive runs) and actually resets the
    wrapped env instead of discarding it unused.
    """
    # Full stack used in training: FlattenObservation -> VideoWrapper
    # -> MultiStepWrapper.
    env = MultiStepWrapper(
        VideoWrapper(
            FlattenObservation(
                BlockPushMultimodal()
            ),
            enabled=True,
            steps_per_render=2
        ),
        n_obs_steps=2,
        n_action_steps=8,
        max_episode_steps=16
    )
    obs = env.reset()

    # Bare FlattenObservation env: check the commanded translation shows
    # up in the observation. NOTE(review): the 8:10 slice is taken from
    # the original script — confirm against BlockPushMultimodal's
    # flattened observation layout.
    env = FlattenObservation(BlockPushMultimodal())
    obs = env.reset()
    action = env.action_space.sample()
    next_obs, reward, done, info = env.step(action)
    print(obs[8:10] + action - next_obs[8:10])

    # A few more random steps plus render must not raise.
    for i in range(3):
        obs, reward, done, info = env.step(env.action_space.sample())
        img = env.render()
    print("Done!", done)
h,w,_ = color.shape + factor = 4 + color = cv2.resize(color, (w//factor,h//factor), interpolation=cv2.INTER_AREA) + # color = color[:,140:500] + data['color'] = color + return data + + from diffusion_policy.common.cv2_util import get_image_transform + color_transform = get_image_transform( + input_res=(1280,720), + output_res=(640,480), + bgr_to_rgb=False) + def transform(data): + data['color'] = color_transform(data['color']) + return data + + # one thread per camera + video_recorder = VideoRecorder.create_h264( + fps=30, + codec='h264', + thread_type='FRAME' + ) + + with MultiRealsense( + resolution=(1280,720), + capture_fps=30, + record_fps=15, + enable_color=True, + # advanced_mode_config=config, + transform=transform, + # recording_transform=transform, + # video_recorder=video_recorder, + verbose=True + ) as realsense: + realsense.set_exposure(exposure=150, gain=5) + intr = realsense.get_intrinsics() + print(intr) + + video_path = 'data_local/test' + rec_start_time = time.time() + 1 + realsense.start_recording(video_path, start_time=rec_start_time) + realsense.restart_put(rec_start_time) + + out = None + vis_img = None + while True: + out = realsense.get(out=out) + + # bgr = out['color'] + # print(bgr.shape) + # vis_img = np.concatenate(list(bgr), axis=0, out=vis_img) + # cv2.imshow('default', vis_img) + # key = cv2.pollKey() + # if key == ord('q'): + # break + + time.sleep(1/60) + if time.time() > (rec_start_time + 20.0): + break + + +if __name__ == "__main__": + test() diff --git a/tests/test_pose_trajectory_interpolator.py b/tests/test_pose_trajectory_interpolator.py new file mode 100644 index 0000000..ba94abe --- /dev/null +++ b/tests/test_pose_trajectory_interpolator.py @@ -0,0 +1,126 @@ +import sys +import os + +ROOT_DIR = os.path.dirname(os.path.dirname(__file__)) +sys.path.append(ROOT_DIR) +os.chdir(ROOT_DIR) + +from tqdm import tqdm +import numpy as np +import scipy.interpolate as si +import scipy.spatial.transform as st +from 
def test_rotation_distance():
    """Fuzz rotation_distance against an align_vectors reference."""
    def rotation_distance_align(a: st.Rotation, b: st.Rotation) -> float:
        # Reference: magnitude of the rotation aligning the frame axes
        # of `a` onto those of `b` (columns of the rotation matrices).
        return st.Rotation.align_vectors(
            b.as_matrix().T, a.as_matrix().T)[0].magnitude()

    for _ in range(10000):
        a = st.Rotation.from_euler('xyz', np.random.uniform(-7, 7, size=3))
        b = st.Rotation.from_euler('xyz', np.random.uniform(-7, 7, size=3))
        x = rotation_distance(a, b)
        y = rotation_distance_align(a, b)
        assert abs(x - y) < 1e-7


def test_pose_trajectory_interpolator():
    """Trim a 3-waypoint interpolator at several windows and sample it.

    Fix over the original: the (-1, 4) trim stanza was copy-paste
    duplicated verbatim; the duplicate and the unused `times`/`poses`
    locals are removed. Asserted knot counts are unchanged.
    """
    t = np.linspace(-1, 5, 100)
    interp = PoseTrajectoryInterpolator(
        [0, 1, 3],
        np.zeros((3, 6))
    )

    # (trim window, expected number of knots after trimming)
    cases = [
        ((-1, 4), 5),
        ((0.5, 3.5), 4),
        ((0.5, 2.5), 3),
        ((0.5, 1.5), 3),
        ((1.2, 1.5), 2),
        ((1.3, 1.3), 1),   # degenerate window collapses to a single knot
    ]
    for (lo, hi), n_knots in cases:
        trimmed_interp = interp.trim(lo, hi)
        assert len(trimmed_interp.times) == n_knots
        trimmed_interp(t)  # sampling the trimmed spline must not raise
rng.normal(0, 3, size=6) + + if rng.random() < 0.1: + last_waypoint_time = None + if rng.random() < 0.1: + curr_time = None + + interp = PoseTrajectoryInterpolator( + times=waypoint_times, + poses=waypoint_poses) + new_interp = interp.add_waypoint( + pose=new_pose, + time=insert_time, + max_pos_speed=max_pos_speed, + max_rot_speed=max_rot_speed, + curr_time=curr_time, + last_waypoint_time=last_waypoint_time + ) + +def test_drive_to_waypoint(): + # fuzz testing + for i in tqdm(range(10000)): + rng = np.random.default_rng(i) + n_waypoints = rng.integers(1, 5) + waypoint_times = np.sort(rng.uniform(0, 1, size=n_waypoints)) + insert_time = rng.uniform(-0.1, 1.1) + curr_time = rng.uniform(-0.1, 1.1) + max_pos_speed = rng.poisson(3) + 1e-3 + max_rot_speed = rng.poisson(3) + 1e-3 + waypoint_poses = rng.normal(0, 3, size=(n_waypoints, 6)) + new_pose = rng.normal(0, 3, size=6) + + interp = PoseTrajectoryInterpolator( + times=waypoint_times, + poses=waypoint_poses) + new_interp = interp.drive_to_waypoint( + pose=new_pose, + time=insert_time, + curr_time=curr_time, + max_pos_speed=max_pos_speed, + max_rot_speed=max_rot_speed + ) + + + +if __name__ == '__main__': + test_drive_to_waypoint() diff --git a/tests/test_precise_sleep.py b/tests/test_precise_sleep.py new file mode 100644 index 0000000..85c0d94 --- /dev/null +++ b/tests/test_precise_sleep.py @@ -0,0 +1,56 @@ +import sys +import os + +ROOT_DIR = os.path.dirname(os.path.dirname(__file__)) +sys.path.append(ROOT_DIR) +os.chdir(ROOT_DIR) + +import time +import numpy as np +from diffusion_policy.common.precise_sleep import precise_sleep, precise_wait + + +def test_sleep(): + dt = 0.1 + tol = 1e-3 + time_samples = list() + for i in range(100): + precise_sleep(dt) + # time.sleep(dt) + time_samples.append(time.monotonic()) + time_deltas = np.diff(time_samples) + + from matplotlib import pyplot as plt + plt.plot(time_deltas) + plt.ylim((dt-tol,dt+tol)) + + +def test_wait(): + dt = 0.1 + tol = 1e-3 + errors = list() + t_start = 
def test_wait():
    """Compare plain time.sleep against precise_wait on 0.1 s deadlines.

    Runs 99 absolute deadlines with each strategy, records the signed
    overshoot, and plots both error traces.

    Fix over the original: the remaining time handed to time.sleep can
    go negative once the loop overruns a deadline, and time.sleep raises
    ValueError for negative arguments — clamp at zero.
    """
    dt = 0.1
    tol = 1e-3

    # Baseline: hit each absolute deadline with a single time.sleep.
    errors = list()
    t_start = time.monotonic()
    for i in range(1, 100):
        t_end_desired = t_start + i * dt
        # max(0, ...): never pass a negative remainder to time.sleep.
        time.sleep(max(0.0, t_end_desired - time.monotonic()))
        t_end = time.monotonic()
        errors.append(t_end - t_end_desired)

    # Strategy under test: sleep/spin hybrid.
    new_errors = list()
    t_start = time.monotonic()
    for i in range(1, 100):
        t_end_desired = t_start + i * dt
        precise_wait(t_end_desired)
        t_end = time.monotonic()
        new_errors.append(t_end - t_end_desired)

    from matplotlib import pyplot as plt
    plt.plot(errors, label='time.sleep')
    plt.plot(new_errors, label='sleep/spin hybrid')
    plt.ylim((-tol, +tol))
    plt.title('0.1 sec sleep error')
    plt.legend()
os.path.expanduser('~/dev/diffusion_policy/data/pusht_cchi2_v2_replay.zarr') + out_path = os.path.expanduser('~/dev/diffusion_policy/data/test.zarr') + out_store = zarr.DirectoryStore(out_path) + buff.save_to_store(out_store) + + buff = ReplayBuffer.copy_from_path(out_path, store=zarr.MemoryStore()) + buff.pop_episode() + + +def test_pop(): + buff = ReplayBuffer.create_from_path( + '/home/chengchi/dev/diffusion_policy/data/pusht_cchi_v3_replay.zarr', + mode='rw') diff --git a/tests/test_ring_buffer.py b/tests/test_ring_buffer.py new file mode 100644 index 0000000..18d03bc --- /dev/null +++ b/tests/test_ring_buffer.py @@ -0,0 +1,188 @@ +import sys +import os + +ROOT_DIR = os.path.dirname(os.path.dirname(__file__)) +sys.path.append(ROOT_DIR) +os.chdir(ROOT_DIR) + +import time +import numpy as np +import multiprocessing as mp +from multiprocessing.managers import SharedMemoryManager +from diffusion_policy.shared_memory.shared_memory_ring_buffer import ( + SharedMemoryRingBuffer, + SharedAtomicCounter) + + +def test(): + shm_manager = SharedMemoryManager() + shm_manager.start() + ring_buffer = SharedMemoryRingBuffer.create_from_examples( + shm_manager, + {'timestamp': np.array(0, dtype=np.float64)}, + buffer_size=128 + ) + for i in range(30): + ring_buffer.put({ + 'timestamp': np.array( + time.perf_counter(), + dtype=np.float64) + }) + print(ring_buffer.get()) + + +def _timestamp_worker(ring_buffer, start_event, stop_event): + while not stop_event.is_set(): + start_event.set() + ring_buffer.put({ + 'timestamp': np.array( + time.time(), + dtype=np.float64) + }) + + +def test_mp(): + shm_manager = SharedMemoryManager() + shm_manager.start() + ring_buffer = SharedMemoryRingBuffer.create_from_examples( + shm_manager, + {'timestamp': np.array(0, dtype=np.float64)}, + get_max_k=1, + get_time_budget=0.01, + put_desired_frequency=1000 + ) + start_event = mp.Event() + stop_event = mp.Event() + worker = mp.Process(target=_timestamp_worker, args=( + ring_buffer, start_event, 
def test_get_last_k():
    """get_last_k must return the k most recent entries, oldest first."""
    manager = SharedMemoryManager()
    manager.start()
    ring = SharedMemoryRingBuffer.create_from_examples(
        manager,
        {'counter': np.array(0, dtype=np.int64)},
        buffer_size=8
    )

    from collections import deque
    k = 4
    expected = deque(maxlen=k)  # reference rolling window of the last k values
    for value in range(100):
        ring.put({
            'counter': np.array(value, dtype=np.int64)
        })
        expected.append(value)
        # Start comparing once strictly more than k entries were written.
        if value > k:
            recent = ring.get_last_k(k)['counter']
            assert np.allclose(recent, expected)

    print(ring.shared_arrays['counter'].get())
    result = ring.get_last_k(4)
    print(result)
def test_timing_mp():
    """Measure get_last_k latency/throughput against a 200 Hz producer process."""
    shm_manager = SharedMemoryManager()
    shm_manager.start()

    hz = 200
    img_shape = (1920, 1080, 3)
    ring_buffer = SharedMemoryRingBuffer.create_from_examples(
        shm_manager,
        examples={
            'img': np.zeros(img_shape, dtype=np.uint8),
            'timestamp': time.time(),
            'counter': 0
        },
        get_max_k=60,
        get_time_budget=0.02,
        put_desired_frequency=hz
    )

    start_event = mp.Event()
    stop_event = mp.Event()
    producer = mp.Process(
        target=_timestamp_image_worker,
        args=(ring_buffer, img_shape, 1 / hz, start_event, stop_event))
    producer.start()
    start_event.wait()  # block until the producer's first put landed

    out = None
    k = 1
    t_begin = time.monotonic()
    for _ in range(1000):
        if ring_buffer.count < k:
            time.sleep(0)  # yield the CPU and retry until enough samples exist
            continue
        out = ring_buffer.get_last_k(k=k, out=out)
        newest = float(out['timestamp'][-1])
        print('latency', time.time() - newest)
    print('Get Hz', 1 / (time.monotonic() - t_begin) * 1000)
    stop_event.set()
    producer.join()
def test():
    """Smoke-test RobomimicLowdimRunner built from the lift_lowdim task config."""
    import os
    from omegaconf import OmegaConf
    cfg_path = os.path.expanduser(
        '~/dev/diffusion_policy/diffusion_policy/config/task/lift_lowdim.yaml')
    cfg = OmegaConf.load(cfg_path)

    # Shrink the run: single-step policy horizon, one train env, no test envs.
    cfg['n_obs_steps'] = 1
    cfg['n_action_steps'] = 1
    cfg['past_action_visible'] = False
    runner_cfg = cfg['env_runner']
    runner_cfg['n_train'] = 1
    runner_cfg['n_test'] = 0
    del runner_cfg['_target_']  # instantiate directly, not via hydra

    runner = RobomimicLowdimRunner(
        **runner_cfg,
        output_dir='/tmp/test')

    # Seed and reset must not raise.
    env = runner.env
    env.seed(seeds=runner.env_seeds)
    obs = env.reset()
= SharedMemoryQueue.create_from_examples( + shm_manager=shm_manager, + examples=example, + buffer_size=3 + ) + raised = False + try: + queue.get() + except Empty: + raised = True + assert raised + + data = { + 'cmd': 1, + 'pose': np.ones((6,)) + } + queue.put(data) + result = queue.get() + assert result['cmd'] == data['cmd'] + assert np.allclose(result['pose'], data['pose']) + + queue.put(data) + queue.put(data) + queue.put(data) + assert queue.qsize() == 3 + raised = False + try: + queue.put(data) + except Full: + raised = True + assert raised + + result = queue.get_all() + assert np.allclose(result['cmd'], [1,1,1]) + + queue.put({'cmd': 0}) + queue.put({'cmd': 1}) + queue.put({'cmd': 2}) + queue.get() + queue.put({'cmd': 3}) + + result = queue.get_k(3) + assert np.allclose(result['cmd'], [1,2,3]) + + queue.clear() + +if __name__ == "__main__": + test() diff --git a/tests/test_single_realsense.py b/tests/test_single_realsense.py new file mode 100644 index 0000000..836ad6b --- /dev/null +++ b/tests/test_single_realsense.py @@ -0,0 +1,87 @@ +import sys +import os + +ROOT_DIR = os.path.dirname(os.path.dirname(__file__)) +sys.path.append(ROOT_DIR) +os.chdir(ROOT_DIR) + +import cv2 +import json +import time +from multiprocessing.managers import SharedMemoryManager +from diffusion_policy.real_world.single_realsense import SingleRealsense + +def test(): + + serials = SingleRealsense.get_connected_devices_serial() + # import pdb; pdb.set_trace() + serial = serials[0] + config = json.load(open('/home/cchi/dev/diffusion_policy/diffusion_policy/real_world/realsense_config/415_high_accuracy_mode.json', 'r')) + + def transform(data): + color = data['color'] + h,w,_ = color.shape + factor = 2 + color = cv2.resize(color, (w//factor,h//factor), interpolation=cv2.INTER_AREA) + # color = color[:,140:500] + data['color'] = color + return data + + # at 960x540 with //3, 60fps and 30fps are indistinguishable + + with SharedMemoryManager() as shm_manager: + with SingleRealsense( + 
shm_manager=shm_manager, + serial_number=serial, + resolution=(1280,720), + # resolution=(960,540), + # resolution=(640,480), + capture_fps=30, + enable_color=True, + # enable_depth=True, + # enable_infrared=True, + # advanced_mode_config=config, + # transform=transform, + # recording_transform=transform + # verbose=True + ) as realsense: + cv2.setNumThreads(1) + realsense.set_exposure(exposure=150, gain=5) + intr = realsense.get_intrinsics() + print(intr) + + + video_path = 'data_local/test.mp4' + rec_start_time = time.time() + 2 + realsense.start_recording(video_path, start_time=rec_start_time) + + data = None + while True: + data = realsense.get(out=data) + t = time.time() + # print('capture_latency', data['receive_timestamp']-data['capture_timestamp'], 'receive_latency', t - data['receive_timestamp']) + # print('receive', t - data['receive_timestamp']) + + # dt = time.time() - data['timestamp'] + # print(dt) + # print(data['capture_timestamp'] - rec_start_time) + + bgr = data['color'] + # print(bgr.shape) + cv2.imshow('default', bgr) + key = cv2.pollKey() + # if key == ord('q'): + # break + # elif key == ord('r'): + # video_path = 'data_local/test.mp4' + # realsense.start_recording(video_path) + # elif key == ord('s'): + # realsense.stop_recording() + + time.sleep(1/60) + if time.time() > (rec_start_time + 20.0): + break + + +if __name__ == "__main__": + test() diff --git a/tests/test_timestamp_accumulator.py b/tests/test_timestamp_accumulator.py new file mode 100644 index 0000000..144826b --- /dev/null +++ b/tests/test_timestamp_accumulator.py @@ -0,0 +1,151 @@ +import sys +import os + +ROOT_DIR = os.path.dirname(os.path.dirname(__file__)) +sys.path.append(ROOT_DIR) +os.chdir(ROOT_DIR) + +import numpy as np +import time +from diffusion_policy.common.timestamp_accumulator import ( + get_accumulate_timestamp_idxs, + TimestampObsAccumulator, + TimestampActionAccumulator +) + + +def test_index(): + buffer = np.zeros(16) + start_time = 0.0 + dt = 1/10 + + 
timestamps = np.linspace(0,1,100) + gi = list() + next_global_idx = 0 + + local_idxs, global_idxs, next_global_idx = get_accumulate_timestamp_idxs(timestamps, + start_time=start_time, dt=dt, next_global_idx=next_global_idx) + assert local_idxs[0] == 0 + assert global_idxs[0] == 0 + # print(local_idxs) + # print(global_idxs) + # print(timestamps[local_idxs]) + buffer[global_idxs] = timestamps[local_idxs] + gi.extend(global_idxs) + + timestamps = np.linspace(0.5,1.5,100) + local_idxs, global_idxs, next_global_idx = get_accumulate_timestamp_idxs(timestamps, + start_time=start_time, dt=dt, next_global_idx = next_global_idx) + # print(local_idxs) + # print(global_idxs) + # print(timestamps[local_idxs]) + # import pdb; pdb.set_trace() + buffer[global_idxs] = timestamps[local_idxs] + gi.extend(global_idxs) + + assert np.all(buffer[1:] > buffer[:-1]) + assert np.all(np.array(gi) == np.array(list(range(len(gi))))) + # print(buffer) + + # start over + next_global_idx = 0 + timestamps = np.linspace(0,1,3) + local_idxs, global_idxs, next_global_idx = get_accumulate_timestamp_idxs(timestamps, + start_time=start_time, dt=dt, next_global_idx = next_global_idx) + assert local_idxs[0] == 0 + assert local_idxs[-1] == 2 + # print(local_idxs) + # print(global_idxs) + # print(timestamps[local_idxs]) + + # test numerical error issue + # this becomes a problem when eps <= 1e-7 + start_time = time.time() + next_global_idx = 0 + timestamps = np.arange(100000) * dt + start_time + local_idxs, global_idxs, next_global_idx = get_accumulate_timestamp_idxs(timestamps, + start_time=start_time, dt=dt, next_global_idx = next_global_idx) + assert local_idxs == global_idxs + # print(local_idxs) + # print(global_idxs) + # print(timestamps[local_idxs]) + + +def test_obs_accumulator(): + dt = 1/10 + ddt = 1/100 + n = 100 + d = 6 + start_time = time.time() + toa = TimestampObsAccumulator(start_time, dt) + poses = np.arange(n).reshape((n,1)) + poses = np.repeat(poses, d, axis=1) + timestamps = 
def test_action_accumulator():
    """TimestampActionAccumulator: earlier puts don't shift the grid, later puts extend it."""
    dt = 1 / 10
    n, d = 10, 6
    start_time = time.time()
    taa = TimestampActionAccumulator(start_time, dt)

    # n actions; action i is a constant d-vector of value i.
    actions = np.repeat(np.arange(n).reshape((n, 1)), d, axis=1)
    timestamps = np.arange(n) * dt + start_time

    taa.put(actions, timestamps)
    assert np.all(taa.actions == actions)
    assert np.all(taa.timestamps == timestamps)

    # Putting half a window earlier must leave the timestamp grid intact.
    taa.put(actions - 5, timestamps - 0.5)
    assert np.allclose(taa.timestamps, timestamps)

    # Putting half a window later extends the buffer to 15 slots with
    # consecutive action values.
    taa.put(actions + 5, timestamps + 0.5)
    assert len(taa) == 15
    assert np.all(taa.actions[:, 0] == np.arange(15))
@hydra.main(
    version_base=None,
    config_path=str(pathlib.Path(__file__).parent.joinpath(
        'diffusion_policy', 'config'))
)
def main(cfg: OmegaConf):
    """Hydra entry point: instantiate the configured workspace and run it."""
    # Resolve immediately so every resolver (in particular ${now:}) is
    # evaluated once and all references see the same value.
    OmegaConf.resolve(cfg)

    workspace_cls = hydra.utils.get_class(cfg._target_)
    workspace: BaseWorkspace = workspace_cls(cfg)
    workspace.run()

if __name__ == "__main__":
    main()