Files
roboimi/roboimi/vla/conf/agent/resnet_imf_attnres.yaml
2026-04-01 23:35:31 +08:00

41 lines
873 B
YAML

# @package agent
# Hydra config for the agent class named by `_target_` below.
# NOTE(review): the nesting under `vision_backbone` and `head` was restored
# from a flattened listing; confirm key placement against the constructors of
# the target agent, the backbone, and the head.

# Config groups composed into this package. `_self_` is listed last, so the
# values written in this file are merged after the group defaults.
defaults:
  - /backbone@vision_backbone: resnet_diffusion
  - /modules@state_encoder: identity_state_encoder
  - /modules@action_encoder: identity_action_encoder
  - /head: imf_transformer1d
  - _self_

_target_: roboimi.vla.agent_imf.IMFVLAAgent

# Agent-level settings; several are referenced below via `${agent.*}`.
action_dim: 16
obs_dim: 16
normalization_type: "min_max"
pred_horizon: 16
obs_horizon: 2
num_action_steps: 8
camera_names: ${data.camera_names}
# NOTE(review): presumably must match the length of `camera_names` — confirm.
num_cams: 3

# Overrides applied on top of the `resnet_diffusion` backbone defaults.
vision_backbone:
  num_cameras: ${agent.num_cams}
  camera_names: ${agent.camera_names}

diffusion_steps: 100
inference_steps: 1
head_type: "transformer"

# Overrides applied on top of the `imf_transformer1d` head defaults.
head:
  input_dim: ${agent.action_dim}
  output_dim: ${agent.action_dim}
  horizon: ${agent.pred_horizon}
  n_obs_steps: ${agent.obs_horizon}
  # NOTE(review): hard-coded conditioning width; presumably has to agree with
  # the feature size produced by the backbone/encoders — verify when changing
  # `num_cams` or `obs_dim`.
  cond_dim: 208
  causal_attn: false
  time_as_cond: true
  obs_as_cond: true
  n_cond_layers: 0

  backbone_type: attnres_full
  n_head: 1
  n_kv_head: 1