Files
roboimi/roboimi/vla/conf/agent/resnet_diffusion.yaml
2026-02-12 19:54:11 +08:00

39 lines
1.2 KiB
YAML
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# @package agent
# Agent configuration: picks the sub-configs the agent is composed from and
# sets the top-level dimension, horizon, camera and diffusion parameters.
# NOTE(review): the package header, `defaults`, `_self_` and `_target_` follow
# Hydra conventions; confirm against the code that loads this file.
defaults:
# Alternative vision backbone option, currently disabled.
# - /backbone@vision_backbone: resnet
# Each entry has the form `/<group>@<destination key>: <option>`.
- /backbone@vision_backbone: resnet_diffusion
- /modules@state_encoder: identity_state_encoder
- /modules@action_encoder: identity_action_encoder
- /head: conditional_unet1d
# `_self_` is listed last; presumably so the values in this file override the
# entries above on conflict. TODO confirm.
- _self_
# Dotted path of the agent class; presumably the object instantiated from this
# config. TODO confirm.
_target_: roboimi.vla.agent.VLAAgent
# ====================
# Model dimension configuration
# ====================
action_dim: 16 # action dimension (number of robot joints)
obs_dim: 16 # proprioceptive observation dimension (joint positions)
# ====================
# Normalization configuration
# ====================
normalization_type: "min_max" # "min_max" or "gaussian"
# ====================
# Time-step (horizon) configuration
# ====================
pred_horizon: 8 # number of future action steps to predict
obs_horizon: 2 # number of past observation steps used
num_action_steps: 4 # number of action steps actually executed per inference (should be <= pred_horizon - obs_horizon + 1)
# ====================
# Camera configuration
# ====================
num_cams: 3 # number of cameras (r_vis, top, front)
# ====================
# Diffusion process configuration
# ====================
diffusion_steps: 100 # number of diffusion training steps (DDPM)
inference_steps: 10 # number of denoising steps at inference (DDIM), fixed at 10