From 31419a6fc194a5d36e3eaf5f36b043a958e02cf0 Mon Sep 17 00:00:00 2001 From: gouhanke <12219217+gouhanke@user.noreply.gitee.com> Date: Fri, 6 Feb 2026 11:53:01 +0800 Subject: [PATCH] =?UTF-8?q?chore(camera):=20=E6=B7=BB=E5=8A=A0front?= =?UTF-8?q?=E7=9B=B8=E6=9C=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../assets/models/manipulators/DianaMed/table_square.xml | 1 + roboimi/assets/robots/diana_med.py | 4 ++-- roboimi/demos/diana_record_sim_episodes.py | 2 +- roboimi/demos/eval_vla.py | 6 +++--- roboimi/envs/double_base.py | 8 ++++++++ roboimi/envs/double_pos_ctrl_env.py | 3 ++- roboimi/utils/constants.py | 2 +- roboimi/vla/conf/agent/resnet_diffusion.yaml | 2 +- roboimi/vla/conf/config.yaml | 2 +- roboimi/vla/conf/data/resnet_dataset.yaml | 1 + roboimi/vla/conf/data/siglip2.yaml | 2 +- roboimi/vla/data/dataset.py | 2 +- 12 files changed, 23 insertions(+), 12 deletions(-) diff --git a/roboimi/assets/models/manipulators/DianaMed/table_square.xml b/roboimi/assets/models/manipulators/DianaMed/table_square.xml index 4813a53..a629d19 100644 --- a/roboimi/assets/models/manipulators/DianaMed/table_square.xml +++ b/roboimi/assets/models/manipulators/DianaMed/table_square.xml @@ -8,5 +8,6 @@ + diff --git a/roboimi/assets/robots/diana_med.py b/roboimi/assets/robots/diana_med.py index 234b50e..0c26ca0 100644 --- a/roboimi/assets/robots/diana_med.py +++ b/roboimi/assets/robots/diana_med.py @@ -58,8 +58,8 @@ class BiDianaMed(ArmBase): def __init__(self): super().__init__( name="Bidiana", - urdf_path="./assets/models/manipulators/DianaMed/DualDianaMed.urdf", - xml_path="./assets/models/manipulators/DianaMed/bi_diana_transfer_ee.xml", + urdf_path="roboimi/assets/models/manipulators/DianaMed/DualDianaMed.urdf", + xml_path="roboimi/assets/models/manipulators/DianaMed/bi_diana_transfer_ee.xml", gripper=None ) self.left_arm = self.Arm(self, 'single', self.urdf_path) diff --git a/roboimi/demos/diana_record_sim_episodes.py 
b/roboimi/demos/diana_record_sim_episodes.py index 5eadf79..63a46bd 100644 --- a/roboimi/demos/diana_record_sim_episodes.py +++ b/roboimi/demos/diana_record_sim_episodes.py @@ -21,7 +21,7 @@ def main(): render_cam_name = 'angle' episode_len = 700 #SIM_TASK_CONFIGS[task_name]['episode_len'] - camera_names = ['angle','r_vis', 'top'] #SIM_TASK_CONFIGS[task_name]['camera_names'] + camera_names = ['angle','r_vis', 'top', 'front'] #SIM_TASK_CONFIGS[task_name]['camera_names'] if task_name == 'sim_transfer': policy = TestPickAndTransferPolicy(inject_noise) print(task_name) diff --git a/roboimi/demos/eval_vla.py b/roboimi/demos/eval_vla.py index 9d14756..91df49b 100644 --- a/roboimi/demos/eval_vla.py +++ b/roboimi/demos/eval_vla.py @@ -29,7 +29,7 @@ class VLAEvaluator: self, agent: torch.nn.Module, device: str = 'cuda', - camera_names: List[str] = ['r_vis', 'top'], + camera_names: List[str] = ['r_vis', 'top', 'front'], num_queries: int = 1, obs_horizon: int = 2, pred_horizon: int = 16, @@ -351,7 +351,7 @@ def evaluate_policy( max_timesteps: int = 700, task_name: str = 'sim_transfer', device: str = 'cuda', - camera_names: List[str] = ['r_vis', 'top'], + camera_names: List[str] = ['r_vis', 'top', 'front'], num_queries: int = 1, obs_horizon: int = 2, save_video: bool = True @@ -500,7 +500,7 @@ def main(): help='Maximum timesteps per episode') parser.add_argument('--device', type=str, default='cuda', help='Device for inference') - parser.add_argument('--camera_names', nargs='+', default=['r_vis', 'top'], + parser.add_argument('--camera_names', nargs='+', default=['r_vis', 'top', 'front'], help='Camera names to use') parser.add_argument('--num_queries', type=int, default=16, help='Policy query frequency (timesteps)') diff --git a/roboimi/envs/double_base.py b/roboimi/envs/double_base.py index 1b7785b..55b1067 100644 --- a/roboimi/envs/double_base.py +++ b/roboimi/envs/double_base.py @@ -53,6 +53,7 @@ class DualDianaMed(MujocoEnv): self.l_vis = None self.top = None self.angle = 
None + self.front = None self.obs = None self.rew = None @@ -168,6 +169,7 @@ class DualDianaMed(MujocoEnv): obs['images']['angle'] = self.angle obs['images']['r_vis'] = self.r_vis obs['images']['l_vis'] = self.l_vis + obs['images']['front'] = self.front return obs def _get_image_obs(self): @@ -177,6 +179,7 @@ class DualDianaMed(MujocoEnv): obs['images']['angle'] = self.angle obs['images']['r_vis'] = self.r_vis obs['images']['l_vis'] = self.l_vis + obs['images']['front'] = self.front return obs def _get_qpos_obs(self): @@ -202,6 +205,8 @@ class DualDianaMed(MujocoEnv): return self.r_vis elif self.cam == 'l_vis': return self.l_vis + elif self.cam == 'front': + return self.front else: raise AttributeError("please input right name") @@ -222,6 +227,9 @@ class DualDianaMed(MujocoEnv): img_renderer.update_scene(self.mj_data,camera="angle") self.angle = img_renderer.render() self.angle = self.angle[:, :, ::-1] + img_renderer.update_scene(self.mj_data,camera="front") + self.front = img_renderer.render() + self.front = self.front[:, :, ::-1] cv2.imshow('Cam view', self.cam_view) cv2.waitKey(1) diff --git a/roboimi/envs/double_pos_ctrl_env.py b/roboimi/envs/double_pos_ctrl_env.py index 4d15e8c..878bd08 100644 --- a/roboimi/envs/double_pos_ctrl_env.py +++ b/roboimi/envs/double_pos_ctrl_env.py @@ -77,7 +77,8 @@ class DualDianaMed_Pos_Ctrl(DualDianaMed): while self.cam_flage: if(type(self.top)==type(None) or type(self.angle)==type(None) - or type(self.r_vis)==type(None)): + or type(self.r_vis)==type(None) + or type(self.front)==type(None)): time.sleep(0.001) t+=1 else: diff --git a/roboimi/utils/constants.py b/roboimi/utils/constants.py index dd1d4ec..2f0d41b 100644 --- a/roboimi/utils/constants.py +++ b/roboimi/utils/constants.py @@ -20,7 +20,7 @@ SIM_TASK_CONFIGS = { 'dataset_dir': DATASET_DIR + '/sim_transfer', 'num_episodes': 20, 'episode_len': 700, - 'camera_names': ['top','r_vis'], + 'camera_names': ['top','r_vis','front'], 'xml_dir': HOME_PATH + '/assets' }, diff --git 
a/roboimi/vla/conf/agent/resnet_diffusion.yaml b/roboimi/vla/conf/agent/resnet_diffusion.yaml index 6e8a3ab..61d76a2 100644 --- a/roboimi/vla/conf/agent/resnet_diffusion.yaml +++ b/roboimi/vla/conf/agent/resnet_diffusion.yaml @@ -19,4 +19,4 @@ obs_horizon: 2 # How many historical observations to use diffusion_steps: 100 # Number of diffusion timesteps for training # Camera Configuration -num_cams: 2 # Number of cameras (e.g., r_vis, top) +num_cams: 3 # Number of cameras (e.g., r_vis, top, front) diff --git a/roboimi/vla/conf/config.yaml b/roboimi/vla/conf/config.yaml index dca3f26..0b18727 100644 --- a/roboimi/vla/conf/config.yaml +++ b/roboimi/vla/conf/config.yaml @@ -4,7 +4,7 @@ defaults: - data: resnet_dataset train: - batch_size: 32 # Batch size for training + batch_size: 16 # Batch size for training lr: 1e-4 # Learning rate max_steps: 20000 # Maximum training steps log_freq: 100 # Log frequency (steps) diff --git a/roboimi/vla/conf/data/resnet_dataset.yaml b/roboimi/vla/conf/data/resnet_dataset.yaml index 28145a7..62b0d5e 100644 --- a/roboimi/vla/conf/data/resnet_dataset.yaml +++ b/roboimi/vla/conf/data/resnet_dataset.yaml @@ -13,6 +13,7 @@ action_horizon: 8 # Action execution horizon (used during evaluation) camera_names: - r_vis - top + - front # Normalization Type: 'gaussian' (mean/std) or 'min_max' ([-1, 1]) normalization_type: gaussian diff --git a/roboimi/vla/conf/data/siglip2.yaml b/roboimi/vla/conf/data/siglip2.yaml index e37b284..65ec0e9 100644 --- a/roboimi/vla/conf/data/siglip2.yaml +++ b/roboimi/vla/conf/data/siglip2.yaml @@ -4,5 +4,5 @@ dataset_dir: "/home/d51/workspace/work/robo-imi-act/roboimi/demos/dataset/sim_tr pred_horizon: 16 obs_horizon: 1 action_horizon: 8 -camera_names: ['r_vis', 'top'] # ['angle', 'r_vis', 'top'] +camera_names: ['r_vis', 'top', 'front'] # ['angle', 'r_vis', 'top'] normalization_type: 'gaussian' # 'min_max' or 'gaussian' \ No newline at end of file diff --git a/roboimi/vla/data/dataset.py b/roboimi/vla/data/dataset.py index 
5c3ba8c..d6164d1 100644 --- a/roboimi/vla/data/dataset.py +++ b/roboimi/vla/data/dataset.py @@ -13,7 +13,7 @@ class RobotDiffusionDataset(Dataset): pred_horizon=16, obs_horizon=2, action_horizon=8, - camera_names=['r_vis', 'top'], + camera_names=['r_vis', 'top', 'front'], normalization_type='gaussian'): """ Args: