From 926d8cf8943adc36311b1b4b21b5a7cb56a42f64 Mon Sep 17 00:00:00 2001 From: gouhanke <12219217+gouhanke@user.noreply.gitee.com> Date: Thu, 12 Feb 2026 15:02:18 +0800 Subject: [PATCH] =?UTF-8?q?chore:=20=E5=8A=A0=E8=BD=BD=E6=97=B6=E5=B0=86?= =?UTF-8?q?=E5=9B=BE=E5=83=8F=E7=BC=A9=E6=94=BE=E5=88=B0224*224=EF=BC=8C?= =?UTF-8?q?=20resnet=E7=A6=81=E7=94=A8crop?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- roboimi/vla/conf/backbone/resnet_diffusion.yaml | 6 +++--- roboimi/vla/data/simpe_robot_dataset.py | 3 +++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/roboimi/vla/conf/backbone/resnet_diffusion.yaml b/roboimi/vla/conf/backbone/resnet_diffusion.yaml index 2055ca7..6f8a11a 100644 --- a/roboimi/vla/conf/backbone/resnet_diffusion.yaml +++ b/roboimi/vla/conf/backbone/resnet_diffusion.yaml @@ -14,9 +14,9 @@ freeze_backbone: true # 冻结ResNet参数,只训练后面的pool和out层( # ==================== # 输入配置 # ==================== -input_shape: [3, 96, 96] # 输入图像形状 (C, H, W) -crop_shape: [84, 84] # 裁剪后的图像形状 (H, W) -crop_is_random: true # 训练时使用随机裁剪,评估时使用中心裁剪 +input_shape: [3, 224, 224] # 输入图像形状 (C, H, W) - ImageNet标准尺寸 +crop_shape: null # 裁剪后的图像形状 (H, W) - 设为null禁用裁剪 +crop_is_random: true # 训练时使用随机裁剪,评估时使用中心裁剪(crop_shape=null时无效) # ==================== # 归一化和特征提取 diff --git a/roboimi/vla/data/simpe_robot_dataset.py b/roboimi/vla/data/simpe_robot_dataset.py index 7650a37..7b2fef3 100644 --- a/roboimi/vla/data/simpe_robot_dataset.py +++ b/roboimi/vla/data/simpe_robot_dataset.py @@ -86,6 +86,9 @@ class SimpleRobotDataset(Dataset): h5_path = f'observations/images/{cam_name}' if h5_path in f: img = f[h5_path][meta["frame_idx"]] + # Resize图像到224x224(减少内存和I/O负担) + import cv2 + img = cv2.resize(img, (224, 224), interpolation=cv2.INTER_LINEAR) # 转换为float并归一化到 [0, 1] img = torch.from_numpy(img).float() / 255.0 frame[f"observation.{cam_name}"] = img.permute(2, 0, 1) # HWC -> CHW