diff --git a/roboimi/demos/vla_scripts/eval_vla.py b/roboimi/demos/vla_scripts/eval_vla.py
index 97fe38f..9c358e4 100644
--- a/roboimi/demos/vla_scripts/eval_vla.py
+++ b/roboimi/demos/vla_scripts/eval_vla.py
@@ -103,10 +103,14 @@ def prepare_observation(obs: Dict, camera_names: list) -> Dict:
     Returns:
         agent 格式的观测字典
     """
+    import cv2
+
     # 转换图像: numpy -> tensor, HWC -> CHW
     images = {}
     for cam_name in camera_names:
         img = obs['images'][cam_name]
+        # Resize 到 224x224（与训练时一致）
+        img = cv2.resize(img, (224, 224), interpolation=cv2.INTER_LINEAR)
         img = rearrange(img, 'h w c -> c h w')
         img = torch.from_numpy(img / 255.0).float()
         images[cam_name] = img
diff --git a/roboimi/utils/act_ex_utils.py b/roboimi/utils/act_ex_utils.py
index 3c1648e..d08f203 100644
--- a/roboimi/utils/act_ex_utils.py
+++ b/roboimi/utils/act_ex_utils.py
@@ -27,8 +27,8 @@ def sample_insertion_pose():
 
 def sample_transfer_pose():
         # Box
-        x_range = [0.0, 0.05]
-        y_range = [0.95, 1.05]
+        x_range = [-0.05, 0.05]
+        y_range = [0.90, 1.05]
         z_range = [0.47, 0.47]
 
         ranges = np.vstack([x_range, y_range, z_range])