"""Tests for the rollout artifact exports of ``eval_vla``.

The simulator, agent and video writer are replaced with small in-memory
fakes so the tests only exercise how ``eval_vla`` collects and writes its
artifacts (trajectory ``.npz``, summary/timing JSON and the rollout video).
"""

import json
import tempfile
import unittest
from pathlib import Path
from unittest import mock

import numpy as np
import torch
from omegaconf import OmegaConf

from roboimi.demos.vla_scripts import eval_vla


class _FakeAgent:
    """Agent stub that replays a fixed list of actions, one per call."""

    def __init__(self, actions):
        self._actions = [
            torch.tensor(action, dtype=torch.float32) for action in actions
        ]
        self.reset_calls = 0

    def eval(self):
        return self

    def to(self, _device):
        return self

    def reset(self):
        self.reset_calls += 1

    def select_action(self, observation):
        # The observation is ignored; actions come back in the order given.
        del observation
        return self._actions.pop(0)


class _FakeEnv:
    """Environment stub whose observations and reward depend on step count."""

    def __init__(self):
        self.step_count = 0
        self.rew = 0.0
        self.render_calls = 0
        self.reset_calls = []

    def reset(self, box_pos):
        self.reset_calls.append(np.array(box_pos, copy=True))
        self.step_count = 0
        self.rew = 0.0

    def _get_image_obs(self):
        # Every pixel encodes the current step so tests can tell frames apart.
        frame_value = self.step_count
        front = np.full((6, 8, 3), fill_value=frame_value, dtype=np.uint8)
        top = np.full((6, 8, 3), fill_value=frame_value + 20, dtype=np.uint8)
        return {"images": {"front": front, "top": top}}

    def _get_qpos_obs(self):
        return {"qpos": np.arange(16, dtype=np.float32)}

    def step(self, action):
        del action
        self.step_count += 1
        self.rew = float(self.step_count)

    def render(self):
        self.render_calls += 1

    def getBodyPos(self, name):
        base = float(self.step_count)
        if name == 'eef_left':
            return np.array([base, base + 0.1, base + 0.2], dtype=np.float32)
        if name == 'eef_right':
            return np.array(
                [base + 1.0, base + 1.1, base + 1.2], dtype=np.float32
            )
        raise KeyError(name)

    def getBodyQuat(self, name):
        base = float(self.step_count)
        if name == 'eef_left':
            return np.array([1.0, base, 0.0, 0.0], dtype=np.float32)
        if name == 'eef_right':
            return np.array([1.0, 0.0, base, 0.0], dtype=np.float32)
        raise KeyError(name)


class _FakeVideoWriter:
    """Video writer stub that keeps frames in memory and touches the file."""

    def __init__(self, output_path):
        self.output_path = Path(output_path)
        self.output_path.parent.mkdir(parents=True, exist_ok=True)
        self.output_path.write_bytes(b'')
        self.frames = []
        self.released = False

    def isOpened(self):
        return True

    def write(self, frame):
        self.frames.append(np.array(frame, copy=True))

    def release(self):
        self.released = True
        self.output_path.write_bytes(b'fake-mp4')


def _fake_parallel_worker_results():
    """Return fresh per-worker results as handed back by the patched runners.

    Two workers report three episodes out of index order (2, then 1 and 0),
    which lets the tests check that the merged summary is ordered by episode.
    """
    return [
        {
            "episodes": [
                {
                    "episode_index": 2,
                    "episode_reward": 3.0,
                    "episode_max_reward": 3.0,
                    "inference_fps": 30.0,
                    "control_fps": 15.0,
                }
            ],
            "_merge_state": {
                "obs_read_time_ms": [3.0],
                "preprocess_time_ms": [1.0],
                "inference_time_ms": [2.0],
                "env_step_time_ms": [4.0],
                "total_time_ms": [5.0],
                "model_forward_flags": [True],
            },
        },
        {
            "episodes": [
                {
                    "episode_index": 1,
                    "episode_reward": 2.0,
                    "episode_max_reward": 2.0,
                    "inference_fps": 20.0,
                    "control_fps": 10.0,
                },
                {
                    "episode_index": 0,
                    "episode_reward": 1.0,
                    "episode_max_reward": 1.0,
                    "inference_fps": 10.0,
                    "control_fps": 5.0,
                },
            ],
            "_merge_state": {
                "obs_read_time_ms": [1.0, 2.0],
                "preprocess_time_ms": [1.0, 1.0],
                "inference_time_ms": [2.0, 2.0],
                "env_step_time_ms": [4.0, 4.0],
                "total_time_ms": [5.0, 5.0],
                "model_forward_flags": [False, True],
            },
        },
    ]


def _fake_box_poses():
    """Return the three poses fed to the patched ``sample_transfer_pose``."""
    return [
        np.array([0.1, 0.2, 0.3], dtype=np.float32),
        np.array([0.4, 0.5, 0.6], dtype=np.float32),
        np.array([0.7, 0.8, 0.9], dtype=np.float32),
    ]


def _parallel_eval_cfg(artifact_dir, **eval_overrides):
    """Build the three-episode, two-worker config used by the merge tests.

    Args:
        artifact_dir: Directory the summary and timing files are written to.
        **eval_overrides: Extra or replacement keys for the ``eval`` section.
    """
    eval_cfg = {
        "ckpt_path": "checkpoints/vla_model_best.pt",
        "num_episodes": 3,
        "num_workers": 2,
        "max_timesteps": 1,
        "device": "cpu",
        "task_name": "sim_transfer",
        "camera_names": ["front"],
        "use_smoothing": False,
        "smooth_alpha": 0.3,
        "verbose_action": False,
        "headless": True,
        "artifact_dir": artifact_dir,
        "save_summary_json": True,
        "save_timing": True,
    }
    eval_cfg.update(eval_overrides)
    return OmegaConf.create({"agent": {}, "eval": eval_cfg})


class EvalVLARolloutArtifactsTest(unittest.TestCase):
    """Checks the artifacts written by the serial and parallel eval paths."""

    def _assert_merged_parallel_artifacts(self, summary, tmpdir):
        """Assert the merged summary, timing file and worker dirs in *tmpdir*."""
        summary_path = Path(tmpdir) / "rollout_summary.json"
        timing_path = Path(tmpdir) / "timing.json"
        worker_00_dir = Path(tmpdir) / "workers" / "worker_00"
        worker_01_dir = Path(tmpdir) / "workers" / "worker_01"

        self.assertTrue(summary_path.exists())
        self.assertTrue(timing_path.exists())
        self.assertTrue(worker_00_dir.is_dir())
        self.assertTrue(worker_01_dir.is_dir())
        # Workers report episodes 2, 1, 0; the merged rewards follow index order.
        self.assertEqual(summary["episode_rewards"], [1.0, 2.0, 3.0])

        with summary_path.open("r", encoding="utf-8") as fh:
            saved_summary = json.load(fh)
        with timing_path.open("r", encoding="utf-8") as fh:
            saved_timing = json.load(fh)

        self.assertEqual(saved_summary["episode_rewards"], [1.0, 2.0, 3.0])
        self.assertEqual(saved_summary["artifact_dir"], tmpdir)
        # Three timing samples in total, two of them flagged as model forwards.
        self.assertEqual(saved_timing["count"], 3)
        self.assertEqual(saved_timing["model_forward_count"], 2)

    def test_eval_config_exposes_rollout_artifact_defaults(self):
        # NOTE: the path is relative, so this test expects to be run from the
        # repository root.
        eval_cfg = OmegaConf.load(Path('roboimi/vla/conf/eval/eval.yaml'))

        self.assertIn('artifact_dir', eval_cfg)
        self.assertFalse(eval_cfg.save_summary_json)
        self.assertFalse(eval_cfg.save_trajectory_npz)
        self.assertFalse(eval_cfg.record_video)
        self.assertIsNone(eval_cfg.artifact_dir)
        self.assertIsNone(eval_cfg.video_camera_name)
        self.assertEqual(eval_cfg.video_fps, 30)

    def test_run_eval_exports_npz_summary_and_video_artifacts(self):
        actions = [
            np.arange(16, dtype=np.float32),
            np.arange(16, dtype=np.float32) + 10.0,
        ]
        fake_agent = _FakeAgent(actions)
        fake_env = _FakeEnv()

        with tempfile.TemporaryDirectory() as tmpdir:
            cfg = OmegaConf.create(
                {
                    'agent': {},
                    'eval': {
                        'ckpt_path': 'checkpoints/vla_model_best.pt',
                        'num_episodes': 1,
                        'max_timesteps': 2,
                        'device': 'cpu',
                        'task_name': 'sim_transfer',
                        'camera_names': ['front', 'top'],
                        'use_smoothing': True,
                        'smooth_alpha': 0.5,
                        'verbose_action': False,
                        'headless': True,
                        'artifact_dir': tmpdir,
                        'save_summary_json': True,
                        'save_trajectory_npz': True,
                        'record_video': True,
                        'video_camera_name': 'front',
                        'video_fps': 12,
                    },
                }
            )

            writer_holder = {}

            def fake_open_video_writer(output_path, frame_size, fps):
                # Fake frames are 6 rows by 8 columns, so (8, 6) is width first.
                self.assertEqual(frame_size, (8, 6))
                self.assertEqual(fps, 12)
                writer = _FakeVideoWriter(output_path)
                writer_holder['writer'] = writer
                return writer

            with mock.patch.object(
                eval_vla,
                'load_checkpoint',
                return_value=(fake_agent, None),
            ), mock.patch.object(
                eval_vla,
                'make_sim_env',
                return_value=fake_env,
            ), mock.patch.object(
                eval_vla,
                'sample_transfer_pose',
                return_value=np.array([0.1, 0.2, 0.3], dtype=np.float32),
            ), mock.patch.object(
                eval_vla,
                'tqdm',
                side_effect=lambda iterable, **kwargs: iterable,
            ), mock.patch.object(
                eval_vla,
                '_open_video_writer',
                side_effect=fake_open_video_writer,
            ):
                summary = eval_vla._run_eval(cfg)

            artifacts = summary['artifacts']
            trajectory_path = Path(artifacts['trajectory_npz'])
            summary_path = Path(artifacts['summary_json'])
            video_path = Path(artifacts['video_mp4'])

            self.assertEqual(Path(artifacts['output_dir']), Path(tmpdir))
            self.assertEqual(artifacts['video_camera_name'], 'front')
            self.assertTrue(trajectory_path.exists())
            self.assertTrue(summary_path.exists())
            self.assertTrue(video_path.exists())

            # np.load keeps the archive open; close it before the temporary
            # directory is removed.
            with np.load(trajectory_path) as rollout_npz:
                np.testing.assert_array_equal(
                    rollout_npz['episode_index'], np.array([0, 0])
                )
                np.testing.assert_array_equal(
                    rollout_npz['timestep'], np.array([0, 1])
                )
                np.testing.assert_array_equal(
                    rollout_npz['reward'],
                    np.array([1.0, 2.0], dtype=np.float32),
                )
                np.testing.assert_array_equal(
                    rollout_npz['raw_predicted_ee_action'][0], actions[0]
                )
                np.testing.assert_array_equal(
                    rollout_npz['raw_predicted_ee_action'][1], actions[1]
                )
                np.testing.assert_array_equal(
                    rollout_npz['executed_ee_action'][0], actions[0]
                )
                # With smooth_alpha=0.5 the second executed action is expected
                # to be the mean of the two raw actions.
                np.testing.assert_array_equal(
                    rollout_npz['executed_ee_action'][1],
                    (actions[0] + actions[1]) / 2.0,
                )
                np.testing.assert_array_equal(
                    rollout_npz['left_ee_pos'],
                    np.array(
                        [[1.0, 1.1, 1.2], [2.0, 2.1, 2.2]], dtype=np.float32
                    ),
                )
                np.testing.assert_array_equal(
                    rollout_npz['right_ee_pos'],
                    np.array(
                        [[2.0, 2.1, 2.2], [3.0, 3.1, 3.2]], dtype=np.float32
                    ),
                )
                self.assertEqual(rollout_npz['obs_read_time_ms'].shape, (2,))
                self.assertEqual(rollout_npz['preprocess_time_ms'].shape, (2,))
                self.assertEqual(rollout_npz['inference_time_ms'].shape, (2,))
                self.assertEqual(rollout_npz['env_step_time_ms'].shape, (2,))
                self.assertEqual(rollout_npz['total_time_ms'].shape, (2,))

            writer = writer_holder['writer']
            self.assertTrue(writer.released)
            self.assertEqual(len(writer.frames), 2)
            # Front-camera pixels equal the step count when the frame was read.
            np.testing.assert_array_equal(
                writer.frames[0], np.zeros((6, 8, 3), dtype=np.uint8)
            )
            np.testing.assert_array_equal(
                writer.frames[1], np.full((6, 8, 3), 1, dtype=np.uint8)
            )

            with summary_path.open('r', encoding='utf-8') as fh:
                saved_summary = json.load(fh)

            self.assertEqual(
                saved_summary['artifacts']['trajectory_npz'],
                str(trajectory_path),
            )
            self.assertEqual(
                saved_summary['artifacts']['video_mp4'], str(video_path)
            )
            self.assertEqual(saved_summary['episode_rewards'], [3.0])
            self.assertAlmostEqual(summary['avg_reward'], 3.0)
            self.assertIn('avg_obs_read_time_ms', summary)
            self.assertIn('avg_env_step_time_ms', summary)

    def test_run_eval_parallel_rejects_trajectory_and_video_exports(self):
        unsupported_flags = [
            "record_video",
            "save_trajectory",
            "save_trajectory_npz",
        ]

        for flag_name in unsupported_flags:
            with self.subTest(flag_name=flag_name):
                cfg = OmegaConf.create(
                    {
                        "agent": {},
                        "eval": {
                            "ckpt_path": "checkpoints/vla_model_best.pt",
                            "num_episodes": 2,
                            "num_workers": 2,
                            "max_timesteps": 1,
                            "device": "cpu",
                            "task_name": "sim_transfer",
                            "camera_names": ["front"],
                            "use_smoothing": False,
                            "smooth_alpha": 0.3,
                            "verbose_action": False,
                            "headless": True,
                            "save_artifacts": True,
                            flag_name: True,
                        },
                    }
                )

                # The error message must name the offending flag.
                with self.assertRaisesRegex(ValueError, flag_name):
                    eval_vla._run_eval_parallel(cfg)

    def test_run_eval_parallel_writes_merged_summary_timing_and_worker_dirs(self):
        with tempfile.TemporaryDirectory() as tmpdir:
            cfg = _parallel_eval_cfg(tmpdir)

            def fake_run_spawn_jobs(payloads, max_workers, worker_fn):
                del max_workers, worker_fn
                return _fake_parallel_worker_results()

            with mock.patch.object(
                eval_vla,
                "sample_transfer_pose",
                side_effect=_fake_box_poses(),
            ), mock.patch.object(
                eval_vla,
                "_run_spawn_jobs",
                side_effect=fake_run_spawn_jobs,
            ):
                summary = eval_vla._run_eval_parallel(cfg)

            self._assert_merged_parallel_artifacts(summary, tmpdir)

    def test_run_eval_parallel_cuda_writes_merged_summary_timing_and_worker_dirs(self):
        with tempfile.TemporaryDirectory() as tmpdir:
            cfg = _parallel_eval_cfg(tmpdir, cuda_devices=[0], device="cuda")

            def fake_run_cuda_parallel_processes(server_payloads, worker_payloads):
                # One CUDA device is configured, so both workers are expected
                # to share the single server at index 0.
                self.assertEqual(len(server_payloads), 1)
                self.assertEqual(server_payloads[0]["device_index"], 0)
                self.assertEqual(
                    [payload["server_index"] for payload in worker_payloads],
                    [0, 0],
                )
                return _fake_parallel_worker_results()

            with mock.patch.object(
                eval_vla,
                "sample_transfer_pose",
                side_effect=_fake_box_poses(),
            ), mock.patch.object(
                eval_vla,
                "_run_cuda_parallel_processes",
                side_effect=fake_run_cuda_parallel_processes,
                create=True,
            ):
                summary = eval_vla._run_eval_parallel_cuda(cfg)

            self._assert_merged_parallel_artifacts(summary, tmpdir)


if __name__ == '__main__':
    unittest.main()