Files
mamba_diffusion/main.py
gameloader 1446f97459 refactor(as_mamba): Remove dt prediction and use fixed dt
Removes the `dt_head` network and associated configuration parameters
(dt_min, dt_max, lambda_nfe, warmup_epochs). Replaces predicted time
steps with a fixed value derived from sequence length. Eliminates
the warmup phase and NFE loss calculation.
2026-01-21 13:07:36 +08:00

43 lines
1.7 KiB
Python

import argparse
from as_mamba import TrainConfig, run_training_and_plot
def build_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the AS-Mamba training script.

    Every option, including the ``--use-residual`` flag, defaults to ``None``.
    That lets a caller distinguish "not given on the command line" from an
    explicit value and override only the settings the user actually supplied.

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    parser = argparse.ArgumentParser(description="Train AS-Mamba on sphere-to-sphere flow.")
    parser.add_argument("--epochs", type=int, default=None)
    parser.add_argument("--batch-size", type=int, default=None)
    parser.add_argument("--steps-per-epoch", type=int, default=None)
    parser.add_argument("--seq-len", type=int, default=None)
    parser.add_argument("--lr", type=float, default=None)
    parser.add_argument("--device", type=str, default=None)
    parser.add_argument("--output-dir", type=str, default=None)
    parser.add_argument("--project", type=str, default=None)
    parser.add_argument("--run-name", type=str, default=None)
    parser.add_argument("--val-every", type=int, default=None)
    parser.add_argument("--val-samples", type=int, default=None)
    parser.add_argument("--val-plot-samples", type=int, default=None)
    parser.add_argument("--val-max-steps", type=int, default=None)
    parser.add_argument("--center-min", type=float, default=None)
    parser.add_argument("--center-max", type=float, default=None)
    parser.add_argument("--center-distance-min", type=float, default=None)
    # ``store_true`` implicitly defaults to False. Use None instead so an
    # omitted flag looks like every other unset option and is not mistaken
    # for an explicit request to turn the feature off.
    parser.add_argument("--use-residual", action="store_true", default=None)
    return parser
def main() -> None:
    """Parse CLI options, apply them to a fresh config, train, and report.

    Each parsed option whose value is not ``None`` is copied onto a new
    ``TrainConfig`` instance. The config is then passed to
    ``run_training_and_plot`` and the path it returns is printed.
    """
    cli_values = vars(build_parser().parse_args())
    # Keep only the options that carry a value. The dash-to-underscore
    # mapping turns an option-style name into an attribute-style one.
    overrides = {
        name.replace("-", "_"): chosen
        for name, chosen in cli_values.items()
        if chosen is not None
    }

    cfg = TrainConfig()
    for attr, chosen in overrides.items():
        setattr(cfg, attr, chosen)

    plot_path = run_training_and_plot(cfg)
    print(f"Saved trajectory plot to {plot_path}")
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()