first timesnet try

2025-07-30 21:18:46 +08:00
parent dc8c9f1f09
commit 6ee5c769c4
17 changed files with 2918 additions and 0 deletions
--- a/train_test.py
+++ b/train_test.py
@ -0,0 +1,209 @@
+#!/usr/bin/env python3
+"""
+Training script for TimesNet model on ETT datasets.
+"""
+
+import os
+import math
+import argparse
+import torch
+import torch.nn as nn
+from train.train import train_forecasting_model
+from models.TimesNet.TimesNet import Model as TimesNet
+
+class Args:
+    """Configuration class for TimesNet model parameters."""
+    def __init__(self, seq_len, pred_len, enc_in, c_out):
+        # Model architecture parameters
+        self.task_name = 'long_term_forecast'
+        self.seq_len = seq_len
+        self.label_len = seq_len // 2  # Half of seq_len as label length
+        self.pred_len = pred_len
+        self.enc_in = enc_in
+        self.c_out = c_out
+        
+        # TimesNet specific parameters
+        self.top_k = 5  # k parameter as specified
+        self.e_layers = 2  # Number of layers as specified
+        self.d_min = 32  # dmin as specified
+        self.d_max = 512  # dmax as specified
+        
+        # Calculate d_model based on the formula: min{max{2*⌈log C⌉, dmin}, dmax}
+        log_c = math.ceil(math.log2(enc_in)) if enc_in > 1 else 1
+        # self.d_model = min(max(2 * log_c, self.d_min), self.d_max)
+        self.d_model = 64
+        
+        # Other model parameters
+        self.d_ff = 64  # Standard transformer ratio
+        self.num_kernels = 6  # For Inception blocks
+        self.embed = 'timeF'  # Time feature embedding type
+        self.freq = 'h'  # Frequency for time features (minutely for ETT)
+        self.dropout = 0.1
+        
+        print(f"Model configuration:")
+        print(f"  - Input channels (C): {enc_in}")
+        print(f"  - d_model: {self.d_model} (calculated from 2*⌈log₂({enc_in})⌉ = {2 * log_c})")
+        print(f"  - Sequence length: {seq_len}")
+        print(f"  - Prediction length: {pred_len}")
+        print(f"  - Top-k: {self.top_k}")
+        print(f"  - Layers: {self.e_layers}")
+
+def create_timesnet_model(args):
+    """Create TimesNet model with given configuration."""
+    def model_constructor():
+        return TimesNet(args)
+    return model_constructor
+
+def train_single_dataset(data_path, dataset_name, pred_len, args):
+    """Train TimesNet on a single dataset configuration."""
+    
+    # Update args for current prediction length
+    args.pred_len = pred_len
+    
+    # Create model constructor
+    model_constructor = create_timesnet_model(args)
+    
+    # Training configuration
+    config = {
+        'learning_rate': 1e-5,  # LR = 10^-4 as specified
+        'batch_size': 32,       # BatchSize 32 as specified
+        'weight_decay': 1e-4,
+        'dataset': dataset_name,
+        'pred_len': pred_len,
+        'seq_len': args.seq_len,
+        'd_model': args.d_model,
+        'top_k': args.top_k,
+        'e_layers': args.e_layers
+    }
+    
+    # Project name for tracking
+    project_name = f"TimesNet_{dataset_name}_pred{pred_len}"
+    
+    print(f"\n{'='*60}")
+    print(f"Training {dataset_name} with prediction length {pred_len}")
+    print(f"Data path: {data_path}")
+    print(f"{'='*60}")
+    
+    # Train the model
+    try:
+        model, metrics = train_forecasting_model(
+            model_constructor=model_constructor,
+            data_path=data_path,
+            project_name=project_name,
+            config=config,
+            early_stopping_patience=10,
+            max_epochs=10,  # epochs 10 as specified
+            checkpoint_dir="./checkpoints",
+            log_interval=50
+        )
+        
+        print(f"Training completed for {project_name}")
+        print(f"Final validation MSE: {metrics.get('final_val_loss', 'N/A'):.6f}")
+        
+        return model, metrics
+        
+    except Exception as e:
+        print(f"Error training {project_name}: {e}")
+        return None, None
+
+def main():
+    parser = argparse.ArgumentParser(description='Train TimesNet on ETT datasets')
+    parser.add_argument('--data_dir', type=str, default='processed_data', 
+                      help='Directory containing processed NPZ files')
+    parser.add_argument('--datasets', nargs='+', default=['ETTm1', 'ETTm2'],
+                      help='List of datasets to train on')
+    parser.add_argument('--pred_lengths', nargs='+', type=int, default=[96, 192, 336, 720],
+                      help='List of prediction lengths to train on')
+    parser.add_argument('--seq_len', type=int, default=96,
+                      help='Input sequence length')
+    parser.add_argument('--device', type=str, default=None,
+                      help='Device to use for training (cuda/cpu)')
+    
+    args = parser.parse_args()
+    
+    print("TimesNet Training Script")
+    print("=" * 50)
+    print(f"Datasets: {args.datasets}")
+    print(f"Prediction lengths: {args.pred_lengths}")
+    print(f"Input sequence length: {args.seq_len}")
+    print(f"Data directory: {args.data_dir}")
+    
+    # Check if data directory exists
+    if not os.path.exists(args.data_dir):
+        print(f"Error: Data directory '{args.data_dir}' not found!")
+        return
+    
+    # Set device
+    if args.device is None:
+        device = 'cuda' if torch.cuda.is_available() else 'cpu'
+    else:
+        device = args.device
+    print(f"Using device: {device}")
+    
+    # Training results storage
+    all_results = {}
+    
+    # Train on each dataset and prediction length combination
+    for dataset in args.datasets:
+        all_results[dataset] = {}
+        
+        for pred_len in args.pred_lengths:
+            # Construct data file path
+            data_file = f"{dataset}_input{args.seq_len}_pred{pred_len}.npz"
+            data_path = os.path.join(args.data_dir, data_file)
+            
+            # Check if data file exists
+            if not os.path.exists(data_path):
+                print(f"Warning: Data file '{data_path}' not found, skipping...")
+                continue
+            
+            # Load data to get input dimensions
+            import numpy as np
+            data = np.load(data_path, allow_pickle=True)
+            enc_in = data['train_x'].shape[-1]  # Number of features/channels
+            print("输入数据通道数:", enc_in)
+            c_out = enc_in  # Output same number of channels
+            
+            # Create model configuration
+            model_args = Args(
+                seq_len=args.seq_len,
+                pred_len=pred_len,
+                enc_in=enc_in,
+                c_out=c_out
+            )
+            
+            # Train the model
+            model, metrics = train_single_dataset(
+                data_path=data_path,
+                dataset_name=dataset,
+                pred_len=pred_len,
+                args=model_args
+            )
+            
+            # Store results
+            all_results[dataset][pred_len] = {
+                'model': model,
+                'metrics': metrics,
+                'data_path': data_path
+            }
+    
+    # Print summary
+    print("\n" + "=" * 80)
+    print("TRAINING SUMMARY")
+    print("=" * 80)
+    
+    for dataset in all_results:
+        print(f"\n{dataset}:")
+        for pred_len in all_results[dataset]:
+            result = all_results[dataset][pred_len]
+            if result['metrics'] is not None:
+                mse = result['metrics'].get('final_val_mse', 'N/A')
+                print(f"  Pred Length {pred_len}: MSE = {mse}")
+            else:
+                print(f"  Pred Length {pred_len}: Training failed")
+    
+    print(f"\nAll models saved in: ./checkpoints/")
+    print("Training completed!")
+
+if __name__ == "__main__":
+    main()