commit d6dd462886a47b8a0378bd704730937b58ebe329 Author: gameloader Date: Thu Aug 28 10:17:59 2025 +0000 first commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..8d3acf7 --- /dev/null +++ b/.gitignore @@ -0,0 +1,168 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ +/scripts/long_term_forecast/Traffic_script/PatchTST1.sh +/backups/ +/result.xlsx +/~$result.xlsx +/Time-Series-Library.zip +/temp.sh + +.idea +/tv_result.xlsx +/test.py +/m4_results/ +/test_results/ +/PatchTST_results.xlsx +/seq_len_long_term_forecast/ +/progress.xlsx +/scripts/short_term_forecast/PatchTST_M4.sh +/run_tv.py + +/scripts/long_term_forecast/ETT_tv_script/ +/dataset/ +/data/ +data_factory_all.py +data_loader_all.py +/scripts/short_term_forecast/tv_script/ +/exp/exp_short_term_forecasting_tv.py +/exp/exp_long_term_forecasting_tv.py +/timesnetv2.xlsx +/scripts/anomaly_detection/tmp/ +/scripts/imputation/tmp/ +/utils/self_tools.py +/scripts/exp_scripts/ + +checkpoints/ +results/ +result_long_term_forecast.txt +result_anomaly_detection.txt +scripts/augmentation/ +run_anylearn.py +environment.txt \ No newline at end of file diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..67f5a65 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,20 @@ +## Instructions for Contributing to TSlib + +Sincerely thanks to all the researchers who want to use or contribute to TSlib. + +Since our team may not have enough time to fix all the bugs and catch up with the latest model, your contribution is essential to this project. 
+ +### (1) Fix Bug + +You can directly propose a pull request and add detailed descriptions to the comment, such as [this pull request](https://github.com/thuml/Time-Series-Library/pull/498). + +### (2) Add a new time series model + +Thanks to creative researchers, extensive great TS models are presented, which advance this community significantly. If you want to add your model to TSlib, here are some instructions: + +- Propose an issue to describe your model and give a link to your paper and official code. We will discuss whether your model is suitable for this library, such as [this issue](https://github.com/thuml/Time-Series-Library/issues/346). +- Propose a pull request in a similar style as TSlib, which means adding an additional file to ./models and providing corresponding scripts for reproduction, such as [this pull request](https://github.com/thuml/Time-Series-Library/pull/446). + +Note: Given that there are a lot of TS models that have been proposed, we may not have enough time to judge which model can be a remarkable supplement to the current library. Thus, we decide ONLY to add the officially published paper to our library. Peer review can be a reliable criterion. + +Thanks again for your valuable contributions. diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..278c267 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2021 THUML @ Tsinghua University + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..06bddb1 --- /dev/null +++ b/README.md @@ -0,0 +1,171 @@ +# Time Series Library (TSLib) +TSLib is an open-source library for deep learning researchers, especially for deep time series analysis. + +We provide a neat code base to evaluate advanced deep time series models or develop your model, which covers five mainstream tasks: **long- and short-term forecasting, imputation, anomaly detection, and classification.** + +:triangular_flag_on_post:**News** (2024.10) We have included [[TimeXer]](https://arxiv.org/abs/2402.19072), which defined a practical forecasting paradigm: Forecasting with Exogenous Variables. Considering both practicability and computation efficiency, we believe the new forecasting paradigm defined in TimeXer can be the "right" task for future research. + +:triangular_flag_on_post:**News** (2024.10) Our lab has open-sourced [[OpenLTM]](https://github.com/thuml/OpenLTM), which provides a distinct pretrain-finetuning paradigm compared to TSLib. 
If you are interested in Large Time Series Models, you may find this repository helpful. + +:triangular_flag_on_post:**News** (2024.07) We wrote a comprehensive survey of [[Deep Time Series Models]](https://arxiv.org/abs/2407.13278) with a rigorous benchmark based on TSLib. In this paper, we summarized the design principles of current time series models supported by insightful experiments, hoping to be helpful to future research. + +:triangular_flag_on_post:**News** (2024.04) Many thanks for the great work from [frecklebars](https://github.com/thuml/Time-Series-Library/pull/378). The famous sequential model [Mamba](https://arxiv.org/abs/2312.00752) has been included in our library. See [this file](https://github.com/thuml/Time-Series-Library/blob/main/models/Mamba.py), where you need to install `mamba_ssm` with pip at first. + +:triangular_flag_on_post:**News** (2024.03) Given the inconsistent look-back length of various papers, we split the long-term forecasting in the leaderboard into two categories: Look-Back-96 and Look-Back-Searching. We recommend researchers read [TimeMixer](https://openreview.net/pdf?id=7oLshfEIC2), which includes both look-back length settings in experiments for scientific rigor. + +:triangular_flag_on_post:**News** (2023.10) We add an implementation to [iTransformer](https://arxiv.org/abs/2310.06625), which is the state-of-the-art model for long-term forecasting. The official code and complete scripts of iTransformer can be found [here](https://github.com/thuml/iTransformer). + +:triangular_flag_on_post:**News** (2023.09) We added a detailed [tutorial](https://github.com/thuml/Time-Series-Library/blob/main/tutorial/TimesNet_tutorial.ipynb) for [TimesNet](https://openreview.net/pdf?id=ju_Uqw384Oq) and this library, which is quite friendly to beginners of deep time series analysis. + +:triangular_flag_on_post:**News** (2023.02) We release the TSlib as a comprehensive benchmark and code base for time series models, which is extended from our previous GitHub repository [Autoformer](https://github.com/thuml/Autoformer). + +## Leaderboard for Time Series Analysis + +Till March 2024, the top three models for five different tasks are: + +| Model
Ranking | Long-term<br>Forecasting<br>Look-Back-96 | Long-term<br>Forecasting<br>Look-Back-Searching | Short-term<br>Forecasting | Imputation | Classification | Anomaly
Detection | +| ---------------- | ----------------------------------------------------- | ----------------------------------------------------- | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | -------------------------------------------------- | +| 🥇 1st | [TimeXer](https://arxiv.org/abs/2402.19072) | [TimeMixer](https://openreview.net/pdf?id=7oLshfEIC2) | [TimesNet](https://arxiv.org/abs/2210.02186) | [TimesNet](https://arxiv.org/abs/2210.02186) | [TimesNet](https://arxiv.org/abs/2210.02186) | [TimesNet](https://arxiv.org/abs/2210.02186) | +| 🥈 2nd | [iTransformer](https://arxiv.org/abs/2310.06625) | [PatchTST](https://github.com/yuqinie98/PatchTST) | [Non-stationary
Transformer](https://github.com/thuml/Nonstationary_Transformers) | [Non-stationary
Transformer](https://github.com/thuml/Nonstationary_Transformers) | [Non-stationary
Transformer](https://github.com/thuml/Nonstationary_Transformers) | [FEDformer](https://github.com/MAZiqing/FEDformer) | +| 🥉 3rd | [TimeMixer](https://openreview.net/pdf?id=7oLshfEIC2) | [DLinear](https://arxiv.org/pdf/2205.13504.pdf) | [FEDformer](https://github.com/MAZiqing/FEDformer) | [Autoformer](https://github.com/thuml/Autoformer) | [Informer](https://github.com/zhouhaoyi/Informer2020) | [Autoformer](https://github.com/thuml/Autoformer) | + + +**Note: We will keep updating this leaderboard.** If you have proposed advanced and awesome models, you can send us your paper/code link or raise a pull request. We will add them to this repo and update the leaderboard as soon as possible. + +**Compared models of this leaderboard.** ☑ means that their codes have already been included in this repo. + - [x] **TimeXer** - TimeXer: Empowering Transformers for Time Series Forecasting with Exogenous Variables [[NeurIPS 2024]](https://arxiv.org/abs/2402.19072) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/TimeXer.py) + - [x] **TimeMixer** - TimeMixer: Decomposable Multiscale Mixing for Time Series Forecasting [[ICLR 2024]](https://openreview.net/pdf?id=7oLshfEIC2) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/TimeMixer.py). + - [x] **TSMixer** - TSMixer: An All-MLP Architecture for Time Series Forecasting [[arXiv 2023]](https://arxiv.org/pdf/2303.06053.pdf) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/TSMixer.py) + - [x] **iTransformer** - iTransformer: Inverted Transformers Are Effective for Time Series Forecasting [[ICLR 2024]](https://arxiv.org/abs/2310.06625) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/iTransformer.py). + - [x] **PatchTST** - A Time Series is Worth 64 Words: Long-term Forecasting with Transformers [[ICLR 2023]](https://openreview.net/pdf?id=Jbdc0vTOcol) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/PatchTST.py). + - [x] **TimesNet** - TimesNet: Temporal 2D-Variation Modeling for General Time Series Analysis [[ICLR 2023]](https://openreview.net/pdf?id=ju_Uqw384Oq) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/TimesNet.py). + - [x] **DLinear** - Are Transformers Effective for Time Series Forecasting? [[AAAI 2023]](https://arxiv.org/pdf/2205.13504.pdf) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/DLinear.py). + - [x] **LightTS** - Less Is More: Fast Multivariate Time Series Forecasting with Light Sampling-oriented MLP Structures [[arXiv 2022]](https://arxiv.org/abs/2207.01186) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/LightTS.py). + - [x] **ETSformer** - ETSformer: Exponential Smoothing Transformers for Time-series Forecasting [[arXiv 2022]](https://arxiv.org/abs/2202.01381) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/ETSformer.py). + - [x] **Non-stationary Transformer** - Non-stationary Transformers: Exploring the Stationarity in Time Series Forecasting [[NeurIPS 2022]](https://openreview.net/pdf?id=ucNDIDRNjjv) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/Nonstationary_Transformer.py). + - [x] **FEDformer** - FEDformer: Frequency Enhanced Decomposed Transformer for Long-term Series Forecasting [[ICML 2022]](https://proceedings.mlr.press/v162/zhou22g.html) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/FEDformer.py). 
+ - [x] **Pyraformer** - Pyraformer: Low-complexity Pyramidal Attention for Long-range Time Series Modeling and Forecasting [[ICLR 2022]](https://openreview.net/pdf?id=0EXmFzUn5I) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/Pyraformer.py). + - [x] **Autoformer** - Autoformer: Decomposition Transformers with Auto-Correlation for Long-Term Series Forecasting [[NeurIPS 2021]](https://openreview.net/pdf?id=I55UqU-M11y) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/Autoformer.py). + - [x] **Informer** - Informer: Beyond Efficient Transformer for Long Sequence Time-Series Forecasting [[AAAI 2021]](https://ojs.aaai.org/index.php/AAAI/article/view/17325/17132) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/Informer.py). + - [x] **Reformer** - Reformer: The Efficient Transformer [[ICLR 2020]](https://openreview.net/forum?id=rkgNKkHtvB) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/Reformer.py). + - [x] **Transformer** - Attention is All You Need [[NeurIPS 2017]](https://proceedings.neurips.cc/paper/2017/file/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/Transformer.py). + +See our latest paper [[TimesNet]](https://arxiv.org/abs/2210.02186) for the comprehensive benchmark. We will release a real-time updated online version soon. + +**Newly added baselines.** We will add them to the leaderboard after a comprehensive evaluation. + - [x] **MultiPatchFormer** - A multiscale model for multivariate time series forecasting [[Scientific Reports 2025]](https://www.nature.com/articles/s41598-024-82417-4) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/MultiPatchFormer.py) + - [x] **WPMixer** - WPMixer: Efficient Multi-Resolution Mixing for Long-Term Time Series Forecasting [[AAAI 2025]](https://arxiv.org/abs/2412.17176) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/WPMixer.py) + - [x] **PAttn** - Are Language Models Actually Useful for Time Series Forecasting? [[NeurIPS 2024]](https://arxiv.org/pdf/2406.16964) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/PAttn.py) + - [x] **Mamba** - Mamba: Linear-Time Sequence Modeling with Selective State Spaces [[arXiv 2023]](https://arxiv.org/abs/2312.00752) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/Mamba.py) + - [x] **SegRNN** - SegRNN: Segment Recurrent Neural Network for Long-Term Time Series Forecasting [[arXiv 2023]](https://arxiv.org/abs/2308.11200.pdf) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/SegRNN.py). + - [x] **Koopa** - Koopa: Learning Non-stationary Time Series Dynamics with Koopman Predictors [[NeurIPS 2023]](https://arxiv.org/pdf/2305.18803.pdf) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/Koopa.py). + - [x] **FreTS** - Frequency-domain MLPs are More Effective Learners in Time Series Forecasting [[NeurIPS 2023]](https://arxiv.org/pdf/2311.06184.pdf) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/FreTS.py). + - [x] **MICN** - MICN: Multi-scale Local and Global Context Modeling for Long-term Series Forecasting [[ICLR 2023]](https://openreview.net/pdf?id=zt53IDUR1U)[[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/MICN.py). 
+ - [x] **Crossformer** - Crossformer: Transformer Utilizing Cross-Dimension Dependency for Multivariate Time Series Forecasting [[ICLR 2023]](https://openreview.net/pdf?id=vSVLM2j9eie)[[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/Crossformer.py). + - [x] **TiDE** - Long-term Forecasting with TiDE: Time-series Dense Encoder [[arXiv 2023]](https://arxiv.org/pdf/2304.08424.pdf) [[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/TiDE.py). + - [x] **SCINet** - SCINet: Time Series Modeling and Forecasting with Sample Convolution and Interaction [[NeurIPS 2022]](https://openreview.net/pdf?id=AyajSjTAzmg)[[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/SCINet.py). + - [x] **FiLM** - FiLM: Frequency improved Legendre Memory Model for Long-term Time Series Forecasting [[NeurIPS 2022]](https://openreview.net/forum?id=zTQdHSQUQWc)[[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/FiLM.py). + - [x] **TFT** - Temporal Fusion Transformers for Interpretable Multi-horizon Time Series Forecasting [[arXiv 2019]](https://arxiv.org/abs/1912.09363)[[Code]](https://github.com/thuml/Time-Series-Library/blob/main/models/TemporalFusionTransformer.py). + +## Usage + +1. Install Python 3.8. For convenience, execute the following command. + +``` +pip install -r requirements.txt +``` + +2. Prepare Data. You can obtain the well pre-processed datasets from [[Google Drive]](https://drive.google.com/drive/folders/13Cg1KYOlzM5C7K8gK8NfC-F3EYxkM3D2?usp=sharing) or [[Baidu Drive]](https://pan.baidu.com/s/1r3KhGd0Q9PJIUZdfEYoymg?pwd=i9iy), Then place the downloaded data in the folder`./dataset`. Here is a summary of supported datasets. + +

+ +

+ +3. Train and evaluate model. We provide the experiment scripts for all benchmarks under the folder `./scripts/`. You can reproduce the experiment results as the following examples: + +``` +# long-term forecast +bash ./scripts/long_term_forecast/ETT_script/TimesNet_ETTh1.sh +# short-term forecast +bash ./scripts/short_term_forecast/TimesNet_M4.sh +# imputation +bash ./scripts/imputation/ETT_script/TimesNet_ETTh1.sh +# anomaly detection +bash ./scripts/anomaly_detection/PSM/TimesNet.sh +# classification +bash ./scripts/classification/TimesNet.sh +``` + +4. Develop your own model. + +- Add the model file to the folder `./models`. You can follow the `./models/Transformer.py`. +- Include the newly added model in the `Exp_Basic.model_dict` of `./exp/exp_basic.py`. +- Create the corresponding scripts under the folder `./scripts`. + +Note: + +(1) About classification: Since we include all five tasks in a unified code base, the accuracy of each subtask may fluctuate but the average performance can be reproduced (even a bit better). We have provided the reproduced checkpoints [here](https://github.com/thuml/Time-Series-Library/issues/494). + +(2) About anomaly detection: Some discussion about the adjustment strategy in anomaly detection can be found [here](https://github.com/thuml/Anomaly-Transformer/issues/14). The key point is that the adjustment strategy corresponds to an event-level metric. + +## Citation + +If you find this repo useful, please cite our paper. + +``` +@inproceedings{wu2023timesnet, + title={TimesNet: Temporal 2D-Variation Modeling for General Time Series Analysis}, + author={Haixu Wu and Tengge Hu and Yong Liu and Hang Zhou and Jianmin Wang and Mingsheng Long}, + booktitle={International Conference on Learning Representations}, + year={2023}, +} + +@article{wang2024tssurvey, + title={Deep Time Series Models: A Comprehensive Survey and Benchmark}, + author={Yuxuan Wang and Haixu Wu and Jiaxiang Dong and Yong Liu and Mingsheng Long and Jianmin Wang}, + booktitle={arXiv preprint arXiv:2407.13278}, + year={2024}, +} +``` + +## Contact +If you have any questions or suggestions, feel free to contact our maintenance team: + +Current: +- Haixu Wu (Ph.D. student, wuhx23@mails.tsinghua.edu.cn) +- Yong Liu (Ph.D. student, liuyong21@mails.tsinghua.edu.cn) +- Huikun Weng (Undergraduate, wenghk22@mails.tsinghua.edu.cn) + +Previous: +- Yuxuan Wang (Ph.D. student, wangyuxu22@mails.tsinghua.edu.cn) +- Tengge Hu (Master student, htg21@mails.tsinghua.edu.cn) +- Haoran Zhang (Master student, z-hr20@mails.tsinghua.edu.cn) +- Jiawei Guo (Undergraduate, guo-jw21@mails.tsinghua.edu.cn) + +Or describe it in Issues. + +## Acknowledgement + +This library is constructed based on the following repos: + +- Forecasting: https://github.com/thuml/Autoformer. + +- Anomaly Detection: https://github.com/thuml/Anomaly-Transformer. + +- Classification: https://github.com/thuml/Flowformer. + +All the experiment datasets are public, and we obtain them from the following links: + +- Long-term Forecasting and Imputation: https://github.com/thuml/Autoformer. + +- Short-term Forecasting: https://github.com/ServiceNow/N-BEATS. + +- Anomaly Detection: https://github.com/thuml/Anomaly-Transformer. + +- Classification: https://www.timeseriesclassification.com/. 
+ +## All Thanks To Our Contributors + + + + diff --git a/data_provider/__init__.py b/data_provider/__init__.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/data_provider/__init__.py @@ -0,0 +1 @@ + diff --git a/data_provider/data_factory.py b/data_provider/data_factory.py new file mode 100644 index 0000000..7fc458f --- /dev/null +++ b/data_provider/data_factory.py @@ -0,0 +1,86 @@ +from data_provider.data_loader import Dataset_ETT_hour, Dataset_ETT_minute, Dataset_Custom, Dataset_M4, PSMSegLoader, \ + MSLSegLoader, SMAPSegLoader, SMDSegLoader, SWATSegLoader, UEAloader +from data_provider.uea import collate_fn +from torch.utils.data import DataLoader + +data_dict = { + 'ETTh1': Dataset_ETT_hour, + 'ETTh2': Dataset_ETT_hour, + 'ETTm1': Dataset_ETT_minute, + 'ETTm2': Dataset_ETT_minute, + 'custom': Dataset_Custom, + 'm4': Dataset_M4, + 'PSM': PSMSegLoader, + 'MSL': MSLSegLoader, + 'SMAP': SMAPSegLoader, + 'SMD': SMDSegLoader, + 'SWAT': SWATSegLoader, + 'UEA': UEAloader +} + + +def data_provider(args, flag): + Data = data_dict[args.data] + timeenc = 0 if args.embed != 'timeF' else 1 + + shuffle_flag = False if (flag == 'test' or flag == 'TEST') else True + drop_last = False + batch_size = args.batch_size + freq = args.freq + + if args.task_name == 'anomaly_detection': + drop_last = False + data_set = Data( + args = args, + root_path=args.root_path, + win_size=args.seq_len, + flag=flag, + ) + print(flag, len(data_set)) + data_loader = DataLoader( + data_set, + batch_size=batch_size, + shuffle=shuffle_flag, + num_workers=args.num_workers, + drop_last=drop_last) + return data_set, data_loader + elif args.task_name == 'classification': + drop_last = False + data_set = Data( + args = args, + root_path=args.root_path, + flag=flag, + ) + + data_loader = DataLoader( + data_set, + batch_size=batch_size, + shuffle=shuffle_flag, + num_workers=args.num_workers, + drop_last=drop_last, + collate_fn=lambda x: collate_fn(x, max_len=args.seq_len) + ) + return data_set, data_loader + else: + if args.data == 'm4': + drop_last = False + data_set = Data( + args = args, + root_path=args.root_path, + data_path=args.data_path, + flag=flag, + size=[args.seq_len, args.label_len, args.pred_len], + features=args.features, + target=args.target, + timeenc=timeenc, + freq=freq, + seasonal_patterns=args.seasonal_patterns + ) + print(flag, len(data_set)) + data_loader = DataLoader( + data_set, + batch_size=batch_size, + shuffle=shuffle_flag, + num_workers=args.num_workers, + drop_last=drop_last) + return data_set, data_loader diff --git a/data_provider/data_loader.py b/data_provider/data_loader.py new file mode 100644 index 0000000..dcbea31 --- /dev/null +++ b/data_provider/data_loader.py @@ -0,0 +1,748 @@ +import os +import numpy as np +import pandas as pd +import glob +import re +import torch +from torch.utils.data import Dataset, DataLoader +from sklearn.preprocessing import StandardScaler +from utils.timefeatures import time_features +from data_provider.m4 import M4Dataset, M4Meta +from data_provider.uea import subsample, interpolate_missing, Normalizer +from sktime.datasets import load_from_tsfile_to_dataframe +import warnings +from utils.augmentation import run_augmentation_single + +warnings.filterwarnings('ignore') + + +class Dataset_ETT_hour(Dataset): + def __init__(self, args, root_path, flag='train', size=None, + features='S', data_path='ETTh1.csv', + target='OT', scale=True, timeenc=0, freq='h', seasonal_patterns=None): + # size [seq_len, label_len, pred_len] + self.args = args + # info + if 
size == None: + self.seq_len = 24 * 4 * 4 + self.label_len = 24 * 4 + self.pred_len = 24 * 4 + else: + self.seq_len = size[0] + self.label_len = size[1] + self.pred_len = size[2] + # init + assert flag in ['train', 'test', 'val'] + type_map = {'train': 0, 'val': 1, 'test': 2} + self.set_type = type_map[flag] + + self.features = features + self.target = target + self.scale = scale + self.timeenc = timeenc + self.freq = freq + + self.root_path = root_path + self.data_path = data_path + self.__read_data__() + + def __read_data__(self): + self.scaler = StandardScaler() + df_raw = pd.read_csv(os.path.join(self.root_path, + self.data_path)) + + border1s = [0, 12 * 30 * 24 - self.seq_len, 12 * 30 * 24 + 4 * 30 * 24 - self.seq_len] + border2s = [12 * 30 * 24, 12 * 30 * 24 + 4 * 30 * 24, 12 * 30 * 24 + 8 * 30 * 24] + border1 = border1s[self.set_type] + border2 = border2s[self.set_type] + + if self.features == 'M' or self.features == 'MS': + cols_data = df_raw.columns[1:] + df_data = df_raw[cols_data] + elif self.features == 'S': + df_data = df_raw[[self.target]] + + if self.scale: + train_data = df_data[border1s[0]:border2s[0]] + self.scaler.fit(train_data.values) + data = self.scaler.transform(df_data.values) + else: + data = df_data.values + + df_stamp = df_raw[['date']][border1:border2] + df_stamp['date'] = pd.to_datetime(df_stamp.date) + if self.timeenc == 0: + df_stamp['month'] = df_stamp.date.apply(lambda row: row.month, 1) + df_stamp['day'] = df_stamp.date.apply(lambda row: row.day, 1) + df_stamp['weekday'] = df_stamp.date.apply(lambda row: row.weekday(), 1) + df_stamp['hour'] = df_stamp.date.apply(lambda row: row.hour, 1) + data_stamp = df_stamp.drop(['date'], 1).values + elif self.timeenc == 1: + data_stamp = time_features(pd.to_datetime(df_stamp['date'].values), freq=self.freq) + data_stamp = data_stamp.transpose(1, 0) + + self.data_x = data[border1:border2] + self.data_y = data[border1:border2] + + if self.set_type == 0 and self.args.augmentation_ratio > 0: + self.data_x, self.data_y, augmentation_tags = run_augmentation_single(self.data_x, self.data_y, self.args) + + self.data_stamp = data_stamp + + def __getitem__(self, index): + s_begin = index + s_end = s_begin + self.seq_len + r_begin = s_end - self.label_len + r_end = r_begin + self.label_len + self.pred_len + + seq_x = self.data_x[s_begin:s_end] + seq_y = self.data_y[r_begin:r_end] + seq_x_mark = self.data_stamp[s_begin:s_end] + seq_y_mark = self.data_stamp[r_begin:r_end] + + return seq_x, seq_y, seq_x_mark, seq_y_mark + + def __len__(self): + return len(self.data_x) - self.seq_len - self.pred_len + 1 + + def inverse_transform(self, data): + return self.scaler.inverse_transform(data) + + +class Dataset_ETT_minute(Dataset): + def __init__(self, args, root_path, flag='train', size=None, + features='S', data_path='ETTm1.csv', + target='OT', scale=True, timeenc=0, freq='t', seasonal_patterns=None): + # size [seq_len, label_len, pred_len] + self.args = args + # info + if size == None: + self.seq_len = 24 * 4 * 4 + self.label_len = 24 * 4 + self.pred_len = 24 * 4 + else: + self.seq_len = size[0] + self.label_len = size[1] + self.pred_len = size[2] + # init + assert flag in ['train', 'test', 'val'] + type_map = {'train': 0, 'val': 1, 'test': 2} + self.set_type = type_map[flag] + + self.features = features + self.target = target + self.scale = scale + self.timeenc = timeenc + self.freq = freq + + self.root_path = root_path + self.data_path = data_path + self.__read_data__() + + def __read_data__(self): + self.scaler = StandardScaler() + 
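+        # Note: the scaler below is fit only on the training slice (border1s[0]:border2s[0])
+        # and then applied to the full series, so val/test windows are standardized with train statistics.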
df_raw = pd.read_csv(os.path.join(self.root_path, + self.data_path)) + + border1s = [0, 12 * 30 * 24 * 4 - self.seq_len, 12 * 30 * 24 * 4 + 4 * 30 * 24 * 4 - self.seq_len] + border2s = [12 * 30 * 24 * 4, 12 * 30 * 24 * 4 + 4 * 30 * 24 * 4, 12 * 30 * 24 * 4 + 8 * 30 * 24 * 4] + border1 = border1s[self.set_type] + border2 = border2s[self.set_type] + + if self.features == 'M' or self.features == 'MS': + cols_data = df_raw.columns[1:] + df_data = df_raw[cols_data] + elif self.features == 'S': + df_data = df_raw[[self.target]] + + if self.scale: + train_data = df_data[border1s[0]:border2s[0]] + self.scaler.fit(train_data.values) + data = self.scaler.transform(df_data.values) + else: + data = df_data.values + + df_stamp = df_raw[['date']][border1:border2] + df_stamp['date'] = pd.to_datetime(df_stamp.date) + if self.timeenc == 0: + df_stamp['month'] = df_stamp.date.apply(lambda row: row.month, 1) + df_stamp['day'] = df_stamp.date.apply(lambda row: row.day, 1) + df_stamp['weekday'] = df_stamp.date.apply(lambda row: row.weekday(), 1) + df_stamp['hour'] = df_stamp.date.apply(lambda row: row.hour, 1) + df_stamp['minute'] = df_stamp.date.apply(lambda row: row.minute, 1) + df_stamp['minute'] = df_stamp.minute.map(lambda x: x // 15) + data_stamp = df_stamp.drop(['date'], 1).values + elif self.timeenc == 1: + data_stamp = time_features(pd.to_datetime(df_stamp['date'].values), freq=self.freq) + data_stamp = data_stamp.transpose(1, 0) + + self.data_x = data[border1:border2] + self.data_y = data[border1:border2] + + if self.set_type == 0 and self.args.augmentation_ratio > 0: + self.data_x, self.data_y, augmentation_tags = run_augmentation_single(self.data_x, self.data_y, self.args) + + self.data_stamp = data_stamp + + def __getitem__(self, index): + s_begin = index + s_end = s_begin + self.seq_len + r_begin = s_end - self.label_len + r_end = r_begin + self.label_len + self.pred_len + + seq_x = self.data_x[s_begin:s_end] + seq_y = self.data_y[r_begin:r_end] + seq_x_mark = self.data_stamp[s_begin:s_end] + seq_y_mark = self.data_stamp[r_begin:r_end] + + return seq_x, seq_y, seq_x_mark, seq_y_mark + + def __len__(self): + return len(self.data_x) - self.seq_len - self.pred_len + 1 + + def inverse_transform(self, data): + return self.scaler.inverse_transform(data) + + +class Dataset_Custom(Dataset): + def __init__(self, args, root_path, flag='train', size=None, + features='S', data_path='ETTh1.csv', + target='OT', scale=True, timeenc=0, freq='h', seasonal_patterns=None): + # size [seq_len, label_len, pred_len] + self.args = args + # info + if size == None: + self.seq_len = 24 * 4 * 4 + self.label_len = 24 * 4 + self.pred_len = 24 * 4 + else: + self.seq_len = size[0] + self.label_len = size[1] + self.pred_len = size[2] + # init + assert flag in ['train', 'test', 'val'] + type_map = {'train': 0, 'val': 1, 'test': 2} + self.set_type = type_map[flag] + + self.features = features + self.target = target + self.scale = scale + self.timeenc = timeenc + self.freq = freq + + self.root_path = root_path + self.data_path = data_path + self.__read_data__() + + def __read_data__(self): + self.scaler = StandardScaler() + df_raw = pd.read_csv(os.path.join(self.root_path, + self.data_path)) + + ''' + df_raw.columns: ['date', ...(other features), target feature] + ''' + cols = list(df_raw.columns) + cols.remove(self.target) + cols.remove('date') + df_raw = df_raw[['date'] + cols + [self.target]] + num_train = int(len(df_raw) * 0.7) + num_test = int(len(df_raw) * 0.2) + num_vali = len(df_raw) - num_train - num_test + border1s = [0, 
num_train - self.seq_len, len(df_raw) - num_test - self.seq_len] + border2s = [num_train, num_train + num_vali, len(df_raw)] + border1 = border1s[self.set_type] + border2 = border2s[self.set_type] + + if self.features == 'M' or self.features == 'MS': + cols_data = df_raw.columns[1:] + df_data = df_raw[cols_data] + elif self.features == 'S': + df_data = df_raw[[self.target]] + + if self.scale: + train_data = df_data[border1s[0]:border2s[0]] + self.scaler.fit(train_data.values) + data = self.scaler.transform(df_data.values) + else: + data = df_data.values + + df_stamp = df_raw[['date']][border1:border2] + df_stamp['date'] = pd.to_datetime(df_stamp.date) + if self.timeenc == 0: + df_stamp['month'] = df_stamp.date.apply(lambda row: row.month, 1) + df_stamp['day'] = df_stamp.date.apply(lambda row: row.day, 1) + df_stamp['weekday'] = df_stamp.date.apply(lambda row: row.weekday(), 1) + df_stamp['hour'] = df_stamp.date.apply(lambda row: row.hour, 1) + data_stamp = df_stamp.drop(['date'], 1).values + elif self.timeenc == 1: + data_stamp = time_features(pd.to_datetime(df_stamp['date'].values), freq=self.freq) + data_stamp = data_stamp.transpose(1, 0) + + self.data_x = data[border1:border2] + self.data_y = data[border1:border2] + + if self.set_type == 0 and self.args.augmentation_ratio > 0: + self.data_x, self.data_y, augmentation_tags = run_augmentation_single(self.data_x, self.data_y, self.args) + + self.data_stamp = data_stamp + + def __getitem__(self, index): + s_begin = index + s_end = s_begin + self.seq_len + r_begin = s_end - self.label_len + r_end = r_begin + self.label_len + self.pred_len + + seq_x = self.data_x[s_begin:s_end] + seq_y = self.data_y[r_begin:r_end] + seq_x_mark = self.data_stamp[s_begin:s_end] + seq_y_mark = self.data_stamp[r_begin:r_end] + + return seq_x, seq_y, seq_x_mark, seq_y_mark + + def __len__(self): + return len(self.data_x) - self.seq_len - self.pred_len + 1 + + def inverse_transform(self, data): + return self.scaler.inverse_transform(data) + + +class Dataset_M4(Dataset): + def __init__(self, args, root_path, flag='pred', size=None, + features='S', data_path='ETTh1.csv', + target='OT', scale=False, inverse=False, timeenc=0, freq='15min', + seasonal_patterns='Yearly'): + # size [seq_len, label_len, pred_len] + # init + self.features = features + self.target = target + self.scale = scale + self.inverse = inverse + self.timeenc = timeenc + self.root_path = root_path + + self.seq_len = size[0] + self.label_len = size[1] + self.pred_len = size[2] + + self.seasonal_patterns = seasonal_patterns + self.history_size = M4Meta.history_size[seasonal_patterns] + self.window_sampling_limit = int(self.history_size * self.pred_len) + self.flag = flag + + self.__read_data__() + + def __read_data__(self): + # M4Dataset.initialize() + if self.flag == 'train': + dataset = M4Dataset.load(training=True, dataset_file=self.root_path) + else: + dataset = M4Dataset.load(training=False, dataset_file=self.root_path) + training_values = np.array( + [v[~np.isnan(v)] for v in + dataset.values[dataset.groups == self.seasonal_patterns]]) # split different frequencies + self.ids = np.array([i for i in dataset.ids[dataset.groups == self.seasonal_patterns]]) + self.timeseries = [ts for ts in training_values] + + def __getitem__(self, index): + insample = np.zeros((self.seq_len, 1)) + insample_mask = np.zeros((self.seq_len, 1)) + outsample = np.zeros((self.pred_len + self.label_len, 1)) + outsample_mask = np.zeros((self.pred_len + self.label_len, 1)) # m4 dataset + + sampled_timeseries = 
self.timeseries[index] + cut_point = np.random.randint(low=max(1, len(sampled_timeseries) - self.window_sampling_limit), + high=len(sampled_timeseries), + size=1)[0] + + insample_window = sampled_timeseries[max(0, cut_point - self.seq_len):cut_point] + insample[-len(insample_window):, 0] = insample_window + insample_mask[-len(insample_window):, 0] = 1.0 + outsample_window = sampled_timeseries[ + max(0, cut_point - self.label_len):min(len(sampled_timeseries), cut_point + self.pred_len)] + outsample[:len(outsample_window), 0] = outsample_window + outsample_mask[:len(outsample_window), 0] = 1.0 + return insample, outsample, insample_mask, outsample_mask + + def __len__(self): + return len(self.timeseries) + + def inverse_transform(self, data): + return self.scaler.inverse_transform(data) + + def last_insample_window(self): + """ + The last window of insample size of all timeseries. + This function does not support batching and does not reshuffle timeseries. + + :return: Last insample window of all timeseries. Shape "timeseries, insample size" + """ + insample = np.zeros((len(self.timeseries), self.seq_len)) + insample_mask = np.zeros((len(self.timeseries), self.seq_len)) + for i, ts in enumerate(self.timeseries): + ts_last_window = ts[-self.seq_len:] + insample[i, -len(ts):] = ts_last_window + insample_mask[i, -len(ts):] = 1.0 + return insample, insample_mask + + +class PSMSegLoader(Dataset): + def __init__(self, args, root_path, win_size, step=1, flag="train"): + self.flag = flag + self.step = step + self.win_size = win_size + self.scaler = StandardScaler() + data = pd.read_csv(os.path.join(root_path, 'train.csv')) + data = data.values[:, 1:] + data = np.nan_to_num(data) + self.scaler.fit(data) + data = self.scaler.transform(data) + test_data = pd.read_csv(os.path.join(root_path, 'test.csv')) + test_data = test_data.values[:, 1:] + test_data = np.nan_to_num(test_data) + self.test = self.scaler.transform(test_data) + self.train = data + data_len = len(self.train) + self.val = self.train[(int)(data_len * 0.8):] + self.test_labels = pd.read_csv(os.path.join(root_path, 'test_label.csv')).values[:, 1:] + print("test:", self.test.shape) + print("train:", self.train.shape) + + def __len__(self): + if self.flag == "train": + return (self.train.shape[0] - self.win_size) // self.step + 1 + elif (self.flag == 'val'): + return (self.val.shape[0] - self.win_size) // self.step + 1 + elif (self.flag == 'test'): + return (self.test.shape[0] - self.win_size) // self.step + 1 + else: + return (self.test.shape[0] - self.win_size) // self.win_size + 1 + + def __getitem__(self, index): + index = index * self.step + if self.flag == "train": + return np.float32(self.train[index:index + self.win_size]), np.float32(self.test_labels[0:self.win_size]) + elif (self.flag == 'val'): + return np.float32(self.val[index:index + self.win_size]), np.float32(self.test_labels[0:self.win_size]) + elif (self.flag == 'test'): + return np.float32(self.test[index:index + self.win_size]), np.float32( + self.test_labels[index:index + self.win_size]) + else: + return np.float32(self.test[ + index // self.step * self.win_size:index // self.step * self.win_size + self.win_size]), np.float32( + self.test_labels[index // self.step * self.win_size:index // self.step * self.win_size + self.win_size]) + + +class MSLSegLoader(Dataset): + def __init__(self, args, root_path, win_size, step=1, flag="train"): + self.flag = flag + self.step = step + self.win_size = win_size + self.scaler = StandardScaler() + data = np.load(os.path.join(root_path, 
"MSL_train.npy")) + self.scaler.fit(data) + data = self.scaler.transform(data) + test_data = np.load(os.path.join(root_path, "MSL_test.npy")) + self.test = self.scaler.transform(test_data) + self.train = data + data_len = len(self.train) + self.val = self.train[(int)(data_len * 0.8):] + self.test_labels = np.load(os.path.join(root_path, "MSL_test_label.npy")) + print("test:", self.test.shape) + print("train:", self.train.shape) + + def __len__(self): + if self.flag == "train": + return (self.train.shape[0] - self.win_size) // self.step + 1 + elif (self.flag == 'val'): + return (self.val.shape[0] - self.win_size) // self.step + 1 + elif (self.flag == 'test'): + return (self.test.shape[0] - self.win_size) // self.step + 1 + else: + return (self.test.shape[0] - self.win_size) // self.win_size + 1 + + def __getitem__(self, index): + index = index * self.step + if self.flag == "train": + return np.float32(self.train[index:index + self.win_size]), np.float32(self.test_labels[0:self.win_size]) + elif (self.flag == 'val'): + return np.float32(self.val[index:index + self.win_size]), np.float32(self.test_labels[0:self.win_size]) + elif (self.flag == 'test'): + return np.float32(self.test[index:index + self.win_size]), np.float32( + self.test_labels[index:index + self.win_size]) + else: + return np.float32(self.test[ + index // self.step * self.win_size:index // self.step * self.win_size + self.win_size]), np.float32( + self.test_labels[index // self.step * self.win_size:index // self.step * self.win_size + self.win_size]) + + +class SMAPSegLoader(Dataset): + def __init__(self, args, root_path, win_size, step=1, flag="train"): + self.flag = flag + self.step = step + self.win_size = win_size + self.scaler = StandardScaler() + data = np.load(os.path.join(root_path, "SMAP_train.npy")) + self.scaler.fit(data) + data = self.scaler.transform(data) + test_data = np.load(os.path.join(root_path, "SMAP_test.npy")) + self.test = self.scaler.transform(test_data) + self.train = data + data_len = len(self.train) + self.val = self.train[(int)(data_len * 0.8):] + self.test_labels = np.load(os.path.join(root_path, "SMAP_test_label.npy")) + print("test:", self.test.shape) + print("train:", self.train.shape) + + def __len__(self): + + if self.flag == "train": + return (self.train.shape[0] - self.win_size) // self.step + 1 + elif (self.flag == 'val'): + return (self.val.shape[0] - self.win_size) // self.step + 1 + elif (self.flag == 'test'): + return (self.test.shape[0] - self.win_size) // self.step + 1 + else: + return (self.test.shape[0] - self.win_size) // self.win_size + 1 + + def __getitem__(self, index): + index = index * self.step + if self.flag == "train": + return np.float32(self.train[index:index + self.win_size]), np.float32(self.test_labels[0:self.win_size]) + elif (self.flag == 'val'): + return np.float32(self.val[index:index + self.win_size]), np.float32(self.test_labels[0:self.win_size]) + elif (self.flag == 'test'): + return np.float32(self.test[index:index + self.win_size]), np.float32( + self.test_labels[index:index + self.win_size]) + else: + return np.float32(self.test[ + index // self.step * self.win_size:index // self.step * self.win_size + self.win_size]), np.float32( + self.test_labels[index // self.step * self.win_size:index // self.step * self.win_size + self.win_size]) + + +class SMDSegLoader(Dataset): + def __init__(self, args, root_path, win_size, step=100, flag="train"): + self.flag = flag + self.step = step + self.win_size = win_size + self.scaler = StandardScaler() + data = 
np.load(os.path.join(root_path, "SMD_train.npy")) + self.scaler.fit(data) + data = self.scaler.transform(data) + test_data = np.load(os.path.join(root_path, "SMD_test.npy")) + self.test = self.scaler.transform(test_data) + self.train = data + data_len = len(self.train) + self.val = self.train[(int)(data_len * 0.8):] + self.test_labels = np.load(os.path.join(root_path, "SMD_test_label.npy")) + + def __len__(self): + if self.flag == "train": + return (self.train.shape[0] - self.win_size) // self.step + 1 + elif (self.flag == 'val'): + return (self.val.shape[0] - self.win_size) // self.step + 1 + elif (self.flag == 'test'): + return (self.test.shape[0] - self.win_size) // self.step + 1 + else: + return (self.test.shape[0] - self.win_size) // self.win_size + 1 + + def __getitem__(self, index): + index = index * self.step + if self.flag == "train": + return np.float32(self.train[index:index + self.win_size]), np.float32(self.test_labels[0:self.win_size]) + elif (self.flag == 'val'): + return np.float32(self.val[index:index + self.win_size]), np.float32(self.test_labels[0:self.win_size]) + elif (self.flag == 'test'): + return np.float32(self.test[index:index + self.win_size]), np.float32( + self.test_labels[index:index + self.win_size]) + else: + return np.float32(self.test[ + index // self.step * self.win_size:index // self.step * self.win_size + self.win_size]), np.float32( + self.test_labels[index // self.step * self.win_size:index // self.step * self.win_size + self.win_size]) + + +class SWATSegLoader(Dataset): + def __init__(self, args, root_path, win_size, step=1, flag="train"): + self.flag = flag + self.step = step + self.win_size = win_size + self.scaler = StandardScaler() + + train_data = pd.read_csv(os.path.join(root_path, 'swat_train2.csv')) + test_data = pd.read_csv(os.path.join(root_path, 'swat2.csv')) + labels = test_data.values[:, -1:] + train_data = train_data.values[:, :-1] + test_data = test_data.values[:, :-1] + + self.scaler.fit(train_data) + train_data = self.scaler.transform(train_data) + test_data = self.scaler.transform(test_data) + self.train = train_data + self.test = test_data + data_len = len(self.train) + self.val = self.train[(int)(data_len * 0.8):] + self.test_labels = labels + print("test:", self.test.shape) + print("train:", self.train.shape) + + def __len__(self): + """ + Number of images in the object dataset. 
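+        (For this loader, that is the number of sliding windows over the SWaT series rather than images.)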
+ """ + if self.flag == "train": + return (self.train.shape[0] - self.win_size) // self.step + 1 + elif (self.flag == 'val'): + return (self.val.shape[0] - self.win_size) // self.step + 1 + elif (self.flag == 'test'): + return (self.test.shape[0] - self.win_size) // self.step + 1 + else: + return (self.test.shape[0] - self.win_size) // self.win_size + 1 + + def __getitem__(self, index): + index = index * self.step + if self.flag == "train": + return np.float32(self.train[index:index + self.win_size]), np.float32(self.test_labels[0:self.win_size]) + elif (self.flag == 'val'): + return np.float32(self.val[index:index + self.win_size]), np.float32(self.test_labels[0:self.win_size]) + elif (self.flag == 'test'): + return np.float32(self.test[index:index + self.win_size]), np.float32( + self.test_labels[index:index + self.win_size]) + else: + return np.float32(self.test[ + index // self.step * self.win_size:index // self.step * self.win_size + self.win_size]), np.float32( + self.test_labels[index // self.step * self.win_size:index // self.step * self.win_size + self.win_size]) + + +class UEAloader(Dataset): + """ + Dataset class for datasets included in: + Time Series Classification Archive (www.timeseriesclassification.com) + Argument: + limit_size: float in (0, 1) for debug + Attributes: + all_df: (num_samples * seq_len, num_columns) dataframe indexed by integer indices, with multiple rows corresponding to the same index (sample). + Each row is a time step; Each column contains either metadata (e.g. timestamp) or a feature. + feature_df: (num_samples * seq_len, feat_dim) dataframe; contains the subset of columns of `all_df` which correspond to selected features + feature_names: names of columns contained in `feature_df` (same as feature_df.columns) + all_IDs: (num_samples,) series of IDs contained in `all_df`/`feature_df` (same as all_df.index.unique() ) + labels_df: (num_samples, num_labels) pd.DataFrame of label(s) for each sample + max_seq_len: maximum sequence (time series) length. If None, script argument `max_seq_len` will be used. + (Moreover, script argument overrides this attribute) + """ + + def __init__(self, args, root_path, file_list=None, limit_size=None, flag=None): + self.args = args + self.root_path = root_path + self.flag = flag + self.all_df, self.labels_df = self.load_all(root_path, file_list=file_list, flag=flag) + self.all_IDs = self.all_df.index.unique() # all sample IDs (integer indices 0 ... num_samples-1) + + if limit_size is not None: + if limit_size > 1: + limit_size = int(limit_size) + else: # interpret as proportion if in (0, 1] + limit_size = int(limit_size * len(self.all_IDs)) + self.all_IDs = self.all_IDs[:limit_size] + self.all_df = self.all_df.loc[self.all_IDs] + + # use all features + self.feature_names = self.all_df.columns + self.feature_df = self.all_df + + # pre_process + normalizer = Normalizer() + self.feature_df = normalizer.normalize(self.feature_df) + print(len(self.all_IDs)) + + def load_all(self, root_path, file_list=None, flag=None): + """ + Loads datasets from ts files contained in `root_path` into a dataframe, optionally choosing from `pattern` + Args: + root_path: directory containing all individual .ts files + file_list: optionally, provide a list of file paths within `root_path` to consider. + Otherwise, entire `root_path` contents will be used. 
+ Returns: + all_df: a single (possibly concatenated) dataframe with all data corresponding to specified files + labels_df: dataframe containing label(s) for each sample + """ + # Select paths for training and evaluation + if file_list is None: + data_paths = glob.glob(os.path.join(root_path, '*')) # list of all paths + else: + data_paths = [os.path.join(root_path, p) for p in file_list] + if len(data_paths) == 0: + raise Exception('No files found using: {}'.format(os.path.join(root_path, '*'))) + if flag is not None: + data_paths = list(filter(lambda x: re.search(flag, x), data_paths)) + input_paths = [p for p in data_paths if os.path.isfile(p) and p.endswith('.ts')] + if len(input_paths) == 0: + pattern='*.ts' + raise Exception("No .ts files found using pattern: '{}'".format(pattern)) + + all_df, labels_df = self.load_single(input_paths[0]) # a single file contains dataset + + return all_df, labels_df + + def load_single(self, filepath): + df, labels = load_from_tsfile_to_dataframe(filepath, return_separate_X_and_y=True, + replace_missing_vals_with='NaN') + labels = pd.Series(labels, dtype="category") + self.class_names = labels.cat.categories + labels_df = pd.DataFrame(labels.cat.codes, + dtype=np.int8) # int8-32 gives an error when using nn.CrossEntropyLoss + + lengths = df.applymap( + lambda x: len(x)).values # (num_samples, num_dimensions) array containing the length of each series + + horiz_diffs = np.abs(lengths - np.expand_dims(lengths[:, 0], -1)) + + if np.sum(horiz_diffs) > 0: # if any row (sample) has varying length across dimensions + df = df.applymap(subsample) + + lengths = df.applymap(lambda x: len(x)).values + vert_diffs = np.abs(lengths - np.expand_dims(lengths[0, :], 0)) + if np.sum(vert_diffs) > 0: # if any column (dimension) has varying length across samples + self.max_seq_len = int(np.max(lengths[:, 0])) + else: + self.max_seq_len = lengths[0, 0] + + # First create a (seq_len, feat_dim) dataframe for each sample, indexed by a single integer ("ID" of the sample) + # Then concatenate into a (num_samples * seq_len, feat_dim) dataframe, with multiple rows corresponding to the + # sample index (i.e. 
the same scheme as all datasets in this project) + + df = pd.concat((pd.DataFrame({col: df.loc[row, col] for col in df.columns}).reset_index(drop=True).set_index( + pd.Series(lengths[row, 0] * [row])) for row in range(df.shape[0])), axis=0) + + # Replace NaN values + grp = df.groupby(by=df.index) + df = grp.transform(interpolate_missing) + + return df, labels_df + + def instance_norm(self, case): + if self.root_path.count('EthanolConcentration') > 0: # special process for numerical stability + mean = case.mean(0, keepdim=True) + case = case - mean + stdev = torch.sqrt(torch.var(case, dim=1, keepdim=True, unbiased=False) + 1e-5) + case /= stdev + return case + else: + return case + + def __getitem__(self, ind): + batch_x = self.feature_df.loc[self.all_IDs[ind]].values + labels = self.labels_df.loc[self.all_IDs[ind]].values + if self.flag == "TRAIN" and self.args.augmentation_ratio > 0: + num_samples = len(self.all_IDs) + num_columns = self.feature_df.shape[1] + seq_len = int(self.feature_df.shape[0] / num_samples) + batch_x = batch_x.reshape((1, seq_len, num_columns)) + batch_x, labels, augmentation_tags = run_augmentation_single(batch_x, labels, self.args) + + batch_x = batch_x.reshape((1 * seq_len, num_columns)) + + return self.instance_norm(torch.from_numpy(batch_x)), \ + torch.from_numpy(labels) + + def __len__(self): + return len(self.all_IDs) diff --git a/data_provider/m4.py b/data_provider/m4.py new file mode 100644 index 0000000..3ab32c6 --- /dev/null +++ b/data_provider/m4.py @@ -0,0 +1,138 @@ +# This source code is provided for the purposes of scientific reproducibility +# under the following limited license from Element AI Inc. The code is an +# implementation of the N-BEATS model (Oreshkin et al., N-BEATS: Neural basis +# expansion analysis for interpretable time series forecasting, +# https://arxiv.org/abs/1905.10437). The copyright to the source code is +# licensed under the Creative Commons - Attribution-NonCommercial 4.0 +# International license (CC BY-NC 4.0): +# https://creativecommons.org/licenses/by-nc/4.0/. Any commercial use (whether +# for the benefit of third parties or internally in production) requires an +# explicit license. The subject-matter of the N-BEATS model and associated +# materials are the property of Element AI Inc. and may be subject to patent +# protection. No license to patents is granted hereunder (whether express or +# implied). Copyright © 2020 Element AI Inc. All rights reserved. + +""" +M4 Dataset +""" +import logging +import os +from collections import OrderedDict +from dataclasses import dataclass +from glob import glob + +import numpy as np +import pandas as pd +import patoolib +from tqdm import tqdm +import logging +import os +import pathlib +import sys +from urllib import request + + +def url_file_name(url: str) -> str: + """ + Extract file name from url. + + :param url: URL to extract file name from. + :return: File name. + """ + return url.split('/')[-1] if len(url) > 0 else '' + + +def download(url: str, file_path: str) -> None: + """ + Download a file to the given path. + + :param url: URL to download + :param file_path: Where to download the content. 
+ """ + + def progress(count, block_size, total_size): + progress_pct = float(count * block_size) / float(total_size) * 100.0 + sys.stdout.write('\rDownloading {} to {} {:.1f}%'.format(url, file_path, progress_pct)) + sys.stdout.flush() + + if not os.path.isfile(file_path): + opener = request.build_opener() + opener.addheaders = [('User-agent', 'Mozilla/5.0')] + request.install_opener(opener) + pathlib.Path(os.path.dirname(file_path)).mkdir(parents=True, exist_ok=True) + f, _ = request.urlretrieve(url, file_path, progress) + sys.stdout.write('\n') + sys.stdout.flush() + file_info = os.stat(f) + logging.info(f'Successfully downloaded {os.path.basename(file_path)} {file_info.st_size} bytes.') + else: + file_info = os.stat(file_path) + logging.info(f'File already exists: {file_path} {file_info.st_size} bytes.') + + +@dataclass() +class M4Dataset: + ids: np.ndarray + groups: np.ndarray + frequencies: np.ndarray + horizons: np.ndarray + values: np.ndarray + + @staticmethod + def load(training: bool = True, dataset_file: str = '../dataset/m4') -> 'M4Dataset': + """ + Load cached dataset. + + :param training: Load training part if training is True, test part otherwise. + """ + info_file = os.path.join(dataset_file, 'M4-info.csv') + train_cache_file = os.path.join(dataset_file, 'training.npz') + test_cache_file = os.path.join(dataset_file, 'test.npz') + m4_info = pd.read_csv(info_file) + return M4Dataset(ids=m4_info.M4id.values, + groups=m4_info.SP.values, + frequencies=m4_info.Frequency.values, + horizons=m4_info.Horizon.values, + values=np.load( + train_cache_file if training else test_cache_file, + allow_pickle=True)) + + +@dataclass() +class M4Meta: + seasonal_patterns = ['Yearly', 'Quarterly', 'Monthly', 'Weekly', 'Daily', 'Hourly'] + horizons = [6, 8, 18, 13, 14, 48] + frequencies = [1, 4, 12, 1, 1, 24] + horizons_map = { + 'Yearly': 6, + 'Quarterly': 8, + 'Monthly': 18, + 'Weekly': 13, + 'Daily': 14, + 'Hourly': 48 + } # different predict length + frequency_map = { + 'Yearly': 1, + 'Quarterly': 4, + 'Monthly': 12, + 'Weekly': 1, + 'Daily': 1, + 'Hourly': 24 + } + history_size = { + 'Yearly': 1.5, + 'Quarterly': 1.5, + 'Monthly': 1.5, + 'Weekly': 10, + 'Daily': 10, + 'Hourly': 10 + } # from interpretable.gin + + +def load_m4_info() -> pd.DataFrame: + """ + Load M4Info file. + + :return: Pandas DataFrame of M4Info. + """ + return pd.read_csv(INFO_FILE_PATH) diff --git a/data_provider/uea.py b/data_provider/uea.py new file mode 100644 index 0000000..f0dd0ab --- /dev/null +++ b/data_provider/uea.py @@ -0,0 +1,125 @@ +import os +import numpy as np +import pandas as pd +import torch + + +def collate_fn(data, max_len=None): + """Build mini-batch tensors from a list of (X, mask) tuples. Mask input. Create + Args: + data: len(batch_size) list of tuples (X, y). + - X: torch tensor of shape (seq_length, feat_dim); variable seq_length. + - y: torch tensor of shape (num_labels,) : class indices or numerical targets + (for classification or regression, respectively). num_labels > 1 for multi-task models + max_len: global fixed sequence length. Used for architectures requiring fixed length input, + where the batch length cannot vary dynamically. 
Longer sequences are clipped, shorter are padded with 0s + Returns: + X: (batch_size, padded_length, feat_dim) torch tensor of masked features (input) + targets: (batch_size, padded_length, feat_dim) torch tensor of unmasked features (output) + target_masks: (batch_size, padded_length, feat_dim) boolean torch tensor + 0 indicates masked values to be predicted, 1 indicates unaffected/"active" feature values + padding_masks: (batch_size, padded_length) boolean tensor, 1 means keep vector at this position, 0 means padding + """ + + batch_size = len(data) + features, labels = zip(*data) + + # Stack and pad features and masks (convert 2D to 3D tensors, i.e. add batch dimension) + lengths = [X.shape[0] for X in features] # original sequence length for each time series + if max_len is None: + max_len = max(lengths) + + X = torch.zeros(batch_size, max_len, features[0].shape[-1]) # (batch_size, padded_length, feat_dim) + for i in range(batch_size): + end = min(lengths[i], max_len) + X[i, :end, :] = features[i][:end, :] + + targets = torch.stack(labels, dim=0) # (batch_size, num_labels) + + padding_masks = padding_mask(torch.tensor(lengths, dtype=torch.int16), + max_len=max_len) # (batch_size, padded_length) boolean tensor, "1" means keep + + return X, targets, padding_masks + + +def padding_mask(lengths, max_len=None): + """ + Used to mask padded positions: creates a (batch_size, max_len) boolean mask from a tensor of sequence lengths, + where 1 means keep element at this position (time step) + """ + batch_size = lengths.numel() + max_len = max_len or lengths.max_val() # trick works because of overloading of 'or' operator for non-boolean types + return (torch.arange(0, max_len, device=lengths.device) + .type_as(lengths) + .repeat(batch_size, 1) + .lt(lengths.unsqueeze(1))) + + +class Normalizer(object): + """ + Normalizes dataframe across ALL contained rows (time steps). Different from per-sample normalization. + """ + + def __init__(self, norm_type='standardization', mean=None, std=None, min_val=None, max_val=None): + """ + Args: + norm_type: choose from: + "standardization", "minmax": normalizes dataframe across ALL contained rows (time steps) + "per_sample_std", "per_sample_minmax": normalizes each sample separately (i.e. 
across only its own rows) + mean, std, min_val, max_val: optional (num_feat,) Series of pre-computed values + """ + + self.norm_type = norm_type + self.mean = mean + self.std = std + self.min_val = min_val + self.max_val = max_val + + def normalize(self, df): + """ + Args: + df: input dataframe + Returns: + df: normalized dataframe + """ + if self.norm_type == "standardization": + if self.mean is None: + self.mean = df.mean() + self.std = df.std() + return (df - self.mean) / (self.std + np.finfo(float).eps) + + elif self.norm_type == "minmax": + if self.max_val is None: + self.max_val = df.max() + self.min_val = df.min() + return (df - self.min_val) / (self.max_val - self.min_val + np.finfo(float).eps) + + elif self.norm_type == "per_sample_std": + grouped = df.groupby(by=df.index) + return (df - grouped.transform('mean')) / grouped.transform('std') + + elif self.norm_type == "per_sample_minmax": + grouped = df.groupby(by=df.index) + min_vals = grouped.transform('min') + return (df - min_vals) / (grouped.transform('max') - min_vals + np.finfo(float).eps) + + else: + raise (NameError(f'Normalize method "{self.norm_type}" not implemented')) + + +def interpolate_missing(y): + """ + Replaces NaN values in pd.Series `y` using linear interpolation + """ + if y.isna().any(): + y = y.interpolate(method='linear', limit_direction='both') + return y + + +def subsample(y, limit=256, factor=2): + """ + If a given Series is longer than `limit`, returns subsampled sequence by the specified integer factor + """ + if len(y) > limit: + return y[::factor].reset_index(drop=True) + return y diff --git a/exp/__init__.py b/exp/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/exp/exp_anomaly_detection.py b/exp/exp_anomaly_detection.py new file mode 100644 index 0000000..0e856c6 --- /dev/null +++ b/exp/exp_anomaly_detection.py @@ -0,0 +1,207 @@ +from data_provider.data_factory import data_provider +from exp.exp_basic import Exp_Basic +from utils.tools import EarlyStopping, adjust_learning_rate, adjustment +from sklearn.metrics import precision_recall_fscore_support +from sklearn.metrics import accuracy_score +import torch.multiprocessing + +torch.multiprocessing.set_sharing_strategy('file_system') +import torch +import torch.nn as nn +from torch import optim +import os +import time +import warnings +import numpy as np + +warnings.filterwarnings('ignore') + + +class Exp_Anomaly_Detection(Exp_Basic): + def __init__(self, args): + super(Exp_Anomaly_Detection, self).__init__(args) + + def _build_model(self): + model = self.model_dict[self.args.model].Model(self.args).float() + + if self.args.use_multi_gpu and self.args.use_gpu: + model = nn.DataParallel(model, device_ids=self.args.device_ids) + return model + + def _get_data(self, flag): + data_set, data_loader = data_provider(self.args, flag) + return data_set, data_loader + + def _select_optimizer(self): + model_optim = optim.Adam(self.model.parameters(), lr=self.args.learning_rate) + return model_optim + + def _select_criterion(self): + criterion = nn.MSELoss() + return criterion + + def vali(self, vali_data, vali_loader, criterion): + total_loss = [] + self.model.eval() + with torch.no_grad(): + for i, (batch_x, _) in enumerate(vali_loader): + batch_x = batch_x.float().to(self.device) + + outputs = self.model(batch_x, None, None, None) + + f_dim = -1 if self.args.features == 'MS' else 0 + outputs = outputs[:, :, f_dim:] + pred = outputs.detach() + true = batch_x.detach() + + loss = criterion(pred, true) + total_loss.append(loss.item()) + 
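+        # Validation loss is the mean reconstruction MSE over the validation set;
+        # it is only used for logging and early stopping. The anomaly threshold
+        # itself is chosen later, in test(), from the energy distribution.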
total_loss = np.average(total_loss) + self.model.train() + return total_loss + + def train(self, setting): + train_data, train_loader = self._get_data(flag='train') + vali_data, vali_loader = self._get_data(flag='val') + test_data, test_loader = self._get_data(flag='test') + + path = os.path.join(self.args.checkpoints, setting) + if not os.path.exists(path): + os.makedirs(path) + + time_now = time.time() + + train_steps = len(train_loader) + early_stopping = EarlyStopping(patience=self.args.patience, verbose=True) + + model_optim = self._select_optimizer() + criterion = self._select_criterion() + + for epoch in range(self.args.train_epochs): + iter_count = 0 + train_loss = [] + + self.model.train() + epoch_time = time.time() + for i, (batch_x, batch_y) in enumerate(train_loader): + iter_count += 1 + model_optim.zero_grad() + + batch_x = batch_x.float().to(self.device) + + outputs = self.model(batch_x, None, None, None) + + f_dim = -1 if self.args.features == 'MS' else 0 + outputs = outputs[:, :, f_dim:] + loss = criterion(outputs, batch_x) + train_loss.append(loss.item()) + + if (i + 1) % 100 == 0: + print("\titers: {0}, epoch: {1} | loss: {2:.7f}".format(i + 1, epoch + 1, loss.item())) + speed = (time.time() - time_now) / iter_count + left_time = speed * ((self.args.train_epochs - epoch) * train_steps - i) + print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time)) + iter_count = 0 + time_now = time.time() + + loss.backward() + model_optim.step() + + print("Epoch: {} cost time: {}".format(epoch + 1, time.time() - epoch_time)) + train_loss = np.average(train_loss) + vali_loss = self.vali(vali_data, vali_loader, criterion) + test_loss = self.vali(test_data, test_loader, criterion) + + print("Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Vali Loss: {3:.7f} Test Loss: {4:.7f}".format( + epoch + 1, train_steps, train_loss, vali_loss, test_loss)) + early_stopping(vali_loss, self.model, path) + if early_stopping.early_stop: + print("Early stopping") + break + adjust_learning_rate(model_optim, epoch + 1, self.args) + + best_model_path = path + '/' + 'checkpoint.pth' + self.model.load_state_dict(torch.load(best_model_path)) + + return self.model + + def test(self, setting, test=0): + test_data, test_loader = self._get_data(flag='test') + train_data, train_loader = self._get_data(flag='train') + if test: + print('loading model') + self.model.load_state_dict(torch.load(os.path.join('./checkpoints/' + setting, 'checkpoint.pth'))) + + attens_energy = [] + folder_path = './test_results/' + setting + '/' + if not os.path.exists(folder_path): + os.makedirs(folder_path) + + self.model.eval() + self.anomaly_criterion = nn.MSELoss(reduce=False) + + # (1) stastic on the train set + with torch.no_grad(): + for i, (batch_x, batch_y) in enumerate(train_loader): + batch_x = batch_x.float().to(self.device) + # reconstruction + outputs = self.model(batch_x, None, None, None) + # criterion + score = torch.mean(self.anomaly_criterion(batch_x, outputs), dim=-1) + score = score.detach().cpu().numpy() + attens_energy.append(score) + + attens_energy = np.concatenate(attens_energy, axis=0).reshape(-1) + train_energy = np.array(attens_energy) + + # (2) find the threshold + attens_energy = [] + test_labels = [] + for i, (batch_x, batch_y) in enumerate(test_loader): + batch_x = batch_x.float().to(self.device) + # reconstruction + outputs = self.model(batch_x, None, None, None) + # criterion + score = torch.mean(self.anomaly_criterion(batch_x, outputs), dim=-1) + score = score.detach().cpu().numpy() + 
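+            # "score" is the per-time-step reconstruction error on the test set
+            # (MSE averaged over features). It is combined below with the
+            # train-set energies, and the threshold is taken as the
+            # (100 - anomaly_ratio) percentile of the combined distribution.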
attens_energy.append(score) + test_labels.append(batch_y) + + attens_energy = np.concatenate(attens_energy, axis=0).reshape(-1) + test_energy = np.array(attens_energy) + combined_energy = np.concatenate([train_energy, test_energy], axis=0) + threshold = np.percentile(combined_energy, 100 - self.args.anomaly_ratio) + print("Threshold :", threshold) + + # (3) evaluation on the test set + pred = (test_energy > threshold).astype(int) + test_labels = np.concatenate(test_labels, axis=0).reshape(-1) + test_labels = np.array(test_labels) + gt = test_labels.astype(int) + + print("pred: ", pred.shape) + print("gt: ", gt.shape) + + # (4) detection adjustment + gt, pred = adjustment(gt, pred) + + pred = np.array(pred) + gt = np.array(gt) + print("pred: ", pred.shape) + print("gt: ", gt.shape) + + accuracy = accuracy_score(gt, pred) + precision, recall, f_score, support = precision_recall_fscore_support(gt, pred, average='binary') + print("Accuracy : {:0.4f}, Precision : {:0.4f}, Recall : {:0.4f}, F-score : {:0.4f} ".format( + accuracy, precision, + recall, f_score)) + + f = open("result_anomaly_detection.txt", 'a') + f.write(setting + " \n") + f.write("Accuracy : {:0.4f}, Precision : {:0.4f}, Recall : {:0.4f}, F-score : {:0.4f} ".format( + accuracy, precision, + recall, f_score)) + f.write('\n') + f.write('\n') + f.close() + return diff --git a/exp/exp_basic.py b/exp/exp_basic.py new file mode 100644 index 0000000..7dc9811 --- /dev/null +++ b/exp/exp_basic.py @@ -0,0 +1,80 @@ +import os +import torch +from models import Autoformer, Transformer, TimesNet, Nonstationary_Transformer, DLinear, FEDformer, \ + Informer, LightTS, Reformer, ETSformer, Pyraformer, PatchTST, MICN, Crossformer, FiLM, iTransformer, \ + Koopa, TiDE, FreTS, TimeMixer, TSMixer, SegRNN, MambaSimple, TemporalFusionTransformer, SCINet, PAttn, TimeXer, \ + WPMixer, MultiPatchFormer, xPatch_SparseChannel + + +class Exp_Basic(object): + def __init__(self, args): + self.args = args + self.model_dict = { + 'TimesNet': TimesNet, + 'Autoformer': Autoformer, + 'Transformer': Transformer, + 'Nonstationary_Transformer': Nonstationary_Transformer, + 'DLinear': DLinear, + 'FEDformer': FEDformer, + 'Informer': Informer, + 'LightTS': LightTS, + 'Reformer': Reformer, + 'ETSformer': ETSformer, + 'PatchTST': PatchTST, + 'Pyraformer': Pyraformer, + 'MICN': MICN, + 'Crossformer': Crossformer, + 'FiLM': FiLM, + 'iTransformer': iTransformer, + 'Koopa': Koopa, + 'TiDE': TiDE, + 'FreTS': FreTS, + 'MambaSimple': MambaSimple, + 'TimeMixer': TimeMixer, + 'TSMixer': TSMixer, + 'SegRNN': SegRNN, + 'TemporalFusionTransformer': TemporalFusionTransformer, + "SCINet": SCINet, + 'PAttn': PAttn, + 'TimeXer': TimeXer, + 'WPMixer': WPMixer, + 'MultiPatchFormer': MultiPatchFormer, + 'xPatch_SparseChannel': xPatch_SparseChannel + } + if args.model == 'Mamba': + print('Please make sure you have successfully installed mamba_ssm') + from models import Mamba + self.model_dict['Mamba'] = Mamba + + self.device = self._acquire_device() + self.model = self._build_model().to(self.device) + + def _build_model(self): + raise NotImplementedError + return None + + def _acquire_device(self): + if self.args.use_gpu and self.args.gpu_type == 'cuda': + os.environ["CUDA_VISIBLE_DEVICES"] = str( + self.args.gpu) if not self.args.use_multi_gpu else self.args.devices + device = torch.device('cuda:{}'.format(self.args.gpu)) + print('Use GPU: cuda:{}'.format(self.args.gpu)) + elif self.args.use_gpu and self.args.gpu_type == 'mps': + device = torch.device('mps') + print('Use GPU: mps') + else: + 
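+            # Neither CUDA nor MPS was requested via the args, so fall back to CPU.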
device = torch.device('cpu') + print('Use CPU') + return device + + def _get_data(self): + pass + + def vali(self): + pass + + def train(self): + pass + + def test(self): + pass diff --git a/exp/exp_classification.py b/exp/exp_classification.py new file mode 100644 index 0000000..bd8d19f --- /dev/null +++ b/exp/exp_classification.py @@ -0,0 +1,192 @@ +from data_provider.data_factory import data_provider +from exp.exp_basic import Exp_Basic +from utils.tools import EarlyStopping, adjust_learning_rate, cal_accuracy +import torch +import torch.nn as nn +from torch import optim +import os +import time +import warnings +import numpy as np +import pdb + +warnings.filterwarnings('ignore') + + +class Exp_Classification(Exp_Basic): + def __init__(self, args): + super(Exp_Classification, self).__init__(args) + + def _build_model(self): + # model input depends on data + train_data, train_loader = self._get_data(flag='TRAIN') + test_data, test_loader = self._get_data(flag='TEST') + self.args.seq_len = max(train_data.max_seq_len, test_data.max_seq_len) + self.args.pred_len = 96 + self.args.enc_in = train_data.feature_df.shape[1] + self.args.num_class = len(train_data.class_names) + # model init + model = self.model_dict[self.args.model].Model(self.args).float() + if self.args.use_multi_gpu and self.args.use_gpu: + model = nn.DataParallel(model, device_ids=self.args.device_ids) + return model + + def _get_data(self, flag): + data_set, data_loader = data_provider(self.args, flag) + return data_set, data_loader + + def _select_optimizer(self): + # model_optim = optim.Adam(self.model.parameters(), lr=self.args.learning_rate) + model_optim = optim.RAdam(self.model.parameters(), lr=self.args.learning_rate) + return model_optim + + def _select_criterion(self): + criterion = nn.CrossEntropyLoss() + return criterion + + def vali(self, vali_data, vali_loader, criterion): + total_loss = [] + preds = [] + trues = [] + self.model.eval() + with torch.no_grad(): + for i, (batch_x, label, padding_mask) in enumerate(vali_loader): + batch_x = batch_x.float().to(self.device) + padding_mask = padding_mask.float().to(self.device) + label = label.to(self.device) + + outputs = self.model(batch_x, padding_mask, None, None) + + pred = outputs.detach() + loss = criterion(pred, label.long().squeeze()) + total_loss.append(loss.item()) + + preds.append(outputs.detach()) + trues.append(label) + + total_loss = np.average(total_loss) + + preds = torch.cat(preds, 0) + trues = torch.cat(trues, 0) + probs = torch.nn.functional.softmax(preds) # (total_samples, num_classes) est. prob. 
for each class and sample + predictions = torch.argmax(probs, dim=1).cpu().numpy() # (total_samples,) int class index for each sample + trues = trues.flatten().cpu().numpy() + accuracy = cal_accuracy(predictions, trues) + + self.model.train() + return total_loss, accuracy + + def train(self, setting): + train_data, train_loader = self._get_data(flag='TRAIN') + vali_data, vali_loader = self._get_data(flag='TEST') + test_data, test_loader = self._get_data(flag='TEST') + + path = os.path.join(self.args.checkpoints, setting) + if not os.path.exists(path): + os.makedirs(path) + + time_now = time.time() + + train_steps = len(train_loader) + early_stopping = EarlyStopping(patience=self.args.patience, verbose=True) + + model_optim = self._select_optimizer() + criterion = self._select_criterion() + + for epoch in range(self.args.train_epochs): + iter_count = 0 + train_loss = [] + + self.model.train() + epoch_time = time.time() + + for i, (batch_x, label, padding_mask) in enumerate(train_loader): + iter_count += 1 + model_optim.zero_grad() + + batch_x = batch_x.float().to(self.device) + padding_mask = padding_mask.float().to(self.device) + label = label.to(self.device) + + outputs = self.model(batch_x, padding_mask, None, None) + loss = criterion(outputs, label.long().squeeze(-1)) + train_loss.append(loss.item()) + + if (i + 1) % 100 == 0: + print("\titers: {0}, epoch: {1} | loss: {2:.7f}".format(i + 1, epoch + 1, loss.item())) + speed = (time.time() - time_now) / iter_count + left_time = speed * ((self.args.train_epochs - epoch) * train_steps - i) + print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time)) + iter_count = 0 + time_now = time.time() + + loss.backward() + nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=4.0) + model_optim.step() + + print("Epoch: {} cost time: {}".format(epoch + 1, time.time() - epoch_time)) + train_loss = np.average(train_loss) + vali_loss, val_accuracy = self.vali(vali_data, vali_loader, criterion) + # test_loss, test_accuracy = self.vali(test_data, test_loader, criterion) + + print( + "Epoch: {0}, Steps: {1} | Train Loss: {2:.3f} Vali Loss: {3:.3f} Vali Acc: {4:.3f}" # Test Loss: {5:.3f} Test Acc: {6:.3f}" + .format(epoch + 1, train_steps, train_loss, vali_loss, val_accuracy)) + # test_loss, test_accuracy)) + early_stopping(-val_accuracy, self.model, path) + if early_stopping.early_stop: + print("Early stopping") + break + + best_model_path = path + '/' + 'checkpoint.pth' + self.model.load_state_dict(torch.load(best_model_path)) + + return self.model + + def test(self, setting, test=0): + test_data, test_loader = self._get_data(flag='TEST') + if test: + print('loading model') + self.model.load_state_dict(torch.load(os.path.join('./checkpoints/' + setting, 'checkpoint.pth'))) + + preds = [] + trues = [] + folder_path = './test_results/' + setting + '/' + if not os.path.exists(folder_path): + os.makedirs(folder_path) + + self.model.eval() + with torch.no_grad(): + for i, (batch_x, label, padding_mask) in enumerate(test_loader): + batch_x = batch_x.float().to(self.device) + padding_mask = padding_mask.float().to(self.device) + label = label.to(self.device) + + outputs = self.model(batch_x, padding_mask, None, None) + + preds.append(outputs.detach()) + trues.append(label) + + preds = torch.cat(preds, 0) + trues = torch.cat(trues, 0) + print('test shape:', preds.shape, trues.shape) + + probs = torch.nn.functional.softmax(preds) # (total_samples, num_classes) est. prob. 
for each class and sample + predictions = torch.argmax(probs, dim=1).cpu().numpy() # (total_samples,) int class index for each sample + trues = trues.flatten().cpu().numpy() + accuracy = cal_accuracy(predictions, trues) + + # result save + folder_path = './results/' + setting + '/' + if not os.path.exists(folder_path): + os.makedirs(folder_path) + + print('accuracy:{}'.format(accuracy)) + file_name='result_classification.txt' + f = open(os.path.join(folder_path,file_name), 'a') + f.write(setting + " \n") + f.write('accuracy:{}'.format(accuracy)) + f.write('\n') + f.write('\n') + f.close() + return diff --git a/exp/exp_imputation.py b/exp/exp_imputation.py new file mode 100644 index 0000000..efc9054 --- /dev/null +++ b/exp/exp_imputation.py @@ -0,0 +1,228 @@ +from data_provider.data_factory import data_provider +from exp.exp_basic import Exp_Basic +from utils.tools import EarlyStopping, adjust_learning_rate, visual +from utils.metrics import metric +import torch +import torch.nn as nn +from torch import optim +import os +import time +import warnings +import numpy as np + +warnings.filterwarnings('ignore') + + +class Exp_Imputation(Exp_Basic): + def __init__(self, args): + super(Exp_Imputation, self).__init__(args) + + def _build_model(self): + model = self.model_dict[self.args.model].Model(self.args).float() + + if self.args.use_multi_gpu and self.args.use_gpu: + model = nn.DataParallel(model, device_ids=self.args.device_ids) + return model + + def _get_data(self, flag): + data_set, data_loader = data_provider(self.args, flag) + return data_set, data_loader + + def _select_optimizer(self): + model_optim = optim.Adam(self.model.parameters(), lr=self.args.learning_rate) + return model_optim + + def _select_criterion(self): + criterion = nn.MSELoss() + return criterion + + def vali(self, vali_data, vali_loader, criterion): + total_loss = [] + self.model.eval() + with torch.no_grad(): + for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(vali_loader): + batch_x = batch_x.float().to(self.device) + batch_x_mark = batch_x_mark.float().to(self.device) + + # random mask + B, T, N = batch_x.shape + """ + B = batch size + T = seq len + N = number of features + """ + mask = torch.rand((B, T, N)).to(self.device) + mask[mask <= self.args.mask_rate] = 0 # masked + mask[mask > self.args.mask_rate] = 1 # remained + inp = batch_x.masked_fill(mask == 0, 0) + + outputs = self.model(inp, batch_x_mark, None, None, mask) + + f_dim = -1 if self.args.features == 'MS' else 0 + outputs = outputs[:, :, f_dim:] + + # add support for MS + batch_x = batch_x[:, :, f_dim:] + mask = mask[:, :, f_dim:] + + pred = outputs.detach() + true = batch_x.detach() + mask = mask.detach() + + loss = criterion(pred[mask == 0], true[mask == 0]) + total_loss.append(loss.item()) + total_loss = np.average(total_loss) + self.model.train() + return total_loss + + def train(self, setting): + train_data, train_loader = self._get_data(flag='train') + vali_data, vali_loader = self._get_data(flag='val') + test_data, test_loader = self._get_data(flag='test') + + path = os.path.join(self.args.checkpoints, setting) + if not os.path.exists(path): + os.makedirs(path) + + time_now = time.time() + + train_steps = len(train_loader) + early_stopping = EarlyStopping(patience=self.args.patience, verbose=True) + + model_optim = self._select_optimizer() + criterion = self._select_criterion() + + for epoch in range(self.args.train_epochs): + iter_count = 0 + train_loss = [] + + self.model.train() + epoch_time = time.time() + for i, (batch_x, 
batch_y, batch_x_mark, batch_y_mark) in enumerate(train_loader): + iter_count += 1 + model_optim.zero_grad() + + batch_x = batch_x.float().to(self.device) + batch_x_mark = batch_x_mark.float().to(self.device) + + # random mask + B, T, N = batch_x.shape + mask = torch.rand((B, T, N)).to(self.device) + mask[mask <= self.args.mask_rate] = 0 # masked + mask[mask > self.args.mask_rate] = 1 # remained + inp = batch_x.masked_fill(mask == 0, 0) + + outputs = self.model(inp, batch_x_mark, None, None, mask) + + f_dim = -1 if self.args.features == 'MS' else 0 + outputs = outputs[:, :, f_dim:] + + # add support for MS + batch_x = batch_x[:, :, f_dim:] + mask = mask[:, :, f_dim:] + + loss = criterion(outputs[mask == 0], batch_x[mask == 0]) + train_loss.append(loss.item()) + + if (i + 1) % 100 == 0: + print("\titers: {0}, epoch: {1} | loss: {2:.7f}".format(i + 1, epoch + 1, loss.item())) + speed = (time.time() - time_now) / iter_count + left_time = speed * ((self.args.train_epochs - epoch) * train_steps - i) + print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time)) + iter_count = 0 + time_now = time.time() + + loss.backward() + model_optim.step() + + print("Epoch: {} cost time: {}".format(epoch + 1, time.time() - epoch_time)) + train_loss = np.average(train_loss) + vali_loss = self.vali(vali_data, vali_loader, criterion) + test_loss = self.vali(test_data, test_loader, criterion) + + print("Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Vali Loss: {3:.7f} Test Loss: {4:.7f}".format( + epoch + 1, train_steps, train_loss, vali_loss, test_loss)) + early_stopping(vali_loss, self.model, path) + if early_stopping.early_stop: + print("Early stopping") + break + adjust_learning_rate(model_optim, epoch + 1, self.args) + + best_model_path = path + '/' + 'checkpoint.pth' + self.model.load_state_dict(torch.load(best_model_path)) + + return self.model + + def test(self, setting, test=0): + test_data, test_loader = self._get_data(flag='test') + if test: + print('loading model') + self.model.load_state_dict(torch.load(os.path.join('./checkpoints/' + setting, 'checkpoint.pth'))) + + preds = [] + trues = [] + masks = [] + folder_path = './test_results/' + setting + '/' + if not os.path.exists(folder_path): + os.makedirs(folder_path) + + self.model.eval() + with torch.no_grad(): + for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(test_loader): + batch_x = batch_x.float().to(self.device) + batch_x_mark = batch_x_mark.float().to(self.device) + + # random mask + B, T, N = batch_x.shape + mask = torch.rand((B, T, N)).to(self.device) + mask[mask <= self.args.mask_rate] = 0 # masked + mask[mask > self.args.mask_rate] = 1 # remained + inp = batch_x.masked_fill(mask == 0, 0) + + # imputation + outputs = self.model(inp, batch_x_mark, None, None, mask) + + # eval + f_dim = -1 if self.args.features == 'MS' else 0 + outputs = outputs[:, :, f_dim:] + + # add support for MS + batch_x = batch_x[:, :, f_dim:] + mask = mask[:, :, f_dim:] + + outputs = outputs.detach().cpu().numpy() + pred = outputs + true = batch_x.detach().cpu().numpy() + preds.append(pred) + trues.append(true) + masks.append(mask.detach().cpu()) + + if i % 20 == 0: + filled = true[0, :, -1].copy() + filled = filled * mask[0, :, -1].detach().cpu().numpy() + \ + pred[0, :, -1] * (1 - mask[0, :, -1].detach().cpu().numpy()) + visual(true[0, :, -1], filled, os.path.join(folder_path, str(i) + '.pdf')) + + preds = np.concatenate(preds, 0) + trues = np.concatenate(trues, 0) + masks = np.concatenate(masks, 0) + print('test shape:', preds.shape, 
trues.shape) + + # result save + folder_path = './results/' + setting + '/' + if not os.path.exists(folder_path): + os.makedirs(folder_path) + + mae, mse, rmse, mape, mspe = metric(preds[masks == 0], trues[masks == 0]) + print('mse:{}, mae:{}'.format(mse, mae)) + f = open("result_imputation.txt", 'a') + f.write(setting + " \n") + f.write('mse:{}, mae:{}'.format(mse, mae)) + f.write('\n') + f.write('\n') + f.close() + + np.save(folder_path + 'metrics.npy', np.array([mae, mse, rmse, mape, mspe])) + np.save(folder_path + 'pred.npy', preds) + np.save(folder_path + 'true.npy', trues) + return diff --git a/exp/exp_long_term_forecasting.py b/exp/exp_long_term_forecasting.py new file mode 100644 index 0000000..63ed56c --- /dev/null +++ b/exp/exp_long_term_forecasting.py @@ -0,0 +1,268 @@ +from data_provider.data_factory import data_provider +from exp.exp_basic import Exp_Basic +from utils.tools import EarlyStopping, adjust_learning_rate, visual +from utils.metrics import metric +import torch +import torch.nn as nn +from torch import optim +import os +import time +import warnings +import numpy as np +from utils.dtw_metric import dtw, accelerated_dtw +from utils.augmentation import run_augmentation, run_augmentation_single + +warnings.filterwarnings('ignore') + + +class Exp_Long_Term_Forecast(Exp_Basic): + def __init__(self, args): + super(Exp_Long_Term_Forecast, self).__init__(args) + + def _build_model(self): + model = self.model_dict[self.args.model].Model(self.args).float() + + if self.args.use_multi_gpu and self.args.use_gpu: + model = nn.DataParallel(model, device_ids=self.args.device_ids) + return model + + def _get_data(self, flag): + data_set, data_loader = data_provider(self.args, flag) + return data_set, data_loader + + def _select_optimizer(self): + model_optim = optim.Adam(self.model.parameters(), lr=self.args.learning_rate) + return model_optim + + def _select_criterion(self): + criterion = nn.MSELoss() + return criterion + + + def vali(self, vali_data, vali_loader, criterion): + total_loss = [] + self.model.eval() + with torch.no_grad(): + for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(vali_loader): + batch_x = batch_x.float().to(self.device) + batch_y = batch_y.float() + + batch_x_mark = batch_x_mark.float().to(self.device) + batch_y_mark = batch_y_mark.float().to(self.device) + + # decoder input + dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len:, :]).float() + dec_inp = torch.cat([batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device) + # encoder - decoder + if self.args.use_amp: + with torch.cuda.amp.autocast(): + outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark) + else: + outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark) + f_dim = -1 if self.args.features == 'MS' else 0 + outputs = outputs[:, -self.args.pred_len:, f_dim:] + batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device) + + pred = outputs.detach() + true = batch_y.detach() + + loss = criterion(pred, true) + + total_loss.append(loss.item()) + total_loss = np.average(total_loss) + self.model.train() + return total_loss + + def train(self, setting): + train_data, train_loader = self._get_data(flag='train') + vali_data, vali_loader = self._get_data(flag='val') + test_data, test_loader = self._get_data(flag='test') + + path = os.path.join(self.args.checkpoints, setting) + if not os.path.exists(path): + os.makedirs(path) + + time_now = time.time() + + train_steps = len(train_loader) + early_stopping = 
EarlyStopping(patience=self.args.patience, verbose=True) + + model_optim = self._select_optimizer() + criterion = self._select_criterion() + + if self.args.use_amp: + scaler = torch.cuda.amp.GradScaler() + + for epoch in range(self.args.train_epochs): + iter_count = 0 + train_loss = [] + + self.model.train() + epoch_time = time.time() + for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(train_loader): + iter_count += 1 + model_optim.zero_grad() + batch_x = batch_x.float().to(self.device) + batch_y = batch_y.float().to(self.device) + batch_x_mark = batch_x_mark.float().to(self.device) + batch_y_mark = batch_y_mark.float().to(self.device) + + # decoder input + dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len:, :]).float() + dec_inp = torch.cat([batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device) + + # encoder - decoder + if self.args.use_amp: + with torch.cuda.amp.autocast(): + outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark) + + f_dim = -1 if self.args.features == 'MS' else 0 + outputs = outputs[:, -self.args.pred_len:, f_dim:] + batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device) + loss = criterion(outputs, batch_y) + train_loss.append(loss.item()) + else: + outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark) + + f_dim = -1 if self.args.features == 'MS' else 0 + outputs = outputs[:, -self.args.pred_len:, f_dim:] + batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device) + loss = criterion(outputs, batch_y) + train_loss.append(loss.item()) + + if (i + 1) % 100 == 0: + print("\titers: {0}, epoch: {1} | loss: {2:.7f}".format(i + 1, epoch + 1, loss.item())) + speed = (time.time() - time_now) / iter_count + left_time = speed * ((self.args.train_epochs - epoch) * train_steps - i) + print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time)) + iter_count = 0 + time_now = time.time() + + if self.args.use_amp: + scaler.scale(loss).backward() + scaler.step(model_optim) + scaler.update() + else: + loss.backward() + model_optim.step() + + print("Epoch: {} cost time: {}".format(epoch + 1, time.time() - epoch_time)) + train_loss = np.average(train_loss) + vali_loss = self.vali(vali_data, vali_loader, criterion) + test_loss = self.vali(test_data, test_loader, criterion) + + print("Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Vali Loss: {3:.7f} Test Loss: {4:.7f}".format( + epoch + 1, train_steps, train_loss, vali_loss, test_loss)) + early_stopping(vali_loss, self.model, path) + if early_stopping.early_stop: + print("Early stopping") + break + + adjust_learning_rate(model_optim, epoch + 1, self.args) + + best_model_path = path + '/' + 'checkpoint.pth' + self.model.load_state_dict(torch.load(best_model_path)) + + return self.model + + def test(self, setting, test=0): + test_data, test_loader = self._get_data(flag='test') + if test: + print('loading model') + self.model.load_state_dict(torch.load(os.path.join('./checkpoints/' + setting, 'checkpoint.pth'))) + + preds = [] + trues = [] + folder_path = './test_results/' + setting + '/' + if not os.path.exists(folder_path): + os.makedirs(folder_path) + + self.model.eval() + with torch.no_grad(): + for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(test_loader): + batch_x = batch_x.float().to(self.device) + batch_y = batch_y.float().to(self.device) + + batch_x_mark = batch_x_mark.float().to(self.device) + batch_y_mark = batch_y_mark.float().to(self.device) + + # decoder input + dec_inp = torch.zeros_like(batch_y[:, 
-self.args.pred_len:, :]).float() + dec_inp = torch.cat([batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device) + # encoder - decoder + if self.args.use_amp: + with torch.cuda.amp.autocast(): + outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark) + else: + outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark) + + f_dim = -1 if self.args.features == 'MS' else 0 + outputs = outputs[:, -self.args.pred_len:, :] + batch_y = batch_y[:, -self.args.pred_len:, :].to(self.device) + outputs = outputs.detach().cpu().numpy() + batch_y = batch_y.detach().cpu().numpy() + if test_data.scale and self.args.inverse: + shape = batch_y.shape + if outputs.shape[-1] != batch_y.shape[-1]: + outputs = np.tile(outputs, [1, 1, int(batch_y.shape[-1] / outputs.shape[-1])]) + outputs = test_data.inverse_transform(outputs.reshape(shape[0] * shape[1], -1)).reshape(shape) + batch_y = test_data.inverse_transform(batch_y.reshape(shape[0] * shape[1], -1)).reshape(shape) + + outputs = outputs[:, :, f_dim:] + batch_y = batch_y[:, :, f_dim:] + + pred = outputs + true = batch_y + + preds.append(pred) + trues.append(true) + if i % 20 == 0: + input = batch_x.detach().cpu().numpy() + if test_data.scale and self.args.inverse: + shape = input.shape + input = test_data.inverse_transform(input.reshape(shape[0] * shape[1], -1)).reshape(shape) + gt = np.concatenate((input[0, :, -1], true[0, :, -1]), axis=0) + pd = np.concatenate((input[0, :, -1], pred[0, :, -1]), axis=0) + visual(gt, pd, os.path.join(folder_path, str(i) + '.pdf')) + + preds = np.concatenate(preds, axis=0) + trues = np.concatenate(trues, axis=0) + print('test shape:', preds.shape, trues.shape) + preds = preds.reshape(-1, preds.shape[-2], preds.shape[-1]) + trues = trues.reshape(-1, trues.shape[-2], trues.shape[-1]) + print('test shape:', preds.shape, trues.shape) + + # result save + folder_path = './results/' + setting + '/' + if not os.path.exists(folder_path): + os.makedirs(folder_path) + + # dtw calculation + if self.args.use_dtw: + dtw_list = [] + manhattan_distance = lambda x, y: np.abs(x - y) + for i in range(preds.shape[0]): + x = preds[i].reshape(-1, 1) + y = trues[i].reshape(-1, 1) + if i % 100 == 0: + print("calculating dtw iter:", i) + d, _, _, _ = accelerated_dtw(x, y, dist=manhattan_distance) + dtw_list.append(d) + dtw = np.array(dtw_list).mean() + else: + dtw = 'Not calculated' + + mae, mse, rmse, mape, mspe = metric(preds, trues) + print('mse:{}, mae:{}, dtw:{}'.format(mse, mae, dtw)) + f = open("result_long_term_forecast.txt", 'a') + f.write(setting + " \n") + f.write('mse:{}, mae:{}, dtw:{}'.format(mse, mae, dtw)) + f.write('\n') + f.write('\n') + f.close() + + np.save(folder_path + 'metrics.npy', np.array([mae, mse, rmse, mape, mspe])) + np.save(folder_path + 'pred.npy', preds) + np.save(folder_path + 'true.npy', trues) + + return diff --git a/exp/exp_short_term_forecasting.py b/exp/exp_short_term_forecasting.py new file mode 100644 index 0000000..90a0d8b --- /dev/null +++ b/exp/exp_short_term_forecasting.py @@ -0,0 +1,235 @@ +from data_provider.data_factory import data_provider +from data_provider.m4 import M4Meta +from exp.exp_basic import Exp_Basic +from utils.tools import EarlyStopping, adjust_learning_rate, visual +from utils.losses import mape_loss, mase_loss, smape_loss +from utils.m4_summary import M4Summary +import torch +import torch.nn as nn +from torch import optim +import os +import time +import warnings +import numpy as np +import pandas + +warnings.filterwarnings('ignore') + + +class 
Exp_Short_Term_Forecast(Exp_Basic): + def __init__(self, args): + super(Exp_Short_Term_Forecast, self).__init__(args) + + def _build_model(self): + if self.args.data == 'm4': + self.args.pred_len = M4Meta.horizons_map[self.args.seasonal_patterns] # Up to M4 config + self.args.seq_len = 2 * self.args.pred_len # input_len = 2*pred_len + self.args.label_len = self.args.pred_len + self.args.frequency_map = M4Meta.frequency_map[self.args.seasonal_patterns] + model = self.model_dict[self.args.model].Model(self.args).float() + + if self.args.use_multi_gpu and self.args.use_gpu: + model = nn.DataParallel(model, device_ids=self.args.device_ids) + return model + + def _get_data(self, flag): + data_set, data_loader = data_provider(self.args, flag) + return data_set, data_loader + + def _select_optimizer(self): + model_optim = optim.Adam(self.model.parameters(), lr=self.args.learning_rate) + return model_optim + + def _select_criterion(self, loss_name='MSE'): + if loss_name == 'MSE': + return nn.MSELoss() + elif loss_name == 'MAPE': + return mape_loss() + elif loss_name == 'MASE': + return mase_loss() + elif loss_name == 'SMAPE': + return smape_loss() + + def train(self, setting): + train_data, train_loader = self._get_data(flag='train') + vali_data, vali_loader = self._get_data(flag='val') + + path = os.path.join(self.args.checkpoints, setting) + if not os.path.exists(path): + os.makedirs(path) + + time_now = time.time() + + train_steps = len(train_loader) + early_stopping = EarlyStopping(patience=self.args.patience, verbose=True) + + model_optim = self._select_optimizer() + criterion = self._select_criterion(self.args.loss) + mse = nn.MSELoss() + + for epoch in range(self.args.train_epochs): + iter_count = 0 + train_loss = [] + + self.model.train() + epoch_time = time.time() + for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(train_loader): + iter_count += 1 + model_optim.zero_grad() + batch_x = batch_x.float().to(self.device) + + batch_y = batch_y.float().to(self.device) + batch_y_mark = batch_y_mark.float().to(self.device) + + # decoder input + dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len:, :]).float() + dec_inp = torch.cat([batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device) + + outputs = self.model(batch_x, None, dec_inp, None) + + f_dim = -1 if self.args.features == 'MS' else 0 + outputs = outputs[:, -self.args.pred_len:, f_dim:] + batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device) + + batch_y_mark = batch_y_mark[:, -self.args.pred_len:, f_dim:].to(self.device) + loss_value = criterion(batch_x, self.args.frequency_map, outputs, batch_y, batch_y_mark) + loss_sharpness = mse((outputs[:, 1:, :] - outputs[:, :-1, :]), (batch_y[:, 1:, :] - batch_y[:, :-1, :])) + loss = loss_value # + loss_sharpness * 1e-5 + train_loss.append(loss.item()) + + if (i + 1) % 100 == 0: + print("\titers: {0}, epoch: {1} | loss: {2:.7f}".format(i + 1, epoch + 1, loss.item())) + speed = (time.time() - time_now) / iter_count + left_time = speed * ((self.args.train_epochs - epoch) * train_steps - i) + print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time)) + iter_count = 0 + time_now = time.time() + + loss.backward() + model_optim.step() + + print("Epoch: {} cost time: {}".format(epoch + 1, time.time() - epoch_time)) + train_loss = np.average(train_loss) + vali_loss = self.vali(train_loader, vali_loader, criterion) + test_loss = vali_loss + print("Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Vali Loss: {3:.7f} Test Loss: 
{4:.7f}".format( + epoch + 1, train_steps, train_loss, vali_loss, test_loss)) + early_stopping(vali_loss, self.model, path) + if early_stopping.early_stop: + print("Early stopping") + break + + adjust_learning_rate(model_optim, epoch + 1, self.args) + + best_model_path = path + '/' + 'checkpoint.pth' + self.model.load_state_dict(torch.load(best_model_path)) + + return self.model + + def vali(self, train_loader, vali_loader, criterion): + x, _ = train_loader.dataset.last_insample_window() + y = vali_loader.dataset.timeseries + x = torch.tensor(x, dtype=torch.float32).to(self.device) + x = x.unsqueeze(-1) + + self.model.eval() + with torch.no_grad(): + # decoder input + B, _, C = x.shape + dec_inp = torch.zeros((B, self.args.pred_len, C)).float().to(self.device) + dec_inp = torch.cat([x[:, -self.args.label_len:, :], dec_inp], dim=1).float() + # encoder - decoder + outputs = torch.zeros((B, self.args.pred_len, C)).float() # .to(self.device) + id_list = np.arange(0, B, 500) # validation set size + id_list = np.append(id_list, B) + for i in range(len(id_list) - 1): + outputs[id_list[i]:id_list[i + 1], :, :] = self.model(x[id_list[i]:id_list[i + 1]], None, + dec_inp[id_list[i]:id_list[i + 1]], + None).detach().cpu() + f_dim = -1 if self.args.features == 'MS' else 0 + outputs = outputs[:, -self.args.pred_len:, f_dim:] + pred = outputs + true = torch.from_numpy(np.array(y)) + batch_y_mark = torch.ones(true.shape) + + loss = criterion(x.detach().cpu()[:, :, 0], self.args.frequency_map, pred[:, :, 0], true, batch_y_mark) + + self.model.train() + return loss + + def test(self, setting, test=0): + _, train_loader = self._get_data(flag='train') + _, test_loader = self._get_data(flag='test') + x, _ = train_loader.dataset.last_insample_window() + y = test_loader.dataset.timeseries + x = torch.tensor(x, dtype=torch.float32).to(self.device) + x = x.unsqueeze(-1) + + if test: + print('loading model') + self.model.load_state_dict(torch.load(os.path.join('./checkpoints/' + setting, 'checkpoint.pth'))) + + folder_path = './test_results/' + setting + '/' + if not os.path.exists(folder_path): + os.makedirs(folder_path) + + self.model.eval() + with torch.no_grad(): + B, _, C = x.shape + dec_inp = torch.zeros((B, self.args.pred_len, C)).float().to(self.device) + dec_inp = torch.cat([x[:, -self.args.label_len:, :], dec_inp], dim=1).float() + # encoder - decoder + outputs = torch.zeros((B, self.args.pred_len, C)).float().to(self.device) + id_list = np.arange(0, B, 1) + id_list = np.append(id_list, B) + for i in range(len(id_list) - 1): + outputs[id_list[i]:id_list[i + 1], :, :] = self.model(x[id_list[i]:id_list[i + 1]], None, + dec_inp[id_list[i]:id_list[i + 1]], None) + + if id_list[i] % 1000 == 0: + print(id_list[i]) + + f_dim = -1 if self.args.features == 'MS' else 0 + outputs = outputs[:, -self.args.pred_len:, f_dim:] + outputs = outputs.detach().cpu().numpy() + + preds = outputs + trues = y + x = x.detach().cpu().numpy() + + for i in range(0, preds.shape[0], preds.shape[0] // 10): + gt = np.concatenate((x[i, :, 0], trues[i]), axis=0) + pd = np.concatenate((x[i, :, 0], preds[i, :, 0]), axis=0) + visual(gt, pd, os.path.join(folder_path, str(i) + '.pdf')) + + print('test shape:', preds.shape) + + # result save + folder_path = './m4_results/' + self.args.model + '/' + if not os.path.exists(folder_path): + os.makedirs(folder_path) + + forecasts_df = pandas.DataFrame(preds[:, :, 0], columns=[f'V{i + 1}' for i in range(self.args.pred_len)]) + forecasts_df.index = test_loader.dataset.ids[:preds.shape[0]] + 
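+        # One row per M4 series id, columns V1..V{pred_len} holding the forecast
+        # horizon; these are the CSVs that M4Summary.evaluate() reads once
+        # forecasts for all six seasonal patterns have been produced.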
forecasts_df.index.name = 'id' + forecasts_df.set_index(forecasts_df.columns[0], inplace=True) + forecasts_df.to_csv(folder_path + self.args.seasonal_patterns + '_forecast.csv') + + print(self.args.model) + file_path = './m4_results/' + self.args.model + '/' + if 'Weekly_forecast.csv' in os.listdir(file_path) \ + and 'Monthly_forecast.csv' in os.listdir(file_path) \ + and 'Yearly_forecast.csv' in os.listdir(file_path) \ + and 'Daily_forecast.csv' in os.listdir(file_path) \ + and 'Hourly_forecast.csv' in os.listdir(file_path) \ + and 'Quarterly_forecast.csv' in os.listdir(file_path): + m4_summary = M4Summary(file_path, self.args.root_path) + # m4_forecast.set_index(m4_winner_forecast.columns[0], inplace=True) + smape_results, owa_results, mape, mase = m4_summary.evaluate() + print('smape:', smape_results) + print('mape:', mape) + print('mase:', mase) + print('owa:', owa_results) + else: + print('After all 6 tasks are finished, you can calculate the averaged index') + return diff --git a/layers/AutoCorrelation.py b/layers/AutoCorrelation.py new file mode 100644 index 0000000..c566461 --- /dev/null +++ b/layers/AutoCorrelation.py @@ -0,0 +1,163 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +import matplotlib.pyplot as plt +import numpy as np +import math +from math import sqrt +import os + + +class AutoCorrelation(nn.Module): + """ + AutoCorrelation Mechanism with the following two phases: + (1) period-based dependencies discovery + (2) time delay aggregation + This block can replace the self-attention family mechanism seamlessly. + """ + + def __init__(self, mask_flag=True, factor=1, scale=None, attention_dropout=0.1, output_attention=False): + super(AutoCorrelation, self).__init__() + self.factor = factor + self.scale = scale + self.mask_flag = mask_flag + self.output_attention = output_attention + self.dropout = nn.Dropout(attention_dropout) + + def time_delay_agg_training(self, values, corr): + """ + SpeedUp version of Autocorrelation (a batch-normalization style design) + This is for the training phase. + """ + head = values.shape[1] + channel = values.shape[2] + length = values.shape[3] + # find top k + top_k = int(self.factor * math.log(length)) + mean_value = torch.mean(torch.mean(corr, dim=1), dim=1) + index = torch.topk(torch.mean(mean_value, dim=0), top_k, dim=-1)[1] + weights = torch.stack([mean_value[:, index[i]] for i in range(top_k)], dim=-1) + # update corr + tmp_corr = torch.softmax(weights, dim=-1) + # aggregation + tmp_values = values + delays_agg = torch.zeros_like(values).float() + for i in range(top_k): + pattern = torch.roll(tmp_values, -int(index[i]), -1) + delays_agg = delays_agg + pattern * \ + (tmp_corr[:, i].unsqueeze(1).unsqueeze(1).unsqueeze(1).repeat(1, head, channel, length)) + return delays_agg + + def time_delay_agg_inference(self, values, corr): + """ + SpeedUp version of Autocorrelation (a batch-normalization style design) + This is for the inference phase. 
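+        Unlike the training variant, the top-k delays here are selected per
+        sample (torch.topk over the (batch, length) correlation mean) and the
+        aggregation uses torch.gather on a time-doubled value tensor instead
+        of torch.roll with batch-shared indices.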
+ """ + batch = values.shape[0] + head = values.shape[1] + channel = values.shape[2] + length = values.shape[3] + # index init + init_index = torch.arange(length).unsqueeze(0).unsqueeze(0).unsqueeze(0).repeat(batch, head, channel, 1).to(values.device) + # find top k + top_k = int(self.factor * math.log(length)) + mean_value = torch.mean(torch.mean(corr, dim=1), dim=1) + weights, delay = torch.topk(mean_value, top_k, dim=-1) + # update corr + tmp_corr = torch.softmax(weights, dim=-1) + # aggregation + tmp_values = values.repeat(1, 1, 1, 2) + delays_agg = torch.zeros_like(values).float() + for i in range(top_k): + tmp_delay = init_index + delay[:, i].unsqueeze(1).unsqueeze(1).unsqueeze(1).repeat(1, head, channel, length) + pattern = torch.gather(tmp_values, dim=-1, index=tmp_delay) + delays_agg = delays_agg + pattern * \ + (tmp_corr[:, i].unsqueeze(1).unsqueeze(1).unsqueeze(1).repeat(1, head, channel, length)) + return delays_agg + + def time_delay_agg_full(self, values, corr): + """ + Standard version of Autocorrelation + """ + batch = values.shape[0] + head = values.shape[1] + channel = values.shape[2] + length = values.shape[3] + # index init + init_index = torch.arange(length).unsqueeze(0).unsqueeze(0).unsqueeze(0).repeat(batch, head, channel, 1).to(values.device) + # find top k + top_k = int(self.factor * math.log(length)) + weights, delay = torch.topk(corr, top_k, dim=-1) + # update corr + tmp_corr = torch.softmax(weights, dim=-1) + # aggregation + tmp_values = values.repeat(1, 1, 1, 2) + delays_agg = torch.zeros_like(values).float() + for i in range(top_k): + tmp_delay = init_index + delay[..., i].unsqueeze(-1) + pattern = torch.gather(tmp_values, dim=-1, index=tmp_delay) + delays_agg = delays_agg + pattern * (tmp_corr[..., i].unsqueeze(-1)) + return delays_agg + + def forward(self, queries, keys, values, attn_mask): + B, L, H, E = queries.shape + _, S, _, D = values.shape + if L > S: + zeros = torch.zeros_like(queries[:, :(L - S), :]).float() + values = torch.cat([values, zeros], dim=1) + keys = torch.cat([keys, zeros], dim=1) + else: + values = values[:, :L, :, :] + keys = keys[:, :L, :, :] + + # period-based dependencies + q_fft = torch.fft.rfft(queries.permute(0, 2, 3, 1).contiguous(), dim=-1) + k_fft = torch.fft.rfft(keys.permute(0, 2, 3, 1).contiguous(), dim=-1) + res = q_fft * torch.conj(k_fft) + corr = torch.fft.irfft(res, dim=-1) + + # time delay agg + if self.training: + V = self.time_delay_agg_training(values.permute(0, 2, 3, 1).contiguous(), corr).permute(0, 3, 1, 2) + else: + V = self.time_delay_agg_inference(values.permute(0, 2, 3, 1).contiguous(), corr).permute(0, 3, 1, 2) + + if self.output_attention: + return (V.contiguous(), corr.permute(0, 3, 1, 2)) + else: + return (V.contiguous(), None) + + +class AutoCorrelationLayer(nn.Module): + def __init__(self, correlation, d_model, n_heads, d_keys=None, + d_values=None): + super(AutoCorrelationLayer, self).__init__() + + d_keys = d_keys or (d_model // n_heads) + d_values = d_values or (d_model // n_heads) + + self.inner_correlation = correlation + self.query_projection = nn.Linear(d_model, d_keys * n_heads) + self.key_projection = nn.Linear(d_model, d_keys * n_heads) + self.value_projection = nn.Linear(d_model, d_values * n_heads) + self.out_projection = nn.Linear(d_values * n_heads, d_model) + self.n_heads = n_heads + + def forward(self, queries, keys, values, attn_mask): + B, L, _ = queries.shape + _, S, _ = keys.shape + H = self.n_heads + + queries = self.query_projection(queries).view(B, L, H, -1) + keys = 
self.key_projection(keys).view(B, S, H, -1) + values = self.value_projection(values).view(B, S, H, -1) + + out, attn = self.inner_correlation( + queries, + keys, + values, + attn_mask + ) + out = out.view(B, L, -1) + + return self.out_projection(out), attn diff --git a/layers/Autoformer_EncDec.py b/layers/Autoformer_EncDec.py new file mode 100644 index 0000000..6fce4bc --- /dev/null +++ b/layers/Autoformer_EncDec.py @@ -0,0 +1,203 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class my_Layernorm(nn.Module): + """ + Special designed layernorm for the seasonal part + """ + + def __init__(self, channels): + super(my_Layernorm, self).__init__() + self.layernorm = nn.LayerNorm(channels) + + def forward(self, x): + x_hat = self.layernorm(x) + bias = torch.mean(x_hat, dim=1).unsqueeze(1).repeat(1, x.shape[1], 1) + return x_hat - bias + + +class moving_avg(nn.Module): + """ + Moving average block to highlight the trend of time series + """ + + def __init__(self, kernel_size, stride): + super(moving_avg, self).__init__() + self.kernel_size = kernel_size + self.avg = nn.AvgPool1d(kernel_size=kernel_size, stride=stride, padding=0) + + def forward(self, x): + # padding on the both ends of time series + front = x[:, 0:1, :].repeat(1, (self.kernel_size - 1) // 2, 1) + end = x[:, -1:, :].repeat(1, (self.kernel_size - 1) // 2, 1) + x = torch.cat([front, x, end], dim=1) + x = self.avg(x.permute(0, 2, 1)) + x = x.permute(0, 2, 1) + return x + + +class series_decomp(nn.Module): + """ + Series decomposition block + """ + + def __init__(self, kernel_size): + super(series_decomp, self).__init__() + self.moving_avg = moving_avg(kernel_size, stride=1) + + def forward(self, x): + moving_mean = self.moving_avg(x) + res = x - moving_mean + return res, moving_mean + + +class series_decomp_multi(nn.Module): + """ + Multiple Series decomposition block from FEDformer + """ + + def __init__(self, kernel_size): + super(series_decomp_multi, self).__init__() + self.kernel_size = kernel_size + self.series_decomp = [series_decomp(kernel) for kernel in kernel_size] + + def forward(self, x): + moving_mean = [] + res = [] + for func in self.series_decomp: + sea, moving_avg = func(x) + moving_mean.append(moving_avg) + res.append(sea) + + sea = sum(res) / len(res) + moving_mean = sum(moving_mean) / len(moving_mean) + return sea, moving_mean + + +class EncoderLayer(nn.Module): + """ + Autoformer encoder layer with the progressive decomposition architecture + """ + + def __init__(self, attention, d_model, d_ff=None, moving_avg=25, dropout=0.1, activation="relu"): + super(EncoderLayer, self).__init__() + d_ff = d_ff or 4 * d_model + self.attention = attention + self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1, bias=False) + self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1, bias=False) + self.decomp1 = series_decomp(moving_avg) + self.decomp2 = series_decomp(moving_avg) + self.dropout = nn.Dropout(dropout) + self.activation = F.relu if activation == "relu" else F.gelu + + def forward(self, x, attn_mask=None): + new_x, attn = self.attention( + x, x, x, + attn_mask=attn_mask + ) + x = x + self.dropout(new_x) + x, _ = self.decomp1(x) + y = x + y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1)))) + y = self.dropout(self.conv2(y).transpose(-1, 1)) + res, _ = self.decomp2(x + y) + return res, attn + + +class Encoder(nn.Module): + """ + Autoformer encoder + """ + + def __init__(self, attn_layers, conv_layers=None, norm_layer=None): + 
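+        # attn_layers: EncoderLayer modules applied in sequence;
+        # conv_layers: optional convolution layers applied between successive
+        # attention layers; norm_layer: optional final normalization
+        # (e.g. my_Layernorm defined above).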
super(Encoder, self).__init__() + self.attn_layers = nn.ModuleList(attn_layers) + self.conv_layers = nn.ModuleList(conv_layers) if conv_layers is not None else None + self.norm = norm_layer + + def forward(self, x, attn_mask=None): + attns = [] + if self.conv_layers is not None: + for attn_layer, conv_layer in zip(self.attn_layers, self.conv_layers): + x, attn = attn_layer(x, attn_mask=attn_mask) + x = conv_layer(x) + attns.append(attn) + x, attn = self.attn_layers[-1](x) + attns.append(attn) + else: + for attn_layer in self.attn_layers: + x, attn = attn_layer(x, attn_mask=attn_mask) + attns.append(attn) + + if self.norm is not None: + x = self.norm(x) + + return x, attns + + +class DecoderLayer(nn.Module): + """ + Autoformer decoder layer with the progressive decomposition architecture + """ + + def __init__(self, self_attention, cross_attention, d_model, c_out, d_ff=None, + moving_avg=25, dropout=0.1, activation="relu"): + super(DecoderLayer, self).__init__() + d_ff = d_ff or 4 * d_model + self.self_attention = self_attention + self.cross_attention = cross_attention + self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1, bias=False) + self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1, bias=False) + self.decomp1 = series_decomp(moving_avg) + self.decomp2 = series_decomp(moving_avg) + self.decomp3 = series_decomp(moving_avg) + self.dropout = nn.Dropout(dropout) + self.projection = nn.Conv1d(in_channels=d_model, out_channels=c_out, kernel_size=3, stride=1, padding=1, + padding_mode='circular', bias=False) + self.activation = F.relu if activation == "relu" else F.gelu + + def forward(self, x, cross, x_mask=None, cross_mask=None): + x = x + self.dropout(self.self_attention( + x, x, x, + attn_mask=x_mask + )[0]) + x, trend1 = self.decomp1(x) + x = x + self.dropout(self.cross_attention( + x, cross, cross, + attn_mask=cross_mask + )[0]) + x, trend2 = self.decomp2(x) + y = x + y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1)))) + y = self.dropout(self.conv2(y).transpose(-1, 1)) + x, trend3 = self.decomp3(x + y) + + residual_trend = trend1 + trend2 + trend3 + residual_trend = self.projection(residual_trend.permute(0, 2, 1)).transpose(1, 2) + return x, residual_trend + + +class Decoder(nn.Module): + """ + Autoformer encoder + """ + + def __init__(self, layers, norm_layer=None, projection=None): + super(Decoder, self).__init__() + self.layers = nn.ModuleList(layers) + self.norm = norm_layer + self.projection = projection + + def forward(self, x, cross, x_mask=None, cross_mask=None, trend=None): + for layer in self.layers: + x, residual_trend = layer(x, cross, x_mask=x_mask, cross_mask=cross_mask) + trend = trend + residual_trend + + if self.norm is not None: + x = self.norm(x) + + if self.projection is not None: + x = self.projection(x) + return x, trend diff --git a/layers/Conv_Blocks.py b/layers/Conv_Blocks.py new file mode 100644 index 0000000..8eddfa3 --- /dev/null +++ b/layers/Conv_Blocks.py @@ -0,0 +1,60 @@ +import torch +import torch.nn as nn + + +class Inception_Block_V1(nn.Module): + def __init__(self, in_channels, out_channels, num_kernels=6, init_weight=True): + super(Inception_Block_V1, self).__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.num_kernels = num_kernels + kernels = [] + for i in range(self.num_kernels): + kernels.append(nn.Conv2d(in_channels, out_channels, kernel_size=2 * i + 1, padding=i)) + self.kernels = nn.ModuleList(kernels) + if init_weight: + 
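+            # Kaiming-initialize every Conv2d kernel (see _initialize_weights below).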
self._initialize_weights() + + def _initialize_weights(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') + if m.bias is not None: + nn.init.constant_(m.bias, 0) + + def forward(self, x): + res_list = [] + for i in range(self.num_kernels): + res_list.append(self.kernels[i](x)) + res = torch.stack(res_list, dim=-1).mean(-1) + return res + + +class Inception_Block_V2(nn.Module): + def __init__(self, in_channels, out_channels, num_kernels=6, init_weight=True): + super(Inception_Block_V2, self).__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.num_kernels = num_kernels + kernels = [] + for i in range(self.num_kernels // 2): + kernels.append(nn.Conv2d(in_channels, out_channels, kernel_size=[1, 2 * i + 3], padding=[0, i + 1])) + kernels.append(nn.Conv2d(in_channels, out_channels, kernel_size=[2 * i + 3, 1], padding=[i + 1, 0])) + kernels.append(nn.Conv2d(in_channels, out_channels, kernel_size=1)) + self.kernels = nn.ModuleList(kernels) + if init_weight: + self._initialize_weights() + + def _initialize_weights(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') + if m.bias is not None: + nn.init.constant_(m.bias, 0) + + def forward(self, x): + res_list = [] + for i in range(self.num_kernels // 2 * 2 + 1): + res_list.append(self.kernels[i](x)) + res = torch.stack(res_list, dim=-1).mean(-1) + return res diff --git a/layers/Crossformer_EncDec.py b/layers/Crossformer_EncDec.py new file mode 100644 index 0000000..42fc322 --- /dev/null +++ b/layers/Crossformer_EncDec.py @@ -0,0 +1,131 @@ +import torch +import torch.nn as nn +from einops import rearrange, repeat +from layers.SelfAttention_Family import TwoStageAttentionLayer + + +class SegMerging(nn.Module): + def __init__(self, d_model, win_size, norm_layer=nn.LayerNorm): + super().__init__() + self.d_model = d_model + self.win_size = win_size + self.linear_trans = nn.Linear(win_size * d_model, d_model) + self.norm = norm_layer(win_size * d_model) + + def forward(self, x): + batch_size, ts_d, seg_num, d_model = x.shape + pad_num = seg_num % self.win_size + if pad_num != 0: + pad_num = self.win_size - pad_num + x = torch.cat((x, x[:, :, -pad_num:, :]), dim=-2) + + seg_to_merge = [] + for i in range(self.win_size): + seg_to_merge.append(x[:, :, i::self.win_size, :]) + x = torch.cat(seg_to_merge, -1) + + x = self.norm(x) + x = self.linear_trans(x) + + return x + + +class scale_block(nn.Module): + def __init__(self, configs, win_size, d_model, n_heads, d_ff, depth, dropout, \ + seg_num=10, factor=10): + super(scale_block, self).__init__() + + if win_size > 1: + self.merge_layer = SegMerging(d_model, win_size, nn.LayerNorm) + else: + self.merge_layer = None + + self.encode_layers = nn.ModuleList() + + for i in range(depth): + self.encode_layers.append(TwoStageAttentionLayer(configs, seg_num, factor, d_model, n_heads, \ + d_ff, dropout)) + + def forward(self, x, attn_mask=None, tau=None, delta=None): + _, ts_dim, _, _ = x.shape + + if self.merge_layer is not None: + x = self.merge_layer(x) + + for layer in self.encode_layers: + x = layer(x) + + return x, None + + +class Encoder(nn.Module): + def __init__(self, attn_layers): + super(Encoder, self).__init__() + self.encode_blocks = nn.ModuleList(attn_layers) + + def forward(self, x): + encode_x = [] + encode_x.append(x) + + for block in self.encode_blocks: + x, attns = block(x) + encode_x.append(x) + + 
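+        # encode_x keeps the raw input plus the output of every scale block, i.e. one
+        # representation per temporal resolution; the Crossformer decoder indexes this
+        # list layer by layer as its cross-attention memory.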
return encode_x, None + + +class DecoderLayer(nn.Module): + def __init__(self, self_attention, cross_attention, seg_len, d_model, d_ff=None, dropout=0.1): + super(DecoderLayer, self).__init__() + self.self_attention = self_attention + self.cross_attention = cross_attention + self.norm1 = nn.LayerNorm(d_model) + self.norm2 = nn.LayerNorm(d_model) + self.dropout = nn.Dropout(dropout) + self.MLP1 = nn.Sequential(nn.Linear(d_model, d_model), + nn.GELU(), + nn.Linear(d_model, d_model)) + self.linear_pred = nn.Linear(d_model, seg_len) + + def forward(self, x, cross): + batch = x.shape[0] + x = self.self_attention(x) + x = rearrange(x, 'b ts_d out_seg_num d_model -> (b ts_d) out_seg_num d_model') + + cross = rearrange(cross, 'b ts_d in_seg_num d_model -> (b ts_d) in_seg_num d_model') + tmp, attn = self.cross_attention(x, cross, cross, None, None, None,) + x = x + self.dropout(tmp) + y = x = self.norm1(x) + y = self.MLP1(y) + dec_output = self.norm2(x + y) + + dec_output = rearrange(dec_output, '(b ts_d) seg_dec_num d_model -> b ts_d seg_dec_num d_model', b=batch) + layer_predict = self.linear_pred(dec_output) + layer_predict = rearrange(layer_predict, 'b out_d seg_num seg_len -> b (out_d seg_num) seg_len') + + return dec_output, layer_predict + + +class Decoder(nn.Module): + def __init__(self, layers): + super(Decoder, self).__init__() + self.decode_layers = nn.ModuleList(layers) + + + def forward(self, x, cross): + final_predict = None + i = 0 + + ts_d = x.shape[1] + for layer in self.decode_layers: + cross_enc = cross[i] + x, layer_predict = layer(x, cross_enc) + if final_predict is None: + final_predict = layer_predict + else: + final_predict = final_predict + layer_predict + i += 1 + + final_predict = rearrange(final_predict, 'b (out_d seg_num) seg_len -> b (seg_num seg_len) out_d', out_d=ts_d) + + return final_predict diff --git a/layers/DECOMP.py b/layers/DECOMP.py new file mode 100644 index 0000000..e10a015 --- /dev/null +++ b/layers/DECOMP.py @@ -0,0 +1,22 @@ +import torch +from torch import nn +from layers.EMA import EMA +from layers.DEMA import DEMA + +class DECOMP(nn.Module): + """ + Series decomposition block using EMA/DEMA + """ + def __init__(self, ma_type, alpha, beta): + super(DECOMP, self).__init__() + if ma_type == 'ema': + self.ma = EMA(alpha) + elif ma_type == 'dema': + self.ma = DEMA(alpha, beta) + else: + raise ValueError(f"Unsupported ma_type: {ma_type}. 
Use 'ema' or 'dema'") + + def forward(self, x): + moving_average = self.ma(x) + res = x - moving_average + return res, moving_average \ No newline at end of file diff --git a/layers/DEMA.py b/layers/DEMA.py new file mode 100644 index 0000000..963f41f --- /dev/null +++ b/layers/DEMA.py @@ -0,0 +1,23 @@ +import torch +from torch import nn + +class DEMA(nn.Module): + """ + Double Exponential Moving Average (DEMA) block to highlight the trend of time series + """ + def __init__(self, alpha, beta): + super(DEMA, self).__init__() + self.alpha = alpha.to(device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')) + self.beta = beta.to(device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')) + + def forward(self, x): + s_prev = x[:, 0, :] + b = x[:, 1, :] - s_prev + res = [s_prev.unsqueeze(1)] + for t in range(1, x.shape[1]): + xt = x[:, t, :] + s = self.alpha * xt + (1 - self.alpha) * (s_prev + b) + b = self.beta * (s - s_prev) + (1 - self.beta) * b + s_prev = s + res.append(s.unsqueeze(1)) + return torch.cat(res, dim=1) \ No newline at end of file diff --git a/layers/DWT_Decomposition.py b/layers/DWT_Decomposition.py new file mode 100644 index 0000000..d21e9b1 --- /dev/null +++ b/layers/DWT_Decomposition.py @@ -0,0 +1,1268 @@ +# -*- coding: utf-8 -*- +""" +Created on Sun Jan 5 +@author: Murad +SISLab, USF +mmurad@usf.edu +https://github.com/Secure-and-Intelligent-Systems-Lab/WPMixer +""" + +import torch +import torch.nn as nn +import pywt +import numpy as np +import torch.nn.functional as F +from torch.autograd import Function + + +class Decomposition(nn.Module): + def __init__(self, + input_length=[], + pred_length=[], + wavelet_name=[], + level=[], + batch_size=[], + channel=[], + d_model=[], + tfactor=[], + dfactor=[], + device=[], + no_decomposition=[], + use_amp=[]): + super(Decomposition, self).__init__() + self.input_length = input_length + self.pred_length = pred_length + self.wavelet_name = wavelet_name + self.level = level + self.batch_size = batch_size + self.channel = channel + self.d_model = d_model + self.device = device + self.no_decomposition = no_decomposition + self.use_amp = use_amp + self.eps = 1e-5 + + self.dwt = DWT1DForward(wave=self.wavelet_name, J=self.level, + use_amp=self.use_amp).cuda() if self.device.type == 'cuda' else DWT1DForward( + wave=self.wavelet_name, J=self.level, use_amp=self.use_amp) + self.idwt = DWT1DInverse(wave=self.wavelet_name, + use_amp=self.use_amp).cuda() if self.device.type == 'cuda' else DWT1DInverse( + wave=self.wavelet_name, use_amp=self.use_amp) + + self.input_w_dim = self._dummy_forward(self.input_length) if not self.no_decomposition else [ + self.input_length] # length of the input seq after decompose + self.pred_w_dim = self._dummy_forward(self.pred_length) if not self.no_decomposition else [ + self.pred_length] # required length of the pred seq after decom + + self.tfactor = tfactor + self.dfactor = dfactor + ################################# + self.affine = False + ################################# + + if self.affine: + self._init_params() + + def transform(self, x): + # input: x shape: batch, channel, seq + if not self.no_decomposition: + yl, yh = self._wavelet_decompose(x) + else: + yl, yh = x, [] # no decompose: returning the same value in yl + return yl, yh + + def inv_transform(self, yl, yh): + if not self.no_decomposition: + x = self._wavelet_reverse_decompose(yl, yh) + else: + x = yl # no decompose: returning the same value in x + return x + + def _dummy_forward(self, input_length): + dummy_x = 
torch.ones((self.batch_size, self.channel, input_length)).to(self.device) + yl, yh = self.dwt(dummy_x) + l = [] + l.append(yl.shape[-1]) + for i in range(len(yh)): + l.append(yh[i].shape[-1]) + return l + + def _init_params(self): + self.affine_weight = nn.Parameter(torch.ones((self.level + 1, self.channel))) + self.affine_bias = nn.Parameter(torch.zeros((self.level + 1, self.channel))) + + def _wavelet_decompose(self, x): + # input: x shape: batch, channel, seq + yl, yh = self.dwt(x) + + if self.affine: + yl = yl.transpose(1, 2) # batch, seq, channel + yl = yl * self.affine_weight[0] + yl = yl + self.affine_bias[0] + yl = yl.transpose(1, 2) # batch, channel, seq + for i in range(self.level): + yh_ = yh[i].transpose(1, 2) # batch, seq, channel + yh_ = yh_ * self.affine_weight[i + 1] + yh_ = yh_ + self.affine_bias[i + 1] + yh[i] = yh_.transpose(1, 2) # batch, channel, seq + + return yl, yh + + def _wavelet_reverse_decompose(self, yl, yh): + if self.affine: + yl = yl.transpose(1, 2) # batch, seq, channel + yl = yl - self.affine_bias[0] + yl = yl / (self.affine_weight[0] + self.eps) + yl = yl.transpose(1, 2) # batch, channel, seq + for i in range(self.level): + yh_ = yh[i].transpose(1, 2) # batch, seq, channel + yh_ = yh_ - self.affine_bias[i + 1] + yh_ = yh_ / (self.affine_weight[i + 1] + self.eps) + yh[i] = yh_.transpose(1, 2) # batch, channel, seq + + x = self.idwt((yl, yh)) + return x # shape: batch, channel, seq + + +############################################################################################### +""" +Following codes are combined from https://github.com/fbcotter/pytorch_wavelets. +To use Wavelet decomposition, you do not need to modify any of the codes below this line, +we can just play with the class Decomposition(above) +""" + + +############################################################################################### + +class DWT1DForward(nn.Module): + """ Performs a 1d DWT Forward decomposition of an image + + Args: + J (int): Number of levels of decomposition + wave (str or pywt.Wavelet or tuple(ndarray)): Which wavelet to use. + Can be: + 1) a string to pass to pywt.Wavelet constructor + 2) a pywt.Wavelet class + 3) a tuple of numpy arrays (h0, h1) + mode (str): 'zero', 'symmetric', 'reflect' or 'periodization'. The + padding scheme + """ + + def __init__(self, J=1, wave='db1', mode='zero', use_amp=False): + super().__init__() + self.use_amp = use_amp + if isinstance(wave, str): + wave = pywt.Wavelet(wave) + if isinstance(wave, pywt.Wavelet): + h0, h1 = wave.dec_lo, wave.dec_hi + else: + assert len(wave) == 2 + h0, h1 = wave[0], wave[1] + + # Prepare the filters - this makes them into column filters + filts = prep_filt_afb1d(h0, h1) + self.register_buffer('h0', filts[0]) + self.register_buffer('h1', filts[1]) + self.J = J + self.mode = mode + + def forward(self, x): + """ Forward pass of the DWT. + + Args: + x (tensor): Input of shape :math:`(N, C_{in}, L_{in})` + + Returns: + (yl, yh) + tuple of lowpass (yl) and bandpass (yh) coefficients. + yh is a list of length J with the first entry + being the finest scale coefficients. 
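+
+            Example (an illustrative sketch; the sizes follow from J=2, wave='db1', mode='zero'):
+                xfm = DWT1DForward(J=2, wave='db1')
+                yl, yh = xfm(torch.randn(8, 3, 96))
+                # yl: (8, 3, 24); yh: [(8, 3, 48), (8, 3, 24)], finest scale first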
+ """ + assert x.ndim == 3, "Can only handle 3d inputs (N, C, L)" + highs = [] + x0 = x + mode = mode_to_int(self.mode) + + # Do a multilevel transform + for j in range(self.J): + x0, x1 = AFB1D.apply(x0, self.h0, self.h1, mode, self.use_amp) + highs.append(x1) + + return x0, highs + + +class DWT1DInverse(nn.Module): + """ Performs a 1d DWT Inverse reconstruction of an image + + Args: + wave (str or pywt.Wavelet or tuple(ndarray)): Which wavelet to use. + Can be: + 1) a string to pass to pywt.Wavelet constructor + 2) a pywt.Wavelet class + 3) a tuple of numpy arrays (h0, h1) + mode (str): 'zero', 'symmetric', 'reflect' or 'periodization'. The + padding scheme + """ + + def __init__(self, wave='db1', mode='zero', use_amp=False): + super().__init__() + self.use_amp = use_amp + if isinstance(wave, str): + wave = pywt.Wavelet(wave) + if isinstance(wave, pywt.Wavelet): + g0, g1 = wave.rec_lo, wave.rec_hi + else: + assert len(wave) == 2 + g0, g1 = wave[0], wave[1] + + # Prepare the filters + filts = prep_filt_sfb1d(g0, g1) + self.register_buffer('g0', filts[0]) + self.register_buffer('g1', filts[1]) + self.mode = mode + + def forward(self, coeffs): + """ + Args: + coeffs (yl, yh): tuple of lowpass and bandpass coefficients, should + match the format returned by DWT1DForward. + + Returns: + Reconstructed input of shape :math:`(N, C_{in}, L_{in})` + + Note: + Can have None for any of the highpass scales and will treat the + values as zeros (not in an efficient way though). + """ + x0, highs = coeffs + assert x0.ndim == 3, "Can only handle 3d inputs (N, C, L)" + mode = mode_to_int(self.mode) + # Do a multilevel inverse transform + for x1 in highs[::-1]: + if x1 is None: + x1 = torch.zeros_like(x0) + + # 'Unpad' added signal + if x0.shape[-1] > x1.shape[-1]: + x0 = x0[..., :-1] + x0 = SFB1D.apply(x0, x1, self.g0, self.g1, mode, self.use_amp) + return x0 + + +def roll(x, n, dim, make_even=False): + if n < 0: + n = x.shape[dim] + n + + if make_even and x.shape[dim] % 2 == 1: + end = 1 + else: + end = 0 + + if dim == 0: + return torch.cat((x[-n:], x[:-n + end]), dim=0) + elif dim == 1: + return torch.cat((x[:, -n:], x[:, :-n + end]), dim=1) + elif dim == 2 or dim == -2: + return torch.cat((x[:, :, -n:], x[:, :, :-n + end]), dim=2) + elif dim == 3 or dim == -1: + return torch.cat((x[:, :, :, -n:], x[:, :, :, :-n + end]), dim=3) + + +def mypad(x, pad, mode='constant', value=0): + """ Function to do numpy like padding on tensors. Only works for 2-D + padding. + + Inputs: + x (tensor): tensor to pad + pad (tuple): tuple of (left, right, top, bottom) pad sizes + mode (str): 'symmetric', 'wrap', 'constant, 'reflect', 'replicate', or + 'zero'. The padding technique. 
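+
+    Example (illustrative, horizontal-only symmetric padding of a 4-D tensor):
+        y = mypad(torch.randn(1, 1, 8, 8), pad=(2, 2, 0, 0), mode='symmetric')
+        # y: (1, 1, 8, 12), last dim extended by whole-sample reflection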
+ """ + if mode == 'symmetric': + # Vertical only + if pad[0] == 0 and pad[1] == 0: + m1, m2 = pad[2], pad[3] + l = x.shape[-2] + xe = reflect(np.arange(-m1, l + m2, dtype='int32'), -0.5, l - 0.5) + return x[:, :, xe] + # horizontal only + elif pad[2] == 0 and pad[3] == 0: + m1, m2 = pad[0], pad[1] + l = x.shape[-1] + xe = reflect(np.arange(-m1, l + m2, dtype='int32'), -0.5, l - 0.5) + return x[:, :, :, xe] + # Both + else: + m1, m2 = pad[0], pad[1] + l1 = x.shape[-1] + xe_row = reflect(np.arange(-m1, l1 + m2, dtype='int32'), -0.5, l1 - 0.5) + m1, m2 = pad[2], pad[3] + l2 = x.shape[-2] + xe_col = reflect(np.arange(-m1, l2 + m2, dtype='int32'), -0.5, l2 - 0.5) + i = np.outer(xe_col, np.ones(xe_row.shape[0])) + j = np.outer(np.ones(xe_col.shape[0]), xe_row) + return x[:, :, i, j] + elif mode == 'periodic': + # Vertical only + if pad[0] == 0 and pad[1] == 0: + xe = np.arange(x.shape[-2]) + xe = np.pad(xe, (pad[2], pad[3]), mode='wrap') + return x[:, :, xe] + # Horizontal only + elif pad[2] == 0 and pad[3] == 0: + xe = np.arange(x.shape[-1]) + xe = np.pad(xe, (pad[0], pad[1]), mode='wrap') + return x[:, :, :, xe] + # Both + else: + xe_col = np.arange(x.shape[-2]) + xe_col = np.pad(xe_col, (pad[2], pad[3]), mode='wrap') + xe_row = np.arange(x.shape[-1]) + xe_row = np.pad(xe_row, (pad[0], pad[1]), mode='wrap') + i = np.outer(xe_col, np.ones(xe_row.shape[0])) + j = np.outer(np.ones(xe_col.shape[0]), xe_row) + return x[:, :, i, j] + + elif mode == 'constant' or mode == 'reflect' or mode == 'replicate': + return F.pad(x, pad, mode, value) + elif mode == 'zero': + return F.pad(x, pad) + else: + raise ValueError("Unkown pad type: {}".format(mode)) + + +def afb1d(x, h0, h1, use_amp, mode='zero', dim=-1): + """ 1D analysis filter bank (along one dimension only) of an image + + Inputs: + x (tensor): 4D input with the last two dimensions the spatial input + h0 (tensor): 4D input for the lowpass filter. Should have shape (1, 1, + h, 1) or (1, 1, 1, w) + h1 (tensor): 4D input for the highpass filter. Should have shape (1, 1, + h, 1) or (1, 1, 1, w) + mode (str): padding method + dim (int) - dimension of filtering. d=2 is for a vertical filter (called + column filtering but filters across the rows). d=3 is for a + horizontal filter, (called row filtering but filters across the + columns). + + Returns: + lohi: lowpass and highpass subbands concatenated along the channel + dimension + """ + C = x.shape[1] + # Convert the dim to positive + d = dim % 4 + s = (2, 1) if d == 2 else (1, 2) + N = x.shape[d] + # If h0, h1 are not tensors, make them. 
If they are, then assume that they + # are in the right order + if not isinstance(h0, torch.Tensor): + h0 = torch.tensor(np.copy(np.array(h0).ravel()[::-1]), + dtype=torch.float, device=x.device) + if not isinstance(h1, torch.Tensor): + h1 = torch.tensor(np.copy(np.array(h1).ravel()[::-1]), + dtype=torch.float, device=x.device) + L = h0.numel() + L2 = L // 2 + shape = [1, 1, 1, 1] + shape[d] = L + # If h aren't in the right shape, make them so + if h0.shape != tuple(shape): + h0 = h0.reshape(*shape) + if h1.shape != tuple(shape): + h1 = h1.reshape(*shape) + h = torch.cat([h0, h1] * C, dim=0) + + if mode == 'per' or mode == 'periodization': + if x.shape[dim] % 2 == 1: + if d == 2: + x = torch.cat((x, x[:, :, -1:]), dim=2) + else: + x = torch.cat((x, x[:, :, :, -1:]), dim=3) + N += 1 + x = roll(x, -L2, dim=d) + pad = (L - 1, 0) if d == 2 else (0, L - 1) + if use_amp: + with torch.cuda.amp.autocast(): # for mixed precision + lohi = F.conv2d(x, h, padding=pad, stride=s, groups=C) + else: + lohi = F.conv2d(x, h, padding=pad, stride=s, groups=C) + N2 = N // 2 + if d == 2: + lohi[:, :, :L2] = lohi[:, :, :L2] + lohi[:, :, N2:N2 + L2] + lohi = lohi[:, :, :N2] + else: + lohi[:, :, :, :L2] = lohi[:, :, :, :L2] + lohi[:, :, :, N2:N2 + L2] + lohi = lohi[:, :, :, :N2] + else: + # Calculate the pad size + outsize = pywt.dwt_coeff_len(N, L, mode=mode) + p = 2 * (outsize - 1) - N + L + if mode == 'zero': + # Sadly, pytorch only allows for same padding before and after, if + # we need to do more padding after for odd length signals, have to + # prepad + if p % 2 == 1: + pad = (0, 0, 0, 1) if d == 2 else (0, 1, 0, 0) + x = F.pad(x, pad) + pad = (p // 2, 0) if d == 2 else (0, p // 2) + # Calculate the high and lowpass + if use_amp: + with torch.cuda.amp.autocast(): + lohi = F.conv2d(x, h, padding=pad, stride=s, groups=C) + else: + lohi = F.conv2d(x, h, padding=pad, stride=s, groups=C) + elif mode == 'symmetric' or mode == 'reflect' or mode == 'periodic': + pad = (0, 0, p // 2, (p + 1) // 2) if d == 2 else (p // 2, (p + 1) // 2, 0, 0) + x = mypad(x, pad=pad, mode=mode) + if use_amp: + with torch.cuda.amp.autocast(): + lohi = F.conv2d(x, h, stride=s, groups=C) + else: + lohi = F.conv2d(x, h, stride=s, groups=C) + else: + raise ValueError("Unkown pad type: {}".format(mode)) + + return lohi + + +def afb1d_atrous(x, h0, h1, mode='periodic', dim=-1, dilation=1): + """ 1D analysis filter bank (along one dimension only) of an image without + downsampling. Does the a trous algorithm. + + Inputs: + x (tensor): 4D input with the last two dimensions the spatial input + h0 (tensor): 4D input for the lowpass filter. Should have shape (1, 1, + h, 1) or (1, 1, 1, w) + h1 (tensor): 4D input for the highpass filter. Should have shape (1, 1, + h, 1) or (1, 1, 1, w) + mode (str): padding method + dim (int) - dimension of filtering. d=2 is for a vertical filter (called + column filtering but filters across the rows). d=3 is for a + horizontal filter, (called row filtering but filters across the + columns). + dilation (int): dilation factor. Should be a power of 2. + + Returns: + lohi: lowpass and highpass subbands concatenated along the channel + dimension + """ + C = x.shape[1] + # Convert the dim to positive + d = dim % 4 + # If h0, h1 are not tensors, make them. 
If they are, then assume that they + # are in the right order + if not isinstance(h0, torch.Tensor): + h0 = torch.tensor(np.copy(np.array(h0).ravel()[::-1]), + dtype=torch.float, device=x.device) + if not isinstance(h1, torch.Tensor): + h1 = torch.tensor(np.copy(np.array(h1).ravel()[::-1]), + dtype=torch.float, device=x.device) + L = h0.numel() + shape = [1, 1, 1, 1] + shape[d] = L + # If h aren't in the right shape, make them so + if h0.shape != tuple(shape): + h0 = h0.reshape(*shape) + if h1.shape != tuple(shape): + h1 = h1.reshape(*shape) + h = torch.cat([h0, h1] * C, dim=0) + + # Calculate the pad size + L2 = (L * dilation) // 2 + pad = (0, 0, L2 - dilation, L2) if d == 2 else (L2 - dilation, L2, 0, 0) + x = mypad(x, pad=pad, mode=mode) + lohi = F.conv2d(x, h, groups=C, dilation=dilation) + + return lohi + + +def sfb1d(lo, hi, g0, g1, use_amp, mode='zero', dim=-1): + """ 1D synthesis filter bank of an image tensor + """ + C = lo.shape[1] + d = dim % 4 + # If g0, g1 are not tensors, make them. If they are, then assume that they + # are in the right order + if not isinstance(g0, torch.Tensor): + g0 = torch.tensor(np.copy(np.array(g0).ravel()), + dtype=torch.float, device=lo.device) + if not isinstance(g1, torch.Tensor): + g1 = torch.tensor(np.copy(np.array(g1).ravel()), + dtype=torch.float, device=lo.device) + L = g0.numel() + shape = [1, 1, 1, 1] + shape[d] = L + N = 2 * lo.shape[d] + # If g aren't in the right shape, make them so + if g0.shape != tuple(shape): + g0 = g0.reshape(*shape) + if g1.shape != tuple(shape): + g1 = g1.reshape(*shape) + + s = (2, 1) if d == 2 else (1, 2) + g0 = torch.cat([g0] * C, dim=0) + g1 = torch.cat([g1] * C, dim=0) + if mode == 'per' or mode == 'periodization': + if use_amp: + with torch.cuda.amp.autocast(): + y = F.conv_transpose2d(lo, g0, stride=s, groups=C) + \ + F.conv_transpose2d(hi, g1, stride=s, groups=C) + else: + y = F.conv_transpose2d(lo, g0, stride=s, groups=C) + \ + F.conv_transpose2d(hi, g1, stride=s, groups=C) + if d == 2: + y[:, :, :L - 2] = y[:, :, :L - 2] + y[:, :, N:N + L - 2] + y = y[:, :, :N] + else: + y[:, :, :, :L - 2] = y[:, :, :, :L - 2] + y[:, :, :, N:N + L - 2] + y = y[:, :, :, :N] + y = roll(y, 1 - L // 2, dim=dim) + else: + if mode == 'zero' or mode == 'symmetric' or mode == 'reflect' or \ + mode == 'periodic': + pad = (L - 2, 0) if d == 2 else (0, L - 2) + if use_amp: + with torch.cuda.amp.autocast(): + y = F.conv_transpose2d(lo, g0, stride=s, padding=pad, groups=C) + \ + F.conv_transpose2d(hi, g1, stride=s, padding=pad, groups=C) + else: + y = F.conv_transpose2d(lo, g0, stride=s, padding=pad, groups=C) + \ + F.conv_transpose2d(hi, g1, stride=s, padding=pad, groups=C) + else: + raise ValueError("Unkown pad type: {}".format(mode)) + + return y + + +def mode_to_int(mode): + if mode == 'zero': + return 0 + elif mode == 'symmetric': + return 1 + elif mode == 'per' or mode == 'periodization': + return 2 + elif mode == 'constant': + return 3 + elif mode == 'reflect': + return 4 + elif mode == 'replicate': + return 5 + elif mode == 'periodic': + return 6 + else: + raise ValueError("Unkown pad type: {}".format(mode)) + + +def int_to_mode(mode): + if mode == 0: + return 'zero' + elif mode == 1: + return 'symmetric' + elif mode == 2: + return 'periodization' + elif mode == 3: + return 'constant' + elif mode == 4: + return 'reflect' + elif mode == 5: + return 'replicate' + elif mode == 6: + return 'periodic' + else: + raise ValueError("Unkown pad type: {}".format(mode)) + + +class AFB2D(Function): + """ Does a single level 2d wavelet 
decomposition of an input. Does separate + row and column filtering by two calls to + :py:func:`pytorch_wavelets.dwt.lowlevel.afb1d` + + Needs to have the tensors in the right form. Because this function defines + its own backward pass, saves on memory by not having to save the input + tensors. + + Inputs: + x (torch.Tensor): Input to decompose + h0_row: row lowpass + h1_row: row highpass + h0_col: col lowpass + h1_col: col highpass + mode (int): use mode_to_int to get the int code here + + We encode the mode as an integer rather than a string as gradcheck causes an + error when a string is provided. + + Returns: + y: Tensor of shape (N, C*4, H, W) + """ + + @staticmethod + def forward(ctx, x, h0_row, h1_row, h0_col, h1_col, mode): + ctx.save_for_backward(h0_row, h1_row, h0_col, h1_col) + ctx.shape = x.shape[-2:] + mode = int_to_mode(mode) + ctx.mode = mode + lohi = afb1d(x, h0_row, h1_row, mode=mode, dim=3) + y = afb1d(lohi, h0_col, h1_col, mode=mode, dim=2) + s = y.shape + y = y.reshape(s[0], -1, 4, s[-2], s[-1]) + low = y[:, :, 0].contiguous() + highs = y[:, :, 1:].contiguous() + return low, highs + + @staticmethod + def backward(ctx, low, highs): + dx = None + if ctx.needs_input_grad[0]: + mode = ctx.mode + h0_row, h1_row, h0_col, h1_col = ctx.saved_tensors + lh, hl, hh = torch.unbind(highs, dim=2) + lo = sfb1d(low, lh, h0_col, h1_col, mode=mode, dim=2) + hi = sfb1d(hl, hh, h0_col, h1_col, mode=mode, dim=2) + dx = sfb1d(lo, hi, h0_row, h1_row, mode=mode, dim=3) + if dx.shape[-2] > ctx.shape[-2] and dx.shape[-1] > ctx.shape[-1]: + dx = dx[:, :, :ctx.shape[-2], :ctx.shape[-1]] + elif dx.shape[-2] > ctx.shape[-2]: + dx = dx[:, :, :ctx.shape[-2]] + elif dx.shape[-1] > ctx.shape[-1]: + dx = dx[:, :, :, :ctx.shape[-1]] + return dx, None, None, None, None, None + + +class AFB1D(Function): + """ Does a single level 1d wavelet decomposition of an input. + + Needs to have the tensors in the right form. Because this function defines + its own backward pass, saves on memory by not having to save the input + tensors. + + Inputs: + x (torch.Tensor): Input to decompose + h0: lowpass + h1: highpass + mode (int): use mode_to_int to get the int code here + + We encode the mode as an integer rather than a string as gradcheck causes an + error when a string is provided. + + Returns: + x0: Tensor of shape (N, C, L') - lowpass + x1: Tensor of shape (N, C, L') - highpass + """ + + @staticmethod + def forward(ctx, x, h0, h1, mode, use_amp): + mode = int_to_mode(mode) + + # Make inputs 4d + x = x[:, :, None, :] + h0 = h0[:, :, None, :] + h1 = h1[:, :, None, :] + + # Save for backwards + ctx.save_for_backward(h0, h1) + ctx.shape = x.shape[3] + ctx.mode = mode + ctx.use_amp = use_amp + + lohi = afb1d(x, h0, h1, use_amp, mode=mode, dim=3) + x0 = lohi[:, ::2, 0].contiguous() + x1 = lohi[:, 1::2, 0].contiguous() + return x0, x1 + + @staticmethod + def backward(ctx, dx0, dx1): + dx = None + if ctx.needs_input_grad[0]: + mode = ctx.mode + h0, h1 = ctx.saved_tensors + use_amp = ctx.use_amp + + # Make grads 4d + dx0 = dx0[:, :, None, :] + dx1 = dx1[:, :, None, :] + + dx = sfb1d(dx0, dx1, h0, h1, use_amp, mode=mode, dim=3)[:, :, 0] + + # Check for odd input + if dx.shape[2] > ctx.shape: + dx = dx[:, :, :ctx.shape] + + return dx, None, None, None, None, None + + +def afb2d(x, filts, mode='zero'): + """ Does a single level 2d wavelet decomposition of an input. 
Does separate + row and column filtering by two calls to + :py:func:`pytorch_wavelets.dwt.lowlevel.afb1d` + + Inputs: + x (torch.Tensor): Input to decompose + filts (list of ndarray or torch.Tensor): If a list of tensors has been + given, this function assumes they are in the right form (the form + returned by + :py:func:`~pytorch_wavelets.dwt.lowlevel.prep_filt_afb2d`). + Otherwise, this function will prepare the filters to be of the right + form by calling + :py:func:`~pytorch_wavelets.dwt.lowlevel.prep_filt_afb2d`. + mode (str): 'zero', 'symmetric', 'reflect' or 'periodization'. Which + padding to use. If periodization, the output size will be half the + input size. Otherwise, the output size will be slightly larger than + half. + + Returns: + y: Tensor of shape (N, C*4, H, W) + """ + tensorize = [not isinstance(f, torch.Tensor) for f in filts] + if len(filts) == 2: + h0, h1 = filts + if True in tensorize: + h0_col, h1_col, h0_row, h1_row = prep_filt_afb2d( + h0, h1, device=x.device) + else: + h0_col = h0 + h0_row = h0.transpose(2, 3) + h1_col = h1 + h1_row = h1.transpose(2, 3) + elif len(filts) == 4: + if True in tensorize: + h0_col, h1_col, h0_row, h1_row = prep_filt_afb2d( + *filts, device=x.device) + else: + h0_col, h1_col, h0_row, h1_row = filts + else: + raise ValueError("Unknown form for input filts") + + lohi = afb1d(x, h0_row, h1_row, mode=mode, dim=3) + y = afb1d(lohi, h0_col, h1_col, mode=mode, dim=2) + + return y + + +def afb2d_atrous(x, filts, mode='periodization', dilation=1): + """ Does a single level 2d wavelet decomposition of an input. Does separate + row and column filtering by two calls to + :py:func:`pytorch_wavelets.dwt.lowlevel.afb1d` + + Inputs: + x (torch.Tensor): Input to decompose + filts (list of ndarray or torch.Tensor): If a list of tensors has been + given, this function assumes they are in the right form (the form + returned by + :py:func:`~pytorch_wavelets.dwt.lowlevel.prep_filt_afb2d`). + Otherwise, this function will prepare the filters to be of the right + form by calling + :py:func:`~pytorch_wavelets.dwt.lowlevel.prep_filt_afb2d`. + mode (str): 'zero', 'symmetric', 'reflect' or 'periodization'. Which + padding to use. If periodization, the output size will be half the + input size. Otherwise, the output size will be slightly larger than + half. + dilation (int): dilation factor for the filters. Should be 2**level + + Returns: + y: Tensor of shape (N, C, 4, H, W) + """ + tensorize = [not isinstance(f, torch.Tensor) for f in filts] + if len(filts) == 2: + h0, h1 = filts + if True in tensorize: + h0_col, h1_col, h0_row, h1_row = prep_filt_afb2d( + h0, h1, device=x.device) + else: + h0_col = h0 + h0_row = h0.transpose(2, 3) + h1_col = h1 + h1_row = h1.transpose(2, 3) + elif len(filts) == 4: + if True in tensorize: + h0_col, h1_col, h0_row, h1_row = prep_filt_afb2d( + *filts, device=x.device) + else: + h0_col, h1_col, h0_row, h1_row = filts + else: + raise ValueError("Unknown form for input filts") + + lohi = afb1d_atrous(x, h0_row, h1_row, mode=mode, dim=3, dilation=dilation) + y = afb1d_atrous(lohi, h0_col, h1_col, mode=mode, dim=2, dilation=dilation) + + return y + + +def afb2d_nonsep(x, filts, mode='zero'): + """ Does a 1 level 2d wavelet decomposition of an input. Doesn't do separate + row and column filtering. + + Inputs: + x (torch.Tensor): Input to decompose + filts (list or torch.Tensor): If a list is given, should be the low and + highpass filter banks. 
If a tensor is given, it should be of the + form created by + :py:func:`pytorch_wavelets.dwt.lowlevel.prep_filt_afb2d_nonsep` + mode (str): 'zero', 'symmetric', 'reflect' or 'periodization'. Which + padding to use. If periodization, the output size will be half the + input size. Otherwise, the output size will be slightly larger than + half. + + Returns: + y: Tensor of shape (N, C, 4, H, W) + """ + C = x.shape[1] + Ny = x.shape[2] + Nx = x.shape[3] + + # Check the filter inputs + if isinstance(filts, (tuple, list)): + if len(filts) == 2: + filts = prep_filt_afb2d_nonsep(filts[0], filts[1], device=x.device) + else: + filts = prep_filt_afb2d_nonsep( + filts[0], filts[1], filts[2], filts[3], device=x.device) + f = torch.cat([filts] * C, dim=0) + Ly = f.shape[2] + Lx = f.shape[3] + + if mode == 'periodization' or mode == 'per': + if x.shape[2] % 2 == 1: + x = torch.cat((x, x[:, :, -1:]), dim=2) + Ny += 1 + if x.shape[3] % 2 == 1: + x = torch.cat((x, x[:, :, :, -1:]), dim=3) + Nx += 1 + pad = (Ly - 1, Lx - 1) + stride = (2, 2) + x = roll(roll(x, -Ly // 2, dim=2), -Lx // 2, dim=3) + y = F.conv2d(x, f, padding=pad, stride=stride, groups=C) + y[:, :, :Ly // 2] += y[:, :, Ny // 2:Ny // 2 + Ly // 2] + y[:, :, :, :Lx // 2] += y[:, :, :, Nx // 2:Nx // 2 + Lx // 2] + y = y[:, :, :Ny // 2, :Nx // 2] + elif mode == 'zero' or mode == 'symmetric' or mode == 'reflect': + # Calculate the pad size + out1 = pywt.dwt_coeff_len(Ny, Ly, mode=mode) + out2 = pywt.dwt_coeff_len(Nx, Lx, mode=mode) + p1 = 2 * (out1 - 1) - Ny + Ly + p2 = 2 * (out2 - 1) - Nx + Lx + if mode == 'zero': + # Sadly, pytorch only allows for same padding before and after, if + # we need to do more padding after for odd length signals, have to + # prepad + if p1 % 2 == 1 and p2 % 2 == 1: + x = F.pad(x, (0, 1, 0, 1)) + elif p1 % 2 == 1: + x = F.pad(x, (0, 0, 0, 1)) + elif p2 % 2 == 1: + x = F.pad(x, (0, 1, 0, 0)) + # Calculate the high and lowpass + y = F.conv2d( + x, f, padding=(p1 // 2, p2 // 2), stride=2, groups=C) + elif mode == 'symmetric' or mode == 'reflect' or mode == 'periodic': + pad = (p2 // 2, (p2 + 1) // 2, p1 // 2, (p1 + 1) // 2) + x = mypad(x, pad=pad, mode=mode) + y = F.conv2d(x, f, stride=2, groups=C) + else: + raise ValueError("Unkown pad type: {}".format(mode)) + + return y + + +def sfb2d(ll, lh, hl, hh, filts, mode='zero'): + """ Does a single level 2d wavelet reconstruction of wavelet coefficients. + Does separate row and column filtering by two calls to + :py:func:`pytorch_wavelets.dwt.lowlevel.sfb1d` + + Inputs: + ll (torch.Tensor): lowpass coefficients + lh (torch.Tensor): horizontal coefficients + hl (torch.Tensor): vertical coefficients + hh (torch.Tensor): diagonal coefficients + filts (list of ndarray or torch.Tensor): If a list of tensors has been + given, this function assumes they are in the right form (the form + returned by + :py:func:`~pytorch_wavelets.dwt.lowlevel.prep_filt_sfb2d`). + Otherwise, this function will prepare the filters to be of the right + form by calling + :py:func:`~pytorch_wavelets.dwt.lowlevel.prep_filt_sfb2d`. + mode (str): 'zero', 'symmetric', 'reflect' or 'periodization'. Which + padding to use. If periodization, the output size will be half the + input size. Otherwise, the output size will be slightly larger than + half. 
+ """ + tensorize = [not isinstance(x, torch.Tensor) for x in filts] + if len(filts) == 2: + g0, g1 = filts + if True in tensorize: + g0_col, g1_col, g0_row, g1_row = prep_filt_sfb2d(g0, g1) + else: + g0_col = g0 + g0_row = g0.transpose(2, 3) + g1_col = g1 + g1_row = g1.transpose(2, 3) + elif len(filts) == 4: + if True in tensorize: + g0_col, g1_col, g0_row, g1_row = prep_filt_sfb2d(*filts) + else: + g0_col, g1_col, g0_row, g1_row = filts + else: + raise ValueError("Unknown form for input filts") + + lo = sfb1d(ll, lh, g0_col, g1_col, mode=mode, dim=2) + hi = sfb1d(hl, hh, g0_col, g1_col, mode=mode, dim=2) + y = sfb1d(lo, hi, g0_row, g1_row, mode=mode, dim=3) + + return y + + +class SFB2D(Function): + """ Does a single level 2d wavelet decomposition of an input. Does separate + row and column filtering by two calls to + :py:func:`pytorch_wavelets.dwt.lowlevel.afb1d` + + Needs to have the tensors in the right form. Because this function defines + its own backward pass, saves on memory by not having to save the input + tensors. + + Inputs: + x (torch.Tensor): Input to decompose + h0_row: row lowpass + h1_row: row highpass + h0_col: col lowpass + h1_col: col highpass + mode (int): use mode_to_int to get the int code here + + We encode the mode as an integer rather than a string as gradcheck causes an + error when a string is provided. + + Returns: + y: Tensor of shape (N, C*4, H, W) + """ + + @staticmethod + def forward(ctx, low, highs, g0_row, g1_row, g0_col, g1_col, mode): + mode = int_to_mode(mode) + ctx.mode = mode + ctx.save_for_backward(g0_row, g1_row, g0_col, g1_col) + + lh, hl, hh = torch.unbind(highs, dim=2) + lo = sfb1d(low, lh, g0_col, g1_col, mode=mode, dim=2) + hi = sfb1d(hl, hh, g0_col, g1_col, mode=mode, dim=2) + y = sfb1d(lo, hi, g0_row, g1_row, mode=mode, dim=3) + return y + + @staticmethod + def backward(ctx, dy): + dlow, dhigh = None, None + if ctx.needs_input_grad[0]: + mode = ctx.mode + g0_row, g1_row, g0_col, g1_col = ctx.saved_tensors + dx = afb1d(dy, g0_row, g1_row, mode=mode, dim=3) + dx = afb1d(dx, g0_col, g1_col, mode=mode, dim=2) + s = dx.shape + dx = dx.reshape(s[0], -1, 4, s[-2], s[-1]) + dlow = dx[:, :, 0].contiguous() + dhigh = dx[:, :, 1:].contiguous() + return dlow, dhigh, None, None, None, None, None + + +class SFB1D(Function): + """ Does a single level 1d wavelet decomposition of an input. + + Needs to have the tensors in the right form. Because this function defines + its own backward pass, saves on memory by not having to save the input + tensors. + + Inputs: + low (torch.Tensor): Lowpass to reconstruct of shape (N, C, L) + high (torch.Tensor): Highpass to reconstruct of shape (N, C, L) + g0: lowpass + g1: highpass + mode (int): use mode_to_int to get the int code here + + We encode the mode as an integer rather than a string as gradcheck causes an + error when a string is provided. 
+ + Returns: + y: Tensor of shape (N, C*2, L') + """ + + @staticmethod + def forward(ctx, low, high, g0, g1, mode, use_amp): + mode = int_to_mode(mode) + # Make into a 2d tensor with 1 row + low = low[:, :, None, :] + high = high[:, :, None, :] + g0 = g0[:, :, None, :] + g1 = g1[:, :, None, :] + + ctx.mode = mode + ctx.save_for_backward(g0, g1) + ctx.use_amp = use_amp + + return sfb1d(low, high, g0, g1, use_amp, mode=mode, dim=3)[:, :, 0] + + @staticmethod + def backward(ctx, dy): + dlow, dhigh = None, None + if ctx.needs_input_grad[0]: + mode = ctx.mode + use_amp = ctx.use_amp + g0, g1, = ctx.saved_tensors + dy = dy[:, :, None, :] + + dx = afb1d(dy, g0, g1, use_amp, mode=mode, dim=3) + + dlow = dx[:, ::2, 0].contiguous() + dhigh = dx[:, 1::2, 0].contiguous() + return dlow, dhigh, None, None, None, None, None + + +def sfb2d_nonsep(coeffs, filts, mode='zero'): + """ Does a single level 2d wavelet reconstruction of wavelet coefficients. + Does not do separable filtering. + + Inputs: + coeffs (torch.Tensor): tensor of coefficients of shape (N, C, 4, H, W) + where the third dimension indexes across the (ll, lh, hl, hh) bands. + filts (list of ndarray or torch.Tensor): If a list of tensors has been + given, this function assumes they are in the right form (the form + returned by + :py:func:`~pytorch_wavelets.dwt.lowlevel.prep_filt_sfb2d_nonsep`). + Otherwise, this function will prepare the filters to be of the right + form by calling + :py:func:`~pytorch_wavelets.dwt.lowlevel.prep_filt_sfb2d_nonsep`. + mode (str): 'zero', 'symmetric', 'reflect' or 'periodization'. Which + padding to use. If periodization, the output size will be half the + input size. Otherwise, the output size will be slightly larger than + half. + """ + C = coeffs.shape[1] + Ny = coeffs.shape[-2] + Nx = coeffs.shape[-1] + + # Check the filter inputs - should be in the form of a torch tensor, but if + # not, tensorize it here. + if isinstance(filts, (tuple, list)): + if len(filts) == 2: + filts = prep_filt_sfb2d_nonsep(filts[0], filts[1], + device=coeffs.device) + elif len(filts) == 4: + filts = prep_filt_sfb2d_nonsep( + filts[0], filts[1], filts[2], filts[3], device=coeffs.device) + else: + raise ValueError("Unkown form for input filts") + f = torch.cat([filts] * C, dim=0) + Ly = f.shape[2] + Lx = f.shape[3] + + x = coeffs.reshape(coeffs.shape[0], -1, coeffs.shape[-2], coeffs.shape[-1]) + if mode == 'periodization' or mode == 'per': + ll = F.conv_transpose2d(x, f, groups=C, stride=2) + ll[:, :, :Ly - 2] += ll[:, :, 2 * Ny:2 * Ny + Ly - 2] + ll[:, :, :, :Lx - 2] += ll[:, :, :, 2 * Nx:2 * Nx + Lx - 2] + ll = ll[:, :, :2 * Ny, :2 * Nx] + ll = roll(roll(ll, 1 - Ly // 2, dim=2), 1 - Lx // 2, dim=3) + elif mode == 'symmetric' or mode == 'zero' or mode == 'reflect' or \ + mode == 'periodic': + pad = (Ly - 2, Lx - 2) + ll = F.conv_transpose2d(x, f, padding=pad, groups=C, stride=2) + else: + raise ValueError("Unkown pad type: {}".format(mode)) + + return ll.contiguous() + + +def prep_filt_afb2d_nonsep(h0_col, h1_col, h0_row=None, h1_row=None, + device=None): + """ + Prepares the filters to be of the right form for the afb2d_nonsep function. + In particular, makes 2d point spread functions, and mirror images them in + preparation to do torch.conv2d. + + Inputs: + h0_col (array-like): low pass column filter bank + h1_col (array-like): high pass column filter bank + h0_row (array-like): low pass row filter bank. If none, will assume the + same as column filter + h1_row (array-like): high pass row filter bank. 
If none, will assume the + same as column filter + device: which device to put the tensors on to + + Returns: + filts: (4, 1, h, w) tensor ready to get the four subbands + """ + h0_col = np.array(h0_col).ravel() + h1_col = np.array(h1_col).ravel() + if h0_row is None: + h0_row = h0_col + if h1_row is None: + h1_row = h1_col + ll = np.outer(h0_col, h0_row) + lh = np.outer(h1_col, h0_row) + hl = np.outer(h0_col, h1_row) + hh = np.outer(h1_col, h1_row) + filts = np.stack([ll[None, ::-1, ::-1], lh[None, ::-1, ::-1], + hl[None, ::-1, ::-1], hh[None, ::-1, ::-1]], axis=0) + filts = torch.tensor(filts, dtype=torch.get_default_dtype(), device=device) + return filts + + +def prep_filt_sfb2d_nonsep(g0_col, g1_col, g0_row=None, g1_row=None, + device=None): + """ + Prepares the filters to be of the right form for the sfb2d_nonsep function. + In particular, makes 2d point spread functions. Does not mirror image them + as sfb2d_nonsep uses conv2d_transpose which acts like normal convolution. + + Inputs: + g0_col (array-like): low pass column filter bank + g1_col (array-like): high pass column filter bank + g0_row (array-like): low pass row filter bank. If none, will assume the + same as column filter + g1_row (array-like): high pass row filter bank. If none, will assume the + same as column filter + device: which device to put the tensors on to + + Returns: + filts: (4, 1, h, w) tensor ready to combine the four subbands + """ + g0_col = np.array(g0_col).ravel() + g1_col = np.array(g1_col).ravel() + if g0_row is None: + g0_row = g0_col + if g1_row is None: + g1_row = g1_col + ll = np.outer(g0_col, g0_row) + lh = np.outer(g1_col, g0_row) + hl = np.outer(g0_col, g1_row) + hh = np.outer(g1_col, g1_row) + filts = np.stack([ll[None], lh[None], hl[None], hh[None]], axis=0) + filts = torch.tensor(filts, dtype=torch.get_default_dtype(), device=device) + return filts + + +def prep_filt_sfb2d(g0_col, g1_col, g0_row=None, g1_row=None, device=None): + """ + Prepares the filters to be of the right form for the sfb2d function. In + particular, makes the tensors the right shape. It does not mirror image them + as as sfb2d uses conv2d_transpose which acts like normal convolution. + + Inputs: + g0_col (array-like): low pass column filter bank + g1_col (array-like): high pass column filter bank + g0_row (array-like): low pass row filter bank. If none, will assume the + same as column filter + g1_row (array-like): high pass row filter bank. If none, will assume the + same as column filter + device: which device to put the tensors on to + + Returns: + (g0_col, g1_col, g0_row, g1_row) + """ + g0_col, g1_col = prep_filt_sfb1d(g0_col, g1_col, device) + if g0_row is None: + g0_row, g1_row = g0_col, g1_col + else: + g0_row, g1_row = prep_filt_sfb1d(g0_row, g1_row, device) + + g0_col = g0_col.reshape((1, 1, -1, 1)) + g1_col = g1_col.reshape((1, 1, -1, 1)) + g0_row = g0_row.reshape((1, 1, 1, -1)) + g1_row = g1_row.reshape((1, 1, 1, -1)) + + return g0_col, g1_col, g0_row, g1_row + + +def prep_filt_sfb1d(g0, g1, device=None): + """ + Prepares the filters to be of the right form for the sfb1d function. In + particular, makes the tensors the right shape. It does not mirror image them + as as sfb2d uses conv2d_transpose which acts like normal convolution. 
+ + Inputs: + g0 (array-like): low pass filter bank + g1 (array-like): high pass filter bank + device: which device to put the tensors on to + + Returns: + (g0, g1) + """ + g0 = np.array(g0).ravel() + g1 = np.array(g1).ravel() + t = torch.get_default_dtype() + g0 = torch.tensor(g0, device=device, dtype=t).reshape((1, 1, -1)) + g1 = torch.tensor(g1, device=device, dtype=t).reshape((1, 1, -1)) + + return g0, g1 + + +def prep_filt_afb2d(h0_col, h1_col, h0_row=None, h1_row=None, device=None): + """ + Prepares the filters to be of the right form for the afb2d function. In + particular, makes the tensors the right shape. It takes mirror images of + them as as afb2d uses conv2d which acts like normal correlation. + + Inputs: + h0_col (array-like): low pass column filter bank + h1_col (array-like): high pass column filter bank + h0_row (array-like): low pass row filter bank. If none, will assume the + same as column filter + h1_row (array-like): high pass row filter bank. If none, will assume the + same as column filter + device: which device to put the tensors on to + + Returns: + (h0_col, h1_col, h0_row, h1_row) + """ + h0_col, h1_col = prep_filt_afb1d(h0_col, h1_col, device) + if h0_row is None: + h0_row, h1_row = h0_col, h1_col + else: + h0_row, h1_row = prep_filt_afb1d(h0_row, h1_row, device) + + h0_col = h0_col.reshape((1, 1, -1, 1)) + h1_col = h1_col.reshape((1, 1, -1, 1)) + h0_row = h0_row.reshape((1, 1, 1, -1)) + h1_row = h1_row.reshape((1, 1, 1, -1)) + return h0_col, h1_col, h0_row, h1_row + + +def prep_filt_afb1d(h0, h1, device=None): + """ + Prepares the filters to be of the right form for the afb2d function. In + particular, makes the tensors the right shape. It takes mirror images of + them as as afb2d uses conv2d which acts like normal correlation. + + Inputs: + h0 (array-like): low pass column filter bank + h1 (array-like): high pass column filter bank + device: which device to put the tensors on to + + Returns: + (h0, h1) + """ + h0 = np.array(h0[::-1]).ravel() + h1 = np.array(h1[::-1]).ravel() + t = torch.get_default_dtype() + h0 = torch.tensor(h0, device=device, dtype=t).reshape((1, 1, -1)) + h1 = torch.tensor(h1, device=device, dtype=t).reshape((1, 1, -1)) + return h0, h1 + + +def reflect(x, minx, maxx): + """Reflect the values in matrix *x* about the scalar values *minx* and + *maxx*. Hence a vector *x* containing a long linearly increasing series is + converted into a waveform which ramps linearly up and down between *minx* + and *maxx*. If *x* contains integers and *minx* and *maxx* are (integers + + 0.5), the ramps will have repeated max and min samples. + + .. codeauthor:: Rich Wareham , Aug 2013 + .. codeauthor:: Nick Kingsbury, Cambridge University, January 1999. 
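+
+    Example (values follow from the definition below):
+        reflect(np.arange(-3, 8), -0.5, 4.5)
+        # -> array([2, 1, 0, 0, 1, 2, 3, 4, 4, 3, 2])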
+ + """ + x = np.asanyarray(x) + rng = maxx - minx + rng_by_2 = 2 * rng + mod = np.fmod(x - minx, rng_by_2) + normed_mod = np.where(mod < 0, mod + rng_by_2, mod) + out = np.where(normed_mod >= rng, rng_by_2 - normed_mod, normed_mod) + minx + return np.array(out, dtype=x.dtype) \ No newline at end of file diff --git a/layers/EMA.py b/layers/EMA.py new file mode 100644 index 0000000..577dd5e --- /dev/null +++ b/layers/EMA.py @@ -0,0 +1,23 @@ +import torch +from torch import nn + +class EMA(nn.Module): + """ + Exponential Moving Average (EMA) block to highlight the trend of time series + """ + def __init__(self, alpha): + super(EMA, self).__init__() + self.alpha = alpha + + def forward(self, x): + # x: [Batch, Input, Channel] + _, t, _ = x.shape + powers = torch.flip(torch.arange(t, dtype=torch.double), dims=(0,)) + weights = torch.pow((1 - self.alpha), powers).to(x.device) + divisor = weights.clone() + weights[1:] = weights[1:] * self.alpha + weights = weights.reshape(1, t, 1) + divisor = divisor.reshape(1, t, 1) + x = torch.cumsum(x * weights, dim=1) + x = torch.div(x, divisor) + return x.to(torch.float32) \ No newline at end of file diff --git a/layers/ETSformer_EncDec.py b/layers/ETSformer_EncDec.py new file mode 100644 index 0000000..a3c41ba --- /dev/null +++ b/layers/ETSformer_EncDec.py @@ -0,0 +1,334 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.fft as fft +from einops import rearrange, reduce, repeat +import math, random +from scipy.fftpack import next_fast_len + + +class Transform: + def __init__(self, sigma): + self.sigma = sigma + + @torch.no_grad() + def transform(self, x): + return self.jitter(self.shift(self.scale(x))) + + def jitter(self, x): + return x + (torch.randn(x.shape).to(x.device) * self.sigma) + + def scale(self, x): + return x * (torch.randn(x.size(-1)).to(x.device) * self.sigma + 1) + + def shift(self, x): + return x + (torch.randn(x.size(-1)).to(x.device) * self.sigma) + + +def conv1d_fft(f, g, dim=-1): + N = f.size(dim) + M = g.size(dim) + + fast_len = next_fast_len(N + M - 1) + + F_f = fft.rfft(f, fast_len, dim=dim) + F_g = fft.rfft(g, fast_len, dim=dim) + + F_fg = F_f * F_g.conj() + out = fft.irfft(F_fg, fast_len, dim=dim) + out = out.roll((-1,), dims=(dim,)) + idx = torch.as_tensor(range(fast_len - N, fast_len)).to(out.device) + out = out.index_select(dim, idx) + + return out + + +class ExponentialSmoothing(nn.Module): + + def __init__(self, dim, nhead, dropout=0.1, aux=False): + super().__init__() + self._smoothing_weight = nn.Parameter(torch.randn(nhead, 1)) + self.v0 = nn.Parameter(torch.randn(1, 1, nhead, dim)) + self.dropout = nn.Dropout(dropout) + if aux: + self.aux_dropout = nn.Dropout(dropout) + + def forward(self, values, aux_values=None): + b, t, h, d = values.shape + + init_weight, weight = self.get_exponential_weight(t) + output = conv1d_fft(self.dropout(values), weight, dim=1) + output = init_weight * self.v0 + output + + if aux_values is not None: + aux_weight = weight / (1 - self.weight) * self.weight + aux_output = conv1d_fft(self.aux_dropout(aux_values), aux_weight) + output = output + aux_output + + return output + + def get_exponential_weight(self, T): + # Generate array [0, 1, ..., T-1] + powers = torch.arange(T, dtype=torch.float, device=self.weight.device) + + # (1 - \alpha) * \alpha^t, for all t = T-1, T-2, ..., 0] + weight = (1 - self.weight) * (self.weight ** torch.flip(powers, dims=(0,))) + + # \alpha^t for all t = 1, 2, ..., T + init_weight = self.weight ** (powers + 1) + + return 
rearrange(init_weight, 'h t -> 1 t h 1'), \ + rearrange(weight, 'h t -> 1 t h 1') + + @property + def weight(self): + return torch.sigmoid(self._smoothing_weight) + + +class Feedforward(nn.Module): + def __init__(self, d_model, dim_feedforward, dropout=0.1, activation='sigmoid'): + # Implementation of Feedforward model + super().__init__() + self.linear1 = nn.Linear(d_model, dim_feedforward, bias=False) + self.dropout1 = nn.Dropout(dropout) + self.linear2 = nn.Linear(dim_feedforward, d_model, bias=False) + self.dropout2 = nn.Dropout(dropout) + self.activation = getattr(F, activation) + + def forward(self, x): + x = self.linear2(self.dropout1(self.activation(self.linear1(x)))) + return self.dropout2(x) + + +class GrowthLayer(nn.Module): + + def __init__(self, d_model, nhead, d_head=None, dropout=0.1): + super().__init__() + self.d_head = d_head or (d_model // nhead) + self.d_model = d_model + self.nhead = nhead + + self.z0 = nn.Parameter(torch.randn(self.nhead, self.d_head)) + self.in_proj = nn.Linear(self.d_model, self.d_head * self.nhead) + self.es = ExponentialSmoothing(self.d_head, self.nhead, dropout=dropout) + self.out_proj = nn.Linear(self.d_head * self.nhead, self.d_model) + + assert self.d_head * self.nhead == self.d_model, "d_model must be divisible by nhead" + + def forward(self, inputs): + """ + :param inputs: shape: (batch, seq_len, dim) + :return: shape: (batch, seq_len, dim) + """ + b, t, d = inputs.shape + values = self.in_proj(inputs).view(b, t, self.nhead, -1) + values = torch.cat([repeat(self.z0, 'h d -> b 1 h d', b=b), values], dim=1) + values = values[:, 1:] - values[:, :-1] + out = self.es(values) + out = torch.cat([repeat(self.es.v0, '1 1 h d -> b 1 h d', b=b), out], dim=1) + out = rearrange(out, 'b t h d -> b t (h d)') + return self.out_proj(out) + + +class FourierLayer(nn.Module): + + def __init__(self, d_model, pred_len, k=None, low_freq=1): + super().__init__() + self.d_model = d_model + self.pred_len = pred_len + self.k = k + self.low_freq = low_freq + + def forward(self, x): + """x: (b, t, d)""" + b, t, d = x.shape + x_freq = fft.rfft(x, dim=1) + + if t % 2 == 0: + x_freq = x_freq[:, self.low_freq:-1] + f = fft.rfftfreq(t)[self.low_freq:-1] + else: + x_freq = x_freq[:, self.low_freq:] + f = fft.rfftfreq(t)[self.low_freq:] + + x_freq, index_tuple = self.topk_freq(x_freq) + f = repeat(f, 'f -> b f d', b=x_freq.size(0), d=x_freq.size(2)) + f = rearrange(f[index_tuple], 'b f d -> b f () d').to(x_freq.device) + + return self.extrapolate(x_freq, f, t) + + def extrapolate(self, x_freq, f, t): + x_freq = torch.cat([x_freq, x_freq.conj()], dim=1) + f = torch.cat([f, -f], dim=1) + t_val = rearrange(torch.arange(t + self.pred_len, dtype=torch.float), + 't -> () () t ()').to(x_freq.device) + + amp = rearrange(x_freq.abs() / t, 'b f d -> b f () d') + phase = rearrange(x_freq.angle(), 'b f d -> b f () d') + + x_time = amp * torch.cos(2 * math.pi * f * t_val + phase) + + return reduce(x_time, 'b f t d -> b t d', 'sum') + + def topk_freq(self, x_freq): + values, indices = torch.topk(x_freq.abs(), self.k, dim=1, largest=True, sorted=True) + mesh_a, mesh_b = torch.meshgrid(torch.arange(x_freq.size(0)), torch.arange(x_freq.size(2))) + index_tuple = (mesh_a.unsqueeze(1).to(indices.device), indices, mesh_b.unsqueeze(1).to(indices.device)) + x_freq = x_freq[index_tuple] + + return x_freq, index_tuple + + +class LevelLayer(nn.Module): + + def __init__(self, d_model, c_out, dropout=0.1): + super().__init__() + self.d_model = d_model + self.c_out = c_out + + self.es = 
ExponentialSmoothing(1, self.c_out, dropout=dropout, aux=True) + self.growth_pred = nn.Linear(self.d_model, self.c_out) + self.season_pred = nn.Linear(self.d_model, self.c_out) + + def forward(self, level, growth, season): + b, t, _ = level.shape + growth = self.growth_pred(growth).view(b, t, self.c_out, 1) + season = self.season_pred(season).view(b, t, self.c_out, 1) + growth = growth.view(b, t, self.c_out, 1) + season = season.view(b, t, self.c_out, 1) + level = level.view(b, t, self.c_out, 1) + out = self.es(level - season, aux_values=growth) + out = rearrange(out, 'b t h d -> b t (h d)') + return out + + +class EncoderLayer(nn.Module): + + def __init__(self, d_model, nhead, c_out, seq_len, pred_len, k, dim_feedforward=None, dropout=0.1, + activation='sigmoid', layer_norm_eps=1e-5): + super().__init__() + self.d_model = d_model + self.nhead = nhead + self.c_out = c_out + self.seq_len = seq_len + self.pred_len = pred_len + dim_feedforward = dim_feedforward or 4 * d_model + self.dim_feedforward = dim_feedforward + + self.growth_layer = GrowthLayer(d_model, nhead, dropout=dropout) + self.seasonal_layer = FourierLayer(d_model, pred_len, k=k) + self.level_layer = LevelLayer(d_model, c_out, dropout=dropout) + + # Implementation of Feedforward model + self.ff = Feedforward(d_model, dim_feedforward, dropout=dropout, activation=activation) + self.norm1 = nn.LayerNorm(d_model, eps=layer_norm_eps) + self.norm2 = nn.LayerNorm(d_model, eps=layer_norm_eps) + + self.dropout1 = nn.Dropout(dropout) + self.dropout2 = nn.Dropout(dropout) + + def forward(self, res, level, attn_mask=None): + season = self._season_block(res) + res = res - season[:, :-self.pred_len] + growth = self._growth_block(res) + res = self.norm1(res - growth[:, 1:]) + res = self.norm2(res + self.ff(res)) + + level = self.level_layer(level, growth[:, :-1], season[:, :-self.pred_len]) + return res, level, growth, season + + def _growth_block(self, x): + x = self.growth_layer(x) + return self.dropout1(x) + + def _season_block(self, x): + x = self.seasonal_layer(x) + return self.dropout2(x) + + +class Encoder(nn.Module): + + def __init__(self, layers): + super().__init__() + self.layers = nn.ModuleList(layers) + + def forward(self, res, level, attn_mask=None): + growths = [] + seasons = [] + for layer in self.layers: + res, level, growth, season = layer(res, level, attn_mask=None) + growths.append(growth) + seasons.append(season) + + return level, growths, seasons + + +class DampingLayer(nn.Module): + + def __init__(self, pred_len, nhead, dropout=0.1): + super().__init__() + self.pred_len = pred_len + self.nhead = nhead + self._damping_factor = nn.Parameter(torch.randn(1, nhead)) + self.dropout = nn.Dropout(dropout) + + def forward(self, x): + x = repeat(x, 'b 1 d -> b t d', t=self.pred_len) + b, t, d = x.shape + + powers = torch.arange(self.pred_len).to(self._damping_factor.device) + 1 + powers = powers.view(self.pred_len, 1) + damping_factors = self.damping_factor ** powers + damping_factors = damping_factors.cumsum(dim=0) + x = x.view(b, t, self.nhead, -1) + x = self.dropout(x) * damping_factors.unsqueeze(-1) + return x.view(b, t, d) + + @property + def damping_factor(self): + return torch.sigmoid(self._damping_factor) + + +class DecoderLayer(nn.Module): + + def __init__(self, d_model, nhead, c_out, pred_len, dropout=0.1): + super().__init__() + self.d_model = d_model + self.nhead = nhead + self.c_out = c_out + self.pred_len = pred_len + + self.growth_damping = DampingLayer(pred_len, nhead, dropout=dropout) + self.dropout1 = 
nn.Dropout(dropout) + + def forward(self, growth, season): + growth_horizon = self.growth_damping(growth[:, -1:]) + growth_horizon = self.dropout1(growth_horizon) + + seasonal_horizon = season[:, -self.pred_len:] + return growth_horizon, seasonal_horizon + + +class Decoder(nn.Module): + + def __init__(self, layers): + super().__init__() + self.d_model = layers[0].d_model + self.c_out = layers[0].c_out + self.pred_len = layers[0].pred_len + self.nhead = layers[0].nhead + + self.layers = nn.ModuleList(layers) + self.pred = nn.Linear(self.d_model, self.c_out) + + def forward(self, growths, seasons): + growth_repr = [] + season_repr = [] + + for idx, layer in enumerate(self.layers): + growth_horizon, season_horizon = layer(growths[idx], seasons[idx]) + growth_repr.append(growth_horizon) + season_repr.append(season_horizon) + growth_repr = sum(growth_repr) + season_repr = sum(season_repr) + return self.pred(growth_repr), self.pred(season_repr) diff --git a/layers/Embed.py b/layers/Embed.py new file mode 100644 index 0000000..977e255 --- /dev/null +++ b/layers/Embed.py @@ -0,0 +1,190 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.nn.utils import weight_norm +import math + + +class PositionalEmbedding(nn.Module): + def __init__(self, d_model, max_len=5000): + super(PositionalEmbedding, self).__init__() + # Compute the positional encodings once in log space. + pe = torch.zeros(max_len, d_model).float() + pe.require_grad = False + + position = torch.arange(0, max_len).float().unsqueeze(1) + div_term = (torch.arange(0, d_model, 2).float() + * -(math.log(10000.0) / d_model)).exp() + + pe[:, 0::2] = torch.sin(position * div_term) + pe[:, 1::2] = torch.cos(position * div_term) + + pe = pe.unsqueeze(0) + self.register_buffer('pe', pe) + + def forward(self, x): + return self.pe[:, :x.size(1)] + + +class TokenEmbedding(nn.Module): + def __init__(self, c_in, d_model): + super(TokenEmbedding, self).__init__() + padding = 1 if torch.__version__ >= '1.5.0' else 2 + self.tokenConv = nn.Conv1d(in_channels=c_in, out_channels=d_model, + kernel_size=3, padding=padding, padding_mode='circular', bias=False) + for m in self.modules(): + if isinstance(m, nn.Conv1d): + nn.init.kaiming_normal_( + m.weight, mode='fan_in', nonlinearity='leaky_relu') + + def forward(self, x): + x = self.tokenConv(x.permute(0, 2, 1)).transpose(1, 2) + return x + + +class FixedEmbedding(nn.Module): + def __init__(self, c_in, d_model): + super(FixedEmbedding, self).__init__() + + w = torch.zeros(c_in, d_model).float() + w.require_grad = False + + position = torch.arange(0, c_in).float().unsqueeze(1) + div_term = (torch.arange(0, d_model, 2).float() + * -(math.log(10000.0) / d_model)).exp() + + w[:, 0::2] = torch.sin(position * div_term) + w[:, 1::2] = torch.cos(position * div_term) + + self.emb = nn.Embedding(c_in, d_model) + self.emb.weight = nn.Parameter(w, requires_grad=False) + + def forward(self, x): + return self.emb(x).detach() + + +class TemporalEmbedding(nn.Module): + def __init__(self, d_model, embed_type='fixed', freq='h'): + super(TemporalEmbedding, self).__init__() + + minute_size = 4 + hour_size = 24 + weekday_size = 7 + day_size = 32 + month_size = 13 + + Embed = FixedEmbedding if embed_type == 'fixed' else nn.Embedding + if freq == 't': + self.minute_embed = Embed(minute_size, d_model) + self.hour_embed = Embed(hour_size, d_model) + self.weekday_embed = Embed(weekday_size, d_model) + self.day_embed = Embed(day_size, d_model) + self.month_embed = Embed(month_size, d_model) + + def 
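PositionalEmbedding above precomputes the standard sinusoidal table PE[pos, 2i] = sin(pos / 10000^(2i/d_model)) and PE[pos, 2i+1] = cos(pos / 10000^(2i/d_model)). A small numeric check with toy sizes (illustrative only):

import math
import torch

d_model, max_len = 8, 16
pos = torch.arange(max_len, dtype=torch.float32).unsqueeze(1)     # [max_len, 1]
even = torch.arange(0, d_model, 2, dtype=torch.float32)
div_term = torch.exp(-even * math.log(10000.0) / d_model)         # 10000^(-2i / d_model)
pe = torch.zeros(max_len, d_model)
pe[:, 0::2] = torch.sin(pos * div_term)
pe[:, 1::2] = torch.cos(pos * div_term)
# position 3, dimension 0 is sin(3 / 10000^0) = sin(3)
assert torch.isclose(pe[3, 0], torch.sin(torch.tensor(3.0)))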
forward(self, x): + x = x.long() + minute_x = self.minute_embed(x[:, :, 4]) if hasattr( + self, 'minute_embed') else 0. + hour_x = self.hour_embed(x[:, :, 3]) + weekday_x = self.weekday_embed(x[:, :, 2]) + day_x = self.day_embed(x[:, :, 1]) + month_x = self.month_embed(x[:, :, 0]) + + return hour_x + weekday_x + day_x + month_x + minute_x + + +class TimeFeatureEmbedding(nn.Module): + def __init__(self, d_model, embed_type='timeF', freq='h'): + super(TimeFeatureEmbedding, self).__init__() + + freq_map = {'h': 4, 't': 5, 's': 6, + 'm': 1, 'a': 1, 'w': 2, 'd': 3, 'b': 3} + d_inp = freq_map[freq] + self.embed = nn.Linear(d_inp, d_model, bias=False) + + def forward(self, x): + return self.embed(x) + + +class DataEmbedding(nn.Module): + def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1): + super(DataEmbedding, self).__init__() + + self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model) + self.position_embedding = PositionalEmbedding(d_model=d_model) + self.temporal_embedding = TemporalEmbedding(d_model=d_model, embed_type=embed_type, + freq=freq) if embed_type != 'timeF' else TimeFeatureEmbedding( + d_model=d_model, embed_type=embed_type, freq=freq) + self.dropout = nn.Dropout(p=dropout) + + def forward(self, x, x_mark): + if x_mark is None: + x = self.value_embedding(x) + self.position_embedding(x) + else: + x = self.value_embedding( + x) + self.temporal_embedding(x_mark) + self.position_embedding(x) + return self.dropout(x) + + +class DataEmbedding_inverted(nn.Module): + def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1): + super(DataEmbedding_inverted, self).__init__() + self.value_embedding = nn.Linear(c_in, d_model) + self.dropout = nn.Dropout(p=dropout) + + def forward(self, x, x_mark): + x = x.permute(0, 2, 1) + # x: [Batch Variate Time] + if x_mark is None: + x = self.value_embedding(x) + else: + x = self.value_embedding(torch.cat([x, x_mark.permute(0, 2, 1)], 1)) + # x: [Batch Variate d_model] + return self.dropout(x) + + +class DataEmbedding_wo_pos(nn.Module): + def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1): + super(DataEmbedding_wo_pos, self).__init__() + + self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model) + self.position_embedding = PositionalEmbedding(d_model=d_model) + self.temporal_embedding = TemporalEmbedding(d_model=d_model, embed_type=embed_type, + freq=freq) if embed_type != 'timeF' else TimeFeatureEmbedding( + d_model=d_model, embed_type=embed_type, freq=freq) + self.dropout = nn.Dropout(p=dropout) + + def forward(self, x, x_mark): + if x_mark is None: + x = self.value_embedding(x) + else: + x = self.value_embedding(x) + self.temporal_embedding(x_mark) + return self.dropout(x) + + +class PatchEmbedding(nn.Module): + def __init__(self, d_model, patch_len, stride, padding, dropout): + super(PatchEmbedding, self).__init__() + # Patching + self.patch_len = patch_len + self.stride = stride + self.padding_patch_layer = nn.ReplicationPad1d((0, padding)) + + # Backbone, Input encoding: projection of feature vectors onto a d-dim vector space + self.value_embedding = nn.Linear(patch_len, d_model, bias=False) + + # Positional embedding + self.position_embedding = PositionalEmbedding(d_model) + + # Residual dropout + self.dropout = nn.Dropout(dropout) + + def forward(self, x): + # do patching + n_vars = x.shape[1] + x = self.padding_patch_layer(x) + x = x.unfold(dimension=-1, size=self.patch_len, step=self.stride) + x = torch.reshape(x, (x.shape[0] * x.shape[1], x.shape[2], 
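DataEmbedding_inverted above follows the inverted (iTransformer-style) convention: instead of one token per time step, each variate's whole length-L series, optionally concatenated with the time-feature marks, is linearly projected into a single d_model token. A shape walk-through with assumed toy sizes:

import torch
import torch.nn as nn

B, L, C, d_model = 2, 96, 7, 16
x = torch.randn(B, L, C)
x_mark = torch.randn(B, L, 4)                       # e.g. 4 calendar features for freq='h'
proj = nn.Linear(L, d_model)                        # projects the time axis, not the channel axis
tokens = proj(torch.cat([x, x_mark], dim=-1).permute(0, 2, 1))
print(tokens.shape)                                 # torch.Size([2, 11, 16]): C + 4 variate tokens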
x.shape[3])) + # Input encoding + x = self.value_embedding(x) + self.position_embedding(x) + return self.dropout(x), n_vars diff --git a/layers/FourierCorrelation.py b/layers/FourierCorrelation.py new file mode 100644 index 0000000..6a8cbd4 --- /dev/null +++ b/layers/FourierCorrelation.py @@ -0,0 +1,162 @@ +# coding=utf-8 +# author=maziqing +# email=maziqing.mzq@alibaba-inc.com + +import numpy as np +import torch +import torch.nn as nn + + +def get_frequency_modes(seq_len, modes=64, mode_select_method='random'): + """ + get modes on frequency domain: + 'random' means sampling randomly; + 'else' means sampling the lowest modes; + """ + modes = min(modes, seq_len // 2) + if mode_select_method == 'random': + index = list(range(0, seq_len // 2)) + np.random.shuffle(index) + index = index[:modes] + else: + index = list(range(0, modes)) + index.sort() + return index + + +# ########## fourier layer ############# +class FourierBlock(nn.Module): + def __init__(self, in_channels, out_channels, n_heads, seq_len, modes=0, mode_select_method='random'): + super(FourierBlock, self).__init__() + print('fourier enhanced block used!') + """ + 1D Fourier block. It performs representation learning on frequency domain, + it does FFT, linear transform, and Inverse FFT. + """ + # get modes on frequency domain + self.index = get_frequency_modes(seq_len, modes=modes, mode_select_method=mode_select_method) + print('modes={}, index={}'.format(modes, self.index)) + + self.n_heads = n_heads + self.scale = (1 / (in_channels * out_channels)) + self.weights1 = nn.Parameter( + self.scale * torch.rand(self.n_heads, in_channels // self.n_heads, out_channels // self.n_heads, + len(self.index), dtype=torch.float)) + self.weights2 = nn.Parameter( + self.scale * torch.rand(self.n_heads, in_channels // self.n_heads, out_channels // self.n_heads, + len(self.index), dtype=torch.float)) + + # Complex multiplication + def compl_mul1d(self, order, x, weights): + x_flag = True + w_flag = True + if not torch.is_complex(x): + x_flag = False + x = torch.complex(x, torch.zeros_like(x).to(x.device)) + if not torch.is_complex(weights): + w_flag = False + weights = torch.complex(weights, torch.zeros_like(weights).to(weights.device)) + if x_flag or w_flag: + return torch.complex(torch.einsum(order, x.real, weights.real) - torch.einsum(order, x.imag, weights.imag), + torch.einsum(order, x.real, weights.imag) + torch.einsum(order, x.imag, weights.real)) + else: + return torch.einsum(order, x.real, weights.real) + + def forward(self, q, k, v, mask): + # size = [B, L, H, E] + B, L, H, E = q.shape + x = q.permute(0, 2, 3, 1) + # Compute Fourier coefficients + x_ft = torch.fft.rfft(x, dim=-1) + # Perform Fourier neural operations + out_ft = torch.zeros(B, H, E, L // 2 + 1, device=x.device, dtype=torch.cfloat) + for wi, i in enumerate(self.index): + if i >= x_ft.shape[3] or wi >= out_ft.shape[3]: + continue + out_ft[:, :, :, wi] = self.compl_mul1d("bhi,hio->bho", x_ft[:, :, :, i], + torch.complex(self.weights1, self.weights2)[:, :, :, wi]) + # Return to time domain + x = torch.fft.irfft(out_ft, n=x.size(-1)) + return (x, None) + +# ########## Fourier Cross Former #################### +class FourierCrossAttention(nn.Module): + def __init__(self, in_channels, out_channels, seq_len_q, seq_len_kv, modes=64, mode_select_method='random', + activation='tanh', policy=0, num_heads=8): + super(FourierCrossAttention, self).__init__() + print(' fourier enhanced cross attention used!') + """ + 1D Fourier Cross Attention layer. 
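PatchEmbedding above pads the series on the right by `stride` steps and then slides a window with unfold, so patch_num = (seq_len - patch_len) // stride + 2. A quick check with illustrative sizes:

import torch
import torch.nn as nn

B, C, seq_len = 2, 7, 96
patch_len, stride = 16, 8
x = torch.randn(B, C, seq_len)
x = nn.ReplicationPad1d((0, stride))(x)                        # right-pad by `stride`
patches = x.unfold(dimension=-1, size=patch_len, step=stride)  # [B, C, patch_num, patch_len]
print(patches.shape)     # torch.Size([2, 7, 12, 16]); (96 + 8 - 16) // 8 + 1 = 12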
It does FFT, linear transform, attention mechanism and Inverse FFT. + """ + self.activation = activation + self.in_channels = in_channels + self.out_channels = out_channels + # get modes for queries and keys (& values) on frequency domain + self.index_q = get_frequency_modes(seq_len_q, modes=modes, mode_select_method=mode_select_method) + self.index_kv = get_frequency_modes(seq_len_kv, modes=modes, mode_select_method=mode_select_method) + + print('modes_q={}, index_q={}'.format(len(self.index_q), self.index_q)) + print('modes_kv={}, index_kv={}'.format(len(self.index_kv), self.index_kv)) + + self.scale = (1 / (in_channels * out_channels)) + self.weights1 = nn.Parameter( + self.scale * torch.rand(num_heads, in_channels // num_heads, out_channels // num_heads, len(self.index_q), dtype=torch.float)) + self.weights2 = nn.Parameter( + self.scale * torch.rand(num_heads, in_channels // num_heads, out_channels // num_heads, len(self.index_q), dtype=torch.float)) + + # Complex multiplication + def compl_mul1d(self, order, x, weights): + x_flag = True + w_flag = True + if not torch.is_complex(x): + x_flag = False + x = torch.complex(x, torch.zeros_like(x).to(x.device)) + if not torch.is_complex(weights): + w_flag = False + weights = torch.complex(weights, torch.zeros_like(weights).to(weights.device)) + if x_flag or w_flag: + return torch.complex(torch.einsum(order, x.real, weights.real) - torch.einsum(order, x.imag, weights.imag), + torch.einsum(order, x.real, weights.imag) + torch.einsum(order, x.imag, weights.real)) + else: + return torch.einsum(order, x.real, weights.real) + + def forward(self, q, k, v, mask): + # size = [B, L, H, E] + B, L, H, E = q.shape + xq = q.permute(0, 2, 3, 1) # size = [B, H, E, L] + xk = k.permute(0, 2, 3, 1) + xv = v.permute(0, 2, 3, 1) + + # Compute Fourier coefficients + xq_ft_ = torch.zeros(B, H, E, len(self.index_q), device=xq.device, dtype=torch.cfloat) + xq_ft = torch.fft.rfft(xq, dim=-1) + for i, j in enumerate(self.index_q): + if j >= xq_ft.shape[3]: + continue + xq_ft_[:, :, :, i] = xq_ft[:, :, :, j] + xk_ft_ = torch.zeros(B, H, E, len(self.index_kv), device=xq.device, dtype=torch.cfloat) + xk_ft = torch.fft.rfft(xk, dim=-1) + for i, j in enumerate(self.index_kv): + if j >= xk_ft.shape[3]: + continue + xk_ft_[:, :, :, i] = xk_ft[:, :, :, j] + + # perform attention mechanism on frequency domain + xqk_ft = (self.compl_mul1d("bhex,bhey->bhxy", xq_ft_, xk_ft_)) + if self.activation == 'tanh': + xqk_ft = torch.complex(xqk_ft.real.tanh(), xqk_ft.imag.tanh()) + elif self.activation == 'softmax': + xqk_ft = torch.softmax(abs(xqk_ft), dim=-1) + xqk_ft = torch.complex(xqk_ft, torch.zeros_like(xqk_ft)) + else: + raise Exception('{} actiation function is not implemented'.format(self.activation)) + xqkv_ft = self.compl_mul1d("bhxy,bhey->bhex", xqk_ft, xk_ft_) + xqkvw = self.compl_mul1d("bhex,heox->bhox", xqkv_ft, torch.complex(self.weights1, self.weights2)) + out_ft = torch.zeros(B, H, E, L // 2 + 1, device=xq.device, dtype=torch.cfloat) + for i, j in enumerate(self.index_q): + if i >= xqkvw.shape[3] or j >= out_ft.shape[3]: + continue + out_ft[:, :, :, j] = xqkvw[:, :, :, i] + # Return to time domain + out = torch.fft.irfft(out_ft / self.in_channels / self.out_channels, n=xq.size(-1)) + return (out, None) diff --git a/layers/GraphMixer.py b/layers/GraphMixer.py new file mode 100644 index 0000000..c900805 --- /dev/null +++ b/layers/GraphMixer.py @@ -0,0 +1,83 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +import math + +class 
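The compl_mul1d helper above just expands (a + ib)(c + id) into real and imaginary einsums; on recent PyTorch versions it agrees with a plain complex einsum, which this toy check illustrates (random tensors, assumed shapes):

import torch

order = "bhi,hio->bho"
x = torch.randn(2, 3, 4, dtype=torch.cfloat)
w = torch.randn(3, 4, 5, dtype=torch.cfloat)
manual = torch.complex(
    torch.einsum(order, x.real, w.real) - torch.einsum(order, x.imag, w.imag),
    torch.einsum(order, x.real, w.imag) + torch.einsum(order, x.imag, w.real),
)
assert torch.allclose(manual, torch.einsum(order, x, w), atol=1e-5)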
HierarchicalGraphMixer(nn.Module): + """ + 分层图混合器,同时考虑宏观通道关系和微观 Patch 级别注意力。 + 输入 z : 形状为 [B, C, N, D] 的张量 + 输出 z_out : 形状同输入 + """ + def __init__(self, n_channel: int, dim: int, k: int = 5, tau: float = 0.2): + super().__init__() + self.k = k + self.tau = tau + + # Level 1: Channel Graph + self.A = nn.Parameter(torch.zeros(n_channel, n_channel)) + self.se = nn.Sequential( + nn.Linear(dim, dim // 4, bias=False), nn.ReLU(), + nn.Linear(dim // 4, 1, bias=False), nn.Sigmoid() + ) + + # Level 2: Patch Cross-Attention + self.q_proj = nn.Linear(dim, dim) + self.k_proj = nn.Linear(dim, dim) + self.v_proj = nn.Linear(dim, dim) + self.out_proj = nn.Linear(dim, dim) + self.norm = nn.LayerNorm(dim) + + def _row_sparse(self, logits: torch.Tensor) -> torch.Tensor: + """Gumbel-Softmax based sparse attention""" + g = -torch.empty_like(logits).exponential_().log() + y = (logits + g) / self.tau + probs = F.softmax(y, dim=-1) + + # Ensure k doesn't exceed the dimension size + k_actual = min(self.k, probs.size(-1)) + if k_actual <= 0: + return torch.zeros_like(probs) + + topk_val, _ = torch.topk(probs, k_actual, dim=-1) + thr = topk_val[..., -1].unsqueeze(-1) + sparse = torch.where(probs >= thr, probs, torch.zeros_like(probs)) + return sparse.detach() + probs - probs.detach() + + def forward(self, z): + # z 的形状: [B, C, N, D] + B, C, N, D = z.shape + + # --- Level 1: 计算宏观权重 --- + A_sparse = self._row_sparse(self.A) # 通道连接稀疏图 A_sparse: [C, C] + + # --- Level 2: 跨通道 Patch 交互 --- + out_z = torch.zeros_like(z) + for i in range(C): # 遍历每个目标通道 i + target_z = z[:, i, :, :] # [B, N, D] + + # 准备聚合来自其他通道的 patch 级别上下文 + aggregated_context = torch.zeros_like(target_z) + + for j in range(C): # 遍历每个源通道 j + if A_sparse[i, j] != 0: + source_z = z[:, j, :, :] # [B, N, D] + + # --- 执行交叉注意力 --- + Q = self.q_proj(target_z) # Query 来自目标通道 i + K = self.k_proj(source_z) # Key 来自源通道 j + V = self.v_proj(source_z) # Value 来自源通道 j + + attn_scores = torch.bmm(Q, K.transpose(1, 2)) / math.sqrt(D) + attn_probs = F.softmax(attn_scores, dim=-1) # [B, N, N] + + context = torch.bmm(attn_probs, V) # [B, N, D], 从 j 聚合到 i 的上下文 + + # 加权上下文 + weighted_context = A_sparse[i, j] * context + aggregated_context = aggregated_context + weighted_context + + # 将聚合后的上下文通过输出层,并与原始目标表示相加(残差连接) + out_z[:, i, :, :] = self.norm(target_z + self.out_proj(aggregated_context)) + + return out_z \ No newline at end of file diff --git a/layers/MultiWaveletCorrelation.py b/layers/MultiWaveletCorrelation.py new file mode 100644 index 0000000..b6feb59 --- /dev/null +++ b/layers/MultiWaveletCorrelation.py @@ -0,0 +1,587 @@ +import torch +import numpy as np +import torch.nn as nn +import torch.nn.functional as F +from torch import Tensor +from typing import List, Tuple +import math +from functools import partial +from torch import nn, einsum, diagonal +from math import log2, ceil +import pdb +from sympy import Poly, legendre, Symbol, chebyshevt +from scipy.special import eval_legendre + + +def legendreDer(k, x): + def _legendre(k, x): + return (2 * k + 1) * eval_legendre(k, x) + + out = 0 + for i in np.arange(k - 1, -1, -2): + out += _legendre(i, x) + return out + + +def phi_(phi_c, x, lb=0, ub=1): + mask = np.logical_or(x < lb, x > ub) * 1.0 + return np.polynomial.polynomial.Polynomial(phi_c)(x) * (1 - mask) + + +def get_phi_psi(k, base): + x = Symbol('x') + phi_coeff = np.zeros((k, k)) + phi_2x_coeff = np.zeros((k, k)) + if base == 'legendre': + for ki in range(k): + coeff_ = Poly(legendre(ki, 2 * x - 1), x).all_coeffs() + phi_coeff[ki, :ki + 1] = np.flip(np.sqrt(2 * ki + 
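The _row_sparse method above combines Gumbel noise with a straight-through estimator: the forward pass keeps only each row's top-k softmax probabilities, while gradients flow through the dense softmax. A condensed illustration with toy logits (assumed sizes):

import torch
import torch.nn.functional as F

logits = torch.randn(4, 4, requires_grad=True)
tau, k = 0.2, 2
g = -torch.empty_like(logits).exponential_().log()     # Gumbel(0, 1) noise
probs = F.softmax((logits + g) / tau, dim=-1)
thr = torch.topk(probs, k, dim=-1).values[..., -1:]     # k-th largest value per row
hard = torch.where(probs >= thr, probs, torch.zeros_like(probs))
out = hard.detach() + probs - probs.detach()            # forward: sparse, backward: dense
out.sum().backward()
print((out != 0).sum(dim=-1))                           # k non-zeros per row
print(logits.grad is not None)                          # True: gradients reach the logits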
1) * np.array(coeff_).astype(np.float64)) + coeff_ = Poly(legendre(ki, 4 * x - 1), x).all_coeffs() + phi_2x_coeff[ki, :ki + 1] = np.flip(np.sqrt(2) * np.sqrt(2 * ki + 1) * np.array(coeff_).astype(np.float64)) + + psi1_coeff = np.zeros((k, k)) + psi2_coeff = np.zeros((k, k)) + for ki in range(k): + psi1_coeff[ki, :] = phi_2x_coeff[ki, :] + for i in range(k): + a = phi_2x_coeff[ki, :ki + 1] + b = phi_coeff[i, :i + 1] + prod_ = np.convolve(a, b) + prod_[np.abs(prod_) < 1e-8] = 0 + proj_ = (prod_ * 1 / (np.arange(len(prod_)) + 1) * np.power(0.5, 1 + np.arange(len(prod_)))).sum() + psi1_coeff[ki, :] -= proj_ * phi_coeff[i, :] + psi2_coeff[ki, :] -= proj_ * phi_coeff[i, :] + for j in range(ki): + a = phi_2x_coeff[ki, :ki + 1] + b = psi1_coeff[j, :] + prod_ = np.convolve(a, b) + prod_[np.abs(prod_) < 1e-8] = 0 + proj_ = (prod_ * 1 / (np.arange(len(prod_)) + 1) * np.power(0.5, 1 + np.arange(len(prod_)))).sum() + psi1_coeff[ki, :] -= proj_ * psi1_coeff[j, :] + psi2_coeff[ki, :] -= proj_ * psi2_coeff[j, :] + + a = psi1_coeff[ki, :] + prod_ = np.convolve(a, a) + prod_[np.abs(prod_) < 1e-8] = 0 + norm1 = (prod_ * 1 / (np.arange(len(prod_)) + 1) * np.power(0.5, 1 + np.arange(len(prod_)))).sum() + + a = psi2_coeff[ki, :] + prod_ = np.convolve(a, a) + prod_[np.abs(prod_) < 1e-8] = 0 + norm2 = (prod_ * 1 / (np.arange(len(prod_)) + 1) * (1 - np.power(0.5, 1 + np.arange(len(prod_))))).sum() + norm_ = np.sqrt(norm1 + norm2) + psi1_coeff[ki, :] /= norm_ + psi2_coeff[ki, :] /= norm_ + psi1_coeff[np.abs(psi1_coeff) < 1e-8] = 0 + psi2_coeff[np.abs(psi2_coeff) < 1e-8] = 0 + + phi = [np.poly1d(np.flip(phi_coeff[i, :])) for i in range(k)] + psi1 = [np.poly1d(np.flip(psi1_coeff[i, :])) for i in range(k)] + psi2 = [np.poly1d(np.flip(psi2_coeff[i, :])) for i in range(k)] + + elif base == 'chebyshev': + for ki in range(k): + if ki == 0: + phi_coeff[ki, :ki + 1] = np.sqrt(2 / np.pi) + phi_2x_coeff[ki, :ki + 1] = np.sqrt(2 / np.pi) * np.sqrt(2) + else: + coeff_ = Poly(chebyshevt(ki, 2 * x - 1), x).all_coeffs() + phi_coeff[ki, :ki + 1] = np.flip(2 / np.sqrt(np.pi) * np.array(coeff_).astype(np.float64)) + coeff_ = Poly(chebyshevt(ki, 4 * x - 1), x).all_coeffs() + phi_2x_coeff[ki, :ki + 1] = np.flip( + np.sqrt(2) * 2 / np.sqrt(np.pi) * np.array(coeff_).astype(np.float64)) + + phi = [partial(phi_, phi_coeff[i, :]) for i in range(k)] + + x = Symbol('x') + kUse = 2 * k + roots = Poly(chebyshevt(kUse, 2 * x - 1)).all_roots() + x_m = np.array([rt.evalf(20) for rt in roots]).astype(np.float64) + # x_m[x_m==0.5] = 0.5 + 1e-8 # add small noise to avoid the case of 0.5 belonging to both phi(2x) and phi(2x-1) + # not needed for our purpose here, we use even k always to avoid + wm = np.pi / kUse / 2 + + psi1_coeff = np.zeros((k, k)) + psi2_coeff = np.zeros((k, k)) + + psi1 = [[] for _ in range(k)] + psi2 = [[] for _ in range(k)] + + for ki in range(k): + psi1_coeff[ki, :] = phi_2x_coeff[ki, :] + for i in range(k): + proj_ = (wm * phi[i](x_m) * np.sqrt(2) * phi[ki](2 * x_m)).sum() + psi1_coeff[ki, :] -= proj_ * phi_coeff[i, :] + psi2_coeff[ki, :] -= proj_ * phi_coeff[i, :] + + for j in range(ki): + proj_ = (wm * psi1[j](x_m) * np.sqrt(2) * phi[ki](2 * x_m)).sum() + psi1_coeff[ki, :] -= proj_ * psi1_coeff[j, :] + psi2_coeff[ki, :] -= proj_ * psi2_coeff[j, :] + + psi1[ki] = partial(phi_, psi1_coeff[ki, :], lb=0, ub=0.5) + psi2[ki] = partial(phi_, psi2_coeff[ki, :], lb=0.5, ub=1) + + norm1 = (wm * psi1[ki](x_m) * psi1[ki](x_m)).sum() + norm2 = (wm * psi2[ki](x_m) * psi2[ki](x_m)).sum() + + norm_ = np.sqrt(norm1 + norm2) + psi1_coeff[ki, 
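The Legendre branch above builds scaling functions phi_k(x) = sqrt(2k + 1) * P_k(2x - 1), which are orthonormal on [0, 1]; the psi construction then Gram-Schmidts the half-interval copies against them. A quick numerical check of that orthonormality (scipy is already a dependency of this file; the grid size is arbitrary):

import numpy as np
from scipy.special import eval_legendre

x = np.linspace(0, 1, 200001)
k = 3
phi = [np.sqrt(2 * i + 1) * eval_legendre(i, 2 * x - 1) for i in range(k)]
gram = np.array([[np.mean(pi * pj) for pj in phi] for pi in phi])  # approximates the [0,1] integral
assert np.allclose(gram, np.eye(k), atol=1e-3)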
:] /= norm_ + psi2_coeff[ki, :] /= norm_ + psi1_coeff[np.abs(psi1_coeff) < 1e-8] = 0 + psi2_coeff[np.abs(psi2_coeff) < 1e-8] = 0 + + psi1[ki] = partial(phi_, psi1_coeff[ki, :], lb=0, ub=0.5 + 1e-16) + psi2[ki] = partial(phi_, psi2_coeff[ki, :], lb=0.5 + 1e-16, ub=1) + + return phi, psi1, psi2 + + +def get_filter(base, k): + def psi(psi1, psi2, i, inp): + mask = (inp <= 0.5) * 1.0 + return psi1[i](inp) * mask + psi2[i](inp) * (1 - mask) + + if base not in ['legendre', 'chebyshev']: + raise Exception('Base not supported') + + x = Symbol('x') + H0 = np.zeros((k, k)) + H1 = np.zeros((k, k)) + G0 = np.zeros((k, k)) + G1 = np.zeros((k, k)) + PHI0 = np.zeros((k, k)) + PHI1 = np.zeros((k, k)) + phi, psi1, psi2 = get_phi_psi(k, base) + if base == 'legendre': + roots = Poly(legendre(k, 2 * x - 1)).all_roots() + x_m = np.array([rt.evalf(20) for rt in roots]).astype(np.float64) + wm = 1 / k / legendreDer(k, 2 * x_m - 1) / eval_legendre(k - 1, 2 * x_m - 1) + + for ki in range(k): + for kpi in range(k): + H0[ki, kpi] = 1 / np.sqrt(2) * (wm * phi[ki](x_m / 2) * phi[kpi](x_m)).sum() + G0[ki, kpi] = 1 / np.sqrt(2) * (wm * psi(psi1, psi2, ki, x_m / 2) * phi[kpi](x_m)).sum() + H1[ki, kpi] = 1 / np.sqrt(2) * (wm * phi[ki]((x_m + 1) / 2) * phi[kpi](x_m)).sum() + G1[ki, kpi] = 1 / np.sqrt(2) * (wm * psi(psi1, psi2, ki, (x_m + 1) / 2) * phi[kpi](x_m)).sum() + + PHI0 = np.eye(k) + PHI1 = np.eye(k) + + elif base == 'chebyshev': + x = Symbol('x') + kUse = 2 * k + roots = Poly(chebyshevt(kUse, 2 * x - 1)).all_roots() + x_m = np.array([rt.evalf(20) for rt in roots]).astype(np.float64) + # x_m[x_m==0.5] = 0.5 + 1e-8 # add small noise to avoid the case of 0.5 belonging to both phi(2x) and phi(2x-1) + # not needed for our purpose here, we use even k always to avoid + wm = np.pi / kUse / 2 + + for ki in range(k): + for kpi in range(k): + H0[ki, kpi] = 1 / np.sqrt(2) * (wm * phi[ki](x_m / 2) * phi[kpi](x_m)).sum() + G0[ki, kpi] = 1 / np.sqrt(2) * (wm * psi(psi1, psi2, ki, x_m / 2) * phi[kpi](x_m)).sum() + H1[ki, kpi] = 1 / np.sqrt(2) * (wm * phi[ki]((x_m + 1) / 2) * phi[kpi](x_m)).sum() + G1[ki, kpi] = 1 / np.sqrt(2) * (wm * psi(psi1, psi2, ki, (x_m + 1) / 2) * phi[kpi](x_m)).sum() + + PHI0[ki, kpi] = (wm * phi[ki](2 * x_m) * phi[kpi](2 * x_m)).sum() * 2 + PHI1[ki, kpi] = (wm * phi[ki](2 * x_m - 1) * phi[kpi](2 * x_m - 1)).sum() * 2 + + PHI0[np.abs(PHI0) < 1e-8] = 0 + PHI1[np.abs(PHI1) < 1e-8] = 0 + + H0[np.abs(H0) < 1e-8] = 0 + H1[np.abs(H1) < 1e-8] = 0 + G0[np.abs(G0) < 1e-8] = 0 + G1[np.abs(G1) < 1e-8] = 0 + + return H0, H1, G0, G1, PHI0, PHI1 + + +class MultiWaveletTransform(nn.Module): + """ + 1D multiwavelet block. 
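get_filter above returns the multiwavelet decomposition filters (H0, H1, G0, G1) and reconstruction filters for the chosen polynomial base. For k = 1 the Legendre construction reduces to essentially the Haar pair, whose perfect-reconstruction property this tiny numpy check illustrates (signs/ordering may differ from the matrices above):

import numpy as np

x = np.random.randn(16)
x_even, x_odd = x[::2], x[1::2]
s = (x_even + x_odd) / np.sqrt(2)        # smooth / scaling coefficients
d = (x_even - x_odd) / np.sqrt(2)        # detail / wavelet coefficients
rec = np.empty_like(x)
rec[::2] = (s + d) / np.sqrt(2)
rec[1::2] = (s - d) / np.sqrt(2)
assert np.allclose(x, rec)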
+ """ + + def __init__(self, ich=1, k=8, alpha=16, c=128, + nCZ=1, L=0, base='legendre', attention_dropout=0.1): + super(MultiWaveletTransform, self).__init__() + print('base', base) + self.k = k + self.c = c + self.L = L + self.nCZ = nCZ + self.Lk0 = nn.Linear(ich, c * k) + self.Lk1 = nn.Linear(c * k, ich) + self.ich = ich + self.MWT_CZ = nn.ModuleList(MWT_CZ1d(k, alpha, L, c, base) for i in range(nCZ)) + + def forward(self, queries, keys, values, attn_mask): + B, L, H, E = queries.shape + _, S, _, D = values.shape + if L > S: + zeros = torch.zeros_like(queries[:, :(L - S), :]).float() + values = torch.cat([values, zeros], dim=1) + keys = torch.cat([keys, zeros], dim=1) + else: + values = values[:, :L, :, :] + keys = keys[:, :L, :, :] + values = values.view(B, L, -1) + + V = self.Lk0(values).view(B, L, self.c, -1) + for i in range(self.nCZ): + V = self.MWT_CZ[i](V) + if i < self.nCZ - 1: + V = F.relu(V) + + V = self.Lk1(V.view(B, L, -1)) + V = V.view(B, L, -1, D) + return (V.contiguous(), None) + + +class MultiWaveletCross(nn.Module): + """ + 1D Multiwavelet Cross Attention layer. + """ + + def __init__(self, in_channels, out_channels, seq_len_q, seq_len_kv, modes, c=64, + k=8, ich=512, + L=0, + base='legendre', + mode_select_method='random', + initializer=None, activation='tanh', + **kwargs): + super(MultiWaveletCross, self).__init__() + print('base', base) + + self.c = c + self.k = k + self.L = L + H0, H1, G0, G1, PHI0, PHI1 = get_filter(base, k) + H0r = H0 @ PHI0 + G0r = G0 @ PHI0 + H1r = H1 @ PHI1 + G1r = G1 @ PHI1 + + H0r[np.abs(H0r) < 1e-8] = 0 + H1r[np.abs(H1r) < 1e-8] = 0 + G0r[np.abs(G0r) < 1e-8] = 0 + G1r[np.abs(G1r) < 1e-8] = 0 + self.max_item = 3 + + self.attn1 = FourierCrossAttentionW(in_channels=in_channels, out_channels=out_channels, seq_len_q=seq_len_q, + seq_len_kv=seq_len_kv, modes=modes, activation=activation, + mode_select_method=mode_select_method) + self.attn2 = FourierCrossAttentionW(in_channels=in_channels, out_channels=out_channels, seq_len_q=seq_len_q, + seq_len_kv=seq_len_kv, modes=modes, activation=activation, + mode_select_method=mode_select_method) + self.attn3 = FourierCrossAttentionW(in_channels=in_channels, out_channels=out_channels, seq_len_q=seq_len_q, + seq_len_kv=seq_len_kv, modes=modes, activation=activation, + mode_select_method=mode_select_method) + self.attn4 = FourierCrossAttentionW(in_channels=in_channels, out_channels=out_channels, seq_len_q=seq_len_q, + seq_len_kv=seq_len_kv, modes=modes, activation=activation, + mode_select_method=mode_select_method) + self.T0 = nn.Linear(k, k) + self.register_buffer('ec_s', torch.Tensor( + np.concatenate((H0.T, H1.T), axis=0))) + self.register_buffer('ec_d', torch.Tensor( + np.concatenate((G0.T, G1.T), axis=0))) + + self.register_buffer('rc_e', torch.Tensor( + np.concatenate((H0r, G0r), axis=0))) + self.register_buffer('rc_o', torch.Tensor( + np.concatenate((H1r, G1r), axis=0))) + + self.Lk = nn.Linear(ich, c * k) + self.Lq = nn.Linear(ich, c * k) + self.Lv = nn.Linear(ich, c * k) + self.out = nn.Linear(c * k, ich) + self.modes1 = modes + + def forward(self, q, k, v, mask=None): + B, N, H, E = q.shape # (B, N, H, E) torch.Size([3, 768, 8, 2]) + _, S, _, _ = k.shape # (B, S, H, E) torch.Size([3, 96, 8, 2]) + + q = q.view(q.shape[0], q.shape[1], -1) + k = k.view(k.shape[0], k.shape[1], -1) + v = v.view(v.shape[0], v.shape[1], -1) + q = self.Lq(q) + q = q.view(q.shape[0], q.shape[1], self.c, self.k) + k = self.Lk(k) + k = k.view(k.shape[0], k.shape[1], self.c, self.k) + v = self.Lv(v) + v = v.view(v.shape[0], 
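MultiWaveletTransform.forward above first aligns the key/value length with the query length by zero-padding or truncating along time before projecting into the c * k wavelet channels. A toy trace of that alignment (random tensors, assumed sizes):

import torch

B, L, S, H, D = 2, 12, 8, 4, 6
queries = torch.randn(B, L, H, D)
keys = torch.randn(B, S, H, D)
values = torch.randn(B, S, H, D)
if L > S:
    pad = torch.zeros_like(queries[:, :L - S])
    keys = torch.cat([keys, pad], dim=1)
    values = torch.cat([values, pad], dim=1)
else:
    keys, values = keys[:, :L], values[:, :L]
print(keys.shape, values.shape)   # both torch.Size([2, 12, 4, 6])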
v.shape[1], self.c, self.k) + + if N > S: + zeros = torch.zeros_like(q[:, :(N - S), :]).float() + v = torch.cat([v, zeros], dim=1) + k = torch.cat([k, zeros], dim=1) + else: + v = v[:, :N, :, :] + k = k[:, :N, :, :] + + ns = math.floor(np.log2(N)) + nl = pow(2, math.ceil(np.log2(N))) + extra_q = q[:, 0:nl - N, :, :] + extra_k = k[:, 0:nl - N, :, :] + extra_v = v[:, 0:nl - N, :, :] + q = torch.cat([q, extra_q], 1) + k = torch.cat([k, extra_k], 1) + v = torch.cat([v, extra_v], 1) + + Ud_q = torch.jit.annotate(List[Tuple[Tensor]], []) + Ud_k = torch.jit.annotate(List[Tuple[Tensor]], []) + Ud_v = torch.jit.annotate(List[Tuple[Tensor]], []) + + Us_q = torch.jit.annotate(List[Tensor], []) + Us_k = torch.jit.annotate(List[Tensor], []) + Us_v = torch.jit.annotate(List[Tensor], []) + + Ud = torch.jit.annotate(List[Tensor], []) + Us = torch.jit.annotate(List[Tensor], []) + + # decompose + for i in range(ns - self.L): + # print('q shape',q.shape) + d, q = self.wavelet_transform(q) + Ud_q += [tuple([d, q])] + Us_q += [d] + for i in range(ns - self.L): + d, k = self.wavelet_transform(k) + Ud_k += [tuple([d, k])] + Us_k += [d] + for i in range(ns - self.L): + d, v = self.wavelet_transform(v) + Ud_v += [tuple([d, v])] + Us_v += [d] + for i in range(ns - self.L): + dk, sk = Ud_k[i], Us_k[i] + dq, sq = Ud_q[i], Us_q[i] + dv, sv = Ud_v[i], Us_v[i] + Ud += [self.attn1(dq[0], dk[0], dv[0], mask)[0] + self.attn2(dq[1], dk[1], dv[1], mask)[0]] + Us += [self.attn3(sq, sk, sv, mask)[0]] + v = self.attn4(q, k, v, mask)[0] + + # reconstruct + for i in range(ns - 1 - self.L, -1, -1): + v = v + Us[i] + v = torch.cat((v, Ud[i]), -1) + v = self.evenOdd(v) + v = self.out(v[:, :N, :, :].contiguous().view(B, N, -1)) + return (v.contiguous(), None) + + def wavelet_transform(self, x): + xa = torch.cat([x[:, ::2, :, :], + x[:, 1::2, :, :], + ], -1) + d = torch.matmul(xa, self.ec_d) + s = torch.matmul(xa, self.ec_s) + return d, s + + def evenOdd(self, x): + B, N, c, ich = x.shape # (B, N, c, k) + assert ich == 2 * self.k + x_e = torch.matmul(x, self.rc_e) + x_o = torch.matmul(x, self.rc_o) + + x = torch.zeros(B, N * 2, c, self.k, + device=x.device) + x[..., ::2, :, :] = x_e + x[..., 1::2, :, :] = x_o + return x + + +class FourierCrossAttentionW(nn.Module): + def __init__(self, in_channels, out_channels, seq_len_q, seq_len_kv, modes=16, activation='tanh', + mode_select_method='random'): + super(FourierCrossAttentionW, self).__init__() + print('corss fourier correlation used!') + self.in_channels = in_channels + self.out_channels = out_channels + self.modes1 = modes + self.activation = activation + + def compl_mul1d(self, order, x, weights): + x_flag = True + w_flag = True + if not torch.is_complex(x): + x_flag = False + x = torch.complex(x, torch.zeros_like(x).to(x.device)) + if not torch.is_complex(weights): + w_flag = False + weights = torch.complex(weights, torch.zeros_like(weights).to(weights.device)) + if x_flag or w_flag: + return torch.complex(torch.einsum(order, x.real, weights.real) - torch.einsum(order, x.imag, weights.imag), + torch.einsum(order, x.real, weights.imag) + torch.einsum(order, x.imag, weights.real)) + else: + return torch.einsum(order, x.real, weights.real) + + def forward(self, q, k, v, mask): + B, L, E, H = q.shape + + xq = q.permute(0, 3, 2, 1) # size = [B, H, E, L] torch.Size([3, 8, 64, 512]) + xk = k.permute(0, 3, 2, 1) + xv = v.permute(0, 3, 2, 1) + self.index_q = list(range(0, min(int(L // 2), self.modes1))) + self.index_k_v = list(range(0, min(int(xv.shape[3] // 2), self.modes1))) + + # Compute 
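MultiWaveletCross.forward above pads the aligned sequence up to the next power of two and then runs ns = floor(log2 N) even/odd decomposition steps before reconstructing. A small bookkeeping sketch with an assumed length:

import math

N = 96
ns = math.floor(math.log2(N))          # number of decomposition levels: 6
nl = 2 ** math.ceil(math.log2(N))      # padded length: 128
print(ns, nl, nl - N)                  # 6 128 32 extra positions appended before the transform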
Fourier coefficients + xq_ft_ = torch.zeros(B, H, E, len(self.index_q), device=xq.device, dtype=torch.cfloat) + xq_ft = torch.fft.rfft(xq, dim=-1) + for i, j in enumerate(self.index_q): + xq_ft_[:, :, :, i] = xq_ft[:, :, :, j] + + xk_ft_ = torch.zeros(B, H, E, len(self.index_k_v), device=xq.device, dtype=torch.cfloat) + xk_ft = torch.fft.rfft(xk, dim=-1) + for i, j in enumerate(self.index_k_v): + xk_ft_[:, :, :, i] = xk_ft[:, :, :, j] + xqk_ft = (self.compl_mul1d("bhex,bhey->bhxy", xq_ft_, xk_ft_)) + if self.activation == 'tanh': + xqk_ft = torch.complex(xqk_ft.real.tanh(), xqk_ft.imag.tanh()) + elif self.activation == 'softmax': + xqk_ft = torch.softmax(abs(xqk_ft), dim=-1) + xqk_ft = torch.complex(xqk_ft, torch.zeros_like(xqk_ft)) + else: + raise Exception('{} actiation function is not implemented'.format(self.activation)) + xqkv_ft = self.compl_mul1d("bhxy,bhey->bhex", xqk_ft, xk_ft_) + + xqkvw = xqkv_ft + out_ft = torch.zeros(B, H, E, L // 2 + 1, device=xq.device, dtype=torch.cfloat) + for i, j in enumerate(self.index_q): + out_ft[:, :, :, j] = xqkvw[:, :, :, i] + + out = torch.fft.irfft(out_ft / self.in_channels / self.out_channels, n=xq.size(-1)).permute(0, 3, 2, 1) + # size = [B, L, H, E] + return (out, None) + + +class sparseKernelFT1d(nn.Module): + def __init__(self, + k, alpha, c=1, + nl=1, + initializer=None, + **kwargs): + super(sparseKernelFT1d, self).__init__() + + self.modes1 = alpha + self.scale = (1 / (c * k * c * k)) + self.weights1 = nn.Parameter(self.scale * torch.rand(c * k, c * k, self.modes1, dtype=torch.float)) + self.weights2 = nn.Parameter(self.scale * torch.rand(c * k, c * k, self.modes1, dtype=torch.float)) + self.weights1.requires_grad = True + self.weights2.requires_grad = True + self.k = k + + def compl_mul1d(self, order, x, weights): + x_flag = True + w_flag = True + if not torch.is_complex(x): + x_flag = False + x = torch.complex(x, torch.zeros_like(x).to(x.device)) + if not torch.is_complex(weights): + w_flag = False + weights = torch.complex(weights, torch.zeros_like(weights).to(weights.device)) + if x_flag or w_flag: + return torch.complex(torch.einsum(order, x.real, weights.real) - torch.einsum(order, x.imag, weights.imag), + torch.einsum(order, x.real, weights.imag) + torch.einsum(order, x.imag, weights.real)) + else: + return torch.einsum(order, x.real, weights.real) + + def forward(self, x): + B, N, c, k = x.shape # (B, N, c, k) + + x = x.view(B, N, -1) + x = x.permute(0, 2, 1) + x_fft = torch.fft.rfft(x) + # Multiply relevant Fourier modes + l = min(self.modes1, N // 2 + 1) + out_ft = torch.zeros(B, c * k, N // 2 + 1, device=x.device, dtype=torch.cfloat) + out_ft[:, :, :l] = self.compl_mul1d("bix,iox->box", x_fft[:, :, :l], + torch.complex(self.weights1, self.weights2)[:, :, :l]) + x = torch.fft.irfft(out_ft, n=N) + x = x.permute(0, 2, 1).view(B, N, c, k) + return x + + +# ## +class MWT_CZ1d(nn.Module): + def __init__(self, + k=3, alpha=64, + L=0, c=1, + base='legendre', + initializer=None, + **kwargs): + super(MWT_CZ1d, self).__init__() + + self.k = k + self.L = L + H0, H1, G0, G1, PHI0, PHI1 = get_filter(base, k) + H0r = H0 @ PHI0 + G0r = G0 @ PHI0 + H1r = H1 @ PHI1 + G1r = G1 @ PHI1 + + H0r[np.abs(H0r) < 1e-8] = 0 + H1r[np.abs(H1r) < 1e-8] = 0 + G0r[np.abs(G0r) < 1e-8] = 0 + G1r[np.abs(G1r) < 1e-8] = 0 + self.max_item = 3 + + self.A = sparseKernelFT1d(k, alpha, c) + self.B = sparseKernelFT1d(k, alpha, c) + self.C = sparseKernelFT1d(k, alpha, c) + + self.T0 = nn.Linear(k, k) + + self.register_buffer('ec_s', torch.Tensor( + np.concatenate((H0.T, 
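sparseKernelFT1d above operates in the frequency domain: it keeps only the first `modes` rFFT bins, multiplies them by learned complex weights, and inverts the FFT. Below is a stripped-down sketch with identity weights, so it reduces to low-pass truncation (names and sizes are illustrative):

import torch

B, C, N, modes = 2, 8, 64, 6
x = torch.randn(B, C, N)
x_fft = torch.fft.rfft(x, dim=-1)            # [B, C, N // 2 + 1]
out_fft = torch.zeros_like(x_fft)
keep = min(modes, N // 2 + 1)
out_fft[..., :keep] = x_fft[..., :keep]      # learned complex weights would multiply here
x_low = torch.fft.irfft(out_fft, n=N)        # smooth, low-frequency reconstruction
print(x_low.shape)                           # torch.Size([2, 8, 64])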
H1.T), axis=0))) + self.register_buffer('ec_d', torch.Tensor( + np.concatenate((G0.T, G1.T), axis=0))) + + self.register_buffer('rc_e', torch.Tensor( + np.concatenate((H0r, G0r), axis=0))) + self.register_buffer('rc_o', torch.Tensor( + np.concatenate((H1r, G1r), axis=0))) + + def forward(self, x): + B, N, c, k = x.shape # (B, N, k) + ns = math.floor(np.log2(N)) + nl = pow(2, math.ceil(np.log2(N))) + extra_x = x[:, 0:nl - N, :, :] + x = torch.cat([x, extra_x], 1) + Ud = torch.jit.annotate(List[Tensor], []) + Us = torch.jit.annotate(List[Tensor], []) + for i in range(ns - self.L): + d, x = self.wavelet_transform(x) + Ud += [self.A(d) + self.B(x)] + Us += [self.C(d)] + x = self.T0(x) # coarsest scale transform + + # reconstruct + for i in range(ns - 1 - self.L, -1, -1): + x = x + Us[i] + x = torch.cat((x, Ud[i]), -1) + x = self.evenOdd(x) + x = x[:, :N, :, :] + + return x + + def wavelet_transform(self, x): + xa = torch.cat([x[:, ::2, :, :], + x[:, 1::2, :, :], + ], -1) + d = torch.matmul(xa, self.ec_d) + s = torch.matmul(xa, self.ec_s) + return d, s + + def evenOdd(self, x): + + B, N, c, ich = x.shape # (B, N, c, k) + assert ich == 2 * self.k + x_e = torch.matmul(x, self.rc_e) + x_o = torch.matmul(x, self.rc_o) + + x = torch.zeros(B, N * 2, c, self.k, + device=x.device) + x[..., ::2, :, :] = x_e + x[..., 1::2, :, :] = x_o + return x diff --git a/layers/Pyraformer_EncDec.py b/layers/Pyraformer_EncDec.py new file mode 100644 index 0000000..1af1bf2 --- /dev/null +++ b/layers/Pyraformer_EncDec.py @@ -0,0 +1,218 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.nn.modules.linear import Linear +from layers.SelfAttention_Family import AttentionLayer, FullAttention +from layers.Embed import DataEmbedding +import math + + +def get_mask(input_size, window_size, inner_size): + """Get the attention mask of PAM-Naive""" + # Get the size of all layers + all_size = [] + all_size.append(input_size) + for i in range(len(window_size)): + layer_size = math.floor(all_size[i] / window_size[i]) + all_size.append(layer_size) + + seq_length = sum(all_size) + mask = torch.zeros(seq_length, seq_length) + + # get intra-scale mask + inner_window = inner_size // 2 + for layer_idx in range(len(all_size)): + start = sum(all_size[:layer_idx]) + for i in range(start, start + all_size[layer_idx]): + left_side = max(i - inner_window, start) + right_side = min(i + inner_window + 1, start + all_size[layer_idx]) + mask[i, left_side:right_side] = 1 + + # get inter-scale mask + for layer_idx in range(1, len(all_size)): + start = sum(all_size[:layer_idx]) + for i in range(start, start + all_size[layer_idx]): + left_side = (start - all_size[layer_idx - 1]) + \ + (i - start) * window_size[layer_idx - 1] + if i == (start + all_size[layer_idx] - 1): + right_side = start + else: + right_side = ( + start - all_size[layer_idx - 1]) + (i - start + 1) * window_size[layer_idx - 1] + mask[i, left_side:right_side] = 1 + mask[left_side:right_side, i] = 1 + + mask = (1 - mask).bool() + + return mask, all_size + + +def refer_points(all_sizes, window_size): + """Gather features from PAM's pyramid sequences""" + input_size = all_sizes[0] + indexes = torch.zeros(input_size, len(all_sizes)) + + for i in range(input_size): + indexes[i][0] = i + former_index = i + for j in range(1, len(all_sizes)): + start = sum(all_sizes[:j]) + inner_layer_idx = former_index - (start - all_sizes[j - 1]) + former_index = start + \ + min(inner_layer_idx // window_size[j - 1], all_sizes[j] - 1) + indexes[i][j] = former_index + + indexes 
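get_mask above builds the pyramidal attention mask over the concatenation of all scales: each coarser scale has floor(previous / window) nodes. A quick bookkeeping check with an assumed toy configuration:

import math

input_size, window_size = 96, [4, 4]
all_size = [input_size]
for w in window_size:
    all_size.append(math.floor(all_size[-1] / w))
print(all_size)          # [96, 24, 6]
print(sum(all_size))     # 126 -> the PAM mask is a 126 x 126 boolean matrix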
= indexes.unsqueeze(0).unsqueeze(3) + + return indexes.long() + + +class RegularMask(): + def __init__(self, mask): + self._mask = mask.unsqueeze(1) + + @property + def mask(self): + return self._mask + + +class EncoderLayer(nn.Module): + """ Compose with two layers """ + + def __init__(self, d_model, d_inner, n_head, dropout=0.1, normalize_before=True): + super(EncoderLayer, self).__init__() + + self.slf_attn = AttentionLayer( + FullAttention(mask_flag=True, factor=0, + attention_dropout=dropout, output_attention=False), + d_model, n_head) + self.pos_ffn = PositionwiseFeedForward( + d_model, d_inner, dropout=dropout, normalize_before=normalize_before) + + def forward(self, enc_input, slf_attn_mask=None): + attn_mask = RegularMask(slf_attn_mask) + enc_output, _ = self.slf_attn( + enc_input, enc_input, enc_input, attn_mask=attn_mask) + enc_output = self.pos_ffn(enc_output) + return enc_output + + +class Encoder(nn.Module): + """ A encoder model with self attention mechanism. """ + + def __init__(self, configs, window_size, inner_size): + super().__init__() + + d_bottleneck = configs.d_model//4 + + self.mask, self.all_size = get_mask( + configs.seq_len, window_size, inner_size) + self.indexes = refer_points(self.all_size, window_size) + self.layers = nn.ModuleList([ + EncoderLayer(configs.d_model, configs.d_ff, configs.n_heads, dropout=configs.dropout, + normalize_before=False) for _ in range(configs.e_layers) + ]) # naive pyramid attention + + self.enc_embedding = DataEmbedding( + configs.enc_in, configs.d_model, configs.dropout) + self.conv_layers = Bottleneck_Construct( + configs.d_model, window_size, d_bottleneck) + + def forward(self, x_enc, x_mark_enc): + seq_enc = self.enc_embedding(x_enc, x_mark_enc) + + mask = self.mask.repeat(len(seq_enc), 1, 1).to(x_enc.device) + seq_enc = self.conv_layers(seq_enc) + + for i in range(len(self.layers)): + seq_enc = self.layers[i](seq_enc, mask) + + indexes = self.indexes.repeat(seq_enc.size( + 0), 1, 1, seq_enc.size(2)).to(seq_enc.device) + indexes = indexes.view(seq_enc.size(0), -1, seq_enc.size(2)) + all_enc = torch.gather(seq_enc, 1, indexes) + seq_enc = all_enc.view(seq_enc.size(0), self.all_size[0], -1) + + return seq_enc + + +class ConvLayer(nn.Module): + def __init__(self, c_in, window_size): + super(ConvLayer, self).__init__() + self.downConv = nn.Conv1d(in_channels=c_in, + out_channels=c_in, + kernel_size=window_size, + stride=window_size) + self.norm = nn.BatchNorm1d(c_in) + self.activation = nn.ELU() + + def forward(self, x): + x = self.downConv(x) + x = self.norm(x) + x = self.activation(x) + return x + + +class Bottleneck_Construct(nn.Module): + """Bottleneck convolution CSCM""" + + def __init__(self, d_model, window_size, d_inner): + super(Bottleneck_Construct, self).__init__() + if not isinstance(window_size, list): + self.conv_layers = nn.ModuleList([ + ConvLayer(d_inner, window_size), + ConvLayer(d_inner, window_size), + ConvLayer(d_inner, window_size) + ]) + else: + self.conv_layers = [] + for i in range(len(window_size)): + self.conv_layers.append(ConvLayer(d_inner, window_size[i])) + self.conv_layers = nn.ModuleList(self.conv_layers) + self.up = Linear(d_inner, d_model) + self.down = Linear(d_model, d_inner) + self.norm = nn.LayerNorm(d_model) + + def forward(self, enc_input): + temp_input = self.down(enc_input).permute(0, 2, 1) + all_inputs = [] + for i in range(len(self.conv_layers)): + temp_input = self.conv_layers[i](temp_input) + all_inputs.append(temp_input) + + all_inputs = torch.cat(all_inputs, dim=2).transpose(1, 2) + 
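Each ConvLayer in Bottleneck_Construct above uses kernel_size == stride == window_size, so every application divides the temporal length by the window. A toy shape trace (random input, assumed sizes):

import torch
import torch.nn as nn

d_inner, window = 32, 4
x = torch.randn(2, d_inner, 96)      # [B, C, T] after the `down` projection
for _ in range(3):
    x = nn.Conv1d(d_inner, d_inner, kernel_size=window, stride=window)(x)
    print(x.shape[-1])               # 24, then 6, then 1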
all_inputs = self.up(all_inputs) + all_inputs = torch.cat([enc_input, all_inputs], dim=1) + + all_inputs = self.norm(all_inputs) + return all_inputs + + +class PositionwiseFeedForward(nn.Module): + """ Two-layer position-wise feed-forward neural network. """ + + def __init__(self, d_in, d_hid, dropout=0.1, normalize_before=True): + super().__init__() + + self.normalize_before = normalize_before + + self.w_1 = nn.Linear(d_in, d_hid) + self.w_2 = nn.Linear(d_hid, d_in) + + self.layer_norm = nn.LayerNorm(d_in, eps=1e-6) + self.dropout = nn.Dropout(dropout) + + def forward(self, x): + residual = x + if self.normalize_before: + x = self.layer_norm(x) + + x = F.gelu(self.w_1(x)) + x = self.dropout(x) + x = self.w_2(x) + x = self.dropout(x) + x = x + residual + + if not self.normalize_before: + x = self.layer_norm(x) + return x diff --git a/layers/RevIN.py b/layers/RevIN.py new file mode 100644 index 0000000..a9d7e73 --- /dev/null +++ b/layers/RevIN.py @@ -0,0 +1,59 @@ +import torch +from torch import nn + +class RevIN(nn.Module): + """ + Reversible Instance Normalization + """ + def __init__(self, num_features: int, eps=1e-5, affine=True, subtract_last=False): + super(RevIN, self).__init__() + self.num_features = num_features + self.eps = eps + self.affine = affine + self.subtract_last = subtract_last + if self.affine: + self._init_params() + + def forward(self, x, mode: str): + if mode == 'norm': + self._get_statistics(x) + x = self._normalize(x) + elif mode == 'denorm': + x = self._denormalize(x) + else: + raise NotImplementedError + return x + + def _init_params(self): + self.affine_weight = nn.Parameter(torch.ones(self.num_features)) + self.affine_bias = nn.Parameter(torch.zeros(self.num_features)) + + def _get_statistics(self, x): + dim2reduce = tuple(range(1, x.ndim-1)) + if self.subtract_last: + self.last = x[:, -1, :].unsqueeze(1) + else: + self.mean = torch.mean(x, dim=dim2reduce, keepdim=True).detach() + self.stdev = torch.sqrt(torch.var(x, dim=dim2reduce, keepdim=True, unbiased=False) + self.eps).detach() + + def _normalize(self, x): + if self.subtract_last: + x = x - self.last + else: + x = x - self.mean + x = x / self.stdev + if self.affine: + x = x * self.affine_weight + x = x + self.affine_bias + return x + + def _denormalize(self, x): + if self.affine: + x = x - self.affine_bias + x = x / (self.affine_weight + self.eps * self.eps) + x = x * self.stdev + if self.subtract_last: + x = x + self.last + else: + x = x + self.mean + return x \ No newline at end of file diff --git a/layers/SeasonPatch.py b/layers/SeasonPatch.py new file mode 100644 index 0000000..3dd349e --- /dev/null +++ b/layers/SeasonPatch.py @@ -0,0 +1,67 @@ +""" +SeasonPatch = PatchTST (CI) + ChannelGraphMixer + Linear prediction head +Adapted for Time-Series-Library-main style +""" +import torch +import torch.nn as nn +from layers.TSTEncoder import TSTiEncoder +from layers.GraphMixer import HierarchicalGraphMixer + +class SeasonPatch(nn.Module): + def __init__(self, + c_in: int, + seq_len: int, + pred_len: int, + patch_len: int, + stride: int, + k_graph: int = 8, + d_model: int = 128, + n_layers: int = 3, + n_heads: int = 16): + super().__init__() + + # Store patch parameters + self.patch_len = patch_len + self.stride = stride + + # Calculate patch number + patch_num = (seq_len - patch_len) // stride + 1 + + # PatchTST encoder (channel independent) + self.encoder = TSTiEncoder( + c_in=c_in, + patch_num=patch_num, + patch_len=patch_len, + d_model=d_model, + n_layers=n_layers, + n_heads=n_heads + ) + + # Cross-channel 
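RevIN above normalizes every series by its own per-instance statistics in 'norm' mode and undoes the transform in 'denorm' mode. A standalone round-trip check without the affine parameters (toy tensor, not the class itself):

import torch

x = torch.randn(4, 96, 7)                                   # [B, T, C]
mean = x.mean(dim=1, keepdim=True)
stdev = torch.sqrt(x.var(dim=1, keepdim=True, unbiased=False) + 1e-5)
x_norm = (x - mean) / stdev                                 # 'norm' mode
x_back = x_norm * stdev + mean                              # 'denorm' mode
assert torch.allclose(x, x_back, atol=1e-5)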
mixer + self.mixer = HierarchicalGraphMixer(c_in, dim=d_model, k=k_graph) + + # Prediction head + self.head = nn.Linear(patch_num * d_model, pred_len) + + def forward(self, x): + # x: [B, L, C] + x = x.permute(0, 2, 1) # → [B, C, L] + + # Patch the input + x_patch = x.unfold(-1, self.patch_len, self.stride) # [B, C, patch_num, patch_len] + + # Encode patches + z = self.encoder(x_patch) # [B, C, d_model, patch_num] + + # z: [B, C, d_model, patch_num] → [B, C, patch_num, d_model] + B, C, D, N = z.shape + z = z.permute(0, 1, 3, 2) # [B, C, patch_num, d_model] + + # Cross-channel mixing + z_mix = self.mixer(z) # [B, C, patch_num, d_model] + + # Flatten and predict + z_mix = z_mix.view(B, C, N * D) # [B, C, patch_num * d_model] + y_pred = self.head(z_mix) # [B, C, pred_len] + + return y_pred \ No newline at end of file diff --git a/layers/SelfAttention_Family.py b/layers/SelfAttention_Family.py new file mode 100644 index 0000000..b151bff --- /dev/null +++ b/layers/SelfAttention_Family.py @@ -0,0 +1,302 @@ +import torch +import torch.nn as nn +import numpy as np +from math import sqrt +from utils.masking import TriangularCausalMask, ProbMask +from reformer_pytorch import LSHSelfAttention +from einops import rearrange, repeat + + +class DSAttention(nn.Module): + '''De-stationary Attention''' + + def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False): + super(DSAttention, self).__init__() + self.scale = scale + self.mask_flag = mask_flag + self.output_attention = output_attention + self.dropout = nn.Dropout(attention_dropout) + + def forward(self, queries, keys, values, attn_mask, tau=None, delta=None): + B, L, H, E = queries.shape + _, S, _, D = values.shape + scale = self.scale or 1. / sqrt(E) + + tau = 1.0 if tau is None else tau.unsqueeze( + 1).unsqueeze(1) # B x 1 x 1 x 1 + delta = 0.0 if delta is None else delta.unsqueeze( + 1).unsqueeze(1) # B x 1 x 1 x S + + # De-stationary Attention, rescaling pre-softmax score with learned de-stationary factors + scores = torch.einsum("blhe,bshe->bhls", queries, keys) * tau + delta + + if self.mask_flag: + if attn_mask is None: + attn_mask = TriangularCausalMask(B, L, device=queries.device) + + scores.masked_fill_(attn_mask.mask, -np.inf) + + A = self.dropout(torch.softmax(scale * scores, dim=-1)) + V = torch.einsum("bhls,bshd->blhd", A, values) + + if self.output_attention: + return V.contiguous(), A + else: + return V.contiguous(), None + + +class FullAttention(nn.Module): + def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False): + super(FullAttention, self).__init__() + self.scale = scale + self.mask_flag = mask_flag + self.output_attention = output_attention + self.dropout = nn.Dropout(attention_dropout) + + def forward(self, queries, keys, values, attn_mask, tau=None, delta=None): + B, L, H, E = queries.shape + _, S, _, D = values.shape + scale = self.scale or 1. 
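SeasonPatch above unfolds each channel into overlapping patches, encodes every patch to d_model, and maps the flattened patch_num * d_model representation to the horizon. A shape walk-through with toy sizes, where the encoder output is replaced by a random stand-in:

import torch
import torch.nn as nn

B, C, seq_len, pred_len = 2, 7, 96, 24
patch_len, stride, d_model = 16, 8, 32
patch_num = (seq_len - patch_len) // stride + 1              # 11 (no extra padding here)
x = torch.randn(B, C, seq_len)
x_patch = x.unfold(-1, patch_len, stride)                    # [B, C, patch_num, patch_len]
z = torch.randn(B, C, patch_num, d_model)                    # stand-in for the TST encoder output
y = nn.Linear(patch_num * d_model, pred_len)(z.reshape(B, C, -1))
print(x_patch.shape, y.shape)    # [2, 7, 11, 16] and [2, 7, 24]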
/ sqrt(E) + + scores = torch.einsum("blhe,bshe->bhls", queries, keys) + + if self.mask_flag: + if attn_mask is None: + attn_mask = TriangularCausalMask(B, L, device=queries.device) + + scores.masked_fill_(attn_mask.mask, -np.inf) + + A = self.dropout(torch.softmax(scale * scores, dim=-1)) + V = torch.einsum("bhls,bshd->blhd", A, values) + + if self.output_attention: + return V.contiguous(), A + else: + return V.contiguous(), None + + +class ProbAttention(nn.Module): + def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False): + super(ProbAttention, self).__init__() + self.factor = factor + self.scale = scale + self.mask_flag = mask_flag + self.output_attention = output_attention + self.dropout = nn.Dropout(attention_dropout) + + def _prob_QK(self, Q, K, sample_k, n_top): # n_top: c*ln(L_q) + # Q [B, H, L, D] + B, H, L_K, E = K.shape + _, _, L_Q, _ = Q.shape + + # calculate the sampled Q_K + K_expand = K.unsqueeze(-3).expand(B, H, L_Q, L_K, E) + # real U = U_part(factor*ln(L_k))*L_q + index_sample = torch.randint(L_K, (L_Q, sample_k)) + K_sample = K_expand[:, :, torch.arange( + L_Q).unsqueeze(1), index_sample, :] + Q_K_sample = torch.matmul( + Q.unsqueeze(-2), K_sample.transpose(-2, -1)).squeeze() + + # find the Top_k query with sparisty measurement + M = Q_K_sample.max(-1)[0] - torch.div(Q_K_sample.sum(-1), L_K) + M_top = M.topk(n_top, sorted=False)[1] + + # use the reduced Q to calculate Q_K + Q_reduce = Q[torch.arange(B)[:, None, None], + torch.arange(H)[None, :, None], + M_top, :] # factor*ln(L_q) + Q_K = torch.matmul(Q_reduce, K.transpose(-2, -1)) # factor*ln(L_q)*L_k + + return Q_K, M_top + + def _get_initial_context(self, V, L_Q): + B, H, L_V, D = V.shape + if not self.mask_flag: + # V_sum = V.sum(dim=-2) + V_sum = V.mean(dim=-2) + contex = V_sum.unsqueeze(-2).expand(B, H, + L_Q, V_sum.shape[-1]).clone() + else: # use mask + # requires that L_Q == L_V, i.e. for self-attention only + assert (L_Q == L_V) + contex = V.cumsum(dim=-2) + return contex + + def _update_context(self, context_in, V, scores, index, L_Q, attn_mask): + B, H, L_V, D = V.shape + + if self.mask_flag: + attn_mask = ProbMask(B, H, L_Q, index, scores, device=V.device) + scores.masked_fill_(attn_mask.mask, -np.inf) + + attn = torch.softmax(scores, dim=-1) # nn.Softmax(dim=-1)(scores) + + context_in[torch.arange(B)[:, None, None], + torch.arange(H)[None, :, None], + index, :] = torch.matmul(attn, V).type_as(context_in) + if self.output_attention: + attns = (torch.ones([B, H, L_V, L_V]) / + L_V).type_as(attn).to(attn.device) + attns[torch.arange(B)[:, None, None], torch.arange(H)[ + None, :, None], index, :] = attn + return context_in, attns + else: + return context_in, None + + def forward(self, queries, keys, values, attn_mask, tau=None, delta=None): + B, L_Q, H, D = queries.shape + _, L_K, _, _ = keys.shape + + queries = queries.transpose(2, 1) + keys = keys.transpose(2, 1) + values = values.transpose(2, 1) + + U_part = self.factor * \ + np.ceil(np.log(L_K)).astype('int').item() # c*ln(L_k) + u = self.factor * \ + np.ceil(np.log(L_Q)).astype('int').item() # c*ln(L_q) + + U_part = U_part if U_part < L_K else L_K + u = u if u < L_Q else L_Q + + scores_top, index = self._prob_QK( + queries, keys, sample_k=U_part, n_top=u) + + # add scale factor + scale = self.scale or 1. 
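FullAttention above is ordinary scaled dot-product attention written with einsum over the [B, L, H, E] layout. With no mask and no dropout it should match torch.nn.functional.scaled_dot_product_attention (PyTorch >= 2.0) up to a head/time permute, as this toy check shows:

import torch
import torch.nn.functional as F
from math import sqrt

B, L, H, E = 2, 10, 4, 16
q, k, v = (torch.randn(B, L, H, E) for _ in range(3))
scores = torch.einsum("blhe,bshe->bhls", q, k)
A = torch.softmax(scores / sqrt(E), dim=-1)
out = torch.einsum("bhls,bshd->blhd", A, v)                  # [B, L, H, E]
ref = F.scaled_dot_product_attention(
    q.permute(0, 2, 1, 3), k.permute(0, 2, 1, 3), v.permute(0, 2, 1, 3)
).permute(0, 2, 1, 3)
assert torch.allclose(out, ref, atol=1e-4)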
/ sqrt(D) + if scale is not None: + scores_top = scores_top * scale + # get the context + context = self._get_initial_context(values, L_Q) + # update the context with selected top_k queries + context, attn = self._update_context( + context, values, scores_top, index, L_Q, attn_mask) + + return context.contiguous(), attn + + +class AttentionLayer(nn.Module): + def __init__(self, attention, d_model, n_heads, d_keys=None, + d_values=None): + super(AttentionLayer, self).__init__() + + d_keys = d_keys or (d_model // n_heads) + d_values = d_values or (d_model // n_heads) + + self.inner_attention = attention + self.query_projection = nn.Linear(d_model, d_keys * n_heads) + self.key_projection = nn.Linear(d_model, d_keys * n_heads) + self.value_projection = nn.Linear(d_model, d_values * n_heads) + self.out_projection = nn.Linear(d_values * n_heads, d_model) + self.n_heads = n_heads + + def forward(self, queries, keys, values, attn_mask, tau=None, delta=None): + B, L, _ = queries.shape + _, S, _ = keys.shape + H = self.n_heads + + queries = self.query_projection(queries).view(B, L, H, -1) + keys = self.key_projection(keys).view(B, S, H, -1) + values = self.value_projection(values).view(B, S, H, -1) + + out, attn = self.inner_attention( + queries, + keys, + values, + attn_mask, + tau=tau, + delta=delta + ) + out = out.view(B, L, -1) + + return self.out_projection(out), attn + + +class ReformerLayer(nn.Module): + def __init__(self, attention, d_model, n_heads, d_keys=None, + d_values=None, causal=False, bucket_size=4, n_hashes=4): + super().__init__() + self.bucket_size = bucket_size + self.attn = LSHSelfAttention( + dim=d_model, + heads=n_heads, + bucket_size=bucket_size, + n_hashes=n_hashes, + causal=causal + ) + + def fit_length(self, queries): + # inside reformer: assert N % (bucket_size * 2) == 0 + B, N, C = queries.shape + if N % (self.bucket_size * 2) == 0: + return queries + else: + # fill the time series + fill_len = (self.bucket_size * 2) - (N % (self.bucket_size * 2)) + return torch.cat([queries, torch.zeros([B, fill_len, C]).to(queries.device)], dim=1) + + def forward(self, queries, keys, values, attn_mask, tau, delta): + # in Reformer: defalut queries=keys + B, N, C = queries.shape + queries = self.attn(self.fit_length(queries))[:, :N, :] + return queries, None + + +class TwoStageAttentionLayer(nn.Module): + ''' + The Two Stage Attention (TSA) Layer + input/output shape: [batch_size, Data_dim(D), Seg_num(L), d_model] + ''' + + def __init__(self, configs, + seg_num, factor, d_model, n_heads, d_ff=None, dropout=0.1): + super(TwoStageAttentionLayer, self).__init__() + d_ff = d_ff or 4 * d_model + self.time_attention = AttentionLayer(FullAttention(False, configs.factor, attention_dropout=configs.dropout, + output_attention=False), d_model, n_heads) + self.dim_sender = AttentionLayer(FullAttention(False, configs.factor, attention_dropout=configs.dropout, + output_attention=False), d_model, n_heads) + self.dim_receiver = AttentionLayer(FullAttention(False, configs.factor, attention_dropout=configs.dropout, + output_attention=False), d_model, n_heads) + self.router = nn.Parameter(torch.randn(seg_num, factor, d_model)) + + self.dropout = nn.Dropout(dropout) + + self.norm1 = nn.LayerNorm(d_model) + self.norm2 = nn.LayerNorm(d_model) + self.norm3 = nn.LayerNorm(d_model) + self.norm4 = nn.LayerNorm(d_model) + + self.MLP1 = nn.Sequential(nn.Linear(d_model, d_ff), + nn.GELU(), + nn.Linear(d_ff, d_model)) + self.MLP2 = nn.Sequential(nn.Linear(d_model, d_ff), + nn.GELU(), + nn.Linear(d_ff, d_model)) + + 
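ProbAttention above only scores roughly c * ln(L_K) sampled keys per query and keeps the top c * ln(L_Q) queries, which is where its reduced cost comes from. A quick look at those sizes for an assumed configuration:

import numpy as np

factor, L_Q, L_K = 5, 96, 96
U_part = min(factor * int(np.ceil(np.log(L_K))), L_K)   # sampled keys per query: 25
u = min(factor * int(np.ceil(np.log(L_Q))), L_Q)        # retained "active" queries: 25
print(U_part, u)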
def forward(self, x, attn_mask=None, tau=None, delta=None): + # Cross Time Stage: Directly apply MSA to each dimension + batch = x.shape[0] + time_in = rearrange(x, 'b ts_d seg_num d_model -> (b ts_d) seg_num d_model') + time_enc, attn = self.time_attention( + time_in, time_in, time_in, attn_mask=None, tau=None, delta=None + ) + dim_in = time_in + self.dropout(time_enc) + dim_in = self.norm1(dim_in) + dim_in = dim_in + self.dropout(self.MLP1(dim_in)) + dim_in = self.norm2(dim_in) + + # Cross Dimension Stage: use a small set of learnable vectors to aggregate and distribute messages to build the D-to-D connection + dim_send = rearrange(dim_in, '(b ts_d) seg_num d_model -> (b seg_num) ts_d d_model', b=batch) + batch_router = repeat(self.router, 'seg_num factor d_model -> (repeat seg_num) factor d_model', repeat=batch) + dim_buffer, attn = self.dim_sender(batch_router, dim_send, dim_send, attn_mask=None, tau=None, delta=None) + dim_receive, attn = self.dim_receiver(dim_send, dim_buffer, dim_buffer, attn_mask=None, tau=None, delta=None) + dim_enc = dim_send + self.dropout(dim_receive) + dim_enc = self.norm3(dim_enc) + dim_enc = dim_enc + self.dropout(self.MLP2(dim_enc)) + dim_enc = self.norm4(dim_enc) + + final_out = rearrange(dim_enc, '(b seg_num) ts_d d_model -> b ts_d seg_num d_model', b=batch) + + return final_out diff --git a/layers/StandardNorm.py b/layers/StandardNorm.py new file mode 100755 index 0000000..990d0fd --- /dev/null +++ b/layers/StandardNorm.py @@ -0,0 +1,68 @@ +import torch +import torch.nn as nn + + +class Normalize(nn.Module): + def __init__(self, num_features: int, eps=1e-5, affine=False, subtract_last=False, non_norm=False): + """ + :param num_features: the number of features or channels + :param eps: a value added for numerical stability + :param affine: if True, RevIN has learnable affine parameters + """ + super(Normalize, self).__init__() + self.num_features = num_features + self.eps = eps + self.affine = affine + self.subtract_last = subtract_last + self.non_norm = non_norm + if self.affine: + self._init_params() + + def forward(self, x, mode: str): + if mode == 'norm': + self._get_statistics(x) + x = self._normalize(x) + elif mode == 'denorm': + x = self._denormalize(x) + else: + raise NotImplementedError + return x + + def _init_params(self): + # initialize RevIN params: (C,) + self.affine_weight = nn.Parameter(torch.ones(self.num_features)) + self.affine_bias = nn.Parameter(torch.zeros(self.num_features)) + + def _get_statistics(self, x): + dim2reduce = tuple(range(1, x.ndim - 1)) + if self.subtract_last: + self.last = x[:, -1, :].unsqueeze(1) + else: + self.mean = torch.mean(x, dim=dim2reduce, keepdim=True).detach() + self.stdev = torch.sqrt(torch.var(x, dim=dim2reduce, keepdim=True, unbiased=False) + self.eps).detach() + + def _normalize(self, x): + if self.non_norm: + return x + if self.subtract_last: + x = x - self.last + else: + x = x - self.mean + x = x / self.stdev + if self.affine: + x = x * self.affine_weight + x = x + self.affine_bias + return x + + def _denormalize(self, x): + if self.non_norm: + return x + if self.affine: + x = x - self.affine_bias + x = x / (self.affine_weight + self.eps * self.eps) + x = x * self.stdev + if self.subtract_last: + x = x + self.last + else: + x = x + self.mean + return x diff --git a/layers/TSTEncoder.py b/layers/TSTEncoder.py new file mode 100644 index 0000000..d03fe6b --- /dev/null +++ b/layers/TSTEncoder.py @@ -0,0 +1,91 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from layers.Embed 
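The two rearranges in TwoStageAttentionLayer.forward above are pure bookkeeping: time attention runs independently per variate, while dimension attention runs per segment through `factor` learnable router tokens. A toy shape trace (assumed sizes):

import torch
from einops import rearrange, repeat

b, ts_d, seg_num, d_model, factor = 2, 7, 6, 16, 3
x = torch.randn(b, ts_d, seg_num, d_model)
time_in = rearrange(x, 'b ts_d seg_num d_model -> (b ts_d) seg_num d_model')
dim_send = rearrange(time_in, '(b ts_d) seg_num d_model -> (b seg_num) ts_d d_model', b=b)
router = torch.randn(seg_num, factor, d_model)
batch_router = repeat(router, 'seg_num factor d_model -> (repeat seg_num) factor d_model', repeat=b)
print(time_in.shape)                        # torch.Size([14, 6, 16])   per-variate time attention
print(dim_send.shape, batch_router.shape)   # torch.Size([12, 7, 16]) torch.Size([12, 3, 16])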
import PositionalEmbedding +from layers.SelfAttention_Family import FullAttention, AttentionLayer +from layers.Transformer_EncDec import EncoderLayer + +class TSTEncoder(nn.Module): + """ + Transformer encoder for PatchTST, adapted for Time-Series-Library-main style + """ + def __init__(self, q_len, d_model, n_heads, d_k=None, d_v=None, d_ff=None, + norm='BatchNorm', attn_dropout=0., dropout=0., activation='gelu', + n_layers=1): + super().__init__() + + d_k = d_model // n_heads if d_k is None else d_k + d_v = d_model // n_heads if d_v is None else d_v + d_ff = d_model * 4 if d_ff is None else d_ff + + self.layers = nn.ModuleList([ + EncoderLayer( + AttentionLayer( + FullAttention(False, attention_dropout=attn_dropout), + d_model, n_heads + ), + d_model, + d_ff, + dropout=dropout, + activation=activation + ) for i in range(n_layers) + ]) + + def forward(self, src, attn_mask=None): + output = src + attns = [] + for layer in self.layers: + output, attn = layer(output, attn_mask) + attns.append(attn) + return output, attns + + +class TSTiEncoder(nn.Module): + """ + Channel-independent TST Encoder adapted for Time-Series-Library-main + """ + def __init__(self, c_in, patch_num, patch_len, max_seq_len=1024, + n_layers=3, d_model=128, n_heads=16, d_k=None, d_v=None, + d_ff=256, norm='BatchNorm', attn_dropout=0., dropout=0., + activation="gelu"): + super().__init__() + + self.patch_num = patch_num + self.patch_len = patch_len + + # Input encoding - projection of feature vectors onto a d-dim vector space + self.W_P = nn.Linear(patch_len, d_model) + + # Positional encoding using Time-Series-Library-main's PositionalEmbedding + self.pos_embedding = PositionalEmbedding(d_model, max_len=max_seq_len) + + # Residual dropout + self.dropout = nn.Dropout(dropout) + + # Encoder + self.encoder = TSTEncoder(patch_num, d_model, n_heads, d_k=d_k, d_v=d_v, + d_ff=d_ff, norm=norm, attn_dropout=attn_dropout, + dropout=dropout, activation=activation, n_layers=n_layers) + + def forward(self, x): + # x: [bs x nvars x patch_num x patch_len] + bs, n_vars, patch_num, patch_len = x.shape + + # Input encoding: project patch_len to d_model + x = self.W_P(x) # x: [bs x nvars x patch_num x d_model] + + # Reshape for attention: combine batch and channel dimensions + u = torch.reshape(x, (bs * n_vars, patch_num, x.shape[-1])) # u: [bs * nvars x patch_num x d_model] + + # Add positional encoding + pos = self.pos_embedding(u) # Get positional encoding [bs*nvars x patch_num x d_model] + u = self.dropout(u + pos[:, :patch_num, :]) # Add positional encoding + + # Encoder + z, attns = self.encoder(u) # z: [bs * nvars x patch_num x d_model] + + # Reshape back to separate batch and channel dimensions + z = torch.reshape(z, (bs, n_vars, patch_num, z.shape[-1])) # z: [bs x nvars x patch_num x d_model] + z = z.permute(0, 1, 3, 2) # z: [bs x nvars x d_model x patch_num] + + return z \ No newline at end of file diff --git a/layers/Transformer_EncDec.py b/layers/Transformer_EncDec.py new file mode 100644 index 0000000..dabf4c2 --- /dev/null +++ b/layers/Transformer_EncDec.py @@ -0,0 +1,135 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class ConvLayer(nn.Module): + def __init__(self, c_in): + super(ConvLayer, self).__init__() + self.downConv = nn.Conv1d(in_channels=c_in, + out_channels=c_in, + kernel_size=3, + padding=2, + padding_mode='circular') + self.norm = nn.BatchNorm1d(c_in) + self.activation = nn.ELU() + self.maxPool = nn.MaxPool1d(kernel_size=3, stride=2, padding=1) + + def forward(self, x): + x = 
self.downConv(x.permute(0, 2, 1)) + x = self.norm(x) + x = self.activation(x) + x = self.maxPool(x) + x = x.transpose(1, 2) + return x + + +class EncoderLayer(nn.Module): + def __init__(self, attention, d_model, d_ff=None, dropout=0.1, activation="relu"): + super(EncoderLayer, self).__init__() + d_ff = d_ff or 4 * d_model + self.attention = attention + self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1) + self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1) + self.norm1 = nn.LayerNorm(d_model) + self.norm2 = nn.LayerNorm(d_model) + self.dropout = nn.Dropout(dropout) + self.activation = F.relu if activation == "relu" else F.gelu + + def forward(self, x, attn_mask=None, tau=None, delta=None): + new_x, attn = self.attention( + x, x, x, + attn_mask=attn_mask, + tau=tau, delta=delta + ) + x = x + self.dropout(new_x) + + y = x = self.norm1(x) + y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1)))) + y = self.dropout(self.conv2(y).transpose(-1, 1)) + + return self.norm2(x + y), attn + + +class Encoder(nn.Module): + def __init__(self, attn_layers, conv_layers=None, norm_layer=None): + super(Encoder, self).__init__() + self.attn_layers = nn.ModuleList(attn_layers) + self.conv_layers = nn.ModuleList(conv_layers) if conv_layers is not None else None + self.norm = norm_layer + + def forward(self, x, attn_mask=None, tau=None, delta=None): + # x [B, L, D] + attns = [] + if self.conv_layers is not None: + for i, (attn_layer, conv_layer) in enumerate(zip(self.attn_layers, self.conv_layers)): + delta = delta if i == 0 else None + x, attn = attn_layer(x, attn_mask=attn_mask, tau=tau, delta=delta) + x = conv_layer(x) + attns.append(attn) + x, attn = self.attn_layers[-1](x, tau=tau, delta=None) + attns.append(attn) + else: + for attn_layer in self.attn_layers: + x, attn = attn_layer(x, attn_mask=attn_mask, tau=tau, delta=delta) + attns.append(attn) + + if self.norm is not None: + x = self.norm(x) + + return x, attns + + +class DecoderLayer(nn.Module): + def __init__(self, self_attention, cross_attention, d_model, d_ff=None, + dropout=0.1, activation="relu"): + super(DecoderLayer, self).__init__() + d_ff = d_ff or 4 * d_model + self.self_attention = self_attention + self.cross_attention = cross_attention + self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1) + self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1) + self.norm1 = nn.LayerNorm(d_model) + self.norm2 = nn.LayerNorm(d_model) + self.norm3 = nn.LayerNorm(d_model) + self.dropout = nn.Dropout(dropout) + self.activation = F.relu if activation == "relu" else F.gelu + + def forward(self, x, cross, x_mask=None, cross_mask=None, tau=None, delta=None): + x = x + self.dropout(self.self_attention( + x, x, x, + attn_mask=x_mask, + tau=tau, delta=None + )[0]) + x = self.norm1(x) + + x = x + self.dropout(self.cross_attention( + x, cross, cross, + attn_mask=cross_mask, + tau=tau, delta=delta + )[0]) + + y = x = self.norm2(x) + y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1)))) + y = self.dropout(self.conv2(y).transpose(-1, 1)) + + return self.norm3(x + y) + + +class Decoder(nn.Module): + def __init__(self, layers, norm_layer=None, projection=None): + super(Decoder, self).__init__() + self.layers = nn.ModuleList(layers) + self.norm = norm_layer + self.projection = projection + + def forward(self, x, cross, x_mask=None, cross_mask=None, tau=None, delta=None): + for layer in self.layers: + x = layer(x, cross, x_mask=x_mask, 
cross_mask=cross_mask, tau=tau, delta=delta) + + if self.norm is not None: + x = self.norm(x) + + if self.projection is not None: + x = self.projection(x) + return x diff --git a/layers/__init__.py b/layers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/models/Autoformer.py b/models/Autoformer.py new file mode 100644 index 0000000..fb0cd52 --- /dev/null +++ b/models/Autoformer.py @@ -0,0 +1,157 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from layers.Embed import DataEmbedding, DataEmbedding_wo_pos +from layers.AutoCorrelation import AutoCorrelation, AutoCorrelationLayer +from layers.Autoformer_EncDec import Encoder, Decoder, EncoderLayer, DecoderLayer, my_Layernorm, series_decomp +import math +import numpy as np + + +class Model(nn.Module): + """ + Autoformer is the first method to achieve the series-wise connection, + with inherent O(LlogL) complexity + Paper link: https://openreview.net/pdf?id=I55UqU-M11y + """ + + def __init__(self, configs): + super(Model, self).__init__() + self.task_name = configs.task_name + self.seq_len = configs.seq_len + self.label_len = configs.label_len + self.pred_len = configs.pred_len + + # Decomp + kernel_size = configs.moving_avg + self.decomp = series_decomp(kernel_size) + + # Embedding + self.enc_embedding = DataEmbedding_wo_pos(configs.enc_in, configs.d_model, configs.embed, configs.freq, + configs.dropout) + # Encoder + self.encoder = Encoder( + [ + EncoderLayer( + AutoCorrelationLayer( + AutoCorrelation(False, configs.factor, attention_dropout=configs.dropout, + output_attention=False), + configs.d_model, configs.n_heads), + configs.d_model, + configs.d_ff, + moving_avg=configs.moving_avg, + dropout=configs.dropout, + activation=configs.activation + ) for l in range(configs.e_layers) + ], + norm_layer=my_Layernorm(configs.d_model) + ) + # Decoder + if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': + self.dec_embedding = DataEmbedding_wo_pos(configs.dec_in, configs.d_model, configs.embed, configs.freq, + configs.dropout) + self.decoder = Decoder( + [ + DecoderLayer( + AutoCorrelationLayer( + AutoCorrelation(True, configs.factor, attention_dropout=configs.dropout, + output_attention=False), + configs.d_model, configs.n_heads), + AutoCorrelationLayer( + AutoCorrelation(False, configs.factor, attention_dropout=configs.dropout, + output_attention=False), + configs.d_model, configs.n_heads), + configs.d_model, + configs.c_out, + configs.d_ff, + moving_avg=configs.moving_avg, + dropout=configs.dropout, + activation=configs.activation, + ) + for l in range(configs.d_layers) + ], + norm_layer=my_Layernorm(configs.d_model), + projection=nn.Linear(configs.d_model, configs.c_out, bias=True) + ) + if self.task_name == 'imputation': + self.projection = nn.Linear( + configs.d_model, configs.c_out, bias=True) + if self.task_name == 'anomaly_detection': + self.projection = nn.Linear( + configs.d_model, configs.c_out, bias=True) + if self.task_name == 'classification': + self.act = F.gelu + self.dropout = nn.Dropout(configs.dropout) + self.projection = nn.Linear( + configs.d_model * configs.seq_len, configs.num_class) + + def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + # decomp init + mean = torch.mean(x_enc, dim=1).unsqueeze( + 1).repeat(1, self.pred_len, 1) + zeros = torch.zeros([x_dec.shape[0], self.pred_len, + x_dec.shape[2]], device=x_enc.device) + seasonal_init, trend_init = self.decomp(x_enc) + # decoder input + trend_init = torch.cat( + [trend_init[:, 
-self.label_len:, :], mean], dim=1) + seasonal_init = torch.cat( + [seasonal_init[:, -self.label_len:, :], zeros], dim=1) + # enc + enc_out = self.enc_embedding(x_enc, x_mark_enc) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + # dec + dec_out = self.dec_embedding(seasonal_init, x_mark_dec) + seasonal_part, trend_part = self.decoder(dec_out, enc_out, x_mask=None, cross_mask=None, + trend=trend_init) + # final + dec_out = trend_part + seasonal_part + return dec_out + + def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask): + # enc + enc_out = self.enc_embedding(x_enc, x_mark_enc) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + # final + dec_out = self.projection(enc_out) + return dec_out + + def anomaly_detection(self, x_enc): + # enc + enc_out = self.enc_embedding(x_enc, None) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + # final + dec_out = self.projection(enc_out) + return dec_out + + def classification(self, x_enc, x_mark_enc): + # enc + enc_out = self.enc_embedding(x_enc, None) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + # Output + # the output transformer encoder/decoder embeddings don't include non-linearity + output = self.act(enc_out) + output = self.dropout(output) + # zero-out padding embeddings + output = output * x_mark_enc.unsqueeze(-1) + # (batch_size, seq_length * d_model) + output = output.reshape(output.shape[0], -1) + output = self.projection(output) # (batch_size, num_classes) + return output + + def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None): + if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': + dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec) + return dec_out[:, -self.pred_len:, :] # [B, L, D] + if self.task_name == 'imputation': + dec_out = self.imputation( + x_enc, x_mark_enc, x_dec, x_mark_dec, mask) + return dec_out # [B, L, D] + if self.task_name == 'anomaly_detection': + dec_out = self.anomaly_detection(x_enc) + return dec_out # [B, L, D] + if self.task_name == 'classification': + dec_out = self.classification(x_enc, x_mark_enc) + return dec_out # [B, N] + return None diff --git a/models/Crossformer.py b/models/Crossformer.py new file mode 100644 index 0000000..7757d34 --- /dev/null +++ b/models/Crossformer.py @@ -0,0 +1,145 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from einops import rearrange, repeat +from layers.Crossformer_EncDec import scale_block, Encoder, Decoder, DecoderLayer +from layers.Embed import PatchEmbedding +from layers.SelfAttention_Family import AttentionLayer, FullAttention, TwoStageAttentionLayer +from models.PatchTST import FlattenHead + + +from math import ceil + + +class Model(nn.Module): + """ + Paper link: https://openreview.net/pdf?id=vSVLM2j9eie + """ + def __init__(self, configs): + super(Model, self).__init__() + self.enc_in = configs.enc_in + self.seq_len = configs.seq_len + self.pred_len = configs.pred_len + self.seg_len = 12 + self.win_size = 2 + self.task_name = configs.task_name + + # The padding operation to handle invisible sgemnet length + self.pad_in_len = ceil(1.0 * configs.seq_len / self.seg_len) * self.seg_len + self.pad_out_len = ceil(1.0 * configs.pred_len / self.seg_len) * self.seg_len + self.in_seg_num = self.pad_in_len // self.seg_len + self.out_seg_num = ceil(self.in_seg_num / (self.win_size ** (configs.e_layers - 1))) + self.head_nf = configs.d_model * self.out_seg_num + + # Embedding + self.enc_value_embedding = PatchEmbedding(configs.d_model, 
self.seg_len, self.seg_len, self.pad_in_len - configs.seq_len, 0) + self.enc_pos_embedding = nn.Parameter( + torch.randn(1, configs.enc_in, self.in_seg_num, configs.d_model)) + self.pre_norm = nn.LayerNorm(configs.d_model) + + # Encoder + self.encoder = Encoder( + [ + scale_block(configs, 1 if l == 0 else self.win_size, configs.d_model, configs.n_heads, configs.d_ff, + 1, configs.dropout, + self.in_seg_num if l == 0 else ceil(self.in_seg_num / self.win_size ** l), configs.factor + ) for l in range(configs.e_layers) + ] + ) + # Decoder + self.dec_pos_embedding = nn.Parameter( + torch.randn(1, configs.enc_in, (self.pad_out_len // self.seg_len), configs.d_model)) + + self.decoder = Decoder( + [ + DecoderLayer( + TwoStageAttentionLayer(configs, (self.pad_out_len // self.seg_len), configs.factor, configs.d_model, configs.n_heads, + configs.d_ff, configs.dropout), + AttentionLayer( + FullAttention(False, configs.factor, attention_dropout=configs.dropout, + output_attention=False), + configs.d_model, configs.n_heads), + self.seg_len, + configs.d_model, + configs.d_ff, + dropout=configs.dropout, + # activation=configs.activation, + ) + for l in range(configs.e_layers + 1) + ], + ) + if self.task_name == 'imputation' or self.task_name == 'anomaly_detection': + self.head = FlattenHead(configs.enc_in, self.head_nf, configs.seq_len, + head_dropout=configs.dropout) + elif self.task_name == 'classification': + self.flatten = nn.Flatten(start_dim=-2) + self.dropout = nn.Dropout(configs.dropout) + self.projection = nn.Linear( + self.head_nf * configs.enc_in, configs.num_class) + + + + def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + # embedding + x_enc, n_vars = self.enc_value_embedding(x_enc.permute(0, 2, 1)) + x_enc = rearrange(x_enc, '(b d) seg_num d_model -> b d seg_num d_model', d = n_vars) + x_enc += self.enc_pos_embedding + x_enc = self.pre_norm(x_enc) + enc_out, attns = self.encoder(x_enc) + + dec_in = repeat(self.dec_pos_embedding, 'b ts_d l d -> (repeat b) ts_d l d', repeat=x_enc.shape[0]) + dec_out = self.decoder(dec_in, enc_out) + return dec_out + + def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask): + # embedding + x_enc, n_vars = self.enc_value_embedding(x_enc.permute(0, 2, 1)) + x_enc = rearrange(x_enc, '(b d) seg_num d_model -> b d seg_num d_model', d=n_vars) + x_enc += self.enc_pos_embedding + x_enc = self.pre_norm(x_enc) + enc_out, attns = self.encoder(x_enc) + + dec_out = self.head(enc_out[-1].permute(0, 1, 3, 2)).permute(0, 2, 1) + + return dec_out + + def anomaly_detection(self, x_enc): + # embedding + x_enc, n_vars = self.enc_value_embedding(x_enc.permute(0, 2, 1)) + x_enc = rearrange(x_enc, '(b d) seg_num d_model -> b d seg_num d_model', d=n_vars) + x_enc += self.enc_pos_embedding + x_enc = self.pre_norm(x_enc) + enc_out, attns = self.encoder(x_enc) + + dec_out = self.head(enc_out[-1].permute(0, 1, 3, 2)).permute(0, 2, 1) + return dec_out + + def classification(self, x_enc, x_mark_enc): + # embedding + x_enc, n_vars = self.enc_value_embedding(x_enc.permute(0, 2, 1)) + + x_enc = rearrange(x_enc, '(b d) seg_num d_model -> b d seg_num d_model', d=n_vars) + x_enc += self.enc_pos_embedding + x_enc = self.pre_norm(x_enc) + enc_out, attns = self.encoder(x_enc) + # Output from Non-stationary Transformer + output = self.flatten(enc_out[-1].permute(0, 1, 3, 2)) + output = self.dropout(output) + output = output.reshape(output.shape[0], -1) + output = self.projection(output) + return output + + def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None): + if 
self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': + dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec) + return dec_out[:, -self.pred_len:, :] # [B, L, D] + if self.task_name == 'imputation': + dec_out = self.imputation(x_enc, x_mark_enc, x_dec, x_mark_dec, mask) + return dec_out # [B, L, D] + if self.task_name == 'anomaly_detection': + dec_out = self.anomaly_detection(x_enc) + return dec_out # [B, L, D] + if self.task_name == 'classification': + dec_out = self.classification(x_enc, x_mark_enc) + return dec_out # [B, N] + return None \ No newline at end of file diff --git a/models/DLinear.py b/models/DLinear.py new file mode 100644 index 0000000..3f4d666 --- /dev/null +++ b/models/DLinear.py @@ -0,0 +1,110 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from layers.Autoformer_EncDec import series_decomp + + +class Model(nn.Module): + """ + Paper link: https://arxiv.org/pdf/2205.13504.pdf + """ + + def __init__(self, configs, individual=False): + """ + individual: Bool, whether shared model among different variates. + """ + super(Model, self).__init__() + self.task_name = configs.task_name + self.seq_len = configs.seq_len + if self.task_name == 'classification' or self.task_name == 'anomaly_detection' or self.task_name == 'imputation': + self.pred_len = configs.seq_len + else: + self.pred_len = configs.pred_len + # Series decomposition block from Autoformer + self.decompsition = series_decomp(configs.moving_avg) + self.individual = individual + self.channels = configs.enc_in + + if self.individual: + self.Linear_Seasonal = nn.ModuleList() + self.Linear_Trend = nn.ModuleList() + + for i in range(self.channels): + self.Linear_Seasonal.append( + nn.Linear(self.seq_len, self.pred_len)) + self.Linear_Trend.append( + nn.Linear(self.seq_len, self.pred_len)) + + self.Linear_Seasonal[i].weight = nn.Parameter( + (1 / self.seq_len) * torch.ones([self.pred_len, self.seq_len])) + self.Linear_Trend[i].weight = nn.Parameter( + (1 / self.seq_len) * torch.ones([self.pred_len, self.seq_len])) + else: + self.Linear_Seasonal = nn.Linear(self.seq_len, self.pred_len) + self.Linear_Trend = nn.Linear(self.seq_len, self.pred_len) + + self.Linear_Seasonal.weight = nn.Parameter( + (1 / self.seq_len) * torch.ones([self.pred_len, self.seq_len])) + self.Linear_Trend.weight = nn.Parameter( + (1 / self.seq_len) * torch.ones([self.pred_len, self.seq_len])) + + if self.task_name == 'classification': + self.projection = nn.Linear( + configs.enc_in * configs.seq_len, configs.num_class) + + def encoder(self, x): + seasonal_init, trend_init = self.decompsition(x) + seasonal_init, trend_init = seasonal_init.permute( + 0, 2, 1), trend_init.permute(0, 2, 1) + if self.individual: + seasonal_output = torch.zeros([seasonal_init.size(0), seasonal_init.size(1), self.pred_len], + dtype=seasonal_init.dtype).to(seasonal_init.device) + trend_output = torch.zeros([trend_init.size(0), trend_init.size(1), self.pred_len], + dtype=trend_init.dtype).to(trend_init.device) + for i in range(self.channels): + seasonal_output[:, i, :] = self.Linear_Seasonal[i]( + seasonal_init[:, i, :]) + trend_output[:, i, :] = self.Linear_Trend[i]( + trend_init[:, i, :]) + else: + seasonal_output = self.Linear_Seasonal(seasonal_init) + trend_output = self.Linear_Trend(trend_init) + x = seasonal_output + trend_output + return x.permute(0, 2, 1) + + def forecast(self, x_enc): + # Encoder + return self.encoder(x_enc) + + def imputation(self, x_enc): + # Encoder + return self.encoder(x_enc) + + def 
anomaly_detection(self, x_enc): + # Encoder + return self.encoder(x_enc) + + def classification(self, x_enc): + # Encoder + enc_out = self.encoder(x_enc) + # Output + # (batch_size, seq_length * d_model) + output = enc_out.reshape(enc_out.shape[0], -1) + # (batch_size, num_classes) + output = self.projection(output) + return output + + def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None): + if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': + dec_out = self.forecast(x_enc) + return dec_out[:, -self.pred_len:, :] # [B, L, D] + if self.task_name == 'imputation': + dec_out = self.imputation(x_enc) + return dec_out # [B, L, D] + if self.task_name == 'anomaly_detection': + dec_out = self.anomaly_detection(x_enc) + return dec_out # [B, L, D] + if self.task_name == 'classification': + dec_out = self.classification(x_enc) + return dec_out # [B, N] + return None diff --git a/models/ETSformer.py b/models/ETSformer.py new file mode 100644 index 0000000..a79b3da --- /dev/null +++ b/models/ETSformer.py @@ -0,0 +1,110 @@ +import torch +import torch.nn as nn +from layers.Embed import DataEmbedding +from layers.ETSformer_EncDec import EncoderLayer, Encoder, DecoderLayer, Decoder, Transform + + +class Model(nn.Module): + """ + Paper link: https://arxiv.org/abs/2202.01381 + """ + + def __init__(self, configs): + super(Model, self).__init__() + self.task_name = configs.task_name + self.seq_len = configs.seq_len + self.label_len = configs.label_len + if self.task_name == 'classification' or self.task_name == 'anomaly_detection' or self.task_name == 'imputation': + self.pred_len = configs.seq_len + else: + self.pred_len = configs.pred_len + + assert configs.e_layers == configs.d_layers, "Encoder and decoder layers must be equal" + + # Embedding + self.enc_embedding = DataEmbedding(configs.enc_in, configs.d_model, configs.embed, configs.freq, + configs.dropout) + + # Encoder + self.encoder = Encoder( + [ + EncoderLayer( + configs.d_model, configs.n_heads, configs.enc_in, configs.seq_len, self.pred_len, configs.top_k, + dim_feedforward=configs.d_ff, + dropout=configs.dropout, + activation=configs.activation, + ) for _ in range(configs.e_layers) + ] + ) + # Decoder + self.decoder = Decoder( + [ + DecoderLayer( + configs.d_model, configs.n_heads, configs.c_out, self.pred_len, + dropout=configs.dropout, + ) for _ in range(configs.d_layers) + ], + ) + self.transform = Transform(sigma=0.2) + + if self.task_name == 'classification': + self.act = torch.nn.functional.gelu + self.dropout = nn.Dropout(configs.dropout) + self.projection = nn.Linear(configs.d_model * configs.seq_len, configs.num_class) + + def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + with torch.no_grad(): + if self.training: + x_enc = self.transform.transform(x_enc) + res = self.enc_embedding(x_enc, x_mark_enc) + level, growths, seasons = self.encoder(res, x_enc, attn_mask=None) + + growth, season = self.decoder(growths, seasons) + preds = level[:, -1:] + growth + season + return preds + + def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask): + res = self.enc_embedding(x_enc, x_mark_enc) + level, growths, seasons = self.encoder(res, x_enc, attn_mask=None) + growth, season = self.decoder(growths, seasons) + preds = level[:, -1:] + growth + season + return preds + + def anomaly_detection(self, x_enc): + res = self.enc_embedding(x_enc, None) + level, growths, seasons = self.encoder(res, x_enc, attn_mask=None) + growth, season = self.decoder(growths, seasons) + preds = level[:, -1:] + 
growth + season + return preds + + def classification(self, x_enc, x_mark_enc): + res = self.enc_embedding(x_enc, None) + _, growths, seasons = self.encoder(res, x_enc, attn_mask=None) + + growths = torch.sum(torch.stack(growths, 0), 0)[:, :self.seq_len, :] + seasons = torch.sum(torch.stack(seasons, 0), 0)[:, :self.seq_len, :] + + enc_out = growths + seasons + output = self.act(enc_out) # the output transformer encoder/decoder embeddings don't include non-linearity + output = self.dropout(output) + + # Output + output = output * x_mark_enc.unsqueeze(-1) # zero-out padding embeddings + output = output.reshape(output.shape[0], -1) # (batch_size, seq_length * d_model) + output = self.projection(output) # (batch_size, num_classes) + return output + + def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None): + if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': + dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec) + return dec_out[:, -self.pred_len:, :] # [B, L, D] + if self.task_name == 'imputation': + dec_out = self.imputation(x_enc, x_mark_enc, x_dec, x_mark_dec, mask) + return dec_out # [B, L, D] + if self.task_name == 'anomaly_detection': + dec_out = self.anomaly_detection(x_enc) + return dec_out # [B, L, D] + if self.task_name == 'classification': + dec_out = self.classification(x_enc, x_mark_enc) + return dec_out # [B, N] + return None diff --git a/models/FEDformer.py b/models/FEDformer.py new file mode 100644 index 0000000..726eb4a --- /dev/null +++ b/models/FEDformer.py @@ -0,0 +1,178 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from layers.Embed import DataEmbedding +from layers.AutoCorrelation import AutoCorrelationLayer +from layers.FourierCorrelation import FourierBlock, FourierCrossAttention +from layers.MultiWaveletCorrelation import MultiWaveletCross, MultiWaveletTransform +from layers.Autoformer_EncDec import Encoder, Decoder, EncoderLayer, DecoderLayer, my_Layernorm, series_decomp + + +class Model(nn.Module): + """ + FEDformer performs the attention mechanism on frequency domain and achieved O(N) complexity + Paper link: https://proceedings.mlr.press/v162/zhou22g.html + """ + + def __init__(self, configs, version='fourier', mode_select='random', modes=32): + """ + version: str, for FEDformer, there are two versions to choose, options: [Fourier, Wavelets]. + mode_select: str, for FEDformer, there are two mode selection method, options: [random, low]. + modes: int, modes to be selected. 
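+ Note: any `version` value other than 'Wavelets' selects the Fourier blocks built below.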
+ """ + super(Model, self).__init__() + self.task_name = configs.task_name + self.seq_len = configs.seq_len + self.label_len = configs.label_len + self.pred_len = configs.pred_len + + self.version = version + self.mode_select = mode_select + self.modes = modes + + # Decomp + self.decomp = series_decomp(configs.moving_avg) + self.enc_embedding = DataEmbedding(configs.enc_in, configs.d_model, configs.embed, configs.freq, + configs.dropout) + self.dec_embedding = DataEmbedding(configs.dec_in, configs.d_model, configs.embed, configs.freq, + configs.dropout) + + if self.version == 'Wavelets': + encoder_self_att = MultiWaveletTransform(ich=configs.d_model, L=1, base='legendre') + decoder_self_att = MultiWaveletTransform(ich=configs.d_model, L=1, base='legendre') + decoder_cross_att = MultiWaveletCross(in_channels=configs.d_model, + out_channels=configs.d_model, + seq_len_q=self.seq_len // 2 + self.pred_len, + seq_len_kv=self.seq_len, + modes=self.modes, + ich=configs.d_model, + base='legendre', + activation='tanh') + else: + encoder_self_att = FourierBlock(in_channels=configs.d_model, + out_channels=configs.d_model, + n_heads=configs.n_heads, + seq_len=self.seq_len, + modes=self.modes, + mode_select_method=self.mode_select) + decoder_self_att = FourierBlock(in_channels=configs.d_model, + out_channels=configs.d_model, + n_heads=configs.n_heads, + seq_len=self.seq_len // 2 + self.pred_len, + modes=self.modes, + mode_select_method=self.mode_select) + decoder_cross_att = FourierCrossAttention(in_channels=configs.d_model, + out_channels=configs.d_model, + seq_len_q=self.seq_len // 2 + self.pred_len, + seq_len_kv=self.seq_len, + modes=self.modes, + mode_select_method=self.mode_select, + num_heads=configs.n_heads) + # Encoder + self.encoder = Encoder( + [ + EncoderLayer( + AutoCorrelationLayer( + encoder_self_att, # instead of multi-head attention in transformer + configs.d_model, configs.n_heads), + configs.d_model, + configs.d_ff, + moving_avg=configs.moving_avg, + dropout=configs.dropout, + activation=configs.activation + ) for l in range(configs.e_layers) + ], + norm_layer=my_Layernorm(configs.d_model) + ) + # Decoder + self.decoder = Decoder( + [ + DecoderLayer( + AutoCorrelationLayer( + decoder_self_att, + configs.d_model, configs.n_heads), + AutoCorrelationLayer( + decoder_cross_att, + configs.d_model, configs.n_heads), + configs.d_model, + configs.c_out, + configs.d_ff, + moving_avg=configs.moving_avg, + dropout=configs.dropout, + activation=configs.activation, + ) + for l in range(configs.d_layers) + ], + norm_layer=my_Layernorm(configs.d_model), + projection=nn.Linear(configs.d_model, configs.c_out, bias=True) + ) + + if self.task_name == 'imputation': + self.projection = nn.Linear(configs.d_model, configs.c_out, bias=True) + if self.task_name == 'anomaly_detection': + self.projection = nn.Linear(configs.d_model, configs.c_out, bias=True) + if self.task_name == 'classification': + self.act = F.gelu + self.dropout = nn.Dropout(configs.dropout) + self.projection = nn.Linear(configs.d_model * configs.seq_len, configs.num_class) + + def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + # decomp init + mean = torch.mean(x_enc, dim=1).unsqueeze(1).repeat(1, self.pred_len, 1) + seasonal_init, trend_init = self.decomp(x_enc) # x - moving_avg, moving_avg + # decoder input + trend_init = torch.cat([trend_init[:, -self.label_len:, :], mean], dim=1) + seasonal_init = F.pad(seasonal_init[:, -self.label_len:, :], (0, 0, 0, self.pred_len)) + # enc + enc_out = self.enc_embedding(x_enc, x_mark_enc) + 
dec_out = self.dec_embedding(seasonal_init, x_mark_dec) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + # dec + seasonal_part, trend_part = self.decoder(dec_out, enc_out, x_mask=None, cross_mask=None, trend=trend_init) + # final + dec_out = trend_part + seasonal_part + return dec_out + + def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask): + # enc + enc_out = self.enc_embedding(x_enc, x_mark_enc) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + # final + dec_out = self.projection(enc_out) + return dec_out + + def anomaly_detection(self, x_enc): + # enc + enc_out = self.enc_embedding(x_enc, None) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + # final + dec_out = self.projection(enc_out) + return dec_out + + def classification(self, x_enc, x_mark_enc): + # enc + enc_out = self.enc_embedding(x_enc, None) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + # Output + output = self.act(enc_out) + output = self.dropout(output) + output = output * x_mark_enc.unsqueeze(-1) + output = output.reshape(output.shape[0], -1) + output = self.projection(output) + return output + + def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None): + if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': + dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec) + return dec_out[:, -self.pred_len:, :] # [B, L, D] + if self.task_name == 'imputation': + dec_out = self.imputation(x_enc, x_mark_enc, x_dec, x_mark_dec, mask) + return dec_out # [B, L, D] + if self.task_name == 'anomaly_detection': + dec_out = self.anomaly_detection(x_enc) + return dec_out # [B, L, D] + if self.task_name == 'classification': + dec_out = self.classification(x_enc, x_mark_enc) + return dec_out # [B, N] + return None diff --git a/models/FiLM.py b/models/FiLM.py new file mode 100644 index 0000000..1240e37 --- /dev/null +++ b/models/FiLM.py @@ -0,0 +1,268 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +import numpy as np +from scipy import signal +from scipy import special as ss + +device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + + +def transition(N): + Q = np.arange(N, dtype=np.float64) + R = (2 * Q + 1)[:, None] # / theta + j, i = np.meshgrid(Q, Q) + A = np.where(i < j, -1, (-1.) ** (i - j + 1)) * R + B = (-1.) ** Q[:, None] * R + return A, B + + +class HiPPO_LegT(nn.Module): + def __init__(self, N, dt=1.0, discretization='bilinear'): + """ + N: the order of the HiPPO projection + dt: discretization step size - should be roughly inverse to the length of the sequence + """ + super(HiPPO_LegT, self).__init__() + self.N = N + A, B = transition(N) + C = np.ones((1, N)) + D = np.zeros((1,)) + A, B, _, _, _ = signal.cont2discrete((A, B, C, D), dt=dt, method=discretization) + + B = B.squeeze(-1) + + self.register_buffer('A', torch.Tensor(A).to(device)) + self.register_buffer('B', torch.Tensor(B).to(device)) + vals = np.arange(0.0, 1.0, dt) + self.register_buffer('eval_matrix', torch.Tensor( + ss.eval_legendre(np.arange(N)[:, None], 1 - 2 * vals).T).to(device)) + + def forward(self, inputs): + """ + inputs : (length, ...) 
+ output : (length, ..., N) where N is the order of the HiPPO projection + """ + c = torch.zeros(inputs.shape[:-1] + tuple([self.N])).to(device) + cs = [] + for f in inputs.permute([-1, 0, 1]): + f = f.unsqueeze(-1) + new = f @ self.B.unsqueeze(0) + c = F.linear(c, self.A) + new + cs.append(c) + return torch.stack(cs, dim=0) + + def reconstruct(self, c): + return (self.eval_matrix @ c.unsqueeze(-1)).squeeze(-1) + + +class SpectralConv1d(nn.Module): + def __init__(self, in_channels, out_channels, seq_len, ratio=0.5): + """ + 1D Fourier layer. It does FFT, linear transform, and Inverse FFT. + """ + super(SpectralConv1d, self).__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.ratio = ratio + self.modes = min(32, seq_len // 2) + self.index = list(range(0, self.modes)) + + self.scale = (1 / (in_channels * out_channels)) + self.weights_real = nn.Parameter( + self.scale * torch.rand(in_channels, out_channels, len(self.index), dtype=torch.float)) + self.weights_imag = nn.Parameter( + self.scale * torch.rand(in_channels, out_channels, len(self.index), dtype=torch.float)) + + def compl_mul1d(self, order, x, weights_real, weights_imag): + return torch.complex(torch.einsum(order, x.real, weights_real) - torch.einsum(order, x.imag, weights_imag), + torch.einsum(order, x.real, weights_imag) + torch.einsum(order, x.imag, weights_real)) + + def forward(self, x): + B, H, E, N = x.shape + x_ft = torch.fft.rfft(x) + out_ft = torch.zeros(B, H, self.out_channels, x.size(-1) // 2 + 1, device=x.device, dtype=torch.cfloat) + a = x_ft[:, :, :, :self.modes] + out_ft[:, :, :, :self.modes] = self.compl_mul1d("bjix,iox->bjox", a, self.weights_real, self.weights_imag) + x = torch.fft.irfft(out_ft, n=x.size(-1)) + return x + + +class Model(nn.Module): + """ + Paper link: https://arxiv.org/abs/2205.08897 + """ + def __init__(self, configs): + super(Model, self).__init__() + self.task_name = configs.task_name + self.configs = configs + self.seq_len = configs.seq_len + self.label_len = configs.label_len + self.pred_len = configs.seq_len if configs.pred_len == 0 else configs.pred_len + + self.seq_len_all = self.seq_len + self.label_len + + self.layers = configs.e_layers + self.enc_in = configs.enc_in + self.e_layers = configs.e_layers + # b, s, f means b, f + self.affine_weight = nn.Parameter(torch.ones(1, 1, configs.enc_in)) + self.affine_bias = nn.Parameter(torch.zeros(1, 1, configs.enc_in)) + + self.multiscale = [1, 2, 4] + self.window_size = [256] + configs.ratio = 0.5 + self.legts = nn.ModuleList( + [HiPPO_LegT(N=n, dt=1. 
/ self.pred_len / i) for n in self.window_size for i in self.multiscale]) + self.spec_conv_1 = nn.ModuleList([SpectralConv1d(in_channels=n, out_channels=n, + seq_len=min(self.pred_len, self.seq_len), + ratio=configs.ratio) for n in + self.window_size for _ in range(len(self.multiscale))]) + self.mlp = nn.Linear(len(self.multiscale) * len(self.window_size), 1) + + if self.task_name == 'imputation' or self.task_name == 'anomaly_detection': + self.projection = nn.Linear( + configs.d_model, configs.c_out, bias=True) + if self.task_name == 'classification': + self.act = F.gelu + self.dropout = nn.Dropout(configs.dropout) + self.projection = nn.Linear( + configs.enc_in * configs.seq_len, configs.num_class) + + def forecast(self, x_enc, x_mark_enc, x_dec_true, x_mark_dec): + # Normalization from Non-stationary Transformer + means = x_enc.mean(1, keepdim=True).detach() + x_enc = x_enc - means + stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5).detach() + x_enc /= stdev + + x_enc = x_enc * self.affine_weight + self.affine_bias + x_decs = [] + jump_dist = 0 + for i in range(0, len(self.multiscale) * len(self.window_size)): + x_in_len = self.multiscale[i % len(self.multiscale)] * self.pred_len + x_in = x_enc[:, -x_in_len:] + legt = self.legts[i] + x_in_c = legt(x_in.transpose(1, 2)).permute([1, 2, 3, 0])[:, :, :, jump_dist:] + out1 = self.spec_conv_1[i](x_in_c) + if self.seq_len >= self.pred_len: + x_dec_c = out1.transpose(2, 3)[:, :, self.pred_len - 1 - jump_dist, :] + else: + x_dec_c = out1.transpose(2, 3)[:, :, -1, :] + x_dec = x_dec_c @ legt.eval_matrix[-self.pred_len:, :].T + x_decs.append(x_dec) + x_dec = torch.stack(x_decs, dim=-1) + x_dec = self.mlp(x_dec).squeeze(-1).permute(0, 2, 1) + + # De-Normalization from Non-stationary Transformer + x_dec = x_dec - self.affine_bias + x_dec = x_dec / (self.affine_weight + 1e-10) + x_dec = x_dec * stdev + x_dec = x_dec + means + return x_dec + + def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask): + # Normalization from Non-stationary Transformer + means = x_enc.mean(1, keepdim=True).detach() + x_enc = x_enc - means + stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5).detach() + x_enc /= stdev + + x_enc = x_enc * self.affine_weight + self.affine_bias + x_decs = [] + jump_dist = 0 + for i in range(0, len(self.multiscale) * len(self.window_size)): + x_in_len = self.multiscale[i % len(self.multiscale)] * self.pred_len + x_in = x_enc[:, -x_in_len:] + legt = self.legts[i] + x_in_c = legt(x_in.transpose(1, 2)).permute([1, 2, 3, 0])[:, :, :, jump_dist:] + out1 = self.spec_conv_1[i](x_in_c) + if self.seq_len >= self.pred_len: + x_dec_c = out1.transpose(2, 3)[:, :, self.pred_len - 1 - jump_dist, :] + else: + x_dec_c = out1.transpose(2, 3)[:, :, -1, :] + x_dec = x_dec_c @ legt.eval_matrix[-self.pred_len:, :].T + x_decs.append(x_dec) + x_dec = torch.stack(x_decs, dim=-1) + x_dec = self.mlp(x_dec).squeeze(-1).permute(0, 2, 1) + + # De-Normalization from Non-stationary Transformer + x_dec = x_dec - self.affine_bias + x_dec = x_dec / (self.affine_weight + 1e-10) + x_dec = x_dec * stdev + x_dec = x_dec + means + return x_dec + + def anomaly_detection(self, x_enc): + # Normalization from Non-stationary Transformer + means = x_enc.mean(1, keepdim=True).detach() + x_enc = x_enc - means + stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5).detach() + x_enc /= stdev + + x_enc = x_enc * self.affine_weight + self.affine_bias + x_decs = [] + jump_dist = 0 + for i in range(0, 
len(self.multiscale) * len(self.window_size)): + x_in_len = self.multiscale[i % len(self.multiscale)] * self.pred_len + x_in = x_enc[:, -x_in_len:] + legt = self.legts[i] + x_in_c = legt(x_in.transpose(1, 2)).permute([1, 2, 3, 0])[:, :, :, jump_dist:] + out1 = self.spec_conv_1[i](x_in_c) + if self.seq_len >= self.pred_len: + x_dec_c = out1.transpose(2, 3)[:, :, self.pred_len - 1 - jump_dist, :] + else: + x_dec_c = out1.transpose(2, 3)[:, :, -1, :] + x_dec = x_dec_c @ legt.eval_matrix[-self.pred_len:, :].T + x_decs.append(x_dec) + x_dec = torch.stack(x_decs, dim=-1) + x_dec = self.mlp(x_dec).squeeze(-1).permute(0, 2, 1) + + # De-Normalization from Non-stationary Transformer + x_dec = x_dec - self.affine_bias + x_dec = x_dec / (self.affine_weight + 1e-10) + x_dec = x_dec * stdev + x_dec = x_dec + means + return x_dec + + def classification(self, x_enc, x_mark_enc): + x_enc = x_enc * self.affine_weight + self.affine_bias + x_decs = [] + jump_dist = 0 + for i in range(0, len(self.multiscale) * len(self.window_size)): + x_in_len = self.multiscale[i % len(self.multiscale)] * self.pred_len + x_in = x_enc[:, -x_in_len:] + legt = self.legts[i] + x_in_c = legt(x_in.transpose(1, 2)).permute([1, 2, 3, 0])[:, :, :, jump_dist:] + out1 = self.spec_conv_1[i](x_in_c) + if self.seq_len >= self.pred_len: + x_dec_c = out1.transpose(2, 3)[:, :, self.pred_len - 1 - jump_dist, :] + else: + x_dec_c = out1.transpose(2, 3)[:, :, -1, :] + x_dec = x_dec_c @ legt.eval_matrix[-self.pred_len:, :].T + x_decs.append(x_dec) + x_dec = torch.stack(x_decs, dim=-1) + x_dec = self.mlp(x_dec).squeeze(-1).permute(0, 2, 1) + + # Output from Non-stationary Transformer + output = self.act(x_dec) + output = self.dropout(output) + output = output * x_mark_enc.unsqueeze(-1) + output = output.reshape(output.shape[0], -1) + output = self.projection(output) + return output + + def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None): + if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': + dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec) + return dec_out[:, -self.pred_len:, :] # [B, L, D] + if self.task_name == 'imputation': + dec_out = self.imputation(x_enc, x_mark_enc, x_dec, x_mark_dec, mask) + return dec_out # [B, L, D] + if self.task_name == 'anomaly_detection': + dec_out = self.anomaly_detection(x_enc) + return dec_out # [B, L, D] + if self.task_name == 'classification': + dec_out = self.classification(x_enc, x_mark_enc) + return dec_out # [B, N] + return None diff --git a/models/FreTS.py b/models/FreTS.py new file mode 100644 index 0000000..ca4e0b6 --- /dev/null +++ b/models/FreTS.py @@ -0,0 +1,118 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +import numpy as np + + +class Model(nn.Module): + """ + Paper link: https://arxiv.org/pdf/2311.06184.pdf + """ + + def __init__(self, configs): + super(Model, self).__init__() + self.task_name = configs.task_name + if self.task_name == 'classification' or self.task_name == 'anomaly_detection' or self.task_name == 'imputation': + self.pred_len = configs.seq_len + else: + self.pred_len = configs.pred_len + self.embed_size = 128 # embed_size + self.hidden_size = 256 # hidden_size + self.pred_len = configs.pred_len + self.feature_size = configs.enc_in # channels + self.seq_len = configs.seq_len + self.channel_independence = configs.channel_independence + self.sparsity_threshold = 0.01 + self.scale = 0.02 + self.embeddings = nn.Parameter(torch.randn(1, self.embed_size)) + self.r1 = nn.Parameter(self.scale * 
torch.randn(self.embed_size, self.embed_size)) + self.i1 = nn.Parameter(self.scale * torch.randn(self.embed_size, self.embed_size)) + self.rb1 = nn.Parameter(self.scale * torch.randn(self.embed_size)) + self.ib1 = nn.Parameter(self.scale * torch.randn(self.embed_size)) + self.r2 = nn.Parameter(self.scale * torch.randn(self.embed_size, self.embed_size)) + self.i2 = nn.Parameter(self.scale * torch.randn(self.embed_size, self.embed_size)) + self.rb2 = nn.Parameter(self.scale * torch.randn(self.embed_size)) + self.ib2 = nn.Parameter(self.scale * torch.randn(self.embed_size)) + + self.fc = nn.Sequential( + nn.Linear(self.seq_len * self.embed_size, self.hidden_size), + nn.LeakyReLU(), + nn.Linear(self.hidden_size, self.pred_len) + ) + + # dimension extension + def tokenEmb(self, x): + # x: [Batch, Input length, Channel] + x = x.permute(0, 2, 1) + x = x.unsqueeze(3) + # N*T*1 x 1*D = N*T*D + y = self.embeddings + return x * y + + # frequency temporal learner + def MLP_temporal(self, x, B, N, L): + # [B, N, T, D] + x = torch.fft.rfft(x, dim=2, norm='ortho') # FFT on L dimension + y = self.FreMLP(B, N, L, x, self.r2, self.i2, self.rb2, self.ib2) + x = torch.fft.irfft(y, n=self.seq_len, dim=2, norm="ortho") + return x + + # frequency channel learner + def MLP_channel(self, x, B, N, L): + # [B, N, T, D] + x = x.permute(0, 2, 1, 3) + # [B, T, N, D] + x = torch.fft.rfft(x, dim=2, norm='ortho') # FFT on N dimension + y = self.FreMLP(B, L, N, x, self.r1, self.i1, self.rb1, self.ib1) + x = torch.fft.irfft(y, n=self.feature_size, dim=2, norm="ortho") + x = x.permute(0, 2, 1, 3) + # [B, N, T, D] + return x + + # frequency-domain MLPs + # dimension: FFT along the dimension, r: the real part of weights, i: the imaginary part of weights + # rb: the real part of bias, ib: the imaginary part of bias + def FreMLP(self, B, nd, dimension, x, r, i, rb, ib): + o1_real = torch.zeros([B, nd, dimension // 2 + 1, self.embed_size], + device=x.device) + o1_imag = torch.zeros([B, nd, dimension // 2 + 1, self.embed_size], + device=x.device) + + o1_real = F.relu( + torch.einsum('bijd,dd->bijd', x.real, r) - \ + torch.einsum('bijd,dd->bijd', x.imag, i) + \ + rb + ) + + o1_imag = F.relu( + torch.einsum('bijd,dd->bijd', x.imag, r) + \ + torch.einsum('bijd,dd->bijd', x.real, i) + \ + ib + ) + + y = torch.stack([o1_real, o1_imag], dim=-1) + y = F.softshrink(y, lambd=self.sparsity_threshold) + y = torch.view_as_complex(y) + return y + + def forecast(self, x_enc): + # x: [Batch, Input length, Channel] + B, T, N = x_enc.shape + # embedding x: [B, N, T, D] + x = self.tokenEmb(x_enc) + bias = x + # [B, N, T, D] + if self.channel_independence == '0': + x = self.MLP_channel(x, B, N, T) + # [B, N, T, D] + x = self.MLP_temporal(x, B, N, T) + x = x + bias + x = self.fc(x.reshape(B, N, -1)).permute(0, 2, 1) + return x + + def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': + dec_out = self.forecast(x_enc) + return dec_out[:, -self.pred_len:, :] # [B, L, D] + else: + raise ValueError('Only forecast tasks implemented yet') diff --git a/models/Informer.py b/models/Informer.py new file mode 100644 index 0000000..1c22cfc --- /dev/null +++ b/models/Informer.py @@ -0,0 +1,147 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from layers.Transformer_EncDec import Decoder, DecoderLayer, Encoder, EncoderLayer, ConvLayer +from layers.SelfAttention_Family import ProbAttention, AttentionLayer +from layers.Embed import DataEmbedding + + 
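+ # Descriptive note (added comment): Informer pairs ProbSparse self-attention (ProbAttention)
+ # with optional convolutional distilling layers (ConvLayer) between encoder blocks; the
+ # distilling path is only built when configs.distil is set and the task is forecasting.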
+class Model(nn.Module): + """ + Informer with Propspare attention in O(LlogL) complexity + Paper link: https://ojs.aaai.org/index.php/AAAI/article/view/17325/17132 + """ + + def __init__(self, configs): + super(Model, self).__init__() + self.task_name = configs.task_name + self.pred_len = configs.pred_len + self.label_len = configs.label_len + + # Embedding + self.enc_embedding = DataEmbedding(configs.enc_in, configs.d_model, configs.embed, configs.freq, + configs.dropout) + self.dec_embedding = DataEmbedding(configs.dec_in, configs.d_model, configs.embed, configs.freq, + configs.dropout) + + # Encoder + self.encoder = Encoder( + [ + EncoderLayer( + AttentionLayer( + ProbAttention(False, configs.factor, attention_dropout=configs.dropout, + output_attention=False), + configs.d_model, configs.n_heads), + configs.d_model, + configs.d_ff, + dropout=configs.dropout, + activation=configs.activation + ) for l in range(configs.e_layers) + ], + [ + ConvLayer( + configs.d_model + ) for l in range(configs.e_layers - 1) + ] if configs.distil and ('forecast' in configs.task_name) else None, + norm_layer=torch.nn.LayerNorm(configs.d_model) + ) + # Decoder + self.decoder = Decoder( + [ + DecoderLayer( + AttentionLayer( + ProbAttention(True, configs.factor, attention_dropout=configs.dropout, output_attention=False), + configs.d_model, configs.n_heads), + AttentionLayer( + ProbAttention(False, configs.factor, attention_dropout=configs.dropout, output_attention=False), + configs.d_model, configs.n_heads), + configs.d_model, + configs.d_ff, + dropout=configs.dropout, + activation=configs.activation, + ) + for l in range(configs.d_layers) + ], + norm_layer=torch.nn.LayerNorm(configs.d_model), + projection=nn.Linear(configs.d_model, configs.c_out, bias=True) + ) + if self.task_name == 'imputation': + self.projection = nn.Linear(configs.d_model, configs.c_out, bias=True) + if self.task_name == 'anomaly_detection': + self.projection = nn.Linear(configs.d_model, configs.c_out, bias=True) + if self.task_name == 'classification': + self.act = F.gelu + self.dropout = nn.Dropout(configs.dropout) + self.projection = nn.Linear(configs.d_model * configs.seq_len, configs.num_class) + + def long_forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + enc_out = self.enc_embedding(x_enc, x_mark_enc) + dec_out = self.dec_embedding(x_dec, x_mark_dec) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + dec_out = self.decoder(dec_out, enc_out, x_mask=None, cross_mask=None) + + return dec_out # [B, L, D] + + def short_forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + # Normalization + mean_enc = x_enc.mean(1, keepdim=True).detach() # B x 1 x E + x_enc = x_enc - mean_enc + std_enc = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5).detach() # B x 1 x E + x_enc = x_enc / std_enc + + enc_out = self.enc_embedding(x_enc, x_mark_enc) + dec_out = self.dec_embedding(x_dec, x_mark_dec) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + dec_out = self.decoder(dec_out, enc_out, x_mask=None, cross_mask=None) + + dec_out = dec_out * std_enc + mean_enc + return dec_out # [B, L, D] + + def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask): + # enc + enc_out = self.enc_embedding(x_enc, x_mark_enc) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + # final + dec_out = self.projection(enc_out) + return dec_out + + def anomaly_detection(self, x_enc): + # enc + enc_out = self.enc_embedding(x_enc, None) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + # final + dec_out = 
self.projection(enc_out) + return dec_out + + def classification(self, x_enc, x_mark_enc): + # enc + enc_out = self.enc_embedding(x_enc, None) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + # Output + output = self.act(enc_out) # the output transformer encoder/decoder embeddings don't include non-linearity + output = self.dropout(output) + output = output * x_mark_enc.unsqueeze(-1) # zero-out padding embeddings + output = output.reshape(output.shape[0], -1) # (batch_size, seq_length * d_model) + output = self.projection(output) # (batch_size, num_classes) + return output + + def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None): + if self.task_name == 'long_term_forecast': + dec_out = self.long_forecast(x_enc, x_mark_enc, x_dec, x_mark_dec) + return dec_out[:, -self.pred_len:, :] # [B, L, D] + if self.task_name == 'short_term_forecast': + dec_out = self.short_forecast(x_enc, x_mark_enc, x_dec, x_mark_dec) + return dec_out[:, -self.pred_len:, :] # [B, L, D] + if self.task_name == 'imputation': + dec_out = self.imputation(x_enc, x_mark_enc, x_dec, x_mark_dec, mask) + return dec_out # [B, L, D] + if self.task_name == 'anomaly_detection': + dec_out = self.anomaly_detection(x_enc) + return dec_out # [B, L, D] + if self.task_name == 'classification': + dec_out = self.classification(x_enc, x_mark_enc) + return dec_out # [B, N] + return None diff --git a/models/Koopa.py b/models/Koopa.py new file mode 100644 index 0000000..0eea941 --- /dev/null +++ b/models/Koopa.py @@ -0,0 +1,337 @@ +import math +import torch +import torch.nn as nn +from data_provider.data_factory import data_provider + + + +class FourierFilter(nn.Module): + """ + Fourier Filter: to time-variant and time-invariant term + """ + def __init__(self, mask_spectrum): + super(FourierFilter, self).__init__() + self.mask_spectrum = mask_spectrum + + def forward(self, x): + xf = torch.fft.rfft(x, dim=1) + mask = torch.ones_like(xf) + mask[:, self.mask_spectrum, :] = 0 + x_var = torch.fft.irfft(xf*mask, dim=1) + x_inv = x - x_var + + return x_var, x_inv + + +class MLP(nn.Module): + ''' + Multilayer perceptron to encode/decode high dimension representation of sequential data + ''' + def __init__(self, + f_in, + f_out, + hidden_dim=128, + hidden_layers=2, + dropout=0.05, + activation='tanh'): + super(MLP, self).__init__() + self.f_in = f_in + self.f_out = f_out + self.hidden_dim = hidden_dim + self.hidden_layers = hidden_layers + self.dropout = dropout + if activation == 'relu': + self.activation = nn.ReLU() + elif activation == 'tanh': + self.activation = nn.Tanh() + else: + raise NotImplementedError + + layers = [nn.Linear(self.f_in, self.hidden_dim), + self.activation, nn.Dropout(self.dropout)] + for i in range(self.hidden_layers-2): + layers += [nn.Linear(self.hidden_dim, self.hidden_dim), + self.activation, nn.Dropout(dropout)] + + layers += [nn.Linear(hidden_dim, f_out)] + self.layers = nn.Sequential(*layers) + + def forward(self, x): + # x: B x S x f_in + # y: B x S x f_out + y = self.layers(x) + return y + + +class KPLayer(nn.Module): + """ + A demonstration of finding one step transition of linear system by DMD iteratively + """ + def __init__(self): + super(KPLayer, self).__init__() + + self.K = None # B E E + + def one_step_forward(self, z, return_rec=False, return_K=False): + B, input_len, E = z.shape + assert input_len > 1, 'snapshots number should be larger than 1' + x, y = z[:, :-1], z[:, 1:] + + # solve linear system + self.K = torch.linalg.lstsq(x, y).solution # B E E + if torch.isnan(self.K).any(): 
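+ # guard against a degenerate least-squares solve (e.g. ill-conditioned snapshots): fall back to the identity operator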
+ print('Encounter K with nan, replace K by identity matrix') + self.K = torch.eye(self.K.shape[1]).to(self.K.device).unsqueeze(0).repeat(B, 1, 1) + + z_pred = torch.bmm(z[:, -1:], self.K) + if return_rec: + z_rec = torch.cat((z[:, :1], torch.bmm(x, self.K)), dim=1) + return z_rec, z_pred + + return z_pred + + def forward(self, z, pred_len=1): + assert pred_len >= 1, 'prediction length should not be less than 1' + z_rec, z_pred= self.one_step_forward(z, return_rec=True) + z_preds = [z_pred] + for i in range(1, pred_len): + z_pred = torch.bmm(z_pred, self.K) + z_preds.append(z_pred) + z_preds = torch.cat(z_preds, dim=1) + return z_rec, z_preds + + +class KPLayerApprox(nn.Module): + """ + Find koopman transition of linear system by DMD with multistep K approximation + """ + def __init__(self): + super(KPLayerApprox, self).__init__() + + self.K = None # B E E + self.K_step = None # B E E + + def forward(self, z, pred_len=1): + # z: B L E, koopman invariance space representation + # z_rec: B L E, reconstructed representation + # z_pred: B S E, forecasting representation + B, input_len, E = z.shape + assert input_len > 1, 'snapshots number should be larger than 1' + x, y = z[:, :-1], z[:, 1:] + + # solve linear system + self.K = torch.linalg.lstsq(x, y).solution # B E E + + if torch.isnan(self.K).any(): + print('Encounter K with nan, replace K by identity matrix') + self.K = torch.eye(self.K.shape[1]).to(self.K.device).unsqueeze(0).repeat(B, 1, 1) + + z_rec = torch.cat((z[:, :1], torch.bmm(x, self.K)), dim=1) # B L E + + if pred_len <= input_len: + self.K_step = torch.linalg.matrix_power(self.K, pred_len) + if torch.isnan(self.K_step).any(): + print('Encounter multistep K with nan, replace it by identity matrix') + self.K_step = torch.eye(self.K_step.shape[1]).to(self.K_step.device).unsqueeze(0).repeat(B, 1, 1) + z_pred = torch.bmm(z[:, -pred_len:, :], self.K_step) + else: + self.K_step = torch.linalg.matrix_power(self.K, input_len) + if torch.isnan(self.K_step).any(): + print('Encounter multistep K with nan, replace it by identity matrix') + self.K_step = torch.eye(self.K_step.shape[1]).to(self.K_step.device).unsqueeze(0).repeat(B, 1, 1) + temp_z_pred, all_pred = z, [] + for _ in range(math.ceil(pred_len / input_len)): + temp_z_pred = torch.bmm(temp_z_pred, self.K_step) + all_pred.append(temp_z_pred) + z_pred = torch.cat(all_pred, dim=1)[:, :pred_len, :] + + return z_rec, z_pred + + +class TimeVarKP(nn.Module): + """ + Koopman Predictor with DMD (analysitical solution of Koopman operator) + Utilize local variations within individual sliding window to predict the future of time-variant term + """ + def __init__(self, + enc_in=8, + input_len=96, + pred_len=96, + seg_len=24, + dynamic_dim=128, + encoder=None, + decoder=None, + multistep=False, + ): + super(TimeVarKP, self).__init__() + self.input_len = input_len + self.pred_len = pred_len + self.enc_in = enc_in + self.seg_len = seg_len + self.dynamic_dim = dynamic_dim + self.multistep = multistep + self.encoder, self.decoder = encoder, decoder + self.freq = math.ceil(self.input_len / self.seg_len) # segment number of input + self.step = math.ceil(self.pred_len / self.seg_len) # segment number of output + self.padding_len = self.seg_len * self.freq - self.input_len + # Approximate mulitstep K by KPLayerApprox when pred_len is large + self.dynamics = KPLayerApprox() if self.multistep else KPLayer() + + def forward(self, x): + # x: B L C + B, L, C = x.shape + + res = torch.cat((x[:, L-self.padding_len:, :], x) ,dim=1) + + res = res.chunk(self.freq, 
dim=1) # F x B P C, P means seg_len + res = torch.stack(res, dim=1).reshape(B, self.freq, -1) # B F PC + + res = self.encoder(res) # B F H + x_rec, x_pred = self.dynamics(res, self.step) # B F H, B S H + + x_rec = self.decoder(x_rec) # B F PC + x_rec = x_rec.reshape(B, self.freq, self.seg_len, self.enc_in) + x_rec = x_rec.reshape(B, -1, self.enc_in)[:, :self.input_len, :] # B L C + + x_pred = self.decoder(x_pred) # B S PC + x_pred = x_pred.reshape(B, self.step, self.seg_len, self.enc_in) + x_pred = x_pred.reshape(B, -1, self.enc_in)[:, :self.pred_len, :] # B S C + + return x_rec, x_pred + + +class TimeInvKP(nn.Module): + """ + Koopman Predictor with learnable Koopman operator + Utilize lookback and forecast window snapshots to predict the future of time-invariant term + """ + def __init__(self, + input_len=96, + pred_len=96, + dynamic_dim=128, + encoder=None, + decoder=None): + super(TimeInvKP, self).__init__() + self.dynamic_dim = dynamic_dim + self.input_len = input_len + self.pred_len = pred_len + self.encoder = encoder + self.decoder = decoder + + K_init = torch.randn(self.dynamic_dim, self.dynamic_dim) + U, _, V = torch.svd(K_init) # stable initialization + self.K = nn.Linear(self.dynamic_dim, self.dynamic_dim, bias=False) + self.K.weight.data = torch.mm(U, V.t()) + + def forward(self, x): + # x: B L C + res = x.transpose(1, 2) # B C L + res = self.encoder(res) # B C H + res = self.K(res) # B C H + res = self.decoder(res) # B C S + res = res.transpose(1, 2) # B S C + + return res + + +class Model(nn.Module): + ''' + Paper link: https://arxiv.org/pdf/2305.18803.pdf + ''' + def __init__(self, configs, dynamic_dim=128, hidden_dim=64, hidden_layers=2, num_blocks=3, multistep=False): + """ + mask_spectrum: list, shared frequency spectrums + seg_len: int, segment length of time series + dynamic_dim: int, latent dimension of koopman embedding + hidden_dim: int, hidden dimension of en/decoder + hidden_layers: int, number of hidden layers of en/decoder + num_blocks: int, number of Koopa blocks + multistep: bool, whether to use approximation for multistep K + alpha: float, spectrum filter ratio + """ + super(Model, self).__init__() + self.task_name = configs.task_name + self.enc_in = configs.enc_in + self.input_len = configs.seq_len + self.pred_len = configs.pred_len + + self.seg_len = self.pred_len + self.num_blocks = num_blocks + self.dynamic_dim = dynamic_dim + self.hidden_dim = hidden_dim + self.hidden_layers = hidden_layers + self.multistep = multistep + self.alpha = 0.2 + self.mask_spectrum = self._get_mask_spectrum(configs) + + self.disentanglement = FourierFilter(self.mask_spectrum) + + # shared encoder/decoder to make koopman embedding consistent + self.time_inv_encoder = MLP(f_in=self.input_len, f_out=self.dynamic_dim, activation='relu', + hidden_dim=self.hidden_dim, hidden_layers=self.hidden_layers) + self.time_inv_decoder = MLP(f_in=self.dynamic_dim, f_out=self.pred_len, activation='relu', + hidden_dim=self.hidden_dim, hidden_layers=self.hidden_layers) + self.time_inv_kps = self.time_var_kps = nn.ModuleList([ + TimeInvKP(input_len=self.input_len, + pred_len=self.pred_len, + dynamic_dim=self.dynamic_dim, + encoder=self.time_inv_encoder, + decoder=self.time_inv_decoder) + for _ in range(self.num_blocks)]) + + # shared encoder/decoder to make koopman embedding consistent + self.time_var_encoder = MLP(f_in=self.seg_len*self.enc_in, f_out=self.dynamic_dim, activation='tanh', + hidden_dim=self.hidden_dim, hidden_layers=self.hidden_layers) + self.time_var_decoder = 
MLP(f_in=self.dynamic_dim, f_out=self.seg_len*self.enc_in, activation='tanh', + hidden_dim=self.hidden_dim, hidden_layers=self.hidden_layers) + self.time_var_kps = nn.ModuleList([ + TimeVarKP(enc_in=configs.enc_in, + input_len=self.input_len, + pred_len=self.pred_len, + seg_len=self.seg_len, + dynamic_dim=self.dynamic_dim, + encoder=self.time_var_encoder, + decoder=self.time_var_decoder, + multistep=self.multistep) + for _ in range(self.num_blocks)]) + + def _get_mask_spectrum(self, configs): + """ + get shared frequency spectrums + """ + train_data, train_loader = data_provider(configs, 'train') + amps = 0.0 + for data in train_loader: + lookback_window = data[0] + amps += abs(torch.fft.rfft(lookback_window, dim=1)).mean(dim=0).mean(dim=1) + mask_spectrum = amps.topk(int(amps.shape[0]*self.alpha)).indices + return mask_spectrum # as the spectrums of time-invariant component + + def forecast(self, x_enc): + # Series Stationarization adopted from NSformer + mean_enc = x_enc.mean(1, keepdim=True).detach() # B x 1 x E + x_enc = x_enc - mean_enc + std_enc = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5).detach() + x_enc = x_enc / std_enc + + # Koopman Forecasting + residual, forecast = x_enc, None + for i in range(self.num_blocks): + time_var_input, time_inv_input = self.disentanglement(residual) + time_inv_output = self.time_inv_kps[i](time_inv_input) + time_var_backcast, time_var_output = self.time_var_kps[i](time_var_input) + residual = residual - time_var_backcast + if forecast is None: + forecast = (time_inv_output + time_var_output) + else: + forecast += (time_inv_output + time_var_output) + + # Series Stationarization adopted from NSformer + res = forecast * std_enc + mean_enc + + return res + + def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + if self.task_name == 'long_term_forecast': + dec_out = self.forecast(x_enc) + return dec_out[:, -self.pred_len:, :] # [B, L, D] diff --git a/models/LightTS.py b/models/LightTS.py new file mode 100644 index 0000000..a2051e4 --- /dev/null +++ b/models/LightTS.py @@ -0,0 +1,165 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class IEBlock(nn.Module): + def __init__(self, input_dim, hid_dim, output_dim, num_node): + super(IEBlock, self).__init__() + + self.input_dim = input_dim + self.hid_dim = hid_dim + self.output_dim = output_dim + self.num_node = num_node + + self._build() + + def _build(self): + self.spatial_proj = nn.Sequential( + nn.Linear(self.input_dim, self.hid_dim), + nn.LeakyReLU(), + nn.Linear(self.hid_dim, self.hid_dim // 4) + ) + + self.channel_proj = nn.Linear(self.num_node, self.num_node) + torch.nn.init.eye_(self.channel_proj.weight) + + self.output_proj = nn.Linear(self.hid_dim // 4, self.output_dim) + + def forward(self, x): + x = self.spatial_proj(x.permute(0, 2, 1)) + x = x.permute(0, 2, 1) + self.channel_proj(x.permute(0, 2, 1)) + x = self.output_proj(x.permute(0, 2, 1)) + + x = x.permute(0, 2, 1) + + return x + + +class Model(nn.Module): + """ + Paper link: https://arxiv.org/abs/2207.01186 + """ + + def __init__(self, configs, chunk_size=24): + """ + chunk_size: int, reshape T into [num_chunks, chunk_size] + """ + super(Model, self).__init__() + self.task_name = configs.task_name + self.seq_len = configs.seq_len + if self.task_name == 'classification' or self.task_name == 'anomaly_detection' or self.task_name == 'imputation': + self.pred_len = configs.seq_len + else: + self.pred_len = configs.pred_len + + if configs.task_name == 'long_term_forecast' or 
configs.task_name == 'short_term_forecast': + self.chunk_size = min(configs.pred_len, configs.seq_len, chunk_size) + else: + self.chunk_size = min(configs.seq_len, chunk_size) + # assert (self.seq_len % self.chunk_size == 0) + if self.seq_len % self.chunk_size != 0: + self.seq_len += (self.chunk_size - self.seq_len % self.chunk_size) # padding in order to ensure complete division + self.num_chunks = self.seq_len // self.chunk_size + + self.d_model = configs.d_model + self.enc_in = configs.enc_in + self.dropout = configs.dropout + if self.task_name == 'classification': + self.act = F.gelu + self.dropout = nn.Dropout(configs.dropout) + self.projection = nn.Linear(configs.enc_in * configs.seq_len, configs.num_class) + self._build() + + def _build(self): + self.layer_1 = IEBlock( + input_dim=self.chunk_size, + hid_dim=self.d_model // 4, + output_dim=self.d_model // 4, + num_node=self.num_chunks + ) + + self.chunk_proj_1 = nn.Linear(self.num_chunks, 1) + + self.layer_2 = IEBlock( + input_dim=self.chunk_size, + hid_dim=self.d_model // 4, + output_dim=self.d_model // 4, + num_node=self.num_chunks + ) + + self.chunk_proj_2 = nn.Linear(self.num_chunks, 1) + + self.layer_3 = IEBlock( + input_dim=self.d_model // 2, + hid_dim=self.d_model // 2, + output_dim=self.pred_len, + num_node=self.enc_in + ) + + self.ar = nn.Linear(self.seq_len, self.pred_len) + + def encoder(self, x): + B, T, N = x.size() + + # padding + x = torch.cat([x, torch.zeros((B, self.seq_len - T, N)).to(x.device)], dim=1) + + highway = self.ar(x.permute(0, 2, 1)) + highway = highway.permute(0, 2, 1) + + # continuous sampling + x1 = x.reshape(B, self.num_chunks, self.chunk_size, N) + x1 = x1.permute(0, 3, 2, 1) + x1 = x1.reshape(-1, self.chunk_size, self.num_chunks) + x1 = self.layer_1(x1) + x1 = self.chunk_proj_1(x1).squeeze(dim=-1) + + # interval sampling + x2 = x.reshape(B, self.chunk_size, self.num_chunks, N) + x2 = x2.permute(0, 3, 1, 2) + x2 = x2.reshape(-1, self.chunk_size, self.num_chunks) + x2 = self.layer_2(x2) + x2 = self.chunk_proj_2(x2).squeeze(dim=-1) + + x3 = torch.cat([x1, x2], dim=-1) + + x3 = x3.reshape(B, N, -1) + x3 = x3.permute(0, 2, 1) + + out = self.layer_3(x3) + + out = out + highway + return out + + def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + return self.encoder(x_enc) + + def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask): + return self.encoder(x_enc) + + def anomaly_detection(self, x_enc): + return self.encoder(x_enc) + + def classification(self, x_enc, x_mark_enc): + enc_out = self.encoder(x_enc) + + # Output + output = enc_out.reshape(enc_out.shape[0], -1) # (batch_size, seq_length * d_model) + output = self.projection(output) # (batch_size, num_classes) + return output + + def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None): + if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': + dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec) + return dec_out[:, -self.pred_len:, :] # [B, L, D] + if self.task_name == 'imputation': + dec_out = self.imputation(x_enc, x_mark_enc, x_dec, x_mark_dec, mask) + return dec_out # [B, L, D] + if self.task_name == 'anomaly_detection': + dec_out = self.anomaly_detection(x_enc) + return dec_out # [B, L, D] + if self.task_name == 'classification': + dec_out = self.classification(x_enc, x_mark_enc) + return dec_out # [B, N] + return None diff --git a/models/MICN.py b/models/MICN.py new file mode 100644 index 0000000..4d79f64 --- /dev/null +++ b/models/MICN.py @@ -0,0 +1,222 @@ +import torch 
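+# MICN (defined below): the input series is split by a multi-scale hybrid decomposition into a
+# seasonal part, modelled with local downsampling/upsampling convolutions plus a global isometric
+# convolution at several kernel sizes, and a trend part, modelled by a linear regression over the
+# lookback window (initialised with uniform weights, following the DLinear-style trend branch).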
+import torch.nn as nn +from layers.Embed import DataEmbedding +from layers.Autoformer_EncDec import series_decomp, series_decomp_multi +import torch.nn.functional as F + + +class MIC(nn.Module): + """ + MIC layer to extract local and global features + """ + + def __init__(self, feature_size=512, n_heads=8, dropout=0.05, decomp_kernel=[32], conv_kernel=[24], + isometric_kernel=[18, 6], device='cuda'): + super(MIC, self).__init__() + self.conv_kernel = conv_kernel + self.device = device + + # isometric convolution + self.isometric_conv = nn.ModuleList([nn.Conv1d(in_channels=feature_size, out_channels=feature_size, + kernel_size=i, padding=0, stride=1) + for i in isometric_kernel]) + + # downsampling convolution: padding=i//2, stride=i + self.conv = nn.ModuleList([nn.Conv1d(in_channels=feature_size, out_channels=feature_size, + kernel_size=i, padding=i // 2, stride=i) + for i in conv_kernel]) + + # upsampling convolution + self.conv_trans = nn.ModuleList([nn.ConvTranspose1d(in_channels=feature_size, out_channels=feature_size, + kernel_size=i, padding=0, stride=i) + for i in conv_kernel]) + + self.decomp = nn.ModuleList([series_decomp(k) for k in decomp_kernel]) + self.merge = torch.nn.Conv2d(in_channels=feature_size, out_channels=feature_size, + kernel_size=(len(self.conv_kernel), 1)) + + # feedforward network + self.conv1 = nn.Conv1d(in_channels=feature_size, out_channels=feature_size * 4, kernel_size=1) + self.conv2 = nn.Conv1d(in_channels=feature_size * 4, out_channels=feature_size, kernel_size=1) + self.norm1 = nn.LayerNorm(feature_size) + self.norm2 = nn.LayerNorm(feature_size) + + self.norm = torch.nn.LayerNorm(feature_size) + self.act = torch.nn.Tanh() + self.drop = torch.nn.Dropout(0.05) + + def conv_trans_conv(self, input, conv1d, conv1d_trans, isometric): + batch, seq_len, channel = input.shape + x = input.permute(0, 2, 1) + + # downsampling convolution + x1 = self.drop(self.act(conv1d(x))) + x = x1 + + # isometric convolution + zeros = torch.zeros((x.shape[0], x.shape[1], x.shape[2] - 1), device=self.device) + x = torch.cat((zeros, x), dim=-1) + x = self.drop(self.act(isometric(x))) + x = self.norm((x + x1).permute(0, 2, 1)).permute(0, 2, 1) + + # upsampling convolution + x = self.drop(self.act(conv1d_trans(x))) + x = x[:, :, :seq_len] # truncate + + x = self.norm(x.permute(0, 2, 1) + input) + return x + + def forward(self, src): + self.device = src.device + # multi-scale + multi = [] + for i in range(len(self.conv_kernel)): + src_out, trend1 = self.decomp[i](src) + src_out = self.conv_trans_conv(src_out, self.conv[i], self.conv_trans[i], self.isometric_conv[i]) + multi.append(src_out) + + # merge + mg = torch.tensor([], device=self.device) + for i in range(len(self.conv_kernel)): + mg = torch.cat((mg, multi[i].unsqueeze(1).to(self.device)), dim=1) + mg = self.merge(mg.permute(0, 3, 1, 2)).squeeze(-2).permute(0, 2, 1) + + y = self.norm1(mg) + y = self.conv2(self.conv1(y.transpose(-1, 1))).transpose(-1, 1) + + return self.norm2(mg + y) + + +class SeasonalPrediction(nn.Module): + def __init__(self, embedding_size=512, n_heads=8, dropout=0.05, d_layers=1, decomp_kernel=[32], c_out=1, + conv_kernel=[2, 4], isometric_kernel=[18, 6], device='cuda'): + super(SeasonalPrediction, self).__init__() + + self.mic = nn.ModuleList([MIC(feature_size=embedding_size, n_heads=n_heads, + decomp_kernel=decomp_kernel, conv_kernel=conv_kernel, + isometric_kernel=isometric_kernel, device=device) + for i in range(d_layers)]) + + self.projection = nn.Linear(embedding_size, c_out) + + def forward(self, 
dec): + for mic_layer in self.mic: + dec = mic_layer(dec) + return self.projection(dec) + + +class Model(nn.Module): + """ + Paper link: https://openreview.net/pdf?id=zt53IDUR1U + """ + def __init__(self, configs, conv_kernel=[12, 16]): + """ + conv_kernel: downsampling and upsampling convolution kernel_size + """ + super(Model, self).__init__() + + decomp_kernel = [] # kernel of decomposition operation + isometric_kernel = [] # kernel of isometric convolution + for ii in conv_kernel: + if ii % 2 == 0: # the kernel of decomposition operation must be odd + decomp_kernel.append(ii + 1) + isometric_kernel.append((configs.seq_len + configs.pred_len + ii) // ii) + else: + decomp_kernel.append(ii) + isometric_kernel.append((configs.seq_len + configs.pred_len + ii - 1) // ii) + + self.task_name = configs.task_name + self.pred_len = configs.pred_len + self.seq_len = configs.seq_len + + # Multiple Series decomposition block from FEDformer + self.decomp_multi = series_decomp_multi(decomp_kernel) + + # embedding + self.dec_embedding = DataEmbedding(configs.enc_in, configs.d_model, configs.embed, configs.freq, + configs.dropout) + + self.conv_trans = SeasonalPrediction(embedding_size=configs.d_model, n_heads=configs.n_heads, + dropout=configs.dropout, + d_layers=configs.d_layers, decomp_kernel=decomp_kernel, + c_out=configs.c_out, conv_kernel=conv_kernel, + isometric_kernel=isometric_kernel, device=torch.device('cuda:0')) + if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': + # refer to DLinear + self.regression = nn.Linear(configs.seq_len, configs.pred_len) + self.regression.weight = nn.Parameter( + (1 / configs.pred_len) * torch.ones([configs.pred_len, configs.seq_len]), + requires_grad=True) + if self.task_name == 'imputation': + self.projection = nn.Linear(configs.d_model, configs.c_out, bias=True) + if self.task_name == 'anomaly_detection': + self.projection = nn.Linear(configs.d_model, configs.c_out, bias=True) + if self.task_name == 'classification': + self.act = F.gelu + self.dropout = nn.Dropout(configs.dropout) + self.projection = nn.Linear(configs.c_out * configs.seq_len, configs.num_class) + + def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + # Multi-scale Hybrid Decomposition + seasonal_init_enc, trend = self.decomp_multi(x_enc) + trend = self.regression(trend.permute(0, 2, 1)).permute(0, 2, 1) + + # embedding + zeros = torch.zeros([x_dec.shape[0], self.pred_len, x_dec.shape[2]], device=x_enc.device) + seasonal_init_dec = torch.cat([seasonal_init_enc[:, -self.seq_len:, :], zeros], dim=1) + dec_out = self.dec_embedding(seasonal_init_dec, x_mark_dec) + dec_out = self.conv_trans(dec_out) + dec_out = dec_out[:, -self.pred_len:, :] + trend[:, -self.pred_len:, :] + return dec_out + + def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask): + # Multi-scale Hybrid Decomposition + seasonal_init_enc, trend = self.decomp_multi(x_enc) + + # embedding + dec_out = self.dec_embedding(seasonal_init_enc, x_mark_dec) + dec_out = self.conv_trans(dec_out) + dec_out = dec_out + trend + return dec_out + + def anomaly_detection(self, x_enc): + # Multi-scale Hybrid Decomposition + seasonal_init_enc, trend = self.decomp_multi(x_enc) + + # embedding + dec_out = self.dec_embedding(seasonal_init_enc, None) + dec_out = self.conv_trans(dec_out) + dec_out = dec_out + trend + return dec_out + + def classification(self, x_enc, x_mark_enc): + # Multi-scale Hybrid Decomposition + seasonal_init_enc, trend = self.decomp_multi(x_enc) + # embedding + dec_out = 
self.dec_embedding(seasonal_init_enc, None) + dec_out = self.conv_trans(dec_out) + dec_out = dec_out + trend + + # Output from Non-stationary Transformer + output = self.act(dec_out) # the output transformer encoder/decoder embeddings don't include non-linearity + output = self.dropout(output) + output = output * x_mark_enc.unsqueeze(-1) # zero-out padding embeddings + output = output.reshape(output.shape[0], -1) # (batch_size, seq_length * d_model) + output = self.projection(output) # (batch_size, num_classes) + return output + + def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None): + if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': + dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec) + return dec_out[:, -self.pred_len:, :] # [B, L, D] + if self.task_name == 'imputation': + dec_out = self.imputation( + x_enc, x_mark_enc, x_dec, x_mark_dec, mask) + return dec_out # [B, L, D] + if self.task_name == 'anomaly_detection': + dec_out = self.anomaly_detection(x_enc) + return dec_out # [B, L, D] + if self.task_name == 'classification': + dec_out = self.classification(x_enc, x_mark_enc) + return dec_out # [B, N] + return None diff --git a/models/Mamba.py b/models/Mamba.py new file mode 100644 index 0000000..edece42 --- /dev/null +++ b/models/Mamba.py @@ -0,0 +1,50 @@ +import math + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from mamba_ssm import Mamba + +from layers.Embed import DataEmbedding + +class Model(nn.Module): + + def __init__(self, configs): + super(Model, self).__init__() + self.task_name = configs.task_name + self.pred_len = configs.pred_len + + self.d_inner = configs.d_model * configs.expand + self.dt_rank = math.ceil(configs.d_model / 16) # TODO implement "auto" + + self.embedding = DataEmbedding(configs.enc_in, configs.d_model, configs.embed, configs.freq, configs.dropout) + + self.mamba = Mamba( + d_model = configs.d_model, + d_state = configs.d_ff, + d_conv = configs.d_conv, + expand = configs.expand, + ) + + self.out_layer = nn.Linear(configs.d_model, configs.c_out, bias=False) + + def forecast(self, x_enc, x_mark_enc): + mean_enc = x_enc.mean(1, keepdim=True).detach() + x_enc = x_enc - mean_enc + std_enc = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5).detach() + x_enc = x_enc / std_enc + + x = self.embedding(x_enc, x_mark_enc) + x = self.mamba(x) + x_out = self.out_layer(x) + + x_out = x_out * std_enc + mean_enc + return x_out + + def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None): + if self.task_name in ['short_term_forecast', 'long_term_forecast']: + x_out = self.forecast(x_enc, x_mark_enc) + return x_out[:, -self.pred_len:, :] + + # other tasks not implemented \ No newline at end of file diff --git a/models/MambaSimple.py b/models/MambaSimple.py new file mode 100644 index 0000000..a81e664 --- /dev/null +++ b/models/MambaSimple.py @@ -0,0 +1,162 @@ +import math + +import torch +import torch.nn as nn +import torch.nn.functional as F +from einops import rearrange, repeat, einsum + +from layers.Embed import DataEmbedding + + +class Model(nn.Module): + """ + Mamba, linear-time sequence modeling with selective state spaces O(L) + Paper link: https://arxiv.org/abs/2312.00752 + Implementation refernce: https://github.com/johnma2006/mamba-minimal/ + """ + + def __init__(self, configs): + super(Model, self).__init__() + self.task_name = configs.task_name + self.pred_len = configs.pred_len + + self.d_inner = configs.d_model * configs.expand + self.dt_rank 
= math.ceil(configs.d_model / 16) + + self.embedding = DataEmbedding(configs.enc_in, configs.d_model, configs.embed, configs.freq, configs.dropout) + + self.layers = nn.ModuleList([ResidualBlock(configs, self.d_inner, self.dt_rank) for _ in range(configs.e_layers)]) + self.norm = RMSNorm(configs.d_model) + + self.out_layer = nn.Linear(configs.d_model, configs.c_out, bias=False) + + def forecast(self, x_enc, x_mark_enc): + mean_enc = x_enc.mean(1, keepdim=True).detach() + x_enc = x_enc - mean_enc + std_enc = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5).detach() + x_enc = x_enc / std_enc + + x = self.embedding(x_enc, x_mark_enc) + for layer in self.layers: + x = layer(x) + + x = self.norm(x) + x_out = self.out_layer(x) + + x_out = x_out * std_enc + mean_enc + return x_out + + def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None): + if self.task_name in ['short_term_forecast', 'long_term_forecast']: + x_out = self.forecast(x_enc, x_mark_enc) + return x_out[:, -self.pred_len:, :] + + +class ResidualBlock(nn.Module): + def __init__(self, configs, d_inner, dt_rank): + super(ResidualBlock, self).__init__() + + self.mixer = MambaBlock(configs, d_inner, dt_rank) + self.norm = RMSNorm(configs.d_model) + + def forward(self, x): + output = self.mixer(self.norm(x)) + x + return output + +class MambaBlock(nn.Module): + def __init__(self, configs, d_inner, dt_rank): + super(MambaBlock, self).__init__() + self.d_inner = d_inner + self.dt_rank = dt_rank + + self.in_proj = nn.Linear(configs.d_model, self.d_inner * 2, bias=False) + + self.conv1d = nn.Conv1d( + in_channels = self.d_inner, + out_channels = self.d_inner, + bias = True, + kernel_size = configs.d_conv, + padding = configs.d_conv - 1, + groups = self.d_inner + ) + + # takes in x and outputs the input-specific delta, B, C + self.x_proj = nn.Linear(self.d_inner, self.dt_rank + configs.d_ff * 2, bias=False) + + # projects delta + self.dt_proj = nn.Linear(self.dt_rank, self.d_inner, bias=True) + + A = repeat(torch.arange(1, configs.d_ff + 1), "n -> d n", d=self.d_inner).float() + self.A_log = nn.Parameter(torch.log(A)) + self.D = nn.Parameter(torch.ones(self.d_inner)) + + self.out_proj = nn.Linear(self.d_inner, configs.d_model, bias=False) + + def forward(self, x): + """ + Figure 3 in Section 3.4 in the paper + """ + (b, l, d) = x.shape + + x_and_res = self.in_proj(x) # [B, L, 2 * d_inner] + (x, res) = x_and_res.split(split_size=[self.d_inner, self.d_inner], dim=-1) + + x = rearrange(x, "b l d -> b d l") + x = self.conv1d(x)[:, :, :l] + x = rearrange(x, "b d l -> b l d") + + x = F.silu(x) + + y = self.ssm(x) + y = y * F.silu(res) + + output = self.out_proj(y) + return output + + + def ssm(self, x): + """ + Algorithm 2 in Section 3.2 in the paper + """ + + (d_in, n) = self.A_log.shape + + A = -torch.exp(self.A_log.float()) # [d_in, n] + D = self.D.float() # [d_in] + + x_dbl = self.x_proj(x) # [B, L, d_rank + 2 * d_ff] + (delta, B, C) = x_dbl.split(split_size=[self.dt_rank, n, n], dim=-1) # delta: [B, L, d_rank]; B, C: [B, L, n] + delta = F.softplus(self.dt_proj(delta)) # [B, L, d_in] + y = self.selective_scan(x, delta, A, B, C, D) + + return y + + def selective_scan(self, u, delta, A, B, C, D): + (b, l, d_in) = u.shape + n = A.shape[1] + + deltaA = torch.exp(einsum(delta, A, "b l d, d n -> b l d n")) # A is discretized using zero-order hold (ZOH) discretization + deltaB_u = einsum(delta, B, u, "b l d, b l n, b l d -> b l d n") # B is discretized using a simplified Euler discretization instead of ZOH. 
From a discussion with authors: "A is the more important term and the performance doesn't change much with the simplification on B" + + # selective scan, sequential instead of parallel + x = torch.zeros((b, d_in, n), device=deltaA.device) + ys = [] + for i in range(l): + x = deltaA[:, i] * x + deltaB_u[:, i] + y = einsum(x, C[:, i, :], "b d n, b n -> b d") + ys.append(y) + + y = torch.stack(ys, dim=1) # [B, L, d_in] + y = y + u * D + + return y + +class RMSNorm(nn.Module): + def __init__(self, d_model, eps=1e-5): + super(RMSNorm, self).__init__() + self.eps = eps + self.weight = nn.Parameter(torch.ones(d_model)) + + def forward(self, x): + output = x * torch.rsqrt(x.pow(2).mean(-1, keepdim=True) + self.eps) * self.weight + return output diff --git a/models/MultiPatchFormer.py b/models/MultiPatchFormer.py new file mode 100644 index 0000000..84acf88 --- /dev/null +++ b/models/MultiPatchFormer.py @@ -0,0 +1,365 @@ +import torch +import torch.nn as nn +import math +from einops import rearrange + +from layers.SelfAttention_Family import AttentionLayer, FullAttention + + +class FeedForward(nn.Module): + def __init__(self, d_model: int, d_hidden: int = 512): + super(FeedForward, self).__init__() + + self.linear_1 = torch.nn.Linear(d_model, d_hidden) + self.linear_2 = torch.nn.Linear(d_hidden, d_model) + self.activation = torch.nn.GELU() + + def forward(self, x): + x = self.linear_1(x) + x = self.activation(x) + x = self.linear_2(x) + + return x + + +class Encoder(nn.Module): + def __init__( + self, + d_model: int, + mha: AttentionLayer, + d_hidden: int, + dropout: float = 0, + channel_wise=False, + ): + super(Encoder, self).__init__() + + self.channel_wise = channel_wise + if self.channel_wise: + self.conv = torch.nn.Conv1d( + in_channels=d_model, + out_channels=d_model, + kernel_size=1, + stride=1, + padding=0, + padding_mode="reflect", + ) + self.MHA = mha + self.feedforward = FeedForward(d_model=d_model, d_hidden=d_hidden) + self.dropout = torch.nn.Dropout(p=dropout) + self.layerNormal_1 = torch.nn.LayerNorm(d_model) + self.layerNormal_2 = torch.nn.LayerNorm(d_model) + + def forward(self, x): + residual = x + q = residual + if self.channel_wise: + x_r = self.conv(x.permute(0, 2, 1)).transpose(1, 2) + k = x_r + v = x_r + else: + k = residual + v = residual + x, score = self.MHA(q, k, v, attn_mask=None) + x = self.dropout(x) + x = self.layerNormal_1(x + residual) + + residual = x + x = self.feedforward(residual) + x = self.dropout(x) + x = self.layerNormal_2(x + residual) + + return x, score + + +class Model(nn.Module): + def __init__(self, configs): + super(Model, self).__init__() + self.task_name = configs.task_name + self.seq_len = configs.seq_len + self.pred_len = configs.pred_len + self.d_channel = configs.enc_in + self.N = configs.e_layers + # Embedding + self.d_model = configs.d_model + self.d_hidden = configs.d_ff + self.n_heads = configs.n_heads + self.mask = True + self.dropout = configs.dropout + + self.stride1 = 8 + self.patch_len1 = 8 + self.stride2 = 8 + self.patch_len2 = 16 + self.stride3 = 7 + self.patch_len3 = 24 + self.stride4 = 6 + self.patch_len4 = 32 + self.patch_num1 = int((self.seq_len - self.patch_len2) // self.stride2) + 2 + self.padding_patch_layer1 = nn.ReplicationPad1d((0, self.stride1)) + self.padding_patch_layer2 = nn.ReplicationPad1d((0, self.stride2)) + self.padding_patch_layer3 = nn.ReplicationPad1d((0, self.stride3)) + self.padding_patch_layer4 = nn.ReplicationPad1d((0, self.stride4)) + + self.shared_MHA = nn.ModuleList( + [ + AttentionLayer( + 
FullAttention(mask_flag=self.mask), + d_model=self.d_model, + n_heads=self.n_heads, + ) + for _ in range(self.N) + ] + ) + + self.shared_MHA_ch = nn.ModuleList( + [ + AttentionLayer( + FullAttention(mask_flag=self.mask), + d_model=self.d_model, + n_heads=self.n_heads, + ) + for _ in range(self.N) + ] + ) + + self.encoder_list = nn.ModuleList( + [ + Encoder( + d_model=self.d_model, + mha=self.shared_MHA[ll], + d_hidden=self.d_hidden, + dropout=self.dropout, + channel_wise=False, + ) + for ll in range(self.N) + ] + ) + + self.encoder_list_ch = nn.ModuleList( + [ + Encoder( + d_model=self.d_model, + mha=self.shared_MHA_ch[0], + d_hidden=self.d_hidden, + dropout=self.dropout, + channel_wise=True, + ) + for ll in range(self.N) + ] + ) + + pe = torch.zeros(self.patch_num1, self.d_model) + for pos in range(self.patch_num1): + for i in range(0, self.d_model, 2): + wavelength = 10000 ** ((2 * i) / self.d_model) + pe[pos, i] = math.sin(pos / wavelength) + pe[pos, i + 1] = math.cos(pos / wavelength) + pe = pe.unsqueeze(0) # add a batch dimention to your pe matrix + self.register_buffer("pe", pe) + + self.embedding_channel = nn.Conv1d( + in_channels=self.d_model * self.patch_num1, + out_channels=self.d_model, + kernel_size=1, + ) + + self.embedding_patch_1 = torch.nn.Conv1d( + in_channels=1, + out_channels=self.d_model // 4, + kernel_size=self.patch_len1, + stride=self.stride1, + ) + self.embedding_patch_2 = torch.nn.Conv1d( + in_channels=1, + out_channels=self.d_model // 4, + kernel_size=self.patch_len2, + stride=self.stride2, + ) + self.embedding_patch_3 = torch.nn.Conv1d( + in_channels=1, + out_channels=self.d_model // 4, + kernel_size=self.patch_len3, + stride=self.stride3, + ) + self.embedding_patch_4 = torch.nn.Conv1d( + in_channels=1, + out_channels=self.d_model // 4, + kernel_size=self.patch_len4, + stride=self.stride4, + ) + + self.out_linear_1 = torch.nn.Linear(self.d_model, self.pred_len // 8) + self.out_linear_2 = torch.nn.Linear( + self.d_model + self.pred_len // 8, self.pred_len // 8 + ) + self.out_linear_3 = torch.nn.Linear( + self.d_model + 2 * self.pred_len // 8, self.pred_len // 8 + ) + self.out_linear_4 = torch.nn.Linear( + self.d_model + 3 * self.pred_len // 8, self.pred_len // 8 + ) + self.out_linear_5 = torch.nn.Linear( + self.d_model + self.pred_len // 2, self.pred_len // 8 + ) + self.out_linear_6 = torch.nn.Linear( + self.d_model + 5 * self.pred_len // 8, self.pred_len // 8 + ) + self.out_linear_7 = torch.nn.Linear( + self.d_model + 6 * self.pred_len // 8, self.pred_len // 8 + ) + self.out_linear_8 = torch.nn.Linear( + self.d_model + 7 * self.pred_len // 8, + self.pred_len - 7 * (self.pred_len // 8), + ) + + self.remap = torch.nn.Linear(self.d_model, self.seq_len) + + def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + # Normalization + means = x_enc.mean(1, keepdim=True).detach() + x_enc = x_enc - means + stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5) + x_enc /= stdev + + # Multi-scale embedding + x_i = x_enc.permute(0, 2, 1) + + x_i_p1 = x_i + x_i_p2 = self.padding_patch_layer2(x_i) + x_i_p3 = self.padding_patch_layer3(x_i) + x_i_p4 = self.padding_patch_layer4(x_i) + encoding_patch1 = self.embedding_patch_1( + rearrange(x_i_p1, "b c l -> (b c) l").unsqueeze(-1).permute(0, 2, 1) + ).permute(0, 2, 1) + encoding_patch2 = self.embedding_patch_2( + rearrange(x_i_p2, "b c l -> (b c) l").unsqueeze(-1).permute(0, 2, 1) + ).permute(0, 2, 1) + encoding_patch3 = self.embedding_patch_3( + rearrange(x_i_p3, "b c l -> (b c) 
l").unsqueeze(-1).permute(0, 2, 1) + ).permute(0, 2, 1) + encoding_patch4 = self.embedding_patch_4( + rearrange(x_i_p4, "b c l -> (b c) l").unsqueeze(-1).permute(0, 2, 1) + ).permute(0, 2, 1) + + encoding_patch = ( + torch.cat( + (encoding_patch1, encoding_patch2, encoding_patch3, encoding_patch4), + dim=-1, + ) + + self.pe + ) + # Temporal encoding + for i in range(self.N): + encoding_patch = self.encoder_list[i](encoding_patch)[0] + + # Channel-wise encoding + x_patch_c = rearrange( + encoding_patch, "(b c) p d -> b c (p d)", b=x_enc.shape[0], c=self.d_channel + ) + x_ch = self.embedding_channel(x_patch_c.permute(0, 2, 1)).transpose( + 1, 2 + ) # [b c d] + + encoding_1_ch = self.encoder_list_ch[0](x_ch)[0] + + # Semi Auto-regressive + forecast_ch1 = self.out_linear_1(encoding_1_ch) + forecast_ch2 = self.out_linear_2( + torch.cat((encoding_1_ch, forecast_ch1), dim=-1) + ) + forecast_ch3 = self.out_linear_3( + torch.cat((encoding_1_ch, forecast_ch1, forecast_ch2), dim=-1) + ) + forecast_ch4 = self.out_linear_4( + torch.cat((encoding_1_ch, forecast_ch1, forecast_ch2, forecast_ch3), dim=-1) + ) + forecast_ch5 = self.out_linear_5( + torch.cat( + (encoding_1_ch, forecast_ch1, forecast_ch2, forecast_ch3, forecast_ch4), + dim=-1, + ) + ) + forecast_ch6 = self.out_linear_6( + torch.cat( + ( + encoding_1_ch, + forecast_ch1, + forecast_ch2, + forecast_ch3, + forecast_ch4, + forecast_ch5, + ), + dim=-1, + ) + ) + forecast_ch7 = self.out_linear_7( + torch.cat( + ( + encoding_1_ch, + forecast_ch1, + forecast_ch2, + forecast_ch3, + forecast_ch4, + forecast_ch5, + forecast_ch6, + ), + dim=-1, + ) + ) + forecast_ch8 = self.out_linear_8( + torch.cat( + ( + encoding_1_ch, + forecast_ch1, + forecast_ch2, + forecast_ch3, + forecast_ch4, + forecast_ch5, + forecast_ch6, + forecast_ch7, + ), + dim=-1, + ) + ) + + final_forecast = torch.cat( + ( + forecast_ch1, + forecast_ch2, + forecast_ch3, + forecast_ch4, + forecast_ch5, + forecast_ch6, + forecast_ch7, + forecast_ch8, + ), + dim=-1, + ).permute(0, 2, 1) + + # De-Normalization + dec_out = final_forecast * ( + stdev[:, 0].unsqueeze(1).repeat(1, self.pred_len, 1) + ) + dec_out = dec_out + (means[:, 0].unsqueeze(1).repeat(1, self.pred_len, 1)) + return dec_out + + def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None): + if ( + self.task_name == "long_term_forecast" + or self.task_name == "short_term_forecast" + ): + dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec) + return dec_out[:, -self.pred_len :, :] # [B, L, D] + if self.task_name == "imputation": + raise NotImplementedError( + "Task imputation for WPMixer is temporarily not supported" + ) + if self.task_name == "anomaly_detection": + raise NotImplementedError( + "Task anomaly_detection for WPMixer is temporarily not supported" + ) + if self.task_name == "classification": + raise NotImplementedError( + "Task classification for WPMixer is temporarily not supported" + ) + return None diff --git a/models/Nonstationary_Transformer.py b/models/Nonstationary_Transformer.py new file mode 100644 index 0000000..211e8b8 --- /dev/null +++ b/models/Nonstationary_Transformer.py @@ -0,0 +1,230 @@ +import torch +import torch.nn as nn +from layers.Transformer_EncDec import Decoder, DecoderLayer, Encoder, EncoderLayer +from layers.SelfAttention_Family import DSAttention, AttentionLayer +from layers.Embed import DataEmbedding +import torch.nn.functional as F + + +class Projector(nn.Module): + ''' + MLP to learn the De-stationary factors + Paper link: https://openreview.net/pdf?id=ucNDIDRNjjv + ''' + 
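+    # Shape flow: the raw series x (B x S x E) is collapsed over time by a 1-D convolution to
+    # B x 1 x E, concatenated with the per-window statistic (mean or std, B x 1 x E), flattened
+    # to B x 2E, and mapped by the MLP backbone to a de-stationary factor of size output_dim
+    # (1 for tau, seq_len for delta).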
+ def __init__(self, enc_in, seq_len, hidden_dims, hidden_layers, output_dim, kernel_size=3): + super(Projector, self).__init__() + + padding = 1 if torch.__version__ >= '1.5.0' else 2 + self.series_conv = nn.Conv1d(in_channels=seq_len, out_channels=1, kernel_size=kernel_size, padding=padding, + padding_mode='circular', bias=False) + + layers = [nn.Linear(2 * enc_in, hidden_dims[0]), nn.ReLU()] + for i in range(hidden_layers - 1): + layers += [nn.Linear(hidden_dims[i], hidden_dims[i + 1]), nn.ReLU()] + + layers += [nn.Linear(hidden_dims[-1], output_dim, bias=False)] + self.backbone = nn.Sequential(*layers) + + def forward(self, x, stats): + # x: B x S x E + # stats: B x 1 x E + # y: B x O + batch_size = x.shape[0] + x = self.series_conv(x) # B x 1 x E + x = torch.cat([x, stats], dim=1) # B x 2 x E + x = x.view(batch_size, -1) # B x 2E + y = self.backbone(x) # B x O + + return y + + +class Model(nn.Module): + """ + Paper link: https://openreview.net/pdf?id=ucNDIDRNjjv + """ + + def __init__(self, configs): + super(Model, self).__init__() + self.task_name = configs.task_name + self.pred_len = configs.pred_len + self.seq_len = configs.seq_len + self.label_len = configs.label_len + + # Embedding + self.enc_embedding = DataEmbedding(configs.enc_in, configs.d_model, configs.embed, configs.freq, + configs.dropout) + + # Encoder + self.encoder = Encoder( + [ + EncoderLayer( + AttentionLayer( + DSAttention(False, configs.factor, attention_dropout=configs.dropout, + output_attention=False), configs.d_model, configs.n_heads), + configs.d_model, + configs.d_ff, + dropout=configs.dropout, + activation=configs.activation + ) for l in range(configs.e_layers) + ], + norm_layer=torch.nn.LayerNorm(configs.d_model) + ) + # Decoder + if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': + self.dec_embedding = DataEmbedding(configs.dec_in, configs.d_model, configs.embed, configs.freq, + configs.dropout) + self.decoder = Decoder( + [ + DecoderLayer( + AttentionLayer( + DSAttention(True, configs.factor, attention_dropout=configs.dropout, + output_attention=False), + configs.d_model, configs.n_heads), + AttentionLayer( + DSAttention(False, configs.factor, attention_dropout=configs.dropout, + output_attention=False), + configs.d_model, configs.n_heads), + configs.d_model, + configs.d_ff, + dropout=configs.dropout, + activation=configs.activation, + ) + for l in range(configs.d_layers) + ], + norm_layer=torch.nn.LayerNorm(configs.d_model), + projection=nn.Linear(configs.d_model, configs.c_out, bias=True) + ) + if self.task_name == 'imputation': + self.projection = nn.Linear(configs.d_model, configs.c_out, bias=True) + if self.task_name == 'anomaly_detection': + self.projection = nn.Linear(configs.d_model, configs.c_out, bias=True) + if self.task_name == 'classification': + self.act = F.gelu + self.dropout = nn.Dropout(configs.dropout) + self.projection = nn.Linear(configs.d_model * configs.seq_len, configs.num_class) + + self.tau_learner = Projector(enc_in=configs.enc_in, seq_len=configs.seq_len, hidden_dims=configs.p_hidden_dims, + hidden_layers=configs.p_hidden_layers, output_dim=1) + self.delta_learner = Projector(enc_in=configs.enc_in, seq_len=configs.seq_len, + hidden_dims=configs.p_hidden_dims, hidden_layers=configs.p_hidden_layers, + output_dim=configs.seq_len) + + def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + x_raw = x_enc.clone().detach() + + # Normalization + mean_enc = x_enc.mean(1, keepdim=True).detach() # B x 1 x E + x_enc = x_enc - mean_enc + std_enc = 
torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5).detach() # B x 1 x E + x_enc = x_enc / std_enc + # B x S x E, B x 1 x E -> B x 1, positive scalar + tau = self.tau_learner(x_raw, std_enc) + threshold = 80.0 + tau_clamped = torch.clamp(tau, max=threshold) # avoid numerical overflow + tau = tau_clamped.exp() + # B x S x E, B x 1 x E -> B x S + delta = self.delta_learner(x_raw, mean_enc) + + x_dec_new = torch.cat([x_enc[:, -self.label_len:, :], torch.zeros_like(x_dec[:, -self.pred_len:, :])], + dim=1).to(x_enc.device).clone() + + enc_out = self.enc_embedding(x_enc, x_mark_enc) + enc_out, attns = self.encoder(enc_out, attn_mask=None, tau=tau, delta=delta) + + dec_out = self.dec_embedding(x_dec_new, x_mark_dec) + dec_out = self.decoder(dec_out, enc_out, x_mask=None, cross_mask=None, tau=tau, delta=delta) + dec_out = dec_out * std_enc + mean_enc + return dec_out + + def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask): + x_raw = x_enc.clone().detach() + + # Normalization + mean_enc = torch.sum(x_enc, dim=1) / torch.sum(mask == 1, dim=1) + mean_enc = mean_enc.unsqueeze(1).detach() + x_enc = x_enc - mean_enc + x_enc = x_enc.masked_fill(mask == 0, 0) + std_enc = torch.sqrt(torch.sum(x_enc * x_enc, dim=1) / torch.sum(mask == 1, dim=1) + 1e-5) + std_enc = std_enc.unsqueeze(1).detach() + x_enc /= std_enc + # B x S x E, B x 1 x E -> B x 1, positive scalar + tau = self.tau_learner(x_raw, std_enc) + threshold = 80.0 + tau_clamped = torch.clamp(tau, max=threshold) # avoid numerical overflow + tau = tau_clamped.exp() + # B x S x E, B x 1 x E -> B x S + delta = self.delta_learner(x_raw, mean_enc) + + enc_out = self.enc_embedding(x_enc, x_mark_enc) + enc_out, attns = self.encoder(enc_out, attn_mask=None, tau=tau, delta=delta) + + dec_out = self.projection(enc_out) + dec_out = dec_out * std_enc + mean_enc + return dec_out + + def anomaly_detection(self, x_enc): + x_raw = x_enc.clone().detach() + + # Normalization + mean_enc = x_enc.mean(1, keepdim=True).detach() # B x 1 x E + x_enc = x_enc - mean_enc + std_enc = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5).detach() # B x 1 x E + x_enc = x_enc / std_enc + # B x S x E, B x 1 x E -> B x 1, positive scalar + tau = self.tau_learner(x_raw, std_enc) + threshold = 80.0 + tau_clamped = torch.clamp(tau, max=threshold) # avoid numerical overflow + tau = tau_clamped.exp() + # B x S x E, B x 1 x E -> B x S + delta = self.delta_learner(x_raw, mean_enc) + # embedding + enc_out = self.enc_embedding(x_enc, None) + enc_out, attns = self.encoder(enc_out, attn_mask=None, tau=tau, delta=delta) + + dec_out = self.projection(enc_out) + dec_out = dec_out * std_enc + mean_enc + return dec_out + + def classification(self, x_enc, x_mark_enc): + x_raw = x_enc.clone().detach() + + # Normalization + mean_enc = x_enc.mean(1, keepdim=True).detach() # B x 1 x E + std_enc = torch.sqrt( + torch.var(x_enc - mean_enc, dim=1, keepdim=True, unbiased=False) + 1e-5).detach() # B x 1 x E + # B x S x E, B x 1 x E -> B x 1, positive scalar + tau = self.tau_learner(x_raw, std_enc) + threshold = 80.0 + tau_clamped = torch.clamp(tau, max=threshold) # avoid numerical overflow + tau = tau_clamped.exp() + # B x S x E, B x 1 x E -> B x S + delta = self.delta_learner(x_raw, mean_enc) + # embedding + enc_out = self.enc_embedding(x_enc, None) + enc_out, attns = self.encoder(enc_out, attn_mask=None, tau=tau, delta=delta) + + # Output + output = self.act(enc_out) # the output transformer encoder/decoder embeddings don't include non-linearity + output 
= self.dropout(output) + output = output * x_mark_enc.unsqueeze(-1) # zero-out padding embeddings + # (batch_size, seq_length * d_model) + output = output.reshape(output.shape[0], -1) + # (batch_size, num_classes) + output = self.projection(output) + return output + + def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None): + if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': + dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec) + return dec_out[:, -self.pred_len:, :] # [B, L, D] + if self.task_name == 'imputation': + dec_out = self.imputation(x_enc, x_mark_enc, x_dec, x_mark_dec, mask) + return dec_out # [B, L, D] + if self.task_name == 'anomaly_detection': + dec_out = self.anomaly_detection(x_enc) + return dec_out # [B, L, D] + if self.task_name == 'classification': + dec_out = self.classification(x_enc, x_mark_enc) + return dec_out # [B, L, D] + return None diff --git a/models/PAttn.py b/models/PAttn.py new file mode 100644 index 0000000..b6f4634 --- /dev/null +++ b/models/PAttn.py @@ -0,0 +1,62 @@ +import torch +import torch.nn as nn +from layers.Transformer_EncDec import Encoder, EncoderLayer +from layers.SelfAttention_Family import FullAttention, AttentionLayer +from einops import rearrange + + +class Model(nn.Module): + """ + Paper link: https://arxiv.org/abs/2406.16964 + """ + def __init__(self, configs, patch_len=16, stride=8): + super().__init__() + self.seq_len = configs.seq_len + self.pred_len = configs.pred_len + self.patch_size = patch_len + self.stride = stride + + self.d_model = configs.d_model + + self.patch_num = (configs.seq_len - self.patch_size) // self.stride + 2 + self.padding_patch_layer = nn.ReplicationPad1d((0, self.stride)) + self.in_layer = nn.Linear(self.patch_size, self.d_model) + self.encoder = Encoder( + [ + EncoderLayer( + AttentionLayer( + FullAttention(False, configs.factor, attention_dropout=configs.dropout, + output_attention=False), configs.d_model, configs.n_heads), + configs.d_model, + configs.d_ff, + dropout=configs.dropout, + activation=configs.activation + ) for l in range(1) + ], + norm_layer=nn.LayerNorm(configs.d_model) + ) + self.out_layer = nn.Linear(self.d_model * self.patch_num, configs.pred_len) + + def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + means = x_enc.mean(1, keepdim=True).detach() + x_enc = x_enc - means + stdev = torch.sqrt( + torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5) + x_enc /= stdev + + B, _, C = x_enc.shape + x_enc = x_enc.permute(0, 2, 1) + x_enc = self.padding_patch_layer(x_enc) + x_enc = x_enc.unfold(dimension=-1, size=self.patch_size, step=self.stride) + enc_out = self.in_layer(x_enc) + enc_out = rearrange(enc_out, 'b c m l -> (b c) m l') + dec_out, _ = self.encoder(enc_out) + dec_out = rearrange(dec_out, '(b c) m l -> b c (m l)' , b=B , c=C) + dec_out = self.out_layer(dec_out) + dec_out = dec_out.permute(0, 2, 1) + + dec_out = dec_out * \ + (stdev[:, 0, :].unsqueeze(1).repeat(1, self.pred_len, 1)) + dec_out = dec_out + \ + (means[:, 0, :].unsqueeze(1).repeat(1, self.pred_len, 1)) + return dec_out \ No newline at end of file diff --git a/models/PatchTST.py b/models/PatchTST.py new file mode 100644 index 0000000..085efd8 --- /dev/null +++ b/models/PatchTST.py @@ -0,0 +1,227 @@ +import torch +from torch import nn +from layers.Transformer_EncDec import Encoder, EncoderLayer +from layers.SelfAttention_Family import FullAttention, AttentionLayer +from layers.Embed import PatchEmbedding + +class Transpose(nn.Module): + def __init__(self, 
*dims, contiguous=False): + super().__init__() + self.dims, self.contiguous = dims, contiguous + def forward(self, x): + if self.contiguous: return x.transpose(*self.dims).contiguous() + else: return x.transpose(*self.dims) + + +class FlattenHead(nn.Module): + def __init__(self, n_vars, nf, target_window, head_dropout=0): + super().__init__() + self.n_vars = n_vars + self.flatten = nn.Flatten(start_dim=-2) + self.linear = nn.Linear(nf, target_window) + self.dropout = nn.Dropout(head_dropout) + + def forward(self, x): # x: [bs x nvars x d_model x patch_num] + x = self.flatten(x) + x = self.linear(x) + x = self.dropout(x) + return x + + +class Model(nn.Module): + """ + Paper link: https://arxiv.org/pdf/2211.14730.pdf + """ + + def __init__(self, configs, patch_len=16, stride=8): + """ + patch_len: int, patch len for patch_embedding + stride: int, stride for patch_embedding + """ + super().__init__() + self.task_name = configs.task_name + self.seq_len = configs.seq_len + self.pred_len = configs.pred_len + padding = stride + + # patching and embedding + self.patch_embedding = PatchEmbedding( + configs.d_model, patch_len, stride, padding, configs.dropout) + + # Encoder + self.encoder = Encoder( + [ + EncoderLayer( + AttentionLayer( + FullAttention(False, configs.factor, attention_dropout=configs.dropout, + output_attention=False), configs.d_model, configs.n_heads), + configs.d_model, + configs.d_ff, + dropout=configs.dropout, + activation=configs.activation + ) for l in range(configs.e_layers) + ], + norm_layer=nn.Sequential(Transpose(1,2), nn.BatchNorm1d(configs.d_model), Transpose(1,2)) + ) + + # Prediction Head + self.head_nf = configs.d_model * \ + int((configs.seq_len - patch_len) / stride + 2) + if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': + self.head = FlattenHead(configs.enc_in, self.head_nf, configs.pred_len, + head_dropout=configs.dropout) + elif self.task_name == 'imputation' or self.task_name == 'anomaly_detection': + self.head = FlattenHead(configs.enc_in, self.head_nf, configs.seq_len, + head_dropout=configs.dropout) + elif self.task_name == 'classification': + self.flatten = nn.Flatten(start_dim=-2) + self.dropout = nn.Dropout(configs.dropout) + self.projection = nn.Linear( + self.head_nf * configs.enc_in, configs.num_class) + + def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + # Normalization from Non-stationary Transformer + means = x_enc.mean(1, keepdim=True).detach() + x_enc = x_enc - means + stdev = torch.sqrt( + torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5) + x_enc /= stdev + + # do patching and embedding + x_enc = x_enc.permute(0, 2, 1) + # u: [bs * nvars x patch_num x d_model] + enc_out, n_vars = self.patch_embedding(x_enc) + + # Encoder + # z: [bs * nvars x patch_num x d_model] + enc_out, attns = self.encoder(enc_out) + # z: [bs x nvars x patch_num x d_model] + enc_out = torch.reshape( + enc_out, (-1, n_vars, enc_out.shape[-2], enc_out.shape[-1])) + # z: [bs x nvars x d_model x patch_num] + enc_out = enc_out.permute(0, 1, 3, 2) + + # Decoder + dec_out = self.head(enc_out) # z: [bs x nvars x target_window] + dec_out = dec_out.permute(0, 2, 1) + + # De-Normalization from Non-stationary Transformer + dec_out = dec_out * \ + (stdev[:, 0, :].unsqueeze(1).repeat(1, self.pred_len, 1)) + dec_out = dec_out + \ + (means[:, 0, :].unsqueeze(1).repeat(1, self.pred_len, 1)) + return dec_out + + def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask): + # Normalization from Non-stationary Transformer + means = 
torch.sum(x_enc, dim=1) / torch.sum(mask == 1, dim=1) + means = means.unsqueeze(1).detach() + x_enc = x_enc - means + x_enc = x_enc.masked_fill(mask == 0, 0) + stdev = torch.sqrt(torch.sum(x_enc * x_enc, dim=1) / + torch.sum(mask == 1, dim=1) + 1e-5) + stdev = stdev.unsqueeze(1).detach() + x_enc /= stdev + + # do patching and embedding + x_enc = x_enc.permute(0, 2, 1) + # u: [bs * nvars x patch_num x d_model] + enc_out, n_vars = self.patch_embedding(x_enc) + + # Encoder + # z: [bs * nvars x patch_num x d_model] + enc_out, attns = self.encoder(enc_out) + # z: [bs x nvars x patch_num x d_model] + enc_out = torch.reshape( + enc_out, (-1, n_vars, enc_out.shape[-2], enc_out.shape[-1])) + # z: [bs x nvars x d_model x patch_num] + enc_out = enc_out.permute(0, 1, 3, 2) + + # Decoder + dec_out = self.head(enc_out) # z: [bs x nvars x target_window] + dec_out = dec_out.permute(0, 2, 1) + + # De-Normalization from Non-stationary Transformer + dec_out = dec_out * \ + (stdev[:, 0, :].unsqueeze(1).repeat(1, self.seq_len, 1)) + dec_out = dec_out + \ + (means[:, 0, :].unsqueeze(1).repeat(1, self.seq_len, 1)) + return dec_out + + def anomaly_detection(self, x_enc): + # Normalization from Non-stationary Transformer + means = x_enc.mean(1, keepdim=True).detach() + x_enc = x_enc - means + stdev = torch.sqrt( + torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5) + x_enc /= stdev + + # do patching and embedding + x_enc = x_enc.permute(0, 2, 1) + # u: [bs * nvars x patch_num x d_model] + enc_out, n_vars = self.patch_embedding(x_enc) + + # Encoder + # z: [bs * nvars x patch_num x d_model] + enc_out, attns = self.encoder(enc_out) + # z: [bs x nvars x patch_num x d_model] + enc_out = torch.reshape( + enc_out, (-1, n_vars, enc_out.shape[-2], enc_out.shape[-1])) + # z: [bs x nvars x d_model x patch_num] + enc_out = enc_out.permute(0, 1, 3, 2) + + # Decoder + dec_out = self.head(enc_out) # z: [bs x nvars x target_window] + dec_out = dec_out.permute(0, 2, 1) + + # De-Normalization from Non-stationary Transformer + dec_out = dec_out * \ + (stdev[:, 0, :].unsqueeze(1).repeat(1, self.seq_len, 1)) + dec_out = dec_out + \ + (means[:, 0, :].unsqueeze(1).repeat(1, self.seq_len, 1)) + return dec_out + + def classification(self, x_enc, x_mark_enc): + # Normalization from Non-stationary Transformer + means = x_enc.mean(1, keepdim=True).detach() + x_enc = x_enc - means + stdev = torch.sqrt( + torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5) + x_enc /= stdev + + # do patching and embedding + x_enc = x_enc.permute(0, 2, 1) + # u: [bs * nvars x patch_num x d_model] + enc_out, n_vars = self.patch_embedding(x_enc) + + # Encoder + # z: [bs * nvars x patch_num x d_model] + enc_out, attns = self.encoder(enc_out) + # z: [bs x nvars x patch_num x d_model] + enc_out = torch.reshape( + enc_out, (-1, n_vars, enc_out.shape[-2], enc_out.shape[-1])) + # z: [bs x nvars x d_model x patch_num] + enc_out = enc_out.permute(0, 1, 3, 2) + + # Decoder + output = self.flatten(enc_out) + output = self.dropout(output) + output = output.reshape(output.shape[0], -1) + output = self.projection(output) # (batch_size, num_classes) + return output + + def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None): + if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': + dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec) + return dec_out[:, -self.pred_len:, :] # [B, L, D] + if self.task_name == 'imputation': + dec_out = self.imputation( + x_enc, x_mark_enc, x_dec, x_mark_dec, mask) + return 
dec_out # [B, L, D] + if self.task_name == 'anomaly_detection': + dec_out = self.anomaly_detection(x_enc) + return dec_out # [B, L, D] + if self.task_name == 'classification': + dec_out = self.classification(x_enc, x_mark_enc) + return dec_out # [B, N] + return None diff --git a/models/Pyraformer.py b/models/Pyraformer.py new file mode 100644 index 0000000..d92693c --- /dev/null +++ b/models/Pyraformer.py @@ -0,0 +1,101 @@ +import torch +import torch.nn as nn +from layers.Pyraformer_EncDec import Encoder + + +class Model(nn.Module): + """ + Pyraformer: Pyramidal attention to reduce complexity + Paper link: https://openreview.net/pdf?id=0EXmFzUn5I + """ + + def __init__(self, configs, window_size=[4,4], inner_size=5): + """ + window_size: list, the downsample window size in pyramidal attention. + inner_size: int, the size of neighbour attention + """ + super().__init__() + self.task_name = configs.task_name + self.pred_len = configs.pred_len + self.d_model = configs.d_model + + if self.task_name == 'short_term_forecast': + window_size = [2,2] + self.encoder = Encoder(configs, window_size, inner_size) + + if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': + self.projection = nn.Linear( + (len(window_size)+1)*self.d_model, self.pred_len * configs.enc_in) + elif self.task_name == 'imputation' or self.task_name == 'anomaly_detection': + self.projection = nn.Linear( + (len(window_size)+1)*self.d_model, configs.enc_in, bias=True) + elif self.task_name == 'classification': + self.act = torch.nn.functional.gelu + self.dropout = nn.Dropout(configs.dropout) + self.projection = nn.Linear( + (len(window_size)+1)*self.d_model * configs.seq_len, configs.num_class) + + def long_forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None): + enc_out = self.encoder(x_enc, x_mark_enc)[:, -1, :] + dec_out = self.projection(enc_out).view( + enc_out.size(0), self.pred_len, -1) + return dec_out + + def short_forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None): + # Normalization + mean_enc = x_enc.mean(1, keepdim=True).detach() # B x 1 x E + x_enc = x_enc - mean_enc + std_enc = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5).detach() # B x 1 x E + x_enc = x_enc / std_enc + + enc_out = self.encoder(x_enc, x_mark_enc)[:, -1, :] + dec_out = self.projection(enc_out).view( + enc_out.size(0), self.pred_len, -1) + + dec_out = dec_out * std_enc + mean_enc + return dec_out + + def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask): + enc_out = self.encoder(x_enc, x_mark_enc) + dec_out = self.projection(enc_out) + return dec_out + + def anomaly_detection(self, x_enc, x_mark_enc): + enc_out = self.encoder(x_enc, x_mark_enc) + dec_out = self.projection(enc_out) + return dec_out + + def classification(self, x_enc, x_mark_enc): + # enc + enc_out = self.encoder(x_enc, x_mark_enc=None) + + # Output + # the output transformer encoder/decoder embeddings don't include non-linearity + output = self.act(enc_out) + output = self.dropout(output) + # zero-out padding embeddings + output = output * x_mark_enc.unsqueeze(-1) + # (batch_size, seq_length * d_model) + output = output.reshape(output.shape[0], -1) + output = self.projection(output) # (batch_size, num_classes) + + return output + + def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None): + if self.task_name == 'long_term_forecast': + dec_out = self.long_forecast(x_enc, x_mark_enc, x_dec, x_mark_dec) + return dec_out[:, -self.pred_len:, :] # [B, L, D] + if self.task_name == 
'short_term_forecast': + dec_out = self.short_forecast(x_enc, x_mark_enc, x_dec, x_mark_dec) + return dec_out[:, -self.pred_len:, :] # [B, L, D] + if self.task_name == 'imputation': + dec_out = self.imputation( + x_enc, x_mark_enc, x_dec, x_mark_dec, mask) + return dec_out # [B, L, D] + if self.task_name == 'anomaly_detection': + dec_out = self.anomaly_detection(x_enc, x_mark_enc) + return dec_out # [B, L, D] + if self.task_name == 'classification': + dec_out = self.classification(x_enc, x_mark_enc) + return dec_out # [B, N] + return None diff --git a/models/Reformer.py b/models/Reformer.py new file mode 100644 index 0000000..b29336c --- /dev/null +++ b/models/Reformer.py @@ -0,0 +1,132 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from layers.Transformer_EncDec import Encoder, EncoderLayer +from layers.SelfAttention_Family import ReformerLayer +from layers.Embed import DataEmbedding + + +class Model(nn.Module): + """ + Reformer with O(LlogL) complexity + Paper link: https://openreview.net/forum?id=rkgNKkHtvB + """ + + def __init__(self, configs, bucket_size=4, n_hashes=4): + """ + bucket_size: int, + n_hashes: int, + """ + super(Model, self).__init__() + self.task_name = configs.task_name + self.pred_len = configs.pred_len + self.seq_len = configs.seq_len + + self.enc_embedding = DataEmbedding(configs.enc_in, configs.d_model, configs.embed, configs.freq, + configs.dropout) + # Encoder + self.encoder = Encoder( + [ + EncoderLayer( + ReformerLayer(None, configs.d_model, configs.n_heads, + bucket_size=bucket_size, n_hashes=n_hashes), + configs.d_model, + configs.d_ff, + dropout=configs.dropout, + activation=configs.activation + ) for l in range(configs.e_layers) + ], + norm_layer=torch.nn.LayerNorm(configs.d_model) + ) + + if self.task_name == 'classification': + self.act = F.gelu + self.dropout = nn.Dropout(configs.dropout) + self.projection = nn.Linear( + configs.d_model * configs.seq_len, configs.num_class) + else: + self.projection = nn.Linear( + configs.d_model, configs.c_out, bias=True) + + def long_forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + # add placeholder + x_enc = torch.cat([x_enc, x_dec[:, -self.pred_len:, :]], dim=1) + if x_mark_enc is not None: + x_mark_enc = torch.cat( + [x_mark_enc, x_mark_dec[:, -self.pred_len:, :]], dim=1) + + enc_out = self.enc_embedding(x_enc, x_mark_enc) # [B,T,C] + enc_out, attns = self.encoder(enc_out, attn_mask=None) + dec_out = self.projection(enc_out) + + return dec_out # [B, L, D] + + def short_forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + # Normalization + mean_enc = x_enc.mean(1, keepdim=True).detach() # B x 1 x E + x_enc = x_enc - mean_enc + std_enc = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5).detach() # B x 1 x E + x_enc = x_enc / std_enc + + # add placeholder + x_enc = torch.cat([x_enc, x_dec[:, -self.pred_len:, :]], dim=1) + if x_mark_enc is not None: + x_mark_enc = torch.cat( + [x_mark_enc, x_mark_dec[:, -self.pred_len:, :]], dim=1) + + enc_out = self.enc_embedding(x_enc, x_mark_enc) # [B,T,C] + enc_out, attns = self.encoder(enc_out, attn_mask=None) + dec_out = self.projection(enc_out) + + dec_out = dec_out * std_enc + mean_enc + return dec_out # [B, L, D] + + def imputation(self, x_enc, x_mark_enc): + enc_out = self.enc_embedding(x_enc, x_mark_enc) # [B,T,C] + + enc_out, attns = self.encoder(enc_out) + enc_out = self.projection(enc_out) + + return enc_out # [B, L, D] + + def anomaly_detection(self, x_enc): + enc_out = self.enc_embedding(x_enc, None) # [B,T,C] + 
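+        # LSH-attention encoder over the embedded series; the shared linear projection maps
+        # d_model back to c_out, so the returned tensor is a per-step reconstruction of the
+        # input window.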
+ enc_out, attns = self.encoder(enc_out) + enc_out = self.projection(enc_out) + + return enc_out # [B, L, D] + + def classification(self, x_enc, x_mark_enc): + # enc + enc_out = self.enc_embedding(x_enc, None) + enc_out, attns = self.encoder(enc_out) + + # Output + # the output transformer encoder/decoder embeddings don't include non-linearity + output = self.act(enc_out) + output = self.dropout(output) + # zero-out padding embeddings + output = output * x_mark_enc.unsqueeze(-1) + # (batch_size, seq_length * d_model) + output = output.reshape(output.shape[0], -1) + output = self.projection(output) # (batch_size, num_classes) + return output + + def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None): + if self.task_name == 'long_term_forecast': + dec_out = self.long_forecast(x_enc, x_mark_enc, x_dec, x_mark_dec) + return dec_out[:, -self.pred_len:, :] # [B, L, D] + if self.task_name == 'short_term_forecast': + dec_out = self.short_forecast(x_enc, x_mark_enc, x_dec, x_mark_dec) + return dec_out[:, -self.pred_len:, :] # [B, L, D] + if self.task_name == 'imputation': + dec_out = self.imputation(x_enc, x_mark_enc) + return dec_out # [B, L, D] + if self.task_name == 'anomaly_detection': + dec_out = self.anomaly_detection(x_enc) + return dec_out # [B, L, D] + if self.task_name == 'classification': + dec_out = self.classification(x_enc, x_mark_enc) + return dec_out # [B, N] + return None diff --git a/models/SCINet.py b/models/SCINet.py new file mode 100644 index 0000000..740d0f7 --- /dev/null +++ b/models/SCINet.py @@ -0,0 +1,188 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +import math + +class Splitting(nn.Module): + def __init__(self): + super(Splitting, self).__init__() + + def even(self, x): + return x[:, ::2, :] + + def odd(self, x): + return x[:, 1::2, :] + + def forward(self, x): + # return the odd and even part + return self.even(x), self.odd(x) + + +class CausalConvBlock(nn.Module): + def __init__(self, d_model, kernel_size=5, dropout=0.0): + super(CausalConvBlock, self).__init__() + module_list = [ + nn.ReplicationPad1d((kernel_size - 1, kernel_size - 1)), + + nn.Conv1d(d_model, d_model, + kernel_size=kernel_size), + nn.LeakyReLU(negative_slope=0.01, inplace=True), + + nn.Dropout(dropout), + nn.Conv1d(d_model, d_model, + kernel_size=kernel_size), + nn.Tanh() + ] + self.causal_conv = nn.Sequential(*module_list) + + def forward(self, x): + return self.causal_conv(x) # return value is the same as input dimension + + +class SCIBlock(nn.Module): + def __init__(self, d_model, kernel_size=5, dropout=0.0): + super(SCIBlock, self).__init__() + self.splitting = Splitting() + self.modules_even, self.modules_odd, self.interactor_even, self.interactor_odd = [CausalConvBlock(d_model) for _ in range(4)] + + def forward(self, x): + x_even, x_odd = self.splitting(x) + x_even = x_even.permute(0, 2, 1) + x_odd = x_odd.permute(0, 2, 1) + + x_even_temp = x_even.mul(torch.exp(self.modules_even(x_odd))) + x_odd_temp = x_odd.mul(torch.exp(self.modules_odd(x_even))) + + x_even_update = x_even_temp + self.interactor_even(x_odd_temp) + x_odd_update = x_odd_temp - self.interactor_odd(x_even_temp) + + return x_even_update.permute(0, 2, 1), x_odd_update.permute(0, 2, 1) + + +class SCINet(nn.Module): + def __init__(self, d_model, current_level=3, kernel_size=5, dropout=0.0): + super(SCINet, self).__init__() + self.current_level = current_level + self.working_block = SCIBlock(d_model, kernel_size, dropout) + + if current_level != 0: + self.SCINet_Tree_odd = SCINet(d_model, 
current_level-1, kernel_size, dropout) + self.SCINet_Tree_even = SCINet(d_model, current_level-1, kernel_size, dropout) + + def forward(self, x): + odd_flag = False + if x.shape[1] % 2 == 1: + odd_flag = True + x = torch.cat((x, x[:, -1:, :]), dim=1) + x_even_update, x_odd_update = self.working_block(x) + if odd_flag: + x_odd_update = x_odd_update[:, :-1] + + if self.current_level == 0: + return self.zip_up_the_pants(x_even_update, x_odd_update) + else: + return self.zip_up_the_pants(self.SCINet_Tree_even(x_even_update), self.SCINet_Tree_odd(x_odd_update)) + + def zip_up_the_pants(self, even, odd): + even = even.permute(1, 0, 2) + odd = odd.permute(1, 0, 2) + even_len = even.shape[0] + odd_len = odd.shape[0] + min_len = min(even_len, odd_len) + + zipped_data = [] + for i in range(min_len): + zipped_data.append(even[i].unsqueeze(0)) + zipped_data.append(odd[i].unsqueeze(0)) + if even_len > odd_len: + zipped_data.append(even[-1].unsqueeze(0)) + return torch.cat(zipped_data,0).permute(1, 0, 2) + + +class Model(nn.Module): + def __init__(self, configs): + super(Model, self).__init__() + self.task_name = configs.task_name + self.seq_len = configs.seq_len + self.label_len = configs.label_len + self.pred_len = configs.pred_len + + # You can set the number of SCINet stacks by argument "d_layers", but should choose 1 or 2. + self.num_stacks = configs.d_layers + if self.num_stacks == 1: + self.sci_net_1 = SCINet(configs.enc_in, dropout=configs.dropout) + self.projection_1 = nn.Conv1d(self.seq_len, self.seq_len + self.pred_len, kernel_size=1, stride=1, bias=False) + else: + self.sci_net_1, self.sci_net_2 = [SCINet(configs.enc_in, dropout=configs.dropout) for _ in range(2)] + self.projection_1 = nn.Conv1d(self.seq_len, self.pred_len, kernel_size=1, stride=1, bias=False) + self.projection_2 = nn.Conv1d(self.seq_len+self.pred_len, self.seq_len+self.pred_len, + kernel_size = 1, bias = False) + + # For positional encoding + self.pe_hidden_size = configs.enc_in + if self.pe_hidden_size % 2 == 1: + self.pe_hidden_size += 1 + + num_timescales = self.pe_hidden_size // 2 + max_timescale = 10000.0 + min_timescale = 1.0 + + log_timescale_increment = ( + math.log(float(max_timescale) / float(min_timescale)) / + max(num_timescales - 1, 1)) + inv_timescales = min_timescale * torch.exp( + torch.arange(num_timescales, dtype=torch.float32) * + -log_timescale_increment) + self.register_buffer('inv_timescales', inv_timescales) + + def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None): + if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': + dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec) # [B,pred_len,C] + dec_out = torch.cat([torch.zeros_like(x_enc), dec_out], dim=1) + return dec_out # [B, T, D] + return None + + def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + # Normalization from Non-stationary Transformer + means = x_enc.mean(1, keepdim=True).detach() + x_enc = x_enc - means + stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5) + x_enc /= stdev + + # position-encoding + pe = self.get_position_encoding(x_enc) + if pe.shape[2] > x_enc.shape[2]: + x_enc += pe[:, :, :-1] + else: + x_enc += self.get_position_encoding(x_enc) + + # SCINet + dec_out = self.sci_net_1(x_enc) + dec_out += x_enc + dec_out = self.projection_1(dec_out) + if self.num_stacks != 1: + dec_out = torch.cat((x_enc, dec_out), dim=1) + temp = dec_out + dec_out = self.sci_net_2(dec_out) + dec_out += temp + dec_out = self.projection_2(dec_out) + + # 
De-Normalization from Non-stationary Transformer + dec_out = dec_out * \ + (stdev[:, 0, :].unsqueeze(1).repeat( + 1, self.pred_len + self.seq_len, 1)) + dec_out = dec_out + \ + (means[:, 0, :].unsqueeze(1).repeat( + 1, self.pred_len + self.seq_len, 1)) + return dec_out + + def get_position_encoding(self, x): + max_length = x.size()[1] + position = torch.arange(max_length, dtype=torch.float32, + device=x.device) # tensor([0., 1., 2., 3., 4.], device='cuda:0') + scaled_time = position.unsqueeze(1) * self.inv_timescales.unsqueeze(0) # 5 256 + signal = torch.cat([torch.sin(scaled_time), torch.cos(scaled_time)], dim=1) # [T, C] + signal = F.pad(signal, (0, 0, 0, self.pe_hidden_size % 2)) + signal = signal.view(1, max_length, self.pe_hidden_size) + + return signal \ No newline at end of file diff --git a/models/SegRNN.py b/models/SegRNN.py new file mode 100644 index 0000000..afff1bc --- /dev/null +++ b/models/SegRNN.py @@ -0,0 +1,119 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from layers.Autoformer_EncDec import series_decomp + + +class Model(nn.Module): + """ + Paper link: https://arxiv.org/abs/2308.11200.pdf + """ + + def __init__(self, configs): + super(Model, self).__init__() + + # get parameters + self.seq_len = configs.seq_len + self.enc_in = configs.enc_in + self.d_model = configs.d_model + self.dropout = configs.dropout + + self.task_name = configs.task_name + if self.task_name == 'classification' or self.task_name == 'anomaly_detection' or self.task_name == 'imputation': + self.pred_len = configs.seq_len + else: + self.pred_len = configs.pred_len + + self.seg_len = configs.seg_len + self.seg_num_x = self.seq_len // self.seg_len + self.seg_num_y = self.pred_len // self.seg_len + + # building model + self.valueEmbedding = nn.Sequential( + nn.Linear(self.seg_len, self.d_model), + nn.ReLU() + ) + self.rnn = nn.GRU(input_size=self.d_model, hidden_size=self.d_model, num_layers=1, bias=True, + batch_first=True, bidirectional=False) + self.pos_emb = nn.Parameter(torch.randn(self.seg_num_y, self.d_model // 2)) + self.channel_emb = nn.Parameter(torch.randn(self.enc_in, self.d_model // 2)) + + self.predict = nn.Sequential( + nn.Dropout(self.dropout), + nn.Linear(self.d_model, self.seg_len) + ) + + if self.task_name == 'classification': + self.act = F.gelu + self.dropout = nn.Dropout(configs.dropout) + self.projection = nn.Linear( + configs.enc_in * configs.seq_len, configs.num_class) + + def encoder(self, x): + # b:batch_size c:channel_size s:seq_len s:seq_len + # d:d_model w:seg_len n:seg_num_x m:seg_num_y + batch_size = x.size(0) + + # normalization and permute b,s,c -> b,c,s + seq_last = x[:, -1:, :].detach() + x = (x - seq_last).permute(0, 2, 1) # b,c,s + + # segment and embedding b,c,s -> bc,n,w -> bc,n,d + x = self.valueEmbedding(x.reshape(-1, self.seg_num_x, self.seg_len)) + + # encoding + _, hn = self.rnn(x) # bc,n,d 1,bc,d + + # m,d//2 -> 1,m,d//2 -> c,m,d//2 + # c,d//2 -> c,1,d//2 -> c,m,d//2 + # c,m,d -> cm,1,d -> bcm, 1, d + pos_emb = torch.cat([ + self.pos_emb.unsqueeze(0).repeat(self.enc_in, 1, 1), + self.channel_emb.unsqueeze(1).repeat(1, self.seg_num_y, 1) + ], dim=-1).view(-1, 1, self.d_model).repeat(batch_size,1,1) + + _, hy = self.rnn(pos_emb, hn.repeat(1, 1, self.seg_num_y).view(1, -1, self.d_model)) # bcm,1,d 1,bcm,d + + # 1,bcm,d -> 1,bcm,w -> b,c,s + y = self.predict(hy).view(-1, self.enc_in, self.pred_len) + + # permute and denorm + y = y.permute(0, 2, 1) + seq_last + return y + + def forecast(self, x_enc): + # Encoder + return 
self.encoder(x_enc) + + def imputation(self, x_enc): + # Encoder + return self.encoder(x_enc) + + def anomaly_detection(self, x_enc): + # Encoder + return self.encoder(x_enc) + + def classification(self, x_enc): + # Encoder + enc_out = self.encoder(x_enc) + # Output + # (batch_size, seq_length * d_model) + output = enc_out.reshape(enc_out.shape[0], -1) + # (batch_size, num_classes) + output = self.projection(output) + return output + + def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None): + if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': + dec_out = self.forecast(x_enc) + return dec_out[:, -self.pred_len:, :] # [B, L, D] + if self.task_name == 'imputation': + dec_out = self.imputation(x_enc) + return dec_out # [B, L, D] + if self.task_name == 'anomaly_detection': + dec_out = self.anomaly_detection(x_enc) + return dec_out # [B, L, D] + if self.task_name == 'classification': + dec_out = self.classification(x_enc) + return dec_out # [B, N] + return None diff --git a/models/TSMixer.py b/models/TSMixer.py new file mode 100644 index 0000000..76884d4 --- /dev/null +++ b/models/TSMixer.py @@ -0,0 +1,54 @@ +import torch.nn as nn + + +class ResBlock(nn.Module): + def __init__(self, configs): + super(ResBlock, self).__init__() + + self.temporal = nn.Sequential( + nn.Linear(configs.seq_len, configs.d_model), + nn.ReLU(), + nn.Linear(configs.d_model, configs.seq_len), + nn.Dropout(configs.dropout) + ) + + self.channel = nn.Sequential( + nn.Linear(configs.enc_in, configs.d_model), + nn.ReLU(), + nn.Linear(configs.d_model, configs.enc_in), + nn.Dropout(configs.dropout) + ) + + def forward(self, x): + # x: [B, L, D] + x = x + self.temporal(x.transpose(1, 2)).transpose(1, 2) + x = x + self.channel(x) + + return x + + +class Model(nn.Module): + def __init__(self, configs): + super(Model, self).__init__() + self.task_name = configs.task_name + self.layer = configs.e_layers + self.model = nn.ModuleList([ResBlock(configs) + for _ in range(configs.e_layers)]) + self.pred_len = configs.pred_len + self.projection = nn.Linear(configs.seq_len, configs.pred_len) + + def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None): + + # x: [B, L, D] + for i in range(self.layer): + x_enc = self.model[i](x_enc) + enc_out = self.projection(x_enc.transpose(1, 2)).transpose(1, 2) + + return enc_out + + def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None): + if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': + dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec) + return dec_out[:, -self.pred_len:, :] # [B, L, D] + else: + raise ValueError('Only forecast tasks implemented yet') diff --git a/models/TemporalFusionTransformer.py b/models/TemporalFusionTransformer.py new file mode 100644 index 0000000..564c835 --- /dev/null +++ b/models/TemporalFusionTransformer.py @@ -0,0 +1,309 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from layers.Embed import DataEmbedding, TemporalEmbedding +from torch import Tensor +from typing import Optional +from collections import namedtuple + +# static: time-independent features +# observed: time features of the past(e.g. predicted targets) +# known: known information about the past and future(i.e. time stamp) +TypePos = namedtuple('TypePos', ['static', 'observed']) + +# When you want to use new dataset, please add the index of 'static, observed' columns here. 
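+# For example, a hypothetical dataset 'MyData' with one static column (index 0) and six
+# observed columns could be registered as (illustrative sketch only):
+# datatype_dict['MyData'] = TypePos([0], list(range(1, 7)))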
+# 'known' columns needn't be added, because 'known' inputs are automatically judged and provided by the program. +datatype_dict = {'ETTh1': TypePos([], [x for x in range(7)]), + 'ETTm1': TypePos([], [x for x in range(7)])} + + +def get_known_len(embed_type, freq): + if embed_type != 'timeF': + if freq == 't': + return 5 + else: + return 4 + else: + freq_map = {'h': 4, 't': 5, 's': 6, + 'm': 1, 'a': 1, 'w': 2, 'd': 3, 'b': 3} + return freq_map[freq] + + +class TFTTemporalEmbedding(TemporalEmbedding): + def __init__(self, d_model, embed_type='fixed', freq='h'): + super(TFTTemporalEmbedding, self).__init__(d_model, embed_type, freq) + + def forward(self, x): + x = x.long() + minute_x = self.minute_embed(x[:, :, 4]) if hasattr( + self, 'minute_embed') else 0. + hour_x = self.hour_embed(x[:, :, 3]) + weekday_x = self.weekday_embed(x[:, :, 2]) + day_x = self.day_embed(x[:, :, 1]) + month_x = self.month_embed(x[:, :, 0]) + + embedding_x = torch.stack([month_x, day_x, weekday_x, hour_x, minute_x], dim=-2) if hasattr( + self, 'minute_embed') else torch.stack([month_x, day_x, weekday_x, hour_x], dim=-2) + return embedding_x + + +class TFTTimeFeatureEmbedding(nn.Module): + def __init__(self, d_model, embed_type='timeF', freq='h'): + super(TFTTimeFeatureEmbedding, self).__init__() + d_inp = get_known_len(embed_type, freq) + self.embed = nn.ModuleList([nn.Linear(1, d_model, bias=False) for _ in range(d_inp)]) + + def forward(self, x): + return torch.stack([embed(x[:,:,i].unsqueeze(-1)) for i, embed in enumerate(self.embed)], dim=-2) + + +class TFTEmbedding(nn.Module): + def __init__(self, configs): + super(TFTEmbedding, self).__init__() + self.pred_len = configs.pred_len + self.static_pos = datatype_dict[configs.data].static + self.observed_pos = datatype_dict[configs.data].observed + self.static_len = len(self.static_pos) + self.observed_len = len(self.observed_pos) + + self.static_embedding = nn.ModuleList([DataEmbedding(1,configs.d_model,dropout=configs.dropout) for _ in range(self.static_len)]) \ + if self.static_len else None + self.observed_embedding = nn.ModuleList([DataEmbedding(1,configs.d_model,dropout=configs.dropout) for _ in range(self.observed_len)]) + self.known_embedding = TFTTemporalEmbedding(configs.d_model, configs.embed, configs.freq) \ + if configs.embed != 'timeF' else TFTTimeFeatureEmbedding(configs.d_model, configs.embed, configs.freq) + + def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + if self.static_len: + # static_input: [B,C,d_model] + static_input = torch.stack([embed(x_enc[:,:1,self.static_pos[i]].unsqueeze(-1), None).squeeze(1) for i, embed in enumerate(self.static_embedding)], dim=-2) + else: + static_input = None + + # observed_input: [B,T,C,d_model] + observed_input = torch.stack([embed(x_enc[:,:,self.observed_pos[i]].unsqueeze(-1), None) for i, embed in enumerate(self.observed_embedding)], dim=-2) + + x_mark = torch.cat([x_mark_enc, x_mark_dec[:,-self.pred_len:,:]], dim=-2) + # known_input: [B,T,C,d_model] + known_input = self.known_embedding(x_mark) + + return static_input, observed_input, known_input + + +class GLU(nn.Module): + def __init__(self, input_size, output_size): + super().__init__() + self.fc1 = nn.Linear(input_size, output_size) + self.fc2 = nn.Linear(input_size, output_size) + self.glu = nn.GLU() + + def forward(self, x): + a = self.fc1(x) + b = self.fc2(x) + return self.glu(torch.cat([a, b], dim=-1)) + + +class GateAddNorm(nn.Module): + def __init__(self, input_size, output_size): + super(GateAddNorm, self).__init__() + self.glu = 
GLU(input_size, input_size) + self.projection = nn.Linear(input_size, output_size) if input_size != output_size else nn.Identity() + self.layer_norm = nn.LayerNorm(output_size) + + def forward(self, x, skip_a): + x = self.glu(x) + x = x + skip_a + return self.layer_norm(self.projection(x)) + + +class GRN(nn.Module): + def __init__(self, input_size, output_size, hidden_size=None, context_size=None, dropout=0.0): + super(GRN, self).__init__() + hidden_size = input_size if hidden_size is None else hidden_size + self.lin_a = nn.Linear(input_size, hidden_size) + self.lin_c = nn.Linear(context_size, hidden_size) if context_size is not None else None + self.lin_i = nn.Linear(hidden_size, hidden_size) + self.dropout = nn.Dropout(dropout) + self.project_a = nn.Linear(input_size, hidden_size) if hidden_size != input_size else nn.Identity() + self.gate = GateAddNorm(hidden_size, output_size) + + def forward(self, a: Tensor, c: Optional[Tensor] = None): + # a: [B,T,d], c: [B,d] + x = self.lin_a(a) + if c is not None: + x = x + self.lin_c(c).unsqueeze(1) + x = F.elu(x) + x = self.lin_i(x) + x = self.dropout(x) + return self.gate(x, self.project_a(a)) + + +class VariableSelectionNetwork(nn.Module): + def __init__(self, d_model, variable_num, dropout=0.0): + super(VariableSelectionNetwork, self).__init__() + self.joint_grn = GRN(d_model * variable_num, variable_num, hidden_size=d_model, context_size=d_model, dropout=dropout) + self.variable_grns = nn.ModuleList([GRN(d_model, d_model, dropout=dropout) for _ in range(variable_num)]) + + def forward(self, x: Tensor, context: Optional[Tensor] = None): + # x: [B,T,C,d] or [B,C,d] + # selection_weights: [B,T,C] or [B,C] + # x_processed: [B,T,d,C] or [B,d,C] + # selection_result: [B,T,d] or [B,d] + x_flattened = torch.flatten(x, start_dim=-2) + selection_weights = self.joint_grn(x_flattened, context) + selection_weights = F.softmax(selection_weights, dim=-1) + + x_processed = torch.stack([grn(x[...,i,:]) for i, grn in enumerate(self.variable_grns)], dim=-1) + + selection_result = torch.matmul(x_processed, selection_weights.unsqueeze(-1)).squeeze(-1) + return selection_result + + +class StaticCovariateEncoder(nn.Module): + def __init__(self, d_model, static_len, dropout=0.0): + super(StaticCovariateEncoder, self).__init__() + self.static_vsn = VariableSelectionNetwork(d_model, static_len) if static_len else None + self.grns = nn.ModuleList([GRN(d_model, d_model, dropout=dropout) for _ in range(4)]) + + def forward(self, static_input): + # static_input: [B,C,d] + if static_input is not None: + static_features = self.static_vsn(static_input) + return [grn(static_features) for grn in self.grns] + else: + return [None] * 4 + + +class InterpretableMultiHeadAttention(nn.Module): + def __init__(self, configs): + super(InterpretableMultiHeadAttention, self).__init__() + self.n_heads = configs.n_heads + assert configs.d_model % configs.n_heads == 0 + self.d_head = configs.d_model // configs.n_heads + self.qkv_linears = nn.Linear(configs.d_model, (2 * self.n_heads + 1) * self.d_head, bias=False) + self.out_projection = nn.Linear(self.d_head, configs.d_model, bias=False) + self.out_dropout = nn.Dropout(configs.dropout) + self.scale = self.d_head ** -0.5 + example_len = configs.seq_len + configs.pred_len + self.register_buffer("mask", torch.triu(torch.full((example_len, example_len), float('-inf')), 1)) + + def forward(self, x): + # Q,K,V are all from x + B, T, d_model = x.shape + qkv = self.qkv_linears(x) + q, k, v = qkv.split((self.n_heads * self.d_head, self.n_heads * 
self.d_head, self.d_head), dim=-1) + q = q.view(B, T, self.n_heads, self.d_head) + k = k.view(B, T, self.n_heads, self.d_head) + v = v.view(B, T, self.d_head) + + attention_score = torch.matmul(q.permute((0, 2, 1, 3)), k.permute((0, 2, 3, 1))) # [B,n,T,T] + attention_score.mul_(self.scale) + attention_score = attention_score + self.mask + attention_prob = F.softmax(attention_score, dim=3) # [B,n,T,T] + + attention_out = torch.matmul(attention_prob, v.unsqueeze(1)) # [B,n,T,d] + attention_out = torch.mean(attention_out, dim=1) # [B,T,d] + out = self.out_projection(attention_out) + out = self.out_dropout(out) # [B,T,d] + return out + + +class TemporalFusionDecoder(nn.Module): + def __init__(self, configs): + super(TemporalFusionDecoder, self).__init__() + self.pred_len = configs.pred_len + + self.history_encoder = nn.LSTM(configs.d_model, configs.d_model, batch_first=True) + self.future_encoder = nn.LSTM(configs.d_model, configs.d_model, batch_first=True) + self.gate_after_lstm = GateAddNorm(configs.d_model, configs.d_model) + self.enrichment_grn = GRN(configs.d_model, configs.d_model, context_size=configs.d_model, dropout=configs.dropout) + self.attention = InterpretableMultiHeadAttention(configs) + self.gate_after_attention = GateAddNorm(configs.d_model, configs.d_model) + self.position_wise_grn = GRN(configs.d_model, configs.d_model, dropout=configs.dropout) + self.gate_final = GateAddNorm(configs.d_model, configs.d_model) + self.out_projection = nn.Linear(configs.d_model, configs.c_out) + + def forward(self, history_input, future_input, c_c, c_h, c_e): + # history_input, future_input: [B,T,d] + # c_c, c_h, c_e: [B,d] + # LSTM + c = (c_c.unsqueeze(0), c_h.unsqueeze(0)) if c_c is not None and c_h is not None else None + historical_features, state = self.history_encoder(history_input, c) + future_features, _ = self.future_encoder(future_input, state) + + # Skip connection + temporal_input = torch.cat([history_input, future_input], dim=1) + temporal_features = torch.cat([historical_features, future_features], dim=1) + temporal_features = self.gate_after_lstm(temporal_features, temporal_input) # [B,T,d] + + # Static enrichment + enriched_features = self.enrichment_grn(temporal_features, c_e) # [B,T,d] + + # Temporal self-attention + attention_out = self.attention(enriched_features) # [B,T,d] + # Don't compute historical loss + attention_out = self.gate_after_attention(attention_out[:,-self.pred_len:], enriched_features[:,-self.pred_len:]) + + # Position-wise feed-forward + out = self.position_wise_grn(attention_out) # [B,T,d] + + # Final skip connection + out = self.gate_final(out, temporal_features[:,-self.pred_len:]) + return self.out_projection(out) + + +class Model(nn.Module): + def __init__(self, configs): + super(Model, self).__init__() + self.configs = configs + self.task_name = configs.task_name + self.seq_len = configs.seq_len + self.label_len = configs.label_len + self.pred_len = configs.pred_len + + # Number of variables + self.static_len = len(datatype_dict[configs.data].static) + self.observed_len = len(datatype_dict[configs.data].observed) + self.known_len = get_known_len(configs.embed, configs.freq) + + self.embedding = TFTEmbedding(configs) + self.static_encoder = StaticCovariateEncoder(configs.d_model, self.static_len) + self.history_vsn = VariableSelectionNetwork(configs.d_model, self.observed_len + self.known_len) + self.future_vsn = VariableSelectionNetwork(configs.d_model, self.known_len) + self.temporal_fusion_decoder = TemporalFusionDecoder(configs) + + def 
forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + # Normalization from Non-stationary Transformer + means = x_enc.mean(1, keepdim=True).detach() + x_enc = x_enc - means + stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5) + x_enc /= stdev + + # Data embedding + # static_input: [B,C,d], observed_input:[B,T,C,d], known_input: [B,T,C,d] + static_input, observed_input, known_input = self.embedding(x_enc, x_mark_enc, x_dec, x_mark_dec) + + # Static context + # c_s,...,c_e: [B,d] + c_s, c_c, c_h, c_e = self.static_encoder(static_input) + + # Temporal input Selection + history_input = torch.cat([observed_input, known_input[:,:self.seq_len]], dim=-2) + future_input = known_input[:,self.seq_len:] + history_input = self.history_vsn(history_input, c_s) + future_input = self.future_vsn(future_input, c_s) + + # TFT main procedure after variable selection + # history_input: [B,T,d], future_input: [B,T,d] + dec_out = self.temporal_fusion_decoder(history_input, future_input, c_c, c_h, c_e) + + # De-Normalization from Non-stationary Transformer + dec_out = dec_out * (stdev[:, 0, :].unsqueeze(1).repeat(1, self.pred_len, 1)) + dec_out = dec_out + (means[:, 0, :].unsqueeze(1).repeat(1, self.pred_len, 1)) + return dec_out + + def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': + dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec) # [B,pred_len,C] + dec_out = torch.cat([torch.zeros_like(x_enc), dec_out], dim=1) + return dec_out # [B, T, D] + return None \ No newline at end of file diff --git a/models/TiDE.py b/models/TiDE.py new file mode 100644 index 0000000..0fbb98e --- /dev/null +++ b/models/TiDE.py @@ -0,0 +1,145 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class LayerNorm(nn.Module): + """ LayerNorm but with an optional bias. 
PyTorch doesn't support simply bias=False """ + + def __init__(self, ndim, bias): + super().__init__() + self.weight = nn.Parameter(torch.ones(ndim)) + self.bias = nn.Parameter(torch.zeros(ndim)) if bias else None + + def forward(self, input): + return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5) + + + +class ResBlock(nn.Module): + def __init__(self, input_dim, hidden_dim, output_dim, dropout=0.1, bias=True): + super().__init__() + + self.fc1 = nn.Linear(input_dim, hidden_dim, bias=bias) + self.fc2 = nn.Linear(hidden_dim, output_dim, bias=bias) + self.fc3 = nn.Linear(input_dim, output_dim, bias=bias) + self.dropout = nn.Dropout(dropout) + self.relu = nn.ReLU() + self.ln = LayerNorm(output_dim, bias=bias) + + def forward(self, x): + + out = self.fc1(x) + out = self.relu(out) + out = self.fc2(out) + out = self.dropout(out) + out = out + self.fc3(x) + out = self.ln(out) + return out + + +#TiDE +class Model(nn.Module): + """ + paper: https://arxiv.org/pdf/2304.08424.pdf + """ + def __init__(self, configs, bias=True, feature_encode_dim=2): + super(Model, self).__init__() + self.configs = configs + self.task_name = configs.task_name + self.seq_len = configs.seq_len #L + self.label_len = configs.label_len + self.pred_len = configs.pred_len #H + self.hidden_dim=configs.d_model + self.res_hidden=configs.d_model + self.encoder_num=configs.e_layers + self.decoder_num=configs.d_layers + self.freq=configs.freq + self.feature_encode_dim=feature_encode_dim + self.decode_dim = configs.c_out + self.temporalDecoderHidden=configs.d_ff + dropout=configs.dropout + + + freq_map = {'h': 4, 't': 5, 's': 6, + 'm': 1, 'a': 1, 'w': 2, 'd': 3, 'b': 3} + + self.feature_dim=freq_map[self.freq] + + + flatten_dim = self.seq_len + (self.seq_len + self.pred_len) * self.feature_encode_dim + + self.feature_encoder = ResBlock(self.feature_dim, self.res_hidden, self.feature_encode_dim, dropout, bias) + self.encoders = nn.Sequential(ResBlock(flatten_dim, self.res_hidden, self.hidden_dim, dropout, bias),*([ ResBlock(self.hidden_dim, self.res_hidden, self.hidden_dim, dropout, bias)]*(self.encoder_num-1))) + if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': + self.decoders = nn.Sequential(*([ ResBlock(self.hidden_dim, self.res_hidden, self.hidden_dim, dropout, bias)]*(self.decoder_num-1)),ResBlock(self.hidden_dim, self.res_hidden, self.decode_dim * self.pred_len, dropout, bias)) + self.temporalDecoder = ResBlock(self.decode_dim + self.feature_encode_dim, self.temporalDecoderHidden, 1, dropout, bias) + self.residual_proj = nn.Linear(self.seq_len, self.pred_len, bias=bias) + if self.task_name == 'imputation': + self.decoders = nn.Sequential(*([ ResBlock(self.hidden_dim, self.res_hidden, self.hidden_dim, dropout, bias)]*(self.decoder_num-1)),ResBlock(self.hidden_dim, self.res_hidden, self.decode_dim * self.seq_len, dropout, bias)) + self.temporalDecoder = ResBlock(self.decode_dim + self.feature_encode_dim, self.temporalDecoderHidden, 1, dropout, bias) + self.residual_proj = nn.Linear(self.seq_len, self.seq_len, bias=bias) + if self.task_name == 'anomaly_detection': + self.decoders = nn.Sequential(*([ ResBlock(self.hidden_dim, self.res_hidden, self.hidden_dim, dropout, bias)]*(self.decoder_num-1)),ResBlock(self.hidden_dim, self.res_hidden, self.decode_dim * self.seq_len, dropout, bias)) + self.temporalDecoder = ResBlock(self.decode_dim + self.feature_encode_dim, self.temporalDecoderHidden, 1, dropout, bias) + self.residual_proj = nn.Linear(self.seq_len, self.seq_len, 
bias=bias) + + + def forecast(self, x_enc, x_mark_enc, x_dec, batch_y_mark): + # Normalization + means = x_enc.mean(1, keepdim=True).detach() + x_enc = x_enc - means + stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5) + x_enc /= stdev + + feature = self.feature_encoder(batch_y_mark) + hidden = self.encoders(torch.cat([x_enc, feature.reshape(feature.shape[0], -1)], dim=-1)) + decoded = self.decoders(hidden).reshape(hidden.shape[0], self.pred_len, self.decode_dim) + dec_out = self.temporalDecoder(torch.cat([feature[:,self.seq_len:], decoded], dim=-1)).squeeze(-1) + self.residual_proj(x_enc) + + + # De-Normalization + dec_out = dec_out * (stdev[:, 0].unsqueeze(1).repeat(1, self.pred_len)) + dec_out = dec_out + (means[:, 0].unsqueeze(1).repeat(1, self.pred_len)) + return dec_out + + def imputation(self, x_enc, x_mark_enc, x_dec, batch_y_mark, mask): + # Normalization + means = x_enc.mean(1, keepdim=True).detach() + x_enc = x_enc - means + stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5) + x_enc /= stdev + + feature = self.feature_encoder(x_mark_enc) + hidden = self.encoders(torch.cat([x_enc, feature.reshape(feature.shape[0], -1)], dim=-1)) + decoded = self.decoders(hidden).reshape(hidden.shape[0], self.seq_len, self.decode_dim) + dec_out = self.temporalDecoder(torch.cat([feature[:,:self.seq_len], decoded], dim=-1)).squeeze(-1) + self.residual_proj(x_enc) + + # De-Normalization + dec_out = dec_out * (stdev[:, 0].unsqueeze(1).repeat(1, self.seq_len)) + dec_out = dec_out + (means[:, 0].unsqueeze(1).repeat(1, self.seq_len)) + return dec_out + + + def forward(self, x_enc, x_mark_enc, x_dec, batch_y_mark, mask=None): + '''x_mark_enc is the exogenous dynamic feature described in the original paper''' + if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': + if batch_y_mark is None: + batch_y_mark = torch.zeros((x_enc.shape[0], self.seq_len+self.pred_len, self.feature_dim)).to(x_enc.device).detach() + else: + batch_y_mark = torch.concat([x_mark_enc, batch_y_mark[:, -self.pred_len:, :]],dim=1) + dec_out = torch.stack([self.forecast(x_enc[:, :, feature], x_mark_enc, x_dec, batch_y_mark) for feature in range(x_enc.shape[-1])],dim=-1) + return dec_out # [B, L, D] + if self.task_name == 'imputation': + dec_out = torch.stack([self.imputation(x_enc[:, :, feature], x_mark_enc, x_dec, batch_y_mark, mask) for feature in range(x_enc.shape[-1])],dim=-1) + return dec_out # [B, L, D] + if self.task_name == 'anomaly_detection': + raise NotImplementedError("Task anomaly_detection for Tide is temporarily not supported") + if self.task_name == 'classification': + raise NotImplementedError("Task classification for Tide is temporarily not supported") + return None + + + + + diff --git a/models/TimeMixer.py b/models/TimeMixer.py new file mode 100755 index 0000000..bd488ea --- /dev/null +++ b/models/TimeMixer.py @@ -0,0 +1,516 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from layers.Autoformer_EncDec import series_decomp +from layers.Embed import DataEmbedding_wo_pos +from layers.StandardNorm import Normalize + + +class DFT_series_decomp(nn.Module): + """ + Series decomposition block + """ + + def __init__(self, top_k: int = 5): + super(DFT_series_decomp, self).__init__() + self.top_k = top_k + + def forward(self, x): + xf = torch.fft.rfft(x) + freq = abs(xf) + freq[0] = 0 + top_k_freq, top_list = torch.topk(freq, k=self.top_k) + xf[freq <= top_k_freq.min()] = 0 + x_season = torch.fft.irfft(xf) + 
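+        # the inverse FFT of the retained top-k frequencies is the seasonal component;
+        # the remainder after subtracting it from the input is treated as the trend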
x_trend = x - x_season + return x_season, x_trend + + +class MultiScaleSeasonMixing(nn.Module): + """ + Bottom-up mixing season pattern + """ + + def __init__(self, configs): + super(MultiScaleSeasonMixing, self).__init__() + + self.down_sampling_layers = torch.nn.ModuleList( + [ + nn.Sequential( + torch.nn.Linear( + configs.seq_len // (configs.down_sampling_window ** i), + configs.seq_len // (configs.down_sampling_window ** (i + 1)), + ), + nn.GELU(), + torch.nn.Linear( + configs.seq_len // (configs.down_sampling_window ** (i + 1)), + configs.seq_len // (configs.down_sampling_window ** (i + 1)), + ), + + ) + for i in range(configs.down_sampling_layers) + ] + ) + + def forward(self, season_list): + + # mixing high->low + out_high = season_list[0] + out_low = season_list[1] + out_season_list = [out_high.permute(0, 2, 1)] + + for i in range(len(season_list) - 1): + out_low_res = self.down_sampling_layers[i](out_high) + out_low = out_low + out_low_res + out_high = out_low + if i + 2 <= len(season_list) - 1: + out_low = season_list[i + 2] + out_season_list.append(out_high.permute(0, 2, 1)) + + return out_season_list + + +class MultiScaleTrendMixing(nn.Module): + """ + Top-down mixing trend pattern + """ + + def __init__(self, configs): + super(MultiScaleTrendMixing, self).__init__() + + self.up_sampling_layers = torch.nn.ModuleList( + [ + nn.Sequential( + torch.nn.Linear( + configs.seq_len // (configs.down_sampling_window ** (i + 1)), + configs.seq_len // (configs.down_sampling_window ** i), + ), + nn.GELU(), + torch.nn.Linear( + configs.seq_len // (configs.down_sampling_window ** i), + configs.seq_len // (configs.down_sampling_window ** i), + ), + ) + for i in reversed(range(configs.down_sampling_layers)) + ]) + + def forward(self, trend_list): + + # mixing low->high + trend_list_reverse = trend_list.copy() + trend_list_reverse.reverse() + out_low = trend_list_reverse[0] + out_high = trend_list_reverse[1] + out_trend_list = [out_low.permute(0, 2, 1)] + + for i in range(len(trend_list_reverse) - 1): + out_high_res = self.up_sampling_layers[i](out_low) + out_high = out_high + out_high_res + out_low = out_high + if i + 2 <= len(trend_list_reverse) - 1: + out_high = trend_list_reverse[i + 2] + out_trend_list.append(out_low.permute(0, 2, 1)) + + out_trend_list.reverse() + return out_trend_list + + +class PastDecomposableMixing(nn.Module): + def __init__(self, configs): + super(PastDecomposableMixing, self).__init__() + self.seq_len = configs.seq_len + self.pred_len = configs.pred_len + self.down_sampling_window = configs.down_sampling_window + + self.layer_norm = nn.LayerNorm(configs.d_model) + self.dropout = nn.Dropout(configs.dropout) + self.channel_independence = configs.channel_independence + + if configs.decomp_method == 'moving_avg': + self.decompsition = series_decomp(configs.moving_avg) + elif configs.decomp_method == "dft_decomp": + self.decompsition = DFT_series_decomp(configs.top_k) + else: + raise ValueError('decompsition is error') + + if not configs.channel_independence: + self.cross_layer = nn.Sequential( + nn.Linear(in_features=configs.d_model, out_features=configs.d_ff), + nn.GELU(), + nn.Linear(in_features=configs.d_ff, out_features=configs.d_model), + ) + + # Mixing season + self.mixing_multi_scale_season = MultiScaleSeasonMixing(configs) + + # Mxing trend + self.mixing_multi_scale_trend = MultiScaleTrendMixing(configs) + + self.out_cross_layer = nn.Sequential( + nn.Linear(in_features=configs.d_model, out_features=configs.d_ff), + nn.GELU(), + nn.Linear(in_features=configs.d_ff, 
out_features=configs.d_model), + ) + + def forward(self, x_list): + length_list = [] + for x in x_list: + _, T, _ = x.size() + length_list.append(T) + + # Decompose to obtain the season and trend + season_list = [] + trend_list = [] + for x in x_list: + season, trend = self.decompsition(x) + if not self.channel_independence: + season = self.cross_layer(season) + trend = self.cross_layer(trend) + season_list.append(season.permute(0, 2, 1)) + trend_list.append(trend.permute(0, 2, 1)) + + # bottom-up season mixing + out_season_list = self.mixing_multi_scale_season(season_list) + # top-down trend mixing + out_trend_list = self.mixing_multi_scale_trend(trend_list) + + out_list = [] + for ori, out_season, out_trend, length in zip(x_list, out_season_list, out_trend_list, + length_list): + out = out_season + out_trend + if self.channel_independence: + out = ori + self.out_cross_layer(out) + out_list.append(out[:, :length, :]) + return out_list + + +class Model(nn.Module): + + def __init__(self, configs): + super(Model, self).__init__() + self.configs = configs + self.task_name = configs.task_name + self.seq_len = configs.seq_len + self.label_len = configs.label_len + self.pred_len = configs.pred_len + self.down_sampling_window = configs.down_sampling_window + self.channel_independence = configs.channel_independence + self.pdm_blocks = nn.ModuleList([PastDecomposableMixing(configs) + for _ in range(configs.e_layers)]) + + self.preprocess = series_decomp(configs.moving_avg) + self.enc_in = configs.enc_in + + if self.channel_independence: + self.enc_embedding = DataEmbedding_wo_pos(1, configs.d_model, configs.embed, configs.freq, + configs.dropout) + else: + self.enc_embedding = DataEmbedding_wo_pos(configs.enc_in, configs.d_model, configs.embed, configs.freq, + configs.dropout) + + self.layer = configs.e_layers + + self.normalize_layers = torch.nn.ModuleList( + [ + Normalize(self.configs.enc_in, affine=True, non_norm=True if configs.use_norm == 0 else False) + for i in range(configs.down_sampling_layers + 1) + ] + ) + + if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': + self.predict_layers = torch.nn.ModuleList( + [ + torch.nn.Linear( + configs.seq_len // (configs.down_sampling_window ** i), + configs.pred_len, + ) + for i in range(configs.down_sampling_layers + 1) + ] + ) + + if self.channel_independence: + self.projection_layer = nn.Linear( + configs.d_model, 1, bias=True) + else: + self.projection_layer = nn.Linear( + configs.d_model, configs.c_out, bias=True) + + self.out_res_layers = torch.nn.ModuleList([ + torch.nn.Linear( + configs.seq_len // (configs.down_sampling_window ** i), + configs.seq_len // (configs.down_sampling_window ** i), + ) + for i in range(configs.down_sampling_layers + 1) + ]) + + self.regression_layers = torch.nn.ModuleList( + [ + torch.nn.Linear( + configs.seq_len // (configs.down_sampling_window ** i), + configs.pred_len, + ) + for i in range(configs.down_sampling_layers + 1) + ] + ) + + if self.task_name == 'imputation' or self.task_name == 'anomaly_detection': + if self.channel_independence: + self.projection_layer = nn.Linear( + configs.d_model, 1, bias=True) + else: + self.projection_layer = nn.Linear( + configs.d_model, configs.c_out, bias=True) + if self.task_name == 'classification': + self.act = F.gelu + self.dropout = nn.Dropout(configs.dropout) + self.projection = nn.Linear( + configs.d_model * configs.seq_len, configs.num_class) + + def out_projection(self, dec_out, i, out_res): + dec_out = self.projection_layer(dec_out) + 
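+        # map the moving-average (trend) residual from pre_enc onto the prediction
+        # horizon and add it back to the projected output as a skip connection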
out_res = out_res.permute(0, 2, 1) + out_res = self.out_res_layers[i](out_res) + out_res = self.regression_layers[i](out_res).permute(0, 2, 1) + dec_out = dec_out + out_res + return dec_out + + def pre_enc(self, x_list): + if self.channel_independence: + return (x_list, None) + else: + out1_list = [] + out2_list = [] + for x in x_list: + x_1, x_2 = self.preprocess(x) + out1_list.append(x_1) + out2_list.append(x_2) + return (out1_list, out2_list) + + def __multi_scale_process_inputs(self, x_enc, x_mark_enc): + if self.configs.down_sampling_method == 'max': + down_pool = torch.nn.MaxPool1d(self.configs.down_sampling_window, return_indices=False) + elif self.configs.down_sampling_method == 'avg': + down_pool = torch.nn.AvgPool1d(self.configs.down_sampling_window) + elif self.configs.down_sampling_method == 'conv': + padding = 1 if torch.__version__ >= '1.5.0' else 2 + down_pool = nn.Conv1d(in_channels=self.configs.enc_in, out_channels=self.configs.enc_in, + kernel_size=3, padding=padding, + stride=self.configs.down_sampling_window, + padding_mode='circular', + bias=False) + else: + return x_enc, x_mark_enc + # B,T,C -> B,C,T + x_enc = x_enc.permute(0, 2, 1) + + x_enc_ori = x_enc + x_mark_enc_mark_ori = x_mark_enc + + x_enc_sampling_list = [] + x_mark_sampling_list = [] + x_enc_sampling_list.append(x_enc.permute(0, 2, 1)) + x_mark_sampling_list.append(x_mark_enc) + + for i in range(self.configs.down_sampling_layers): + x_enc_sampling = down_pool(x_enc_ori) + + x_enc_sampling_list.append(x_enc_sampling.permute(0, 2, 1)) + x_enc_ori = x_enc_sampling + + if x_mark_enc is not None: + x_mark_sampling_list.append(x_mark_enc_mark_ori[:, ::self.configs.down_sampling_window, :]) + x_mark_enc_mark_ori = x_mark_enc_mark_ori[:, ::self.configs.down_sampling_window, :] + + x_enc = x_enc_sampling_list + x_mark_enc = x_mark_sampling_list if x_mark_enc is not None else None + + return x_enc, x_mark_enc + + def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + + x_enc, x_mark_enc = self.__multi_scale_process_inputs(x_enc, x_mark_enc) + + x_list = [] + x_mark_list = [] + if x_mark_enc is not None: + for i, x, x_mark in zip(range(len(x_enc)), x_enc, x_mark_enc): + B, T, N = x.size() + x = self.normalize_layers[i](x, 'norm') + if self.channel_independence: + x = x.permute(0, 2, 1).contiguous().reshape(B * N, T, 1) + x_list.append(x) + x_mark = x_mark.repeat(N, 1, 1) + x_mark_list.append(x_mark) + else: + x_list.append(x) + x_mark_list.append(x_mark) + else: + for i, x in zip(range(len(x_enc)), x_enc, ): + B, T, N = x.size() + x = self.normalize_layers[i](x, 'norm') + if self.channel_independence: + x = x.permute(0, 2, 1).contiguous().reshape(B * N, T, 1) + x_list.append(x) + + # embedding + enc_out_list = [] + x_list = self.pre_enc(x_list) + if x_mark_enc is not None: + for i, x, x_mark in zip(range(len(x_list[0])), x_list[0], x_mark_list): + enc_out = self.enc_embedding(x, x_mark) # [B,T,C] + enc_out_list.append(enc_out) + else: + for i, x in zip(range(len(x_list[0])), x_list[0]): + enc_out = self.enc_embedding(x, None) # [B,T,C] + enc_out_list.append(enc_out) + + # Past Decomposable Mixing as encoder for past + for i in range(self.layer): + enc_out_list = self.pdm_blocks[i](enc_out_list) + + # Future Multipredictor Mixing as decoder for future + dec_out_list = self.future_multi_mixing(B, enc_out_list, x_list) + + dec_out = torch.stack(dec_out_list, dim=-1).sum(-1) + dec_out = self.normalize_layers[0](dec_out, 'denorm') + return dec_out + + def future_multi_mixing(self, B, enc_out_list, x_list): + 
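+        # Future Multipredictor Mixing: each scale has its own linear predictor that maps
+        # the encoded sequence to the prediction horizon; the per-scale outputs are
+        # stacked and summed in forecast()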
dec_out_list = [] + if self.channel_independence: + x_list = x_list[0] + for i, enc_out in zip(range(len(x_list)), enc_out_list): + dec_out = self.predict_layers[i](enc_out.permute(0, 2, 1)).permute( + 0, 2, 1) # align temporal dimension + dec_out = self.projection_layer(dec_out) + dec_out = dec_out.reshape(B, self.configs.c_out, self.pred_len).permute(0, 2, 1).contiguous() + dec_out_list.append(dec_out) + + else: + for i, enc_out, out_res in zip(range(len(x_list[0])), enc_out_list, x_list[1]): + dec_out = self.predict_layers[i](enc_out.permute(0, 2, 1)).permute( + 0, 2, 1) # align temporal dimension + dec_out = self.out_projection(dec_out, i, out_res) + dec_out_list.append(dec_out) + + return dec_out_list + + def classification(self, x_enc, x_mark_enc): + x_enc, _ = self.__multi_scale_process_inputs(x_enc, None) + x_list = x_enc + + # embedding + enc_out_list = [] + for x in x_list: + enc_out = self.enc_embedding(x, None) # [B,T,C] + enc_out_list.append(enc_out) + + # MultiScale-CrissCrossAttention as encoder for past + for i in range(self.layer): + enc_out_list = self.pdm_blocks[i](enc_out_list) + + enc_out = enc_out_list[0] + # Output + # the output transformer encoder/decoder embeddings don't include non-linearity + output = self.act(enc_out) + output = self.dropout(output) + # zero-out padding embeddings + output = output * x_mark_enc.unsqueeze(-1) + # (batch_size, seq_length * d_model) + output = output.reshape(output.shape[0], -1) + output = self.projection(output) # (batch_size, num_classes) + return output + + def anomaly_detection(self, x_enc): + B, T, N = x_enc.size() + x_enc, _ = self.__multi_scale_process_inputs(x_enc, None) + + x_list = [] + + for i, x in zip(range(len(x_enc)), x_enc, ): + B, T, N = x.size() + x = self.normalize_layers[i](x, 'norm') + if self.channel_independence: + x = x.permute(0, 2, 1).contiguous().reshape(B * N, T, 1) + x_list.append(x) + + # embedding + enc_out_list = [] + for x in x_list: + enc_out = self.enc_embedding(x, None) # [B,T,C] + enc_out_list.append(enc_out) + + # MultiScale-CrissCrossAttention as encoder for past + for i in range(self.layer): + enc_out_list = self.pdm_blocks[i](enc_out_list) + + dec_out = self.projection_layer(enc_out_list[0]) + dec_out = dec_out.reshape(B, self.configs.c_out, -1).permute(0, 2, 1).contiguous() + + dec_out = self.normalize_layers[0](dec_out, 'denorm') + return dec_out + + def imputation(self, x_enc, x_mark_enc, mask): + means = torch.sum(x_enc, dim=1) / torch.sum(mask == 1, dim=1) + means = means.unsqueeze(1).detach() + x_enc = x_enc - means + x_enc = x_enc.masked_fill(mask == 0, 0) + stdev = torch.sqrt(torch.sum(x_enc * x_enc, dim=1) / + torch.sum(mask == 1, dim=1) + 1e-5) + stdev = stdev.unsqueeze(1).detach() + x_enc /= stdev + + B, T, N = x_enc.size() + x_enc, x_mark_enc = self.__multi_scale_process_inputs(x_enc, x_mark_enc) + + x_list = [] + x_mark_list = [] + if x_mark_enc is not None: + for i, x, x_mark in zip(range(len(x_enc)), x_enc, x_mark_enc): + B, T, N = x.size() + if self.channel_independence: + x = x.permute(0, 2, 1).contiguous().reshape(B * N, T, 1) + x_list.append(x) + x_mark = x_mark.repeat(N, 1, 1) + x_mark_list.append(x_mark) + else: + for i, x in zip(range(len(x_enc)), x_enc, ): + B, T, N = x.size() + if self.channel_independence: + x = x.permute(0, 2, 1).contiguous().reshape(B * N, T, 1) + x_list.append(x) + + # embedding + enc_out_list = [] + for x in x_list: + enc_out = self.enc_embedding(x, None) # [B,T,C] + enc_out_list.append(enc_out) + + # MultiScale-CrissCrossAttention as encoder 
for past + for i in range(self.layer): + enc_out_list = self.pdm_blocks[i](enc_out_list) + + dec_out = self.projection_layer(enc_out_list[0]) + dec_out = dec_out.reshape(B, self.configs.c_out, -1).permute(0, 2, 1).contiguous() + + dec_out = dec_out * \ + (stdev[:, 0, :].unsqueeze(1).repeat(1, self.seq_len, 1)) + dec_out = dec_out + \ + (means[:, 0, :].unsqueeze(1).repeat(1, self.seq_len, 1)) + return dec_out + + def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None): + if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': + dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec) + return dec_out + if self.task_name == 'imputation': + dec_out = self.imputation(x_enc, x_mark_enc, mask) + return dec_out # [B, L, D] + if self.task_name == 'anomaly_detection': + dec_out = self.anomaly_detection(x_enc) + return dec_out # [B, L, D] + if self.task_name == 'classification': + dec_out = self.classification(x_enc, x_mark_enc) + return dec_out # [B, N] + else: + raise ValueError('Other tasks implemented yet') diff --git a/models/TimeXer.py b/models/TimeXer.py new file mode 100644 index 0000000..c8026b7 --- /dev/null +++ b/models/TimeXer.py @@ -0,0 +1,225 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from layers.SelfAttention_Family import FullAttention, AttentionLayer +from layers.Embed import DataEmbedding_inverted, PositionalEmbedding +import numpy as np + + +class FlattenHead(nn.Module): + def __init__(self, n_vars, nf, target_window, head_dropout=0): + super().__init__() + self.n_vars = n_vars + self.flatten = nn.Flatten(start_dim=-2) + self.linear = nn.Linear(nf, target_window) + self.dropout = nn.Dropout(head_dropout) + + def forward(self, x): # x: [bs x nvars x d_model x patch_num] + x = self.flatten(x) + x = self.linear(x) + x = self.dropout(x) + return x + + +class EnEmbedding(nn.Module): + def __init__(self, n_vars, d_model, patch_len, dropout): + super(EnEmbedding, self).__init__() + # Patching + self.patch_len = patch_len + + self.value_embedding = nn.Linear(patch_len, d_model, bias=False) + self.glb_token = nn.Parameter(torch.randn(1, n_vars, 1, d_model)) + self.position_embedding = PositionalEmbedding(d_model) + + self.dropout = nn.Dropout(dropout) + + def forward(self, x): + # do patching + n_vars = x.shape[1] + glb = self.glb_token.repeat((x.shape[0], 1, 1, 1)) + + x = x.unfold(dimension=-1, size=self.patch_len, step=self.patch_len) + x = torch.reshape(x, (x.shape[0] * x.shape[1], x.shape[2], x.shape[3])) + # Input encoding + x = self.value_embedding(x) + self.position_embedding(x) + x = torch.reshape(x, (-1, n_vars, x.shape[-2], x.shape[-1])) + x = torch.cat([x, glb], dim=2) + x = torch.reshape(x, (x.shape[0] * x.shape[1], x.shape[2], x.shape[3])) + return self.dropout(x), n_vars + + +class Encoder(nn.Module): + def __init__(self, layers, norm_layer=None, projection=None): + super(Encoder, self).__init__() + self.layers = nn.ModuleList(layers) + self.norm = norm_layer + self.projection = projection + + def forward(self, x, cross, x_mask=None, cross_mask=None, tau=None, delta=None): + for layer in self.layers: + x = layer(x, cross, x_mask=x_mask, cross_mask=cross_mask, tau=tau, delta=delta) + + if self.norm is not None: + x = self.norm(x) + + if self.projection is not None: + x = self.projection(x) + return x + + +class EncoderLayer(nn.Module): + def __init__(self, self_attention, cross_attention, d_model, d_ff=None, + dropout=0.1, activation="relu"): + super(EncoderLayer, self).__init__() + d_ff = d_ff or 4 
* d_model + self.self_attention = self_attention + self.cross_attention = cross_attention + self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1) + self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1) + self.norm1 = nn.LayerNorm(d_model) + self.norm2 = nn.LayerNorm(d_model) + self.norm3 = nn.LayerNorm(d_model) + self.dropout = nn.Dropout(dropout) + self.activation = F.relu if activation == "relu" else F.gelu + + def forward(self, x, cross, x_mask=None, cross_mask=None, tau=None, delta=None): + B, L, D = cross.shape + x = x + self.dropout(self.self_attention( + x, x, x, + attn_mask=x_mask, + tau=tau, delta=None + )[0]) + x = self.norm1(x) + + x_glb_ori = x[:, -1, :].unsqueeze(1) + x_glb = torch.reshape(x_glb_ori, (B, -1, D)) + x_glb_attn = self.dropout(self.cross_attention( + x_glb, cross, cross, + attn_mask=cross_mask, + tau=tau, delta=delta + )[0]) + x_glb_attn = torch.reshape(x_glb_attn, + (x_glb_attn.shape[0] * x_glb_attn.shape[1], x_glb_attn.shape[2])).unsqueeze(1) + x_glb = x_glb_ori + x_glb_attn + x_glb = self.norm2(x_glb) + + y = x = torch.cat([x[:, :-1, :], x_glb], dim=1) + + y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1)))) + y = self.dropout(self.conv2(y).transpose(-1, 1)) + + return self.norm3(x + y) + + +class Model(nn.Module): + + def __init__(self, configs): + super(Model, self).__init__() + self.task_name = configs.task_name + self.features = configs.features + self.seq_len = configs.seq_len + self.pred_len = configs.pred_len + self.use_norm = configs.use_norm + self.patch_len = configs.patch_len + self.patch_num = int(configs.seq_len // configs.patch_len) + self.n_vars = 1 if configs.features == 'MS' else configs.enc_in + # Embedding + self.en_embedding = EnEmbedding(self.n_vars, configs.d_model, self.patch_len, configs.dropout) + + self.ex_embedding = DataEmbedding_inverted(configs.seq_len, configs.d_model, configs.embed, configs.freq, + configs.dropout) + + # Encoder-only architecture + self.encoder = Encoder( + [ + EncoderLayer( + AttentionLayer( + FullAttention(False, configs.factor, attention_dropout=configs.dropout, + output_attention=False), + configs.d_model, configs.n_heads), + AttentionLayer( + FullAttention(False, configs.factor, attention_dropout=configs.dropout, + output_attention=False), + configs.d_model, configs.n_heads), + configs.d_model, + configs.d_ff, + dropout=configs.dropout, + activation=configs.activation, + ) + for l in range(configs.e_layers) + ], + norm_layer=torch.nn.LayerNorm(configs.d_model) + ) + self.head_nf = configs.d_model * (self.patch_num + 1) + self.head = FlattenHead(configs.enc_in, self.head_nf, configs.pred_len, + head_dropout=configs.dropout) + + def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + if self.use_norm: + # Normalization from Non-stationary Transformer + means = x_enc.mean(1, keepdim=True).detach() + x_enc = x_enc - means + stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5) + x_enc /= stdev + + _, _, N = x_enc.shape + + en_embed, n_vars = self.en_embedding(x_enc[:, :, -1].unsqueeze(-1).permute(0, 2, 1)) + ex_embed = self.ex_embedding(x_enc[:, :, :-1], x_mark_enc) + + enc_out = self.encoder(en_embed, ex_embed) + enc_out = torch.reshape( + enc_out, (-1, n_vars, enc_out.shape[-2], enc_out.shape[-1])) + # z: [bs x nvars x d_model x patch_num] + enc_out = enc_out.permute(0, 1, 3, 2) + + dec_out = self.head(enc_out) # z: [bs x nvars x target_window] + dec_out = dec_out.permute(0, 2, 1) + + if self.use_norm: + # 
De-Normalization from Non-stationary Transformer + dec_out = dec_out * (stdev[:, 0, -1:].unsqueeze(1).repeat(1, self.pred_len, 1)) + dec_out = dec_out + (means[:, 0, -1:].unsqueeze(1).repeat(1, self.pred_len, 1)) + + return dec_out + + + def forecast_multi(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + if self.use_norm: + # Normalization from Non-stationary Transformer + means = x_enc.mean(1, keepdim=True).detach() + x_enc = x_enc - means + stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5) + x_enc /= stdev + + _, _, N = x_enc.shape + + en_embed, n_vars = self.en_embedding(x_enc.permute(0, 2, 1)) + ex_embed = self.ex_embedding(x_enc, x_mark_enc) + + enc_out = self.encoder(en_embed, ex_embed) + enc_out = torch.reshape( + enc_out, (-1, n_vars, enc_out.shape[-2], enc_out.shape[-1])) + # z: [bs x nvars x d_model x patch_num] + enc_out = enc_out.permute(0, 1, 3, 2) + + dec_out = self.head(enc_out) # z: [bs x nvars x target_window] + dec_out = dec_out.permute(0, 2, 1) + + if self.use_norm: + # De-Normalization from Non-stationary Transformer + dec_out = dec_out * (stdev[:, 0, :].unsqueeze(1).repeat(1, self.pred_len, 1)) + dec_out = dec_out + (means[:, 0, :].unsqueeze(1).repeat(1, self.pred_len, 1)) + + return dec_out + + def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None): + if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': + if self.features == 'M': + dec_out = self.forecast_multi(x_enc, x_mark_enc, x_dec, x_mark_dec) + return dec_out[:, -self.pred_len:, :] # [B, L, D] + else: + dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec) + return dec_out[:, -self.pred_len:, :] # [B, L, D] + else: + return None \ No newline at end of file diff --git a/models/TimesNet.py b/models/TimesNet.py new file mode 100644 index 0000000..8977428 --- /dev/null +++ b/models/TimesNet.py @@ -0,0 +1,215 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.fft +from layers.Embed import DataEmbedding +from layers.Conv_Blocks import Inception_Block_V1 + + +def FFT_for_Period(x, k=2): + # [B, T, C] + xf = torch.fft.rfft(x, dim=1) + # find period by amplitudes + frequency_list = abs(xf).mean(0).mean(-1) + frequency_list[0] = 0 + _, top_list = torch.topk(frequency_list, k) + top_list = top_list.detach().cpu().numpy() + period = x.shape[1] // top_list + return period, abs(xf).mean(-1)[:, top_list] + + +class TimesBlock(nn.Module): + def __init__(self, configs): + super(TimesBlock, self).__init__() + self.seq_len = configs.seq_len + self.pred_len = configs.pred_len + self.k = configs.top_k + # parameter-efficient design + self.conv = nn.Sequential( + Inception_Block_V1(configs.d_model, configs.d_ff, + num_kernels=configs.num_kernels), + nn.GELU(), + Inception_Block_V1(configs.d_ff, configs.d_model, + num_kernels=configs.num_kernels) + ) + + def forward(self, x): + B, T, N = x.size() + period_list, period_weight = FFT_for_Period(x, self.k) + + res = [] + for i in range(self.k): + period = period_list[i] + # padding + if (self.seq_len + self.pred_len) % period != 0: + length = ( + ((self.seq_len + self.pred_len) // period) + 1) * period + padding = torch.zeros([x.shape[0], (length - (self.seq_len + self.pred_len)), x.shape[2]]).to(x.device) + out = torch.cat([x, padding], dim=1) + else: + length = (self.seq_len + self.pred_len) + out = x + # reshape + out = out.reshape(B, length // period, period, + N).permute(0, 3, 1, 2).contiguous() + # 2D conv: from 1d Variation to 2d Variation + out = self.conv(out) + # 
reshape back + out = out.permute(0, 2, 3, 1).reshape(B, -1, N) + res.append(out[:, :(self.seq_len + self.pred_len), :]) + res = torch.stack(res, dim=-1) + # adaptive aggregation + period_weight = F.softmax(period_weight, dim=1) + period_weight = period_weight.unsqueeze( + 1).unsqueeze(1).repeat(1, T, N, 1) + res = torch.sum(res * period_weight, -1) + # residual connection + res = res + x + return res + + +class Model(nn.Module): + """ + Paper link: https://openreview.net/pdf?id=ju_Uqw384Oq + """ + + def __init__(self, configs): + super(Model, self).__init__() + self.configs = configs + self.task_name = configs.task_name + self.seq_len = configs.seq_len + self.label_len = configs.label_len + self.pred_len = configs.pred_len + self.model = nn.ModuleList([TimesBlock(configs) + for _ in range(configs.e_layers)]) + self.enc_embedding = DataEmbedding(configs.enc_in, configs.d_model, configs.embed, configs.freq, + configs.dropout) + self.layer = configs.e_layers + self.layer_norm = nn.LayerNorm(configs.d_model) + if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': + self.predict_linear = nn.Linear( + self.seq_len, self.pred_len + self.seq_len) + self.projection = nn.Linear( + configs.d_model, configs.c_out, bias=True) + if self.task_name == 'imputation' or self.task_name == 'anomaly_detection': + self.projection = nn.Linear( + configs.d_model, configs.c_out, bias=True) + if self.task_name == 'classification': + self.act = F.gelu + self.dropout = nn.Dropout(configs.dropout) + self.projection = nn.Linear( + configs.d_model * configs.seq_len, configs.num_class) + + def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + # Normalization from Non-stationary Transformer + means = x_enc.mean(1, keepdim=True).detach() + x_enc = x_enc.sub(means) + stdev = torch.sqrt( + torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5) + x_enc = x_enc.div(stdev) + + # embedding + enc_out = self.enc_embedding(x_enc, x_mark_enc) # [B,T,C] + enc_out = self.predict_linear(enc_out.permute(0, 2, 1)).permute( + 0, 2, 1) # align temporal dimension + # TimesNet + for i in range(self.layer): + enc_out = self.layer_norm(self.model[i](enc_out)) + # project back + dec_out = self.projection(enc_out) + + # De-Normalization from Non-stationary Transformer + dec_out = dec_out.mul( + (stdev[:, 0, :].unsqueeze(1).repeat( + 1, self.pred_len + self.seq_len, 1))) + dec_out = dec_out.add( + (means[:, 0, :].unsqueeze(1).repeat( + 1, self.pred_len + self.seq_len, 1))) + return dec_out + + def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask): + # Normalization from Non-stationary Transformer + means = torch.sum(x_enc, dim=1) / torch.sum(mask == 1, dim=1) + means = means.unsqueeze(1).detach() + x_enc = x_enc.sub(means) + x_enc = x_enc.masked_fill(mask == 0, 0) + stdev = torch.sqrt(torch.sum(x_enc * x_enc, dim=1) / + torch.sum(mask == 1, dim=1) + 1e-5) + stdev = stdev.unsqueeze(1).detach() + x_enc = x_enc.div(stdev) + + # embedding + enc_out = self.enc_embedding(x_enc, x_mark_enc) # [B,T,C] + # TimesNet + for i in range(self.layer): + enc_out = self.layer_norm(self.model[i](enc_out)) + # project back + dec_out = self.projection(enc_out) + + # De-Normalization from Non-stationary Transformer + dec_out = dec_out.mul( + (stdev[:, 0, :].unsqueeze(1).repeat( + 1, self.pred_len + self.seq_len, 1))) + dec_out = dec_out.add( + (means[:, 0, :].unsqueeze(1).repeat( + 1, self.pred_len + self.seq_len, 1))) + return dec_out + + def anomaly_detection(self, x_enc): + # Normalization from Non-stationary 
Transformer + means = x_enc.mean(1, keepdim=True).detach() + x_enc = x_enc.sub(means) + stdev = torch.sqrt( + torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5) + x_enc = x_enc.div(stdev) + + # embedding + enc_out = self.enc_embedding(x_enc, None) # [B,T,C] + # TimesNet + for i in range(self.layer): + enc_out = self.layer_norm(self.model[i](enc_out)) + # project back + dec_out = self.projection(enc_out) + + # De-Normalization from Non-stationary Transformer + dec_out = dec_out.mul( + (stdev[:, 0, :].unsqueeze(1).repeat( + 1, self.pred_len + self.seq_len, 1))) + dec_out = dec_out.add( + (means[:, 0, :].unsqueeze(1).repeat( + 1, self.pred_len + self.seq_len, 1))) + return dec_out + + def classification(self, x_enc, x_mark_enc): + # embedding + enc_out = self.enc_embedding(x_enc, None) # [B,T,C] + # TimesNet + for i in range(self.layer): + enc_out = self.layer_norm(self.model[i](enc_out)) + + # Output + # the output transformer encoder/decoder embeddings don't include non-linearity + output = self.act(enc_out) + output = self.dropout(output) + # zero-out padding embeddings + output = output * x_mark_enc.unsqueeze(-1) + # (batch_size, seq_length * d_model) + output = output.reshape(output.shape[0], -1) + output = self.projection(output) # (batch_size, num_classes) + return output + + def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None): + if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': + dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec) + return dec_out[:, -self.pred_len:, :] # [B, L, D] + if self.task_name == 'imputation': + dec_out = self.imputation( + x_enc, x_mark_enc, x_dec, x_mark_dec, mask) + return dec_out # [B, L, D] + if self.task_name == 'anomaly_detection': + dec_out = self.anomaly_detection(x_enc) + return dec_out # [B, L, D] + if self.task_name == 'classification': + dec_out = self.classification(x_enc, x_mark_enc) + return dec_out # [B, N] + return None diff --git a/models/Transformer.py b/models/Transformer.py new file mode 100644 index 0000000..3ecae52 --- /dev/null +++ b/models/Transformer.py @@ -0,0 +1,124 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from layers.Transformer_EncDec import Decoder, DecoderLayer, Encoder, EncoderLayer, ConvLayer +from layers.SelfAttention_Family import FullAttention, AttentionLayer +from layers.Embed import DataEmbedding +import numpy as np + + +class Model(nn.Module): + """ + Vanilla Transformer + with O(L^2) complexity + Paper link: https://proceedings.neurips.cc/paper/2017/file/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf + """ + + def __init__(self, configs): + super(Model, self).__init__() + self.task_name = configs.task_name + self.pred_len = configs.pred_len + # Embedding + self.enc_embedding = DataEmbedding(configs.enc_in, configs.d_model, configs.embed, configs.freq, + configs.dropout) + # Encoder + self.encoder = Encoder( + [ + EncoderLayer( + AttentionLayer( + FullAttention(False, configs.factor, attention_dropout=configs.dropout, + output_attention=False), configs.d_model, configs.n_heads), + configs.d_model, + configs.d_ff, + dropout=configs.dropout, + activation=configs.activation + ) for l in range(configs.e_layers) + ], + norm_layer=torch.nn.LayerNorm(configs.d_model) + ) + # Decoder + if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': + self.dec_embedding = DataEmbedding(configs.dec_in, configs.d_model, configs.embed, configs.freq, + configs.dropout) + self.decoder = Decoder( + [ + 
DecoderLayer( + AttentionLayer( + FullAttention(True, configs.factor, attention_dropout=configs.dropout, + output_attention=False), + configs.d_model, configs.n_heads), + AttentionLayer( + FullAttention(False, configs.factor, attention_dropout=configs.dropout, + output_attention=False), + configs.d_model, configs.n_heads), + configs.d_model, + configs.d_ff, + dropout=configs.dropout, + activation=configs.activation, + ) + for l in range(configs.d_layers) + ], + norm_layer=torch.nn.LayerNorm(configs.d_model), + projection=nn.Linear(configs.d_model, configs.c_out, bias=True) + ) + if self.task_name == 'imputation': + self.projection = nn.Linear(configs.d_model, configs.c_out, bias=True) + if self.task_name == 'anomaly_detection': + self.projection = nn.Linear(configs.d_model, configs.c_out, bias=True) + if self.task_name == 'classification': + self.act = F.gelu + self.dropout = nn.Dropout(configs.dropout) + self.projection = nn.Linear(configs.d_model * configs.seq_len, configs.num_class) + + def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + # Embedding + enc_out = self.enc_embedding(x_enc, x_mark_enc) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + dec_out = self.dec_embedding(x_dec, x_mark_dec) + dec_out = self.decoder(dec_out, enc_out, x_mask=None, cross_mask=None) + return dec_out + + def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask): + # Embedding + enc_out = self.enc_embedding(x_enc, x_mark_enc) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + dec_out = self.projection(enc_out) + return dec_out + + def anomaly_detection(self, x_enc): + # Embedding + enc_out = self.enc_embedding(x_enc, None) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + dec_out = self.projection(enc_out) + return dec_out + + def classification(self, x_enc, x_mark_enc): + # Embedding + enc_out = self.enc_embedding(x_enc, None) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + # Output + output = self.act(enc_out) # the output transformer encoder/decoder embeddings don't include non-linearity + output = self.dropout(output) + output = output * x_mark_enc.unsqueeze(-1) # zero-out padding embeddings + output = output.reshape(output.shape[0], -1) # (batch_size, seq_length * d_model) + output = self.projection(output) # (batch_size, num_classes) + return output + + def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None): + if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': + dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec) + return dec_out[:, -self.pred_len:, :] # [B, L, D] + if self.task_name == 'imputation': + dec_out = self.imputation(x_enc, x_mark_enc, x_dec, x_mark_dec, mask) + return dec_out # [B, L, D] + if self.task_name == 'anomaly_detection': + dec_out = self.anomaly_detection(x_enc) + return dec_out # [B, L, D] + if self.task_name == 'classification': + dec_out = self.classification(x_enc, x_mark_enc) + return dec_out # [B, N] + return None diff --git a/models/WPMixer.py b/models/WPMixer.py new file mode 100644 index 0000000..9271b2b --- /dev/null +++ b/models/WPMixer.py @@ -0,0 +1,319 @@ +# -*- coding: utf-8 -*- +""" +Created on Sun Jan 5 16:10:01 2025 +@author: Murad +SISLab, USF +mmurad@usf.edu +https://github.com/Secure-and-Intelligent-Systems-Lab/WPMixer +""" + +import torch.nn as nn +import torch +from layers.DWT_Decomposition import Decomposition + + +class TokenMixer(nn.Module): + def __init__(self, input_seq=[], batch_size=[], channel=[], pred_seq=[], dropout=[], factor=[], 
d_model=[]): + super(TokenMixer, self).__init__() + self.input_seq = input_seq + self.batch_size = batch_size + self.channel = channel + self.pred_seq = pred_seq + self.dropout = dropout + self.factor = factor + self.d_model = d_model + + self.dropoutLayer = nn.Dropout(self.dropout) + self.layers = nn.Sequential(nn.Linear(self.input_seq, self.pred_seq * self.factor), + nn.GELU(), + nn.Dropout(self.dropout), + nn.Linear(self.pred_seq * self.factor, self.pred_seq) + ) + + def forward(self, x): + x = x.transpose(1, 2) + x = self.layers(x) + x = x.transpose(1, 2) + return x + + +class Mixer(nn.Module): + def __init__(self, + input_seq=[], + out_seq=[], + batch_size=[], + channel=[], + d_model=[], + dropout=[], + tfactor=[], + dfactor=[]): + super(Mixer, self).__init__() + self.input_seq = input_seq + self.pred_seq = out_seq + self.batch_size = batch_size + self.channel = channel + self.d_model = d_model + self.dropout = dropout + self.tfactor = tfactor # expansion factor for patch mixer + self.dfactor = dfactor # expansion factor for embedding mixer + + self.tMixer = TokenMixer(input_seq=self.input_seq, batch_size=self.batch_size, channel=self.channel, + pred_seq=self.pred_seq, dropout=self.dropout, factor=self.tfactor, + d_model=self.d_model) + self.dropoutLayer = nn.Dropout(self.dropout) + self.norm1 = nn.BatchNorm2d(self.channel) + self.norm2 = nn.BatchNorm2d(self.channel) + + self.embeddingMixer = nn.Sequential(nn.Linear(self.d_model, self.d_model * self.dfactor), + nn.GELU(), + nn.Dropout(self.dropout), + nn.Linear(self.d_model * self.dfactor, self.d_model)) + + def forward(self, x): + ''' + Parameters + ---------- + x : input: [Batch, Channel, Patch_number, d_model] + + Returns + ------- + x: output: [Batch, Channel, Patch_number, d_model] + + ''' + x = self.norm1(x) + x = x.permute(0, 3, 1, 2) + x = self.dropoutLayer(self.tMixer(x)) + x = x.permute(0, 2, 3, 1) + x = self.norm2(x) + x = x + self.dropoutLayer(self.embeddingMixer(x)) + return x + + +class ResolutionBranch(nn.Module): + def __init__(self, + input_seq=[], + pred_seq=[], + batch_size=[], + channel=[], + d_model=[], + dropout=[], + embedding_dropout=[], + tfactor=[], + dfactor=[], + patch_len=[], + patch_stride=[]): + super(ResolutionBranch, self).__init__() + self.input_seq = input_seq + self.pred_seq = pred_seq + self.batch_size = batch_size + self.channel = channel + self.d_model = d_model + self.dropout = dropout + self.embedding_dropout = embedding_dropout + self.tfactor = tfactor + self.dfactor = dfactor + self.patch_len = patch_len + self.patch_stride = patch_stride + self.patch_num = int((self.input_seq - self.patch_len) / self.patch_stride + 2) + + self.patch_norm = nn.BatchNorm2d(self.channel) + self.patch_embedding_layer = nn.Linear(self.patch_len, self.d_model) # shared among all channels + self.mixer1 = Mixer(input_seq=self.patch_num, + out_seq=self.patch_num, + batch_size=self.batch_size, + channel=self.channel, + d_model=self.d_model, + dropout=self.dropout, + tfactor=self.tfactor, + dfactor=self.dfactor) + self.mixer2 = Mixer(input_seq=self.patch_num, + out_seq=self.patch_num, + batch_size=self.batch_size, + channel=self.channel, + d_model=self.d_model, + dropout=self.dropout, + tfactor=self.tfactor, + dfactor=self.dfactor) + self.norm = nn.BatchNorm2d(self.channel) + self.dropoutLayer = nn.Dropout(self.embedding_dropout) + self.head = nn.Sequential(nn.Flatten(start_dim=-2, end_dim=-1), + nn.Linear(self.patch_num * self.d_model, self.pred_seq)) + + def forward(self, x): + ''' + Parameters + ---------- + x : 
input coefficient series: [Batch, channel, length_of_coefficient_series] + + Returns + ------- + out : predicted coefficient series: [Batch, channel, length_of_pred_coeff_series] + ''' + + x_patch = self.do_patching(x) + x_patch = self.patch_norm(x_patch) + x_emb = self.dropoutLayer(self.patch_embedding_layer(x_patch)) + + out = self.mixer1(x_emb) + res = out + out = res + self.mixer2(out) + out = self.norm(out) + + out = self.head(out) + return out + + def do_patching(self, x): + x_end = x[:, :, -1:] + x_padding = x_end.repeat(1, 1, self.patch_stride) + x_new = torch.cat((x, x_padding), dim=-1) + x_patch = x_new.unfold(dimension=-1, size=self.patch_len, step=self.patch_stride) + return x_patch + + +class WPMixerCore(nn.Module): + def __init__(self, + input_length=[], + pred_length=[], + wavelet_name=[], + level=[], + batch_size=[], + channel=[], + d_model=[], + dropout=[], + embedding_dropout=[], + tfactor=[], + dfactor=[], + device=[], + patch_len=[], + patch_stride=[], + no_decomposition=[], + use_amp=[]): + super(WPMixerCore, self).__init__() + self.input_length = input_length + self.pred_length = pred_length + self.wavelet_name = wavelet_name + self.level = level + self.batch_size = batch_size + self.channel = channel + self.d_model = d_model + self.dropout = dropout + self.embedding_dropout = embedding_dropout + self.device = device + self.no_decomposition = no_decomposition + self.tfactor = tfactor + self.dfactor = dfactor + self.use_amp = use_amp + + self.Decomposition_model = Decomposition(input_length=self.input_length, + pred_length=self.pred_length, + wavelet_name=self.wavelet_name, + level=self.level, + batch_size=self.batch_size, + channel=self.channel, + d_model=self.d_model, + tfactor=self.tfactor, + dfactor=self.dfactor, + device=self.device, + no_decomposition=self.no_decomposition, + use_amp=self.use_amp) + + self.input_w_dim = self.Decomposition_model.input_w_dim # list of the length of the input coefficient series + self.pred_w_dim = self.Decomposition_model.pred_w_dim # list of the length of the predicted coefficient series + + self.patch_len = patch_len + self.patch_stride = patch_stride + + # (m+1) number of resolutionBranch + self.resolutionBranch = nn.ModuleList([ResolutionBranch(input_seq=self.input_w_dim[i], + pred_seq=self.pred_w_dim[i], + batch_size=self.batch_size, + channel=self.channel, + d_model=self.d_model, + dropout=self.dropout, + embedding_dropout=self.embedding_dropout, + tfactor=self.tfactor, + dfactor=self.dfactor, + patch_len=self.patch_len, + patch_stride=self.patch_stride) for i in + range(len(self.input_w_dim))]) + + def forward(self, xL): + ''' + Parameters + ---------- + xL : Look back window: [Batch, look_back_length, channel] + + Returns + ------- + xT : Prediction time series: [Batch, prediction_length, output_channel] + ''' + x = xL.transpose(1, 2) # [batch, channel, look_back_length] + + # xA: approximation coefficient series, + # xD: detail coefficient series + # yA: predicted approximation coefficient series + # yD: predicted detail coefficient series + + xA, xD = self.Decomposition_model.transform(x) + + yA = self.resolutionBranch[0](xA) + yD = [] + for i in range(len(xD)): + yD_i = self.resolutionBranch[i + 1](xD[i]) + yD.append(yD_i) + + y = self.Decomposition_model.inv_transform(yA, yD) + y = y.transpose(1, 2) + xT = y[:, -self.pred_length:, :] # decomposition output is always even, but pred length can be odd + + return xT + + +class Model(nn.Module): + def __init__(self, args, tfactor=5, dfactor=5, wavelet='db2', level=1, 
stride=8, no_decomposition=False): + super(Model, self).__init__() + self.args = args + self.task_name = args.task_name + self.wpmixerCore = WPMixerCore(input_length=self.args.seq_len, + pred_length=self.args.pred_len, + wavelet_name=wavelet, + level=level, + batch_size=self.args.batch_size, + channel=self.args.c_out, + d_model=self.args.d_model, + dropout=self.args.dropout, + embedding_dropout=self.args.dropout, + tfactor=tfactor, + dfactor=dfactor, + device=self.args.device, + patch_len=self.args.patch_len, + patch_stride=stride, + no_decomposition=no_decomposition, + use_amp=self.args.use_amp) + + def forecast(self, x_enc, x_mark_enc, x_dec, batch_y_mark): + # Normalization + means = x_enc.mean(1, keepdim=True).detach() + x_enc = x_enc - means + stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5) + x_enc /= stdev + + pred = self.wpmixerCore(x_enc) + pred = pred[:, :, -self.args.c_out:] + + # De-Normalization + dec_out = pred * (stdev[:, 0].unsqueeze(1).repeat(1, self.args.pred_len, 1)) + dec_out = dec_out + (means[:, 0].unsqueeze(1).repeat(1, self.args.pred_len, 1)) + return dec_out + + def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None): + if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': + dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec) + return dec_out # [B, L, D] + if self.task_name == 'imputation': + raise NotImplementedError("Task imputation for WPMixer is temporarily not supported") + if self.task_name == 'anomaly_detection': + raise NotImplementedError("Task anomaly_detection for WPMixer is temporarily not supported") + if self.task_name == 'classification': + raise NotImplementedError("Task classification for WPMixer is temporarily not supported") + return None diff --git a/models/__init__.py b/models/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/models/iTransformer.py b/models/iTransformer.py new file mode 100644 index 0000000..4833a69 --- /dev/null +++ b/models/iTransformer.py @@ -0,0 +1,132 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from layers.Transformer_EncDec import Encoder, EncoderLayer +from layers.SelfAttention_Family import FullAttention, AttentionLayer +from layers.Embed import DataEmbedding_inverted +import numpy as np + + +class Model(nn.Module): + """ + Paper link: https://arxiv.org/abs/2310.06625 + """ + + def __init__(self, configs): + super(Model, self).__init__() + self.task_name = configs.task_name + self.seq_len = configs.seq_len + self.pred_len = configs.pred_len + # Embedding + self.enc_embedding = DataEmbedding_inverted(configs.seq_len, configs.d_model, configs.embed, configs.freq, + configs.dropout) + # Encoder + self.encoder = Encoder( + [ + EncoderLayer( + AttentionLayer( + FullAttention(False, configs.factor, attention_dropout=configs.dropout, + output_attention=False), configs.d_model, configs.n_heads), + configs.d_model, + configs.d_ff, + dropout=configs.dropout, + activation=configs.activation + ) for l in range(configs.e_layers) + ], + norm_layer=torch.nn.LayerNorm(configs.d_model) + ) + # Decoder + if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': + self.projection = nn.Linear(configs.d_model, configs.pred_len, bias=True) + if self.task_name == 'imputation': + self.projection = nn.Linear(configs.d_model, configs.seq_len, bias=True) + if self.task_name == 'anomaly_detection': + self.projection = nn.Linear(configs.d_model, configs.seq_len, bias=True) + if 
self.task_name == 'classification': + self.act = F.gelu + self.dropout = nn.Dropout(configs.dropout) + self.projection = nn.Linear(configs.d_model * configs.enc_in, configs.num_class) + + def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + # Normalization from Non-stationary Transformer + means = x_enc.mean(1, keepdim=True).detach() + x_enc = x_enc - means + stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5) + x_enc /= stdev + + _, _, N = x_enc.shape + + # Embedding + enc_out = self.enc_embedding(x_enc, x_mark_enc) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + dec_out = self.projection(enc_out).permute(0, 2, 1)[:, :, :N] + # De-Normalization from Non-stationary Transformer + dec_out = dec_out * (stdev[:, 0, :].unsqueeze(1).repeat(1, self.pred_len, 1)) + dec_out = dec_out + (means[:, 0, :].unsqueeze(1).repeat(1, self.pred_len, 1)) + return dec_out + + def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask): + # Normalization from Non-stationary Transformer + means = x_enc.mean(1, keepdim=True).detach() + x_enc = x_enc - means + stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5) + x_enc /= stdev + + _, L, N = x_enc.shape + + # Embedding + enc_out = self.enc_embedding(x_enc, x_mark_enc) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + dec_out = self.projection(enc_out).permute(0, 2, 1)[:, :, :N] + # De-Normalization from Non-stationary Transformer + dec_out = dec_out * (stdev[:, 0, :].unsqueeze(1).repeat(1, L, 1)) + dec_out = dec_out + (means[:, 0, :].unsqueeze(1).repeat(1, L, 1)) + return dec_out + + def anomaly_detection(self, x_enc): + # Normalization from Non-stationary Transformer + means = x_enc.mean(1, keepdim=True).detach() + x_enc = x_enc - means + stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5) + x_enc /= stdev + + _, L, N = x_enc.shape + + # Embedding + enc_out = self.enc_embedding(x_enc, None) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + dec_out = self.projection(enc_out).permute(0, 2, 1)[:, :, :N] + # De-Normalization from Non-stationary Transformer + dec_out = dec_out * (stdev[:, 0, :].unsqueeze(1).repeat(1, L, 1)) + dec_out = dec_out + (means[:, 0, :].unsqueeze(1).repeat(1, L, 1)) + return dec_out + + def classification(self, x_enc, x_mark_enc): + # Embedding + enc_out = self.enc_embedding(x_enc, None) + enc_out, attns = self.encoder(enc_out, attn_mask=None) + + # Output + output = self.act(enc_out) # the output transformer encoder/decoder embeddings don't include non-linearity + output = self.dropout(output) + output = output.reshape(output.shape[0], -1) # (batch_size, c_in * d_model) + output = self.projection(output) # (batch_size, num_classes) + return output + + def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None): + if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': + dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec) + return dec_out[:, -self.pred_len:, :] # [B, L, D] + if self.task_name == 'imputation': + dec_out = self.imputation(x_enc, x_mark_enc, x_dec, x_mark_dec, mask) + return dec_out # [B, L, D] + if self.task_name == 'anomaly_detection': + dec_out = self.anomaly_detection(x_enc) + return dec_out # [B, L, D] + if self.task_name == 'classification': + dec_out = self.classification(x_enc, x_mark_enc) + return dec_out # [B, N] + return None diff --git a/models/xPatch_SparseChannel.py b/models/xPatch_SparseChannel.py new file mode 100644 index 
0000000..ea3862f --- /dev/null +++ b/models/xPatch_SparseChannel.py @@ -0,0 +1,166 @@ +""" +xPatch_SparseChannel model adapted for Time-Series-Library-main +Supports both long-term forecasting and classification tasks +""" +import torch +import torch.nn as nn +from layers.DECOMP import DECOMP +from layers.SeasonPatch import SeasonPatch +from layers.RevIN import RevIN + +class Model(nn.Module): + """ + xPatch SparseChannel Model + """ + + def __init__(self, configs): + super(Model, self).__init__() + + # Model configuration + self.task_name = configs.task_name + self.seq_len = configs.seq_len + self.pred_len = configs.pred_len + self.enc_in = configs.enc_in + + # Model parameters + self.patch_len = getattr(configs, 'patch_len', 16) + self.stride = getattr(configs, 'stride', 8) + + # Normalization + self.revin = getattr(configs, 'revin', True) + if self.revin: + self.revin_layer = RevIN(self.enc_in, affine=True, subtract_last=False) + + # Decomposition using original DECOMP with EMA/DEMA + ma_type = getattr(configs, 'ma_type', 'ema') + alpha = getattr(configs, 'alpha', torch.tensor(0.1)) + beta = getattr(configs, 'beta', torch.tensor(0.1)) + self.decomp = DECOMP(ma_type, alpha, beta) + + # Season network (PatchTST + Graph Mixer) + self.season_net = SeasonPatch( + c_in=self.enc_in, + seq_len=self.seq_len, + pred_len=self.pred_len, + patch_len=self.patch_len, + stride=self.stride, + k_graph=getattr(configs, 'k_graph', 8), + d_model=getattr(configs, 'd_model', 128), + n_layers=getattr(configs, 'e_layers', 3), + n_heads=getattr(configs, 'n_heads', 16) + ) + + # Trend network (MLP) + self.fc5 = nn.Linear(self.seq_len, self.pred_len * 4) + self.avgpool1 = nn.AvgPool1d(kernel_size=2) + self.ln1 = nn.LayerNorm(self.pred_len * 2) + self.fc6 = nn.Linear(self.pred_len * 2, self.pred_len) + self.avgpool2 = nn.AvgPool1d(kernel_size=2) + self.ln2 = nn.LayerNorm(self.pred_len // 2) + self.fc7 = nn.Linear(self.pred_len // 2, self.pred_len) + + # Task-specific heads + if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': + self.fc_final = nn.Linear(self.pred_len * 2, self.pred_len) + elif self.task_name == 'classification': + self.season_attention = nn.Sequential( + nn.Linear(self.pred_len, 64), + nn.Tanh(), + nn.Linear(64, 1) + ) + self.trend_attention = nn.Sequential( + nn.Linear(self.pred_len, 64), + nn.Tanh(), + nn.Linear(64, 1) + ) + self.classifier = nn.Sequential( + nn.Linear(self.enc_in * 2, 128), + nn.ReLU(), + nn.Dropout(getattr(configs, 'dropout', 0.1)), + nn.Linear(128, configs.num_class) + ) + + def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec): + """Long-term forecasting""" + # Normalization + if self.revin: + x_enc = self.revin_layer(x_enc, 'norm') + + # Decomposition + seasonal_init, trend_init = self.decomp(x_enc) + + # Season stream + y_season = self.season_net(seasonal_init) # [B, C, pred_len] + + # Trend stream + B, L, C = trend_init.shape + trend = trend_init.permute(0, 2, 1).reshape(B * C, L) # [B*C, L] + trend = self.fc5(trend) + trend = self.avgpool1(trend) + trend = self.ln1(trend) + trend = self.fc6(trend) + trend = self.avgpool2(trend) + trend = self.ln2(trend) + trend = self.fc7(trend) # [B*C, pred_len] + y_trend = trend.view(B, C, -1) # [B, C, pred_len] + + # Combine streams + y = torch.cat([y_season, y_trend], dim=-1) # [B, C, 2*pred_len] + y = self.fc_final(y) # [B, C, pred_len] + y = y.permute(0, 2, 1) # [B, pred_len, C] + + # Denormalization + if self.revin: + y = self.revin_layer(y, 'denorm') + + return y + + def classification(self, 
x_enc, x_mark_enc): + """Classification task""" + # Normalization + #if self.revin: + # x_enc = self.revin_layer(x_enc, 'norm') + + # Decomposition + seasonal_init, trend_init = self.decomp(x_enc) + + # Season stream + y_season = self.season_net(seasonal_init) # [B, C, pred_len] + + # print("shape:", trend_init.shape) + # Trend stream + B, L, C = trend_init.shape + trend = trend_init.permute(0, 2, 1).reshape(B * C, L) # [B*C, L] + trend = self.fc5(trend) + trend = self.avgpool1(trend) + trend = self.ln1(trend) + trend = self.fc6(trend) + trend = self.avgpool2(trend) + trend = self.ln2(trend) + trend = self.fc7(trend) # [B*C, pred_len] + y_trend = trend.view(B, C, -1) # [B, C, pred_len] + + # Attention-based pooling for classification + season_attn_weights = torch.softmax(y_season, dim=-1) + season_pooled = (y_season * season_attn_weights).sum(dim=-1) # [B, C] + + trend_attn_weights = torch.softmax(y_trend, dim=-1) # 时间维 + trend_pooled = (y_trend * trend_attn_weights).sum(dim=-1) # [B, C] + + # Combine features + features = torch.cat([season_pooled, trend_pooled], dim=-1) # [B, 2*C] + + # Classification + logits = self.classifier(features) # [B, num_classes] + return logits + + def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None): + """Forward pass dispatching to task-specific methods""" + if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast': + dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec) + return dec_out[:, -self.pred_len:, :] # [B, L, D] + elif self.task_name == 'classification': + dec_out = self.classification(x_enc, x_mark_enc) + return dec_out # [B, N] + else: + raise ValueError(f'Task {self.task_name} not supported by xPatch_SparseChannel') \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..b4ae9ec --- /dev/null +++ b/requirements.txt @@ -0,0 +1,14 @@ +einops +local-attention +matplotlib +numpy +pandas +patool +reformer-pytorch +scikit-learn +scipy +sktime +sympy +torch +tqdm +PyWavelets diff --git a/run.py b/run.py new file mode 100644 index 0000000..d3d4ca1 --- /dev/null +++ b/run.py @@ -0,0 +1,261 @@ +import argparse +import os +import torch +import torch.backends +from exp.exp_long_term_forecasting import Exp_Long_Term_Forecast +from exp.exp_imputation import Exp_Imputation +from exp.exp_short_term_forecasting import Exp_Short_Term_Forecast +from exp.exp_anomaly_detection import Exp_Anomaly_Detection +from exp.exp_classification import Exp_Classification +from utils.print_args import print_args +import random +import numpy as np + +if __name__ == '__main__': + fix_seed = 2021 + random.seed(fix_seed) + torch.manual_seed(fix_seed) + np.random.seed(fix_seed) + + parser = argparse.ArgumentParser(description='TimesNet') + + # basic config + parser.add_argument('--task_name', type=str, required=True, default='long_term_forecast', + help='task name, options:[long_term_forecast, short_term_forecast, imputation, classification, anomaly_detection]') + parser.add_argument('--is_training', type=int, required=True, default=1, help='status') + parser.add_argument('--model_id', type=str, required=True, default='test', help='model id') + parser.add_argument('--model', type=str, required=True, default='Autoformer', + help='model name, options: [Autoformer, Transformer, TimesNet]') + + # data loader + parser.add_argument('--data', type=str, required=True, default='ETTh1', help='dataset type') + parser.add_argument('--root_path', type=str, default='./data/ETT/', help='root 
path of the data file') + parser.add_argument('--data_path', type=str, default='ETTh1.csv', help='data file') + parser.add_argument('--features', type=str, default='M', + help='forecasting task, options:[M, S, MS]; M:multivariate predict multivariate, S:univariate predict univariate, MS:multivariate predict univariate') + parser.add_argument('--target', type=str, default='OT', help='target feature in S or MS task') + parser.add_argument('--freq', type=str, default='h', + help='freq for time features encoding, options:[s:secondly, t:minutely, h:hourly, d:daily, b:business days, w:weekly, m:monthly], you can also use more detailed freq like 15min or 3h') + parser.add_argument('--checkpoints', type=str, default='./checkpoints/', help='location of model checkpoints') + + # forecasting task + parser.add_argument('--seq_len', type=int, default=96, help='input sequence length') + parser.add_argument('--label_len', type=int, default=48, help='start token length') + parser.add_argument('--pred_len', type=int, default=96, help='prediction sequence length') + parser.add_argument('--seasonal_patterns', type=str, default='Monthly', help='subset for M4') + parser.add_argument('--inverse', action='store_true', help='inverse output data', default=False) + + # inputation task + parser.add_argument('--mask_rate', type=float, default=0.25, help='mask ratio') + + # anomaly detection task + parser.add_argument('--anomaly_ratio', type=float, default=0.25, help='prior anomaly ratio (%%)') + + # model define + parser.add_argument('--expand', type=int, default=2, help='expansion factor for Mamba') + parser.add_argument('--d_conv', type=int, default=4, help='conv kernel size for Mamba') + parser.add_argument('--top_k', type=int, default=5, help='for TimesBlock') + parser.add_argument('--num_kernels', type=int, default=6, help='for Inception') + parser.add_argument('--enc_in', type=int, default=7, help='encoder input size') + parser.add_argument('--dec_in', type=int, default=7, help='decoder input size') + parser.add_argument('--c_out', type=int, default=7, help='output size') + parser.add_argument('--d_model', type=int, default=512, help='dimension of model') + parser.add_argument('--n_heads', type=int, default=8, help='num of heads') + parser.add_argument('--e_layers', type=int, default=2, help='num of encoder layers') + parser.add_argument('--d_layers', type=int, default=1, help='num of decoder layers') + parser.add_argument('--d_ff', type=int, default=2048, help='dimension of fcn') + parser.add_argument('--moving_avg', type=int, default=25, help='window size of moving average') + parser.add_argument('--factor', type=int, default=1, help='attn factor') + parser.add_argument('--distil', action='store_false', + help='whether to use distilling in encoder, using this argument means not using distilling', + default=True) + parser.add_argument('--dropout', type=float, default=0.1, help='dropout') + parser.add_argument('--embed', type=str, default='timeF', + help='time features encoding, options:[timeF, fixed, learned]') + parser.add_argument('--activation', type=str, default='gelu', help='activation') + parser.add_argument('--channel_independence', type=int, default=1, + help='0: channel dependence 1: channel independence for FreTS model') + parser.add_argument('--decomp_method', type=str, default='moving_avg', + help='method of series decompsition, only support moving_avg or dft_decomp') + parser.add_argument('--use_norm', type=int, default=1, help='whether to use normalize; True 1 False 0') + 
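+    # A minimal sketch of how the forecasting flags above are typically combined on the
+    # command line (the dataset path and hyper-parameter values here are illustrative
+    # assumptions, not recommended settings):
+    #   python -u run.py --task_name long_term_forecast --is_training 1 \
+    #     --root_path ./dataset/ETT-small/ --data_path ETTh1.csv --model_id ETTh1_96_96 \
+    #     --model TimesNet --data ETTh1 --features M --seq_len 96 --label_len 48 \
+    #     --pred_len 96 --e_layers 2 --enc_in 7 --dec_in 7 --c_out 7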
parser.add_argument('--down_sampling_layers', type=int, default=0, help='num of down sampling layers') + parser.add_argument('--down_sampling_window', type=int, default=1, help='down sampling window size') + parser.add_argument('--down_sampling_method', type=str, default=None, + help='down sampling method, only support avg, max, conv') + parser.add_argument('--seg_len', type=int, default=96, + help='the length of segmen-wise iteration of SegRNN') + + # optimization + parser.add_argument('--num_workers', type=int, default=10, help='data loader num workers') + parser.add_argument('--itr', type=int, default=1, help='experiments times') + parser.add_argument('--train_epochs', type=int, default=10, help='train epochs') + parser.add_argument('--batch_size', type=int, default=32, help='batch size of train input data') + parser.add_argument('--patience', type=int, default=3, help='early stopping patience') + parser.add_argument('--learning_rate', type=float, default=0.0001, help='optimizer learning rate') + parser.add_argument('--des', type=str, default='test', help='exp description') + parser.add_argument('--loss', type=str, default='MSE', help='loss function') + parser.add_argument('--lradj', type=str, default='type1', help='adjust learning rate') + parser.add_argument('--use_amp', action='store_true', help='use automatic mixed precision training', default=False) + + # GPU + parser.add_argument('--use_gpu', type=bool, default=True, help='use gpu') + parser.add_argument('--gpu', type=int, default=0, help='gpu') + parser.add_argument('--gpu_type', type=str, default='cuda', help='gpu type') # cuda or mps + parser.add_argument('--use_multi_gpu', action='store_true', help='use multiple gpus', default=False) + parser.add_argument('--devices', type=str, default='0,1,2,3', help='device ids of multile gpus') + + # de-stationary projector params + parser.add_argument('--p_hidden_dims', type=int, nargs='+', default=[128, 128], + help='hidden layer dimensions of projector (List)') + parser.add_argument('--p_hidden_layers', type=int, default=2, help='number of hidden layers in projector') + + # metrics (dtw) + parser.add_argument('--use_dtw', type=bool, default=False, + help='the controller of using dtw metric (dtw is time consuming, not suggested unless necessary)') + + # Augmentation + parser.add_argument('--augmentation_ratio', type=int, default=0, help="How many times to augment") + parser.add_argument('--seed', type=int, default=2, help="Randomization seed") + parser.add_argument('--jitter', default=False, action="store_true", help="Jitter preset augmentation") + parser.add_argument('--scaling', default=False, action="store_true", help="Scaling preset augmentation") + parser.add_argument('--permutation', default=False, action="store_true", + help="Equal Length Permutation preset augmentation") + parser.add_argument('--randompermutation', default=False, action="store_true", + help="Random Length Permutation preset augmentation") + parser.add_argument('--magwarp', default=False, action="store_true", help="Magnitude warp preset augmentation") + parser.add_argument('--timewarp', default=False, action="store_true", help="Time warp preset augmentation") + parser.add_argument('--windowslice', default=False, action="store_true", help="Window slice preset augmentation") + parser.add_argument('--windowwarp', default=False, action="store_true", help="Window warp preset augmentation") + parser.add_argument('--rotation', default=False, action="store_true", help="Rotation preset augmentation") + 
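+    # The preset augmentation switches in this block are combined with
+    # --augmentation_ratio, which sets how many times the training data is augmented.
+    # A minimal sketch (flag choices are illustrative assumptions):
+    #   python -u run.py <task/model/data flags as above> \
+    #     --augmentation_ratio 1 --jitter --scaling
+    # i.e. one round of augmentation using the jitter and scaling presets.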
parser.add_argument('--spawner', default=False, action="store_true", help="SPAWNER preset augmentation") + parser.add_argument('--dtwwarp', default=False, action="store_true", help="DTW warp preset augmentation") + parser.add_argument('--shapedtwwarp', default=False, action="store_true", help="Shape DTW warp preset augmentation") + parser.add_argument('--wdba', default=False, action="store_true", help="Weighted DBA preset augmentation") + parser.add_argument('--discdtw', default=False, action="store_true", + help="Discrimitive DTW warp preset augmentation") + parser.add_argument('--discsdtw', default=False, action="store_true", + help="Discrimitive shapeDTW warp preset augmentation") + parser.add_argument('--extra_tag', type=str, default="", help="Anything extra") + + # TimeXer + parser.add_argument('--patch_len', type=int, default=16, help='patch length') + + args, unknown = parser.parse_known_args() + + # Parse unknown arguments dynamically + for i in range(0, len(unknown), 2): + if i + 1 < len(unknown) and unknown[i].startswith('--'): + param_name = unknown[i][2:] # Remove '--' prefix + param_value = unknown[i + 1] + + # Smart type conversion + if param_value.isdigit() or (param_value.startswith('-') and param_value[1:].isdigit()): + param_value = int(param_value) + elif param_value.replace('.', '', 1).replace('-', '', 1).isdigit(): + param_value = float(param_value) + elif param_value.lower() in ['true', 'yes', '1']: + param_value = True + elif param_value.lower() in ['false', 'no', '0']: + param_value = False + + setattr(args, param_name, param_value) + print(f"Dynamic parameter: --{param_name} = {param_value} ({type(param_value).__name__})") + + if unknown: + print(f"Parsed {len(unknown)//2} dynamic parameters") + if torch.cuda.is_available() and args.use_gpu: + args.device = torch.device('cuda:{}'.format(args.gpu)) + print('Using GPU') + else: + if hasattr(torch.backends, "mps"): + args.device = torch.device("mps") if torch.backends.mps.is_available() else torch.device("cpu") + else: + args.device = torch.device("cpu") + print('Using cpu or mps') + + if args.use_gpu and args.use_multi_gpu: + args.devices = args.devices.replace(' ', '') + device_ids = args.devices.split(',') + args.device_ids = [int(id_) for id_ in device_ids] + args.gpu = args.device_ids[0] + + print('Args in experiment:') + print_args(args) + + if args.task_name == 'long_term_forecast': + Exp = Exp_Long_Term_Forecast + elif args.task_name == 'short_term_forecast': + Exp = Exp_Short_Term_Forecast + elif args.task_name == 'imputation': + Exp = Exp_Imputation + elif args.task_name == 'anomaly_detection': + Exp = Exp_Anomaly_Detection + elif args.task_name == 'classification': + Exp = Exp_Classification + else: + Exp = Exp_Long_Term_Forecast + + if args.is_training: + for ii in range(args.itr): + # setting record of experiments + exp = Exp(args) # set experiments + setting = '{}_{}_{}_{}_ft{}_sl{}_ll{}_pl{}_dm{}_nh{}_el{}_dl{}_df{}_expand{}_dc{}_fc{}_eb{}_dt{}_{}_{}'.format( + args.task_name, + args.model_id, + args.model, + args.data, + args.features, + args.seq_len, + args.label_len, + args.pred_len, + args.d_model, + args.n_heads, + args.e_layers, + args.d_layers, + args.d_ff, + args.expand, + args.d_conv, + args.factor, + args.embed, + args.distil, + args.des, ii) + + print('>>>>>>>start training : {}>>>>>>>>>>>>>>>>>>>>>>>>>>'.format(setting)) + exp.train(setting) + + print('>>>>>>>testing : {}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'.format(setting)) + exp.test(setting) + if args.gpu_type == 'mps': + 
torch.backends.mps.empty_cache() + elif args.gpu_type == 'cuda': + torch.cuda.empty_cache() + else: + exp = Exp(args) # set experiments + ii = 0 + setting = '{}_{}_{}_{}_ft{}_sl{}_ll{}_pl{}_dm{}_nh{}_el{}_dl{}_df{}_expand{}_dc{}_fc{}_eb{}_dt{}_{}_{}'.format( + args.task_name, + args.model_id, + args.model, + args.data, + args.features, + args.seq_len, + args.label_len, + args.pred_len, + args.d_model, + args.n_heads, + args.e_layers, + args.d_layers, + args.d_ff, + args.expand, + args.d_conv, + args.factor, + args.embed, + args.distil, + args.des, ii) + + print('>>>>>>>testing : {}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'.format(setting)) + exp.test(setting, test=1) + if args.gpu_type == 'mps': + torch.backends.mps.empty_cache() + elif args.gpu_type == 'cuda': + torch.cuda.empty_cache() diff --git a/scripts/anomaly_detection/MSL/Autoformer.sh b/scripts/anomaly_detection/MSL/Autoformer.sh new file mode 100644 index 0000000..583ddf9 --- /dev/null +++ b/scripts/anomaly_detection/MSL/Autoformer.sh @@ -0,0 +1,20 @@ +export CUDA_VISIBLE_DEVICES=0 + +python -u run.py \ + --task_name anomaly_detection \ + --is_training 1 \ + --root_path ./dataset/MSL \ + --model_id MSL \ + --model Autoformer \ + --data MSL \ + --features M \ + --seq_len 100 \ + --pred_len 0 \ + --d_model 128 \ + --d_ff 128 \ + --e_layers 3 \ + --enc_in 55 \ + --c_out 55 \ + --anomaly_ratio 1 \ + --batch_size 128 \ + --train_epochs 10 \ No newline at end of file diff --git a/scripts/anomaly_detection/MSL/Crossformer.sh b/scripts/anomaly_detection/MSL/Crossformer.sh new file mode 100644 index 0000000..39d6c8b --- /dev/null +++ b/scripts/anomaly_detection/MSL/Crossformer.sh @@ -0,0 +1,20 @@ +export CUDA_VISIBLE_DEVICES=0 + +python -u run.py \ + --task_name anomaly_detection \ + --is_training 1 \ + --root_path ./dataset/MSL \ + --model_id MSL \ + --model Crossformer \ + --data MSL \ + --features M \ + --seq_len 100 \ + --pred_len 0 \ + --d_model 128 \ + --d_ff 128 \ + --e_layers 3 \ + --enc_in 55 \ + --c_out 55 \ + --anomaly_ratio 1 \ + --batch_size 128 \ + --train_epochs 10 \ No newline at end of file diff --git a/scripts/anomaly_detection/MSL/DLinear.sh b/scripts/anomaly_detection/MSL/DLinear.sh new file mode 100644 index 0000000..d0e2b27 --- /dev/null +++ b/scripts/anomaly_detection/MSL/DLinear.sh @@ -0,0 +1,20 @@ +export CUDA_VISIBLE_DEVICES=0 + +python -u run.py \ + --task_name anomaly_detection \ + --is_training 1 \ + --root_path ./dataset/MSL \ + --model_id MSL \ + --model DLinear \ + --data MSL \ + --features M \ + --seq_len 100 \ + --pred_len 100 \ + --d_model 128 \ + --d_ff 128 \ + --e_layers 3 \ + --enc_in 55 \ + --c_out 55 \ + --anomaly_ratio 1 \ + --batch_size 128 \ + --train_epochs 10 \ No newline at end of file diff --git a/scripts/anomaly_detection/MSL/ETSformer.sh b/scripts/anomaly_detection/MSL/ETSformer.sh new file mode 100644 index 0000000..6400458 --- /dev/null +++ b/scripts/anomaly_detection/MSL/ETSformer.sh @@ -0,0 +1,21 @@ +export CUDA_VISIBLE_DEVICES=0 + +python -u run.py \ + --task_name anomaly_detection \ + --is_training 1 \ + --root_path ./dataset/MSL \ + --model_id MSL \ + --model ETSformer \ + --data MSL \ + --features M \ + --seq_len 100 \ + --pred_len 100 \ + --d_model 128 \ + --d_ff 128 \ + --e_layers 3 \ + --d_layers 3 \ + --enc_in 55 \ + --c_out 55 \ + --anomaly_ratio 1 \ + --batch_size 128 \ + --train_epochs 10 \ No newline at end of file diff --git a/scripts/anomaly_detection/MSL/FEDformer.sh b/scripts/anomaly_detection/MSL/FEDformer.sh new file mode 100644 index 0000000..a3e69bb --- /dev/null +++ 
b/scripts/anomaly_detection/MSL/FEDformer.sh @@ -0,0 +1,20 @@ +export CUDA_VISIBLE_DEVICES=0 + +python -u run.py \ + --task_name anomaly_detection \ + --is_training 1 \ + --root_path ./dataset/MSL \ + --model_id MSL \ + --model FEDformer \ + --data MSL \ + --features M \ + --seq_len 100 \ + --pred_len 0 \ + --d_model 128 \ + --d_ff 128 \ + --e_layers 3 \ + --enc_in 55 \ + --c_out 55 \ + --anomaly_ratio 1 \ + --batch_size 128 \ + --train_epochs 10 \ No newline at end of file diff --git a/scripts/anomaly_detection/MSL/FiLM.sh b/scripts/anomaly_detection/MSL/FiLM.sh new file mode 100644 index 0000000..92ed6ed --- /dev/null +++ b/scripts/anomaly_detection/MSL/FiLM.sh @@ -0,0 +1,20 @@ +export CUDA_VISIBLE_DEVICES=6 + +python -u run.py \ + --task_name anomaly_detection \ + --is_training 1 \ + --root_path ./dataset/MSL \ + --model_id MSL \ + --model FiLM \ + --data MSL \ + --features M \ + --seq_len 100 \ + --pred_len 100 \ + --d_model 128 \ + --d_ff 128 \ + --e_layers 3 \ + --enc_in 55 \ + --c_out 55 \ + --anomaly_ratio 1 \ + --batch_size 32 \ + --train_epochs 10 \ No newline at end of file diff --git a/scripts/anomaly_detection/MSL/Informer.sh b/scripts/anomaly_detection/MSL/Informer.sh new file mode 100644 index 0000000..6e3f0b0 --- /dev/null +++ b/scripts/anomaly_detection/MSL/Informer.sh @@ -0,0 +1,20 @@ +export CUDA_VISIBLE_DEVICES=0 + +python -u run.py \ + --task_name anomaly_detection \ + --is_training 1 \ + --root_path ./dataset/MSL \ + --model_id MSL \ + --model Informer \ + --data MSL \ + --features M \ + --seq_len 100 \ + --pred_len 0 \ + --d_model 128 \ + --d_ff 128 \ + --e_layers 3 \ + --enc_in 55 \ + --c_out 55 \ + --anomaly_ratio 1 \ + --batch_size 128 \ + --train_epochs 10 \ No newline at end of file diff --git a/scripts/anomaly_detection/MSL/LightTS.sh b/scripts/anomaly_detection/MSL/LightTS.sh new file mode 100644 index 0000000..2a2439e --- /dev/null +++ b/scripts/anomaly_detection/MSL/LightTS.sh @@ -0,0 +1,20 @@ +export CUDA_VISIBLE_DEVICES=0 + +python -u run.py \ + --task_name anomaly_detection \ + --is_training 1 \ + --root_path ./dataset/MSL \ + --model_id MSL \ + --model LightTS \ + --data MSL \ + --features M \ + --seq_len 100 \ + --pred_len 0 \ + --d_model 128 \ + --d_ff 128 \ + --e_layers 3 \ + --enc_in 55 \ + --c_out 55 \ + --anomaly_ratio 1 \ + --batch_size 128 \ + --train_epochs 10 \ No newline at end of file diff --git a/scripts/anomaly_detection/MSL/MICN.sh b/scripts/anomaly_detection/MSL/MICN.sh new file mode 100644 index 0000000..80ab7e9 --- /dev/null +++ b/scripts/anomaly_detection/MSL/MICN.sh @@ -0,0 +1,20 @@ +export CUDA_VISIBLE_DEVICES=1 + +python -u run.py \ + --task_name anomaly_detection \ + --is_training 1 \ + --root_path ./dataset/MSL \ + --model_id MSL \ + --model MICN \ + --data MSL \ + --features M \ + --seq_len 100 \ + --pred_len 0 \ + --d_model 128 \ + --d_ff 128 \ + --e_layers 3 \ + --enc_in 55 \ + --c_out 55 \ + --anomaly_ratio 1 \ + --batch_size 128 \ + --train_epochs 10 \ No newline at end of file diff --git a/scripts/anomaly_detection/MSL/Pyraformer.sh b/scripts/anomaly_detection/MSL/Pyraformer.sh new file mode 100644 index 0000000..c729ac8 --- /dev/null +++ b/scripts/anomaly_detection/MSL/Pyraformer.sh @@ -0,0 +1,20 @@ +export CUDA_VISIBLE_DEVICES=0 + +python -u run.py \ + --task_name anomaly_detection \ + --is_training 1 \ + --root_path ./dataset/MSL \ + --model_id MSL \ + --model Pyraformer \ + --data MSL \ + --features M \ + --seq_len 100 \ + --pred_len 0 \ + --d_model 128 \ + --d_ff 128 \ + --e_layers 3 \ + --enc_in 55 \ + --c_out 55 \ + 
--anomaly_ratio 1 \ + --batch_size 128 \ + --train_epochs 10 \ No newline at end of file diff --git a/scripts/anomaly_detection/MSL/Reformer.sh b/scripts/anomaly_detection/MSL/Reformer.sh new file mode 100644 index 0000000..09507c3 --- /dev/null +++ b/scripts/anomaly_detection/MSL/Reformer.sh @@ -0,0 +1,20 @@ +export CUDA_VISIBLE_DEVICES=0 + +python -u run.py \ + --task_name anomaly_detection \ + --is_training 1 \ + --root_path ./dataset/MSL \ + --model_id MSL \ + --model Reformer \ + --data MSL \ + --features M \ + --seq_len 100 \ + --pred_len 0 \ + --d_model 128 \ + --d_ff 128 \ + --e_layers 3 \ + --enc_in 55 \ + --c_out 55 \ + --anomaly_ratio 1 \ + --batch_size 128 \ + --train_epochs 10 \ No newline at end of file diff --git a/scripts/anomaly_detection/MSL/TimesNet.sh b/scripts/anomaly_detection/MSL/TimesNet.sh new file mode 100644 index 0000000..d6e4e11 --- /dev/null +++ b/scripts/anomaly_detection/MSL/TimesNet.sh @@ -0,0 +1,21 @@ +export CUDA_VISIBLE_DEVICES=2 + +python -u run.py \ + --task_name anomaly_detection \ + --is_training 1 \ + --root_path ./dataset/MSL \ + --model_id MSL \ + --model TimesNet \ + --data MSL \ + --features M \ + --seq_len 100 \ + --pred_len 0 \ + --d_model 8 \ + --d_ff 16 \ + --e_layers 1 \ + --enc_in 55 \ + --c_out 55 \ + --top_k 3 \ + --anomaly_ratio 1 \ + --batch_size 128 \ + --train_epochs 1 \ No newline at end of file diff --git a/scripts/anomaly_detection/MSL/Transformer.sh b/scripts/anomaly_detection/MSL/Transformer.sh new file mode 100644 index 0000000..fd0d820 --- /dev/null +++ b/scripts/anomaly_detection/MSL/Transformer.sh @@ -0,0 +1,20 @@ +export CUDA_VISIBLE_DEVICES=1 + +python -u run.py \ + --task_name anomaly_detection \ + --is_training 1 \ + --root_path ./dataset/MSL \ + --model_id MSL \ + --model Transformer \ + --data MSL \ + --features M \ + --seq_len 100 \ + --pred_len 0 \ + --d_model 128 \ + --d_ff 128 \ + --e_layers 3 \ + --enc_in 55 \ + --c_out 55 \ + --anomaly_ratio 1 \ + --batch_size 128 \ + --train_epochs 10 \ No newline at end of file diff --git a/scripts/anomaly_detection/MSL/iTransformer.sh b/scripts/anomaly_detection/MSL/iTransformer.sh new file mode 100644 index 0000000..1dafe12 --- /dev/null +++ b/scripts/anomaly_detection/MSL/iTransformer.sh @@ -0,0 +1,20 @@ +export CUDA_VISIBLE_DEVICES=0 + +python -u run.py \ + --task_name anomaly_detection \ + --is_training 1 \ + --root_path ./dataset/MSL \ + --model_id MSL \ + --model iTransformer \ + --data MSL \ + --features M \ + --seq_len 100 \ + --pred_len 0 \ + --d_model 128 \ + --d_ff 128 \ + --e_layers 3 \ + --enc_in 55 \ + --c_out 55 \ + --anomaly_ratio 1 \ + --batch_size 128 \ + --train_epochs 10 \ No newline at end of file diff --git a/scripts/anomaly_detection/PSM/Autoformer.sh b/scripts/anomaly_detection/PSM/Autoformer.sh new file mode 100644 index 0000000..887ac1a --- /dev/null +++ b/scripts/anomaly_detection/PSM/Autoformer.sh @@ -0,0 +1,20 @@ +export CUDA_VISIBLE_DEVICES=6 + +python -u run.py \ + --task_name anomaly_detection \ + --is_training 1 \ + --root_path ./dataset/PSM \ + --model_id PSM \ + --model Autoformer \ + --data PSM \ + --features M \ + --seq_len 100 \ + --pred_len 0 \ + --d_model 128 \ + --d_ff 128 \ + --e_layers 3 \ + --enc_in 25 \ + --c_out 25 \ + --anomaly_ratio 1 \ + --batch_size 128 \ + --train_epochs 3 \ No newline at end of file diff --git a/scripts/anomaly_detection/PSM/DLinear.sh b/scripts/anomaly_detection/PSM/DLinear.sh new file mode 100644 index 0000000..d531bf0 --- /dev/null +++ b/scripts/anomaly_detection/PSM/DLinear.sh @@ -0,0 +1,20 @@ +export 
CUDA_VISIBLE_DEVICES=6 + +python -u run.py \ + --task_name anomaly_detection \ + --is_training 1 \ + --root_path ./dataset/PSM \ + --model_id PSM \ + --model DLinear \ + --data PSM \ + --features M \ + --seq_len 100 \ + --pred_len 100 \ + --d_model 128 \ + --d_ff 128 \ + --e_layers 3 \ + --enc_in 25 \ + --c_out 25 \ + --anomaly_ratio 1 \ + --batch_size 128 \ + --train_epochs 3 \ No newline at end of file diff --git a/scripts/anomaly_detection/PSM/TimesNet.sh b/scripts/anomaly_detection/PSM/TimesNet.sh new file mode 100644 index 0000000..e972e61 --- /dev/null +++ b/scripts/anomaly_detection/PSM/TimesNet.sh @@ -0,0 +1,21 @@ +export CUDA_VISIBLE_DEVICES=6 + +python -u run.py \ + --task_name anomaly_detection \ + --is_training 1 \ + --root_path ./dataset/PSM \ + --model_id PSM \ + --model TimesNet \ + --data PSM \ + --features M \ + --seq_len 100 \ + --pred_len 0 \ + --d_model 64 \ + --d_ff 64 \ + --e_layers 2 \ + --enc_in 25 \ + --c_out 25 \ + --top_k 3 \ + --anomaly_ratio 1 \ + --batch_size 128 \ + --train_epochs 3 \ No newline at end of file diff --git a/scripts/anomaly_detection/PSM/Transformer.sh b/scripts/anomaly_detection/PSM/Transformer.sh new file mode 100644 index 0000000..ae765c8 --- /dev/null +++ b/scripts/anomaly_detection/PSM/Transformer.sh @@ -0,0 +1,20 @@ +export CUDA_VISIBLE_DEVICES=6 + +python -u run.py \ + --task_name anomaly_detection \ + --is_training 1 \ + --root_path ./dataset/PSM \ + --model_id PSM \ + --model Transformer \ + --data PSM \ + --features M \ + --seq_len 100 \ + --pred_len 0 \ + --d_model 128 \ + --d_ff 128 \ + --e_layers 3 \ + --enc_in 25 \ + --c_out 25 \ + --anomaly_ratio 1 \ + --batch_size 128 \ + --train_epochs 3 \ No newline at end of file diff --git a/scripts/anomaly_detection/SMAP/Autoformer.sh b/scripts/anomaly_detection/SMAP/Autoformer.sh new file mode 100644 index 0000000..90d3e96 --- /dev/null +++ b/scripts/anomaly_detection/SMAP/Autoformer.sh @@ -0,0 +1,20 @@ +export CUDA_VISIBLE_DEVICES=7 + +python -u run.py \ + --task_name anomaly_detection \ + --is_training 1 \ + --root_path ./dataset/SMAP \ + --model_id SMAP \ + --model Autoformer \ + --data SMAP \ + --features M \ + --seq_len 100 \ + --pred_len 0 \ + --d_model 128 \ + --d_ff 128 \ + --e_layers 3 \ + --enc_in 25 \ + --c_out 25 \ + --anomaly_ratio 1 \ + --batch_size 128 \ + --train_epochs 3 \ No newline at end of file diff --git a/scripts/anomaly_detection/SMAP/TimesNet.sh b/scripts/anomaly_detection/SMAP/TimesNet.sh new file mode 100644 index 0000000..5e0bfa5 --- /dev/null +++ b/scripts/anomaly_detection/SMAP/TimesNet.sh @@ -0,0 +1,21 @@ +export CUDA_VISIBLE_DEVICES=0 + +python -u run.py \ + --task_name anomaly_detection \ + --is_training 1 \ + --root_path ./dataset/SMAP \ + --model_id SMAP \ + --model TimesNet \ + --data SMAP \ + --features M \ + --seq_len 100 \ + --pred_len 0 \ + --d_model 128 \ + --d_ff 128 \ + --e_layers 3 \ + --enc_in 25 \ + --c_out 25 \ + --top_k 3 \ + --anomaly_ratio 1 \ + --batch_size 128 \ + --train_epochs 3 \ No newline at end of file diff --git a/scripts/anomaly_detection/SMAP/Transformer.sh b/scripts/anomaly_detection/SMAP/Transformer.sh new file mode 100644 index 0000000..029d25c --- /dev/null +++ b/scripts/anomaly_detection/SMAP/Transformer.sh @@ -0,0 +1,20 @@ +export CUDA_VISIBLE_DEVICES=7 + +python -u run.py \ + --task_name anomaly_detection \ + --is_training 1 \ + --root_path ./dataset/SMAP \ + --model_id SMAP \ + --model Transformer \ + --data SMAP \ + --features M \ + --seq_len 100 \ + --pred_len 0 \ + --d_model 128 \ + --d_ff 128 \ + --e_layers 3 \ + 
--enc_in 25 \ + --c_out 25 \ + --anomaly_ratio 1 \ + --batch_size 128 \ + --train_epochs 3 \ No newline at end of file diff --git a/scripts/anomaly_detection/SMD/Autoformer.sh b/scripts/anomaly_detection/SMD/Autoformer.sh new file mode 100644 index 0000000..b17c459 --- /dev/null +++ b/scripts/anomaly_detection/SMD/Autoformer.sh @@ -0,0 +1,20 @@ +export CUDA_VISIBLE_DEVICES=2 + +python -u run.py \ + --task_name anomaly_detection \ + --is_training 1 \ + --root_path ./dataset/SMD \ + --model_id SMD \ + --model Autoformer \ + --data SMD \ + --features M \ + --seq_len 100 \ + --pred_len 0 \ + --d_model 128 \ + --d_ff 128 \ + --e_layers 3 \ + --enc_in 38 \ + --c_out 38 \ + --anomaly_ratio 0.5 \ + --batch_size 128 \ + --train_epochs 10 \ No newline at end of file diff --git a/scripts/anomaly_detection/SMD/TimesNet.sh b/scripts/anomaly_detection/SMD/TimesNet.sh new file mode 100644 index 0000000..0064211 --- /dev/null +++ b/scripts/anomaly_detection/SMD/TimesNet.sh @@ -0,0 +1,21 @@ +export CUDA_VISIBLE_DEVICES=2 + +python -u run.py \ + --task_name anomaly_detection \ + --is_training 1 \ + --root_path ./dataset/SMD \ + --model_id SMD \ + --model TimesNet \ + --data SMD \ + --features M \ + --seq_len 100 \ + --pred_len 0 \ + --d_model 64 \ + --d_ff 64 \ + --e_layers 2 \ + --enc_in 38 \ + --c_out 38 \ + --top_k 5 \ + --anomaly_ratio 0.5 \ + --batch_size 128 \ + --train_epochs 10 \ No newline at end of file diff --git a/scripts/anomaly_detection/SMD/Transformer.sh b/scripts/anomaly_detection/SMD/Transformer.sh new file mode 100644 index 0000000..dc695ae --- /dev/null +++ b/scripts/anomaly_detection/SMD/Transformer.sh @@ -0,0 +1,20 @@ +export CUDA_VISIBLE_DEVICES=2 + +python -u run.py \ + --task_name anomaly_detection \ + --is_training 1 \ + --root_path ./dataset/SMD \ + --model_id SMD \ + --model Transformer \ + --data SMD \ + --features M \ + --seq_len 100 \ + --pred_len 0 \ + --d_model 128 \ + --d_ff 128 \ + --e_layers 3 \ + --enc_in 38 \ + --c_out 38 \ + --anomaly_ratio 0.5 \ + --batch_size 128 \ + --train_epochs 10 \ No newline at end of file diff --git a/scripts/anomaly_detection/SWAT/Autoformer.sh b/scripts/anomaly_detection/SWAT/Autoformer.sh new file mode 100644 index 0000000..a279913 --- /dev/null +++ b/scripts/anomaly_detection/SWAT/Autoformer.sh @@ -0,0 +1,21 @@ +export CUDA_VISIBLE_DEVICES=1 + +python -u run.py \ + --task_name anomaly_detection \ + --is_training 1 \ + --root_path ./dataset/SWaT \ + --model_id SWAT \ + --model Autoformer \ + --data SWAT \ + --features M \ + --seq_len 100 \ + --pred_len 0 \ + --d_model 128 \ + --d_ff 128 \ + --e_layers 3 \ + --enc_in 51 \ + --c_out 51 \ + --top_k 3 \ + --anomaly_ratio 1 \ + --batch_size 128 \ + --train_epochs 3 \ No newline at end of file diff --git a/scripts/anomaly_detection/SWAT/TimesNet.sh b/scripts/anomaly_detection/SWAT/TimesNet.sh new file mode 100644 index 0000000..98e00ac --- /dev/null +++ b/scripts/anomaly_detection/SWAT/TimesNet.sh @@ -0,0 +1,161 @@ +export CUDA_VISIBLE_DEVICES=1 + +python -u run.py \ + --task_name anomaly_detection \ + --is_training 1 \ + --root_path ./dataset/SWaT \ + --model_id SWAT \ + --model TimesNet \ + --data SWAT \ + --features M \ + --seq_len 100 \ + --pred_len 0 \ + --d_model 8 \ + --d_ff 8 \ + --e_layers 3 \ + --enc_in 51 \ + --c_out 51 \ + --top_k 3 \ + --anomaly_ratio 1 \ + --batch_size 128 \ + --train_epochs 3 + +python -u run.py \ + --task_name anomaly_detection \ + --is_training 1 \ + --root_path ./dataset/SWaT \ + --model_id SWAT \ + --model TimesNet \ + --data SWAT \ + --features M \ + --seq_len 
100 \ + --pred_len 0 \ + --d_model 16 \ + --d_ff 16 \ + --e_layers 3 \ + --enc_in 51 \ + --c_out 51 \ + --top_k 3 \ + --anomaly_ratio 1 \ + --batch_size 128 \ + --train_epochs 3 + +python -u run.py \ + --task_name anomaly_detection \ + --is_training 1 \ + --root_path ./dataset/SWaT \ + --model_id SWAT \ + --model TimesNet \ + --data SWAT \ + --features M \ + --seq_len 100 \ + --pred_len 0 \ + --d_model 32 \ + --d_ff 32 \ + --e_layers 3 \ + --enc_in 51 \ + --c_out 51 \ + --top_k 3 \ + --anomaly_ratio 1 \ + --batch_size 128 \ + --train_epochs 3 + +python -u run.py \ + --task_name anomaly_detection \ + --is_training 1 \ + --root_path ./dataset/SWaT \ + --model_id SWAT \ + --model TimesNet \ + --data SWAT \ + --features M \ + --seq_len 100 \ + --pred_len 0 \ + --d_model 64 \ + --d_ff 64 \ + --e_layers 3 \ + --enc_in 51 \ + --c_out 51 \ + --top_k 3 \ + --anomaly_ratio 1 \ + --batch_size 128 \ + --train_epochs 3 + +python -u run.py \ + --task_name anomaly_detection \ + --is_training 1 \ + --root_path ./dataset/SWaT \ + --model_id SWAT \ + --model TimesNet \ + --data SWAT \ + --features M \ + --seq_len 100 \ + --pred_len 0 \ + --d_model 8 \ + --d_ff 8 \ + --e_layers 2 \ + --enc_in 51 \ + --c_out 51 \ + --top_k 3 \ + --anomaly_ratio 1 \ + --batch_size 128 \ + --train_epochs 3 + +python -u run.py \ + --task_name anomaly_detection \ + --is_training 1 \ + --root_path ./dataset/SWaT \ + --model_id SWAT \ + --model TimesNet \ + --data SWAT \ + --features M \ + --seq_len 100 \ + --pred_len 0 \ + --d_model 16 \ + --d_ff 16 \ + --e_layers 2 \ + --enc_in 51 \ + --c_out 51 \ + --top_k 3 \ + --anomaly_ratio 1 \ + --batch_size 128 \ + --train_epochs 3 + +python -u run.py \ + --task_name anomaly_detection \ + --is_training 1 \ + --root_path ./dataset/SWaT \ + --model_id SWAT \ + --model TimesNet \ + --data SWAT \ + --features M \ + --seq_len 100 \ + --pred_len 0 \ + --d_model 32 \ + --d_ff 32 \ + --e_layers 2 \ + --enc_in 51 \ + --c_out 51 \ + --top_k 3 \ + --anomaly_ratio 1 \ + --batch_size 128 \ + --train_epochs 3 + +python -u run.py \ + --task_name anomaly_detection \ + --is_training 1 \ + --root_path ./dataset/SWaT \ + --model_id SWAT \ + --model TimesNet \ + --data SWAT \ + --features M \ + --seq_len 100 \ + --pred_len 0 \ + --d_model 64 \ + --d_ff 64 \ + --e_layers 2 \ + --enc_in 51 \ + --c_out 51 \ + --top_k 3 \ + --anomaly_ratio 1 \ + --batch_size 128 \ + --train_epochs 3 \ No newline at end of file diff --git a/scripts/anomaly_detection/SWAT/Transformer.sh b/scripts/anomaly_detection/SWAT/Transformer.sh new file mode 100644 index 0000000..5ae6209 --- /dev/null +++ b/scripts/anomaly_detection/SWAT/Transformer.sh @@ -0,0 +1,21 @@ +export CUDA_VISIBLE_DEVICES=1 + +python -u run.py \ + --task_name anomaly_detection \ + --is_training 1 \ + --root_path ./dataset/SWaT \ + --model_id SWAT \ + --model Transformer \ + --data SWAT \ + --features M \ + --seq_len 100 \ + --pred_len 0 \ + --d_model 128 \ + --d_ff 128 \ + --e_layers 3 \ + --enc_in 51 \ + --c_out 51 \ + --top_k 3 \ + --anomaly_ratio 1 \ + --batch_size 128 \ + --train_epochs 3 \ No newline at end of file diff --git a/scripts/classification/Autoformer.sh b/scripts/classification/Autoformer.sh new file mode 100644 index 0000000..84f4a7a --- /dev/null +++ b/scripts/classification/Autoformer.sh @@ -0,0 +1,183 @@ +export CUDA_VISIBLE_DEVICES=5 + +model_name=Autoformer + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/EthanolConcentration/ \ + --model_id EthanolConcentration \ + --model $model_name \ + --data 
UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/FaceDetection/ \ + --model_id FaceDetection \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/Handwriting/ \ + --model_id Handwriting \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/Heartbeat/ \ + --model_id Heartbeat \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/JapaneseVowels/ \ + --model_id JapaneseVowels \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/PEMS-SF/ \ + --model_id PEMS-SF \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/SelfRegulationSCP1/ \ + --model_id SelfRegulationSCP1 \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/SelfRegulationSCP2/ \ + --model_id SelfRegulationSCP2 \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/SpokenArabicDigits/ \ + --model_id SpokenArabicDigits \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/UWaveGestureLibrary/ \ + --model_id UWaveGestureLibrary \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 diff --git a/scripts/classification/Crossformer.sh b/scripts/classification/Crossformer.sh new file mode 100644 index 
0000000..c99a414 --- /dev/null +++ b/scripts/classification/Crossformer.sh @@ -0,0 +1,183 @@ +export CUDA_VISIBLE_DEVICES=3 + +model_name=Crossformer + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/EthanolConcentration/ \ + --model_id EthanolConcentration \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/FaceDetection/ \ + --model_id FaceDetection \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/Handwriting/ \ + --model_id Handwriting \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/Heartbeat/ \ + --model_id Heartbeat \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/JapaneseVowels/ \ + --model_id JapaneseVowels \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/PEMS-SF/ \ + --model_id PEMS-SF \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/SelfRegulationSCP1/ \ + --model_id SelfRegulationSCP1 \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/SelfRegulationSCP2/ \ + --model_id SelfRegulationSCP2 \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/SpokenArabicDigits/ \ + --model_id SpokenArabicDigits \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/UWaveGestureLibrary/ \ + --model_id 
UWaveGestureLibrary \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 diff --git a/scripts/classification/DLinear.sh b/scripts/classification/DLinear.sh new file mode 100644 index 0000000..662dd1d --- /dev/null +++ b/scripts/classification/DLinear.sh @@ -0,0 +1,183 @@ +export CUDA_VISIBLE_DEVICES=5 + +model_name=DLinear + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/EthanolConcentration/ \ + --model_id EthanolConcentration \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/FaceDetection/ \ + --model_id FaceDetection \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/Handwriting/ \ + --model_id Handwriting \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/Heartbeat/ \ + --model_id Heartbeat \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/JapaneseVowels/ \ + --model_id JapaneseVowels \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/PEMS-SF/ \ + --model_id PEMS-SF \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/SelfRegulationSCP1/ \ + --model_id SelfRegulationSCP1 \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/SelfRegulationSCP2/ \ + --model_id SelfRegulationSCP2 \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/SpokenArabicDigits/ \ + --model_id SpokenArabicDigits \ + --model $model_name \ + 
--data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/UWaveGestureLibrary/ \ + --model_id UWaveGestureLibrary \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 diff --git a/scripts/classification/ETSformer.sh b/scripts/classification/ETSformer.sh new file mode 100644 index 0000000..7b281b3 --- /dev/null +++ b/scripts/classification/ETSformer.sh @@ -0,0 +1,193 @@ +export CUDA_VISIBLE_DEVICES=5 + +model_name=ETSformer + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/EthanolConcentration/ \ + --model_id EthanolConcentration \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --d_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/FaceDetection/ \ + --model_id FaceDetection \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --d_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/Handwriting/ \ + --model_id Handwriting \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --d_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/Heartbeat/ \ + --model_id Heartbeat \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --d_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/JapaneseVowels/ \ + --model_id JapaneseVowels \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --d_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/PEMS-SF/ \ + --model_id PEMS-SF \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --d_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/SelfRegulationSCP1/ \ + --model_id SelfRegulationSCP1 \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --d_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path 
./dataset/SelfRegulationSCP2/ \ + --model_id SelfRegulationSCP2 \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --d_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/SpokenArabicDigits/ \ + --model_id SpokenArabicDigits \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --d_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/UWaveGestureLibrary/ \ + --model_id UWaveGestureLibrary \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --d_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 diff --git a/scripts/classification/FEDformer.sh b/scripts/classification/FEDformer.sh new file mode 100644 index 0000000..0ad0192 --- /dev/null +++ b/scripts/classification/FEDformer.sh @@ -0,0 +1,183 @@ +export CUDA_VISIBLE_DEVICES=5 + +model_name=FEDformer + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/EthanolConcentration/ \ + --model_id EthanolConcentration \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/FaceDetection/ \ + --model_id FaceDetection \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/Handwriting/ \ + --model_id Handwriting \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/Heartbeat/ \ + --model_id Heartbeat \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/JapaneseVowels/ \ + --model_id JapaneseVowels \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/PEMS-SF/ \ + --model_id PEMS-SF \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + 
--root_path ./dataset/SelfRegulationSCP1/ \ + --model_id SelfRegulationSCP1 \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/SelfRegulationSCP2/ \ + --model_id SelfRegulationSCP2 \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/SpokenArabicDigits/ \ + --model_id SpokenArabicDigits \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/UWaveGestureLibrary/ \ + --model_id UWaveGestureLibrary \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 diff --git a/scripts/classification/FiLM.sh b/scripts/classification/FiLM.sh new file mode 100644 index 0000000..421f8b0 --- /dev/null +++ b/scripts/classification/FiLM.sh @@ -0,0 +1,185 @@ +export CUDA_VISIBLE_DEVICES=7 + +model_name=FiLM + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/EthanolConcentration/ \ + --model_id EthanolConcentration \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --seq_len 1751 \ + --pred_len 1751 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/FaceDetection/ \ + --model_id FaceDetection \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/Handwriting/ \ + --model_id Handwriting \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/Heartbeat/ \ + --model_id Heartbeat \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/JapaneseVowels/ \ + --model_id JapaneseVowels \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ 
+ --root_path ./dataset/PEMS-SF/ \ + --model_id PEMS-SF \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/SelfRegulationSCP1/ \ + --model_id SelfRegulationSCP1 \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/SelfRegulationSCP2/ \ + --model_id SelfRegulationSCP2 \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/SpokenArabicDigits/ \ + --model_id SpokenArabicDigits \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/UWaveGestureLibrary/ \ + --model_id UWaveGestureLibrary \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 diff --git a/scripts/classification/Informer.sh b/scripts/classification/Informer.sh new file mode 100644 index 0000000..cf84a5a --- /dev/null +++ b/scripts/classification/Informer.sh @@ -0,0 +1,183 @@ +export CUDA_VISIBLE_DEVICES=5 + +model_name=Informer + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/EthanolConcentration/ \ + --model_id EthanolConcentration \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/FaceDetection/ \ + --model_id FaceDetection \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/Handwriting/ \ + --model_id Handwriting \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/Heartbeat/ \ + --model_id Heartbeat \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path 
./dataset/JapaneseVowels/ \ + --model_id JapaneseVowels \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/PEMS-SF/ \ + --model_id PEMS-SF \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/SelfRegulationSCP1/ \ + --model_id SelfRegulationSCP1 \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/SelfRegulationSCP2/ \ + --model_id SelfRegulationSCP2 \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/SpokenArabicDigits/ \ + --model_id SpokenArabicDigits \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/UWaveGestureLibrary/ \ + --model_id UWaveGestureLibrary \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 diff --git a/scripts/classification/LightTS.sh b/scripts/classification/LightTS.sh new file mode 100644 index 0000000..c47a011 --- /dev/null +++ b/scripts/classification/LightTS.sh @@ -0,0 +1,183 @@ +export CUDA_VISIBLE_DEVICES=5 + +model_name=LightTS + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/EthanolConcentration/ \ + --model_id EthanolConcentration \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/FaceDetection/ \ + --model_id FaceDetection \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/Handwriting/ \ + --model_id Handwriting \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/Heartbeat/ \ + 
--model_id Heartbeat \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/JapaneseVowels/ \ + --model_id JapaneseVowels \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/PEMS-SF/ \ + --model_id PEMS-SF \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/SelfRegulationSCP1/ \ + --model_id SelfRegulationSCP1 \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/SelfRegulationSCP2/ \ + --model_id SelfRegulationSCP2 \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/SpokenArabicDigits/ \ + --model_id SpokenArabicDigits \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/UWaveGestureLibrary/ \ + --model_id UWaveGestureLibrary \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 diff --git a/scripts/classification/MICN.sh b/scripts/classification/MICN.sh new file mode 100644 index 0000000..dbb32ad --- /dev/null +++ b/scripts/classification/MICN.sh @@ -0,0 +1,184 @@ +export CUDA_VISIBLE_DEVICES=7 + +model_name=MICN + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/EthanolConcentration/ \ + --model_id EthanolConcentration \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --c_out 3 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/FaceDetection/ \ + --model_id FaceDetection \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/Handwriting/ \ + --model_id Handwriting \ + 
--model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/Heartbeat/ \ + --model_id Heartbeat \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/JapaneseVowels/ \ + --model_id JapaneseVowels \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/PEMS-SF/ \ + --model_id PEMS-SF \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/SelfRegulationSCP1/ \ + --model_id SelfRegulationSCP1 \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/SelfRegulationSCP2/ \ + --model_id SelfRegulationSCP2 \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/SpokenArabicDigits/ \ + --model_id SpokenArabicDigits \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/UWaveGestureLibrary/ \ + --model_id UWaveGestureLibrary \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 diff --git a/scripts/classification/PatchTST.sh b/scripts/classification/PatchTST.sh new file mode 100644 index 0000000..333ce8d --- /dev/null +++ b/scripts/classification/PatchTST.sh @@ -0,0 +1,183 @@ +export CUDA_VISIBLE_DEVICES=3 + +model_name=PatchTST + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/EthanolConcentration/ \ + --model_id EthanolConcentration \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/FaceDetection/ \ + --model_id FaceDetection \ + --model $model_name \ + 
--data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/Handwriting/ \ + --model_id Handwriting \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/Heartbeat/ \ + --model_id Heartbeat \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/JapaneseVowels/ \ + --model_id JapaneseVowels \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/PEMS-SF/ \ + --model_id PEMS-SF \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/SelfRegulationSCP1/ \ + --model_id SelfRegulationSCP1 \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/SelfRegulationSCP2/ \ + --model_id SelfRegulationSCP2 \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/SpokenArabicDigits/ \ + --model_id SpokenArabicDigits \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/UWaveGestureLibrary/ \ + --model_id UWaveGestureLibrary \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 diff --git a/scripts/classification/Pyraformer.sh b/scripts/classification/Pyraformer.sh new file mode 100644 index 0000000..d1a775e --- /dev/null +++ b/scripts/classification/Pyraformer.sh @@ -0,0 +1,183 @@ +export CUDA_VISIBLE_DEVICES=5 + +model_name=Pyraformer + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/EthanolConcentration/ \ + --model_id EthanolConcentration \ + --model $model_name \ + --data UEA \ + --e_layers 
3 \ + --batch_size 4 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/FaceDetection/ \ + --model_id FaceDetection \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 4 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/Handwriting/ \ + --model_id Handwriting \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 4 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/Heartbeat/ \ + --model_id Heartbeat \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 4 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/JapaneseVowels/ \ + --model_id JapaneseVowels \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 4 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/PEMS-SF/ \ + --model_id PEMS-SF \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 4 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/SelfRegulationSCP1/ \ + --model_id SelfRegulationSCP1 \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 4 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/SelfRegulationSCP2/ \ + --model_id SelfRegulationSCP2 \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 4 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/SpokenArabicDigits/ \ + --model_id SpokenArabicDigits \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 4 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/UWaveGestureLibrary/ \ + --model_id UWaveGestureLibrary \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 4 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 diff --git a/scripts/classification/Reformer.sh b/scripts/classification/Reformer.sh new file mode 100644 index 0000000..1b0bd9e --- /dev/null +++ 
b/scripts/classification/Reformer.sh @@ -0,0 +1,183 @@ +export CUDA_VISIBLE_DEVICES=5 + +model_name=Reformer + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/EthanolConcentration/ \ + --model_id EthanolConcentration \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/FaceDetection/ \ + --model_id FaceDetection \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/Handwriting/ \ + --model_id Handwriting \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/Heartbeat/ \ + --model_id Heartbeat \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/JapaneseVowels/ \ + --model_id JapaneseVowels \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/PEMS-SF/ \ + --model_id PEMS-SF \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/SelfRegulationSCP1/ \ + --model_id SelfRegulationSCP1 \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/SelfRegulationSCP2/ \ + --model_id SelfRegulationSCP2 \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/SpokenArabicDigits/ \ + --model_id SpokenArabicDigits \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/UWaveGestureLibrary/ \ + --model_id UWaveGestureLibrary \ + --model $model_name \ + --data UEA 
\ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 diff --git a/scripts/classification/TimesNet.sh b/scripts/classification/TimesNet.sh new file mode 100644 index 0000000..7d55dd6 --- /dev/null +++ b/scripts/classification/TimesNet.sh @@ -0,0 +1,182 @@ +export CUDA_VISIBLE_DEVICES=3 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/EthanolConcentration/ \ + --model_id EthanolConcentration \ + --model TimesNet \ + --data UEA \ + --e_layers 2 \ + --batch_size 16 \ + --d_model 16 \ + --d_ff 32 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 30 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/FaceDetection/ \ + --model_id FaceDetection \ + --model TimesNet \ + --data UEA \ + --e_layers 2 \ + --batch_size 16 \ + --d_model 64 \ + --d_ff 256 \ + --top_k 3 \ + --num_kernels 4 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 30 \ + --patience 10 + +python run.py \ +--task_name classification \ +--is_training 1 \ +--root_path ./dataset/Handwriting/ \ +--model_id Handwriting \ +--model TimesNet \ +--data UEA \ +--e_layers 2 \ +--batch_size 16 \ +--d_model 32 \ +--d_ff 64 \ +--top_k 3 \ +--des 'Exp' \ +--itr 1 \ +--learning_rate 0.001 \ +--train_epochs 30 \ +--patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/Heartbeat/ \ + --model_id Heartbeat \ + --model TimesNet \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 16 \ + --d_ff 32 \ + --top_k 1 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 30 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/JapaneseVowels/ \ + --model_id JapaneseVowels \ + --model TimesNet \ + --data UEA \ + --e_layers 2 \ + --batch_size 16 \ + --d_model 16 \ + --d_ff 32 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 60 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/PEMS-SF/ \ + --model_id PEMS-SF \ + --model TimesNet \ + --data UEA \ + --e_layers 6 \ + --batch_size 16 \ + --d_model 64 \ + --d_ff 64 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 30 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/SelfRegulationSCP1/ \ + --model_id SelfRegulationSCP1 \ + --model TimesNet \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 16 \ + --d_ff 32 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 30 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/SelfRegulationSCP2/ \ + --model_id SelfRegulationSCP2 \ + --model TimesNet \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 32 \ + --d_ff 32 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 30 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/SpokenArabicDigits/ \ + --model_id SpokenArabicDigits \ + --model TimesNet \ + --data UEA \ + --e_layers 2 \ + --batch_size 16 \ + --d_model 32 \ + --d_ff 32 \ + --top_k 2 \ + --des 'Exp' \ + --itr 1 \ + 
--learning_rate 0.001 \ + --train_epochs 30 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/UWaveGestureLibrary/ \ + --model_id UWaveGestureLibrary \ + --model TimesNet \ + --data UEA \ + --e_layers 2 \ + --batch_size 16 \ + --d_model 32 \ + --d_ff 64 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 30 \ + --patience 10 diff --git a/scripts/classification/Transformer.sh b/scripts/classification/Transformer.sh new file mode 100644 index 0000000..677bf0d --- /dev/null +++ b/scripts/classification/Transformer.sh @@ -0,0 +1,183 @@ +export CUDA_VISIBLE_DEVICES=4 + +model_name=Transformer + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/EthanolConcentration/ \ + --model_id EthanolConcentration \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/FaceDetection/ \ + --model_id FaceDetection \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/Handwriting/ \ + --model_id Handwriting \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/Heartbeat/ \ + --model_id Heartbeat \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/JapaneseVowels/ \ + --model_id JapaneseVowels \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/PEMS-SF/ \ + --model_id PEMS-SF \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/SelfRegulationSCP1/ \ + --model_id SelfRegulationSCP1 \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/SelfRegulationSCP2/ \ + --model_id SelfRegulationSCP2 \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + 
--train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/SpokenArabicDigits/ \ + --model_id SpokenArabicDigits \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/UWaveGestureLibrary/ \ + --model_id UWaveGestureLibrary \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 diff --git a/scripts/classification/iTransformer.sh b/scripts/classification/iTransformer.sh new file mode 100644 index 0000000..2534e63 --- /dev/null +++ b/scripts/classification/iTransformer.sh @@ -0,0 +1,193 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=iTransformer + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/EthanolConcentration/ \ + --model_id EthanolConcentration \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 2048 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 \ + --enc_in 3 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/FaceDetection/ \ + --model_id FaceDetection \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 \ + --enc_in 3 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/Handwriting/ \ + --model_id Handwriting \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 \ + --enc_in 3 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/Heartbeat/ \ + --model_id Heartbeat \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 \ + --enc_in 3 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/JapaneseVowels/ \ + --model_id JapaneseVowels \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 \ + --enc_in 3 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/PEMS-SF/ \ + --model_id PEMS-SF \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 \ + --enc_in 3 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/SelfRegulationSCP1/ \ + --model_id SelfRegulationSCP1 \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + 
--top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 \ + --enc_in 3 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/SelfRegulationSCP2/ \ + --model_id SelfRegulationSCP2 \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 \ + --enc_in 3 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/SpokenArabicDigits/ \ + --model_id SpokenArabicDigits \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 \ + --enc_in 3 + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/UWaveGestureLibrary/ \ + --model_id UWaveGestureLibrary \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 \ + --enc_in 3 diff --git a/scripts/classification/xPatch_SparseChannel.sh b/scripts/classification/xPatch_SparseChannel.sh new file mode 100644 index 0000000..544a4a5 --- /dev/null +++ b/scripts/classification/xPatch_SparseChannel.sh @@ -0,0 +1,32 @@ +#!/bin/bash + +# xPatch_SparseChannel Classification Training Script for FaceDetection Dataset +export CUDA_VISIBLE_DEVICES=0 + +model_name=xPatch_SparseChannel + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/FaceDetection/ \ + --model_id FaceDetection \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 64 \ + --seq_len 62 \ + --enc_in 144 \ + --d_model 128 \ + --d_ff 256 \ + --n_heads 8 \ + --patch_len 16 \ + --stride 8 \ + --moving_avg 25 \ + --dropout 0.1 \ + --des 'xPatch_SparseChannel_FaceDetection' \ + --itr 1 \ + --learning_rate 0.0005 \ + --train_epochs 100 \ + --patience 5 \ + --revin 1 \ + --k_graph 8 \ No newline at end of file diff --git a/scripts/exogenous_forecast/ECL/TimeXer.sh b/scripts/exogenous_forecast/ECL/TimeXer.sh new file mode 100644 index 0000000..53f9a28 --- /dev/null +++ b/scripts/exogenous_forecast/ECL/TimeXer.sh @@ -0,0 +1,89 @@ +export CUDA_VISIBLE_DEVICES=1 + +model_name=TimeXer +des='Timexer-MS' + + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features MS \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des $des \ + --batch_size 4 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_192 \ + --model $model_name \ + --data custom \ + --features MS \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des $des \ + --batch_size 32 \ + --itr 1 + + python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_336 \ + --model $model_name \ + 
--data custom \ + --features MS \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des $des \ + --batch_size 32 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_720 \ + --model $model_name \ + --data custom \ + --features MS \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 3 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des $des \ + --d_model 512 \ + --itr 1 diff --git a/scripts/exogenous_forecast/EPF/TimeXer.sh b/scripts/exogenous_forecast/EPF/TimeXer.sh new file mode 100644 index 0000000..e7fcf3a --- /dev/null +++ b/scripts/exogenous_forecast/EPF/TimeXer.sh @@ -0,0 +1,114 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=TimeXer +des='Timexer-MS' +patch_len=24 + + +python -u run.py \ + --is_training 1 \ + --task_name long_term_forecast \ + --root_path ./dataset/EPF/ \ + --data_path NP.csv \ + --model_id NP_168_24 \ + --model $model_name \ + --data custom \ + --features MS \ + --seq_len 168 \ + --pred_len 24 \ + --e_layers 3 \ + --enc_in 3 \ + --dec_in 3 \ + --c_out 1 \ + --des $des \ + --patch_len $patch_len \ + --d_model 512 \ + --d_ff 512 \ + --batch_size 4 \ + --itr 1 + +python -u run.py \ + --is_training 1 \ + --task_name long_term_forecast \ + --root_path ./dataset/EPF/ \ + --data_path PJM.csv \ + --model_id PJM_168_24 \ + --model $model_name \ + --data custom \ + --features MS \ + --seq_len 168 \ + --pred_len 24 \ + --e_layers 3 \ + --enc_in 3 \ + --dec_in 3 \ + --c_out 1 \ + --des $des \ + --patch_len $patch_len \ + --d_model 512 \ + --batch_size 16 \ + --itr 1 + +python -u run.py \ + --is_training 1 \ + --task_name long_term_forecast \ + --root_path ./dataset/EPF/ \ + --data_path BE.csv \ + --model_id BE_168_24 \ + --model $model_name \ + --data custom \ + --features MS \ + --seq_len 168 \ + --pred_len 24 \ + --e_layers 2 \ + --enc_in 3 \ + --dec_in 3 \ + --c_out 1 \ + --des $des \ + --patch_len $patch_len \ + --d_model 512 \ + --d_ff 512 \ + --batch_size 16 \ + --itr 1 + + +python -u run.py \ + --is_training 1 \ + --task_name long_term_forecast \ + --root_path ./dataset/EPF/ \ + --data_path FR.csv \ + --model_id FR_168_24 \ + --model $model_name \ + --data custom \ + --features MS \ + --seq_len 168 \ + --pred_len 24 \ + --e_layers 2 \ + --enc_in 3 \ + --dec_in 3 \ + --c_out 1 \ + --des $des \ + --patch_len $patch_len \ + --batch_size 16 \ + --d_model 512 \ + --itr 1 + +python -u run.py \ + --is_training 1 \ + --task_name long_term_forecast \ + --root_path ./dataset/EPF/ \ + --data_path DE.csv \ + --model_id DE_168_24 \ + --model $model_name \ + --data custom \ + --features MS \ + --seq_len 168 \ + --pred_len 24 \ + --e_layers 1 \ + --enc_in 3 \ + --dec_in 3 \ + --c_out 1 \ + --des $des \ + --patch_len $patch_len \ + --batch_size 4 \ + --d_model 512 \ + --itr 1 diff --git a/scripts/exogenous_forecast/ETTh1/TimeXer.sh b/scripts/exogenous_forecast/ETTh1/TimeXer.sh new file mode 100644 index 0000000..545a2af --- /dev/null +++ b/scripts/exogenous_forecast/ETTh1/TimeXer.sh @@ -0,0 +1,94 @@ +export CUDA_VISIBLE_DEVICES=1 + +model_name=TimeXer +des='Timexer-MS' + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_96 \ + --model $model_name \ + --data ETTh1 \ + --features MS \ + --seq_len 96 \ + --label_len 48 \ + 
--pred_len 96 \ + --e_layers 2 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --d_model 512 \ + --d_ff 512 \ + --des $des \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_192 \ + --model $model_name \ + --data ETTh1 \ + --features MS \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --d_model 128 \ + --d_ff 128 \ + --batch_size 4 \ + --des $des \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_336 \ + --model $model_name \ + --data ETTh1 \ + --features MS \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --d_model 512 \ + --d_ff 512 \ + --batch_size 32 \ + --des $des \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_720 \ + --model $model_name \ + --data ETTh1 \ + --features MS \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --d_model 512 \ + --batch_size 128 \ + --des $des \ + --itr 1 diff --git a/scripts/exogenous_forecast/ETTh2/TimeXer.sh b/scripts/exogenous_forecast/ETTh2/TimeXer.sh new file mode 100644 index 0000000..95aa938 --- /dev/null +++ b/scripts/exogenous_forecast/ETTh2/TimeXer.sh @@ -0,0 +1,95 @@ +export CUDA_VISIBLE_DEVICES=3 + +model_name=TimeXer +des='Timexer-MS' + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_96_96 \ + --model $model_name \ + --data ETTh2 \ + --features MS \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --d_model 128 \ + --d_ff 128 \ + --batch_size 128 \ + --des $des \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_96_192 \ + --model $model_name \ + --data ETTh2 \ + --features MS \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --d_model 128 \ + --d_ff 512 \ + --batch_size 128 \ + --des $des \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_96_336 \ + --model $model_name \ + --data ETTh2 \ + --features MS \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --d_model 128 \ + --d_ff 256 \ + --batch_size 16 \ + --des $des \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_96_720 \ + --model $model_name \ + --data ETTh2 \ + --features MS \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --d_model 256 \ + --d_ff 512 \ + --des $des \ + --itr 1 \ No newline at end of file diff --git a/scripts/exogenous_forecast/ETTm1/TimeXer.sh 
b/scripts/exogenous_forecast/ETTm1/TimeXer.sh new file mode 100644 index 0000000..b6ba806 --- /dev/null +++ b/scripts/exogenous_forecast/ETTm1/TimeXer.sh @@ -0,0 +1,92 @@ +export CUDA_VISIBLE_DEVICES=2 + +model_name=TimeXer +des='Timexer-MS' + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm1.csv \ + --model_id ETTm1_96_96 \ + --model $model_name \ + --data ETTm1 \ + --features MS \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --d_model 256 \ + --batch_size 128 \ + --des $des \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm1.csv \ + --model_id ETTm1_96_192 \ + --model $model_name \ + --data ETTm1 \ + --features MS \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --d_model 128 \ + --batch_size 128 \ + --des $des \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm1.csv \ + --model_id ETTm1_96_336 \ + --model $model_name \ + --data ETTm1 \ + --features MS \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --d_model 128 \ + --batch_size 128 \ + --des $des \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm1.csv \ + --model_id ETTm1_96_720 \ + --model $model_name \ + --data ETTm1 \ + --features MS \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --d_model 128 \ + --batch_size 128 \ + --des $des \ + --itr 1 \ No newline at end of file diff --git a/scripts/exogenous_forecast/ETTm2/TimeXer.sh b/scripts/exogenous_forecast/ETTm2/TimeXer.sh new file mode 100644 index 0000000..f014b71 --- /dev/null +++ b/scripts/exogenous_forecast/ETTm2/TimeXer.sh @@ -0,0 +1,92 @@ +export CUDA_VISIBLE_DEVICES=2 + +model_name=TimeXer +des='Timexer-MS' + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm2.csv \ + --model_id ETTm2_96_96 \ + --model $model_name \ + --data ETTm2 \ + --features MS \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --d_model 512 \ + --batch_size 16 \ + --des $des \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm2.csv \ + --model_id ETTm2_96_192 \ + --model $model_name \ + --data ETTm2 \ + --features MS \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --d_model 256 \ + --batch_size 4 \ + --des $des \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm2.csv \ + --model_id ETTm2_96_336 \ + --model $model_name \ + --data ETTm2 \ + --features MS \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --d_model 128 \ + --batch_size 128 \ + --des $des \ + --itr 1 + +python -u run.py \ + --task_name 
long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm2.csv \ + --model_id ETTm2_96_720 \ + --model $model_name \ + --data ETTm2 \ + --features MS \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --d_model 128 \ + --batch_size 128 \ + --des $des \ + --itr 1 diff --git a/scripts/exogenous_forecast/Traffic/TimeXer.sh b/scripts/exogenous_forecast/Traffic/TimeXer.sh new file mode 100644 index 0000000..831025a --- /dev/null +++ b/scripts/exogenous_forecast/Traffic/TimeXer.sh @@ -0,0 +1,96 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=TimeXer +des='Timexer-MS' + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features MS \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 1 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --d_model 512 \ + --des $des \ + --batch_size 4 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_192 \ + --model $model_name \ + --data custom \ + --features MS \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 1 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --d_model 512 \ + --des 'Exp' \ + --batch_size 4 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_336 \ + --model $model_name \ + --data custom \ + --features MS \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 1 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --d_model 512 \ + --des $des \ + --batch_size 4 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_720 \ + --model $model_name \ + --data custom \ + --features MS \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 1 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --d_model 512 \ + --des $des \ + --batch_size 4 \ + --itr 1 diff --git a/scripts/exogenous_forecast/Weather/TimeXer.sh b/scripts/exogenous_forecast/Weather/TimeXer.sh new file mode 100644 index 0000000..30a285f --- /dev/null +++ b/scripts/exogenous_forecast/Weather/TimeXer.sh @@ -0,0 +1,89 @@ +export CUDA_VISIBLE_DEVICES=3 + +model_name=TimeXer +des='Timexer-MS' + + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_96 \ + --model $model_name \ + --data custom \ + --features MS \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des $des \ + --d_model 128 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_192 \ + --model $model_name \ + --data custom \ + --features MS \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + 
--des $des \ + --d_model 128 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_336 \ + --model $model_name \ + --data custom \ + --features MS \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des $des \ + --d_model 128 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_720 \ + --model $model_name \ + --data custom \ + --features MS \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des $des \ + --d_model 128 \ + --itr 1 \ No newline at end of file diff --git a/scripts/imputation/ECL_script/Autoformer.sh b/scripts/imputation/ECL_script/Autoformer.sh new file mode 100644 index 0000000..fedd575 --- /dev/null +++ b/scripts/imputation/ECL_script/Autoformer.sh @@ -0,0 +1,111 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=Autoformer + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_mask_0.125 \ + --mask_rate 0.125 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_mask_0.25 \ + --mask_rate 0.25 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_mask_0.375 \ + --mask_rate 0.375 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_mask_0.5 \ + --mask_rate 0.5 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 diff --git a/scripts/imputation/ECL_script/DLinear.sh b/scripts/imputation/ECL_script/DLinear.sh new file mode 100644 index 0000000..2a95e50 --- /dev/null +++ b/scripts/imputation/ECL_script/DLinear.sh @@ -0,0 +1,111 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=DLinear + +python -u run.py \ + --task_name 
imputation \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_mask_0.125 \ + --mask_rate 0.125 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_mask_0.25 \ + --mask_rate 0.25 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_mask_0.375 \ + --mask_rate 0.375 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_mask_0.5 \ + --mask_rate 0.5 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 diff --git a/scripts/imputation/ECL_script/ETSformer.sh b/scripts/imputation/ECL_script/ETSformer.sh new file mode 100644 index 0000000..36a4cdd --- /dev/null +++ b/scripts/imputation/ECL_script/ETSformer.sh @@ -0,0 +1,111 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=ETSformer + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_mask_0.125 \ + --mask_rate 0.125 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 2 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_mask_0.25 \ + --mask_rate 0.25 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 2 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_mask_0.375 \ + --mask_rate 0.375 \ + --model 
$model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 2 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_mask_0.5 \ + --mask_rate 0.5 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 2 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 diff --git a/scripts/imputation/ECL_script/FEDformer.sh b/scripts/imputation/ECL_script/FEDformer.sh new file mode 100644 index 0000000..57b2f5b --- /dev/null +++ b/scripts/imputation/ECL_script/FEDformer.sh @@ -0,0 +1,111 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=FEDformer + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_mask_0.125 \ + --mask_rate 0.125 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_mask_0.25 \ + --mask_rate 0.25 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_mask_0.375 \ + --mask_rate 0.375 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_mask_0.5 \ + --mask_rate 0.5 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 diff --git a/scripts/imputation/ECL_script/Informer.sh b/scripts/imputation/ECL_script/Informer.sh new file mode 100644 index 0000000..cfd6264 --- /dev/null +++ b/scripts/imputation/ECL_script/Informer.sh @@ -0,0 +1,111 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=Informer + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path 
./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_mask_0.125 \ + --mask_rate 0.125 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_mask_0.25 \ + --mask_rate 0.25 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_mask_0.375 \ + --mask_rate 0.375 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_mask_0.5 \ + --mask_rate 0.5 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 diff --git a/scripts/imputation/ECL_script/LightTS.sh b/scripts/imputation/ECL_script/LightTS.sh new file mode 100644 index 0000000..a7bffa0 --- /dev/null +++ b/scripts/imputation/ECL_script/LightTS.sh @@ -0,0 +1,111 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=LightTS + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_mask_0.125 \ + --mask_rate 0.125 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_mask_0.25 \ + --mask_rate 0.25 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_mask_0.375 \ + --mask_rate 0.375 \ + --model $model_name \ + --data custom \ + 
--features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_mask_0.5 \ + --mask_rate 0.5 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 diff --git a/scripts/imputation/ECL_script/Pyraformer.sh b/scripts/imputation/ECL_script/Pyraformer.sh new file mode 100644 index 0000000..93a36e3 --- /dev/null +++ b/scripts/imputation/ECL_script/Pyraformer.sh @@ -0,0 +1,111 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=Pyraformer + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_mask_0.125 \ + --mask_rate 0.125 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_mask_0.25 \ + --mask_rate 0.25 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_mask_0.375 \ + --mask_rate 0.375 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_mask_0.5 \ + --mask_rate 0.5 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 diff --git a/scripts/imputation/ECL_script/Reformer.sh b/scripts/imputation/ECL_script/Reformer.sh new file mode 100644 index 0000000..7f50b92 --- /dev/null +++ b/scripts/imputation/ECL_script/Reformer.sh @@ -0,0 +1,111 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=Reformer + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + 
--data_path electricity.csv \ + --model_id ECL_mask_0.125 \ + --mask_rate 0.125 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_mask_0.25 \ + --mask_rate 0.25 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_mask_0.375 \ + --mask_rate 0.375 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_mask_0.5 \ + --mask_rate 0.5 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 diff --git a/scripts/imputation/ECL_script/TimesNet.sh b/scripts/imputation/ECL_script/TimesNet.sh new file mode 100644 index 0000000..f1930ab --- /dev/null +++ b/scripts/imputation/ECL_script/TimesNet.sh @@ -0,0 +1,111 @@ +export CUDA_VISIBLE_DEVICES=7 + +model_name=TimesNet + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_mask_0.125 \ + --mask_rate 0.125 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --batch_size 16 \ + --d_model 64 \ + --d_ff 64 \ + --des 'Exp' \ + --itr 1 \ + --top_k 3 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_mask_0.25 \ + --mask_rate 0.25 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --batch_size 16 \ + --d_model 64 \ + --d_ff 64 \ + --des 'Exp' \ + --itr 1 \ + --top_k 3 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_mask_0.375 \ + --mask_rate 0.375 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + 
--label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --batch_size 16 \ + --d_model 64 \ + --d_ff 64 \ + --des 'Exp' \ + --itr 1 \ + --top_k 3 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_mask_0.5 \ + --mask_rate 0.5 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --batch_size 16 \ + --d_model 64 \ + --d_ff 64 \ + --des 'Exp' \ + --itr 1 \ + --top_k 3 \ + --learning_rate 0.001 diff --git a/scripts/imputation/ECL_script/Transformer.sh b/scripts/imputation/ECL_script/Transformer.sh new file mode 100644 index 0000000..78559ec --- /dev/null +++ b/scripts/imputation/ECL_script/Transformer.sh @@ -0,0 +1,111 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=Transformer + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_mask_0.125 \ + --mask_rate 0.125 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_mask_0.25 \ + --mask_rate 0.25 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_mask_0.375 \ + --mask_rate 0.375 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_mask_0.5 \ + --mask_rate 0.5 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 diff --git a/scripts/imputation/ECL_script/iTransformer.sh b/scripts/imputation/ECL_script/iTransformer.sh new file mode 100644 index 0000000..93eb2d2 --- /dev/null +++ b/scripts/imputation/ECL_script/iTransformer.sh @@ -0,0 +1,111 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=iTransformer + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path 
electricity.csv \ + --model_id ECL_mask_0.125 \ + --mask_rate 0.125 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_mask_0.25 \ + --mask_rate 0.25 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_mask_0.375 \ + --mask_rate 0.375 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_mask_0.5 \ + --mask_rate 0.5 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 diff --git a/scripts/imputation/ETT_script/Autoformer_ETTh1.sh b/scripts/imputation/ETT_script/Autoformer_ETTh1.sh new file mode 100644 index 0000000..d71dc2d --- /dev/null +++ b/scripts/imputation/ETT_script/Autoformer_ETTh1.sh @@ -0,0 +1,111 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=Autoformer + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_mask_0.125 \ + --mask_rate 0.125 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_mask_0.25 \ + --mask_rate 0.25 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_mask_0.375 \ + --mask_rate 0.375 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + 
--pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_mask_0.5 \ + --mask_rate 0.5 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 diff --git a/scripts/imputation/ETT_script/Autoformer_ETTh2.sh b/scripts/imputation/ETT_script/Autoformer_ETTh2.sh new file mode 100644 index 0000000..ebdee51 --- /dev/null +++ b/scripts/imputation/ETT_script/Autoformer_ETTh2.sh @@ -0,0 +1,111 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=Autoformer + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_mask_0.125 \ + --mask_rate 0.125 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_mask_0.25 \ + --mask_rate 0.25 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_mask_0.375 \ + --mask_rate 0.375 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_mask_0.5 \ + --mask_rate 0.5 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 diff --git a/scripts/imputation/ETT_script/Autoformer_ETTm1.sh b/scripts/imputation/ETT_script/Autoformer_ETTm1.sh new file mode 100644 index 0000000..6210df7 --- /dev/null +++ b/scripts/imputation/ETT_script/Autoformer_ETTm1.sh @@ -0,0 +1,111 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=Autoformer + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm1.csv \ + --model_id ETTm1_mask_0.125 \ + --mask_rate 0.125 \ + 
--model $model_name \ + --data ETTm1 \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm1.csv \ + --model_id ETTm1_mask_0.25 \ + --mask_rate 0.25 \ + --model $model_name \ + --data ETTm1 \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm1.csv \ + --model_id ETTm1_mask_0.375 \ + --mask_rate 0.375 \ + --model $model_name \ + --data ETTm1 \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm1.csv \ + --model_id ETTm1_mask_0.5 \ + --mask_rate 0.5 \ + --model $model_name \ + --data ETTm1 \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 diff --git a/scripts/imputation/ETT_script/Autoformer_ETTm2.sh b/scripts/imputation/ETT_script/Autoformer_ETTm2.sh new file mode 100644 index 0000000..a16e618 --- /dev/null +++ b/scripts/imputation/ETT_script/Autoformer_ETTm2.sh @@ -0,0 +1,111 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=Autoformer + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm2.csv \ + --model_id ETTm2_mask_0.125 \ + --mask_rate 0.125 \ + --model $model_name \ + --data ETTm2 \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm2.csv \ + --model_id ETTm2_mask_0.25 \ + --mask_rate 0.25 \ + --model $model_name \ + --data ETTm2 \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm2.csv \ + --model_id ETTm2_mask_0.375 \ + --mask_rate 0.375 \ + --model $model_name \ + --data ETTm2 \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + 
--batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm2.csv \ + --model_id ETTm2_mask_0.5 \ + --mask_rate 0.5 \ + --model $model_name \ + --data ETTm2 \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 diff --git a/scripts/imputation/ETT_script/Crossformer_ETTh1.sh b/scripts/imputation/ETT_script/Crossformer_ETTh1.sh new file mode 100644 index 0000000..a271a1f --- /dev/null +++ b/scripts/imputation/ETT_script/Crossformer_ETTh1.sh @@ -0,0 +1,111 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=Crossformer + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_mask_0.125 \ + --mask_rate 0.125 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 64 \ + --d_ff 64 \ + --des 'Exp' \ + --itr 1 \ + --top_k 3 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_mask_0.25 \ + --mask_rate 0.25 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 64 \ + --d_ff 64 \ + --des 'Exp' \ + --itr 1 \ + --top_k 3 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_mask_0.375 \ + --mask_rate 0.375 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 64 \ + --d_ff 64 \ + --des 'Exp' \ + --itr 1 \ + --top_k 3 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_mask_0.5 \ + --mask_rate 0.5 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 64 \ + --d_ff 64 \ + --des 'Exp' \ + --itr 1 \ + --top_k 3 \ + --learning_rate 0.001 diff --git a/scripts/imputation/ETT_script/DLinear_ETTh1.sh b/scripts/imputation/ETT_script/DLinear_ETTh1.sh new file mode 100644 index 0000000..5a9da75 --- /dev/null +++ b/scripts/imputation/ETT_script/DLinear_ETTh1.sh @@ -0,0 +1,111 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=DLinear + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_mask_0.125 \ + --mask_rate 0.125 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 96 \ + --e_layers 2 \ + 
--d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_mask_0.25 \ + --mask_rate 0.25 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_mask_0.375 \ + --mask_rate 0.375 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_mask_0.5 \ + --mask_rate 0.5 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 diff --git a/scripts/imputation/ETT_script/FiLM_ETTh1.sh b/scripts/imputation/ETT_script/FiLM_ETTh1.sh new file mode 100644 index 0000000..d811908 --- /dev/null +++ b/scripts/imputation/ETT_script/FiLM_ETTh1.sh @@ -0,0 +1,111 @@ +export CUDA_VISIBLE_DEVICES=6 + +model_name=FiLM + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_mask_0.125 \ + --mask_rate 0.125 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 64 \ + --d_ff 64 \ + --des 'Exp' \ + --itr 1 \ + --top_k 3 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_mask_0.25 \ + --mask_rate 0.25 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 64 \ + --d_ff 64 \ + --des 'Exp' \ + --itr 1 \ + --top_k 3 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_mask_0.375 \ + --mask_rate 0.375 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 64 \ + --d_ff 64 \ + --des 'Exp' \ + --itr 1 \ + --top_k 3 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name 
imputation \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_mask_0.5 \ + --mask_rate 0.5 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 64 \ + --d_ff 64 \ + --des 'Exp' \ + --itr 1 \ + --top_k 3 \ + --learning_rate 0.001 diff --git a/scripts/imputation/ETT_script/MICN_ETTh1.sh b/scripts/imputation/ETT_script/MICN_ETTh1.sh new file mode 100644 index 0000000..5c742b5 --- /dev/null +++ b/scripts/imputation/ETT_script/MICN_ETTh1.sh @@ -0,0 +1,115 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=MICN + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_mask_0.125 \ + --mask_rate 0.125 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 64 \ + --d_ff 64 \ + --des 'Exp' \ + --itr 1 \ + --top_k 3 \ + --conv_kernel 12 16 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_mask_0.25 \ + --mask_rate 0.25 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 64 \ + --d_ff 64 \ + --des 'Exp' \ + --itr 1 \ + --top_k 3 \ + --conv_kernel 12 16 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_mask_0.375 \ + --mask_rate 0.375 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 64 \ + --d_ff 64 \ + --des 'Exp' \ + --itr 1 \ + --top_k 3 \ + --conv_kernel 12 16 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_mask_0.5 \ + --mask_rate 0.5 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 64 \ + --d_ff 64 \ + --des 'Exp' \ + --itr 1 \ + --top_k 3 \ + --conv_kernel 12 16 \ + --learning_rate 0.001 diff --git a/scripts/imputation/ETT_script/Nonstationary_Transformer_ETTh1.sh b/scripts/imputation/ETT_script/Nonstationary_Transformer_ETTh1.sh new file mode 100644 index 0000000..ec6bb59 --- /dev/null +++ b/scripts/imputation/ETT_script/Nonstationary_Transformer_ETTh1.sh @@ -0,0 +1,119 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=Nonstationary_Transformer + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_mask_0.125 \ + --mask_rate 0.125 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + 
--factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 \ + --p_hidden_dims 256 256 \ + --p_hidden_layers 2 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_mask_0.25 \ + --mask_rate 0.25 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 \ + --p_hidden_dims 256 256 \ + --p_hidden_layers 2 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_mask_0.375 \ + --mask_rate 0.375 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 \ + --p_hidden_dims 256 256 \ + --p_hidden_layers 2 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_mask_0.5 \ + --mask_rate 0.5 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 \ + --p_hidden_dims 256 256 \ + --p_hidden_layers 2 diff --git a/scripts/imputation/ETT_script/TiDE_ETTh1.sh b/scripts/imputation/ETT_script/TiDE_ETTh1.sh new file mode 100644 index 0000000..bb92069 --- /dev/null +++ b/scripts/imputation/ETT_script/TiDE_ETTh1.sh @@ -0,0 +1,110 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=TiDE + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_mask_0.125 \ + --mask_rate 0.125 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 2 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 256 \ + --d_ff 256 \ + --dropout 0.3 \ + --learning_rate 0.1 \ + --patience 5 \ + --train_epochs 10 \ + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_mask_0.25 \ + --mask_rate 0.25 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 16 \ + --d_ff 32 \ + --des 'Exp' \ + --itr 1 \ + --top_k 3 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_mask_0.375 \ + --mask_rate 0.375 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + 
--dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 16 \ + --d_ff 32 \ + --des 'Exp' \ + --itr 1 \ + --top_k 3 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_mask_0.5 \ + --mask_rate 0.5 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 16 \ + --d_ff 32 \ + --des 'Exp' \ + --itr 1 \ + --top_k 3 \ + --learning_rate 0.001 diff --git a/scripts/imputation/ETT_script/TimesNet_ETTh1.sh b/scripts/imputation/ETT_script/TimesNet_ETTh1.sh new file mode 100644 index 0000000..6b5af0c --- /dev/null +++ b/scripts/imputation/ETT_script/TimesNet_ETTh1.sh @@ -0,0 +1,111 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=TimesNet + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_mask_0.125 \ + --mask_rate 0.125 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 16 \ + --d_ff 32 \ + --des 'Exp' \ + --itr 1 \ + --top_k 3 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_mask_0.25 \ + --mask_rate 0.25 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 16 \ + --d_ff 32 \ + --des 'Exp' \ + --itr 1 \ + --top_k 3 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_mask_0.375 \ + --mask_rate 0.375 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 16 \ + --d_ff 32 \ + --des 'Exp' \ + --itr 1 \ + --top_k 3 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_mask_0.5 \ + --mask_rate 0.5 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 16 \ + --d_ff 32 \ + --des 'Exp' \ + --itr 1 \ + --top_k 3 \ + --learning_rate 0.001 diff --git a/scripts/imputation/ETT_script/TimesNet_ETTh2.sh b/scripts/imputation/ETT_script/TimesNet_ETTh2.sh new file mode 100644 index 0000000..2780552 --- /dev/null +++ b/scripts/imputation/ETT_script/TimesNet_ETTh2.sh @@ -0,0 +1,111 @@ +export CUDA_VISIBLE_DEVICES=1 + +model_name=TimesNet + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_mask_0.125 \ + --mask_rate 0.125 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + 
--e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 32 \ + --d_ff 32 \ + --des 'Exp' \ + --itr 1 \ + --top_k 3 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_mask_0.25 \ + --mask_rate 0.25 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 32 \ + --d_ff 32 \ + --des 'Exp' \ + --itr 1 \ + --top_k 3 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_mask_0.375 \ + --mask_rate 0.375 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 32 \ + --d_ff 32 \ + --des 'Exp' \ + --itr 1 \ + --top_k 3 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_mask_0.5 \ + --mask_rate 0.5 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 32 \ + --d_ff 32 \ + --des 'Exp' \ + --itr 1 \ + --top_k 3 \ + --learning_rate 0.001 diff --git a/scripts/imputation/ETT_script/TimesNet_ETTm1.sh b/scripts/imputation/ETT_script/TimesNet_ETTm1.sh new file mode 100644 index 0000000..5250313 --- /dev/null +++ b/scripts/imputation/ETT_script/TimesNet_ETTm1.sh @@ -0,0 +1,111 @@ +export CUDA_VISIBLE_DEVICES=2 + +model_name=TimesNet + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm1.csv \ + --model_id ETTm1_mask_0.125 \ + --mask_rate 0.125 \ + --model $model_name \ + --data ETTm1 \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 64 \ + --d_ff 64 \ + --des 'Exp' \ + --itr 1 \ + --top_k 3 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm1.csv \ + --model_id ETTm1_mask_0.25 \ + --mask_rate 0.25 \ + --model $model_name \ + --data ETTm1 \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 64 \ + --d_ff 64 \ + --des 'Exp' \ + --itr 1 \ + --top_k 3 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm1.csv \ + --model_id ETTm1_mask_0.375 \ + --mask_rate 0.375 \ + --model $model_name \ + --data ETTm1 \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 64 \ + --d_ff 64 \ + --des 'Exp' \ + --itr 1 \ + --top_k 3 \ + --learning_rate 0.001 + +python -u run.py \ + 
--task_name imputation \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm1.csv \ + --model_id ETTm1_mask_0.5 \ + --mask_rate 0.5 \ + --model $model_name \ + --data ETTm1 \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 64 \ + --d_ff 64 \ + --des 'Exp' \ + --itr 1 \ + --top_k 3 \ + --learning_rate 0.001 diff --git a/scripts/imputation/ETT_script/TimesNet_ETTm2.sh b/scripts/imputation/ETT_script/TimesNet_ETTm2.sh new file mode 100644 index 0000000..e1e8e5d --- /dev/null +++ b/scripts/imputation/ETT_script/TimesNet_ETTm2.sh @@ -0,0 +1,111 @@ +export CUDA_VISIBLE_DEVICES=5 + +model_name=TimesNet + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm2.csv \ + --model_id ETTm2_mask_0.125 \ + --mask_rate 0.125 \ + --model $model_name \ + --data ETTm2 \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 64 \ + --d_ff 64 \ + --des 'Exp' \ + --itr 1 \ + --top_k 3 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm2.csv \ + --model_id ETTm2_mask_0.25 \ + --mask_rate 0.25 \ + --model $model_name \ + --data ETTm2 \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 64 \ + --d_ff 64 \ + --des 'Exp' \ + --itr 1 \ + --top_k 3 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm2.csv \ + --model_id ETTm2_mask_0.375 \ + --mask_rate 0.375 \ + --model $model_name \ + --data ETTm2 \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 64 \ + --d_ff 64 \ + --des 'Exp' \ + --itr 1 \ + --top_k 3 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm2.csv \ + --model_id ETTm2_mask_0.5 \ + --mask_rate 0.5 \ + --model $model_name \ + --data ETTm2 \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 64 \ + --d_ff 64 \ + --des 'Exp' \ + --itr 1 \ + --top_k 3 \ + --learning_rate 0.001 diff --git a/scripts/imputation/ETT_script/Transformer_ETTh1.sh b/scripts/imputation/ETT_script/Transformer_ETTh1.sh new file mode 100644 index 0000000..eb64ee3 --- /dev/null +++ b/scripts/imputation/ETT_script/Transformer_ETTh1.sh @@ -0,0 +1,111 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=Transformer + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_mask_0.125 \ + --mask_rate 0.125 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' 
\ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_mask_0.25 \ + --mask_rate 0.25 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_mask_0.375 \ + --mask_rate 0.375 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_mask_0.5 \ + --mask_rate 0.5 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 diff --git a/scripts/imputation/ETT_script/Transformer_ETTh2.sh b/scripts/imputation/ETT_script/Transformer_ETTh2.sh new file mode 100644 index 0000000..f5b7e43 --- /dev/null +++ b/scripts/imputation/ETT_script/Transformer_ETTh2.sh @@ -0,0 +1,111 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=Transformer + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_mask_0.125 \ + --mask_rate 0.125 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_mask_0.25 \ + --mask_rate 0.25 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_mask_0.375 \ + --mask_rate 0.375 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id 
ETTh2_mask_0.5 \ + --mask_rate 0.5 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 diff --git a/scripts/imputation/ETT_script/Transformer_ETTm1.sh b/scripts/imputation/ETT_script/Transformer_ETTm1.sh new file mode 100644 index 0000000..dd8523d --- /dev/null +++ b/scripts/imputation/ETT_script/Transformer_ETTm1.sh @@ -0,0 +1,111 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=Transformer + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm1.csv \ + --model_id ETTm1_mask_0.125 \ + --mask_rate 0.125 \ + --model $model_name \ + --data ETTm1 \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm1.csv \ + --model_id ETTm1_mask_0.25 \ + --mask_rate 0.25 \ + --model $model_name \ + --data ETTm1 \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm1.csv \ + --model_id ETTm1_mask_0.375 \ + --mask_rate 0.375 \ + --model $model_name \ + --data ETTm1 \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm1.csv \ + --model_id ETTm1_mask_0.5 \ + --mask_rate 0.5 \ + --model $model_name \ + --data ETTm1 \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 diff --git a/scripts/imputation/ETT_script/Transformer_ETTm2.sh b/scripts/imputation/ETT_script/Transformer_ETTm2.sh new file mode 100644 index 0000000..ec27338 --- /dev/null +++ b/scripts/imputation/ETT_script/Transformer_ETTm2.sh @@ -0,0 +1,111 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=Transformer + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm2.csv \ + --model_id ETTm2_mask_0.125 \ + --mask_rate 0.125 \ + --model $model_name \ + --data ETTm2 \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation 
\ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm2.csv \ + --model_id ETTm2_mask_0.25 \ + --mask_rate 0.25 \ + --model $model_name \ + --data ETTm2 \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm2.csv \ + --model_id ETTm2_mask_0.375 \ + --mask_rate 0.375 \ + --model $model_name \ + --data ETTm2 \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm2.csv \ + --model_id ETTm2_mask_0.5 \ + --mask_rate 0.5 \ + --model $model_name \ + --data ETTm2 \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 diff --git a/scripts/imputation/ETT_script/iTransformer_ETTh2.sh b/scripts/imputation/ETT_script/iTransformer_ETTh2.sh new file mode 100644 index 0000000..cc8a92b --- /dev/null +++ b/scripts/imputation/ETT_script/iTransformer_ETTh2.sh @@ -0,0 +1,111 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=iTransformer + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_mask_0.125 \ + --mask_rate 0.125 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_mask_0.25 \ + --mask_rate 0.25 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_mask_0.375 \ + --mask_rate 0.375 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_mask_0.5 \ + --mask_rate 0.5 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + 
--seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 diff --git a/scripts/imputation/Weather_script/Autoformer.sh b/scripts/imputation/Weather_script/Autoformer.sh new file mode 100644 index 0000000..4c96108 --- /dev/null +++ b/scripts/imputation/Weather_script/Autoformer.sh @@ -0,0 +1,111 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=Autoformer + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_mask_0.125 \ + --mask_rate 0.125 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_mask_0.25 \ + --mask_rate 0.25 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_mask_0.375 \ + --mask_rate 0.375 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_mask_0.5 \ + --mask_rate 0.5 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 diff --git a/scripts/imputation/Weather_script/TimesNet.sh b/scripts/imputation/Weather_script/TimesNet.sh new file mode 100644 index 0000000..6bb14e0 --- /dev/null +++ b/scripts/imputation/Weather_script/TimesNet.sh @@ -0,0 +1,111 @@ +export CUDA_VISIBLE_DEVICES=6 + +model_name=TimesNet + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_mask_0.125 \ + --mask_rate 0.125 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --batch_size 16 \ + --d_model 64 \ + --d_ff 64 \ + --des 'Exp' \ + --itr 1 \ + --top_k 3 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id 
weather_mask_0.25 \ + --mask_rate 0.25 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --batch_size 16 \ + --d_model 64 \ + --d_ff 64 \ + --des 'Exp' \ + --itr 1 \ + --top_k 3 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_mask_0.375 \ + --mask_rate 0.375 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --batch_size 16 \ + --d_model 64 \ + --d_ff 64 \ + --des 'Exp' \ + --itr 1 \ + --top_k 3 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_mask_0.5 \ + --mask_rate 0.5 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --batch_size 16 \ + --d_model 64 \ + --d_ff 64 \ + --des 'Exp' \ + --itr 1 \ + --top_k 3 \ + --learning_rate 0.001 diff --git a/scripts/imputation/Weather_script/Transformer.sh b/scripts/imputation/Weather_script/Transformer.sh new file mode 100644 index 0000000..8020234 --- /dev/null +++ b/scripts/imputation/Weather_script/Transformer.sh @@ -0,0 +1,111 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=Transformer + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_mask_0.125 \ + --mask_rate 0.125 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_mask_0.25 \ + --mask_rate 0.25 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_mask_0.375 \ + --mask_rate 0.375 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 + +python -u run.py \ + --task_name imputation \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_mask_0.5 \ + --mask_rate 0.5 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 0 \ + --pred_len 0 \ + --e_layers 2 \ + --d_layers 1 \ 
+ --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 128 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 \ + --learning_rate 0.001 diff --git a/scripts/long_term_forecast/AugmentSample/Classification/PatchTST.sh b/scripts/long_term_forecast/AugmentSample/Classification/PatchTST.sh new file mode 100644 index 0000000..016ca15 --- /dev/null +++ b/scripts/long_term_forecast/AugmentSample/Classification/PatchTST.sh @@ -0,0 +1,28 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=PatchTST + +for aug in jitter scaling permutation magwarp timewarp windowslice windowwarp rotation spawner dtwwarp shapedtwwarp wdba discdtw discsdtw +do +echo using augmentation: ${aug} + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/EthanolConcentration/ \ + --model_id EthanolConcentration \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 \ + --augmentation_ratio 1 \ + --${aug} + done \ No newline at end of file diff --git a/scripts/long_term_forecast/AugmentSample/Forecasting/PatchTST.sh b/scripts/long_term_forecast/AugmentSample/Forecasting/PatchTST.sh new file mode 100644 index 0000000..af41d5a --- /dev/null +++ b/scripts/long_term_forecast/AugmentSample/Forecasting/PatchTST.sh @@ -0,0 +1,33 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=PatchTST +for aug in jitter scaling permutation magwarp timewarp windowslice windowwarp rotation spawner dtwwarp shapedtwwarp discdtw discsdtw +do +for pred_len in 96 192 336 720 +do +echo using augmentation: ${aug} + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/exchange_rate/ \ + --data_path exchange_rate.csv \ + --model_id Exchange_96_${pred_len} \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len ${pred_len} \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 8 \ + --dec_in 8 \ + --c_out 8 \ + --des 'Exp' \ + --itr 1 \ + --augmentation_ratio 1 \ + --${aug} +done +done \ No newline at end of file diff --git a/scripts/long_term_forecast/AugmentSample/ReadMe.md b/scripts/long_term_forecast/AugmentSample/ReadMe.md new file mode 100644 index 0000000..6a2f89e --- /dev/null +++ b/scripts/long_term_forecast/AugmentSample/ReadMe.md @@ -0,0 +1,97 @@ +# Augmentation Feature Roadbook + +Hi there! This guide is for those who are interested in testing +augmentation techniques in `Time-Series-Library`. + +For now, we have embedded several augmentation methods +in this repo. We are still collecting publicly available +augmentation algorithms, and we appreciate your valuable +advice! + +``` +The Implemented Augmentation Methods +1. jitter +2. scaling +3. permutation +4. magwarp +5. timewarp +6. windowslice +7. windowwarp +8. rotation +9. spawner +10. dtwwarp +11. shapedtwwarp +12. wdba (Specially Designed for Classification tasks) +13. discdtw +``` + +## Usage + +In this folder, we present two sample shell scripts +that apply augmentation in the `Forecasting` and `Classification` +tasks. + +Take the `Classification` task as an example: we test multiple +augmentation algorithms on the `EthanolConcentration` dataset +(a subset of the popular classification benchmark `UEA`) +using the `PatchTST` model.
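The usage notes later in this ReadMe explain that `augmentation_ratio` can be set to an integer `num` followed by `num` augmentation type labels. As a minimal sketch of that multi-augmentation usage, assuming the flags behave exactly as those notes describe (this command is illustrative and not shipped with the commit), stacking two augmentations on the same classification run might look like the call below; the folder's actual single-augmentation script is reproduced right after it.

```shell
# Hypothetical example (not part of the commit): apply two augmentations
# (jitter and scaling) in one run by setting --augmentation_ratio to the
# number of augmentation flags passed.
export CUDA_VISIBLE_DEVICES=0

python -u run.py \
  --task_name classification \
  --is_training 1 \
  --root_path ./dataset/EthanolConcentration/ \
  --model_id EthanolConcentration \
  --model PatchTST \
  --data UEA \
  --e_layers 3 \
  --batch_size 16 \
  --d_model 128 \
  --d_ff 256 \
  --top_k 3 \
  --des 'Exp' \
  --itr 1 \
  --learning_rate 0.001 \
  --train_epochs 100 \
  --patience 10 \
  --augmentation_ratio 2 \
  --jitter \
  --scaling
```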
+ +```shell +export CUDA_VISIBLE_DEVICES=0 + +model_name=PatchTST + +for aug in jitter scaling permutation magwarp timewarp windowslice windowwarp rotation spawner dtwwarp shapedtwwarp wdba discdtw discsdtw +do +echo using augmentation: ${aug} + +python -u run.py \ + --task_name classification \ + --is_training 1 \ + --root_path ./dataset/EthanolConcentration/ \ + --model_id EthanolConcentration \ + --model $model_name \ + --data UEA \ + --e_layers 3 \ + --batch_size 16 \ + --d_model 128 \ + --d_ff 256 \ + --top_k 3 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --train_epochs 100 \ + --patience 10 \ + --augmentation_ratio 1 \ + --${aug} + done +``` + +Here, the parameter `augmentation_ratio` specifies how many +times we want to apply our augmentation method. +The parameter `${aug}` is a string giving the augmentation +type label. + +The example here performs augmentation only once, so we +can set `augmentation_ratio` to `1`, followed by one +augmentation type label. More generally, you can set +`augmentation_ratio` to an integer `num` followed by +`num` augmentation type labels. + +The augmentation code follows the same prototype as +`Time-Series-Library`. If you want to adjust other +training parameters, feel free to add arguments to the +shell scripts and play around. The full list of parameters +can be seen in `run.py`. + +## Contact Us! + +This piece of code is written and maintained by +[Yunzhong Qiu](https://github.com/DigitalLifeYZQiu). +We thank [Haixu Wu](https://github.com/wuhaixu2016) and +[Jiaxiang Dong](https://github.com/dongjiaxiang) for +insightful discussion and solid support. + +If you have difficulties or find bugs in our code, please +contact us: +- Email: qiuyz24@mails.tsinghua.edu.cn \ No newline at end of file diff --git a/scripts/long_term_forecast/ECL_script/Autoformer.sh b/scripts/long_term_forecast/ECL_script/Autoformer.sh new file mode 100644 index 0000000..d34e6d4 --- /dev/null +++ b/scripts/long_term_forecast/ECL_script/Autoformer.sh @@ -0,0 +1,87 @@ +export CUDA_VISIBLE_DEVICES=1 + +model_name=Autoformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 
96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/ECL_script/Crossformer.sh b/scripts/long_term_forecast/ECL_script/Crossformer.sh new file mode 100644 index 0000000..e88befd --- /dev/null +++ b/scripts/long_term_forecast/ECL_script/Crossformer.sh @@ -0,0 +1,103 @@ +export CUDA_VISIBLE_DEVICES=7 + +model_name=Crossformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --d_model 256 \ + --d_ff 512 \ + --top_k 5 \ + --des 'Exp' \ + --batch_size 16 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --d_model 256 \ + --d_ff 512 \ + --top_k 5 \ + --des 'Exp' \ + --batch_size 16 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --d_model 256 \ + --d_ff 512 \ + --top_k 5 \ + --des 'Exp' \ + --batch_size 16 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --d_model 256 \ + --d_ff 512 \ + --top_k 5 \ + --des 'Exp' \ + --batch_size 16 \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/ECL_script/DLinear.sh b/scripts/long_term_forecast/ECL_script/DLinear.sh new file mode 100644 index 0000000..4fb44f7 --- /dev/null +++ b/scripts/long_term_forecast/ECL_script/DLinear.sh @@ -0,0 +1,87 @@ +export CUDA_VISIBLE_DEVICES=1 + +model_name=DLinear + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + 
--des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/ECL_script/ETSformer.sh b/scripts/long_term_forecast/ECL_script/ETSformer.sh new file mode 100644 index 0000000..087f72c --- /dev/null +++ b/scripts/long_term_forecast/ECL_script/ETSformer.sh @@ -0,0 +1,87 @@ +export CUDA_VISIBLE_DEVICES=1 + +model_name=ETSformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 2 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 2 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 2 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 2 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/ECL_script/FEDformer.sh b/scripts/long_term_forecast/ECL_script/FEDformer.sh new file mode 100644 index 0000000..a748d46 --- /dev/null +++ b/scripts/long_term_forecast/ECL_script/FEDformer.sh @@ -0,0 +1,87 @@ +export CUDA_VISIBLE_DEVICES=1 + +model_name=FEDformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + 
--des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 diff --git a/scripts/long_term_forecast/ECL_script/FiLM.sh b/scripts/long_term_forecast/ECL_script/FiLM.sh new file mode 100644 index 0000000..dac33da --- /dev/null +++ b/scripts/long_term_forecast/ECL_script/FiLM.sh @@ -0,0 +1,91 @@ +export CUDA_VISIBLE_DEVICES=3 + +model_name=FiLM + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features S \ + --seq_len 192 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --batch_size 4 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_192 \ + --model $model_name \ + --data custom \ + --features S \ + --seq_len 192 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --batch_size 4 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_336 \ + --model $model_name \ + --data custom \ + --features S \ + --seq_len 192 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --batch_size 4 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_720 \ + --model $model_name \ + --data custom \ + --features S \ + --seq_len 192 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --batch_size 4 \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/ECL_script/Informer.sh b/scripts/long_term_forecast/ECL_script/Informer.sh new file mode 100644 index 0000000..e2a36e6 --- /dev/null +++ b/scripts/long_term_forecast/ECL_script/Informer.sh @@ -0,0 +1,87 @@ 
+export CUDA_VISIBLE_DEVICES=1 + +model_name=Informer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/ECL_script/Koopa.sh b/scripts/long_term_forecast/ECL_script/Koopa.sh new file mode 100644 index 0000000..a8da551 --- /dev/null +++ b/scripts/long_term_forecast/ECL_script/Koopa.sh @@ -0,0 +1,87 @@ +export CUDA_VISIBLE_DEVICES=1 + +model_name=Koopa + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_48 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --pred_len 48 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --learning_rate 0.001 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_192_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 192 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --learning_rate 0.001 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_288_144 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 288 \ + --pred_len 144 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --learning_rate 0.001 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_384_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 384 \ + --pred_len 192 \ + 
--e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --learning_rate 0.001 \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/ECL_script/LightTS.sh b/scripts/long_term_forecast/ECL_script/LightTS.sh new file mode 100644 index 0000000..8704692 --- /dev/null +++ b/scripts/long_term_forecast/ECL_script/LightTS.sh @@ -0,0 +1,87 @@ +export CUDA_VISIBLE_DEVICES=1 + +model_name=LightTS + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/ECL_script/MICN.sh b/scripts/long_term_forecast/ECL_script/MICN.sh new file mode 100644 index 0000000..6bbf035 --- /dev/null +++ b/scripts/long_term_forecast/ECL_script/MICN.sh @@ -0,0 +1,99 @@ +export CUDA_VISIBLE_DEVICES=4 + +model_name=MICN + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 96 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --d_model 256 \ + --d_ff 512 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 96 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --d_model 256 \ + --d_ff 512 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_336 \ + --model $model_name \ + 
--data custom \ + --features M \ + --seq_len 96 \ + --label_len 96 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --d_model 256 \ + --d_ff 512 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 96 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --d_model 256 \ + --d_ff 512 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/ECL_script/Mamba.sh b/scripts/long_term_forecast/ECL_script/Mamba.sh new file mode 100644 index 0000000..931a1b7 --- /dev/null +++ b/scripts/long_term_forecast/ECL_script/Mamba.sh @@ -0,0 +1,30 @@ +model_name=Mamba + +for pred_len in 96 192 336 720 +# for pred_len in 336 720 +do + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_$pred_len'_'$pred_len \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len $pred_len \ + --label_len 48 \ + --pred_len $pred_len \ + --e_layers 2 \ + --d_layers 1 \ + --enc_in 321 \ + --expand 2 \ + --d_ff 16 \ + --d_conv 4 \ + --c_out 321 \ + --d_model 128 \ + --des 'Exp' \ + --itr 1 \ + +done diff --git a/scripts/long_term_forecast/ECL_script/MultiPatchFormer.sh b/scripts/long_term_forecast/ECL_script/MultiPatchFormer.sh new file mode 100644 index 0000000..6d94d4b --- /dev/null +++ b/scripts/long_term_forecast/ECL_script/MultiPatchFormer.sh @@ -0,0 +1,98 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=MultiPatchFormer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 1 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --d_model 256 \ + --d_ff 512 \ + --des 'Exp' \ + --n_heads 8 \ + --batch_size 32 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --d_model 256 \ + --d_ff 512 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --d_model 256 \ + --d_ff 512 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ 
+ --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --d_model 256 \ + --d_ff 512 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/ECL_script/Nonstationary_Transformer.sh b/scripts/long_term_forecast/ECL_script/Nonstationary_Transformer.sh new file mode 100644 index 0000000..28799ef --- /dev/null +++ b/scripts/long_term_forecast/ECL_script/Nonstationary_Transformer.sh @@ -0,0 +1,99 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=Nonstationary_Transformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 \ + --p_hidden_dims 256 256 \ + --p_hidden_layers 2 \ + --d_model 2048 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 \ + --p_hidden_dims 256 256 \ + --p_hidden_layers 2 \ + --d_model 2048 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 \ + --p_hidden_dims 256 256 \ + --p_hidden_layers 2 \ + --d_model 2048 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 \ + --p_hidden_dims 256 256 \ + --p_hidden_layers 2 \ + --d_model 2048 diff --git a/scripts/long_term_forecast/ECL_script/PatchTST.sh b/scripts/long_term_forecast/ECL_script/PatchTST.sh new file mode 100644 index 0000000..b9b9863 --- /dev/null +++ b/scripts/long_term_forecast/ECL_script/PatchTST.sh @@ -0,0 +1,91 @@ +export CUDA_VISIBLE_DEVICES=6 + +model_name=PatchTST + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --batch_size 16 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + 
--enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --batch_size 16 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --batch_size 16 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --batch_size 16 \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/ECL_script/Pyraformer.sh b/scripts/long_term_forecast/ECL_script/Pyraformer.sh new file mode 100644 index 0000000..44bf3ba --- /dev/null +++ b/scripts/long_term_forecast/ECL_script/Pyraformer.sh @@ -0,0 +1,87 @@ +export CUDA_VISIBLE_DEVICES=1 + +model_name=Pyraformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/ECL_script/Reformer.sh b/scripts/long_term_forecast/ECL_script/Reformer.sh new file mode 100644 index 0000000..e37a55f --- /dev/null +++ b/scripts/long_term_forecast/ECL_script/Reformer.sh @@ -0,0 +1,87 @@ +export CUDA_VISIBLE_DEVICES=1 + +model_name=Reformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 
96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/ECL_script/SegRNN.sh b/scripts/long_term_forecast/ECL_script/SegRNN.sh new file mode 100644 index 0000000..8284e0c --- /dev/null +++ b/scripts/long_term_forecast/ECL_script/SegRNN.sh @@ -0,0 +1,27 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=SegRNN + +seq_len=96 +for pred_len in 96 192 336 720 +do +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_$seq_len'_'$pred_len \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len $seq_len \ + --pred_len $pred_len \ + --seg_len 24 \ + --enc_in 321 \ + --d_model 512 \ + --dropout 0 \ + --learning_rate 0.001 \ + --des 'Exp' \ + --itr 1 +done + diff --git a/scripts/long_term_forecast/ECL_script/TSMixer.sh b/scripts/long_term_forecast/ECL_script/TSMixer.sh new file mode 100755 index 0000000..07de434 --- /dev/null +++ b/scripts/long_term_forecast/ECL_script/TSMixer.sh @@ -0,0 +1,98 @@ + +model_name=TSMixer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 96 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --d_model 256 \ + --d_ff 512 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 96 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --d_model 256 \ + --d_ff 512 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_336 \ + --model $model_name \ + --data 
custom \ + --features M \ + --seq_len 96 \ + --label_len 96 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --d_model 256 \ + --d_ff 512 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 96 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --d_model 256 \ + --d_ff 512 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/ECL_script/TimeMixer.sh b/scripts/long_term_forecast/ECL_script/TimeMixer.sh new file mode 100755 index 0000000..1e21206 --- /dev/null +++ b/scripts/long_term_forecast/ECL_script/TimeMixer.sh @@ -0,0 +1,134 @@ +#export CUDA_VISIBLE_DEVICES=0 + +model_name=TimeMixer + +seq_len=96 +e_layers=3 +down_sampling_layers=3 +down_sampling_window=2 +learning_rate=0.01 +d_model=16 +d_ff=32 +batch_size=32 +train_epochs=20 +patience=10 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_$seq_len'_'96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len $seq_len \ + --label_len 0 \ + --pred_len 96 \ + --e_layers $e_layers \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 \ + --d_model $d_model \ + --d_ff $d_ff \ + --batch_size $batch_size \ + --learning_rate $learning_rate \ + --train_epochs $train_epochs \ + --patience $patience \ + --down_sampling_layers $down_sampling_layers \ + --down_sampling_method avg \ + --down_sampling_window $down_sampling_window + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_$seq_len'_'192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len $seq_len \ + --label_len 0 \ + --pred_len 192 \ + --e_layers $e_layers \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 \ + --d_model $d_model \ + --d_ff $d_ff \ + --batch_size $batch_size \ + --learning_rate $learning_rate \ + --train_epochs $train_epochs \ + --patience $patience \ + --down_sampling_layers $down_sampling_layers \ + --down_sampling_method avg \ + --down_sampling_window $down_sampling_window + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_$seq_len'_'336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len $seq_len \ + --label_len 0 \ + --pred_len 336 \ + --e_layers $e_layers \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 \ + --d_model $d_model \ + --d_ff $d_ff \ + --batch_size $batch_size \ + --learning_rate $learning_rate \ + --train_epochs $train_epochs \ + --patience $patience \ + --down_sampling_layers $down_sampling_layers \ + --down_sampling_method avg \ + --down_sampling_window $down_sampling_window + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id 
ECL_$seq_len'_'720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len $seq_len \ + --label_len 0 \ + --pred_len 720 \ + --e_layers $e_layers \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --itr 1 \ + --d_model $d_model \ + --d_ff $d_ff \ + --batch_size $batch_size \ + --learning_rate $learning_rate \ + --train_epochs $train_epochs \ + --patience $patience \ + --down_sampling_layers $down_sampling_layers \ + --down_sampling_method avg \ + --down_sampling_window $down_sampling_window \ No newline at end of file diff --git a/scripts/long_term_forecast/ECL_script/TimeXer.sh b/scripts/long_term_forecast/ECL_script/TimeXer.sh new file mode 100644 index 0000000..8c63f1a --- /dev/null +++ b/scripts/long_term_forecast/ECL_script/TimeXer.sh @@ -0,0 +1,88 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=TimeXer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 4 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --d_ff 512 \ + --batch_size 4 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 3 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --batch_size 4 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 4 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --batch_size 4 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 3 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --batch_size 4 \ + --itr 1 diff --git a/scripts/long_term_forecast/ECL_script/TimesNet.sh b/scripts/long_term_forecast/ECL_script/TimesNet.sh new file mode 100644 index 0000000..e265dfb --- /dev/null +++ b/scripts/long_term_forecast/ECL_script/TimesNet.sh @@ -0,0 +1,99 @@ +export CUDA_VISIBLE_DEVICES=4 + +model_name=TimesNet + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --d_model 256 \ + --d_ff 512 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + 
--seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --d_model 256 \ + --d_ff 512 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --d_model 256 \ + --d_ff 512 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --d_model 256 \ + --d_ff 512 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/ECL_script/Transformer.sh b/scripts/long_term_forecast/ECL_script/Transformer.sh new file mode 100644 index 0000000..c2253f5 --- /dev/null +++ b/scripts/long_term_forecast/ECL_script/Transformer.sh @@ -0,0 +1,87 @@ +export CUDA_VISIBLE_DEVICES=2 + +model_name=Transformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features S \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_192 \ + --model $model_name \ + --data custom \ + --features S \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_336 \ + --model $model_name \ + --data custom \ + --features S \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_720 \ + --model $model_name \ + --data custom \ + --features S \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --des 'Exp' \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/ECL_script/WPMixer.sh b/scripts/long_term_forecast/ECL_script/WPMixer.sh new file mode 100644 index 0000000..ce0284e --- /dev/null +++ b/scripts/long_term_forecast/ECL_script/WPMixer.sh @@ -0,0 +1,49 @@ + +export CUDA_VISIBLE_DEVICES=0 + +# Model name +model_name=WPMixer + +# Datasets and prediction lengths +dataset=electricity +seq_lens=(512 512 512 512) +pred_lens=(96 
192 336 720) +learning_rates=(0.00328086 0.000493286 0.002505375 0.001977516) +batches=(32 32 32 32) +epochs=(100 100 100 100) +dropouts=(0.1 0.1 0.2 0.1) +patch_lens=(16 16 16 16) +lradjs=(type3 type3 type3 type3) +d_models=(32 32 32 32) +patiences=(12 12 12 12) + +# Model params below need to be set in WPMixer.py Line 15, instead of this script +wavelets=(sym3 coif5 sym4 db2) +levels=(2 3 1 2) +tfactors=(3 7 5 7) +dfactors=(5 5 7 8) +strides=(8 8 8 8) + +# Loop over datasets and prediction lengths +for i in "${!pred_lens[@]}"; do + python -u run.py \ + --is_training 1 \ + --root_path ./data/electricity/ \ + --data_path electricity.csv \ + --model_id wpmixer \ + --model $model_name \ + --task_name long_term_forecast \ + --data $dataset \ + --seq_len ${seq_lens[$i]} \ + --pred_len ${pred_lens[$i]} \ + --label_len 0 \ + --d_model ${d_models[$i]} \ + --patch_len ${patch_lens[$i]} \ + --batch_size ${batches[$i]} \ + --learning_rate ${learning_rates[$i]} \ + --lradj ${lradjs[$i]} \ + --dropout ${dropouts[$i]} \ + --patience ${patiences[$i]} \ + --train_epochs ${epochs[$i]} \ + --use_amp +done diff --git a/scripts/long_term_forecast/ECL_script/iTransformer.sh b/scripts/long_term_forecast/ECL_script/iTransformer.sh new file mode 100644 index 0000000..579ffbe --- /dev/null +++ b/scripts/long_term_forecast/ECL_script/iTransformer.sh @@ -0,0 +1,105 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=iTransformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 3 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --d_model 512 \ + --d_ff 512 \ + --batch_size 16 \ + --learning_rate 0.0005 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 3 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --d_model 512 \ + --d_ff 512 \ + --batch_size 16 \ + --learning_rate 0.0005 \ + --itr 1 + + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 3 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --d_model 512 \ + --d_ff 512 \ + --batch_size 16 \ + --learning_rate 0.0005 \ + --itr 1 + + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/electricity/ \ + --data_path electricity.csv \ + --model_id ECL_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 3 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 321 \ + --dec_in 321 \ + --c_out 321 \ + --des 'Exp' \ + --d_model 512 \ + --d_ff 512 \ + --batch_size 16 \ + --learning_rate 0.0005 \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/ETT_script/Autoformer_ETTh1.sh 
b/scripts/long_term_forecast/ETT_script/Autoformer_ETTh1.sh new file mode 100644 index 0000000..50c7f2f --- /dev/null +++ b/scripts/long_term_forecast/ETT_script/Autoformer_ETTh1.sh @@ -0,0 +1,87 @@ +export CUDA_VISIBLE_DEVICES=2 + +model_name=Autoformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_96 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_192 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_336 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_720 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/ETT_script/Autoformer_ETTh2.sh b/scripts/long_term_forecast/ETT_script/Autoformer_ETTh2.sh new file mode 100644 index 0000000..8a24405 --- /dev/null +++ b/scripts/long_term_forecast/ETT_script/Autoformer_ETTh2.sh @@ -0,0 +1,87 @@ +export CUDA_VISIBLE_DEVICES=2 + +model_name=Autoformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_96_96 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_96_192 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_96_336 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + 
--model_id ETTh2_96_720 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/ETT_script/Autoformer_ETTm1.sh b/scripts/long_term_forecast/ETT_script/Autoformer_ETTm1.sh new file mode 100644 index 0000000..323e0a3 --- /dev/null +++ b/scripts/long_term_forecast/ETT_script/Autoformer_ETTm1.sh @@ -0,0 +1,87 @@ +export CUDA_VISIBLE_DEVICES=1 + +model_name=Autoformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm1.csv \ + --model_id ETTm1_96_96 \ + --model $model_name \ + --data ETTm1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm1.csv \ + --model_id ETTm1_96_192 \ + --model $model_name \ + --data ETTm1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm1.csv \ + --model_id ETTm1_96_336 \ + --model $model_name \ + --data ETTm1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm1.csv \ + --model_id ETTm1_96_720 \ + --model $model_name \ + --data ETTm1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/ETT_script/Autoformer_ETTm2.sh b/scripts/long_term_forecast/ETT_script/Autoformer_ETTm2.sh new file mode 100644 index 0000000..88170f9 --- /dev/null +++ b/scripts/long_term_forecast/ETT_script/Autoformer_ETTm2.sh @@ -0,0 +1,87 @@ +export CUDA_VISIBLE_DEVICES=2 + +model_name=Autoformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm2.csv \ + --model_id ETTm2_96_96 \ + --model $model_name \ + --data ETTm2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 1 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm2.csv \ + --model_id ETTm2_96_192 \ + --model $model_name \ + --data ETTm2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 1 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm2.csv \ + --model_id ETTm2_96_336 \ + --model $model_name \ 
+ --data ETTm2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 1 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm2.csv \ + --model_id ETTm2_96_720 \ + --model $model_name \ + --data ETTm2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 1 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/ETT_script/Crossformer_ETTh1.sh b/scripts/long_term_forecast/ETT_script/Crossformer_ETTh1.sh new file mode 100644 index 0000000..dc213fe --- /dev/null +++ b/scripts/long_term_forecast/ETT_script/Crossformer_ETTh1.sh @@ -0,0 +1,87 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=Crossformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_96 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_192 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_336 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_720 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/ETT_script/Crossformer_ETTh2.sh b/scripts/long_term_forecast/ETT_script/Crossformer_ETTh2.sh new file mode 100644 index 0000000..e327500 --- /dev/null +++ b/scripts/long_term_forecast/ETT_script/Crossformer_ETTh2.sh @@ -0,0 +1,87 @@ +export CUDA_VISIBLE_DEVICES=1 + +model_name=Crossformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_96_96 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_96_192 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + 
--seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_96_336 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_96_720 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/ETT_script/Crossformer_ETTm1.sh b/scripts/long_term_forecast/ETT_script/Crossformer_ETTm1.sh new file mode 100644 index 0000000..526494a --- /dev/null +++ b/scripts/long_term_forecast/ETT_script/Crossformer_ETTm1.sh @@ -0,0 +1,83 @@ +export CUDA_VISIBLE_DEVICES=2 + +model_name=Crossformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm1.csv \ + --model_id ETTm1_96_96 \ + --model $model_name \ + --data ETTm1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm1.csv \ + --model_id ETTm1_96_192 \ + --model $model_name \ + --data ETTm1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm1.csv \ + --model_id ETTm1_96_336 \ + --model $model_name \ + --data ETTm1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm1.csv \ + --model_id ETTm1_96_720 \ + --model $model_name \ + --data ETTm1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/ETT_script/Crossformer_ETTm2.sh b/scripts/long_term_forecast/ETT_script/Crossformer_ETTm2.sh new file mode 100644 index 0000000..83e4721 --- /dev/null +++ b/scripts/long_term_forecast/ETT_script/Crossformer_ETTm2.sh @@ -0,0 +1,87 @@ +export CUDA_VISIBLE_DEVICES=4 + +model_name=Crossformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm2.csv \ + --model_id ETTm2_96_96 \ + --model $model_name \ + --data ETTm2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 1 \ 
+ --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm2.csv \ + --model_id ETTm2_96_192 \ + --model $model_name \ + --data ETTm2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 1 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm2.csv \ + --model_id ETTm2_96_336 \ + --model $model_name \ + --data ETTm2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 1 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm2.csv \ + --model_id ETTm2_96_720 \ + --model $model_name \ + --data ETTm2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 1 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/ETT_script/DLinear_ETTh1.sh b/scripts/long_term_forecast/ETT_script/DLinear_ETTh1.sh new file mode 100644 index 0000000..466776c --- /dev/null +++ b/scripts/long_term_forecast/ETT_script/DLinear_ETTh1.sh @@ -0,0 +1,87 @@ +export CUDA_VISIBLE_DEVICES=2 + +model_name=DLinear + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_96 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_192 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_336 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_720 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/ETT_script/ETSformer_ETTh1.sh b/scripts/long_term_forecast/ETT_script/ETSformer_ETTh1.sh new file mode 100644 index 0000000..7c59d59 --- /dev/null +++ b/scripts/long_term_forecast/ETT_script/ETSformer_ETTh1.sh @@ -0,0 +1,87 @@ +export CUDA_VISIBLE_DEVICES=2 + 
+model_name=ETSformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_96 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 2 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_192 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 2 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_336 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 2 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_720 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 2 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/ETT_script/FEDformer_ETTh1.sh b/scripts/long_term_forecast/ETT_script/FEDformer_ETTh1.sh new file mode 100644 index 0000000..a33faf5 --- /dev/null +++ b/scripts/long_term_forecast/ETT_script/FEDformer_ETTh1.sh @@ -0,0 +1,87 @@ +export CUDA_VISIBLE_DEVICES=2 + +model_name=FEDformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_96 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_192 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_336 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_720 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 
'Exp' \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/ETT_script/FiLM_ETTh1.sh b/scripts/long_term_forecast/ETT_script/FiLM_ETTh1.sh new file mode 100644 index 0000000..224af9d --- /dev/null +++ b/scripts/long_term_forecast/ETT_script/FiLM_ETTh1.sh @@ -0,0 +1,87 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=FiLM + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_96 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 336 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_192 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 336 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_336 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 336 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_720 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 336 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/ETT_script/FiLM_ETTh2.sh b/scripts/long_term_forecast/ETT_script/FiLM_ETTh2.sh new file mode 100644 index 0000000..23591a8 --- /dev/null +++ b/scripts/long_term_forecast/ETT_script/FiLM_ETTh2.sh @@ -0,0 +1,87 @@ +export CUDA_VISIBLE_DEVICES=6 + +model_name=FiLM + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_96_96 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len 168 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_96_192 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len 168 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_96_336 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len 168 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 
1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_96_720 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len 168 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/ETT_script/FiLM_ETTm1.sh b/scripts/long_term_forecast/ETT_script/FiLM_ETTm1.sh new file mode 100644 index 0000000..c131881 --- /dev/null +++ b/scripts/long_term_forecast/ETT_script/FiLM_ETTm1.sh @@ -0,0 +1,87 @@ +export CUDA_VISIBLE_DEVICES=6 + +model_name=FiLM + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm1.csv \ + --model_id ETTm1_96_96 \ + --model $model_name \ + --data ETTm1 \ + --features M \ + --seq_len 720 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm1.csv \ + --model_id ETTm1_96_192 \ + --model $model_name \ + --data ETTm1 \ + --features M \ + --seq_len 720 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm1.csv \ + --model_id ETTm1_96_336 \ + --model $model_name \ + --data ETTm1 \ + --features M \ + --seq_len 720 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm1.csv \ + --model_id ETTm1_96_720 \ + --model $model_name \ + --data ETTm1 \ + --features M \ + --seq_len 720 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/ETT_script/FiLM_ETTm2.sh b/scripts/long_term_forecast/ETT_script/FiLM_ETTm2.sh new file mode 100644 index 0000000..164749d --- /dev/null +++ b/scripts/long_term_forecast/ETT_script/FiLM_ETTm2.sh @@ -0,0 +1,87 @@ +export CUDA_VISIBLE_DEVICES=3 + +model_name=FiLM + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm2.csv \ + --model_id ETTm2_96_96 \ + --model $model_name \ + --data ETTm2 \ + --features M \ + --seq_len 720 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm2.csv \ + --model_id ETTm2_96_192 \ + --model $model_name \ + --data ETTm2 \ + --features M \ + --seq_len 720 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm2.csv \ + --model_id 
ETTm2_96_336 \ + --model $model_name \ + --data ETTm2 \ + --features M \ + --seq_len 720 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm2.csv \ + --model_id ETTm2_96_720 \ + --model $model_name \ + --data ETTm2 \ + --features M \ + --seq_len 720 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/ETT_script/Informer_ETTh1.sh b/scripts/long_term_forecast/ETT_script/Informer_ETTh1.sh new file mode 100644 index 0000000..0412bef --- /dev/null +++ b/scripts/long_term_forecast/ETT_script/Informer_ETTh1.sh @@ -0,0 +1,87 @@ +export CUDA_VISIBLE_DEVICES=2 + +model_name=Informer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_96 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_192 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_336 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_720 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/ETT_script/Koopa_ETTh1.sh b/scripts/long_term_forecast/ETT_script/Koopa_ETTh1.sh new file mode 100644 index 0000000..2c97a52 --- /dev/null +++ b/scripts/long_term_forecast/ETT_script/Koopa_ETTh1.sh @@ -0,0 +1,87 @@ +export CUDA_VISIBLE_DEVICES=2 + +model_name=Koopa + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_48 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --pred_len 48 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --learning_rate 0.001 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_192_96 \ + --model $model_name \ + --data ETTh1 \ + --features 
M \ + --seq_len 192 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --learning_rate 0.001 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_288_144 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 288 \ + --pred_len 144 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --learning_rate 0.001 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_384_192 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 384 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --learning_rate 0.001 \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/ETT_script/Koopa_ETTh2.sh b/scripts/long_term_forecast/ETT_script/Koopa_ETTh2.sh new file mode 100644 index 0000000..f4d1f32 --- /dev/null +++ b/scripts/long_term_forecast/ETT_script/Koopa_ETTh2.sh @@ -0,0 +1,87 @@ +export CUDA_VISIBLE_DEVICES=2 + +model_name=Koopa + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_96_48 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len 96 \ + --pred_len 48 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --learning_rate 0.001 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_192_96 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len 192 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --learning_rate 0.001 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_288_144 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len 288 \ + --pred_len 144 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --learning_rate 0.001 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_384_192 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len 384 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --learning_rate 0.001 \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/ETT_script/Koopa_ETTm1.sh b/scripts/long_term_forecast/ETT_script/Koopa_ETTm1.sh new file mode 100644 index 0000000..d1dfbd4 --- /dev/null +++ b/scripts/long_term_forecast/ETT_script/Koopa_ETTm1.sh @@ -0,0 +1,87 @@ +export CUDA_VISIBLE_DEVICES=2 + +model_name=Koopa + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm1.csv \ + --model_id ETTm1_96_48 \ + --model $model_name \ + --data ETTm1 \ + --features M \ + --seq_len 96 \ + 
--pred_len 48 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --learning_rate 0.001 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm1.csv \ + --model_id ETTm1_192_96 \ + --model $model_name \ + --data ETTm1 \ + --features M \ + --seq_len 192 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --learning_rate 0.001 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm1.csv \ + --model_id ETTm1_288_144 \ + --model $model_name \ + --data ETTm1 \ + --features M \ + --seq_len 288 \ + --pred_len 144 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --learning_rate 0.001 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm1.csv \ + --model_id ETTm1_384_192 \ + --model $model_name \ + --data ETTm1 \ + --features M \ + --seq_len 384 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --learning_rate 0.001 \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/ETT_script/Koopa_ETTm2.sh b/scripts/long_term_forecast/ETT_script/Koopa_ETTm2.sh new file mode 100644 index 0000000..8f6439d --- /dev/null +++ b/scripts/long_term_forecast/ETT_script/Koopa_ETTm2.sh @@ -0,0 +1,87 @@ +export CUDA_VISIBLE_DEVICES=2 + +model_name=Koopa + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm2.csv \ + --model_id ETTm2_96_48 \ + --model $model_name \ + --data ETTm2 \ + --features M \ + --seq_len 96 \ + --pred_len 48 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --learning_rate 0.001 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm2.csv \ + --model_id ETTm2_192_96 \ + --model $model_name \ + --data ETTm2 \ + --features M \ + --seq_len 192 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --learning_rate 0.001 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm2.csv \ + --model_id ETTm2_288_144 \ + --model $model_name \ + --data ETTm2 \ + --features M \ + --seq_len 288 \ + --pred_len 144 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --learning_rate 0.001 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm2.csv \ + --model_id ETTm2_384_192 \ + --model $model_name \ + --data ETTm2 \ + --features M \ + --seq_len 384 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --learning_rate 0.001 \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/ETT_script/LightTS_ETTh1.sh b/scripts/long_term_forecast/ETT_script/LightTS_ETTh1.sh new file mode 100644 index 0000000..f14f27c --- 
/dev/null +++ b/scripts/long_term_forecast/ETT_script/LightTS_ETTh1.sh @@ -0,0 +1,87 @@ +export CUDA_VISIBLE_DEVICES=2 + +model_name=LightTS + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_96 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_192 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_336 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_720 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/ETT_script/MICN_ETTh1.sh b/scripts/long_term_forecast/ETT_script/MICN_ETTh1.sh new file mode 100644 index 0000000..77764b9 --- /dev/null +++ b/scripts/long_term_forecast/ETT_script/MICN_ETTh1.sh @@ -0,0 +1,87 @@ +export CUDA_VISIBLE_DEVICES=2 + +model_name=MICN + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_96 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 96 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_192 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 96 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_336 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 96 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_720 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 96 \ + --pred_len 720 \ + 
--e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 diff --git a/scripts/long_term_forecast/ETT_script/MICN_ETTh2.sh b/scripts/long_term_forecast/ETT_script/MICN_ETTh2.sh new file mode 100644 index 0000000..a137e66 --- /dev/null +++ b/scripts/long_term_forecast/ETT_script/MICN_ETTh2.sh @@ -0,0 +1,87 @@ +export CUDA_VISIBLE_DEVICES=2 + +model_name=MICN + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_96_96 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len 96 \ + --label_len 96 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_96_192 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len 96 \ + --label_len 96 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_96_336 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len 96 \ + --label_len 96 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_96_720 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len 96 \ + --label_len 96 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 diff --git a/scripts/long_term_forecast/ETT_script/MICN_ETTm1.sh b/scripts/long_term_forecast/ETT_script/MICN_ETTm1.sh new file mode 100644 index 0000000..ac57040 --- /dev/null +++ b/scripts/long_term_forecast/ETT_script/MICN_ETTm1.sh @@ -0,0 +1,91 @@ +export CUDA_VISIBLE_DEVICES=2 + +model_name=MICN + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm1.csv \ + --model_id ETTm1_96_96 \ + --model $model_name \ + --data ETTm1 \ + --features M \ + --seq_len 96 \ + --label_len 96 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --top_k 5 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm1.csv \ + --model_id ETTm1_96_192 \ + --model $model_name \ + --data ETTm1 \ + --features M \ + --seq_len 96 \ + --label_len 96 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --top_k 5 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm1.csv \ + --model_id ETTm1_96_336 \ + --model $model_name \ + --data ETTm1 \ + --features M \ + --seq_len 96 \ + --label_len 96 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --top_k 5 \ + 
--itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm1.csv \ + --model_id ETTm1_96_720 \ + --model $model_name \ + --data ETTm1 \ + --features M \ + --seq_len 96 \ + --label_len 96 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --top_k 5 \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/ETT_script/MICN_ETTm2.sh b/scripts/long_term_forecast/ETT_script/MICN_ETTm2.sh new file mode 100644 index 0000000..d70b8df --- /dev/null +++ b/scripts/long_term_forecast/ETT_script/MICN_ETTm2.sh @@ -0,0 +1,91 @@ +export CUDA_VISIBLE_DEVICES=2 + +model_name=MICN + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm2.csv \ + --model_id ETTm2_96_96 \ + --model $model_name \ + --data ETTm2 \ + --features M \ + --seq_len 96 \ + --label_len 96 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --top_k 5 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm2.csv \ + --model_id ETTm2_96_192 \ + --model $model_name \ + --data ETTm2 \ + --features M \ + --seq_len 96 \ + --label_len 96 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --top_k 5 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm2.csv \ + --model_id ETTm2_96_336 \ + --model $model_name \ + --data ETTm2 \ + --features M \ + --seq_len 96 \ + --label_len 96 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --top_k 5 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm2.csv \ + --model_id ETTm2_96_720 \ + --model $model_name \ + --data ETTm2 \ + --features M \ + --seq_len 96 \ + --label_len 96 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --top_k 5 \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/ETT_script/MambaSimple_ETTh1.sh b/scripts/long_term_forecast/ETT_script/MambaSimple_ETTh1.sh new file mode 100644 index 0000000..5e6606a --- /dev/null +++ b/scripts/long_term_forecast/ETT_script/MambaSimple_ETTh1.sh @@ -0,0 +1,29 @@ +model_name=MambaSimple + +for pred_len in 96 192 336 720 +do + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_$pred_len'_'$pred_len \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len $pred_len \ + --label_len 48 \ + --pred_len $pred_len \ + --e_layers 2 \ + --d_layers 1 \ + --enc_in 7 \ + --expand 2 \ + --d_ff 16 \ + --d_conv 4 \ + --c_out 7 \ + --d_model 128 \ + --des 'Exp' \ + --itr 1 \ + +done \ No newline at end of file diff --git a/scripts/long_term_forecast/ETT_script/Mamba_ETT_all.sh b/scripts/long_term_forecast/ETT_script/Mamba_ETT_all.sh new file mode 100644 index 0000000..18558d6 --- /dev/null +++ b/scripts/long_term_forecast/ETT_script/Mamba_ETT_all.sh @@ -0,0 +1,4 @@ 
+./scripts/long_term_forecast/ETT_script/Mamba_ETTh1.sh | tee mamba_ett.txt +./scripts/long_term_forecast/ETT_script/Mamba_ETTh2.sh | tee -a mamba_ett.txt +./scripts/long_term_forecast/ETT_script/Mamba_ETTm1.sh | tee -a mamba_ett.txt +./scripts/long_term_forecast/ETT_script/Mamba_ETTm2.sh | tee -a mamba_ett.txt \ No newline at end of file diff --git a/scripts/long_term_forecast/ETT_script/Mamba_ETTh1.sh b/scripts/long_term_forecast/ETT_script/Mamba_ETTh1.sh new file mode 100644 index 0000000..9f29ac3 --- /dev/null +++ b/scripts/long_term_forecast/ETT_script/Mamba_ETTh1.sh @@ -0,0 +1,28 @@ +model_name=Mamba +for pred_len in 96 192 336 720 +do + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_$pred_len'_'$pred_len \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len $pred_len \ + --label_len 48 \ + --pred_len $pred_len \ + --e_layers 2 \ + --d_layers 1 \ + --enc_in 7 \ + --expand 2 \ + --d_ff 16 \ + --d_conv 4 \ + --c_out 7 \ + --d_model 128 \ + --des 'Exp' \ + --itr 1 \ + +done \ No newline at end of file diff --git a/scripts/long_term_forecast/ETT_script/Mamba_ETTh2.sh b/scripts/long_term_forecast/ETT_script/Mamba_ETTh2.sh new file mode 100644 index 0000000..4c61ce7 --- /dev/null +++ b/scripts/long_term_forecast/ETT_script/Mamba_ETTh2.sh @@ -0,0 +1,28 @@ +model_name=Mamba + +for pred_len in 96 192 336 720 +do + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_$pred_len'_'$pred_len \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len $pred_len \ + --label_len 48 \ + --pred_len $pred_len \ + --e_layers 2 \ + --enc_in 7 \ + --expand 2 \ + --d_ff 16 \ + --d_conv 4 \ + --c_out 7 \ + --d_model 128 \ + --des 'Exp' \ + --itr 1 \ + +done \ No newline at end of file diff --git a/scripts/long_term_forecast/ETT_script/Mamba_ETTm1.sh b/scripts/long_term_forecast/ETT_script/Mamba_ETTm1.sh new file mode 100644 index 0000000..eefff6f --- /dev/null +++ b/scripts/long_term_forecast/ETT_script/Mamba_ETTm1.sh @@ -0,0 +1,28 @@ +model_name=Mamba + +for pred_len in 96 192 336 720 +do + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm1.csv \ + --model_id ETTm1_$pred_len'_'$pred_len \ + --model $model_name \ + --data ETTm1 \ + --features M \ + --seq_len $pred_len \ + --label_len 48 \ + --pred_len $pred_len \ + --e_layers 2 \ + --enc_in 7 \ + --expand 2 \ + --d_ff 16 \ + --d_conv 4 \ + --c_out 7 \ + --d_model 128 \ + --des 'Exp' \ + --itr 1 \ + +done \ No newline at end of file diff --git a/scripts/long_term_forecast/ETT_script/Mamba_ETTm2.sh b/scripts/long_term_forecast/ETT_script/Mamba_ETTm2.sh new file mode 100644 index 0000000..2a4458c --- /dev/null +++ b/scripts/long_term_forecast/ETT_script/Mamba_ETTm2.sh @@ -0,0 +1,28 @@ +model_name=Mamba + +for pred_len in 96 192 336 720 +do + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm2.csv \ + --model_id ETTm2_$pred_len'_'$pred_len \ + --model $model_name \ + --data ETTm2 \ + --features M \ + --seq_len $pred_len \ + --label_len 48 \ + --pred_len $pred_len \ + --e_layers 2 \ + --enc_in 7 \ + --expand 2 \ + --d_ff 16 \ + --d_conv 4 \ + --c_out 7 \ + --d_model 128 \ + --des 'Exp' \ + --itr 1 \ + +done \ No newline at end of file diff --git 
a/scripts/long_term_forecast/ETT_script/MultiPatchFormer_ETTh1.sh b/scripts/long_term_forecast/ETT_script/MultiPatchFormer_ETTh1.sh new file mode 100644 index 0000000..44edfdd --- /dev/null +++ b/scripts/long_term_forecast/ETT_script/MultiPatchFormer_ETTh1.sh @@ -0,0 +1,90 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=MultiPatchFormer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_96 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 1 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --d_model 256 \ + --d_ff 512 \ + --des 'Exp' \ + --itr 1 + + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_192 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 1 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --d_model 256 \ + --d_ff 512 \ + --des 'Exp' \ + --itr 1 + + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_336 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 1 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --d_model 256 \ + --d_ff 512 \ + --des 'Exp' \ + --itr 1 + + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_720 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 1 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --d_model 256 \ + --d_ff 512 \ + --des 'Exp' \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/ETT_script/MultiPatchFormer_ETTm1.sh b/scripts/long_term_forecast/ETT_script/MultiPatchFormer_ETTm1.sh new file mode 100644 index 0000000..bc8b3e5 --- /dev/null +++ b/scripts/long_term_forecast/ETT_script/MultiPatchFormer_ETTm1.sh @@ -0,0 +1,98 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=MultiPatchFormer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm1.csv \ + --model_id ETTm1_96_96 \ + --model $model_name \ + --data ETTm1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 1 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --d_model 256 \ + --d_ff 512 \ + --des 'Exp' \ + --n_heads 8 \ + --batch_size 32 \ + --itr 1 + + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm1.csv \ + --model_id ETTm1_96_192 \ + --model $model_name \ + --data ETTm1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 1 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --d_model 256 \ + --d_ff 512 \ + --des 'Exp' \ + --n_heads 8 \ + --batch_size 32 \ + --itr 1 + + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm1.csv \ + --model_id ETTm1_96_336 \ + --model $model_name \ + --data ETTm1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 1 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --d_model 
256 \ + --d_ff 512 \ + --des 'Exp' \ + --n_heads 8 \ + --batch_size 32 \ + --itr 1 + + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm1.csv \ + --model_id ETTm1_96_720 \ + --model $model_name \ + --data ETTm1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 1 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --d_model 256 \ + --d_ff 512 \ + --des 'Exp' \ + --n_heads 8 \ + --batch_size 32 \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/ETT_script/Nonstationary_Transformer_ETTh1.sh b/scripts/long_term_forecast/ETT_script/Nonstationary_Transformer_ETTh1.sh new file mode 100644 index 0000000..a0e9f7a --- /dev/null +++ b/scripts/long_term_forecast/ETT_script/Nonstationary_Transformer_ETTh1.sh @@ -0,0 +1,99 @@ +export CUDA_VISIBLE_DEVICES=2 + +model_name=Nonstationary_Transformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_96 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 \ + --p_hidden_dims 256 256 \ + --p_hidden_layers 2 \ + --d_model 128 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_192 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 \ + --p_hidden_dims 256 256 \ + --p_hidden_layers 2 \ + --d_model 128 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_336 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 \ + --p_hidden_dims 256 256 \ + --p_hidden_layers 2 \ + --d_model 128 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_720 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 \ + --p_hidden_dims 256 256 \ + --p_hidden_layers 2 \ + --d_model 128 \ No newline at end of file diff --git a/scripts/long_term_forecast/ETT_script/Nonstationary_Transformer_ETTh2.sh b/scripts/long_term_forecast/ETT_script/Nonstationary_Transformer_ETTh2.sh new file mode 100644 index 0000000..022a0a0 --- /dev/null +++ b/scripts/long_term_forecast/ETT_script/Nonstationary_Transformer_ETTh2.sh @@ -0,0 +1,95 @@ +export CUDA_VISIBLE_DEVICES=7 + +model_name=Nonstationary_Transformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_96_96 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 1 
\ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 \ + --p_hidden_dims 256 256 \ + --p_hidden_layers 2 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_96_192 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 1 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 \ + --p_hidden_dims 256 256 \ + --p_hidden_layers 2 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_96_336 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 1 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 \ + --p_hidden_dims 256 256 256 256 \ + --p_hidden_layers 4 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_96_720 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 1 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 \ + --p_hidden_dims 256 256 \ + --p_hidden_layers 2 \ diff --git a/scripts/long_term_forecast/ETT_script/Nonstationary_Transformer_ETTm1.sh b/scripts/long_term_forecast/ETT_script/Nonstationary_Transformer_ETTm1.sh new file mode 100644 index 0000000..9550195 --- /dev/null +++ b/scripts/long_term_forecast/ETT_script/Nonstationary_Transformer_ETTm1.sh @@ -0,0 +1,91 @@ +export CUDA_VISIBLE_DEVICES=3 + +model_name=Nonstationary_Transformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm1.csv \ + --model_id ETTm1_96_96 \ + --model $model_name \ + --data ETTm1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 \ + --p_hidden_dims 16 16 16 16 \ + --p_hidden_layers 4 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm1.csv \ + --model_id ETTm1_96_192 \ + --model $model_name \ + --data ETTm1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 \ + --p_hidden_dims 16 16 16 16 \ + --p_hidden_layers 4 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm1.csv \ + --model_id ETTm1_96_336 \ + --model $model_name \ + --data ETTm1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 \ + --p_hidden_dims 16 16 16 16 \ + --p_hidden_layers 4 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm1.csv \ + --model_id ETTm1_96_720 \ + --model $model_name \ + --data ETTm1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --enc_in 7 \ + --dec_in 7 \ + 
--c_out 7 \ + --des 'Exp' \ + --itr 1 \ + --p_hidden_dims 16 16 16 16 \ + --p_hidden_layers 4 \ No newline at end of file diff --git a/scripts/long_term_forecast/ETT_script/Nonstationary_Transformer_ETTm2.sh b/scripts/long_term_forecast/ETT_script/Nonstationary_Transformer_ETTm2.sh new file mode 100644 index 0000000..31b3adc --- /dev/null +++ b/scripts/long_term_forecast/ETT_script/Nonstationary_Transformer_ETTm2.sh @@ -0,0 +1,95 @@ +export CUDA_VISIBLE_DEVICES=4 + +model_name=Nonstationary_Transformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm2.csv \ + --model_id ETTm2_96_96 \ + --model $model_name \ + --data ETTm2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 1 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 \ + --p_hidden_dims 256 256 256 256 \ + --p_hidden_layers 4 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm2.csv \ + --model_id ETTm2_96_192 \ + --model $model_name \ + --data ETTm2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 1 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 \ + --p_hidden_dims 256 256 256 256 \ + --p_hidden_layers 4 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm2.csv \ + --model_id ETTm2_96_336 \ + --model $model_name \ + --data ETTm2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 1 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 \ + --p_hidden_dims 16 16 16 16 \ + --p_hidden_layers 4 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm2.csv \ + --model_id ETTm2_96_720 \ + --model $model_name \ + --data ETTm2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 1 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 \ + --p_hidden_dims 16 16 16 16 \ + --p_hidden_layers 4 \ No newline at end of file diff --git a/scripts/long_term_forecast/ETT_script/PAttn_ETTh1.sh b/scripts/long_term_forecast/ETT_script/PAttn_ETTh1.sh new file mode 100644 index 0000000..c213b44 --- /dev/null +++ b/scripts/long_term_forecast/ETT_script/PAttn_ETTh1.sh @@ -0,0 +1,83 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=PAttn + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_96 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --n_heads 2 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_192 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --n_heads 8 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + 
--data_path ETTh1.csv \ + --model_id ETTh1_96_336 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --n_heads 8 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_720 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --n_heads 16 \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/ETT_script/PatchTST_ETTh1.sh b/scripts/long_term_forecast/ETT_script/PatchTST_ETTh1.sh new file mode 100644 index 0000000..b7bda80 --- /dev/null +++ b/scripts/long_term_forecast/ETT_script/PatchTST_ETTh1.sh @@ -0,0 +1,91 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=PatchTST + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_96 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 1 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --n_heads 2 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_192 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 1 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --n_heads 8 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_336 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 1 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --n_heads 8 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_720 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 1 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --n_heads 16 \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/ETT_script/PatchTST_ETTh2.sh b/scripts/long_term_forecast/ETT_script/PatchTST_ETTh2.sh new file mode 100644 index 0000000..ba25fc1 --- /dev/null +++ b/scripts/long_term_forecast/ETT_script/PatchTST_ETTh2.sh @@ -0,0 +1,91 @@ +export CUDA_VISIBLE_DEVICES=1 + +model_name=PatchTST + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_96_96 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 3 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --n_heads 4 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path 
ETTh2.csv \ + --model_id ETTh2_96_192 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 3 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --n_heads 4 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_96_336 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 3 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --n_heads 4 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_96_720 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 3 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --n_heads 4 \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/ETT_script/PatchTST_ETTm1.sh b/scripts/long_term_forecast/ETT_script/PatchTST_ETTm1.sh new file mode 100644 index 0000000..a7cfe0d --- /dev/null +++ b/scripts/long_term_forecast/ETT_script/PatchTST_ETTm1.sh @@ -0,0 +1,95 @@ +export CUDA_VISIBLE_DEVICES=4 + +model_name=PatchTST + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm1.csv \ + --model_id ETTm1_96_96 \ + --model $model_name \ + --data ETTm1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 1 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --n_heads 2 \ + --batch_size 32 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm1.csv \ + --model_id ETTm1_96_192 \ + --model $model_name \ + --data ETTm1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 3 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --n_heads 2 \ + --batch_size 128 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm1.csv \ + --model_id ETTm1_96_336 \ + --model $model_name \ + --data ETTm1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 1 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --n_heads 4 \ + --batch_size 128 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm1.csv \ + --model_id ETTm1_96_720 \ + --model $model_name \ + --data ETTm1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 3 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --n_heads 4 \ + --batch_size 128 \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/ETT_script/PatchTST_ETTm2.sh b/scripts/long_term_forecast/ETT_script/PatchTST_ETTm2.sh new file mode 100644 index 0000000..d48553d --- /dev/null +++ b/scripts/long_term_forecast/ETT_script/PatchTST_ETTm2.sh @@ -0,0 +1,95 @@ +export CUDA_VISIBLE_DEVICES=5 + 
+model_name=PatchTST + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm2.csv \ + --model_id ETTm2_96_96 \ + --model $model_name \ + --data ETTm2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 3 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --n_heads 16 \ + --batch_size 32 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm2.csv \ + --model_id ETTm2_96_192 \ + --model $model_name \ + --data ETTm2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 3 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --n_heads 2 \ + --batch_size 128 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm2.csv \ + --model_id ETTm2_96_336 \ + --model $model_name \ + --data ETTm2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 1 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --n_heads 4 \ + --batch_size 32 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm2.csv \ + --model_id ETTm2_96_720 \ + --model $model_name \ + --data ETTm2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 3 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --n_heads 4 \ + --batch_size 128 \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/ETT_script/Pyraformer_ETTh1.sh b/scripts/long_term_forecast/ETT_script/Pyraformer_ETTh1.sh new file mode 100644 index 0000000..5976c8c --- /dev/null +++ b/scripts/long_term_forecast/ETT_script/Pyraformer_ETTh1.sh @@ -0,0 +1,87 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=Pyraformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_96 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_192 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_336 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_720 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + 
--seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/ETT_script/Pyraformer_ETTh2.sh b/scripts/long_term_forecast/ETT_script/Pyraformer_ETTh2.sh new file mode 100644 index 0000000..0a08129 --- /dev/null +++ b/scripts/long_term_forecast/ETT_script/Pyraformer_ETTh2.sh @@ -0,0 +1,87 @@ +export CUDA_VISIBLE_DEVICES=2 + +model_name=Pyraformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_96_96 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_96_192 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_96_336 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_96_720 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/ETT_script/Pyraformer_ETTm1.sh b/scripts/long_term_forecast/ETT_script/Pyraformer_ETTm1.sh new file mode 100644 index 0000000..b26c54f --- /dev/null +++ b/scripts/long_term_forecast/ETT_script/Pyraformer_ETTm1.sh @@ -0,0 +1,87 @@ +export CUDA_VISIBLE_DEVICES=1 + +model_name=Pyraformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm1.csv \ + --model_id ETTm1_96_96 \ + --model $model_name \ + --data ETTm1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm1.csv \ + --model_id ETTm1_96_192 \ + --model $model_name \ + --data ETTm1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm1.csv \ + --model_id ETTm1_96_336 \ + --model $model_name \ + --data ETTm1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 
\ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm1.csv \ + --model_id ETTm1_96_720 \ + --model $model_name \ + --data ETTm1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/ETT_script/Pyraformer_ETTm2.sh b/scripts/long_term_forecast/ETT_script/Pyraformer_ETTm2.sh new file mode 100644 index 0000000..2e31c20 --- /dev/null +++ b/scripts/long_term_forecast/ETT_script/Pyraformer_ETTm2.sh @@ -0,0 +1,87 @@ +export CUDA_VISIBLE_DEVICES=2 + +model_name=Pyraformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm2.csv \ + --model_id ETTm2_96_96 \ + --model $model_name \ + --data ETTm2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 1 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm2.csv \ + --model_id ETTm2_96_192 \ + --model $model_name \ + --data ETTm2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 1 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm2.csv \ + --model_id ETTm2_96_336 \ + --model $model_name \ + --data ETTm2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 1 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm2.csv \ + --model_id ETTm2_96_720 \ + --model $model_name \ + --data ETTm2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 1 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/ETT_script/Reformer_ETTh1.sh b/scripts/long_term_forecast/ETT_script/Reformer_ETTh1.sh new file mode 100644 index 0000000..cbe5833 --- /dev/null +++ b/scripts/long_term_forecast/ETT_script/Reformer_ETTh1.sh @@ -0,0 +1,87 @@ +export CUDA_VISIBLE_DEVICES=2 + +model_name=Reformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_96 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_192 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + 
--enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_336 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_720 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/ETT_script/SegRNN_ETTh1.sh b/scripts/long_term_forecast/ETT_script/SegRNN_ETTh1.sh new file mode 100644 index 0000000..3d7527a --- /dev/null +++ b/scripts/long_term_forecast/ETT_script/SegRNN_ETTh1.sh @@ -0,0 +1,26 @@ +export CUDA_VISIBLE_DEVICES=2 + +model_name=SegRNN + +seq_len=96 +for pred_len in 96 192 336 720 +do +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_$seq_len'_'$pred_len \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len $seq_len \ + --pred_len $pred_len \ + --seg_len 24 \ + --enc_in 7 \ + --d_model 512 \ + --dropout 0.5 \ + --learning_rate 0.0001 \ + --des 'Exp' \ + --itr 1 +done diff --git a/scripts/long_term_forecast/ETT_script/SegRNN_ETTh2.sh b/scripts/long_term_forecast/ETT_script/SegRNN_ETTh2.sh new file mode 100644 index 0000000..e761708 --- /dev/null +++ b/scripts/long_term_forecast/ETT_script/SegRNN_ETTh2.sh @@ -0,0 +1,26 @@ +export CUDA_VISIBLE_DEVICES=2 + +model_name=SegRNN + +seq_len=96 +for pred_len in 96 192 336 720 +do +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_$seq_len'_'$pred_len \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len $seq_len \ + --pred_len $pred_len \ + --seg_len 24 \ + --enc_in 7 \ + --d_model 512 \ + --dropout 0.5 \ + --learning_rate 0.0001 \ + --des 'Exp' \ + --itr 1 +done diff --git a/scripts/long_term_forecast/ETT_script/SegRNN_ETTm1.sh b/scripts/long_term_forecast/ETT_script/SegRNN_ETTm1.sh new file mode 100644 index 0000000..be406c4 --- /dev/null +++ b/scripts/long_term_forecast/ETT_script/SegRNN_ETTm1.sh @@ -0,0 +1,26 @@ +export CUDA_VISIBLE_DEVICES=2 + +model_name=SegRNN + +seq_len=96 +for pred_len in 96 192 336 720 +do +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm1.csv \ + --model_id ETTm1_$seq_len'_'$pred_len \ + --model $model_name \ + --data ETTm1 \ + --features M \ + --seq_len $seq_len \ + --pred_len $pred_len \ + --seg_len 48 \ + --enc_in 7 \ + --d_model 512 \ + --dropout 0.5 \ + --learning_rate 0.0001 \ + --des 'Exp' \ + --itr 1 +done diff --git a/scripts/long_term_forecast/ETT_script/SegRNN_ETTm2.sh b/scripts/long_term_forecast/ETT_script/SegRNN_ETTm2.sh new file mode 100644 index 0000000..756decb --- /dev/null +++ b/scripts/long_term_forecast/ETT_script/SegRNN_ETTm2.sh @@ -0,0 +1,26 @@ +export CUDA_VISIBLE_DEVICES=2 + +model_name=SegRNN + +seq_len=96 +for pred_len 
in 96 192 336 720 +do +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm2.csv \ + --model_id ETTm2_$seq_len'_'$pred_len \ + --model $model_name \ + --data ETTm2 \ + --features M \ + --seq_len $seq_len \ + --pred_len $pred_len \ + --seg_len 48 \ + --enc_in 7 \ + --d_model 512 \ + --dropout 0.5 \ + --learning_rate 0.0001 \ + --des 'Exp' \ + --itr 1 +done diff --git a/scripts/long_term_forecast/ETT_script/TSMixer_ETTh1.sh b/scripts/long_term_forecast/ETT_script/TSMixer_ETTh1.sh new file mode 100755 index 0000000..57e2f49 --- /dev/null +++ b/scripts/long_term_forecast/ETT_script/TSMixer_ETTh1.sh @@ -0,0 +1,86 @@ + +model_name=TSMixer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_96 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_192 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_336 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_720 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/ETT_script/TSMixer_ETTh2.sh b/scripts/long_term_forecast/ETT_script/TSMixer_ETTh2.sh new file mode 100755 index 0000000..6c33b0c --- /dev/null +++ b/scripts/long_term_forecast/ETT_script/TSMixer_ETTh2.sh @@ -0,0 +1,86 @@ + +model_name=TSMixer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_96_96 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_96_192 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path 
ETTh2.csv \ + --model_id ETTh2_96_336 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_96_720 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/ETT_script/TSMixer_ETTm1.sh b/scripts/long_term_forecast/ETT_script/TSMixer_ETTm1.sh new file mode 100755 index 0000000..7fbd15d --- /dev/null +++ b/scripts/long_term_forecast/ETT_script/TSMixer_ETTm1.sh @@ -0,0 +1,86 @@ + +model_name=TSMixer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm1.csv \ + --model_id ETTm1_96_96 \ + --model $model_name \ + --data ETTm1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm1.csv \ + --model_id ETTm1_96_192 \ + --model $model_name \ + --data ETTm1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm1.csv \ + --model_id ETTm1_96_336 \ + --model $model_name \ + --data ETTm1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm1.csv \ + --model_id ETTm1_96_720 \ + --model $model_name \ + --data ETTm1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/ETT_script/TSMixer_ETTm2.sh b/scripts/long_term_forecast/ETT_script/TSMixer_ETTm2.sh new file mode 100755 index 0000000..709b589 --- /dev/null +++ b/scripts/long_term_forecast/ETT_script/TSMixer_ETTm2.sh @@ -0,0 +1,86 @@ + +model_name=TSMixer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm2.csv \ + --model_id ETTm2_96_96 \ + --model $model_name \ + --data ETTm2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 1 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm2.csv \ + --model_id ETTm2_96_192 \ + --model $model_name \ + --data ETTm2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + 
--pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 1 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm2.csv \ + --model_id ETTm2_96_336 \ + --model $model_name \ + --data ETTm2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 1 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm2.csv \ + --model_id ETTm2_96_720 \ + --model $model_name \ + --data ETTm2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 1 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/ETT_script/TiDE_ETTh1.sh b/scripts/long_term_forecast/ETT_script/TiDE_ETTh1.sh new file mode 100644 index 0000000..31f56ce --- /dev/null +++ b/scripts/long_term_forecast/ETT_script/TiDE_ETTh1.sh @@ -0,0 +1,112 @@ +export CUDA_VISIBLE_DEVICES=2 + +model_name=TiDE + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_96 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 2 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 8 \ + --d_model 256 \ + --d_ff 256 \ + --dropout 0.3 \ + --batch_size 512 \ + --learning_rate 0.1 \ + --patience 5 \ + --train_epochs 10 \ + + + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_192 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 2 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 8 \ + --d_model 256 \ + --d_ff 256 \ + --dropout 0.3 \ + --batch_size 512 \ + --learning_rate 0.1 \ + --patience 5 \ + --train_epochs 10 \ + + + + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_336 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 2 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 8 \ + --d_model 256 \ + --d_ff 256 \ + --dropout 0.3 \ + --batch_size 512 \ + --learning_rate 0.1 \ + --patience 5 \ + --train_epochs 10 \ + + + + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_720 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 2 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 8 \ + --d_model 256 \ + --d_ff 256 \ + --dropout 0.3 \ + --batch_size 512 \ + --learning_rate 0.1 \ + --patience 5 \ + --train_epochs 10 \ + diff --git a/scripts/long_term_forecast/ETT_script/TimeMixer_ETTh1.sh b/scripts/long_term_forecast/ETT_script/TimeMixer_ETTh1.sh new file mode 100755 index 0000000..b0f447f --- /dev/null +++ b/scripts/long_term_forecast/ETT_script/TimeMixer_ETTh1.sh @@ -0,0 +1,125 @@ +export 
CUDA_VISIBLE_DEVICES=0 + +model_name=TimeMixer + +seq_len=96 +e_layers=2 +down_sampling_layers=3 +down_sampling_window=2 +learning_rate=0.01 +d_model=16 +d_ff=32 +train_epochs=10 +patience=10 +batch_size=16 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/\ + --data_path ETTh1.csv \ + --model_id ETTh1_$seq_len'_'96 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len $seq_len \ + --label_len 0 \ + --pred_len 96 \ + --e_layers $e_layers \ + --enc_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 \ + --d_model $d_model \ + --d_ff $d_ff \ + --learning_rate $learning_rate \ + --train_epochs $train_epochs \ + --patience $patience \ + --batch_size 128 \ + --down_sampling_layers $down_sampling_layers \ + --down_sampling_method avg \ + --down_sampling_window $down_sampling_window + + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_$seq_len'_'192 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len $seq_len \ + --label_len 0 \ + --pred_len 192 \ + --e_layers $e_layers \ + --enc_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 \ + --d_model $d_model \ + --d_ff $d_ff \ + --learning_rate $learning_rate \ + --train_epochs $train_epochs \ + --patience $patience \ + --batch_size 128 \ + --down_sampling_layers $down_sampling_layers \ + --down_sampling_method avg \ + --down_sampling_window $down_sampling_window + + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_$seq_len'_'336 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len $seq_len \ + --label_len 0 \ + --pred_len 336 \ + --e_layers $e_layers \ + --enc_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 \ + --d_model $d_model \ + --d_ff $d_ff \ + --learning_rate $learning_rate \ + --train_epochs $train_epochs \ + --patience $patience \ + --batch_size 128 \ + --down_sampling_layers $down_sampling_layers \ + --down_sampling_method avg \ + --down_sampling_window $down_sampling_window + + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_$seq_len'_'720 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len $seq_len \ + --label_len 0 \ + --pred_len 720 \ + --e_layers $e_layers \ + --enc_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 \ + --d_model $d_model \ + --d_ff $d_ff \ + --learning_rate $learning_rate \ + --train_epochs $train_epochs \ + --patience $patience \ + --batch_size 128 \ + --down_sampling_layers $down_sampling_layers \ + --down_sampling_method avg \ + --down_sampling_window $down_sampling_window diff --git a/scripts/long_term_forecast/ETT_script/TimeMixer_ETTh2.sh b/scripts/long_term_forecast/ETT_script/TimeMixer_ETTh2.sh new file mode 100755 index 0000000..54492a4 --- /dev/null +++ b/scripts/long_term_forecast/ETT_script/TimeMixer_ETTh2.sh @@ -0,0 +1,111 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=TimeMixer + +seq_len=96 +e_layers=2 +down_sampling_layers=3 +down_sampling_window=2 +learning_rate=0.01 +d_model=16 +d_ff=32 +batch_size=16 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/\ + --data_path ETTh2.csv \ + --model_id ETTh2_$seq_len'_'96 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len $seq_len \ + 
--label_len 0 \ + --pred_len 96 \ + --e_layers $e_layers \ + --enc_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 \ + --d_model $d_model \ + --d_ff $d_ff \ + --learning_rate $learning_rate \ + --down_sampling_layers $down_sampling_layers \ + --down_sampling_method avg \ + --down_sampling_window $down_sampling_window + + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_$seq_len'_'192 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len $seq_len \ + --label_len 0 \ + --pred_len 192 \ + --e_layers $e_layers \ + --enc_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 \ + --d_model $d_model \ + --d_ff $d_ff \ + --learning_rate $learning_rate \ + --down_sampling_layers $down_sampling_layers \ + --down_sampling_method avg \ + --down_sampling_window $down_sampling_window + + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_$seq_len'_'336 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len $seq_len \ + --label_len 0 \ + --pred_len 336 \ + --e_layers $e_layers \ + --enc_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 \ + --d_model $d_model \ + --d_ff $d_ff \ + --learning_rate $learning_rate \ + --down_sampling_layers $down_sampling_layers \ + --down_sampling_method avg \ + --down_sampling_window $down_sampling_window + + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_$seq_len'_'720 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len $seq_len \ + --label_len 0 \ + --pred_len 720 \ + --e_layers $e_layers \ + --enc_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 \ + --d_model $d_model \ + --d_ff $d_ff \ + --learning_rate $learning_rate \ + --down_sampling_layers $down_sampling_layers \ + --down_sampling_method avg \ + --down_sampling_window $down_sampling_window diff --git a/scripts/long_term_forecast/ETT_script/TimeMixer_ETTm1.sh b/scripts/long_term_forecast/ETT_script/TimeMixer_ETTm1.sh new file mode 100755 index 0000000..d26fa42 --- /dev/null +++ b/scripts/long_term_forecast/ETT_script/TimeMixer_ETTm1.sh @@ -0,0 +1,115 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=TimeMixer + +seq_len=96 +e_layers=2 +down_sampling_layers=3 +down_sampling_window=2 +learning_rate=0.01 +d_model=16 +d_ff=32 +batch_size=16 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/\ + --data_path ETTm1.csv \ + --model_id ETTm1_$seq_len'_'96 \ + --model $model_name \ + --data ETTm1 \ + --features M \ + --seq_len $seq_len \ + --label_len 0 \ + --pred_len 96 \ + --e_layers $e_layers \ + --enc_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 \ + --d_model $d_model \ + --d_ff $d_ff \ + --batch_size $batch_size \ + --learning_rate $learning_rate \ + --down_sampling_layers $down_sampling_layers \ + --down_sampling_method avg \ + --down_sampling_window $down_sampling_window + + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm1.csv \ + --model_id ETTm1_$seq_len'_'192 \ + --model $model_name \ + --data ETTm1 \ + --features M \ + --seq_len $seq_len \ + --label_len 0 \ + --pred_len 192 \ + --e_layers $e_layers \ + --enc_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 \ + --d_model $d_model \ + --d_ff $d_ff \ + --batch_size 
$batch_size \ + --learning_rate $learning_rate \ + --down_sampling_layers $down_sampling_layers \ + --down_sampling_method avg \ + --down_sampling_window $down_sampling_window + + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm1.csv \ + --model_id ETTm1_$seq_len'_'336 \ + --model $model_name \ + --data ETTm1 \ + --features M \ + --seq_len $seq_len \ + --label_len 0 \ + --pred_len 336 \ + --e_layers $e_layers \ + --enc_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 \ + --d_model $d_model \ + --d_ff $d_ff \ + --batch_size $batch_size \ + --learning_rate $learning_rate \ + --down_sampling_layers $down_sampling_layers \ + --down_sampling_method avg \ + --down_sampling_window $down_sampling_window + + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm1.csv \ + --model_id ETTm1_$seq_len'_'720 \ + --model $model_name \ + --data ETTm1 \ + --features M \ + --seq_len $seq_len \ + --label_len 0 \ + --pred_len 720 \ + --e_layers $e_layers \ + --enc_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 \ + --d_model $d_model \ + --d_ff $d_ff \ + --batch_size $batch_size \ + --learning_rate $learning_rate \ + --down_sampling_layers $down_sampling_layers \ + --down_sampling_method avg \ + --down_sampling_window $down_sampling_window diff --git a/scripts/long_term_forecast/ETT_script/TimeMixer_ETTm2.sh b/scripts/long_term_forecast/ETT_script/TimeMixer_ETTm2.sh new file mode 100755 index 0000000..c53df60 --- /dev/null +++ b/scripts/long_term_forecast/ETT_script/TimeMixer_ETTm2.sh @@ -0,0 +1,115 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=TimeMixer + +seq_len=96 +e_layers=2 +down_sampling_layers=3 +down_sampling_window=2 +learning_rate=0.01 +d_model=32 +d_ff=32 +batch_size=16 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/\ + --data_path ETTm2.csv \ + --model_id ETTm2_$seq_len'_'96 \ + --model $model_name \ + --data ETTm2 \ + --features M \ + --seq_len $seq_len \ + --label_len 0 \ + --pred_len 96 \ + --e_layers $e_layers \ + --enc_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 \ + --d_model $d_model \ + --d_ff $d_ff \ + --batch_size 128 \ + --learning_rate $learning_rate \ + --down_sampling_layers $down_sampling_layers \ + --down_sampling_method avg \ + --down_sampling_window $down_sampling_window + + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm2.csv \ + --model_id ETTm2_$seq_len'_'192 \ + --model $model_name \ + --data ETTm2 \ + --features M \ + --seq_len $seq_len \ + --label_len 0 \ + --pred_len 192 \ + --e_layers $e_layers \ + --enc_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 \ + --d_model $d_model \ + --d_ff $d_ff \ + --batch_size 128 \ + --learning_rate $learning_rate \ + --down_sampling_layers $down_sampling_layers \ + --down_sampling_method avg \ + --down_sampling_window $down_sampling_window + + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm2.csv \ + --model_id ETTm2_$seq_len'_'336 \ + --model $model_name \ + --data ETTm2 \ + --features M \ + --seq_len $seq_len \ + --label_len 0 \ + --pred_len 336 \ + --e_layers $e_layers \ + --enc_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 \ + --d_model $d_model \ + --d_ff $d_ff \ + --batch_size 128 \ + --learning_rate $learning_rate \ + --down_sampling_layers 
$down_sampling_layers \ + --down_sampling_method avg \ + --down_sampling_window $down_sampling_window + + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm2.csv \ + --model_id ETTm2_$seq_len'_'720 \ + --model $model_name \ + --data ETTm2 \ + --features M \ + --seq_len $seq_len \ + --label_len 0 \ + --pred_len 720 \ + --e_layers $e_layers \ + --enc_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 \ + --d_model $d_model \ + --d_ff $d_ff \ + --batch_size 128 \ + --learning_rate $learning_rate \ + --down_sampling_layers $down_sampling_layers \ + --down_sampling_method avg \ + --down_sampling_window $down_sampling_window diff --git a/scripts/long_term_forecast/ETT_script/TimeXer_ETTh1.sh b/scripts/long_term_forecast/ETT_script/TimeXer_ETTh1.sh new file mode 100644 index 0000000..928b679 --- /dev/null +++ b/scripts/long_term_forecast/ETT_script/TimeXer_ETTh1.sh @@ -0,0 +1,94 @@ +export CUDA_VISIBLE_DEVICES=1 + +model_name=TimeXer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_96 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --d_model 256 \ + --batch_size 4 \ + --des 'exp' \ + --itr 1 + + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_192 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --d_model 128 \ + --batch_size 4 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_336 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --d_model 512 \ + --d_ff 1024 \ + --batch_size 16 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_720 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --d_model 256 \ + --d_ff 1024 \ + --batch_size 16 \ + --itr 1 diff --git a/scripts/long_term_forecast/ETT_script/TimeXer_ETTh2.sh b/scripts/long_term_forecast/ETT_script/TimeXer_ETTh2.sh new file mode 100644 index 0000000..cc29628 --- /dev/null +++ b/scripts/long_term_forecast/ETT_script/TimeXer_ETTh2.sh @@ -0,0 +1,94 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=TimeXer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_96_96 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --d_model 256 \ + --d_ff 1024 \ + --batch_size 16 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 
\ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_96_192 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --d_model 256 \ + --d_ff 1024 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_96_336 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --d_model 512 \ + --d_ff 1024 \ + --batch_size 4 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_96_720 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --d_model 256 \ + --d_ff 1024 \ + --batch_size 16 \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/ETT_script/TimeXer_ETTm1.sh b/scripts/long_term_forecast/ETT_script/TimeXer_ETTm1.sh new file mode 100644 index 0000000..95f20d1 --- /dev/null +++ b/scripts/long_term_forecast/ETT_script/TimeXer_ETTm1.sh @@ -0,0 +1,94 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=TimeXer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm1.csv \ + --model_id ETTm1_96_96 \ + --model $model_name \ + --data ETTm1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --d_model 256 \ + --batch_size 4 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm1.csv \ + --model_id ETTm1_96_192 \ + --model $model_name \ + --data ETTm1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --d_model 256 \ + --d_ff 256 \ + --batch_size 4 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm1.csv \ + --model_id ETTm1_96_336 \ + --model $model_name \ + --data ETTm1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --d_model 256 \ + --d_ff 1024 \ + --batch_size 4 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm1.csv \ + --model_id ETTm1_96_720 \ + --model $model_name \ + --data ETTm1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --d_model 256 \ + --d_ff 512 \ + --batch_size 4 \ + --des 'Exp' \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/ETT_script/TimeXer_ETTm2.sh b/scripts/long_term_forecast/ETT_script/TimeXer_ETTm2.sh new file mode 100644 index 0000000..9261199 --- /dev/null +++ b/scripts/long_term_forecast/ETT_script/TimeXer_ETTm2.sh 
@@ -0,0 +1,95 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=TimeXer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm2.csv \ + --model_id ETTm2_96_96 \ + --model $model_name \ + --data ETTm2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 1 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --d_model 256 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm2.csv \ + --model_id ETTm2_96_192 \ + --model $model_name \ + --data ETTm2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 1 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --d_model 256 \ + --d_ff 1024 \ + --batch_size 16 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm2.csv \ + --model_id ETTm2_96_336 \ + --model $model_name \ + --data ETTm2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 1 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --d_model 512 \ + --d_ff 1024 \ + --des 'Exp' \ + --itr 1 + + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm2.csv \ + --model_id ETTm2_96_720 \ + --model $model_name \ + --data ETTm2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 1 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --d_model 512 \ + --des 'Exp' \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/ETT_script/TimesNet_ETTh1.sh b/scripts/long_term_forecast/ETT_script/TimesNet_ETTh1.sh new file mode 100644 index 0000000..6a03e5f --- /dev/null +++ b/scripts/long_term_forecast/ETT_script/TimesNet_ETTh1.sh @@ -0,0 +1,102 @@ +export CUDA_VISIBLE_DEVICES=2 + +model_name=TimesNet + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_96 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --d_model 16 \ + --d_ff 32 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 + + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_192 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --d_model 16 \ + --d_ff 32 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 + + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_336 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --d_model 16 \ + --d_ff 32 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 + + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 
\ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_720 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --d_model 16 \ + --d_ff 32 \ + --des 'Exp' \ + --itr 1 \ + --top_k 5 diff --git a/scripts/long_term_forecast/ETT_script/TimesNet_ETTh2.sh b/scripts/long_term_forecast/ETT_script/TimesNet_ETTh2.sh new file mode 100644 index 0000000..1741642 --- /dev/null +++ b/scripts/long_term_forecast/ETT_script/TimesNet_ETTh2.sh @@ -0,0 +1,99 @@ +export CUDA_VISIBLE_DEVICES=2 + +model_name=TimesNet + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_96_96 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --d_model 32 \ + --d_ff 32 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_96_192 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --d_model 32 \ + --d_ff 32 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_96_336 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --d_model 32 \ + --d_ff 32 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_96_720 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --d_model 32 \ + --d_ff 32 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 diff --git a/scripts/long_term_forecast/ETT_script/TimesNet_ETTm1.sh b/scripts/long_term_forecast/ETT_script/TimesNet_ETTm1.sh new file mode 100644 index 0000000..7aea983 --- /dev/null +++ b/scripts/long_term_forecast/ETT_script/TimesNet_ETTm1.sh @@ -0,0 +1,100 @@ +export CUDA_VISIBLE_DEVICES=2 + +model_name=TimesNet + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm1.csv \ + --model_id ETTm1_96_96 \ + --model $model_name \ + --data ETTm1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --d_model 64 \ + --d_ff 64 \ + --top_k 5 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm1.csv \ + --model_id ETTm1_96_192 \ + --model $model_name \ + --data ETTm1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 
3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --d_model 64 \ + --d_ff 64 \ + --top_k 5 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm1.csv \ + --model_id ETTm1_96_336 \ + --model $model_name \ + --data ETTm1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --d_model 16 \ + --d_ff 32 \ + --top_k 5 \ + --itr 1 \ + --train_epochs 3 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm1.csv \ + --model_id ETTm1_96_720 \ + --model $model_name \ + --data ETTm1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --d_model 16 \ + --d_ff 32 \ + --top_k 5 \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/ETT_script/TimesNet_ETTm2.sh b/scripts/long_term_forecast/ETT_script/TimesNet_ETTm2.sh new file mode 100644 index 0000000..4c5f8d1 --- /dev/null +++ b/scripts/long_term_forecast/ETT_script/TimesNet_ETTm2.sh @@ -0,0 +1,101 @@ +export CUDA_VISIBLE_DEVICES=1 + +model_name=TimesNet + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm2.csv \ + --model_id ETTm2_96_96 \ + --model $model_name \ + --data ETTm2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --d_model 32 \ + --d_ff 32 \ + --top_k 5 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm2.csv \ + --model_id ETTm2_96_192 \ + --model $model_name \ + --data ETTm2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --d_model 32 \ + --d_ff 32 \ + --top_k 5 \ + --itr 1 \ + --train_epochs 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm2.csv \ + --model_id ETTm2_96_336 \ + --model $model_name \ + --data ETTm2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --d_model 32 \ + --d_ff 32 \ + --top_k 5 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm2.csv \ + --model_id ETTm2_96_720 \ + --model $model_name \ + --data ETTm2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --d_model 16 \ + --d_ff 32 \ + --top_k 5 \ + --itr 1 \ + --train_epochs 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/ETT_script/Transformer_ETTh1.sh b/scripts/long_term_forecast/ETT_script/Transformer_ETTh1.sh new file mode 100644 index 0000000..57eece1 --- /dev/null +++ b/scripts/long_term_forecast/ETT_script/Transformer_ETTh1.sh @@ -0,0 +1,87 @@ +export CUDA_VISIBLE_DEVICES=2 + +model_name=Transformer 
+ +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_96 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_192 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_336 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh1.csv \ + --model_id ETTh1_96_720 \ + --model $model_name \ + --data ETTh1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/ETT_script/Transformer_ETTh2.sh b/scripts/long_term_forecast/ETT_script/Transformer_ETTh2.sh new file mode 100644 index 0000000..1228b03 --- /dev/null +++ b/scripts/long_term_forecast/ETT_script/Transformer_ETTh2.sh @@ -0,0 +1,87 @@ +export CUDA_VISIBLE_DEVICES=2 + +model_name=Transformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_96_96 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_96_192 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_96_336 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_96_720 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 
\ No newline at end of file diff --git a/scripts/long_term_forecast/ETT_script/Transformer_ETTm1.sh b/scripts/long_term_forecast/ETT_script/Transformer_ETTm1.sh new file mode 100644 index 0000000..7966a93 --- /dev/null +++ b/scripts/long_term_forecast/ETT_script/Transformer_ETTm1.sh @@ -0,0 +1,83 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=Transformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm1.csv \ + --model_id ETTm1_96_96 \ + --model $model_name \ + --data ETTm1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm1.csv \ + --model_id ETTm1_96_192 \ + --model $model_name \ + --data ETTm1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm1.csv \ + --model_id ETTm1_96_336 \ + --model $model_name \ + --data ETTm1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm1.csv \ + --model_id ETTm1_96_720 \ + --model $model_name \ + --data ETTm1 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/ETT_script/Transformer_ETTm2.sh b/scripts/long_term_forecast/ETT_script/Transformer_ETTm2.sh new file mode 100644 index 0000000..ce6fd1f --- /dev/null +++ b/scripts/long_term_forecast/ETT_script/Transformer_ETTm2.sh @@ -0,0 +1,87 @@ +export CUDA_VISIBLE_DEVICES=2 + +model_name=Transformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm2.csv \ + --model_id ETTm2_96_96 \ + --model $model_name \ + --data ETTm2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 1 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm2.csv \ + --model_id ETTm2_96_192 \ + --model $model_name \ + --data ETTm2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 1 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTm2.csv \ + --model_id ETTm2_96_336 \ + --model $model_name \ + --data ETTm2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 1 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path 
./dataset/ETT-small/ \ + --data_path ETTm2.csv \ + --model_id ETTm2_96_720 \ + --model $model_name \ + --data ETTm2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 1 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/ETT_script/WPMixer_ETTh1.sh b/scripts/long_term_forecast/ETT_script/WPMixer_ETTh1.sh new file mode 100644 index 0000000..0f4c07c --- /dev/null +++ b/scripts/long_term_forecast/ETT_script/WPMixer_ETTh1.sh @@ -0,0 +1,50 @@ + +# Set the GPU to use +export CUDA_VISIBLE_DEVICES=0 + +# Model name +model_name=WPMixer + +# Datasets and prediction lengths +dataset=ETTh1 +seq_lens=(512 512 512 512) +pred_lens=(96 192 336 720) +learning_rates=(0.000242438 0.000201437 0.000132929 0.000239762) +batches=(256 256 256 256) +epochs=(30 30 30 30) +dropouts=(0.4 0.05 0.0 0.2) +patch_lens=(16 16 16 16) +lradjs=(type3 type3 type3 type3) +d_models=(256 256 256 128) +patiences=(12 12 12 12) + +# Model params below need to be set in WPMixer.py Line 15, instead of this script +wavelets=(db2 db3 db2 db2) +levels=(2 2 1 1) +tfactors=(5 5 3 5) +dfactors=(8 5 3 3) +strides=(8 8 8 8) + +# Loop over datasets and prediction lengths +for i in "${!pred_lens[@]}"; do + python -u run.py \ + --is_training 1 \ + --root_path ./data/ETT/ \ + --data_path ETTh1.csv \ + --model_id wpmixer \ + --model $model_name \ + --task_name long_term_forecast \ + --data $dataset \ + --seq_len ${seq_lens[$i]} \ + --pred_len ${pred_lens[$i]} \ + --label_len 0 \ + --d_model ${d_models[$i]} \ + --patch_len ${patch_lens[$i]} \ + --batch_size ${batches[$i]} \ + --learning_rate ${learning_rates[$i]} \ + --lradj ${lradjs[$i]} \ + --dropout ${dropouts[$i]} \ + --patience ${patiences[$i]} \ + --train_epochs ${epochs[$i]} \ + --use_amp +done diff --git a/scripts/long_term_forecast/ETT_script/WPMixer_ETTh2.sh b/scripts/long_term_forecast/ETT_script/WPMixer_ETTh2.sh new file mode 100644 index 0000000..7195138 --- /dev/null +++ b/scripts/long_term_forecast/ETT_script/WPMixer_ETTh2.sh @@ -0,0 +1,49 @@ + +export CUDA_VISIBLE_DEVICES=0 + +# Model name +model_name=WPMixer + +# Datasets and prediction lengths +dataset=ETTh2 +seq_lens=(512 512 512 512) +pred_lens=(96 192 336 720) +learning_rates=(0.000466278 0.000294929 0.000617476 0.000810205) +batches=(256 256 256 256) +epochs=(30 30 30 30) +dropouts=(0.0 0.0 0.1 0.4) +patch_lens=(16 16 16 16) +lradjs=(type3 type3 type3 type3) +d_models=(256 256 128 128) +patiences=(12 12 12 12) + +# Model params below need to be set in WPMixer.py Line 15, instead of this script +wavelets=(db2 db2 db2 db2) +levels=(2 3 5 5) +tfactors=(5 3 5 5) +dfactors=(5 8 3 5) +strides=(8 8 8 8) + +# Loop over datasets and prediction lengths +for i in "${!pred_lens[@]}"; do + python -u run.py \ + --is_training 1 \ + --root_path ./data/ETT/ \ + --data_path ETTh2.csv \ + --model_id wpmixer \ + --model $model_name \ + --task_name long_term_forecast \ + --data $dataset \ + --seq_len ${seq_lens[$i]} \ + --pred_len ${pred_lens[$i]} \ + --label_len 0 \ + --d_model ${d_models[$i]} \ + --patch_len ${patch_lens[$i]} \ + --batch_size ${batches[$i]} \ + --learning_rate ${learning_rates[$i]} \ + --lradj ${lradjs[$i]} \ + --dropout ${dropouts[$i]} \ + --patience ${patiences[$i]} \ + --train_epochs ${epochs[$i]} \ + --use_amp +done diff --git a/scripts/long_term_forecast/ETT_script/WPMixer_ETTm1.sh b/scripts/long_term_forecast/ETT_script/WPMixer_ETTm1.sh new file mode 100644 index 
0000000..31c3c0e --- /dev/null +++ b/scripts/long_term_forecast/ETT_script/WPMixer_ETTm1.sh @@ -0,0 +1,49 @@ + +export CUDA_VISIBLE_DEVICES=0 + +# Model name +model_name=WPMixer + +# Datasets and prediction lengths +dataset=ETTm1 +seq_lens=(512 512 512 512) +pred_lens=(96 192 336 720) +learning_rates=(0.001277976 0.002415901 0.001594735 0.002011441) +batches=(256 256 256 256) +epochs=(80 80 80 80) +dropouts=(0.4 0.4 0.4 0.4) +patch_lens=(48 48 48 48) +lradjs=(type3 type3 type3 type3) +d_models=(256 128 256 128) +patiences=(12 12 12 12) + +# Model params below need to be set in WPMixer.py Line 15, instead of this script +wavelets=(db2 db3 db5 db5) +levels=(1 1 1 4) +tfactors=(5 3 7 3) +dfactors=(3 7 7 8) +strides=(24 24 24 24) + +# Loop over datasets and prediction lengths +for i in "${!pred_lens[@]}"; do + python -u run.py \ + --is_training 1 \ + --root_path ./data/ETT/ \ + --data_path ETTm1.csv \ + --model_id wpmixer \ + --model $model_name \ + --task_name long_term_forecast \ + --data $dataset \ + --seq_len ${seq_lens[$i]} \ + --pred_len ${pred_lens[$i]} \ + --label_len 0 \ + --d_model ${d_models[$i]} \ + --patch_len ${patch_lens[$i]} \ + --batch_size ${batches[$i]} \ + --learning_rate ${learning_rates[$i]} \ + --lradj ${lradjs[$i]} \ + --dropout ${dropouts[$i]} \ + --patience ${patiences[$i]} \ + --train_epochs ${epochs[$i]} \ + --use_amp +done diff --git a/scripts/long_term_forecast/ETT_script/WPMixer_ETTm2.sh b/scripts/long_term_forecast/ETT_script/WPMixer_ETTm2.sh new file mode 100644 index 0000000..56093cf --- /dev/null +++ b/scripts/long_term_forecast/ETT_script/WPMixer_ETTm2.sh @@ -0,0 +1,50 @@ + +# Set the GPU to use +export CUDA_VISIBLE_DEVICES=0 + +# Model name +model_name=WPMixer + +# Datasets and prediction lengths +dataset=ETTm2 +seq_lens=(512 512 512 512) +pred_lens=(96 192 336 720) +learning_rates=(0.00076587 0.000275775 0.000234608 0.001039536) +batches=(256 256 256 256) +epochs=(80 80 80 80) +dropouts=(0.4 0.2 0.4 0.4) +patch_lens=(48 48 48 48) +lradjs=(type3 type3 type3 type3) +d_models=(256 256 256 256) +patiences=(12 12 12 12) + +# Model params below need to be set in WPMixer.py Line 15, instead of this script +wavelets=(bior3.1 db2 db2 db2) +levels=(1 1 1 1) +tfactors=(3 3 3 3) +dfactors=(8 7 5 8) +strides=(24 24 24 24) + +# Loop over datasets and prediction lengths +for i in "${!pred_lens[@]}"; do + python -u run.py \ + --is_training 1 \ + --root_path ./data/ETT/ \ + --data_path ETTm2.csv \ + --model_id wpmixer \ + --model $model_name \ + --task_name long_term_forecast \ + --data $dataset \ + --seq_len ${seq_lens[$i]} \ + --pred_len ${pred_lens[$i]} \ + --label_len 0 \ + --d_model ${d_models[$i]} \ + --patch_len ${patch_lens[$i]} \ + --batch_size ${batches[$i]} \ + --learning_rate ${learning_rates[$i]} \ + --lradj ${lradjs[$i]} \ + --dropout ${dropouts[$i]} \ + --patience ${patiences[$i]} \ + --train_epochs ${epochs[$i]} \ + --use_amp +done diff --git a/scripts/long_term_forecast/ETT_script/iTransformer_ETTh2.sh b/scripts/long_term_forecast/ETT_script/iTransformer_ETTh2.sh new file mode 100644 index 0000000..e1a58e9 --- /dev/null +++ b/scripts/long_term_forecast/ETT_script/iTransformer_ETTh2.sh @@ -0,0 +1,95 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=iTransformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_96_96 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + 
--d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --d_model 128 \ + --d_ff 128 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_96_192 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --d_model 128 \ + --d_ff 128 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_96_336 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --d_model 128 \ + --d_ff 128 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/ETT-small/ \ + --data_path ETTh2.csv \ + --model_id ETTh2_96_720 \ + --model $model_name \ + --data ETTh2 \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --d_model 128 \ + --d_ff 128 \ + --itr 1 diff --git a/scripts/long_term_forecast/Exchange_script/Autoformer.sh b/scripts/long_term_forecast/Exchange_script/Autoformer.sh new file mode 100644 index 0000000..a7de6f5 --- /dev/null +++ b/scripts/long_term_forecast/Exchange_script/Autoformer.sh @@ -0,0 +1,89 @@ +export CUDA_VISIBLE_DEVICES=7 + +model_name=Autoformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/exchange_rate/ \ + --data_path exchange_rate.csv \ + --model_id Exchange_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 8 \ + --dec_in 8 \ + --c_out 8 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/exchange_rate/ \ + --data_path exchange_rate.csv \ + --model_id Exchange_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 8 \ + --dec_in 8 \ + --c_out 8 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/exchange_rate/ \ + --data_path exchange_rate.csv \ + --model_id Exchange_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 8 \ + --dec_in 8 \ + --c_out 8 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/exchange_rate/ \ + --data_path exchange_rate.csv \ + --model_id Exchange_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 8 \ + --dec_in 8 \ + --c_out 8 \ + --des 'Exp' \ + --itr 1 \ No newline at end of file diff --git 
a/scripts/long_term_forecast/Exchange_script/Crossformer.sh b/scripts/long_term_forecast/Exchange_script/Crossformer.sh new file mode 100644 index 0000000..5e3c62e --- /dev/null +++ b/scripts/long_term_forecast/Exchange_script/Crossformer.sh @@ -0,0 +1,101 @@ +export CUDA_VISIBLE_DEVICES=4 + +model_name=Crossformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/exchange_rate/ \ + --data_path exchange_rate.csv \ + --model_id Exchange_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 96 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 8 \ + --dec_in 8 \ + --c_out 8 \ + --d_model 64 \ + --d_ff 64 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/exchange_rate/ \ + --data_path exchange_rate.csv \ + --model_id Exchange_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 96 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 8 \ + --dec_in 8 \ + --c_out 8 \ + --d_model 64 \ + --d_ff 64 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/exchange_rate/ \ + --data_path exchange_rate.csv \ + --model_id Exchange_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 96 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 8 \ + --dec_in 8 \ + --c_out 8 \ + --d_model 32 \ + --d_ff 32 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/exchange_rate/ \ + --data_path exchange_rate.csv \ + --model_id Exchange_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 96 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 8 \ + --dec_in 8 \ + --c_out 8 \ + --d_model 32 \ + --d_ff 32 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 1 diff --git a/scripts/long_term_forecast/Exchange_script/FiLM.sh b/scripts/long_term_forecast/Exchange_script/FiLM.sh new file mode 100644 index 0000000..6e0b082 --- /dev/null +++ b/scripts/long_term_forecast/Exchange_script/FiLM.sh @@ -0,0 +1,92 @@ +export CUDA_VISIBLE_DEVICES=5 + +model_name=FiLM + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/exchange_rate/ \ + --data_path exchange_rate.csv \ + --model_id Exchange_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 384 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 8 \ + --dec_in 8 \ + --c_out 8 \ + --des 'Exp' \ + --itr 1 + + + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/exchange_rate/ \ + --data_path exchange_rate.csv \ + --model_id Exchange_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 384 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 8 \ + --dec_in 8 \ + --c_out 8 \ + --des 'Exp' \ + --itr 1 + + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/exchange_rate/ \ + --data_path exchange_rate.csv \ + --model_id Exchange_96_336 \ + --model $model_name \ + --data 
custom \ + --features M \ + --seq_len 720 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 8 \ + --dec_in 8 \ + --c_out 8 \ + --des 'Exp' \ + --itr 1 + + + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/exchange_rate/ \ + --data_path exchange_rate.csv \ + --model_id Exchange_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 720 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 8 \ + --dec_in 8 \ + --c_out 8 \ + --des 'Exp' \ + --itr 1 diff --git a/scripts/long_term_forecast/Exchange_script/Koopa.sh b/scripts/long_term_forecast/Exchange_script/Koopa.sh new file mode 100644 index 0000000..953ce92 --- /dev/null +++ b/scripts/long_term_forecast/Exchange_script/Koopa.sh @@ -0,0 +1,87 @@ +export CUDA_VISIBLE_DEVICES=7 + +model_name=Koopa + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/exchange_rate/ \ + --data_path exchange_rate.csv \ + --model_id Exchange_96_48 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --pred_len 48 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 8 \ + --dec_in 8 \ + --c_out 8 \ + --des 'Exp' \ + --learning_rate 0.001 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/exchange_rate/ \ + --data_path exchange_rate.csv \ + --model_id Exchange_192_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 192 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 8 \ + --dec_in 8 \ + --c_out 8 \ + --des 'Exp' \ + --learning_rate 0.001 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/exchange_rate/ \ + --data_path exchange_rate.csv \ + --model_id Exchange_288_144 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 288 \ + --pred_len 144 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 8 \ + --dec_in 8 \ + --c_out 8 \ + --des 'Exp' \ + --learning_rate 0.001 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/exchange_rate/ \ + --data_path exchange_rate.csv \ + --model_id Exchange_384_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 384 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 8 \ + --dec_in 8 \ + --c_out 8 \ + --des 'Exp' \ + --learning_rate 0.001 \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/Exchange_script/MICN.sh b/scripts/long_term_forecast/Exchange_script/MICN.sh new file mode 100644 index 0000000..97c6881 --- /dev/null +++ b/scripts/long_term_forecast/Exchange_script/MICN.sh @@ -0,0 +1,101 @@ +export CUDA_VISIBLE_DEVICES=7 + +model_name=MICN + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/exchange_rate/ \ + --data_path exchange_rate.csv \ + --model_id Exchange_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 96 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 8 \ + --dec_in 8 \ + --c_out 8 \ + --d_model 64 \ + --d_ff 64 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/exchange_rate/ \ + --data_path 
exchange_rate.csv \ + --model_id Exchange_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 96 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 8 \ + --dec_in 8 \ + --c_out 8 \ + --d_model 64 \ + --d_ff 64 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/exchange_rate/ \ + --data_path exchange_rate.csv \ + --model_id Exchange_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 96 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 8 \ + --dec_in 8 \ + --c_out 8 \ + --d_model 32 \ + --d_ff 32 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/exchange_rate/ \ + --data_path exchange_rate.csv \ + --model_id Exchange_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 96 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 8 \ + --dec_in 8 \ + --c_out 8 \ + --d_model 32 \ + --d_ff 32 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 1 diff --git a/scripts/long_term_forecast/Exchange_script/Mamba.sh b/scripts/long_term_forecast/Exchange_script/Mamba.sh new file mode 100644 index 0000000..5a72e3f --- /dev/null +++ b/scripts/long_term_forecast/Exchange_script/Mamba.sh @@ -0,0 +1,28 @@ +model_name=Mamba +for pred_len in 96 192 336 720 +do + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/exchange_rate/ \ + --data_path exchange_rate.csv \ + --model_id Exchange_$pred_len'_'$pred_len \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len $pred_len \ + --label_len 48 \ + --pred_len $pred_len \ + --e_layers 2 \ + --d_layers 1 \ + --enc_in 8 \ + --expand 2 \ + --d_ff 16 \ + --d_conv 4 \ + --c_out 8 \ + --d_model 128 \ + --des 'Exp' \ + --itr 1 \ + +done \ No newline at end of file diff --git a/scripts/long_term_forecast/Exchange_script/Nonstationary_Transformer.sh b/scripts/long_term_forecast/Exchange_script/Nonstationary_Transformer.sh new file mode 100644 index 0000000..ed3af30 --- /dev/null +++ b/scripts/long_term_forecast/Exchange_script/Nonstationary_Transformer.sh @@ -0,0 +1,96 @@ +export CUDA_VISIBLE_DEVICES=4 + +model_name=Nonstationary_Transformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/exchange_rate/ \ + --data_path exchange_rate.csv \ + --model_id Exchange_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 8 \ + --dec_in 8 \ + --c_out 8 \ + --des 'Exp' \ + --itr 1 \ + --p_hidden_dims 256 256 \ + --p_hidden_layers 2 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/exchange_rate/ \ + --data_path exchange_rate.csv \ + --model_id Exchange_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 8 \ + --dec_in 8 \ + --c_out 8 \ + --des 'Exp' \ + --itr 1 \ + --p_hidden_dims 64 64 64 64 \ + --p_hidden_layers 4 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/exchange_rate/ \ + 
--data_path exchange_rate.csv \ + --model_id Exchange_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 8 \ + --dec_in 8 \ + --c_out 8 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 1 \ + --p_hidden_dims 256 256 \ + --p_hidden_layers 2 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/exchange_rate/ \ + --data_path exchange_rate.csv \ + --model_id Exchange_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 8 \ + --dec_in 8 \ + --c_out 8 \ + --des 'Exp' \ + --itr 1 \ + --p_hidden_dims 256 256 \ + --p_hidden_layers 2 \ No newline at end of file diff --git a/scripts/long_term_forecast/Exchange_script/PatchTST.sh b/scripts/long_term_forecast/Exchange_script/PatchTST.sh new file mode 100644 index 0000000..9727816 --- /dev/null +++ b/scripts/long_term_forecast/Exchange_script/PatchTST.sh @@ -0,0 +1,88 @@ +export CUDA_VISIBLE_DEVICES=5 + +model_name=PatchTST + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/exchange_rate/ \ + --data_path exchange_rate.csv \ + --model_id Exchange_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 8 \ + --dec_in 8 \ + --c_out 8 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/exchange_rate/ \ + --data_path exchange_rate.csv \ + --model_id Exchange_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 8 \ + --dec_in 8 \ + --c_out 8 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/exchange_rate/ \ + --data_path exchange_rate.csv \ + --model_id Exchange_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 8 \ + --dec_in 8 \ + --c_out 8 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/exchange_rate/ \ + --data_path exchange_rate.csv \ + --model_id Exchange_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 8 \ + --dec_in 8 \ + --c_out 8 \ + --des 'Exp' \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/Exchange_script/Pyraformer.sh b/scripts/long_term_forecast/Exchange_script/Pyraformer.sh new file mode 100644 index 0000000..87d7d34 --- /dev/null +++ b/scripts/long_term_forecast/Exchange_script/Pyraformer.sh @@ -0,0 +1,89 @@ +export CUDA_VISIBLE_DEVICES=4 + +model_name=Pyraformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/exchange_rate/ \ + --data_path exchange_rate.csv \ + --model_id Exchange_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 
3 \ + --enc_in 8 \ + --dec_in 8 \ + --c_out 8 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/exchange_rate/ \ + --data_path exchange_rate.csv \ + --model_id Exchange_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 8 \ + --dec_in 8 \ + --c_out 8 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/exchange_rate/ \ + --data_path exchange_rate.csv \ + --model_id Exchange_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 8 \ + --dec_in 8 \ + --c_out 8 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/exchange_rate/ \ + --data_path exchange_rate.csv \ + --model_id Exchange_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 8 \ + --dec_in 8 \ + --c_out 8 \ + --des 'Exp' \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/Exchange_script/TimesNet.sh b/scripts/long_term_forecast/Exchange_script/TimesNet.sh new file mode 100644 index 0000000..6a7e1a8 --- /dev/null +++ b/scripts/long_term_forecast/Exchange_script/TimesNet.sh @@ -0,0 +1,101 @@ +export CUDA_VISIBLE_DEVICES=7 + +model_name=TimesNet + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/exchange_rate/ \ + --data_path exchange_rate.csv \ + --model_id Exchange_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 8 \ + --dec_in 8 \ + --c_out 8 \ + --d_model 64 \ + --d_ff 64 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/exchange_rate/ \ + --data_path exchange_rate.csv \ + --model_id Exchange_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 8 \ + --dec_in 8 \ + --c_out 8 \ + --d_model 64 \ + --d_ff 64 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/exchange_rate/ \ + --data_path exchange_rate.csv \ + --model_id Exchange_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 8 \ + --dec_in 8 \ + --c_out 8 \ + --d_model 32 \ + --d_ff 32 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/exchange_rate/ \ + --data_path exchange_rate.csv \ + --model_id Exchange_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 8 \ + --dec_in 8 \ + --c_out 8 \ + --d_model 32 \ + --d_ff 32 \ + --top_k 5 \ + --des 
'Exp' \ + --itr 1 \ + --train_epochs 1 diff --git a/scripts/long_term_forecast/Exchange_script/Transformer.sh b/scripts/long_term_forecast/Exchange_script/Transformer.sh new file mode 100644 index 0000000..8f41145 --- /dev/null +++ b/scripts/long_term_forecast/Exchange_script/Transformer.sh @@ -0,0 +1,88 @@ +export CUDA_VISIBLE_DEVICES=4 + +model_name=Transformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/exchange_rate/ \ + --data_path exchange_rate.csv \ + --model_id Exchange_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 8 \ + --dec_in 8 \ + --c_out 8 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/exchange_rate/ \ + --data_path exchange_rate.csv \ + --model_id Exchange_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 8 \ + --dec_in 8 \ + --c_out 8 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/exchange_rate/ \ + --data_path exchange_rate.csv \ + --model_id Exchange_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 8 \ + --dec_in 8 \ + --c_out 8 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/exchange_rate/ \ + --data_path exchange_rate.csv \ + --model_id Exchange_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 8 \ + --dec_in 8 \ + --c_out 8 \ + --des 'Exp' \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/ILI_script/Autoformer.sh b/scripts/long_term_forecast/ILI_script/Autoformer.sh new file mode 100644 index 0000000..8934462 --- /dev/null +++ b/scripts/long_term_forecast/ILI_script/Autoformer.sh @@ -0,0 +1,87 @@ +export CUDA_VISIBLE_DEVICES=1 + +model_name=Autoformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/illness/ \ + --data_path national_illness.csv \ + --model_id ili_36_24 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 36 \ + --label_len 18 \ + --pred_len 24 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/illness/ \ + --data_path national_illness.csv \ + --model_id ili_36_36 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 36 \ + --label_len 18 \ + --pred_len 36 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/illness/ \ + --data_path national_illness.csv \ + --model_id ili_36_48 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 36 \ + --label_len 18 \ + --pred_len 48 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + 
--dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/illness/ \ + --data_path national_illness.csv \ + --model_id ili_36_60 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 36 \ + --label_len 18 \ + --pred_len 60 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/ILI_script/Crossformer.sh b/scripts/long_term_forecast/ILI_script/Crossformer.sh new file mode 100644 index 0000000..eaca4ed --- /dev/null +++ b/scripts/long_term_forecast/ILI_script/Crossformer.sh @@ -0,0 +1,103 @@ +export CUDA_VISIBLE_DEVICES=5 + +model_name=Crossformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/illness/ \ + --data_path national_illness.csv \ + --model_id ili_36_24 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 36 \ + --label_len 18 \ + --pred_len 24 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --d_model 768 \ + --d_ff 768 \ + --top_k 5 \ + --des 'Exp' \ + --dropout 0.6 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/illness/ \ + --data_path national_illness.csv \ + --model_id ili_36_36 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 36 \ + --label_len 18 \ + --pred_len 36 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --d_model 768 \ + --d_ff 768 \ + --top_k 5 \ + --des 'Exp' \ + --dropout 0.6 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/illness/ \ + --data_path national_illness.csv \ + --model_id ili_36_48 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 36 \ + --label_len 18 \ + --pred_len 48 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --d_model 768 \ + --d_ff 768 \ + --top_k 5 \ + --des 'Exp' \ + --dropout 0.6 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/illness/ \ + --data_path national_illness.csv \ + --model_id ili_36_60 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 36 \ + --label_len 18 \ + --pred_len 60 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --d_model 768 \ + --d_ff 768 \ + --top_k 5 \ + --des 'Exp' \ + --dropout 0.6 \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/ILI_script/FiLM.sh b/scripts/long_term_forecast/ILI_script/FiLM.sh new file mode 100644 index 0000000..f8fe4ed --- /dev/null +++ b/scripts/long_term_forecast/ILI_script/FiLM.sh @@ -0,0 +1,88 @@ +export CUDA_VISIBLE_DEVICES=5 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/illness/ \ + --data_path national_illness.csv \ + --model_id ili_36_24 \ + --model FiLM \ + --data custom \ + --features M \ + --seq_len 60 \ + --label_len 18 \ + --pred_len 24 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + + + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/illness/ \ + --data_path national_illness.csv \ + --model_id ili_36_36 \
--model FiLM \ + --data custom \ + --features M \ + --seq_len 60 \ + --label_len 18 \ + --pred_len 36 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/illness/ \ + --data_path national_illness.csv \ + --model_id ili_36_48 \ + --model FiLM \ + --data custom \ + --features M \ + --seq_len 60 \ + --label_len 18 \ + --pred_len 48 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/illness/ \ + --data_path national_illness.csv \ + --model_id ili_36_60 \ + --model FiLM \ + --data custom \ + --features M \ + --seq_len 60 \ + --label_len 18 \ + --pred_len 60 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/ILI_script/Koopa.sh b/scripts/long_term_forecast/ILI_script/Koopa.sh new file mode 100644 index 0000000..e3df787 --- /dev/null +++ b/scripts/long_term_forecast/ILI_script/Koopa.sh @@ -0,0 +1,87 @@ +export CUDA_VISIBLE_DEVICES=1 + +model_name=Koopa + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/illness/ \ + --data_path national_illness.csv \ + --model_id ili_48_24 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 48 \ + --pred_len 24 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --learning_rate 0.001 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/illness/ \ + --data_path national_illness.csv \ + --model_id ili_72_36 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 72 \ + --pred_len 36 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --learning_rate 0.001 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/illness/ \ + --data_path national_illness.csv \ + --model_id ili_96_48 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --pred_len 48 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --learning_rate 0.001 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/illness/ \ + --data_path national_illness.csv \ + --model_id ili_120_60 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 120 \ + --pred_len 60 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --learning_rate 0.001 \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/ILI_script/MICN.sh b/scripts/long_term_forecast/ILI_script/MICN.sh new file mode 100644 index 0000000..6b79e46 --- /dev/null +++ b/scripts/long_term_forecast/ILI_script/MICN.sh @@ -0,0 +1,99 @@ +export CUDA_VISIBLE_DEVICES=4 + +model_name=MICN + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/illness/ \ + --data_path national_illness.csv \ + --model_id ili_36_24 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 36 \ 
+ --label_len 36 \ + --pred_len 24 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --d_model 768 \ + --d_ff 768 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/illness/ \ + --data_path national_illness.csv \ + --model_id ili_36_36 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 36 \ + --label_len 36 \ + --pred_len 36 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --d_model 768 \ + --d_ff 768 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/illness/ \ + --data_path national_illness.csv \ + --model_id ili_36_48 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 36 \ + --label_len 36 \ + --pred_len 48 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --d_model 768 \ + --d_ff 768 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/illness/ \ + --data_path national_illness.csv \ + --model_id ili_36_60 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 36 \ + --label_len 36 \ + --pred_len 60 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --d_model 768 \ + --d_ff 768 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 diff --git a/scripts/long_term_forecast/ILI_script/Nonstationary_Transformer.sh b/scripts/long_term_forecast/ILI_script/Nonstationary_Transformer.sh new file mode 100644 index 0000000..a83b902 --- /dev/null +++ b/scripts/long_term_forecast/ILI_script/Nonstationary_Transformer.sh @@ -0,0 +1,95 @@ +export CUDA_VISIBLE_DEVICES=3 + +model_name=Nonstationary_Transformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/illness/ \ + --data_path national_illness.csv \ + --model_id ili_36_24 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 36 \ + --label_len 18 \ + --pred_len 24 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 \ + --p_hidden_dims 32 32 \ + --p_hidden_layers 2 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/illness/ \ + --data_path national_illness.csv \ + --model_id ili_36_36 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 36 \ + --label_len 18 \ + --pred_len 36 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 \ + --p_hidden_dims 32 32 \ + --p_hidden_layers 2 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/illness/ \ + --data_path national_illness.csv \ + --model_id ili_36_48 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 36 \ + --label_len 18 \ + --pred_len 48 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 \ + --p_hidden_dims 16 16 \ + --p_hidden_layers 2 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/illness/ \ + --data_path national_illness.csv \ + --model_id ili_36_60 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 36 \ + 
--label_len 18 \ + --pred_len 60 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 \ + --p_hidden_dims 8 8 \ + --p_hidden_layers 2 \ No newline at end of file diff --git a/scripts/long_term_forecast/ILI_script/PatchTST.sh b/scripts/long_term_forecast/ILI_script/PatchTST.sh new file mode 100644 index 0000000..ff8fba2 --- /dev/null +++ b/scripts/long_term_forecast/ILI_script/PatchTST.sh @@ -0,0 +1,96 @@ +export CUDA_VISIBLE_DEVICES=1 + +model_name=PatchTST + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/illness/ \ + --data_path national_illness.csv \ + --model_id ili_36_24 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 36 \ + --label_len 18 \ + --pred_len 24 \ + --e_layers 4 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --n_heads 4 \ + --d_model 1024\ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/illness/ \ + --data_path national_illness.csv \ + --model_id ili_36_36 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 36 \ + --label_len 18 \ + --pred_len 36 \ + --e_layers 4 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --n_heads 4 \ + --d_model 2048\ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/illness/ \ + --data_path national_illness.csv \ + --model_id ili_36_48 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 36 \ + --label_len 18 \ + --pred_len 48 \ + --e_layers 4 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --n_heads 4 \ + --d_model 2048\ + --itr 1 + + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/illness/ \ + --data_path national_illness.csv \ + --model_id ili_36_60 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 36 \ + --label_len 18 \ + --pred_len 60 \ + --e_layers 4 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --n_heads 16 \ + --d_model 2048\ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/ILI_script/TimesNet.sh b/scripts/long_term_forecast/ILI_script/TimesNet.sh new file mode 100644 index 0000000..cb378a1 --- /dev/null +++ b/scripts/long_term_forecast/ILI_script/TimesNet.sh @@ -0,0 +1,99 @@ +export CUDA_VISIBLE_DEVICES=4 + +model_name=TimesNet + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/illness/ \ + --data_path national_illness.csv \ + --model_id ili_36_24 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 36 \ + --label_len 18 \ + --pred_len 24 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --d_model 768 \ + --d_ff 768 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/illness/ \ + --data_path national_illness.csv \ + --model_id ili_36_36 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 36 \ + --label_len 18 \ + --pred_len 36 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --d_model 768 \ + --d_ff 768 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + 
--task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/illness/ \ + --data_path national_illness.csv \ + --model_id ili_36_48 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 36 \ + --label_len 18 \ + --pred_len 48 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --d_model 768 \ + --d_ff 768 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/illness/ \ + --data_path national_illness.csv \ + --model_id ili_36_60 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 36 \ + --label_len 18 \ + --pred_len 60 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --d_model 768 \ + --d_ff 768 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/ILI_script/Transformer.sh b/scripts/long_term_forecast/ILI_script/Transformer.sh new file mode 100644 index 0000000..eee0209 --- /dev/null +++ b/scripts/long_term_forecast/ILI_script/Transformer.sh @@ -0,0 +1,87 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=Transformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/illness/ \ + --data_path national_illness.csv \ + --model_id ili_36_24 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 36 \ + --label_len 18 \ + --pred_len 24 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/illness/ \ + --data_path national_illness.csv \ + --model_id ili_36_36 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 36 \ + --label_len 18 \ + --pred_len 36 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/illness/ \ + --data_path national_illness.csv \ + --model_id ili_36_48 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 36 \ + --label_len 18 \ + --pred_len 48 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/illness/ \ + --data_path national_illness.csv \ + --model_id ili_36_60 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 36 \ + --label_len 18 \ + --pred_len 60 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 7 \ + --dec_in 7 \ + --c_out 7 \ + --des 'Exp' \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/Mamba_all.sh b/scripts/long_term_forecast/Mamba_all.sh new file mode 100644 index 0000000..9e34eab --- /dev/null +++ b/scripts/long_term_forecast/Mamba_all.sh @@ -0,0 +1,4 @@ +./scripts/long_term_forecast/ECL_script/Mamba.sh +./scripts/long_term_forecast/Traffic_script/Mamba.sh +./scripts/long_term_forecast/Exchange_script/Mamba.sh +./scripts/long_term_forecast/Weather_script/Mamba.sh diff --git a/scripts/long_term_forecast/Traffic_script/Autoformer.sh b/scripts/long_term_forecast/Traffic_script/Autoformer.sh new file mode 100644 index 0000000..9123d7e --- /dev/null +++ b/scripts/long_term_forecast/Traffic_script/Autoformer.sh @@ 
-0,0 +1,91 @@ +export CUDA_VISIBLE_DEVICES=6 + +model_name=Autoformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 \ No newline at end of file diff --git a/scripts/long_term_forecast/Traffic_script/Crossformer.sh b/scripts/long_term_forecast/Traffic_script/Crossformer.sh new file mode 100644 index 0000000..d6bb5d5 --- /dev/null +++ b/scripts/long_term_forecast/Traffic_script/Crossformer.sh @@ -0,0 +1,99 @@ +export CUDA_VISIBLE_DEVICES=6 + +model_name=Crossformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 96 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --top_k 5 \ + --des 'Exp' \ + --n_heads 2 \ + --batch_size 4 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 96 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --top_k 5 \ + --des 'Exp' \ + --n_heads 2 \ + --batch_size 4 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 96 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --top_k 5 \ + --des 'Exp' \ + --n_heads 2 \ + --batch_size 4 \ + --itr 1 + +python -u run.py \ + --task_name 
long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 96 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --top_k 5 \ + --des 'Exp' \ + --n_heads 2 \ + --batch_size 4 \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/Traffic_script/FiLM.sh b/scripts/long_term_forecast/Traffic_script/FiLM.sh new file mode 100644 index 0000000..6f4a721 --- /dev/null +++ b/scripts/long_term_forecast/Traffic_script/FiLM.sh @@ -0,0 +1,91 @@ +export CUDA_VISIBLE_DEVICES=3 + +model_name=FiLM + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 720 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --batch_size 2 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 720 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --batch_size 2 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 720 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --batch_size 2 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 720 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --batch_size 2 \ No newline at end of file diff --git a/scripts/long_term_forecast/Traffic_script/Koopa.sh b/scripts/long_term_forecast/Traffic_script/Koopa.sh new file mode 100644 index 0000000..2322080 --- /dev/null +++ b/scripts/long_term_forecast/Traffic_script/Koopa.sh @@ -0,0 +1,87 @@ +export CUDA_VISIBLE_DEVICES=6 + +model_name=Koopa + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_48 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --pred_len 48 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --learning_rate 0.001 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_192_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 192 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + 
--c_out 862 \ + --des 'Exp' \ + --learning_rate 0.001 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_288_144 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 288 \ + --pred_len 144 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --learning_rate 0.001 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_384_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 384 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --learning_rate 0.001 \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/Traffic_script/MICN.sh b/scripts/long_term_forecast/Traffic_script/MICN.sh new file mode 100644 index 0000000..7f83d28 --- /dev/null +++ b/scripts/long_term_forecast/Traffic_script/MICN.sh @@ -0,0 +1,99 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=MICN + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 96 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --d_model 512 \ + --d_ff 512 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 96 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --d_model 512 \ + --d_ff 512 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 96 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --d_model 512 \ + --d_ff 512 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 96 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --d_model 512 \ + --d_ff 512 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/Traffic_script/Mamba.sh b/scripts/long_term_forecast/Traffic_script/Mamba.sh new file mode 100644 index 0000000..f531e19 --- /dev/null +++ b/scripts/long_term_forecast/Traffic_script/Mamba.sh @@ -0,0 +1,29 @@ +model_name=Mamba + +for pred_len in 96 192 336 720 +do + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id 
traffic_$pred_len'_'$pred_len \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len $pred_len \ + --label_len 48 \ + --pred_len $pred_len \ + --e_layers 2 \ + --d_layers 1 \ + --enc_in 862 \ + --expand 2 \ + --d_ff 16 \ + --d_conv 4 \ + --c_out 862 \ + --d_model 128 \ + --des 'Exp' \ + --itr 1 \ + +done \ No newline at end of file diff --git a/scripts/long_term_forecast/Traffic_script/MultiPatchFormer.sh b/scripts/long_term_forecast/Traffic_script/MultiPatchFormer.sh new file mode 100644 index 0000000..7e79479 --- /dev/null +++ b/scripts/long_term_forecast/Traffic_script/MultiPatchFormer.sh @@ -0,0 +1,96 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=MultiPatchFormer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 1 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --d_model 256 \ + --d_ff 512 \ + --des 'Exp' \ + --n_heads 8 \ + --batch_size 32 \ + --itr 1 + + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 1 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --d_model 256 \ + --d_ff 512 \ + --des 'Exp' \ + --n_heads 8 \ + --batch_size 32 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 1 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --d_model 256 \ + --d_ff 512 \ + --des 'Exp' \ + --n_heads 8 \ + --batch_size 32 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 1 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --d_model 256 \ + --d_ff 512 \ + --des 'Exp' \ + --n_heads 8 \ + --batch_size 32 \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/Traffic_script/Nonstationary_Transformer.sh b/scripts/long_term_forecast/Traffic_script/Nonstationary_Transformer.sh new file mode 100644 index 0000000..c7d63d1 --- /dev/null +++ b/scripts/long_term_forecast/Traffic_script/Nonstationary_Transformer.sh @@ -0,0 +1,99 @@ +export CUDA_VISIBLE_DEVICES=5 + +model_name=Nonstationary_Transformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 \ + --p_hidden_dims 128 128 \ + --p_hidden_layers 2 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_192 \ + --model 
$model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 \ + --p_hidden_dims 128 128 \ + --p_hidden_layers 2 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 \ + --p_hidden_dims 16 16 \ + --p_hidden_layers 2 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 \ + --p_hidden_dims 128 128 \ + --p_hidden_layers 2 \ No newline at end of file diff --git a/scripts/long_term_forecast/Traffic_script/PatchTST.sh b/scripts/long_term_forecast/Traffic_script/PatchTST.sh new file mode 100644 index 0000000..69d9b6c --- /dev/null +++ b/scripts/long_term_forecast/Traffic_script/PatchTST.sh @@ -0,0 +1,103 @@ +export CUDA_VISIBLE_DEVICES=6 + +model_name=PatchTST + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --d_model 512 \ + --d_ff 512 \ + --top_k 5 \ + --des 'Exp' \ + --batch_size 4 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --d_model 512 \ + --d_ff 512 \ + --top_k 5 \ + --des 'Exp' \ + --batch_size 4 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --d_model 512 \ + --d_ff 512 \ + --top_k 5 \ + --des 'Exp' \ + --batch_size 4 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --d_model 512 \ + --d_ff 512 \ + --top_k 5 \ + --des 'Exp' \ + --batch_size 4 \ + --itr 1 \ No newline at end of file diff --git 
a/scripts/long_term_forecast/Traffic_script/Pyraformer.sh b/scripts/long_term_forecast/Traffic_script/Pyraformer.sh new file mode 100644 index 0000000..a4e8f3b --- /dev/null +++ b/scripts/long_term_forecast/Traffic_script/Pyraformer.sh @@ -0,0 +1,91 @@ +export CUDA_VISIBLE_DEVICES=6 + +model_name=Pyraformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 \ No newline at end of file diff --git a/scripts/long_term_forecast/Traffic_script/SegRNN.sh b/scripts/long_term_forecast/Traffic_script/SegRNN.sh new file mode 100644 index 0000000..9fceb70 --- /dev/null +++ b/scripts/long_term_forecast/Traffic_script/SegRNN.sh @@ -0,0 +1,27 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=SegRNN + +seq_len=96 +for pred_len in 96 192 336 720 +do +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_$seq_len'_'$pred_len \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len $seq_len \ + --pred_len $pred_len \ + --seg_len 24 \ + --enc_in 862 \ + --d_model 512 \ + --dropout 0 \ + --learning_rate 0.001 \ + --des 'Exp' \ + --itr 1 +done + diff --git a/scripts/long_term_forecast/Traffic_script/TSMixer.sh b/scripts/long_term_forecast/Traffic_script/TSMixer.sh new file mode 100755 index 0000000..4f8d94f --- /dev/null +++ b/scripts/long_term_forecast/Traffic_script/TSMixer.sh @@ -0,0 +1,101 @@ + +model_name=TSMixer +learning_rate=0.001 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 96 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --d_model 512 \ + --d_ff 32 \ + --des 'Exp' \ 
+ --itr 1 \ + --learning_rate $learning_rate \ + + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 96 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --d_model 512 \ + --d_ff 32 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate $learning_rate \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 96 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --d_model 512 \ + --d_ff 32 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate $learning_rate \ + + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 96 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --d_model 512 \ + --d_ff 32 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate $learning_rate \ diff --git a/scripts/long_term_forecast/Traffic_script/TimeMixer.sh b/scripts/long_term_forecast/Traffic_script/TimeMixer.sh new file mode 100755 index 0000000..6ee3434 --- /dev/null +++ b/scripts/long_term_forecast/Traffic_script/TimeMixer.sh @@ -0,0 +1,125 @@ +#export CUDA_VISIBLE_DEVICES=0 + +model_name=TimeMixer + +seq_len=96 +e_layers=3 +down_sampling_layers=3 +down_sampling_window=2 +learning_rate=0.01 +d_model=32 +d_ff=64 +batch_size=8 + + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id Traffic_$seq_len'_'96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len $seq_len \ + --label_len 0 \ + --pred_len 96 \ + --e_layers $e_layers \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --d_model $d_model \ + --d_ff $d_ff \ + --batch_size $batch_size \ + --learning_rate $learning_rate \ + --down_sampling_layers $down_sampling_layers \ + --down_sampling_method avg \ + --down_sampling_window $down_sampling_window + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id Traffic_$seq_len'_'192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len $seq_len \ + --label_len 0 \ + --pred_len 192 \ + --e_layers $e_layers \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --d_model $d_model \ + --d_ff $d_ff \ + --batch_size $batch_size \ + --learning_rate $learning_rate \ + --down_sampling_layers $down_sampling_layers \ + --down_sampling_method avg \ + --down_sampling_window $down_sampling_window + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id Traffic_$seq_len'_'336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len $seq_len \ + --label_len 0 \ + --pred_len 336 \ 
+ --e_layers $e_layers \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --d_model $d_model \ + --d_ff $d_ff \ + --batch_size $batch_size \ + --learning_rate $learning_rate \ + --down_sampling_layers $down_sampling_layers \ + --down_sampling_method avg \ + --down_sampling_window $down_sampling_window + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id Traffic_$seq_len'_'720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len $seq_len \ + --label_len 0 \ + --pred_len 720 \ + --e_layers $e_layers \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --d_model $d_model \ + --d_ff $d_ff \ + --batch_size $batch_size \ + --learning_rate $learning_rate \ + --down_sampling_layers $down_sampling_layers \ + --down_sampling_method avg \ + --down_sampling_window $down_sampling_window \ No newline at end of file diff --git a/scripts/long_term_forecast/Traffic_script/TimeXer.sh b/scripts/long_term_forecast/Traffic_script/TimeXer.sh new file mode 100644 index 0000000..6d975b3 --- /dev/null +++ b/scripts/long_term_forecast/Traffic_script/TimeXer.sh @@ -0,0 +1,99 @@ +export CUDA_VISIBLE_DEVICES=3 + +model_name=TimeXer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 3 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --d_model 512 \ + --d_ff 512 \ + --des 'Exp' \ + --batch_size 16 \ + --learning_rate 0.001 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 3 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --d_model 512 \ + --d_ff 512 \ + --des 'Exp' \ + --batch_size 16 \ + --learning_rate 0.001 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --d_model 512 \ + --d_ff 512 \ + --des 'Exp' \ + --batch_size 16 \ + --learning_rate 0.001 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --d_model 512 \ + --d_ff 512 \ + --des 'Exp' \ + --batch_size 16 \ + --learning_rate 0.001 \ + --itr 1 diff --git a/scripts/long_term_forecast/Traffic_script/TimesNet.sh b/scripts/long_term_forecast/Traffic_script/TimesNet.sh new file mode 100644 index 0000000..0baca10 --- /dev/null +++ b/scripts/long_term_forecast/Traffic_script/TimesNet.sh @@ -0,0 +1,99 @@ +export CUDA_VISIBLE_DEVICES=0 + 
+model_name=TimesNet + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --d_model 512 \ + --d_ff 512 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --d_model 512 \ + --d_ff 512 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --d_model 512 \ + --d_ff 512 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --d_model 512 \ + --d_ff 512 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/Traffic_script/Transformer.sh b/scripts/long_term_forecast/Traffic_script/Transformer.sh new file mode 100644 index 0000000..88b7e5c --- /dev/null +++ b/scripts/long_term_forecast/Traffic_script/Transformer.sh @@ -0,0 +1,91 @@ +export CUDA_VISIBLE_DEVICES=5 + +model_name=Transformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 + +python -u run.py \ + --task_name long_term_forecast \ + 
--is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 diff --git a/scripts/long_term_forecast/Traffic_script/WPMixer.sh b/scripts/long_term_forecast/Traffic_script/WPMixer.sh new file mode 100644 index 0000000..16443c3 --- /dev/null +++ b/scripts/long_term_forecast/Traffic_script/WPMixer.sh @@ -0,0 +1,49 @@ + +export CUDA_VISIBLE_DEVICES=0 + +# Model name +model_name=WPMixer + +# Datasets and prediction lengths +dataset=traffic +seq_lens=(1200 1200 1200 1200) +pred_lens=(96 192 336 720) +learning_rates=(0.0010385 0.000567053 0.001026715 0.001496217) +batches=(16 16 16 16) +epochs=(60 60 50 60) +dropouts=(0.05 0.05 0.0 0.05) +patch_lens=(16 16 16 16) +lradjs=(type3 type3 type3 type3) +d_models=(16 32 32 32) +patiences=(12 12 12 12) + +# Model params below need to be set in WPMixer.py Line 15, instead of this script +wavelets=(db3 db3 bior3.1 db3) +levels=(1 1 1 1) +tfactors=(3 3 7 7) +dfactors=(5 5 7 3) +strides=(8 8 8 8) + +# Loop over datasets and prediction lengths +for i in "${!pred_lens[@]}"; do + python -u run.py \ + --is_training 1 \ + --root_path ./data/traffic/ \ + --data_path traffic.csv \ + --model_id wpmixer \ + --model $model_name \ + --task_name long_term_forecast \ + --data $dataset \ + --seq_len ${seq_lens[$i]} \ + --pred_len ${pred_lens[$i]} \ + --label_len 0 \ + --d_model ${d_models[$i]} \ + --patch_len ${patch_lens[$i]} \ + --batch_size ${batches[$i]} \ + --learning_rate ${learning_rates[$i]} \ + --lradj ${lradjs[$i]} \ + --dropout ${dropouts[$i]} \ + --patience ${patiences[$i]} \ + --train_epochs ${epochs[$i]} \ + --use_amp +done diff --git a/scripts/long_term_forecast/Traffic_script/iTransformer.sh b/scripts/long_term_forecast/Traffic_script/iTransformer.sh new file mode 100644 index 0000000..583f28d --- /dev/null +++ b/scripts/long_term_forecast/Traffic_script/iTransformer.sh @@ -0,0 +1,103 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=iTransformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 4 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --d_model 512 \ + --d_ff 512 \ + --batch_size 16 \ + --learning_rate 0.001 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 4 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --d_model 512 \ + --d_ff 512 \ + --batch_size 16 \ + --learning_rate 0.001 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 4 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + 
--dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --d_model 512 \ + --d_ff 512 \ + --batch_size 16 \ + --learning_rate 0.001 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/traffic/ \ + --data_path traffic.csv \ + --model_id traffic_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 4 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 862 \ + --dec_in 862 \ + --c_out 862 \ + --des 'Exp' \ + --d_model 512 \ + --d_ff 512 \ + --batch_size 16 \ + --learning_rate 0.001 \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/Weather_script/Autoformer.sh b/scripts/long_term_forecast/Weather_script/Autoformer.sh new file mode 100644 index 0000000..e5dbb52 --- /dev/null +++ b/scripts/long_term_forecast/Weather_script/Autoformer.sh @@ -0,0 +1,88 @@ +export CUDA_VISIBLE_DEVICES=1 + +model_name=Autoformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 2 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/Weather_script/Crossformer.sh b/scripts/long_term_forecast/Weather_script/Crossformer.sh new file mode 100644 index 0000000..d1698f0 --- /dev/null +++ b/scripts/long_term_forecast/Weather_script/Crossformer.sh @@ -0,0 +1,102 @@ +export CUDA_VISIBLE_DEVICES=7 + +model_name=Crossformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --d_model 32 \ + --d_ff 32 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv 
\ + --model_id weather_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --d_model 32 \ + --d_ff 32 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 1 + + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --d_model 32 \ + --d_ff 32 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --d_model 32 \ + --d_ff 32 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/Weather_script/FiLM.sh b/scripts/long_term_forecast/Weather_script/FiLM.sh new file mode 100644 index 0000000..7b15125 --- /dev/null +++ b/scripts/long_term_forecast/Weather_script/FiLM.sh @@ -0,0 +1,85 @@ +export CUDA_VISIBLE_DEVICES=6 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_720 \ + --model FiLM \ + --data custom \ + --features M \ + --seq_len 720 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_96 \ + --model FiLM \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_192 \ + --model FiLM \ + --data custom \ + --features M \ + --seq_len 192 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_336 \ + --model FiLM \ + --data custom \ + --features M \ + --seq_len 336 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --itr 1 diff --git a/scripts/long_term_forecast/Weather_script/MICN.sh b/scripts/long_term_forecast/Weather_script/MICN.sh new file mode 100644 index 0000000..bbe064f --- /dev/null +++ b/scripts/long_term_forecast/Weather_script/MICN.sh @@ -0,0 +1,102 @@ +export CUDA_VISIBLE_DEVICES=5 + +model_name=MICN + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path 
./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 96 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --d_model 32 \ + --d_ff 32 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 96 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --d_model 32 \ + --d_ff 32 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 1 + + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 96 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --d_model 32 \ + --d_ff 32 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 96 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --d_model 32 \ + --d_ff 32 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/Weather_script/Mamba.sh b/scripts/long_term_forecast/Weather_script/Mamba.sh new file mode 100644 index 0000000..a9598bb --- /dev/null +++ b/scripts/long_term_forecast/Weather_script/Mamba.sh @@ -0,0 +1,29 @@ +model_name=Mamba + +for pred_len in 96 192 336 720 +do + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_$pred_len'_'$pred_len \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len $pred_len \ + --label_len 48 \ + --pred_len $pred_len \ + --e_layers 2 \ + --d_layers 1 \ + --enc_in 21 \ + --expand 2 \ + --d_ff 16 \ + --d_conv 4 \ + --c_out 21 \ + --d_model 128 \ + --des 'Exp' \ + --itr 1 \ + +done \ No newline at end of file diff --git a/scripts/long_term_forecast/Weather_script/MultiPatchFormer.sh b/scripts/long_term_forecast/Weather_script/MultiPatchFormer.sh new file mode 100644 index 0000000..6bec2d5 --- /dev/null +++ b/scripts/long_term_forecast/Weather_script/MultiPatchFormer.sh @@ -0,0 +1,98 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=MultiPatchFormer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 1 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --d_model 256 \ + --d_ff 512 \ + --des 'Exp' \ + --n_heads 8 \ + --batch_size 32 \ + --itr 1 + + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_192 \ + 
--model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 1 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --d_model 256 \ + --d_ff 512 \ + --des 'Exp' \ + --n_heads 8 \ + --batch_size 32 \ + --itr 1 + + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 1 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --d_model 256 \ + --d_ff 512 \ + --des 'Exp' \ + --n_heads 8 \ + --batch_size 32 \ + --itr 1 + + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 1 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --d_model 256 \ + --d_ff 512 \ + --des 'Exp' \ + --n_heads 8 \ + --batch_size 32 \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/Weather_script/Nonstationary_Transformer.sh b/scripts/long_term_forecast/Weather_script/Nonstationary_Transformer.sh new file mode 100644 index 0000000..27136d4 --- /dev/null +++ b/scripts/long_term_forecast/Weather_script/Nonstationary_Transformer.sh @@ -0,0 +1,96 @@ +export CUDA_VISIBLE_DEVICES=6 + +model_name=Nonstationary_Transformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 \ + --p_hidden_dims 256 256 \ + --p_hidden_layers 2 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --itr 1 \ + --p_hidden_dims 128 128 \ + --p_hidden_layers 2 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --itr 1 \ + --p_hidden_dims 128 128 \ + --p_hidden_layers 2 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --itr 1 \ + --p_hidden_dims 128 128 \ + --p_hidden_layers 2 \ No newline at end of file diff --git a/scripts/long_term_forecast/Weather_script/PatchTST.sh 
b/scripts/long_term_forecast/Weather_script/PatchTST.sh new file mode 100644 index 0000000..ca19683 --- /dev/null +++ b/scripts/long_term_forecast/Weather_script/PatchTST.sh @@ -0,0 +1,97 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=PatchTST + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --itr 1 \ + --n_heads 4 \ + --train_epochs 3 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --itr 1 \ + --n_heads 16 \ + --train_epochs 3 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --itr 1 \ + --n_heads 4 \ + --batch_size 128 \ + --train_epochs 3 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --itr 1 \ + --n_heads 4 \ + --batch_size 128 \ + --train_epochs 3 \ No newline at end of file diff --git a/scripts/long_term_forecast/Weather_script/Pyraformer.sh b/scripts/long_term_forecast/Weather_script/Pyraformer.sh new file mode 100644 index 0000000..7af7d54 --- /dev/null +++ b/scripts/long_term_forecast/Weather_script/Pyraformer.sh @@ -0,0 +1,88 @@ +export CUDA_VISIBLE_DEVICES=7 + +model_name=Pyraformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 2 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ 
+ --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/Weather_script/SegRNN.sh b/scripts/long_term_forecast/Weather_script/SegRNN.sh new file mode 100644 index 0000000..5ec4613 --- /dev/null +++ b/scripts/long_term_forecast/Weather_script/SegRNN.sh @@ -0,0 +1,27 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=SegRNN + +seq_len=96 +for pred_len in 96 192 336 720 +do +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_$seq_len'_'$pred_len \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len $seq_len \ + --pred_len $pred_len \ + --seg_len 48 \ + --enc_in 21 \ + --d_model 512 \ + --dropout 0.5 \ + --learning_rate 0.0001 \ + --des 'Exp' \ + --itr 1 +done + diff --git a/scripts/long_term_forecast/Weather_script/TSMixer.sh b/scripts/long_term_forecast/Weather_script/TSMixer.sh new file mode 100755 index 0000000..bea5bda --- /dev/null +++ b/scripts/long_term_forecast/Weather_script/TSMixer.sh @@ -0,0 +1,99 @@ + +model_name=TSMixer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --d_model 32 \ + --d_ff 32 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --d_model 32 \ + --d_ff 32 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 \ + + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --d_model 32 \ + --d_ff 32 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --d_model 32 \ + --d_ff 32 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 \ diff --git a/scripts/long_term_forecast/Weather_script/TimeMixer.sh b/scripts/long_term_forecast/Weather_script/TimeMixer.sh new file mode 100755 index 0000000..2d50112 --- 
/dev/null +++ b/scripts/long_term_forecast/Weather_script/TimeMixer.sh @@ -0,0 +1,133 @@ +#export CUDA_VISIBLE_DEVICES=0 + +model_name=TimeMixer + +seq_len=96 +e_layers=3 +down_sampling_layers=3 +down_sampling_window=2 +learning_rate=0.01 +d_model=16 +d_ff=32 +batch_size=16 +train_epochs=20 +patience=10 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len $seq_len \ + --label_len 0 \ + --pred_len 96 \ + --e_layers $e_layers \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --itr 1 \ + --d_model $d_model \ + --d_ff $d_ff \ + --batch_size 128 \ + --learning_rate $learning_rate \ + --train_epochs $train_epochs \ + --patience $patience \ + --down_sampling_layers $down_sampling_layers \ + --down_sampling_method avg \ + --down_sampling_window $down_sampling_window + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len $seq_len \ + --label_len 0 \ + --pred_len 192 \ + --e_layers $e_layers \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --itr 1 \ + --d_model $d_model \ + --d_ff $d_ff \ + --batch_size 128 \ + --learning_rate $learning_rate \ + --train_epochs $train_epochs \ + --patience $patience \ + --down_sampling_layers $down_sampling_layers \ + --down_sampling_method avg \ + --down_sampling_window $down_sampling_window + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len $seq_len \ + --label_len 0 \ + --pred_len 336 \ + --e_layers $e_layers \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --itr 1 \ + --d_model $d_model \ + --d_ff $d_ff \ + --batch_size 128 \ + --learning_rate $learning_rate \ + --train_epochs $train_epochs \ + --patience $patience \ + --down_sampling_layers $down_sampling_layers \ + --down_sampling_method avg \ + --down_sampling_window $down_sampling_window + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len $seq_len \ + --label_len 0 \ + --pred_len 720 \ + --e_layers $e_layers \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --itr 1 \ + --d_model $d_model \ + --d_ff $d_ff \ + --batch_size 128 \ + --learning_rate $learning_rate \ + --train_epochs $train_epochs \ + --patience $patience \ + --down_sampling_layers $down_sampling_layers \ + --down_sampling_method avg \ + --down_sampling_window $down_sampling_window \ No newline at end of file diff --git a/scripts/long_term_forecast/Weather_script/TimeXer.sh b/scripts/long_term_forecast/Weather_script/TimeXer.sh new file mode 100644 index 0000000..e93bf90 --- /dev/null +++ b/scripts/long_term_forecast/Weather_script/TimeXer.sh @@ -0,0 +1,93 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=TimeXer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + 
--data_path weather.csv \ + --model_id weather_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --d_model 256 \ + --d_ff 512 \ + --batch_size 4 \ + --itr 1 \ + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 3 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --d_model 128 \ + --d_ff 1024 \ + --batch_size 4 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --d_model 256 \ + --batch_size 4 \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --d_model 128 \ + --batch_size 4 \ + --itr 1 diff --git a/scripts/long_term_forecast/Weather_script/TimesNet.sh b/scripts/long_term_forecast/Weather_script/TimesNet.sh new file mode 100644 index 0000000..fe06193 --- /dev/null +++ b/scripts/long_term_forecast/Weather_script/TimesNet.sh @@ -0,0 +1,102 @@ +export CUDA_VISIBLE_DEVICES=5 + +model_name=TimesNet + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --d_model 32 \ + --d_ff 32 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --d_model 32 \ + --d_ff 32 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 1 + + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --d_model 32 \ + --d_ff 32 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ 
+ --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --d_model 32 \ + --d_ff 32 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/Weather_script/Transformer.sh b/scripts/long_term_forecast/Weather_script/Transformer.sh new file mode 100644 index 0000000..7c0466b --- /dev/null +++ b/scripts/long_term_forecast/Weather_script/Transformer.sh @@ -0,0 +1,88 @@ +export CUDA_VISIBLE_DEVICES=7 + +model_name=Transformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --itr 1 \ + --train_epochs 3 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --itr 1 + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --itr 1 \ No newline at end of file diff --git a/scripts/long_term_forecast/Weather_script/WPMixer.sh b/scripts/long_term_forecast/Weather_script/WPMixer.sh new file mode 100644 index 0000000..828abb2 --- /dev/null +++ b/scripts/long_term_forecast/Weather_script/WPMixer.sh @@ -0,0 +1,49 @@ + +export CUDA_VISIBLE_DEVICES=0 + +# Model name +model_name=WPMixer + +# Datasets and prediction lengths +dataset=weather +seq_lens=(512 512 512 512) +pred_lens=(96 192 336 720) +learning_rates=(0.000913333 0.001379042 0.000607991 0.001470479) +batches=(32 64 32 128) +epochs=(60 60 60 60) +dropouts=(0.4 0.4 0.4 0.4) +patch_lens=(16 16 16 16) +lradjs=(type3 type3 type3 type3) +d_models=(256 128 128 128) +patiences=(12 12 12 12) + +# Model params below need to be set in WPMixer.py Line 15, instead of this script +wavelets=(db3 db3 db3 db2) +levels=(2 1 2 1) +tfactors=(3 3 7 7) +dfactors=(7 7 7 5) +strides=(8 8 8 8) + +# Loop over datasets and prediction lengths +for i in "${!pred_lens[@]}"; do + python -u run.py \ + --is_training 1 \ + --root_path ./data/weather/ \ + --data_path weather.csv \ + --model_id wpmixer \ + --model $model_name \ + --task_name long_term_forecast \ + --data $dataset \ + --seq_len ${seq_lens[$i]} \ + --pred_len ${pred_lens[$i]} \ + --label_len 0 \ + --d_model ${d_models[$i]} \ + --patch_len ${patch_lens[$i]} \ + 
--batch_size ${batches[$i]} \ + --learning_rate ${learning_rates[$i]} \ + --lradj ${lradjs[$i]} \ + --dropout ${dropouts[$i]} \ + --patience ${patiences[$i]} \ + --train_epochs ${epochs[$i]} \ + --use_amp +done diff --git a/scripts/long_term_forecast/Weather_script/iTransformer.sh b/scripts/long_term_forecast/Weather_script/iTransformer.sh new file mode 100644 index 0000000..4b09121 --- /dev/null +++ b/scripts/long_term_forecast/Weather_script/iTransformer.sh @@ -0,0 +1,98 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=iTransformer + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_96 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 96 \ + --e_layers 3 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --itr 1 \ + + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_192 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 192 \ + --e_layers 3 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --itr 1 \ + + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_336 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 336 \ + --e_layers 3 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --itr 1 \ + + +python -u run.py \ + --task_name long_term_forecast \ + --is_training 1 \ + --root_path ./dataset/weather/ \ + --data_path weather.csv \ + --model_id weather_96_720 \ + --model $model_name \ + --data custom \ + --features M \ + --seq_len 96 \ + --label_len 48 \ + --pred_len 720 \ + --e_layers 3 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 21 \ + --dec_in 21 \ + --c_out 21 \ + --des 'Exp' \ + --d_model 512\ + --d_ff 512\ + --itr 1 \ No newline at end of file diff --git a/scripts/short_term_forecast/Autoformer_M4.sh b/scripts/short_term_forecast/Autoformer_M4.sh new file mode 100644 index 0000000..f6f88f8 --- /dev/null +++ b/scripts/short_term_forecast/Autoformer_M4.sh @@ -0,0 +1,135 @@ +export CUDA_VISIBLE_DEVICES=1 + +model_name=Autoformer + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Monthly' \ + --model_id m4_Monthly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 512 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Yearly' \ + --model_id m4_Yearly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 512 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path 
./dataset/m4 \ + --seasonal_patterns 'Quarterly' \ + --model_id m4_Quarterly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 512 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Weekly' \ + --model_id m4_Weekly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 512 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Daily' \ + --model_id m4_Daily \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 512 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Hourly' \ + --model_id m4_Hourly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 512 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' diff --git a/scripts/short_term_forecast/Crossformer_M4.sh b/scripts/short_term_forecast/Crossformer_M4.sh new file mode 100644 index 0000000..23786da --- /dev/null +++ b/scripts/short_term_forecast/Crossformer_M4.sh @@ -0,0 +1,147 @@ +export CUDA_VISIBLE_DEVICES=5 + +model_name=Crossformer + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Monthly' \ + --model_id m4_Monthly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 32 \ + --d_ff 32 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Yearly' \ + --model_id m4_Yearly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 16 \ + --d_ff 32 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Quarterly' \ + --model_id m4_Quarterly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 64 \ + --d_ff 64 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Daily' \ + --model_id m4_Daily \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 
1 \ + --batch_size 16 \ + --d_model 16 \ + --d_ff 16 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Weekly' \ + --model_id m4_Weekly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 32 \ + --d_ff 32 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Hourly' \ + --model_id m4_Hourly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 32 \ + --d_ff 32 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' \ No newline at end of file diff --git a/scripts/short_term_forecast/DLinear_M4.sh b/scripts/short_term_forecast/DLinear_M4.sh new file mode 100644 index 0000000..4b10f02 --- /dev/null +++ b/scripts/short_term_forecast/DLinear_M4.sh @@ -0,0 +1,135 @@ +export CUDA_VISIBLE_DEVICES=1 + +model_name=DLinear + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Monthly' \ + --model_id m4_Monthly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 512 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Yearly' \ + --model_id m4_Yearly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 512 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Quarterly' \ + --model_id m4_Quarterly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 512 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Weekly' \ + --model_id m4_Weekly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 512 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Daily' \ + --model_id m4_Daily \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 512 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path 
./dataset/m4 \ + --seasonal_patterns 'Hourly' \ + --model_id m4_Hourly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 512 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' diff --git a/scripts/short_term_forecast/ETSformer_M4.sh b/scripts/short_term_forecast/ETSformer_M4.sh new file mode 100644 index 0000000..177bf62 --- /dev/null +++ b/scripts/short_term_forecast/ETSformer_M4.sh @@ -0,0 +1,135 @@ +export CUDA_VISIBLE_DEVICES=1 + +model_name=ETSformer + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Monthly' \ + --model_id m4_Monthly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 2 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 512 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Yearly' \ + --model_id m4_Yearly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 2 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 512 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Quarterly' \ + --model_id m4_Quarterly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 2 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 512 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Weekly' \ + --model_id m4_Weekly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 2 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 512 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Daily' \ + --model_id m4_Daily \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 2 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 512 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Hourly' \ + --model_id m4_Hourly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 2 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 512 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' diff --git a/scripts/short_term_forecast/FEDformer_M4.sh b/scripts/short_term_forecast/FEDformer_M4.sh new file mode 100644 index 0000000..a49411e --- /dev/null +++ b/scripts/short_term_forecast/FEDformer_M4.sh @@ -0,0 +1,135 @@ +export CUDA_VISIBLE_DEVICES=1 + +model_name=FEDformer + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + 
--seasonal_patterns 'Monthly' \ + --model_id m4_Monthly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 512 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Yearly' \ + --model_id m4_Yearly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 512 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Quarterly' \ + --model_id m4_Quarterly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 512 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Weekly' \ + --model_id m4_Weekly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 512 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Daily' \ + --model_id m4_Daily \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 512 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Hourly' \ + --model_id m4_Hourly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 512 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' diff --git a/scripts/short_term_forecast/FiLM_M4.sh b/scripts/short_term_forecast/FiLM_M4.sh new file mode 100644 index 0000000..b4032b5 --- /dev/null +++ b/scripts/short_term_forecast/FiLM_M4.sh @@ -0,0 +1,147 @@ +export CUDA_VISIBLE_DEVICES=3 + +model_name=FiLM + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Monthly' \ + --model_id m4_Monthly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 32 \ + --d_ff 32 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Yearly' \ + --model_id m4_Yearly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 16 \ + --d_ff 32 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 \ + 
--learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Quarterly' \ + --model_id m4_Quarterly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 64 \ + --d_ff 64 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Daily' \ + --model_id m4_Daily \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 16 \ + --d_ff 16 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Weekly' \ + --model_id m4_Weekly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 32 \ + --d_ff 32 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Hourly' \ + --model_id m4_Hourly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 32 \ + --d_ff 32 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' \ No newline at end of file diff --git a/scripts/short_term_forecast/Informer_M4.sh b/scripts/short_term_forecast/Informer_M4.sh new file mode 100644 index 0000000..b83637a --- /dev/null +++ b/scripts/short_term_forecast/Informer_M4.sh @@ -0,0 +1,135 @@ +export CUDA_VISIBLE_DEVICES=1 + +model_name=Informer + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Monthly' \ + --model_id m4_Monthly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 512 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Yearly' \ + --model_id m4_Yearly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 512 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Quarterly' \ + --model_id m4_Quarterly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 512 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 
'Weekly' \ + --model_id m4_Weekly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 512 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Daily' \ + --model_id m4_Daily \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 512 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Hourly' \ + --model_id m4_Hourly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 512 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' diff --git a/scripts/short_term_forecast/LightTS_M4.sh b/scripts/short_term_forecast/LightTS_M4.sh new file mode 100644 index 0000000..5a35976 --- /dev/null +++ b/scripts/short_term_forecast/LightTS_M4.sh @@ -0,0 +1,135 @@ +export CUDA_VISIBLE_DEVICES=1 + +model_name=LightTS + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Monthly' \ + --model_id m4_Monthly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 512 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Yearly' \ + --model_id m4_Yearly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 512 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Quarterly' \ + --model_id m4_Quarterly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 512 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Weekly' \ + --model_id m4_Weekly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 512 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Daily' \ + --model_id m4_Daily \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 512 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + 
--task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Hourly' \ + --model_id m4_Hourly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 512 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' diff --git a/scripts/short_term_forecast/MICN_M4.sh b/scripts/short_term_forecast/MICN_M4.sh new file mode 100644 index 0000000..1d93496 --- /dev/null +++ b/scripts/short_term_forecast/MICN_M4.sh @@ -0,0 +1,147 @@ +export CUDA_VISIBLE_DEVICES=4 + +model_name=MICN + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Monthly' \ + --model_id m4_Monthly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 32 \ + --d_ff 32 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Yearly' \ + --model_id m4_Yearly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 16 \ + --d_ff 32 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Quarterly' \ + --model_id m4_Quarterly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 64 \ + --d_ff 64 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Daily' \ + --model_id m4_Daily \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 16 \ + --d_ff 16 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Weekly' \ + --model_id m4_Weekly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 32 \ + --d_ff 32 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Hourly' \ + --model_id m4_Hourly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 32 \ + --d_ff 32 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' \ No newline at end of file diff --git a/scripts/short_term_forecast/Mamba_M4.sh b/scripts/short_term_forecast/Mamba_M4.sh new file mode 100644 index 0000000..417a6c3 --- /dev/null +++ 
b/scripts/short_term_forecast/Mamba_M4.sh @@ -0,0 +1,135 @@ +# export CUDA_VISIBLE_DEVICES=1 + +model_name=Mamba + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Monthly' \ + --model_id m4_Monthly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --enc_in 1 \ + --expand 2 \ + --d_ff 16 \ + --d_conv 4 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 128 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Yearly' \ + --model_id m4_Yearly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --enc_in 1 \ + --expand 2 \ + --d_ff 16 \ + --d_conv 4 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 128 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Quarterly' \ + --model_id m4_Quarterly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --enc_in 1 \ + --expand 2 \ + --d_ff 16 \ + --d_conv 4 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 128 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Weekly' \ + --model_id m4_Weekly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --enc_in 1 \ + --expand 2 \ + --d_ff 16 \ + --d_conv 4 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 128 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Daily' \ + --model_id m4_Daily \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --enc_in 1 \ + --expand 2 \ + --d_ff 16 \ + --d_conv 4 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 128 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Hourly' \ + --model_id m4_Hourly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --enc_in 1 \ + --expand 2 \ + --d_ff 16 \ + --d_conv 4 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 128 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' \ No newline at end of file diff --git a/scripts/short_term_forecast/Nonstationary_Transformer_M4.sh b/scripts/short_term_forecast/Nonstationary_Transformer_M4.sh new file mode 100644 index 0000000..29ea72d --- /dev/null +++ b/scripts/short_term_forecast/Nonstationary_Transformer_M4.sh @@ -0,0 +1,147 @@ +export CUDA_VISIBLE_DEVICES=1 + +model_name=Nonstationary_Transformer + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Monthly' \ + --model_id m4_Monthly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 512 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' \ + --p_hidden_dims 256 256 \ + --p_hidden_layers 2 + +python -u run.py \ + --task_name short_term_forecast \ + 
--is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Yearly' \ + --model_id m4_Yearly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 512 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' \ + --p_hidden_dims 256 256 \ + --p_hidden_layers 2 + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Quarterly' \ + --model_id m4_Quarterly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 512 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' \ + --p_hidden_dims 256 256 \ + --p_hidden_layers 2 + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Weekly' \ + --model_id m4_Weekly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 512 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' \ + --p_hidden_dims 256 256 \ + --p_hidden_layers 2 + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Daily' \ + --model_id m4_Daily \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 512 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' \ + --p_hidden_dims 256 256 \ + --p_hidden_layers 2 + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Hourly' \ + --model_id m4_Hourly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 512 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' \ + --p_hidden_dims 256 256 \ + --p_hidden_layers 2 \ No newline at end of file diff --git a/scripts/short_term_forecast/Pyraformer_M4.sh b/scripts/short_term_forecast/Pyraformer_M4.sh new file mode 100644 index 0000000..66f67b8 --- /dev/null +++ b/scripts/short_term_forecast/Pyraformer_M4.sh @@ -0,0 +1,135 @@ +export CUDA_VISIBLE_DEVICES=1 + +model_name=Pyraformer + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Monthly' \ + --model_id m4_Monthly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 512 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Yearly' \ + --model_id m4_Yearly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 512 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ 
+ --root_path ./dataset/m4 \ + --seasonal_patterns 'Quarterly' \ + --model_id m4_Quarterly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 512 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Weekly' \ + --model_id m4_Weekly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 512 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Daily' \ + --model_id m4_Daily \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 512 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Hourly' \ + --model_id m4_Hourly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 512 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' diff --git a/scripts/short_term_forecast/Reformer_M4.sh b/scripts/short_term_forecast/Reformer_M4.sh new file mode 100644 index 0000000..d432fed --- /dev/null +++ b/scripts/short_term_forecast/Reformer_M4.sh @@ -0,0 +1,135 @@ +export CUDA_VISIBLE_DEVICES=1 + +model_name=Reformer + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Monthly' \ + --model_id m4_Monthly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 512 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Yearly' \ + --model_id m4_Yearly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 512 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Quarterly' \ + --model_id m4_Quarterly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 512 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Weekly' \ + --model_id m4_Weekly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 512 \ + --des 'Exp' \ + --itr 1 \ + 
--learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Daily' \ + --model_id m4_Daily \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 512 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Hourly' \ + --model_id m4_Hourly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 512 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' diff --git a/scripts/short_term_forecast/TSMixer_M4.sh b/scripts/short_term_forecast/TSMixer_M4.sh new file mode 100755 index 0000000..34aad15 --- /dev/null +++ b/scripts/short_term_forecast/TSMixer_M4.sh @@ -0,0 +1,135 @@ +#export CUDA_VISIBLE_DEVICES=1 + +model_name=MTSMixer + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Monthly' \ + --model_id m4_Monthly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 16 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Yearly' \ + --model_id m4_Yearly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 16 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Quarterly' \ + --model_id m4_Quarterly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 16 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Weekly' \ + --model_id m4_Weekly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 16 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Daily' \ + --model_id m4_Daily \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 16 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Hourly' \ + --model_id m4_Hourly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 
\ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 16 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' diff --git a/scripts/short_term_forecast/TimeMixer_M4.sh b/scripts/short_term_forecast/TimeMixer_M4.sh new file mode 100755 index 0000000..15802a1 --- /dev/null +++ b/scripts/short_term_forecast/TimeMixer_M4.sh @@ -0,0 +1,180 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=TimeMixer + +e_layers=4 +down_sampling_layers=1 +down_sampling_window=2 +learning_rate=0.01 +d_model=32 +d_ff=32 +batch_size=16 + + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Monthly' \ + --model_id m4_Monthly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers $e_layers \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 128 \ + --d_model $d_model \ + --d_ff 32 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate $learning_rate \ + --train_epochs 50 \ + --patience 20 \ + --down_sampling_layers $down_sampling_layers \ + --down_sampling_method avg \ + --down_sampling_window $down_sampling_window \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Yearly' \ + --model_id m4_Yearly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers $e_layers \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 128 \ + --d_model $d_model \ + --d_ff 32 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate $learning_rate \ + --train_epochs 50 \ + --patience 20 \ + --down_sampling_layers $down_sampling_layers \ + --down_sampling_method avg \ + --down_sampling_window $down_sampling_window \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Quarterly' \ + --model_id m4_Quarterly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers $e_layers \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 128 \ + --d_model $d_model \ + --d_ff 64 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate $learning_rate \ + --train_epochs 50 \ + --patience 20 \ + --down_sampling_layers $down_sampling_layers \ + --down_sampling_method avg \ + --down_sampling_window $down_sampling_window \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Daily' \ + --model_id m4_Daily \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers $e_layers \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 128 \ + --d_model $d_model \ + --d_ff 16 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate $learning_rate \ + --train_epochs 50 \ + --patience 20 \ + --down_sampling_layers $down_sampling_layers \ + --down_sampling_method avg \ + --down_sampling_window $down_sampling_window \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Weekly' \ + --model_id m4_Weekly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers $e_layers \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 128 \ + --d_model $d_model \ + --d_ff 32 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate $learning_rate \ + --train_epochs 50 \ + --patience 20 \ + 
--down_sampling_layers $down_sampling_layers \ + --down_sampling_method avg \ + --down_sampling_window $down_sampling_window \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Hourly' \ + --model_id m4_Hourly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers $e_layers \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 128 \ + --d_model $d_model \ + --d_ff 32 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate $learning_rate \ + --train_epochs 50 \ + --patience 20 \ + --down_sampling_layers $down_sampling_layers \ + --down_sampling_method avg \ + --down_sampling_window $down_sampling_window \ + --loss 'SMAPE' \ No newline at end of file diff --git a/scripts/short_term_forecast/TimesNet_M4.sh b/scripts/short_term_forecast/TimesNet_M4.sh new file mode 100644 index 0000000..0040273 --- /dev/null +++ b/scripts/short_term_forecast/TimesNet_M4.sh @@ -0,0 +1,147 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=TimesNet + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Monthly' \ + --model_id m4_Monthly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 32 \ + --d_ff 32 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Yearly' \ + --model_id m4_Yearly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 16 \ + --d_ff 32 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Quarterly' \ + --model_id m4_Quarterly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 64 \ + --d_ff 64 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Daily' \ + --model_id m4_Daily \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 16 \ + --d_ff 16 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Weekly' \ + --model_id m4_Weekly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 32 \ + --d_ff 32 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Hourly' \ + --model_id m4_Hourly \ + --model $model_name \ + --data m4 \ + --features M \ + 
--e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 32 \ + --d_ff 32 \ + --top_k 5 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' diff --git a/scripts/short_term_forecast/Transformer_M4.sh b/scripts/short_term_forecast/Transformer_M4.sh new file mode 100644 index 0000000..ba0af09 --- /dev/null +++ b/scripts/short_term_forecast/Transformer_M4.sh @@ -0,0 +1,135 @@ +export CUDA_VISIBLE_DEVICES=1 + +model_name=Transformer + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Monthly' \ + --model_id m4_Monthly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 512 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Yearly' \ + --model_id m4_Yearly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 512 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Quarterly' \ + --model_id m4_Quarterly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 512 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Weekly' \ + --model_id m4_Weekly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 512 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Daily' \ + --model_id m4_Daily \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 512 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Hourly' \ + --model_id m4_Hourly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 512 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' \ No newline at end of file diff --git a/scripts/short_term_forecast/iTransformer_M4.sh b/scripts/short_term_forecast/iTransformer_M4.sh new file mode 100644 index 0000000..5e4702c --- /dev/null +++ b/scripts/short_term_forecast/iTransformer_M4.sh @@ -0,0 +1,135 @@ +export CUDA_VISIBLE_DEVICES=0 + +model_name=iTransformer + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Monthly' \ + --model_id m4_Monthly \ + 
--model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 512 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Yearly' \ + --model_id m4_Yearly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 512 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Quarterly' \ + --model_id m4_Quarterly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 512 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Weekly' \ + --model_id m4_Weekly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 512 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Daily' \ + --model_id m4_Daily \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 512 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' + +python -u run.py \ + --task_name short_term_forecast \ + --is_training 1 \ + --root_path ./dataset/m4 \ + --seasonal_patterns 'Hourly' \ + --model_id m4_Hourly \ + --model $model_name \ + --data m4 \ + --features M \ + --e_layers 2 \ + --d_layers 1 \ + --factor 3 \ + --enc_in 1 \ + --dec_in 1 \ + --c_out 1 \ + --batch_size 16 \ + --d_model 512 \ + --des 'Exp' \ + --itr 1 \ + --learning_rate 0.001 \ + --loss 'SMAPE' \ No newline at end of file diff --git a/tutorial/TimesNet_tutorial.ipynb b/tutorial/TimesNet_tutorial.ipynb new file mode 100644 index 0000000..d589d2a --- /dev/null +++ b/tutorial/TimesNet_tutorial.ipynb @@ -0,0 +1,1552 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# TimesNet Tutorial\n", + "**Set-up instructions:** this notebook give a tutorial on the learning task supported by `TimesNet`.\n", + "\n", + "`TimesNet` can support basically 5 tasks, which are respectively long-term forecast, short-term forecast, imputation, anomaly detection, classification." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 1. Install Python 3.8. For convenience, execute the following command." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "shellscript" + } + }, + "outputs": [], + "source": [ + "pip install -r requirements.txt" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 2. 
Package Import" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import torch \n", + "import torch.nn as nn\n", + "import torch.nn.functional as F\n", + "import torch.fft\n", + "from layers.Embed import DataEmbedding\n", + "from layers.Conv_Blocks import Inception_Block_V1 \n", + " #convolution block used for convoluting the 2D time data, changeable" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 3. TimesBlock Construction\n", + " The core idea of `TimesNet` lies in the construction of `TimesBlock`, which generally gets the base frequencies by implementing FFT on the data, and then reshapes the times series to 2D variation respectively from the main base frequencies, followed by a 2D convolution whose outputs are reshaped back and added with weight to form the final output.\n", + "\n", + " In the following section, we will have a detailed view on `TimesBlock`.\n", + "\n", + " TimesBlock has 2 members. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class TimesBlock(nn.Module):\n", + " def __init__(self, configs):\n", + " ...\n", + " \n", + " def forward(self, x):\n", + " ..." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First, let's focus on ```__init__(self, configs):```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def __init__(self, configs): ##configs is the configuration defined for TimesBlock\n", + " super(TimesBlock, self).__init__() \n", + " self.seq_len = configs.seq_len ##sequence length \n", + " self.pred_len = configs.pred_len ##prediction length\n", + " self.k = configs.top_k ##k denotes how many top frequencies are \n", + " #taken into consideration\n", + " # parameter-efficient design\n", + " self.conv = nn.Sequential(\n", + " Inception_Block_V1(configs.d_model, configs.d_ff,\n", + " num_kernels=configs.num_kernels),\n", + " nn.GELU(),\n", + " Inception_Block_V1(configs.d_ff, configs.d_model,\n", + " num_kernels=configs.num_kernels)\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Then, have a look at ```forward(self, x)```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def forward(self, x):\n", + " B, T, N = x.size()\n", + " #B: batch size T: length of time series N:number of features\n", + " period_list, period_weight = FFT_for_Period(x, self.k)\n", + " #FFT_for_Period() will be shown later. 
Here, period_list([top_k]) denotes \n", + " #the top_k-significant period and period_weight([B, top_k]) denotes its weight(amplitude)\n", + "\n", + " res = []\n", + " for i in range(self.k):\n", + " period = period_list[i]\n", + "\n", + " # padding : to form a 2D map, we need total length of the sequence, plus the part \n", + " # to be predicted, to be divisible by the period, so padding is needed\n", + " if (self.seq_len + self.pred_len) % period != 0:\n", + " length = (\n", + " ((self.seq_len + self.pred_len) // period) + 1) * period\n", + " padding = torch.zeros([x.shape[0], (length - (self.seq_len + self.pred_len)), x.shape[2]]).to(x.device)\n", + " out = torch.cat([x, padding], dim=1)\n", + " else:\n", + " length = (self.seq_len + self.pred_len)\n", + " out = x\n", + "\n", + " # reshape: we need each channel of a single piece of data to be a 2D variable,\n", + " # Also, in order to implement the 2D conv later on, we need to adjust the 2 dimensions \n", + " # to be convolutioned to the last 2 dimensions, by calling the permute() func.\n", + " # Whereafter, to make the tensor contiguous in memory, call contiguous()\n", + " out = out.reshape(B, length // period, period,\n", + " N).permute(0, 3, 1, 2).contiguous()\n", + " \n", + " #2D convolution to grap the intra- and inter- period information\n", + " out = self.conv(out)\n", + "\n", + " # reshape back, similar to reshape\n", + " out = out.permute(0, 2, 3, 1).reshape(B, -1, N)\n", + " \n", + " #truncating down the padded part of the output and put it to result\n", + " res.append(out[:, :(self.seq_len + self.pred_len), :])\n", + " res = torch.stack(res, dim=-1) #res: 4D [B, length , N, top_k]\n", + "\n", + " # adaptive aggregation\n", + " #First, use softmax to get the normalized weight from amplitudes --> 2D [B,top_k]\n", + " period_weight = F.softmax(period_weight, dim=1) \n", + "\n", + " #after two unsqueeze(1),shape -> [B,1,1,top_k],so repeat the weight to fit the shape of res\n", + " period_weight = period_weight.unsqueeze(\n", + " 1).unsqueeze(1).repeat(1, T, N, 1)\n", + " \n", + " #add by weight the top_k periods' result, getting the result of this TimesBlock\n", + " res = torch.sum(res * period_weight, -1)\n", + "\n", + " # residual connection\n", + " res = res + x\n", + " return res" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The ```FFT_for_Period``` above is given by:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def FFT_for_Period(x, k=2):\n", + " # xf shape [B, T, C], denoting the amplitude of frequency(T) given the datapiece at B,N\n", + " xf = torch.fft.rfft(x, dim=1) \n", + "\n", + " # find period by amplitudes: here we assume that the periodic features are basically constant\n", + " # in different batch and channel, so we mean out these two dimensions, getting a list frequency_list with shape[T] \n", + " # each element at pos t of frequency_list denotes the overall amplitude at frequency (t)\n", + " frequency_list = abs(xf).mean(0).mean(-1) \n", + " frequency_list[0] = 0\n", + "\n", + " #by torch.topk(),we can get the biggest k elements of frequency_list, and its positions(i.e. 
the k-main frequencies in top_list)\n", + "    _, top_list = torch.topk(frequency_list, k)\n", + "\n", + "    #Returns a new Tensor 'top_list', detached from the current graph.\n", + "    #The result will never require gradient. Convert it to a numpy instance\n", + "    top_list = top_list.detach().cpu().numpy()\n", + "   \n", + "    #period: a list of shape [top_k], recording the periods of the main frequencies respectively\n", + "    period = x.shape[1] // top_list\n", + "\n", + "    #Here, the 2nd item returned has a shape of [B, top_k], representing the biggest top_k amplitudes \n", + "    # for each piece of data, with N features being averaged.\n", + "    return period, abs(xf).mean(-1)[:, top_list] " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To make it clearer, please see the figures below.\n", + "\n", + "![FFT demonstrator](./fft.png)\n", + "\n", + "![2D Conv demonstrator](./conv.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For more details, please read our paper \n", + "(link: https://openreview.net/pdf?id=ju_Uqw384Oq)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 4. TimesNet\n", + "\n", + "So far we've got `TimesBlock`, which excels at retrieving intra- and inter-period temporal information. With it, we are ready to build `TimesNet`. `TimesNet` is proficient in multiple tasks, including short- and long-term forecasting, imputation, classification, and anomaly detection.\n", + "\n", + "In this section, we'll take a detailed look at how `TimesNet` gains its power in these tasks." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class Model(nn.Module):\n", + "    def __init__(self, configs):\n", + "        ...\n", + "    \n", + "    def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):\n", + "        ...\n", + "\n", + "    def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask):\n", + "        ...\n", + "\n", + "    def anomaly_detection(self, x_enc):\n", + "        ...\n", + "    \n", + "    def classification(self, x_enc, x_mark_enc):\n", + "        ...\n", + "\n", + "    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):\n", + "        ..."
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First of all, let's focus on ```__init__(self, configs):```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def __init__(self, configs):\n", + " super(Model, self).__init__()\n", + " #params init\n", + " self.configs = configs\n", + " self.task_name = configs.task_name\n", + " self.seq_len = configs.seq_len\n", + " self.label_len = configs.label_len\n", + " self.pred_len = configs.pred_len\n", + "\n", + " #stack TimesBlock for e_layers times to form the main part of TimesNet, named model\n", + " self.model = nn.ModuleList([TimesBlock(configs)\n", + " for _ in range(configs.e_layers)])\n", + " \n", + " #embedding & normalization\n", + " # enc_in is the encoder input size, the number of features for a piece of data\n", + " # d_model is the dimension of embedding\n", + " self.enc_embedding = DataEmbedding(configs.enc_in, configs.d_model, configs.embed, configs.freq,\n", + " configs.dropout)\n", + " self.layer = configs.e_layers # num of encoder layers\n", + " self.layer_norm = nn.LayerNorm(configs.d_model)\n", + "\n", + " #define the some layers for different tasks\n", + " if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast':\n", + " self.predict_linear = nn.Linear(\n", + " self.seq_len, self.pred_len + self.seq_len)\n", + " self.projection = nn.Linear(\n", + " configs.d_model, configs.c_out, bias=True)\n", + " if self.task_name == 'imputation' or self.task_name == 'anomaly_detection':\n", + " self.projection = nn.Linear(\n", + " configs.d_model, configs.c_out, bias=True)\n", + " if self.task_name == 'classification':\n", + " self.act = F.gelu\n", + " self.dropout = nn.Dropout(configs.dropout)\n", + " self.projection = nn.Linear(\n", + " configs.d_model * configs.seq_len, configs.num_class)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 4.1 Forecast\n", + "\n", + "The basic idea of forecasting is to lengthen the known sequence to (seq_len+pred_len), which is the total length after forecasting. Then by several TimesBlock layers together with layer normalization, some underlying intra- and inter- period information is represented. With these information, we can project it to the output space. Whereafter by denorm ( if Non-stationary Transformer) we get the final output." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):\n", + " # Normalization from Non-stationary Transformer at temporal dimension\n", + " means = x_enc.mean(1, keepdim=True).detach() #[B,T]\n", + " x_enc = x_enc - means\n", + " stdev = torch.sqrt(\n", + " torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5)\n", + " x_enc /= stdev\n", + "\n", + " # embedding: projecting a number to a C-channel vector\n", + " enc_out = self.enc_embedding(x_enc, x_mark_enc) # [B,T,C] C is d_model\n", + " enc_out = self.predict_linear(enc_out.permute(0, 2, 1)).permute(\n", + " 0, 2, 1) # align temporal dimension [B,pred_len+seq_len,C]\n", + " \n", + " # TimesNet: pass through TimesBlock for self.layer times each with layer normalization\n", + " for i in range(self.layer):\n", + " enc_out = self.layer_norm(self.model[i](enc_out))\n", + "\n", + " # project back #[B,T,d_model]-->[B,T,c_out]\n", + " dec_out = self.projection(enc_out) \n", + "\n", + " # De-Normalization from Non-stationary Transformer\n", + " dec_out = dec_out * \\\n", + " (stdev[:, 0, :].unsqueeze(1).repeat(\n", + " 1, self.pred_len + self.seq_len, 1)) #lengthen the stdev to fit the dec_out\n", + " dec_out = dec_out + \\\n", + " (means[:, 0, :].unsqueeze(1).repeat(\n", + " 1, self.pred_len + self.seq_len, 1)) #lengthen the mean to fit the dec_out\n", + " return dec_out" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 4.2 Imputation\n", + "\n", + "Imputation is a task aiming at completing some missing value in the time series, so in some degree it's similar to forecast. We can still use the similar step to cope with it." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask):\n", + " # Normalization from Non-stationary Transformer\n", + " means = torch.sum(x_enc, dim=1) / torch.sum(mask == 1, dim=1)\n", + " means = means.unsqueeze(1).detach()\n", + " x_enc = x_enc - means\n", + " x_enc = x_enc.masked_fill(mask == 0, 0)\n", + " stdev = torch.sqrt(torch.sum(x_enc * x_enc, dim=1) /\n", + " torch.sum(mask == 1, dim=1) + 1e-5)\n", + " stdev = stdev.unsqueeze(1).detach()\n", + " x_enc /= stdev\n", + "\n", + " # embedding\n", + " enc_out = self.enc_embedding(x_enc, x_mark_enc) # [B,T,C]\n", + " # TimesNet\n", + " for i in range(self.layer):\n", + " enc_out = self.layer_norm(self.model[i](enc_out))\n", + " # project back\n", + " dec_out = self.projection(enc_out)\n", + "\n", + " # De-Normalization from Non-stationary Transformer\n", + " dec_out = dec_out * \\\n", + " (stdev[:, 0, :].unsqueeze(1).repeat(\n", + " 1, self.pred_len + self.seq_len, 1))\n", + " dec_out = dec_out + \\\n", + " (means[:, 0, :].unsqueeze(1).repeat(\n", + " 1, self.pred_len + self.seq_len, 1))\n", + " return dec_out" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 4.3 Anomaly Detection\n", + "\n", + "Similar to Imputation." 
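Before looking at the anomaly-detection code, here is a minimal standalone sketch of the instance-wise normalization / de-normalization trick (borrowed from the Non-stationary Transformer) that forecast, imputation, and anomaly detection all share. The tensor shapes, the function name, and the zero-padded placeholder "model output" below are illustrative assumptions, not code from this repository:

import torch

def normalize_then_denormalize(x: torch.Tensor, pred_len: int) -> torch.Tensor:
    # x: [B, T, N] input window (hypothetical example data, not the notebook's loader).
    B, T, N = x.shape
    # Normalization over the time dimension, as in the forecast/imputation/anomaly code.
    means = x.mean(1, keepdim=True).detach()                                    # [B, 1, N]
    stdev = torch.sqrt(torch.var(x, dim=1, keepdim=True, unbiased=False) + 1e-5)
    x_norm = (x - means) / stdev
    # A real model would map x_norm to an output of length T + pred_len;
    # here we just pad with zeros to stand in for the predicted part.
    out = torch.cat([x_norm, torch.zeros(B, pred_len, N)], dim=1)
    # De-normalization: broadcast the stored statistics over the lengthened output and invert.
    out = out * stdev[:, 0, :].unsqueeze(1).repeat(1, T + pred_len, 1)
    out = out + means[:, 0, :].unsqueeze(1).repeat(1, T + pred_len, 1)
    return out

# e.g. normalize_then_denormalize(torch.randn(4, 96, 7), pred_len=24) -> shape [4, 120, 7]

The repeat over the lengthened time axis simply broadcasts the per-series statistics of the input window onto an output that may be longer than the input.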
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def anomaly_detection(self, x_enc):\n", + " # Normalization from Non-stationary Transformer\n", + " means = x_enc.mean(1, keepdim=True).detach()\n", + " x_enc = x_enc - means\n", + " stdev = torch.sqrt(\n", + " torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5)\n", + " x_enc /= stdev\n", + " # embedding\n", + " enc_out = self.enc_embedding(x_enc, None) # [B,T,C]\n", + " # TimesNet\n", + " for i in range(self.layer):\n", + " enc_out = self.layer_norm(self.model[i](enc_out))\n", + " # project back\n", + " dec_out = self.projection(enc_out)\n", + " # De-Normalization from Non-stationary Transformer\n", + " dec_out = dec_out * \\\n", + " (stdev[:, 0, :].unsqueeze(1).repeat(\n", + " 1, self.pred_len + self.seq_len, 1))\n", + " dec_out = dec_out + \\\n", + " (means[:, 0, :].unsqueeze(1).repeat(\n", + " 1, self.pred_len + self.seq_len, 1))\n", + " return dec_out" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 4.4 Classification" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def classification(self, x_enc, x_mark_enc):\n", + " # embedding\n", + " enc_out = self.enc_embedding(x_enc, None) # [B,T,C]\n", + " # TimesNet\n", + " for i in range(self.layer):\n", + " enc_out = self.layer_norm(self.model[i](enc_out))\n", + "\n", + " # Output\n", + " # the output transformer encoder/decoder embeddings don't include non-linearity\n", + " output = self.act(enc_out)\n", + " output = self.dropout(output)\n", + "\n", + " # zero-out padding embeddings:The primary role of x_mark_enc in the code is to \n", + " # zero out the embeddings for padding positions in the output tensor through \n", + " # element-wise multiplication, helping the model to focus on meaningful data \n", + " # while disregarding padding.\n", + " output = output * x_mark_enc.unsqueeze(-1)\n", + " \n", + " # (batch_size, seq_length * d_model)\n", + " output = output.reshape(output.shape[0], -1)\n", + " output = self.projection(output) # (batch_size, num_classes)\n", + " return output" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In the end, with so many tasks above, we become able to complete `forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):`. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):\n", + " if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast':\n", + " dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)\n", + " return dec_out[:, -self.pred_len:, :] # [B, L, D] return the predicted part of sequence\n", + " if self.task_name == 'imputation':\n", + " dec_out = self.imputation(\n", + " x_enc, x_mark_enc, x_dec, x_mark_dec, mask)\n", + " return dec_out # [B, L, D] return the whole sequence with missing value estimated\n", + " if self.task_name == 'anomaly_detection':\n", + " dec_out = self.anomaly_detection(x_enc)\n", + " return dec_out # [B, L, D] return the sequence that should be correct\n", + " if self.task_name == 'classification':\n", + " dec_out = self.classification(x_enc, x_mark_enc)\n", + " return dec_out # [B, N] return the classification result\n", + " return None" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 5. 
Training and Settings\n", + "\n", + "By now we've successfully build up `TimesNet`. We are now facing the problem how to train and test this neural network. The action of training, validating as well as testing is implemented at __*exp*__ part, in which codes for different tasks are gathered. These experiments are not only for `TimesNet` training, but also feasible for any other time series representation model. But here, we simply use `TimesNet` to analyse.\n", + "\n", + "`TimesNet` is a state-of-art in multiple tasks, while here we would only introduce its training for long-term forecast task, since the backbone of the training process for other tasks is similar to this one. Again, test and validation code can be easily understood once you've aware how the training process works. So first of all, we are going to focus on the training of `TimesNet` on task long-term forecasting.\n", + "\n", + "We will discuss many aspects, including the training process, training loss etc." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 5.1 Training for Long-term Forecast Task\n", + "\n", + "The following codes represents the process of training model for long-term forecasting task. We'll have a detailed look at it. To make it brief, the training part can be briefly divided into several parts, including Data Preparation, Creating Save Path, Initialization, Optimizer and Loss Function Selection, Using Mixed Precision Training, Training Loop, Validation and Early Stopping, Learning Rate Adjustment, Loading the Best Model.\n", + "\n", + "For more details, please see the code below. 'train' process is defined in the experiment __class Exp_Long_Term_Forecast__." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def train(self, setting): #setting is the args for this model training\n", + " #get train dataloader\n", + " train_data, train_loader = self._get_data(flag='train')\n", + " vali_data, vali_loader = self._get_data(flag='val')\n", + " test_data, test_loader = self._get_data(flag='test')\n", + "\n", + " # set path of checkpoint for saving and loading model\n", + " path = os.path.join(self.args.checkpoints, setting)\n", + " if not os.path.exists(path):\n", + " os.makedirs(path)\n", + " time_now = time.time()\n", + "\n", + " train_steps = len(train_loader)\n", + "\n", + " # EarlyStopping is typically a custom class or function that monitors the performance \n", + " # of a model during training, usually by tracking a certain metric (commonly validation \n", + " # loss or accuracy).It's a common technique used in deep learning to prevent overfitting \n", + " # during the training\n", + " early_stopping = EarlyStopping(patience=self.args.patience, verbose=True)\n", + "\n", + " #Optimizer and Loss Function Selection\n", + " model_optim = self._select_optimizer()\n", + " criterion = self._select_criterion()\n", + "\n", + " # AMP training is a technique that uses lower-precision data types (e.g., float16) \n", + " # for certain computations to accelerate training and reduce memory usage.\n", + " if self.args.use_amp: \n", + " scaler = torch.cuda.amp.GradScaler()\n", + " for epoch in range(self.args.train_epochs):\n", + " iter_count = 0\n", + " train_loss = []\n", + " self.model.train()\n", + " epoch_time = time.time()\n", + "\n", + " #begin training in this epoch\n", + " for i, (batch_x, 
batch_y, batch_x_mark, batch_y_mark) in enumerate(train_loader):\n", + " iter_count += 1\n", + " model_optim.zero_grad()\n", + " batch_x = batch_x.float().to(self.device) #input features\n", + " batch_y = batch_y.float().to(self.device) #target features\n", + "\n", + " # _mark holds information about time-related features. Specifically, it is a \n", + " # tensor that encodes temporal information and is associated with the \n", + " # input data batch_x.\n", + " batch_x_mark = batch_x_mark.float().to(self.device)\n", + " batch_y_mark = batch_y_mark.float().to(self.device)\n", + " # decoder input(didn't use in TimesNet case)\n", + " dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len:, :]).float()\n", + " dec_inp = torch.cat([batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device)\n", + " # encoder - decoder\n", + " if self.args.use_amp: #in the case of TimesNet, use_amp should be False\n", + " with torch.cuda.amp.autocast():\n", + " # whether to output attention in ecoder,in TimesNet case is no\n", + " if self.args.output_attention: \n", + " outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]\n", + " # model the input\n", + " else:\n", + " outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)\n", + "\n", + " # forecasting task, options:[M, S, MS]; M:multivariate predict multivariate, \n", + " # S:univariate predict univariate, MS:multivariate predict univariate'\n", + " #if multivariate predict univariate',then output should be the last column of the decoder\n", + " # output, so f_dim = -1 to only contain the last column, else is all columns\n", + " f_dim = -1 if self.args.features == 'MS' else 0 \n", + " outputs = outputs[:, -self.args.pred_len:, f_dim:]\n", + " batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device)\n", + "\n", + " # calc loss\n", + " loss = criterion(outputs, batch_y)\n", + " train_loss.append(loss.item())\n", + " else: #similar to when use_amp is True\n", + " if self.args.output_attention:\n", + " outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]\n", + " else:\n", + " outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)\n", + " f_dim = -1 if self.args.features == 'MS' else 0\n", + " outputs = outputs[:, -self.args.pred_len:, f_dim:]\n", + " batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device)\n", + " loss = criterion(outputs, batch_y)\n", + " train_loss.append(loss.item())\n", + "\n", + " # When train rounds attain some 100-multiple, print speed, left time, loss. 
etc feedback\n", + " if (i + 1) % 100 == 0:\n", + " print(\"\\titers: {0}, epoch: {1} | loss: {2:.7f}\".format(i + 1, epoch + 1, loss.item()))\n", + " speed = (time.time() - time_now) / iter_count\n", + " left_time = speed * ((self.args.train_epochs - epoch) * train_steps - i)\n", + " print('\\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time))\n", + " iter_count = 0\n", + " time_now = time.time()\n", + "\n", + " #BP\n", + " if self.args.use_amp:\n", + " scaler.scale(loss).backward()\n", + " scaler.step(model_optim)\n", + " scaler.update()\n", + " else:\n", + " loss.backward()\n", + " model_optim.step()\n", + " \n", + " #This epoch comes to end, print information\n", + " print(\"Epoch: {} cost time: {}\".format(epoch + 1, time.time() - epoch_time))\n", + " train_loss = np.average(train_loss)\n", + "\n", + " #run test and validation on current model\n", + " vali_loss = self.vali(vali_data, vali_loader, criterion)\n", + " test_loss = self.vali(test_data, test_loader, criterion)\n", + "\n", + " #print train, test, vali loss information\n", + " print(\"Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Vali Loss: {3:.7f} Test Loss: {4:.7f}\".format(\n", + " epoch + 1, train_steps, train_loss, vali_loss, test_loss))\n", + " \n", + " #Decide whether to trigger Early Stopping. if early_stop is true, it means that \n", + " #this epoch's training is now at a flat slope, so stop further training for this epoch.\n", + " early_stopping(vali_loss, self.model, path)\n", + " if early_stopping.early_stop:\n", + " print(\"Early stopping\")\n", + " break\n", + "\n", + " #adjust learning keys\n", + " adjust_learning_rate(model_optim, epoch + 1, self.args)\n", + " best_model_path = path + '/' + 'checkpoint.pth'\n", + "\n", + " # loading the trained model's state dictionary from a saved checkpoint file \n", + " # located at best_model_path.\n", + " self.model.load_state_dict(torch.load(best_model_path))\n", + " return self.model" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If you want to learn more, please see it at exp/exp_long_term_forecasting.py" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 5.2 Early Stopping Mechanism\n", + "\n", + "__EarlyStopping__ is typically a custom class or function that monitors the performance of a model during training, usually by tracking a certain metric (commonly validation loss or accuracy).It's a common technique used in deep learning to prevent overfitting during the training.\n", + "\n", + "Let's see the code below(original code is in `tools.py`)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class EarlyStopping:\n", + " def __init__(self, patience=7, verbose=False, delta=0):\n", + " self.patience = patience # how many times will you tolerate for loss not being on decrease\n", + " self.verbose = verbose # whether to print tip info\n", + " self.counter = 0 # now how many times loss not on decrease\n", + " self.best_score = None\n", + " self.early_stop = False\n", + " self.val_loss_min = np.Inf\n", + " self.delta = delta\n", + "\n", + " def __call__(self, val_loss, model, path):\n", + " score = -val_loss\n", + " if self.best_score is None:\n", + " self.best_score = score\n", + " self.save_checkpoint(val_loss, model, path)\n", + "\n", + " # meaning: current score is not 'delta' better than best_score, representing that \n", + " # further training may not bring remarkable improvement in loss. 
\n", + " elif score < self.best_score + self.delta: \n", + " self.counter += 1\n", + " print(f'EarlyStopping counter: {self.counter} out of {self.patience}')\n", + " # 'No Improvement' times become higher than patience --> Stop Further Training\n", + " if self.counter >= self.patience:\n", + " self.early_stop = True\n", + "\n", + " else: #model's loss is still on decrease, save the now best model and go on training\n", + " self.best_score = score\n", + " self.save_checkpoint(val_loss, model, path)\n", + " self.counter = 0\n", + "\n", + " def save_checkpoint(self, val_loss, model, path):\n", + " ### used for saving the current best model\n", + " if self.verbose:\n", + " print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model ...')\n", + " torch.save(model.state_dict(), path + '/' + 'checkpoint.pth')\n", + " self.val_loss_min = val_loss" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 5.3 Optimizer and Criterion\n", + "\n", + "The optimizer and criterion are defined in __class Exp_Long_Term_Forecast__ and called in the training process by function `self._select_optimizer()` and `self._select_criterion()`. Here, for long-term forecasting task, we simply adopt Adam optimizer and MSELoss to meature the loss between real data and predicted ones." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def _select_optimizer(self):\n", + " model_optim = optim.Adam(self.model.parameters(), lr=self.args.learning_rate)\n", + " return model_optim\n", + "\n", + "def _select_criterion(self):\n", + " criterion = nn.MSELoss()\n", + " return criterion" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 5.4 Automatic Mixed Precision(AMP)\n", + "\n", + "AMP is a technique used in deep learning to improve training speed and reduce memory usage. AMP achieves this by mixing calculations in half-precision (16-bit floating-point) and single-precision (32-bit floating-point).\n", + "\n", + "Let's have a closer look on this snippet:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#in forward process:\n", + "with torch.cuda.amp.autocast():\n", + "\n", + "...\n", + "\n", + "#in BP process:\n", + "if self.args.use_amp:\n", + " scaler.scale(loss).backward()\n", + " scaler.step(model_optim)\n", + " scaler.update()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "` with torch.cuda.amp.autocast():` : The purpose of using torch.cuda.amp.autocast() is to take advantage of the speed and memory efficiency benefits of mixed-precision training while maintaining numerical stability. Some deep learning models can benefit significantly from this technique, especially on modern GPUs with hardware support for half-precision arithmetic. It allows you to perform certain calculations more quickly while still ensuring that critical calculations (e.g., gradient updates) are performed with sufficient precision to avoid loss of accuracy.\n", + "\n", + "`scaler.scale(loss).backward()`: If AMP is enabled, it uses a scaler object created with torch.cuda.amp.GradScaler() to automatically scale the loss and perform backward propagation. This is a crucial part of AMP, ensuring numerical stability. 
Before backpropagation, the loss is scaled to an appropriate range to prevent gradients from diverging too quickly or causing numerical instability.\n", + "\n", + "`scaler.step(model_optim)`: Next, the scaler calls the step method, which applies the scaled gradients to the model's optimizer (model_optim). This is used to update the model's weights to minimize the loss function.\n", + "\n", + "`scaler.update()`: Finally, the scaler calls the update method, which updates the scaling factor to ensure correct scaling of the loss for the next iteration. This step helps dynamically adjust the scaling of gradients to adapt to different training scenarios." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 5.5 Learning Rate Adjustment\n", + "\n", + "While the optimizer are responsible for adapting the learning rate with epochs, we would still like to do some adjustment on it manually, as indicated in the function `adjust_learning_rate(model_optim, epoch + 1, self.args)`, whose codes are shown below(original code is in `tools.py`): " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def adjust_learning_rate(optimizer, epoch, args):\n", + "\n", + " #first type: learning rate decrease with epoch by exponential\n", + " if args.lradj == 'type1':\n", + " lr_adjust = {epoch: args.learning_rate * (0.5 ** ((epoch - 1) // 1))}\n", + "\n", + " #second type: learning rate decrease manually\n", + " elif args.lradj == 'type2':\n", + " lr_adjust = {\n", + " 2: 5e-5, 4: 1e-5, 6: 5e-6, 8: 1e-6,\n", + " 10: 5e-7, 15: 1e-7, 20: 5e-8\n", + " }\n", + "\n", + " #1st type: update in each epoch\n", + " #2nd type: only update in epochs that are written in Dict lr_adjust\n", + " if epoch in lr_adjust.keys():\n", + " lr = lr_adjust[epoch]\n", + " \n", + " # change the learning rate for different parameter groups within the optimizer\n", + " for param_group in optimizer.param_groups:\n", + " param_group['lr'] = lr\n", + " print('Updating learning rate to {}'.format(lr))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 6. Validation and Testing\n", + "\n", + "During training, the model continuously adjusts its weights and parameters to minimize training error. However, this may not reflect the model's performance on unseen data. Validation allows us to periodically assess the model's performance on data that is different from the training data, providing insights into the model's generalization ability.\n", + "\n", + "By comparing performance on the validation set, we can identify whether the model is overfitting. Overfitting occurs when a model performs well on training data but poorly on unseen data. 
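Since early stopping is the library's main guard against this, here is a small, self-contained illustration of how the `EarlyStopping` class from section 5.2 reacts to a made-up validation-loss curve (the loss values and the temporary checkpoint directory are hypothetical, and the import assumes the repository's `utils/tools.py` is on the Python path):

```python
import tempfile
import torch.nn as nn
from utils.tools import EarlyStopping   # the class shown in section 5.2

# Hypothetical validation losses: they improve for three epochs, then stall.
val_losses = [0.90, 0.80, 0.75, 0.76, 0.77, 0.78, 0.79]

model = nn.Linear(4, 1)                  # stand-in model, only needed so a state_dict can be saved
ckpt_dir = tempfile.mkdtemp()            # EarlyStopping writes <dir>/checkpoint.pth on improvement
early_stopping = EarlyStopping(patience=3, verbose=True)

for epoch, vl in enumerate(val_losses, start=1):
    early_stopping(vl, model, ckpt_dir)
    if early_stopping.early_stop:
        print(f"Early stopping triggered after epoch {epoch}")   # epoch 6 with these numbers
        break
```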
Monitoring performance on the validation set helps detect overfitting early and take measures to prevent it, such as early stopping or adjusting hyperparameters.\n", + "\n", + "Here, we still take long-term forecasting as an example, similar to train process:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def vali(self, vali_data, vali_loader, criterion):\n", + " total_loss = []\n", + "\n", + " #evaluation mode\n", + " self.model.eval()\n", + " with torch.no_grad():\n", + " for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(vali_loader):\n", + " batch_x = batch_x.float().to(self.device)\n", + " batch_y = batch_y.float()\n", + "\n", + " batch_x_mark = batch_x_mark.float().to(self.device)\n", + " batch_y_mark = batch_y_mark.float().to(self.device)\n", + "\n", + " # decoder input\n", + " dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len:, :]).float()\n", + " dec_inp = torch.cat([batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device)\n", + " # encoder - decoder\n", + " if self.args.use_amp:\n", + " with torch.cuda.amp.autocast():\n", + " if self.args.output_attention:\n", + " outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]\n", + " else:\n", + " outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)\n", + " else:\n", + " if self.args.output_attention:\n", + " outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]\n", + " else:\n", + " outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)\n", + " f_dim = -1 if self.args.features == 'MS' else 0\n", + " outputs = outputs[:, -self.args.pred_len:, f_dim:]\n", + " batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device)\n", + "\n", + " pred = outputs.detach().cpu()\n", + " true = batch_y.detach().cpu()\n", + "\n", + " loss = criterion(pred, true)\n", + "\n", + " total_loss.append(loss)\n", + " total_loss = np.average(total_loss)\n", + " self.model.train()\n", + " return total_loss" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Testing is similar to validation, but it's purpose is to examine how well the model behaves, so it's common to add some visualization with __matplotlib.pyplot__. 
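Before looking at the visualization and test routines below, it may help to see the tensor bookkeeping that train, vali and test all share, on toy shapes (the sizes are made up, chosen to match the seq_len=96, label_len=48, pred_len=96 setting used throughout this tutorial):

```python
import torch

# Toy shapes: batch of 4, seq_len=96, label_len=48, pred_len=96, 7 variables.
batch_size, seq_len, label_len, pred_len, n_vars = 4, 96, 48, 96, 7
batch_x = torch.randn(batch_size, seq_len, n_vars)
batch_y = torch.randn(batch_size, label_len + pred_len, n_vars)   # what the dataloader returns

# Decoder input: the known label_len history followed by zeros over the forecast horizon.
dec_inp = torch.zeros_like(batch_y[:, -pred_len:, :])
dec_inp = torch.cat([batch_y[:, :label_len, :], dec_inp], dim=1)
print(dec_inp.shape)                        # torch.Size([4, 144, 7])

# Pretend model output covering the whole decoder window.
outputs = torch.randn(batch_size, label_len + pred_len, n_vars)

# 'MS' (multivariate in, univariate out) keeps only the last channel; 'M'/'S' keep all.
features = 'MS'
f_dim = -1 if features == 'MS' else 0
outputs = outputs[:, -pred_len:, f_dim:]
target = batch_y[:, -pred_len:, f_dim:]
print(outputs.shape, target.shape)          # torch.Size([4, 96, 1]) for both

print(torch.nn.MSELoss()(outputs, target).item())   # the quantity reported as the loss
```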
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "\n", + "def visual(true, preds=None, name='./pic/test.pdf'):\n", + " \"\"\"\n", + " Results visualization\n", + " \"\"\"\n", + " plt.figure()\n", + " plt.plot(true, label='GroundTruth', linewidth=2)\n", + " if preds is not None:\n", + " plt.plot(preds, label='Prediction', linewidth=2)\n", + " plt.legend()\n", + " plt.savefig(name, bbox_inches='tight')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def test(self, setting, test=0):\n", + " test_data, test_loader = self._get_data(flag='test')\n", + " if test:\n", + " print('loading model')\n", + " self.model.load_state_dict(torch.load(os.path.join('./checkpoints/' + setting, 'checkpoint.pth')))\n", + "\n", + " preds = []\n", + " trues = []\n", + " folder_path = './test_results/' + setting + '/'\n", + " if not os.path.exists(folder_path):\n", + " os.makedirs(folder_path)\n", + "\n", + " self.model.eval()\n", + " with torch.no_grad():\n", + " for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(test_loader):\n", + " batch_x = batch_x.float().to(self.device)\n", + " batch_y = batch_y.float().to(self.device)\n", + "\n", + " batch_x_mark = batch_x_mark.float().to(self.device)\n", + " batch_y_mark = batch_y_mark.float().to(self.device)\n", + "\n", + " # decoder input\n", + " dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len:, :]).float()\n", + " dec_inp = torch.cat([batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device)\n", + " # encoder - decoder\n", + " if self.args.use_amp:\n", + " with torch.cuda.amp.autocast():\n", + " if self.args.output_attention:\n", + " outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]\n", + " else:\n", + " outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)\n", + " else:\n", + " if self.args.output_attention:\n", + " outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]\n", + "\n", + " else:\n", + " outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)\n", + "\n", + " f_dim = -1 if self.args.features == 'MS' else 0\n", + " outputs = outputs[:, -self.args.pred_len:, f_dim:]\n", + " batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device)\n", + " outputs = outputs.detach().cpu().numpy()\n", + " batch_y = batch_y.detach().cpu().numpy()\n", + "\n", + " #inverse the data if scaled\n", + " if test_data.scale and self.args.inverse:\n", + " outputs = test_data.inverse_transform(outputs)\n", + " batch_y = test_data.inverse_transform(batch_y)\n", + "\n", + " pred = outputs\n", + " true = batch_y\n", + "\n", + " preds.append(pred)\n", + " trues.append(true)\n", + "\n", + " #visualize one piece of data every 20\n", + " if i % 20 == 0:\n", + " input = batch_x.detach().cpu().numpy()\n", + " #the whole sequence\n", + " gt = np.concatenate((input[0, :, -1], true[0, :, -1]), axis=0)\n", + " pd = np.concatenate((input[0, :, -1], pred[0, :, -1]), axis=0)\n", + " visual(gt, pd, os.path.join(folder_path, str(i) + '.pdf'))\n", + "\n", + " preds = np.array(preds)\n", + " trues = np.array(trues) # shape[batch_num, batch_size, pred_len, features]\n", + " print('test shape:', preds.shape, trues.shape)\n", + " preds = preds.reshape(-1, preds.shape[-2], preds.shape[-1])\n", + " trues = trues.reshape(-1, trues.shape[-2], trues.shape[-1])\n", + " print('test shape:', preds.shape, trues.shape)\n", + "\n", + " # 
result save\n", + " folder_path = './results/' + setting + '/'\n", + " if not os.path.exists(folder_path):\n", + " os.makedirs(folder_path)\n", + "\n", + " mae, mse, rmse, mape, mspe = metric(preds, trues)\n", + " print('mse:{}, mae:{}'.format(mse, mae))\n", + " f = open(\"result_long_term_forecast.txt\", 'a')\n", + " f.write(setting + \" \\n\")\n", + " f.write('mse:{}, mae:{}'.format(mse, mae))\n", + " f.write('\\n')\n", + " f.write('\\n')\n", + " f.close()\n", + " \n", + " np.save(folder_path + 'metrics.npy', np.array([mae, mse, rmse, mape, mspe]))\n", + " np.save(folder_path + 'pred.npy', preds)\n", + " np.save(folder_path + 'true.npy', trues)\n", + "\n", + " return\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 7. Dataloader and DataProvider\n", + "\n", + "In the process of training, we simply take the dataloader for granted, by the function `self._get_data(flag='train')`. So how does this line work? Have a look at the definition(in __class Exp_Long_Term_Forecast__):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def _get_data(self, flag):\n", + " data_set, data_loader = data_provider(self.args, flag)\n", + " return data_set, data_loader" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "One step forward, see `data_provider(self.args, flag)`(in `data_factory.py`):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Below are some dataloaders defined in data_loader.py. If you want to add your own data, \n", + "# go and check data_loader.py to rewrite a dataloader to fit your data.\n", + "data_dict = {\n", + " 'ETTh1': Dataset_ETT_hour,\n", + " 'ETTh2': Dataset_ETT_hour,\n", + " 'ETTm1': Dataset_ETT_minute,\n", + " 'ETTm2': Dataset_ETT_minute,\n", + " 'custom': Dataset_Custom,\n", + " 'm4': Dataset_M4,\n", + " 'PSM': PSMSegLoader,\n", + " 'MSL': MSLSegLoader,\n", + " 'SMAP': SMAPSegLoader,\n", + " 'SMD': SMDSegLoader,\n", + " 'SWAT': SWATSegLoader,\n", + " 'UEA': UEAloader\n", + "}\n", + "\n", + "\n", + "def data_provider(args, flag):\n", + " Data = data_dict[args.data] #data_provider\n", + "\n", + " # time features encoding, options:[timeF, fixed, learned]\n", + " timeenc = 0 if args.embed != 'timeF' else 1\n", + "\n", + " #test data provider\n", + " if flag == 'test':\n", + " shuffle_flag = False\n", + " drop_last = True\n", + " if args.task_name == 'anomaly_detection' or args.task_name == 'classification':\n", + " batch_size = args.batch_size\n", + "\n", + " #Some tasks during the testing phase may require evaluating samples one at a time. \n", + " # This could be due to variations in sample sizes in the test data or because the \n", + " # evaluation process demands finer-grained results or different processing. 
\n", + " else:\n", + " batch_size = 1 # bsz=1 for evaluation\n", + "\n", + " #freq for time features encoding, \n", + " # options:[s:secondly, t:minutely, h:hourly, d:daily, b:business days, w:weekly,\n", + " # m:monthly], you can also use more detailed freq like 15min or 3h')\n", + " freq = args.freq\n", + " else:\n", + " shuffle_flag = True\n", + " drop_last = True\n", + " batch_size = args.batch_size # bsz for train and valid\n", + " freq = args.freq\n", + "\n", + " if args.task_name == 'anomaly_detection':\n", + " drop_last = False\n", + " data_set = Data(\n", + " root_path=args.root_path, #root path of the data file\n", + " win_size=args.seq_len, #input sequence length\n", + " flag=flag,\n", + " )\n", + " print(flag, len(data_set))\n", + " data_loader = DataLoader(\n", + " data_set,\n", + " batch_size=batch_size,\n", + " shuffle=shuffle_flag,\n", + " num_workers=args.num_workers,#data loader num workers\n", + " drop_last=drop_last)\n", + " return data_set, data_loader\n", + "\n", + " elif args.task_name == 'classification':\n", + " drop_last = False\n", + " data_set = Data(\n", + " root_path=args.root_path,\n", + " flag=flag,\n", + " )\n", + "\n", + " data_loader = DataLoader(\n", + " data_set,\n", + " batch_size=batch_size,\n", + " shuffle=shuffle_flag,\n", + " num_workers=args.num_workers,\n", + " drop_last=drop_last,\n", + " collate_fn=lambda x: collate_fn(x, max_len=args.seq_len) \n", + " #define some limits to collate pieces of data into batches\n", + " )\n", + " return data_set, data_loader\n", + " else:\n", + " if args.data == 'm4':\n", + " drop_last = False\n", + " data_set = Data(\n", + " root_path=args.root_path, #eg. ./data/ETT/\n", + " data_path=args.data_path, #eg. ETTh1.csv\n", + " flag=flag,\n", + " size=[args.seq_len, args.label_len, args.pred_len],\n", + " features=args.features, #forecasting task, options:[M, S, MS]; \n", + " # M:multivariate predict multivariate, S:univariate predict univariate,\n", + " # MS:multivariate predict univariate\n", + " \n", + " target=args.target, #target feature in S or MS task\n", + " timeenc=timeenc,\n", + " freq=freq,\n", + " seasonal_patterns=args.seasonal_patterns\n", + " )\n", + " print(flag, len(data_set))\n", + " data_loader = DataLoader(\n", + " data_set,\n", + " batch_size=batch_size,\n", + " shuffle=shuffle_flag,\n", + " num_workers=args.num_workers,\n", + " drop_last=drop_last)\n", + " return data_set, data_loader\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "From above, it's easy to find that data_provider is responsible for collate the dataset into batches according to different tasks and running mode. It passes the parameters to dataloader(`Data`) to instruct it how to manage a data file into pieces of usable data. Then it also generates the final dara_loader by passing the built-up dataset and some other params to the standard class Dataloader. After that, a dataset that fits the need of the model and a enumerable dataloader are generated. \n", + "\n", + "So how to organize the data file into pieces of data that fits the model? Let's see `data_loader.py`! There are many dataloaders in it, and of course you can write your own dataloader, but here we'll only focus on __class Dataset_ETT_hour(Dataset)__ as an example." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class Dataset_ETT_hour(Dataset):\n", + " def __init__(self, root_path, flag='train', size=None,\n", + " features='S', data_path='ETTh1.csv',\n", + " target='OT', scale=True, timeenc=0, freq='h', seasonal_patterns=None):\n", + " ... \n", + " def __read_data__(self):\n", + " ... \n", + " def __getitem__(self, index):\n", + " ...\n", + " \n", + " def __len__(self):\n", + " ...\n", + " \n", + " def inverse_transform(self, data):\n", + " ..." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`__init__()` is the constructor used to initialize various parameters and attributes of the dataset. It takes a series of arguments, including the path to the data file, the dataset's flag (e.g., train, validate, test), dataset size, feature type, target variable, whether to scale the data, time encoding, time frequency, and more. These parameters are used to configure how the dataset is loaded and processed." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def __init__(self, root_path, flag='train', size=None,\n", + " features='S', data_path='ETTh1.csv',\n", + " target='OT', scale=True, timeenc=0, freq='h', seasonal_patterns=None):\n", + " # size [seq_len, label_len, pred_len]\n", + " # info\n", + " if size == None:\n", + " self.seq_len = 24 * 4 * 4\n", + " self.label_len = 24 * 4\n", + " self.pred_len = 24 * 4\n", + " else:\n", + " self.seq_len = size[0]\n", + " self.label_len = size[1]\n", + " self.pred_len = size[2]\n", + " # init\n", + " assert flag in ['train', 'test', 'val']\n", + " type_map = {'train': 0, 'val': 1, 'test': 2}\n", + " self.set_type = type_map[flag]\n", + " self.features = features\n", + " self.target = target\n", + " self.scale = scale\n", + " self.timeenc = timeenc\n", + " self.freq = freq\n", + " self.root_path = root_path\n", + " self.data_path = data_path\n", + " \n", + " # After initialization, call __read_data__() to manage the data file.\n", + " self.__read_data__()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The actual process of managing data file into usable data pieces happens in `__read_data__()`, see below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def __read_data__(self):\n", + " self.scaler = StandardScaler()\n", + "\n", + " #get raw data from path\n", + " df_raw = pd.read_csv(os.path.join(self.root_path,\n", + " self.data_path))\n", + "\n", + " # split data set into train, vali, test. 
border1 is the left border and border2 is the right.\n", + " # Once flag(train, vali, test) is determined, __read_data__ will return certain part of the dataset.\n", + " border1s = [0, 12 * 30 * 24 - self.seq_len, 12 * 30 * 24 + 4 * 30 * 24 - self.seq_len]\n", + " border2s = [12 * 30 * 24, 12 * 30 * 24 + 4 * 30 * 24, 12 * 30 * 24 + 8 * 30 * 24]\n", + " border1 = border1s[self.set_type]\n", + " border2 = border2s[self.set_type]\n", + "\n", + " #decide which columns to select\n", + " if self.features == 'M' or self.features == 'MS':\n", + " cols_data = df_raw.columns[1:] # column name list (remove 'date')\n", + " df_data = df_raw[cols_data] #remove the first column, which is time stamp info\n", + " elif self.features == 'S':\n", + " df_data = df_raw[[self.target]] # target column\n", + "\n", + " #scale data by the scaler that fits training data\n", + " if self.scale:\n", + " train_data = df_data[border1s[0]:border2s[0]]\n", + " #train_data.values: turn pandas DataFrame into 2D numpy\n", + " self.scaler.fit(train_data.values) \n", + " data = self.scaler.transform(df_data.values)\n", + " else:\n", + " data = df_data.values \n", + " \n", + " #time stamp:df_stamp is a object of and\n", + " # has one column called 'date' like 2016-07-01 00:00:00\n", + " df_stamp = df_raw[['date']][border1:border2]\n", + " \n", + " # Since the date format is uncertain across different data file, we need to \n", + " # standardize it so we call func 'pd.to_datetime'\n", + " df_stamp['date'] = pd.to_datetime(df_stamp.date) \n", + "\n", + " if self.timeenc == 0: #time feature encoding is fixed or learned\n", + " df_stamp['month'] = df_stamp.date.apply(lambda row: row.month, 1)\n", + " df_stamp['day'] = df_stamp.date.apply(lambda row: row.day, 1)\n", + " df_stamp['weekday'] = df_stamp.date.apply(lambda row: row.weekday(), 1)\n", + " df_stamp['hour'] = df_stamp.date.apply(lambda row: row.hour, 1)\n", + " #now df_frame has multiple columns recording the month, day etc. time stamp\n", + " # next we delete the 'date' column and turn 'DataFrame' to a list\n", + " data_stamp = df_stamp.drop(['date'], 1).values\n", + "\n", + " elif self.timeenc == 1: #time feature encoding is timeF\n", + " '''\n", + " when entering this branch, we choose arg.embed as timeF meaning we want to \n", + " encode the temporal info. 'freq' should be the smallest time step, and has \n", + " options:[s:secondly, t:minutely, h:hourly, d:daily, b:business days, w:weekly, m:monthly], you can also use more detailed freq like 15min or 3h')\n", + " So you should check the timestep of your data and set 'freq' arg. \n", + " After the time_features encoding, each date info format will be encoded into \n", + " a list, with each element denoting the relative position of this time point\n", + " (e.g. Day of Week, Day of Month, Hour of Day) and each normalized within scope[-0.5, 0.5]\n", + " '''\n", + " data_stamp = time_features(pd.to_datetime(df_stamp['date'].values), freq=self.freq)\n", + " data_stamp = data_stamp.transpose(1, 0)\n", + " \n", + " \n", + " # data_x and data_y are same copy of a certain part of data\n", + " self.data_x = data[border1:border2]\n", + " self.data_y = data[border1:border2]\n", + " self.data_stamp = data_stamp" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`__read_data__()` splits the dataset into 3 parts, selects the needed columns and manages time stamp info. It gives out the well-managed data array for later use. 
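To make the hard-coded borders concrete: ETTh1 is hourly, so 12 * 30 * 24 = 8640 rows (roughly twelve 30-day months) go to training and 4 * 30 * 24 = 2880 to each of validation and test, with the left borders shifted back by seq_len so the first window still has full history. A quick check with the ETTh1_96_96 settings (seq_len = pred_len = 96 is an assumption matching the script in section 8):

```python
# Sanity check of the ETT-hour borders in __read_data__ (assumes seq_len = pred_len = 96).
seq_len, pred_len = 96, 96
border1s = [0, 12 * 30 * 24 - seq_len, 12 * 30 * 24 + 4 * 30 * 24 - seq_len]
border2s = [12 * 30 * 24, 12 * 30 * 24 + 4 * 30 * 24, 12 * 30 * 24 + 8 * 30 * 24]
for name, b1, b2 in zip(['train', 'val', 'test'], border1s, border2s):
    rows = b2 - b1
    windows = rows - seq_len - pred_len + 1     # exactly what __len__ returns below
    print(name, 'rows:', rows, 'windows:', windows)
# train rows: 8640 windows: 8449
# val   rows: 2976 windows: 2785
# test  rows: 2976 windows: 2785
```

These window counts are exactly the `train 8449`, `val 2785`, `test 2785` lines printed when the ETTh1 script in section 8 is run. 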
Next, we have to finish the overload of __class Dataset__, see `__getitem__(self, index)` and `__len__(self)`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def __getitem__(self, index):\n", + " #given an index, calculate the positions after this index to truncate the dataset\n", + " s_begin = index\n", + " s_end = s_begin + self.seq_len\n", + " r_begin = s_end - self.label_len\n", + " r_end = r_begin + self.label_len + self.pred_len\n", + "\n", + " #input and output sequence\n", + " seq_x = self.data_x[s_begin:s_end]\n", + " seq_y = self.data_y[r_begin:r_end]\n", + "\n", + " #time mark\n", + " seq_x_mark = self.data_stamp[s_begin:s_end]\n", + " seq_y_mark = self.data_stamp[r_begin:r_end]\n", + "\n", + " return seq_x, seq_y, seq_x_mark, seq_y_mark\n", + "\n", + "def __len__(self):\n", + " return len(self.data_x) - self.seq_len - self.pred_len + 1" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can also add an inverse_transform for scaler if needed." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def inverse_transform(self, data):\n", + " return self.scaler.inverse_transform(data)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "By now, we have finished constructing the dataset and dataloader. If you want to construct your own data and run it on the net, you can find proper data and try to accomplish the functions listed above. Here are some widely used datasets in times series analysis.\n", + "\n", + "![common dataset](./dataset.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 8. Running the Experiment and Visualizing Result\n", + "\n", + "After managing the data, model well, we need to write a shell script for the experiment. In the script, we need to run `run.py` with several arguments, which is part of the configuration. Here, let's see `TimesNet` on task long-term forecast with dataset ETTh1 for example." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "shellscript" + } + }, + "outputs": [], + "source": [ + "model_name=TimesNet\n", + "\n", + "\n", + "python -u run.py \\\n", + " --task_name long_term_forecast \\\n", + " --is_training 1 \\\n", + " --root_path ./dataset/ETT-small/ \\\n", + " --data_path ETTh1.csv \\\n", + " --model_id ETTh1_96_96 \\\n", + " --model $model_name \\\n", + " --data ETTh1 \\\n", + " --features M \\\n", + " --seq_len 96 \\\n", + " --label_len 48 \\\n", + " --pred_len 96 \\\n", + " --e_layers 2 \\\n", + " --d_layers 1 \\\n", + " --factor 3 \\\n", + " --enc_in 7 \\\n", + " --dec_in 7 \\\n", + " --c_out 7 \\\n", + " --d_model 16 \\\n", + " --d_ff 32 \\\n", + " --des 'Exp' \\\n", + " --itr 1 \\\n", + " --top_k 5\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "After finishing the shell script, you can run it in shell using bash. For example, you can run the following command, for `TimesNet` ETTh1 long-term forecast:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "shellscript" + } + }, + "outputs": [], + "source": [ + "bash ./scripts/long_term_forecast/ETT_script/TimesNet_ETTh1.sh" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here, the bash command may not be successfully implemented due to a lack of proper packages in the environment. 
If that happens, simply follow the error messages and install the missing packages one by one until the run starts. A successful run begins by printing the experiment configuration, such as:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "vscode": { "languageId": "plaintext" } }, "outputs": [], "source": [ "Namespace(task_name='long_term_forecast', is_training=1, model_id='ETTh1_96_96', model='TimesNet', data='ETTh1', root_path='./dataset/ETT-small/', data_path='ETTh1.csv', features='M', target='OT', freq='h', checkpoints='./checkpoints/', seq_len=96, label_len=48, pred_len=96, seasonal_patterns='Monthly', inverse=False, mask_rate=0.25, anomaly_ratio=0.25, top_k=5, num_kernels=6, enc_in=7, dec_in=7, c_out=7, d_model=16, n_heads=8, e_layers=2, d_layers=1, d_ff=32, moving_avg=25, factor=3, distil=True, dropout=0.1, embed='timeF', activation='gelu', output_attention=False, num_workers=10, itr=1, train_epochs=10, batch_size=32, patience=3, learning_rate=0.0001, des='Exp', loss='MSE', lradj='type1', use_amp=False, use_gpu=False, gpu=0, use_multi_gpu=False, devices='0,1,2,3', p_hidden_dims=[128, 128], p_hidden_layers=2)\n", + "Use GPU: cuda:0\n", + ">>>>>>>start training : long_term_forecast_ETTh1_96_96_TimesNet_ETTh1_ftM_sl96_ll48_pl96_dm16_nh8_el2_dl1_df32_fc3_ebtimeF_dtTrue_Exp_0>>>>>>>>>>>>>>>>>>>>>>>>>>\n", + "train 8449\n", + "val 2785\n", + "test 2785" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Then the model starts training. Each time an epoch finishes, information like the following is printed out:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "vscode": { "languageId": "plaintext" } }, "outputs": [], "source": [ " iters: 100, epoch: 1 | loss: 0.4701951\n", + " speed: 0.2108s/iter; left time: 535.7317s\n", + " iters: 200, epoch: 1 | loss: 0.4496171\n", + " speed: 0.0615s/iter; left time: 150.0223s\n", + "Epoch: 1 cost time: 30.09317970275879\n", + "Epoch: 1, Steps: 264 | Train Loss: 0.4964185 Vali Loss: 0.8412074 Test Loss: 0.4290483\n", + "Validation loss decreased (inf --> 0.841207). Saving model ...\n", + "Updating learning rate to 0.0001" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "When all epochs are done (or early stopping is triggered), the model moves on to testing. The following information is printed, giving the MSE and MAE on the test set." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "vscode": { "languageId": "plaintext" } }, "outputs": [], "source": [ ">>>>>>>testing : long_term_forecast_ETTh1_96_96_TimesNet_ETTh1_ftM_sl96_ll48_pl96_dm16_nh8_el2_dl1_df32_fc3_ebtimeF_dtTrue_Exp_0<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n", + "test 2785\n", + "test shape: (2785, 1, 96, 7) (2785, 1, 96, 7)\n", + "test shape: (2785, 96, 7) (2785, 96, 7)\n", + "mse:0.3890332877635956, mae:0.41201362013816833" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "After testing finishes, per-window visualizations are stored in the test_results folder as PDF files. 
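Besides the printed metrics, `test()` also dumps the raw arrays to ./results/<setting>/, so you can reload them for further analysis. A small sketch (the setting string is the one printed in the log above; paths are relative to the repository root):

```python
import numpy as np

setting = 'long_term_forecast_ETTh1_96_96_TimesNet_ETTh1_ftM_sl96_ll48_pl96_dm16_nh8_el2_dl1_df32_fc3_ebtimeF_dtTrue_Exp_0'
folder = './results/' + setting + '/'

mae, mse, rmse, mape, mspe = np.load(folder + 'metrics.npy')   # saved in this order by test()
preds = np.load(folder + 'pred.npy')                           # (num_windows, pred_len, num_vars)
trues = np.load(folder + 'true.npy')
print('mse:', mse, 'mae:', mae, 'pred shape:', preds.shape)    # e.g. (2785, 96, 7) for ETTh1_96_96
```

The PDFs under ./test_results/<setting>/ give the corresponding per-window visual comparison of ground truth and prediction. 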
For example:\n", + "\n", + "![result ETTm1 2440](./result.png)" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/tutorial/conv.png b/tutorial/conv.png new file mode 100644 index 0000000..97c5172 Binary files /dev/null and b/tutorial/conv.png differ diff --git a/tutorial/dataset.png b/tutorial/dataset.png new file mode 100644 index 0000000..baa0cae Binary files /dev/null and b/tutorial/dataset.png differ diff --git a/tutorial/fft.png b/tutorial/fft.png new file mode 100644 index 0000000..1325ba3 Binary files /dev/null and b/tutorial/fft.png differ diff --git a/tutorial/result.png b/tutorial/result.png new file mode 100644 index 0000000..0cde789 Binary files /dev/null and b/tutorial/result.png differ diff --git a/utils/ADFtest.py b/utils/ADFtest.py new file mode 100644 index 0000000..967f776 --- /dev/null +++ b/utils/ADFtest.py @@ -0,0 +1,74 @@ +import pandas as pd +import numpy as np +import os +from statsmodels.tsa.stattools import adfuller +from arch.unitroot import ADF + +def calculate_ADF(root_path,data_path): + df_raw = pd.read_csv(os.path.join(root_path,data_path)) + cols = list(df_raw.columns) + cols.remove('date') + df_raw = df_raw[cols] + adf_list = [] + for i in cols: + df_data = df_raw[i] + adf = adfuller(df_data, maxlag = 1) + print(adf) + adf_list.append(adf) + return np.array(adf_list) + +def calculate_target_ADF(root_path,data_path,target='OT'): + df_raw = pd.read_csv(os.path.join(root_path,data_path)) + target_cols = target.split(',') + # df_data = df_raw[target] + df_raw = df_raw[target_cols] + adf_list = [] + for i in target_cols: + df_data = df_raw[i] + adf = adfuller(df_data, maxlag = 1) + # print(adf) + adf_list.append(adf) + return np.array(adf_list) + +def archADF(root_path, data_path): + df = pd.read_csv(os.path.join(root_path,data_path)) + cols = df.columns[1:] + stats = 0 + for target_col in cols: + series = df[target_col].values + adf = ADF(series) + stat = adf.stat + stats += stat + return stats/len(cols) + +if __name__ == '__main__': + + # * Exchange - result: -1.902402344564288 | report: -1.889 + ADFmetric = archADF(root_path="./dataset/exchange_rate/",data_path="exchange_rate.csv") + print("Exchange ADF metric", ADFmetric) + + # * Illness - result: -5.33416661870624 | report: -5.406 + ADFmetric = archADF(root_path="./dataset/illness/",data_path="national_illness.csv") + print("Illness ADF metric", ADFmetric) + + # * ETTm2 - result: -5.663628743471695 | report: -6.225 + ADFmetric = archADF(root_path="./dataset/ETT-small/",data_path="ETTm2.csv") + print("ETTm2 ADF metric", ADFmetric) + + # * Electricity - result: -8.44485821939281 | report: -8.483 + ADFmetric = archADF(root_path="./dataset/electricity/",data_path="electricity.csv") + print("Electricity ADF metric", ADFmetric) + + # * Traffic - result: -15.020978067839014 | report: -15.046 + ADFmetric = archADF(root_path="./dataset/traffic/",data_path="traffic.csv") + print("Traffic ADF metric", ADFmetric) + + # * Weather - result: -26.681433085204866 | report: -26.661 + ADFmetric = archADF(root_path="./dataset/weather/",data_path="weather.csv") + print("Weather ADF metric", ADFmetric) + + + # print(ADFmetric) + + # mean_ADFmetric = ADFmetric[:,0].mean() + # print(mean_ADFmetric) \ No newline at end of file diff --git a/utils/__init__.py b/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/utils/augmentation.py b/utils/augmentation.py new file mode 100644 index 0000000..e574f35 --- 
/dev/null +++ b/utils/augmentation.py @@ -0,0 +1,434 @@ +import numpy as np +from tqdm import tqdm + +def jitter(x, sigma=0.03): + # https://arxiv.org/pdf/1706.00527.pdf + return x + np.random.normal(loc=0., scale=sigma, size=x.shape) + + +def scaling(x, sigma=0.1): + # https://arxiv.org/pdf/1706.00527.pdf + factor = np.random.normal(loc=1., scale=sigma, size=(x.shape[0],x.shape[2])) + return np.multiply(x, factor[:,np.newaxis,:]) + +def rotation(x): + x = np.array(x) + flip = np.random.choice([-1, 1], size=(x.shape[0],x.shape[2])) + rotate_axis = np.arange(x.shape[2]) + np.random.shuffle(rotate_axis) + return flip[:,np.newaxis,:] * x[:,:,rotate_axis] + +def permutation(x, max_segments=5, seg_mode="equal"): + orig_steps = np.arange(x.shape[1]) + + num_segs = np.random.randint(1, max_segments, size=(x.shape[0])) + + ret = np.zeros_like(x) + for i, pat in enumerate(x): + if num_segs[i] > 1: + if seg_mode == "random": + split_points = np.random.choice(x.shape[1]-2, num_segs[i]-1, replace=False) + split_points.sort() + splits = np.split(orig_steps, split_points) + else: + splits = np.array_split(orig_steps, num_segs[i]) + warp = np.concatenate(np.random.permutation(splits)).ravel() + # ? Question: What is the point of making segments? + # for i in range(len(splits)): + # permute = np.random.permutation(splits[i]) + + + ret[i] = pat[warp] + else: + ret[i] = pat + return ret + +def magnitude_warp(x, sigma=0.2, knot=4): + from scipy.interpolate import CubicSpline + orig_steps = np.arange(x.shape[1]) + + random_warps = np.random.normal(loc=1.0, scale=sigma, size=(x.shape[0], knot+2, x.shape[2])) + warp_steps = (np.ones((x.shape[2],1))*(np.linspace(0, x.shape[1]-1., num=knot+2))).T + ret = np.zeros_like(x) + for i, pat in enumerate(x): + warper = np.array([CubicSpline(warp_steps[:,dim], random_warps[i,:,dim])(orig_steps) for dim in range(x.shape[2])]).T + ret[i] = pat * warper + + return ret + +def time_warp(x, sigma=0.2, knot=4): + from scipy.interpolate import CubicSpline + orig_steps = np.arange(x.shape[1]) + + random_warps = np.random.normal(loc=1.0, scale=sigma, size=(x.shape[0], knot+2, x.shape[2])) + warp_steps = (np.ones((x.shape[2],1))*(np.linspace(0, x.shape[1]-1., num=knot+2))).T + + ret = np.zeros_like(x) + for i, pat in enumerate(x): + for dim in range(x.shape[2]): + time_warp = CubicSpline(warp_steps[:,dim], warp_steps[:,dim] * random_warps[i,:,dim])(orig_steps) + scale = (x.shape[1]-1)/time_warp[-1] + ret[i,:,dim] = np.interp(orig_steps, np.clip(scale*time_warp, 0, x.shape[1]-1), pat[:,dim]).T + return ret + +def window_slice(x, reduce_ratio=0.9): + # https://halshs.archives-ouvertes.fr/halshs-01357973/document + target_len = np.ceil(reduce_ratio*x.shape[1]).astype(int) + if target_len >= x.shape[1]: + return x + starts = np.random.randint(low=0, high=x.shape[1]-target_len, size=(x.shape[0])).astype(int) + ends = (target_len + starts).astype(int) + + ret = np.zeros_like(x) + for i, pat in enumerate(x): + for dim in range(x.shape[2]): + ret[i,:,dim] = np.interp(np.linspace(0, target_len, num=x.shape[1]), np.arange(target_len), pat[starts[i]:ends[i],dim]).T + return ret + +def window_warp(x, window_ratio=0.1, scales=[0.5, 2.]): + # https://halshs.archives-ouvertes.fr/halshs-01357973/document + warp_scales = np.random.choice(scales, x.shape[0]) + warp_size = np.ceil(window_ratio*x.shape[1]).astype(int) + window_steps = np.arange(warp_size) + + window_starts = np.random.randint(low=1, high=x.shape[1]-warp_size-1, size=(x.shape[0])).astype(int) + window_ends = (window_starts + 
warp_size).astype(int) + + ret = np.zeros_like(x) + for i, pat in enumerate(x): + for dim in range(x.shape[2]): + start_seg = pat[:window_starts[i],dim] + window_seg = np.interp(np.linspace(0, warp_size-1, num=int(warp_size*warp_scales[i])), window_steps, pat[window_starts[i]:window_ends[i],dim]) + end_seg = pat[window_ends[i]:,dim] + warped = np.concatenate((start_seg, window_seg, end_seg)) + ret[i,:,dim] = np.interp(np.arange(x.shape[1]), np.linspace(0, x.shape[1]-1., num=warped.size), warped).T + return ret + +def spawner(x, labels, sigma=0.05, verbose=0): + # https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6983028/ + # use verbose=-1 to turn off warnings + # use verbose=1 to print out figures + + import utils.dtw as dtw + random_points = np.random.randint(low=1, high=x.shape[1]-1, size=x.shape[0]) + window = np.ceil(x.shape[1] / 10.).astype(int) + orig_steps = np.arange(x.shape[1]) + l = np.argmax(labels, axis=1) if labels.ndim > 1 else labels + + ret = np.zeros_like(x) + # for i, pat in enumerate(tqdm(x)): + for i, pat in enumerate(x): + # guarentees that same one isnt selected + choices = np.delete(np.arange(x.shape[0]), i) + # remove ones of different classes + choices = np.where(l[choices] == l[i])[0] + if choices.size > 0: + random_sample = x[np.random.choice(choices)] + # SPAWNER splits the path into two randomly + path1 = dtw.dtw(pat[:random_points[i]], random_sample[:random_points[i]], dtw.RETURN_PATH, slope_constraint="symmetric", window=window) + path2 = dtw.dtw(pat[random_points[i]:], random_sample[random_points[i]:], dtw.RETURN_PATH, slope_constraint="symmetric", window=window) + combined = np.concatenate((np.vstack(path1), np.vstack(path2+random_points[i])), axis=1) + if verbose: + # print(random_points[i]) + dtw_value, cost, DTW_map, path = dtw.dtw(pat, random_sample, return_flag = dtw.RETURN_ALL, slope_constraint=slope_constraint, window=window) + dtw.draw_graph1d(cost, DTW_map, path, pat, random_sample) + dtw.draw_graph1d(cost, DTW_map, combined, pat, random_sample) + mean = np.mean([pat[combined[0]], random_sample[combined[1]]], axis=0) + for dim in range(x.shape[2]): + ret[i,:,dim] = np.interp(orig_steps, np.linspace(0, x.shape[1]-1., num=mean.shape[0]), mean[:,dim]).T + else: + # if verbose > -1: + # print("There is only one pattern of class {}, skipping pattern average".format(l[i])) + ret[i,:] = pat + return jitter(ret, sigma=sigma) + +def wdba(x, labels, batch_size=6, slope_constraint="symmetric", use_window=True, verbose=0): + # https://ieeexplore.ieee.org/document/8215569 + # use verbose = -1 to turn off warnings + # slope_constraint is for DTW. "symmetric" or "asymmetric" + x = np.array(x) + import utils.dtw as dtw + + if use_window: + window = np.ceil(x.shape[1] / 10.).astype(int) + else: + window = None + orig_steps = np.arange(x.shape[1]) + l = np.argmax(labels, axis=1) if labels.ndim > 1 else labels + + ret = np.zeros_like(x) + # for i in tqdm(range(ret.shape[0])): + for i in range(ret.shape[0]): + # get the same class as i + choices = np.where(l == l[i])[0] + if choices.size > 0: + # pick random intra-class pattern + k = min(choices.size, batch_size) + random_prototypes = x[np.random.choice(choices, k, replace=False)] + + # calculate dtw between all + dtw_matrix = np.zeros((k, k)) + for p, prototype in enumerate(random_prototypes): + for s, sample in enumerate(random_prototypes): + if p == s: + dtw_matrix[p, s] = 0. 
+ else: + dtw_matrix[p, s] = dtw.dtw(prototype, sample, dtw.RETURN_VALUE, slope_constraint=slope_constraint, window=window) + + # get medoid + medoid_id = np.argsort(np.sum(dtw_matrix, axis=1))[0] + nearest_order = np.argsort(dtw_matrix[medoid_id]) + medoid_pattern = random_prototypes[medoid_id] + + # start weighted DBA + average_pattern = np.zeros_like(medoid_pattern) + weighted_sums = np.zeros((medoid_pattern.shape[0])) + for nid in nearest_order: + if nid == medoid_id or dtw_matrix[medoid_id, nearest_order[1]] == 0.: + average_pattern += medoid_pattern + weighted_sums += np.ones_like(weighted_sums) + else: + path = dtw.dtw(medoid_pattern, random_prototypes[nid], dtw.RETURN_PATH, slope_constraint=slope_constraint, window=window) + dtw_value = dtw_matrix[medoid_id, nid] + warped = random_prototypes[nid, path[1]] + weight = np.exp(np.log(0.5)*dtw_value/dtw_matrix[medoid_id, nearest_order[1]]) + average_pattern[path[0]] += weight * warped + weighted_sums[path[0]] += weight + + ret[i,:] = average_pattern / weighted_sums[:,np.newaxis] + else: + # if verbose > -1: + # print("There is only one pattern of class {}, skipping pattern average".format(l[i])) + ret[i,:] = x[i] + return ret + +# Proposed + +def random_guided_warp(x, labels, slope_constraint="symmetric", use_window=True, dtw_type="normal", verbose=0): + # use verbose = -1 to turn off warnings + # slope_constraint is for DTW. "symmetric" or "asymmetric" + # dtw_type is for shapeDTW or DTW. "normal" or "shape" + + import utils.dtw as dtw + + if use_window: + window = np.ceil(x.shape[1] / 10.).astype(int) + else: + window = None + orig_steps = np.arange(x.shape[1]) + l = np.argmax(labels, axis=1) if labels.ndim > 1 else labels + + ret = np.zeros_like(x) + # for i, pat in enumerate(tqdm(x)): + for i, pat in enumerate(x): + # guarentees that same one isnt selected + choices = np.delete(np.arange(x.shape[0]), i) + # remove ones of different classes + choices = np.where(l[choices] == l[i])[0] + if choices.size > 0: + # pick random intra-class pattern + random_prototype = x[np.random.choice(choices)] + + if dtw_type == "shape": + path = dtw.shape_dtw(random_prototype, pat, dtw.RETURN_PATH, slope_constraint=slope_constraint, window=window) + else: + path = dtw.dtw(random_prototype, pat, dtw.RETURN_PATH, slope_constraint=slope_constraint, window=window) + + # Time warp + warped = pat[path[1]] + for dim in range(x.shape[2]): + ret[i,:,dim] = np.interp(orig_steps, np.linspace(0, x.shape[1]-1., num=warped.shape[0]), warped[:,dim]).T + else: + # if verbose > -1: + # print("There is only one pattern of class {}, skipping timewarping".format(l[i])) + ret[i,:] = pat + return ret + +def random_guided_warp_shape(x, labels, slope_constraint="symmetric", use_window=True): + return random_guided_warp(x, labels, slope_constraint, use_window, dtw_type="shape") + +def discriminative_guided_warp(x, labels, batch_size=6, slope_constraint="symmetric", use_window=True, dtw_type="normal", use_variable_slice=True, verbose=0): + # use verbose = -1 to turn off warnings + # slope_constraint is for DTW. "symmetric" or "asymmetric" + # dtw_type is for shapeDTW or DTW. 
"normal" or "shape" + + import utils.dtw as dtw + + if use_window: + window = np.ceil(x.shape[1] / 10.).astype(int) + else: + window = None + orig_steps = np.arange(x.shape[1]) + l = np.argmax(labels, axis=1) if labels.ndim > 1 else labels + + positive_batch = np.ceil(batch_size / 2).astype(int) + negative_batch = np.floor(batch_size / 2).astype(int) + + ret = np.zeros_like(x) + warp_amount = np.zeros(x.shape[0]) + # for i, pat in enumerate(tqdm(x)): + for i, pat in enumerate(x): + # guarentees that same one isnt selected + choices = np.delete(np.arange(x.shape[0]), i) + + # remove ones of different classes + positive = np.where(l[choices] == l[i])[0] + negative = np.where(l[choices] != l[i])[0] + + if positive.size > 0 and negative.size > 0: + pos_k = min(positive.size, positive_batch) + neg_k = min(negative.size, negative_batch) + positive_prototypes = x[np.random.choice(positive, pos_k, replace=False)] + negative_prototypes = x[np.random.choice(negative, neg_k, replace=False)] + + # vector embedding and nearest prototype in one + pos_aves = np.zeros((pos_k)) + neg_aves = np.zeros((pos_k)) + if dtw_type == "shape": + for p, pos_prot in enumerate(positive_prototypes): + for ps, pos_samp in enumerate(positive_prototypes): + if p != ps: + pos_aves[p] += (1./(pos_k-1.))*dtw.shape_dtw(pos_prot, pos_samp, dtw.RETURN_VALUE, slope_constraint=slope_constraint, window=window) + for ns, neg_samp in enumerate(negative_prototypes): + neg_aves[p] += (1./neg_k)*dtw.shape_dtw(pos_prot, neg_samp, dtw.RETURN_VALUE, slope_constraint=slope_constraint, window=window) + selected_id = np.argmax(neg_aves - pos_aves) + path = dtw.shape_dtw(positive_prototypes[selected_id], pat, dtw.RETURN_PATH, slope_constraint=slope_constraint, window=window) + else: + for p, pos_prot in enumerate(positive_prototypes): + for ps, pos_samp in enumerate(positive_prototypes): + if p != ps: + pos_aves[p] += (1./(pos_k-1.))*dtw.dtw(pos_prot, pos_samp, dtw.RETURN_VALUE, slope_constraint=slope_constraint, window=window) + for ns, neg_samp in enumerate(negative_prototypes): + neg_aves[p] += (1./neg_k)*dtw.dtw(pos_prot, neg_samp, dtw.RETURN_VALUE, slope_constraint=slope_constraint, window=window) + selected_id = np.argmax(neg_aves - pos_aves) + path = dtw.dtw(positive_prototypes[selected_id], pat, dtw.RETURN_PATH, slope_constraint=slope_constraint, window=window) + + # Time warp + warped = pat[path[1]] + warp_path_interp = np.interp(orig_steps, np.linspace(0, x.shape[1]-1., num=warped.shape[0]), path[1]) + warp_amount[i] = np.sum(np.abs(orig_steps-warp_path_interp)) + for dim in range(x.shape[2]): + ret[i,:,dim] = np.interp(orig_steps, np.linspace(0, x.shape[1]-1., num=warped.shape[0]), warped[:,dim]).T + else: + # if verbose > -1: + # print("There is only one pattern of class {}".format(l[i])) + ret[i,:] = pat + warp_amount[i] = 0. 
+ if use_variable_slice: + max_warp = np.max(warp_amount) + if max_warp == 0: + # unchanged + ret = window_slice(ret, reduce_ratio=0.9) + else: + for i, pat in enumerate(ret): + # Variable Sllicing + ret[i] = window_slice(pat[np.newaxis,:,:], reduce_ratio=0.9+0.1*warp_amount[i]/max_warp)[0] + return ret + +def discriminative_guided_warp_shape(x, labels, batch_size=6, slope_constraint="symmetric", use_window=True): + return discriminative_guided_warp(x, labels, batch_size, slope_constraint, use_window, dtw_type="shape") + + +def run_augmentation(x, y, args): + print("Augmenting %s"%args.data) + np.random.seed(args.seed) + x_aug = x + y_aug = y + if args.augmentation_ratio > 0: + augmentation_tags = "%d"%args.augmentation_ratio + for n in range(args.augmentation_ratio): + x_temp, augmentation_tags = augment(x, y, args) + x_aug = np.append(x_aug, x_temp, axis=0) + y_aug = np.append(y_aug, y, axis=0) + print("Round %d: %s done"%(n, augmentation_tags)) + if args.extra_tag: + augmentation_tags += "_"+args.extra_tag + else: + augmentation_tags = args.extra_tag + return x_aug, y_aug, augmentation_tags + +def run_augmentation_single(x, y, args): + # print("Augmenting %s"%args.data) + np.random.seed(args.seed) + + x_aug = x + y_aug = y + + + if len(x.shape)<3: + # Augmenting on the entire series: using the input data as "One Big Batch" + # Before - (sequence_length, num_channels) + # After - (1, sequence_length, num_channels) + # Note: the 'sequence_length' here is actually the length of the entire series + x_input = x[np.newaxis,:] + elif len(x.shape)==3: + # Augmenting on the batch series: keep current dimension (batch_size, sequence_length, num_channels) + x_input = x + else: + raise ValueError("Input must be (batch_size, sequence_length, num_channels) dimensional") + + if args.augmentation_ratio > 0: + augmentation_tags = "%d"%args.augmentation_ratio + for n in range(args.augmentation_ratio): + x_aug, augmentation_tags = augment(x_input, y, args) + # print("Round %d: %s done"%(n, augmentation_tags)) + if args.extra_tag: + augmentation_tags += "_"+args.extra_tag + else: + augmentation_tags = args.extra_tag + + if(len(x.shape)<3): + # Reverse to two-dimensional in whole series augmentation scenario + x_aug = x_aug.squeeze(0) + return x_aug, y_aug, augmentation_tags + + +def augment(x, y, args): + import utils.augmentation as aug + augmentation_tags = "" + if args.jitter: + x = aug.jitter(x) + augmentation_tags += "_jitter" + if args.scaling: + x = aug.scaling(x) + augmentation_tags += "_scaling" + if args.rotation: + x = aug.rotation(x) + augmentation_tags += "_rotation" + if args.permutation: + x = aug.permutation(x) + augmentation_tags += "_permutation" + if args.randompermutation: + x = aug.permutation(x, seg_mode="random") + augmentation_tags += "_randomperm" + if args.magwarp: + x = aug.magnitude_warp(x) + augmentation_tags += "_magwarp" + if args.timewarp: + x = aug.time_warp(x) + augmentation_tags += "_timewarp" + if args.windowslice: + x = aug.window_slice(x) + augmentation_tags += "_windowslice" + if args.windowwarp: + x = aug.window_warp(x) + augmentation_tags += "_windowwarp" + if args.spawner: + x = aug.spawner(x, y) + augmentation_tags += "_spawner" + if args.dtwwarp: + x = aug.random_guided_warp(x, y) + augmentation_tags += "_rgw" + if args.shapedtwwarp: + x = aug.random_guided_warp_shape(x, y) + augmentation_tags += "_rgws" + if args.wdba: + x = aug.wdba(x, y) + augmentation_tags += "_wdba" + if args.discdtw: + x = aug.discriminative_guided_warp(x, y) + augmentation_tags += "_dgw" + 
if args.discsdtw: + x = aug.discriminative_guided_warp_shape(x, y) + augmentation_tags += "_dgws" + return x, augmentation_tags diff --git a/utils/dtw.py b/utils/dtw.py new file mode 100644 index 0000000..941eae8 --- /dev/null +++ b/utils/dtw.py @@ -0,0 +1,223 @@ +__author__ = 'Brian Iwana' + +import numpy as np +import math +import sys + +RETURN_VALUE = 0 +RETURN_PATH = 1 +RETURN_ALL = -1 + +# Core DTW +def _traceback(DTW, slope_constraint): + i, j = np.array(DTW.shape) - 1 + p, q = [i-1], [j-1] + + if slope_constraint == "asymmetric": + while (i > 1): + tb = np.argmin((DTW[i-1, j], DTW[i-1, j-1], DTW[i-1, j-2])) + + if (tb == 0): + i = i - 1 + elif (tb == 1): + i = i - 1 + j = j - 1 + elif (tb == 2): + i = i - 1 + j = j - 2 + + p.insert(0, i-1) + q.insert(0, j-1) + elif slope_constraint == "symmetric": + while (i > 1 or j > 1): + tb = np.argmin((DTW[i-1, j-1], DTW[i-1, j], DTW[i, j-1])) + + if (tb == 0): + i = i - 1 + j = j - 1 + elif (tb == 1): + i = i - 1 + elif (tb == 2): + j = j - 1 + + p.insert(0, i-1) + q.insert(0, j-1) + else: + sys.exit("Unknown slope constraint %s"%slope_constraint) + + return (np.array(p), np.array(q)) + +def dtw(prototype, sample, return_flag = RETURN_VALUE, slope_constraint="asymmetric", window=None): + """ Computes the DTW of two sequences. + :param prototype: np array [0..b] + :param sample: np array [0..t] + :param extended: bool + """ + p = prototype.shape[0] + assert p != 0, "Prototype empty!" + s = sample.shape[0] + assert s != 0, "Sample empty!" + + if window is None: + window = s + + cost = np.full((p, s), np.inf) + for i in range(p): + start = max(0, i-window) + end = min(s, i+window)+1 + cost[i,start:end]=np.linalg.norm(sample[start:end] - prototype[i], axis=1) + + DTW = _cummulative_matrix(cost, slope_constraint, window) + + if return_flag == RETURN_ALL: + return DTW[-1,-1], cost, DTW[1:,1:], _traceback(DTW, slope_constraint) + elif return_flag == RETURN_PATH: + return _traceback(DTW, slope_constraint) + else: + return DTW[-1,-1] + +def _cummulative_matrix(cost, slope_constraint, window): + p = cost.shape[0] + s = cost.shape[1] + + # Note: DTW is one larger than cost and the original patterns + DTW = np.full((p+1, s+1), np.inf) + + DTW[0, 0] = 0.0 + + if slope_constraint == "asymmetric": + for i in range(1, p+1): + if i <= window+1: + DTW[i,1] = cost[i-1,0] + min(DTW[i-1,0], DTW[i-1,1]) + for j in range(max(2, i-window), min(s, i+window)+1): + DTW[i,j] = cost[i-1,j-1] + min(DTW[i-1,j-2], DTW[i-1,j-1], DTW[i-1,j]) + elif slope_constraint == "symmetric": + for i in range(1, p+1): + for j in range(max(1, i-window), min(s, i+window)+1): + DTW[i,j] = cost[i-1,j-1] + min(DTW[i-1,j-1], DTW[i,j-1], DTW[i-1,j]) + else: + sys.exit("Unknown slope constraint %s"%slope_constraint) + + return DTW + +def shape_dtw(prototype, sample, return_flag = RETURN_VALUE, slope_constraint="asymmetric", window=None, descr_ratio=0.05): + """ Computes the shapeDTW of two sequences. + :param prototype: np array [0..b] + :param sample: np array [0..t] + :param extended: bool + """ + # shapeDTW + # https://www.sciencedirect.com/science/article/pii/S0031320317303710 + + p = prototype.shape[0] + assert p != 0, "Prototype empty!" + s = sample.shape[0] + assert s != 0, "Sample empty!" 
+ + if window is None: + window = s + + p_feature_len = np.clip(np.round(p * descr_ratio), 5, 100).astype(int) + s_feature_len = np.clip(np.round(s * descr_ratio), 5, 100).astype(int) + + # padding + p_pad_front = (np.ceil(p_feature_len / 2.)).astype(int) + p_pad_back = (np.floor(p_feature_len / 2.)).astype(int) + s_pad_front = (np.ceil(s_feature_len / 2.)).astype(int) + s_pad_back = (np.floor(s_feature_len / 2.)).astype(int) + + prototype_pad = np.pad(prototype, ((p_pad_front, p_pad_back), (0, 0)), mode="edge") + sample_pad = np.pad(sample, ((s_pad_front, s_pad_back), (0, 0)), mode="edge") + p_p = prototype_pad.shape[0] + s_p = sample_pad.shape[0] + + cost = np.full((p, s), np.inf) + for i in range(p): + for j in range(max(0, i-window), min(s, i+window)): + cost[i, j] = np.linalg.norm(sample_pad[j:j+s_feature_len] - prototype_pad[i:i+p_feature_len]) + + DTW = _cummulative_matrix(cost, slope_constraint=slope_constraint, window=window) + + if return_flag == RETURN_ALL: + return DTW[-1,-1], cost, DTW[1:,1:], _traceback(DTW, slope_constraint) + elif return_flag == RETURN_PATH: + return _traceback(DTW, slope_constraint) + else: + return DTW[-1,-1] + +# Draw helpers +def draw_graph2d(cost, DTW, path, prototype, sample): + import matplotlib.pyplot as plt + plt.figure(figsize=(12, 8)) + # plt.subplots_adjust(left=.02, right=.98, bottom=.001, top=.96, wspace=.05, hspace=.01) + + #cost + plt.subplot(2, 3, 1) + plt.imshow(cost.T, cmap=plt.cm.gray, interpolation='none', origin='lower') + plt.plot(path[0], path[1], 'y') + plt.xlim((-0.5, cost.shape[0]-0.5)) + plt.ylim((-0.5, cost.shape[0]-0.5)) + + #dtw + plt.subplot(2, 3, 2) + plt.imshow(DTW.T, cmap=plt.cm.gray, interpolation='none', origin='lower') + plt.plot(path[0]+1, path[1]+1, 'y') + plt.xlim((-0.5, DTW.shape[0]-0.5)) + plt.ylim((-0.5, DTW.shape[0]-0.5)) + + #prototype + plt.subplot(2, 3, 4) + plt.plot(prototype[:,0], prototype[:,1], 'b-o') + + #connection + plt.subplot(2, 3, 5) + for i in range(0,path[0].shape[0]): + plt.plot([prototype[path[0][i],0], sample[path[1][i],0]],[prototype[path[0][i],1], sample[path[1][i],1]], 'y-') + plt.plot(sample[:,0], sample[:,1], 'g-o') + plt.plot(prototype[:,0], prototype[:,1], 'b-o') + + #sample + plt.subplot(2, 3, 6) + plt.plot(sample[:,0], sample[:,1], 'g-o') + + plt.tight_layout() + plt.show() + +def draw_graph1d(cost, DTW, path, prototype, sample): + import matplotlib.pyplot as plt + plt.figure(figsize=(12, 8)) + # plt.subplots_adjust(left=.02, right=.98, bottom=.001, top=.96, wspace=.05, hspace=.01) + p_steps = np.arange(prototype.shape[0]) + s_steps = np.arange(sample.shape[0]) + + #cost + plt.subplot(2, 3, 1) + plt.imshow(cost.T, cmap=plt.cm.gray, interpolation='none', origin='lower') + plt.plot(path[0], path[1], 'y') + plt.xlim((-0.5, cost.shape[0]-0.5)) + plt.ylim((-0.5, cost.shape[0]-0.5)) + + #dtw + plt.subplot(2, 3, 2) + plt.imshow(DTW.T, cmap=plt.cm.gray, interpolation='none', origin='lower') + plt.plot(path[0]+1, path[1]+1, 'y') + plt.xlim((-0.5, DTW.shape[0]-0.5)) + plt.ylim((-0.5, DTW.shape[0]-0.5)) + + #prototype + plt.subplot(2, 3, 4) + plt.plot(p_steps, prototype[:,0], 'b-o') + + #connection + plt.subplot(2, 3, 5) + for i in range(0,path[0].shape[0]): + plt.plot([path[0][i], path[1][i]],[prototype[path[0][i],0], sample[path[1][i],0]], 'y-') + plt.plot(p_steps, sample[:,0], 'g-o') + plt.plot(s_steps, prototype[:,0], 'b-o') + + #sample + plt.subplot(2, 3, 6) + plt.plot(s_steps, sample[:,0], 'g-o') + + plt.tight_layout() + plt.show() \ No newline at end of file diff --git a/utils/dtw_metric.py 
b/utils/dtw_metric.py
new file mode 100644
index 0000000..5ab39bf
--- /dev/null
+++ b/utils/dtw_metric.py
@@ -0,0 +1,156 @@
+from numpy import array, zeros, full, argmin, inf, ndim
+from scipy.spatial.distance import cdist
+from math import isinf
+
+
+def dtw(x, y, dist, warp=1, w=inf, s=1.0):
+    """
+    Computes Dynamic Time Warping (DTW) of two sequences.
+
+    :param array x: N1*M array
+    :param array y: N2*M array
+    :param func dist: distance used as cost measure
+    :param int warp: how many shifts are computed.
+    :param int w: window size limiting the maximal distance between indices of matched entries |i,j|.
+    :param float s: weight applied on off-diagonal moves of the path. As s gets larger, the warping path is increasingly biased towards the diagonal.
+    Returns the minimum distance, the cost matrix, the accumulated cost matrix, and the warp path.
+    """
+    assert len(x)
+    assert len(y)
+    assert isinf(w) or (w >= abs(len(x) - len(y)))
+    assert s > 0
+    r, c = len(x), len(y)
+    if not isinf(w):
+        D0 = full((r + 1, c + 1), inf)
+        for i in range(1, r + 1):
+            D0[i, max(1, i - w):min(c + 1, i + w + 1)] = 0
+        D0[0, 0] = 0
+    else:
+        D0 = zeros((r + 1, c + 1))
+        D0[0, 1:] = inf
+        D0[1:, 0] = inf
+    D1 = D0[1:, 1:]  # view
+    for i in range(r):
+        for j in range(c):
+            if (isinf(w) or (max(0, i - w) <= j <= min(c, i + w))):
+                D1[i, j] = dist(x[i], y[j])
+    C = D1.copy()
+    jrange = range(c)
+    for i in range(r):
+        if not isinf(w):
+            jrange = range(max(0, i - w), min(c, i + w + 1))
+        for j in jrange:
+            min_list = [D0[i, j]]
+            for k in range(1, warp + 1):
+                i_k = min(i + k, r)
+                j_k = min(j + k, c)
+                min_list += [D0[i_k, j] * s, D0[i, j_k] * s]
+            D1[i, j] += min(min_list)
+    if len(x) == 1:
+        path = zeros(len(y)), range(len(y))
+    elif len(y) == 1:
+        path = range(len(x)), zeros(len(x))
+    else:
+        path = _traceback(D0)
+    return D1[-1, -1], C, D1, path
+
+
+def accelerated_dtw(x, y, dist, warp=1):
+    """
+    Computes Dynamic Time Warping (DTW) of two sequences in a faster way.
+    Instead of iterating through each element and calculating each distance,
+    this uses the cdist function from scipy (https://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.distance.cdist.html)
+
+    :param array x: N1*M array
+    :param array y: N2*M array
+    :param string or func dist: distance parameter for cdist. When a string is given, cdist uses optimized functions for the distance metrics.
+    If a string is passed, the distance function can be 'braycurtis', 'canberra', 'chebyshev', 'cityblock', 'correlation', 'cosine', 'dice', 'euclidean', 'hamming', 'jaccard', 'kulsinski', 'mahalanobis', 'matching', 'minkowski', 'rogerstanimoto', 'russellrao', 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean', 'wminkowski', 'yule'.
+    :param int warp: how many shifts are computed.
+    Returns the minimum distance, the cost matrix, the accumulated cost matrix, and the warp path.
+ """ + assert len(x) + assert len(y) + if ndim(x) == 1: + x = x.reshape(-1, 1) + if ndim(y) == 1: + y = y.reshape(-1, 1) + r, c = len(x), len(y) + D0 = zeros((r + 1, c + 1)) + D0[0, 1:] = inf + D0[1:, 0] = inf + D1 = D0[1:, 1:] + D0[1:, 1:] = cdist(x, y, dist) + C = D1.copy() + for i in range(r): + for j in range(c): + min_list = [D0[i, j]] + for k in range(1, warp + 1): + min_list += [D0[min(i + k, r), j], + D0[i, min(j + k, c)]] + D1[i, j] += min(min_list) + if len(x) == 1: + path = zeros(len(y)), range(len(y)) + elif len(y) == 1: + path = range(len(x)), zeros(len(x)) + else: + path = _traceback(D0) + return D1[-1, -1], C, D1, path + + +def _traceback(D): + i, j = array(D.shape) - 2 + p, q = [i], [j] + while (i > 0) or (j > 0): + tb = argmin((D[i, j], D[i, j + 1], D[i + 1, j])) + if tb == 0: + i -= 1 + j -= 1 + elif tb == 1: + i -= 1 + else: # (tb == 2): + j -= 1 + p.insert(0, i) + q.insert(0, j) + return array(p), array(q) + + +if __name__ == '__main__': + w = inf + s = 1.0 + if 1: # 1-D numeric + from sklearn.metrics.pairwise import manhattan_distances + x = [0, 0, 1, 1, 2, 4, 2, 1, 2, 0] + y = [1, 1, 1, 2, 2, 2, 2, 3, 2, 0] + dist_fun = manhattan_distances + w = 1 + # s = 1.2 + elif 0: # 2-D numeric + from sklearn.metrics.pairwise import euclidean_distances + x = [[0, 0], [0, 1], [1, 1], [1, 2], [2, 2], [4, 3], [2, 3], [1, 1], [2, 2], [0, 1]] + y = [[1, 0], [1, 1], [1, 1], [2, 1], [4, 3], [4, 3], [2, 3], [3, 1], [1, 2], [1, 0]] + dist_fun = euclidean_distances + else: # 1-D list of strings + from nltk.metrics.distance import edit_distance + # x = ['we', 'shelled', 'clams', 'for', 'the', 'chowder'] + # y = ['class', 'too'] + x = ['i', 'soon', 'found', 'myself', 'muttering', 'to', 'the', 'walls'] + y = ['see', 'drown', 'himself'] + # x = 'we talked about the situation'.split() + # y = 'we talked about the situation'.split() + dist_fun = edit_distance + dist, cost, acc, path = dtw(x, y, dist_fun, w=w, s=s) + + # Vizualize + from matplotlib import pyplot as plt + plt.imshow(cost.T, origin='lower', cmap=plt.cm.Reds, interpolation='nearest') + plt.plot(path[0], path[1], '-o') # relation + plt.xticks(range(len(x)), x) + plt.yticks(range(len(y)), y) + plt.xlabel('x') + plt.ylabel('y') + plt.axis('tight') + if isinf(w): + plt.title('Minimum distance: {}, slope weight: {}'.format(dist, s)) + else: + plt.title('Minimum distance: {}, window widht: {}, slope weight: {}'.format(dist, w, s)) + plt.show() \ No newline at end of file diff --git a/utils/losses.py b/utils/losses.py new file mode 100644 index 0000000..21438e7 --- /dev/null +++ b/utils/losses.py @@ -0,0 +1,89 @@ +# This source code is provided for the purposes of scientific reproducibility +# under the following limited license from Element AI Inc. The code is an +# implementation of the N-BEATS model (Oreshkin et al., N-BEATS: Neural basis +# expansion analysis for interpretable time series forecasting, +# https://arxiv.org/abs/1905.10437). The copyright to the source code is +# licensed under the Creative Commons - Attribution-NonCommercial 4.0 +# International license (CC BY-NC 4.0): +# https://creativecommons.org/licenses/by-nc/4.0/. Any commercial use (whether +# for the benefit of third parties or internally in production) requires an +# explicit license. The subject-matter of the N-BEATS model and associated +# materials are the property of Element AI Inc. and may be subject to patent +# protection. No license to patents is granted hereunder (whether express or +# implied). Copyright © 2020 Element AI Inc. All rights reserved. 
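Usage note for utils/dtw_metric.py above: both dtw and accelerated_dtw return a (distance, cost_matrix, accumulated_cost_matrix, path) tuple. The following is a minimal sketch of the call pattern, assuming it is run from the repository root so that utils is importable; the toy sequences and the band width w=2 are illustrative values only, not anything fixed by the file:

import numpy as np

from utils.dtw_metric import dtw, accelerated_dtw

# Two toy univariate series, shaped (length, 1) so each element is a feature vector.
x = np.array([0.0, 1.0, 2.0, 3.0, 2.0, 1.0]).reshape(-1, 1)
y = np.array([0.0, 0.5, 2.5, 3.0, 1.0]).reshape(-1, 1)

# Exact DTW with a callable point-wise distance and a band constraint of w=2.
d, cost, acc, path = dtw(x, y, dist=lambda a, b: float(np.abs(a - b).sum()), w=2)
print("banded DTW distance:", d)

# Faster variant: all pairwise distances come from a single scipy cdist call,
# so dist may also be any metric name that cdist understands, e.g. 'euclidean'.
d_fast, _, _, path_fast = accelerated_dtw(x, y, dist='euclidean')
print("accelerated DTW distance:", d_fast)

Both variants unpack the same way; the accelerated one is preferable for longer sequences because the cost matrix is filled in one vectorized call rather than element by element.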
+ +""" +Loss functions for PyTorch. +""" + +import torch as t +import torch.nn as nn +import numpy as np +import pdb + + +def divide_no_nan(a, b): + """ + a/b where the resulted NaN or Inf are replaced by 0. + """ + result = a / b + result[result != result] = .0 + result[result == np.inf] = .0 + return result + + +class mape_loss(nn.Module): + def __init__(self): + super(mape_loss, self).__init__() + + def forward(self, insample: t.Tensor, freq: int, + forecast: t.Tensor, target: t.Tensor, mask: t.Tensor) -> t.float: + """ + MAPE loss as defined in: https://en.wikipedia.org/wiki/Mean_absolute_percentage_error + + :param forecast: Forecast values. Shape: batch, time + :param target: Target values. Shape: batch, time + :param mask: 0/1 mask. Shape: batch, time + :return: Loss value + """ + weights = divide_no_nan(mask, target) + return t.mean(t.abs((forecast - target) * weights)) + + +class smape_loss(nn.Module): + def __init__(self): + super(smape_loss, self).__init__() + + def forward(self, insample: t.Tensor, freq: int, + forecast: t.Tensor, target: t.Tensor, mask: t.Tensor) -> t.float: + """ + sMAPE loss as defined in https://robjhyndman.com/hyndsight/smape/ (Makridakis 1993) + + :param forecast: Forecast values. Shape: batch, time + :param target: Target values. Shape: batch, time + :param mask: 0/1 mask. Shape: batch, time + :return: Loss value + """ + return 200 * t.mean(divide_no_nan(t.abs(forecast - target), + t.abs(forecast.data) + t.abs(target.data)) * mask) + + +class mase_loss(nn.Module): + def __init__(self): + super(mase_loss, self).__init__() + + def forward(self, insample: t.Tensor, freq: int, + forecast: t.Tensor, target: t.Tensor, mask: t.Tensor) -> t.float: + """ + MASE loss as defined in "Scaled Errors" https://robjhyndman.com/papers/mase.pdf + + :param insample: Insample values. Shape: batch, time_i + :param freq: Frequency value + :param forecast: Forecast values. Shape: batch, time_o + :param target: Target values. Shape: batch, time_o + :param mask: 0/1 mask. Shape: batch, time_o + :return: Loss value + """ + masep = t.mean(t.abs(insample[:, freq:] - insample[:, :-freq]), dim=1) + masked_masep_inv = divide_no_nan(mask, masep[:, None]) + return t.mean(t.abs(target - forecast) * masked_masep_inv) diff --git a/utils/m4_summary.py b/utils/m4_summary.py new file mode 100644 index 0000000..acd50fe --- /dev/null +++ b/utils/m4_summary.py @@ -0,0 +1,140 @@ +# This source code is provided for the purposes of scientific reproducibility +# under the following limited license from Element AI Inc. The code is an +# implementation of the N-BEATS model (Oreshkin et al., N-BEATS: Neural basis +# expansion analysis for interpretable time series forecasting, +# https://arxiv.org/abs/1905.10437). The copyright to the source code is +# licensed under the Creative Commons - Attribution-NonCommercial 4.0 +# International license (CC BY-NC 4.0): +# https://creativecommons.org/licenses/by-nc/4.0/. Any commercial use (whether +# for the benefit of third parties or internally in production) requires an +# explicit license. The subject-matter of the N-BEATS model and associated +# materials are the property of Element AI Inc. and may be subject to patent +# protection. No license to patents is granted hereunder (whether express or +# implied). Copyright 2020 Element AI Inc. All rights reserved. 
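Usage note for utils/losses.py above: each loss is an nn.Module whose forward signature is (insample, freq, forecast, target, mask), with shapes as documented in the docstrings. Below is a minimal sketch; the batch size, sequence lengths, and freq value are made-up illustrative assumptions:

import torch as t

from utils.losses import smape_loss, mase_loss

batch, in_len, out_len, freq = 4, 36, 12, 1        # illustrative sizes; freq is the seasonal lag used by mase_loss

insample = t.randn(batch, in_len)                  # history, only consumed by mase_loss
forecast = t.randn(batch, out_len, requires_grad=True)
target = t.randn(batch, out_len)
mask = t.ones_like(target)                         # 1.0 where a position should contribute to the loss

loss = smape_loss()(insample, freq, forecast, target, mask)
loss.backward()                                    # gradients flow back into forecast

scaled = mase_loss()(insample, freq, forecast, target, mask)
print(loss.item(), scaled.item())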
+
+"""
+M4 Summary
+"""
+from collections import OrderedDict
+
+import numpy as np
+import pandas as pd
+
+from data_provider.m4 import M4Dataset
+from data_provider.m4 import M4Meta
+import os
+
+
+def group_values(values, groups, group_name):
+    return np.array([v[~np.isnan(v)] for v in values[groups == group_name]])
+
+
+def mase(forecast, insample, outsample, frequency):
+    return np.mean(np.abs(forecast - outsample)) / np.mean(np.abs(insample[:-frequency] - insample[frequency:]))
+
+
+def smape_2(forecast, target):
+    denom = np.abs(target) + np.abs(forecast)
+    # divide by 1.0 instead of 0.0; when denom is zero the numerator is 0.0 anyway.
+    denom[denom == 0.0] = 1.0
+    return 200 * np.abs(forecast - target) / denom
+
+
+def mape(forecast, target):
+    denom = np.abs(target)
+    # divide by 1.0 instead of 0.0; when denom is zero the numerator is 0.0 anyway.
+    denom[denom == 0.0] = 1.0
+    return 100 * np.abs(forecast - target) / denom
+
+
+class M4Summary:
+    def __init__(self, file_path, root_path):
+        self.file_path = file_path
+        self.training_set = M4Dataset.load(training=True, dataset_file=root_path)
+        self.test_set = M4Dataset.load(training=False, dataset_file=root_path)
+        self.naive_path = os.path.join(root_path, 'submission-Naive2.csv')
+
+    def evaluate(self):
+        """
+        Evaluate forecasts using the M4 test dataset.
+
+        :return: sMAPE and OWA grouped by seasonal patterns.
+        """
+        grouped_owa = OrderedDict()
+
+        naive2_forecasts = pd.read_csv(self.naive_path).values[:, 1:].astype(np.float32)
+        naive2_forecasts = np.array([v[~np.isnan(v)] for v in naive2_forecasts])
+
+        model_mases = {}
+        naive2_smapes = {}
+        naive2_mases = {}
+        grouped_smapes = {}
+        grouped_mapes = {}
+        for group_name in M4Meta.seasonal_patterns:
+            file_name = self.file_path + group_name + "_forecast.csv"
+            if os.path.exists(file_name):
+                model_forecast = pd.read_csv(file_name).values
+
+                naive2_forecast = group_values(naive2_forecasts, self.test_set.groups, group_name)
+                target = group_values(self.test_set.values, self.test_set.groups, group_name)
+                # all timeseries within group have same frequency
+                frequency = self.training_set.frequencies[self.test_set.groups == group_name][0]
+                insample = group_values(self.training_set.values, self.test_set.groups, group_name)
+
+                model_mases[group_name] = np.mean([mase(forecast=model_forecast[i],
+                                                        insample=insample[i],
+                                                        outsample=target[i],
+                                                        frequency=frequency) for i in range(len(model_forecast))])
+                naive2_mases[group_name] = np.mean([mase(forecast=naive2_forecast[i],
+                                                         insample=insample[i],
+                                                         outsample=target[i],
+                                                         frequency=frequency) for i in range(len(model_forecast))])
+
+                naive2_smapes[group_name] = np.mean(smape_2(naive2_forecast, target))
+                grouped_smapes[group_name] = np.mean(smape_2(forecast=model_forecast, target=target))
+                grouped_mapes[group_name] = np.mean(mape(forecast=model_forecast, target=target))
+
+        grouped_smapes = self.summarize_groups(grouped_smapes)
+        grouped_mapes = self.summarize_groups(grouped_mapes)
+        grouped_model_mases = self.summarize_groups(model_mases)
+        grouped_naive2_smapes = self.summarize_groups(naive2_smapes)
+        grouped_naive2_mases = self.summarize_groups(naive2_mases)
+        for k in grouped_model_mases.keys():
+            grouped_owa[k] = (grouped_model_mases[k] / grouped_naive2_mases[k] +
+                              grouped_smapes[k] / grouped_naive2_smapes[k]) / 2
+
+        def round_all(d):
+            return dict(map(lambda kv: (kv[0], np.round(kv[1], 3)), d.items()))
+
+        return round_all(grouped_smapes),
round_all(grouped_owa), round_all(grouped_mapes), round_all( + grouped_model_mases) + + def summarize_groups(self, scores): + """ + Re-group scores respecting M4 rules. + :param scores: Scores per group. + :return: Grouped scores. + """ + scores_summary = OrderedDict() + + def group_count(group_name): + return len(np.where(self.test_set.groups == group_name)[0]) + + weighted_score = {} + for g in ['Yearly', 'Quarterly', 'Monthly']: + weighted_score[g] = scores[g] * group_count(g) + scores_summary[g] = scores[g] + + others_score = 0 + others_count = 0 + for g in ['Weekly', 'Daily', 'Hourly']: + others_score += scores[g] * group_count(g) + others_count += group_count(g) + weighted_score['Others'] = others_score + scores_summary['Others'] = others_score / others_count + + average = np.sum(list(weighted_score.values())) / len(self.test_set.groups) + scores_summary['Average'] = average + + return scores_summary diff --git a/utils/masking.py b/utils/masking.py new file mode 100644 index 0000000..a19cbf6 --- /dev/null +++ b/utils/masking.py @@ -0,0 +1,26 @@ +import torch + + +class TriangularCausalMask(): + def __init__(self, B, L, device="cpu"): + mask_shape = [B, 1, L, L] + with torch.no_grad(): + self._mask = torch.triu(torch.ones(mask_shape, dtype=torch.bool), diagonal=1).to(device) + + @property + def mask(self): + return self._mask + + +class ProbMask(): + def __init__(self, B, H, L, index, scores, device="cpu"): + _mask = torch.ones(L, scores.shape[-1], dtype=torch.bool).to(device).triu(1) + _mask_ex = _mask[None, None, :].expand(B, H, L, scores.shape[-1]) + indicator = _mask_ex[torch.arange(B)[:, None, None], + torch.arange(H)[None, :, None], + index, :].to(device) + self._mask = indicator.view(scores.shape).to(device) + + @property + def mask(self): + return self._mask diff --git a/utils/metrics.py b/utils/metrics.py new file mode 100644 index 0000000..ccab908 --- /dev/null +++ b/utils/metrics.py @@ -0,0 +1,41 @@ +import numpy as np + + +def RSE(pred, true): + return np.sqrt(np.sum((true - pred) ** 2)) / np.sqrt(np.sum((true - true.mean()) ** 2)) + + +def CORR(pred, true): + u = ((true - true.mean(0)) * (pred - pred.mean(0))).sum(0) + d = np.sqrt(((true - true.mean(0)) ** 2 * (pred - pred.mean(0)) ** 2).sum(0)) + return (u / d).mean(-1) + + +def MAE(pred, true): + return np.mean(np.abs(true - pred)) + + +def MSE(pred, true): + return np.mean((true - pred) ** 2) + + +def RMSE(pred, true): + return np.sqrt(MSE(pred, true)) + + +def MAPE(pred, true): + return np.mean(np.abs((true - pred) / true)) + + +def MSPE(pred, true): + return np.mean(np.square((true - pred) / true)) + + +def metric(pred, true): + mae = MAE(pred, true) + mse = MSE(pred, true) + rmse = RMSE(pred, true) + mape = MAPE(pred, true) + mspe = MSPE(pred, true) + + return mae, mse, rmse, mape, mspe diff --git a/utils/print_args.py b/utils/print_args.py new file mode 100644 index 0000000..446d81b --- /dev/null +++ b/utils/print_args.py @@ -0,0 +1,58 @@ +def print_args(args): + print("\033[1m" + "Basic Config" + "\033[0m") + print(f' {"Task Name:":<20}{args.task_name:<20}{"Is Training:":<20}{args.is_training:<20}') + print(f' {"Model ID:":<20}{args.model_id:<20}{"Model:":<20}{args.model:<20}') + print() + + print("\033[1m" + "Data Loader" + "\033[0m") + print(f' {"Data:":<20}{args.data:<20}{"Root Path:":<20}{args.root_path:<20}') + print(f' {"Data Path:":<20}{args.data_path:<20}{"Features:":<20}{args.features:<20}') + print(f' {"Target:":<20}{args.target:<20}{"Freq:":<20}{args.freq:<20}') + print(f' 
{"Checkpoints:":<20}{args.checkpoints:<20}') + print() + + if args.task_name in ['long_term_forecast', 'short_term_forecast']: + print("\033[1m" + "Forecasting Task" + "\033[0m") + print(f' {"Seq Len:":<20}{args.seq_len:<20}{"Label Len:":<20}{args.label_len:<20}') + print(f' {"Pred Len:":<20}{args.pred_len:<20}{"Seasonal Patterns:":<20}{args.seasonal_patterns:<20}') + print(f' {"Inverse:":<20}{args.inverse:<20}') + print() + + if args.task_name == 'imputation': + print("\033[1m" + "Imputation Task" + "\033[0m") + print(f' {"Mask Rate:":<20}{args.mask_rate:<20}') + print() + + if args.task_name == 'anomaly_detection': + print("\033[1m" + "Anomaly Detection Task" + "\033[0m") + print(f' {"Anomaly Ratio:":<20}{args.anomaly_ratio:<20}') + print() + + print("\033[1m" + "Model Parameters" + "\033[0m") + print(f' {"Top k:":<20}{args.top_k:<20}{"Num Kernels:":<20}{args.num_kernels:<20}') + print(f' {"Enc In:":<20}{args.enc_in:<20}{"Dec In:":<20}{args.dec_in:<20}') + print(f' {"C Out:":<20}{args.c_out:<20}{"d model:":<20}{args.d_model:<20}') + print(f' {"n heads:":<20}{args.n_heads:<20}{"e layers:":<20}{args.e_layers:<20}') + print(f' {"d layers:":<20}{args.d_layers:<20}{"d FF:":<20}{args.d_ff:<20}') + print(f' {"Moving Avg:":<20}{args.moving_avg:<20}{"Factor:":<20}{args.factor:<20}') + print(f' {"Distil:":<20}{args.distil:<20}{"Dropout:":<20}{args.dropout:<20}') + print(f' {"Embed:":<20}{args.embed:<20}{"Activation:":<20}{args.activation:<20}') + print() + + print("\033[1m" + "Run Parameters" + "\033[0m") + print(f' {"Num Workers:":<20}{args.num_workers:<20}{"Itr:":<20}{args.itr:<20}') + print(f' {"Train Epochs:":<20}{args.train_epochs:<20}{"Batch Size:":<20}{args.batch_size:<20}') + print(f' {"Patience:":<20}{args.patience:<20}{"Learning Rate:":<20}{args.learning_rate:<20}') + print(f' {"Des:":<20}{args.des:<20}{"Loss:":<20}{args.loss:<20}') + print(f' {"Lradj:":<20}{args.lradj:<20}{"Use Amp:":<20}{args.use_amp:<20}') + print() + + print("\033[1m" + "GPU" + "\033[0m") + print(f' {"Use GPU:":<20}{args.use_gpu:<20}{"GPU:":<20}{args.gpu:<20}') + print(f' {"Use Multi GPU:":<20}{args.use_multi_gpu:<20}{"Devices:":<20}{args.devices:<20}') + print() + + print("\033[1m" + "De-stationary Projector Params" + "\033[0m") + p_hidden_dims_str = ', '.join(map(str, args.p_hidden_dims)) + print(f' {"P Hidden Dims:":<20}{p_hidden_dims_str:<20}{"P Hidden Layers:":<20}{args.p_hidden_layers:<20}') + print() diff --git a/utils/timefeatures.py b/utils/timefeatures.py new file mode 100644 index 0000000..7c12972 --- /dev/null +++ b/utils/timefeatures.py @@ -0,0 +1,148 @@ +# From: gluonts/src/gluonts/time_feature/_base.py +# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). +# You may not use this file except in compliance with the License. +# A copy of the License is located at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# or in the "license" file accompanying this file. This file is distributed +# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the License for the specific language governing +# permissions and limitations under the License. 
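Usage note for utils/metrics.py and utils/masking.py shown earlier in this diff: metric expects two NumPy arrays of identical shape and returns MAE, MSE, RMSE, MAPE and MSPE in that order, while TriangularCausalMask builds the boolean causal mask consumed by the attention layers. A short sketch with illustrative shapes (the array sizes are assumptions, not library defaults):

import numpy as np

from utils.metrics import metric
from utils.masking import TriangularCausalMask

# Forecast evaluation on arrays shaped (n_windows, pred_len, n_variables).
pred = np.random.rand(8, 24, 7).astype(np.float32)
true = np.random.rand(8, 24, 7).astype(np.float32) + 0.1   # offset keeps the MAPE/MSPE denominators non-zero
mae, mse, rmse, mape, mspe = metric(pred, true)
print("MAE %.4f  MSE %.4f  RMSE %.4f" % (mae, mse, rmse))

# Boolean causal mask: True above the main diagonal, i.e. the future positions to be masked out.
causal = TriangularCausalMask(B=8, L=96)
print(causal.mask.shape)   # torch.Size([8, 1, 96, 96])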
+ +from typing import List + +import numpy as np +import pandas as pd +from pandas.tseries import offsets +from pandas.tseries.frequencies import to_offset + + +class TimeFeature: + def __init__(self): + pass + + def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: + pass + + def __repr__(self): + return self.__class__.__name__ + "()" + + +class SecondOfMinute(TimeFeature): + """Minute of hour encoded as value between [-0.5, 0.5]""" + + def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: + return index.second / 59.0 - 0.5 + + +class MinuteOfHour(TimeFeature): + """Minute of hour encoded as value between [-0.5, 0.5]""" + + def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: + return index.minute / 59.0 - 0.5 + + +class HourOfDay(TimeFeature): + """Hour of day encoded as value between [-0.5, 0.5]""" + + def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: + return index.hour / 23.0 - 0.5 + + +class DayOfWeek(TimeFeature): + """Hour of day encoded as value between [-0.5, 0.5]""" + + def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: + return index.dayofweek / 6.0 - 0.5 + + +class DayOfMonth(TimeFeature): + """Day of month encoded as value between [-0.5, 0.5]""" + + def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: + return (index.day - 1) / 30.0 - 0.5 + + +class DayOfYear(TimeFeature): + """Day of year encoded as value between [-0.5, 0.5]""" + + def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: + return (index.dayofyear - 1) / 365.0 - 0.5 + + +class MonthOfYear(TimeFeature): + """Month of year encoded as value between [-0.5, 0.5]""" + + def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: + return (index.month - 1) / 11.0 - 0.5 + + +class WeekOfYear(TimeFeature): + """Week of year encoded as value between [-0.5, 0.5]""" + + def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: + return (index.isocalendar().week - 1) / 52.0 - 0.5 + + +def time_features_from_frequency_str(freq_str: str) -> List[TimeFeature]: + """ + Returns a list of time features that will be appropriate for the given frequency string. + Parameters + ---------- + freq_str + Frequency string of the form [multiple][granularity] such as "12H", "5min", "1D" etc. 
+ """ + + features_by_offsets = { + offsets.YearEnd: [], + offsets.QuarterEnd: [MonthOfYear], + offsets.MonthEnd: [MonthOfYear], + offsets.Week: [DayOfMonth, WeekOfYear], + offsets.Day: [DayOfWeek, DayOfMonth, DayOfYear], + offsets.BusinessDay: [DayOfWeek, DayOfMonth, DayOfYear], + offsets.Hour: [HourOfDay, DayOfWeek, DayOfMonth, DayOfYear], + offsets.Minute: [ + MinuteOfHour, + HourOfDay, + DayOfWeek, + DayOfMonth, + DayOfYear, + ], + offsets.Second: [ + SecondOfMinute, + MinuteOfHour, + HourOfDay, + DayOfWeek, + DayOfMonth, + DayOfYear, + ], + } + + offset = to_offset(freq_str) + + for offset_type, feature_classes in features_by_offsets.items(): + if isinstance(offset, offset_type): + return [cls() for cls in feature_classes] + + supported_freq_msg = f""" + Unsupported frequency {freq_str} + The following frequencies are supported: + Y - yearly + alias: A + M - monthly + W - weekly + D - daily + B - business days + H - hourly + T - minutely + alias: min + S - secondly + """ + raise RuntimeError(supported_freq_msg) + + +def time_features(dates, freq='h'): + return np.vstack([feat(dates) for feat in time_features_from_frequency_str(freq)]) diff --git a/utils/tools.py b/utils/tools.py new file mode 100644 index 0000000..6efa712 --- /dev/null +++ b/utils/tools.py @@ -0,0 +1,120 @@ +import os + +import numpy as np +import torch +import matplotlib.pyplot as plt +import pandas as pd +import math + +plt.switch_backend('agg') + + +def adjust_learning_rate(optimizer, epoch, args): + # lr = args.learning_rate * (0.2 ** (epoch // 2)) + if args.lradj == 'type1': + lr_adjust = {epoch: args.learning_rate * (0.5 ** ((epoch - 1) // 1))} + elif args.lradj == 'type2': + lr_adjust = { + 2: 5e-5, 4: 1e-5, 6: 5e-6, 8: 1e-6, + 10: 5e-7, 15: 1e-7, 20: 5e-8 + } + elif args.lradj == 'type3': + lr_adjust = {epoch: args.learning_rate if epoch < 3 else args.learning_rate * (0.9 ** ((epoch - 3) // 1))} + elif args.lradj == "cosine": + lr_adjust = {epoch: args.learning_rate /2 * (1 + math.cos(epoch / args.train_epochs * math.pi))} + if epoch in lr_adjust.keys(): + lr = lr_adjust[epoch] + for param_group in optimizer.param_groups: + param_group['lr'] = lr + print('Updating learning rate to {}'.format(lr)) + + +class EarlyStopping: + def __init__(self, patience=7, verbose=False, delta=0): + self.patience = patience + self.verbose = verbose + self.counter = 0 + self.best_score = None + self.early_stop = False + self.val_loss_min = np.inf + self.delta = delta + + def __call__(self, val_loss, model, path): + score = -val_loss + if self.best_score is None: + self.best_score = score + self.save_checkpoint(val_loss, model, path) + elif score < self.best_score + self.delta: + self.counter += 1 + print(f'EarlyStopping counter: {self.counter} out of {self.patience}') + if self.counter >= self.patience: + self.early_stop = True + else: + self.best_score = score + self.save_checkpoint(val_loss, model, path) + self.counter = 0 + + def save_checkpoint(self, val_loss, model, path): + if self.verbose: + print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). 
Saving model ...') + torch.save(model.state_dict(), path + '/' + 'checkpoint.pth') + self.val_loss_min = val_loss + + +class dotdict(dict): + """dot.notation access to dictionary attributes""" + __getattr__ = dict.get + __setattr__ = dict.__setitem__ + __delattr__ = dict.__delitem__ + + +class StandardScaler(): + def __init__(self, mean, std): + self.mean = mean + self.std = std + + def transform(self, data): + return (data - self.mean) / self.std + + def inverse_transform(self, data): + return (data * self.std) + self.mean + + +def visual(true, preds=None, name='./pic/test.pdf'): + """ + Results visualization + """ + plt.figure() + if preds is not None: + plt.plot(preds, label='Prediction', linewidth=2) + plt.plot(true, label='GroundTruth', linewidth=2) + plt.legend() + plt.savefig(name, bbox_inches='tight') + + +def adjustment(gt, pred): + anomaly_state = False + for i in range(len(gt)): + if gt[i] == 1 and pred[i] == 1 and not anomaly_state: + anomaly_state = True + for j in range(i, 0, -1): + if gt[j] == 0: + break + else: + if pred[j] == 0: + pred[j] = 1 + for j in range(i, len(gt)): + if gt[j] == 0: + break + else: + if pred[j] == 0: + pred[j] = 1 + elif gt[i] == 0: + anomaly_state = False + if anomaly_state: + pred[i] = 1 + return gt, pred + + +def cal_accuracy(y_pred, y_true): + return np.mean(y_pred == y_true)
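Finally, a schematic of how EarlyStopping and adjust_learning_rate from utils/tools.py above are typically wired into a training loop. The linear model, the random batches, and the argument values below are placeholders for illustration only; in the real experiments they come from the parsed command-line args and the data loaders:

import os
from types import SimpleNamespace

import torch
import torch.nn as nn

from utils.tools import EarlyStopping, adjust_learning_rate

# Placeholder model and args, just to make the sketch self-contained.
model = nn.Linear(96, 24)
args = SimpleNamespace(learning_rate=1e-4, lradj='type1', train_epochs=10)

optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)
criterion = nn.MSELoss()
early_stopping = EarlyStopping(patience=3, verbose=True)

ckpt_path = './checkpoints/demo'
os.makedirs(ckpt_path, exist_ok=True)

for epoch in range(args.train_epochs):
    model.train()
    x, y = torch.randn(32, 96), torch.randn(32, 24)   # stand-in for one training batch
    optimizer.zero_grad()
    train_loss = criterion(model(x), y)
    train_loss.backward()
    optimizer.step()

    model.eval()
    with torch.no_grad():
        vali_loss = criterion(model(torch.randn(32, 96)), torch.randn(32, 24)).item()

    # Saves ckpt_path + '/checkpoint.pth' whenever the validation loss improves.
    early_stopping(vali_loss, model, ckpt_path)
    if early_stopping.early_stop:
        break

    # The schedules in adjust_learning_rate treat epochs as 1-indexed, hence epoch + 1;
    # 'type1' starts at args.learning_rate and halves it on every subsequent epoch.
    adjust_learning_rate(optimizer, epoch + 1, args)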