feat: add mamba and dynamic chunking related code and test code
This commit is contained in:
102
generate_sine_data.py
Normal file
102
generate_sine_data.py
Normal file
@ -0,0 +1,102 @@
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import os
|
||||
|
||||
def generate_sine_wave_data(
    n_samples: int = 10000,
    seq_len: int = 200,
    n_channels: int = 2,
    save_path: str = './data/sine_wave/',
) -> pd.DataFrame:
    """Generate a two-channel sine-wave time series and save it as CSV files.

    ``n_samples`` independent segments of ``seq_len`` points are generated.
    Each segment holds two sinusoids with randomly drawn frequency, phase and
    amplitude, evaluated on ``np.linspace(0, 4*pi, seq_len)``. The segments
    are concatenated into one continuous series whose ``timestamp`` column
    runs 0, 1, 2, ... across segment boundaries.

    Four files are written into ``save_path``: ``train.csv``, ``val.csv`` and
    ``test.csv`` (a contiguous 80% / 10% / 10% split by row) plus
    ``sine_wave.csv`` with the full series.

    Args:
        n_samples: Number of sine-wave segments to generate.
        seq_len: Number of time steps in each segment.
        n_channels: Only echoed in the printed summary. Exactly two channels
            are always generated, regardless of this value.
        save_path: Directory the CSV files are written to. Created
            (including parents) when it does not exist.

    Returns:
        DataFrame with columns ``timestamp``, ``channel1`` and ``channel2``
        and ``n_samples * seq_len`` rows.
    """
    # exist_ok=True replaces the exists()-then-makedirs() pair, which could
    # fail if the directory appeared between the check and the creation.
    os.makedirs(save_path, exist_ok=True)

    # Shared time axis for every segment.
    t = np.linspace(0, 4 * np.pi, seq_len)

    segments = []
    for sample_idx in range(n_samples):
        # The draw order (channel 1 first, then channel 2; frequency, phase,
        # amplitude) is kept fixed so seeded runs reproduce the same data.
        freq1 = np.random.uniform(0.5, 3.0)
        phase1 = np.random.uniform(0, 2 * np.pi)
        amplitude1 = np.random.uniform(0.5, 2.0)

        freq2 = np.random.uniform(0.3, 2.5)
        phase2 = np.random.uniform(0, 2 * np.pi)
        amplitude2 = np.random.uniform(0.8, 1.8)

        # No noise is added; the signals are clean sinusoids. To add noise,
        # add np.random.normal(0, 0.1, seq_len) to each channel here.
        channel1 = amplitude1 * np.sin(freq1 * t + phase1)
        channel2 = amplitude2 * np.sin(freq2 * t + phase2)

        # Offset the timestamps by the segment's position so the concatenated
        # series has one monotonically increasing time index.
        timestamp = np.arange(seq_len) + sample_idx * seq_len
        segments.append(np.column_stack([timestamp, channel1, channel2]))

    full_data = np.vstack(segments)
    df = pd.DataFrame(full_data, columns=['timestamp', 'channel1', 'channel2'])

    # Contiguous 8:1:1 split by row (not shuffled).
    total_len = len(df)
    train_end = int(0.8 * total_len)
    val_end = int(0.9 * total_len)

    train_df = df[:train_end]
    val_df = df[train_end:val_end]
    test_df = df[val_end:]

    train_df.to_csv(os.path.join(save_path, 'train.csv'), index=False)
    val_df.to_csv(os.path.join(save_path, 'val.csv'), index=False)
    test_df.to_csv(os.path.join(save_path, 'test.csv'), index=False)

    # Full, unsplit series.
    df.to_csv(os.path.join(save_path, 'sine_wave.csv'), index=False)

    print(f"数据已生成并保存到 {save_path}")
    print(f"训练集: {len(train_df)} 条记录")
    print(f"验证集: {len(val_df)} 条记录")
    print(f"测试集: {len(test_df)} 条记录")
    print(f"总计: {len(df)} 条记录,{n_channels} 个通道")

    return df
|
||||
|
||||
if __name__ == "__main__":
    # Demo run: 200 sine-wave segments of 200 time steps each, two channels,
    # written to the default output directory.
    demo_settings = dict(
        n_samples=200,
        seq_len=200,
        n_channels=2,
        save_path='./data/sine_wave/',
    )
    data = generate_sine_wave_data(**demo_settings)

    # Print summary statistics of the generated series.
    print("\n数据统计信息:")
    print(data.describe())
|
Reference in New Issue
Block a user