103 lines
3.2 KiB
Python
103 lines
3.2 KiB
Python
import numpy as np
|
|
import pandas as pd
|
|
import os
|
|
|
|
def generate_sine_wave_data(n_samples=10000, seq_len=200, n_channels=2,
                            save_path='./data/sine_wave/', seed=None):
    """Generate a two-channel sine-wave time series and save it as CSV files.

    Each sample is a pair of sine waves with independently drawn frequency,
    phase and amplitude. All samples are concatenated into one continuous
    series with a monotonically increasing ``timestamp`` column, then split
    row-wise 8:1:1 into train / validation / test sets.

    Files written into ``save_path``: ``train.csv``, ``val.csv``,
    ``test.csv`` and ``sine_wave.csv`` (the full series).

    Args:
        n_samples: Number of sine-wave samples to generate. ``0`` yields an
            empty frame and header-only CSV files.
        seq_len: Number of time steps per sample.
        n_channels: Kept for backward compatibility. The generator always
            produces exactly two channels; this value is not used.
        save_path: Directory to write the CSV files into; created if it
            does not exist.
        seed: Optional seed for reproducible output. When ``None`` the
            global NumPy random state is used.

    Returns:
        pandas.DataFrame with columns ``timestamp``, ``channel1`` and
        ``channel2`` holding ``n_samples * seq_len`` rows.
    """
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(save_path, exist_ok=True)

    # With no seed, draw from the global NumPy state; RandomState exposes
    # the same uniform() API for the seeded case.
    rng = np.random if seed is None else np.random.RandomState(seed)

    # Shared time axis for every sample.
    t = np.linspace(0, 4 * np.pi, seq_len)

    blocks = []
    for sample_idx in range(n_samples):
        # Channel 1: random frequency, phase and amplitude.
        freq1 = rng.uniform(0.5, 3.0)
        phase1 = rng.uniform(0, 2 * np.pi)
        amplitude1 = rng.uniform(0.5, 2.0)

        # Channel 2: independently drawn, with slightly different ranges.
        freq2 = rng.uniform(0.3, 2.5)
        phase2 = rng.uniform(0, 2 * np.pi)
        amplitude2 = rng.uniform(0.8, 1.8)

        # NOTE: samples are noise-free; no noise term is added here.
        channel1 = amplitude1 * np.sin(freq1 * t + phase1)
        channel2 = amplitude2 * np.sin(freq2 * t + phase2)

        # Offset timestamps by the sample index so the concatenated series
        # has one continuous time axis.
        timestamp = sample_idx * seq_len + np.arange(seq_len)
        blocks.append(np.column_stack([timestamp, channel1, channel2]))

    # np.vstack raises on an empty list, so handle n_samples == 0 explicitly.
    full_data = np.vstack(blocks) if blocks else np.empty((0, 3))

    df = pd.DataFrame(full_data, columns=['timestamp', 'channel1', 'channel2'])

    # Row-wise 8:1:1 split into train / validation / test.
    total_len = len(df)
    train_end = int(0.8 * total_len)
    val_end = int(0.9 * total_len)

    train_df = df[:train_end]
    val_df = df[train_end:val_end]
    test_df = df[val_end:]

    train_df.to_csv(os.path.join(save_path, 'train.csv'), index=False)
    val_df.to_csv(os.path.join(save_path, 'val.csv'), index=False)
    test_df.to_csv(os.path.join(save_path, 'test.csv'), index=False)

    # Also keep the unsplit series.
    df.to_csv(os.path.join(save_path, 'sine_wave.csv'), index=False)

    # Report the channels actually written rather than echoing n_channels,
    # which the generator does not use.
    actual_channels = df.shape[1] - 1

    print(f"数据已生成并保存到 {save_path}")
    print(f"训练集: {len(train_df)} 条记录")
    print(f"验证集: {len(val_df)} 条记录")
    print(f"测试集: {len(test_df)} 条记录")
    print(f"总计: {len(df)} 条记录,{actual_channels} 个通道")

    return df
|
|
|
|
if __name__ == "__main__":
    # Demo run: 200 distinct sine-wave samples of 200 time steps each,
    # two channels, written under ./data/sine_wave/.
    demo_settings = {
        'n_samples': 200,
        'seq_len': 200,
        'n_channels': 2,
        'save_path': './data/sine_wave/',
    }
    data = generate_sine_wave_data(**demo_settings)

    # Print summary statistics of the generated series.
    print("\n数据统计信息:")
    summary = data.describe()
    print(summary)
|