从 python 中的输出信号中去除周期性噪声信号
Removing a periodic noise signal from an output signal in python
我目前有两个周期性信号:一个以蓝色显示的输出信号和一个以绿色显示的噪声信号。显示的两条曲线都已移动到任意值,以清楚地分开曲线。鉴于噪声和输出具有相似的相位,我想做的是缩放噪声信号,使其具有与输出信号相同的幅度,然后从输出信号中去除噪声以消除任何振荡(希望)通过输出信号的平均值获得一条直线运行。
考虑到噪声信号也在一个平均值附近振荡,我觉得简单地将两个信号相减是行不通的,因为这只会使振荡更大。
输出信号和噪声信号均由不同数量的数据点组成(输出 - 58050 个数据点,噪声 - 52774 个数据点)如何在 python 中实现?
数据文件如下:
噪音:https://drive.google.com/file/d/1RZwknUUAXGG31J9u_37aH7m9Fdyy_opE/view?usp=sharing
输出:https://drive.google.com/file/d/1E6vLa8Z63UtftrscKmicpid5uBVqoMpv/view?usp=sharing
我用来从 .csv 文件导入两个信号并绘制它们的代码如下。
import numpy as np
import pandas as pd
# from scipy.optimize import curve_fit
from datetime import datetime
from datetime import timedelta
import matplotlib
import matplotlib.pyplot as plt
datathick = "20210726_rig_thick.csv"
qcmfilter = "20210726_cool_QCM_act.csv"
with open(datathick) as f:
lines = f.readlines()
dates = [str(line.split(',')[0]) for line in lines]
thick = [float(line.split(',')[1]) for line in lines] #output y data
z = [float(line.split(',')[2]) for line in lines]
date_thick = [datetime.strptime(x,'%Y-%m-%dT%H:%M:%S.%f').time() for x in dates]
with open(qcmfilter) as f:
lines = f.readlines()
dates_qcm = [str(line.split(',')[0]) for line in lines]
temp_qcm = [float(line.split(',')[1])+420 for line in lines] #noise y data
z = [float(line.split(',')[2]) for line in lines]
date_temp_qcm = [datetime.strptime(x,'%Y-%m-%dT%H:%M:%S.%f').time() for x in dates_qcm]
time_list_thick = []
for i in np.arange(0, len(date_thick)):
q = date_thick[i]
t = timedelta(hours= q.hour, minutes=q.minute,seconds=q.second, microseconds = q.microsecond).total_seconds()
time_list_thick.append(float(t))
time_list_temp_qcm = []
for i in np.arange(0, len(date_temp_qcm)):
q3 = date_temp_qcm[i]
t3 = timedelta(hours= q3.hour, minutes=q3.minute,seconds=q3.second, microseconds = q3.microsecond).total_seconds()
time_list_temp_qcm.append(float(t3))
#------------------------------------------------
fig=plt.figure(figsize=(7.,7.))
ax=fig.add_subplot(1,1,1)
ax.set_zorder(1)
ax.patch.set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.spines['left'].set_visible(False)
ax.spines['bottom'].set_visible(False)
ax.set_xlabel('Time (s)')
ax.set_ylabel('Amplitude (a.u)')
ax.minorticks_on() # enable minor ticks
ax.xaxis.set_ticks_position('bottom')
ax.spines['left'].set_color('black')
ax.yaxis.label.set_color('black')
ax.set_ylim(440,460)
ax.set_xlim(0, 10000)
ax.tick_params(direction='out', axis='y', which='both', pad=4, colors='black')
ax.grid(b=True, which='major', color='#eeeeee', linestyle='-', zorder=1, linewidth=0.4) # turn on major grid
ax.grid(b=True, which='minor', color='#eeeeee', linestyle='-', zorder=1, linewidth=0.4) # turn on minor grid
ax.plot(time_list_thick, thick,color='blue')
ax.plot(time_list_temp_qcm, temp_qcm, color = 'green')
plt.savefig('QCM.pdf', dpi=300, bbox_inches='tight', format='pdf')
plt.savefig('QCM.png', dpi=300, bbox_inches='tight', format='png')
编辑:按照 Mozway 的回答中给出的建议,我将我的两个数据集更改为 pandas 系列:
signal = pd.Series(thick, index = pd.TimedeltaIndex(time_list_thick,unit = 's'))
noise = pd.Series(temp_qcm, index = pd.TimedeltaIndex(time_list_temp_qcm,unit = 's'))
resampled_signal = signal.resample('1S').mean()
resampled_noise = noise.resample('1S').mean()
true_signal = []
for i in np.arange(0,len(resampled_signal)):
value = resampled_signal[i]-resampled_noise[i]
true_signal.append(value)
但是,如下所示,真实信号看起来不稳定,数据中存在间隙,真实信号也不像我最初预期的那样围绕振荡原始信号的平均值。
我会尝试找到一种方法来访问原始数据文件,以便更容易获得答案。
因为我没有您的数据集,所以很难向您展示您的实际数据,但这里有一些示例,说明如何计算具有不同采样率的两个时间序列的差异。
重采样
此示例使用 pandas.Series.resample
对数据进行下采样并对齐系列。这里我选择了一个略低于原始频率的采样率。您必须明智地选择此参数(或通过反复试验)。
xs1 = np.linspace(0, 10, 100)
signal = pd.Series(np.sin(xs1)+2,
index=pd.TimedeltaIndex(xs1, unit='min'),
)
xs2 = np.linspace(0, 10, 120)
noise = pd.Series(np.sin(xs2)+np.random.normal(scale=0.05, size=len(xs)),
index=pd.TimedeltaIndex(xs2, unit='min'),
)
resampled_signal = signal.resample('0.1min').mean()
resampled_noise = noise.resample('0.1min').mean()
pd.DataFrame({'signal': resampled_signal,
'noise': resampled_noise,
'signal-noise': resampled_signal-resampled_noise,
}).plot()
如果全局范围不相等,它也有效,然后在公共范围上计算差异。对于下图,代码中唯一的变化是 xs1 = np.linspace(0, 8, 100)
和 xs2 = np.linspace(2, 10, 120)
插值
此示例使用 pandas.DataFrame.interpolate
在连接两个系列后插入缺失点。有许多参数可用,因此请查看文档以找到最适合您的用例的选项。如果您的系列的范围不相等,请注意边缘处的潜在伪像(参见第二张图)。
xs1 = np.linspace(0, 10, 100)
signal = pd.Series(np.sin(xs1)+2,
index=pd.TimedeltaIndex(xs1, unit='min'),
)
xs2 = np.linspace(0, 10, 120)
noise = pd.Series(np.sin(xs2)+np.random.normal(scale=0.05, size=len(xs)),
index=pd.TimedeltaIndex(xs2, unit='min'),
)
df = pd.concat({'signal': signal,
'noise': noise,
}, axis=1)
df = df.interpolate()
df['signal-noise'] = df['signal']-df['noise']
df.plot()
下面是边缘插值伪像的示例:
merge_asof
具有所提供数据集的 merge_asof
示例:
加载数据:
df_thick = pd.read_csv('20210726_rig_thick.csv', header=None, index_col=0, names=['thick', 'z'])
df_thick.index = pd.to_datetime(df_thick.index)
df_qcm = pd.read_csv('20210726_cool_QCM_act.csv', header=None, index_col=0, names=['temp_qcm', 'z_qcm'])
df_qcm.index = pd.to_datetime(df_qcm.index)
df_qcm['temp_qcm']+=420 # arbitrary to be able to view the lines in the same field.
合并:
df = pd.merge_asof(df_thick, df_qcm,
left_index=True,
right_index=True,
direction='forward')
df.index = df.index - df.index[0]
df['thick_corr'] = df['thick']-df['temp_qcm']+442 # added constant to move curve up for plotting
>>> df.head()
thick z temp_qcm z_qcm thick_corr
0 days 00:00:00 451.372071 0 445.358141 0 448.013930
0 days 00:00:00.999704 451.366733 0 445.350143 0 448.016589
0 days 00:00:02.003954 451.358724 0 445.341953 0 448.016771
0 days 00:00:03.000006 451.356055 0 445.336466 0 448.019589
0 days 00:00:04.003809 451.350716 0 445.331665 0 448.019051
剧情:
ax = df.reset_index().plot(x='index', y='thick')
df.reset_index().plot(x='index', y='temp_qcm', ax=ax, color='r')
df.reset_index().plot(x='index', y='thick_corr', ax=ax, color='g')
ax.set_ylim(440, 460)
我目前有两个周期性信号:一个以蓝色显示的输出信号和一个以绿色显示的噪声信号。显示的两条曲线都已移动到任意值,以清楚地分开曲线。鉴于噪声和输出具有相似的相位,我想做的是缩放噪声信号,使其具有与输出信号相同的幅度,然后从输出信号中去除噪声以消除任何振荡(希望)通过输出信号的平均值获得一条直线运行。
考虑到噪声信号也在一个平均值附近振荡,我觉得简单地将两个信号相减是行不通的,因为这只会使振荡更大。
输出信号和噪声信号均由不同数量的数据点组成(输出 - 58050 个数据点,噪声 - 52774 个数据点)如何在 python 中实现?
数据文件如下:
噪音:https://drive.google.com/file/d/1RZwknUUAXGG31J9u_37aH7m9Fdyy_opE/view?usp=sharing
输出:https://drive.google.com/file/d/1E6vLa8Z63UtftrscKmicpid5uBVqoMpv/view?usp=sharing
我用来从 .csv 文件导入两个信号并绘制它们的代码如下。
import numpy as np
import pandas as pd
# from scipy.optimize import curve_fit
from datetime import datetime
from datetime import timedelta
import matplotlib
import matplotlib.pyplot as plt
datathick = "20210726_rig_thick.csv"
qcmfilter = "20210726_cool_QCM_act.csv"
with open(datathick) as f:
lines = f.readlines()
dates = [str(line.split(',')[0]) for line in lines]
thick = [float(line.split(',')[1]) for line in lines] #output y data
z = [float(line.split(',')[2]) for line in lines]
date_thick = [datetime.strptime(x,'%Y-%m-%dT%H:%M:%S.%f').time() for x in dates]
with open(qcmfilter) as f:
lines = f.readlines()
dates_qcm = [str(line.split(',')[0]) for line in lines]
temp_qcm = [float(line.split(',')[1])+420 for line in lines] #noise y data
z = [float(line.split(',')[2]) for line in lines]
date_temp_qcm = [datetime.strptime(x,'%Y-%m-%dT%H:%M:%S.%f').time() for x in dates_qcm]
time_list_thick = []
for i in np.arange(0, len(date_thick)):
q = date_thick[i]
t = timedelta(hours= q.hour, minutes=q.minute,seconds=q.second, microseconds = q.microsecond).total_seconds()
time_list_thick.append(float(t))
time_list_temp_qcm = []
for i in np.arange(0, len(date_temp_qcm)):
q3 = date_temp_qcm[i]
t3 = timedelta(hours= q3.hour, minutes=q3.minute,seconds=q3.second, microseconds = q3.microsecond).total_seconds()
time_list_temp_qcm.append(float(t3))
#------------------------------------------------
fig=plt.figure(figsize=(7.,7.))
ax=fig.add_subplot(1,1,1)
ax.set_zorder(1)
ax.patch.set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.spines['left'].set_visible(False)
ax.spines['bottom'].set_visible(False)
ax.set_xlabel('Time (s)')
ax.set_ylabel('Amplitude (a.u)')
ax.minorticks_on() # enable minor ticks
ax.xaxis.set_ticks_position('bottom')
ax.spines['left'].set_color('black')
ax.yaxis.label.set_color('black')
ax.set_ylim(440,460)
ax.set_xlim(0, 10000)
ax.tick_params(direction='out', axis='y', which='both', pad=4, colors='black')
ax.grid(b=True, which='major', color='#eeeeee', linestyle='-', zorder=1, linewidth=0.4) # turn on major grid
ax.grid(b=True, which='minor', color='#eeeeee', linestyle='-', zorder=1, linewidth=0.4) # turn on minor grid
ax.plot(time_list_thick, thick,color='blue')
ax.plot(time_list_temp_qcm, temp_qcm, color = 'green')
plt.savefig('QCM.pdf', dpi=300, bbox_inches='tight', format='pdf')
plt.savefig('QCM.png', dpi=300, bbox_inches='tight', format='png')
编辑:按照 Mozway 的回答中给出的建议,我将我的两个数据集更改为 pandas 系列:
signal = pd.Series(thick, index = pd.TimedeltaIndex(time_list_thick,unit = 's'))
noise = pd.Series(temp_qcm, index = pd.TimedeltaIndex(time_list_temp_qcm,unit = 's'))
resampled_signal = signal.resample('1S').mean()
resampled_noise = noise.resample('1S').mean()
true_signal = []
for i in np.arange(0,len(resampled_signal)):
value = resampled_signal[i]-resampled_noise[i]
true_signal.append(value)
但是,如下所示,真实信号看起来不稳定,数据中存在间隙,真实信号也不像我最初预期的那样围绕振荡原始信号的平均值。
因为我没有您的数据集,所以很难向您展示您的实际数据,但这里有一些示例,说明如何计算具有不同采样率的两个时间序列的差异。
重采样
此示例使用 pandas.Series.resample
对数据进行下采样并对齐系列。这里我选择了一个略低于原始频率的采样率。您必须明智地选择此参数(或通过反复试验)。
xs1 = np.linspace(0, 10, 100)
signal = pd.Series(np.sin(xs1)+2,
index=pd.TimedeltaIndex(xs1, unit='min'),
)
xs2 = np.linspace(0, 10, 120)
noise = pd.Series(np.sin(xs2)+np.random.normal(scale=0.05, size=len(xs)),
index=pd.TimedeltaIndex(xs2, unit='min'),
)
resampled_signal = signal.resample('0.1min').mean()
resampled_noise = noise.resample('0.1min').mean()
pd.DataFrame({'signal': resampled_signal,
'noise': resampled_noise,
'signal-noise': resampled_signal-resampled_noise,
}).plot()
如果全局范围不相等,它也有效,然后在公共范围上计算差异。对于下图,代码中唯一的变化是 xs1 = np.linspace(0, 8, 100)
和 xs2 = np.linspace(2, 10, 120)
插值
此示例使用 pandas.DataFrame.interpolate
在连接两个系列后插入缺失点。有许多参数可用,因此请查看文档以找到最适合您的用例的选项。如果您的系列的范围不相等,请注意边缘处的潜在伪像(参见第二张图)。
xs1 = np.linspace(0, 10, 100)
signal = pd.Series(np.sin(xs1)+2,
index=pd.TimedeltaIndex(xs1, unit='min'),
)
xs2 = np.linspace(0, 10, 120)
noise = pd.Series(np.sin(xs2)+np.random.normal(scale=0.05, size=len(xs)),
index=pd.TimedeltaIndex(xs2, unit='min'),
)
df = pd.concat({'signal': signal,
'noise': noise,
}, axis=1)
df = df.interpolate()
df['signal-noise'] = df['signal']-df['noise']
df.plot()
下面是边缘插值伪像的示例:
merge_asof
具有所提供数据集的 merge_asof
示例:
加载数据:
df_thick = pd.read_csv('20210726_rig_thick.csv', header=None, index_col=0, names=['thick', 'z'])
df_thick.index = pd.to_datetime(df_thick.index)
df_qcm = pd.read_csv('20210726_cool_QCM_act.csv', header=None, index_col=0, names=['temp_qcm', 'z_qcm'])
df_qcm.index = pd.to_datetime(df_qcm.index)
df_qcm['temp_qcm']+=420 # arbitrary to be able to view the lines in the same field.
合并:
df = pd.merge_asof(df_thick, df_qcm,
left_index=True,
right_index=True,
direction='forward')
df.index = df.index - df.index[0]
df['thick_corr'] = df['thick']-df['temp_qcm']+442 # added constant to move curve up for plotting
>>> df.head()
thick z temp_qcm z_qcm thick_corr
0 days 00:00:00 451.372071 0 445.358141 0 448.013930
0 days 00:00:00.999704 451.366733 0 445.350143 0 448.016589
0 days 00:00:02.003954 451.358724 0 445.341953 0 448.016771
0 days 00:00:03.000006 451.356055 0 445.336466 0 448.019589
0 days 00:00:04.003809 451.350716 0 445.331665 0 448.019051
剧情:
ax = df.reset_index().plot(x='index', y='thick')
df.reset_index().plot(x='index', y='temp_qcm', ax=ax, color='r')
df.reset_index().plot(x='index', y='thick_corr', ax=ax, color='g')
ax.set_ylim(440, 460)