如何使用 Python 中的 find_peaks() 查找重复模式的一系列最高峰?
How to find series of highest peaks of a repeating pattern using find_peaks() in Python?
我正在尝试确定以下波形中模式块的最高峰:
基本上,我只需要检测以下峰(突出显示):
如果我使用 scipy.find_peaks()
,它无法检测到适当的峰:
indices = find_peaks(my_waveform, prominence = 1)[0]
它最终检测到以下所有点,这不是我要找的:
我无法向 scipy.find_peaks()
提供 distance
或 height
阈值的输入参数,因为在任一极端上都有许多所需的峰,其高度低于中间有不需要的峰。
注意:正如您在上面的快照中看到的那样,我也对波形进行了去趋势处理以解决上述问题,但它仍然没有给出正确的结果。
那么谁能提供解决此问题的正确方法?
这是完全重现我展示的数据集的代码(“autocorr”是感兴趣的最终波形)
import json
import sys, os
import numpy as np
import pandas as pd
import glob
import pickle
from statsmodels.tsa.stattools import adfuller, acf, pacf
from scipy.signal import find_peaks, square
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
import matplotlib.pyplot as plt
#GENERATION OF A FUNCTION WITH DUAL SEASONALITY & NOISE
def white_noise(mu, sigma, num_pts):
""" Function to generate Gaussian Normal Noise
Args:
sigma: std value
num_pts: no of points
mu: mean value
Returns:
generated Gaussian Normal Noise
"""
noise = np.random.normal(mu, sigma, num_pts)
return noise
def signal_line_plot(input_signal: pd.Series, title: str = "", y_label: str = "Signal"):
""" Function to plot a time series signal
Args:
input_signal: time series signal that you want to plot
title: title on plot
y_label: label of the signal being plotted
Returns:
signal plot
"""
plt.plot(input_signal)
plt.title(title)
plt.ylabel(y_label)
plt.show()
t_week = np.linspace(1,480, 480)
t_weekend=np.linspace(1,192,192)
T=96 #Time Period
x_weekday = 10*square(2*np.pi*t_week/T, duty=0.7)+10 + white_noise(0, 1,480)
x_weekend = 2*square(2*np.pi*t_weekend/T, duty=0.7)+2 + white_noise(0,1,192)
x_daily_weekly = np.concatenate((x_weekday, x_weekend))
x_daily_weekly_long = np.concatenate((x_daily_weekly,x_daily_weekly,x_daily_weekly,x_daily_weekly,x_daily_weekly,x_daily_weekly,x_daily_weekly,x_daily_weekly,x_daily_weekly,x_daily_weekly))
signal_line_plot(x_daily_weekly_long)
signal_line_plot(x_daily_weekly_long[0:1000])
#x_daily_weekly_long is the final waveform on which I'm carrying out Autocorrelation
#PERFORMING AUTOCORRELATION:
import scipy.signal as signal
autocorr = signal.correlate(x_daily_weekly_long, x_daily_weekly_long, mode = "same")
lags = signal.correlation_lags(len(x_daily_weekly_long), len(x_daily_weekly_long), mode = "same")
#VISUALIZATION:
f = plt.figure()
f.set_figwidth(40)
f.set_figheight(10)
plt.plot(lags, autocorr)
出于测试目的,我对您的信号进行了粗略的重建。
import numpy as np
from scipy.signal import find_peaks, square
import matplotlib.pyplot as plt
x = np.linspace(3,103,10000)
sin = np.clip(np.sin(0.6*x)-0.5,0,10)
tri = np.concatenate([np.linspace(0,0.3,5000),np.linspace(0.3,0,5000)],axis =0)
sig = np.sin(6*x-1.2)
full = sin+tri+sig
峰值运行 #1
peaks = find_peaks(full)[0]
plt.plot(full)
plt.scatter(peaks,full[peaks], color='red', s=5)
plt.show()
peak 运行 #2 + 索引重新提取(这需要信号中的实际值)
peaks2 = find_peaks(full[peaks])[0]
index = peaks[peaks2]
plt.plot(full)
plt.scatter(index,full[index], color='red', s=5)
plt.show()
如果您知道经期,您可以这样做:
w=T
f = plt.figure()
f.set_figwidth(40)
f.set_figheight(10)
plt.plot(lags, autocorr)
plt.scatter(lags[signal.find_peaks(signal.convolve(autocorr, np.ones(w)/w, mode="same"))[0]], autocorr[signal.find_peaks(signal.convolve(autocorr, np.ones(w)/w, mode="same"))[0]], color="r")
结果:
我不知道它在其他情况下是否有效。
编辑:
另一种方法是在切片 window 中找到最大值,但在这种情况下,您必须根据经验定义 window 大小。
w=900
f = plt.figure()
f.set_figwidth(40)
f.set_figheight(10)
plt.plot(lags, autocorr)
plt.scatter(lags[filters.maximum_filter(autocorr, size=W)==autocorr], autocorr[filters.maximum_filter(autocorr, size=W)==autocorr], color="r")
结果:
因为你有某种双重(甚至三重)信号,我会尝试双重平滑。
一个去除整体趋势,一个去除尖锐的噪音。
一张图胜过长篇大论:
from scipy.signal import find_peaks
import pandas as pd
import numpy as np
def smooth(s, win):
return pd.Series(s).rolling(window=win, center=True).mean().ffill().bfill()
plt.plot(lags, autocorr, label='data')
WINDOW = 100 # needs to be determined empirically
# and so are the multipliers below
# double smoothing difference + clipping
ddiff = np.clip(smooth(autocorr, 2*WINDOW)-smooth(autocorr, 10*WINDOW), 0, np.inf)
plt.plot(lags, ddiff, label='smooth+clip')
peaks = find_peaks(ddiff, width=WINDOW)[0]
plt.plot(lags[peaks], autocorr[peaks], marker='o', ls='')
plt.plot(lags[peaks], ddiff[peaks], marker='o', ls='')
plt.legend()
输出:
平滑原始信号
在数据分析中,越早执行转换可能越好。您还可以在 运行 自相关之前清理原始信号。这是一个简单的例子(使用上面定义的 smooth
函数):
from scipy.signal import find_peaks
x2 = smooth(x_daily_weekly_long, 100)
autocorr2 = signal.correlate(x2, x2, mode = "same")
plt.plot(lags, autocorr2)
idx = find_peaks(autocorr2)[0]
plt.plot(lags[idx], autocorr2[idx], marker='o', ls='')
清除信号:
我正在尝试确定以下波形中模式块的最高峰:
基本上,我只需要检测以下峰(突出显示):
如果我使用 scipy.find_peaks()
,它无法检测到适当的峰:
indices = find_peaks(my_waveform, prominence = 1)[0]
它最终检测到以下所有点,这不是我要找的:
我无法向 scipy.find_peaks()
提供 distance
或 height
阈值的输入参数,因为在任一极端上都有许多所需的峰,其高度低于中间有不需要的峰。
注意:正如您在上面的快照中看到的那样,我也对波形进行了去趋势处理以解决上述问题,但它仍然没有给出正确的结果。
那么谁能提供解决此问题的正确方法?
这是完全重现我展示的数据集的代码(“autocorr”是感兴趣的最终波形)
import json
import sys, os
import numpy as np
import pandas as pd
import glob
import pickle
from statsmodels.tsa.stattools import adfuller, acf, pacf
from scipy.signal import find_peaks, square
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
import matplotlib.pyplot as plt
#GENERATION OF A FUNCTION WITH DUAL SEASONALITY & NOISE
def white_noise(mu, sigma, num_pts):
""" Function to generate Gaussian Normal Noise
Args:
sigma: std value
num_pts: no of points
mu: mean value
Returns:
generated Gaussian Normal Noise
"""
noise = np.random.normal(mu, sigma, num_pts)
return noise
def signal_line_plot(input_signal: pd.Series, title: str = "", y_label: str = "Signal"):
""" Function to plot a time series signal
Args:
input_signal: time series signal that you want to plot
title: title on plot
y_label: label of the signal being plotted
Returns:
signal plot
"""
plt.plot(input_signal)
plt.title(title)
plt.ylabel(y_label)
plt.show()
t_week = np.linspace(1,480, 480)
t_weekend=np.linspace(1,192,192)
T=96 #Time Period
x_weekday = 10*square(2*np.pi*t_week/T, duty=0.7)+10 + white_noise(0, 1,480)
x_weekend = 2*square(2*np.pi*t_weekend/T, duty=0.7)+2 + white_noise(0,1,192)
x_daily_weekly = np.concatenate((x_weekday, x_weekend))
x_daily_weekly_long = np.concatenate((x_daily_weekly,x_daily_weekly,x_daily_weekly,x_daily_weekly,x_daily_weekly,x_daily_weekly,x_daily_weekly,x_daily_weekly,x_daily_weekly,x_daily_weekly))
signal_line_plot(x_daily_weekly_long)
signal_line_plot(x_daily_weekly_long[0:1000])
#x_daily_weekly_long is the final waveform on which I'm carrying out Autocorrelation
#PERFORMING AUTOCORRELATION:
import scipy.signal as signal
autocorr = signal.correlate(x_daily_weekly_long, x_daily_weekly_long, mode = "same")
lags = signal.correlation_lags(len(x_daily_weekly_long), len(x_daily_weekly_long), mode = "same")
#VISUALIZATION:
f = plt.figure()
f.set_figwidth(40)
f.set_figheight(10)
plt.plot(lags, autocorr)
出于测试目的,我对您的信号进行了粗略的重建。
import numpy as np
from scipy.signal import find_peaks, square
import matplotlib.pyplot as plt
x = np.linspace(3,103,10000)
sin = np.clip(np.sin(0.6*x)-0.5,0,10)
tri = np.concatenate([np.linspace(0,0.3,5000),np.linspace(0.3,0,5000)],axis =0)
sig = np.sin(6*x-1.2)
full = sin+tri+sig
峰值运行 #1
peaks = find_peaks(full)[0]
plt.plot(full)
plt.scatter(peaks,full[peaks], color='red', s=5)
plt.show()
peak 运行 #2 + 索引重新提取(这需要信号中的实际值)
peaks2 = find_peaks(full[peaks])[0]
index = peaks[peaks2]
plt.plot(full)
plt.scatter(index,full[index], color='red', s=5)
plt.show()
如果您知道经期,您可以这样做:
w=T
f = plt.figure()
f.set_figwidth(40)
f.set_figheight(10)
plt.plot(lags, autocorr)
plt.scatter(lags[signal.find_peaks(signal.convolve(autocorr, np.ones(w)/w, mode="same"))[0]], autocorr[signal.find_peaks(signal.convolve(autocorr, np.ones(w)/w, mode="same"))[0]], color="r")
结果:
我不知道它在其他情况下是否有效。
编辑: 另一种方法是在切片 window 中找到最大值,但在这种情况下,您必须根据经验定义 window 大小。
w=900
f = plt.figure()
f.set_figwidth(40)
f.set_figheight(10)
plt.plot(lags, autocorr)
plt.scatter(lags[filters.maximum_filter(autocorr, size=W)==autocorr], autocorr[filters.maximum_filter(autocorr, size=W)==autocorr], color="r")
结果:
因为你有某种双重(甚至三重)信号,我会尝试双重平滑。
一个去除整体趋势,一个去除尖锐的噪音。
一张图胜过长篇大论:
from scipy.signal import find_peaks
import pandas as pd
import numpy as np
def smooth(s, win):
return pd.Series(s).rolling(window=win, center=True).mean().ffill().bfill()
plt.plot(lags, autocorr, label='data')
WINDOW = 100 # needs to be determined empirically
# and so are the multipliers below
# double smoothing difference + clipping
ddiff = np.clip(smooth(autocorr, 2*WINDOW)-smooth(autocorr, 10*WINDOW), 0, np.inf)
plt.plot(lags, ddiff, label='smooth+clip')
peaks = find_peaks(ddiff, width=WINDOW)[0]
plt.plot(lags[peaks], autocorr[peaks], marker='o', ls='')
plt.plot(lags[peaks], ddiff[peaks], marker='o', ls='')
plt.legend()
输出:
平滑原始信号
在数据分析中,越早执行转换可能越好。您还可以在 运行 自相关之前清理原始信号。这是一个简单的例子(使用上面定义的 smooth
函数):
from scipy.signal import find_peaks
x2 = smooth(x_daily_weekly_long, 100)
autocorr2 = signal.correlate(x2, x2, mode = "same")
plt.plot(lags, autocorr2)
idx = find_peaks(autocorr2)[0]
plt.plot(lags[idx], autocorr2[idx], marker='o', ls='')
清除信号: