平滑数据以确定 Python 中的峰值
Smoothing data for determining peak values in Python
我有一条包含波峰和波谷的横断面数据,想确定两者的极值。数据集噪声较大,因此目前每个波峰(或波谷)返回的不是单个值,而是多个值。
我尝试用滚动均值对数据进行平滑处理,尽管结果比不进行平滑处理要好,但仍然有多个 'peaks'。
[CSV file here]
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from scipy.signal import argrelextrema
from pandas import read_csv
from numpy import mean
from matplotlib import pyplot
import csv
# Load the transect: the CSV has no header row, so the two columns are
# named 'x' (distance) and 'y' (height) here.
df = pd.read_csv(
    'transect2.csv',
    delimiter=',',
    header=None,
    names=['x', 'y'],
)

# Smooth both columns with a 100-sample rolling mean.
rolling_mean = df.rolling(window=100).mean()

# First figure: raw profile with the rolling mean drawn on top of it.
plt.plot(df['x'], df['y'], label='Original Height')
plt.plot(rolling_mean['x'], rolling_mean['y'], label='Mean Height 100')
plt.xlabel('Distance')
plt.ylabel('Height')
plt.legend(loc='upper left')
plt.show()

# Number of samples on each side that a point is compared against.
n = 1000
smoothed_y = rolling_mean['y'].to_numpy()
# The non-strict comparators (<=, >=) also accept flat runs of equal values.
(ilocs_min,) = argrelextrema(smoothed_y, np.less_equal, order=n)
(ilocs_max,) = argrelextrema(smoothed_y, np.greater_equal, order=n)

# Second figure: raw heights in gray, with the extrema located on the
# smoothed curve marked at the corresponding rows of the raw data.
raw_y = df['y']
raw_y.plot(color='gray')
raw_y.iloc[ilocs_max].plot(style='.', lw=10, color='red', marker="v");
raw_y.iloc[ilocs_min].plot(style='.', lw=10, color='green', marker="^");
进一步平滑数据会使结果偏离真实情况,那么我能否改进这种平滑方式,或者改用其他平滑函数?
我的第一直觉是使用 Savitzky-Golay 滤波器进行平滑。其次,对于含噪声的数据集,不要使用 argrelextrema;我用它处理这类数据从未得到过好的结果。更好的替代方案是 find_peaks 或 find_peaks_cwt。
我得出的方案如下:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from scipy.signal import argrelextrema
from scipy.signal import savgol_filter, find_peaks, find_peaks_cwt
from pandas import read_csv
import csv
def find_extrema(y, window_length=351, polyorder=5, prominence=0.01):
    """Smooth a 1-D profile and locate its peaks and valleys.

    The profile is smoothed with a Savitzky-Golay filter. Maxima are found
    by running ``find_peaks`` on the smoothed curve, and minima by running
    it on the *negated* smoothed curve.

    Args:
        y: 1-D array-like of heights.
        window_length: Window size, in samples, passed to ``savgol_filter``.
        polyorder: Polynomial order passed to ``savgol_filter``.
        prominence: Minimum prominence passed to ``find_peaks`` for both
            the maxima and the minima, expressed in the units of ``y``.

    Returns:
        A tuple ``(smooth, peaks_idx_max, peaks_idx_mins)``: the smoothed
        profile, the indices of its maxima and the indices of its minima.

    Raises:
        ValueError: Propagated from ``savgol_filter`` when the window
            settings are not valid for ``y`` (e.g. a window longer than
            the data).
    """
    smooth = savgol_filter(y, window_length=window_length, polyorder=polyorder)

    # Find the maximums.
    peaks_idx_max, _ = find_peaks(smooth, prominence=prominence)

    # Find the minimums by negating the curve, so valleys become peaks.
    # Negation (rather than the reciprocal 1/smooth) stays correct when the
    # profile touches or crosses zero, and keeps `prominence` in the same
    # height units that are used for the maxima.
    peaks_idx_mins, _ = find_peaks(-smooth, prominence=prominence)

    return smooth, peaks_idx_max, peaks_idx_mins


# Guarded so that importing this snippet neither reads the CSV nor opens a
# plot window; running it as a script behaves as before.
if __name__ == "__main__":
    df = pd.read_csv('transect2.csv', delimiter=',', header=None, names=['x', 'y'])
    plt.plot(df['x'], df['y'], label='Original Height')

    # Apply the Savitzky-Golay filter and locate the extrema.
    smooth, peaks_idx_max, peaks_idx_mins = find_extrema(df.y.values)

    plt.xlabel('Distance')
    plt.ylabel('Height')
    plt.plot(df['x'], smooth, label='smoothed')

    # Mark the extrema on the smoothed curve.
    plt.scatter(df.x.values[peaks_idx_max], smooth[peaks_idx_max], s=55,
                c='green', label='max')
    plt.scatter(df.x.values[peaks_idx_mins], smooth[peaks_idx_mins], s=55,
                c='black', label='min')
    plt.legend(loc='upper left')
    plt.show()
输出结果见此链接(原文中的链接“this”未保留)。
我有一条包含波峰和波谷的横断面数据,想确定两者的极值。数据集噪声较大,因此目前每个波峰(或波谷)返回的不是单个值,而是多个值。 我尝试用滚动均值对数据进行平滑处理,尽管结果比不进行平滑处理要好,但仍然会得到多个“峰值”。 [CSV file here]
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from scipy.signal import argrelextrema
from pandas import read_csv
from numpy import mean
from matplotlib import pyplot
import csv
# Load the transect: the CSV has no header row, so the two columns are
# named 'x' (distance) and 'y' (height) here.
df = pd.read_csv(
    'transect2.csv',
    delimiter=',',
    header=None,
    names=['x', 'y'],
)

# Smooth both columns with a 100-sample rolling mean.
rolling_mean = df.rolling(window=100).mean()

# First figure: raw profile with the rolling mean drawn on top of it.
plt.plot(df['x'], df['y'], label='Original Height')
plt.plot(rolling_mean['x'], rolling_mean['y'], label='Mean Height 100')
plt.xlabel('Distance')
plt.ylabel('Height')
plt.legend(loc='upper left')
plt.show()

# Number of samples on each side that a point is compared against.
n = 1000
smoothed_y = rolling_mean['y'].to_numpy()
# The non-strict comparators (<=, >=) also accept flat runs of equal values.
(ilocs_min,) = argrelextrema(smoothed_y, np.less_equal, order=n)
(ilocs_max,) = argrelextrema(smoothed_y, np.greater_equal, order=n)

# Second figure: raw heights in gray, with the extrema located on the
# smoothed curve marked at the corresponding rows of the raw data.
raw_y = df['y']
raw_y.plot(color='gray')
raw_y.iloc[ilocs_max].plot(style='.', lw=10, color='red', marker="v");
raw_y.iloc[ilocs_min].plot(style='.', lw=10, color='green', marker="^");
进一步平滑数据会使结果偏离真实情况,那么我能否改进这种平滑方式,或者改用其他平滑函数?
我的第一直觉是使用 Savitzky-Golay 滤波器进行平滑。其次,对于含噪声的数据集,不要使用 argrelextrema;我用它处理这类数据从未得到过好的结果。更好的替代方案是 find_peaks 或 find_peaks_cwt。
我得出的方案如下:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from scipy.signal import argrelextrema
from scipy.signal import savgol_filter, find_peaks, find_peaks_cwt
from pandas import read_csv
import csv
def find_extrema(y, window_length=351, polyorder=5, prominence=0.01):
    """Smooth a 1-D profile and locate its peaks and valleys.

    The profile is smoothed with a Savitzky-Golay filter. Maxima are found
    by running ``find_peaks`` on the smoothed curve, and minima by running
    it on the *negated* smoothed curve.

    Args:
        y: 1-D array-like of heights.
        window_length: Window size, in samples, passed to ``savgol_filter``.
        polyorder: Polynomial order passed to ``savgol_filter``.
        prominence: Minimum prominence passed to ``find_peaks`` for both
            the maxima and the minima, expressed in the units of ``y``.

    Returns:
        A tuple ``(smooth, peaks_idx_max, peaks_idx_mins)``: the smoothed
        profile, the indices of its maxima and the indices of its minima.

    Raises:
        ValueError: Propagated from ``savgol_filter`` when the window
            settings are not valid for ``y`` (e.g. a window longer than
            the data).
    """
    smooth = savgol_filter(y, window_length=window_length, polyorder=polyorder)

    # Find the maximums.
    peaks_idx_max, _ = find_peaks(smooth, prominence=prominence)

    # Find the minimums by negating the curve, so valleys become peaks.
    # Negation (rather than the reciprocal 1/smooth) stays correct when the
    # profile touches or crosses zero, and keeps `prominence` in the same
    # height units that are used for the maxima.
    peaks_idx_mins, _ = find_peaks(-smooth, prominence=prominence)

    return smooth, peaks_idx_max, peaks_idx_mins


# Guarded so that importing this snippet neither reads the CSV nor opens a
# plot window; running it as a script behaves as before.
if __name__ == "__main__":
    df = pd.read_csv('transect2.csv', delimiter=',', header=None, names=['x', 'y'])
    plt.plot(df['x'], df['y'], label='Original Height')

    # Apply the Savitzky-Golay filter and locate the extrema.
    smooth, peaks_idx_max, peaks_idx_mins = find_extrema(df.y.values)

    plt.xlabel('Distance')
    plt.ylabel('Height')
    plt.plot(df['x'], smooth, label='smoothed')

    # Mark the extrema on the smoothed curve.
    plt.scatter(df.x.values[peaks_idx_max], smooth[peaks_idx_max], s=55,
                c='green', label='max')
    plt.scatter(df.x.values[peaks_idx_mins], smooth[peaks_idx_mins], s=55,
                c='black', label='min')
    plt.legend(loc='upper left')
    plt.show()
输出结果见此链接(原文中的链接“this”未保留)。