如何在 Python 中对 DataFrame 的每一行运行一个函数
How to run a function to each row of Dataframe in Python
我想对数据集 df 的每一行循环调用一个函数。df 的形状是 920 x 10080。
该函数利用快速傅里叶变换(FFT)的概念,从每行数据构成的波形中提取前 5 个频率分量。函数代码:
def get_fft_values(y_values, T, N, f_s):
    """Return the one-sided amplitude spectrum of one or more sampled signals.

    Args:
        y_values: Samples of the signal. Either a 1-D sequence (for example a
            single DataFrame row) or a 2-D array/DataFrame holding one signal
            per row; the transform is taken along the last axis.
        T: Sampling interval, i.e. the spacing between consecutive samples.
        N: Number of samples per signal.
        f_s: Sampling frequency. Not used by the computation; kept so that
            existing callers keep working.

    Returns:
        A tuple ``(f_values, fft_values)``. ``f_values`` holds ``N // 2``
        evenly spaced frequencies from 0 to ``1 / (2 * T)``; ``fft_values``
        holds the matching amplitudes, ``N // 2`` entries along the last axis.
    """
    half = N // 2
    f_values = np.linspace(0.0, 1.0 / (2.0 * T), half)
    # Convert up front so a DataFrame or Series is handled as a plain ndarray.
    fft_values_ = fft(np.asarray(y_values), axis=-1)
    # Slice the last axis: a plain [0:half] would select rows, not frequency
    # bins, when several signals are passed at once.
    fft_values = 2.0 / N * np.abs(fft_values_[..., :half])
    return f_values, fft_values
# Sampling setup derived from the values below: N samples over a span of t_n.
t_n = 1
N = 10080
T = t_n / N  # spacing between consecutive samples
f_s = 1 / T  # sampling frequency
f_values, fft_values = get_fft_values(df, T, N, f_s)
# Show the first five frequencies together with their amplitudes.
print(f_values[0:5], fft_values[0:5])
这是把一行数据作为 DataFrame(我用 df = df.iloc[1,:] 做了切片)进行傅里叶变换的函数。
我不确定如何对每一行都运行这个函数。我试过了:
for row in df.iterrows():
t_n = 1
N = 10080
T = t_n / N
f_s = 1/T
f_values, fft_values = get_fft_values(pd.DataFrame(row), T, N, f_s)
plt.plot(f_values, fft_values, 'x', color='blue', markersize = '3')
plt.xlabel('Frequency [Hz]', fontsize=16)
plt.ylabel('Amplitude', fontsize=16)
plt.title("Frequency domain of the data", fontsize=16)
plt.show()
print(f_values[0:5], fft_values[0:5], '\n')
返回错误
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-158-866f17cf6143> in <module>()
51 f_s = 1/T
52
---> 53 f_values, fft_values = get_fft_values(pd.DataFrame(row), T, N, f_s)
54
55 plt.plot(f_values, fft_values, 'x', color='blue', markersize = '3')
4 frames
/usr/local/lib/python3.6/dist-packages/numpy/core/_asarray.py in asarray(a, dtype, order)
83
84 """
---> 85 return array(a, dtype, copy=False, order=order)
86
87
ValueError: setting an array element with a sequence.
并尝试过:
results = pd.DataFrame([get_fft_values(*x,T, N, f_s) for x in df.values.tolist()])
results = df.apply(lambda x: get_fft_values(*x))
返回错误
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-154-241b17a458df> in <module>()
----> 1 results = pd.DataFrame([get_fft_values(*x,T, N, f_s) for [x] in df.values.tolist()])
2 results = df.apply(lambda x: get_fft_values(*x))
3 results
<ipython-input-154-241b17a458df> in <listcomp>(.0)
----> 1 results = pd.DataFrame([get_fft_values(*x,T, N, f_s) for [x] in df.values.tolist()])
2 results = df.apply(lambda x: get_fft_values(*x))
3 results
ValueError: too many values to unpack (expected 1)
并尝试了
df = df.iloc[:,0:10080]
def get_fft_values(y_values, T, N, f_s):
    """Return the one-sided amplitude spectrum of one or more sampled signals.

    Args:
        y_values: Samples of the signal. Either a 1-D sequence (for example a
            single DataFrame row) or a 2-D array/DataFrame holding one signal
            per row; the transform is taken along the last axis.
        T: Sampling interval, i.e. the spacing between consecutive samples.
        N: Number of samples per signal.
        f_s: Sampling frequency. Not used by the computation; kept so that
            existing callers keep working.

    Returns:
        A tuple ``(f_values, fft_values)``. ``f_values`` holds ``N // 2``
        evenly spaced frequencies from 0 to ``1 / (2 * T)``; ``fft_values``
        holds the matching amplitudes, ``N // 2`` entries along the last axis.
    """
    half = N // 2
    f_values = np.linspace(0.0, 1.0 / (2.0 * T), half)
    # Convert up front so a DataFrame or Series is handled as a plain ndarray.
    fft_values_ = fft(np.asarray(y_values), axis=-1)
    # Slice the last axis: a plain [0:half] would select rows, not frequency
    # bins, when several signals are passed at once.
    fft_values = 2.0 / N * np.abs(fft_values_[..., :half])
    return f_values, fft_values
t_n = 1
N = 10080
T = t_n / N
f_s = 1/T
f_values, fft_values = get_fft_values(df, T, N, f_s)
df.apply(get_fft_values, axis=1)
返回
--------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-178-75f82bbfdaca> in <module>()
16
17
---> 18 df.apply(get_fft_values, axis=1)
3 frames
/usr/local/lib/python3.6/dist-packages/pandas/core/apply.py in apply_series_generator(self)
319 try:
320 for i, v in enumerate(series_gen):
--> 321 results[i] = self.f(v)
322 keys.append(v.name)
323 except Exception as e:
TypeError: ("get_fft_values() missing 3 required positional arguments: 'T', 'N', and 'f_s'", 'occurred at index 0')
这似乎是一个简单的问题,但我不确定如何解决它。在这种情况下如何编写正确的 for 循环?非常感谢
关于如何在数据框中的每一行应用一个函数的问题,我想举一个简单的例子,以便您可以相应地更改您的代码。
df = pd.DataFrame(data) ## creating a dataframe
def select_age(row):
    """Report whether the person described by ``row`` is older than 18.

    Args:
        row: A single DataFrame row (or any mapping) with an ``'Age'`` entry.
            It is called ``row`` to make clear that one row is passed per call.

    Returns:
        ``'Yes'`` if ``row['Age']`` is greater than 18, otherwise ``'No'``.
    """
    # Strictly greater than: an age of exactly 18 yields 'No'.
    return 'Yes' if row['Age'] > 18 else 'No'
# apply(..., axis=1) calls select_age once per row; each 'Yes'/'No' result is
# stored in a new column. The function is passed directly, no lambda needed.
df['Age>18'] = df.apply(select_age, axis=1)
输出
我想对数据集 df 的每一行循环调用一个函数。df 的形状是 920 x 10080。
该函数利用快速傅里叶变换(FFT)的概念,从每行数据构成的波形中提取前 5 个频率分量。函数代码:
def get_fft_values(y_values, T, N, f_s):
    """Return the one-sided amplitude spectrum of one or more sampled signals.

    Args:
        y_values: Samples of the signal. Either a 1-D sequence (for example a
            single DataFrame row) or a 2-D array/DataFrame holding one signal
            per row; the transform is taken along the last axis.
        T: Sampling interval, i.e. the spacing between consecutive samples.
        N: Number of samples per signal.
        f_s: Sampling frequency. Not used by the computation; kept so that
            existing callers keep working.

    Returns:
        A tuple ``(f_values, fft_values)``. ``f_values`` holds ``N // 2``
        evenly spaced frequencies from 0 to ``1 / (2 * T)``; ``fft_values``
        holds the matching amplitudes, ``N // 2`` entries along the last axis.
    """
    half = N // 2
    f_values = np.linspace(0.0, 1.0 / (2.0 * T), half)
    # Convert up front so a DataFrame or Series is handled as a plain ndarray.
    fft_values_ = fft(np.asarray(y_values), axis=-1)
    # Slice the last axis: a plain [0:half] would select rows, not frequency
    # bins, when several signals are passed at once.
    fft_values = 2.0 / N * np.abs(fft_values_[..., :half])
    return f_values, fft_values
# Sampling setup derived from the values below: N samples over a span of t_n.
t_n = 1
N = 10080
T = t_n / N  # spacing between consecutive samples
f_s = 1 / T  # sampling frequency
f_values, fft_values = get_fft_values(df, T, N, f_s)
# Show the first five frequencies together with their amplitudes.
print(f_values[0:5], fft_values[0:5])
这是把一行数据作为 DataFrame(我用 df = df.iloc[1,:] 做了切片)进行傅里叶变换的函数。
我不确定如何对每一行都运行这个函数。我试过了:
for row in df.iterrows():
t_n = 1
N = 10080
T = t_n / N
f_s = 1/T
f_values, fft_values = get_fft_values(pd.DataFrame(row), T, N, f_s)
plt.plot(f_values, fft_values, 'x', color='blue', markersize = '3')
plt.xlabel('Frequency [Hz]', fontsize=16)
plt.ylabel('Amplitude', fontsize=16)
plt.title("Frequency domain of the data", fontsize=16)
plt.show()
print(f_values[0:5], fft_values[0:5], '\n')
返回错误
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-158-866f17cf6143> in <module>()
51 f_s = 1/T
52
---> 53 f_values, fft_values = get_fft_values(pd.DataFrame(row), T, N, f_s)
54
55 plt.plot(f_values, fft_values, 'x', color='blue', markersize = '3')
4 frames
/usr/local/lib/python3.6/dist-packages/numpy/core/_asarray.py in asarray(a, dtype, order)
83
84 """
---> 85 return array(a, dtype, copy=False, order=order)
86
87
ValueError: setting an array element with a sequence.
并尝试过:
results = pd.DataFrame([get_fft_values(*x,T, N, f_s) for x in df.values.tolist()])
results = df.apply(lambda x: get_fft_values(*x))
返回错误
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-154-241b17a458df> in <module>()
----> 1 results = pd.DataFrame([get_fft_values(*x,T, N, f_s) for [x] in df.values.tolist()])
2 results = df.apply(lambda x: get_fft_values(*x))
3 results
<ipython-input-154-241b17a458df> in <listcomp>(.0)
----> 1 results = pd.DataFrame([get_fft_values(*x,T, N, f_s) for [x] in df.values.tolist()])
2 results = df.apply(lambda x: get_fft_values(*x))
3 results
ValueError: too many values to unpack (expected 1)
并尝试了
df = df.iloc[:,0:10080]
def get_fft_values(y_values, T, N, f_s):
    """Return the one-sided amplitude spectrum of one or more sampled signals.

    Args:
        y_values: Samples of the signal. Either a 1-D sequence (for example a
            single DataFrame row) or a 2-D array/DataFrame holding one signal
            per row; the transform is taken along the last axis.
        T: Sampling interval, i.e. the spacing between consecutive samples.
        N: Number of samples per signal.
        f_s: Sampling frequency. Not used by the computation; kept so that
            existing callers keep working.

    Returns:
        A tuple ``(f_values, fft_values)``. ``f_values`` holds ``N // 2``
        evenly spaced frequencies from 0 to ``1 / (2 * T)``; ``fft_values``
        holds the matching amplitudes, ``N // 2`` entries along the last axis.
    """
    half = N // 2
    f_values = np.linspace(0.0, 1.0 / (2.0 * T), half)
    # Convert up front so a DataFrame or Series is handled as a plain ndarray.
    fft_values_ = fft(np.asarray(y_values), axis=-1)
    # Slice the last axis: a plain [0:half] would select rows, not frequency
    # bins, when several signals are passed at once.
    fft_values = 2.0 / N * np.abs(fft_values_[..., :half])
    return f_values, fft_values
t_n = 1
N = 10080
T = t_n / N
f_s = 1/T
f_values, fft_values = get_fft_values(df, T, N, f_s)
df.apply(get_fft_values, axis=1)
返回
--------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-178-75f82bbfdaca> in <module>()
16
17
---> 18 df.apply(get_fft_values, axis=1)
3 frames
/usr/local/lib/python3.6/dist-packages/pandas/core/apply.py in apply_series_generator(self)
319 try:
320 for i, v in enumerate(series_gen):
--> 321 results[i] = self.f(v)
322 keys.append(v.name)
323 except Exception as e:
TypeError: ("get_fft_values() missing 3 required positional arguments: 'T', 'N', and 'f_s'", 'occurred at index 0')
这似乎是一个简单的问题,但我不确定如何解决它。在这种情况下如何编写正确的 for 循环?非常感谢
关于如何在数据框中的每一行应用一个函数的问题,我想举一个简单的例子,以便您可以相应地更改您的代码。
df = pd.DataFrame(data) ## creating a dataframe
def select_age(row):
    """Report whether the person described by ``row`` is older than 18.

    Args:
        row: A single DataFrame row (or any mapping) with an ``'Age'`` entry.
            It is called ``row`` to make clear that one row is passed per call.

    Returns:
        ``'Yes'`` if ``row['Age']`` is greater than 18, otherwise ``'No'``.
    """
    # Strictly greater than: an age of exactly 18 yields 'No'.
    return 'Yes' if row['Age'] > 18 else 'No'
# apply(..., axis=1) calls select_age once per row; each 'Yes'/'No' result is
# stored in a new column. The function is passed directly, no lambda needed.
df['Age>18'] = df.apply(select_age, axis=1)
输出