循环回归模型返回单个结果,而不是多个输出
Looped Regression Model Returning Single Result, Instead of Multiple Outputs
我正在对一些自举(bootstrap)重采样数据运行岭回归模型。为了便于说明,假设数据框列表中有两个自举样本。
但是,当我遍历这个数据框列表时,只得到一个输出,而不是列表中每个数据框各对应一个输出(共两个)。
不确定我的代码中还缺少什么。
下面是示例数据集
import pandas as pd
import numpy as np
# the resampled datasets
# Two small example samples; each dict maps a column name (v1, v2, v3) to 20 values.
d1 = {'v1': [2.5, 4.5, 3.3, 4.0, 3.8, 2.5, 4.5, 3.3, 4.0, 3.8, 2.5, 4.5, 3.3, 4.0, 3.8, 2.5, 4.5, 3.3, 4.0, 3.8],
'v2': [3.5, 3.8, 2.5, 4.0, 4.0, 3.5, 3.8, 2.5, 4.0, 4.0, 3.5, 3.8, 2.5, 4.0, 4.0, 3.8, 3.89, 2.75, 4.5, 4.25],
'v3': [4.5, 3.8, 3.5, 4.2, 4.3, 1.5, 2.98, 3.5, 3.5, 4.5, 3.8, 3.89, 2.75, 4.5, 4.25, 3.55, 3.85, 2.98, 4.05, 4.50]}
df1 = pd.DataFrame(d1)
d2 = {'v1': [2.6, 4.0, 3.3, 4.0, 3.0, 2.5, 4.5, 3.3, 4.0, 3.8, 4.5, 3.8, 3.5, 4.2, 4.3, 4.25, 3.55, 3.85, 2.98, 4.05],
'v2': [3.8, 3.89, 2.75, 4.5, 4.25, 3.55, 3.85, 2.98, 4.05, 4.50, 3.5, 2.98, 3.5, 3.25, 4.25, 4.0, 4.0, 3.5, 3.8, 2.5],
'v3': [4.0, 3.85, 3.75, 4.0, 4.73, 3.5, 2.98, 3.5, 3.25, 4.25, 3.3, 4.0, 3.8, 2.5, 4.5, 3.3, 4.0, 3.8, 2.5, 4.5]}
df2 = pd.DataFrame(d2)
# List of DataFrames that the regression loop iterates over.
dflst = [df1, df2]
以下是我在上述数据上运行的代码。
from sklearn.linear_model import Ridge
# function to run ridge regression
def ridgereg(data, ynum=1):
    """Fit a ridge regression on one DataFrame and return its coefficients.

    The first ``ynum`` columns of ``data`` are used as the response(s) and
    all remaining columns as the predictors.

    Args:
        data: pandas DataFrame whose leading ``ynum`` columns are the targets.
        ynum: Number of leading columns to treat as response variables.

    Returns:
        The ``coef_`` attribute of the fitted ``Ridge(alpha=1.0)`` model.
    """
    # Slice with a range (0:ynum) rather than a scalar index so y stays 2-D.
    y = np.asarray(data.iloc[:, 0:ynum])
    X = np.asarray(data.iloc[:, ynum:])
    model = Ridge(alpha=1.0).fit(X, y)
    return model.coef_
# iterate over list of dfs
# NOTE(review): range(1, len(dflst)) starts at 1, so dflst[0] is never fitted;
# with two DataFrames the loop body runs only once.
for x in range(1, len(dflst)):
# NOTE(review): presumably inside the loop body (indentation was lost) - the dict
# is then re-created on every pass, discarding entries from earlier iterations.
resampled_model = {}
resampled_model[x] = ridgereg(dflst[x], ynum=1)
print(resampled_model)
在 for 循环中,您在每次迭代时都会创建一个新的字典,并丢弃之前创建的字典。此外,range(1, len(dflst)) 从 1 开始,因此列表中的第一个数据框(索引 0)从未被处理。
请尝试(使用 enumerate):
# Create the result dict once, before looping, so entries accumulate.
resampled_model = {}  # note that it is outside the loop
# enumerate(..., start=1) visits every DataFrame and supplies 1-based keys.
for i, df in enumerate(dflst, start=1):
    resampled_model[i] = ridgereg(df, ynum=1)
# Print once after the loop, when all results have been collected.
print(resampled_model)
# {1: array([[0.35603345, 0.1373456 ]]), 2: array([[ 0.08019198, -0.10895105]])}
除了 for 循环,您也可以使用字典推导式:
# Build {1-based position: ridgereg result} for every DataFrame in dflst in one expression.
resampled_model = {i: ridgereg(df, ynum=1) for i, df in enumerate(dflst, start=1)}
我正在对一些自举(bootstrap)重采样数据运行岭回归模型。为了便于说明,假设数据框列表中有两个自举样本。但是,当我遍历这个数据框列表时,只得到一个输出,而不是列表中每个数据框各对应一个输出(共两个)。不确定我的代码中还缺少什么。
下面是示例数据集
import pandas as pd
import numpy as np
# the resampled datasets
# Two small example samples; each dict maps a column name (v1, v2, v3) to 20 values.
d1 = {'v1': [2.5, 4.5, 3.3, 4.0, 3.8, 2.5, 4.5, 3.3, 4.0, 3.8, 2.5, 4.5, 3.3, 4.0, 3.8, 2.5, 4.5, 3.3, 4.0, 3.8],
'v2': [3.5, 3.8, 2.5, 4.0, 4.0, 3.5, 3.8, 2.5, 4.0, 4.0, 3.5, 3.8, 2.5, 4.0, 4.0, 3.8, 3.89, 2.75, 4.5, 4.25],
'v3': [4.5, 3.8, 3.5, 4.2, 4.3, 1.5, 2.98, 3.5, 3.5, 4.5, 3.8, 3.89, 2.75, 4.5, 4.25, 3.55, 3.85, 2.98, 4.05, 4.50]}
df1 = pd.DataFrame(d1)
d2 = {'v1': [2.6, 4.0, 3.3, 4.0, 3.0, 2.5, 4.5, 3.3, 4.0, 3.8, 4.5, 3.8, 3.5, 4.2, 4.3, 4.25, 3.55, 3.85, 2.98, 4.05],
'v2': [3.8, 3.89, 2.75, 4.5, 4.25, 3.55, 3.85, 2.98, 4.05, 4.50, 3.5, 2.98, 3.5, 3.25, 4.25, 4.0, 4.0, 3.5, 3.8, 2.5],
'v3': [4.0, 3.85, 3.75, 4.0, 4.73, 3.5, 2.98, 3.5, 3.25, 4.25, 3.3, 4.0, 3.8, 2.5, 4.5, 3.3, 4.0, 3.8, 2.5, 4.5]}
df2 = pd.DataFrame(d2)
# List of DataFrames that the regression loop iterates over.
dflst = [df1, df2]
以下是我在上述数据上运行的代码。
from sklearn.linear_model import Ridge
# function to run ridge regression
def ridgereg(data, ynum=1):
    """Fit a ridge regression on one DataFrame and return its coefficients.

    The first ``ynum`` columns of ``data`` are used as the response(s) and
    all remaining columns as the predictors.

    Args:
        data: pandas DataFrame whose leading ``ynum`` columns are the targets.
        ynum: Number of leading columns to treat as response variables.

    Returns:
        The ``coef_`` attribute of the fitted ``Ridge(alpha=1.0)`` model.
    """
    # Slice with a range (0:ynum) rather than a scalar index so y stays 2-D.
    y = np.asarray(data.iloc[:, 0:ynum])
    X = np.asarray(data.iloc[:, ynum:])
    model = Ridge(alpha=1.0).fit(X, y)
    return model.coef_
# iterate over list of dfs
# NOTE(review): range(1, len(dflst)) starts at 1, so dflst[0] is never fitted;
# with two DataFrames the loop body runs only once.
for x in range(1, len(dflst)):
# NOTE(review): presumably inside the loop body (indentation was lost) - the dict
# is then re-created on every pass, discarding entries from earlier iterations.
resampled_model = {}
resampled_model[x] = ridgereg(dflst[x], ynum=1)
print(resampled_model)
在 for 循环中,您在每次迭代时都会创建一个新的字典,并丢弃之前创建的字典。此外,range(1, len(dflst)) 从 1 开始,因此列表中的第一个数据框(索引 0)从未被处理。
请尝试(使用 enumerate):
# Create the result dict once, before looping, so entries accumulate.
resampled_model = {}  # note that it is outside the loop
# enumerate(..., start=1) visits every DataFrame and supplies 1-based keys.
for i, df in enumerate(dflst, start=1):
    resampled_model[i] = ridgereg(df, ynum=1)
# Print once after the loop, when all results have been collected.
print(resampled_model)
# {1: array([[0.35603345, 0.1373456 ]]), 2: array([[ 0.08019198, -0.10895105]])}
除了 for 循环,您也可以使用字典推导式:
# Build {1-based position: ridgereg result} for every DataFrame in dflst in one expression.
resampled_model = {i: ridgereg(df, ynum=1) for i, df in enumerate(dflst, start=1)}