在 python 中查找逻辑回归系数
Finding coefficients for logistic regression in python
我正在处理分类问题,需要逻辑回归方程的系数。我可以在 R 中找到系数,但我需要在 python 中提交项目。我在 python 中找不到学习逻辑回归系数的代码。如何在 python 中获取系数值?
路飞,请记得随时分享您的代码和您的尝试,这样我们才能知道您的尝试并帮助您。不管怎样,我想你正在寻找这个:
import numpy as np
from sklearn.linear_model import LogisticRegression

# Four observations of a two-feature model.
X = np.array([[1, 1], [1, 2], [2, 2], [2, 3]])
# y = 1 * x_0 + 2 * x_1 + 3 evaluates to [6, 8, 9, 11]. LogisticRegression is a
# classifier, so these four distinct values are used as four class labels, not
# as a continuous target.
y = np.dot(X, np.array([1, 2])) + 3
reg = LogisticRegression().fit(X, y)  # Fit the classifier on X and y.
# A bare expression is only echoed in a REPL/notebook; print() so the values
# also show up when this runs as a script.
print(reg.coef_)  # Coefficients: one row per class, one column per feature.
print(reg.intercept_)  # Intercepts: one per class.
print(reg.predict(np.array([[3, 5]])))  # Predicted class label for a new observation.
看看 statsmodels 库的 Logit 模型。
你会像这样使用它:
from statsmodels.discrete.discrete_model import Logit
from statsmodels.tools import add_constant

x = [...]  # Observations (placeholder to be replaced with real data)
y = [...]  # Response variable (placeholder to be replaced with real data)
# Add the constant column to the observations before building the model.
x = add_constant(x)
model = Logit(y, x)
result = model.fit()
print(result.summary())
sklearn.linear_model.LogisticRegression 适合你。
看这个例子:
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression

# Load the iris data as a (features, labels) pair.
X, y = load_iris(return_X_y=True)
clf = LogisticRegression(random_state=0)
clf.fit(X, y)
# Show the fitted coefficients followed by the intercepts.
print(clf.coef_, clf.intercept_)
statsmodels 库将为您提供系数结果的细分,以及相关的 p 值以确定它们的显著性。
以 x1 和 y1 变量为例:
import statsmodels.api as sm
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# x1 (predictors) and y1 (response) are expected to be defined before this point.
x1_train, x1_test, y1_train, y1_test = train_test_split(x1, y1, random_state=0)
logreg = LogisticRegression().fit(x1_train, y1_train)
# print() instead of a bare expression so the model is also shown in a script.
print(logreg)
print("Training set score: {:.3f}".format(logreg.score(x1_train, y1_train)))
print("Test set score: {:.3f}".format(logreg.score(x1_test, y1_test)))

# sm.Logit does not add an intercept term by itself. add_constant() prepends
# one; with its default has_constant='skip' it leaves x1 unchanged when a
# constant column is already present.
logit_model = sm.Logit(y1, sm.add_constant(x1))
result = logit_model.fit()
print(result.summary())
示例结果:
Optimization terminated successfully.
Current function value: 0.596755
Iterations 7
Logit Regression Results
==============================================================================
Dep. Variable: IsCanceled No. Observations: 20000
Model: Logit Df Residuals: 19996
Method: MLE Df Model: 3
Date: Sat, 17 Aug 2019 Pseudo R-squ.: 0.1391
Time: 23:58:55 Log-Likelihood: -11935.
converged: True LL-Null: -13863.
LLR p-value: 0.000
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
const -2.1417 0.050 -43.216 0.000 -2.239 -2.045
x1 0.0055 0.000 32.013 0.000 0.005 0.006
x2 0.0236 0.001 36.465 0.000 0.022 0.025
x3 2.1137 0.104 20.400 0.000 1.911 2.317
==============================================================================
假设你的 X 是一个 Pandas DataFrame,clf 是你的逻辑回归模型,你可以通过这行代码获取特征的名称及其值:
# Pair every feature name with its row of the transposed coefficient matrix;
# each 'coef' cell therefore holds an array (one entry per row of clf.coef_).
pd.DataFrame(zip(X_train.columns, clf.coef_.T), columns=['features', 'coef'])
对上一个答案稍作更正:
# clf.coef_ is 2-D; for a binary model its single row holds one coefficient per
# feature. Index that row directly (no tolist()/transpose round-trip) so each
# 'coef' cell is a scalar. Building from a dict also makes pandas raise if the
# number of columns and coefficients differ, instead of silently truncating.
pd.DataFrame({'features': X_train.columns, 'coef': clf.coef_[0]})
更多细节以及如何替换 pytorch 模型的最后一层:
#%%
"""
Get the weights & biases to set them to a nn.Linear layer in pytorch
"""
import numpy as np
import torch
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from torch import nn

X, y = load_iris(return_X_y=True)
print(f'{X.shape=}')
print(f'{y.shape=}')
Din: int = X.shape[1]
total_data_set_size: int = X.shape[0]
assert y.shape[0] == total_data_set_size

clf = LogisticRegression(random_state=0).fit(X, y)
# Class probabilities for the first two rows; used below as the reference the
# torch layer has to reproduce.
out = clf.predict_proba(X[:2, :])
print(f'{out=}')
# Print the score instead of computing it and throwing the result away.
print(f'{clf.score(X, y)=}')

# - coef_ndarray of shape (1, n_features) or (n_classes, n_features)
print(f'{clf.coef_.shape=}')
print(f'{clf.intercept_.shape=}')
assert (clf.coef_.shape[1] == Din)
Dout: int = clf.coef_.shape[0]
print(f'{Dout=} which is the number of classes too in classification')
assert (Dout == clf.intercept_.shape[0])
print()

num_classes: int = Dout
mdl = nn.Linear(in_features=Din, out_features=num_classes)
# Overwrite the randomly initialised parameters with the fitted ones.
# NOTE(review): torch.from_numpy keeps the NumPy dtype, so weights, bias and
# the input below are expected to share one dtype - confirm if X changes.
mdl.weight = torch.nn.Parameter(torch.from_numpy(clf.coef_))
mdl.bias = torch.nn.Parameter(torch.from_numpy(clf.intercept_))
# Only a forward pass is needed, so skip gradient tracking.
with torch.no_grad():
    out2 = torch.softmax(mdl(torch.from_numpy(X[:2, :])), dim=1)
print(f'{out2=}')
# The softmax of the linear layer must reproduce predict_proba.
assert np.allclose(out2.cpu().numpy(), out)

# - Sketch: the same replacement for a layer of an existing model
# (nn.Linear stores its weight as [out_features, in_features])
# module: nn.Module = getattr(base_model, layer_to_replace)
# num_classes: int = clf.coef_.shape[0]  # out_features=Dout
# num_features: int = clf.coef_.shape[1]  # in_features
# assert module.weight.size() == torch.Size([num_classes, num_features])
# assert module.bias.size() == torch.Size([num_classes])
# module.weight = torch.nn.Parameter(torch.from_numpy(clf.coef_))
# module.bias = torch.nn.Parameter(torch.from_numpy(clf.intercept_))
from sklearn.linear_model import LogisticRegression

# X_train / Y_train are expected to be defined before this point.
model = LogisticRegression(solver='liblinear', random_state=10)
mdle = model.fit(X_train, Y_train)
print(mdle.classes_)
# Report the first intercept and the first coefficient with five decimals.
first_intercept = model.intercept_[0]
first_coefficient = model.coef_[0][0]
print(f"model intercept :::{first_intercept:.5f}")
print(f"model coeffieient :::{first_coefficient:.5f}")
如果您想将系数名称映射到它们的值,您可以使用
def logreg_to_dict(clf: "LogisticRegression", feature_names: list[str]) -> dict[str, float]:
    """Map ``"intercept"`` and each feature name to its fitted coefficient.

    Args:
        clf: Fitted binary logistic-regression model exposing ``intercept_``
            and ``coef_``.
        feature_names: Names of the features the model was trained on, in
            column order. Any sequence of strings is accepted.

    Returns:
        A dict whose first key is ``"intercept"``, followed by one entry per
        feature name.

    Raises:
        ValueError: If ``coef_`` has more than one row (multi-class model), or
            if the number of names does not match the number of coefficients.
    """
    coef = np.atleast_2d(clf.coef_)
    if coef.shape[0] != 1:
        raise ValueError(
            f"expected a single row of coefficients (binary model), got {coef.shape[0]} rows"
        )
    # Unpack instead of list concatenation so non-list sequences work too.
    names = ["intercept", *feature_names]
    # Index the single row rather than squeeze(): squeeze() collapses the
    # (1, 1) matrix of a one-feature model to a 0-d array, which
    # np.concatenate rejects.
    values = np.concatenate([np.ravel(clf.intercept_), coef[0]])
    if len(values) != len(names):
        raise ValueError(
            f"got {len(names) - 1} feature names for {len(values) - 1} coefficients"
        )
    return dict(zip(names, values))
其中 feature_names 是训练模型时所用特征的名称列表。
我正在处理分类问题,需要逻辑回归方程的系数。我可以在 R 中找到系数,但我需要在 python 中提交项目。我在 python 中找不到学习逻辑回归系数的代码。如何在 python 中获取系数值?
路飞,请记得随时分享您的代码和您的尝试,这样我们才能知道您的尝试并帮助您。不管怎样,我想你正在寻找这个:
import numpy as np
from sklearn.linear_model import LogisticRegression

# Four observations of a two-feature model.
X = np.array([[1, 1], [1, 2], [2, 2], [2, 3]])
# y = 1 * x_0 + 2 * x_1 + 3 evaluates to [6, 8, 9, 11]. LogisticRegression is a
# classifier, so these four distinct values are used as four class labels, not
# as a continuous target.
y = np.dot(X, np.array([1, 2])) + 3
reg = LogisticRegression().fit(X, y)  # Fit the classifier on X and y.
# A bare expression is only echoed in a REPL/notebook; print() so the values
# also show up when this runs as a script.
print(reg.coef_)  # Coefficients: one row per class, one column per feature.
print(reg.intercept_)  # Intercepts: one per class.
print(reg.predict(np.array([[3, 5]])))  # Predicted class label for a new observation.
看看 statsmodels 库的 Logit 模型。
你会像这样使用它:
from statsmodels.discrete.discrete_model import Logit
from statsmodels.tools import add_constant

x = [...]  # Observations (placeholder to be replaced with real data)
y = [...]  # Response variable (placeholder to be replaced with real data)
# Add the constant column to the observations before building the model.
x = add_constant(x)
model = Logit(y, x)
result = model.fit()
print(result.summary())
sklearn.linear_model.LogisticRegression 适合你。 看这个例子:
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression

# Load the iris data as a (features, labels) pair.
X, y = load_iris(return_X_y=True)
clf = LogisticRegression(random_state=0)
clf.fit(X, y)
# Show the fitted coefficients followed by the intercepts.
print(clf.coef_, clf.intercept_)
statsmodels 库将为您提供系数结果的细分,以及相关的 p 值以确定它们的显著性。
以 x1 和 y1 变量为例:
import statsmodels.api as sm
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# x1 (predictors) and y1 (response) are expected to be defined before this point.
x1_train, x1_test, y1_train, y1_test = train_test_split(x1, y1, random_state=0)
logreg = LogisticRegression().fit(x1_train, y1_train)
# print() instead of a bare expression so the model is also shown in a script.
print(logreg)
print("Training set score: {:.3f}".format(logreg.score(x1_train, y1_train)))
print("Test set score: {:.3f}".format(logreg.score(x1_test, y1_test)))

# sm.Logit does not add an intercept term by itself. add_constant() prepends
# one; with its default has_constant='skip' it leaves x1 unchanged when a
# constant column is already present.
logit_model = sm.Logit(y1, sm.add_constant(x1))
result = logit_model.fit()
print(result.summary())
示例结果:
Optimization terminated successfully.
Current function value: 0.596755
Iterations 7
Logit Regression Results
==============================================================================
Dep. Variable: IsCanceled No. Observations: 20000
Model: Logit Df Residuals: 19996
Method: MLE Df Model: 3
Date: Sat, 17 Aug 2019 Pseudo R-squ.: 0.1391
Time: 23:58:55 Log-Likelihood: -11935.
converged: True LL-Null: -13863.
LLR p-value: 0.000
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
const -2.1417 0.050 -43.216 0.000 -2.239 -2.045
x1 0.0055 0.000 32.013 0.000 0.005 0.006
x2 0.0236 0.001 36.465 0.000 0.022 0.025
x3 2.1137 0.104 20.400 0.000 1.911 2.317
==============================================================================
假设你的 X 是一个 Pandas DataFrame,clf 是你的逻辑回归模型,你可以通过这行代码获取特征的名称及其值:
# Pair every feature name with its row of the transposed coefficient matrix;
# each 'coef' cell therefore holds an array (one entry per row of clf.coef_).
pd.DataFrame(zip(X_train.columns, clf.coef_.T), columns=['features', 'coef'])
对上一个答案稍作更正:
# clf.coef_ is 2-D; for a binary model its single row holds one coefficient per
# feature. Index that row directly (no tolist()/transpose round-trip) so each
# 'coef' cell is a scalar. Building from a dict also makes pandas raise if the
# number of columns and coefficients differ, instead of silently truncating.
pd.DataFrame({'features': X_train.columns, 'coef': clf.coef_[0]})
更多细节以及如何替换 pytorch 模型的最后一层:
#%%
"""
Get the weights & biases to set them to a nn.Linear layer in pytorch
"""
import numpy as np
import torch
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from torch import nn

X, y = load_iris(return_X_y=True)
print(f'{X.shape=}')
print(f'{y.shape=}')
Din: int = X.shape[1]
total_data_set_size: int = X.shape[0]
assert y.shape[0] == total_data_set_size

clf = LogisticRegression(random_state=0).fit(X, y)
# Class probabilities for the first two rows; used below as the reference the
# torch layer has to reproduce.
out = clf.predict_proba(X[:2, :])
print(f'{out=}')
# Print the score instead of computing it and throwing the result away.
print(f'{clf.score(X, y)=}')

# - coef_ndarray of shape (1, n_features) or (n_classes, n_features)
print(f'{clf.coef_.shape=}')
print(f'{clf.intercept_.shape=}')
assert (clf.coef_.shape[1] == Din)
Dout: int = clf.coef_.shape[0]
print(f'{Dout=} which is the number of classes too in classification')
assert (Dout == clf.intercept_.shape[0])
print()

num_classes: int = Dout
mdl = nn.Linear(in_features=Din, out_features=num_classes)
# Overwrite the randomly initialised parameters with the fitted ones.
# NOTE(review): torch.from_numpy keeps the NumPy dtype, so weights, bias and
# the input below are expected to share one dtype - confirm if X changes.
mdl.weight = torch.nn.Parameter(torch.from_numpy(clf.coef_))
mdl.bias = torch.nn.Parameter(torch.from_numpy(clf.intercept_))
# Only a forward pass is needed, so skip gradient tracking.
with torch.no_grad():
    out2 = torch.softmax(mdl(torch.from_numpy(X[:2, :])), dim=1)
print(f'{out2=}')
# The softmax of the linear layer must reproduce predict_proba.
assert np.allclose(out2.cpu().numpy(), out)

# - Sketch: the same replacement for a layer of an existing model
# (nn.Linear stores its weight as [out_features, in_features])
# module: nn.Module = getattr(base_model, layer_to_replace)
# num_classes: int = clf.coef_.shape[0]  # out_features=Dout
# num_features: int = clf.coef_.shape[1]  # in_features
# assert module.weight.size() == torch.Size([num_classes, num_features])
# assert module.bias.size() == torch.Size([num_classes])
# module.weight = torch.nn.Parameter(torch.from_numpy(clf.coef_))
# module.bias = torch.nn.Parameter(torch.from_numpy(clf.intercept_))
from sklearn.linear_model import LogisticRegression

# X_train / Y_train are expected to be defined before this point.
model = LogisticRegression(solver='liblinear', random_state=10)
mdle = model.fit(X_train, Y_train)
print(mdle.classes_)
# Report the first intercept and the first coefficient with five decimals.
first_intercept = model.intercept_[0]
first_coefficient = model.coef_[0][0]
print(f"model intercept :::{first_intercept:.5f}")
print(f"model coeffieient :::{first_coefficient:.5f}")
如果您想将系数名称映射到它们的值,您可以使用
def logreg_to_dict(clf: "LogisticRegression", feature_names: list[str]) -> dict[str, float]:
    """Map ``"intercept"`` and each feature name to its fitted coefficient.

    Args:
        clf: Fitted binary logistic-regression model exposing ``intercept_``
            and ``coef_``.
        feature_names: Names of the features the model was trained on, in
            column order. Any sequence of strings is accepted.

    Returns:
        A dict whose first key is ``"intercept"``, followed by one entry per
        feature name.

    Raises:
        ValueError: If ``coef_`` has more than one row (multi-class model), or
            if the number of names does not match the number of coefficients.
    """
    coef = np.atleast_2d(clf.coef_)
    if coef.shape[0] != 1:
        raise ValueError(
            f"expected a single row of coefficients (binary model), got {coef.shape[0]} rows"
        )
    # Unpack instead of list concatenation so non-list sequences work too.
    names = ["intercept", *feature_names]
    # Index the single row rather than squeeze(): squeeze() collapses the
    # (1, 1) matrix of a one-feature model to a 0-d array, which
    # np.concatenate rejects.
    values = np.concatenate([np.ravel(clf.intercept_), coef[0]])
    if len(values) != len(names):
        raise ValueError(
            f"got {len(names) - 1} feature names for {len(values) - 1} coefficients"
        )
    return dict(zip(names, values))
其中 feature_names 是训练模型时所用特征的名称列表。