如何 运行 正确的贝叶斯逻辑回归
How to run a proper Bayesian Logistic Regression
我正在尝试 运行 对 sklearn 包提供的葡萄酒数据集进行贝叶斯逻辑回归。作为变量,我决定使用酒精、color_intensity、类黄酮、色调和镁,其中酒精是我的响应变量,其余的是预测变量。为此,我使用了 pyro 和 torch 包:
import pyro
import torch
import pyro.distributions as dist
import pyro.optim as optim
from pyro.infer import SVI, Trace_ELBO
import pandas as pd
import numpy as np
from pyro.infer import Predictive
import torch.distributions.constraints as constraints
from sklearn import datasets
pyro.set_rng_seed(0)
#loading data and prepearing dataframe
wine = datasets.load_wine()
data = pd.DataFrame(columns = wine['feature_names'], data=wine['data'] )
#choosiing variables: response and predictors
variables = data[['alcohol', 'color_intensity', 'flavanoids', 'hue', 'magnesium']]
#standardization
variables = (variables-variables.min())/(variables.max()-variables.min())
#tensorizing
alcohol = torch.tensor(variables['alcohol'].values, dtype=torch.float)
predictors = torch.stack([torch.tensor(variables[column].values, dtype=torch.float)
for column in ['alcohol', 'color_intensity', 'flavanoids', 'hue', 'magnesium']], 1)
#splitting data
k = int(0.8 * len(variables))
x_train, y_train = predictors[:k], alcohol[:k]
x_test, y_test = predictors[k:], alcohol[k:]
#modelling
def model_alcohol(predictors, alcohol):
n_observations, n_predictors = predictors.shape
#weights
w = pyro.sample('w', dist.Normal(torch.zeros(n_predictors), torch.ones(n_predictors)))
epsilon = pyro.sample('epsilon', dist.Normal(0.,1.))
#non-linearity
y_hat = torch.sigmoid((w*predictors).sum(dim=1) + epsilon)
sigma = pyro.sample("sigma", dist.Uniform(0.,3.))
with pyro.plate('alcohol', len(alcohol)):
y=pyro.sample('y', dist.Normal(y_hat, sigma), obs=alcohol)
def guide_alcohol(predictors, alcohol=None):
n_observations, n_predictors = predictors.shape
w_loc = pyro.param('w_loc', torch.rand(n_predictors))
w_scale = pyro.param('w_scale', torch.rand(n_predictors), constraint=constraints.positive)
w = pyro.sample('w', dist.Normal(w_loc, w_scale))
epsilon_loc = pyro.param('b_loc', torch.rand(1))
epsilon_scale = pyro.param('b_scale', torch.rand(1), constraint=constraints.positive)
epsilon = pyro.sample('epsilon', dist.Normal(epsilon_loc, epsilon_scale))
sigma_loc = pyro.param('sigma_loc', torch.rand(n_predictors))
sigma_scale = pyro.param('sigma_scale', torch.rand(n_predictors),
constraint=constraints.positive)
sigma = pyro.sample('sigma', dist.Normal(sigma_loc, sigma_scale))
alcohol_svi = SVI(model=model_alcohol, guide=guide_alcohol, optim=optim.ClippedAdam({'lr' : 0.0002}),
loss=Trace_ELBO())
losses = []
for step in range(10000):
loss = alcohol_svi.step(x_train, y_train)/len(x_train)
losses.append(loss)
因为我必须使用随机变分推理,所以我已经定义了模型和指南。我现在的问题是匹配张量大小,因为我现在收到错误:
RuntimeError: The size of tensor a (142) must match the size of tensor b (5) at non-singleton
dimension 0
Trace Shapes:
Param Sites:
Sample Sites:
w dist 5 |
value 5 |
epsilon dist |
value 1 |
sigma dist |
value 5 |
alcohol dist |
value 142 |
我对自己建模的想法有点陌生,所以很明显代码中存在错误(希望不是它背后的理论)。尽管如此,我还是应该调整指南上的尺寸?我不太确定如何说实话。
您的主要问题是 w
未声明为单个事件 (.to_event(1)
),并且您的方差 (sigma
) 应该与您的观察值 (()
).下面的模型和指南解决了这个问题;我建议您查看 Pyro 中的 auto-generated 指南,以及 sigma
.
中的不同先验
def model_alcohol(predictors, alcohol):
n_observations, n_predictors = predictors.shape
# weights
# w is a single event
w = pyro.sample('w', dist.Normal(torch.zeros(n_predictors), torch.ones(n_predictors)).to_event(1))
epsilon = pyro.sample('epsilon', dist.Normal(0., 1.))
# non-linearity
y_hat = torch.sigmoid(predictors @ w + epsilon) # (predictors * weight).sum(1) == predictors @ w
sigma = pyro.sample("sigma", dist.Uniform(0., 3.))
with pyro.plate('alcohol', len(alcohol)):
pyro.sample('y', dist.Normal(y_hat, sigma), obs=alcohol)
def guide_alcohol(predictors, alcohol=None):
n_observations, n_predictors = predictors.shape
w_loc = pyro.param('w_loc', torch.rand(n_predictors))
w_scale = pyro.param('w_scale', torch.rand(n_predictors), constraint=constraints.positive)
pyro.sample('w', dist.Normal(w_loc, w_scale).to_event(1))
epsilon_loc = pyro.param('b_loc', torch.rand(1))
epsilon_scale = pyro.param('b_scale', torch.rand(1), constraint=constraints.positive)
epsilon = pyro.sample('epsilon', dist.Normal(epsilon_loc, epsilon_scale))
sigma_loc = pyro.param('sigma_loc', torch.rand(1))
sigma_scale = pyro.param('sigma_scale', torch.rand(1),
constraint=constraints.positive)
pyro.sample('sigma', dist.HalfNormal(sigma_loc, sigma_scale)) # MUST BE POSITIVE
我正在尝试 运行 对 sklearn 包提供的葡萄酒数据集进行贝叶斯逻辑回归。作为变量,我决定使用酒精、color_intensity、类黄酮、色调和镁,其中酒精是我的响应变量,其余的是预测变量。为此,我使用了 pyro 和 torch 包:
import pyro
import torch
import pyro.distributions as dist
import pyro.optim as optim
from pyro.infer import SVI, Trace_ELBO
import pandas as pd
import numpy as np
from pyro.infer import Predictive
import torch.distributions.constraints as constraints
from sklearn import datasets
pyro.set_rng_seed(0)
#loading data and prepearing dataframe
wine = datasets.load_wine()
data = pd.DataFrame(columns = wine['feature_names'], data=wine['data'] )
#choosiing variables: response and predictors
variables = data[['alcohol', 'color_intensity', 'flavanoids', 'hue', 'magnesium']]
#standardization
variables = (variables-variables.min())/(variables.max()-variables.min())
#tensorizing
alcohol = torch.tensor(variables['alcohol'].values, dtype=torch.float)
predictors = torch.stack([torch.tensor(variables[column].values, dtype=torch.float)
for column in ['alcohol', 'color_intensity', 'flavanoids', 'hue', 'magnesium']], 1)
#splitting data
k = int(0.8 * len(variables))
x_train, y_train = predictors[:k], alcohol[:k]
x_test, y_test = predictors[k:], alcohol[k:]
#modelling
def model_alcohol(predictors, alcohol):
n_observations, n_predictors = predictors.shape
#weights
w = pyro.sample('w', dist.Normal(torch.zeros(n_predictors), torch.ones(n_predictors)))
epsilon = pyro.sample('epsilon', dist.Normal(0.,1.))
#non-linearity
y_hat = torch.sigmoid((w*predictors).sum(dim=1) + epsilon)
sigma = pyro.sample("sigma", dist.Uniform(0.,3.))
with pyro.plate('alcohol', len(alcohol)):
y=pyro.sample('y', dist.Normal(y_hat, sigma), obs=alcohol)
def guide_alcohol(predictors, alcohol=None):
n_observations, n_predictors = predictors.shape
w_loc = pyro.param('w_loc', torch.rand(n_predictors))
w_scale = pyro.param('w_scale', torch.rand(n_predictors), constraint=constraints.positive)
w = pyro.sample('w', dist.Normal(w_loc, w_scale))
epsilon_loc = pyro.param('b_loc', torch.rand(1))
epsilon_scale = pyro.param('b_scale', torch.rand(1), constraint=constraints.positive)
epsilon = pyro.sample('epsilon', dist.Normal(epsilon_loc, epsilon_scale))
sigma_loc = pyro.param('sigma_loc', torch.rand(n_predictors))
sigma_scale = pyro.param('sigma_scale', torch.rand(n_predictors),
constraint=constraints.positive)
sigma = pyro.sample('sigma', dist.Normal(sigma_loc, sigma_scale))
alcohol_svi = SVI(model=model_alcohol, guide=guide_alcohol, optim=optim.ClippedAdam({'lr' : 0.0002}),
loss=Trace_ELBO())
losses = []
for step in range(10000):
loss = alcohol_svi.step(x_train, y_train)/len(x_train)
losses.append(loss)
因为我必须使用随机变分推理,所以我已经定义了模型和指南。我现在的问题是匹配张量大小,因为我现在收到错误:
RuntimeError: The size of tensor a (142) must match the size of tensor b (5) at non-singleton
dimension 0
Trace Shapes:
Param Sites:
Sample Sites:
w dist 5 |
value 5 |
epsilon dist |
value 1 |
sigma dist |
value 5 |
alcohol dist |
value 142 |
我对自己建模的想法有点陌生,所以很明显代码中存在错误(希望不是它背后的理论)。尽管如此,我还是应该调整指南上的尺寸?我不太确定如何说实话。
您的主要问题是 w
未声明为单个事件 (.to_event(1)
),并且您的方差 (sigma
) 应该与您的观察值 (()
).下面的模型和指南解决了这个问题;我建议您查看 Pyro 中的 auto-generated 指南,以及 sigma
.
def model_alcohol(predictors, alcohol):
n_observations, n_predictors = predictors.shape
# weights
# w is a single event
w = pyro.sample('w', dist.Normal(torch.zeros(n_predictors), torch.ones(n_predictors)).to_event(1))
epsilon = pyro.sample('epsilon', dist.Normal(0., 1.))
# non-linearity
y_hat = torch.sigmoid(predictors @ w + epsilon) # (predictors * weight).sum(1) == predictors @ w
sigma = pyro.sample("sigma", dist.Uniform(0., 3.))
with pyro.plate('alcohol', len(alcohol)):
pyro.sample('y', dist.Normal(y_hat, sigma), obs=alcohol)
def guide_alcohol(predictors, alcohol=None):
n_observations, n_predictors = predictors.shape
w_loc = pyro.param('w_loc', torch.rand(n_predictors))
w_scale = pyro.param('w_scale', torch.rand(n_predictors), constraint=constraints.positive)
pyro.sample('w', dist.Normal(w_loc, w_scale).to_event(1))
epsilon_loc = pyro.param('b_loc', torch.rand(1))
epsilon_scale = pyro.param('b_scale', torch.rand(1), constraint=constraints.positive)
epsilon = pyro.sample('epsilon', dist.Normal(epsilon_loc, epsilon_scale))
sigma_loc = pyro.param('sigma_loc', torch.rand(1))
sigma_scale = pyro.param('sigma_scale', torch.rand(1),
constraint=constraints.positive)
pyro.sample('sigma', dist.HalfNormal(sigma_loc, sigma_scale)) # MUST BE POSITIVE