如何正确地运行贝叶斯逻辑回归

How to run a proper Bayesian Logistic Regression

我正在尝试对 sklearn 包提供的葡萄酒数据集运行贝叶斯逻辑回归。作为变量,我决定使用酒精(alcohol)、color_intensity、类黄酮(flavanoids)、色调(hue)和镁(magnesium),其中酒精是我的响应变量,其余的是预测变量。为此,我使用了 pyro 和 torch 包:

import pyro
import torch
import pyro.distributions as dist
import pyro.optim as optim
from pyro.infer import SVI, Trace_ELBO
import pandas as pd
import numpy as np
from pyro.infer import Predictive
import torch.distributions.constraints as constraints
from sklearn import datasets
pyro.set_rng_seed(0)

# Loading data and preparing the dataframe.
# (Fixed: the original statements each carried one stray leading space,
# which raises IndentationError at module level.)
wine = datasets.load_wine()
data = pd.DataFrame(columns=wine['feature_names'], data=wine['data'])

# Choosing variables: response and predictors.
# NOTE(review): 'alcohol' is the response but is ALSO included among the
# predictors below, so the model regresses alcohol on itself — confirm
# this is intended.
variables = data[['alcohol', 'color_intensity', 'flavanoids', 'hue', 'magnesium']]

# Min-max scaling of every column to [0, 1].
variables = (variables - variables.min()) / (variables.max() - variables.min())

# Tensorizing: response vector and an (n_rows, 5) predictor matrix.
alcohol = torch.tensor(variables['alcohol'].values, dtype=torch.float)
predictors = torch.stack(
    [torch.tensor(variables[column].values, dtype=torch.float)
     for column in ['alcohol', 'color_intensity', 'flavanoids', 'hue', 'magnesium']],
    1,
)

# 80/20 train/test split (rows are taken in dataset order, no shuffling).
k = int(0.8 * len(variables))
x_train, y_train = predictors[:k], alcohol[:k]
x_test, y_test = predictors[k:], alcohol[k:]

#modelling

def model_alcohol(predictors, alcohol):
    """Pyro model: sigmoid-linear regression of `alcohol` on `predictors`.

    predictors: float tensor of shape (n_observations, n_predictors).
    alcohol: float tensor of shape (n_observations,), the observed response.
    """
    n_observations, n_predictors = predictors.shape

    # Prior over the regression weights, one Normal per predictor.
    # BUG: this distribution has batch_shape (n_predictors,), not
    # event_shape (n_predictors,); Pyro then tries to broadcast the size-5
    # batch against the size-142 'alcohol' plate below, which is exactly
    # the reported "tensor a (142) vs tensor b (5)" error. The fix is to
    # append .to_event(1) so the weight vector is one multivariate event.
    w = pyro.sample('w', dist.Normal(torch.zeros(n_predictors), torch.ones(n_predictors)))
    # Scalar intercept / bias term.
    epsilon = pyro.sample('epsilon', dist.Normal(0.,1.))

    # Linear predictor squashed into (0, 1) by a sigmoid.
    y_hat = torch.sigmoid((w*predictors).sum(dim=1) + epsilon)
    # Scalar observation noise with a Uniform(0, 3) prior.
    sigma = pyro.sample("sigma", dist.Uniform(0.,3.))
    # One likelihood term per observation.
    with pyro.plate('alcohol', len(alcohol)):
       y=pyro.sample('y', dist.Normal(y_hat, sigma), obs=alcohol)

def guide_alcohol(predictors, alcohol=None):
    """Variational guide paired with model_alcohol (mean-field Normals).

    Must declare the same sample sites ('w', 'epsilon', 'sigma') with the
    same shapes as the model.
    """
    n_observations, n_predictors = predictors.shape

    # Mean-field Normal over the weight vector.
    # NOTE(review): like the model's 'w', this site needs .to_event(1)
    # for its shape to be interpreted consistently.
    w_loc = pyro.param('w_loc', torch.rand(n_predictors))
    w_scale = pyro.param('w_scale', torch.rand(n_predictors), constraint=constraints.positive)
    w = pyro.sample('w', dist.Normal(w_loc, w_scale))

    # Scalar bias term (params deliberately named b_loc/b_scale).
    epsilon_loc = pyro.param('b_loc', torch.rand(1))
    epsilon_scale = pyro.param('b_scale', torch.rand(1), constraint=constraints.positive)
    epsilon = pyro.sample('epsilon', dist.Normal(epsilon_loc, epsilon_scale))

    # BUG: the model's 'sigma' is a scalar, but these params are shaped
    # (n_predictors,), so the 'sigma' site disagrees with the model.
    # A Normal guide can also emit negative values, which lie outside the
    # Uniform(0, 3) support of the model's prior.
    sigma_loc = pyro.param('sigma_loc', torch.rand(n_predictors))
    sigma_scale = pyro.param('sigma_scale', torch.rand(n_predictors), 
                   constraint=constraints.positive)
    sigma = pyro.sample('sigma', dist.Normal(sigma_loc, sigma_scale))


# Stochastic variational inference: ClippedAdam optimiser, ELBO objective.
alcohol_svi = SVI(
    model=model_alcohol,
    guide=guide_alcohol,
    optim=optim.ClippedAdam({'lr' : 0.0002}),
    loss=Trace_ELBO(),
)

# Run 10k SVI steps, recording the per-sample loss at each step.
n_train = len(x_train)
losses = [alcohol_svi.step(x_train, y_train) / n_train for _ in range(10000)]

因为我必须使用随机变分推理,所以我已经定义了模型和指南。我现在的问题是匹配张量大小,因为我现在收到错误:

RuntimeError: The size of tensor a (142) must match the size of tensor b (5) at non-singleton 
dimension 0
Trace Shapes:      
Param Sites:      
Sample Sites:      
   w dist   5 |
    value   5 |
 epsilon dist |
    value   1 |
 sigma dist   |
    value   5 |
 alcohol dist |
    value 142 |

我对自己动手建模还比较陌生,所以代码中显然存在错误(希望问题不在它背后的理论)。我是否应该调整 guide 中各张量的尺寸?说实话,我不太确定该怎么做。

您的主要问题是 w 没有被声明为单个事件(.to_event(1)),并且您的方差(sigma)的形状应当与观测值一致(此处为标量)。下面的模型和指南解决了这个问题;我建议您了解一下 Pyro 中自动生成的 guide,以及为 sigma 选择一个不同的先验。
def model_alcohol(predictors, alcohol):
    """Sigmoid-linear regression model for the alcohol response.

    predictors: float tensor of shape (n_observations, n_predictors).
    alcohol: float tensor of shape (n_observations,), observed responses.
    """
    _, n_predictors = predictors.shape

    # Declare the weight vector as ONE event of size n_predictors, so its
    # dimension cannot be confused with the observation plate below.
    weights = pyro.sample(
        'w',
        dist.Normal(torch.zeros(n_predictors), torch.ones(n_predictors)).to_event(1),
    )
    # Scalar intercept.
    bias = pyro.sample('epsilon', dist.Normal(0., 1.))

    # Linear predictor squashed through a sigmoid.
    mean = torch.sigmoid(predictors @ weights + bias)
    # Scalar observation noise.
    noise = pyro.sample("sigma", dist.Uniform(0., 3.))

    # One conditionally independent likelihood term per observation.
    with pyro.plate('alcohol', len(alcohol)):
        pyro.sample('y', dist.Normal(mean, noise), obs=alcohol)


def guide_alcohol(predictors, alcohol=None):
    """Mean-field variational guide paired with model_alcohol.

    Declares the sites 'w', 'epsilon' and 'sigma' with shapes matching
    the model (weights are a single event of size n_predictors).
    """
    _, n_predictors = predictors.shape

    # Mean-field Normal over the weight vector; .to_event(1) mirrors the
    # event shape declared in the model.
    w_loc = pyro.param('w_loc', torch.rand(n_predictors))
    w_scale = pyro.param('w_scale', torch.rand(n_predictors), constraint=constraints.positive)
    pyro.sample('w', dist.Normal(w_loc, w_scale).to_event(1))

    # Scalar intercept.
    epsilon_loc = pyro.param('b_loc', torch.rand(1))
    epsilon_scale = pyro.param('b_scale', torch.rand(1), constraint=constraints.positive)
    pyro.sample('epsilon', dist.Normal(epsilon_loc, epsilon_scale))

    # BUG FIX: HalfNormal takes a single `scale` argument. The original
    # dist.HalfNormal(sigma_loc, sigma_scale) silently used sigma_loc as the
    # scale and passed sigma_scale as validate_args, so the 'sigma_loc' param
    # (unconstrained, possibly non-positive after updates) parameterised the
    # distribution. Use one positive-constrained scale parameter instead.
    sigma_scale = pyro.param('sigma_scale', torch.rand(1),
                             constraint=constraints.positive)
    pyro.sample('sigma', dist.HalfNormal(sigma_scale))  # sigma must be positive
    # NOTE(review): HalfNormal has support [0, inf) while the model's prior is
    # Uniform(0, 3); guide samples above 3 get -inf prior log-prob. Consider a
    # HalfNormal prior for sigma in the model as well, as the answer suggests.