Number of neurons for hidden layers
I am trying to run the Bayesian neural network that I found in Yarin Gal's thesis "Uncertainty in Deep Learning". I found this code on GitHub:
import math
from scipy.misc import logsumexp
import numpy as np

from keras.regularizers import l2
from keras import Input
from keras.layers import Dropout
from keras.layers import Dense
from keras import Model

import time


class net:

    def __init__(self, X_train, y_train, n_hidden, n_epochs = 40,
        normalize = False, tau = 1.0, dropout = 0.05):

        """
            Constructor for the class implementing a Bayesian neural network
            trained with the probabilistic back propagation method.

            @param X_train      Matrix with the features for the training data.
            @param y_train      Vector with the target variables for the
                                training data.
            @param n_hidden     Vector with the number of neurons for each
                                hidden layer.
            @param n_epochs     Number of epochs for which to train the
                                network. The recommended value 40 should be
                                enough.
            @param normalize    Whether to normalize the input features. This
                                is recommended unless the input vector is for
                                example formed by binary features (a
                                fingerprint). In that case we do not recommend
                                to normalize the features.
            @param tau          Tau value used for regularization
            @param dropout      Dropout rate for all the dropout layers in the
                                network.
        """

        # We normalize the training data to have zero mean and unit standard
        # deviation in the training set if necessary

        if normalize:
            self.std_X_train = np.std(X_train, 0)
            self.std_X_train[ self.std_X_train == 0 ] = 1
            self.mean_X_train = np.mean(X_train, 0)
        else:
            self.std_X_train = np.ones(X_train.shape[ 1 ])
            self.mean_X_train = np.zeros(X_train.shape[ 1 ])

        X_train = (X_train - np.full(X_train.shape, self.mean_X_train)) / \
            np.full(X_train.shape, self.std_X_train)

        self.mean_y_train = np.mean(y_train)
        self.std_y_train = np.std(y_train)

        y_train_normalized = (y_train - self.mean_y_train) / self.std_y_train
        y_train_normalized = np.array(y_train_normalized, ndmin = 2).T

        # We construct the network
        N = X_train.shape[0]
        batch_size = 128
        lengthscale = 1e-2
        reg = lengthscale**2 * (1 - dropout) / (2. * N * tau)

        inputs = Input(shape=(X_train.shape[1],))
        inter = Dropout(dropout)(inputs, training=True)
        inter = Dense(n_hidden[0], activation='relu', W_regularizer=l2(reg))(inter)
        for i in range(len(n_hidden) - 1):
            inter = Dropout(dropout)(inter, training=True)
            inter = Dense(n_hidden[i+1], activation='relu', W_regularizer=l2(reg))(inter)
        inter = Dropout(dropout)(inter, training=True)
        outputs = Dense(y_train_normalized.shape[1], W_regularizer=l2(reg))(inter)
        model = Model(inputs, outputs)
        model.compile(loss='mean_squared_error', optimizer='adam')

        # We iterate the learning process
        start_time = time.time()
        model.fit(X_train, y_train_normalized, batch_size=batch_size, nb_epoch=n_epochs, verbose=0)
        self.model = model
        self.tau = tau
        self.running_time = time.time() - start_time

        # We are done!

    def predict(self, X_test, y_test):

        """
            Function for making predictions with the Bayesian neural network.

            @param X_test   The matrix of features for the test data

            @return m       The predictive mean for the test target variables.
            @return v       The predictive variance for the test target
                            variables.
            @return v_noise The estimated variance for the additive noise.
        """

        X_test = np.array(X_test, ndmin = 2)
        y_test = np.array(y_test, ndmin = 2).T

        # We normalize the test set
        X_test = (X_test - np.full(X_test.shape, self.mean_X_train)) / \
            np.full(X_test.shape, self.std_X_train)

        # We compute the predictive mean and variance for the target variables
        # of the test data

        model = self.model
        standard_pred = model.predict(X_test, batch_size=500, verbose=1)
        standard_pred = standard_pred * self.std_y_train + self.mean_y_train
        rmse_standard_pred = np.mean((y_test.squeeze() - standard_pred.squeeze())**2.)**0.5

        T = 10000
        Yt_hat = np.array([model.predict(X_test, batch_size=500, verbose=0) for _ in range(T)])
        Yt_hat = Yt_hat * self.std_y_train + self.mean_y_train

        MC_pred = np.mean(Yt_hat, 0)
        rmse = np.mean((y_test.squeeze() - MC_pred.squeeze())**2.)**0.5

        # We compute the test log-likelihood
        ll = (logsumexp(-0.5 * self.tau * (y_test[None] - Yt_hat)**2., 0) - np.log(T)
            - 0.5*np.log(2*np.pi) + 0.5*np.log(self.tau))
        test_ll = np.mean(ll)

        # We are done!
        return rmse_standard_pred, rmse, test_ll
I am new to programming, so I had to learn about classes in Python to understand this code. But when I try to run it, it asks for a "vector with the number of neurons for each hidden layer", and I don't know how to create this vector or what it means for the code. I tried creating different vectors, such as

vector = np.array([1, 2, 3])

but honestly I don't know the right answer. I only have my feature data and my target data. I hope you can help me.
The syntax vector = np.array([1, 2, 3]) is correct. That is how you define a vector with numpy in Python.

A neural network can have any number of hidden (internal) layers, and each of those layers has some number of neurons.

So in this code, a vector = np.array([100, 150, 100]) means the network should have 3 hidden layers (because the vector holds 3 values), and that, going from the input towards the output, those hidden layers should have 100, 150 and 100 neurons respectively.
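As a minimal sketch of how you would pass such a vector to the class above (the data below is random and only stands in for your own feature and target arrays, so the shapes and layer sizes are placeholders, not recommended values):

import numpy as np

# Hypothetical data: 1000 training and 200 test samples with 8 features each,
# and a single scalar target per sample. Replace with your own arrays.
X_train = np.random.randn(1000, 8)
y_train = np.random.randn(1000)
X_test = np.random.randn(200, 8)
y_test = np.random.randn(200)

# Three hidden layers with 100, 150 and 100 neurons, in input-to-output order.
n_hidden = np.array([100, 150, 100])

# Train the Bayesian neural network defined by the "net" class above
# (assumes the class definition is in the same script or session).
bnn = net(X_train, y_train, n_hidden, n_epochs=40,
          normalize=True, tau=1.0, dropout=0.05)

# Evaluate on held-out data; returns the standard RMSE, the MC-dropout RMSE
# and the test log-likelihood, as in the predict method above.
rmse_standard, rmse_mc, test_ll = bnn.predict(X_test, y_test)

A plain Python list such as [100, 150, 100] would work just as well for n_hidden, since the constructor only indexes it and takes its length.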