在 pylearn2 中使用 RBM 预训练 ANN
Pre-training ANN with RBM in pylearn2
我正在尝试使用 pylearn2,并借助 RBM 预训练来训练一个多层 ANN。我对 pylearn2\pylearn2\scripts\tutorials\deep_trainer 中名为 run_deep_trainer 的脚本稍作了修改。我想要一个 4 层网络,其中前 3 层各是一个含 500 个隐藏单元的 GaussianBinaryRBM,最后一层是 mlp.Softmax 层。
这是我写的脚本:
from pylearn2.models.rbm import GaussianBinaryRBM
from pylearn2.models.softmax_regression import SoftmaxRegression
from pylearn2.models.mlp import Softmax
from pylearn2.training_algorithms.sgd import SGD
from pylearn2.costs.autoencoder import MeanSquaredReconstructionError
from pylearn2.termination_criteria import EpochCounter
from pylearn2.datasets.dense_design_matrix import DenseDesignMatrix
from pylearn2.energy_functions.rbm_energy import GRBM_Type_1
from pylearn2.blocks import StackedBlocks
from pylearn2.datasets.transformer_dataset import TransformerDataset
from pylearn2.costs.ebm_estimation import SMD
from pylearn2.training_algorithms.sgd import MonitorBasedLRAdjuster
from pylearn2.train import Train
from optparse import OptionParser
import numpy
def get_dataset_timitConsSmall():
    """Load and return the "train" split of the timitConsSmall dataset.

    The dataset object is built from a YAML ``!obj:`` description with
    ``classes_number`` fixed to 32.

    NOTE(review): ``yaml_parse`` is not imported in the visible snippet;
    presumably it is ``pylearn2.config.yaml_parse`` -- confirm.
    """
    print('loading timitConsSmall dataset...')
    # Same text as the original triple-quoted template, spelled line by line.
    yaml_template = (
        "!obj:pylearn2.datasets.timitConsSmall.timit.TIMIT {\n"
        "classes_number: 32,\n"
        "which_set: %s,\n"
        "}"
    )
    dataset = yaml_parse.load(yaml_template % "train")
    print('...done loading timitConsSmall.')
    return dataset
def get_grbm(structure):
    """Build one GaussianBinaryRBM layer.

    ``structure`` is an ``(n_input, n_output)`` pair giving the number of
    visible and hidden units. All other hyper-parameters are fixed here.
    """
    n_visible, n_hidden = structure
    return GaussianBinaryRBM(
        nvis=n_visible,
        nhid=n_hidden,
        irange=0.05,
        energy_function_class=GRBM_Type_1,
        learn_sigma=True,
        init_sigma=.4,
        init_bias_hid=-2.,
        mean_vis=False,
        sigma_lr_scale=1e-3,
    )
def get_logistic_regressor(structure):
    """Build a SoftmaxRegression model for the supervised output stage.

    ``structure`` is an ``(n_input, n_output)`` pair: the input
    dimensionality and the number of classes.
    """
    n_visible, n_classes = structure
    return SoftmaxRegression(nvis=n_visible, n_classes=n_classes, irange=0.02)
def get_mlp_softmax(structure):
    """Build the output layer as a bare ``mlp.Softmax`` named ``'y'``.

    ``structure`` is an ``(n_input, n_output)`` pair; only ``n_output``
    (the number of classes) is used.

    NOTE(review): the returned object is later passed to ``Train`` as
    ``model``; the traceback in this post shows that failing with
    ``AttributeError: 'Softmax' object has no attribute '_params'``.
    """
    _, n_classes = structure
    return Softmax(n_classes=n_classes, irange=0.02, layer_name='y')
def get_layer_trainer_softmax(layer, trainset):
    """Return a ``Train`` object that fits ``layer`` on ``trainset`` with SGD.

    Uses learning rate 0.1 and batch size 100, monitors on ``trainset``
    itself, and stops after ``MAX_EPOCHS_SUPERVISED`` epochs.

    NOTE(review): ``Default`` and ``MAX_EPOCHS_SUPERVISED`` are not defined
    in the visible snippet; presumably they come from elsewhere in the
    original script -- confirm.
    """
    sgd = SGD(
        learning_rate=0.1,
        cost=Default(),
        batch_size=100,
        monitoring_batches=10,
        monitoring_dataset=trainset,
        termination_criterion=EpochCounter(max_epochs=MAX_EPOCHS_SUPERVISED),
        update_callbacks=None,
    )
    return Train(model=layer, dataset=trainset, algorithm=sgd, extensions=None)
def get_layer_trainer_logistic(layer, trainset):
    """Return a ``Train`` object that fits ``layer`` on ``trainset`` with SGD.

    Uses learning rate 0.1 and batch size 10, monitors on ``trainset``
    itself, and stops after ``MAX_EPOCHS_SUPERVISED`` epochs.

    NOTE(review): ``Default`` and ``MAX_EPOCHS_SUPERVISED`` are not defined
    in the visible snippet; presumably they come from elsewhere in the
    original script -- confirm.
    """
    sgd = SGD(
        learning_rate=0.1,
        cost=Default(),
        batch_size=10,
        monitoring_batches=10,
        monitoring_dataset=trainset,
        termination_criterion=EpochCounter(max_epochs=MAX_EPOCHS_SUPERVISED),
        update_callbacks=None,
    )
    return Train(model=layer, dataset=trainset, algorithm=sgd, extensions=None)
def get_layer_trainer_sgd_rbm(layer, trainset):
    """Return a ``Train`` object for unsupervised training of an RBM layer.

    The cost is ``SMD`` with a Gaussian corruptor (stdev 0.4). Training
    stops after ``MAX_EPOCHS_UNSUPERVISED`` epochs, and the model is saved
    to ``grbm.pkl`` with ``save_freq=1``.

    NOTE(review): ``GaussianCorruptor`` and ``MAX_EPOCHS_UNSUPERVISED`` are
    not defined in the visible snippet; presumably they come from elsewhere
    in the original script -- confirm.
    """
    smd_cost = SMD(corruptor=GaussianCorruptor(stdev=0.4))
    stop_rule = EpochCounter(max_epochs=MAX_EPOCHS_UNSUPERVISED)
    sgd_settings = {
        'learning_rate': 1e-2,
        'batch_size': 100,
        'monitoring_batches': 20,
        'monitoring_dataset': trainset,
        'cost': smd_cost,
        'termination_criterion': stop_rule,
    }
    sgd = SGD(**sgd_settings)
    lr_adjusters = [MonitorBasedLRAdjuster()]
    return Train(
        model=layer,
        algorithm=sgd,
        save_path='grbm.pkl',
        save_freq=1,
        extensions=lr_adjusters,
        dataset=trainset,
    )
def main(args=None):
    """Pre-train three stacked Gaussian RBMs, then train the output layer.

    The network is ``n_input -> 500 -> 500 -> 500 -> 32``. Each RBM is
    trained on the raw data passed through the RBMs below it, and the last
    layer is trained on the output of all three. ``args`` is accepted but
    not used.
    """
    raw_trainset = get_dataset_timitConsSmall()
    n_output = 32
    n_input = raw_trainset.get_design_matrix().shape[1]

    # Layers 0-2 are Gaussian RBMs; layer 3 is the supervised softmax.
    structure = [[n_input, 500], [500, 500], [500, 500], [500, n_output]]
    layers = [get_grbm(dims) for dims in structure[:3]]
    layers.append(get_mlp_softmax(structure[3]))

    # Layer k sees the raw data transformed by layers 0..k-1.
    trainsets = [
        raw_trainset,
        TransformerDataset(raw=raw_trainset, transformer=layers[0]),
    ]
    for depth in (2, 3):
        stacked = StackedBlocks(layers[0:depth])
        trainsets.append(TransformerDataset(raw=raw_trainset, transformer=stacked))

    # One trainer per layer: three RBM trainers, then the softmax trainer.
    layer_trainers = [
        get_layer_trainer_sgd_rbm(rbm, data)
        for rbm, data in zip(layers[:3], trainsets[:3])
    ]
    layer_trainers.append(get_layer_trainer_softmax(layers[3], trainsets[3]))

    # Unsupervised pre-training, bottom layer first.
    short_rule = '-----------------------------------'
    for i, (layer, layer_trainer) in enumerate(zip(layers, layer_trainers[0:3])):
        print(short_rule)
        print(' Unsupervised training layer %d, %s' % (i, layer.__class__))
        print(short_rule)
        layer_trainer.main_loop()

    long_rule = '------------------------------------------------------'
    print('\n')
    print(long_rule)
    print(' Unsupervised training done! Start supervised training...')
    print(long_rule)
    print('\n')

    # Supervised training of the output layer.
    layer_trainers[-1].main_loop()


if __name__ == '__main__':
    main()
无监督预训练部分正确,但有监督训练部分出错:
Traceback (most recent call last):
File "run_deep_trainer.py", line 404, in <module>
main()
File "run_deep_trainer.py", line 400, in main
layer_trainers[-1].main_loop()
File "/home/gortolan/pylearn2/pylearn2/train.py", line 141, in main_loop
self.setup()
File "/home/gortolan/pylearn2/pylearn2/train.py", line 121, in setup
self.algorithm.setup(model=self.model, dataset=self.dataset)
File "/home/gortolan/pylearn2/pylearn2/training_algorithms/sgd.py", line 243, in setup
inf_params = [param for param in model.get_params()
File "/home/gortolan/pylearn2/pylearn2/models/model.py", line 503, in get_params
return list(self._params)
AttributeError: 'Softmax' object has no attribute '_params'
如果在最后一层改用 SoftmaxRegression(作为模型),也就是把函数 get_mlp_softmax() 和 get_layer_trainer_softmax() 分别替换为 get_logistic_regressor() 和 get_layer_trainer_logistic(),则一切正常。
看起来模型 mlp.Softmax 的 get_params() 函数无法返回参数(_params)。
有人知道如何解决这个问题吗?
出现这个问题是因为 SoftmaxRegression 是一个模型(Model),而 Softmax 只是 MLP 中的一个层(Layer)。一种修复方法是:
def get_mlp_softmax(structure):
    """Wrap an ``mlp.Softmax`` layer in an ``MLP`` so it can be trained as a model.

    ``structure`` is an ``(n_input, n_output)`` pair: the input
    dimensionality and the number of classes. Returns an ``MLP`` whose only
    layer is a ``Softmax`` named ``'y'``.
    """
    # Local import: the script only imports Softmax from this module.
    from pylearn2.models.mlp import MLP

    n_input, n_output = structure
    softmax = Softmax(n_classes=n_output, irange=0.02, layer_name='y')
    # Take nvis from the structure entry rather than hard-coding 500; the
    # value is the same for the post's [500, n_output] structure.
    return MLP(nvis=n_input, layers=[softmax])
其中 MLP 指的是 pylearn2.models.mlp.MLP。
我正在尝试使用 pylearn2,并借助 RBM 预训练来训练一个多层 ANN。我对 pylearn2\pylearn2\scripts\tutorials\deep_trainer 中名为 run_deep_trainer 的脚本稍作了修改。我想要一个 4 层网络,其中前 3 层各是一个含 500 个隐藏单元的 GaussianBinaryRBM,最后一层是 mlp.Softmax 层。
这是我写的脚本:
from pylearn2.models.rbm import GaussianBinaryRBM
from pylearn2.models.softmax_regression import SoftmaxRegression
from pylearn2.models.mlp import Softmax
from pylearn2.training_algorithms.sgd import SGD
from pylearn2.costs.autoencoder import MeanSquaredReconstructionError
from pylearn2.termination_criteria import EpochCounter
from pylearn2.datasets.dense_design_matrix import DenseDesignMatrix
from pylearn2.energy_functions.rbm_energy import GRBM_Type_1
from pylearn2.blocks import StackedBlocks
from pylearn2.datasets.transformer_dataset import TransformerDataset
from pylearn2.costs.ebm_estimation import SMD
from pylearn2.training_algorithms.sgd import MonitorBasedLRAdjuster
from pylearn2.train import Train
from optparse import OptionParser
import numpy
def get_dataset_timitConsSmall():
    """Load and return the "train" split of the timitConsSmall dataset.

    The dataset object is built from a YAML ``!obj:`` description with
    ``classes_number`` fixed to 32.

    NOTE(review): ``yaml_parse`` is not imported in the visible snippet;
    presumably it is ``pylearn2.config.yaml_parse`` -- confirm.
    """
    print('loading timitConsSmall dataset...')
    # Same text as the original triple-quoted template, spelled line by line.
    yaml_template = (
        "!obj:pylearn2.datasets.timitConsSmall.timit.TIMIT {\n"
        "classes_number: 32,\n"
        "which_set: %s,\n"
        "}"
    )
    dataset = yaml_parse.load(yaml_template % "train")
    print('...done loading timitConsSmall.')
    return dataset
def get_grbm(structure):
    """Build one GaussianBinaryRBM layer.

    ``structure`` is an ``(n_input, n_output)`` pair giving the number of
    visible and hidden units. All other hyper-parameters are fixed here.
    """
    n_visible, n_hidden = structure
    return GaussianBinaryRBM(
        nvis=n_visible,
        nhid=n_hidden,
        irange=0.05,
        energy_function_class=GRBM_Type_1,
        learn_sigma=True,
        init_sigma=.4,
        init_bias_hid=-2.,
        mean_vis=False,
        sigma_lr_scale=1e-3,
    )
def get_logistic_regressor(structure):
    """Build a SoftmaxRegression model for the supervised output stage.

    ``structure`` is an ``(n_input, n_output)`` pair: the input
    dimensionality and the number of classes.
    """
    n_visible, n_classes = structure
    return SoftmaxRegression(nvis=n_visible, n_classes=n_classes, irange=0.02)
def get_mlp_softmax(structure):
    """Build the output layer as a bare ``mlp.Softmax`` named ``'y'``.

    ``structure`` is an ``(n_input, n_output)`` pair; only ``n_output``
    (the number of classes) is used.

    NOTE(review): the returned object is later passed to ``Train`` as
    ``model``; the traceback in this post shows that failing with
    ``AttributeError: 'Softmax' object has no attribute '_params'``.
    """
    _, n_classes = structure
    return Softmax(n_classes=n_classes, irange=0.02, layer_name='y')
def get_layer_trainer_softmax(layer, trainset):
    """Return a ``Train`` object that fits ``layer`` on ``trainset`` with SGD.

    Uses learning rate 0.1 and batch size 100, monitors on ``trainset``
    itself, and stops after ``MAX_EPOCHS_SUPERVISED`` epochs.

    NOTE(review): ``Default`` and ``MAX_EPOCHS_SUPERVISED`` are not defined
    in the visible snippet; presumably they come from elsewhere in the
    original script -- confirm.
    """
    sgd = SGD(
        learning_rate=0.1,
        cost=Default(),
        batch_size=100,
        monitoring_batches=10,
        monitoring_dataset=trainset,
        termination_criterion=EpochCounter(max_epochs=MAX_EPOCHS_SUPERVISED),
        update_callbacks=None,
    )
    return Train(model=layer, dataset=trainset, algorithm=sgd, extensions=None)
def get_layer_trainer_logistic(layer, trainset):
    """Return a ``Train`` object that fits ``layer`` on ``trainset`` with SGD.

    Uses learning rate 0.1 and batch size 10, monitors on ``trainset``
    itself, and stops after ``MAX_EPOCHS_SUPERVISED`` epochs.

    NOTE(review): ``Default`` and ``MAX_EPOCHS_SUPERVISED`` are not defined
    in the visible snippet; presumably they come from elsewhere in the
    original script -- confirm.
    """
    sgd = SGD(
        learning_rate=0.1,
        cost=Default(),
        batch_size=10,
        monitoring_batches=10,
        monitoring_dataset=trainset,
        termination_criterion=EpochCounter(max_epochs=MAX_EPOCHS_SUPERVISED),
        update_callbacks=None,
    )
    return Train(model=layer, dataset=trainset, algorithm=sgd, extensions=None)
def get_layer_trainer_sgd_rbm(layer, trainset):
    """Return a ``Train`` object for unsupervised training of an RBM layer.

    The cost is ``SMD`` with a Gaussian corruptor (stdev 0.4). Training
    stops after ``MAX_EPOCHS_UNSUPERVISED`` epochs, and the model is saved
    to ``grbm.pkl`` with ``save_freq=1``.

    NOTE(review): ``GaussianCorruptor`` and ``MAX_EPOCHS_UNSUPERVISED`` are
    not defined in the visible snippet; presumably they come from elsewhere
    in the original script -- confirm.
    """
    smd_cost = SMD(corruptor=GaussianCorruptor(stdev=0.4))
    stop_rule = EpochCounter(max_epochs=MAX_EPOCHS_UNSUPERVISED)
    sgd_settings = {
        'learning_rate': 1e-2,
        'batch_size': 100,
        'monitoring_batches': 20,
        'monitoring_dataset': trainset,
        'cost': smd_cost,
        'termination_criterion': stop_rule,
    }
    sgd = SGD(**sgd_settings)
    lr_adjusters = [MonitorBasedLRAdjuster()]
    return Train(
        model=layer,
        algorithm=sgd,
        save_path='grbm.pkl',
        save_freq=1,
        extensions=lr_adjusters,
        dataset=trainset,
    )
def main(args=None):
    """Pre-train three stacked Gaussian RBMs, then train the output layer.

    The network is ``n_input -> 500 -> 500 -> 500 -> 32``. Each RBM is
    trained on the raw data passed through the RBMs below it, and the last
    layer is trained on the output of all three. ``args`` is accepted but
    not used.
    """
    raw_trainset = get_dataset_timitConsSmall()
    n_output = 32
    n_input = raw_trainset.get_design_matrix().shape[1]

    # Layers 0-2 are Gaussian RBMs; layer 3 is the supervised softmax.
    structure = [[n_input, 500], [500, 500], [500, 500], [500, n_output]]
    layers = [get_grbm(dims) for dims in structure[:3]]
    layers.append(get_mlp_softmax(structure[3]))

    # Layer k sees the raw data transformed by layers 0..k-1.
    trainsets = [
        raw_trainset,
        TransformerDataset(raw=raw_trainset, transformer=layers[0]),
    ]
    for depth in (2, 3):
        stacked = StackedBlocks(layers[0:depth])
        trainsets.append(TransformerDataset(raw=raw_trainset, transformer=stacked))

    # One trainer per layer: three RBM trainers, then the softmax trainer.
    layer_trainers = [
        get_layer_trainer_sgd_rbm(rbm, data)
        for rbm, data in zip(layers[:3], trainsets[:3])
    ]
    layer_trainers.append(get_layer_trainer_softmax(layers[3], trainsets[3]))

    # Unsupervised pre-training, bottom layer first.
    short_rule = '-----------------------------------'
    for i, (layer, layer_trainer) in enumerate(zip(layers, layer_trainers[0:3])):
        print(short_rule)
        print(' Unsupervised training layer %d, %s' % (i, layer.__class__))
        print(short_rule)
        layer_trainer.main_loop()

    long_rule = '------------------------------------------------------'
    print('\n')
    print(long_rule)
    print(' Unsupervised training done! Start supervised training...')
    print(long_rule)
    print('\n')

    # Supervised training of the output layer.
    layer_trainers[-1].main_loop()


if __name__ == '__main__':
    main()
无监督预训练部分正确,但有监督训练部分出错:
Traceback (most recent call last):
File "run_deep_trainer.py", line 404, in <module>
main()
File "run_deep_trainer.py", line 400, in main
layer_trainers[-1].main_loop()
File "/home/gortolan/pylearn2/pylearn2/train.py", line 141, in main_loop
self.setup()
File "/home/gortolan/pylearn2/pylearn2/train.py", line 121, in setup
self.algorithm.setup(model=self.model, dataset=self.dataset)
File "/home/gortolan/pylearn2/pylearn2/training_algorithms/sgd.py", line 243, in setup
inf_params = [param for param in model.get_params()
File "/home/gortolan/pylearn2/pylearn2/models/model.py", line 503, in get_params
return list(self._params)
AttributeError: 'Softmax' object has no attribute '_params'
如果在最后一层改用 SoftmaxRegression(作为模型),也就是把函数 get_mlp_softmax() 和 get_layer_trainer_softmax() 分别替换为 get_logistic_regressor() 和 get_layer_trainer_logistic(),则一切正常。
看起来模型 mlp.Softmax 的 get_params() 函数无法返回参数(_params)。
有人知道如何解决这个问题吗?
出现这个问题是因为 SoftmaxRegression 是一个模型(Model),而 Softmax 只是 MLP 中的一个层(Layer)。一种修复方法是:
def get_mlp_softmax(structure):
    """Wrap an ``mlp.Softmax`` layer in an ``MLP`` so it can be trained as a model.

    ``structure`` is an ``(n_input, n_output)`` pair: the input
    dimensionality and the number of classes. Returns an ``MLP`` whose only
    layer is a ``Softmax`` named ``'y'``.
    """
    # Local import: the script only imports Softmax from this module.
    from pylearn2.models.mlp import MLP

    n_input, n_output = structure
    softmax = Softmax(n_classes=n_output, irange=0.02, layer_name='y')
    # Take nvis from the structure entry rather than hard-coding 500; the
    # value is the same for the post's [500, n_output] structure.
    return MLP(nvis=n_input, layers=[softmax])
其中 MLP 指的是 pylearn2.models.mlp.MLP。