Extracting Autoencoder features from the hidden layer
I have developed some code that applies an autoencoder to my dataset in order to extract hidden features from it. The dataset consists of 84 variables, which have already been normalized.
import torch
import torch.nn as nn
from torch.utils.data import random_split
from collections import OrderedDict

epochs = 10
batch_size = 128
lr = 0.008

# Convert the input and output data to tensors and create a TensorDataset
# (`input` and `output` are pandas DataFrames; note they shadow Python built-ins)
input = torch.Tensor(input.to_numpy())
output = torch.tensor(output.to_numpy())
data = torch.utils.data.TensorDataset(input, output)

# Split into train and test sets using random_split
number_rows = len(input)  # the size of the dataset, i.e. the number of rows in the table
test_split = int(number_rows * 0.3)
train_split = number_rows - test_split
train_set, test_set = random_split(data, [train_split, test_split])

# Create DataLoaders to read the data in batches
train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=batch_size)
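For reference, the snippet assumes `input` and `output` already exist as pandas DataFrames. A minimal sketch with hypothetical random data (the shapes and names are placeholders, not part of the original code) makes it runnable end to end:

import numpy as np
import pandas as pd

# Hypothetical stand-ins for the real dataset: 1000 rows, 84 normalized variables
rng = np.random.default_rng(0)
input = pd.DataFrame(rng.random((1000, 84)))               # features in [0, 1]
output = pd.DataFrame(rng.integers(0, 2, size=(1000, 1)))  # placeholder labels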
Model structure:
# Model structure
class AutoEncoder(nn.Module):
    def __init__(self):
        super(AutoEncoder, self).__init__()
        # Encoder
        self.encoder = nn.Sequential(
            nn.Linear(84, 128),
            nn.Tanh(),
            nn.Linear(128, 64),
            nn.Tanh(),
            nn.Linear(64, 16),
            nn.Tanh(),
            nn.Linear(16, 2),
        )
        # Decoder
        self.decoder = nn.Sequential(
            nn.Linear(2, 16),
            nn.Tanh(),
            nn.Linear(16, 64),
            nn.Tanh(),
            nn.Linear(64, 128),
            nn.Tanh(),
            nn.Linear(128, 84),
            nn.Sigmoid()
        )

    def forward(self, inputs):
        codes = self.encoder(inputs)
        decoded = self.decoder(codes)
        return codes, decoded
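As a quick sanity check (my addition, not part of the original code), a random batch confirms that the bottleneck is 2-dimensional and the reconstruction matches the 84 input variables:

# Shape check with a random batch of 4 samples; values are arbitrary
x = torch.randn(4, 84)
codes, decoded = AutoEncoder()(x)
print(codes.shape)    # torch.Size([4, 2])
print(decoded.shape)  # torch.Size([4, 84])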
Optimizer and loss function:
# Optimizer and loss function
model = AutoEncoder()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
loss_function = nn.MSELoss()
Training steps:
# Train
for epoch in range(epochs):
    for data, labels in train_loader:
        inputs = data.view(-1, 84)
        # Forward
        codes, decoded = model(inputs)
        # Backward
        optimizer.zero_grad()
        loss = loss_function(decoded, inputs)
        loss.backward()
        optimizer.step()
    # Show progress once per epoch
    print('[{}/{}] Loss:'.format(epoch + 1, epochs), loss.item())
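The loop above only tracks training loss. If you also want the reconstruction loss on the held-out split, a minimal sketch (my addition, reusing the existing test_loader and loss_function) would be:

# Mean reconstruction loss over the test split, averaged per batch
model.eval()
with torch.no_grad():
    test_loss = sum(loss_function(model(d.view(-1, 84))[1], d.view(-1, 84)).item()
                    for d, _ in test_loader) / len(test_loader)
print('Test reconstruction loss:', test_loss)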
The autoencoder model is saved with:
# Save
torch.save(model,'autoencoder.pth')
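Note that torch.save(model, ...) pickles the whole module, so reloading it later requires the AutoEncoder class definition to be importable. Reloading looks like this:

# Reload the full module (the AutoEncoder class must be defined/importable)
model = torch.load('autoencoder.pth')  # on newer PyTorch you may need weights_only=False
model.eval()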
At this point I would like some help with how to extract the features from the hidden layer. These features extracted from the hidden layer will be used in another classification algorithm.
You need to place a hook on your model, and you can use that hook to extract features from any layer. However, it would be much easier if you did not use nn.Sequential, because it groups layers together and they act as a single layer. I ran your code using the class below: a feature-extraction wrapper that takes a model instance as input and places a hook using the layer index.
class FE(nn.Module):
    def __init__(self, model_instance, output_layers, *args):
        super().__init__(*args)
        self.output_layers = output_layers
        self.selected_out = OrderedDict()
        self.pretrained = model_instance
        self.fhooks = []
        print("model_instance._modules.keys():", model_instance._modules.keys())
        for i, l in enumerate(list(self.pretrained._modules.keys())):
            print("index:", i, ", keys:", l)
            if i in self.output_layers:
                print("------------------------ > Hook is placed on the output of:", l)
                self.fhooks.append(getattr(self.pretrained, l).register_forward_hook(self.forward_hook(l)))

    def forward_hook(self, layer_name):
        def hook(module, input, output):
            self.selected_out[layer_name] = output
        return hook

    def forward(self, x):
        # AutoEncoder.forward takes a single argument
        out = self.pretrained(x)
        return out, self.selected_out
And use it like this:
model_hooked = FE(model, output_layers=[0])

which prints:
model_instance._modules.keys(): odict_keys(['encoder', 'decoder'])
index: 0 , keys: encoder
------------------------ > Hook is placed on the output of: encoder
index: 1 , keys: decoder
Once the hook is in place, you can simply feed data into the new hooked model and it will return two values. The first is the regular output of the last layer; the second is the output of the hooked layer:
out, layerout = model_hooked(data_sample)
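In this specific case there is also a simpler route worth noting (my addition): since AutoEncoder.forward already returns the bottleneck as its first value, the hidden features can be read directly, with no hook at all:

# Direct alternative: the model's own forward already exposes the bottleneck
with torch.no_grad():
    codes, decoded = model(data_sample)  # codes is the 2-dimensional hidden representation

# When you are done with the hooked model, the hooks can be detached:
# for h in model_hooked.fhooks:
#     h.remove()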
If you want to extract features from a loader, you can use this function:
def extract_features(fe, layer_name, train_loader, test_loader):
    extracted_features = []
    lbls = []
    extracted_features_test = []
    lbls_test = []
    with torch.no_grad():  # gradients are not needed for feature extraction
        for data, target in train_loader:
            out, layerout = fe(data)
            extracted_features.extend(layerout[layer_name])
            lbls.extend(target)
        for data, target in test_loader:
            out, layerout = fe(data)
            extracted_features_test.extend(layerout[layer_name])
            lbls_test.extend(target)
    extracted_features = torch.stack(extracted_features)
    extracted_features_test = torch.stack(extracted_features_test)
    lbls = torch.stack(lbls)
    lbls_test = torch.stack(lbls_test)
    return extracted_features, lbls, extracted_features_test, lbls_test
Usage looks like this:
Features_TRAINLOADER, lbls, Features_TESTLOADER, lbls_test = extract_features(model_hooked, "encoder", train_loader, test_loader)
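Since the goal is to feed these features to another classifier, here is a minimal sketch of that hand-off, assuming scikit-learn is available and the labels are single-column class labels (the choice of LogisticRegression is just an example, not prescribed by the original post):

from sklearn.linear_model import LogisticRegression

# Convert the extracted hidden features to plain NumPy arrays
X_train = Features_TRAINLOADER.detach().numpy()
y_train = lbls.detach().numpy().ravel()
X_test = Features_TESTLOADER.detach().numpy()
y_test = lbls_test.detach().numpy().ravel()

clf = LogisticRegression(max_iter=1000)
clf.fit(X_train, y_train)
print('Test accuracy:', clf.score(X_test, y_test))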