Why does my autoencoder for de-noising in PyTorch learn to zero out everything?
I need to build a denoising autoencoder in PyTorch for the task of cleaning up signals.
For example, I can take the cosine function and sample it over intervals. I have two parameters - B and K: B is the number of intervals I take in each example, and K is how many sample points (equally spaced) there are in each interval. So, for example, I can take B = 5 intervals and measure K = 8 points in each interval. The distance between consecutive points is then 2pi / 8, and I have 40 points in total. The number of functions I try to generalize over is L, and I treat each of them as a different channel. Then I add a random starting position to each example (to make each one slightly different), add random noise, and feed it to the autoencoder for training.
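For concreteness, here is a minimal sketch of the sampling grid I have in mind (assuming B = 5 and K = 8 as in the example above; the names are only for illustration):

import numpy as np

B, K = 5, 8                           # intervals per example, points per interval
step = 2 * np.pi / K                  # spacing between consecutive samples
positions = np.arange(B * K) * step   # 40 equally spaced sample positions
samples = np.cos(positions)           # one clean channel, here the plain cosine
print(positions.shape, samples.shape) # (40,) (40,)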
The problem is that no matter the architecture or the learning rate, it gradually learns to output only zeros. The autoencoder is very simple, so I don't think the problem is with it but rather with how I generate the data.
In any case, here are both pieces of code:
class ConvAutoencoder(nn.Module):
    def __init__(self, enc_channels, dec_channels):
        super(ConvAutoencoder, self).__init__()

        ## encoder layers ##
        encoder_layers = []
        decoder_layers = []

        in_channels = enc_channels[0]
        for i in range(1, len(enc_channels)):
            out_channels = enc_channels[i]
            encoder_layers += [nn.ConvTranspose2d(in_channels, out_channels, kernel_size=1, bias=True),
                               nn.ReLU()]
            in_channels = out_channels

        in_channels = dec_channels[0]
        for i in range(1, len(dec_channels)):
            out_channels = dec_channels[i]
            decoder_layers += [nn.ConvTranspose2d(in_channels, out_channels, kernel_size=1, bias=True),
                               nn.ReLU()]
            in_channels = out_channels

        self.encoder = nn.Sequential(*encoder_layers)
        self.decoder = nn.Sequential(*decoder_layers)

    def forward(self, x):
        if len(x.shape) == 3:
            x = x.unsqueeze(dim=-1)
        res = self.decoder(self.encoder(x)).squeeze(-1)
        return res
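For reference, a quick shape check of the model as written (my own sketch; the channel widths match the training script further down):

import torch

model = ConvAutoencoder([4, 64, 128, 256], [256, 128, 64, 4])
x = torch.randn(8, 4, 12)   # (batch, channels L, points B*K)
out = model(x)              # internally unsqueezed to (8, 4, 12, 1), squeezed back after decoding
print(out.shape)            # torch.Size([8, 4, 12])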
And the data is generated as follows:
def generate_data(batch_size: int, intervals: int, sample_length: int, channels_functions, noise_scale=1) -> torch.tensor:
    channels = len(channels_functions)
    mul_term = 2 * np.pi / sample_length  # each interval spans 2pi and points are equally spaced
    # each example is K * B long
    positions = np.arange(0, sample_length * intervals)
    x = positions * mul_term

    # creating random start points (from negative to positive)
    random_starting_pos = (np.random.rand(batch_size) - 0.5) * 10000
    start_pos_mat = np.tile(random_starting_pos, (sample_length * intervals, 1))
    start_pos_mat = np.tile(start_pos_mat, (channels, 1)).T
    start_pos_mat = np.reshape(start_pos_mat, (batch_size, channels, sample_length * intervals))

    X = np.tile(x, (channels, 1))
    X = np.repeat(X[np.newaxis, :, :], batch_size, axis=0)
    X += start_pos_mat  # adding the random starting position

    # apply each function to a different channel
    for i, function in enumerate(channels_functions):
        X[:, i, :] = function(X[:, i, :])

    clean = X
    noise = np.random.normal(scale=noise_scale, size=clean.shape)
    noisy = clean + noise

    # normalizing each sample
    row_sums = np.linalg.norm(clean, axis=2)
    clean = clean / row_sums[:, :, np.newaxis]
    row_sums = np.linalg.norm(noisy, axis=2)
    noisy = noisy / row_sums[:, :, np.newaxis]

    clean = torch.from_numpy(clean)
    noisy = torch.from_numpy(noisy)
    return clean, noisy
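For reference, a minimal usage sketch (my own example) with two channel functions, 5 intervals, and 8 points per interval:

import numpy as np

clean, noisy = generate_data(batch_size=2, intervals=5, sample_length=8,
                             channels_functions=[np.cos, np.sin], noise_scale=0.1)
print(clean.shape, noisy.shape)   # torch.Size([2, 2, 40]) torch.Size([2, 2, 40])
print((clean < 0).any().item())   # True - the normalized targets do contain negative values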
Edit - added the entire training loop:
if __name__ == '__main__':
    func_list = [lambda x: np.cos(x),
                 lambda x: np.cos((x**4) / 10),
                 lambda x: np.sin(x**3 * np.cos(x**2)),
                 lambda x: 0.25*np.cos(x**2) - 10*np.sin(0.25*x)]
    L = len(func_list)
    K = 3
    B = 4

    enc_channels = [L, 64, 128, 256]
    num_epochs = 100

    model = models.ConvAutoencoder(enc_channels, enc_channels[::-1])
    criterion = torch.nn.BCELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.005, weight_decay=1e-5)

    for epoch in range(num_epochs):
        clean, noisy = util.generate_data(128, K, B, func_list)
        # ===================forward=====================
        output = model(noisy.float())
        loss = criterion(output.float(), clean.float())
        # ===================backward====================
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # ===================log========================
        print('epoch [{}/{}], loss:{:.4f}'.format(epoch + 1, num_epochs, loss.data))
        if epoch % 10 == 0:
            show_clean, show_noisy = util.generate_data(1, K, B, func_list)
            print("clean\n{}".format(show_clean))
            print("noisy\n{}".format(show_noisy))
            print("denoised\n{}".format(model(show_noisy.float())))
Sure enough, after about 10 epochs the model outputs:
clean vector
tensor([[[ 0.3611, -0.1905, -0.3611, 0.1905, 0.3611, -0.1905, -0.3611,
0.1905, 0.3611, -0.1905, -0.3611, 0.1905],
[ 0.3387, -0.0575, -0.2506, -0.3531, -0.3035, 0.3451, 0.3537,
-0.2416, 0.2652, -0.3126, -0.3203, -0.1707],
[-0.0369, 0.4412, -0.1323, 0.1802, -0.2943, 0.3590, 0.4549,
0.0827, -0.0164, 0.4350, -0.1413, -0.3395],
[ 0.3997, 0.3516, 0.2451, 0.1136, -0.0458, -0.1944, -0.3225,
-0.3925, -0.3971, -0.3382, -0.2457, -0.1153]]], dtype=torch.float64)
noisy vector
tensor([[[-0.1071, -0.0671, 0.0993, -0.2029, 0.1587, -0.4407, -0.0867,
-0.2598, 0.2426, -0.6939, -0.3011, -0.0870],
[ 0.0889, -0.3415, -0.1434, -0.2393, -0.4708, 0.0144, 0.2620,
-0.1186, 0.6424, 0.0886, -0.2192, -0.1562],
[ 0.1989, 0.2794, 0.0848, -0.2729, -0.2168, 0.1475, 0.5294,
0.4788, 0.1754, 0.2333, -0.0549, -0.3665],
[ 0.3611, 0.3535, 0.1957, 0.1980, -0.1115, -0.1912, -0.2713,
-0.4087, -0.3669, -0.3675, -0.2991, -0.1356]]], dtype=torch.float64)
denoised vector
tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]], grad_fn=<SqueezeBackward1>)
Thanks
The problem is that you use a ReLU in the last layer, but your target (the clean signal) contains negative values. With a ReLU as the last layer you can never output negative values.
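As a tiny illustration, ReLU clamps every negative entry to zero:

import torch

print(torch.relu(torch.tensor([-0.36, 0.19, -0.19])))
# tensor([0.0000, 0.1900, 0.0000]) - the negative values are gone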
Just replace your decoder construction with:
for i in range(1, len(dec_channels)):
    out_channels = dec_channels[i]
    if i == len(dec_channels) - 1:
        # last layer: no ReLU, so the output can be negative
        decoder_layers += [nn.ConvTranspose2d(in_channels, out_channels, kernel_size=1, bias=True)]
    else:
        decoder_layers += [nn.ConvTranspose2d(in_channels, out_channels, kernel_size=1, bias=True),
                           nn.ReLU()]
    in_channels = out_channels
I would also suggest using an L2 loss rather than BCELoss, since your targets are not confined to [0, 1].
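For example, a minimal sketch of that change in your training loop (everything else stays the same):

criterion = torch.nn.MSELoss()   # L2 loss, well-defined for negative-valued targets

# inside the training loop, only the loss changes:
output = model(noisy.float())
loss = criterion(output.float(), clean.float())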