如何尽可能减少自动编码器的瓶颈?
How can I reduce the bottleneck in an autoencoder as much as possible?
亲爱的
我有以下代码:
# Encoder stack for one speech frame: 160 samples = 20 ms at an 8 kHz sampling rate.
# Wide-band speech uses a higher sampling rate, so the frame length would have to change.
inpt = Input(shape=(160, 1))

# Stage 1: two length-preserving ('same') convolutions, then halve the time axis (160 -> 80).
conv1 = Convolution1D(filters=512, kernel_size=3, padding='same', activation='relu')(inpt)
conv2 = Convolution1D(filters=128, kernel_size=3, padding='same', activation='relu')(conv1)
pool1 = MaxPooling1D(pool_size=2)(conv2)

# Stage 2: time axis 80 -> 40.
conv3 = Convolution1D(filters=256, kernel_size=3, padding='same', activation='relu')(pool1)
conv4 = Convolution1D(filters=256, kernel_size=3, padding='same', activation='relu')(conv3)
pool2 = MaxPooling1D(pool_size=2)(conv4)

# Stage 3: time axis 40 -> 20.
conv5 = Convolution1D(filters=256, kernel_size=3, padding='same', activation='relu')(pool2)
conv6 = Convolution1D(filters=128, kernel_size=3, padding='same', activation='relu')(conv5)
pool3 = MaxPooling1D(pool_size=2)(conv6)

# Stage 4: time axis 20 -> 10, then narrow the channels down to 16.
conv7 = Convolution1D(filters=128, kernel_size=3, padding='same', activation='relu')(pool3)
conv8 = Convolution1D(filters=64, kernel_size=3, padding='same', activation='relu')(conv7)
pool4 = MaxPooling1D(pool_size=2)(conv8)
conv9 = Convolution1D(filters=32, kernel_size=3, padding='same', activation='relu')(pool4)
conv10 = Convolution1D(filters=16, kernel_size=3, padding='same', activation='relu')(conv9)

# ---- EXTRA: additional conv/pool layers appended to shrink the bottleneck further ----
# Note that `conv10` and `pool4` are rebound here; the earlier tensors stay in the graph.
conv10 = Convolution1D(filters=8, kernel_size=3, padding='same', activation='relu')(conv10)
pool4 = MaxPooling1D(pool_size=2, padding='same')(conv10)  # time axis 10 -> 5
conv10 = Convolution1D(filters=8, kernel_size=3, padding='same', activation='relu')(pool4)
encoded = Convolution1D(filters=8, kernel_size=3, padding='same', activation='relu')(conv10)
# ---- end of EXTRA ----
如果输入是长度为 27000 的信号,这里的瓶颈长度为 6920。
我想将瓶颈减少到只有 400,应该怎么做?修改应该从 EXTRA 部分开始。
我尝试添加额外的卷积层(conv)和池化层(pool),但长度始终无法小于 6920。
您可以通过多种不同的方式获得所需的长度:
在各池化层中增大池化窗口大小(pool_size):
# A larger pooling window shrinks the time axis faster; values above 4 work as well.
pool = MaxPooling1D(pool_size=4)(prev)
在 Conv 和 Pool 层中使用 VALID 填充:
# 'valid' padding adds no border padding, so each layer's output is shorter than its input.
# `prev` is a placeholder for whichever tensor precedes the layer.
pool = MaxPooling1D(pool_size=4, padding='valid')(prev)
conv10 = Convolution1D(filters=8, kernel_size=3, padding='valid', activation='relu')(prev)
您还可以在 Pool 和 Conv 层中使用更大的步幅大小
# A stride of 4 moves the window four steps at a time, giving roughly a 4x shorter output.
# `prev` is a placeholder for whichever tensor precedes the layer.
pool = MaxPooling1D(pool_size=4, strides=4, padding='valid')(prev)
conv10 = Convolution1D(
    filters=8, kernel_size=3, strides=4, padding='valid', activation='relu'
)(prev)
我为您创建了一个草稿如下:
- 编码器:输入形状为 (batch_size, 160, 1),输出形状为 (batch_size, 1, 4)
- 解码器接受形状为 (batch_size, 1, 4) 的输入,与编码器输出相同
- 组合 encoder_decoder 模型
编码器:
from tensorflow.keras.layers import Input, Convolution1D, MaxPooling1D, GlobalAveragePooling1D, UpSampling1D
import tensorflow as tf
# Encoder: compresses one 160-sample frame (20 ms at 8 kHz) to a (1, 4) bottleneck.
# Wide-band speech uses a higher sampling rate, so the frame length would have to change.
inpt = Input(shape=(160, 1))

# Stage 1: two length-preserving ('same') convolutions, then halve the time axis (160 -> 80).
conv1 = Convolution1D(filters=512, kernel_size=3, padding='same', activation='relu')(inpt)
conv2 = Convolution1D(filters=128, kernel_size=3, padding='same', activation='relu')(conv1)
pool1 = MaxPooling1D(pool_size=2)(conv2)

# Stage 2: time axis 80 -> 40.
conv3 = Convolution1D(filters=256, kernel_size=3, padding='same', activation='relu')(pool1)
conv4 = Convolution1D(filters=256, kernel_size=3, padding='same', activation='relu')(conv3)
pool2 = MaxPooling1D(pool_size=2)(conv4)

# Stage 3: time axis 40 -> 20.
conv5 = Convolution1D(filters=256, kernel_size=3, padding='same', activation='relu')(pool2)
conv6 = Convolution1D(filters=128, kernel_size=3, padding='same', activation='relu')(conv5)
pool3 = MaxPooling1D(pool_size=2)(conv6)

# Stage 4: a wider pooling window (6) takes the time axis from 20 down to 3.
conv7 = Convolution1D(filters=128, kernel_size=3, padding='same', activation='relu')(pool3)
conv8 = Convolution1D(filters=64, kernel_size=3, padding='same', activation='relu')(conv7)
pool4 = MaxPooling1D(pool_size=6)(conv8)

# Bottleneck: reduce to 4 channels, then pool the remaining 3 time steps into 1.
conv9 = Convolution1D(filters=32, kernel_size=3, padding='same', activation='relu')(pool4)
conv10 = Convolution1D(filters=4, kernel_size=3, padding='same', activation='relu')(conv9)
encoded = MaxPooling1D(pool_size=3)(conv10)

encoder = tf.keras.Model(inputs=inpt, outputs=encoded)
encoder.summary()
解码器:
# Decoder: expands the (1, 4) bottleneck back to a 160-step, single-channel frame.
input_decoder = Input(shape=(1, 4))  # same shape as the encoder output

# Time axis 1 -> 2.
up_x2 = UpSampling1D(size=2)(input_decoder)
dec_conv1 = Convolution1D(filters=4, kernel_size=3, padding='same', activation='relu')(up_x2)

# Time axis 2 -> 16.
up_x8 = UpSampling1D(size=8)(dec_conv1)
dec_conv2 = Convolution1D(filters=8, kernel_size=3, padding='same', activation='relu')(up_x8)
dec_conv3 = Convolution1D(filters=8, kernel_size=3, padding='same', activation='relu')(dec_conv2)

# Time axis 16 -> 160, then collapse to a single output channel.
up_x10 = UpSampling1D(size=10)(dec_conv3)
# NOTE(review): a ReLU output layer cannot emit negative values; confirm the target
# signal is non-negative (or scaled to be), otherwise a linear/tanh output may be needed.
decoded = Convolution1D(filters=1, kernel_size=3, padding='same', activation='relu')(up_x10)

decoder = tf.keras.Model(inputs=input_decoder, outputs=decoded)
decoder.summary()
组合编解码器:
# End-to-end autoencoder: feed the encoder's bottleneck tensor through the decoder model.
reconstruction = decoder(encoded)
encoder_decoder = tf.keras.Model(inputs=inpt, outputs=reconstruction)
encoder_decoder.summary()
亲爱的
我有以下代码:
# Encoder stack for one speech frame: 160 samples = 20 ms at an 8 kHz sampling rate.
# Wide-band speech uses a higher sampling rate, so the frame length would have to change.
inpt = Input(shape=(160, 1))

# Stage 1: two length-preserving ('same') convolutions, then halve the time axis (160 -> 80).
conv1 = Convolution1D(filters=512, kernel_size=3, padding='same', activation='relu')(inpt)
conv2 = Convolution1D(filters=128, kernel_size=3, padding='same', activation='relu')(conv1)
pool1 = MaxPooling1D(pool_size=2)(conv2)

# Stage 2: time axis 80 -> 40.
conv3 = Convolution1D(filters=256, kernel_size=3, padding='same', activation='relu')(pool1)
conv4 = Convolution1D(filters=256, kernel_size=3, padding='same', activation='relu')(conv3)
pool2 = MaxPooling1D(pool_size=2)(conv4)

# Stage 3: time axis 40 -> 20.
conv5 = Convolution1D(filters=256, kernel_size=3, padding='same', activation='relu')(pool2)
conv6 = Convolution1D(filters=128, kernel_size=3, padding='same', activation='relu')(conv5)
pool3 = MaxPooling1D(pool_size=2)(conv6)

# Stage 4: time axis 20 -> 10, then narrow the channels down to 16.
conv7 = Convolution1D(filters=128, kernel_size=3, padding='same', activation='relu')(pool3)
conv8 = Convolution1D(filters=64, kernel_size=3, padding='same', activation='relu')(conv7)
pool4 = MaxPooling1D(pool_size=2)(conv8)
conv9 = Convolution1D(filters=32, kernel_size=3, padding='same', activation='relu')(pool4)
conv10 = Convolution1D(filters=16, kernel_size=3, padding='same', activation='relu')(conv9)

# ---- EXTRA: additional conv/pool layers appended to shrink the bottleneck further ----
# Note that `conv10` and `pool4` are rebound here; the earlier tensors stay in the graph.
conv10 = Convolution1D(filters=8, kernel_size=3, padding='same', activation='relu')(conv10)
pool4 = MaxPooling1D(pool_size=2, padding='same')(conv10)  # time axis 10 -> 5
conv10 = Convolution1D(filters=8, kernel_size=3, padding='same', activation='relu')(pool4)
encoded = Convolution1D(filters=8, kernel_size=3, padding='same', activation='relu')(conv10)
# ---- end of EXTRA ----
如果输入是长度为 27000 的信号,这里的瓶颈长度为 6920。
我想将瓶颈减少到只有 400,应该怎么做?修改应该从 EXTRA 部分开始。我尝试添加额外的卷积层(conv)和池化层(pool),但长度始终无法小于 6920。
您可以通过多种不同的方式获得所需的长度:
在各池化层中增大池化窗口大小(pool_size):
# A larger pooling window shrinks the time axis faster; values above 4 work as well.
pool = MaxPooling1D(pool_size=4)(prev)
在 Conv 和 Pool 层中使用 VALID 填充:
# 'valid' padding adds no border padding, so each layer's output is shorter than its input.
# `prev` is a placeholder for whichever tensor precedes the layer.
pool = MaxPooling1D(pool_size=4, padding='valid')(prev)
conv10 = Convolution1D(filters=8, kernel_size=3, padding='valid', activation='relu')(prev)
您还可以在 Pool 和 Conv 层中使用更大的步幅大小
# A stride of 4 moves the window four steps at a time, giving roughly a 4x shorter output.
# `prev` is a placeholder for whichever tensor precedes the layer.
pool = MaxPooling1D(pool_size=4, strides=4, padding='valid')(prev)
conv10 = Convolution1D(
    filters=8, kernel_size=3, strides=4, padding='valid', activation='relu'
)(prev)
我为您创建了一个草稿如下:
- 编码器:输入形状为 (batch_size, 160, 1),输出形状为 (batch_size, 1, 4)
- 解码器接受形状为 (batch_size, 1, 4) 的输入,与编码器输出相同
- 组合 encoder_decoder 模型
编码器:
from tensorflow.keras.layers import Input, Convolution1D, MaxPooling1D, GlobalAveragePooling1D, UpSampling1D
import tensorflow as tf
# Encoder: compresses one 160-sample frame (20 ms at 8 kHz) to a (1, 4) bottleneck.
# Wide-band speech uses a higher sampling rate, so the frame length would have to change.
inpt = Input(shape=(160, 1))

# Stage 1: two length-preserving ('same') convolutions, then halve the time axis (160 -> 80).
conv1 = Convolution1D(filters=512, kernel_size=3, padding='same', activation='relu')(inpt)
conv2 = Convolution1D(filters=128, kernel_size=3, padding='same', activation='relu')(conv1)
pool1 = MaxPooling1D(pool_size=2)(conv2)

# Stage 2: time axis 80 -> 40.
conv3 = Convolution1D(filters=256, kernel_size=3, padding='same', activation='relu')(pool1)
conv4 = Convolution1D(filters=256, kernel_size=3, padding='same', activation='relu')(conv3)
pool2 = MaxPooling1D(pool_size=2)(conv4)

# Stage 3: time axis 40 -> 20.
conv5 = Convolution1D(filters=256, kernel_size=3, padding='same', activation='relu')(pool2)
conv6 = Convolution1D(filters=128, kernel_size=3, padding='same', activation='relu')(conv5)
pool3 = MaxPooling1D(pool_size=2)(conv6)

# Stage 4: a wider pooling window (6) takes the time axis from 20 down to 3.
conv7 = Convolution1D(filters=128, kernel_size=3, padding='same', activation='relu')(pool3)
conv8 = Convolution1D(filters=64, kernel_size=3, padding='same', activation='relu')(conv7)
pool4 = MaxPooling1D(pool_size=6)(conv8)

# Bottleneck: reduce to 4 channels, then pool the remaining 3 time steps into 1.
conv9 = Convolution1D(filters=32, kernel_size=3, padding='same', activation='relu')(pool4)
conv10 = Convolution1D(filters=4, kernel_size=3, padding='same', activation='relu')(conv9)
encoded = MaxPooling1D(pool_size=3)(conv10)

encoder = tf.keras.Model(inputs=inpt, outputs=encoded)
encoder.summary()
解码器:
# Decoder: expands the (1, 4) bottleneck back to a 160-step, single-channel frame.
input_decoder = Input(shape=(1, 4))  # same shape as the encoder output

# Time axis 1 -> 2.
up_x2 = UpSampling1D(size=2)(input_decoder)
dec_conv1 = Convolution1D(filters=4, kernel_size=3, padding='same', activation='relu')(up_x2)

# Time axis 2 -> 16.
up_x8 = UpSampling1D(size=8)(dec_conv1)
dec_conv2 = Convolution1D(filters=8, kernel_size=3, padding='same', activation='relu')(up_x8)
dec_conv3 = Convolution1D(filters=8, kernel_size=3, padding='same', activation='relu')(dec_conv2)

# Time axis 16 -> 160, then collapse to a single output channel.
up_x10 = UpSampling1D(size=10)(dec_conv3)
# NOTE(review): a ReLU output layer cannot emit negative values; confirm the target
# signal is non-negative (or scaled to be), otherwise a linear/tanh output may be needed.
decoded = Convolution1D(filters=1, kernel_size=3, padding='same', activation='relu')(up_x10)

decoder = tf.keras.Model(inputs=input_decoder, outputs=decoded)
decoder.summary()
组合编解码器:
# End-to-end autoencoder: feed the encoder's bottleneck tensor through the decoder model.
reconstruction = decoder(encoded)
encoder_decoder = tf.keras.Model(inputs=inpt, outputs=reconstruction)
encoder_decoder.summary()