Fully Convolutional Network


TensorFlow.js version

tfjs-node-gpu 0.2.1

Describe the problem or feature request

I'm trying to build a supervised fully convolutional network and can't get it to produce proper output. The network structure follows several FCN examples, specifically this one: http://deeplearning.net/tutorial/fcn_2D_segm.html

I've put the masks into a one-hot 4D boolean tensor ordered [batch, height, width, class], with only one class. The input data is converted to a float32 tensor of shape [batch, height, width, 1] (no RGB channels), scaled to the range 0 to 1.
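Not part of the original post, but a minimal sketch of how tensors in those shapes might be prepared, assuming `rawImages` and `rawMasks` are flat typed arrays (all names here are hypothetical):

        // Hypothetical helper, not from the original post.
        // rawImages: Float32Array of pixel values 0..255, length = batch * height * width
        // rawMasks:  Int32Array of per-pixel class indices, same length
        function prepareData(rawImages, rawMasks, batch, height, width, numClasses) {
            // Input: [batch, height, width, 1], scaled to the 0..1 range
            const xs = tf.tensor4d(rawImages, [batch, height, width, 1]).div(255);
            // Masks: [batch, height, width] class indices -> one-hot [batch, height, width, numClasses]
            const ys = tf.oneHot(tf.tensor3d(rawMasks, [batch, height, width], 'int32'), numClasses).toFloat();
            return { xs, ys };
        }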

The data is here, from the same tutorial as above: https://drive.google.com/file/d/0B_60jvsCt1hhZWNfcW4wbHE5N3M/view

        const input = tf.input({ shape: [this._dims[1], this._dims[2], this._dims[3]], name: 'Input', });

        const batchNorm_0 = tf.layers.batchNormalization().apply(input);
        //**Begin A-Scan Net*/
        const fcn_1_0 = tf.layers.conv2d( { name: '', kernelSize: [3, 3], strides: [1, 1], activation: 'relu', padding: 'same', filters: 64,  } ).apply(input);
        const fcn_2 = tf.layers.maxPool2d( { kernelSize: [2, 2], strides: [2, 2] } ).apply(fcn_1_0);

        const fcn_3_0 = tf.layers.conv2d( { kernelSize: [3, 3], strides: [1, 1], activation: 'relu', padding: 'same', filters: 64 } ).apply(fcn_2);
        const fcn_3_1 = tf.layers.conv2d( { kernelSize: [3, 3], strides: [1, 1], activation: 'relu', padding: 'same', filters: 64 } ).apply(fcn_3_0);
        const fcn_3_2 = tf.layers.conv2d( { kernelSize: [3, 3], strides: [1, 1], activation: 'relu', padding: 'same', filters: 64 } ).apply(fcn_3_1);
        const fcn_4 = tf.layers.maxPool2d( { kernelSize: [2, 2], strides: [2, 2] } ).apply(fcn_3_2);

        const fcn_5_0 = tf.layers.conv2d( { kernelSize: [3, 3], strides: [1, 1], activation: 'relu', padding: 'same', filters: 64 } ).apply(fcn_4);
        const fcn_5_1 = tf.layers.conv2d( { kernelSize: [3, 3], strides: [1, 1], activation: 'relu', padding: 'same', filters: 64 } ).apply(fcn_5_0);
        const fcn_5_2 = tf.layers.conv2d( { kernelSize: [3, 3], strides: [1, 1], activation: 'relu', padding: 'same', filters: 64 } ).apply(fcn_5_1);
        const fcn_6 = tf.layers.maxPool2d( { kernelSize: [2, 2], strides: [2, 2] } ).apply(fcn_5_2);

        const fcn_7_0 = tf.layers.conv2d( { kernelSize: [3, 3], strides: [1, 1], activation: 'relu', padding: 'same', filters: 64 } ).apply(fcn_6);
        const fcn_7_1 = tf.layers.conv2d( { kernelSize: [3, 3], strides: [1, 1], activation: 'relu', padding: 'same', filters: 64 } ).apply(fcn_7_0);
        const fcn_7_2 = tf.layers.conv2d( { kernelSize: [3, 3], strides: [1, 1], activation: 'relu', padding: 'same', filters: 64 } ).apply(fcn_7_1);
        const fcn_8 = tf.layers.maxPool2d( { kernelSize: [2, 2], strides: [2, 2] } ).apply(fcn_7_2);

        const fcn_9_0 = tf.layers.conv2d( { kernelSize: [3, 3], strides: [1, 1], activation: 'relu', padding: 'same', filters: 64 } ).apply(fcn_8);
        const fcn_9_1 = tf.layers.conv2d( { kernelSize: [3, 3], strides: [1, 1], activation: 'relu', padding: 'same', filters: 64 } ).apply(fcn_9_0);
        const fcn_9_2 = tf.layers.conv2d( { kernelSize: [3, 3], strides: [1, 1], activation: 'relu', padding: 'same', filters: 64 } ).apply(fcn_9_1);
        const fcn_10 = tf.layers.maxPool2d( { kernelSize: [2, 2], strides: [2, 2] } ).apply(fcn_9_2);

        const fcn_11 = tf.layers.conv2d({ kernelSize: [1, 1], strides: [1, 1], activation: 'relu', padding: 'same', filters: 2048 }).apply(fcn_10);

        const fcn_12 = tf.layers.conv2d({ kernelSize: [1, 1], strides: [1, 1], activation: 'relu', padding: 'same', filters: this._classes }).apply(fcn_11);

        const upsample_5 = tf.layers.conv2dTranspose( { kernelSize: [32, 32], strides: [32, 32], filters: this._classes, activation: 'relu', padding: 'same' } ).apply(fcn_12);
        const upsample_6 = tf.layers.conv2d( { kernelSize: [1, 1], strides: [1, 1], filters: this._classes, activation: 'softmax', padding: 'same' } ).apply(upsample_5);

        var model = tf.model( { name: 'AdvancedCNN', inputs: [input], outputs: [upsample_6] } );

The loss / metrics / optimizer are:

        const LEARNING_RATE = .00001;
        const optimizer = tf.train.adam(LEARNING_RATE)
        model.compile({
            optimizer,
            loss: tf.losses.logLoss,
            metrics: tf.metrics.categoricalCrossentropy,
        });

The problem is that the network doesn't learn: the output class is either all 0s or all 1s, even after several epochs. I've tried with and without batch normalization and varying the learning rate. The data looks fine, so either I'm formatting the data incorrectly, or there's something wrong with the loss function, label structure, etc.
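One way to sanity-check what the network is actually emitting (not from the original post; assumes a trained `model` and an input batch `xs`) is to look at the per-pixel argmax of the predictions:

        // Rough sanity check, assuming `model` and an input batch `xs` already exist.
        const preds = model.predict(xs);       // [batch, height, width, classes]
        const classMap = preds.argMax(-1);     // per-pixel predicted class index
        classMap.data().then(values => {
            // If only one class ever appears, the network has collapsed to a constant output.
            console.log('distinct predicted classes:', [...new Set(values)]);
        });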

Has anyone else built an FCN with TensorFlow.js?

In a CNN, the last layer is a dense or fully connected layer, and it's from that dense layer that the softmax activation is computed. Your architecture is currently missing such a layer, which is why you can't classify properly.

In fact, it's this last dense layer that performs the classification, using the different features learned by the convolutional layers.

The only thing to point out is that you may need a flatten layer at the input of the dense layer, purely so that the dimensions match.
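A minimal sketch of the flatten + dense head described above (not from the original answer; note it produces one label per image rather than a per-pixel segmentation, which is the trade-off of dropping the fully convolutional head; `fcn_12` is the last conv layer from the question):

        // Sketch of the suggested flatten + dense softmax head (whole-image classification).
        const flat = tf.layers.flatten().apply(fcn_12);
        const probs = tf.layers.dense({ units: this._classes, activation: 'softmax' }).apply(flat);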

Update: using an upsampling layer as the last layer may get the loss to decrease. I think the problem is related to the transposed convolution layer. This article explains what upsampling is.
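For reference (not from the original answer), tf.layers.upSampling2d simply repeats rows and columns by the given factors, so it learns nothing on its own; a tiny sketch of the shape change:

        // upSampling2d doubles height and width here: [1, 2, 2, 1] -> [1, 4, 4, 1]
        const up = tf.layers.upSampling2d({ size: [2, 2] });
        console.log(up.apply(tf.ones([1, 2, 2, 1])).shape);  // [1, 4, 4, 1]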

I was able to solve the problem. I swapped the conv2dTranspose layers out for upSampling2d followed by conv2d layers. One-hot encoding the masks was sufficient, and tf.losses.softmaxCrossEntropy worked fine as the loss function.

Finally, downscaling my images to 256x512 helped speed up training. The final network structure that worked (it's a very crude network, so take it as-is) is:

        const fcn_1_0 = tf.layers.conv2d( { name: '', kernelSize: [3, 3], strides: [1, 1], activation: 'relu', padding: 'same', filters: 64, } ).apply(input);
        const fcn_1_1 = tf.layers.conv2d( { name: '', kernelSize: [3, 3], strides: [1, 1], activation: 'relu', padding: 'same', filters: 64,  } ).apply(fcn_1_0);
        const fcn_1_2 = tf.layers.conv2d( { name: '', kernelSize: [3, 3], strides: [1, 1], activation: 'relu', padding: 'same', filters: 64,  } ).apply(fcn_1_1);
        const fcn_2 = tf.layers.maxPool2d( { kernelSize: [2, 2], strides: [2, 2] } ).apply(fcn_1_2);

        const fcn_3_0 = tf.layers.conv2d( { kernelSize: [3, 3], strides: [1, 1], activation: 'relu', padding: 'same', filters: 64 } ).apply(fcn_2);
        const fcn_3_1 = tf.layers.conv2d( { kernelSize: [3, 3], strides: [1, 1], activation: 'relu', padding: 'same', filters: 64 } ).apply(fcn_3_0);
        const fcn_3_2 = tf.layers.conv2d( { kernelSize: [3, 3], strides: [1, 1], activation: 'relu', padding: 'same', filters: 64 } ).apply(fcn_3_1);
        const fcn_4 = tf.layers.maxPool2d( { kernelSize: [2, 2], strides: [2, 2] } ).apply(fcn_3_2);

        const fcn_5_0 = tf.layers.conv2d( { kernelSize: [3, 3], strides: [1, 1], activation: 'relu', padding: 'same', filters: 64 } ).apply(fcn_4);
        const fcn_5_1 = tf.layers.conv2d( { kernelSize: [3, 3], strides: [1, 1], activation: 'relu', padding: 'same', filters: 64 } ).apply(fcn_5_0);
        const fcn_5_2 = tf.layers.conv2d( { kernelSize: [3, 3], strides: [1, 1], activation: 'relu', padding: 'same', filters: 64 } ).apply(fcn_5_1);
        const fcn_6 = tf.layers.maxPool2d( { kernelSize: [2, 2], strides: [2, 2] } ).apply(fcn_5_2);

        const fcn_7_0 = tf.layers.conv2d( { kernelSize: [3, 3], strides: [1, 1], activation: 'relu', padding: 'same', filters: 64 } ).apply(fcn_6);
        const fcn_7_1 = tf.layers.conv2d( { kernelSize: [3, 3], strides: [1, 1], activation: 'relu', padding: 'same', filters: 64 } ).apply(fcn_7_0);
        const fcn_7_2 = tf.layers.conv2d( { kernelSize: [3, 3], strides: [1, 1], activation: 'relu', padding: 'same', filters: 64 } ).apply(fcn_7_1);
        const fcn_8 = tf.layers.maxPool2d( { kernelSize: [2, 2], strides: [2, 2] } ).apply(fcn_7_2);

        const fcn_9_0 = tf.layers.conv2d( { kernelSize: [3, 3], strides: [1, 1], activation: 'relu', padding: 'same', filters: 64 } ).apply(fcn_8);
        const fcn_9_1 = tf.layers.conv2d( { kernelSize: [3, 3], strides: [1, 1], activation: 'relu', padding: 'same', filters: 64 } ).apply(fcn_9_0);
        const fcn_9_2 = tf.layers.conv2d( { kernelSize: [3, 3], strides: [1, 1], activation: 'relu', padding: 'same', filters: 64 } ).apply(fcn_9_1);
        const fcn_10 = tf.layers.maxPool2d( { kernelSize: [2, 2], strides: [2, 2] } ).apply(fcn_9_2);

        // const fcn_11_0 = tf.layers.conv2d( { kernelSize: [3, 3], strides: [1, 1], activation: 'relu', padding: 'same', filters: 64 } ).apply(fcn_10);
        // const fcn_11_1 = tf.layers.conv2d( { kernelSize: [3, 3], strides: [1, 1], activation: 'relu', padding: 'same', filters: 64 } ).apply(fcn_11_0);
        // const fcn_11_2 = tf.layers.conv2d( { kernelSize: [3, 3], strides: [1, 1], activation: 'relu', padding: 'same', filters: 64 } ).apply(fcn_11_1);
        // const fcn_12 = tf.layers.maxPool2d( { kernelSize: [2, 2], strides: [2, 2] } ).apply(fcn_11_2);

        //const fcn_13 = tf.layers.conv2d({ kernelSize: [7, 7], strides: [1, 1], activation: 'relu', padding: 'same', filters: 4096 }).apply(fcn_12);
        const fcn_13_0 = tf.layers.conv2d({ kernelSize: [1, 1], strides: [1, 1], activation: 'relu', padding: 'same', filters: 4096 }).apply(fcn_10);
        //const drop_0 = tf.layers.dropout( { rate: .5 } ).apply(fcn_13);

        //const fcn_15 = tf.layers.conv2d({ kernelSize: [1, 1], strides: [1, 1], activation: 'relu', padding: 'same', filters: this._classes }).apply(fcn_13_0);

        const upsample_1 = tf.layers.upSampling2d( { size: [2, 2], padding: 'same' } ).apply(fcn_13_0);
        const conv_upsample1 = tf.layers.conv2d( { kernelSize: 3, strides: 1, activation: 'relu', padding: 'same', filters: 64 }).apply(upsample_1);

        const upsample_2 = tf.layers.upSampling2d( { size: [2, 2], padding: 'same' } ).apply(conv_upsample1);
        const conv_upsample2 = tf.layers.conv2d( { kernelSize: 3, strides: 1, activation: 'relu', padding: 'same', filters: 64 }).apply(upsample_2);

        const upsample_3 = tf.layers.upSampling2d( { size: [2, 2], padding: 'same' } ).apply(conv_upsample2);
        const conv_upsample3 = tf.layers.conv2d( { kernelSize: 3, strides: 1, activation: 'relu', padding: 'same', filters: 64 }).apply(upsample_3);

        const upsample_4 = tf.layers.upSampling2d( { size: [2, 2], padding: 'same' } ).apply(conv_upsample3);
        const conv_upsample4 = tf.layers.conv2d( { kernelSize: 3, strides: 1, activation: 'relu', padding: 'same', filters: 64 }).apply(upsample_4);

        const upsample_5 = tf.layers.upSampling2d( { size: [2, 2], padding: 'same' } ).apply(conv_upsample4);
        const conv_upsample5 = tf.layers.conv2d( { kernelSize: 3, strides: 1, activation: 'relu', padding: 'same', filters: 64 }).apply(upsample_5);

        //const upsample_6 = tf.layers.upSampling2d( { size: [2, 2], padding: 'same' } ).apply(fcn_15);
        const conv_upsample = tf.layers.conv2dTranspose( { kernelSize: 1, strides: 1, activation: 'softmax', padding: 'same', filters: this._classes }).apply(conv_upsample5);
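The post doesn't show the updated compile call; a sketch of how the model above might be assembled and compiled with tf.losses.softmaxCrossEntropy as described (the learning rate is illustrative, not from the post):

        // Sketch only: wire up the output above and compile with the loss the author
        // reports worked. conv_upsample is the final layer from the snippet above.
        const model = tf.model({ name: 'AdvancedCNN', inputs: [input], outputs: [conv_upsample] });
        model.compile({
            optimizer: tf.train.adam(1e-5),
            loss: tf.losses.softmaxCrossEntropy,
            metrics: [tf.metrics.categoricalCrossentropy],
        });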