Receptive field calculation for GoogLeNet
I am trying to understand the receptive field of GoogLeNet. The script I am using is shown below.
import math

# Each entry of convnet is [kernel_size, stride, padding] for one layer.
convnet = [[7,2,3],[1,1,0],[3,2,0],[1,1,0],[1,1,0],[1,1,0],[1,1,0],[3,1,1],[1,1,0],[3,2,0],[1,1,0],[1,1,0],[3,1,1],[1,1,0],[1,1,0],[1,1,0],[3,1,1],[1,1,0],[3,2,0],[1,1,0],[1,1,0],[3,1,1],[1,1,0],[5,3,0],[1,1,0],[1,1,0],[3,1,1],[1,1,0],[1,1,0],[1,1,0],[3,1,1],[1,1,0],[1,1,0],[1,1,0],[3,1,1],[1,1,0],[5,3,0],[1,1,0],[1,1,0],[3,1,1],[1,1,0],[3,2,1],[1,1,0],[1,1,0],[3,1,1],[1,1,0],[1,1,0],[1,1,0],[3,1,1],[1,1,0],[7,1,1]]
layer_names = ["conv1/7x7_s2","conv1/relu_7x7","pool1/3x3_s2","pool1/norm1","conv2/3x3_reduce","conv2/relu_3x3_reduce","conv2/3x3","conv2/relu_3x3","pool2/3x3_s2","inception_3a/3x3_reduce","inception_3a/relu_3x3_reduce","inception_3a/3x3","inception_3a/relu_3x3","inception_3b/3x3_reduce","inception_3b/relu_3x3_reduce","inception_3b/3x3","inception_3b/relu_3x3","pool3/3x3_s2","inception_4a/3x3_reduce","inception_4a/relu_3x3_reduce","inception_4a/3x3","inception_4a/relu_3x3","loss1/ave_pool","inception_4b/3x3_reduce","inception_4b/relu_3x3_reduce","inception_4b/3x3","inception_4b/relu_3x3","inception_4c/3x3_reduce","inception_4c/relu_3x3_reduce","inception_4c/3x3","inception_4c/relu_3x3","inception_4d/3x3_reduce","inception_4d/relu_3x3_reduce","inception_4d/3x3","inception_4d/relu_3x3","loss2/ave_pool","inception_4e/3x3_reduce","inception_4e/relu_3x3_reduce","inception_4e/3x3","inception_4e/relu_3x3","pool4/3x3_s2","inception_5a/3x3_reduce","inception_5a/relu_3x3_reduce","inception_5a/3x3","inception_5a/relu_3x3","inception_5b/3x3_reduce","inception_5b/relu_3x3_reduce","inception_5b/3x3","inception_5b/relu_3x3","pool5/7x7_s1"]
imsize = 720

def outFromIn(isz, layernum, net=convnet):
    # Forward recursion: spatial output size and cumulative stride
    # after the first `layernum` layers.
    if layernum > len(net):
        layernum = len(net)
    totstride = 1
    insize = isz
    for layer in range(layernum):
        fsize, stride, pad = net[layer]
        outsize = (insize - fsize + 2*pad) / stride + 1
        insize = outsize
        totstride = totstride * stride
    return outsize, totstride

def inFromOut(layernum, net=convnet):
    # Backward recursion: receptive field of one output unit
    # after the first `layernum` layers.
    if layernum > len(net):
        layernum = len(net)
    outsize = 1
    for layer in reversed(range(layernum)):
        fsize, stride, pad = net[layer]
        outsize = ((outsize - 1) * stride) + fsize
    RFsize = outsize
    return RFsize

if __name__ == '__main__':
    print "layer output sizes given image = %dx%d" % (imsize, imsize)
    for i in range(len(convnet)):
        p = outFromIn(imsize, i+1)
        rf = inFromOut(i+1)
        print "Layer Name = %s, Output size = %3d, Stride = %3d, RF size = %3d" % (layer_names[i], p[0], p[1], rf)
The receptive field computed for each layer comes out as follows:
layer output sizes given image = 224x224
Layer Name = conv1/7x7_s2, Output size = 112, Stride = 2, RF size = 7
Layer Name = conv1/relu_7x7, Output size = 112, Stride = 2, RF size = 7
Layer Name = pool1/3x3_s2, Output size = 55, Stride = 4, RF size = 11
Layer Name = pool1/norm1, Output size = 55, Stride = 4, RF size = 11
Layer Name = conv2/3x3_reduce, Output size = 55, Stride = 4, RF size = 11
Layer Name = conv2/relu_3x3_reduce, Output size = 55, Stride = 4, RF size = 11
Layer Name = conv2/3x3, Output size = 55, Stride = 4, RF size = 19
Layer Name = conv2/relu_3x3, Output size = 55, Stride = 4, RF size = 19
Layer Name = pool2/3x3_s2, Output size = 27, Stride = 8, RF size = 27
Layer Name = inception_3a/3x3_reduce, Output size = 27, Stride = 8, RF size = 27
Layer Name = inception_3a/relu_3x3_reduce, Output size = 27, Stride = 8, RF size = 27
Layer Name = inception_3a/3x3, Output size = 27, Stride = 8, RF size = 43
Layer Name = inception_3a/relu_3x3, Output size = 27, Stride = 8, RF size = 43
Layer Name = inception_3b/3x3_reduce, Output size = 27, Stride = 8, RF size = 43
Layer Name = inception_3b/relu_3x3_reduce, Output size = 27, Stride = 8, RF size = 43
Layer Name = inception_3b/3x3, Output size = 27, Stride = 8, RF size = 59
Layer Name = inception_3b/relu_3x3, Output size = 27, Stride = 8, RF size = 59
Layer Name = pool3/3x3_s2, Output size = 13, Stride = 16, RF size = 75
Layer Name = inception_4a/3x3_reduce, Output size = 13, Stride = 16, RF size = 75
Layer Name = inception_4a/relu_3x3_reduce, Output size = 13, Stride = 16, RF size = 75
Layer Name = inception_4a/3x3, Output size = 13, Stride = 16, RF size = 107
Layer Name = inception_4a/relu_3x3, Output size = 13, Stride = 16, RF size = 107
Layer Name = inception_4b/3x3_reduce, Output size = 13, Stride = 16, RF size = 107
Layer Name = inception_4b/relu_3x3_reduce, Output size = 13, Stride = 16, RF size = 107
Layer Name = inception_4b/3x3, Output size = 13, Stride = 16, RF size = 139
Layer Name = inception_4b/relu_3x3, Output size = 13, Stride = 16, RF size = 139
Layer Name = inception_4c/3x3_reduce, Output size = 13, Stride = 16, RF size = 139
Layer Name = inception_4c/relu_3x3_reduce, Output size = 13, Stride = 16, RF size = 139
Layer Name = inception_4c/3x3, Output size = 13, Stride = 16, RF size = 171
Layer Name = inception_4c/relu_3x3, Output size = 13, Stride = 16, RF size = 171
Layer Name = inception_4d/3x3_reduce, Output size = 13, Stride = 16, RF size = 171
Layer Name = inception_4d/relu_3x3_reduce, Output size = 13, Stride = 16, RF size = 171
Layer Name = inception_4d/3x3, Output size = 13, Stride = 16, RF size = 203
Layer Name = inception_4d/relu_3x3, Output size = 13, Stride = 16, RF size = 203
Layer Name = inception_4e/3x3_reduce, Output size = 13, Stride = 16, RF size = 203
Layer Name = inception_4e/relu_3x3_reduce, Output size = 13, Stride = 16, RF size = 203
Layer Name = inception_4e/3x3, Output size = 13, Stride = 16, RF size = 235
Layer Name = inception_4e/relu_3x3, Output size = 13, Stride = 16, RF size = 235
Layer Name = pool4/3x3_s2, Output size = 7, Stride = 32, RF size = 267
Layer Name = inception_5a/3x3_reduce, Output size = 7, Stride = 32, RF size = 267
Layer Name = inception_5a/relu_3x3_reduce, Output size = 7, Stride = 32, RF size = 267
Layer Name = inception_5a/3x3, Output size = 7, Stride = 32, RF size = 331
Layer Name = inception_5a/relu_3x3, Output size = 7, Stride = 32, RF size = 331
Layer Name = inception_5b/3x3_reduce, Output size = 7, Stride = 32, RF size = 331
Layer Name = inception_5b/relu_3x3_reduce, Output size = 7, Stride = 32, RF size = 331
Layer Name = inception_5b/3x3, Output size = 7, Stride = 32, RF size = 395
Layer Name = inception_5b/relu_3x3, Output size = 7, Stride = 32, RF size = 395
Layer Name = pool5/7x7_s1, Output size = 3, Stride = 32, RF size = 587
What I do not understand is this: for an input image size of 224, the receptive field of the last layer should be about 224, yet for the pool5/7x7_s1 layer it comes out as 587. Already the inception_4e module has a receptive field roughly the size of the 224 input image.
What is wrong with my calculation?
The paper clearly states that the receptive field of the network is 224 x 224 in RGB color space with zero mean.
The receptive field is independent of the input size:
The size of the receptive field tells you which "pixels" affect (or take part in) the computation of a single output of the net.
Note that when you compute the receptive field you completely ignore the padding(!).
On the other hand, when you want to compute the output size of the net, you take into account the kernel size and the stride (which also affect the receptive field), but additionally the input size and the padding.
Therefore, the receptive field size and the input/output sizes are related, but by no means the same.
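To make the distinction concrete, here is a minimal sketch that applies the same two recursions as the script above to a toy stack of padded 3x3 convolutions (the toy_net, out_size and rf_size names and the 8-pixel input are purely illustrative, not part of GoogLeNet). Because of the padding, the output size never shrinks, while the receptive field keeps growing and soon exceeds the input:

# Toy layer list (illustrative, not GoogLeNet): each entry is
# [kernel_size, stride, padding]; five 3x3, stride-1, padding-1 convolutions.
toy_net = [[3, 1, 1]] * 5
toy_imsize = 8  # deliberately tiny input so the effect is easy to see

def out_size(insize, layers):
    # Forward recursion: the output size depends on kernel, stride AND padding.
    for fsize, stride, pad in layers:
        insize = (insize - fsize + 2 * pad) // stride + 1
    return insize

def rf_size(layers):
    # Backward recursion: the receptive field depends on kernel and stride only;
    # the padding is ignored.
    rf = 1
    for fsize, stride, pad in reversed(layers):
        rf = (rf - 1) * stride + fsize
    return rf

for n in range(1, len(toy_net) + 1):
    print("after %d layers: output = %d, receptive field = %d"
          % (n, out_size(toy_imsize, toy_net[:n]), rf_size(toy_net[:n])))
# The output size stays 8 at every depth, while the receptive field grows
# 3, 5, 7, 9, 11 -- after four layers it already exceeds the 8-pixel input.

When the receptive field grows past the input, the extra pixels simply fall on padding or outside the image. The same happens in GoogLeNet: the final 7x7 pooling over the stride-32 feature map adds (7 - 1) * 32 = 192 pixels to inception_5b's receptive field of 395, giving the 587 in the table above, which is not in conflict with a 224 x 224 input.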