Caffe 总是 returns 一个标签
Caffe always returns one label
我已经在 bin 下使用 caffe 工具训练了一个模型,现在我正在尝试使用 python 脚本进行测试,我读入图像并自己对其进行预处理(就像我对训练数据集所做的那样)和我将预训练的权重加载到网络中,但我几乎总是(99.99% 的时间)收到相同的结果 -0- 对于每个测试图像。我确实考虑过我的模型可能过度拟合,但在训练了几个模型之后,我开始意识到我从预测中获得的标签很可能是原因。我还增加了 dropout 并随机裁剪以克服过度拟合,我有大约 60K 用于训练。数据集也大致平衡。我在训练的评估步骤中获得了 77 到 87 的准确率(取决于我处理数据的方式、我使用的架构等)
请原谅我的超级 hacky 代码,我已经远离 caffe 测试一段时间了,所以我怀疑问题是我如何将输入数据传递到网络,但我无法确定:
import h5py, os
import sys
sys.path.append("/home/X/Desktop/caffe-caffe-0.16/python")
from caffe.io import oversample
from caffe.io import resize_image
import caffe
from random import randint
import numpy as np
import cv2
import matplotlib.pyplot as plt
from collections import Counter as Cnt
meanImg = cv2.imread('/home/caffe/data/Ch/Final_meanImg.png')
model_def = '/home/X/Desktop/caffe-caffe-0.16/models/bvlc_googlenet/deploy.prototxt'
model_weights = '/media/X/DATA/SDet/Google__iter_140000.caffemodel'
# load the model
#caffe.set_mode_gpu()
#caffe.set_device(0)
net = caffe.Net(model_def, # defines the structure of the model
model_weights, # contains the trained weights
caffe.TEST) # use test mode (e.g., don't perform dropout)
with open( '/home/caffe/examples/sdet/SDet/test_random.txt', 'r' ) as T, open('/media/X/DATA/SDet/results/testResults.txt','w') as testResultsFile:
readImgCounter = 0
runningCorrect = 0
runningAcc = 0.0
#testResultsFile.write('filename'+' '+'prediction'+' '+'GT')
lines = T.readlines()
for i,l in enumerate(lines):
sp = l.split(' ')
video = sp[0].split('_')[0]
impath = '/home/caffe/data/Ch/images/'+video+'/'+sp[0] +'.jpg'
img = cv2.imread(impath)
resized_img = resize_image(img, (255,255))
oversampledImages = oversample([resized_img], (224,224)) #5 crops x 2 mirror flips = return 10 images
transposed_img = np.zeros( (10, 3, 224, 224), dtype='f4' )
tp = np.zeros( (1, 3, 224, 224), dtype='f4' )
predictedLabels = []
for j in range(0,oversampledImages.shape[0]-1):
transposed_img[j] = oversampledImages[j].transpose((2,0,1))
tp[0] = transposed_img[j]
net.blobs['data'].data[0] = tp
pred = net.forward(data=tp)
predictedLabels.append(pred['prob'].argmax())
print(predictedLabels)
prediction,num_most_common = Cnt(predictedLabels).most_common(1)[0]
print(prediction)
readImgCounter = readImgCounter + 1
if (prediction == int(sp[1])):
runningCorrect = runningCorrect + 1
runningAcc = runningCorrect / readImgCounter
print('runningAcc:')
print(runningAcc)
print('-----------')
print('runningCorrect:')
print(runningCorrect)
print('-----------')
print('totalImgRead:')
print(readImgCounter)
print('-----------')
testResultsFile.write(sp[0]+' '+str(prediction)+' '+sp[1])
testResultsFile.write('\n')
我最终解决了这个问题。我不是 100% 确定什么有效,但很可能是在学习时将偏差更改为 0。
我已经在 bin 下使用 caffe 工具训练了一个模型,现在我正在尝试使用 python 脚本进行测试,我读入图像并自己对其进行预处理(就像我对训练数据集所做的那样)和我将预训练的权重加载到网络中,但我几乎总是(99.99% 的时间)收到相同的结果 -0- 对于每个测试图像。我确实考虑过我的模型可能过度拟合,但在训练了几个模型之后,我开始意识到我从预测中获得的标签很可能是原因。我还增加了 dropout 并随机裁剪以克服过度拟合,我有大约 60K 用于训练。数据集也大致平衡。我在训练的评估步骤中获得了 77 到 87 的准确率(取决于我处理数据的方式、我使用的架构等)
请原谅我的超级 hacky 代码,我已经远离 caffe 测试一段时间了,所以我怀疑问题是我如何将输入数据传递到网络,但我无法确定:
import h5py, os
import sys
sys.path.append("/home/X/Desktop/caffe-caffe-0.16/python")
from caffe.io import oversample
from caffe.io import resize_image
import caffe
from random import randint
import numpy as np
import cv2
import matplotlib.pyplot as plt
from collections import Counter as Cnt
meanImg = cv2.imread('/home/caffe/data/Ch/Final_meanImg.png')
model_def = '/home/X/Desktop/caffe-caffe-0.16/models/bvlc_googlenet/deploy.prototxt'
model_weights = '/media/X/DATA/SDet/Google__iter_140000.caffemodel'
# load the model
#caffe.set_mode_gpu()
#caffe.set_device(0)
net = caffe.Net(model_def, # defines the structure of the model
model_weights, # contains the trained weights
caffe.TEST) # use test mode (e.g., don't perform dropout)
with open( '/home/caffe/examples/sdet/SDet/test_random.txt', 'r' ) as T, open('/media/X/DATA/SDet/results/testResults.txt','w') as testResultsFile:
readImgCounter = 0
runningCorrect = 0
runningAcc = 0.0
#testResultsFile.write('filename'+' '+'prediction'+' '+'GT')
lines = T.readlines()
for i,l in enumerate(lines):
sp = l.split(' ')
video = sp[0].split('_')[0]
impath = '/home/caffe/data/Ch/images/'+video+'/'+sp[0] +'.jpg'
img = cv2.imread(impath)
resized_img = resize_image(img, (255,255))
oversampledImages = oversample([resized_img], (224,224)) #5 crops x 2 mirror flips = return 10 images
transposed_img = np.zeros( (10, 3, 224, 224), dtype='f4' )
tp = np.zeros( (1, 3, 224, 224), dtype='f4' )
predictedLabels = []
for j in range(0,oversampledImages.shape[0]-1):
transposed_img[j] = oversampledImages[j].transpose((2,0,1))
tp[0] = transposed_img[j]
net.blobs['data'].data[0] = tp
pred = net.forward(data=tp)
predictedLabels.append(pred['prob'].argmax())
print(predictedLabels)
prediction,num_most_common = Cnt(predictedLabels).most_common(1)[0]
print(prediction)
readImgCounter = readImgCounter + 1
if (prediction == int(sp[1])):
runningCorrect = runningCorrect + 1
runningAcc = runningCorrect / readImgCounter
print('runningAcc:')
print(runningAcc)
print('-----------')
print('runningCorrect:')
print(runningCorrect)
print('-----------')
print('totalImgRead:')
print(readImgCounter)
print('-----------')
testResultsFile.write(sp[0]+' '+str(prediction)+' '+sp[1])
testResultsFile.write('\n')
我最终解决了这个问题。我不是 100% 确定什么有效,但很可能是在学习时将偏差更改为 0。