如何让这个 CNN 检测 RGB 图像?
How do I make this CNN detect RGB images?
我按照教程 (https://www.youtube.com/watch?v=NMd7WjZiCzc) 制作了一个 CNN 模型,用于对猫和狗的图像进行分类。
该模型目前只能处理灰度(黑白)图像。我想知道如何修改代码,使它可以接收 RGB 图像。
import cv2
import numpy as np
from random import shuffle
from tqdm import tqdm
import os
# Dataset folders (raw strings keep the Windows backslashes literal).
TRAIN_DIR = r'C:\Users\snehal\Desktop\Ansh stuff\Object detection\train'
TEST_DIR = r'C:\Users\snehal\Desktop\Ansh stuff\Object detection\test'

# Every image is resized to IMG_SIZE x IMG_SIZE pixels before use.
IMG_SIZE = 50
# Learning rate passed to the optimizer.
LR = 1e-3
# Name used for the saved model; embeds the learning rate and a network tag.
MODEL_NAME = 'dogsvscats-{}-{}.model'.format(LR, '2conv-basic')
def Label_img(img):
    """Return the one-hot class label encoded in a training file name.

    The label is the third dot-separated field counted from the end of the
    name, e.g. ``cat`` in ``cat.0.jpg``.

    Args:
        img: File name (not the full path) of a training image.

    Returns:
        ``[1, 0]`` for a cat image, ``[0, 1]`` for a dog image.

    Raises:
        ValueError: If the name has fewer than three dot-separated fields or
            its label is neither ``cat`` nor ``dog``. Failing loudly keeps a
            ``None`` label from silently entering the training set.
    """
    parts = img.split('.')
    if len(parts) < 3:
        raise ValueError(
            'cannot extract a label from file name: {!r}'.format(img))
    label = parts[-3]
    if label == 'cat':
        return [1, 0]
    if label == 'dog':
        return [0, 1]
    raise ValueError(
        'unknown label {!r} in file name: {!r}'.format(label, img))
def create_train_data():
    """Build, shuffle and cache the grayscale training set.

    Every file in ``TRAIN_DIR`` is read as a single-channel image, resized to
    ``IMG_SIZE`` x ``IMG_SIZE`` and paired with the one-hot label that
    ``Label_img`` derives from its file name. Files OpenCV cannot decode are
    reported and skipped. The shuffled result is also written to
    ``train_data.npy``.

    Returns:
        A list of ``[image, label]`` pairs, both stored as NumPy arrays.
    """
    training_data = []
    for file_name in tqdm(os.listdir(TRAIN_DIR)):
        path = os.path.join(TRAIN_DIR, file_name)
        image = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        if image is None:
            # cv2.imread reports an unreadable file by returning None instead
            # of raising; resizing None would fail with an opaque error.
            print('skipping unreadable image: {}'.format(path))
            continue
        label = Label_img(file_name)
        image = cv2.resize(image, (IMG_SIZE, IMG_SIZE))
        training_data.append([np.array(image), np.array(label)])
    shuffle(training_data)

    # The [image, label] pairs are ragged, so pack them into an explicit
    # object array; recent NumPy refuses to build one implicitly in np.save.
    packed = np.empty((len(training_data), 2), dtype=object)
    for row, (pixels, one_hot) in enumerate(training_data):
        packed[row, 0] = pixels
        packed[row, 1] = one_hot
    np.save('train_data.npy', packed)
    return training_data
def process_test_data():
    """Build and cache the grayscale test set.

    Every file in ``TEST_DIR`` is read as a single-channel image, resized to
    ``IMG_SIZE`` x ``IMG_SIZE`` and paired with its image id, i.e. the part
    of the file name before the first dot. Files OpenCV cannot decode are
    reported and skipped. The result is also written to
    ``testing_data.npy``.

    Returns:
        A list of ``[image, image_id]`` pairs.
    """
    testing_data = []
    for file_name in tqdm(os.listdir(TEST_DIR)):
        path = os.path.join(TEST_DIR, file_name)
        img_num = file_name.split('.')[0]
        image = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        if image is None:
            # cv2.imread reports an unreadable file by returning None instead
            # of raising; resizing None would fail with an opaque error.
            print('skipping unreadable image: {}'.format(path))
            continue
        image = cv2.resize(image, (IMG_SIZE, IMG_SIZE))
        testing_data.append([np.array(image), img_num])

    # The [image, id] pairs are ragged, so pack them into an explicit object
    # array; recent NumPy refuses to build one implicitly in np.save.
    packed = np.empty((len(testing_data), 2), dtype=object)
    for row, (pixels, image_id) in enumerate(testing_data):
        packed[row, 0] = pixels
        packed[row, 1] = image_id
    np.save('testing_data.npy', packed)
    return testing_data
# First run: uncomment the next line to build the cache from the raw images.
#train_data = create_train_data()
# Later runs: reuse the array that create_train_data() saved to disk.
# NOTE(review): allow_pickle=True unpickles arbitrary Python objects, so only
# load a .npy file that you produced yourself.
train_data = np.load('train_data.npy',allow_pickle=True)
print('data has been loaded')
import tflearn
from tflearn.layers.conv import conv_2d,max_pool_2d
from tflearn.layers.core import input_data,dropout,fully_connected
from tflearn.layers.estimator import regression
import tensorflow as tf
tf.reset_default_graph()

# Input layer: batches of IMG_SIZE x IMG_SIZE images with a single channel.
convnet = input_data(shape=[None, IMG_SIZE, IMG_SIZE, 1], name='input')

# Six convolution + max-pooling stages; the filter count alternates between
# 32 and 64 while the remaining arguments stay the same for every stage.
for n_filters in (32, 64, 32, 64, 32, 64):
    convnet = conv_2d(convnet, n_filters, 2, activation='relu')
    convnet = max_pool_2d(convnet, 2)

# Classifier head: dense layer, dropout, then a two-way softmax (cat / dog).
convnet = fully_connected(convnet, 1024, activation='relu')
convnet = dropout(convnet, 0.8)
convnet = fully_connected(convnet, 2, activation='softmax')
convnet = regression(
    convnet,
    optimizer='adam',
    learning_rate=LR,
    loss='categorical_crossentropy',
    name='targets',
)

model = tflearn.DNN(convnet)

# Reuse previously saved weights when a checkpoint with this name exists.
if os.path.exists('{}.meta'.format(MODEL_NAME)):
    model.load(MODEL_NAME)
    print('model has been loaded')
#train = train_data[:-500]
#test = train_data[-500:]
#
#X = np.array([i[0] for i in train]).reshape(-1,IMG_SIZE,IMG_SIZE,1)
#Y = np.array([i[1] for i in train])
#
#test_x = np.array([i[0] for i in test]).reshape(-1,IMG_SIZE,IMG_SIZE,1)
#test_y = np.array([i[1] for i in test])
#
#model.fit({'input':X},{'targets':Y},
# n_epoch=20,validation_set=({'input':test_x},{'targets':test_y}),
# snapshot_step=500,show_metric=True,run_id=MODEL_NAME)
##
#model.save(MODEL_NAME)
import matplotlib.pyplot as plt
# To rebuild the cached test set from the raw images, uncomment:
#test_data = process_test_data()
# Otherwise reuse the array cached in testing_data.npy.
test_data = np.load('testing_data.npy', allow_pickle=True)

fig = plt.figure()
# Plot the first 12 test samples on a 3 x 4 grid, titled with the prediction.
for num, data in enumerate(test_data[:12]):
    # Each sample is stored as [image, image id].
    img_data, img_num = data[0], data[1]
    orig = img_data
    y = fig.add_subplot(3, 4, num + 1)
    # Add the single channel axis that the network input declares.
    data = img_data.reshape(IMG_SIZE, IMG_SIZE, 1)
    model_out = model.predict([data])[0]
    # Output index 1 is the dog class, index 0 the cat class.
    str_label = 'Dog' if np.argmax(model_out) == 1 else 'Cat'
    y.imshow(orig, cmap='gray')
    y.set_title(str_label)
    y.axes.get_xaxis().set_visible(False)
    y.axes.get_yaxis().set_visible(False)
plt.show()
我希望它在 RGB 图像上进行训练并能够在 RGB 图像上进行测试。
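你只需要将输入层的深度(通道数)设置为 3(红、绿、蓝)。
注意:conv_2d 的第三个参数是卷积核大小(filter_size),而不是通道数;原回答给出的 conv_2d(convnet,32,2,activation='relu') -> conv_2d(convnet,32,3,activation='relu') 只会把卷积核大小从 2 改为 3,并不会改变输入通道数。真正需要修改的是 input_data 的形状:
input_data(shape=[None,IMG_SIZE,IMG_SIZE,1],name='input') -> input_data(shape=[None,IMG_SIZE,IMG_SIZE,3],name='input')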
灰度图像有 1 个通道,RGB 图像有 3 个通道。因此,为了处理 RGB 图像,您需要在代码中执行以下更改:
- 以彩色图像代替灰度图像作为输入
- 将 input_data 形状从 1 通道更改为 3 通道
- 将训练和测试数据形状从 1 个通道更改为 3 个通道
- 将 img_data 形状从 1 通道更改为 3 通道
更新代码:
import cv2
import numpy as np
from random import shuffle
from tqdm import tqdm
import os
# Dataset folders (raw strings keep the Windows backslashes literal).
TRAIN_DIR = r'C:\Users\snehal\Desktop\Ansh stuff\Object detection\train'
TEST_DIR = r'C:\Users\snehal\Desktop\Ansh stuff\Object detection\test'

# Every image is resized to IMG_SIZE x IMG_SIZE pixels before use.
IMG_SIZE = 50
# Learning rate passed to the optimizer.
LR = 1e-3
# Name used for the saved model; embeds the learning rate and a network tag.
MODEL_NAME = 'dogsvscats-{}-{}.model'.format(LR, '2conv-basic')
def Label_img(img):
    """Return the one-hot class label encoded in a training file name.

    The label is the third dot-separated field counted from the end of the
    name, e.g. ``cat`` in ``cat.0.jpg``.

    Args:
        img: File name (not the full path) of a training image.

    Returns:
        ``[1, 0]`` for a cat image, ``[0, 1]`` for a dog image.

    Raises:
        ValueError: If the name has fewer than three dot-separated fields or
            its label is neither ``cat`` nor ``dog``. Failing loudly keeps a
            ``None`` label from silently entering the training set.
    """
    parts = img.split('.')
    if len(parts) < 3:
        raise ValueError(
            'cannot extract a label from file name: {!r}'.format(img))
    label = parts[-3]
    if label == 'cat':
        return [1, 0]
    if label == 'dog':
        return [0, 1]
    raise ValueError(
        'unknown label {!r} in file name: {!r}'.format(label, img))
def create_train_data():
    """Build, shuffle and cache the colour training set.

    Every file in ``TRAIN_DIR`` is read with OpenCV's default colour mode
    (3 channels, BGR order), resized to ``IMG_SIZE`` x ``IMG_SIZE`` and paired
    with the one-hot label that ``Label_img`` derives from its file name.
    Files OpenCV cannot decode are reported and skipped. The shuffled result
    is also written to ``train_data.npy``.

    Returns:
        A list of ``[image, label]`` pairs, both stored as NumPy arrays.
    """
    training_data = []
    for file_name in tqdm(os.listdir(TRAIN_DIR)):
        path = os.path.join(TRAIN_DIR, file_name)
        # 1) Taking color image as input and resizing it
        image = cv2.imread(path)
        if image is None:
            # cv2.imread reports an unreadable file by returning None instead
            # of raising; resizing None would fail with an opaque error.
            print('skipping unreadable image: {}'.format(path))
            continue
        label = Label_img(file_name)
        image = cv2.resize(
            image, (IMG_SIZE, IMG_SIZE), interpolation=cv2.INTER_AREA)
        training_data.append([np.array(image), np.array(label)])
    shuffle(training_data)

    # The [image, label] pairs are ragged, so pack them into an explicit
    # object array; recent NumPy refuses to build one implicitly in np.save.
    packed = np.empty((len(training_data), 2), dtype=object)
    for row, (pixels, one_hot) in enumerate(training_data):
        packed[row, 0] = pixels
        packed[row, 1] = one_hot
    np.save('train_data.npy', packed)
    return training_data
def process_test_data():
    """Build and cache the colour test set.

    Every file in ``TEST_DIR`` is read with OpenCV's default colour mode
    (3 channels, BGR order), resized to ``IMG_SIZE`` x ``IMG_SIZE`` and paired
    with its image id, i.e. the part of the file name before the first dot.
    Files OpenCV cannot decode are reported and skipped. The result is also
    written to ``testing_data.npy``.

    Returns:
        A list of ``[image, image_id]`` pairs.
    """
    testing_data = []
    for file_name in tqdm(os.listdir(TEST_DIR)):
        path = os.path.join(TEST_DIR, file_name)
        img_num = file_name.split('.')[0]
        # 1) Taking color image as input and resizing it
        image = cv2.imread(path)
        if image is None:
            # cv2.imread reports an unreadable file by returning None instead
            # of raising; resizing None would fail with an opaque error.
            print('skipping unreadable image: {}'.format(path))
            continue
        image = cv2.resize(
            image, (IMG_SIZE, IMG_SIZE), interpolation=cv2.INTER_AREA)
        testing_data.append([np.array(image), img_num])

    # The [image, id] pairs are ragged, so pack them into an explicit object
    # array; recent NumPy refuses to build one implicitly in np.save.
    packed = np.empty((len(testing_data), 2), dtype=object)
    for row, (pixels, image_id) in enumerate(testing_data):
        packed[row, 0] = pixels
        packed[row, 1] = image_id
    np.save('testing_data.npy', packed)
    return testing_data
# create_train_data() now reads colour images, so it must be run again to
# regenerate train_data.npy instead of reusing a previously saved file.
train_data = create_train_data()
# Reload from disk; this rebinds train_data from the returned list to the
# saved array.
# NOTE(review): allow_pickle=True unpickles arbitrary Python objects, so only
# load a .npy file that you produced yourself.
train_data = np.load('train_data.npy',allow_pickle=True)
print('data has been loaded')
import tflearn
from tflearn.layers.conv import conv_2d,max_pool_2d
from tflearn.layers.core import input_data,dropout,fully_connected
from tflearn.layers.estimator import regression
import tensorflow as tf
tf.reset_default_graph()

# 2) Input layer now declares 3 channels per image instead of 1.
convnet = input_data(shape=[None, IMG_SIZE, IMG_SIZE, 3], name='input')

# Six convolution + max-pooling stages; the filter count alternates between
# 32 and 64 while the remaining arguments stay the same for every stage.
for n_filters in (32, 64, 32, 64, 32, 64):
    convnet = conv_2d(convnet, n_filters, 2, activation='relu')
    convnet = max_pool_2d(convnet, 2)

# Classifier head: dense layer, dropout, then a two-way softmax (cat / dog).
convnet = fully_connected(convnet, 1024, activation='relu')
convnet = dropout(convnet, 0.8)
convnet = fully_connected(convnet, 2, activation='softmax')
convnet = regression(
    convnet,
    optimizer='adam',
    learning_rate=LR,
    loss='categorical_crossentropy',
    name='targets',
)

model = tflearn.DNN(convnet)
# Checkpoint loading is left disabled in this version.
# NOTE(review): presumably because a checkpoint saved under the same
# MODEL_NAME by the 1-channel network would not match this one -- confirm.
# if os.path.exists('{}.meta'.format(MODEL_NAME)):
#     model.load(MODEL_NAME)
#     print('model has been loaded')

# Hold out the last 500 samples for validation and train on the rest.
train = train_data[:-500]
test = train_data[-500:]

# 3) Training images and labels; images are shaped with 3 channels.
X = np.array([sample[0] for sample in train]).reshape(
    -1, IMG_SIZE, IMG_SIZE, 3)
Y = np.array([sample[1] for sample in train])

# 3) Validation images and labels, shaped the same way.
test_x = np.array([sample[0] for sample in test]).reshape(
    -1, IMG_SIZE, IMG_SIZE, 3)
test_y = np.array([sample[1] for sample in test])

model.fit(
    {'input': X},
    {'targets': Y},
    n_epoch=20,
    validation_set=({'input': test_x}, {'targets': test_y}),
    snapshot_step=500,
    show_metric=True,
    run_id=MODEL_NAME,
)
model.save(MODEL_NAME)
import matplotlib.pyplot as plt
# process_test_data() now reads colour images, so it must be run again to
# regenerate testing_data.npy instead of reusing a previously saved file.
test_data = process_test_data()
# NOTE(review): allow_pickle=True unpickles arbitrary Python objects, so only
# load a .npy file that you produced yourself.
test_data = np.load('testing_data.npy', allow_pickle=True)

fig = plt.figure()
# Plot the first 12 test samples on a 3 x 4 grid, titled with the prediction.
for num, sample in enumerate(test_data[:12]):
    # Each sample is stored as [image, image id]; only the image is needed.
    img_data = sample[0]
    y = fig.add_subplot(3, 4, num + 1)
    # 4) Changing img_data shape from 1 channel to 3 channel
    data = img_data.reshape(IMG_SIZE, IMG_SIZE, 3)
    model_out = model.predict([data])[0]
    # Output index 1 is the dog class, index 0 the cat class.
    str_label = 'Dog' if np.argmax(model_out) == 1 else 'Cat'
    # OpenCV stores colour images as BGR while matplotlib expects RGB, so
    # reverse the channel axis for display only. No cmap is passed because
    # matplotlib ignores it for 3-channel data.
    y.imshow(img_data[:, :, ::-1])
    y.set_title(str_label)
    y.axes.get_xaxis().set_visible(False)
    y.axes.get_yaxis().set_visible(False)
plt.show()
我按照教程 (https://www.youtube.com/watch?v=NMd7WjZiCzc) 制作了一个 CNN 模型,用于对猫和狗的图像进行分类。
该模型目前只能处理灰度(黑白)图像。我想知道如何修改代码,使它可以接收 RGB 图像。
import cv2
import numpy as np
from random import shuffle
from tqdm import tqdm
import os
# Dataset folders (raw strings keep the Windows backslashes literal).
TRAIN_DIR = r'C:\Users\snehal\Desktop\Ansh stuff\Object detection\train'
TEST_DIR = r'C:\Users\snehal\Desktop\Ansh stuff\Object detection\test'

# Every image is resized to IMG_SIZE x IMG_SIZE pixels before use.
IMG_SIZE = 50
# Learning rate passed to the optimizer.
LR = 1e-3
# Name used for the saved model; embeds the learning rate and a network tag.
MODEL_NAME = 'dogsvscats-{}-{}.model'.format(LR, '2conv-basic')
def Label_img(img):
    """Return the one-hot class label encoded in a training file name.

    The label is the third dot-separated field counted from the end of the
    name, e.g. ``cat`` in ``cat.0.jpg``.

    Args:
        img: File name (not the full path) of a training image.

    Returns:
        ``[1, 0]`` for a cat image, ``[0, 1]`` for a dog image.

    Raises:
        ValueError: If the name has fewer than three dot-separated fields or
            its label is neither ``cat`` nor ``dog``. Failing loudly keeps a
            ``None`` label from silently entering the training set.
    """
    parts = img.split('.')
    if len(parts) < 3:
        raise ValueError(
            'cannot extract a label from file name: {!r}'.format(img))
    label = parts[-3]
    if label == 'cat':
        return [1, 0]
    if label == 'dog':
        return [0, 1]
    raise ValueError(
        'unknown label {!r} in file name: {!r}'.format(label, img))
def create_train_data():
    """Build, shuffle and cache the grayscale training set.

    Every file in ``TRAIN_DIR`` is read as a single-channel image, resized to
    ``IMG_SIZE`` x ``IMG_SIZE`` and paired with the one-hot label that
    ``Label_img`` derives from its file name. Files OpenCV cannot decode are
    reported and skipped. The shuffled result is also written to
    ``train_data.npy``.

    Returns:
        A list of ``[image, label]`` pairs, both stored as NumPy arrays.
    """
    training_data = []
    for file_name in tqdm(os.listdir(TRAIN_DIR)):
        path = os.path.join(TRAIN_DIR, file_name)
        image = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        if image is None:
            # cv2.imread reports an unreadable file by returning None instead
            # of raising; resizing None would fail with an opaque error.
            print('skipping unreadable image: {}'.format(path))
            continue
        label = Label_img(file_name)
        image = cv2.resize(image, (IMG_SIZE, IMG_SIZE))
        training_data.append([np.array(image), np.array(label)])
    shuffle(training_data)

    # The [image, label] pairs are ragged, so pack them into an explicit
    # object array; recent NumPy refuses to build one implicitly in np.save.
    packed = np.empty((len(training_data), 2), dtype=object)
    for row, (pixels, one_hot) in enumerate(training_data):
        packed[row, 0] = pixels
        packed[row, 1] = one_hot
    np.save('train_data.npy', packed)
    return training_data
def process_test_data():
    """Build and cache the grayscale test set.

    Every file in ``TEST_DIR`` is read as a single-channel image, resized to
    ``IMG_SIZE`` x ``IMG_SIZE`` and paired with its image id, i.e. the part
    of the file name before the first dot. Files OpenCV cannot decode are
    reported and skipped. The result is also written to
    ``testing_data.npy``.

    Returns:
        A list of ``[image, image_id]`` pairs.
    """
    testing_data = []
    for file_name in tqdm(os.listdir(TEST_DIR)):
        path = os.path.join(TEST_DIR, file_name)
        img_num = file_name.split('.')[0]
        image = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        if image is None:
            # cv2.imread reports an unreadable file by returning None instead
            # of raising; resizing None would fail with an opaque error.
            print('skipping unreadable image: {}'.format(path))
            continue
        image = cv2.resize(image, (IMG_SIZE, IMG_SIZE))
        testing_data.append([np.array(image), img_num])

    # The [image, id] pairs are ragged, so pack them into an explicit object
    # array; recent NumPy refuses to build one implicitly in np.save.
    packed = np.empty((len(testing_data), 2), dtype=object)
    for row, (pixels, image_id) in enumerate(testing_data):
        packed[row, 0] = pixels
        packed[row, 1] = image_id
    np.save('testing_data.npy', packed)
    return testing_data
# First run: uncomment the next line to build the cache from the raw images.
#train_data = create_train_data()
# Later runs: reuse the array that create_train_data() saved to disk.
# NOTE(review): allow_pickle=True unpickles arbitrary Python objects, so only
# load a .npy file that you produced yourself.
train_data = np.load('train_data.npy',allow_pickle=True)
print('data has been loaded')
import tflearn
from tflearn.layers.conv import conv_2d,max_pool_2d
from tflearn.layers.core import input_data,dropout,fully_connected
from tflearn.layers.estimator import regression
import tensorflow as tf
tf.reset_default_graph()

# Input layer: batches of IMG_SIZE x IMG_SIZE images with a single channel.
convnet = input_data(shape=[None, IMG_SIZE, IMG_SIZE, 1], name='input')

# Six convolution + max-pooling stages; the filter count alternates between
# 32 and 64 while the remaining arguments stay the same for every stage.
for n_filters in (32, 64, 32, 64, 32, 64):
    convnet = conv_2d(convnet, n_filters, 2, activation='relu')
    convnet = max_pool_2d(convnet, 2)

# Classifier head: dense layer, dropout, then a two-way softmax (cat / dog).
convnet = fully_connected(convnet, 1024, activation='relu')
convnet = dropout(convnet, 0.8)
convnet = fully_connected(convnet, 2, activation='softmax')
convnet = regression(
    convnet,
    optimizer='adam',
    learning_rate=LR,
    loss='categorical_crossentropy',
    name='targets',
)

model = tflearn.DNN(convnet)

# Reuse previously saved weights when a checkpoint with this name exists.
if os.path.exists('{}.meta'.format(MODEL_NAME)):
    model.load(MODEL_NAME)
    print('model has been loaded')
#train = train_data[:-500]
#test = train_data[-500:]
#
#X = np.array([i[0] for i in train]).reshape(-1,IMG_SIZE,IMG_SIZE,1)
#Y = np.array([i[1] for i in train])
#
#test_x = np.array([i[0] for i in test]).reshape(-1,IMG_SIZE,IMG_SIZE,1)
#test_y = np.array([i[1] for i in test])
#
#model.fit({'input':X},{'targets':Y},
# n_epoch=20,validation_set=({'input':test_x},{'targets':test_y}),
# snapshot_step=500,show_metric=True,run_id=MODEL_NAME)
##
#model.save(MODEL_NAME)
import matplotlib.pyplot as plt
# To rebuild the cached test set from the raw images, uncomment:
#test_data = process_test_data()
# Otherwise reuse the array cached in testing_data.npy.
test_data = np.load('testing_data.npy', allow_pickle=True)

fig = plt.figure()
# Plot the first 12 test samples on a 3 x 4 grid, titled with the prediction.
for num, data in enumerate(test_data[:12]):
    # Each sample is stored as [image, image id].
    img_data, img_num = data[0], data[1]
    orig = img_data
    y = fig.add_subplot(3, 4, num + 1)
    # Add the single channel axis that the network input declares.
    data = img_data.reshape(IMG_SIZE, IMG_SIZE, 1)
    model_out = model.predict([data])[0]
    # Output index 1 is the dog class, index 0 the cat class.
    str_label = 'Dog' if np.argmax(model_out) == 1 else 'Cat'
    y.imshow(orig, cmap='gray')
    y.set_title(str_label)
    y.axes.get_xaxis().set_visible(False)
    y.axes.get_yaxis().set_visible(False)
plt.show()
我希望它在 RGB 图像上进行训练并能够在 RGB 图像上进行测试。
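你只需要将输入层的深度(通道数)设置为 3(红、绿、蓝)。
注意:conv_2d 的第三个参数是卷积核大小(filter_size),而不是通道数;原回答给出的 conv_2d(convnet,32,2,activation='relu') -> conv_2d(convnet,32,3,activation='relu') 只会把卷积核大小从 2 改为 3,并不会改变输入通道数。真正需要修改的是 input_data 的形状:
input_data(shape=[None,IMG_SIZE,IMG_SIZE,1],name='input') -> input_data(shape=[None,IMG_SIZE,IMG_SIZE,3],name='input')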
灰度图像有 1 个通道,RGB 图像有 3 个通道。因此,为了处理 RGB 图像,您需要在代码中执行以下更改:
- 以彩色图像代替灰度图像作为输入
- 将 input_data 形状从 1 通道更改为 3 通道
- 将训练和测试数据形状从 1 个通道更改为 3 个通道
- 将 img_data 形状从 1 通道更改为 3 通道
更新代码:
import cv2
import numpy as np
from random import shuffle
from tqdm import tqdm
import os
# Dataset folders (raw strings keep the Windows backslashes literal).
TRAIN_DIR = r'C:\Users\snehal\Desktop\Ansh stuff\Object detection\train'
TEST_DIR = r'C:\Users\snehal\Desktop\Ansh stuff\Object detection\test'

# Every image is resized to IMG_SIZE x IMG_SIZE pixels before use.
IMG_SIZE = 50
# Learning rate passed to the optimizer.
LR = 1e-3
# Name used for the saved model; embeds the learning rate and a network tag.
MODEL_NAME = 'dogsvscats-{}-{}.model'.format(LR, '2conv-basic')
def Label_img(img):
    """Return the one-hot class label encoded in a training file name.

    The label is the third dot-separated field counted from the end of the
    name, e.g. ``cat`` in ``cat.0.jpg``.

    Args:
        img: File name (not the full path) of a training image.

    Returns:
        ``[1, 0]`` for a cat image, ``[0, 1]`` for a dog image.

    Raises:
        ValueError: If the name has fewer than three dot-separated fields or
            its label is neither ``cat`` nor ``dog``. Failing loudly keeps a
            ``None`` label from silently entering the training set.
    """
    parts = img.split('.')
    if len(parts) < 3:
        raise ValueError(
            'cannot extract a label from file name: {!r}'.format(img))
    label = parts[-3]
    if label == 'cat':
        return [1, 0]
    if label == 'dog':
        return [0, 1]
    raise ValueError(
        'unknown label {!r} in file name: {!r}'.format(label, img))
def create_train_data():
    """Build, shuffle and cache the colour training set.

    Every file in ``TRAIN_DIR`` is read with OpenCV's default colour mode
    (3 channels, BGR order), resized to ``IMG_SIZE`` x ``IMG_SIZE`` and paired
    with the one-hot label that ``Label_img`` derives from its file name.
    Files OpenCV cannot decode are reported and skipped. The shuffled result
    is also written to ``train_data.npy``.

    Returns:
        A list of ``[image, label]`` pairs, both stored as NumPy arrays.
    """
    training_data = []
    for file_name in tqdm(os.listdir(TRAIN_DIR)):
        path = os.path.join(TRAIN_DIR, file_name)
        # 1) Taking color image as input and resizing it
        image = cv2.imread(path)
        if image is None:
            # cv2.imread reports an unreadable file by returning None instead
            # of raising; resizing None would fail with an opaque error.
            print('skipping unreadable image: {}'.format(path))
            continue
        label = Label_img(file_name)
        image = cv2.resize(
            image, (IMG_SIZE, IMG_SIZE), interpolation=cv2.INTER_AREA)
        training_data.append([np.array(image), np.array(label)])
    shuffle(training_data)

    # The [image, label] pairs are ragged, so pack them into an explicit
    # object array; recent NumPy refuses to build one implicitly in np.save.
    packed = np.empty((len(training_data), 2), dtype=object)
    for row, (pixels, one_hot) in enumerate(training_data):
        packed[row, 0] = pixels
        packed[row, 1] = one_hot
    np.save('train_data.npy', packed)
    return training_data
def process_test_data():
    """Build and cache the colour test set.

    Every file in ``TEST_DIR`` is read with OpenCV's default colour mode
    (3 channels, BGR order), resized to ``IMG_SIZE`` x ``IMG_SIZE`` and paired
    with its image id, i.e. the part of the file name before the first dot.
    Files OpenCV cannot decode are reported and skipped. The result is also
    written to ``testing_data.npy``.

    Returns:
        A list of ``[image, image_id]`` pairs.
    """
    testing_data = []
    for file_name in tqdm(os.listdir(TEST_DIR)):
        path = os.path.join(TEST_DIR, file_name)
        img_num = file_name.split('.')[0]
        # 1) Taking color image as input and resizing it
        image = cv2.imread(path)
        if image is None:
            # cv2.imread reports an unreadable file by returning None instead
            # of raising; resizing None would fail with an opaque error.
            print('skipping unreadable image: {}'.format(path))
            continue
        image = cv2.resize(
            image, (IMG_SIZE, IMG_SIZE), interpolation=cv2.INTER_AREA)
        testing_data.append([np.array(image), img_num])

    # The [image, id] pairs are ragged, so pack them into an explicit object
    # array; recent NumPy refuses to build one implicitly in np.save.
    packed = np.empty((len(testing_data), 2), dtype=object)
    for row, (pixels, image_id) in enumerate(testing_data):
        packed[row, 0] = pixels
        packed[row, 1] = image_id
    np.save('testing_data.npy', packed)
    return testing_data
# create_train_data() now reads colour images, so it must be run again to
# regenerate train_data.npy instead of reusing a previously saved file.
train_data = create_train_data()
# Reload from disk; this rebinds train_data from the returned list to the
# saved array.
# NOTE(review): allow_pickle=True unpickles arbitrary Python objects, so only
# load a .npy file that you produced yourself.
train_data = np.load('train_data.npy',allow_pickle=True)
print('data has been loaded')
import tflearn
from tflearn.layers.conv import conv_2d,max_pool_2d
from tflearn.layers.core import input_data,dropout,fully_connected
from tflearn.layers.estimator import regression
import tensorflow as tf
tf.reset_default_graph()

# 2) Input layer now declares 3 channels per image instead of 1.
convnet = input_data(shape=[None, IMG_SIZE, IMG_SIZE, 3], name='input')

# Six convolution + max-pooling stages; the filter count alternates between
# 32 and 64 while the remaining arguments stay the same for every stage.
for n_filters in (32, 64, 32, 64, 32, 64):
    convnet = conv_2d(convnet, n_filters, 2, activation='relu')
    convnet = max_pool_2d(convnet, 2)

# Classifier head: dense layer, dropout, then a two-way softmax (cat / dog).
convnet = fully_connected(convnet, 1024, activation='relu')
convnet = dropout(convnet, 0.8)
convnet = fully_connected(convnet, 2, activation='softmax')
convnet = regression(
    convnet,
    optimizer='adam',
    learning_rate=LR,
    loss='categorical_crossentropy',
    name='targets',
)

model = tflearn.DNN(convnet)
# Checkpoint loading is left disabled in this version.
# NOTE(review): presumably because a checkpoint saved under the same
# MODEL_NAME by the 1-channel network would not match this one -- confirm.
# if os.path.exists('{}.meta'.format(MODEL_NAME)):
#     model.load(MODEL_NAME)
#     print('model has been loaded')

# Hold out the last 500 samples for validation and train on the rest.
train = train_data[:-500]
test = train_data[-500:]

# 3) Training images and labels; images are shaped with 3 channels.
X = np.array([sample[0] for sample in train]).reshape(
    -1, IMG_SIZE, IMG_SIZE, 3)
Y = np.array([sample[1] for sample in train])

# 3) Validation images and labels, shaped the same way.
test_x = np.array([sample[0] for sample in test]).reshape(
    -1, IMG_SIZE, IMG_SIZE, 3)
test_y = np.array([sample[1] for sample in test])

model.fit(
    {'input': X},
    {'targets': Y},
    n_epoch=20,
    validation_set=({'input': test_x}, {'targets': test_y}),
    snapshot_step=500,
    show_metric=True,
    run_id=MODEL_NAME,
)
model.save(MODEL_NAME)
import matplotlib.pyplot as plt
# process_test_data() now reads colour images, so it must be run again to
# regenerate testing_data.npy instead of reusing a previously saved file.
test_data = process_test_data()
# NOTE(review): allow_pickle=True unpickles arbitrary Python objects, so only
# load a .npy file that you produced yourself.
test_data = np.load('testing_data.npy', allow_pickle=True)

fig = plt.figure()
# Plot the first 12 test samples on a 3 x 4 grid, titled with the prediction.
for num, sample in enumerate(test_data[:12]):
    # Each sample is stored as [image, image id]; only the image is needed.
    img_data = sample[0]
    y = fig.add_subplot(3, 4, num + 1)
    # 4) Changing img_data shape from 1 channel to 3 channel
    data = img_data.reshape(IMG_SIZE, IMG_SIZE, 3)
    model_out = model.predict([data])[0]
    # Output index 1 is the dog class, index 0 the cat class.
    str_label = 'Dog' if np.argmax(model_out) == 1 else 'Cat'
    # OpenCV stores colour images as BGR while matplotlib expects RGB, so
    # reverse the channel axis for display only. No cmap is passed because
    # matplotlib ignores it for 3-channel data.
    y.imshow(img_data[:, :, ::-1])
    y.set_title(str_label)
    y.axes.get_xaxis().set_visible(False)
    y.axes.get_yaxis().set_visible(False)
plt.show()