深度学习 CNN 图像预处理

Question

这是我用来加载和预处理图像的代码：

# Load every training image listed in df_flowers, run a marker-based watershed
# segmentation on it, and collect the result together with its class label.
# Lists to load data into
x = [] # per-image model inputs (NOTE: the loop below appends `markers`, not `img`)
y = [] # labels

# Path to folder with training images (the file name is appended by plain
# string formatting, so the trailing separator is part of this literal)
base = "/content/flower_tpu/flower_tpu/flowers_google/flowers_google//"


# Iterating to store images and labels in their respective lists
for idx in range(len(df_flowers)):
  # get the flower row
  flower = df_flowers.iloc[idx]
  # create flower path from the row's `id` field
  path = f"{base}{flower.id}.jpeg"
  # load image
  img = Image.open(path)
  # convert to numpy
  # assumes the file decodes to a 3-channel RGB image -- TODO confirm for
  # grayscale / RGBA files, which COLOR_RGB2GRAY below would not match
  img = np.array(img)
  # Remove noise using Gaussian Blur (5x5 kernel)
  blur = cv2.GaussianBlur(img, (5, 5), 0)
  # Segmentation: grayscale, then inverted binary threshold with the Otsu flag
  gray = cv2.cvtColor(blur, cv2.COLOR_RGB2GRAY)
  ret, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
  # Further noise removal (Morphology): two opening passes with a 3x3 kernel
  kernel = np.ones((3, 3), np.uint8)
  opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=2)
  # sure background area: dilate the opened mask three times
  sure_bg = cv2.dilate(opening, kernel, iterations=3)
  # Finding sure foreground area: keep pixels whose distance-transform value
  # exceeds 70% of the maximum
  dist_transform = cv2.distanceTransform(opening, cv2.DIST_L2, 5)
  ret, sure_fg = cv2.threshold(dist_transform, 0.7 * dist_transform.max(), 255, 0)
  # Finding unknown region: cast the foreground mask to uint8, then subtract
  # it from the sure-background mask
  sure_fg = np.uint8(sure_fg)
  unknown = cv2.subtract(sure_bg, sure_fg)
  # Marker labelling
  ret, markers = cv2.connectedComponents(sure_fg)
  # Add one to all labels so that sure background is not 0, but 1
  markers = markers + 1
  # Now, mark the region of unknown with zero
  markers[unknown == 255] = 0
  markers = cv2.watershed(img, markers)
  # Paint the pixels labelled -1 on `img` ([255, 0, 0] is red in RGB order).
  # This only modifies `img`, which is not stored anywhere below.
  img[markers == -1] = [255, 0, 0]
  # save to X
  # NOTE(review): this stores `markers`, a 2-D per-pixel label array, rather
  # than the 3-channel `img`, so each entry of x has no channel axis.
  x.append(markers)
  # get label: first `label` value in df_labels whose `flower_class` equals
  # this row's `flower_cls` (assumes at least one matching row exists)
  label = df_labels[df_labels['flower_class'] == flower.flower_cls].label.values[0]
  # save to y
  y.append(label)

代码可以运行，但它把图像的形状从 (224,224,3) 变成了 (224,224)。

因此，当我尝试用这些数据训练 VGG16 模型时，出现了以下错误：

Input 0 of layer block1_conv1 is incompatible with the layer: expected ndim=4, found ndim=3. Full shape received: [None, 224, 224]

我该如何解决这个问题？

Answer 1

# Single-image reproduction of the question's preprocessing, followed by the
# fix: expand the 2-D watershed label map to three channels.
# `cv2` must be imported explicitly; the calls below raise NameError without it.
import cv2
import numpy as np
from PIL import Image

x = []
path = "data/25_12024_010.jpg"
# load image
img = Image.open(path)
# convert to numpy
img = np.array(img)
# Remove noise using Gaussian Blur
blur = cv2.GaussianBlur(img, (5, 5), 0)
# Segmentation: grayscale, then inverted binary threshold with the Otsu flag
gray = cv2.cvtColor(blur, cv2.COLOR_RGB2GRAY)
ret, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
# Further noise removal (Morphology)
kernel = np.ones((3, 3), np.uint8)
opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=2)
# sure background area
sure_bg = cv2.dilate(opening, kernel, iterations=3)
# Finding sure foreground area
dist_transform = cv2.distanceTransform(opening, cv2.DIST_L2, 5)
ret, sure_fg = cv2.threshold(dist_transform, 0.7 * dist_transform.max(), 255, 0)
# Finding unknown region
sure_fg = np.uint8(sure_fg)
unknown = cv2.subtract(sure_bg, sure_fg)
# Marker labelling
ret, markers = cv2.connectedComponents(sure_fg)
# Add one to all labels so that sure background is not 0, but 1
markers = markers + 1
# Now, mark the region of unknown with zero
markers[unknown == 255] = 0
markers = cv2.watershed(img, markers)
# Paint the pixels labelled -1 on `img`; `img` itself is not stored below
img[markers == -1] = [255, 0, 0]
# save to X -- first the unmodified 2-D label map, to show the original shape
x.append(markers)

print(x[0].shape) # (120,120)

# The fix: repeat the 2-D label map three times along a new last axis so the
# array gains a channel dimension.
markers = np.stack((markers,)*3, axis=-1)

x.append(markers)

print(x[1].shape) # (120,120,3)

我刚刚测试了您的代码：markers 是一个二维数组，因此您只需将其转换为三维数组（3 通道图像）即可。

只需在 x.append(markers) 之前添加以下这一行：

# Repeat the 2-D `markers` array three times along a new last axis (adds a channel axis)
markers = np.stack((markers,)*3, axis=-1)

深度学习 CNN 图像预处理

Deep Learning CNN image preprocessing

python

machine-learning

image-processing

image-recognition

vgg-net