Failed to convert a NumPy array to a Tensor (Unsupported object type numpy.ndarray) which is different from the answers I've seen

I'm following a tutorial on building a custom object detection model to recognise a logo, but I've been stuck for a while. I've tried all the solutions to this problem that I've seen on Stack Overflow, but unfortunately none of them worked. The issue I'm currently running into is that my np array is a one-dimensional array with shape (145,). I've tried to fix its structure, but nothing has worked for me. I've tried:

train_images=np.array(train_images,dtype=object)

train_images=np.asarray(train_images)

and several other variations.
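As far as I can tell these just give me a 1-D object array again. A tiny example of what I mean (two dummy arrays with different shapes standing in for my images):

import numpy as np

# two dummy "images" with different shapes, like my dataset
imgs = [np.zeros((78, 323, 3)), np.zeros((180, 235, 3))]
print(np.array(imgs, dtype=object).shape)   # (2,) -- one object per image, not (2, H, W, 3)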

Libraries:

import matplotlib.pyplot as plt
import numpy as np
import os
import PIL
import tensorflow as tf
import pathlib
import pandas as pd
from PIL import IcnsImagePlugin
from PIL.ImageDraw import Draw
import glob
import xml.etree.ElementTree as ET
from tensorflow.keras.utils import to_categorical
from keras.preprocessing import image 
from sklearn.model_selection import train_test_split
from skimage.segmentation import mark_boundaries 

Converting my xml files [the bounding-box annotations for the images] into 2 csv files: one for the training images and one for the validation images. The bounding boxes were created with LabelImg:

SKIP_NEGATIVES = True
NEGATIVE_CLASS = "no_logo"


def xml_to_csv(path, skipNegatives):
    xml_list = []
    for xml_file in glob.glob(path + '/*.xml'):
        #print("XML_FILE is: "+xml_file)
        tree = ET.parse(xml_file)
        root = tree.getroot()
        if root.find('object'):           
            for member in root.findall('object'):
                bbx = member.find('bndbox')                
                xmin = round(float(bbx.find('xmin').text))
                ymin = round(float(bbx.find('ymin').text))
                xmax = round(float(bbx.find('xmax').text))
                ymax = round(float(bbx.find('ymax').text))
                label = member.find('name').text
                value = (root.find('filename').text,
                        int(root.find('size')[0].text),
                        int(root.find('size')[1].text),
                        label,
                        xmin,
                        ymin,
                        xmax,
                        ymax
                        )
                print(value)
                
                if(value[1]>0 and value[2]>0):
                  xml_list.append(value)
                  print("Value appended",end=" ")
                  print(value)


        elif not skipNegatives:
            value = (root.find('filename').text,
                        int(root.find('size')[0].text),
                        int(root.find('size')[1].text),
                        NEGATIVE_CLASS,
                        0,
                        0,
                        0,
                        0
                        )
            print("Printing Value")
            print(value)

            if(value[1]>0 and value[2]>0):
              xml_list.append(value)
              print("Value appended",end=" ")
              print(value)
            else:
              print("VALUE NOT APPENDED")

    column_name = ['filename', 'width', 'height',
                   'class', 'xmin', 'ymin', 'xmax', 'ymax']

    print("Printing XML_LIST: ")
    print(xml_list)
    xml_df = pd.DataFrame(xml_list, columns=column_name)

    print("Printing xml_df")
    print(xml_df)
    return xml_df


def main():
    datasets = ['/content/drive/MyDrive/Logo_Model/train','/content/drive/MyDrive/Logo_Model/validation']

    for ds in datasets:
        image_path = os.path.join(os.getcwd(), 'Images', ds)
        xml_df = xml_to_csv(image_path, SKIP_NEGATIVES)

        print(xml_df)
        xml_df.to_csv('/{}_data.csv'.format(ds), index=None)
        print('Successfully converted xml to csv.')


main()
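For reference, the resulting CSV has one row per bounding box, laid out roughly like this (the filenames and numbers below are made up, just to show the format):

filename,width,height,class,xmin,ymin,xmax,ymax
example1.jpg,323,78,logo,10,5,300,70
example2.jpg,235,180,logo,40,20,210,170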

This is where the problem starts. When train_img_arr is appended to train_images, the train_images I end up with has shape (145,). The process is the same for the validation data.

num_classes = 2
classes = ["logo","no_logo"]

TRAINING_CSV_FILE = '/content/drive/MyDrive/Logo_Model/train/logo_data.csv'
TRAINING_IMAGE_DIR = '/content/drive/MyDrive/Logo_Model/train/'
training_image_records = pd.read_csv(TRAINING_CSV_FILE)

train_image_path = os.path.join(os.getcwd(), TRAINING_IMAGE_DIR)

train_images = []
train_targets = []
train_labels = []

for index, row in training_image_records.iterrows():
    
  (filename, width, height, class_name, xmin, ymin, xmax, ymax) = row
  
  train_image_fullpath = os.path.join(train_image_path, filename)
  train_img = tf.keras.preprocessing.image.load_img(train_image_fullpath, target_size=(height, width))
  train_img_arr = tf.keras.preprocessing.image.img_to_array(train_img)
  
  
  xmin = round(xmin/ width, 2)
  ymin = round(ymin/ height, 2)
  xmax = round(xmax/ width, 2)
  ymax = round(ymax/ height, 2)
  
  train_images.append(train_img_arr)
  train_targets.append((xmin, ymin, xmax, ymax))
  train_labels.append(classes.index(class_name))

Sample output for train_img_arr (the shape of each array):

(78, 323, 3)
(180, 235, 3)
(180, 166, 3)
(156, 311, 3)
(180, 342, 3)
(180, 197, 3)
(180, 315, 3)
(180, 297, 3)
(180, 156, 3)
(180, 190, 3)
(180, 325, 3)
(180, 227, 3)
(176, 192, 3)
(180, 235, 3)
(180, 138, 3)
(180, 222, 3)
(180, 213, 3)

Converting to np arrays:

train_images = np.array(train_images)
train_targets = np.array(train_targets)
train_labels = np.array(train_labels)

print(train_images.shape)
print(train_targets.shape)
print(train_labels.shape)

Output:

(145,)
(145, 4)
(145,)
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:2: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.
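From what I understand, the warning means NumPy is building a 1-D object array because the per-image shapes differ, and TensorFlow then refuses to convert it. A tiny standalone check that appears to reproduce the same error:

import numpy as np
import tensorflow as tf

ragged = [np.zeros((78, 323, 3), dtype=np.float32),
          np.zeros((180, 235, 3), dtype=np.float32)]
arr = np.array(ragged, dtype=object)

# ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type numpy.ndarray)
tf.convert_to_tensor(arr)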

Here is the rest of my code:

VALIDATION_CSV_FILE = '/content/drive/MyDrive/Logo_Model/validation/logo_data.csv'
VALIDATION_IMAGE_DIR = '/content/drive/MyDrive/Logo_Model/validation/'
validation_image_records = pd.read_csv(VALIDATION_CSV_FILE)

validation_image_path = os.path.join(os.getcwd(), VALIDATION_IMAGE_DIR)

validation_images = []
validation_targets = []
validation_labels = []

for index, row in validation_image_records.iterrows():
    
  (filename, width, height, class_name, xmin, ymin, xmax, ymax) = row
  
  validation_image_fullpath = os.path.join(validation_image_path, filename)
  validation_img = tf.keras.preprocessing.image.load_img(validation_image_fullpath, target_size=(height, width))
  validation_img_arr =tf.keras.preprocessing.image.img_to_array(validation_img)
  
  
  xmin = round(xmin/ width, 2)
  ymin = round(ymin/ height, 2)
  xmax = round(xmax/ width, 2)
  ymax = round(ymax/ height, 2)
  
  validation_images.append(validation_img_arr)
  validation_targets.append((xmin, ymin, xmax, ymax))
  validation_labels.append(classes.index(class_name))
validation_images = np.array(validation_images)
validation_targets = np.array(validation_targets)
validation_labels = np.array(validation_labels)

print(validation_images.shape)
print(validation_targets.shape)
print(validation_labels.shape)
input_shape = (height,width,3)
input_layer = tf.keras.layers.Input(input_shape)

#create the base layers
base_layers = tf.keras.layers.experimental.preprocessing.Rescaling(1./255, name='bl_1')(input_layer)
base_layers = tf.keras.layers.Conv2D(16, 3, padding='same', activation='relu', name='bl_2')(base_layers)
base_layers = tf.keras.layers.MaxPooling2D(name='bl_3')(base_layers)
base_layers = tf.keras.layers.Conv2D(32, 3, padding='same', activation='relu', name='bl_4')(base_layers)
base_layers = tf.keras.layers.MaxPooling2D(name='bl_5')(base_layers)
base_layers = tf.keras.layers.Conv2D(64, 3, padding='same', activation='relu', name='bl_6')(base_layers)
base_layers = tf.keras.layers.MaxPooling2D(name='bl_7')(base_layers)
base_layers = tf.keras.layers.Flatten(name='bl_8')(base_layers)

#create the classifier branch
classifier_branch = tf.keras.layers.Dense(128, activation='relu', name='cl_1')(base_layers)
classifier_branch = tf.keras.layers.Dense(num_classes, name='cl_head')(classifier_branch)  

#create the localiser branch
locator_branch = tf.keras.layers.Dense(128, activation='relu', name='bb_1')(base_layers)
locator_branch = tf.keras.layers.Dense(64, activation='relu', name='bb_2')(locator_branch)
locator_branch = tf.keras.layers.Dense(32, activation='relu', name='bb_3')(locator_branch)
locator_branch = tf.keras.layers.Dense(4, activation='sigmoid', name='bb_head')(locator_branch)

model = tf.keras.Model(input_layer,outputs=[classifier_branch,locator_branch])

model.summary()
losses ={"cl_head":tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), "bb_head":tf.keras.losses.MSE}
model.compile(loss=losses, optimizer='Adam', metrics=['accuracy'])
trainTargets = {
    "cl_head": train_labels,
    "bb_head": train_targets
}
validationTargets = {
    "cl_head": validation_labels,
    "bb_head": validation_targets
}
history = model.fit(train_images, trainTargets,validation_data=(validation_images, validationTargets),batch_size=4,epochs=20,shuffle=True,verbose=1)

The numpy array never gets a full (N, height, width, 3) shape because the shape of each image is not fixed, so NumPy falls back to a 1-D object array of shape (145,).

Add tf.image.resize so that every image ends up with the same fixed shape:

train_img_arr = tf.keras.preprocessing.image.img_to_array(train_img)
train_img_arr = tf.image.resize(train_img_arr, (size1, size2))
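For example, a rough sketch assuming an arbitrary fixed size of 180x180 (IMG_SIZE and load_image are just names chosen here; pick whatever size suits your model). Because the box coordinates are already divided by the original width/height, they stay valid after resizing:

import numpy as np
import tensorflow as tf

IMG_SIZE = 180  # arbitrary fixed size; the model's input_shape must match it

def load_image(path, size=IMG_SIZE):
    # load at native resolution, then resize so every array has the same shape
    img = tf.keras.preprocessing.image.load_img(path)
    arr = tf.keras.preprocessing.image.img_to_array(img)
    return tf.image.resize(arr, (size, size)).numpy()

# in the loading loop:
#   train_images.append(load_image(train_image_fullpath))
# afterwards np.array(train_images) has shape (num_images, 180, 180, 3), and the input layer becomes:
#   input_layer = tf.keras.layers.Input((IMG_SIZE, IMG_SIZE, 3))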

If you want your input data to keep different shapes, you can check this link.