Traffic sign recognition iOS CoreML - label not showing class

I'm following this tutorial to build a simple deep learning app for traffic sign recognition: link

I built my own model, and I also tried it with the model from this repository: link

When I run the app from Xcode on my iPhone, I can see the camera image, but the text always shows "Label", no matter what is on the screen. The only thing I changed from the tutorial is that I hardcoded the classes before converting to an mlmodel:
# import necessary packages
from keras.models import load_model
import coremltools
import argparse
import pickle
# construct the argument parser and parse the arguments

# load the class labels
print("[INFO] loading class labels from label binarizer")
# lb = pickle.loads(open(args["labelbin"], "rb").read())
# class_labels = lb.classes_.tolist()
class_labels = list(range(1, 43))
print("[INFO] class labels: {}".format(class_labels))
# load the trained convolutional neural network
print("[INFO] loading model...")
model = load_model('my_model.h5')
# convert the model to coreml format
print("[INFO] converting model")
coreml_model = coremltools.converters.keras.convert(model,
    input_names="image",
    image_input_names="image",
    image_scale=1/255.0,
    class_labels=class_labels,
    is_bgr=True)
# save the model to disk
output = "mymodel.mlmodel"
print("[INFO] saving model as {}".format(output))
coreml_model.save(output)

So instead of using the label binarizer, I told the converter that my model has 43 classes.
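
One way to double-check this step (worth doing, since `list(range(1, 43))` actually yields only 42 labels, not 43) is to load the saved spec with coremltools and print the labels that really got embedded. A minimal sketch, assuming the mymodel.mlmodel file produced by the script above:

# Sketch: print the class labels embedded in the converted model.
import coremltools

spec = coremltools.utils.load_spec("mymodel.mlmodel")
classifier = spec.neuralNetworkClassifier

# Integer labels land in int64ClassLabels, strings in stringClassLabels.
if classifier.HasField("int64ClassLabels"):
    labels = list(classifier.int64ClassLabels.vector)
else:
    labels = list(classifier.stringClassLabels.vector)

print(len(labels), "labels embedded:", labels)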

Here is my AppDelegate.swift:

//
//  AppDelegate.swift
//  trafficsign
//
//  Created by administrator on 2020. 11. 11..
//  Copyright © 2020. administrator. All rights reserved.
//

import UIKit

@UIApplicationMain
class AppDelegate: UIResponder, UIApplicationDelegate {

    var window: UIWindow?

    func application(_ application: UIApplication, didFinishLaunchingWithOptions launchOptions: [UIApplication.LaunchOptionsKey: Any]?) -> Bool {
        // Override point for customization after application launch.
        
        window = UIWindow()
        window?.makeKeyAndVisible()
        let vc = ViewController()
        
        window?.rootViewController = vc
        return true
    }
}

My SceneDelegate.swift:

//
//  SceneDelegate.swift
//  trafficsign
//
//  Created by administrator on 2020. 11. 11..
//  Copyright © 2020. administrator. All rights reserved.
//

import UIKit

class SceneDelegate: UIResponder, UIWindowSceneDelegate {

    var window: UIWindow?


    func scene(_ scene: UIScene, willConnectTo session: UISceneSession, options connectionOptions: UIScene.ConnectionOptions) {
        // Use this method to optionally configure and attach the UIWindow `window` to the provided UIWindowScene `scene`.
        // If using a storyboard, the `window` property will automatically be initialized and attached to the scene.
        // This delegate does not imply the connecting scene or session are new (see `application:configurationForConnectingSceneSession` instead).
        guard let windowScene = (scene as? UIWindowScene) else { return }
        window = UIWindow(windowScene: windowScene)
        window?.rootViewController = ViewController()
        window?.makeKeyAndVisible()
    }

    func sceneDidDisconnect(_ scene: UIScene) {
        // Called as the scene is being released by the system.
        // This occurs shortly after the scene enters the background, or when its session is discarded.
        // Release any resources associated with this scene that can be re-created the next time the scene connects.
        // The scene may re-connect later, as its session was not necessarily discarded (see `application:didDiscardSceneSessions` instead).
    }

    func sceneDidBecomeActive(_ scene: UIScene) {
        // Called when the scene has moved from an inactive state to an active state.
        // Use this method to restart any tasks that were paused (or not yet started) when the scene was inactive.
    }

    func sceneWillResignActive(_ scene: UIScene) {
        // Called when the scene will move from an active state to an inactive state.
        // This may occur due to temporary interruptions (ex. an incoming phone call).
    }

    func sceneWillEnterForeground(_ scene: UIScene) {
        // Called as the scene transitions from the background to the foreground.
        // Use this method to undo the changes made on entering the background.
    }

    func sceneDidEnterBackground(_ scene: UIScene) {
        // Called as the scene transitions from the foreground to the background.
        // Use this method to save data, release shared resources, and store enough scene-specific state information
        // to restore the scene back to its current state.
    }


}

And most importantly, my ViewController.swift:

//
//  ViewController.swift
//  trafficsign
//
//  Created by administrator on 2020. 11. 11..
//  Copyright © 2020. administrator. All rights reserved.
//

import UIKit
import AVFoundation
import Vision

class ViewController: UIViewController, AVCaptureVideoDataOutputSampleBufferDelegate {
    let label: UILabel = {
        let label = UILabel()
        label.textColor = .white
        label.translatesAutoresizingMaskIntoConstraints = false
        label.text = "Label"
        label.font = label.font.withSize(30)
        return label
    }()
    
    override func viewDidLoad() {
        super.viewDidLoad()
        
        setupCaptureSession()
        
        view.addSubview(label)
        setupLabel()
    }
    
    override func didReceiveMemoryWarning() {
        // call the parent function
        super.didReceiveMemoryWarning()
        
        // Dispose of any resources that can be recreated.
    }
    
    func setupCaptureSession() {
        // create a new capture session
        let captureSession = AVCaptureSession()
        
        // find the available cameras
        let availableDevices = AVCaptureDevice.DiscoverySession(deviceTypes: [.builtInWideAngleCamera], mediaType: AVMediaType.video, position: .back).devices
        
        do {
            // select a camera
            if let captureDevice = availableDevices.first {
                captureSession.addInput(try AVCaptureDeviceInput(device: captureDevice))
            }
        } catch {
            // print an error if the camera is not available
            print(error.localizedDescription)
        }
        
        // setup the video output to the screen and add output to our capture session
        let captureOutput = AVCaptureVideoDataOutput()
        captureSession.addOutput(captureOutput)
        let previewLayer = AVCaptureVideoPreviewLayer(session: captureSession)
        previewLayer.frame = view.frame
        view.layer.addSublayer(previewLayer)
        
        // buffer the video and start the capture session
        captureOutput.setSampleBufferDelegate(self, queue: DispatchQueue(label: "videoQueue"))
        captureSession.startRunning()
        
    }
    
    // called every time a frame is captured
    func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
        // load our Core ML traffic sign model
        guard let model = try? VNCoreMLModel(for: model_squeezeNet_TSR().model) else { return }
        // run an inference with CoreML
        let request = VNCoreMLRequest(model: model) { (finishedRequest, error) in
            // grab the inference results
            guard let results = finishedRequest.results as? [VNClassificationObservation] else { return }
            
            // grab the highest confidence result
            guard let observation = results.first else { return }
            
            // create the label text components
            let predclass = "\(observation.identifier)"
            let predconfidence = String(format: "%.02f%%", observation.confidence * 100)
            // set the label text
            DispatchQueue.main.async(execute: {
                self.label.text = "\(predclass) \(predconfidence)"
            })
        }
        
        // create a Core Video pixel buffer which is an image buffer that holds pixels in main memory
        // Applications generating frames, compressing or decompressing video, or using Core Image
        // can all make use of Core Video pixel buffers
        guard let pixelBuffer: CVPixelBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) else { return }
        
        // execute the request
        try? VNImageRequestHandler(cvPixelBuffer: pixelBuffer, options: [:]).perform([request])
        
    }
    
    func setupLabel() {
        label.centerXAnchor.constraint(equalTo: view.centerXAnchor).isActive = true
        label.bottomAnchor.constraint(equalTo: view.bottomAnchor, constant: -50).isActive = true
    }
}

I don't know if this solves the problem, but try the following in your conversion script:

class_labels = list(range(1, 43))
class_labels = [str(x) for x in class_labels]   # add this line

Currently your class labels are integers. That might confuse Core ML or Vision at some point.
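
For completeness, the whole conversion call with string labels would then look something like this (a sketch reusing the file names and converter arguments from the question's script):

# Sketch: the question's conversion script, with string class labels.
from keras.models import load_model
import coremltools

class_labels = [str(x) for x in range(1, 43)]  # "1" ... "42"

model = load_model('my_model.h5')
coreml_model = coremltools.converters.keras.convert(model,
    input_names="image",
    image_input_names="image",
    image_scale=1/255.0,
    class_labels=class_labels,
    is_bgr=True)
coreml_model.save("mymodel.mlmodel")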

So I added a label binarizer to my model generator script. I used the original coremlconverter script, which takes the labels from the label binarizer. When I ran it, it also listed 42 labels, so it picked them up. But when I run the app, it still shows inconsistent things. Not the "Label" text anymore, but random values, no matter what the camera is pointed at. Here is a video: link
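
One thing that may be worth checking (an assumption, not something confirmed in this thread): the training script below feeds raw RGB pixel values (0-255) to Keras, while the converter applies image_scale=1/255.0 and is_bgr=True, so the phone may be preprocessing frames differently than the model was trained. A quick parity check is to run the same image through both models. A minimal sketch; coremltools can only run predictions on macOS, and 'sample.png' is a placeholder for any training image:

# Sketch: compare Keras and Core ML predictions on one image to catch
# preprocessing mismatches (macOS only; 'sample.png' is a placeholder).
import numpy as np
from PIL import Image
from keras.models import load_model
import coremltools

img = Image.open('sample.png').resize((30, 30))

# Keras sees what the training script produced: raw RGB values.
keras_model = load_model('my_model.h5')
keras_pred = keras_model.predict(np.expand_dims(np.array(img), axis=0))
print("Keras argmax:", keras_pred.argmax())

# Core ML applies the scale/BGR options that were baked in at conversion.
mlmodel = coremltools.models.MLModel('mymodel.mlmodel')
out = mlmodel.predict({'image': img})
print("Core ML classLabel:", out.get('classLabel'))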

Here is the python script that creates my model (the comments are not important; the code should be clear to anyone who knows python):

#!/usr/bin/env python
# coding: utf-8

# In[1]:


!pip install tensorflow==1.15.0 keras==2.2.4 scikit-learn==0.19.2 matplotlib pandas pillow opencv-python


# In[9]:


import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import cv2
import pickle
#import tensorflow as tf
from PIL import Image
import os
from sklearn.model_selection import train_test_split
from keras.utils import to_categorical
from keras.models import Sequential, load_model
from keras.layers import Conv2D, MaxPool2D, Dense, Flatten, Dropout
from sklearn.preprocessing import LabelBinarizer

data = []
labels = []
classes = 43
cur_path = os.getcwd()

# Load the images and labels
for i in range(classes):
    path = os.path.join(cur_path,'train',str(i))
    images = os.listdir(path)

    for a in images:
        try:
            image = Image.open(os.path.join(path, a))
            image = image.resize((30,30))
            image = np.array(image)
            #sim = Image.fromarray(image)
            data.append(image)
            labels.append(i)
        except:
            print("Error loading image")

# Convert the lists to numpy arrays
data = np.array(data)
labels = np.array(labels)
# these two lines are commented out when the script is used without the label binarizer
lb = LabelBinarizer()
labels = lb.fit_transform(labels)


# In[10]:


print(data.shape, labels.shape)
# Split the dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.2, random_state=42)

print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)
# Convert the labels to one-hot encoding
# the next two lines are uncommented when the script is used without the label binarizer
#y_train = to_categorical(y_train, 43)
#y_test = to_categorical(y_test, 43)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)


# In[11]:


# Define the model
model = Sequential()
model.add(Conv2D(filters=32, kernel_size=(5,5), activation='relu', input_shape=X_train.shape[1:]))
model.add(Conv2D(filters=32, kernel_size=(5,5), activation='relu'))
model.add(MaxPool2D(pool_size=(2, 2)))
model.add(Dropout(rate=0.25))
model.add(Conv2D(filters=64, kernel_size=(3, 3), activation='relu'))
model.add(Conv2D(filters=64, kernel_size=(3, 3), activation='relu'))
model.add(MaxPool2D(pool_size=(2, 2)))
model.add(Dropout(rate=0.25))
model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(Dropout(rate=0.5))
model.add(Dense(43, activation='softmax'))

# Compile the model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])


# In[12]:


epochs = 15
history = model.fit(X_train, y_train, batch_size=32, epochs=epochs, validation_data=(X_test, y_test))
model.save("my_model.h5")


# In[5]:


# save the label binarizer to disk
import pickle
print("[INFO] serializing label binarizer...")
f = open("labelbin", "wb")
f.write(pickle.dumps(lb))
f.close()


# In[13]:


# Plot the accuracy on a graph
plt.figure(0)
plt.plot(history.history['acc'], label='training accuracy')
plt.plot(history.history['val_acc'], label='val accuracy')
plt.title('Accuracy')
plt.xlabel('epochs')
plt.ylabel('accuracy')
plt.legend()
plt.show()

plt.figure(1)
plt.plot(history.history['loss'], label='training loss')
plt.plot(history.history['val_loss'], label='val loss')
plt.title('Loss')
plt.xlabel('epochs')
plt.ylabel('loss')
plt.legend()
plt.show()

# In[8]:


# Test the accuracy on the actual test dataset
from sklearn.metrics import accuracy_score

print(os.path.abspath(path))

y_test = pd.read_csv('Test.csv')

labels = y_test["ClassId"].values
imgs = y_test["Path"].values

data=[]

for img in imgs:
    image = Image.open(img)
    image = image.resize((30,30))
    data.append(np.array(image))

X_test=np.array(data)

pred = model.predict_classes(X_test)

# Accuracy on the test data
from sklearn.metrics import accuracy_score
print(accuracy_score(labels, pred))


# In[13]:



import os
import time
from sklearn.metrics import accuracy_score

cur_path = os.getcwd()
path = os.path.join(cur_path)
print(os.path.abspath(path))

y_test = pd.read_csv('Test.csv')

labels = y_test["ClassId"].values
imgs = y_test["Path"].values

data=[]
icount=0
timesum=0
timecur=0
# Compute the average inference time
for img in imgs:
    start_time = time.time()
    image = Image.open(img)
    image = image.resize((30,30))
    image = np.expand_dims(image, axis=0)
    image = np.array(image)
    pred = model.predict_classes([image])[0]
    timecur = (time.time() - start_time)
    print("--- %s seconds ---" % (timecur))
    timesum = timesum + timecur
    icount = icount+1

for img in imgs:
    image = Image.open(img)
    image = image.resize((30,30))
    data.append(np.array(image))

X_test=np.array(data)
print("Átlag: ", (timesum/icount))

pred = model.predict_classes(X_test)

from sklearn.metrics import accuracy_score
print(accuracy_score(labels, pred))


# In[7]:


import tkinter as tk
import time
from tkinter import filedialog
from tkinter import *

from PIL import ImageTk, Image
import numpy
# Load the trained model
from keras.models import load_model
model = load_model('my_model.h5')
# List of the class names for display
classes = { 1:'Speed limit (20km/h)',
            2:'Speed limit (30km/h)', 
            3:'Speed limit (50km/h)', 
            4:'Speed limit (60km/h)', 
            5:'Speed limit (70km/h)', 
            6:'Speed limit (80km/h)', 
            7:'End of speed limit (80km/h)', 
            8:'Speed limit (100km/h)', 
            9:'Speed limit (120km/h)', 
            10:'No passing', 
            11:'No passing veh over 3.5 tons', 
            12:'Right-of-way at intersection', 
            13:'Priority road', 
            14:'Yield', 
            15:'Stop', 
            16:'No vehicles', 
            17:'Veh > 3.5 tons prohibited', 
            18:'No entry', 
            19:'General caution', 
            20:'Dangerous curve left', 
            21:'Dangerous curve right', 
            22:'Double curve', 
            23:'Bumpy road', 
            24:'Slippery road', 
            25:'Road narrows on the right', 
            26:'Road work', 
            27:'Traffic signals', 
            28:'Pedestrians', 
            29:'Children crossing', 
            30:'Bicycles crossing', 
            31:'Beware of ice/snow',
            32:'Wild animals crossing', 
            33:'End speed + passing limits', 
            34:'Turn right ahead', 
            35:'Turn left ahead', 
            36:'Ahead only', 
            37:'Go straight or right', 
            38:'Go straight or left', 
            39:'Keep right', 
            40:'Keep left', 
            41:'Roundabout mandatory', 
            42:'End of no passing', 
            43:'End no passing veh > 3.5 tons' }
# Build the GUI
top=tk.Tk()
top.geometry('800x600')
top.title('Traffic sign classification')
top.configure(background='#CDCDCD')
label=Label(top,background='#CDCDCD', font=('arial',15,'bold'))
sign_image = Label(top)
def classify(file_path):
    start_time = time.time()
    global label_packed
    image = Image.open(file_path)
    image = image.resize((30,30))
    image = numpy.expand_dims(image, axis=0)
    image = numpy.array(image)
    pred = model.predict_classes([image])[0]
    sign = classes[pred+1]
    print("--- %s seconds ---" % (time.time() - start_time))
    print(sign)
    label.configure(foreground='#011638', text=sign) 
def show_classify_button(file_path):
    classify_b=Button(top,text="Classify Image",command=lambda: classify(file_path),padx=10,pady=5)
    classify_b.configure(background='#364156', foreground='white',font=('arial',10,'bold'))
    classify_b.place(relx=0.79,rely=0.46)
def upload_image():
    try:
        file_path=filedialog.askopenfilename()
        uploaded=Image.open(file_path)
        uploaded.thumbnail(((top.winfo_width()/2.25),(top.winfo_height()/2.25)))
        im=ImageTk.PhotoImage(uploaded)
        sign_image.configure(image=im)
        sign_image.image=im
        label.configure(text='')
        show_classify_button(file_path)
    except:
        pass
upload=Button(top,text="Upload an image",command=upload_image,padx=10,pady=5)
upload.configure(background='#364156', foreground='white',font=('arial',10,'bold'))
upload.pack(side=BOTTOM,pady=50)
sign_image.pack(side=BOTTOM,expand=True)
label.pack(side=BOTTOM,expand=True)
heading = Label(top, text="Know Your Traffic Sign",pady=20, font=('arial',20,'bold'))
heading.configure(background='#CDCDCD',foreground='#364156')
heading.pack()
top.mainloop()


