图像中的数字识别器

Number recognizer in Image

您好,我是 Swift 新手,仍在学习中。我想构建一个文本识别器,从图像中筛选出所有数字并对它们进行分类。例如,当我有一张包含多个号码的信用卡时,如何识别特定字段(IBAN、BIC 等)?最终,用户界面中的对应字段应根据卡片图像自动填充。我的第一步是构建基本的文本识别器,这没有问题。但我不知道接下来该如何对从图像中捕获的所有数据进行分类。有什么建议吗?谢谢。


struct ScanDocumentView: UIViewControllerRepresentable {

    @Environment(\.presentationMode) var presentationMode
    @Binding var recognizedText: String
    @Binding var recognizedImage: Image
    
    
    /// Creates the coordinator that is later installed as the document camera's delegate.
    ///
    /// The coordinator receives both bindings and this view.
    func makeCoordinator() -> Coordinator {
        return Coordinator(
            recognizedText: $recognizedText,
            recognizedImage: $recognizedImage,
            parent: self
        )
    }
    
    /// Builds the VisionKit document camera and routes its delegate callbacks to the coordinator.
    ///
    /// - Parameter context: Supplies the coordinator created by `makeCoordinator()`.
    /// - Returns: A freshly created document camera view controller.
    func makeUIViewController(context: Context) -> VNDocumentCameraViewController {
        let scanner = VNDocumentCameraViewController()
        scanner.delegate = context.coordinator
        return scanner
    }
    
    /// Intentionally a no-op: this view pushes no state into the camera controller.
    func updateUIViewController(
        _ uiViewController: VNDocumentCameraViewController,
        context: Context
    ) {
        // Nothing to update.
    }
    
    class Coordinator: NSObject, VNDocumentCameraViewControllerDelegate {
        var recognizedText: Binding<String>
        var recognizedImage: Binding<Image>
        var parent: ScanDocumentView
        
        /// Stores the bindings the coordinator writes to and the view it dismisses.
        ///
        /// - Parameters:
        ///   - recognizedText: Binding that receives the recognized text.
        ///   - recognizedImage: Binding that receives the scanned image.
        ///   - parent: The representable whose presentation mode is used to dismiss the scanner.
        init(recognizedText: Binding<String>, recognizedImage: Binding<Image>, parent: ScanDocumentView) {
            self.parent = parent
            self.recognizedImage = recognizedImage
            self.recognizedText = recognizedText
        }
        
        /// Handles a finished scan: publishes the recognized text and the first page image,
        /// then dismisses the scanner.
        ///
        /// - Parameters:
        ///   - controller: The document camera that produced the scan (unused here).
        ///   - scan: The completed scan whose pages are processed.
        func documentCameraViewController(_ controller: VNDocumentCameraViewController, didFinishWith scan: VNDocumentCameraScan) {
            // Recognize text across every scanned page and publish the combined string.
            let pageImages = extractImages(from: scan)
            recognizedText.wrappedValue = recognizeText(from: pageImages)

            // Publish a JPEG round-tripped copy of the first page as the image.
            let firstPage = scan.imageOfPage(at: 0)
            processImage(compressedImage(firstPage))

            parent.presentationMode.wrappedValue.dismiss()
        }
        
        /// Collects the `CGImage` backing of every page in `scan`, in page order.
        ///
        /// Pages whose `UIImage` has no `cgImage` are left out of the result.
        ///
        /// - Parameter scan: The completed document scan.
        /// - Returns: One `CGImage` per page that exposes one.
        fileprivate func extractImages(from scan: VNDocumentCameraScan) -> [CGImage] {
            (0..<scan.pageCount).compactMap { pageIndex in
                scan.imageOfPage(at: pageIndex).cgImage
            }
        }
        /// Wraps `uiImage` in a SwiftUI `Image` and writes it to the `recognizedImage` binding.
        private func processImage(_ uiImage: UIImage) {
            let preview = Image(uiImage: uiImage)
            recognizedImage.wrappedValue = preview
        }
        
        /// Re-encodes `originalImage` as JPEG and decodes the result back into a `UIImage`.
        ///
        /// - Parameters:
        ///   - originalImage: The image to round-trip through JPEG encoding.
        ///   - compressionQuality: Quality forwarded to `jpegData(compressionQuality:)`.
        ///     Defaults to `1`, the value this method previously hard-coded, so existing
        ///     call sites behave exactly as before. Pass a lower value to compress more.
        /// - Returns: The re-decoded image, or `originalImage` unchanged when JPEG encoding
        ///   or decoding fails.
        func compressedImage(_ originalImage: UIImage, compressionQuality: CGFloat = 1) -> UIImage {
            guard let imageData = originalImage.jpegData(compressionQuality: compressionQuality),
                  let reloadedImage = UIImage(data: imageData) else {
                return originalImage
            }
            return reloadedImage
        }
        
        fileprivate func recognizeText(from images: [CGImage]) -> String {
            var entireRecognizedText = ""
            let recognizeTextRequest = VNRecognizeTextRequest { (request, error) in
                guard error == nil else { return }
                
                guard let observations = request.results as? [VNRecognizedTextObservation] else { return }
                
                let maximumRecognitionCandidates = 1
                for observation in observations {
                    guard let candidate = observation.topCandidates(maximumRecognitionCandidates).first else { continue }
                    
                    entireRecognizedText += "\(candidate.string)\n"
                    
                }
            }
            recognizeTextRequest.recognitionLevel = .accurate
            
            for image in images {
                let requestHandler = VNImageRequestHandler(cgImage: image, options: [:])
                
                try? requestHandler.perform([recognizeTextRequest])
            }
            
            return entireRecognizedText

您将在 `@Binding var recognizedText` 中获得所有识别出的文本。然后您可以用正则表达式解析它,或者在其中搜索任何类型的文本。