图像中的数字识别器
Number recognizer in Image
您好,我是 Swift 的新手,仍在学习中。我想要构建一个文本识别器,用来提取图像中的所有数字并对它们进行分类。例如,当我有一张包含多个号码的银行卡时,我如何识别特定字段(IBAN、BIC 等)?最后,用户界面中的这些字段应该根据输入的卡片图像自动填充。
我的第一步是构建基本的文本识别器,这部分没有问题。但我不知道接下来如何对从图像中识别出的所有数据进行分类。有什么建议吗?谢谢
struct ScanDocumentView: UIViewControllerRepresentable {
/// Presentation state read from the environment; the coordinator uses it to dismiss the scanner.
@Environment(\.presentationMode) var presentationMode
/// Receives the text recognized from the scanned pages.
@Binding var recognizedText: String
/// Receives the image of the first scanned page.
@Binding var recognizedImage: Image
/// Creates the coordinator that acts as the document camera's delegate.
///
/// The coordinator is handed both bindings plus this view, so it can publish
/// the scan results and dismiss the scanner.
func makeCoordinator() -> Coordinator {
    return Coordinator(
        recognizedText: $recognizedText,
        recognizedImage: $recognizedImage,
        parent: self
    )
}
/// Creates the document camera controller and wires the coordinator in as its delegate.
///
/// - Parameter context: The representable context that supplies the coordinator.
/// - Returns: The configured document camera view controller.
func makeUIViewController(context: Context) -> VNDocumentCameraViewController {
    let scanner = VNDocumentCameraViewController()
    scanner.delegate = context.coordinator
    return scanner
}
/// Required by `UIViewControllerRepresentable`; intentionally empty.
///
/// - Parameters:
///   - uiViewController: The document camera controller being displayed.
///   - context: The representable context.
func updateUIViewController(_ uiViewController: VNDocumentCameraViewController, context: Context) {
// nothing to do here
}
class Coordinator: NSObject, VNDocumentCameraViewControllerDelegate {
/// Binding that the recognized text is written to.
var recognizedText: Binding<String>
/// Binding that the image of the scanned page is written to.
var recognizedImage: Binding<Image>
/// The representable that created this coordinator; used to dismiss the scanner.
var parent: ScanDocumentView
/// Creates a coordinator that publishes scan results through the given bindings.
///
/// - Parameters:
///   - recognizedText: Binding that receives the recognized text.
///   - recognizedImage: Binding that receives the scanned image.
///   - parent: The representable whose presentation is dismissed after a scan.
init(
    recognizedText: Binding<String>,
    recognizedImage: Binding<Image>,
    parent: ScanDocumentView
) {
    self.parent = parent
    self.recognizedImage = recognizedImage
    self.recognizedText = recognizedText
}
/// Handles a finished scan: publishes the recognized text and the first page image,
/// then dismisses the scanner.
///
/// - Parameters:
///   - controller: The document camera controller that produced the scan.
///   - scan: The completed scan whose pages are processed.
func documentCameraViewController(_ controller: VNDocumentCameraViewController, didFinishWith scan: VNDocumentCameraScan) {
    // Dismiss on every exit path, including the empty-scan early return below.
    defer { parent.presentationMode.wrappedValue.dismiss() }

    let extractedImages = extractImages(from: scan)
    recognizedText.wrappedValue = recognizeText(from: extractedImages)

    // Only touch page 0 when the scan actually contains a page; indexing an
    // empty scan would be out of range.
    guard scan.pageCount > 0 else { return }
    let firstPage = scan.imageOfPage(at: 0)
    processImage(compressedImage(firstPage))
}
/// Collects the `CGImage` backing each page of the scan, in page order.
///
/// Pages whose image has no `cgImage` are left out of the result.
///
/// - Parameter scan: The scan to read pages from.
/// - Returns: One `CGImage` per page that provides one.
fileprivate func extractImages(from scan: VNDocumentCameraScan) -> [CGImage] {
    (0..<scan.pageCount).compactMap { pageIndex in
        scan.imageOfPage(at: pageIndex).cgImage
    }
}
/// Wraps the UIKit image in a SwiftUI `Image` and writes it to the image binding.
///
/// - Parameter uiImage: The image to publish.
private func processImage(_ uiImage: UIImage) {
    let displayImage = Image(uiImage: uiImage)
    recognizedImage.wrappedValue = displayImage
}
/// Re-encodes an image as JPEG and decodes the result back into a `UIImage`.
///
/// - Parameters:
///   - originalImage: The image to re-encode.
///   - compressionQuality: JPEG quality passed to `jpegData(compressionQuality:)`.
///     Defaults to `1`, which matches the previously hard-coded value.
/// - Returns: The re-encoded image, or `originalImage` unchanged when encoding
///   or decoding fails.
func compressedImage(_ originalImage: UIImage, compressionQuality: CGFloat = 1) -> UIImage {
    guard let jpegData = originalImage.jpegData(compressionQuality: compressionQuality),
          let reloadedImage = UIImage(data: jpegData) else {
        return originalImage
    }
    return reloadedImage
}
/// Runs text recognition over every image and returns the combined text.
///
/// A single `VNRecognizeTextRequest` is reused for all images. Its completion
/// handler appends the top candidate string of each observation, followed by a
/// newline, to the accumulated result.
///
/// - Parameter images: The images the request is performed on, in order.
/// - Returns: The accumulated text; empty when nothing was appended.
fileprivate func recognizeText(from images: [CGImage]) -> String {
var entireRecognizedText = ""
let recognizeTextRequest = VNRecognizeTextRequest { (request, error) in
// Ignore the results entirely when the request reported an error.
guard error == nil else { return }
guard let observations = request.results as? [VNRecognizedTextObservation] else { return }
// Only the single best candidate per observation is used.
let maximumRecognitionCandidates = 1
for observation in observations {
guard let candidate = observation.topCandidates(maximumRecognitionCandidates).first else { continue }
entireRecognizedText += "\(candidate.string)\n"
}
}
recognizeTextRequest.recognitionLevel = .accurate
for image in images {
let requestHandler = VNImageRequestHandler(cgImage: image, options: [:])
// NOTE(review): `try?` discards any error thrown by `perform`, so failures are silent.
// NOTE(review): returning the text right after this loop assumes `perform` has
// already invoked the completion handler by the time it returns — confirm.
try? requestHandler.perform([recognizeTextRequest])
}
return entireRecognizedText
您将在 @Binding 变量 recognizedText 中获得所有识别出的文本。然后您可以用正则表达式解析它,或者在其中搜索任何类型的文本。
您好,我是 Swift 的新手,仍在学习中。我想要构建一个文本识别器,用来提取图像中的所有数字并对它们进行分类。例如,当我有一张包含多个号码的银行卡时,我如何识别特定字段(IBAN、BIC 等)?最后,用户界面中的这些字段应该根据输入的卡片图像自动填充。 我的第一步是构建基本的文本识别器,这部分没有问题。但我不知道接下来如何对从图像中识别出的所有数据进行分类。有什么建议吗?谢谢
struct ScanDocumentView: UIViewControllerRepresentable {
/// Presentation state read from the environment; the coordinator uses it to dismiss the scanner.
@Environment(\.presentationMode) var presentationMode
/// Receives the text recognized from the scanned pages.
@Binding var recognizedText: String
/// Receives the image of the first scanned page.
@Binding var recognizedImage: Image
/// Creates the coordinator that acts as the document camera's delegate.
///
/// The coordinator is handed both bindings plus this view, so it can publish
/// the scan results and dismiss the scanner.
func makeCoordinator() -> Coordinator {
    return Coordinator(
        recognizedText: $recognizedText,
        recognizedImage: $recognizedImage,
        parent: self
    )
}
/// Creates the document camera controller and wires the coordinator in as its delegate.
///
/// - Parameter context: The representable context that supplies the coordinator.
/// - Returns: The configured document camera view controller.
func makeUIViewController(context: Context) -> VNDocumentCameraViewController {
    let scanner = VNDocumentCameraViewController()
    scanner.delegate = context.coordinator
    return scanner
}
/// Required by `UIViewControllerRepresentable`; intentionally empty.
///
/// - Parameters:
///   - uiViewController: The document camera controller being displayed.
///   - context: The representable context.
func updateUIViewController(_ uiViewController: VNDocumentCameraViewController, context: Context) {
// nothing to do here
}
class Coordinator: NSObject, VNDocumentCameraViewControllerDelegate {
/// Binding that the recognized text is written to.
var recognizedText: Binding<String>
/// Binding that the image of the scanned page is written to.
var recognizedImage: Binding<Image>
/// The representable that created this coordinator; used to dismiss the scanner.
var parent: ScanDocumentView
/// Creates a coordinator that publishes scan results through the given bindings.
///
/// - Parameters:
///   - recognizedText: Binding that receives the recognized text.
///   - recognizedImage: Binding that receives the scanned image.
///   - parent: The representable whose presentation is dismissed after a scan.
init(
    recognizedText: Binding<String>,
    recognizedImage: Binding<Image>,
    parent: ScanDocumentView
) {
    self.parent = parent
    self.recognizedImage = recognizedImage
    self.recognizedText = recognizedText
}
/// Handles a finished scan: publishes the recognized text and the first page image,
/// then dismisses the scanner.
///
/// - Parameters:
///   - controller: The document camera controller that produced the scan.
///   - scan: The completed scan whose pages are processed.
func documentCameraViewController(_ controller: VNDocumentCameraViewController, didFinishWith scan: VNDocumentCameraScan) {
    // Dismiss on every exit path, including the empty-scan early return below.
    defer { parent.presentationMode.wrappedValue.dismiss() }

    let extractedImages = extractImages(from: scan)
    recognizedText.wrappedValue = recognizeText(from: extractedImages)

    // Only touch page 0 when the scan actually contains a page; indexing an
    // empty scan would be out of range.
    guard scan.pageCount > 0 else { return }
    let firstPage = scan.imageOfPage(at: 0)
    processImage(compressedImage(firstPage))
}
/// Collects the `CGImage` backing each page of the scan, in page order.
///
/// Pages whose image has no `cgImage` are left out of the result.
///
/// - Parameter scan: The scan to read pages from.
/// - Returns: One `CGImage` per page that provides one.
fileprivate func extractImages(from scan: VNDocumentCameraScan) -> [CGImage] {
    (0..<scan.pageCount).compactMap { pageIndex in
        scan.imageOfPage(at: pageIndex).cgImage
    }
}
/// Wraps the UIKit image in a SwiftUI `Image` and writes it to the image binding.
///
/// - Parameter uiImage: The image to publish.
private func processImage(_ uiImage: UIImage) {
    let displayImage = Image(uiImage: uiImage)
    recognizedImage.wrappedValue = displayImage
}
/// Re-encodes an image as JPEG and decodes the result back into a `UIImage`.
///
/// - Parameters:
///   - originalImage: The image to re-encode.
///   - compressionQuality: JPEG quality passed to `jpegData(compressionQuality:)`.
///     Defaults to `1`, which matches the previously hard-coded value.
/// - Returns: The re-encoded image, or `originalImage` unchanged when encoding
///   or decoding fails.
func compressedImage(_ originalImage: UIImage, compressionQuality: CGFloat = 1) -> UIImage {
    guard let jpegData = originalImage.jpegData(compressionQuality: compressionQuality),
          let reloadedImage = UIImage(data: jpegData) else {
        return originalImage
    }
    return reloadedImage
}
/// Runs text recognition over every image and returns the combined text.
///
/// A single `VNRecognizeTextRequest` is reused for all images. Its completion
/// handler appends the top candidate string of each observation, followed by a
/// newline, to the accumulated result.
///
/// - Parameter images: The images the request is performed on, in order.
/// - Returns: The accumulated text; empty when nothing was appended.
fileprivate func recognizeText(from images: [CGImage]) -> String {
var entireRecognizedText = ""
let recognizeTextRequest = VNRecognizeTextRequest { (request, error) in
// Ignore the results entirely when the request reported an error.
guard error == nil else { return }
guard let observations = request.results as? [VNRecognizedTextObservation] else { return }
// Only the single best candidate per observation is used.
let maximumRecognitionCandidates = 1
for observation in observations {
guard let candidate = observation.topCandidates(maximumRecognitionCandidates).first else { continue }
entireRecognizedText += "\(candidate.string)\n"
}
}
recognizeTextRequest.recognitionLevel = .accurate
for image in images {
let requestHandler = VNImageRequestHandler(cgImage: image, options: [:])
// NOTE(review): `try?` discards any error thrown by `perform`, so failures are silent.
// NOTE(review): returning the text right after this loop assumes `perform` has
// already invoked the completion handler by the time it returns — confirm.
try? requestHandler.perform([recognizeTextRequest])
}
return entireRecognizedText
您将在 @Binding 变量 recognizedText 中获得所有识别出的文本。然后您可以用正则表达式解析它,或者在其中搜索任何类型的文本。