AndroidX Camera Core ImageAnalysis.Analyser 导致图像失真
AndroidX Camera Core ImageAnalysis.Analyser results in distorted image
我正在使用 ImageAnalysis 库提取实时预览,然后进行条形码扫描和 OCR。
我对条形码扫描完全没有任何问题,但 OCR 导致了一些较弱的结果。我敢肯定这可能有几个原因。我目前尝试解决该问题的方法是将帧发送到 GCP - 在对帧进行 运行 OCR(或条形码)之前进行存储,以便批量查看它们。它们看起来都非常相似:
我最好的猜测是我处理帧的方式可能导致像素在缓冲区中的组织不正确(我对 Android 没有经验 - 抱歉)。意思不是组织 0,0 然后 0,1..... 它随机获取像素并将它们放在随机区域。我不知道这是在哪里发生的。一旦我可以查看图像质量,我就可以分析 OCR 的问题所在,但不幸的是,这是我目前的障碍。
额外说明:我正在将图像上传到 GCP - 存储甚至在 运行ning OCR 之前,所以为了看这个,我们可以忽略我所做的 OCR 声明 - 我只是想介绍一下背景。
下面是我启动相机和分析仪然后观察帧的代码
private void startCamera() {
//make sure there isn't another camera instance running before starting
CameraX.unbindAll();
/* start preview */
int aspRatioW = txView.getWidth(); //get width of screen
int aspRatioH = txView.getHeight(); //get height
Rational asp = new Rational (aspRatioW, aspRatioH); //aspect ratio
Size screen = new Size(aspRatioW, aspRatioH); //size of the screen
//config obj for preview/viewfinder thingy.
PreviewConfig pConfig = new PreviewConfig.Builder().setTargetResolution(screen).build();
Preview preview = new Preview(pConfig); //lets build it
preview.setOnPreviewOutputUpdateListener(
new Preview.OnPreviewOutputUpdateListener() {
//to update the surface texture we have to destroy it first, then re-add it
@Override
public void onUpdated(Preview.PreviewOutput output){
ViewGroup parent = (ViewGroup) txView.getParent();
parent.removeView(txView);
parent.addView(txView, 0);
txView.setSurfaceTexture(output.getSurfaceTexture());
updateTransform();
}
});
/* image capture */
//config obj, selected capture mode
ImageCaptureConfig imgCapConfig = new ImageCaptureConfig.Builder().setCaptureMode(ImageCapture.CaptureMode.MAX_QUALITY)
.setTargetRotation(getWindowManager().getDefaultDisplay().getRotation()).build();
final ImageCapture imgCap = new ImageCapture(imgCapConfig);
findViewById(R.id.imgCapture).setOnClickListener(new View.OnClickListener() {
@Override
public void onClick(View v) {
Log.d("image taken", "image taken");
}
});
/* image analyser */
ImageAnalysisConfig imgAConfig = new ImageAnalysisConfig.Builder().setImageReaderMode(ImageAnalysis.ImageReaderMode.ACQUIRE_LATEST_IMAGE).build();
ImageAnalysis analysis = new ImageAnalysis(imgAConfig);
analysis.setAnalyzer(
Executors.newSingleThreadExecutor(), new ImageAnalysis.Analyzer(){
@Override
public void analyze(ImageProxy imageProxy, int degrees){
Log.d("analyze", "just analyzing");
if (imageProxy == null || imageProxy.getImage() == null) {
return;
}
Image mediaImage = imageProxy.getImage();
int rotation = degreesToFirebaseRotation(degrees);
FirebaseVisionImage image = FirebaseVisionImage.fromBitmap(toBitmap(mediaImage));
if (!isMachineLearning){
Log.d("analyze", "isMachineLearning is about to be true");
isMachineLearning = true;
String haha = MediaStore.Images.Media.insertImage(getContentResolver(), toBitmap(mediaImage), "image" , "theImageDescription");
Log.d("uploadingimage: ", haha);
extractBarcode(image, toBitmap(mediaImage));
}
}
});
//bind to lifecycle:
CameraX.bindToLifecycle(this, analysis, imgCap, preview);
}
以下是我构建检测的方式(非常简单明了):
FirebaseVisionBarcodeDetectorOptions options = new FirebaseVisionBarcodeDetectorOptions.Builder()
.setBarcodeFormats(FirebaseVisionBarcode.FORMAT_ALL_FORMATS)
.build();
FirebaseVisionBarcodeDetector detector = FirebaseVision.getInstance().getVisionBarcodeDetector(options);
detector.detectInImage(firebaseVisionImage)
最后,当我将图像上传到 GCP - 存储时,它是这样的:
ByteArrayOutputStream baos = new ByteArrayOutputStream();
bmp.compress(Bitmap.CompressFormat.JPEG, 100, baos); //bmp being the image that I ran barcode scanning on - as well as OCR
byte[] data = baos.toByteArray();
UploadTask uploadTask = storageRef.putBytes(data);
谢谢大家的帮助(:
我的问题是我试图在条形码扫描后转换为位图。转换没有正确编写,但我找到了一种无需编写自己的位图转换函数的方法(尽管我打算回到它,因为我认为自己需要它,并且真正的好奇心希望我弄清楚)
我正在使用 ImageAnalysis 库提取实时预览,然后进行条形码扫描和 OCR。
我对条形码扫描完全没有任何问题,但 OCR 导致了一些较弱的结果。我敢肯定这可能有几个原因。我目前尝试解决该问题的方法是将帧发送到 GCP - 在对帧进行 运行 OCR(或条形码)之前进行存储,以便批量查看它们。它们看起来都非常相似:
我最好的猜测是我处理帧的方式可能导致像素在缓冲区中的组织不正确(我对 Android 没有经验 - 抱歉)。意思不是组织 0,0 然后 0,1..... 它随机获取像素并将它们放在随机区域。我不知道这是在哪里发生的。一旦我可以查看图像质量,我就可以分析 OCR 的问题所在,但不幸的是,这是我目前的障碍。
额外说明:我正在将图像上传到 GCP - 存储甚至在 运行ning OCR 之前,所以为了看这个,我们可以忽略我所做的 OCR 声明 - 我只是想介绍一下背景。
下面是我启动相机和分析仪然后观察帧的代码
private void startCamera() {
//make sure there isn't another camera instance running before starting
CameraX.unbindAll();
/* start preview */
int aspRatioW = txView.getWidth(); //get width of screen
int aspRatioH = txView.getHeight(); //get height
Rational asp = new Rational (aspRatioW, aspRatioH); //aspect ratio
Size screen = new Size(aspRatioW, aspRatioH); //size of the screen
//config obj for preview/viewfinder thingy.
PreviewConfig pConfig = new PreviewConfig.Builder().setTargetResolution(screen).build();
Preview preview = new Preview(pConfig); //lets build it
preview.setOnPreviewOutputUpdateListener(
new Preview.OnPreviewOutputUpdateListener() {
//to update the surface texture we have to destroy it first, then re-add it
@Override
public void onUpdated(Preview.PreviewOutput output){
ViewGroup parent = (ViewGroup) txView.getParent();
parent.removeView(txView);
parent.addView(txView, 0);
txView.setSurfaceTexture(output.getSurfaceTexture());
updateTransform();
}
});
/* image capture */
//config obj, selected capture mode
ImageCaptureConfig imgCapConfig = new ImageCaptureConfig.Builder().setCaptureMode(ImageCapture.CaptureMode.MAX_QUALITY)
.setTargetRotation(getWindowManager().getDefaultDisplay().getRotation()).build();
final ImageCapture imgCap = new ImageCapture(imgCapConfig);
findViewById(R.id.imgCapture).setOnClickListener(new View.OnClickListener() {
@Override
public void onClick(View v) {
Log.d("image taken", "image taken");
}
});
/* image analyser */
ImageAnalysisConfig imgAConfig = new ImageAnalysisConfig.Builder().setImageReaderMode(ImageAnalysis.ImageReaderMode.ACQUIRE_LATEST_IMAGE).build();
ImageAnalysis analysis = new ImageAnalysis(imgAConfig);
analysis.setAnalyzer(
Executors.newSingleThreadExecutor(), new ImageAnalysis.Analyzer(){
@Override
public void analyze(ImageProxy imageProxy, int degrees){
Log.d("analyze", "just analyzing");
if (imageProxy == null || imageProxy.getImage() == null) {
return;
}
Image mediaImage = imageProxy.getImage();
int rotation = degreesToFirebaseRotation(degrees);
FirebaseVisionImage image = FirebaseVisionImage.fromBitmap(toBitmap(mediaImage));
if (!isMachineLearning){
Log.d("analyze", "isMachineLearning is about to be true");
isMachineLearning = true;
String haha = MediaStore.Images.Media.insertImage(getContentResolver(), toBitmap(mediaImage), "image" , "theImageDescription");
Log.d("uploadingimage: ", haha);
extractBarcode(image, toBitmap(mediaImage));
}
}
});
//bind to lifecycle:
CameraX.bindToLifecycle(this, analysis, imgCap, preview);
}
以下是我构建检测的方式(非常简单明了):
FirebaseVisionBarcodeDetectorOptions options = new FirebaseVisionBarcodeDetectorOptions.Builder()
.setBarcodeFormats(FirebaseVisionBarcode.FORMAT_ALL_FORMATS)
.build();
FirebaseVisionBarcodeDetector detector = FirebaseVision.getInstance().getVisionBarcodeDetector(options);
detector.detectInImage(firebaseVisionImage)
最后,当我将图像上传到 GCP - 存储时,它是这样的:
ByteArrayOutputStream baos = new ByteArrayOutputStream();
bmp.compress(Bitmap.CompressFormat.JPEG, 100, baos); //bmp being the image that I ran barcode scanning on - as well as OCR
byte[] data = baos.toByteArray();
UploadTask uploadTask = storageRef.putBytes(data);
谢谢大家的帮助(:
我的问题是我试图在条形码扫描后转换为位图。转换没有正确编写,但我找到了一种无需编写自己的位图转换函数的方法(尽管我打算回到它,因为我认为自己需要它,并且真正的好奇心希望我弄清楚)