How to draw labels on a screen: MLKit Object Detection

This is an extension of a question I asked previously.

I am using the latest version of ML Kit Object Detection (no Firebase required). I am using a custom model and CameraX to detect objects, label them, and get information about them.

Right now, with my code, it detects that an object is present in the area, but:

  1. no labels or bounding boxes are displayed;
  2. it won't detect more than one object at a time;
  3. once it detects an object, the app won't "change" (i.e. when I move the phone to try to detect another object, nothing in the display changes).

Here is my code:

```java
package com.example.mlkitobjecttest;

import androidx.annotation.NonNull;
import androidx.appcompat.app.AppCompatActivity;
import androidx.camera.core.Camera;
import androidx.camera.core.CameraSelector;
import androidx.camera.core.CameraX;
import androidx.camera.core.ImageAnalysis;
import androidx.camera.core.ImageProxy;
import androidx.camera.core.Preview;
import androidx.camera.core.impl.PreviewConfig;
import androidx.camera.lifecycle.ProcessCameraProvider;
import androidx.camera.view.PreviewView;
import androidx.core.app.ActivityCompat;
import androidx.core.content.ContextCompat;
import androidx.lifecycle.LifecycleOwner;

import android.content.pm.PackageManager;
import android.graphics.Rect;
import android.media.Image;
import android.os.Bundle;
import android.text.Layout;
import android.util.Rational;
import android.util.Size;
import android.view.View;
import android.widget.TextView;
import android.widget.Toast;

import com.google.android.gms.tasks.OnFailureListener;
import com.google.android.gms.tasks.OnSuccessListener;
import com.google.common.util.concurrent.ListenableFuture;
import com.google.mlkit.common.model.LocalModel;
import com.google.mlkit.vision.common.InputImage;
import com.google.mlkit.vision.objects.DetectedObject;
import com.google.mlkit.vision.objects.ObjectDetection;
import com.google.mlkit.vision.objects.ObjectDetector;
import com.google.mlkit.vision.objects.custom.CustomObjectDetectorOptions;

import org.w3c.dom.Text;

import java.util.List;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

public class MainActivity extends AppCompatActivity {

    private class YourAnalyzer implements ImageAnalysis.Analyzer {

        @Override
        @androidx.camera.core.ExperimentalGetImage
        public void analyze(ImageProxy imageProxy) {

            Image mediaImage = imageProxy.getImage();
            if (mediaImage != null) {
                InputImage image =
                        InputImage.fromMediaImage(mediaImage, imageProxy.getImageInfo().getRotationDegrees());
                // Pass image to an ML Kit Vision API
                // ...
                LocalModel localModel =
                        new LocalModel.Builder()
                                .setAssetFilePath("mobilenet_v1_1.0_128_quantized_1_default_1.tflite")
                                // or .setAbsoluteFilePath(absolute file path to tflite model)
                                .build();

                CustomObjectDetectorOptions customObjectDetectorOptions =
                        new CustomObjectDetectorOptions.Builder(localModel)
                                .setDetectorMode(CustomObjectDetectorOptions.SINGLE_IMAGE_MODE)
                                .enableMultipleObjects()
                                .enableClassification()
                                .setClassificationConfidenceThreshold(0.5f)
                                .setMaxPerObjectLabelCount(3)
                                .build();

                ObjectDetector objectDetector =
                        ObjectDetection.getClient(customObjectDetectorOptions);

                objectDetector
                        .process(image)
                        .addOnFailureListener(new OnFailureListener() {
                            @Override
                            public void onFailure(@NonNull Exception e) {
                                //Toast.makeText(getApplicationContext(), "Fail. Sad!", Toast.LENGTH_SHORT).show();
                                //textView.setText("Fail. Sad!");
                                imageProxy.close();
                            }
                        })
                        .addOnSuccessListener(new OnSuccessListener<List<DetectedObject>>() {
                            @Override
                            public void onSuccess(List<DetectedObject> results) {

                                for (DetectedObject detectedObject : results) {
                                    Rect box = detectedObject.getBoundingBox();


                                    for (DetectedObject.Label label : detectedObject.getLabels()) {
                                        String text = label.getText();
                                        int index = label.getIndex();
                                        float confidence = label.getConfidence();
                                        textView.setText(text);



                                }}
                                imageProxy.close();
                            }
                        });

            }
            //ImageAnalysis.Builder.fromConfig(new ImageAnalysisConfig).setBackpressureStrategy(ImageAnalysis.STRATEGY_KEEP_ONLY_LATEST);

        }

    }


    PreviewView prevView;
    private ListenableFuture<ProcessCameraProvider> cameraProviderFuture;
    private ExecutorService executor = Executors.newSingleThreadExecutor();
    TextView textView;

    private int REQUEST_CODE_PERMISSIONS = 101;
    private String[] REQUIRED_PERMISSIONS = new String[]{"android.permission.CAMERA"};
   /* @NonNull
    @Override
    public CameraXConfig getCameraXConfig() {
        return CameraXConfig.Builder.fromConfig(Camera2Config.defaultConfig())
                .setCameraExecutor(ContextCompat.getMainExecutor(this))
                .build();
    }
*/
    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.activity_main);

        prevView = findViewById(R.id.viewFinder);
        textView = findViewById(R.id.scan_button);

        if(allPermissionsGranted()){
            startCamera();
        }else{
            ActivityCompat.requestPermissions(this, REQUIRED_PERMISSIONS, REQUEST_CODE_PERMISSIONS);
        }

    }

    private void startCamera() {
        cameraProviderFuture = ProcessCameraProvider.getInstance(this);
        cameraProviderFuture.addListener(new Runnable() {
            @Override
            public void run() {
                try {
                    ProcessCameraProvider cameraProvider = cameraProviderFuture.get();
                    bindPreview(cameraProvider);
                } catch (ExecutionException | InterruptedException e) {
                    // No errors need to be handled for this Future.
                    // This should never be reached.
                }
            }
        }, ContextCompat.getMainExecutor(this));


    }

    void bindPreview(@NonNull ProcessCameraProvider cameraProvider) {

        Preview preview = new Preview.Builder()
                .build();

        CameraSelector cameraSelector = new CameraSelector.Builder()
                .requireLensFacing(CameraSelector.LENS_FACING_BACK)
                .build();

        preview.setSurfaceProvider(prevView.createSurfaceProvider());

        ImageAnalysis imageAnalysis =
                new ImageAnalysis.Builder()
                        .setTargetResolution(new Size(1280, 720))
                        .setBackpressureStrategy(ImageAnalysis.STRATEGY_KEEP_ONLY_LATEST)
                        .build();
        imageAnalysis.setAnalyzer(ContextCompat.getMainExecutor(this), new YourAnalyzer());

        Camera camera = cameraProvider.bindToLifecycle((LifecycleOwner)this, cameraSelector, preview, imageAnalysis);


    }



    private boolean allPermissionsGranted() {
        for(String permission: REQUIRED_PERMISSIONS){
            if(ContextCompat.checkSelfPermission(this, permission) != PackageManager.PERMISSION_GRANTED){
                return false;
            }
        }
        return true;
    }

    @Override
    public void onRequestPermissionsResult(int requestCode, @NonNull String[] permissions, @NonNull int[] grantResults) {

        if(requestCode == REQUEST_CODE_PERMISSIONS){
            if(allPermissionsGranted()){
                startCamera();
            } else{
                Toast.makeText(this, "Permissions not granted by the user.", Toast.LENGTH_SHORT).show();
                this.finish();
            }
        }
    }

}
```

To answer your question #3:

> Once it detects an object, the app won't "change" (i.e. when I move the phone to try to detect another object, nothing in the display changes).

I'm guessing this is because your imageProxy.close() needs to be part of an OnCompleteListener; otherwise it causes all sorts of threading problems and can block any further images from being received and processed, which is what you described.

That is, change this:

```java
objectDetector
    .process(image)
    .addOnFailureListener(new OnFailureListener() {
        @Override
        public void onFailure(@NonNull Exception e) {
            //Toast.makeText(getApplicationContext(), "Fail. Sad!", Toast.LENGTH_SHORT).show();
            //textView.setText("Fail. Sad!");
            imageProxy.close();
        }
    })
    .addOnSuccessListener(new OnSuccessListener<List<DetectedObject>>() {
        @Override
        public void onSuccess(List<DetectedObject> results) {

            for (DetectedObject detectedObject : results) {
                Rect box = detectedObject.getBoundingBox();

                for (DetectedObject.Label label : detectedObject.getLabels()) {
                    String text = label.getText();
                    int index = label.getIndex();
                    float confidence = label.getConfidence();
                    textView.setText(text);
            }}
            mediaImage.close();
            imageProxy.close();
        }
    });
```

to this:

```java
objectDetector
    .process(image)
    .addOnFailureListener(new OnFailureListener() {
        @Override
        public void onFailure(@NonNull Exception e) {
            //Toast.makeText(getApplicationContext(), "Fail. Sad!", Toast.LENGTH_SHORT).show();
            //textView.setText("Fail. Sad!");
            imageProxy.close();
        }
    })
    .addOnSuccessListener(new OnSuccessListener<List<DetectedObject>>() {
        @Override
        public void onSuccess(List<DetectedObject> results) {

            for (DetectedObject detectedObject : results) {
                Rect box = detectedObject.getBoundingBox();

                for (DetectedObject.Label label : detectedObject.getLabels()) {
                    String text = label.getText();
                    int index = label.getIndex();
                    float confidence = label.getConfidence();
                    textView.setText(text);
            }}
        }
    }).addOnCompleteListener(new OnCompleteListener<List<DetectedObject>>() {
        // Requires imports for com.google.android.gms.tasks.OnCompleteListener
        // and com.google.android.gms.tasks.Task.
        @Override
        public void onComplete(@NonNull Task<List<DetectedObject>> task) {
            imageProxy.close();
        }
    });
```

Note that I haven't checked the locations/nesting levels of your curly braces for accuracy, so make sure those are correct as well.

I had a similar problem in which everything came down to the missing OnCompleteListener. For details, see the original reasoning I found here, and how it applies more specifically to the Task object created by your objectDetector.process(image) (or, in my case, Task<List<Barcode>> result = scanner.process(image)).
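
For illustration, here is a minimal self-contained sketch of that pattern (my addition, not part of the original post): the detector is built once rather than on every frame, and the ImageProxy is closed only in the OnCompleteListener, so it is released on both the success and failure paths. The use of STREAM_MODE is an assumption on my part; ML Kit suggests it for live camera feeds, whereas the code above uses SINGLE_IMAGE_MODE.

```java
import android.media.Image;

import androidx.annotation.NonNull;
import androidx.camera.core.ImageAnalysis;
import androidx.camera.core.ImageProxy;

import com.google.mlkit.common.model.LocalModel;
import com.google.mlkit.vision.common.InputImage;
import com.google.mlkit.vision.objects.DetectedObject;
import com.google.mlkit.vision.objects.ObjectDetection;
import com.google.mlkit.vision.objects.ObjectDetector;
import com.google.mlkit.vision.objects.custom.CustomObjectDetectorOptions;

class YourAnalyzer implements ImageAnalysis.Analyzer {

    // Build the detector once as a field, not inside analyze(), so the local
    // model is not re-created for every frame the analyzer receives.
    private final ObjectDetector objectDetector = ObjectDetection.getClient(
            new CustomObjectDetectorOptions.Builder(
                    new LocalModel.Builder()
                            .setAssetFilePath("mobilenet_v1_1.0_128_quantized_1_default_1.tflite")
                            .build())
                    // STREAM_MODE is an assumption; ML Kit suggests it for live camera feeds.
                    .setDetectorMode(CustomObjectDetectorOptions.STREAM_MODE)
                    .enableClassification()
                    .setClassificationConfidenceThreshold(0.5f)
                    .setMaxPerObjectLabelCount(3)
                    .build());

    @Override
    @androidx.camera.core.ExperimentalGetImage
    public void analyze(@NonNull ImageProxy imageProxy) {
        Image mediaImage = imageProxy.getImage();
        if (mediaImage == null) {
            imageProxy.close();
            return;
        }
        InputImage image = InputImage.fromMediaImage(
                mediaImage, imageProxy.getImageInfo().getRotationDegrees());
        objectDetector.process(image)
                .addOnSuccessListener(results -> {
                    for (DetectedObject detectedObject : results) {
                        for (DetectedObject.Label label : detectedObject.getLabels()) {
                            // Update the UI here, e.g. textView.setText(label.getText());
                        }
                    }
                })
                .addOnFailureListener(e -> { /* log or surface the error */ })
                // Closing in onComplete guarantees it runs on both paths, so the
                // analyzer is unblocked and the next frame can be delivered.
                .addOnCompleteListener(task -> imageProxy.close());
    }
}
```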

So I figured it out. When adding a TensorFlow model to help with object detection, it apparently has to include metadata. That way, when you call getLabels() and its associated methods, it will actually return labels; otherwise it returns nothing and apparently causes errors.

Here is the one I used: mobilenet_v1_0.50_192_quantized_1_metadata_1.tflite
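
If you want to verify that a .tflite file actually bundles metadata before handing it to ML Kit, here is a minimal sketch, assuming the org.tensorflow:tensorflow-lite-metadata dependency; this check is my addition, not something from the original thread.

```java
import android.content.Context;
import android.content.res.AssetFileDescriptor;

import org.tensorflow.lite.support.metadata.MetadataExtractor;

import java.io.FileInputStream;
import java.io.IOException;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;

public final class ModelMetadataCheck {

    // Memory-map the model from assets. The .tflite asset must be stored
    // uncompressed (e.g. aaptOptions { noCompress "tflite" } in build.gradle),
    // otherwise openFd() throws.
    static MappedByteBuffer loadModel(Context context, String assetPath) throws IOException {
        try (AssetFileDescriptor fd = context.getAssets().openFd(assetPath);
             FileInputStream input = new FileInputStream(fd.getFileDescriptor())) {
            FileChannel channel = input.getChannel();
            return channel.map(FileChannel.MapMode.READ_ONLY,
                    fd.getStartOffset(), fd.getDeclaredLength());
        }
    }

    // Returns true if the model carries TFLite metadata (labels etc.).
    static boolean modelHasMetadata(Context context, String assetPath) throws IOException {
        MetadataExtractor extractor =
                new MetadataExtractor(loadModel(context, assetPath));
        return extractor.hasMetadata();
    }
}
```

With the metadata model named above this should return true, while the mobilenet_v1_1.0_128_quantized_1_default_1.tflite model from the question lacks metadata, which matches the empty getLabels() behavior described here.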