使用 React Native 和 Tensorflow.js 对直播视频进行预测

Making predictions on live video feed using React Native and Tensorflow.js

我已经搭建好了 React Native 应用,unimodules 和各个依赖包的安装与配置均已完成,一切都按预期工作,依赖方面没有问题。

现在我想接入一个 TensorFlow 模型(我已经用 Google 的 Teachable Machine 训练好了),但我不明白如何让它配合相机使用——我希望像 tfjs-react-native 的 API 文档所说的那样实时处理每一帧。下面是我在网上找到的代码,我会把其中的模型换成自己的,但问题是它只有在用户拍照时才会进行识别。我希望相机能像人脸检测、条码扫描那样实时地运行模型。

Main.js

import React, {useRef, useEffect, useState} from 'react';
import {View, StyleSheet, Dimensions, Platform} from 'react-native';

import {
  getModel,
  convertBase64ToTensor,
  startPrediction,
} from '../../helpers/tensor-helper';

import {Camera} from 'expo-camera';

import * as tf from '@tensorflow/tfjs';
import '@tensorflow/tfjs-react-native';
import {
  cameraWithTensors,
  bundleResourceIO,
} from '@tensorflow/tfjs-react-native';

const TensorCamera = cameraWithTensors(Camera);

const Main = () => {
  const [model, setModel] = useState();
  const [prediction, setPredictions] = useState();
  const cameraRef = useRef(null);

  let requestAnimationFrameId = 0;
  let frameCount = 0;
  let makePredictionsEveryNFrame = 1;

  const modelJson = require('../../model/model.json');
  const modelWeights = require('../../model/weights.bin');

  const getModel = async () => {
    try {
      await tf.ready();
      const model = await tf.loadLayersModel(
        bundleResourceIO(modelJson, modelWeights),
      );
      return model;
    } catch (error) {
      console.log('Could not load model', error);
    }
  };
  useEffect(() => {
    setModel(getModel());
  }, []);

  useEffect(() => {
    return () => {
      cancelAnimationFrame(requestAnimationFrameId);
    };
  }, [requestAnimationFrameId]);

  const handleCameraStream = tensors => {
    if (!tensors) {
      console.log('Image not found!');
    }
    const loop = async () => {
      if (frameCount % makePredictionsEveryNFrame === 0) {
        const imageTensor = tensors.next().value;
        if (model) {
          const results = await startPrediction(model, imageTensor);
          setPredictions(results);
          console.log(`prediction: ${JSON.stringify(prediction)}`);
        }
        tf.dispose(tensors);
      }
      frameCount += 1;
      frameCount = frameCount % makePredictionsEveryNFrame;
      requestAnimationFrameId = requestAnimationFrame(loop);
    };
    console.log(`prediction: ${JSON.stringify(prediction)}`);

    loop();
    console.log(`prediction: ${JSON.stringify(prediction)}`);
  };

  let textureDims;
  if (Platform.OS === 'ios') {
    textureDims = {
      height: 1920,
      width: 1080,
    };
  } else {
    textureDims = {
      height: 1200,
      width: 1600,
    };
  }
  return (
    <View style={styles.container}>
      <TensorCamera
        ref={cameraRef}
        // Standard Camera props
        style={styles.camera}
        type={Camera.Constants.Type.back}
        flashMode={Camera.Constants.FlashMode.off}
        // Tensor related props
        cameraTextureHeight={textureDims.height}
        cameraTextureWidth={textureDims.width}
        resizeHeight={50}
        resizeWidth={50}
        resizeDepth={3}
        onReady={tensors => handleCameraStream(tensors)}
        autorender={true}
      />
    </View>
  );
};



export default Main;

tensorhelper.js:

import * as tf from '@tensorflow/tfjs';
import {bundleResourceIO, decodeJpeg} from '@tensorflow/tfjs-react-native';
import * as tfc from '@tensorflow/tfjs-core';

import {Base64Binary} from '../utils/utils';
const BITMAP_DIMENSION = 224;

const modelJson = require('../model/model.json');
const modelWeights = require('../model/weights.bin');

// 0: channel from JPEG-encoded image
// 1: gray scale
// 3: RGB image
const TENSORFLOW_CHANNEL = 3;

/**
 * Waits for the TensorFlow.js backend, then loads the layers model bundled
 * with the app.
 *
 * @returns {Promise<object|undefined>} the loaded model, or `undefined` when
 *   loading fails (the failure is logged, not thrown).
 */
export const getModel = async () => {
  try {
    await tf.ready();
    const bundledModelIO = bundleResourceIO(modelJson, modelWeights);
    const loadedModel = await tf.loadLayersModel(bundledModelIO);
    return loadedModel;
  } catch (loadError) {
    console.log('Could not load model', loadError);
  }
};

/**
 * Decodes a base64-encoded JPEG into a 4D tensor of shape
 * [1, BITMAP_DIMENSION, BITMAP_DIMENSION, TENSORFLOW_CHANNEL].
 *
 * @param {string} base64 - base64 string of a JPEG image
 * @returns {Promise<object|undefined>} the reshaped tensor, or `undefined`
 *   when decoding or reshaping fails (the failure is logged, not thrown).
 */
export const convertBase64ToTensor = async base64 => {
  try {
    const uIntArray = Base64Binary.decode(base64);
    // tidy() releases the intermediate decoded tensor and keeps only the
    // reshaped result, so the caller has a single tensor to dispose.
    return tf.tidy(() =>
      decodeJpeg(uIntArray, TENSORFLOW_CHANNEL).reshape([
        1,
        BITMAP_DIMENSION,
        BITMAP_DIMENSION,
        TENSORFLOW_CHANNEL,
      ]),
    );
  } catch (error) {
    console.log('Could not convert base64 string to tesor', error);
  }
};

/**
 * Runs the model on a tensor and returns the raw output values.
 *
 * @param {object} model - loaded model exposing `predict(tensor)`
 * @param {object} tensor - input tensor in the shape the model expects
 * @returns {Promise<object|undefined>} typed array with the model output, or
 *   `undefined` when prediction fails (the failure is logged, not thrown).
 */
export const startPrediction = async (model, tensor) => {
  let output;
  try {
    // predict against the model
    output = model.predict(tensor);
    // return the values produced by the model, not a fresh empty tensor
    return output.dataSync();
  } catch (error) {
    console.log('Error predicting from tesor image', error);
    return undefined;
  } finally {
    // The values were already copied out; release the output tensor so
    // per-frame predictions do not leak memory.
    if (output) {
      output.dispose();
    }
  }
};

我编辑了文件并将其作为输出:

 LOG  prediction: undefined
 LOG  prediction: undefined
 WARN  Possible Unhandled Promise Rejection (id: 1):
Error: When using targetShape.depth=3, targetShape.width must be a multiple of 4. Alternatively do not call detectGLCapabilities()
fromTexture@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:267911:24
nextFrameGenerator$@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:268598:67  
tryCatch@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:26537:23
invoke@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:26710:32
loop$@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:126503:43
tryCatch@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:26537:23
invoke@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:26710:32
tryCatch@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:26537:23
invoke@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:26610:30
http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:26640:19
tryCallTwo@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:31390:9
doResolve@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:31554:25
Promise@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:31413:14
callInvokeWithMethodAndArg@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:26639:33
enqueue@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:26644:157
async@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:26661:69
loop@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:126494:42
handleCameraStream@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:126535:11   
onReady@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:126572:34
onGLContextCreate$@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:268641:37   
tryCatch@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:26537:23
invoke@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:26710:32   
__callImmediates@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:3317:35       
http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:3096:34
__guard@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:3300:15
flushedQueue@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:3095:21
flushedQueue@[native code]
invokeCallbackAndReturnFlushedQueue@[native code]

好吧,我(去年)做了这个,所以我可能忘记了一些东西,但你可以参考代码 here,使用 Expo 并对实时视频提要进行预测,请原谅糟糕的代码(我现在写出更好的代码)。

无论如何,下面简单说明一下您需要做的改动,主要集中在 handleCameraStream() 上。您需要使用两个独立的 useEffect 钩子:一个用于在初始化时加载模型,另一个用于在组件卸载时取消持续预测所用的动画帧(requestAnimationFrame)。

将模型保存到状态(state)中,这样就可以在文件的任何位置通过 model 访问它。我对 predictions 也做了同样的处理。

我还添加了每 N 帧进行一次预测的功能:将 makePredictionsEveryNFrame 设置为 1 时,TensorCamera 输出的每一帧张量都会被传入预测函数。完成预测后,还需要使用 tf.dispose() 释放张量。loop() 函数需要不断循环运行,以便对后续到来的帧持续进行预测。

/**
 * Component skeleton showing how to load the model once and run predictions
 * continuously on the TensorCamera stream.
 *
 * Values the long-lived frame loop must read "live" (the loaded model, the
 * frame counter, the pending animation-frame id, the mounted flag) are kept in
 * refs, because the loop keeps running across re-renders.
 */
const Main = () => {
  const [model, setModel] = useState();
  const [predictions, setPredictions] = useState();

  const modelRef = useRef(null);
  const requestAnimationFrameIdRef = useRef(0);
  const frameCountRef = useRef(0);
  const isActiveRef = useRef(true);

  // Run the model on every N-th frame; 1 means every frame.
  const makePredictionsEveryNFrame = 1;

  // Load the model once. The effect callback itself must not be async, so the
  // promise is resolved inside it.
  useEffect(() => {
    let isCurrent = true;
    getModel().then((loadedModel) => {
      if (isCurrent && loadedModel) {
        setModel(loadedModel);
      }
    });
    return () => {
      isCurrent = false;
    };
  }, []);

  // Mirror the loaded model into a ref so the running loop can pick it up.
  useEffect(() => {
    modelRef.current = model;
  }, [model]);

  // Stop the frame loop when the component unmounts.
  useEffect(() => {
    isActiveRef.current = true;
    return () => {
      isActiveRef.current = false;
      cancelAnimationFrame(requestAnimationFrameIdRef.current);
    };
  }, []);

  // Pulls one frame, predicts on it when the model is loaded, and always
  // releases the tensors created for that frame.
  const processNextFrame = async (tensors) => {
    const imageTensor = tensors.next().value;
    if (!imageTensor) {
      return;
    }
    try {
      const currentModel = modelRef.current;
      if (!currentModel) {
        return;
      }
      // The camera yields a 3D tensor; add a leading batch dimension.
      const batchedTensor = imageTensor.expandDims(0);
      try {
        const results = await startPrediction(currentModel, batchedTensor);
        if (isActiveRef.current && results) {
          setPredictions(results);
        }
      } finally {
        tf.dispose(batchedTensor);
      }
    } finally {
      tf.dispose(imageTensor);
    }
  };

  /**
   * Pass this to TensorCamera's `onReady`; it starts a
   * requestAnimationFrame-driven loop over the frame tensors.
   *
   * @param {Iterator} tensors - iterator of camera frame tensors
   */
  const handleCameraStream = (tensors) => {
    if (!tensors) {
      console.log("Image not found!");
      return;
    }
    const loop = async () => {
      if (!isActiveRef.current) {
        return;
      }
      try {
        if (frameCountRef.current % makePredictionsEveryNFrame === 0) {
          await processNextFrame(tensors);
        }
      } catch (error) {
        // Stop looping instead of failing again on every subsequent frame.
        console.log("Error processing camera frame", error);
        return;
      }
      frameCountRef.current =
        (frameCountRef.current + 1) % makePredictionsEveryNFrame;
      if (isActiveRef.current) {
        requestAnimationFrameIdRef.current = requestAnimationFrame(loop);
      }
    };
    loop();
  };

  // Rendering (the <TensorCamera onReady={handleCameraStream} /> element and
  // the display of `predictions`) is omitted from this snippet.
};

我在加载模型时将 getModel() 更新为 return 模型,这样我们就可以将其设置为状态。

/**
 * Waits for the TensorFlow.js backend, then loads the layers model bundled
 * with the app and hands it back to the caller.
 *
 * @returns {Promise<object|undefined>} the loaded model, or `undefined` when
 *   loading fails (the failure is logged, not thrown).
 */
export const getModel = async () => {
  try {
    await tf.ready();
    const bundledModelIO = bundleResourceIO(modelJson, modelWeights);
    const loadedModel = await tf.loadLayersModel(bundledModelIO);
    return loadedModel;
  } catch (loadError) {
    console.log("Could not load model", loadError);
  }
};

因此您只需要访问 predictions 并渲染它们。

编辑 1:

回顾代码后发现,startPrediction 函数存在问题:它实际上并没有返回模型的预测结果。由于每次只处理一张图像,您需要以单个批次(batchSize 为 1)的数据进行预测,并返回模型的输出。

/**
 * Runs the model on a single-image batch and returns the raw output values.
 *
 * @param {object} model - loaded model exposing `predict(tensor, config)`
 * @param {object} tensor - batched input tensor in the shape the model expects
 * @returns {Promise<object|undefined>} typed array with the model output, or
 *   `undefined` when prediction fails (the failure is logged, not thrown).
 */
export const startPrediction = async (model, tensor) => {
  let output;
  try {
    // predict against the model
    output = model.predict(tensor, {batchSize: 1});
    return output.dataSync();
  } catch (error) {
    console.log('Error predicting from tesor image', error);
    return undefined;
  } finally {
    // The values were already copied out; release the output tensor so
    // per-frame predictions do not leak memory.
    if (output) {
      output.dispose();
    }
  }
};

编辑 2:

查看模型的输入形状(here)可知,预期的输入形状为 (batch_size, 224, 224, 3),而您传入的图像形状是 (batch_size, 50, 50, 3)。因此请尝试将参数 resizeWidth 和 resizeHeight 都改为 224。

<TensorCamera
        ref={cameraRef}
        // Standard Camera props
        style={styles.camera}
        type={Camera.Constants.Type.back}
        flashMode={Camera.Constants.FlashMode.off}
        // Tensor related props
        cameraTextureHeight={textureDims.height}
        cameraTextureWidth={textureDims.width}
        // Resize every frame to 224x224x3 so it matches the model's expected
        // input; with resizeDepth=3 the width must also be a multiple of 4.
        resizeHeight={224}
        resizeWidth={224}
        resizeDepth={3}
        // Invoked with the stream of frame tensors once the camera is ready.
        onReady={tensors => handleCameraStream(tensors)}
        autorender={true}
      />

除此之外,在把张量传给模型进行预测之前,还需要将 3D 张量转换为 4D 张量,也就是扩展出一个批次维度。请把 handleCameraStream 函数也更新为如下形式。原张量的形状是 (224, 224, 3),在第 0 维扩展之后变为 (1, 224, 224, 3)。

/**
 * Starts a requestAnimationFrame-driven loop that takes frames from the
 * camera tensor stream, adds a batch dimension and runs the model on them.
 *
 * Reads `model`, `frameCount`, `makePredictionsEveryNFrame` and
 * `requestAnimationFrameId` from the enclosing component scope.
 *
 * @param {Iterator} tensors - iterator of camera frame tensors
 */
const handleCameraStream = (tensors) => {
  if (!tensors) {
    console.log("Image not found!");
    return;
  }
  const loop = async () => {
    if (frameCount % makePredictionsEveryNFrame === 0) {
      const imageTensor = tensors.next().value;
      if (imageTensor) {
        try {
          if (model) {
            // (224,224,3) -> (1,224,224,3): the axis is passed positionally;
            // JavaScript has no keyword arguments.
            const imageTensorReshaped = imageTensor.expandDims(0);
            try {
              const results = await startPrediction(model, imageTensorReshaped);
              setPredictions(results);
            } finally {
              tf.dispose(imageTensorReshaped);
            }
          }
        } finally {
          // Release the raw frame as well, whether or not a prediction ran.
          tf.dispose(imageTensor);
        }
      }
    }
    frameCount += 1;
    frameCount = frameCount % makePredictionsEveryNFrame;
    requestAnimationFrameId = requestAnimationFrame(loop);
  };
  loop();
};