如何减小 Tflite 模型的大小或下载并以编程方式设置?
How to Reduce size of Tflite model or Download and set it programmatically?
好的,所以在我的应用程序中,我正在尝试使用人脸网络模型实现人脸识别,该模型转换为 tflite 平均约为 93 MB,
然而,这个模型最终会增加我的 apk 的大小。
所以我正在尝试寻找其他方法来处理这个问题
首先我想到的是通过某种方式压缩它,然后在安装应用程序时解压缩
另一种方法是我应该将该模型上传到服务器,并在下载后将其加载到我的应用程序中。
但是我似乎不知道如何实现这个:
默认情况下,face net 允许从 assets 文件夹中实现
var facenet = FaceNet(getAssets());
但如果我正在下载该模型,我该如何将其加载到我的应用程序中?
这是我的人脸网初始化代码:
// Initializes the TFLite interpreter from the model bundled in the APK assets
// and pre-allocates the direct input buffer for a single RGB image.
public FaceNet(AssetManager assetManager) throws IOException {
// Memory-map the model straight from assets (avoids copying the ~93 MB model onto the heap).
tfliteModel = loadModelFile(assetManager);
tflite = new Interpreter(tfliteModel, tfliteOptions);
// One batch of IMAGE_HEIGHT x IMAGE_WIDTH pixels, NUM_CHANNELS float32 values each.
imgData = ByteBuffer.allocateDirect(
BATCH_SIZE
* IMAGE_HEIGHT
* IMAGE_WIDTH
* NUM_CHANNELS
* NUM_BYTES_PER_CHANNEL);
// TFLite requires input buffers to use the platform's native byte order.
imgData.order(ByteOrder.nativeOrder());
}
// Memory-maps the model file from the assets folder as a read-only buffer.
// NOTE(review): the FileInputStream and AssetFileDescriptor are never closed
// here -- the mapping stays valid after closing, so they could be released.
private MappedByteBuffer loadModelFile(AssetManager assetManager) throws IOException {
AssetFileDescriptor fileDescriptor = assetManager.openFd(MODEL_PATH);
FileInputStream inputStream = new FileInputStream(fileDescriptor.getFileDescriptor());
FileChannel fileChannel = inputStream.getChannel();
// The asset lives inside the APK, so map only the slice belonging to the model.
long startOffset = fileDescriptor.getStartOffset();
long declaredLength = fileDescriptor.getDeclaredLength();
return fileChannel.map(FileChannel.MapMode.READ_ONLY, startOffset, declaredLength);
}
我的 FaceNet Class:
public class FaceNet {
    // Model file bundled in the APK assets folder.
    private static final String MODEL_PATH = "facenet.tflite";
    // Normalization constants (unused: addPixelValue scales by 1/255 instead).
    private static final float IMAGE_MEAN = 127.5f;
    private static final float IMAGE_STD = 127.5f;
    private static final int BATCH_SIZE = 1;
    private static final int IMAGE_HEIGHT = 160;
    private static final int IMAGE_WIDTH = 160;
    private static final int NUM_CHANNELS = 3;
    private static final int NUM_BYTES_PER_CHANNEL = 4; // float32
    // Length of the embedding vector this FaceNet model outputs.
    private static final int EMBEDDING_SIZE = 512;

    // Reusable ARGB pixel buffer for one IMAGE_HEIGHT x IMAGE_WIDTH frame.
    private final int[] intValues = new int[IMAGE_HEIGHT * IMAGE_WIDTH];
    private ByteBuffer imgData;
    private MappedByteBuffer tfliteModel;
    private Interpreter tflite;
    private final Interpreter.Options tfliteOptions = new Interpreter.Options();

    /**
     * Creates the TFLite interpreter from the model bundled in assets and
     * pre-allocates the direct input buffer for a single RGB image.
     *
     * @throws IOException if the model asset cannot be opened or mapped.
     */
    public FaceNet(AssetManager assetManager) throws IOException {
        tfliteModel = loadModelFile(assetManager);
        tflite = new Interpreter(tfliteModel, tfliteOptions);
        imgData = ByteBuffer.allocateDirect(
                BATCH_SIZE
                        * IMAGE_HEIGHT
                        * IMAGE_WIDTH
                        * NUM_CHANNELS
                        * NUM_BYTES_PER_CHANNEL);
        // TFLite requires input buffers to use the platform's native byte order.
        imgData.order(ByteOrder.nativeOrder());
    }

    /**
     * Memory-maps the model asset as a read-only buffer.
     * FIX: close the descriptor and stream after mapping (the original leaked
     * both); a MappedByteBuffer remains valid after its channel is closed.
     */
    private MappedByteBuffer loadModelFile(AssetManager assetManager) throws IOException {
        try (AssetFileDescriptor fileDescriptor = assetManager.openFd(MODEL_PATH);
             FileInputStream inputStream = new FileInputStream(fileDescriptor.getFileDescriptor())) {
            FileChannel fileChannel = inputStream.getChannel();
            // The asset lives inside the APK; map only the model's slice of it.
            long startOffset = fileDescriptor.getStartOffset();
            long declaredLength = fileDescriptor.getDeclaredLength();
            return fileChannel.map(FileChannel.MapMode.READ_ONLY, startOffset, declaredLength);
        }
    }

    /** Writes the bitmap's pixels into imgData as RGB floats scaled to [0, 1]. */
    private void convertBitmapToByteBuffer(Bitmap bitmap) {
        if (imgData == null) {
            return;
        }
        imgData.rewind();
        bitmap.getPixels(intValues, 0, bitmap.getWidth(), 0, 0, bitmap.getWidth(), bitmap.getHeight());
        // Convert the image to floating point.
        int pixel = 0;
        for (int i = 0; i < IMAGE_HEIGHT; ++i) {
            for (int j = 0; j < IMAGE_WIDTH; ++j) {
                addPixelValue(intValues[pixel++]);
            }
        }
    }

    /** Unpacks one ARGB pixel and appends its R, G, B channels scaled to [0, 1]. */
    private void addPixelValue(int pixelValue) {
        imgData.putFloat(((pixelValue >> 16) & 0xFF) / 255.0f);
        imgData.putFloat(((pixelValue >> 8) & 0xFF) / 255.0f);
        imgData.putFloat((pixelValue & 0xFF) / 255.0f);
    }

    /** Logs the interpreter's tensor layout; useful when debugging model I/O. */
    public void inspectModel() {
        String tag = "Model Inspection";
        Log.i(tag, "Number of input tensors: " + tflite.getInputTensorCount());
        Log.i(tag, "Number of output tensors: " + tflite.getOutputTensorCount());
        Log.i(tag, tflite.getInputTensor(0).toString());
        Log.i(tag, "Input tensor data type: " + tflite.getInputTensor(0).dataType());
        Log.i(tag, "Input tensor shape: " + Arrays.toString(tflite.getInputTensor(0).shape()));
        Log.i(tag, "Output tensor 0 shape: " + Arrays.toString(tflite.getOutputTensor(0).shape()));
    }

    private Bitmap resizedBitmap(Bitmap bitmap, int height, int width) {
        return Bitmap.createScaledBitmap(bitmap, width, height, true);
    }

    private Bitmap croppedBitmap(Bitmap bitmap, int upperCornerX, int upperCornerY, int height, int width) {
        return Bitmap.createBitmap(bitmap, upperCornerX, upperCornerY, width, height);
    }

    /** Runs inference on one face image; returns a 1 x EMBEDDING_SIZE embedding. */
    private float[][] run(Bitmap bitmap) {
        bitmap = resizedBitmap(bitmap, IMAGE_HEIGHT, IMAGE_WIDTH);
        convertBitmapToByteBuffer(bitmap);
        // FIX: use EMBEDDING_SIZE rather than a hard-coded 512 so the output
        // buffer stays in sync with the loop in getSimilarityScore.
        float[][] embeddings = new float[1][EMBEDDING_SIZE];
        tflite.run(imgData, embeddings);
        return embeddings;
    }

    /**
     * Returns the Euclidean (L2) distance between the two faces' embeddings;
     * smaller means more similar.
     */
    public double getSimilarityScore(Bitmap face1, Bitmap face2) {
        float[][] face1Embedding = run(face1);
        float[][] face2Embedding = run(face2);
        double distance = 0.0;
        for (int i = 0; i < EMBEDDING_SIZE; i++) {
            double diff = face1Embedding[0][i] - face2Embedding[0][i];
            distance += diff * diff;
        }
        return Math.sqrt(distance);
    }

    /** Releases the interpreter; the instance must not be used afterwards. */
    public void close() {
        if (tflite != null) {
            tflite.close();
            tflite = null;
        }
        tfliteModel = null;
    }
}
好吧,我想不出能直接减小模型文件体积的办法。不过观察你的 class 可以发现,它最终只是从一个文件输入流返回 MappedByteBuffer,并不依赖 assets。因此你可以先把下载好的模型文件放到外部存储的 Facenet 文件夹中,再从对应文件的 FileInputStream 上获取映射的字节缓冲区即可。下面是 Kotlin 版的实现:
class FaceNetStorage @Throws(IOException::class)
constructor() {
    // Reusable ARGB pixel buffer for one IMAGE_HEIGHT x IMAGE_WIDTH frame.
    private val intValues = IntArray(IMAGE_HEIGHT * IMAGE_WIDTH)
    private var imgData: ByteBuffer? = null
    private var tfliteModel: MappedByteBuffer? = null
    private var tflite: Interpreter? = null
    private val tfliteOptions = Interpreter.Options()

    init {
        val modelDir = File(Environment.getExternalStorageDirectory().toString() + "/Facenet")
        // FIX: fail fast with IOException instead of silently leaving tflite and
        // imgData null (the original would crash later on the first tflite!! use).
        if (!modelDir.exists() && !modelDir.mkdir()) {
            throw IOException("Cannot create model directory: $modelDir")
        }
        tfliteModel = loadModelFile(File(modelDir, MODEL_PATH))
        tflite = Interpreter(tfliteModel!!, tfliteOptions)
        // Direct buffer for one RGB float32 image; TFLite needs native byte order.
        imgData = ByteBuffer.allocateDirect(
            BATCH_SIZE * IMAGE_HEIGHT * IMAGE_WIDTH * NUM_CHANNELS * NUM_BYTES_PER_CHANNEL
        ).apply { order(ByteOrder.nativeOrder()) }
    }

    /**
     * Memory-maps [file] read-only. FIX: `use` closes the stream after mapping
     * (the original leaked the FileInputStream); the mapping stays valid after
     * its channel is closed.
     */
    @Throws(IOException::class)
    private fun loadModelFile(file: File): MappedByteBuffer =
        FileInputStream(file).use { stream ->
            stream.channel.map(FileChannel.MapMode.READ_ONLY, 0, stream.channel.size())
        }

    /** Writes the bitmap's pixels into [imgData] as RGB floats scaled to [0, 1]. */
    private fun convertBitmapToByteBuffer(bitmap: Bitmap) {
        val buffer = imgData ?: return
        buffer.rewind()
        bitmap.getPixels(intValues, 0, bitmap.width, 0, 0, bitmap.width, bitmap.height)
        // Convert the image to floating point.
        var pixel = 0
        for (i in 0 until IMAGE_HEIGHT) {
            for (j in 0 until IMAGE_WIDTH) {
                addPixelValue(intValues[pixel++])
            }
        }
    }

    /** Unpacks one ARGB pixel and appends its R, G, B channels scaled to [0, 1]. */
    private fun addPixelValue(pixelValue: Int) {
        imgData!!.putFloat((pixelValue shr 16 and 0xFF) / 255.0f)
        imgData!!.putFloat((pixelValue shr 8 and 0xFF) / 255.0f)
        imgData!!.putFloat((pixelValue and 0xFF) / 255.0f)
    }

    /** Logs the interpreter's tensor layout; useful when debugging model I/O. */
    fun inspectModel() {
        val tag = "Model Inspection"
        Log.i(tag, "Number of input tensors: ${tflite!!.inputTensorCount}")
        Log.i(tag, "Number of output tensors: ${tflite!!.outputTensorCount}")
        Log.i(tag, tflite!!.getInputTensor(0).toString())
        Log.i(tag, "Input tensor data type: " + tflite!!.getInputTensor(0).dataType())
        Log.i(tag, "Input tensor shape: " + Arrays.toString(tflite!!.getInputTensor(0).shape()))
        Log.i(tag, "Output tensor 0 shape: " + Arrays.toString(tflite!!.getOutputTensor(0).shape()))
    }

    private fun resizedBitmap(bitmap: Bitmap, height: Int, width: Int): Bitmap =
        Bitmap.createScaledBitmap(bitmap, width, height, true)

    private fun croppedBitmap(bitmap: Bitmap, upperCornerX: Int, upperCornerY: Int, height: Int, width: Int): Bitmap =
        Bitmap.createBitmap(bitmap, upperCornerX, upperCornerY, width, height)

    /** Runs inference on one face image; returns a 1 x EMBEDDING_SIZE embedding. */
    private fun run(bitmap: Bitmap): Array<FloatArray> {
        convertBitmapToByteBuffer(resizedBitmap(bitmap, IMAGE_HEIGHT, IMAGE_WIDTH))
        // FIX: use EMBEDDING_SIZE rather than a hard-coded 512 so the output
        // buffer stays in sync with the loop in getSimilarityScore.
        val embeddings = Array(1) { FloatArray(EMBEDDING_SIZE) }
        tflite!!.run(imgData, embeddings)
        return embeddings
    }

    /**
     * Returns the Euclidean (L2) distance between the two faces' embeddings;
     * smaller means more similar.
     */
    fun getSimilarityScore(face1: Bitmap, face2: Bitmap): Double {
        val e1 = run(face1)
        val e2 = run(face2)
        var distance = 0.0
        for (i in 0 until EMBEDDING_SIZE) {
            val diff = (e1[0][i] - e2[0][i]).toDouble()
            distance += diff * diff
        }
        return Math.sqrt(distance)
    }

    /** Releases the interpreter; the instance must not be used afterwards. */
    fun close() {
        tflite?.close()
        tflite = null
        tfliteModel = null
    }

    companion object {
        private const val MODEL_PATH = "facenet.tflite"
        // Normalization constants (unused: addPixelValue scales by 1/255 instead).
        private const val IMAGE_MEAN = 127.5f
        private const val IMAGE_STD = 127.5f
        private const val BATCH_SIZE = 1
        private const val IMAGE_HEIGHT = 160
        private const val IMAGE_WIDTH = 160
        private const val NUM_CHANNELS = 3
        private const val NUM_BYTES_PER_CHANNEL = 4 // float32
        // Length of the embedding vector this FaceNet model outputs.
        private const val EMBEDDING_SIZE = 512
    }
}
我建议量化您的模型。这大约能把文件大小减小到原来的 1/4(权重从 float32 压到 8 位,约减少 75%)。你可以只做权重量化,也可以做完全量化。
使用Python API,仅用于权重量化:
# Post-training weight (dynamic-range) quantization with the TFLite converter.
import tensorflow as tf
# NOTE(review): saved_model_dir must already point at an exported SavedModel directory.
converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_dir)
# OPTIMIZE_FOR_SIZE is a deprecated alias of tf.lite.Optimize.DEFAULT in recent TF releases.
converter.optimizations = [tf.lite.Optimize.OPTIMIZE_FOR_SIZE]
tflite_quant_model = converter.convert()
对于完全量化,我建议使用具有代表性的数据集来减少与量化相关的精度损失。
# Full integer quantization with a representative dataset, which reduces the
# accuracy loss quantization would otherwise cause.
# FIX: the pasted snippet lost Python indentation (the generator body was flush
# left, which is invalid Python); structure restored here.
import tensorflow as tf

def representative_dataset_gen():
    """Yield one calibration batch per step for the converter."""
    # NOTE(review): num_calibration_steps and `input` are placeholders from the
    # TF docs -- supply real values; `input` also shadows the Python builtin.
    for _ in range(num_calibration_steps):
        # Get sample input data as a numpy array in a method of your choosing.
        yield [input]

converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_dir)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_dataset_gen
tflite_quant_model = converter.convert()
你也可以试试mobilenet架构。这些的量化版本可以从 <1MB 到 ~5MB 不等。您可以通过快速 google 搜索轻松找到 mobilefacenets 的一些 Tensorflow 实现,但这里是启动它的论文的 link:
https://arxiv.org/abs/1804.07573
好的,所以在我的应用程序中,我正在尝试使用人脸网络模型实现人脸识别,该模型转换为 tflite 平均约为 93 MB, 然而,这个模型最终会增加我的 apk 的大小。 所以我正在尝试寻找其他方法来处理这个问题
首先我想到的是通过某种方式压缩它,然后在安装应用程序时解压缩
另一种方法是我应该将该模型上传到服务器,并在下载后将其加载到我的应用程序中。 但是我似乎不知道如何实现这个:
默认情况下,face net 允许从 assets 文件夹中实现
var facenet = FaceNet(getAssets());
但如果我正在下载该模型,我该如何将其加载到我的应用程序中?
这是我的人脸网初始化代码:
// Initializes the TFLite interpreter from the model bundled in the APK assets
// and pre-allocates the direct input buffer for a single RGB image.
public FaceNet(AssetManager assetManager) throws IOException {
// Memory-map the model straight from assets (avoids copying the ~93 MB model onto the heap).
tfliteModel = loadModelFile(assetManager);
tflite = new Interpreter(tfliteModel, tfliteOptions);
// One batch of IMAGE_HEIGHT x IMAGE_WIDTH pixels, NUM_CHANNELS float32 values each.
imgData = ByteBuffer.allocateDirect(
BATCH_SIZE
* IMAGE_HEIGHT
* IMAGE_WIDTH
* NUM_CHANNELS
* NUM_BYTES_PER_CHANNEL);
// TFLite requires input buffers to use the platform's native byte order.
imgData.order(ByteOrder.nativeOrder());
}
// Memory-maps the model file from the assets folder as a read-only buffer.
// NOTE(review): the FileInputStream and AssetFileDescriptor are never closed
// here -- the mapping stays valid after closing, so they could be released.
private MappedByteBuffer loadModelFile(AssetManager assetManager) throws IOException {
AssetFileDescriptor fileDescriptor = assetManager.openFd(MODEL_PATH);
FileInputStream inputStream = new FileInputStream(fileDescriptor.getFileDescriptor());
FileChannel fileChannel = inputStream.getChannel();
// The asset lives inside the APK, so map only the slice belonging to the model.
long startOffset = fileDescriptor.getStartOffset();
long declaredLength = fileDescriptor.getDeclaredLength();
return fileChannel.map(FileChannel.MapMode.READ_ONLY, startOffset, declaredLength);
}
我的 FaceNet Class:
public class FaceNet {
    // Model file bundled in the APK assets folder.
    private static final String MODEL_PATH = "facenet.tflite";
    // Normalization constants (unused: addPixelValue scales by 1/255 instead).
    private static final float IMAGE_MEAN = 127.5f;
    private static final float IMAGE_STD = 127.5f;
    private static final int BATCH_SIZE = 1;
    private static final int IMAGE_HEIGHT = 160;
    private static final int IMAGE_WIDTH = 160;
    private static final int NUM_CHANNELS = 3;
    private static final int NUM_BYTES_PER_CHANNEL = 4; // float32
    // Length of the embedding vector this FaceNet model outputs.
    private static final int EMBEDDING_SIZE = 512;

    // Reusable ARGB pixel buffer for one IMAGE_HEIGHT x IMAGE_WIDTH frame.
    private final int[] intValues = new int[IMAGE_HEIGHT * IMAGE_WIDTH];
    private ByteBuffer imgData;
    private MappedByteBuffer tfliteModel;
    private Interpreter tflite;
    private final Interpreter.Options tfliteOptions = new Interpreter.Options();

    /**
     * Creates the TFLite interpreter from the model bundled in assets and
     * pre-allocates the direct input buffer for a single RGB image.
     *
     * @throws IOException if the model asset cannot be opened or mapped.
     */
    public FaceNet(AssetManager assetManager) throws IOException {
        tfliteModel = loadModelFile(assetManager);
        tflite = new Interpreter(tfliteModel, tfliteOptions);
        imgData = ByteBuffer.allocateDirect(
                BATCH_SIZE
                        * IMAGE_HEIGHT
                        * IMAGE_WIDTH
                        * NUM_CHANNELS
                        * NUM_BYTES_PER_CHANNEL);
        // TFLite requires input buffers to use the platform's native byte order.
        imgData.order(ByteOrder.nativeOrder());
    }

    /**
     * Memory-maps the model asset as a read-only buffer.
     * FIX: close the descriptor and stream after mapping (the original leaked
     * both); a MappedByteBuffer remains valid after its channel is closed.
     */
    private MappedByteBuffer loadModelFile(AssetManager assetManager) throws IOException {
        try (AssetFileDescriptor fileDescriptor = assetManager.openFd(MODEL_PATH);
             FileInputStream inputStream = new FileInputStream(fileDescriptor.getFileDescriptor())) {
            FileChannel fileChannel = inputStream.getChannel();
            // The asset lives inside the APK; map only the model's slice of it.
            long startOffset = fileDescriptor.getStartOffset();
            long declaredLength = fileDescriptor.getDeclaredLength();
            return fileChannel.map(FileChannel.MapMode.READ_ONLY, startOffset, declaredLength);
        }
    }

    /** Writes the bitmap's pixels into imgData as RGB floats scaled to [0, 1]. */
    private void convertBitmapToByteBuffer(Bitmap bitmap) {
        if (imgData == null) {
            return;
        }
        imgData.rewind();
        bitmap.getPixels(intValues, 0, bitmap.getWidth(), 0, 0, bitmap.getWidth(), bitmap.getHeight());
        // Convert the image to floating point.
        int pixel = 0;
        for (int i = 0; i < IMAGE_HEIGHT; ++i) {
            for (int j = 0; j < IMAGE_WIDTH; ++j) {
                addPixelValue(intValues[pixel++]);
            }
        }
    }

    /** Unpacks one ARGB pixel and appends its R, G, B channels scaled to [0, 1]. */
    private void addPixelValue(int pixelValue) {
        imgData.putFloat(((pixelValue >> 16) & 0xFF) / 255.0f);
        imgData.putFloat(((pixelValue >> 8) & 0xFF) / 255.0f);
        imgData.putFloat((pixelValue & 0xFF) / 255.0f);
    }

    /** Logs the interpreter's tensor layout; useful when debugging model I/O. */
    public void inspectModel() {
        String tag = "Model Inspection";
        Log.i(tag, "Number of input tensors: " + tflite.getInputTensorCount());
        Log.i(tag, "Number of output tensors: " + tflite.getOutputTensorCount());
        Log.i(tag, tflite.getInputTensor(0).toString());
        Log.i(tag, "Input tensor data type: " + tflite.getInputTensor(0).dataType());
        Log.i(tag, "Input tensor shape: " + Arrays.toString(tflite.getInputTensor(0).shape()));
        Log.i(tag, "Output tensor 0 shape: " + Arrays.toString(tflite.getOutputTensor(0).shape()));
    }

    private Bitmap resizedBitmap(Bitmap bitmap, int height, int width) {
        return Bitmap.createScaledBitmap(bitmap, width, height, true);
    }

    private Bitmap croppedBitmap(Bitmap bitmap, int upperCornerX, int upperCornerY, int height, int width) {
        return Bitmap.createBitmap(bitmap, upperCornerX, upperCornerY, width, height);
    }

    /** Runs inference on one face image; returns a 1 x EMBEDDING_SIZE embedding. */
    private float[][] run(Bitmap bitmap) {
        bitmap = resizedBitmap(bitmap, IMAGE_HEIGHT, IMAGE_WIDTH);
        convertBitmapToByteBuffer(bitmap);
        // FIX: use EMBEDDING_SIZE rather than a hard-coded 512 so the output
        // buffer stays in sync with the loop in getSimilarityScore.
        float[][] embeddings = new float[1][EMBEDDING_SIZE];
        tflite.run(imgData, embeddings);
        return embeddings;
    }

    /**
     * Returns the Euclidean (L2) distance between the two faces' embeddings;
     * smaller means more similar.
     */
    public double getSimilarityScore(Bitmap face1, Bitmap face2) {
        float[][] face1Embedding = run(face1);
        float[][] face2Embedding = run(face2);
        double distance = 0.0;
        for (int i = 0; i < EMBEDDING_SIZE; i++) {
            double diff = face1Embedding[0][i] - face2Embedding[0][i];
            distance += diff * diff;
        }
        return Math.sqrt(distance);
    }

    /** Releases the interpreter; the instance must not be used afterwards. */
    public void close() {
        if (tflite != null) {
            tflite.close();
            tflite = null;
        }
        tfliteModel = null;
    }
}
好吧,我想不出能直接减小模型文件体积的办法。不过观察你的 class 可以发现,它最终只是从一个文件输入流返回 MappedByteBuffer,并不依赖 assets。因此你可以先把下载好的模型文件放到外部存储的 Facenet 文件夹中,再从对应文件的 FileInputStream 上获取映射的字节缓冲区即可。下面是 Kotlin 版的实现:
class FaceNetStorage @Throws(IOException::class)
constructor() {
    // Reusable ARGB pixel buffer for one IMAGE_HEIGHT x IMAGE_WIDTH frame.
    private val intValues = IntArray(IMAGE_HEIGHT * IMAGE_WIDTH)
    private var imgData: ByteBuffer? = null
    private var tfliteModel: MappedByteBuffer? = null
    private var tflite: Interpreter? = null
    private val tfliteOptions = Interpreter.Options()

    init {
        val modelDir = File(Environment.getExternalStorageDirectory().toString() + "/Facenet")
        // FIX: fail fast with IOException instead of silently leaving tflite and
        // imgData null (the original would crash later on the first tflite!! use).
        if (!modelDir.exists() && !modelDir.mkdir()) {
            throw IOException("Cannot create model directory: $modelDir")
        }
        tfliteModel = loadModelFile(File(modelDir, MODEL_PATH))
        tflite = Interpreter(tfliteModel!!, tfliteOptions)
        // Direct buffer for one RGB float32 image; TFLite needs native byte order.
        imgData = ByteBuffer.allocateDirect(
            BATCH_SIZE * IMAGE_HEIGHT * IMAGE_WIDTH * NUM_CHANNELS * NUM_BYTES_PER_CHANNEL
        ).apply { order(ByteOrder.nativeOrder()) }
    }

    /**
     * Memory-maps [file] read-only. FIX: `use` closes the stream after mapping
     * (the original leaked the FileInputStream); the mapping stays valid after
     * its channel is closed.
     */
    @Throws(IOException::class)
    private fun loadModelFile(file: File): MappedByteBuffer =
        FileInputStream(file).use { stream ->
            stream.channel.map(FileChannel.MapMode.READ_ONLY, 0, stream.channel.size())
        }

    /** Writes the bitmap's pixels into [imgData] as RGB floats scaled to [0, 1]. */
    private fun convertBitmapToByteBuffer(bitmap: Bitmap) {
        val buffer = imgData ?: return
        buffer.rewind()
        bitmap.getPixels(intValues, 0, bitmap.width, 0, 0, bitmap.width, bitmap.height)
        // Convert the image to floating point.
        var pixel = 0
        for (i in 0 until IMAGE_HEIGHT) {
            for (j in 0 until IMAGE_WIDTH) {
                addPixelValue(intValues[pixel++])
            }
        }
    }

    /** Unpacks one ARGB pixel and appends its R, G, B channels scaled to [0, 1]. */
    private fun addPixelValue(pixelValue: Int) {
        imgData!!.putFloat((pixelValue shr 16 and 0xFF) / 255.0f)
        imgData!!.putFloat((pixelValue shr 8 and 0xFF) / 255.0f)
        imgData!!.putFloat((pixelValue and 0xFF) / 255.0f)
    }

    /** Logs the interpreter's tensor layout; useful when debugging model I/O. */
    fun inspectModel() {
        val tag = "Model Inspection"
        Log.i(tag, "Number of input tensors: ${tflite!!.inputTensorCount}")
        Log.i(tag, "Number of output tensors: ${tflite!!.outputTensorCount}")
        Log.i(tag, tflite!!.getInputTensor(0).toString())
        Log.i(tag, "Input tensor data type: " + tflite!!.getInputTensor(0).dataType())
        Log.i(tag, "Input tensor shape: " + Arrays.toString(tflite!!.getInputTensor(0).shape()))
        Log.i(tag, "Output tensor 0 shape: " + Arrays.toString(tflite!!.getOutputTensor(0).shape()))
    }

    private fun resizedBitmap(bitmap: Bitmap, height: Int, width: Int): Bitmap =
        Bitmap.createScaledBitmap(bitmap, width, height, true)

    private fun croppedBitmap(bitmap: Bitmap, upperCornerX: Int, upperCornerY: Int, height: Int, width: Int): Bitmap =
        Bitmap.createBitmap(bitmap, upperCornerX, upperCornerY, width, height)

    /** Runs inference on one face image; returns a 1 x EMBEDDING_SIZE embedding. */
    private fun run(bitmap: Bitmap): Array<FloatArray> {
        convertBitmapToByteBuffer(resizedBitmap(bitmap, IMAGE_HEIGHT, IMAGE_WIDTH))
        // FIX: use EMBEDDING_SIZE rather than a hard-coded 512 so the output
        // buffer stays in sync with the loop in getSimilarityScore.
        val embeddings = Array(1) { FloatArray(EMBEDDING_SIZE) }
        tflite!!.run(imgData, embeddings)
        return embeddings
    }

    /**
     * Returns the Euclidean (L2) distance between the two faces' embeddings;
     * smaller means more similar.
     */
    fun getSimilarityScore(face1: Bitmap, face2: Bitmap): Double {
        val e1 = run(face1)
        val e2 = run(face2)
        var distance = 0.0
        for (i in 0 until EMBEDDING_SIZE) {
            val diff = (e1[0][i] - e2[0][i]).toDouble()
            distance += diff * diff
        }
        return Math.sqrt(distance)
    }

    /** Releases the interpreter; the instance must not be used afterwards. */
    fun close() {
        tflite?.close()
        tflite = null
        tfliteModel = null
    }

    companion object {
        private const val MODEL_PATH = "facenet.tflite"
        // Normalization constants (unused: addPixelValue scales by 1/255 instead).
        private const val IMAGE_MEAN = 127.5f
        private const val IMAGE_STD = 127.5f
        private const val BATCH_SIZE = 1
        private const val IMAGE_HEIGHT = 160
        private const val IMAGE_WIDTH = 160
        private const val NUM_CHANNELS = 3
        private const val NUM_BYTES_PER_CHANNEL = 4 // float32
        // Length of the embedding vector this FaceNet model outputs.
        private const val EMBEDDING_SIZE = 512
    }
}
我建议量化您的模型。这大约能把文件大小减小到原来的 1/4(权重从 float32 压到 8 位,约减少 75%)。你可以只做权重量化,也可以做完全量化。
使用Python API,仅用于权重量化:
# Post-training weight (dynamic-range) quantization with the TFLite converter.
import tensorflow as tf
# NOTE(review): saved_model_dir must already point at an exported SavedModel directory.
converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_dir)
# OPTIMIZE_FOR_SIZE is a deprecated alias of tf.lite.Optimize.DEFAULT in recent TF releases.
converter.optimizations = [tf.lite.Optimize.OPTIMIZE_FOR_SIZE]
tflite_quant_model = converter.convert()
对于完全量化,我建议使用具有代表性的数据集来减少与量化相关的精度损失。
# Full integer quantization with a representative dataset, which reduces the
# accuracy loss quantization would otherwise cause.
# FIX: the pasted snippet lost Python indentation (the generator body was flush
# left, which is invalid Python); structure restored here.
import tensorflow as tf

def representative_dataset_gen():
    """Yield one calibration batch per step for the converter."""
    # NOTE(review): num_calibration_steps and `input` are placeholders from the
    # TF docs -- supply real values; `input` also shadows the Python builtin.
    for _ in range(num_calibration_steps):
        # Get sample input data as a numpy array in a method of your choosing.
        yield [input]

converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_dir)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_dataset_gen
tflite_quant_model = converter.convert()
你也可以试试mobilenet架构。这些的量化版本可以从 <1MB 到 ~5MB 不等。您可以通过快速 google 搜索轻松找到 mobilefacenets 的一些 Tensorflow 实现,但这里是启动它的论文的 link: https://arxiv.org/abs/1804.07573