为 TensorFlow Lite C++ 编写 read_jpeg 和 decode_jpeg 函数
Writing read_jpeg and decode_jpeg functions for TensorFlow Lite C++
TensorFlow Lite 的代码仓库(repo)中有一个很好的 C++ 图像分类示例(见此处,here)。
但是,我使用的是 .jpeg 图像,而此示例仅限于使用 bitmap_helpers.cc 解码 .bmp 图像。
我正在尝试创建自己的 jpeg 解码器,但我不太精通图像处理,因此需要一些帮助。我把这个 jpeg 解码器(this jpeg decoder)作为第三方辅助库来复用。在示例的 bmp 解码代码中,我不太明白为什么要计算 row_size,以及为什么要读取文件头之后的字节数组。谁能阐明这将如何应用于 jpeg 解码器?或者,更好的是,是否已经有一个 C++ decode_jpeg 函数隐藏在我没有找到的地方?
最终的实现必须在 C++ 的 TensorFlow Lite 中。
非常感谢!
编辑:
以下是我目前所拥有的。当我对相同的输入图像和 tflite 模型使用图像分类器的 Python 示例时,我没有得到相同的置信度值,因此这清楚地表明出现了问题。我基本上从 read_bmp 复制并粘贴了 row_size 计算,但没有理解它,所以我怀疑这可能是问题所在。 row_size代表什么?
// Repacks a 3-channel pixel buffer whose rows are row_size bytes apart into a
// tightly packed buffer, reversing the channel order of every pixel.
//
// input:    source pixel bytes; row i starts at byte offset i * row_size.
// row_size: distance in bytes between the starts of consecutive source rows.
// width:    number of pixels per row.
// height:   number of rows.
// Returns a vector of height * width * 3 bytes.
std::vector<uint8_t> decode_jpeg(const uint8_t* input, int row_size, int width, int height) {
  // The channel count is fixed at 3 for now.
  const int channels = 3;
  // Destination buffer handed to TensorFlow for processing.
  std::vector<uint8_t> output(height * width * channels);
  // Visit every pixel, row by row.
  for (int row = 0; row < height; ++row) {
    for (int col = 0; col < width; ++col) {
      const int src_pos = row * row_size + col * channels;
      const int dst_pos = (row * width + col) * channels;
      // Put RGB channel data into the output array.
      output[dst_pos] = input[src_pos + 2];
      output[dst_pos + 1] = input[src_pos + 1];
      output[dst_pos + 2] = input[src_pos];
    }
  }
  return output;
}
// Reads the file named by input_jpeg_name into memory, decodes it with Decoder,
// stores the decoded dimensions in *width and *height, and returns the bytes
// produced by decode_jpeg().
//
// input_jpeg_name: path of the file to open (opened in binary mode).
// width, height:   out-parameters set from decoder.GetWidth()/GetHeight().
// s:               settings; only s->verbose is read, to gate the error logs.
//
// If the file cannot be opened or the decoder does not report Decoder::OK, the
// process is terminated with exit(-1).
std::vector<uint8_t> read_jpeg(const std::string& input_jpeg_name, int* width, int* height, Settings* s) {
// Size and buffer.
size_t size;
unsigned char *buf;
// Open the input file.
FILE *f;
f = fopen(input_jpeg_name.c_str(), "rb");
if (!f) {
if (s->verbose) LOG(INFO) << "Error opening the input file\n";
exit(-1);
}
// Seek to the end of the file so that ftell() reports its size.
fseek(f, 0, SEEK_END);
// Get the file size.
// NOTE(review): ftell() returns -1 on failure, which would become a huge size_t here.
size = ftell(f);
// Get file data into buffer.
// NOTE(review): buf is never passed to free() in this function; whether Decoder
// takes ownership of it is not visible here - confirm.
buf = (unsigned char*)malloc(size);
fseek(f, 0, SEEK_SET);
// NOTE(review): the byte count returned by fread() is stored but never checked.
size_t read = fread(buf, 1, size, f);
// Close the file.
fclose(f);
// Decode the file.
Decoder decoder(buf, size);
if (decoder.GetResult() != Decoder::OK)
{
if (s->verbose) LOG(INFO) << "Error decoding the input file\n";
exit(-1);
}
// Get the image from the decoded file.
unsigned char* img = decoder.GetImage();
// Get image width and height.
*width = decoder.GetWidth();
*height = decoder.GetHeight();
// TODO: Understand what this row size means. Don't just copy and paste.
// The expression rounds the byte length of one row (channels * width) up to a
// multiple of 4.
// NOTE(review): `channels` is not declared in this function and is dereferenced
// as a pointer; unless it is defined elsewhere, this line does not compile.
const int row_size = (8 * *channels * *width + 31) / 32 * 4;
// Hand the decoder's image buffer to decode_jpeg() using that row stride.
return decode_jpeg(img, row_size, *width, *height);
}
您正在使用的库已经为您完成了解码,decoder.GetImage() 返回的就是原始 RGB 数据。您无需计算任何尺寸。
像row_size这样的东西是BMP文件格式特有的东西。 BMP 文件可能包含一些填充字节以及像素颜色数据,代码正在处理这些东西。
BMP 文件还以 BGR 顺序存储像素值,这就是您在原始代码中对通道进行反向排序的原因:
// Put RGB channel data into the output array.
output[dst_pos] = input[src_pos + 2];
output[dst_pos + 1] = input[src_pos + 1];
output[dst_pos + 2] = input[src_pos];
下面的代码应该适合你(注意 decode_jpeg 函数不执行任何解码):
// Copies already-decoded pixel data into a vector for TensorFlow to process.
// Despite its name, this function performs no decoding: it copies
// height * width * 3 bytes from input unchanged.
//
// input:  tightly packed pixel bytes, 3 bytes per pixel.
// width:  image width in pixels.
// height: image height in pixels.
// Returns the copied bytes, or an empty vector when input is null or either
// dimension is not positive.
std::vector<uint8_t> decode_jpeg(const uint8_t* input, int width, int height) {
  // Channels will always be 3. Hardcode it for now.
  const size_t channels = 3;
  // Nothing to copy for a missing buffer or non-positive dimensions. This also
  // keeps a negative size from turning into an enormous allocation.
  if (input == nullptr || width <= 0 || height <= 0) {
    return std::vector<uint8_t>();
  }
  // Compute the byte count in size_t so large images cannot overflow int.
  const size_t total_bytes =
      static_cast<size_t>(height) * static_cast<size_t>(width) * channels;
  // Copy pixel data to output.
  return std::vector<uint8_t>(input, input + total_bytes);
}
std::vector<uint8_t> read_jpeg(const std::string& input_jpeg_name, int* width, int* height, Settings* s) {
// Size and buffer.
size_t size;
unsigned char *buf;
// Open the input file.
FILE *f;
f = fopen(input_jpeg_name.c_str(), "rb");
if (!f) {
if (s->verbose) LOG(INFO) << "Error opening the input file\n";
exit(-1);
}
// Read the file.
fseek(f, 0, SEEK_END);
// Ge tthe file size.
size = ftell(f);
// Get file data into buffer.
buf = (unsigned char*)malloc(size);
fseek(f, 0, SEEK_SET);
size_t read = fread(buf, 1, size, f);
// Close the file.
fclose(f);
// Decode the file.
Decoder decoder(buf, size);
if (decoder.GetResult() != Decoder::OK)
{
if (s->verbose) LOG(INFO) << "Error decoding the input file\n";
exit(-1);
}
// Get the image from the decoded file.
unsigned char* img = decoder.GetImage();
// Get image width and height.
*width = decoder.GetWidth();
*height = decoder.GetHeight();
// Decode the JPEG.
return decode_jpeg(img, *width, *height);
}
TensorFlow Lite 的代码仓库(repo)中有一个很好的 C++ 图像分类示例(见此处,here)。但是,我使用的是 .jpeg 图像,而此示例仅限于使用 bitmap_helpers.cc 解码 .bmp 图像。
我正在尝试创建自己的 jpeg 解码器,但我不太精通图像处理,因此需要一些帮助。我把这个 jpeg 解码器(this jpeg decoder)作为第三方辅助库来复用。在示例的 bmp 解码代码中,我不太明白为什么要计算 row_size,以及为什么要读取文件头之后的字节数组。谁能阐明这将如何应用于 jpeg 解码器?或者,更好的是,是否已经有一个 C++ decode_jpeg 函数隐藏在我没有找到的地方?
最终的实现必须在 C++ 的 TensorFlow Lite 中。
非常感谢!
编辑:
以下是我目前所拥有的。当我对相同的输入图像和 tflite 模型使用图像分类器的 Python 示例时,我没有得到相同的置信度值,因此这清楚地表明出现了问题。我基本上从 read_bmp 复制并粘贴了 row_size 计算,但没有理解它,所以我怀疑这可能是问题所在。 row_size代表什么?
// Repacks a 3-channel pixel buffer whose rows are row_size bytes apart into a
// tightly packed buffer, reversing the channel order of every pixel.
//
// input:    source pixel bytes; row i starts at byte offset i * row_size.
// row_size: distance in bytes between the starts of consecutive source rows.
// width:    number of pixels per row.
// height:   number of rows.
// Returns a vector of height * width * 3 bytes.
std::vector<uint8_t> decode_jpeg(const uint8_t* input, int row_size, int width, int height) {
  // The channel count is fixed at 3 for now.
  const int channels = 3;
  // Destination buffer handed to TensorFlow for processing.
  std::vector<uint8_t> output(height * width * channels);
  // Visit every pixel, row by row.
  for (int row = 0; row < height; ++row) {
    for (int col = 0; col < width; ++col) {
      const int src_pos = row * row_size + col * channels;
      const int dst_pos = (row * width + col) * channels;
      // Put RGB channel data into the output array.
      output[dst_pos] = input[src_pos + 2];
      output[dst_pos + 1] = input[src_pos + 1];
      output[dst_pos + 2] = input[src_pos];
    }
  }
  return output;
}
// Reads the file named by input_jpeg_name into memory, decodes it with Decoder,
// stores the decoded dimensions in *width and *height, and returns the bytes
// produced by decode_jpeg().
//
// input_jpeg_name: path of the file to open (opened in binary mode).
// width, height:   out-parameters set from decoder.GetWidth()/GetHeight().
// s:               settings; only s->verbose is read, to gate the error logs.
//
// If the file cannot be opened or the decoder does not report Decoder::OK, the
// process is terminated with exit(-1).
std::vector<uint8_t> read_jpeg(const std::string& input_jpeg_name, int* width, int* height, Settings* s) {
// Size and buffer.
size_t size;
unsigned char *buf;
// Open the input file.
FILE *f;
f = fopen(input_jpeg_name.c_str(), "rb");
if (!f) {
if (s->verbose) LOG(INFO) << "Error opening the input file\n";
exit(-1);
}
// Seek to the end of the file so that ftell() reports its size.
fseek(f, 0, SEEK_END);
// Get the file size.
// NOTE(review): ftell() returns -1 on failure, which would become a huge size_t here.
size = ftell(f);
// Get file data into buffer.
// NOTE(review): buf is never passed to free() in this function; whether Decoder
// takes ownership of it is not visible here - confirm.
buf = (unsigned char*)malloc(size);
fseek(f, 0, SEEK_SET);
// NOTE(review): the byte count returned by fread() is stored but never checked.
size_t read = fread(buf, 1, size, f);
// Close the file.
fclose(f);
// Decode the file.
Decoder decoder(buf, size);
if (decoder.GetResult() != Decoder::OK)
{
if (s->verbose) LOG(INFO) << "Error decoding the input file\n";
exit(-1);
}
// Get the image from the decoded file.
unsigned char* img = decoder.GetImage();
// Get image width and height.
*width = decoder.GetWidth();
*height = decoder.GetHeight();
// TODO: Understand what this row size means. Don't just copy and paste.
// The expression rounds the byte length of one row (channels * width) up to a
// multiple of 4.
// NOTE(review): `channels` is not declared in this function and is dereferenced
// as a pointer; unless it is defined elsewhere, this line does not compile.
const int row_size = (8 * *channels * *width + 31) / 32 * 4;
// Hand the decoder's image buffer to decode_jpeg() using that row stride.
return decode_jpeg(img, row_size, *width, *height);
}
您正在使用的库已经为您完成了解码,decoder.GetImage() 返回的就是原始 RGB 数据。您无需计算任何尺寸。
像row_size这样的东西是BMP文件格式特有的东西。 BMP 文件可能包含一些填充字节以及像素颜色数据,代码正在处理这些东西。
BMP 文件还以 BGR 顺序存储像素值,这就是您在原始代码中对通道进行反向排序的原因:
// Put RGB channel data into the output array.
output[dst_pos] = input[src_pos + 2];
output[dst_pos + 1] = input[src_pos + 1];
output[dst_pos + 2] = input[src_pos];
下面的代码应该适合你(注意 decode_jpeg 函数不执行任何解码):
// Copies already-decoded pixel data into a vector for TensorFlow to process.
// Despite its name, this function performs no decoding: it copies
// height * width * 3 bytes from input unchanged.
//
// input:  tightly packed pixel bytes, 3 bytes per pixel.
// width:  image width in pixels.
// height: image height in pixels.
// Returns the copied bytes, or an empty vector when input is null or either
// dimension is not positive.
std::vector<uint8_t> decode_jpeg(const uint8_t* input, int width, int height) {
  // Channels will always be 3. Hardcode it for now.
  const size_t channels = 3;
  // Nothing to copy for a missing buffer or non-positive dimensions. This also
  // keeps a negative size from turning into an enormous allocation.
  if (input == nullptr || width <= 0 || height <= 0) {
    return std::vector<uint8_t>();
  }
  // Compute the byte count in size_t so large images cannot overflow int.
  const size_t total_bytes =
      static_cast<size_t>(height) * static_cast<size_t>(width) * channels;
  // Copy pixel data to output.
  return std::vector<uint8_t>(input, input + total_bytes);
}
std::vector<uint8_t> read_jpeg(const std::string& input_jpeg_name, int* width, int* height, Settings* s) {
// Size and buffer.
size_t size;
unsigned char *buf;
// Open the input file.
FILE *f;
f = fopen(input_jpeg_name.c_str(), "rb");
if (!f) {
if (s->verbose) LOG(INFO) << "Error opening the input file\n";
exit(-1);
}
// Read the file.
fseek(f, 0, SEEK_END);
// Ge tthe file size.
size = ftell(f);
// Get file data into buffer.
buf = (unsigned char*)malloc(size);
fseek(f, 0, SEEK_SET);
size_t read = fread(buf, 1, size, f);
// Close the file.
fclose(f);
// Decode the file.
Decoder decoder(buf, size);
if (decoder.GetResult() != Decoder::OK)
{
if (s->verbose) LOG(INFO) << "Error decoding the input file\n";
exit(-1);
}
// Get the image from the decoded file.
unsigned char* img = decoder.GetImage();
// Get image width and height.
*width = decoder.GetWidth();
*height = decoder.GetHeight();
// Decode the JPEG.
return decode_jpeg(img, *width, *height);
}