cpp rgb 到 yuv422 转换
cpp rgb to yuv422 conversion
我正在尝试将 RGB/RGBA 格式(可以更改)的图像(最初来自 QImage)转换为 YUV422 格式。
我的初衷是使用 OpenCV cvtColor 来完成这项工作,但它无法将 RGB/RGBA 转换为 422 格式。
我搜索了替代方案,甚至考虑根据 编写自己的转换,但速度不够快。
我搜索了另一个要使用的库并找到了 this post 但它已经过时且不太相关。
所以我的问题是:有哪些好的 RGB->YUV422 转换方案?如果转换能在 GPU 而不是 CPU 上执行会更好。
提前致谢
在这个相关回答(related answer)中,他们建议使用 Intel Performance Primitives,并且 OP 似乎达到了预期的结果(实时转换许多 PAL 流)。
我使用 OpenCL 解决了我的问题,如下所示:Tutorial: Simple start with OpenCL and C++
我用它把 Format_ARGB32_Premultiplied 转换为 YUV422,但可以很容易地改成其他任何格式。
openclwrapper.h:
// Holds the OpenCL objects (platform, device, context, program, buffers) used to
// convert a 4-byte-per-pixel image into packed YUV422 on an OpenCL device.
// The image dimensions are fixed at construction time: the constructor bakes them
// into the kernel source and sizes the device buffers from them.
class OpenClWrapper
{
public:
// Sets up the OpenCL state for images of `width` x `height` pixels.
OpenClWrapper(size_t width, size_t height);
~OpenClWrapper();
// Uploads `rgbImg` (width*height*4 bytes are read), runs the conversion kernel and
// copies the result into `yuvImg` (width*height/2 unsigned ints are written, i.e.
// one 32-bit word per pair of horizontally adjacent pixels).
void RGB2YUV422(unsigned int * yuvImg, unsigned char * rgbImg);
private:
// All platforms/devices reported by OpenCL; the constructor uses element [0] of each.
std::vector<cl::Platform> m_all_platforms;
std::vector<cl::Device> m_all_devices;
cl::Platform m_default_platform;
cl::Device m_default_device;
cl::Context m_context;
// Kernel source handed to m_program; it points into m_kernel_code's character data.
cl::Program::Sources m_sources;
cl::Program m_program;
// NOTE(review): never assigned or used in the visible implementation —
// RGB2YUV422 creates its own local command queue on every call.
cl::CommandQueue m_queue;
// Device-side output buffer: width*height/2 unsigned ints.
cl::Buffer m_buffer_yuv;
// Device-side input buffer: width*height*4 bytes.
cl::Buffer m_buffer_rgb;
// Generated OpenCL C source of the RGB2YUV422 kernel.
std::string m_kernel_code;
// Image dimensions in pixels.
size_t m_width;
size_t m_height;
};
openclwrapper.cpp:
#include "openclwrapper.h"
#include <iostream>
#include <sstream>
/// Prepares everything needed to run the RGB2YUV422 kernel on images of a fixed size.
///
/// Selects the first OpenCL platform and the first device on it, generates and builds
/// the kernel (the image dimensions are baked into the kernel source as array bounds),
/// and allocates the two device buffers. If no platform or device is found, or the
/// kernel fails to build, a message is written to stdout and the process exits with
/// status 1.
///
/// @param width  Image width in pixels. The kernel handles pixels in horizontal pairs,
///               so for an odd width the last column is not converted.
/// @param height Image height in pixels.
OpenClWrapper::OpenClWrapper(size_t width, size_t height) :
    m_width(width),   // listed in declaration order, which is the order actually used
    m_height(height)
{
    // get all platforms (drivers)
    cl::Platform::get(&m_all_platforms);
    if (m_all_platforms.size() == 0) {
        std::cout<<" No platforms found. Check OpenCL installation!\n";
        exit(1);
    }
    m_default_platform = m_all_platforms[0];

    // get default device of the default platform
    m_default_platform.getDevices(CL_DEVICE_TYPE_ALL, &m_all_devices);
    if (m_all_devices.size() == 0) {
        std::cout<<" No devices found. Check OpenCL installation!\n";
        exit(1);
    }
    m_default_device = m_all_devices[0];

    // Assign the wrapper objects by value. The previous `*(new cl::X(...))` form
    // copied out of a heap object that was never deleted, leaking both that object
    // and the reference it held on the underlying OpenCL handle.
    m_context = cl::Context({m_default_device});

    // Kernel layout: work-item (id0, id1) handles row id0 and the pixel pair that
    // starts at byte offset id1*8 in that row. Per pixel, bytes are read as
    // B, G, R, A at offsets +0..+3.
    // Un-premultiplying is done as c*255/alpha (0 when alpha is 0, clamped to 255).
    // The former c*(255/alpha) truncated the scale factor to an integer and divided
    // by zero for fully transparent pixels.
    std::ostringstream oss;
    oss <<
        " void kernel RGB2YUV422(global const unsigned char rgbImg[" << m_height << "][" << m_width << "*4], global unsigned int yuvImg[" << m_height << "][" << m_width << "/2]){ \n"
        " int x_idx = get_global_id(0); \n"
        " int y_idx = get_global_id(1)*8; \n"
        " int alpha1 = rgbImg[x_idx][y_idx+3]; \n"
        " int alpha2 = rgbImg[x_idx][y_idx+7]; \n"
        " unsigned char R1 = alpha1 ? min(rgbImg[x_idx][y_idx+2] * 255 / alpha1, 255) : 0; \n"
        " unsigned char G1 = alpha1 ? min(rgbImg[x_idx][y_idx+1] * 255 / alpha1, 255) : 0; \n"
        " unsigned char B1 = alpha1 ? min(rgbImg[x_idx][y_idx] * 255 / alpha1, 255) : 0; \n"
        " unsigned char R2 = alpha2 ? min(rgbImg[x_idx][y_idx+6] * 255 / alpha2, 255) : 0; \n"
        " unsigned char G2 = alpha2 ? min(rgbImg[x_idx][y_idx+5] * 255 / alpha2, 255) : 0; \n"
        " unsigned char B2 = alpha2 ? min(rgbImg[x_idx][y_idx+4] * 255 / alpha2, 255) : 0; \n"
        " unsigned char Y1 = (unsigned char)(0.299000*R1 + 0.587000*G1 + 0.114000*B1); \n"
        " unsigned char Y2 = (unsigned char)(0.299000*R2 + 0.587000*G2 + 0.114000*B2); \n"
        " unsigned char U = (unsigned char)(-0.168736*R1-0.331264*G1+0.500000*B1+128);//(0.492*(B1-Y1)); \n"
        " unsigned char V = (unsigned char)(0.500000*R1-0.418688*G1-0.081312*B1+128);//(0.877*(R1-Y1)); \n"
        " yuvImg[get_global_id(0)][get_global_id(1)] = (unsigned int)(Y2 << 24 | V << 16 | Y1 << 8 | U); \n"
        " } ";
    m_kernel_code = oss.str();

    // m_sources keeps a pointer into m_kernel_code, so the string must stay alive
    // (it is a member) for as long as the sources are used.
    m_sources.push_back({m_kernel_code.c_str(),m_kernel_code.length()});
    m_program = cl::Program(m_context, m_sources);
    if (m_program.build({m_default_device}) != CL_SUCCESS) {
        std::cout<<" Error building: "<<m_program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(m_default_device)<<"\n";
        exit(1);
    }

    // create buffers on the device
    // Output: one unsigned int (4 bytes) per pixel pair, i.e. 16 bits per pixel.
    m_buffer_yuv = cl::Buffer(m_context, CL_MEM_READ_WRITE, sizeof(unsigned int)*(m_width*m_height/2));
    // Input: 4 bytes per pixel (colour channels plus alpha).
    m_buffer_rgb = cl::Buffer(m_context, CL_MEM_READ_WRITE, sizeof(unsigned char)*(m_width*m_height*4));
}
/// Destroys the wrapper.
///
/// Nothing needs to be released by hand: m_buffer_rgb and m_buffer_yuv are cl::Buffer
/// member objects, and the OpenCL C++ wrapper types release their underlying handles
/// in their own destructors. The previous free(&member) calls passed addresses that
/// were never returned by malloc, which is undefined behaviour.
OpenClWrapper::~OpenClWrapper() = default;
void OpenClWrapper::RGB2YUV422(unsigned int * yuvImg, unsigned char * rgbImg){
cl::CommandQueue queue(m_context,m_default_device);
//write rgb image to the OpenCl buffer
queue.enqueueWriteBuffer(m_buffer_rgb,CL_TRUE,0,sizeof(unsigned char)*(m_width*m_height*4),rgbImg);
//run the kernel
cl::Kernel kernel_yuv2rgb=cl::Kernel(m_program,"RGB2YUV422");
kernel_yuv2rgb.setArg(0,m_buffer_rgb);
kernel_yuv2rgb.setArg(1,m_buffer_yuv);
queue.enqueueNDRangeKernel(kernel_yuv2rgb,cl::NullRange,cl::NDRange(m_height,(m_width/2)),cl::NullRange); //range is divided by 2 because we have width is represented in integers instead of 16bit (as needed in yuv422).
queue.finish();
//read result yuv Image from the device to yuv Image pointer
queue.enqueueReadBuffer(m_buffer_yuv,CL_TRUE,0,sizeof(unsigned int)*(m_width*m_height/2),yuvImg);
}
OpenCV 的简单实现:
/// Converts an 8-bit, 3-channel image to 8-bit, 2-channel packed YUV422 in U-Y-V-Y order.
///
/// Each pair of horizontally adjacent pixels becomes four output bytes: U, Y(first),
/// V, Y(second). U and V are computed from the first pixel of the pair only.
/// Channel bytes are read in R, G, B order.
/// NOTE(review): the coefficients and the +16 / +128 offsets look like the BT.601
/// studio-range matrix — confirm against the intended colour space.
///
/// For an odd width, the final unpaired pixel gets only its U and Y bytes written.
/// The previous code read three bytes past the RGB row and wrote two bytes past the
/// YUV row in that case.
///
/// @param rgb Source image (CV_8U, 3 channels).
/// @param yuv Destination image, pre-allocated with the same size as `rgb`
///            (CV_8U, 2 channels). Checked with assert only.
void rgb_to_yuv422_uyvy(const cv::Mat& rgb, cv::Mat& yuv) {
    assert(rgb.size() == yuv.size() &&
           rgb.depth() == CV_8U &&
           rgb.channels() == 3 &&
           yuv.depth() == CV_8U &&
           yuv.channels() == 2);

    // Luma of one pixel; the float result is truncated toward zero, as before.
    const auto luma = [](uint8_t r, uint8_t g, uint8_t b) -> int {
        return static_cast<int>((0.257f * r) + (0.504f * g) + (0.098f * b) + 16.0f);
    };

    const int rgbPixelBytes = static_cast<int>(rgb.elemSize());
    const int yuvPixelBytes = static_cast<int>(yuv.elemSize());

    for (int ih = 0; ih < rgb.rows; ih++) {
        const uint8_t* rgbRowPtr = rgb.ptr<uint8_t>(ih);
        uint8_t* yuvRowPtr = yuv.ptr<uint8_t>(ih);

        for (int iw = 0; iw < rgb.cols; iw = iw + 2) {
            const uint8_t* src = rgbRowPtr + iw * rgbPixelBytes;
            uint8_t* dst = yuvRowPtr + iw * yuvPixelBytes;

            const uint8_t R1 = src[0];
            const uint8_t G1 = src[1];
            const uint8_t B1 = src[2];

            const int Y = luma(R1, G1, B1);
            const int U = static_cast<int>(-(0.148f * R1) - (0.291f * G1) + (0.439f * B1) + 128.0f);
            const int V = static_cast<int>( (0.439f * R1) - (0.368f * G1) - (0.071f * B1) + 128.0f);

            dst[0] = cv::saturate_cast<uint8_t>(U);
            dst[1] = cv::saturate_cast<uint8_t>(Y);

            // Only touch the second pixel (and its two output bytes) if it exists.
            if (iw + 1 < rgb.cols) {
                const int Y2 = luma(src[3], src[4], src[5]);
                dst[2] = cv::saturate_cast<uint8_t>(V);
                dst[3] = cv::saturate_cast<uint8_t>(Y2);
            }
        }
    }
}
请注意,这里假定(并检查)输入为 RGB、输出为 UYVY 排列的 YUV422。我发现它已经相当快,而且这个问题显然是易并行的(embarrassingly parallel)。
我正在尝试将 RGB/RGBA 格式(可以更改)的图像(最初来自 QImage)转换为 YUV422 格式。 我的初衷是使用 OpenCV cvtColor 来完成这项工作,但它无法将 RGB/RGBA 转换为 422 格式。
我搜索了替代方案,甚至考虑根据
我搜索了另一个要使用的库并找到了 this post 但它已经过时且不太相关。
所以我的问题是:有哪些好的 RGB->YUV422 转换方案?如果转换能在 GPU 而不是 CPU 上执行会更好。
提前致谢
在这个相关回答(related answer)中,他们建议使用 Intel Performance Primitives,并且 OP 似乎达到了预期的结果(实时转换许多 PAL 流)。
我使用 OpenCL 解决了我的问题,如下所示:Tutorial: Simple start with OpenCL and C++
我用它把 Format_ARGB32_Premultiplied 转换为 YUV422,但可以很容易地改成其他任何格式。
openclwrapper.h:
// Holds the OpenCL objects (platform, device, context, program, buffers) used to
// convert a 4-byte-per-pixel image into packed YUV422 on an OpenCL device.
// The image dimensions are fixed at construction time: the constructor bakes them
// into the kernel source and sizes the device buffers from them.
class OpenClWrapper
{
public:
// Sets up the OpenCL state for images of `width` x `height` pixels.
OpenClWrapper(size_t width, size_t height);
~OpenClWrapper();
// Uploads `rgbImg` (width*height*4 bytes are read), runs the conversion kernel and
// copies the result into `yuvImg` (width*height/2 unsigned ints are written, i.e.
// one 32-bit word per pair of horizontally adjacent pixels).
void RGB2YUV422(unsigned int * yuvImg, unsigned char * rgbImg);
private:
// All platforms/devices reported by OpenCL; the constructor uses element [0] of each.
std::vector<cl::Platform> m_all_platforms;
std::vector<cl::Device> m_all_devices;
cl::Platform m_default_platform;
cl::Device m_default_device;
cl::Context m_context;
// Kernel source handed to m_program; it points into m_kernel_code's character data.
cl::Program::Sources m_sources;
cl::Program m_program;
// NOTE(review): never assigned or used in the visible implementation —
// RGB2YUV422 creates its own local command queue on every call.
cl::CommandQueue m_queue;
// Device-side output buffer: width*height/2 unsigned ints.
cl::Buffer m_buffer_yuv;
// Device-side input buffer: width*height*4 bytes.
cl::Buffer m_buffer_rgb;
// Generated OpenCL C source of the RGB2YUV422 kernel.
std::string m_kernel_code;
// Image dimensions in pixels.
size_t m_width;
size_t m_height;
};
openclwrapper.cpp:
#include "openclwrapper.h"
#include <iostream>
#include <sstream>
/// Prepares everything needed to run the RGB2YUV422 kernel on images of a fixed size.
///
/// Selects the first OpenCL platform and the first device on it, generates and builds
/// the kernel (the image dimensions are baked into the kernel source as array bounds),
/// and allocates the two device buffers. If no platform or device is found, or the
/// kernel fails to build, a message is written to stdout and the process exits with
/// status 1.
///
/// @param width  Image width in pixels. The kernel handles pixels in horizontal pairs,
///               so for an odd width the last column is not converted.
/// @param height Image height in pixels.
OpenClWrapper::OpenClWrapper(size_t width, size_t height) :
    m_width(width),   // listed in declaration order, which is the order actually used
    m_height(height)
{
    // get all platforms (drivers)
    cl::Platform::get(&m_all_platforms);
    if (m_all_platforms.size() == 0) {
        std::cout<<" No platforms found. Check OpenCL installation!\n";
        exit(1);
    }
    m_default_platform = m_all_platforms[0];

    // get default device of the default platform
    m_default_platform.getDevices(CL_DEVICE_TYPE_ALL, &m_all_devices);
    if (m_all_devices.size() == 0) {
        std::cout<<" No devices found. Check OpenCL installation!\n";
        exit(1);
    }
    m_default_device = m_all_devices[0];

    // Assign the wrapper objects by value. The previous `*(new cl::X(...))` form
    // copied out of a heap object that was never deleted, leaking both that object
    // and the reference it held on the underlying OpenCL handle.
    m_context = cl::Context({m_default_device});

    // Kernel layout: work-item (id0, id1) handles row id0 and the pixel pair that
    // starts at byte offset id1*8 in that row. Per pixel, bytes are read as
    // B, G, R, A at offsets +0..+3.
    // Un-premultiplying is done as c*255/alpha (0 when alpha is 0, clamped to 255).
    // The former c*(255/alpha) truncated the scale factor to an integer and divided
    // by zero for fully transparent pixels.
    std::ostringstream oss;
    oss <<
        " void kernel RGB2YUV422(global const unsigned char rgbImg[" << m_height << "][" << m_width << "*4], global unsigned int yuvImg[" << m_height << "][" << m_width << "/2]){ \n"
        " int x_idx = get_global_id(0); \n"
        " int y_idx = get_global_id(1)*8; \n"
        " int alpha1 = rgbImg[x_idx][y_idx+3]; \n"
        " int alpha2 = rgbImg[x_idx][y_idx+7]; \n"
        " unsigned char R1 = alpha1 ? min(rgbImg[x_idx][y_idx+2] * 255 / alpha1, 255) : 0; \n"
        " unsigned char G1 = alpha1 ? min(rgbImg[x_idx][y_idx+1] * 255 / alpha1, 255) : 0; \n"
        " unsigned char B1 = alpha1 ? min(rgbImg[x_idx][y_idx] * 255 / alpha1, 255) : 0; \n"
        " unsigned char R2 = alpha2 ? min(rgbImg[x_idx][y_idx+6] * 255 / alpha2, 255) : 0; \n"
        " unsigned char G2 = alpha2 ? min(rgbImg[x_idx][y_idx+5] * 255 / alpha2, 255) : 0; \n"
        " unsigned char B2 = alpha2 ? min(rgbImg[x_idx][y_idx+4] * 255 / alpha2, 255) : 0; \n"
        " unsigned char Y1 = (unsigned char)(0.299000*R1 + 0.587000*G1 + 0.114000*B1); \n"
        " unsigned char Y2 = (unsigned char)(0.299000*R2 + 0.587000*G2 + 0.114000*B2); \n"
        " unsigned char U = (unsigned char)(-0.168736*R1-0.331264*G1+0.500000*B1+128);//(0.492*(B1-Y1)); \n"
        " unsigned char V = (unsigned char)(0.500000*R1-0.418688*G1-0.081312*B1+128);//(0.877*(R1-Y1)); \n"
        " yuvImg[get_global_id(0)][get_global_id(1)] = (unsigned int)(Y2 << 24 | V << 16 | Y1 << 8 | U); \n"
        " } ";
    m_kernel_code = oss.str();

    // m_sources keeps a pointer into m_kernel_code, so the string must stay alive
    // (it is a member) for as long as the sources are used.
    m_sources.push_back({m_kernel_code.c_str(),m_kernel_code.length()});
    m_program = cl::Program(m_context, m_sources);
    if (m_program.build({m_default_device}) != CL_SUCCESS) {
        std::cout<<" Error building: "<<m_program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(m_default_device)<<"\n";
        exit(1);
    }

    // create buffers on the device
    // Output: one unsigned int (4 bytes) per pixel pair, i.e. 16 bits per pixel.
    m_buffer_yuv = cl::Buffer(m_context, CL_MEM_READ_WRITE, sizeof(unsigned int)*(m_width*m_height/2));
    // Input: 4 bytes per pixel (colour channels plus alpha).
    m_buffer_rgb = cl::Buffer(m_context, CL_MEM_READ_WRITE, sizeof(unsigned char)*(m_width*m_height*4));
}
/// Destroys the wrapper.
///
/// Nothing needs to be released by hand: m_buffer_rgb and m_buffer_yuv are cl::Buffer
/// member objects, and the OpenCL C++ wrapper types release their underlying handles
/// in their own destructors. The previous free(&member) calls passed addresses that
/// were never returned by malloc, which is undefined behaviour.
OpenClWrapper::~OpenClWrapper() = default;
void OpenClWrapper::RGB2YUV422(unsigned int * yuvImg, unsigned char * rgbImg){
cl::CommandQueue queue(m_context,m_default_device);
//write rgb image to the OpenCl buffer
queue.enqueueWriteBuffer(m_buffer_rgb,CL_TRUE,0,sizeof(unsigned char)*(m_width*m_height*4),rgbImg);
//run the kernel
cl::Kernel kernel_yuv2rgb=cl::Kernel(m_program,"RGB2YUV422");
kernel_yuv2rgb.setArg(0,m_buffer_rgb);
kernel_yuv2rgb.setArg(1,m_buffer_yuv);
queue.enqueueNDRangeKernel(kernel_yuv2rgb,cl::NullRange,cl::NDRange(m_height,(m_width/2)),cl::NullRange); //range is divided by 2 because we have width is represented in integers instead of 16bit (as needed in yuv422).
queue.finish();
//read result yuv Image from the device to yuv Image pointer
queue.enqueueReadBuffer(m_buffer_yuv,CL_TRUE,0,sizeof(unsigned int)*(m_width*m_height/2),yuvImg);
}
OpenCV 的简单实现:
/// Converts an 8-bit, 3-channel image to 8-bit, 2-channel packed YUV422 in U-Y-V-Y order.
///
/// Each pair of horizontally adjacent pixels becomes four output bytes: U, Y(first),
/// V, Y(second). U and V are computed from the first pixel of the pair only.
/// Channel bytes are read in R, G, B order.
/// NOTE(review): the coefficients and the +16 / +128 offsets look like the BT.601
/// studio-range matrix — confirm against the intended colour space.
///
/// For an odd width, the final unpaired pixel gets only its U and Y bytes written.
/// The previous code read three bytes past the RGB row and wrote two bytes past the
/// YUV row in that case.
///
/// @param rgb Source image (CV_8U, 3 channels).
/// @param yuv Destination image, pre-allocated with the same size as `rgb`
///            (CV_8U, 2 channels). Checked with assert only.
void rgb_to_yuv422_uyvy(const cv::Mat& rgb, cv::Mat& yuv) {
    assert(rgb.size() == yuv.size() &&
           rgb.depth() == CV_8U &&
           rgb.channels() == 3 &&
           yuv.depth() == CV_8U &&
           yuv.channels() == 2);

    // Luma of one pixel; the float result is truncated toward zero, as before.
    const auto luma = [](uint8_t r, uint8_t g, uint8_t b) -> int {
        return static_cast<int>((0.257f * r) + (0.504f * g) + (0.098f * b) + 16.0f);
    };

    const int rgbPixelBytes = static_cast<int>(rgb.elemSize());
    const int yuvPixelBytes = static_cast<int>(yuv.elemSize());

    for (int ih = 0; ih < rgb.rows; ih++) {
        const uint8_t* rgbRowPtr = rgb.ptr<uint8_t>(ih);
        uint8_t* yuvRowPtr = yuv.ptr<uint8_t>(ih);

        for (int iw = 0; iw < rgb.cols; iw = iw + 2) {
            const uint8_t* src = rgbRowPtr + iw * rgbPixelBytes;
            uint8_t* dst = yuvRowPtr + iw * yuvPixelBytes;

            const uint8_t R1 = src[0];
            const uint8_t G1 = src[1];
            const uint8_t B1 = src[2];

            const int Y = luma(R1, G1, B1);
            const int U = static_cast<int>(-(0.148f * R1) - (0.291f * G1) + (0.439f * B1) + 128.0f);
            const int V = static_cast<int>( (0.439f * R1) - (0.368f * G1) - (0.071f * B1) + 128.0f);

            dst[0] = cv::saturate_cast<uint8_t>(U);
            dst[1] = cv::saturate_cast<uint8_t>(Y);

            // Only touch the second pixel (and its two output bytes) if it exists.
            if (iw + 1 < rgb.cols) {
                const int Y2 = luma(src[3], src[4], src[5]);
                dst[2] = cv::saturate_cast<uint8_t>(V);
                dst[3] = cv::saturate_cast<uint8_t>(Y2);
            }
        }
    }
}
请注意,这里假定(并检查)输入为 RGB、输出为 UYVY 排列的 YUV422。我发现它已经相当快,而且这个问题显然是易并行的(embarrassingly parallel)。