How to convert webrtc::VideoFrame to OpenCV Mat in C++
I am trying to display received WebRTC frames using OpenCV imshow(). WebRTC delivers each frame as a webrtc::VideoFrame object, and in my case I can access a webrtc::I420Buffer from it. My question is how to convert the data in the webrtc::I420Buffer to a cv::Mat, so that I can feed it to imshow()?
This is how webrtc::I420Buffer is defined:
namespace webrtc {
// Plain I420 buffer in standard memory.
class RTC_EXPORT I420Buffer : public I420BufferInterface {
public:
...
int width() const override;
int height() const override;
const uint8_t* DataY() const override;
const uint8_t* DataU() const override;
const uint8_t* DataV() const override;
int StrideY() const override;
int StrideU() const override;
int StrideV() const override;
uint8_t* MutableDataY();
uint8_t* MutableDataU();
uint8_t* MutableDataV();
...
private:
const int width_;
const int height_;
const int stride_y_;
const int stride_u_;
const int stride_v_;
const std::unique_ptr<uint8_t, AlignedFreeDeleter> data_;
};
}  // namespace webrtc
The main problem is converting from the I420 color format to the BGR (or BGRA) color format used by OpenCV.
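For reference, a sketch of what the conversion computes per pixel, using the approximate limited-range BT.601 coefficients (the default in sws_scale; the exact constants depend on the selected standard):
R = 1.164*(Y - 16) + 1.596*(V - 128)
G = 1.164*(Y - 16) - 0.813*(V - 128) - 0.391*(U - 128)
B = 1.164*(Y - 16) + 2.018*(U - 128)
In I420, U and V are also subsampled by 2 in both axes, so each U/V pair is shared by a 2x2 block of Y samples.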
Two decent options for the color conversion:
- Using sws_scale, part of FFmpeg's C interface libraries.
- Using an IPP color conversion function such as ippiYCbCr420ToBGR_709HDTV_8u_P3C4R.
We may also use cv::cvtColor with the cv::COLOR_YUV2BGR_I420 argument.
This is less recommended, because cvtColor requires the Y, U and V color channels to be sequential in memory; in the general case it takes too many "deep copy" operations, as the sketch below shows.
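A minimal sketch of the cvtColor approach (assuming StrideY() equals width and StrideU()/StrideV() equal width/2, so each plane can be copied with a single memcpy; with larger strides every row must be copied separately; buffer stands for a pointer to the I420 source):
cv::Mat i420(height * 3 / 2, width, CV_8UC1); //One contiguous buffer holding Y, U and V sequentially.
memcpy(i420.data, buffer->DataY(), (size_t)width * height); //Copy Y plane.
memcpy(i420.data + (size_t)width * height, buffer->DataU(), (size_t)width * height / 4); //Copy U plane.
memcpy(i420.data + (size_t)width * height * 5 / 4, buffer->DataV(), (size_t)width * height / 4); //Copy V plane.
cv::Mat bgr;
cv::cvtColor(i420, bgr, cv::COLOR_YUV2BGR_I420); //OpenCV applies a BT.601 conversion here.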
After the color conversion, we may use the cv::Mat constructor for "wrapping" the BGR (or BGRA) memory buffer (without using a "deep copy").
Example (the terms "step", "stride" and "linesize" are equivalent):
cv::Mat bgra_img = cv::Mat(height, width, CV_8UC4, pDst, dstStep);
Note that the wrapping Mat does not own the data, so pDst must stay valid for as long as bgra_img is in use.
Creating a sample raw image in I420 format for testing:
We may use the FFmpeg CLI for creating an input file for testing:
ffmpeg -f lavfi -i testsrc=size=640x480:duration=1:rate=1 -pix_fmt yuv420p -f rawvideo I420.yuv
Note: FFmpeg's yuv420p pixel format is equivalent to the I420 format.
The code sample includes two parts: the first part uses sws_scale and the second part uses IPP.
Select one of them (you don't have to use both).
For testing, I redefined class I420Buffer and added some functionality to it.
It may look weird, but it is used only for testing.
Just follow the code sample, and see if it makes sense...
Here is the code sample (please read the comments):
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
//Use OpenCV for showing the image
#include <opencv2/opencv.hpp>
#include <opencv2/highgui.hpp>
extern "C" {
//Required for using sws_scale
#include <libavutil/frame.h>
#include <libswscale/swscale.h>
}
//We don't need both IPP and LibAV, the IPP solution is a separate example.
#include <ipp.h>
#include <ippi.h>
//I420 format:
//            <------ stride_y_ ----->
//            <------- width ------>
// data_y_ -> yyyyyyyyyyyyyyyyyyyyyy00
//            yyyyyyyyyyyyyyyyyyyyyy00
//            yyyyyyyyyyyyyyyyyyyyyy00
//            yyyyyyyyyyyyyyyyyyyyyy00
//            yyyyyyyyyyyyyyyyyyyyyy00
//            yyyyyyyyyyyyyyyyyyyyyy00
//
//            < stride_u_>
//            <-width/2->
// data_u_ -> uuuuuuuuuuu0
//            uuuuuuuuuuu0
//            uuuuuuuuuuu0
//
//            < stride_v_>
//            <-width/2->
// data_v_ -> vvvvvvvvvvv0
//            vvvvvvvvvvv0
//            vvvvvvvvvvv0
// Plain I420 buffer in standard memory.
// Some extra functionality is added for testing
////////////////////////////////////////////////////////////////////////////////
class I420Buffer {
public:
//Constructor (for testing):
//Allocate buffers, and read an I420 image from a binary file.
explicit I420Buffer(int w, int h, const char *input_file_name) : width_(w), height_(h), stride_y_(w), stride_u_(w / 2), stride_v_(w / 2)
{
//The example uses stride = width (but in the general case the stride may be larger than width).
data_y_ = new uint8_t[w*h];
data_u_ = new uint8_t[w*h / 4];
data_v_ = new uint8_t[w*h / 4];
FILE* f = fopen(input_file_name, "rb");
if (f == nullptr)
{
fprintf(stderr, "Failed to open %s\n", input_file_name);
exit(1); //Keep the test code simple: abort if the input file is missing.
}
fread(data_y_, 1, w*h, f); //Read Y color channel.
fread(data_u_, 1, w*h/4, f); //Read U color channel.
fread(data_v_, 1, w*h/4, f); //Read V color channel.
fclose(f);
};
//Destructor (for testing):
~I420Buffer()
{
delete[] data_y_;
delete[] data_u_;
delete[] data_v_;
}
int width() const { return width_; };
int height() const { return height_; };
const uint8_t* DataY() const { return data_y_; };
const uint8_t* DataU() const { return data_u_; };
const uint8_t* DataV() const { return data_v_; };
int StrideY() const { return stride_y_; };
int StrideU() const { return stride_u_; };
int StrideV() const { return stride_v_; };
//uint8_t* MutableDataY();
//uint8_t* MutableDataU();
//uint8_t* MutableDataV();
private:
const int width_;
const int height_;
const int stride_y_;
const int stride_u_;
const int stride_v_;
//const std::unique_ptr<uint8_t, AlignedFreeDeleter> data_;
uint8_t* data_y_; //Assume data_ is internally divided into Y, U and V buffers.
uint8_t* data_u_;
uint8_t* data_v_;
};
////////////////////////////////////////////////////////////////////////////////
int main()
{
//Create raw video frame in I420 format using FFmpeg (for testing):
//ffmpeg -f lavfi -i testsrc=size=640x480:duration=1:rate=1 -pix_fmt yuv420p -f rawvideo I420.yuv
int width = 640;
int height = 480;
I420Buffer I(width, height, "I420.yuv");
//Create SWS Context for converting from decode pixel format (like YUV420) to BGR
////////////////////////////////////////////////////////////////////////////
struct SwsContext* sws_ctx = NULL;
sws_ctx = sws_getContext(I.width(),
I.height(),
AV_PIX_FMT_YUV420P, //Input format is yuv420p (equivalent to I420).
I.width(),
I.height(),
AV_PIX_FMT_BGR24, //For OpenCV, we want BGR pixel format.
SWS_FAST_BILINEAR,
NULL,
NULL,
NULL);
if (sws_ctx == nullptr)
{
return -1; //Error!
}
////////////////////////////////////////////////////////////////////////////
//Allocate frame for storing image converted to BGR.
////////////////////////////////////////////////////////////////////////////
AVFrame* pBGRFrame = av_frame_alloc(); //Allocate a frame, because it is more convenient than allocating and initializing the data buffer and linesize manually.
pBGRFrame->format = AV_PIX_FMT_BGR24;
pBGRFrame->width = I.width();
pBGRFrame->height = I.height();
int sts = av_frame_get_buffer(pBGRFrame, 0); //Buffers allocation
if (sts < 0)
{
return -1; //Error!
}
////////////////////////////////////////////////////////////////////////////
//Convert from the input format (e.g. YUV420) to BGR:
//sws_scale uses the BT.601 conversion formula by default. It is more likely that the input is actually BT.709 and not BT.601 (read about it in Wikipedia).
//It is possible to select BT.709 using sws_setColorspaceDetails (see the sketch below).
////////////////////////////////////////////////////////////////////////////
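//A minimal sketch (an assumption, not verified against a real stream): selecting limited-range BT.709 input could look like:
//  const int* srcCoefs = sws_getCoefficients(SWS_CS_ITU709);
//  const int* dstCoefs = sws_getCoefficients(SWS_CS_DEFAULT);
//  sws_setColorspaceDetails(sws_ctx, srcCoefs, 0, dstCoefs, 0, 0, 1 << 16, 1 << 16);
//The last three arguments are brightness, contrast and saturation in 16.16 fixed point; 0, 1<<16, 1<<16 are the neutral (default) values.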
const uint8_t* const src_data[] = { I.DataY(), I.DataU(), I.DataV() };
const int src_stride[] = { I.StrideY(), I.StrideU(), I.StrideV() };
sts = sws_scale(sws_ctx, //struct SwsContext* c,
src_data, //const uint8_t* const srcSlice[],
src_stride, //const int srcStride[],
0, //int srcSliceY,
I.height(), //int srcSliceH,
pBGRFrame->data, //uint8_t* const dst[],
pBGRFrame->linesize); //const int dstStride[]);
if (sts != I.height())
{
return -1; //Error!
}
//Use OpenCV for showing the image (and for saving the image in PNG format):
////////////////////////////////////////////////////////////////////////////
cv::Mat img = cv::Mat(pBGRFrame->height, pBGRFrame->width, CV_8UC3, pBGRFrame->data[0], pBGRFrame->linesize[0]); //cv::Mat is OpenCV "thin image wrapper".
cv::imshow("img", img);
//cv::waitKey();
//Save the image in PNG format using OpenCV.
cv::imwrite("rgb.png", img);
////////////////////////////////////////////////////////////////////////////
//Free
sws_freeContext(sws_ctx);
av_frame_free(&pBGRFrame);
// Solution using IPP:
// The IPP sample uses the BT.709 conversion formula, and converts to BGRA (not BGR).
// It is more likely that the input is BT.709 and not BT.601 (read about it in Wikipedia).
// Using the color conversion function: ippiYCbCr420ToBGR_709HDTV_8u_P3C4R
//https://www.intel.com/content/www/us/en/develop/documentation/ipp-dev-reference/top/volume-2-image-processing/image-color-conversion/color-model-conversion/ycbcr420tobgr-709hdtv.html
////////////////////////////////////////////////////////////////////////////
IppStatus ipp_sts = ippInit();
if (ipp_sts < ippStsNoErr)
{
return -1; //Error.
}
const Ipp8u* pSrc[3] = { I.DataY(), I.DataU(), I.DataV() };
int srcStep[3] = { I.StrideY(), I.StrideU(), I.StrideV() };
Ipp8u* pDst = new uint8_t[I.width() * I.height() * 4];
int dstStep = I.width() * 4;
IppiSize roiSize = { I.width(), I.height() };
ipp_sts = ippiYCbCr420ToBGR_709HDTV_8u_P3C4R(pSrc, //const Ipp8u* pSrc[3],
srcStep, //int srcStep[3],
pDst, //Ipp8u* pDst,
dstStep, //int dstStep,
roiSize, //IppiSize roiSize,
255); //Ipp8u aval)
if (ipp_sts < ippStsNoErr)
{
return -1; //Error.
}
cv::Mat bgra_img = cv::Mat(I.height(), I.width(), CV_8UC4, pDst, dstStep); //cv::Mat is OpenCV "thin image wrapper".
cv::imshow("bgra_img", bgra_img);
cv::waitKey();
delete[] pDst;
////////////////////////////////////////////////////////////////////////////
return 0;
}
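A possible build command on Linux (an assumption; package and library names vary by distribution), for the sws_scale part after removing the IPP includes and the IPP section:
g++ -o i420_demo main.cpp $(pkg-config --cflags --libs opencv4 libswscale libavutil)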
Sample output (resized):
Note:
Actually using a WebRTC input stream seems to be difficult.
It is assumed that the decoded raw video frame already exists in an I420Buffer, as defined in your post.
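For completeness, a minimal sketch (an assumption based on the public WebRTC C++ API, not tested here) of how the I420 buffer might be obtained from an incoming frame, inside an rtc::VideoSinkInterface<webrtc::VideoFrame> implementation:
void OnFrame(const webrtc::VideoFrame& frame) override
{
//ToI420() converts the buffer only if it is not already in I420 format.
rtc::scoped_refptr<webrtc::I420BufferInterface> i420 = frame.video_frame_buffer()->ToI420();
//i420->DataY(), i420->DataU(), i420->DataV() and the matching strides
//may now be passed to the sws_scale or IPP conversion shown above.
}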