使用 libav* 将原始帧 (D3D11Texture2D) 编码为 rtsp 流

Question

我已经设法使用 libav* 和 DirectX 纹理（通过 GDI API 的 BitBlt 方法获取）创建了一个 rtsp 流。这是我创建实时 rtsp 流的方法：

创建输出上下文和流（跳过此处的检查）
- avformat_alloc_output_context2(&ofmt_ctx, NULL, "rtsp", rtsp_url); //RTSP
- vid_codec = avcodec_find_encoder(ofmt_ctx->oformat->video_codec);
- vid_stream = avformat_new_stream(ofmt_ctx,vid_codec);
- vid_codec_ctx = avcodec_alloc_context3(vid_codec);

设置编解码器参数

// Configure the encoder context before it is opened.
codec_ctx->codec_tag = 0;
// Use the video codec that the output format selects by default.
codec_ctx->codec_id = ofmt_ctx->oformat->video_codec;
//codec_ctx->codec_type = AVMEDIA_TYPE_VIDEO;
codec_ctx->width = width;   codec_ctx->height = height;
codec_ctx->gop_size = 12;
 //codec_ctx->gop_size = 40;
 //codec_ctx->max_b_frames = 3;
codec_ctx->pix_fmt = target_pix_fmt; // AV_PIX_FMT_YUV420P
// framerate and time_base are reciprocals: one time_base tick per frame.
codec_ctx->framerate = { stream_fps, 1 };
codec_ctx->time_base = { 1, stream_fps};
// Ask the encoder for global headers when the output format's flags request them.
// NOTE(review): the format context is called fctx here but ofmt_ctx in the
// other snippets; presumably the same object, confirm.
if (fctx->oformat->flags & AVFMT_GLOBALHEADER)
 {
     codec_ctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
 }

初始化视频流

if (avcodec_parameters_from_context(stream->codecpar, codec_ctx) < 0)
{
 Debug::Error("Could not initialize stream codec parameters!");
 return false;
}

AVDictionary* codec_options = nullptr;
if (codec->id == AV_CODEC_ID_H264) {
 av_dict_set(&codec_options, "profile", "high", 0);
 av_dict_set(&codec_options, "preset", "fast", 0);
 av_dict_set(&codec_options, "tune", "zerolatency", 0);
}
// open video encoder
int ret = avcodec_open2(codec_ctx, codec, &codec_options);
if (ret<0) {
 Debug::Error("Could not open video encoder: ", avcodec_get_name(codec->id), " error ret: ", AVERROR(ret));
 return false;
}

stream->codecpar->extradata = codec_ctx->extradata;
stream->codecpar->extradata_size = codec_ctx->extradata_size;

开始直播

// Creates an AVFrame describing a width x height picture in the encoder's
// pixel format.
//
// Only the frame metadata (width, height, format) is filled in. Pixel storage
// is NOT attached here: the caller must call av_frame_get_buffer() on the
// returned frame before touching frame->data, and must release the frame with
// av_frame_free(). Pointing frame->data at a function-local buffer (as an
// earlier version did) leaves the plane pointers dangling once this function
// returns.
//
// width/height are truncated to int when stored in the frame.
// Returns nullptr if the frame could not be allocated.
AVFrame* AllocateFrameBuffer(AVCodecContext* codec_ctx, double width, double height)
{
    AVFrame* frame = av_frame_alloc();
    if (!frame)
    {
        return nullptr;
    }

    frame->width = static_cast<int>(width);
    frame->height = static_cast<int>(height);
    frame->format = static_cast<int>(codec_ctx->pix_fmt);
    return frame;
}

void RtspStream(AVFormatContext* ofmt_ctx, AVStream* vid_stream, AVCodecContext* vid_codec_ctx, char* rtsp_url)
{
 printf("Output stream info:\n");
 av_dump_format(ofmt_ctx, 0, rtsp_url, 1);

 const int width = WindowManager::Get().GetWindow(RtspStreaming::WindowId())->GetTextureWidth();
 const int height = WindowManager::Get().GetWindow(RtspStreaming::WindowId())->GetTextureHeight();

 //DirectX BGRA to h264 YUV420p
 SwsContext* conversion_ctx = sws_getContext(width, height, src_pix_fmt,
     vid_stream->codecpar->width, vid_stream->codecpar->height, target_pix_fmt, 
     SWS_BICUBIC | SWS_BITEXACT, nullptr, nullptr, nullptr);
if (!conversion_ctx)
{
     Debug::Error("Could not initialize sample scaler!");
     return;
}

 AVFrame* frame = AllocateFrameBuffer(vid_codec_ctx,vid_codec_ctx->width,vid_codec_ctx->height);
 if (!frame) {
     Debug::Error("Could not allocate video frame\n");
     return;
 }


 if (avformat_write_header(ofmt_ctx, NULL) < 0) {
     Debug::Error("Error occurred when writing header");
     return;
 }
 if (av_frame_get_buffer(frame, 0) < 0) {
     Debug::Error("Could not allocate the video frame data\n");
     return;
 }

 int frame_cnt = 0;
 //av start time in microseconds
 int64_t start_time_av = av_gettime();
 AVRational time_base = vid_stream->time_base;
 AVRational time_base_q = { 1, AV_TIME_BASE };

 // frame pixel data info
 int data_size = width * height * 4;
 uint8_t* data = new uint8_t[data_size];
//    AVPacket* pkt = av_packet_alloc();

 while (RtspStreaming::IsStreaming())
 {
     /* make sure the frame data is writable */
     if (av_frame_make_writable(frame) < 0)
     {
         Debug::Error("Can't make frame writable");
         break;
     }

     //get copy/ref of the texture
     //uint8_t* data = WindowManager::Get().GetWindow(RtspStreaming::WindowId())->GetBuffer();
     if (!WindowManager::Get().GetWindow(RtspStreaming::WindowId())->GetPixels(data, 0, 0, width, height))
     {
         Debug::Error("Failed to get frame buffer. ID: ", RtspStreaming::WindowId());
         std::this_thread::sleep_for (std::chrono::seconds(2));
         continue;
     }
     //printf("got pixels data\n");
     // convert BGRA to yuv420 pixel format
     int srcStrides[1] = { 4 * width };
     if (sws_scale(conversion_ctx, &data, srcStrides, 0, height, frame->data, frame->linesize) < 0)
     {
         Debug::Error("Unable to scale d3d11 texture to frame. ", frame_cnt);
         break;
     }
     //Debug::Log("frame pts: ", frame->pts, "  time_base:", av_rescale_q(1, vid_codec_ctx->time_base, vid_stream->time_base));
     frame->pts = frame_cnt++; 
     //frame_cnt++;
     //printf("scale conversion done\n");

     //encode to the video stream
     int ret = avcodec_send_frame(vid_codec_ctx, frame);
     if (ret < 0)
     {
         Debug::Error("Error sending frame to codec context! ",frame_cnt);
         break;
     }

     AVPacket* pkt = av_packet_alloc();
     //av_init_packet(pkt);
     ret = avcodec_receive_packet(vid_codec_ctx, pkt);
     if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
     {
         //av_packet_unref(pkt);
         av_packet_free(&pkt);
         continue;
     }
     else if (ret < 0)
     {
         Debug::Error("Error during receiving packet: ",AVERROR(ret));
         //av_packet_unref(pkt);
         av_packet_free(&pkt);
         break;
     }

     if (pkt->pts == AV_NOPTS_VALUE)
     {
         //Write PTS
         //Duration between 2 frames (us)
         int64_t calc_duration = (double)AV_TIME_BASE / av_q2d(vid_stream->r_frame_rate);
         //Parameters
         pkt->pts = (double)(frame_cnt * calc_duration) / (double)(av_q2d(time_base) * AV_TIME_BASE);
         pkt->dts = pkt->pts;
         pkt->duration = (double)calc_duration / (double)(av_q2d(time_base) * AV_TIME_BASE);
     }
     int64_t pts_time = av_rescale_q(pkt->dts, time_base, time_base_q);
     int64_t now_time = av_gettime() - start_time_av;

     if (pts_time > now_time)
         av_usleep(pts_time - now_time);

     //pkt.pts = av_rescale_q_rnd(pkt.pts, in_stream->time_base, out_stream->time_base, (AVRounding)(AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX));
     //pkt.dts = av_rescale_q_rnd(pkt.dts, in_stream->time_base, out_stream->time_base, (AVRounding)(AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX));
     //pkt.duration = av_rescale_q(pkt.duration, in_stream->time_base, out_stream->time_base);
     //pkt->pos = -1;

     //write frame and send
     if (av_interleaved_write_frame(ofmt_ctx, pkt)<0)
     {
         Debug::Error("Error muxing packet, frame number:",frame_cnt);
         break;
     }

     //Debug::Log("RTSP streaming...");
     //sstd::this_thread::sleep_for(std::chrono::milliseconds(1000/20));
     //av_packet_unref(pkt);
     av_packet_free(&pkt);
 }

 //av_free_packet(pkt);
 delete[] data;

 /* Write the trailer, if any. The trailer must be written before you
  * close the CodecContexts open when you wrote the header; otherwise
  * av_write_trailer() may try to use memory that was freed on
  * av_codec_close(). */
 av_write_trailer(ofmt_ctx);
 av_frame_unref(frame);
 av_frame_free(&frame);
 printf("streaming thread CLOSED!\n");
}

现在，这允许我连接到我的 rtsp 服务器并保持连接。但是，在 rtsp 客户端，我得到灰色或单个静态帧，如下所示：

如果您能帮助解决以下问题，我们将不胜感激：

首先，尽管持续连接到服务器并更新帧，但为什么流不工作？
视频编解码器。默认 rtsp 格式使用 Mpeg4 编解码器，是否可以使用 h264？当我手动将其设置为 AV_CODEC_ID_H264 时，程序在 avcodec_open2 处失败，返回值为 -22。
我是否需要为每一帧创建和分配新的“AVFrame”和“AVPacket”？或者我可以为此重用全局变量吗？
我是否需要为实时流明确定义一些代码？（就像在 ffmpeg 中我们使用“-re”标志）。

如果您能指出一些创建直播的示例代码，那就太好了。我检查了以下资源：

更新

测试时我发现我可以使用 ffplay 播放流，但它卡在 VLC 播放器上。这是 ffplay 日志上的快照

Answer 1

基本构造和初始化似乎没问题。在下面找到对您问题的回答

为什么尽管持续连接到服务器并更新帧，但流仍无法正常工作？

如果您遇到错误或流中断，您可能需要检查数据包的显示时间戳和解码时间戳 (pts/dts)。

在您的代码中，我注意到您从视频流对象中获取 time_base，它不保证与编解码器的 time_base（codec_ctx->time_base）相同，并且通常会随具体的流而变化。

 AVRational time_base = vid_stream->time_base;
 AVRational time_base_q = { 1, AV_TIME_BASE };

视频编解码器。默认rtsp格式使用Mpeg4编解码器，是否可以使用h264？

我不明白为什么不...RTSP 只是一种用于在网络上传输数据包的协议。所以你应该能够使用 AV_CODEC_ID_H264 来编码流。

我是否需要为每一帧创建和分配新的“AVFrame”和“AVPacket”？或者我可以为此重用全局变量吗？

在 libav 中，在编码过程中，单个数据包用于对视频帧进行编码，而单个数据包中可以有多个音频帧。我应该参考这个，但目前似乎找不到任何来源。但无论如何，关键是您每次都需要创建新数据包。

我是否需要为实时流明确定义一些代码？（就像在 ffmpeg 中我们使用“-re”标志）。

您无需为实时流式传输添加任何其他内容。尽管您可能想实施它以限制传递给编码器的帧更新数量并节省一些性能。

Answer 2

对我来说，ffplay 能成功播放而 VLC 无法正常播放（针对 UDP 数据包）的区别在于 pkt_size=xxx 属性：发送端使用 ffmpeg -re -i test.mp4 -f mpegts udp://127.0.0.1:23000?pkt_size=1316，VLC 端在“打开媒体 → 网络”选项卡中填写 udp://@:23000:pkt_size=1316。因此，只有两端都定义了 pkt_size（且数值相等），VLC 才能正常接收。

使用 libav* 将原始帧 (D3D11Texture2D) 编码为 rtsp 流

Encoding of raw frames (D3D11Texture2D) to an rtsp stream using libav*

streaming

ffmpeg

rtsp

h.264

libav