C++ ffmpeg library framerate incorrect when muxing
I'm trying to write a function that muxes an audio file and a video file into an mp4. It mostly works, except that the framerate of the output is slightly off: 30.13 instead of exactly 30. When I combine the same files with the ffmpeg command-line program, the result is exactly 30.
I believe it has to do with the dts/pts correction applied when packets arrive with out-of-order timestamps, but the ffmpeg program does something similar, so I'm not sure where to go from here. I looked at the ffmpeg sources and copied some of their dts correction, but still no luck. What am I doing wrong?
bool mux_audio_video(const char* audio_filename, const char* video_filename, const char* output_filename){
    av_register_all();

    AVOutputFormat* out_format = NULL;
    AVFormatContext* audio_context = NULL, *video_context = NULL, *output_context = NULL;
    int video_index_in = -1, audio_index_in = -1;
    int video_index_out = -1, audio_index_out = -1;

    if(avformat_open_input(&audio_context, audio_filename, 0, 0) < 0)
        return false;
    if(avformat_find_stream_info(audio_context, 0) < 0){
        avformat_close_input(&audio_context);
        return false;
    }
    if(avformat_open_input(&video_context, video_filename, 0, 0) < 0){
        avformat_close_input(&audio_context);
        return false;
    }
    if(avformat_find_stream_info(video_context, 0) < 0){
        avformat_close_input(&audio_context);
        avformat_close_input(&video_context);
        return false;
    }
    if(avformat_alloc_output_context2(&output_context, av_guess_format("mp4", NULL, NULL), NULL, output_filename) < 0){
        avformat_close_input(&audio_context);
        avformat_close_input(&video_context);
        return false;
    }
    out_format = output_context->oformat;

    //find first audio stream in the audio file input
    for(size_t i = 0; i < audio_context->nb_streams; ++i){
        if(audio_context->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO){
            audio_index_in = i;
            AVStream* in_stream = audio_context->streams[i];
            AVCodec* codec = avcodec_find_encoder(in_stream->codecpar->codec_id);
            AVCodecContext* tmp = avcodec_alloc_context3(codec);
            avcodec_parameters_to_context(tmp, in_stream->codecpar);
            AVStream* out_stream = avformat_new_stream(output_context, codec);
            audio_index_out = out_stream->index;
            if(output_context->oformat->flags & AVFMT_GLOBALHEADER){
                tmp->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
            }
            tmp->codec_tag = 0;
            avcodec_parameters_from_context(out_stream->codecpar, tmp);
            avcodec_free_context(&tmp);
            break;
        }
    }
    //find first video stream in the video file input
    for(size_t i = 0; i < video_context->nb_streams; ++i){
        if(video_context->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO){
            video_index_in = i;
            AVStream* in_stream = video_context->streams[i];
            AVCodec* codec = avcodec_find_encoder(in_stream->codecpar->codec_id);
            AVCodecContext* tmp = avcodec_alloc_context3(codec);
            avcodec_parameters_to_context(tmp, in_stream->codecpar);
            AVStream* out_stream = avformat_new_stream(output_context, codec);
            video_index_out = out_stream->index;
            if(output_context->oformat->flags & AVFMT_GLOBALHEADER){
                tmp->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
            }
            tmp->codec_tag = 0;
            avcodec_parameters_from_context(out_stream->codecpar, tmp);
            avcodec_free_context(&tmp);
            break;
        }
    }
    //setup output
    if(!(out_format->flags & AVFMT_NOFILE)){
        if(avio_open(&output_context->pb, output_filename, AVIO_FLAG_WRITE) < 0){
            avformat_free_context(output_context);
            avformat_close_input(&audio_context);
            avformat_close_input(&video_context);
            return false;
        }
    }
    if(avformat_write_header(output_context, NULL) < 0){
        if(!(out_format->flags & AVFMT_NOFILE)){
            avio_close(output_context->pb);
        }
        avformat_free_context(output_context);
        avformat_close_input(&audio_context);
        avformat_close_input(&video_context);
        return false;
    }

    int64_t video_pts = 0, audio_pts = 0;
    int64_t last_video_dts = 0, last_audio_dts = 0;

    while(true){
        AVPacket packet;
        av_init_packet(&packet);
        packet.data = NULL;
        packet.size = 0;

        int64_t* last_dts;
        AVFormatContext* in_context;
        int stream_index = 0;
        AVStream* in_stream, *out_stream;

        //Read in a frame from the next stream
        if(av_compare_ts(video_pts, video_context->streams[video_index_in]->time_base,
                         audio_pts, audio_context->streams[audio_index_in]->time_base) <= 0)
        {
            //video
            last_dts = &last_video_dts;
            in_context = video_context;
            stream_index = video_index_out;
            if(av_read_frame(in_context, &packet) >= 0){
                do{
                    if(packet.stream_index == video_index_in){
                        video_pts = packet.pts;
                        break;
                    }
                    av_packet_unref(&packet);
                }while(av_read_frame(in_context, &packet) >= 0);
            }else{
                break;
            }
        }else{
            //audio
            last_dts = &last_audio_dts;
            in_context = audio_context;
            stream_index = audio_index_out;
            if(av_read_frame(in_context, &packet) >= 0){
                do{
                    if(packet.stream_index == audio_index_in){
                        audio_pts = packet.pts;
                        break;
                    }
                    av_packet_unref(&packet);
                }while(av_read_frame(in_context, &packet) >= 0);
            }else{
                break;
            }
        }

        in_stream = in_context->streams[packet.stream_index];
        out_stream = output_context->streams[stream_index];
        av_packet_rescale_ts(&packet, in_stream->time_base, out_stream->time_base);

        //if dts is out of order, ffmpeg throws an error. So manually fix. Similar to what ffmpeg does in ffmpeg.c
        if(packet.dts < (*last_dts + !(output_context->oformat->flags & AVFMT_TS_NONSTRICT)) && packet.dts != AV_NOPTS_VALUE && (*last_dts) != AV_NOPTS_VALUE){
            int64_t next_dts = (*last_dts)+1;
            if(packet.pts >= packet.dts && packet.pts != AV_NOPTS_VALUE){
                packet.pts = FFMAX(packet.pts, next_dts);
            }
            if(packet.pts == AV_NOPTS_VALUE){
                packet.pts = next_dts;
            }
            packet.dts = next_dts;
        }
        (*last_dts) = packet.dts;

        packet.pos = -1;
        packet.stream_index = stream_index;

        //output packet
        if(av_interleaved_write_frame(output_context, &packet) < 0){
            break;
        }
        av_packet_unref(&packet);
    }

    av_write_trailer(output_context);

    //cleanup
    if(!(out_format->flags & AVFMT_NOFILE)){
        avio_close(output_context->pb);
    }
    avformat_free_context(output_context);
    avformat_close_input(&audio_context);
    avformat_close_input(&video_context);
    return true;
}
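For completeness, I drive the function with something like the following. This is just a minimal sketch; the file names are placeholders, not the actual inputs I tested with, and it assumes mux_audio_video is declared above.

// Minimal driver sketch - file names are placeholders.
#include <cstdio>

int main(){
    if(!mux_audio_video("audio_in.m4a", "video_in.mp4", "muxed_out.mp4")){
        std::fprintf(stderr, "muxing failed\n");
        return 1;
    }
    return 0;
}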
I found the problem. I just needed to initialize last_video_dts and last_audio_dts to the minimum value of int64_t instead of 0:
int64_t last_video_dts, last_audio_dts;
last_video_dts = last_audio_dts = std::numeric_limits<int64_t>::lowest();
With them starting at 0, the first packets of each stream (whose dts is 0 or negative) already trip the out-of-order check, so their timestamps get rewritten and the start of the timeline is squeezed slightly, which is what showed up as 30.13 instead of 30. The output now essentially matches what the ffmpeg program produces.
Edit:
As kamilz pointed out, using AV_NOPTS_VALUE is better and more portable:
int64_t last_video_dts, last_audio_dts;
last_video_dts = last_audio_dts = AV_NOPTS_VALUE;
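To see why the 0 initialization was the problem, here is the correction logic from the muxing loop in isolation (the AVFMT_TS_NONSTRICT check is dropped for brevity, and the sample timestamps are made up). Seeded with AV_NOPTS_VALUE, the first packet passes through untouched and only genuinely non-monotonic dts values are bumped; seeded with 0, any leading packet whose dts is 0 or negative gets rewritten, shifting the start of the timeline.

// Standalone sketch of the dts bookkeeping, not the muxer itself.
extern "C" {
#include <libavutil/avutil.h>   // AV_NOPTS_VALUE
#include <libavutil/common.h>   // FFMAX
}
#include <cstdint>
#include <cstdio>

// Same correction as in the muxing loop, minus the AVFMT_TS_NONSTRICT check.
static void fix_dts(int64_t* last_dts, int64_t* pts, int64_t* dts){
    if(*dts != AV_NOPTS_VALUE && *last_dts != AV_NOPTS_VALUE && *dts < *last_dts + 1){
        int64_t next_dts = *last_dts + 1;
        if(*pts != AV_NOPTS_VALUE && *pts >= *dts)
            *pts = FFMAX(*pts, next_dts);
        if(*pts == AV_NOPTS_VALUE)
            *pts = next_dts;
        *dts = next_dts;
    }
    *last_dts = *dts;
}

int main(){
    int64_t last_dts = AV_NOPTS_VALUE;            // the fix: sentinel instead of 0
    int64_t sample_dts[] = {-512, 0, 512, 1024};  // made-up leading packet dts values
    for(int64_t dts : sample_dts){
        int64_t pts = dts + 512;
        fix_dts(&last_dts, &pts, &dts);
        std::printf("dts=%lld pts=%lld\n", (long long)dts, (long long)pts);  // unchanged with the sentinel
    }
    return 0;
}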