转码后的视频流无法在 QuickTime 播放器中播放
Transcoded video stream unplayable in QuickTime player
目前我正在编写使用 ffmpeg 库对媒体文件进行转码的软件。问题是在 H264 QuickTime 的情况下无法播放结果流并显示黑屏。音频流按预期工作。我读到 QuickTime 只能处理 yuv420p
像素格式,对于编码视频也是如此。
我查看了 ffmpeg 示例和 ffmpeg 源代码,但没有找到任何可能存在问题的线索。如果有任何帮助,我将不胜感激。
我唯一设法从 QuickTime 得到的是
SeqAndPicParamSetFromCFDictionaryRef, bad config record
控制台中的消息。 AVPlayer 从 AVFoundation 记录了同样的事情。
这里是输出流和编码器的初始化。
int status;
// avformat_alloc_output_context2()
if ((status = formatContext.open(destFilename)) < 0) {
return status;
}
AVDictionary *fmtOptions = nullptr;
av_dict_set(&fmtOptions, "movflags", "faststart", 0);
av_dict_set(&fmtOptions, "brand", "mp42", 0);
streams.resize(input->getStreamsCount());
for (int i = 0; i < input->getStreamsCount(); ++i) {
AVStream *inputStream = input->getStreamAtIndex(i);
CodecContext &decoderContext = input->getDecoderAtIndex(i);
// retrieve output codec by codec id
auto encoderCodecId = decoderContext.getCodecID();;
if (decoderContext.getCodecType() == AVMEDIA_TYPE_VIDEO || decoderContext.getCodecType() == AVMEDIA_TYPE_AUDIO) {
int codecIdKey = decoderContext.getCodecType() == AVMEDIA_TYPE_AUDIO ? IPROC_KEY_INT(TargetAudioCodecID) : IPROC_KEY_INT(TargetVideoCodecID);
auto codecIdParam = static_cast<AVCodecID>(params[codecIdKey]);
if (codecIdParam != AV_CODEC_ID_NONE) {
encoderCodecId = codecIdParam;
}
}
AVCodec *encoder = nullptr;
if ((encoder = avcodec_find_encoder(encoderCodecId)) == nullptr) {
status = AVERROR_ENCODER_NOT_FOUND;
return status;
}
// create stream with specific codec and format
AVStream *outputStream = nullptr;
// avformat_new_stream()
if ((outputStream = formatContext.newStream(encoder)) == nullptr) {
return AVERROR(ENOMEM);
}
CodecContext encoderContext;
// avcodec_alloc_context3()
if ((status = encoderContext.init(encoder)) < 0) {
return status;
}
outputStream->disposition = inputStream->disposition;
encoderContext.getRawCtx()->chroma_sample_location = decoderContext.getRawCtx()->chroma_sample_location;
if (encoderContext.getCodecType() == AVMEDIA_TYPE_VIDEO) {
auto lang = av_dict_get(input->getStreamAtIndex(i)->metadata, "language", nullptr, 0);
if (lang) {
av_dict_set(&outputStream->metadata, "language", lang->value, 0);
}
// prepare encoder context
int targetWidth = params[IPROC_KEY_INT(TargetVideoWidth)];
int targetHeight = params[IPROC_KEY_INT(TargetVideHeight)];
encoderContext.width() = targetWidth > 0 ? targetWidth : decoderContext.width();
encoderContext.height() = targetHeight > 0 ? targetHeight : decoderContext.height();
encoderContext.pixelFormat() = encoder->pix_fmts ? encoder->pix_fmts[0] : decoderContext.pixelFormat();;
encoderContext.timeBase() = decoderContext.timeBase();
encoderContext.getRawCtx()->level = 31;
encoderContext.getRawCtx()->gop_size = 25;
double far = static_cast<double>(encoderContext.getRawCtx()->width) / encoderContext.getRawCtx()->height;
double dar = static_cast<double>(decoderContext.width()) / decoderContext.height();
encoderContext.sampleAspectRatio() = av_d2q(dar / far, 255);
encoderContext.getRawCtx()->bits_per_raw_sample = FFMIN(decoderContext.getRawCtx()->bits_per_raw_sample,
av_pix_fmt_desc_get(encoderContext.pixelFormat())->comp[0].depth);
encoderContext.getRawCtx()->framerate = inputStream->r_frame_rate;
outputStream->avg_frame_rate = encoderContext.getRawCtx()->framerate;
VideoFilterGraphParameters params;
params.height = encoderContext.height();
params.width = encoderContext.width();
params.pixelFormat = encoderContext.pixelFormat();
if ((status = generateGraph(decoderContext, encoderContext, params, streams[i].filterGraph)) < 0) {
return status;
}
} else if (encoderContext.getCodecType() == AVMEDIA_TYPE_AUDIO) {
auto lang = av_dict_get(input->getStreamAtIndex(i)->metadata, "language", nullptr, 0);
if (lang) {
av_dict_set(&outputStream->metadata, "language", lang->value, 0);
}
encoderContext.sampleRate() = params[IPROC_KEY_INT(TargetAudioSampleRate)] ? : decoderContext.sampleRate();
encoderContext.channels() = params[IPROC_KEY_INT(TargetAudioChannels)] ? : decoderContext.channels();
auto paramChannelLayout = params[IPROC_KEY_INT(TargetAudioChannelLayout)];
if (paramChannelLayout) {
encoderContext.channelLayout() = paramChannelLayout;
} else {
encoderContext.channelLayout() = av_get_default_channel_layout(encoderContext.channels());
}
AVSampleFormat sampleFormatParam = static_cast<AVSampleFormat>(params[IPROC_KEY_INT(TargetAudioSampleFormat)]);
if (sampleFormatParam != AV_SAMPLE_FMT_NONE) {
encoderContext.sampleFormat() = sampleFormatParam;
} else if (encoder->sample_fmts) {
encoderContext.sampleFormat() = encoder->sample_fmts[0];
} else {
encoderContext.sampleFormat() = decoderContext.sampleFormat();
}
encoderContext.timeBase().num = 1;
encoderContext.timeBase().den = encoderContext.sampleRate();
AudioFilterGraphParameters params;
params.channelLayout = encoderContext.channelLayout();
params.channels = encoderContext.channels();
params.format = encoderContext.sampleFormat();
params.sampleRate = encoderContext.sampleRate();
if ((status = generateGraph(decoderContext, encoderContext, params, streams[i].filterGraph)) < 0) {
return status;
}
}
// before using encoder, we should open it and update its parameters
printf("Codec bits per sample %d\n", av_get_bits_per_sample(encoderCodecId));
AVDictionary *options = nullptr;
// avcodec_open2()
if ((status = encoderContext.open(encoder, &options)) < 0) {
return status;
}
if (streams[i].filterGraph) {
streams[i].filterGraph.setOutputFrameSize(encoderContext.getFrameSize());
}
// avcodec_parameters_from_context()
if ((status = encoderContext.fillParamters(outputStream->codecpar)) < 0) {
return status;
}
outputStream->codecpar->format = encoderContext.getRawCtx()->pix_fmt;
if (formatContext.getRawCtx()->oformat->flags & AVFMT_GLOBALHEADER) {
encoderContext.getRawCtx()->flags |= CODEC_FLAG_GLOBAL_HEADER;
}
if (encoderContext.getRawCtx()->nb_coded_side_data) {
int i;
for (i = 0; i < encoderContext.getRawCtx()->nb_coded_side_data; i++) {
const AVPacketSideData *sd_src = &encoderContext.getRawCtx()->coded_side_data[i];
uint8_t *dst_data;
dst_data = av_stream_new_side_data(outputStream, sd_src->type, sd_src->size);
if (!dst_data)
return AVERROR(ENOMEM);
memcpy(dst_data, sd_src->data, sd_src->size);
}
}
/*
* Add global input side data. For now this is naive, and copies it
* from the input stream's global side data. All side data should
* really be funneled over AVFrame and libavfilter, then added back to
* packet side data, and then potentially using the first packet for
* global side data.
*/
for (int i = 0; i < inputStream->nb_side_data; i++) {
AVPacketSideData *sd = &inputStream->side_data[i];
uint8_t *dst = av_stream_new_side_data(outputStream, sd->type, sd->size);
if (!dst)
return AVERROR(ENOMEM);
memcpy(dst, sd->data, sd->size);
}
// copy timebase while removing common factors
if (outputStream->time_base.num <= 0 || outputStream->time_base.den <= 0) {
outputStream->time_base = av_add_q(encoderContext.timeBase(), (AVRational){0, 1});
}
// copy estimated duration as a hint to the muxer
if (outputStream->duration <= 0 && inputStream->duration > 0) {
outputStream->duration = av_rescale_q(inputStream->duration, inputStream->time_base, outputStream->time_base);
}
streams[i].codecType = encoderContext.getRawCtx()->codec_type;
streams[i].codec = std::move(encoderContext);
streams[i].streamIndex = i;
}
// avio_open() and avformat_write_header()
if ((status = formatContext.writeHeader(fmtOptions)) < 0) {
return status;
}
formatContext.dumpFormat();
正在从流中读取。
int InputProcessor::performStep() {
int status;
Packet nextPacket;
if ((status = input->getFormatContext().readFrame(nextPacket)) < 0) {
return status;
}
++streams[nextPacket.getStreamIndex()].readPackets;
int streamIndex = nextPacket.getStreamIndex();
CodecContext &decoder = input->getDecoderAtIndex(streamIndex);
AVStream *inputStream = input->getStreamAtIndex(streamIndex);
if (streams[nextPacket.getStreamIndex()].readPackets == 1) {
for (int i = 0; i < inputStream->nb_side_data; ++i) {
AVPacketSideData *src_sd = &inputStream->side_data[i];
uint8_t *dst_data;
if (src_sd->type == AV_PKT_DATA_DISPLAYMATRIX) {
continue;
}
if (av_packet_get_side_data(nextPacket.getRawPtr(), src_sd->type, nullptr)) {
continue;
}
dst_data = av_packet_new_side_data(nextPacket.getRawPtr(), src_sd->type, src_sd->size);
if (!dst_data) {
return AVERROR(ENOMEM);
}
memcpy(dst_data, src_sd->data, src_sd->size);
}
}
nextPacket.rescaleTimestamps(inputStream->time_base, decoder.timeBase());
status = decodePacket(&nextPacket, nextPacket.getStreamIndex());
if (status < 0 && status != AVERROR(EAGAIN)) {
return status;
}
return 0;
}
这是 decoding/encoding 代码。
int InputProcessor::decodePacket(Packet *packet, int streamIndex) {
int status;
int sendStatus;
auto &decoder = input->getDecoderAtIndex(streamIndex);
do {
if (packet == nullptr) {
sendStatus = decoder.flushDecodedFrames();
} else {
sendStatus = decoder.sendPacket(*packet);
}
if (sendStatus < 0 && sendStatus != AVERROR(EAGAIN) && sendStatus != AVERROR_EOF) {
return sendStatus;
}
if (sendStatus == 0 && packet) {
++streams[streamIndex].decodedPackets;
}
Frame decodedFrame;
while (true) {
if ((status = decoder.receiveFrame(decodedFrame)) < 0) {
break;
}
++streams[streamIndex].decodedFrames;
if ((status = filterAndWriteFrame(&decodedFrame, streamIndex)) < 0) {
break;
}
decodedFrame.unref();
}
} while (sendStatus == AVERROR(EAGAIN));
return status;
}
int InputProcessor::encodeAndWriteFrame(Frame *frame, int streamIndex) {
assert(input->isValid());
assert(formatContext);
int status = 0;
int sendStatus;
Packet packet;
CodecContext &encoderContext = streams[streamIndex].codec;
do {
if (frame) {
sendStatus = encoderContext.sendFrame(*frame);
} else {
sendStatus = encoderContext.flushEncodedPackets();
}
if (sendStatus < 0 && sendStatus != AVERROR(EAGAIN) && sendStatus != AVERROR_EOF) {
return status;
}
if (sendStatus == 0 && frame) {
++streams[streamIndex].encodedFrames;
}
while (true) {
if ((status = encoderContext.receivePacket(packet)) < 0) {
break;
}
++streams[streamIndex].encodedPackets;
packet.setStreamIndex(streamIndex);
auto sourceTimebase = encoderContext.timeBase();
auto dstTimebase = formatContext.getStreams()[streamIndex]->time_base;
packet.rescaleTimestamps(sourceTimebase, dstTimebase);
if ((status = formatContext.writeFrameInterleaved(packet)) < 0) {
return status;
}
packet.unref();
}
} while (sendStatus == AVERROR(EAGAIN));
if (status != AVERROR(EAGAIN)) {
return status;
}
return 0;
}
原始视频的 FFprobe 输出。
Input #0, matroska,webm, from 'testvideo':
Metadata:
title : TestVideo
encoder : libebml v1.3.0 + libmatroska v1.4.0
creation_time : 2014-12-23T03:38:05.000000Z
Duration: 00:02:29.25, start: 0.000000, bitrate: 79549 kb/s
Stream #0:0(rus): Video: h264 (High 4:4:4 Predictive), yuv444p10le(pc, bt709, progressive), 2048x858 [SAR 1:1 DAR 1024:429], 24 fps, 24 tbr, 1k tbn, 48 tbc (default)
Stream #0:1(rus): Audio: pcm_s24le, 48000 Hz, 6 channels, s32 (24 bit), 6912 kb/s (default)
已转码:
Input #0, mov,mp4,m4a,3gp,3g2,mj2, from '123.mp4':
Metadata:
major_brand : mp42
minor_version : 512
compatible_brands: isomiso2avc1mp41
encoder : Lavf57.71.100
Duration: 00:02:29.27, start: 0.000000, bitrate: 4282 kb/s
Stream #0:0(rus): Video: h264 (High) (avc1 / 0x31637661), yuv420p, 1280x720 [SAR 192:143 DAR 1024:429], 3940 kb/s, 24.01 fps, 24 tbr, 12288 tbn, 96 tbc (default)
Metadata:
handler_name : VideoHandler
Stream #0:1(rus): Audio: aac (LC) (mp4a / 0x6134706D), 48000 Hz, 5.1, fltp, 336 kb/s (default)
Metadata:
handler_name : SoundHandler
问题出在初始化编码器时步骤顺序错误。在 transcoding.c
示例中,他们在调用 avcodec_open2()
后将 CODEC_FLAG_GLOBAL_HEADER
分配给 AVCodecContext.flags
属性。我认为这是正确的,并在我的代码中做了同样的事情。它导致 extradata
字段未初始化,QuickTime 无法解析结果流。在打开编解码器之前设置标志解决了问题。
结果代码:
// should be placed before avcodec_open2
if (formatContext.getRawCtx()->oformat->flags & AVFMT_GLOBALHEADER) {
encoderContext.getRawCtx()->flags |= CODEC_FLAG_GLOBAL_HEADER;
}
// before using encoder, we should open it and update its parameters
printf("Codec bits per sample %d\n", av_get_bits_per_sample(encoderCodecId));
AVDictionary *options = nullptr;
if ((status = encoderContext.open(encoder, &options)) < 0) {
return status;
}
目前我正在编写使用 ffmpeg 库对媒体文件进行转码的软件。问题是在 H264 QuickTime 的情况下无法播放结果流并显示黑屏。音频流按预期工作。我读到 QuickTime 只能处理 yuv420p
像素格式,对于编码视频也是如此。
我查看了 ffmpeg 示例和 ffmpeg 源代码,但没有找到任何可能存在问题的线索。如果有任何帮助,我将不胜感激。
我唯一设法从 QuickTime 得到的是
SeqAndPicParamSetFromCFDictionaryRef, bad config record
控制台中的消息。 AVPlayer 从 AVFoundation 记录了同样的事情。
这里是输出流和编码器的初始化。
int status;
// avformat_alloc_output_context2()
if ((status = formatContext.open(destFilename)) < 0) {
return status;
}
AVDictionary *fmtOptions = nullptr;
av_dict_set(&fmtOptions, "movflags", "faststart", 0);
av_dict_set(&fmtOptions, "brand", "mp42", 0);
streams.resize(input->getStreamsCount());
for (int i = 0; i < input->getStreamsCount(); ++i) {
AVStream *inputStream = input->getStreamAtIndex(i);
CodecContext &decoderContext = input->getDecoderAtIndex(i);
// retrieve output codec by codec id
auto encoderCodecId = decoderContext.getCodecID();;
if (decoderContext.getCodecType() == AVMEDIA_TYPE_VIDEO || decoderContext.getCodecType() == AVMEDIA_TYPE_AUDIO) {
int codecIdKey = decoderContext.getCodecType() == AVMEDIA_TYPE_AUDIO ? IPROC_KEY_INT(TargetAudioCodecID) : IPROC_KEY_INT(TargetVideoCodecID);
auto codecIdParam = static_cast<AVCodecID>(params[codecIdKey]);
if (codecIdParam != AV_CODEC_ID_NONE) {
encoderCodecId = codecIdParam;
}
}
AVCodec *encoder = nullptr;
if ((encoder = avcodec_find_encoder(encoderCodecId)) == nullptr) {
status = AVERROR_ENCODER_NOT_FOUND;
return status;
}
// create stream with specific codec and format
AVStream *outputStream = nullptr;
// avformat_new_stream()
if ((outputStream = formatContext.newStream(encoder)) == nullptr) {
return AVERROR(ENOMEM);
}
CodecContext encoderContext;
// avcodec_alloc_context3()
if ((status = encoderContext.init(encoder)) < 0) {
return status;
}
outputStream->disposition = inputStream->disposition;
encoderContext.getRawCtx()->chroma_sample_location = decoderContext.getRawCtx()->chroma_sample_location;
if (encoderContext.getCodecType() == AVMEDIA_TYPE_VIDEO) {
auto lang = av_dict_get(input->getStreamAtIndex(i)->metadata, "language", nullptr, 0);
if (lang) {
av_dict_set(&outputStream->metadata, "language", lang->value, 0);
}
// prepare encoder context
int targetWidth = params[IPROC_KEY_INT(TargetVideoWidth)];
int targetHeight = params[IPROC_KEY_INT(TargetVideHeight)];
encoderContext.width() = targetWidth > 0 ? targetWidth : decoderContext.width();
encoderContext.height() = targetHeight > 0 ? targetHeight : decoderContext.height();
encoderContext.pixelFormat() = encoder->pix_fmts ? encoder->pix_fmts[0] : decoderContext.pixelFormat();;
encoderContext.timeBase() = decoderContext.timeBase();
encoderContext.getRawCtx()->level = 31;
encoderContext.getRawCtx()->gop_size = 25;
double far = static_cast<double>(encoderContext.getRawCtx()->width) / encoderContext.getRawCtx()->height;
double dar = static_cast<double>(decoderContext.width()) / decoderContext.height();
encoderContext.sampleAspectRatio() = av_d2q(dar / far, 255);
encoderContext.getRawCtx()->bits_per_raw_sample = FFMIN(decoderContext.getRawCtx()->bits_per_raw_sample,
av_pix_fmt_desc_get(encoderContext.pixelFormat())->comp[0].depth);
encoderContext.getRawCtx()->framerate = inputStream->r_frame_rate;
outputStream->avg_frame_rate = encoderContext.getRawCtx()->framerate;
VideoFilterGraphParameters params;
params.height = encoderContext.height();
params.width = encoderContext.width();
params.pixelFormat = encoderContext.pixelFormat();
if ((status = generateGraph(decoderContext, encoderContext, params, streams[i].filterGraph)) < 0) {
return status;
}
} else if (encoderContext.getCodecType() == AVMEDIA_TYPE_AUDIO) {
auto lang = av_dict_get(input->getStreamAtIndex(i)->metadata, "language", nullptr, 0);
if (lang) {
av_dict_set(&outputStream->metadata, "language", lang->value, 0);
}
encoderContext.sampleRate() = params[IPROC_KEY_INT(TargetAudioSampleRate)] ? : decoderContext.sampleRate();
encoderContext.channels() = params[IPROC_KEY_INT(TargetAudioChannels)] ? : decoderContext.channels();
auto paramChannelLayout = params[IPROC_KEY_INT(TargetAudioChannelLayout)];
if (paramChannelLayout) {
encoderContext.channelLayout() = paramChannelLayout;
} else {
encoderContext.channelLayout() = av_get_default_channel_layout(encoderContext.channels());
}
AVSampleFormat sampleFormatParam = static_cast<AVSampleFormat>(params[IPROC_KEY_INT(TargetAudioSampleFormat)]);
if (sampleFormatParam != AV_SAMPLE_FMT_NONE) {
encoderContext.sampleFormat() = sampleFormatParam;
} else if (encoder->sample_fmts) {
encoderContext.sampleFormat() = encoder->sample_fmts[0];
} else {
encoderContext.sampleFormat() = decoderContext.sampleFormat();
}
encoderContext.timeBase().num = 1;
encoderContext.timeBase().den = encoderContext.sampleRate();
AudioFilterGraphParameters params;
params.channelLayout = encoderContext.channelLayout();
params.channels = encoderContext.channels();
params.format = encoderContext.sampleFormat();
params.sampleRate = encoderContext.sampleRate();
if ((status = generateGraph(decoderContext, encoderContext, params, streams[i].filterGraph)) < 0) {
return status;
}
}
// before using encoder, we should open it and update its parameters
printf("Codec bits per sample %d\n", av_get_bits_per_sample(encoderCodecId));
AVDictionary *options = nullptr;
// avcodec_open2()
if ((status = encoderContext.open(encoder, &options)) < 0) {
return status;
}
if (streams[i].filterGraph) {
streams[i].filterGraph.setOutputFrameSize(encoderContext.getFrameSize());
}
// avcodec_parameters_from_context()
if ((status = encoderContext.fillParamters(outputStream->codecpar)) < 0) {
return status;
}
outputStream->codecpar->format = encoderContext.getRawCtx()->pix_fmt;
if (formatContext.getRawCtx()->oformat->flags & AVFMT_GLOBALHEADER) {
encoderContext.getRawCtx()->flags |= CODEC_FLAG_GLOBAL_HEADER;
}
if (encoderContext.getRawCtx()->nb_coded_side_data) {
int i;
for (i = 0; i < encoderContext.getRawCtx()->nb_coded_side_data; i++) {
const AVPacketSideData *sd_src = &encoderContext.getRawCtx()->coded_side_data[i];
uint8_t *dst_data;
dst_data = av_stream_new_side_data(outputStream, sd_src->type, sd_src->size);
if (!dst_data)
return AVERROR(ENOMEM);
memcpy(dst_data, sd_src->data, sd_src->size);
}
}
/*
* Add global input side data. For now this is naive, and copies it
* from the input stream's global side data. All side data should
* really be funneled over AVFrame and libavfilter, then added back to
* packet side data, and then potentially using the first packet for
* global side data.
*/
for (int i = 0; i < inputStream->nb_side_data; i++) {
AVPacketSideData *sd = &inputStream->side_data[i];
uint8_t *dst = av_stream_new_side_data(outputStream, sd->type, sd->size);
if (!dst)
return AVERROR(ENOMEM);
memcpy(dst, sd->data, sd->size);
}
// copy timebase while removing common factors
if (outputStream->time_base.num <= 0 || outputStream->time_base.den <= 0) {
outputStream->time_base = av_add_q(encoderContext.timeBase(), (AVRational){0, 1});
}
// copy estimated duration as a hint to the muxer
if (outputStream->duration <= 0 && inputStream->duration > 0) {
outputStream->duration = av_rescale_q(inputStream->duration, inputStream->time_base, outputStream->time_base);
}
streams[i].codecType = encoderContext.getRawCtx()->codec_type;
streams[i].codec = std::move(encoderContext);
streams[i].streamIndex = i;
}
// avio_open() and avformat_write_header()
if ((status = formatContext.writeHeader(fmtOptions)) < 0) {
return status;
}
formatContext.dumpFormat();
正在从流中读取。
int InputProcessor::performStep() {
int status;
Packet nextPacket;
if ((status = input->getFormatContext().readFrame(nextPacket)) < 0) {
return status;
}
++streams[nextPacket.getStreamIndex()].readPackets;
int streamIndex = nextPacket.getStreamIndex();
CodecContext &decoder = input->getDecoderAtIndex(streamIndex);
AVStream *inputStream = input->getStreamAtIndex(streamIndex);
if (streams[nextPacket.getStreamIndex()].readPackets == 1) {
for (int i = 0; i < inputStream->nb_side_data; ++i) {
AVPacketSideData *src_sd = &inputStream->side_data[i];
uint8_t *dst_data;
if (src_sd->type == AV_PKT_DATA_DISPLAYMATRIX) {
continue;
}
if (av_packet_get_side_data(nextPacket.getRawPtr(), src_sd->type, nullptr)) {
continue;
}
dst_data = av_packet_new_side_data(nextPacket.getRawPtr(), src_sd->type, src_sd->size);
if (!dst_data) {
return AVERROR(ENOMEM);
}
memcpy(dst_data, src_sd->data, src_sd->size);
}
}
nextPacket.rescaleTimestamps(inputStream->time_base, decoder.timeBase());
status = decodePacket(&nextPacket, nextPacket.getStreamIndex());
if (status < 0 && status != AVERROR(EAGAIN)) {
return status;
}
return 0;
}
这是 decoding/encoding 代码。
int InputProcessor::decodePacket(Packet *packet, int streamIndex) {
int status;
int sendStatus;
auto &decoder = input->getDecoderAtIndex(streamIndex);
do {
if (packet == nullptr) {
sendStatus = decoder.flushDecodedFrames();
} else {
sendStatus = decoder.sendPacket(*packet);
}
if (sendStatus < 0 && sendStatus != AVERROR(EAGAIN) && sendStatus != AVERROR_EOF) {
return sendStatus;
}
if (sendStatus == 0 && packet) {
++streams[streamIndex].decodedPackets;
}
Frame decodedFrame;
while (true) {
if ((status = decoder.receiveFrame(decodedFrame)) < 0) {
break;
}
++streams[streamIndex].decodedFrames;
if ((status = filterAndWriteFrame(&decodedFrame, streamIndex)) < 0) {
break;
}
decodedFrame.unref();
}
} while (sendStatus == AVERROR(EAGAIN));
return status;
}
int InputProcessor::encodeAndWriteFrame(Frame *frame, int streamIndex) {
assert(input->isValid());
assert(formatContext);
int status = 0;
int sendStatus;
Packet packet;
CodecContext &encoderContext = streams[streamIndex].codec;
do {
if (frame) {
sendStatus = encoderContext.sendFrame(*frame);
} else {
sendStatus = encoderContext.flushEncodedPackets();
}
if (sendStatus < 0 && sendStatus != AVERROR(EAGAIN) && sendStatus != AVERROR_EOF) {
return status;
}
if (sendStatus == 0 && frame) {
++streams[streamIndex].encodedFrames;
}
while (true) {
if ((status = encoderContext.receivePacket(packet)) < 0) {
break;
}
++streams[streamIndex].encodedPackets;
packet.setStreamIndex(streamIndex);
auto sourceTimebase = encoderContext.timeBase();
auto dstTimebase = formatContext.getStreams()[streamIndex]->time_base;
packet.rescaleTimestamps(sourceTimebase, dstTimebase);
if ((status = formatContext.writeFrameInterleaved(packet)) < 0) {
return status;
}
packet.unref();
}
} while (sendStatus == AVERROR(EAGAIN));
if (status != AVERROR(EAGAIN)) {
return status;
}
return 0;
}
原始视频的 FFprobe 输出。
Input #0, matroska,webm, from 'testvideo':
Metadata:
title : TestVideo
encoder : libebml v1.3.0 + libmatroska v1.4.0
creation_time : 2014-12-23T03:38:05.000000Z
Duration: 00:02:29.25, start: 0.000000, bitrate: 79549 kb/s
Stream #0:0(rus): Video: h264 (High 4:4:4 Predictive), yuv444p10le(pc, bt709, progressive), 2048x858 [SAR 1:1 DAR 1024:429], 24 fps, 24 tbr, 1k tbn, 48 tbc (default)
Stream #0:1(rus): Audio: pcm_s24le, 48000 Hz, 6 channels, s32 (24 bit), 6912 kb/s (default)
已转码:
Input #0, mov,mp4,m4a,3gp,3g2,mj2, from '123.mp4':
Metadata:
major_brand : mp42
minor_version : 512
compatible_brands: isomiso2avc1mp41
encoder : Lavf57.71.100
Duration: 00:02:29.27, start: 0.000000, bitrate: 4282 kb/s
Stream #0:0(rus): Video: h264 (High) (avc1 / 0x31637661), yuv420p, 1280x720 [SAR 192:143 DAR 1024:429], 3940 kb/s, 24.01 fps, 24 tbr, 12288 tbn, 96 tbc (default)
Metadata:
handler_name : VideoHandler
Stream #0:1(rus): Audio: aac (LC) (mp4a / 0x6134706D), 48000 Hz, 5.1, fltp, 336 kb/s (default)
Metadata:
handler_name : SoundHandler
问题出在初始化编码器时步骤顺序错误。在 transcoding.c
示例中,他们在调用 avcodec_open2()
后将 CODEC_FLAG_GLOBAL_HEADER
分配给 AVCodecContext.flags
属性。我认为这是正确的,并在我的代码中做了同样的事情。它导致 extradata
字段未初始化,QuickTime 无法解析结果流。在打开编解码器之前设置标志解决了问题。
结果代码:
// should be placed before avcodec_open2
if (formatContext.getRawCtx()->oformat->flags & AVFMT_GLOBALHEADER) {
encoderContext.getRawCtx()->flags |= CODEC_FLAG_GLOBAL_HEADER;
}
// before using encoder, we should open it and update its parameters
printf("Codec bits per sample %d\n", av_get_bits_per_sample(encoderCodecId));
AVDictionary *options = nullptr;
if ((status = encoderContext.open(encoder, &options)) < 0) {
return status;
}