Resampling audio with FFMPEG LibAV
Well, since the FFMPEG documentation and code examples are absolute garbage, I guess my only option is to come here and ask.
All I'm trying to do is record audio from the microphone and write it to a file. I initialize my input and output formats, get an audio packet, then decode it, resample it, encode it and write it. But every time I try to play back the recorded audio, all I get is a stub of data. For some reason it seems to write only an initial packet. It gets stranger still, let me explain why:
if((response = swr_config_frame(resampleContext, audioOutputFrame, frame)) < 0) qDebug() << "can't configure frame!" << av_make_error(response);
if((response = swr_convert_frame(resampleContext, audioOutputFrame, frame)) < 0) qDebug() << "can't resample frame!" << av_make_error(response);
This is the code I use for resampling. My frame contains data, but swr_convert_frame writes empty data into audioOutputFrame. How can I fix this? FFMPEG is really driving me crazy.
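For reference, a minimal diagnostic sketch (not part of my original code; swr_get_out_samples is the stock libswresample call, and the variable names match the class below) that should show whether the converter is failing or merely buffering samples:
// swr_get_out_samples() returns an upper bound on the samples the next
// swr_convert call would output for the given number of input samples
int ready = swr_get_out_samples(resampleContext, frame->nb_samples);
if(ready < 0) qDebug() << "can't query converter!" << av_make_error(ready);
else qDebug() << "samples ready:" << ready << "needed:" << audioOutputFrame->nb_samples;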
Here is the complete code of my class:
VideoReader.h
#ifndef VIDEOREADER_H
#define VIDEOREADER_H
extern "C"
{
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libswscale/swscale.h>
#include <libavdevice/avdevice.h>
#include "libavutil/audio_fifo.h"
#include "libavformat/avio.h"
#include "libswresample/swresample.h"
#include <inttypes.h>
}
#include <QString>
#include <QElapsedTimer>
class VideoReader
{
public:
VideoReader();
bool open(const char* filename);
bool fillFrame();
bool readFrame(uint8_t *&frameData);
void close();
int width, height;
private:
bool configInput();
bool configOutput(const char *filename);
bool configResampler();
bool encode(AVFrame *frame, AVCodecContext *encoderContext, AVPacket *outputPacket, int streamIndex, QString type);
int audioStreamIndex = -1;
int videoStreamIndex = -1;
int64_t videoStartPts = 0;
int64_t audioStartPts = 0;
AVFormatContext* inputFormatContext = nullptr;
AVFormatContext* outputFormatContext = nullptr;
AVCodecContext* videoDecoderContext = nullptr;
AVCodecContext* videoEncoderContext = nullptr;
AVCodecContext* audioDecoderContext = nullptr;
AVCodecContext* audioEncoderContext = nullptr;
AVFrame* videoInputFrame = nullptr;
AVFrame* audioInputFrame = nullptr;
AVFrame* videoOutputFrame = nullptr;
AVFrame* audioOutputFrame = nullptr;
AVPacket* inputPacket = nullptr;
AVPacket* videoOutputPacket = nullptr;
AVPacket* audioOutputPacket = nullptr;
SwsContext* innerScaleContext = nullptr;
SwsContext* outerScaleContext = nullptr;
SwrContext *resampleContext = nullptr;
};
#endif // VIDEOREADER_H
VideoReader.cpp
#include "VideoReader.h"
#include <QDebug>
static const char* av_make_error(int errnum)
{
static char str[AV_ERROR_MAX_STRING_SIZE];
memset(str, 0, sizeof(str));
return av_make_error_string(str, AV_ERROR_MAX_STRING_SIZE, errnum);
}
VideoReader::VideoReader()
{
}
bool VideoReader::open(const char *filename)
{
if(!configInput()) return false;
if(!configOutput(filename)) return false;
if(!configResampler()) return false;
return true;
}
bool VideoReader::fillFrame()
{
auto convertToYUV = [=](AVFrame* frame)
{
int response = 0;
if((response = sws_scale(outerScaleContext, frame->data, frame->linesize, 0, videoEncoderContext->height, videoOutputFrame->data, videoOutputFrame->linesize)) < 0) qDebug() << "can't rescale" << av_make_error(response);
};
auto convertAudio = [this](AVFrame* frame)
{
int response = 0;
auto& out = audioOutputFrame;
qDebug() << out->linesize[0] << out->nb_samples;
if((response = swr_convert_frame(resampleContext, audioOutputFrame, frame)) < 0) qDebug() << "can't resample frame!" << av_make_error(response);
qDebug() << "poop";
};
auto decodeEncode = [=](AVPacket* inputPacket, AVFrame* inputFrame, AVCodecContext* decoderContext,
AVPacket* outputPacket, AVFrame* outputFrame, AVCodecContext* encoderContext,
std::function<void (AVFrame*)> convertFunc,
int streamIndex, int64_t startPts, QString type)
{
int response = avcodec_send_packet(decoderContext, inputPacket);
if(response < 0) { qDebug() << "failed to send" << type << "packet!" << av_make_error(response); return false; }
response = avcodec_receive_frame(decoderContext, inputFrame);
if(response == AVERROR(EAGAIN) || response == AVERROR_EOF) { av_packet_unref(inputPacket); return false; }
else if (response < 0) { qDebug() << "failed to decode" << type << "frame!" << response << av_make_error(response); return false; }
if(encoderContext)
{
outputFrame->pts = inputPacket->pts - startPts;
convertFunc(inputFrame);
if(!encode(outputFrame, encoderContext, outputPacket, streamIndex, type)) return false;
}
av_packet_unref(inputPacket);
return true;
};
while(av_read_frame(inputFormatContext, inputPacket) >= 0) //actually read packet
{
if(inputPacket->stream_index == videoStreamIndex)
{
if(!videoStartPts) videoStartPts = inputPacket->pts;
if(decodeEncode(inputPacket, videoInputFrame, videoDecoderContext, videoOutputPacket, videoOutputFrame, videoEncoderContext, convertToYUV, videoStreamIndex, videoStartPts, "video")) break;
}
else if(inputPacket->stream_index == audioStreamIndex)
{
if(!audioStartPts) audioStartPts = inputPacket->pts;
if(decodeEncode(inputPacket, audioInputFrame, audioDecoderContext, audioOutputPacket, audioOutputFrame, audioEncoderContext, convertAudio, audioStreamIndex, audioStartPts, "audio")) break;
}
}
return true;
}
bool VideoReader::readFrame(uint8_t *&frameData)
{
if(!fillFrame()) { qDebug() << "readFrame method failed!"; return false; };
const int bytesPerPixel = 4;
uint8_t* destination[bytesPerPixel] = {frameData, NULL, NULL, NULL};
int destinationLinesize[bytesPerPixel] = { videoInputFrame->width * bytesPerPixel, 0, 0, 0};
sws_scale(innerScaleContext, videoInputFrame->data, videoInputFrame->linesize, 0, videoInputFrame->height, destination, destinationLinesize);
return true;
}
void VideoReader::close()
{
encode(NULL, videoEncoderContext, videoOutputPacket, videoStreamIndex, "video");
encode(NULL, audioEncoderContext, audioOutputPacket, audioStreamIndex, "audio");
if(av_write_trailer(outputFormatContext) < 0) { qDebug() << "failed to write trailer"; };
avformat_close_input(&outputFormatContext);
avformat_free_context(outputFormatContext);
avformat_close_input(&inputFormatContext);
avformat_free_context(inputFormatContext);
av_frame_free(&videoInputFrame);
av_frame_free(&audioInputFrame);
av_frame_free(&videoOutputFrame);
av_frame_free(&audioOutputFrame);
av_packet_free(&inputPacket);
av_packet_free(&videoOutputPacket);
av_packet_free(&audioOutputPacket);
avcodec_free_context(&videoDecoderContext);
avcodec_free_context(&videoEncoderContext);
avcodec_free_context(&audioDecoderContext);
avcodec_free_context(&audioEncoderContext);
sws_freeContext(innerScaleContext);
sws_freeContext(outerScaleContext);
swr_free(&resampleContext);
}
bool VideoReader::configInput()
{
avdevice_register_all();
inputFormatContext = avformat_alloc_context();
if(!inputFormatContext) { qDebug() << "can't create context!"; return false; }
const char* inputFormatName = "dshow";/*"gdigrab"*/
AVInputFormat* inputFormat = av_find_input_format(inputFormatName);
if(!inputFormat){ qDebug() << "Can't find" << inputFormatName; return false; }
AVDictionary* options = NULL;
av_dict_set(&options, "framerate", "30", 0);
av_dict_set(&options, "video_size", "1920x1080", 0);
if(avformat_open_input(&inputFormatContext, "video=HD USB Camera:audio=Microphone (High Definition Audio Device)" /*"desktop"*/, inputFormat, &options) != 0) { qDebug() << "can't open video file!"; return false; }
AVCodecParameters* videoCodecParams = nullptr;
AVCodecParameters* audioCodecParams = nullptr;
AVCodec* videoDecoder = nullptr;
AVCodec* audioDecoder = nullptr;
for (uint i = 0; i < inputFormatContext->nb_streams; ++i)
{
auto stream = inputFormatContext->streams[i];
auto codecParams = stream->codecpar;
if(codecParams->codec_type == AVMEDIA_TYPE_AUDIO) { audioStreamIndex = i; audioDecoder = avcodec_find_decoder(codecParams->codec_id); audioCodecParams = codecParams; }
if(codecParams->codec_type == AVMEDIA_TYPE_VIDEO) { videoStreamIndex = i; videoDecoder = avcodec_find_decoder(codecParams->codec_id); videoCodecParams = codecParams; }
if(audioStreamIndex != -1 && videoStreamIndex != -1) break;
}
if(audioStreamIndex == -1) { qDebug() << "failed to find audio stream inside file"; return false; }
if(videoStreamIndex == -1) { qDebug() << "failed to find video stream inside file"; return false; }
auto configureCodecContext = [=](AVCodecContext*& context, AVCodec* decoder, AVCodecParameters* params, AVFrame*& frame, QString type)
{
context = avcodec_alloc_context3(decoder);
if(!context) { qDebug() << "failed to create" << type << "decoder context!"; return false; }
if(avcodec_parameters_to_context(context, params) < 0) { qDebug() << "can't initialize input" << type << "decoder context"; return false; }
if(avcodec_open2(context, decoder, NULL) < 0) { qDebug() << "can't open" << type << "decoder"; return false; }
frame = av_frame_alloc();
if(!frame) { qDebug() << "can't allocate" << type << "frame"; return false; }
return true;
};
if(!configureCodecContext(videoDecoderContext, videoDecoder, videoCodecParams, videoInputFrame, "video")) return false;
if(!configureCodecContext(audioDecoderContext, audioDecoder, audioCodecParams, audioInputFrame, "audio")) return false;
audioDecoderContext->channel_layout = AV_CH_LAYOUT_STEREO;
audioInputFrame->channel_layout = audioDecoderContext->channel_layout;
inputPacket = av_packet_alloc();
if(!inputPacket) { qDebug() << "can't allocate input packet!"; return false; }
//first frame, needed for initialization
if(!fillFrame()) { qDebug() << "Failed to fill frame on init!"; return false; };
width = videoDecoderContext->width;
height = videoDecoderContext->height;
innerScaleContext = sws_getContext(width, height, videoDecoderContext->pix_fmt,
width, height, AV_PIX_FMT_RGB0,
SWS_FAST_BILINEAR,
NULL,
NULL,
NULL);
outerScaleContext = sws_getContext(width, height, videoDecoderContext->pix_fmt,
width, height, AV_PIX_FMT_YUV420P,
SWS_FAST_BILINEAR,
NULL,
NULL,
NULL);
if(!innerScaleContext) { qDebug() << "failed to initialize scaler context"; return false; }
return true;
}
bool VideoReader::configOutput(const char *filename)
{
avformat_alloc_output_context2(&outputFormatContext, NULL, NULL, filename);
if(!outputFormatContext) { qDebug() << "failed to create output context"; return false; }
AVOutputFormat* outputFormat = outputFormatContext->oformat;
auto prepareOutputContext = [=](AVCodecContext*& encoderContext,
std::function<void (AVCodecContext*, AVCodec*)> configureContextFunc,
std::function<void (AVFrame*)> configureFrameFunc,
AVCodecID codecId, AVFrame*& frame, AVPacket*& packet, QString type)
{
auto stream = avformat_new_stream(outputFormatContext, NULL);
if(!stream) { qDebug() << "failed to allocate output" << type << "stream"; return false; }
AVCodec* encoder = avcodec_find_encoder(codecId);
if(!encoder) { qDebug() << "failed to find" << type << "encoder!"; return false; }
encoderContext = avcodec_alloc_context3(encoder);
if(!encoderContext) { qDebug() << "failed to create video encoder context!"; return false; }
configureContextFunc(encoderContext, encoder);
int result = avcodec_open2(encoderContext, encoder, NULL);
if(result < 0) { qDebug() << "failed to open audio encoder" << av_make_error(result); return false; }
if(avcodec_parameters_from_context(stream->codecpar, encoderContext) < 0) { qDebug() << "failed to copy parameters to audio output stream"; return false; }
packet = av_packet_alloc();
if(!packet) {qDebug() << "failed allocate output" << type << "packet"; return false;}
frame = av_frame_alloc();
if(!frame) { qDebug() << "can't allocate output" << type << "frame"; return false; }
configureFrameFunc(frame);
av_frame_get_buffer(frame, 0);
return true;
};
auto configureAudioFrame = [=](AVFrame* frame)
{
frame->nb_samples = audioEncoderContext->frame_size;
frame->format = audioEncoderContext->sample_fmt;
frame->sample_rate = audioEncoderContext->sample_rate;
frame->channel_layout = av_get_default_channel_layout(audioDecoderContext->channels);
};
auto configureAudioEncoderContext = [=](AVCodecContext* encoderContext, AVCodec* encoder)
{
encoderContext->bit_rate = 64000;
encoderContext->sample_fmt = encoder->sample_fmts[0];
encoderContext->sample_rate = 44100;
encoderContext->codec_type = AVMEDIA_TYPE_AUDIO;
encoderContext->channel_layout = AV_CH_LAYOUT_STEREO;
encoderContext->channels = av_get_channel_layout_nb_channels(encoderContext->channel_layout);
};
auto configureVideoFrame = [=](AVFrame* frame)
{
frame->format = videoEncoderContext->pix_fmt;
frame->width = videoEncoderContext->width;
frame->height = videoEncoderContext->height;
};
auto configureVideoEncoderContext = [=](AVCodecContext* encoderContext, AVCodec* encoder)
{
encoderContext->width = videoDecoderContext->width;
encoderContext->height = videoDecoderContext->height;
encoderContext->pix_fmt = encoder->pix_fmts[0];
encoderContext->gop_size = 10;
encoderContext->max_b_frames = 1;
encoderContext->framerate = AVRational{30, 1};
encoderContext->time_base = AVRational{1, 30};
av_opt_set(encoderContext->priv_data, "preset", "ultrafast", 0);
av_opt_set(encoderContext->priv_data, "tune", "zerolatency", 0);
};
if(!prepareOutputContext(videoEncoderContext, configureVideoEncoderContext, configureVideoFrame, outputFormat->video_codec, videoOutputFrame, videoOutputPacket, "video")) return false;
if(!prepareOutputContext(audioEncoderContext, configureAudioEncoderContext, configureAudioFrame, outputFormat->audio_codec, audioOutputFrame, audioOutputPacket, "audio")) return false;
if(outputFormat->flags & AVFMT_GLOBALHEADER) outputFormat->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
int result = 0;
if(!(outputFormat->flags & AVFMT_NOFILE))
if((result = avio_open(&outputFormatContext->pb, filename, AVIO_FLAG_WRITE)) < 0)
{ qDebug() << "failed to open file" << av_make_error(result); return false; }
result = avformat_write_header(outputFormatContext, NULL);
if(result < 0) {qDebug() << "failed to write header!" << av_make_error(result); return false; }
return true;
}
bool VideoReader::configResampler()
{
resampleContext = swr_alloc_set_opts(NULL,
av_get_default_channel_layout(audioEncoderContext->channels),
audioEncoderContext->sample_fmt,
audioEncoderContext->sample_rate,
av_get_default_channel_layout(audioDecoderContext->channels),
audioDecoderContext->sample_fmt,
audioDecoderContext->sample_rate,
0, NULL);
if (!resampleContext) { qDebug() << "Could not allocate resample context"; return false; }
int error;
if ((error = swr_init(resampleContext)) < 0) { qDebug() << "Could not open resample context"; swr_free(&resampleContext); return false; }
return true;
}
bool VideoReader::encode(AVFrame* frame, AVCodecContext* encoderContext, AVPacket* outputPacket, int streamIndex, QString type)
{
int response;
response = avcodec_send_frame(encoderContext, frame);
if(response < 0) { qDebug() << "failed to send" << type << "frame" << av_make_error(response); return false; }
while(response >= 0)
{
response = avcodec_receive_packet(encoderContext, outputPacket);
if(response == AVERROR(EAGAIN) || response == AVERROR_EOF) { av_packet_unref(outputPacket); continue; }
else if (response < 0) { qDebug() << "failed to encode" << type << "frame!" << response << av_make_error(response); return false; }
outputPacket->stream_index = streamIndex;
AVStream *inStream = inputFormatContext->streams[streamIndex];
AVStream *outStream = outputFormatContext->streams[streamIndex];
av_packet_rescale_ts(outputPacket, inStream->time_base, outStream->time_base);
if((response = av_interleaved_write_frame(outputFormatContext, outputPacket)) != 0) { qDebug() << "Failed to write" << type << "packet!" << av_make_error(response); av_packet_unref(outputPacket); return false; }
av_packet_unref(outputPacket);
}
return true;
}
I can try to write a shorter example if needed.
As far as I know, there are a few cases in which swr_convert_frame may write nothing:
- You didn't initialize the output frame properly. If so, check it against the following snippet:
audioFrame = av_frame_alloc();
if (audioFrame == NULL) {
// error handling
}
audioFrame->format = /* the sample format you'd like to use */;
audioFrame->channel_layout = audioCodecContext->channel_layout;
audioFrame->nb_samples = audioCodecContext->frame_size;
if (av_frame_get_buffer(audioFrame, 0) < 0) {
// error handling
}
- There aren't enough samples in the input frame to produce a complete output frame. If so, you need swr_get_delay, e.g. (see also the drain sketch after this list):
if (swr_convert(swrContext, audioFrame->data,
audioFrame->nb_samples,
(uint8_t const**)frame->data, frame->nb_samples) < 0) {
// handle error
}
// do stuff with your audioFrame
...
while (swr_get_delay(swrContext, audioCodecContext->sample_rate)
> audioFrame->nb_samples) {
if (swr_convert(swrContext, audioFrame->data,
audioFrame->nb_samples, NULL, 0) < 0) {
// handle error
}
// do stuff with your audioFrame
}
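For completeness, the same draining can be done with swr_convert_frame itself, which accepts NULL as the input frame to flush whatever is buffered. A minimal sketch, assuming the resampleContext, audioOutputFrame and audioEncoderContext from your class (the commented-out encode call stands in for your own encoding step):
while (swr_get_delay(resampleContext, audioEncoderContext->sample_rate)
       >= audioOutputFrame->nb_samples) {
    // passing NULL as input drains samples buffered inside the converter
    if (swr_convert_frame(resampleContext, audioOutputFrame, NULL) < 0) {
        // handle error
    }
    // hand the drained frame to your encoder, e.g.:
    // encode(audioOutputFrame, audioEncoderContext, audioOutputPacket, audioStreamIndex, "audio");
}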
Anyway, please provide more information, at the very least a minimal reproducible example, for further diagnosis.
I have to admit that libav's documentation is poor enough that it used to drive me crazy too. But cursing the libav authors won't help, and besides, open-source contributors don't owe you anything.