使用 ffmpeg API 将 pcm_alaw 类型的音频数据包装到 MKA 音频文件中
Wrap audio data of the pcm_alaw type into an MKA audio file using the ffmpeg API
设想在我的项目中,我会收到负载类型为 8(payload type 8)的 RTP 数据包,之后需要把这些负载保存为音轨的第 N 段。我从 RTP 数据包中提取负载,并将其存入临时缓冲区:
...
while ((rtp = receiveRtpPackets()).withoutErrors()) {
payloadData.push(rtp.getPayloadData());
}
audioGenerator.setPayloadData(payloadData);
audioGenerator.recordToFile();
...
当临时缓冲区被负载填充到一定大小后,我会处理该缓冲区:取出全部负载,并用 ffmpeg 对其进行编码,以便随后保存为 Matroska 格式的音频文件。但我遇到了一个问题:由于 RTP 数据包的负载类型为 8,我必须把 pcm_alaw 格式的原始音频数据保存为 mka 音频格式。然而在把 pcm_alaw 原始数据写入音频文件时,库输出了如下信息:
...
[libopus @ 0x18eff60] Queue input is backward in time
[libopus @ 0x18eff60] Queue input is backward in time
[libopus @ 0x18eff60] Queue input is backward in time
[libopus @ 0x18eff60] Queue input is backward in time
...
当您在 vlc 中打开音频文件时,没有播放任何内容(缺少音轨时间戳)。
我的项目的任务只是获取 pcm_alaw 数据并将其封装到 mka 格式的容器中。确定编解码器的最佳方式是使用 av_guess_codec() 函数,它会自动选择所需的编解码器 ID。但我不知道如何把原始数据正确地封装进容器。
需要注意的是,原始数据可能是任意音频格式,具体取决于 RTP 数据包的负载类型(即所有类型的 RTP 负载都可能出现)。我只知道,无论是哪种格式,都必须把音频数据封装到 mka 容器中。
我还附上了我使用的代码(借自 this 资源):
audiogenerater.h
extern "C"
{
#include "libavformat/avformat.h"
#include "libavcodec/avcodec.h"
#include "libswresample/swresample.h"
}
/**
 * Encodes a buffer of raw audio samples with FFmpeg and writes the result
 * into a container file whose format is deduced from the file name.
 *
 * The FFmpeg handles below are created and released inside
 * generateAudioFileWithOptions() / initFormat().
 */
class AudioGenerater
{
public:
    AudioGenerater();
    // Declared only: the destructor is defined out of line in
    // audiogenerater.cpp, so it must not also be defaulted here
    // (that would be a redefinition).
    ~AudioGenerater();

    /**
     * Encodes pcmData and writes it to fileName.
     *
     * @param fileName   output path; the container is deduced from it.
     * @param pcmData    raw samples laid out according to `format`.
     * @param channel    number of audio channels.
     * @param bitRate    target encoder bit rate in bits per second.
     * @param sampleRate sample rate in Hz.
     * @param format     sample format of pcmData.
     */
    void generateAudioFileWithOptions(
        QString fileName,
        QByteArray pcmData,
        int channel,
        int bitRate,
        int sampleRate,
        AVSampleFormat format);

private:
    // Creates the output context, adds the audio stream and opens the file.
    bool initFormat(QString audioFileName);

private:
    AVCodec *m_AudioCodec = nullptr;               // encoder picked for the container
    AVCodecContext *m_AudioCodecContext = nullptr; // opened encoder instance
    AVFormatContext *m_FormatContext = nullptr;    // muxer / output context
    AVOutputFormat *m_OutputFormat = nullptr;      // container description of m_FormatContext
};
audiogenerater.cpp
AudioGenerater::AudioGenerater()
{
av_register_all();
avcodec_register_all();
}
// Out-of-line destructor. Its body is elided in this excerpt, so what it
// releases (if anything) cannot be told from here.
AudioGenerater::~AudioGenerater()
{
// ...
}
/**
 * Creates the output (muxer) context for audioFileName, adds one audio stream
 * whose parameters are copied from m_AudioCodecContext, and opens the output
 * file for writing when the container needs a file.
 *
 * m_AudioCodecContext must already be allocated by the caller.
 *
 * On every failure path the context created here is released and
 * m_FormatContext is reset to nullptr, so a caller that frees a non-null
 * m_FormatContext during its own cleanup cannot free it a second time.
 *
 * @param audioFileName output path; the container format is deduced from it.
 * @return true on success, false on any failure.
 */
bool AudioGenerater::initFormat(QString audioFileName)
{
    // Keep the encoded name alive for the whole function instead of
    // re-encoding the QString for every FFmpeg call.
    const QByteArray encodedName = audioFileName.toLocal8Bit();
    const char* const path = encodedName.constData();

    // Shared failure path: release the context and leave no dangling pointers.
    auto fail = [this]() {
        avformat_free_context(m_FormatContext); // no-op for nullptr
        m_FormatContext = nullptr;
        m_OutputFormat = nullptr;
        return false;
    };

    if (m_AudioCodecContext == nullptr) {
        return false;
    }

    // Create an output Format context
    int result = avformat_alloc_output_context2(&m_FormatContext, nullptr, nullptr, path);
    if (result < 0 || m_FormatContext == nullptr) {
        return fail();
    }
    m_OutputFormat = m_FormatContext->oformat;

    // Create an audio stream
    AVStream* audioStream = avformat_new_stream(m_FormatContext, m_AudioCodec);
    if (audioStream == nullptr) {
        return fail();
    }

    // Set the parameters in the stream. The time base follows the encoder's
    // sample rate; 8000 is kept only as a fallback for an unset rate.
    const int sampleRate = m_AudioCodecContext->sample_rate > 0 ? m_AudioCodecContext->sample_rate : 8000;
    audioStream->id = m_FormatContext->nb_streams - 1;
    audioStream->time_base = AVRational{ 1, sampleRate };
    result = avcodec_parameters_from_context(audioStream->codecpar, m_AudioCodecContext);
    if (result < 0) {
        return fail();
    }

    // Print FormatContext information
    av_dump_format(m_FormatContext, 0, path, 1);

    // Open file IO
    if (!(m_OutputFormat->flags & AVFMT_NOFILE)) {
        result = avio_open(&m_FormatContext->pb, path, AVIO_FLAG_WRITE);
        if (result < 0) {
            return fail();
        }
    }
    return true;
}
void AudioGenerater::generateAudioFileWithOptions(
QString _fileName,
QByteArray _pcmData,
int _channel,
int _bitRate,
int _sampleRate,
AVSampleFormat _format)
{
AVFormatContext* oc;
if (avformat_alloc_output_context2(
&oc, nullptr, nullptr, _fileName.toStdString().c_str())
< 0) {
qDebug() << "Error in line: " << __LINE__;
return;
}
if (!oc) {
printf("Could not deduce output format from file extension: using mka.\n");
avformat_alloc_output_context2(
&oc, nullptr, "mka", _fileName.toStdString().c_str());
}
if (!oc) {
qDebug() << "Error in line: " << __LINE__;
return;
}
AVOutputFormat* fmt = oc->oformat;
if (fmt->audio_codec == AV_CODEC_ID_NONE) {
qDebug() << "Error in line: " << __LINE__;
return;
}
AVCodecID codecID = av_guess_codec(
fmt, nullptr, _fileName.toStdString().c_str(), nullptr, AVMEDIA_TYPE_AUDIO);
// Find Codec
m_AudioCodec = avcodec_find_encoder(codecID);
if (m_AudioCodec == nullptr) {
qDebug() << "Error in line: " << __LINE__;
return;
}
// Create an encoder context
m_AudioCodecContext = avcodec_alloc_context3(m_AudioCodec);
if (m_AudioCodecContext == nullptr) {
qDebug() << "Error in line: " << __LINE__;
return;
}
// Setting parameters
m_AudioCodecContext->bit_rate = _bitRate;
m_AudioCodecContext->sample_rate = _sampleRate;
m_AudioCodecContext->sample_fmt = _format;
m_AudioCodecContext->channels = _channel;
m_AudioCodecContext->channel_layout = av_get_default_channel_layout(_channel);
m_AudioCodecContext->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
// Turn on the encoder
int result = avcodec_open2(m_AudioCodecContext, m_AudioCodec, nullptr);
if (result < 0) {
avcodec_free_context(&m_AudioCodecContext);
if (m_FormatContext != nullptr)
avformat_free_context(m_FormatContext);
return;
}
// Create a package
if (!initFormat(_fileName)) {
avcodec_free_context(&m_AudioCodecContext);
if (m_FormatContext != nullptr)
avformat_free_context(m_FormatContext);
return;
}
// write to the file header
result = avformat_write_header(m_FormatContext, nullptr);
if (result < 0) {
avcodec_free_context(&m_AudioCodecContext);
if (m_FormatContext != nullptr)
avformat_free_context(m_FormatContext);
return;
}
// Create Frame
AVFrame* frame = av_frame_alloc();
if (frame == nullptr) {
avcodec_free_context(&m_AudioCodecContext);
if (m_FormatContext != nullptr)
avformat_free_context(m_FormatContext);
return;
}
int nb_samples = 0;
if (m_AudioCodecContext->codec->capabilities & AV_CODEC_CAP_VARIABLE_FRAME_SIZE) {
nb_samples = 10000;
}
else {
nb_samples = m_AudioCodecContext->frame_size;
}
// Set the parameters of the Frame
frame->nb_samples = nb_samples;
frame->format = m_AudioCodecContext->sample_fmt;
frame->channel_layout = m_AudioCodecContext->channel_layout;
// Apply for data memory
result = av_frame_get_buffer(frame, 0);
if (result < 0) {
av_frame_free(&frame);
{
avcodec_free_context(&m_AudioCodecContext);
if (m_FormatContext != nullptr)
avformat_free_context(m_FormatContext);
return;
}
}
// Set the Frame to be writable
result = av_frame_make_writable(frame);
if (result < 0) {
av_frame_free(&frame);
{
avcodec_free_context(&m_AudioCodecContext);
if (m_FormatContext != nullptr)
avformat_free_context(m_FormatContext);
return;
}
}
int perFrameDataSize = frame->linesize[0];
int count = _pcmData.size() / perFrameDataSize;
bool needAddOne = false;
if (_pcmData.size() % perFrameDataSize != 0) {
count++;
needAddOne = true;
}
int frameCount = 0;
for (int i = 0; i < count; ++i) {
// Create a Packet
AVPacket* pkt = av_packet_alloc();
if (pkt == nullptr) {
avcodec_free_context(&m_AudioCodecContext);
if (m_FormatContext != nullptr)
avformat_free_context(m_FormatContext);
return;
}
av_init_packet(pkt);
if (i == count - 1)
perFrameDataSize = _pcmData.size() % perFrameDataSize;
// Synthesize WAV files
memset(frame->data[0], 0, perFrameDataSize);
memcpy(frame->data[0], &(_pcmData.data()[perFrameDataSize * i]), perFrameDataSize);
frame->pts = frameCount++;
// send Frame
result = avcodec_send_frame(m_AudioCodecContext, frame);
if (result < 0)
continue;
// Receive the encoded Packet
result = avcodec_receive_packet(m_AudioCodecContext, pkt);
if (result < 0) {
av_packet_free(&pkt);
continue;
}
// write to file
av_packet_rescale_ts(pkt, m_AudioCodecContext->time_base, m_FormatContext->streams[0]->time_base);
pkt->stream_index = 0;
result = av_interleaved_write_frame(m_FormatContext, pkt);
if (result < 0)
continue;
av_packet_free(&pkt);
}
// write to the end of the file
av_write_trailer(m_FormatContext);
// Close file IO
avio_closep(&m_FormatContext->pb);
// Release Frame memory
av_frame_free(&frame);
avcodec_free_context(&m_AudioCodecContext);
if (m_FormatContext != nullptr)
avformat_free_context(m_FormatContext);
}
main.cpp
int main(int argc, char **argv)
{
av_log_set_level(AV_LOG_TRACE);
QFile file("rawDataOfPcmAlawType.bin");
if (!file.open(QIODevice::ReadOnly)) {
return EXIT_FAILURE;
}
QByteArray rawData(file.readAll());
AudioGenerater generator;
generator.generateAudioFileWithOptions(
"test.mka",
rawData,
1,
64000,
8000,
AV_SAMPLE_FMT_S16);
return 0;
}
希望您能帮我找到最合适的方法,把 pcm_alaw 或其他格式的数据记录到 MKA 音频文件中。
恳请了解相关内容的朋友帮忙(留给这个项目的时间已经不多了)。
这些有用的链接将对您有所帮助:
- 对 libav 中数据处理顺序的一个很好的概述:ffmpeg-libav-tutorial
- ffmpeg 开发人员自己的示例:avio_reading, resampling_audio, transcode_aac
设想在我的项目中,我会收到负载类型为 8(payload type 8)的 RTP 数据包,之后需要把这些负载保存为音轨的第 N 段。我从 RTP 数据包中提取负载,并将其存入临时缓冲区:
...
while ((rtp = receiveRtpPackets()).withoutErrors()) {
payloadData.push(rtp.getPayloadData());
}
audioGenerator.setPayloadData(payloadData);
audioGenerator.recordToFile();
...
当临时缓冲区被负载填充到一定大小后,我会处理该缓冲区:取出全部负载,并用 ffmpeg 对其进行编码,以便随后保存为 Matroska 格式的音频文件。但我遇到了一个问题:由于 RTP 数据包的负载类型为 8,我必须把 pcm_alaw 格式的原始音频数据保存为 mka 音频格式。然而在把 pcm_alaw 原始数据写入音频文件时,库输出了如下信息:
...
[libopus @ 0x18eff60] Queue input is backward in time
[libopus @ 0x18eff60] Queue input is backward in time
[libopus @ 0x18eff60] Queue input is backward in time
[libopus @ 0x18eff60] Queue input is backward in time
...
当您在 vlc 中打开音频文件时,没有播放任何内容(缺少音轨时间戳)。
我的项目的任务只是获取 pcm_alaw 数据并将其封装到 mka 格式的容器中。确定编解码器的最佳方式是使用 av_guess_codec() 函数,它会自动选择所需的编解码器 ID。但我不知道如何把原始数据正确地封装进容器。
需要注意的是,原始数据可能是任意音频格式,具体取决于 RTP 数据包的负载类型(即所有类型的 RTP 负载都可能出现)。我只知道,无论是哪种格式,都必须把音频数据封装到 mka 容器中。
我还附上了我使用的代码(借自 this 资源):
audiogenerater.h
extern "C"
{
#include "libavformat/avformat.h"
#include "libavcodec/avcodec.h"
#include "libswresample/swresample.h"
}
/**
 * Encodes a buffer of raw audio samples with FFmpeg and writes the result
 * into a container file whose format is deduced from the file name.
 *
 * The FFmpeg handles below are created and released inside
 * generateAudioFileWithOptions() / initFormat().
 */
class AudioGenerater
{
public:
    AudioGenerater();
    // Declared only: the destructor is defined out of line in
    // audiogenerater.cpp, so it must not also be defaulted here
    // (that would be a redefinition).
    ~AudioGenerater();

    /**
     * Encodes pcmData and writes it to fileName.
     *
     * @param fileName   output path; the container is deduced from it.
     * @param pcmData    raw samples laid out according to `format`.
     * @param channel    number of audio channels.
     * @param bitRate    target encoder bit rate in bits per second.
     * @param sampleRate sample rate in Hz.
     * @param format     sample format of pcmData.
     */
    void generateAudioFileWithOptions(
        QString fileName,
        QByteArray pcmData,
        int channel,
        int bitRate,
        int sampleRate,
        AVSampleFormat format);

private:
    // Creates the output context, adds the audio stream and opens the file.
    bool initFormat(QString audioFileName);

private:
    AVCodec *m_AudioCodec = nullptr;               // encoder picked for the container
    AVCodecContext *m_AudioCodecContext = nullptr; // opened encoder instance
    AVFormatContext *m_FormatContext = nullptr;    // muxer / output context
    AVOutputFormat *m_OutputFormat = nullptr;      // container description of m_FormatContext
};
audiogenerater.cpp
AudioGenerater::AudioGenerater()
{
av_register_all();
avcodec_register_all();
}
// Out-of-line destructor. Its body is elided in this excerpt, so what it
// releases (if anything) cannot be told from here.
AudioGenerater::~AudioGenerater()
{
// ...
}
/**
 * Creates the output (muxer) context for audioFileName, adds one audio stream
 * whose parameters are copied from m_AudioCodecContext, and opens the output
 * file for writing when the container needs a file.
 *
 * m_AudioCodecContext must already be allocated by the caller.
 *
 * On every failure path the context created here is released and
 * m_FormatContext is reset to nullptr, so a caller that frees a non-null
 * m_FormatContext during its own cleanup cannot free it a second time.
 *
 * @param audioFileName output path; the container format is deduced from it.
 * @return true on success, false on any failure.
 */
bool AudioGenerater::initFormat(QString audioFileName)
{
    // Keep the encoded name alive for the whole function instead of
    // re-encoding the QString for every FFmpeg call.
    const QByteArray encodedName = audioFileName.toLocal8Bit();
    const char* const path = encodedName.constData();

    // Shared failure path: release the context and leave no dangling pointers.
    auto fail = [this]() {
        avformat_free_context(m_FormatContext); // no-op for nullptr
        m_FormatContext = nullptr;
        m_OutputFormat = nullptr;
        return false;
    };

    if (m_AudioCodecContext == nullptr) {
        return false;
    }

    // Create an output Format context
    int result = avformat_alloc_output_context2(&m_FormatContext, nullptr, nullptr, path);
    if (result < 0 || m_FormatContext == nullptr) {
        return fail();
    }
    m_OutputFormat = m_FormatContext->oformat;

    // Create an audio stream
    AVStream* audioStream = avformat_new_stream(m_FormatContext, m_AudioCodec);
    if (audioStream == nullptr) {
        return fail();
    }

    // Set the parameters in the stream. The time base follows the encoder's
    // sample rate; 8000 is kept only as a fallback for an unset rate.
    const int sampleRate = m_AudioCodecContext->sample_rate > 0 ? m_AudioCodecContext->sample_rate : 8000;
    audioStream->id = m_FormatContext->nb_streams - 1;
    audioStream->time_base = AVRational{ 1, sampleRate };
    result = avcodec_parameters_from_context(audioStream->codecpar, m_AudioCodecContext);
    if (result < 0) {
        return fail();
    }

    // Print FormatContext information
    av_dump_format(m_FormatContext, 0, path, 1);

    // Open file IO
    if (!(m_OutputFormat->flags & AVFMT_NOFILE)) {
        result = avio_open(&m_FormatContext->pb, path, AVIO_FLAG_WRITE);
        if (result < 0) {
            return fail();
        }
    }
    return true;
}
void AudioGenerater::generateAudioFileWithOptions(
QString _fileName,
QByteArray _pcmData,
int _channel,
int _bitRate,
int _sampleRate,
AVSampleFormat _format)
{
AVFormatContext* oc;
if (avformat_alloc_output_context2(
&oc, nullptr, nullptr, _fileName.toStdString().c_str())
< 0) {
qDebug() << "Error in line: " << __LINE__;
return;
}
if (!oc) {
printf("Could not deduce output format from file extension: using mka.\n");
avformat_alloc_output_context2(
&oc, nullptr, "mka", _fileName.toStdString().c_str());
}
if (!oc) {
qDebug() << "Error in line: " << __LINE__;
return;
}
AVOutputFormat* fmt = oc->oformat;
if (fmt->audio_codec == AV_CODEC_ID_NONE) {
qDebug() << "Error in line: " << __LINE__;
return;
}
AVCodecID codecID = av_guess_codec(
fmt, nullptr, _fileName.toStdString().c_str(), nullptr, AVMEDIA_TYPE_AUDIO);
// Find Codec
m_AudioCodec = avcodec_find_encoder(codecID);
if (m_AudioCodec == nullptr) {
qDebug() << "Error in line: " << __LINE__;
return;
}
// Create an encoder context
m_AudioCodecContext = avcodec_alloc_context3(m_AudioCodec);
if (m_AudioCodecContext == nullptr) {
qDebug() << "Error in line: " << __LINE__;
return;
}
// Setting parameters
m_AudioCodecContext->bit_rate = _bitRate;
m_AudioCodecContext->sample_rate = _sampleRate;
m_AudioCodecContext->sample_fmt = _format;
m_AudioCodecContext->channels = _channel;
m_AudioCodecContext->channel_layout = av_get_default_channel_layout(_channel);
m_AudioCodecContext->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
// Turn on the encoder
int result = avcodec_open2(m_AudioCodecContext, m_AudioCodec, nullptr);
if (result < 0) {
avcodec_free_context(&m_AudioCodecContext);
if (m_FormatContext != nullptr)
avformat_free_context(m_FormatContext);
return;
}
// Create a package
if (!initFormat(_fileName)) {
avcodec_free_context(&m_AudioCodecContext);
if (m_FormatContext != nullptr)
avformat_free_context(m_FormatContext);
return;
}
// write to the file header
result = avformat_write_header(m_FormatContext, nullptr);
if (result < 0) {
avcodec_free_context(&m_AudioCodecContext);
if (m_FormatContext != nullptr)
avformat_free_context(m_FormatContext);
return;
}
// Create Frame
AVFrame* frame = av_frame_alloc();
if (frame == nullptr) {
avcodec_free_context(&m_AudioCodecContext);
if (m_FormatContext != nullptr)
avformat_free_context(m_FormatContext);
return;
}
int nb_samples = 0;
if (m_AudioCodecContext->codec->capabilities & AV_CODEC_CAP_VARIABLE_FRAME_SIZE) {
nb_samples = 10000;
}
else {
nb_samples = m_AudioCodecContext->frame_size;
}
// Set the parameters of the Frame
frame->nb_samples = nb_samples;
frame->format = m_AudioCodecContext->sample_fmt;
frame->channel_layout = m_AudioCodecContext->channel_layout;
// Apply for data memory
result = av_frame_get_buffer(frame, 0);
if (result < 0) {
av_frame_free(&frame);
{
avcodec_free_context(&m_AudioCodecContext);
if (m_FormatContext != nullptr)
avformat_free_context(m_FormatContext);
return;
}
}
// Set the Frame to be writable
result = av_frame_make_writable(frame);
if (result < 0) {
av_frame_free(&frame);
{
avcodec_free_context(&m_AudioCodecContext);
if (m_FormatContext != nullptr)
avformat_free_context(m_FormatContext);
return;
}
}
int perFrameDataSize = frame->linesize[0];
int count = _pcmData.size() / perFrameDataSize;
bool needAddOne = false;
if (_pcmData.size() % perFrameDataSize != 0) {
count++;
needAddOne = true;
}
int frameCount = 0;
for (int i = 0; i < count; ++i) {
// Create a Packet
AVPacket* pkt = av_packet_alloc();
if (pkt == nullptr) {
avcodec_free_context(&m_AudioCodecContext);
if (m_FormatContext != nullptr)
avformat_free_context(m_FormatContext);
return;
}
av_init_packet(pkt);
if (i == count - 1)
perFrameDataSize = _pcmData.size() % perFrameDataSize;
// Synthesize WAV files
memset(frame->data[0], 0, perFrameDataSize);
memcpy(frame->data[0], &(_pcmData.data()[perFrameDataSize * i]), perFrameDataSize);
frame->pts = frameCount++;
// send Frame
result = avcodec_send_frame(m_AudioCodecContext, frame);
if (result < 0)
continue;
// Receive the encoded Packet
result = avcodec_receive_packet(m_AudioCodecContext, pkt);
if (result < 0) {
av_packet_free(&pkt);
continue;
}
// write to file
av_packet_rescale_ts(pkt, m_AudioCodecContext->time_base, m_FormatContext->streams[0]->time_base);
pkt->stream_index = 0;
result = av_interleaved_write_frame(m_FormatContext, pkt);
if (result < 0)
continue;
av_packet_free(&pkt);
}
// write to the end of the file
av_write_trailer(m_FormatContext);
// Close file IO
avio_closep(&m_FormatContext->pb);
// Release Frame memory
av_frame_free(&frame);
avcodec_free_context(&m_AudioCodecContext);
if (m_FormatContext != nullptr)
avformat_free_context(m_FormatContext);
}
main.cpp
int main(int argc, char **argv)
{
av_log_set_level(AV_LOG_TRACE);
QFile file("rawDataOfPcmAlawType.bin");
if (!file.open(QIODevice::ReadOnly)) {
return EXIT_FAILURE;
}
QByteArray rawData(file.readAll());
AudioGenerater generator;
generator.generateAudioFileWithOptions(
"test.mka",
rawData,
1,
64000,
8000,
AV_SAMPLE_FMT_S16);
return 0;
}
希望您能帮我找到最合适的方法,把 pcm_alaw 或其他格式的数据记录到 MKA 音频文件中。
恳请了解相关内容的朋友帮忙(留给这个项目的时间已经不多了)。
这些有用的链接将对您有所帮助:
- 对 libav 中数据处理顺序的一个很好的概述:ffmpeg-libav-tutorial
- ffmpeg 开发人员自己的示例:avio_reading, resampling_audio, transcode_aac