FFmpeg.swr_convert: audio to raw 16-bit PCM, to be used with XNA SoundEffect. Audio cuts out when I convert
I want to resample mkv (vp8/ogg) and raw 4-bit ADPCM into a raw 16-bit PCM byte[] that can be loaded into a SoundEffect from the XNA library, so I can play the audio while other code displays the frames (the video side is working).
I can read a 16-bit WAV file and play it. But when I resample something, it doesn't play 100%. One file is 3 minutes 15 seconds long, yet I only get 13 seconds and 739 milliseconds before it stops playing. I have been learning how to do this by looking up code examples in C++ and correcting them to work in C# with FFmpeg.AutoGen.
Here is my best attempt at resampling.
int nb_samples = Frame->nb_samples;
int output_nb_samples = nb_samples;
int nb_channels = ffmpeg.av_get_channel_layout_nb_channels(ffmpeg.AV_CH_LAYOUT_STEREO);
int bytes_per_sample = ffmpeg.av_get_bytes_per_sample(AVSampleFormat.AV_SAMPLE_FMT_S16) * nb_channels;
int bufsize = ffmpeg.av_samples_get_buffer_size(null, nb_channels, nb_samples,
AVSampleFormat.AV_SAMPLE_FMT_S16, 1);
byte*[] b = Frame->data;
fixed (byte** input = b)
{
byte* output = null;
ffmpeg.av_samples_alloc(&output, null,
nb_channels,
nb_samples,
(AVSampleFormat)Frame->format, 0);//
// Buffer input
Ret = ffmpeg.swr_convert(Swr, &output, output_nb_samples / 2, input, nb_samples);
CheckRet();
WritetoMs(output, 0, Ret * bytes_per_sample);
output_nb_samples -= Ret;
// Drain buffer
while ((Ret = ffmpeg.swr_convert(Swr, &output, output_nb_samples, null, 0)) > 0)
{
CheckRet();
WritetoMs(output, 0, Ret * bytes_per_sample);
output_nb_samples -= Ret;
}
}
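For comparison, here is a sketch (untested) of the same loop with the output buffer allocated in the output format (AV_SAMPLE_FMT_S16) and sized from swr_get_out_samples, instead of halving output_nb_samples and allocating in Frame->format. Swr, Frame, WritetoMs and CheckRet are the same members used above.
int in_samples = Frame->nb_samples;
int max_out = ffmpeg.swr_get_out_samples(Swr, in_samples); // worst-case output sample count
byte* output = null;
ffmpeg.av_samples_alloc(&output, null, nb_channels, max_out,
    AVSampleFormat.AV_SAMPLE_FMT_S16, 0); // allocate in the OUTPUT format
byte*[] b = Frame->data;
fixed (byte** input = b)
{
    // swr_convert returns the number of samples actually written per channel
    Ret = ffmpeg.swr_convert(Swr, &output, max_out, input, in_samples);
    CheckRet();
    WritetoMs(output, 0, Ret * bytes_per_sample);
    // drain anything the resampler is still buffering
    while ((Ret = ffmpeg.swr_convert(Swr, &output, max_out, null, 0)) > 0)
        WritetoMs(output, 0, Ret * bytes_per_sample);
}
ffmpeg.av_freep(&output);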
I changed all of that to this, but it cuts out even sooner.
Channels = ffmpeg.av_get_channel_layout_nb_channels(OutFrame->channel_layout);
int nb_channels = ffmpeg.av_get_channel_layout_nb_channels(ffmpeg.AV_CH_LAYOUT_STEREO);
int bytes_per_sample = ffmpeg.av_get_bytes_per_sample(AVSampleFormat.AV_SAMPLE_FMT_S16) * nb_channels;
if((Ret = ffmpeg.swr_convert_frame(Swr, OutFrame, Frame))>=0)
WritetoMs(*OutFrame->extended_data, 0, OutFrame->nb_samples * bytes_per_sample);
CheckRet();
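swr_convert_frame buffers samples internally as well; a sketch (untested, same members as above) of draining it with a null input frame after the main loop:
while (ffmpeg.swr_convert_frame(Swr, OutFrame, null) == 0 && OutFrame->nb_samples > 0)
    WritetoMs(*OutFrame->extended_data, 0, OutFrame->nb_samples * bytes_per_sample);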
Both versions use a function to set up Swr; it runs once, after the first frame has been decoded.
private void PrepareResampler()
{
ffmpeg.av_frame_copy_props(OutFrame, Frame);
OutFrame->channel_layout = ffmpeg.AV_CH_LAYOUT_STEREO;
OutFrame->format = (int)AVSampleFormat.AV_SAMPLE_FMT_S16;
OutFrame->sample_rate = Frame->sample_rate;
OutFrame->channels = 2;
Swr = ffmpeg.swr_alloc();
if (Swr == null)
throw new Exception("SWR = Null");
Ret = ffmpeg.swr_config_frame(Swr, OutFrame, Frame);
CheckRet();
Ret = ffmpeg.swr_init(Swr);
CheckRet();
Ret = ffmpeg.swr_is_initialized(Swr);
CheckRet();
}
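For reference, the same converter can be configured explicitly with swr_alloc_set_opts; this sketch (untested) also falls back to a default input layout, since some decoders leave Frame->channel_layout at 0:
long inLayout = (long)Frame->channel_layout;
if (inLayout == 0)
    inLayout = ffmpeg.av_get_default_channel_layout(Frame->channels);
Swr = ffmpeg.swr_alloc_set_opts(null,
    (long)ffmpeg.AV_CH_LAYOUT_STEREO, AVSampleFormat.AV_SAMPLE_FMT_S16, Frame->sample_rate, // out
    inLayout, (AVSampleFormat)Frame->format, Frame->sample_rate,                            // in
    0, null);
Ret = ffmpeg.swr_init(Swr);
CheckRet();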
This is where I take the output and put it into the sound effect.
private void ReadAll()
{
using (Ms = new MemoryStream())
{
while (true)
{
Ret = ffmpeg.av_read_frame(Format, Packet);
if (Ret == ffmpeg.AVERROR_EOF)
break;
CheckRet();
Decode();
}
if (Ms.Length > 0)
{
se = new SoundEffect(Ms.ToArray(), 0, (int)Ms.Length, OutFrame->sample_rate, (AudioChannels)Channels, 0, 0);
//se.Duration; Stream->duration;
see = se.CreateInstance();
see.Play();
}
}
}
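A quick sanity check for the cut-out: 16-bit stereo PCM takes seconds × sample_rate × channels × 2 bytes, so Ms.Length can be compared with the stream duration. A sketch (untested; Stream is the same member referenced in the comment above):
double expectedSeconds = Stream->duration * ffmpeg.av_q2d(Stream->time_base);
long expectedBytes = (long)(expectedSeconds * OutFrame->sample_rate * Channels * 2);
Console.WriteLine($"expected {expectedBytes} PCM bytes, decoded {Ms.Length}");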
I found some code that works in C#, and slowly tried pieces of it in my program. It turned out my decoder was doing something wrong, so all of my resampling and encoding attempts were bound to fail.
using FFmpeg.AutoGen;
using System;
using System.IO;
namespace ConsoleApp1
{
//adapted using code from
public unsafe class Program
{
public static AVStream* in_audioStream { get; private set; }
static unsafe void die(string str)
{
throw new Exception(str);
}
private static unsafe AVStream* add_audio_stream(AVFormatContext* oc, AVCodecID codec_id, int sample_rate = 44100)
{
AVCodecContext* c;
AVCodec* encoder = ffmpeg.avcodec_find_encoder(codec_id);
AVStream* st = ffmpeg.avformat_new_stream(oc, encoder);
if (st == null)
{
die("av_new_stream");
}
c = st->codec;
c->codec_id = codec_id;
c->codec_type = AVMediaType.AVMEDIA_TYPE_AUDIO;
/* put sample parameters */
c->bit_rate = 64000;
c->sample_rate = sample_rate;
c->channels = 2;
c->sample_fmt = encoder->sample_fmts[0];
c->channel_layout = ffmpeg.AV_CH_LAYOUT_STEREO;
// some formats want stream headers to be separate
if ((oc->oformat->flags & ffmpeg.AVFMT_GLOBALHEADER) != 0)
{
c->flags |= ffmpeg.AV_CODEC_FLAG_GLOBAL_HEADER;
}
return st;
}
private static unsafe void open_audio(AVFormatContext* oc, AVStream* st)
{
AVCodecContext* c = st->codec;
AVCodec* codec;
/* find the audio encoder */
codec = ffmpeg.avcodec_find_encoder(c->codec_id);
if (codec == null)
{
die("avcodec_find_encoder");
}
/* open it */
AVDictionary* dict = null;
ffmpeg.av_dict_set(&dict, "strict", "+experimental", 0);
int res = ffmpeg.avcodec_open2(c, codec, &dict);
if (res < 0)
{
die("avcodec_open");
}
}
public static int DecodeNext(AVCodecContext* avctx, AVFrame* frame, ref int got_frame_ptr, AVPacket* avpkt)
{
int ret = 0;
got_frame_ptr = 0;
if ((ret = ffmpeg.avcodec_receive_frame(avctx, frame)) == 0)
{
//0 on success, otherwise negative error code
got_frame_ptr = 1;
}
else if (ret == ffmpeg.AVERROR(ffmpeg.EAGAIN))
{
//AVERROR(EAGAIN): output is not available in this state - user must try to send new input,
//so we resend the same packet through Decode() below
ret = Decode(avctx, frame, ref got_frame_ptr, avpkt);
}
else if (ret == ffmpeg.AVERROR_EOF)
{
die("AVERROR_EOF: the encoder has been flushed, and no new frames can be sent to it");
}
else if (ret == ffmpeg.AVERROR(ffmpeg.EINVAL))
{
die("AVERROR(EINVAL): codec not opened, refcounted_frames not set, it is a decoder, or requires flush");
}
else if (ret == ffmpeg.AVERROR(ffmpeg.ENOMEM))
{
die("Failed to add packet to internal queue, or similar other errors: legitimate decoding errors");
}
else
{
die("unknown");
}
return ret;
}
public static int Decode(AVCodecContext* avctx, AVFrame* frame, ref int got_frame_ptr, AVPacket* avpkt)
{
int ret = 0;
got_frame_ptr = 0;
if ((ret = ffmpeg.avcodec_send_packet(avctx, avpkt)) == 0)
{
//0 on success, otherwise negative error code
return DecodeNext(avctx, frame, ref got_frame_ptr, avpkt);
}
else if (ret == ffmpeg.AVERROR(ffmpeg.EAGAIN))
{
die("input is not accepted in the current state - user must read output with avcodec_receive_frame()(once all output is read, the packet should be resent, and the call will not fail with EAGAIN");
}
else if (ret == ffmpeg.AVERROR_EOF)
{
die("AVERROR_EOF: the decoder has been flushed, and no new packets can be sent to it (also returned if more than 1 flush packet is sent");
}
else if (ret == ffmpeg.AVERROR(ffmpeg.EINVAL))
{
die("codec not opened, it is an encoder, or requires flush");
}
else if (ret == ffmpeg.AVERROR(ffmpeg.ENOMEM))
{
die("Failed to add packet to internal queue, or similar other errors: legitimate decoding errors");
}
else
{
die("unknown");
}
return ret;//ffmpeg.avcodec_decode_audio4(fileCodecContext, audioFrameDecoded, &frameFinished, &inPacket);
}
public static int DecodeFlush(AVCodecContext* avctx, AVPacket* avpkt)
{
avpkt->data = null;
avpkt->size = 0;
return ffmpeg.avcodec_send_packet(avctx, avpkt);
}
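// Sketch (untested): at end of stream, send the flush packet once via DecodeFlush,
// then pull the remaining buffered frames until the decoder reports EOF:
//   DecodeFlush(ctx, &pkt);
//   while (ffmpeg.avcodec_receive_frame(ctx, frame) == 0) { /* use frame */ }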
public static int EncodeNext(AVCodecContext* avctx, AVPacket* avpkt, AVFrame* frame, ref int got_packet_ptr)
{
int ret = 0;
got_packet_ptr = 0;
if ((ret = ffmpeg.avcodec_receive_packet(avctx, avpkt)) == 0)
{
got_packet_ptr = 1;
//0 on success, otherwise negative error code
}
else if (ret == ffmpeg.AVERROR(ffmpeg.EAGAIN))
{
//output is not available in the current state - user must try to send input
return Encode(avctx, avpkt, frame, ref got_packet_ptr);
}
else if (ret == ffmpeg.AVERROR_EOF)
{
die("AVERROR_EOF: the encoder has been fully flushed, and there will be no more output packets");
}
else if (ret == ffmpeg.AVERROR(ffmpeg.EINVAL))
{
die("AVERROR(EINVAL) codec not opened, or it is an encoder other errors: legitimate decoding errors");
}
else
{
die("unknown");
}
return ret;//ffmpeg.avcodec_encode_audio2(audioCodecContext, &outPacket, audioFrameConverted, &frameFinished)
}
public static int Encode(AVCodecContext* avctx, AVPacket* avpkt, AVFrame* frame, ref int got_packet_ptr)
{
int ret = 0;
got_packet_ptr = 0;
if ((ret = ffmpeg.avcodec_send_frame(avctx, frame)) == 0)
{
//0 on success, otherwise negative error code
return EncodeNext(avctx, avpkt, frame, ref got_packet_ptr);
}
else if (ret == ffmpeg.AVERROR(ffmpeg.EAGAIN))
{
die("input is not accepted in the current state - user must read output with avcodec_receive_packet() (once all output is read, the packet should be resent, and the call will not fail with EAGAIN)");
}
else if (ret == ffmpeg.AVERROR_EOF)
{
die("AVERROR_EOF: the decoder has been flushed, and no new packets can be sent to it (also returned if more than 1 flush packet is sent");
}
else if (ret == ffmpeg.AVERROR(ffmpeg.EINVAL))
{
die("AVERROR(ffmpeg.EINVAL) codec not opened, refcounted_frames not set, it is a decoder, or requires flush");
}
else if (ret == ffmpeg.AVERROR(ffmpeg.ENOMEM))
{
die("AVERROR(ENOMEM) failed to add packet to internal queue, or similar other errors: legitimate decoding errors");
}
else
{
die("unknown");
}
return ret;//ffmpeg.avcodec_encode_audio2(audioCodecContext, &outPacket, audioFrameConverted, &frameFinished)
}
public static int EncodeFlush(AVCodecContext* avctx)
{
return ffmpeg.avcodec_send_frame(avctx, null);
}
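// Sketch (untested): the matching drain after EncodeFlush is to keep receiving
// packets until the encoder reports EOF:
//   EncodeFlush(ctx);
//   while (ffmpeg.avcodec_receive_packet(ctx, &pkt) == 0) { /* write pkt */ }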
public static void Main(string[] argv)
{
//ffmpeg.av_register_all();
if (argv.Length != 2)
{
Console.Error.WriteLine("usage: <in> <out>");
return;
}
// Allocate and init re-usable frames
AVCodecContext* fileCodecContext, audioCodecContext;
AVFormatContext* formatContext, outContext;
AVStream* out_audioStream;
SwrContext* swrContext;
int streamId;
// input file
string file = argv[0];
int res = ffmpeg.avformat_open_input(&formatContext, file, null, null);
if (res != 0)
{
die("avformat_open_input");
}
res = ffmpeg.avformat_find_stream_info(formatContext, null);
if (res < 0)
{
die("avformat_find_stream_info");
}
AVCodec* codec;
res = ffmpeg.av_find_best_stream(formatContext, AVMediaType.AVMEDIA_TYPE_AUDIO, -1, -1, &codec, 0);
if (res < 0)
{
return; // die("av_find_best_stream");
}
streamId = res;
fileCodecContext = ffmpeg.avcodec_alloc_context3(codec);
AVCodecParameters* cp = null;
ffmpeg.avcodec_parameters_to_context(fileCodecContext, formatContext->streams[streamId]->codecpar);
res = ffmpeg.avcodec_open2(fileCodecContext, codec, null);
if (res < 0)
{
die("avcodec_open2");
}
in_audioStream = formatContext->streams[streamId];
// output file
//string outfile = Path.Combine(Path.GetTempPath(), $"{Path.GetFileNameWithoutExtension(argv[0])}.pcm");
//AVOutputFormat* fmt = fmt = ffmpeg.av_guess_format("s16le", null, null);
string outfile = argv[1];
AVOutputFormat* fmt = ffmpeg.av_guess_format(null, outfile, null);
if (fmt == null)
{
die("av_guess_format");
}
outContext = ffmpeg.avformat_alloc_context();
outContext->oformat = fmt;
out_audioStream = add_audio_stream(outContext, fmt->audio_codec, in_audioStream->codec->sample_rate);
open_audio(outContext, out_audioStream);
out_audioStream->time_base = in_audioStream->time_base;
res = ffmpeg.avio_open2(&outContext->pb, outfile, ffmpeg.AVIO_FLAG_WRITE, null, null);
if (res < 0)
{
die("url_fopen");
}
ffmpeg.avformat_write_header(outContext, null);
AVCodec* ocodec;
res = ffmpeg.av_find_best_stream(outContext, AVMediaType.AVMEDIA_TYPE_AUDIO, -1, -1, &ocodec, 0);
audioCodecContext = ffmpeg.avcodec_alloc_context3(ocodec);
ffmpeg.avcodec_parameters_to_context(audioCodecContext, out_audioStream->codecpar);
res = ffmpeg.avcodec_open2(audioCodecContext, ocodec, null);
if (res < 0)
{
die("avcodec_open2");
}
// resampling
swrContext = ffmpeg.swr_alloc();
ffmpeg.av_opt_set_channel_layout(swrContext, "in_channel_layout", (long)fileCodecContext->channel_layout, 0);
ffmpeg.av_opt_set_channel_layout(swrContext, "out_channel_layout", (long)audioCodecContext->channel_layout, 0);
ffmpeg.av_opt_set_int(swrContext, "in_sample_rate", fileCodecContext->sample_rate, 0);
ffmpeg.av_opt_set_int(swrContext, "out_sample_rate", audioCodecContext->sample_rate, 0);
ffmpeg.av_opt_set_sample_fmt(swrContext, "in_sample_fmt", fileCodecContext->sample_fmt, 0);
ffmpeg.av_opt_set_sample_fmt(swrContext, "out_sample_fmt", audioCodecContext->sample_fmt, 0);
res = ffmpeg.swr_init(swrContext);
if (res < 0)
{
die("swr_init");
}
AVFrame* audioFrameDecoded = ffmpeg.av_frame_alloc();
if (audioFrameDecoded == null)
{
die("Could not allocate audio frame");
}
audioFrameDecoded->format = (int)fileCodecContext->sample_fmt;
audioFrameDecoded->channel_layout = fileCodecContext->channel_layout;
audioFrameDecoded->channels = fileCodecContext->channels;
audioFrameDecoded->sample_rate = fileCodecContext->sample_rate;
AVFrame* audioFrameConverted = ffmpeg.av_frame_alloc();
if (audioFrameConverted == null)
{
die("Could not allocate audio frame");
}
audioFrameConverted->nb_samples = audioCodecContext->frame_size;
audioFrameConverted->format = (int)audioCodecContext->sample_fmt;
audioFrameConverted->channel_layout = audioCodecContext->channel_layout;
audioFrameConverted->channels = audioCodecContext->channels;
audioFrameConverted->sample_rate = audioCodecContext->sample_rate;
if (audioFrameConverted->nb_samples <= 0)
{
audioFrameConverted->nb_samples = 32;
}
AVPacket inPacket;
ffmpeg.av_init_packet(&inPacket);
inPacket.data = null;
inPacket.size = 0;
int frameFinished = 0;
for (; ; )
{
if (ffmpeg.av_read_frame(formatContext, &inPacket) < 0)
{
break;
}
if (inPacket.stream_index == streamId)
{
int len = Decode(fileCodecContext, audioFrameDecoded, ref frameFinished, &inPacket);
if (len == ffmpeg.AVERROR_EOF)
{
break;
}
if (frameFinished != 0)
{
// Convert
byte* convertedData = null;
if (ffmpeg.av_samples_alloc(&convertedData,
null,
audioCodecContext->channels,
audioFrameConverted->nb_samples,
audioCodecContext->sample_fmt, 0) < 0)
{
die("Could not allocate samples");
}
int outSamples = 0;
fixed (byte** tmp = (byte*[])audioFrameDecoded->data)
{
outSamples = ffmpeg.swr_convert(swrContext, null, 0,
//&convertedData,
//audioFrameConverted->nb_samples,
tmp,
audioFrameDecoded->nb_samples);
}
if (outSamples < 0)
{
die("Could not convert");
}
for (; ; )
{
outSamples = ffmpeg.swr_get_out_samples(swrContext, 0);
if ((outSamples < audioCodecContext->frame_size * audioCodecContext->channels) || audioCodecContext->frame_size == 0 && (outSamples < audioFrameConverted->nb_samples * audioCodecContext->channels))
{
break; // see comments, thanks to @dajuric for fixing this
}
outSamples = ffmpeg.swr_convert(swrContext,
&convertedData,
audioFrameConverted->nb_samples, null, 0);
int buffer_size = ffmpeg.av_samples_get_buffer_size(null,
audioCodecContext->channels,
audioFrameConverted->nb_samples,
audioCodecContext->sample_fmt,
0);
if (buffer_size < 0)
{
die("Invalid buffer size");
}
if (ffmpeg.avcodec_fill_audio_frame(audioFrameConverted,
audioCodecContext->channels,
audioCodecContext->sample_fmt,
convertedData,
buffer_size,
0) < 0)
{
die("Could not fill frame");
}
AVPacket outPacket;
ffmpeg.av_init_packet(&outPacket);
outPacket.data = null;
outPacket.size = 0;
if (Encode(audioCodecContext, &outPacket, audioFrameConverted, ref frameFinished) < 0)
{
die("Error encoding audio frame");
}
//outPacket.flags |= ffmpeg.AV_PKT_FLAG_KEY;
outPacket.stream_index = out_audioStream->index;
//outPacket.data = audio_outbuf;
outPacket.dts = audioFrameDecoded->pkt_dts;
outPacket.pts = audioFrameDecoded->pkt_pts;
ffmpeg.av_packet_rescale_ts(&outPacket, in_audioStream->time_base, out_audioStream->time_base);
if (frameFinished != 0)
{
if (ffmpeg.av_interleaved_write_frame(outContext, &outPacket) != 0)
{
die("Error while writing audio frame");
}
ffmpeg.av_packet_unref(&outPacket);
}
}
}
}
}
EncodeFlush(audioCodecContext);
DecodeFlush(fileCodecContext, &inPacket);
ffmpeg.swr_close(swrContext);
ffmpeg.swr_free(&swrContext);
ffmpeg.av_frame_free(&audioFrameConverted);
ffmpeg.av_frame_free(&audioFrameDecoded);
ffmpeg.av_packet_unref(&inPacket);
ffmpeg.av_write_trailer(outContext);
ffmpeg.avio_close(outContext->pb);
ffmpeg.avcodec_close(fileCodecContext);
ffmpeg.avcodec_free_context(&fileCodecContext);
ffmpeg.avformat_close_input(&formatContext);
return;
}
}
}
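For what it's worth, the program takes two arguments (input and output), and av_guess_format picks the container, and with it the encoder, from the output file name; something like ConsoleApp1.exe in.mkv out.wav should therefore produce a 16-bit PCM WAV, since the WAV muxer defaults to pcm_s16le.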