How to merge 1 video and 2 or more audio files with AVFoundation

I am trying to create a video by merging one ".mov" file with two or more ".caf" files. My code works fine when merging one ".mov" file with one ".caf" file, and it neither crashes nor throws any error when merging more ".caf" files; however, in the latter case the only audio that can be heard is the first one. I have tried adding the audios at different times and adding two at the same time, but the result is the same: only one audio.

The same thing happens if I also merge the video's own audio: only the first audio (the video's audio) can be heard.

Here is my code:

        public void mergeAudios()
        {
            //This function merges the final video with the new audio
            //microphone
            AVCaptureDevice microphone = AVCaptureDevice.DefaultDeviceWithMediaType(AVMediaType.Audio);

            AVMutableComposition mixComposition = new AVMutableComposition();

            #region HoldVideoTrack
            AVAsset video_asset = AVAsset.FromUrl(NSUrl.FromFilename(FinalVideo));

            CMTimeRange range;
            range = new CMTimeRange()
            {
                Start = new CMTime(0, 0),
                Duration = video_asset.Duration
            };

            AVMutableCompositionTrack videoTrack = mixComposition.AddMutableTrack(AVMediaType.Video, 0);
            AVAssetTrack assetVideoTrack = video_asset.TracksWithMediaType(AVMediaType.Video)[0];
            videoTrack.InsertTimeRange(range, assetVideoTrack, mixComposition.Duration, out NSError error1);
            
            try
            {
                CMTime prevDur = CMTime.Zero;
                foreach (Audio _audio in TTS_list)
                {
                    AVAsset audio_asset = AVAsset.FromUrl(NSUrl.FromFilename(_audio.Path));
                    CMTime aaa = new CMTime((long)(_audio.Starting_Point * Convert.ToDouble(mixComposition.Duration.TimeScale)), mixComposition.Duration.TimeScale);

                    AVMutableCompositionTrack audioTrack = mixComposition.AddMutableTrack(AVMediaType.Audio, 0);

                    AVAssetTrack assetAudioTrack = audio_asset.TracksWithMediaType(AVMediaType.Audio)[0];
                    audioTrack.InsertTimeRange(_audio.Range, assetAudioTrack, prevDur /*aaa*/, out NSError error3);
                    prevDur += _audio.Range.Duration;

                }
            }
            catch (Exception error) { /* any failure while adding the audio tracks is silently ignored here */ }
            #endregion


            #region Instructions
            AVMutableVideoCompositionLayerInstruction[] Instruction_Array = new AVMutableVideoCompositionLayerInstruction[1];
            Instruction_Array[0] = SetInstruction(video_asset, mixComposition.Duration, videoTrack);
            #endregion

            //Main instruction covering the whole composition
            var mainInstruction = new AVMutableVideoCompositionInstruction();

            CMTimeRange rangeIns = new CMTimeRange()
            {
                Start = new CMTime(0, 0),
                Duration = mixComposition.Duration
            };

            mainInstruction.BackgroundColor = UIColor.FromRGBA(0.63f, 0.84f, 0.82f, 1.000f).CGColor;
            mainInstruction.TimeRange = rangeIns;
            mainInstruction.LayerInstructions = Instruction_Array;

            var mainComposition = new AVMutableVideoComposition()
            {
                Instructions = new AVVideoCompositionInstruction[1] { mainInstruction },
                FrameDuration = new CMTime(1, 30),
                RenderSize = new CoreGraphics.CGSize(UIScreenWidth, UIScreenHeight)
            };

            //... export video ...

            pathh = NSUrl.FromFilename(Path.Combine(Path.GetTempPath(), "temporaryClip/Whole2.mov"));
            AVAssetExportSession exportSession = new AVAssetExportSession(mixComposition, AVAssetExportSessionPreset.MediumQuality)
            {
                OutputUrl = pathh,
                OutputFileType = AVFileType.QuickTimeMovie,
                ShouldOptimizeForNetworkUse = true,
                VideoComposition = mainComposition
            };
            exportSession.ExportAsynchronously(_OnExportDone);
        }
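
_OnExportDone is just the callback handed to ExportAsynchronously and is not shown here. A minimal sketch of what such a callback could check, assuming exportSession is promoted to a field so the callback can read it (an assumption, not the actual handler):

    //Hypothetical completion callback, not the actual _OnExportDone from this post.
    //Assumes exportSession is stored in a field so it is reachable from here.
    private void _OnExportDone()
    {
        if (exportSession.Status == AVAssetExportSessionStatus.Completed)
        {
            Console.WriteLine("Export finished: " + exportSession.OutputUrl.Path);
        }
        else
        {
            //Status can be Failed or Cancelled; Error carries the details on failure
            Console.WriteLine("Export " + exportSession.Status + ": " + exportSession.Error?.LocalizedDescription);
        }
    }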

If you need more information, I will provide it as soon as I see your request. Thank you all for your time, have a nice day.

OK, I just found the problem. Basically, there is a golden rule that must be followed when using AVMutableComposition (at least when merging several audios):

1 audio = 1 video + 1 instruction

In other words, for every audio there must be one video and one instruction. Following this rule, my previous code turns into the following:

        public void mergeAudios()
        {
            //This function merges the final video with the new audio

            #region HoldVideoTrack
            AVAsset video_asset = AVAsset.FromUrl(NSUrl.FromFilename(FinalVideo));

            //This range applies to the video asset, not to the mixComposition
            CMTimeRange range = new CMTimeRange()
            {
                Start = new CMTime(0, 0),
                Duration = video_asset.Duration
            };
            #endregion

            AVMutableComposition mixComposition = new AVMutableComposition();

            #region AddsVideo
            AVMutableCompositionTrack videoTrack = mixComposition.AddMutableTrack(AVMediaType.Video, 0);
            AVAssetTrack assetVideoTrack = video_asset.TracksWithMediaType(AVMediaType.Video)[0];
            videoTrack.InsertTimeRange(range, assetVideoTrack, CMTime.Zero, out NSError error1);
            #endregion

            #region AddsVideo'sAudio
            //If the device can't use the microphone then the original video's audio will not exist
            AVCaptureDevice microphone = AVCaptureDevice.DefaultDeviceWithMediaType(AVMediaType.Audio);
            if (microphone != null)
            {
                AVMutableCompositionTrack audio_video_Track = mixComposition.AddMutableTrack(AVMediaType.Audio, 0);
                AVAssetTrack assetAudioVideoTrack = video_asset.TracksWithMediaType(AVMediaType.Audio)[0];
                audio_video_Track.InsertTimeRange(range, assetAudioVideoTrack, mixComposition.Duration, out NSError error2);
            }
            #endregion

            //Instruction_Array has TTS_list.Count + 1 slots; the +1 is for the original video
            AVMutableVideoCompositionLayerInstruction[] Instruction_Array = new AVMutableVideoCompositionLayerInstruction[TTS_list.Count + 1];
            //This instruction is for "FinalVideo"
            Instruction_Array[0] = SetInstruction(video_asset, mixComposition.Duration, videoTrack);


            #region TestingEnvironment
            //counter indexes into Instruction_Array; it starts at 1 because slot 0 already holds the instruction for "FinalVideo"
            int counter = 1;
            foreach (Audio _audio in TTS_list)
            {
                #region Video
                AVMutableCompositionTrack videoTrack_forAudio = mixComposition.AddMutableTrack(AVMediaType.Video, 0);
                AVAssetTrack assetVideoTrack_forAudio = video_asset.TracksWithMediaType(AVMediaType.Video)[0];

                //This range applies to the video asset, not to the mixComposition; its duration is 0, so it has no visible effect on the final video.
                //We have to add 1 video track for each audio in order to merge multiple audios. Done this way, the extra video tracks change nothing visually, but the audios do play.
                CMTimeRange range0 = new CMTimeRange()
                {
                    Start = new CMTime(0, 0),
                    Duration = CMTime.FromSeconds(0, 600)
                };
                videoTrack_forAudio.InsertTimeRange(range0, assetVideoTrack_forAudio, mixComposition.Duration, out NSError error4);
                #endregion

                #region Audio
                AVAsset audio_asset = AVAsset.FromUrl(NSUrl.FromFilename(_audio.Path));

                //This range applies to the audio asset, not to the mixComposition
                //We use _audio.Duration instead of audio_asset.Duration.Seconds because the audio's duration might be trimmed
                CMTimeRange audio_CMTime = new CMTimeRange()
                {
                    Start = new CMTime(0, 0),
                    Duration = CMTime.FromSeconds(_audio.Duration, 600)
                };

                //This time is the audio's starting point inside the mixComposition, not inside the audio asset
                var starting_CMTime = CMTime.FromSeconds(_audio.Starting_Point, 600);

                AVMutableCompositionTrack audioTrack = mixComposition.AddMutableTrack(AVMediaType.Audio, 0);
                AVAssetTrack assetAudioTrack = audio_asset.TracksWithMediaType(AVMediaType.Audio)[0];
                audioTrack.InsertTimeRange(audio_CMTime, assetAudioTrack, starting_CMTime, out NSError error5);
                #endregion

                #region Instruction
                //One layer instruction per added video track, as the golden rule above requires
                Instruction_Array[counter] = SetInstruction(video_asset, mixComposition.Duration, videoTrack);
                counter += 1;
                #endregion
            }
            #endregion


            #region Instructions
            var mainInstruction = new AVMutableVideoCompositionInstruction();

            CMTimeRange rangeIns = new CMTimeRange()
            {
                Start = new CMTime(0, 0),
                Duration = mixComposition.Duration
            };

            mainInstruction.BackgroundColor = UIColor.FromRGBA(0.63f, 0.84f, 0.82f, 1.000f).CGColor;
            mainInstruction.TimeRange = rangeIns;
            mainInstruction.LayerInstructions = Instruction_Array;
            #endregion

            var mainComposition = new AVMutableVideoComposition()
            {
                Instructions = new AVVideoCompositionInstruction[1] { mainInstruction },
                FrameDuration = new CMTime(1, 30),
                RenderSize = new CoreGraphics.CGSize(UIScreenWidth, UIScreenHeight)
            };

            string outputPath = Path.Combine(Path.GetTempPath(), "temporaryClip/FinalVideoEdit.mov");
            finalVideo_path = NSUrl.FromFilename(outputPath);
            if (File.Exists(outputPath))
            {
                File.Delete(outputPath);
            }

            AVAssetExportSession exportSession = new AVAssetExportSession(mixComposition, AVAssetExportSessionPreset.MediumQuality)
            {
                OutputUrl = finalVideo_path,
                OutputFileType = AVFileType.QuickTimeMovie,
                ShouldOptimizeForNetworkUse = true,
                VideoComposition = mainComposition
            };
            exportSession.ExportAsynchronously(_OnExportDone);
        }
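
SetInstruction is a helper that is not shown in this post. A minimal sketch of the shape such a helper could take (an assumption about its contents, not necessarily the exact implementation):

    //Hypothetical sketch of the SetInstruction helper referenced above.
    //asset is unused here but kept so the signature matches the call sites.
    AVMutableVideoCompositionLayerInstruction SetInstruction(AVAsset asset, CMTime duration, AVAssetTrack track)
    {
        var instruction = AVMutableVideoCompositionLayerInstruction.FromAssetTrack(track);
        //Hide the layer once the composition ends so it does not linger
        instruction.SetOpacity(0.0f, duration);
        return instruction;
    }

Whatever the real helper contains, the key point is that Instruction_Array ends up holding exactly one layer instruction per video track added to mixComposition, which is what the golden rule above demands.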