Merging clips with AVFoundation creates single video in black

I am using AVFoundation to merge two videos into one. My attempt results in a single video whose length equals the sum of all the clips, but it shows only a black screen.

This is my code:

        public void mergeclips()
        {
            AVMutableComposition mixComposition = new AVMutableComposition();
            CMTime previous_asset_duration = CMTime.Zero;
            CMTime AllAssetDurations = CMTime.Zero;
            AVMutableVideoCompositionLayerInstruction[] Instruction_Array = new AVMutableVideoCompositionLayerInstruction[Clips.Count];
            

            foreach (string clip in Clips)
            {
                #region HoldVideoTrack
                AVAsset asset = AVAsset.FromUrl(NSUrl.FromFilename(clip));

                AVMutableCompositionTrack Track = mixComposition.AddMutableTrack(AVMediaType.Video, 0);

                CMTimeRange range = new CMTimeRange()
                {
                    Start = new CMTime(0, 0),
                    Duration = asset.Duration
                };

                AVAssetTrack track = asset.TracksWithMediaType(AVMediaType.Video)[0];
                Track.InsertTimeRange(range, track, previous_asset_duration, out NSError error);
                #endregion

                #region Instructions
                // 7
                var Instruction = AVMutableVideoCompositionLayerInstruction.FromAssetTrack(Track);

                Instruction.SetOpacity(0, asset.Duration);

                // 8
                Instruction_Array[Clips.IndexOf(clip)] = Instruction;
                #endregion

                previous_asset_duration = asset.Duration;
                AllAssetDurations = asset.Duration;
            }

            // 6
            var mainInstruction = new List<AVMutableVideoCompositionInstruction>();

            CMTimeRange rangeIns = new CMTimeRange()
            {
                Start = new CMTime(0, 0),
                Duration = AllAssetDurations
            };

            mainInstruction[0].TimeRange = rangeIns;
            
            mainInstruction[0].LayerInstructions = Instruction_Array;

            var mainComposition = new AVMutableVideoComposition();
            mainComposition.Instructions = mainInstruction.ToArray();
            mainComposition.FrameDuration = new CMTime(1, 30);
            mainComposition.RenderSize = new CoreGraphics.CGSize(UIScreen.MainScreen.Bounds.Width, UIScreen.MainScreen.Bounds.Height);


            //... export video ...

            AVAssetExportSession exportSession = new AVAssetExportSession(mixComposition, AVAssetExportSessionPreset.MediumQuality)
            {
                OutputUrl = NSUrl.FromFilename(Path.Combine(Path.GetTempPath(), "temporaryClip/Whole.mov")),
                OutputFileType = AVFileType.QuickTimeMovie,
                ShouldOptimizeForNetworkUse = true,
                //APP crashes here
                VideoComposition = mainComposition
            };
            exportSession.ExportAsynchronously(_OnExportDone);
        }

        private static void _OnExportDone()
        {
            var library = new ALAssetsLibrary();
            library.WriteVideoToSavedPhotosAlbum(NSUrl.FromFilename(Path.Combine(Path.GetTempPath(), "temporaryClip/Whole.mov")), (path, e2) =>
            {
                if (e2 != null)
                {
                    new UIAlertView("Error", e2.ToString(), null, "OK", null).Show();
                }
                else
                {
                }
            });
        }

Edit: I have added more code; specifically, I added ShouldOptimizeForNetworkUse and a VideoComposition to the AVAssetExportSession. I am using a List rather than a plain AVMutableVideoCompositionInstruction because AVMutableVideoComposition.Instructions expects an array of type AVVideoCompositionInstruction[]. With the previous code the app crashes at the line "VideoComposition = mainComposition".
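For reference, a minimal sketch of the shape that property ultimately expects (using the mainInstruction name from the code above; the array can also be created directly instead of going through a List and calling ToArray):

    // AVMutableVideoComposition.Instructions takes an array of AVVideoCompositionInstruction,
    // so a single-element array wrapping the one main instruction is enough.
    mainComposition.Instructions = new AVVideoCompositionInstruction[] { mainInstruction };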

Edit: After converting the instructions and making the corrections Shawn pointed out, I can now merge two or more videos and save the combined video to a single file. Unfortunately, the underlying problem remains: the final video only shows the backgroundColor of the AVMutableVideoCompositionInstruction instead of all the clips as expected. The audio of the videos is also ignored; I don't know whether it has to be added separately, but knowing that might help as well.

This is my code:

        public void mergeclips()
        {
            AVMutableComposition mixComposition = new AVMutableComposition();
            AVMutableVideoCompositionLayerInstruction[] Instruction_Array = new AVMutableVideoCompositionLayerInstruction[Clips.Count];

            foreach (string clip in Clips)
            {
                #region HoldVideoTrack
                AVAsset asset = AVAsset.FromUrl(NSUrl.FromFilename(clip));

                AVMutableCompositionTrack Track = mixComposition.AddMutableTrack(AVMediaType.Video, 0);

                CMTimeRange range = new CMTimeRange()
                {
                    Start = new CMTime(0, 0),
                    Duration = asset.Duration
                };

                AVAssetTrack track = asset.TracksWithMediaType(AVMediaType.Video)[0];
                Track.InsertTimeRange(range, track, mixComposition.Duration, out NSError error);
                #endregion

                #region Instructions
                Instruction_Array[Clips.IndexOf(clip)] = SetInstruction(asset, mixComposition.Duration, Track);
                #endregion
            }

            // 6
            var mainInstruction = new AVMutableVideoCompositionInstruction();

            CMTimeRange rangeIns = new CMTimeRange()
            {
                Start = new CMTime(0, 0),
                Duration = mixComposition.Duration
            };

            mainInstruction.BackgroundColor = UIColor.FromRGBA(1f, 1f, 1f, 1.000f).CGColor;
            mainInstruction.TimeRange = rangeIns;
            mainInstruction.LayerInstructions = Instruction_Array;

            var mainComposition = new AVMutableVideoComposition()
            {
                Instructions = new AVVideoCompositionInstruction[1] { mainInstruction },
                FrameDuration = new CMTime(1, 30),
                RenderSize = new CoreGraphics.CGSize(UIScreen.MainScreen.Bounds.Width, UIScreen.MainScreen.Bounds.Height)
            };

            //... export video ...

            AVAssetExportSession exportSession = new AVAssetExportSession(mixComposition, AVAssetExportSessionPreset.MediumQuality)
            {
                OutputUrl = NSUrl.FromFilename(Path.Combine(Path.GetTempPath(), "temporaryClip/Whole.mov")),
                OutputFileType = AVFileType.QuickTimeMovie,
                ShouldOptimizeForNetworkUse = true,
                VideoComposition = mainComposition
            };
            exportSession.ExportAsynchronously(_OnExportDone);
        }

        private AVMutableVideoCompositionLayerInstruction SetInstruction(AVAsset asset, CMTime currentTime, AVMutableCompositionTrack assetTrack)
        {
            var instruction = AVMutableVideoCompositionLayerInstruction.FromAssetTrack(assetTrack);

            var transform = assetTrack.PreferredTransform;
            var transformSize = assetTrack.NaturalSize; //for export session
            var newAssetSize = new CoreGraphics.CGSize(transformSize.Width, transformSize.Height); // for export session

            if (newAssetSize.Width > newAssetSize.Height)//portrait
            {
                //From here on, newAssetSize has its height and width inverted; height should be width and vice versa
                var scaleRatio = UIScreen.MainScreen.Bounds.Height / newAssetSize.Width;
                var _coreGraphic = new CoreGraphics.CGAffineTransform(0, 0, 0, 0, 0, 0);
                _coreGraphic.Scale(scaleRatio, scaleRatio);
                var tx = UIScreen.MainScreen.Bounds.Width / 2 - newAssetSize.Height * scaleRatio / 2;
                var ty = UIScreen.MainScreen.Bounds.Height / 2 - newAssetSize.Width * scaleRatio / 2;
                _coreGraphic.Translate(tx, ty);

                instruction.SetTransform(_coreGraphic, currentTime);

            }

            var endTime = CMTime.Add(currentTime, asset.Duration);
            instruction.SetOpacity(0, endTime);


            return instruction;
        }

Edit: Several errors in the code have been corrected thanks to Shawn's help. The problem remains (the resulting video shows no image).

This is my code:

        public void mergeclips()
        {
            //microphone
            AVCaptureDevice microphone = AVCaptureDevice.DefaultDeviceWithMediaType(AVMediaType.Audio);

            AVMutableComposition mixComposition = new AVMutableComposition();
            AVMutableVideoCompositionLayerInstruction[] Instruction_Array = new AVMutableVideoCompositionLayerInstruction[Clips.Count];

            foreach (string clip in Clips)
            {
                #region HoldVideoTrack

                AVAsset asset = AVAsset.FromUrl(NSUrl.FromFilename(clip));

                CMTimeRange range = new CMTimeRange()
                {
                    Start = new CMTime(0, 0),
                    Duration = asset.Duration
                };

                AVMutableCompositionTrack videoTrack = mixComposition.AddMutableTrack(AVMediaType.Video, 0);
                AVAssetTrack assetVideoTrack = asset.TracksWithMediaType(AVMediaType.Video)[0];
                videoTrack.InsertTimeRange(range, assetVideoTrack, mixComposition.Duration, out NSError error);

                if (microphone != null)
                {
                    AVMutableCompositionTrack audioTrack = mixComposition.AddMutableTrack(AVMediaType.Audio, 0);
                    AVAssetTrack assetAudioTrack = asset.TracksWithMediaType(AVMediaType.Audio)[0];
                    audioTrack.InsertTimeRange(range, assetAudioTrack, mixComposition.Duration, out NSError error2);
                }
                #endregion


                #region Instructions
                Instruction_Array[Clips.IndexOf(clip)] = SetInstruction(asset, mixComposition.Duration, videoTrack);
                #endregion
            }

            // 6
            var mainInstruction = new AVMutableVideoCompositionInstruction();

            CMTimeRange rangeIns = new CMTimeRange()
            {
                Start = new CMTime(0, 0),
                Duration = mixComposition.Duration
            };

            mainInstruction.BackgroundColor = UIColor.FromRGBA(1f, 1f, 1f, 1.000f).CGColor;
            mainInstruction.TimeRange = rangeIns;
            mainInstruction.LayerInstructions = Instruction_Array;

            var mainComposition = new AVMutableVideoComposition()
            {
                Instructions = new AVVideoCompositionInstruction[1] { mainInstruction },
                FrameDuration = new CMTime(1, 30),
                RenderSize = new CoreGraphics.CGSize(UIScreen.MainScreen.Bounds.Width, UIScreen.MainScreen.Bounds.Height)
            };

            //... export video ...

            AVAssetExportSession exportSession = new AVAssetExportSession(mixComposition, AVAssetExportSessionPreset.MediumQuality)
            {
                OutputUrl = NSUrl.FromFilename(Path.Combine(Path.GetTempPath(), "temporaryClip/Whole.mov")),
                OutputFileType = AVFileType.QuickTimeMovie,
                ShouldOptimizeForNetworkUse = true,
                VideoComposition = mainComposition
            };
            exportSession.ExportAsynchronously(_OnExportDone);
        }

        private AVMutableVideoCompositionLayerInstruction SetInstruction(AVAsset asset, CMTime currentTime, AVMutableCompositionTrack mixComposition_video_Track)
        {
            //The following code triggers when a device has no camera or no microphone (for instance an emulator)
            var instruction = AVMutableVideoCompositionLayerInstruction.FromAssetTrack(mixComposition_video_Track);

            //Get the individual AVAsset's track to use for transform
            AVAssetTrack assetTrack = asset.TracksWithMediaType(AVMediaType.Video)[0];

            //Set transform the the preferredTransform of the AVAssetTrack, not the AVMutableCompositionTrack
            CGAffineTransform transform = assetTrack.PreferredTransform;
            //Set the transformSize to be the asset natural size AFTER applying preferredTransform.
            CGSize transformSize = transform.TransformSize(assetTrack.NaturalSize);

            //Handle any negative values resulted from applying transform by using the absolute value
            CGSize newAssetSize = new CoreGraphics.CGSize(Math.Abs(transformSize.Width), Math.Abs(transformSize.Height));

            //change back to less than
            if (newAssetSize.Width < newAssetSize.Height)//portrait
            {
                /*newAssetSize should no longer be inverted since preferredTransform handles this. Remember that the asset was never 
                 * actually transformed yet. newAssetSize just represents the size the video is going to be after you call 
                 * instruction.setTransform(transform). Since transform is the first transform in concatenation, this is the size that 
                 * the scale and translate transforms will be using, which is why we needed to reference newAssetSize after applying 
                 * transform. Also you should concatenate in this order: transform -> scale -> translate, otherwise you won't get 
                 * desired results*/
                nfloat scaleRatio = UIScreen.MainScreen.Bounds.Height / newAssetSize.Height;

                //Apply scale to transform. Transform is never actually applied unless you do this.
                transform.Scale(scaleRatio, scaleRatio); 
                nfloat tx = UIScreen.MainScreen.Bounds.Width / 2 - newAssetSize.Width * scaleRatio / 2;
                nfloat ty = UIScreen.MainScreen.Bounds.Height / 2 - newAssetSize.Height * scaleRatio / 2;
                transform.Translate(tx, ty);

                instruction.SetTransform(transform, currentTime);
            }

            var endTime = CMTime.Add(currentTime, asset.Duration);
            instruction.SetOpacity(0, endTime);

            return instruction;
        }

You are inserting every time range at CMTime.zero instead of at the end of the previous asset. Also, are you using a videoComposition when you export?

Update: For a while now I have been playing the video inside my app, so I don't actually export. When I first started out I did export first and then passed the exported video to an AVPlayer as an AVAsset. I have changed a lot since then, so I no longer export the video just to play it in the app (it is inefficient and a waste of time), but my code merges the assets perfectly. I did have it working with an export at one point, but I have also changed my merge function a lot since then, so I can't guarantee this will work with an export session.

func mergeVideos(mixComposition: Binding<AVMutableComposition>, videoComposition: Binding<AVMutableVideoComposition>, mainInstruction: Binding<AVMutableVideoCompositionInstruction>) -> AVPlayerItem {

    guard let documentDirectory = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask).first else {
        return AVPlayerItem(asset: mixComposition.wrappedValue)
    }
    
    //Remove all existing videos, tracks and instructions
    self.assets.removeAll()
    
    for track in mixComposition.wrappedValue.tracks {
        mixComposition.wrappedValue.removeTrack(track)
    }
    
    //Add all videos to asset array
    for video in videos {
        let url = documentDirectory.appendingPathComponent(video.videoURL)
            let asset = AVURLAsset(url: url, options: [AVURLAssetPreferPreciseDurationAndTimingKey : true])
            self.assets.append(asset)
    }
    
    //add instructions and assets to mixComposition
    assets.forEach { asset in
        self.addTrackToComposition(asset: asset, mixComposition: mixComposition, videoComposition: videoComposition, mainInstruction: mainInstruction)
    }//forEach
    
    //create playerITem with videoComposition
    videoComposition.wrappedValue.instructions = [mainInstruction.wrappedValue]
    videoComposition.wrappedValue.frameDuration = CMTimeMake(value: 1, timescale: 30)
    videoComposition.wrappedValue.renderSize = renderSize
    
    let item = AVPlayerItem(asset: mixComposition.wrappedValue)
    item.seekingWaitsForVideoCompositionRendering = true
    item.videoComposition = videoComposition.wrappedValue
    
    return item
}//mergeVideo

func addTrackToComposition(asset: AVAsset, mixComposition: Binding<AVMutableComposition>, videoComposition: Binding<AVMutableVideoComposition>, mainInstruction: Binding<AVMutableVideoCompositionInstruction>) {
    
    let currentTime = mixComposition.wrappedValue.duration
            
    guard let assetVideoTrack = mixComposition.wrappedValue.addMutableTrack(withMediaType: .video, preferredTrackID: Int32(kCMPersistentTrackID_Invalid)) else {return}
    
    guard let assetAudioTrack = mixComposition.wrappedValue.addMutableTrack(withMediaType: .audio, preferredTrackID: Int32(kCMPersistentTrackID_Invalid)) else {return}
    
    do {
        let timeRange = CMTimeRangeMake(start: .zero, duration: asset.duration)
        // Insert video to Mutable Composition at right time.
        try assetVideoTrack.insertTimeRange(timeRange, of: asset.tracks(withMediaType: .video)[0], at: currentTime)
        try assetAudioTrack.insertTimeRange(timeRange, of: asset.tracks(withMediaType: .audio)[0], at: currentTime)
        
        let videoInstruction = videoCompositionInstruction(track: assetVideoTrack, asset: asset, currentTime: currentTime)
        
        mainInstruction.wrappedValue.layerInstructions.append(videoInstruction)
        mainInstruction.wrappedValue.timeRange = CMTimeRange(start: .zero, duration: mixComposition.wrappedValue.duration)
    } catch let error {
        print(error.localizedDescription)
    }

}//addTrackToComposition

func videoCompositionInstruction(track: AVCompositionTrack, asset: AVAsset, currentTime: CMTime) -> AVMutableVideoCompositionLayerInstruction {
    let instruction = AVMutableVideoCompositionLayerInstruction(assetTrack: track)
    guard let assetTrack = asset.tracks(withMediaType: .video).first else { return instruction }

    let transform = assetTrack.preferredTransform
    let transformSize = assetTrack.naturalSize.applying(transform) //for export session
    let newAssetSize = CGSize(width: abs(transformSize.width), height: abs(transformSize.height)) // for export session
    
    if newAssetSize.width < newAssetSize.height { //portrait

        let scaleRatio = renderSize.height / newAssetSize.height
        let scale = CGAffineTransform(scaleX: scaleRatio, y: scaleRatio)

        let tx = renderSize.width / 2 - newAssetSize.width * scaleRatio / 2
        let ty = renderSize.height / 2 - newAssetSize.height * scaleRatio / 2
        let translate = CGAffineTransform(translationX: tx, y: ty)

        let concatenation = transform.concatenating(scale).concatenating(translate)

        instruction.setTransform(concatenation, at: currentTime)

    } else if newAssetSize.width > newAssetSize.height { //landscape

        let scaleRatio = renderSize.width / newAssetSize.width
        let scale = CGAffineTransform(scaleX: scaleRatio, y: scaleRatio)

        let tx = renderSize.width / 2 - newAssetSize.width * scaleRatio / 2
        let ty = renderSize.height / 2 - newAssetSize.height * scaleRatio / 2
        let translate = CGAffineTransform(translationX: tx, y: ty)

        let concatenation = transform.concatenating(scale).concatenating(translate)

        instruction.setTransform(concatenation, at: currentTime)

    } else if newAssetSize.width == newAssetSize.height {
        //if landscape, fill height first, if portrait fill width first, if square doesnt matter just scale either width or height
        if renderSize.width > renderSize.height { //landscape
            let scaleRatio = renderSize.height / newAssetSize.height
            let scale = CGAffineTransform(scaleX: scaleRatio, y: scaleRatio)

            let tx = renderSize.width / 2 - newAssetSize.width * scaleRatio / 2
            let ty = renderSize.height / 2 - newAssetSize.height * scaleRatio / 2
            let translate = CGAffineTransform(translationX: tx, y: ty)

            let concatenation = transform.concatenating(scale).concatenating(translate)

            instruction.setTransform(concatenation, at: currentTime)
        } else { //portrait and square
            let scaleRatio = renderSize.width / newAssetSize.width
            let scale = CGAffineTransform(scaleX: scaleRatio, y: scaleRatio)

            let tx = renderSize.width / 2 - newAssetSize.width * scaleRatio / 2
            let ty = renderSize.height / 2 - newAssetSize.height * scaleRatio / 2
            let translate = CGAffineTransform(translationX: tx, y: ty)

            let concatenation = transform.concatenating(scale).concatenating(translate)

            instruction.setTransform(concatenation, at: currentTime)
        }
    }
    
    let endTime = CMTimeAdd(currentTime, asset.duration)
    instruction.setOpacity(0, at: endTime)
    
    return instruction
}//videoCompositionInstruction

I'll briefly explain what I'm doing here.

You don't need to pass bindings for the AVMutableComposition, AVMutableVideoComposition, or AVMutableVideoCompositionInstruction. I only do that for certain features in my app. You can instantiate all of them inside the function before doing anything else.

I have an array in my class that holds all of my assets, which is what self.assets refers to. "videos" refers to my Realm model that stores the last path component of the videos the user picked from their photo library. You probably don't need to remove all the existing videos, tracks, and instructions, since you aren't passing references to the composition and instructions around; I do it because I modify these objects throughout my app. You also don't need any of the wrappedValues; those only exist because of the bindings.

After populating the assets array, I loop over it and call addTrackToComposition, passing in each asset. This function adds an audio and a video track to the mixComposition for each asset. Then, inside a do-catch block, it tries to insert the asset's audio and video tracks into the empty mutable tracks you just created on the mixComposition. So the mixComposition ends up with two tracks per asset (one audio and one video). I do it this way to have finer control over my instructions and to apply different transforms to each asset, rather than to the whole mixComposition as one unit. You could also just create the empty mutable tracks for the mixComposition outside the for loop and then insert each asset's tracks into that one track (really two tracks, audio/video). I know this sounds confusing; try to break it down slowly. The important thing to note is that in my do-catch block the timeRange I pass is the asset's own time range, but I insert it at the end of the mixComposition (currentTime = mixComposition.duration). That's why the timeRange starts at kCMTimeZero (.zero) while I pass currentTime for the at: parameter.

Then I use a function to create the layer instruction for each asset. It scales and positions each asset so that it displays correctly in my custom video player, and it also sets the asset's opacity to 0 at its end time. Here my renderSize is declared in my Realm model and is a CGSize(width: 1280, height: 720). Now, I'm not sure whether these exact transforms will work for your use case, but I do know you definitely need transforms, otherwise your assets will be exported with the wrong orientation and/or size/position. At a minimum you need to set the asset's preferredTransform. It's important to use the AVAssetTrack's preferredTransform, not the AVCompositionTrack's. That handles the orientation for you, but not the scale and position. Play around with it; this code took me one to two weeks to figure out for my use case.

Then I append the layer instruction to mainInstruction and set mainInstruction's timeRange equal to the mixComposition's timeRange. I'm not sure why I set the timeRange on every iteration of the for loop; I could certainly do it once after all the instructions and tracks have been added, so it happens a single time instead of on every iteration.

Finally, I set the videoComposition's instructions to an array containing only mainInstruction, and set the frame rate and renderSize. Hopefully all of this works for you when you pass it into an export session.

Looking at the way you are trying to implement it, I'd say you don't need an array of layerInstructions. Just create a single AVMutableVideoCompositionInstruction object (mainInstruction) and append the layer instructions to it.

There is also a problem with your use of the previous asset's duration. When you insert a new asset's time range, you need to pass in the mixComposition's duration. What you are doing is inserting at the previous asset's duration, so you end up with a bunch of overlapping assets. You want to insert each asset after the combined duration of all the previous assets, which is the mixComposition's current duration.
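In the C# from the question that would look roughly like this (a sketch only, reusing the variable names from the loop above):

    // The clip's own range always starts at zero, but it is inserted at the
    // composition's current duration, so clips line up back to back instead of overlapping.
    CMTimeRange range = new CMTimeRange
    {
        Start = CMTime.Zero,
        Duration = asset.Duration
    };
    videoTrack.InsertTimeRange(range, assetVideoTrack, mixComposition.Duration, out NSError error);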

mainInstruction also shouldn't be a List. It should just be a single AVMutableVideoCompositionInstruction(). AVMutableVideoCompositionInstruction has a layerInstructions property, which is an array of layer instructions, and you can append directly to that. There should never be more than one mainInstruction; only the layerInstructions should be multiple.
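Roughly, in C# terms (a sketch using the names from your code):

    // A single main instruction spanning the whole composition; only the layer
    // instructions are per clip.
    var mainInstruction = new AVMutableVideoCompositionInstruction
    {
        TimeRange = new CMTimeRange { Start = CMTime.Zero, Duration = mixComposition.Duration },
        LayerInstructions = Instruction_Array
    };
    mainComposition.Instructions = new AVVideoCompositionInstruction[] { mainInstruction };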

Bear with it. With no prior AVFoundation experience, it took me a long time to figure this out myself. Honestly, I still don't know what's wrong with your current code, but what I do know is that this works for me. Hopefully it works for you too. I've probably changed this function 20 times since I started this app a few months ago.

Update: So you're on the right track, but there are still a few issues that may be the cause of your problem.

1.) I forgot to mention this last time, but when I ran into the same problem, a lot of people told me the audio track has to be handled separately. Apparently the video won't play if you don't. I never actually tested whether that's true, but it's worth a try. You can refer to my code again to see how I handle the audio track; it's essentially the same as the video track, except that you don't apply any instructions to it.
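In your C# that would be something along these lines (a sketch; it assumes every clip actually contains an audio track, otherwise TracksWithMediaType returns an empty array):

    // One extra audio track per clip, mirroring the video track handling,
    // but with no layer instructions attached to it.
    AVMutableCompositionTrack audioTrack = mixComposition.AddMutableTrack(AVMediaType.Audio, 0);
    AVAssetTrack assetAudioTrack = asset.TracksWithMediaType(AVMediaType.Audio)[0];
    audioTrack.InsertTimeRange(range, assetAudioTrack, mixComposition.Duration, out NSError audioError);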

2.) There are a few problems in your instruction function. Your instruction property is correct, but your transform, transformSize, and newAssetSize are not. Currently you set the transform to assetTrack.preferredTransform, which is actually the mixComposition's transform; what you want to use is the original AVAsset's preferredTransform.

After initializing the instruction with assetTrack (the mixComposition's track), you need to declare a new property to get the AVAsset's track. Refer to my code. I actually use the name "assetTrack" for this, so don't get confused by our variable names: your "assetTrack" is the "track" I pass in as a parameter, and my "assetTrack" is the one you need to add, though obviously you can use whatever name you want.

Videos recorded on our devices are a bit odd. A video recorded in portrait is actually stored in landscape. However, each asset carries data that tells the device how it should be displayed (i.e. rotate the video so it shows the same way it was recorded). That is what preferredTransform is: it transforms the asset so it displays in the correct orientation. That's why you need to make sure you use each individual asset's preferredTransform, not the mixComposition's preferredTransform as in your code. The mixComposition's preferredTransform is just an identity matrix that effectively does nothing. That is also why your asset's natural size looks "inverted". It isn't inverted; that's simply how Apple stores all videos and pictures. The metadata in preferredTransform handles the correct orientation, which in turn yields the "correct" width and height.

Now that you have the correct transform stored in your transform property, your transformSize property needs to reflect it, but you forgot to add "applying(transform)" to the size. This is important. The transformSize you currently have is just the naturalSize, whereas what you want is the size after the transform has been applied to the asset (so that the width and height actually reflect the video's correct orientation).

newAssetSize is there to handle any negative values produced by transformSize. So when you create newAssetSize, make sure you use the absolute values of transformSize.width and transformSize.height. That's why I have it as "abs(transformSize.width)" in my code. This is also critical.
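In Xamarin both steps (applying the preferred transform to the natural size, then taking absolute values) can be written like this; a sketch only, where assetVideoTrack is the original AVAsset's video track:

    // Size of the video after the preferredTransform is applied; width and height
    // now reflect the display orientation rather than the stored orientation.
    CGAffineTransform preferredTransform = assetVideoTrack.PreferredTransform;
    CGSize transformSize = preferredTransform.TransformSize(assetVideoTrack.NaturalSize);

    // The transform can produce negative components, so keep the absolute values.
    CGSize newAssetSize = new CGSize(Math.Abs(transformSize.Width), Math.Abs(transformSize.Height));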

3.) You never applied the preferredTransform to the video; instead you applied a scale transform to a matrix of all zeros, which will never work. At a minimum you need to concatenate the scale and translate onto an identity matrix, although you really should concatenate them onto the transform. If you don't change this part, your video will not show up no matter what you do. Any transform you concatenate onto a zero matrix has no effect; you still end up with a zero matrix, which means your video simply won't display.
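A sketch of that concatenation in C#, assuming scaleRatio, tx and ty have already been computed from newAssetSize as described above (order: transform -> scale -> translate; CGAffineTransform.Multiply plays the role of Swift's concatenating):

    // Start from the asset's preferredTransform (never from a zero matrix),
    // then concatenate the scale and the translation onto it.
    CGAffineTransform scale = CGAffineTransform.MakeScale(scaleRatio, scaleRatio);
    CGAffineTransform translate = CGAffineTransform.MakeTranslation(tx, ty);
    CGAffineTransform concatenation =
        CGAffineTransform.Multiply(CGAffineTransform.Multiply(preferredTransform, scale), translate);
    instruction.SetTransform(concatenation, currentTime);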

Try making these changes, especially the ones in the instruction function. I believe you will also have to redo your transform logic after changing these properties, since it looks like you were trying to compensate for the width and height being swapped.

Your code should look something like this (keep in mind I'm not familiar with C# at all):

  public void mergeclips()
        {
            AVMutableComposition mixComposition = new AVMutableComposition();
            AVMutableVideoCompositionLayerInstruction[] Instruction_Array = new AVMutableVideoCompositionLayerInstruction[Clips.Count];

            foreach (string clip in Clips)
            {
                #region HoldVideoTrack
                AVAsset asset = AVAsset.FromUrl(NSUrl.FromFilename(clip));

                AVMutableCompositionTrack videoTrack = mixComposition.AddMutableTrack(AVMediaType.Video, 0);
                AVMutableCompositionTrack audioTrack = mixComposition.AddMutableTrack(AVMediaType.Audio, 0);

                CMTimeRange range = new CMTimeRange()
                {
                    Start = new CMTime(0, 0),
                    Duration = asset.Duration
                };

                AVAssetTrack assetVideoTrack = asset.TracksWithMediaType(AVMediaType.Video)[0];
                videoTrack.InsertTimeRange(range, assetVideoTrack, mixComposition.Duration, out NSError error);
                
                AVAssetTrack assetAudioTrack = asset.TracksWithMediaType(AVMediaType.Audio)[0];
                audioTrack.InsertTimeRange(range, assetAudioTrack, mixComposition.Duration, out NSError audioError);
                #endregion

                #region Instructions
                Instruction_Array[Clips.IndexOf(clip)] = SetInstruction(asset, mixComposition.Duration, videoTrack);
                #endregion
            }

            // 6
            var mainInstruction = new AVMutableVideoCompositionInstruction();

            CMTimeRange rangeIns = new CMTimeRange()
            {
                Start = new CMTime(0, 0),
                Duration = mixComposition.Duration
            };

            mainInstruction.BackgroundColor = UIColor.FromRGBA(1f, 1f, 1f, 1.000f).CGColor;
            mainInstruction.TimeRange = rangeIns;
            mainInstruction.LayerInstructions = Instruction_Array;

            var mainComposition = new AVMutableVideoComposition()
            {
                Instructions = new AVVideoCompositionInstruction[1] { mainInstruction },
                FrameDuration = new CMTime(1, 30),
                RenderSize = new CoreGraphics.CGSize(UIScreen.MainScreen.Bounds.Width, UIScreen.MainScreen.Bounds.Height)
            };

            //... export video ...

            AVAssetExportSession exportSession = new AVAssetExportSession(mixComposition, AVAssetExportSessionPreset.MediumQuality)
            {
                OutputUrl = NSUrl.FromFilename(Path.Combine(Path.GetTempPath(), "temporaryClip/Whole.mov")),
                OutputFileType = AVFileType.QuickTimeMovie,
                ShouldOptimizeForNetworkUse = true,
                VideoComposition = mainComposition
            };
            exportSession.ExportAsynchronously(_OnExportDone);
        }

        private AVMutableVideoCompositionLayerInstruction SetInstruction(AVAsset asset, CMTime currentTime, AVMutableCompositionTrack assetTrack)
        {
            var instruction = AVMutableVideoCompositionLayerInstruction.FromAssetTrack(assetTrack);

    //Get the individual AVAsset's track to use for transform
            AVAssetTrack avAssetTrack = asset.TracksWithMediaType(AVMediaType.Video)[0];

    //Set transform the the preferredTransform of the AVAssetTrack, not the AVMutableCompositionTrack
            var transform = avAssetTrack.PreferredTransform;

    //Set the transformSize to be the asset natural size AFTER applying preferredTransform.

            var transformSize = transform.TransformSize(avAssetTrack.NaturalSize);

    //Handle any negative values resulted from applying transform by using the absolute value
            var newAssetSize = new CoreGraphics.CGSize(Math.Abs(transformSize.Width), Math.Abs(transformSize.Height)); // for export session
            //change back to less than
            if (newAssetSize.Width < newAssetSize.Height)//portrait
            {
                //newAssetSize should no longer be inverted since preferredTransform handles this. Remember that the asset was never actually transformed yet. newAssetSize just represents the size the video is going to be after you call instruction.setTransform(transform). Since transform is the first transform in concatenation, this is the size that the scale and translate transforms will be using, which is why we needed to reference newAssetSize after applying transform. Also you should concatenate in this order: transform -> scale -> translate, otherwise you won't get desired results

                var scaleRatio = UIScreen.MainScreen.Bounds.Height / newAssetSize.Height; //change back to height. Keep in mind that this scaleRatio will fill the height of the screen first and the width will probably exceed the screen bounds. I had it set like this because I was displaying my video in a view that is much smaller than the screen size. If you want to display the video centered on the phone screen, try using scaleRation = UIScreen.MainScreen.Bounds.Width / newAssetSize.Width. This will scale the video to fit the width of the screen perfectly and then the height will be whatever it is with respect to the videos aspect ratio.
                
                //Apply scale to transform. Transform is never actually applied unless you do this.
                var _coreGraphic = CGAffineTransform.Multiply(transform, CGAffineTransform.MakeScale(scaleRatio, scaleRatio));
                var tx = UIScreen.MainScreen.Bounds.Width / 2 - newAssetSize.Width * scaleRatio / 2;
                var ty = UIScreen.MainScreen.Bounds.Height / 2 - newAssetSize.Height * scaleRatio / 2;
                _coreGraphic = CGAffineTransform.Multiply(_coreGraphic, CGAffineTransform.MakeTranslation(tx, ty));

                instruction.SetTransform(_coreGraphic, currentTime);

            }

            var endTime = CMTime.Add(currentTime, asset.Duration);
            instruction.SetOpacity(0, endTime);


            return instruction;
        }
 

OK, thanks to Shawn's help I have achieved what I wanted to do. There were two main errors in my code that caused this problem. The first was how the CMTime handed to the VideoTrack was set: Start = new CMTime(0, 0) instead of Start = CMTime.Zero. I still don't know exactly what the difference is, but it was preventing the code from showing the video and audio of each asset, leaving a single video with the combined length of all the clips and the background of the AVMutableVideoCompositionInstruction. The second error was how I set up the instructions; the configuration that works for me can be found in the code below.
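A plausible explanation for the CMTime difference (an assumption on my part, not something I have verified): a CMTime is a value divided by a timescale, so new CMTime(0, 0) has a timescale of 0 and does not describe a usable time, while CMTime.Zero is 0 with a timescale of 1. A minimal comparison:

    // Assumption: the second constructor argument is the timescale, and a timescale
    // of 0 makes the time unusable, whereas CMTime.Zero is value 0 over timescale 1.
    CMTime suspectZero = new CMTime(0, 0); // value 0, timescale 0
    CMTime properZero = CMTime.Zero;       // value 0, timescale 1

    CMTimeRange range = new CMTimeRange
    {
        Start = properZero,                // what ended up working for me
        Duration = asset.Duration
    };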

Here is the final function that runs correctly:

public void MergeClips()
        {
            //microphone
            AVCaptureDevice microphone = AVCaptureDevice.DefaultDeviceWithMediaType(AVMediaType.Audio);

            AVMutableComposition mixComposition = AVMutableComposition.Create();
            AVVideoCompositionLayerInstruction[] Instruction_Array = new AVVideoCompositionLayerInstruction[Clips.Count];

            foreach (string clip in Clips)
            {
                var asset = AVUrlAsset.FromUrl(new NSUrl(clip, false)) as AVUrlAsset;
                #region HoldVideoTrack

                //This range applies to the video, not to the mixcomposition
                CMTimeRange range = new CMTimeRange()
                {
                    Start = CMTime.Zero,
                    Duration = asset.Duration
                };

                var duration = mixComposition.Duration;
                NSError error;

                AVMutableCompositionTrack videoTrack = mixComposition.AddMutableTrack(AVMediaType.Video, 0);
                AVAssetTrack assetVideoTrack = asset.TracksWithMediaType(AVMediaType.Video)[0];
                videoTrack.InsertTimeRange(range, assetVideoTrack, duration, out error);
                videoTrack.PreferredTransform = assetVideoTrack.PreferredTransform;

                if (microphone != null)
                {
                    AVMutableCompositionTrack audioTrack = mixComposition.AddMutableTrack(AVMediaType.Audio, 0);
                    AVAssetTrack assetAudioTrack = asset.TracksWithMediaType(AVMediaType.Audio)[0];
                    audioTrack.InsertTimeRange(range, assetAudioTrack, duration, out error);
                }
                #endregion

                #region Instructions
                int counter = Clips.IndexOf(clip);
                Instruction_Array[counter] = SetInstruction(asset, mixComposition.Duration, videoTrack);
                #endregion
            }

            // 6
            AVMutableVideoCompositionInstruction mainInstruction = AVMutableVideoCompositionInstruction.Create() as AVMutableVideoCompositionInstruction;

            CMTimeRange rangeIns = new CMTimeRange()
            {
                Start = new CMTime(0, 0),
                Duration = mixComposition.Duration
            };
            mainInstruction.TimeRange = rangeIns;
            mainInstruction.LayerInstructions = Instruction_Array;
            
            var mainComposition = AVMutableVideoComposition.Create();
            mainComposition.Instructions = new AVVideoCompositionInstruction[1] { mainInstruction };
            mainComposition.FrameDuration = new CMTime(1, 30);
            mainComposition.RenderSize = new CGSize(mixComposition.NaturalSize.Height, mixComposition.NaturalSize.Width);
            
            finalVideo_path = NSUrl.FromFilename(Path.Combine(Path.GetTempPath(), "Whole2.mov"));
            if (File.Exists(Path.GetTempPath() + "Whole2.mov"))
            {
                File.Delete(Path.GetTempPath() + "Whole2.mov");
            }

            //... export video ...
            AVAssetExportSession exportSession = new AVAssetExportSession(mixComposition, AVAssetExportSessionPreset.HighestQuality)
            {
                OutputUrl = NSUrl.FromFilename(Path.Combine(Path.GetTempPath(), "Whole2.mov")),
                OutputFileType = AVFileType.QuickTimeMovie,
                ShouldOptimizeForNetworkUse = true,
                VideoComposition = mainComposition
            };
            exportSession.ExportAsynchronously(_OnExportDone);
        }

        private AVMutableVideoCompositionLayerInstruction SetInstruction(AVAsset asset, CMTime currentTime, AVAssetTrack mixComposition_video_Track)
        {
            var instruction = AVMutableVideoCompositionLayerInstruction.FromAssetTrack(mixComposition_video_Track);

            var startTime = CMTime.Subtract(currentTime, asset.Duration);

            //NaturalSize.Height is passed as the width parameter because iOS stores video recordings horizontally
            CGAffineTransform translateToCenter = CGAffineTransform.MakeTranslation(mixComposition_video_Track.NaturalSize.Height, 0);
            //Angle in radians, not in degrees
            CGAffineTransform rotate = CGAffineTransform.Rotate(translateToCenter, (nfloat)(Math.PI / 2));

            instruction.SetTransform(rotate, (CMTime.Subtract(currentTime, asset.Duration)));

            instruction.SetOpacity(1, startTime);
            instruction.SetOpacity(0, currentTime);

            return instruction;
        }

As I said, I solved my problem thanks to Shawn's help, and most of this code was translated into C# from his answer, so if you are planning to upvote this answer, please upvote Shawn's answer instead, or both.