Recording gapless audio with AVAssetWriter

I'm trying to record segments of audio and recombine them without producing a gap in the audio.

The eventual goal is to also have video, but I've found that the audio by itself produces gaps when combined with ffmpeg -f concat -i list.txt -c copy out.mp4

If I put the audio in an HLS playlist there are also gaps, so I don't think this is unique to ffmpeg.

My idea is that samples come in continuously, and my controller routes each sample to the proper AVAssetWriter. How do I eliminate the gaps in the audio?

import Foundation
import UIKit
import AVFoundation

class StreamController: UIViewController, AVCaptureAudioDataOutputSampleBufferDelegate, AVCaptureVideoDataOutputSampleBufferDelegate {
    var closingAudioInput: AVAssetWriterInput?
    var closingAssetWriter: AVAssetWriter?

    var currentAudioInput: AVAssetWriterInput?
    var currentAssetWriter: AVAssetWriter?

    var nextAudioInput: AVAssetWriterInput?
    var nextAssetWriter: AVAssetWriter?

    var videoHelper: VideoHelper?

    var startTime: NSTimeInterval = 0
    let closeAssetQueue: dispatch_queue_t = dispatch_queue_create("closeAssetQueue", nil);

    override func viewDidLoad() {
        super.viewDidLoad()
        startTime = NSDate().timeIntervalSince1970
        createSegmentWriter()
        videoHelper = VideoHelper()
        videoHelper!.delegate = self
        videoHelper!.startSession()
        NSTimer.scheduledTimerWithTimeInterval(1, target: self, selector: "createSegmentWriter", userInfo: nil, repeats: true)
    }

    func createSegmentWriter() {
        print("Creating segment writer at t=\(NSDate().timeIntervalSince1970 - self.startTime)")
        let outputPath = OutputFileNameHelper.instance.pathForOutput()
        OutputFileNameHelper.instance.incrementSegmentIndex()
        try? NSFileManager.defaultManager().removeItemAtPath(outputPath)
        nextAssetWriter = try! AVAssetWriter(URL: NSURL(fileURLWithPath: outputPath), fileType: AVFileTypeMPEG4)
        nextAssetWriter!.shouldOptimizeForNetworkUse = true

        let audioSettings: [String:AnyObject] = EncodingSettings.AUDIO
        nextAudioInput = AVAssetWriterInput(mediaType: AVMediaTypeAudio, outputSettings: audioSettings)
        nextAudioInput!.expectsMediaDataInRealTime = true
        nextAssetWriter?.addInput(nextAudioInput!)

        nextAssetWriter!.startWriting()
    }

    func closeWriterIfNecessary() {
        if closing && audioFinished {
            closing = false
            audioFinished = false
            let outputFile = closingAssetWriter?.outputURL.pathComponents?.last
            closingAssetWriter?.finishWritingWithCompletionHandler() {
                let delta = NSDate().timeIntervalSince1970 - self.startTime
                print("segment \(outputFile!) finished at t=\(delta)")
            }
            self.closingAudioInput = nil
            self.closingAssetWriter = nil
        }
    }

    var audioFinished = false
    var closing = false

    func captureOutput(captureOutput: AVCaptureOutput!, didOutputSampleBuffer sampleBuffer: CMSampleBufferRef, fromConnection connection: AVCaptureConnection!) {
        if let nextWriter = nextAssetWriter {
            if nextWriter.status.rawValue != 0 {
                if (currentAssetWriter != nil) {
                    closing = true
                }

                var sampleTiming: CMSampleTimingInfo = kCMTimingInfoInvalid
                CMSampleBufferGetSampleTimingInfo(sampleBuffer, 0, &sampleTiming)

                print("Switching asset writers at t=\(NSDate().timeIntervalSince1970 - self.startTime)")
                closingAssetWriter = currentAssetWriter
                closingAudioInput = currentAudioInput

                currentAssetWriter = nextAssetWriter
                currentAudioInput = nextAudioInput

                nextAssetWriter = nil
                nextAudioInput = nil

                currentAssetWriter?.startSessionAtSourceTime(sampleTiming.presentationTimeStamp)
            }
        }

        if let _ = captureOutput as? AVCaptureVideoDataOutput {
        } else if let _ = captureOutput as? AVCaptureAudioDataOutput {
            captureAudioSample(sampleBuffer)
        }

        dispatch_async(closeAssetQueue) {
            self.closeWriterIfNecessary()
        }
    }

    func printTimingInfo(sampleBuffer: CMSampleBufferRef, prefix: String) {
        var sampleTiming: CMSampleTimingInfo = kCMTimingInfoInvalid
        CMSampleBufferGetSampleTimingInfo(sampleBuffer, 0, &sampleTiming)
        let presentationTime = Double(sampleTiming.presentationTimeStamp.value) / Double(sampleTiming.presentationTimeStamp.timescale)
        print("\(prefix):\(presentationTime)")
    }

    func captureAudioSample(sampleBuffer: CMSampleBufferRef) {
        printTimingInfo(sampleBuffer, prefix: "A")
        if (closing && !audioFinished) {
            if closingAudioInput?.readyForMoreMediaData == true {
                closingAudioInput?.appendSampleBuffer(sampleBuffer)
            }
            closingAudioInput?.markAsFinished()
            audioFinished = true
        } else {
            if currentAudioInput?.readyForMoreMediaData == true {
                currentAudioInput?.appendSampleBuffer(sampleBuffer)
            }
        }
    }
}

Not sure if this helps you, but if you have a bunch of MP4s, you can use this code to combine them:

func mergeAudioFiles(audioFileUrls: NSArray, callback: (url: NSURL?, error: NSError?)->()) {

    // Create the audio composition
    let composition = AVMutableComposition()

    // Merge
    for (var i = 0; i < audioFileUrls.count; i++) {

        let compositionAudioTrack :AVMutableCompositionTrack = composition.addMutableTrackWithMediaType(AVMediaTypeAudio, preferredTrackID: CMPersistentTrackID())

        let asset = AVURLAsset(URL: audioFileUrls[i] as! NSURL)

        let track = asset.tracksWithMediaType(AVMediaTypeAudio)[0]

        let timeRange = CMTimeRange(start: CMTimeMake(0, 600), duration: track.timeRange.duration)

        try! compositionAudioTrack.insertTimeRange(timeRange, ofTrack: track, atTime: composition.duration)
    }

    // Create output url
    let format = NSDateFormatter()
    format.dateFormat="yyyy-MM-dd-HH-mm-ss"
    let currentFileName = "recording-\(format.stringFromDate(NSDate()))-merge.m4a"
    print(currentFileName)

    let documentsDirectory = NSFileManager.defaultManager().URLsForDirectory(.DocumentDirectory, inDomains: .UserDomainMask)[0]
    let outputUrl = documentsDirectory.URLByAppendingPathComponent(currentFileName)
    print(outputUrl.absoluteString)

    // Export it
    let assetExport = AVAssetExportSession(asset: composition, presetName: AVAssetExportPresetAppleM4A)
    assetExport?.outputFileType = AVFileTypeAppleM4A
    assetExport?.outputURL = outputUrl

    assetExport?.exportAsynchronouslyWithCompletionHandler({ () -> Void in
        switch assetExport!.status {
            case AVAssetExportSessionStatus.Failed:
                callback(url: nil, error: assetExport?.error)
            default:
                callback(url: assetExport?.outputURL, error: nil)
        }
    })

}
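For example, a hypothetical call site (the segment paths below are made up; substitute the files your writer actually produced, in playback order):

let segmentUrls: NSArray = [
    NSURL(fileURLWithPath: "/path/to/segment0.m4a"),
    NSURL(fileURLWithPath: "/path/to/segment1.m4a")
]

mergeAudioFiles(segmentUrls) { url, error in
    if let error = error {
        print("merge failed: \(error)")
    } else if let url = url {
        print("merged file written to \(url)")
    }
}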

With packetised formats such as AAC you have silent priming frames (a.k.a. encoder delay) at the beginning and remainder frames at the end (when your audio length is not a multiple of the packet size). In your case there are 2112 of them at the start of each file, which at 44100 Hz comes to roughly 48 ms of silence per segment. Priming and remainder frames break the possibility of concatenating the files without transcoding them, so you can't really blame ffmpeg -c copy for not producing seamless output.

I'm not sure where this leaves you with video, though; obviously the audio stays in sync with the video even in the presence of priming frames.

It all depends on how you intend to concatenate the final audio (and eventually the final video). If you're doing it yourself using AVFoundation, then you can detect and account for the priming/remainder frames using

CMGetAttachment(buffer, kCMSampleBufferAttachmentKey_TrimDurationAtStart, NULL)
CMGetAttachment(audioBuffer, kCMSampleBufferAttachmentKey_TrimDurationAtEnd, NULL)
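For instance, a minimal (untested) sketch in the same Swift 2 style as your code of pulling those attachments off a CMSampleBuffer and turning them into CMTime values:

import CoreMedia

// Untested sketch: the trim attachments are CFDictionaries that
// CMTimeMakeFromDictionary converts back into CMTime values.
func trimDurations(sampleBuffer: CMSampleBufferRef) -> (atStart: CMTime, atEnd: CMTime) {
    var atStart = kCMTimeZero
    var atEnd = kCMTimeZero
    if let dict = CMGetAttachment(sampleBuffer, kCMSampleBufferAttachmentKey_TrimDurationAtStart, nil) {
        atStart = CMTimeMakeFromDictionary(dict as! CFDictionary)
    }
    if let dict = CMGetAttachment(sampleBuffer, kCMSampleBufferAttachmentKey_TrimDurationAtEnd, nil) {
        atEnd = CMTimeMakeFromDictionary(dict as! CFDictionary)
    }
    return (atStart, atEnd)
}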

As a short-term solution, you can switch to a non-"packetised" format to get gapless files that can be concatenated (with ffmpeg).

E.g.

AVFormatIDKey: kAudioFormatAppleIMA4, fileType: AVFileTypeAIFC, suffix ".aifc", or
AVFormatIDKey: kAudioFormatLinearPCM, fileType: AVFileTypeWAVE, suffix ".wav"
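As a concrete example of the linear PCM variant (assumed values, not taken from your EncodingSettings.AUDIO), something along these lines in createSegmentWriter would make each segment a plain WAVE file; the sample rate, channel count, and bit depth are placeholders that should match what your capture session delivers:

// Assumed linear PCM settings for gapless, concatenatable WAVE segments.
let pcmSettings: [String: AnyObject] = [
    AVFormatIDKey: Int(kAudioFormatLinearPCM),
    AVSampleRateKey: 44100,
    AVNumberOfChannelsKey: 2,
    AVLinearPCMBitDepthKey: 16,
    AVLinearPCMIsFloatKey: false,
    AVLinearPCMIsBigEndianKey: false,
    AVLinearPCMIsNonInterleaved: false
]

nextAssetWriter = try! AVAssetWriter(URL: NSURL(fileURLWithPath: outputPath), fileType: AVFileTypeWAVE)
nextAudioInput = AVAssetWriterInput(mediaType: AVMediaTypeAudio, outputSettings: pcmSettings)
nextAudioInput!.expectsMediaDataInRealTime = true
nextAssetWriter!.addInput(nextAudioInput!)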

P.S. You can inspect the priming and remainder frames (and the packet size) with the ubiquitous afinfo tool:

afinfo chunk.mp4

Data format: 2 ch, 44100 Hz, 'aac ' (0x00000000) 0 bits/channel, 0 bytes/packet, 1024 frames/packet, 0 bytes/frame
...
audio 39596 valid frames + 2112 priming + 276 remainder = 41984
...