Real-time AVAssetWriter: synchronising audio and video when pausing/resuming

I am trying to record video with sound using the iPhone's front camera. Since I also need to support pause/resume functionality, I need to use AVAssetWriter. I found an example online, written in Objective-C, that almost achieves what I want (http://www.gdcl.co.uk/2013/02/20/iPhone-Pause.html).

Unfortunately, after converting this example to Swift, I noticed that if I pause/resume, there is a short but noticeable period at the end of each "section" during which the video is just a still frame while the audio keeps playing. So it seems that whenever isPaused is triggered, the recorded audio track ends up longer than the recorded video track.

Sorry if this looks like a rookie question, but I am no expert in AVFoundation, and any help would be greatly appreciated!

Below is my implementation of captureOutput(_:didOutput:from:):

func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
    // Distinguish audio from video buffers and drop everything while paused
    let isVideo = (connection == videoConntection)
    if !isCapturing || isPaused {
        return
    }

    // Lazily create the encoder from the first audio buffer so that the
    // channel count and sample rate are known up front
    if encoder == nil {
        if isVideo {
            return
        }
        if let fmt = CMSampleBufferGetFormatDescription(sampleBuffer) {
            let desc = CMAudioFormatDescriptionGetStreamBasicDescription(fmt as CMAudioFormatDescription)
            if let chan = desc?.pointee.mChannelsPerFrame, let rate = desc?.pointee.mSampleRate {
                let path = tempPath()!
                encoder = VideoEncoder(path: path, height: Int(cameraSize.height), width: Int(cameraSize.width), channels: chan, rate: rate)
            }
        }
    }
    // After a resume, wait for the first audio buffer and grow the running
    // offset by the size of the gap, so later timestamps can be shifted back
    if discont {
        if isVideo {
            return
        }
        discont = false
        var pts = CMSampleBufferGetPresentationTimeStamp(sampleBuffer)
        let last = lastAudio
        if last.flags.contains(CMTimeFlags.valid) {
            if cmOffset.flags.contains(CMTimeFlags.valid) {
                pts = CMTimeSubtract(pts, cmOffset)
            }
            let off = CMTimeSubtract(pts, last)
            print("setting offset from \(isVideo ? "video" : "audio")")
            print("adding \(CMTimeGetSeconds(off)) to \(CMTimeGetSeconds(cmOffset)) (pts \(CMTimeGetSeconds(pts)))")
            if cmOffset.value == 0 {
                cmOffset = off
            }
            else {
                cmOffset = CMTimeAdd(cmOffset, off)
            }
        }
        // Invalidate the stored times so they are re-seeded by the next buffers
        lastVideo.flags = []
        lastAudio.flags = []
        return
    }
    // Shift the buffer's timestamps back by the accumulated pause offset
    var out: CMSampleBuffer?
    if cmOffset.value > 0 {
        var count: CMItemCount = CMSampleBufferGetNumSamples(sampleBuffer)
        let pInfo = UnsafeMutablePointer<CMSampleTimingInfo>.allocate(capacity: count)
        defer { pInfo.deallocate() }
        CMSampleBufferGetSampleTimingInfoArray(sampleBuffer, entryCount: count, arrayToFill: pInfo, entriesNeededOut: &count)
        var i = 0
        while i < count {
            pInfo[i].decodeTimeStamp = CMTimeSubtract(pInfo[i].decodeTimeStamp, cmOffset)
            pInfo[i].presentationTimeStamp = CMTimeSubtract(pInfo[i].presentationTimeStamp, cmOffset)
            i += 1
        }
        CMSampleBufferCreateCopyWithNewTiming(allocator: nil, sampleBuffer: sampleBuffer, sampleTimingEntryCount: count, sampleTimingArray: pInfo, sampleBufferOut: &out)
    }
    else {
        out = sampleBuffer
    }

    // Remember the end time of the last written buffer for each track so the
    // next resume knows how large the gap was
    var pts = CMSampleBufferGetPresentationTimeStamp(out!)
    let dur = CMSampleBufferGetDuration(out!)
    if dur.value > 0 {
        pts = CMTimeAdd(pts, dur)
    }
    if isVideo {
        lastVideo = pts
    }
    else {
        lastAudio = pts
    }
    encoder?.encodeFrame(sampleBuffer: out!, isVideo: isVideo)
}

And here is my VideoEncoder class:

import AVFoundation

final class VideoEncoder {
    var writer:AVAssetWriter
    var videoInput:AVAssetWriterInput
    var audioInput:AVAssetWriterInput
    var path:String

    init(path:String, height:Int, width:Int, channels:UInt32, rate:Float64) {
        self.path = path
        if FileManager.default.fileExists(atPath:path) {
            try? FileManager.default.removeItem(atPath: path)
        }
        let url = URL(fileURLWithPath: path)
        writer = try! AVAssetWriter(outputURL: url, fileType: .mp4)
        videoInput = AVAssetWriterInput(mediaType: .video, outputSettings: [
            AVVideoCodecKey: AVVideoCodecType.h264,
            AVVideoWidthKey:height,
            AVVideoHeightKey:width
        ])
        videoInput.expectsMediaDataInRealTime = true
        writer.add(videoInput)

        audioInput = AVAssetWriterInput(mediaType: .audio, outputSettings: [
            AVFormatIDKey:kAudioFormatMPEG4AAC,
            AVNumberOfChannelsKey:channels,
            AVSampleRateKey:rate
        ])
        audioInput.expectsMediaDataInRealTime = true
        writer.add(audioInput)
    }

    func finish(with completionHandler:@escaping ()->Void) {
        writer.finishWriting(completionHandler: completionHandler)
    }

    // Returns true only if the buffer was actually appended to the writer
    @discardableResult
    func encodeFrame(sampleBuffer: CMSampleBuffer, isVideo: Bool) -> Bool {
        if CMSampleBufferDataIsReady(sampleBuffer) {
            // Start the writer session at the timestamp of the very first buffer
            if writer.status == .unknown {
                writer.startWriting()
                writer.startSession(atSourceTime: CMSampleBufferGetPresentationTimeStamp(sampleBuffer))
            }
            if writer.status == .failed {
                QFLogger.shared.addLog(format: "[ERROR initiating AVAssetWriter]", args: [], error: writer.error)
                return false
            }
            if isVideo {
                if videoInput.isReadyForMoreMediaData {
                    videoInput.append(sampleBuffer)
                    return true
                }
            }
            else {
                if audioInput.isReadyForMoreMediaData {
                    audioInput.append(sampleBuffer)
                    return true
                }
            }
        }
        return false
    }
}
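When the recording finally ends, finish(with:) gets called on the encoder. Roughly something like the sketch below; stopCapture and the surrounding flow are placeholders rather than my actual code, which I have not included here:

// Hypothetical teardown, only a sketch: mark both inputs as finished
// before asking the writer to finalize the .mp4 file
func stopCapture(completion: @escaping (URL) -> Void) {
    guard let encoder = encoder else { return }
    isCapturing = false
    encoder.videoInput.markAsFinished()
    encoder.audioInput.markAsFinished()
    encoder.finish {
        // The completion handler fires once the file is fully written
        completion(URL(fileURLWithPath: encoder.path))
    }
    self.encoder = nil
}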

The rest of the code should be pretty obvious, but for completeness, here is what I use to pause:

isPaused = true
discont = true

And this is the resume:

isPaused = false

It would be great if someone could help me understand how to keep the video and audio tracks aligned during this kind of live recording!

OK, it turns out the code I posted was not at fault. The problem I was experiencing was caused by video smoothing being turned on :) I guess it needs extra frames to smooth the video, which is why the video output freezes for a short moment at the end of each section.
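For reference, a minimal sketch of how that smoothing can be switched off. I am assuming the "smoothing" in question is the video stabilization applied on the AVCaptureConnection, and videoOutput is a placeholder for whatever AVCaptureVideoDataOutput you add to the session:

// Hypothetical snippet, not taken from the code above: disable video
// stabilization on the video connection so no extra frames are buffered
if let connection = videoOutput.connection(with: .video) {
    if connection.isVideoStabilizationSupported {
        // .off avoids the look-ahead frames that stabilization needs, which is
        // what caused the frozen video at the end of each recorded section
        connection.preferredVideoStabilizationMode = .off
    }
}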