使用 AVAssetWriter 录制无缝音频
Recording gapless audio with AVAssetWriter
我正在尝试录制音频片段并在不产生音频间隙的情况下重新组合它们。
最终目标是同时包含视频,但我发现仅音频本身在使用 ffmpeg -f concat -i list.txt -c copy out.mp4
拼接时就会产生间隙。
如果我把音频放在一个HLS播放列表中,也会有间隙,所以我认为这不是ffmpeg独有的。
我的想法是样本不断地进入,我的控制器将样本路由到正确的 AVAssetWriter
。如何消除音频中的间隙?
import Foundation
import UIKit
import AVFoundation
/// Records live capture audio into consecutive MPEG-4 segment files.
///
/// A repeating one-second timer prepares the *next* `AVAssetWriter`. On the
/// following audio buffer the capture callback promotes it to *current*,
/// starts its session at that buffer's timestamp, appends the buffer to it,
/// and finishes the previous writer on `closeAssetQueue`.
class StreamController: UIViewController, AVCaptureAudioDataOutputSampleBufferDelegate, AVCaptureVideoDataOutputSampleBufferDelegate {

    // Writer/input that has been replaced and is waiting to be finalised.
    var closingAudioInput: AVAssetWriterInput?
    var closingAssetWriter: AVAssetWriter?
    // Writer/input that receives incoming audio buffers.
    var currentAudioInput: AVAssetWriterInput?
    var currentAssetWriter: AVAssetWriter?
    // Writer/input prepared by `createSegmentWriter`, waiting to become current.
    var nextAudioInput: AVAssetWriterInput?
    var nextAssetWriter: AVAssetWriter?

    var videoHelper: VideoHelper?
    // Wall-clock reference (seconds since 1970) used by the "t=" log lines.
    var startTime: NSTimeInterval = 0
    // Serial queue on which replaced writers are finalised.
    let closeAssetQueue: dispatch_queue_t = dispatch_queue_create("closeAssetQueue", nil)

    // Hand-over state: `closing` is set when a writer has been replaced,
    // `audioFinished` once its audio input has been marked as finished.
    var audioFinished = false
    var closing = false

    override func viewDidLoad() {
        super.viewDidLoad()
        startTime = NSDate().timeIntervalSince1970
        createSegmentWriter()

        videoHelper = VideoHelper()
        videoHelper?.delegate = self
        videoHelper?.startSession()

        // NOTE(review): the repeating timer is never invalidated and keeps a
        // strong reference to its target, so this controller stays alive.
        NSTimer.scheduledTimerWithTimeInterval(1, target: self, selector: "createSegmentWriter", userInfo: nil, repeats: true)
    }

    /// Prepares the writer for the next segment and publishes it through
    /// `nextAssetWriter` / `nextAudioInput` once it has started writing.
    ///
    /// On failure the problem is logged and no writer is published, so the
    /// current segment simply keeps recording until the next timer tick.
    func createSegmentWriter() {
        print("Creating segment writer at t=\(NSDate().timeIntervalSince1970 - self.startTime)")
        let outputPath = OutputFileNameHelper.instance.pathForOutput()
        OutputFileNameHelper.instance.incrementSegmentIndex()
        // Best effort: remove a stale file at the output path, if any.
        try? NSFileManager.defaultManager().removeItemAtPath(outputPath)

        do {
            let writer = try AVAssetWriter(URL: NSURL(fileURLWithPath: outputPath), fileType: AVFileTypeMPEG4)
            writer.shouldOptimizeForNetworkUse = true

            let audioSettings: [String:AnyObject] = EncodingSettings.AUDIO
            let input = AVAssetWriterInput(mediaType: AVMediaTypeAudio, outputSettings: audioSettings)
            input.expectsMediaDataInRealTime = true

            guard writer.canAddInput(input) else {
                print("Asset writer for \(outputPath) rejected the audio input")
                return
            }
            writer.addInput(input)

            guard writer.startWriting() else {
                print("Asset writer for \(outputPath) failed to start: \(writer.error)")
                return
            }

            // Publish only a fully configured, started writer. The input is
            // stored first so a reader that sees the writer also sees its input.
            // NOTE(review): this presumably runs on the main thread (timer)
            // while the capture callback reads these properties on the capture
            // queue; confirm and serialise if VideoHelper uses another queue.
            nextAudioInput = input
            nextAssetWriter = writer
        } catch let error as NSError {
            print("Could not create asset writer for \(outputPath): \(error)")
        }
    }

    /// Finalises the replaced writer once its audio input has been finished.
    ///
    /// The writer is captured in a local before the shared properties are
    /// cleared, so the work on `closeAssetQueue` never reads state that the
    /// capture callback may already be reusing for the next hand-over.
    func closeWriterIfNecessary() {
        guard closing && audioFinished else { return }
        closing = false
        audioFinished = false

        let finishedWriter = closingAssetWriter
        closingAudioInput = nil
        closingAssetWriter = nil

        guard let writer = finishedWriter else { return }
        let outputFile = writer.outputURL.lastPathComponent ?? "<unknown>"
        dispatch_async(closeAssetQueue) {
            writer.finishWritingWithCompletionHandler {
                let delta = NSDate().timeIntervalSince1970 - self.startTime
                if writer.status == .Completed {
                    print("segment \(outputFile) finished at t=\(delta)")
                } else {
                    print("segment \(outputFile) failed at t=\(delta): \(writer.error)")
                }
            }
        }
    }

    /// Capture delegate callback: routes audio buffers to the right writer.
    ///
    /// Video buffers are ignored entirely. In particular they must not trigger
    /// a hand-over, because the new session would then start at a timestamp
    /// for which no audio is ever appended.
    func captureOutput(captureOutput: AVCaptureOutput!, didOutputSampleBuffer sampleBuffer: CMSampleBufferRef, fromConnection connection: AVCaptureConnection!) {
        guard captureOutput is AVCaptureAudioDataOutput else { return }

        // Promote the prepared writer only while it is actually writing; a
        // failed or cancelled writer cannot start a session.
        if let nextWriter = nextAssetWriter where nextWriter.status == .Writing {
            if currentAssetWriter != nil {
                closing = true
            }
            print("Switching asset writers at t=\(NSDate().timeIntervalSince1970 - self.startTime)")
            closingAssetWriter = currentAssetWriter
            closingAudioInput = currentAudioInput
            currentAssetWriter = nextWriter
            currentAudioInput = nextAudioInput
            nextAssetWriter = nil
            nextAudioInput = nil
            // The session starts at the timestamp of this very buffer, which
            // captureAudioSample appends to the new writer below.
            nextWriter.startSessionAtSourceTime(CMSampleBufferGetPresentationTimeStamp(sampleBuffer))
        }

        captureAudioSample(sampleBuffer)
        closeWriterIfNecessary()
    }

    /// Logs the presentation timestamp of `sampleBuffer` in seconds,
    /// prefixed with `prefix` and a colon.
    func printTimingInfo(sampleBuffer: CMSampleBufferRef, prefix: String) {
        let presentationTime = CMTimeGetSeconds(CMSampleBufferGetPresentationTimeStamp(sampleBuffer))
        print("\(prefix):\(presentationTime)")
    }

    /// Appends an audio buffer to the current writer and, right after a
    /// hand-over, marks the replaced writer's input as finished.
    ///
    /// The boundary buffer goes to the *new* writer. The replaced writer
    /// already holds every buffer before it, so the two segments neither
    /// overlap nor leave a hole between them.
    func captureAudioSample(sampleBuffer: CMSampleBufferRef) {
        printTimingInfo(sampleBuffer, prefix: "A")

        if closing && !audioFinished {
            closingAudioInput?.markAsFinished()
            audioFinished = true
        }

        guard let input = currentAudioInput else { return }
        if input.readyForMoreMediaData {
            if !input.appendSampleBuffer(sampleBuffer) {
                print("Failed to append audio buffer: \(currentAssetWriter?.error)")
            }
        } else {
            // A real-time input that is not ready cannot take the buffer;
            // log it so dropped audio is visible.
            print("Dropped audio buffer: input not ready for more media data")
        }
    }
}
不确定这是否对您有帮助,但如果您有一堆 MP4,您可以使用此代码将它们组合起来:
/// Concatenates the audio tracks of several files into one M4A file.
///
/// The first audio track of each file is placed back to back, in array order,
/// in an `AVMutableComposition`. The composition is exported with
/// `AVAssetExportPresetAppleM4A` into the Documents directory as
/// `recording-<timestamp>-merge.m4a`.
///
/// - Parameter audioFileUrls: `NSURL`s of the files to merge, in playback order.
/// - Parameter callback: Called once with the output URL on success, or with
///   `nil` and an error when the input is invalid or the export does not
///   complete.
func mergeAudioFiles(audioFileUrls: NSArray, callback: (url: NSURL?, error: NSError?)->()) {
    // Reports an input/setup problem through the callback instead of crashing.
    let fail = { (message: String) -> Void in
        let error = NSError(domain: "mergeAudioFiles", code: -1, userInfo: [NSLocalizedDescriptionKey: message])
        callback(url: nil, error: error)
    }

    guard audioFileUrls.count > 0 else {
        fail("No audio files to merge")
        return
    }

    // Create the audio composition
    let composition = AVMutableComposition()

    // Merge: every file gets its own track, inserted at the current end.
    for item in audioFileUrls {
        guard let url = item as? NSURL else {
            fail("Expected an NSURL but found \(item)")
            return
        }
        let asset = AVURLAsset(URL: url)
        guard let track = asset.tracksWithMediaType(AVMediaTypeAudio).first else {
            fail("No audio track in \(url)")
            return
        }
        let compositionAudioTrack: AVMutableCompositionTrack = composition.addMutableTrackWithMediaType(AVMediaTypeAudio, preferredTrackID: CMPersistentTrackID())
        let timeRange = CMTimeRange(start: CMTimeMake(0, 600), duration: track.timeRange.duration)
        do {
            try compositionAudioTrack.insertTimeRange(timeRange, ofTrack: track, atTime: composition.duration)
        } catch let error as NSError {
            callback(url: nil, error: error)
            return
        }
    }

    // Create output url
    let format = NSDateFormatter()
    format.dateFormat = "yyyy-MM-dd-HH-mm-ss"
    let currentFileName = "recording-\(format.stringFromDate(NSDate()))-merge.m4a"
    print(currentFileName)
    let documentsDirectory = NSFileManager.defaultManager().URLsForDirectory(.DocumentDirectory, inDomains: .UserDomainMask)[0]
    let outputUrl = documentsDirectory.URLByAppendingPathComponent(currentFileName)
    print(outputUrl.absoluteString)

    // Export it
    guard let assetExport = AVAssetExportSession(asset: composition, presetName: AVAssetExportPresetAppleM4A) else {
        fail("Could not create an export session for the merged composition")
        return
    }
    assetExport.outputFileType = AVFileTypeAppleM4A
    assetExport.outputURL = outputUrl
    assetExport.exportAsynchronouslyWithCompletionHandler {
        switch assetExport.status {
        case .Completed:
            callback(url: assetExport.outputURL, error: nil)
        default:
            // Failed, cancelled or any other non-completed state: there is no
            // usable output file, so never report a URL here.
            let error = assetExport.error ?? NSError(domain: "mergeAudioFiles", code: -1, userInfo: [NSLocalizedDescriptionKey: "Export did not complete"])
            callback(url: nil, error: error)
        }
    }
}
对于像 AAC 这样的数据包格式,您在开头有静默启动帧(a.k.a 编码器延迟),在结尾有剩余帧(当您的音频长度不是数据包大小的倍数时)。在您的情况下,每个文件的开头有 2112 个。 Priming 和 remainder 帧打破了在不对文件进行转码的情况下连接文件的可能性,因此你不能真正责怪 ffmpeg -c copy
没有产生无缝输出。
我不确定这对视频部分意味着什么——显然即使存在启动帧,音频与视频也是同步的。
这完全取决于您打算如何拼接最终的音频(以及最终的视频)。如果您使用 AVFoundation
自己进行拼接,那么可以通过下面两个样本缓冲区附件
检测并计算 priming/remainder 帧:
CMGetAttachment(buffer, kCMSampleBufferAttachmentKey_TrimDurationAtStart, NULL)
CMGetAttachment(audioBuffer, kCMSampleBufferAttachmentKey_TrimDurationAtEnd, NULL)
作为短期解决方案,您可以切换到非 "packetised"(非分包)格式,以获得无缝、可直接用 ffmpeg 拼接的文件。
例如
AVFormatIDKey: kAudioFormatAppleIMA4
、fileType: AVFileTypeAIFC
、后缀“.aifc”或
AVFormatIDKey: kAudioFormatLinearPCM
、fileType: AVFileTypeWAVE
、后缀“.wav”
P.S. 您可以使用随处可用的 afinfo
工具查看启动帧和剩余帧以及数据包大小。
afinfo chunk.mp4
Data format: 2 ch, 44100 Hz, 'aac ' (0x00000000) 0 bits/channel, 0 bytes/packet, 1024 frames/packet, 0 bytes/frame
...
audio 39596 valid frames + 2112 priming + 276 remainder = 41984
...
我正在尝试录制音频片段并在不产生音频间隙的情况下重新组合它们。
最终目标是同时包含视频,但我发现仅音频本身在使用 ffmpeg -f concat -i list.txt -c copy out.mp4 拼接时就会产生间隙。
如果我把音频放在一个HLS播放列表中,也会有间隙,所以我认为这不是ffmpeg独有的。
我的想法是样本不断地进入,我的控制器将样本路由到正确的 AVAssetWriter
。如何消除音频中的间隙?
import Foundation
import UIKit
import AVFoundation
/// Records live capture audio into consecutive MPEG-4 segment files.
///
/// A repeating one-second timer prepares the *next* `AVAssetWriter`. On the
/// following audio buffer the capture callback promotes it to *current*,
/// starts its session at that buffer's timestamp, appends the buffer to it,
/// and finishes the previous writer on `closeAssetQueue`.
class StreamController: UIViewController, AVCaptureAudioDataOutputSampleBufferDelegate, AVCaptureVideoDataOutputSampleBufferDelegate {

    // Writer/input that has been replaced and is waiting to be finalised.
    var closingAudioInput: AVAssetWriterInput?
    var closingAssetWriter: AVAssetWriter?
    // Writer/input that receives incoming audio buffers.
    var currentAudioInput: AVAssetWriterInput?
    var currentAssetWriter: AVAssetWriter?
    // Writer/input prepared by `createSegmentWriter`, waiting to become current.
    var nextAudioInput: AVAssetWriterInput?
    var nextAssetWriter: AVAssetWriter?

    var videoHelper: VideoHelper?
    // Wall-clock reference (seconds since 1970) used by the "t=" log lines.
    var startTime: NSTimeInterval = 0
    // Serial queue on which replaced writers are finalised.
    let closeAssetQueue: dispatch_queue_t = dispatch_queue_create("closeAssetQueue", nil)

    // Hand-over state: `closing` is set when a writer has been replaced,
    // `audioFinished` once its audio input has been marked as finished.
    var audioFinished = false
    var closing = false

    override func viewDidLoad() {
        super.viewDidLoad()
        startTime = NSDate().timeIntervalSince1970
        createSegmentWriter()

        videoHelper = VideoHelper()
        videoHelper?.delegate = self
        videoHelper?.startSession()

        // NOTE(review): the repeating timer is never invalidated and keeps a
        // strong reference to its target, so this controller stays alive.
        NSTimer.scheduledTimerWithTimeInterval(1, target: self, selector: "createSegmentWriter", userInfo: nil, repeats: true)
    }

    /// Prepares the writer for the next segment and publishes it through
    /// `nextAssetWriter` / `nextAudioInput` once it has started writing.
    ///
    /// On failure the problem is logged and no writer is published, so the
    /// current segment simply keeps recording until the next timer tick.
    func createSegmentWriter() {
        print("Creating segment writer at t=\(NSDate().timeIntervalSince1970 - self.startTime)")
        let outputPath = OutputFileNameHelper.instance.pathForOutput()
        OutputFileNameHelper.instance.incrementSegmentIndex()
        // Best effort: remove a stale file at the output path, if any.
        try? NSFileManager.defaultManager().removeItemAtPath(outputPath)

        do {
            let writer = try AVAssetWriter(URL: NSURL(fileURLWithPath: outputPath), fileType: AVFileTypeMPEG4)
            writer.shouldOptimizeForNetworkUse = true

            let audioSettings: [String:AnyObject] = EncodingSettings.AUDIO
            let input = AVAssetWriterInput(mediaType: AVMediaTypeAudio, outputSettings: audioSettings)
            input.expectsMediaDataInRealTime = true

            guard writer.canAddInput(input) else {
                print("Asset writer for \(outputPath) rejected the audio input")
                return
            }
            writer.addInput(input)

            guard writer.startWriting() else {
                print("Asset writer for \(outputPath) failed to start: \(writer.error)")
                return
            }

            // Publish only a fully configured, started writer. The input is
            // stored first so a reader that sees the writer also sees its input.
            // NOTE(review): this presumably runs on the main thread (timer)
            // while the capture callback reads these properties on the capture
            // queue; confirm and serialise if VideoHelper uses another queue.
            nextAudioInput = input
            nextAssetWriter = writer
        } catch let error as NSError {
            print("Could not create asset writer for \(outputPath): \(error)")
        }
    }

    /// Finalises the replaced writer once its audio input has been finished.
    ///
    /// The writer is captured in a local before the shared properties are
    /// cleared, so the work on `closeAssetQueue` never reads state that the
    /// capture callback may already be reusing for the next hand-over.
    func closeWriterIfNecessary() {
        guard closing && audioFinished else { return }
        closing = false
        audioFinished = false

        let finishedWriter = closingAssetWriter
        closingAudioInput = nil
        closingAssetWriter = nil

        guard let writer = finishedWriter else { return }
        let outputFile = writer.outputURL.lastPathComponent ?? "<unknown>"
        dispatch_async(closeAssetQueue) {
            writer.finishWritingWithCompletionHandler {
                let delta = NSDate().timeIntervalSince1970 - self.startTime
                if writer.status == .Completed {
                    print("segment \(outputFile) finished at t=\(delta)")
                } else {
                    print("segment \(outputFile) failed at t=\(delta): \(writer.error)")
                }
            }
        }
    }

    /// Capture delegate callback: routes audio buffers to the right writer.
    ///
    /// Video buffers are ignored entirely. In particular they must not trigger
    /// a hand-over, because the new session would then start at a timestamp
    /// for which no audio is ever appended.
    func captureOutput(captureOutput: AVCaptureOutput!, didOutputSampleBuffer sampleBuffer: CMSampleBufferRef, fromConnection connection: AVCaptureConnection!) {
        guard captureOutput is AVCaptureAudioDataOutput else { return }

        // Promote the prepared writer only while it is actually writing; a
        // failed or cancelled writer cannot start a session.
        if let nextWriter = nextAssetWriter where nextWriter.status == .Writing {
            if currentAssetWriter != nil {
                closing = true
            }
            print("Switching asset writers at t=\(NSDate().timeIntervalSince1970 - self.startTime)")
            closingAssetWriter = currentAssetWriter
            closingAudioInput = currentAudioInput
            currentAssetWriter = nextWriter
            currentAudioInput = nextAudioInput
            nextAssetWriter = nil
            nextAudioInput = nil
            // The session starts at the timestamp of this very buffer, which
            // captureAudioSample appends to the new writer below.
            nextWriter.startSessionAtSourceTime(CMSampleBufferGetPresentationTimeStamp(sampleBuffer))
        }

        captureAudioSample(sampleBuffer)
        closeWriterIfNecessary()
    }

    /// Logs the presentation timestamp of `sampleBuffer` in seconds,
    /// prefixed with `prefix` and a colon.
    func printTimingInfo(sampleBuffer: CMSampleBufferRef, prefix: String) {
        let presentationTime = CMTimeGetSeconds(CMSampleBufferGetPresentationTimeStamp(sampleBuffer))
        print("\(prefix):\(presentationTime)")
    }

    /// Appends an audio buffer to the current writer and, right after a
    /// hand-over, marks the replaced writer's input as finished.
    ///
    /// The boundary buffer goes to the *new* writer. The replaced writer
    /// already holds every buffer before it, so the two segments neither
    /// overlap nor leave a hole between them.
    func captureAudioSample(sampleBuffer: CMSampleBufferRef) {
        printTimingInfo(sampleBuffer, prefix: "A")

        if closing && !audioFinished {
            closingAudioInput?.markAsFinished()
            audioFinished = true
        }

        guard let input = currentAudioInput else { return }
        if input.readyForMoreMediaData {
            if !input.appendSampleBuffer(sampleBuffer) {
                print("Failed to append audio buffer: \(currentAssetWriter?.error)")
            }
        } else {
            // A real-time input that is not ready cannot take the buffer;
            // log it so dropped audio is visible.
            print("Dropped audio buffer: input not ready for more media data")
        }
    }
}
不确定这是否对您有帮助,但如果您有一堆 MP4,您可以使用此代码将它们组合起来:
/// Concatenates the audio tracks of several files into one M4A file.
///
/// The first audio track of each file is placed back to back, in array order,
/// in an `AVMutableComposition`. The composition is exported with
/// `AVAssetExportPresetAppleM4A` into the Documents directory as
/// `recording-<timestamp>-merge.m4a`.
///
/// - Parameter audioFileUrls: `NSURL`s of the files to merge, in playback order.
/// - Parameter callback: Called once with the output URL on success, or with
///   `nil` and an error when the input is invalid or the export does not
///   complete.
func mergeAudioFiles(audioFileUrls: NSArray, callback: (url: NSURL?, error: NSError?)->()) {
    // Reports an input/setup problem through the callback instead of crashing.
    let fail = { (message: String) -> Void in
        let error = NSError(domain: "mergeAudioFiles", code: -1, userInfo: [NSLocalizedDescriptionKey: message])
        callback(url: nil, error: error)
    }

    guard audioFileUrls.count > 0 else {
        fail("No audio files to merge")
        return
    }

    // Create the audio composition
    let composition = AVMutableComposition()

    // Merge: every file gets its own track, inserted at the current end.
    for item in audioFileUrls {
        guard let url = item as? NSURL else {
            fail("Expected an NSURL but found \(item)")
            return
        }
        let asset = AVURLAsset(URL: url)
        guard let track = asset.tracksWithMediaType(AVMediaTypeAudio).first else {
            fail("No audio track in \(url)")
            return
        }
        let compositionAudioTrack: AVMutableCompositionTrack = composition.addMutableTrackWithMediaType(AVMediaTypeAudio, preferredTrackID: CMPersistentTrackID())
        let timeRange = CMTimeRange(start: CMTimeMake(0, 600), duration: track.timeRange.duration)
        do {
            try compositionAudioTrack.insertTimeRange(timeRange, ofTrack: track, atTime: composition.duration)
        } catch let error as NSError {
            callback(url: nil, error: error)
            return
        }
    }

    // Create output url
    let format = NSDateFormatter()
    format.dateFormat = "yyyy-MM-dd-HH-mm-ss"
    let currentFileName = "recording-\(format.stringFromDate(NSDate()))-merge.m4a"
    print(currentFileName)
    let documentsDirectory = NSFileManager.defaultManager().URLsForDirectory(.DocumentDirectory, inDomains: .UserDomainMask)[0]
    let outputUrl = documentsDirectory.URLByAppendingPathComponent(currentFileName)
    print(outputUrl.absoluteString)

    // Export it
    guard let assetExport = AVAssetExportSession(asset: composition, presetName: AVAssetExportPresetAppleM4A) else {
        fail("Could not create an export session for the merged composition")
        return
    }
    assetExport.outputFileType = AVFileTypeAppleM4A
    assetExport.outputURL = outputUrl
    assetExport.exportAsynchronouslyWithCompletionHandler {
        switch assetExport.status {
        case .Completed:
            callback(url: assetExport.outputURL, error: nil)
        default:
            // Failed, cancelled or any other non-completed state: there is no
            // usable output file, so never report a URL here.
            let error = assetExport.error ?? NSError(domain: "mergeAudioFiles", code: -1, userInfo: [NSLocalizedDescriptionKey: "Export did not complete"])
            callback(url: nil, error: error)
        }
    }
}
对于像 AAC 这样的数据包格式,您在开头有静默启动帧(a.k.a 编码器延迟),在结尾有剩余帧(当您的音频长度不是数据包大小的倍数时)。在您的情况下,每个文件的开头有 2112 个。 Priming 和 remainder 帧打破了在不对文件进行转码的情况下连接文件的可能性,因此你不能真正责怪 ffmpeg -c copy
没有产生无缝输出。
我不确定这对视频部分意味着什么——显然即使存在启动帧,音频与视频也是同步的。
这完全取决于您打算如何拼接最终的音频(以及最终的视频)。如果您使用 AVFoundation
自己进行拼接,那么可以通过下面两个样本缓冲区附件检测并计算 priming/remainder 帧:
CMGetAttachment(buffer, kCMSampleBufferAttachmentKey_TrimDurationAtStart, NULL)
CMGetAttachment(audioBuffer, kCMSampleBufferAttachmentKey_TrimDurationAtEnd, NULL)
作为短期解决方案,您可以切换到非 "packetised"(非分包)格式,以获得无缝、可直接用 ffmpeg 拼接的文件。
例如
AVFormatIDKey: kAudioFormatAppleIMA4
、fileType: AVFileTypeAIFC
、后缀“.aifc”或
AVFormatIDKey: kAudioFormatLinearPCM
、fileType: AVFileTypeWAVE
、后缀“.wav”
P.S. 您可以使用随处可用的 afinfo
工具查看启动帧和剩余帧以及数据包大小。
afinfo chunk.mp4
Data format: 2 ch, 44100 Hz, 'aac ' (0x00000000) 0 bits/channel, 0 bytes/packet, 1024 frames/packet, 0 bytes/frame
...
audio 39596 valid frames + 2112 priming + 276 remainder = 41984
...