使 CIContext.render(CIImage, CVPixelBuffer) 与 AVAssetWriter 一起工作

Making CIContext.render(CIImage, CVPixelBuffer) work with AVAssetWriter

我想在 macOS 上使用 Core Image 处理一批 CGImage 对象,并将它们合成为 QuickTime 影片。下面的代码演示了所需的流程,但输出中包含大量空白(黑色)帧:

import AppKit
import AVFoundation
import CoreGraphics
import Foundation
import CoreVideo
import Metal

// Destination of the rendered movie: "av.mov" inside the user's Downloads directory.
let url: URL = try! FileManager.default
    .url(for: .downloadsDirectory, in: .userDomainMask, appropriateFor: nil, create: false)
    .appendingPathComponent("av.mov")
// Remove whatever is already at that path; any error (e.g. nothing to remove) is ignored.
try? FileManager.default.removeItem(at: url)

// Video frame size, total frame count, frame rate and frame image.
let frameSize: CGSize = CGSize(width: 2000, height: 1000)
let frameCount: Int = 100
// NOTE: despite its name, this value is the duration of a single frame in seconds (1/30 s).
// It is multiplied by the frame index to compute each frame's presentation time.
let frameRate: Double = 1 / 30
let frameImage: CGImage

// A solid red test image of `frameSize`, converted to a CGImage. The drawing handler
// receives the destination rectangle as its only argument (`$0`) and fills it.
frameImage = NSImage(size: frameSize, flipped: false, drawingHandler: {
    NSColor.red.setFill()
    $0.fill()
    return true
}).cgImage(forProposedRect: nil, context: nil, hints: nil)!

// Attributes requested for the source pixel buffers: 32-bit ARGB at the frame size,
// flagged as compatible with Metal, CGImage and CGBitmapContext.
let pixelBufferAttributes: [CFString: Any] = [
    kCVPixelBufferPixelFormatTypeKey: Int(kCVPixelFormatType_32ARGB),
    kCVPixelBufferWidthKey: Float(frameSize.width),
    kCVPixelBufferHeightKey: Float(frameSize.height),
    kCVPixelBufferMetalCompatibilityKey: true,
    kCVPixelBufferCGImageCompatibilityKey: true,
    kCVPixelBufferCGBitmapContextCompatibilityKey: true,
]

// Settings for the video track: H.264 at the frame size.
let outputSettings: [String: Any] = [
    AVVideoCodecKey: AVVideoCodecType.h264,
    AVVideoWidthKey: Int(frameSize.width),
    AVVideoHeightKey: Int(frameSize.height),
]

// Asset writer producing a QuickTime movie at `url`, a single video input configured
// with `outputSettings`, and a pixel buffer adaptor that feeds that input.
let writer: AVAssetWriter = try! AVAssetWriter(outputURL: url, fileType: .mov)
let input: AVAssetWriterInput = AVAssetWriterInput(mediaType: .video, outputSettings: outputSettings)
let pixelBufferAdaptor: AVAssetWriterInputPixelBufferAdaptor = AVAssetWriterInputPixelBufferAdaptor(assetWriterInput: input, sourcePixelBufferAttributes: pixelBufferAttributes as [String: Any])

input.expectsMediaDataInRealTime = true

precondition(writer.canAdd(input))
writer.add(input)

// Start the writer outside of `precondition`: the condition expression of `precondition`
// is not guaranteed to be evaluated in -Ounchecked builds, so a call with side effects
// must not live inside it.
let didStartWriting = writer.startWriting()
precondition(didStartWriting)
writer.startSession(atSourceTime: CMTime.zero)

// sRGB color space (falling back to device RGB) and a Metal-backed Core Image context.
let colorSpace: CGColorSpace = CGColorSpace(name: CGColorSpace.sRGB) ?? CGColorSpaceCreateDeviceRGB()
let context = CIContext(mtlDevice: MTLCreateSystemDefaultDevice()!)

Swift.print("Starting the render…")

// Preferred scenario: using CoreImage to fill the buffer from the pixel buffer adapter. Shows that
// CIImage + AVAssetWriterInputPixelBufferAdaptor are not working together.

for frameNumber in 0 ..< frameCount {
    // Obtain an empty buffer from the adaptor's pool. The status-returning calls are made
    // outside of `precondition` because its condition expression is not guaranteed to be
    // evaluated in -Ounchecked builds.
    guard let pixelBufferPool: CVPixelBufferPool = pixelBufferAdaptor.pixelBufferPool else { preconditionFailure() }
    var pooledBuffer: CVPixelBuffer?
    let createStatus = CVPixelBufferPoolCreatePixelBuffer(nil, pixelBufferPool, &pooledBuffer)
    precondition(createStatus == kCVReturnSuccess)
    guard let pixelBuffer = pooledBuffer else { preconditionFailure() }

    // In this scenario the buffer is locked BEFORE Core Image renders into it and stays
    // locked until the end of the iteration. This ordering is what the scenario demonstrates.
    let lockStatus = CVPixelBufferLockBaseAddress(pixelBuffer, [])
    precondition(lockStatus == kCVReturnSuccess)
    defer {
        let unlockStatus = CVPixelBufferUnlockBaseAddress(pixelBuffer, [])
        precondition(unlockStatus == kCVReturnSuccess)
    }

    let ciImage = CIImage(cgImage: frameImage)
    context.render(ciImage, to: pixelBuffer)

    //  This fails – the pixel buffer doesn't get filled. AT ALL! Why? How to make it work?
    let bytes = UnsafeBufferPointer(start: CVPixelBufferGetBaseAddress(pixelBuffer)!.assumingMemoryBound(to: UInt8.self), count: CVPixelBufferGetDataSize(pixelBuffer))
    precondition(bytes.contains(where: { $0 != 0 }))

    // Poll every 10 ms until the input accepts more data, then append the frame at
    // frameNumber * frameRate seconds.
    while !input.isReadyForMoreMediaData { Thread.sleep(forTimeInterval: 10 / 1000) }
    let presentationTime = CMTime(seconds: Double(frameNumber) * frameRate, preferredTimescale: 600)
    let didAppend = pixelBufferAdaptor.append(pixelBuffer, withPresentationTime: presentationTime)
    precondition(didAppend)
}


// Unpreferred scenario: using CoreImage to fill the manually created buffer. Proves that CIImage 
// can fill buffer and working.

// for frameNumber in 0 ..< frameCount {
//     var pixelBuffer: CVPixelBuffer?
//     precondition(CVPixelBufferCreate(nil, frameImage.width, frameImage.height, kCVPixelFormatType_32ARGB, pixelBufferAttributes as CFDictionary, &pixelBuffer) == kCVReturnSuccess)
//
//     precondition(CVPixelBufferLockBaseAddress(pixelBuffer!, []) == kCVReturnSuccess)
//     defer { precondition(CVPixelBufferUnlockBaseAddress(pixelBuffer!, []) == kCVReturnSuccess) }
//
//     let ciImage = CIImage(cgImage: frameImage)
//     context.render(ciImage, to: pixelBuffer!)
//
//     // ✅ This passes.
//     let bytes = UnsafeBufferPointer(start: CVPixelBufferGetBaseAddress(pixelBuffer!)!.assumingMemoryBound(to: UInt8.self), count: CVPixelBufferGetDataSize(pixelBuffer!))
//     precondition(bytes.contains(where: { $0 != 0 }))
//
//     while !input.isReadyForMoreMediaData { Thread.sleep(forTimeInterval: 10 / 1000) }
//     precondition(pixelBufferAdaptor.append(pixelBuffer!, withPresentationTime: CMTime(seconds: Double(frameNumber) * frameRate, preferredTimescale: 600)))
// }


// Unpreferred scenario: using CoreGraphics to fill the buffer from the pixel buffer adapter. Shows that
// buffer from pixel buffer adapter can be filled and working.

// for frameNumber in 0 ..< frameCount {
//     var pixelBuffer: CVPixelBuffer?
//     guard let pixelBufferPool: CVPixelBufferPool = pixelBufferAdaptor.pixelBufferPool else { preconditionFailure() }
//     precondition(CVPixelBufferPoolCreatePixelBuffer(nil, pixelBufferPool, &pixelBuffer) == kCVReturnSuccess)
//
//     precondition(CVPixelBufferLockBaseAddress(pixelBuffer!, []) == kCVReturnSuccess)
//     defer { precondition(CVPixelBufferUnlockBaseAddress(pixelBuffer!, []) == kCVReturnSuccess) }
//
//     guard let context: CGContext = CGContext(data: CVPixelBufferGetBaseAddress(pixelBuffer!), width: frameImage.width, height: frameImage.height, bitsPerComponent: 8, bytesPerRow: CVPixelBufferGetBytesPerRow(pixelBuffer!), space: colorSpace, bitmapInfo: CGImageAlphaInfo.premultipliedFirst.rawValue) else { preconditionFailure() }
//     context.clear(CGRect(origin: .zero, size: frameSize))
//     context.draw(frameImage, in: CGRect(origin: .zero, size: frameSize))
//
//     // ✅ This passes.
//     let bytes = UnsafeBufferPointer(start: CVPixelBufferGetBaseAddress(pixelBuffer!)!.assumingMemoryBound(to: UInt8.self), count: CVPixelBufferGetDataSize(pixelBuffer!))
//     precondition(bytes.contains(where: { $0 != 0 }))
//
//     while !input.isReadyForMoreMediaData { Thread.sleep(forTimeInterval: 10 / 1000) }
//     precondition(pixelBufferAdaptor.append(pixelBuffer!, withPresentationTime: CMTime(seconds: Double(frameNumber) * frameRate, preferredTimescale: 600)))
// }

// Finish the movie: mark the input as finished, end the session at the time just past
// the last frame, and block until the writer's asynchronous completion handler fires.
let semaphore = DispatchSemaphore(value: 0)

input.markAsFinished()
writer.endSession(atSourceTime: CMTime(seconds: Double(frameCount) * frameRate, preferredTimescale: 600))
writer.finishWriting(completionHandler: { semaphore.signal() })

semaphore.wait()

// The completion handler fires whether or not writing succeeded, so check the writer's
// status before reporting success.
guard writer.status == .completed else {
    preconditionFailure("Writing to \(url.path) failed: \(String(describing: writer.error))")
}

Swift.print("Successfully finished rendering to \(url.path)")

然而,改用 CGContext 的方案可以正常工作,但我需要 CIContext 才能利用 GPU。问题似乎出在 AVAssetWriterInputPixelBufferAdaptor 的缓冲池提供的像素缓冲区上。将 CIContext 渲染到单独创建的缓冲区并将它们附加到适配器是可行的,但效率非常低。将 CIContext 渲染到适配器缓冲池提供的缓冲区时,却根本没有数据写入缓冲区,其内容实际上全为零,就好像两者不兼容一样!不过,使用 CGImage 进行渲染是可行的,因此可以手动复制数据。

主要观察结果是 CIContext.render 似乎工作不同步,或者在填充缓冲区和将数据写入视频流之间出现问题。换句话说,当缓冲区被刷新时,缓冲区中没有数据。以下内容有点指向那个方向:

  1. 删除缓冲区锁定导致几乎所有帧都被写入,除了前几个,上面的代码实际上产生了 correct output,但对于实际数据,行为如所描述的那样。
  2. 使用不同的编解码器(如 ProRes422)会导致几乎所有帧都被正确写入,只有少数空白 – 上面的代码也会生成 correct output,但较大和复杂的图像会导致跳帧。

这段代码有什么问题,正确的做法是什么?

P.S. 大多数 iOS 示例使用几乎相同的实现,并且似乎工作得很好。我发现它在 macOS 上的行为可能有所不同,但找不到任何官方文档。

对于您的用例,最好使用 AVAssetWriterInput 的 pull-style API,因为您不需要实时处理任何媒体(不像从相机采集画面时那样)。

因此,与其在输入未准备好时暂停线程,不如等待它拉取下一帧。在这种情况下,请记住还要将 expectsMediaDataInRealTime 设置为 false。

我认为您当前方法的主要问题是:在 writer 尚未准备好时,您暂停了正在进行视频处理的线程。

(顺便说一句:您可以使用 CIImage(color:) 直接创建纯色的 CIImage,无需先创建 CGImage。)

与 Apple 开发人员技术支持交谈后发现:

Core Image defers the rendering until the client requests the access to the frame buffer, i.e. CVPixelBufferLockBaseAddress.

所以,解决方案就是在调用 CIContext.render 之后简单地执行 CVPixelBufferLockBaseAddress,如下所示:

for frameNumber in 0 ..< frameCount {
    // Obtain an empty buffer from the adaptor's pool. The status-returning calls are made
    // outside of `precondition` because its condition expression is not guaranteed to be
    // evaluated in -Ounchecked builds.
    guard let pixelBufferPool: CVPixelBufferPool = pixelBufferAdaptor.pixelBufferPool else { preconditionFailure() }
    var pooledBuffer: CVPixelBuffer?
    let createStatus = CVPixelBufferPoolCreatePixelBuffer(nil, pixelBufferPool, &pooledBuffer)
    precondition(createStatus == kCVReturnSuccess)
    guard let pixelBuffer = pooledBuffer else { preconditionFailure() }

    // Render first, while the buffer is still unlocked.
    let ciImage = CIImage(cgImage: frameImage)
    context.render(ciImage, to: pixelBuffer)

    // Lock only AFTER rendering: per the Apple DTS statement quoted with this snippet,
    // Core Image defers the rendering until the client requests access to the frame
    // buffer, i.e. until CVPixelBufferLockBaseAddress is called.
    let lockStatus = CVPixelBufferLockBaseAddress(pixelBuffer, [])
    precondition(lockStatus == kCVReturnSuccess)
    defer {
        let unlockStatus = CVPixelBufferUnlockBaseAddress(pixelBuffer, [])
        precondition(unlockStatus == kCVReturnSuccess)
    }

    // Sanity check: at least one byte of the buffer must be non-zero.
    let bytes = UnsafeBufferPointer(start: CVPixelBufferGetBaseAddress(pixelBuffer)!.assumingMemoryBound(to: UInt8.self), count: CVPixelBufferGetDataSize(pixelBuffer))
    precondition(bytes.contains(where: { $0 != 0 }))

    // Poll every 10 ms until the input accepts more data, then append the frame at
    // frameNumber * frameRate seconds.
    while !input.isReadyForMoreMediaData { Thread.sleep(forTimeInterval: 10 / 1000) }
    let presentationTime = CMTime(seconds: Double(frameNumber) * frameRate, preferredTimescale: 600)
    let didAppend = pixelBufferAdaptor.append(pixelBuffer, withPresentationTime: presentationTime)
    precondition(didAppend)
}