CIFilter颜色立方体数据加载

CIFilter color cube data loading

我有大约 50 个 3D LUT(存储为 png 图像,每个大小为 900KB)并使用 CIColorCube 过滤器生成过滤后的图像。我使用 UICollectionView 为每个 LUT 显示过滤后的缩略图 (100x100)(就像在照片应用程序中一样)。问题是当我在用户滚动时生成过滤图像时,UICollectionView 滚动变得非常慢(没有接近照片应用程序的平滑度)。我想到了预先生成过滤图像,但问题是从 LUT png 生成 cubeData 大约需要 150 毫秒,因此对于 50 个缩略图,准备过滤缩略图大约需要 7-8 秒,这很长。这也正是滚动性能的罪魁祸首。我想知道我可以做些什么来让它像在照片应用程序或其他照片编辑应用程序中一样流畅。这是我从 LUT png 生成立方体数据的代码。我相信解决问题的 CoreImage/Metal 技巧比基于 UIKit/DispatchQueue/NSOperation 的修复更多。

      /// Converts a tiled LUT image into the float data expected by `CIColorCube`'s `inputCubeData`.
      ///
      /// The image is read as a grid of `lutSize` x `lutSize` tiles. Tiles are visited
      /// left-to-right, top-to-bottom, and each tile becomes one z-slice of the cube.
      ///
      /// - Parameters:
      ///   - image: The LUT image. Its pixel width and height must both be multiples of
      ///     `lutSize`, and it must contain exactly `lutSize` tiles.
      ///   - lutSize: Edge length of the cube (for example 64).
      /// - Returns: `lutSize * lutSize * lutSize * 4` floats (each byte divided by 255), or `nil`
      ///   when the image has no `CGImage`, its layout does not match `lutSize`, or its pixels
      ///   cannot be read.
      public static func colorCubeDataFromLUTPNGImage(_ image : UIImage, lutSize:Int) -> Data? {
        let size = lutSize

        // A non-positive size would divide by zero below; a missing CGImage has no pixels to read.
        guard size > 0, let lutImage = image.cgImage else {
            NSLog("Invalid colorLUT")
            return nil
        }

        let lutWidth    = lutImage.width
        let lutHeight   = lutImage.height
        let rowCount    = lutHeight / size
        let columnCount = lutWidth / size

        guard lutWidth % size == 0, lutHeight % size == 0, rowCount * columnCount == size else {
            NSLog("Invalid colorLUT")
            return nil
        }

        // Four float components per cube entry.
        let componentCount = size * size * size * 4

        guard let bitmap = getBytesFromImage(image: image), bitmap.count >= componentCount else {
            NSLog("Invalid colorLUT")
            return nil
        }

        // Capacity is counted in Floats, not bytes.
        let cubeData = UnsafeMutablePointer<Float>.allocate(capacity: componentCount)
        var bitmapOffset = 0

        for row in 0 ..< rowCount {
            for y in 0 ..< size {
                for column in 0 ..< columnCount {
                    // Tiles are numbered row-major; the tile index is the cube's z coordinate.
                    let z = row * columnCount + column
                    for x in 0 ..< size {
                        let dataOffset = (z * size * size + y * size + x) * 4

                        // getBytesFromImage draws with premultipliedLast + 32-bit little-endian
                        // order, so each pixel's bytes arrive as A, B, G, R. Reversing them
                        // stores the entry as R, G, B, A.
                        cubeData[dataOffset + 0] = Float(bitmap[bitmapOffset + 3]) / 255.0
                        cubeData[dataOffset + 1] = Float(bitmap[bitmapOffset + 2]) / 255.0
                        cubeData[dataOffset + 2] = Float(bitmap[bitmapOffset + 1]) / 255.0
                        cubeData[dataOffset + 3] = Float(bitmap[bitmapOffset]) / 255.0
                        bitmapOffset += 4
                    }
                }
            }
        }

        // Memory obtained from `allocate` has to be released with `deallocate`, so give Data a
        // matching deallocator instead of `.free`.
        return Data(bytesNoCopy: cubeData,
                    count: componentCount * MemoryLayout<Float>.size,
                    deallocator: .custom({ pointer, _ in pointer.deallocate() }))
      }


/// Draws `image` into an 8-bit-per-component bitmap and returns the raw bytes.
///
/// The bitmap uses `premultipliedLast` alpha with 32-bit little-endian byte order, four bytes
/// per pixel, and tightly packed rows (`width * 4` bytes per row).
///
/// - Parameter image: The image to read.
/// - Returns: `width * height * 4` bytes, or `nil` when `image` is `nil`, has no `CGImage`,
///   or the bitmap context cannot be created.
fileprivate static func getBytesFromImage(image:UIImage?) -> [UInt8]?
{
    guard let imageRef = image?.cgImage else {
        return nil
    }

    let width = imageRef.width
    let height = imageRef.height
    let bitsPerComponent = 8
    let bytesPerRow = width * 4
    let totalBytes = height * bytesPerRow

    let bitmapInfo = CGImageAlphaInfo.premultipliedLast.rawValue | CGBitmapInfo.byteOrder32Little.rawValue
    let colorSpace = CGColorSpaceCreateDeviceRGB()
    var intensities = [UInt8](repeating: 0, count: totalBytes)

    // The context keeps the data pointer for as long as it lives, so both the context and the
    // draw call must stay inside the closure where the array's storage is guaranteed valid.
    // Passing `&intensities` directly would hand the context a pointer that expires as soon as
    // the initializer returns.
    let didDraw = intensities.withUnsafeMutableBytes { buffer -> Bool in
        guard let contextRef = CGContext(data: buffer.baseAddress,
                                         width: width,
                                         height: height,
                                         bitsPerComponent: bitsPerComponent,
                                         bytesPerRow: bytesPerRow,
                                         space: colorSpace,
                                         bitmapInfo: bitmapInfo) else {
            return false
        }
        contextRef.draw(imageRef, in: CGRect(x: 0.0, y: 0.0, width: CGFloat(width), height: CGFloat(height)))
        return true
    }

    return didDraw ? intensities : nil
}

这是我的 UICollectionViewCell 设置代码:

   /// Dequeues a `FilterCell` and configures it with the thumbnail produced by the LUT stored at
   /// `lutPaths[indexPath.item]`.
   ///
   /// LUT decoding and rendering run synchronously inside this call. When any step fails the
   /// failure is logged and the dequeued cell is returned without a new image.
   func collectionView(_ collectionView: UICollectionView, cellForItemAt indexPath: IndexPath) -> UICollectionViewCell {
    let lutPath = self.lutPaths[indexPath.item]

    let cell = collectionView.dequeueReusableCell(withReuseIdentifier: "FilterCell", for: indexPath) as! FilterCell

    guard let lutImage = UIImage(contentsOfFile: lutPath) else {
        NSLog("LUT not found at \(indexPath.item)")
        return cell
    }

    // The helper returns nil for a malformed LUT; skip the thumbnail instead of crashing.
    guard let lutData = ColorCubeHelper.colorCubeDataFromLUTPNGImage(lutImage, lutSize: 64),
          let renderer = CIFilter(name: "CIColorCube") else {
        NSLog("Could not build color cube for LUT at \(indexPath.item)")
        return cell
    }

    renderer.setValue(lutData, forKey: "inputCubeData")
    renderer.setValue(64, forKey: "inputCubeDimension")
    renderer.setValue(inputCIImage, forKey: kCIInputImageKey)

    guard let outputImage = renderer.outputImage,
          let cgImage = self.ciContext.createCGImage(outputImage, from: outputImage.extent) else {
        NSLog("Could not render thumbnail for LUT at \(indexPath.item)")
        return cell
    }

    cell.configure(image: UIImage(cgImage: cgImage))
    return cell
   }

我们发现,可以把 LUT 图像直接渲染到一个 32 位浮点位图上下文中,从而直接得到 CIColorCube 所需的数据格式:

// Render the LUT into a 32-bit float context, since that's the data format needed by CIColorCube.
// One simd_float4 (four 32-bit floats) is reserved per pixel.
let pixelData = UnsafeMutablePointer<simd_float4>.allocate(capacity: lutImage.width * lutImage.height)
let bitmapInfo = CGImageAlphaInfo.premultipliedLast.rawValue | CGBitmapInfo.floatComponents.rawValue | CGBitmapInfo.byteOrder32Little.rawValue
// NOTE(review): `CGColorSpace.sRGBColorSpace` is presumably a project-side extension; with plain
// CoreGraphics the equivalent would be `CGColorSpace(name: CGColorSpace.sRGB)` — confirm.
guard let bitmapContext = CGContext(data: pixelData,
                                    width: lutImage.width,
                                    height: lutImage.height,
                                    bitsPerComponent: MemoryLayout<simd_float4.Scalar>.size * 8,
                                    bytesPerRow: MemoryLayout<simd_float4>.size * lutImage.width,
                                    space: lutImage.colorSpace ?? CGColorSpace.sRGBColorSpace,
                                    bitmapInfo: bitmapInfo)
else {
    // Nothing else owns the buffer on this path, so release it before bailing out.
    pixelData.deallocate()
    assertionFailure("Failed to create bitmap context for conversion")
    return nil
}
bitmapContext.draw(lutImage, in: CGRect(x: 0, y: 0, width: lutImage.width, height: lutImage.height))

// Memory obtained from `allocate` has to be released with `deallocate`, so Data gets a matching
// custom deallocator rather than `.free`.
let lutData = Data(bytesNoCopy: pixelData,
                   count: bitmapContext.bytesPerRow * bitmapContext.height,
                   deallocator: .custom({ pointer, _ in pointer.deallocate() }))

但是,如果我没记错的话,我们不得不交换 LUT 图像中的红色和蓝色分量,因为 Core Image 使用 BGRA 格式(就像您在代码中所做的那样)。

此外,在您的集合视图委托中,如果尽快返回单元格,并把缩略图的生成派发到后台线程,待生成完成后再设置单元格的图像,可能会提升性能。像这样:

/// Returns the dequeued `FilterCell` immediately and renders its LUT thumbnail off the main thread.
///
/// The thumbnail is applied back on the main queue, and only if the cell still represents the
/// item it was dequeued for. Any failure along the way is logged and leaves the cell untouched.
func collectionView(_ collectionView: UICollectionView, cellForItemAt indexPath: IndexPath) -> UICollectionViewCell {
    let lutPath = self.lutPaths[indexPath.item]

    let cell = collectionView.dequeueReusableCell(withReuseIdentifier: "FilterCell", for: indexPath) as! FilterCell

    // Cells are reused while scrolling. Stamp the cell with the item it currently stands for so a
    // render that finishes late can tell it has been handed to another item and must not draw.
    // NOTE(review): this overwrites `cell.tag`; confirm nothing else relies on that property.
    cell.tag = indexPath.item

    // Read the source image while still on the calling thread; the closure only sees the copy.
    let sourceImage = inputCIImage

    // The thumbnail is something the user is waiting to see, so `.background` would be too low
    // a priority for this work.
    DispatchQueue.global(qos: .userInitiated).async {
        guard let lutImage = UIImage(contentsOfFile: lutPath) else {
            NSLog("LUT not found at \(indexPath.item)")
            return
        }

        // The helper returns nil for a malformed LUT; skip the thumbnail instead of crashing.
        guard let lutData = ColorCubeHelper.colorCubeDataFromLUTPNGImage(lutImage, lutSize: 64),
              let renderer = CIFilter(name: "CIColorCube") else {
            NSLog("Could not build color cube for LUT at \(indexPath.item)")
            return
        }

        renderer.setValue(lutData, forKey: "inputCubeData")
        renderer.setValue(64, forKey: "inputCubeDimension")
        renderer.setValue(sourceImage, forKey: kCIInputImageKey)

        guard let outputImage = renderer.outputImage,
              let cgImage = self.ciContext.createCGImage(outputImage, from: outputImage.extent) else {
            NSLog("Could not render thumbnail for LUT at \(indexPath.item)")
            return
        }

        let thumbnail = UIImage(cgImage: cgImage)

        DispatchQueue.main.async {
            // Drop the result if the cell was reused for a different item in the meantime.
            guard cell.tag == indexPath.item else {
                return
            }
            cell.configure(image: thumbnail)
        }
    }

    return cell
}

我认为下面是一种更高效的做法:它能以正确的分量顺序从 LUT 图像中取出字节,并且在图像最终渲染到屏幕之前,全程都留在 Core Image 的处理流程中。

        guard let image = CIImage(contentsOf: url) else {
            return
        }
        // One simd_float4 (four 32-bit floats) per pixel of the LUT image.
        let totalPixels = Int(image.extent.width) * Int(image.extent.height)
        let pixelData = UnsafeMutablePointer<simd_float4>.allocate(capacity: totalPixels)

        // [.workingColorSpace: NSNull()] below is important.
        // We don't want any color conversion when rendering pixels to buffer.
        let context = CIContext(options: [.workingColorSpace: NSNull()])
        context.render(image,
                       toBitmap: pixelData,
                       rowBytes: Int(image.extent.width) * MemoryLayout<simd_float4>.size,
                       bounds: image.extent,
                       format: .RGBAf, // Float32 per component in that order
                       colorSpace: nil)

        // NOTE(review): `dimension` is a Double and is only a whole number when totalPixels is a
        // perfect cube; presumably it is converted to an integer before being used as the cube
        // dimension — confirm at the use site.
        let dimension = cbrt(Double(totalPixels))
        // Memory obtained from `allocate` has to be released with `deallocate`, so Data gets a
        // matching custom deallocator rather than `.free`.
        let data = Data(bytesNoCopy: pixelData,
                        count: totalPixels * MemoryLayout<simd_float4>.size,
                        deallocator: .custom({ pointer, _ in pointer.deallocate() }))

“Core Image 使用 BGRA 格式”这一假设是不正确的。Core Image 使用 RGBA 颜色格式(RGBA8、RGBAf、RGBAh 等)。CIColorCube 的查找表是按 BGR 顺序布局的,但其中的颜色本身是 RGBAf 格式,即每个分量都由一个 32 位浮点数表示。

当然,要让上面的代码正常工作,LUT 图像必须按特定的方式布局。下面是一个恒等(Identity)LUT PNG 的示例:Identity LUT

顺便说一句,可以看看这个应用:https://apps.apple.com/us/app/filter-magic/id1594986951。它刚刚发布,(目前)还不支持从 LUT png 加载查找表,但具有许多其他有用的功能,并提供了一个可以动手操作的试验场,用来试用其中的每一个滤镜。

我终于通过使用 vDSP 函数实现了它的工作,这使得 cubeData 生成超快,速度如此之快以至于滚动在 iPhone X 上变得平滑,而无需使用任何后台队列来加载纹理!

   /// Builds 64-point color cube data from an `NSImage` LUT.
   ///
   /// - Parameter lutImage: The LUT image; its `cgImage` is handed to the `CGImage` overload.
   /// - Returns: The result of the `CGImage` overload, or `nil` when `lutImage.cgImage` is `nil`.
   public static func cubeDataForLut64(_ lutImage: NSImage) -> Data? {
       if let backingImage = lutImage.cgImage {
           return cubeDataForLut64(backingImage)
       }
       return nil
   }

   /// Converts a tiled 64-point LUT image into float data for `CIColorCube`.
   ///
   /// The image is read as a grid of 64 x 64 tiles, visited left-to-right and top-to-bottom;
   /// each tile becomes one z-slice of the cube. Every pixel row of a tile (64 pixels, four
   /// components each) is divided by 255 and copied in a single vDSP call.
   ///
   /// - Parameter lutImage: The LUT image. Width and height must both be multiples of 64 and
   ///   the image must contain exactly 64 tiles.
   /// - Returns: `64 * 64 * 64 * 4` floats, or `nil` when the image layout is invalid.
   private static func cubeDataForLut64(_ lutImage: CGImage) -> Data? {
     let cubeDimension = 64
     let rowFloatCount = cubeDimension * 4                  // floats in one pixel row of a tile
     let sliceFloatCount = cubeDimension * rowFloatCount    // floats in one z-slice
     let cubeFloatCount = cubeDimension * sliceFloatCount
     let cubeSize = cubeFloatCount * MemoryLayout<Float>.size

     let imageWidth = lutImage.width
     let imageHeight = lutImage.height
     let rowCount = imageHeight / cubeDimension
     let columnCount = imageWidth / cubeDimension

     // All three conditions must hold. Accepting an image that satisfies only one of them would
     // let the loops below read and write outside their buffers.
     guard imageWidth % cubeDimension == 0,
           imageHeight % cubeDimension == 0,
           rowCount * columnCount == cubeDimension else {
        print("Invalid LUT")
        return nil
     }

     let bitmapData = createRGBABitmapFromImage(lutImage)
     // The float bitmap is only needed while the cube is being filled.
     defer { free(bitmapData) }

     // Capacity is counted in Floats, not bytes.
     let cubeData = UnsafeMutablePointer<Float>.allocate(capacity: cubeFloatCount)

     var divider: Float = 255.0
     var bitmapOffset = 0
     for row in 0 ..< rowCount { // ROW
        for y in 0 ..< cubeDimension {
            for column in 0 ..< columnCount { // COLUMN
                // Tiles are numbered row-major; the tile index is the cube's z coordinate.
                let z = row * columnCount + column
                let dataOffset = z * sliceFloatCount + y * rowFloatCount
                vDSP_vsdiv(bitmapData + bitmapOffset, 1, &divider, cubeData + dataOffset, 1, vDSP_Length(rowFloatCount))
                bitmapOffset += rowFloatCount
            }
        }
     }

     // Memory obtained from `allocate` has to be released with `deallocate`, so Data gets a
     // matching custom deallocator rather than `.free`.
     return Data(bytesNoCopy: cubeData, count: cubeSize, deallocator: .custom({ pointer, _ in pointer.deallocate() }))
   }


 /// Draws `image` into an 8-bit-per-component bitmap (`premultipliedLast`, device RGB) and widens
 /// every byte to a single-precision `Float`.
 ///
 /// - Parameter image: The image to rasterize.
 /// - Returns: A `malloc`-allocated buffer holding `width * height * 4` floats, one per bitmap
 ///   byte (values are the raw byte values, not normalized). The caller owns the buffer and
 ///   must release it with `free`.
 fileprivate static func createRGBABitmapFromImage(_ image: CGImage) -> UnsafeMutablePointer<Float> {
    let bitsPerComponent = 8
    let bytesPerPixel = 4 // 4 bytes = RGBA

    let imageWidth = image.width
    let imageHeight = image.height

    let bitmapBytesPerRow = imageWidth * bytesPerPixel
    let bitmapByteCount = bitmapBytesPerRow * imageHeight

    let colorSpace = CGColorSpaceCreateDeviceRGB()

    // Zero-filled rather than plain malloc: if the context cannot be created the draw below is
    // skipped, and the conversion must then see deterministic zeros instead of whatever happened
    // to be in the heap.
    guard let bitmapData = calloc(bitmapByteCount, MemoryLayout<UInt8>.size) else {
        fatalError("Out of memory while allocating the LUT bitmap")
    }
    defer { free(bitmapData) }

    let bitmapInfo = CGImageAlphaInfo.premultipliedLast.rawValue

    let context = CGContext(data: bitmapData, width: imageWidth, height: imageHeight, bitsPerComponent: bitsPerComponent, bytesPerRow: bitmapBytesPerRow, space: colorSpace, bitmapInfo: bitmapInfo)

    let rect = CGRect(x: 0, y: 0, width: imageWidth, height: imageHeight)

    context?.draw(image, in: rect)

    // Convert UInt8 byte array to single precision Float's
    guard let convertedBitmap = malloc(bitmapByteCount * MemoryLayout<Float>.size) else {
        fatalError("Out of memory while allocating the float LUT bitmap")
    }
    let floatBitmap = convertedBitmap.bindMemory(to: Float.self, capacity: bitmapByteCount)
    vDSP_vfltu8(bitmapData.assumingMemoryBound(to: UInt8.self), 1,
                floatBitmap, 1,
                vDSP_Length(bitmapByteCount))

    return floatBitmap
 }