How to normalize disparity data in iOS?
In the WWDC session "Image Editing with Depth" they mention normalizedDisparity and normalizedDisparityImage a few times:
"The basic idea is that we're going to map our normalized disparity
values into values between 0 and 1"
"So once you know the min and max you can normalize the depth or disparity between 0 and 1."
First I tried to get the disparity image like this:
let disparityImage = depthImage.applyingFilter(
"CIDepthToDisparity", withInputParameters: nil)
Then I tried to get the depthDataMap and normalize it, but with no success. Am I on the right track? Any hints on what to do would be appreciated.
Edit:
Here is my test code, sorry for the quality. I get the min and max, then I try to loop over the data to normalize it (let normalizedPoint = (point - min) / (max - min)).
let depthDataMap = depthData!.depthDataMap
let width = CVPixelBufferGetWidth(depthDataMap)   // 768 on an iPhone 7+
let height = CVPixelBufferGetHeight(depthDataMap) // 576 on an iPhone 7+
CVPixelBufferLockBaseAddress(depthDataMap, CVPixelBufferLockFlags(rawValue: 0))

// Convert the base address to a safe pointer of the appropriate type
let floatBuffer = unsafeBitCast(CVPixelBufferGetBaseAddress(depthDataMap),
                                to: UnsafeMutablePointer<Float32>.self)

var min = floatBuffer[0]
var max = floatBuffer[0]
for x in 0..<width {
    for y in 0..<height {
        let distanceAtXYPoint = floatBuffer[Int(x * y)]
        if distanceAtXYPoint < min {
            min = distanceAtXYPoint
        }
        if distanceAtXYPoint > max {
            max = distanceAtXYPoint
        }
    }
}
What I expected was that the data would reflect the disparity at the point where the user tapped the image, but it did not match. The code to find the disparity where the user tapped is here:
// Apply the filter with the sampleRect from the user’s tap. Don’t forget to clamp!
let minMaxImage = normalized?.clampingToExtent().applyingFilter(
    "CIAreaMinMaxRed",
    withInputParameters: [kCIInputExtentKey: CIVector(cgRect: rect2)])

// A four-byte buffer to store a single pixel value
var pixel = [UInt8](repeating: 0, count: 4)

// Render the image to a 1x1 rect. Be sure to use a nil color space.
context.render(minMaxImage!, toBitmap: &pixel, rowBytes: 4,
               bounds: CGRect(x: 0, y: 0, width: 1, height: 1),
               format: kCIFormatRGBA8, colorSpace: nil)

// The max is stored in the green channel, the min in the red.
let disparity = Float(pixel[1]) / 255.0
There is a new blog post on raywenderlich.com called "Image Depth Maps Tutorial for iOS" with a sample app and details related to working with depth. The sample code shows how to normalize the depth data with a CVPixelBuffer extension:
extension CVPixelBuffer {
    func normalize() {
        let width = CVPixelBufferGetWidth(self)
        let height = CVPixelBufferGetHeight(self)
        CVPixelBufferLockBaseAddress(self, CVPixelBufferLockFlags(rawValue: 0))
        let floatBuffer = unsafeBitCast(CVPixelBufferGetBaseAddress(self),
                                        to: UnsafeMutablePointer<Float>.self)

        // First pass: find the extrema. The seeds assume disparity values in 0...1.
        var minPixel: Float = 1.0
        var maxPixel: Float = 0.0
        for y in 0 ..< height {
            for x in 0 ..< width {
                let pixel = floatBuffer[y * width + x]
                minPixel = min(pixel, minPixel)
                maxPixel = max(pixel, maxPixel)
            }
        }

        // Second pass: rescale every pixel to (pixel - min) / range.
        let range = maxPixel - minPixel
        for y in 0 ..< height {
            for x in 0 ..< width {
                let pixel = floatBuffer[y * width + x]
                floatBuffer[y * width + x] = (pixel - minPixel) / range
            }
        }

        CVPixelBufferUnlockBaseAddress(self, CVPixelBufferLockFlags(rawValue: 0))
    }
}
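A hypothetical call site, reusing the question's depthDataMap (the CIImage/UIImage preview step is my assumption, not part of the tutorial excerpt; the buffer must hold Float32 disparity values, which is what the unsafeBitCast above requires):

import CoreImage
import UIKit

// Normalize in place, then wrap the buffer for a grayscale preview
// (black = min disparity, white = max).
depthDataMap.normalize()
let ciImage = CIImage(cvPixelBuffer: depthDataMap)
let preview = UIImage(ciImage: ciImage)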
One thing to keep in mind when working with depth data: it is lower resolution than the actual image, so you need to scale it up (more info in the blog post and in the WWDC video).
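For example, scaling the disparity image up to the photo's size could look like this (a sketch; mainImage and disparityImage are assumed CIImage values, not names from the blog post):

import CoreImage

// The depth map is smaller than the photo (e.g. 768x576 next to a 12 MP
// image), so stretch it to the photo's extent before filtering or compositing.
let scaleX = mainImage.extent.width / disparityImage.extent.width
let scaleY = mainImage.extent.height / disparityImage.extent.height
let scaledDisparity = disparityImage.transformed(
    by: CGAffineTransform(scaleX: scaleX, y: scaleY))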
Try the Accelerate framework's vDSP vector functions. Here is the normalization done in two functions.
To change the cvPixelBuffer to a 0..1 normalized range:
myCVPixelBuffer.setUpNormalize()
import Accelerate

extension CVPixelBuffer {

    func vectorNormalize(targetVector: UnsafeMutableBufferPointer<Float>) -> [Float] {
        // range = max - min
        // normalized to 0..1 is (pixel - minPixel) / range
        // See "Using vDSP for Vector-based Arithmetic" and the section
        // "Vector extrema calculation" in the Accelerate documentation:
        //   static func maximum<U>(U) -> Float
        //     Returns the maximum element of a single-precision vector.
        //   static func minimum<U>(U) -> Float
        //     Returns the minimum element of a single-precision vector.
        let maxValue = vDSP.maximum(targetVector)
        let minValue = vDSP.minimum(targetVector)
        let range = maxValue - minValue
        let negMinValue = -minValue
        // Adding the negated minimum is subtracting the minimum.
        let subtractVector = vDSP.add(negMinValue, targetVector)
        let result = vDSP.divide(subtractVector, range)
        return result
    }

    func setUpNormalize() -> CVPixelBuffer {
        // Grayscale buffer of Float32 (i.e. Float); returns the normalized CVPixelBuffer.
        CVPixelBufferLockBaseAddress(self, CVPixelBufferLockFlags(rawValue: 0))
        let width = CVPixelBufferGetWidthOfPlane(self, 0)
        let height = CVPixelBufferGetHeightOfPlane(self, 0)
        let count = width * height

        let bufferBaseAddress = CVPixelBufferGetBaseAddressOfPlane(self, 0) // UnsafeMutableRawPointer
        let pixelBufferBase = unsafeBitCast(bufferBaseAddress, to: UnsafeMutablePointer<Float>.self)

        // Work on a copy, then write the normalized map back into the CVPixelBuffer.
        let depthCopy = UnsafeMutablePointer<Float>.allocate(capacity: count)
        depthCopy.initialize(from: pixelBufferBase, count: count)
        let depthCopyBuffer = UnsafeMutableBufferPointer<Float>(start: depthCopy, count: count)

        let normalizedDisparity = vectorNormalize(targetVector: depthCopyBuffer)
        pixelBufferBase.initialize(from: normalizedDisparity, count: count)

        depthCopy.deallocate()
        CVPixelBufferUnlockBaseAddress(self, CVPixelBufferLockFlags(rawValue: 0))
        return self
    }
}
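As a quick sanity check (a sketch along the same lines; myCVPixelBuffer is the buffer from the usage line above), the same vDSP extrema calls can confirm the buffer now spans 0...1:

CVPixelBufferLockBaseAddress(myCVPixelBuffer, .readOnly)
let base = unsafeBitCast(CVPixelBufferGetBaseAddressOfPlane(myCVPixelBuffer, 0),
                         to: UnsafeMutablePointer<Float>.self)
let count = CVPixelBufferGetWidthOfPlane(myCVPixelBuffer, 0) *
    CVPixelBufferGetHeightOfPlane(myCVPixelBuffer, 0)
let checkBuffer = UnsafeMutableBufferPointer<Float>(start: base, count: count)
print(vDSP.minimum(checkBuffer), vDSP.maximum(checkBuffer)) // expect ~0.0 and ~1.0
CVPixelBufferUnlockBaseAddress(myCVPixelBuffer, .readOnly)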
You can see it in action in a modified version of Apple's 'PhotoBrowse' sample app at
https://github.com/racewalkWill/PhotoBrowseModified
Will's answer above is good, but it can be improved as follows. I use it with depth data from photos; it may not work if the depth data isn't 16-bit, as mentioned above. I haven't found such a photo yet. I'm surprised there is no filter in Core Image that handles this.
import Accelerate

extension CVPixelBuffer {
    func normalize() {
        CVPixelBufferLockBaseAddress(self, CVPixelBufferLockFlags(rawValue: 0))
        let width = CVPixelBufferGetWidthOfPlane(self, 0)
        let height = CVPixelBufferGetHeightOfPlane(self, 0)
        let count = width * height

        let pixelBufferBase = unsafeBitCast(CVPixelBufferGetBaseAddressOfPlane(self, 0),
                                            to: UnsafeMutablePointer<Float>.self)
        let depthCopyBuffer = UnsafeMutableBufferPointer<Float>(start: pixelBufferBase, count: count)

        // Shift so the minimum becomes 0, then divide by the range, all with vDSP.
        let maxValue = vDSP.maximum(depthCopyBuffer)
        let minValue = vDSP.minimum(depthCopyBuffer)
        let range = maxValue - minValue
        let negMinValue = -minValue
        let subtractVector = vDSP.add(negMinValue, depthCopyBuffer)
        let normalizedDisparity = vDSP.divide(subtractVector, range)
        pixelBufferBase.initialize(from: normalizedDisparity, count: count)

        CVPixelBufferUnlockBaseAddress(self, CVPixelBufferLockFlags(rawValue: 0))
    }
}
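A hypothetical end-to-end call, assuming photo is an AVCapturePhoto captured with depth delivery enabled; converting to 32-bit disparity first matters because the Float pointer cast above expects it:

import AVFoundation

let disparityData = photo.depthData!.converting(
    toDepthDataType: kCVPixelFormatType_DisparityFloat32)
let map = disparityData.depthDataMap
map.normalize() // in place, via the extension above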