
Face Detection with Camera


我注意到 AVCaptureStillImageOutput 在 iOS 10.0 之后被弃用了,所以我改用 AVCapturePhotoOutput。但是,我发现保存下来用于人脸检测的图像效果并不理想。有什么想法吗?


在尝试了 @Shravya Boggarapu 提到的方法之后,我目前使用 AVCaptureMetadataOutput 来检测人脸,而不再使用 CIDetector。它按预期工作。但是,当我尝试绘制人脸边界框时,位置似乎发生了错位。有什么想法吗?

// Metadata output that will report detected faces to its delegate.
let metaDataOutput = AVCaptureMetadataOutput()

// Configure the session with the photo preset and look up the back wide-angle camera.
captureSession.sessionPreset = AVCaptureSessionPresetPhoto
    let backCamera = AVCaptureDevice.defaultDevice(withDeviceType: .builtInWideAngleCamera, mediaType: AVMediaTypeVideo, position: .back)
    do {
        // Creating the device input can throw; the matching `catch` closes this excerpt.
        let input = try AVCaptureDeviceInput(device: backCamera)

        if (captureSession.canAddInput(input)) {

            // Use the metadata output instead.
            // NOTE(review): this excerpt never shows addInput/addOutput or startRunning —
            // confirm they exist in the full source, and that the output is added to the
            // session before `metadataObjectTypes` is assigned.
            if(captureSession.canAddOutput(metaDataOutput)) {

                // Deliver metadata callbacks to `self` on the main queue, restricted to faces.
                metaDataOutput.setMetadataObjectsDelegate(self, queue: DispatchQueue.main)
                metaDataOutput.metadataObjectTypes = [AVMetadataObjectTypeFace]

                // Preview layer sized to `cameraView`, using aspect-fill video gravity.
                previewLayer = AVCaptureVideoPreviewLayer(session: captureSession)
                previewLayer?.frame = cameraView.bounds
                previewLayer?.videoGravity = AVLayerVideoGravityResizeAspectFill



    } catch {

// MARK: - AVCaptureMetadataOutputObjectsDelegate
extension CameraViewController: AVCaptureMetadataOutputObjectsDelegate {
/// Receives metadata objects from the capture session and draws a rectangle for each face.
///
/// Work is done only while `findFaceControl` is true; the flag is cleared on entry, so
/// later callbacks are ignored until something sets it again (not shown in this excerpt).
func captureOutput(_ captureOutput: AVCaptureOutput!, didOutputMetadataObjects metadataObjects: [Any]!, from connection: AVCaptureConnection!) {
    if findFaceControl {
        findFaceControl = false
        for metadataObject in metadataObjects {
            if (metadataObject as AnyObject).type == AVMetadataObjectTypeFace {
                let bounds = (metadataObject as! AVMetadataFaceObject).bounds
                // Debug output: raw face bounds and the size of the hosting view.
                print("origin x: \(bounds.origin.x)")
                print("origin y: \(bounds.origin.y)")
                print("size width: \(bounds.size.width)")
                print("size height: \(bounds.size.height)")
                print("cameraView width: \(self.cameraView.frame.width)")
                print("cameraView height: \(self.cameraView.frame.height)")
                // Scale the bounds by the view size, i.e. treat them as fractions of the view.
                // NOTE(review): this direct scaling ignores the preview layer's video gravity
                // and orientation; AVCaptureVideoPreviewLayer.transformedMetadataObject(for:)
                // is the API that maps metadata into layer coordinates — confirm and switch.
                var face = CGRect()
                face.origin.x = bounds.origin.x * self.cameraView.frame.width
                face.origin.y = bounds.origin.y * self.cameraView.frame.height
                face.size.width = bounds.size.width * self.cameraView.frame.width
                face.size.height = bounds.size.height * self.cameraView.frame.height

                showBounds(at: face)



// Capture pipeline state: the session, the still-photo output and the on-screen preview layer.
var captureSession = AVCaptureSession()
var photoOutput = AVCapturePhotoOutput()
var previewLayer: AVCaptureVideoPreviewLayer?    

/// Configures the capture session and its preview layer when the view is about to appear.
// NOTE(review): no call to super.viewWillAppear(animated), addInput/addOutput or
// startRunning is visible in this excerpt — confirm against the full source.
override func viewWillAppear(_ animated: Bool) {

    captureSession.sessionPreset = AVCaptureSessionPresetHigh

    // Default video capture device.
    let backCamera = AVCaptureDevice.defaultDevice(withMediaType: AVMediaTypeVideo)
    do {
        // Creating the device input can throw; the matching `catch` closes this excerpt.
        let input = try AVCaptureDeviceInput(device: backCamera)

        if (captureSession.canAddInput(input)) {


                // Preview layer using aspect-fill video gravity, sized to `cameraView`.
                previewLayer = AVCaptureVideoPreviewLayer(session: captureSession)
                previewLayer?.videoGravity = AVLayerVideoGravityResizeAspectFill
                previewLayer?.frame = cameraView.bounds


    } catch {


/// Requests a single still photo from `photoOutput`.
///
/// A preview image is requested in the first preview pixel format the settings object
/// offers. The result is delivered to `self` through the photo-capture delegate callback.
func captureImage() {
    let settings = AVCapturePhotoSettings()
    // Only request a preview when a preview pixel format is actually available;
    // force-unwrapping `.first` would crash on an empty list.
    if let previewPixelType = settings.availablePreviewPhotoPixelFormatTypes.first {
        settings.previewPhotoFormat = [kCVPixelBufferPixelFormatTypeKey as String: previewPixelType]
    }
    photoOutput.capturePhoto(with: settings, delegate: self)
}


/// Photo-capture delegate callback: shows the captured JPEG in `imageView` and runs
/// face detection on it.
///
/// - Parameters:
///   - photoSampleBuffer: The captured photo; nothing is displayed when it is `nil`.
///   - previewPhotoSampleBuffer: Ignored — the preview is not embedded in the JPEG data.
///   - error: Logged when present, after which the callback returns.
func capture(_ captureOutput: AVCapturePhotoOutput, didFinishProcessingPhotoSampleBuffer photoSampleBuffer: CMSampleBuffer?, previewPhotoSampleBuffer: CMSampleBuffer?, resolvedSettings: AVCaptureResolvedPhotoSettings, bracketSettings: AVCaptureBracketedStillImageSettings?, error: Error?) {
    if let error = error {
        print(error.localizedDescription)
        return
    }
    // The preview sample buffer is deliberately not included in the JPEG representation.
    guard let sampleBuffer = photoSampleBuffer,
        let dataImage = AVCapturePhotoOutput.jpegPhotoDataRepresentation(forJPEGSampleBuffer: sampleBuffer, previewPhotoSampleBuffer: nil),
        let photo = UIImage(data: dataImage) else {
            // Bail out instead of force-unwrapping a missing image further down.
            return
    }
    // Swap the live preview for the captured still, then detect faces on that still.
    imageView.image = photo
    imageView.isHidden = false
    previewLayer?.isHidden = true
    findFace(img: photo)
}

findFace 对普通图像可以正常工作。但是,对于我通过相机拍摄的图像,它要么检测不到人脸,要么有时只能识别出一张脸。



/// Runs a high-accuracy Core Image face detector on `img` and calls `showBounds(at:)`
/// with a rectangle in `imageView` coordinates for every detected face.
///
/// - Parameter img: The image to scan; the function returns early when it cannot be
///   converted to a `CIImage`.
// NOTE(review): no CIDetectorImageOrientation option is passed to `features(in:options:)`;
// for camera captures whose pixel data is rotated this may explain missed faces — confirm.
func findFace(img: UIImage) {
    guard let faceImage = CIImage(image: img) else { return }
    let accuracy = [CIDetectorAccuracy: CIDetectorAccuracyHigh]
    let faceDetector = CIDetector(ofType: CIDetectorTypeFace, context: nil, options: accuracy)

    // For converting the Core Image coordinates to UIView coordinates:
    // flip the y axis, then shift by the image height.
    let detectedImageSize = faceImage.extent.size
    var transform = CGAffineTransform(scaleX: 1, y: -1)
    transform = transform.translatedBy(x: 0, y: -detectedImageSize.height)

    if let faces = faceDetector?.features(in: faceImage, options: [CIDetectorSmile: true, CIDetectorEyeBlink: true]) {
        for face in faces as! [CIFaceFeature] {

            // Apply the transform to convert the coordinates
            var faceViewBounds =  face.bounds.applying(transform)
            // Calculate the actual position and size of the rectangle in the image view:
            // one uniform scale (the smaller of the two axis ratios) plus offsets that
            // centre the scaled image inside `imageView`.
            let viewSize = imageView.bounds.size
            let scale = min(viewSize.width / detectedImageSize.width,
                            viewSize.height / detectedImageSize.height)
            let offsetX = (viewSize.width - detectedImageSize.width * scale) / 2
            let offsetY = (viewSize.height - detectedImageSize.height * scale) / 2

            faceViewBounds = faceViewBounds.applying(CGAffineTransform(scaleX: scale, y: scale))
            // Logged after scaling but before the centring offsets are added.
            print("faceBounds = \(faceViewBounds)")
            faceViewBounds.origin.x += offsetX
            faceViewBounds.origin.y += offsetY

            showBounds(at: faceViewBounds)

        // Report how many faces the detector returned.
        if faces.count != 0 {
            print("Number of faces: \(faces.count)")
        } else {
            print("No faces ")


/// Builds a transparent view with a 3-point red border at `bounds` to mark a detected face.
///
/// - Parameter bounds: Frame of the marker, in the coordinate space of its eventual superview.
// NOTE(review): the excerpt ends before the marker is added to any view hierarchy —
// confirm against the full source.
func showBounds(at bounds: CGRect) {
    let indicator = UIView(frame: bounds)
    // Re-assigns the frame already passed to the initializer above.
    indicator.frame =  bounds
    indicator.layer.borderWidth = 3
    indicator.layer.borderColor = UIColor.red.cgColor
    indicator.backgroundColor = .clear



查看您的代码后,我发现有 2 处可能导致人脸检测结果错误或效果不佳。

  1. 其一是人脸检测器的特征选项:您通过 [CIDetectorSmile: true, CIDetectorEyeBlink: true] 过滤了结果。请尝试将其设置为 nil:faceDetector?.features(in: faceImage, options: nil)
  2. 我的另一个猜测是结果图像的方向。我注意到您使用 AVCapturePhotoOutput.jpegPhotoDataRepresentation 方法生成用于检测的源图像,而系统默认生成的图像带有特定的方向——我认为是 Left/LandscapeLeft。所以,基本上,您可以使用 CIDetectorImageOrientation 键把这一方向告知人脸检测器。

CIDetectorImageOrientation: the value for this key is an integer NSNumber from 1..8 such as that found in kCGImagePropertyOrientation. If present, the detection will be done based on that orientation but the coordinates in the returned features will still be based on those of the image.

尝试设置成faceDetector?.features(in: faceImage, options: [CIDetectorImageOrientation: 8 /*Left, bottom*/]).

  1. 创建CaptureSession
  2. 为 AVCaptureVideoDataOutput 创建以下设置

    // Ask the video data output for 32-bit BGRA pixel buffers.
    output.videoSettings = [ kCVPixelBufferPixelFormatTypeKey as AnyHashable: Int(kCMPixelFormat_32BGRA) ]

  3. 当您收到 CMSampleBuffer 时,由它创建图像

// On the main queue: convert the sample buffer to a UIImage and show it in `imageView`.
// The conversion itself also runs inside this main-queue block.
DispatchQueue.main.async {
    let sampleImg = self.imageFromSampleBuffer(sampleBuffer: sampleBuffer)
    self.imageView.image = sampleImg
/// Builds a `UIImage` from the pixel data of a video sample buffer.
///
/// The bitmap context is configured for 8 bits per component, little-endian 32-bit pixels
/// with premultiplied alpha first, which matches a 32BGRA pixel buffer.
///
/// - Parameter sampleBuffer: Sample buffer delivered by the capture output.
/// - Returns: The decoded image, or an empty `UIImage()` when the sample buffer carries no
///   image buffer or the bitmap context / Quartz image cannot be created.
func imageFromSampleBuffer(sampleBuffer : CMSampleBuffer) -> UIImage {
    // Get the sample buffer's Core Video image buffer for the media data.
    guard let imageBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) else {
        return UIImage()
    }

    // Lock the base address of the pixel buffer; `defer` guarantees the matching unlock
    // on every exit path, including the early return below.
    CVPixelBufferLockBaseAddress(imageBuffer, .readOnly)
    defer { CVPixelBufferUnlockBaseAddress(imageBuffer, .readOnly) }

    // Base address, row stride and dimensions of the pixel buffer.
    let baseAddress = CVPixelBufferGetBaseAddress(imageBuffer)
    let bytesPerRow = CVPixelBufferGetBytesPerRow(imageBuffer)
    let width = CVPixelBufferGetWidth(imageBuffer)
    let height = CVPixelBufferGetHeight(imageBuffer)

    // Device-dependent RGB colour space.
    let colorSpace = CGColorSpaceCreateDeviceRGB()

    // Little-endian 32-bit pixels with premultiplied alpha in the first component.
    var bitmapInfo: UInt32 = CGBitmapInfo.byteOrder32Little.rawValue
    bitmapInfo |= CGImageAlphaInfo.premultipliedFirst.rawValue & CGBitmapInfo.alphaInfoMask.rawValue

    // Wrap the pixel data in a bitmap context and snapshot it as a Quartz image.
    guard let context = CGContext(data: baseAddress, width: width, height: height, bitsPerComponent: 8, bytesPerRow: bytesPerRow, space: colorSpace, bitmapInfo: bitmapInfo),
        let quartzImage = context.makeImage() else {
            return UIImage()
    }

    return UIImage(cgImage: quartzImage)
}


CIDetector 的功能更丰富:它可以提供眼睛和嘴巴的位置、微笑检测等。

另一方面,AVCaptureMetadataOutput 是逐帧计算的,并且会对检测到的人脸进行跟踪,我们无需添加额外的代码。我发现,正因为有跟踪,这种方式检测人脸更加可靠。它的缺点是只能检测到人脸,而得不到眼睛或嘴巴的位置。此方法的另一个优点是方向问题较少:每当设备方向发生变化时,您可以设置 videoOrientation,人脸的方向就会相对于该方向给出。

就我而言,我的应用程序使用 YUV420 作为所需格式,因此实时使用 CIDetector(它需要 RGB 图像)并不可行。得益于持续跟踪,使用 AVCaptureMetadataOutput 省去了大量工作,而且运行更可靠。





人脸矩形是相对于图像原点给出的。因此,它在屏幕上的位置可能有所不同。请使用:

// For every detected face, scale its bounds by the preview layer's size to get a
// rectangle to draw on screen. The x and y origins are swapped in the conversion.
// NOTE(review): the origin pairs y with width and x with height, but the size pairs
// width with height and height with width — confirm this axis pairing is intended.
for (AVMetadataFaceObject *faceFeatures in metadataObjects) {
    CGRect face = faceFeatures.bounds;
    CGRect facePreviewBounds = CGRectMake(face.origin.y * previewLayerRect.size.width,
                               face.origin.x * previewLayerRect.size.height,
                               face.size.width * previewLayerRect.size.height,
                               face.size.height * previewLayerRect.size.width);

    /* Draw rectangle facePreviewBounds on screen */

要在 iOS 上执行人脸检测,有 CIDetector (Apple) 或 Mobile Vision (Google) API.

IMO,Google Mobile Vision 提供更好的性能。

如果你有兴趣,here is the project you can play with. (iOS 10.2, Swift 3)

WWDC 2017 之后,Apple 在 iOS 11 中引入了 Core ML 和 Vision 框架,让人脸检测更加准确 :)

我做了一个演示项目(Demo Project),其中包含 Vision 与 CIDetector 的对比。此外,它还包含实时面部特征检测。

// MARK: - AVCaptureMetadataOutputObjectsDelegate
extension CameraViewController: AVCaptureMetadataOutputObjectsDelegate {
  /// Draws a white-bordered view over every face reported by the metadata output.
  ///
  /// Face bounds are converted into the preview layer's coordinate space with
  /// `transformedMetadataObject(for:)`, so the rectangles line up with the preview.
  /// Work is done only while `findFaceControl` is true; the flag is cleared on entry.
  func captureOutput(_ captureOutput: AVCaptureOutput!, didOutputMetadataObjects metadataObjects: [Any]!, from connection: AVCaptureConnection!) {
    guard findFaceControl else { return }
    findFaceControl = false

    // Keep only face objects the preview layer can map into its own coordinates.
    // (`flatMap` matches the Swift 3 API used here; on Swift 4.1+ this is `compactMap`.)
    let faceRects: [CGRect] = (metadataObjects ?? []).flatMap { object -> CGRect? in
      guard let face = object as? AVMetadataFaceObject,
        let localizedFace = previewLayer?.transformedMetadataObject(for: face) else { return nil }
      return localizedFace.bounds
    }

    for faceRect in faceRects {
      let marker = UIView(frame: faceRect)
      // CALayer.borderColor is a CGColor, not a UIColor.
      marker.layer.borderColor = UIColor.white.cgColor
      marker.layer.borderWidth = 2.0
      view.addSubview(marker)
    }
  }
}

一定要删除 didOutputMetadataObjects 创建的视图。

跟踪当前活动的人脸 ID(faceID)是实现这一点的最佳方法。

此外,当您需要求出人脸在预览图层中的位置时,使用人脸元数据配合坐标变换会容易得多。另外,我认为 CIDetector 的效果很糟糕,而 AVCaptureMetadataOutput 使用硬件进行人脸检测,因此速度非常快。

回答得有点晚了,不过这里给出坐标问题的解决方案。您可以在预览图层上调用下面这个方法,把元数据对象转换到预览图层的坐标系中:transformedMetadataObject(for: metadataObject)。

// Intended for the body of the per-object loop over `metadataObjects`: map the object
// into the preview layer's coordinate space before drawing it.
guard let transformedObject = previewLayer.transformedMetadataObject(for: metadataObject) else {
    // This object cannot be mapped into layer coordinates; skip it.
    continue
}
let bounds = transformedObject.bounds
showBounds(at: bounds)


顺便说一句,如果您正在使用(或将您的项目升级到)Swift 4,AVCaptureMetadataOutputObjectsDelegate 的委托方法已更改为:

/// Swift 4 form of the metadata-output delegate callback; the detected objects arrive as
/// typed `AVMetadataObject` values rather than `[Any]`.
func metadataOutput(_ output: AVCaptureMetadataOutput, didOutput metadataObjects: [AVMetadataObject], from connection: AVCaptureConnection)
