Swift 计算大文件的 MD5 校验和

Swift Calculate MD5 Checksum for Large Files

我正在为大型视频文件创建 MD5 校验和。我目前正在使用代码:

extension NSData {
    /// Computes the MD5 digest of the receiver's complete contents in one pass.
    ///
    /// The whole buffer is hashed at once, so the data must already be resident
    /// in memory. Note that `CC_LONG(length)` traps if `length` does not fit in
    /// `CC_LONG`.
    ///
    /// - Returns: The digest as a lowercase hexadecimal string
    ///   (two characters per digest byte).
    func MD5() -> NSString {
        let digestLength = Int(CC_MD5_DIGEST_LENGTH)

        // An array is released automatically; the previous manually allocated
        // pointer was never deallocated and leaked on every call.
        var digest = [UInt8](repeating: 0, count: digestLength)
        CC_MD5(bytes, CC_LONG(length), &digest)

        let output = NSMutableString(capacity: digestLength * 2)
        for byte in digest {
            output.appendFormat("%02x", byte)
        }

        // Copy the text verbatim instead of re-interpreting it as a format string.
        return NSString(string: output as String)
    }
}

但这会创建一个内存缓冲区,对于大型视频文件来说并不理想。 Swift 中有没有一种方法可以计算读取文件流的 MD5 校验和,从而使内存占用最小?

您可以分块计算 MD5 校验和,例如 "Is there a MD5 library that doesn't require the whole input at the same time?" 这个问题的回答中所演示的那样。

这是使用 Swift 的可能实现(现已更新为 Swift 5)

import CommonCrypto

/// Computes the MD5 digest of the file at `url` by reading it in chunks.
///
/// The file is read in pieces of at most 1 MiB, and each piece is fed into a
/// CommonCrypto MD5 context.
///
/// - Parameter url: Location of the file to hash.
/// - Returns: The raw digest bytes (`CC_MD5_DIGEST_LENGTH` bytes), or `nil`
///   if the file could not be opened for reading.
func md5File(url: URL) -> Data? {

    let bufferSize = 1024 * 1024

    do {
        // Open file for reading; the handle is closed on every exit path.
        let file = try FileHandle(forReadingFrom: url)
        defer {
            file.closeFile()
        }

        // Create and initialize MD5 context:
        var context = CC_MD5_CTX()
        CC_MD5_Init(&context)

        // Read up to `bufferSize` bytes until EOF is reached, updating the MD5
        // context with each chunk. Every iteration runs inside its own
        // autorelease pool; the closure's result tells the loop whether to continue.
        while autoreleasepool(invoking: { () -> Bool in
            let data = file.readData(ofLength: bufferSize)
            guard !data.isEmpty else {
                return false // End of file
            }
            data.withUnsafeBytes {
                _ = CC_MD5_Update(&context, $0.baseAddress, numericCast(data.count))
            }
            return true // Continue
        }) { }

        // Compute the MD5 digest:
        var digest = [UInt8](repeating: 0, count: Int(CC_MD5_DIGEST_LENGTH))
        _ = CC_MD5_Final(&digest, &context)

        return Data(digest)

    } catch {
        print("Cannot open file:", error.localizedDescription)
        return nil
    }
}

需要使用自动释放池来释放 file.readData() 返回的内存;如果没有它,整个(可能非常大的)文件都会被加载到内存中。感谢 Abhi Beckert 注意到这一点并提供了一个实现。

如果您需要十六进制编码字符串形式的摘要,请将返回类型改为 String?,并将

return Data(digest)

替换为

// Render every digest byte as two lowercase hex digits and concatenate them.
let hexDigest = digest.map { String(format: "%02hhx", $0) }.joined()
return hexDigest

SHA256 哈希的解决方案(基于 Martin R 的回答):

/// Computes the SHA-256 digest of the file at `url` by reading it in chunks.
///
/// The file is read in pieces of at most 1 MiB, and each piece is fed into a
/// CommonCrypto SHA-256 context.
///
/// - Parameter url: Location of the file to hash.
/// - Returns: The raw digest bytes (`CC_SHA256_DIGEST_LENGTH` bytes), or `nil`
///   if the file could not be opened for reading.
func sha256(url: URL) -> Data? {
    do {
        let bufferSize = 1024 * 1024
        // Open file for reading; the handle is closed on every exit path.
        let file = try FileHandle(forReadingFrom: url)
        defer {
            file.closeFile()
        }

        // Create and initialize SHA256 context:
        var context = CC_SHA256_CTX()
        CC_SHA256_Init(&context)

        // Read up to `bufferSize` bytes until EOF is reached, updating the
        // SHA256 context with each chunk. Every iteration runs inside its own
        // autorelease pool; the closure's result tells the loop whether to continue.
        while autoreleasepool(invoking: { () -> Bool in
            let data = file.readData(ofLength: bufferSize)
            guard !data.isEmpty else {
                // End of file
                return false
            }
            data.withUnsafeBytes {
                // The closure receives a raw buffer; CommonCrypto wants its base address.
                _ = CC_SHA256_Update(&context, $0.baseAddress, numericCast(data.count))
            }
            // Continue
            return true
        }) { }

        // Compute the SHA256 digest:
        var digest = [UInt8](repeating: 0, count: Int(CC_SHA256_DIGEST_LENGTH))
        _ = CC_SHA256_Final(&digest, &context)

        return Data(digest)
    } catch {
        print(error)
        return nil
    }
}

用法示例(假设之前已创建名为 fileURL 的 URL 类型实例):

// Hash the file; `sha256(url:)` yields nil on failure, in which case nothing is logged.
if let digestData = sha256(url: fileURL) {
    // Render every digest byte as two lowercase hex digits and concatenate them.
    let calculatedHash = digestData.map { String(format: "%02hhx", $0) }.joined()
    DDLogDebug(calculatedHash)
}

从 iOS 13 开始,上述代码会产生如下弃用警告:

'CC_MD5_Init' was deprecated in iOS 13.0

您可以将代码替换为 CryptoKit

import Foundation
import CryptoKit

extension URL {

    /// Computes the MD5 digest of the file this URL points to and returns it
    /// Base64-encoded.
    ///
    /// The file is consumed in chunks of at most 16 KiB, each of which is fed
    /// into a CryptoKit `Insecure.MD5` hasher.
    ///
    /// - Returns: The Base64 string of the digest, or `nil` if the file could
    ///   not be opened for reading (the error is passed to `error_log`).
    func checksumInBase64() -> String? {
        let chunkLength = 16*1024

        // Opening the handle is the only throwing step, so handle its failure up front.
        let handle: FileHandle
        do {
            handle = try FileHandle(forReadingFrom: self)
        } catch {
            error_log(error)

            return nil
        }
        defer {
            handle.closeFile()
        }

        var hasher = CryptoKit.Insecure.MD5()

        // Feed the hasher chunk by chunk; each read runs inside its own
        // autorelease pool. An empty read marks end of file.
        var reachedEndOfFile = false
        while !reachedEndOfFile {
            autoreleasepool {
                let chunk = handle.readData(ofLength: chunkLength)
                if chunk.count > 0 {
                    hasher.update(data: chunk)
                } else {
                    reachedEndOfFile = true
                }
            }
        }

        // Finish hashing and encode the digest bytes.
        let digestBytes = Data(hasher.finalize())
        return digestBytes.base64EncodedString()
    }
}