Swift3 中的 Levenshtein 距离

Levenshtein distance in Swift3

我正在使用 Rosetta Code 中的教程来计算 Levenshtein 距离。似乎他们的代码在 Swift2 中,所以我在执行此操作时收到此错误 Binary operator '+' cannot be applied to operands of type '[Int]' and 'Repeated<String.CharacterView>'var cur = [i + 2] + empty where let empty = repeatElement(s, count: 0)。我该怎么做?

需要进行一些更改。

  • 构造空数组
  • enumerate() 现在是枚举()
  • successor() 不再存在所以我用 +1
  • 替换了它

所以函数现在是

Swift 4:

func levDis(_ w1: String, _ w2: String) -> Int {
    let empty = [Int](repeating:0, count: w2.count)
    var last = [Int](0...w2.count)

    for (i, char1) in w1.enumerated() {
        var cur = [i + 1] + empty
        for (j, char2) in w2.enumerated() {
            cur[j + 1] = char1 == char2 ? last[j] : min(last[j], last[j + 1], cur[j]) + 1
        }
        last = cur
    }
    return last.last!
}

Swift 3:

func levDis(w1: String, w2: String) -> Int {

    let (t, s) = (w1.characters, w2.characters)

    let empty = Array<Int>(repeating:0, count: s.count)
    var last = [Int](0...s.count)

    for (i, tLett) in t.enumerated() {
        var cur = [i + 1] + empty
        for (j, sLett) in s.enumerated() {
            cur[j + 1] = tLett == sLett ? last[j] : min(last[j], last[j + 1], cur[j])+1
        }
        last = cur
    }
    return last.last!
}

更新并改进了 Swift 4 的答案,基于@Spads 的答案。

extension String {
    func levenshteinDistanceScore(to string: String, ignoreCase: Bool = true, trimWhiteSpacesAndNewLines: Bool = true) -> Float {

        var firstString = self
        var secondString = string   

        if ignoreCase {
            firstString = firstString.lowercased()
            secondString = secondString.lowercased()
        }
        if trimWhiteSpacesAndNewLines {
            firstString = firstString.trimmingCharacters(in: .whitespacesAndNewlines)
            secondString = secondString.trimmingCharacters(in: .whitespacesAndNewlines)
        }

        let empty = [Int](repeating:0, count: secondString.count)
        var last = [Int](0...secondString.count)

        for (i, tLett) in firstString.enumerated() {
            var cur = [i + 1] + empty
            for (j, sLett) in secondString.enumerated() {
                cur[j + 1] = tLett == sLett ? last[j] : Swift.min(last[j], last[j + 1], cur[j])+1
            }
            last = cur
        }

        // maximum string length between the two
        let lowestScore = max(firstString.count, secondString.count)

        if let validDistance = last.last {
            return  1 - (Float(validDistance) / Float(lowestScore))
        }

        return 0.0
    }
}

infix operator =~
func =~(string: String, otherString: String) -> Bool {
    return string.levenshteinDistanceScore(to: otherString) >= 0.85
}
func ~=(string: String, otherString: String) -> Bool {
    return string.levenshteinDistanceScore(to: otherString) >= 0.85 
}

由于@Daniel Illescas 的回答无效,这里是使用 Int return 类型和断言的工作版本。

extension String {

    func levenshteinDistance(to string: String, ignoreCase: Bool = true, trimWhiteSpacesAndNewLines: Bool = true) -> Int {
        
        var firstString = self
        var secondString = string
        
        if ignoreCase {
            firstString = firstString.lowercased()
            secondString = secondString.lowercased()
        }
        
        if trimWhiteSpacesAndNewLines {
            firstString = firstString.trimmingCharacters(in: .whitespacesAndNewlines)
            secondString = secondString.trimmingCharacters(in: .whitespacesAndNewlines)
        }
        
        let empty = [Int](repeating: 0, count: secondString.count)
        var last = [Int](0...secondString.count)
        
        for (i, tLett) in firstString.enumerated() {
            var cur = [i + 1] + empty
            for (j, sLett) in secondString.enumerated() {
                cur[j + 1] = tLett == sLett ? last[j] : Swift.min(last[j], last[j + 1], cur[j]) + 1
            }
            
            last = cur
        }
        
        if let validDistance = last.last {
            return validDistance
        }
        
        assertionFailure()
        return 0
    }
}