如何获取句子的上下文

Question

我正在开发一个可以修正拼写错误单词的应用程序。

以下面这句话为例：

mny people say there is a error in this sentence

在 Swift 中，我们可以使用 UITextChecker，它能很好地给出 mny 这个词实际可能是什么的建议……但是，它实际上给了我几个候选词，其中一个是 many，另一个是 money。很明显，money 并不适合用在这句话中。有没有办法检查句子本身是否合乎逻辑？

Answer 1

我认为这个方案还有待改进。我把这个 Swift 3 解决方案更新到了 Swift 5。值得一提的是，它最初是受这篇 Python 教程的启发。

创建一个新的 iOS 项目，添加一个名为 bigtext.txt 的文本文件，其内容为这段文本。它将作为我们的“学习”词典。然后在 ViewController 中：

import UIKit
import NaturalLanguage

class ViewController: UIViewController {

    /// Spell-checks a sample sentence word by word, logs what was changed,
    /// and prints the corrected sentence.
    override func viewDidLoad() {
        super.viewDidLoad()

        let inputString = "mny people say there is a error in this sentence"

        // Read a text file and "study" the model
        guard let path = Bundle.main.path(forResource: "bigtext", ofType: "txt") else {
            print("Path not available")
            return
        }
        let checker = SpellChecker(contentsOfFile: path)

        // The corrected sentence is rebuilt by appending pieces of `inputString`.
        // Token ranges belong to `inputString`; applying them to a copy that has
        // already been edited breaks as soon as a correction changes a word's
        // length (e.g. "mny" -> "many"), so the copy is never indexed with them.
        var newString = ""
        var cursor = inputString.startIndex

        // NLTokenizer is the preferred way to iterate over the words of a sentence.
        let tokenizer = NLTokenizer(unit: .word)
        tokenizer.string = inputString
        tokenizer.enumerateTokens(in: inputString.startIndex..<inputString.endIndex) { tokenRange, _ in
            let word = String(inputString[tokenRange])
            let checked = checker?.correct(word: word)
            let candidates = checker?.candidates(word: word)

            // Copy the untouched text between the previous token and this one,
            // then the (possibly corrected) word itself.
            newString.append(contentsOf: inputString[cursor..<tokenRange.lowerBound])
            newString.append(checked ?? word)
            cursor = tokenRange.upperBound

            if word == checked {
                print("\(word) unchanged")
            } else {
                print("Correct:\t\(word) -> \(String(describing: checked))")
                print("Candidates:\t\(word) -> \(String(describing: candidates))")
            }
            return true
        }

        // Append whatever follows the last token (trailing punctuation, spaces).
        newString.append(contentsOf: inputString[cursor...])
        print("Result: \(newString)")
    }
}

/// Returns every string that is exactly one simple edit away from `word`.
///
/// The edits considered are: deleting one character, transposing two adjacent
/// characters, replacing one character with a lowercase ASCII letter, and
/// inserting a lowercase ASCII letter at any position (including the end).
///
/// - Parameter word: The word to generate edits for.
/// - Returns: The set of edited strings; empty when `word` is empty.
func edits(word: String) -> Set<String> {
    guard !word.isEmpty else { return [] }

    let alphabet = "abcdefghijklmnopqrstuvwxyz"

    // Every split position, including `endIndex`, so that a letter can also
    // be inserted after the last character.
    let splitPoints = Array(word.indices) + [word.endIndex]

    var results = Set<String>()
    for index in splitPoints {
        let left = word[..<index]
        let right = word[index...]

        // Deletion, transposition and replacement need at least one
        // character to the right of the split point.
        if let first = right.first {
            let rest = right.dropFirst()

            // Deletion: drop the first character of the right part.
            results.insert("\(left)\(rest)")

            // Transposition: swap the first two characters of the right part.
            if let second = rest.first {
                results.insert("\(left)\(second)\(first)\(rest.dropFirst())")
            }

            // Replacement: substitute the first character of the right part.
            for letter in alphabet {
                results.insert("\(left)\(letter)\(rest)")
            }
        }

        // Insertion: add a letter at the split point.
        for letter in alphabet {
            results.insert("\(left)\(letter)\(right)")
        }
    }
    return results
}

/// A frequency-based spelling corrector trained on the words of a text file.
struct SpellChecker {

    /// Number of times each word was seen during training.
    var knownWords: [String: Int] = [:]

    /// Records one more occurrence of `word`.
    mutating func train(word: String) {
        knownWords[word, default: 0] += 1
    }

    /// Builds a checker from the UTF-8 text file at `file`.
    ///
    /// The text is lowercased and split on every scalar outside `a...z`;
    /// each resulting run of letters is counted as one word.
    /// Returns `nil` when the file cannot be read.
    init?(contentsOfFile file: String) {
        do {
            let text = try String(contentsOfFile: file, encoding: .utf8).lowercased()
            let letters: ClosedRange<Unicode.Scalar> = "a"..."z"
            let words = text.unicodeScalars
                .split(whereSeparator: { !letters.contains($0) })
                .map { String($0) }
            for word in words { train(word: word) }
        } catch {
            return nil
        }
    }

    /// Returns the known words that are two edits away from `word`,
    /// or `nil` when there are none.
    func knownEdits2(word: String) -> Set<String>? {
        var knownEdits: Set<String> = []
        for edit in edits(word: word) {
            if let matches = known(words: edits(word: edit)) {
                knownEdits.formUnion(matches)
            }
        }
        return knownEdits.isEmpty ? nil : knownEdits
    }

    /// Returns the subset of `words` that was seen during training,
    /// or `nil` when none of them was.
    func known<S: Sequence>(words: S) -> Set<String>? where S.Iterator.Element == String {
        let matches = Set(words.filter { knownWords[$0] != nil })
        return matches.isEmpty ? nil : matches
    }

    /// Returns the possible corrections for `word`.
    ///
    /// The first non-empty group wins: the word itself if known, then known
    /// words one edit away, then known words two edits away. The result is
    /// empty when no group matches.
    func candidates(word: String) -> Set<String> {
        return known(words: [word])
            ?? known(words: edits(word: word))
            ?? knownEdits2(word: word)
            ?? []
    }

    /// Returns the candidate with the highest training count.
    ///
    /// `word` itself is the starting value and is only replaced by a candidate
    /// with a strictly greater count; words missing from `knownWords` count as 1.
    func correct(word: String) -> String {
        return candidates(word: word).reduce(word) { best, candidate in
            (knownWords[best] ?? 1) < (knownWords[candidate] ?? 1) ? candidate : best
        }
    }
}

输出类似如下（具体结果取决于 bigtext.txt 的内容）：

Correct:    mny -> Optional("may")
Candidates: mny -> Optional(Set(["any", "ny", "may", "many"]))
people unchanged
say unchanged
there unchanged
is unchanged
a unchanged
error unchanged
in unchanged
this unchanged
sentence unchanged
Result: may people say there is a error in this sentence

请注意，这里我们直接采用了第一个更正候选词。要选出真正合适的候选词，还需要先弄清楚词序并理解句子的上下文。

如何获取句子的上下文

How to get context of a sentence

string

nlp

spell-checking

uitextchecker

swift