计算 Swift 中字典中值的出现次数

Count occurrences of values in dictionaries in Swift

有人可以帮我解决这个问题吗?提前谢谢你

import Foundation

/// Counts, per topic, how many times that topic's keywords occur as words in the reviews.
///
/// Reviews are split on every non-letter character and compared case-insensitively,
/// so punctuation and capitalisation in the review text do not affect matching.
///
/// - Parameters:
///   - topics: Maps a topic name to the keywords that belong to it.
///   - reviews: Free-form review texts to scan.
/// - Returns: A dictionary from topic name to the total number of keyword occurrences
///   found across all reviews. Topics without any match are omitted.
func countOccurance(topics: [String : [String]], reviews: [String]) -> [String : Int] {
    // Break the reviews into lowercased words so that keywords are matched
    // against individual words rather than against a whole review string.
    let reviewWords = reviews
        .flatMap { $0.components(separatedBy: CharacterSet.letters.inverted) }
        .filter { !$0.isEmpty }
        .map { $0.lowercased() }

    var count: [String: Int] = [:]

    for (topic, keywords) in topics {
        for keyword in keywords {
            let needle = keyword.lowercased()
            let occurrences = reviewWords.filter { $0 == needle }.count
            // Skip keywords that never occur so unmatched topics are not inserted with 0.
            guard occurrences > 0 else { continue }
            // `default: 0` creates the entry on first use instead of force-unwrapping nil.
            count[topic, default: 0] += occurrences
        }
    }
    return count
}

// Sample input: every topic maps to the keywords that signal it.
let topics: [String: [String]] = [
    "price": ["cheap", "expensive", "price"],
    "business": ["small", "medium", "large"],
]
let reviews = "large company with expensive items. Some are very cheap"
let result = countOccurance(topics: topics, reviews: [reviews])

// `enumerated()` yields (offset, element) pairs, so the first printed field is the
// running index and the second is the dictionary's (key, value) entry.
result.enumerated().forEach { offset, entry in
    print("\(offset) : \(entry)")
}

我想要返回以下格式的字典。示例输出:{ "price": 2, "business": 1 }

这里有一个可能的方法,它应该比过度使用循环稍微快一些。

方法:

  1. 将每条评论拆分为单独的单词。
  2. 创建一个字典,以评论词为键,以频率为值。
  3. 遍历每个主题,然后遍历该主题中的每个关键字。
  4. 如果关键字在 reviewsDict 中,获取出现次数并将其添加到 count 的出现次数中。
  5. 返回包含各主题及其出现频率的字典。

解决方案:

/// Counts, for every topic, how many times its keywords occur in the reviews.
///
/// Each review is split on non-letter characters and the resulting words are
/// lowercased. Keywords are lowercased before lookup as well, so matching is
/// case-insensitive on both sides.
///
/// - Parameters:
///   - topics: Maps a topic name to the keywords that belong to it.
///   - reviews: Review texts to scan.
/// - Returns: A dictionary from topic name to the summed occurrences of its keywords.
///   Topics whose keywords never occur are omitted.
func countOccurance(topics: [String: [String]], reviews: [String]) -> [String : Int] {
    // Word -> frequency table over all reviews, keyed by the lowercased word.
    let wordFrequencies: [String: Int] = reviews
        .flatMap { $0.components(separatedBy: CharacterSet.letters.inverted) }
        .filter { !$0.isEmpty }
        .reduce(into: [:]) { frequencies, word in
            frequencies[word.lowercased(), default: 0] += 1
        }

    var count: [String: Int] = [:]
    for (topic, topicKeywords) in topics {
        for topicKeyword in topicKeywords {
            // The table is keyed by lowercased words, so the keyword must be
            // lowercased too or a keyword such as "Cheap" could never match.
            guard let occurrences = wordFrequencies[topicKeyword.lowercased()] else { continue }
            count[topic, default: 0] += occurrences
        }
    }

    return count
}

结果:

0 : (key: "price", value: 2)
1 : (key: "business", value: 1)

我认为你的 countOccurance(topics:reviews:) 函数违反了单一职责原则(它并不只是统计出现次数,同时还在过滤单词)。因此,它只适用于你这一个特定的用例,你也很难找到现成的内置工具来帮你实现它。

另一方面,如果您将问题分解为更小、更简单、通用的步骤,则可以利用现有的 API。以下是我的操作方法:

我不确定你对 Sequence API 的熟悉程度,所以添加了一些注释。当然,在实际代码中你应该删除这些注释。

我还添加了一些中间变量。我认为他们的名字可以作为有用的文档(当然比使用注释更好),但这是一个品味问题。

extension Sequence where Element: Hashable {
    /// Maps each distinct element to the number of times it occurs.
    typealias Histogram = [Element: Int]

    /// Tallies how many times every distinct element appears in the sequence.
    ///
    /// - Returns: A dictionary from element to its occurrence count; empty for an empty sequence.
    func histogram() -> Histogram {
        var tally: Histogram = [:]
        for element in self {
            tally[element, default: 0] += 1
        }
        return tally
    }
}

let topics = [
    "price" : ["cheap", "expensive", "price"],
    "business" : ["small", "medium", "large"]
]

// Invert the "topics" dictionary, to obtain a dictionary that can tell you what topic a keyword belongs to.
// Note: `Dictionary(uniqueKeysWithValues:)` traps on duplicate keys, so every keyword must appear only once.
let topicsByKeyword = Dictionary(uniqueKeysWithValues:
        topics.lazy.flatMap { topic, keywords in
            keywords.map { keyword in (key: keyword, value: topic) }
        }
    )

let reviews = ["large company with expensive items. Some are very cheap"]

let reviewWords = reviews
    .flatMap { $0.components(separatedBy: CharacterSet.letters.inverted) } // Get a flat array of all words in all reviews
    .filter { !$0.isEmpty } // Filter out the empty words
    .map { $0.lowercased() } // Lowercase them all

let reviewTopicKeywords = reviewWords
    .compactMap { word in topicsByKeyword[word] } // Map words to the topics they represent

let reviewTopicKeywordCounts = reviewTopicKeywords.histogram() // Count the occurrences of each topic, which is our final result.

使用类型可能有助于组织其中一些相关行为:

import Foundation

extension Sequence where Element: Hashable {
    /// Maps each distinct element to the number of times it occurs.
    typealias Histogram = [Element: Int]

    /// Counts the occurrences of every distinct element in the sequence.
    ///
    /// - Returns: A dictionary from element to its occurrence count; empty for an empty sequence.
    func histogram() -> Histogram {
        // Pair every element with a count of 1 and let the dictionary sum the
        // counts whenever the same element shows up again.
        let singleCounts = map { ($0, 1) }
        return Dictionary(singleCounts, uniquingKeysWith: +)
    }
}

/// Counts how often each topic is mentioned in review text, using a fixed
/// keyword-to-topic lookup table built once at initialisation.
struct TopicKeywordCounter {
    /// Reverse lookup table: keyword -> the topic it belongs to.
    let topicsByKeyword: [String: String]

    /// Creates a counter from a topic -> keywords table.
    ///
    /// - Parameter keywordsByTopic: Maps a topic name to the keywords that belong to it.
    ///   Review words are lowercased before lookup, so only keywords that are
    ///   already lowercase can match.
    /// - Precondition: Every keyword appears only once across all topics;
    ///   `Dictionary(uniqueKeysWithValues:)` traps on duplicate keys.
    init(keywordsByTopic: [String: [String]]) {
        // Invert the "topics" dictionary, to obtain a dictionary that can tell you what topic a keyword belongs to.
        self.topicsByKeyword = Dictionary(uniqueKeysWithValues:
            keywordsByTopic.lazy.flatMap { topic, keywords in
                keywords.map { keyword in (key: keyword, value: topic) }
            }
        )
    }

    /// Counts, per topic, how many words across all `reviews` are keywords of that topic.
    ///
    /// - Parameter reviews: Review texts to scan.
    /// - Returns: A dictionary from topic name to its number of keyword occurrences.
    ///   Topics that are never mentioned are omitted.
    public func countOccurances(in reviews: [String]) -> [String: Int] {
        let allReviewTopicKeywords = reviews.flatMap { review -> [String] in
            let reviewWords = allSanitizedWords(in: review)
            let reviewKeywords = mapWordsToTopics(from: reviewWords)
            return reviewKeywords
        }

        return allReviewTopicKeywords.histogram()
    }

    /// Splits `review` on non-letter characters, drops empty fragments and lowercases each word.
    private func allSanitizedWords(in review: String) -> [String] {
        review
            .components(separatedBy: CharacterSet.letters.inverted)
            .filter { !$0.isEmpty }
            .map { $0.lowercased() }
    }

    /// Replaces each word that is a known keyword with its topic; other words are dropped.
    private func mapWordsToTopics(from words: [String]) -> [String] {
        words.compactMap { topicsByKeyword[$0] }
    }
}

// Build the counter once from the topic -> keywords table...
let topicKeywordCounter = TopicKeywordCounter(
    keywordsByTopic: [
        "price": ["cheap", "expensive", "price"],
        "business": ["small", "medium", "large"],
    ]
)

let reviews = ["large company with expensive items. Some are very cheap"]

// ...then reuse it for any arrays of reviews you want to analyse.
let reviewTopicKeywordCounts = topicKeywordCounter.countOccurances(in: reviews)

print(reviewTopicKeywordCounts)

如果您有任何问题,请告诉我!