计算 Swift 中字典中值的出现次数
Count occurrences of values in dictionaries in Swift
有人可以帮我解决这个问题吗?提前谢谢你
import Foundation
/// Counts, for every topic, how many words in `reviews` match one of that topic's keywords.
///
/// Reviews are split into words on any non-letter character and compared
/// case-insensitively against the keywords.
///
/// - Parameters:
///   - topics: Topic names mapped to the keywords that indicate that topic.
///   - reviews: Free-form review texts to scan.
/// - Returns: Topic names mapped to their total keyword hits. Topics without
///   any hit are omitted.
func countOccurance(topics: [String : [String]], reviews: [String]) -> [String : Int] {
    // Tokenise once up front so each topic only has to scan a flat word list.
    let reviewWords: [String] = reviews.flatMap { review in
        review.lowercased()
            .split(whereSeparator: { !$0.isLetter })
            .map(String.init)
    }

    var count: [String: Int] = [:]
    for (topic, keywords) in topics {
        // A set gives constant-time membership checks for each review word.
        let keywordSet = Set(keywords.map { $0.lowercased() })
        let matches = reviewWords.filter { keywordSet.contains($0) }.count
        if matches > 0 {
            count[topic] = matches
        }
    }
    return count
}
// Keywords that signal each topic.
let topics: [String: [String]] = [
    "price": ["cheap", "expensive", "price"],
    "business": ["small", "medium", "large"],
]

// A single review; it is handed to the counter as a one-element array.
let reviews = "large company with expensive items. Some are very cheap"
let result = countOccurance(topics: topics, reviews: [reviews])

// `enumerated()` yields (offset, element) pairs, so every printed line is the
// position followed by the whole (key:value:) entry, not "topic : count".
result.enumerated().forEach { key, value in
    print("\(key) : \(value)")
}
我想要返回以下格式的字典。
示例输出:
{
"price": 2,
"business": 1
}
这里有一个可能的方法,它应该比过度使用循环稍微快一些。
方法:
- 将每条评论拆分为单独的单词。
- 创建一个字典,以评论词为键,以频率为值。
- 遍历每个主题,然后遍历该主题中的每个关键字。
- 如果关键字在 reviewsDict 中,获取其出现次数,并将其累加到 count 中对应主题的计数上。
- 返回包含主题及其频率的字典。
解决方案:
/// Totals, per topic, how often that topic's keywords occur in the given reviews.
///
/// Every review is split on non-letter characters; the resulting words are
/// lowercased and tallied. Keywords are looked up in that tally as-is.
///
/// - Parameters:
///   - topics: Topic names mapped to their keywords.
///   - reviews: Review texts to scan.
/// - Returns: Topic names mapped to the summed keyword frequencies. Topics
///   whose keywords never occur are left out.
func countOccurance(topics: [String: [String]], reviews: [String]) -> [String : Int] {
    let nonLetters = CharacterSet.letters.inverted

    // Frequency table of every lowercased word across all reviews.
    let wordFrequencies: [String: Int] = reviews
        .flatMap { $0.components(separatedBy: nonLetters) }
        .filter { !$0.isEmpty }
        .reduce(into: [:]) { frequencies, word in
            frequencies[word.lowercased(), default: 0] += 1
        }

    // Sum the frequencies of each topic's keywords.
    var totals: [String: Int] = [:]
    for (topicName, keywords) in topics {
        for keyword in keywords {
            if let hits = wordFrequencies[keyword] {
                totals[topicName, default: 0] += hits
            }
        }
    }
    return totals
}
结果:
0 : (key: "price", value: 2)
1 : (key: "business", value: 1)
我认为你的 countOccurance(topics:reviews:) 函数违反了单一职责原则(它不仅在计算出现次数,还在过滤单词)。因此,它只适用于你这一个特定用例,你也就找不到任何内置工具来帮助你。
另一方面,如果您将问题分解为更小、更简单、通用的步骤,则可以利用现有的 API。以下是我的操作方法:
我不确定你对 Sequence API 的熟悉程度,所以添加了一些注释。当然,你应该在实际代码中删除这些注释。
我还添加了一些中间变量。我认为它们的名字可以作为有用的文档(当然比使用注释更好),但这是个人品味问题。
extension Sequence where Element: Hashable {
    /// Maps each distinct element to the number of times it occurs.
    typealias Histogram = [Element: Int]

    /// Tallies how many times each distinct element appears in the sequence.
    ///
    /// - Returns: A dictionary keyed by element, holding each element's count.
    func histogram() -> Histogram {
        var tally: Histogram = [:]
        for element in self {
            tally[element, default: 0] += 1
        }
        return tally
    }
}
// Keywords that signal each topic.
let topics = [
    "price" : ["cheap", "expensive", "price"],
    "business" : ["small", "medium", "large"]
]

// Invert the "topics" dictionary, to obtain a dictionary that can tell you what topic a keyword belongs to.
// `uniqueKeysWithValues` traps on duplicate keys, so each keyword must belong to exactly one topic.
let topicsByKeyword = Dictionary(uniqueKeysWithValues:
    topics.lazy.flatMap { topic, keywords in
        keywords.map { keyword in (key: keyword, value: topic) }
    }
)

let reviews = ["large company with expensive items. Some are very cheap"]

let reviewWords = reviews
    .flatMap { $0.components(separatedBy: CharacterSet.letters.inverted) } // Get a flat array of all words in all reviews
    .filter { !$0.isEmpty } // Filter out the empty words
    .map { $0.lowercased() } // Lowercase them all

let reviewTopicKeywords = reviewWords
    .compactMap { word in topicsByKeyword[word] } // Map words to the topics they represent; non-keywords are dropped

let reviewTopicKeywordCounts = reviewTopicKeywords.histogram() // Count how often each topic was hit, which is our final result.
使用类型可能有助于组织其中一些相关行为:
import Foundation
extension Sequence where Element: Hashable {
    /// Maps each distinct element to the number of times it occurs.
    typealias Histogram = [Element: Int]

    /// Tallies how many times each distinct element appears in the sequence.
    ///
    /// - Returns: A dictionary keyed by element, holding each element's count.
    func histogram() -> Histogram {
        var tally: Histogram = [:]
        for element in self {
            tally[element, default: 0] += 1
        }
        return tally
    }
}
/// Counts how often each topic is mentioned in review texts, using a fixed
/// keyword-to-topic lookup table.
struct TopicKeywordCounter {
    /// Maps each keyword to the topic it belongs to.
    let topicsByKeyword: [String: String]

    /// Creates a counter from a topic-to-keywords table.
    ///
    /// - Parameter keywordsByTopic: Topic names mapped to their keywords. Each
    ///   keyword must appear under a single topic only, because
    ///   `Dictionary(uniqueKeysWithValues:)` traps on duplicate keys.
    init(keywordsByTopic: [String: [String]]) {
        // Invert the "topics" dictionary, to obtain a dictionary that can tell you what topic a keyword belongs to.
        self.topicsByKeyword = Dictionary(uniqueKeysWithValues:
            keywordsByTopic.lazy.flatMap { topic, keywords in
                keywords.map { keyword in (key: keyword, value: topic) }
            }
        )
    }

    /// Counts the topic hits across all the given reviews.
    ///
    /// - Parameter reviews: Review texts to scan.
    /// - Returns: Topic names mapped to the number of review words that are
    ///   keywords of that topic. Topics without hits are omitted.
    public func countOccurances(in reviews: [String]) -> [String: Int] {
        let allReviewTopicKeywords = reviews.flatMap { review -> [String] in
            let reviewWords = allSanitizedWords(in: review)
            let reviewKeywords = mapWordsToTopics(from: reviewWords)
            return reviewKeywords
        }

        return allReviewTopicKeywords.histogram()
    }

    /// Splits a review on non-letter characters, drops empty fragments and lowercases the words.
    private func allSanitizedWords(in review: String) -> [String] {
        review
            .components(separatedBy: CharacterSet.letters.inverted)
            .filter { !$0.isEmpty }
            .map { $0.lowercased() }
    }

    /// Replaces each word by the topic it is a keyword of; words that are not keywords are dropped.
    private func mapWordsToTopics(from words: [String]) -> [String] {
        words.compactMap { topicsByKeyword[$0] }
    }
}
// Build the counter once from the topic-to-keywords table.
let topicKeywordCounter = TopicKeywordCounter(
    keywordsByTopic: [
        "price": ["cheap", "expensive", "price"],
        "business": ["small", "medium", "large"],
    ]
)

let reviews = ["large company with expensive items. Some are very cheap"]

// The same counter can then be reused for any array of reviews.
let reviewTopicKeywordCounts = topicKeywordCounter.countOccurances(in: reviews)
print(reviewTopicKeywordCounts)
如果您有任何问题,请告诉我!
有人可以帮我解决这个问题吗?提前谢谢你
import Foundation
/// Counts, for every topic, how many words in `reviews` match one of that topic's keywords.
///
/// Reviews are split into words on any non-letter character and compared
/// case-insensitively against the keywords.
///
/// - Parameters:
///   - topics: Topic names mapped to the keywords that indicate that topic.
///   - reviews: Free-form review texts to scan.
/// - Returns: Topic names mapped to their total keyword hits. Topics without
///   any hit are omitted.
func countOccurance(topics: [String : [String]], reviews: [String]) -> [String : Int] {
    // Tokenise once up front so each topic only has to scan a flat word list.
    let reviewWords: [String] = reviews.flatMap { review in
        review.lowercased()
            .split(whereSeparator: { !$0.isLetter })
            .map(String.init)
    }

    var count: [String: Int] = [:]
    for (topic, keywords) in topics {
        // A set gives constant-time membership checks for each review word.
        let keywordSet = Set(keywords.map { $0.lowercased() })
        let matches = reviewWords.filter { keywordSet.contains($0) }.count
        if matches > 0 {
            count[topic] = matches
        }
    }
    return count
}
// Keywords that signal each topic.
let topics: [String: [String]] = [
    "price": ["cheap", "expensive", "price"],
    "business": ["small", "medium", "large"],
]

// A single review; it is handed to the counter as a one-element array.
let reviews = "large company with expensive items. Some are very cheap"
let result = countOccurance(topics: topics, reviews: [reviews])

// `enumerated()` yields (offset, element) pairs, so every printed line is the
// position followed by the whole (key:value:) entry, not "topic : count".
result.enumerated().forEach { key, value in
    print("\(key) : \(value)")
}
我想要返回以下格式的字典。示例输出:{ "price": 2, "business": 1 }
这里有一个可能的方法,它应该比过度使用循环稍微快一些。
方法:
- 将每条评论拆分为单独的单词。
- 创建一个字典,以评论词为键,以频率为值。
- 遍历每个主题,然后遍历该主题中的每个关键字。
- 如果关键字在 reviewsDict 中,获取其出现次数,并将其累加到 count 中对应主题的计数上。
- 返回包含主题及其频率的字典。
解决方案:
/// Totals, per topic, how often that topic's keywords occur in the given reviews.
///
/// Every review is split on non-letter characters; the resulting words are
/// lowercased and tallied. Keywords are looked up in that tally as-is.
///
/// - Parameters:
///   - topics: Topic names mapped to their keywords.
///   - reviews: Review texts to scan.
/// - Returns: Topic names mapped to the summed keyword frequencies. Topics
///   whose keywords never occur are left out.
func countOccurance(topics: [String: [String]], reviews: [String]) -> [String : Int] {
    let nonLetters = CharacterSet.letters.inverted

    // Frequency table of every lowercased word across all reviews.
    let wordFrequencies: [String: Int] = reviews
        .flatMap { $0.components(separatedBy: nonLetters) }
        .filter { !$0.isEmpty }
        .reduce(into: [:]) { frequencies, word in
            frequencies[word.lowercased(), default: 0] += 1
        }

    // Sum the frequencies of each topic's keywords.
    var totals: [String: Int] = [:]
    for (topicName, keywords) in topics {
        for keyword in keywords {
            if let hits = wordFrequencies[keyword] {
                totals[topicName, default: 0] += hits
            }
        }
    }
    return totals
}
结果:
0 : (key: "price", value: 2) 1 : (key: "business", value: 1)
我认为你的 countOccurance(topics:reviews:) 函数违反了单一职责原则(它不仅在计算出现次数,还在过滤单词)。因此,它只适用于你这一个特定用例,你也就找不到任何内置工具来帮助你。
另一方面,如果您将问题分解为更小、更简单、通用的步骤,则可以利用现有的 API。以下是我的操作方法:
我不确定你对 Sequence API 的熟悉程度,所以添加了一些注释。当然,你应该在实际代码中删除这些注释。
我还添加了一些中间变量。我认为它们的名字可以作为有用的文档(当然比使用注释更好),但这是个人品味问题。
extension Sequence where Element: Hashable {
    /// Maps each distinct element to the number of times it occurs.
    typealias Histogram = [Element: Int]

    /// Tallies how many times each distinct element appears in the sequence.
    ///
    /// - Returns: A dictionary keyed by element, holding each element's count.
    func histogram() -> Histogram {
        var tally: Histogram = [:]
        for element in self {
            tally[element, default: 0] += 1
        }
        return tally
    }
}
// Keywords that signal each topic.
let topics = [
    "price" : ["cheap", "expensive", "price"],
    "business" : ["small", "medium", "large"]
]

// Invert the "topics" dictionary, to obtain a dictionary that can tell you what topic a keyword belongs to.
// `uniqueKeysWithValues` traps on duplicate keys, so each keyword must belong to exactly one topic.
let topicsByKeyword = Dictionary(uniqueKeysWithValues:
    topics.lazy.flatMap { topic, keywords in
        keywords.map { keyword in (key: keyword, value: topic) }
    }
)

let reviews = ["large company with expensive items. Some are very cheap"]

let reviewWords = reviews
    .flatMap { $0.components(separatedBy: CharacterSet.letters.inverted) } // Get a flat array of all words in all reviews
    .filter { !$0.isEmpty } // Filter out the empty words
    .map { $0.lowercased() } // Lowercase them all

let reviewTopicKeywords = reviewWords
    .compactMap { word in topicsByKeyword[word] } // Map words to the topics they represent; non-keywords are dropped

let reviewTopicKeywordCounts = reviewTopicKeywords.histogram() // Count how often each topic was hit, which is our final result.
使用类型可能有助于组织其中一些相关行为:
import Foundation
extension Sequence where Element: Hashable {
    /// Maps each distinct element to the number of times it occurs.
    typealias Histogram = [Element: Int]

    /// Tallies how many times each distinct element appears in the sequence.
    ///
    /// - Returns: A dictionary keyed by element, holding each element's count.
    func histogram() -> Histogram {
        var tally: Histogram = [:]
        for element in self {
            tally[element, default: 0] += 1
        }
        return tally
    }
}
/// Counts how often each topic is mentioned in review texts, using a fixed
/// keyword-to-topic lookup table.
struct TopicKeywordCounter {
    /// Maps each keyword to the topic it belongs to.
    let topicsByKeyword: [String: String]

    /// Creates a counter from a topic-to-keywords table.
    ///
    /// - Parameter keywordsByTopic: Topic names mapped to their keywords. Each
    ///   keyword must appear under a single topic only, because
    ///   `Dictionary(uniqueKeysWithValues:)` traps on duplicate keys.
    init(keywordsByTopic: [String: [String]]) {
        // Invert the "topics" dictionary, to obtain a dictionary that can tell you what topic a keyword belongs to.
        self.topicsByKeyword = Dictionary(uniqueKeysWithValues:
            keywordsByTopic.lazy.flatMap { topic, keywords in
                keywords.map { keyword in (key: keyword, value: topic) }
            }
        )
    }

    /// Counts the topic hits across all the given reviews.
    ///
    /// - Parameter reviews: Review texts to scan.
    /// - Returns: Topic names mapped to the number of review words that are
    ///   keywords of that topic. Topics without hits are omitted.
    public func countOccurances(in reviews: [String]) -> [String: Int] {
        let allReviewTopicKeywords = reviews.flatMap { review -> [String] in
            let reviewWords = allSanitizedWords(in: review)
            let reviewKeywords = mapWordsToTopics(from: reviewWords)
            return reviewKeywords
        }

        return allReviewTopicKeywords.histogram()
    }

    /// Splits a review on non-letter characters, drops empty fragments and lowercases the words.
    private func allSanitizedWords(in review: String) -> [String] {
        review
            .components(separatedBy: CharacterSet.letters.inverted)
            .filter { !$0.isEmpty }
            .map { $0.lowercased() }
    }

    /// Replaces each word by the topic it is a keyword of; words that are not keywords are dropped.
    private func mapWordsToTopics(from words: [String]) -> [String] {
        words.compactMap { topicsByKeyword[$0] }
    }
}
// Build the counter once from the topic-to-keywords table.
let topicKeywordCounter = TopicKeywordCounter(
    keywordsByTopic: [
        "price": ["cheap", "expensive", "price"],
        "business": ["small", "medium", "large"],
    ]
)

let reviews = ["large company with expensive items. Some are very cheap"]

// The same counter can then be reused for any array of reviews.
let reviewTopicKeywordCounts = topicKeywordCounter.countOccurances(in: reviews)
print(reviewTopicKeywordCounts)
如果您有任何问题,请告诉我!