如何使用 Kotlin 将文本转换为按计数分组的单词列表?
How to transform text into list of words grouped by count using Kotlin?
我有这样的字符串文本:
// Sample input: space-separated words followed by a few special characters.
val text = "aa bb cc aa bb aa aa / <"
我想先跳过像 <*/&^$ 这样的特殊字符,
然后把单词按出现次数分组,得到一个 Word 对象的列表:
/** A word entry holding its [id], the word's [text], and its [count]. */
data class Word(
    val id: Int,
    val text: String,
    val count: Int
)
// Expected result for the sample text (the word texts are String literals).
listOf(Word(1, "aa", 4), Word(2, "bb", 2), Word(3, "cc", 1))
这是我目前的做法,但它需要 3 个循环,效率不好,而且有不少样板代码:
// Counts how often each cleaned word occurs in `text`.
// Tokens are split on single spaces; `regex` (defined elsewhere — presumably a Regex
// matching the special characters to strip; confirm) is applied to each token first.
val wordWithCountMap = mutableMapOf<String, Int>()
text.trim().split(" ").forEach { word ->
    val key = regex.replace(word, "")
    // Skip tokens that are blank, or that become blank once the matched characters are removed.
    if (key.isNotBlank()) {
        // Read and write the count under the same cleaned key; looking it up under the
        // raw `word` would reset the count whenever the token contained stripped characters.
        wordWithCountMap[key] = (wordWithCountMap[key] ?: 0) + 1
    }
}

// Turn the counts into Word objects. Ids start at 1 to match the expected output
// (Word(1, "aa", 4), ...).
val wordList = wordWithCountMap.entries.mapIndexedTo(arrayListOf()) { index, (key, count) ->
    Word(id = index + 1, text = key, count = count)
}
// Sample input: space-separated words followed by a few special characters.
val text: String = "aa bb cc aa bb aa aa / <"

/** A word entry: 1-based [id], the word's [text], and how many times it occurred ([count]). */
data class Word(
    val id: Int,
    val text: String,
    val count: Int
)

val result = text
    // Raw string so the regex engine receives the word-boundary anchor `\b`;
    // in a regular string literal "\b" is the backspace character and nothing would be split.
    .split("""\b""".toRegex())
    // Keep only tokens that contain at least one letter or digit.
    .filter { token -> token.any(Char::isLetterOrDigit) }
    .groupingBy { it }
    .eachCount()
    .entries
    // Orders by descending count; maybe remove this line (see @mattFreake's comment below).
    .sortedByDescending { it.value }
    .mapIndexed { index, textCount -> Word(index + 1, textCount.key, textCount.value) }

result.forEach(::println)
我有这样的字符串文本:
// Sample input: space-separated words followed by a few special characters.
val text = "aa bb cc aa bb aa aa / <"
我想先跳过像 <*/&^$ 这样的特殊字符,
然后把单词按出现次数分组,得到一个 Word 对象的列表:
/** A word entry holding its [id], the word's [text], and its [count]. */
data class Word(
    val id: Int,
    val text: String,
    val count: Int
)
// Expected result for the sample text (the word texts are String literals).
listOf(Word(1, "aa", 4), Word(2, "bb", 2), Word(3, "cc", 1))
这是我目前的做法,但它需要 3 个循环,效率不好,而且有不少样板代码:
// Counts how often each cleaned word occurs in `text`.
// Tokens are split on single spaces; `regex` (defined elsewhere — presumably a Regex
// matching the special characters to strip; confirm) is applied to each token first.
val wordWithCountMap = mutableMapOf<String, Int>()
text.trim().split(" ").forEach { word ->
    val key = regex.replace(word, "")
    // Skip tokens that are blank, or that become blank once the matched characters are removed.
    if (key.isNotBlank()) {
        // Read and write the count under the same cleaned key; looking it up under the
        // raw `word` would reset the count whenever the token contained stripped characters.
        wordWithCountMap[key] = (wordWithCountMap[key] ?: 0) + 1
    }
}

// Turn the counts into Word objects. Ids start at 1 to match the expected output
// (Word(1, "aa", 4), ...).
val wordList = wordWithCountMap.entries.mapIndexedTo(arrayListOf()) { index, (key, count) ->
    Word(id = index + 1, text = key, count = count)
}
// Sample input: space-separated words followed by a few special characters.
val text: String = "aa bb cc aa bb aa aa / <"

/** A word entry: 1-based [id], the word's [text], and how many times it occurred ([count]). */
data class Word(
    val id: Int,
    val text: String,
    val count: Int
)

val result = text
    // Raw string so the regex engine receives the word-boundary anchor `\b`;
    // in a regular string literal "\b" is the backspace character and nothing would be split.
    .split("""\b""".toRegex())
    // Keep only tokens that contain at least one letter or digit.
    .filter { token -> token.any(Char::isLetterOrDigit) }
    .groupingBy { it }
    .eachCount()
    .entries
    // Orders by descending count; maybe remove this line (see @mattFreake's comment below).
    .sortedByDescending { it.value }
    .mapIndexed { index, textCount -> Word(index + 1, textCount.key, textCount.value) }

result.forEach(::println)