如何找到文本中出现频率最高的单词?
How can I find the most frequent word in a text?
我遇到一个问题:如果我有这样的输入:
"Thanks Thanks Thanks car car"
输出将是 "thanks"。如果我的单词以大写字母开头,它将以小写字母打印该单词。
我可以在我的解决方案中添加什么来解决这个问题?
public class Main {
    /**
     * Reads text from standard input until end of stream and prints the word
     * that occurs most often.
     *
     * <p>Words are maximal runs of word characters (the complement of the
     * {@code \W} delimiter). They are counted case-insensitively and the winner
     * is reported in lower case, without a trailing newline. When several words
     * share the highest count, a shorter word is preferred. Nothing is printed
     * when the input contains no words.
     *
     * @param args ignored
     * @throws IOException if reading standard input fails
     */
    public static void main(String[] args) throws IOException {
        String line;
        Map<String, Integer> frequency = new HashMap<>();
        BufferedReader reader = new BufferedReader(new InputStreamReader(System.in));
        while ((line = reader.readLine()) != null) {
            line = line.trim();
            if (!line.isEmpty()) {
                // The backslash must be doubled: "\W" is not a legal escape in a Java string literal.
                String[] words = line.split("\\W+");
                for (String word : words) {
                    // A line that starts with punctuation yields a leading empty token; it is not a word.
                    if (word.isEmpty()) {
                        continue;
                    }
                    String processed = word.toLowerCase();
                    if (frequency.containsKey(processed)) {
                        frequency.put(processed, frequency.get(processed) + 1);
                    } else {
                        frequency.put(processed, 1);
                    }
                }
            }
        }

        int mostFrequentlyUsed = 0;
        String theWord = null;
        for (Map.Entry<String, Integer> entry : frequency.entrySet()) {
            String word = entry.getKey();
            int theVal = entry.getValue();
            // Every stored count is at least 1, so the first entry always takes the
            // first branch and theWord is non-null by the time the tie-break runs.
            if (theVal > mostFrequentlyUsed
                    || (theVal == mostFrequentlyUsed && word.length() < theWord.length())) {
                mostFrequentlyUsed = theVal;
                theWord = word;
            }
        }

        // print() instead of printf(): the word is data, not a format string, and
        // printf(null) would throw when the input had no words at all.
        if (theWord != null) {
            System.out.print(theWord);
        }
    }
}
要让代码以输入的格式而不是小写形式打印最常出现的单词,您可以更改下面的代码行。
String processed = word.toLowerCase();
改为:
String processed = word;
但请注意,containsKey() 方法区分大小写,因此不会将 "Thanks" 和 "thanks" 视为同一个词。
Please find below a program that prints both upper and lower case based on the input.
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.HashMap;
import java.util.Map;
public class Main {
    /**
     * Reads one line from standard input, counts how often each
     * space-separated token occurs (case-sensitively), then prints the token
     * with the highest count together with that count, followed by the token
     * in switched case (see {@link #printInLowerOrUpperCare(String)}).
     *
     * <p>Prints nothing when no line is available or the line holds no tokens.
     * When several tokens share the highest count, the one met first while
     * iterating the map is kept.
     *
     * @param args ignored
     * @throws IOException if reading standard input fails
     */
    public static void main(String[] args) throws IOException {
        BufferedReader reader = new BufferedReader(new InputStreamReader(System.in));
        String line = reader.readLine();
        if (line == null) {
            // End of stream before any line was read: nothing to count.
            return;
        }

        Map<String, Integer> strMap = new HashMap<String, Integer>();
        for (String s : line.split(" ")) {
            // Consecutive spaces or a blank line produce empty tokens; they are not words.
            if (s.isEmpty()) {
                continue;
            }
            Integer previous = strMap.get(s);
            strMap.put(s, previous == null ? 1 : previous + 1);
        }

        // Find the maximum. Counts start at 1, so comparing against an initial 0
        // also selects the first entry without a separate special case.
        String result = null;
        int maxCount = 0;
        for (Map.Entry<String, Integer> itr : strMap.entrySet()) {
            if (maxCount < itr.getValue()) {
                maxCount = itr.getValue();
                result = itr.getKey();
            }
        }
        if (result == null) {
            return;
        }

        // Most frequent token together with its number of occurrences.
        System.out.println("word"+ result+"count"+ maxCount);
        printInLowerOrUpperCare(result);
    }

    /**
     * Prints {@code result} on its own line with its case switched according
     * to its first character: upper-cased when that character is an ASCII
     * lower-case letter ({@code 'a'}..{@code 'z'}), lower-cased otherwise.
     * An empty string is printed as an empty line.
     *
     * @param result the word to print; must not be {@code null}
     */
    public static void printInLowerOrUpperCare(String result) {
        // Range check: the upper bound has to be "<= 'z'"; with ">=" only
        // characters at or beyond 'z' could ever reach the upper-case branch.
        boolean startsWithLowerCase = !result.isEmpty()
                && result.charAt(0) >= 'a' && result.charAt(0) <= 'z';
        if (startsWithLowerCase) {
            System.out.println(result.toUpperCase());
        } else {
            System.out.println(result.toLowerCase());
        }
    }
}
我遇到一个问题:如果我有这样的输入:"Thanks Thanks Thanks car car",输出将是 "thanks"。如果我的单词以大写字母开头,它会以小写形式打印该单词。我可以在我的解决方案中添加什么来解决这个问题?
public class Main {
    /**
     * Reads text from standard input until end of stream and prints the word
     * that occurs most often.
     *
     * <p>Words are maximal runs of word characters (the complement of the
     * {@code \W} delimiter). They are counted case-insensitively and the winner
     * is reported in lower case, without a trailing newline. When several words
     * share the highest count, a shorter word is preferred. Nothing is printed
     * when the input contains no words.
     *
     * @param args ignored
     * @throws IOException if reading standard input fails
     */
    public static void main(String[] args) throws IOException {
        String line;
        Map<String, Integer> frequency = new HashMap<>();
        BufferedReader reader = new BufferedReader(new InputStreamReader(System.in));
        while ((line = reader.readLine()) != null) {
            line = line.trim();
            if (!line.isEmpty()) {
                // The backslash must be doubled: "\W" is not a legal escape in a Java string literal.
                String[] words = line.split("\\W+");
                for (String word : words) {
                    // A line that starts with punctuation yields a leading empty token; it is not a word.
                    if (word.isEmpty()) {
                        continue;
                    }
                    String processed = word.toLowerCase();
                    if (frequency.containsKey(processed)) {
                        frequency.put(processed, frequency.get(processed) + 1);
                    } else {
                        frequency.put(processed, 1);
                    }
                }
            }
        }

        int mostFrequentlyUsed = 0;
        String theWord = null;
        for (Map.Entry<String, Integer> entry : frequency.entrySet()) {
            String word = entry.getKey();
            int theVal = entry.getValue();
            // Every stored count is at least 1, so the first entry always takes the
            // first branch and theWord is non-null by the time the tie-break runs.
            if (theVal > mostFrequentlyUsed
                    || (theVal == mostFrequentlyUsed && word.length() < theWord.length())) {
                mostFrequentlyUsed = theVal;
                theWord = word;
            }
        }

        // print() instead of printf(): the word is data, not a format string, and
        // printf(null) would throw when the input had no words at all.
        if (theWord != null) {
            System.out.print(theWord);
        }
    }
}
要让代码以输入的格式而不是小写形式打印最常出现的单词,您可以更改下面的代码行。
String processed = word.toLowerCase();
改为:
String processed = word;
但请注意,containsKey() 方法区分大小写,因此不会将 "Thanks" 和 "thanks" 视为同一个词。
Please find below a program that prints both upper and lower case based on the input.
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.HashMap;
import java.util.Map;
public class Main {
    /**
     * Reads one line from standard input, counts how often each
     * space-separated token occurs (case-sensitively), then prints the token
     * with the highest count together with that count, followed by the token
     * in switched case (see {@link #printInLowerOrUpperCare(String)}).
     *
     * <p>Prints nothing when no line is available or the line holds no tokens.
     * When several tokens share the highest count, the one met first while
     * iterating the map is kept.
     *
     * @param args ignored
     * @throws IOException if reading standard input fails
     */
    public static void main(String[] args) throws IOException {
        BufferedReader reader = new BufferedReader(new InputStreamReader(System.in));
        String line = reader.readLine();
        if (line == null) {
            // End of stream before any line was read: nothing to count.
            return;
        }

        Map<String, Integer> strMap = new HashMap<String, Integer>();
        for (String s : line.split(" ")) {
            // Consecutive spaces or a blank line produce empty tokens; they are not words.
            if (s.isEmpty()) {
                continue;
            }
            Integer previous = strMap.get(s);
            strMap.put(s, previous == null ? 1 : previous + 1);
        }

        // Find the maximum. Counts start at 1, so comparing against an initial 0
        // also selects the first entry without a separate special case.
        String result = null;
        int maxCount = 0;
        for (Map.Entry<String, Integer> itr : strMap.entrySet()) {
            if (maxCount < itr.getValue()) {
                maxCount = itr.getValue();
                result = itr.getKey();
            }
        }
        if (result == null) {
            return;
        }

        // Most frequent token together with its number of occurrences.
        System.out.println("word"+ result+"count"+ maxCount);
        printInLowerOrUpperCare(result);
    }

    /**
     * Prints {@code result} on its own line with its case switched according
     * to its first character: upper-cased when that character is an ASCII
     * lower-case letter ({@code 'a'}..{@code 'z'}), lower-cased otherwise.
     * An empty string is printed as an empty line.
     *
     * @param result the word to print; must not be {@code null}
     */
    public static void printInLowerOrUpperCare(String result) {
        // Range check: the upper bound has to be "<= 'z'"; with ">=" only
        // characters at or beyond 'z' could ever reach the upper-case branch.
        boolean startsWithLowerCase = !result.isEmpty()
                && result.charAt(0) >= 'a' && result.charAt(0) <= 'z';
        if (startsWithLowerCase) {
            System.out.println(result.toUpperCase());
        } else {
            System.out.println(result.toLowerCase());
        }
    }
}