如何在 Java 中统计 PDF 文件里每个字母出现的次数
How to count the occurrences of each letter of the alphabet in a PDF file in Java
在这个程序中,我需要统计所有字母(a-z)各自出现的次数,而下面的代码只能统计"u"的个数。那么如果我要统计全部 26 个字母,是否需要在 for 循环中写 26 个 if 语句?
输出应该是:
a:389
b:777
c:909
..
..
请给我其他解决方案
// Split the text on single spaces and add the number of pieces to the running word total.
String[] words = sourceCode.split(" ");
amountOfWords += words.length;
for (String word : words) {
    // Every character of every piece counts toward the running character total.
    amountOfChars += word.length();
    // Tally occurrences of the letter 'u', in either case.
    for (char ch : word.toCharArray()) {
        if (ch == 'u' || ch == 'U') {
            u++;
        }
    }
}
试试这个:
// Read the document's text and count how often each letter a-z occurs,
// treating the upper- and lower-case forms of a letter as the same letter.
String str = new PDFTextStripper().getText(doc);
int len = str.length();
// A sorted map makes the printed result run from 'a' to 'z'.
Map<Character, Integer> numOfChars = new java.util.TreeMap<Character, Integer>();
for (int i = 0; i < len; ++i) {
    char alphabet = str.charAt(i);
    if (alphabet >= 'A' && alphabet <= 'Z') {
        // Fold ASCII upper case onto lower case so 'U' and 'u' share one counter.
        alphabet = (char) (alphabet - 'A' + 'a');
    }
    if (alphabet < 'a' || alphabet > 'z') {
        // Whitespace, digits, punctuation and non-ASCII characters are not letters a-z.
        continue;
    }
    Integer seen = numOfChars.get(alphabet);
    numOfChars.put(alphabet, seen == null ? 1 : seen + 1);
}
System.out.println(numOfChars);
你可以试试这个。我正在使用库:fontbox-2.0.12 和 pdfbox-2.0.12 和 commons-logging-1.2
try {
    // Backslashes in the Windows path are doubled because a single backslash
    // starts an escape sequence inside a Java string literal.
    PDDocument doc = PDDocument.load(new File("E:\\project-test\\scloud\\test\\src\\main\\resources\\Shadip_Banik.pdf"));
    String text;
    try {
        text = new PDFTextStripper().getText(doc);
    } finally {
        // Release the document as soon as its text has been read.
        doc.close();
    }
    sourceCode = text.replace("-", "").replace(".", "");
    String[] words = sourceCode.split(" ");
    amountOfWords = amountOfWords + words.length;
    StringBuilder builder = new StringBuilder();
    // Compiled once: the expression is the same for every word.
    Pattern letterPattern = Pattern.compile("[a-zA-Z]");
    for (String word : words) {
        amountOfChars = amountOfChars + word.length();
        // Keep only the ASCII letters of each word.
        Matcher matcher = letterPattern.matcher(word);
        while (matcher.find()) {
            builder.append(matcher.group());
        }
    }
    String allData = builder.toString();
    System.out.println(allData);
    int total = 0;
    for (int i = 0; i < allAlphabate.length(); i++) {
        char wanted = allAlphabate.charAt(i);
        // Count exact (case-sensitive) occurrences of this letter among the collected letters.
        int alphabateCount = 0;
        for (int j = 0; j < allData.length(); j++) {
            if (allData.charAt(j) == wanted) {
                alphabateCount++;
            }
        }
        total += alphabateCount;
        System.out.println(wanted + " : " + alphabateCount);
    }
    // Sanity check: the per-letter counts add up to the number of collected letters.
    if (total == allData.length()) {
        System.out.println("Yes -------------------------------------------------");
    }
    System.out.println("Amount of Chars is " + amountOfChars);
    // NOTE(review): words.length was already added above, so the "+ 1" looks like an over-count - confirm intent.
    System.out.println("Amount of Words is " + (amountOfWords + 1));
    System.out.println("Average Word Length is " + (amountOfChars / amountOfWords));
} catch (Exception ert) {
    // Report the failure instead of silently discarding it.
    ert.printStackTrace();
}
但是如果你想把大写和小写字母合并在一起统计,请使用下面的代码
// Case-insensitive variant: each word is lower-cased before its letters are collected,
// so 'A' and 'a' end up in the same per-letter total.
// Compiled once: the expression is the same for every word.
Pattern letterPattern = Pattern.compile("[a-zA-Z]");
for (String word : words) {
    amountOfChars = amountOfChars + word.length();
    // Locale.ROOT keeps the case mapping independent of the default locale
    // (under a Turkish locale "I" would become a dotless i, which the pattern rejects).
    Matcher matcher = letterPattern.matcher(word.toLowerCase(java.util.Locale.ROOT));
    while (matcher.find()) {
        builder.append(matcher.group());
    }
}
String allData = builder.toString();
// Lower-case once up front rather than once per letter of the alphabet.
String folded = allData.toLowerCase(java.util.Locale.ROOT);
int total = 0;
for (int i = 0; i < allAlphabate.length(); i++) {
    char wanted = allAlphabate.charAt(i);
    // Count occurrences of this letter among the collected, lower-cased letters.
    int alphabateCount = 0;
    for (int j = 0; j < folded.length(); j++) {
        if (folded.charAt(j) == wanted) {
            alphabateCount++;
        }
    }
    total += alphabateCount;
    System.out.println(wanted + " : " + alphabateCount);
}
// Sanity check: the per-letter counts add up to the number of collected letters.
if (total == allData.length()) {
    System.out.println("Yes -------------------------------------------------");
}
在这个程序中,我需要统计所有字母(a-z)各自出现的次数,而下面的代码只能统计"u"的个数。那么如果我要统计全部 26 个字母,是否需要在 for 循环中写 26 个 if 语句?
输出应该是: a:389 b:777 c:909 .. ..
请给我其他解决方案
// Split the text on single spaces and add the number of pieces to the running word total.
String[] words = sourceCode.split(" ");
amountOfWords += words.length;
for (String word : words) {
    // Every character of every piece counts toward the running character total.
    amountOfChars += word.length();
    // Tally occurrences of the letter 'u', in either case.
    for (char ch : word.toCharArray()) {
        if (ch == 'u' || ch == 'U') {
            u++;
        }
    }
}
试试这个:
// Read the document's text and count how often each letter a-z occurs,
// treating the upper- and lower-case forms of a letter as the same letter.
String str = new PDFTextStripper().getText(doc);
int len = str.length();
// A sorted map makes the printed result run from 'a' to 'z'.
Map<Character, Integer> numOfChars = new java.util.TreeMap<Character, Integer>();
for (int i = 0; i < len; ++i) {
    char alphabet = str.charAt(i);
    if (alphabet >= 'A' && alphabet <= 'Z') {
        // Fold ASCII upper case onto lower case so 'U' and 'u' share one counter.
        alphabet = (char) (alphabet - 'A' + 'a');
    }
    if (alphabet < 'a' || alphabet > 'z') {
        // Whitespace, digits, punctuation and non-ASCII characters are not letters a-z.
        continue;
    }
    Integer seen = numOfChars.get(alphabet);
    numOfChars.put(alphabet, seen == null ? 1 : seen + 1);
}
System.out.println(numOfChars);
你可以试试这个。我正在使用库:fontbox-2.0.12 和 pdfbox-2.0.12 和 commons-logging-1.2
try {
    // Backslashes in the Windows path are doubled because a single backslash
    // starts an escape sequence inside a Java string literal.
    PDDocument doc = PDDocument.load(new File("E:\\project-test\\scloud\\test\\src\\main\\resources\\Shadip_Banik.pdf"));
    String text;
    try {
        text = new PDFTextStripper().getText(doc);
    } finally {
        // Release the document as soon as its text has been read.
        doc.close();
    }
    sourceCode = text.replace("-", "").replace(".", "");
    String[] words = sourceCode.split(" ");
    amountOfWords = amountOfWords + words.length;
    StringBuilder builder = new StringBuilder();
    // Compiled once: the expression is the same for every word.
    Pattern letterPattern = Pattern.compile("[a-zA-Z]");
    for (String word : words) {
        amountOfChars = amountOfChars + word.length();
        // Keep only the ASCII letters of each word.
        Matcher matcher = letterPattern.matcher(word);
        while (matcher.find()) {
            builder.append(matcher.group());
        }
    }
    String allData = builder.toString();
    System.out.println(allData);
    int total = 0;
    for (int i = 0; i < allAlphabate.length(); i++) {
        char wanted = allAlphabate.charAt(i);
        // Count exact (case-sensitive) occurrences of this letter among the collected letters.
        int alphabateCount = 0;
        for (int j = 0; j < allData.length(); j++) {
            if (allData.charAt(j) == wanted) {
                alphabateCount++;
            }
        }
        total += alphabateCount;
        System.out.println(wanted + " : " + alphabateCount);
    }
    // Sanity check: the per-letter counts add up to the number of collected letters.
    if (total == allData.length()) {
        System.out.println("Yes -------------------------------------------------");
    }
    System.out.println("Amount of Chars is " + amountOfChars);
    // NOTE(review): words.length was already added above, so the "+ 1" looks like an over-count - confirm intent.
    System.out.println("Amount of Words is " + (amountOfWords + 1));
    System.out.println("Average Word Length is " + (amountOfChars / amountOfWords));
} catch (Exception ert) {
    // Report the failure instead of silently discarding it.
    ert.printStackTrace();
}
但是如果你想把大写和小写字母合并在一起统计,请使用下面的代码
// Case-insensitive variant: each word is lower-cased before its letters are collected,
// so 'A' and 'a' end up in the same per-letter total.
// Compiled once: the expression is the same for every word.
Pattern letterPattern = Pattern.compile("[a-zA-Z]");
for (String word : words) {
    amountOfChars = amountOfChars + word.length();
    // Locale.ROOT keeps the case mapping independent of the default locale
    // (under a Turkish locale "I" would become a dotless i, which the pattern rejects).
    Matcher matcher = letterPattern.matcher(word.toLowerCase(java.util.Locale.ROOT));
    while (matcher.find()) {
        builder.append(matcher.group());
    }
}
String allData = builder.toString();
// Lower-case once up front rather than once per letter of the alphabet.
String folded = allData.toLowerCase(java.util.Locale.ROOT);
int total = 0;
for (int i = 0; i < allAlphabate.length(); i++) {
    char wanted = allAlphabate.charAt(i);
    // Count occurrences of this letter among the collected, lower-cased letters.
    int alphabateCount = 0;
    for (int j = 0; j < folded.length(); j++) {
        if (folded.charAt(j) == wanted) {
            alphabateCount++;
        }
    }
    total += alphabateCount;
    System.out.println(wanted + " : " + alphabateCount);
}
// Sanity check: the per-letter counts add up to the number of collected letters.
if (total == allData.length()) {
    System.out.println("Yes -------------------------------------------------");
}