我正在使用 pdfbox-app-2.0.0-RC3 但在 PDF 解析器中使用 RndomAccessFile 时仍然出现错误
I am using pdfbox-app-2.0.0-RC3 but still I am getting Error while using RndomAccessFile in PDF parser
-您可以通过此 link 查看示例:
http://radixcode.com/pdfbox-example-code-how-to-extract-text-from-pdf-file-with-java/
import java.io.IOException;
public class JavaPDFTest {
public static void main(String[] args) throws IOException {
PDFManager pdfManager = new PDFManager();
pdfManger.setFilePath("E:\test.pdf");
System.out.println(pdfManager.ToText());
}
}
import java.io.File;
import java.io.IOException;
import org.apache.pdfbox.cos.COSDocument;
import org.apache.pdfbox.io.RandomAccessFile;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
public class PDFManager {
private PDFParser parser;
private PDFTextStripper pdfStripper;
private PDDocument pdDoc ;
private COSDocument cosDoc ;
private String Text ;
private String filePath;
private File file;
public PDFManager() {
}
public String ToText() throws IOException
{
this.pdfStripper = null;
this.pdDoc = null;
this.cosDoc = null;
file = new File(filePath);
parser = new PDFParser(new RandomAccessFile(file,"r")); // update for PDFBox V 2.0
parser.parse();
cosDoc = parser.getDocument();
pdfStripper = new PDFTextStripper();
pdDoc = new PDDocument(cosDoc);
pdDoc.getNumberOfPages();
pdfStripper.setStartPage(1);
pdfStripper.setEndPage(10);
// reading text from page 1 to 10
// if you want to get text from full pdf file use this code
// pdfStripper.setEndPage(pdDoc.getNumberOfPages());
Text = pdfStripper.getText(pdDoc);
return Text;
}
public void setFilePath(String filePath) {
this.filePath = filePath;
}
}
错误
Exception in thread "main" java.lang.ClassCastException: java.io.RandomAccessFile cannot be cast to org.apache.pdfbox.io.RandomAccessRead
at aechaec.PDFManager.ToText(PDFManager.java:43)
at aechaec.AechAEC.main(AechAEC.java:25)
Java Result: 1
是不是安全权限引起的?因为我在 mac?
上使用 Netbeans
周围有很多过时的例子,它们可能有效也可能无效。请替换此代码
file = new File(filePath);
parser = new PDFParser(new RandomAccessFile(file,"r"));
parser.parse();
cosDoc = parser.getDocument();
pdfStripper = new PDFTextStripper();
pdDoc = new PDDocument(cosDoc);
这段代码:
pdDoc = PDDocument.load(new File(filePath));
pdfStripper = new PDFTextStripper();
并更新到2.0的发布版本。
-您可以通过此 link 查看示例: http://radixcode.com/pdfbox-example-code-how-to-extract-text-from-pdf-file-with-java/
import java.io.IOException;
public class JavaPDFTest {
public static void main(String[] args) throws IOException {
PDFManager pdfManager = new PDFManager();
pdfManger.setFilePath("E:\test.pdf");
System.out.println(pdfManager.ToText());
}
}
import java.io.File;
import java.io.IOException;
import org.apache.pdfbox.cos.COSDocument;
import org.apache.pdfbox.io.RandomAccessFile;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
public class PDFManager {
private PDFParser parser;
private PDFTextStripper pdfStripper;
private PDDocument pdDoc ;
private COSDocument cosDoc ;
private String Text ;
private String filePath;
private File file;
public PDFManager() {
}
public String ToText() throws IOException
{
this.pdfStripper = null;
this.pdDoc = null;
this.cosDoc = null;
file = new File(filePath);
parser = new PDFParser(new RandomAccessFile(file,"r")); // update for PDFBox V 2.0
parser.parse();
cosDoc = parser.getDocument();
pdfStripper = new PDFTextStripper();
pdDoc = new PDDocument(cosDoc);
pdDoc.getNumberOfPages();
pdfStripper.setStartPage(1);
pdfStripper.setEndPage(10);
// reading text from page 1 to 10
// if you want to get text from full pdf file use this code
// pdfStripper.setEndPage(pdDoc.getNumberOfPages());
Text = pdfStripper.getText(pdDoc);
return Text;
}
public void setFilePath(String filePath) {
this.filePath = filePath;
}
}
错误
Exception in thread "main" java.lang.ClassCastException: java.io.RandomAccessFile cannot be cast to org.apache.pdfbox.io.RandomAccessRead
at aechaec.PDFManager.ToText(PDFManager.java:43)
at aechaec.AechAEC.main(AechAEC.java:25)
Java Result: 1
是不是安全权限引起的?因为我在 mac?
上使用 Netbeans周围有很多过时的例子,它们可能有效也可能无效。请替换此代码
file = new File(filePath);
parser = new PDFParser(new RandomAccessFile(file,"r"));
parser.parse();
cosDoc = parser.getDocument();
pdfStripper = new PDFTextStripper();
pdDoc = new PDDocument(cosDoc);
这段代码:
pdDoc = PDDocument.load(new File(filePath));
pdfStripper = new PDFTextStripper();
并更新到2.0的发布版本。