itextpdf HTML 到包含西里尔字母的 PDF
itextpdf HTML to PDF containing Cyrillic letters
我已经问过关于这个问题的另一个问题,但我无法让它工作。我更改了我的代码,所以现在它是这样的:
import java.io.FileOutputStream;
import java.io.StringReader;
import com.itextpdf.text.Document;
import com.itextpdf.text.PageSize;
import com.itextpdf.text.pdf.PdfWriter;
import com.itextpdf.tool.xml.XMLWorkerHelper;
/**
 * Minimal reproduction from the question: renders a hard-coded XHTML string that
 * contains Cyrillic text into a PDF using iText's {@code XMLWorkerHelper}.
 *
 * <p>The HTML deliberately specifies no font; this is the snippet for which the
 * asker reports that Cyrillic characters do not show up in the resulting PDF.
 */
public class HTM {
    /**
     * Writes the sample document to {@code C:\testpdf.pdf} and prints "Done." on success.
     * Any failure is reported via its stack trace.
     *
     * @param args unused
     */
    public static void main(String... args) {
        try {
            Document document = new Document(PageSize.LETTER);
            // The backslash must be escaped: in a Java string literal "\t" is a TAB,
            // so "C:\testpdf.pdf" would name the file "C:<TAB>estpdf.pdf".
            PdfWriter pdfWriter = PdfWriter.getInstance(
                    document, new FileOutputStream("C:\\testpdf.pdf"));
            document.open();
            XMLWorkerHelper worker = XMLWorkerHelper.getInstance();
            String htmlString = "<html><head>"
                    + "<meta http-equiv=\"content-type\" content=\"application/xhtml+xml; charset=UTF-8\" />"
                    + "</head><body>"
                    + "<h1>Zdravo Кристијан!</h1>"
                    + "</body></html>";
            worker.parseXHtml(pdfWriter, document, new StringReader(htmlString));
            document.close();
            System.out.println("Done.");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
我的问题是 pdf 不显示西里尔字符。我知道如何使用不同的字符集和字体制作简单的 pdf,但我想将 html 文件或字符串(在我的例子中是 html 字符串)转换为 pdf。提前致谢。
根据@bruno-lowagie 的评论,只需对您发布的代码进行少量更改即可使其在 Windows 上运行。有关如何指定特定字体的更多信息,请查看 Bruno 提出的示例。
/**
 * Answer variant of the sample: the same XHTML-to-PDF conversion, but the first
 * heading is wrapped in a paragraph whose inline style names a font family.
 */
public class HTM {
    /**
     * Renders the sample markup into {@code testpdf.pdf} (relative path) and prints
     * "Done." on success; any exception is reported via its stack trace.
     *
     * @param args unused
     */
    public static void main(String... args) {
        try {
            final Document pdfDoc = new Document(PageSize.LETTER);
            final FileOutputStream target = new FileOutputStream("testpdf.pdf");
            final PdfWriter out = PdfWriter.getInstance(pdfDoc, target);
            pdfDoc.open();

            final XMLWorkerHelper helper = XMLWorkerHelper.getInstance();
            helper.parseXHtml(out, pdfDoc, new StringReader(sampleMarkup()));

            pdfDoc.close();
            System.out.println("Done.");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Assembles the XHTML sample piece by piece. */
    private static String sampleMarkup() {
        final StringBuilder markup = new StringBuilder();
        markup.append("<html><head>");
        markup.append("<meta http-equiv=\"content-type\" content=\"application/xhtml+xml; charset=UTF-8\" />");
        markup.append("</head><body>");
        // The inline style selects the font to use for the wrapped heading.
        markup.append("<p style=\"font-family:courier new\">");
        markup.append("<h1>Zdravo Кристијан!</h1>");
        markup.append("</p>");
        // Same heading again, this time without any font specified.
        markup.append("<h1>Zdravo Кристијан!</h1>");
        markup.append("</body></html>");
        return markup.toString();
    }
}
我尝试了很多方法,但每次都会遗漏一些东西。感谢 @BrunoLowagie 和 @SubOptimal。下面是我使用自定义字体后成功运行的代码。它还包含一个作为字符串的简单 html,但(在注释中)展示了如何使用实际的 html 和 css 文件来完成。
/**
 * Converts an HTML string to a PDF with iText XML Worker, wiring the pipeline by
 * hand so that explicitly registered font files are used for rendering.
 */
public class HtmlToPdf {
    /** Output path used by {@link #main(String[])}. */
    public static final String DEST = "/home/christian/Desktop/testDoc.pdf";

    /**
     * Builds the CSS -> HTML -> PDF pipeline and parses the HTML content into {@code file}.
     *
     * @param file path of the PDF file to create
     * @throws IOException       if the output file cannot be opened or the input cannot be read
     * @throws DocumentException if iText fails while building the document
     */
    public void createPdf(String file) throws IOException, DocumentException {
        // try-with-resources guarantees the stream is released even when parsing
        // fails before document.close() is reached; closing it twice is harmless.
        try (FileOutputStream out = new FileOutputStream(file)) {
            // step 1
            Document document = new Document();
            // step 2
            PdfWriter writer = PdfWriter.getInstance(document, out);
            writer.setInitialLeading(12.5f);
            // step 3
            document.open();
            // step 4
            // CSS: only inline style attributes are resolved unless a CSS file is added.
            CSSResolver cssResolver = new StyleAttrCSSResolver();
            // CssFile cssFile = XMLWorkerHelper.getCSS(new FileInputStream(CSS));
            // cssResolver.addCss(cssFile);
            // HTML: register the font files explicitly instead of letting the
            // provider look for fonts on its own (DONTLOOKFORFONTS).
            XMLWorkerFontProvider fontProvider =
                    new XMLWorkerFontProvider(XMLWorkerFontProvider.DONTLOOKFORFONTS);
            fontProvider.register("fonts/Arimo-Regular.ttf");
            fontProvider.register("fonts/Arimo-Bold.ttf");
            fontProvider.register("fonts/Arimo-Italic.ttf");
            // Requests for the font name "lowagie" are served by Arimo
            // (per the XMLWorkerFontProvider API; verify against the iText version in use).
            fontProvider.addFontSubstitute("lowagie", "Arimo");
            CssAppliers cssAppliers = new CssAppliersImpl(fontProvider);
            HtmlPipelineContext htmlContext = new HtmlPipelineContext(cssAppliers);
            htmlContext.setTagFactory(Tags.getHtmlTagProcessorFactory());
            // Pipelines are chained back to front: css -> html -> pdf.
            PdfWriterPipeline pdf = new PdfWriterPipeline(document, writer);
            HtmlPipeline html = new HtmlPipeline(htmlContext, pdf);
            CssResolverPipeline css = new CssResolverPipeline(cssResolver, html);
            // XML Worker
            XMLWorker worker = new XMLWorker(css, true);
            XMLParser p = new XMLParser(worker);
            // p.parse(new FileInputStream(HTML));
            String htmlContent = " HERE GOES HTML CODE ";
            p.parse(new StringReader(htmlContent));
            // step 5
            document.close();
        }
    }

    /**
     * Generates the PDF at {@link #DEST}.
     *
     * @param args unused
     * @throws IOException       see {@link #createPdf(String)}
     * @throws DocumentException see {@link #createPdf(String)}
     */
    public static void main(String[] args) throws IOException, DocumentException {
        // Instantiate this class; the original referenced D06_ParseHtmlFonts,
        // which is not defined in this snippet.
        new HtmlToPdf().createPdf(DEST);
    }
}
我注意到,在 css/html 中把 font-family 设置为一种确实支持所需编码的字体(即 font-family: <支持该编码的字体>;)非常重要,
并且电子邮件客户端始终使用内联 css。
我已经问过关于这个问题的另一个问题,但我无法让它工作。我更改了我的代码,所以现在它是这样的:
import java.io.FileOutputStream;
import java.io.StringReader;
import com.itextpdf.text.Document;
import com.itextpdf.text.PageSize;
import com.itextpdf.text.pdf.PdfWriter;
import com.itextpdf.tool.xml.XMLWorkerHelper;
/**
 * Minimal reproduction from the question: renders a hard-coded XHTML string that
 * contains Cyrillic text into a PDF using iText's {@code XMLWorkerHelper}.
 *
 * <p>The HTML deliberately specifies no font; this is the snippet for which the
 * asker reports that Cyrillic characters do not show up in the resulting PDF.
 */
public class HTM {
    /**
     * Writes the sample document to {@code C:\testpdf.pdf} and prints "Done." on success.
     * Any failure is reported via its stack trace.
     *
     * @param args unused
     */
    public static void main(String... args) {
        try {
            Document document = new Document(PageSize.LETTER);
            // The backslash must be escaped: in a Java string literal "\t" is a TAB,
            // so "C:\testpdf.pdf" would name the file "C:<TAB>estpdf.pdf".
            PdfWriter pdfWriter = PdfWriter.getInstance(
                    document, new FileOutputStream("C:\\testpdf.pdf"));
            document.open();
            XMLWorkerHelper worker = XMLWorkerHelper.getInstance();
            String htmlString = "<html><head>"
                    + "<meta http-equiv=\"content-type\" content=\"application/xhtml+xml; charset=UTF-8\" />"
                    + "</head><body>"
                    + "<h1>Zdravo Кристијан!</h1>"
                    + "</body></html>";
            worker.parseXHtml(pdfWriter, document, new StringReader(htmlString));
            document.close();
            System.out.println("Done.");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
我的问题是 pdf 不显示西里尔字符。我知道如何使用不同的字符集和字体制作简单的 pdf,但我想将 html 文件或字符串(在我的例子中是 html 字符串)转换为 pdf。提前致谢。
根据@bruno-lowagie 的评论,只需对您发布的代码进行少量更改即可使其在 Windows 上运行。有关如何指定特定字体的更多信息,请查看 Bruno 提出的示例。
/**
 * Answer variant of the sample: the same XHTML-to-PDF conversion, but the first
 * heading is wrapped in a paragraph whose inline style names a font family.
 */
public class HTM {
    /**
     * Renders the sample markup into {@code testpdf.pdf} (relative path) and prints
     * "Done." on success; any exception is reported via its stack trace.
     *
     * @param args unused
     */
    public static void main(String... args) {
        try {
            final Document pdfDoc = new Document(PageSize.LETTER);
            final FileOutputStream target = new FileOutputStream("testpdf.pdf");
            final PdfWriter out = PdfWriter.getInstance(pdfDoc, target);
            pdfDoc.open();

            final XMLWorkerHelper helper = XMLWorkerHelper.getInstance();
            helper.parseXHtml(out, pdfDoc, new StringReader(sampleMarkup()));

            pdfDoc.close();
            System.out.println("Done.");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Assembles the XHTML sample piece by piece. */
    private static String sampleMarkup() {
        final StringBuilder markup = new StringBuilder();
        markup.append("<html><head>");
        markup.append("<meta http-equiv=\"content-type\" content=\"application/xhtml+xml; charset=UTF-8\" />");
        markup.append("</head><body>");
        // The inline style selects the font to use for the wrapped heading.
        markup.append("<p style=\"font-family:courier new\">");
        markup.append("<h1>Zdravo Кристијан!</h1>");
        markup.append("</p>");
        // Same heading again, this time without any font specified.
        markup.append("<h1>Zdravo Кристијан!</h1>");
        markup.append("</body></html>");
        return markup.toString();
    }
}
我尝试了很多方法,但每次都会遗漏一些东西。感谢 @BrunoLowagie 和 @SubOptimal。下面是我使用自定义字体后成功运行的代码。它还包含一个作为字符串的简单 html,但(在注释中)展示了如何使用实际的 html 和 css 文件来完成。
/**
 * Converts an HTML string to a PDF with iText XML Worker, wiring the pipeline by
 * hand so that explicitly registered font files are used for rendering.
 */
public class HtmlToPdf {
    /** Output path used by {@link #main(String[])}. */
    public static final String DEST = "/home/christian/Desktop/testDoc.pdf";

    /**
     * Builds the CSS -> HTML -> PDF pipeline and parses the HTML content into {@code file}.
     *
     * @param file path of the PDF file to create
     * @throws IOException       if the output file cannot be opened or the input cannot be read
     * @throws DocumentException if iText fails while building the document
     */
    public void createPdf(String file) throws IOException, DocumentException {
        // try-with-resources guarantees the stream is released even when parsing
        // fails before document.close() is reached; closing it twice is harmless.
        try (FileOutputStream out = new FileOutputStream(file)) {
            // step 1
            Document document = new Document();
            // step 2
            PdfWriter writer = PdfWriter.getInstance(document, out);
            writer.setInitialLeading(12.5f);
            // step 3
            document.open();
            // step 4
            // CSS: only inline style attributes are resolved unless a CSS file is added.
            CSSResolver cssResolver = new StyleAttrCSSResolver();
            // CssFile cssFile = XMLWorkerHelper.getCSS(new FileInputStream(CSS));
            // cssResolver.addCss(cssFile);
            // HTML: register the font files explicitly instead of letting the
            // provider look for fonts on its own (DONTLOOKFORFONTS).
            XMLWorkerFontProvider fontProvider =
                    new XMLWorkerFontProvider(XMLWorkerFontProvider.DONTLOOKFORFONTS);
            fontProvider.register("fonts/Arimo-Regular.ttf");
            fontProvider.register("fonts/Arimo-Bold.ttf");
            fontProvider.register("fonts/Arimo-Italic.ttf");
            // Requests for the font name "lowagie" are served by Arimo
            // (per the XMLWorkerFontProvider API; verify against the iText version in use).
            fontProvider.addFontSubstitute("lowagie", "Arimo");
            CssAppliers cssAppliers = new CssAppliersImpl(fontProvider);
            HtmlPipelineContext htmlContext = new HtmlPipelineContext(cssAppliers);
            htmlContext.setTagFactory(Tags.getHtmlTagProcessorFactory());
            // Pipelines are chained back to front: css -> html -> pdf.
            PdfWriterPipeline pdf = new PdfWriterPipeline(document, writer);
            HtmlPipeline html = new HtmlPipeline(htmlContext, pdf);
            CssResolverPipeline css = new CssResolverPipeline(cssResolver, html);
            // XML Worker
            XMLWorker worker = new XMLWorker(css, true);
            XMLParser p = new XMLParser(worker);
            // p.parse(new FileInputStream(HTML));
            String htmlContent = " HERE GOES HTML CODE ";
            p.parse(new StringReader(htmlContent));
            // step 5
            document.close();
        }
    }

    /**
     * Generates the PDF at {@link #DEST}.
     *
     * @param args unused
     * @throws IOException       see {@link #createPdf(String)}
     * @throws DocumentException see {@link #createPdf(String)}
     */
    public static void main(String[] args) throws IOException, DocumentException {
        // Instantiate this class; the original referenced D06_ParseHtmlFonts,
        // which is not defined in this snippet.
        new HtmlToPdf().createPdf(DEST);
    }
}
我注意到,在 css/html 中把 font-family 设置为一种确实支持所需编码的字体(即 font-family: <支持该编码的字体>;)非常重要,
并且电子邮件客户端始终使用内联 css。