具有特殊字符的 PDFBox 在 Windows 上工作正常,但字符在 Linux 上被其他字符替换
PDFBox with special characters working fine on Windows but characters getting replaced with other characters in Linux
我的要求是我有一个带有少量文本字段的 PDF Acroform 模板,并且基于登录用户,这些文本字段预先填充了用户名和城市并呈现给用户(注意用户不是手动输入数据到PDF 但我的后端代码正在这样做)。
所以这些名称和城市字段给非英语字符带来了问题。
奇怪的是这在我的 loca (windows 10) 上工作得很好。然而,当代码部署在 lunix 机器上时,那些非英文字符将被替换为其他一些字符。
我正在使用 pdfbox-app 版本 2.0.24
我的类路径中有这种字体 Arial_Narrow.ttf。 (解决另一个问题 - java.lang.IllegalArgumentException:...在此字体的编码中不可用:WinAnsiEncoding)
我的假设是因为此 Arial_Narrow.ttf 字体具有 WinAnsiEncoding,它可以在 Windows 10 机器上运行,但在部署到 Lunix 机器上时会出现问题。
请帮忙!
package com.example.demo;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDResources;
import org.apache.pdfbox.pdmodel.font.PDType0Font;
import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm;
import org.apache.pdfbox.pdmodel.interactive.form.PDCheckBox;
import org.apache.pdfbox.pdmodel.interactive.form.PDField;
import org.apache.pdfbox.pdmodel.interactive.form.PDRadioButton;
import org.apache.pdfbox.pdmodel.interactive.form.PDTextField;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
public class PDFMailMergeUtil {
/**
* constants used to check or uncheck a checkbox in the PDF
*/
private static final String CHECKBOX_VALUE_ON = "ON";
private static final String CHECKBOX_VALUE_OFF = "OFF";
/**
* This method takes PDF Template and a map of PDF Form field names and their values,
* and returns Actual PDF Document with appropriate values.
* Consider templateContent as Class and returned data as Object of that cklass.
*
* @param templateContent PDF Template
* @param pdf_fieldName_fieldValue_map PDF form field names and their values
* @return Actual PDF document with filled values
*/
public static byte[] createPdfDocumentFromPdfTemplate(byte[] templateContent, Map<String, String> pdf_fieldName_fieldValue_map) {
byte[] mailMergedPDFContent = null;
// validation
if (templateContent == null) {
System.out.println ("PDF Template Content is null.");
return null;
}
ByteArrayOutputStream out = null;
PDDocument pdDoc = null;
try {
pdDoc = PDDocument.load(templateContent);
PDAcroForm pdAcroForm = pdDoc.getDocumentCatalog().getAcroForm();
if (pdAcroForm == null) {
System.out.println("No Form Field present in the PDF Template.");
} else {
PDType0Font font = PDType0Font.load(pdDoc, PDFMailMergeUtil.class.getResourceAsStream("/Arial_Narrow.ttf")); // I have this Arial_Narrow.ttf font in my resources folder so available in classpath
PDResources res = pdAcroForm.getDefaultResources();
String fontName = res.add(font).getName();
String defaultAppearanceString = "/" + fontName + " 10 Tf 0 g";
Iterator<PDField> fieldsIterator = pdAcroForm.getFieldIterator();
while (fieldsIterator.hasNext()) {
PDField pdfield = fieldsIterator.next();
String formFieldName = pdfield.getFullyQualifiedName();
// check if fieldName-Value Map contains the form field name in template
if (!pdf_fieldName_fieldValue_map.containsKey(formFieldName)) {
continue;
}
// We are here - means the PDF Acro Form Field name is present in our name-value map
// get field value from map
String formFieldValue = pdf_fieldName_fieldValue_map.get(formFieldName);
if (pdfield instanceof PDTextField) {
// if the PDF Form field is a Text Field
((PDTextField) pdfield).setDefaultAppearance(defaultAppearanceString);
pdfield.setValue(formFieldValue);
pdfield.setReadOnly(true);
} else if (pdfield instanceof PDRadioButton) {
PDRadioButton pdRadioButton = (PDRadioButton)pdfield;
// if the PDF Form field is a Radio Button
Set<String> allowedValues = pdRadioButton.getOnValues();
if (allowedValues != null && !allowedValues.isEmpty() && allowedValues.contains(formFieldValue)) {
pdfield.setValue(formFieldValue);
} else {
System.out.println("PDF Form Field with name '" + formFieldName + "' received value as '" + formFieldValue
+ "'. However allowed values for this field are " + allowedValues);
}
pdfield.setReadOnly(true);
} else if (pdfield instanceof PDCheckBox) {
// if the PDF Form field is a Checkbox
PDCheckBox pdCheckBox = (PDCheckBox)pdfield;
if (CHECKBOX_VALUE_ON.equalsIgnoreCase(formFieldValue)) {
pdCheckBox.check();
} else if (CHECKBOX_VALUE_OFF.equalsIgnoreCase(formFieldValue)) {
pdCheckBox.unCheck();
}
pdCheckBox.setReadOnly(true);
}
}
// extract to output file byte[]
out = new ByteArrayOutputStream();
pdDoc.save(out);
mailMergedPDFContent = out.toByteArray();
}
} catch (Exception e) {
e.printStackTrace();
} finally {
// clear resources
try {
if (pdDoc != null) pdDoc.close();
if (out != null) out.close();
} catch (IOException e) {
e.printStackTrace();
}
}
return mailMergedPDFContent;
}
}
Input to above utility class is
byte[] template = Files.readAllBytes(Paths.get("SomePDFTemplate.pdf"));
// set field name value map
Map<String, String> map = new HashMap<>();
map.put("Signing_Place", "İstanbul, Poznań, Łodź"); // these wierd characters not rendering properly when code runs on Lunix server
map.put("Participant_Name","Test Präjakta");
map.put("Radio_Button_Group","RB_Item_3"); // valid values are: [RB_Item_1, RB_Item_2, RB_Item_3]
map.put("CB_Item_1","OFF");
map.put("CB_Item_2","ON");
// create document
byte[] pdfDoc = PDFMailMergeUtil.createPdfDocumentFromPdfTemplate(template, map);
// save document as .pdf
try (FileOutputStream fos = new FileOutputStream("C:\data\Projects\demo\src\test\resources\Test_PROD.pdf")) {
fos.write(pdfDoc);
}
改变这个
PDType0Font font = PDType0Font.load(pdDoc, PDFMailMergeUtil.class.getResourceAsStream("/Arial_Narrow.ttf"));
至此
PDType0Font font = PDType0Font.load(pdDoc, PDFMailMergeUtil.class.getResourceAsStream("/Arial_Narrow.ttf"), false);
避免子集化。 IIRC 这是因为子集字体中的字体文件在您使用它时并不真正存在,因为您使用的对象是不同的 PDFont 对象。
我的要求是我有一个带有少量文本字段的 PDF Acroform 模板,并且基于登录用户,这些文本字段预先填充了用户名和城市并呈现给用户(注意用户不是手动输入数据到PDF 但我的后端代码正在这样做)。 所以这些名称和城市字段给非英语字符带来了问题。 奇怪的是这在我的 loca (windows 10) 上工作得很好。然而,当代码部署在 lunix 机器上时,那些非英文字符将被替换为其他一些字符。
我正在使用 pdfbox-app 版本 2.0.24
我的类路径中有这种字体 Arial_Narrow.ttf。 (解决另一个问题 - java.lang.IllegalArgumentException:...在此字体的编码中不可用:WinAnsiEncoding)
我的假设是因为此 Arial_Narrow.ttf 字体具有 WinAnsiEncoding,它可以在 Windows 10 机器上运行,但在部署到 Lunix 机器上时会出现问题。
请帮忙!
package com.example.demo;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDResources;
import org.apache.pdfbox.pdmodel.font.PDType0Font;
import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm;
import org.apache.pdfbox.pdmodel.interactive.form.PDCheckBox;
import org.apache.pdfbox.pdmodel.interactive.form.PDField;
import org.apache.pdfbox.pdmodel.interactive.form.PDRadioButton;
import org.apache.pdfbox.pdmodel.interactive.form.PDTextField;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
public class PDFMailMergeUtil {
/**
* constants used to check or uncheck a checkbox in the PDF
*/
private static final String CHECKBOX_VALUE_ON = "ON";
private static final String CHECKBOX_VALUE_OFF = "OFF";
/**
* This method takes PDF Template and a map of PDF Form field names and their values,
* and returns Actual PDF Document with appropriate values.
* Consider templateContent as Class and returned data as Object of that cklass.
*
* @param templateContent PDF Template
* @param pdf_fieldName_fieldValue_map PDF form field names and their values
* @return Actual PDF document with filled values
*/
public static byte[] createPdfDocumentFromPdfTemplate(byte[] templateContent, Map<String, String> pdf_fieldName_fieldValue_map) {
byte[] mailMergedPDFContent = null;
// validation
if (templateContent == null) {
System.out.println ("PDF Template Content is null.");
return null;
}
ByteArrayOutputStream out = null;
PDDocument pdDoc = null;
try {
pdDoc = PDDocument.load(templateContent);
PDAcroForm pdAcroForm = pdDoc.getDocumentCatalog().getAcroForm();
if (pdAcroForm == null) {
System.out.println("No Form Field present in the PDF Template.");
} else {
PDType0Font font = PDType0Font.load(pdDoc, PDFMailMergeUtil.class.getResourceAsStream("/Arial_Narrow.ttf")); // I have this Arial_Narrow.ttf font in my resources folder so available in classpath
PDResources res = pdAcroForm.getDefaultResources();
String fontName = res.add(font).getName();
String defaultAppearanceString = "/" + fontName + " 10 Tf 0 g";
Iterator<PDField> fieldsIterator = pdAcroForm.getFieldIterator();
while (fieldsIterator.hasNext()) {
PDField pdfield = fieldsIterator.next();
String formFieldName = pdfield.getFullyQualifiedName();
// check if fieldName-Value Map contains the form field name in template
if (!pdf_fieldName_fieldValue_map.containsKey(formFieldName)) {
continue;
}
// We are here - means the PDF Acro Form Field name is present in our name-value map
// get field value from map
String formFieldValue = pdf_fieldName_fieldValue_map.get(formFieldName);
if (pdfield instanceof PDTextField) {
// if the PDF Form field is a Text Field
((PDTextField) pdfield).setDefaultAppearance(defaultAppearanceString);
pdfield.setValue(formFieldValue);
pdfield.setReadOnly(true);
} else if (pdfield instanceof PDRadioButton) {
PDRadioButton pdRadioButton = (PDRadioButton)pdfield;
// if the PDF Form field is a Radio Button
Set<String> allowedValues = pdRadioButton.getOnValues();
if (allowedValues != null && !allowedValues.isEmpty() && allowedValues.contains(formFieldValue)) {
pdfield.setValue(formFieldValue);
} else {
System.out.println("PDF Form Field with name '" + formFieldName + "' received value as '" + formFieldValue
+ "'. However allowed values for this field are " + allowedValues);
}
pdfield.setReadOnly(true);
} else if (pdfield instanceof PDCheckBox) {
// if the PDF Form field is a Checkbox
PDCheckBox pdCheckBox = (PDCheckBox)pdfield;
if (CHECKBOX_VALUE_ON.equalsIgnoreCase(formFieldValue)) {
pdCheckBox.check();
} else if (CHECKBOX_VALUE_OFF.equalsIgnoreCase(formFieldValue)) {
pdCheckBox.unCheck();
}
pdCheckBox.setReadOnly(true);
}
}
// extract to output file byte[]
out = new ByteArrayOutputStream();
pdDoc.save(out);
mailMergedPDFContent = out.toByteArray();
}
} catch (Exception e) {
e.printStackTrace();
} finally {
// clear resources
try {
if (pdDoc != null) pdDoc.close();
if (out != null) out.close();
} catch (IOException e) {
e.printStackTrace();
}
}
return mailMergedPDFContent;
}
}
Input to above utility class is
byte[] template = Files.readAllBytes(Paths.get("SomePDFTemplate.pdf"));
// set field name value map
Map<String, String> map = new HashMap<>();
map.put("Signing_Place", "İstanbul, Poznań, Łodź"); // these wierd characters not rendering properly when code runs on Lunix server
map.put("Participant_Name","Test Präjakta");
map.put("Radio_Button_Group","RB_Item_3"); // valid values are: [RB_Item_1, RB_Item_2, RB_Item_3]
map.put("CB_Item_1","OFF");
map.put("CB_Item_2","ON");
// create document
byte[] pdfDoc = PDFMailMergeUtil.createPdfDocumentFromPdfTemplate(template, map);
// save document as .pdf
try (FileOutputStream fos = new FileOutputStream("C:\data\Projects\demo\src\test\resources\Test_PROD.pdf")) {
fos.write(pdfDoc);
}
改变这个
PDType0Font font = PDType0Font.load(pdDoc, PDFMailMergeUtil.class.getResourceAsStream("/Arial_Narrow.ttf"));
至此
PDType0Font font = PDType0Font.load(pdDoc, PDFMailMergeUtil.class.getResourceAsStream("/Arial_Narrow.ttf"), false);
避免子集化。 IIRC 这是因为子集字体中的字体文件在您使用它时并不真正存在,因为您使用的对象是不同的 PDFont 对象。