JodConverter 在 java 中不支持 docx 文件

JodConverter does not support docx file in java

我正在使用 JodConverter 将我的 .docx (Microsoft Office) 文件转换为 pdf,但不知何故它无法转换并报错。当我尝试将 .doc 文档转换为 pdf 时,它可以正常工作。我使用的是 maven JodConverter 插件 2.2.0 版本。

我的问题是:是否可以使用 JODConverter 将 .docx 文件转换为 pdf?

import java.io.File;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

import org.apache.commons.io.FilenameUtils;

import com.artofsolving.jodconverter.DefaultDocumentFormatRegistry;
import com.artofsolving.jodconverter.DocumentConverter;
import com.artofsolving.jodconverter.DocumentFormat;
import com.artofsolving.jodconverter.DocumentFormatRegistry;
import com.artofsolving.jodconverter.openoffice.connection.OpenOfficeConnection;
import com.sun.star.beans.PropertyValue;
import com.sun.star.lang.XComponent;
import com.sun.star.uno.UnoRuntime;
import com.sun.star.util.XRefreshable;

/**
 * Skeleton {@link DocumentConverter} that works through an
 * {@link OpenOfficeConnection}.
 *
 * <p>This class validates the arguments of the public {@code convert}
 * overloads, resolves missing document formats from file extensions via the
 * configured {@link DocumentFormatRegistry}, and then hands the actual work
 * to the abstract {@code convertInternal} methods that subclasses provide.
 */
public abstract class AbstractOpenOfficeDocumentConverter implements DocumentConverter {

    /** Connection made available to subclasses. */
    protected OpenOfficeConnection openOfficeConnection;

    /** Registry consulted when a format has to be derived from a file extension. */
    private DocumentFormatRegistry documentFormatRegistry;

    /** Load properties, keyed by property name; seeded in the constructor. */
    private final Map/*<String,Object>*/ defaultLoadProperties = new HashMap();

    /**
     * Creates a converter backed by a new {@link DefaultDocumentFormatRegistry}.
     *
     * @param connection the connection to use
     */
    public AbstractOpenOfficeDocumentConverter(OpenOfficeConnection connection) {
        this(connection, new DefaultDocumentFormatRegistry());
    }

    /**
     * Creates a converter with an explicit format registry. The default load
     * properties start out as {@code Hidden=TRUE} and {@code ReadOnly=TRUE}.
     *
     * @param openOfficeConnection the connection to use
     * @param documentFormatRegistry the registry used to look up formats by extension
     */
    public AbstractOpenOfficeDocumentConverter(OpenOfficeConnection openOfficeConnection, DocumentFormatRegistry documentFormatRegistry) {
        this.documentFormatRegistry = documentFormatRegistry;
        this.openOfficeConnection = openOfficeConnection;

        this.defaultLoadProperties.put("Hidden", Boolean.TRUE);
        this.defaultLoadProperties.put("ReadOnly", Boolean.TRUE);
    }

    /**
     * Adds or replaces one entry in the default load properties.
     *
     * @param name the property name
     * @param value the property value
     */
    public void setDefaultLoadProperty(String name, Object value) {
        this.defaultLoadProperties.put(name, value);
    }

    /**
     * @return the live (not copied) map of default load properties
     */
    protected Map getDefaultLoadProperties() {
        return this.defaultLoadProperties;
    }

    /**
     * @return the registry given at construction time
     */
    protected DocumentFormatRegistry getDocumentFormatRegistry() {
        return this.documentFormatRegistry;
    }

    /**
     * Converts a file, deriving both formats from the file extensions.
     *
     * @param inputFile the document to read
     * @param outputFile the document to write
     */
    public void convert(File inputFile, File outputFile) {
        convert(inputFile, outputFile, null);
    }

    /**
     * Converts a file, deriving the input format from the input file's extension.
     *
     * @param inputFile the document to read
     * @param outputFile the document to write
     * @param outputFormat the target format, or {@code null} to derive it from
     *        {@code outputFile}'s extension
     */
    public void convert(File inputFile, File outputFile, DocumentFormat outputFormat) {
        convert(inputFile, null, outputFile, outputFormat);
    }

    /**
     * Converts between streams. No format guessing is possible here, so all
     * four arguments are mandatory.
     *
     * @param inputStream the source stream
     * @param inputFormat the format of the source
     * @param outputStream the destination stream
     * @param outputFormat the format to produce
     * @throws IllegalArgumentException if any argument is {@code null}
     */
    public void convert(InputStream inputStream, DocumentFormat inputFormat, OutputStream outputStream, DocumentFormat outputFormat) {
        ensureNotNull("inputStream", inputStream);
        ensureNotNull("inputFormat", inputFormat);
        ensureNotNull("outputStream", outputStream);
        ensureNotNull("outputFormat", outputFormat);

        convertInternal(inputStream, inputFormat, outputStream, outputFormat);
    }

    /**
     * Converts between files. A {@code null} format is resolved from the
     * corresponding file's extension before the conversion is validated.
     *
     * @param inputFile the document to read; must exist
     * @param inputFormat the source format, or {@code null} to derive it from {@code inputFile}
     * @param outputFile the document to write
     * @param outputFormat the target format, or {@code null} to derive it from {@code outputFile}
     * @throws IllegalArgumentException if a file argument is {@code null}, the
     *         input file does not exist, a format cannot be derived from an
     *         extension, the input format is not importable, or it cannot be
     *         exported to the output format
     */
    public void convert(File inputFile, DocumentFormat inputFormat, File outputFile, DocumentFormat outputFormat) {
        ensureNotNull("inputFile", inputFile);
        ensureNotNull("outputFile", outputFile);

        if (!inputFile.exists()) {
            throw new IllegalArgumentException("inputFile doesn't exist: " + inputFile);
        }

        // Resolve the source format first, then the target, falling back to the file extension.
        DocumentFormat sourceFormat = (inputFormat != null) ? inputFormat : guessDocumentFormat(inputFile);
        DocumentFormat targetFormat = (outputFormat != null) ? outputFormat : guessDocumentFormat(outputFile);

        if (!sourceFormat.isImportable()) {
            throw new IllegalArgumentException("unsupported input format: " + sourceFormat.getName());
        }
        if (!sourceFormat.isExportableTo(targetFormat)) {
            throw new IllegalArgumentException("unsupported conversion: from " + sourceFormat.getName() + " to " + targetFormat.getName());
        }

        convertInternal(inputFile, sourceFormat, outputFile, targetFormat);
    }

    /**
     * Performs the stream-based conversion; called with four non-null arguments.
     */
    protected abstract void convertInternal(InputStream inputStream, DocumentFormat inputFormat, OutputStream outputStream, DocumentFormat outputFormat);

    /**
     * Performs the file-based conversion; called after the formats have been
     * resolved and the conversion has been checked as supported.
     */
    protected abstract void convertInternal(File inputFile, DocumentFormat inputFormat, File outputFile, DocumentFormat outputFormat);

    /**
     * Rejects a {@code null} argument with a message that names it.
     */
    private void ensureNotNull(String label, Object candidate) {
        if (candidate != null) {
            return;
        }
        throw new IllegalArgumentException(label + " is null");
    }

    /**
     * Looks up a format in the registry by the file name's extension.
     *
     * @throws IllegalArgumentException if the registry returns no format for the extension
     */
    private DocumentFormat guessDocumentFormat(File file) {
        DocumentFormat guessed = getDocumentFormatRegistry()
                .getFormatByFileExtension(FilenameUtils.getExtension(file.getName()));
        if (guessed != null) {
            return guessed;
        }
        throw new IllegalArgumentException("unknown document format for file: " + file);
    }

    /**
     * Calls {@code refresh()} on the document if it exposes {@link XRefreshable};
     * otherwise does nothing.
     *
     * @param document the loaded document component
     */
    protected void refreshDocument(XComponent document) {
        XRefreshable target = (XRefreshable) UnoRuntime.queryInterface(XRefreshable.class, document);
        if (target == null) {
            return;
        }
        target.refresh();
    }

    /**
     * Builds a {@link PropertyValue} with the given name and value.
     *
     * @param name the property name
     * @param value the property value
     * @return a new {@code PropertyValue} with {@code Name} and {@code Value} set
     */
    protected static PropertyValue property(String name, Object value) {
        PropertyValue result = new PropertyValue();
        result.Value = value;
        result.Name = name;
        return result;
    }

    /**
     * Converts a map into an array of {@link PropertyValue}s, one per entry in
     * the map's iteration order. A value that is itself a {@code Map} is
     * converted recursively into a nested {@code PropertyValue[]}.
     *
     * @param properties the properties to convert; keys are cast to {@code String}
     * @return an array with as many elements as {@code properties} has entries
     */
    protected static PropertyValue[] toPropertyValues(Map/*<String,Object>*/ properties) {
        PropertyValue[] converted = new PropertyValue[properties.size()];
        Iterator entries = properties.entrySet().iterator();
        for (int index = 0; entries.hasNext(); index++) {
            Map.Entry entry = (Map.Entry) entries.next();
            Object raw = entry.getValue();
            // Nested maps become nested PropertyValue arrays.
            Object value = (raw instanceof Map) ? toPropertyValues((Map) raw) : raw;
            converted[index] = property((String) entry.getKey(), value);
        }
        return converted;
    }
}

JodConverter 2.2.1 版无法根据扩展名猜测 docx 的文档格式。不过,您可以将输入文件的文档格式作为参数显式传入:

// Describe the .docx input format by hand (name, family, MIME type, extension).
final DocumentFormat docx = new DocumentFormat("Microsoft Word 2007 XML", DocumentFamily.TEXT, "application/vnd.openxmlformats-officedocument.wordprocessingml.document", "docx");
// Pass it as the explicit input format so no lookup by the input file's extension is needed;
// the null output format is resolved from outputFile's extension.
converter.convert(inputFile, docx, outputFile, null);

免责声明:我拥有 jodconverter.org 域并在 GitHub 上分叉了原始项目。

虽然回答得有点晚,但您可以使用 Maven 上提供的原始项目的分支:

JODConverter

它支持 docx 和 xlsx 格式。

如果您还在使用旧的JodConverter 2.2.1 并且不想升级它,您可以将docx 文档格式添加到注册表中。代码将如下所示:

// Start from the default registry and add a docx entry to it.
DefaultDocumentFormatRegistry documentFormatRegistry= new DefaultDocumentFormatRegistry();
// NOTE(review): this constructor overload is given no DocumentFamily; confirm the resulting
// format still passes the converter's isImportable() check when docx is the input format.
DocumentFormat docx = new DocumentFormat("Microsoft Word 2007 XML", "application/vnd.openxmlformats-officedocument.wordprocessingml.document","docx");
docx.setExportFilter(DocumentFamily.TEXT,"MS Word 2007 XML");
documentFormatRegistry.addDocumentFormat(docx);
// Hand the extended registry to the converter so that formats are looked up in it.
OpenOfficeDocumentConverter converter = new OpenOfficeDocumentConverter(connection,documentFormatRegistry);