如何使用 POI 流 (SAX XML) 从 Excel 中提取日期作为日期而非文本

How to extract Date as Date Not As Text from Excel using POI Streaming (SAX XML)

我正在 Web 应用程序中解析 Excel (xlsx) 工作表。因为要处理大文件,我使用的是 Apache POI 的 Streaming API。Excel 在内部将日期存储为数字,例如我得到的是 42035 而不是 1/31/2015。由于我使用的是 XML 解析,无法使用 POI 提供的日期格式化方法。有什么想法吗?

这是解析器(基于此示例 https://poi.apache.org/spreadsheet/how-to.html#xssf_sax_api 修改而来):

/**
 * SAX handler that collects the cell values of a worksheet XML part into a
 * {@link DataSet}.
 *
 * <p>The values of the first row are stored as the data set's headers. The
 * header row is considered finished when a cell with a different row number
 * starts; from then on every {@code v} element fills the next slot of the
 * current {@link Tuple}, and the tuple is added to the data set once as many
 * values as there are headers have been read.
 *
 * <p>Limitations visible in this implementation:
 * <ul>
 *   <li>The column position advances once per {@code v} element, so a cell
 *       without a {@code v} child does not occupy a column.</li>
 *   <li>Only cells of type {@code s} are translated (through the shared
 *       strings table). Every other value is stored as the raw text of the
 *       {@code v} element; no number or date format is applied.
 *       NOTE(review): formatting date cells presumably needs the cell style
 *       ({@code s} attribute) plus POI's styles table and data formatter -
 *       confirm against the POI event-model examples.</li>
 * </ul>
 *
 * See org.xml.sax.helpers.DefaultHandler javadocs
 */
private static class SheetHandler extends DefaultHandler {

    private static final Logger LOG = LoggerFactory.getLogger(SheetHandler.class);

    /** Shared strings table used to resolve cells of type {@code s}. */
    private final SharedStringsTable table;
    /** Headers and rows collected so far. */
    private final DataSet data = new DataSet();

    /** Text accumulated by {@link #characters} since the last element start. */
    private String lastContents;
    /** True while the current cell's value is an index into the shared strings table. */
    private boolean nextIsString;
    /** True until the first cell of the second row has been seen. */
    private boolean isFirstRow = true;
    /** Number of headers found in the first row. */
    private int quantityOfColumns;
    /** 1-based position of the next value within the current row. */
    private int currentColumnNumber = 1;
    /** Row number taken from the reference of the cell currently being read. */
    private int currentRowNumber = 1;
    /** Row number of the cell that was read before the current one. */
    private int rowNumberOfLastCell = 1;
    /** Row under construction; created once the header row is complete. */
    private Tuple tuple;

    /**
     * Creates a handler that resolves shared strings through the given table.
     *
     * @param sst shared strings table of the workbook being parsed
     */
    private SheetHandler(SharedStringsTable sst) {
        this.table = sst;
        LOG.debug("Sheethandler created");
    }

    /**
     * Tracks the row of each starting cell ({@code c} element), finishes the
     * header row when the row number changes for the first time, and records
     * whether the cell's value is a shared-strings index. The content buffer
     * is reset on every element start.
     */
    @Override
    public void startElement(String uri, String localName, String name,
            Attributes attributes) throws SAXException {
        LOG.debug("Reading element of type {} ", name);
        // c => cell
        if ("c".equals(name)) {
            rowNumberOfLastCell = currentRowNumber;
            String cellType = attributes.getValue("t");
            String cellReference = attributes.getValue("r");
            LOG.debug("Extracting cell {} with type {}", cellReference, cellType);
            // Without a reference the row cannot be determined; keep the
            // current row number instead of failing on a null reference.
            if (cellReference != null) {
                currentRowNumber = extractIntFromString(cellReference);
            }
            if (isFinishedRowHeaderRow()) {
                extractHeaders();
            }
            // Figure out if the value is an index in the SST (shared strings table)
            nextIsString = "s".equals(cellType);
            if (nextIsString) {
                LOG.debug("We have content!");
            }
        }
        // Clear contents cache
        lastContents = "";
    }

    /**
     * @return true when the current cell is the first one whose row number
     *         differs from the previous cell's while the header row is
     *         still being read
     */
    private boolean isFinishedRowHeaderRow() {
        return (rowNumberOfLastCell != currentRowNumber) && isFirstRow;
    }

    /**
     * Closes the header row: fixes the column count to the number of headers
     * collected and prepares the first data row.
     */
    private void extractHeaders() {
        quantityOfColumns = data.getHeaders().size();
        LOG.debug("{} columns detected", quantityOfColumns);
        LOG.debug("{} headers parsed", data.getHeaders().size());
        isFirstRow = false;
        currentColumnNumber = 1;
        tuple = new Tuple(quantityOfColumns);
    }

    /**
     * Appends the reported characters to the content buffer; the parser may
     * deliver the text of one element in several calls.
     */
    @Override
    public void characters(char[] ch, int start, int length)
            throws SAXException {
        lastContents += new String(ch, start, length);
        LOG.debug("Extracted value \"{}\"", lastContents);
    }

    /**
     * Stores the buffered value when a {@code v} element (cell contents)
     * ends, and concludes the row once all columns have been filled.
     */
    @Override
    public void endElement(String uri, String localName, String name)
            throws SAXException {
        // v => contents of a cell
        if (!"v".equals(name)) {
            return;
        }
        // Resolve the shared string only here: the buffer holds the complete
        // index once </v> is reached (characters() may be called more than
        // once), and for any other closing element the buffer is not an index.
        if (nextIsString) {
            extractStringFromCell();
        }
        addExtractedValueToDataSet();
        LOG.debug("Ended parsing of column {}", currentColumnNumber);
        if (currentColumnNumber == quantityOfColumns) {
            concludeRow();
        } else {
            currentColumnNumber++;
        }
    }

    /** Adds the finished row to the data set and starts a new, empty one. */
    private void concludeRow() {
        data.addRow(tuple);
        LOG.debug("Row added to DataSet");
        tuple = new Tuple(quantityOfColumns);
        currentColumnNumber = 1;
        LOG.debug("Row {} parsed", rowNumberOfLastCell);
    }

    /**
     * Stores the buffered value as a header while the first row is being
     * read, otherwise at the current column of the row under construction.
     */
    private void addExtractedValueToDataSet() {
        if (isFirstRow) {
            data.getHeaders().add(lastContents);
        } else {
            tuple.getRowEntries()[currentColumnNumber - 1] = lastContents;
        }
    }

    /**
     * Replaces the buffered shared-strings index with the string it refers to.
     *
     * @throws NumberFormatException if the buffered text is not an integer
     */
    private void extractStringFromCell() throws NumberFormatException {
        int idx = Integer.parseInt(lastContents);
        lastContents = new XSSFRichTextString(table.getEntryAt(idx)).toString();
        LOG.debug("Extracted value \"{}\"", lastContents);
        nextIsString = false;
    }

    /**
     * Extracts the number formed by the trailing decimal digits of a string,
     * e.g. the row number {@code 12} from the cell reference {@code "AB12"}.
     *
     * @param original text ending in decimal digits; may be {@code null}
     * @return the trailing number, or 0 if {@code original} is {@code null},
     *         does not end in a digit, or the number does not fit an int
     */
    public int extractIntFromString(String original) {
        if (original == null) {
            return 0;
        }
        // Walk backwards over the trailing run of ASCII digits.
        int digitsStart = original.length();
        while (digitsStart > 0) {
            char c = original.charAt(digitsStart - 1);
            if (c < '0' || c > '9') {
                break;
            }
            digitsStart--;
        }
        if (digitsStart == original.length()) {
            return 0;
        }
        try {
            return Integer.parseInt(original.substring(digitsStart));
        } catch (NumberFormatException tooLarge) {
            // Only reachable when the digit run exceeds the int range.
            LOG.debug("Number in \"{}\" does not fit an int", original);
            return 0;
        }
    }

    /**
     * @return the data set filled by this handler
     */
    public DataSet getData() {
        return data;
    }
}

@Gagravarr 感谢您的回复,由于某种原因我忽略了这一点,这帮助我解决了我的问题:

https://svn.apache.org/repos/asf/poi/trunk/src/examples/src/org/apache/poi/xssf/eventusermodel/XLSX2CSV.java