Apache POI SAX XSSFReader 读取错误的日期格式
Apache POI SAX XSSFReader reads wrong date format
我的 Apache POI SAX reader 实现类似于这个众所周知的示例 https://github.com/pjfanning/poi-shared-strings-sample/blob/master/src/main/java/com/github/pjfanning/poi/sample/XLSX2CSV.java ,但它读取到的某些日期值与 Excel 中显示的日期值不一致,尽管它读取的应该是 "formatted value"。
excel 文件中的值:1/1/2019,"formatted value" 由 reader 读取:1/ 1/19.
知道为什么会有差异吗?
Apache POI 版本 3.17
Reader代码:
package com.lopuch.sk.lita.is.importer;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.Map;
import javax.xml.parsers.ParserConfigurationException;
import org.apache.log4j.Logger;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.util.ZipSecureFile;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.ss.util.CellAddress;
import org.apache.poi.ss.util.CellReference;
import org.apache.poi.util.SAXHelper;
import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFComment;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import com.lopuch.sk.lita.is.importer.fileImport.ExcelRowReadListener;
public class ExcelSaxImporter {
private static final Logger logger = Logger.getLogger(ExcelSaxImporter.class);
private ExcelRowReadListener listener;
public void setOnRowRead(ExcelRowReadListener listener) {
this.listener = listener;
}
public ExcelRowReadListener getListener() {
return listener;
};
public void process(byte[] fileByteArray)
throws IOException, OpenXML4JException, ParserConfigurationException, SAXException {
ZipSecureFile.setMinInflateRatio(0.0d);
OPCPackage opcpPackage = OPCPackage.open(new ByteArrayInputStream(fileByteArray));
ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(opcpPackage);
XSSFReader xssfReader = new XSSFReader(opcpPackage);
StylesTable styles = xssfReader.getStylesTable();
XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
while (iter.hasNext()) {
InputStream stream = iter.next();
processSheet(styles, strings, getHandler(), stream);
stream.close();
}
}
private SheetContentsHandler getHandler() {
return new SheetContentsHandler() {
private boolean firstCellOfRow = false;
private int currentRow = -1;
private int currentCol = -1;
// Maps column Letter name to its value.
// Does not contain key-value pair if cell value is null for
// currently
// processed column and row.
private Map<String, String> rowValues;
@Override
public void startRow(int rowNum) {
// Prepare for this row
firstCellOfRow = true;
currentRow = rowNum;
currentCol = -1;
rowValues = new HashMap<String, String>();
}
@Override
public void endRow(int rowNum) {
if (rowValues.keySet().size() == 0) {
logger.trace("Skipping calling rowRead() because of empty row");
} else {
ExcelSaxImporter.this.getListener().rowRead(rowValues);
}
}
@Override
public void cell(String cellReference, String formattedValue, XSSFComment comment) {
if (firstCellOfRow) {
firstCellOfRow = false;
}
// gracefully handle missing CellRef here in a similar way
// as XSSFCell does
if (cellReference == null) {
cellReference = new CellAddress(currentRow, currentCol).formatAsString();
}
// Did we miss any cells?
int thisCol = (new CellReference(cellReference)).getCol();
currentCol = thisCol;
cellReference = cellReference.replaceAll("\d","");
rowValues.put(cellReference, formattedValue);
}
@Override
public void headerFooter(String text, boolean isHeader, String tagName) {
}
};
}
/**
* Parses and shows the content of one sheet using the specified styles and
* shared-strings tables.
*
* @param styles
* @param strings
* @param sheetInputStream
*/
public void processSheet(StylesTable styles, ReadOnlySharedStringsTable strings, SheetContentsHandler sheetHandler,
InputStream sheetInputStream) throws IOException, ParserConfigurationException, SAXException {
DataFormatter formatter = new DataFormatter();
InputSource sheetSource = new InputSource(sheetInputStream);
try {
XMLReader sheetParser = SAXHelper.newXMLReader();
ContentHandler handler = new XSSFSheetXMLHandler(styles, null, strings, sheetHandler, formatter, false);
sheetParser.setContentHandler(handler);
sheetParser.parse(sheetSource);
} catch (ParserConfigurationException e) {
throw new RuntimeException("SAX parser appears to be broken - " + e.getMessage());
}
}
}
excel 显示的值与 Apache POI 读取的值的差异来自对用户语言设置作出反应的日期格式。来自 Excel:
Date formats that begin with an asterisk (*) respond to changes in regional date and time settings that are specified for the operating system.
Apache POI DataFormatter 会忽略这些特定于语言环境的格式,并返回默认的美国格式日期。来自 Apache POI DataFormatter 文档:
Some formats are automatically "localized" by Excel, eg show as mm/dd/yyyy when loaded in Excel in some Locales but as dd/mm/yyyy in others. These are always returned in the "default" (US) format, as stored in the file.
要解决此问题,请参阅相关问题的回答(原文此处的链接标题缺失)。
我的 Apache POI SAX reader 实现类似于这个众所周知的示例 https://github.com/pjfanning/poi-shared-strings-sample/blob/master/src/main/java/com/github/pjfanning/poi/sample/XLSX2CSV.java ,但它读取到的某些日期值与 Excel 中显示的日期值不一致,尽管它读取的应该是 "formatted value"。
excel 文件中的值:1/1/2019,"formatted value" 由 reader 读取:1/ 1/19.
知道为什么会有差异吗?
Apache POI 版本 3.17
Reader代码:
package com.lopuch.sk.lita.is.importer;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.Map;
import javax.xml.parsers.ParserConfigurationException;
import org.apache.log4j.Logger;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.util.ZipSecureFile;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.ss.util.CellAddress;
import org.apache.poi.ss.util.CellReference;
import org.apache.poi.util.SAXHelper;
import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFComment;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import com.lopuch.sk.lita.is.importer.fileImport.ExcelRowReadListener;
public class ExcelSaxImporter {
private static final Logger logger = Logger.getLogger(ExcelSaxImporter.class);
private ExcelRowReadListener listener;
public void setOnRowRead(ExcelRowReadListener listener) {
this.listener = listener;
}
public ExcelRowReadListener getListener() {
return listener;
};
public void process(byte[] fileByteArray)
throws IOException, OpenXML4JException, ParserConfigurationException, SAXException {
ZipSecureFile.setMinInflateRatio(0.0d);
OPCPackage opcpPackage = OPCPackage.open(new ByteArrayInputStream(fileByteArray));
ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(opcpPackage);
XSSFReader xssfReader = new XSSFReader(opcpPackage);
StylesTable styles = xssfReader.getStylesTable();
XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
while (iter.hasNext()) {
InputStream stream = iter.next();
processSheet(styles, strings, getHandler(), stream);
stream.close();
}
}
private SheetContentsHandler getHandler() {
return new SheetContentsHandler() {
private boolean firstCellOfRow = false;
private int currentRow = -1;
private int currentCol = -1;
// Maps column Letter name to its value.
// Does not contain key-value pair if cell value is null for
// currently
// processed column and row.
private Map<String, String> rowValues;
@Override
public void startRow(int rowNum) {
// Prepare for this row
firstCellOfRow = true;
currentRow = rowNum;
currentCol = -1;
rowValues = new HashMap<String, String>();
}
@Override
public void endRow(int rowNum) {
if (rowValues.keySet().size() == 0) {
logger.trace("Skipping calling rowRead() because of empty row");
} else {
ExcelSaxImporter.this.getListener().rowRead(rowValues);
}
}
@Override
public void cell(String cellReference, String formattedValue, XSSFComment comment) {
if (firstCellOfRow) {
firstCellOfRow = false;
}
// gracefully handle missing CellRef here in a similar way
// as XSSFCell does
if (cellReference == null) {
cellReference = new CellAddress(currentRow, currentCol).formatAsString();
}
// Did we miss any cells?
int thisCol = (new CellReference(cellReference)).getCol();
currentCol = thisCol;
cellReference = cellReference.replaceAll("\d","");
rowValues.put(cellReference, formattedValue);
}
@Override
public void headerFooter(String text, boolean isHeader, String tagName) {
}
};
}
/**
* Parses and shows the content of one sheet using the specified styles and
* shared-strings tables.
*
* @param styles
* @param strings
* @param sheetInputStream
*/
public void processSheet(StylesTable styles, ReadOnlySharedStringsTable strings, SheetContentsHandler sheetHandler,
InputStream sheetInputStream) throws IOException, ParserConfigurationException, SAXException {
DataFormatter formatter = new DataFormatter();
InputSource sheetSource = new InputSource(sheetInputStream);
try {
XMLReader sheetParser = SAXHelper.newXMLReader();
ContentHandler handler = new XSSFSheetXMLHandler(styles, null, strings, sheetHandler, formatter, false);
sheetParser.setContentHandler(handler);
sheetParser.parse(sheetSource);
} catch (ParserConfigurationException e) {
throw new RuntimeException("SAX parser appears to be broken - " + e.getMessage());
}
}
}
excel 显示的值与 Apache POI 读取的值的差异来自对用户语言设置作出反应的日期格式。来自 Excel:
Date formats that begin with an asterisk (*) respond to changes in regional date and time settings that are specified for the operating system.
Apache POI DataFormatter 会忽略这些特定于语言环境的格式,并返回默认的美国格式日期。来自 Apache POI DataFormatter 文档:
Some formats are automatically "localized" by Excel, eg show as mm/dd/yyyy when loaded in Excel in some Locales but as dd/mm/yyyy in others. These are always returned in the "default" (US) format, as stored in the file.
要解决此问题,请参阅对