使用带有 XSSF 和 SAX 的 poi 在 JAVA 中实现内存高效 XLSX reader 但不知道如何获取公式?
Implement memory efficient XLSX reader in JAVA using poi with XSSF and SAX but didn't know how to get formula?
我正在尝试在 Java 中使用 POI 实现一个 .xlsx reader。因为我主要关心的是内存占用,所以我使用 XSSF 和 SAX 来实现它,我参考的代码是 Event API (XSSF with SAX)。
但是,公式是我想要读取的单元格属性中的主要内容之一。例如,我想读取单元格名称(C12)、单元格值、单元格公式等,但我在公式上遇到了困难,不知道如何在不使用工作簿(Workbook)的情况下获取它;因为如果使用工作簿,内存就会有问题。
有人可以帮我解决问题吗?
如 XSSF and SAX (Event API) 所述:
... you can get at the underlying XML data, and process it yourself.
This is intended for intermediate developers who are willing to learn
a little bit of low level structure of .xlsx files, and who are happy
processing XML in java. Its relatively simple to use, but requires a
basic understanding of the file structure.
因此,首先您需要了解 *.xlsx 文件的结构以及其中各个 XML 部分的含义。您还需要了解使用 SAX 解析 XML 的工作原理,例如 ContentHandler 的方法 startElement、endElement 和 characters 各自意味着什么。您还需要知道它们何时被调用以及给定参数的含义。
如果所有这些都清楚了,那么您就可以开始编程了。XSSF and SAX (Event API) 中的 ExampleEventUserModel 示例只具有非常基本的功能,用于帮助理解基础知识。它仅从共享字符串表(shared strings table)中获取字符串内容,其他所有内容则完全按照它们在 v 元素中存储的形式原样获取。您链接的 DZone 示例更加简单:它只从共享字符串表中获取字符串内容。
我可以提供一个更完整的示例,它还会从 f 元素(如果有的话)中获取公式,并且除了 SharedStringsTable 之外还使用 StylesTable:如果单元格的 s 属性指向某个单元格样式,就通过它获取该单元格的 XSSFCellStyle。此 XSSFCellStyle 包含数字格式以及字体设置、边框设置等(如果有的话)。
示例:
import java.io.InputStream;
import java.util.Iterator;
import org.apache.poi.ooxml.util.SAXHelper;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFCellStyle;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.ss.usermodel.BuiltinFormats;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import javax.xml.parsers.ParserConfigurationException;
public class ExampleEventUserModel {
public void processAllSheets(String filename) throws Exception {
OPCPackage pkg = OPCPackage.open(filename);
XSSFReader r = new XSSFReader(pkg);
SharedStringsTable sst = r.getSharedStringsTable();
StylesTable st = r.getStylesTable();
XMLReader parser = fetchSheetParser(sst, st);
Iterator<InputStream> sheets = r.getSheetsData();
while(sheets.hasNext()) {
System.out.println("Processing new sheet:\n");
InputStream sheet = sheets.next();
InputSource sheetSource = new InputSource(sheet);
parser.parse(sheetSource);
sheet.close();
System.out.println("");
}
}
public XMLReader fetchSheetParser(SharedStringsTable sst, StylesTable st) throws SAXException, ParserConfigurationException {
XMLReader parser = SAXHelper.newXMLReader();
ContentHandler handler = new SheetHandler(sst, st);
parser.setContentHandler(handler);
return parser;
}
private static class SheetHandler extends DefaultHandler {
private SharedStringsTable sst;
private StylesTable st;
private String lastCharacters; // characters cache to collect character content between startElement and eneElement
private String formula; // stores the formula, if any
private String content; // stores the content, if any
private boolean nextValueIsSSTString; // indicates that next value is from SharedStringsTable
private boolean nextValueIsStyledNumeric; // indicates that next value is a styled numeric value
private XSSFCellStyle cellStyle; // stores the cell style, if any
private DataFormatter formatter; // used to format the styled numeric values
private SheetHandler(SharedStringsTable sst, StylesTable st) {
this.sst = sst;
this.st = st;
this.formatter = new DataFormatter(java.util.Locale.US, true);
}
public void startElement(String uri, String localName, String name,
Attributes attributes) throws SAXException {
// c => start of cell
if(name.equals("c")) {
// print the cell reference
System.out.print(attributes.getValue("r") + " - ");
// get the cell type
String cellType = attributes.getValue("t");
// figure out if the value is an index in the SST
this.nextValueIsSSTString = false;
if(cellType != null && cellType.equals("s")) {
this.nextValueIsSSTString = true;
}
// figure out if the cell has style
this.cellStyle = null;
String styleIdx = attributes.getValue("s");
if (styleIdx != null) {
int styleIndex = Integer.parseInt(styleIdx);
this.cellStyle = st.getStyleAt(styleIndex);
// print that there is cell style for this cell
System.out.print("CellStyle: " + this.cellStyle + " - ");
}
// figure out if the value is an styled numeric value or date
this.nextValueIsStyledNumeric = false;
if(cellType != null && cellType.equals("n") || cellType == null) {
if (this.cellStyle != null) {
this.nextValueIsStyledNumeric = true;
}
}
}
// clear characters cache after each element
this.lastCharacters = "";
}
public void endElement(String uri, String localName, String name)
throws SAXException {
// f => end of formula in a cell
if(name.equals("f")) {
this.formula = lastCharacters;
// print formula
System.out.print("Formula: " + this.formula + " - ");
}
// v => end of value of a cell
if(name.equals("v")) {
this.content = this.lastCharacters;
// process shared string value
if(this.nextValueIsSSTString) {
int idx = Integer.parseInt(lastCharacters);
this.content = sst.getItemAt(idx).getString();
nextValueIsSSTString = false;
}
// process styled numeric value
if(this.nextValueIsStyledNumeric) {
String formatString = cellStyle.getDataFormatString();
int formatIndex = cellStyle.getDataFormat();
if (formatString == null) {
// formatString could not be found, so it must be a builtin format.
formatString = BuiltinFormats.getBuiltinFormat(formatIndex);
}
double value = Double.valueOf(this.content);
this.content = formatter.formatRawCellContents(value, formatIndex, formatString);
nextValueIsStyledNumeric = false;
}
}
// c => end of a cell
if(name.equals("c")) {
// print content
System.out.println("Content: " + this.content);
this.content = "";
}
}
public void characters(char[] ch, int start, int length) {
this.lastCharacters += new String(ch, start, length);
}
}
public static void main(String[] args) throws Exception {
ExampleEventUserModel example = new ExampleEventUserModel();
//example.processAllSheets(args[0]);
example.processAllSheets("ExcelExample.xlsx");
}
}
我正在尝试在 Java 中使用 POI 实现一个 .xlsx reader。因为我主要关心的是内存占用,所以我使用 XSSF 和 SAX 来实现它,我参考的代码是 Event API (XSSF with SAX)。
但是,公式是我想要读取的单元格属性中的主要内容之一。例如,我想读取单元格名称(C12)、单元格值、单元格公式等,但我在公式上遇到了困难,不知道如何在不使用工作簿(Workbook)的情况下获取它;因为如果使用工作簿,内存就会有问题。
有人可以帮我解决问题吗?
如 XSSF and SAX (Event API) 所述:
... you can get at the underlying XML data, and process it yourself. This is intended for intermediate developers who are willing to learn a little bit of low level structure of .xlsx files, and who are happy processing XML in java. Its relatively simple to use, but requires a basic understanding of the file structure.
因此,首先您需要了解 *.xlsx 文件的结构以及其中各个 XML 部分的含义。您还需要了解使用 SAX 解析 XML 的工作原理,例如 ContentHandler 的方法 startElement、endElement 和 characters 各自意味着什么。您还需要知道它们何时被调用以及给定参数的含义。
如果所有这些都清楚了,那么您就可以开始编程了。XSSF and SAX (Event API) 中的 ExampleEventUserModel 示例只具有非常基本的功能,用于帮助理解基础知识。它仅从共享字符串表(shared strings table)中获取字符串内容,其他所有内容则完全按照它们在 v 元素中存储的形式原样获取。您链接的 DZone 示例更加简单:它只从共享字符串表中获取字符串内容。
我可以提供一个更完整的示例,它还会从 f 元素(如果有的话)中获取公式,并且除了 SharedStringsTable 之外还使用 StylesTable:如果单元格的 s 属性指向某个单元格样式,就通过它获取该单元格的 XSSFCellStyle。此 XSSFCellStyle 包含数字格式以及字体设置、边框设置等(如果有的话)。
示例:
import java.io.InputStream;
import java.util.Iterator;
import org.apache.poi.ooxml.util.SAXHelper;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFCellStyle;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.ss.usermodel.BuiltinFormats;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import javax.xml.parsers.ParserConfigurationException;
public class ExampleEventUserModel {
public void processAllSheets(String filename) throws Exception {
OPCPackage pkg = OPCPackage.open(filename);
XSSFReader r = new XSSFReader(pkg);
SharedStringsTable sst = r.getSharedStringsTable();
StylesTable st = r.getStylesTable();
XMLReader parser = fetchSheetParser(sst, st);
Iterator<InputStream> sheets = r.getSheetsData();
while(sheets.hasNext()) {
System.out.println("Processing new sheet:\n");
InputStream sheet = sheets.next();
InputSource sheetSource = new InputSource(sheet);
parser.parse(sheetSource);
sheet.close();
System.out.println("");
}
}
public XMLReader fetchSheetParser(SharedStringsTable sst, StylesTable st) throws SAXException, ParserConfigurationException {
XMLReader parser = SAXHelper.newXMLReader();
ContentHandler handler = new SheetHandler(sst, st);
parser.setContentHandler(handler);
return parser;
}
private static class SheetHandler extends DefaultHandler {
private SharedStringsTable sst;
private StylesTable st;
private String lastCharacters; // characters cache to collect character content between startElement and eneElement
private String formula; // stores the formula, if any
private String content; // stores the content, if any
private boolean nextValueIsSSTString; // indicates that next value is from SharedStringsTable
private boolean nextValueIsStyledNumeric; // indicates that next value is a styled numeric value
private XSSFCellStyle cellStyle; // stores the cell style, if any
private DataFormatter formatter; // used to format the styled numeric values
private SheetHandler(SharedStringsTable sst, StylesTable st) {
this.sst = sst;
this.st = st;
this.formatter = new DataFormatter(java.util.Locale.US, true);
}
public void startElement(String uri, String localName, String name,
Attributes attributes) throws SAXException {
// c => start of cell
if(name.equals("c")) {
// print the cell reference
System.out.print(attributes.getValue("r") + " - ");
// get the cell type
String cellType = attributes.getValue("t");
// figure out if the value is an index in the SST
this.nextValueIsSSTString = false;
if(cellType != null && cellType.equals("s")) {
this.nextValueIsSSTString = true;
}
// figure out if the cell has style
this.cellStyle = null;
String styleIdx = attributes.getValue("s");
if (styleIdx != null) {
int styleIndex = Integer.parseInt(styleIdx);
this.cellStyle = st.getStyleAt(styleIndex);
// print that there is cell style for this cell
System.out.print("CellStyle: " + this.cellStyle + " - ");
}
// figure out if the value is an styled numeric value or date
this.nextValueIsStyledNumeric = false;
if(cellType != null && cellType.equals("n") || cellType == null) {
if (this.cellStyle != null) {
this.nextValueIsStyledNumeric = true;
}
}
}
// clear characters cache after each element
this.lastCharacters = "";
}
public void endElement(String uri, String localName, String name)
throws SAXException {
// f => end of formula in a cell
if(name.equals("f")) {
this.formula = lastCharacters;
// print formula
System.out.print("Formula: " + this.formula + " - ");
}
// v => end of value of a cell
if(name.equals("v")) {
this.content = this.lastCharacters;
// process shared string value
if(this.nextValueIsSSTString) {
int idx = Integer.parseInt(lastCharacters);
this.content = sst.getItemAt(idx).getString();
nextValueIsSSTString = false;
}
// process styled numeric value
if(this.nextValueIsStyledNumeric) {
String formatString = cellStyle.getDataFormatString();
int formatIndex = cellStyle.getDataFormat();
if (formatString == null) {
// formatString could not be found, so it must be a builtin format.
formatString = BuiltinFormats.getBuiltinFormat(formatIndex);
}
double value = Double.valueOf(this.content);
this.content = formatter.formatRawCellContents(value, formatIndex, formatString);
nextValueIsStyledNumeric = false;
}
}
// c => end of a cell
if(name.equals("c")) {
// print content
System.out.println("Content: " + this.content);
this.content = "";
}
}
public void characters(char[] ch, int start, int length) {
this.lastCharacters += new String(ch, start, length);
}
}
public static void main(String[] args) throws Exception {
ExampleEventUserModel example = new ExampleEventUserModel();
//example.processAllSheets(args[0]);
example.processAllSheets("ExcelExample.xlsx");
}
}