使用 POI 的 XSSF 和 SAX 在 Java 中实现内存高效的 XLSX 读取器,但不知道如何获取公式?

Implement memory efficient XLSX reader in JAVA using poi with XSSF and SAX but didn't know how to get formula?

我正在尝试在 Java 中使用 POI 实现一个 .xlsx 读取器。因为我主要关心的是内存占用,所以我使用 XSSF 和 SAX 来实现它,参考的代码是 Event API (XSSF with SAX)。但是公式是我想要读取的单元格属性中的主要内容之一,例如,我想读取单元格名称(C12)、单元格值、单元格公式等。我正在为公式而苦苦挣扎,不知道如何在不使用工作簿(Workbook)的情况下获取它,因为如果使用工作簿,内存就会有问题。

有人可以帮我解决问题吗?

正如 XSSF and SAX (Event API) 文档所述:

... you can get at the underlying XML data, and process it yourself. This is intended for intermediate developers who are willing to learn a little bit of low level structure of .xlsx files, and who are happy processing XML in java. Its relatively simple to use, but requires a basic understanding of the file structure.

因此,首先您需要了解 *.xlsx 文件的结构以及 XML 各部分的含义。您还需要了解使用 SAX 解析 XML 的工作原理,例如 ContentHandler 的 startElement、endElement 和 characters 方法分别意味着什么。您还需要知道它们何时被调用以及给定参数的含义。

如果所有这些都清楚了,那么您就可以开始编程了。XSSF and SAX (Event API) 中的 ExampleEventUserModel 示例只具有非常基本的功能,用于帮助理解基础知识。它仅从共享字符串表(shared strings table)中获取字符串内容,其他所有内容则与它们在 v 元素中存储的形式完全相同。您链接的 DZone 示例更加简单,它只从共享字符串表中获取字符串内容。

我可以提供一个更完整的示例,它还会从 f 元素(如果有的话)中获取公式,并且除了 SharedStringsTable 之外还使用 StylesTable,在 s 属性指向某个单元格样式时获取该单元格的 XSSFCellStyle。此 XSSFCellStyle 包含数字格式以及字体设置、边框设置等(如果有的话)。

示例:

import java.io.InputStream;
import java.util.Iterator;

import javax.xml.parsers.ParserConfigurationException;

import org.apache.poi.ooxml.util.SAXHelper;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackageAccess;
import org.apache.poi.ss.usermodel.BuiltinFormats;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFCellStyle;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;

/**
 * Reads an .xlsx file sheet by sheet through a SAX parser and prints, for every cell,
 * its reference, its cell style (if any), its formula (if any) and its content.
 *
 * <p>The sheet XML is handed to the parser as a stream obtained from {@link XSSFReader};
 * no workbook object is created. Only the shared strings table and the styles table are
 * fetched up front, because cell values refer to them by index.
 */
public class ExampleEventUserModel {

    /**
     * Parses every sheet of the given .xlsx file and prints one line per cell to
     * {@code System.out}.
     *
     * @param filename path of the .xlsx file to read
     * @throws Exception if the package cannot be opened or a sheet cannot be parsed
     */
    public void processAllSheets(String filename) throws Exception {
        // Open read-only: the file is never modified here, and closing a writable
        // package would try to save it. try-with-resources releases the file handle
        // even when parsing fails.
        try (OPCPackage pkg = OPCPackage.open(filename, PackageAccess.READ)) {
            XSSFReader r = new XSSFReader(pkg);

            SharedStringsTable sst = r.getSharedStringsTable();
            StylesTable st = r.getStylesTable();
            XMLReader parser = fetchSheetParser(sst, st);

            Iterator<InputStream> sheets = r.getSheetsData();
            while (sheets.hasNext()) {
                System.out.println("Processing new sheet:\n");
                // Close each sheet stream even if the parser throws.
                try (InputStream sheet = sheets.next()) {
                    parser.parse(new InputSource(sheet));
                }
                System.out.println("");
            }
        }
    }


    /**
     * Creates an XML reader whose content handler is a {@code SheetHandler} bound to
     * the given tables.
     *
     * @param sst shared strings table used to resolve cells of type {@code s}
     * @param st  styles table used to resolve the {@code s} (style index) attribute
     * @return a reader ready to parse sheet XML
     * @throws SAXException                 if the reader cannot be created
     * @throws ParserConfigurationException if the parser configuration is not supported
     */
    public XMLReader fetchSheetParser(SharedStringsTable sst, StylesTable st) throws SAXException, ParserConfigurationException {
        XMLReader parser = SAXHelper.newXMLReader();
        ContentHandler handler = new SheetHandler(sst, st);
        parser.setContentHandler(handler);
        return parser;
    }


    /**
     * SAX handler for sheet XML. It reacts to three elements, matched by qualified
     * name: {@code c} (cell), {@code f} (formula text) and {@code v} (stored value).
     * Text of any other element is collected and discarded.
     */
    private static class SheetHandler extends DefaultHandler {

        private final SharedStringsTable sst;
        private final StylesTable st;
        private final DataFormatter formatter; // used to format the styled numeric values
        // characters cache to collect character content between startElement and endElement
        private final StringBuilder lastCharacters = new StringBuilder();
        private String formula; // stores the formula of the current cell, if any
        private String content = ""; // stores the content of the current cell, if any
        private boolean nextValueIsSSTString; // indicates that next value is from SharedStringsTable
        private boolean nextValueIsStyledNumeric; // indicates that next value is a styled numeric value
        private XSSFCellStyle cellStyle; // stores the cell style, if any

        private SheetHandler(SharedStringsTable sst, StylesTable st) {
            this.sst = sst;
            this.st = st;
            this.formatter = new DataFormatter(java.util.Locale.US, true);
        }

        /** Resets per-cell state so a handler reused for another sheet starts clean. */
        @Override
        public void startDocument() throws SAXException {
            resetCellState();
            this.lastCharacters.setLength(0);
        }

        /**
         * On a {@code c} element: prints the cell reference and style, and records how
         * the upcoming {@code v} value has to be interpreted. For every element the
         * characters cache is cleared.
         */
        @Override
        public void startElement(String uri, String localName, String name,
                                 Attributes attributes) throws SAXException {
            // c => start of cell
            if ("c".equals(name)) {
                // nothing from the previous cell may leak into this one
                resetCellState();

                // print the cell reference
                System.out.print(attributes.getValue("r") + " - ");

                // the value is an index into the SST when the cell type is "s"
                String cellType = attributes.getValue("t");
                this.nextValueIsSSTString = "s".equals(cellType);

                // figure out if the cell has style
                String styleIdx = attributes.getValue("s");
                if (styleIdx != null) {
                    this.cellStyle = st.getStyleAt(Integer.parseInt(styleIdx));
                    // print that there is cell style for this cell
                    System.out.print("CellStyle: " + this.cellStyle + " - ");
                }

                // a missing type attribute or "n" is treated as numeric (number or date);
                // it can only be formatted when a style is available
                boolean numeric = cellType == null || "n".equals(cellType);
                this.nextValueIsStyledNumeric = numeric && this.cellStyle != null;
            }

            // clear characters cache after each element
            this.lastCharacters.setLength(0);
        }

        /**
         * On {@code f}: stores and prints the formula text. On {@code v}: resolves the
         * raw value into the cell content. On {@code c}: prints the content.
         */
        @Override
        public void endElement(String uri, String localName, String name)
                throws SAXException {
            if ("f".equals(name)) {
                // f => end of formula in a cell; only the element's own text is captured
                this.formula = this.lastCharacters.toString();
                System.out.print("Formula: " + this.formula + " - ");
            } else if ("v".equals(name)) {
                // v => end of value of a cell
                this.content = resolveValue(this.lastCharacters.toString());
            } else if ("c".equals(name)) {
                // c => end of a cell
                System.out.println("Content: " + this.content);
                this.content = "";
            }
        }

        /** Appends a chunk of text; the parser may deliver one text node in several calls. */
        @Override
        public void characters(char[] ch, int start, int length) {
            this.lastCharacters.append(ch, start, length);
        }

        /** Clears everything that belongs to a single cell. */
        private void resetCellState() {
            this.formula = null;
            this.content = "";
            this.cellStyle = null;
            this.nextValueIsSSTString = false;
            this.nextValueIsStyledNumeric = false;
        }

        /**
         * Turns the raw text of a {@code v} element into the cell content: a shared
         * string looked up by index, a number formatted with the cell style, or the
         * raw text unchanged.
         */
        private String resolveValue(String raw) {
            if (this.nextValueIsSSTString) {
                this.nextValueIsSSTString = false;
                int idx = Integer.parseInt(raw);
                return sst.getItemAt(idx).getString();
            }

            if (this.nextValueIsStyledNumeric) {
                this.nextValueIsStyledNumeric = false;
                String formatString = cellStyle.getDataFormatString();
                int formatIndex = cellStyle.getDataFormat();
                if (formatString == null) {
                    // formatString could not be found, so it must be a builtin format.
                    formatString = BuiltinFormats.getBuiltinFormat(formatIndex);
                }
                double value = Double.parseDouble(raw);
                return formatter.formatRawCellContents(value, formatIndex, formatString);
            }

            return raw;
        }
    }

    public static void main(String[] args) throws Exception {
        ExampleEventUserModel example = new ExampleEventUserModel();
        //example.processAllSheets(args[0]);
        example.processAllSheets("ExcelExample.xlsx");
    }
}