在指定行开始读取 Excel sheet

Start reading an Excel sheet at a specified row

我正在使用 POI 的 XSSF 和 SAX(事件 API)读取 Excel 文件。Excel sheet 有数千行,所以这是我发现的唯一性能良好的方法。现在我想从指定的行(例如第 6 行)开始读取 Excel 文件:这些 Excel 文件每天都会更新,而我记录了上次已经存储的最后一行,因此希望只读取新增的行。

如何从指定的行开始读取?

/**
 * Opens the workbook referenced by the form in read-only mode and streams its
 * first sheet through {@code processSheet}.
 *
 * <p>Both the package and the sheet stream are managed by try-with-resources, so
 * each is closed exactly once and a failure while closing cannot replace the
 * exception that caused the read to fail.
 *
 * @param acquisitionForm form providing the workbook path via {@code getDatatablePath()}
 * @throws ExcelReadException if the workbook cannot be opened, has no sheet, or
 *         any error occurs while the sheet is parsed (the cause is preserved)
 * @throws IOException kept in the signature for backward compatibility with callers
 */
private void getExcelField(AcquisitionForm acquisitionForm) throws ExcelReadException, IOException{
    try (OPCPackage p = OPCPackage.open(acquisitionForm.getDatatablePath(), PackageAccess.READ)) {
        ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(p);
        XSSFReader xssfReader = new XSSFReader(p);
        StylesTable styles = xssfReader.getStylesTable();
        XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();

        // Only the first sheet is read; fail with a clear cause when there is none.
        if (!iter.hasNext()) {
            throw new IllegalStateException("Workbook contains no sheets");
        }
        try (InputStream stream = iter.next()) {
            processSheet(styles, strings, stream, acquisitionForm);
        }
    } catch (Exception e) {
        throw new ExcelReadException("An error occured during excel file reading ", e);
    }
}



/**
 * Parses one sheet with a SAX parser, delegating every XML event to a
 * {@code MyXSSFSheetHandler} built from the given styles and shared-strings tables.
 *
 * @param styles styles table handed to the handler
 * @param strings shared-strings table handed to the handler
 * @param sheetInputStream raw XML stream of the sheet; not closed by this method
 * @param acquisitionForm form handed to the handler
 * @throws Exception if the parser cannot be configured or the sheet cannot be parsed
 */
private void processSheet(StylesTable styles, ReadOnlySharedStringsTable strings, InputStream sheetInputStream, AcquisitionForm acquisitionForm) throws Exception {
    SAXParserFactory saxFactory = SAXParserFactory.newInstance();
    // Worksheet XML never carries a DOCTYPE; rejecting it closes the XXE /
    // entity-expansion attack surface for workbooks from untrusted sources.
    saxFactory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
    XMLReader sheetParser = saxFactory.newSAXParser().getXMLReader();

    // NOTE(review): this call passes four arguments; confirm a matching
    // MyXSSFSheetHandler constructor exists (another one also takes a sheet index).
    MyXSSFSheetHandler handler = new MyXSSFSheetHandler(styles, strings,databaseAcquisitionServices, acquisitionForm);
    sheetParser.setContentHandler(handler);
    sheetParser.parse(new InputSource(sheetInputStream));
}

MyXSSFSheetHandler

/**
 * Creates a handler for one sheet.
 *
 * @param styles styles table used to resolve cell formats
 * @param strings shared-strings table used to resolve string cells
 * @param databaseAcquisitionServices service that receives each completed row
 * @param acquisitionForm form forwarded to the service with every row
 * @param sheetIndex index of the sheet, forwarded to the service with every row
 */
public MyXSSFSheetHandler(StylesTable styles, ReadOnlySharedStringsTable strings,DatabaseAcquisitionServices databaseAcquisitionServices, AcquisitionForm acquisitionForm, int sheetIndex) {
    // Collaborators supplied by the caller.
    this.databaseAcquisitionServices = databaseAcquisitionServices;
    this.acquisitionForm = acquisitionForm;
    this.sheetIndex = sheetIndex;
    stylesTable = styles;
    sharedStringsTable = strings;

    // Per-parse working state.
    nextDataType = XssfDataType.NUMBER;
    value = new StringBuffer();
    formatter = new DataFormatter();
}

/**
 * Reacts to an opening tag of the sheet XML.
 *
 * <p>A value element ("v", or "inlineStr") resets the text buffer and starts
 * capturing characters. A cell element ("c") records the cell reference and its
 * column, then decides from the "t" and "s" attributes how the upcoming value
 * has to be interpreted.
 */
public void startElement(String uri, String localName, String name,
        Attributes attributes) throws SAXException {

    boolean opensValue = "v".equals(name) || "inlineStr".equals(name);
    if (opensValue) {
        // Start from an empty buffer for the new value
        value.setLength(0);
        vIsOpen = true;
        return;
    }
    if (!"c".equals(name)) {
        return;
    }

    // c => cell: split the reference (e.g. "AB12") at its first digit
    cellCoordinate = attributes.getValue("r");
    final int refLength = cellCoordinate.length();
    int lettersEnd = 0;
    while (lettersEnd < refLength && !Character.isDigit(cellCoordinate.charAt(lettersEnd))) {
        lettersEnd++;
    }
    // A reference without any digit keeps the -1 end index of the index-based scan
    String columnLetters = cellCoordinate.substring(0, lettersEnd < refLength ? lettersEnd : -1);
    thisColumn = nameToColumn(columnLetters);

    // Defaults: a plain number without any format
    this.nextDataType = XssfDataType.NUMBER;
    this.formatIndex = -1;
    this.formatString = null;

    String typeAttr = attributes.getValue("t");
    String styleAttr = attributes.getValue("s");

    XssfDataType explicitType = null;
    if (typeAttr != null) {
        switch (typeAttr) {
        case "b":
            explicitType = XssfDataType.BOOL;
            break;
        case "e":
            explicitType = XssfDataType.ERROR;
            break;
        case "inlineStr":
            explicitType = XssfDataType.INLINESTR;
            break;
        case "s":
            explicitType = XssfDataType.SSTINDEX;
            break;
        case "str":
            explicitType = XssfDataType.FORMULA;
            break;
        default:
            break;
        }
    }

    if (explicitType != null) {
        nextDataType = explicitType;
        return;
    }
    if (styleAttr == null) {
        return;
    }

    // Still a number, but one that carries a style and therefore possibly a format
    XSSFCellStyle style = stylesTable.getStyleAt(Integer.parseInt(styleAttr));
    this.formatIndex = style.getDataFormat();
    this.formatString = style.getDataFormatString();
    if (this.formatString == null) {
        this.formatString = BuiltinFormats.getBuiltinFormat(this.formatIndex);
    }
}

/*
 * (non-Javadoc)
 * @see org.xml.sax.helpers.DefaultHandler#endElement(java.lang.String, java.lang.String, java.lang.String)
 *
 * On a closing "v" element the buffered text is converted to the cell's string
 * value according to nextDataType and stored in rowValues under the cell
 * reference. On a closing "row" element a copy of the collected values is
 * handed to databaseAcquisitionServices.archiveAcquisition and the per-row
 * state is reset.
 */
public void endElement(String uri, String localName, String name)
        throws SAXException {

    // v => contents of a cell
    if ("v".equals(name)) {
        // The value is complete: stop buffering so that text belonging to
        // other elements is no longer appended by characters().
        vIsOpen = false;

        String cellValue = null;
        // Convert now, as characters() may have been called more than once
        String raw = value.toString();

        switch (nextDataType) {

        case BOOL:
            cellValue = raw.charAt(0) == '0' ? "false" : "true";
            break;

        case ERROR:
        case FORMULA:
            // Error codes and cached formula results are passed through unchanged
            cellValue = raw;
            break;

        case INLINESTR:
            cellValue = new XSSFRichTextString(raw).toString();
            break;

        case SSTINDEX:
            try {
                int idx = Integer.parseInt(raw);
                XSSFRichTextString rtss = new XSSFRichTextString(sharedStringsTable.getEntryAt(idx));
                cellValue = rtss.toString();
            }
            catch (NumberFormatException ex) {
                // Best effort: the cell is stored with a null value
                System.out.println("Failed to parse SST index '" + raw + "': " + ex.toString());
            }
            break;

        case NUMBER:
            if (this.formatString != null && raw.length() > 0){
                cellValue = formatter.formatRawCellContents(Double.parseDouble(raw), this.formatIndex, this.formatString);
            }
            else{
                cellValue = raw;
            }
            break;

        default:
            cellValue="";
            break;
        }

        if (lastColumnNumber == -1) {
            lastColumnNumber = 0;
        }

        // Might be the empty string.
        rowValues.put(cellCoordinate,cellValue);

        // Update column
        if (thisColumn > -1)
            lastColumnNumber = thisColumn;

    } else if ("row".equals(name)) {
        // The row is complete. A copy of the map is passed on because
        // rowValues is cleared and reused for the next row.
        databaseAcquisitionServices.archiveAcquisition(new TreeMap<>(rowValues), rowCounter, acquisitionForm, sheetIndex);
        // Clear the structure used to store row data
        rowValues.clear();
        rowCounter++;
        lastColumnNumber = -1;
    }

}

/**
 * Appends the reported characters to the value buffer, but only while a
 * value element is flagged as open; everything else is ignored.
 */
public void characters(char[] ch, int start, int length)
        throws SAXException {
    if (!vIsOpen) {
        return;
    }
    value.append(ch, start, length);
}

/**
 * Translates the letter part of a cell reference into a zero-based column
 * index, reading the letters as a bijective base-26 number
 * ("A" is 0, "Z" is 25, "AA" is 26). An empty name yields -1.
 *
 * @param name column letters, expected to be upper-case 'A'..'Z'
 * @return zero-based index of the column
 */
private int nameToColumn(String name) {
    int index = -1;
    for (char letter : name.toCharArray()) {
        // Shift the letters seen so far one place to the left, then add this one
        index = 26 * (index + 1) + (letter - 'A');
    }
    return index;
}

您可能需要在从 SheetContentsHandler 派生的 class 中对已读取的行进行计数。我认为您不需要单独的 XSSFSheetHandler;我宁愿使用默认的 handler,只编写一个派生的 SheetContentsHandler,它为您感兴趣的所有事件提供回调方法,例如 startRow、endRow、cell。

您可以查看 XLSX2CSV sample 的示例,了解如何完成 XLSX 文件的流式读取,尤其是 class SheetToCSV 如何针对每个 row/cell 收到回调。例如,您可以在方法 cell() 中使用类似 if (currentRow < startRow) { return; } 的内容来跳过行。

cell 方法被调用时会带有参数 String cellReference,可以通过 new CellReference(cellReference).getCol() 和 new CellReference(cellReference).getRow() 来检索 row/cell 坐标。