Java - XLSX 解析和数据库导出

Java - XLSX parse & database export

我有一个 excel 填充了大约 50k-60k 行。

我必须把那个 Excel 的内容上传到 MySQL。一般我都是用 Apache POI 读取并上传到 MySQL,但是这个文件无法用 Apache POI 读取,因为文件太大了。

谁能指导我怎么做?这是我使用 apache poi 将内容上传到 MySQL 的示例代码(它适用于一些包含 1000-2000 行的小 xlsx 文件)

/**
 * Walks every row of the sheet held in {@code sheetx} and, for each row whose
 * first cell is a three-digit number, inserts the first six cells into
 * {@code dbtable} through {@code save(sql)}.
 *
 * <p>Rows whose cells are missing or have an unexpected type are reported on
 * the console and skipped; the import then continues with the next row.
 *
 * @param file    the uploaded file; only its name is used here, for logging
 * @param dbtable name of the target table. NOTE(review): it is concatenated
 *                into the statement as-is, so it must come from trusted code,
 *                never from user input.
 */
public static void uploadCrossSellCorpCard(FileItem file, String dbtable) {
    System.out.println("UploadUtil Running" + file.getFileName().toString());
    try {
        for (int i = 0; i <= sheetx.getLastRowNum(); i++) {
            row = sheetx.getRow(i);

            try {
                int oc = (int) row.getCell(0).getNumericCellValue();
                // Same filter as the former String.valueOf(oc).matches("[A-Za-z0-9]{3}"):
                // the decimal text of an int has exactly three such characters
                // only for 100..999 (a minus sign never matches).
                if (oc >= 100 && oc <= 999) {
                    String rm_name = row.getCell(1).getStringCellValue();
                    String company = row.getCell(2).getStringCellValue();
                    String product = row.getCell(3).getStringCellValue();
                    String detail = row.getCell(4).getStringCellValue();
                    String type = row.getCell(5).getStringCellValue();

                    // Cell text comes from an uploaded spreadsheet, so every
                    // value is escaped before it is embedded in the statement.
                    String sql = "INSERT INTO " + dbtable + " VALUES('" + oc + "',"
                            + toSqlLiteral(rm_name) + "," + toSqlLiteral(company) + ","
                            + toSqlLiteral(product) + "," + toSqlLiteral(detail) + ","
                            + toSqlLiteral(type) + ")";
                    save(sql);
                    System.out.println("Import rows " + i);
                }
            } catch (IllegalStateException e) {
                // Raised when a cell does not hold the requested type; skip the row.
                e.printStackTrace();
            } catch (NullPointerException e) {
                // Missing row or cell: report it and continue with the next row.
                System.out.println(e);
            }
        }
        System.out.println("Success import xlsx to mysql table");
    } catch (NullPointerException e) {
        // Reached when no sheet has been loaded (sheetx is null).
        System.out.println(e);
        System.out.println("Select the file first before uploading");
    }
}

/**
 * Renders a value as a single-quoted SQL string literal, escaping backslashes
 * and single quotes so the value cannot terminate the literal early.
 * Assumes MySQL's default mode, in which backslash is an escape character.
 *
 * @param value the raw text, may be {@code null}
 * @return the quoted literal, or {@code NULL} when {@code value} is null
 */
private static String toSqlLiteral(String value) {
    if (value == null) {
        return "NULL";
    }
    return "'" + value.replace("\\", "\\\\").replace("'", "''") + "'";
}

注意:我使用 Hibernate 来处理上传。“save(sql)” 调用的是我自己的 Hibernate 方法。

您可以尝试使用 Apache POI 的 SAX 方式,请阅读以下页面中的 “XSSF 和 SAX(事件 API)” 一节:https://poi.apache.org/spreadsheet/how-to.html

您可以像读取 XML 文件一样读取整个 Excel,无论是 60k 行还是 100k 行。唯一需要注意的是空单元格:工作表的 XML 中不会为空单元格生成标记,解析时会直接跳过它们,但您可能希望在数据库表中把这些空单元格写成空值(NULL)。

解决方案 --> 您可以读取每一行并在循环中触发插入语句。如果出现间隙,则通过监视单元格地址来监视空单元格,然后检查相应的列名,并相应地用空值更新插入语句。

希望对您有所帮助。下面的示例代码读取 excel 并将其存储在 ArrayList 的 ArrayList 中以进行表格表示。我在控制台中打印消息 - "new row begins" 在开始读取和打印行之前。以及打印单元格值本身之前每个值的单元格编号。

我没有处理空单元格的单元格间隙,但您可以根据查找单元格间隙对其进行编码,因为在我的情况下我没有空单元格。 在控制台中查找单元格地址,帮助您发现任何差距并根据需要进行处理。

这段代码在我这里运行良好。不要忘记添加 xmlbeans-2.3.0.jar 以及导入语句所需的其他 jar 包。

import java.io.InputStream;
import java.util.ArrayList;

import org.apache.commons.lang3.time.DurationFormatUtils;
import org.apache.commons.lang3.time.StopWatch;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;

public class ExcelToStringArray implements Cloneable {

    public static ArrayList<ArrayList<StringBuilder>> stringArrayToReturn = new ArrayList<ArrayList<StringBuilder>>();
    public static ArrayList<StringBuilder> retainedString;
    public static Integer lineCounter = 0;

    public ArrayList<ArrayList<StringBuilder>> GetSheetInStringArray(String PathtoFilename, String rId)
            throws Exception {
        ExcelToStringArray myParser = new ExcelToStringArray();
        myParser.processOneSheet(PathtoFilename, rId);
        return stringArrayToReturn;
    }

    public void processOneSheet(String PathtoFilename, String rId) throws Exception {
        OPCPackage pkg = OPCPackage.open(PathtoFilename);
        XSSFReader r = new XSSFReader(pkg);
        SharedStringsTable sst = r.getSharedStringsTable();

        XMLReader parser = fetchSheetParser(sst);

        InputStream sheet = r.getSheet(rId);
        InputSource sheetSource = new InputSource(sheet);
        parser.parse(sheetSource);
        sheet.close();
    }

    public XMLReader fetchSheetParser(SharedStringsTable sst) throws SAXException {
        XMLReader parser = XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser");
        ContentHandler handler = new SheetHandler(sst);
        parser.setContentHandler(handler);
        return parser;
    }

    private class SheetHandler extends DefaultHandler {
        private SharedStringsTable sst;
        private String lastContents;
        private boolean nextIsString;

        private SheetHandler(SharedStringsTable sst) {
            this.sst = sst;
        }

        public void startElement(String uri, String localName, String name, Attributes attributes) throws SAXException {

            if (name.equals("row")) {
                retainedString = new ArrayList<StringBuilder>();

                if (retainedString.isEmpty()) {
                    stringArrayToReturn.add(retainedString);
                    retainedString.clear();
                }

                System.out.println("New row begins");

                retainedString.add(new StringBuilder(lineCounter.toString()));
                lineCounter++;
            }
            // c => cell
            if (name.equals("c")) {
                // Print the cell reference
                System.out.print(attributes.getValue("r") + " - ");

                // System.out.print(attributes.getValue("r") + " - ");
                // Figure out if the value is an index in the SST
                String cellType = attributes.getValue("t");
                if (cellType != null && cellType.equals("s")) {
                    nextIsString = true;
                } else {
                    nextIsString = false;
                }
            }
            // Clear contents cache
            lastContents = "";
        }

        public void endElement(String uri, String localName, String name) throws SAXException {
            // Process the last contents as required.
            // Do now, as characters() may be called more than once
            if (nextIsString) {
                int idx = Integer.parseInt(lastContents);
                lastContents = new XSSFRichTextString(sst.getEntryAt(idx)).toString();
                nextIsString = false;
            }

            // v => contents of a cell
            // Output after we've seen the string contents
            if (name.equals("v")) {
                System.out.println(lastContents);
                // value of cell what it string or number
                retainedString.add(new StringBuilder(lastContents));
            }
        }

        public void characters(char[] ch, int start, int length) throws SAXException {
            lastContents += new String(ch, start, length);
        }
    }

    public static void main(String[] args) throws Exception {
        StopWatch watch = new StopWatch();
        watch.start();
        ExcelToStringArray generate = new ExcelToStringArray();
        // rID1 is first sheet in my workbook for rId2 for second sheet and so
        // on.
        generate.GetSheetInStringArray("D:\Users\NIA\Desktop\0000_MasterTestSuite.xlsx", "rId10");

        watch.stop();

        System.out.println(DurationFormatUtils.formatDurationWords(watch.getTime(), true, true));
        System.out.println("done");
        System.out.println(generate.stringArrayToReturn);

    }

}