Java - XLSX 解析和数据库导出
Java - XLSX parse & database export
我有一个 excel 填充了大约 50k-60k 行。
我必须把这个 Excel 的内容上传到 MySQL。一般我都是用 Apache POI 读取后再上传到 MySQL,但是这个文件太大了,无法用 Apache POI 读取。
谁能指导我怎么做?这是我使用 apache poi 将内容上传到 MySQL 的示例代码(它适用于一些包含 1000-2000 行的小 xlsx 文件)
/**
 * Walks every row of {@code sheetx} and inserts the qualifying rows into {@code dbtable}.
 *
 * <p>A row qualifies when the numeric value of its first cell, truncated to an {@code int} and
 * rendered as text, consists of exactly three alphanumeric characters. For such a row, cells
 * 1-5 are read as strings and a single INSERT statement is handed to {@code save}.
 *
 * <p>{@code sheetx}, {@code row} and {@code save} are declared outside this snippet; the sheet
 * is presumably opened from {@code file} by code that is not shown here.
 *
 * <p>NOTE(review): the INSERT statement is assembled by concatenating cell text, so a value
 * containing a single quote breaks the statement and the import is open to SQL injection.
 * Switching to a parameterized statement requires changing {@code save}, which is not visible.
 *
 * @param file    the uploaded file; only its file name is used here, for the log line
 * @param dbtable name of the target table, spliced verbatim into the INSERT statement
 */
public static void uploadCrossSellCorpCard(FileItem file, String dbtable) {
    System.out.println("UploadUtil Running" + file.getFileName().toString());
    try {
        for (int rowIndex = 0; rowIndex <= sheetx.getLastRowNum(); rowIndex++) {
            row = sheetx.getRow(rowIndex);
            try {
                int oc = (int) row.getCell(0).getNumericCellValue();
                boolean isThreeCharCode = Integer.toString(oc).matches("[A-Za-z0-9]{3}");
                if (!isThreeCharCode) {
                    // Not a data row: nothing is read from the remaining cells.
                    continue;
                }

                String rmName = row.getCell(1).getStringCellValue();
                String companyName = row.getCell(2).getStringCellValue();
                String productName = row.getCell(3).getStringCellValue();
                String detailText = row.getCell(4).getStringCellValue();
                String typeName = row.getCell(5).getStringCellValue();

                StringBuilder insert = new StringBuilder("INSERT INTO ");
                insert.append(dbtable).append(" VALUES('");
                insert.append(oc).append("','");
                insert.append(rmName).append("','");
                insert.append(companyName).append("','");
                insert.append(productName).append("','");
                insert.append(detailText).append("','");
                insert.append(typeName).append("')");

                save(insert.toString());
                System.out.println("Import rows " + rowIndex);
            } catch (IllegalStateException cellTypeProblem) {
                // Presumably thrown by the cell getters when a cell has an unexpected type;
                // the row is skipped and the loop carries on.
                cellTypeProblem.printStackTrace();
            } catch (NullPointerException missingRowOrCell) {
                // A missing row or cell is reported and the row is skipped.
                System.out.println(missingRowOrCell);
            }
        }
        System.out.println("Success import xlsx to mysql table");
    } catch (NullPointerException noSheet) {
        // Reached when the loop header itself fails, e.g. sheetx is null.
        System.out.println(noSheet);
        System.out.println("Select the file first before uploading");
    }
}
注意:我使用 Hibernate 来处理上传。“save(sql)” 调用的是我的 Hibernate 方法。
您可以尝试使用 Apache POI 的 SAX 方式——请阅读 https://poi.apache.org/spreadsheet/how-to.html 中的 “XSSF and SAX (Event API)” 一节。
这样您就可以像读取 XML 文件一样读取整个 Excel,无论是 6 万行还是 10 万行。唯一需要注意的是空单元格:XML 中不会为空单元格生成标记(它们会被直接跳过),而您可能希望在数据库表的对应列中写入空值(NULL)。
解决方案 --> 您可以读取每一行并在循环中触发插入语句。如果出现间隙,则通过监视单元格地址来监视空单元格,然后检查相应的列名,并相应地用空值更新插入语句。
希望对您有所帮助。下面的示例代码读取 Excel,并把内容存入 ArrayList 的 ArrayList 中,以表格形式表示。在开始读取并打印每一行之前,代码会在控制台打印消息 "New row begins";在打印每个单元格的值之前,会先打印该单元格的地址。
我没有处理空单元格的单元格间隙,但您可以根据查找单元格间隙对其进行编码,因为在我的情况下我没有空单元格。
在控制台中查找单元格地址,帮助您发现任何差距并根据需要进行处理。
这段代码在我这里运行良好。不要忘记添加 xmlbeans-2.3.0.jar,以及导入语句所需的其他 jar 包。
import java.io.InputStream;
import java.util.ArrayList;
import org.apache.commons.lang3.time.DurationFormatUtils;
import org.apache.commons.lang3.time.StopWatch;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;
public class ExcelToStringArray implements Cloneable {
public static ArrayList<ArrayList<StringBuilder>> stringArrayToReturn = new ArrayList<ArrayList<StringBuilder>>();
public static ArrayList<StringBuilder> retainedString;
public static Integer lineCounter = 0;
public ArrayList<ArrayList<StringBuilder>> GetSheetInStringArray(String PathtoFilename, String rId)
throws Exception {
ExcelToStringArray myParser = new ExcelToStringArray();
myParser.processOneSheet(PathtoFilename, rId);
return stringArrayToReturn;
}
public void processOneSheet(String PathtoFilename, String rId) throws Exception {
OPCPackage pkg = OPCPackage.open(PathtoFilename);
XSSFReader r = new XSSFReader(pkg);
SharedStringsTable sst = r.getSharedStringsTable();
XMLReader parser = fetchSheetParser(sst);
InputStream sheet = r.getSheet(rId);
InputSource sheetSource = new InputSource(sheet);
parser.parse(sheetSource);
sheet.close();
}
public XMLReader fetchSheetParser(SharedStringsTable sst) throws SAXException {
XMLReader parser = XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser");
ContentHandler handler = new SheetHandler(sst);
parser.setContentHandler(handler);
return parser;
}
private class SheetHandler extends DefaultHandler {
private SharedStringsTable sst;
private String lastContents;
private boolean nextIsString;
private SheetHandler(SharedStringsTable sst) {
this.sst = sst;
}
public void startElement(String uri, String localName, String name, Attributes attributes) throws SAXException {
if (name.equals("row")) {
retainedString = new ArrayList<StringBuilder>();
if (retainedString.isEmpty()) {
stringArrayToReturn.add(retainedString);
retainedString.clear();
}
System.out.println("New row begins");
retainedString.add(new StringBuilder(lineCounter.toString()));
lineCounter++;
}
// c => cell
if (name.equals("c")) {
// Print the cell reference
System.out.print(attributes.getValue("r") + " - ");
// System.out.print(attributes.getValue("r") + " - ");
// Figure out if the value is an index in the SST
String cellType = attributes.getValue("t");
if (cellType != null && cellType.equals("s")) {
nextIsString = true;
} else {
nextIsString = false;
}
}
// Clear contents cache
lastContents = "";
}
public void endElement(String uri, String localName, String name) throws SAXException {
// Process the last contents as required.
// Do now, as characters() may be called more than once
if (nextIsString) {
int idx = Integer.parseInt(lastContents);
lastContents = new XSSFRichTextString(sst.getEntryAt(idx)).toString();
nextIsString = false;
}
// v => contents of a cell
// Output after we've seen the string contents
if (name.equals("v")) {
System.out.println(lastContents);
// value of cell what it string or number
retainedString.add(new StringBuilder(lastContents));
}
}
public void characters(char[] ch, int start, int length) throws SAXException {
lastContents += new String(ch, start, length);
}
}
public static void main(String[] args) throws Exception {
StopWatch watch = new StopWatch();
watch.start();
ExcelToStringArray generate = new ExcelToStringArray();
// rID1 is first sheet in my workbook for rId2 for second sheet and so
// on.
generate.GetSheetInStringArray("D:\Users\NIA\Desktop\0000_MasterTestSuite.xlsx", "rId10");
watch.stop();
System.out.println(DurationFormatUtils.formatDurationWords(watch.getTime(), true, true));
System.out.println("done");
System.out.println(generate.stringArrayToReturn);
}
}
我有一个 excel 填充了大约 50k-60k 行。
我必须把这个 Excel 的内容上传到 MySQL。一般我都是用 Apache POI 读取后再上传到 MySQL,但是这个文件太大了,无法用 Apache POI 读取。
谁能指导我怎么做?这是我使用 apache poi 将内容上传到 MySQL 的示例代码(它适用于一些包含 1000-2000 行的小 xlsx 文件)
/**
 * Walks every row of {@code sheetx} and inserts the qualifying rows into {@code dbtable}.
 *
 * <p>A row qualifies when the numeric value of its first cell, truncated to an {@code int} and
 * rendered as text, consists of exactly three alphanumeric characters. For such a row, cells
 * 1-5 are read as strings and a single INSERT statement is handed to {@code save}.
 *
 * <p>{@code sheetx}, {@code row} and {@code save} are declared outside this snippet; the sheet
 * is presumably opened from {@code file} by code that is not shown here.
 *
 * <p>NOTE(review): the INSERT statement is assembled by concatenating cell text, so a value
 * containing a single quote breaks the statement and the import is open to SQL injection.
 * Switching to a parameterized statement requires changing {@code save}, which is not visible.
 *
 * @param file    the uploaded file; only its file name is used here, for the log line
 * @param dbtable name of the target table, spliced verbatim into the INSERT statement
 */
public static void uploadCrossSellCorpCard(FileItem file, String dbtable) {
    System.out.println("UploadUtil Running" + file.getFileName().toString());
    try {
        for (int rowIndex = 0; rowIndex <= sheetx.getLastRowNum(); rowIndex++) {
            row = sheetx.getRow(rowIndex);
            try {
                int oc = (int) row.getCell(0).getNumericCellValue();
                boolean isThreeCharCode = Integer.toString(oc).matches("[A-Za-z0-9]{3}");
                if (!isThreeCharCode) {
                    // Not a data row: nothing is read from the remaining cells.
                    continue;
                }

                String rmName = row.getCell(1).getStringCellValue();
                String companyName = row.getCell(2).getStringCellValue();
                String productName = row.getCell(3).getStringCellValue();
                String detailText = row.getCell(4).getStringCellValue();
                String typeName = row.getCell(5).getStringCellValue();

                StringBuilder insert = new StringBuilder("INSERT INTO ");
                insert.append(dbtable).append(" VALUES('");
                insert.append(oc).append("','");
                insert.append(rmName).append("','");
                insert.append(companyName).append("','");
                insert.append(productName).append("','");
                insert.append(detailText).append("','");
                insert.append(typeName).append("')");

                save(insert.toString());
                System.out.println("Import rows " + rowIndex);
            } catch (IllegalStateException cellTypeProblem) {
                // Presumably thrown by the cell getters when a cell has an unexpected type;
                // the row is skipped and the loop carries on.
                cellTypeProblem.printStackTrace();
            } catch (NullPointerException missingRowOrCell) {
                // A missing row or cell is reported and the row is skipped.
                System.out.println(missingRowOrCell);
            }
        }
        System.out.println("Success import xlsx to mysql table");
    } catch (NullPointerException noSheet) {
        // Reached when the loop header itself fails, e.g. sheetx is null.
        System.out.println(noSheet);
        System.out.println("Select the file first before uploading");
    }
}
注意:我使用 Hibernate 来处理上传。“save(sql)” 调用的是我的 Hibernate 方法。
您可以尝试使用 Apache POI 的 SAX 方式——请阅读 https://poi.apache.org/spreadsheet/how-to.html 中的 “XSSF and SAX (Event API)” 一节。
这样您就可以像读取 XML 文件一样读取整个 Excel,无论是 6 万行还是 10 万行。唯一需要注意的是空单元格:XML 中不会为空单元格生成标记(它们会被直接跳过),而您可能希望在数据库表的对应列中写入空值(NULL)。
解决方案 --> 您可以读取每一行并在循环中触发插入语句。如果出现间隙,则通过监视单元格地址来监视空单元格,然后检查相应的列名,并相应地用空值更新插入语句。
希望对您有所帮助。下面的示例代码读取 Excel,并把内容存入 ArrayList 的 ArrayList 中,以表格形式表示。在开始读取并打印每一行之前,代码会在控制台打印消息 "New row begins";在打印每个单元格的值之前,会先打印该单元格的地址。
我没有处理空单元格的单元格间隙,但您可以根据查找单元格间隙对其进行编码,因为在我的情况下我没有空单元格。 在控制台中查找单元格地址,帮助您发现任何差距并根据需要进行处理。
这段代码在我这里运行良好。不要忘记添加 xmlbeans-2.3.0.jar,以及导入语句所需的其他 jar 包。
import java.io.InputStream;
import java.util.ArrayList;
import org.apache.commons.lang3.time.DurationFormatUtils;
import org.apache.commons.lang3.time.StopWatch;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;
public class ExcelToStringArray implements Cloneable {
public static ArrayList<ArrayList<StringBuilder>> stringArrayToReturn = new ArrayList<ArrayList<StringBuilder>>();
public static ArrayList<StringBuilder> retainedString;
public static Integer lineCounter = 0;
public ArrayList<ArrayList<StringBuilder>> GetSheetInStringArray(String PathtoFilename, String rId)
throws Exception {
ExcelToStringArray myParser = new ExcelToStringArray();
myParser.processOneSheet(PathtoFilename, rId);
return stringArrayToReturn;
}
public void processOneSheet(String PathtoFilename, String rId) throws Exception {
OPCPackage pkg = OPCPackage.open(PathtoFilename);
XSSFReader r = new XSSFReader(pkg);
SharedStringsTable sst = r.getSharedStringsTable();
XMLReader parser = fetchSheetParser(sst);
InputStream sheet = r.getSheet(rId);
InputSource sheetSource = new InputSource(sheet);
parser.parse(sheetSource);
sheet.close();
}
public XMLReader fetchSheetParser(SharedStringsTable sst) throws SAXException {
XMLReader parser = XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser");
ContentHandler handler = new SheetHandler(sst);
parser.setContentHandler(handler);
return parser;
}
private class SheetHandler extends DefaultHandler {
private SharedStringsTable sst;
private String lastContents;
private boolean nextIsString;
private SheetHandler(SharedStringsTable sst) {
this.sst = sst;
}
public void startElement(String uri, String localName, String name, Attributes attributes) throws SAXException {
if (name.equals("row")) {
retainedString = new ArrayList<StringBuilder>();
if (retainedString.isEmpty()) {
stringArrayToReturn.add(retainedString);
retainedString.clear();
}
System.out.println("New row begins");
retainedString.add(new StringBuilder(lineCounter.toString()));
lineCounter++;
}
// c => cell
if (name.equals("c")) {
// Print the cell reference
System.out.print(attributes.getValue("r") + " - ");
// System.out.print(attributes.getValue("r") + " - ");
// Figure out if the value is an index in the SST
String cellType = attributes.getValue("t");
if (cellType != null && cellType.equals("s")) {
nextIsString = true;
} else {
nextIsString = false;
}
}
// Clear contents cache
lastContents = "";
}
public void endElement(String uri, String localName, String name) throws SAXException {
// Process the last contents as required.
// Do now, as characters() may be called more than once
if (nextIsString) {
int idx = Integer.parseInt(lastContents);
lastContents = new XSSFRichTextString(sst.getEntryAt(idx)).toString();
nextIsString = false;
}
// v => contents of a cell
// Output after we've seen the string contents
if (name.equals("v")) {
System.out.println(lastContents);
// value of cell what it string or number
retainedString.add(new StringBuilder(lastContents));
}
}
public void characters(char[] ch, int start, int length) throws SAXException {
lastContents += new String(ch, start, length);
}
}
public static void main(String[] args) throws Exception {
StopWatch watch = new StopWatch();
watch.start();
ExcelToStringArray generate = new ExcelToStringArray();
// rID1 is first sheet in my workbook for rId2 for second sheet and so
// on.
generate.GetSheetInStringArray("D:\Users\NIA\Desktop\0000_MasterTestSuite.xlsx", "rId10");
watch.stop();
System.out.println(DurationFormatUtils.formatDurationWords(watch.getTime(), true, true));
System.out.println("done");
System.out.println(generate.stringArrayToReturn);
}
}