Apache POI 大 Excel 导出速度慢
Apache POI Large Excel Export is slow
我正在尝试使用 SXSSFWorkbook 通过 Apache POI 将 CSV 转换为 Excel。
我的 CSV 文件有大约 230,000 条记录和 50 列。读取和写入过程的总处理时间约为 1 分钟。
我正在四处寻找解决我的性能问题以获得更短的时间(最多 30 秒),因为连接超时发生在 30 秒后。来自网关。
我做错了什么?有什么我可以改变的吗?
public static byte[] readCsvAndWriteExcel(String filePath, File csvFile) {
logger.info("Read csv and write excel has been started for {}", csvFile.getName());
String line;
BufferedReader bufferedReader = readFile(filePath);
SXSSFWorkbook workbook = new SXSSFWorkbook(SXSSFWorkbook.DEFAULT_WINDOW_SIZE);
workbook.setCompressTempFiles(true);
int rowNumber = 0;
int cellNumber;
SXSSFRow row;
SXSSFSheet sheet = workbook.createSheet(FilenameUtils.getBaseName(csvFile.getName()));
try {
while ((line = bufferedReader.readLine()) != null) {
cellNumber = 0;
row = sheet.createRow(rowNumber++);
addCell(cellNumber, row, line.trim().split(SEPERATOR_PATTERN));
}
} catch (IOException e) {
logger.error("File is not found: {}", filePath, e);
throw new Exception("Exception");
}
logger.info("Read csv and write excel has been finished for {}", csvFile.getName());
return retrieveBytesOfXlsxFile(workbook);
}
以及用于保存单元格和样式的 addCell 方法;
private static void addCell(int cellNumber, SXSSFRow row, String[] splitRow) {
for (String field : splitRow) {
SXSSFCell cell = row.createCell(cellNumber++);
cell.setCellValue(field);
}
}
Fastexcel 提供与 POI 流相同的时间。
你写Excel的方式没有问题。时间主要浪费在读取CSV的方法上。另外,fastexcel是我测试过的最好的选择,它比其他的(SXSSFWorkBook、easyexcel和myexcel)更快。下面的代码希望对你有用。
import cn.hutool.core.io.FileUtil;
import cn.hutool.core.lang.Pair;
import cn.hutool.core.text.csv.CsvReader;
import cn.hutool.core.text.csv.CsvUtil;
import cn.hutool.core.text.csv.CsvWriter;
import com.alibaba.excel.EasyExcel;
import com.github.liaochong.myexcel.core.DefaultStreamExcelBuilder;
import org.apache.poi.xssf.streaming.SXSSFCell;
import org.apache.poi.xssf.streaming.SXSSFRow;
import org.apache.poi.xssf.streaming.SXSSFSheet;
import org.apache.poi.xssf.streaming.SXSSFWorkbook;
import org.dhatim.fastexcel.Workbook;
import org.dhatim.fastexcel.Worksheet;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.io.OutputStream;
import java.io.Reader;
import java.nio.charset.StandardCharsets;
import java.time.Duration;
import java.time.LocalDateTime;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
public class ExcelTest {
Logger logger = LoggerFactory.getLogger(ExcelTest.class);
/**
* generate test xlsx (time < 15s)
*/
@Test
public void Test1() {
LocalDateTime start = LocalDateTime.now();
try (OutputStream os = FileUtil.getOutputStream("D:\tmp\TEST1.xlsx")) {
Workbook wb = new Workbook(os, "MyApplication", "1.0");
Worksheet ws = wb.newWorksheet("Sheet 1");
HashMap<Pair<Integer, Integer>, String> dataMap = new HashMap<>();
for (int i = 0; i < 230000; i++) {
String[] strings = new Random().ints(50, 100, 150).mapToObj(a -> "TEST_" + a).collect(Collectors.toList()).toArray(new String[]{});
for (int j = 0; j < 50; j++) {
ws.value(i, j, strings[j]);
}
}
wb.finish();
} catch (IOException e) {
logger.error("Error", e);
}
LocalDateTime end = LocalDateTime.now();
logger.info("Cost time {}", Duration.between(start, end).toMillis() + "ms");
}
/**
* generate test csv (time < 2s)
*/
@Test
public void Test2() {
LocalDateTime start = LocalDateTime.now();
try (CsvWriter writer = CsvUtil.getWriter("D:\tmp\TEST2.csv", StandardCharsets.UTF_8)) {
writer.writeHeaderLine(IntStream.range(0, 50).mapToObj(String::valueOf).collect(Collectors.toList()).toArray(new String[]{}));
for (int i = 0; i < 230000; i++) {
String[] strings = new Random().ints(50, 100, 150).mapToObj(a -> "TEST_" + a).collect(Collectors.toList()).toArray(new String[]{});
writer.writeLine(strings);
}
} catch (Exception e) {
logger.error("Error", e);
}
LocalDateTime end = LocalDateTime.now();
logger.info("Cost time {}", Duration.between(start, end).toMillis() + "ms");
}
/**
* convert by fastexcel (time < 15s)
*/
@Test
public void Test3() {
LocalDateTime start = LocalDateTime.now();
try (OutputStream os = FileUtil.getOutputStream("D:\tmp\TEST3.xlsx"); CsvReader reader = CsvUtil.getReader(FileUtil.getUtf8Reader("D:\tmp\TEST2.csv"))) {
Workbook wb = new Workbook(os, "MyApplication", "1.0");
Worksheet ws = wb.newWorksheet("Sheet 1");
reader.stream().forEach(a -> {
long originalLineNumber = a.getOriginalLineNumber();
for (int i = 0; i < a.size(); i++) {
ws.value((int) originalLineNumber, i, a.get(i));
}
});
wb.finish();
} catch (IOException e) {
logger.error("Error", e);
}
LocalDateTime end = LocalDateTime.now();
logger.info("Cost time {}", Duration.between(start, end).toMillis() + "ms");
}
/**
* convert by SXSSFWorkbook (time < 30s)
*/
@Test
public void Test4() {
LocalDateTime start = LocalDateTime.now();
try (CsvReader reader = CsvUtil.getReader(FileUtil.getUtf8Reader("D:\tmp\TEST2.csv")); OutputStream os = FileUtil.getOutputStream("D:\tmp\TEST4.xlsx"); SXSSFWorkbook workbook = new SXSSFWorkbook(SXSSFWorkbook.DEFAULT_WINDOW_SIZE)) {
SXSSFSheet sheet = workbook.createSheet("Sheet 1");
reader.stream().forEach(a -> {
long originalLineNumber = a.getOriginalLineNumber();
SXSSFRow row = sheet.createRow((int) originalLineNumber);
for (int i = 0; i < a.size(); i++) {
SXSSFCell cell = row.createCell(i);
cell.setCellValue(a.get(i));
}
});
workbook.write(os);
} catch (IOException e) {
logger.error("Error", e);
}
LocalDateTime end = LocalDateTime.now();
logger.info("Cost time {}", Duration.between(start, end).toMillis() + "ms");
}
/**
* convert by easyexcel (time < 30s)
*/
@Test
public void Test5() {
LocalDateTime start = LocalDateTime.now();
try (CsvReader reader = CsvUtil.getReader(FileUtil.getUtf8Reader("D:\tmp\TEST2.csv"))) {
EasyExcel.write("D:\tmp\TEST5.xlsx", ExcelTest.class).sheet("Sheet 1").needHead(false).doWrite(() -> reader.stream().collect(Collectors.toList()));
} catch (IOException e) {
logger.error("Error", e);
}
LocalDateTime end = LocalDateTime.now();
logger.info("Cost time {}", Duration.between(start, end).toMillis() + "ms");
}
private ExecutorService executorService = Executors.newFixedThreadPool(10);
/**
* convert by myexcel (time < 30s)
*/
@Test
public void Test6() {
LocalDateTime start = LocalDateTime.now();
try (OutputStream os = FileUtil.getOutputStream("D:\tmp\TEST6.xlsx");
Reader reader = FileUtil.getUtf8Reader("D:\tmp\TEST2.csv");
CsvReader csvReader = CsvUtil.getReader();
DefaultStreamExcelBuilder<Map> defaultExcelBuilder = DefaultStreamExcelBuilder.of(Map.class)
.threadPool(executorService)
.start()) {
csvReader.setContainsHeader(true);
List maps = csvReader.readMapList(reader);
defaultExcelBuilder.asyncAppend(() -> maps);
org.apache.poi.ss.usermodel.Workbook workbook = defaultExcelBuilder.build();
workbook.write(os);
} catch (IOException e) {
logger.error("Error", e);
}
LocalDateTime end = LocalDateTime.now();
logger.info("Cost time {}", Duration.between(start, end).toMillis() + "ms");
}
}
依赖项:
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>4.1.2</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>4.1.2</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml-schemas</artifactId>
<version>4.1.2</version>
</dependency>
<dependency>
<groupId>org.dhatim</groupId>
<artifactId>fastexcel</artifactId>
<version>0.12.12</version>
</dependency>
<dependency>
<groupId>cn.hutool</groupId>
<artifactId>hutool-all</artifactId>
<version>5.7.16</version>
</dependency>
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>easyexcel</artifactId>
<version>3.0.5</version>
</dependency>
<dependency>
<groupId>com.github.liaochong</groupId>
<artifactId>myexcel</artifactId>
<version>3.11.8</version>
</dependency>
测试结果:
Test result Image
我正在尝试使用 SXSSFWorkbook 通过 Apache POI 将 CSV 转换为 Excel。
我的 CSV 文件有大约 230,000 条记录和 50 列。读取和写入过程的总处理时间约为 1 分钟。
我正在四处寻找解决我的性能问题以获得更短的时间(最多 30 秒),因为连接超时发生在 30 秒后。来自网关。
我做错了什么?有什么我可以改变的吗?
public static byte[] readCsvAndWriteExcel(String filePath, File csvFile) {
logger.info("Read csv and write excel has been started for {}", csvFile.getName());
String line;
BufferedReader bufferedReader = readFile(filePath);
SXSSFWorkbook workbook = new SXSSFWorkbook(SXSSFWorkbook.DEFAULT_WINDOW_SIZE);
workbook.setCompressTempFiles(true);
int rowNumber = 0;
int cellNumber;
SXSSFRow row;
SXSSFSheet sheet = workbook.createSheet(FilenameUtils.getBaseName(csvFile.getName()));
try {
while ((line = bufferedReader.readLine()) != null) {
cellNumber = 0;
row = sheet.createRow(rowNumber++);
addCell(cellNumber, row, line.trim().split(SEPERATOR_PATTERN));
}
} catch (IOException e) {
logger.error("File is not found: {}", filePath, e);
throw new Exception("Exception");
}
logger.info("Read csv and write excel has been finished for {}", csvFile.getName());
return retrieveBytesOfXlsxFile(workbook);
}
以及用于保存单元格和样式的 addCell 方法;
private static void addCell(int cellNumber, SXSSFRow row, String[] splitRow) {
for (String field : splitRow) {
SXSSFCell cell = row.createCell(cellNumber++);
cell.setCellValue(field);
}
}
Fastexcel 提供与 POI 流相同的时间。
你写Excel的方式没有问题。时间主要浪费在读取CSV的方法上。另外,fastexcel是我测试过的最好的选择,它比其他的(SXSSFWorkBook、easyexcel和myexcel)更快。下面的代码希望对你有用。
import cn.hutool.core.io.FileUtil;
import cn.hutool.core.lang.Pair;
import cn.hutool.core.text.csv.CsvReader;
import cn.hutool.core.text.csv.CsvUtil;
import cn.hutool.core.text.csv.CsvWriter;
import com.alibaba.excel.EasyExcel;
import com.github.liaochong.myexcel.core.DefaultStreamExcelBuilder;
import org.apache.poi.xssf.streaming.SXSSFCell;
import org.apache.poi.xssf.streaming.SXSSFRow;
import org.apache.poi.xssf.streaming.SXSSFSheet;
import org.apache.poi.xssf.streaming.SXSSFWorkbook;
import org.dhatim.fastexcel.Workbook;
import org.dhatim.fastexcel.Worksheet;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.io.OutputStream;
import java.io.Reader;
import java.nio.charset.StandardCharsets;
import java.time.Duration;
import java.time.LocalDateTime;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
public class ExcelTest {
Logger logger = LoggerFactory.getLogger(ExcelTest.class);
/**
* generate test xlsx (time < 15s)
*/
@Test
public void Test1() {
LocalDateTime start = LocalDateTime.now();
try (OutputStream os = FileUtil.getOutputStream("D:\tmp\TEST1.xlsx")) {
Workbook wb = new Workbook(os, "MyApplication", "1.0");
Worksheet ws = wb.newWorksheet("Sheet 1");
HashMap<Pair<Integer, Integer>, String> dataMap = new HashMap<>();
for (int i = 0; i < 230000; i++) {
String[] strings = new Random().ints(50, 100, 150).mapToObj(a -> "TEST_" + a).collect(Collectors.toList()).toArray(new String[]{});
for (int j = 0; j < 50; j++) {
ws.value(i, j, strings[j]);
}
}
wb.finish();
} catch (IOException e) {
logger.error("Error", e);
}
LocalDateTime end = LocalDateTime.now();
logger.info("Cost time {}", Duration.between(start, end).toMillis() + "ms");
}
/**
* generate test csv (time < 2s)
*/
@Test
public void Test2() {
LocalDateTime start = LocalDateTime.now();
try (CsvWriter writer = CsvUtil.getWriter("D:\tmp\TEST2.csv", StandardCharsets.UTF_8)) {
writer.writeHeaderLine(IntStream.range(0, 50).mapToObj(String::valueOf).collect(Collectors.toList()).toArray(new String[]{}));
for (int i = 0; i < 230000; i++) {
String[] strings = new Random().ints(50, 100, 150).mapToObj(a -> "TEST_" + a).collect(Collectors.toList()).toArray(new String[]{});
writer.writeLine(strings);
}
} catch (Exception e) {
logger.error("Error", e);
}
LocalDateTime end = LocalDateTime.now();
logger.info("Cost time {}", Duration.between(start, end).toMillis() + "ms");
}
/**
* convert by fastexcel (time < 15s)
*/
@Test
public void Test3() {
LocalDateTime start = LocalDateTime.now();
try (OutputStream os = FileUtil.getOutputStream("D:\tmp\TEST3.xlsx"); CsvReader reader = CsvUtil.getReader(FileUtil.getUtf8Reader("D:\tmp\TEST2.csv"))) {
Workbook wb = new Workbook(os, "MyApplication", "1.0");
Worksheet ws = wb.newWorksheet("Sheet 1");
reader.stream().forEach(a -> {
long originalLineNumber = a.getOriginalLineNumber();
for (int i = 0; i < a.size(); i++) {
ws.value((int) originalLineNumber, i, a.get(i));
}
});
wb.finish();
} catch (IOException e) {
logger.error("Error", e);
}
LocalDateTime end = LocalDateTime.now();
logger.info("Cost time {}", Duration.between(start, end).toMillis() + "ms");
}
/**
* convert by SXSSFWorkbook (time < 30s)
*/
@Test
public void Test4() {
LocalDateTime start = LocalDateTime.now();
try (CsvReader reader = CsvUtil.getReader(FileUtil.getUtf8Reader("D:\tmp\TEST2.csv")); OutputStream os = FileUtil.getOutputStream("D:\tmp\TEST4.xlsx"); SXSSFWorkbook workbook = new SXSSFWorkbook(SXSSFWorkbook.DEFAULT_WINDOW_SIZE)) {
SXSSFSheet sheet = workbook.createSheet("Sheet 1");
reader.stream().forEach(a -> {
long originalLineNumber = a.getOriginalLineNumber();
SXSSFRow row = sheet.createRow((int) originalLineNumber);
for (int i = 0; i < a.size(); i++) {
SXSSFCell cell = row.createCell(i);
cell.setCellValue(a.get(i));
}
});
workbook.write(os);
} catch (IOException e) {
logger.error("Error", e);
}
LocalDateTime end = LocalDateTime.now();
logger.info("Cost time {}", Duration.between(start, end).toMillis() + "ms");
}
/**
* convert by easyexcel (time < 30s)
*/
@Test
public void Test5() {
LocalDateTime start = LocalDateTime.now();
try (CsvReader reader = CsvUtil.getReader(FileUtil.getUtf8Reader("D:\tmp\TEST2.csv"))) {
EasyExcel.write("D:\tmp\TEST5.xlsx", ExcelTest.class).sheet("Sheet 1").needHead(false).doWrite(() -> reader.stream().collect(Collectors.toList()));
} catch (IOException e) {
logger.error("Error", e);
}
LocalDateTime end = LocalDateTime.now();
logger.info("Cost time {}", Duration.between(start, end).toMillis() + "ms");
}
private ExecutorService executorService = Executors.newFixedThreadPool(10);
/**
* convert by myexcel (time < 30s)
*/
@Test
public void Test6() {
LocalDateTime start = LocalDateTime.now();
try (OutputStream os = FileUtil.getOutputStream("D:\tmp\TEST6.xlsx");
Reader reader = FileUtil.getUtf8Reader("D:\tmp\TEST2.csv");
CsvReader csvReader = CsvUtil.getReader();
DefaultStreamExcelBuilder<Map> defaultExcelBuilder = DefaultStreamExcelBuilder.of(Map.class)
.threadPool(executorService)
.start()) {
csvReader.setContainsHeader(true);
List maps = csvReader.readMapList(reader);
defaultExcelBuilder.asyncAppend(() -> maps);
org.apache.poi.ss.usermodel.Workbook workbook = defaultExcelBuilder.build();
workbook.write(os);
} catch (IOException e) {
logger.error("Error", e);
}
LocalDateTime end = LocalDateTime.now();
logger.info("Cost time {}", Duration.between(start, end).toMillis() + "ms");
}
}
依赖项:
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>4.1.2</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>4.1.2</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml-schemas</artifactId>
<version>4.1.2</version>
</dependency>
<dependency>
<groupId>org.dhatim</groupId>
<artifactId>fastexcel</artifactId>
<version>0.12.12</version>
</dependency>
<dependency>
<groupId>cn.hutool</groupId>
<artifactId>hutool-all</artifactId>
<version>5.7.16</version>
</dependency>
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>easyexcel</artifactId>
<version>3.0.5</version>
</dependency>
<dependency>
<groupId>com.github.liaochong</groupId>
<artifactId>myexcel</artifactId>
<version>3.11.8</version>
</dependency>
测试结果: Test result Image