Apache POI 导出大型 Excel 文件速度慢

Apache POI Large Excel Export is slow

我正在尝试使用 SXSSFWorkbook 通过 Apache POI 将 CSV 转换为 Excel。

我的 CSV 文件有大约 230,000 条记录和 50 列。读取和写入过程的总处理时间约为 1 分钟。

我正在寻找解决这个性能问题的方法,希望把总耗时缩短到 30 秒以内,因为网关会在 30 秒后触发连接超时。

我做错了什么?有什么我可以改变的吗?

/**
 * Converts a CSV file into an xlsx workbook and returns the workbook as bytes.
 *
 * <p>Each line read from {@code filePath} becomes one sheet row; the line is trimmed,
 * split with {@code SEPERATOR_PATTERN}, and every resulting field is written as a string
 * cell starting at column 0. The sheet is named after the base name of {@code csvFile}.
 *
 * @param filePath location handed to {@code readFile} to open the CSV content
 * @param csvFile  the CSV file; only its name is used (for logging and the sheet name)
 * @return the bytes produced by {@code retrieveBytesOfXlsxFile} for the filled workbook
 * @throws java.io.UncheckedIOException if reading the CSV content fails
 */
public static byte[] readCsvAndWriteExcel(String filePath, File csvFile) {
    logger.info("Read csv and write excel has been started for {}", csvFile.getName());

    SXSSFWorkbook workbook = new SXSSFWorkbook(SXSSFWorkbook.DEFAULT_WINDOW_SIZE);
    try {
        // NOTE(review): compressing the temp files trades CPU time for disk space;
        // worth measuring with this switched off if export latency is the priority.
        workbook.setCompressTempFiles(true);
        SXSSFSheet sheet = workbook.createSheet(FilenameUtils.getBaseName(csvFile.getName()));

        // try-with-resources so the reader is closed on both success and failure.
        try (BufferedReader bufferedReader = readFile(filePath)) {
            int rowNumber = 0;
            String line;
            while ((line = bufferedReader.readLine()) != null) {
                SXSSFRow row = sheet.createRow(rowNumber++);
                // NOTE(review): String.split may recompile the regex for every line;
                // a precompiled Pattern could be cheaper — confirm against SEPERATOR_PATTERN.
                addCell(0, row, line.trim().split(SEPERATOR_PATTERN));
            }
        } catch (IOException e) {
            logger.error("Failed to read csv file: {}", filePath, e);
            // Unchecked so the method signature stays unchanged; the cause is preserved.
            throw new java.io.UncheckedIOException("Failed to read csv file: " + filePath, e);
        }

        logger.info("Read csv and write excel has been finished for {}", csvFile.getName());
        return retrieveBytesOfXlsxFile(workbook);
    } finally {
        // SXSSF spills rows to temporary files that must be removed explicitly,
        // including when reading the CSV failed half-way.
        workbook.dispose();
    }
}

以及用于保存单元格和样式的 addCell 方法:

/**
 * Writes every entry of {@code splitRow} into {@code row} as a string cell.
 *
 * @param cellNumber column index that receives the first entry; later entries go into
 *                   the following columns
 * @param row        row in which the cells are created
 * @param splitRow   field values to write, in column order
 */
private static void addCell(int cellNumber, SXSSFRow row, String[] splitRow) {
    for (int offset = 0; offset < splitRow.length; offset++) {
        row.createCell(cellNumber + offset).setCellValue(splitRow[offset]);
    }
}

使用 Fastexcel 的耗时与 POI 流式写入(SXSSF)相同。

你写 Excel 的方式没有问题,时间主要浪费在读取 CSV 的方法上。另外,fastexcel 是我测试过的最好的选择,它比其他几种方案(SXSSFWorkbook、easyexcel 和 myexcel)都更快。希望下面的代码对你有用。

import cn.hutool.core.io.FileUtil;
import cn.hutool.core.lang.Pair;
import cn.hutool.core.text.csv.CsvReader;
import cn.hutool.core.text.csv.CsvUtil;
import cn.hutool.core.text.csv.CsvWriter;
import com.alibaba.excel.EasyExcel;
import com.github.liaochong.myexcel.core.DefaultStreamExcelBuilder;
import org.apache.poi.xssf.streaming.SXSSFCell;
import org.apache.poi.xssf.streaming.SXSSFRow;
import org.apache.poi.xssf.streaming.SXSSFSheet;
import org.apache.poi.xssf.streaming.SXSSFWorkbook;
import org.dhatim.fastexcel.Workbook;
import org.dhatim.fastexcel.Worksheet;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.io.OutputStream;
import java.io.Reader;
import java.nio.charset.StandardCharsets;
import java.time.Duration;
import java.time.LocalDateTime;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

public class ExcelTest {

    Logger logger = LoggerFactory.getLogger(ExcelTest.class);

    /**
     * generate test xlsx (time < 15s)
     */
    @Test
    public void Test1() {
        LocalDateTime start = LocalDateTime.now();
        try (OutputStream os = FileUtil.getOutputStream("D:\tmp\TEST1.xlsx")) {
            Workbook wb = new Workbook(os, "MyApplication", "1.0");
            Worksheet ws = wb.newWorksheet("Sheet 1");
            HashMap<Pair<Integer, Integer>, String> dataMap = new HashMap<>();
            for (int i = 0; i < 230000; i++) {
                String[] strings = new Random().ints(50, 100, 150).mapToObj(a -> "TEST_" + a).collect(Collectors.toList()).toArray(new String[]{});
                for (int j = 0; j < 50; j++) {
                    ws.value(i, j, strings[j]);
                }
            }
            wb.finish();
        } catch (IOException e) {
            logger.error("Error", e);
        }
        LocalDateTime end = LocalDateTime.now();
        logger.info("Cost time {}", Duration.between(start, end).toMillis() + "ms");
    }

    /**
     * generate test csv  (time < 2s)
     */
    @Test
    public void Test2() {
        LocalDateTime start = LocalDateTime.now();
        try (CsvWriter writer = CsvUtil.getWriter("D:\tmp\TEST2.csv", StandardCharsets.UTF_8)) {
            writer.writeHeaderLine(IntStream.range(0, 50).mapToObj(String::valueOf).collect(Collectors.toList()).toArray(new String[]{}));
            for (int i = 0; i < 230000; i++) {
                String[] strings = new Random().ints(50, 100, 150).mapToObj(a -> "TEST_" + a).collect(Collectors.toList()).toArray(new String[]{});
                writer.writeLine(strings);
            }

        } catch (Exception e) {
            logger.error("Error", e);
        }
        LocalDateTime end = LocalDateTime.now();
        logger.info("Cost time {}", Duration.between(start, end).toMillis() + "ms");
    }

    /**
     * convert by fastexcel   (time < 15s)
     */
    @Test
    public void Test3() {
        LocalDateTime start = LocalDateTime.now();
        try (OutputStream os = FileUtil.getOutputStream("D:\tmp\TEST3.xlsx"); CsvReader reader = CsvUtil.getReader(FileUtil.getUtf8Reader("D:\tmp\TEST2.csv"))) {
            Workbook wb = new Workbook(os, "MyApplication", "1.0");
            Worksheet ws = wb.newWorksheet("Sheet 1");
            reader.stream().forEach(a -> {
                long originalLineNumber = a.getOriginalLineNumber();
                for (int i = 0; i < a.size(); i++) {
                    ws.value((int) originalLineNumber, i, a.get(i));
                }
            });
            wb.finish();
        } catch (IOException e) {
            logger.error("Error", e);
        }
        LocalDateTime end = LocalDateTime.now();
        logger.info("Cost time {}", Duration.between(start, end).toMillis() + "ms");
    }

    /**
     * convert by SXSSFWorkbook  (time < 30s)
     */
    @Test
    public void Test4() {
        LocalDateTime start = LocalDateTime.now();
        try (CsvReader reader = CsvUtil.getReader(FileUtil.getUtf8Reader("D:\tmp\TEST2.csv")); OutputStream os = FileUtil.getOutputStream("D:\tmp\TEST4.xlsx"); SXSSFWorkbook workbook = new SXSSFWorkbook(SXSSFWorkbook.DEFAULT_WINDOW_SIZE)) {
            SXSSFSheet sheet = workbook.createSheet("Sheet 1");
            reader.stream().forEach(a -> {
                long originalLineNumber = a.getOriginalLineNumber();
                SXSSFRow row = sheet.createRow((int) originalLineNumber);
                for (int i = 0; i < a.size(); i++) {
                    SXSSFCell cell = row.createCell(i);
                    cell.setCellValue(a.get(i));
                }
            });
            workbook.write(os);
        } catch (IOException e) {
            logger.error("Error", e);
        }
        LocalDateTime end = LocalDateTime.now();
        logger.info("Cost time {}", Duration.between(start, end).toMillis() + "ms");
    }

    /**
     * convert by easyexcel  (time < 30s)
     */
    @Test
    public void Test5() {
        LocalDateTime start = LocalDateTime.now();
        try (CsvReader reader = CsvUtil.getReader(FileUtil.getUtf8Reader("D:\tmp\TEST2.csv"))) {
            EasyExcel.write("D:\tmp\TEST5.xlsx", ExcelTest.class).sheet("Sheet 1").needHead(false).doWrite(() -> reader.stream().collect(Collectors.toList()));
        } catch (IOException e) {
            logger.error("Error", e);
        }
        LocalDateTime end = LocalDateTime.now();
        logger.info("Cost time {}", Duration.between(start, end).toMillis() + "ms");
    }

    private ExecutorService executorService = Executors.newFixedThreadPool(10);

    /**
     * convert by myexcel  (time < 30s)
     */
    @Test
    public void Test6() {
        LocalDateTime start = LocalDateTime.now();
        try (OutputStream os = FileUtil.getOutputStream("D:\tmp\TEST6.xlsx");
             Reader reader = FileUtil.getUtf8Reader("D:\tmp\TEST2.csv");
             CsvReader csvReader = CsvUtil.getReader();
             DefaultStreamExcelBuilder<Map> defaultExcelBuilder = DefaultStreamExcelBuilder.of(Map.class)
                     .threadPool(executorService)
                     .start()) {
            csvReader.setContainsHeader(true);
            List maps = csvReader.readMapList(reader);
            defaultExcelBuilder.asyncAppend(() -> maps);
            org.apache.poi.ss.usermodel.Workbook workbook = defaultExcelBuilder.build();
            workbook.write(os);
        } catch (IOException e) {
            logger.error("Error", e);
        }
        LocalDateTime end = LocalDateTime.now();
        logger.info("Cost time {}", Duration.between(start, end).toMillis() + "ms");
    }

}

依赖项:

<!-- Apache POI: provides the org.apache.poi.ss.usermodel.Workbook type used in Test6 -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>4.1.2</version>
</dependency>
<!-- Apache POI OOXML: provides the SXSSF streaming classes used in Test4 -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>4.1.2</version>
</dependency>
<!-- NOTE(review): presumably already pulled in transitively by poi-ooxml; confirm whether the explicit entry is needed -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml-schemas</artifactId>
    <version>4.1.2</version>
</dependency>
<!-- fastexcel: Workbook/Worksheet writer used in Test1 and Test3 -->
<dependency>
    <groupId>org.dhatim</groupId>
    <artifactId>fastexcel</artifactId>
    <version>0.12.12</version>
</dependency>
<!-- hutool: FileUtil plus the CsvReader/CsvWriter helpers used by all tests -->
<dependency>
    <groupId>cn.hutool</groupId>
    <artifactId>hutool-all</artifactId>
    <version>5.7.16</version>
</dependency>
<!-- easyexcel: writer used in Test5 -->
<dependency>
    <groupId>com.alibaba</groupId>
    <artifactId>easyexcel</artifactId>
    <version>3.0.5</version>
</dependency>
<!-- myexcel: DefaultStreamExcelBuilder used in Test6 -->
<dependency>
    <groupId>com.github.liaochong</groupId>
    <artifactId>myexcel</artifactId>
    <version>3.11.8</version>
</dependency>

测试结果: Test result Image