使用 apache poi 根据列名将 CSV/XLSX 文件拆分为多个工作表
Split CSV/XLSX file into multiple sheets based on column names using apache poi
我有一个 CSV 文件,其中包含从 SQL 数据导出的近 190 列。现在我想根据特定的列,将单个 CSV/XLSX 工作表拆分成多个工作表,例如:
Sample.csv
id name technology User Countries Support_Place Support_Details
1 A.aaa "SQL,JAVA" user1 US US1 6766678 NAM
2 B.bbb Linux user2 Japan Japan2 9878678 RAN
3 C.ccc Java user3 India India3 6545654 CAN
Expecting,
Sample.csv
sheet1
====
id name technology
1 A.aaa "SQL,JAVA"
2 B.bbb Linux
3 C.ccc Java
User Sheet2
==============
User Countries
user1 US
user2 Japan
user3 India
Support_Place Sheet3
=====================
Support_Place Support_Contacts
US1 6766678 NAM
Japan2 9878678 RAN
India3 6545654 CAN
请告诉我如何根据列名称将列拆分为单独的工作表。
非常感谢您的帮助。
如果您不是必须使用 Apache POI,以下代码可能对您有用。实际上,基于 fastexcel 的实现效率更高。
import cn.hutool.core.io.FileUtil;
import cn.hutool.core.text.csv.CsvReader;
import cn.hutool.core.text.csv.CsvUtil;
import cn.hutool.core.text.csv.CsvWriter;
import org.dhatim.fastexcel.Workbook;
import org.dhatim.fastexcel.Worksheet;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import java.time.Duration;
import java.time.LocalDateTime;
import java.util.Random;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
public class ExcelSplitTest {
Logger logger = LoggerFactory.getLogger(ExcelSplitTest.class);
@Test
public void Test1() {
LocalDateTime start = LocalDateTime.now();
try (CsvWriter writer = CsvUtil.getWriter("D:\tmp\TEST_1.csv", StandardCharsets.UTF_8)) {
writer.writeHeaderLine(IntStream.range(0, 50).mapToObj(String::valueOf).collect(Collectors.toList()).toArray(new String[]{}));
for (int i = 0; i < 230000; i++) {
String[] strings = new Random().ints(50, 100, 150).mapToObj(a -> "TEST_" + a).collect(Collectors.toList()).toArray(new String[]{});
writer.writeLine(strings);
}
} catch (Exception e) {
logger.error("Error", e);
}
LocalDateTime end = LocalDateTime.now();
logger.info("Cost time {}", Duration.between(start, end).toMillis() + "ms");
}
/**
* convert by fastexcel
*/
@Test
public void Test3() {
LocalDateTime start = LocalDateTime.now();
try (OutputStream os = FileUtil.getOutputStream("D:\tmp\TEST_3.xlsx")) {
Workbook wb = new Workbook(os, "MyApplication", "1.0");
try (CsvReader csvReader = CsvUtil.getReader(FileUtil.getUtf8Reader("D:\tmp\TEST_1.csv"))) {
Worksheet ws1 = wb.newWorksheet("Sheet 1");
Worksheet ws2 = wb.newWorksheet("Sheet 2");
csvReader.stream().forEach(a -> {
long originalLineNumber = a.getOriginalLineNumber();
int col1 = 0;
int col2 = 0;
for (int i = 0; i < a.size(); i++) {
if (i % 2 == 0) {
ws1.value((int) originalLineNumber, col1, a.get(i));
col1++;
}
else{
ws2.value((int) originalLineNumber, col2, a.get(i));
col2++;
}
}
});
}
wb.finish();
} catch (IOException e) {
logger.error("Error", e);
}
LocalDateTime end = LocalDateTime.now();
logger.info("Cost time {}", Duration.between(start, end).toMillis() + "ms");
}
}
依赖项:
<dependency>
<groupId>org.dhatim</groupId>
<artifactId>fastexcel</artifactId>
<version>0.12.12</version>
</dependency>
<dependency>
<groupId>cn.hutool</groupId>
<artifactId>hutool-all</artifactId>
<version>5.7.16</version>
</dependency>
我有一个 CSV 文件,其中包含从 SQL 数据导出的近 190 列。现在我想根据特定的列,将单个 CSV/XLSX 工作表拆分成多个工作表,例如:
Sample.csv
id name technology User Countries Support_Place Support_Details
1 A.aaa "SQL,JAVA" user1 US US1 6766678 NAM
2 B.bbb Linux user2 Japan Japan2 9878678 RAN
3 C.ccc Java user3 India India3 6545654 CAN
Expecting,
Sample.csv
sheet1
====
id name technology
1 A.aaa "SQL,JAVA"
2 B.bbb Linux
3 C.ccc Java
User Sheet2
==============
User Countries
user1 US
user2 Japan
user3 India
Support_Place Sheet3
=====================
Support_Place Support_Contacts
US1 6766678 NAM
Japan2 9878678 RAN
India3 6545654 CAN
请告诉我如何根据列名称将列拆分为单独的工作表。 非常感谢您的帮助。
如果您不是必须使用 Apache POI,以下代码可能对您有用。实际上,基于 fastexcel 的实现效率更高。
import cn.hutool.core.io.FileUtil;
import cn.hutool.core.text.csv.CsvReader;
import cn.hutool.core.text.csv.CsvUtil;
import cn.hutool.core.text.csv.CsvWriter;
import org.dhatim.fastexcel.Workbook;
import org.dhatim.fastexcel.Worksheet;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import java.time.Duration;
import java.time.LocalDateTime;
import java.util.Random;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
public class ExcelSplitTest {
Logger logger = LoggerFactory.getLogger(ExcelSplitTest.class);
@Test
public void Test1() {
LocalDateTime start = LocalDateTime.now();
try (CsvWriter writer = CsvUtil.getWriter("D:\tmp\TEST_1.csv", StandardCharsets.UTF_8)) {
writer.writeHeaderLine(IntStream.range(0, 50).mapToObj(String::valueOf).collect(Collectors.toList()).toArray(new String[]{}));
for (int i = 0; i < 230000; i++) {
String[] strings = new Random().ints(50, 100, 150).mapToObj(a -> "TEST_" + a).collect(Collectors.toList()).toArray(new String[]{});
writer.writeLine(strings);
}
} catch (Exception e) {
logger.error("Error", e);
}
LocalDateTime end = LocalDateTime.now();
logger.info("Cost time {}", Duration.between(start, end).toMillis() + "ms");
}
/**
* convert by fastexcel
*/
@Test
public void Test3() {
LocalDateTime start = LocalDateTime.now();
try (OutputStream os = FileUtil.getOutputStream("D:\tmp\TEST_3.xlsx")) {
Workbook wb = new Workbook(os, "MyApplication", "1.0");
try (CsvReader csvReader = CsvUtil.getReader(FileUtil.getUtf8Reader("D:\tmp\TEST_1.csv"))) {
Worksheet ws1 = wb.newWorksheet("Sheet 1");
Worksheet ws2 = wb.newWorksheet("Sheet 2");
csvReader.stream().forEach(a -> {
long originalLineNumber = a.getOriginalLineNumber();
int col1 = 0;
int col2 = 0;
for (int i = 0; i < a.size(); i++) {
if (i % 2 == 0) {
ws1.value((int) originalLineNumber, col1, a.get(i));
col1++;
}
else{
ws2.value((int) originalLineNumber, col2, a.get(i));
col2++;
}
}
});
}
wb.finish();
} catch (IOException e) {
logger.error("Error", e);
}
LocalDateTime end = LocalDateTime.now();
logger.info("Cost time {}", Duration.between(start, end).toMillis() + "ms");
}
}
依赖项:
<dependency>
<groupId>org.dhatim</groupId>
<artifactId>fastexcel</artifactId>
<version>0.12.12</version>
</dependency>
<dependency>
<groupId>cn.hutool</groupId>
<artifactId>hutool-all</artifactId>
<version>5.7.16</version>
</dependency>