在 Java 中使用分组依据聚合 CSV 数据
Aggregate CSV data with group by in Java
我需要在 Java 中使用 group by
聚合 CSV 数据。
我的 csv 文件如下所示:
Numero, NumeroWsn, NoeudAdress, PacketRece, NoeudsRece, Hello
1436136640477044,wsn430-8,NA:b27b,Packet recevied from,RX: b0b4, Hello #33
1436136640477257,wsn430-8,NA:b27b,Packet recevied from,RX: b986, Hello #33
1436136640477415,wsn430-8,NA:b27b,Packet recevied from,RX: bc2d, Hello #33
1436136640477566,wsn430-8,NA:b27b,Packet recevied from,RX: b36b, Hello #34
1436136640477716,wsn430-8,NA:b27b,Packet recevied from,RX: bcb6, Hello #35
1436136640477995,wsn430-9,NA:bc2d,Packet recevied from,RX: 1f9e, Hello #33
1436136640478162,wsn430-9,NA:bc2d,Packet recevied from,RX: be29, Hello #33
1436136640478313,wsn430-9,NA:bc2d,Packet recevied from,RX: b61a, Hello #32
1436136640478462,wsn430-9,NA:bc2d,Packet recevied from,RX: c735, Hello #32
1436136640478612,wsn430-9,NA:bc2d,Packet recevied from,RX: bb0a, Hello #32
1436136640478760,wsn430-9,NA:bc2d,Packet recevied from,RX: b6bc, Hello #33
1436136640477044,wsn430-8,NA:b27b,Packet recevied from,RX: b0b1, Hello #42
1436136640477257,wsn430-8,NA:b27b,Packet recevied from,RX: b984, Hello #44
有没有办法通过按 NoeudAdress
分组来聚合这些数据,并使用 Java 将 NoeudsRece
计数器显示为如下所示的列?
NoeudsAdresse,NumberOfNoeudsRece
b27b ,7
bc2d ,6
我考虑过使用 OpenCSV 将 CSV 文件加载到列表中,但是对于具有数百万行的 csv 文件是否有效?
可以逐行读取文件中的数据,按逗号分割,把各个值保存在一个 String 数组中;然后创建一个 HashMap,键为 NoeudAdress 列的值,值为一个 ArrayList,用来存放需要计数的那一列(NoeudsRece)的值。这样,每个键的计数器就是对应 ArrayList 的大小。
编辑:这是通过 OpenCSV 导入整个 csv 的条件变体。我们没有将整个 csv 文件加载到内存中,而是仅以我们需要的特定格式导入我们需要的数据。这将比粗略的方法表现得更好。
使用 H2
而不是 OpenCSV
。
删除 header 行,并将其余数据行保存到名为 DATA.CSV
的文件中:
1436136640477044,wsn430-8,NA:b27b,Packet recevied from,RX: b0b4, Hello #33
1436136640477257,wsn430-8,NA:b27b,Packet recevied from,RX: b986, Hello #33
1436136640477415,wsn430-8,NA:b27b,Packet recevied from,RX: bc2d, Hello #33
1436136640477566,wsn430-8,NA:b27b,Packet recevied from,RX: b36b, Hello #34
1436136640477716,wsn430-8,NA:b27b,Packet recevied from,RX: bcb6, Hello #35
1436136640477995,wsn430-9,NA:bc2d,Packet recevied from,RX: 1f9e, Hello #33
1436136640478162,wsn430-9,NA:bc2d,Packet recevied from,RX: be29, Hello #33
1436136640478313,wsn430-9,NA:bc2d,Packet recevied from,RX: b61a, Hello #32
1436136640478462,wsn430-9,NA:bc2d,Packet recevied from,RX: c735, Hello #32
1436136640478612,wsn430-9,NA:bc2d,Packet recevied from,RX: bb0a, Hello #32
1436136640478760,wsn430-9,NA:bc2d,Packet recevied from,RX: b6bc, Hello #33
1436136640477044,wsn430-8,NA:b27b,Packet recevied from,RX: b0b1, Hello #42
1436136640477257,wsn430-8,NA:b27b,Packet recevied from,RX: b984, Hello #44
从这里下载 h2 jar 文件:
http://www.h2database.com/html/download.html
然后运行这段代码...
import java.io.File;
import java.net.URISyntaxException;
import java.net.URL;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
/**
 * Loads a header-less CSV file into an H2 database table named {@code DATA} and prints,
 * for every {@code NOEUDADRESS}, the number of {@code NOEUDSRECE} values.
 *
 * <p>The database is a file-based H2 database called {@code storage}, located in the parent
 * directory of the location this class was loaded from. Every statement opens and closes
 * its own connection.
 */
public class CSVLoader {

    /**
     * Column names handed to H2's {@code CSVREAD}, in CSV column order. Passing them explicitly
     * makes H2 treat the first line of the file as data; without them the first line would be
     * consumed as the header and one row would be lost.
     */
    private static final String CSV_COLUMNS =
            "NUMERO,NUMEROWSN,NOEUDADRESS,PACKETRECE,NOEUDSRECE,HELLO";

    /**
     * Returns the absolute path of the application directory.
     *
     * @param mainClass class whose code source location identifies the application
     * @return absolute path of the parent directory of that code source location
     * @throws URISyntaxException if the code source location cannot be converted to a URI
     */
    public static final String getApplicationPath(Class<?> mainClass) throws URISyntaxException {
        return getApplicationDirectory(mainClass).getAbsolutePath();
    }

    /**
     * Runs the whole pipeline: drop the table, recreate it, import the CSV file and print
     * the grouped counts. Failures of individual steps are reported on standard error by
     * {@link #executeStatement(String, boolean)} and do not stop the following steps.
     */
    public CSVLoader() {
        executeStatement(getDropTableStatement(), false);
        executeStatement(getCreateTableStatement(), false);
        executeStatement(getInsertStatement(), false);
        executeStatement(getSelectStatement(), true);
    }

    /**
     * @return the SQL that drops the {@code DATA} table when it exists
     */
    public static final String getDropTableStatement() {
        return "DROP TABLE DATA IF EXISTS;\n";
    }

    /**
     * @return the SQL that counts {@code NOEUDSRECE} values per {@code NOEUDADRESS}
     */
    public static final String getSelectStatement() {
        return "SELECT NOEUDADRESS, COUNT(NOEUDSRECE) FROM DATA GROUP BY NOEUDADRESS;\n";
    }

    /**
     * @return the SQL that creates the {@code DATA} table with one VARCHAR column per CSV column
     */
    public static final String getCreateTableStatement() {
        StringBuilder sql = new StringBuilder("CREATE TABLE DATA(\n");
        sql.append(" NUMERO VARCHAR(100),\n");
        sql.append(" NUMEROWSN VARCHAR(100),\n");
        sql.append(" NOEUDADRESS VARCHAR(100),\n");
        sql.append(" PACKETRECE VARCHAR(100),\n");
        sql.append(" NOEUDSRECE VARCHAR(100),\n");
        sql.append(" HELLO VARCHAR(100))");
        return sql.toString();
    }

    /**
     * Returns the SQL that imports {@code DATA.CSV} into the {@code DATA} table.
     *
     * <p>The file name is a relative path; use an absolute path in the statement if the file
     * is not reachable that way. The file must not contain a header line, because the column
     * names are supplied explicitly.
     *
     * @return the import statement
     */
    public static final String getInsertStatement() {
        return "INSERT INTO DATA SELECT * FROM CSVREAD('DATA.CSV', '" + CSV_COLUMNS + "')";
    }

    /**
     * Executes one SQL statement on a freshly opened connection.
     *
     * <p>Each JDBC resource is released in its own {@code finally} block, so a failure while
     * closing one resource can no longer prevent the others from being closed. All failures
     * are printed to standard error; nothing is rethrown.
     *
     * @param sql           statement to execute
     * @param withResultSet {@code true} to run the statement as a query and print the first
     *                      two columns of every row, {@code false} to just execute it
     */
    public void executeStatement(String sql, boolean withResultSet) {
        try {
            File file = getApplicationDirectory(CSVLoader.class);
            Class.forName("org.h2.Driver");
            Connection connection = DriverManager.getConnection(
                    "jdbc:h2:" + file.getAbsolutePath() + File.separator + "storage", "sa", "secret");
            try {
                Statement statement = connection.createStatement();
                try {
                    if (withResultSet) {
                        printRows(statement.executeQuery(sql));
                    } else {
                        statement.execute(sql);
                    }
                } finally {
                    statement.close();
                }
            } finally {
                connection.close();
            }
        } catch (URISyntaxException e) {
            e.printStackTrace();
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        } catch (SQLException e) {
            e.printStackTrace();
        }
    }

    /** Prints the first two columns of every row and always closes the result set. */
    private static void printRows(ResultSet resultSet) throws SQLException {
        try {
            while (resultSet.next()) {
                System.out.println("-->" + resultSet.getString(1) + "\t" + resultSet.getString(2));
            }
        } finally {
            resultSet.close();
        }
    }

    /** Returns the parent directory of the location {@code mainClass} was loaded from. */
    private static final File getApplicationDirectory(Class<?> mainClass) throws URISyntaxException {
        URL url = mainClass.getProtectionDomain().getCodeSource().getLocation();
        File file = new File(url.toURI());
        return file.getParentFile();
    }

    /**
     * Entry point; runs the import and prints the grouped counts.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        new CSVLoader();
    }
}
输出是
-->NA:b27b 7
-->NA:bc2d 6
你也可以直接在控制台中运行:在控制台中输入下面的语句,上面的例子同样有效:
-- Creates TEST and fills it from the header-less CSV file in a single statement.
-- The column list passed to CSVREAD makes H2 read the first line as data;
-- without it the first line would be consumed as column names and that row lost.
CREATE TABLE TEST(
NUMERO VARCHAR(100),
NUMEROWSN VARCHAR(100),
NOEUDADRESS VARCHAR(100),
PACKETRECE VARCHAR(100),
NOEUDSRECE VARCHAR(100),
HELLO VARCHAR(100)
) AS SELECT * FROM CSVREAD('C:\ECLIPSE\WORKSPACE\H2\DATA.CSV', 'NUMERO,NUMEROWSN,NOEUDADRESS,PACKETRECE,NOEUDSRECE,HELLO')
确保使用数据文件的完整路径
我需要在 Java 中使用 group by
聚合 CSV 数据。
我的 csv 文件如下所示:
Numero, NumeroWsn, NoeudAdress, PacketRece, NoeudsRece, Hello
1436136640477044,wsn430-8,NA:b27b,Packet recevied from,RX: b0b4, Hello #33
1436136640477257,wsn430-8,NA:b27b,Packet recevied from,RX: b986, Hello #33
1436136640477415,wsn430-8,NA:b27b,Packet recevied from,RX: bc2d, Hello #33
1436136640477566,wsn430-8,NA:b27b,Packet recevied from,RX: b36b, Hello #34
1436136640477716,wsn430-8,NA:b27b,Packet recevied from,RX: bcb6, Hello #35
1436136640477995,wsn430-9,NA:bc2d,Packet recevied from,RX: 1f9e, Hello #33
1436136640478162,wsn430-9,NA:bc2d,Packet recevied from,RX: be29, Hello #33
1436136640478313,wsn430-9,NA:bc2d,Packet recevied from,RX: b61a, Hello #32
1436136640478462,wsn430-9,NA:bc2d,Packet recevied from,RX: c735, Hello #32
1436136640478612,wsn430-9,NA:bc2d,Packet recevied from,RX: bb0a, Hello #32
1436136640478760,wsn430-9,NA:bc2d,Packet recevied from,RX: b6bc, Hello #33
1436136640477044,wsn430-8,NA:b27b,Packet recevied from,RX: b0b1, Hello #42
1436136640477257,wsn430-8,NA:b27b,Packet recevied from,RX: b984, Hello #44
有没有办法通过按 NoeudAdress
分组来聚合这些数据,并使用 Java 将 NoeudsRece
计数器显示为如下所示的列?
NoeudsAdresse,NumberOfNoeudsRece
b27b ,7
bc2d ,6
我考虑过使用 OpenCSV 将 CSV 文件加载到列表中,但是对于具有数百万行的 csv 文件是否有效?
可以逐行读取文件中的数据,按逗号分割,把各个值保存在一个 String 数组中;然后创建一个 HashMap,键为 NoeudAdress 列的值,值为一个 ArrayList,用来存放需要计数的那一列(NoeudsRece)的值。这样,每个键的计数器就是对应 ArrayList 的大小。
编辑:这是通过 OpenCSV 导入整个 csv 的条件变体。我们没有将整个 csv 文件加载到内存中,而是仅以我们需要的特定格式导入我们需要的数据。这将比粗略的方法表现得更好。
使用 H2
而不是 OpenCSV
。
删除 header 行,并将其余数据行保存到名为 DATA.CSV 的文件中:
1436136640477044,wsn430-8,NA:b27b,Packet recevied from,RX: b0b4, Hello #33
1436136640477257,wsn430-8,NA:b27b,Packet recevied from,RX: b986, Hello #33
1436136640477415,wsn430-8,NA:b27b,Packet recevied from,RX: bc2d, Hello #33
1436136640477566,wsn430-8,NA:b27b,Packet recevied from,RX: b36b, Hello #34
1436136640477716,wsn430-8,NA:b27b,Packet recevied from,RX: bcb6, Hello #35
1436136640477995,wsn430-9,NA:bc2d,Packet recevied from,RX: 1f9e, Hello #33
1436136640478162,wsn430-9,NA:bc2d,Packet recevied from,RX: be29, Hello #33
1436136640478313,wsn430-9,NA:bc2d,Packet recevied from,RX: b61a, Hello #32
1436136640478462,wsn430-9,NA:bc2d,Packet recevied from,RX: c735, Hello #32
1436136640478612,wsn430-9,NA:bc2d,Packet recevied from,RX: bb0a, Hello #32
1436136640478760,wsn430-9,NA:bc2d,Packet recevied from,RX: b6bc, Hello #33
1436136640477044,wsn430-8,NA:b27b,Packet recevied from,RX: b0b1, Hello #42
1436136640477257,wsn430-8,NA:b27b,Packet recevied from,RX: b984, Hello #44
从这里下载 h2 jar 文件: http://www.h2database.com/html/download.html
然后运行这段代码...
import java.io.File;
import java.net.URISyntaxException;
import java.net.URL;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
/**
 * Loads a header-less CSV file into an H2 database table named {@code DATA} and prints,
 * for every {@code NOEUDADRESS}, the number of {@code NOEUDSRECE} values.
 *
 * <p>The database is a file-based H2 database called {@code storage}, located in the parent
 * directory of the location this class was loaded from. Every statement opens and closes
 * its own connection.
 */
public class CSVLoader {

    /**
     * Column names handed to H2's {@code CSVREAD}, in CSV column order. Passing them explicitly
     * makes H2 treat the first line of the file as data; without them the first line would be
     * consumed as the header and one row would be lost.
     */
    private static final String CSV_COLUMNS =
            "NUMERO,NUMEROWSN,NOEUDADRESS,PACKETRECE,NOEUDSRECE,HELLO";

    /**
     * Returns the absolute path of the application directory.
     *
     * @param mainClass class whose code source location identifies the application
     * @return absolute path of the parent directory of that code source location
     * @throws URISyntaxException if the code source location cannot be converted to a URI
     */
    public static final String getApplicationPath(Class<?> mainClass) throws URISyntaxException {
        return getApplicationDirectory(mainClass).getAbsolutePath();
    }

    /**
     * Runs the whole pipeline: drop the table, recreate it, import the CSV file and print
     * the grouped counts. Failures of individual steps are reported on standard error by
     * {@link #executeStatement(String, boolean)} and do not stop the following steps.
     */
    public CSVLoader() {
        executeStatement(getDropTableStatement(), false);
        executeStatement(getCreateTableStatement(), false);
        executeStatement(getInsertStatement(), false);
        executeStatement(getSelectStatement(), true);
    }

    /**
     * @return the SQL that drops the {@code DATA} table when it exists
     */
    public static final String getDropTableStatement() {
        return "DROP TABLE DATA IF EXISTS;\n";
    }

    /**
     * @return the SQL that counts {@code NOEUDSRECE} values per {@code NOEUDADRESS}
     */
    public static final String getSelectStatement() {
        return "SELECT NOEUDADRESS, COUNT(NOEUDSRECE) FROM DATA GROUP BY NOEUDADRESS;\n";
    }

    /**
     * @return the SQL that creates the {@code DATA} table with one VARCHAR column per CSV column
     */
    public static final String getCreateTableStatement() {
        StringBuilder sql = new StringBuilder("CREATE TABLE DATA(\n");
        sql.append(" NUMERO VARCHAR(100),\n");
        sql.append(" NUMEROWSN VARCHAR(100),\n");
        sql.append(" NOEUDADRESS VARCHAR(100),\n");
        sql.append(" PACKETRECE VARCHAR(100),\n");
        sql.append(" NOEUDSRECE VARCHAR(100),\n");
        sql.append(" HELLO VARCHAR(100))");
        return sql.toString();
    }

    /**
     * Returns the SQL that imports {@code DATA.CSV} into the {@code DATA} table.
     *
     * <p>The file name is a relative path; use an absolute path in the statement if the file
     * is not reachable that way. The file must not contain a header line, because the column
     * names are supplied explicitly.
     *
     * @return the import statement
     */
    public static final String getInsertStatement() {
        return "INSERT INTO DATA SELECT * FROM CSVREAD('DATA.CSV', '" + CSV_COLUMNS + "')";
    }

    /**
     * Executes one SQL statement on a freshly opened connection.
     *
     * <p>Each JDBC resource is released in its own {@code finally} block, so a failure while
     * closing one resource can no longer prevent the others from being closed. All failures
     * are printed to standard error; nothing is rethrown.
     *
     * @param sql           statement to execute
     * @param withResultSet {@code true} to run the statement as a query and print the first
     *                      two columns of every row, {@code false} to just execute it
     */
    public void executeStatement(String sql, boolean withResultSet) {
        try {
            File file = getApplicationDirectory(CSVLoader.class);
            Class.forName("org.h2.Driver");
            Connection connection = DriverManager.getConnection(
                    "jdbc:h2:" + file.getAbsolutePath() + File.separator + "storage", "sa", "secret");
            try {
                Statement statement = connection.createStatement();
                try {
                    if (withResultSet) {
                        printRows(statement.executeQuery(sql));
                    } else {
                        statement.execute(sql);
                    }
                } finally {
                    statement.close();
                }
            } finally {
                connection.close();
            }
        } catch (URISyntaxException e) {
            e.printStackTrace();
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        } catch (SQLException e) {
            e.printStackTrace();
        }
    }

    /** Prints the first two columns of every row and always closes the result set. */
    private static void printRows(ResultSet resultSet) throws SQLException {
        try {
            while (resultSet.next()) {
                System.out.println("-->" + resultSet.getString(1) + "\t" + resultSet.getString(2));
            }
        } finally {
            resultSet.close();
        }
    }

    /** Returns the parent directory of the location {@code mainClass} was loaded from. */
    private static final File getApplicationDirectory(Class<?> mainClass) throws URISyntaxException {
        URL url = mainClass.getProtectionDomain().getCodeSource().getLocation();
        File file = new File(url.toURI());
        return file.getParentFile();
    }

    /**
     * Entry point; runs the import and prints the grouped counts.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        new CSVLoader();
    }
}
输出是
-->NA:b27b 7
-->NA:bc2d 6
你也可以直接在控制台中运行:在控制台中输入下面的语句,上面的例子同样有效:
-- Creates TEST and fills it from the header-less CSV file in a single statement.
-- The column list passed to CSVREAD makes H2 read the first line as data;
-- without it the first line would be consumed as column names and that row lost.
CREATE TABLE TEST(
NUMERO VARCHAR(100),
NUMEROWSN VARCHAR(100),
NOEUDADRESS VARCHAR(100),
PACKETRECE VARCHAR(100),
NOEUDSRECE VARCHAR(100),
HELLO VARCHAR(100)
) AS SELECT * FROM CSVREAD('C:\ECLIPSE\WORKSPACE\H2\DATA.CSV', 'NUMERO,NUMEROWSN,NOEUDADRESS,PACKETRECE,NOEUDSRECE,HELLO')
确保使用数据文件的完整路径