在 Java 中使用 group by(分组)聚合 CSV 数据

Aggregate CSV data with group by in Java

我需要在 Java 中使用 group by 聚合 CSV 数据。

我的 csv 文件如下所示:

Numero, NumeroWsn, NoeudAdress, PacketRece, NoeudsRece, Hello
1436136640477044,wsn430-8,NA:b27b,Packet recevied from,RX: b0b4, Hello #33
1436136640477257,wsn430-8,NA:b27b,Packet recevied from,RX: b986, Hello #33
1436136640477415,wsn430-8,NA:b27b,Packet recevied from,RX: bc2d, Hello #33
1436136640477566,wsn430-8,NA:b27b,Packet recevied from,RX: b36b, Hello #34
1436136640477716,wsn430-8,NA:b27b,Packet recevied from,RX: bcb6, Hello #35
1436136640477995,wsn430-9,NA:bc2d,Packet recevied from,RX: 1f9e, Hello #33
1436136640478162,wsn430-9,NA:bc2d,Packet recevied from,RX: be29, Hello #33
1436136640478313,wsn430-9,NA:bc2d,Packet recevied from,RX: b61a, Hello #32
1436136640478462,wsn430-9,NA:bc2d,Packet recevied from,RX: c735, Hello #32
1436136640478612,wsn430-9,NA:bc2d,Packet recevied from,RX: bb0a, Hello #32
1436136640478760,wsn430-9,NA:bc2d,Packet recevied from,RX: b6bc, Hello #33
1436136640477044,wsn430-8,NA:b27b,Packet recevied from,RX: b0b1, Hello #42
1436136640477257,wsn430-8,NA:b27b,Packet recevied from,RX: b984, Hello #44

有没有办法通过按 NoeudAdress 分组来聚合这些数据,并使用 Java 将 NoeudsRece 计数器显示为如下所示的列?

NoeudsAdresse,NumberOfNoeudsRece

b27b ,7
bc2d ,6

我考虑过使用 OpenCSV 将 CSV 文件加载到列表中,但是对于具有数百万行的 csv 文件是否有效?

可以逐行读取文件中的数据,以逗号分割,将各个值保存在一个 String 数组中,然后创建一个 HashMap:键为 NoeudAdress 列(按其索引从数组中取出)的值,值为一个 ArrayList,用来存放需要计数的那一列(即 NoeudsRece)的值。在这种情况下,计数器就是相应 ArrayList 的大小。

编辑:这是"通过 OpenCSV 导入整个 csv"这一做法的按需导入版本。我们没有将整个 csv 文件加载到内存中,而是只把需要的数据以需要的格式导入。这会比直接加载整个文件的做法性能更好。

使用 H2 而不是 OpenCSV

删除 header 行,并将下面的数据保存到名为 DATA.CSV 的文件中:
1436136640477044,wsn430-8,NA:b27b,Packet recevied from,RX: b0b4, Hello #33
1436136640477257,wsn430-8,NA:b27b,Packet recevied from,RX: b986, Hello #33
1436136640477415,wsn430-8,NA:b27b,Packet recevied from,RX: bc2d, Hello #33
1436136640477566,wsn430-8,NA:b27b,Packet recevied from,RX: b36b, Hello #34
1436136640477716,wsn430-8,NA:b27b,Packet recevied from,RX: bcb6, Hello #35
1436136640477995,wsn430-9,NA:bc2d,Packet recevied from,RX: 1f9e, Hello #33
1436136640478162,wsn430-9,NA:bc2d,Packet recevied from,RX: be29, Hello #33
1436136640478313,wsn430-9,NA:bc2d,Packet recevied from,RX: b61a, Hello #32
1436136640478462,wsn430-9,NA:bc2d,Packet recevied from,RX: c735, Hello #32
1436136640478612,wsn430-9,NA:bc2d,Packet recevied from,RX: bb0a, Hello #32
1436136640478760,wsn430-9,NA:bc2d,Packet recevied from,RX: b6bc, Hello #33
1436136640477044,wsn430-8,NA:b27b,Packet recevied from,RX: b0b1, Hello #42
1436136640477257,wsn430-8,NA:b27b,Packet recevied from,RX: b984, Hello #44

从这里下载 h2 jar 文件: http://www.h2database.com/html/download.html

然后运行这段代码...

import java.io.File;
import java.net.URISyntaxException;
import java.net.URL;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;

/**
 * Loads the header-less file {@code DATA.CSV} into an H2 table named
 * {@code DATA} and prints, for every {@code NOEUDADRESS} value, the number of
 * {@code NOEUDSRECE} entries recorded for it.
 *
 * <p>The H2 database files are stored next to the application (see
 * {@link #getApplicationPath(Class)}) under the name {@code storage}.
 * {@code DATA.CSV} is referenced by a relative path, so it is resolved by H2
 * against the working directory of the running process.
 *
 * <p>All work is triggered from the constructor; {@link #main(String[])} only
 * creates an instance.
 */
public class CSVLoader {

  /**
   * Column names of the {@code DATA} table in CSV field order. They are passed
   * to {@code CSVREAD} explicitly: without a column list H2 interprets the
   * first line of the file as column names, which would silently drop the
   * first data row of a header-less file.
   */
  private static final String CSV_COLUMNS =
      "NUMERO,NUMEROWSN,NOEUDADRESS,PACKETRECE,NOEUDSRECE,HELLO";

  /**
   * Returns the absolute path of the directory that contains the code source
   * (class folder or jar) of the given class.
   *
   * @param mainClass class whose code source location is looked up
   * @return absolute path of the parent directory of that location
   * @throws URISyntaxException if the code source URL cannot be converted to a URI
   */
  public static final String getApplicationPath(Class<?> mainClass) throws URISyntaxException {
    return getApplicationDirectory(mainClass).getAbsolutePath();
  }

  /**
   * Runs the whole import: drops the table if present, recreates it, loads the
   * CSV file and finally prints the grouped counts to standard output.
   */
  public CSVLoader() {
    executeStatement(getDropTableStatement(), false);
    executeStatement(getCreateTableStatement(), false);
    executeStatement(getInsertStatement(), false);
    executeStatement(getSelectStatement(), true);
  }

  /**
   * @return SQL that removes the {@code DATA} table when it already exists
   */
  public static final String getDropTableStatement() {
    return "DROP TABLE DATA IF EXISTS;\n";
  }

  /**
   * @return SQL that counts the {@code NOEUDSRECE} values per {@code NOEUDADRESS}
   */
  public static final String getSelectStatement() {
    return "SELECT NOEUDADRESS, COUNT(NOEUDSRECE) FROM DATA GROUP BY NOEUDADRESS;\n";
  }

  /**
   * @return SQL that creates the {@code DATA} table with one
   *     {@code VARCHAR(100)} column per CSV field
   */
  public static final String getCreateTableStatement() {
    String sql = "CREATE TABLE DATA(\n";
    sql += "   NUMERO         VARCHAR(100),\n";
    sql += "   NUMEROWSN      VARCHAR(100),\n";
    sql += "   NOEUDADRESS    VARCHAR(100),\n";
    sql += "   PACKETRECE     VARCHAR(100),\n";
    sql += "   NOEUDSRECE     VARCHAR(100),\n";
    sql += "   HELLO          VARCHAR(100))";
    return sql;
  }

  /**
   * @return SQL that copies every row of {@code DATA.CSV} into the
   *     {@code DATA} table; the column list is supplied so that the first
   *     line of the header-less file is read as data
   */
  public static final String getInsertStatement() {
    return "INSERT INTO DATA SELECT * FROM CSVREAD('DATA.CSV', '" + CSV_COLUMNS + "')";
  }

  /**
   * Opens a connection to the H2 database, executes one statement and closes
   * everything again. Failures are printed to standard error and not
   * propagated.
   *
   * @param sql statement to execute
   * @param withResultSet {@code true} to run {@code sql} as a query and print
   *     the first two columns of every row; {@code false} to just execute it
   */
  public void executeStatement(String sql, boolean withResultSet) {
    Connection connection = null;
    Statement statement = null;
    ResultSet resultSet = null;

    try {
      File file = getApplicationDirectory(CSVLoader.class);
      Class.forName("org.h2.Driver");
      connection = DriverManager.getConnection(
          "jdbc:h2:" + file.getAbsolutePath() + File.separator + "storage", "sa", "secret");
      statement = connection.createStatement();

      if (withResultSet) {
        resultSet = statement.executeQuery(sql);
        while (resultSet.next()) {
          System.out.println("-->" + resultSet.getString(1) + "\t" + resultSet.getString(2));
        }
      } else {
        statement.execute(sql);
      }
    } catch (URISyntaxException e) {
      e.printStackTrace();
    } catch (ClassNotFoundException e) {
      e.printStackTrace();
    } catch (SQLException e) {
      e.printStackTrace();
    } finally {
      closeAll(resultSet, statement, connection);
    }
  }

  /**
   * Closes the given JDBC resources in reverse order of acquisition. Each
   * resource is closed in its own try block so that a failure while closing
   * one of them cannot prevent the remaining ones from being closed.
   */
  private static void closeAll(ResultSet resultSet, Statement statement, Connection connection) {
    if (resultSet != null) {
      try {
        resultSet.close();
      } catch (SQLException e) {
        e.printStackTrace();
      }
    }
    if (statement != null) {
      try {
        statement.close();
      } catch (SQLException e) {
        e.printStackTrace();
      }
    }
    if (connection != null) {
      try {
        connection.close();
      } catch (SQLException e) {
        e.printStackTrace();
      }
    }
  }

  /**
   * Resolves the parent directory of the code source location of the given class.
   */
  private static final File getApplicationDirectory(Class<?> mainClass) throws URISyntaxException {
    URL url = mainClass.getProtectionDomain().getCodeSource().getLocation();
    File file = new File(url.toURI());
    return file.getParentFile();
  }

  /**
   * Entry point; creating the loader performs the import and prints the result.
   *
   * @param args ignored
   */
  public static void main(String[] args) {
    new CSVLoader();
  }
}

输出是

-->NA:b27b  7
-->NA:bc2d  6

你也可以直接在控制台中完成同样的操作,输入以下语句即可:

-- Creates table TEST and fills it in one step from the header-less DATA.CSV.
-- The column list is passed to CSVREAD explicitly: without it H2 interprets
-- the first line of the file as column names and that data row is lost.
CREATE TABLE TEST( 
    NUMERO         VARCHAR(100),
    NUMEROWSN      VARCHAR(100),
    NOEUDADRESS    VARCHAR(100),
    PACKETRECE     VARCHAR(100),  
    NOEUDSRECE     VARCHAR(100),   
    HELLO          VARCHAR(100)
) AS SELECT * FROM CSVREAD('C:\ECLIPSE\WORKSPACE\H2\DATA.CSV',
                           'NUMERO,NUMEROWSN,NOEUDADRESS,PACKETRECE,NOEUDSRECE,HELLO')

确保使用数据文件的完整路径