在 java 中使用 univocity 库进行 CSV 解析
CSV parsing with univocity library in java
我正在使用 univocity 在 Java 中解析一个大型 (6 GB) CSV 文件。CSV 条目如下所示,我已经可以用下面的代码解析该 CSV。请问如何才能生成下文所示的输出?
// Parse test.csv with univocity and print every record as one comma-separated line.
CsvParserSettings settings = new CsvParserSettings();
settings.getFormat().setLineSeparator("\n");
CsvParser parser = new CsvParser(settings);
File f = new File("test.csv");
parser.beginParsing(f, "UTF-8");
String[] row;
// The loop ends when parseNext() returns null.
while ((row = parser.parseNext()) != null) {
    // Arrays.toString yields "[a, b, c]"; the replacements below strip the
    // brackets and the whitespace so that only "a,b,c" remains.
    String val = Arrays.toString(row);
    // The backslash has to be doubled inside a Java string literal so that the
    // regex engine receives \[ , \] and \s. A single backslash before '[' or ']'
    // is an illegal escape sequence and does not compile.
    val = val.replaceAll("\\[", "");
    val = val.replaceAll("\\]", "");
    val = val.replaceAll("\\s", "");
    System.out.println(val);
} // end while
test.csv内容:
A,10,2,3
null,11,A1,null
null,30,A23,null
null,44,A34,null
null,16,A67,null
A,20,5,6
null,41,A100,null
null,60,A56,null
null,74,A34,null
null,86,A56,null
正在尝试获得如下输出:
A,[10;11;30;44;16],[2,A1,A23,A34,A67],3
A,[20;41;60;74;86],[5,A100,A56,A34,A56],6
每一行预期输出都取决于多行输入,因此需要先把各个单元格的值存储在中间变量中。据此,代码可以写成:
// Groups consecutive rows of test.csv. A row whose first field is not the text
// "null" starts a new group; every following row whose first field is "null"
// contributes its 2nd and 3rd fields to that group. One line is printed per group:
//   <field0>,[<2nd fields joined by ';'>],[<3rd fields joined by ','>],<field3>
String line;
ArrayList<String> ar1 = new ArrayList<String>(); // 2nd-column values of the current group
ArrayList<String> ar2 = new ArrayList<String>(); // 3rd-column values of the current group
String s1=null,s2=null;                          // 1st and 4th field of the group's first row
String[] lineSplit;
// try-with-resources closes the reader even when readLine() throws.
try (BufferedReader csv = new BufferedReader(new FileReader("test.csv"))) {
    while ((line = csv.readLine()) != null){
        lineSplit = line.split(",");
        // NOTE(review): rows with only 2 or 3 fields pass this check, yet indices 2 and 3
        // are read below; the input is presumably always 4 columns wide - confirm.
        if(lineSplit.length>1){
            if(!lineSplit[0].equals("null")){
                // A new group begins: emit the previous one first, if there is any.
                if(!ar1.isEmpty()){
                    System.out.println(s1+","+ar1.toString().replaceAll(", ", ";")
                            +","+ar2.toString().replaceAll(", ", ",")+","+s2);
                }
                s1 = lineSplit[0] ;
                s2 = lineSplit[3];
                ar1 = new ArrayList<String>();
                ar1.add(lineSplit[1]);
                ar2 = new ArrayList<String>();
                ar2.add(lineSplit[2]);
            }
            else{
                ar1.add(lineSplit[1]);
                ar2.add(lineSplit[2]);
            }
        }
    }
}
// Emit the last group. The guard keeps an empty input from printing "null,[],[],null".
if(!ar1.isEmpty()){
    System.out.println(s1+","+ar1.toString().replaceAll(", ", ";")
            +","+ar2.toString().replaceAll(", ", ",")+","+s2);
}
我正在使用 univocity 在 Java 中解析一个大型 (6 GB) CSV 文件。CSV 条目如下所示,我已经可以用下面的代码解析该 CSV。请问如何才能生成下文所示的输出?
// Parse test.csv with univocity and print every record as one comma-separated line.
CsvParserSettings settings = new CsvParserSettings();
settings.getFormat().setLineSeparator("\n");
CsvParser parser = new CsvParser(settings);
File f = new File("test.csv");
parser.beginParsing(f, "UTF-8");
String[] row;
// The loop ends when parseNext() returns null.
while ((row = parser.parseNext()) != null) {
    // Arrays.toString yields "[a, b, c]"; the replacements below strip the
    // brackets and the whitespace so that only "a,b,c" remains.
    String val = Arrays.toString(row);
    // The backslash has to be doubled inside a Java string literal so that the
    // regex engine receives \[ , \] and \s. A single backslash before '[' or ']'
    // is an illegal escape sequence and does not compile.
    val = val.replaceAll("\\[", "");
    val = val.replaceAll("\\]", "");
    val = val.replaceAll("\\s", "");
    System.out.println(val);
} // end while
test.csv内容:
A,10,2,3
null,11,A1,null
null,30,A23,null
null,44,A34,null
null,16,A67,null
A,20,5,6
null,41,A100,null
null,60,A56,null
null,74,A34,null
null,86,A56,null
正在尝试获得如下输出:
A,[10;11;30;44;16],[2,A1,A23,A34,A67],3
A,[20;41;60;74;86],[5,A100,A56,A34,A56],6
每一行预期输出都取决于多行输入,因此需要先把各个单元格的值存储在中间变量中。据此,代码可以写成:
// Groups consecutive rows of test.csv. A row whose first field is not the text
// "null" starts a new group; every following row whose first field is "null"
// contributes its 2nd and 3rd fields to that group. One line is printed per group:
//   <field0>,[<2nd fields joined by ';'>],[<3rd fields joined by ','>],<field3>
String line;
ArrayList<String> ar1 = new ArrayList<String>(); // 2nd-column values of the current group
ArrayList<String> ar2 = new ArrayList<String>(); // 3rd-column values of the current group
String s1=null,s2=null;                          // 1st and 4th field of the group's first row
String[] lineSplit;
// try-with-resources closes the reader even when readLine() throws.
try (BufferedReader csv = new BufferedReader(new FileReader("test.csv"))) {
    while ((line = csv.readLine()) != null){
        lineSplit = line.split(",");
        // NOTE(review): rows with only 2 or 3 fields pass this check, yet indices 2 and 3
        // are read below; the input is presumably always 4 columns wide - confirm.
        if(lineSplit.length>1){
            if(!lineSplit[0].equals("null")){
                // A new group begins: emit the previous one first, if there is any.
                if(!ar1.isEmpty()){
                    System.out.println(s1+","+ar1.toString().replaceAll(", ", ";")
                            +","+ar2.toString().replaceAll(", ", ",")+","+s2);
                }
                s1 = lineSplit[0] ;
                s2 = lineSplit[3];
                ar1 = new ArrayList<String>();
                ar1.add(lineSplit[1]);
                ar2 = new ArrayList<String>();
                ar2.add(lineSplit[2]);
            }
            else{
                ar1.add(lineSplit[1]);
                ar2.add(lineSplit[2]);
            }
        }
    }
}
// Emit the last group. The guard keeps an empty input from printing "null,[],[],null".
if(!ar1.isEmpty()){
    System.out.println(s1+","+ar1.toString().replaceAll(", ", ";")
            +","+ar2.toString().replaceAll(", ", ",")+","+s2);
}