如何在 Java 中更快地阅读大文本文件?
How to read large text files faster in Java?
我刚刚做了一个简单的顺序中值滤波算法,我碰巧使用了 2 个扫描器来处理命令行输入和一个扫描器来读取文件。现在的问题是,我有一个 2,000,000 行的文本文件,每行有 2 列文本,格式为 <integer> <float>,读取该文件需要很长时间(超过 2 分钟)。
基本上,程序只是获取输入,使用中值滤波算法并写入输出文件。
下面是我的代码:
main.java
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Scanner;
/**
 * Command-line entry point for the sequential median filter.
 *
 * <p>Reads three whitespace-separated tokens from standard input (data file
 * name, filter size, output file name), loads the value column of the data
 * file and hands everything to {@code Serial}, which filters the values and
 * writes the output file.
 */
public class main {
    /**
     * Prompts for the run parameters, loads the input file and runs the filter.
     *
     * @param args unused; all parameters are read from standard input
     * @throws NumberFormatException if a data line has no parsable value column
     * @throws IOException if the input file cannot be read or the output file
     *         cannot be created
     */
    public static void main(String args[]) throws NumberFormatException, IOException {
        // Scanner is fine here: only three tokens are read from the keyboard.
        // It is deliberately not closed, because closing it would close System.in.
        Scanner keyboardInput = new Scanner(System.in);
        System.out.println("Enter your keyboard input as follows: <data file name> <filter size(odd int >= 3> <output file name>");

        String inFile = keyboardInput.next();      // Input file name.
        int filterSize = keyboardInput.nextInt();  // Filter size (odd integer >= 3).
        String outFile = keyboardInput.next();     // Output file name.

        List<Float> elements = readValues(inFile);
        new Serial(elements, filterSize, outFile);
    }

    /**
     * Loads the second column of every data line of {@code inFile}.
     *
     * <p>The first line of the file (the declared element count) is skipped.
     * Every following non-blank line is expected to look like
     * {@code <integer> <float>}; columns may be separated by any run of
     * whitespace. The integer column is not needed and is not parsed.
     *
     * @param inFile path of the data file
     * @return the parsed values in file order
     * @throws IOException if the file cannot be opened or read
     * @throws NumberFormatException if a line lacks a value column or the value
     *         is not a valid float
     */
    private static List<Float> readValues(String inFile) throws IOException {
        List<Float> values = new ArrayList<Float>();
        // BufferedReader reads the file in large chunks, which is much cheaper
        // than tokenising 2,000,000 lines through a Scanner.
        BufferedReader br = new BufferedReader(new FileReader(inFile));
        try {
            br.readLine(); // Skip the header line holding the array size.
            String line;
            while ((line = br.readLine()) != null) {
                final int len = line.length();
                int pos = 0;
                // Skip leading whitespace; a blank line carries no data.
                while (pos < len && Character.isWhitespace(line.charAt(pos))) {
                    pos++;
                }
                if (pos == len) {
                    continue;
                }
                // Skip the index column and the separator that follows it.
                while (pos < len && !Character.isWhitespace(line.charAt(pos))) {
                    pos++;
                }
                while (pos < len && Character.isWhitespace(line.charAt(pos))) {
                    pos++;
                }
                // The value column runs up to the next whitespace or end of line.
                int valueStart = pos;
                while (pos < len && !Character.isWhitespace(line.charAt(pos))) {
                    pos++;
                }
                if (valueStart == pos) {
                    throw new NumberFormatException("Missing value column in line: \"" + line + "\"");
                }
                values.add(Float.valueOf(line.substring(valueStart, pos)));
            }
        } finally {
            br.close(); // Release the file handle even when parsing fails.
        }
        return values;
    }
}
Serial.java
import java.io.FileNotFoundException;
import java.io.PrintWriter;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
/**
 * Sequential one-dimensional median filter.
 *
 * <p>Constructing an instance filters the given values and writes the result
 * to the output file: the first line holds the number of values, followed by
 * one {@code <1-based index> <value>} line per value.
 */
public class Serial {
    int filterSize; //Filter size (odd integer >= 3).
    String outFile; //Output file name.
    int arraySize; //Never assigned by this class.
    List<Float> elements = new ArrayList<Float>();
    int index = 0; //Never modified by this class.

    /**
     * Filters {@code elements} with a sliding median window and writes the
     * result to {@code outFile} (UTF-8).
     *
     * <p>The input list is only read, never modified. Positions too close to
     * either end to be the centre of a full window are copied through
     * unchanged, so the output always has as many values as the input. An
     * empty input produces a file containing only the count line {@code 0}.
     *
     * @param elements values to filter, in order
     * @param filterSize window width; intended to be an odd integer >= 3
     * @param outFile path of the file to write
     * @throws IllegalArgumentException if {@code filterSize} is smaller than 1
     * @throws FileNotFoundException if {@code outFile} cannot be created or opened
     * @throws UnsupportedEncodingException if UTF-8 is not supported
     */
    public Serial(List<Float> elements, int filterSize, String outFile)
            throws FileNotFoundException, UnsupportedEncodingException {
        if (filterSize < 1) {
            throw new IllegalArgumentException("filterSize must be at least 1 but was " + filterSize);
        }
        this.elements = elements;
        this.filterSize = filterSize;
        this.outFile = outFile;

        float[] filtered = medianFilter(elements, filterSize);

        //Write elements to output file
        PrintWriter writeOutput = new PrintWriter(this.outFile, "UTF-8");
        try {
            writeOutput.println(filtered.length); //Number of lines
            for (int i = 0; i < filtered.length; i++) {
                writeOutput.println((i + 1) + " " + filtered[i]); //Each line is written
            }
        } finally {
            writeOutput.close(); //Close even if writing is interrupted by an error.
        }
    }

    /**
     * Computes the median-filtered copy of {@code input}.
     *
     * <p>The window is advanced by index over a primitive array. Removing the
     * head of an array-backed list on every step would shift all remaining
     * elements each time and make the whole pass quadratic in the input size.
     */
    private static float[] medianFilter(List<Float> input, int filterSize) {
        final int n = input.size();
        float[] samples = new float[n];
        int next = 0;
        for (Float value : input) {
            samples[next++] = value;
        }

        // Start from a copy so that border positions keep their original value.
        float[] filtered = samples.clone();
        if (n >= filterSize) {
            // Offset of the median inside a sorted window, and of the window's
            // centre relative to its start (lower middle for even sizes).
            final int medianOffset = (filterSize - 1) / 2;
            float[] window = new float[filterSize];
            for (int start = 0; start <= n - filterSize; start++) {
                System.arraycopy(samples, start, window, 0, filterSize);
                Arrays.sort(window);
                filtered[start + medianOffset] = window[medianOffset];
            }
        }
        return filtered;
    }
}
有没有办法更快地读取文件(以秒为单位)?
谢谢
编辑:
输入示例
5
1 2
2 80
3 6
4 3
5 1
使用 Buffered Reader 应该可以提高读取文件的速度,因为它的缓冲区大小比 Scanner 大得多。
我从您之前的一个问题中了解到您最初使用的是 BufferedReader。您可以逐行读取,并按空格拆分每一行的字符串,如下所示:
//Reading file
BufferedReader br = new BufferedReader(new FileReader(inFile));
try {
    br.readLine(); // Skip the header line holding the array size.
    String line;
    while ((line = br.readLine()) != null) {
        // Each data line is "<integer> <float>"; only the float column is kept.
        String[] nums = line.split(" ");
        elements.add(Float.valueOf(nums[1]));
    }
} finally {
    br.close(); // Release the file handle even if a line fails to parse.
}
我刚刚做了一个简单的顺序中值滤波算法,我碰巧使用了 2 个扫描器来处理命令行输入和一个扫描器来读取文件。现在的问题是,我有一个 2,000,000 行的文本文件,每行有 2 列文本,格式为 <integer> <float>,读取该文件需要很长时间(超过 2 分钟)。
基本上,程序只是获取输入,使用中值滤波算法并写入输出文件。
下面是我的代码: main.java
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Scanner;
/**
 * Command-line entry point for the sequential median filter.
 *
 * <p>Reads three whitespace-separated tokens from standard input (data file
 * name, filter size, output file name), loads the value column of the data
 * file and hands everything to {@code Serial}, which filters the values and
 * writes the output file.
 */
public class main {
    /**
     * Prompts for the run parameters, loads the input file and runs the filter.
     *
     * @param args unused; all parameters are read from standard input
     * @throws NumberFormatException if a data line has no parsable value column
     * @throws IOException if the input file cannot be read or the output file
     *         cannot be created
     */
    public static void main(String args[]) throws NumberFormatException, IOException {
        // Scanner is fine here: only three tokens are read from the keyboard.
        // It is deliberately not closed, because closing it would close System.in.
        Scanner keyboardInput = new Scanner(System.in);
        System.out.println("Enter your keyboard input as follows: <data file name> <filter size(odd int >= 3> <output file name>");

        String inFile = keyboardInput.next();      // Input file name.
        int filterSize = keyboardInput.nextInt();  // Filter size (odd integer >= 3).
        String outFile = keyboardInput.next();     // Output file name.

        List<Float> elements = readValues(inFile);
        new Serial(elements, filterSize, outFile);
    }

    /**
     * Loads the second column of every data line of {@code inFile}.
     *
     * <p>The first line of the file (the declared element count) is skipped.
     * Every following non-blank line is expected to look like
     * {@code <integer> <float>}; columns may be separated by any run of
     * whitespace. The integer column is not needed and is not parsed.
     *
     * @param inFile path of the data file
     * @return the parsed values in file order
     * @throws IOException if the file cannot be opened or read
     * @throws NumberFormatException if a line lacks a value column or the value
     *         is not a valid float
     */
    private static List<Float> readValues(String inFile) throws IOException {
        List<Float> values = new ArrayList<Float>();
        // BufferedReader reads the file in large chunks, which is much cheaper
        // than tokenising 2,000,000 lines through a Scanner.
        BufferedReader br = new BufferedReader(new FileReader(inFile));
        try {
            br.readLine(); // Skip the header line holding the array size.
            String line;
            while ((line = br.readLine()) != null) {
                final int len = line.length();
                int pos = 0;
                // Skip leading whitespace; a blank line carries no data.
                while (pos < len && Character.isWhitespace(line.charAt(pos))) {
                    pos++;
                }
                if (pos == len) {
                    continue;
                }
                // Skip the index column and the separator that follows it.
                while (pos < len && !Character.isWhitespace(line.charAt(pos))) {
                    pos++;
                }
                while (pos < len && Character.isWhitespace(line.charAt(pos))) {
                    pos++;
                }
                // The value column runs up to the next whitespace or end of line.
                int valueStart = pos;
                while (pos < len && !Character.isWhitespace(line.charAt(pos))) {
                    pos++;
                }
                if (valueStart == pos) {
                    throw new NumberFormatException("Missing value column in line: \"" + line + "\"");
                }
                values.add(Float.valueOf(line.substring(valueStart, pos)));
            }
        } finally {
            br.close(); // Release the file handle even when parsing fails.
        }
        return values;
    }
}
Serial.java
import java.io.FileNotFoundException;
import java.io.PrintWriter;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
/**
 * Sequential one-dimensional median filter.
 *
 * <p>Constructing an instance filters the given values and writes the result
 * to the output file: the first line holds the number of values, followed by
 * one {@code <1-based index> <value>} line per value.
 */
public class Serial {
    int filterSize; //Filter size (odd integer >= 3).
    String outFile; //Output file name.
    int arraySize; //Never assigned by this class.
    List<Float> elements = new ArrayList<Float>();
    int index = 0; //Never modified by this class.

    /**
     * Filters {@code elements} with a sliding median window and writes the
     * result to {@code outFile} (UTF-8).
     *
     * <p>The input list is only read, never modified. Positions too close to
     * either end to be the centre of a full window are copied through
     * unchanged, so the output always has as many values as the input. An
     * empty input produces a file containing only the count line {@code 0}.
     *
     * @param elements values to filter, in order
     * @param filterSize window width; intended to be an odd integer >= 3
     * @param outFile path of the file to write
     * @throws IllegalArgumentException if {@code filterSize} is smaller than 1
     * @throws FileNotFoundException if {@code outFile} cannot be created or opened
     * @throws UnsupportedEncodingException if UTF-8 is not supported
     */
    public Serial(List<Float> elements, int filterSize, String outFile)
            throws FileNotFoundException, UnsupportedEncodingException {
        if (filterSize < 1) {
            throw new IllegalArgumentException("filterSize must be at least 1 but was " + filterSize);
        }
        this.elements = elements;
        this.filterSize = filterSize;
        this.outFile = outFile;

        float[] filtered = medianFilter(elements, filterSize);

        //Write elements to output file
        PrintWriter writeOutput = new PrintWriter(this.outFile, "UTF-8");
        try {
            writeOutput.println(filtered.length); //Number of lines
            for (int i = 0; i < filtered.length; i++) {
                writeOutput.println((i + 1) + " " + filtered[i]); //Each line is written
            }
        } finally {
            writeOutput.close(); //Close even if writing is interrupted by an error.
        }
    }

    /**
     * Computes the median-filtered copy of {@code input}.
     *
     * <p>The window is advanced by index over a primitive array. Removing the
     * head of an array-backed list on every step would shift all remaining
     * elements each time and make the whole pass quadratic in the input size.
     */
    private static float[] medianFilter(List<Float> input, int filterSize) {
        final int n = input.size();
        float[] samples = new float[n];
        int next = 0;
        for (Float value : input) {
            samples[next++] = value;
        }

        // Start from a copy so that border positions keep their original value.
        float[] filtered = samples.clone();
        if (n >= filterSize) {
            // Offset of the median inside a sorted window, and of the window's
            // centre relative to its start (lower middle for even sizes).
            final int medianOffset = (filterSize - 1) / 2;
            float[] window = new float[filterSize];
            for (int start = 0; start <= n - filterSize; start++) {
                System.arraycopy(samples, start, window, 0, filterSize);
                Arrays.sort(window);
                filtered[start + medianOffset] = window[medianOffset];
            }
        }
        return filtered;
    }
}
有没有办法更快地读取文件(以秒为单位)?
谢谢
编辑: 输入示例
5
1 2
2 80
3 6
4 3
5 1
使用 Buffered Reader 应该可以提高读取文件的速度,因为它的缓冲区大小比 Scanner 大得多。
我从您之前的一个问题中了解到您最初使用的是 BufferedReader。您可以逐行读取,并按空格拆分每一行的字符串,如下所示:
//Reading file
BufferedReader br = new BufferedReader(new FileReader(inFile));
try {
    br.readLine(); // Skip the header line holding the array size.
    String line;
    while ((line = br.readLine()) != null) {
        // Each data line is "<integer> <float>"; only the float column is kept.
        String[] nums = line.split(" ");
        elements.add(Float.valueOf(nums[1]));
    }
} finally {
    br.close(); // Release the file handle even if a line fails to parse.
}