读取大文件错误 "outofmemoryerror"(java)
Read large file error "outofmemoryerror"(java)
对不起我的英语。我想读取一个大文件,但是当我读取时出现错误outOfMemoryError。我不明白如何在应用程序中使用内存。以下代码不起作用:
try {
StringBuilder fileData = new StringBuilder(1000);
BufferedReader reader = new BufferedReader(new FileReader(file));
char[] buf = new char[8192];
int bytesread = 0,
bytesBuffered = 0;
while( (bytesread = reader.read( buf )) > -1 ) {
String readData = String.valueOf(buf, 0, bytesread);
bytesBuffered += bytesread;
fileData.append(readData); //this is error
if (bytesBuffered > 1024 * 1024) {
bytesBuffered = 0;
}
}
System.out.println(fileData.toString().toCharArray());
} finally {
}
试试这个。这可能会有帮助:-
try{
BufferedReader reader = new BufferedReader(new FileReader(file));
String txt = "";
while( (txt = reader.read()) != null){
System.out.println(txt);
}
}catch(Exception e){
System.out.println("Error : "+e.getMessage());
}
您需要预先分配一个大缓冲区以避免重新分配。
File file = ...;
StringBuilder fileData = new StringBuilder(file.size());
和运行大堆大小:
java -Xmx2G
====更新
使用缓冲区的 while 循环不需要太多内存 运行。将输入视为流,将您的搜索字符串与流匹配。这是一个非常简单的状态机。如果你需要搜索多个单词,你可以找到一个 TrieTree 实现(支持流)。
// the match state model
...xxxxxxabxxxxxaxxxxxabcdexxxx...
ab a abcd
File file = new File("path_to_your_file");
String yourSearchWord = "abcd";
int matchIndex = 0;
boolean matchPrefix = false;
try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
int chr;
while ((chr = reader.read()) != -1) {
if (matchPrefix == false) {
char searchChar = yourSearchWord.charAt(0);
if (chr == searchChar) {
matchPrefix = true;
matchIndex = 0;
}
} else {
char searchChar = yourSearchWord.charAt(++matchIndex);
if (chr == searchChar) {
if (matchIndex == yourSearchWord.length() - 1) {
// match!!
System.out.println("match: " + matchIndex);
matchPrefix = false;
matchIndex = 0;
}
} else {
matchPrefix = false;
matchIndex = 0;
}
}
}
}
你不应该在内存中保存这么大的文件,因为你 运行 离开了它,如你所见。由于您使用 Java 7,因此您需要将文件作为流手动读取并即时检查内容。否则,您可以使用 Java 8 的流 API。这只是一个示例。它有效,但请记住,由于编码问题,找到的单词的位置可能会有所不同,因此这不是生产代码:
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
public class FileReader
{
private static String wordToFind = "SEARCHED_WORD";
private static File file = new File("YOUR_FILE");
private static int currentMatchingPosition;
private static int foundAtPosition = -1;
private static int charsRead;
public static void main(String[] args) throws IOException
{
try (FileInputStream fis = new FileInputStream(file))
{
System.out.println("Total size to read (in bytes) : " + fis.available());
int c;
while ((c = fis.read()) != -1)
{
charsRead++;
checkContent(c);
}
if (foundAtPosition > -1)
{
System.out.println("Found word at position: " + (foundAtPosition - wordToFind.length()));
}
else
{
System.out.println("Didnt't find the word!");
}
}
catch (IOException e)
{
e.printStackTrace();
}
}
private static void checkContent(int c)
{
if (currentMatchingPosition >= wordToFind.length())
{
//already found....
return;
}
if (wordToFind.charAt(currentMatchingPosition) == (char)c)
{
foundAtPosition = charsRead;
currentMatchingPosition++;
}
else
{
currentMatchingPosition = 0;
foundAtPosition = -1;
}
}
}
对不起我的英语。我想读取一个大文件,但是当我读取时出现错误outOfMemoryError。我不明白如何在应用程序中使用内存。以下代码不起作用:
try {
StringBuilder fileData = new StringBuilder(1000);
BufferedReader reader = new BufferedReader(new FileReader(file));
char[] buf = new char[8192];
int bytesread = 0,
bytesBuffered = 0;
while( (bytesread = reader.read( buf )) > -1 ) {
String readData = String.valueOf(buf, 0, bytesread);
bytesBuffered += bytesread;
fileData.append(readData); //this is error
if (bytesBuffered > 1024 * 1024) {
bytesBuffered = 0;
}
}
System.out.println(fileData.toString().toCharArray());
} finally {
}
试试这个。这可能会有帮助:-
try{
BufferedReader reader = new BufferedReader(new FileReader(file));
String txt = "";
while( (txt = reader.read()) != null){
System.out.println(txt);
}
}catch(Exception e){
System.out.println("Error : "+e.getMessage());
}
您需要预先分配一个大缓冲区以避免重新分配。
File file = ...;
StringBuilder fileData = new StringBuilder(file.size());
和运行大堆大小:
java -Xmx2G
====更新
使用缓冲区的 while 循环不需要太多内存 运行。将输入视为流,将您的搜索字符串与流匹配。这是一个非常简单的状态机。如果你需要搜索多个单词,你可以找到一个 TrieTree 实现(支持流)。
// the match state model
...xxxxxxabxxxxxaxxxxxabcdexxxx...
ab a abcd
File file = new File("path_to_your_file");
String yourSearchWord = "abcd";
int matchIndex = 0;
boolean matchPrefix = false;
try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
int chr;
while ((chr = reader.read()) != -1) {
if (matchPrefix == false) {
char searchChar = yourSearchWord.charAt(0);
if (chr == searchChar) {
matchPrefix = true;
matchIndex = 0;
}
} else {
char searchChar = yourSearchWord.charAt(++matchIndex);
if (chr == searchChar) {
if (matchIndex == yourSearchWord.length() - 1) {
// match!!
System.out.println("match: " + matchIndex);
matchPrefix = false;
matchIndex = 0;
}
} else {
matchPrefix = false;
matchIndex = 0;
}
}
}
}
你不应该在内存中保存这么大的文件,因为你 运行 离开了它,如你所见。由于您使用 Java 7,因此您需要将文件作为流手动读取并即时检查内容。否则,您可以使用 Java 8 的流 API。这只是一个示例。它有效,但请记住,由于编码问题,找到的单词的位置可能会有所不同,因此这不是生产代码:
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
public class FileReader
{
private static String wordToFind = "SEARCHED_WORD";
private static File file = new File("YOUR_FILE");
private static int currentMatchingPosition;
private static int foundAtPosition = -1;
private static int charsRead;
public static void main(String[] args) throws IOException
{
try (FileInputStream fis = new FileInputStream(file))
{
System.out.println("Total size to read (in bytes) : " + fis.available());
int c;
while ((c = fis.read()) != -1)
{
charsRead++;
checkContent(c);
}
if (foundAtPosition > -1)
{
System.out.println("Found word at position: " + (foundAtPosition - wordToFind.length()));
}
else
{
System.out.println("Didnt't find the word!");
}
}
catch (IOException e)
{
e.printStackTrace();
}
}
private static void checkContent(int c)
{
if (currentMatchingPosition >= wordToFind.length())
{
//already found....
return;
}
if (wordToFind.charAt(currentMatchingPosition) == (char)c)
{
foundAtPosition = charsRead;
currentMatchingPosition++;
}
else
{
currentMatchingPosition = 0;
foundAtPosition = -1;
}
}
}