如何逐块读取文件
How to read from a file block by block
我需要处理一个大文本文件,因为总是有几行我想从中获取信息,这些信息也相互依赖,所以我想逐块读取文件,而不是只存储上面某些行的特定功能。
每个块都将在其第一行中由一个唯一的符号表示。
是否可以使用某种迭代器然后检查我的符号是否出现在每一行?我真的不知道如何处理这个问题,所以非常感谢您的帮助。
示例:
a1 $ 12 20 namea1
b1 x 12 15 namea1,nameb1
c1 x 13 17 namea1,namec1
d1 x 18 20 namea1,named1
a2 $ 36 55 namea2
b2 x 38 40 namea2,nameb2
c2 x 46 54 namea2,namec2
如您所见,带有符号 $ 的行之后的所有行都以某种方式引用该行:它们的数字都落在 a1 行给出的范围之内,并且名称始终与该行的名称组合在一起。我认为逐块而不是逐行读取这样的文件可能更好。
我不太确定您所说的 "block-by-block" 是什么意思,即便如此,您的文本文件结构似乎也很适合 line-by-line 分析。因此,根据您的文件结构,您可以简单地在基本的 while 循环中解析它。 Pseudo-code:
While not end of file
Read line into a String
split this String on whitespace, "\s+" into a String array
Check the String held by the 2nd item in the String array, item[1]
Do action with line (create a certain object) based on this String
end of file
现在,如果其中一个符号代表某种标题,如果这就是您所说的 block-by-block,那么您需要做的就是改变您的解析策略,使用 state-dependent 处理您的 object,类似于 SAX 解析。因此,如果例如 "$"
表示一个新的 "block",则创建一个新块,并在 while 循环中创建 objects 放入此块,直到遇到新块.
假设文本文件如下所示:
$ 12 20 namea1
x 12 15 namea1,nameb1
x 13 17 namea1,namec1
x 18 20 namea1,named1
$ 36 55 namea2
x 38 40 namea2,nameb2
x 46 54 namea2,namec2
我假设您显示的第一个符号不在文件中
并假设您有一个名为 Line 的 class 看起来像:
/**
 * One non-header row of the input: two integer values and the names listed on that row.
 */
public class Line {
    private final List<String> names;
    private final int x;
    private final int y;

    /**
     * Creates a row with the given integer values and no names yet.
     *
     * @param x first integer value of the row
     * @param y second integer value of the row
     */
    public Line(int x, int y) {
        this.names = new ArrayList<>();
        this.x = x;
        this.y = y;
    }

    /**
     * Appends a name to this row, keeping insertion order.
     *
     * @param name the name to record
     */
    public void addName(String name) {
        this.names.add(name);
    }

    /** Renders the row as {@code Line [x=..., y=..., names=[...]]}. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("Line [x=");
        text.append(x);
        text.append(", y=").append(y);
        text.append(", names=").append(names);
        text.append("]");
        return text.toString();
    }
}
还有一个 Block class,...
/**
 * A header row (name plus two integer values) together with the rows added to it.
 */
public class Block {
    private final List<Line> lines;
    private final String name;
    private final int x;
    private final int y;

    /**
     * Creates a block with the given header values and no rows yet.
     *
     * @param name name taken from the header row
     * @param x first integer value of the header row
     * @param y second integer value of the header row
     */
    public Block(String name, int x, int y) {
        this.lines = new ArrayList<>();
        this.name = name;
        this.x = x;
        this.y = y;
    }

    /**
     * Appends a row to this block, keeping insertion order.
     *
     * @param line the row to add
     */
    public void addLine(Line line) {
        this.lines.add(line);
    }

    /** Renders the block as {@code Block [name=..., x=..., y=..., lines=[...]]}. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("Block [name=");
        text.append(name);
        text.append(", x=").append(x);
        text.append(", y=").append(y);
        text.append(", lines=").append(lines);
        text.append("]");
        return text.toString();
    }
}
你可以像这样解析它:
Scanner blockScanner = new Scanner(resource);
Block currentBlock = null;
while (blockScanner.hasNextLine()) {
    // Trim first so leading indentation does not produce an empty first token.
    String line = blockScanner.nextLine().trim();
    if (line.isEmpty()) {
        // A blank row has no tokens to parse.
        continue;
    }
    // "\\s+" is the Java string literal for the regex \s+ (a run of whitespace).
    // A single backslash ("\s") is not that regex: it is rejected before Java 15
    // and denotes a plain space from Java 15 on.
    String[] tokens = line.split("\\s+");
    // NEW_BLOCK == "$"
    if (tokens[0].equals(NEW_BLOCK)) {
        // Header row: start a new block and remember it as the current one.
        currentBlock = createBlockFromTokens(tokens);
        blocks.add(currentBlock);
    } else if (currentBlock != null) {
        // Ordinary row: attach it to the block opened most recently.
        // Rows seen before the first header are skipped.
        currentBlock.addLine(createLineFromTokens(tokens));
    }
}
其中 createXxxxFromTokens(tokens)
从字符串数组创建新行或新块
例如,整个事物作为一个 MCVE:
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.Scanner;
/**
 * Reads the resource {@code blocks.txt} and groups its rows into blocks.
 *
 * <p>A row whose first token is {@code $} starts a new {@code Block}; any other row is parsed
 * as a {@code Line} and added to the most recently started block. Rows that occur before the
 * first {@code $} row are skipped, as are blank rows.
 */
public class ReadBlocks {
    private static final String RESOURCE_PATH = "blocks.txt";
    private static final String NEW_BLOCK = "$";
    // Every row needs a marker, two integers and a name field.
    private static final int MIN_TOKENS = 4;

    /**
     * Parses the resource and prints each block on its own line.
     *
     * @param args ignored
     * @throws IllegalStateException if the resource cannot be found
     * @throws IllegalArgumentException if a row has fewer than four tokens
     * @throws NumberFormatException if the second or third token of a row is not an integer
     */
    public static void main(String[] args) {
        InputStream resource = ReadBlocks.class.getResourceAsStream(RESOURCE_PATH);
        if (resource == null) {
            // getResourceAsStream reports a missing resource as null; fail with a clear
            // message instead of the NullPointerException the Scanner constructor would raise.
            throw new IllegalStateException("Resource not found: " + RESOURCE_PATH);
        }

        List<Block> blocks = new ArrayList<>();
        // try-with-resources closes the scanner (and the stream it wraps) even if parsing fails.
        try (Scanner blockScanner = new Scanner(resource)) {
            Block currentBlock = null;
            while (blockScanner.hasNextLine()) {
                // Trim first so leading indentation does not produce an empty first token.
                String line = blockScanner.nextLine().trim();
                if (line.isEmpty()) {
                    continue;
                }
                // "\\s+" is the Java string literal for the regex \s+ (a run of whitespace).
                String[] tokens = line.split("\\s+");
                if (tokens[0].equals(NEW_BLOCK)) {
                    currentBlock = createBlockFromTokens(tokens);
                    blocks.add(currentBlock);
                } else if (currentBlock != null) {
                    currentBlock.addLine(createLineFromTokens(tokens));
                }
            }
        }

        for (Block block : blocks) {
            System.out.println(block);
        }
    }

    /**
     * Builds a {@code Line} from a tokenized row: tokens 1 and 2 are the integer values and
     * token 3 is a comma-separated list of names. Token 0 (the marker) is not used.
     */
    private static Line createLineFromTokens(String[] tokens) {
        requireMinTokens(tokens);
        int x = Integer.parseInt(tokens[1]);
        int y = Integer.parseInt(tokens[2]);
        Line line = new Line(x, y);
        String[] names = tokens[3].split(",");
        for (String name : names) {
            line.addName(name);
        }
        return line;
    }

    /**
     * Builds a {@code Block} from a tokenized header row: tokens 1 and 2 are the integer
     * values and token 3 is the block name. Token 0 (the marker) is not used.
     */
    private static Block createBlockFromTokens(String[] tokens) {
        requireMinTokens(tokens);
        int x = Integer.parseInt(tokens[1]);
        int y = Integer.parseInt(tokens[2]);
        String name = tokens[3];
        return new Block(name, x, y);
    }

    /** Rejects rows that are too short to contain a marker, two integers and a name field. */
    private static void requireMinTokens(String[] tokens) {
        if (tokens.length < MIN_TOKENS) {
            throw new IllegalArgumentException(
                    "Expected at least " + MIN_TOKENS + " tokens but found " + tokens.length);
        }
    }
}
/**
 * A header row (name plus two integer values) together with the rows added to it.
 */
class Block {
    private final List<Line> lines;
    private final String name;
    private final int x;
    private final int y;

    /**
     * Creates a block with the given header values and no rows yet.
     *
     * @param name name taken from the header row
     * @param x first integer value of the header row
     * @param y second integer value of the header row
     */
    public Block(String name, int x, int y) {
        this.lines = new ArrayList<>();
        this.name = name;
        this.x = x;
        this.y = y;
    }

    /**
     * Appends a row to this block, keeping insertion order.
     *
     * @param line the row to add
     */
    public void addLine(Line line) {
        this.lines.add(line);
    }

    /** Renders the block as {@code Block [name=..., x=..., y=..., lines=[...]]}. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("Block [name=");
        text.append(name);
        text.append(", x=").append(x);
        text.append(", y=").append(y);
        text.append(", lines=").append(lines);
        text.append("]");
        return text.toString();
    }
}
/**
 * One non-header row of the input: two integer values and the names listed on that row.
 */
class Line {
    private final List<String> names;
    private final int x;
    private final int y;

    /**
     * Creates a row with the given integer values and no names yet.
     *
     * @param x first integer value of the row
     * @param y second integer value of the row
     */
    public Line(int x, int y) {
        this.names = new ArrayList<>();
        this.x = x;
        this.y = y;
    }

    /**
     * Appends a name to this row, keeping insertion order.
     *
     * @param name the name to record
     */
    public void addName(String name) {
        this.names.add(name);
    }

    /** Renders the row as {@code Line [x=..., y=..., names=[...]]}. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("Line [x=");
        text.append(x);
        text.append(", y=").append(y);
        text.append(", names=").append(names);
        text.append("]");
        return text.toString();
    }
}
我需要处理一个大文本文件,因为总是有几行我想从中获取信息,这些信息也相互依赖,所以我想逐块读取文件,而不是只存储上面某些行的特定功能。
每个块都将在其第一行中由一个唯一的符号表示。
是否可以使用某种迭代器然后检查我的符号是否出现在每一行?我真的不知道如何处理这个问题,所以非常感谢您的帮助。
示例:
a1 $ 12 20 namea1
b1 x 12 15 namea1,nameb1
c1 x 13 17 namea1,namec1
d1 x 18 20 namea1,named1
a2 $ 36 55 namea2
b2 x 38 40 namea2,nameb2
c2 x 46 54 namea2,namec2
如您所见,带有符号 $ 的行之后的所有行都以某种方式引用该行:它们的数字都落在 a1 行给出的范围之内,并且名称始终与该行的名称组合在一起。我认为逐块而不是逐行读取这样的文件可能更好。
我不太确定您所说的 "block-by-block" 是什么意思,即便如此,您的文本文件结构似乎也很适合 line-by-line 分析。因此,根据您的文件结构,您可以简单地在基本的 while 循环中解析它。 Pseudo-code:
While not end of file
Read line into a String
split this String on whitespace, "\s+" into a String array
Check the String held by the 2nd item in the String array, item[1]
Do action with line (create a certain object) based on this String
end of file
现在,如果其中一个符号代表某种标题,如果这就是您所说的 block-by-block,那么您需要做的就是改变您的解析策略,使用 state-dependent 处理您的 object,类似于 SAX 解析。因此,如果例如 "$"
表示一个新的 "block",则创建一个新块,并在 while 循环中创建 objects 放入此块,直到遇到新块.
假设文本文件如下所示:
$ 12 20 namea1
x 12 15 namea1,nameb1
x 13 17 namea1,namec1
x 18 20 namea1,named1
$ 36 55 namea2
x 38 40 namea2,nameb2
x 46 54 namea2,namec2
我假设您显示的第一个符号不在文件中
并假设您有一个名为 Line 的 class 看起来像:
/**
 * One non-header row of the input: two integer values and the names listed on that row.
 */
public class Line {
    private final List<String> names;
    private final int x;
    private final int y;

    /**
     * Creates a row with the given integer values and no names yet.
     *
     * @param x first integer value of the row
     * @param y second integer value of the row
     */
    public Line(int x, int y) {
        this.names = new ArrayList<>();
        this.x = x;
        this.y = y;
    }

    /**
     * Appends a name to this row, keeping insertion order.
     *
     * @param name the name to record
     */
    public void addName(String name) {
        this.names.add(name);
    }

    /** Renders the row as {@code Line [x=..., y=..., names=[...]]}. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("Line [x=");
        text.append(x);
        text.append(", y=").append(y);
        text.append(", names=").append(names);
        text.append("]");
        return text.toString();
    }
}
还有一个 Block class,...
/**
 * A header row (name plus two integer values) together with the rows added to it.
 */
public class Block {
    private final List<Line> lines;
    private final String name;
    private final int x;
    private final int y;

    /**
     * Creates a block with the given header values and no rows yet.
     *
     * @param name name taken from the header row
     * @param x first integer value of the header row
     * @param y second integer value of the header row
     */
    public Block(String name, int x, int y) {
        this.lines = new ArrayList<>();
        this.name = name;
        this.x = x;
        this.y = y;
    }

    /**
     * Appends a row to this block, keeping insertion order.
     *
     * @param line the row to add
     */
    public void addLine(Line line) {
        this.lines.add(line);
    }

    /** Renders the block as {@code Block [name=..., x=..., y=..., lines=[...]]}. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("Block [name=");
        text.append(name);
        text.append(", x=").append(x);
        text.append(", y=").append(y);
        text.append(", lines=").append(lines);
        text.append("]");
        return text.toString();
    }
}
你可以像这样解析它:
Scanner blockScanner = new Scanner(resource);
Block currentBlock = null;
while (blockScanner.hasNextLine()) {
    // Trim first so leading indentation does not produce an empty first token.
    String line = blockScanner.nextLine().trim();
    if (line.isEmpty()) {
        // A blank row has no tokens to parse.
        continue;
    }
    // "\\s+" is the Java string literal for the regex \s+ (a run of whitespace).
    // A single backslash ("\s") is not that regex: it is rejected before Java 15
    // and denotes a plain space from Java 15 on.
    String[] tokens = line.split("\\s+");
    // NEW_BLOCK == "$"
    if (tokens[0].equals(NEW_BLOCK)) {
        // Header row: start a new block and remember it as the current one.
        currentBlock = createBlockFromTokens(tokens);
        blocks.add(currentBlock);
    } else if (currentBlock != null) {
        // Ordinary row: attach it to the block opened most recently.
        // Rows seen before the first header are skipped.
        currentBlock.addLine(createLineFromTokens(tokens));
    }
}
其中 createXxxxFromTokens(tokens)
从字符串数组创建新行或新块
例如,整个事物作为一个 MCVE:
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.Scanner;
/**
 * Reads the resource {@code blocks.txt} and groups its rows into blocks.
 *
 * <p>A row whose first token is {@code $} starts a new {@code Block}; any other row is parsed
 * as a {@code Line} and added to the most recently started block. Rows that occur before the
 * first {@code $} row are skipped, as are blank rows.
 */
public class ReadBlocks {
    private static final String RESOURCE_PATH = "blocks.txt";
    private static final String NEW_BLOCK = "$";
    // Every row needs a marker, two integers and a name field.
    private static final int MIN_TOKENS = 4;

    /**
     * Parses the resource and prints each block on its own line.
     *
     * @param args ignored
     * @throws IllegalStateException if the resource cannot be found
     * @throws IllegalArgumentException if a row has fewer than four tokens
     * @throws NumberFormatException if the second or third token of a row is not an integer
     */
    public static void main(String[] args) {
        InputStream resource = ReadBlocks.class.getResourceAsStream(RESOURCE_PATH);
        if (resource == null) {
            // getResourceAsStream reports a missing resource as null; fail with a clear
            // message instead of the NullPointerException the Scanner constructor would raise.
            throw new IllegalStateException("Resource not found: " + RESOURCE_PATH);
        }

        List<Block> blocks = new ArrayList<>();
        // try-with-resources closes the scanner (and the stream it wraps) even if parsing fails.
        try (Scanner blockScanner = new Scanner(resource)) {
            Block currentBlock = null;
            while (blockScanner.hasNextLine()) {
                // Trim first so leading indentation does not produce an empty first token.
                String line = blockScanner.nextLine().trim();
                if (line.isEmpty()) {
                    continue;
                }
                // "\\s+" is the Java string literal for the regex \s+ (a run of whitespace).
                String[] tokens = line.split("\\s+");
                if (tokens[0].equals(NEW_BLOCK)) {
                    currentBlock = createBlockFromTokens(tokens);
                    blocks.add(currentBlock);
                } else if (currentBlock != null) {
                    currentBlock.addLine(createLineFromTokens(tokens));
                }
            }
        }

        for (Block block : blocks) {
            System.out.println(block);
        }
    }

    /**
     * Builds a {@code Line} from a tokenized row: tokens 1 and 2 are the integer values and
     * token 3 is a comma-separated list of names. Token 0 (the marker) is not used.
     */
    private static Line createLineFromTokens(String[] tokens) {
        requireMinTokens(tokens);
        int x = Integer.parseInt(tokens[1]);
        int y = Integer.parseInt(tokens[2]);
        Line line = new Line(x, y);
        String[] names = tokens[3].split(",");
        for (String name : names) {
            line.addName(name);
        }
        return line;
    }

    /**
     * Builds a {@code Block} from a tokenized header row: tokens 1 and 2 are the integer
     * values and token 3 is the block name. Token 0 (the marker) is not used.
     */
    private static Block createBlockFromTokens(String[] tokens) {
        requireMinTokens(tokens);
        int x = Integer.parseInt(tokens[1]);
        int y = Integer.parseInt(tokens[2]);
        String name = tokens[3];
        return new Block(name, x, y);
    }

    /** Rejects rows that are too short to contain a marker, two integers and a name field. */
    private static void requireMinTokens(String[] tokens) {
        if (tokens.length < MIN_TOKENS) {
            throw new IllegalArgumentException(
                    "Expected at least " + MIN_TOKENS + " tokens but found " + tokens.length);
        }
    }
}
/**
 * A header row (name plus two integer values) together with the rows added to it.
 */
class Block {
    private final List<Line> lines;
    private final String name;
    private final int x;
    private final int y;

    /**
     * Creates a block with the given header values and no rows yet.
     *
     * @param name name taken from the header row
     * @param x first integer value of the header row
     * @param y second integer value of the header row
     */
    public Block(String name, int x, int y) {
        this.lines = new ArrayList<>();
        this.name = name;
        this.x = x;
        this.y = y;
    }

    /**
     * Appends a row to this block, keeping insertion order.
     *
     * @param line the row to add
     */
    public void addLine(Line line) {
        this.lines.add(line);
    }

    /** Renders the block as {@code Block [name=..., x=..., y=..., lines=[...]]}. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("Block [name=");
        text.append(name);
        text.append(", x=").append(x);
        text.append(", y=").append(y);
        text.append(", lines=").append(lines);
        text.append("]");
        return text.toString();
    }
}
/**
 * One non-header row of the input: two integer values and the names listed on that row.
 */
class Line {
    private final List<String> names;
    private final int x;
    private final int y;

    /**
     * Creates a row with the given integer values and no names yet.
     *
     * @param x first integer value of the row
     * @param y second integer value of the row
     */
    public Line(int x, int y) {
        this.names = new ArrayList<>();
        this.x = x;
        this.y = y;
    }

    /**
     * Appends a name to this row, keeping insertion order.
     *
     * @param name the name to record
     */
    public void addName(String name) {
        this.names.add(name);
    }

    /** Renders the row as {@code Line [x=..., y=..., names=[...]]}. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("Line [x=");
        text.append(x);
        text.append(", y=").append(y);
        text.append(", names=").append(names);
        text.append("]");
        return text.toString();
    }
}