导航目录结构并为每个已处理的文件命名
navigate directory structure and name each processed file uniquely
我的目录结构为 start
/one
/two
/three
/*files*
我的目标是构建这个程序,使其能够自主导航我的目录结构,抓取每个文件然后处理它,它似乎在做正确的事情。
但是,我还需要将输出写入具有唯一名称的新文件。也就是说,处理名为 00001.txt 的文件时,
结果应该写入 00001_output.txt。
我以为我正确地实施了,但显然不是。
我哪里误入歧途了?
// Walks every file under /home/data/ and, for each one, parses its content with Jsoup
// and prints the text of the elements matching "block.full_text".
String dirStart = "/home/data/";
Path root = Paths.get(dirStart);
Files.walkFileTree(root.toAbsolutePath().normalize(), new SimpleFileVisitor<Path>()
{
/**
 * Reads the visited file line by line, redirects System.out to
 * "<file name without extension>_output.txt" and prints the extracted text there.
 * Always returns CONTINUE; an IOException raised inside the try block is printed
 * and the walk moves on to the next file.
 */
@Override
public FileVisitResult visitFile(Path file, java.nio.file.attribute.BasicFileAttributes attrs) throws IOException
{
// Only the reader and its underlying stream are managed by try-with-resources.
// The reader is built without an explicit charset.
try(InputStream inputStream = Files.newInputStream(file);
BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(inputStream)))
{
// Derive the output name from the input name, e.g. 00001.txt -> 00001_output.txt.
String print_file = file.getFileName().toString();
String fileNameWithOutExt = FilenameUtils.removeExtension(print_file);
// This line runs BEFORE System.out is re-pointed below, so from the second file
// onwards it is written into the output stream installed for the previous file.
System.out.println(fileNameWithOutExt);
// The output path is relative (no directory component) and the stream is not part of
// the try-with-resources above; nothing in this snippet closes it.
PrintStream out = new PrintStream(new FileOutputStream( fileNameWithOutExt + "_output.txt" ) );
// Replaces the process-wide standard output; it is never restored in this snippet.
System.setOut(out);
// Read the whole file into memory, re-joining lines with the platform line separator.
StringBuilder sb = new StringBuilder();
String line = bufferedReader.readLine();
while (line != null)
{
sb.append(line);
sb.append(System.lineSeparator());
line = bufferedReader.readLine();
}
String everything = sb.toString();
// Parse the content and take the combined text of all "block.full_text" matches.
Document doc = Jsoup.parse(everything);
String link = doc.select("block.full_text").text();
// Goes to the file installed via System.setOut above.
System.out.println(link);
}
catch (IOException e)
{
// Best effort: report the failure and keep walking.
e.printStackTrace();
}
return FileVisitResult.CONTINUE;
}
});
这里还有我提出的另一个相关问题,它可能有助于进一步了解我实际想做的事情。
System.setOut 似乎是个坏主意。
下面是一些可能有效的未经测试的代码。
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.FileVisitResult;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.SimpleFileVisitor;
import org.apache.commons.io.FilenameUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
/**
 * Walks the directory tree under {@code /home/data/} and, for every file found,
 * extracts the text of the elements matching {@code block.full_text} and writes it to
 * {@code <file name without extension>_output.txt}.
 *
 * <p>Output files are created relative to the current working directory, exactly as the
 * output name is built here. NOTE(review): two input files with the same base name in
 * different directories therefore map to the same output file - confirm whether the
 * output should instead be placed next to each input file.
 */
public class App {

    /**
     * Entry point: visits every file below the root directory and processes it.
     *
     * @param args ignored
     * @throws IOException if the directory tree itself cannot be walked
     */
    public static void main(String[] args) throws IOException {
        String dirStart = "/home/data/";
        Path root = Paths.get(dirStart);
        Files.walkFileTree(root.toAbsolutePath().normalize(), new SimpleFileVisitor<Path>() {
            @Override
            public FileVisitResult visitFile(Path file, java.nio.file.attribute.BasicFileAttributes attrs) throws IOException {
                try {
                    process(file);
                } catch (IOException e) {
                    // Best effort: a single unreadable or unwritable file must not abort the walk.
                    System.err.println("Failed to process " + file);
                    e.printStackTrace();
                }
                return FileVisitResult.CONTINUE;
            }
        });
    }

    /**
     * Extracts the {@code block.full_text} text from one file and writes it, followed by a
     * line separator, to the matching {@code _output.txt} file.
     *
     * @param file the input file to read (decoded as UTF-8)
     * @throws IOException if the input cannot be read or the output cannot be written
     */
    private static void process(Path file) throws IOException {
        String fileNameWithOutExt = FilenameUtils.removeExtension(file.getFileName().toString());
        // Progress output stays on the real standard output.
        System.out.println(fileNameWithOutExt);

        String everything = new String(Files.readAllBytes(file), StandardCharsets.UTF_8);
        Document doc = Jsoup.parse(everything);
        String text = doc.select("block.full_text").text();

        // Files.write encodes explicitly as UTF-8 (matching how the input was decoded) and,
        // unlike PrintStream, reports write failures as IOException instead of hiding them.
        // Its default options create the file or truncate an existing one.
        Path target = Paths.get(fileNameWithOutExt + "_output.txt");
        Files.write(target, (text + System.lineSeparator()).getBytes(StandardCharsets.UTF_8));
    }
}
我的目录结构为 start
/one
/two
/three
/*files*
我的目标是构建这个程序,使其能够自主导航我的目录结构,抓取每个文件然后处理它,它似乎在做正确的事情。
但是,我还需要将输出写入具有唯一名称的新文件。也就是说,处理名为 00001.txt 的文件时,
结果应该写入 00001_output.txt。
我以为我正确地实施了,但显然不是。
我哪里误入歧途了?
// Walks every file under /home/data/ and, for each one, parses its content with Jsoup
// and prints the text of the elements matching "block.full_text".
String dirStart = "/home/data/";
Path root = Paths.get(dirStart);
Files.walkFileTree(root.toAbsolutePath().normalize(), new SimpleFileVisitor<Path>()
{
/**
 * Reads the visited file line by line, redirects System.out to
 * "<file name without extension>_output.txt" and prints the extracted text there.
 * Always returns CONTINUE; an IOException raised inside the try block is printed
 * and the walk moves on to the next file.
 */
@Override
public FileVisitResult visitFile(Path file, java.nio.file.attribute.BasicFileAttributes attrs) throws IOException
{
// Only the reader and its underlying stream are managed by try-with-resources.
// The reader is built without an explicit charset.
try(InputStream inputStream = Files.newInputStream(file);
BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(inputStream)))
{
// Derive the output name from the input name, e.g. 00001.txt -> 00001_output.txt.
String print_file = file.getFileName().toString();
String fileNameWithOutExt = FilenameUtils.removeExtension(print_file);
// This line runs BEFORE System.out is re-pointed below, so from the second file
// onwards it is written into the output stream installed for the previous file.
System.out.println(fileNameWithOutExt);
// The output path is relative (no directory component) and the stream is not part of
// the try-with-resources above; nothing in this snippet closes it.
PrintStream out = new PrintStream(new FileOutputStream( fileNameWithOutExt + "_output.txt" ) );
// Replaces the process-wide standard output; it is never restored in this snippet.
System.setOut(out);
// Read the whole file into memory, re-joining lines with the platform line separator.
StringBuilder sb = new StringBuilder();
String line = bufferedReader.readLine();
while (line != null)
{
sb.append(line);
sb.append(System.lineSeparator());
line = bufferedReader.readLine();
}
String everything = sb.toString();
// Parse the content and take the combined text of all "block.full_text" matches.
Document doc = Jsoup.parse(everything);
String link = doc.select("block.full_text").text();
// Goes to the file installed via System.setOut above.
System.out.println(link);
}
catch (IOException e)
{
// Best effort: report the failure and keep walking.
e.printStackTrace();
}
return FileVisitResult.CONTINUE;
}
});
System.setOut 似乎是个坏主意。
下面是一些可能有效的未经测试的代码。
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.FileVisitResult;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.SimpleFileVisitor;
import org.apache.commons.io.FilenameUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
/**
 * Walks the directory tree under {@code /home/data/} and, for every file found,
 * extracts the text of the elements matching {@code block.full_text} and writes it to
 * {@code <file name without extension>_output.txt}.
 *
 * <p>Output files are created relative to the current working directory, exactly as the
 * output name is built here. NOTE(review): two input files with the same base name in
 * different directories therefore map to the same output file - confirm whether the
 * output should instead be placed next to each input file.
 */
public class App {

    /**
     * Entry point: visits every file below the root directory and processes it.
     *
     * @param args ignored
     * @throws IOException if the directory tree itself cannot be walked
     */
    public static void main(String[] args) throws IOException {
        String dirStart = "/home/data/";
        Path root = Paths.get(dirStart);
        Files.walkFileTree(root.toAbsolutePath().normalize(), new SimpleFileVisitor<Path>() {
            @Override
            public FileVisitResult visitFile(Path file, java.nio.file.attribute.BasicFileAttributes attrs) throws IOException {
                try {
                    process(file);
                } catch (IOException e) {
                    // Best effort: a single unreadable or unwritable file must not abort the walk.
                    System.err.println("Failed to process " + file);
                    e.printStackTrace();
                }
                return FileVisitResult.CONTINUE;
            }
        });
    }

    /**
     * Extracts the {@code block.full_text} text from one file and writes it, followed by a
     * line separator, to the matching {@code _output.txt} file.
     *
     * @param file the input file to read (decoded as UTF-8)
     * @throws IOException if the input cannot be read or the output cannot be written
     */
    private static void process(Path file) throws IOException {
        String fileNameWithOutExt = FilenameUtils.removeExtension(file.getFileName().toString());
        // Progress output stays on the real standard output.
        System.out.println(fileNameWithOutExt);

        String everything = new String(Files.readAllBytes(file), StandardCharsets.UTF_8);
        Document doc = Jsoup.parse(everything);
        String text = doc.select("block.full_text").text();

        // Files.write encodes explicitly as UTF-8 (matching how the input was decoded) and,
        // unlike PrintStream, reports write failures as IOException instead of hiding them.
        // Its default options create the file or truncate an existing one.
        Path target = Paths.get(fileNameWithOutExt + "_output.txt");
        Files.write(target, (text + System.lineSeparator()).getBytes(StandardCharsets.UTF_8));
    }
}