Hdfs 文件行数

Hdfs file line count

有没有办法在 Java 中计算 HDFS 目录的总行数？目前我们是在命令提示符下执行以下命令：

hadoop fs -cat  /abc/def/* | wc -l

特别是使用 HADOOP API 而不是编写 map-reduce 或 spark 代码。

类似下面这样的代码应该可以实现：

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class LineCounter {

    /**
     * Counts the total number of lines across all regular files directly under
     * an HDFS directory, mirroring {@code hadoop fs -cat /dir/* | wc -l}.
     *
     * <p>Usage: {@code LineCounter [hdfsDirPath]} — defaults to {@code /some/path}
     * when no argument is given, preserving the original behavior.
     *
     * @param args optional: args[0] is the HDFS directory to scan
     * @throws IOException if the config files cannot be read or HDFS access fails
     */
    public static void main(String[] args) throws IOException {

        Configuration conf = new Configuration();
        // Load cluster configuration from the working directory.
        conf.addResource(new FileInputStream("hdfs-site.xml"));
        conf.addResource(new FileInputStream("core-site.xml"));

        conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
        conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());

        FileSystem fs = FileSystem.get(conf);
        // Generalized: take the directory from the command line; keep the old default.
        Path pt = new Path(args.length > 0 ? args[0] : "/some/path");

        // Non-recursive, like the shell glob /dir/* — subdirectories are skipped.
        FileStatus[] status = fs.listStatus(pt);

        // long, not int: line counts over large datasets can exceed 2^31.
        long count = 0;

        for (FileStatus f : status) {
            if (f.isFile()) {
                // try-with-resources guarantees both the HDFS stream and the
                // reader are closed even if readLine() throws; the original
                // leaked them on error and never closed the stream at all.
                try (FSDataInputStream inputStream = fs.open(f.getPath());
                     BufferedReader reader = new BufferedReader(
                             new InputStreamReader(inputStream, StandardCharsets.UTF_8))) {
                    while (reader.readLine() != null) {
                        count++;
                    }
                }
            }
        }

        // The original computed the count but never reported it.
        System.out.println(count);
    }

}