Java 使用 HDFS 文件 read/write
Java with HDFS file read/write
我是 Hadoop 的新手并且 Java。我必须在我的远程云时代发行版中读取和写入存储在 HDFS 上的 *.txt 文件。同样,我编写了这个 java 小程序:
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URI;
import java.net.URISyntaxException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
public class ReadHadoopFileData {
public static void main(String[] args) throws IOException, URISyntaxException {
Configuration configuration = new Configuration();
FileSystem hdfs = FileSystem.get( new URI( "hdfs://admin:H4d00p@172.16.10.124:8888" ), configuration );
Path file = new Path("hdfs://admin:H4d00p@172.16.10.124:8888/user/admin/Data/Tlog.txt");
try{
BufferedReader br=new BufferedReader(new InputStreamReader(hdfs.open(file)));
String line;
line=br.readLine();
while (line != null){
System.out.println(line);
line=br.readLine();
}
}catch(Exception e){
e.printStackTrace();
}
}
}
但是当执行行 BufferedReader br=new BufferedReader(new InputStreamReader(hdfs.open(file)));
时,我 运行 陷入了这个错误:
java.io.IOException: Failed on local exception: com.google.protobuf.InvalidProtocolBufferException: Protocol message tag had invalid wire type.; Host Details : local host is: "KWTLT02221/169.254.208.16"; destination host is: "172.16.104.124":8888;
at org.apache.hadoop.net.NetUtils.wrapException(NetUtils.java:772)
at org.apache.hadoop.ipc.Client.call(Client.java:1472)
at org.apache.hadoop.ipc.Client.call(Client.java:1399)
at org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:232)
at com.sun.proxy.$Proxy9.getBlockLocations(Unknown Source)
at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolTranslatorPB.getBlockLocations(ClientNamenodeProtocolTranslatorPB.java:254)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:483)
at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:187)
at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:102)
at com.sun.proxy.$Proxy10.getBlockLocations(Unknown Source)
at org.apache.hadoop.hdfs.DFSClient.callGetBlockLocations(DFSClient.java:1220)
at org.apache.hadoop.hdfs.DFSClient.getLocatedBlocks(DFSClient.java:1210)
at org.apache.hadoop.hdfs.DFSClient.getLocatedBlocks(DFSClient.java:1200)
at org.apache.hadoop.hdfs.DFSInputStream.fetchLocatedBlocksAndGetLastBlockLength(DFSInputStream.java:271)
at org.apache.hadoop.hdfs.DFSInputStream.openInfo(DFSInputStream.java:238)
at org.apache.hadoop.hdfs.DFSInputStream.<init>(DFSInputStream.java:231)
at org.apache.hadoop.hdfs.DFSClient.open(DFSClient.java:1498)
at org.apache.hadoop.hdfs.DistributedFileSystem.doCall(DistributedFileSystem.java:302)
at org.apache.hadoop.hdfs.DistributedFileSystem.doCall(DistributedFileSystem.java:298)
at org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81)
at org.apache.hadoop.hdfs.DistributedFileSystem.open(DistributedFileSystem.java:298)
at org.apache.hadoop.fs.FileSystem.open(FileSystem.java:766)
at ReadHadoopFileData.main(ReadHadoopFileData.java:26)
Caused by: com.google.protobuf.InvalidProtocolBufferException: Protocol message tag had invalid wire type.
at com.google.protobuf.InvalidProtocolBufferException.invalidWireType(InvalidProtocolBufferException.java:99)
at com.google.protobuf.UnknownFieldSet$Builder.mergeFieldFrom(UnknownFieldSet.java:498)
at com.google.protobuf.UnknownFieldSet$Builder.mergeFrom(UnknownFieldSet.java:461)
at com.google.protobuf.UnknownFieldSet$Builder.mergeFrom(UnknownFieldSet.java:579)
at com.google.protobuf.UnknownFieldSet$Builder.mergeFrom(UnknownFieldSet.java:280)
at com.google.protobuf.CodedInputStream.readGroup(CodedInputStream.java:240)
at com.google.protobuf.UnknownFieldSet$Builder.mergeFieldFrom(UnknownFieldSet.java:488)
at com.google.protobuf.GeneratedMessage.parseUnknownField(GeneratedMessage.java:193)
at org.apache.hadoop.ipc.protobuf.RpcHeaderProtos$RpcResponseHeaderProto.<init>(RpcHeaderProtos.java:2207)
at org.apache.hadoop.ipc.protobuf.RpcHeaderProtos$RpcResponseHeaderProto.<init>(RpcHeaderProtos.java:2165)
at org.apache.hadoop.ipc.protobuf.RpcHeaderProtos$RpcResponseHeaderProto.parsePartialFrom(RpcHeaderProtos.java:2295)
at org.apache.hadoop.ipc.protobuf.RpcHeaderProtos$RpcResponseHeaderProto.parsePartialFrom(RpcHeaderProtos.java:2290)
at com.google.protobuf.AbstractParser.parsePartialFrom(AbstractParser.java:200)
at com.google.protobuf.AbstractParser.parsePartialDelimitedFrom(AbstractParser.java:241)
at com.google.protobuf.AbstractParser.parseDelimitedFrom(AbstractParser.java:253)
at com.google.protobuf.AbstractParser.parseDelimitedFrom(AbstractParser.java:259)
at com.google.protobuf.AbstractParser.parseDelimitedFrom(AbstractParser.java:49)
at org.apache.hadoop.ipc.protobuf.RpcHeaderProtos$RpcResponseHeaderProto.parseDelimitedFrom(RpcHeaderProtos.java:3167)
at org.apache.hadoop.ipc.Client$Connection.receiveRpcResponse(Client.java:1072)
at org.apache.hadoop.ipc.Client$Connection.run(Client.java:966)
有人可以帮我解决这个问题吗?我已经用了一天了。
我找到了这个错误的解决方案。看起来我用错了端口。我使用的是我在 HUE URL 上看到的端口号(被不同来源误导)。
如果我在 Cloudera 管理器的名称节点上选择了为配置 "NameNode Service RPC Port" 或 "dfs.namenode.servicerpc-address" 定义的端口号,它工作正常。
我是 Hadoop 的新手并且 Java。我必须在我的远程云时代发行版中读取和写入存储在 HDFS 上的 *.txt 文件。同样,我编写了这个 java 小程序:
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URI;
import java.net.URISyntaxException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
public class ReadHadoopFileData {
public static void main(String[] args) throws IOException, URISyntaxException {
Configuration configuration = new Configuration();
FileSystem hdfs = FileSystem.get( new URI( "hdfs://admin:H4d00p@172.16.10.124:8888" ), configuration );
Path file = new Path("hdfs://admin:H4d00p@172.16.10.124:8888/user/admin/Data/Tlog.txt");
try{
BufferedReader br=new BufferedReader(new InputStreamReader(hdfs.open(file)));
String line;
line=br.readLine();
while (line != null){
System.out.println(line);
line=br.readLine();
}
}catch(Exception e){
e.printStackTrace();
}
}
}
但是当执行行 BufferedReader br=new BufferedReader(new InputStreamReader(hdfs.open(file)));
时,我 运行 陷入了这个错误:
java.io.IOException: Failed on local exception: com.google.protobuf.InvalidProtocolBufferException: Protocol message tag had invalid wire type.; Host Details : local host is: "KWTLT02221/169.254.208.16"; destination host is: "172.16.104.124":8888;
at org.apache.hadoop.net.NetUtils.wrapException(NetUtils.java:772)
at org.apache.hadoop.ipc.Client.call(Client.java:1472)
at org.apache.hadoop.ipc.Client.call(Client.java:1399)
at org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:232)
at com.sun.proxy.$Proxy9.getBlockLocations(Unknown Source)
at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolTranslatorPB.getBlockLocations(ClientNamenodeProtocolTranslatorPB.java:254)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:483)
at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:187)
at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:102)
at com.sun.proxy.$Proxy10.getBlockLocations(Unknown Source)
at org.apache.hadoop.hdfs.DFSClient.callGetBlockLocations(DFSClient.java:1220)
at org.apache.hadoop.hdfs.DFSClient.getLocatedBlocks(DFSClient.java:1210)
at org.apache.hadoop.hdfs.DFSClient.getLocatedBlocks(DFSClient.java:1200)
at org.apache.hadoop.hdfs.DFSInputStream.fetchLocatedBlocksAndGetLastBlockLength(DFSInputStream.java:271)
at org.apache.hadoop.hdfs.DFSInputStream.openInfo(DFSInputStream.java:238)
at org.apache.hadoop.hdfs.DFSInputStream.<init>(DFSInputStream.java:231)
at org.apache.hadoop.hdfs.DFSClient.open(DFSClient.java:1498)
at org.apache.hadoop.hdfs.DistributedFileSystem.doCall(DistributedFileSystem.java:302)
at org.apache.hadoop.hdfs.DistributedFileSystem.doCall(DistributedFileSystem.java:298)
at org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81)
at org.apache.hadoop.hdfs.DistributedFileSystem.open(DistributedFileSystem.java:298)
at org.apache.hadoop.fs.FileSystem.open(FileSystem.java:766)
at ReadHadoopFileData.main(ReadHadoopFileData.java:26)
Caused by: com.google.protobuf.InvalidProtocolBufferException: Protocol message tag had invalid wire type.
at com.google.protobuf.InvalidProtocolBufferException.invalidWireType(InvalidProtocolBufferException.java:99)
at com.google.protobuf.UnknownFieldSet$Builder.mergeFieldFrom(UnknownFieldSet.java:498)
at com.google.protobuf.UnknownFieldSet$Builder.mergeFrom(UnknownFieldSet.java:461)
at com.google.protobuf.UnknownFieldSet$Builder.mergeFrom(UnknownFieldSet.java:579)
at com.google.protobuf.UnknownFieldSet$Builder.mergeFrom(UnknownFieldSet.java:280)
at com.google.protobuf.CodedInputStream.readGroup(CodedInputStream.java:240)
at com.google.protobuf.UnknownFieldSet$Builder.mergeFieldFrom(UnknownFieldSet.java:488)
at com.google.protobuf.GeneratedMessage.parseUnknownField(GeneratedMessage.java:193)
at org.apache.hadoop.ipc.protobuf.RpcHeaderProtos$RpcResponseHeaderProto.<init>(RpcHeaderProtos.java:2207)
at org.apache.hadoop.ipc.protobuf.RpcHeaderProtos$RpcResponseHeaderProto.<init>(RpcHeaderProtos.java:2165)
at org.apache.hadoop.ipc.protobuf.RpcHeaderProtos$RpcResponseHeaderProto.parsePartialFrom(RpcHeaderProtos.java:2295)
at org.apache.hadoop.ipc.protobuf.RpcHeaderProtos$RpcResponseHeaderProto.parsePartialFrom(RpcHeaderProtos.java:2290)
at com.google.protobuf.AbstractParser.parsePartialFrom(AbstractParser.java:200)
at com.google.protobuf.AbstractParser.parsePartialDelimitedFrom(AbstractParser.java:241)
at com.google.protobuf.AbstractParser.parseDelimitedFrom(AbstractParser.java:253)
at com.google.protobuf.AbstractParser.parseDelimitedFrom(AbstractParser.java:259)
at com.google.protobuf.AbstractParser.parseDelimitedFrom(AbstractParser.java:49)
at org.apache.hadoop.ipc.protobuf.RpcHeaderProtos$RpcResponseHeaderProto.parseDelimitedFrom(RpcHeaderProtos.java:3167)
at org.apache.hadoop.ipc.Client$Connection.receiveRpcResponse(Client.java:1072)
at org.apache.hadoop.ipc.Client$Connection.run(Client.java:966)
有人可以帮我解决这个问题吗?我已经用了一天了。
我找到了这个错误的解决方案。看起来我用错了端口。我使用的是我在 HUE URL 上看到的端口号(被不同来源误导)。
如果我在 Cloudera 管理器的名称节点上选择了为配置 "NameNode Service RPC Port" 或 "dfs.namenode.servicerpc-address" 定义的端口号,它工作正常。