在 HBase 导出的表上运行 MapReduce 时抛出异常:找不到值类 'org.apache.hadoop.hbase.client.Result' 的反序列化器
Running MapReduce on an HBase exported table throws "Could not find a deserializer for the Value class: 'org.apache.hadoop.hbase.client.Result'"
我已经使用 Hbase Export 实用工具对 Hbase table 进行了备份。
hbase org.apache.hadoop.hbase.mapreduce.Export "FinancialLineItem" "/project/fricadev/ESGTRF/EXPORT"
这启动了一个 MapReduce 作业,并把我所有的表数据传输(transferred)到了输出文件夹中。
根据文档,输出(output)文件的格式是序列文件(SequenceFile)。
所以我运行下面的代码,从文件中提取我的键和值。
现在我想运行 MapReduce 从输出文件中读取键值,但遇到了下面的异常:
java.lang.Exception: java.io.IOException: Could not find a
deserializer for the Value class:
'org.apache.hadoop.hbase.client.Result'. Please ensure that the
configuration 'io.serializations' is properly configured, if you're
using custom serialization.
at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:406)
Caused by: java.io.IOException: Could not find a deserializer for the Value class: 'org.apache.hadoop.hbase.client.Result'. Please
ensure that the configuration 'io.serializations' is properly
configured, if you're using custom serialization.
at org.apache.hadoop.io.SequenceFile$Reader.init(SequenceFile.java:1964)
at org.apache.hadoop.io.SequenceFile$Reader.initialize(SequenceFile.java:1811)
at org.apache.hadoop.io.SequenceFile$Reader.<init>(SequenceFile.java:1760)
at org.apache.hadoop.io.SequenceFile$Reader.<init>(SequenceFile.java:1774)
at org.apache.hadoop.mapreduce.lib.input.SequenceFileRecordReader.initialize(SequenceFileRecordReader.java:50)
at org.apache.hadoop.mapred.MapTask$NewTrackingRecordReader.initialize(MapTask.java:478)
at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:671)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:330)
这是我的驱动代码
package SEQ;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class SeqDriver extends Configured implements Tool
{
public static void main(String[] args) throws Exception{
int exitCode = ToolRunner.run(new SeqDriver(), args);
System.exit(exitCode);
}
public int run(String[] args) throws Exception {
if (args.length != 2) {
System.err.printf("Usage: %s needs two arguments files\n",
getClass().getSimpleName());
return -1;
}
String outputPath = args[1];
FileSystem hfs = FileSystem.get(getConf());
Job job = new Job();
job.setJarByClass(SeqDriver.class);
job.setJobName("SequenceFileReader");
HDFSUtil.removeHdfsSubDirIfExists(hfs, new Path(outputPath), true);
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
job.setOutputKeyClass(ImmutableBytesWritable.class);
job.setOutputValueClass(Result.class);
job.setInputFormatClass(SequenceFileInputFormat.class);
job.setMapperClass(MySeqMapper.class);
job.setNumReduceTasks(0);
int returnValue = job.waitForCompletion(true) ? 0:1;
if(job.isSuccessful()) {
System.out.println("Job was successful");
} else if(!job.isSuccessful()) {
System.out.println("Job was not successful");
}
return returnValue;
}
}
这是我的映射器代码
package SEQ;
import java.io.IOException;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * Mapper that takes {@link ImmutableBytesWritable} keys with {@link Result} values
 * and is declared to emit {@link Text}/{@link Text} pairs.
 *
 * <p>The map step is currently empty: each record is received and nothing is written.
 */
public class MySeqMapper
        extends Mapper<ImmutableBytesWritable, Result, Text, Text> {

    /**
     * Called once per input record; currently performs no work.
     *
     * @param row     key of the input record
     * @param value   value of the input record
     * @param context task context (unused)
     * @throws IOException          declared by the overridden method
     * @throws InterruptedException declared by the overridden method
     */
    @Override
    public void map(ImmutableBytesWritable row, Result value, Context context)
            throws IOException, InterruptedException {
        // Currently empty: no output is produced for any record.
    }
}
所以我会回答我的问题
这是让它工作所需要的
因为我们使用 HBase 来存储数据,并且这个 reducer 会把它的结果输出到 HBase 表,Hadoop 告诉我们它不知道如何序列化(以及反序列化)我们的数据。所以我们需要帮它一把:在作业设置(setUp)阶段设置 io.serializations 变量,把 HBase 的序列化类注册进去:
// Keep the currently configured serializations and append the Mutation and
// Result serialization classes after them.
hbaseConf.setStrings(
        "io.serializations",
        hbaseConf.get("io.serializations"),
        MutationSerialization.class.getName(),
        ResultSerialization.class.getName());
我已经使用 Hbase Export 实用工具对 Hbase table 进行了备份。
hbase org.apache.hadoop.hbase.mapreduce.Export "FinancialLineItem" "/project/fricadev/ESGTRF/EXPORT"
这启动了一个 MapReduce 作业,并把我所有的表数据传输(transferred)到了输出文件夹中。 根据文档,输出(output)文件的格式是序列文件(SequenceFile)。 所以我运行下面的代码,从文件中提取我的键和值。
现在我想运行 MapReduce 从输出文件中读取键值,但遇到了下面的异常:
java.lang.Exception: java.io.IOException: Could not find a deserializer for the Value class: 'org.apache.hadoop.hbase.client.Result'. Please ensure that the configuration 'io.serializations' is properly configured, if you're using custom serialization. at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:406) Caused by: java.io.IOException: Could not find a deserializer for the Value class: 'org.apache.hadoop.hbase.client.Result'. Please ensure that the configuration 'io.serializations' is properly configured, if you're using custom serialization. at org.apache.hadoop.io.SequenceFile$Reader.init(SequenceFile.java:1964) at org.apache.hadoop.io.SequenceFile$Reader.initialize(SequenceFile.java:1811) at org.apache.hadoop.io.SequenceFile$Reader.<init>(SequenceFile.java:1760) at org.apache.hadoop.io.SequenceFile$Reader.<init>(SequenceFile.java:1774) at org.apache.hadoop.mapreduce.lib.input.SequenceFileRecordReader.initialize(SequenceFileRecordReader.java:50) at org.apache.hadoop.mapred.MapTask$NewTrackingRecordReader.initialize(MapTask.java:478) at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:671) at org.apache.hadoop.mapred.MapTask.run(MapTask.java:330)
这是我的驱动代码
package SEQ;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class SeqDriver extends Configured implements Tool
{
public static void main(String[] args) throws Exception{
int exitCode = ToolRunner.run(new SeqDriver(), args);
System.exit(exitCode);
}
public int run(String[] args) throws Exception {
if (args.length != 2) {
System.err.printf("Usage: %s needs two arguments files\n",
getClass().getSimpleName());
return -1;
}
String outputPath = args[1];
FileSystem hfs = FileSystem.get(getConf());
Job job = new Job();
job.setJarByClass(SeqDriver.class);
job.setJobName("SequenceFileReader");
HDFSUtil.removeHdfsSubDirIfExists(hfs, new Path(outputPath), true);
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
job.setOutputKeyClass(ImmutableBytesWritable.class);
job.setOutputValueClass(Result.class);
job.setInputFormatClass(SequenceFileInputFormat.class);
job.setMapperClass(MySeqMapper.class);
job.setNumReduceTasks(0);
int returnValue = job.waitForCompletion(true) ? 0:1;
if(job.isSuccessful()) {
System.out.println("Job was successful");
} else if(!job.isSuccessful()) {
System.out.println("Job was not successful");
}
return returnValue;
}
}
这是我的映射器代码
package SEQ;
import java.io.IOException;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * Mapper that takes {@link ImmutableBytesWritable} keys with {@link Result} values
 * and is declared to emit {@link Text}/{@link Text} pairs.
 *
 * <p>The map step is currently empty: each record is received and nothing is written.
 */
public class MySeqMapper
        extends Mapper<ImmutableBytesWritable, Result, Text, Text> {

    /**
     * Called once per input record; currently performs no work.
     *
     * @param row     key of the input record
     * @param value   value of the input record
     * @param context task context (unused)
     * @throws IOException          declared by the overridden method
     * @throws InterruptedException declared by the overridden method
     */
    @Override
    public void map(ImmutableBytesWritable row, Result value, Context context)
            throws IOException, InterruptedException {
        // Currently empty: no output is produced for any record.
    }
}
所以我会回答我的问题 这是让它工作所需要的
因为我们使用 HBase 来存储数据,并且这个 reducer 会把它的结果输出到 HBase 表,Hadoop 告诉我们它不知道如何序列化(以及反序列化)我们的数据。所以我们需要帮它一把:在作业设置(setUp)阶段设置 io.serializations 变量,把 HBase 的序列化类注册进去:
// Keep the currently configured serializations and append the Mutation and
// Result serialization classes after them.
hbaseConf.setStrings(
        "io.serializations",
        hbaseConf.get("io.serializations"),
        MutationSerialization.class.getName(),
        ResultSerialization.class.getName());