使用 ArrayWritables 时出现问题
Issue when working with ArrayWritables
我是 Hadoop 的初学者,正在使用 Hadoop map-reduce 中的 ArrayWritables。
这是我正在使用的 Mapper 代码 :-
public class Base_Mapper extends Mapper<LongWritable, Text, Text, IntWritable> {
String currLine[] = new String[1000];
Text K = new Text();

/**
 * Splits each input line into single-character tokens (split("") yields one
 * token per character) and emits a (token, 1-based position) pair for every
 * character in the line. The Text key object K is reused across writes,
 * which is the usual Hadoop idiom to avoid per-record allocation.
 */
public void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
currLine = value.toString().split("");
int count = 0;
for (String currToken : currLine) {
count++;
K.set(currToken);
context.write(K, new IntWritable(count));
}
}
}
Reducer 代码 :-
public class Base_Reducer extends Reducer<Text, IntWritable,Text, IntArrayWritable> {

/**
 * For each key, builds a 150-slot array where slot (v - 1) records the
 * running order (redCount) in which a value v in 1..150 was seen.
 *
 * Fixes over the original:
 * - arr was sized 1000 but only slots 0..149 were initialized; the 850
 *   trailing nulls cause a NullPointerException when ArrayWritable
 *   serializes the array. The array is now sized exactly 150.
 * - the inner for-loop that scanned i = 1..150 looking for a match is
 *   replaced by an equivalent O(1) range check.
 */
public void reduce(Text key, Iterable<IntWritable> values, Context context)
throws IOException, InterruptedException {
IntArrayWritable finalArray = new IntArrayWritable();
IntWritable[] arr = new IntWritable[150];
for (int i = 0; i < 150; i++)
arr[i] = new IntWritable(0);
int redCount = 0;
for (IntWritable val : values) {
int thisValue = val.get();
// Equivalent to the original scan: exactly one i in 1..150 can match.
if (thisValue >= 1 && thisValue <= 150)
arr[thisValue - 1] = new IntWritable(redCount++);
}
finalArray.set(arr);
context.write(key, finalArray);
}
}
我正在使用 IntArrayWritable 作为 ArrayWritable 的子类,如下所示 :-
import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.IntWritable;
public class IntArrayWritable extends ArrayWritable {
public IntArrayWritable() {
super(IntWritable.class);
}
public IntArrayWritable(IntWritable[] values) {
super(IntWritable.class, values);
}
}
我的作业的预期输出是一组基作为键(这是正确的)和一组 IntWritables 作为值。
但我得到的输出为:-
com.feathersoft.Base.IntArrayWritable@30374534
A com.feathersoft.Base.IntArrayWritable@7ca071a6
C com.feathersoft.Base.IntArrayWritable@9858936
G com.feathersoft.Base.IntArrayWritable@1df33d1c
N com.feathersoft.Base.IntArrayWritable@4c3108a0
T com.feathersoft.Base.IntArrayWritable@272d6774
为了解决这个问题,我必须进行哪些更改?
您需要在 IntArrayWritable
实现中覆盖 toString()
方法的默认行为。
请试试这个:
import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.IntWritable;
public class IntArrayWritable extends ArrayWritable {
public IntArrayWritable() {
super(IntWritable.class);
}
public IntArrayWritable(IntWritable[] values) {
super(IntWritable.class, values);
}
@Override
public String toString() {
StringBuilder sb = new StringBuilder("[");
for (String s : super.toStrings())
{
sb.append(s).append(" ");
}
sb.append("]")
return sb.toString();
}
}
如果您喜欢这个答案,请将其标记为已接受。谢谢。
我是 Hadoop 的初学者,正在使用 Hadoop map-reduce 中的 ArrayWritables。
这是我正在使用的 Mapper 代码 :-
public class Base_Mapper extends Mapper<LongWritable, Text, Text, IntWritable> {
String currLine[] = new String[1000];
Text K = new Text();

/**
 * Splits each input line into single-character tokens (split("") yields one
 * token per character) and emits a (token, 1-based position) pair for every
 * character in the line. The Text key object K is reused across writes,
 * which is the usual Hadoop idiom to avoid per-record allocation.
 */
public void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
currLine = value.toString().split("");
int count = 0;
for (String currToken : currLine) {
count++;
K.set(currToken);
context.write(K, new IntWritable(count));
}
}
}
Reducer 代码 :-
public class Base_Reducer extends Reducer<Text, IntWritable,Text, IntArrayWritable> {

/**
 * For each key, builds a 150-slot array where slot (v - 1) records the
 * running order (redCount) in which a value v in 1..150 was seen.
 *
 * Fixes over the original:
 * - arr was sized 1000 but only slots 0..149 were initialized; the 850
 *   trailing nulls cause a NullPointerException when ArrayWritable
 *   serializes the array. The array is now sized exactly 150.
 * - the inner for-loop that scanned i = 1..150 looking for a match is
 *   replaced by an equivalent O(1) range check.
 */
public void reduce(Text key, Iterable<IntWritable> values, Context context)
throws IOException, InterruptedException {
IntArrayWritable finalArray = new IntArrayWritable();
IntWritable[] arr = new IntWritable[150];
for (int i = 0; i < 150; i++)
arr[i] = new IntWritable(0);
int redCount = 0;
for (IntWritable val : values) {
int thisValue = val.get();
// Equivalent to the original scan: exactly one i in 1..150 can match.
if (thisValue >= 1 && thisValue <= 150)
arr[thisValue - 1] = new IntWritable(redCount++);
}
finalArray.set(arr);
context.write(key, finalArray);
}
}
我正在使用 IntArrayWritable 作为 ArrayWritable 的子类,如下所示 :-
import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.IntWritable;
public class IntArrayWritable extends ArrayWritable {
public IntArrayWritable() {
super(IntWritable.class);
}
public IntArrayWritable(IntWritable[] values) {
super(IntWritable.class, values);
}
}
我的作业的预期输出是一组基作为键(这是正确的)和一组 IntWritables 作为值。 但我得到的输出为:-
com.feathersoft.Base.IntArrayWritable@30374534
A com.feathersoft.Base.IntArrayWritable@7ca071a6
C com.feathersoft.Base.IntArrayWritable@9858936
G com.feathersoft.Base.IntArrayWritable@1df33d1c
N com.feathersoft.Base.IntArrayWritable@4c3108a0
T com.feathersoft.Base.IntArrayWritable@272d6774
为了解决这个问题,我必须进行哪些更改?
您需要在 IntArrayWritable
实现中覆盖 toString()
方法的默认行为。
请试试这个:
import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.IntWritable;
public class IntArrayWritable extends ArrayWritable {
public IntArrayWritable() {
super(IntWritable.class);
}
public IntArrayWritable(IntWritable[] values) {
super(IntWritable.class, values);
}
@Override
public String toString() {
StringBuilder sb = new StringBuilder("[");
for (String s : super.toStrings())
{
sb.append(s).append(" ");
}
sb.append("]")
return sb.toString();
}
}
如果您喜欢这个答案,请将其标记为已接受。谢谢。