hadoop 中如何将一个 CompositeKey copy/assign 转换成另一个 CompositeKey?
How to copy/assign a CompositeKey into another CompositeKey in hadoop?
我尝试 运行 对集群上的某些数据进行 map reduce 并获得以下输出。
0000000000 44137 0 2
1 1
902996760100000 44137 2 6
2 2
9029967602 44137 2 8
2 2
90299676030000 44137 2 1
9029967604 44137 2 5
2 2
905000 38704 2 1
9050000001 38702 2 24
2 2
9050000001 38704 2 14
2 2
9050000001 38705 2 12
2 2
9050000001 38706 2 13
2 2
9050000001 38714 2 24
2 2
9050000002 38704 2 12
2 2
9050000002 38706 2 12
2 2
9050000011 38704 2 6
2 2
9050000011 38706 2 12
2 2
9050000021 38702 2 12
2 2
9050000031 38704 2 6
2 2
9050000031 38705 2 6
2 2
9050000031 38714 2 12
2 2
这是我的减速器
public class RTopLoc extends Reducer<CompositeKey, IntWritable, Text, Text> {
private static int number = 0;
private static CompositeKey lastCK;
private static Text lastLac = new Text();
@Override
public void reduce(CompositeKey key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
int sum = sumValues(values);
String str = Integer.toString(sum);
String str2 = Integer.toString(number);
String str3 = key.getSecond().toString();
context.write(key.getFirst(), new Text(str3 + " " + str2 + " " + str));
context.write(lastCK.getFirst(), lastCK.getSecond());
if(number == 0){
number = sum;
lastCK = new CompositeKey(key.getFirst().toString(), key.getSecond().toString());
context.write(new Text("1"), new Text("1"));
}
else if(lastCK.getFirst().equals(key.getFirst()) && sum > number){
lastCK = new CompositeKey(key.getFirst().toString(), key.getSecond().toString());
context.write(new Text("2"), new Text("2"));
}
else if(!lastCK.getFirst().equals(key.getFirst())){
// context.write(lastCK.getFirst(), lastCK.getSecond());
context.write(new Text("3"), new Text("3"));
number = sum;
lastCK = new CompositeKey(key.getFirst().toString(), key.getSecond().toString());
}
}
据我了解,问题是 hadoop 将 lastCK 和 key 视为同一对象,而这种情况
if(lastCK.getFirst().equals(key.getFirst())
永远是真的
这是我的 CompositeKey class
public class CompositeKey implements WritableComparable {
private Text first = null;
private Text second = null;
public CompositeKey() {
}
public CompositeKey(Text first, Text second) {
this.first = first;
this.second = second;
}
//...getters and setters
public Text getFirst() {
return first;
}
public void setFirst(Text first) {
this.first = first;
}
public void setFirst(String first) {
setFirst(new Text(first));
}
public Text getSecond() {
return second;
}
public void setSecond(Text second) {
this.second = second;
}
public void setSecond(String second) {
setSecond(new Text(second));
}
public void write(DataOutput d) throws IOException {
first.write(d);
second.write(d);
}
public void readFields(DataInput di) throws IOException {
if (first == null) {
first = new Text();
}
if (second == null) {
second = new Text();
}
first.readFields(di);
second.readFields(di);
}
public int compareTo(Object obj) {
CompositeKey other = (CompositeKey) obj;
int cmp = first.compareTo(other.getFirst());
if (cmp != 0) {
return cmp;
}
return second.compareTo(other.getSecond());
}
@Override
public boolean equals(Object obj) {
CompositeKey other = (CompositeKey)obj;
return first.equals(other.getFirst());
}
@Override
public int hashCode() {
return first.hashCode();
}
}
我尝试将 setter 更改为类似的内容
public void setFirst(Text first) {
this.first.set(first);
}
public void setFirst(String first) {
setFirst(new Text(first));
}
其中集是 Hadoop Text Documentation
但是得到了
Error: java.lang.NullPointerException
at task.SecondarySort.CompositeKey.setFirst(CompositeKey.java:29)
at task.SecondarySort.CompositeKey.setFirst(CompositeKey.java:33)
如何使 hadoop 将 lastCK 和 key 视为不同的对象?
如果您更改这些行:
private Text first = null;
private Text second = null;
到
private Text first = new Text();
private Text second = new Text();
然后使用:
public void setFirst(Text first) {
this.first.set(first);
}
应该可以。您还可以在构造函数中创建 first
和 second
。
我尝试 运行 对集群上的某些数据进行 map reduce 并获得以下输出。
0000000000 44137 0 2
1 1
902996760100000 44137 2 6
2 2
9029967602 44137 2 8
2 2
90299676030000 44137 2 1
9029967604 44137 2 5
2 2
905000 38704 2 1
9050000001 38702 2 24
2 2
9050000001 38704 2 14
2 2
9050000001 38705 2 12
2 2
9050000001 38706 2 13
2 2
9050000001 38714 2 24
2 2
9050000002 38704 2 12
2 2
9050000002 38706 2 12
2 2
9050000011 38704 2 6
2 2
9050000011 38706 2 12
2 2
9050000021 38702 2 12
2 2
9050000031 38704 2 6
2 2
9050000031 38705 2 6
2 2
9050000031 38714 2 12
2 2
这是我的减速器
public class RTopLoc extends Reducer<CompositeKey, IntWritable, Text, Text> {
private static int number = 0;
private static CompositeKey lastCK;
private static Text lastLac = new Text();
@Override
public void reduce(CompositeKey key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
int sum = sumValues(values);
String str = Integer.toString(sum);
String str2 = Integer.toString(number);
String str3 = key.getSecond().toString();
context.write(key.getFirst(), new Text(str3 + " " + str2 + " " + str));
context.write(lastCK.getFirst(), lastCK.getSecond());
if(number == 0){
number = sum;
lastCK = new CompositeKey(key.getFirst().toString(), key.getSecond().toString());
context.write(new Text("1"), new Text("1"));
}
else if(lastCK.getFirst().equals(key.getFirst()) && sum > number){
lastCK = new CompositeKey(key.getFirst().toString(), key.getSecond().toString());
context.write(new Text("2"), new Text("2"));
}
else if(!lastCK.getFirst().equals(key.getFirst())){
// context.write(lastCK.getFirst(), lastCK.getSecond());
context.write(new Text("3"), new Text("3"));
number = sum;
lastCK = new CompositeKey(key.getFirst().toString(), key.getSecond().toString());
}
}
据我了解,问题是 hadoop 将 lastCK 和 key 视为同一对象,而这种情况
if(lastCK.getFirst().equals(key.getFirst())
永远是真的
这是我的 CompositeKey class
public class CompositeKey implements WritableComparable {
private Text first = null;
private Text second = null;
public CompositeKey() {
}
public CompositeKey(Text first, Text second) {
this.first = first;
this.second = second;
}
//...getters and setters
public Text getFirst() {
return first;
}
public void setFirst(Text first) {
this.first = first;
}
public void setFirst(String first) {
setFirst(new Text(first));
}
public Text getSecond() {
return second;
}
public void setSecond(Text second) {
this.second = second;
}
public void setSecond(String second) {
setSecond(new Text(second));
}
public void write(DataOutput d) throws IOException {
first.write(d);
second.write(d);
}
public void readFields(DataInput di) throws IOException {
if (first == null) {
first = new Text();
}
if (second == null) {
second = new Text();
}
first.readFields(di);
second.readFields(di);
}
public int compareTo(Object obj) {
CompositeKey other = (CompositeKey) obj;
int cmp = first.compareTo(other.getFirst());
if (cmp != 0) {
return cmp;
}
return second.compareTo(other.getSecond());
}
@Override
public boolean equals(Object obj) {
CompositeKey other = (CompositeKey)obj;
return first.equals(other.getFirst());
}
@Override
public int hashCode() {
return first.hashCode();
}
}
我尝试将 setter 更改为类似的内容
public void setFirst(Text first) {
this.first.set(first);
}
public void setFirst(String first) {
setFirst(new Text(first));
}
其中集是 Hadoop Text Documentation
但是得到了
Error: java.lang.NullPointerException
at task.SecondarySort.CompositeKey.setFirst(CompositeKey.java:29)
at task.SecondarySort.CompositeKey.setFirst(CompositeKey.java:33)
如何使 hadoop 将 lastCK 和 key 视为不同的对象?
如果您更改这些行:
private Text first = null;
private Text second = null;
到
private Text first = new Text();
private Text second = new Text();
然后使用:
public void setFirst(Text first) {
this.first.set(first);
}
应该可以。您还可以在构造函数中创建 first
和 second
。