MapReduce 作业挂起

MapReduce Job hangs

我是 Hadoop 的 MapReduce 新手。我已经编写了一个 map reduce 任务,我正在尝试 运行 在我的本地机器上。但工作在地图 100% 后挂起。

下面是代码,我不明白我错过了什么。

我有一个自定义密钥 class

import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;

public class AirlineMonthKey implements WritableComparable<AirlineMonthKey>{

Text airlineName;
Text month;

public AirlineMonthKey(){
    super();
}

public AirlineMonthKey(Text airlineName, Text month) {
    super();
    this.airlineName = airlineName;
    this.month = month;
}

public Text getAirlineName() {
    return airlineName;
}

public void setAirlineName(Text airlineName) {
    this.airlineName = airlineName;
}

public Text getMonth() {
    return month;
}

public void setMonth(Text month) {
    this.month = month;
}

@Override
public void readFields(DataInput in) throws IOException {
    // TODO Auto-generated method stub
    this.airlineName.readFields(in);
    this.month.readFields(in);
}

@Override
public void write(DataOutput out) throws IOException {
    // TODO Auto-generated method stub
    this.airlineName.write(out);
    this.month.write(out);      
}

@Override
public int compareTo(AirlineMonthKey airlineMonthKey) {
    // TODO Auto-generated method stub
    int diff = getAirlineName().compareTo(airlineMonthKey.getAirlineName());
    if(diff != 0){
        return diff;
    }

    int m1 = Integer.parseInt(getMonth().toString());
    int m2 = Integer.parseInt(airlineMonthKey.getMonth().toString());

    if(m1>m2){
        return -1;
    }
    else 
        return 1;
}


}

和使用自定义键的映射器和缩减器class如下。

package com.mapresuce.secondarysort;

import java.io.IOException;
import java.io.StringReader;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

import com.opencsv.CSVReader;

public class FlightDelayByMonth {

public static class FlightDelayByMonthMapper extends
        Mapper<Object, Text, AirlineMonthKey, Text> {
    public void map(Object key, Text value, Context context)
            throws IOException, InterruptedException {
        String str = value.toString();
        // Reading Line one by one from the input CSV.
        CSVReader reader = new CSVReader(new StringReader(str));
        String[] split = reader.readNext();
        reader.close();

        String airlineName = split[6];
        String month = split[2];
        String year = split[0];
        String delayMinutes = split[37];
        String cancelled = split[41];

        if (!(airlineName.equals("") || month.equals("") || delayMinutes
                .equals(""))) {
            if (year.equals("2008") && cancelled.equals("0.00")) {
                AirlineMonthKey airlineMonthKey = new AirlineMonthKey(
                        new Text(airlineName), new Text(month));
                Text delay = new Text(delayMinutes);
                context.write(airlineMonthKey, delay);
                System.out.println("1");
            }
        }

    }
}

public static class FlightDelayByMonthReducer extends
        Reducer<AirlineMonthKey, Text, Text, Text> {


    public void reduce(AirlineMonthKey key, Iterable<Text> values,
            Context context) throws IOException, InterruptedException {
        for(Text val : values){
            context.write(new Text(key.getAirlineName().toString()+" "+key.getMonth().toString()), val);
        }
    }
}

public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {   
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args)
            .getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage:<in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "Average monthly flight dealy");
    job.setJarByClass(FlightDelayByMonth.class);
    job.setMapperClass(FlightDelayByMonthMapper.class);
    job.setReducerClass(FlightDelayByMonthReducer.class);
    job.setOutputKeyClass(AirlineMonthKey.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}

另外我在 main 中创建了一个作业和配置。不知道我错过了什么。我 运行 在本地环境中完成所有这些操作。

尝试在您的 AirlineMonthKey 中编写 toString、equals 和 hashcode 的自定义实现 class。

阅读下文 link。

http://hadoop.apache.org/docs/stable/api/org/apache/hadoop/io/WritableComparable.html

键类型实现 hashCode() 很重要。

希望对您有所帮助。

问题是我必须在 AirlineMonthKey 中使用默认构造函数(我这样做了)并在自定义键 class 中初始化实例变量(我没有这样做)。