在 Hadoop MapReduce 中对多个输出目录使用多个映射器
Using Multiple Mappers for multiple output directories in Hadoop MapReduce
我想 运行 两个映射器在第一个映射器的不同 directories.The 输出中产生两个不同的输出(作为参数发送)应该发送到第二个映射器的输入 mapper.i 在驱动程序中有此代码 class
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
public class Export_Column_Mapping
{
private static String[] Detail_output_column_array = new String[27];
private static String[] Shop_output_column_array = new String[8];
private static String details_output = null ;
private static String Shop_output = null;
public static void main(String[] args) throws Exception
{
String Output_filetype = args[3];
String Input_column_number = args[4];
String Output_column_number = args[5];
Configuration Detailsconf = new Configuration(false);
Detailsconf.setStrings("output_filetype",Output_filetype);
Detailsconf.setStrings("Input_column_number",Input_column_number);
Detailsconf.setStrings("Output_column_number",Output_column_number);
Job Details = new Job(Detailsconf," Export_Column_Mapping");
Details.setJarByClass(Export_Column_Mapping.class);
Details.setJobName("DetailsFile_Job");
Details.setMapperClass(DetailFile_Mapper.class);
Details.setNumReduceTasks(0);
Details.setInputFormatClass(TextInputFormat.class);
Details.setOutputFormatClass(TextOutputFormat.class);
FileInputFormat.setInputPaths(Details, new Path(args[0]));
FileOutputFormat.setOutputPath(Details, new Path(args[1]));
if(Details.waitForCompletion(true))
{
Configuration Shopconf = new Configuration();
Job Shop = new Job(Shopconf,"Export_Column_Mapping");
Shop.setJarByClass(Export_Column_Mapping.class);
Shop.setJobName("ShopFile_Job");
Shop.setMapperClass(ShopFile_Mapper.class);
Shop.setNumReduceTasks(0);
Shop.setInputFormatClass(TextInputFormat.class);
Shop.setOutputFormatClass(TextOutputFormat.class);
FileInputFormat.setInputPaths(Shop, new Path(args[1]));
FileOutputFormat.setOutputPath(Shop, new Path(args[2]));
MultipleOutputs.addNamedOutput(Shop, "text", TextOutputFormat.class,LongWritable.class, Text.class);
System.exit(Shop.waitForCompletion(true) ? 0 : 1);
}
}
public static class DetailFile_Mapper extends Mapper<LongWritable,Text,Text,Text>
{
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException
{
String str_Output_filetype = context.getConfiguration().get("output_filetype");
String str_Input_column_number = context.getConfiguration().get("Input_column_number");
String[] input_columns_number = str_Input_column_number.split(",");
String str_Output_column_number= context.getConfiguration().get("Output_column_number");
String[] output_columns_number = str_Output_column_number.split(",");
String str_line = value.toString();
String[] input_column_array = str_line.split(",");
try
{
for(int i = 0;i<=input_column_array.length+1; i++)
{
int int_outputcolumn = Integer.parseInt(output_columns_number[i]);
int int_inputcolumn = Integer.parseInt(input_columns_number[i]);
if((int_inputcolumn != 0) && (int_outputcolumn != 0) && output_columns_number.length == input_columns_number.length)
{
Detail_output_column_array[int_outputcolumn-1] = input_column_array[int_inputcolumn-1];
if(details_output != null)
{
details_output = details_output+" "+ Detail_output_column_array[int_outputcolumn-1];
Shop_output = Shop_output+" "+ Shop_output_column_array[int_outputcolumn-1];
}else
{
details_output = Detail_output_column_array[int_outputcolumn-1];
Shop_output = Shop_output_column_array[int_outputcolumn-1];
}
}
}
}catch (Exception e)
{
}
context.write(null,new Text(details_output));
}
}
public static class ShopFile_Mapper extends Mapper<LongWritable,Text,Text,Text>
{
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException
{
try
{
for(int i = 0;i<=Shop_output_column_array.length; i++)
{
Shop_output_column_array[0] = Detail_output_column_array[0];
Shop_output_column_array[1] = Detail_output_column_array[1];
Shop_output_column_array[2] = Detail_output_column_array[2];
Shop_output_column_array[3] = Detail_output_column_array[3];
Shop_output_column_array[4] = Detail_output_column_array[14];
if(details_output != null)
{
Shop_output = Shop_output+" "+ Shop_output_column_array[i];
}else
{
Shop_output = Shop_output_column_array[i-1];
}
}
}catch (Exception e){
}
context.write(null,new Text(Shop_output));
}
}
}
我收到错误..
Error:org.apache.hadoop.mapreduce.lib.input.InvalidInputException:
Input path does not exist:
file:/home/Barath.B.Natarajan.ap/rules/text.txt
我想运行一项一项的工作,有人可以帮助我吗?...
有一种叫做 jobcontrol 的东西,你可以用它来实现它。
假设有两个工作A和B
ControlledJob A= new ControlledJob(JobConf for A);
ControlledJob B= new ControlledJob(JobConf for B);
B.addDependingJob(A);
JobControl jControl = newJobControl("Name");
jControl.addJob(A);
jControl.addJob(B);
Thread runJControl = new Thread(jControl);
runJControl.start();
while (!jControl.allFinished()) {
code = jControl.getFailedJobList().size() == 0 ? 0 : 1;
Thread.sleep(1000);
}
System.exit(1);
开头的初始化代码是这样的:
int code =1;
让你的第一个工作是第一个 reducer 为零的映射器,第二个工作是第二个映射器为零的 reducer.The 配置应该是 B 的输入路径和 A 的输出路径应该是一样。
我想 运行 两个映射器在第一个映射器的不同 directories.The 输出中产生两个不同的输出(作为参数发送)应该发送到第二个映射器的输入 mapper.i 在驱动程序中有此代码 class
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
public class Export_Column_Mapping
{
private static String[] Detail_output_column_array = new String[27];
private static String[] Shop_output_column_array = new String[8];
private static String details_output = null ;
private static String Shop_output = null;
public static void main(String[] args) throws Exception
{
String Output_filetype = args[3];
String Input_column_number = args[4];
String Output_column_number = args[5];
Configuration Detailsconf = new Configuration(false);
Detailsconf.setStrings("output_filetype",Output_filetype);
Detailsconf.setStrings("Input_column_number",Input_column_number);
Detailsconf.setStrings("Output_column_number",Output_column_number);
Job Details = new Job(Detailsconf," Export_Column_Mapping");
Details.setJarByClass(Export_Column_Mapping.class);
Details.setJobName("DetailsFile_Job");
Details.setMapperClass(DetailFile_Mapper.class);
Details.setNumReduceTasks(0);
Details.setInputFormatClass(TextInputFormat.class);
Details.setOutputFormatClass(TextOutputFormat.class);
FileInputFormat.setInputPaths(Details, new Path(args[0]));
FileOutputFormat.setOutputPath(Details, new Path(args[1]));
if(Details.waitForCompletion(true))
{
Configuration Shopconf = new Configuration();
Job Shop = new Job(Shopconf,"Export_Column_Mapping");
Shop.setJarByClass(Export_Column_Mapping.class);
Shop.setJobName("ShopFile_Job");
Shop.setMapperClass(ShopFile_Mapper.class);
Shop.setNumReduceTasks(0);
Shop.setInputFormatClass(TextInputFormat.class);
Shop.setOutputFormatClass(TextOutputFormat.class);
FileInputFormat.setInputPaths(Shop, new Path(args[1]));
FileOutputFormat.setOutputPath(Shop, new Path(args[2]));
MultipleOutputs.addNamedOutput(Shop, "text", TextOutputFormat.class,LongWritable.class, Text.class);
System.exit(Shop.waitForCompletion(true) ? 0 : 1);
}
}
public static class DetailFile_Mapper extends Mapper<LongWritable,Text,Text,Text>
{
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException
{
String str_Output_filetype = context.getConfiguration().get("output_filetype");
String str_Input_column_number = context.getConfiguration().get("Input_column_number");
String[] input_columns_number = str_Input_column_number.split(",");
String str_Output_column_number= context.getConfiguration().get("Output_column_number");
String[] output_columns_number = str_Output_column_number.split(",");
String str_line = value.toString();
String[] input_column_array = str_line.split(",");
try
{
for(int i = 0;i<=input_column_array.length+1; i++)
{
int int_outputcolumn = Integer.parseInt(output_columns_number[i]);
int int_inputcolumn = Integer.parseInt(input_columns_number[i]);
if((int_inputcolumn != 0) && (int_outputcolumn != 0) && output_columns_number.length == input_columns_number.length)
{
Detail_output_column_array[int_outputcolumn-1] = input_column_array[int_inputcolumn-1];
if(details_output != null)
{
details_output = details_output+" "+ Detail_output_column_array[int_outputcolumn-1];
Shop_output = Shop_output+" "+ Shop_output_column_array[int_outputcolumn-1];
}else
{
details_output = Detail_output_column_array[int_outputcolumn-1];
Shop_output = Shop_output_column_array[int_outputcolumn-1];
}
}
}
}catch (Exception e)
{
}
context.write(null,new Text(details_output));
}
}
public static class ShopFile_Mapper extends Mapper<LongWritable,Text,Text,Text>
{
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException
{
try
{
for(int i = 0;i<=Shop_output_column_array.length; i++)
{
Shop_output_column_array[0] = Detail_output_column_array[0];
Shop_output_column_array[1] = Detail_output_column_array[1];
Shop_output_column_array[2] = Detail_output_column_array[2];
Shop_output_column_array[3] = Detail_output_column_array[3];
Shop_output_column_array[4] = Detail_output_column_array[14];
if(details_output != null)
{
Shop_output = Shop_output+" "+ Shop_output_column_array[i];
}else
{
Shop_output = Shop_output_column_array[i-1];
}
}
}catch (Exception e){
}
context.write(null,new Text(Shop_output));
}
}
}
我收到错误..
Error:org.apache.hadoop.mapreduce.lib.input.InvalidInputException: Input path does not exist: file:/home/Barath.B.Natarajan.ap/rules/text.txt
我想运行一项一项的工作,有人可以帮助我吗?...
有一种叫做 jobcontrol 的东西,你可以用它来实现它。
假设有两个工作A和B
ControlledJob A= new ControlledJob(JobConf for A);
ControlledJob B= new ControlledJob(JobConf for B);
B.addDependingJob(A);
JobControl jControl = newJobControl("Name");
jControl.addJob(A);
jControl.addJob(B);
Thread runJControl = new Thread(jControl);
runJControl.start();
while (!jControl.allFinished()) {
code = jControl.getFailedJobList().size() == 0 ? 0 : 1;
Thread.sleep(1000);
}
System.exit(1);
开头的初始化代码是这样的:
int code =1;
让你的第一个工作是第一个 reducer 为零的映射器,第二个工作是第二个映射器为零的 reducer.The 配置应该是 B 的输入路径和 A 的输出路径应该是一样。