如何在hadoop map reduce程序中的mapper代码中获取输入文件名
How to get input file name in mapper code in hadoop map reduce programme
我编写了一个带有两个输入参数的驱动程序代码:args[0] 和 args[1]。
我想把 args[0](即输入文件路径)作为映射器代码中要读取的文件路径之一,以便用 BufferedReader 读取该文件,如下所示:
{
/**
 * Scans two text files and writes (number, field) pairs to the job output.
 *
 * <p>Pass 1 reads the file named by {@code args[0]}: on every line containing both
 * "result" and "Complete_IND", each piece of text found between "venk" and "si" is
 * parsed as an integer and, when greater than zero, remembered.
 *
 * <p>Pass 2 reads the file named by {@code args[1]}: for every line containing one of the
 * remembered numbers, each piece of text enclosed between two '|' characters is collected,
 * and the remembered numbers are written out paired index-by-index with the collected fields.
 *
 * <p>The {@code key} and {@code value} arguments are not used; both files are re-read on
 * every call.
 *
 * <p>NOTE(review): {@code args} is not declared inside this method. It must be supplied by
 * the enclosing scope; the driver's command-line array is presumably not visible to a
 * mapper task -- confirm how these two paths are meant to reach the mapper.
 * NOTE(review): {@code FileReader} opens paths through java.io; presumably this does not
 * resolve HDFS locations -- confirm where the files live.
 *
 * @param key     unused
 * @param value   unused
 * @param context receives the emitted pairs through {@code context.write}
 * @throws java.io.IOException  declared by the signature; IOExceptions raised inside the two
 *                              passes (including from {@code context.write}) are caught and
 *                              reported on standard output
 * @throws InterruptedException declared by the signature; not caught here
 * @throws NumberFormatException if text between "venk" and "si" is not a valid integer
 */
public void map(LongWritable key, Text value, Context context)
        throws java.io.IOException, InterruptedException {
    ArrayList<String> al = new ArrayList<String>();
    ArrayList<String> nl = new ArrayList<String>();
    String line = null;
    String filePath = args[0];
    String path = args[1];

    // Compiled once per call instead of once per matching line.
    Pattern numberPattern =
            Pattern.compile(Pattern.quote("venk") + "(.*?)" + Pattern.quote("si"));
    Pattern fieldPattern =
            Pattern.compile(Pattern.quote("|") + "(.*?)" + Pattern.quote("|"));

    // try-with-resources closes the readers even when reading or parsing fails part-way.
    try (FileReader fileReader = new FileReader(filePath);
         BufferedReader bufferedReader = new BufferedReader(fileReader)) {
        while ((line = bufferedReader.readLine()) != null) {
            if (line.contains("result") && line.contains("Complete_IND")) {
                Matcher m = numberPattern.matcher(line);
                while (m.find()) {
                    String num = m.group(1);
                    System.out.println(num);
                    int fin = Integer.parseInt(num);
                    if (fin > 0) {
                        System.out.println("number greater than zero" + fin);
                        al.add(num);
                        for (String obj : al) {
                            System.out.println("final obj" + obj);
                        }
                    }
                }
            }
        }
    } catch (FileNotFoundException ex) {
        System.out.println("Unable to open file '" + filePath + "'");
    } catch (IOException ex) {
        System.out.println("Error reading file '" + filePath + "'");
    }

    System.out.println("secondloop");
    // A single BufferedReader over the FileReader; the original wrapped the same FileReader
    // twice and never used or closed the first wrapper.
    try (FileReader fileReader = new FileReader(path);
         BufferedReader bufferReader = new BufferedReader(fileReader)) {
        System.out.println("djdne" + path);
        System.out.println("djdne" + path);
        while ((line = bufferReader.readLine()) != null) {
            System.out.println("djdne" + line);
            for (String obj : al) {
                if (line.contains(obj)) {
                    System.out.println("77");
                    System.out.println("1 obj is" + obj);
                    Matcher mac = fieldPattern.matcher(line);
                    System.out.println("22");
                    while (mac.find()) {
                        System.out.println("33");
                        System.out.println(mac.group(1));
                        nl.add(mac.group(1));
                        // nl can grow past al; stop at the shorter list so al.get(i)
                        // cannot throw IndexOutOfBoundsException.
                        int pairCount = Math.min(al.size(), nl.size());
                        for (int i = 0; i < pairCount; i++) {
                            Text si = new Text(al.get(i));
                            Text vi = new Text(nl.get(i));
                            context.write(si, vi);
                        }
                    }
                }
            }
        }
    } catch (FileNotFoundException ex) {
        System.out.println("Unable to open file '" + path + "'");
    } catch (IOException ex) {
        // NOTE(review): an IOException thrown by context.write also lands here and is
        // reported as a read error.
        System.out.println("Error reading file '" + path + "'");
    }
}
}
您可以通过映射器的 Context 取得当前的输入分片(InputSplit),再从中获取文件路径:
// Casts the current input split to FileSplit and asks it for its path.
// NOTE(review): the cast presumably only succeeds for file-based input formats -- confirm for this job.
((FileSplit) context.getInputSplit()).getPath();
在 Mapper 类的 configure 方法中也可以获取输入文件名,然后就可以用它来读取文件。更详细的示例如下:
/**
 * Stores the value of the "mapred.input.file" job property in {@code inputFile}.
 *
 * @param job job configuration the property is read from
 */
public void configure(JobConf job) {
    String currentInputFile = job.get("mapred.input.file");
    inputFile = currentInputFile;
}
我编写了一个带有两个输入参数的驱动程序代码:args[0] 和 args[1]。
我想把 args[0](即输入文件路径)作为映射器代码中要读取的文件路径之一,以便用 BufferedReader 读取该文件,如下所示:
{
/**
 * Scans two text files and writes (number, field) pairs to the job output.
 *
 * <p>Pass 1 reads the file named by {@code args[0]}: on every line containing both
 * "result" and "Complete_IND", each piece of text found between "venk" and "si" is
 * parsed as an integer and, when greater than zero, remembered.
 *
 * <p>Pass 2 reads the file named by {@code args[1]}: for every line containing one of the
 * remembered numbers, each piece of text enclosed between two '|' characters is collected,
 * and the remembered numbers are written out paired index-by-index with the collected fields.
 *
 * <p>The {@code key} and {@code value} arguments are not used; both files are re-read on
 * every call.
 *
 * <p>NOTE(review): {@code args} is not declared inside this method. It must be supplied by
 * the enclosing scope; the driver's command-line array is presumably not visible to a
 * mapper task -- confirm how these two paths are meant to reach the mapper.
 * NOTE(review): {@code FileReader} opens paths through java.io; presumably this does not
 * resolve HDFS locations -- confirm where the files live.
 *
 * @param key     unused
 * @param value   unused
 * @param context receives the emitted pairs through {@code context.write}
 * @throws java.io.IOException  declared by the signature; IOExceptions raised inside the two
 *                              passes (including from {@code context.write}) are caught and
 *                              reported on standard output
 * @throws InterruptedException declared by the signature; not caught here
 * @throws NumberFormatException if text between "venk" and "si" is not a valid integer
 */
public void map(LongWritable key, Text value, Context context)
        throws java.io.IOException, InterruptedException {
    ArrayList<String> al = new ArrayList<String>();
    ArrayList<String> nl = new ArrayList<String>();
    String line = null;
    String filePath = args[0];
    String path = args[1];

    // Compiled once per call instead of once per matching line.
    Pattern numberPattern =
            Pattern.compile(Pattern.quote("venk") + "(.*?)" + Pattern.quote("si"));
    Pattern fieldPattern =
            Pattern.compile(Pattern.quote("|") + "(.*?)" + Pattern.quote("|"));

    // try-with-resources closes the readers even when reading or parsing fails part-way.
    try (FileReader fileReader = new FileReader(filePath);
         BufferedReader bufferedReader = new BufferedReader(fileReader)) {
        while ((line = bufferedReader.readLine()) != null) {
            if (line.contains("result") && line.contains("Complete_IND")) {
                Matcher m = numberPattern.matcher(line);
                while (m.find()) {
                    String num = m.group(1);
                    System.out.println(num);
                    int fin = Integer.parseInt(num);
                    if (fin > 0) {
                        System.out.println("number greater than zero" + fin);
                        al.add(num);
                        for (String obj : al) {
                            System.out.println("final obj" + obj);
                        }
                    }
                }
            }
        }
    } catch (FileNotFoundException ex) {
        System.out.println("Unable to open file '" + filePath + "'");
    } catch (IOException ex) {
        System.out.println("Error reading file '" + filePath + "'");
    }

    System.out.println("secondloop");
    // A single BufferedReader over the FileReader; the original wrapped the same FileReader
    // twice and never used or closed the first wrapper.
    try (FileReader fileReader = new FileReader(path);
         BufferedReader bufferReader = new BufferedReader(fileReader)) {
        System.out.println("djdne" + path);
        System.out.println("djdne" + path);
        while ((line = bufferReader.readLine()) != null) {
            System.out.println("djdne" + line);
            for (String obj : al) {
                if (line.contains(obj)) {
                    System.out.println("77");
                    System.out.println("1 obj is" + obj);
                    Matcher mac = fieldPattern.matcher(line);
                    System.out.println("22");
                    while (mac.find()) {
                        System.out.println("33");
                        System.out.println(mac.group(1));
                        nl.add(mac.group(1));
                        // nl can grow past al; stop at the shorter list so al.get(i)
                        // cannot throw IndexOutOfBoundsException.
                        int pairCount = Math.min(al.size(), nl.size());
                        for (int i = 0; i < pairCount; i++) {
                            Text si = new Text(al.get(i));
                            Text vi = new Text(nl.get(i));
                            context.write(si, vi);
                        }
                    }
                }
            }
        }
    } catch (FileNotFoundException ex) {
        System.out.println("Unable to open file '" + path + "'");
    } catch (IOException ex) {
        // NOTE(review): an IOException thrown by context.write also lands here and is
        // reported as a read error.
        System.out.println("Error reading file '" + path + "'");
    }
}
}
您可以通过映射器的 Context 取得当前的输入分片(InputSplit),再从中获取文件路径:
// Casts the current input split to FileSplit and asks it for its path.
// NOTE(review): the cast presumably only succeeds for file-based input formats -- confirm for this job.
((FileSplit) context.getInputSplit()).getPath();
在 Mapper 类的 configure 方法中也可以获取输入文件名,然后就可以用它来读取文件。更详细的示例如下:
/**
 * Stores the value of the "mapred.input.file" job property in {@code inputFile}.
 *
 * @param job job configuration the property is read from
 */
public void configure(JobConf job) {
    String currentInputFile = job.get("mapred.input.file");
    inputFile = currentInputFile;
}