处理 PageRank 问题时出错:MapReduce 错误
Error while working with Page rank problem.Mapreduce error
我一直在 Map Reduce 作业的帮助下研究 PageRank 算法。
我需要创建 Mapper 和 Reducer 类,我将借助它们创建 jar 文件。
我正在使用 jar 文件来处理 Hadoop 集群。
目前我的 Java 源文件是 PageRank.java:
import java.io.IOException;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.util.ArrayList;
import java.util.StringTokenizer;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
public class PageRank {
public class PageRankMapper extends Mapper<LongWritable, Text, Text, Text>
{
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException
{
String line = value.toString();
line = line.replaceAll("\s+",";");
StringTokenizer token = new StringTokenizer(line, ";");
ArrayList<String> list = new ArrayList<String>();
while (token.hasMoreTokens())
{
list.add(token.nextToken());
}
int size = list.size();
double ipr = Double.parseDouble(list.get(size-1)); //initial page rank
String pageid = list.get(0); // pageid is always first element
int numlinks = (size-2); // number of output links = size - (first and last)
double opr = ipr/(double)numlinks; //output page rank = total/number of links
String oprtext = (pageid + " " +String.valueOf(opr));
int loop = 1;
String outputlinks = "";
while(loop <= numlinks)
{
outputlinks += (list.get(loop)+" ");
context.write(new Text(list.get(loop)), new Text(oprtext));
loop++;
}
context.write(new Text(pageid), new Text(outputlinks));
}
}
public class PageRankReducer extends Reducer<Text, Text, Text, Text>
{
public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException
{
float fpr = 0.0f;
String outlinks ="";
for (Text value : values)
{
String line = value.toString();
if (line.matches(".*\d.*"))
{
String[] token = line.split(" ");
fpr += Float.parseFloat(token[1]);
}
else
{
outlinks += line;
}
}
String output = (outlinks +String.valueOf(fpr));
context.write(key, new Text(output));
}
}
public static void main(String[] args) throws Exception {
int i = 0;
String opfile = "/part-r-00000";
Path opath = new Path(opfile);
Path inputPath = new Path(args[0]);
Path outputPath = new Path(args[1] );
while (i < 3)
{
String suffix = ("/"+String.valueOf(i));
Path spath = new Path(suffix);
Job job = getNewJob(i);
i++;
job.setJarByClass(PageRank.class);
FileInputFormat.addInputPath(job, inputPath);
FileOutputFormat.setOutputPath(job, outputPath);
job.setMapperClass(PageRankMapper.class);
job.setReducerClass(PageRankReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
inputPath = Path.mergePaths(outputPath,opath);
outputPath = Path.mergePaths(outputPath,spath);
job.waitForCompletion(true);
}
}
private static Job getNewJob(int i) throws IOException
{
Job job = new Job();
job.setJobName("Page Rank "+ String.valueOf(i));
return job;
}
}
我的目录结构如下:
hduser@ajyj:~/pagerank$ ls -l
total 12
drwxr-xr-x 2 root root 4096 Oct 24 12:37 input_data
drwxr-xr-x 2 root root 4096 Oct 24 12:38 pagerank_classes
-rw-r--r-- 1 root hadoop 3824 Oct 24 13:39 PageRank.java
我已经创建了 hdfs 目录。
到目前为止,我正在按照一些教程进行配置。
https://www.youtube.com/watch?v=6sK3LDY7Pp4
我收到的错误是针对此命令的。
hduser@ajyj:~/pagerank$ javac -classpath `hadoop classpath` -d '/home/hduser/pagerank/pagerank_classes' '/home/hduser/pagerank/PageRank.java'
/home/hduser/pagerank/PageRank.java:17: error: error while writing PageRank.PageRankMapper: /home/hduser/pagerank/pagerank_classes/PageRank$PageRankMapper.class (Permission denied)
public class PageRankMapper extends Mapper<LongWritable, Text, Text, Text>
^
Note: /home/hduser/pagerank/PageRank.java uses or overrides a deprecated API.
Note: Recompile with -Xlint:deprecation for details.
1 error
请问上述问题有什么解决办法吗?
这里您遇到的是 permission denied(权限被拒绝)错误消息:
error while writing PageRank.PageRankMapper:
/home/hduser/pagerank/pagerank_classes/PageRank$PageRankMapper.class (Permission denied)
目录 pagerank_classes 归 root 用户所有,只有 root 用户可以写入:
drwxr-xr-x 2 root root 4096 Oct 24 12:38 pagerank_classes
但您是以 hduser 用户的身份运行命令的。
我认为,只要递归地更改目录 /home/hduser/pagerank/pagerank_classes(即 PageRank$PageRankMapper.class 要写入的位置)的文件权限(或所有者),让当前用户拥有写入权限,您的问题就会得到解决,例如:sudo chown -R hduser /home/hduser/pagerank/pagerank_classes
我一直在 Map Reduce 作业的帮助下研究 PageRank 算法。
我需要创建 Mapper 和 Reducer 类,我将借助它们创建 jar 文件。
我正在使用 jar 文件来处理 Hadoop 集群。
目前我的 Java 源文件是 PageRank.java:
import java.io.IOException;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.util.ArrayList;
import java.util.StringTokenizer;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
public class PageRank {
public class PageRankMapper extends Mapper<LongWritable, Text, Text, Text>
{
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException
{
String line = value.toString();
line = line.replaceAll("\s+",";");
StringTokenizer token = new StringTokenizer(line, ";");
ArrayList<String> list = new ArrayList<String>();
while (token.hasMoreTokens())
{
list.add(token.nextToken());
}
int size = list.size();
double ipr = Double.parseDouble(list.get(size-1)); //initial page rank
String pageid = list.get(0); // pageid is always first element
int numlinks = (size-2); // number of output links = size - (first and last)
double opr = ipr/(double)numlinks; //output page rank = total/number of links
String oprtext = (pageid + " " +String.valueOf(opr));
int loop = 1;
String outputlinks = "";
while(loop <= numlinks)
{
outputlinks += (list.get(loop)+" ");
context.write(new Text(list.get(loop)), new Text(oprtext));
loop++;
}
context.write(new Text(pageid), new Text(outputlinks));
}
}
public class PageRankReducer extends Reducer<Text, Text, Text, Text>
{
public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException
{
float fpr = 0.0f;
String outlinks ="";
for (Text value : values)
{
String line = value.toString();
if (line.matches(".*\d.*"))
{
String[] token = line.split(" ");
fpr += Float.parseFloat(token[1]);
}
else
{
outlinks += line;
}
}
String output = (outlinks +String.valueOf(fpr));
context.write(key, new Text(output));
}
}
public static void main(String[] args) throws Exception {
int i = 0;
String opfile = "/part-r-00000";
Path opath = new Path(opfile);
Path inputPath = new Path(args[0]);
Path outputPath = new Path(args[1] );
while (i < 3)
{
String suffix = ("/"+String.valueOf(i));
Path spath = new Path(suffix);
Job job = getNewJob(i);
i++;
job.setJarByClass(PageRank.class);
FileInputFormat.addInputPath(job, inputPath);
FileOutputFormat.setOutputPath(job, outputPath);
job.setMapperClass(PageRankMapper.class);
job.setReducerClass(PageRankReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
inputPath = Path.mergePaths(outputPath,opath);
outputPath = Path.mergePaths(outputPath,spath);
job.waitForCompletion(true);
}
}
private static Job getNewJob(int i) throws IOException
{
Job job = new Job();
job.setJobName("Page Rank "+ String.valueOf(i));
return job;
}
}
我的目录结构如下:
hduser@ajyj:~/pagerank$ ls -l
total 12
drwxr-xr-x 2 root root 4096 Oct 24 12:37 input_data
drwxr-xr-x 2 root root 4096 Oct 24 12:38 pagerank_classes
-rw-r--r-- 1 root hadoop 3824 Oct 24 13:39 PageRank.java
我已经创建了 hdfs 目录。
到目前为止,我正在按照一些教程进行配置。 https://www.youtube.com/watch?v=6sK3LDY7Pp4
我收到的错误是针对此命令的。
hduser@ajyj:~/pagerank$ javac -classpath `hadoop classpath` -d '/home/hduser/pagerank/pagerank_classes' '/home/hduser/pagerank/PageRank.java'
/home/hduser/pagerank/PageRank.java:17: error: error while writing PageRank.PageRankMapper: /home/hduser/pagerank/pagerank_classes/PageRank$PageRankMapper.class (Permission denied)
public class PageRankMapper extends Mapper<LongWritable, Text, Text, Text>
^
Note: /home/hduser/pagerank/PageRank.java uses or overrides a deprecated API.
Note: Recompile with -Xlint:deprecation for details.
1 error
请问上述问题有什么解决办法吗?
这里您遇到的是 permission denied(权限被拒绝)错误消息:
error while writing PageRank.PageRankMapper:
/home/hduser/pagerank/pagerank_classes/PageRank$PageRankMapper.class (Permission denied)
目录 pagerank_classes 归 root 用户所有,只有 root 用户可以写入:
drwxr-xr-x 2 root root 4096 Oct 24 12:38 pagerank_classes
但您是以 hduser 用户的身份运行命令的。
我认为,只要递归地更改目录 /home/hduser/pagerank/pagerank_classes(即 PageRank$PageRankMapper.class 要写入的位置)的文件权限(或所有者),让当前用户拥有写入权限,您的问题就会得到解决,例如:sudo chown -R hduser /home/hduser/pagerank/pagerank_classes