配置单元 UDF 中的 FileNotFoundException
FileNotFoundException in hive UDF
我的 UDF:
import java.text.SimpleDateFormat;
import java.util.Date;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.io.IntWritable;
public class HoursDiff extends UDF {
//private = new Text();
public IntWritable evaluate(String date,String time)
{
String dateStart = "2014-12-01 00:00:00";
String currentdate=date+" "+time;
SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
Date d1 = null;
Date d2 = null;
try
{
d1 = format.parse(dateStart);
d2 = format.parse(currentdate);
long diff = d2.getTime() - d1.getTime();
long diffHours = diff / (3600000) % 24;
long diffDays = diff / (86400000);
int hours=(int)(diffDays*24+diffHours);
IntWritable hour=new IntWritable(hours);
return hour;
}
catch (Exception e)
{
e.printStackTrace();
}
return null;
}
}
我导出到/home/hadoop/mapreduce/HoursDiff.jar
我打开了蜂巢shell:
add jar /home/hadoop/mapreduce/HoursDiff.jar;
create temporary function hoursdiff as HoursDiff;
当我尝试执行以下命令时,我得到 FileNotFoundException
:
select hoursdiff(date,time) as hours from date_test;
堆栈跟踪
create temporary function hoursdiff as 'HoursDiff';
OK
Time taken: 0.009 seconds
hive> select hoursdiff(date,time) as hours from date_test;
Total jobs = 1
Launching Job 1 out of 1
Number of reduce tasks is set to 0 since there's no reduce operator
15/10/11 15:17:03 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Execution log at: /tmp/hadoop/hadoop_20151011151616_2c15561f-7cd2-4012-8bd2-b7dfcf488432.log
java.io.FileNotFoundException: File does not exist: hdfs://172.16.253.17:54310/home/hadoop/mapreduce/HoursDiff.jar
at org.apache.hadoop.hdfs.DistributedFileSystem.doCall(DistributedFileSystem.java:1122)
at org.apache.hadoop.hdfs.DistributedFileSystem.doCall(DistributedFileSystem.java:1114)
at org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81)
at org.apache.hadoop.hdfs.DistributedFileSystem.getFileStatus(DistributedFileSystem.java:1114)
at org.apache.hadoop.mapreduce.filecache.ClientDistributedCacheManager.getFileStatus(ClientDistributedCacheManager.java:288)
at org.apache.hadoop.mapreduce.filecache.ClientDistributedCacheManager.getFileStatus(ClientDistributedCacheManager.java:224)
at org.apache.hadoop.mapreduce.filecache.ClientDistributedCacheManager.determineTimestamps(ClientDistributedCacheManager.java:93)
at org.apache.hadoop.mapreduce.filecache.ClientDistributedCacheManager.determineTimestampsAndCacheVisibilities(ClientDistributedCacheManager.java:57)
at org.apache.hadoop.mapreduce.JobSubmitter.copyAndConfigureFiles(JobSubmitter.java:269)
at org.apache.hadoop.mapreduce.JobSubmitter.copyAndConfigureFiles(JobSubmitter.java:390)
at org.apache.hadoop.mapreduce.JobSubmitter.submitJobInternal(JobSubmitter.java:483)
at org.apache.hadoop.mapreduce.Job.run(Job.java:1296)
at org.apache.hadoop.mapreduce.Job.run(Job.java:1293)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1628)
at org.apache.hadoop.mapreduce.Job.submit(Job.java:1293)
at org.apache.hadoop.mapred.JobClient.run(JobClient.java:562)
at org.apache.hadoop.mapred.JobClient.run(JobClient.java:557)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1628)
at org.apache.hadoop.mapred.JobClient.submitJobInternal(JobClient.java:557)
at org.apache.hadoop.mapred.JobClient.submitJob(JobClient.java:548)
at org.apache.hadoop.hive.ql.exec.mr.ExecDriver.execute(ExecDriver.java:420)
at org.apache.hadoop.hive.ql.exec.mr.ExecDriver.main(ExecDriver.java:740)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:601)
at org.apache.hadoop.util.RunJar.run(RunJar.java:221)
at org.apache.hadoop.util.RunJar.main(RunJar.java:136)
Job Submission failed with exception 'java.io.FileNotFoundException(File does not exist: hdfs://172.16.253.17:54310/home/hadoop/mapreduce/HoursDiff.jar)'
Execution failed with exit status: 1
你所做的一切都是正确的,但它是在 HDFS 路径中搜索,你注册了本地路径。
将 jar 复制到 HDFS 位置 并尝试将其注册到 HDFS 路径。
我希望你用 HDFS 用户打开了 hive 终端,所以它正在搜索 HDFS 的路径。
注意:它也会接受本地路径来注册jar。
我的 UDF:
import java.text.SimpleDateFormat;
import java.util.Date;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.io.IntWritable;
public class HoursDiff extends UDF {
//private = new Text();
public IntWritable evaluate(String date,String time)
{
String dateStart = "2014-12-01 00:00:00";
String currentdate=date+" "+time;
SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
Date d1 = null;
Date d2 = null;
try
{
d1 = format.parse(dateStart);
d2 = format.parse(currentdate);
long diff = d2.getTime() - d1.getTime();
long diffHours = diff / (3600000) % 24;
long diffDays = diff / (86400000);
int hours=(int)(diffDays*24+diffHours);
IntWritable hour=new IntWritable(hours);
return hour;
}
catch (Exception e)
{
e.printStackTrace();
}
return null;
}
}
我导出到/home/hadoop/mapreduce/HoursDiff.jar
我打开了蜂巢shell:
add jar /home/hadoop/mapreduce/HoursDiff.jar;
create temporary function hoursdiff as HoursDiff;
当我尝试执行以下命令时,我得到 FileNotFoundException
:
select hoursdiff(date,time) as hours from date_test;
堆栈跟踪
create temporary function hoursdiff as 'HoursDiff';
OK
Time taken: 0.009 seconds
hive> select hoursdiff(date,time) as hours from date_test;
Total jobs = 1
Launching Job 1 out of 1
Number of reduce tasks is set to 0 since there's no reduce operator
15/10/11 15:17:03 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Execution log at: /tmp/hadoop/hadoop_20151011151616_2c15561f-7cd2-4012-8bd2-b7dfcf488432.log
java.io.FileNotFoundException: File does not exist: hdfs://172.16.253.17:54310/home/hadoop/mapreduce/HoursDiff.jar
at org.apache.hadoop.hdfs.DistributedFileSystem.doCall(DistributedFileSystem.java:1122)
at org.apache.hadoop.hdfs.DistributedFileSystem.doCall(DistributedFileSystem.java:1114)
at org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81)
at org.apache.hadoop.hdfs.DistributedFileSystem.getFileStatus(DistributedFileSystem.java:1114)
at org.apache.hadoop.mapreduce.filecache.ClientDistributedCacheManager.getFileStatus(ClientDistributedCacheManager.java:288)
at org.apache.hadoop.mapreduce.filecache.ClientDistributedCacheManager.getFileStatus(ClientDistributedCacheManager.java:224)
at org.apache.hadoop.mapreduce.filecache.ClientDistributedCacheManager.determineTimestamps(ClientDistributedCacheManager.java:93)
at org.apache.hadoop.mapreduce.filecache.ClientDistributedCacheManager.determineTimestampsAndCacheVisibilities(ClientDistributedCacheManager.java:57)
at org.apache.hadoop.mapreduce.JobSubmitter.copyAndConfigureFiles(JobSubmitter.java:269)
at org.apache.hadoop.mapreduce.JobSubmitter.copyAndConfigureFiles(JobSubmitter.java:390)
at org.apache.hadoop.mapreduce.JobSubmitter.submitJobInternal(JobSubmitter.java:483)
at org.apache.hadoop.mapreduce.Job.run(Job.java:1296)
at org.apache.hadoop.mapreduce.Job.run(Job.java:1293)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1628)
at org.apache.hadoop.mapreduce.Job.submit(Job.java:1293)
at org.apache.hadoop.mapred.JobClient.run(JobClient.java:562)
at org.apache.hadoop.mapred.JobClient.run(JobClient.java:557)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1628)
at org.apache.hadoop.mapred.JobClient.submitJobInternal(JobClient.java:557)
at org.apache.hadoop.mapred.JobClient.submitJob(JobClient.java:548)
at org.apache.hadoop.hive.ql.exec.mr.ExecDriver.execute(ExecDriver.java:420)
at org.apache.hadoop.hive.ql.exec.mr.ExecDriver.main(ExecDriver.java:740)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:601)
at org.apache.hadoop.util.RunJar.run(RunJar.java:221)
at org.apache.hadoop.util.RunJar.main(RunJar.java:136)
Job Submission failed with exception 'java.io.FileNotFoundException(File does not exist: hdfs://172.16.253.17:54310/home/hadoop/mapreduce/HoursDiff.jar)'
Execution failed with exit status: 1
你所做的一切都是正确的,但它是在 HDFS 路径中搜索,你注册了本地路径。
将 jar 复制到 HDFS 位置 并尝试将其注册到 HDFS 路径。
我希望你用 HDFS 用户打开了 hive 终端,所以它正在搜索 HDFS 的路径。
注意:它也会接受本地路径来注册jar。