NoClassDefFoundError joda DateTimeFormat 在 hadoop cloudera 上使用 aws-kinesis

NoClassDefFoundError joda DateTimeFormat using aws-kinesis on hadoop cloudera

我收到一个奇怪的错误 运行在 hadoop (CDH 5.5.1) 上使用 aws kinesis。

我可以用这个简单的方法重现错误 class :

import com.amazonaws.auth.AWSCredentials;
import com.amazonaws.auth.BasicAWSCredentials;
import com.amazonaws.services.kinesis.AmazonKinesisClient;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.joda.time.format.DateTimeFormat;

public class CollectJob extends Configured implements Tool {

    public static void main(String[] args) throws Exception {

        int exitCode = ToolRunner.run(new CollectJob(), args);
        System.exit(exitCode);
    }

    @Override
    public int run(String[] args) throws Exception {

        System.out.println(DateTimeFormat.class);
        System.out.println(DateTimeFormat.class.getProtectionDomain().getCodeSource().getLocation());

        System.out.println(AmazonKinesisClient.class);
        System.out.println(AmazonKinesisClient.class.getProtectionDomain().getCodeSource().getLocation());

        AWSCredentials credentials = new BasicAWSCredentials("toto", "tata");
        AmazonKinesisClient kinesisClient = new AmazonKinesisClient(credentials);
        kinesisClient.setEndpoint("kinesis.eu-west-1.amazonaws.com");

        return 0;
    }
}

我的 pom :

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
     xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
     xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>test</groupId>
    <artifactId>event-collector</artifactId>
    <version>1.0-SNAPSHOT</version>

    <repositories>
    <repository>
        <id>cloudera-repo-releases</id>
        <url>https://repository.cloudera.com/artifactory/repo/</url>
    </repository>
    </repositories>

    <dependencies>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-common</artifactId>
        <version>2.6.0-cdh5.5.1</version>
        <scope>provided</scope>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-core</artifactId>
        <version>2.6.0-mr1-cdh5.5.1</version>
        <scope>provided</scope>
    </dependency>
    <dependency>
        <groupId>com.amazonaws</groupId>
        <artifactId>aws-java-sdk</artifactId>
        <version>1.7.4</version>
        <scope>provided</scope>
    </dependency>
    <dependency>
        <groupId>joda-time</groupId>
        <artifactId>joda-time</artifactId>
        <version>2.4</version>
    </dependency>
    </dependencies>

    <build>
    <plugins>
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-compiler-plugin</artifactId>
            <configuration>
                <source>1.7</source>
                <target>1.7</target>
            </configuration>
        </plugin>
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-shade-plugin</artifactId>
            <executions>
                <execution>
                    <phase>package</phase>
                    <goals>
                        <goal>shade</goal>
                    </goals>
                    <configuration>
                        <transformers>
                            <transformer implementation="org.apache.maven.plugins.shade.resource.ApacheLicenseResourceTransformer"/>
                            <transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
                                <resource>reference.conf</resource>
                            </transformer>
                        </transformers>
                    </configuration>
                </execution>
            </executions>
        </plugin>
    </plugins>
    </build>
</project>

然后我运行我的主要:

yarn jar event-collector-1.0-SNAPSHOT.jar CollectJob

得到这个标准输出:

class org.joda.time.format.DateTimeFormat
file:/tmp/hadoop-unjar3521528256461676644/
class com.amazonaws.services.kinesis.AmazonKinesisClient
file:/opt/cloudera/parcels/CDH-5.5.1-1.cdh5.5.1.p0.11/jars/aws-java-sdk-1.7.4.jar
Exception in thread "main" java.lang.NoClassDefFoundError: org/joda/time/format/DateTimeFormat
    at com.amazonaws.auth.AWS4Signer.<clinit>(AWS4Signer.java:44)
    at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
    at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:57)
    at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
    at java.lang.reflect.Constructor.newInstance(Constructor.java:526)
    at java.lang.Class.newInstance(Class.java:374)
    at com.amazonaws.auth.SignerFactory.createSigner(SignerFactory.java:119)
    at com.amazonaws.auth.SignerFactory.lookupAndCreateSigner(SignerFactory.java:105)
    at com.amazonaws.auth.SignerFactory.getSigner(SignerFactory.java:78)
    at com.amazonaws.AmazonWebServiceClient.computeSignerByServiceRegion(AmazonWebServiceClient.java:307)
    at com.amazonaws.AmazonWebServiceClient.computeSignerByURI(AmazonWebServiceClient.java:280)
    at com.amazonaws.AmazonWebServiceClient.setEndpoint(AmazonWebServiceClient.java:160)
    at com.amazonaws.services.kinesis.AmazonKinesisClient.setEndpoint(AmazonKinesisClient.java:2102)
    at com.amazonaws.services.kinesis.AmazonKinesisClient.init(AmazonKinesisClient.java:216)
    at com.amazonaws.services.kinesis.AmazonKinesisClient.<init>(AmazonKinesisClient.java:139)
    at com.amazonaws.services.kinesis.AmazonKinesisClient.<init>(AmazonKinesisClient.java:116)
    at tivan.CollectJob.run(CollectJob.java:29)
    at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:70)
    at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:84)
    at tivan.CollectJob.main(CollectJob.java:15)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:606)
    at org.apache.hadoop.util.RunJar.run(RunJar.java:221)
    at org.apache.hadoop.util.RunJar.main(RunJar.java:136)
Caused by: java.lang.ClassNotFoundException: org.joda.time.format.DateTimeFormat
    at java.net.URLClassLoader.run(URLClassLoader.java:366)
    at java.net.URLClassLoader.run(URLClassLoader.java:355)
    at java.security.AccessController.doPrivileged(Native Method)
    at java.net.URLClassLoader.findClass(URLClassLoader.java:354)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:425)
    at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:358)
    ... 26 more

我尝试使用 YARN_USER_CLASSPATH_FIRST true 或 false,但得到了同样的错误。 我不明白这是怎么回事...

求助! :)


编辑更多细节:同样失败:

    @Override
    public int run(String[] args) throws Exception {

        //print DateTimeFormat class infos
        System.out.println(DateTimeFormat.class);
        System.out.println(DateTimeFormat.class.getProtectionDomain().getCodeSource().getLocation());

        //print AWS4Signer class infos
        System.out.println(AWS4Signer.class);
        System.out.println(AWS4Signer.class.getProtectionDomain().getCodeSource().getLocation());

        //test joda time class (copy-paste from AWS4Signer source code)
        DateTimeFormatter dateFormatter = DateTimeFormat.forPattern("yyyyMMdd").withZoneUTC();
        System.out.println(dateFormatter);

        //create AWS4Signer instance -> ClassNotFound on DateTimeFormat !!!
        AWS4Signer signer = new AWS4Signer();
        System.out.println(signer);

        return 0;
    }

你能为 pom.xml 试试这个吗:-

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
     xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
     xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>test</groupId>
    <artifactId>event-collector</artifactId>
    <version>1.0-SNAPSHOT</version>

    <repositories>
    <repository>
        <id>cloudera-repo-releases</id>
        <url>https://repository.cloudera.com/artifactory/repo/</url>
    </repository>
    </repositories>

    <dependencies>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-common</artifactId>
        <version>2.6.0-cdh5.5.1</version>
        <scope>provided</scope>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-core</artifactId>
        <version>2.6.0-mr1-cdh5.5.1</version>
        <scope>provided</scope>
    </dependency>
    <dependency>
        <groupId>com.amazonaws</groupId>
        <artifactId>aws-java-sdk</artifactId>
        <version>1.7.4</version>
        <scope>provided</scope>
    </dependency>
    <dependency>
        <groupId>joda-time</groupId>
        <artifactId>joda-time</artifactId>
        <version>2.4</version>
    </dependency>
    </dependencies>

    <build>
    <plugins>
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-compiler-plugin</artifactId>
            <configuration>
                <source>1.7</source>
                <target>1.7</target>
            </configuration>
        </plugin>
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-shade-plugin</artifactId>
            <executions>
                <execution>
                    <phase>package</phase>
                    <goals>
                        <goal>shade</goal>
                    </goals>
                </execution>
            </executions>
        </plugin>
    </plugins>
    </build>
</project>

我终于找到了解决方案,我只需要导出以下变量:

export YARN_USER_CLASSPATH=pathtomyjar