MRUnit 测试在使用 MULTIPLEOUTPUTS 写入 HDFS 时给出 NULLPOINTER 异常
MRUnit test giving NULLPOINTER exception while writing to HDFS using MULTIPLEOUTPUTS
我目前有一个 MapReduce 程序,它使用不同的文件名将数据发送到 HDFS。因此我在 Reducer 中使用 MultipleOutputs 写入 HDFS 中的不同文件(完整的 Reducer 代码见下文)。
我想用 MRUnit 测试我的代码,下面是我的测试方法。
/**
 * Feeds the reducer one empty key with a single empty value and expects one
 * (empty Text, NullWritable) record on the path output named by the empty string.
 */
@Test
public void reducerMRUnit() throws IOException {
    // Input side: a single empty Text value for the empty key.
    final ArrayList<Text> values = new ArrayList<Text>(0);
    values.add(new Text(""));

    // Expected side: key text used for the path-output expectation.
    final String output = "";

    reduceDriver.withInput(new Text(""), values);
    reduceDriver.withPathOutput(new Text(output), NullWritable.get(), "");
    reduceDriver.runTest();
}
但是,当我运行这个测试时,它抛出了 NPE。
java.lang.NullPointerException
at org.apache.hadoop.fs.Path.<init>(Path.java:104)
at org.apache.hadoop.fs.Path.<init>(Path.java:93)
at org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.getDefaultWorkFile(FileOutputFormat.java:286)
at org.apache.hadoop.mapreduce.lib.output.TextOutputFormat.getRecordWriter(TextOutputFormat.java:129)
at org.apache.hadoop.mapreduce.lib.output.MultipleOutputs.getRecordWriter(MultipleOutputs.java:476)
at org.apache.hadoop.mapreduce.lib.output.MultipleOutputs.write(MultipleOutputs.java:456)
at org.clinical3PO.learn.fasta.ArffToFastAReducer.reduce(ArffToFastAReducer.java:127)
at org.clinical3PO.learn.fasta.ArffToFastAReducer.reduce(ArffToFastAReducer.java:1)
at org.apache.hadoop.mapreduce.Reducer.run(Reducer.java:171)
at org.apache.hadoop.mrunit.mapreduce.ReduceDriver.run(ReduceDriver.java:265)
at org.apache.hadoop.mrunit.TestDriver.runTest(TestDriver.java:640)
at org.apache.hadoop.mrunit.TestDriver.runTest(TestDriver.java:627)
at org.clinical3PO.learn.fasta.MRUnitTest.ArffToFastAReducerMRUnitTest.reducerMRUnit(ArffToFastAReducerMRUnitTest.java:63)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:497)
at org.junit.runners.model.FrameworkMethod.runReflectiveCall(FrameworkMethod.java:44)
at org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:15)
at org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:41)
at org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:20)
at org.junit.internal.runners.statements.RunBefores.evaluate(RunBefores.java:28)
at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:76)
at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:50)
at org.junit.runners.ParentRunner.run(ParentRunner.java:193)
at org.junit.runners.ParentRunner.schedule(ParentRunner.java:52)
at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:191)
at org.junit.runners.ParentRunner.access$000(ParentRunner.java:42)
at org.junit.runners.ParentRunner.evaluate(ParentRunner.java:184)
at org.junit.runners.ParentRunner.run(ParentRunner.java:236)
at org.eclipse.jdt.internal.junit4.runner.JUnit4TestReference.run(JUnit4TestReference.java:50)
at org.eclipse.jdt.internal.junit.runner.TestExecution.run(TestExecution.java:38)
at org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.runTests(RemoteTestRunner.java:459)
at org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.runTests(RemoteTestRunner.java:675)
at org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.run(RemoteTestRunner.java:382)
at org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.main(RemoteTestRunner.java:192)
Reducer 代码:
import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
/**
 * Reducer that writes every incoming key, paired with a {@code NullWritable}
 * value, to the base output path "filename" through {@code MultipleOutputs}.
 */
public class AReducer extends Reducer<Text, Text, Text, NullWritable> {

    /** Created once in {@link #setup} and closed in {@link #cleanup}. */
    private MultipleOutputs<Text, NullWritable> mos = null;

    /**
     * Creates the {@code MultipleOutputs} instance used by all reduce calls.
     *
     * @param context the task context the outputs are bound to
     * @throws IOException declared for callers; see the overridden method
     */
    @Override
    public void setup(Context context) throws IOException {
        mos = new MultipleOutputs<Text, NullWritable>(context);
    }

    /**
     * Writes {@code key} with a {@code NullWritable} value to "filename".
     * The incoming {@code values} are not read.
     *
     * @param key     the key to emit
     * @param values  the values grouped under {@code key} (unused)
     * @param context the task context (unused here; outputs go through {@code mos})
     * @throws IOException          if the write fails
     * @throws InterruptedException if the write is interrupted
     */
    @Override
    public void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        // Reuse the instance from setup(). Building a new MultipleOutputs on every
        // call would leave all but the last one unclosed, because cleanup() only
        // closes whatever the field references at that point.
        // The output value type is NullWritable, so emit NullWritable.get();
        // there is no variable named "value" in scope.
        mos.write(key, NullWritable.get(), "filename");
    }

    /**
     * Closes the {@code MultipleOutputs} instance created in {@link #setup}.
     *
     * @param context the task context (unused)
     * @throws IOException          if closing fails
     * @throws InterruptedException if closing is interrupted
     */
    @Override
    public void cleanup(Context context) throws IOException, InterruptedException {
        mos.close();
    }
}
有什么建议吗?
MRUnit 目前有一个已知问题,而且文档记录不充分:测试 MultipleOutputs 时,需要使用 PowerMockRunner 运行测试,并通过 PrepareForTest 注解指定要 mock 的 Reducer 类。JIRA 问题 MRUNIT-13 和 MRUNIT-213 对此有详细讨论。MRUNIT-213 仍未解决/未修复。
将 PowerMock 添加到项目中还会带来一些额外的挑战:需要选用相互兼容的 Mockito 和 PowerMock 版本。“Using PowerMock with Mockito”文档列出了兼容的版本。
我尝试对您的示例进行了这些更改。这样就不再出现 NullPointerException,但我又遇到了最后一个问题:测试中声明的预期路径输出与 reducer 代码使用的 "filename" 路径不匹配。我修改了预期的路径输出,使测试完全通过。
这是我的最终结果:一个完整的项目与您的示例测试。享受吧!
pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<!-- Minimal Maven build for the MRUnit + MultipleOutputs reducer test example. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>test</groupId>
<artifactId>test-mrunit</artifactId>
<packaging>jar</packaging>
<version>0.0.1-SNAPSHOT</version>
<name>Test MRUnit</name>
<description>Test MRUnit</description>
<properties>
<!-- Versions shared by the Hadoop and PowerMock dependencies below. -->
<hadoop.version>2.7.1</hadoop.version>
<powermock.version>1.6.4</powermock.version>
</properties>
<dependencies>
<!-- Hadoop APIs used by the reducer; both are declared with provided scope. -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>${hadoop.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-core</artifactId>
<version>${hadoop.version}</version>
<scope>provided</scope>
</dependency>
<!-- Test-scoped libraries: JUnit, Mockito and three PowerMock modules. -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.11</version>
<scope>test</scope>
</dependency>
<!-- NOTE(review): Mockito and PowerMock versions must be compatible with each other; confirm this pairing against the PowerMock documentation before upgrading either. -->
<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-all</artifactId>
<version>1.10.19</version>
<scope>test</scope>
</dependency>
<!-- All PowerMock artifacts share ${powermock.version}. -->
<dependency>
<groupId>org.powermock</groupId>
<artifactId>powermock-core</artifactId>
<version>${powermock.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.powermock</groupId>
<artifactId>powermock-module-junit4</artifactId>
<version>${powermock.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.powermock</groupId>
<artifactId>powermock-api-mockito</artifactId>
<version>${powermock.version}</version>
<scope>test</scope>
</dependency>
<!-- MRUnit test drivers, selected with the hadoop2 classifier. -->
<dependency>
<groupId>org.apache.mrunit</groupId>
<artifactId>mrunit</artifactId>
<version>1.1.0</version>
<classifier>hadoop2</classifier>
<scope>test</scope>
</dependency>
</dependencies>
</project>
src/main/java/AReducer.java
import java.io.IOException;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
/**
 * Example reducer that emits each key with a {@link NullWritable} value to a
 * single base output path through {@link MultipleOutputs}.
 */
public class AReducer extends Reducer<Text, Text, Text, NullWritable> {

    /** Base output path passed to every {@link MultipleOutputs} write. */
    private static final String BASE_OUTPUT_PATH = "filename";

    /** Opened in {@link #setup} and closed in {@link #cleanup}. */
    private MultipleOutputs<Text, NullWritable> multipleOutputs;

    /**
     * Opens the {@link MultipleOutputs} bound to the given task context.
     *
     * @param context the task context the outputs are bound to
     * @throws IOException declared for callers; see the overridden method
     */
    @Override
    public void setup(Context context) throws IOException {
        this.multipleOutputs = new MultipleOutputs<Text, NullWritable>(context);
    }

    /**
     * Writes {@code key} once, with a {@link NullWritable} value; the grouped
     * {@code values} are not read.
     *
     * @param key     the key to emit
     * @param values  the values grouped under {@code key} (unused)
     * @param context the task context (unused here)
     * @throws IOException          if the write fails
     * @throws InterruptedException if the write is interrupted
     */
    @Override
    public void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        final NullWritable emptyValue = NullWritable.get();
        this.multipleOutputs.write(key, emptyValue, BASE_OUTPUT_PATH);
    }

    /**
     * Closes the {@link MultipleOutputs} opened in {@link #setup}.
     *
     * @param context the task context (unused)
     * @throws IOException          if closing fails
     * @throws InterruptedException if closing is interrupted
     */
    @Override
    public void cleanup(Context context) throws IOException, InterruptedException {
        this.multipleOutputs.close();
    }
}
src/test/java/TestAReducer.java
import java.io.IOException;
import java.util.ArrayList;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mrunit.mapreduce.ReduceDriver;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.powermock.core.classloader.annotations.PrepareForTest;
import org.powermock.modules.junit4.PowerMockRunner;
/**
 * MRUnit test for {@link AReducer}. It runs under {@link PowerMockRunner} with
 * {@link AReducer} prepared for test, which the accompanying text describes as
 * required when testing a reducer that uses MultipleOutputs.
 */
@RunWith(PowerMockRunner.class)
@PrepareForTest(AReducer.class)
public class TestAReducer {

    /**
     * Feeds one empty key with a single empty value and expects one
     * (empty Text, NullWritable) record on the "filename" path output.
     *
     * @throws IOException if the driver fails while running the reducer
     */
    @Test
    public void reducerMRUnit() throws IOException {
        // Parameterized driver instead of the raw type, so key/value types are
        // checked at compile time and no unchecked warnings are produced.
        ReduceDriver<Text, Text, Text, NullWritable> reduceDriver =
                new ReduceDriver<Text, Text, Text, NullWritable>(new AReducer());
        String output = "";
        ArrayList<Text> list = new ArrayList<Text>(0);
        list.add(new Text(""));
        reduceDriver.withInput(new Text(""), list);
        // The expected path must match the base output path the reducer writes to.
        reduceDriver.withPathOutput(new Text(output), NullWritable.get(), "filename");
        reduceDriver.runTest();
    }
}
我目前有一个 MapReduce 程序,它使用不同的文件名将数据发送到 HDFS。因此我在 Reducer 中使用 MultipleOutputs 写入 HDFS 中的不同文件(完整的 Reducer 代码见下文)。
我想用 MRUnit 测试我的代码,下面是我的测试方法。
/**
 * Feeds the reducer one empty key with a single empty value and expects one
 * (empty Text, NullWritable) record on the path output named by the empty string.
 */
@Test
public void reducerMRUnit() throws IOException {
    // Input side: a single empty Text value for the empty key.
    final ArrayList<Text> values = new ArrayList<Text>(0);
    values.add(new Text(""));

    // Expected side: key text used for the path-output expectation.
    final String output = "";

    reduceDriver.withInput(new Text(""), values);
    reduceDriver.withPathOutput(new Text(output), NullWritable.get(), "");
    reduceDriver.runTest();
}
但是,当我运行这个测试时,它抛出了 NPE。
java.lang.NullPointerException
at org.apache.hadoop.fs.Path.<init>(Path.java:104)
at org.apache.hadoop.fs.Path.<init>(Path.java:93)
at org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.getDefaultWorkFile(FileOutputFormat.java:286)
at org.apache.hadoop.mapreduce.lib.output.TextOutputFormat.getRecordWriter(TextOutputFormat.java:129)
at org.apache.hadoop.mapreduce.lib.output.MultipleOutputs.getRecordWriter(MultipleOutputs.java:476)
at org.apache.hadoop.mapreduce.lib.output.MultipleOutputs.write(MultipleOutputs.java:456)
at org.clinical3PO.learn.fasta.ArffToFastAReducer.reduce(ArffToFastAReducer.java:127)
at org.clinical3PO.learn.fasta.ArffToFastAReducer.reduce(ArffToFastAReducer.java:1)
at org.apache.hadoop.mapreduce.Reducer.run(Reducer.java:171)
at org.apache.hadoop.mrunit.mapreduce.ReduceDriver.run(ReduceDriver.java:265)
at org.apache.hadoop.mrunit.TestDriver.runTest(TestDriver.java:640)
at org.apache.hadoop.mrunit.TestDriver.runTest(TestDriver.java:627)
at org.clinical3PO.learn.fasta.MRUnitTest.ArffToFastAReducerMRUnitTest.reducerMRUnit(ArffToFastAReducerMRUnitTest.java:63)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:497)
at org.junit.runners.model.FrameworkMethod.runReflectiveCall(FrameworkMethod.java:44)
at org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:15)
at org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:41)
at org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:20)
at org.junit.internal.runners.statements.RunBefores.evaluate(RunBefores.java:28)
at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:76)
at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:50)
at org.junit.runners.ParentRunner.run(ParentRunner.java:193)
at org.junit.runners.ParentRunner.schedule(ParentRunner.java:52)
at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:191)
at org.junit.runners.ParentRunner.access$000(ParentRunner.java:42)
at org.junit.runners.ParentRunner.evaluate(ParentRunner.java:184)
at org.junit.runners.ParentRunner.run(ParentRunner.java:236)
at org.eclipse.jdt.internal.junit4.runner.JUnit4TestReference.run(JUnit4TestReference.java:50)
at org.eclipse.jdt.internal.junit.runner.TestExecution.run(TestExecution.java:38)
at org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.runTests(RemoteTestRunner.java:459)
at org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.runTests(RemoteTestRunner.java:675)
at org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.run(RemoteTestRunner.java:382)
at org.eclipse.jdt.internal.junit.runner.RemoteTestRunner.main(RemoteTestRunner.java:192)
Reducer 代码:
import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
/**
 * Reducer that writes every incoming key, paired with a {@code NullWritable}
 * value, to the base output path "filename" through {@code MultipleOutputs}.
 */
public class AReducer extends Reducer<Text, Text, Text, NullWritable> {

    /** Created once in {@link #setup} and closed in {@link #cleanup}. */
    private MultipleOutputs<Text, NullWritable> mos = null;

    /**
     * Creates the {@code MultipleOutputs} instance used by all reduce calls.
     *
     * @param context the task context the outputs are bound to
     * @throws IOException declared for callers; see the overridden method
     */
    @Override
    public void setup(Context context) throws IOException {
        mos = new MultipleOutputs<Text, NullWritable>(context);
    }

    /**
     * Writes {@code key} with a {@code NullWritable} value to "filename".
     * The incoming {@code values} are not read.
     *
     * @param key     the key to emit
     * @param values  the values grouped under {@code key} (unused)
     * @param context the task context (unused here; outputs go through {@code mos})
     * @throws IOException          if the write fails
     * @throws InterruptedException if the write is interrupted
     */
    @Override
    public void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        // Reuse the instance from setup(). Building a new MultipleOutputs on every
        // call would leave all but the last one unclosed, because cleanup() only
        // closes whatever the field references at that point.
        // The output value type is NullWritable, so emit NullWritable.get();
        // there is no variable named "value" in scope.
        mos.write(key, NullWritable.get(), "filename");
    }

    /**
     * Closes the {@code MultipleOutputs} instance created in {@link #setup}.
     *
     * @param context the task context (unused)
     * @throws IOException          if closing fails
     * @throws InterruptedException if closing is interrupted
     */
    @Override
    public void cleanup(Context context) throws IOException, InterruptedException {
        mos.close();
    }
}
有什么建议吗?
MRUnit 目前有一个已知问题,而且文档记录不充分:测试 MultipleOutputs 时,需要使用 PowerMockRunner 运行测试,并通过 PrepareForTest 注解指定要 mock 的 Reducer 类。JIRA 问题 MRUNIT-13 和 MRUNIT-213 对此有详细讨论。MRUNIT-213 仍未解决/未修复。
将 PowerMock 添加到项目中还会带来一些额外的挑战:需要选用相互兼容的 Mockito 和 PowerMock 版本。“Using PowerMock with Mockito”文档列出了兼容的版本。
我尝试对您的示例进行了这些更改。这样就不再出现 NullPointerException,但我又遇到了最后一个问题:测试中声明的预期路径输出与 reducer 代码使用的 "filename" 路径不匹配。我修改了预期的路径输出,使测试完全通过。
这是我的最终结果:一个完整的项目与您的示例测试。享受吧!
pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<!-- Minimal Maven build for the MRUnit + MultipleOutputs reducer test example. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>test</groupId>
<artifactId>test-mrunit</artifactId>
<packaging>jar</packaging>
<version>0.0.1-SNAPSHOT</version>
<name>Test MRUnit</name>
<description>Test MRUnit</description>
<properties>
<!-- Versions shared by the Hadoop and PowerMock dependencies below. -->
<hadoop.version>2.7.1</hadoop.version>
<powermock.version>1.6.4</powermock.version>
</properties>
<dependencies>
<!-- Hadoop APIs used by the reducer; both are declared with provided scope. -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>${hadoop.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-core</artifactId>
<version>${hadoop.version}</version>
<scope>provided</scope>
</dependency>
<!-- Test-scoped libraries: JUnit, Mockito and three PowerMock modules. -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.11</version>
<scope>test</scope>
</dependency>
<!-- NOTE(review): Mockito and PowerMock versions must be compatible with each other; confirm this pairing against the PowerMock documentation before upgrading either. -->
<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-all</artifactId>
<version>1.10.19</version>
<scope>test</scope>
</dependency>
<!-- All PowerMock artifacts share ${powermock.version}. -->
<dependency>
<groupId>org.powermock</groupId>
<artifactId>powermock-core</artifactId>
<version>${powermock.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.powermock</groupId>
<artifactId>powermock-module-junit4</artifactId>
<version>${powermock.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.powermock</groupId>
<artifactId>powermock-api-mockito</artifactId>
<version>${powermock.version}</version>
<scope>test</scope>
</dependency>
<!-- MRUnit test drivers, selected with the hadoop2 classifier. -->
<dependency>
<groupId>org.apache.mrunit</groupId>
<artifactId>mrunit</artifactId>
<version>1.1.0</version>
<classifier>hadoop2</classifier>
<scope>test</scope>
</dependency>
</dependencies>
</project>
src/main/java/AReducer.java
import java.io.IOException;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
/**
 * Example reducer that emits each key with a {@link NullWritable} value to a
 * single base output path through {@link MultipleOutputs}.
 */
public class AReducer extends Reducer<Text, Text, Text, NullWritable> {

    /** Base output path passed to every {@link MultipleOutputs} write. */
    private static final String BASE_OUTPUT_PATH = "filename";

    /** Opened in {@link #setup} and closed in {@link #cleanup}. */
    private MultipleOutputs<Text, NullWritable> multipleOutputs;

    /**
     * Opens the {@link MultipleOutputs} bound to the given task context.
     *
     * @param context the task context the outputs are bound to
     * @throws IOException declared for callers; see the overridden method
     */
    @Override
    public void setup(Context context) throws IOException {
        this.multipleOutputs = new MultipleOutputs<Text, NullWritable>(context);
    }

    /**
     * Writes {@code key} once, with a {@link NullWritable} value; the grouped
     * {@code values} are not read.
     *
     * @param key     the key to emit
     * @param values  the values grouped under {@code key} (unused)
     * @param context the task context (unused here)
     * @throws IOException          if the write fails
     * @throws InterruptedException if the write is interrupted
     */
    @Override
    public void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        final NullWritable emptyValue = NullWritable.get();
        this.multipleOutputs.write(key, emptyValue, BASE_OUTPUT_PATH);
    }

    /**
     * Closes the {@link MultipleOutputs} opened in {@link #setup}.
     *
     * @param context the task context (unused)
     * @throws IOException          if closing fails
     * @throws InterruptedException if closing is interrupted
     */
    @Override
    public void cleanup(Context context) throws IOException, InterruptedException {
        this.multipleOutputs.close();
    }
}
src/test/java/TestAReducer.java
import java.io.IOException;
import java.util.ArrayList;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mrunit.mapreduce.ReduceDriver;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.powermock.core.classloader.annotations.PrepareForTest;
import org.powermock.modules.junit4.PowerMockRunner;
/**
 * MRUnit test for {@link AReducer}. It runs under {@link PowerMockRunner} with
 * {@link AReducer} prepared for test, which the accompanying text describes as
 * required when testing a reducer that uses MultipleOutputs.
 */
@RunWith(PowerMockRunner.class)
@PrepareForTest(AReducer.class)
public class TestAReducer {

    /**
     * Feeds one empty key with a single empty value and expects one
     * (empty Text, NullWritable) record on the "filename" path output.
     *
     * @throws IOException if the driver fails while running the reducer
     */
    @Test
    public void reducerMRUnit() throws IOException {
        // Parameterized driver instead of the raw type, so key/value types are
        // checked at compile time and no unchecked warnings are produced.
        ReduceDriver<Text, Text, Text, NullWritable> reduceDriver =
                new ReduceDriver<Text, Text, Text, NullWritable>(new AReducer());
        String output = "";
        ArrayList<Text> list = new ArrayList<Text>(0);
        list.add(new Text(""));
        reduceDriver.withInput(new Text(""), list);
        // The expected path must match the base output path the reducer writes to.
        reduceDriver.withPathOutput(new Text(output), NullWritable.get(), "filename");
        reduceDriver.runTest();
    }
}