Hadoop YARN 上的 JMH 基准测试
JMH Benchmark on Hadoop YARN
我已经为我的 MapReduce 作业编写了 JMH 基准测试。在本地模式下运行应用程序时一切正常,但当我在 Hadoop 集群上通过 yarn 脚本运行它时,会收到以下错误:
[cloudera@quickstart Desktop]$ ./launch_mapreduce.sh
# JMH 1.10 (released 5 days ago)
# VM invoker: /usr/java/jdk1.7.0_67-cloudera/jre/bin/java
# VM options: -Dproc_jar -Xmx1000m -Xms825955249 -Xmx825955249 -Djava.net.preferIPv4Stack=true -Dhadoop.log.dir=/usr/lib/hadoop-yarn/logs -Dyarn.log.dir=/usr/lib/hadoop-yarn/logs -Dhadoop.log.file=yarn.log -Dyarn.log.file=yarn.log -Dyarn.home.dir=/usr/lib/hadoop-yarn -Dhadoop.home.dir=/usr/lib/hadoop-yarn -Dhadoop.root.logger=INFO,console -Dyarn.root.logger=INFO,console -Djava.library.path=/usr/lib/hadoop/lib/native
# Warmup: 5 iterations, 1 s each
# Measurement: 5 iterations, 1 s each
# Timeout: 10 min per iteration
# Threads: 1 thread, will synchronize iterations
# Benchmark mode: Throughput, ops/time
# Benchmark: mgm.tp.bigdata.ma_mapreduce.MapReduceBenchmark.test
# Run progress: 0.00% complete, ETA 00:00:10
# Fork: 1 of 1
Error: Could not find or load main class org.openjdk.jmh.runner.ForkedMain
<forked VM failed with exit code 1>
<stdout last='20 lines'>
</stdout>
<stderr last='20 lines'>
Error: Could not find or load main class org.openjdk.jmh.runner.ForkedMain
</stderr>
# Run complete. Total time: 00:00:00
Benchmark Mode Cnt Score Error Units
我的 shell 脚本如下:
# Launch the benchmark jar through the YARN launcher script.
# NOTE(review): no main class is passed here, so the entry point presumably comes
# from the jar's manifest - confirm against the build configuration.
/usr/bin/yarn jar ma-mapreduce-benchmark.jar
我的基准选项是:
/**
 * Entry point that configures and starts the JMH run.
 *
 * <p>Selects benchmarks by the simple class name of {@code MapReduceBenchmark},
 * with 5 warmup iterations, 5 measurement iterations and 1 fork.
 *
 * @param args command-line arguments (not used)
 * @throws Exception if the JMH runner fails
 */
public static void main(String[] args) throws Exception {
    // The include filter is the benchmark class's simple name.
    final String benchmarkFilter = MapReduceBenchmark.class.getSimpleName();

    final Options runOptions = new OptionsBuilder()
            .include(benchmarkFilter)
            .warmupIterations(5)
            .measurementIterations(5)
            .forks(1)
            .build();

    final Runner runner = new Runner(runOptions);
    runner.run();
}
在我看来,JMH 无法在 Hadoop 集群上工作,因为基准测试会尝试在集群的每个节点上启动各自的 JVM。这行不通,因为各节点需要相互通信才能实现并行化。因此我改为手动计时:先测量程序的执行时间并重复多次,最后计算误差范围(即标准差)。
import java.io.BufferedWriter;
import java.io.FileOutputStream;
import java.io.OutputStreamWriter;
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;
/**
 * Hand-rolled wall-clock benchmark driver for the MapReduce job.
 *
 * <p>Invokes {@code JobMain.run()} a configurable number of times, records the
 * elapsed time of each invocation in nanoseconds, and writes the individual
 * timings together with their variance, mean and standard deviation to a file.
 */
public class MapReduceBenchmarkLauncher {

    /**
     * Reads the number of rounds from {@code /config.properties} (key
     * {@code benchmark.rounds}), times that many job executions and writes the
     * results to the file {@code mapreduce_benchmark}.
     *
     * @param args command-line arguments (not used)
     * @throws IllegalStateException if {@code /config.properties} is not on the classpath
     * @throws NumberFormatException if {@code benchmark.rounds} is missing or not an integer
     * @throws Exception if the job or the result file output fails
     */
    public static void main(String[] args) throws Exception {
        Properties pro = new Properties();
        // getResourceAsStream returns null for a missing resource; report that clearly
        // instead of letting Properties.load fail with a bare NullPointerException.
        // The stream is closed once the properties have been read.
        try (java.io.InputStream in =
                MapReduceBenchmarkLauncher.class.getResourceAsStream("/config.properties")) {
            if (in == null) {
                throw new IllegalStateException("/config.properties not found on the classpath");
            }
            pro.load(in);
        }
        int benchRounds = Integer.parseInt(pro.getProperty("benchmark.rounds"));

        List<Long> times = new ArrayList<Long>();
        for (int i = 0; i < benchRounds; i++) {
            JobMain jm = new JobMain(); // app being tested
            // Only run() is timed; constructing the job is outside the measured window.
            long start = System.nanoTime();
            jm.run();
            long end = System.nanoTime();
            times.add(end - start);
        }
        writeTimekeepingToFile(times, "mapreduce_benchmark");
    }

    /**
     * Writes each timing plus the variance, arithmetic mean and standard deviation
     * of {@code times} to {@code benchfile}, encoded as UTF-8. An existing file is
     * overwritten.
     *
     * <p>The variance is the population variance (sum of squared deviations divided
     * by the number of samples). For an empty list the mean, variance and standard
     * deviation are written as {@code NaN}.
     *
     * @param times elapsed times in nanoseconds, one entry per round
     * @param benchfile path of the file to write
     * @throws Exception if the file cannot be opened or written
     */
    public static void writeTimekeepingToFile(List<Long> times, String benchfile) throws Exception {
        final int count = times.size();

        // arithmetic mean
        double am = 0;
        for (long t : times) {
            am = am + t;
        }
        am = am / count;

        // variance calculation
        double v = 0;
        for (long t : times) {
            double deviation = t - am;
            v = v + deviation * deviation;
        }
        v = v / count;

        // calculating standard deviation
        double s = Math.sqrt(v);

        // output; try-with-resources closes the file even if a write fails
        try (FileOutputStream out = new FileOutputStream(benchfile);
                BufferedWriter br = new BufferedWriter(new OutputStreamWriter(out, "utf-8"))) {
            int round = 1;
            for (long t : times) {
                br.write("round " + round + ": " + t + " ns\n");
                round++;
            }
            br.write("varianz: v=" + v + "\n");
            br.write("standard deviation: t=(" + am + " \u00B1 " + s + ") ns");
        }
    }
}
我已经为我的 MapReduce 作业编写了 JMH 基准测试。在本地模式下运行应用程序时一切正常,但当我在 Hadoop 集群上通过 yarn 脚本运行它时,会收到以下错误:
[cloudera@quickstart Desktop]$ ./launch_mapreduce.sh
# JMH 1.10 (released 5 days ago)
# VM invoker: /usr/java/jdk1.7.0_67-cloudera/jre/bin/java
# VM options: -Dproc_jar -Xmx1000m -Xms825955249 -Xmx825955249 -Djava.net.preferIPv4Stack=true -Dhadoop.log.dir=/usr/lib/hadoop-yarn/logs -Dyarn.log.dir=/usr/lib/hadoop-yarn/logs -Dhadoop.log.file=yarn.log -Dyarn.log.file=yarn.log -Dyarn.home.dir=/usr/lib/hadoop-yarn -Dhadoop.home.dir=/usr/lib/hadoop-yarn -Dhadoop.root.logger=INFO,console -Dyarn.root.logger=INFO,console -Djava.library.path=/usr/lib/hadoop/lib/native
# Warmup: 5 iterations, 1 s each
# Measurement: 5 iterations, 1 s each
# Timeout: 10 min per iteration
# Threads: 1 thread, will synchronize iterations
# Benchmark mode: Throughput, ops/time
# Benchmark: mgm.tp.bigdata.ma_mapreduce.MapReduceBenchmark.test
# Run progress: 0.00% complete, ETA 00:00:10
# Fork: 1 of 1
Error: Could not find or load main class org.openjdk.jmh.runner.ForkedMain
<forked VM failed with exit code 1>
<stdout last='20 lines'>
</stdout>
<stderr last='20 lines'>
Error: Could not find or load main class org.openjdk.jmh.runner.ForkedMain
</stderr>
# Run complete. Total time: 00:00:00
Benchmark Mode Cnt Score Error Units
我的 shell 脚本如下:
# Launch the benchmark jar through the YARN launcher script.
# NOTE(review): no main class is passed here, so the entry point presumably comes
# from the jar's manifest - confirm against the build configuration.
/usr/bin/yarn jar ma-mapreduce-benchmark.jar
我的基准选项是:
/**
 * Entry point that configures and starts the JMH run.
 *
 * <p>Selects benchmarks by the simple class name of {@code MapReduceBenchmark},
 * with 5 warmup iterations, 5 measurement iterations and 1 fork.
 *
 * @param args command-line arguments (not used)
 * @throws Exception if the JMH runner fails
 */
public static void main(String[] args) throws Exception {
    // The include filter is the benchmark class's simple name.
    final String benchmarkFilter = MapReduceBenchmark.class.getSimpleName();

    final Options runOptions = new OptionsBuilder()
            .include(benchmarkFilter)
            .warmupIterations(5)
            .measurementIterations(5)
            .forks(1)
            .build();

    final Runner runner = new Runner(runOptions);
    runner.run();
}
在我看来,JMH 无法在 Hadoop 集群上工作,因为基准测试会尝试在集群的每个节点上启动各自的 JVM。这行不通,因为各节点需要相互通信才能实现并行化。因此我改为手动计时:先测量程序的执行时间并重复多次,最后计算误差范围(即标准差)。
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;
/**
 * Hand-rolled wall-clock benchmark driver for the MapReduce job.
 *
 * <p>Invokes {@code JobMain.run()} a configurable number of times, records the
 * elapsed time of each invocation in nanoseconds, and writes the individual
 * timings together with their variance, mean and standard deviation to a file.
 */
public class MapReduceBenchmarkLauncher {

    /**
     * Reads the number of rounds from {@code /config.properties} (key
     * {@code benchmark.rounds}), times that many job executions and writes the
     * results to the file {@code mapreduce_benchmark}.
     *
     * @param args command-line arguments (not used)
     * @throws IllegalStateException if {@code /config.properties} is not on the classpath
     * @throws NumberFormatException if {@code benchmark.rounds} is missing or not an integer
     * @throws Exception if the job or the result file output fails
     */
    public static void main(String[] args) throws Exception {
        Properties pro = new Properties();
        // getResourceAsStream returns null for a missing resource; report that clearly
        // instead of letting Properties.load fail with a bare NullPointerException.
        // The stream is closed once the properties have been read.
        try (java.io.InputStream in =
                MapReduceBenchmarkLauncher.class.getResourceAsStream("/config.properties")) {
            if (in == null) {
                throw new IllegalStateException("/config.properties not found on the classpath");
            }
            pro.load(in);
        }
        int benchRounds = Integer.parseInt(pro.getProperty("benchmark.rounds"));

        List<Long> times = new ArrayList<Long>();
        for (int i = 0; i < benchRounds; i++) {
            JobMain jm = new JobMain(); // app being tested
            // Only run() is timed; constructing the job is outside the measured window.
            long start = System.nanoTime();
            jm.run();
            long end = System.nanoTime();
            times.add(end - start);
        }
        writeTimekeepingToFile(times, "mapreduce_benchmark");
    }

    /**
     * Writes each timing plus the variance, arithmetic mean and standard deviation
     * of {@code times} to {@code benchfile}, encoded as UTF-8. An existing file is
     * overwritten.
     *
     * <p>The variance is the population variance (sum of squared deviations divided
     * by the number of samples). For an empty list the mean, variance and standard
     * deviation are written as {@code NaN}.
     *
     * @param times elapsed times in nanoseconds, one entry per round
     * @param benchfile path of the file to write
     * @throws Exception if the file cannot be opened or written
     */
    public static void writeTimekeepingToFile(List<Long> times, String benchfile) throws Exception {
        final int count = times.size();

        // arithmetic mean
        double am = 0;
        for (long t : times) {
            am = am + t;
        }
        am = am / count;

        // variance calculation
        double v = 0;
        for (long t : times) {
            double deviation = t - am;
            v = v + deviation * deviation;
        }
        v = v / count;

        // calculating standard deviation
        double s = Math.sqrt(v);

        // output; try-with-resources closes the file even if a write fails
        try (FileOutputStream out = new FileOutputStream(benchfile);
                BufferedWriter br = new BufferedWriter(new OutputStreamWriter(out, "utf-8"))) {
            int round = 1;
            for (long t : times) {
                br.write("round " + round + ": " + t + " ns\n");
                round++;
            }
            br.write("varianz: v=" + v + "\n");
            br.write("standard deviation: t=(" + am + " \u00B1 " + s + ") ns");
        }
    }
}