多线程与单线程
MultiThreading vs SingleThreading
我测试了一些简单的条件:
考虑一个长度为 10 000 000 的 int 数组。填充:
- 使用单(主)线程。
- 使用两个工作线程,并通过 join 等待它们完成。第一个线程从数组开头填充到中间,第二个线程从数组末尾填充到中间。
- 使用ExecutorService固定池(2),调用execute等待终止
- 使用具有默认工作线程数量(即可用处理器数量)的 ForkJoinPool
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.ForkJoinPool;
import java.util.concurrent.RecursiveAction;
import java.util.concurrent.TimeUnit;
/**
 * Micro-benchmark that fills a large {@code int} array with its own indices using four strategies
 * and prints the elapsed time of each, in milliseconds:
 * <ul>
 *   <li>a single (main) thread,</li>
 *   <li>two manually started threads, each filling one half,</li>
 *   <li>a fixed {@link ExecutorService} pool of two threads,</li>
 *   <li>a {@link ForkJoinPool} with the default parallelism.</li>
 * </ul>
 */
public class PerformanceTest {
    private static final int ARRAY_LENGTH = 10_000_000;
    private static final int ITERATIONS = 10;

    /** Array under test; replaced by a fresh instance before every measurement. */
    private static int[] array;

    /**
     * Runs every strategy {@code ITERATIONS} times and prints one timing line per strategy.
     *
     * @param args unused
     * @throws InterruptedException if the main thread is interrupted while waiting for workers
     */
    public static void main(String[] args) throws InterruptedException {
        for (int i = 0; i < ITERATIONS; i++) {
            array = new int[ARRAY_LENGTH];
            long start = System.nanoTime();
            singleWorkerFill();
            System.out.println("Single worker: " + elapsedMillis(start));

            array = new int[ARRAY_LENGTH];
            start = System.nanoTime();
            doubleWorkerFill();
            System.out.println("Double worker: " + elapsedMillis(start));

            // Each label now matches the strategy that is actually being timed.
            array = new int[ARRAY_LENGTH];
            start = System.nanoTime();
            executorWorkersFill();
            System.out.println("Executor workers: " + elapsedMillis(start));

            array = new int[ARRAY_LENGTH];
            start = System.nanoTime();
            forkJoinWorkersFill();
            System.out.println("ForkJoin workers: " + elapsedMillis(start));

            System.out.println("---------------------------------------------");
            Thread.sleep(1000);
        }
    }

    /** Returns the whole milliseconds elapsed since {@code startNanos} (a {@link System#nanoTime()} value). */
    private static long elapsedMillis(long startNanos) {
        return TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos);
    }

    /** Fills the whole array on the calling thread. */
    private static void singleWorkerFill() {
        for (int i = 0, len = array.length; i < len; i++) {
            array[i] = i;
        }
    }

    /** Fills the array with two dedicated threads and waits for both to finish. */
    private static void doubleWorkerFill() throws InterruptedException {
        Thread worker1 = new Thread(new HeadArrayFiller());
        Thread worker2 = new Thread(new TailArrayFiller());
        worker1.start();
        worker2.start();
        worker1.join();
        worker2.join();
    }

    /** Fills the array with a two-thread fixed pool and waits for the pool to terminate. */
    private static void executorWorkersFill() throws InterruptedException {
        ExecutorService executorService = Executors.newFixedThreadPool(2);
        try {
            executorService.execute(new HeadArrayFiller());
            executorService.execute(new TailArrayFiller());
        } finally {
            executorService.shutdown();
        }
        executorService.awaitTermination(Long.MAX_VALUE, TimeUnit.MILLISECONDS);
    }

    /** Fills the array with two tasks running concurrently in a {@link ForkJoinPool}. */
    private static void forkJoinWorkersFill() throws InterruptedException {
        ForkJoinPool pool = new ForkJoinPool();
        try {
            // Typed as RecursiveAction so submit(ForkJoinTask) is chosen unambiguously
            // (the fillers are also Runnable).
            RecursiveAction head = new HeadArrayFiller();
            RecursiveAction tail = new TailArrayFiller();
            // submit() returns immediately, so both halves run in parallel; invoke() would
            // block until the first task finished before the second one even started.
            pool.submit(head);
            pool.submit(tail);
            head.join();
            tail.join();
        } finally {
            pool.shutdown();
        }
        pool.awaitTermination(Long.MAX_VALUE, TimeUnit.MILLISECONDS);
    }

    /** Fills the lower half of the array, indices {@code [0, length / 2)}, in ascending order. */
    private static class HeadArrayFiller extends RecursiveAction implements Runnable {
        @Override
        public void run() {
            int[] target = array;
            for (int i = 0, middle = target.length / 2; i < middle; i++) {
                target[i] = i;
            }
        }

        @Override
        protected void compute() {
            run();
        }
    }

    /** Fills the upper half of the array, indices {@code [length / 2, length)}, in descending order. */
    private static class TailArrayFiller extends RecursiveAction implements Runnable {
        @Override
        public void run() {
            int[] target = array;
            // Start at the real last index so that no element is left unfilled.
            for (int i = target.length - 1, middle = target.length / 2; i >= middle; i--) {
                target[i] = i;
            }
        }

        @Override
        protected void compute() {
            run();
        }
    }
}
我预计单线程模型与其他模型相比没有机会,但事实并非如此。这是以毫秒为单位的测试结果:
ITERATION #1
Single worker: 7
Double worker: 10
Executor workers: 11
ForkJoin workers: 6
ITERATION #2
Single worker: 6
Double worker: 4
Executor workers: 5
ForkJoin workers: 4
ITERATION #3
Single worker: 4
Double worker: 4
Executor workers: 5
ForkJoin workers: 4
ITERATION #4
Single worker: 5
Double worker: 5
Executor workers: 5
ForkJoin workers: 4
ITERATION #5
Single worker: 5
Double worker: 5
Executor workers: 4
ForkJoin workers: 5
ITERATION #6
Single worker: 4
Double worker: 4
Executor workers: 5
ForkJoin workers: 4
ITERATION #7
Single worker: 4
Double worker: 4
Executor workers: 4
ForkJoin workers: 5
ITERATION #8
Single worker: 4
Double worker: 4
Executor workers: 4
ForkJoin workers: 5
ITERATION #9
Single worker: 4
Double worker: 4
Executor workers: 4
ForkJoin workers: 5
ITERATION #10
Single worker: 5
Double worker: 4
Executor workers: 4
ForkJoin workers: 4
如您所见,单线程模型在冷启动时比双线程模型更快。Fork-join 模型似乎是最好的,ExecutorService 也与之相近。
我猜测是 JIT 编译器随着迭代的进行做了一些优化,因此到测试结束时,各种方式的结果都非常接近。
无论如何,主要问题是 为什么双线程模型性能与单线程模型相同(甚至在冷启动时更慢)。我怎样才能达到预期快两倍的性能?
谢谢
对于现代计算机来说,初始化 1000 万个整数是一项非常快的任务。在两个独立的内核上并行执行所带来的好处,无法抵消(或者只能勉强抵消)启动线程、在线程之间进行上下文切换以及协调线程等开销。
开始在每次迭代中做更多的工作(例如休眠 5 毫秒),多线程的优势就会开始显现。
我测试了一些简单的条件:
考虑一个长度为 10 000 000 的 int 数组。填充:
- 使用单(主)线程。
- 使用两个工作线程,并通过 join 等待它们完成。第一个线程从数组开头填充到中间,第二个线程从数组末尾填充到中间。
- 使用ExecutorService固定池(2),调用execute等待终止
- 使用具有默认工作线程数量(即可用处理器数量)的 ForkJoinPool
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.ForkJoinPool;
import java.util.concurrent.RecursiveAction;
import java.util.concurrent.TimeUnit;
/**
 * Micro-benchmark that fills a large {@code int} array with its own indices using four strategies
 * and prints the elapsed time of each, in milliseconds:
 * <ul>
 *   <li>a single (main) thread,</li>
 *   <li>two manually started threads, each filling one half,</li>
 *   <li>a fixed {@link ExecutorService} pool of two threads,</li>
 *   <li>a {@link ForkJoinPool} with the default parallelism.</li>
 * </ul>
 */
public class PerformanceTest {
    private static final int ARRAY_LENGTH = 10_000_000;
    private static final int ITERATIONS = 10;

    /** Array under test; replaced by a fresh instance before every measurement. */
    private static int[] array;

    /**
     * Runs every strategy {@code ITERATIONS} times and prints one timing line per strategy.
     *
     * @param args unused
     * @throws InterruptedException if the main thread is interrupted while waiting for workers
     */
    public static void main(String[] args) throws InterruptedException {
        for (int i = 0; i < ITERATIONS; i++) {
            array = new int[ARRAY_LENGTH];
            long start = System.nanoTime();
            singleWorkerFill();
            System.out.println("Single worker: " + elapsedMillis(start));

            array = new int[ARRAY_LENGTH];
            start = System.nanoTime();
            doubleWorkerFill();
            System.out.println("Double worker: " + elapsedMillis(start));

            // Each label now matches the strategy that is actually being timed.
            array = new int[ARRAY_LENGTH];
            start = System.nanoTime();
            executorWorkersFill();
            System.out.println("Executor workers: " + elapsedMillis(start));

            array = new int[ARRAY_LENGTH];
            start = System.nanoTime();
            forkJoinWorkersFill();
            System.out.println("ForkJoin workers: " + elapsedMillis(start));

            System.out.println("---------------------------------------------");
            Thread.sleep(1000);
        }
    }

    /** Returns the whole milliseconds elapsed since {@code startNanos} (a {@link System#nanoTime()} value). */
    private static long elapsedMillis(long startNanos) {
        return TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos);
    }

    /** Fills the whole array on the calling thread. */
    private static void singleWorkerFill() {
        for (int i = 0, len = array.length; i < len; i++) {
            array[i] = i;
        }
    }

    /** Fills the array with two dedicated threads and waits for both to finish. */
    private static void doubleWorkerFill() throws InterruptedException {
        Thread worker1 = new Thread(new HeadArrayFiller());
        Thread worker2 = new Thread(new TailArrayFiller());
        worker1.start();
        worker2.start();
        worker1.join();
        worker2.join();
    }

    /** Fills the array with a two-thread fixed pool and waits for the pool to terminate. */
    private static void executorWorkersFill() throws InterruptedException {
        ExecutorService executorService = Executors.newFixedThreadPool(2);
        try {
            executorService.execute(new HeadArrayFiller());
            executorService.execute(new TailArrayFiller());
        } finally {
            executorService.shutdown();
        }
        executorService.awaitTermination(Long.MAX_VALUE, TimeUnit.MILLISECONDS);
    }

    /** Fills the array with two tasks running concurrently in a {@link ForkJoinPool}. */
    private static void forkJoinWorkersFill() throws InterruptedException {
        ForkJoinPool pool = new ForkJoinPool();
        try {
            // Typed as RecursiveAction so submit(ForkJoinTask) is chosen unambiguously
            // (the fillers are also Runnable).
            RecursiveAction head = new HeadArrayFiller();
            RecursiveAction tail = new TailArrayFiller();
            // submit() returns immediately, so both halves run in parallel; invoke() would
            // block until the first task finished before the second one even started.
            pool.submit(head);
            pool.submit(tail);
            head.join();
            tail.join();
        } finally {
            pool.shutdown();
        }
        pool.awaitTermination(Long.MAX_VALUE, TimeUnit.MILLISECONDS);
    }

    /** Fills the lower half of the array, indices {@code [0, length / 2)}, in ascending order. */
    private static class HeadArrayFiller extends RecursiveAction implements Runnable {
        @Override
        public void run() {
            int[] target = array;
            for (int i = 0, middle = target.length / 2; i < middle; i++) {
                target[i] = i;
            }
        }

        @Override
        protected void compute() {
            run();
        }
    }

    /** Fills the upper half of the array, indices {@code [length / 2, length)}, in descending order. */
    private static class TailArrayFiller extends RecursiveAction implements Runnable {
        @Override
        public void run() {
            int[] target = array;
            // Start at the real last index so that no element is left unfilled.
            for (int i = target.length - 1, middle = target.length / 2; i >= middle; i--) {
                target[i] = i;
            }
        }

        @Override
        protected void compute() {
            run();
        }
    }
}
我预计单线程模型与其他模型相比没有机会,但事实并非如此。这是以毫秒为单位的测试结果:
ITERATION #1 Single worker: 7 Double worker: 10 Executor workers: 11 ForkJoin workers: 6 ITERATION #2 Single worker: 6 Double worker: 4 Executor workers: 5 ForkJoin workers: 4 ITERATION #3 Single worker: 4 Double worker: 4 Executor workers: 5 ForkJoin workers: 4 ITERATION #4 Single worker: 5 Double worker: 5 Executor workers: 5 ForkJoin workers: 4 ITERATION #5 Single worker: 5 Double worker: 5 Executor workers: 4 ForkJoin workers: 5 ITERATION #6 Single worker: 4 Double worker: 4 Executor workers: 5 ForkJoin workers: 4 ITERATION #7 Single worker: 4 Double worker: 4 Executor workers: 4 ForkJoin workers: 5 ITERATION #8 Single worker: 4 Double worker: 4 Executor workers: 4 ForkJoin workers: 5 ITERATION #9 Single worker: 4 Double worker: 4 Executor workers: 4 ForkJoin workers: 5 ITERATION #10 Single worker: 5 Double worker: 4 Executor workers: 4 ForkJoin workers: 4
如您所见,单线程模型在冷启动时比双线程模型更快。Fork-join 模型似乎是最好的,ExecutorService 也与之相近。我猜测是 JIT 编译器随着迭代的进行做了一些优化,因此到测试结束时,各种方式的结果都非常接近。
无论如何,主要问题是 为什么双线程模型性能与单线程模型相同(甚至在冷启动时更慢)。我怎样才能达到预期快两倍的性能?
谢谢
对于现代计算机来说,初始化 1000 万个整数是一项非常快的任务。在两个独立的内核上并行执行所带来的好处,无法抵消(或者只能勉强抵消)启动线程、在线程之间进行上下文切换以及协调线程等开销。
开始在每次迭代中做更多的工作(例如休眠 5 毫秒),多线程的优势就会开始显现。