2 核 Mac 上的多个 java 线程 - 慢
Multiple java threads on a 2-core Mac - Slow
我一直在为我的 OS 课程编写以下代码,但我得到了一些奇怪的结果。该代码创建 x 个线程并同时运行它们以将两个平方矩阵相乘。每个线程将乘以输入矩阵的 Number_of_rows/Number_of_threads 行。
当 运行 它在具有 1...8 个线程的 1024X1024 矩阵上时,我得到最快的乘法发生在仅使用一个线程时。我希望配备 i5 处理器(2 核)的 MacBook pro 将利用两个内核,并且在使用两个线程时会产生更快的结果。
运行 使用一个线程的时间从约 9.2 秒,使用 8 个线程的时间从约 9.6 秒减少到 27 秒。
知道为什么会这样吗?
顺便说一句,关于代码的一些事情:
一个。假设两个矩阵具有相同的维度并且是正方形的。
b。假设线程数 <= rows/columns 的数量。
public class MatrixMultThread implements Runnable {
final static int MATRIX_SIZE = 1024;
final static int MAX_THREADS = MATRIX_SIZE;
private float[][] a;
private float[][] b;
private float[][] res;
private int startIndex;
private int endIndex;
public MatrixMultThread(float[][] a, float[][]b, float[][] res, int startIndex, int endIndex) {
this.a = a;
this.b = b;
this.res = res;
this.startIndex = startIndex;
this.endIndex = endIndex;
}
public void run() {
float value = 0;
for (int k = startIndex; k < endIndex; k++) {
for (int i = 0; i < a.length; i++) {
for (int j = 0; j < a.length; j++) {
value += a[k][j]*b[j][i];
}
res[k][i] = value;
value = 0;
}
}
}
public static float[][] mult(float[][] a, float[][] b, int threadCount){
// Get number of rows per each thread.
int rowsPerThread = (int) Math.ceil(MATRIX_SIZE / threadCount);
float[][] res = new float[MATRIX_SIZE][MATRIX_SIZE];
// Create thread array
Thread[] threadsArray = new Thread[threadCount];
int rowCounter = 0;
for (int i = 0; i < threadCount; i++) {
threadsArray[i] = new Thread(new MatrixMultThread(a,b,res,rowCounter, Math.max(rowCounter + rowsPerThread, MATRIX_SIZE)));
threadsArray[i].start();
rowCounter += rowsPerThread;
}
// Wait for all threads to end before finishing execution.
for (int i = 0; i < threadCount; i++) {
try {
threadsArray[i].join();
} catch (InterruptedException e) {
System.out.println("join failed");
}
}
return res;
}
public static void main(String args[]) {
// Create matrices and random generator
Random randomGenerator = new Random();
float[][] a = new float[MATRIX_SIZE][MATRIX_SIZE];
float[][] b = new float[MATRIX_SIZE][MATRIX_SIZE];
// Initialize two matrices with initial values from 1 to 10.
for (int i = 0; i < a.length; i++) {
for (int j = 0; j < a.length; j++) {
a[i][j] = randomGenerator.nextFloat() * randomGenerator.nextInt(100);
b[i][j] = randomGenerator.nextFloat() * randomGenerator.nextInt(100);
}
}
long startTime;
for (int i = 1; i <= 8; i++) {
startTime = System.currentTimeMillis();
mult(a,b,i);
System.out.println("Total running time is: " + (System.currentTimeMillis() - startTime) + " ms");
}
}
}
首先,一些日志记录会有所帮助。我为此做了日志记录,发现您的逻辑中存在错误。
这是日志
Starting execution for thread count: 1
Start index: 0
End index: 1024
Starting execution: MatrixMultiplier: 0
Ending executionMatrixMultiplier: 0
Total running time is: 6593 ms
Starting execution for thread count: 2
Start index: 0
End index: 1024 <------ This is the problem area
Start index: 512
End index: 1024
Starting execution: MatrixMultiplier: 1
Starting execution: MatrixMultiplier: 0
所有迭代中的第一个线程每次都执行整数乘法。这就是您看不到结果的原因。找出错误。
我一直在为我的 OS 课程编写以下代码,但我得到了一些奇怪的结果。该代码创建 x 个线程并同时运行它们以将两个平方矩阵相乘。每个线程将乘以输入矩阵的 Number_of_rows/Number_of_threads 行。
当 运行 它在具有 1...8 个线程的 1024X1024 矩阵上时,我得到最快的乘法发生在仅使用一个线程时。我希望配备 i5 处理器(2 核)的 MacBook pro 将利用两个内核,并且在使用两个线程时会产生更快的结果。 运行 使用一个线程的时间从约 9.2 秒,使用 8 个线程的时间从约 9.6 秒减少到 27 秒。
知道为什么会这样吗?
顺便说一句,关于代码的一些事情:
一个。假设两个矩阵具有相同的维度并且是正方形的。
b。假设线程数 <= rows/columns 的数量。
public class MatrixMultThread implements Runnable {
final static int MATRIX_SIZE = 1024;
final static int MAX_THREADS = MATRIX_SIZE;
private float[][] a;
private float[][] b;
private float[][] res;
private int startIndex;
private int endIndex;
public MatrixMultThread(float[][] a, float[][]b, float[][] res, int startIndex, int endIndex) {
this.a = a;
this.b = b;
this.res = res;
this.startIndex = startIndex;
this.endIndex = endIndex;
}
public void run() {
float value = 0;
for (int k = startIndex; k < endIndex; k++) {
for (int i = 0; i < a.length; i++) {
for (int j = 0; j < a.length; j++) {
value += a[k][j]*b[j][i];
}
res[k][i] = value;
value = 0;
}
}
}
public static float[][] mult(float[][] a, float[][] b, int threadCount){
// Get number of rows per each thread.
int rowsPerThread = (int) Math.ceil(MATRIX_SIZE / threadCount);
float[][] res = new float[MATRIX_SIZE][MATRIX_SIZE];
// Create thread array
Thread[] threadsArray = new Thread[threadCount];
int rowCounter = 0;
for (int i = 0; i < threadCount; i++) {
threadsArray[i] = new Thread(new MatrixMultThread(a,b,res,rowCounter, Math.max(rowCounter + rowsPerThread, MATRIX_SIZE)));
threadsArray[i].start();
rowCounter += rowsPerThread;
}
// Wait for all threads to end before finishing execution.
for (int i = 0; i < threadCount; i++) {
try {
threadsArray[i].join();
} catch (InterruptedException e) {
System.out.println("join failed");
}
}
return res;
}
public static void main(String args[]) {
// Create matrices and random generator
Random randomGenerator = new Random();
float[][] a = new float[MATRIX_SIZE][MATRIX_SIZE];
float[][] b = new float[MATRIX_SIZE][MATRIX_SIZE];
// Initialize two matrices with initial values from 1 to 10.
for (int i = 0; i < a.length; i++) {
for (int j = 0; j < a.length; j++) {
a[i][j] = randomGenerator.nextFloat() * randomGenerator.nextInt(100);
b[i][j] = randomGenerator.nextFloat() * randomGenerator.nextInt(100);
}
}
long startTime;
for (int i = 1; i <= 8; i++) {
startTime = System.currentTimeMillis();
mult(a,b,i);
System.out.println("Total running time is: " + (System.currentTimeMillis() - startTime) + " ms");
}
}
}
首先,一些日志记录会有所帮助。我为此做了日志记录,发现您的逻辑中存在错误。
这是日志
Starting execution for thread count: 1
Start index: 0
End index: 1024
Starting execution: MatrixMultiplier: 0
Ending executionMatrixMultiplier: 0
Total running time is: 6593 ms
Starting execution for thread count: 2
Start index: 0
End index: 1024 <------ This is the problem area
Start index: 512
End index: 1024
Starting execution: MatrixMultiplier: 1
Starting execution: MatrixMultiplier: 0
所有迭代中的第一个线程每次都执行整数乘法。这就是您看不到结果的原因。找出错误。