CyclicBarrier 浪费时间
CyclicBarrier Wasting Time
我正在实现一个并行算法。不使用 CyclicBarrier 时,它只需要顺序执行时间的一半就能完成工作;而使用 CyclicBarrier 之后,耗时最多会变成原来的 100 倍。下面附上创建线程的调用代码和线程执行的函数,方便您了解情况并帮我找出问题。CyclicBarrier 是重复使用的,而线程每次都会重新创建。不知为何,try { barrier.await(); } 这一段会空转等待很长时间。
//Threads use this ...
/**
 * Worker that runs one compare-exchange pass over the index range
 * {@code [start, end)} of the enclosing class's {@code data} array.
 *
 * <p>For each index {@code z} the partner index is {@code z ^ j}; a pair is
 * only handled from its lower index ({@code (z ^ j) > z}). When
 * {@code (z & k) == 0} the pair is swapped if {@code data[z] > data[z ^ j]},
 * otherwise it is swapped if {@code data[z] < data[z ^ j]}.
 *
 * <p>This is the version from the question: it awaits the shared
 * {@code barrier} once per element, inside the loop, which is the slow
 * behaviour being asked about.
 *
 * <p>NOTE(review): {@code loopSort} gives the last worker any remainder of
 * {@code data.length / N_THREADS}; a worker with a longer range than the
 * others would then wait on the barrier with nobody left to meet it --
 * confirm the range sizes are always equal.
 */
private class threadILoop implements Runnable {
    protected int start, end, j, k;

    /**
     * @param start first index (inclusive) this worker handles
     * @param end   last index (exclusive) this worker handles
     * @param j     XOR distance to the partner element
     * @param k     bit mask that selects the comparison direction
     */
    public threadILoop(int start, int end, int j, int k) {
        this.start = start;
        this.end = end;
        this.j = j;
        this.k = k;
    }

    @Override
    public void run() {
        for (int z = start; z < end; z++) {
            int zxj = z ^ j;
            if (zxj > z) {
                if ((z & k) == 0 && (data[z] > data[zxj]))
                    swap(z, zxj);
                if ((z & k) != 0 && (data[z] < data[zxj]))
                    swap(z, zxj);
            }
            // Per-element rendezvous with every other worker (the costly part).
            try {
                barrier.await();
            } catch (InterruptedException ex) {
                // Restore the interrupt status so code further up can still observe it.
                Thread.currentThread().interrupt();
                return;
            } catch (BrokenBarrierException ex) {
                // The barrier was broken while waiting; give up on the rest of the range.
                return;
            }
        }
    }
}
//Main Driver here, where the CyclicBarrier gets allocated and the threads //are spawned from.
// Driver for the parallel sort: allocates the shared CyclicBarrier once, then for each
// (k, j) pass with j > N_THREADS splits the index space across N_THREADS new threads.
// NOTE: this listing is truncated -- whatever happens when j <= N_THREADS, and the
// closing braces of both loops and of the method, are not shown.
// NOTE(review): the k/j/XOR pattern looks like a bitonic sorting network -- confirm.
private void loopSort() throws InterruptedException {
//print(data);
// One barrier with N_THREADS parties, assigned once and used by the workers of every pass.
barrier = new CyclicBarrier(N_THREADS);
int kMax = data.length;
// k doubles from 2 up to data.length; for each k, j halves from k/2 down to 1.
for(int k = 2; k<=kMax; k*=2){
for (int j = k/2; j > 0; j/=2) {
// Nominal number of indices per worker (integer division).
int piece = data.length/N_THREADS;
if(j > N_THREADS) {
//DIVIDE UP DATA SPACE FOR THREADS -> do work faster
int start = 0;
for(int i = 0; i < N_THREADS; i++)
{
// The last worker's range runs to data.length, so it absorbs any remainder.
int end = i == N_THREADS - 1 ? data.length : start + piece;
threads[i] = new Thread(new threadILoop(start, end, j, k));
//threads[i].start();
start = end;
}
// Start the workers only after all of them have been created.
for(int i = 0; i < N_THREADS; i++)
{
threads[i].start();
}
// print(data);
// Wait for every worker of this pass to finish.
for(int i = 0; i < N_THREADS; i++)
{
threads[i].join();
}
}
您把屏障(barrier)放在了循环中过深的位置。每个线程本应处理一段范围内的元素,但现在所有线程都是先处理一个元素,等待其他所有线程,再处理下一个元素,如此反复。在这种情况下,线程之间等待和通信的开销远远超过了实际处理的开销。
请尝试在与其他线程同步之前处理更多的元素,例如先处理完整个范围,然后再等待。
//Threads use this ...
/**
 * Worker that runs one compare-exchange pass over the index range
 * {@code [start, end)} of the enclosing class's {@code data} array and then
 * waits once on the shared {@code barrier}.
 *
 * <p>For each index {@code z} the partner index is {@code z ^ j}; a pair is
 * only handled from its lower index ({@code (z ^ j) > z}). When
 * {@code (z & k) == 0} the pair is swapped if {@code data[z] > data[z ^ j]},
 * otherwise it is swapped if {@code data[z] < data[z ^ j]}.
 *
 * <p>This is the corrected version from the answer: the barrier wait sits
 * after the loop, so each worker synchronises once per pass instead of once
 * per element.
 */
private class threadILoop implements Runnable {
    protected int start, end, j, k;

    /**
     * @param start first index (inclusive) this worker handles
     * @param end   last index (exclusive) this worker handles
     * @param j     XOR distance to the partner element
     * @param k     bit mask that selects the comparison direction
     */
    public threadILoop(int start, int end, int j, int k) {
        this.start = start;
        this.end = end;
        this.j = j;
        this.k = k;
    }

    @Override
    public void run() {
        for (int z = start; z < end; z++) {
            int zxj = z ^ j;
            if (zxj > z) {
                if ((z & k) == 0 && (data[z] > data[zxj]))
                    swap(z, zxj);
                if ((z & k) != 0 && (data[z] < data[zxj]))
                    swap(z, zxj);
            }
            // Wait moved from here
        }
        // To here (outside the inner loop)
        try {
            barrier.await();
        } catch (InterruptedException ex) {
            // Restore the interrupt status so code further up can still observe it.
            Thread.currentThread().interrupt();
        } catch (BrokenBarrierException ignored) {
            // The barrier was broken while waiting; the range is already processed,
            // so there is nothing left to do.
        }
    }
}
我正在实现一个并行算法。不使用 CyclicBarrier 时,它只需要顺序执行时间的一半就能完成工作;而使用 CyclicBarrier 之后,耗时最多会变成原来的 100 倍。下面附上创建线程的调用代码和线程执行的函数,方便您了解情况并帮我找出问题。CyclicBarrier 是重复使用的,而线程每次都会重新创建。不知为何,try { barrier.await(); } 这一段会空转等待很长时间。
//Threads use this ...
/**
 * Worker that runs one compare-exchange pass over the index range
 * {@code [start, end)} of the enclosing class's {@code data} array.
 *
 * <p>For each index {@code z} the partner index is {@code z ^ j}; a pair is
 * only handled from its lower index ({@code (z ^ j) > z}). When
 * {@code (z & k) == 0} the pair is swapped if {@code data[z] > data[z ^ j]},
 * otherwise it is swapped if {@code data[z] < data[z ^ j]}.
 *
 * <p>This is the version from the question: it awaits the shared
 * {@code barrier} once per element, inside the loop, which is the slow
 * behaviour being asked about.
 *
 * <p>NOTE(review): {@code loopSort} gives the last worker any remainder of
 * {@code data.length / N_THREADS}; a worker with a longer range than the
 * others would then wait on the barrier with nobody left to meet it --
 * confirm the range sizes are always equal.
 */
private class threadILoop implements Runnable {
    protected int start, end, j, k;

    /**
     * @param start first index (inclusive) this worker handles
     * @param end   last index (exclusive) this worker handles
     * @param j     XOR distance to the partner element
     * @param k     bit mask that selects the comparison direction
     */
    public threadILoop(int start, int end, int j, int k) {
        this.start = start;
        this.end = end;
        this.j = j;
        this.k = k;
    }

    @Override
    public void run() {
        for (int z = start; z < end; z++) {
            int zxj = z ^ j;
            if (zxj > z) {
                if ((z & k) == 0 && (data[z] > data[zxj]))
                    swap(z, zxj);
                if ((z & k) != 0 && (data[z] < data[zxj]))
                    swap(z, zxj);
            }
            // Per-element rendezvous with every other worker (the costly part).
            try {
                barrier.await();
            } catch (InterruptedException ex) {
                // Restore the interrupt status so code further up can still observe it.
                Thread.currentThread().interrupt();
                return;
            } catch (BrokenBarrierException ex) {
                // The barrier was broken while waiting; give up on the rest of the range.
                return;
            }
        }
    }
}
//Main Driver here, where the CyclicBarrier gets allocated and the threads //are spawned from.
// Driver for the parallel sort: allocates the shared CyclicBarrier once, then for each
// (k, j) pass with j > N_THREADS splits the index space across N_THREADS new threads.
// NOTE: this listing is truncated -- whatever happens when j <= N_THREADS, and the
// closing braces of both loops and of the method, are not shown.
// NOTE(review): the k/j/XOR pattern looks like a bitonic sorting network -- confirm.
private void loopSort() throws InterruptedException {
//print(data);
// One barrier with N_THREADS parties, assigned once and used by the workers of every pass.
barrier = new CyclicBarrier(N_THREADS);
int kMax = data.length;
// k doubles from 2 up to data.length; for each k, j halves from k/2 down to 1.
for(int k = 2; k<=kMax; k*=2){
for (int j = k/2; j > 0; j/=2) {
// Nominal number of indices per worker (integer division).
int piece = data.length/N_THREADS;
if(j > N_THREADS) {
//DIVIDE UP DATA SPACE FOR THREADS -> do work faster
int start = 0;
for(int i = 0; i < N_THREADS; i++)
{
// The last worker's range runs to data.length, so it absorbs any remainder.
int end = i == N_THREADS - 1 ? data.length : start + piece;
threads[i] = new Thread(new threadILoop(start, end, j, k));
//threads[i].start();
start = end;
}
// Start the workers only after all of them have been created.
for(int i = 0; i < N_THREADS; i++)
{
threads[i].start();
}
// print(data);
// Wait for every worker of this pass to finish.
for(int i = 0; i < N_THREADS; i++)
{
threads[i].join();
}
}
您把屏障(barrier)放在了循环中过深的位置。每个线程本应处理一段范围内的元素,但现在所有线程都是先处理一个元素,等待其他所有线程,再处理下一个元素,如此反复。在这种情况下,线程之间等待和通信的开销远远超过了实际处理的开销。
请尝试在与其他线程同步之前处理更多的元素,例如先处理完整个范围,然后再等待。
//Threads use this ...
/**
 * Worker that runs one compare-exchange pass over the index range
 * {@code [start, end)} of the enclosing class's {@code data} array and then
 * waits once on the shared {@code barrier}.
 *
 * <p>For each index {@code z} the partner index is {@code z ^ j}; a pair is
 * only handled from its lower index ({@code (z ^ j) > z}). When
 * {@code (z & k) == 0} the pair is swapped if {@code data[z] > data[z ^ j]},
 * otherwise it is swapped if {@code data[z] < data[z ^ j]}.
 *
 * <p>This is the corrected version from the answer: the barrier wait sits
 * after the loop, so each worker synchronises once per pass instead of once
 * per element.
 */
private class threadILoop implements Runnable {
    protected int start, end, j, k;

    /**
     * @param start first index (inclusive) this worker handles
     * @param end   last index (exclusive) this worker handles
     * @param j     XOR distance to the partner element
     * @param k     bit mask that selects the comparison direction
     */
    public threadILoop(int start, int end, int j, int k) {
        this.start = start;
        this.end = end;
        this.j = j;
        this.k = k;
    }

    @Override
    public void run() {
        for (int z = start; z < end; z++) {
            int zxj = z ^ j;
            if (zxj > z) {
                if ((z & k) == 0 && (data[z] > data[zxj]))
                    swap(z, zxj);
                if ((z & k) != 0 && (data[z] < data[zxj]))
                    swap(z, zxj);
            }
            // Wait moved from here
        }
        // To here (outside the inner loop)
        try {
            barrier.await();
        } catch (InterruptedException ex) {
            // Restore the interrupt status so code further up can still observe it.
            Thread.currentThread().interrupt();
        } catch (BrokenBarrierException ignored) {
            // The barrier was broken while waiting; the range is already processed,
            // so there is nothing left to do.
        }
    }
}