如何将 OpenMp 添加到三重嵌套 for 循环
How to add OpenMp to triple nested for-loop
目标是将尽可能多的 OpenMP 添加到以下 Cholesky 因子函数以增加并行化。到目前为止,我只有一个 #pragma omp parallel for
正确实施。 vector<vector<double>>
表示二维矩阵。我已经尝试为
添加 #pragma omp parallel for
for (int i = 0; i < n; ++i)
、for (int k = 0; k < i; ++k)
和 for (int j = 0; j < k; ++j)
但并行化出错了。 makeMatrix(n, n)
初始化大小为 nxn
.
的所有零的 vector<vector<double>>
vector<vector<double>> cholesky_factor(vector<vector<double>> input)
{
int n = input.size();
vector<vector<double>> result = makeMatrix(n, n);
for (int i = 0; i < n; ++i)
{
for (int k = 0; k < i; ++k)
{
double value = input[i][k];
for (int j = 0; j < k; ++j)
{
value -= result[i][j] * result[k][j];
}
result[i][k] = value / result[k][k];
}
double value = input[i][i];
#pragma omp parallel for
for (int j = 0; j < i; ++j)
{
value -= result[i][j] * result[i][j];
}
result[i][i] = std::sqrt(value);
}
return result;
}
我不认为你可以用这个算法并行化更多的东西,因为外循环的第 i
次迭代取决于第 i - 1
次迭代的结果和 k
次内循环迭代取决于第k - 1
次迭代的结果。
vector<vector<double>> cholesky_factor(vector<vector<double>> input)
{
int n = input.size();
vector<vector<double>> result = makeMatrix(n, n);
for (int i = 0; i < n; ++i)
{
for (int k = 0; k < i; ++k)
{
double value = input[i][k];
// reduction(-: value) does the same
// (private instances of value are initialized to zero and
// added to the initial instance of value when the threads are joining
#pragma omp parallel for reduction(+: value)
for (int j = 0; j < k; ++j)
{
value -= result[i][j] * result[k][j];
}
result[i][k] = value / result[k][k];
}
double value = input[i][i];
#pragma omp parallel for reduction(+: value)
for (int j = 0; j < i; ++j)
{
value -= result[i][j] * result[i][j];
}
result[i][i] = std::sqrt(value);
}
return result;
}
目标是将尽可能多的 OpenMP 添加到以下 Cholesky 因子函数以增加并行化。到目前为止,我只有一个 #pragma omp parallel for
正确实施。 vector<vector<double>>
表示二维矩阵。我已经尝试为
添加 #pragma omp parallel for
for (int i = 0; i < n; ++i)
、for (int k = 0; k < i; ++k)
和 for (int j = 0; j < k; ++j)
但并行化出错了。 makeMatrix(n, n)
初始化大小为 nxn
.
vector<vector<double>>
vector<vector<double>> cholesky_factor(vector<vector<double>> input)
{
int n = input.size();
vector<vector<double>> result = makeMatrix(n, n);
for (int i = 0; i < n; ++i)
{
for (int k = 0; k < i; ++k)
{
double value = input[i][k];
for (int j = 0; j < k; ++j)
{
value -= result[i][j] * result[k][j];
}
result[i][k] = value / result[k][k];
}
double value = input[i][i];
#pragma omp parallel for
for (int j = 0; j < i; ++j)
{
value -= result[i][j] * result[i][j];
}
result[i][i] = std::sqrt(value);
}
return result;
}
我不认为你可以用这个算法并行化更多的东西,因为外循环的第 i
次迭代取决于第 i - 1
次迭代的结果和 k
次内循环迭代取决于第k - 1
次迭代的结果。
vector<vector<double>> cholesky_factor(vector<vector<double>> input)
{
int n = input.size();
vector<vector<double>> result = makeMatrix(n, n);
for (int i = 0; i < n; ++i)
{
for (int k = 0; k < i; ++k)
{
double value = input[i][k];
// reduction(-: value) does the same
// (private instances of value are initialized to zero and
// added to the initial instance of value when the threads are joining
#pragma omp parallel for reduction(+: value)
for (int j = 0; j < k; ++j)
{
value -= result[i][j] * result[k][j];
}
result[i][k] = value / result[k][k];
}
double value = input[i][i];
#pragma omp parallel for reduction(+: value)
for (int j = 0; j < i; ++j)
{
value -= result[i][j] * result[i][j];
}
result[i][i] = std::sqrt(value);
}
return result;
}