线程初始化
Thread initiallization
我想加快模拟的执行速度。这是主要功能的一部分:
double computeStart = wtime();
/* Main loop */
for (t = 0.0; t < t_end; t += del_t, iters++) {
setTimestepInterval(&del_t, imax, jmax, delx, dely, u, v, Re, tau);
ifluid = (imax * jmax) - ibound;
computeTentativeVelocity(u, v, f, g, flag, imax, jmax,
del_t, delx, dely, gamma, Re);
computeRhs(f, g, rhs, flag, imax, jmax, del_t, delx, dely);
if (ifluid > 0) {
itersor = poissonSolver(p, rhs, flag, imax, jmax, delx, dely,
eps, itermax, omega, &res, ifluid);
} else {
itersor = 0;
}
if (verbose > 1) {
printf("%d t:%g, del_t:%g, SOR iters:%3d, res:%e, bcells:%d\n",
iters, t+del_t, del_t, itersor, res, ibound);
}
updateVelocity(u, v, f, g, p, flag, imax, jmax, del_t, delx, dely);
applyBoundaryConditions(u, v, flag, imax, jmax, ui, vi);
} /* End of main loop */
double computeEnd = wtime();
我想并行化另一个文件中的函数 updateVelocity 和 applyBoundaryConditions。但是我不想用
#pragma omp parallel num_threads(2)
每次调用这两个函数时,开销都会很大。有没有一种方法可以只初始化线程一次,而不是每次调用 updateVelocity 和 applyBoundaryConditions 时都初始化它们。这些是函数:
void updateVelocity(float **u, float **v, float **f, float **g, float **p,
char **flag, int imax, int jmax, float del_t, float delx, float dely)
{
int i, j;
#pragma omp for schedule(static)
for (i=1; i<=imax-1; i++) {
for (j=1; j<=jmax; j++) {
/* only if both adjacent cells are fluid cells */
if ((flag[i][j] & C_F) && (flag[i+1][j] & C_F)) {
float pcalc = p[i+1][j]-p[i][j];
float mul = pcalc*del_t;
float div = mul/delx;
u[i][j] = f[i][j] - div;
// u[i][j] = f[i][j]-(p[i+1][j]-p[i][j])*del_t/delx;
}
}
}
#pragma omp for schedule(static)
for (i=1; i<=imax; i++) {
for (j=1; j<=jmax-1; j++) {
/* only if both adjacent cells are fluid cells */
if ((flag[i][j] & C_F) && (flag[i][j+1] & C_F)) {
float pcalcv = p[i][j+1]-p[i][j];
float mulv = pcalcv*del_t;
float divv = mulv / dely;
v[i][j] = g[i][j] - divv;
// v[i][j] = g[i][j]-(p[i][j+1]-p[i][j])*del_t/dely;
}
}
}
}
void applyBoundaryConditions(float **u, float **v, char **flag,
int imax, int jmax, float ui, float vi)
{
int i, j;
// imax is 600
// jmax is 200
// the larger the loop iterations the more efficient the parallelisation
// may not be optimisable under parallelisation
// 2 threads optimal
for (j=0; j<=jmax+1; j++) {
/* Fluid freely flows in from the west */
u[0][j] = u[1][j];
v[0][j] = v[1][j];
/* Fluid freely flows out to the east */
u[imax][j] = u[imax-1][j];
v[imax+1][j] = v[imax][j];
}
for (i=0; i<=imax+1; i++) {
/* The vertical velocity approaches 0 at the north and south
* boundaries, but fluid flows freely in the horizontal direction */
v[i][jmax] = 0.0;
u[i][jmax+1] = u[i][jmax];
v[i][0] = 0.0;
u[i][0] = u[i][1];
}
/* Apply no-slip boundary conditions to cells that are adjacent to
* internal obstacle cells. This forces the u and v velocity to
* tend towards zero in these cells.
*/
#pragma omp for schedule(static) // decrease runtime by about 9 secs
for (i=1; i<=imax; i++) {
for (j=1; j<=jmax; j++) {
if (flag[i][j] & B_NSEW) {
switch (flag[i][j]) {
case B_N:
v[i][j] = 0.0;
u[i][j] = -u[i][j+1];
u[i-1][j] = -u[i-1][j+1];
break;
case B_E:
u[i][j] = 0.0;
v[i][j] = -v[i+1][j];
v[i][j-1] = -v[i+1][j-1];
break;
case B_S:
v[i][j-1] = 0.0;
u[i][j] = -u[i][j-1];
u[i-1][j] = -u[i-1][j-1];
break;
case B_W:
u[i-1][j] = 0.0;
v[i][j] = -v[i-1][j];
v[i][j-1] = -v[i-1][j-1];
break;
case B_NE:
v[i][j] = 0.0;
u[i][j] = 0.0;
v[i][j-1] = -v[i+1][j-1];
u[i-1][j] = -u[i-1][j+1];
break;
case B_SE:
v[i][j-1] = 0.0;
u[i][j] = 0.0;
v[i][j] = -v[i+1][j];
u[i-1][j] = -u[i-1][j-1];
break;
case B_SW:
v[i][j-1] = 0.0;
u[i-1][j] = 0.0;
v[i][j] = -v[i-1][j];
u[i][j] = -u[i][j-1];
break;
case B_NW:
v[i][j] = 0.0;
u[i-1][j] = 0.0;
v[i][j-1] = -v[i-1][j-1];
u[i][j] = -u[i][j+1];
break;
}
}
}
}
/* Finally, fix the horizontal velocity at the western edge to have
* a continual flow of fluid into the simulation.
*/
v[0][0] = 2*vi-v[1][0];
for (j=1;j<=jmax;j++) {
u[0][j] = ui;
v[0][j] = 2*vi-v[1][j];
}
}
任何帮助将不胜感激。
But i do not want to use
#pragma omp parallel num_threads(2)
您需要使用 #pragma omp parallel
创建线程。
而不是:
#pragma omp for schedule(static)
for (i=1; i<=imax-1; i++) {
for (j=1; j<=jmax; j++) {
/* only if both adjacent cells are fluid cells */
if ((flag[i][j] & C_F) && (flag[i+1][j] & C_F)) {
float pcalc = p[i+1][j]-p[i][j];
float mul = pcalc*del_t;
float div = mul/delx;
u[i][j] = f[i][j] - div;
// u[i][j] = f[i][j]-(p[i+1][j]-p[i][j])*del_t/delx;
}
}
}
#pragma omp for schedule(static)
for (i=1; i<=imax; i++) {
for (j=1; j<=jmax-1; j++) {
/* only if both adjacent cells are fluid cells */
if ((flag[i][j] & C_F) && (flag[i][j+1] & C_F)) {
float pcalcv = p[i][j+1]-p[i][j];
float mulv = pcalcv*del_t;
float divv = mulv / dely;
v[i][j] = g[i][j] - divv;
// v[i][j] = g[i][j]-(p[i][j+1]-p[i][j])*del_t/dely;
}
}
}
尝试以下方法
#pragma omp parallel
{
#pragma omp for private(j)
for (i=1; i<=imax-1; i++) {
for (j=1; j<=jmax; j++) {
/* only if both adjacent cells are fluid cells */
if ((flag[i][j] & C_F) && (flag[i+1][j] & C_F)) {
float pcalc = p[i+1][j]-p[i][j];
float mul = pcalc*del_t;
float div = mul/delx;
u[i][j] = f[i][j] - div;
// u[i][j] = f[i][j]-(p[i+1][j]-p[i][j])*del_t/delx;
}
}
}
#pragma omp for private(j)
for (i=1; i<=imax; i++) {
for (j=1; j<=jmax-1; j++) {
/* only if both adjacent cells are fluid cells */
if ((flag[i][j] & C_F) && (flag[i][j+1] & C_F)) {
float pcalcv = p[i][j+1]-p[i][j];
float mulv = pcalcv*del_t;
float divv = mulv / dely;
v[i][j] = g[i][j] - divv;
// v[i][j] = g[i][j]-(p[i][j+1]-p[i][j])*del_t/dely;
}
}
}
}
您添加的另一个循环 #pragma omp for schedule(static)
#pragma omp for schedule(static) // decrease runtime by about 9 secs
for (i=1; i<=imax; i++) {
for (j=1; j<=jmax; j++) {
if (flag[i][j] & B_NSEW) {
switch (flag[i][j]) {
不应并行化,因为最外层循环的迭代之间存在依赖关系,即:
case B_N:
v[i][j] = 0.0;
u[i][j] = -u[i][j+1];
u[i-1][j] = -u[i-1][j+1]; // <-- This one
等等。
我想加快模拟的执行速度。这是主要功能的一部分:
double computeStart = wtime();
/* Main loop */
for (t = 0.0; t < t_end; t += del_t, iters++) {
setTimestepInterval(&del_t, imax, jmax, delx, dely, u, v, Re, tau);
ifluid = (imax * jmax) - ibound;
computeTentativeVelocity(u, v, f, g, flag, imax, jmax,
del_t, delx, dely, gamma, Re);
computeRhs(f, g, rhs, flag, imax, jmax, del_t, delx, dely);
if (ifluid > 0) {
itersor = poissonSolver(p, rhs, flag, imax, jmax, delx, dely,
eps, itermax, omega, &res, ifluid);
} else {
itersor = 0;
}
if (verbose > 1) {
printf("%d t:%g, del_t:%g, SOR iters:%3d, res:%e, bcells:%d\n",
iters, t+del_t, del_t, itersor, res, ibound);
}
updateVelocity(u, v, f, g, p, flag, imax, jmax, del_t, delx, dely);
applyBoundaryConditions(u, v, flag, imax, jmax, ui, vi);
} /* End of main loop */
double computeEnd = wtime();
我想并行化另一个文件中的函数 updateVelocity 和 applyBoundaryConditions。但是我不想用
#pragma omp parallel num_threads(2)
每次调用这两个函数时,开销都会很大。有没有一种方法可以只初始化线程一次,而不是每次调用 updateVelocity 和 applyBoundaryConditions 时都初始化它们。这些是函数:
void updateVelocity(float **u, float **v, float **f, float **g, float **p,
char **flag, int imax, int jmax, float del_t, float delx, float dely)
{
int i, j;
#pragma omp for schedule(static)
for (i=1; i<=imax-1; i++) {
for (j=1; j<=jmax; j++) {
/* only if both adjacent cells are fluid cells */
if ((flag[i][j] & C_F) && (flag[i+1][j] & C_F)) {
float pcalc = p[i+1][j]-p[i][j];
float mul = pcalc*del_t;
float div = mul/delx;
u[i][j] = f[i][j] - div;
// u[i][j] = f[i][j]-(p[i+1][j]-p[i][j])*del_t/delx;
}
}
}
#pragma omp for schedule(static)
for (i=1; i<=imax; i++) {
for (j=1; j<=jmax-1; j++) {
/* only if both adjacent cells are fluid cells */
if ((flag[i][j] & C_F) && (flag[i][j+1] & C_F)) {
float pcalcv = p[i][j+1]-p[i][j];
float mulv = pcalcv*del_t;
float divv = mulv / dely;
v[i][j] = g[i][j] - divv;
// v[i][j] = g[i][j]-(p[i][j+1]-p[i][j])*del_t/dely;
}
}
}
}
void applyBoundaryConditions(float **u, float **v, char **flag,
int imax, int jmax, float ui, float vi)
{
int i, j;
// imax is 600
// jmax is 200
// the larger the loop iterations the more efficient the parallelisation
// may not be optimisable under parallelisation
// 2 threads optimal
for (j=0; j<=jmax+1; j++) {
/* Fluid freely flows in from the west */
u[0][j] = u[1][j];
v[0][j] = v[1][j];
/* Fluid freely flows out to the east */
u[imax][j] = u[imax-1][j];
v[imax+1][j] = v[imax][j];
}
for (i=0; i<=imax+1; i++) {
/* The vertical velocity approaches 0 at the north and south
* boundaries, but fluid flows freely in the horizontal direction */
v[i][jmax] = 0.0;
u[i][jmax+1] = u[i][jmax];
v[i][0] = 0.0;
u[i][0] = u[i][1];
}
/* Apply no-slip boundary conditions to cells that are adjacent to
* internal obstacle cells. This forces the u and v velocity to
* tend towards zero in these cells.
*/
#pragma omp for schedule(static) // decrease runtime by about 9 secs
for (i=1; i<=imax; i++) {
for (j=1; j<=jmax; j++) {
if (flag[i][j] & B_NSEW) {
switch (flag[i][j]) {
case B_N:
v[i][j] = 0.0;
u[i][j] = -u[i][j+1];
u[i-1][j] = -u[i-1][j+1];
break;
case B_E:
u[i][j] = 0.0;
v[i][j] = -v[i+1][j];
v[i][j-1] = -v[i+1][j-1];
break;
case B_S:
v[i][j-1] = 0.0;
u[i][j] = -u[i][j-1];
u[i-1][j] = -u[i-1][j-1];
break;
case B_W:
u[i-1][j] = 0.0;
v[i][j] = -v[i-1][j];
v[i][j-1] = -v[i-1][j-1];
break;
case B_NE:
v[i][j] = 0.0;
u[i][j] = 0.0;
v[i][j-1] = -v[i+1][j-1];
u[i-1][j] = -u[i-1][j+1];
break;
case B_SE:
v[i][j-1] = 0.0;
u[i][j] = 0.0;
v[i][j] = -v[i+1][j];
u[i-1][j] = -u[i-1][j-1];
break;
case B_SW:
v[i][j-1] = 0.0;
u[i-1][j] = 0.0;
v[i][j] = -v[i-1][j];
u[i][j] = -u[i][j-1];
break;
case B_NW:
v[i][j] = 0.0;
u[i-1][j] = 0.0;
v[i][j-1] = -v[i-1][j-1];
u[i][j] = -u[i][j+1];
break;
}
}
}
}
/* Finally, fix the horizontal velocity at the western edge to have
* a continual flow of fluid into the simulation.
*/
v[0][0] = 2*vi-v[1][0];
for (j=1;j<=jmax;j++) {
u[0][j] = ui;
v[0][j] = 2*vi-v[1][j];
}
}
任何帮助将不胜感激。
But i do not want to use
#pragma omp parallel num_threads(2)
您需要使用 #pragma omp parallel
创建线程。
而不是:
#pragma omp for schedule(static)
for (i=1; i<=imax-1; i++) {
for (j=1; j<=jmax; j++) {
/* only if both adjacent cells are fluid cells */
if ((flag[i][j] & C_F) && (flag[i+1][j] & C_F)) {
float pcalc = p[i+1][j]-p[i][j];
float mul = pcalc*del_t;
float div = mul/delx;
u[i][j] = f[i][j] - div;
// u[i][j] = f[i][j]-(p[i+1][j]-p[i][j])*del_t/delx;
}
}
}
#pragma omp for schedule(static)
for (i=1; i<=imax; i++) {
for (j=1; j<=jmax-1; j++) {
/* only if both adjacent cells are fluid cells */
if ((flag[i][j] & C_F) && (flag[i][j+1] & C_F)) {
float pcalcv = p[i][j+1]-p[i][j];
float mulv = pcalcv*del_t;
float divv = mulv / dely;
v[i][j] = g[i][j] - divv;
// v[i][j] = g[i][j]-(p[i][j+1]-p[i][j])*del_t/dely;
}
}
}
尝试以下方法
#pragma omp parallel
{
#pragma omp for private(j)
for (i=1; i<=imax-1; i++) {
for (j=1; j<=jmax; j++) {
/* only if both adjacent cells are fluid cells */
if ((flag[i][j] & C_F) && (flag[i+1][j] & C_F)) {
float pcalc = p[i+1][j]-p[i][j];
float mul = pcalc*del_t;
float div = mul/delx;
u[i][j] = f[i][j] - div;
// u[i][j] = f[i][j]-(p[i+1][j]-p[i][j])*del_t/delx;
}
}
}
#pragma omp for private(j)
for (i=1; i<=imax; i++) {
for (j=1; j<=jmax-1; j++) {
/* only if both adjacent cells are fluid cells */
if ((flag[i][j] & C_F) && (flag[i][j+1] & C_F)) {
float pcalcv = p[i][j+1]-p[i][j];
float mulv = pcalcv*del_t;
float divv = mulv / dely;
v[i][j] = g[i][j] - divv;
// v[i][j] = g[i][j]-(p[i][j+1]-p[i][j])*del_t/dely;
}
}
}
}
您添加的另一个循环 #pragma omp for schedule(static)
#pragma omp for schedule(static) // decrease runtime by about 9 secs
for (i=1; i<=imax; i++) {
for (j=1; j<=jmax; j++) {
if (flag[i][j] & B_NSEW) {
switch (flag[i][j]) {
不应并行化,因为最外层循环的迭代之间存在依赖关系,即:
case B_N:
v[i][j] = 0.0;
u[i][j] = -u[i][j+1];
u[i-1][j] = -u[i-1][j+1]; // <-- This one
等等。