OpenMP:访问冲突和其他错误
OpenMP: Access violation and other errors
前言
最近,我在我们组的项目代码中引入了 OpenMP。main 在两个 for 循环中运行:外层循环控制 'run'(运行),内层循环控制 'generation'(世代)。不同运行之间的世代完全相互独立,但每个世代依赖于同一运行中的其他世代。
想法是并行化外层循环,即 'run' 循环,同时让每个线程在分配给它的任何特定 运行 编号上保持世代演化。
问题
当设置 OMP_THREADS = 1
时,即让程序只用一个线程运行时,它运行得很顺利。如果这个数字更高,我会收到以下错误:
Unhandled exception at 0x00F5C4C3 in projectc.exe: 0xC0000005: Access violation writing location 0x00000072.
以下内容出现在 Visual Studio 的 "Autos" 部分:
(注意:t
、t->active_cells
和t->cellx
是"error red",而其余的当我得到这个错误时是白色的)
如果我在外循环正上方的 #pragma
中将 default(none)
更改为 default(shared)
,并从 threadprivate 中删除 t、s 和 bn(这些是在外部文件中初始化的结构),那么程序通常会在每个线程上运行一个世代后冻结(尽管 CPU 活动表明两个线程仍以与之前相同的强度运行)。
尝试解决方案
我不知道出了什么问题。在外循环之外尝试一个简单的 #pragma omp parallel for
当然是行不通的,但我也尝试过将所有 main 声明为 #pragma omp parallel
并将外循环声明为 #pragma omp for
。我也尝试过其他一些细微的变体,这让我得出结论:问题一定与线程之间共享变量的方式有关……因为所有运行(也就是所有线程)都是相互独立的,实际上所有变量都可以设置为 private;尽管您会在 shared(..) 中看到一些重叠。
下面附上代码。
main.c
/* General Includes */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <omp.h>
/* Project Includes */
#include "main.h"
#include "randgen.h"
#include "board7.h"
#include "tissue.h"
#include "io.h"
/* Evaluates to `arg` with bit `posn` toggled; `arg` itself is not modified. */
#define BitFlp(arg,posn) ((arg) ^ (1L << (posn)))
/* Evaluates to `arg` with bit `posn` cleared; `arg` itself is not modified. */
#define BitClr(arg,posn) ((arg) & ~(1L << (posn)))
#define display_dbg 1 //Controls whether print statements in main.c are displayed.
#define display_time 1 //Controls whether timing print statements are executed.
/* No trailing semicolon: the macro must be usable inside larger expressions. */
#define BILLION 1000000000L
#define num_runs 10 //Controls number of runs per simulation
#define num_gens 4000 //Controls number of generations per run
#define OMP_THREADS 1 // Max number of threads used if OpenMP is enabled
/* Index of the outer (run) loop in main; deliberately absent from the
 * threadprivate list below. */
int i;

/* Values main reads from the state files before the run loop starts. */
int z, x, sxa, y;

/* Per-run / per-generation working state used inside main's loops. */
int j, w, j1, j2, flagb, rn, sx;
int collect_data, max_fit_gen;
float max_fitness;
calookup ra;

/* Remaining globals; main as shown in this file does not touch them. */
int n, r, m, lb_run;
char a;
float f;
tissuen *fx;
input_vec dx;

/* Every thread gets its own copy of the variables listed here. run_data, t,
 * s and bn are not defined in this part of the file. */
#pragma omp threadprivate(n, r, j, x, z, sxa, y, flagb, m, j1, j2, a, \
max_fit_gen, collect_data, lb_run, w, rn, sx, f, max_fitness, \
fx, dx, ra, run_data, t, s, bn)
int main(int argc, char *argv[])
{
int* p = 0x00000000; // pointer to NULL
char sa[256];
char ss[10];
long randn;
boardtable ba;
srand((unsigned)time(NULL));
init_mm();
randn = number_range(1, 100);
#ifdef OS_WINDOWS
// Timing parameters
LARGE_INTEGER clk_freq;
LARGE_INTEGER t1, t2, t3;
#endif
#ifdef OS_UNIX
struct timespec clk_freq, t1, t2, t3;
#endif
double avg_gen_time, avg_run_time, run_time, sim_time, est_run_time, est_sim_time;
// File System and IO Parameters
char cwd[FILENAME_MAX];
getcwd(&cwd, sizeof(cwd));
char curState[FILENAME_MAX];
char recState[FILENAME_MAX];
char recMode[FILENAME_MAX];
char curGen[FILENAME_MAX];
char curRun[FILENAME_MAX];
char genTmp[FILENAME_MAX];
strcpy(curState, cwd);
strcpy(recState, cwd);
strcpy(recMode, cwd);
strcpy(curGen, cwd);
strcpy(curRun, cwd);
strcpy(genTmp, cwd);
#ifdef OS_WINDOWS
strcat(curState, "\current.txt");
strcat(recState, "\recover.txt");
strcat(recMode, "\recovermode.txt");
strcat(curGen, "\gen.txt");
strcat(curRun, "\run");
strcat(genTmp, "\tmp\gentmp");
#endif
#ifdef OS_UNIX
strcat(curState, "/current.txt");
strcat(recState, "/recover.txt");
strcat(recMode, "/recovermode.txt");
strcat(curGen, "/gen.txt");
strcat(curRun, "/run");
strcat(genTmp, "/tmp/gentmp");
#endif
//Read current EA run variables (i.e. current run number, generation, recover mode status)
z = readorcreate(curState);
x = readorcreate(recState);
sxa = readorcreate(recMode);
y = readorcreate(curGen);
//Initialize simulation parameters
s.count = 0;
s.x[0] = 0;
s.y[0] = 0;
s.addvec[0] = 0;
s.bestnum = 0;
s.countb = 0;
s.count = 0;
initialize_sim_param(&s, 0, 200);
collect_data = 0;
//Build a collection of experiment initial conditions
buildboardcollection7(&bn);
//Determine clock frequency.
#ifdef OS_WINDOWS
if (display_time) get_frequency(&clk_freq);
#endif
#ifdef OS_UNIX
if (display_time) get_frequency(CLOCK_REALTIME, &clk_freq);
#endif
//Start simulation timer
#ifdef OS_WINDOWS
if (display_time) read_clock(&t1);
#endif
#ifdef OS_UNIX
if (display_time) read_clock(CLOCK_REALTIME, &t1);
#endif
#pragma omp parallel for schedule(static) default(none) num_threads(OMP_THREADS) \
private(sa, ss, randn, ba, t2, t3, avg_gen_time, avg_run_time, sim_time, \
run_time, est_run_time, est_sim_time) \
shared(i, cwd, recMode, curRun, curGen, curState, genTmp, clk_freq, t1)
for (i = z; i < num_runs; i++)
{
// randomly initialize content of tissue population
initialize_tissue_pop_s2(&(t.tgen[0]), &s);
initialize_tissue_pop_s2(&(t.tgen[1]), &s);
max_fit_gen = 0;
max_fitness = 0.0;
flagb = 0;
if ((i == z) && (x == 1))
{
w = y;
}
else
{
w = 0;
}
rn = 200;
j1 = 0;
s.run_num = i;
s.maxfitness = 0.0;
//Start run timer
#ifdef OS_WINDOWS
if (display_time) read_clock(&t2);
#endif
#ifdef OS_UNIX
if (display_time) read_clock(CLOCK_REALTIME, &t2);
#endif
#if defined(_OPENMP)
printf("\n ======================================= \n");
printf(" OpenMP Status Message \n");
printf("\n --------------------------------------- \n");
printf("| RUN %d : \n", i);
printf("| New Thread Process (Thread %d) \n", omp_get_thread_num());
printf("| Available Threads: %d of %d \n", omp_get_num_threads(), omp_get_max_threads());
printf(" ======================================= \n\n");
#endif
for (j = w; j < num_gens; j++)
{
// Flips on lightboard data collection. See board7.h.
if (enable_collection == 1) {
if ((i >= run_collect) && (j >= gen_collect)) { collect_data = 1; }
}
sx = readcurrent(recMode);
// Pseudo loop code. Uses bit flipping to cycle through boards.
j2 = ~(j1)& 1;
if (display_dbg) printf("start evaluation...\n");
// evaluate tissue
// Most of the problems in the code happen here.
evaluatepopulation_tissueb(&(t.tgen[j1]), &ra, &bn, &s, j, i);
if (display_dbg) printf("\n");
// display fitness stats to screen
printmaxfitness(&(t.tgen[j1]), i, j, j1, &cwd);
if (display_dbg) printf("start tournament...\n");
// Perform tournament selection and have children ready for evaluation
// Rarely have to touch. Figure out best parents. Crossover operator.
// Create a subgroup. Randomly pick individuals from the population.
// Pick fittest individuals out of the random group.
// 2 parents and 2 children. Children replace parents.
tournamentsel_tissueb(&(t.tgen[j1]), &(t.tgen[j2]), &s);
printf("Tournament selection complete.\n");
// keep track of best fitness during run
if (t.tgen[j1].fit_max > max_fitness)
{
max_fitness = t.tgen[j1].fit_max;
max_fit_gen = j;
}
if ((t.tgen[j1].fit_max > 99.0) && (flagb == 0))
{
flagb = 1;
run_data.fit90[i] = t.tgen[j1].fit_max;
run_data.gen90[i] = j;
}
sa[0] = 0;
strcat(sa, curRun);
sprintf(ss, "%d", i);
strcat(sa, ss);
strcat(sa, ".txt");
printf("Write fitness epc...\n");
// write fitness stats to file
writefitnessepc(sa, &(t), j1, j);
printf("Write fitness complete.\n");
// trunk for saving population to disk
if (sx != 0)
{
sa[0] = 0;
strcat(sa, genTmp);
sprintf(ss, "%d", 1);
strcat(sa, ss);
strcat(sa, ".txt");
if (display_dbg) printf("Saving Current Run\n");
}
//update current generation to file
writecurrent(curGen, j + 1);
if (display_time && j > 0 && (j % 10 == 0 || j % (num_gens - 1) == 0))
{
#ifdef OS_WINDOWS
read_clock(&t3);
sim_time = (t3.QuadPart - t1.QuadPart) / clk_freq.QuadPart;
run_time = (t3.QuadPart - t2.QuadPart) / clk_freq.QuadPart;
#endif
#ifdef OS_UNIX
read_clock(CLOCK_REALTIME, &t3);
sim_time = (double)(t3.tv_sec - t1.tv_sec);
run_time = (double)(t3.tv_sec - t2.tv_sec);
#endif
avg_gen_time = run_time / (j + 1);
est_run_time = avg_gen_time * (num_gens - j);
avg_run_time = est_run_time + run_time;
est_sim_time = (est_run_time * (num_runs - i)) / (i + 1);
printf("\n============= Timing Data =============\n");
printf("Time in Simulation: %.2fs\n", sim_time);
printf("Time in Run: %.2fs\n", run_time);
printf("Est. Time to Complete Run: %.2fs\n", est_run_time);
printf("Est. Time to Complete Simulation: %.2fs\n\n", est_sim_time);
printf("Average Time Per Generation: %.2fs/gen\n", avg_gen_time);
printf("Average Time Per Run: %.2fs/run\n", avg_run_time);
printf("=======================================\n\n");
if (j % (num_gens - 1) == 0) {
}
}
//Display Position Board
//displayboardl(&bn.board[0]);
j1 = j2;
}
}
}
结构
/* Fixed-size set of boardnumb board tables. */
typedef struct boardcollectionn
{
boardtable board[boardnumb];
} boardcollection;
/* Global board collection; main fills it with buildboardcollection7() and
 * passes it to evaluatepopulation_tissueb(). It is named in main.c's
 * threadprivate directive. */
boardcollection bn;
/* Pair of tissue populations. main alternates between tgen[0] and tgen[1]:
 * one index is evaluated while the other receives the tournament output. */
typedef struct tissue_gent
{
tissue_population tgen[2];
} tissue_genx;
/*
 * Simulation parameters shared by the evaluation and selection routines.
 * Only the fields main touches directly are annotated below; the meaning of
 * the remaining fields is not visible from this file.
 */
typedef struct sim_paramt //struct for storing simulation parameters
{
int penalty;
int addnum[cell_numz];
int x[9]; // main zeroes x[0] before initialize_sim_param()
int y[9]; // main zeroes y[0] before initialize_sim_param()
uint8_t addvec[9]; // main zeroes addvec[0] before initialize_sim_param()
uint8_t parenta[50];
uint8_t parentb[50];
int errorstatus;
int ones[outputnum][5000];
int zeros[outputnum][5000];
int probcount;
int num;
int numb;
int numc;
int numd;
int nume;
int numf;
int bestnum; // zeroed by main before initialize_sim_param()
int count; // zeroed by main before initialize_sim_param()
int col_flag;
int behaviour[outputnum];
int memm[4];
int sel;
int seldecnum;
int seldec[200];
int selx[200];
int sely[200];
int selz[200];
int countb; // zeroed by main before initialize_sim_param()
float maxfitness; // reset to 0.0 by main at the start of every run
float oldmaxfitness;
int run_num; // set by main to the index of the run being executed
int collision;
} sim_param;
/* Global population buffers and simulation parameters used by main; both are
 * named in main.c's threadprivate directive. */
tissue_genx t;
sim_param s;
代码太大,无法进行适当的测试,而且使用全局变量确实不利于理清数据依赖关系。不过我还是可以指出几点:
i
声明为 shared
而它是并行循环的索引。这是错误的!如果在 omp for
循环中确实有一个变量你想成为 private
,那么它就是循环索引。我在 C 和 C++ 的 OpenMP 标准中没有发现任何明确的内容,而对于 Fortran,循环索引(以及所有封闭循环的循环索引)是隐式私有化的。尽管如此,英特尔编译器在尝试显式声明 shared
这样的索引时出错:
sharedi.cc(11): warning #2555: static control variable for parallel loop
for ( i=0; i<10; i++ ) {
^
sharedi.cc(10): error: index variable "i" of for statement following an OpenMP for pragma must be private
#pragma omp parallel for shared(i) schedule(static)
^
compilation aborted for sharedi.cc (code 2)
与此同时,gcc 5.1.0 版不会针对同一代码发出任何警告或错误,并且其行为就像该变量已被声明为 private 一样
...我倾向于发现英特尔编译器的行为更合理,但我不能 100% 确定哪个是正确的。然而,我所知道的是,声明 i
shared
绝对是一个非常非常糟糕的主意(甚至是 AFAIC 的错误)。所以我觉得这是一个灰色地带,你的编译器可能会或可能不会做一个明智的工作,这本身就可以解释你的大部分问题。
您似乎将数据输出到文件中,这些文件的名称可能会在线程间发生冲突。小心点,因为你可能会搞得一团糟...
您的打印输出很可能会完全错乱。我不知道这对您有多重要,但按目前的写法,输出不会好看。
总而言之,您的代码对我来说过于纠缠,难以看清到底发生了什么。尝试至少解决我提到的前两点,也许就足以让它“能跑起来”。不过,我强烈建议您清理代码并摆脱全局变量。同样,尽量在源代码中尽可能晚地声明变量,因为这减少了为 OpenMP 将它们声明为 private 的需要,并且大大提高了可读性。
祝你调试顺利。
前言
最近,我在我们组的项目代码中引入了 OpenMP。main 在两个 for 循环中运行:外层循环控制 'run'(运行),内层循环控制 'generation'(世代)。不同运行之间的世代完全相互独立,但每个世代依赖于同一运行中的其他世代。
想法是并行化外层循环,即 'run' 循环,同时让每个线程在分配给它的任何特定 运行 编号上保持世代演化。
问题
当设置 OMP_THREADS = 1
时,即让程序只用一个线程运行时,它运行得很顺利。如果这个数字更高,我会收到以下错误:
Unhandled exception at 0x00F5C4C3 in projectc.exe: 0xC0000005: Access violation writing location 0x00000072.
以下内容出现在 Visual Studio 的 "Autos" 部分:
(注意:t
、t->active_cells
和t->cellx
是"error red",而其余的当我得到这个错误时是白色的)
如果我在外循环正上方的 #pragma
中将 default(none)
更改为 default(shared)
,并从 threadprivate 中删除 t、s 和 bn(这些是在外部文件中初始化的结构),那么程序通常会在每个线程上运行一个世代后冻结(尽管 CPU 活动表明两个线程仍以与之前相同的强度运行)。
尝试解决方案
我不知道出了什么问题。在外循环之外尝试一个简单的 #pragma omp parallel for
当然是行不通的,但我也尝试过将所有 main 声明为 #pragma omp parallel
并将外循环声明为 #pragma omp for
。我也尝试过其他一些细微的变体,这让我得出结论:问题一定与线程之间共享变量的方式有关……因为所有运行(也就是所有线程)都是相互独立的,实际上所有变量都可以设置为 private;尽管您会在 shared(..) 中看到一些重叠。
下面附上代码。
main.c
/* General Includes */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <omp.h>
/* Project Includes */
#include "main.h"
#include "randgen.h"
#include "board7.h"
#include "tissue.h"
#include "io.h"
/* Evaluates to `arg` with bit `posn` toggled; `arg` itself is not modified. */
#define BitFlp(arg,posn) ((arg) ^ (1L << (posn)))
/* Evaluates to `arg` with bit `posn` cleared; `arg` itself is not modified. */
#define BitClr(arg,posn) ((arg) & ~(1L << (posn)))
#define display_dbg 1 //Controls whether print statements in main.c are displayed.
#define display_time 1 //Controls whether timing print statements are executed.
/* No trailing semicolon: the macro must be usable inside larger expressions. */
#define BILLION 1000000000L
#define num_runs 10 //Controls number of runs per simulation
#define num_gens 4000 //Controls number of generations per run
#define OMP_THREADS 1 // Max number of threads used if OpenMP is enabled
/* Index of the outer (run) loop in main; deliberately absent from the
 * threadprivate list below. */
int i;

/* Values main reads from the state files before the run loop starts. */
int z, x, sxa, y;

/* Per-run / per-generation working state used inside main's loops. */
int j, w, j1, j2, flagb, rn, sx;
int collect_data, max_fit_gen;
float max_fitness;
calookup ra;

/* Remaining globals; main as shown in this file does not touch them. */
int n, r, m, lb_run;
char a;
float f;
tissuen *fx;
input_vec dx;

/* Every thread gets its own copy of the variables listed here. run_data, t,
 * s and bn are not defined in this part of the file. */
#pragma omp threadprivate(n, r, j, x, z, sxa, y, flagb, m, j1, j2, a, \
max_fit_gen, collect_data, lb_run, w, rn, sx, f, max_fitness, \
fx, dx, ra, run_data, t, s, bn)
int main(int argc, char *argv[])
{
int* p = 0x00000000; // pointer to NULL
char sa[256];
char ss[10];
long randn;
boardtable ba;
srand((unsigned)time(NULL));
init_mm();
randn = number_range(1, 100);
#ifdef OS_WINDOWS
// Timing parameters
LARGE_INTEGER clk_freq;
LARGE_INTEGER t1, t2, t3;
#endif
#ifdef OS_UNIX
struct timespec clk_freq, t1, t2, t3;
#endif
double avg_gen_time, avg_run_time, run_time, sim_time, est_run_time, est_sim_time;
// File System and IO Parameters
char cwd[FILENAME_MAX];
getcwd(&cwd, sizeof(cwd));
char curState[FILENAME_MAX];
char recState[FILENAME_MAX];
char recMode[FILENAME_MAX];
char curGen[FILENAME_MAX];
char curRun[FILENAME_MAX];
char genTmp[FILENAME_MAX];
strcpy(curState, cwd);
strcpy(recState, cwd);
strcpy(recMode, cwd);
strcpy(curGen, cwd);
strcpy(curRun, cwd);
strcpy(genTmp, cwd);
#ifdef OS_WINDOWS
strcat(curState, "\current.txt");
strcat(recState, "\recover.txt");
strcat(recMode, "\recovermode.txt");
strcat(curGen, "\gen.txt");
strcat(curRun, "\run");
strcat(genTmp, "\tmp\gentmp");
#endif
#ifdef OS_UNIX
strcat(curState, "/current.txt");
strcat(recState, "/recover.txt");
strcat(recMode, "/recovermode.txt");
strcat(curGen, "/gen.txt");
strcat(curRun, "/run");
strcat(genTmp, "/tmp/gentmp");
#endif
//Read current EA run variables (i.e. current run number, generation, recover mode status)
z = readorcreate(curState);
x = readorcreate(recState);
sxa = readorcreate(recMode);
y = readorcreate(curGen);
//Initialize simulation parameters
s.count = 0;
s.x[0] = 0;
s.y[0] = 0;
s.addvec[0] = 0;
s.bestnum = 0;
s.countb = 0;
s.count = 0;
initialize_sim_param(&s, 0, 200);
collect_data = 0;
//Build a collection of experiment initial conditions
buildboardcollection7(&bn);
//Determine clock frequency.
#ifdef OS_WINDOWS
if (display_time) get_frequency(&clk_freq);
#endif
#ifdef OS_UNIX
if (display_time) get_frequency(CLOCK_REALTIME, &clk_freq);
#endif
//Start simulation timer
#ifdef OS_WINDOWS
if (display_time) read_clock(&t1);
#endif
#ifdef OS_UNIX
if (display_time) read_clock(CLOCK_REALTIME, &t1);
#endif
#pragma omp parallel for schedule(static) default(none) num_threads(OMP_THREADS) \
private(sa, ss, randn, ba, t2, t3, avg_gen_time, avg_run_time, sim_time, \
run_time, est_run_time, est_sim_time) \
shared(i, cwd, recMode, curRun, curGen, curState, genTmp, clk_freq, t1)
for (i = z; i < num_runs; i++)
{
// randomly initialize content of tissue population
initialize_tissue_pop_s2(&(t.tgen[0]), &s);
initialize_tissue_pop_s2(&(t.tgen[1]), &s);
max_fit_gen = 0;
max_fitness = 0.0;
flagb = 0;
if ((i == z) && (x == 1))
{
w = y;
}
else
{
w = 0;
}
rn = 200;
j1 = 0;
s.run_num = i;
s.maxfitness = 0.0;
//Start run timer
#ifdef OS_WINDOWS
if (display_time) read_clock(&t2);
#endif
#ifdef OS_UNIX
if (display_time) read_clock(CLOCK_REALTIME, &t2);
#endif
#if defined(_OPENMP)
printf("\n ======================================= \n");
printf(" OpenMP Status Message \n");
printf("\n --------------------------------------- \n");
printf("| RUN %d : \n", i);
printf("| New Thread Process (Thread %d) \n", omp_get_thread_num());
printf("| Available Threads: %d of %d \n", omp_get_num_threads(), omp_get_max_threads());
printf(" ======================================= \n\n");
#endif
for (j = w; j < num_gens; j++)
{
// Flips on lightboard data collection. See board7.h.
if (enable_collection == 1) {
if ((i >= run_collect) && (j >= gen_collect)) { collect_data = 1; }
}
sx = readcurrent(recMode);
// Pseudo loop code. Uses bit flipping to cycle through boards.
j2 = ~(j1)& 1;
if (display_dbg) printf("start evaluation...\n");
// evaluate tissue
// Most of the problems in the code happen here.
evaluatepopulation_tissueb(&(t.tgen[j1]), &ra, &bn, &s, j, i);
if (display_dbg) printf("\n");
// display fitness stats to screen
printmaxfitness(&(t.tgen[j1]), i, j, j1, &cwd);
if (display_dbg) printf("start tournament...\n");
// Perform tournament selection and have children ready for evaluation
// Rarely have to touch. Figure out best parents. Crossover operator.
// Create a subgroup. Randomly pick individuals from the population.
// Pick fittest individuals out of the random group.
// 2 parents and 2 children. Children replace parents.
tournamentsel_tissueb(&(t.tgen[j1]), &(t.tgen[j2]), &s);
printf("Tournament selection complete.\n");
// keep track of best fitness during run
if (t.tgen[j1].fit_max > max_fitness)
{
max_fitness = t.tgen[j1].fit_max;
max_fit_gen = j;
}
if ((t.tgen[j1].fit_max > 99.0) && (flagb == 0))
{
flagb = 1;
run_data.fit90[i] = t.tgen[j1].fit_max;
run_data.gen90[i] = j;
}
sa[0] = 0;
strcat(sa, curRun);
sprintf(ss, "%d", i);
strcat(sa, ss);
strcat(sa, ".txt");
printf("Write fitness epc...\n");
// write fitness stats to file
writefitnessepc(sa, &(t), j1, j);
printf("Write fitness complete.\n");
// trunk for saving population to disk
if (sx != 0)
{
sa[0] = 0;
strcat(sa, genTmp);
sprintf(ss, "%d", 1);
strcat(sa, ss);
strcat(sa, ".txt");
if (display_dbg) printf("Saving Current Run\n");
}
//update current generation to file
writecurrent(curGen, j + 1);
if (display_time && j > 0 && (j % 10 == 0 || j % (num_gens - 1) == 0))
{
#ifdef OS_WINDOWS
read_clock(&t3);
sim_time = (t3.QuadPart - t1.QuadPart) / clk_freq.QuadPart;
run_time = (t3.QuadPart - t2.QuadPart) / clk_freq.QuadPart;
#endif
#ifdef OS_UNIX
read_clock(CLOCK_REALTIME, &t3);
sim_time = (double)(t3.tv_sec - t1.tv_sec);
run_time = (double)(t3.tv_sec - t2.tv_sec);
#endif
avg_gen_time = run_time / (j + 1);
est_run_time = avg_gen_time * (num_gens - j);
avg_run_time = est_run_time + run_time;
est_sim_time = (est_run_time * (num_runs - i)) / (i + 1);
printf("\n============= Timing Data =============\n");
printf("Time in Simulation: %.2fs\n", sim_time);
printf("Time in Run: %.2fs\n", run_time);
printf("Est. Time to Complete Run: %.2fs\n", est_run_time);
printf("Est. Time to Complete Simulation: %.2fs\n\n", est_sim_time);
printf("Average Time Per Generation: %.2fs/gen\n", avg_gen_time);
printf("Average Time Per Run: %.2fs/run\n", avg_run_time);
printf("=======================================\n\n");
if (j % (num_gens - 1) == 0) {
}
}
//Display Position Board
//displayboardl(&bn.board[0]);
j1 = j2;
}
}
}
结构
/* Fixed-size set of boardnumb board tables. */
typedef struct boardcollectionn
{
boardtable board[boardnumb];
} boardcollection;
/* Global board collection; main fills it with buildboardcollection7() and
 * passes it to evaluatepopulation_tissueb(). It is named in main.c's
 * threadprivate directive. */
boardcollection bn;
/* Pair of tissue populations. main alternates between tgen[0] and tgen[1]:
 * one index is evaluated while the other receives the tournament output. */
typedef struct tissue_gent
{
tissue_population tgen[2];
} tissue_genx;
/*
 * Simulation parameters shared by the evaluation and selection routines.
 * Only the fields main touches directly are annotated below; the meaning of
 * the remaining fields is not visible from this file.
 */
typedef struct sim_paramt //struct for storing simulation parameters
{
int penalty;
int addnum[cell_numz];
int x[9]; // main zeroes x[0] before initialize_sim_param()
int y[9]; // main zeroes y[0] before initialize_sim_param()
uint8_t addvec[9]; // main zeroes addvec[0] before initialize_sim_param()
uint8_t parenta[50];
uint8_t parentb[50];
int errorstatus;
int ones[outputnum][5000];
int zeros[outputnum][5000];
int probcount;
int num;
int numb;
int numc;
int numd;
int nume;
int numf;
int bestnum; // zeroed by main before initialize_sim_param()
int count; // zeroed by main before initialize_sim_param()
int col_flag;
int behaviour[outputnum];
int memm[4];
int sel;
int seldecnum;
int seldec[200];
int selx[200];
int sely[200];
int selz[200];
int countb; // zeroed by main before initialize_sim_param()
float maxfitness; // reset to 0.0 by main at the start of every run
float oldmaxfitness;
int run_num; // set by main to the index of the run being executed
int collision;
} sim_param;
/* Global population buffers and simulation parameters used by main; both are
 * named in main.c's threadprivate directive. */
tissue_genx t;
sim_param s;
代码太大,无法进行适当的测试,而且使用全局变量确实不利于理清数据依赖关系。不过我还是可以指出几点:
i
声明为shared
而它是并行循环的索引。这是错误的!如果在omp for
循环中确实有一个变量你想成为private
,那么它就是循环索引。我在 C 和 C++ 的 OpenMP 标准中没有发现任何明确的内容,而对于 Fortran,循环索引(以及所有封闭循环的循环索引)是隐式私有化的。尽管如此,英特尔编译器在尝试显式声明shared
这样的索引时出错:sharedi.cc(11): warning #2555: static control variable for parallel loop for ( i=0; i<10; i++ ) { ^ sharedi.cc(10): error: index variable "i" of for statement following an OpenMP for pragma must be private #pragma omp parallel for shared(i) schedule(static) ^ compilation aborted for sharedi.cc (code 2)
与此同时,gcc 5.1.0 版不会针对同一代码发出任何警告或错误,并且其行为就像该变量已被声明为 private 一样
...我倾向于发现英特尔编译器的行为更合理,但我不能 100% 确定哪个是正确的。然而,我所知道的是,声明i
shared
绝对是一个非常非常糟糕的主意(甚至是 AFAIC 的错误)。所以我觉得这是一个灰色地带,你的编译器可能会或可能不会做一个明智的工作,这本身就可以解释你的大部分问题。您似乎将数据输出到文件中,这些文件的名称可能会在线程间发生冲突。小心点,因为你可能会搞得一团糟...
您的打印输出很可能会完全错乱。我不知道这对您有多重要,但按目前的写法,输出不会好看。
总而言之,您的代码对我来说过于纠缠,难以看清到底发生了什么。尝试至少解决我提到的前两点,也许就足以让它“能跑起来”。不过,我强烈建议您清理代码并摆脱全局变量。同样,尽量在源代码中尽可能晚地声明变量,因为这减少了为 OpenMP 将它们声明为 private 的需要,并且大大提高了可读性。
祝你调试顺利。