OpenMP:访问冲突和其他错误

OpenMP: Access violation and other errors

前言

最近,我在我们组的项目代码中引入了 OpenMP。main 由两层 for 循环组成:外层循环控制 'run'(运行),内层循环控制 'generation'(世代)。不同运行中的世代彼此完全独立,但依赖于同一运行中的其他世代。

想法是并行化外层循环,即 'run' 循环,同时让每个线程在分配给它的任何特定 运行 编号上保持世代演化。

问题

当设置 OMP_THREADS = 1(即程序只用一个线程运行)时,程序运行得很顺利。如果把这个数字调高,我会收到以下错误:

Unhandled exception at 0x00F5C4C3 in projectc.exe: 0xC0000005: Access violation writing location 0x00000072.

以下内容出现在 Visual Studio 的 "Autos" 部分:

(注意:tt->active_cellst->cellx是"error red",而其余的当我得到这个错误时是白色的)

如果我在外层循环正上方的 #pragma 中将 default(none) 改为 default(shared),并把 t、s、bn 从 threadprivate 中移除(这些是在外部文件中初始化的结构体),那么程序会在每个线程上正常运行大约一个世代,然后卡死(尽管 CPU 活动表明两个线程仍以与之前相同的强度运行)。

尝试解决方案

我不知道出了什么问题。在外层循环上直接加一个简单的 #pragma omp parallel for 当然行不通,但我也尝试过把整个 main 放进 #pragma omp parallel 区域,并把外层循环声明为 #pragma omp for。类似的其他一些细微变体我也试过,这让我得出结论:问题一定出在线程之间共享变量的方式上……由于所有运行(也就是各个线程)彼此独立,实际上所有变量都可以设为 private;尽管您会在 shared(..) 中看到一些重叠。

下面附上代码。

main.c

/* General Includes */
#include <stdio.h> 
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <omp.h>

/* Project Includes */
#include "main.h"
#include "randgen.h"
#include "board7.h"
#include "tissue.h"
#include "io.h"

/*
 * Bit helpers. Both evaluate to a new value and do not modify `arg`.
 * `posn` must be non-negative and smaller than the bit width of `long`,
 * otherwise the shift is undefined behaviour.
 */
// Toggle bit `posn` of `arg`.
#define BitFlp(arg,posn) ((arg) ^ (1L << (posn)))
// Clear bit `posn` of `arg`.
#define BitClr(arg,posn) ((arg) & ~(1L << (posn)))

#define display_dbg 1 //Controls whether print statements in main.c are displayed.
#define display_time 1 //Controls whether timing print statements are executed.
// No trailing semicolon: the macro must stay usable inside larger
// expressions (e.g. `ns / BILLION`), where a stray ';' would end the statement.
#define BILLION 1000000000L

#define num_runs 10 //Controls number of runs per simulation
#define num_gens 4000//Controls number of generations per run

#define OMP_THREADS 1 // Max number of threads used if OpenMP is enabled

/*
 * File-scope working state of the simulation driver.
 *
 * Everything named in the threadprivate directive below gets one copy per
 * OpenMP thread. `i` (the run index of the parallel loop in main) is the only
 * variable declared here that is not in that list.
 */

/* Loop indices: i = run, j = generation within a run. */
int i;
int j;

/* Indices into t.tgen[]; main alternates them every generation. */
int j1;
int j2;

/* Values main loads from the state files through readorcreate(). */
int z;      /* from current.txt: first run index of the run loop */
int x;      /* from recover.txt: compared against 1 when picking the start generation */
int sxa;    /* from recovermode.txt */
int y;      /* from gen.txt: start generation when (i == z) && (x == 1) */

/* Per-run bookkeeping maintained by main. */
int w;              /* first generation of the current run */
int sx;             /* result of readcurrent(recMode), refreshed each generation */
int rn;             /* set to 200 at the start of each run */
int flagb;          /* set to 1 once fit_max has exceeded 99.0 in the run */
int collect_data;   /* set to 1 once the collection thresholds are reached */
int max_fit_gen;    /* generation at which max_fitness was recorded */
float max_fitness;  /* best fit_max seen so far in the run */

/* Passed by address to evaluatepopulation_tissueb(). */
calookup ra;

/* Not referenced in the part of main.c visible here. */
int n;
int r;
int m;
int lb_run;
char a;
float f;
tissuen *fx;
input_vec dx;

/* run_data, t, s and bn are not declared in this block; presumably they come
 * from the project headers included above -- TODO confirm. */
#pragma omp threadprivate(n, r, j, x, z, sxa, y, flagb, m, \
    j1, j2, a, max_fit_gen, collect_data, lb_run, w, \
    rn, sx, f, max_fitness, fx, dx, ra, run_data, t, s, bn)

int main(int argc, char *argv[])
{

    int* p = 0x00000000;   // pointer to NULL
        char sa[256];
    char ss[10];
    long randn;
    boardtable ba;
    srand((unsigned)time(NULL));
    init_mm();
    randn = number_range(1, 100);

    #ifdef OS_WINDOWS
    // Timing parameters
        LARGE_INTEGER clk_freq;
    LARGE_INTEGER t1, t2, t3;
    #endif

    #ifdef OS_UNIX
    struct timespec clk_freq, t1, t2, t3;
    #endif

    double avg_gen_time, avg_run_time, run_time, sim_time, est_run_time, est_sim_time;

    // File System and IO Parameters
    char cwd[FILENAME_MAX];
    getcwd(&cwd, sizeof(cwd));
    char curState[FILENAME_MAX];
    char recState[FILENAME_MAX];
    char recMode[FILENAME_MAX];
    char curGen[FILENAME_MAX];
    char curRun[FILENAME_MAX];
    char genTmp[FILENAME_MAX];

    strcpy(curState, cwd);
    strcpy(recState, cwd);
    strcpy(recMode, cwd);
    strcpy(curGen, cwd);
    strcpy(curRun, cwd);
    strcpy(genTmp, cwd);

    #ifdef OS_WINDOWS
    strcat(curState, "\current.txt");
    strcat(recState, "\recover.txt");
    strcat(recMode, "\recovermode.txt");
    strcat(curGen, "\gen.txt");
    strcat(curRun, "\run");
    strcat(genTmp, "\tmp\gentmp");
    #endif

    #ifdef OS_UNIX
    strcat(curState, "/current.txt");
    strcat(recState, "/recover.txt");
    strcat(recMode, "/recovermode.txt");
    strcat(curGen, "/gen.txt");
    strcat(curRun, "/run");
    strcat(genTmp, "/tmp/gentmp");
    #endif

    //Read current EA run variables (i.e. current run number, generation, recover mode status)
    z = readorcreate(curState);
    x = readorcreate(recState);
    sxa = readorcreate(recMode);
    y = readorcreate(curGen);

    //Initialize simulation parameters
    s.count = 0;

    s.x[0] = 0;
    s.y[0] = 0;

    s.addvec[0] = 0;

    s.bestnum = 0;
    s.countb = 0;
    s.count = 0;
    initialize_sim_param(&s, 0, 200);

    collect_data = 0;

    //Build a collection of experiment initial conditions
    buildboardcollection7(&bn);

    //Determine clock frequency.
    #ifdef OS_WINDOWS
    if (display_time)   get_frequency(&clk_freq);
    #endif

    #ifdef OS_UNIX
    if (display_time)   get_frequency(CLOCK_REALTIME, &clk_freq);
    #endif


//Start simulation timer
    #ifdef OS_WINDOWS
    if (display_time)   read_clock(&t1);
    #endif

    #ifdef OS_UNIX
    if (display_time)   read_clock(CLOCK_REALTIME, &t1);
    #endif

#pragma omp parallel for schedule(static) default(none) num_threads(OMP_THREADS) \
            private(sa, ss, randn, ba, t2, t3, avg_gen_time, avg_run_time, sim_time, \
        run_time, est_run_time, est_sim_time) \
            shared(i, cwd, recMode, curRun, curGen, curState, genTmp, clk_freq, t1)
for (i = z; i < num_runs; i++)
{


    // randomly initialize content of  tissue population
    initialize_tissue_pop_s2(&(t.tgen[0]), &s);
    initialize_tissue_pop_s2(&(t.tgen[1]), &s);

    max_fit_gen = 0;
    max_fitness = 0.0;
    flagb = 0;

    if ((i == z) && (x == 1))
    {
        w = y;
    }
    else
    {
        w = 0;
    }

    rn = 200;
    j1 = 0;

    s.run_num = i;
    s.maxfitness = 0.0;

    //Start run timer
        #ifdef OS_WINDOWS
    if (display_time)   read_clock(&t2);
        #endif

        #ifdef OS_UNIX
    if (display_time)   read_clock(CLOCK_REALTIME, &t2);
        #endif

        #if defined(_OPENMP)
    printf("\n ======================================= \n");
    printf("  OpenMP Status Message \n");
    printf("\n --------------------------------------- \n");
    printf("| RUN %d : \n", i);
    printf("|   New Thread Process (Thread %d) \n", omp_get_thread_num());
    printf("|   Available Threads: %d of %d \n", omp_get_num_threads(), omp_get_max_threads());
    printf(" ======================================= \n\n");
        #endif

    for (j = w; j < num_gens; j++)
    {

        // Flips on lightboard data collection. See board7.h.
        if (enable_collection == 1) {
            if ((i >= run_collect) && (j >= gen_collect)) { collect_data = 1; }
        }

        sx = readcurrent(recMode);

        // Pseudo loop code. Uses bit flipping to cycle through boards.
        j2 = ~(j1)& 1;
        if (display_dbg)    printf("start evaluation...\n");

        // evaluate tissue
        // Most of the problems in the code happen here.
        evaluatepopulation_tissueb(&(t.tgen[j1]), &ra, &bn, &s, j, i);
        if (display_dbg)    printf("\n");

        // display fitness stats to screen
        printmaxfitness(&(t.tgen[j1]), i, j, j1, &cwd);

        if (display_dbg)    printf("start tournament...\n");

        // Perform tournament selection and have children ready for evaluation
        // Rarely have to touch. Figure out best parents. Crossover operator.
        // Create a subgroup. Randomly pick individuals from the population.
        // Pick fittest individuals out of the random group.
        // 2 parents and 2 children. Children replace parents.
        tournamentsel_tissueb(&(t.tgen[j1]), &(t.tgen[j2]), &s);

        printf("Tournament selection complete.\n");

        // keep track of best fitness during run
        if (t.tgen[j1].fit_max > max_fitness)
        {
            max_fitness = t.tgen[j1].fit_max;
            max_fit_gen = j;
        }

        if ((t.tgen[j1].fit_max > 99.0) && (flagb == 0))
        {
            flagb = 1;
            run_data.fit90[i] = t.tgen[j1].fit_max;
            run_data.gen90[i] = j;
        }

        sa[0] = 0;
        strcat(sa, curRun);
        sprintf(ss, "%d", i);
        strcat(sa, ss);
        strcat(sa, ".txt");

        printf("Write fitness epc...\n");

        // write fitness stats to file
        writefitnessepc(sa, &(t), j1, j);

        printf("Write fitness complete.\n");

        // trunk for saving population to disk
        if (sx != 0)
        {
            sa[0] = 0;
            strcat(sa, genTmp);
            sprintf(ss, "%d", 1);
            strcat(sa, ss);
            strcat(sa, ".txt");

            if (display_dbg)    printf("Saving Current Run\n");
        }

        //update current generation to file
        writecurrent(curGen, j + 1);

        if (display_time && j > 0 && (j % 10 == 0 || j % (num_gens - 1) == 0))
        {
            #ifdef OS_WINDOWS
            read_clock(&t3);
            sim_time = (t3.QuadPart - t1.QuadPart) / clk_freq.QuadPart;
            run_time = (t3.QuadPart - t2.QuadPart) / clk_freq.QuadPart;
            #endif

            #ifdef OS_UNIX
            read_clock(CLOCK_REALTIME, &t3);
            sim_time = (double)(t3.tv_sec - t1.tv_sec);
            run_time = (double)(t3.tv_sec - t2.tv_sec);
            #endif

            avg_gen_time = run_time / (j + 1);
            est_run_time = avg_gen_time * (num_gens - j);
            avg_run_time = est_run_time + run_time;
            est_sim_time = (est_run_time * (num_runs - i)) / (i + 1);
            printf("\n============= Timing Data =============\n");
            printf("Time in Simulation: %.2fs\n", sim_time);
            printf("Time in Run: %.2fs\n", run_time);
            printf("Est. Time to Complete Run: %.2fs\n", est_run_time);
            printf("Est. Time to Complete Simulation: %.2fs\n\n", est_sim_time);
            printf("Average Time Per Generation: %.2fs/gen\n", avg_gen_time);
            printf("Average Time Per Run: %.2fs/run\n", avg_run_time);
            printf("=======================================\n\n");

            if (j % (num_gens - 1) == 0) {

            }
        }

            //Display Position Board
            //displayboardl(&bn.board[0]);

            j1 = j2;
        }
    }
}

结构

/* Fixed-size set of boardtable entries; boardnumb is defined outside this
 * excerpt. */
typedef struct boardcollectionn
{
    boardtable board[boardnumb];

} boardcollection;

/* Global board collection. main.c fills it with buildboardcollection7(&bn),
 * passes it to evaluatepopulation_tissueb(), and lists it in its
 * threadprivate directive. */
boardcollection bn;

/* Pair of tissue populations. main.c evaluates tgen[j1] and passes both
 * tgen[j1] and tgen[j2] to tournamentsel_tissueb(), then swaps the two
 * indices for the next generation. */
typedef struct tissue_gent
{
    tissue_population tgen[2]; 

} tissue_genx;

/*
 * Simulation parameters and counters.
 *
 * The only uses visible in main.c are: count, x[0], y[0], addvec[0], bestnum
 * and countb are zeroed before initialize_sim_param(&s, 0, 200) is called, and
 * run_num / maxfitness are reset at the start of every run. The meaning of
 * the remaining fields is not visible in this excerpt.
 */
typedef struct sim_paramt   //struct for storing simulation parameters
{
int penalty;
int addnum[cell_numz];
int x[9];
int y[9];
uint8_t addvec[9];
uint8_t parenta[50];
uint8_t parentb[50];
int errorstatus;
int ones[outputnum][5000];
int zeros[outputnum][5000];
int probcount;
int num;
int numb;
int numc;
int numd;
int nume;
int numf;
int bestnum;
int count;
int col_flag;
int behaviour[outputnum];
int memm[4];
int sel;
int seldecnum;
int seldec[200];
int selx[200];
int sely[200];
int selz[200];
int countb;
float maxfitness;   // reset to 0.0 by main.c at the start of each run
float oldmaxfitness;
int run_num;        // set by main.c to the index of the current run
int collision;

} sim_param;

/* Global simulation state. main.c lists both objects in its threadprivate
 * directive, so each OpenMP thread works on its own copy. */
tissue_genx t;
sim_param s;

代码太大而无法进行适当的测试,并且使用全局变量确实无助于找出数据依赖性。不过我只能说几句:

  • i 声明为 shared 而它是并行循环的索引。这是错误的!如果在 omp for 循环中确实有一个变量你想成为 private,那么它就是循环索引。我在 C 和 C++ 的 OpenMP 标准中没有发现任何明确的内容,而对于 Fortran,循环索引(以及所有封闭循环的循环索引)是隐式私有化的。尽管如此,英特尔编译器在尝试显式声明 shared 这样的索引时出错:

    sharedi.cc(11): warning #2555: static control variable for parallel loop
          for ( i=0; i<10; i++ ) {
                           ^
    sharedi.cc(10): error: index variable "i" of for statement following an OpenMP for pragma must be private
          #pragma omp parallel for shared(i) schedule(static)
          ^
    compilation aborted for sharedi.cc (code 2)
    

    与此同时,gcc 5.1.0 版对同一段代码不会发出任何警告或错误,其行为就像该变量已被声明为 private 一样……我倾向于认为英特尔编译器的行为更合理,但我不能 100% 确定哪个是正确的。然而,我所知道的是,把 i 声明为 shared 绝对是一个非常非常糟糕的主意(在我看来甚至就是一个错误)。所以我觉得这是一个灰色地带,您的编译器可能会、也可能不会做出合理的处理,这本身就可以解释您的大部分问题。

  • 您似乎将数据输出到文件中,这些文件的名称可能会在线程间发生冲突。小心点,因为你可能会搞得一团糟...

  • 您的打印输出很可能会完全错乱(多个线程的输出相互交错)。我不知道您对此有多看重,但它不会再是现在单线程时的样子。

总而言之,您的代码太过纠缠,我很难看清到底发生了什么。先试着至少解决我提到的前两点,也许就足以让它“跑起来”了。不过,我强烈建议您清理代码并摆脱全局变量。同样,请尽量在源代码中尽可能晚地声明变量,因为这样可以减少需要在 OpenMP 中声明为 private 的变量,并大大提高可读性。

祝你调试顺利。