如何将 OpenMP 与 PARI/GP 一起使用?

How to use OpenMP with PARI/GP?

我想并行化一个 for 循环,其中包括来自 PARI/GP 库的一些操作,以及一些其他操作。但是,将 OpenMP 与 PARI/GP 一起使用似乎并不是很简单。我发现的唯一类似示例是 this,实际上它似乎使用了 OpenMP 中的 sections。我尝试使用 parallel for 子句做一些事情,如下所示,但是,它在 PARI/GP 上抛出分段错误。

#include <omp.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <time.h>
#include "pari/pari.h"

#define MAX_THREAD      8
#define MAX_COUNT       8
#define CLOCK_PRECISION 1E9

typedef struct vector {
  void **items;
  unsigned int capacity;
  unsigned int size;
} vector_st;

typedef vector_st *vector_t;

vector_t vector_init(unsigned int capacity) {
  if (capacity <= 0) {
    fprintf(stderr, "Error: invalid vector capacity.\n");
    exit(1);
  }

  vector_t v = (vector_t) calloc(1, sizeof(vector_st));
  v->capacity = capacity;
  v->size = 0;
  v->items = malloc(sizeof(void *) * v->capacity);
  if (v->items == NULL) {
    fprintf(stderr, "Error: could not allocate memory.\n");
    exit(1);
  }

  return v;
}

unsigned int vector_size(vector_t v) {
  return v->size;
}


void *vector_get(vector_t v, unsigned int index) {
  if (index >= v->size) {
    fprintf(stderr, "Error: index larger than the vector size.\n");
    exit(1);
  }
  return v->items[index];
}

void vector_resize(vector_t v, unsigned int capacity) {
#ifdef DEBUG
  printf("Vector resize, from %u to %u.\n", v->capacity, capacity);
#endif

  void **items = realloc(v->items, sizeof(void *) * capacity);
  if (items) {
    v->items = items;
    v->capacity = capacity;
  } else {
    fprintf(stderr, "Error: could not re-allocate memory.\n");
    exit(1);
  }
}

void vector_add(vector_t v, void *item) {
  if (v->capacity == v->size) {
    vector_resize(v, v->capacity * 2);
  }
  v->items[v->size++] = item;
}

void vector_copy(vector_t c, vector_t a) {
  unsigned int i;
  for (i = 0; i < vector_size(a); i++) {
    vector_add(c, vector_get(a, i));
  }
}

void vector_free(vector_t v) {
  if (v) {
    if (v->items) {
      free(v->items);
    }

    v->capacity = 0;
    v->size = 0;

    free(v);
    v = NULL;
  }
}

typedef struct {
  GEN sk;
  GEN pk;
} cl_key_pair_st;

typedef cl_key_pair_st *cl_key_pair_t;

#define cl_key_pair_null(key_pair) key_pair = NULL;

#define cl_key_pair_new(key_pair)                     \
  do {                                                \
    key_pair = malloc(sizeof(cl_key_pair_st));        \
    if (key_pair == NULL) {                           \
      exit(1);                                        \
    }                                                 \
  } while (0)

#define cl_key_pair_free(key_pair)                    \
  do {                                                \
    free(key_pair);                                   \
    key_pair = NULL;                                  \
  } while (0)


uint64_t mtimer(void) {
    struct timespec time;
    clock_gettime(CLOCK_MONOTONIC, &time);
    return (uint64_t) (time.tv_sec * CLOCK_PRECISION + time.tv_nsec);
}

void test(vector_t keys, const unsigned count) {
  GEN bound = strtoi("25413151665722220203610173826311975594790577398151861612310606875883990655261658217495681782816066858410439979225400605895077952191850577877370585295070770312182177789916520342292660169492395314400288273917787194656036294620169343699612953311314935485124063580486497538161801803224580096");
  GEN g_q_a = strtoi("4008431686288539256019978212352910132512184203702279780629385896624473051840259706993970111658701503889384191610389161437594619493081376284617693948914940268917628321033421857293703008209538182518138447355678944124861126384966287069011522892641935034510731734298233539616955610665280660839844718152071538201031396242932605390717004106131705164194877377");
  GEN g_q_b = negi(strtoi("3117991088204303366418764671444893060060110057237597977724832444027781815030207752301780903747954421114626007829980376204206959818582486516608623149988315386149565855935873517607629155593328578131723080853521348613293428202727746191856239174267496577422490575311784334114151776741040697808029563449966072264511544769861326483835581088191752567148165409"));
  GEN g_q_c = strtoi("7226982982667784284607340011220616424554394853592495056851825214613723615410492468400146084481943091452495677425649405002137153382700126963171182913281089395393193450415031434185562111748472716618186256410737780813669746598943110785615647848722934493732187571819575328802273312361412673162473673367423560300753412593868713829574117975260110889575205719");
  GEN g_q = qfi(g_q_a, g_q_b, g_q_c);

  size_t numthread = MAX_THREAD;
  if (numthread > count) {
    numthread = count;
    omp_set_num_threads(numthread);
  }

  struct pari_thread pth[numthread];
  for (size_t i = 0; i < numthread; i++) {
    pari_thread_alloc(&pth[i], 100000000, NULL);
  }

  #pragma omp parallel
  {
    int thnum = omp_get_thread_num();
    if (thnum) {
      (void) pari_thread_start(&pth[thnum]);
    }

    vector_t keys_private = vector_init(count / numthread);

    #pragma omp for schedule(static)
    for (size_t i = 0; i < count; i++) {
      cl_key_pair_t key_pair_i;
      cl_key_pair_null(key_pair_i);
      cl_key_pair_new(key_pair_i);

      key_pair_i->sk = randomi(bound);
      key_pair_i->pk = nupow(g_q, key_pair_i->sk, NULL);
      vector_add(keys_private, key_pair_i);
    }

    #pragma omp for ordered
    for (size_t i = 0; i < numthread; i++) {
      #pragma omp ordered
      {
        vector_copy(keys, keys_private);
      }
    }

    if (thnum) {
      pari_thread_close();
    }
  }

  for (size_t i = 0; i < numthread; i++) {
    if (&pth[i] != NULL) pari_thread_free(&pth[i]);
  }
}

int main(void) {
  pari_init(1000000000, 2);
    setrand(getwalltime());

  vector_t keys = vector_init(MAX_COUNT);

  uint64_t start_time = mtimer();
  test(keys, MAX_COUNT);
  uint64_t stop_time = mtimer();
  double total_time = ((stop_time - start_time) / CLOCK_PRECISION);
  printf("Elapsed time: %.5lf s\n", total_time);

  printf("\nvector_size(keys): %u\n", vector_size(keys));
  for (size_t i = 0; i < vector_size(keys); i++) {
    cl_key_pair_t key_pair_i = (cl_key_pair_t) vector_get(keys, i);
    printf("%zu->sk: %s\n", i, GENtostr(key_pair_i->sk));
    printf("%zu->pk: %s\n", i, GENtostr(key_pair_i->pk));
  }

  vector_free(keys);
  pari_close();
  return 0;
}

关于如何在 structOpenMP 中使用 GEN 有什么想法吗?

修复涉及删除以下行:

for (size_t i = 0; i < numthread; i++) {
  if (&pth[i] != NULL) pari_thread_free(&pth[i]);
}

我在 Bill Allombert 的 PARI/GP 邮件列表中收到了此修复程序,他还为此提供了以下解释:

Each threads use its own PARI stack to store results. Calling pari_thread_free destroys the PARI stack thread and any result computed by that thread. So whatever result you want to preserve need to be moved to the main stack using gcopy before calling pari_thread_free.

In fact this is why the API has pari_thread_alloc: to allow the thread stacks to survive pari_thread_close so that the main thread get the results.