为什么添加临界区会导致段错误?
Why does adding a critical section cause a segmentation fault?
我正在尝试并行使用以下快速排序算法:
#include <stdio.h>
#include <stdlib.h>
#include<omp.h>
#define MAX_UNFINISHED 1000 /* Maximum number of unsorted sub-arrays */
struct {
int first; /* Low index of unsorted sub-array */
int last; /* High index of unsorted sub-array */
} unfinished[MAX_UNFINISHED]; /* Stack */
int unfinished_index; /* Index of top of stack */
float *A; /* Array of elements to be sorted */
int n; /* Number of elements in A */
void swap (float *x, float *y)
{
float tmp;
tmp = *x;
*x = *y;
*y = tmp;
}
int partition (int first, int last)
{
int i, j;
float x;
x = A[last];
i = first - 1;
for (j = first; j < last; j++)
if (A[j] <= x) {
i++;
swap (&A[i], &A[j]);
}
swap (&A[i+1], &A[last]);
return (i+1);
}
void quicksort (void)
{
int first;
int last;
int my_index;
int q;
while (unfinished_index >= 0) {
#pragma omp critical
{
my_index = unfinished_index;
unfinished_index--;
first = unfinished[my_index].first;
last = unfinished[my_index].last;
}
while (first < last) {
q = partition (first, last);
if ((unfinished_index+1) >= MAX_UNFINISHED) {
printf ("Stack overflow\n");
exit (-1);
}
#pragma omp critical
{
unfinished_index++;
unfinished[unfinished_index].first = q+1;
unfinished[unfinished_index].last = last;
last = q-1;
}
}
}
}
int verify_sorted (float *A, int n)
{
int i;
for (i = 0; i < n-1; i++)
if (A[i] > A[i+1])
return 0;
return 1;
}
int main (int argc, char *argv[])
{
int i;
int seed; /* Seed component input by user */
unsigned short xi[3]; /* Random number seed */
if (argc != 3) {
printf ("Command-line syntax: %s <n> <seed>\n", argv[0]);
exit (-1);
}
seed = atoi (argv[2]);
xi[0] = xi[1] = xi[2] = seed;
n = atoi (argv[1]);
A = (float *) malloc (n * sizeof(float));
for (i = 0; i < n; i++)
A[i] = erand48(xi);
unfinished[0].first = 0;
unfinished[0].last = n-1;
unfinished_index = 0; //
#pragma omp parallel
quicksort();
if (verify_sorted (A, n)) printf ("Elements are sorted\n");
else printf ("ERROR: Elements are NOT sorted\n");
return 0;
}
在 quicksort() 函数中添加临界区导致段错误 11,这是为什么?根据我的基本理解,当系统试图访问它无法访问或不存在的内存时会发生这样的错误,我看不出会在哪里发生。在整个 while()
循环上放置一个关键部分可以修复它,但它会很慢。
您的 QuickSort 并行化策略看起来过于复杂并且容易出现 竞争条件 ,并行化该算法的典型方法是使用 OpenMP 任务。你可以看看下面的 SO Threads
1.QuickSort;
2.MergeSort.
Adding the critical sections in the quicksort() function causes a
segmentation fault 11, why is that?
您的代码有几个问题;
- 在
while (unfinished_index >= 0)
中读取 unfinished_index
和其他线程更新该变量之间存在 竞争条件 ;
- 发生
segmentation fault 11
是因为线程可以访问数组边界外的位置 unfinished
,包括负数位置。
即使有临界区,多个线程也可以执行:
unfinished_index--;
最终导致 unfinished_index < 0
,因此:
my_index = unfinished_index;
unfinished_index--;
first = unfinished[my_index].first; <-- problem
last = unfinished[my_index].last; <-- problem
正在访问 unfinished
数组的负数位置。这同样适用于上限。我通过此检查的所有线程:
if ((unfinished_index+1) >= MAX_UNFINISHED) {
printf ("Stack overflow\n");
exit (-1);
}
然后简单地
#pragma omp critical
{
unfinished_index++;
unfinished[unfinished_index].first = q+1;
unfinished[unfinished_index].last = last;
last = q-1;
}
增加 unfinished_index
以至于它可以访问数组边界之外的位置。
要解决这些问题,您可以执行以下操作:
void quicksort (void)
{
int first;
int last;
int my_index;
int q;
int keep_working = 1;
while (keep_working) {
#pragma omp critical
{
my_index = unfinished_index;
if(my_index >= 0 && my_index < MAX_UNFINISHED)
unfinished_index--;
else
keep_working = 0;
}
if(keep_working){
first = unfinished[my_index].first;
last = unfinished[my_index].last;
while (first < last && keep_working)
{
q = partition (first, last);
#pragma omp critical
{
unfinished_index++;
my_index = unfinished_index;
}
if (my_index < MAX_UNFINISHED){
unfinished[my_index].first = q+1;
unfinished[my_index].last = last;
last = q-1;
}
else
keep_working = 0;
}
}
}
}
但是请记住,以下代码适用于 2 个线程。除此之外,您有时可能会得到“数组未排序”。我会把它留给你来解决。但是,我建议您改用 Task
方法,因为它更快、更简单,并且更新更现代的 OpenMP 构造函数。
我正在尝试并行使用以下快速排序算法:
#include <stdio.h>
#include <stdlib.h>
#include<omp.h>
#define MAX_UNFINISHED 1000 /* Maximum number of unsorted sub-arrays */
struct {
int first; /* Low index of unsorted sub-array */
int last; /* High index of unsorted sub-array */
} unfinished[MAX_UNFINISHED]; /* Stack */
int unfinished_index; /* Index of top of stack */
float *A; /* Array of elements to be sorted */
int n; /* Number of elements in A */
void swap (float *x, float *y)
{
float tmp;
tmp = *x;
*x = *y;
*y = tmp;
}
int partition (int first, int last)
{
int i, j;
float x;
x = A[last];
i = first - 1;
for (j = first; j < last; j++)
if (A[j] <= x) {
i++;
swap (&A[i], &A[j]);
}
swap (&A[i+1], &A[last]);
return (i+1);
}
void quicksort (void)
{
int first;
int last;
int my_index;
int q;
while (unfinished_index >= 0) {
#pragma omp critical
{
my_index = unfinished_index;
unfinished_index--;
first = unfinished[my_index].first;
last = unfinished[my_index].last;
}
while (first < last) {
q = partition (first, last);
if ((unfinished_index+1) >= MAX_UNFINISHED) {
printf ("Stack overflow\n");
exit (-1);
}
#pragma omp critical
{
unfinished_index++;
unfinished[unfinished_index].first = q+1;
unfinished[unfinished_index].last = last;
last = q-1;
}
}
}
}
int verify_sorted (float *A, int n)
{
int i;
for (i = 0; i < n-1; i++)
if (A[i] > A[i+1])
return 0;
return 1;
}
int main (int argc, char *argv[])
{
int i;
int seed; /* Seed component input by user */
unsigned short xi[3]; /* Random number seed */
if (argc != 3) {
printf ("Command-line syntax: %s <n> <seed>\n", argv[0]);
exit (-1);
}
seed = atoi (argv[2]);
xi[0] = xi[1] = xi[2] = seed;
n = atoi (argv[1]);
A = (float *) malloc (n * sizeof(float));
for (i = 0; i < n; i++)
A[i] = erand48(xi);
unfinished[0].first = 0;
unfinished[0].last = n-1;
unfinished_index = 0; //
#pragma omp parallel
quicksort();
if (verify_sorted (A, n)) printf ("Elements are sorted\n");
else printf ("ERROR: Elements are NOT sorted\n");
return 0;
}
在 quicksort() 函数中添加临界区导致段错误 11,这是为什么?根据我的基本理解,当系统试图访问它无法访问或不存在的内存时会发生这样的错误,我看不出会在哪里发生。在整个 while()
循环上放置一个关键部分可以修复它,但它会很慢。
您的 QuickSort 并行化策略看起来过于复杂并且容易出现 竞争条件 ,并行化该算法的典型方法是使用 OpenMP 任务。你可以看看下面的 SO Threads
1.QuickSort;
2.MergeSort.
Adding the critical sections in the quicksort() function causes a segmentation fault 11, why is that?
您的代码有几个问题;
- 在
while (unfinished_index >= 0)
中读取unfinished_index
和其他线程更新该变量之间存在 竞争条件 ; - 发生
segmentation fault 11
是因为线程可以访问数组边界外的位置unfinished
,包括负数位置。
即使有临界区,多个线程也可以执行:
unfinished_index--;
最终导致 unfinished_index < 0
,因此:
my_index = unfinished_index;
unfinished_index--;
first = unfinished[my_index].first; <-- problem
last = unfinished[my_index].last; <-- problem
正在访问 unfinished
数组的负数位置。这同样适用于上限。我通过此检查的所有线程:
if ((unfinished_index+1) >= MAX_UNFINISHED) {
printf ("Stack overflow\n");
exit (-1);
}
然后简单地
#pragma omp critical
{
unfinished_index++;
unfinished[unfinished_index].first = q+1;
unfinished[unfinished_index].last = last;
last = q-1;
}
增加 unfinished_index
以至于它可以访问数组边界之外的位置。
要解决这些问题,您可以执行以下操作:
void quicksort (void)
{
int first;
int last;
int my_index;
int q;
int keep_working = 1;
while (keep_working) {
#pragma omp critical
{
my_index = unfinished_index;
if(my_index >= 0 && my_index < MAX_UNFINISHED)
unfinished_index--;
else
keep_working = 0;
}
if(keep_working){
first = unfinished[my_index].first;
last = unfinished[my_index].last;
while (first < last && keep_working)
{
q = partition (first, last);
#pragma omp critical
{
unfinished_index++;
my_index = unfinished_index;
}
if (my_index < MAX_UNFINISHED){
unfinished[my_index].first = q+1;
unfinished[my_index].last = last;
last = q-1;
}
else
keep_working = 0;
}
}
}
}
但是请记住,以下代码适用于 2 个线程。除此之外,您有时可能会得到“数组未排序”。我会把它留给你来解决。但是,我建议您改用 Task
方法,因为它更快、更简单,并且更新更现代的 OpenMP 构造函数。