C 中的通用快速排序

Generic Quicksort in C

任何人都可以告诉我在这个伪代码之后的通用快速排序代码中我做错了什么 Quicksort & Partition, the algorithm works, because I have already done it with integers only without the compare function by passing an int array to the quicksort and partition functions, but I have tried to make it work for both int and strings. In this code I have tested only the int values, but the code doesn't work, the output is the initial value of the array, it's the same exact thing for the strings I get the same initial array as an output. I have commented the string part because they get sorted the same way as the integers. This is the integer code that works Integer working code:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
 
//prototipi delle funzioni
typedef int (*compare_function)(const void *, const void *); 
 
void generic_quicksort(void *v, int i, int f, size_t size, compare_function compare);
void generic_swap(void *a, void *b, size_t size);
int generic_partition(void *v, int i, int f, size_t size, compare_function compare);
 
void print_int_array(const int *array, size_t len) { 
    size_t i;
 
    for (i = 0; i < len; i++) 
        printf("%d | ", array[i]);
 
    putchar('\n');
}

//funzione di confronto
int compare_int(const void *, const void *); 
 
int compare_str(const void *a, const void *b) { 
    const char **ia = (const char **)a;
    const char **ib = (const char **)b;
    return strcmp(*ia, *ib);
    /* strcmp functions works exactly as expected from
    comparison function */ 
} 
 
void print_cstring_array(char **array, size_t len) { 
    size_t i;
 
    for (i = 0; i < len; i++) 
        printf("%s | ", array[i]);
 
    putchar('\n');
} 
 
int main() {
    int v[] = { 5, 4, 3, 2, 1 };
    char *strings[] = { "Zorro", "Alex", "Celine", "Bill", "Forest", "Dexter" };

    int n = sizeof(v) / sizeof(int);
 
    print_int_array(v, n);
 
    generic_quicksort((void *)v, 0, n - 1, sizeof(int), compare_int);
    
    print_int_array(v, n);

    /*

    int s = sizeof(strings) / sizeof(*char);

    print_cstring_array(strings, s);

    generic_quicksort((void *)strings, 0, s - 1, sizeof(*char), compare_str);

    print_cstring_array(strings, s);
*/
    return 0;
} 
 
int compare_int(const void *a, const void *b) {
    return *((int*)a) - *((int*)b);
} 
 
void generic_quicksort(void *v, int i, int f, size_t size, int (*comp)(const void *, const void *)) {
    if (i >= f)
        return;
 
    int p = generic_partition(v, i, f, size, comp);
 
    generic_quicksort(v, i, p - 1, size, comp);
    generic_quicksort(v, p + 1, f, size, comp);
}
 
void generic_swap(void *a, void *b, size_t size) {
    void *tmp = malloc(size);
 
    memcpy(tmp, a, size);
    memcpy(a, b, size);
    memcpy(b, tmp, size);
 
    free(tmp);
}
 
int generic_partition(void *v, int i, int f, size_t size, int (*comp)(const void *, const void *)) {
    
    void *x = malloc(size);
    int k, j;

    memcpy(x, v + (i * size), size);

    k = i - 1;

    for (j = i; j <= f - 1; j++) {
        if (comp(v + (j * size), x) <= 0) {
            k++;
            generic_swap(v + (k * size), v + (j * size), size);
        }
    }
    
    generic_swap(v + ((k + 1) * size), v + (f * size), size);

    free(x);
    
    return (k + 1);
}

代码中存在多个问题:

  • int n = sizeof(v) / sizeof(int); 是有风险的:关于 v 的类型有一个沉默的假设。你应该写 int n = sizeof(v) / sizeof(*v);

  • 传递切片的第一个和最后一个元素的索引的约定在 C 中令人困惑且不符合习惯,您应该传递第一个元素的索引和后面元素的索引最后一个。这允许无符号索引类型和空数组。

  • v + (j * size) 使用 void 指针算法,这是一种并非在所有系统上都可用的扩展。为此使用 unsigned char 指针。

  • 整数的比较函数对于大的绝对值有未定义的行为,因为减去它们可能会导致算术溢出。你应该改用这个:

    int compare_int(const void *a, const void *b) {
        int ia = *(const int *)a;
        int ib = *(const int *)b;
        return (ia > ib) - (ia < ib);
    }
    
  • generic_swap 使用 mallocmemcpy。这会导致小元素的开销很大,你应该使用一个简单的循环:

    void generic_swap(void *a, void *b, size_t size) {
        unsigned char *pa = (unsigned char *)a;
        unsigned char *pb = (unsigned char *)b;
        while (size-- > 0) {
            unsigned char c = *pa;
            *pa++ = *pb;
            *pb++ = c;
        }
    }
    
  • 参考中的generic_partition使用最后一个元素作为主元,但你从第一个元素初始化x。你应该写memcpy(x, v + (f * size), size);。这是导致失败的原因。当前代码可能巧合地适用于 int 版本。使用第一个或最后一个元素作为基准会导致排序数组出现最坏情况。

这是修改后的版本:

#include <stdio.h>
#include <string.h>

//prototipi delle funzioni
typedef int (*compare_function)(const void *, const void *);

void generic_quicksort(void *v, int i, int f, size_t size, compare_function compare);

//funzione di confronto
int compare_int(const void *a, const void *b) {
    int ia = *(const int *)a;
    int ib = *(const int *)b;
    return (ia > ib) - (ia < ib);
}

int compare_str(const void *a, const void *b) {
    const char *sa = *(const char * const *)a;
    const char *sb = *(const char * const *)b;
    return strcmp(sa, sb);
}

void print_int_array(const int *array, size_t len) {
    size_t i;

    if (len > 0) {
        printf("%d", array[0]);
        for (i = 1; i < len; i++)
            printf("| %d", array[i]);
    }
    putchar('\n');
}

void print_cstring_array(const char * const *array, size_t len) {
    size_t i;

    if (len > 0) {
        printf("%s", array[0]);
        for (i = 1; i < len; i++)
            printf(" | %s", array[i]);
    }
    putchar('\n');
}

static void generic_swap(void *a, void *b, size_t size) {
    unsigned char *pa = (unsigned char *)a;
    unsigned char *pb = (unsigned char *)b;
    while (size-- > 0) {
        unsigned char c = *pa;
        *pa++ = *pb;
        *pb++ = c;
    }
}

static int generic_partition(void *v, int i, int f, size_t size,
                             int (*comp)(const void *, const void *))
{
    unsigned char *p = (unsigned char *)v;
    int j, k = i;
    // using first element as pivot

    for (j = i + 1; j < f; j++) {
        if (comp(p + j * size, p + i * size) <= 0) {
            k++;
            generic_swap(p + k * size, p + j * size, size);
        }
    }

    /* swap the pivot to the end of the left part */
    generic_swap(p + i * size, p + k * size, size);
    return k;
}

void generic_quicksort(void *v, int i, int f, size_t size,
                       int (*comp)(const void *, const void *))
{
    if (f > i + 1) {
        int p = generic_partition(v, i, f, size, comp);
        generic_quicksort(v, i, p, size, comp);
        generic_quicksort(v, p + 1, f, size, comp);
    }
}

int main() {
    int v[] = { 5, 4, 3, 2, 1 };
    int n = sizeof(v) / sizeof(*v);

    const char *strings[] = { "Zorro", "Alex", "Celine", "Bill", "Forest", "Dexter" };
    int s = sizeof(strings) / sizeof(*strings);

    print_int_array(v, n);
    generic_quicksort((void *)v, 0, n, sizeof(*v), compare_int);
    print_int_array(v, n);

    print_cstring_array(strings, s);
    generic_quicksort((void *)strings, 0, s, sizeof(*strings), compare_str);
    print_cstring_array(strings, s);

    return 0;
}

请注意,选择第一个或最后一个元素作为基准会导致排序数组的最坏情况复杂性。 generic_quicksort 的递归深度将是数组的长度,可能导致 堆栈溢出

这是一个修改版本,可以防止这种情况发生,但在排序数组上仍然具有二次时间复杂度:

void generic_quicksort(void *v, int i, int f, size_t size,
                       int (*comp)(const void *, const void *))
{
    while (f > i + 1) {
        int p = generic_partition(v, i, f, size, comp);
        if (p - i < f - p) {
            generic_quicksort(v, i, p, size, comp);
            i = p + 1;
        } else {
            generic_quicksort(v, p + 1, f, size, comp);
            f = p;
        }
    }
}