为什么 EXC_BAD_ACCESS (code=EXC_I386_GPFLT) 当回调指针指向函数？

Question

以下代码尝试使用 hashset 和 vector 来计算文档中的词频。

#include <assert.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>


/** ==================================== VECTOR ======================================= */

typedef enum {
    true, false
} bool;

typedef int (*VectorCompareFunction)(const void *elemAddr1, const void *elemAddr2);
typedef void (*VectorFreeFunction)(void *elemAddr);

typedef struct {
    int elemSize;               //how many byte for each element
    int elemNum;                //number of current element in vector
    int capacity;               //maximum number of element vector can hold
    void *elems;                //pointer to data memory
    VectorFreeFunction freefn;  //pointer to the function used to free each element
} vector;

/**
 * Reallocate a new memory of twice of original size
 * return 1 if reallocation success, otherwise return -1.
 */
static void DoubleMemory(vector *v) {
    void *tmp = realloc(v->elems, v->capacity * v->elemSize * 2);
    assert(tmp != NULL);
    v->elems = tmp;
    v->capacity *= 2;
}

/**
 * Constructor
 */
void VectorNew(vector *v, int elemSize, VectorFreeFunction freefn, int initialAllocation) {
    v->elems = malloc(initialAllocation * elemSize);
    assert(v->elems != NULL);
    v->elemSize = elemSize;
    v->elemNum = 0;
    v->capacity = initialAllocation;
    v->freefn = freefn;
}

/**
 * Appends a new element to the end of the specified vector.
 */
void VectorAppend(vector *v, const void *elemAddr) {
    /* double size if neccessary */
    if (v->elemNum == v->capacity) DoubleMemory(v);
    memcpy((char *)v->elems + v->elemNum * v->elemSize, elemAddr, v->elemSize);
    v->elemNum++;
}

/**
 * Search the specified vector for an element whose contents match the element passed as the key.
 */
int VectorSearch(const vector *v, const void *key, VectorCompareFunction searchfn, int startIndex, bool isSorted) {
    assert(key && searchfn);
    if (v->elemNum == 0) return -1;
    assert(startIndex >= 0 && startIndex < v->elemNum);
    if (isSorted == true) {
        /* binary search */
        void *startAddr = (char *)v->elems + startIndex * v->elemSize;
        int size = v->elemNum - startIndex;
        void *resAddr = bsearch(key, startAddr, size, v->elemSize, searchfn);
        return (resAddr != NULL)? ((char *)resAddr - (char *)v->elems) / v->elemSize : -1;
    } else {
        /* linear search */
        for (int i = 0; i < v->elemNum; i++) {
            if (searchfn((char *)v->elems + i * v->elemSize, key) == 0) {
                return i;
            }
        }
        return -1;
    }
}

/**
 * Overwrites the element at the specified position.
 */
void VectorReplace(vector *v, const void *elemAddr, int position) {
    assert(position >= 0 && position < v->elemNum);
    void *posAddr = (char *)v->elems + position * v->elemSize;
    /* free the memory of old element first */
    if (v->freefn != NULL) v->freefn(posAddr);
    memcpy(posAddr, elemAddr, v->elemSize);
}





/** ==================================== HASHSET ======================================= */

typedef int (*HashSetHashFunction)(const void *elemAddr, int numBuckets);
typedef int (*HashSetCompareFunction)(const void *elemAddr1, const void *elemAddr2);
typedef void (*HashSetFreeFunction)(void *elemAddr);

typedef struct {
    int elemNum;            //current element number
    int bucketNum;          //number of hash bucket
    int elemSize;           //how many byte each element has
    vector *buckets;        //array of vector
    HashSetHashFunction hashfn;
    HashSetCompareFunction compfn;
    HashSetFreeFunction freefn;
} hashset;

void HashSetNew(hashset *h, int elemSize, int numBuckets,   
        HashSetHashFunction hashfn, HashSetCompareFunction comparefn, HashSetFreeFunction freefn) {
    assert(elemSize > 0 && numBuckets > 0 && hashfn != NULL && comparefn != NULL);
    h->buckets = (vector *)malloc(numBuckets * sizeof(vector));
    assert(h->buckets != NULL);
    for (int i = 0; i < numBuckets; i++) {
        vector *bucket = (vector *)((char *)h->buckets + i * sizeof(vector));
        VectorNew(bucket, elemSize, freefn, 4);
    }
    h->bucketNum = numBuckets;
    h->elemSize = elemSize;
    h->elemNum = 0;
    h->hashfn = hashfn;
    h->compfn = comparefn;
    h->freefn = freefn;
}

void HashSetEnter(hashset *h, const void *elemAddr) {
    int hash = h->hashfn(elemAddr, h->bucketNum);
    vector *bucket = (vector *)((char *)h->buckets + hash * sizeof(vector));
    // search in the hash set first
    int pos = VectorSearch(bucket, elemAddr, h->compfn, 0, false);
    if (pos != -1) {
        // replace the old one if find a match
        VectorReplace(bucket, elemAddr, pos);
    } else {
        // otherwise insert the new one
        VectorAppend(bucket, elemAddr);
        h->elemNum++;
    }
}




/** ==================================== DOC_FREQ & WORD_INDEX ======================================= */

/****************************************************************
 *
 * doc_freq is a key-value pair of [documentid, frequency]
 * It's not supposed to be exposed to user or search engine.
 * -----------------------------------------------------------
 * It looks like:
 *      [1611742826915764000]   [4      ]  
 *      |-------------------|   |-------|
 *       docid                   freq
 ***************************************************************/
typedef struct {
    long docid;
    int freq;
} doc_freq;

static void new_docfreq(doc_freq *df, long docid, int freq) {
    df->docid = docid;
    df->freq = freq;
}

/**
 * HashSetHashFunction<doc_freq>
 */
static int hash_docfreq(const void *elemAddr, int numBuckets) {
    doc_freq *df = (doc_freq *)elemAddr;
    return (int)(df->docid % numBuckets);
}

/**
 * HashSetCompareFunction<doc_freq>
 */
static int comp_docfreq(const void *elemAddr1, const void *elemAddr2) {
    long id1 = ((doc_freq *)elemAddr1)->docid;
    long id2 = ((doc_freq *)elemAddr2)->docid;
    if (id1 < id2) {
        return -1;
    } else if (id1 > id2) {
        return 1;
    } else { // id1 == id2
        return 0;
    }
}

/**
 * word_index is a index of a single word.
 * ---------------------------------------
 * A typical word_index looks like:
 *      [apple]: [doc1, 5], [doc3, 10], [doc5, 7]
 *      |-----|  |------------------------------|
 *       word     freqs
 */
typedef struct {
    char *word;
    hashset *freqs; // hashset<doc_freq>
} word_index;

static const size_t kWordIndexHashSetBuckets = 64;
static void new_wordindex(word_index *wi, const char *word) {
    hashset h;
    HashSetNew(&h, sizeof(doc_freq), kWordIndexHashSetBuckets, hash_docfreq, comp_docfreq, NULL);
    wi->freqs = &h;
    size_t wordlen = strlen(word);
    wi->word = (char *)malloc(wordlen + 1); // +1 for null-termination
    strcpy(wi->word, word);
    (wi->word)[wordlen] = '[=12=]';
}

/**
 * Mainly used to build a word_index.
 */
void add_docfreq(word_index *wi, const long docid, const int frequency) {
    doc_freq df;
    new_docfreq(&df, docid, frequency);
    HashSetEnter(wi->freqs, &df);
}





/** ==================================== UNIT-TEST ======================================= */

int main(void) {
    /* apple:   [1611742826915764000, 5][1611742826915538000, 10] */
    word_index *apple = (word_index *)malloc(sizeof(word_index));
    new_wordindex(apple, "apple");
    add_docfreq(apple, 1611742826915764000L, 5);
    add_docfreq(apple, 1611742826915538000L, 10);
}

它给了我一个 segmentation fault:

[1]    84309 segmentation fault  testindexer

lldb 发现问题发生在 hashset 尝试回调函数 hashfn 的给定指针时。我不太明白这里的 EXC_BAD_ACCESS (code=EXC_I386_GPFLT) 是什么。我之前对 hashset 做过几次单元测试，HashSetEnter() 函数与 hashfn 配合得很好。又对hash_docfreq()函数进行了单元测试，也能正确计算出哈希值。我有点困惑。任何人都可以帮忙吗？谢谢！

Process 89962 stopped
* thread #1, queue = 'com.apple.main-thread', stop reason = EXC_BAD_ACCESS (code=EXC_I386_GPFLT)
    frame #0: 0x0000000100003b83 testnothing`HashSetEnter(h=0x00007ffeefbff620, elemAddr=0x00007ffeefbff638) at test_nothing.c:130:13
   127  }
   128
   129  void HashSetEnter(hashset *h, const void *elemAddr) {
-> 130      int hash = h->hashfn(elemAddr, h->bucketNum);
   131      vector *bucket = (vector *)((char *)h->buckets + hash * sizeof(vector));
   132      // search in the hash set first
   133      int pos = VectorSearch(bucket, elemAddr, h->compfn, 0, false);
Target 0: (testnothing) stopped.
(lldb) bt
* thread #1, queue = 'com.apple.main-thread', stop reason = EXC_BAD_ACCESS (code=EXC_I386_GPFLT)
  * frame #0: 0x0000000100003b83 testnothing`HashSetEnter(h=0x00007ffeefbff620, elemAddr=0x00007ffeefbff638) at test_nothing.c:130:13
    frame #1: 0x0000000100003c37 testnothing`add_docfreq(wi=0x0000000100306060, docid=1611742826915764000, frequency=5) at test_nothing.c:222:2
    frame #2: 0x0000000100003cae testnothing`main at test_nothing.c:235:2
    frame #3: 0x00007fff70df0cc9 libdyld.dylib`start + 1
(lldb)

Answer 1

运行 under gdb，出错后，执行tb命令获取堆栈回溯，我们看到：

#0  0x00000005004016e6 in ?? ()
#1  0x000000000040163a in HashSetEnter (h=0x7fffffffdc10,
    elemAddr=0x7fffffffdc40) at orig.c:150
#2  0x0000000000401834 in add_docfreq (wi=0x405260, docid=1611742826915764000,
    frequency=5) at orig.c:266
#3  0x0000000000401879 in main () at orig.c:278
(gdb) frame 1
#1  0x000000000040163a in HashSetEnter (h=0x7fffffffdc10,
    elemAddr=0x7fffffffdc40) at orig.c:150
150     int hash = h->hashfn(elemAddr, h->bucketNum);

您在 HashSetEnter 中出现段错误，行：

int hash = h->hashfn(elemAddr, h->bucketNum);

这是因为 h 目前无效。

查源，设置值的地方最终无效，设置在new_wordindex.

在 new_wordindex 中，您正在保存 [并返回] h 的地址。

h这里是一个函数作用域的变量，所以在函数returns.

之后就不再有效了

为此您必须使用 malloc。并且，稍后，您需要能够在清理期间 free 这个指针。

这是错误函数的重构代码。

请注意，为了显示 old/original 代码与 new/corrected 代码，我使用了预处理器条件：

#if 0
// old/original code
// NOTE: this is _not_ compiled in
#else
// new/corrected code
// NOTE: this _is_ compiled in
#endif

#if 0下的代码可以是elided/removed，只留下#else代码。

static void
new_wordindex(word_index * wi, const char *word)
{
// NOTE/BUG: h is function scoped -- this can _not_ be saved and returned
// because it ceases to be valid when we return
#if 0
    hashset h;
    HashSetNew(&h, sizeof(doc_freq), kWordIndexHashSetBuckets, hash_docfreq, comp_docfreq, NULL);
    wi->freqs = &h;
#else
    hashset *h = malloc(sizeof(*h));
    HashSetNew(h, sizeof(doc_freq), kWordIndexHashSetBuckets, hash_docfreq, comp_docfreq, NULL);
    wi->freqs = h;
#endif

    size_t wordlen = strlen(word);

    wi->word = (char *) malloc(wordlen + 1);    // +1 for null-termination
    strcpy(wi->word, word);
    (wi->word)[wordlen] = '[=13=]';
}

为什么 EXC_BAD_ACCESS (code=EXC_I386_GPFLT) 当回调指针指向函数？

Why EXC_BAD_ACCESS (code=EXC_I386_GPFLT) when callback pointer to function?

c

segmentation-fault

exc-bad-access