为什么 EXC_BAD_ACCESS (code=EXC_I386_GPFLT) 当回调指针指向函数?
Why EXC_BAD_ACCESS (code=EXC_I386_GPFLT) when callback pointer to function?
以下代码尝试使用 hashset
和 vector
来计算文档中的词频。
#include <assert.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
/** ==================================== VECTOR ======================================= */
typedef enum {
true, false
} bool;
typedef int (*VectorCompareFunction)(const void *elemAddr1, const void *elemAddr2);
typedef void (*VectorFreeFunction)(void *elemAddr);
typedef struct {
int elemSize; //how many byte for each element
int elemNum; //number of current element in vector
int capacity; //maximum number of element vector can hold
void *elems; //pointer to data memory
VectorFreeFunction freefn; //pointer to the function used to free each element
} vector;
/**
* Reallocate a new memory of twice of original size
* return 1 if reallocation success, otherwise return -1.
*/
static void DoubleMemory(vector *v) {
void *tmp = realloc(v->elems, v->capacity * v->elemSize * 2);
assert(tmp != NULL);
v->elems = tmp;
v->capacity *= 2;
}
/**
* Constructor
*/
void VectorNew(vector *v, int elemSize, VectorFreeFunction freefn, int initialAllocation) {
v->elems = malloc(initialAllocation * elemSize);
assert(v->elems != NULL);
v->elemSize = elemSize;
v->elemNum = 0;
v->capacity = initialAllocation;
v->freefn = freefn;
}
/**
* Appends a new element to the end of the specified vector.
*/
void VectorAppend(vector *v, const void *elemAddr) {
/* double size if neccessary */
if (v->elemNum == v->capacity) DoubleMemory(v);
memcpy((char *)v->elems + v->elemNum * v->elemSize, elemAddr, v->elemSize);
v->elemNum++;
}
/**
* Search the specified vector for an element whose contents match the element passed as the key.
*/
int VectorSearch(const vector *v, const void *key, VectorCompareFunction searchfn, int startIndex, bool isSorted) {
assert(key && searchfn);
if (v->elemNum == 0) return -1;
assert(startIndex >= 0 && startIndex < v->elemNum);
if (isSorted == true) {
/* binary search */
void *startAddr = (char *)v->elems + startIndex * v->elemSize;
int size = v->elemNum - startIndex;
void *resAddr = bsearch(key, startAddr, size, v->elemSize, searchfn);
return (resAddr != NULL)? ((char *)resAddr - (char *)v->elems) / v->elemSize : -1;
} else {
/* linear search */
for (int i = 0; i < v->elemNum; i++) {
if (searchfn((char *)v->elems + i * v->elemSize, key) == 0) {
return i;
}
}
return -1;
}
}
/**
* Overwrites the element at the specified position.
*/
void VectorReplace(vector *v, const void *elemAddr, int position) {
assert(position >= 0 && position < v->elemNum);
void *posAddr = (char *)v->elems + position * v->elemSize;
/* free the memory of old element first */
if (v->freefn != NULL) v->freefn(posAddr);
memcpy(posAddr, elemAddr, v->elemSize);
}
/** ==================================== HASHSET ======================================= */
typedef int (*HashSetHashFunction)(const void *elemAddr, int numBuckets);
typedef int (*HashSetCompareFunction)(const void *elemAddr1, const void *elemAddr2);
typedef void (*HashSetFreeFunction)(void *elemAddr);
typedef struct {
int elemNum; //current element number
int bucketNum; //number of hash bucket
int elemSize; //how many byte each element has
vector *buckets; //array of vector
HashSetHashFunction hashfn;
HashSetCompareFunction compfn;
HashSetFreeFunction freefn;
} hashset;
void HashSetNew(hashset *h, int elemSize, int numBuckets,
HashSetHashFunction hashfn, HashSetCompareFunction comparefn, HashSetFreeFunction freefn) {
assert(elemSize > 0 && numBuckets > 0 && hashfn != NULL && comparefn != NULL);
h->buckets = (vector *)malloc(numBuckets * sizeof(vector));
assert(h->buckets != NULL);
for (int i = 0; i < numBuckets; i++) {
vector *bucket = (vector *)((char *)h->buckets + i * sizeof(vector));
VectorNew(bucket, elemSize, freefn, 4);
}
h->bucketNum = numBuckets;
h->elemSize = elemSize;
h->elemNum = 0;
h->hashfn = hashfn;
h->compfn = comparefn;
h->freefn = freefn;
}
void HashSetEnter(hashset *h, const void *elemAddr) {
int hash = h->hashfn(elemAddr, h->bucketNum);
vector *bucket = (vector *)((char *)h->buckets + hash * sizeof(vector));
// search in the hash set first
int pos = VectorSearch(bucket, elemAddr, h->compfn, 0, false);
if (pos != -1) {
// replace the old one if find a match
VectorReplace(bucket, elemAddr, pos);
} else {
// otherwise insert the new one
VectorAppend(bucket, elemAddr);
h->elemNum++;
}
}
/** ==================================== DOC_FREQ & WORD_INDEX ======================================= */
/****************************************************************
*
* doc_freq is a key-value pair of [documentid, frequency]
* It's not supposed to be exposed to user or search engine.
* -----------------------------------------------------------
* It looks like:
* [1611742826915764000] [4 ]
* |-------------------| |-------|
* docid freq
***************************************************************/
typedef struct {
long docid;
int freq;
} doc_freq;
static void new_docfreq(doc_freq *df, long docid, int freq) {
df->docid = docid;
df->freq = freq;
}
/**
* HashSetHashFunction<doc_freq>
*/
static int hash_docfreq(const void *elemAddr, int numBuckets) {
doc_freq *df = (doc_freq *)elemAddr;
return (int)(df->docid % numBuckets);
}
/**
* HashSetCompareFunction<doc_freq>
*/
static int comp_docfreq(const void *elemAddr1, const void *elemAddr2) {
long id1 = ((doc_freq *)elemAddr1)->docid;
long id2 = ((doc_freq *)elemAddr2)->docid;
if (id1 < id2) {
return -1;
} else if (id1 > id2) {
return 1;
} else { // id1 == id2
return 0;
}
}
/**
* word_index is a index of a single word.
* ---------------------------------------
* A typical word_index looks like:
* [apple]: [doc1, 5], [doc3, 10], [doc5, 7]
* |-----| |------------------------------|
* word freqs
*/
typedef struct {
char *word;
hashset *freqs; // hashset<doc_freq>
} word_index;
static const size_t kWordIndexHashSetBuckets = 64;
static void new_wordindex(word_index *wi, const char *word) {
hashset h;
HashSetNew(&h, sizeof(doc_freq), kWordIndexHashSetBuckets, hash_docfreq, comp_docfreq, NULL);
wi->freqs = &h;
size_t wordlen = strlen(word);
wi->word = (char *)malloc(wordlen + 1); // +1 for null-termination
strcpy(wi->word, word);
(wi->word)[wordlen] = '[=12=]';
}
/**
* Mainly used to build a word_index.
*/
void add_docfreq(word_index *wi, const long docid, const int frequency) {
doc_freq df;
new_docfreq(&df, docid, frequency);
HashSetEnter(wi->freqs, &df);
}
/** ==================================== UNIT-TEST ======================================= */
int main(void) {
/* apple: [1611742826915764000, 5][1611742826915538000, 10] */
word_index *apple = (word_index *)malloc(sizeof(word_index));
new_wordindex(apple, "apple");
add_docfreq(apple, 1611742826915764000L, 5);
add_docfreq(apple, 1611742826915538000L, 10);
}
它给了我一个 segmentation fault
:
[1] 84309 segmentation fault testindexer
lldb 发现问题发生在 hashset
尝试回调函数 hashfn
的给定指针时。我不太明白这里的 EXC_BAD_ACCESS (code=EXC_I386_GPFLT)
是什么。我之前对 hashset 做过几次单元测试,HashSetEnter()
函数与 hashfn
配合得很好。又对hash_docfreq()
函数进行了单元测试,也能正确计算出哈希值。我有点困惑。任何人都可以帮忙吗?谢谢!
Process 89962 stopped
* thread #1, queue = 'com.apple.main-thread', stop reason = EXC_BAD_ACCESS (code=EXC_I386_GPFLT)
frame #0: 0x0000000100003b83 testnothing`HashSetEnter(h=0x00007ffeefbff620, elemAddr=0x00007ffeefbff638) at test_nothing.c:130:13
127 }
128
129 void HashSetEnter(hashset *h, const void *elemAddr) {
-> 130 int hash = h->hashfn(elemAddr, h->bucketNum);
131 vector *bucket = (vector *)((char *)h->buckets + hash * sizeof(vector));
132 // search in the hash set first
133 int pos = VectorSearch(bucket, elemAddr, h->compfn, 0, false);
Target 0: (testnothing) stopped.
(lldb) bt
* thread #1, queue = 'com.apple.main-thread', stop reason = EXC_BAD_ACCESS (code=EXC_I386_GPFLT)
* frame #0: 0x0000000100003b83 testnothing`HashSetEnter(h=0x00007ffeefbff620, elemAddr=0x00007ffeefbff638) at test_nothing.c:130:13
frame #1: 0x0000000100003c37 testnothing`add_docfreq(wi=0x0000000100306060, docid=1611742826915764000, frequency=5) at test_nothing.c:222:2
frame #2: 0x0000000100003cae testnothing`main at test_nothing.c:235:2
frame #3: 0x00007fff70df0cc9 libdyld.dylib`start + 1
(lldb)
运行 under gdb
,出错后,执行tb
命令获取堆栈回溯,我们看到:
#0 0x00000005004016e6 in ?? ()
#1 0x000000000040163a in HashSetEnter (h=0x7fffffffdc10,
elemAddr=0x7fffffffdc40) at orig.c:150
#2 0x0000000000401834 in add_docfreq (wi=0x405260, docid=1611742826915764000,
frequency=5) at orig.c:266
#3 0x0000000000401879 in main () at orig.c:278
(gdb) frame 1
#1 0x000000000040163a in HashSetEnter (h=0x7fffffffdc10,
elemAddr=0x7fffffffdc40) at orig.c:150
150 int hash = h->hashfn(elemAddr, h->bucketNum);
您在 HashSetEnter
中出现段错误,行:
int hash = h->hashfn(elemAddr, h->bucketNum);
这是因为 h
目前 无效 。
查源,设置值的地方最终无效,设置在new_wordindex
.
在 new_wordindex
中,您正在保存 [并返回] h
的地址。
h
这里是一个函数作用域的变量,所以在函数returns.
之后就不再有效了
为此您必须使用 malloc
。并且,稍后,您需要能够在清理期间 free
这个指针。
这是错误函数的重构代码。
请注意,为了显示 old/original 代码与 new/corrected 代码,我使用了预处理器条件:
#if 0
// old/original code
// NOTE: this is _not_ compiled in
#else
// new/corrected code
// NOTE: this _is_ compiled in
#endif
#if 0
下的代码可以是elided/removed,只留下#else
代码。
static void
new_wordindex(word_index * wi, const char *word)
{
// NOTE/BUG: h is function scoped -- this can _not_ be saved and returned
// because it ceases to be valid when we return
#if 0
hashset h;
HashSetNew(&h, sizeof(doc_freq), kWordIndexHashSetBuckets, hash_docfreq, comp_docfreq, NULL);
wi->freqs = &h;
#else
hashset *h = malloc(sizeof(*h));
HashSetNew(h, sizeof(doc_freq), kWordIndexHashSetBuckets, hash_docfreq, comp_docfreq, NULL);
wi->freqs = h;
#endif
size_t wordlen = strlen(word);
wi->word = (char *) malloc(wordlen + 1); // +1 for null-termination
strcpy(wi->word, word);
(wi->word)[wordlen] = '[=13=]';
}
以下代码尝试使用 hashset
和 vector
来计算文档中的词频。
#include <assert.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
/** ==================================== VECTOR ======================================= */
typedef enum {
true, false
} bool;
typedef int (*VectorCompareFunction)(const void *elemAddr1, const void *elemAddr2);
typedef void (*VectorFreeFunction)(void *elemAddr);
typedef struct {
int elemSize; //how many byte for each element
int elemNum; //number of current element in vector
int capacity; //maximum number of element vector can hold
void *elems; //pointer to data memory
VectorFreeFunction freefn; //pointer to the function used to free each element
} vector;
/**
* Reallocate a new memory of twice of original size
* return 1 if reallocation success, otherwise return -1.
*/
static void DoubleMemory(vector *v) {
void *tmp = realloc(v->elems, v->capacity * v->elemSize * 2);
assert(tmp != NULL);
v->elems = tmp;
v->capacity *= 2;
}
/**
* Constructor
*/
void VectorNew(vector *v, int elemSize, VectorFreeFunction freefn, int initialAllocation) {
v->elems = malloc(initialAllocation * elemSize);
assert(v->elems != NULL);
v->elemSize = elemSize;
v->elemNum = 0;
v->capacity = initialAllocation;
v->freefn = freefn;
}
/**
* Appends a new element to the end of the specified vector.
*/
void VectorAppend(vector *v, const void *elemAddr) {
/* double size if neccessary */
if (v->elemNum == v->capacity) DoubleMemory(v);
memcpy((char *)v->elems + v->elemNum * v->elemSize, elemAddr, v->elemSize);
v->elemNum++;
}
/**
* Search the specified vector for an element whose contents match the element passed as the key.
*/
int VectorSearch(const vector *v, const void *key, VectorCompareFunction searchfn, int startIndex, bool isSorted) {
assert(key && searchfn);
if (v->elemNum == 0) return -1;
assert(startIndex >= 0 && startIndex < v->elemNum);
if (isSorted == true) {
/* binary search */
void *startAddr = (char *)v->elems + startIndex * v->elemSize;
int size = v->elemNum - startIndex;
void *resAddr = bsearch(key, startAddr, size, v->elemSize, searchfn);
return (resAddr != NULL)? ((char *)resAddr - (char *)v->elems) / v->elemSize : -1;
} else {
/* linear search */
for (int i = 0; i < v->elemNum; i++) {
if (searchfn((char *)v->elems + i * v->elemSize, key) == 0) {
return i;
}
}
return -1;
}
}
/**
* Overwrites the element at the specified position.
*/
void VectorReplace(vector *v, const void *elemAddr, int position) {
assert(position >= 0 && position < v->elemNum);
void *posAddr = (char *)v->elems + position * v->elemSize;
/* free the memory of old element first */
if (v->freefn != NULL) v->freefn(posAddr);
memcpy(posAddr, elemAddr, v->elemSize);
}
/** ==================================== HASHSET ======================================= */
typedef int (*HashSetHashFunction)(const void *elemAddr, int numBuckets);
typedef int (*HashSetCompareFunction)(const void *elemAddr1, const void *elemAddr2);
typedef void (*HashSetFreeFunction)(void *elemAddr);
typedef struct {
int elemNum; //current element number
int bucketNum; //number of hash bucket
int elemSize; //how many byte each element has
vector *buckets; //array of vector
HashSetHashFunction hashfn;
HashSetCompareFunction compfn;
HashSetFreeFunction freefn;
} hashset;
void HashSetNew(hashset *h, int elemSize, int numBuckets,
HashSetHashFunction hashfn, HashSetCompareFunction comparefn, HashSetFreeFunction freefn) {
assert(elemSize > 0 && numBuckets > 0 && hashfn != NULL && comparefn != NULL);
h->buckets = (vector *)malloc(numBuckets * sizeof(vector));
assert(h->buckets != NULL);
for (int i = 0; i < numBuckets; i++) {
vector *bucket = (vector *)((char *)h->buckets + i * sizeof(vector));
VectorNew(bucket, elemSize, freefn, 4);
}
h->bucketNum = numBuckets;
h->elemSize = elemSize;
h->elemNum = 0;
h->hashfn = hashfn;
h->compfn = comparefn;
h->freefn = freefn;
}
void HashSetEnter(hashset *h, const void *elemAddr) {
int hash = h->hashfn(elemAddr, h->bucketNum);
vector *bucket = (vector *)((char *)h->buckets + hash * sizeof(vector));
// search in the hash set first
int pos = VectorSearch(bucket, elemAddr, h->compfn, 0, false);
if (pos != -1) {
// replace the old one if find a match
VectorReplace(bucket, elemAddr, pos);
} else {
// otherwise insert the new one
VectorAppend(bucket, elemAddr);
h->elemNum++;
}
}
/** ==================================== DOC_FREQ & WORD_INDEX ======================================= */
/****************************************************************
*
* doc_freq is a key-value pair of [documentid, frequency]
* It's not supposed to be exposed to user or search engine.
* -----------------------------------------------------------
* It looks like:
* [1611742826915764000] [4 ]
* |-------------------| |-------|
* docid freq
***************************************************************/
typedef struct {
long docid;
int freq;
} doc_freq;
static void new_docfreq(doc_freq *df, long docid, int freq) {
df->docid = docid;
df->freq = freq;
}
/**
* HashSetHashFunction<doc_freq>
*/
static int hash_docfreq(const void *elemAddr, int numBuckets) {
doc_freq *df = (doc_freq *)elemAddr;
return (int)(df->docid % numBuckets);
}
/**
* HashSetCompareFunction<doc_freq>
*/
static int comp_docfreq(const void *elemAddr1, const void *elemAddr2) {
long id1 = ((doc_freq *)elemAddr1)->docid;
long id2 = ((doc_freq *)elemAddr2)->docid;
if (id1 < id2) {
return -1;
} else if (id1 > id2) {
return 1;
} else { // id1 == id2
return 0;
}
}
/**
* word_index is a index of a single word.
* ---------------------------------------
* A typical word_index looks like:
* [apple]: [doc1, 5], [doc3, 10], [doc5, 7]
* |-----| |------------------------------|
* word freqs
*/
typedef struct {
char *word;
hashset *freqs; // hashset<doc_freq>
} word_index;
static const size_t kWordIndexHashSetBuckets = 64;
static void new_wordindex(word_index *wi, const char *word) {
hashset h;
HashSetNew(&h, sizeof(doc_freq), kWordIndexHashSetBuckets, hash_docfreq, comp_docfreq, NULL);
wi->freqs = &h;
size_t wordlen = strlen(word);
wi->word = (char *)malloc(wordlen + 1); // +1 for null-termination
strcpy(wi->word, word);
(wi->word)[wordlen] = '[=12=]';
}
/**
* Mainly used to build a word_index.
*/
void add_docfreq(word_index *wi, const long docid, const int frequency) {
doc_freq df;
new_docfreq(&df, docid, frequency);
HashSetEnter(wi->freqs, &df);
}
/** ==================================== UNIT-TEST ======================================= */
int main(void) {
/* apple: [1611742826915764000, 5][1611742826915538000, 10] */
word_index *apple = (word_index *)malloc(sizeof(word_index));
new_wordindex(apple, "apple");
add_docfreq(apple, 1611742826915764000L, 5);
add_docfreq(apple, 1611742826915538000L, 10);
}
它给了我一个 segmentation fault
:
[1] 84309 segmentation fault testindexer
lldb 发现问题发生在 hashset
尝试回调函数 hashfn
的给定指针时。我不太明白这里的 EXC_BAD_ACCESS (code=EXC_I386_GPFLT)
是什么。我之前对 hashset 做过几次单元测试,HashSetEnter()
函数与 hashfn
配合得很好。又对hash_docfreq()
函数进行了单元测试,也能正确计算出哈希值。我有点困惑。任何人都可以帮忙吗?谢谢!
Process 89962 stopped
* thread #1, queue = 'com.apple.main-thread', stop reason = EXC_BAD_ACCESS (code=EXC_I386_GPFLT)
frame #0: 0x0000000100003b83 testnothing`HashSetEnter(h=0x00007ffeefbff620, elemAddr=0x00007ffeefbff638) at test_nothing.c:130:13
127 }
128
129 void HashSetEnter(hashset *h, const void *elemAddr) {
-> 130 int hash = h->hashfn(elemAddr, h->bucketNum);
131 vector *bucket = (vector *)((char *)h->buckets + hash * sizeof(vector));
132 // search in the hash set first
133 int pos = VectorSearch(bucket, elemAddr, h->compfn, 0, false);
Target 0: (testnothing) stopped.
(lldb) bt
* thread #1, queue = 'com.apple.main-thread', stop reason = EXC_BAD_ACCESS (code=EXC_I386_GPFLT)
* frame #0: 0x0000000100003b83 testnothing`HashSetEnter(h=0x00007ffeefbff620, elemAddr=0x00007ffeefbff638) at test_nothing.c:130:13
frame #1: 0x0000000100003c37 testnothing`add_docfreq(wi=0x0000000100306060, docid=1611742826915764000, frequency=5) at test_nothing.c:222:2
frame #2: 0x0000000100003cae testnothing`main at test_nothing.c:235:2
frame #3: 0x00007fff70df0cc9 libdyld.dylib`start + 1
(lldb)
运行 under gdb
,出错后,执行tb
命令获取堆栈回溯,我们看到:
#0 0x00000005004016e6 in ?? ()
#1 0x000000000040163a in HashSetEnter (h=0x7fffffffdc10,
elemAddr=0x7fffffffdc40) at orig.c:150
#2 0x0000000000401834 in add_docfreq (wi=0x405260, docid=1611742826915764000,
frequency=5) at orig.c:266
#3 0x0000000000401879 in main () at orig.c:278
(gdb) frame 1
#1 0x000000000040163a in HashSetEnter (h=0x7fffffffdc10,
elemAddr=0x7fffffffdc40) at orig.c:150
150 int hash = h->hashfn(elemAddr, h->bucketNum);
您在 HashSetEnter
中出现段错误,行:
int hash = h->hashfn(elemAddr, h->bucketNum);
这是因为 h
目前 无效 。
查源,设置值的地方最终无效,设置在new_wordindex
.
在 new_wordindex
中,您正在保存 [并返回] h
的地址。
h
这里是一个函数作用域的变量,所以在函数returns.
为此您必须使用 malloc
。并且,稍后,您需要能够在清理期间 free
这个指针。
这是错误函数的重构代码。
请注意,为了显示 old/original 代码与 new/corrected 代码,我使用了预处理器条件:
#if 0
// old/original code
// NOTE: this is _not_ compiled in
#else
// new/corrected code
// NOTE: this _is_ compiled in
#endif
#if 0
下的代码可以是elided/removed,只留下#else
代码。
static void
new_wordindex(word_index * wi, const char *word)
{
// NOTE/BUG: h is function scoped -- this can _not_ be saved and returned
// because it ceases to be valid when we return
#if 0
hashset h;
HashSetNew(&h, sizeof(doc_freq), kWordIndexHashSetBuckets, hash_docfreq, comp_docfreq, NULL);
wi->freqs = &h;
#else
hashset *h = malloc(sizeof(*h));
HashSetNew(h, sizeof(doc_freq), kWordIndexHashSetBuckets, hash_docfreq, comp_docfreq, NULL);
wi->freqs = h;
#endif
size_t wordlen = strlen(word);
wi->word = (char *) malloc(wordlen + 1); // +1 for null-termination
strcpy(wi->word, word);
(wi->word)[wordlen] = '[=13=]';
}