C++ Hashmap 库的 C 接口无法正常工作

C interface for C++ Hashmap library not working correctly

我对 C 比较陌生,尽管我已经使用其他语言进行了很长一段时间的开发。为了尝试“边做边学”,在学习 coursera 上的 C 和 C++ 课程的同时,我决定通过为现有项目编写包装器来挑战自己,因为这正好对应我最终打算在实际项目中使用这两门语言的方式。

我正在参照一篇关于 C 接口/包装的指南(here),为 Google Sparse Hash Map 的 C++ 实现构建一个 C 接口,但似乎遇到了数据没有被插入、或者插入后无法读取的问题?

这是我目前的代码。

C++ 对象

sparsehashmap.cpp

#include "sparsehashmap.h"
#include<string>

// Construct an empty map and register "" as the deleted-key marker;
// sparse_hash_map requires a deleted key to be set before erase() is used.
SparseHashMap::SparseHashMap()
{
    _shash.set_deleted_key("");
}

// Store `arg2` under `arg1`, overwriting any value already mapped to that key.
// The map holds the two pointers exactly as given (see the _SPARSEHASH
// typedef); no string data is copied.
void SparseHashMap::insert(const char* arg1, const char* arg2)
{
    const char*& slot = _shash[arg1];
    slot = arg2;
}

const char* SparseHashMap::read(const char* key)
{
    return _shash[key];
}

// Report whether `key` is present in the map: 1 when found, 0 otherwise.
int SparseHashMap::exists(const char* key)
{
    return _shash.find(key) != _shash.end();
}

// Erase the entry stored under `key`; the return value of erase() is ignored.
void SparseHashMap::remove(const char* key)
{
    _shash.erase(key);
}

// Number of entries currently held by the map, narrowed from size() to int.
int SparseHashMap::length()
{
    const int count = static_cast<int>(_shash.size());
    return count;
}

// Remove every entry from the map.
void SparseHashMap::flush()
{
    _shash.clear();
}

sparsehashmap.h

#ifndef __CPPSPARSEHASH__
#define __CPPSPARSEHASH__

#include <iostream>
#include <sparsehash/sparse_hash_map> //https://github.com/sparsehash/sparsehash

// Both the key and the mapped type are raw `const char*`, so the map stores
// pointer values; no hash or equality functor for C strings is supplied here.
typedef google::sparse_hash_map<const char*, const char*> _SPARSEHASH;

// Thin wrapper around a google::sparse_hash_map whose keys and values are
// `const char*` (see _SPARSEHASH). All operations forward to that map.
class SparseHashMap
{
    private:
        // Underlying sparse hash map.
        _SPARSEHASH _shash;

    public:

    // Constructs the map and sets its deleted key.
    SparseHashMap();
    // Maps arg1 (key) to arg2 (value).
    void            insert(const char* arg1, const char* arg2);
    // Returns the value mapped to key.
    const char*     read(const char* key);
    // Returns non-zero when key is found, 0 otherwise.
    int             exists(const char* key);
    // Erases the entry for key.
    void            remove(const char* key);
    // Returns the number of entries as an int.
    int             length();
    // Clears the map.
    void            flush();
};

#endif // __CPPSPARSEHASH__

C 包装器

sparse.h

#ifndef __FOSPARSE_H__
#define __FOSPARSE_H__

#ifdef __cplusplus
extern "C" {
#endif

    /* Opaque handle type; its layout is only known to the C++ side. */
    struct CSparseHashMap;
    typedef struct CSparseHashMap _SparseHashMap;

    /*
     * Create a new map handle. Release it with sparsehashmap_destroy().
     * Declared with (void) so that C callers get a real prototype; an empty
     * parameter list in C leaves the arguments unchecked.
     */
    _SparseHashMap *sparsehashmap_create(void);
    /* Destroy a handle returned by sparsehashmap_create(); NULL is ignored. */
    void            sparsehashmap_destroy(_SparseHashMap *shm);
    /* Map arg1 (key) to arg2 (value); does nothing when shm is NULL. */
    void            sparsehashmap_insert    (_SparseHashMap *shm, const char* arg1, const char* arg2);
    /* Return the value mapped to key; returns NULL when shm is NULL. */
    const char*     sparsehashmap_read      (_SparseHashMap *shm, const char* key);
    /* Return non-zero when key is present; returns 0 when shm is NULL. */
    int             sparsehashmap_exists    (_SparseHashMap *shm, const char* key);
    /* Erase the entry for key; does nothing when shm is NULL. */
    void            sparsehashmap_remove    (_SparseHashMap *shm, const char* key);
    /* Return the number of entries; returns 0 when shm is NULL. */
    int             sparsehashmap_length    (_SparseHashMap *shm);
    /* Remove every entry; does nothing when shm is NULL. */
    void            sparsehashmap_flush     (_SparseHashMap *shm);

#ifdef __cplusplus
}
#endif

#endif

sparse.cpp

#include <stdlib.h>
#include "sparse.h"
#include "sparsehashmap.h"

// Definition of the opaque handle declared in sparse.h.
struct CSparseHashMap {
    // Type-erased pointer to the wrapped SparseHashMap instance; the wrapper
    // functions static_cast it back before use.
    void *obj;
};

// Allocate a C handle together with the C++ SparseHashMap it wraps.
//
// Returns NULL when the handle cannot be allocated or when constructing the
// SparseHashMap throws; every other wrapper function accepts a NULL handle.
// The returned handle must be released with sparsehashmap_destroy().
_SparseHashMap *sparsehashmap_create()
{
    _SparseHashMap *shm = static_cast<_SparseHashMap *>(malloc(sizeof(*shm)));
    if (shm == NULL)
        return NULL;

    try {
        shm->obj = new SparseHashMap();
    } catch (...) {
        // A C++ exception must not propagate into C callers; release the
        // half-built handle and report the failure as NULL instead.
        free(shm);
        return NULL;
    }

    return shm;
}

// Delete the wrapped SparseHashMap and then free the handle itself.
// A NULL handle is ignored.
void sparsehashmap_destroy(_SparseHashMap *shm)
{
    if (!shm)
        return;

    SparseHashMap *map = static_cast<SparseHashMap *>(shm->obj);
    delete map;
    free(shm);
}

// Forward to SparseHashMap::insert on the wrapped object.
// Does nothing when the handle is NULL.
void sparsehashmap_insert(_SparseHashMap *shm,  const char* arg1, const char* arg2)
{
    if (shm != NULL)
        static_cast<SparseHashMap *>(shm->obj)->insert(arg1, arg2);
}

// Forward to SparseHashMap::read on the wrapped object and return its result.
// Returns a null pointer when the handle is NULL.
const char* sparsehashmap_read(_SparseHashMap *shm,  const char* key)
{
    if (!shm)
        return 0;

    SparseHashMap *map = static_cast<SparseHashMap *>(shm->obj);
    return map->read(key);
}

// Forward to SparseHashMap::exists on the wrapped object and return its result.
// Returns 0 when the handle is NULL.
int sparsehashmap_exists(_SparseHashMap *shm,  const char* key)
{
    if (!shm)
        return 0;

    SparseHashMap *map = static_cast<SparseHashMap *>(shm->obj);
    return map->exists(key);
}

// Forward to SparseHashMap::remove on the wrapped object.
// Does nothing when the handle is NULL.
void sparsehashmap_remove(_SparseHashMap *shm,  const char* key)
{
    if (shm != NULL)
        static_cast<SparseHashMap *>(shm->obj)->remove(key);
}

// Forward to SparseHashMap::length on the wrapped object and return its result.
// Returns 0 when the handle is NULL.
int sparsehashmap_length(_SparseHashMap *shm)
{
    if (!shm)
        return 0;

    SparseHashMap *map = static_cast<SparseHashMap *>(shm->obj);
    return map->length();
}

// Forward to SparseHashMap::flush on the wrapped object.
// Does nothing when the handle is NULL.
void sparsehashmap_flush(_SparseHashMap *shm)
{
    if (shm != NULL)
        static_cast<SparseHashMap *>(shm->obj)->flush();
}

测试

main.c

#include <stdio.h>
#include <string.h>
#include <stdbool.h>
#include "sparse.h"

#define MAXCHAR 256


/* Map a NULL string to "(null)" so it can be passed to printf's %s safely. */
static const char *printable(const char *s)
{
    return s ? s : "(null)";
}

/*
 * Load "<key> <value>" rows from the file named by argv[1] into a
 * sparsehashmap, then exercise the length, read, exists and remove calls.
 *
 * Returns 0 on success and 1 when the argument is missing, the map cannot be
 * created, or the file cannot be opened.
 */
int main(int argc, char* argv[])
{
    if (argc < 2)
    {
        fprintf(stderr, "Usage: %s <data-file>\n", argc > 0 ? argv[0] : "run");
        return 1;
    }

    // Create Pointer to Sparsehashmap object
    _SparseHashMap *fshm = sparsehashmap_create();
    if (fshm == NULL)
    {
        fprintf(stderr, "Could not create the hashmap\n");
        return 1;
    }

    /*
        Open File specified in command line argument <argv[1]>
        and initialise array pointer to hold the line data
    */

    FILE *fp;
    char row[MAXCHAR];
    int ct = 0;
    fp = fopen(argv[1], "r");
    if (fp == NULL)
    {
        perror(argv[1]);
        sparsehashmap_destroy(fshm);
        return 1;
    }

    /*
        Loop through the file, split the row using a single space
        as a delimeter, and insert as <key> => <value> into the
        sparsehashmap.

        The loop is driven by fgets() itself: feof() only turns true after a
        read has already failed, so testing it first processes the last row
        twice.
    */

    while (fgets(row, MAXCHAR, fp) != NULL)
    {
        char key[MAXCHAR], value[MAXCHAR];

        /* Skip rows that do not hold two fields rather than inserting
           uninitialised buffers. */
        if (sscanf(row, "%s %s", key, value) != 2)
            continue;

//      if( (ct % 100) == 0 )
//          printf("Key: %s\tValue: %s\n", key, value);

        sparsehashmap_insert(fshm, key, value);

        ct++;
    }

    // close the file as it is no longer needed.
    fclose(fp);

    // Print the total number of rows that were inserted
    printf("%d Total entries.\n", ct);

    // Print the length of the Sparsehashmap
    printf("C SparseHashMap: Length of Hashmap = %d\n", sparsehashmap_length(fshm));


    /*
        Read, Exists and Remove Examples.
        sparsehashmap_read() may return NULL, which must not reach %s directly.
    */

    printf("C SparseHashMap: _SparseHashMap[\"63-5039337\"] = %s\n", printable(sparsehashmap_read(fshm, "63-5039337")));
    printf("C SparseHashMap: _SparseHashMap[\"26-8663826\"] = %s\n", printable(sparsehashmap_read(fshm, "26-8663826")));
    printf("C SparseHashMap: _SparseHashMap[\"87-9801138\"] = %s\n", printable(sparsehashmap_read(fshm, "87-9801138")));


    printf("C SparseHashMap: Key Exists [\"15-8895492\"] = %d\n", sparsehashmap_exists(fshm, "15-8895492"));
    printf("C SparseHashMap: Key Exists [\"22-4942175\"] = %d\n", sparsehashmap_exists(fshm, "22-4942175"));


    printf("C SparseHashMap: Remove Key From Hashmap...\n");

    printf("C SparseHashMap: Remove Key From Hashmap [\"63-5039337\"]\n");
    sparsehashmap_remove(fshm, "63-5039337");
    printf("C SparseHashMap: Remove Key From Hashmap [\"20-6892893\"]\n");
    sparsehashmap_remove(fshm, "20-6892893");
    printf("C SparseHashMap: Remove Key From Hashmap [\"58-8150180\"]\n");
    sparsehashmap_remove(fshm, "58-8150180");


    printf("C SparseHashMap: Length of Hashmap = %d\n", sparsehashmap_length(fshm));

    /*
        Destroy the Sparsehashmap to free its
        memory allocation.
    */
    printf("C SparseHashMap: Destroy Hashmap\n");
    sparsehashmap_destroy(fshm);

    printf("C SparseHashMap: Freed");
    return 0;
}

我已经把要插入和查询的数据整理成了一个 gist,可以在这里(here)找到。

构建完成后,我用模拟数据文件运行它;但对于包含 2000 个测试条目的文件,得到的结果如下:

2001 Total entries.                                     /* times file is looped with fgets() */
C SparseHashMap: Length of Hashmap = 1                  /* Size of the hashmap */
C SparseHashMap: _SparseHashMap["63-5039337"] = (null)  /* Read Function Response */
C SparseHashMap: _SparseHashMap["26-8663826"] = (null)  /* Read Function Response */
C SparseHashMap: _SparseHashMap["87-9801138"] = (null)  /* Read Function Response */
C SparseHashMap: Key Exists ["15-8895492"] = 0
C SparseHashMap: Key Exists ["22-4942175"] = 0
C SparseHashMap: Remove Key From Hashmap ["63-5039337"]
C SparseHashMap: Remove Key From Hashmap ["20-6892893"]
C SparseHashMap: Remove Key From Hashmap ["58-8150180"]
C SparseHashMap: Length of Hashmap = 3                  /* Size of the hashmap */
C SparseHashMap: Destroy Hashmap
C SparseHashMap: Freed%          

Hash Map的大小是从1开始的(明明插入了2000个条目之后),它无法从输入数据中读取任何key,然后在进行了多次删除之后,长度跳到了3?

我知道我正在使用的指南比使用哈希映射更基本,但想知道是否有人可以指出我在这方面出了什么问题?以及我应该如何修改它以使操作按预期工作?

任何指导将不胜感激。

======== 编辑 ========

问题似乎是将指针映射到了指针。在把 *.cpp 文件改为接收/返回 std::string,并在从 C 接口传入之前做相应转换之后,接口开始按预期工作:

./run MOCK_DATA.txt 
2001 Total entries.
C SparseHashMap: Length of Hashmap = 2000
C SparseHashMap: _SparseHashMap["63-5039337"] = {"id":1,"first_name":"Ilario","last_name":"Thickins","email":"ithickins0@nbcnews.com","gender":"Male","ip_address":"54.113.8.103"}
C SparseHashMap: _SparseHashMap["26-8663826"] = {"id":35,"first_name":"Mignonne","last_name":"Oakden","email":"moakdeny@ifeng.com","gender":"Female","ip_address":"41.14.68.148"}
C SparseHashMap: _SparseHashMap["87-9801138"] = {"id":1989,"first_name":"Mavis","last_name":"Collingwood","email":"mcollingwoodrg@google.it","gender":"Female","ip_address":"56.70.97.65"}
C SparseHashMap: Key Exists ["15-8895492"] = 1
C SparseHashMap: Key Exists ["22-4942175"] = 1
C SparseHashMap: Remove Key From Hashmap ["63-5039337"]
C SparseHashMap: Remove Key From Hashmap ["20-6892893"]
C SparseHashMap: Remove Key From Hashmap ["58-8150180"]
C SparseHashMap: Length of Hashmap = 1997
C SparseHashMap: Destroy Hashmap
C SparseHashMap: Freed

您正在将指针映射到指针,但您可能打算将字符串映射到字符串。

typedef google::sparse_hash_map<const char*, const char*> _SPARSEHASH;

std::map 可能是:

std::map<const char*, const char*>

这只是意味着您正在将指针映射到指针。

你所有的指针都来自这两个对象。

char key[MAXCHAR], value[MAXCHAR];

每次向散列映射添加键时,您使用的键是指向本地 key 数组的指针(在循环的每次迭代中始终相同)。该值是指向本地 value 数组的指针(同样,在循环的每次迭代中始终相同)。这就是为什么您的映射中只有 1 个条目:您只是成功地将 &key[0] 映射到了 &value[0]。

您可能想要的是使用 std::string

std::map<std::string, std::string>

typedef google::sparse_hash_map<std::string, std::string> _SPARSEHASH;

不过接下来您还有更棘手的问题要处理

现在您不得不担心 read 的实现只返回一个指针,而不是返回一个字符串。您刚刚继承了在 C 中管理字符串的所有问题。您可以像 std::string::c_str 那样处理:返回哈希条目的 c_str;但是如果您向映射中添加或删除另一个条目,该指针就可能失效,因此必须清楚地记录这一语义。否则,您就必须为返回的字符串副本分配一个缓冲区,并允许调用方之后释放该缓冲区,就像 C 中的通常做法一样。(我很高兴有 C++ 这样的东西。)