C++ 动态散列-table
C++ dynamic hash-table
我现在正在学习 C++ 中的散列。
我找到了一个带有 class 的示例,并进行了测试。所以我希望所有测试都能通过。
应编写动态哈希表。哈希值应该存储在数组中,数组可以有目的地改变大小。当改变Array的大小时,Hash函数的改变方式要使Hash函数的目标区域与Array的大小一致。当数组的大小改变时,所有元素都需要再次散列。元素的key是String,需要计算Hash的值(字符串所有字符的ASCII值之和)mod(Array的大小)。如果发生碰撞,则应执行 Open Addressing:h(key)+j mod M。
对于删除元素我需要注意每个元素的理想位置i=h(k)和当前位置j我需要关心:T[i], T[i+1 ],...,T[j] 已被占用。
直到现在我还停留在测试 5。
但是每次我尝试调整它的大小时,我都会遇到未定义的行为或者崩溃。
这是hashtable.hclass:
#ifndef HASHTABLE_H
#define HASHTABLE_H
#include <string>
using namespace std;
class hashtable
{
public:
hashtable();
virtual ~hashtable();
void insert(string);
int find(string);
void remove(string);
void resize_array();
int hashfunction(string str);
int getSize();
string* getArray();
private:
int elemts_in_array;
int table_size;
string* T;
};
这是hashtable.cpp
#include "hashtable.h"
#include<iostream>
#include<cstring>
using namespace std;
hashtable::hashtable()
{
table_size=10;
T=new string[table_size];
// your code (start with a capacity of 10)
}
hashtable::~hashtable()
{
}
int hashtable::hashfunction(string str)
{
int k;
for(int i=0;i<table_size;i++)
k+=(int)str[i];
return k%table_size;// your code
}
void hashtable::insert(string key)
{
int k=hashfunction(key);
//
// float filled = (float)sizeof(T) / (float)table_size;
//
// cout<<filled<< "percent filled"<<endl;
//
// if(filled >= 0.8)
// {
// cout << "Resizing Array.." << endl;
// resize_array();
// }
if(T[k]==""){
T[k]=key;
}
else {
while(T[k]!="")
if(T[k]==key){
break;
}else {
T[k]=key;
}
k++;
}
// your code (keys already present in the hashtable should not be added a second time)
// (use resize_array when the hashtable is filled by 80%)
// (the testbench expects the resize taking place before and without considering the
insertion of the new element)
}
int hashtable::find(string key)
{
for(int k=0;k<table_size;k++){
if(T[k]==key)
return k ;
}
return -1;
// your code (should return -1 if key was not found)
}
void hashtable::remove(string key)
{
int k=hashfunction(key);
T[k]="";
for(int i=1;i<table_size;i++){
string next=T[k+i];
if(hashfunction(next)==k){
T[k]=T[k+1];
}
}
// your code (Invariant: ensure all keys are at their "optimal" position afterwards)
}
void hashtable::resize_array()
{
// remember the old table
int old_table_size = table_size;
std::string *old_array = T;
// build the new table and reset counter
table_size *= 2;
T = new std::string[table_size];
elemts_in_array = 0;
// now bring the elements over
for (int i=0; i<old_table_size; ++i)
{
if (old_array[i].empty())
insert(old_array[i]);
}
// finally, throw out old array
delete[] old_array;
}
//void hashtable::resize_array()
//{
// size_t newsize=table_size*2;
// string *newT=new string[newsize];
// memcpy(newT,T,table_size*sizeof(string));
//
// table_size=newsize;
// delete[] T;
// T=newT;
// // your code (double the array size)
//}
int hashtable::getSize()
{
return table_size;
// your code
}
string* hashtable::getArray()
{
return T;
}
这是测试文件:
#include <iostream>
#include "hashtable.h"
bool compare_arrays(string* testarray, hashtable t, int len)
{
string* array2 = t.getArray();
if(t.getSize() != len)
{
return 1;
}
for (int i=0; i < len ; i++)
{
if(testarray[i] != array2[i])
{
return 1;
}
}
return 0;
}
void print_array(hashtable t){
string* array = t.getArray();
for (int i=0; i< t.getSize();i++)
{
cout << i << " - " << array[i]<< endl;
}
}
void test(hashtable* t)
{
string test_vector1[10] = {"123","","","","","ABCDE","Info","","",""};
string test_vector2[10] = {"","","","!","","","Test","","ABC",""};
string test_vector3[10] = {"123", "T!est","Te!st","Tes!t","Test!","ABCDE","Info","","","!Test"};
string test_vector4[20] = {"","","","","","","","","","T!est","123","Te!st","Tes!t","Test!","!Test","ABCDE","Info","Inof","",""};
string test_vector5[20] = {"","","","", "", "", "", "", "", "T!est","123","Tes!t","Test!","!Test","Te!st","ABCDE","Info", "Inof", "", ""};
t->insert("Info");
t->insert("123");
t->insert("ABCDE");
if(compare_arrays(test_vector1, *t,10))
{
cout << "Hashtable does not work correctly!" << endl;
return;
}
t->insert("Info");
if(compare_arrays(test_vector1, *t,10))
{
cout << "Inserting a element twice might not work correctly!" << endl;
return;
}
int test_var = t->find("CBA");
if(test_var != -1)
{
cout << "Find might not work correctly for unseen keys." << endl;
return;
}
test_var = t->find("Info");
if(test_var != 6)
{
cout << "Find might not work correctly!" << endl;
return;
}
t->insert("!Test");
t->insert("T!est");
t->insert("Te!st");
t->insert("Tes!t");
t->insert("Test!");
t->insert("Test!");
if(compare_arrays(test_vector3, *t,10))
{
cout << "Hashfunction / insert might not work correctly!" << endl;
return;
}
t->insert("Inof");
if(compare_arrays(test_vector4, *t,20))
{
cout << "Resize Array might not work correctly!" << endl;
return;
}
t->remove("!");
t->remove("Te!st");
t-> insert("Te!st");
if(compare_arrays(test_vector5, *t,20))
t-> insert("Te!st");
if(compare_arrays(test_vector5, *t,20))
{
cout << "Remove might not work correctly!" << endl;
return;
}
cout << "All test passed!";
}
int main()
{
hashtable t;
test(&t);
}
两个明显的错误:
int hashtable::hashfunction(string str)
{
int k; // <-- Uninitialized
for (int i = 0; i < table_size; i++)
k += (int)str[i]; // <-- Potential out-of-bounds access
return k % table_size;// your code
}
k
未初始化,因此您不知道 k
的最终值是什么。
第二个问题是,如果 str
的大小小于 table_size
,则您正在越界访问 str
。您的示例显示字符串 "Info"
被传递给 hash_function
,但 table_size 是 10。
另一个基本错误是 hashtable
没有正确的复制语义,因此按值传递或返回 hashtable
将导致未定义的行为。这都是因为你使用
string *T
作为成员变量。
最简单的解决方法是不使用 string* T
作为成员,而是使用 std::vector<std::string> T;
我现在正在学习 C++ 中的散列。 我找到了一个带有 class 的示例,并进行了测试。所以我希望所有测试都能通过。
应编写动态哈希表。哈希值应该存储在数组中,数组可以有目的地改变大小。当改变Array的大小时,Hash函数的改变方式要使Hash函数的目标区域与Array的大小一致。当数组的大小改变时,所有元素都需要再次散列。元素的key是String,需要计算Hash的值(字符串所有字符的ASCII值之和)mod(Array的大小)。如果发生碰撞,则应执行 Open Addressing:h(key)+j mod M。
对于删除元素我需要注意每个元素的理想位置i=h(k)和当前位置j我需要关心:T[i], T[i+1 ],...,T[j] 已被占用。
直到现在我还停留在测试 5。 但是每次我尝试调整它的大小时,我都会遇到未定义的行为或者崩溃。
这是hashtable.hclass:
#ifndef HASHTABLE_H
#define HASHTABLE_H
#include <string>
using namespace std;
class hashtable
{
public:
hashtable();
virtual ~hashtable();
void insert(string);
int find(string);
void remove(string);
void resize_array();
int hashfunction(string str);
int getSize();
string* getArray();
private:
int elemts_in_array;
int table_size;
string* T;
};
这是hashtable.cpp
#include "hashtable.h"
#include<iostream>
#include<cstring>
using namespace std;
hashtable::hashtable()
{
table_size=10;
T=new string[table_size];
// your code (start with a capacity of 10)
}
hashtable::~hashtable()
{
}
int hashtable::hashfunction(string str)
{
int k;
for(int i=0;i<table_size;i++)
k+=(int)str[i];
return k%table_size;// your code
}
void hashtable::insert(string key)
{
int k=hashfunction(key);
//
// float filled = (float)sizeof(T) / (float)table_size;
//
// cout<<filled<< "percent filled"<<endl;
//
// if(filled >= 0.8)
// {
// cout << "Resizing Array.." << endl;
// resize_array();
// }
if(T[k]==""){
T[k]=key;
}
else {
while(T[k]!="")
if(T[k]==key){
break;
}else {
T[k]=key;
}
k++;
}
// your code (keys already present in the hashtable should not be added a second time)
// (use resize_array when the hashtable is filled by 80%)
// (the testbench expects the resize taking place before and without considering the
insertion of the new element)
}
int hashtable::find(string key)
{
for(int k=0;k<table_size;k++){
if(T[k]==key)
return k ;
}
return -1;
// your code (should return -1 if key was not found)
}
void hashtable::remove(string key)
{
int k=hashfunction(key);
T[k]="";
for(int i=1;i<table_size;i++){
string next=T[k+i];
if(hashfunction(next)==k){
T[k]=T[k+1];
}
}
// your code (Invariant: ensure all keys are at their "optimal" position afterwards)
}
void hashtable::resize_array()
{
// remember the old table
int old_table_size = table_size;
std::string *old_array = T;
// build the new table and reset counter
table_size *= 2;
T = new std::string[table_size];
elemts_in_array = 0;
// now bring the elements over
for (int i=0; i<old_table_size; ++i)
{
if (old_array[i].empty())
insert(old_array[i]);
}
// finally, throw out old array
delete[] old_array;
}
//void hashtable::resize_array()
//{
// size_t newsize=table_size*2;
// string *newT=new string[newsize];
// memcpy(newT,T,table_size*sizeof(string));
//
// table_size=newsize;
// delete[] T;
// T=newT;
// // your code (double the array size)
//}
int hashtable::getSize()
{
return table_size;
// your code
}
string* hashtable::getArray()
{
return T;
}
这是测试文件:
#include <iostream>
#include "hashtable.h"
bool compare_arrays(string* testarray, hashtable t, int len)
{
string* array2 = t.getArray();
if(t.getSize() != len)
{
return 1;
}
for (int i=0; i < len ; i++)
{
if(testarray[i] != array2[i])
{
return 1;
}
}
return 0;
}
void print_array(hashtable t){
string* array = t.getArray();
for (int i=0; i< t.getSize();i++)
{
cout << i << " - " << array[i]<< endl;
}
}
void test(hashtable* t)
{
string test_vector1[10] = {"123","","","","","ABCDE","Info","","",""};
string test_vector2[10] = {"","","","!","","","Test","","ABC",""};
string test_vector3[10] = {"123", "T!est","Te!st","Tes!t","Test!","ABCDE","Info","","","!Test"};
string test_vector4[20] = {"","","","","","","","","","T!est","123","Te!st","Tes!t","Test!","!Test","ABCDE","Info","Inof","",""};
string test_vector5[20] = {"","","","", "", "", "", "", "", "T!est","123","Tes!t","Test!","!Test","Te!st","ABCDE","Info", "Inof", "", ""};
t->insert("Info");
t->insert("123");
t->insert("ABCDE");
if(compare_arrays(test_vector1, *t,10))
{
cout << "Hashtable does not work correctly!" << endl;
return;
}
t->insert("Info");
if(compare_arrays(test_vector1, *t,10))
{
cout << "Inserting a element twice might not work correctly!" << endl;
return;
}
int test_var = t->find("CBA");
if(test_var != -1)
{
cout << "Find might not work correctly for unseen keys." << endl;
return;
}
test_var = t->find("Info");
if(test_var != 6)
{
cout << "Find might not work correctly!" << endl;
return;
}
t->insert("!Test");
t->insert("T!est");
t->insert("Te!st");
t->insert("Tes!t");
t->insert("Test!");
t->insert("Test!");
if(compare_arrays(test_vector3, *t,10))
{
cout << "Hashfunction / insert might not work correctly!" << endl;
return;
}
t->insert("Inof");
if(compare_arrays(test_vector4, *t,20))
{
cout << "Resize Array might not work correctly!" << endl;
return;
}
t->remove("!");
t->remove("Te!st");
t-> insert("Te!st");
if(compare_arrays(test_vector5, *t,20))
t-> insert("Te!st");
if(compare_arrays(test_vector5, *t,20))
{
cout << "Remove might not work correctly!" << endl;
return;
}
cout << "All test passed!";
}
int main()
{
hashtable t;
test(&t);
}
两个明显的错误:
int hashtable::hashfunction(string str)
{
int k; // <-- Uninitialized
for (int i = 0; i < table_size; i++)
k += (int)str[i]; // <-- Potential out-of-bounds access
return k % table_size;// your code
}
k
未初始化,因此您不知道 k
的最终值是什么。
第二个问题是,如果 str
的大小小于 table_size
,则您正在越界访问 str
。您的示例显示字符串 "Info"
被传递给 hash_function
,但 table_size 是 10。
另一个基本错误是 hashtable
没有正确的复制语义,因此按值传递或返回 hashtable
将导致未定义的行为。这都是因为你使用
string *T
作为成员变量。
最简单的解决方法是不使用 string* T
作为成员,而是使用 std::vector<std::string> T;