因为之前写过一篇golang 数据类型分析的文章。包含slice、map、channel等。 想写一篇用其它语言实现golang数据类型的代码,于是选中map作为实验对象。 笔者之前写过5年的c++, 虽然 c++代码大概有3年没有写过了,但还是想试一试(可能是手痒痒。虽然写go很开心,但是也没有说对c++过河拆桥,偶尔还会想起以前gdb、makefile、linux kernel codeing、对照汇编指令集的日子)。
于是一边百度c++恢复功力,一边实现本文的代码,竟然大概用了2个小时。 目前实现的功能有:创建map(支持任意数据类型)、插入数据(自动扩容)、查询数据、数据析构。数据删除(在golang中是GC,在c++中就是析构函数对对象进行释放)后面有空再实现,时间不早准备休息了。关于c++百度的过程: 首先百度一下什么是类,尼玛? 结构体的元素默认是private的,而类的元素默认的public的。 好像得用模,模,模板? 绕不过去那就百度一把。 友元类? 我x,还好可以不用。 对[]操作符的重载? 纳尼,什么是重载???这个不得不用一下,因为map需要想数组一样去索引。 用makefile还是用cmake好呢?思之再三没有想好。算了,单文件不如g++ filename吧,时间珍贵不要自己找事情,连-o都不用指定。就a.out吧。
解决哈希冲突的方法:
- 数组 + 链表的方法:适用于数据存储时间长的场景。
- 多次哈希:适用于数据存储周期很短的场景,否则,哈希的命中率会大大降低。
golang中map的实现原理:
例如:m1 map[string]string插入一条数据的过程如下:
insert “key1 name”:“乔布斯”
hashvalue = hash(“key1 name”)
slot = hashvalue的低8bit % len(m1),例如m1的槽位是4个,则slot = hashvalue % 4。假设slot = 2
hashvalue的高8bit这条数据应该插入到bmap中的第几个子槽。如果bmap已经写满8个,则读取overflow指向的下一个紧邻着的bmap去插入这条数据注意:bmap中k-v的存放方式是:key0、key1、key2、key3、key4、key5、key6、key7、val0、val1、val2、val3、val4、val5、val6、val7、pads、overflow (我认为改成叫next更为合适)
关于map扩容:
观察上面的map数据结构,最理想的k-v存储方式就是:哈希1找到槽位,哈希2找到key-value。 考虑现实情况,数据不确定时不可能申请一个很大的数组作为槽位,也不可能让一个槽位下面的链表太长影响索引速度。 因此采用折中的办法,当一个链表过长之后,会对哈希进行横向扩容(增加槽位数),这就改变了哈希1的映射关系,原有的数据需要重新插入到争取的位置。 因此。我认为应该这样去设计: 发现链表太长之后先扩容,然后新插入的数据放入新的槽位。 老的数据放在原位置不动,每次在读取(我认为应该是先用新的哈希1去读取(命中率会逐步降低),如果读取不到再用老的哈希1去读取)到老的数据时,把老数据删掉然后插入新的位置,直到老的槽位数据为0,就可以释放掉老的槽位。 或者开一个go routine逐步的进行k-v数据的迁移。
goalng数据结构分析----文章链接
目录结构:
c++代码:
// main.cc
#include <iostream>
#include <string>
#include <string.h>
#include <stdlib.h>
#include <limits>
#include <time.h> // for nanosleep
#include <sys/time.h> // for gettimeofday
using namespace std;
using std::hash;
using std::string;
using std::cout;
class S {
public:
string first_name;
string last_name;
};
template<typename S>
class myhash
{
public:
size_t operator()(const S &s) const
{
size_t h1 = hash<string>()(s.first_name);
size_t h2 = hash<string>()(s.last_name);
return h1 ^ (h2 << 1);
}
};
template<typename KEY, typename VALUE>
class bmap {
public:
char topbits;
KEY key[8];
VALUE values[8];
//pad uintptr
//
void * overflow;
};
template<typename KEY, typename VALUE>
class TangMap
{
typedef bmap<KEY, VALUE> slot_node;
public:
TangMap(int slot_len=8) {
slot_num= slot_len;
buckets = (slot_node *)malloc(sizeof(slot_node) * slot_num);
slot_node * it = (slot_node *)buckets;
for (auto i=0; i<slot_num; i++) {
it->overflow = NULL;
}
count = 0;
//cout << "----:" << slot_num << endl;
printf("%p", buckets);
}
~TangMap() {
// 删除槽下面的链
for (auto i=0; i<slot_num; i++) {
auto it = &buckets[i];
for (;;) {
if (it->overflow != NULL) {
auto node = it->overflow;
it->overflow = ((slot_node *)it->overflow)->overflow;
it = (slot_node *)it->overflow;
free(node);
} else {
cout << "this list is clean" << endl;
if (it != &buckets[i]) {
free(it);
}
break;
}
if (it == NULL) {
break;
}
}
}
//
// 删除槽
if (buckets != NULL) {
free(buckets);
}
// 清零元素个数
count = 0;
cout << "destruction function is invoked!" << endl;
}
void insert(KEY key, VALUE val) {
hash<KEY> hash_fn;
size_t slot_index = (hash_fn(key) & 0xff) % slot_num;
size_t subslot_index = ((hash_fn(key) >> 8) & 0xff) % slot_num;
cout << "slot_index is:" << slot_index << ", subslot_index is:" << subslot_index << endl;
auto it = &buckets[slot_index]; // 获取槽位头节点
for (;;) {
if (it->topbits & (1 << subslot_index)) {
cout << "--->bmap can not insert into this node" << endl;
if (it->overflow != NULL) {
it = (slot_node *)it->overflow;
continue;
} else {
auto node = new(slot_node);
node->overflow = NULL;
it->overflow = node;
it->key[subslot_index] = key;
it->values[subslot_index] = val;
it->topbits |= 1 << subslot_index;
count++;
break;
}
} else {
cout << "bmap can insert into this node" << endl;
it->key[subslot_index] = key;
it->values[subslot_index] = val;
it->topbits |= 1 << subslot_index;
printf("it->topbits=%d\r\n", it->topbits);
count++;
break;
}
}
printf("it->topbits=%d\r\n", it->topbits);
}
VALUE * operator[](KEY key) {
hash<KEY> hash_fn;
size_t slot_index = (hash_fn(key) & 0xff) % slot_num;
size_t subslot_index = ((hash_fn(key) >> 8) & 0xff) % slot_num;
cout << "slot_index is:" << slot_index << ", subslot_index is:" << subslot_index << endl;
auto it = &buckets[slot_index]; // 获取槽位头节点
for (;;) {
if (it->topbits & (1 << subslot_index)) {
if (key == it->key[subslot_index]) {
cout << "find key!" << endl;
return &it->values[subslot_index];
} else {
if (it->overflow != NULL) {
it = (slot_node *)it->overflow;
continue;
} else {
return NULL;
}
}
} else {
if (it->overflow != NULL) {
it = (slot_node *)it->overflow;
continue;
} else {
return NULL;
}
}
}
}
// 元素个数,调用 len(map) 时,直接返回此值
int count;
char flags;
int slot_num; // buckets 的对数 log_2
slot_node * buckets; // 指向内存的指针,可以看作是:[]bmap。 其大小为 2^B. 如果元素个数为0,就为 nil
void *extra;
};
inline int randf(void)
{
struct timespec tim;
tim.tv_sec=0; tim.tv_nsec=1e4;
nanosleep(&tim, 0);
struct timeval cur;
gettimeofday(&cur, 0);
srand(cur.tv_usec);
return rand()/float(RAND_MAX);
}
string randstr(char *str, const int len)
{
srand(time(NULL));
int i;
for (i = 0; i < len; ++i)
{
switch ((randf() % 3))
{
case 1:
str[i] = 'A' + randf() % 26;
break;
case 2:
str[i] = 'a' + randf() % 26;
break;
default:
str[i] = '0' + randf() % 10;
break;
}
}
str[++i] = '\0';
return str;
}
int main(int arcg, char **argv) {
const int LEN_NAME=40;
char name[LEN_NAME+1];
TangMap<std::string,string> obj;
for (auto i=0; i<40; i++) {
auto key = "key" + std::to_string(i);
auto val = "value" + std::to_string(i);
obj.insert(key, val);
auto v = obj[key];
printf("val is:%s, v is:%s\r\n\r\n", val.c_str(), v->c_str());
}
}
编译执行:
root@ubuntu:~/zz/map# g++ main.cc
root@ubuntu:~/zz/map#
root@ubuntu:~/zz/map# ./a.out
0x55ef79a2ce70slot_index is:4, subslot_index is:2
bmap can insert into this node
it->topbits=4
it->topbits=4
slot_index is:4, subslot_index is:2
find key!
val is:value0, v is:value0
slot_index is:2, subslot_index is:2
bmap can insert into this node
it->topbits=4
it->topbits=4
slot_index is:2, subslot_index is:2
find key!
val is:value1, v is:value1
slot_index is:4, subslot_index is:6
bmap can insert into this node
it->topbits=68
it->topbits=68
slot_index is:4, subslot_index is:6
find key!
val is:value2, v is:value2
slot_index is:2, subslot_index is:6
bmap can insert into this node
it->topbits=68
it->topbits=68
slot_index is:2, subslot_index is:6
find key!
val is:value3, v is:value3
slot_index is:6, subslot_index is:2
bmap can insert into this node
it->topbits=4
it->topbits=4
slot_index is:6, subslot_index is:2
find key!
val is:value4, v is:value4
slot_index is:0, subslot_index is:0
bmap can insert into this node
it->topbits=1
it->topbits=1
slot_index is:0, subslot_index is:0
find key!
val is:value5, v is:value5
slot_index is:7, subslot_index is:1
bmap can insert into this node
it->topbits=2
it->topbits=2
slot_index is:7, subslot_index is:1
find key!
val is:value6, v is:value6
slot_index is:5, subslot_index is:3
bmap can insert into this node
it->topbits=8
it->topbits=8
slot_index is:5, subslot_index is:3
find key!
val is:value7, v is:value7
slot_index is:4, subslot_index is:5
bmap can insert into this node
it->topbits=100
it->topbits=100
slot_index is:4, subslot_index is:5
find key!
val is:value8, v is:value8
slot_index is:3, subslot_index is:6
bmap can insert into this node
it->topbits=64
it->topbits=64
slot_index is:3, subslot_index is:6
find key!
val is:value9, v is:value9
slot_index is:5, subslot_index is:0
bmap can insert into this node
it->topbits=9
it->topbits=9
slot_index is:5, subslot_index is:0
find key!
val is:value10, v is:value10
slot_index is:5, subslot_index is:7
bmap can insert into this node
it->topbits=-119
it->topbits=-119
slot_index is:5, subslot_index is:7
find key!
val is:value11, v is:value11
slot_index is:0, subslot_index is:0
--->bmap can not insert into this node
it->topbits=1
slot_index is:0, subslot_index is:0
find key!
val is:value12, v is:value12
slot_index is:7, subslot_index is:2
bmap can insert into this node
it->topbits=6
it->topbits=6
slot_index is:7, subslot_index is:2
find key!
val is:value13, v is:value13
slot_index is:4, subslot_index is:4
bmap can insert into this node
it->topbits=116
it->topbits=116
slot_index is:4, subslot_index is:4
find key!
val is:value14, v is:value14
slot_index is:3, subslot_index is:1
bmap can insert into this node
it->topbits=66
it->topbits=66
slot_index is:3, subslot_index is:1
find key!
val is:value15, v is:value15
slot_index is:4, subslot_index is:1
bmap can insert into this node
it->topbits=118
it->topbits=118
slot_index is:4, subslot_index is:1
find key!
val is:value16, v is:value16
slot_index is:3, subslot_index is:5
bmap can insert into this node
it->topbits=98
it->topbits=98
slot_index is:3, subslot_index is:5
find key!
val is:value17, v is:value17
slot_index is:6, subslot_index is:1
bmap can insert into this node
it->topbits=6
it->topbits=6
slot_index is:6, subslot_index is:1
find key!
val is:value18, v is:value18
slot_index is:6, subslot_index is:6
bmap can insert into this node
it->topbits=70
it->topbits=70
slot_index is:6, subslot_index is:6
find key!
val is:value19, v is:value19
slot_index is:6, subslot_index is:1
--->bmap can not insert into this node
it->topbits=70
slot_index is:6, subslot_index is:1
find key!
val is:value20, v is:value20
slot_index is:2, subslot_index is:1
bmap can insert into this node
it->topbits=70
it->topbits=70
slot_index is:2, subslot_index is:1
find key!
val is:value21, v is:value21
slot_index is:5, subslot_index is:2
bmap can insert into this node
it->topbits=-115
it->topbits=-115
slot_index is:5, subslot_index is:2
find key!
val is:value22, v is:value22
slot_index is:5, subslot_index is:7
--->bmap can not insert into this node
it->topbits=-115
slot_index is:5, subslot_index is:7
find key!
val is:value23, v is:value23
slot_index is:0, subslot_index is:4
bmap can insert into this node
it->topbits=17
it->topbits=17
slot_index is:0, subslot_index is:4
find key!
val is:value24, v is:value24
slot_index is:5, subslot_index is:3
--->bmap can not insert into this node
bmap can insert into this node
it->topbits=8
it->topbits=8
slot_index is:5, subslot_index is:3
find key!
val is:value25, v is:value25
slot_index is:7, subslot_index is:2
--->bmap can not insert into this node
it->topbits=6
slot_index is:7, subslot_index is:2
find key!
val is:value26, v is:value26
slot_index is:6, subslot_index is:7
bmap can insert into this node
it->topbits=-58
it->topbits=-58
slot_index is:6, subslot_index is:7
find key!
val is:value27, v is:value27
slot_index is:0, subslot_index is:2
bmap can insert into this node
it->topbits=21
it->topbits=21
slot_index is:0, subslot_index is:2
find key!
val is:value28, v is:value28
slot_index is:1, subslot_index is:1
bmap can insert into this node
it->topbits=2
it->topbits=2
slot_index is:1, subslot_index is:1
find key!
val is:value29, v is:value29
slot_index is:0, subslot_index is:4
--->bmap can not insert into this node
bmap can insert into this node
it->topbits=16
it->topbits=16
slot_index is:0, subslot_index is:4
find key!
val is:value30, v is:value30
slot_index is:7, subslot_index is:1
--->bmap can not insert into this node
bmap can insert into this node
it->topbits=2
it->topbits=2
slot_index is:7, subslot_index is:1
find key!
val is:value31, v is:value31
slot_index is:6, subslot_index is:1
--->bmap can not insert into this node
bmap can insert into this node
it->topbits=2
it->topbits=2
slot_index is:6, subslot_index is:1
find key!
val is:value32, v is:value32
slot_index is:3, subslot_index is:5
--->bmap can not insert into this node
it->topbits=98
slot_index is:3, subslot_index is:5
find key!
val is:value33, v is:value33
slot_index is:7, subslot_index is:5
bmap can insert into this node
it->topbits=38
it->topbits=38
slot_index is:7, subslot_index is:5
find key!
val is:value34, v is:value34
slot_index is:2, subslot_index is:4
bmap can insert into this node
it->topbits=86
it->topbits=86
slot_index is:2, subslot_index is:4
find key!
val is:value35, v is:value35
slot_index is:7, subslot_index is:1
--->bmap can not insert into this node
--->bmap can not insert into this node
it->topbits=2
slot_index is:7, subslot_index is:1
find key!
val is:value36, v is:value36
slot_index is:5, subslot_index is:0
--->bmap can not insert into this node
bmap can insert into this node
it->topbits=9
it->topbits=9
slot_index is:5, subslot_index is:0
find key!
val is:value37, v is:value37
slot_index is:0, subslot_index is:4
--->bmap can not insert into this node
--->bmap can not insert into this node
it->topbits=16
slot_index is:0, subslot_index is:4
find key!
val is:value38, v is:value38
slot_index is:4, subslot_index is:4
--->bmap can not insert into this node
it->topbits=118
slot_index is:4, subslot_index is:4
find key!
val is:value39, v is:value39
this list is clean
this list is clean
this list is clean
this list is clean
destruction function is invoked!
root@ubuntu:~/zz/map#
cgdb调试代码:
写c++的代码绕不过去的就是用gdb调试。在添加map的update和delete方法时,一不小心竟然段错误了……。百度gdb吧。东西还得舍弃来:
用gdb的适合,需要在编译代码时指定-g选项,才能在gdb中进行调试。 类似于vs里面的release和debug模式。
g++ main.cc -g
用gdb启动可执行文件:
root@ubuntu:~/zz/map# cgdb ./a.out
经过gdb和cout调试。
最终调试完善的代码如下:
#include <iostream>
#include <string>
#include <string.h>
#include <stdlib.h>
#include <limits>
#include <time.h> // for nanosleep
#include <sys/time.h> // for gettimeofday
using namespace std;
using std::hash;
using std::string;
using std::cout;
class S {
public:
string first_name;
string last_name;
};
template<typename S>
class myhash
{
public:
size_t operator()(const S &s) const
{
size_t h1 = hash<string>()(s.first_name);
size_t h2 = hash<string>()(s.last_name);
return h1 ^ (h2 << 1);
}
};
template<typename KEY, typename VALUE>
class bmap {
public:
char topbits;
KEY key[8];
VALUE values[8];
//pad uintptr
//
void * overflow;
};
template<typename KEY, typename VALUE>
class TangMap
{
typedef bmap<KEY, VALUE> slot_node;
public:
TangMap(int slot_len=8) {
slot_num= slot_len;
buckets = (slot_node *)malloc(sizeof(slot_node) * slot_num);
slot_node * it = (slot_node *)buckets;
for (auto i=0; i<slot_num; i++) {
it->overflow = NULL;
}
count = 0;
//cout << "----:" << slot_num << endl;
printf("%p", buckets);
}
~TangMap() {
// 删除槽下面的链
for (auto i=0; i<slot_num; i++) {
auto it = &buckets[i];
for (;;) {
if (it->overflow != NULL) {
auto node = it->overflow;
it->overflow = ((slot_node *)it->overflow)->overflow;
it = (slot_node *)it->overflow;
free(node);
} else {
cout << "this list is clean" << endl;
if (it != &buckets[i]) {
free(it);
}
break;
}
if (it == NULL) {
break;
}
}
}
//
// 删除槽
if (buckets != NULL) {
free(buckets);
}
count = 0;
cout << "destruction function is invoked!" << endl;
}
slot_node * get_slot(KEY key) {
hash<KEY> hash_fn;
size_t slot_index = (hash_fn(key) & 0xff) % slot_num;
size_t subslot_index = ((hash_fn(key) >> 8) & 0xff) % slot_num;
cout << "slot_index is:" << slot_index << ", subslot_index is:" << subslot_index << endl;
auto it = &buckets[slot_index]; // 获取槽位头节点
return it;
}
void insert(KEY key, VALUE val) {
hash<KEY> hash_fn;
size_t slot_index = (hash_fn(key) & 0xff) % slot_num;
size_t subslot_index = ((hash_fn(key) >> 8) & 0xff) % slot_num;
cout << "slot_index is:" << slot_index << ", subslot_index is:" << subslot_index << endl;
auto it = &buckets[slot_index]; // 获取槽位头节点
for (;;) {
if (it->topbits & (1 << subslot_index)) {
cout << "--->bmap can not insert into this node" << endl;
if (it->overflow != NULL) {
it = (slot_node *)it->overflow;
continue;
} else {
auto node = new(slot_node);
node->overflow = NULL;
it->overflow = node;
it->key[subslot_index] = key;
it->values[subslot_index] = val;
it->topbits |= 1 << subslot_index;
count++;
break;
}
} else {
cout << "bmap can insert into this node" << endl;
it->key[subslot_index] = key;
it->values[subslot_index] = val;
it->topbits |= 1 << subslot_index;
printf("it->topbits=%d\r\n", it->topbits);
count++;
break;
}
}
printf("it->topbits=%d\r\n", it->topbits);
}
VALUE * update(KEY key, VALUE val) {
hash<KEY> hash_fn;
size_t slot_index = (hash_fn(key) & 0xff) % slot_num;
size_t subslot_index = ((hash_fn(key) >> 8) & 0xff) % slot_num;
cout << "slot_index is:" << slot_index << ", subslot_index is:" << subslot_index << endl;
auto it = &buckets[slot_index]; // 获取槽位头节点
for (;;) {
if (it->topbits & (1 << subslot_index)) {
if (key == it->key[subslot_index]) {
cout << "find key!" << endl;
it->values[subslot_index] = val;
return &it->values[subslot_index];
} else {
if (it->overflow != NULL) {
it = (slot_node *)it->overflow;
continue;
} else {
return NULL;
}
}
} else {
if (it->overflow != NULL) {
it = (slot_node *)it->overflow;
continue;
} else {
return NULL;
}
}
}
}
void del(KEY key) {
hash<KEY> hash_fn;
size_t slot_index = (hash_fn(key) & 0xff) % slot_num;
size_t subslot_index = ((hash_fn(key) >> 8) & 0xff) % slot_num;
cout << "slot_index is:" << slot_index << ", subslot_index is:" << subslot_index << endl;
auto it = &buckets[slot_index]; // 获取槽位头节点
for (;;) {
if (it->topbits & (1 << subslot_index)) {
if (key == it->key[subslot_index]) {
cout << "find key!" << endl;
it->topbits &= ~ (1 << subslot_index);
count--;
return;
} else {
if (it->overflow != NULL) {
it = (slot_node *)it->overflow;
continue;
} else {
return ;
}
}
} else {
if (it->overflow != NULL) {
it = (slot_node *)it->overflow;
continue;
} else {
return ;
}
}
}
}
VALUE * operator[](KEY key) {
hash<KEY> hash_fn;
size_t slot_index = (hash_fn(key) & 0xff) % slot_num;
size_t subslot_index = ((hash_fn(key) >> 8) & 0xff) % slot_num;
cout << "slot_index is:" << slot_index << ", subslot_index is:" << subslot_index << endl;
auto it = &buckets[slot_index]; // 获取槽位头节点
for (;;) {
if (it->topbits & (1 << subslot_index)) {
if (key == it->key[subslot_index]) {
cout << "find key!" << endl;
return &it->values[subslot_index];
} else {
if (it->overflow != NULL) {
it = (slot_node *)it->overflow;
continue;
} else {
return NULL;
}
}
} else {
if (it->overflow != NULL) {
it = (slot_node *)it->overflow;
continue;
} else {
return NULL;
}
}
}
}
// 元素个数,调用 len(map) 时,直接返回此值
int count;
char flags;
int slot_num; // buckets 的对数 log_2
slot_node * buckets; // 指向内存的指针,可以看作是:[]bmap。 其大小为 2^B. 如果元素个数为0,就为 nil
void *extra;
};
inline int randf(void)
{
struct timespec tim;
tim.tv_sec=0; tim.tv_nsec=1e4;
nanosleep(&tim, 0);
struct timeval cur;
gettimeofday(&cur, 0);
srand(cur.tv_usec);
return rand()/float(RAND_MAX);
}
string randstr(char *str, const int len)
{
srand(time(NULL));
int i;
for (i = 0; i < len; ++i)
{
switch ((randf() % 3))
{
case 1:
str[i] = 'A' + randf() % 26;
break;
case 2:
str[i] = 'a' + randf() % 26;
break;
default:
str[i] = '0' + randf() % 10;
break;
}
}
str[++i] = '\0';
return str;
}
int main(int arcg, char **argv) {
const int LEN_NAME=40;
char name[LEN_NAME+1];
TangMap<std::string,string> obj;
for (auto i=0; i<40; i++) {
auto key = "key" + std::to_string(i);
auto val = "value" + std::to_string(i);
obj.insert(key, val);
auto v = obj[key];
printf("val is:%s, v is:%s\r\n\r\n", val.c_str(), v->c_str());
obj.update(key, val + "new value");
auto vvv = obj[key];
printf("---val is:%s, vvv is:%s\r\n\r\n", val.c_str(), vvv->c_str());
obj.del(key);
auto vvvv = obj[key];
if (vvvv == NULL) {
cout << "key is deleted!" << endl;
}
}
}