redis 5.0.7 源码阅读——字典dict

redis中字典相关的文件为：dict.h与dict.c

与其说是一个字典，道不如说是一个哈希表。

一、数据结构

dictEntry

 typedef struct dictEntry {

     void *key;

     union {

         void *val;

         uint64_t u64;

         int64_t s64;

         double d;

     } v;

     struct dictEntry *next;

 } dictEntry;

dictEntry是一个kv对的单向链表，其中v是一个联合体，支持数字，或者是指向一块内存的指针。

 /*

 +---------------+

 |void *key      |

 +---------------+

 |union{...} v   |

 +---------------+

 |dictEntry *next|---+

 +---------------+   |

                     |

 +---------------+ <-+

 |void *key      |

 +---------------+

 |union{...} v   |

 +---------------+

 |dictEntry *next|

 +---------------+

 */

为了节约篇幅，后续用以下结构表示：

 /*

 +---+  +---+

 |K|V|->|K|V|->NULL

 +---+  +---+

 */

dictht

 typedef struct dictht {

     dictEntry **table;

     unsigned long size;

     /*

         这样写可能更容易理解

         const unsigned long size = 4;

         dictEntry *table[size];

     */

     unsigned long sizemask;

     //sizemask，始终为size-1

     unsigned long used;

     //当前总dictEntry数量

 } dictht;

dictht是一个hash table，整体结构大致为：

 /*

 +----------------------+   +---> +-----------------+  +---+

 |dictEntry **table     |---+     |dictEntry *bucket|->|K|V|->NULL

 +----------------------+         +-----------------+  +---+

 |unsigned long size = 4|         |dictEntry *bucket|->NULL

 +----------------------+         +-----------------+

 |unsigned long sizemask|         |dictEntry *bucket|->NULL

 +----------------------+         +-----------------+

 |unsigned long used    |         |dictEntry *bucket|->NULL

 +----------------------+         +-----------------+

 */

其中，table指向大小为sizeof(dictEntry*) * size的一片内存空间，每个dictEntry*可以视为一个bucket，每个bucket下挂着一个dictEntry单向链表。

size的值始终为2的位数，而sizemask的值始终为size-1，其作用是决定kv对要挂在哪个bucket上。

举个例子，size=4时，sizemask=3，其二进制为 0011，若通过hash函数计算出来key对应的hash值hash_value为5，二进制为0101，则通过位运算 sizemask & hash_value = 0011 & 0101 = 0001，十进制为1，则将会挂在idx = 1的bucket上。

dictType

 typedef struct dictType {

     uint64_t (*hashFunction)(const void *key);

     void *(*keyDup)(void *privdata, const void *key);

     void *(*valDup)(void *privdata, const void *obj);

     int (*keyCompare)(void *privdata, const void *key1, const void *key2);

     void (*keyDestructor)(void *privdata, void *key);

     void (*valDestructor)(void *privdata, void *obj);

 } dictType;

dictType用于自定义一些操作的方法，如拷贝key、拷贝value、销毁key、销毁value，比较key，以及hash函数。

dict

 typedef struct dict {

     dictType *type;

     void *privdata;

     dictht ht[];

     long rehashidx; /* rehashing not in progress if rehashidx == -1 */

     unsigned long iterators; /* number of iterators currently running */

 } dict;

之前提到的dictType与dictht都是dict的成员变量。除此之外，还有privdata，是在创建dict的时候调用者传入，用于特定操作时回传给函数的。如：

 #define dictFreeVal(d, entry) \

     if ((d)->type->valDestructor) \

         (d)->type->valDestructor((d)->privdata, (entry)->v.val)

 #define dictSetVal(d, entry, _val_) do { \

     if ((d)->type->valDup) \

         (entry)->v.val = (d)->type->valDup((d)->privdata, _val_); \

     else \

         (entry)->v.val = (_val_); \

 } while()

 #define dictSetSignedIntegerVal(entry, _val_) \

     do { (entry)->v.s64 = _val_; } while()

 #define dictSetUnsignedIntegerVal(entry, _val_) \

     do { (entry)->v.u64 = _val_; } while()

 #define dictSetDoubleVal(entry, _val_) \

     do { (entry)->v.d = _val_; } while()

 #define dictFreeKey(d, entry) \

     if ((d)->type->keyDestructor) \

         (d)->type->keyDestructor((d)->privdata, (entry)->key)

 #define dictSetKey(d, entry, _key_) do { \

     if ((d)->type->keyDup) \

         (entry)->key = (d)->type->keyDup((d)->privdata, _key_); \

     else \

         (entry)->key = (_key_); \

 } while()

 #define dictCompareKeys(d, key1, key2) \

     (((d)->type->keyCompare) ? \

         (d)->type->keyCompare((d)->privdata, key1, key2) : \

         (key1) == (key2))

rehashidx，是与ht[2]配合实现渐进式rehash操作的。若使用一步到位的方式，当key的数量非常大的时候，rehashing期间，是会卡死所有操作的。

iterators，是用于记录当前使用的安全迭代器数量，与rehashing操作有关。

整体结构如下：

 /*

 +---------+    /+-----------+   +-->+----------+  +---+

 |dictType*|   / |dictEntry**|---+   |dictEntry*|->|K|V|->NULL

 +---------+  /  +-----------+       +----------+  +---+

 |privdata | /   |size       |       |dictEntry*|->NULL

 +---------+/    +-----------+       +----------+

 |ht[2]    |     |sizemask   |       |dictEntry*|->NULL

 +---------+\    +-----------+       +----------+

 |rehashidx| \   |used       |       |dictEntry*|->NULL

 +---------+  \  +-----------+       +----------+

 |iterators|   \

 +---------+    \+-----------+

                 |dictEntry**|-->NULL

                 +-----------+

                 |size       |

                 +-----------+

                 |sizemask   |

                 +-----------+

                 |used       |

                 +-----------+

 */

二、创建

 static void _dictReset(dictht *ht)

 {

     ht->table = NULL;

     ht->size = ;

     ht->sizemask = ;

     ht->used = ;

 }

 int _dictInit(dict *d, dictType *type,

         void *privDataPtr)

 {

     _dictReset(&d->ht[]);

     _dictReset(&d->ht[]);

     d->type = type;

     d->privdata = privDataPtr;

     d->rehashidx = -;

     d->iterators = ;

     return DICT_OK;

 }

 dict *dictCreate(dictType *type,

         void *privDataPtr)

 {

     dict *d = zmalloc(sizeof(*d));

     _dictInit(d,type,privDataPtr);

     return d;

 }

可以调用dictCreate创建一个空的dict，它会分配好dict的空间，并初始化所有成员变量。在这里把privdata传入并保存。搜了一下整个redis源码的dictCreate调用，看到传入的值全为NULL。目前的理解暂时不清楚这个变量是什么时候赋值的。初始化后的dict结构如下：

 /*

 +------------+    /+-----------+

 |dictType*   |   / |dictEntry**|-->NULL

 +------------+  /  +-----------+

 |privdata    | /   |size=0     |

 +------------+/    +-----------+

 |ht[2]       |     |sizemask=0 |

 +------------+\    +-----------+

 |rehashidx=-1| \   |used=0     |

 +------------+  \  +-----------+

 |iterators=0 |   \

 +------------+    \+-----------+

                    |dictEntry**|-->NULL

                    +-----------+

                    |size=0     |

                    +-----------+

                    |sizemask=0 |

                    +-----------+

                    |used=0     |

                    +-----------+

 */

刚创建好的dict是存不了任何数据的，其两个hash table的size都为0。这里先说明一下resize操作：

 #define DICT_HT_INITIAL_SIZE     4

 static unsigned long _dictNextPower(unsigned long size)

 {

     unsigned long i = DICT_HT_INITIAL_SIZE;

     if (size >= LONG_MAX) return LONG_MAX + 1LU;

     while() {

         if (i >= size)

             return i;

         i *= ;

     }

 }

 /* Expand or create the hash table */

 int dictExpand(dict *d, unsigned long size)

 {

     /* the size is invalid if it is smaller than the number of

      * elements already inside the hash table */

     if (dictIsRehashing(d) || d->ht[].used > size)

         return DICT_ERR;

     dictht n; /* the new hash table */

     unsigned long realsize = _dictNextPower(size);

     /* Rehashing to the same table size is not useful. */

     if (realsize == d->ht[].size) return DICT_ERR;

     /* Allocate the new hash table and initialize all pointers to NULL */

     n.size = realsize;

     n.sizemask = realsize-;

     n.table = zcalloc(realsize*sizeof(dictEntry*));

     n.used = ;

     /* Is this the first initialization? If so it's not really a rehashing

      * we just set the first hash table so that it can accept keys. */

     if (d->ht[].table == NULL) {

         d->ht[] = n;

         return DICT_OK;

     }

     /* Prepare a second hash table for incremental rehashing */

     d->ht[] = n;

     d->rehashidx = ;

     return DICT_OK;

 }

 int dictResize(dict *d)

 {

     int minimal;

     if (!dict_can_resize || dictIsRehashing(d)) return DICT_ERR;

     minimal = d->ht[].used;

     if (minimal < DICT_HT_INITIAL_SIZE)

         minimal = DICT_HT_INITIAL_SIZE;

     return dictExpand(d, minimal);

 }

_dictNextPower用于获取当前要分配给hash table的size，得到的值一定是2的倍数，初始值为4。

dictExpand，从源码注释上看，它是为了扩容hash table，或者创建一个。它不允许与rehashing操作同时进行，也不能强制缩容。在使用_dictNextPower得到需要的size之后，它先是使用一个临时变量n去分配空间，然后进行判断，若ht[0].table的值为NULL，则认为是刚create出来的dict，直接把n赋值给ht[0]，否则给ht[1]，并开始rehashing操作。

dictResize操作就不用多说了。

三、rehashing操作

若有这样一个dict，假设K1、K2、K3、K4计算出来的hash值分别为0、5、2、7，使用sizemask计算出来的idx分别为0、1、2、3

 /*

                                                       +----+

                                                    +->|K1|V|->NULL

 +------------+    /+-----------+  +->+----------+ /   +----+

 |dictType*   |   / |dictEntry**|--+  |dictEntry*|/    +----+

 +------------+  /  +-----------+     +----------+ +-->|K2|V|->NULL

 |privdata    | /   |size=4     |     |dictEntry*|/    +----+

 +------------+/    +-----------+     +----------+

 |ht[2]       |     |sizemask=3 |     |dictEntry*|\    +----+

 +------------+\    +-----------+     +----------+ +-->|K3|V|->NULL

 |rehashidx=-1| \   |used=4     |     |dictEntry*|\    +----+

 +------------+  \  +-----------+     +----------+ \   +----+

 |iterators=0 |   \                                 +->|K4|V|->NULL

 +------------+    \+-----------+                      +----+

                    |dictEntry**|-->NULL

                    +-----------+

                    |size=0     |

                    +-----------+

                    |sizemask=0 |

                    +-----------+

                    |used=0     |

                    +-----------+

 */

 static int _dictExpandIfNeeded(dict *d)

 {

     /* Incremental rehashing already in progress. Return. */

     if (dictIsRehashing(d)) return DICT_OK;

     /* If the hash table is empty expand it to the initial size. */

     if (d->ht[].size == ) return dictExpand(d, DICT_HT_INITIAL_SIZE);

     /* If we reached the 1:1 ratio, and we are allowed to resize the hash

      * table (global setting) or we should avoid it but the ratio between

      * elements/buckets is over the "safe" threshold, we resize doubling

      * the number of buckets. */

     if (d->ht[].used >= d->ht[].size &&

         (dict_can_resize ||

          d->ht[].used/d->ht[].size > dict_force_resize_ratio))

     {

         return dictExpand(d, d->ht[].used*);

     }

     return DICT_OK;

 }

通过函数_dictExpandIfNeeded，可知当used >= size且dict_can_resize == TRUE的时候，需要调用dictExpand进入rehashing状态。dict_can_resize默认为1

 static int dict_can_resize = ;

 static unsigned int dict_force_resize_ratio = ;

需要的size为当前used * 2，即为8。调用dictExpand之后的结构：

 /*

                                                        +----+

                                                     +->|K1|V|->NULL

                                    +->+----------+ /   +----+

                                    |  |dictEntry*|/    +----+

                                    |  +----------+ +-->|K2|V|->NULL

                                    |  |dictEntry*|/    +----+

  +------------+    /+-----------+  |  +----------+

  |dictType*   |   / |dictEntry**|--+  |dictEntry*|\    +----+

  +------------+  /  +-----------+     +----------+ +-->|K3|V|->NULL

  |privdata    | /   |size=4     |     |dictEntry*|\    +----+

  +------------+/    +-----------+     +----------+ \   +----+

  |ht[2]       |     |sizemask=3 |                   +->|K4|V|->NULL

  +------------+\    +-----------+                      +----+

  |rehashidx=0 | \   |used=4     |

  +------------+  \  +-----------+

  |iterators=0 |   \

  +------------+    \+-----------+  +->+----------+

                     |dictEntry**|--+  |dictEntry*|->NULL

                     +-----------+     +----------+

                     |size=8     |     |dictEntry*|->NULL

                     +-----------+     +----------+

                     |sizemask=7 |     |dictEntry*|->NULL

                     +-----------+     +----------+

                     |used=0     |     |dictEntry*|->NULL

                     +-----------+     +----------+

                                       |dictEntry*|->NULL

                                       +----------+

                                       |dictEntry*|->NULL

                                       +----------+

                                       |dictEntry*|->NULL

                                       +----------+

                                       |dictEntry*|->NULL

                                       +----------+

 */

根据rehashing操作

 int dictRehash(dict *d, int n) {

     int empty_visits = n*; /* Max number of empty buckets to visit. */

     if (!dictIsRehashing(d)) return ;

     while(n-- && d->ht[].used != ) {

         dictEntry *de, *nextde;

         /* Note that rehashidx can't overflow as we are sure there are more

          * elements because ht[0].used != 0 */

         assert(d->ht[].size > (unsigned long)d->rehashidx);

         while(d->ht[].table[d->rehashidx] == NULL) {

             d->rehashidx++;

             if (--empty_visits == ) return ;

         }

         de = d->ht[].table[d->rehashidx];

         /* Move all the keys in this bucket from the old to the new hash HT */

         while(de) {

             uint64_t h;

             nextde = de->next;

             /* Get the index in the new hash table */

             h = dictHashKey(d, de->key) & d->ht[].sizemask;

             de->next = d->ht[].table[h];

             d->ht[].table[h] = de;

             d->ht[].used--;

             d->ht[].used++;

             de = nextde;

         }

         d->ht[].table[d->rehashidx] = NULL;

         d->rehashidx++;

     }

     /* Check if we already rehashed the whole table... */

     if (d->ht[].used == ) {

         zfree(d->ht[].table);

         d->ht[] = d->ht[];

         _dictReset(&d->ht[]);

         d->rehashidx = -;

         return ;

     }

     /* More to rehash... */

     return ;

 }

rehashing操作将会把ht[0]里，rehashidx的值对应的bucket下的所有dictEntry，移至ht[1]，之后对rehashidx进行自增处理。当ht[0]->used为0时，认为ht[0]的所有dictEntry已经移至ht[1]，此时return 0，否则 return 1，告诉调用者，还需要继续进行rehashing操作。同时，rehashing时允许最多跳过10n的空bucket，就要退出流程。假设传入的n=1，即只进行一次rehashing操作，转换至完成之后的结构：

 /*

                                                     +->NULL

                                    +->+----------+ /

                                    |  |dictEntry*|/    +----+

                                    |  +----------+ +-->|K2|V|->NULL

                                    |  |dictEntry*|/    +----+

  +------------+    /+-----------+  |  +----------+

  |dictType*   |   / |dictEntry**|--+  |dictEntry*|\    +----+

  +------------+  /  +-----------+     +----------+ +-->|K3|V|->NULL

  |privdata    | /   |size=4     |     |dictEntry*|\    +----+

  +------------+/    +-----------+     +----------+ \   +----+

  |ht[2]       |     |sizemask=3 |                   +->|K4|V|->NULL

  +------------+\    +-----------+                      +----+

  |rehashidx=1 | \   |used=3     |

  +------------+  \  +-----------+

  |iterators=0 |   \

  +------------+    \+-----------+  +->+----------+   +----+

                     |dictEntry**|--+  |dictEntry*|-->|K1|V|->NULL

                     +-----------+     +----------+   +----+

                     |size=8     |     |dictEntry*|->NULL

                     +-----------+     +----------+

                     |sizemask=7 |     |dictEntry*|->NULL

                     +-----------+     +----------+

                     |used=1     |     |dictEntry*|->NULL

                     +-----------+     +----------+

                                       |dictEntry*|->NULL

                                       +----------+

                                       |dictEntry*|->NULL

                                       +----------+

                                       |dictEntry*|->NULL

                                       +----------+

                                       |dictEntry*|->NULL

                                       +----------+

 */

所有节点移完时：

 /*

                                    +->+----------+

                                    |  |dictEntry*|->NULL

                                    |  +----------+

                                    |  |dictEntry*|->NULL

  +------------+    /+-----------+  |  +----------+

  |dictType*   |   / |dictEntry**|--+  |dictEntry*|->NULL

  +------------+  /  +-----------+     +----------+

  |privdata    | /   |size=4     |     |dictEntry*|->NULL

  +------------+/    +-----------+     +----------+

  |ht[2]       |     |sizemask=3 |

  +------------+\    +-----------+

  |rehashidx=4 | \   |used=0     |

  +------------+  \  +-----------+

  |iterators=0 |   \

  +------------+    \+-----------+  +->+----------+   +----+

                     |dictEntry**|--+  |dictEntry*|-->|K1|V|->NULL

                     +-----------+     +----------+   +----+

                     |size=8     |     |dictEntry*|->NULL

                     +-----------+     +----------+   +----+

                     |sizemask=7 |     |dictEntry*|-->|K3|V|->NULL

                     +-----------+     +----------+   +----+

                     |used=4     |     |dictEntry*|->NULL

                     +-----------+     +----------+

                                       |dictEntry*|->NULL

                                       +----------+   +----+

                                       |dictEntry*|-->|K2|V|->NULL

                                       +----------+   +----+

                                       |dictEntry*|->NULL

                                       +----------+   +----+

                                       |dictEntry*|-->|K4|V|->NULL

                                       +----------+   +----+

 */

此时ht[0]->used为0，释放原ht[0]的hash table，把ht[1]赋值给ht[0]，并设置ht[1] = NULL，最后重置rehashidx=-1，rehashing操作结束

 /*

  +------------+    /+-----------+   +-->+----------+   +----+

  |dictType*   |   / |dictEntry**|---+   |dictEntry*|-->|K1|V|->NULL

  +------------+  /  +-----------+       +----------+   +----+

  |privdata    | /   |size=8     |       |dictEntry*|->NULL

  +------------+/    +-----------+       +----------+   +----+

  |ht[2]       |     |sizemask=7 |       |dictEntry*|-->|K3|V|->NULL

  +------------+\    +-----------+       +----------+   +----+

  |rehashidx=-1| \   |used=4     |       |dictEntry*|->NULL

  +------------+  \  +-----------+       +----------+

  |iterators=0 |   \                     |dictEntry*|->NULL

  +------------+    \+-----------+       +----------+   +----+

                     |dictEntry**|->NULL |dictEntry*|-->|K2|V|->NULL

                     +-----------+       +----------+   +----+

                     |size=0     |       |dictEntry*|->NULL

                     +-----------+       +----------+   +----+

                     |sizemask=0 |       |dictEntry*|-->|K4|V|->NULL

                     +-----------+       +----------+   +----+

                     |used=0     |

                     +-----------+

 */

rehashing操作的触发共有两种方式

1、定时操作

 long long timeInMilliseconds(void) {

     struct timeval tv;

     gettimeofday(&tv,NULL);

     return (((long long)tv.tv_sec)*)+(tv.tv_usec/);

 }

 /* Rehash for an amount of time between ms milliseconds and ms+1 milliseconds */

 int dictRehashMilliseconds(dict *d, int ms) {

     long long start = timeInMilliseconds();

     int rehashes = ;

     while(dictRehash(d,)) {

         rehashes += ;

         if (timeInMilliseconds()-start > ms) break;

     }

     return rehashes;

 }

外部传入一个毫秒时间，在这时间内循环执行rehashing，每次执行100次。

2、操作时触发

 static void _dictRehashStep(dict *d) {

     if (d->iterators == ) dictRehash(d,);

 }

在插入、删除、查找等操作时，顺带执行一次rehashing操作。值得注意的是，如果存在安全的迭代器，即d->iterators != 0，则不会进行rehashing操作

三、插入

获取可插入新节点的bucket idx的方法：

 static long _dictKeyIndex(dict *d, const void *key, uint64_t hash, dictEntry **existing)

 {

     unsigned long idx, table;

     dictEntry *he;

     if (existing) *existing = NULL;

     /* Expand the hash table if needed */

     if (_dictExpandIfNeeded(d) == DICT_ERR)

         return -;

     for (table = ; table <= ; table++) {

         idx = hash & d->ht[table].sizemask;

         /* Search if this slot does not already contain the given key */

         he = d->ht[table].table[idx];

         while(he) {

             if (key==he->key || dictCompareKeys(d, key, he->key)) {

                 if (existing) *existing = he;

                 return -;

             }

             he = he->next;

         }

         if (!dictIsRehashing(d)) break;

     }

     return idx;

 }

此方法在进行查找idx之前，先进行一次判断，是否需要rehashing操作。而后进行查找。idx的值就是通过hash函数计算出来的hash_value与sizemask做位运算的结果，然后遍历此idx对应的bucket，若已存在相同的key，则认为不可插入，并把对应的dictEntry用传入的二级指针的方式传出，供调用者使用。若不存在，则需要判断是否正在进行rehashing操作。若在，则会对ht[1]做一次相同的操作。最终可以得到一个idx值，或传出一个dictEntry。

由于rehashing期间，将会把ht[0]的所有dictEntry依次转移至ht[1]，为了防止新插入的dictEntry落到ht[0]已完成rehashing操作的bucket上，在rehashing期间，返回的可插入的idx一定是属于ht[1]的。

插入方法：

 dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing)

 {

     long index;

     dictEntry *entry;

     dictht *ht;

     if (dictIsRehashing(d)) _dictRehashStep(d);

     /* Get the index of the new element, or -1 if

      * the element already exists. */

     if ((index = _dictKeyIndex(d, key, dictHashKey(d,key), existing)) == -)

         return NULL;

     /* Allocate the memory and store the new entry.

      * Insert the element in top, with the assumption that in a database

      * system it is more likely that recently added entries are accessed

      * more frequently. */

     ht = dictIsRehashing(d) ? &d->ht[] : &d->ht[];

     entry = zmalloc(sizeof(*entry));

     entry->next = ht->table[index];

     ht->table[index] = entry;

     ht->used++;

     /* Set the hash entry fields. */

     dictSetKey(d, entry, key);

     return entry;

 }

若不存在相同key，则插入，否则，传出dictEntry的指针。插入时，由于没有记录每个dictEntry链表的尾指针，所以使用头插法，可以节约插入时的时间消耗。

dictAddRaw做为最终插入的方法，被多个方法所调用：

 //若不存在，则插入，否则报错

 int dictAdd(dict *d, void *key, void *val)

 {

     dictEntry *entry = dictAddRaw(d,key,NULL);

     if (!entry) return DICT_ERR;

     dictSetVal(d, entry, val);

     return DICT_OK;

 }

 //若存在，则替换value，否则插入

 int dictReplace(dict *d, void *key, void *val)

 {

     dictEntry *entry, *existing, auxentry;

     entry = dictAddRaw(d,key,&existing);

     if (entry) {

         dictSetVal(d, entry, val);

         return ;

     }

     auxentry = *existing;

     dictSetVal(d, existing, val);

     dictFreeVal(d, &auxentry);

     return ;

 }

 //若存在，则返回对应dictEntry，否则插入后返回新的dictEntry

 dictEntry *dictAddOrFind(dict *d, void *key) {

     dictEntry *entry, *existing;

     entry = dictAddRaw(d,key,&existing);

     return entry ? entry : existing;

 }

对于一个刚刚create的dict：

 /*

 +------------+    /+-----------+

 |dictType*   |   / |dictEntry**|-->NULL

 +------------+  /  +-----------+

 |privdata    | /   |size=0     |

 +------------+/    +-----------+

 |ht[2]       |     |sizemask=0 |

 +------------+\    +-----------+

 |rehashidx=-1| \   |used=0     |

 +------------+  \  +-----------+

 |iterators=0 |   \

 +------------+    \+-----------+

                    |dictEntry**|-->NULL

                    +-----------+

                    |size=0     |

                    +-----------+

                    |sizemask=0 |

                    +-----------+

                    |used=0     |

                    +-----------+

 */

假设K1、K2、K3、K4计算出来的hash值分别为0、5、2、7，使用sizemask计算出来的idx分别为0、1、2、3

现调用dictAdd方法进行插入

1、插入K1

执行完dictAddRaw中的_dictKeyIndex里的_dictExpandIfNeeded：

 /*

                                                    +-->NULL

 +------------+    /+-----------+  +->+----------+ /

 |dictType*   |   / |dictEntry**|--+  |dictEntry*|/

 +------------+  /  +-----------+     +----------+ +--->NULL

 |privdata    | /   |size=4     |     |dictEntry*|/

 +------------+/    +-----------+     +----------+

 |ht[2]       |     |sizemask=3 |     |dictEntry*|\

 +------------+\    +-----------+     +----------+ +--->NULL

 |rehashidx=-1| \   |used=0     |     |dictEntry*|\

 +------------+  \  +-----------+     +----------+ \

 |iterators=0 |   \                                 +-->NULL

 +------------+    \+-----------+

                    |dictEntry**|-->NULL

                    +-----------+

                    |size=0     |

                    +-----------+

                    |sizemask=0 |

                    +-----------+

                    |used=0     |

                    +-----------+

 */

同时得到其在ht[0]的idx = 0，且不在rehashing操作中，于是直接插入

 /*

                                                       +----+

                                                    +->|K1|V|->NULL

 +------------+    /+-----------+  +->+----------+ /   +----+

 |dictType*   |   / |dictEntry**|--+  |dictEntry*|/

 +------------+  /  +-----------+     +----------+ +--->NULL

 |privdata    | /   |size=4     |     |dictEntry*|/

 +------------+/    +-----------+     +----------+

 |ht[2]       |     |sizemask=3 |     |dictEntry*|\

 +------------+\    +-----------+     +----------+ +--->NULL

 |rehashidx=-1| \   |used=1     |     |dictEntry*|\

 +------------+  \  +-----------+     +----------+ \

 |iterators=0 |   \                                 +-->NULL

 +------------+    \+-----------+

                    |dictEntry**|-->NULL

                    +-----------+

                    |size=0     |

                    +-----------+

                    |sizemask=0 |

                    +-----------+

                    |used=0     |

                    +-----------+

 */

2、插入K2、K3、K4后：

 /*

                                                       +----+

                                                    +->|K1|V|->NULL

 +------------+    /+-----------+  +->+----------+ /   +----+

 |dictType*   |   / |dictEntry**|--+  |dictEntry*|/    +-----

 +------------+  /  +-----------+     +----------+ +-->|K2|V|->NULL

 |privdata    | /   |size=4     |     |dictEntry*|/    +----+

 +------------+/    +-----------+     +----------+

 |ht[2]       |     |sizemask=3 |     |dictEntry*|\    +----+

 +------------+\    +-----------+     +----------+ +-->|K3|V|->NULL

 |rehashidx=-1| \   |used=4     |     |dictEntry*|\    +----+

 +------------+  \  +-----------+     +----------+ \   +----+

 |iterators=0 |   \                                 +->|K4|V|->NULL

 +------------+    \+-----------+                      +----+

                    |dictEntry**|-->NULL

                    +-----------+

                    |size=0     |

                    +-----------+

                    |sizemask=0 |

                    +-----------+

                    |used=0     |

                    +-----------+

 */

3、此时若有一个K5，计算出来的hash值为8，则：

i.因此刻不在rehashing操作，所以不用做处理

ii.执行完dictAddRaw中的_dictKeyIndex里的_dictExpandIfNeeded：

 /*

                                                        +----+

                                                     +->|K1|V|->NULL

                                    +->+----------+ /   +----+

                                    |  |dictEntry*|/    +----+

                                    |  +----------+ +-->|K2|V|->NULL

                                    |  |dictEntry*|/    +----+

  +------------+    /+-----------+  |  +----------+

  |dictType*   |   / |dictEntry**|--+  |dictEntry*|\    +----+

  +------------+  /  +-----------+     +----------+ +-->|K3|V|->NULL

  |privdata    | /   |size=4     |     |dictEntry*|\    +----+

  +------------+/    +-----------+     +----------+ \   +----+

  |ht[2]       |     |sizemask=3 |                   +->|K4|V|->NULL

  +------------+\    +-----------+                      +----+

  |rehashidx=0 | \   |used=4     |

  +------------+  \  +-----------+

  |iterators=0 |   \

  +------------+    \+-----------+  +->+----------+

                     |dictEntry**|--+  |dictEntry*|->NULL

                     +-----------+     +----------+

                     |size=8     |     |dictEntry*|->NULL

                     +-----------+     +----------+

                     |sizemask=7 |     |dictEntry*|->NULL

                     +-----------+     +----------+

                     |used=0     |     |dictEntry*|->NULL

                     +-----------+     +----------+

                                       |dictEntry*|->NULL

                                       +----------+

                                       |dictEntry*|->NULL

                                       +----------+

                                       |dictEntry*|->NULL

                                       +----------+

                                       |dictEntry*|->NULL

                                       +----------+

 */

同时得到其在ht[1]的idx=0

iii.插入

 /*

                                                        +----+

                                                     +->|K1|V|->NULL

                                    +->+----------+ /   +----+

                                    |  |dictEntry*|/    +----+

                                    |  +----------+ +-->|K2|V|->NULL

                                    |  |dictEntry*|/    +----+

  +------------+    /+-----------+  |  +----------+

  |dictType*   |   / |dictEntry**|--+  |dictEntry*|\    +----+

  +------------+  /  +-----------+     +----------+ +-->|K3|V|->NULL

  |privdata    | /   |size=4     |     |dictEntry*|\    +----+

  +------------+/    +-----------+     +----------+ \   +----+

  |ht[2]       |     |sizemask=3 |                   +->|K4|V|->NULL

  +------------+\    +-----------+                      +----+

  |rehashidx=0 | \   |used=4     |

  +------------+  \  +-----------+

  |iterators=0 |   \

  +------------+    \+-----------+  +->+----------+   +----+

                     |dictEntry**|--+  |dictEntry*|-->|K5|V|->NULL

                     +-----------+     +----------+   +----+

                     |size=8     |     |dictEntry*|->NULL

                     +-----------+     +----------+

                     |sizemask=7 |     |dictEntry*|->NULL

                     +-----------+     +----------+

                     |used=1     |     |dictEntry*|->NULL

                     +-----------+     +----------+

                                       |dictEntry*|->NULL

                                       +----------+

                                       |dictEntry*|->NULL

                                       +----------+

                                       |dictEntry*|->NULL

                                       +----------+

                                       |dictEntry*|->NULL

                                       +----------+

 */

4、此时若有一个K6，计算出来的hash值为16，则：

i.此时已处理rehashing操作，执行一步：

 /*

                                                     +-->NULL

                                    +->+----------+ /

                                    |  |dictEntry*|/    +----+

                                    |  +----------+ +-->|K2|V|->NULL

                                    |  |dictEntry*|/    +----+

  +------------+    /+-----------+  |  +----------+

  |dictType*   |   / |dictEntry**|--+  |dictEntry*|\    +----+

  +------------+  /  +-----------+     +----------+ +-->|K3|V|->NULL

  |privdata    | /   |size=4     |     |dictEntry*|\    +----+

  +------------+/    +-----------+     +----------+ \   +----+

  |ht[2]       |     |sizemask=3 |                   +->|K4|V|->NULL

  +------------+\    +-----------+                      +----+

  |rehashidx=1 | \   |used=3     |

  +------------+  \  +-----------+

  |iterators=0 |   \

  +------------+    \+-----------+  +->+----------+   +----+  +----+

                     |dictEntry**|--+  |dictEntry*|-->|K1|V|->|K5|V|->NULL

                     +-----------+     +----------+   +----+  +----+

                     |size=8     |     |dictEntry*|->NULL

                     +-----------+     +----------+

                     |sizemask=7 |     |dictEntry*|->NULL

                     +-----------+     +----------+

                     |used=2     |     |dictEntry*|->NULL

                     +-----------+     +----------+

                                       |dictEntry*|->NULL

                                       +----------+

                                       |dictEntry*|->NULL

                                       +----------+

                                       |dictEntry*|->NULL

                                       +----------+

                                       |dictEntry*|->NULL

                                       +----------+

 */

ii.执行完dictAddRaw中的_dictKeyIndex里的_dictExpandIfNeeded，因已在进行rehashing，所以不做任何处理，只返回其在ht[1]的idx 0

iii.头插法将K6插入

 /*

                                                     +-->NULL

                                    +->+----------+ /

                                    |  |dictEntry*|/    +----+

                                    |  +----------+ +-->|K2|V|->NULL

                                    |  |dictEntry*|/    +----+

  +------------+    /+-----------+  |  +----------+

  |dictType*   |   / |dictEntry**|--+  |dictEntry*|\    +----+

  +------------+  /  +-----------+     +----------+ +-->|K3|V|->NULL

  |privdata    | /   |size=4     |     |dictEntry*|\    +----+

  +------------+/    +-----------+     +----------+ \   +----+

  |ht[2]       |     |sizemask=3 |                   +->|K4|V|->NULL

  +------------+\    +-----------+                      +----+

  |rehashidx=1 | \   |used=3     |

  +------------+  \  +-----------+

  |iterators=0 |   \

  +------------+    \+-----------+  +->+----------+   +----+  +----+  +----+

                     |dictEntry**|--+  |dictEntry*|-->|K6|V|->|K1|V|->|K5|V|->NULL

                     +-----------+     +----------+   +----+  +----+  +----+

                     |size=8     |     |dictEntry*|->NULL

                     +-----------+     +----------+

                     |sizemask=7 |     |dictEntry*|->NULL

                     +-----------+     +----------+

                     |used=3     |     |dictEntry*|->NULL

                     +-----------+     +----------+

                                       |dictEntry*|->NULL

                                       +----------+

                                       |dictEntry*|->NULL

                                       +----------+

                                       |dictEntry*|->NULL

                                       +----------+

                                       |dictEntry*|->NULL

                                       +----------+

 */

以上为正常插入时的情况，key已存在，或是调用另外两个方法的情况与之大同小异，不再做过多叙述。

四、查找

 dictEntry *dictFind(dict *d, const void *key)

 {

     dictEntry *he;

     uint64_t h, idx, table;

     if (d->ht[].used + d->ht[].used == ) return NULL; /* dict is empty */

     if (dictIsRehashing(d)) _dictRehashStep(d);

     h = dictHashKey(d, key);

     for (table = ; table <= ; table++) {

         idx = h & d->ht[table].sizemask;

         he = d->ht[table].table[idx];

         while(he) {

             if (key==he->key || dictCompareKeys(d, key, he->key))

                 return he;

             he = he->next;

         }

         if (!dictIsRehashing(d)) return NULL;

     }

     return NULL;

 }

同样，若在rehashing期间，则执行一次。首先在ht[0]里查找，计算出hash值对应ht[0]的idx，取得其bucket，然后遍历之，找到与指定key相同的dictEntry。若ht[0]中找不到指定的key，且正在进行rehashing操作，则去ht[1]以相同方式也查找一次。

redis额外提供一个，根据key只获取其value的方法：

 void *dictFetchValue(dict *d, const void *key) {

     dictEntry *he;

     he = dictFind(d,key);

     return he ? dictGetVal(he) : NULL;

 }

key不存在时返回NULL

五、删除

删除方法：

 static dictEntry *dictGenericDelete(dict *d, const void *key, int nofree) {

     uint64_t h, idx;

     dictEntry *he, *prevHe;

     int table;

     if (d->ht[].used ==  && d->ht[].used == ) return NULL;

     if (dictIsRehashing(d)) _dictRehashStep(d);

     h = dictHashKey(d, key);

     for (table = ; table <= ; table++) {

         idx = h & d->ht[table].sizemask;

         he = d->ht[table].table[idx];

         prevHe = NULL;

         while(he) {

             if (key==he->key || dictCompareKeys(d, key, he->key)) {

                 /* Unlink the element from the list */

                 if (prevHe)

                     prevHe->next = he->next;

                 else

                     d->ht[table].table[idx] = he->next;

                 if (!nofree) {

                     dictFreeKey(d, he);

                     dictFreeVal(d, he);

                     zfree(he);

                 }

                 d->ht[table].used--;

                 return he;

             }

             prevHe = he;

             he = he->next;

         }

         if (!dictIsRehashing(d)) break;

     }

     return NULL; /* not found */

 }

查找方式与dictFind相同。找到之后，由调用者指定是否要销毁此dictEntry，若不销毁，则要把对应指针传出来，给外部使用。

此方法被两个接口所调用：

 int dictDelete(dict *ht, const void *key) {

     return dictGenericDelete(ht,key,) ? DICT_OK : DICT_ERR;

 }

 dictEntry *dictUnlink(dict *ht, const void *key) {

     return dictGenericDelete(ht,key,);

 }

dictDelete就不用多说了，直接删除对应dictEntry。关于为什么需要dictUnlink，源码的注释上写道，如果有某种操作，需要先查找指定key对应的dictEntry，然后对其做点操作，接着就直接删除，在没有dictUnlink的时候，需要这样：

 entry = dictFind(...);

 // Do something with entry

 dictDelete(dictionary,entry);

实际需要查找两次。而在有dictUnlink的情况下：

 entry = dictUnlink(dictionary,entry);

 // Do something with entry

 dictFreeUnlinkedEntry(entry);

只需要一次查找，配合专门的删除操作，即可。

 void dictFreeUnlinkedEntry(dict *d, dictEntry *he) {

     if (he == NULL) return;

     dictFreeKey(d, he);

     dictFreeVal(d, he);

     zfree(he);

 }

六、销毁

清空一个hash table的方法

 int _dictClear(dict *d, dictht *ht, void(callback)(void *)) {

     unsigned long i;

     /* Free all the elements */

     for (i = ; i < ht->size && ht->used > ; i++) {

         dictEntry *he, *nextHe;

         if (callback && (i & ) == ) callback(d->privdata);

         if ((he = ht->table[i]) == NULL) continue;

         while(he) {

             nextHe = he->next;

             dictFreeKey(d, he);

             dictFreeVal(d, he);

             zfree(he);

             ht->used--;

             he = nextHe;

         }

     }

     /* Free the table and the allocated cache structure */

     zfree(ht->table);

     /* Re-initialize the table */

     _dictReset(ht);

     return DICT_OK; /* never fails */

 }

两层循环，分别遍历所有bucket与单bucket里所有dictEntry进行释放。关于这里的 (i&65535) == 0的判断，_dictClear方法仅在相同文件的方法dictEmpty与dictRelease调用

 void dictRelease(dict *d)

 {

     _dictClear(d,&d->ht[],NULL);

     _dictClear(d,&d->ht[],NULL);

     zfree(d);

 }

 void dictEmpty(dict *d, void(callback)(void*)) {

     _dictClear(d,&d->ht[],callback);

     _dictClear(d,&d->ht[],callback);

     d->rehashidx = -;

     d->iterators = ;

 }

dictRelease不用多说，传入的callback为NULL。而dictEmpty，搜索redis源码所有文件的调用，

ccx@ccx:~/Desktop/redis-5.0./redis-5.0./src$ grep dictEmpty * -r

blocked.c:    dictEmpty(c->bpop.keys,NULL);

db.c:            dictEmpty(server.db[j].dict,callback);

db.c:            dictEmpty(server.db[j].expires,callback);

dict.c:void dictEmpty(dict *d, void(callback)(void*)) {

dict.h:void dictEmpty(dict *d, void(callback)(void*));

replication.c:    dictEmpty(server.repl_scriptcache_dict,NULL);

sentinel.c:    dictEmpty(server.commands,NULL);

仅db.c里传了callback进来，对应的方法为

 long long emptyDb(int dbnum, int flags, void(callback)(void*));

继续搜索：

ccx@ccx:~/Desktop/redis-5.0./redis-5.0./src$ grep emptyDb * -r

cluster.c:        emptyDb(-,EMPTYDB_NO_FLAGS,NULL);

db.c:long long emptyDb(int dbnum, int flags, void(callback)(void*)) {

db.c:            emptyDbAsync(&server.db[j]);

db.c:/* Return the set of flags to use for the emptyDb() call for FLUSHALL*/

db.c:    server.dirty += emptyDb(c->db->id,flags,NULL);

db.c:    server.dirty += emptyDb(-,flags,NULL);

debug.c:        emptyDb(-,EMPTYDB_NO_FLAGS,NULL);

debug.c:        emptyDb(-,EMPTYDB_NO_FLAGS,NULL);

lazyfree.c:void emptyDbAsync(redisDb *db) {

replication.c: * data with emptyDb(), and while we load the new data received as an

replication.c:/* Callback used by emptyDb() while flushing away old data to load*/

replication.c:        emptyDb(

server.h:long long emptyDb(int dbnum, int flags, void(callback)(void*));

server.h:void emptyDbAsync(redisDb *db);

真正调用的地方传入的也是NULL，并不知道是拿来做什么用的...

ps：用grep查找只是方便cv过来....

七、迭代器操作

数据结构：

 typedef struct dictIterator {

     dict *d;

     long index;

     int table, safe;

     dictEntry *entry, *nextEntry;

     /* unsafe iterator fingerprint for misuse detection. */

     long long fingerprint;

 } dictIterator;

根据源码注释可知，如果是个安全的迭代器，即safe == 1，则在迭代中可以调用dictAdd、dictFind等方法，否则只能调用dictNext。

index表示，ht[table]对应的bucket的idx。

获取迭代器：

 dictIterator *dictGetIterator(dict *d)

 {

     dictIterator *iter = zmalloc(sizeof(*iter));

     iter->d = d;

     iter->table = ;

     iter->index = -;

     iter->safe = ;

     iter->entry = NULL;

     iter->nextEntry = NULL;

     return iter;

 }

 dictIterator *dictGetSafeIterator(dict *d) {

     dictIterator *i = dictGetIterator(d);

     i->safe = ;

     return i;

 }

刚获取的迭代器并不指向具体哪个dictEntry

next操作：

 dictEntry *dictNext(dictIterator *iter)

 {

     while () {

         if (iter->entry == NULL) {

             dictht *ht = &iter->d->ht[iter->table];

             if (iter->index == - && iter->table == ) {

                 if (iter->safe)

                     iter->d->iterators++;

                 else

                     iter->fingerprint = dictFingerprint(iter->d);

             }

             iter->index++;

             if (iter->index >= (long) ht->size) {

                 if (dictIsRehashing(iter->d) && iter->table == ) {

                     iter->table++;

                     iter->index = ;

                     ht = &iter->d->ht[];

                 } else {

                     break;

                 }

             }

             iter->entry = ht->table[iter->index];

         } else {

             iter->entry = iter->nextEntry;

         }

         if (iter->entry) {

             /* We need to save the 'next' here, the iterator user

              * may delete the entry we are returning. */

             iter->nextEntry = iter->entry->next;

             return iter->entry;

         }

     }

     return NULL;

 }

对于一个新的迭代器，首次调用时，会根据是否安全，做不同操作。安全的迭代器会给dict里的计数器+1，不安全的将会记录本字典的指纹。之后会遍历ht[0]，取到第一个非NULL的dictEntry。当ht[0]遍历完且取不到非NULL的dictEntry时，如果正在进行rehashing操作，则会去ht[1]里找。

如：

 /*

      +-------------------------+

 +----|dict *                   |

 |    +-------------------------+

 |    |long index               |

 |    +-------------------------+

 |    |int table                |

 |    +-------------------------+

 |    |int safe                 |

 |    +-------------------------+

 |    |dictEntry *entry         |->NULL

 |    +-------------------------+

 |    |dictEntry *entrynextEntry|->NULL

 |    +-------------------------+

 |    |long long fingerprint    |

 |    +-------------------------+

 |

 |

 |

 |                                                       +-->NULL

 |                                      +->+----------+ /

 |                                      |  |dictEntry*|/    +----+

 |                                      |  +----------+ +-->|K2|V|->NULL

 |                                      |  |dictEntry*|/    +----+

 +--->+------------+    /+-----------+  |  +----------+

      |dictType*   |   / |dictEntry**|--+  |dictEntry*|\    +----+

      +------------+  /  +-----------+     +----------+ +-->|K3|V|->NULL

      |privdata    | /   |size=4     |     |dictEntry*|\    +----+

      +------------+/    +-----------+     +----------+ \   +----+

      |ht[2]       |     |sizemask=3 |                   +->|K4|V|->NULL

      +------------+\    +-----------+                      +----+

      |rehashidx=1 | \   |used=3     |

      +------------+  \  +-----------+

      |iterators=0 |   \

      +------------+    \+-----------+  +->+----------+   +----+  +----+

                         |dictEntry**|--+  |dictEntry*|-->|K1|V|->|K5|V|->NULL

                         +-----------+     +----------+   +----+  +----+

                         |size=8     |     |dictEntry*|->NULL

                         +-----------+     +----------+

                         |sizemask=7 |     |dictEntry*|->NULL

                         +-----------+     +----------+

                         |used=3     |     |dictEntry*|->NULL

                         +-----------+     +----------+

                                           |dictEntry*|->NULL

                                           +----------+

                                           |dictEntry*|->NULL

                                           +----------+

                                           |dictEntry*|->NULL

                                           +----------+

                                           |dictEntry*|->NULL

                                           +----------+

 */

遍历顺序为，K2，K3，K4，K1，K5。

迭代器销毁：

 void dictReleaseIterator(dictIterator *iter)

 {

     if (!(iter->index == - && iter->table == )) {

         if (iter->safe)

             iter->d->iterators--;

         else

             assert(iter->fingerprint == dictFingerprint(iter->d));

     }

     zfree(iter);

 }

与首次执行next操作相对应，若为safe的迭代器，要给dict的计算减1，否则要校验期间dict的指纹是否发生了变化。

指纹的计算：

 long long dictFingerprint(dict *d) {

     long long integers[], hash = ;

     int j;

     integers[] = (long) d->ht[].table;

     integers[] = d->ht[].size;

     integers[] = d->ht[].used;

     integers[] = (long) d->ht[].table;

     integers[] = d->ht[].size;

     integers[] = d->ht[].used;

     /* We hash N integers by summing every successive integer with the integer

      * hashing of the previous sum. Basically:

      *

      * Result = hash(hash(hash(int1)+int2)+int3) ...

      *

      * This way the same set of integers in a different order will (likely) hash

      * to a different number. */

     for (j = ; j < ; j++) {

         hash += integers[j];

         /* For the hashing step we use Tomas Wang's 64 bit integer hash. */

         hash = (~hash) + (hash << ); // hash = (hash << 21) - hash - 1;

         hash = hash ^ (hash >> );

         hash = (hash + (hash << )) + (hash << ); // hash * 265

         hash = hash ^ (hash >> );

         hash = (hash + (hash << )) + (hash << ); // hash * 21

         hash = hash ^ (hash >> );

         hash = hash + (hash << );

     }

     return hash;

 }

对于不安全的迭代器，在迭代过程中，不允许操作任何修改dict的操作，是只读的，不会发生迭代器失效的问题。对于安全的迭代器，在进行操作本节点的时候，redis中记录了当前迭代的bucket idx，以及当前dictEntry的next节点。如果只是add操作，即使是用了头插法把新dictEntry插在本节点之前，对迭代器本身并没有影响。如果是delete了本节点，迭代器中还记录了next节点的位置，调用next时直接取就好。如果next为空，则可以认为当前bucket遍历完了，取下一个bucket就行了。当然，如果在add/delete等操作的时候，进行了rehashing操作，那么当前迭代器里记录的next，在rehashing之后，可能就不是当前节点新位置的next了。所以在使用安全迭代器的时候，禁止了rehashing操作。

八、其它操作

dict还支持其它的一些操作。

1、随机获取一个key，可以用于一些随机操作的dictGetRandomKey

2、随机获取n个key：dictGetSomeKeys

3、scan操作

关于scan操作，redis采用了一个很巧妙的方法，保证了在开始scan时未删除的元素一定能遍历到，又能保证尽量少地重复遍历。

这里直接给个传送门，这里讲得很好：

https://blog.****.net/gqtcgq/article/details/50533336

redis 5.0.7 下载链接

http://download.redis.io/releases/redis-5.0.7.tar.gz

源码阅读顺序参考：

https://github.com/huangz1990/blog/blob/master/diary/2014/how-to-read-redis-source-code.rst