redis基础结构–词典-技术文章-卡飞资源网

注释代码：版本3.5以后
https://github.com/aleafboat/redis.git
扩容函数调用关系

结构定义

1.typedef struct dict

2.{

3. dictType *type;

4. void *privdata;

5. dictht ht[2];

6. long rehashidx; /* rehashing not in progress if rehashidx == -1 */

7. int iterators; /* number of iterators currently running */

8.} dict;

9.

10.typedef struct dictht

11.{ //字典hash table

12. dictEntry **table;

13. //可以看做字典数组，俗称桶bucket

14. unsigned long size;

15. //指针数组的大小，即桶的层数

16. unsigned long sizemask;

17. unsigned long used;

18. //字典中当前的节点数目

19.} dictht;

20.

21.typedef struct dictEntry {

22. void *key;

23. void *val;

24. struct dictEntry *next;

25.} dictEntry;

扩容过程

何时需要扩容?

每次增加dictEntry时，都会调用一次_dictExpandIfNeeded()这个函数。

1.如果dict正在扩容，则返回正常DICT_OK

2.如果dict刚刚初始化，也就是两个dictht都为NULL，则调用dictExpand()函数初始化。dictExpand()同样会做一次判断两个dictht都为NULL，然后直接赋给ht[0]。

3.如果可以扩容(dict_can_resize=1)，那么只要现在表中键总数大于表的长度就开始扩容。如果不能扩容(也就是dict_can_resize=0)，

但是如果表中键总数与表的长度的比值大于某一个阀值(由dict_force_resize_ratio静态变量决定)，那么就强制扩容。

扩容是怎么进行的？之前所说的每个dict中都有两个哈希表结构dictht *ht[2]。

当开始扩容时，把第一个ht作为原始表，

第二个作为扩容后的表。dict中rehashidx决定了目前扩容的进度。

扩容过程什么时候执行？

当决定开始扩容后，dict中rehashidx赋为0，然后有两个函数来进行扩容。

max(4,used)

int dictRehash(dict *d, int n);

int dictRehashMilliseconds(dict *d, int ms);

第一个函数中n决定每次rehash(也就是转移dictEntry)的次数，

第二个函数决定调用rehash函数的时间。

每次执行查找，增加，删除操作都会执行一次dictRehash(1)(如果正在扩容)，使得整个漫长的扩容过程隐含在每一次简单的操作中。

函数调用关系：

/* Rehash for an amount of time between ms milliseconds and ms+1 milliseconds */
int dictRehashMilliseconds(dict *d, int ms) {
long long start = timeInMilliseconds();
int rehashes = 0;
//每隔100ms执行一次
while(dictRehash(d,100)) {
rehashes += 100;
if (timeInMilliseconds()-start > ms) break;
}
return rehashes;
}
int dictRehash(dict *d, int n)
{ //移动n*10层(buckets)从d->ht[0] 到d->ht[1]
int empty_visits = n*10; /* Max number of empty buckets to visit. */
if (!dictIsRehashing(d)) return 0;
1.while(n-- && d->ht[0].used != 0)
2.{
3. dictEntry *de, *nextde;
4.
5. /* Note that rehashidx can't overflow as we are sure there are more
6. * elements because ht[0].used != 0 */
7. assert(d->ht[0].size > (unsigned long)d->rehashidx);
8. //没有数据不移动，数组哈希表
9. while(d->ht[0].table[d->rehashidx] == NULL)
10. {
11. d->rehashidx++;
12. if (--empty_visits == 0) return 1;
13. }
14. de = d->ht[0].table[d->rehashidx];
15. /* Move all the keys in this bucket from the old to the new hash HT */
16. while(de)
17. {
18. unsigned int h;
19.
20. nextde = de->next;
21. /* Get the index in the new hash table */
22. h = dictHashKey(d, de->key) & d->ht[1].sizemask;
23. //前插例如head -- B -C--D insert A A->B-C>D, head-A
24. de->next = d->ht[1].table[h];
25. d->ht[1].table[h] = de;
26.
27. d->ht[0].used--;
28. d->ht[1].used++;
29. de = nextde;
30. }
31. d->ht[0].table[d->rehashidx] = NULL;//第rehashidx层移动完毕
32. d->rehashidx++;//继续下一层
33.}
34.
35./* Check if we already rehashed the whole table... */
36.if (d->ht[0].used == 0)
37.{
38. zfree(d->ht[0].table);
39. d->ht[0] = d->ht[1];
40. _dictReset(&d->ht[1]);
41. d->rehashidx = -1;//扩容完毕
42. return 0;
43.}
44.
45./* More to rehash... */
46.return 1;
}
总结：
扩容步骤
业务操作触发扩容计算扩容数量
后台进程,间隔时间移动数据
QA
1 在扩容期间如果有新元素插入进来怎么办？
插入扩容的的哈希表
ht = dictIsRehashing(d) ? &d->ht[1] : &d->ht[0];
2 每次扩容多少？
初始化为数组大小为长度为 4
如果操作引起扩容长度为 2*size 2倍
(类似vector)
还有一个可能为原来元素个数
当以下条件中的任意一个被满足时，程序会自动开始对哈希表执行扩展操作：
1）服务器目前没有在执行BGSAVE命令或者BGREWRITEAOF命令，并且哈希表的负载因子大于等于1。
2）服务器目前正在执行BGSAVE命令或者BGREWRITEAOF命令，并且哈希表的负载因子大于等于5。
if (d->ht[0].used >= d->ht[0].size &&
(dict_can_resize ||
d->ht[0].used/d->ht[0].size > dict_force_resize_ratio))
{
return dictExpand(d, d->ht[0].used*2);
//元素个数大于层次个数或者配置强制扩容参数
}

3 如何实现慢慢扩容？

解决办法通过开辟一个新的线程来处理

4 假如原来的元素在A位置，扩容之后还咋A位置吗？

肯定不是呀 key%n 方式 n发生了变化

5 redis有没有想oracle那样有索引自己定位

没有,index 是计算出来的，具体位置遍历就可以了

卡飞资源网

专业编程技术资源共享平台

redis基础结构–词典