首页 > 代码库 > Redis源码分析(三)---dict哈希结构

Redis源码分析(三)---dict哈希结构

          昨天分析完adlist的Redis代码,今天马上马不停蹄的继续学习Redis代码中的哈希部分的结构学习,不过在这里他不叫什么hashMap,而是叫dict,而且是一种全新设计的一种哈希结构,他只是通过几个简单的结构体,再搭配上一些比较常见的哈希算法,就实现了类似高级语言中HashMap的作用了。也让我见识了一些哈希算法的实现,比如dbj hash的算法实现,俗称times33,算法,就是不停的*33,。这种算是一种超级简单的哈希算法。

         下面说说给我感觉Redis代码中哈希实现的不是那么简单,中间加了一些东西,比如说dictType定义了一些字典集合操作的公共方法,我把dict叫做字典总类,也可以说字典操作类,真正存放键值对的叫dictEntry,我叫做字典集合,字典集合存放在哈希表中,叫dictht,下面给出一张结构图来理理思路。


        下面给出2个文件的代码解析:

dict.h:

<span style="font-size:14px;">/* Hash Tables Implementation.
 *
 * This file implements in-memory hash tables with insert/del/replace/find/
 * get-random-element operations. Hash tables will auto-resize if needed
 * tables of power of two in size are used, collisions are handled by
 * chaining. See the source code for more information... :)
 *
 * Copyright (c) 2006-2012, Salvatore Sanfilippo <antirez at gmail dot com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *   * Redistributions of source code must retain the above copyright notice,
 *     this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution.
 *   * Neither the name of Redis nor the names of its contributors may be used
 *     to endorse or promote products derived from this software without
 *     specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <stdint.h>

#ifndef __DICT_H
#define __DICT_H

/* 定义了成功与错误的值 */
#define DICT_OK 0
#define DICT_ERR 1

/* Unused arguments generate annoying warnings... */
/* dict没有用到时,用来提示警告的 */
#define DICT_NOTUSED(V) ((void) V)

/* 字典结构体,保存K-V值的结构体 */
typedef struct dictEntry {
	//字典key函数指针
    void *key;
    union {
        void *val;
        //无符号整型值
        uint64_t u64;
        //有符号整型值
        int64_t s64;
        double d;
    } v;
    //下一字典结点
    struct dictEntry *next;
} dictEntry;

/* 字典类型 */
typedef struct dictType {
	//哈希计算方法,返回整形变量
    unsigned int (*hashFunction)(const void *key);
    //复制key方法
    void *(*keyDup)(void *privdata, const void *key);
    //复制val方法
    void *(*valDup)(void *privdata, const void *obj);
    //key值比较方法
    int (*keyCompare)(void *privdata, const void *key1, const void *key2);
    //key的析构函数
    void (*keyDestructor)(void *privdata, void *key);
    //val的析构函数
    void (*valDestructor)(void *privdata, void *obj);
} dictType;

/* This is our hash table structure. Every dictionary has two of this as we
 * implement incremental rehashing, for the old to the new table. */
/* 哈希表结构体 */
typedef struct dictht {
	//字典实体
    dictEntry **table;
    //表格可容纳字典数量
    unsigned long size;
    unsigned long sizemask;
    //正在被使用的数量
    unsigned long used;
} dictht;

/* 字典主操作类 */
typedef struct dict {
	//字典类型
    dictType *type;
    //私有数据指针
    void *privdata;
    //字典哈希表,共2张,一张旧的,一张新的
    dictht ht[2];
    //重定位哈希时的下标
    long rehashidx; /* rehashing not in progress if rehashidx == -1 */
    //当前迭代器数量
    int iterators; /* number of iterators currently running */
} dict;

/* If safe is set to 1 this is a safe iterator, that means, you can call
 * dictAdd, dictFind, and other functions against the dictionary even while
 * iterating. Otherwise it is a non safe iterator, and only dictNext()
 * should be called while iterating. */
/* 字典迭代器,如果是安全迭代器,这safe设置为1,可以调用dicAdd,dictFind */
/* 如果是不安全的,则只能调用dicNext方法*/
typedef struct dictIterator {
	//当前字典
    dict *d;
    //下标
    long index;
    //表格,和安全值的表格代表的是旧的表格,还是新的表格
    int table, safe;
    //字典实体
    dictEntry *entry, *nextEntry;
    /* unsafe iterator fingerprint for misuse detection. */
    /* 指纹标记,避免不安全的迭代器滥用现象 */
    long long fingerprint;
} dictIterator;

/* 字典扫描方法 */
typedef void (dictScanFunction)(void *privdata, const dictEntry *de);

/* This is the initial size of every hash table */
/* 初始化哈希表的数目 */
#define DICT_HT_INITIAL_SIZE     4

/* ------------------------------- Macros ------------------------------------*/
/* 字典释放val函数时候调用,如果dict中的dictType定义了这个函数指针, */
#define dictFreeVal(d, entry)     if ((d)->type->valDestructor)         (d)->type->valDestructor((d)->privdata, (entry)->v.val)
    
/* 字典val函数复制时候调用,如果dict中的dictType定义了这个函数指针, */
#define dictSetVal(d, entry, _val_) do {     if ((d)->type->valDup)         entry->v.val = (d)->type->valDup((d)->privdata, _val_);     else         entry->v.val = (_val_); } while(0)

/* 设置dictEntry中共用体v中有符号类型的值 */
#define dictSetSignedIntegerVal(entry, _val_)     do { entry->v.s64 = _val_; } while(0)

/* 设置dictEntry中共用体v中无符号类型的值 */
#define dictSetUnsignedIntegerVal(entry, _val_)     do { entry->v.u64 = _val_; } while(0)

/* 设置dictEntry中共用体v中double类型的值 */
#define dictSetDoubleVal(entry, _val_)     do { entry->v.d = _val_; } while(0)

/* 调用dictType定义的key析构函数 */
#define dictFreeKey(d, entry)     if ((d)->type->keyDestructor)         (d)->type->keyDestructor((d)->privdata, (entry)->key)

/* 调用dictType定义的key复制函数,没有定义直接赋值 */
#define dictSetKey(d, entry, _key_) do {     if ((d)->type->keyDup)         entry->key = (d)->type->keyDup((d)->privdata, _key_);     else         entry->key = (_key_); } while(0)

/* 调用dictType定义的key比较函数,没有定义直接key值直接比较 */
#define dictCompareKeys(d, key1, key2)     (((d)->type->keyCompare) ?         (d)->type->keyCompare((d)->privdata, key1, key2) :         (key1) == (key2))

#define dictHashKey(d, key) (d)->type->hashFunction(key)   //哈希定位方法
#define dictGetKey(he) ((he)->key)    //获取dictEntry的key值
#define dictGetVal(he) ((he)->v.val)  //获取dicEntry中共用体v中定义的val值
#define dictGetSignedIntegerVal(he) ((he)->v.s64) //获取dicEntry中共用体v中定义的有符号值
#define dictGetUnsignedIntegerVal(he) ((he)->v.u64)  //获取dicEntry中共用体v中定义的无符号值
#define dictGetDoubleVal(he) ((he)->v.d)  //获取dicEntry中共用体v中定义的double类型值
#define dictSlots(d) ((d)->ht[0].size+(d)->ht[1].size)  //获取dict字典中总的表大小
#define dictSize(d) ((d)->ht[0].used+(d)->ht[1].used)   //获取dict字典中总的表的总正在被使用的数量
#define dictIsRehashing(d) ((d)->rehashidx != -1)   //字典有无被重定位过

/* API */
dict *dictCreate(dictType *type, void *privDataPtr);   //创建dict字典总类
int dictExpand(dict *d, unsigned long size);    //字典扩增方法
int dictAdd(dict *d, void *key, void *val);    //字典根据key, val添加一个字典集
dictEntry *dictAddRaw(dict *d, void *key);     //字典添加一个只有key值的dicEntry
int dictReplace(dict *d, void *key, void *val); //替代dict中一个字典集
dictEntry *dictReplaceRaw(dict *d, void *key);  //替代dict中的一个字典,只提供一个key值
int dictDelete(dict *d, const void *key);    //根据key删除一个字典集
int dictDeleteNoFree(dict *d, const void *key);  //字典集删除无、不调用free方法
void dictRelease(dict *d);   //释放整个dict
dictEntry * dictFind(dict *d, const void *key);  //根据key寻找字典集
void *dictFetchValue(dict *d, const void *key);  //根据key值寻找相应的val值
int dictResize(dict *d);  //重新计算大小
dictIterator *dictGetIterator(dict *d); //获取字典迭代器
dictIterator *dictGetSafeIterator(dict *d);  //获取字典安全迭代器  
dictEntry *dictNext(dictIterator *iter);   //根据字典迭代器获取字典集的下一字典集
void dictReleaseIterator(dictIterator *iter); //释放迭代器
dictEntry *dictGetRandomKey(dict *d);  //随机获取一个字典集
void dictPrintStats(dict *d);  //打印当前字典状态
unsigned int dictGenHashFunction(const void *key, int len); //输入的key值,目标长度,此方法帮你计算出索引值
unsigned int dictGenCaseHashFunction(const unsigned char *buf, int len); //这里提供了一种比较简单的哈希算法
void dictEmpty(dict *d, void(callback)(void*)); //清空字典
void dictEnableResize(void);  //启用调整方法
void dictDisableResize(void); //禁用调整方法
int dictRehash(dict *d, int n); //hash重定位,主要从旧的表映射到新表中,分n轮定位
int dictRehashMilliseconds(dict *d, int ms);  //在给定时间内,循环执行哈希重定位
void dictSetHashFunctionSeed(unsigned int initval); //设置哈希方法种子
unsigned int dictGetHashFunctionSeed(void);  //获取哈希种子
unsigned long dictScan(dict *d, unsigned long v, dictScanFunction *fn, void *privdata); //字典扫描方法

/* Hash table types */
/* 哈希表类型  */
extern dictType dictTypeHeapStringCopyKey;
extern dictType dictTypeHeapStrings;
extern dictType dictTypeHeapStringCopyKeyValue;

#endif /* __DICT_H */
</span>

dict.c;

<span style="font-size:14px;">/* Hash Tables Implementation.
 *
 * This file implements in memory hash tables with insert/del/replace/find/
 * get-random-element operations. Hash tables will auto resize if needed
 * tables of power of two in size are used, collisions are handled by
 * chaining. See the source code for more information... :)
 *
 * Copyright (c) 2006-2012, Salvatore Sanfilippo <antirez at gmail dot com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *   * Redistributions of source code must retain the above copyright notice,
 *     this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution.
 *   * Neither the name of Redis nor the names of its contributors may be used
 *     to endorse or promote products derived from this software without
 *     specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "fmacros.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#include <limits.h>
#include <sys/time.h>
#include <ctype.h>

#include "dict.h"
#include "zmalloc.h"
#include "redisassert.h"

/* Using dictEnableResize() / dictDisableResize() we make possible to
 * enable/disable resizing of the hash table as needed. This is very important
 * for Redis, as we use copy-on-write and don't want to move too much memory
 * around when there is a child performing saving operations.
 *
 * Note that even when dict_can_resize is set to 0, not all resizes are
 * prevented: a hash table is still allowed to grow if the ratio between
 * the number of elements and the buckets > dict_force_resize_ratio. */
/* redis用了dictEnableResize() / dictDisableResize()方法可以重新调整哈希表的长度,
 *因为redis采用的是写时复制的算法,不会挪动太多的内存,只有当调整数量大于一定比例才可能有效 */
static int dict_can_resize = 1;
static unsigned int dict_force_resize_ratio = 5;

/* -------------------------- private prototypes ---------------------------- */
/* 私有方法 */
static int _dictExpandIfNeeded(dict *ht);    //字典是否需要扩展
static unsigned long _dictNextPower(unsigned long size);
static int _dictKeyIndex(dict *ht, const void *key);
static int _dictInit(dict *ht, dictType *type, void *privDataPtr);  //字典初始化方法

/* -------------------------- hash functions -------------------------------- */
/* 哈希索引计算的方法 */

/* Thomas Wang's 32 bit Mix Function */
/* Thomas Wang's 32 bit Mix 的哈希算法直接输入key值,获取索引值,据说这种冲突的概率很低 */
unsigned int dictIntHashFunction(unsigned int key)
{
    key += ~(key << 15);
    key ^=  (key >> 10);
    key +=  (key << 3);
    key ^=  (key >> 6);
    key += ~(key << 11);
    key ^=  (key >> 16);
    return key;
}

//哈希方法种子,跟产生随机数的种子作用应该是一样的
static uint32_t dict_hash_function_seed = 5381;

/* 重设哈希种子 */
void dictSetHashFunctionSeed(uint32_t seed) {
    dict_hash_function_seed = seed;
}

/* 获取哈希种子 */
uint32_t dictGetHashFunctionSeed(void) {
    return dict_hash_function_seed;
}

/* MurmurHash2, by Austin Appleby
 * Note - This code makes a few assumptions about how your machine behaves -
 * 1. We can read a 4-byte value from any address without crashing
 * 2. sizeof(int) == 4
 *
 * And it has a few limitations -
 *
 * 1. It will not work incrementally.
 * 2. It will not produce the same results on little-endian and big-endian
 *    machines.
 */
/* 输入的key值,目标长度,此方法帮你计算出索引值,此方法特别表明,
 *	不会因为机器之间高低位存储的不同而产生相同的结果 */
unsigned int dictGenHashFunction(const void *key, int len) {
    /* 'm' and 'r' are mixing constants generated offline.
     They're not really 'magic', they just happen to work well.  */
    //seed种子,m,r的值都将会参与到计算中
    uint32_t seed = dict_hash_function_seed;
    const uint32_t m = 0x5bd1e995;
    const int r = 24;

    /* Initialize the hash to a 'random' value */
    uint32_t h = seed ^ len;

    /* Mix 4 bytes at a time into the hash */
    const unsigned char *data = http://www.mamicode.com/(const unsigned char *)key;>

哈希算法的索引计算其实我还是有点不理解的地方的,比如他的索引计算,会从一张旧表映射到一个新表,作者出于什么目的,也许以后再看的时候才会明白吧。

Redis源码分析(三)---dict哈希结构