首页 > 代码库 > 一致性Hash简单介绍和使用

一致性Hash简单介绍和使用

背景:
一致性Hash用于分布式缓存系统,将Key值映射到详细机器Ip上,而且添加和删除1台机器的数据移动量较小,对现网影响较小

实现:
1 Hash环:将节点的Hash值映射到一个Hash环中。每一个Key顺时针第一个找到的节点。就是这个Key被路由到的机器
2 "虚拟节点":将节点虚拟成多个"虚拟节点"分布在Hash环上,使得分布更均匀。扩缩容影响较小

代码实例:

/*
 * @ 一致性Hash模拟測试
 * @ 结论:模拟4台机器扩容1台。遍历Key[0,999983]
		- 一致性Hash需移动181161个Key,约占18%(1/5左右,符合预期效果)
		- 取模Hash需移动799984个Key,约占80%
 * @ 2014.05.30 
 */

#include <stdint.h>
#include <iostream>
#include <string.h>
#include <sstream>
#include <map>
#include <vector>
using namespace std;

#define HASH_MOD (999983) 

template <class T> 
string ToStr(const T &t)
{
	stringstream stream;
	stream << t;
	return stream.str();
}

uint32_t APHash(string &sKey)
{
	char *key = (char*)sKey.c_str();
	unsigned int hash = 0;
	for (int i=0; *key; i++)
	{
		if ((i & 1) == 0) {
			hash ^= ((hash<<7)^(*key++)^(hash>>3));
		} else {
			hash ^= (~((hash<<11)^(*key++)^(hash>>5)));
		}
	}
	return hash%HASH_MOD;
}

class CMyConHash
{
public:
	/* 加入一台机器(IP) */
	void AddIp(const string &sIp) 
	{
		// 每一个IP分配128个虚拟节点,原因:结合APHash实验结果分布较均匀
		for (int i = 0; i < 128; i ++)
		{
			string sCode = sIp + ToStr(i) + "#Hash";
			uint32_t uVirKey = APHash(sCode);
			mapVirKey2Ip[uVirKey] = sIp;
			mapIp2VirKey[sIp].push_back(uVirKey);
		}
	}
	
	/* 删除一台机器(IP) */
	void DelIp(const string &sIp) 
	{
		if (mapIp2VirKey.count(sIp) == 0) {	
			cout << "DelIp Err: mapIp2VirKey Don`t Has Ip=" << sIp << endl;
			return;
		}
		vector<uint32_t> vecVirKey = mapIp2VirKey[sIp];
		for (int i = 0; i < vecVirKey.size(); i ++)
		{
			uint32_t uVirKey = vecVirKey[i];
			if (mapVirKey2Ip[uVirKey] == sIp) {
				// 得推断下。有可能2个IP虚拟节点相等后覆盖了
				mapVirKey2Ip.erase(uVirKey);
			}
		}
		mapIp2VirKey.erase(sIp);
	}
	
	/* 路由:给每一个Key找到负责的机器(IP) */
	int FindIp(uint32_t uKey, string &sIp)
	{
		if (mapVirKey2Ip.size() == 0) {
			cout << "FindIp Err: mapVirKey2Ip.size() == 0" << endl;
			return -1;
		}
		bool bFind = false;
		uint32_t uVirKey;
		map<uint32_t, string>::iterator iter;
		// 遍历std::map是按Key大小顺序输出(差别std::tr1::unordered_map)
		for(iter = mapVirKey2Ip.begin(); iter != mapVirKey2Ip.end(); iter ++)
		{
			uVirKey = iter->first;
			if (uVirKey > uKey%HASH_MOD) {
				sIp = iter->second; 
				bFind = true;
				break;
			}
		}
		if (!bFind) {
			// 找不到比Key小的虚拟节点,故使用最小的虚拟节点(环)
			iter = mapVirKey2Ip.begin();
			uVirKey = iter->first;
			sIp = iter->second; 
		}
		//cout << "FindIp Suc:" << uKey%HASH_MOD << "=>" << uVirKey << "," << sIp << endl;
		return 0;
	}
	
	/* 打印各个IP负责的Key区域大小。影响因素:1 Hash函数 2 虚拟节点个数 */
	/* 4台机器的情况,相对还是较均匀:
		Ip=202.168.14.241,Cnt=251649
		Ip=202.168.14.242,Cnt=257902
		Ip=202.168.14.243,Cnt=245945
		Ip=202.168.14.244,Cnt=235516 */
	void EchoIpState()
	{
		map<string, uint32_t> mapIpCnt; 
		map<uint32_t, string>::iterator iter = mapVirKey2Ip.end(); 
		iter --;
		uint32_t uPreKey = iter->first;
		string sPreIp = iter->second;
		do {
			iter --;
			uint32_t uVirKey = iter->first;
			string sIp = iter->second;
			if (mapIpCnt.count(sPreIp) == 0) {
				mapIpCnt[sPreIp] = uPreKey-uVirKey;
			} else {
				mapIpCnt[sPreIp] += uPreKey-uVirKey;
			} 
			uPreKey = uVirKey;
			sPreIp = sIp;
		} while (iter != mapVirKey2Ip.begin()); 
		
		cout << "Ip Size=" << mapIpCnt.size() << endl;
		map<string, uint32_t>::iterator iter1;
		for(iter1 = mapIpCnt.begin(); iter1 != mapIpCnt.end(); iter1 ++)
		{
			cout << "Ip=" << iter1->first << ",Cnt=" << iter1->second << endl;
		}
	}
private:
	map< uint32_t, string > mapVirKey2Ip;
	map< string, vector<uint32_t> > mapIp2VirKey; 
};

class CMyModHash
{
public: 
	void AddIp(const string &sIp) 
	{
		vecIpList.push_back(sIp);
	}
	void FindIp(uint32_t uKey, string &sIp)
	{
		sIp = vecIpList[uKey%vecIpList.size()];
	}
	void EchoIpState()
	{
		cout << "Ip Cnt=" << vecIpList.size() << endl;
	}
private: 
	vector<string> vecIpList;
};

int main()
{
	CMyConHash oMyHash;
	// CMyModHash oMyHash;
	
	// 模拟初始化4台机器
	oMyHash.AddIp("202.168.14.241"); 
	oMyHash.AddIp("202.168.14.242"); 
	oMyHash.AddIp("202.168.14.243"); 
	oMyHash.AddIp("202.168.14.244"); 
	oMyHash.EchoIpState();
	
	// 保存下各个Key路由的机器 
	string sIp, arrKeyIp[HASH_MOD];
	for (uint32_t key = 0; key < HASH_MOD; key ++)
	{
		oMyHash.FindIp(key, sIp);
		arrKeyIp[key] = sIp;
	}
	
	// 模拟加入1台机器
	oMyHash.AddIp("202.168.14.245"); 
	oMyHash.EchoIpState();
	
	// 推断多少Key相应数据须要移动机器
	uint32_t uCnt = 0;
	for (uint32_t key = 0; key < HASH_MOD; key ++)
	{
		oMyHash.FindIp(key, sIp);
		if (arrKeyIp[key] != sIp) {
			uCnt ++;
		}
	}
	cout << "Key Sum=" << HASH_MOD << " , Need To Move:" << uCnt << endl;
	
	return 0;
}


一致性Hash简单介绍和使用