php 实现一致性hash 算法 memcache

时间:2022-04-21 16:45:05

 

散列表的应用

涉及到数据查找比对,首先考虑到使用HashSet。HashSet最大的好处就是实现查找时间复杂度为O(1)。使用HashSet需要解决一个重要问题:冲突问题。对比研究了网上一些字符串哈希函数,发现几乎所有的流行的HashMap都采用了DJB Hash Function,俗称“Times33”算法。Times33的算法很简单,就是对字符串逐字符迭代乘以33,见下面算法原型:hash(i)=33*hash(i-1)+str[i]

使用HashSet需要解决一个重要问题:冲突问题。对比研究了网上一些字符串哈希函数,发现几乎所有的流行的HashMap都采用了DJB Hash Function,俗称“Times33”算法。Times33的算法很简单,就是对字符串逐字符迭代乘以33,见下面算法原型:hash(i)=33*hash(i-1)+str[i]

 

<?php
// +----------------------------------------------------------------------
// | Perfect Is Shit
// +----------------------------------------------------------------------
// | PHP实现:一致性HASH算法
// +----------------------------------------------------------------------
// | Author: alexander <gt199899@gmail.com>
// +----------------------------------------------------------------------
// | Datetime: 2017-01-11 16:01:36
// +----------------------------------------------------------------------
// | Copyright: Perfect Is Shit
// +----------------------------------------------------------------------

class ConsistentHashing
{
    // 圆环
    // hash -> 节点
    private $_ring = array();
    // 所有节点
    // 节点 -> hash
    public $nodes = array();
    // 每个节点的虚拟节点
    public $virtual = 64;

    /**
     * 构造
     * @param array $nodes 初始化的节点列表
     */
    public function __construct($nodes = array())
    {
        if (!empty($nodes)) {
            foreach ($nodes as $value) {
                $this->addNode($value);
            }
        }
    }

    /**
     * 获取圆环内容
     * @return array $this->_ring
     */
    public function getRing()
    {
        return $this->_ring;
    }

    /**
     * time33 函数
     * @param  string $str
     * @return 32位正整数
     * @author 大神们
     */
    public function time33($str)
    {
        // hash(i) = hash(i-1) * 33 + str[i]
        // $hash = 5381; ## 将hash设置为0,竟然比设置为5381分布效果更好!!!
        $hash = 0;
        //
        $s = md5($str); //相比其它版本,进行了md5加密
        $seed = 5;
        $len = 32;//加密后长度32
        for ($i = 0; $i < $len; $i++) {
            // (hash << 5) + hash 相当于 hash * 33
            //$hash = sprintf("%u", $hash * 33) + ord($s{$i});
            //$hash = ($hash * 33 + ord($s{$i})) & 0x7FFFFFFF;
            $hash = ($hash << $seed) + $hash + ord($s{$i});
        }
        return $hash & 0x7FFFFFFF;
    }

    /**
     * 增加节点
     * @param string $node 节点名称
     * @return object $this
     */
    public function addNode($node)
    {
        if (in_array($node, array_keys($this->nodes))) {
            return;
        }
        for ($i = 1; $i <= $this->virtual; $i++) {
            $key = $this->time33($node . '-' . $i);
            $this->_ring[$key] = $node;
            $this->nodes[$node][] = $key;
        }
        ksort($this->_ring, SORT_NUMERIC);
        return $this;
    }

    /**
     * 获取字符串的HASH在圆环上面映射到的节点
     * @param  string $key
     * @return string $node
     */
    public function getNode($key)
    {
        $node = current($this->_ring);
        $hash = $this->time33($key);
        foreach ($this->_ring as $key => $value) {
            if ($hash <= $key) {
                $node = $value;
                break;
            }
        }
        return $node;
    }

    /**
     * 获取映射到特定节点的KEY
     * 此方法需手动调用,非特殊情况不建议程序中使用此方法
     * @param  string $node
     * @param  string $keyPre
     * @return mixed
     */
    public function getKey($node, $keyPre = "")
    {
        if (!in_array($node, array_keys($this->nodes))) {
            return false;
        }
        $result = false;
        for ($i = 1; $i <= 10000; $i++) {
            $key = $keyPre . md5(rand(1000, 9999));
            if ($this->getNode($key) == $node) {
                $result = true;
                break;
            }
        }
        return $result ? $key : false;
    }

}

$ch_obj = new ConsistentHashing();
$ch_obj->addNode('node_1');
$ch_obj->addNode('node_2');
$ch_obj->addNode('node_3');
$ch_obj->addNode('node_4');
$ch_obj->addNode('node_5');
$ch_obj->addNode('node_6');

// +----------------------------------------------------------------------
// | 查看key映射到的节点
// +----------------------------------------------------------------------
$key1 = "asofiwjamfdalksjfkasasdflasfja";
$key2 = "jaksldfjlasfjsdjfioafaslkjflsadkjfl";
$key3 = "asjldflkjasfsdjflkajkldsjfksajdlflajs";
$key4 = "iowanfasijfmasdnfoas";
$key5 = "pqkisndfhoalnfiewlkl";
$key6 = "qjklasjdifoajfalsjflsa";
echo sprintf("%-50s 映射到节点 %s\n", $key1, $ch_obj->getNode($key1));
echo sprintf("%-50s 映射到节点 %s\n", $key2, $ch_obj->getNode($key2));
echo sprintf("%-50s 映射到节点 %s\n", $key3, $ch_obj->getNode($key3));
echo sprintf("%-50s 映射到节点 %s\n", $key4, $ch_obj->getNode($key4));
echo sprintf("%-50s 映射到节点 %s\n", $key5, $ch_obj->getNode($key5));
echo sprintf("%-50s 映射到节点 %s\n", $key6, $ch_obj->getNode($key6));

// +----------------------------------------------------------------------
// | 查看圆环和节点信息
// +----------------------------------------------------------------------
echo '<pre>';
 var_dump($ch_obj->getRing());
 var_dump($ch_obj->nodes);

// +----------------------------------------------------------------------
// | 获取特定节点的KEY
// +----------------------------------------------------------------------
// $key1 = $ch_obj->getKey('node_1', 'pre_');
// var_dump($key1);

// +----------------------------------------------------------------------
// | 测试分布
// +----------------------------------------------------------------------
// $keys = array();
// $rings = array();
// for ($i = 1; $i <= 60000; $i++) {
//     $key = sha1(rand(1000000,9999999));
//     $node = $ch_obj->getNode($key);
//     $rings[$node] = isset($rings[$node]) ? ++$rings[$node] : 1;
// }
// var_dump($rings);