Squid--hash代码分析

#ifndef SQUID_HASH_H

#define SQUID_HASH_H

// Aliases for the callback function types and the two structures of the hash API.

// HASHFREE: callback invoked on one stored item; hashFreeItems() calls it once per node.
typedef void HASHFREE(void *);

// HASHCMP: key comparator; hash_lookup() treats a return value of 0 as "keys match".
typedef int HASHCMP(const void *, const void *);

// HASHHASH: receives a key and the bucket count; its result is used as a bucket index.
typedef unsigned int HASHHASH(const void *, unsigned int);

// Short names for the node and table structures defined below.
typedef struct _hash_link hash_link;

typedef struct _hash_table hash_table;

// One node of a bucket chain.

struct _hash_link {

    void *key;          // key the node is filed under; hashed by hash_join(), compared by hash_lookup()

    hash_link *next;    // next node in the same bucket chain, NULL at the end of the chain

};

// The hash table itself: an array of bucket chains plus traversal state.

struct _hash_table {

    hash_link **buckets;    // array of 'size' chain heads; an empty bucket holds NULL

    HASHCMP *cmp;           // key comparison function

    HASHHASH *hash;         // function mapping a key to a bucket index

    unsigned int size;      // number of entries in 'buckets'

    unsigned int current_slot;  // index of the bucket the hash_first()/hash_next() traversal is in

    hash_link *next;        // node the next hash_next() call will return; NULL when no traversal is pending

    int count;      // number of hash_link nodes currently stored in the table

};

// Public hash table API. SQUIDCEXTERN is a project macro defined elsewhere.

// Allocate a table: comparator, bucket count (0 selects DEFAULT_HASH_SIZE), hash function.
SQUIDCEXTERN hash_table *hash_create(HASHCMP *, int, HASHHASH *);

// Link a node into the bucket selected by its key; nothing is copied.
SQUIDCEXTERN void hash_join(hash_table *, hash_link *);

// Unlink a node from its bucket chain without freeing it.
SQUIDCEXTERN void hash_remove_link(hash_table *, hash_link *);

// Return the built-in prime closest to n on a logarithmic scale.
SQUIDCEXTERN int hashPrime(int n);

// Find the node whose key compares equal to the given key, or NULL.
SQUIDCEXTERN hash_link *hash_lookup(hash_table *, const void *);

// Start a traversal; must precede hash_next().
SQUIDCEXTERN void hash_first(hash_table *);

// Return the next node of the traversal, or NULL at the end.
SQUIDCEXTERN hash_link *hash_next(hash_table *);

// Abandon a traversal and reset the traversal state.
SQUIDCEXTERN void hash_last(hash_table *);

// Return the head node of the bucket with the given index, or NULL.
SQUIDCEXTERN hash_link *hash_get_bucket(hash_table *, unsigned int);

// Release the bucket array and the table structure (not the nodes).
SQUIDCEXTERN void hashFreeMemory(hash_table *);

// Call the given callback once for every node in the table.
SQUIDCEXTERN void hashFreeItems(hash_table *, HASHFREE *);

// Stock hash functions for NUL-terminated string keys.
SQUIDCEXTERN HASHHASH hash_string;

SQUIDCEXTERN HASHHASH hash4;

// Return a node's key viewed as a C string.
SQUIDCEXTERN const char *hashKeyStr(hash_link *);

/*  Prime numbers suggested by Squid as hash table sizes.

 *  Here are some good prime number choices.  It's important not to

 *  choose a prime number that is too close to exact powers of 2.

 *

 *  HASH_SIZE 103               // prime number < 128

 *  HASH_SIZE 229               // prime number < 256

 *  HASH_SIZE 467               // prime number < 512

 *  HASH_SIZE 977               // prime number < 1024

 *  HASH_SIZE 1979              // prime number < 2048

 *  HASH_SIZE 4019              // prime number < 4096

 *  HASH_SIZE 6037              // prime number < 6144

 *  HASH_SIZE 7951              // prime number < 8192

 *  HASH_SIZE 12149             // prime number < 12288

 *  HASH_SIZE 16231             // prime number < 16384

 *  HASH_SIZE 33493             // prime number just above 32768 (not below it)

 *  HASH_SIZE 65357             // prime number < 65536

 */

// Default number of buckets; hash_create() uses it when the requested size is 0.

#define  DEFAULT_HASH_SIZE 7951 /* prime number < 8192 */

#endif /* SQUID_HASH_H */

/*

 * DEBUG: section 00    Hash Tables

 * AUTHOR: Harvest Derived

 *

 * SQUID Web Proxy Cache          http://www.squid-cache.org/

 * ----------------------------------------------------------

 *

 *  Squid is the result of efforts by numerous individuals from

 *  the Internet community; see the CONTRIBUTORS file for full

 *  details.   Many organizations have provided support for Squid's

 *  development; see the SPONSORS file for full details.  Squid is

 *  Copyrighted (C) 2001 by the Regents of the University of

 *  California; see the COPYRIGHT file for full details.  Squid

 *  incorporates software developed and/or copyrighted by other

 *  sources; see the CREDITS file for full details.

 *

 *  This program is free software; you can redistribute it and/or modify

 *  it under the terms of the GNU General Public License as published by

 *  the Free Software Foundation; either version 2 of the License, or

 *  (at your option) any later version.

 *

 *  This program is distributed in the hope that it will be useful,

 *  but WITHOUT ANY WARRANTY; without even the implied warranty of

 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

 *  GNU General Public License for more details.

 *

 *  You should have received a copy of the GNU General Public License

 *  along with this program; if not, write to the Free Software

 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.

 *

 */

#include "squid.h"

#include "hash.h"

#include "profiler/Profiler.h"

#if HAVE_STDIO_H

#include <stdio.h>

#endif

#if HAVE_STDLIB_H

#include <stdlib.h>

#endif

#if HAVE_STRING_H

#include <string.h>

#endif

#if HAVE_UNISTD_H

#include <unistd.h>

#endif

#if HAVE_GNUMALLLOC_H

#include <gnumalloc.h>

#elif HAVE_MALLOC_H

#include <malloc.h>

#endif

#if HAVE_ASSERT_H

#include <assert.h>

#endif

#if HAVE_MATH_H

#include <math.h>

#endif

/* Forward declaration: moves the traversal cursor on to the next non-empty bucket. */
static void hash_next_bucket(hash_table * hid);

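/* The two functions below are the usual choices for the hash-function (third) argument of hash_create(). */

/* Both read their key as a NUL-terminated byte string, so neither can hash binary data that contains zero bytes. */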

/**
 *  hash_string - hash a NUL-terminated byte string into [0, size).
 *
 *  Every byte is multiplied by 271 and XOR-ed into an accumulator; the
 *  accumulator is then XOR-ed with 271 times the string length and reduced
 *  modulo 'size'.  'size' must be non-zero.
 */
unsigned int
hash_string(const void *data, unsigned int size)
{
    const unsigned char *cursor = (const unsigned char *) data;
    unsigned int mixed = 0;
    unsigned int length = 0;

    for (; *cursor != '\0'; ++cursor) {
        mixed ^= 271 * *cursor;
        ++length;
    }

    return (mixed ^ (length * 271)) % size;
}

/* the following function(s) were adapted from
 *    usr/src/lib/libc/db/hash_func.c, 4.4 BSD lite */

/**
 *  hash4 - hash function from Chris Torek: h = h * 33 + c over every byte
 *  of a NUL-terminated string, reduced modulo 'size'.
 *
 *  data: NUL-terminated key.
 *  size: number of buckets; must be non-zero.
 *
 *  Returns a bucket index in [0, size).
 *
 *  The bytes are read through a plain 'char' pointer, so bytes >= 0x80
 *  contribute according to the platform's char signedness.
 *
 *  The original hand-unrolled the loop eight ways through macros that stayed
 *  defined for the rest of the file; the plain loop below visits the same
 *  bytes in the same order and yields the same value.
 */
unsigned int
hash4(const void *data, unsigned int size)
{
    const char *key = (const char *) data;
    unsigned int h = 0;

    for (; *key != '\0'; ++key)
        h = (h << 5) + h + *key;    /* h * 33 + c */

    return h % size;
}

/**
 *  hash_create - allocate and initialise an empty hash table.
 *
 *  cmp_func:  key comparator stored in the table; hash_lookup() treats a
 *             return value of 0 as a match.
 *  hash_sz:   requested number of buckets.  Zero selects DEFAULT_HASH_SIZE;
 *             a negative value is treated the same way instead of being
 *             converted to a huge unsigned bucket count.
 *  hash_func: function mapping a key and the bucket count to a bucket index.
 *
 *  Returns a pointer to the new table.  Both allocations go through
 *  xcalloc(), which is relied upon to hand back zeroed memory.
 */
hash_table *
hash_create(HASHCMP * cmp_func, int hash_sz, HASHHASH * hash_func)
{
    hash_table *hid = (hash_table *)xcalloc(1, sizeof(hash_table));

    hid->size = (hash_sz > 0) ? (unsigned int) hash_sz
                              : (unsigned int) DEFAULT_HASH_SIZE;

    /* allocate and null the buckets */
    hid->buckets = (hash_link **)xcalloc(hid->size, sizeof(hash_link *));

    hid->cmp = cmp_func;
    hid->hash = hash_func;

    /* no traversal in progress, no nodes stored */
    hid->next = NULL;
    hid->current_slot = 0;
    hid->count = 0;

    return hid;
}

/**
 *  hash_join - insert the node 'lnk' into the table 'hid' under lnk->key.
 *
 *  The node becomes the new head of the bucket chain chosen by the table's
 *  hash function, and the node count is incremented.  Only pointers are
 *  linked; neither the key nor the node is copied.
 */
void
hash_join(hash_table * hid, hash_link * lnk)
{
    const int slot = hid->hash(lnk->key, hid->size);
    hash_link **head = &hid->buckets[slot];

    lnk->next = *head;
    *head = lnk;
    hid->count += 1;
}

/**
 *  hash_lookup - find the node stored under key 'k' in the table 'hid'.
 *
 *  The key is hashed to pick a bucket, then that bucket's chain is walked
 *  and each node's key is compared with 'k' through the table's comparator.
 *
 *  Returns the first node whose key compares equal (comparator result 0),
 *  or NULL if the chain holds no such node.  'k' must not be NULL.
 */
hash_link *
hash_lookup(hash_table * hid, const void *k)
{
    hash_link *found = NULL;
    hash_link *node;
    int slot;

    PROF_start(hash_lookup);
    assert(k != NULL);

    slot = hid->hash(k, hid->size);
    node = hid->buckets[slot];

    while (node != NULL) {
        if (hid->cmp(k, node->key) == 0) {
            found = node;
            break;
        }

        /* a node linked to itself would make this walk endless */
        assert(node != node->next);
        node = node->next;
    }

    PROF_stop(hash_lookup);
    return found;
}

/*
 * Advance the traversal cursor: while hid->next is NULL, step current_slot
 * forward and load the head of that bucket.  Stops at the first non-empty
 * bucket, or with hid->next still NULL once current_slot reaches hid->size.
 */
static void
hash_next_bucket(hash_table * hid)
{
    for (;;) {
        if (hid->next != NULL)
            return;

        hid->current_slot += 1;

        if (hid->current_slot >= hid->size)
            return;

        hid->next = hid->buckets[hid->current_slot];
    }
}

/**
 *  hash_first - begin a traversal of the table for hash_next().
 *
 *  Positions the cursor on the head of bucket 0 or, if that bucket is
 *  empty, on the head of the first non-empty bucket after it.  Asserts that
 *  no earlier traversal is still pending (hid->next must be NULL).
 */
void
hash_first(hash_table * hid)
{
    assert(NULL == hid->next);

    hid->current_slot = 0;
    hid->next = hid->buckets[0];

    if (hid->next == NULL)
        hash_next_bucket(hid);  /* bucket 0 is empty: skip ahead */
}

/**
 *  hash_next - return the node under the traversal cursor and advance the
 *  cursor to the node after it (moving to the next non-empty bucket when
 *  the current chain is exhausted).
 *
 *  Returns NULL when the traversal has reached the end of the table.
 *
 *  MUST call hash_first() before hash_next().
 */
hash_link *
hash_next(hash_table * hid)
{
    hash_link *current = hid->next;

    if (current != NULL) {
        hid->next = current->next;

        if (hid->next == NULL)
            hash_next_bucket(hid);
    }

    return current;
}

/**
 *  hash_last - abandon any traversal in progress by clearing the cursor
 *  and rewinding the slot index to 0.  'hid' must not be NULL.
 */
void
hash_last(hash_table * hid)
{
    assert(hid != NULL);

    hid->current_slot = 0;
    hid->next = NULL;
}

/**
 *  hash_remove_link - unlink the node 'hl' from the table 'hid'.
 *
 *  The node itself is not freed, only taken out of its bucket chain, and
 *  the node count is decremented.  If a traversal cursor currently points
 *  at 'hl' it is moved on to the following node so the traversal stays
 *  valid.
 *
 *  An assertion is triggered if 'hl' is NULL or is not found in the chain
 *  its key hashes to.
 */
void
hash_remove_link(hash_table * hid, hash_link * hl)
{
    hash_link **slot;
    int bucket;

    assert(hl != NULL);
    bucket = hid->hash(hl->key, hid->size);

    /* 'slot' addresses whichever pointer currently leads to the node
     * being examined: the bucket head first, then each node's 'next'. */
    slot = &hid->buckets[bucket];

    while (*slot != NULL && *slot != hl)
        slot = &(*slot)->next;

    if (*slot == NULL) {
        assert(0);
        return;
    }

    *slot = hl->next;

    if (hid->next == hl) {
        hid->next = hl->next;

        if (hid->next == NULL)
            hash_next_bucket(hid);
    }

    --hid->count;
}

/**
 *  hash_get_bucket - return the head node of bucket number 'bucket' in the
 *  table 'hid' (NULL for an empty bucket).  Returns NULL as well when
 *  'bucket' is not a valid index.
 */
hash_link *
hash_get_bucket(hash_table * hid, unsigned int bucket)
{
    return (bucket < hid->size) ? hid->buckets[bucket] : NULL;
}

/*
 * hashFreeItems - call 'free_func' once on every node stored in 'hid'.
 *
 * The node pointers are first gathered into a temporary array (at most
 * hid->count of them) and only then handed to 'free_func' -- presumably so
 * that the callback may release nodes without disturbing the traversal.
 * The bucket array and the node count are left untouched.
 */
void
hashFreeItems(hash_table * hid, HASHFREE * free_func)
{
    hash_link **pending = (hash_link **)xcalloc(hid->count, sizeof(hash_link *));
    hash_link *node;
    int collected = 0;

    hash_first(hid);

    for (;;) {
        node = hash_next(hid);

        if (node == NULL || collected >= hid->count)
            break;

        pending[collected++] = node;
    }

    for (int k = 0; k < collected; ++k)
        free_func(pending[k]);

    xfree(pending);
}

/*
 * hashFreeMemory - release the bucket array and the table structure.
 * The nodes linked into the table are not touched.  A NULL 'hid' is a no-op.
 */
void
hashFreeMemory(hash_table * hid)
{
    if (hid != NULL) {
        if (hid->buckets != NULL)
            xfree(hid->buckets);

        xfree(hid);
    }
}

/* Candidate bucket counts in ascending order; see the list in hash.h. */
static const int hash_primes[] = {
    103,
    229,
    467,
    977,
    1979,
    4019,
    6037,
    7951,
    12149,
    16231,
    33493,
    65357
};

/**
 *  hashPrime - choose a table size for roughly 'n' entries.
 *
 *  Returns the entry of hash_primes[] whose logarithm is closest to
 *  log(n); on an exact tie the larger prime wins.
 *
 *  A non-positive 'n' has no usable logarithm (the comparison would
 *  degenerate on -inf/NaN and fall through to the largest prime), so the
 *  smallest prime is returned for it.
 */
int
hashPrime(int n)
{
    const int prime_count = (int) (sizeof(hash_primes) / sizeof(hash_primes[0]));
    int best_prime = hash_primes[0];

    if (n <= 0)
        return best_prime;

    const double log_n = log((double) n);
    double min = fabs(log_n - log((double) hash_primes[0]));

    /* entry 0 is the starting candidate, so the scan begins at 1 */
    for (int i = 1; i < prime_count; ++i) {
        const double d = fabs(log_n - log((double) hash_primes[i]));

        if (d > min)
            continue;

        min = d;
        best_prime = hash_primes[i];
    }

    return best_prime;
}

/**
 *  hashKeyStr - return the key of the node 'hl' viewed as a constant
 *  C string.  The key pointer is only cast, nothing is copied.
 */
const char *
hashKeyStr(hash_link * hl)
{
    const char *key_text = (const char *) hl->key;

    return key_text;
}

#if USE_HASH_DRIVER

/**

 *  hash-driver - Run with a big file as stdin to insert each line into the

 *  hash table, then prints the whole hash table, then deletes a random item,

 *  and prints the table again...

 */

int

main(void)

{

    hash_table *hid;

    LOCAL_ARRAY(char, buf, BUFSIZ);

    LOCAL_ARRAY(char, todelete, BUFSIZ);

    hash_link *walker = NULL;

    todelete[0] = '\0';

    printf("init\n");

    printf("creating hash table\n");

    if ((hid = hash_create((HASHCMP *) strcmp, 229, hash4)) < 0) {

        printf("hash_create error.\n");

        exit(1);

    }

    printf("done creating hash table: %d\n", hid);

    while (fgets(buf, BUFSIZ, stdin)) {

        buf[strlen(buf) - 1] = '\0';

        printf("Inserting '%s' for item %p to hash table: %d\n",

               buf, buf, hid);

        hash_insert(hid, xstrdup(buf), (void *) 0x12345678);

        if (random() % 17 == 0)

            strcpy(todelete, buf);

    }

    printf("walking hash table...\n");

    for (int i = 0, walker = hash_first(hid); walker; walker = hash_next(hid)) {

        printf("item %5d: key: '%s' item: %p\n", i++, walker->key,

               walker->item);

    }

    printf("done walking hash table...\n");

    if (todelete[0]) {

        printf("deleting %s from %d\n", todelete, hid);

        if (hash_delete(hid, todelete))

            printf("hash_delete error\n");

    }

    printf("walking hash table...\n");

    for (int i = 0, walker = hash_first(hid); walker; walker = hash_next(hid)) {

        printf("item %5d: key: '%s' item: %p\n", i++, walker->key,

               walker->item);

    }

    printf("done walking hash table...\n");

    printf("driver finished.\n");

    exit(0);

}

#endif

下面具体分析：

hash表整体结构：

Squid--hash代码分析

1、hash_create

/**
 *  hash_create - allocate and initialise an empty hash table.
 *
 *  cmp_func:  key comparator stored in the table; hash_lookup() treats a
 *             return value of 0 as a match.
 *  hash_sz:   requested number of buckets.  Zero selects DEFAULT_HASH_SIZE;
 *             a negative value is treated the same way instead of being
 *             converted to a huge unsigned bucket count.
 *  hash_func: function mapping a key and the bucket count to a bucket index.
 *
 *  Returns a pointer to the new table.  Both allocations go through
 *  xcalloc(), which is relied upon to hand back zeroed memory.
 */
hash_table *
hash_create(HASHCMP * cmp_func, int hash_sz, HASHHASH * hash_func)
{
    hash_table *hid = (hash_table *)xcalloc(1, sizeof(hash_table));

    hid->size = (hash_sz > 0) ? (unsigned int) hash_sz
                              : (unsigned int) DEFAULT_HASH_SIZE;

    /* allocate and null the buckets */
    hid->buckets = (hash_link **)xcalloc(hid->size, sizeof(hash_link *));

    hid->cmp = cmp_func;
    hid->hash = hash_func;

    /* no traversal in progress, no nodes stored */
    hid->next = NULL;
    hid->current_slot = 0;
    hid->count = 0;

    return hid;
}

创建hash表。需要三个参数：cmp_func、hash_sz、hash_func，其中hash_sz用来表示创建的hash表的桶链表的大小，如果为0，则使用默认的大小DEFAULT_HASH_SIZE.

桶链表储存的数据类型为：hash_link * ，即它只存储hash_link节点的地址。初始化后的桶链表没有存储任何地址，全部为0。

current_slot = 0，即遍历游标位于buckets[0]；hid->next是遍历时下一个将被hash_next返回的hash_link节点（并不是“下一个桶”），初始为NULL，表示当前没有正在进行的遍历。

Squid--hash代码分析

2、hash_join

/**
 *  hash_join - insert the node 'lnk' into the table 'hid' under lnk->key.
 *
 *  The node becomes the new head of the bucket chain chosen by the table's
 *  hash function, and the node count is incremented.  Only pointers are
 *  linked; neither the key nor the node is copied.
 */
void
hash_join(hash_table * hid, hash_link * lnk)
{
    const int slot = hid->hash(lnk->key, hid->size);
    hash_link **head = &hid->buckets[slot];

    lnk->next = *head;
    *head = lnk;
    hid->count += 1;
}

首先调用hid->hash计算出节点lnk应该插入的桶号i，将lnk的next指针指向桶i中链表的原首节点，再把lnk的地址存入桶i，使lnk成为该桶链表的新首节点（头插法），最后将hid->count加1。

Squid--hash代码分析

3、hash_lookup

/**
 *  hash_lookup - find the node stored under key 'k' in the table 'hid'.
 *
 *  The key is hashed to pick a bucket, then that bucket's chain is walked
 *  and each node's key is compared with 'k' through the table's comparator.
 *
 *  Returns the first node whose key compares equal (comparator result 0),
 *  or NULL if the chain holds no such node.  'k' must not be NULL.
 */
hash_link *
hash_lookup(hash_table * hid, const void *k)
{
    hash_link *found = NULL;
    hash_link *node;
    int slot;

    PROF_start(hash_lookup);
    assert(k != NULL);

    slot = hid->hash(k, hid->size);
    node = hid->buckets[slot];

    while (node != NULL) {
        if (hid->cmp(k, node->key) == 0) {
            found = node;
            break;
        }

        /* a node linked to itself would make this walk endless */
        assert(node != node->next);
        node = node->next;
    }

    PROF_stop(hash_lookup);
    return found;
}

首先对键k计算哈希值，得到桶号b，walker指向桶b中链表的首节点；然后沿链表逐个用cmp函数比较k与节点的key，比较结果为0时返回该节点；遍历完整条链表仍未找到则返回NULL。

Squid--hash代码分析

4、hash_remove_link

/**
 *  hash_remove_link - unlink the node 'hl' from the table 'hid'.
 *
 *  The node itself is not freed, only taken out of its bucket chain, and
 *  the node count is decremented.  If a traversal cursor currently points
 *  at 'hl' it is moved on to the following node so the traversal stays
 *  valid.
 *
 *  An assertion is triggered if 'hl' is NULL or is not found in the chain
 *  its key hashes to.
 */
void
hash_remove_link(hash_table * hid, hash_link * hl)
{
    hash_link **slot;
    int bucket;

    assert(hl != NULL);
    bucket = hid->hash(hl->key, hid->size);

    /* 'slot' addresses whichever pointer currently leads to the node
     * being examined: the bucket head first, then each node's 'next'. */
    slot = &hid->buckets[bucket];

    while (*slot != NULL && *slot != hl)
        slot = &(*slot)->next;

    if (*slot == NULL) {
        assert(0);
        return;
    }

    *slot = hl->next;

    if (hid->next == hl) {
        hid->next = hl->next;

        if (hid->next == NULL)
            hash_next_bucket(hid);
    }

    --hid->count;
}

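移除过程分两步：

1、摘除节点：P是一个二级指针，指向“指向当前节点的那个指针”，初始指向桶buckets[i]本身。沿链表找到*P == hl后执行*P = hl->next，无论hl是首节点还是中间节点，都由这同一条语句把它从链表中摘除。

2、修正遍历游标：如果hid->next恰好指向hl，则令hid->next = hl->next；若结果为NULL，再调用hash_next_bucket跳到下一个非空桶，保证正在进行的遍历不会访问已被移除的节点。最后hid->count减1；如果在链表中找不到hl，则触发assert(0)。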

Squid--hash代码分析

5、hashFreeItems

/*
 * hashFreeItems - call 'free_func' once on every node stored in 'hid'.
 *
 * The node pointers are first gathered into a temporary array (at most
 * hid->count of them) and only then handed to 'free_func' -- presumably so
 * that the callback may release nodes without disturbing the traversal.
 * The bucket array and the node count are left untouched.
 */
void
hashFreeItems(hash_table * hid, HASHFREE * free_func)
{
    hash_link **pending = (hash_link **)xcalloc(hid->count, sizeof(hash_link *));
    hash_link *node;
    int collected = 0;

    hash_first(hid);

    for (;;) {
        node = hash_next(hid);

        if (node == NULL || collected >= hid->count)
            break;

        pending[collected++] = node;
    }

    for (int k = 0; k < collected; ++k)
        free_func(pending[k]);

    xfree(pending);
}

根据hid->count的大小分配数组list，用来暂存各hash_link节点的地址。调用hash_first将hid->current_slot置为0，并让hid->next指向第一个非空桶的首节点；随后反复调用hash_next依次取得hash表中的每一个hash_link节点，把地址存入list；最后对list中的每个地址调用free_func统一释放，并释放list本身。

本文为Eliot原创，转载请注明出处：http://blog.csdn.net/xyw_blog/article/details/9791221

秒客网

Squid--hash代码分析

相关文章