首页 > 代码库 > Python 2.7的字典实现

Python 2.7的字典实现

/* Pure C simple version of python 2.7.8 hash table *//* Sample usage: see main() */#include <stdio.h>#include <stdlib.h>#include <assert.h>#include <string.h>#define PyDict_MINSIZE 8#define PERTURB_SHIFT 5#define INIT_NONZERO_DICT_SLOTS(mp) do {                                \    (mp)->ma_table = (mp)->ma_smalltable;                                   (mp)->ma_mask = PyDict_MINSIZE - 1;                                 } while (0)#define EMPTY_TO_MINSIZE(mp) do {                                       \    memset((mp)->ma_smalltable, 0, sizeof((mp)->ma_smalltable));            (mp)->ma_used = (mp)->ma_fill = 0;                                      INIT_NONZERO_DICT_SLOTS(mp);                                        } while (0)typedef void PyObject;typedef struct {    size_t me_hash;    PyObject *me_key;    PyObject *me_value;} PyDictEntry;typedef struct _dictobject PyDictObject;struct _dictobject {    size_t ma_fill;  /* # Active + # Dummy */    size_t ma_used;  /* # Active */    size_t ma_mask;    PyDictEntry *ma_table;    size_t (*ma_hash)(PyObject *key);    int (*ma_keycmp)(PyObject *key1,PyObject *key2);    PyObject *(*ma_keydup)(PyObject *key);    PyObject *(*ma_valdup)(PyObject *val);    PyObject *(*ma_default)(void);    PyDictEntry ma_smalltable[PyDict_MINSIZE];};/* Object used as dummy key to fill deleted entries */static PyObject *_dummy_struct;#define dummy (&_dummy_struct)static size_thash_string(PyObject *_str) {    char *str = (char *)_str;    size_t hash = 5381;    for (; *str; str++)        hash = ((hash << 5) + hash) + *str; /* hash * 33 + c */    return hash;}static intkeycmp_string(PyObject *_k1, PyObject *_k2) {    char *k1 = (char *)_k1;    char *k2 = (char *)_k2;    for (; *k1 == *k2; k1++, k2++)        if (*k1 == \0)            return 0;    return *k1 - *k2;}static PyObject *keydup_string(PyObject *_key){    return (PyObject *)strdup((char *)_key);}static PyObject *valdup_int(PyObject *_val){    int *val = (int*)malloc(sizeof(int));    *val = *(int *)_val;    return (PyObject *)val;}static PyObject *get_default_val(void){    int *val=(int*)malloc(sizeof(int));    *val=0;    return (PyObject *)val;}PyDictObject *dict_new(void) {    register PyDictObject *mp;    mp = (PyDictObject *)malloc(sizeof(PyDictObject));    if (mp == NULL)        return NULL;    EMPTY_TO_MINSIZE(mp);    mp->ma_hash = hash_string;    mp->ma_keycmp = keycmp_string;    mp->ma_keydup = keydup_string;    mp->ma_valdup = valdup_int;    mp->ma_default = get_default_val;    return mp;}static PyDictEntry *lookdict(PyDictObject *mp, PyObject *key, register size_t hash) {    register size_t i;    register size_t perturb;    register PyDictEntry *freeslot;    register size_t mask = (size_t)mp->ma_mask;    PyDictEntry *ep0 = mp->ma_table;    register PyDictEntry *ep;    i = (size_t)hash & mask;    ep = &ep0[i];    if (ep->me_key == NULL || ep->me_key == key)        return ep;    if (ep->me_key == dummy)        freeslot = ep;    else if (ep->me_hash == hash             && mp->ma_keycmp(ep->me_key, key) == 0)        return ep;    else        freeslot = NULL;    for (perturb = hash;; perturb >>= PERTURB_SHIFT) {        i = (i << 2) + i + perturb + 1;        ep = &ep0[i & mask];        if (ep->me_key == NULL)            return freeslot == NULL ? ep : freeslot;        if (ep->me_key == key                || (ep->me_hash == hash                    && ep->me_key != dummy //delete won‘t change me_hash, so this is needed.                    && mp->ma_keycmp(ep->me_key, key) == 0))            return ep;        if (ep->me_key == dummy && freeslot == NULL)            freeslot = ep;    }    assert(0);          /* NOT REACHED */    return 0;}static PyDictEntry *lookdict_nodummy(PyDictObject *mp, PyObject *key, register size_t hash) {    register size_t i;    register size_t perturb;    register size_t mask = (size_t)mp->ma_mask;    PyDictEntry *ep0 = mp->ma_table;    register PyDictEntry *ep;    i = (size_t)hash & mask;    ep = &ep0[i];    if (ep->me_key == NULL            || ep->me_key == key            || (ep->me_hash == hash && mp->ma_keycmp(ep->me_key, key)==0))        return ep;    for (perturb = hash;; perturb >>= PERTURB_SHIFT) {        i = (i << 2) + i + perturb + 1;        ep = &ep0[i & mask];        if (ep->me_key == NULL                || ep->me_key == key                || (ep->me_hash == hash && mp->ma_keycmp(ep->me_key, key)==0))            return ep;    }    assert(0);          /* NOT REACHED */    return 0;}static voidinsertdict_clean(register PyDictObject *mp, PyObject *key, size_t hash, PyObject *value) {    register size_t i;    register size_t perturb;    register size_t mask = mp->ma_mask;    PyDictEntry *ep0 = mp->ma_table;    register PyDictEntry *ep;    i = hash & mask;    ep = &ep0[i];    for (perturb = hash; ep->me_key != NULL; perturb >>= PERTURB_SHIFT) {        i = (i << 2) + i + perturb + 1;        ep = &ep0[i & mask];    }    mp->ma_fill++;    ep->me_key = key;    ep->me_hash = hash;    ep->me_value =http://www.mamicode.com/ value;    mp->ma_used++;}/*Restructure the table by allocating a new table and reinserting allitems again.  When entries have been deleted, the new table mayactually be smaller than the old one.*/static intdict_resize(PyDictObject *mp, size_t minused) {    //printf("call resize...\n");    size_t newsize;    PyDictEntry *oldtable, *newtable, *ep;    size_t i;    int is_oldtable_malloced;    PyDictEntry small_copy[PyDict_MINSIZE];    /* Find the smallest table size > minused. */    for (newsize = PyDict_MINSIZE;            newsize <= minused && newsize > 0;            newsize <<= 1)        ;    /* Get space for a new table. */    oldtable = mp->ma_table;    is_oldtable_malloced = (oldtable != mp->ma_smalltable);    if (newsize == PyDict_MINSIZE) {        /* A large table is shrinking, or we can‘t get any smaller. */        newtable = mp->ma_smalltable;        if (newtable == oldtable) {//such as: for a new dict, add 5 keys, delete them, add 6th key, then goes here.            if (mp->ma_fill == mp->ma_used) {                /* No dummies, so no point doing anything. */                return 0;            }            assert(mp->ma_fill > mp->ma_used);            memcpy(small_copy, oldtable, sizeof(small_copy));            oldtable = small_copy;        }    } else {        newtable = (PyDictEntry*)malloc(sizeof(PyDictEntry)*newsize);        if (newtable == NULL)            return -1;    }    /* Make the dict empty, using the new table. */    assert(newtable != oldtable);    mp->ma_table = newtable;    mp->ma_mask = newsize - 1;    memset(newtable, 0, sizeof(PyDictEntry)* newsize);    mp->ma_used = 0;    i = mp->ma_fill;    mp->ma_fill = 0;    /* Copy the data over; this is refcount-neutral for active entries;    dummy entries aren‘t copied over, of course */    for (ep = oldtable; i > 0; ep++) {        if (ep->me_value != NULL) {             /* active entry */            --i;            insertdict_clean(mp, ep->me_key, ep->me_hash, ep->me_value);        } else if (ep->me_key != NULL) {      /* dummy entry */            --i;            assert(ep->me_key == dummy);        }        /* else key == value =http://www.mamicode.com/= NULL:  nothing to do */    }    if (is_oldtable_malloced)        free(oldtable);    return 0;}PyObject *dict_search(PyDictObject *mp, PyObject *key) {    size_t hash;    PyDictEntry *ep;    assert(key);    hash = mp->ma_hash(key);    ep = lookdict(mp, key, hash);    return ep->me_value;}intdict_contain(PyDictObject *mp, PyObject *key) {    return dict_search(mp,key)?1:0;}intdict_add(register PyDictObject *mp, PyObject *key, PyObject *value) {    register size_t hash;    assert(key);    assert(value);    hash = mp->ma_hash(key);    PyDictEntry *ep=lookdict(mp,key,hash);    assert(ep->me_value=http://www.mamicode.com/=NULL); //only process for add case.    if (ep->me_key == NULL) //unused        mp->ma_fill++;    ep->me_key = mp->ma_keydup(key);    ep->me_hash = hash;    ep->me_value = http://www.mamicode.com/mp->ma_valdup(value);    mp->ma_used++;    if (!( mp->ma_fill * 3 >= (mp->ma_mask + 1) * 2))        return 0;    return dict_resize(mp, (mp->ma_used > 50000 ? 2 : 4) * mp->ma_used);}intdict_update(register PyDictObject *mp, PyObject *key, PyObject *value) {    register size_t hash;    assert(key);    assert(value);    hash = mp->ma_hash(key);    PyDictEntry *ep=lookdict(mp,key,hash);    assert(ep->me_value!=NULL); //only process for update case.    PyObject *old_value = http://www.mamicode.com/ep->me_value;    ep->me_value = http://www.mamicode.com/mp->ma_valdup(value);    free(old_value);    mp->ma_used++;    return 0;}intdict_del(PyDictObject *mp, PyObject *key) {    register size_t hash;    register PyDictEntry *ep;    PyObject *old_value, *old_key;    assert(key);    hash = mp->ma_hash(key);    ep = lookdict(mp, key, hash);    if (ep->me_value =http://www.mamicode.com/= NULL) //key does not exist or duplicate deletion        return -1;    old_key = ep->me_key;    ep->me_key = dummy;    old_value = ep->me_value;    ep->me_value =http://www.mamicode.com/ NULL;    mp->ma_used--;    free(old_value);    free(old_key);    return 0;}PyObject *dict_dget(PyDictObject *mp, PyObject *key) {    PyDictEntry *ep;    size_t h;    h = mp->ma_hash(key);    ep = lookdict(mp,key,h);    if (ep->me_value=http://www.mamicode.com/=NULL) {        if (ep->me_key == NULL)            mp->ma_fill++;        ep->me_key= mp->ma_keydup(key);        ep->me_value = http://www.mamicode.com/mp->ma_default();        ep->me_hash=h;        mp->ma_used++;        if (mp->ma_fill * 3 >= (mp->ma_mask + 1) * 2){            dict_resize(mp, (mp->ma_used > 50000 ? 2 : 4) * mp->ma_used);            ep = lookdict_nodummy(mp,key,h);        }    }    return ep->me_value;}voiddict_clear(PyObject *op) {    PyDictObject *mp;    PyDictEntry *ep, *table;    int table_is_malloced;    size_t fill;    PyDictEntry small_copy[PyDict_MINSIZE];    mp = (PyDictObject *)op;    table = mp->ma_table;    assert(table != NULL);    table_is_malloced = table != mp->ma_smalltable;    fill = mp->ma_fill;    if (table_is_malloced)        EMPTY_TO_MINSIZE(mp);    else if (fill > 0) {        memcpy(small_copy, table, sizeof(small_copy));        table = small_copy;        EMPTY_TO_MINSIZE(mp);    }    for (ep = table; fill > 0; ep++) {        if (ep->me_key) {            fill--;            free(ep->me_key);            free(ep->me_value);        }    }    if (table_is_malloced)        free(table);}size_tdict_len(PyDictObject *mp) {    return mp->ma_used;}// test functionstatic inthash_cmp(const void *a, const void *b) {    return *(int *)(*(PyDictEntry *) a).me_value > *(int *)(*(PyDictEntry *) b).me_value ? -1 : 1;}static voidhash_all_members(PyDictObject *t) {    PyDictEntry *pdes = t->ma_table;    PyDictEntry es[t->ma_used];    size_t i = 0, j = 0, size = t->ma_mask+1;    for (; i < size; i++)        if (pdes[i].me_value != NULL)            es[j++] = pdes[i];    qsort(es, t->ma_used, sizeof(es[0]), hash_cmp);    for (i = 0; i < t->ma_used; i++)        printf("%s\t%d\n", (char *)es[i].me_key, *(int *)es[i].me_value);}int main(void) {    PyDictObject *mp = dict_new();    FILE *fp;    char keybuf[100];    int *valbuf = (int*)malloc(sizeof(int));    *valbuf = 1;    if ((fp = fopen("u8w.txt", "r")) == NULL ) {        fprintf(stdout,"Fail to open file.\n");        exit(0);    }    while (fscanf(fp, "%s", keybuf) == 1) {        if (dict_contain(mp,keybuf))            *(int*)dict_search(mp,keybuf) += 1;        else            dict_add(mp,keybuf,valbuf);    }//    PyObject *vp;//    while (fscanf(fp, "%s", keybuf) == 1) {//        vp = dict_dget(mp,keybuf);//        *(int*)vp += 1;//    }    hash_all_members(mp);    dict_clear(mp);    free(valbuf);    free(mp);    return 0;}