首页 > 代码库 > Python 2.7的字典实现
Python 2.7的字典实现
/* Pure C simple version of python 2.7.8 hash table *//* Sample usage: see main() */#include <stdio.h>#include <stdlib.h>#include <assert.h>#include <string.h>#define PyDict_MINSIZE 8#define PERTURB_SHIFT 5#define INIT_NONZERO_DICT_SLOTS(mp) do { \ (mp)->ma_table = (mp)->ma_smalltable; (mp)->ma_mask = PyDict_MINSIZE - 1; } while (0)#define EMPTY_TO_MINSIZE(mp) do { \ memset((mp)->ma_smalltable, 0, sizeof((mp)->ma_smalltable)); (mp)->ma_used = (mp)->ma_fill = 0; INIT_NONZERO_DICT_SLOTS(mp); } while (0)typedef void PyObject;typedef struct { size_t me_hash; PyObject *me_key; PyObject *me_value;} PyDictEntry;typedef struct _dictobject PyDictObject;struct _dictobject { size_t ma_fill; /* # Active + # Dummy */ size_t ma_used; /* # Active */ size_t ma_mask; PyDictEntry *ma_table; size_t (*ma_hash)(PyObject *key); int (*ma_keycmp)(PyObject *key1,PyObject *key2); PyObject *(*ma_keydup)(PyObject *key); PyObject *(*ma_valdup)(PyObject *val); PyObject *(*ma_default)(void); PyDictEntry ma_smalltable[PyDict_MINSIZE];};/* Object used as dummy key to fill deleted entries */static PyObject *_dummy_struct;#define dummy (&_dummy_struct)static size_thash_string(PyObject *_str) { char *str = (char *)_str; size_t hash = 5381; for (; *str; str++) hash = ((hash << 5) + hash) + *str; /* hash * 33 + c */ return hash;}static intkeycmp_string(PyObject *_k1, PyObject *_k2) { char *k1 = (char *)_k1; char *k2 = (char *)_k2; for (; *k1 == *k2; k1++, k2++) if (*k1 == ‘\0‘) return 0; return *k1 - *k2;}static PyObject *keydup_string(PyObject *_key){ return (PyObject *)strdup((char *)_key);}static PyObject *valdup_int(PyObject *_val){ int *val = (int*)malloc(sizeof(int)); *val = *(int *)_val; return (PyObject *)val;}static PyObject *get_default_val(void){ int *val=(int*)malloc(sizeof(int)); *val=0; return (PyObject *)val;}PyDictObject *dict_new(void) { register PyDictObject *mp; mp = (PyDictObject *)malloc(sizeof(PyDictObject)); if (mp == NULL) return NULL; EMPTY_TO_MINSIZE(mp); mp->ma_hash = hash_string; mp->ma_keycmp = keycmp_string; mp->ma_keydup = keydup_string; mp->ma_valdup = valdup_int; mp->ma_default = get_default_val; return mp;}static PyDictEntry *lookdict(PyDictObject *mp, PyObject *key, register size_t hash) { register size_t i; register size_t perturb; register PyDictEntry *freeslot; register size_t mask = (size_t)mp->ma_mask; PyDictEntry *ep0 = mp->ma_table; register PyDictEntry *ep; i = (size_t)hash & mask; ep = &ep0[i]; if (ep->me_key == NULL || ep->me_key == key) return ep; if (ep->me_key == dummy) freeslot = ep; else if (ep->me_hash == hash && mp->ma_keycmp(ep->me_key, key) == 0) return ep; else freeslot = NULL; for (perturb = hash;; perturb >>= PERTURB_SHIFT) { i = (i << 2) + i + perturb + 1; ep = &ep0[i & mask]; if (ep->me_key == NULL) return freeslot == NULL ? ep : freeslot; if (ep->me_key == key || (ep->me_hash == hash && ep->me_key != dummy //delete won‘t change me_hash, so this is needed. && mp->ma_keycmp(ep->me_key, key) == 0)) return ep; if (ep->me_key == dummy && freeslot == NULL) freeslot = ep; } assert(0); /* NOT REACHED */ return 0;}static PyDictEntry *lookdict_nodummy(PyDictObject *mp, PyObject *key, register size_t hash) { register size_t i; register size_t perturb; register size_t mask = (size_t)mp->ma_mask; PyDictEntry *ep0 = mp->ma_table; register PyDictEntry *ep; i = (size_t)hash & mask; ep = &ep0[i]; if (ep->me_key == NULL || ep->me_key == key || (ep->me_hash == hash && mp->ma_keycmp(ep->me_key, key)==0)) return ep; for (perturb = hash;; perturb >>= PERTURB_SHIFT) { i = (i << 2) + i + perturb + 1; ep = &ep0[i & mask]; if (ep->me_key == NULL || ep->me_key == key || (ep->me_hash == hash && mp->ma_keycmp(ep->me_key, key)==0)) return ep; } assert(0); /* NOT REACHED */ return 0;}static voidinsertdict_clean(register PyDictObject *mp, PyObject *key, size_t hash, PyObject *value) { register size_t i; register size_t perturb; register size_t mask = mp->ma_mask; PyDictEntry *ep0 = mp->ma_table; register PyDictEntry *ep; i = hash & mask; ep = &ep0[i]; for (perturb = hash; ep->me_key != NULL; perturb >>= PERTURB_SHIFT) { i = (i << 2) + i + perturb + 1; ep = &ep0[i & mask]; } mp->ma_fill++; ep->me_key = key; ep->me_hash = hash; ep->me_value =http://www.mamicode.com/ value; mp->ma_used++;}/*Restructure the table by allocating a new table and reinserting allitems again. When entries have been deleted, the new table mayactually be smaller than the old one.*/static intdict_resize(PyDictObject *mp, size_t minused) { //printf("call resize...\n"); size_t newsize; PyDictEntry *oldtable, *newtable, *ep; size_t i; int is_oldtable_malloced; PyDictEntry small_copy[PyDict_MINSIZE]; /* Find the smallest table size > minused. */ for (newsize = PyDict_MINSIZE; newsize <= minused && newsize > 0; newsize <<= 1) ; /* Get space for a new table. */ oldtable = mp->ma_table; is_oldtable_malloced = (oldtable != mp->ma_smalltable); if (newsize == PyDict_MINSIZE) { /* A large table is shrinking, or we can‘t get any smaller. */ newtable = mp->ma_smalltable; if (newtable == oldtable) {//such as: for a new dict, add 5 keys, delete them, add 6th key, then goes here. if (mp->ma_fill == mp->ma_used) { /* No dummies, so no point doing anything. */ return 0; } assert(mp->ma_fill > mp->ma_used); memcpy(small_copy, oldtable, sizeof(small_copy)); oldtable = small_copy; } } else { newtable = (PyDictEntry*)malloc(sizeof(PyDictEntry)*newsize); if (newtable == NULL) return -1; } /* Make the dict empty, using the new table. */ assert(newtable != oldtable); mp->ma_table = newtable; mp->ma_mask = newsize - 1; memset(newtable, 0, sizeof(PyDictEntry)* newsize); mp->ma_used = 0; i = mp->ma_fill; mp->ma_fill = 0; /* Copy the data over; this is refcount-neutral for active entries; dummy entries aren‘t copied over, of course */ for (ep = oldtable; i > 0; ep++) { if (ep->me_value != NULL) { /* active entry */ --i; insertdict_clean(mp, ep->me_key, ep->me_hash, ep->me_value); } else if (ep->me_key != NULL) { /* dummy entry */ --i; assert(ep->me_key == dummy); } /* else key == value =http://www.mamicode.com/= NULL: nothing to do */ } if (is_oldtable_malloced) free(oldtable); return 0;}PyObject *dict_search(PyDictObject *mp, PyObject *key) { size_t hash; PyDictEntry *ep; assert(key); hash = mp->ma_hash(key); ep = lookdict(mp, key, hash); return ep->me_value;}intdict_contain(PyDictObject *mp, PyObject *key) { return dict_search(mp,key)?1:0;}intdict_add(register PyDictObject *mp, PyObject *key, PyObject *value) { register size_t hash; assert(key); assert(value); hash = mp->ma_hash(key); PyDictEntry *ep=lookdict(mp,key,hash); assert(ep->me_value=http://www.mamicode.com/=NULL); //only process for add case. if (ep->me_key == NULL) //unused mp->ma_fill++; ep->me_key = mp->ma_keydup(key); ep->me_hash = hash; ep->me_value = http://www.mamicode.com/mp->ma_valdup(value); mp->ma_used++; if (!( mp->ma_fill * 3 >= (mp->ma_mask + 1) * 2)) return 0; return dict_resize(mp, (mp->ma_used > 50000 ? 2 : 4) * mp->ma_used);}intdict_update(register PyDictObject *mp, PyObject *key, PyObject *value) { register size_t hash; assert(key); assert(value); hash = mp->ma_hash(key); PyDictEntry *ep=lookdict(mp,key,hash); assert(ep->me_value!=NULL); //only process for update case. PyObject *old_value = http://www.mamicode.com/ep->me_value; ep->me_value = http://www.mamicode.com/mp->ma_valdup(value); free(old_value); mp->ma_used++; return 0;}intdict_del(PyDictObject *mp, PyObject *key) { register size_t hash; register PyDictEntry *ep; PyObject *old_value, *old_key; assert(key); hash = mp->ma_hash(key); ep = lookdict(mp, key, hash); if (ep->me_value =http://www.mamicode.com/= NULL) //key does not exist or duplicate deletion return -1; old_key = ep->me_key; ep->me_key = dummy; old_value = ep->me_value; ep->me_value =http://www.mamicode.com/ NULL; mp->ma_used--; free(old_value); free(old_key); return 0;}PyObject *dict_dget(PyDictObject *mp, PyObject *key) { PyDictEntry *ep; size_t h; h = mp->ma_hash(key); ep = lookdict(mp,key,h); if (ep->me_value=http://www.mamicode.com/=NULL) { if (ep->me_key == NULL) mp->ma_fill++; ep->me_key= mp->ma_keydup(key); ep->me_value = http://www.mamicode.com/mp->ma_default(); ep->me_hash=h; mp->ma_used++; if (mp->ma_fill * 3 >= (mp->ma_mask + 1) * 2){ dict_resize(mp, (mp->ma_used > 50000 ? 2 : 4) * mp->ma_used); ep = lookdict_nodummy(mp,key,h); } } return ep->me_value;}voiddict_clear(PyObject *op) { PyDictObject *mp; PyDictEntry *ep, *table; int table_is_malloced; size_t fill; PyDictEntry small_copy[PyDict_MINSIZE]; mp = (PyDictObject *)op; table = mp->ma_table; assert(table != NULL); table_is_malloced = table != mp->ma_smalltable; fill = mp->ma_fill; if (table_is_malloced) EMPTY_TO_MINSIZE(mp); else if (fill > 0) { memcpy(small_copy, table, sizeof(small_copy)); table = small_copy; EMPTY_TO_MINSIZE(mp); } for (ep = table; fill > 0; ep++) { if (ep->me_key) { fill--; free(ep->me_key); free(ep->me_value); } } if (table_is_malloced) free(table);}size_tdict_len(PyDictObject *mp) { return mp->ma_used;}// test functionstatic inthash_cmp(const void *a, const void *b) { return *(int *)(*(PyDictEntry *) a).me_value > *(int *)(*(PyDictEntry *) b).me_value ? -1 : 1;}static voidhash_all_members(PyDictObject *t) { PyDictEntry *pdes = t->ma_table; PyDictEntry es[t->ma_used]; size_t i = 0, j = 0, size = t->ma_mask+1; for (; i < size; i++) if (pdes[i].me_value != NULL) es[j++] = pdes[i]; qsort(es, t->ma_used, sizeof(es[0]), hash_cmp); for (i = 0; i < t->ma_used; i++) printf("%s\t%d\n", (char *)es[i].me_key, *(int *)es[i].me_value);}int main(void) { PyDictObject *mp = dict_new(); FILE *fp; char keybuf[100]; int *valbuf = (int*)malloc(sizeof(int)); *valbuf = 1; if ((fp = fopen("u8w.txt", "r")) == NULL ) { fprintf(stdout,"Fail to open file.\n"); exit(0); } while (fscanf(fp, "%s", keybuf) == 1) { if (dict_contain(mp,keybuf)) *(int*)dict_search(mp,keybuf) += 1; else dict_add(mp,keybuf,valbuf); }// PyObject *vp;// while (fscanf(fp, "%s", keybuf) == 1) {// vp = dict_dget(mp,keybuf);// *(int*)vp += 1;// } hash_all_members(mp); dict_clear(mp); free(valbuf); free(mp); return 0;}
声明:以上内容来自用户投稿及互联网公开渠道收集整理发布,本网站不拥有所有权,未作人工编辑处理,也不承担相关法律责任,若内容有误或涉及侵权可进行投诉: 投诉/举报 工作人员会在5个工作日内联系你,一经查实,本站将立刻删除涉嫌侵权内容。