From fd9b29867f4b4ad5d1ea71eb12145b2020604328 Mon Sep 17 00:00:00 2001 From: kramm Date: Wed, 12 Nov 2008 10:37:16 +0000 Subject: [PATCH] optimized dict implementation --- lib/q.c | 504 +++++++++++++++++++++++++++++++++------------------------------ lib/q.h | 53 ++++--- 2 files changed, 297 insertions(+), 260 deletions(-) diff --git a/lib/q.c b/lib/q.c index 2f46373..9c1773a 100644 --- a/lib/q.c +++ b/lib/q.c @@ -60,18 +60,12 @@ void mem_destroy(mem_t*mem) mem_clear(mem); free(mem); } -static int mem_put_(mem_t*m,void*data, int length, int null) +static int mem_put_(mem_t*m,const void*data, int length, int null) { int n = m->pos; m->pos += length + (null?1:0); - if(m->pos > m->len) - { - //m->len += 1024>length?1024:(null?length*2:length); - - m->len *= 2; - while(m->len < m->pos) - m->len += 64; - + if(m->pos > m->len) { + m->len = (m->pos+63)&~63; m->buffer = m->buffer?(char*)realloc(m->buffer,m->len):(char*)malloc(m->len); } memcpy(&m->buffer[n], data, length); @@ -171,6 +165,105 @@ void ringbuffer_clear(ringbuffer_t*r) free(i); } +// ------------------------------- heap_t ------------------------------- + +void heap_init(heap_t*h,int n,int elem_size, int(*compare)(const void *, const void *)) +{ + memset(h, 0, sizeof(heap_t)); + h->max_size = n; + h->size = 0; + h->elem_size = elem_size; + h->compare = compare; + h->elements = (void**)malloc(n*sizeof(void*));memset(h->elements, 0, n*sizeof(void*)); + h->data = (char*)malloc(h->max_size*h->elem_size);memset(h->data, 0, h->max_size*h->elem_size); +} +void heap_clear(heap_t*h) +{ + free(h->elements); + free(h->data); +} + +#define HEAP_NODE_SMALLER(h,node1,node2) ((h)->compare((node1),(node2))>0) + +static void up(heap_t*h, int node) +{ + void*node_p = h->elements[node]; + int parent = node; + do { + node = parent; + if(!node) break; + parent = (node-1)/2; + h->elements[node] = h->elements[parent]; + } while(HEAP_NODE_SMALLER(h,h->elements[parent], node_p)); + + h->elements[node] = node_p; +} +static void down(heap_t*h, int node) +{ + void*node_p = h->elements[node]; + int child = node; + do { + node = child; + + /* determine new child's position */ + child = node<<1|1; + if(child >= h->size) + break; + if(child+1 < h->size && HEAP_NODE_SMALLER(h,h->elements[child],h->elements[child+1])) // search for bigger child + child++; + + h->elements[node] = h->elements[child]; + } while(HEAP_NODE_SMALLER(h,node_p, h->elements[child])); + + h->elements[node] = node_p; +} +void heap_put(heap_t*h, void*e) +{ + int pos = h->size++; + memcpy(&h->data[pos*h->elem_size],e,h->elem_size); + h->elements[pos] = &h->data[pos]; + up(h, pos); +} +int heap_size(heap_t*h) +{ + return h->size; +} +void* heap_max(heap_t*h) +{ + return h->elements[0]; +} +void* heap_chopmax(heap_t*h) +{ + void*p = h->elements[0]; + h->elements[0] = h->elements[--h->size]; + down(h,0); + return p; +} +void heap_dump(heap_t*h, FILE*fi) +{ + int t; + for(t=0;tsize;t++) { + int s; + for(s=0;s<=t;s=(s+1)*2-1) { + if(s==t) fprintf(fi,"\n"); + } + //fprintf(fi,"%d ", h->elements[t]->x); //? + } +} +void** heap_flatten(heap_t*h) +{ + void**nodes = (void**)malloc(h->size*sizeof(void*)); + void**p = nodes; + + while(h->size) { + /*printf("Heap Size: %d\n", h->size); + heap_print(stdout, h); + printf("\n");*/ + *p++ = heap_chopmax(h); + } + return nodes; +} + // ------------------------------- crc32 -------------------------------------- static unsigned int*crc32 = 0; static void crc32_init(void) @@ -190,26 +283,52 @@ static void crc32_init(void) } // ------------------------------- string_t ----------------------------------- -void string_set2(string_t*str, char*text, int len) +void string_set2(string_t*str, const char*text, int len) { str->len = len; str->str = text; } -void string_set(string_t*str, char*text) +void string_set(string_t*str, const char*text) { str->len = strlen(text); str->str = text; } +string_t string_new(const char*text, int len) +{ + string_t s; + s.len = len; + s.str = text; + return s; +} +string_t string_new2(const char*text) +{ + string_t s; + s.len = strlen(text); + s.str = text; + return s; +} unsigned int string_hash(string_t*str) { int t; unsigned int checksum = 0; - crc32_init(); + if(!crc32) + crc32_init(); for(t=0;tlen;t++) { checksum = checksum>>8 ^ crc32[(str->str[t]^checksum)&0xff]; } return checksum; } +unsigned int string_hash2(const char*str) +{ + unsigned int checksum = 0; + if(!crc32) + crc32_init(); + while(*str) { + checksum = checksum>>8 ^ crc32[(*str^checksum)&0xff]; + str++; + } + return checksum; +} void string_dup2(string_t*str, const char*text, int len) { str->len = len; @@ -249,7 +368,6 @@ typedef struct _stringlist { typedef struct _stringarray_internal_t { - mem_t data; mem_t pos; stringlist_t**hash; int num; @@ -263,7 +381,6 @@ void stringarray_init(stringarray_t*sa, int hashsize) sa->internal = (stringarray_internal_t*)malloc(sizeof(stringarray_internal_t)); memset(sa->internal, 0, sizeof(stringarray_internal_t)); s = (stringarray_internal_t*)sa->internal; - mem_init(&s->data); mem_init(&s->pos); s->hash = malloc(sizeof(stringlist_t*)*hashsize); memset(s->hash, 0, sizeof(stringlist_t*)*hashsize); @@ -274,8 +391,9 @@ void stringarray_put(stringarray_t*sa, string_t str) stringarray_internal_t*s = (stringarray_internal_t*)sa->internal; int pos; int hash = string_hash(&str) % s->hashsize; - pos = mem_putstring(&s->data, str); - mem_put(&s->pos, &pos, sizeof(int)); + + char*ss = string_cstr(&str); + mem_put(&s->pos, &ss, sizeof(char*)); stringlist_t*l = malloc(sizeof(stringlist_t)); l->index = s->num; @@ -287,13 +405,13 @@ void stringarray_put(stringarray_t*sa, string_t str) char* stringarray_at(stringarray_t*sa, int pos) { stringarray_internal_t*s = (stringarray_internal_t*)sa->internal; - int p; + char*p; if(pos<0 || pos>=s->num) return 0; - p = *(int*)&s->pos.buffer[pos*sizeof(int)]; + p = *(char**)&s->pos.buffer[pos*sizeof(char*)]; if(p<0) return 0; - return &s->data.buffer[p]; + return p; } string_t stringarray_at2(stringarray_t*sa, int pos) { @@ -329,7 +447,7 @@ void stringarray_del(stringarray_t*sa, int pos) string_t str = stringarray_at2(sa, pos); int hash = string_hash(&str) % s->hashsize; s->hash[hash] = stringlist_del(sa, s->hash[hash], pos); - *(int*)&s->pos.buffer[pos*sizeof(int)] = -1; + *(char**)&s->pos.buffer[pos*sizeof(char*)] = 0; } int stringarray_find(stringarray_t*sa, string_t* str) { @@ -337,6 +455,7 @@ int stringarray_find(stringarray_t*sa, string_t* str) int hash = string_hash(str) % s->hashsize; int t; stringlist_t*l = s->hash[hash]; + //TODO: statistics while(l) { string_t s = stringarray_at2(sa, l->index); if(string_equals2(str, &s)) { @@ -349,7 +468,6 @@ int stringarray_find(stringarray_t*sa, string_t* str) void stringarray_clear(stringarray_t*sa) { stringarray_internal_t*s = (stringarray_internal_t*)sa->internal; - mem_clear(&s->data); mem_clear(&s->pos); int t; for(t=0;thashsize;t++) { @@ -361,6 +479,7 @@ void stringarray_clear(stringarray_t*sa) l = next; } } + free(s->hash);s->hash=0; free(s); } void stringarray_destroy(stringarray_t*sa) @@ -370,271 +489,176 @@ void stringarray_destroy(stringarray_t*sa) } -// ------------------------------- map_t -------------------------------------- +// ------------------------------- dictionary_t ------------------------------- -typedef struct _map_internal_t +dict_t*dict_new() { - stringarray_t keys; - stringarray_t values; - int num; -} map_internal_t; - -void map_init(map_t*map) + dict_t*d = malloc(sizeof(dict_t)); + dict_init(d); + return d; +} +void dict_init(dict_t*h) { - map_internal_t*m; - map->internal = (map_internal_t*)malloc(sizeof(map_internal_t)); - memset(map->internal, 0, sizeof(map_internal_t)); - m = (map_internal_t*)map->internal; - stringarray_init(&m->keys, DEFAULT_HASH_SIZE); - stringarray_init(&m->values, DEFAULT_HASH_SIZE); + memset(h, 0, sizeof(dict_t)); + h->hashsize = DEFAULT_HASH_SIZE; + h->slots = malloc(sizeof(dictentry_t*)*h->hashsize); + h->num = 0; + memset(h->slots, 0, sizeof(dictentry_t*)*h->hashsize); } -void map_put(map_t*map, string_t t1, string_t t2) +void dict_put(dict_t*h, string_t s, void* data) { - map_internal_t*m = (map_internal_t*)map->internal; - stringarray_put(&m->keys, t1); - stringarray_put(&m->values, t2); - m->num++; + unsigned int checksum = string_hash(&s) % h->hashsize; + dictentry_t*e = (dictentry_t*)malloc(sizeof(dictentry_t)); + e->s = strdup(s.str); + e->next = h->slots[checksum]; + e->data = data; + h->slots[checksum] = e; + h->num++; } -char* map_lookup(map_t*map, const char*name) +void dict_put2(dict_t*h, const char*s, void*data) { - int s; - map_internal_t*m = (map_internal_t*)map->internal; - string_t str; - string_set(&str, (char*)name); - s = stringarray_find(&m->keys, &str); - if(s>=0) { - string_t s2 = stringarray_at2(&m->values, s); - return s2.str; - } - return 0; + string_t s2; + string_set(&s2, s); + dict_put(h, s2, data); } -void map_dump(map_t*map, FILE*fi, const char*prefix) +void dict_dump(dict_t*h, FILE*fi, const char*prefix) { int t; - map_internal_t*m = (map_internal_t*)map->internal; - for(t=0;tnum;t++) { - string_t s1 = stringarray_at2(&m->keys, t); - string_t s2 = stringarray_at2(&m->values, t); - fprintf(fi, "%s%s=%s\n", prefix, s1.str, s2.str); + for(t=0;thashsize;t++) { + dictentry_t*e = h->slots[t]; + while(e) { + fprintf(fi, "%s%s=%08x\n", prefix, e->s, e->data); + e = e->next; + } } } -void map_clear(map_t*map) +int dict_count(dict_t*h) { - map_internal_t*m = (map_internal_t*)map->internal; - stringarray_clear(&m->keys); - stringarray_clear(&m->values); - free(m); -} -void map_destroy(map_t*map) -{ - map_clear(map); - free(map); -} - -// ------------------------------- dictionary_t ------------------------------- - -typedef struct _dictionary_internal_t -{ - stringarray_t keys; - mem_t values; - int num; -} dictionary_internal_t; - -void dictionary_init(dictionary_t*dict) -{ - dictionary_internal_t*d; - dict->internal = (dictionary_internal_t*)malloc(sizeof(dictionary_internal_t)); - memset(dict->internal, 0, sizeof(dictionary_internal_t)); - d = (dictionary_internal_t*)dict->internal; - stringarray_init(&d->keys, DEFAULT_HASH_SIZE); - mem_init(&d->values); + return h->num; } -void dictionary_put(dictionary_t*dict, string_t t1, void* t2) +void* dict_lookup(dict_t*h, const char*s) { - dictionary_internal_t*d = (dictionary_internal_t*)dict->internal; - int s=0; - s = stringarray_find(&d->keys, &t1); - if(s>=0) { - /* replace */ - *(void**)(&d->values.buffer[s*sizeof(void*)]) = t2; - } else { - stringarray_put(&d->keys, t1); - mem_put(&d->values, &t2, sizeof(void*)); - d->num++; + unsigned int checksum = string_hash2(s) % h->hashsize; + dictentry_t*e = h->slots[checksum]; + while(e) { + if(!strcmp(e->s, s)) { + return e->data; + } + e = e->next; } + return 0; } -void dictionary_put2(dictionary_t*dict, const char*t1, void* t2) -{ - string_t s; - string_set(&s, (char*)t1); - dictionary_put(dict, s, t2); -} -stringarray_t* dictionary_index(dictionary_t*dict) -{ - dictionary_internal_t*d = (dictionary_internal_t*)dict->internal; - return &d->keys; -} -int dictionary_count(dictionary_t* dict) // this count includes entries that have been deleted -{ - dictionary_internal_t*d = (dictionary_internal_t*)dict->internal; - return d->num; -} -void* dictionary_lookup(dictionary_t*dict, const char*name) -{ - int s; - dictionary_internal_t*d = (dictionary_internal_t*)dict->internal; - string_t str; - string_set(&str, (char*)name); - s = stringarray_find(&d->keys, &str); - if(s>=0) { - return *(void**)&d->values.buffer[sizeof(void*)*s]; +char dict_del(dict_t*h, const char*s) +{ + unsigned int checksum = string_hash2(s) % h->hashsize; + dictentry_t*head = h->slots[checksum]; + dictentry_t*e = head, *prev=0; + while(e) { + if(!strcmp(e->s, s)) { + dictentry_t*next = e->next; + free((void*)e->s); + memset(e, 0, sizeof(dictentry_t)); + free(e); + if(e == head) { + h->slots[checksum] = 0; + } else { + prev->next = next; + } + return 1; + } + prev = e; + e = e->next; } return 0; } -void dictionary_dump(dictionary_t*dict, FILE*fi, const char*prefix) +void dict_foreach_value(dict_t*h, void (*runFunction)(void*)) { - dictionary_internal_t*d = (dictionary_internal_t*)dict->internal; int t; - for(t=0;tnum;t++) { - string_t s1 = stringarray_at2(&d->keys, t); - fprintf(fi, "%s%s=%08x\n", prefix, s1.str, *(void**)&d->values.buffer[sizeof(void*)*t]); + for(t=0;thashsize;t++) { + dictentry_t*e = h->slots[t]; + while(e) { + dictentry_t*next = e->next; + if(runFunction) { + runFunction(e->data); + } + e = e->next; + } } } -void dictionary_del(dictionary_t*dict, const char* name) +void dict_free_all(dict_t*h, void (*freeFunction)(void*)) { - dictionary_internal_t*d = (dictionary_internal_t*)dict->internal; - int s; - string_t str; - string_set(&str, (char*)name); - s = stringarray_find(&d->keys, &str); - if(s>=0) { - *(void**)(&d->values.buffer[s*sizeof(void*)]) = 0; - stringarray_del(&d->keys, s); + int t; + for(t=0;thashsize;t++) { + dictentry_t*e = h->slots[t]; + while(e) { + dictentry_t*next = e->next; + free((void*)e->s); + if(freeFunction) { + freeFunction(e->data); + } + memset(e, 0, sizeof(dictentry_t)); + free(e); + e = next; + } } + free(h->slots);h->slots = 0; } -void dictionary_clear(dictionary_t*dict) +void dict_clear(dict_t*h) { - dictionary_internal_t*d = (dictionary_internal_t*)dict->internal; - stringarray_clear(&d->keys); - mem_clear(&d->values); - free(d); + dict_free_all(h, 0); } -void dictionary_destroy(dictionary_t*dict) +void dict_destroy(dict_t*dict) { - dictionary_clear(dict); + dict_clear(dict); free(dict); } -void dictionary_free_all(dictionary_t* dict, void (*freeFunction)(void*)) -{ - dictionary_internal_t*d = (dictionary_internal_t*)dict->internal; - int num = 0; - char* name = stringarray_at(&d->keys, num) ; - while (name) - { - freeFunction(dictionary_lookup(dict, name)); - num++; - name = stringarray_at(&d->keys, num); - } - dictionary_clear(dict); -} - -// ------------------------------- heap_t ------------------------------- - -void heap_init(heap_t*h,int n,int elem_size, int(*compare)(const void *, const void *)) -{ - memset(h, 0, sizeof(heap_t)); - h->max_size = n; - h->size = 0; - h->elem_size = elem_size; - h->compare = compare; - h->elements = (void**)malloc(n*sizeof(void*));memset(h->elements, 0, n*sizeof(void*)); - h->data = (char*)malloc(h->max_size*h->elem_size);memset(h->data, 0, h->max_size*h->elem_size); -} -void heap_clear(heap_t*h) -{ - free(h->elements); - free(h->data); -} - -#define HEAP_NODE_SMALLER(h,node1,node2) ((h)->compare((node1),(node2))>0) - -static void up(heap_t*h, int node) -{ - void*node_p = h->elements[node]; - int parent = node; - do { - node = parent; - if(!node) break; - parent = (node-1)/2; - h->elements[node] = h->elements[parent]; - } while(HEAP_NODE_SMALLER(h,h->elements[parent], node_p)); +// ------------------------------- map_t -------------------------------------- - h->elements[node] = node_p; -} -static void down(heap_t*h, int node) +typedef struct _map_internal_t { - void*node_p = h->elements[node]; - int child = node; - do { - node = child; - - /* determine new child's position */ - child = node<<1|1; - if(child >= h->size) - break; - if(child+1 < h->size && HEAP_NODE_SMALLER(h,h->elements[child],h->elements[child+1])) // search for bigger child - child++; + dict_t d; +} map_internal_t; - h->elements[node] = h->elements[child]; - } while(HEAP_NODE_SMALLER(h,node_p, h->elements[child])); - - h->elements[node] = node_p; -} -void heap_put(heap_t*h, void*e) +void map_init(map_t*map) { - int pos = h->size++; - memcpy(&h->data[pos*h->elem_size],e,h->elem_size); - h->elements[pos] = &h->data[pos]; - up(h, pos); + map_internal_t*m; + map->internal = (map_internal_t*)malloc(sizeof(map_internal_t)); + memset(map->internal, 0, sizeof(map_internal_t)); + m = (map_internal_t*)map->internal; + dict_init(&m->d); } -int heap_size(heap_t*h) +void map_put(map_t*map, string_t t1, string_t t2) { - return h->size; + map_internal_t*m = (map_internal_t*)map->internal; + dict_put2(&m->d, string_cstr(&t1), (void*)string_cstr(&t2)); } -void* heap_max(heap_t*h) +const char* map_lookup(map_t*map, const char*name) { - return h->elements[0]; + map_internal_t*m = (map_internal_t*)map->internal; + const char*value = dict_lookup(&m->d, name); + return value; } -void* heap_chopmax(heap_t*h) +void map_dump(map_t*map, FILE*fi, const char*prefix) { - void*p = h->elements[0]; - h->elements[0] = h->elements[--h->size]; - down(h,0); - return p; + int t; + map_internal_t*m = (map_internal_t*)map->internal; + fprintf(fi, "ERROR: map dumping not implemented yet\n"); + /*for(t=0;tnum;t++) { + string_t s1 = stringarray_at2(&m->keys, t); + string_t s2 = stringarray_at2(&m->values, t); + fprintf(fi, "%s%s=%s\n", prefix, s1.str, s2.str); + }*/ } -void heap_dump(heap_t*h, FILE*fi) +void map_clear(map_t*map) { - int t; - for(t=0;tsize;t++) { - int s; - for(s=0;s<=t;s=(s+1)*2-1) { - if(s==t) fprintf(fi,"\n"); - } - //fprintf(fi,"%d ", h->elements[t]->x); //? - } + map_internal_t*m = (map_internal_t*)map->internal; + dict_clear(&m->d); + free(m); } -void** heap_flatten(heap_t*h) +void map_destroy(map_t*map) { - void**nodes = (void**)malloc(h->size*sizeof(void*)); - void**p = nodes; - - while(h->size) { - /*printf("Heap Size: %d\n", h->size); - heap_print(stdout, h); - printf("\n");*/ - *p++ = heap_chopmax(h); - } - return nodes; + map_clear(map); + free(map); } diff --git a/lib/q.h b/lib/q.h index 18e7f3a..fbc6d19 100644 --- a/lib/q.h +++ b/lib/q.h @@ -23,7 +23,6 @@ #define __q_h__ #include -#include "../config.h" #ifdef __cplusplus extern "C" { @@ -45,7 +44,7 @@ typedef struct _ringbuffer_t /* non-nul terminated string */ typedef struct _string_t { - char*str; + const char*str; int len; } string_t; @@ -55,9 +54,17 @@ typedef struct _map_t { } map_t; /* (void*)s referenced by strings */ -typedef struct _dictionary_t { - void*internal; -} dictionary_t; +typedef struct _dictentry { + const char*s; + void*data; + struct _dictentry*next; +} dictentry_t; + +typedef struct _dict { + dictentry_t**slots; + int hashsize; + int num; +} dict_t; /* array of strings */ typedef struct _stringarray_t @@ -87,13 +94,17 @@ void ringbuffer_put(ringbuffer_t*r, void*buf, int size); int ringbuffer_read(ringbuffer_t*r, void*buf, int size); void ringbuffer_clear(ringbuffer_t*r); -void string_set(string_t*str, char*text); -void string_set2(string_t*str, char*text, int len); +string_t string_new(const char*text, int len); +string_t string_new2(const char*text); +unsigned int string_hash(string_t*str); +unsigned int string_hash2(const char*str); +void string_set(string_t*str, const char*text); +void string_set2(string_t*str, const char*text, int len); void string_dup(string_t*str, const char*text); void string_dup2(string_t*str, const char*text, int len); int string_equals(string_t*str, const char*text); -void stringarray_init(stringarray_t*sa); +void stringarray_init(stringarray_t*sa, int hashsize); void stringarray_put(stringarray_t*sa, string_t str); char* stringarray_at(stringarray_t*sa, int pos); string_t stringarray_at2(stringarray_t*sa, int pos); @@ -103,22 +114,24 @@ void stringarray_destroy(stringarray_t*sa); void map_init(map_t*map); void map_put(map_t*map, string_t t1, string_t t2); -char* map_lookup(map_t*map, const char*name); +const char* map_lookup(map_t*map, const char*name); void map_dump(map_t*map, FILE*fi, const char*prefix); void map_clear(map_t*map); void map_destroy(map_t*map); -void dictionary_init(dictionary_t*dict); -void dictionary_put(dictionary_t*dict, string_t t1, void* t2); -void dictionary_put2(dictionary_t*dict, const char* t1, void* t2); -stringarray_t* dictionary_index(dictionary_t*dict); -int dictionary_count(dictionary_t* dict); -void* dictionary_lookup(dictionary_t*dict, const char*name); -void dictionary_dump(dictionary_t*dict, FILE*fi, const char*prefix); -void dictionary_del(dictionary_t*dict, const char* name); -void dictionary_clear(dictionary_t*dict); -void dictionary_destroy(dictionary_t*dict); -void dictionary_free_all(dictionary_t* dict, void (*freeFunction)(void*)); +dict_t*dict_new(); +void dict_init(dict_t*dict); +void dict_put(dict_t*dict, string_t t1, void* t2); +void dict_put2(dict_t*dict, const char* t1, void* t2); +stringarray_t* dict_index(dict_t*dict); +int dict_count(dict_t* dict); +void* dict_lookup(dict_t*dict, const char*name); +void dict_dump(dict_t*dict, FILE*fi, const char*prefix); +char dict_del(dict_t*dict, const char* name); +void dict_foreach_value(dict_t*h, void (*runFunction)(void*)); +void dict_free_all(dict_t*h, void (*freeFunction)(void*)); +void dict_clear(dict_t*dict); +void dict_destroy(dict_t*dict); void heap_init(heap_t*h,int n,int elem_size, int(*compare)(const void *, const void *)); void heap_clear(heap_t*h); -- 1.7.10.4