Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- /* reads words, one-per line into a dynamically allocated and sized hashtable
- * storage for words is dynamically allocated to exact length, table is
- * re-hashed when load-factor exceeds 0.6. The header, source and example
- * files are shown below, but can be compiled together in this single exmple.
- * when separated into separate files, the hastable struct and list struct
- * are opaque to the example file with typedefs and forward declarations
- * being all that is visible to the user if compiled into a library.
- * openSSL LH_strhash() function is used for hashing.
- */
- /* =========================== wordhash_tbl.h =========================== */
- #ifndef _wordhash_tbl_h_
- #define _wordhash_tbl_h_ 1
- #include <stdio.h>
- #define HTSZ 256 /* default number of buckets for new hashtable */
- #define LOADFACT 0.6F /* never > .7 (buckets filled / total) */
- typedef struct _list_t list_t; /* typedef for opaque hashtable list */
- typedef struct _hashtable_t hashtable_t; /* typedef for opaque hashtable struct */
- int ht_size (hashtable_t *ht); /* size (total no. of buckets) */
- int ht_filled (hashtable_t *ht); /* buckets filled (used) */
- size_t ht_words (hashtable_t *ht); /* total words in hashtable */
- float ht_load (hashtable_t *ht); /* current load-factor (filled / total) */
- hashtable_t *ht_create (size_t size); /* create new hashtable (if 0 HTSZ used) */
- /* lookup functions */
- list_t *ht_lookup_str (hashtable_t *ht, const char *s);
- /* takes hash to avoid re-hash of same word if hash already computed */
- list_t *ht_lookup_hash (hashtable_t *ht, const char *s, unsigned int hval);
- /* insert string, delete string and free hashtable funcitons */
- int ht_insert_str (hashtable_t **ht, const char *s);
- void ht_delete_str (hashtable_t *ht, const char *s);
- void ht_free (hashtable_t *ht);
- #endif
- /* =========================== wordhash_tbl.c =========================== */
- #include <stdio.h>
- #include <stdlib.h>
- #include <string.h>
- #include <limits.h>
- #include <stdint.h>
- #include "wordhash_tbl.h"
- struct _list_t { /* opaque struct for hashtable list */
- char *word;
- struct _list_t *next;
- };
- struct _hashtable_t { /* opaque struct for hashtable */
- int size;
- int filled;
- size_t words;
- list_t **table;
- };
- int ht_size (hashtable_t *ht)
- {
- return ht->size;
- }
- int ht_filled (hashtable_t *ht)
- {
- return ht->filled;
- }
- size_t ht_words (hashtable_t *ht)
- {
- return ht->words;
- }
- /** create hashtable of size 'size', return pointer to hashtable.
- * allocate size buckets and validate, initializing all
- * pointers NULL, setting ht->size to 'size' and filled = 0.
- * returns pointer to allocated hashtable on success, NULL otherwise.
- */
- hashtable_t *ht_create (size_t size)
- {
- hashtable_t *ht;
- if (size < 1) return NULL;
- /* allocate hashtable */
- if (!(ht = calloc (1, sizeof *ht)))
- return NULL;
- /* allocate & INITIALIZE element pointers NULL */
- if (!(ht->table = calloc (size, sizeof *(ht->table))))
- return NULL;
- ht->size = size;
- ht->filled = 0;
- ht->words = 0;
- return ht;
- }
- /** OPENSSL_LH_strhash
- * The following hash seems to work very well on normal text words no
- * collisions on /usr/dict/words and it distributes on %2^n quite well, not
- * as good as MD5, but still good.
- */
- uint32_t lh_strhash (const char *s)
- {
- uint64_t ret = 0, v;
- int64_t n = 0x100;
- int32_t r;
- if (!s || !*s) return (ret);
- for (; *s; s++) {
- v = n | (*s);
- n += 0x100;
- r = (int32_t)((v >> 2) ^ v) & 0x0f;
- ret = (ret << r) | (ret >> (32 - r));
- ret &= 0xFFFFFFFFL;
- ret ^= v * v;
- }
- return ((ret >> 16) ^ ret);
- }
- /** wrapper for lh_strhash returning key (index), 0 - (ht->size - 1).
- * modulo ht-size insures hash is coverted to key that
- * corresponds to an index within the allocated hashtable size.
- */
- uint32_t hash (hashtable_t *ht, const char *s)
- {
- return lh_strhash (s) % ht->size;
- }
- list_t *ht_lookup_str (hashtable_t *ht, const char *s)
- {
- if (!ht || !s) return NULL;
- list_t *list;
- uint32_t hashval = hash (ht, s);
- /* go to the correct list based on the hash value and see if str is
- * in the list. if found, return return a pointer to the list element,
- * otherwise, return NULL.
- */
- for (list = (ht->table)[hashval]; list; list = list->next) {
- if (strcmp (s, list->word) == 0) return list;
- }
- return NULL;
- }
- /** lookup prevents hashing twice when called from within a function */
- list_t *ht_lookup_hash (hashtable_t *ht, const char *s, unsigned int hval)
- {
- if (!ht) return NULL;
- for (list_t *list = (ht->table)[hval]; list; list = list->next) {
- if (strcmp (s, list->word) == 0) return list;
- }
- return NULL;
- }
- /** resize hashtable increase by 2x current, rehashing all keys.
- * double the current number of buckets, moving current list nodes
- * to new buckets/lists within the resized hashtable, free existing
- * ht->table returning pointer to resized hashtable on success,
- * NULL otherwise.
- */
- hashtable_t *ht_rehash (hashtable_t *ht)
- {
- size_t htsz = ht->size * 2; /* double current hashtable size */
- hashtable_t *nht = NULL;
- if (!(nht = ht_create (htsz))) /* allocate new hashtable, validate */
- return NULL;
- nht->size = htsz; /* assign new size */
- for (int i = 0; i < ht->size; i++) /* for each old bucket */
- for (list_t *l = (ht->table)[i]; l;) { /* for each list entry */
- list_t *nextlp = l->next; /* save next list ptr */
- uint32_t nhash = hash (nht, l->word); /* compute new hash */
- l->next = (nht->table)[nhash]; /* set next to index */
- if (!(nht->table)[nhash]) /* set filled buckets */
- (nht->filled)++;
- (nht->table)[nhash] = l; /* set index to list */
- l = nextlp; /* assign next old to iterate */
- }
- nht->words = ht->words; /* assign no. of words */
- free (ht->table); /* free old list ptrs */
- free (ht); /* free old hashtable */
- return nht;
- }
- /** insert string into hashtable, check load, resize/rehash.
- * returns 0 if string placed in new bucket 1 if string exists,
- * otherwise, return -1 on invalid parameter or allocation failure,
- * -2 on resize/rehash failure.
- */
- int ht_insert_str (hashtable_t **ht, const char *s)
- {
- if (!ht || !s) return -1;
- if (!*ht) /* if hashtable NULL */
- if (!(*ht = ht_create (HTSZ))) /* create empty hashtable & validate */
- return -1;
- list_t *new_list, *current_list;
- uint32_t hashval = hash (*ht, s);
- size_t len = 0;
- /* Does item already exist? */
- if ((current_list = ht_lookup_hash (*ht, s, hashval))) return 1;
- /* allocate / validate new list node */
- if (!(new_list = malloc (sizeof *new_list))) return -1;
- if (!((*ht)->table)[hashval]) (*ht)->filled++; /* no. of filled buckets */
- /* Insert into list */
- len = strlen(s); /* get length of string */
- new_list->word = malloc (len + 1); /* allocate string storage */
- if (!new_list->word) { /* validate allocation */
- perror ("malloc-new_list->word");
- return -1;
- }
- memcpy (new_list->word, s, len + 1); /* copy string to new storage */
- (*ht)->words++; /* increment word count */
- new_list->next = ((*ht)->table)[hashval]; /* set next to key addr (ptr is NULL) */
- ((*ht)->table)[hashval] = new_list; /* set key to list */
- while (ht_load (*ht) > LOADFACT) { /* check load factor, resize/rehash */
- #ifdef DEBUG
- printf (" ht_load : %.2f = %d / %d => %d\n",
- ht_load (*ht), (*ht)->filled, (*ht)->size, (*ht)->size * 2);
- #endif
- hashtable_t *tmp = ht_rehash (*ht);
- if (!tmp)
- return -2;
- *ht = tmp;
- }
- return 0;
- }
- /** custom free for each type of list_t.
- * (if not more than one member, just handle in ht_free)
- */
- /*
- void ht_free_node (list_t *lp)
- {
- free (lp->word);
- free (lp->defn);
- free (lp);
- }
- */
- void ht_delete_str (hashtable_t *ht, const char *s)
- {
- if (!ht || !s) return;
- list_t *list, *victim, **pplist;
- uint32_t hashval = hash (ht, s);
- /* lookup element, bail if it does not exist */
- if (!(victim = ht_lookup_hash (ht, s, hashval))) return;
- list = ht->table[hashval];
- pplist = &list;
- for (; list; pplist = &list->next, list = list->next)
- if (list == victim) {
- *pplist = list->next;
- free (victim->word);
- free (victim);
- ht->words--;
- break;
- }
- }
- void ht_free (hashtable_t *ht)
- {
- int i;
- list_t *list, *victim;
- if (!ht) return;
- /* Free the memory for every item in the table, including the
- * words themselves.
- */
- for (i = 0; i < ht->size; i++) {
- list = (ht->table)[i];
- while (list) {
- victim = list;
- list = list->next;
- free (victim->word);
- free (victim);
- }
- }
- /* Free the table itself */
- free (ht->table);
- free (ht);
- }
- float ht_load (hashtable_t *ht)
- {
- return ht ? (float)ht->filled/ht->size : -1.0F;
- }
- /* =========================== wordhash_tst.c =========================== */
- #include "wordhash_tbl.h"
- #define MAXC 64 /* max characters in read buffer for words */
- #define TBLSIZE 256 /* initial number of buckets in table */
- int main (int argc, char **argv) {
- hashtable_t *ht = NULL; /* pointer to hashtable */
- char buf[MAXC]; /* buffer for reading from file */
- /* use filename provided as 1st argument (stdin by default) */
- FILE *fp = argc > 1 ? fopen (argv[1], "r") : stdin;
- if (!fp) { /* validate file open for reading */
- perror ("file open failed");
- return 1;
- }
- if (!(ht = ht_create (TBLSIZE))) { /* create/validate hashtable */
- perror ("ht_create (TBLSIZE)");
- exit (EXIT_FAILURE);
- }
- while (fgets (buf, MAXC, fp)) {
- int rtn; /* variable for insert return */
- buf[strcspn(buf, "\n")] = 0; /* trim trailing '\n' from buf */
- /* insert word in hashtable/validate */
- if ((rtn = ht_insert_str (&ht, buf)) < 0) {
- if (rtn == -1)
- fputs ("error: bad parameter or allocation failure\n", stderr);
- else
- fputs ("error: resize/rehash failure\n", stderr);
- exit (EXIT_FAILURE);
- }
- }
- if (fp != stdin) /* close file if not stdin */
- fclose (fp);
- printf ("hast table size : %d\n"
- "buckets filled : %d\n"
- "load factor : %.2f\n"
- "words in table : %zu\n",
- ht_size(ht), ht_filled(ht), ht_load(ht), ht_words(ht));
- ht_free (ht);
- }
Add Comment
Please, Sign In to add comment