diff options
Diffstat (limited to 'lib/hashtable.c')
-rw-r--r-- | lib/hashtable.c | 721 |
1 files changed, 721 insertions, 0 deletions
diff --git a/lib/hashtable.c b/lib/hashtable.c new file mode 100644 index 00000000000..2f3b5c8d102 --- /dev/null +++ b/lib/hashtable.c @@ -0,0 +1,721 @@ +/* + * This implementation is based on code from uClibc-0.9.30.3 but was + * modified and extended for use within U-Boot. + * + * Copyright (C) 2010 Wolfgang Denk <wd@denx.de> + * + * Original license header: + * + * Copyright (C) 1993, 1995, 1996, 1997, 2002 Free Software Foundation, Inc. + * This file is part of the GNU C Library. + * Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1993. + * + * The GNU C Library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * The GNU C Library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with the GNU C Library; if not, write to the Free + * Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + * 02111-1307 USA. + */ + +#include <errno.h> +#include <malloc.h> + +#ifdef USE_HOSTCC /* HOST build */ +# include <string.h> +# include <assert.h> + +# ifndef debug +# ifdef DEBUG +# define debug(fmt,args...) printf(fmt ,##args) +# else +# define debug(fmt,args...) +# endif +# endif +#else /* U-Boot build */ +# include <common.h> +# include <linux/string.h> +#endif + +#include "search.h" + +/* + * [Aho,Sethi,Ullman] Compilers: Principles, Techniques and Tools, 1986 + * [Knuth] The Art of Computer Programming, part 3 (6.4) + */ + +/* + * The non-reentrant version use a global space for storing the hash table. + */ +static struct hsearch_data htab; + +/* + * The reentrant version has no static variables to maintain the state. + * Instead the interface of all functions is extended to take an argument + * which describes the current status. + */ +typedef struct _ENTRY { + unsigned int used; + ENTRY entry; +} _ENTRY; + + +/* + * hcreate() + */ + +/* + * For the used double hash method the table size has to be a prime. To + * correct the user given table size we need a prime test. This trivial + * algorithm is adequate because + * a) the code is (most probably) called a few times per program run and + * b) the number is small because the table must fit in the core + * */ +static int isprime(unsigned int number) +{ + /* no even number will be passed */ + unsigned int div = 3; + + while (div * div < number && number % div != 0) + div += 2; + + return number % div != 0; +} + +int hcreate(size_t nel) +{ + return hcreate_r(nel, &htab); +} + +/* + * Before using the hash table we must allocate memory for it. + * Test for an existing table are done. We allocate one element + * more as the found prime number says. This is done for more effective + * indexing as explained in the comment for the hsearch function. + * The contents of the table is zeroed, especially the field used + * becomes zero. + */ +int hcreate_r(size_t nel, struct hsearch_data *htab) +{ + /* Test for correct arguments. */ + if (htab == NULL) { + __set_errno(EINVAL); + return 0; + } + + /* There is still another table active. Return with error. */ + if (htab->table != NULL) + return 0; + + /* Change nel to the first prime number not smaller as nel. */ + nel |= 1; /* make odd */ + while (!isprime(nel)) + nel += 2; + + htab->size = nel; + htab->filled = 0; + + /* allocate memory and zero out */ + htab->table = (_ENTRY *) calloc(htab->size + 1, sizeof(_ENTRY)); + if (htab->table == NULL) + return 0; + + /* everything went alright */ + return 1; +} + + +/* + * hdestroy() + */ +void hdestroy(void) +{ + hdestroy_r(&htab); +} + +/* + * After using the hash table it has to be destroyed. The used memory can + * be freed and the local static variable can be marked as not used. + */ +void hdestroy_r(struct hsearch_data *htab) +{ + int i; + + /* Test for correct arguments. */ + if (htab == NULL) { + __set_errno(EINVAL); + return; + } + + /* free used memory */ + for (i = 1; i <= htab->size; ++i) { + if (htab->table[i].used) { + ENTRY *ep = &htab->table[i].entry; + + free(ep->key); + free(ep->data); + } + } + free(htab->table); + + /* the sign for an existing table is an value != NULL in htable */ + htab->table = NULL; +} + +/* + * hsearch() + */ + +/* + * This is the search function. It uses double hashing with open addressing. + * The argument item.key has to be a pointer to an zero terminated, most + * probably strings of chars. The function for generating a number of the + * strings is simple but fast. It can be replaced by a more complex function + * like ajw (see [Aho,Sethi,Ullman]) if the needs are shown. + * + * We use an trick to speed up the lookup. The table is created by hcreate + * with one more element available. This enables us to use the index zero + * special. This index will never be used because we store the first hash + * index in the field used where zero means not used. Every other value + * means used. The used field can be used as a first fast comparison for + * equality of the stored and the parameter value. This helps to prevent + * unnecessary expensive calls of strcmp. + * + * This implementation differs from the standard library version of + * this function in a number of ways: + * + * - While the standard version does not make any assumptions about + * the type of the stored data objects at all, this implementation + * works with NUL terminated strings only. + * - Instead of storing just pointers to the original objects, we + * create local copies so the caller does not need to care about the + * data any more. + * - The standard implementation does not provide a way to update an + * existing entry. This version will create a new entry or update an + * existing one when both "action == ENTER" and "item.data != NULL". + * - Instead of returning 1 on success, we return the index into the + * internal hash table, which is also guaranteed to be positive. + * This allows us direct access to the found hash table slot for + * example for functions like hdelete(). + */ + +ENTRY *hsearch(ENTRY item, ACTION action) +{ + ENTRY *result; + + (void) hsearch_r(item, action, &result, &htab); + + return result; +} + +int hsearch_r(ENTRY item, ACTION action, ENTRY ** retval, + struct hsearch_data *htab) +{ + unsigned int hval; + unsigned int count; + unsigned int len = strlen(item.key); + unsigned int idx; + + /* Compute an value for the given string. Perhaps use a better method. */ + hval = len; + count = len; + while (count-- > 0) { + hval <<= 4; + hval += item.key[count]; + } + + /* + * First hash function: + * simply take the modul but prevent zero. + */ + hval %= htab->size; + if (hval == 0) + ++hval; + + /* The first index tried. */ + idx = hval; + + if (htab->table[idx].used) { + /* + * Further action might be required according to the + * action value. + */ + unsigned hval2; + + if (htab->table[idx].used == hval + && strcmp(item.key, htab->table[idx].entry.key) == 0) { + /* Overwrite existing value? */ + if ((action == ENTER) && (item.data != NULL)) { + free(htab->table[idx].entry.data); + htab->table[idx].entry.data = + strdup(item.data); + if (!htab->table[idx].entry.data) { + __set_errno(ENOMEM); + *retval = NULL; + return 0; + } + } + /* return found entry */ + *retval = &htab->table[idx].entry; + return idx; + } + + /* + * Second hash function: + * as suggested in [Knuth] + */ + hval2 = 1 + hval % (htab->size - 2); + + do { + /* + * Because SIZE is prime this guarantees to + * step through all available indices. + */ + if (idx <= hval2) + idx = htab->size + idx - hval2; + else + idx -= hval2; + + /* + * If we visited all entries leave the loop + * unsuccessfully. + */ + if (idx == hval) + break; + + /* If entry is found use it. */ + if ((htab->table[idx].used == hval) + && strcmp(item.key, htab->table[idx].entry.key) == 0) { + /* Overwrite existing value? */ + if ((action == ENTER) && (item.data != NULL)) { + free(htab->table[idx].entry.data); + htab->table[idx].entry.data = + strdup(item.data); + if (!htab->table[idx].entry.data) { + __set_errno(ENOMEM); + *retval = NULL; + return 0; + } + } + /* return found entry */ + *retval = &htab->table[idx].entry; + return idx; + } + } + while (htab->table[idx].used); + } + + /* An empty bucket has been found. */ + if (action == ENTER) { + /* + * If table is full and another entry should be + * entered return with error. + */ + if (htab->filled == htab->size) { + __set_errno(ENOMEM); + *retval = NULL; + return 0; + } + + /* + * Create new entry; + * create copies of item.key and item.data + */ + htab->table[idx].used = hval; + htab->table[idx].entry.key = strdup(item.key); + htab->table[idx].entry.data = strdup(item.data); + if (!htab->table[idx].entry.key || + !htab->table[idx].entry.data) { + __set_errno(ENOMEM); + *retval = NULL; + return 0; + } + + ++htab->filled; + + /* return new entry */ + *retval = &htab->table[idx].entry; + return 1; + } + + __set_errno(ESRCH); + *retval = NULL; + return 0; +} + + +/* + * hdelete() + */ + +/* + * The standard implementation of hsearch(3) does not provide any way + * to delete any entries from the hash table. We extend the code to + * do that. + */ + +int hdelete(const char *key) +{ + return hdelete_r(key, &htab); +} + +int hdelete_r(const char *key, struct hsearch_data *htab) +{ + ENTRY e, *ep; + int idx; + + debug("hdelete: DELETE key \"%s\"\n", key); + + e.key = (char *)key; + + if ((idx = hsearch_r(e, FIND, &ep, htab)) == 0) { + __set_errno(ESRCH); + return 0; /* not found */ + } + + /* free used ENTRY */ + debug("hdelete: DELETING key \"%s\"\n", key); + + free(ep->key); + free(ep->data); + htab->table[idx].used = 0; + + --htab->filled; + + return 1; +} + +/* + * hexport() + */ + +/* + * Export the data stored in the hash table in linearized form. + * + * Entries are exported as "name=value" strings, separated by an + * arbitrary (non-NUL, of course) separator character. This allows to + * use this function both when formatting the U-Boot environment for + * external storage (using '\0' as separator), but also when using it + * for the "printenv" command to print all variables, simply by using + * as '\n" as separator. This can also be used for new features like + * exporting the environment data as text file, including the option + * for later re-import. + * + * The entries in the result list will be sorted by ascending key + * values. + * + * If the separator character is different from NUL, then any + * separator characters and backslash characters in the values will + * be escaped by a preceeding backslash in output. This is needed for + * example to enable multi-line values, especially when the output + * shall later be parsed (for example, for re-import). + * + * There are several options how the result buffer is handled: + * + * *resp size + * ----------- + * NULL 0 A string of sufficient length will be allocated. + * NULL >0 A string of the size given will be + * allocated. An error will be returned if the size is + * not sufficient. Any unused bytes in the string will + * be '\0'-padded. + * !NULL 0 The user-supplied buffer will be used. No length + * checking will be performed, i. e. it is assumed that + * the buffer size will always be big enough. DANGEROUS. + * !NULL >0 The user-supplied buffer will be used. An error will + * be returned if the size is not sufficient. Any unused + * bytes in the string will be '\0'-padded. + */ + +ssize_t hexport(const char sep, char **resp, size_t size) +{ + return hexport_r(&htab, sep, resp, size); +} + +static int cmpkey(const void *p1, const void *p2) +{ + ENTRY *e1 = *(ENTRY **) p1; + ENTRY *e2 = *(ENTRY **) p2; + + return (strcmp(e1->key, e2->key)); +} + +ssize_t hexport_r(struct hsearch_data *htab, const char sep, + char **resp, size_t size) +{ + ENTRY *list[htab->size]; + char *res, *p; + size_t totlen; + int i, n; + + /* Test for correct arguments. */ + if ((resp == NULL) || (htab == NULL)) { + __set_errno(EINVAL); + return (-1); + } + + debug("EXPORT table = %p, htab.size = %d, htab.filled = %d, size = %d\n", + htab, htab->size, htab->filled, size); + /* + * Pass 1: + * search used entries, + * save addresses and compute total length + */ + for (i = 1, n = 0, totlen = 0; i <= htab->size; ++i) { + + if (htab->table[i].used) { + ENTRY *ep = &htab->table[i].entry; + + list[n++] = ep; + + totlen += strlen(ep->key) + 2; + + if (sep == '\0') { + totlen += strlen(ep->data); + } else { /* check if escapes are needed */ + char *s = ep->data; + + while (*s) { + ++totlen; + /* add room for needed escape chars */ + if ((*s == sep) || (*s == '\\')) + ++totlen; + ++s; + } + } + totlen += 2; /* for '=' and 'sep' char */ + } + } + +#ifdef DEBUG + /* Pass 1a: print unsorted list */ + printf("Unsorted: n=%d\n", n); + for (i = 0; i < n; ++i) { + printf("\t%3d: %p ==> %-10s => %s\n", + i, list[i], list[i]->key, list[i]->data); + } +#endif + + /* Sort list by keys */ + qsort(list, n, sizeof(ENTRY *), cmpkey); + + /* Check if the user supplied buffer size is sufficient */ + if (size) { + if (size < totlen + 1) { /* provided buffer too small */ + debug("### buffer too small: %d, but need %d\n", + size, totlen + 1); + __set_errno(ENOMEM); + return (-1); + } + } else { + size = totlen + 1; + } + + /* Check if the user provided a buffer */ + if (*resp) { + /* yes; clear it */ + res = *resp; + memset(res, '\0', size); + } else { + /* no, allocate and clear one */ + *resp = res = calloc(1, size); + if (res == NULL) { + __set_errno(ENOMEM); + return (-1); + } + } + /* + * Pass 2: + * export sorted list of result data + */ + for (i = 0, p = res; i < n; ++i) { + char *s; + + s = list[i]->key; + while (*s) + *p++ = *s++; + *p++ = '='; + + s = list[i]->data; + + while (*s) { + if ((*s == sep) || (*s == '\\')) + *p++ = '\\'; /* escape */ + *p++ = *s++; + } + *p++ = sep; + } + *p = '\0'; /* terminate result */ + + return size; +} + + +/* + * himport() + */ + +/* + * Import linearized data into hash table. + * + * This is the inverse function to hexport(): it takes a linear list + * of "name=value" pairs and creates hash table entries from it. + * + * Entries without "value", i. e. consisting of only "name" or + * "name=", will cause this entry to be deleted from the hash table. + * + * The "flag" argument can be used to control the behaviour: when the + * H_NOCLEAR bit is set, then an existing hash table will kept, i. e. + * new data will be added to an existing hash table; otherwise, old + * data will be discarded and a new hash table will be created. + * + * The separator character for the "name=value" pairs can be selected, + * so we both support importing from externally stored environment + * data (separated by NUL characters) and from plain text files + * (entries separated by newline characters). + * + * To allow for nicely formatted text input, leading white space + * (sequences of SPACE and TAB chars) is ignored, and entries starting + * (after removal of any leading white space) with a '#' character are + * considered comments and ignored. + * + * [NOTE: this means that a variable name cannot start with a '#' + * character.] + * + * When using a non-NUL separator character, backslash is used as + * escape character in the value part, allowing for example for + * multi-line values. + * + * In theory, arbitrary separator characters can be used, but only + * '\0' and '\n' have really been tested. + */ + +int himport(const char *env, size_t size, const char sep, int flag) +{ + return himport_r(&htab, env, size, sep, flag); +} + +int himport_r(struct hsearch_data *htab, + const char *env, size_t size, const char sep, int flag) +{ + char *data, *sp, *dp, *name, *value; + + /* Test for correct arguments. */ + if (htab == NULL) { + __set_errno(EINVAL); + return 0; + } + + /* we allocate new space to make sure we can write to the array */ + if ((data = malloc(size)) == NULL) { + debug("himport_r: can't malloc %d bytes\n", size); + __set_errno(ENOMEM); + return 0; + } + memcpy(data, env, size); + dp = data; + + if ((flag & H_NOCLEAR) == 0) { + /* Destroy old hash table if one exists */ + debug("Destroy Hash Table: %p table = %p\n", htab, + htab->table); + if (htab->table) + hdestroy_r(htab); + } + + /* + * Create new hash table (if needed). The computation of the hash + * table size is based on heuristics: in a sample of some 70+ + * existing systems we found an average size of 39+ bytes per entry + * in the environment (for the whole key=value pair). Assuming a + * size of 7 per entry (= safety factor of >5) should provide enough + * safety margin for any existing environment definitons and still + * allow for more than enough dynamic additions. Note that the + * "size" argument is supposed to give the maximum enviroment size + * (CONFIG_ENV_SIZE). + */ + + if (!htab->table) { + int nent = size / 7; + + debug("Create Hash Table: N=%d\n", nent); + + if (hcreate_r(nent, htab) == 0) { + free(data); + return 0; + } + } + + /* Parse environment; allow for '\0' and 'sep' as separators */ + do { + ENTRY e, *rv; + + /* skip leading white space */ + while ((*dp == ' ') || (*dp == '\t')) + ++dp; + + /* skip comment lines */ + if (*dp == '#') { + while (*dp && (*dp != sep)) + ++dp; + ++dp; + continue; + } + + /* parse name */ + for (name = dp; *dp != '=' && *dp && *dp != sep; ++dp) + ; + + /* deal with "name" and "name=" entries (delete var) */ + if (*dp == '\0' || *(dp + 1) == '\0' || + *dp == sep || *(dp + 1) == sep) { + if (*dp == '=') + *dp++ = '\0'; + *dp++ = '\0'; /* terminate name */ + + debug("DELETE CANDIDATE: \"%s\"\n", name); + + if (hdelete_r(name, htab) == 0) + debug("DELETE ERROR ##############################\n"); + + continue; + } + *dp++ = '\0'; /* terminate name */ + + /* parse value; deal with escapes */ + for (value = sp = dp; *dp && (*dp != sep); ++dp) { + if ((*dp == '\\') && *(dp + 1)) + ++dp; + *sp++ = *dp; + } + *sp++ = '\0'; /* terminate value */ + ++dp; + + /* enter into hash table */ + e.key = name; + e.data = value; + + hsearch_r(e, ENTER, &rv, htab); + if (rv == NULL) { + printf("himport_r: can't insert \"%s=%s\" into hash table\n", name, value); + return 0; + } + + debug("INSERT: %p ==> name=\"%s\" value=\"%s\"\n", rv, name, + value); + debug(" table = %p, size = %d, filled = %d\n", htab, + htab->size, htab->filled); + } while ((dp < data + size) && *dp); /* size check needed for text */ + /* without '\0' termination */ + free(data); + + return 1; /* everything OK */ +} |