00001 /*
00002     File:       Hash.cc
00003 
00004     Function:   Provides a string-based hash table
00005 
00006     Author:     Andrew Willmott, using C code from Greg Ward
00007 
00008     Notes:      When a hash entry is deleted, we can't just clear it completely,
00009                 as it might be part of a chain, and doing so would make it
00010                 impossible to find all entries after it.
00011 
00012                 Instead, the data field is set to zero, to mark it as
00013                 deleted, and these entries are reclaimed only when the
00014                 hash table fills up.  
00015 
00016                 This is mostly taken from Greg Ward's code in mgflib
00017 
00018                 indicators:
00019                     key == null -> empty entry
00020                     otherwise
00021                     data == null -> deleted entry
00022 */
00023 
00024 #include "cl/Hash.h"
00025 
00026 #include <stdio.h>
00027 
00028 const Int tHashSize[] =
00029 {
00030      31, 61, 127, 251, 509, 1021, 2039, 4093,
00031      8191, 16381, 32749, 65521, 131071, 262139, 524287, 1048573,
00032      2097143, 4194301, 8388593, 0
00033 };
00034 
00035 Hash::Hash(Int n) : copyKey(true)
00036 {
00037     if (n > 0)
00038         Init(n);
00039 }
00040 
00041 Int Hash::Init(Int n)
00042 {
00043     const Int   *hsp;
00044     Int         i;
00045 
00046     // aim for 66% occupancy
00047     n += n / 2;
00048 
00049     // find a good prime size for our table
00050     for (hsp = tHashSize; *hsp; hsp++)
00051         if (*hsp > n)
00052             break;
00053 
00054     if (*hsp == 0)
00055         // too big for our table!
00056         n = n * 2 + 1;      // not always prime, but hey...
00057     else
00058         n = *hsp;
00059 
00060     table.SetSize(n);
00061     for (i = 0; i < table.NumItems(); i++)
00062     {
00063         table[i].key = 0;
00064         table[i].hash = 0;
00065         table[i].data = 0;
00066     }
00067 
00068     numDeleted = 0;
00069 
00070     return(table.NumItems());
00071 }
00072 
00073 const Byte tShuffle[256] =
00074 {
00075     0, 157, 58, 215, 116, 17, 174, 75, 232, 133, 34,
00076     191, 92, 249, 150, 51, 208, 109, 10, 167, 68, 225,
00077     126, 27, 184, 85, 242, 143, 44, 201, 102, 3, 160,
00078     61, 218, 119, 20, 177, 78, 235, 136, 37, 194, 95,
00079     252, 153, 54, 211, 112, 13, 170, 71, 228, 129, 30,
00080     187, 88, 245, 146, 47, 204, 105, 6, 163, 64, 221,
00081     122, 23, 180, 81, 238, 139, 40, 197, 98, 255, 156,
00082     57, 214, 115, 16, 173, 74, 231, 132, 33, 190, 91,
00083     248, 149, 50, 207, 108, 9, 166, 67, 224, 125, 26,
00084     183, 84, 241, 142, 43, 200, 101, 2, 159, 60, 217,
00085     118, 19, 176, 77, 234, 135, 36, 193, 94, 251, 152,
00086     53, 210, 111, 12, 169, 70, 227, 128, 29, 186, 87,
00087     244, 145, 46, 203, 104, 5, 162, 63, 220, 121, 22,
00088     179, 80, 237, 138, 39, 196, 97, 254, 155, 56, 213,
00089     114, 15, 172, 73, 230, 131, 32, 189, 90, 247, 148,
00090     49, 206, 107, 8, 165, 66, 223, 124, 25, 182, 83,
00091     240, 141, 42, 199, 100, 1, 158, 59, 216, 117, 18,
00092     175, 76, 233, 134, 35, 192, 93, 250, 151, 52, 209,
00093     110, 11, 168, 69, 226, 127, 28, 185, 86, 243, 144,
00094     45, 202, 103, 4, 161, 62, 219, 120, 21, 178, 79,
00095     236, 137, 38, 195, 96, 253, 154, 55, 212, 113, 14,
00096     171, 72, 229, 130, 31, 188, 89, 246, 147, 48, 205,
00097     106, 7, 164, 65, 222, 123, 24, 181, 82, 239, 140,
00098     41, 198, 99
00099 };
00100 
00101 ULong Hash::CalcHash(StrConst s)
00102 {
00103     Int     i = 0;
00104     ULong   result = 0;
00105     Byte    *t = (Byte *) (const Char *) s;
00106 
00107     while (*t)
00108             result ^= (ULong) tShuffle[*t++] << ((i += 11) & 0x0F);
00109 
00110     return(result);
00111 }
00112 
00113 
00114 HashEntry *Hash::Find(StrConst s)
00115 // find a table entry 
00116 {
00117     ULong       hash;
00118     Int         i, n;
00119     Int         ndx;
00120     HashEntry   *le;
00121     
00122     // look up object 
00123 
00124     if (table.NumItems() == 0)
00125         if (!Init(16))
00126             return(0);
00127 
00128     hash = CalcHash(s);
00129 
00130     // this is a closed hash table. if there's a collision, we
00131     // chain by x^2. i.e. positions n, n + 1, n + 4, n + 9...
00132     // belong to the same bucket.
00133 
00134     // start from the hash
00135     ndx = hash % table.NumItems();
00136     
00137     for (i = 0, n = 1; i < table.NumItems(); i++, n += 2)
00138     {
00139         // look at next entry in the chain
00140         le = &table[ndx];
00141 
00142         if (le->key == 0)
00143         // the entry is free: return it for the user to (maybe) fill in
00144         {
00145             le->hash = hash;
00146             return(le);
00147         }
00148 
00149         if (le->hash == hash && (le->key == s))
00150         // we have a match!
00151         {
00152             return(le);
00153         }
00154 
00155         // find next entry in the chain
00156         ndx += n;
00157         if (ndx >= table.NumItems())
00158             ndx = ndx % table.NumItems();
00159     }
00160 
00161     // table is full; reallocate 
00162 
00163     Array<HashEntry>    oldTable;
00164 
00165     oldTable.Replace(table);
00166 
00167     // try to resize table
00168     if (!Init(table.NumItems() - numDeleted + 1))
00169     {
00170         // revert back to old table & return if ran out of memory
00171         table.Replace(oldTable);        
00172         return(0);
00173     }
00174     
00175     // copy contents of old table to the new...
00176     ndx = oldTable.NumItems();
00177     while (ndx--)
00178         if (oldTable[ndx].key != 0)
00179             if (oldTable[ndx].data != 0)
00180                 // in-use entry
00181                 *Find(oldTable[ndx].key) = oldTable[ndx];
00182             else 
00183                 // deleted entry
00184                 FreeKey(oldTable[ndx].key);
00185 
00186     oldTable.Clear();
00187 
00188     return(Find(s));
00189 }
00190 
00191 Void Hash::SetVoidItem(StrConst s, Void *data)
00192 {
00193     HashEntry *he = Find(s);
00194     if (he)
00195     {
00196         if (copyKey)
00197         {
00198             he->key = new Char[s.Length() + 1];
00199             strcpy(he->key, s);
00200         }
00201         else
00202             he->key = (Char *) (const Char *) s;
00203 
00204         if (he->data)
00205             FreeData(he->data);
00206         he->data = data;
00207     }
00208 };
00209 
00210 Void Hash::DeleteItem(StrConst s)
00211 {
00212     HashEntry   *le;
00213 
00214     le = Find(s);
00215     if (!le)
00216         return;
00217 
00218     // something to free?
00219     if (le->key == 0 || le->data == 0)
00220         return;
00221         
00222     FreeData(le);
00223     le->data = 0;
00224 
00225     numDeleted++;
00226 }
00227 
00228 Void Hash::Iterate(HashIter &iter)
00229 {
00230     Int     i;
00231 
00232     for (i = 0; i < table.NumItems(); i++)
00233         if (table[i].key && table[i].data)
00234             iter.ProcessVoidItem(table[i].key, table[i].data);
00235 }
00236 
00237 Void Hash::FreeData(Void *data)
00238 {
00239 }
00240 
00241 Void Hash::FreeKey(Char *key)
00242 {
00243     if (copyKey)
00244         delete key;
00245     key = 0;
00246 }
00247 
00248 Void Hash::FreeTable()
00249 // free table and contents
00250 {
00251     Int     i;
00252     
00253     for (i = 0; i < table.NumItems(); i++)
00254         if (table[i].key != 0)
00255         {
00256             FreeKey(table[i].key);
00257             if (table[i].data != 0)
00258                 FreeData(&table[i]);
00259         }
00260 
00261     table.Clear();
00262     numDeleted = 0;
00263 }
00264  
00265 #ifdef CL_SGI_INST
00266 #pragma instantiate Array<HashEntry>
00267 #endif