diff --git sal/rtl/source/hash.cxx sal/rtl/source/hash.cxx index 63de2b4..38eb5e8 100644 --- sal/rtl/source/hash.cxx +++ sal/rtl/source/hash.cxx @@ -26,12 +26,16 @@ ************************************************************************/ // MARKER(update_precomp.py): autogen include statement, do not remove +#if 0 #include "precompiled_sal.hxx" #include "rtl/allocator.hxx" +#endif #include "hash.h" #include "strimp.h" +#include +#if 0 #include @@ -110,4 +114,221 @@ rtl_str_hash_remove (rtl_uString *pString) getHashTable()->erase(pString); } +#endif + +// --------------------------- start here --------------------------- + +struct StringHashTableImpl { + sal_uInt32 nEntries; + sal_uInt32 nSize; + rtl_uString **pData; +}; + +typedef StringHashTableImpl StringHashTable; + +extern "C" { + +// Only for use in the implementation +StringHashTable *rtl_str_hash_new (sal_uInt32 nSize); +void rtl_str_hash_free (StringHashTable *pHash); + +} + +StringHashTable * +getHashTable () +{ + static StringHashTable *pInternPool = NULL; + if (pInternPool == NULL) { + static StringHashTable* pHash = rtl_str_hash_new(1024); + pInternPool = pHash; + } + return pInternPool; +} + +extern "C" { + +// Better / smaller / faster hash set .... + +// TODO: add bottom bit-set list terminator to string list + +static sal_uInt32 +getNextSize (sal_uInt32 nSize) +{ + // Sedgewick - Algorithms in C P577. + static const sal_uInt32 nPrimes[] = { 1021, 2039, 4093, 8191, 16381, 32749, + 65521, 131071,262139, 524287, 1048573, + 2097143, 4194301, 8388593, 16777213, + 33554393, 67108859, 134217689 }; + #define NUM_PRIMES (sizeof (nPrimes)/ sizeof (nPrimes[0])) + for (sal_uInt32 i = 0; i < NUM_PRIMES; i++) + { + if (nPrimes[i] > nSize) + return nPrimes[i]; + } + return nSize * 2; +} + +static sal_uInt32 +hashString (rtl_uString *pString) +{ + return (sal_uInt32) rtl_ustr_hashCode_WithLength (pString->buffer, + pString->length); +} + +StringHashTable * +rtl_str_hash_new (sal_uInt32 nSize) +{ + StringHashTable *pHash = (StringHashTable *)malloc (sizeof (StringHashTable)); + + pHash->nEntries = 0; + pHash->nSize = getNextSize (nSize); + pHash->pData = (rtl_uString **) calloc (sizeof (rtl_uString *), pHash->nSize); + + return pHash; +} + +void +rtl_str_hash_free (StringHashTable *pHash) +{ + if (!pHash) + return; + if (pHash->pData) + free (pHash->pData); + free (pHash); +} + +static void +rtl_str_hash_insert_nonequal (StringHashTable *pHash, + rtl_uString *pString) +{ + sal_uInt32 nHash = hashString (pString); + sal_uInt32 n; + rtl_uString *pHashStr; + + n = nHash % pHash->nSize; + while ((pHashStr = pHash->pData[n]) != NULL) { + n++; + if (n >= pHash->nSize) + n = 0; + } + pHash->pData[n] = pString; +} + +static void +rtl_str_hash_resize (sal_uInt32 nNewSize) +{ + sal_uInt32 i; + StringHashTable *pNewHash; + StringHashTable *pHash = getHashTable(); + + OSL_ASSERT (nNewSize > pHash->nEntries); + + pNewHash = rtl_str_hash_new (nNewSize); + + for (i = 0; i < pHash->nSize; i++) + { + if (pHash->pData[i] != NULL) + rtl_str_hash_insert_nonequal (pNewHash, pHash->pData[i]); + } + pNewHash->nEntries = pHash->nEntries; + free (pHash->pData); + *pHash = *pNewHash; + pNewHash->pData = NULL; + rtl_str_hash_free (pNewHash); +} + +static int +compareEqual (rtl_uString *pStringA, rtl_uString *pStringB) +{ + if (pStringA == pStringB) + return 1; + if (pStringA->length != pStringB->length) + return 0; + return !rtl_ustr_compare_WithLength( pStringA->buffer, pStringA->length, + pStringB->buffer, pStringB->length); +} + + +rtl_uString * +rtl_str_hash_intern (rtl_uString *pString, + int can_return) +{ + sal_uInt32 nHash = hashString (pString); + sal_uInt32 n; + rtl_uString *pHashStr; + + StringHashTable *pHash = getHashTable(); + + // Should we resize ? + if (pHash->nEntries >= pHash->nSize/2) + rtl_str_hash_resize (getNextSize(pHash->nSize)); + + n = nHash % pHash->nSize; + while ((pHashStr = pHash->pData[n]) != NULL) { + if (compareEqual (pHashStr, pString)) + { + rtl_uString_acquire (pHashStr); + return pHashStr; + } + n++; + if (n >= pHash->nSize) + n = 0; + } + + if (!can_return) + { + rtl_uString *pCopy = NULL; + rtl_uString_newFromString( &pCopy, pString ); + pString = pCopy; + if (!pString) + return NULL; + } + + if (!SAL_STRING_IS_STATIC (pString)) + pString->refCount |= SAL_STRING_INTERN_FLAG; + pHash->pData[n] = pString; + pHash->nEntries++; + + return pString; +} + +void +rtl_str_hash_remove (rtl_uString *pString) +{ + sal_uInt32 n; + sal_uInt32 nHash = hashString (pString); + rtl_uString *pHashStr; + + StringHashTable *pHash = getHashTable(); + + n = nHash % pHash->nSize; + while ((pHashStr = pHash->pData[n]) != NULL) { + if (compareEqual (pHashStr, pString)) + break; + n++; + if (n >= pHash->nSize) + n = 0; + } + OSL_ASSERT (pHash->pData[n] != 0); + if (pHash->pData[n] == NULL) + return; + + pHash->pData[n++] = NULL; + pHash->nEntries--; + + if (n >= pHash->nSize) + n = 0; + + while ((pHashStr = pHash->pData[n]) != NULL) { + pHash->pData[n] = NULL; + // FIXME: rather unsophisticated and N^2 in chain-length, but robust. + rtl_str_hash_insert_nonequal (pHash, pHashStr); + n++; + if (n >= pHash->nSize) + n = 0; + } + // FIXME: Should we down-size ? +} + + }