| OLD | NEW |
| (Empty) |
| 1 /* | |
| 2 * Copyright (C) 2005, 2006, 2008, 2010, 2013 Apple Inc. All rights reserved. | |
| 3 * Copyright (C) 2010 Patrick Gansterer <paroga@paroga.com> | |
| 4 * | |
| 5 * This library is free software; you can redistribute it and/or | |
| 6 * modify it under the terms of the GNU Library General Public | |
| 7 * License as published by the Free Software Foundation; either | |
| 8 * version 2 of the License, or (at your option) any later version. | |
| 9 * | |
| 10 * This library is distributed in the hope that it will be useful, | |
| 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
| 13 * Library General Public License for more details. | |
| 14 * | |
| 15 * You should have received a copy of the GNU Library General Public License | |
| 16 * along with this library; see the file COPYING.LIB. If not, write to | |
| 17 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, | |
| 18 * Boston, MA 02110-1301, USA. | |
| 19 * | |
| 20 */ | |
| 21 | |
| 22 #ifndef WTF_StringHasher_h | |
| 23 #define WTF_StringHasher_h | |
| 24 | |
| 25 #include <wtf/unicode/Unicode.h> | |
| 26 | |
| 27 namespace WTF { | |
| 28 | |
| 29 // Paul Hsieh's SuperFastHash | |
| 30 // http://www.azillionmonkeys.com/qed/hash.html | |
| 31 | |
| 32 // LChar data is interpreted as Latin-1-encoded (zero extended to 16 bits). | |
| 33 | |
| 34 // NOTE: The hash computation here must stay in sync with the create_hash_table
script in | |
| 35 // JavaScriptCore and the CodeGeneratorJS.pm script in WebCore. | |
| 36 | |
| 37 // Golden ratio. Arbitrary start value to avoid mapping all zeros to a hash valu
e of zero. | |
| 38 static const unsigned stringHashingStartValue = 0x9E3779B9U; | |
| 39 | |
| 40 class StringHasher { | |
| 41 public: | |
| 42 static const unsigned flagCount = 8; // Save 8 bits for StringImpl to use as
flags. | |
| 43 | |
| 44 StringHasher() | |
| 45 : m_hash(stringHashingStartValue) | |
| 46 , m_hasPendingCharacter(false) | |
| 47 , m_pendingCharacter(0) | |
| 48 { | |
| 49 } | |
| 50 | |
| 51 // The hasher hashes two characters at a time, and thus an "aligned" hasher
is one | |
| 52 // where an even number of characters have been added. Callers that always a
dd | |
| 53 // characters two at a time can use the "assuming aligned" functions. | |
| 54 void addCharactersAssumingAligned(UChar a, UChar b) | |
| 55 { | |
| 56 ASSERT(!m_hasPendingCharacter); | |
| 57 m_hash += a; | |
| 58 m_hash = (m_hash << 16) ^ ((b << 11) ^ m_hash); | |
| 59 m_hash += m_hash >> 11; | |
| 60 } | |
| 61 | |
| 62 void addCharacter(UChar character) | |
| 63 { | |
| 64 if (m_hasPendingCharacter) { | |
| 65 m_hasPendingCharacter = false; | |
| 66 addCharactersAssumingAligned(m_pendingCharacter, character); | |
| 67 return; | |
| 68 } | |
| 69 | |
| 70 m_pendingCharacter = character; | |
| 71 m_hasPendingCharacter = true; | |
| 72 } | |
| 73 | |
| 74 void addCharacters(UChar a, UChar b) | |
| 75 { | |
| 76 if (m_hasPendingCharacter) { | |
| 77 #if !ASSERT_DISABLED | |
| 78 m_hasPendingCharacter = false; | |
| 79 #endif | |
| 80 addCharactersAssumingAligned(m_pendingCharacter, a); | |
| 81 m_pendingCharacter = b; | |
| 82 #if !ASSERT_DISABLED | |
| 83 m_hasPendingCharacter = true; | |
| 84 #endif | |
| 85 return; | |
| 86 } | |
| 87 | |
| 88 addCharactersAssumingAligned(a, b); | |
| 89 } | |
| 90 | |
| 91 template<typename T, UChar Converter(T)> void addCharactersAssumingAligned(c
onst T* data, unsigned length) | |
| 92 { | |
| 93 ASSERT(!m_hasPendingCharacter); | |
| 94 | |
| 95 bool remainder = length & 1; | |
| 96 length >>= 1; | |
| 97 | |
| 98 while (length--) { | |
| 99 addCharactersAssumingAligned(Converter(data[0]), Converter(data[1]))
; | |
| 100 data += 2; | |
| 101 } | |
| 102 | |
| 103 if (remainder) | |
| 104 addCharacter(Converter(*data)); | |
| 105 } | |
| 106 | |
| 107 template<typename T> void addCharactersAssumingAligned(const T* data, unsign
ed length) | |
| 108 { | |
| 109 addCharactersAssumingAligned<T, defaultConverter>(data, length); | |
| 110 } | |
| 111 | |
| 112 template<typename T, UChar Converter(T)> void addCharactersAssumingAligned(c
onst T* data) | |
| 113 { | |
| 114 ASSERT(!m_hasPendingCharacter); | |
| 115 | |
| 116 while (T a = *data++) { | |
| 117 T b = *data++; | |
| 118 if (!b) { | |
| 119 addCharacter(Converter(a)); | |
| 120 break; | |
| 121 } | |
| 122 addCharactersAssumingAligned(Converter(a), Converter(b)); | |
| 123 } | |
| 124 } | |
| 125 | |
| 126 template<typename T> void addCharactersAssumingAligned(const T* data) | |
| 127 { | |
| 128 addCharactersAssumingAligned<T, defaultConverter>(data); | |
| 129 } | |
| 130 | |
| 131 template<typename T, UChar Converter(T)> void addCharacters(const T* data, u
nsigned length) | |
| 132 { | |
| 133 if (m_hasPendingCharacter && length) { | |
| 134 m_hasPendingCharacter = false; | |
| 135 addCharactersAssumingAligned(m_pendingCharacter, Converter(*data++))
; | |
| 136 --length; | |
| 137 } | |
| 138 addCharactersAssumingAligned<T, Converter>(data, length); | |
| 139 } | |
| 140 | |
| 141 template<typename T> void addCharacters(const T* data, unsigned length) | |
| 142 { | |
| 143 addCharacters<T, defaultConverter>(data, length); | |
| 144 } | |
| 145 | |
| 146 template<typename T, UChar Converter(T)> void addCharacters(const T* data) | |
| 147 { | |
| 148 if (m_hasPendingCharacter && *data) { | |
| 149 m_hasPendingCharacter = false; | |
| 150 addCharactersAssumingAligned(m_pendingCharacter, Converter(*data++))
; | |
| 151 } | |
| 152 addCharactersAssumingAligned<T, Converter>(data); | |
| 153 } | |
| 154 | |
| 155 template<typename T> void addCharacters(const T* data) | |
| 156 { | |
| 157 addCharacters<T, defaultConverter>(data); | |
| 158 } | |
| 159 | |
| 160 unsigned hashWithTop8BitsMasked() const | |
| 161 { | |
| 162 unsigned result = avalancheBits(); | |
| 163 | |
| 164 // Reserving space from the high bits for flags preserves most of the ha
sh's | |
| 165 // value, since hash lookup typically masks out the high bits anyway. | |
| 166 result &= (1U << (sizeof(result) * 8 - flagCount)) - 1; | |
| 167 | |
| 168 // This avoids ever returning a hash code of 0, since that is used to | |
| 169 // signal "hash not computed yet". Setting the high bit maintains | |
| 170 // reasonable fidelity to a hash code of 0 because it is likely to yield | |
| 171 // exactly 0 when hash lookup masks out the high bits. | |
| 172 if (!result) | |
| 173 result = 0x80000000 >> flagCount; | |
| 174 | |
| 175 return result; | |
| 176 } | |
| 177 | |
| 178 unsigned hash() const | |
| 179 { | |
| 180 unsigned result = avalancheBits(); | |
| 181 | |
| 182 // This avoids ever returning a hash code of 0, since that is used to | |
| 183 // signal "hash not computed yet". Setting the high bit maintains | |
| 184 // reasonable fidelity to a hash code of 0 because it is likely to yield | |
| 185 // exactly 0 when hash lookup masks out the high bits. | |
| 186 if (!result) | |
| 187 result = 0x80000000; | |
| 188 | |
| 189 return result; | |
| 190 } | |
| 191 | |
| 192 template<typename T, UChar Converter(T)> static unsigned computeHashAndMaskT
op8Bits(const T* data, unsigned length) | |
| 193 { | |
| 194 StringHasher hasher; | |
| 195 hasher.addCharactersAssumingAligned<T, Converter>(data, length); | |
| 196 return hasher.hashWithTop8BitsMasked(); | |
| 197 } | |
| 198 | |
| 199 template<typename T, UChar Converter(T)> static unsigned computeHashAndMaskT
op8Bits(const T* data) | |
| 200 { | |
| 201 StringHasher hasher; | |
| 202 hasher.addCharactersAssumingAligned<T, Converter>(data); | |
| 203 return hasher.hashWithTop8BitsMasked(); | |
| 204 } | |
| 205 | |
| 206 template<typename T> static unsigned computeHashAndMaskTop8Bits(const T* dat
a, unsigned length) | |
| 207 { | |
| 208 return computeHashAndMaskTop8Bits<T, defaultConverter>(data, length); | |
| 209 } | |
| 210 | |
| 211 template<typename T> static unsigned computeHashAndMaskTop8Bits(const T* dat
a) | |
| 212 { | |
| 213 return computeHashAndMaskTop8Bits<T, defaultConverter>(data); | |
| 214 } | |
| 215 | |
| 216 template<typename T, UChar Converter(T)> static unsigned computeHash(const T
* data, unsigned length) | |
| 217 { | |
| 218 StringHasher hasher; | |
| 219 hasher.addCharactersAssumingAligned<T, Converter>(data, length); | |
| 220 return hasher.hash(); | |
| 221 } | |
| 222 | |
| 223 template<typename T, UChar Converter(T)> static unsigned computeHash(const T
* data) | |
| 224 { | |
| 225 StringHasher hasher; | |
| 226 hasher.addCharactersAssumingAligned<T, Converter>(data); | |
| 227 return hasher.hash(); | |
| 228 } | |
| 229 | |
| 230 template<typename T> static unsigned computeHash(const T* data, unsigned len
gth) | |
| 231 { | |
| 232 return computeHash<T, defaultConverter>(data, length); | |
| 233 } | |
| 234 | |
| 235 template<typename T> static unsigned computeHash(const T* data) | |
| 236 { | |
| 237 return computeHash<T, defaultConverter>(data); | |
| 238 } | |
| 239 | |
| 240 static unsigned hashMemory(const void* data, unsigned length) | |
| 241 { | |
| 242 // FIXME: Why does this function use the version of the hash that drops
the top 8 bits? | |
| 243 // We want that for all string hashing so we can use those bits in Strin
gImpl and hash | |
| 244 // strings consistently, but I don't see why we'd want that for general
memory hashing. | |
| 245 ASSERT(!(length % 2)); | |
| 246 return computeHashAndMaskTop8Bits<UChar>(static_cast<const UChar*>(data)
, length / sizeof(UChar)); | |
| 247 } | |
| 248 | |
| 249 template<size_t length> static unsigned hashMemory(const void* data) | |
| 250 { | |
| 251 COMPILE_ASSERT(!(length % 2), length_must_be_a_multiple_of_two); | |
| 252 return hashMemory(data, length); | |
| 253 } | |
| 254 | |
| 255 private: | |
| 256 static UChar defaultConverter(UChar character) | |
| 257 { | |
| 258 return character; | |
| 259 } | |
| 260 | |
| 261 static UChar defaultConverter(LChar character) | |
| 262 { | |
| 263 return character; | |
| 264 } | |
| 265 | |
| 266 unsigned avalancheBits() const | |
| 267 { | |
| 268 unsigned result = m_hash; | |
| 269 | |
| 270 // Handle end case. | |
| 271 if (m_hasPendingCharacter) { | |
| 272 result += m_pendingCharacter; | |
| 273 result ^= result << 11; | |
| 274 result += result >> 17; | |
| 275 } | |
| 276 | |
| 277 // Force "avalanching" of final 31 bits. | |
| 278 result ^= result << 3; | |
| 279 result += result >> 5; | |
| 280 result ^= result << 2; | |
| 281 result += result >> 15; | |
| 282 result ^= result << 10; | |
| 283 | |
| 284 return result; | |
| 285 } | |
| 286 | |
| 287 unsigned m_hash; | |
| 288 bool m_hasPendingCharacter; | |
| 289 UChar m_pendingCharacter; | |
| 290 }; | |
| 291 | |
| 292 } // namespace WTF | |
| 293 | |
| 294 using WTF::StringHasher; | |
| 295 | |
| 296 #endif // WTF_StringHasher_h | |
| OLD | NEW |