| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (C) 2005, 2006, 2008, 2010, 2013 Apple Inc. All rights reserved. | 2 * Copyright (C) 2005, 2006, 2008, 2010, 2013 Apple Inc. All rights reserved. |
| 3 * Copyright (C) 2010 Patrick Gansterer <paroga@paroga.com> | 3 * Copyright (C) 2010 Patrick Gansterer <paroga@paroga.com> |
| 4 * | 4 * |
| 5 * This library is free software; you can redistribute it and/or | 5 * This library is free software; you can redistribute it and/or |
| 6 * modify it under the terms of the GNU Library General Public | 6 * modify it under the terms of the GNU Library General Public |
| 7 * License as published by the Free Software Foundation; either | 7 * License as published by the Free Software Foundation; either |
| 8 * version 2 of the License, or (at your option) any later version. | 8 * version 2 of the License, or (at your option) any later version. |
| 9 * | 9 * |
| 10 * This library is distributed in the hope that it will be useful, | 10 * This library is distributed in the hope that it will be useful, |
| (...skipping 21 matching lines...) Expand all Loading... |
| 32 | 32 |
| 33 // LChar data is interpreted as Latin-1-encoded (zero extended to 16 bits). | 33 // LChar data is interpreted as Latin-1-encoded (zero extended to 16 bits). |
| 34 | 34 |
| 35 // NOTE: The hash computation here must stay in sync with the create_hash_table
script in | 35 // NOTE: The hash computation here must stay in sync with the create_hash_table
script in |
| 36 // JavaScriptCore and the CodeGeneratorJS.pm script in WebCore. | 36 // JavaScriptCore and the CodeGeneratorJS.pm script in WebCore. |
| 37 | 37 |
| 38 // Golden ratio. Arbitrary start value to avoid mapping all zeros to a hash valu
e of zero. | 38 // Golden ratio. Arbitrary start value to avoid mapping all zeros to a hash valu
e of zero. |
| 39 static const unsigned stringHashingStartValue = 0x9E3779B9U; | 39 static const unsigned stringHashingStartValue = 0x9E3779B9U; |
| 40 | 40 |
| 41 class StringHasher { | 41 class StringHasher { |
| 42 DISALLOW_NEW(); | 42 DISALLOW_NEW(); |
| 43 public: | |
| 44 static const unsigned flagCount = 8; // Save 8 bits for StringImpl to use as
flags. | |
| 45 | 43 |
| 46 StringHasher() | 44 public: |
| 47 : m_hash(stringHashingStartValue) | 45 static const unsigned flagCount = |
| 48 , m_hasPendingCharacter(false) | 46 8; // Save 8 bits for StringImpl to use as flags. |
| 49 , m_pendingCharacter(0) | 47 |
| 50 { | 48 StringHasher() |
| 49 : m_hash(stringHashingStartValue), |
| 50 m_hasPendingCharacter(false), |
| 51 m_pendingCharacter(0) {} |
| 52 |
| 53 // The hasher hashes two characters at a time, and thus an "aligned" hasher is
one |
| 54 // where an even number of characters have been added. Callers that always add |
| 55 // characters two at a time can use the "assuming aligned" functions. |
| 56 void addCharactersAssumingAligned(UChar a, UChar b) { |
| 57 ASSERT(!m_hasPendingCharacter); |
| 58 m_hash += a; |
| 59 m_hash = (m_hash << 16) ^ ((b << 11) ^ m_hash); |
| 60 m_hash += m_hash >> 11; |
| 61 } |
| 62 |
| 63 void addCharacter(UChar character) { |
| 64 if (m_hasPendingCharacter) { |
| 65 m_hasPendingCharacter = false; |
| 66 addCharactersAssumingAligned(m_pendingCharacter, character); |
| 67 return; |
| 51 } | 68 } |
| 52 | 69 |
| 53 // The hasher hashes two characters at a time, and thus an "aligned" hasher
is one | 70 m_pendingCharacter = character; |
| 54 // where an even number of characters have been added. Callers that always a
dd | 71 m_hasPendingCharacter = true; |
| 55 // characters two at a time can use the "assuming aligned" functions. | 72 } |
| 56 void addCharactersAssumingAligned(UChar a, UChar b) | 73 |
| 57 { | 74 void addCharacters(UChar a, UChar b) { |
| 58 ASSERT(!m_hasPendingCharacter); | 75 if (m_hasPendingCharacter) { |
| 59 m_hash += a; | 76 #if ENABLE(ASSERT) |
| 60 m_hash = (m_hash << 16) ^ ((b << 11) ^ m_hash); | 77 m_hasPendingCharacter = false; |
| 61 m_hash += m_hash >> 11; | 78 #endif |
| 79 addCharactersAssumingAligned(m_pendingCharacter, a); |
| 80 m_pendingCharacter = b; |
| 81 #if ENABLE(ASSERT) |
| 82 m_hasPendingCharacter = true; |
| 83 #endif |
| 84 return; |
| 62 } | 85 } |
| 63 | 86 |
| 64 void addCharacter(UChar character) | 87 addCharactersAssumingAligned(a, b); |
| 65 { | 88 } |
| 66 if (m_hasPendingCharacter) { | |
| 67 m_hasPendingCharacter = false; | |
| 68 addCharactersAssumingAligned(m_pendingCharacter, character); | |
| 69 return; | |
| 70 } | |
| 71 | 89 |
| 72 m_pendingCharacter = character; | 90 template <typename T, UChar Converter(T)> |
| 73 m_hasPendingCharacter = true; | 91 void addCharactersAssumingAligned(const T* data, unsigned length) { |
| 92 ASSERT(!m_hasPendingCharacter); |
| 93 |
| 94 bool remainder = length & 1; |
| 95 length >>= 1; |
| 96 |
| 97 while (length--) { |
| 98 addCharactersAssumingAligned(Converter(data[0]), Converter(data[1])); |
| 99 data += 2; |
| 74 } | 100 } |
| 75 | 101 |
| 76 void addCharacters(UChar a, UChar b) | 102 if (remainder) |
| 77 { | 103 addCharacter(Converter(*data)); |
| 78 if (m_hasPendingCharacter) { | 104 } |
| 79 #if ENABLE(ASSERT) | |
| 80 m_hasPendingCharacter = false; | |
| 81 #endif | |
| 82 addCharactersAssumingAligned(m_pendingCharacter, a); | |
| 83 m_pendingCharacter = b; | |
| 84 #if ENABLE(ASSERT) | |
| 85 m_hasPendingCharacter = true; | |
| 86 #endif | |
| 87 return; | |
| 88 } | |
| 89 | 105 |
| 90 addCharactersAssumingAligned(a, b); | 106 template <typename T> |
| 107 void addCharactersAssumingAligned(const T* data, unsigned length) { |
| 108 addCharactersAssumingAligned<T, defaultConverter>(data, length); |
| 109 } |
| 110 |
| 111 template <typename T, UChar Converter(T)> |
| 112 void addCharacters(const T* data, unsigned length) { |
| 113 if (m_hasPendingCharacter && length) { |
| 114 m_hasPendingCharacter = false; |
| 115 addCharactersAssumingAligned(m_pendingCharacter, Converter(*data++)); |
| 116 --length; |
| 117 } |
| 118 addCharactersAssumingAligned<T, Converter>(data, length); |
| 119 } |
| 120 |
| 121 template <typename T> |
| 122 void addCharacters(const T* data, unsigned length) { |
| 123 addCharacters<T, defaultConverter>(data, length); |
| 124 } |
| 125 |
| 126 unsigned hashWithTop8BitsMasked() const { |
| 127 unsigned result = avalancheBits(); |
| 128 |
| 129 // Reserving space from the high bits for flags preserves most of the hash's |
| 130 // value, since hash lookup typically masks out the high bits anyway. |
| 131 result &= (1U << (sizeof(result) * 8 - flagCount)) - 1; |
| 132 |
| 133 // This avoids ever returning a hash code of 0, since that is used to |
| 134 // signal "hash not computed yet". Setting the high bit maintains |
| 135 // reasonable fidelity to a hash code of 0 because it is likely to yield |
| 136 // exactly 0 when hash lookup masks out the high bits. |
| 137 if (!result) |
| 138 result = 0x80000000 >> flagCount; |
| 139 |
| 140 return result; |
| 141 } |
| 142 |
| 143 unsigned hash() const { |
| 144 unsigned result = avalancheBits(); |
| 145 |
| 146 // This avoids ever returning a hash code of 0, since that is used to |
| 147 // signal "hash not computed yet". Setting the high bit maintains |
| 148 // reasonable fidelity to a hash code of 0 because it is likely to yield |
| 149 // exactly 0 when hash lookup masks out the high bits. |
| 150 if (!result) |
| 151 result = 0x80000000; |
| 152 |
| 153 return result; |
| 154 } |
| 155 |
| 156 template <typename T, UChar Converter(T)> |
| 157 static unsigned computeHashAndMaskTop8Bits(const T* data, unsigned length) { |
| 158 StringHasher hasher; |
| 159 hasher.addCharactersAssumingAligned<T, Converter>(data, length); |
| 160 return hasher.hashWithTop8BitsMasked(); |
| 161 } |
| 162 |
| 163 template <typename T> |
| 164 static unsigned computeHashAndMaskTop8Bits(const T* data, unsigned length) { |
| 165 return computeHashAndMaskTop8Bits<T, defaultConverter>(data, length); |
| 166 } |
| 167 |
| 168 template <typename T, UChar Converter(T)> |
| 169 static unsigned computeHash(const T* data, unsigned length) { |
| 170 StringHasher hasher; |
| 171 hasher.addCharactersAssumingAligned<T, Converter>(data, length); |
| 172 return hasher.hash(); |
| 173 } |
| 174 |
| 175 template <typename T> |
| 176 static unsigned computeHash(const T* data, unsigned length) { |
| 177 return computeHash<T, defaultConverter>(data, length); |
| 178 } |
| 179 |
| 180 static unsigned hashMemory(const void* data, unsigned length) { |
| 181 // FIXME: Why does this function use the version of the hash that drops the
top 8 bits? |
| 182 // We want that for all string hashing so we can use those bits in StringImp
l and hash |
| 183 // strings consistently, but I don't see why we'd want that for general memo
ry hashing. |
| 184 ASSERT(!(length % 2)); |
| 185 return computeHashAndMaskTop8Bits<UChar>(static_cast<const UChar*>(data), |
| 186 length / sizeof(UChar)); |
| 187 } |
| 188 |
| 189 template <size_t length> |
| 190 static unsigned hashMemory(const void* data) { |
| 191 static_assert(!(length % 2), "length must be a multiple of two"); |
| 192 return hashMemory(data, length); |
| 193 } |
| 194 |
| 195 private: |
| 196 static UChar defaultConverter(UChar character) { return character; } |
| 197 |
| 198 static UChar defaultConverter(LChar character) { return character; } |
| 199 |
| 200 unsigned avalancheBits() const { |
| 201 unsigned result = m_hash; |
| 202 |
| 203 // Handle end case. |
| 204 if (m_hasPendingCharacter) { |
| 205 result += m_pendingCharacter; |
| 206 result ^= result << 11; |
| 207 result += result >> 17; |
| 91 } | 208 } |
| 92 | 209 |
| 93 template<typename T, UChar Converter(T)> void addCharactersAssumingAligned(c
onst T* data, unsigned length) | 210 // Force "avalanching" of final 31 bits. |
| 94 { | 211 result ^= result << 3; |
| 95 ASSERT(!m_hasPendingCharacter); | 212 result += result >> 5; |
| 213 result ^= result << 2; |
| 214 result += result >> 15; |
| 215 result ^= result << 10; |
| 96 | 216 |
| 97 bool remainder = length & 1; | 217 return result; |
| 98 length >>= 1; | 218 } |
| 99 | 219 |
| 100 while (length--) { | 220 unsigned m_hash; |
| 101 addCharactersAssumingAligned(Converter(data[0]), Converter(data[1]))
; | 221 bool m_hasPendingCharacter; |
| 102 data += 2; | 222 UChar m_pendingCharacter; |
| 103 } | |
| 104 | |
| 105 if (remainder) | |
| 106 addCharacter(Converter(*data)); | |
| 107 } | |
| 108 | |
| 109 template<typename T> void addCharactersAssumingAligned(const T* data, unsign
ed length) | |
| 110 { | |
| 111 addCharactersAssumingAligned<T, defaultConverter>(data, length); | |
| 112 } | |
| 113 | |
| 114 template<typename T, UChar Converter(T)> void addCharacters(const T* data, u
nsigned length) | |
| 115 { | |
| 116 if (m_hasPendingCharacter && length) { | |
| 117 m_hasPendingCharacter = false; | |
| 118 addCharactersAssumingAligned(m_pendingCharacter, Converter(*data++))
; | |
| 119 --length; | |
| 120 } | |
| 121 addCharactersAssumingAligned<T, Converter>(data, length); | |
| 122 } | |
| 123 | |
| 124 template<typename T> void addCharacters(const T* data, unsigned length) | |
| 125 { | |
| 126 addCharacters<T, defaultConverter>(data, length); | |
| 127 } | |
| 128 | |
| 129 unsigned hashWithTop8BitsMasked() const | |
| 130 { | |
| 131 unsigned result = avalancheBits(); | |
| 132 | |
| 133 // Reserving space from the high bits for flags preserves most of the ha
sh's | |
| 134 // value, since hash lookup typically masks out the high bits anyway. | |
| 135 result &= (1U << (sizeof(result) * 8 - flagCount)) - 1; | |
| 136 | |
| 137 // This avoids ever returning a hash code of 0, since that is used to | |
| 138 // signal "hash not computed yet". Setting the high bit maintains | |
| 139 // reasonable fidelity to a hash code of 0 because it is likely to yield | |
| 140 // exactly 0 when hash lookup masks out the high bits. | |
| 141 if (!result) | |
| 142 result = 0x80000000 >> flagCount; | |
| 143 | |
| 144 return result; | |
| 145 } | |
| 146 | |
| 147 unsigned hash() const | |
| 148 { | |
| 149 unsigned result = avalancheBits(); | |
| 150 | |
| 151 // This avoids ever returning a hash code of 0, since that is used to | |
| 152 // signal "hash not computed yet". Setting the high bit maintains | |
| 153 // reasonable fidelity to a hash code of 0 because it is likely to yield | |
| 154 // exactly 0 when hash lookup masks out the high bits. | |
| 155 if (!result) | |
| 156 result = 0x80000000; | |
| 157 | |
| 158 return result; | |
| 159 } | |
| 160 | |
| 161 template<typename T, UChar Converter(T)> static unsigned computeHashAndMaskT
op8Bits(const T* data, unsigned length) | |
| 162 { | |
| 163 StringHasher hasher; | |
| 164 hasher.addCharactersAssumingAligned<T, Converter>(data, length); | |
| 165 return hasher.hashWithTop8BitsMasked(); | |
| 166 } | |
| 167 | |
| 168 template<typename T> static unsigned computeHashAndMaskTop8Bits(const T* dat
a, unsigned length) | |
| 169 { | |
| 170 return computeHashAndMaskTop8Bits<T, defaultConverter>(data, length); | |
| 171 } | |
| 172 | |
| 173 template<typename T, UChar Converter(T)> static unsigned computeHash(const T
* data, unsigned length) | |
| 174 { | |
| 175 StringHasher hasher; | |
| 176 hasher.addCharactersAssumingAligned<T, Converter>(data, length); | |
| 177 return hasher.hash(); | |
| 178 } | |
| 179 | |
| 180 template<typename T> static unsigned computeHash(const T* data, unsigned len
gth) | |
| 181 { | |
| 182 return computeHash<T, defaultConverter>(data, length); | |
| 183 } | |
| 184 | |
| 185 static unsigned hashMemory(const void* data, unsigned length) | |
| 186 { | |
| 187 // FIXME: Why does this function use the version of the hash that drops
the top 8 bits? | |
| 188 // We want that for all string hashing so we can use those bits in Strin
gImpl and hash | |
| 189 // strings consistently, but I don't see why we'd want that for general
memory hashing. | |
| 190 ASSERT(!(length % 2)); | |
| 191 return computeHashAndMaskTop8Bits<UChar>(static_cast<const UChar*>(data)
, length / sizeof(UChar)); | |
| 192 } | |
| 193 | |
| 194 template<size_t length> static unsigned hashMemory(const void* data) | |
| 195 { | |
| 196 static_assert(!(length % 2), "length must be a multiple of two"); | |
| 197 return hashMemory(data, length); | |
| 198 } | |
| 199 | |
| 200 private: | |
| 201 static UChar defaultConverter(UChar character) | |
| 202 { | |
| 203 return character; | |
| 204 } | |
| 205 | |
| 206 static UChar defaultConverter(LChar character) | |
| 207 { | |
| 208 return character; | |
| 209 } | |
| 210 | |
| 211 unsigned avalancheBits() const | |
| 212 { | |
| 213 unsigned result = m_hash; | |
| 214 | |
| 215 // Handle end case. | |
| 216 if (m_hasPendingCharacter) { | |
| 217 result += m_pendingCharacter; | |
| 218 result ^= result << 11; | |
| 219 result += result >> 17; | |
| 220 } | |
| 221 | |
| 222 // Force "avalanching" of final 31 bits. | |
| 223 result ^= result << 3; | |
| 224 result += result >> 5; | |
| 225 result ^= result << 2; | |
| 226 result += result >> 15; | |
| 227 result ^= result << 10; | |
| 228 | |
| 229 return result; | |
| 230 } | |
| 231 | |
| 232 unsigned m_hash; | |
| 233 bool m_hasPendingCharacter; | |
| 234 UChar m_pendingCharacter; | |
| 235 }; | 223 }; |
| 236 | 224 |
| 237 } // namespace WTF | 225 } // namespace WTF |
| 238 | 226 |
| 239 using WTF::StringHasher; | 227 using WTF::StringHasher; |
| 240 | 228 |
| 241 #endif // WTF_StringHasher_h | 229 #endif // WTF_StringHasher_h |
| OLD | NEW |