OLD | NEW |
1 /* | 1 // Copyright 2017 The Chromium Authors. All rights reserved. |
2 * Copyright (C) 2005, 2006, 2008, 2010, 2013 Apple Inc. All rights reserved. | 2 // Use of this source code is governed by a BSD-style license that can be |
3 * Copyright (C) 2010 Patrick Gansterer <paroga@paroga.com> | 3 // found in the LICENSE file. |
4 * | |
5 * This library is free software; you can redistribute it and/or | |
6 * modify it under the terms of the GNU Library General Public | |
7 * License as published by the Free Software Foundation; either | |
8 * version 2 of the License, or (at your option) any later version. | |
9 * | |
10 * This library is distributed in the hope that it will be useful, | |
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 * Library General Public License for more details. | |
14 * | |
15 * You should have received a copy of the GNU Library General Public License | |
16 * along with this library; see the file COPYING.LIB. If not, write to | |
17 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, | |
18 * Boston, MA 02110-1301, USA. | |
19 * | |
20 */ | |
21 | 4 |
22 #ifndef WTF_StringHasher_h | 5 #include "platform/wtf/StringHasher.h" |
23 #define WTF_StringHasher_h | |
24 | 6 |
25 #include "wtf/Allocator.h" | 7 // The contents of this header was moved to platform/wtf as part of |
26 #include "wtf/text/Unicode.h" | 8 // WTF migration project. See the following post for details: |
27 | 9 // https://groups.google.com/a/chromium.org/d/msg/blink-dev/tLdAZCTlcAA/bYXVT8gY
CAAJ |
28 namespace WTF { | |
29 | |
30 // Paul Hsieh's SuperFastHash | |
31 // http://www.azillionmonkeys.com/qed/hash.html | |
32 | |
33 // LChar data is interpreted as Latin-1-encoded (zero extended to 16 bits). | |
34 | |
35 // NOTE: The hash computation here must stay in sync with | |
36 // build/scripts/hasher.py. | |
37 | |
38 // Golden ratio. Arbitrary start value to avoid mapping all zeros to a hash | |
39 // value of zero. | |
40 static const unsigned stringHashingStartValue = 0x9E3779B9U; | |
41 | |
42 class StringHasher { | |
43 DISALLOW_NEW(); | |
44 | |
45 public: | |
46 static const unsigned flagCount = | |
47 8; // Save 8 bits for StringImpl to use as flags. | |
48 | |
49 StringHasher() | |
50 : m_hash(stringHashingStartValue), | |
51 m_hasPendingCharacter(false), | |
52 m_pendingCharacter(0) {} | |
53 | |
54 // The hasher hashes two characters at a time, and thus an "aligned" hasher is | |
55 // one where an even number of characters have been added. Callers that | |
56 // always add characters two at a time can use the "assuming aligned" | |
57 // functions. | |
58 void addCharactersAssumingAligned(UChar a, UChar b) { | |
59 DCHECK(!m_hasPendingCharacter); | |
60 m_hash += a; | |
61 m_hash = (m_hash << 16) ^ ((b << 11) ^ m_hash); | |
62 m_hash += m_hash >> 11; | |
63 } | |
64 | |
65 void addCharacter(UChar character) { | |
66 if (m_hasPendingCharacter) { | |
67 m_hasPendingCharacter = false; | |
68 addCharactersAssumingAligned(m_pendingCharacter, character); | |
69 return; | |
70 } | |
71 | |
72 m_pendingCharacter = character; | |
73 m_hasPendingCharacter = true; | |
74 } | |
75 | |
76 void addCharacters(UChar a, UChar b) { | |
77 if (m_hasPendingCharacter) { | |
78 #if DCHECK_IS_ON() | |
79 m_hasPendingCharacter = false; | |
80 #endif | |
81 addCharactersAssumingAligned(m_pendingCharacter, a); | |
82 m_pendingCharacter = b; | |
83 #if DCHECK_IS_ON() | |
84 m_hasPendingCharacter = true; | |
85 #endif | |
86 return; | |
87 } | |
88 | |
89 addCharactersAssumingAligned(a, b); | |
90 } | |
91 | |
92 template <typename T, UChar Converter(T)> | |
93 void addCharactersAssumingAligned(const T* data, unsigned length) { | |
94 DCHECK(!m_hasPendingCharacter); | |
95 | |
96 bool remainder = length & 1; | |
97 length >>= 1; | |
98 | |
99 while (length--) { | |
100 addCharactersAssumingAligned(Converter(data[0]), Converter(data[1])); | |
101 data += 2; | |
102 } | |
103 | |
104 if (remainder) | |
105 addCharacter(Converter(*data)); | |
106 } | |
107 | |
108 template <typename T> | |
109 void addCharactersAssumingAligned(const T* data, unsigned length) { | |
110 addCharactersAssumingAligned<T, defaultConverter>(data, length); | |
111 } | |
112 | |
113 template <typename T, UChar Converter(T)> | |
114 void addCharacters(const T* data, unsigned length) { | |
115 if (m_hasPendingCharacter && length) { | |
116 m_hasPendingCharacter = false; | |
117 addCharactersAssumingAligned(m_pendingCharacter, Converter(*data++)); | |
118 --length; | |
119 } | |
120 addCharactersAssumingAligned<T, Converter>(data, length); | |
121 } | |
122 | |
123 template <typename T> | |
124 void addCharacters(const T* data, unsigned length) { | |
125 addCharacters<T, defaultConverter>(data, length); | |
126 } | |
127 | |
128 unsigned hashWithTop8BitsMasked() const { | |
129 unsigned result = avalancheBits(); | |
130 | |
131 // Reserving space from the high bits for flags preserves most of the hash's | |
132 // value, since hash lookup typically masks out the high bits anyway. | |
133 result &= (1U << (sizeof(result) * 8 - flagCount)) - 1; | |
134 | |
135 // This avoids ever returning a hash code of 0, since that is used to | |
136 // signal "hash not computed yet". Setting the high bit maintains | |
137 // reasonable fidelity to a hash code of 0 because it is likely to yield | |
138 // exactly 0 when hash lookup masks out the high bits. | |
139 if (!result) | |
140 result = 0x80000000 >> flagCount; | |
141 | |
142 return result; | |
143 } | |
144 | |
145 unsigned hash() const { | |
146 unsigned result = avalancheBits(); | |
147 | |
148 // This avoids ever returning a hash code of 0, since that is used to | |
149 // signal "hash not computed yet". Setting the high bit maintains | |
150 // reasonable fidelity to a hash code of 0 because it is likely to yield | |
151 // exactly 0 when hash lookup masks out the high bits. | |
152 if (!result) | |
153 result = 0x80000000; | |
154 | |
155 return result; | |
156 } | |
157 | |
158 template <typename T, UChar Converter(T)> | |
159 static unsigned computeHashAndMaskTop8Bits(const T* data, unsigned length) { | |
160 StringHasher hasher; | |
161 hasher.addCharactersAssumingAligned<T, Converter>(data, length); | |
162 return hasher.hashWithTop8BitsMasked(); | |
163 } | |
164 | |
165 template <typename T> | |
166 static unsigned computeHashAndMaskTop8Bits(const T* data, unsigned length) { | |
167 return computeHashAndMaskTop8Bits<T, defaultConverter>(data, length); | |
168 } | |
169 | |
170 template <typename T, UChar Converter(T)> | |
171 static unsigned computeHash(const T* data, unsigned length) { | |
172 StringHasher hasher; | |
173 hasher.addCharactersAssumingAligned<T, Converter>(data, length); | |
174 return hasher.hash(); | |
175 } | |
176 | |
177 template <typename T> | |
178 static unsigned computeHash(const T* data, unsigned length) { | |
179 return computeHash<T, defaultConverter>(data, length); | |
180 } | |
181 | |
182 static unsigned hashMemory(const void* data, unsigned length) { | |
183 // FIXME: Why does this function use the version of the hash that drops the | |
184 // top 8 bits? We want that for all string hashing so we can use those | |
185 // bits in StringImpl and hash strings consistently, but I don't see why | |
186 // we'd want that for general memory hashing. | |
187 DCHECK(!(length % 2)); | |
188 return computeHashAndMaskTop8Bits<UChar>(static_cast<const UChar*>(data), | |
189 length / sizeof(UChar)); | |
190 } | |
191 | |
192 template <size_t length> | |
193 static unsigned hashMemory(const void* data) { | |
194 static_assert(!(length % 2), "length must be a multiple of two"); | |
195 return hashMemory(data, length); | |
196 } | |
197 | |
198 private: | |
199 static UChar defaultConverter(UChar character) { return character; } | |
200 | |
201 static UChar defaultConverter(LChar character) { return character; } | |
202 | |
203 unsigned avalancheBits() const { | |
204 unsigned result = m_hash; | |
205 | |
206 // Handle end case. | |
207 if (m_hasPendingCharacter) { | |
208 result += m_pendingCharacter; | |
209 result ^= result << 11; | |
210 result += result >> 17; | |
211 } | |
212 | |
213 // Force "avalanching" of final 31 bits. | |
214 result ^= result << 3; | |
215 result += result >> 5; | |
216 result ^= result << 2; | |
217 result += result >> 15; | |
218 result ^= result << 10; | |
219 | |
220 return result; | |
221 } | |
222 | |
223 unsigned m_hash; | |
224 bool m_hasPendingCharacter; | |
225 UChar m_pendingCharacter; | |
226 }; | |
227 | |
228 } // namespace WTF | |
229 | |
230 using WTF::StringHasher; | |
231 | |
232 #endif // WTF_StringHasher_h | |
OLD | NEW |