Source/WTF/wtf/StringHasher.h - Issue 14238015: Move Source/WTF/wtf to Source/wtf

Side by Side Diff: Source/WTF/wtf/StringHasher.h

Issue 14238015: Move Source/WTF/wtf to Source/wtf (Closed) Base URL: svn://svn.chromium.org/blink/trunk

Patch Set: Created 7 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
	(Empty)
1 /*

2 * Copyright (C) 2005, 2006, 2008, 2010, 2013 Apple Inc. All rights reserved.

3 * Copyright (C) 2010 Patrick Gansterer <paroga@paroga.com>

4 *

5 * This library is free software; you can redistribute it and/or

6 * modify it under the terms of the GNU Library General Public

7 * License as published by the Free Software Foundation; either

8 * version 2 of the License, or (at your option) any later version.

9 *

10 * This library is distributed in the hope that it will be useful,

11 * but WITHOUT ANY WARRANTY; without even the implied warranty of

12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU

13 * Library General Public License for more details.

14 *

15 * You should have received a copy of the GNU Library General Public License

16 * along with this library; see the file COPYING.LIB. If not, write to

17 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,

18 * Boston, MA 02110-1301, USA.

19 *

20 */

21

22 #ifndef WTF_StringHasher_h

23 #define WTF_StringHasher_h

24

25 #include <wtf/unicode/Unicode.h>

26

27 namespace WTF {

28

29 // Paul Hsieh's SuperFastHash

30 // http://www.azillionmonkeys.com/qed/hash.html

31

32 // LChar data is interpreted as Latin-1-encoded (zero extended to 16 bits).

33

34 // NOTE: The hash computation here must stay in sync with the create_hash_table script in

35 // JavaScriptCore and the CodeGeneratorJS.pm script in WebCore.

36

37 // Golden ratio. Arbitrary start value to avoid mapping all zeros to a hash valu e of zero.

38 static const unsigned stringHashingStartValue = 0x9E3779B9U;

39

40 class StringHasher {

41 public:

42 static const unsigned flagCount = 8; // Save 8 bits for StringImpl to use as flags.

43

44 StringHasher()

45 : m_hash(stringHashingStartValue)

46 , m_hasPendingCharacter(false)

47 , m_pendingCharacter(0)

48 {

49 }

50

51 // The hasher hashes two characters at a time, and thus an "aligned" hasher is one

52 // where an even number of characters have been added. Callers that always a dd

53 // characters two at a time can use the "assuming aligned" functions.

54 void addCharactersAssumingAligned(UChar a, UChar b)

55 {

56 ASSERT(!m_hasPendingCharacter);

57 m_hash += a;

58 m_hash = (m_hash << 16) ^ ((b << 11) ^ m_hash);

59 m_hash += m_hash >> 11;

60 }

61

62 void addCharacter(UChar character)

63 {

64 if (m_hasPendingCharacter) {

65 m_hasPendingCharacter = false;

66 addCharactersAssumingAligned(m_pendingCharacter, character);

67 return;

68 }

69

70 m_pendingCharacter = character;

71 m_hasPendingCharacter = true;

72 }

73

74 void addCharacters(UChar a, UChar b)

75 {

76 if (m_hasPendingCharacter) {

77 #if !ASSERT_DISABLED

78 m_hasPendingCharacter = false;

79 #endif

80 addCharactersAssumingAligned(m_pendingCharacter, a);

81 m_pendingCharacter = b;

82 #if !ASSERT_DISABLED

83 m_hasPendingCharacter = true;

84 #endif

85 return;

86 }

87

88 addCharactersAssumingAligned(a, b);

89 }

90

91 template<typename T, UChar Converter(T)> void addCharactersAssumingAligned(c onst T* data, unsigned length)

92 {

93 ASSERT(!m_hasPendingCharacter);

94

95 bool remainder = length & 1;

96 length >>= 1;

97

98 while (length--) {

99 addCharactersAssumingAligned(Converter(data[0]), Converter(data[1])) ;

100 data += 2;

101 }

102

103 if (remainder)

104 addCharacter(Converter(*data));

105 }

106

107 template<typename T> void addCharactersAssumingAligned(const T* data, unsign ed length)

108 {

109 addCharactersAssumingAligned<T, defaultConverter>(data, length);

110 }

111

112 template<typename T, UChar Converter(T)> void addCharactersAssumingAligned(c onst T* data)

113 {

114 ASSERT(!m_hasPendingCharacter);

115

116 while (T a = *data++) {

117 T b = *data++;

118 if (!b) {

119 addCharacter(Converter(a));

120 break;

121 }

122 addCharactersAssumingAligned(Converter(a), Converter(b));

123 }

124 }

125

126 template<typename T> void addCharactersAssumingAligned(const T* data)

127 {

128 addCharactersAssumingAligned<T, defaultConverter>(data);

129 }

130

131 template<typename T, UChar Converter(T)> void addCharacters(const T* data, u nsigned length)

132 {

133 if (m_hasPendingCharacter && length) {

134 m_hasPendingCharacter = false;

135 addCharactersAssumingAligned(m_pendingCharacter, Converter(*data++)) ;

136 --length;

137 }

138 addCharactersAssumingAligned<T, Converter>(data, length);

139 }

140

141 template<typename T> void addCharacters(const T* data, unsigned length)

142 {

143 addCharacters<T, defaultConverter>(data, length);

144 }

145

146 template<typename T, UChar Converter(T)> void addCharacters(const T* data)

147 {

148 if (m_hasPendingCharacter && *data) {

149 m_hasPendingCharacter = false;

150 addCharactersAssumingAligned(m_pendingCharacter, Converter(*data++)) ;

151 }

152 addCharactersAssumingAligned<T, Converter>(data);

153 }

154

155 template<typename T> void addCharacters(const T* data)

156 {

157 addCharacters<T, defaultConverter>(data);

158 }

159

160 unsigned hashWithTop8BitsMasked() const

161 {

162 unsigned result = avalancheBits();

163

164 // Reserving space from the high bits for flags preserves most of the ha sh's

165 // value, since hash lookup typically masks out the high bits anyway.

166 result &= (1U << (sizeof(result) * 8 - flagCount)) - 1;

167

168 // This avoids ever returning a hash code of 0, since that is used to

169 // signal "hash not computed yet". Setting the high bit maintains

170 // reasonable fidelity to a hash code of 0 because it is likely to yield

171 // exactly 0 when hash lookup masks out the high bits.

172 if (!result)

173 result = 0x80000000 >> flagCount;

174

175 return result;

176 }

177

178 unsigned hash() const

179 {

180 unsigned result = avalancheBits();

181

182 // This avoids ever returning a hash code of 0, since that is used to

183 // signal "hash not computed yet". Setting the high bit maintains

184 // reasonable fidelity to a hash code of 0 because it is likely to yield

185 // exactly 0 when hash lookup masks out the high bits.

186 if (!result)

187 result = 0x80000000;

188

189 return result;

190 }

191

192 template<typename T, UChar Converter(T)> static unsigned computeHashAndMaskT op8Bits(const T* data, unsigned length)

193 {

194 StringHasher hasher;

195 hasher.addCharactersAssumingAligned<T, Converter>(data, length);

196 return hasher.hashWithTop8BitsMasked();

197 }

198

199 template<typename T, UChar Converter(T)> static unsigned computeHashAndMaskT op8Bits(const T* data)

200 {

201 StringHasher hasher;

202 hasher.addCharactersAssumingAligned<T, Converter>(data);

203 return hasher.hashWithTop8BitsMasked();

204 }

205

206 template<typename T> static unsigned computeHashAndMaskTop8Bits(const T* dat a, unsigned length)

207 {

208 return computeHashAndMaskTop8Bits<T, defaultConverter>(data, length);

209 }

210

211 template<typename T> static unsigned computeHashAndMaskTop8Bits(const T* dat a)

212 {

213 return computeHashAndMaskTop8Bits<T, defaultConverter>(data);

214 }

215

216 template<typename T, UChar Converter(T)> static unsigned computeHash(const T * data, unsigned length)

217 {

218 StringHasher hasher;

219 hasher.addCharactersAssumingAligned<T, Converter>(data, length);

220 return hasher.hash();

221 }

222

223 template<typename T, UChar Converter(T)> static unsigned computeHash(const T * data)

224 {

225 StringHasher hasher;

226 hasher.addCharactersAssumingAligned<T, Converter>(data);

227 return hasher.hash();

228 }

229

230 template<typename T> static unsigned computeHash(const T* data, unsigned len gth)

231 {

232 return computeHash<T, defaultConverter>(data, length);

233 }

234

235 template<typename T> static unsigned computeHash(const T* data)

236 {

237 return computeHash<T, defaultConverter>(data);

238 }

239

240 static unsigned hashMemory(const void* data, unsigned length)

241 {

242 // FIXME: Why does this function use the version of the hash that drops the top 8 bits?

243 // We want that for all string hashing so we can use those bits in Strin gImpl and hash

244 // strings consistently, but I don't see why we'd want that for general memory hashing.

245 ASSERT(!(length % 2));

246 return computeHashAndMaskTop8Bits<UChar>(static_cast<const UChar*>(data) , length / sizeof(UChar));

247 }

248

249 template<size_t length> static unsigned hashMemory(const void* data)

250 {

251 COMPILE_ASSERT(!(length % 2), length_must_be_a_multiple_of_two);

252 return hashMemory(data, length);

253 }

254

255 private:

256 static UChar defaultConverter(UChar character)

257 {

258 return character;

259 }

260

261 static UChar defaultConverter(LChar character)

262 {

263 return character;

264 }

265

266 unsigned avalancheBits() const

267 {

268 unsigned result = m_hash;

269

270 // Handle end case.

271 if (m_hasPendingCharacter) {

272 result += m_pendingCharacter;

273 result ^= result << 11;

274 result += result >> 17;

275 }

276

277 // Force "avalanching" of final 31 bits.

278 result ^= result << 3;

279 result += result >> 5;

280 result ^= result << 2;

281 result += result >> 15;

282 result ^= result << 10;

283

284 return result;

285 }

286

287 unsigned m_hash;

288 bool m_hasPendingCharacter;

289 UChar m_pendingCharacter;

290 };

291

292 } // namespace WTF

293

294 using WTF::StringHasher;

295

296 #endif // WTF_StringHasher_h

OLD	NEW

« no previous file with comments | « Source/WTF/wtf/StringExtras.h ('k') | Source/WTF/wtf/TCPackedCache.h » ('j') | Source/config.h » ('J')