Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(62)

Side by Side Diff: third_party/prediction/suggest/policyimpl/dictionary/utils/trie_map.h

Issue 1247903003: Add spellcheck and word suggestion to the prediction service (Closed) Base URL: https://github.com/domokit/mojo.git@master
Patch Set: Created 5 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 /*
2 * Copyright (C) 2014, The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #ifndef LATINIME_TRIE_MAP_H
18 #define LATINIME_TRIE_MAP_H
19
20 #include <climits>
21 #include <cstdint>
22 #include <cstdio>
23 #include <vector>
24
25 #include "third_party/prediction/defines.h"
26 #include "third_party/prediction/suggest/policyimpl/dictionary/utils/buffer_with _extendable_buffer.h"
27 #include "third_party/prediction/utils/byte_array_view.h"
28
29 namespace latinime {
30
31 /**
32 * Trie map derived from Phil Bagwell's Hash Array Mapped Trie.
33 * key is int and value is uint64_t.
34 * This supports multiple level map. Terminal entries can have a bitmap for the
35 * next level map.
36 * This doesn't support root map resizing.
37 */
38 class TrieMap {
39 public:
40 struct Result {
41 const uint64_t mValue;
42 const bool mIsValid;
43 const int mNextLevelBitmapEntryIndex;
44
45 Result(const uint64_t value,
46 const bool isValid,
47 const int nextLevelBitmapEntryIndex)
48 : mValue(value),
49 mIsValid(isValid),
50 mNextLevelBitmapEntryIndex(nextLevelBitmapEntryIndex) {}
51 };
52
53 /**
54 * Struct to record iteration state in a table.
55 */
56 struct TableIterationState {
57 int mTableSize;
58 int mTableIndex;
59 int mCurrentIndex;
60
61 TableIterationState(const int tableSize, const int tableIndex)
62 : mTableSize(tableSize), mTableIndex(tableIndex), mCurrentIndex(0) {}
63 };
64
65 class TrieMapRange;
66 class TrieMapIterator {
67 public:
68 class IterationResult {
69 public:
70 IterationResult(const TrieMap* const trieMap,
71 const int key,
72 const uint64_t value,
73 const int nextLeveBitmapEntryIndex)
74 : mTrieMap(trieMap),
75 mKey(key),
76 mValue(value),
77 mNextLevelBitmapEntryIndex(nextLeveBitmapEntryIndex) {}
78
79 const TrieMapRange getEntriesInNextLevel() const {
80 return TrieMapRange(mTrieMap, mNextLevelBitmapEntryIndex);
81 }
82
83 bool hasNextLevelMap() const {
84 return mNextLevelBitmapEntryIndex != INVALID_INDEX;
85 }
86
87 AK_FORCE_INLINE int key() const { return mKey; }
88
89 AK_FORCE_INLINE uint64_t value() const { return mValue; }
90
91 private:
92 const TrieMap* const mTrieMap;
93 const int mKey;
94 const uint64_t mValue;
95 const int mNextLevelBitmapEntryIndex;
96 };
97
98 TrieMapIterator(const TrieMap* const trieMap, const int bitmapEntryIndex)
99 : mTrieMap(trieMap),
100 mStateStack(),
101 mBaseBitmapEntryIndex(bitmapEntryIndex),
102 mKey(0),
103 mValue(0),
104 mIsValid(false),
105 mNextLevelBitmapEntryIndex(INVALID_INDEX) {
106 if (!trieMap) {
107 return;
108 }
109 const Entry bitmapEntry = mTrieMap->readEntry(mBaseBitmapEntryIndex);
110 mStateStack.emplace_back(mTrieMap->popCount(bitmapEntry.getBitmap()),
111 bitmapEntry.getTableIndex());
112 this->operator++();
113 }
114
115 const IterationResult operator*() const {
116 return IterationResult(mTrieMap, mKey, mValue,
117 mNextLevelBitmapEntryIndex);
118 }
119
120 bool operator!=(const TrieMapIterator& other) const {
121 // Caveat: This works only for for loops.
122 return mIsValid || other.mIsValid;
123 }
124
125 const TrieMapIterator& operator++() {
126 const Result result = mTrieMap->iterateNext(&mStateStack, &mKey);
127 mValue = result.mValue;
128 mIsValid = result.mIsValid;
129 mNextLevelBitmapEntryIndex = result.mNextLevelBitmapEntryIndex;
130 return *this;
131 }
132
133 private:
134 DISALLOW_DEFAULT_CONSTRUCTOR(TrieMapIterator);
135 DISALLOW_ASSIGNMENT_OPERATOR(TrieMapIterator);
136
137 const TrieMap* const mTrieMap;
138 std::vector<TrieMap::TableIterationState> mStateStack;
139 const int mBaseBitmapEntryIndex;
140 int mKey;
141 uint64_t mValue;
142 bool mIsValid;
143 int mNextLevelBitmapEntryIndex;
144 };
145
146 /**
147 * Class to support iterating entries in TrieMap by range base for loops.
148 */
149 class TrieMapRange {
150 public:
151 TrieMapRange(const TrieMap* const trieMap, const int bitmapEntryIndex)
152 : mTrieMap(trieMap), mBaseBitmapEntryIndex(bitmapEntryIndex){};
153
154 TrieMapIterator begin() const {
155 return TrieMapIterator(mTrieMap, mBaseBitmapEntryIndex);
156 }
157
158 const TrieMapIterator end() const {
159 return TrieMapIterator(nullptr, INVALID_INDEX);
160 }
161
162 private:
163 DISALLOW_DEFAULT_CONSTRUCTOR(TrieMapRange);
164 DISALLOW_ASSIGNMENT_OPERATOR(TrieMapRange);
165
166 const TrieMap* const mTrieMap;
167 const int mBaseBitmapEntryIndex;
168 };
169
170 static const int INVALID_INDEX;
171 static const uint64_t MAX_VALUE;
172
173 TrieMap();
174 // Construct TrieMap using existing data in the memory region written by
175 // save().
176 TrieMap(const ReadWriteByteArrayView buffer);
177 void dump(const int from = 0, const int to = 0) const;
178
179 bool isNearSizeLimit() const { return mBuffer.isNearSizeLimit(); }
180
181 int getRootBitmapEntryIndex() const { return ROOT_BITMAP_ENTRY_INDEX; }
182
183 // Returns bitmapEntryIndex. Create the next level map if it doesn't exist.
184 int getNextLevelBitmapEntryIndex(const int key) {
185 return getNextLevelBitmapEntryIndex(key, ROOT_BITMAP_ENTRY_INDEX);
186 }
187
188 int getNextLevelBitmapEntryIndex(const int key, const int bitmapEntryIndex);
189
190 const Result getRoot(const int key) const {
191 return get(key, ROOT_BITMAP_ENTRY_INDEX);
192 }
193
194 const Result get(const int key, const int bitmapEntryIndex) const;
195
196 bool putRoot(const int key, const uint64_t value) {
197 return put(key, value, ROOT_BITMAP_ENTRY_INDEX);
198 }
199
200 bool put(const int key, const uint64_t value, const int bitmapEntryIndex);
201
202 const TrieMapRange getEntriesInRootLevel() const {
203 return getEntriesInSpecifiedLevel(ROOT_BITMAP_ENTRY_INDEX);
204 }
205
206 const TrieMapRange getEntriesInSpecifiedLevel(
207 const int bitmapEntryIndex) const {
208 return TrieMapRange(this, bitmapEntryIndex);
209 }
210
211 bool save(FILE* const file) const;
212
213 private:
214 DISALLOW_COPY_AND_ASSIGN(TrieMap);
215
216 /**
217 * Struct represents an entry.
218 *
219 * Entry is one of these entry types. All entries are fixed size and have 2
220 *fields FIELD_0 and
221 * FIELD_1.
222 * 1. bitmap entry. bitmap entry contains bitmap and the link to hash table.
223 * FIELD_0(bitmap) FIELD_1(LINK_TO_HASH_TABLE)
224 * 2. terminal entry. terminal entry contains hashed key and value or terminal
225 *link. terminal
226 * entry have terminal link when the value is not fit to FIELD_1 or there is a
227 *next level map
228 * for the key.
229 * FIELD_0(hashed key) (FIELD_1(VALUE_FLAG VALUE) |
230 *FIELD_1(TERMINAL_LINK_FLAG TERMINAL_LINK))
231 * 3. value entry. value entry represents a value. Upper order bytes are
232 *stored in FIELD_0 and
233 * lower order bytes are stored in FIELD_1.
234 * FIELD_0(value (upper order bytes)) FIELD_1(value (lower order bytes))
235 */
236 struct Entry {
237 const uint32_t mData0;
238 const uint32_t mData1;
239
240 Entry(const uint32_t data0, const uint32_t data1)
241 : mData0(data0), mData1(data1) {}
242
243 AK_FORCE_INLINE bool isBitmapEntry() const {
244 return (mData1 & VALUE_FLAG) == 0 && (mData1 & TERMINAL_LINK_FLAG) == 0;
245 }
246
247 AK_FORCE_INLINE bool hasTerminalLink() const {
248 return (mData1 & TERMINAL_LINK_FLAG) != 0;
249 }
250
251 // For terminal entry.
252 AK_FORCE_INLINE uint32_t getKey() const { return mData0; }
253
254 // For terminal entry.
255 AK_FORCE_INLINE uint32_t getValue() const { return mData1 & VALUE_MASK; }
256
257 // For terminal entry.
258 AK_FORCE_INLINE uint32_t getValueEntryIndex() const {
259 return mData1 & TERMINAL_LINK_MASK;
260 }
261
262 // For bitmap entry.
263 AK_FORCE_INLINE uint32_t getBitmap() const { return mData0; }
264
265 // For bitmap entry.
266 AK_FORCE_INLINE int getTableIndex() const {
267 return static_cast<int>(mData1);
268 }
269
270 // For value entry.
271 AK_FORCE_INLINE uint64_t getValueOfValueEntry() const {
272 return ((static_cast<uint64_t>(mData0) << (FIELD1_SIZE * CHAR_BIT)) ^
273 mData1);
274 }
275 };
276
277 BufferWithExtendableBuffer mBuffer;
278
279 static const int FIELD0_SIZE;
280 static const int FIELD1_SIZE;
281 static const int ENTRY_SIZE;
282 static const uint32_t VALUE_FLAG;
283 static const uint32_t VALUE_MASK;
284 static const uint32_t TERMINAL_LINK_FLAG;
285 static const uint32_t TERMINAL_LINK_MASK;
286 static const int NUM_OF_BITS_USED_FOR_ONE_LEVEL;
287 static const uint32_t LABEL_MASK;
288 static const int MAX_NUM_OF_ENTRIES_IN_ONE_LEVEL;
289 static const int ROOT_BITMAP_ENTRY_INDEX;
290 static const int ROOT_BITMAP_ENTRY_POS;
291 static const Entry EMPTY_BITMAP_ENTRY;
292 static const int MAX_BUFFER_SIZE;
293
294 uint32_t getBitShuffledKey(const uint32_t key) const;
295 bool writeValue(const uint64_t value, const int terminalEntryIndex);
296 bool updateValue(const Entry& terminalEntry,
297 const uint64_t value,
298 const int terminalEntryIndex);
299 bool freeTable(const int tableIndex, const int entryCount);
300 int allocateTable(const int entryCount);
301 int getTerminalEntryIndex(const uint32_t key,
302 const uint32_t hashedKey,
303 const Entry& bitmapEntry,
304 const int level) const;
305 const Result getInternal(const uint32_t key,
306 const uint32_t hashedKey,
307 const int bitmapEntryIndex,
308 const int level) const;
309 bool putInternal(const uint32_t key,
310 const uint64_t value,
311 const uint32_t hashedKey,
312 const int bitmapEntryIndex,
313 const Entry& bitmapEntry,
314 const int level);
315 bool addNewEntryByResolvingConflict(const uint32_t key,
316 const uint64_t value,
317 const uint32_t hashedKey,
318 const Entry& conflictedEntry,
319 const int conflictedEntryIndex,
320 const int level);
321 bool addNewEntryByExpandingTable(const uint32_t key,
322 const uint64_t value,
323 const int tableIndex,
324 const uint32_t bitmap,
325 const int bitmapEntryIndex,
326 const int label);
327 const Result iterateNext(
328 std::vector<TableIterationState>* const iterationState,
329 int* const outKey) const;
330
331 AK_FORCE_INLINE const Entry readEntry(const int entryIndex) const {
332 return Entry(readField0(entryIndex), readField1(entryIndex));
333 }
334
335 // Returns whether an entry for the index is existing by testing if the
336 // index-th bit in the
337 // bitmap is set or not.
338 AK_FORCE_INLINE bool exists(const uint32_t bitmap, const int index) const {
339 return (bitmap & (1 << index)) != 0;
340 }
341
342 // Set index-th bit in the bitmap.
343 AK_FORCE_INLINE uint32_t
344 setExist(const uint32_t bitmap, const int index) const {
345 return bitmap | (1 << index);
346 }
347
348 // Count set bits before index in the bitmap.
349 AK_FORCE_INLINE int popCount(const uint32_t bitmap, const int index) const {
350 return popCount(bitmap & ((1 << index) - 1));
351 }
352
353 // Count set bits in the bitmap.
354 AK_FORCE_INLINE int popCount(const uint32_t bitmap) const {
355 return __builtin_popcount(bitmap);
356 // int v = bitmap - ((bitmap >> 1) & 0x55555555);
357 // v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
358 // return (((v + (v >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24;
359 }
360
361 AK_FORCE_INLINE int getLabel(const uint32_t hashedKey,
362 const int level) const {
363 return (hashedKey >> (level * NUM_OF_BITS_USED_FOR_ONE_LEVEL)) & LABEL_MASK;
364 }
365
366 AK_FORCE_INLINE uint32_t readField0(const int entryIndex) const {
367 return mBuffer.readUint(FIELD0_SIZE,
368 ROOT_BITMAP_ENTRY_POS + entryIndex * ENTRY_SIZE);
369 }
370
371 AK_FORCE_INLINE uint32_t readField1(const int entryIndex) const {
372 return mBuffer.readUint(
373 FIELD1_SIZE,
374 ROOT_BITMAP_ENTRY_POS + entryIndex * ENTRY_SIZE + FIELD0_SIZE);
375 }
376
377 AK_FORCE_INLINE int readEmptyTableLink(const int entryCount) const {
378 return mBuffer.readUint(FIELD1_SIZE, (entryCount - 1) * FIELD1_SIZE);
379 }
380
381 AK_FORCE_INLINE bool writeEmptyTableLink(const int tableIndex,
382 const int entryCount) {
383 return mBuffer.writeUint(tableIndex, FIELD1_SIZE,
384 (entryCount - 1) * FIELD1_SIZE);
385 }
386
387 AK_FORCE_INLINE bool writeField0(const uint32_t data, const int entryIndex) {
388 return mBuffer.writeUint(data, FIELD0_SIZE,
389 ROOT_BITMAP_ENTRY_POS + entryIndex * ENTRY_SIZE);
390 }
391
392 AK_FORCE_INLINE bool writeField1(const uint32_t data, const int entryIndex) {
393 return mBuffer.writeUint(
394 data, FIELD1_SIZE,
395 ROOT_BITMAP_ENTRY_POS + entryIndex * ENTRY_SIZE + FIELD0_SIZE);
396 }
397
398 AK_FORCE_INLINE bool writeEntry(const Entry& entry, const int entryIndex) {
399 return writeField0(entry.mData0, entryIndex) &&
400 writeField1(entry.mData1, entryIndex);
401 }
402
403 AK_FORCE_INLINE bool writeTerminalEntry(const uint32_t key,
404 const uint64_t value,
405 const int entryIndex) {
406 return writeField0(key, entryIndex) && writeValue(value, entryIndex);
407 }
408
409 AK_FORCE_INLINE bool copyEntry(const int originalEntryIndex,
410 const int newEntryIndex) {
411 return writeEntry(readEntry(originalEntryIndex), newEntryIndex);
412 }
413
414 AK_FORCE_INLINE int getTailEntryIndex() const {
415 return (mBuffer.getTailPosition() - ROOT_BITMAP_ENTRY_POS) / ENTRY_SIZE;
416 }
417 };
418
419 } // namespace latinime
420 #endif /* LATINIME_TRIE_MAP_H */
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698