Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(147)

Side by Side Diff: third_party/android_prediction/suggest/policyimpl/dictionary/utils/trie_map.h

Issue 1247903003: Add spellcheck and word suggestion to the prediction service (Closed) Base URL: https://github.com/domokit/mojo.git@master
Patch Set: changed third_party/prediction to third_party/android_prediction; added CHROMIUM.diff Created 5 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 /*
2 * Copyright (C) 2014, The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #ifndef LATINIME_TRIE_MAP_H
18 #define LATINIME_TRIE_MAP_H
19
20 #include <climits>
21 #include <cstdint>
22 #include <cstdio>
23 #include <vector>
24
25 #include "third_party/android_prediction/defines.h"
26 #include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/buf fer_with_extendable_buffer.h"
27 #include "third_party/android_prediction/utils/byte_array_view.h"
28
29 namespace latinime {
30
31 /**
32 * Trie map derived from Phil Bagwell's Hash Array Mapped Trie.
33 * key is int and value is uint64_t.
34 * This supports multiple level map. Terminal entries can have a bitmap for the next level map.
35 * This doesn't support root map resizing.
36 */
37 class TrieMap {
38 public:
39 struct Result {
40 const uint64_t mValue;
41 const bool mIsValid;
42 const int mNextLevelBitmapEntryIndex;
43
44 Result(const uint64_t value, const bool isValid, const int nextLevelBitm apEntryIndex)
45 : mValue(value), mIsValid(isValid),
46 mNextLevelBitmapEntryIndex(nextLevelBitmapEntryIndex) {}
47 };
48
49 /**
50 * Struct to record iteration state in a table.
51 */
52 struct TableIterationState {
53 int mTableSize;
54 int mTableIndex;
55 int mCurrentIndex;
56
57 TableIterationState(const int tableSize, const int tableIndex)
58 : mTableSize(tableSize), mTableIndex(tableIndex), mCurrentIndex( 0) {}
59 };
60
61 class TrieMapRange;
62 class TrieMapIterator {
63 public:
64 class IterationResult {
65 public:
66 IterationResult(const TrieMap *const trieMap, const int key, const u int64_t value,
67 const int nextLeveBitmapEntryIndex)
68 : mTrieMap(trieMap), mKey(key), mValue(value),
69 mNextLevelBitmapEntryIndex(nextLeveBitmapEntryIndex) {}
70
71 const TrieMapRange getEntriesInNextLevel() const {
72 return TrieMapRange(mTrieMap, mNextLevelBitmapEntryIndex);
73 }
74
75 bool hasNextLevelMap() const {
76 return mNextLevelBitmapEntryIndex != INVALID_INDEX;
77 }
78
79 AK_FORCE_INLINE int key() const {
80 return mKey;
81 }
82
83 AK_FORCE_INLINE uint64_t value() const {
84 return mValue;
85 }
86
87 private:
88 const TrieMap *const mTrieMap;
89 const int mKey;
90 const uint64_t mValue;
91 const int mNextLevelBitmapEntryIndex;
92 };
93
94 TrieMapIterator(const TrieMap *const trieMap, const int bitmapEntryIndex )
95 : mTrieMap(trieMap), mStateStack(), mBaseBitmapEntryIndex(bitmap EntryIndex),
96 mKey(0), mValue(0), mIsValid(false), mNextLevelBitmapEntryInde x(INVALID_INDEX) {
97 if (!trieMap) {
98 return;
99 }
100 const Entry bitmapEntry = mTrieMap->readEntry(mBaseBitmapEntryIndex) ;
101 mStateStack.emplace_back(
102 mTrieMap->popCount(bitmapEntry.getBitmap()), bitmapEntry.get TableIndex());
103 this->operator++();
104 }
105
106 const IterationResult operator*() const {
107 return IterationResult(mTrieMap, mKey, mValue, mNextLevelBitmapEntry Index);
108 }
109
110 bool operator!=(const TrieMapIterator &other) const {
111 // Caveat: This works only for for loops.
112 return mIsValid || other.mIsValid;
113 }
114
115 const TrieMapIterator &operator++() {
116 const Result result = mTrieMap->iterateNext(&mStateStack, &mKey);
117 mValue = result.mValue;
118 mIsValid = result.mIsValid;
119 mNextLevelBitmapEntryIndex = result.mNextLevelBitmapEntryIndex;
120 return *this;
121 }
122
123 private:
124 DISALLOW_DEFAULT_CONSTRUCTOR(TrieMapIterator);
125 DISALLOW_ASSIGNMENT_OPERATOR(TrieMapIterator);
126
127 const TrieMap *const mTrieMap;
128 std::vector<TrieMap::TableIterationState> mStateStack;
129 const int mBaseBitmapEntryIndex;
130 int mKey;
131 uint64_t mValue;
132 bool mIsValid;
133 int mNextLevelBitmapEntryIndex;
134 };
135
136 /**
137 * Class to support iterating entries in TrieMap by range base for loops.
138 */
139 class TrieMapRange {
140 public:
141 TrieMapRange(const TrieMap *const trieMap, const int bitmapEntryIndex)
142 : mTrieMap(trieMap), mBaseBitmapEntryIndex(bitmapEntryIndex) {};
143
144 TrieMapIterator begin() const {
145 return TrieMapIterator(mTrieMap, mBaseBitmapEntryIndex);
146 }
147
148 const TrieMapIterator end() const {
149 return TrieMapIterator(nullptr, INVALID_INDEX);
150 }
151
152 private:
153 DISALLOW_DEFAULT_CONSTRUCTOR(TrieMapRange);
154 DISALLOW_ASSIGNMENT_OPERATOR(TrieMapRange);
155
156 const TrieMap *const mTrieMap;
157 const int mBaseBitmapEntryIndex;
158 };
159
160 static const int INVALID_INDEX;
161 static const uint64_t MAX_VALUE;
162
163 TrieMap();
164 // Construct TrieMap using existing data in the memory region written by sav e().
165 TrieMap(const ReadWriteByteArrayView buffer);
166 void dump(const int from = 0, const int to = 0) const;
167
168 bool isNearSizeLimit() const {
169 return mBuffer.isNearSizeLimit();
170 }
171
172 int getRootBitmapEntryIndex() const {
173 return ROOT_BITMAP_ENTRY_INDEX;
174 }
175
176 // Returns bitmapEntryIndex. Create the next level map if it doesn't exist.
177 int getNextLevelBitmapEntryIndex(const int key) {
178 return getNextLevelBitmapEntryIndex(key, ROOT_BITMAP_ENTRY_INDEX);
179 }
180
181 int getNextLevelBitmapEntryIndex(const int key, const int bitmapEntryIndex);
182
183 const Result getRoot(const int key) const {
184 return get(key, ROOT_BITMAP_ENTRY_INDEX);
185 }
186
187 const Result get(const int key, const int bitmapEntryIndex) const;
188
189 bool putRoot(const int key, const uint64_t value) {
190 return put(key, value, ROOT_BITMAP_ENTRY_INDEX);
191 }
192
193 bool put(const int key, const uint64_t value, const int bitmapEntryIndex);
194
195 const TrieMapRange getEntriesInRootLevel() const {
196 return getEntriesInSpecifiedLevel(ROOT_BITMAP_ENTRY_INDEX);
197 }
198
199 const TrieMapRange getEntriesInSpecifiedLevel(const int bitmapEntryIndex) co nst {
200 return TrieMapRange(this, bitmapEntryIndex);
201 }
202
203 bool save(FILE *const file) const;
204
205 private:
206 DISALLOW_COPY_AND_ASSIGN(TrieMap);
207
208 /**
209 * Struct represents an entry.
210 *
211 * Entry is one of these entry types. All entries are fixed size and have 2 fields FIELD_0 and
212 * FIELD_1.
213 * 1. bitmap entry. bitmap entry contains bitmap and the link to hash table.
214 * FIELD_0(bitmap) FIELD_1(LINK_TO_HASH_TABLE)
215 * 2. terminal entry. terminal entry contains hashed key and value or termin al link. terminal
216 * entry have terminal link when the value is not fit to FIELD_1 or there is a next level map
217 * for the key.
218 * FIELD_0(hashed key) (FIELD_1(VALUE_FLAG VALUE) | FIELD_1(TERMINAL_LINK_ FLAG TERMINAL_LINK))
219 * 3. value entry. value entry represents a value. Upper order bytes are sto red in FIELD_0 and
220 * lower order bytes are stored in FIELD_1.
221 * FIELD_0(value (upper order bytes)) FIELD_1(value (lower order bytes))
222 */
223 struct Entry {
224 const uint32_t mData0;
225 const uint32_t mData1;
226
227 Entry(const uint32_t data0, const uint32_t data1) : mData0(data0), mData 1(data1) {}
228
229 AK_FORCE_INLINE bool isBitmapEntry() const {
230 return (mData1 & VALUE_FLAG) == 0 && (mData1 & TERMINAL_LINK_FLAG) = = 0;
231 }
232
233 AK_FORCE_INLINE bool hasTerminalLink() const {
234 return (mData1 & TERMINAL_LINK_FLAG) != 0;
235 }
236
237 // For terminal entry.
238 AK_FORCE_INLINE uint32_t getKey() const {
239 return mData0;
240 }
241
242 // For terminal entry.
243 AK_FORCE_INLINE uint32_t getValue() const {
244 return mData1 & VALUE_MASK;
245 }
246
247 // For terminal entry.
248 AK_FORCE_INLINE uint32_t getValueEntryIndex() const {
249 return mData1 & TERMINAL_LINK_MASK;
250 }
251
252 // For bitmap entry.
253 AK_FORCE_INLINE uint32_t getBitmap() const {
254 return mData0;
255 }
256
257 // For bitmap entry.
258 AK_FORCE_INLINE int getTableIndex() const {
259 return static_cast<int>(mData1);
260 }
261
262 // For value entry.
263 AK_FORCE_INLINE uint64_t getValueOfValueEntry() const {
264 return ((static_cast<uint64_t>(mData0) << (FIELD1_SIZE * CHAR_BIT)) ^ mData1);
265 }
266 };
267
268 BufferWithExtendableBuffer mBuffer;
269
270 static const int FIELD0_SIZE;
271 static const int FIELD1_SIZE;
272 static const int ENTRY_SIZE;
273 static const uint32_t VALUE_FLAG;
274 static const uint32_t VALUE_MASK;
275 static const uint32_t TERMINAL_LINK_FLAG;
276 static const uint32_t TERMINAL_LINK_MASK;
277 static const int NUM_OF_BITS_USED_FOR_ONE_LEVEL;
278 static const uint32_t LABEL_MASK;
279 static const int MAX_NUM_OF_ENTRIES_IN_ONE_LEVEL;
280 static const int ROOT_BITMAP_ENTRY_INDEX;
281 static const int ROOT_BITMAP_ENTRY_POS;
282 static const Entry EMPTY_BITMAP_ENTRY;
283 static const int MAX_BUFFER_SIZE;
284
285 uint32_t getBitShuffledKey(const uint32_t key) const;
286 bool writeValue(const uint64_t value, const int terminalEntryIndex);
287 bool updateValue(const Entry &terminalEntry, const uint64_t value,
288 const int terminalEntryIndex);
289 bool freeTable(const int tableIndex, const int entryCount);
290 int allocateTable(const int entryCount);
291 int getTerminalEntryIndex(const uint32_t key, const uint32_t hashedKey,
292 const Entry &bitmapEntry, const int level) const;
293 const Result getInternal(const uint32_t key, const uint32_t hashedKey,
294 const int bitmapEntryIndex, const int level) const;
295 bool putInternal(const uint32_t key, const uint64_t value, const uint32_t ha shedKey,
296 const int bitmapEntryIndex, const Entry &bitmapEntry, const int leve l);
297 bool addNewEntryByResolvingConflict(const uint32_t key, const uint64_t value ,
298 const uint32_t hashedKey, const Entry &conflictedEntry, const int co nflictedEntryIndex,
299 const int level);
300 bool addNewEntryByExpandingTable(const uint32_t key, const uint64_t value,
301 const int tableIndex, const uint32_t bitmap, const int bitmapEntryIn dex,
302 const int label);
303 const Result iterateNext(std::vector<TableIterationState> *const iterationSt ate,
304 int *const outKey) const;
305
306 AK_FORCE_INLINE const Entry readEntry(const int entryIndex) const {
307 return Entry(readField0(entryIndex), readField1(entryIndex));
308 }
309
310 // Returns whether an entry for the index is existing by testing if the inde x-th bit in the
311 // bitmap is set or not.
312 AK_FORCE_INLINE bool exists(const uint32_t bitmap, const int index) const {
313 return (bitmap & (1 << index)) != 0;
314 }
315
316 // Set index-th bit in the bitmap.
317 AK_FORCE_INLINE uint32_t setExist(const uint32_t bitmap, const int index) co nst {
318 return bitmap | (1 << index);
319 }
320
321 // Count set bits before index in the bitmap.
322 AK_FORCE_INLINE int popCount(const uint32_t bitmap, const int index) const {
323 return popCount(bitmap & ((1 << index) - 1));
324 }
325
326 // Count set bits in the bitmap.
327 AK_FORCE_INLINE int popCount(const uint32_t bitmap) const {
328 return __builtin_popcount(bitmap);
329 // int v = bitmap - ((bitmap >> 1) & 0x55555555);
330 // v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
331 // return (((v + (v >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24;
332 }
333
334 AK_FORCE_INLINE int getLabel(const uint32_t hashedKey, const int level) cons t {
335 return (hashedKey >> (level * NUM_OF_BITS_USED_FOR_ONE_LEVEL)) & LABEL_M ASK;
336 }
337
338 AK_FORCE_INLINE uint32_t readField0(const int entryIndex) const {
339 return mBuffer.readUint(FIELD0_SIZE, ROOT_BITMAP_ENTRY_POS + entryIndex * ENTRY_SIZE);
340 }
341
342 AK_FORCE_INLINE uint32_t readField1(const int entryIndex) const {
343 return mBuffer.readUint(FIELD1_SIZE,
344 ROOT_BITMAP_ENTRY_POS + entryIndex * ENTRY_SIZE + FIELD0_SIZE);
345 }
346
347 AK_FORCE_INLINE int readEmptyTableLink(const int entryCount) const {
348 return mBuffer.readUint(FIELD1_SIZE, (entryCount - 1) * FIELD1_SIZE);
349 }
350
351 AK_FORCE_INLINE bool writeEmptyTableLink(const int tableIndex, const int ent ryCount) {
352 return mBuffer.writeUint(tableIndex, FIELD1_SIZE, (entryCount - 1) * FIE LD1_SIZE);
353 }
354
355 AK_FORCE_INLINE bool writeField0(const uint32_t data, const int entryIndex) {
356 return mBuffer.writeUint(data, FIELD0_SIZE,
357 ROOT_BITMAP_ENTRY_POS + entryIndex * ENTRY_SIZE);
358 }
359
360 AK_FORCE_INLINE bool writeField1(const uint32_t data, const int entryIndex) {
361 return mBuffer.writeUint(data, FIELD1_SIZE,
362 ROOT_BITMAP_ENTRY_POS + entryIndex * ENTRY_SIZE + FIELD0_SIZE);
363 }
364
365 AK_FORCE_INLINE bool writeEntry(const Entry &entry, const int entryIndex) {
366 return writeField0(entry.mData0, entryIndex) && writeField1(entry.mData1 , entryIndex);
367 }
368
369 AK_FORCE_INLINE bool writeTerminalEntry(const uint32_t key, const uint64_t v alue,
370 const int entryIndex) {
371 return writeField0(key, entryIndex) && writeValue(value, entryIndex);
372 }
373
374 AK_FORCE_INLINE bool copyEntry(const int originalEntryIndex, const int newEn tryIndex) {
375 return writeEntry(readEntry(originalEntryIndex), newEntryIndex);
376 }
377
378 AK_FORCE_INLINE int getTailEntryIndex() const {
379 return (mBuffer.getTailPosition() - ROOT_BITMAP_ENTRY_POS) / ENTRY_SIZE;
380 }
381 };
382
383 } // namespace latinime
384 #endif /* LATINIME_TRIE_MAP_H */
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698