Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(487)

Side by Side Diff: third_party/prediction/suggest/policyimpl/dictionary/utils/trie_map.cpp

Issue 1247903003: Add spellcheck and word suggestion to the prediction service (Closed) Base URL: https://github.com/domokit/mojo.git@master
Patch Set: Created 5 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 /*
2 * Copyright (C) 2014, The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "third_party/prediction/suggest/policyimpl/dictionary/utils/trie_map.h"
18
19 #include "third_party/prediction/suggest/policyimpl/dictionary/utils/dict_file_w riting_utils.h"
20
21 namespace latinime {
22
23 const int TrieMap::INVALID_INDEX = -1;
24 const int TrieMap::FIELD0_SIZE = 4;
25 const int TrieMap::FIELD1_SIZE = 3;
26 const int TrieMap::ENTRY_SIZE = FIELD0_SIZE + FIELD1_SIZE;
27 const uint32_t TrieMap::VALUE_FLAG = 0x400000;
28 const uint32_t TrieMap::VALUE_MASK = 0x3FFFFF;
29 const uint32_t TrieMap::TERMINAL_LINK_FLAG = 0x800000;
30 const uint32_t TrieMap::TERMINAL_LINK_MASK = 0x7FFFFF;
31 const int TrieMap::NUM_OF_BITS_USED_FOR_ONE_LEVEL = 5;
32 const uint32_t TrieMap::LABEL_MASK = 0x1F;
33 const int TrieMap::MAX_NUM_OF_ENTRIES_IN_ONE_LEVEL =
34 1 << NUM_OF_BITS_USED_FOR_ONE_LEVEL;
35 const int TrieMap::ROOT_BITMAP_ENTRY_INDEX = 0;
36 const int TrieMap::ROOT_BITMAP_ENTRY_POS =
37 MAX_NUM_OF_ENTRIES_IN_ONE_LEVEL * FIELD0_SIZE;
38 const TrieMap::Entry TrieMap::EMPTY_BITMAP_ENTRY = TrieMap::Entry(0, 0);
39 const uint64_t TrieMap::MAX_VALUE =
40 (static_cast<uint64_t>(1) << ((FIELD0_SIZE + FIELD1_SIZE) * CHAR_BIT)) - 1;
41 const int TrieMap::MAX_BUFFER_SIZE = TERMINAL_LINK_MASK * ENTRY_SIZE;
42
43 TrieMap::TrieMap() : mBuffer(MAX_BUFFER_SIZE) {
44 mBuffer.extend(ROOT_BITMAP_ENTRY_POS);
45 writeEntry(EMPTY_BITMAP_ENTRY, ROOT_BITMAP_ENTRY_INDEX);
46 }
47
48 TrieMap::TrieMap(const ReadWriteByteArrayView buffer)
49 : mBuffer(buffer,
50 BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE) {
51 }
52
53 void TrieMap::dump(const int from, const int to) const {
54 AKLOGI("BufSize: %d", mBuffer.getTailPosition());
55 for (int i = from; i < to; ++i) {
56 AKLOGI("Entry[%d]: %x, %x", i, readField0(i), readField1(i));
57 }
58 int unusedRegionSize = 0;
59 for (int i = 1; i <= MAX_NUM_OF_ENTRIES_IN_ONE_LEVEL; ++i) {
60 int index = readEmptyTableLink(i);
61 while (index != ROOT_BITMAP_ENTRY_INDEX) {
62 index = readField0(index);
63 unusedRegionSize += i;
64 }
65 }
66 AKLOGI("Unused Size: %d", unusedRegionSize);
67 }
68
69 int TrieMap::getNextLevelBitmapEntryIndex(const int key,
70 const int bitmapEntryIndex) {
71 const Entry bitmapEntry = readEntry(bitmapEntryIndex);
72 const uint32_t unsignedKey = static_cast<uint32_t>(key);
73 const int terminalEntryIndex = getTerminalEntryIndex(
74 unsignedKey, getBitShuffledKey(unsignedKey), bitmapEntry, 0 /* level */);
75 if (terminalEntryIndex == INVALID_INDEX) {
76 // Not found.
77 return INVALID_INDEX;
78 }
79 const Entry terminalEntry = readEntry(terminalEntryIndex);
80 if (terminalEntry.hasTerminalLink()) {
81 return terminalEntry.getValueEntryIndex() + 1;
82 }
83 // Create a value entry and a bitmap entry.
84 const int valueEntryIndex = allocateTable(2 /* entryCount */);
85 if (!writeEntry(Entry(0, terminalEntry.getValue()), valueEntryIndex)) {
86 return INVALID_INDEX;
87 }
88 if (!writeEntry(EMPTY_BITMAP_ENTRY, valueEntryIndex + 1)) {
89 return INVALID_INDEX;
90 }
91 if (!writeField1(valueEntryIndex | TERMINAL_LINK_FLAG, valueEntryIndex)) {
92 return INVALID_INDEX;
93 }
94 return valueEntryIndex + 1;
95 }
96
97 const TrieMap::Result TrieMap::get(const int key,
98 const int bitmapEntryIndex) const {
99 const uint32_t unsignedKey = static_cast<uint32_t>(key);
100 return getInternal(unsignedKey, getBitShuffledKey(unsignedKey),
101 bitmapEntryIndex, 0 /* level */);
102 }
103
104 bool TrieMap::put(const int key,
105 const uint64_t value,
106 const int bitmapEntryIndex) {
107 if (value > MAX_VALUE) {
108 return false;
109 }
110 const uint32_t unsignedKey = static_cast<uint32_t>(key);
111 return putInternal(unsignedKey, value, getBitShuffledKey(unsignedKey),
112 bitmapEntryIndex, readEntry(bitmapEntryIndex),
113 0 /* level */);
114 }
115
116 bool TrieMap::save(FILE* const file) const {
117 return DictFileWritingUtils::writeBufferToFileTail(file, &mBuffer);
118 }
119
120 /**
121 * Iterate next entry in a certain level.
122 *
123 * @param iterationState the iteration state that will be read and updated in
124 *this method.
125 * @param outKey the output key
126 * @return Result instance. mIsValid is false when all entries are iterated.
127 */
128 const TrieMap::Result TrieMap::iterateNext(
129 std::vector<TableIterationState>* const iterationState,
130 int* const outKey) const {
131 while (!iterationState->empty()) {
132 TableIterationState& state = iterationState->back();
133 if (state.mTableSize <= state.mCurrentIndex) {
134 // Move to parent.
135 iterationState->pop_back();
136 } else {
137 const int entryIndex = state.mTableIndex + state.mCurrentIndex;
138 state.mCurrentIndex += 1;
139 const Entry entry = readEntry(entryIndex);
140 if (entry.isBitmapEntry()) {
141 // Move to child.
142 iterationState->emplace_back(popCount(entry.getBitmap()),
143 entry.getTableIndex());
144 } else {
145 if (outKey) {
146 *outKey = entry.getKey();
147 }
148 if (!entry.hasTerminalLink()) {
149 return Result(entry.getValue(), true, INVALID_INDEX);
150 }
151 const int valueEntryIndex = entry.getValueEntryIndex();
152 const Entry valueEntry = readEntry(valueEntryIndex);
153 return Result(valueEntry.getValueOfValueEntry(), true,
154 valueEntryIndex + 1);
155 }
156 }
157 }
158 // Visited all entries.
159 return Result(0, false, INVALID_INDEX);
160 }
161
162 /**
163 * Shuffle bits of the key in the fixed order.
164 *
165 * This method is used as a hash function. This returns different values for
166 *different inputs.
167 */
168 uint32_t TrieMap::getBitShuffledKey(const uint32_t key) const {
169 uint32_t shuffledKey = 0;
170 for (int i = 0; i < 4; ++i) {
171 const uint32_t keyPiece = (key >> (i * 8)) & 0xFF;
172 shuffledKey ^=
173 ((keyPiece ^ (keyPiece << 7) ^ (keyPiece << 14) ^ (keyPiece << 21)) &
174 0x11111111)
175 << i;
176 }
177 return shuffledKey;
178 }
179
180 bool TrieMap::writeValue(const uint64_t value, const int terminalEntryIndex) {
181 if (value <= VALUE_MASK) {
182 // Write value into the terminal entry.
183 return writeField1(value | VALUE_FLAG, terminalEntryIndex);
184 }
185 // Create value entry and write value.
186 const int valueEntryIndex = allocateTable(2 /* entryCount */);
187 if (!writeEntry(Entry(value >> (FIELD1_SIZE * CHAR_BIT), value),
188 valueEntryIndex)) {
189 return false;
190 }
191 if (!writeEntry(EMPTY_BITMAP_ENTRY, valueEntryIndex + 1)) {
192 return false;
193 }
194 return writeField1(valueEntryIndex | TERMINAL_LINK_FLAG, terminalEntryIndex);
195 }
196
197 bool TrieMap::updateValue(const Entry& terminalEntry,
198 const uint64_t value,
199 const int terminalEntryIndex) {
200 if (!terminalEntry.hasTerminalLink()) {
201 return writeValue(value, terminalEntryIndex);
202 }
203 const int valueEntryIndex = terminalEntry.getValueEntryIndex();
204 return writeEntry(Entry(value >> (FIELD1_SIZE * CHAR_BIT), value),
205 valueEntryIndex);
206 }
207
208 bool TrieMap::freeTable(const int tableIndex, const int entryCount) {
209 if (!writeField0(readEmptyTableLink(entryCount), tableIndex)) {
210 return false;
211 }
212 return writeEmptyTableLink(tableIndex, entryCount);
213 }
214
215 /**
216 * Allocate table with entryCount-entries. Reuse freed table if possible.
217 */
218 int TrieMap::allocateTable(const int entryCount) {
219 if (entryCount > 0 && entryCount <= MAX_NUM_OF_ENTRIES_IN_ONE_LEVEL) {
220 const int tableIndex = readEmptyTableLink(entryCount);
221 if (tableIndex > 0) {
222 if (!writeEmptyTableLink(readField0(tableIndex), entryCount)) {
223 return INVALID_INDEX;
224 }
225 // Reuse the table.
226 return tableIndex;
227 }
228 }
229 // Allocate memory space at tail position of the buffer.
230 const int mapIndex = getTailEntryIndex();
231 if (!mBuffer.extend(entryCount * ENTRY_SIZE)) {
232 return INVALID_INDEX;
233 }
234 return mapIndex;
235 }
236
237 int TrieMap::getTerminalEntryIndex(const uint32_t key,
238 const uint32_t hashedKey,
239 const Entry& bitmapEntry,
240 const int level) const {
241 const int label = getLabel(hashedKey, level);
242 if (!exists(bitmapEntry.getBitmap(), label)) {
243 return INVALID_INDEX;
244 }
245 const int entryIndex =
246 bitmapEntry.getTableIndex() + popCount(bitmapEntry.getBitmap(), label);
247 const Entry entry = readEntry(entryIndex);
248 if (entry.isBitmapEntry()) {
249 // Move to the next level.
250 return getTerminalEntryIndex(key, hashedKey, entry, level + 1);
251 }
252 if (entry.getKey() == key) {
253 // Terminal entry is found.
254 return entryIndex;
255 }
256 return INVALID_INDEX;
257 }
258
259 /**
260 * Get Result corresponding to the key.
261 *
262 * @param key the key.
263 * @param hashedKey the hashed key.
264 * @param bitmapEntryIndex the index of bitmap entry
265 * @param level current level
266 * @return Result instance corresponding to the key. mIsValid indicates whether
267 *the key is in the
268 * map.
269 */
270 const TrieMap::Result TrieMap::getInternal(const uint32_t key,
271 const uint32_t hashedKey,
272 const int bitmapEntryIndex,
273 const int level) const {
274 const int terminalEntryIndex =
275 getTerminalEntryIndex(key, hashedKey, readEntry(bitmapEntryIndex), level);
276 if (terminalEntryIndex == INVALID_INDEX) {
277 // Not found.
278 return Result(0, false, INVALID_INDEX);
279 }
280 const Entry terminalEntry = readEntry(terminalEntryIndex);
281 if (!terminalEntry.hasTerminalLink()) {
282 return Result(terminalEntry.getValue(), true, INVALID_INDEX);
283 }
284 const int valueEntryIndex = terminalEntry.getValueEntryIndex();
285 const Entry valueEntry = readEntry(valueEntryIndex);
286 return Result(valueEntry.getValueOfValueEntry(), true, valueEntryIndex + 1);
287 }
288
289 /**
290 * Put key to value mapping to the map.
291 *
292 * @param key the key.
293 * @param value the value
294 * @param hashedKey the hashed key.
295 * @param bitmapEntryIndex the index of bitmap entry
296 * @param bitmapEntry the bitmap entry
297 * @param level current level
298 * @return whether the key-value has been correctly inserted to the map or not.
299 */
300 bool TrieMap::putInternal(const uint32_t key,
301 const uint64_t value,
302 const uint32_t hashedKey,
303 const int bitmapEntryIndex,
304 const Entry& bitmapEntry,
305 const int level) {
306 const int label = getLabel(hashedKey, level);
307 const uint32_t bitmap = bitmapEntry.getBitmap();
308 const int mapIndex = bitmapEntry.getTableIndex();
309 if (!exists(bitmap, label)) {
310 // Current map doesn't contain the label.
311 return addNewEntryByExpandingTable(key, value, mapIndex, bitmap,
312 bitmapEntryIndex, label);
313 }
314 const int entryIndex = mapIndex + popCount(bitmap, label);
315 const Entry entry = readEntry(entryIndex);
316 if (entry.isBitmapEntry()) {
317 // Bitmap entry is found. Go to the next level.
318 return putInternal(key, value, hashedKey, entryIndex, entry, level + 1);
319 }
320 if (entry.getKey() == key) {
321 // Terminal entry for the key is found. Update the value.
322 return updateValue(entry, value, entryIndex);
323 }
324 // Conflict with the existing key.
325 return addNewEntryByResolvingConflict(key, value, hashedKey, entry,
326 entryIndex, level);
327 }
328
329 /**
330 * Resolve a conflict in the current level and add new entry.
331 *
332 * @param key the key
333 * @param value the value
334 * @param hashedKey the hashed key
335 * @param conflictedEntry the existing conflicted entry
336 * @param conflictedEntryIndex the index of existing conflicted entry
337 * @param level current level
338 * @return whether the key-value has been correctly inserted to the map or not.
339 */
340 bool TrieMap::addNewEntryByResolvingConflict(const uint32_t key,
341 const uint64_t value,
342 const uint32_t hashedKey,
343 const Entry& conflictedEntry,
344 const int conflictedEntryIndex,
345 const int level) {
346 const int conflictedKeyNextLabel =
347 getLabel(getBitShuffledKey(conflictedEntry.getKey()), level + 1);
348 const int nextLabel = getLabel(hashedKey, level + 1);
349 if (conflictedKeyNextLabel == nextLabel) {
350 // Conflicted again in the next level.
351 const int newTableIndex = allocateTable(1 /* entryCount */);
352 if (newTableIndex == INVALID_INDEX) {
353 return false;
354 }
355 if (!writeEntry(conflictedEntry, newTableIndex)) {
356 return false;
357 }
358 const Entry newBitmapEntry(setExist(0 /* bitmap */, nextLabel),
359 newTableIndex);
360 if (!writeEntry(newBitmapEntry, conflictedEntryIndex)) {
361 return false;
362 }
363 return putInternal(key, value, hashedKey, conflictedEntryIndex,
364 newBitmapEntry, level + 1);
365 }
366 // The conflict has been resolved. Create a table that contains 2 entries.
367 const int newTableIndex = allocateTable(2 /* entryCount */);
368 if (newTableIndex == INVALID_INDEX) {
369 return false;
370 }
371 if (nextLabel < conflictedKeyNextLabel) {
372 if (!writeTerminalEntry(key, value, newTableIndex)) {
373 return false;
374 }
375 if (!writeEntry(conflictedEntry, newTableIndex + 1)) {
376 return false;
377 }
378 } else { // nextLabel > conflictedKeyNextLabel
379 if (!writeEntry(conflictedEntry, newTableIndex)) {
380 return false;
381 }
382 if (!writeTerminalEntry(key, value, newTableIndex + 1)) {
383 return false;
384 }
385 }
386 const uint32_t updatedBitmap =
387 setExist(setExist(0 /* bitmap */, nextLabel), conflictedKeyNextLabel);
388 return writeEntry(Entry(updatedBitmap, newTableIndex), conflictedEntryIndex);
389 }
390
391 /**
392 * Add new entry to the existing table.
393 */
394 bool TrieMap::addNewEntryByExpandingTable(const uint32_t key,
395 const uint64_t value,
396 const int tableIndex,
397 const uint32_t bitmap,
398 const int bitmapEntryIndex,
399 const int label) {
400 // Current map doesn't contain the label.
401 const int entryCount = popCount(bitmap);
402 const int newTableIndex = allocateTable(entryCount + 1);
403 if (newTableIndex == INVALID_INDEX) {
404 return false;
405 }
406 const int newEntryIndexInTable = popCount(bitmap, label);
407 // Copy from existing table to the new table.
408 for (int i = 0; i < entryCount; ++i) {
409 if (!copyEntry(tableIndex + i,
410 newTableIndex + i + (i >= newEntryIndexInTable ? 1 : 0))) {
411 return false;
412 }
413 }
414 // Write new terminal entry.
415 if (!writeTerminalEntry(key, value, newTableIndex + newEntryIndexInTable)) {
416 return false;
417 }
418 // Update bitmap.
419 if (!writeEntry(Entry(setExist(bitmap, label), newTableIndex),
420 bitmapEntryIndex)) {
421 return false;
422 }
423 if (entryCount > 0) {
424 return freeTable(tableIndex, entryCount);
425 }
426 return true;
427 }
428
429 } // namespace latinime
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698