Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(96)

Side by Side Diff: third_party/android_prediction/suggest/policyimpl/dictionary/utils/trie_map.cpp

Issue 1247903003: Add spellcheck and word suggestion to the prediction service (Closed) Base URL: https://github.com/domokit/mojo.git@master
Patch Set: format README and CHROMIUM.diff Created 5 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 /*
2 * Copyright (C) 2014, The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/tri e_map.h"
18
19 #include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/dic t_file_writing_utils.h"
20
21 namespace latinime {
22
23 const int TrieMap::INVALID_INDEX = -1;
24 const int TrieMap::FIELD0_SIZE = 4;
25 const int TrieMap::FIELD1_SIZE = 3;
26 const int TrieMap::ENTRY_SIZE = FIELD0_SIZE + FIELD1_SIZE;
27 const uint32_t TrieMap::VALUE_FLAG = 0x400000;
28 const uint32_t TrieMap::VALUE_MASK = 0x3FFFFF;
29 const uint32_t TrieMap::TERMINAL_LINK_FLAG = 0x800000;
30 const uint32_t TrieMap::TERMINAL_LINK_MASK = 0x7FFFFF;
31 const int TrieMap::NUM_OF_BITS_USED_FOR_ONE_LEVEL = 5;
32 const uint32_t TrieMap::LABEL_MASK = 0x1F;
33 const int TrieMap::MAX_NUM_OF_ENTRIES_IN_ONE_LEVEL = 1 << NUM_OF_BITS_USED_FOR_O NE_LEVEL;
34 const int TrieMap::ROOT_BITMAP_ENTRY_INDEX = 0;
35 const int TrieMap::ROOT_BITMAP_ENTRY_POS = MAX_NUM_OF_ENTRIES_IN_ONE_LEVEL * FIE LD0_SIZE;
36 const TrieMap::Entry TrieMap::EMPTY_BITMAP_ENTRY = TrieMap::Entry(0, 0);
37 const uint64_t TrieMap::MAX_VALUE =
38 (static_cast<uint64_t>(1) << ((FIELD0_SIZE + FIELD1_SIZE) * CHAR_BIT)) - 1;
39 const int TrieMap::MAX_BUFFER_SIZE = TERMINAL_LINK_MASK * ENTRY_SIZE;
40
41 TrieMap::TrieMap() : mBuffer(MAX_BUFFER_SIZE) {
42 mBuffer.extend(ROOT_BITMAP_ENTRY_POS);
43 writeEntry(EMPTY_BITMAP_ENTRY, ROOT_BITMAP_ENTRY_INDEX);
44 }
45
46 TrieMap::TrieMap(const ReadWriteByteArrayView buffer)
47 : mBuffer(buffer, BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUF FER_SIZE) {}
48
49 void TrieMap::dump(const int from, const int to) const {
50 AKLOGI("BufSize: %d", mBuffer.getTailPosition());
51 for (int i = from; i < to; ++i) {
52 AKLOGI("Entry[%d]: %x, %x", i, readField0(i), readField1(i));
53 }
54 int unusedRegionSize = 0;
55 for (int i = 1; i <= MAX_NUM_OF_ENTRIES_IN_ONE_LEVEL; ++i) {
56 int index = readEmptyTableLink(i);
57 while (index != ROOT_BITMAP_ENTRY_INDEX) {
58 index = readField0(index);
59 unusedRegionSize += i;
60 }
61 }
62 AKLOGI("Unused Size: %d", unusedRegionSize);
63 }
64
65 int TrieMap::getNextLevelBitmapEntryIndex(const int key, const int bitmapEntryIn dex) {
66 const Entry bitmapEntry = readEntry(bitmapEntryIndex);
67 const uint32_t unsignedKey = static_cast<uint32_t>(key);
68 const int terminalEntryIndex = getTerminalEntryIndex(
69 unsignedKey, getBitShuffledKey(unsignedKey), bitmapEntry, 0 /* level */);
70 if (terminalEntryIndex == INVALID_INDEX) {
71 // Not found.
72 return INVALID_INDEX;
73 }
74 const Entry terminalEntry = readEntry(terminalEntryIndex);
75 if (terminalEntry.hasTerminalLink()) {
76 return terminalEntry.getValueEntryIndex() + 1;
77 }
78 // Create a value entry and a bitmap entry.
79 const int valueEntryIndex = allocateTable(2 /* entryCount */);
80 if (!writeEntry(Entry(0, terminalEntry.getValue()), valueEntryIndex)) {
81 return INVALID_INDEX;
82 }
83 if (!writeEntry(EMPTY_BITMAP_ENTRY, valueEntryIndex + 1)) {
84 return INVALID_INDEX;
85 }
86 if (!writeField1(valueEntryIndex | TERMINAL_LINK_FLAG, valueEntryIndex)) {
87 return INVALID_INDEX;
88 }
89 return valueEntryIndex + 1;
90 }
91
92 const TrieMap::Result TrieMap::get(const int key, const int bitmapEntryIndex) co nst {
93 const uint32_t unsignedKey = static_cast<uint32_t>(key);
94 return getInternal(unsignedKey, getBitShuffledKey(unsignedKey), bitmapEntryI ndex,
95 0 /* level */);
96 }
97
98 bool TrieMap::put(const int key, const uint64_t value, const int bitmapEntryInde x) {
99 if (value > MAX_VALUE) {
100 return false;
101 }
102 const uint32_t unsignedKey = static_cast<uint32_t>(key);
103 return putInternal(unsignedKey, value, getBitShuffledKey(unsignedKey), bitma pEntryIndex,
104 readEntry(bitmapEntryIndex), 0 /* level */);
105 }
106
107 bool TrieMap::save(FILE *const file) const {
108 return DictFileWritingUtils::writeBufferToFileTail(file, &mBuffer);
109 }
110
111 /**
112 * Iterate next entry in a certain level.
113 *
114 * @param iterationState the iteration state that will be read and updated in th is method.
115 * @param outKey the output key
116 * @return Result instance. mIsValid is false when all entries are iterated.
117 */
118 const TrieMap::Result TrieMap::iterateNext(std::vector<TableIterationState> *con st iterationState,
119 int *const outKey) const {
120 while (!iterationState->empty()) {
121 TableIterationState &state = iterationState->back();
122 if (state.mTableSize <= state.mCurrentIndex) {
123 // Move to parent.
124 iterationState->pop_back();
125 } else {
126 const int entryIndex = state.mTableIndex + state.mCurrentIndex;
127 state.mCurrentIndex += 1;
128 const Entry entry = readEntry(entryIndex);
129 if (entry.isBitmapEntry()) {
130 // Move to child.
131 iterationState->emplace_back(popCount(entry.getBitmap()), entry. getTableIndex());
132 } else {
133 if (outKey) {
134 *outKey = entry.getKey();
135 }
136 if (!entry.hasTerminalLink()) {
137 return Result(entry.getValue(), true, INVALID_INDEX);
138 }
139 const int valueEntryIndex = entry.getValueEntryIndex();
140 const Entry valueEntry = readEntry(valueEntryIndex);
141 return Result(valueEntry.getValueOfValueEntry(), true, valueEntr yIndex + 1);
142 }
143 }
144 }
145 // Visited all entries.
146 return Result(0, false, INVALID_INDEX);
147 }
148
149 /**
150 * Shuffle bits of the key in the fixed order.
151 *
152 * This method is used as a hash function. This returns different values for dif ferent inputs.
153 */
154 uint32_t TrieMap::getBitShuffledKey(const uint32_t key) const {
155 uint32_t shuffledKey = 0;
156 for (int i = 0; i < 4; ++i) {
157 const uint32_t keyPiece = (key >> (i * 8)) & 0xFF;
158 shuffledKey ^= ((keyPiece ^ (keyPiece << 7) ^ (keyPiece << 14) ^ (keyPie ce << 21))
159 & 0x11111111) << i;
160 }
161 return shuffledKey;
162 }
163
164 bool TrieMap::writeValue(const uint64_t value, const int terminalEntryIndex) {
165 if (value <= VALUE_MASK) {
166 // Write value into the terminal entry.
167 return writeField1(value | VALUE_FLAG, terminalEntryIndex);
168 }
169 // Create value entry and write value.
170 const int valueEntryIndex = allocateTable(2 /* entryCount */);
171 if (!writeEntry(Entry(value >> (FIELD1_SIZE * CHAR_BIT), value), valueEntryI ndex)) {
172 return false;
173 }
174 if (!writeEntry(EMPTY_BITMAP_ENTRY, valueEntryIndex + 1)) {
175 return false;
176 }
177 return writeField1(valueEntryIndex | TERMINAL_LINK_FLAG, terminalEntryIndex) ;
178 }
179
180 bool TrieMap::updateValue(const Entry &terminalEntry, const uint64_t value,
181 const int terminalEntryIndex) {
182 if (!terminalEntry.hasTerminalLink()) {
183 return writeValue(value, terminalEntryIndex);
184 }
185 const int valueEntryIndex = terminalEntry.getValueEntryIndex();
186 return writeEntry(Entry(value >> (FIELD1_SIZE * CHAR_BIT), value), valueEntr yIndex);
187 }
188
189 bool TrieMap::freeTable(const int tableIndex, const int entryCount) {
190 if (!writeField0(readEmptyTableLink(entryCount), tableIndex)) {
191 return false;
192 }
193 return writeEmptyTableLink(tableIndex, entryCount);
194 }
195
196 /**
197 * Allocate table with entryCount-entries. Reuse freed table if possible.
198 */
199 int TrieMap::allocateTable(const int entryCount) {
200 if (entryCount > 0 && entryCount <= MAX_NUM_OF_ENTRIES_IN_ONE_LEVEL) {
201 const int tableIndex = readEmptyTableLink(entryCount);
202 if (tableIndex > 0) {
203 if (!writeEmptyTableLink(readField0(tableIndex), entryCount)) {
204 return INVALID_INDEX;
205 }
206 // Reuse the table.
207 return tableIndex;
208 }
209 }
210 // Allocate memory space at tail position of the buffer.
211 const int mapIndex = getTailEntryIndex();
212 if (!mBuffer.extend(entryCount * ENTRY_SIZE)) {
213 return INVALID_INDEX;
214 }
215 return mapIndex;
216 }
217
218 int TrieMap::getTerminalEntryIndex(const uint32_t key, const uint32_t hashedKey,
219 const Entry &bitmapEntry, const int level) const {
220 const int label = getLabel(hashedKey, level);
221 if (!exists(bitmapEntry.getBitmap(), label)) {
222 return INVALID_INDEX;
223 }
224 const int entryIndex = bitmapEntry.getTableIndex() + popCount(bitmapEntry.ge tBitmap(), label);
225 const Entry entry = readEntry(entryIndex);
226 if (entry.isBitmapEntry()) {
227 // Move to the next level.
228 return getTerminalEntryIndex(key, hashedKey, entry, level + 1);
229 }
230 if (entry.getKey() == key) {
231 // Terminal entry is found.
232 return entryIndex;
233 }
234 return INVALID_INDEX;
235 }
236
237 /**
238 * Get Result corresponding to the key.
239 *
240 * @param key the key.
241 * @param hashedKey the hashed key.
242 * @param bitmapEntryIndex the index of bitmap entry
243 * @param level current level
244 * @return Result instance corresponding to the key. mIsValid indicates whether the key is in the
245 * map.
246 */
247 const TrieMap::Result TrieMap::getInternal(const uint32_t key, const uint32_t ha shedKey,
248 const int bitmapEntryIndex, const int level) const {
249 const int terminalEntryIndex = getTerminalEntryIndex(key, hashedKey,
250 readEntry(bitmapEntryIndex), level);
251 if (terminalEntryIndex == INVALID_INDEX) {
252 // Not found.
253 return Result(0, false, INVALID_INDEX);
254 }
255 const Entry terminalEntry = readEntry(terminalEntryIndex);
256 if (!terminalEntry.hasTerminalLink()) {
257 return Result(terminalEntry.getValue(), true, INVALID_INDEX);
258 }
259 const int valueEntryIndex = terminalEntry.getValueEntryIndex();
260 const Entry valueEntry = readEntry(valueEntryIndex);
261 return Result(valueEntry.getValueOfValueEntry(), true, valueEntryIndex + 1);
262 }
263
264 /**
265 * Put key to value mapping to the map.
266 *
267 * @param key the key.
268 * @param value the value
269 * @param hashedKey the hashed key.
270 * @param bitmapEntryIndex the index of bitmap entry
271 * @param bitmapEntry the bitmap entry
272 * @param level current level
273 * @return whether the key-value has been correctly inserted to the map or not.
274 */
275 bool TrieMap::putInternal(const uint32_t key, const uint64_t value, const uint32 _t hashedKey,
276 const int bitmapEntryIndex, const Entry &bitmapEntry, const int level) {
277 const int label = getLabel(hashedKey, level);
278 const uint32_t bitmap = bitmapEntry.getBitmap();
279 const int mapIndex = bitmapEntry.getTableIndex();
280 if (!exists(bitmap, label)) {
281 // Current map doesn't contain the label.
282 return addNewEntryByExpandingTable(key, value, mapIndex, bitmap, bitmapE ntryIndex, label);
283 }
284 const int entryIndex = mapIndex + popCount(bitmap, label);
285 const Entry entry = readEntry(entryIndex);
286 if (entry.isBitmapEntry()) {
287 // Bitmap entry is found. Go to the next level.
288 return putInternal(key, value, hashedKey, entryIndex, entry, level + 1);
289 }
290 if (entry.getKey() == key) {
291 // Terminal entry for the key is found. Update the value.
292 return updateValue(entry, value, entryIndex);
293 }
294 // Conflict with the existing key.
295 return addNewEntryByResolvingConflict(key, value, hashedKey, entry, entryInd ex, level);
296 }
297
298 /**
299 * Resolve a conflict in the current level and add new entry.
300 *
301 * @param key the key
302 * @param value the value
303 * @param hashedKey the hashed key
304 * @param conflictedEntry the existing conflicted entry
305 * @param conflictedEntryIndex the index of existing conflicted entry
306 * @param level current level
307 * @return whether the key-value has been correctly inserted to the map or not.
308 */
309 bool TrieMap::addNewEntryByResolvingConflict(const uint32_t key, const uint64_t value,
310 const uint32_t hashedKey, const Entry &conflictedEntry, const int confli ctedEntryIndex,
311 const int level) {
312 const int conflictedKeyNextLabel =
313 getLabel(getBitShuffledKey(conflictedEntry.getKey()), level + 1);
314 const int nextLabel = getLabel(hashedKey, level + 1);
315 if (conflictedKeyNextLabel == nextLabel) {
316 // Conflicted again in the next level.
317 const int newTableIndex = allocateTable(1 /* entryCount */);
318 if (newTableIndex == INVALID_INDEX) {
319 return false;
320 }
321 if (!writeEntry(conflictedEntry, newTableIndex)) {
322 return false;
323 }
324 const Entry newBitmapEntry(setExist(0 /* bitmap */, nextLabel), newTable Index);
325 if (!writeEntry(newBitmapEntry, conflictedEntryIndex)) {
326 return false;
327 }
328 return putInternal(key, value, hashedKey, conflictedEntryIndex, newBitma pEntry, level + 1);
329 }
330 // The conflict has been resolved. Create a table that contains 2 entries.
331 const int newTableIndex = allocateTable(2 /* entryCount */);
332 if (newTableIndex == INVALID_INDEX) {
333 return false;
334 }
335 if (nextLabel < conflictedKeyNextLabel) {
336 if (!writeTerminalEntry(key, value, newTableIndex)) {
337 return false;
338 }
339 if (!writeEntry(conflictedEntry, newTableIndex + 1)) {
340 return false;
341 }
342 } else { // nextLabel > conflictedKeyNextLabel
343 if (!writeEntry(conflictedEntry, newTableIndex)) {
344 return false;
345 }
346 if (!writeTerminalEntry(key, value, newTableIndex + 1)) {
347 return false;
348 }
349 }
350 const uint32_t updatedBitmap =
351 setExist(setExist(0 /* bitmap */, nextLabel), conflictedKeyNextLabel );
352 return writeEntry(Entry(updatedBitmap, newTableIndex), conflictedEntryIndex) ;
353 }
354
355 /**
356 * Add new entry to the existing table.
357 */
358 bool TrieMap::addNewEntryByExpandingTable(const uint32_t key, const uint64_t val ue,
359 const int tableIndex, const uint32_t bitmap, const int bitmapEntryIndex, const int label) {
360 // Current map doesn't contain the label.
361 const int entryCount = popCount(bitmap);
362 const int newTableIndex = allocateTable(entryCount + 1);
363 if (newTableIndex == INVALID_INDEX) {
364 return false;
365 }
366 const int newEntryIndexInTable = popCount(bitmap, label);
367 // Copy from existing table to the new table.
368 for (int i = 0; i < entryCount; ++i) {
369 if (!copyEntry(tableIndex + i, newTableIndex + i + (i >= newEntryIndexIn Table ? 1 : 0))) {
370 return false;
371 }
372 }
373 // Write new terminal entry.
374 if (!writeTerminalEntry(key, value, newTableIndex + newEntryIndexInTable)) {
375 return false;
376 }
377 // Update bitmap.
378 if (!writeEntry(Entry(setExist(bitmap, label), newTableIndex), bitmapEntryIn dex)) {
379 return false;
380 }
381 if (entryCount > 0) {
382 return freeTable(tableIndex, entryCount);
383 }
384 return true;
385 }
386
387 } // namespace latinime
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698