| OLD | NEW |
| (Empty) | |
| 1 /* |
| 2 * Copyright (C) 2014, The Android Open Source Project |
| 3 * |
| 4 * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 * you may not use this file except in compliance with the License. |
| 6 * You may obtain a copy of the License at |
| 7 * |
| 8 * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 * |
| 10 * Unless required by applicable law or agreed to in writing, software |
| 11 * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 * See the License for the specific language governing permissions and |
| 14 * limitations under the License. |
| 15 */ |
| 16 |
| 17 #ifndef LATINIME_TRIE_MAP_H |
| 18 #define LATINIME_TRIE_MAP_H |
| 19 |
| 20 #include <climits> |
| 21 #include <cstdint> |
| 22 #include <cstdio> |
| 23 #include <vector> |
| 24 |
| 25 #include "third_party/prediction/defines.h" |
| 26 #include "third_party/prediction/suggest/policyimpl/dictionary/utils/buffer_with
_extendable_buffer.h" |
| 27 #include "third_party/prediction/utils/byte_array_view.h" |
| 28 |
| 29 namespace latinime { |
| 30 |
| 31 /** |
| 32 * Trie map derived from Phil Bagwell's Hash Array Mapped Trie. |
| 33 * key is int and value is uint64_t. |
| 34 * This supports multiple level map. Terminal entries can have a bitmap for the |
| 35 * next level map. |
| 36 * This doesn't support root map resizing. |
| 37 */ |
| 38 class TrieMap { |
| 39 public: |
| 40 struct Result { |
| 41 const uint64_t mValue; |
| 42 const bool mIsValid; |
| 43 const int mNextLevelBitmapEntryIndex; |
| 44 |
| 45 Result(const uint64_t value, |
| 46 const bool isValid, |
| 47 const int nextLevelBitmapEntryIndex) |
| 48 : mValue(value), |
| 49 mIsValid(isValid), |
| 50 mNextLevelBitmapEntryIndex(nextLevelBitmapEntryIndex) {} |
| 51 }; |
| 52 |
| 53 /** |
| 54 * Struct to record iteration state in a table. |
| 55 */ |
| 56 struct TableIterationState { |
| 57 int mTableSize; |
| 58 int mTableIndex; |
| 59 int mCurrentIndex; |
| 60 |
| 61 TableIterationState(const int tableSize, const int tableIndex) |
| 62 : mTableSize(tableSize), mTableIndex(tableIndex), mCurrentIndex(0) {} |
| 63 }; |
| 64 |
| 65 class TrieMapRange; |
| 66 class TrieMapIterator { |
| 67 public: |
| 68 class IterationResult { |
| 69 public: |
| 70 IterationResult(const TrieMap* const trieMap, |
| 71 const int key, |
| 72 const uint64_t value, |
| 73 const int nextLeveBitmapEntryIndex) |
| 74 : mTrieMap(trieMap), |
| 75 mKey(key), |
| 76 mValue(value), |
| 77 mNextLevelBitmapEntryIndex(nextLeveBitmapEntryIndex) {} |
| 78 |
| 79 const TrieMapRange getEntriesInNextLevel() const { |
| 80 return TrieMapRange(mTrieMap, mNextLevelBitmapEntryIndex); |
| 81 } |
| 82 |
| 83 bool hasNextLevelMap() const { |
| 84 return mNextLevelBitmapEntryIndex != INVALID_INDEX; |
| 85 } |
| 86 |
| 87 AK_FORCE_INLINE int key() const { return mKey; } |
| 88 |
| 89 AK_FORCE_INLINE uint64_t value() const { return mValue; } |
| 90 |
| 91 private: |
| 92 const TrieMap* const mTrieMap; |
| 93 const int mKey; |
| 94 const uint64_t mValue; |
| 95 const int mNextLevelBitmapEntryIndex; |
| 96 }; |
| 97 |
| 98 TrieMapIterator(const TrieMap* const trieMap, const int bitmapEntryIndex) |
| 99 : mTrieMap(trieMap), |
| 100 mStateStack(), |
| 101 mBaseBitmapEntryIndex(bitmapEntryIndex), |
| 102 mKey(0), |
| 103 mValue(0), |
| 104 mIsValid(false), |
| 105 mNextLevelBitmapEntryIndex(INVALID_INDEX) { |
| 106 if (!trieMap) { |
| 107 return; |
| 108 } |
| 109 const Entry bitmapEntry = mTrieMap->readEntry(mBaseBitmapEntryIndex); |
| 110 mStateStack.emplace_back(mTrieMap->popCount(bitmapEntry.getBitmap()), |
| 111 bitmapEntry.getTableIndex()); |
| 112 this->operator++(); |
| 113 } |
| 114 |
| 115 const IterationResult operator*() const { |
| 116 return IterationResult(mTrieMap, mKey, mValue, |
| 117 mNextLevelBitmapEntryIndex); |
| 118 } |
| 119 |
| 120 bool operator!=(const TrieMapIterator& other) const { |
| 121 // Caveat: This works only for for loops. |
| 122 return mIsValid || other.mIsValid; |
| 123 } |
| 124 |
| 125 const TrieMapIterator& operator++() { |
| 126 const Result result = mTrieMap->iterateNext(&mStateStack, &mKey); |
| 127 mValue = result.mValue; |
| 128 mIsValid = result.mIsValid; |
| 129 mNextLevelBitmapEntryIndex = result.mNextLevelBitmapEntryIndex; |
| 130 return *this; |
| 131 } |
| 132 |
| 133 private: |
| 134 DISALLOW_DEFAULT_CONSTRUCTOR(TrieMapIterator); |
| 135 DISALLOW_ASSIGNMENT_OPERATOR(TrieMapIterator); |
| 136 |
| 137 const TrieMap* const mTrieMap; |
| 138 std::vector<TrieMap::TableIterationState> mStateStack; |
| 139 const int mBaseBitmapEntryIndex; |
| 140 int mKey; |
| 141 uint64_t mValue; |
| 142 bool mIsValid; |
| 143 int mNextLevelBitmapEntryIndex; |
| 144 }; |
| 145 |
| 146 /** |
| 147 * Class to support iterating entries in TrieMap by range base for loops. |
| 148 */ |
| 149 class TrieMapRange { |
| 150 public: |
| 151 TrieMapRange(const TrieMap* const trieMap, const int bitmapEntryIndex) |
| 152 : mTrieMap(trieMap), mBaseBitmapEntryIndex(bitmapEntryIndex){}; |
| 153 |
| 154 TrieMapIterator begin() const { |
| 155 return TrieMapIterator(mTrieMap, mBaseBitmapEntryIndex); |
| 156 } |
| 157 |
| 158 const TrieMapIterator end() const { |
| 159 return TrieMapIterator(nullptr, INVALID_INDEX); |
| 160 } |
| 161 |
| 162 private: |
| 163 DISALLOW_DEFAULT_CONSTRUCTOR(TrieMapRange); |
| 164 DISALLOW_ASSIGNMENT_OPERATOR(TrieMapRange); |
| 165 |
| 166 const TrieMap* const mTrieMap; |
| 167 const int mBaseBitmapEntryIndex; |
| 168 }; |
| 169 |
| 170 static const int INVALID_INDEX; |
| 171 static const uint64_t MAX_VALUE; |
| 172 |
| 173 TrieMap(); |
| 174 // Construct TrieMap using existing data in the memory region written by |
| 175 // save(). |
| 176 TrieMap(const ReadWriteByteArrayView buffer); |
| 177 void dump(const int from = 0, const int to = 0) const; |
| 178 |
| 179 bool isNearSizeLimit() const { return mBuffer.isNearSizeLimit(); } |
| 180 |
| 181 int getRootBitmapEntryIndex() const { return ROOT_BITMAP_ENTRY_INDEX; } |
| 182 |
| 183 // Returns bitmapEntryIndex. Create the next level map if it doesn't exist. |
| 184 int getNextLevelBitmapEntryIndex(const int key) { |
| 185 return getNextLevelBitmapEntryIndex(key, ROOT_BITMAP_ENTRY_INDEX); |
| 186 } |
| 187 |
| 188 int getNextLevelBitmapEntryIndex(const int key, const int bitmapEntryIndex); |
| 189 |
| 190 const Result getRoot(const int key) const { |
| 191 return get(key, ROOT_BITMAP_ENTRY_INDEX); |
| 192 } |
| 193 |
| 194 const Result get(const int key, const int bitmapEntryIndex) const; |
| 195 |
| 196 bool putRoot(const int key, const uint64_t value) { |
| 197 return put(key, value, ROOT_BITMAP_ENTRY_INDEX); |
| 198 } |
| 199 |
| 200 bool put(const int key, const uint64_t value, const int bitmapEntryIndex); |
| 201 |
| 202 const TrieMapRange getEntriesInRootLevel() const { |
| 203 return getEntriesInSpecifiedLevel(ROOT_BITMAP_ENTRY_INDEX); |
| 204 } |
| 205 |
| 206 const TrieMapRange getEntriesInSpecifiedLevel( |
| 207 const int bitmapEntryIndex) const { |
| 208 return TrieMapRange(this, bitmapEntryIndex); |
| 209 } |
| 210 |
| 211 bool save(FILE* const file) const; |
| 212 |
| 213 private: |
| 214 DISALLOW_COPY_AND_ASSIGN(TrieMap); |
| 215 |
| 216 /** |
| 217 * Struct represents an entry. |
| 218 * |
| 219 * Entry is one of these entry types. All entries are fixed size and have 2 |
| 220 *fields FIELD_0 and |
| 221 * FIELD_1. |
| 222 * 1. bitmap entry. bitmap entry contains bitmap and the link to hash table. |
| 223 * FIELD_0(bitmap) FIELD_1(LINK_TO_HASH_TABLE) |
| 224 * 2. terminal entry. terminal entry contains hashed key and value or terminal |
| 225 *link. terminal |
| 226 * entry have terminal link when the value is not fit to FIELD_1 or there is a |
| 227 *next level map |
| 228 * for the key. |
| 229 * FIELD_0(hashed key) (FIELD_1(VALUE_FLAG VALUE) | |
| 230 *FIELD_1(TERMINAL_LINK_FLAG TERMINAL_LINK)) |
| 231 * 3. value entry. value entry represents a value. Upper order bytes are |
| 232 *stored in FIELD_0 and |
| 233 * lower order bytes are stored in FIELD_1. |
| 234 * FIELD_0(value (upper order bytes)) FIELD_1(value (lower order bytes)) |
| 235 */ |
| 236 struct Entry { |
| 237 const uint32_t mData0; |
| 238 const uint32_t mData1; |
| 239 |
| 240 Entry(const uint32_t data0, const uint32_t data1) |
| 241 : mData0(data0), mData1(data1) {} |
| 242 |
| 243 AK_FORCE_INLINE bool isBitmapEntry() const { |
| 244 return (mData1 & VALUE_FLAG) == 0 && (mData1 & TERMINAL_LINK_FLAG) == 0; |
| 245 } |
| 246 |
| 247 AK_FORCE_INLINE bool hasTerminalLink() const { |
| 248 return (mData1 & TERMINAL_LINK_FLAG) != 0; |
| 249 } |
| 250 |
| 251 // For terminal entry. |
| 252 AK_FORCE_INLINE uint32_t getKey() const { return mData0; } |
| 253 |
| 254 // For terminal entry. |
| 255 AK_FORCE_INLINE uint32_t getValue() const { return mData1 & VALUE_MASK; } |
| 256 |
| 257 // For terminal entry. |
| 258 AK_FORCE_INLINE uint32_t getValueEntryIndex() const { |
| 259 return mData1 & TERMINAL_LINK_MASK; |
| 260 } |
| 261 |
| 262 // For bitmap entry. |
| 263 AK_FORCE_INLINE uint32_t getBitmap() const { return mData0; } |
| 264 |
| 265 // For bitmap entry. |
| 266 AK_FORCE_INLINE int getTableIndex() const { |
| 267 return static_cast<int>(mData1); |
| 268 } |
| 269 |
| 270 // For value entry. |
| 271 AK_FORCE_INLINE uint64_t getValueOfValueEntry() const { |
| 272 return ((static_cast<uint64_t>(mData0) << (FIELD1_SIZE * CHAR_BIT)) ^ |
| 273 mData1); |
| 274 } |
| 275 }; |
| 276 |
| 277 BufferWithExtendableBuffer mBuffer; |
| 278 |
| 279 static const int FIELD0_SIZE; |
| 280 static const int FIELD1_SIZE; |
| 281 static const int ENTRY_SIZE; |
| 282 static const uint32_t VALUE_FLAG; |
| 283 static const uint32_t VALUE_MASK; |
| 284 static const uint32_t TERMINAL_LINK_FLAG; |
| 285 static const uint32_t TERMINAL_LINK_MASK; |
| 286 static const int NUM_OF_BITS_USED_FOR_ONE_LEVEL; |
| 287 static const uint32_t LABEL_MASK; |
| 288 static const int MAX_NUM_OF_ENTRIES_IN_ONE_LEVEL; |
| 289 static const int ROOT_BITMAP_ENTRY_INDEX; |
| 290 static const int ROOT_BITMAP_ENTRY_POS; |
| 291 static const Entry EMPTY_BITMAP_ENTRY; |
| 292 static const int MAX_BUFFER_SIZE; |
| 293 |
| 294 uint32_t getBitShuffledKey(const uint32_t key) const; |
| 295 bool writeValue(const uint64_t value, const int terminalEntryIndex); |
| 296 bool updateValue(const Entry& terminalEntry, |
| 297 const uint64_t value, |
| 298 const int terminalEntryIndex); |
| 299 bool freeTable(const int tableIndex, const int entryCount); |
| 300 int allocateTable(const int entryCount); |
| 301 int getTerminalEntryIndex(const uint32_t key, |
| 302 const uint32_t hashedKey, |
| 303 const Entry& bitmapEntry, |
| 304 const int level) const; |
| 305 const Result getInternal(const uint32_t key, |
| 306 const uint32_t hashedKey, |
| 307 const int bitmapEntryIndex, |
| 308 const int level) const; |
| 309 bool putInternal(const uint32_t key, |
| 310 const uint64_t value, |
| 311 const uint32_t hashedKey, |
| 312 const int bitmapEntryIndex, |
| 313 const Entry& bitmapEntry, |
| 314 const int level); |
| 315 bool addNewEntryByResolvingConflict(const uint32_t key, |
| 316 const uint64_t value, |
| 317 const uint32_t hashedKey, |
| 318 const Entry& conflictedEntry, |
| 319 const int conflictedEntryIndex, |
| 320 const int level); |
| 321 bool addNewEntryByExpandingTable(const uint32_t key, |
| 322 const uint64_t value, |
| 323 const int tableIndex, |
| 324 const uint32_t bitmap, |
| 325 const int bitmapEntryIndex, |
| 326 const int label); |
| 327 const Result iterateNext( |
| 328 std::vector<TableIterationState>* const iterationState, |
| 329 int* const outKey) const; |
| 330 |
| 331 AK_FORCE_INLINE const Entry readEntry(const int entryIndex) const { |
| 332 return Entry(readField0(entryIndex), readField1(entryIndex)); |
| 333 } |
| 334 |
| 335 // Returns whether an entry for the index is existing by testing if the |
| 336 // index-th bit in the |
| 337 // bitmap is set or not. |
| 338 AK_FORCE_INLINE bool exists(const uint32_t bitmap, const int index) const { |
| 339 return (bitmap & (1 << index)) != 0; |
| 340 } |
| 341 |
| 342 // Set index-th bit in the bitmap. |
| 343 AK_FORCE_INLINE uint32_t |
| 344 setExist(const uint32_t bitmap, const int index) const { |
| 345 return bitmap | (1 << index); |
| 346 } |
| 347 |
| 348 // Count set bits before index in the bitmap. |
| 349 AK_FORCE_INLINE int popCount(const uint32_t bitmap, const int index) const { |
| 350 return popCount(bitmap & ((1 << index) - 1)); |
| 351 } |
| 352 |
| 353 // Count set bits in the bitmap. |
| 354 AK_FORCE_INLINE int popCount(const uint32_t bitmap) const { |
| 355 return __builtin_popcount(bitmap); |
| 356 // int v = bitmap - ((bitmap >> 1) & 0x55555555); |
| 357 // v = (v & 0x33333333) + ((v >> 2) & 0x33333333); |
| 358 // return (((v + (v >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24; |
| 359 } |
| 360 |
| 361 AK_FORCE_INLINE int getLabel(const uint32_t hashedKey, |
| 362 const int level) const { |
| 363 return (hashedKey >> (level * NUM_OF_BITS_USED_FOR_ONE_LEVEL)) & LABEL_MASK; |
| 364 } |
| 365 |
| 366 AK_FORCE_INLINE uint32_t readField0(const int entryIndex) const { |
| 367 return mBuffer.readUint(FIELD0_SIZE, |
| 368 ROOT_BITMAP_ENTRY_POS + entryIndex * ENTRY_SIZE); |
| 369 } |
| 370 |
| 371 AK_FORCE_INLINE uint32_t readField1(const int entryIndex) const { |
| 372 return mBuffer.readUint( |
| 373 FIELD1_SIZE, |
| 374 ROOT_BITMAP_ENTRY_POS + entryIndex * ENTRY_SIZE + FIELD0_SIZE); |
| 375 } |
| 376 |
| 377 AK_FORCE_INLINE int readEmptyTableLink(const int entryCount) const { |
| 378 return mBuffer.readUint(FIELD1_SIZE, (entryCount - 1) * FIELD1_SIZE); |
| 379 } |
| 380 |
| 381 AK_FORCE_INLINE bool writeEmptyTableLink(const int tableIndex, |
| 382 const int entryCount) { |
| 383 return mBuffer.writeUint(tableIndex, FIELD1_SIZE, |
| 384 (entryCount - 1) * FIELD1_SIZE); |
| 385 } |
| 386 |
| 387 AK_FORCE_INLINE bool writeField0(const uint32_t data, const int entryIndex) { |
| 388 return mBuffer.writeUint(data, FIELD0_SIZE, |
| 389 ROOT_BITMAP_ENTRY_POS + entryIndex * ENTRY_SIZE); |
| 390 } |
| 391 |
| 392 AK_FORCE_INLINE bool writeField1(const uint32_t data, const int entryIndex) { |
| 393 return mBuffer.writeUint( |
| 394 data, FIELD1_SIZE, |
| 395 ROOT_BITMAP_ENTRY_POS + entryIndex * ENTRY_SIZE + FIELD0_SIZE); |
| 396 } |
| 397 |
| 398 AK_FORCE_INLINE bool writeEntry(const Entry& entry, const int entryIndex) { |
| 399 return writeField0(entry.mData0, entryIndex) && |
| 400 writeField1(entry.mData1, entryIndex); |
| 401 } |
| 402 |
| 403 AK_FORCE_INLINE bool writeTerminalEntry(const uint32_t key, |
| 404 const uint64_t value, |
| 405 const int entryIndex) { |
| 406 return writeField0(key, entryIndex) && writeValue(value, entryIndex); |
| 407 } |
| 408 |
| 409 AK_FORCE_INLINE bool copyEntry(const int originalEntryIndex, |
| 410 const int newEntryIndex) { |
| 411 return writeEntry(readEntry(originalEntryIndex), newEntryIndex); |
| 412 } |
| 413 |
| 414 AK_FORCE_INLINE int getTailEntryIndex() const { |
| 415 return (mBuffer.getTailPosition() - ROOT_BITMAP_ENTRY_POS) / ENTRY_SIZE; |
| 416 } |
| 417 }; |
| 418 |
| 419 } // namespace latinime |
| 420 #endif /* LATINIME_TRIE_MAP_H */ |
| OLD | NEW |