| OLD | NEW |
| (Empty) | |
| 1 /* |
| 2 * Copyright (C) 2014, The Android Open Source Project |
| 3 * |
| 4 * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 * you may not use this file except in compliance with the License. |
| 6 * You may obtain a copy of the License at |
| 7 * |
| 8 * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 * |
| 10 * Unless required by applicable law or agreed to in writing, software |
| 11 * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 * See the License for the specific language governing permissions and |
| 14 * limitations under the License. |
| 15 */ |
| 16 |
| 17 #ifndef LATINIME_TRIE_MAP_H |
| 18 #define LATINIME_TRIE_MAP_H |
| 19 |
| 20 #include <climits> |
| 21 #include <cstdint> |
| 22 #include <cstdio> |
| 23 #include <vector> |
| 24 |
| 25 #include "third_party/android_prediction/defines.h" |
| 26 #include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/buf
fer_with_extendable_buffer.h" |
| 27 #include "third_party/android_prediction/utils/byte_array_view.h" |
| 28 |
| 29 namespace latinime { |
| 30 |
| 31 /** |
| 32 * Trie map derived from Phil Bagwell's Hash Array Mapped Trie. |
| 33 * key is int and value is uint64_t. |
| 34 * This supports multiple level map. Terminal entries can have a bitmap for the
next level map. |
| 35 * This doesn't support root map resizing. |
| 36 */ |
| 37 class TrieMap { |
| 38 public: |
| 39 struct Result { |
| 40 const uint64_t mValue; |
| 41 const bool mIsValid; |
| 42 const int mNextLevelBitmapEntryIndex; |
| 43 |
| 44 Result(const uint64_t value, const bool isValid, const int nextLevelBitm
apEntryIndex) |
| 45 : mValue(value), mIsValid(isValid), |
| 46 mNextLevelBitmapEntryIndex(nextLevelBitmapEntryIndex) {} |
| 47 }; |
| 48 |
| 49 /** |
| 50 * Struct to record iteration state in a table. |
| 51 */ |
| 52 struct TableIterationState { |
| 53 int mTableSize; |
| 54 int mTableIndex; |
| 55 int mCurrentIndex; |
| 56 |
| 57 TableIterationState(const int tableSize, const int tableIndex) |
| 58 : mTableSize(tableSize), mTableIndex(tableIndex), mCurrentIndex(
0) {} |
| 59 }; |
| 60 |
| 61 class TrieMapRange; |
| 62 class TrieMapIterator { |
| 63 public: |
| 64 class IterationResult { |
| 65 public: |
| 66 IterationResult(const TrieMap *const trieMap, const int key, const u
int64_t value, |
| 67 const int nextLeveBitmapEntryIndex) |
| 68 : mTrieMap(trieMap), mKey(key), mValue(value), |
| 69 mNextLevelBitmapEntryIndex(nextLeveBitmapEntryIndex) {} |
| 70 |
| 71 const TrieMapRange getEntriesInNextLevel() const { |
| 72 return TrieMapRange(mTrieMap, mNextLevelBitmapEntryIndex); |
| 73 } |
| 74 |
| 75 bool hasNextLevelMap() const { |
| 76 return mNextLevelBitmapEntryIndex != INVALID_INDEX; |
| 77 } |
| 78 |
| 79 AK_FORCE_INLINE int key() const { |
| 80 return mKey; |
| 81 } |
| 82 |
| 83 AK_FORCE_INLINE uint64_t value() const { |
| 84 return mValue; |
| 85 } |
| 86 |
| 87 private: |
| 88 const TrieMap *const mTrieMap; |
| 89 const int mKey; |
| 90 const uint64_t mValue; |
| 91 const int mNextLevelBitmapEntryIndex; |
| 92 }; |
| 93 |
| 94 TrieMapIterator(const TrieMap *const trieMap, const int bitmapEntryIndex
) |
| 95 : mTrieMap(trieMap), mStateStack(), mBaseBitmapEntryIndex(bitmap
EntryIndex), |
| 96 mKey(0), mValue(0), mIsValid(false), mNextLevelBitmapEntryInde
x(INVALID_INDEX) { |
| 97 if (!trieMap) { |
| 98 return; |
| 99 } |
| 100 const Entry bitmapEntry = mTrieMap->readEntry(mBaseBitmapEntryIndex)
; |
| 101 mStateStack.emplace_back( |
| 102 mTrieMap->popCount(bitmapEntry.getBitmap()), bitmapEntry.get
TableIndex()); |
| 103 this->operator++(); |
| 104 } |
| 105 |
| 106 const IterationResult operator*() const { |
| 107 return IterationResult(mTrieMap, mKey, mValue, mNextLevelBitmapEntry
Index); |
| 108 } |
| 109 |
| 110 bool operator!=(const TrieMapIterator &other) const { |
| 111 // Caveat: This works only for for loops. |
| 112 return mIsValid || other.mIsValid; |
| 113 } |
| 114 |
| 115 const TrieMapIterator &operator++() { |
| 116 const Result result = mTrieMap->iterateNext(&mStateStack, &mKey); |
| 117 mValue = result.mValue; |
| 118 mIsValid = result.mIsValid; |
| 119 mNextLevelBitmapEntryIndex = result.mNextLevelBitmapEntryIndex; |
| 120 return *this; |
| 121 } |
| 122 |
| 123 private: |
| 124 DISALLOW_DEFAULT_CONSTRUCTOR(TrieMapIterator); |
| 125 DISALLOW_ASSIGNMENT_OPERATOR(TrieMapIterator); |
| 126 |
| 127 const TrieMap *const mTrieMap; |
| 128 std::vector<TrieMap::TableIterationState> mStateStack; |
| 129 const int mBaseBitmapEntryIndex; |
| 130 int mKey; |
| 131 uint64_t mValue; |
| 132 bool mIsValid; |
| 133 int mNextLevelBitmapEntryIndex; |
| 134 }; |
| 135 |
| 136 /** |
| 137 * Class to support iterating entries in TrieMap by range base for loops. |
| 138 */ |
| 139 class TrieMapRange { |
| 140 public: |
| 141 TrieMapRange(const TrieMap *const trieMap, const int bitmapEntryIndex) |
| 142 : mTrieMap(trieMap), mBaseBitmapEntryIndex(bitmapEntryIndex) {}; |
| 143 |
| 144 TrieMapIterator begin() const { |
| 145 return TrieMapIterator(mTrieMap, mBaseBitmapEntryIndex); |
| 146 } |
| 147 |
| 148 const TrieMapIterator end() const { |
| 149 return TrieMapIterator(nullptr, INVALID_INDEX); |
| 150 } |
| 151 |
| 152 private: |
| 153 DISALLOW_DEFAULT_CONSTRUCTOR(TrieMapRange); |
| 154 DISALLOW_ASSIGNMENT_OPERATOR(TrieMapRange); |
| 155 |
| 156 const TrieMap *const mTrieMap; |
| 157 const int mBaseBitmapEntryIndex; |
| 158 }; |
| 159 |
| 160 static const int INVALID_INDEX; |
| 161 static const uint64_t MAX_VALUE; |
| 162 |
| 163 TrieMap(); |
| 164 // Construct TrieMap using existing data in the memory region written by sav
e(). |
| 165 TrieMap(const ReadWriteByteArrayView buffer); |
| 166 void dump(const int from = 0, const int to = 0) const; |
| 167 |
| 168 bool isNearSizeLimit() const { |
| 169 return mBuffer.isNearSizeLimit(); |
| 170 } |
| 171 |
| 172 int getRootBitmapEntryIndex() const { |
| 173 return ROOT_BITMAP_ENTRY_INDEX; |
| 174 } |
| 175 |
| 176 // Returns bitmapEntryIndex. Create the next level map if it doesn't exist. |
| 177 int getNextLevelBitmapEntryIndex(const int key) { |
| 178 return getNextLevelBitmapEntryIndex(key, ROOT_BITMAP_ENTRY_INDEX); |
| 179 } |
| 180 |
| 181 int getNextLevelBitmapEntryIndex(const int key, const int bitmapEntryIndex); |
| 182 |
| 183 const Result getRoot(const int key) const { |
| 184 return get(key, ROOT_BITMAP_ENTRY_INDEX); |
| 185 } |
| 186 |
| 187 const Result get(const int key, const int bitmapEntryIndex) const; |
| 188 |
| 189 bool putRoot(const int key, const uint64_t value) { |
| 190 return put(key, value, ROOT_BITMAP_ENTRY_INDEX); |
| 191 } |
| 192 |
| 193 bool put(const int key, const uint64_t value, const int bitmapEntryIndex); |
| 194 |
| 195 const TrieMapRange getEntriesInRootLevel() const { |
| 196 return getEntriesInSpecifiedLevel(ROOT_BITMAP_ENTRY_INDEX); |
| 197 } |
| 198 |
| 199 const TrieMapRange getEntriesInSpecifiedLevel(const int bitmapEntryIndex) co
nst { |
| 200 return TrieMapRange(this, bitmapEntryIndex); |
| 201 } |
| 202 |
| 203 bool save(FILE *const file) const; |
| 204 |
| 205 private: |
| 206 DISALLOW_COPY_AND_ASSIGN(TrieMap); |
| 207 |
| 208 /** |
| 209 * Struct represents an entry. |
| 210 * |
| 211 * Entry is one of these entry types. All entries are fixed size and have 2
fields FIELD_0 and |
| 212 * FIELD_1. |
| 213 * 1. bitmap entry. bitmap entry contains bitmap and the link to hash table. |
| 214 * FIELD_0(bitmap) FIELD_1(LINK_TO_HASH_TABLE) |
| 215 * 2. terminal entry. terminal entry contains hashed key and value or termin
al link. terminal |
| 216 * entry have terminal link when the value is not fit to FIELD_1 or there is
a next level map |
| 217 * for the key. |
| 218 * FIELD_0(hashed key) (FIELD_1(VALUE_FLAG VALUE) | FIELD_1(TERMINAL_LINK_
FLAG TERMINAL_LINK)) |
| 219 * 3. value entry. value entry represents a value. Upper order bytes are sto
red in FIELD_0 and |
| 220 * lower order bytes are stored in FIELD_1. |
| 221 * FIELD_0(value (upper order bytes)) FIELD_1(value (lower order bytes)) |
| 222 */ |
| 223 struct Entry { |
| 224 const uint32_t mData0; |
| 225 const uint32_t mData1; |
| 226 |
| 227 Entry(const uint32_t data0, const uint32_t data1) : mData0(data0), mData
1(data1) {} |
| 228 |
| 229 AK_FORCE_INLINE bool isBitmapEntry() const { |
| 230 return (mData1 & VALUE_FLAG) == 0 && (mData1 & TERMINAL_LINK_FLAG) =
= 0; |
| 231 } |
| 232 |
| 233 AK_FORCE_INLINE bool hasTerminalLink() const { |
| 234 return (mData1 & TERMINAL_LINK_FLAG) != 0; |
| 235 } |
| 236 |
| 237 // For terminal entry. |
| 238 AK_FORCE_INLINE uint32_t getKey() const { |
| 239 return mData0; |
| 240 } |
| 241 |
| 242 // For terminal entry. |
| 243 AK_FORCE_INLINE uint32_t getValue() const { |
| 244 return mData1 & VALUE_MASK; |
| 245 } |
| 246 |
| 247 // For terminal entry. |
| 248 AK_FORCE_INLINE uint32_t getValueEntryIndex() const { |
| 249 return mData1 & TERMINAL_LINK_MASK; |
| 250 } |
| 251 |
| 252 // For bitmap entry. |
| 253 AK_FORCE_INLINE uint32_t getBitmap() const { |
| 254 return mData0; |
| 255 } |
| 256 |
| 257 // For bitmap entry. |
| 258 AK_FORCE_INLINE int getTableIndex() const { |
| 259 return static_cast<int>(mData1); |
| 260 } |
| 261 |
| 262 // For value entry. |
| 263 AK_FORCE_INLINE uint64_t getValueOfValueEntry() const { |
| 264 return ((static_cast<uint64_t>(mData0) << (FIELD1_SIZE * CHAR_BIT))
^ mData1); |
| 265 } |
| 266 }; |
| 267 |
| 268 BufferWithExtendableBuffer mBuffer; |
| 269 |
| 270 static const int FIELD0_SIZE; |
| 271 static const int FIELD1_SIZE; |
| 272 static const int ENTRY_SIZE; |
| 273 static const uint32_t VALUE_FLAG; |
| 274 static const uint32_t VALUE_MASK; |
| 275 static const uint32_t TERMINAL_LINK_FLAG; |
| 276 static const uint32_t TERMINAL_LINK_MASK; |
| 277 static const int NUM_OF_BITS_USED_FOR_ONE_LEVEL; |
| 278 static const uint32_t LABEL_MASK; |
| 279 static const int MAX_NUM_OF_ENTRIES_IN_ONE_LEVEL; |
| 280 static const int ROOT_BITMAP_ENTRY_INDEX; |
| 281 static const int ROOT_BITMAP_ENTRY_POS; |
| 282 static const Entry EMPTY_BITMAP_ENTRY; |
| 283 static const int MAX_BUFFER_SIZE; |
| 284 |
| 285 uint32_t getBitShuffledKey(const uint32_t key) const; |
| 286 bool writeValue(const uint64_t value, const int terminalEntryIndex); |
| 287 bool updateValue(const Entry &terminalEntry, const uint64_t value, |
| 288 const int terminalEntryIndex); |
| 289 bool freeTable(const int tableIndex, const int entryCount); |
| 290 int allocateTable(const int entryCount); |
| 291 int getTerminalEntryIndex(const uint32_t key, const uint32_t hashedKey, |
| 292 const Entry &bitmapEntry, const int level) const; |
| 293 const Result getInternal(const uint32_t key, const uint32_t hashedKey, |
| 294 const int bitmapEntryIndex, const int level) const; |
| 295 bool putInternal(const uint32_t key, const uint64_t value, const uint32_t ha
shedKey, |
| 296 const int bitmapEntryIndex, const Entry &bitmapEntry, const int leve
l); |
| 297 bool addNewEntryByResolvingConflict(const uint32_t key, const uint64_t value
, |
| 298 const uint32_t hashedKey, const Entry &conflictedEntry, const int co
nflictedEntryIndex, |
| 299 const int level); |
| 300 bool addNewEntryByExpandingTable(const uint32_t key, const uint64_t value, |
| 301 const int tableIndex, const uint32_t bitmap, const int bitmapEntryIn
dex, |
| 302 const int label); |
| 303 const Result iterateNext(std::vector<TableIterationState> *const iterationSt
ate, |
| 304 int *const outKey) const; |
| 305 |
| 306 AK_FORCE_INLINE const Entry readEntry(const int entryIndex) const { |
| 307 return Entry(readField0(entryIndex), readField1(entryIndex)); |
| 308 } |
| 309 |
| 310 // Returns whether an entry for the index is existing by testing if the inde
x-th bit in the |
| 311 // bitmap is set or not. |
| 312 AK_FORCE_INLINE bool exists(const uint32_t bitmap, const int index) const { |
| 313 return (bitmap & (1 << index)) != 0; |
| 314 } |
| 315 |
| 316 // Set index-th bit in the bitmap. |
| 317 AK_FORCE_INLINE uint32_t setExist(const uint32_t bitmap, const int index) co
nst { |
| 318 return bitmap | (1 << index); |
| 319 } |
| 320 |
| 321 // Count set bits before index in the bitmap. |
| 322 AK_FORCE_INLINE int popCount(const uint32_t bitmap, const int index) const { |
| 323 return popCount(bitmap & ((1 << index) - 1)); |
| 324 } |
| 325 |
| 326 // Count set bits in the bitmap. |
| 327 AK_FORCE_INLINE int popCount(const uint32_t bitmap) const { |
| 328 return __builtin_popcount(bitmap); |
| 329 // int v = bitmap - ((bitmap >> 1) & 0x55555555); |
| 330 // v = (v & 0x33333333) + ((v >> 2) & 0x33333333); |
| 331 // return (((v + (v >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24; |
| 332 } |
| 333 |
| 334 AK_FORCE_INLINE int getLabel(const uint32_t hashedKey, const int level) cons
t { |
| 335 return (hashedKey >> (level * NUM_OF_BITS_USED_FOR_ONE_LEVEL)) & LABEL_M
ASK; |
| 336 } |
| 337 |
| 338 AK_FORCE_INLINE uint32_t readField0(const int entryIndex) const { |
| 339 return mBuffer.readUint(FIELD0_SIZE, ROOT_BITMAP_ENTRY_POS + entryIndex
* ENTRY_SIZE); |
| 340 } |
| 341 |
| 342 AK_FORCE_INLINE uint32_t readField1(const int entryIndex) const { |
| 343 return mBuffer.readUint(FIELD1_SIZE, |
| 344 ROOT_BITMAP_ENTRY_POS + entryIndex * ENTRY_SIZE + FIELD0_SIZE); |
| 345 } |
| 346 |
| 347 AK_FORCE_INLINE int readEmptyTableLink(const int entryCount) const { |
| 348 return mBuffer.readUint(FIELD1_SIZE, (entryCount - 1) * FIELD1_SIZE); |
| 349 } |
| 350 |
| 351 AK_FORCE_INLINE bool writeEmptyTableLink(const int tableIndex, const int ent
ryCount) { |
| 352 return mBuffer.writeUint(tableIndex, FIELD1_SIZE, (entryCount - 1) * FIE
LD1_SIZE); |
| 353 } |
| 354 |
| 355 AK_FORCE_INLINE bool writeField0(const uint32_t data, const int entryIndex)
{ |
| 356 return mBuffer.writeUint(data, FIELD0_SIZE, |
| 357 ROOT_BITMAP_ENTRY_POS + entryIndex * ENTRY_SIZE); |
| 358 } |
| 359 |
| 360 AK_FORCE_INLINE bool writeField1(const uint32_t data, const int entryIndex)
{ |
| 361 return mBuffer.writeUint(data, FIELD1_SIZE, |
| 362 ROOT_BITMAP_ENTRY_POS + entryIndex * ENTRY_SIZE + FIELD0_SIZE); |
| 363 } |
| 364 |
| 365 AK_FORCE_INLINE bool writeEntry(const Entry &entry, const int entryIndex) { |
| 366 return writeField0(entry.mData0, entryIndex) && writeField1(entry.mData1
, entryIndex); |
| 367 } |
| 368 |
| 369 AK_FORCE_INLINE bool writeTerminalEntry(const uint32_t key, const uint64_t v
alue, |
| 370 const int entryIndex) { |
| 371 return writeField0(key, entryIndex) && writeValue(value, entryIndex); |
| 372 } |
| 373 |
| 374 AK_FORCE_INLINE bool copyEntry(const int originalEntryIndex, const int newEn
tryIndex) { |
| 375 return writeEntry(readEntry(originalEntryIndex), newEntryIndex); |
| 376 } |
| 377 |
| 378 AK_FORCE_INLINE int getTailEntryIndex() const { |
| 379 return (mBuffer.getTailPosition() - ROOT_BITMAP_ENTRY_POS) / ENTRY_SIZE; |
| 380 } |
| 381 }; |
| 382 |
| 383 } // namespace latinime |
| 384 #endif /* LATINIME_TRIE_MAP_H */ |
| OLD | NEW |