Index: third_party/android_prediction/suggest/policyimpl/dictionary/utils/trie_map.h |
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/utils/trie_map.h b/third_party/android_prediction/suggest/policyimpl/dictionary/utils/trie_map.h |
new file mode 100644 |
index 0000000000000000000000000000000000000000..74876343113a1fcf261bc64595c1e3cfc9a3efaa |
--- /dev/null |
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/utils/trie_map.h |
@@ -0,0 +1,384 @@ |
+/* |
+ * Copyright (C) 2014, The Android Open Source Project |
+ * |
+ * Licensed under the Apache License, Version 2.0 (the "License"); |
+ * you may not use this file except in compliance with the License. |
+ * You may obtain a copy of the License at |
+ * |
+ * http://www.apache.org/licenses/LICENSE-2.0 |
+ * |
+ * Unless required by applicable law or agreed to in writing, software |
+ * distributed under the License is distributed on an "AS IS" BASIS, |
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
+ * See the License for the specific language governing permissions and |
+ * limitations under the License. |
+ */ |
+ |
+#ifndef LATINIME_TRIE_MAP_H |
+#define LATINIME_TRIE_MAP_H |
+ |
+#include <climits> |
+#include <cstdint> |
+#include <cstdio> |
+#include <vector> |
+ |
+#include "third_party/android_prediction/defines.h" |
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" |
+#include "third_party/android_prediction/utils/byte_array_view.h" |
+ |
+namespace latinime { |
+ |
+/** |
+ * Trie map derived from Phil Bagwell's Hash Array Mapped Trie. |
+ * key is int and value is uint64_t. |
+ * This supports multiple level map. Terminal entries can have a bitmap for the next level map. |
+ * This doesn't support root map resizing. |
+ */ |
+class TrieMap { |
+ public: |
+ struct Result { |
+ const uint64_t mValue; |
+ const bool mIsValid; |
+ const int mNextLevelBitmapEntryIndex; |
+ |
+ Result(const uint64_t value, const bool isValid, const int nextLevelBitmapEntryIndex) |
+ : mValue(value), mIsValid(isValid), |
+ mNextLevelBitmapEntryIndex(nextLevelBitmapEntryIndex) {} |
+ }; |
+ |
+ /** |
+ * Struct to record iteration state in a table. |
+ */ |
+ struct TableIterationState { |
+ int mTableSize; |
+ int mTableIndex; |
+ int mCurrentIndex; |
+ |
+ TableIterationState(const int tableSize, const int tableIndex) |
+ : mTableSize(tableSize), mTableIndex(tableIndex), mCurrentIndex(0) {} |
+ }; |
+ |
+ class TrieMapRange; |
+ class TrieMapIterator { |
+ public: |
+ class IterationResult { |
+ public: |
+ IterationResult(const TrieMap *const trieMap, const int key, const uint64_t value, |
+ const int nextLeveBitmapEntryIndex) |
+ : mTrieMap(trieMap), mKey(key), mValue(value), |
+ mNextLevelBitmapEntryIndex(nextLeveBitmapEntryIndex) {} |
+ |
+ const TrieMapRange getEntriesInNextLevel() const { |
+ return TrieMapRange(mTrieMap, mNextLevelBitmapEntryIndex); |
+ } |
+ |
+ bool hasNextLevelMap() const { |
+ return mNextLevelBitmapEntryIndex != INVALID_INDEX; |
+ } |
+ |
+ AK_FORCE_INLINE int key() const { |
+ return mKey; |
+ } |
+ |
+ AK_FORCE_INLINE uint64_t value() const { |
+ return mValue; |
+ } |
+ |
+ private: |
+ const TrieMap *const mTrieMap; |
+ const int mKey; |
+ const uint64_t mValue; |
+ const int mNextLevelBitmapEntryIndex; |
+ }; |
+ |
+ TrieMapIterator(const TrieMap *const trieMap, const int bitmapEntryIndex) |
+ : mTrieMap(trieMap), mStateStack(), mBaseBitmapEntryIndex(bitmapEntryIndex), |
+ mKey(0), mValue(0), mIsValid(false), mNextLevelBitmapEntryIndex(INVALID_INDEX) { |
+ if (!trieMap) { |
+ return; |
+ } |
+ const Entry bitmapEntry = mTrieMap->readEntry(mBaseBitmapEntryIndex); |
+ mStateStack.emplace_back( |
+ mTrieMap->popCount(bitmapEntry.getBitmap()), bitmapEntry.getTableIndex()); |
+ this->operator++(); |
+ } |
+ |
+ const IterationResult operator*() const { |
+ return IterationResult(mTrieMap, mKey, mValue, mNextLevelBitmapEntryIndex); |
+ } |
+ |
+ bool operator!=(const TrieMapIterator &other) const { |
+ // Caveat: This works only for for loops. |
+ return mIsValid || other.mIsValid; |
+ } |
+ |
+ const TrieMapIterator &operator++() { |
+ const Result result = mTrieMap->iterateNext(&mStateStack, &mKey); |
+ mValue = result.mValue; |
+ mIsValid = result.mIsValid; |
+ mNextLevelBitmapEntryIndex = result.mNextLevelBitmapEntryIndex; |
+ return *this; |
+ } |
+ |
+ private: |
+ DISALLOW_DEFAULT_CONSTRUCTOR(TrieMapIterator); |
+ DISALLOW_ASSIGNMENT_OPERATOR(TrieMapIterator); |
+ |
+ const TrieMap *const mTrieMap; |
+ std::vector<TrieMap::TableIterationState> mStateStack; |
+ const int mBaseBitmapEntryIndex; |
+ int mKey; |
+ uint64_t mValue; |
+ bool mIsValid; |
+ int mNextLevelBitmapEntryIndex; |
+ }; |
+ |
+ /** |
+ * Class to support iterating entries in TrieMap by range base for loops. |
+ */ |
+ class TrieMapRange { |
+ public: |
+ TrieMapRange(const TrieMap *const trieMap, const int bitmapEntryIndex) |
+ : mTrieMap(trieMap), mBaseBitmapEntryIndex(bitmapEntryIndex) {}; |
+ |
+ TrieMapIterator begin() const { |
+ return TrieMapIterator(mTrieMap, mBaseBitmapEntryIndex); |
+ } |
+ |
+ const TrieMapIterator end() const { |
+ return TrieMapIterator(nullptr, INVALID_INDEX); |
+ } |
+ |
+ private: |
+ DISALLOW_DEFAULT_CONSTRUCTOR(TrieMapRange); |
+ DISALLOW_ASSIGNMENT_OPERATOR(TrieMapRange); |
+ |
+ const TrieMap *const mTrieMap; |
+ const int mBaseBitmapEntryIndex; |
+ }; |
+ |
+ static const int INVALID_INDEX; |
+ static const uint64_t MAX_VALUE; |
+ |
+ TrieMap(); |
+ // Construct TrieMap using existing data in the memory region written by save(). |
+ TrieMap(const ReadWriteByteArrayView buffer); |
+ void dump(const int from = 0, const int to = 0) const; |
+ |
+ bool isNearSizeLimit() const { |
+ return mBuffer.isNearSizeLimit(); |
+ } |
+ |
+ int getRootBitmapEntryIndex() const { |
+ return ROOT_BITMAP_ENTRY_INDEX; |
+ } |
+ |
+ // Returns bitmapEntryIndex. Create the next level map if it doesn't exist. |
+ int getNextLevelBitmapEntryIndex(const int key) { |
+ return getNextLevelBitmapEntryIndex(key, ROOT_BITMAP_ENTRY_INDEX); |
+ } |
+ |
+ int getNextLevelBitmapEntryIndex(const int key, const int bitmapEntryIndex); |
+ |
+ const Result getRoot(const int key) const { |
+ return get(key, ROOT_BITMAP_ENTRY_INDEX); |
+ } |
+ |
+ const Result get(const int key, const int bitmapEntryIndex) const; |
+ |
+ bool putRoot(const int key, const uint64_t value) { |
+ return put(key, value, ROOT_BITMAP_ENTRY_INDEX); |
+ } |
+ |
+ bool put(const int key, const uint64_t value, const int bitmapEntryIndex); |
+ |
+ const TrieMapRange getEntriesInRootLevel() const { |
+ return getEntriesInSpecifiedLevel(ROOT_BITMAP_ENTRY_INDEX); |
+ } |
+ |
+ const TrieMapRange getEntriesInSpecifiedLevel(const int bitmapEntryIndex) const { |
+ return TrieMapRange(this, bitmapEntryIndex); |
+ } |
+ |
+ bool save(FILE *const file) const; |
+ |
+ private: |
+ DISALLOW_COPY_AND_ASSIGN(TrieMap); |
+ |
+ /** |
+ * Struct represents an entry. |
+ * |
+ * Entry is one of these entry types. All entries are fixed size and have 2 fields FIELD_0 and |
+ * FIELD_1. |
+ * 1. bitmap entry. bitmap entry contains bitmap and the link to hash table. |
+ * FIELD_0(bitmap) FIELD_1(LINK_TO_HASH_TABLE) |
+ * 2. terminal entry. terminal entry contains hashed key and value or terminal link. terminal |
+ * entry have terminal link when the value is not fit to FIELD_1 or there is a next level map |
+ * for the key. |
+ * FIELD_0(hashed key) (FIELD_1(VALUE_FLAG VALUE) | FIELD_1(TERMINAL_LINK_FLAG TERMINAL_LINK)) |
+ * 3. value entry. value entry represents a value. Upper order bytes are stored in FIELD_0 and |
+ * lower order bytes are stored in FIELD_1. |
+ * FIELD_0(value (upper order bytes)) FIELD_1(value (lower order bytes)) |
+ */ |
+ struct Entry { |
+ const uint32_t mData0; |
+ const uint32_t mData1; |
+ |
+ Entry(const uint32_t data0, const uint32_t data1) : mData0(data0), mData1(data1) {} |
+ |
+ AK_FORCE_INLINE bool isBitmapEntry() const { |
+ return (mData1 & VALUE_FLAG) == 0 && (mData1 & TERMINAL_LINK_FLAG) == 0; |
+ } |
+ |
+ AK_FORCE_INLINE bool hasTerminalLink() const { |
+ return (mData1 & TERMINAL_LINK_FLAG) != 0; |
+ } |
+ |
+ // For terminal entry. |
+ AK_FORCE_INLINE uint32_t getKey() const { |
+ return mData0; |
+ } |
+ |
+ // For terminal entry. |
+ AK_FORCE_INLINE uint32_t getValue() const { |
+ return mData1 & VALUE_MASK; |
+ } |
+ |
+ // For terminal entry. |
+ AK_FORCE_INLINE uint32_t getValueEntryIndex() const { |
+ return mData1 & TERMINAL_LINK_MASK; |
+ } |
+ |
+ // For bitmap entry. |
+ AK_FORCE_INLINE uint32_t getBitmap() const { |
+ return mData0; |
+ } |
+ |
+ // For bitmap entry. |
+ AK_FORCE_INLINE int getTableIndex() const { |
+ return static_cast<int>(mData1); |
+ } |
+ |
+ // For value entry. |
+ AK_FORCE_INLINE uint64_t getValueOfValueEntry() const { |
+ return ((static_cast<uint64_t>(mData0) << (FIELD1_SIZE * CHAR_BIT)) ^ mData1); |
+ } |
+ }; |
+ |
+ BufferWithExtendableBuffer mBuffer; |
+ |
+ static const int FIELD0_SIZE; |
+ static const int FIELD1_SIZE; |
+ static const int ENTRY_SIZE; |
+ static const uint32_t VALUE_FLAG; |
+ static const uint32_t VALUE_MASK; |
+ static const uint32_t TERMINAL_LINK_FLAG; |
+ static const uint32_t TERMINAL_LINK_MASK; |
+ static const int NUM_OF_BITS_USED_FOR_ONE_LEVEL; |
+ static const uint32_t LABEL_MASK; |
+ static const int MAX_NUM_OF_ENTRIES_IN_ONE_LEVEL; |
+ static const int ROOT_BITMAP_ENTRY_INDEX; |
+ static const int ROOT_BITMAP_ENTRY_POS; |
+ static const Entry EMPTY_BITMAP_ENTRY; |
+ static const int MAX_BUFFER_SIZE; |
+ |
+ uint32_t getBitShuffledKey(const uint32_t key) const; |
+ bool writeValue(const uint64_t value, const int terminalEntryIndex); |
+ bool updateValue(const Entry &terminalEntry, const uint64_t value, |
+ const int terminalEntryIndex); |
+ bool freeTable(const int tableIndex, const int entryCount); |
+ int allocateTable(const int entryCount); |
+ int getTerminalEntryIndex(const uint32_t key, const uint32_t hashedKey, |
+ const Entry &bitmapEntry, const int level) const; |
+ const Result getInternal(const uint32_t key, const uint32_t hashedKey, |
+ const int bitmapEntryIndex, const int level) const; |
+ bool putInternal(const uint32_t key, const uint64_t value, const uint32_t hashedKey, |
+ const int bitmapEntryIndex, const Entry &bitmapEntry, const int level); |
+ bool addNewEntryByResolvingConflict(const uint32_t key, const uint64_t value, |
+ const uint32_t hashedKey, const Entry &conflictedEntry, const int conflictedEntryIndex, |
+ const int level); |
+ bool addNewEntryByExpandingTable(const uint32_t key, const uint64_t value, |
+ const int tableIndex, const uint32_t bitmap, const int bitmapEntryIndex, |
+ const int label); |
+ const Result iterateNext(std::vector<TableIterationState> *const iterationState, |
+ int *const outKey) const; |
+ |
+ AK_FORCE_INLINE const Entry readEntry(const int entryIndex) const { |
+ return Entry(readField0(entryIndex), readField1(entryIndex)); |
+ } |
+ |
+ // Returns whether an entry for the index is existing by testing if the index-th bit in the |
+ // bitmap is set or not. |
+ AK_FORCE_INLINE bool exists(const uint32_t bitmap, const int index) const { |
+ return (bitmap & (1 << index)) != 0; |
+ } |
+ |
+ // Set index-th bit in the bitmap. |
+ AK_FORCE_INLINE uint32_t setExist(const uint32_t bitmap, const int index) const { |
+ return bitmap | (1 << index); |
+ } |
+ |
+ // Count set bits before index in the bitmap. |
+ AK_FORCE_INLINE int popCount(const uint32_t bitmap, const int index) const { |
+ return popCount(bitmap & ((1 << index) - 1)); |
+ } |
+ |
+ // Count set bits in the bitmap. |
+ AK_FORCE_INLINE int popCount(const uint32_t bitmap) const { |
+ return __builtin_popcount(bitmap); |
+ // int v = bitmap - ((bitmap >> 1) & 0x55555555); |
+ // v = (v & 0x33333333) + ((v >> 2) & 0x33333333); |
+ // return (((v + (v >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24; |
+ } |
+ |
+ AK_FORCE_INLINE int getLabel(const uint32_t hashedKey, const int level) const { |
+ return (hashedKey >> (level * NUM_OF_BITS_USED_FOR_ONE_LEVEL)) & LABEL_MASK; |
+ } |
+ |
+ AK_FORCE_INLINE uint32_t readField0(const int entryIndex) const { |
+ return mBuffer.readUint(FIELD0_SIZE, ROOT_BITMAP_ENTRY_POS + entryIndex * ENTRY_SIZE); |
+ } |
+ |
+ AK_FORCE_INLINE uint32_t readField1(const int entryIndex) const { |
+ return mBuffer.readUint(FIELD1_SIZE, |
+ ROOT_BITMAP_ENTRY_POS + entryIndex * ENTRY_SIZE + FIELD0_SIZE); |
+ } |
+ |
+ AK_FORCE_INLINE int readEmptyTableLink(const int entryCount) const { |
+ return mBuffer.readUint(FIELD1_SIZE, (entryCount - 1) * FIELD1_SIZE); |
+ } |
+ |
+ AK_FORCE_INLINE bool writeEmptyTableLink(const int tableIndex, const int entryCount) { |
+ return mBuffer.writeUint(tableIndex, FIELD1_SIZE, (entryCount - 1) * FIELD1_SIZE); |
+ } |
+ |
+ AK_FORCE_INLINE bool writeField0(const uint32_t data, const int entryIndex) { |
+ return mBuffer.writeUint(data, FIELD0_SIZE, |
+ ROOT_BITMAP_ENTRY_POS + entryIndex * ENTRY_SIZE); |
+ } |
+ |
+ AK_FORCE_INLINE bool writeField1(const uint32_t data, const int entryIndex) { |
+ return mBuffer.writeUint(data, FIELD1_SIZE, |
+ ROOT_BITMAP_ENTRY_POS + entryIndex * ENTRY_SIZE + FIELD0_SIZE); |
+ } |
+ |
+ AK_FORCE_INLINE bool writeEntry(const Entry &entry, const int entryIndex) { |
+ return writeField0(entry.mData0, entryIndex) && writeField1(entry.mData1, entryIndex); |
+ } |
+ |
+ AK_FORCE_INLINE bool writeTerminalEntry(const uint32_t key, const uint64_t value, |
+ const int entryIndex) { |
+ return writeField0(key, entryIndex) && writeValue(value, entryIndex); |
+ } |
+ |
+ AK_FORCE_INLINE bool copyEntry(const int originalEntryIndex, const int newEntryIndex) { |
+ return writeEntry(readEntry(originalEntryIndex), newEntryIndex); |
+ } |
+ |
+ AK_FORCE_INLINE int getTailEntryIndex() const { |
+ return (mBuffer.getTailPosition() - ROOT_BITMAP_ENTRY_POS) / ENTRY_SIZE; |
+ } |
+}; |
+ |
+} // namespace latinime |
+#endif /* LATINIME_TRIE_MAP_H */ |