Index: third_party/prediction/suggest/policyimpl/dictionary/header/header_policy.h |
diff --git a/third_party/prediction/suggest/policyimpl/dictionary/header/header_policy.h b/third_party/prediction/suggest/policyimpl/dictionary/header/header_policy.h |
new file mode 100644 |
index 0000000000000000000000000000000000000000..f4a968238b50c1dd1d6efa78c46a69d8b78322f2 |
--- /dev/null |
+++ b/third_party/prediction/suggest/policyimpl/dictionary/header/header_policy.h |
@@ -0,0 +1,365 @@ |
+/* |
+ * Copyright (C) 2013, The Android Open Source Project |
+ * |
+ * Licensed under the Apache License, Version 2.0 (the "License"); |
+ * you may not use this file except in compliance with the License. |
+ * You may obtain a copy of the License at |
+ * |
+ * http://www.apache.org/licenses/LICENSE-2.0 |
+ * |
+ * Unless required by applicable law or agreed to in writing, software |
+ * distributed under the License is distributed on an "AS IS" BASIS, |
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
+ * See the License for the specific language governing permissions and |
+ * limitations under the License. |
+ */ |
+ |
+#ifndef LATINIME_HEADER_POLICY_H |
+#define LATINIME_HEADER_POLICY_H |
+ |
+#include <cstdint> |
+#include <vector> |
+ |
+#include "third_party/prediction/defines.h" |
+#include "third_party/prediction/suggest/core/policy/dictionary_header_structure_policy.h" |
+#include "third_party/prediction/suggest/policyimpl/dictionary/header/header_read_write_utils.h" |
+#include "third_party/prediction/suggest/policyimpl/dictionary/utils/format_utils.h" |
+#include "third_party/prediction/utils/char_utils.h" |
+#include "third_party/prediction/utils/time_keeper.h" |
+ |
+namespace latinime { |
+ |
+// Header information of a dictionary: format version, flags, header size, |
+// locale and the attribute map, plus the values derived from the attribute |
+// map. Every field other than the attribute map is const and is fixed at |
+// construction time. |
+class HeaderPolicy : public DictionaryHeaderStructurePolicy { |
+ public: |
+  // Reads information from existing dictionary buffer. |
+  // Flags and header size come straight from dictBuf; the remaining fields |
+  // are read from the attribute map built from dictBuf, each with the default |
+  // value passed alongside its key. |
+  HeaderPolicy(const uint8_t* const dictBuf, |
+               const FormatUtils::FORMAT_VERSION formatVersion) |
+      : mDictFormatVersion(formatVersion), |
+        mDictionaryFlags(HeaderReadWriteUtils::getFlags(dictBuf)), |
+        mSize(HeaderReadWriteUtils::getHeaderSize(dictBuf)), |
+        mAttributeMap(createAttributeMapAndReadAllAttributes(dictBuf)), |
+        mLocale(readLocale()), |
+        mMultiWordCostMultiplier(readMultipleWordCostMultiplier()), |
+        mRequiresGermanUmlautProcessing(readRequiresGermanUmlautProcessing()), |
+        mIsDecayingDict(HeaderReadWriteUtils::readBoolAttributeValue( |
+            &mAttributeMap, |
+            IS_DECAYING_DICT_KEY, |
+            false /* defaultValue */)), |
+        mDate(HeaderReadWriteUtils::readIntAttributeValue( |
+            &mAttributeMap, |
+            DATE_KEY, |
+            TimeKeeper::peekCurrentTime() /* defaultValue */)), |
+        mLastDecayedTime(HeaderReadWriteUtils::readIntAttributeValue( |
+            &mAttributeMap, |
+            LAST_DECAYED_TIME_KEY, |
+            TimeKeeper::peekCurrentTime() /* defaultValue */)), |
+        mUnigramCount( |
+            HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, |
+                                                        UNIGRAM_COUNT_KEY, |
+                                                        0 /* defaultValue */)), |
+        mBigramCount( |
+            HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, |
+                                                        BIGRAM_COUNT_KEY, |
+                                                        0 /* defaultValue */)), |
+        mExtendedRegionSize(HeaderReadWriteUtils::readIntAttributeValue( |
+            &mAttributeMap, |
+            EXTENDED_REGION_SIZE_KEY, |
+            0 /* defaultValue */)), |
+        mHasHistoricalInfoOfWords(HeaderReadWriteUtils::readBoolAttributeValue( |
+            &mAttributeMap, |
+            HAS_HISTORICAL_INFO_KEY, |
+            false /* defaultValue */)), |
+        mForgettingCurveOccurrencesToLevelUp( |
+            HeaderReadWriteUtils::readIntAttributeValue( |
+                &mAttributeMap, |
+                FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP_KEY, |
+                DEFAULT_FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP)), |
+        mForgettingCurveProbabilityValuesTableId( |
+            HeaderReadWriteUtils::readIntAttributeValue( |
+                &mAttributeMap, |
+                FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID_KEY, |
+                DEFAULT_FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID)), |
+        mForgettingCurveDurationToLevelDown( |
+            HeaderReadWriteUtils::readIntAttributeValue( |
+                &mAttributeMap, |
+                FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS_KEY, |
+                DEFAULT_FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS)), |
+        mMaxUnigramCount(HeaderReadWriteUtils::readIntAttributeValue( |
+            &mAttributeMap, |
+            MAX_UNIGRAM_COUNT_KEY, |
+            DEFAULT_MAX_UNIGRAM_COUNT)), |
+        mMaxBigramCount(HeaderReadWriteUtils::readIntAttributeValue( |
+            &mAttributeMap, |
+            MAX_BIGRAM_COUNT_KEY, |
+            DEFAULT_MAX_BIGRAM_COUNT)) {} |
+ |
+  // Constructs header information using an attribute map. |
+  // attributeMap is dereferenced unconditionally (it is copied into this |
+  // object), so it must not be null. Header size, unigram/bigram counts and |
+  // extended region size are set to 0 rather than read from the map. |
+  HeaderPolicy( |
+      const FormatUtils::FORMAT_VERSION dictFormatVersion, |
+      const std::vector<int>& locale, |
+      const DictionaryHeaderStructurePolicy::AttributeMap* const attributeMap) |
+      : mDictFormatVersion(dictFormatVersion), |
+        mDictionaryFlags( |
+            HeaderReadWriteUtils::createAndGetDictionaryFlagsUsingAttributeMap( |
+                attributeMap)), |
+        mSize(0), |
+        mAttributeMap(*attributeMap), |
+        mLocale(locale), |
+        mMultiWordCostMultiplier(readMultipleWordCostMultiplier()), |
+        mRequiresGermanUmlautProcessing(readRequiresGermanUmlautProcessing()), |
+        mIsDecayingDict(HeaderReadWriteUtils::readBoolAttributeValue( |
+            &mAttributeMap, |
+            IS_DECAYING_DICT_KEY, |
+            false /* defaultValue */)), |
+        mDate(HeaderReadWriteUtils::readIntAttributeValue( |
+            &mAttributeMap, |
+            DATE_KEY, |
+            TimeKeeper::peekCurrentTime() /* defaultValue */)), |
+        // NOTE(review): this reads DATE_KEY, whereas the buffer-based |
+        // constructor above reads LAST_DECAYED_TIME_KEY. Possibly intentional |
+        // for a freshly built header; confirm before changing. |
+        mLastDecayedTime(HeaderReadWriteUtils::readIntAttributeValue( |
+            &mAttributeMap, |
+            DATE_KEY, |
+            TimeKeeper::peekCurrentTime() /* defaultValue */)), |
+        mUnigramCount(0), |
+        mBigramCount(0), |
+        mExtendedRegionSize(0), |
+        mHasHistoricalInfoOfWords(HeaderReadWriteUtils::readBoolAttributeValue( |
+            &mAttributeMap, |
+            HAS_HISTORICAL_INFO_KEY, |
+            false /* defaultValue */)), |
+        mForgettingCurveOccurrencesToLevelUp( |
+            HeaderReadWriteUtils::readIntAttributeValue( |
+                &mAttributeMap, |
+                FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP_KEY, |
+                DEFAULT_FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP)), |
+        mForgettingCurveProbabilityValuesTableId( |
+            HeaderReadWriteUtils::readIntAttributeValue( |
+                &mAttributeMap, |
+                FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID_KEY, |
+                DEFAULT_FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID)), |
+        mForgettingCurveDurationToLevelDown( |
+            HeaderReadWriteUtils::readIntAttributeValue( |
+                &mAttributeMap, |
+                FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS_KEY, |
+                DEFAULT_FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS)), |
+        mMaxUnigramCount(HeaderReadWriteUtils::readIntAttributeValue( |
+            &mAttributeMap, |
+            MAX_UNIGRAM_COUNT_KEY, |
+            DEFAULT_MAX_UNIGRAM_COUNT)), |
+        mMaxBigramCount(HeaderReadWriteUtils::readIntAttributeValue( |
+            &mAttributeMap, |
+            MAX_BIGRAM_COUNT_KEY, |
+            DEFAULT_MAX_BIGRAM_COUNT)) {} |
+ |
+  // Copy header information. |
+  // Copies every field from headerPolicy member by member. The pointer is |
+  // dereferenced unconditionally and must not be null. |
+  HeaderPolicy(const HeaderPolicy* const headerPolicy) |
+      : mDictFormatVersion(headerPolicy->mDictFormatVersion), |
+        mDictionaryFlags(headerPolicy->mDictionaryFlags), |
+        mSize(headerPolicy->mSize), |
+        mAttributeMap(headerPolicy->mAttributeMap), |
+        mLocale(headerPolicy->mLocale), |
+        mMultiWordCostMultiplier(headerPolicy->mMultiWordCostMultiplier), |
+        mRequiresGermanUmlautProcessing( |
+            headerPolicy->mRequiresGermanUmlautProcessing), |
+        mIsDecayingDict(headerPolicy->mIsDecayingDict), |
+        mDate(headerPolicy->mDate), |
+        mLastDecayedTime(headerPolicy->mLastDecayedTime), |
+        mUnigramCount(headerPolicy->mUnigramCount), |
+        mBigramCount(headerPolicy->mBigramCount), |
+        mExtendedRegionSize(headerPolicy->mExtendedRegionSize), |
+        mHasHistoricalInfoOfWords(headerPolicy->mHasHistoricalInfoOfWords), |
+        mForgettingCurveOccurrencesToLevelUp( |
+            headerPolicy->mForgettingCurveOccurrencesToLevelUp), |
+        mForgettingCurveProbabilityValuesTableId( |
+            headerPolicy->mForgettingCurveProbabilityValuesTableId), |
+        mForgettingCurveDurationToLevelDown( |
+            headerPolicy->mForgettingCurveDurationToLevelDown), |
+        mMaxUnigramCount(headerPolicy->mMaxUnigramCount), |
+        mMaxBigramCount(headerPolicy->mMaxBigramCount) {} |
+ |
+  // Temporary dummy header. |
+  // Format version is UNKNOWN_VERSION, the attribute map is empty, and all |
+  // numeric and boolean fields are zero / false. |
+  HeaderPolicy() |
+      : mDictFormatVersion(FormatUtils::UNKNOWN_VERSION), |
+        mDictionaryFlags(0), |
+        mSize(0), |
+        mAttributeMap(), |
+        mLocale(CharUtils::EMPTY_STRING), |
+        mMultiWordCostMultiplier(0.0f), |
+        mRequiresGermanUmlautProcessing(false), |
+        mIsDecayingDict(false), |
+        mDate(0), |
+        mLastDecayedTime(0), |
+        mUnigramCount(0), |
+        mBigramCount(0), |
+        mExtendedRegionSize(0), |
+        mHasHistoricalInfoOfWords(false), |
+        mForgettingCurveOccurrencesToLevelUp(0), |
+        mForgettingCurveProbabilityValuesTableId(0), |
+        mForgettingCurveDurationToLevelDown(0), |
+        mMaxUnigramCount(0), |
+        mMaxBigramCount(0) {} |
+ |
+  ~HeaderPolicy() {} |
+ |
+  // Returns the format version as an int. Any version not listed in the |
+  // switch below is reported as FormatUtils::UNKNOWN_VERSION. |
+  virtual int getFormatVersionNumber() const { |
+    // Conceptually this converts the symbolic value we use in the code into |
+    // the hardcoded value of the bytes in the file. But we want the constants |
+    // to be the same, so we use them for both here. |
+    switch (mDictFormatVersion) { |
+      case FormatUtils::VERSION_2: |
+        return FormatUtils::VERSION_2; |
+      case FormatUtils::VERSION_4_ONLY_FOR_TESTING: |
+        return FormatUtils::VERSION_4_ONLY_FOR_TESTING; |
+      case FormatUtils::VERSION_4: |
+        return FormatUtils::VERSION_4; |
+      case FormatUtils::VERSION_4_DEV: |
+        return FormatUtils::VERSION_4_DEV; |
+      default: |
+        return FormatUtils::UNKNOWN_VERSION; |
+    } |
+  } |
+ |
+  // Decaying dictionary must have historical information; a non-decaying |
+  // dictionary is always reported as valid. |
+  AK_FORCE_INLINE bool isValid() const { |
+    return !mIsDecayingDict || mHasHistoricalInfoOfWords; |
+  } |
+ |
+  // Header size. 0 when the header was built from an attribute map or is the |
+  // dummy header; otherwise the value read from the dictionary buffer. |
+  AK_FORCE_INLINE int getSize() const { return mSize; } |
+ |
+  AK_FORCE_INLINE float getMultiWordCostMultiplier() const { |
+    return mMultiWordCostMultiplier; |
+  } |
+ |
+  AK_FORCE_INLINE bool isDecayingDict() const { return mIsDecayingDict; } |
+ |
+  AK_FORCE_INLINE bool requiresGermanUmlautProcessing() const { |
+    return mRequiresGermanUmlautProcessing; |
+  } |
+ |
+  AK_FORCE_INLINE int getDate() const { return mDate; } |
+ |
+  AK_FORCE_INLINE int getLastDecayedTime() const { return mLastDecayedTime; } |
+ |
+  AK_FORCE_INLINE int getUnigramCount() const { return mUnigramCount; } |
+ |
+  AK_FORCE_INLINE int getBigramCount() const { return mBigramCount; } |
+ |
+  AK_FORCE_INLINE int getExtendedRegionSize() const { |
+    return mExtendedRegionSize; |
+  } |
+ |
+  AK_FORCE_INLINE bool hasHistoricalInfoOfWords() const { |
+    return mHasHistoricalInfoOfWords; |
+  } |
+ |
+  // Exact matches are boosted for every dictionary that is not decaying. |
+  AK_FORCE_INLINE bool shouldBoostExactMatches() const { |
+    // TODO: Investigate better ways to handle exact matches for personalized |
+    // dictionaries. |
+    return !isDecayingDict(); |
+  } |
+ |
+  // Returns a pointer to the attribute map owned by this object. |
+  const DictionaryHeaderStructurePolicy::AttributeMap* getAttributeMap() const { |
+    return &mAttributeMap; |
+  } |
+ |
+  AK_FORCE_INLINE int getForgettingCurveOccurrencesToLevelUp() const { |
+    return mForgettingCurveOccurrencesToLevelUp; |
+  } |
+ |
+  AK_FORCE_INLINE int getForgettingCurveProbabilityValuesTableId() const { |
+    return mForgettingCurveProbabilityValuesTableId; |
+  } |
+ |
+  AK_FORCE_INLINE int getForgettingCurveDurationToLevelDown() const { |
+    return mForgettingCurveDurationToLevelDown; |
+  } |
+ |
+  AK_FORCE_INLINE int getMaxUnigramCount() const { return mMaxUnigramCount; } |
+ |
+  AK_FORCE_INLINE int getMaxBigramCount() const { return mMaxBigramCount; } |
+ |
+  // The three functions below are only declared here; their definitions are |
+  // not part of this header. |
+  // NOTE(review): judging by the name, readHeaderValueOrQuestionMark |
+  // presumably writes the attribute for key (or a '?' placeholder) into |
+  // outValue, limited by outValueSize - confirm against the definition. |
+  void readHeaderValueOrQuestionMark(const char* const key, |
+                                     int* outValue, |
+                                     int outValueSize) const; |
+ |
+  bool fillInAndWriteHeaderToBuffer( |
+      const bool updatesLastDecayedTime, |
+      const int unigramCount, |
+      const int bigramCount, |
+      const int extendedRegionSize, |
+      BufferWithExtendableBuffer* const outBuffer) const; |
+ |
+  void fillInHeader( |
+      const bool updatesLastDecayedTime, |
+      const int unigramCount, |
+      const int bigramCount, |
+      const int extendedRegionSize, |
+      DictionaryHeaderStructurePolicy::AttributeMap* outAttributeMap) const; |
+ |
+  // Returns a pointer to the locale code points owned by this object. |
+  AK_FORCE_INLINE const std::vector<int>* getLocale() const { return &mLocale; } |
+ |
+  // True when the format version compares greater than or equal to |
+  // FormatUtils::VERSION_4. |
+  bool supportsBeginningOfSentence() const { |
+    return mDictFormatVersion >= FormatUtils::VERSION_4; |
+  } |
+ |
+ private: |
+  DISALLOW_COPY_AND_ASSIGN(HeaderPolicy); |
+ |
+  // Attribute map keys. Declared here; the values are defined out of line. |
+  static const char* const MULTIPLE_WORDS_DEMOTION_RATE_KEY; |
+  static const char* const REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY; |
+  static const char* const IS_DECAYING_DICT_KEY; |
+  static const char* const DATE_KEY; |
+  static const char* const LAST_DECAYED_TIME_KEY; |
+  static const char* const UNIGRAM_COUNT_KEY; |
+  static const char* const BIGRAM_COUNT_KEY; |
+  static const char* const EXTENDED_REGION_SIZE_KEY; |
+  static const char* const HAS_HISTORICAL_INFO_KEY; |
+  static const char* const LOCALE_KEY; |
+  static const char* const FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP_KEY; |
+  static const char* const FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID_KEY; |
+  static const char* const |
+      FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS_KEY; |
+  static const char* const MAX_UNIGRAM_COUNT_KEY; |
+  static const char* const MAX_BIGRAM_COUNT_KEY; |
+  // Defaults passed to the attribute readers. Also defined out of line. |
+  static const int DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE; |
+  static const float MULTIPLE_WORD_COST_MULTIPLIER_SCALE; |
+  static const int DEFAULT_FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP; |
+  static const int DEFAULT_FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID; |
+  static const int DEFAULT_FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS; |
+  static const int DEFAULT_MAX_UNIGRAM_COUNT; |
+  static const int DEFAULT_MAX_BIGRAM_COUNT; |
+ |
+  // Declaration order is load-bearing: C++ initializes members in the order |
+  // they are declared, and the constructors initialize every member after |
+  // mAttributeMap either from &mAttributeMap or through the read*() member |
+  // helpers declared below. Keep mAttributeMap ahead of those members. |
+  const FormatUtils::FORMAT_VERSION mDictFormatVersion; |
+  const HeaderReadWriteUtils::DictionaryFlags mDictionaryFlags; |
+  const int mSize; |
+  DictionaryHeaderStructurePolicy::AttributeMap mAttributeMap; |
+  const std::vector<int> mLocale; |
+  const float mMultiWordCostMultiplier; |
+  const bool mRequiresGermanUmlautProcessing; |
+  const bool mIsDecayingDict; |
+  const int mDate; |
+  const int mLastDecayedTime; |
+  const int mUnigramCount; |
+  const int mBigramCount; |
+  const int mExtendedRegionSize; |
+  const bool mHasHistoricalInfoOfWords; |
+  const int mForgettingCurveOccurrencesToLevelUp; |
+  const int mForgettingCurveProbabilityValuesTableId; |
+  const int mForgettingCurveDurationToLevelDown; |
+  const int mMaxUnigramCount; |
+  const int mMaxBigramCount; |
+ |
+  // Helpers invoked from the constructors' initializer lists; defined out of |
+  // line. |
+  const std::vector<int> readLocale() const; |
+  float readMultipleWordCostMultiplier() const; |
+  bool readRequiresGermanUmlautProcessing() const; |
+ |
+  // Builds the attribute map for the buffer-based constructor from dictBuf. |
+  static DictionaryHeaderStructurePolicy::AttributeMap |
+  createAttributeMapAndReadAllAttributes(const uint8_t* const dictBuf); |
+}; |
+} // namespace latinime |
+#endif /* LATINIME_HEADER_POLICY_H */ |