Index: third_party/android_prediction/suggest/policyimpl/dictionary/header/header_policy.h |
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/header/header_policy.h b/third_party/android_prediction/suggest/policyimpl/dictionary/header/header_policy.h |
new file mode 100644 |
index 0000000000000000000000000000000000000000..b7dc120670708728aecf4e86ce7e2ae90b729468 |
--- /dev/null |
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/header/header_policy.h |
@@ -0,0 +1,307 @@ |
+/* |
+ * Copyright (C) 2013, The Android Open Source Project |
+ * |
+ * Licensed under the Apache License, Version 2.0 (the "License"); |
+ * you may not use this file except in compliance with the License. |
+ * You may obtain a copy of the License at |
+ * |
+ * http://www.apache.org/licenses/LICENSE-2.0 |
+ * |
+ * Unless required by applicable law or agreed to in writing, software |
+ * distributed under the License is distributed on an "AS IS" BASIS, |
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
+ * See the License for the specific language governing permissions and |
+ * limitations under the License. |
+ */ |
+ |
+#ifndef LATINIME_HEADER_POLICY_H |
+#define LATINIME_HEADER_POLICY_H |
+ |
+#include <cstdint> |
+ |
+#include "third_party/android_prediction/defines.h" |
+#include "third_party/android_prediction/suggest/core/policy/dictionary_header_structure_policy.h" |
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/header/header_read_write_utils.h" |
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/format_utils.h" |
+#include "third_party/android_prediction/utils/char_utils.h" |
+#include "third_party/android_prediction/utils/time_keeper.h" |
+ |
+namespace latinime { |
+ |
+class HeaderPolicy : public DictionaryHeaderStructurePolicy { |
+ public: |
+ // Reads information from existing dictionary buffer. |
+ HeaderPolicy(const uint8_t *const dictBuf, const FormatUtils::FORMAT_VERSION formatVersion) |
+ : mDictFormatVersion(formatVersion), |
+ mDictionaryFlags(HeaderReadWriteUtils::getFlags(dictBuf)), |
+ mSize(HeaderReadWriteUtils::getHeaderSize(dictBuf)), |
+ mAttributeMap(createAttributeMapAndReadAllAttributes(dictBuf)), |
+ mLocale(readLocale()), |
+ mMultiWordCostMultiplier(readMultipleWordCostMultiplier()), |
+ mRequiresGermanUmlautProcessing(readRequiresGermanUmlautProcessing()), |
+ mIsDecayingDict(HeaderReadWriteUtils::readBoolAttributeValue(&mAttributeMap, |
+ IS_DECAYING_DICT_KEY, false /* defaultValue */)), |
+ mDate(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, |
+ DATE_KEY, TimeKeeper::peekCurrentTime() /* defaultValue */)), |
+ mLastDecayedTime(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, |
+ LAST_DECAYED_TIME_KEY, TimeKeeper::peekCurrentTime() /* defaultValue */)), |
+ mUnigramCount(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, |
+ UNIGRAM_COUNT_KEY, 0 /* defaultValue */)), |
+ mBigramCount(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, |
+ BIGRAM_COUNT_KEY, 0 /* defaultValue */)), |
+ mExtendedRegionSize(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, |
+ EXTENDED_REGION_SIZE_KEY, 0 /* defaultValue */)), |
+ mHasHistoricalInfoOfWords(HeaderReadWriteUtils::readBoolAttributeValue( |
+ &mAttributeMap, HAS_HISTORICAL_INFO_KEY, false /* defaultValue */)), |
+ mForgettingCurveOccurrencesToLevelUp(HeaderReadWriteUtils::readIntAttributeValue( |
+ &mAttributeMap, FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP_KEY, |
+ DEFAULT_FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP)), |
+ mForgettingCurveProbabilityValuesTableId(HeaderReadWriteUtils::readIntAttributeValue( |
+ &mAttributeMap, FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID_KEY, |
+ DEFAULT_FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID)), |
+ mForgettingCurveDurationToLevelDown(HeaderReadWriteUtils::readIntAttributeValue( |
+ &mAttributeMap, FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS_KEY, |
+ DEFAULT_FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS)), |
+ mMaxUnigramCount(HeaderReadWriteUtils::readIntAttributeValue( |
+ &mAttributeMap, MAX_UNIGRAM_COUNT_KEY, DEFAULT_MAX_UNIGRAM_COUNT)), |
+ mMaxBigramCount(HeaderReadWriteUtils::readIntAttributeValue( |
+ &mAttributeMap, MAX_BIGRAM_COUNT_KEY, DEFAULT_MAX_BIGRAM_COUNT)) {} |
+ |
+ // Constructs header information using an attribute map. |
+ HeaderPolicy(const FormatUtils::FORMAT_VERSION dictFormatVersion, |
+ const std::vector<int> &locale, |
+ const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap) |
+ : mDictFormatVersion(dictFormatVersion), |
+ mDictionaryFlags(HeaderReadWriteUtils::createAndGetDictionaryFlagsUsingAttributeMap( |
+ attributeMap)), mSize(0), mAttributeMap(*attributeMap), mLocale(locale), |
+ mMultiWordCostMultiplier(readMultipleWordCostMultiplier()), |
+ mRequiresGermanUmlautProcessing(readRequiresGermanUmlautProcessing()), |
+ mIsDecayingDict(HeaderReadWriteUtils::readBoolAttributeValue(&mAttributeMap, |
+ IS_DECAYING_DICT_KEY, false /* defaultValue */)), |
+ mDate(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, |
+ DATE_KEY, TimeKeeper::peekCurrentTime() /* defaultValue */)), |
+ mLastDecayedTime(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, |
+ DATE_KEY, TimeKeeper::peekCurrentTime() /* defaultValue */)), |
+ mUnigramCount(0), mBigramCount(0), mExtendedRegionSize(0), |
+ mHasHistoricalInfoOfWords(HeaderReadWriteUtils::readBoolAttributeValue( |
+ &mAttributeMap, HAS_HISTORICAL_INFO_KEY, false /* defaultValue */)), |
+ mForgettingCurveOccurrencesToLevelUp(HeaderReadWriteUtils::readIntAttributeValue( |
+ &mAttributeMap, FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP_KEY, |
+ DEFAULT_FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP)), |
+ mForgettingCurveProbabilityValuesTableId(HeaderReadWriteUtils::readIntAttributeValue( |
+ &mAttributeMap, FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID_KEY, |
+ DEFAULT_FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID)), |
+ mForgettingCurveDurationToLevelDown(HeaderReadWriteUtils::readIntAttributeValue( |
+ &mAttributeMap, FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS_KEY, |
+ DEFAULT_FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS)), |
+ mMaxUnigramCount(HeaderReadWriteUtils::readIntAttributeValue( |
+ &mAttributeMap, MAX_UNIGRAM_COUNT_KEY, DEFAULT_MAX_UNIGRAM_COUNT)), |
+ mMaxBigramCount(HeaderReadWriteUtils::readIntAttributeValue( |
+ &mAttributeMap, MAX_BIGRAM_COUNT_KEY, DEFAULT_MAX_BIGRAM_COUNT)) {} |
+ |
+ // Copy header information |
+ HeaderPolicy(const HeaderPolicy *const headerPolicy) |
+ : mDictFormatVersion(headerPolicy->mDictFormatVersion), |
+ mDictionaryFlags(headerPolicy->mDictionaryFlags), mSize(headerPolicy->mSize), |
+ mAttributeMap(headerPolicy->mAttributeMap), mLocale(headerPolicy->mLocale), |
+ mMultiWordCostMultiplier(headerPolicy->mMultiWordCostMultiplier), |
+ mRequiresGermanUmlautProcessing(headerPolicy->mRequiresGermanUmlautProcessing), |
+ mIsDecayingDict(headerPolicy->mIsDecayingDict), |
+ mDate(headerPolicy->mDate), mLastDecayedTime(headerPolicy->mLastDecayedTime), |
+ mUnigramCount(headerPolicy->mUnigramCount), mBigramCount(headerPolicy->mBigramCount), |
+ mExtendedRegionSize(headerPolicy->mExtendedRegionSize), |
+ mHasHistoricalInfoOfWords(headerPolicy->mHasHistoricalInfoOfWords), |
+ mForgettingCurveOccurrencesToLevelUp( |
+ headerPolicy->mForgettingCurveOccurrencesToLevelUp), |
+ mForgettingCurveProbabilityValuesTableId( |
+ headerPolicy->mForgettingCurveProbabilityValuesTableId), |
+ mForgettingCurveDurationToLevelDown( |
+ headerPolicy->mForgettingCurveDurationToLevelDown), |
+ mMaxUnigramCount(headerPolicy->mMaxUnigramCount), |
+ mMaxBigramCount(headerPolicy->mMaxBigramCount) {} |
+ |
+ // Temporary dummy header. |
+ HeaderPolicy() |
+ : mDictFormatVersion(FormatUtils::UNKNOWN_VERSION), mDictionaryFlags(0), mSize(0), |
+ mAttributeMap(), mLocale(CharUtils::EMPTY_STRING), mMultiWordCostMultiplier(0.0f), |
+ mRequiresGermanUmlautProcessing(false), mIsDecayingDict(false), |
+ mDate(0), mLastDecayedTime(0), mUnigramCount(0), mBigramCount(0), |
+ mExtendedRegionSize(0), mHasHistoricalInfoOfWords(false), |
+ mForgettingCurveOccurrencesToLevelUp(0), mForgettingCurveProbabilityValuesTableId(0), |
+ mForgettingCurveDurationToLevelDown(0), mMaxUnigramCount(0), mMaxBigramCount(0) {} |
+ |
+ ~HeaderPolicy() {} |
+ |
+ virtual int getFormatVersionNumber() const { |
+ // Conceptually this converts the symbolic value we use in the code into the |
+ // hardcoded of the bytes in the file. But we want the constants to be the |
+ // same so we use them for both here. |
+ switch (mDictFormatVersion) { |
+ case FormatUtils::VERSION_2: |
+ return FormatUtils::VERSION_2; |
+ case FormatUtils::VERSION_4_ONLY_FOR_TESTING: |
+ return FormatUtils::VERSION_4_ONLY_FOR_TESTING; |
+ case FormatUtils::VERSION_4: |
+ return FormatUtils::VERSION_4; |
+ case FormatUtils::VERSION_4_DEV: |
+ return FormatUtils::VERSION_4_DEV; |
+ default: |
+ return FormatUtils::UNKNOWN_VERSION; |
+ } |
+ } |
+ |
+ AK_FORCE_INLINE bool isValid() const { |
+ // Decaying dictionary must have historical information. |
+ if (!mIsDecayingDict) { |
+ return true; |
+ } |
+ if (mHasHistoricalInfoOfWords) { |
+ return true; |
+ } else { |
+ return false; |
+ } |
+ } |
+ |
+ AK_FORCE_INLINE int getSize() const { |
+ return mSize; |
+ } |
+ |
+ AK_FORCE_INLINE float getMultiWordCostMultiplier() const { |
+ return mMultiWordCostMultiplier; |
+ } |
+ |
+ AK_FORCE_INLINE bool isDecayingDict() const { |
+ return mIsDecayingDict; |
+ } |
+ |
+ AK_FORCE_INLINE bool requiresGermanUmlautProcessing() const { |
+ return mRequiresGermanUmlautProcessing; |
+ } |
+ |
+ AK_FORCE_INLINE int getDate() const { |
+ return mDate; |
+ } |
+ |
+ AK_FORCE_INLINE int getLastDecayedTime() const { |
+ return mLastDecayedTime; |
+ } |
+ |
+ AK_FORCE_INLINE int getUnigramCount() const { |
+ return mUnigramCount; |
+ } |
+ |
+ AK_FORCE_INLINE int getBigramCount() const { |
+ return mBigramCount; |
+ } |
+ |
+ AK_FORCE_INLINE int getExtendedRegionSize() const { |
+ return mExtendedRegionSize; |
+ } |
+ |
+ AK_FORCE_INLINE bool hasHistoricalInfoOfWords() const { |
+ return mHasHistoricalInfoOfWords; |
+ } |
+ |
+ AK_FORCE_INLINE bool shouldBoostExactMatches() const { |
+ // TODO: Investigate better ways to handle exact matches for personalized dictionaries. |
+ return !isDecayingDict(); |
+ } |
+ |
+ const DictionaryHeaderStructurePolicy::AttributeMap *getAttributeMap() const { |
+ return &mAttributeMap; |
+ } |
+ |
+ AK_FORCE_INLINE int getForgettingCurveOccurrencesToLevelUp() const { |
+ return mForgettingCurveOccurrencesToLevelUp; |
+ } |
+ |
+ AK_FORCE_INLINE int getForgettingCurveProbabilityValuesTableId() const { |
+ return mForgettingCurveProbabilityValuesTableId; |
+ } |
+ |
+ AK_FORCE_INLINE int getForgettingCurveDurationToLevelDown() const { |
+ return mForgettingCurveDurationToLevelDown; |
+ } |
+ |
+ AK_FORCE_INLINE int getMaxUnigramCount() const { |
+ return mMaxUnigramCount; |
+ } |
+ |
+ AK_FORCE_INLINE int getMaxBigramCount() const { |
+ return mMaxBigramCount; |
+ } |
+ |
+ void readHeaderValueOrQuestionMark(const char *const key, |
+ int *outValue, int outValueSize) const; |
+ |
+ bool fillInAndWriteHeaderToBuffer(const bool updatesLastDecayedTime, |
+ const int unigramCount, const int bigramCount, |
+ const int extendedRegionSize, BufferWithExtendableBuffer *const outBuffer) const; |
+ |
+ void fillInHeader(const bool updatesLastDecayedTime, |
+ const int unigramCount, const int bigramCount, const int extendedRegionSize, |
+ DictionaryHeaderStructurePolicy::AttributeMap *outAttributeMap) const; |
+ |
+ AK_FORCE_INLINE const std::vector<int> *getLocale() const { |
+ return &mLocale; |
+ } |
+ |
+ bool supportsBeginningOfSentence() const { |
+ return mDictFormatVersion >= FormatUtils::VERSION_4; |
+ } |
+ |
+ private: |
+ DISALLOW_COPY_AND_ASSIGN(HeaderPolicy); |
+ |
+ static const char *const MULTIPLE_WORDS_DEMOTION_RATE_KEY; |
+ static const char *const REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY; |
+ static const char *const IS_DECAYING_DICT_KEY; |
+ static const char *const DATE_KEY; |
+ static const char *const LAST_DECAYED_TIME_KEY; |
+ static const char *const UNIGRAM_COUNT_KEY; |
+ static const char *const BIGRAM_COUNT_KEY; |
+ static const char *const EXTENDED_REGION_SIZE_KEY; |
+ static const char *const HAS_HISTORICAL_INFO_KEY; |
+ static const char *const LOCALE_KEY; |
+ static const char *const FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP_KEY; |
+ static const char *const FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID_KEY; |
+ static const char *const FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS_KEY; |
+ static const char *const MAX_UNIGRAM_COUNT_KEY; |
+ static const char *const MAX_BIGRAM_COUNT_KEY; |
+ static const int DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE; |
+ static const float MULTIPLE_WORD_COST_MULTIPLIER_SCALE; |
+ static const int DEFAULT_FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP; |
+ static const int DEFAULT_FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID; |
+ static const int DEFAULT_FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS; |
+ static const int DEFAULT_MAX_UNIGRAM_COUNT; |
+ static const int DEFAULT_MAX_BIGRAM_COUNT; |
+ |
+ const FormatUtils::FORMAT_VERSION mDictFormatVersion; |
+ const HeaderReadWriteUtils::DictionaryFlags mDictionaryFlags; |
+ const int mSize; |
+ DictionaryHeaderStructurePolicy::AttributeMap mAttributeMap; |
+ const std::vector<int> mLocale; |
+ const float mMultiWordCostMultiplier; |
+ const bool mRequiresGermanUmlautProcessing; |
+ const bool mIsDecayingDict; |
+ const int mDate; |
+ const int mLastDecayedTime; |
+ const int mUnigramCount; |
+ const int mBigramCount; |
+ const int mExtendedRegionSize; |
+ const bool mHasHistoricalInfoOfWords; |
+ const int mForgettingCurveOccurrencesToLevelUp; |
+ const int mForgettingCurveProbabilityValuesTableId; |
+ const int mForgettingCurveDurationToLevelDown; |
+ const int mMaxUnigramCount; |
+ const int mMaxBigramCount; |
+ |
+ const std::vector<int> readLocale() const; |
+ float readMultipleWordCostMultiplier() const; |
+ bool readRequiresGermanUmlautProcessing() const; |
+ |
+ static DictionaryHeaderStructurePolicy::AttributeMap createAttributeMapAndReadAllAttributes( |
+ const uint8_t *const dictBuf); |
+}; |
+} // namespace latinime |
+#endif /* LATINIME_HEADER_POLICY_H */ |