Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(751)

Side by Side Diff: third_party/prediction/suggest/policyimpl/dictionary/header/header_policy.h

Issue 1247903003: Add spellcheck and word suggestion to the prediction service (Closed) Base URL: https://github.com/domokit/mojo.git@master
Patch Set: Created 5 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 /*
2 * Copyright (C) 2013, The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #ifndef LATINIME_HEADER_POLICY_H
18 #define LATINIME_HEADER_POLICY_H
19
20 #include <cstdint>
21
22 #include "third_party/prediction/defines.h"
23 #include "third_party/prediction/suggest/core/policy/dictionary_header_structure _policy.h"
24 #include "third_party/prediction/suggest/policyimpl/dictionary/header/header_rea d_write_utils.h"
25 #include "third_party/prediction/suggest/policyimpl/dictionary/utils/format_util s.h"
26 #include "third_party/prediction/utils/char_utils.h"
27 #include "third_party/prediction/utils/time_keeper.h"
28
29 namespace latinime {
30
31 class HeaderPolicy : public DictionaryHeaderStructurePolicy {
32 public:
33 // Reads information from existing dictionary buffer.
34 HeaderPolicy(const uint8_t* const dictBuf,
35 const FormatUtils::FORMAT_VERSION formatVersion)
36 : mDictFormatVersion(formatVersion),
37 mDictionaryFlags(HeaderReadWriteUtils::getFlags(dictBuf)),
38 mSize(HeaderReadWriteUtils::getHeaderSize(dictBuf)),
39 mAttributeMap(createAttributeMapAndReadAllAttributes(dictBuf)),
40 mLocale(readLocale()),
41 mMultiWordCostMultiplier(readMultipleWordCostMultiplier()),
42 mRequiresGermanUmlautProcessing(readRequiresGermanUmlautProcessing()),
43 mIsDecayingDict(HeaderReadWriteUtils::readBoolAttributeValue(
44 &mAttributeMap,
45 IS_DECAYING_DICT_KEY,
46 false /* defaultValue */)),
47 mDate(HeaderReadWriteUtils::readIntAttributeValue(
48 &mAttributeMap,
49 DATE_KEY,
50 TimeKeeper::peekCurrentTime() /* defaultValue */)),
51 mLastDecayedTime(HeaderReadWriteUtils::readIntAttributeValue(
52 &mAttributeMap,
53 LAST_DECAYED_TIME_KEY,
54 TimeKeeper::peekCurrentTime() /* defaultValue */)),
55 mUnigramCount(
56 HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
57 UNIGRAM_COUNT_KEY,
58 0 /* defaultValue */)),
59 mBigramCount(
60 HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
61 BIGRAM_COUNT_KEY,
62 0 /* defaultValue */)),
63 mExtendedRegionSize(HeaderReadWriteUtils::readIntAttributeValue(
64 &mAttributeMap,
65 EXTENDED_REGION_SIZE_KEY,
66 0 /* defaultValue */)),
67 mHasHistoricalInfoOfWords(HeaderReadWriteUtils::readBoolAttributeValue(
68 &mAttributeMap,
69 HAS_HISTORICAL_INFO_KEY,
70 false /* defaultValue */)),
71 mForgettingCurveOccurrencesToLevelUp(
72 HeaderReadWriteUtils::readIntAttributeValue(
73 &mAttributeMap,
74 FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP_KEY,
75 DEFAULT_FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP)),
76 mForgettingCurveProbabilityValuesTableId(
77 HeaderReadWriteUtils::readIntAttributeValue(
78 &mAttributeMap,
79 FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID_KEY,
80 DEFAULT_FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID)),
81 mForgettingCurveDurationToLevelDown(
82 HeaderReadWriteUtils::readIntAttributeValue(
83 &mAttributeMap,
84 FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS_KEY,
85 DEFAULT_FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS)),
86 mMaxUnigramCount(HeaderReadWriteUtils::readIntAttributeValue(
87 &mAttributeMap,
88 MAX_UNIGRAM_COUNT_KEY,
89 DEFAULT_MAX_UNIGRAM_COUNT)),
90 mMaxBigramCount(HeaderReadWriteUtils::readIntAttributeValue(
91 &mAttributeMap,
92 MAX_BIGRAM_COUNT_KEY,
93 DEFAULT_MAX_BIGRAM_COUNT)) {}
94
95 // Constructs header information using an attribute map.
96 HeaderPolicy(
97 const FormatUtils::FORMAT_VERSION dictFormatVersion,
98 const std::vector<int>& locale,
99 const DictionaryHeaderStructurePolicy::AttributeMap* const attributeMap)
100 : mDictFormatVersion(dictFormatVersion),
101 mDictionaryFlags(
102 HeaderReadWriteUtils::createAndGetDictionaryFlagsUsingAttributeMap(
103 attributeMap)),
104 mSize(0),
105 mAttributeMap(*attributeMap),
106 mLocale(locale),
107 mMultiWordCostMultiplier(readMultipleWordCostMultiplier()),
108 mRequiresGermanUmlautProcessing(readRequiresGermanUmlautProcessing()),
109 mIsDecayingDict(HeaderReadWriteUtils::readBoolAttributeValue(
110 &mAttributeMap,
111 IS_DECAYING_DICT_KEY,
112 false /* defaultValue */)),
113 mDate(HeaderReadWriteUtils::readIntAttributeValue(
114 &mAttributeMap,
115 DATE_KEY,
116 TimeKeeper::peekCurrentTime() /* defaultValue */)),
117 mLastDecayedTime(HeaderReadWriteUtils::readIntAttributeValue(
118 &mAttributeMap,
119 DATE_KEY,
120 TimeKeeper::peekCurrentTime() /* defaultValue */)),
121 mUnigramCount(0),
122 mBigramCount(0),
123 mExtendedRegionSize(0),
124 mHasHistoricalInfoOfWords(HeaderReadWriteUtils::readBoolAttributeValue(
125 &mAttributeMap,
126 HAS_HISTORICAL_INFO_KEY,
127 false /* defaultValue */)),
128 mForgettingCurveOccurrencesToLevelUp(
129 HeaderReadWriteUtils::readIntAttributeValue(
130 &mAttributeMap,
131 FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP_KEY,
132 DEFAULT_FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP)),
133 mForgettingCurveProbabilityValuesTableId(
134 HeaderReadWriteUtils::readIntAttributeValue(
135 &mAttributeMap,
136 FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID_KEY,
137 DEFAULT_FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID)),
138 mForgettingCurveDurationToLevelDown(
139 HeaderReadWriteUtils::readIntAttributeValue(
140 &mAttributeMap,
141 FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS_KEY,
142 DEFAULT_FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS)),
143 mMaxUnigramCount(HeaderReadWriteUtils::readIntAttributeValue(
144 &mAttributeMap,
145 MAX_UNIGRAM_COUNT_KEY,
146 DEFAULT_MAX_UNIGRAM_COUNT)),
147 mMaxBigramCount(HeaderReadWriteUtils::readIntAttributeValue(
148 &mAttributeMap,
149 MAX_BIGRAM_COUNT_KEY,
150 DEFAULT_MAX_BIGRAM_COUNT)) {}
151
152 // Copy header information
153 HeaderPolicy(const HeaderPolicy* const headerPolicy)
154 : mDictFormatVersion(headerPolicy->mDictFormatVersion),
155 mDictionaryFlags(headerPolicy->mDictionaryFlags),
156 mSize(headerPolicy->mSize),
157 mAttributeMap(headerPolicy->mAttributeMap),
158 mLocale(headerPolicy->mLocale),
159 mMultiWordCostMultiplier(headerPolicy->mMultiWordCostMultiplier),
160 mRequiresGermanUmlautProcessing(
161 headerPolicy->mRequiresGermanUmlautProcessing),
162 mIsDecayingDict(headerPolicy->mIsDecayingDict),
163 mDate(headerPolicy->mDate),
164 mLastDecayedTime(headerPolicy->mLastDecayedTime),
165 mUnigramCount(headerPolicy->mUnigramCount),
166 mBigramCount(headerPolicy->mBigramCount),
167 mExtendedRegionSize(headerPolicy->mExtendedRegionSize),
168 mHasHistoricalInfoOfWords(headerPolicy->mHasHistoricalInfoOfWords),
169 mForgettingCurveOccurrencesToLevelUp(
170 headerPolicy->mForgettingCurveOccurrencesToLevelUp),
171 mForgettingCurveProbabilityValuesTableId(
172 headerPolicy->mForgettingCurveProbabilityValuesTableId),
173 mForgettingCurveDurationToLevelDown(
174 headerPolicy->mForgettingCurveDurationToLevelDown),
175 mMaxUnigramCount(headerPolicy->mMaxUnigramCount),
176 mMaxBigramCount(headerPolicy->mMaxBigramCount) {}
177
178 // Temporary dummy header.
179 HeaderPolicy()
180 : mDictFormatVersion(FormatUtils::UNKNOWN_VERSION),
181 mDictionaryFlags(0),
182 mSize(0),
183 mAttributeMap(),
184 mLocale(CharUtils::EMPTY_STRING),
185 mMultiWordCostMultiplier(0.0f),
186 mRequiresGermanUmlautProcessing(false),
187 mIsDecayingDict(false),
188 mDate(0),
189 mLastDecayedTime(0),
190 mUnigramCount(0),
191 mBigramCount(0),
192 mExtendedRegionSize(0),
193 mHasHistoricalInfoOfWords(false),
194 mForgettingCurveOccurrencesToLevelUp(0),
195 mForgettingCurveProbabilityValuesTableId(0),
196 mForgettingCurveDurationToLevelDown(0),
197 mMaxUnigramCount(0),
198 mMaxBigramCount(0) {}
199
200 ~HeaderPolicy() {}
201
202 virtual int getFormatVersionNumber() const {
203 // Conceptually this converts the symbolic value we use in the code into the
204 // hardcoded of the bytes in the file. But we want the constants to be the
205 // same so we use them for both here.
206 switch (mDictFormatVersion) {
207 case FormatUtils::VERSION_2:
208 return FormatUtils::VERSION_2;
209 case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
210 return FormatUtils::VERSION_4_ONLY_FOR_TESTING;
211 case FormatUtils::VERSION_4:
212 return FormatUtils::VERSION_4;
213 case FormatUtils::VERSION_4_DEV:
214 return FormatUtils::VERSION_4_DEV;
215 default:
216 return FormatUtils::UNKNOWN_VERSION;
217 }
218 }
219
220 AK_FORCE_INLINE bool isValid() const {
221 // Decaying dictionary must have historical information.
222 if (!mIsDecayingDict) {
223 return true;
224 }
225 if (mHasHistoricalInfoOfWords) {
226 return true;
227 } else {
228 return false;
229 }
230 }
231
232 AK_FORCE_INLINE int getSize() const { return mSize; }
233
234 AK_FORCE_INLINE float getMultiWordCostMultiplier() const {
235 return mMultiWordCostMultiplier;
236 }
237
238 AK_FORCE_INLINE bool isDecayingDict() const { return mIsDecayingDict; }
239
240 AK_FORCE_INLINE bool requiresGermanUmlautProcessing() const {
241 return mRequiresGermanUmlautProcessing;
242 }
243
244 AK_FORCE_INLINE int getDate() const { return mDate; }
245
246 AK_FORCE_INLINE int getLastDecayedTime() const { return mLastDecayedTime; }
247
248 AK_FORCE_INLINE int getUnigramCount() const { return mUnigramCount; }
249
250 AK_FORCE_INLINE int getBigramCount() const { return mBigramCount; }
251
252 AK_FORCE_INLINE int getExtendedRegionSize() const {
253 return mExtendedRegionSize;
254 }
255
256 AK_FORCE_INLINE bool hasHistoricalInfoOfWords() const {
257 return mHasHistoricalInfoOfWords;
258 }
259
260 AK_FORCE_INLINE bool shouldBoostExactMatches() const {
261 // TODO: Investigate better ways to handle exact matches for personalized
262 // dictionaries.
263 return !isDecayingDict();
264 }
265
266 const DictionaryHeaderStructurePolicy::AttributeMap* getAttributeMap() const {
267 return &mAttributeMap;
268 }
269
270 AK_FORCE_INLINE int getForgettingCurveOccurrencesToLevelUp() const {
271 return mForgettingCurveOccurrencesToLevelUp;
272 }
273
274 AK_FORCE_INLINE int getForgettingCurveProbabilityValuesTableId() const {
275 return mForgettingCurveProbabilityValuesTableId;
276 }
277
278 AK_FORCE_INLINE int getForgettingCurveDurationToLevelDown() const {
279 return mForgettingCurveDurationToLevelDown;
280 }
281
282 AK_FORCE_INLINE int getMaxUnigramCount() const { return mMaxUnigramCount; }
283
284 AK_FORCE_INLINE int getMaxBigramCount() const { return mMaxBigramCount; }
285
286 void readHeaderValueOrQuestionMark(const char* const key,
287 int* outValue,
288 int outValueSize) const;
289
290 bool fillInAndWriteHeaderToBuffer(
291 const bool updatesLastDecayedTime,
292 const int unigramCount,
293 const int bigramCount,
294 const int extendedRegionSize,
295 BufferWithExtendableBuffer* const outBuffer) const;
296
297 void fillInHeader(
298 const bool updatesLastDecayedTime,
299 const int unigramCount,
300 const int bigramCount,
301 const int extendedRegionSize,
302 DictionaryHeaderStructurePolicy::AttributeMap* outAttributeMap) const;
303
304 AK_FORCE_INLINE const std::vector<int>* getLocale() const { return &mLocale; }
305
306 bool supportsBeginningOfSentence() const {
307 return mDictFormatVersion >= FormatUtils::VERSION_4;
308 }
309
310 private:
311 DISALLOW_COPY_AND_ASSIGN(HeaderPolicy);
312
313 static const char* const MULTIPLE_WORDS_DEMOTION_RATE_KEY;
314 static const char* const REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY;
315 static const char* const IS_DECAYING_DICT_KEY;
316 static const char* const DATE_KEY;
317 static const char* const LAST_DECAYED_TIME_KEY;
318 static const char* const UNIGRAM_COUNT_KEY;
319 static const char* const BIGRAM_COUNT_KEY;
320 static const char* const EXTENDED_REGION_SIZE_KEY;
321 static const char* const HAS_HISTORICAL_INFO_KEY;
322 static const char* const LOCALE_KEY;
323 static const char* const FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP_KEY;
324 static const char* const FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID_KEY;
325 static const char* const
326 FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS_KEY;
327 static const char* const MAX_UNIGRAM_COUNT_KEY;
328 static const char* const MAX_BIGRAM_COUNT_KEY;
329 static const int DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE;
330 static const float MULTIPLE_WORD_COST_MULTIPLIER_SCALE;
331 static const int DEFAULT_FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP;
332 static const int DEFAULT_FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID;
333 static const int DEFAULT_FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS;
334 static const int DEFAULT_MAX_UNIGRAM_COUNT;
335 static const int DEFAULT_MAX_BIGRAM_COUNT;
336
337 const FormatUtils::FORMAT_VERSION mDictFormatVersion;
338 const HeaderReadWriteUtils::DictionaryFlags mDictionaryFlags;
339 const int mSize;
340 DictionaryHeaderStructurePolicy::AttributeMap mAttributeMap;
341 const std::vector<int> mLocale;
342 const float mMultiWordCostMultiplier;
343 const bool mRequiresGermanUmlautProcessing;
344 const bool mIsDecayingDict;
345 const int mDate;
346 const int mLastDecayedTime;
347 const int mUnigramCount;
348 const int mBigramCount;
349 const int mExtendedRegionSize;
350 const bool mHasHistoricalInfoOfWords;
351 const int mForgettingCurveOccurrencesToLevelUp;
352 const int mForgettingCurveProbabilityValuesTableId;
353 const int mForgettingCurveDurationToLevelDown;
354 const int mMaxUnigramCount;
355 const int mMaxBigramCount;
356
357 const std::vector<int> readLocale() const;
358 float readMultipleWordCostMultiplier() const;
359 bool readRequiresGermanUmlautProcessing() const;
360
361 static DictionaryHeaderStructurePolicy::AttributeMap
362 createAttributeMapAndReadAllAttributes(const uint8_t* const dictBuf);
363 };
364 } // namespace latinime
365 #endif /* LATINIME_HEADER_POLICY_H */
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698