Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(64)

Side by Side Diff: third_party/prediction/suggest/policyimpl/dictionary/header/header_policy.cpp

Issue 1247903003: Add spellcheck and word suggestion to the prediction service (Closed) Base URL: https://github.com/domokit/mojo.git@master
Patch Set: Created 5 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 /*
2 * Copyright (C) 2013, The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "third_party/prediction/suggest/policyimpl/dictionary/header/header_policy.h"
18
19 #include <algorithm>
20
21 namespace latinime {
22
23 // Note that these are corresponding definitions in Java side in
24 // DictionaryHeader.
25 const char* const HeaderPolicy::MULTIPLE_WORDS_DEMOTION_RATE_KEY =
26 "MULTIPLE_WORDS_DEMOTION_RATE";
27 const char* const HeaderPolicy::REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY =
28 "REQUIRES_GERMAN_UMLAUT_PROCESSING";
29 // TODO: Change attribute string to "IS_DECAYING_DICT".
30 const char* const HeaderPolicy::IS_DECAYING_DICT_KEY = "USES_FORGETTING_CURVE";
31 const char* const HeaderPolicy::DATE_KEY = "date";
32 const char* const HeaderPolicy::LAST_DECAYED_TIME_KEY = "LAST_DECAYED_TIME";
33 const char* const HeaderPolicy::UNIGRAM_COUNT_KEY = "UNIGRAM_COUNT";
34 const char* const HeaderPolicy::BIGRAM_COUNT_KEY = "BIGRAM_COUNT";
35 const char* const HeaderPolicy::EXTENDED_REGION_SIZE_KEY =
36 "EXTENDED_REGION_SIZE";
37 // Historical info is information that is needed to support decaying such as
38 // timestamp, level and
39 // count.
40 const char* const HeaderPolicy::HAS_HISTORICAL_INFO_KEY = "HAS_HISTORICAL_INFO";
41 const char* const HeaderPolicy::LOCALE_KEY =
42 "locale"; // match Java declaration
43 const char* const HeaderPolicy::FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP_KEY =
44 "FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP";
45 const char* const
46 HeaderPolicy::FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID_KEY =
47 "FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID";
48 const char* const
49 HeaderPolicy::FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS_KEY =
50 "FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS";
51
52 const char* const HeaderPolicy::MAX_UNIGRAM_COUNT_KEY = "MAX_UNIGRAM_COUNT";
53 const char* const HeaderPolicy::MAX_BIGRAM_COUNT_KEY = "MAX_BIGRAM_COUNT";
54
55 const int HeaderPolicy::DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE = 100;
56 const float HeaderPolicy::MULTIPLE_WORD_COST_MULTIPLIER_SCALE = 100.0f;
57 const int HeaderPolicy::DEFAULT_FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP = 2;
58 const int HeaderPolicy::DEFAULT_FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID =
59 3;
60 // 30 days
61 const int
62 HeaderPolicy::DEFAULT_FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS =
63 30 * 24 * 60 * 60;
64
65 const int HeaderPolicy::DEFAULT_MAX_UNIGRAM_COUNT = 10000;
66 const int HeaderPolicy::DEFAULT_MAX_BIGRAM_COUNT = 10000;
67
68 // Used for logging. Question mark is used to indicate that the key is not
69 // found.
70 void HeaderPolicy::readHeaderValueOrQuestionMark(const char* const key,
71 int* outValue,
72 int outValueSize) const {
73 if (outValueSize <= 0)
74 return;
75 if (outValueSize == 1) {
76 outValue[0] = '\0';
77 return;
78 }
79 std::vector<int> keyCodePointVector;
80 HeaderReadWriteUtils::insertCharactersIntoVector(key, &keyCodePointVector);
81 DictionaryHeaderStructurePolicy::AttributeMap::const_iterator it =
82 mAttributeMap.find(keyCodePointVector);
83 if (it == mAttributeMap.end()) {
84 // The key was not found.
85 outValue[0] = '?';
86 outValue[1] = '\0';
87 return;
88 }
89 const int terminalIndex =
90 std::min(static_cast<int>(it->second.size()), outValueSize - 1);
91 for (int i = 0; i < terminalIndex; ++i) {
92 outValue[i] = it->second[i];
93 }
94 outValue[terminalIndex] = '\0';
95 }
96
97 const std::vector<int> HeaderPolicy::readLocale() const {
98 return HeaderReadWriteUtils::readCodePointVectorAttributeValue(&mAttributeMap,
99 LOCALE_KEY);
100 }
101
102 float HeaderPolicy::readMultipleWordCostMultiplier() const {
103 const int demotionRate = HeaderReadWriteUtils::readIntAttributeValue(
104 &mAttributeMap, MULTIPLE_WORDS_DEMOTION_RATE_KEY,
105 DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE);
106 if (demotionRate <= 0) {
107 return static_cast<float>(MAX_VALUE_FOR_WEIGHTING);
108 }
109 return MULTIPLE_WORD_COST_MULTIPLIER_SCALE / static_cast<float>(demotionRate);
110 }
111
112 bool HeaderPolicy::readRequiresGermanUmlautProcessing() const {
113 return HeaderReadWriteUtils::readBoolAttributeValue(
114 &mAttributeMap, REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY, false);
115 }
116
117 bool HeaderPolicy::fillInAndWriteHeaderToBuffer(
118 const bool updatesLastDecayedTime,
119 const int unigramCount,
120 const int bigramCount,
121 const int extendedRegionSize,
122 BufferWithExtendableBuffer* const outBuffer) const {
123 int writingPos = 0;
124 DictionaryHeaderStructurePolicy::AttributeMap attributeMapToWrite(
125 mAttributeMap);
126 fillInHeader(updatesLastDecayedTime, unigramCount, bigramCount,
127 extendedRegionSize, &attributeMapToWrite);
128 if (!HeaderReadWriteUtils::writeDictionaryVersion(
129 outBuffer, mDictFormatVersion, &writingPos)) {
130 return false;
131 }
132 if (!HeaderReadWriteUtils::writeDictionaryFlags(outBuffer, mDictionaryFlags,
133 &writingPos)) {
134 return false;
135 }
136 // Temporarily writes a dummy header size.
137 int headerSizeFieldPos = writingPos;
138 if (!HeaderReadWriteUtils::writeDictionaryHeaderSize(outBuffer, 0 /* size */,
139 &writingPos)) {
140 return false;
141 }
142 if (!HeaderReadWriteUtils::writeHeaderAttributes(
143 outBuffer, &attributeMapToWrite, &writingPos)) {
144 return false;
145 }
146 // Writes the actual header size.
147 if (!HeaderReadWriteUtils::writeDictionaryHeaderSize(outBuffer, writingPos,
148 &headerSizeFieldPos)) {
149 return false;
150 }
151 return true;
152 }
153
154 void HeaderPolicy::fillInHeader(
155 const bool updatesLastDecayedTime,
156 const int unigramCount,
157 const int bigramCount,
158 const int extendedRegionSize,
159 DictionaryHeaderStructurePolicy::AttributeMap* outAttributeMap) const {
160 HeaderReadWriteUtils::setIntAttribute(outAttributeMap, UNIGRAM_COUNT_KEY,
161 unigramCount);
162 HeaderReadWriteUtils::setIntAttribute(outAttributeMap, BIGRAM_COUNT_KEY,
163 bigramCount);
164 HeaderReadWriteUtils::setIntAttribute(
165 outAttributeMap, EXTENDED_REGION_SIZE_KEY, extendedRegionSize);
166 // Set the current time as the generation time.
167 HeaderReadWriteUtils::setIntAttribute(outAttributeMap, DATE_KEY,
168 TimeKeeper::peekCurrentTime());
169 HeaderReadWriteUtils::setCodePointVectorAttribute(outAttributeMap, LOCALE_KEY,
170 mLocale);
171 if (updatesLastDecayedTime) {
172 // Set current time as the last updated time.
173 HeaderReadWriteUtils::setIntAttribute(
174 outAttributeMap, LAST_DECAYED_TIME_KEY, TimeKeeper::peekCurrentTime());
175 }
176 }
177
178 /* static */ DictionaryHeaderStructurePolicy::AttributeMap
179 HeaderPolicy::createAttributeMapAndReadAllAttributes(
180 const uint8_t* const dictBuf) {
181 DictionaryHeaderStructurePolicy::AttributeMap attributeMap;
182 HeaderReadWriteUtils::fetchAllHeaderAttributes(dictBuf, &attributeMap);
183 return attributeMap;
184 }
185
186 } // namespace latinime
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698