Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(191)

Side by Side Diff: third_party/android_prediction/suggest/policyimpl/dictionary/header/header_policy.cpp

Issue 1247903003: Add spellcheck and word suggestion to the prediction service (Closed) Base URL: https://github.com/domokit/mojo.git@master
Patch Set: format README and CHROMIUM.diff Created 5 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 /*
2 * Copyright (C) 2013, The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
#include "third_party/android_prediction/suggest/policyimpl/dictionary/header/header_policy.h"

#include <algorithm>
#include <vector>
20
21 namespace latinime {
22
23 // Note that these are corresponding definitions in Java side in DictionaryHeade r.
24 const char *const HeaderPolicy::MULTIPLE_WORDS_DEMOTION_RATE_KEY = "MULTIPLE_WOR DS_DEMOTION_RATE";
25 const char *const HeaderPolicy::REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY =
26 "REQUIRES_GERMAN_UMLAUT_PROCESSING";
27 // TODO: Change attribute string to "IS_DECAYING_DICT".
28 const char *const HeaderPolicy::IS_DECAYING_DICT_KEY = "USES_FORGETTING_CURVE";
29 const char *const HeaderPolicy::DATE_KEY = "date";
30 const char *const HeaderPolicy::LAST_DECAYED_TIME_KEY = "LAST_DECAYED_TIME";
31 const char *const HeaderPolicy::UNIGRAM_COUNT_KEY = "UNIGRAM_COUNT";
32 const char *const HeaderPolicy::BIGRAM_COUNT_KEY = "BIGRAM_COUNT";
33 const char *const HeaderPolicy::EXTENDED_REGION_SIZE_KEY = "EXTENDED_REGION_SIZE ";
34 // Historical info is information that is needed to support decaying such as tim estamp, level and
35 // count.
36 const char *const HeaderPolicy::HAS_HISTORICAL_INFO_KEY = "HAS_HISTORICAL_INFO";
37 const char *const HeaderPolicy::LOCALE_KEY = "locale"; // match Java declaration
38 const char *const HeaderPolicy::FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP_KEY =
39 "FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP";
40 const char *const HeaderPolicy::FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID_KEY =
41 "FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID";
42 const char *const HeaderPolicy::FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECON DS_KEY =
43 "FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECONDS";
44
45 const char *const HeaderPolicy::MAX_UNIGRAM_COUNT_KEY = "MAX_UNIGRAM_COUNT";
46 const char *const HeaderPolicy::MAX_BIGRAM_COUNT_KEY = "MAX_BIGRAM_COUNT";
47
48 const int HeaderPolicy::DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE = 100;
49 const float HeaderPolicy::MULTIPLE_WORD_COST_MULTIPLIER_SCALE = 100.0f;
50 const int HeaderPolicy::DEFAULT_FORGETTING_CURVE_OCCURRENCES_TO_LEVEL_UP = 2;
51 const int HeaderPolicy::DEFAULT_FORGETTING_CURVE_PROBABILITY_VALUES_TABLE_ID = 3 ;
52 // 30 days
53 const int HeaderPolicy::DEFAULT_FORGETTING_CURVE_DURATION_TO_LEVEL_DOWN_IN_SECON DS =
54 30 * 24 * 60 * 60;
55
56 const int HeaderPolicy::DEFAULT_MAX_UNIGRAM_COUNT = 10000;
57 const int HeaderPolicy::DEFAULT_MAX_BIGRAM_COUNT = 10000;
58
59 // Used for logging. Question mark is used to indicate that the key is not found .
60 void HeaderPolicy::readHeaderValueOrQuestionMark(const char *const key, int *out Value,
61 int outValueSize) const {
62 if (outValueSize <= 0) return;
63 if (outValueSize == 1) {
64 outValue[0] = '\0';
65 return;
66 }
67 std::vector<int> keyCodePointVector;
68 HeaderReadWriteUtils::insertCharactersIntoVector(key, &keyCodePointVector);
69 DictionaryHeaderStructurePolicy::AttributeMap::const_iterator it =
70 mAttributeMap.find(keyCodePointVector);
71 if (it == mAttributeMap.end()) {
72 // The key was not found.
73 outValue[0] = '?';
74 outValue[1] = '\0';
75 return;
76 }
77 const int terminalIndex = std::min(static_cast<int>(it->second.size()), outV alueSize - 1);
78 for (int i = 0; i < terminalIndex; ++i) {
79 outValue[i] = it->second[i];
80 }
81 outValue[terminalIndex] = '\0';
82 }
83
84 const std::vector<int> HeaderPolicy::readLocale() const {
85 return HeaderReadWriteUtils::readCodePointVectorAttributeValue(&mAttributeMa p, LOCALE_KEY);
86 }
87
88 float HeaderPolicy::readMultipleWordCostMultiplier() const {
89 const int demotionRate = HeaderReadWriteUtils::readIntAttributeValue(&mAttri buteMap,
90 MULTIPLE_WORDS_DEMOTION_RATE_KEY, DEFAULT_MULTIPLE_WORDS_DEMOTION_RA TE);
91 if (demotionRate <= 0) {
92 return static_cast<float>(MAX_VALUE_FOR_WEIGHTING);
93 }
94 return MULTIPLE_WORD_COST_MULTIPLIER_SCALE / static_cast<float>(demotionRate );
95 }
96
97 bool HeaderPolicy::readRequiresGermanUmlautProcessing() const {
98 return HeaderReadWriteUtils::readBoolAttributeValue(&mAttributeMap,
99 REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY, false);
100 }
101
102 bool HeaderPolicy::fillInAndWriteHeaderToBuffer(const bool updatesLastDecayedTim e,
103 const int unigramCount, const int bigramCount,
104 const int extendedRegionSize, BufferWithExtendableBuffer *const outBuffe r) const {
105 int writingPos = 0;
106 DictionaryHeaderStructurePolicy::AttributeMap attributeMapToWrite(mAttribute Map);
107 fillInHeader(updatesLastDecayedTime, unigramCount, bigramCount,
108 extendedRegionSize, &attributeMapToWrite);
109 if (!HeaderReadWriteUtils::writeDictionaryVersion(outBuffer, mDictFormatVers ion,
110 &writingPos)) {
111 return false;
112 }
113 if (!HeaderReadWriteUtils::writeDictionaryFlags(outBuffer, mDictionaryFlags,
114 &writingPos)) {
115 return false;
116 }
117 // Temporarily writes a dummy header size.
118 int headerSizeFieldPos = writingPos;
119 if (!HeaderReadWriteUtils::writeDictionaryHeaderSize(outBuffer, 0 /* size */ ,
120 &writingPos)) {
121 return false;
122 }
123 if (!HeaderReadWriteUtils::writeHeaderAttributes(outBuffer, &attributeMapToW rite,
124 &writingPos)) {
125 return false;
126 }
127 // Writes the actual header size.
128 if (!HeaderReadWriteUtils::writeDictionaryHeaderSize(outBuffer, writingPos,
129 &headerSizeFieldPos)) {
130 return false;
131 }
132 return true;
133 }
134
135 void HeaderPolicy::fillInHeader(const bool updatesLastDecayedTime, const int uni gramCount,
136 const int bigramCount, const int extendedRegionSize,
137 DictionaryHeaderStructurePolicy::AttributeMap *outAttributeMap) const {
138 HeaderReadWriteUtils::setIntAttribute(outAttributeMap, UNIGRAM_COUNT_KEY, un igramCount);
139 HeaderReadWriteUtils::setIntAttribute(outAttributeMap, BIGRAM_COUNT_KEY, big ramCount);
140 HeaderReadWriteUtils::setIntAttribute(outAttributeMap, EXTENDED_REGION_SIZE_ KEY,
141 extendedRegionSize);
142 // Set the current time as the generation time.
143 HeaderReadWriteUtils::setIntAttribute(outAttributeMap, DATE_KEY,
144 TimeKeeper::peekCurrentTime());
145 HeaderReadWriteUtils::setCodePointVectorAttribute(outAttributeMap, LOCALE_KE Y, mLocale);
146 if (updatesLastDecayedTime) {
147 // Set current time as the last updated time.
148 HeaderReadWriteUtils::setIntAttribute(outAttributeMap, LAST_DECAYED_TIME _KEY,
149 TimeKeeper::peekCurrentTime());
150 }
151 }
152
153 /* static */ DictionaryHeaderStructurePolicy::AttributeMap
154 HeaderPolicy::createAttributeMapAndReadAllAttributes(const uint8_t *cons t dictBuf) {
155 DictionaryHeaderStructurePolicy::AttributeMap attributeMap;
156 HeaderReadWriteUtils::fetchAllHeaderAttributes(dictBuf, &attributeMap);
157 return attributeMap;
158 }
159
160 } // namespace latinime
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698