Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(3)

Side by Side Diff: third_party/android_prediction/suggest/core/suggest.cpp

Issue 1247903003: Add spellcheck and word suggestion to the prediction service (Closed) Base URL: https://github.com/domokit/mojo.git@master
Patch Set: format README and CHROMIUM.diff Created 5 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 /*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "third_party/android_prediction/suggest/core/suggest.h"
18
19 #include "third_party/android_prediction/suggest/core/dicnode/dic_node.h"
20 #include "third_party/android_prediction/suggest/core/dicnode/dic_node_priority_ queue.h"
21 #include "third_party/android_prediction/suggest/core/dicnode/dic_node_vector.h"
22 #include "third_party/android_prediction/suggest/core/dictionary/dictionary.h"
23 #include "third_party/android_prediction/suggest/core/dictionary/digraph_utils.h "
24 #include "third_party/android_prediction/suggest/core/layout/proximity_info.h"
25 #include "third_party/android_prediction/suggest/core/policy/dictionary_structur e_with_buffer_policy.h"
26 #include "third_party/android_prediction/suggest/core/policy/traversal.h"
27 #include "third_party/android_prediction/suggest/core/policy/weighting.h"
28 #include "third_party/android_prediction/suggest/core/result/suggestions_output_ utils.h"
29 #include "third_party/android_prediction/suggest/core/session/dic_traverse_sessi on.h"
30
31 namespace latinime {
32
33 // Initialization of class constants.
34 const int Suggest::MIN_CONTINUOUS_SUGGESTION_INPUT_SIZE = 2;
35
36 /**
37 * Returns a set of suggestions for the given input touch points. The commitPoin t argument indicates
38 * whether to prematurely commit the suggested words up to the given point for s entence-level
39 * suggestion.
40 *
41 * Note: Currently does not support concurrent calls across threads. Continuous suggestion is
42 * automatically activated for sequential calls that share the same starting inp ut.
43 * TODO: Stop detecting continuous suggestion. Start using traverseSession inste ad.
44 */
45 void Suggest::getSuggestions(ProximityInfo *pInfo, void *traverseSession,
46 int *inputXs, int *inputYs, int *times, int *pointerIds, int *inputCodeP oints,
47 int inputSize, const float languageWeight,
48 SuggestionResults *const outSuggestionResults) const {
49 PROF_OPEN;
50 PROF_START(0);
51 const float maxSpatialDistance = TRAVERSAL->getMaxSpatialDistance();
52 DicTraverseSession *tSession = static_cast<DicTraverseSession *>(traverseSes sion);
53 tSession->setupForGetSuggestions(pInfo, inputCodePoints, inputSize, inputXs, inputYs, times,
54 pointerIds, maxSpatialDistance, TRAVERSAL->getMaxPointerCount());
55 // TODO: Add the way to evaluate cache
56
57 initializeSearch(tSession);
58 PROF_END(0);
59 PROF_START(1);
60
61 // keep expanding search dicNodes until all have terminated.
62 while (tSession->getDicTraverseCache()->activeSize() > 0) {
63 expandCurrentDicNodes(tSession);
64 tSession->getDicTraverseCache()->advanceActiveDicNodes();
65 tSession->getDicTraverseCache()->advanceInputIndex(inputSize);
66 }
67 PROF_END(1);
68 PROF_START(2);
69 SuggestionsOutputUtils::outputSuggestions(
70 SCORING, tSession, languageWeight, outSuggestionResults);
71 PROF_END(2);
72 PROF_CLOSE;
73 }
74
75 /**
76 * Initializes the search at the root of the lexicon trie. Note that when possib le the search will
77 * continue suggestion from where it left off during the last call.
78 */
79 void Suggest::initializeSearch(DicTraverseSession *traverseSession) const {
80 if (!traverseSession->getProximityInfoState(0)->isUsed()) {
81 return;
82 }
83
84 if (traverseSession->getInputSize() > MIN_CONTINUOUS_SUGGESTION_INPUT_SIZE
85 && traverseSession->isContinuousSuggestionPossible()) {
86 // Continue suggestion
87 traverseSession->getDicTraverseCache()->continueSearch();
88 } else {
89 // Restart recognition at the root.
90 traverseSession->resetCache(TRAVERSAL->getMaxCacheSize(traverseSession-> getInputSize()),
91 TRAVERSAL->getTerminalCacheSize());
92 // Create a new dic node here
93 DicNode rootNode;
94 DicNodeUtils::initAsRoot(traverseSession->getDictionaryStructurePolicy() ,
95 traverseSession->getPrevWordsPtNodePos(), &rootNode);
96 traverseSession->getDicTraverseCache()->copyPushActive(&rootNode);
97 }
98 }
99
100 /**
101 * Expands the dicNodes in the current search priority queue by advancing to the possible child
102 * nodes based on the next touch point(s) (or no touch points for lookahead)
103 */
104 void Suggest::expandCurrentDicNodes(DicTraverseSession *traverseSession) const {
105 const int inputSize = traverseSession->getInputSize();
106 DicNodeVector childDicNodes(TRAVERSAL->getDefaultExpandDicNodeSize());
107 DicNode correctionDicNode;
108
109 // TODO: Find more efficient caching
110 const bool shouldDepthLevelCache = TRAVERSAL->shouldDepthLevelCache(traverse Session);
111 if (shouldDepthLevelCache) {
112 traverseSession->getDicTraverseCache()->updateLastCachedInputIndex();
113 }
114 if (DEBUG_CACHE) {
115 AKLOGI("expandCurrentDicNodes depth level cache = %d, inputSize = %d",
116 shouldDepthLevelCache, inputSize);
117 }
118 while (traverseSession->getDicTraverseCache()->activeSize() > 0) {
119 DicNode dicNode;
120 traverseSession->getDicTraverseCache()->popActive(&dicNode);
121 if (dicNode.isTotalInputSizeExceedingLimit()) {
122 return;
123 }
124 childDicNodes.clear();
125 const int point0Index = dicNode.getInputIndex(0);
126 const bool canDoLookAheadCorrection =
127 TRAVERSAL->canDoLookAheadCorrection(traverseSession, &dicNode);
128 const bool isLookAheadCorrection = canDoLookAheadCorrection
129 && traverseSession->getDicTraverseCache()->
130 isLookAheadCorrectionInputIndex(static_cast<int>(point0I ndex));
131 const bool isCompletion = dicNode.isCompletion(inputSize);
132
133 const bool shouldNodeLevelCache =
134 TRAVERSAL->shouldNodeLevelCache(traverseSession, &dicNode);
135 if (shouldDepthLevelCache || shouldNodeLevelCache) {
136 if (DEBUG_CACHE) {
137 dicNode.dump("PUSH_CACHE");
138 }
139 traverseSession->getDicTraverseCache()->copyPushContinue(&dicNode);
140 dicNode.setCached();
141 }
142
143 if (dicNode.isInDigraph()) {
144 // Finish digraph handling if the node is in the middle of a digraph expansion.
145 processDicNodeAsDigraph(traverseSession, &dicNode);
146 } else if (isLookAheadCorrection) {
147 // The algorithm maintains a small set of "deferred" nodes that have not consumed the
148 // latest touch point yet. These are needed to apply look-ahead corr ection operations
149 // that require special handling of the latest touch point. For exam ple, with insertions
150 // (e.g., "thiis" -> "this") the latest touch point should not be co nsumed at all.
151 processDicNodeAsTransposition(traverseSession, &dicNode);
152 processDicNodeAsInsertion(traverseSession, &dicNode);
153 } else { // !isLookAheadCorrection
154 // Only consider typing error corrections if the normalized compound distance is
155 // below a spatial distance threshold.
156 // NOTE: the threshold may need to be updated if scoring model chang es.
157 // TODO: Remove. Do not prune node here.
158 const bool allowsErrorCorrections = TRAVERSAL->allowsErrorCorrection s(&dicNode);
159 // Process for handling space substitution (e.g., hevis => he is)
160 if (allowsErrorCorrections
161 && TRAVERSAL->isSpaceSubstitutionTerminal(traverseSession, & dicNode)) {
162 createNextWordDicNode(traverseSession, &dicNode, true /* spaceSu bstitution */);
163 }
164
165 DicNodeUtils::getAllChildDicNodes(
166 &dicNode, traverseSession->getDictionaryStructurePolicy(), & childDicNodes);
167
168 const int childDicNodesSize = childDicNodes.getSizeAndLock();
169 for (int i = 0; i < childDicNodesSize; ++i) {
170 DicNode *const childDicNode = childDicNodes[i];
171 if (isCompletion) {
172 // Handle forward lookahead when the lexicon letter exceeds the input size.
173 processDicNodeAsMatch(traverseSession, childDicNode);
174 continue;
175 }
176 if (DigraphUtils::hasDigraphForCodePoint(
177 traverseSession->getDictionaryStructurePolicy()
178 ->getHeaderStructurePolicy(),
179 childDicNode->getNodeCodePoint())) {
180 correctionDicNode.initByCopy(childDicNode);
181 correctionDicNode.advanceDigraphIndex();
182 processDicNodeAsDigraph(traverseSession, &correctionDicNode) ;
183 }
184 if (TRAVERSAL->isOmission(traverseSession, &dicNode, childDicNod e,
185 allowsErrorCorrections)) {
186 // TODO: (Gesture) Change weight between omission and substi tution errors
187 // TODO: (Gesture) Terminal node should not be handled as om ission
188 correctionDicNode.initByCopy(childDicNode);
189 processDicNodeAsOmission(traverseSession, &correctionDicNode );
190 }
191 const ProximityType proximityType = TRAVERSAL->getProximityType(
192 traverseSession, &dicNode, childDicNode);
193 switch (proximityType) {
194 // TODO: Consider the difference of proximityType here
195 case MATCH_CHAR:
196 case PROXIMITY_CHAR:
197 processDicNodeAsMatch(traverseSession, childDicNode);
198 break;
199 case ADDITIONAL_PROXIMITY_CHAR:
200 if (allowsErrorCorrections) {
201 processDicNodeAsAdditionalProximityChar(traverseSess ion, &dicNode,
202 childDicNode);
203 }
204 break;
205 case SUBSTITUTION_CHAR:
206 if (allowsErrorCorrections) {
207 processDicNodeAsSubstitution(traverseSession, &dicNo de, childDicNode);
208 }
209 break;
210 case UNRELATED_CHAR:
211 // Just drop this dicNode and do nothing.
212 break;
213 default:
214 // Just drop this dicNode and do nothing.
215 break;
216 }
217 }
218
219 // Push the dicNode for look-ahead correction
220 if (allowsErrorCorrections && canDoLookAheadCorrection) {
221 traverseSession->getDicTraverseCache()->copyPushNextActive(&dicN ode);
222 }
223 }
224 }
225 }
226
227 void Suggest::processTerminalDicNode(
228 DicTraverseSession *traverseSession, DicNode *dicNode) const {
229 if (dicNode->getCompoundDistance() >= static_cast<float>(MAX_VALUE_FOR_WEIGH TING)) {
230 return;
231 }
232 if (!dicNode->isTerminalDicNode()) {
233 return;
234 }
235 if (dicNode->shouldBeFilteredBySafetyNetForBigram()) {
236 return;
237 }
238 if (!dicNode->hasMatchedOrProximityCodePoints()) {
239 return;
240 }
241 // Create a non-cached node here.
242 DicNode terminalDicNode(*dicNode);
243 if (TRAVERSAL->needsToTraverseAllUserInput()
244 && dicNode->getInputIndex(0) < traverseSession->getInputSize()) {
245 Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_TERMINAL_INSERTION, traverseSession, 0,
246 &terminalDicNode, traverseSession->getMultiBigramMap());
247 }
248 Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_TERMINAL, traverseSessi on, 0,
249 &terminalDicNode, traverseSession->getMultiBigramMap());
250 traverseSession->getDicTraverseCache()->copyPushTerminal(&terminalDicNode);
251 }
252
253 /**
254 * Adds the expanded dicNode to the next search priority queue. Also creates an additional next word
255 * (by the space omission error correction) search path if input dicNode is on a terminal.
256 */
257 void Suggest::processExpandedDicNode(
258 DicTraverseSession *traverseSession, DicNode *dicNode) const {
259 processTerminalDicNode(traverseSession, dicNode);
260 if (dicNode->getCompoundDistance() < static_cast<float>(MAX_VALUE_FOR_WEIGHT ING)) {
261 if (TRAVERSAL->isSpaceOmissionTerminal(traverseSession, dicNode)) {
262 createNextWordDicNode(traverseSession, dicNode, false /* spaceSubsti tution */);
263 }
264 const int allowsLookAhead = !(dicNode->hasMultipleWords()
265 && dicNode->isCompletion(traverseSession->getInputSize()));
266 if (dicNode->hasChildren() && allowsLookAhead) {
267 traverseSession->getDicTraverseCache()->copyPushNextActive(dicNode);
268 }
269 }
270 }
271
272 void Suggest::processDicNodeAsMatch(DicTraverseSession *traverseSession,
273 DicNode *childDicNode) const {
274 weightChildNode(traverseSession, childDicNode);
275 processExpandedDicNode(traverseSession, childDicNode);
276 }
277
278 void Suggest::processDicNodeAsAdditionalProximityChar(DicTraverseSession *traver seSession,
279 DicNode *dicNode, DicNode *childDicNode) const {
280 // Note: Most types of corrections don't need to look up the bigram informat ion since they do
281 // not treat the node as a terminal. There is no need to pass the bigram map in these cases.
282 Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_ADDITIONAL_PROXIMITY,
283 traverseSession, dicNode, childDicNode, 0 /* multiBigramMap */);
284 weightChildNode(traverseSession, childDicNode);
285 processExpandedDicNode(traverseSession, childDicNode);
286 }
287
288 void Suggest::processDicNodeAsSubstitution(DicTraverseSession *traverseSession,
289 DicNode *dicNode, DicNode *childDicNode) const {
290 Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_SUBSTITUTION, traverseS ession,
291 dicNode, childDicNode, 0 /* multiBigramMap */);
292 weightChildNode(traverseSession, childDicNode);
293 processExpandedDicNode(traverseSession, childDicNode);
294 }
295
296 // Process the DicNode codepoint as a digraph. This means that composite glyphs like the German
297 // u-umlaut is expanded to the transliteration "ue". Note that this happens in p arallel with
298 // the normal non-digraph traversal, so both "uber" and "ueber" can be corrected to "[u-umlaut]ber".
299 void Suggest::processDicNodeAsDigraph(DicTraverseSession *traverseSession,
300 DicNode *childDicNode) const {
301 weightChildNode(traverseSession, childDicNode);
302 childDicNode->advanceDigraphIndex();
303 processExpandedDicNode(traverseSession, childDicNode);
304 }
305
306 /**
307 * Handle the dicNode as an omission error (e.g., ths => this). Skip the current letter and consider
308 * matches for all possible next letters. Note that just skipping the current le tter without any
309 * other conditions tends to flood the search DicNodes cache with omission DicNo des. Instead, check
310 * the possible *next* letters after the omission to better limit search to plau sible omissions.
311 * Note that apostrophes are handled as omissions.
312 */
313 void Suggest::processDicNodeAsOmission(
314 DicTraverseSession *traverseSession, DicNode *dicNode) const {
315 DicNodeVector childDicNodes;
316 DicNodeUtils::getAllChildDicNodes(
317 dicNode, traverseSession->getDictionaryStructurePolicy(), &childDicN odes);
318
319 const int size = childDicNodes.getSizeAndLock();
320 for (int i = 0; i < size; i++) {
321 DicNode *const childDicNode = childDicNodes[i];
322 // Treat this word as omission
323 Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_OMISSION, traverseS ession,
324 dicNode, childDicNode, 0 /* multiBigramMap */);
325 weightChildNode(traverseSession, childDicNode);
326 if (!TRAVERSAL->isPossibleOmissionChildNode(traverseSession, dicNode, ch ildDicNode)) {
327 continue;
328 }
329 processExpandedDicNode(traverseSession, childDicNode);
330 }
331 }
332
333 /**
334 * Handle the dicNode as an insertion error (e.g., thiis => this). Skip the curr ent touch point and
335 * consider matches for the next touch point.
336 */
337 void Suggest::processDicNodeAsInsertion(DicTraverseSession *traverseSession,
338 DicNode *dicNode) const {
339 const int16_t pointIndex = dicNode->getInputIndex(0);
340 DicNodeVector childDicNodes;
341 DicNodeUtils::getAllChildDicNodes(dicNode, traverseSession->getDictionaryStr ucturePolicy(),
342 &childDicNodes);
343 const int size = childDicNodes.getSizeAndLock();
344 for (int i = 0; i < size; i++) {
345 if (traverseSession->getProximityInfoState(0)->getPrimaryCodePointAt(poi ntIndex + 1)
346 != childDicNodes[i]->getNodeCodePoint()) {
347 continue;
348 }
349 DicNode *const childDicNode = childDicNodes[i];
350 Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_INSERTION, traverse Session,
351 dicNode, childDicNode, 0 /* multiBigramMap */);
352 processExpandedDicNode(traverseSession, childDicNode);
353 }
354 }
355
356 /**
357 * Handle the dicNode as a transposition error (e.g., thsi => this). Swap the ne xt two touch points.
358 */
359 void Suggest::processDicNodeAsTransposition(DicTraverseSession *traverseSession,
360 DicNode *dicNode) const {
361 const int16_t pointIndex = dicNode->getInputIndex(0);
362 DicNodeVector childDicNodes1;
363 DicNodeVector childDicNodes2;
364 DicNodeUtils::getAllChildDicNodes(dicNode, traverseSession->getDictionaryStr ucturePolicy(),
365 &childDicNodes1);
366 const int childSize1 = childDicNodes1.getSizeAndLock();
367 for (int i = 0; i < childSize1; i++) {
368 const ProximityType matchedId1 = traverseSession->getProximityInfoState( 0)
369 ->getProximityType(pointIndex + 1, childDicNodes1[i]->getNodeCod ePoint(),
370 true /* checkProximityChars */);
371 if (!ProximityInfoUtils::isMatchOrProximityChar(matchedId1)) {
372 continue;
373 }
374 if (childDicNodes1[i]->hasChildren()) {
375 childDicNodes2.clear();
376 DicNodeUtils::getAllChildDicNodes(childDicNodes1[i],
377 traverseSession->getDictionaryStructurePolicy(), &childDicNo des2);
378 const int childSize2 = childDicNodes2.getSizeAndLock();
379 for (int j = 0; j < childSize2; j++) {
380 DicNode *const childDicNode2 = childDicNodes2[j];
381 const ProximityType matchedId2 = traverseSession->getProximityIn foState(0)
382 ->getProximityType(pointIndex, childDicNode2->getNodeCod ePoint(),
383 true /* checkProximityChars */);
384 if (!ProximityInfoUtils::isMatchOrProximityChar(matchedId2)) {
385 continue;
386 }
387 Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_TRANSPOSITI ON,
388 traverseSession, childDicNodes1[i], childDicNode2, 0 /* multiBigramMap */);
389 processExpandedDicNode(traverseSession, childDicNode2);
390 }
391 }
392 }
393 }
394
395 /**
396 * Weight child dicNode by aligning it to the key
397 */
398 void Suggest::weightChildNode(DicTraverseSession *traverseSession, DicNode *dicN ode) const {
399 const int inputSize = traverseSession->getInputSize();
400 if (dicNode->isCompletion(inputSize)) {
401 Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_COMPLETION, travers eSession,
402 0 /* parentDicNode */, dicNode, 0 /* multiBigramMap */);
403 } else { // completion
404 Weighting::addCostAndForwardInputIndex(WEIGHTING, CT_MATCH, traverseSess ion,
405 0 /* parentDicNode */, dicNode, 0 /* multiBigramMap */);
406 }
407 }
408
409 /**
410 * Creates a new dicNode that represents a space insertion at the end of the inp ut dicNode. Also
411 * incorporates the unigram / bigram score for the ending word into the new dicN ode.
412 */
413 void Suggest::createNextWordDicNode(DicTraverseSession *traverseSession, DicNode *dicNode,
414 const bool spaceSubstitution) const {
415 if (!TRAVERSAL->isGoodToTraverseNextWord(dicNode)) {
416 return;
417 }
418
419 // Create a non-cached node here.
420 DicNode newDicNode;
421 DicNodeUtils::initAsRootWithPreviousWord(
422 traverseSession->getDictionaryStructurePolicy(), dicNode, &newDicNod e);
423 const CorrectionType correctionType = spaceSubstitution ?
424 CT_NEW_WORD_SPACE_SUBSTITUTION : CT_NEW_WORD_SPACE_OMISSION;
425 Weighting::addCostAndForwardInputIndex(WEIGHTING, correctionType, traverseSe ssion, dicNode,
426 &newDicNode, traverseSession->getMultiBigramMap());
427 if (newDicNode.getCompoundDistance() < static_cast<float>(MAX_VALUE_FOR_WEIG HTING)) {
428 // newDicNode is worth continuing to traverse.
429 // CAVEAT: This pruning is important for speed. Remove this when we can afford not to prune
430 // here because here is not the right place to do pruning. Pruning shoul d take place only
431 // in DicNodePriorityQueue.
432 traverseSession->getDicTraverseCache()->copyPushNextActive(&newDicNode);
433 }
434 }
435 } // namespace latinime
OLDNEW
« no previous file with comments | « third_party/android_prediction/suggest/core/suggest.h ('k') | third_party/android_prediction/suggest/core/suggest_interface.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698