| OLD | NEW |
| (Empty) | |
| 1 /* |
| 2 * Copyright (C) 2013 The Android Open Source Project |
| 3 * |
| 4 * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 * you may not use this file except in compliance with the License. |
| 6 * You may obtain a copy of the License at |
| 7 * |
| 8 * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 * |
| 10 * Unless required by applicable law or agreed to in writing, software |
| 11 * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 * See the License for the specific language governing permissions and |
| 14 * limitations under the License. |
| 15 */ |
| 16 |
| 17 #include "third_party/prediction/utils/autocorrection_threshold_utils.h" |
| 18 |
| 19 #include <algorithm> |
| 20 #include <cmath> |
| 21 |
| 22 #include "third_party/prediction/defines.h" |
| 23 #include "third_party/prediction/suggest/policyimpl/utils/edit_distance.h" |
| 24 #include "third_party/prediction/suggest/policyimpl/utils/damerau_levenshtein_ed
it_distance_policy.h" |
| 25 |
| 26 namespace latinime { |
| 27 |
| 28 const int AutocorrectionThresholdUtils::MAX_INITIAL_SCORE = 255; |
| 29 const int AutocorrectionThresholdUtils::TYPED_LETTER_MULTIPLIER = 2; |
| 30 const int AutocorrectionThresholdUtils::FULL_WORD_MULTIPLIER = 2; |
| 31 |
| 32 /* static */ int AutocorrectionThresholdUtils::editDistance( |
| 33 const int* before, |
| 34 const int beforeLength, |
| 35 const int* after, |
| 36 const int afterLength) { |
| 37 const DamerauLevenshteinEditDistancePolicy daemaruLevenshtein( |
| 38 before, beforeLength, after, afterLength); |
| 39 return static_cast<int>(EditDistance::getEditDistance(&daemaruLevenshtein)); |
| 40 } |
| 41 |
// In dictionary.cpp, getSuggestion() method,
// When USE_SUGGEST_INTERFACE_FOR_TYPING is true:
//
//   // TODO: Revise the following logic thoroughly by referring to the logic
//   // marked as "Otherwise" below.
//   SUGGEST_INTERFACE_OUTPUT_SCALE was multiplied to the original suggestion
//   scores to convert them to integers.
//     score = (int)((original score) * SUGGEST_INTERFACE_OUTPUT_SCALE)
//   Undo the scaling here to recover the original score.
//     normalizedScore = ((float)score) / SUGGEST_INTERFACE_OUTPUT_SCALE
//
// Otherwise: suggestion scores are computed using the below formula.
//   original score
//     := powf(mTypedLetterMultiplier (this is defined as 2),
//             (the number of matched characters between typed word and
//              suggested word))
//        * (individual word's score, which is defined in the unigram
//           dictionary; this score is in the range [0, 255].)
// Then, the following processing is applied.
//   - If the dictionary word is matched up to the point of the user entry
//     (full match up to min(before.length(), after.length()))
//     => Then multiply by FULL_MATCHED_WORDS_PROMOTION_RATE (this is defined
//        as 1.2)
//   - If the word is a true full match except for differences in accents or
//     capitalization, then treat it as if the score was 255.
//   - If before.length() == after.length()
//     => multiply by mFullWordMultiplier (this is defined as 2)
// So, the maximum original score is
//   powf(2, min(before.length(), after.length())) * 255 * 2 * 1.2
// For historical reasons we ignore the 1.2 modifier (because the measure for a
// good autocorrection threshold was done at a time when it didn't exist). This
// doesn't change the result.
// So, we can normalize the original score by dividing by
//   powf(2, min(b.l(), a.l())) * 255 * 2.
| 79 |
| 80 /* static */ float AutocorrectionThresholdUtils::calcNormalizedScore( |
| 81 const int* before, |
| 82 const int beforeLength, |
| 83 const int* after, |
| 84 const int afterLength, |
| 85 const int score) { |
| 86 if (0 == beforeLength || 0 == afterLength) { |
| 87 return 0.0f; |
| 88 } |
| 89 const int distance = editDistance(before, beforeLength, after, afterLength); |
| 90 int spaceCount = 0; |
| 91 for (int i = 0; i < afterLength; ++i) { |
| 92 if (after[i] == KEYCODE_SPACE) { |
| 93 ++spaceCount; |
| 94 } |
| 95 } |
| 96 |
| 97 if (spaceCount == afterLength) { |
| 98 return 0.0f; |
| 99 } |
| 100 |
| 101 if (score <= 0 || distance >= afterLength) { |
| 102 // normalizedScore must be 0.0f (the minimum value) if the score is less |
| 103 // than or equal to 0, |
| 104 // or if the edit distance is larger than or equal to afterLength. |
| 105 return 0.0f; |
| 106 } |
| 107 // add a weight based on edit distance. |
| 108 const float weight = |
| 109 1.0f - static_cast<float>(distance) / static_cast<float>(afterLength); |
| 110 |
| 111 // TODO: Revise the following logic thoroughly by referring to... |
| 112 if (true /* USE_SUGGEST_INTERFACE_FOR_TYPING */) { |
| 113 return (static_cast<float>(score) / SUGGEST_INTERFACE_OUTPUT_SCALE) * |
| 114 weight; |
| 115 } |
| 116 // ...this logic. |
| 117 const float maxScore = |
| 118 score >= S_INT_MAX |
| 119 ? static_cast<float>(S_INT_MAX) |
| 120 : static_cast<float>(MAX_INITIAL_SCORE) * |
| 121 powf(static_cast<float>(TYPED_LETTER_MULTIPLIER), |
| 122 static_cast<float>( |
| 123 std::min(beforeLength, afterLength - spaceCount))) * |
| 124 static_cast<float>(FULL_WORD_MULTIPLIER); |
| 125 |
| 126 return (static_cast<float>(score) / maxScore) * weight; |
| 127 } |
| 128 |
| 129 } // namespace latinime |
| OLD | NEW |