Index: third_party/android_prediction/suggest/core/session/prev_words_info.h |
diff --git a/third_party/android_prediction/suggest/core/session/prev_words_info.h b/third_party/android_prediction/suggest/core/session/prev_words_info.h |
new file mode 100644 |
index 0000000000000000000000000000000000000000..fddac8c415c834060319cb61db6f83782858558c |
--- /dev/null |
+++ b/third_party/android_prediction/suggest/core/session/prev_words_info.h |
@@ -0,0 +1,162 @@ |
+/* |
+ * Copyright (C) 2014 The Android Open Source Project |
+ * |
+ * Licensed under the Apache License, Version 2.0 (the "License"); |
+ * you may not use this file except in compliance with the License. |
+ * You may obtain a copy of the License at |
+ * |
+ * http://www.apache.org/licenses/LICENSE-2.0 |
+ * |
+ * Unless required by applicable law or agreed to in writing, software |
+ * distributed under the License is distributed on an "AS IS" BASIS, |
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
+ * See the License for the specific language governing permissions and |
+ * limitations under the License. |
+ */ |
+ |
+#ifndef LATINIME_PREV_WORDS_INFO_H |
+#define LATINIME_PREV_WORDS_INFO_H |
+ |
+#include "third_party/android_prediction/defines.h" |
+#include "third_party/android_prediction/suggest/core/dictionary/binary_dictionary_bigrams_iterator.h" |
+#include "third_party/android_prediction/suggest/core/policy/dictionary_structure_with_buffer_policy.h" |
+#include "third_party/android_prediction/utils/char_utils.h" |
+ |
+namespace latinime { |
+ |
+// TODO: Support n-gram. |
+// Holds the code points of up to MAX_PREV_WORD_COUNT_FOR_N_GRAM previous words, together with a |
+// beginning-of-sentence flag for each of them. Slot 0 is the word the 1-indexed accessors call |
+// the 1st previous word. The class exposes no public mutators; instances can be move-constructed |
+// but not copied or assigned. |
+class PrevWordsInfo { |
+ public: |
+    // No prev word information. |
+    PrevWordsInfo() { |
+        clear(); |
+    } |
+ |
+    // Takes over the contents of prevWordsInfo by copying every slot. The source object is not |
+    // modified. Only plain arrays are copied, so this cannot throw. |
+    PrevWordsInfo(PrevWordsInfo &&prevWordsInfo) noexcept { |
+        for (size_t i = 0; i < NELEMS(mPrevWordCodePoints); ++i) { |
+            mPrevWordCodePointCount[i] = prevWordsInfo.mPrevWordCodePointCount[i]; |
+            memmove(mPrevWordCodePoints[i], prevWordsInfo.mPrevWordCodePoints[i], |
+                    sizeof(mPrevWordCodePoints[i][0]) * mPrevWordCodePointCount[i]); |
+            mIsBeginningOfSentence[i] = prevWordsInfo.mIsBeginningOfSentence[i]; |
+        } |
+    } |
+ |
+    // Construct from previous words. |
+    // prevWordCodePoints, prevWordCodePointCount and isBeginningOfSentence are parallel arrays |
+    // holding prevWordCount entries each. At most MAX_PREV_WORD_COUNT_FOR_N_GRAM entries are used. |
+    // An entry whose count is negative or larger than MAX_WORD_LENGTH is skipped and its slot is |
+    // left empty. If any of the arrays is null the instance holds no prev word information. |
+    PrevWordsInfo(const int prevWordCodePoints[][MAX_WORD_LENGTH], |
+            const int *const prevWordCodePointCount, const bool *const isBeginningOfSentence, |
+            const size_t prevWordCount) { |
+        clear(); |
+        if (!prevWordCodePoints || !prevWordCodePointCount || !isBeginningOfSentence) { |
+            // Same policy as the single word constructor: nothing to read from. |
+            return; |
+        } |
+        const size_t wordCount = std::min(NELEMS(mPrevWordCodePoints), prevWordCount); |
+        for (size_t i = 0; i < wordCount; ++i) { |
+            if (prevWordCodePointCount[i] < 0 || prevWordCodePointCount[i] > MAX_WORD_LENGTH) { |
+                continue; |
+            } |
+            memmove(mPrevWordCodePoints[i], prevWordCodePoints[i], |
+                    sizeof(mPrevWordCodePoints[i][0]) * prevWordCodePointCount[i]); |
+            mPrevWordCodePointCount[i] = prevWordCodePointCount[i]; |
+            mIsBeginningOfSentence[i] = isBeginningOfSentence[i]; |
+        } |
+    } |
+ |
+    // Construct from a previous word. |
+    // The instance holds no prev word information when prevWordCodePoints is null or when |
+    // prevWordCodePointCount is outside [0, MAX_WORD_LENGTH]. |
+    PrevWordsInfo(const int *const prevWordCodePoints, const int prevWordCodePointCount, |
+            const bool isBeginningOfSentence) { |
+        clear(); |
+        // A negative count has to be rejected as well: multiplied by sizeof() below it would be |
+        // converted to a huge unsigned size and memmove would run far past the buffers. |
+        if (prevWordCodePointCount < 0 || prevWordCodePointCount > MAX_WORD_LENGTH |
+                || !prevWordCodePoints) { |
+            return; |
+        } |
+        memmove(mPrevWordCodePoints[0], prevWordCodePoints, |
+                sizeof(mPrevWordCodePoints[0][0]) * prevWordCodePointCount); |
+        mPrevWordCodePointCount[0] = prevWordCodePointCount; |
+        mIsBeginningOfSentence[0] = isBeginningOfSentence; |
+    } |
+ |
+    // True when the 1st previous word has at least one code point or is flagged as the |
+    // beginning of a sentence. |
+    bool isValid() const { |
+        return mPrevWordCodePointCount[0] > 0 || mIsBeginningOfSentence[0]; |
+    } |
+ |
+    // Looks up every stored previous word in the dictionary and writes one position per slot. |
+    // outPrevWordsTerminalPtNodePos must have room for MAX_PREV_WORD_COUNT_FOR_N_GRAM entries. |
+    // When tryLowerCaseSearch is true, a word that is not found as-is is looked up a second time |
+    // with a forced lower case search. |
+    void getPrevWordsTerminalPtNodePos( |
+            const DictionaryStructureWithBufferPolicy *const dictStructurePolicy, |
+            int *const outPrevWordsTerminalPtNodePos, const bool tryLowerCaseSearch) const { |
+        for (size_t i = 0; i < NELEMS(mPrevWordCodePoints); ++i) { |
+            outPrevWordsTerminalPtNodePos[i] = getTerminalPtNodePosOfWord(dictStructurePolicy, |
+                    mPrevWordCodePoints[i], mPrevWordCodePointCount[i], |
+                    mIsBeginningOfSentence[i], tryLowerCaseSearch); |
+        } |
+    } |
+ |
+    // n is 1-indexed. |
+    // Returns a pointer into this object, or nullptr when n is out of range. Only the first |
+    // getNthPrevWordCodePointCount(n) entries are meaningful. |
+    const int *getNthPrevWordCodePoints(const int n) const { |
+        if (n <= 0 || n > MAX_PREV_WORD_COUNT_FOR_N_GRAM) { |
+            return nullptr; |
+        } |
+        return mPrevWordCodePoints[n - 1]; |
+    } |
+ |
+    // n is 1-indexed. |
+    // Returns 0 when n is out of range. |
+    int getNthPrevWordCodePointCount(const int n) const { |
+        if (n <= 0 || n > MAX_PREV_WORD_COUNT_FOR_N_GRAM) { |
+            return 0; |
+        } |
+        return mPrevWordCodePointCount[n - 1]; |
+    } |
+ |
+    // n is 1-indexed. |
+    // Returns false when n is out of range. |
+    bool isNthPrevWordBeginningOfSentence(const int n) const { |
+        if (n <= 0 || n > MAX_PREV_WORD_COUNT_FOR_N_GRAM) { |
+            return false; |
+        } |
+        return mIsBeginningOfSentence[n - 1]; |
+    } |
+ |
+ private: |
+    DISALLOW_COPY_AND_ASSIGN(PrevWordsInfo); |
+ |
+    // Returns the position reported by the dictionary for the given word, or NOT_A_DICT_POS when |
+    // the arguments are unusable (null policy or word, count outside [0, MAX_WORD_LENGTH]) or |
+    // when attaching the beginning-of-sentence marker yields a non-positive length. |
+    static int getTerminalPtNodePosOfWord( |
+            const DictionaryStructureWithBufferPolicy *const dictStructurePolicy, |
+            const int *const wordCodePoints, const int wordCodePointCount, |
+            const bool isBeginningOfSentence, const bool tryLowerCaseSearch) { |
+        if (!dictStructurePolicy || !wordCodePoints || wordCodePointCount < 0 |
+                || wordCodePointCount > MAX_WORD_LENGTH) { |
+            return NOT_A_DICT_POS; |
+        } |
+        // Work on a local copy because the marker, if any, is attached in place. |
+        int codePoints[MAX_WORD_LENGTH]; |
+        int codePointCount = wordCodePointCount; |
+        memmove(codePoints, wordCodePoints, sizeof(int) * codePointCount); |
+        if (isBeginningOfSentence) { |
+            codePointCount = CharUtils::attachBeginningOfSentenceMarker(codePoints, |
+                    codePointCount, MAX_WORD_LENGTH); |
+            if (codePointCount <= 0) { |
+                return NOT_A_DICT_POS; |
+            } |
+        } |
+        const int wordPtNodePos = dictStructurePolicy->getTerminalPtNodePositionOfWord( |
+                codePoints, codePointCount, false /* forceLowerCaseSearch */); |
+        if (wordPtNodePos != NOT_A_DICT_POS || !tryLowerCaseSearch) { |
+            // Return the position when the word was found or when the lower case search |
+            // should not be tried. |
+            return wordPtNodePos; |
+        } |
+        // Check bigrams for lower-cased previous word if original was not found. Useful for |
+        // auto-capitalized words like "The [current_word]". |
+        return dictStructurePolicy->getTerminalPtNodePositionOfWord( |
+                codePoints, codePointCount, true /* forceLowerCaseSearch */); |
+    } |
+ |
+    // Marks every slot as empty. The code point buffers themselves are not zeroed; a count of 0 |
+    // is what makes a slot empty. |
+    void clear() { |
+        for (size_t i = 0; i < NELEMS(mPrevWordCodePoints); ++i) { |
+            mPrevWordCodePointCount[i] = 0; |
+            mIsBeginningOfSentence[i] = false; |
+        } |
+    } |
+ |
+    // Code points of each previous word; only the first mPrevWordCodePointCount[i] are set. |
+    int mPrevWordCodePoints[MAX_PREV_WORD_COUNT_FOR_N_GRAM][MAX_WORD_LENGTH]; |
+    // Number of valid code points in each slot, always within [0, MAX_WORD_LENGTH]. |
+    int mPrevWordCodePointCount[MAX_PREV_WORD_COUNT_FOR_N_GRAM]; |
+    // Whether each previous word is the beginning of a sentence. |
+    bool mIsBeginningOfSentence[MAX_PREV_WORD_COUNT_FOR_N_GRAM]; |
+}; |
+} // namespace latinime |
+#endif // LATINIME_PREV_WORDS_INFO_H |