Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(31)

Unified Diff: third_party/prediction/suggest/core/session/prev_words_info.h

Issue 1247903003: Add spellcheck and word suggestion to the prediction service (Closed) Base URL: https://github.com/domokit/mojo.git@master
Patch Set: Created 5 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: third_party/prediction/suggest/core/session/prev_words_info.h
diff --git a/third_party/prediction/suggest/core/session/prev_words_info.h b/third_party/prediction/suggest/core/session/prev_words_info.h
new file mode 100644
index 0000000000000000000000000000000000000000..b498a2a71cc428b8d3dfb44a9ff9b55be2c90cba
--- /dev/null
+++ b/third_party/prediction/suggest/core/session/prev_words_info.h
@@ -0,0 +1,176 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_PREV_WORDS_INFO_H
+#define LATINIME_PREV_WORDS_INFO_H
+
+#include "third_party/prediction/defines.h"
+#include "third_party/prediction/suggest/core/dictionary/binary_dictionary_bigrams_iterator.h"
+#include "third_party/prediction/suggest/core/policy/dictionary_structure_with_buffer_policy.h"
+#include "third_party/prediction/utils/char_utils.h"
+
+namespace latinime {
+
+// TODO: Support n-gram.
+class PrevWordsInfo {
+ public:
+ // No prev word information.
+ PrevWordsInfo() { clear(); }
+
+ PrevWordsInfo(PrevWordsInfo&& prevWordsInfo) {
+ for (size_t i = 0; i < NELEMS(mPrevWordCodePoints); ++i) {
+ mPrevWordCodePointCount[i] = prevWordsInfo.mPrevWordCodePointCount[i];
+ memmove(mPrevWordCodePoints[i], prevWordsInfo.mPrevWordCodePoints[i],
+ sizeof(mPrevWordCodePoints[i][0]) * mPrevWordCodePointCount[i]);
+ mIsBeginningOfSentence[i] = prevWordsInfo.mIsBeginningOfSentence[i];
+ }
+ }
+
+ // Construct from previous words.
+ PrevWordsInfo(const int prevWordCodePoints[][MAX_WORD_LENGTH],
+ const int* const prevWordCodePointCount,
+ const bool* const isBeginningOfSentence,
+ const size_t prevWordCount) {
+ clear();
+ for (size_t i = 0; i < std::min(NELEMS(mPrevWordCodePoints), prevWordCount);
+ ++i) {
+ if (prevWordCodePointCount[i] < 0 ||
+ prevWordCodePointCount[i] > MAX_WORD_LENGTH) {
+ continue;
+ }
+ memmove(mPrevWordCodePoints[i], prevWordCodePoints[i],
+ sizeof(mPrevWordCodePoints[i][0]) * prevWordCodePointCount[i]);
+ mPrevWordCodePointCount[i] = prevWordCodePointCount[i];
+ mIsBeginningOfSentence[i] = isBeginningOfSentence[i];
+ }
+ }
+
+ // Construct from a previous word.
+ PrevWordsInfo(const int* const prevWordCodePoints,
+ const int prevWordCodePointCount,
+ const bool isBeginningOfSentence) {
+ clear();
+ if (prevWordCodePointCount > MAX_WORD_LENGTH || !prevWordCodePoints) {
+ return;
+ }
+ memmove(mPrevWordCodePoints[0], prevWordCodePoints,
+ sizeof(mPrevWordCodePoints[0][0]) * prevWordCodePointCount);
+ mPrevWordCodePointCount[0] = prevWordCodePointCount;
+ mIsBeginningOfSentence[0] = isBeginningOfSentence;
+ }
+
+ bool isValid() const {
+ if (mPrevWordCodePointCount[0] > 0) {
+ return true;
+ }
+ if (mIsBeginningOfSentence[0]) {
+ return true;
+ }
+ return false;
+ }
+
+ void getPrevWordsTerminalPtNodePos(
+ const DictionaryStructureWithBufferPolicy* const dictStructurePolicy,
+ int* const outPrevWordsTerminalPtNodePos,
+ const bool tryLowerCaseSearch) const {
+ for (size_t i = 0; i < NELEMS(mPrevWordCodePoints); ++i) {
+ if (mPrevWordCodePointCount[i] == 0) {
+ outPrevWordsTerminalPtNodePos[i] = NOT_A_DICT_POS;
+ break;
+ }
+ outPrevWordsTerminalPtNodePos[i] = getTerminalPtNodePosOfWord(
+ dictStructurePolicy, mPrevWordCodePoints[i],
+ mPrevWordCodePointCount[i], mIsBeginningOfSentence[i],
+ tryLowerCaseSearch);
+ }
+ }
+
+ // n is 1-indexed.
+ const int* getNthPrevWordCodePoints(const int n) const {
+ if (n <= 0 || n > MAX_PREV_WORD_COUNT_FOR_N_GRAM) {
+ return nullptr;
+ }
+ return mPrevWordCodePoints[n - 1];
+ }
+
+ // n is 1-indexed.
+ int getNthPrevWordCodePointCount(const int n) const {
+ if (n <= 0 || n > MAX_PREV_WORD_COUNT_FOR_N_GRAM) {
+ return 0;
+ }
+ return mPrevWordCodePointCount[n - 1];
+ }
+
+ // n is 1-indexed.
+ bool isNthPrevWordBeginningOfSentence(const int n) const {
+ if (n <= 0 || n > MAX_PREV_WORD_COUNT_FOR_N_GRAM) {
+ return false;
+ }
+ return mIsBeginningOfSentence[n - 1];
+ }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(PrevWordsInfo);
+
+ static int getTerminalPtNodePosOfWord(
+ const DictionaryStructureWithBufferPolicy* const dictStructurePolicy,
+ const int* const wordCodePoints,
+ const int wordCodePointCount,
+ const bool isBeginningOfSentence,
+ const bool tryLowerCaseSearch) {
+ if (!dictStructurePolicy || !wordCodePoints ||
+ wordCodePointCount > MAX_WORD_LENGTH) {
+ return NOT_A_DICT_POS;
+ }
+ int codePoints[MAX_WORD_LENGTH];
+ int codePointCount = wordCodePointCount;
+ memmove(codePoints, wordCodePoints, sizeof(int) * codePointCount);
+ if (isBeginningOfSentence) {
+ codePointCount = CharUtils::attachBeginningOfSentenceMarker(
+ codePoints, codePointCount, MAX_WORD_LENGTH);
+ if (codePointCount <= 0) {
+ return NOT_A_DICT_POS;
+ }
+ }
+ const int wordPtNodePos =
+ dictStructurePolicy->getTerminalPtNodePositionOfWord(
+ codePoints, codePointCount, false /* forceLowerCaseSearch */);
+ if (wordPtNodePos != NOT_A_DICT_POS || !tryLowerCaseSearch) {
+ // Return the position when when the word was found or doesn't try lower
+ // case
+ // search.
+ return wordPtNodePos;
+ }
+ // Check bigrams for lower-cased previous word if original was not found.
+ // Useful for
+ // auto-capitalized words like "The [current_word]".
+ return dictStructurePolicy->getTerminalPtNodePositionOfWord(
+ codePoints, codePointCount, true /* forceLowerCaseSearch */);
+ }
+
+ void clear() {
+ for (size_t i = 0; i < NELEMS(mPrevWordCodePoints); ++i) {
+ mPrevWordCodePointCount[i] = 0;
+ mIsBeginningOfSentence[i] = false;
+ }
+ }
+
+ int mPrevWordCodePoints[MAX_PREV_WORD_COUNT_FOR_N_GRAM][MAX_WORD_LENGTH];
+ int mPrevWordCodePointCount[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
+ bool mIsBeginningOfSentence[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
+};
+} // namespace latinime
+#endif // LATINIME_PREV_WORDS_INFO_H

Powered by Google App Engine
This is Rietveld 408576698