Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(116)

Unified Diff: third_party/prediction/suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.cpp

Issue 1247903003: Add spellcheck and word suggestion to the prediction service (Closed) Base URL: https://github.com/domokit/mojo.git@master
Patch Set: Created 5 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: third_party/prediction/suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.cpp
diff --git a/third_party/prediction/suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.cpp b/third_party/prediction/suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..51b6745fd8ff2774ecd63df5e3b611d9c318709b
--- /dev/null
+++ b/third_party/prediction/suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.cpp
@@ -0,0 +1,196 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "third_party/prediction/suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h"
+
+#include "third_party/prediction/defines.h"
+#include "third_party/prediction/suggest/core/policy/dictionary_bigrams_structure_policy.h"
+#include "third_party/prediction/suggest/core/policy/dictionary_shortcuts_structure_policy.h"
+#include "third_party/prediction/suggest/policyimpl/dictionary/utils/byte_array_utils.h"
+
+namespace latinime {
+
+typedef PatriciaTrieReadingUtils PtReadingUtils;
+
+const PtReadingUtils::NodeFlags PtReadingUtils::MASK_CHILDREN_POSITION_TYPE =
+ 0xC0;
+const PtReadingUtils::NodeFlags
+ PtReadingUtils::FLAG_CHILDREN_POSITION_TYPE_NOPOSITION = 0x00;
+const PtReadingUtils::NodeFlags
+ PtReadingUtils::FLAG_CHILDREN_POSITION_TYPE_ONEBYTE = 0x40;
+const PtReadingUtils::NodeFlags
+ PtReadingUtils::FLAG_CHILDREN_POSITION_TYPE_TWOBYTES = 0x80;
+const PtReadingUtils::NodeFlags
+ PtReadingUtils::FLAG_CHILDREN_POSITION_TYPE_THREEBYTES = 0xC0;
+
+// Flag for single/multiple char group
+const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_HAS_MULTIPLE_CHARS = 0x20;
+// Flag for terminal PtNodes
+const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_IS_TERMINAL = 0x10;
+// Flag for shortcut targets presence
+const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_HAS_SHORTCUT_TARGETS =
+ 0x08;
+// Flag for bigram presence
+const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_HAS_BIGRAMS = 0x04;
+// Flag for non-words (typically, shortcut only entries)
+const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_IS_NOT_A_WORD = 0x02;
+// Flag for blacklist
+const PtReadingUtils::NodeFlags PtReadingUtils::FLAG_IS_BLACKLISTED = 0x01;
+
+/* static */ int PtReadingUtils::getPtNodeArraySizeAndAdvancePosition(
+ const uint8_t* const buffer,
+ int* const pos) {
+ const uint8_t firstByte =
+ ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos);
+ if (firstByte < 0x80) {
+ return firstByte;
+ } else {
+ return ((firstByte & 0x7F) << 8) ^
+ ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos);
+ }
+}
+
+/* static */ PtReadingUtils::NodeFlags
+PtReadingUtils::getFlagsAndAdvancePosition(const uint8_t* const buffer,
+ int* const pos) {
+ return ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos);
+}
+
+/* static */ int PtReadingUtils::getCodePointAndAdvancePosition(
+ const uint8_t* const buffer,
+ int* const pos) {
+ return ByteArrayUtils::readCodePointAndAdvancePosition(buffer, pos);
+}
+
+// Returns the number of read characters.
+/* static */ int PtReadingUtils::getCharsAndAdvancePosition(
+ const uint8_t* const buffer,
+ const NodeFlags flags,
+ const int maxLength,
+ int* const outBuffer,
+ int* const pos) {
+ int length = 0;
+ if (hasMultipleChars(flags)) {
+ length = ByteArrayUtils::readStringAndAdvancePosition(buffer, maxLength,
+ outBuffer, pos);
+ } else {
+ const int codePoint = getCodePointAndAdvancePosition(buffer, pos);
+ if (codePoint == NOT_A_CODE_POINT) {
+ // CAVEAT: codePoint == NOT_A_CODE_POINT means the code point is
+ // CHARACTER_ARRAY_TERMINATOR. The code point must not be
+ // CHARACTER_ARRAY_TERMINATOR
+ // when the PtNode has a single code point.
+ length = 0;
+ AKLOGE(
+ "codePoint is NOT_A_CODE_POINT. pos: %d, codePoint: 0x%x, buffer[pos "
+ "- 1]: 0x%x",
+ *pos - 1, codePoint, buffer[*pos - 1]);
+ ASSERT(false);
+ } else if (maxLength > 0) {
+ outBuffer[0] = codePoint;
+ length = 1;
+ }
+ }
+ return length;
+}
+
+// Returns the number of skipped characters.
+/* static */ int PtReadingUtils::skipCharacters(const uint8_t* const buffer,
+ const NodeFlags flags,
+ const int maxLength,
+ int* const pos) {
+ if (hasMultipleChars(flags)) {
+ return ByteArrayUtils::advancePositionToBehindString(buffer, maxLength,
+ pos);
+ } else {
+ if (maxLength > 0) {
+ getCodePointAndAdvancePosition(buffer, pos);
+ return 1;
+ } else {
+ return 0;
+ }
+ }
+}
+
+/* static */ int PtReadingUtils::readProbabilityAndAdvancePosition(
+ const uint8_t* const buffer,
+ int* const pos) {
+ return ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos);
+}
+
+/* static */ int PtReadingUtils::readChildrenPositionAndAdvancePosition(
+ const uint8_t* const buffer,
+ const NodeFlags flags,
+ int* const pos) {
+ const int base = *pos;
+ int offset = 0;
+ switch (MASK_CHILDREN_POSITION_TYPE & flags) {
+ case FLAG_CHILDREN_POSITION_TYPE_ONEBYTE:
+ offset = ByteArrayUtils::readUint8AndAdvancePosition(buffer, pos);
+ break;
+ case FLAG_CHILDREN_POSITION_TYPE_TWOBYTES:
+ offset = ByteArrayUtils::readUint16AndAdvancePosition(buffer, pos);
+ break;
+ case FLAG_CHILDREN_POSITION_TYPE_THREEBYTES:
+ offset = ByteArrayUtils::readUint24AndAdvancePosition(buffer, pos);
+ break;
+ default:
+ // If we come here, it means we asked for the children of a word with
+ // no children.
+ return NOT_A_DICT_POS;
+ }
+ return base + offset;
+}
+
+/* static */ void PtReadingUtils::readPtNodeInfo(
+ const uint8_t* const dictBuf,
+ const int ptNodePos,
+ const DictionaryShortcutsStructurePolicy* const shortcutPolicy,
+ const DictionaryBigramsStructurePolicy* const bigramPolicy,
+ NodeFlags* const outFlags,
+ int* const outCodePointCount,
+ int* const outCodePoint,
+ int* const outProbability,
+ int* const outChildrenPos,
+ int* const outShortcutPos,
+ int* const outBigramPos,
+ int* const outSiblingPos) {
+ int readingPos = ptNodePos;
+ const NodeFlags flags = getFlagsAndAdvancePosition(dictBuf, &readingPos);
+ *outFlags = flags;
+ *outCodePointCount = getCharsAndAdvancePosition(
+ dictBuf, flags, MAX_WORD_LENGTH, outCodePoint, &readingPos);
+ *outProbability = isTerminal(flags) ? readProbabilityAndAdvancePosition(
+ dictBuf, &readingPos)
+ : NOT_A_PROBABILITY;
+ *outChildrenPos =
+ hasChildrenInFlags(flags)
+ ? readChildrenPositionAndAdvancePosition(dictBuf, flags, &readingPos)
+ : NOT_A_DICT_POS;
+ *outShortcutPos = NOT_A_DICT_POS;
+ if (hasShortcutTargets(flags)) {
+ *outShortcutPos = readingPos;
+ shortcutPolicy->skipAllShortcuts(&readingPos);
+ }
+ *outBigramPos = NOT_A_DICT_POS;
+ if (hasBigrams(flags)) {
+ *outBigramPos = readingPos;
+ bigramPolicy->skipAllBigrams(&readingPos);
+ }
+ *outSiblingPos = readingPos;
+}
+
+} // namespace latinime

Powered by Google App Engine
This is Rietveld 408576698