Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(807)

Unified Diff: third_party/prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp

Issue 1247903003: Add spellcheck and word suggestion to the prediction service (Closed) Base URL: https://github.com/domokit/mojo.git@master
Patch Set: Created 5 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: third_party/prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp
diff --git a/third_party/prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp b/third_party/prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..49ef0bbb91aaba9af4561a17c38239907805de08
--- /dev/null
+++ b/third_party/prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp
@@ -0,0 +1,369 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "third_party/prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h"
+
+#include "third_party/prediction/suggest/core/dictionary/property/unigram_property.h"
+#include "third_party/prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
+#include "third_party/prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h"
+#include "third_party/prediction/suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h"
+#include "third_party/prediction/suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h"
+#include "third_party/prediction/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h"
+#include "third_party/prediction/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+
+namespace latinime {
+
+const int DynamicPtUpdatingHelper::CHILDREN_POSITION_FIELD_SIZE = 3;
+
+bool DynamicPtUpdatingHelper::addUnigramWord(
+ DynamicPtReadingHelper* const readingHelper,
+ const int* const wordCodePoints,
+ const int codePointCount,
+ const UnigramProperty* const unigramProperty,
+ bool* const outAddedNewUnigram) {
+ int parentPos = NOT_A_DICT_POS;
+ while (!readingHelper->isEnd()) {
+ const PtNodeParams ptNodeParams(readingHelper->getPtNodeParams());
+ if (!ptNodeParams.isValid()) {
+ break;
+ }
+ const int matchedCodePointCount =
+ readingHelper->getPrevTotalCodePointCount();
+ if (!readingHelper->isMatchedCodePoint(
+ ptNodeParams, 0 /* index */,
+ wordCodePoints[matchedCodePointCount])) {
+ // The first code point is different from target code point. Skip this
+ // node and read
+ // the next sibling node.
+ readingHelper->readNextSiblingNode(ptNodeParams);
+ continue;
+ }
+ // Check following merged node code points.
+ const int nodeCodePointCount = ptNodeParams.getCodePointCount();
+ for (int j = 1; j < nodeCodePointCount; ++j) {
+ const int nextIndex = matchedCodePointCount + j;
+ if (nextIndex >= codePointCount ||
+ !readingHelper->isMatchedCodePoint(
+ ptNodeParams, j, wordCodePoints[matchedCodePointCount + j])) {
+ *outAddedNewUnigram = true;
+ return reallocatePtNodeAndAddNewPtNodes(
+ &ptNodeParams, j, unigramProperty,
+ wordCodePoints + matchedCodePointCount,
+ codePointCount - matchedCodePointCount);
+ }
+ }
+ // All characters are matched.
+ if (codePointCount == readingHelper->getTotalCodePointCount(ptNodeParams)) {
+ return setPtNodeProbability(&ptNodeParams, unigramProperty,
+ outAddedNewUnigram);
+ }
+ if (!ptNodeParams.hasChildren()) {
+ *outAddedNewUnigram = true;
+ return createChildrenPtNodeArrayAndAChildPtNode(
+ &ptNodeParams, unigramProperty,
+ wordCodePoints + readingHelper->getTotalCodePointCount(ptNodeParams),
+ codePointCount - readingHelper->getTotalCodePointCount(ptNodeParams));
+ }
+ // Advance to the children nodes.
+ parentPos = ptNodeParams.getHeadPos();
+ readingHelper->readChildNode(ptNodeParams);
+ }
+ if (readingHelper->isError()) {
+ // The dictionary is invalid.
+ return false;
+ }
+ int pos = readingHelper->getPosOfLastForwardLinkField();
+ *outAddedNewUnigram = true;
+ return createAndInsertNodeIntoPtNodeArray(
+ parentPos, wordCodePoints + readingHelper->getPrevTotalCodePointCount(),
+ codePointCount - readingHelper->getPrevTotalCodePointCount(),
+ unigramProperty, &pos);
+}
+
+bool DynamicPtUpdatingHelper::addNgramEntry(
+ const PtNodePosArrayView prevWordsPtNodePos,
+ const int wordPos,
+ const BigramProperty* const bigramProperty,
+ bool* const outAddedNewEntry) {
+ if (prevWordsPtNodePos.empty()) {
+ return false;
+ }
+ ASSERT(prevWordsPtNodePos.size() <= MAX_PREV_WORD_COUNT_FOR_N_GRAM);
+ int prevWordTerminalIds[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
+ for (size_t i = 0; i < prevWordsPtNodePos.size(); ++i) {
+ prevWordTerminalIds[i] =
+ mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(
+ prevWordsPtNodePos[i]).getTerminalId();
+ }
+ const WordIdArrayView prevWordIds(prevWordTerminalIds,
+ prevWordsPtNodePos.size());
+ const int wordId = mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(
+ wordPos).getTerminalId();
+ return mPtNodeWriter->addNgramEntry(prevWordIds, wordId, bigramProperty,
+ outAddedNewEntry);
+}
+
+bool DynamicPtUpdatingHelper::removeNgramEntry(
+ const PtNodePosArrayView prevWordsPtNodePos,
+ const int wordPos) {
+ if (prevWordsPtNodePos.empty()) {
+ return false;
+ }
+ ASSERT(prevWordsPtNodePos.size() <= MAX_PREV_WORD_COUNT_FOR_N_GRAM);
+ int prevWordTerminalIds[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
+ for (size_t i = 0; i < prevWordsPtNodePos.size(); ++i) {
+ prevWordTerminalIds[i] =
+ mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(
+ prevWordsPtNodePos[i]).getTerminalId();
+ }
+ const WordIdArrayView prevWordIds(prevWordTerminalIds,
+ prevWordsPtNodePos.size());
+ const int wordId = mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(
+ wordPos).getTerminalId();
+ return mPtNodeWriter->removeNgramEntry(prevWordIds, wordId);
+}
+
+bool DynamicPtUpdatingHelper::addShortcutTarget(
+ const int wordPos,
+ const int* const targetCodePoints,
+ const int targetCodePointCount,
+ const int shortcutProbability) {
+ const PtNodeParams ptNodeParams(
+ mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(wordPos));
+ return mPtNodeWriter->addShortcutTarget(&ptNodeParams, targetCodePoints,
+ targetCodePointCount,
+ shortcutProbability);
+}
+
+bool DynamicPtUpdatingHelper::createAndInsertNodeIntoPtNodeArray(
+ const int parentPos,
+ const int* const nodeCodePoints,
+ const int nodeCodePointCount,
+ const UnigramProperty* const unigramProperty,
+ int* const forwardLinkFieldPos) {
+ const int newPtNodeArrayPos = mBuffer->getTailPosition();
+ if (!DynamicPtWritingUtils::writeForwardLinkPositionAndAdvancePosition(
+ mBuffer, newPtNodeArrayPos, forwardLinkFieldPos)) {
+ return false;
+ }
+ return createNewPtNodeArrayWithAChildPtNode(
+ parentPos, nodeCodePoints, nodeCodePointCount, unigramProperty);
+}
+
+bool DynamicPtUpdatingHelper::setPtNodeProbability(
+ const PtNodeParams* const originalPtNodeParams,
+ const UnigramProperty* const unigramProperty,
+ bool* const outAddedNewUnigram) {
+ if (originalPtNodeParams->isTerminal() &&
+ !originalPtNodeParams->isDeleted()) {
+ // Overwrites the probability.
+ *outAddedNewUnigram = false;
+ return mPtNodeWriter->updatePtNodeUnigramProperty(originalPtNodeParams,
+ unigramProperty);
+ } else {
+ // Make the node terminal and write the probability.
+ *outAddedNewUnigram = true;
+ const int movedPos = mBuffer->getTailPosition();
+ int writingPos = movedPos;
+ const PtNodeParams ptNodeParamsToWrite(getUpdatedPtNodeParams(
+ originalPtNodeParams, unigramProperty->isNotAWord(),
+ unigramProperty->isBlacklisted(), true /* isTerminal */,
+ originalPtNodeParams->getParentPos(),
+ originalPtNodeParams->getCodePointCount(),
+ originalPtNodeParams->getCodePoints(),
+ unigramProperty->getProbability()));
+ if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(
+ &ptNodeParamsToWrite, unigramProperty, &writingPos)) {
+ return false;
+ }
+ if (!mPtNodeWriter->markPtNodeAsMoved(originalPtNodeParams, movedPos,
+ movedPos)) {
+ return false;
+ }
+ }
+ return true;
+}
+
+bool DynamicPtUpdatingHelper::createChildrenPtNodeArrayAndAChildPtNode(
+ const PtNodeParams* const parentPtNodeParams,
+ const UnigramProperty* const unigramProperty,
+ const int* const codePoints,
+ const int codePointCount) {
+ const int newPtNodeArrayPos = mBuffer->getTailPosition();
+ if (!mPtNodeWriter->updateChildrenPosition(parentPtNodeParams,
+ newPtNodeArrayPos)) {
+ return false;
+ }
+ return createNewPtNodeArrayWithAChildPtNode(parentPtNodeParams->getHeadPos(),
+ codePoints, codePointCount,
+ unigramProperty);
+}
+
+bool DynamicPtUpdatingHelper::createNewPtNodeArrayWithAChildPtNode(
+ const int parentPtNodePos,
+ const int* const nodeCodePoints,
+ const int nodeCodePointCount,
+ const UnigramProperty* const unigramProperty) {
+ int writingPos = mBuffer->getTailPosition();
+ if (!DynamicPtWritingUtils::writePtNodeArraySizeAndAdvancePosition(
+ mBuffer, 1 /* arraySize */, &writingPos)) {
+ return false;
+ }
+ const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(
+ unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(),
+ true /* isTerminal */, parentPtNodePos, nodeCodePointCount,
+ nodeCodePoints, unigramProperty->getProbability()));
+ if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(
+ &ptNodeParamsToWrite, unigramProperty, &writingPos)) {
+ return false;
+ }
+ if (!DynamicPtWritingUtils::writeForwardLinkPositionAndAdvancePosition(
+ mBuffer, NOT_A_DICT_POS /* forwardLinkPos */, &writingPos)) {
+ return false;
+ }
+ return true;
+}
+
+// Returns whether the dictionary updating was succeeded or not.
+bool DynamicPtUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
+ const PtNodeParams* const reallocatingPtNodeParams,
+ const int overlappingCodePointCount,
+ const UnigramProperty* const unigramProperty,
+ const int* const newNodeCodePoints,
+ const int newNodeCodePointCount) {
+ // When addsExtraChild is true, split the reallocating PtNode and add new
+ // child.
+ // Reallocating PtNode: abcde, newNode: abcxy.
+ // abc (1st, not terminal) __ de (2nd)
+ // \_ xy (extra child, terminal)
+ // Otherwise, this method makes 1st part terminal and write information in
+ // unigramProperty.
+ // Reallocating PtNode: abcde, newNode: abc.
+ // abc (1st, terminal) __ de (2nd)
+ const bool addsExtraChild = newNodeCodePointCount > overlappingCodePointCount;
+ const int firstPartOfReallocatedPtNodePos = mBuffer->getTailPosition();
+ int writingPos = firstPartOfReallocatedPtNodePos;
+ // Write the 1st part of the reallocating node. The children position will be
+ // updated later
+ // with actual children position.
+ if (addsExtraChild) {
+ const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(
+ false /* isNotAWord */, false /* isBlacklisted */,
+ false /* isTerminal */, reallocatingPtNodeParams->getParentPos(),
+ overlappingCodePointCount, reallocatingPtNodeParams->getCodePoints(),
+ NOT_A_PROBABILITY));
+ if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&ptNodeParamsToWrite,
+ &writingPos)) {
+ return false;
+ }
+ } else {
+ const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(
+ unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(),
+ true /* isTerminal */, reallocatingPtNodeParams->getParentPos(),
+ overlappingCodePointCount, reallocatingPtNodeParams->getCodePoints(),
+ unigramProperty->getProbability()));
+ if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(
+ &ptNodeParamsToWrite, unigramProperty, &writingPos)) {
+ return false;
+ }
+ }
+ const int actualChildrenPos = writingPos;
+ // Create new children PtNode array.
+ const size_t newPtNodeCount = addsExtraChild ? 2 : 1;
+ if (!DynamicPtWritingUtils::writePtNodeArraySizeAndAdvancePosition(
+ mBuffer, newPtNodeCount, &writingPos)) {
+ return false;
+ }
+ // Write the 2nd part of the reallocating node.
+ const int secondPartOfReallocatedPtNodePos = writingPos;
+ const PtNodeParams childPartPtNodeParams(getUpdatedPtNodeParams(
+ reallocatingPtNodeParams, reallocatingPtNodeParams->isNotAWord(),
+ reallocatingPtNodeParams->isBlacklisted(),
+ reallocatingPtNodeParams->isTerminal(), firstPartOfReallocatedPtNodePos,
+ reallocatingPtNodeParams->getCodePointCount() - overlappingCodePointCount,
+ reallocatingPtNodeParams->getCodePoints() + overlappingCodePointCount,
+ reallocatingPtNodeParams->getProbability()));
+ if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&childPartPtNodeParams,
+ &writingPos)) {
+ return false;
+ }
+ if (addsExtraChild) {
+ const PtNodeParams extraChildPtNodeParams(getPtNodeParamsForNewPtNode(
+ unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(),
+ true /* isTerminal */, firstPartOfReallocatedPtNodePos,
+ newNodeCodePointCount - overlappingCodePointCount,
+ newNodeCodePoints + overlappingCodePointCount,
+ unigramProperty->getProbability()));
+ if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(
+ &extraChildPtNodeParams, unigramProperty, &writingPos)) {
+ return false;
+ }
+ }
+ if (!DynamicPtWritingUtils::writeForwardLinkPositionAndAdvancePosition(
+ mBuffer, NOT_A_DICT_POS /* forwardLinkPos */, &writingPos)) {
+ return false;
+ }
+ // Update original reallocating PtNode as moved.
+ if (!mPtNodeWriter->markPtNodeAsMoved(reallocatingPtNodeParams,
+ firstPartOfReallocatedPtNodePos,
+ secondPartOfReallocatedPtNodePos)) {
+ return false;
+ }
+ // Load node info. Information of the 1st part will be fetched.
+ const PtNodeParams ptNodeParams(
+ mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(
+ firstPartOfReallocatedPtNodePos));
+ // Update children position.
+ return mPtNodeWriter->updateChildrenPosition(&ptNodeParams,
+ actualChildrenPos);
+}
+
+const PtNodeParams DynamicPtUpdatingHelper::getUpdatedPtNodeParams(
+ const PtNodeParams* const originalPtNodeParams,
+ const bool isNotAWord,
+ const bool isBlacklisted,
+ const bool isTerminal,
+ const int parentPos,
+ const int codePointCount,
+ const int* const codePoints,
+ const int probability) const {
+ const PatriciaTrieReadingUtils::NodeFlags flags =
+ PatriciaTrieReadingUtils::createAndGetFlags(
+ isBlacklisted, isNotAWord, isTerminal, false /* hasShortcutTargets */,
+ false /* hasBigrams */, codePointCount > 1 /* hasMultipleChars */,
+ CHILDREN_POSITION_FIELD_SIZE);
+ return PtNodeParams(originalPtNodeParams, flags, parentPos, codePointCount,
+ codePoints, probability);
+}
+
+const PtNodeParams DynamicPtUpdatingHelper::getPtNodeParamsForNewPtNode(
+ const bool isNotAWord,
+ const bool isBlacklisted,
+ const bool isTerminal,
+ const int parentPos,
+ const int codePointCount,
+ const int* const codePoints,
+ const int probability) const {
+ const PatriciaTrieReadingUtils::NodeFlags flags =
+ PatriciaTrieReadingUtils::createAndGetFlags(
+ isBlacklisted, isNotAWord, isTerminal, false /* hasShortcutTargets */,
+ false /* hasBigrams */, codePointCount > 1 /* hasMultipleChars */,
+ CHILDREN_POSITION_FIELD_SIZE);
+ return PtNodeParams(flags, parentPos, codePointCount, codePoints,
+ probability);
+}
+
+} // namespace latinime

Powered by Google App Engine
This is Rietveld 408576698