Index: third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp |
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp |
new file mode 100644 |
index 0000000000000000000000000000000000000000..9fc452c23517bd298934228ec909b9971109250d |
--- /dev/null |
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.cpp |
@@ -0,0 +1,307 @@ |
+/* |
+ * Copyright (C) 2013, The Android Open Source Project |
+ * |
+ * Licensed under the Apache License, Version 2.0 (the "License"); |
+ * you may not use this file except in compliance with the License. |
+ * You may obtain a copy of the License at |
+ * |
+ * http://www.apache.org/licenses/LICENSE-2.0 |
+ * |
+ * Unless required by applicable law or agreed to in writing, software |
+ * distributed under the License is distributed on an "AS IS" BASIS, |
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
+ * See the License for the specific language governing permissions and |
+ * limitations under the License. |
+ */ |
+ |
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_updating_helper.h" |
+ |
+#include "third_party/android_prediction/suggest/core/dictionary/property/unigram_property.h" |
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h" |
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h" |
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/patricia_trie_reading_utils.h" |
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/pt_node_reader.h" |
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/pt_node_writer.h" |
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" |
+ |
+namespace latinime { |
+ |
+const int DynamicPtUpdatingHelper::CHILDREN_POSITION_FIELD_SIZE = 3; |
+ |
+bool DynamicPtUpdatingHelper::addUnigramWord( |
+ DynamicPtReadingHelper *const readingHelper, |
+ const int *const wordCodePoints, const int codePointCount, |
+ const UnigramProperty *const unigramProperty, bool *const outAddedNewUnigram) { |
+ int parentPos = NOT_A_DICT_POS; |
+ while (!readingHelper->isEnd()) { |
+ const PtNodeParams ptNodeParams(readingHelper->getPtNodeParams()); |
+ if (!ptNodeParams.isValid()) { |
+ break; |
+ } |
+ const int matchedCodePointCount = readingHelper->getPrevTotalCodePointCount(); |
+ if (!readingHelper->isMatchedCodePoint(ptNodeParams, 0 /* index */, |
+ wordCodePoints[matchedCodePointCount])) { |
+ // The first code point is different from target code point. Skip this node and read |
+ // the next sibling node. |
+ readingHelper->readNextSiblingNode(ptNodeParams); |
+ continue; |
+ } |
+ // Check following merged node code points. |
+ const int nodeCodePointCount = ptNodeParams.getCodePointCount(); |
+ for (int j = 1; j < nodeCodePointCount; ++j) { |
+ const int nextIndex = matchedCodePointCount + j; |
+ if (nextIndex >= codePointCount || !readingHelper->isMatchedCodePoint(ptNodeParams, j, |
+ wordCodePoints[matchedCodePointCount + j])) { |
+ *outAddedNewUnigram = true; |
+ return reallocatePtNodeAndAddNewPtNodes(&ptNodeParams, j, unigramProperty, |
+ wordCodePoints + matchedCodePointCount, |
+ codePointCount - matchedCodePointCount); |
+ } |
+ } |
+ // All characters are matched. |
+ if (codePointCount == readingHelper->getTotalCodePointCount(ptNodeParams)) { |
+ return setPtNodeProbability(&ptNodeParams, unigramProperty, outAddedNewUnigram); |
+ } |
+ if (!ptNodeParams.hasChildren()) { |
+ *outAddedNewUnigram = true; |
+ return createChildrenPtNodeArrayAndAChildPtNode(&ptNodeParams, unigramProperty, |
+ wordCodePoints + readingHelper->getTotalCodePointCount(ptNodeParams), |
+ codePointCount - readingHelper->getTotalCodePointCount(ptNodeParams)); |
+ } |
+ // Advance to the children nodes. |
+ parentPos = ptNodeParams.getHeadPos(); |
+ readingHelper->readChildNode(ptNodeParams); |
+ } |
+ if (readingHelper->isError()) { |
+ // The dictionary is invalid. |
+ return false; |
+ } |
+ int pos = readingHelper->getPosOfLastForwardLinkField(); |
+ *outAddedNewUnigram = true; |
+ return createAndInsertNodeIntoPtNodeArray(parentPos, |
+ wordCodePoints + readingHelper->getPrevTotalCodePointCount(), |
+ codePointCount - readingHelper->getPrevTotalCodePointCount(), |
+ unigramProperty, &pos); |
+} |
+ |
+bool DynamicPtUpdatingHelper::addNgramEntry(const PtNodePosArrayView prevWordsPtNodePos, |
+ const int wordPos, const BigramProperty *const bigramProperty, |
+ bool *const outAddedNewEntry) { |
+ if (prevWordsPtNodePos.empty()) { |
+ return false; |
+ } |
+ ASSERT(prevWordsPtNodePos.size() <= MAX_PREV_WORD_COUNT_FOR_N_GRAM); |
+ int prevWordTerminalIds[MAX_PREV_WORD_COUNT_FOR_N_GRAM]; |
+ for (size_t i = 0; i < prevWordsPtNodePos.size(); ++i) { |
+ prevWordTerminalIds[i] = mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos( |
+ prevWordsPtNodePos[i]).getTerminalId(); |
+ } |
+ const WordIdArrayView prevWordIds(prevWordTerminalIds, prevWordsPtNodePos.size()); |
+ const int wordId = |
+ mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(wordPos).getTerminalId(); |
+ return mPtNodeWriter->addNgramEntry(prevWordIds, wordId, bigramProperty, outAddedNewEntry); |
+} |
+ |
+bool DynamicPtUpdatingHelper::removeNgramEntry(const PtNodePosArrayView prevWordsPtNodePos, |
+ const int wordPos) { |
+ if (prevWordsPtNodePos.empty()) { |
+ return false; |
+ } |
+ ASSERT(prevWordsPtNodePos.size() <= MAX_PREV_WORD_COUNT_FOR_N_GRAM); |
+ int prevWordTerminalIds[MAX_PREV_WORD_COUNT_FOR_N_GRAM]; |
+ for (size_t i = 0; i < prevWordsPtNodePos.size(); ++i) { |
+ prevWordTerminalIds[i] = mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos( |
+ prevWordsPtNodePos[i]).getTerminalId(); |
+ } |
+ const WordIdArrayView prevWordIds(prevWordTerminalIds, prevWordsPtNodePos.size()); |
+ const int wordId = |
+ mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(wordPos).getTerminalId(); |
+ return mPtNodeWriter->removeNgramEntry(prevWordIds, wordId); |
+} |
+ |
+bool DynamicPtUpdatingHelper::addShortcutTarget(const int wordPos, |
+ const int *const targetCodePoints, const int targetCodePointCount, |
+ const int shortcutProbability) { |
+ const PtNodeParams ptNodeParams(mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(wordPos)); |
+ return mPtNodeWriter->addShortcutTarget(&ptNodeParams, targetCodePoints, targetCodePointCount, |
+ shortcutProbability); |
+} |
+ |
+bool DynamicPtUpdatingHelper::createAndInsertNodeIntoPtNodeArray(const int parentPos, |
+ const int *const nodeCodePoints, const int nodeCodePointCount, |
+ const UnigramProperty *const unigramProperty, int *const forwardLinkFieldPos) { |
+ const int newPtNodeArrayPos = mBuffer->getTailPosition(); |
+ if (!DynamicPtWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer, |
+ newPtNodeArrayPos, forwardLinkFieldPos)) { |
+ return false; |
+ } |
+ return createNewPtNodeArrayWithAChildPtNode(parentPos, nodeCodePoints, nodeCodePointCount, |
+ unigramProperty); |
+} |
+ |
+bool DynamicPtUpdatingHelper::setPtNodeProbability(const PtNodeParams *const originalPtNodeParams, |
+ const UnigramProperty *const unigramProperty, bool *const outAddedNewUnigram) { |
+ if (originalPtNodeParams->isTerminal() && !originalPtNodeParams->isDeleted()) { |
+ // Overwrites the probability. |
+ *outAddedNewUnigram = false; |
+ return mPtNodeWriter->updatePtNodeUnigramProperty(originalPtNodeParams, unigramProperty); |
+ } else { |
+ // Make the node terminal and write the probability. |
+ *outAddedNewUnigram = true; |
+ const int movedPos = mBuffer->getTailPosition(); |
+ int writingPos = movedPos; |
+ const PtNodeParams ptNodeParamsToWrite(getUpdatedPtNodeParams(originalPtNodeParams, |
+ unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(), |
+ true /* isTerminal */, originalPtNodeParams->getParentPos(), |
+ originalPtNodeParams->getCodePointCount(), originalPtNodeParams->getCodePoints(), |
+ unigramProperty->getProbability())); |
+ if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite, |
+ unigramProperty, &writingPos)) { |
+ return false; |
+ } |
+ if (!mPtNodeWriter->markPtNodeAsMoved(originalPtNodeParams, movedPos, movedPos)) { |
+ return false; |
+ } |
+ } |
+ return true; |
+} |
+ |
+bool DynamicPtUpdatingHelper::createChildrenPtNodeArrayAndAChildPtNode( |
+ const PtNodeParams *const parentPtNodeParams, const UnigramProperty *const unigramProperty, |
+ const int *const codePoints, const int codePointCount) { |
+ const int newPtNodeArrayPos = mBuffer->getTailPosition(); |
+ if (!mPtNodeWriter->updateChildrenPosition(parentPtNodeParams, newPtNodeArrayPos)) { |
+ return false; |
+ } |
+ return createNewPtNodeArrayWithAChildPtNode(parentPtNodeParams->getHeadPos(), codePoints, |
+ codePointCount, unigramProperty); |
+} |
+ |
+bool DynamicPtUpdatingHelper::createNewPtNodeArrayWithAChildPtNode( |
+ const int parentPtNodePos, const int *const nodeCodePoints, const int nodeCodePointCount, |
+ const UnigramProperty *const unigramProperty) { |
+ int writingPos = mBuffer->getTailPosition(); |
+ if (!DynamicPtWritingUtils::writePtNodeArraySizeAndAdvancePosition(mBuffer, |
+ 1 /* arraySize */, &writingPos)) { |
+ return false; |
+ } |
+ const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode( |
+ unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(), true /* isTerminal */, |
+ parentPtNodePos, nodeCodePointCount, nodeCodePoints, |
+ unigramProperty->getProbability())); |
+ if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite, |
+ unigramProperty, &writingPos)) { |
+ return false; |
+ } |
+ if (!DynamicPtWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer, |
+ NOT_A_DICT_POS /* forwardLinkPos */, &writingPos)) { |
+ return false; |
+ } |
+ return true; |
+} |
+ |
+// Returns whether the dictionary updating was succeeded or not. |
+bool DynamicPtUpdatingHelper::reallocatePtNodeAndAddNewPtNodes( |
+ const PtNodeParams *const reallocatingPtNodeParams, const int overlappingCodePointCount, |
+ const UnigramProperty *const unigramProperty, const int *const newNodeCodePoints, |
+ const int newNodeCodePointCount) { |
+ // When addsExtraChild is true, split the reallocating PtNode and add new child. |
+ // Reallocating PtNode: abcde, newNode: abcxy. |
+ // abc (1st, not terminal) __ de (2nd) |
+ // \_ xy (extra child, terminal) |
+ // Otherwise, this method makes 1st part terminal and write information in unigramProperty. |
+ // Reallocating PtNode: abcde, newNode: abc. |
+ // abc (1st, terminal) __ de (2nd) |
+ const bool addsExtraChild = newNodeCodePointCount > overlappingCodePointCount; |
+ const int firstPartOfReallocatedPtNodePos = mBuffer->getTailPosition(); |
+ int writingPos = firstPartOfReallocatedPtNodePos; |
+ // Write the 1st part of the reallocating node. The children position will be updated later |
+ // with actual children position. |
+ if (addsExtraChild) { |
+ const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode( |
+ false /* isNotAWord */, false /* isBlacklisted */, false /* isTerminal */, |
+ reallocatingPtNodeParams->getParentPos(), overlappingCodePointCount, |
+ reallocatingPtNodeParams->getCodePoints(), NOT_A_PROBABILITY)); |
+ if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&ptNodeParamsToWrite, &writingPos)) { |
+ return false; |
+ } |
+ } else { |
+ const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode( |
+ unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(), |
+ true /* isTerminal */, reallocatingPtNodeParams->getParentPos(), |
+ overlappingCodePointCount, reallocatingPtNodeParams->getCodePoints(), |
+ unigramProperty->getProbability())); |
+ if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite, |
+ unigramProperty, &writingPos)) { |
+ return false; |
+ } |
+ } |
+ const int actualChildrenPos = writingPos; |
+ // Create new children PtNode array. |
+ const size_t newPtNodeCount = addsExtraChild ? 2 : 1; |
+ if (!DynamicPtWritingUtils::writePtNodeArraySizeAndAdvancePosition(mBuffer, |
+ newPtNodeCount, &writingPos)) { |
+ return false; |
+ } |
+ // Write the 2nd part of the reallocating node. |
+ const int secondPartOfReallocatedPtNodePos = writingPos; |
+ const PtNodeParams childPartPtNodeParams(getUpdatedPtNodeParams(reallocatingPtNodeParams, |
+ reallocatingPtNodeParams->isNotAWord(), reallocatingPtNodeParams->isBlacklisted(), |
+ reallocatingPtNodeParams->isTerminal(), firstPartOfReallocatedPtNodePos, |
+ reallocatingPtNodeParams->getCodePointCount() - overlappingCodePointCount, |
+ reallocatingPtNodeParams->getCodePoints() + overlappingCodePointCount, |
+ reallocatingPtNodeParams->getProbability())); |
+ if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&childPartPtNodeParams, &writingPos)) { |
+ return false; |
+ } |
+ if (addsExtraChild) { |
+ const PtNodeParams extraChildPtNodeParams(getPtNodeParamsForNewPtNode( |
+ unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(), |
+ true /* isTerminal */, firstPartOfReallocatedPtNodePos, |
+ newNodeCodePointCount - overlappingCodePointCount, |
+ newNodeCodePoints + overlappingCodePointCount, unigramProperty->getProbability())); |
+ if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&extraChildPtNodeParams, |
+ unigramProperty, &writingPos)) { |
+ return false; |
+ } |
+ } |
+ if (!DynamicPtWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer, |
+ NOT_A_DICT_POS /* forwardLinkPos */, &writingPos)) { |
+ return false; |
+ } |
+ // Update original reallocating PtNode as moved. |
+ if (!mPtNodeWriter->markPtNodeAsMoved(reallocatingPtNodeParams, firstPartOfReallocatedPtNodePos, |
+ secondPartOfReallocatedPtNodePos)) { |
+ return false; |
+ } |
+ // Load node info. Information of the 1st part will be fetched. |
+ const PtNodeParams ptNodeParams( |
+ mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(firstPartOfReallocatedPtNodePos)); |
+ // Update children position. |
+ return mPtNodeWriter->updateChildrenPosition(&ptNodeParams, actualChildrenPos); |
+} |
+ |
+const PtNodeParams DynamicPtUpdatingHelper::getUpdatedPtNodeParams( |
+ const PtNodeParams *const originalPtNodeParams, |
+ const bool isNotAWord, const bool isBlacklisted, const bool isTerminal, const int parentPos, |
+ const int codePointCount, const int *const codePoints, const int probability) const { |
+ const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::createAndGetFlags( |
+ isBlacklisted, isNotAWord, isTerminal, false /* hasShortcutTargets */, |
+ false /* hasBigrams */, codePointCount > 1 /* hasMultipleChars */, |
+ CHILDREN_POSITION_FIELD_SIZE); |
+ return PtNodeParams(originalPtNodeParams, flags, parentPos, codePointCount, codePoints, |
+ probability); |
+} |
+ |
+const PtNodeParams DynamicPtUpdatingHelper::getPtNodeParamsForNewPtNode( |
+ const bool isNotAWord, const bool isBlacklisted, const bool isTerminal, |
+ const int parentPos, const int codePointCount, const int *const codePoints, |
+ const int probability) const { |
+ const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::createAndGetFlags( |
+ isBlacklisted, isNotAWord, isTerminal, false /* hasShortcutTargets */, |
+ false /* hasBigrams */, codePointCount > 1 /* hasMultipleChars */, |
+ CHILDREN_POSITION_FIELD_SIZE); |
+ return PtNodeParams(flags, parentPos, codePointCount, codePoints, probability); |
+} |
+ |
+} // namespace latinime |