| Index: third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.cpp
|
| diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.cpp b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.cpp
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..2764d0d791e5e6600a6f1dd984d0c37b777e55ef
|
| --- /dev/null
|
| +++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.cpp
|
| @@ -0,0 +1,326 @@
|
| +/*
|
| + * Copyright (C) 2013, The Android Open Source Project
|
| + *
|
| + * Licensed under the Apache License, Version 2.0 (the "License");
|
| + * you may not use this file except in compliance with the License.
|
| + * You may obtain a copy of the License at
|
| + *
|
| + * http://www.apache.org/licenses/LICENSE-2.0
|
| + *
|
| + * Unless required by applicable law or agreed to in writing, software
|
| + * distributed under the License is distributed on an "AS IS" BASIS,
|
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| + * See the License for the specific language governing permissions and
|
| + * limitations under the License.
|
| + */
|
| +
|
| +#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_reading_helper.h"
|
| +
|
| +#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/pt_common/pt_node_array_reader.h"
|
| +#include "third_party/android_prediction/utils/char_utils.h"
|
| +
|
| +namespace latinime {
|
| +
|
| +// To avoid infinite loop caused by invalid or malicious forward links.
|
| +const int DynamicPtReadingHelper::MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP = 100000;
|
| +const int DynamicPtReadingHelper::MAX_PT_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP = 100000;
|
| +const size_t DynamicPtReadingHelper::MAX_READING_STATE_STACK_SIZE = MAX_WORD_LENGTH;
|
| +
|
| +bool DynamicPtReadingHelper::TraversePolicyToGetAllTerminalPtNodePositions::onVisitingPtNode(
|
| + const PtNodeParams *const ptNodeParams) {
|
| + if (ptNodeParams->isTerminal() && !ptNodeParams->isDeleted()) {
|
| + mTerminalPositions->push_back(ptNodeParams->getHeadPos());
|
| + }
|
| + return true;
|
| +}
|
| +
|
| +// Visits all PtNodes in post-order depth first manner.
|
| +// For example, visits c -> b -> y -> x -> a for the following dictionary:
|
| +// a _ b _ c
|
| +// \ x _ y
|
| +bool DynamicPtReadingHelper::traverseAllPtNodesInPostorderDepthFirstManner(
|
| + TraversingEventListener *const listener) {
|
| + bool alreadyVisitedChildren = false;
|
| + // Descend from the root to the root PtNode array.
|
| + if (!listener->onDescend(getPosOfLastPtNodeArrayHead())) {
|
| + return false;
|
| + }
|
| + while (!isEnd()) {
|
| + const PtNodeParams ptNodeParams(getPtNodeParams());
|
| + if (!ptNodeParams.isValid()) {
|
| + break;
|
| + }
|
| + if (!alreadyVisitedChildren) {
|
| + if (ptNodeParams.hasChildren()) {
|
| + // Move to the first child.
|
| + if (!listener->onDescend(ptNodeParams.getChildrenPos())) {
|
| + return false;
|
| + }
|
| + pushReadingStateToStack();
|
| + readChildNode(ptNodeParams);
|
| + } else {
|
| + alreadyVisitedChildren = true;
|
| + }
|
| + } else {
|
| + if (!listener->onVisitingPtNode(&ptNodeParams)) {
|
| + return false;
|
| + }
|
| + readNextSiblingNode(ptNodeParams);
|
| + if (isEnd()) {
|
| + // All PtNodes in current linked PtNode arrays have been visited.
|
| + // Return to the parent.
|
| + if (!listener->onReadingPtNodeArrayTail()) {
|
| + return false;
|
| + }
|
| + if (mReadingStateStack.size() <= 0) {
|
| + break;
|
| + }
|
| + if (!listener->onAscend()) {
|
| + return false;
|
| + }
|
| + popReadingStateFromStack();
|
| + alreadyVisitedChildren = true;
|
| + } else {
|
| + // Process sibling PtNode.
|
| + alreadyVisitedChildren = false;
|
| + }
|
| + }
|
| + }
|
| + // Ascend from the root PtNode array to the root.
|
| + if (!listener->onAscend()) {
|
| + return false;
|
| + }
|
| + return !isError();
|
| +}
|
| +
|
| +// Visits all PtNodes in PtNode array level pre-order depth first manner, which is the same order
|
| +// that PtNodes are written in the dictionary buffer.
|
| +// For example, visits a -> b -> x -> c -> y for the following dictionary:
|
| +// a _ b _ c
|
| +// \ x _ y
|
| +bool DynamicPtReadingHelper::traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
|
| + TraversingEventListener *const listener) {
|
| + bool alreadyVisitedAllPtNodesInArray = false;
|
| + bool alreadyVisitedChildren = false;
|
| + // Descend from the root to the root PtNode array.
|
| + if (!listener->onDescend(getPosOfLastPtNodeArrayHead())) {
|
| + return false;
|
| + }
|
| + if (isEnd()) {
|
| + // Empty dictionary. Needs to notify the listener of the tail of empty PtNode array.
|
| + if (!listener->onReadingPtNodeArrayTail()) {
|
| + return false;
|
| + }
|
| + }
|
| + pushReadingStateToStack();
|
| + while (!isEnd()) {
|
| + const PtNodeParams ptNodeParams(getPtNodeParams());
|
| + if (!ptNodeParams.isValid()) {
|
| + break;
|
| + }
|
| + if (alreadyVisitedAllPtNodesInArray) {
|
| + if (alreadyVisitedChildren) {
|
| + // Move to next sibling PtNode's children.
|
| + readNextSiblingNode(ptNodeParams);
|
| + if (isEnd()) {
|
| + // Return to the parent PTNode.
|
| + if (!listener->onAscend()) {
|
| + return false;
|
| + }
|
| + if (mReadingStateStack.size() <= 0) {
|
| + break;
|
| + }
|
| + popReadingStateFromStack();
|
| + alreadyVisitedChildren = true;
|
| + alreadyVisitedAllPtNodesInArray = true;
|
| + } else {
|
| + alreadyVisitedChildren = false;
|
| + }
|
| + } else {
|
| + if (ptNodeParams.hasChildren()) {
|
| + // Move to the first child.
|
| + if (!listener->onDescend(ptNodeParams.getChildrenPos())) {
|
| + return false;
|
| + }
|
| + pushReadingStateToStack();
|
| + readChildNode(ptNodeParams);
|
| + // Push state to return the head of PtNode array.
|
| + pushReadingStateToStack();
|
| + alreadyVisitedAllPtNodesInArray = false;
|
| + alreadyVisitedChildren = false;
|
| + } else {
|
| + alreadyVisitedChildren = true;
|
| + }
|
| + }
|
| + } else {
|
| + if (!listener->onVisitingPtNode(&ptNodeParams)) {
|
| + return false;
|
| + }
|
| + readNextSiblingNode(ptNodeParams);
|
| + if (isEnd()) {
|
| + if (!listener->onReadingPtNodeArrayTail()) {
|
| + return false;
|
| + }
|
| + // Return to the head of current PtNode array.
|
| + popReadingStateFromStack();
|
| + alreadyVisitedAllPtNodesInArray = true;
|
| + }
|
| + }
|
| + }
|
| + popReadingStateFromStack();
|
| + // Ascend from the root PtNode array to the root.
|
| + if (!listener->onAscend()) {
|
| + return false;
|
| + }
|
| + return !isError();
|
| +}
|
| +
|
| +int DynamicPtReadingHelper::getCodePointsAndProbabilityAndReturnCodePointCount(
|
| + const int maxCodePointCount, int *const outCodePoints, int *const outUnigramProbability) {
|
| + // This method traverses parent nodes from the terminal by following parent pointers; thus,
|
| + // node code points are stored in the buffer in the reverse order.
|
| + int reverseCodePoints[maxCodePointCount];
|
| + const PtNodeParams terminalPtNodeParams(getPtNodeParams());
|
| + // First, read the terminal node and get its probability.
|
| + if (!isValidTerminalNode(terminalPtNodeParams)) {
|
| + // Node at the ptNodePos is not a valid terminal node.
|
| + *outUnigramProbability = NOT_A_PROBABILITY;
|
| + return 0;
|
| + }
|
| + // Store terminal node probability.
|
| + *outUnigramProbability = terminalPtNodeParams.getProbability();
|
| + // Then, following parent node link to the dictionary root and fetch node code points.
|
| + int totalCodePointCount = 0;
|
| + while (!isEnd()) {
|
| + const PtNodeParams ptNodeParams(getPtNodeParams());
|
| + totalCodePointCount = getTotalCodePointCount(ptNodeParams);
|
| + if (!ptNodeParams.isValid() || totalCodePointCount > maxCodePointCount) {
|
| + // The ptNodePos is not a valid terminal node position in the dictionary.
|
| + *outUnigramProbability = NOT_A_PROBABILITY;
|
| + return 0;
|
| + }
|
| + // Store node code points to buffer in the reverse order.
|
| + fetchMergedNodeCodePointsInReverseOrder(ptNodeParams, getPrevTotalCodePointCount(),
|
| + reverseCodePoints);
|
| + // Follow parent node toward the root node.
|
| + readParentNode(ptNodeParams);
|
| + }
|
| + if (isError()) {
|
| + // The node position or the dictionary is invalid.
|
| + *outUnigramProbability = NOT_A_PROBABILITY;
|
| + return 0;
|
| + }
|
| + // Reverse the stored code points to output them.
|
| + for (int i = 0; i < totalCodePointCount; ++i) {
|
| + outCodePoints[i] = reverseCodePoints[totalCodePointCount - i - 1];
|
| + }
|
| + return totalCodePointCount;
|
| +}
|
| +
|
| +int DynamicPtReadingHelper::getTerminalPtNodePositionOfWord(const int *const inWord,
|
| + const int length, const bool forceLowerCaseSearch) {
|
| + int searchCodePoints[length];
|
| + for (int i = 0; i < length; ++i) {
|
| + searchCodePoints[i] = forceLowerCaseSearch ? CharUtils::toLowerCase(inWord[i]) : inWord[i];
|
| + }
|
| + while (!isEnd()) {
|
| + const PtNodeParams ptNodeParams(getPtNodeParams());
|
| + const int matchedCodePointCount = getPrevTotalCodePointCount();
|
| + if (getTotalCodePointCount(ptNodeParams) > length
|
| + || !isMatchedCodePoint(ptNodeParams, 0 /* index */,
|
| + searchCodePoints[matchedCodePointCount])) {
|
| + // Current node has too many code points or its first code point is different from
|
| + // target code point. Skip this node and read the next sibling node.
|
| + readNextSiblingNode(ptNodeParams);
|
| + continue;
|
| + }
|
| + // Check following merged node code points.
|
| + const int nodeCodePointCount = ptNodeParams.getCodePointCount();
|
| + for (int j = 1; j < nodeCodePointCount; ++j) {
|
| + if (!isMatchedCodePoint(ptNodeParams, j, searchCodePoints[matchedCodePointCount + j])) {
|
| + // Different code point is found. The given word is not included in the dictionary.
|
| + return NOT_A_DICT_POS;
|
| + }
|
| + }
|
| + // All characters are matched.
|
| + if (length == getTotalCodePointCount(ptNodeParams)) {
|
| + if (!ptNodeParams.isTerminal()) {
|
| + return NOT_A_DICT_POS;
|
| + }
|
| + // Terminal position is found.
|
| + return ptNodeParams.getHeadPos();
|
| + }
|
| + if (!ptNodeParams.hasChildren()) {
|
| + return NOT_A_DICT_POS;
|
| + }
|
| + // Advance to the children nodes.
|
| + readChildNode(ptNodeParams);
|
| + }
|
| + // If we already traversed the tree further than the word is long, there means
|
| + // there was no match (or we would have found it).
|
| + return NOT_A_DICT_POS;
|
| +}
|
| +
|
| +// Read node array size and process empty node arrays. Nodes and arrays are counted up in this
|
| +// method to avoid an infinite loop.
|
| +void DynamicPtReadingHelper::nextPtNodeArray() {
|
| + int ptNodeCountInArray = 0;
|
| + int firstPtNodePos = NOT_A_DICT_POS;
|
| + if (!mPtNodeArrayReader->readPtNodeArrayInfoAndReturnIfValid(
|
| + mReadingState.mPos, &ptNodeCountInArray, &firstPtNodePos)) {
|
| + mIsError = true;
|
| + mReadingState.mPos = NOT_A_DICT_POS;
|
| + return;
|
| + }
|
| + mReadingState.mPosOfThisPtNodeArrayHead = mReadingState.mPos;
|
| + mReadingState.mRemainingPtNodeCountInThisArray = ptNodeCountInArray;
|
| + mReadingState.mPos = firstPtNodePos;
|
| + // Count up nodes and node arrays to avoid infinite loop.
|
| + mReadingState.mTotalPtNodeIndexInThisArrayChain +=
|
| + mReadingState.mRemainingPtNodeCountInThisArray;
|
| + mReadingState.mPtNodeArrayIndexInThisArrayChain++;
|
| + if (mReadingState.mRemainingPtNodeCountInThisArray < 0
|
| + || mReadingState.mTotalPtNodeIndexInThisArrayChain
|
| + > MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP
|
| + || mReadingState.mPtNodeArrayIndexInThisArrayChain
|
| + > MAX_PT_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP) {
|
| + // Invalid dictionary.
|
| + AKLOGI("Invalid dictionary. nodeCount: %d, totalNodeCount: %d, MAX_CHILD_COUNT: %d"
|
| + "nodeArrayCount: %d, MAX_NODE_ARRAY_COUNT: %d",
|
| + mReadingState.mRemainingPtNodeCountInThisArray,
|
| + mReadingState.mTotalPtNodeIndexInThisArrayChain,
|
| + MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP,
|
| + mReadingState.mPtNodeArrayIndexInThisArrayChain,
|
| + MAX_PT_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP);
|
| + ASSERT(false);
|
| + mIsError = true;
|
| + mReadingState.mPos = NOT_A_DICT_POS;
|
| + return;
|
| + }
|
| + if (mReadingState.mRemainingPtNodeCountInThisArray == 0) {
|
| + // Empty node array. Try following forward link.
|
| + followForwardLink();
|
| + }
|
| +}
|
| +
|
| +// Follow the forward link and read the next node array if exists.
|
| +void DynamicPtReadingHelper::followForwardLink() {
|
| + int nextPtNodeArrayPos = NOT_A_DICT_POS;
|
| + if (!mPtNodeArrayReader->readForwardLinkAndReturnIfValid(
|
| + mReadingState.mPos, &nextPtNodeArrayPos)) {
|
| + mIsError = true;
|
| + mReadingState.mPos = NOT_A_DICT_POS;
|
| + return;
|
| + }
|
| + mReadingState.mPosOfLastForwardLinkField = mReadingState.mPos;
|
| + if (nextPtNodeArrayPos != NOT_A_DICT_POS) {
|
| + // Follow the forward link.
|
| + mReadingState.mPos = nextPtNodeArrayPos;
|
| + nextPtNodeArray();
|
| + } else {
|
| + // All node arrays have been read.
|
| + mReadingState.mPos = NOT_A_DICT_POS;
|
| + }
|
| +}
|
| +
|
| +} // namespace latinime
|
|
|