Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1376)

Unified Diff: third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_writing_helper.cpp

Issue 1247903003: Add spellcheck and word suggestion to the prediction service (Closed) Base URL: https://github.com/domokit/mojo.git@master
Patch Set: format README and CHROMIUM.diff Created 5 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_writing_helper.cpp
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_writing_helper.cpp b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_writing_helper.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..08d1c09fb162a1600c41e1b074d14ad48e5892b7
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_writing_helper.cpp
@@ -0,0 +1,304 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * !!!!! DO NOT EDIT THIS FILE !!!!!
+ *
+ * This file was generated from
+ * suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp
+ */
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_writing_helper.h"
+
+#include <cstring>
+#include <queue>
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/header/header_policy.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/shortcut/ver4_shortcut_list_policy.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_buffers.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_dict_constants.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_reader.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_node_writer.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/backward/v402/ver4_pt_node_array_reader.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/file_utils.h"
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
+
+namespace latinime {
+namespace backward {
+namespace v402 {
+
+bool Ver4PatriciaTrieWritingHelper::writeToDictFile(const char *const dictDirPath,
+ const int unigramCount, const int bigramCount) const {
+ const HeaderPolicy *const headerPolicy = mBuffers->getHeaderPolicy();
+ BufferWithExtendableBuffer headerBuffer(
+ BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE);
+ const int extendedRegionSize = headerPolicy->getExtendedRegionSize()
+ + mBuffers->getTrieBuffer()->getUsedAdditionalBufferSize();
+ if (!headerPolicy->fillInAndWriteHeaderToBuffer(false /* updatesLastDecayedTime */,
+ unigramCount, bigramCount, extendedRegionSize, &headerBuffer)) {
+ AKLOGE("Cannot write header structure to buffer. "
+ "updatesLastDecayedTime: %d, unigramCount: %d, bigramCount: %d, "
+ "extendedRegionSize: %d", false, unigramCount, bigramCount,
+ extendedRegionSize);
+ return false;
+ }
+ return mBuffers->flushHeaderAndDictBuffers(dictDirPath, &headerBuffer);
+}
+
+bool Ver4PatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNodeArrayPos,
+ const char *const dictDirPath) {
+ const HeaderPolicy *const headerPolicy = mBuffers->getHeaderPolicy();
+ Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers(
+ Ver4DictBuffers::createVer4DictBuffers(headerPolicy,
+ Ver4DictConstants::MAX_DICTIONARY_SIZE));
+ int unigramCount = 0;
+ int bigramCount = 0;
+ if (!runGC(rootPtNodeArrayPos, headerPolicy, dictBuffers.get(), &unigramCount, &bigramCount)) {
+ return false;
+ }
+ BufferWithExtendableBuffer headerBuffer(
+ BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE);
+ if (!headerPolicy->fillInAndWriteHeaderToBuffer(true /* updatesLastDecayedTime */,
+ unigramCount, bigramCount, 0 /* extendedRegionSize */, &headerBuffer)) {
+ return false;
+ }
+ return dictBuffers->flushHeaderAndDictBuffers(dictDirPath, &headerBuffer);
+}
+
+bool Ver4PatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
+ const HeaderPolicy *const headerPolicy, Ver4DictBuffers *const buffersToWrite,
+ int *const outUnigramCount, int *const outBigramCount) {
+ Ver4PatriciaTrieNodeReader ptNodeReader(mBuffers->getTrieBuffer(),
+ mBuffers->getProbabilityDictContent(), headerPolicy);
+ Ver4PtNodeArrayReader ptNodeArrayReader(mBuffers->getTrieBuffer());
+ Ver4BigramListPolicy bigramPolicy(mBuffers->getMutableBigramDictContent(),
+ mBuffers->getTerminalPositionLookupTable(), headerPolicy);
+ Ver4ShortcutListPolicy shortcutPolicy(mBuffers->getMutableShortcutDictContent(),
+ mBuffers->getTerminalPositionLookupTable());
+ Ver4PatriciaTrieNodeWriter ptNodeWriter(mBuffers->getWritableTrieBuffer(),
+ mBuffers, headerPolicy, &ptNodeReader, &ptNodeArrayReader, &bigramPolicy,
+ &shortcutPolicy);
+
+ DynamicPtReadingHelper readingHelper(&ptNodeReader, &ptNodeArrayReader);
+ readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
+ DynamicPtGcEventListeners
+ ::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
+ traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted(
+ &ptNodeWriter);
+ if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
+ &traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted)) {
+ return false;
+ }
+ const int unigramCount = traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
+ .getValidUnigramCount();
+ const int maxUnigramCount = headerPolicy->getMaxUnigramCount();
+ if (headerPolicy->isDecayingDict() && unigramCount > maxUnigramCount) {
+ if (!truncateUnigrams(&ptNodeReader, &ptNodeWriter, maxUnigramCount)) {
+ AKLOGE("Cannot remove unigrams. current: %d, max: %d", unigramCount,
+ maxUnigramCount);
+ return false;
+ }
+ }
+
+ readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
+ DynamicPtGcEventListeners::TraversePolicyToUpdateBigramProbability
+ traversePolicyToUpdateBigramProbability(&ptNodeWriter);
+ if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
+ &traversePolicyToUpdateBigramProbability)) {
+ return false;
+ }
+ const int bigramCount = traversePolicyToUpdateBigramProbability.getValidBigramEntryCount();
+ const int maxBigramCount = headerPolicy->getMaxBigramCount();
+ if (headerPolicy->isDecayingDict() && bigramCount > maxBigramCount) {
+ if (!truncateBigrams(maxBigramCount)) {
+ AKLOGE("Cannot remove bigrams. current: %d, max: %d", bigramCount, maxBigramCount);
+ return false;
+ }
+ }
+
+ // Mapping from positions in mBuffer to positions in bufferToWrite.
+ PtNodeWriter::DictPositionRelocationMap dictPositionRelocationMap;
+ readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
+ Ver4PatriciaTrieNodeWriter ptNodeWriterForNewBuffers(buffersToWrite->getWritableTrieBuffer(),
+ buffersToWrite, headerPolicy, &ptNodeReader, &ptNodeArrayReader, &bigramPolicy,
+ &shortcutPolicy);
+ DynamicPtGcEventListeners::TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
+ traversePolicyToPlaceAndWriteValidPtNodesToBuffer(&ptNodeWriterForNewBuffers,
+ buffersToWrite->getWritableTrieBuffer(), &dictPositionRelocationMap);
+ if (!readingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
+ &traversePolicyToPlaceAndWriteValidPtNodesToBuffer)) {
+ return false;
+ }
+
+ // Create policy instances for the GCed dictionary.
+ Ver4PatriciaTrieNodeReader newPtNodeReader(buffersToWrite->getTrieBuffer(),
+ buffersToWrite->getProbabilityDictContent(), headerPolicy);
+ Ver4PtNodeArrayReader newPtNodeArrayreader(buffersToWrite->getTrieBuffer());
+ Ver4BigramListPolicy newBigramPolicy(buffersToWrite->getMutableBigramDictContent(),
+ buffersToWrite->getTerminalPositionLookupTable(), headerPolicy);
+ Ver4ShortcutListPolicy newShortcutPolicy(buffersToWrite->getMutableShortcutDictContent(),
+ buffersToWrite->getTerminalPositionLookupTable());
+ Ver4PatriciaTrieNodeWriter newPtNodeWriter(buffersToWrite->getWritableTrieBuffer(),
+ buffersToWrite, headerPolicy, &newPtNodeReader, &newPtNodeArrayreader, &newBigramPolicy,
+ &newShortcutPolicy);
+ // Re-assign terminal IDs for valid terminal PtNodes.
+ TerminalPositionLookupTable::TerminalIdMap terminalIdMap;
+ if(!buffersToWrite->getMutableTerminalPositionLookupTable()->runGCTerminalIds(
+ &terminalIdMap)) {
+ return false;
+ }
+ // Run GC for probability dict content.
+ if (!buffersToWrite->getMutableProbabilityDictContent()->runGC(&terminalIdMap,
+ mBuffers->getProbabilityDictContent())) {
+ return false;
+ }
+ // Run GC for bigram dict content.
+ if(!buffersToWrite->getMutableBigramDictContent()->runGC(&terminalIdMap,
+ mBuffers->getBigramDictContent(), outBigramCount)) {
+ return false;
+ }
+ // Run GC for shortcut dict content.
+ if(!buffersToWrite->getMutableShortcutDictContent()->runGC(&terminalIdMap,
+ mBuffers->getShortcutDictContent())) {
+ return false;
+ }
+ DynamicPtReadingHelper newDictReadingHelper(&newPtNodeReader, &newPtNodeArrayreader);
+ newDictReadingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
+ DynamicPtGcEventListeners::TraversePolicyToUpdateAllPositionFields
+ traversePolicyToUpdateAllPositionFields(&newPtNodeWriter, &dictPositionRelocationMap);
+ if (!newDictReadingHelper.traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner(
+ &traversePolicyToUpdateAllPositionFields)) {
+ return false;
+ }
+ newDictReadingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
+ TraversePolicyToUpdateAllPtNodeFlagsAndTerminalIds
+ traversePolicyToUpdateAllPtNodeFlagsAndTerminalIds(&newPtNodeWriter, &terminalIdMap);
+ if (!newDictReadingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
+ &traversePolicyToUpdateAllPtNodeFlagsAndTerminalIds)) {
+ return false;
+ }
+ *outUnigramCount = traversePolicyToUpdateAllPositionFields.getUnigramCount();
+ return true;
+}
+
+bool Ver4PatriciaTrieWritingHelper::truncateUnigrams(
+ const Ver4PatriciaTrieNodeReader *const ptNodeReader,
+ Ver4PatriciaTrieNodeWriter *const ptNodeWriter, const int maxUnigramCount) {
+ const TerminalPositionLookupTable *const terminalPosLookupTable =
+ mBuffers->getTerminalPositionLookupTable();
+ const int nextTerminalId = terminalPosLookupTable->getNextTerminalId();
+ std::priority_queue<DictProbability, std::vector<DictProbability>, DictProbabilityComparator>
+ priorityQueue;
+ for (int i = 0; i < nextTerminalId; ++i) {
+ const int terminalPos = terminalPosLookupTable->getTerminalPtNodePosition(i);
+ if (terminalPos == NOT_A_DICT_POS) {
+ continue;
+ }
+ const ProbabilityEntry probabilityEntry =
+ mBuffers->getProbabilityDictContent()->getProbabilityEntry(i);
+ const int probability = probabilityEntry.hasHistoricalInfo() ?
+ ForgettingCurveUtils::decodeProbability(
+ probabilityEntry.getHistoricalInfo(), mBuffers->getHeaderPolicy()) :
+ probabilityEntry.getProbability();
+ priorityQueue.push(DictProbability(terminalPos, probability,
+ probabilityEntry.getHistoricalInfo()->getTimeStamp()));
+ }
+
+ // Delete unigrams.
+ while (static_cast<int>(priorityQueue.size()) > maxUnigramCount) {
+ const int ptNodePos = priorityQueue.top().getDictPos();
+ priorityQueue.pop();
+ const PtNodeParams ptNodeParams =
+ ptNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos);
+ if (ptNodeParams.representsNonWordInfo()) {
+ continue;
+ }
+ if (!ptNodeWriter->markPtNodeAsWillBecomeNonTerminal(&ptNodeParams)) {
+ AKLOGE("Cannot mark PtNode as willBecomeNonterminal. PtNode pos: %d", ptNodePos);
+ return false;
+ }
+ }
+ return true;
+}
+
+bool Ver4PatriciaTrieWritingHelper::truncateBigrams(const int maxBigramCount) {
+ const TerminalPositionLookupTable *const terminalPosLookupTable =
+ mBuffers->getTerminalPositionLookupTable();
+ const int nextTerminalId = terminalPosLookupTable->getNextTerminalId();
+ std::priority_queue<DictProbability, std::vector<DictProbability>, DictProbabilityComparator>
+ priorityQueue;
+ BigramDictContent *const bigramDictContent = mBuffers->getMutableBigramDictContent();
+ for (int i = 0; i < nextTerminalId; ++i) {
+ const int bigramListPos = bigramDictContent->getBigramListHeadPos(i);
+ if (bigramListPos == NOT_A_DICT_POS) {
+ continue;
+ }
+ bool hasNext = true;
+ int readingPos = bigramListPos;
+ while (hasNext) {
+ const int entryPos = readingPos;
+ const BigramEntry bigramEntry =
+ bigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
+ hasNext = bigramEntry.hasNext();
+ if (!bigramEntry.isValid()) {
+ continue;
+ }
+ const int probability = bigramEntry.hasHistoricalInfo() ?
+ ForgettingCurveUtils::decodeProbability(
+ bigramEntry.getHistoricalInfo(), mBuffers->getHeaderPolicy()) :
+ bigramEntry.getProbability();
+ priorityQueue.push(DictProbability(entryPos, probability,
+ bigramEntry.getHistoricalInfo()->getTimeStamp()));
+ }
+ }
+
+ // Delete bigrams.
+ while (static_cast<int>(priorityQueue.size()) > maxBigramCount) {
+ const int entryPos = priorityQueue.top().getDictPos();
+ const BigramEntry bigramEntry = bigramDictContent->getBigramEntry(entryPos);
+ const BigramEntry invalidatedBigramEntry = bigramEntry.getInvalidatedEntry();
+ if (!bigramDictContent->writeBigramEntry(&invalidatedBigramEntry, entryPos)) {
+ AKLOGE("Cannot write bigram entry to remove. pos: %d", entryPos);
+ return false;
+ }
+ priorityQueue.pop();
+ }
+ return true;
+}
+
+bool Ver4PatriciaTrieWritingHelper::TraversePolicyToUpdateAllPtNodeFlagsAndTerminalIds
+ ::onVisitingPtNode(const PtNodeParams *const ptNodeParams) {
+ if (!ptNodeParams->isTerminal()) {
+ return true;
+ }
+ TerminalPositionLookupTable::TerminalIdMap::const_iterator it =
+ mTerminalIdMap->find(ptNodeParams->getTerminalId());
+ if (it == mTerminalIdMap->end()) {
+ AKLOGE("terminal Id %d is not in the terminal position map. map size: %zd",
+ ptNodeParams->getTerminalId(), mTerminalIdMap->size());
+ return false;
+ }
+ if (!mPtNodeWriter->updateTerminalId(ptNodeParams, it->second)) {
+ AKLOGE("Cannot update terminal id. %d -> %d", it->first, it->second);
+ }
+ return mPtNodeWriter->updatePtNodeHasBigramsAndShortcutTargetsFlags(ptNodeParams);
+}
+
+} // namespace v402
+} // namespace backward
+} // namespace latinime

Powered by Google App Engine
This is Rietveld 408576698