Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(396)

Unified Diff: third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp

Issue 1247903003: Add spellcheck and word suggestion to the prediction service (Closed) Base URL: https://github.com/domokit/mojo.git@master
Patch Set: format README and CHROMIUM.diff Created 5 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp
diff --git a/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..ffb37919bc68b25fe034cb578a3abe91fdb536c6
--- /dev/null
+++ b/third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.cpp
@@ -0,0 +1,219 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h"
+
+#include "third_party/android_prediction/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+
+namespace latinime {
+
+const int BigramDictContent::INVALID_LINKED_ENTRY_POS = Ver4DictConstants::NOT_A_TERMINAL_ID;
+
+const BigramEntry BigramDictContent::getBigramEntryAndAdvancePosition(
+ int *const bigramEntryPos) const {
+ const BufferWithExtendableBuffer *const bigramListBuffer = getContentBuffer();
+ const int bigramEntryTailPos = (*bigramEntryPos) + getBigramEntrySize();
+ if (*bigramEntryPos < 0 || bigramEntryTailPos > bigramListBuffer->getTailPosition()) {
+ AKLOGE("Invalid bigram entry position. bigramEntryPos: %d, bigramEntryTailPos: %d, "
+ "bufSize: %d", *bigramEntryPos, bigramEntryTailPos,
+ bigramListBuffer->getTailPosition());
+ ASSERT(false);
+ return BigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
+ Ver4DictConstants::NOT_A_TERMINAL_ID);
+ }
+ const int bigramFlags = bigramListBuffer->readUintAndAdvancePosition(
+ Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, bigramEntryPos);
+ const bool isLink = (bigramFlags & Ver4DictConstants::BIGRAM_IS_LINK_MASK) != 0;
+ int probability = NOT_A_PROBABILITY;
+ int timestamp = NOT_A_TIMESTAMP;
+ int level = 0;
+ int count = 0;
+ if (mHasHistoricalInfo) {
+ timestamp = bigramListBuffer->readUintAndAdvancePosition(
+ Ver4DictConstants::TIME_STAMP_FIELD_SIZE, bigramEntryPos);
+ level = bigramListBuffer->readUintAndAdvancePosition(
+ Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, bigramEntryPos);
+ count = bigramListBuffer->readUintAndAdvancePosition(
+ Ver4DictConstants::WORD_COUNT_FIELD_SIZE, bigramEntryPos);
+ } else {
+ probability = bigramListBuffer->readUintAndAdvancePosition(
+ Ver4DictConstants::PROBABILITY_SIZE, bigramEntryPos);
+ }
+ const int encodedTargetTerminalId = bigramListBuffer->readUintAndAdvancePosition(
+ Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, bigramEntryPos);
+ const int targetTerminalId =
+ (encodedTargetTerminalId == Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID) ?
+ Ver4DictConstants::NOT_A_TERMINAL_ID : encodedTargetTerminalId;
+ if (isLink) {
+ const int linkedEntryPos = targetTerminalId;
+ if (linkedEntryPos == INVALID_LINKED_ENTRY_POS) {
+ // Bigram list terminator is found.
+ return BigramEntry(false /* hasNext */, NOT_A_PROBABILITY,
+ Ver4DictConstants::NOT_A_TERMINAL_ID);
+ }
+ *bigramEntryPos = linkedEntryPos;
+ return getBigramEntryAndAdvancePosition(bigramEntryPos);
+ }
+ // hasNext is always true because we should continue to read the next entry until the terminator
+ // is found.
+ if (mHasHistoricalInfo) {
+ const HistoricalInfo historicalInfo(timestamp, level, count);
+ return BigramEntry(true /* hasNext */, probability, &historicalInfo, targetTerminalId);
+ } else {
+ return BigramEntry(true /* hasNext */, probability, targetTerminalId);
+ }
+}
+
+bool BigramDictContent::writeBigramEntryAndAdvancePosition(
+ const BigramEntry *const bigramEntryToWrite, int *const entryWritingPos) {
+ return writeBigramEntryAttributesAndAdvancePosition(false /* isLink */,
+ bigramEntryToWrite->getProbability(), bigramEntryToWrite->getTargetTerminalId(),
+ bigramEntryToWrite->getHistoricalInfo()->getTimeStamp(),
+ bigramEntryToWrite->getHistoricalInfo()->getLevel(),
+ bigramEntryToWrite->getHistoricalInfo()->getCount(),
+ entryWritingPos);
+}
+
+bool BigramDictContent::writeBigramEntryAttributesAndAdvancePosition(
+ const bool isLink, const int probability, const int targetTerminalId,
+ const int timestamp, const int level, const int count, int *const entryWritingPos) {
+ BufferWithExtendableBuffer *const bigramListBuffer = getWritableContentBuffer();
+ const int bigramFlags = isLink ? Ver4DictConstants::BIGRAM_IS_LINK_MASK : 0;
+ if (!bigramListBuffer->writeUintAndAdvancePosition(bigramFlags,
+ Ver4DictConstants::BIGRAM_FLAGS_FIELD_SIZE, entryWritingPos)) {
+ AKLOGE("Cannot write bigram flags. pos: %d, flags: %x", *entryWritingPos, bigramFlags);
+ return false;
+ }
+ if (mHasHistoricalInfo) {
+ if (!bigramListBuffer->writeUintAndAdvancePosition(timestamp,
+ Ver4DictConstants::TIME_STAMP_FIELD_SIZE, entryWritingPos)) {
+ AKLOGE("Cannot write bigram timestamps. pos: %d, timestamp: %d", *entryWritingPos,
+ timestamp);
+ return false;
+ }
+ if (!bigramListBuffer->writeUintAndAdvancePosition(level,
+ Ver4DictConstants::WORD_LEVEL_FIELD_SIZE, entryWritingPos)) {
+ AKLOGE("Cannot write bigram level. pos: %d, level: %d", *entryWritingPos,
+ level);
+ return false;
+ }
+ if (!bigramListBuffer->writeUintAndAdvancePosition(count,
+ Ver4DictConstants::WORD_COUNT_FIELD_SIZE, entryWritingPos)) {
+ AKLOGE("Cannot write bigram count. pos: %d, count: %d", *entryWritingPos,
+ count);
+ return false;
+ }
+ } else {
+ if (!bigramListBuffer->writeUintAndAdvancePosition(probability,
+ Ver4DictConstants::PROBABILITY_SIZE, entryWritingPos)) {
+ AKLOGE("Cannot write bigram probability. pos: %d, probability: %d", *entryWritingPos,
+ probability);
+ return false;
+ }
+ }
+ const int targetTerminalIdToWrite = (targetTerminalId == Ver4DictConstants::NOT_A_TERMINAL_ID) ?
+ Ver4DictConstants::INVALID_BIGRAM_TARGET_TERMINAL_ID : targetTerminalId;
+ if (!bigramListBuffer->writeUintAndAdvancePosition(targetTerminalIdToWrite,
+ Ver4DictConstants::BIGRAM_TARGET_TERMINAL_ID_FIELD_SIZE, entryWritingPos)) {
+ AKLOGE("Cannot write bigram target terminal id. pos: %d, target terminal id: %d",
+ *entryWritingPos, targetTerminalId);
+ return false;
+ }
+ return true;
+}
+
+bool BigramDictContent::writeLink(const int linkedEntryPos, const int writingPos) {
+ const int targetTerminalId = linkedEntryPos;
+ int pos = writingPos;
+ return writeBigramEntryAttributesAndAdvancePosition(true /* isLink */,
+ NOT_A_PROBABILITY /* probability */, targetTerminalId, NOT_A_TIMESTAMP, 0 /* level */,
+ 0 /* count */, &pos);
+}
+
+bool BigramDictContent::runGC(const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
+ const BigramDictContent *const originalBigramDictContent,
+ int *const outBigramEntryCount) {
+ for (TerminalPositionLookupTable::TerminalIdMap::const_iterator it = terminalIdMap->begin();
+ it != terminalIdMap->end(); ++it) {
+ const int originalBigramListPos =
+ originalBigramDictContent->getBigramListHeadPos(it->first);
+ if (originalBigramListPos == NOT_A_DICT_POS) {
+ // This terminal does not have a bigram list.
+ continue;
+ }
+ const int bigramListPos = getContentBuffer()->getTailPosition();
+ int bigramEntryCount = 0;
+ // Copy bigram list with GC from original content.
+ if (!runGCBigramList(originalBigramListPos, originalBigramDictContent, bigramListPos,
+ terminalIdMap, &bigramEntryCount)) {
+ AKLOGE("Cannot complete GC for the bigram list. original pos: %d, pos: %d",
+ originalBigramListPos, bigramListPos);
+ return false;
+ }
+ if (bigramEntryCount == 0) {
+ // All bigram entries are useless. This terminal does not have a bigram list.
+ continue;
+ }
+ *outBigramEntryCount += bigramEntryCount;
+ // Set bigram list position to the lookup table.
+ if (!getUpdatableAddressLookupTable()->set(it->second, bigramListPos)) {
+ AKLOGE("Cannot set bigram list position. terminal id: %d, pos: %d",
+ it->second, bigramListPos);
+ return false;
+ }
+ }
+ return true;
+}
+
+// Returns whether GC for the bigram list was succeeded or not.
+bool BigramDictContent::runGCBigramList(const int bigramListPos,
+ const BigramDictContent *const sourceBigramDictContent, const int toPos,
+ const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
+ int *const outEntryCount) {
+ bool hasNext = true;
+ int readingPos = bigramListPos;
+ int writingPos = toPos;
+ while (hasNext) {
+ const BigramEntry originalBigramEntry =
+ sourceBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos);
+ hasNext = originalBigramEntry.hasNext();
+ if (!originalBigramEntry.isValid()) {
+ continue;
+ }
+ TerminalPositionLookupTable::TerminalIdMap::const_iterator it =
+ terminalIdMap->find(originalBigramEntry.getTargetTerminalId());
+ if (it == terminalIdMap->end()) {
+ // Target word has been removed.
+ continue;
+ }
+ const BigramEntry updatedBigramEntry =
+ originalBigramEntry.updateTargetTerminalIdAndGetEntry(it->second);
+ if (!writeBigramEntryAndAdvancePosition(&updatedBigramEntry, &writingPos)) {
+ AKLOGE("Cannot write bigram entry to run GC. pos: %d", writingPos);
+ return false;
+ }
+ *outEntryCount += 1;
+ }
+ if (*outEntryCount > 0) {
+ if (!writeTerminator(writingPos)) {
+ AKLOGE("Cannot write terminator to run GC. pos: %d", writingPos);
+ return false;
+ }
+ }
+ return true;
+}
+
+} // namespace latinime

Powered by Google App Engine
This is Rietveld 408576698