| Index: source/common/dictionarydata.cpp
|
| diff --git a/source/common/dictionarydata.cpp b/source/common/dictionarydata.cpp
|
| index 039871b5be8a1c145beb22df3c037506a4dd700f..cb594c6bb2c1d0c377d49c6608bb00c972459172 100644
|
| --- a/source/common/dictionarydata.cpp
|
| +++ b/source/common/dictionarydata.cpp
|
| @@ -1,6 +1,6 @@
|
| /*
|
| *******************************************************************************
|
| -* Copyright (C) 2013, International Business Machines
|
| +* Copyright (C) 2014, International Business Machines
|
| * Corporation and others. All Rights Reserved.
|
| *******************************************************************************
|
| * dictionarydata.h
|
| @@ -40,22 +40,31 @@ int32_t UCharsDictionaryMatcher::getType() const {
|
| return DictionaryData::TRIE_TYPE_UCHARS;
|
| }
|
|
|
| -int32_t UCharsDictionaryMatcher::matches(UText *text, int32_t maxLength, int32_t *lengths, int32_t &count, int32_t limit, int32_t *values) const {
|
| +int32_t UCharsDictionaryMatcher::matches(UText *text, int32_t maxLength, int32_t limit,
|
| + int32_t *lengths, int32_t *cpLengths, int32_t *values,
|
| + int32_t *prefix) const {
|
| +
|
| UCharsTrie uct(characters);
|
| - UChar32 c = utext_next32(text);
|
| - if (c < 0) {
|
| - return 0;
|
| - }
|
| - UStringTrieResult result = uct.first(c);
|
| - int32_t numChars = 1;
|
| - count = 0;
|
| - for (;;) {
|
| + int32_t startingTextIndex = utext_getNativeIndex(text);
|
| + int32_t wordCount = 0;
|
| + int32_t codePointsMatched = 0;
|
| +
|
| + for (UChar32 c = utext_next32(text); c >= 0; c=utext_next32(text)) {
|
| + UStringTrieResult result = (codePointsMatched == 0) ? uct.first(c) : uct.next(c);
|
| + int32_t lengthMatched = utext_getNativeIndex(text) - startingTextIndex;
|
| + codePointsMatched += 1;
|
| if (USTRINGTRIE_HAS_VALUE(result)) {
|
| - if (count < limit) {
|
| + if (wordCount < limit) {
|
| if (values != NULL) {
|
| - values[count] = uct.getValue();
|
| + values[wordCount] = uct.getValue();
|
| + }
|
| + if (lengths != NULL) {
|
| + lengths[wordCount] = lengthMatched;
|
| + }
|
| + if (cpLengths != NULL) {
|
| + cpLengths[wordCount] = codePointsMatched;
|
| }
|
| - lengths[count++] = numChars;
|
| + ++wordCount;
|
| }
|
| if (result == USTRINGTRIE_FINAL_VALUE) {
|
| break;
|
| @@ -64,20 +73,15 @@ int32_t UCharsDictionaryMatcher::matches(UText *text, int32_t maxLength, int32_t
|
| else if (result == USTRINGTRIE_NO_MATCH) {
|
| break;
|
| }
|
| -
|
| - // TODO: why do we have a text limit if the UText knows its length?
|
| - if (numChars >= maxLength) {
|
| + if (lengthMatched >= maxLength) {
|
| break;
|
| }
|
| + }
|
|
|
| - c = utext_next32(text);
|
| - if (c < 0) {
|
| - break;
|
| - }
|
| - ++numChars;
|
| - result = uct.next(c);
|
| + if (prefix != NULL) {
|
| + *prefix = codePointsMatched;
|
| }
|
| - return numChars;
|
| + return wordCount;
|
| }
|
|
|
| BytesDictionaryMatcher::~BytesDictionaryMatcher() {
|
| @@ -104,22 +108,30 @@ int32_t BytesDictionaryMatcher::getType() const {
|
| return DictionaryData::TRIE_TYPE_BYTES;
|
| }
|
|
|
| -int32_t BytesDictionaryMatcher::matches(UText *text, int32_t maxLength, int32_t *lengths, int32_t &count, int32_t limit, int32_t *values) const {
|
| +int32_t BytesDictionaryMatcher::matches(UText *text, int32_t maxLength, int32_t limit,
|
| + int32_t *lengths, int32_t *cpLengths, int32_t *values,
|
| + int32_t *prefix) const {
|
| BytesTrie bt(characters);
|
| - UChar32 c = utext_next32(text);
|
| - if (c < 0) {
|
| - return 0;
|
| - }
|
| - UStringTrieResult result = bt.first(transform(c));
|
| - int32_t numChars = 1;
|
| - count = 0;
|
| - for (;;) {
|
| + int32_t startingTextIndex = utext_getNativeIndex(text);
|
| + int32_t wordCount = 0;
|
| + int32_t codePointsMatched = 0;
|
| +
|
| + for (UChar32 c = utext_next32(text); c >= 0; c=utext_next32(text)) {
|
| + UStringTrieResult result = (codePointsMatched == 0) ? bt.first(transform(c)) : bt.next(transform(c));
|
| + int32_t lengthMatched = utext_getNativeIndex(text) - startingTextIndex;
|
| + codePointsMatched += 1;
|
| if (USTRINGTRIE_HAS_VALUE(result)) {
|
| - if (count < limit) {
|
| + if (wordCount < limit) {
|
| if (values != NULL) {
|
| - values[count] = bt.getValue();
|
| - }
|
| - lengths[count++] = numChars;
|
| + values[wordCount] = bt.getValue();
|
| + }
|
| + if (lengths != NULL) {
|
| + lengths[wordCount] = lengthMatched;
|
| + }
|
| + if (cpLengths != NULL) {
|
| + cpLengths[wordCount] = codePointsMatched;
|
| + }
|
| + ++wordCount;
|
| }
|
| if (result == USTRINGTRIE_FINAL_VALUE) {
|
| break;
|
| @@ -128,20 +140,15 @@ int32_t BytesDictionaryMatcher::matches(UText *text, int32_t maxLength, int32_t
|
| else if (result == USTRINGTRIE_NO_MATCH) {
|
| break;
|
| }
|
| -
|
| - // TODO: why do we have a text limit if the UText knows its length?
|
| - if (numChars >= maxLength) {
|
| + if (lengthMatched >= maxLength) {
|
| break;
|
| }
|
| + }
|
|
|
| - c = utext_next32(text);
|
| - if (c < 0) {
|
| - break;
|
| - }
|
| - ++numChars;
|
| - result = bt.next(transform(c));
|
| + if (prefix != NULL) {
|
| + *prefix = codePointsMatched;
|
| }
|
| - return numChars;
|
| + return wordCount;
|
| }
|
|
|
|
|
|
|