| OLD | NEW |
| 1 /* | 1 /* |
| 2 ******************************************************************************* | 2 ******************************************************************************* |
| 3 * Copyright (C) 2013, International Business Machines | 3 * Copyright (C) 2014, International Business Machines |
| 4 * Corporation and others. All Rights Reserved. | 4 * Corporation and others. All Rights Reserved. |
| 5 ******************************************************************************* | 5 ******************************************************************************* |
| 6 * dictionarydata.h | 6 * dictionarydata.h |
| 7 * | 7 * |
| 8 * created on: 2012may31 | 8 * created on: 2012may31 |
| 9 * created by: Markus W. Scherer & Maxime Serrano | 9 * created by: Markus W. Scherer & Maxime Serrano |
| 10 */ | 10 */ |
| 11 | 11 |
| 12 #include "dictionarydata.h" | 12 #include "dictionarydata.h" |
| 13 #include "unicode/ucharstrie.h" | 13 #include "unicode/ucharstrie.h" |
| (...skipping 19 matching lines...) Expand all Loading... |
| 33 } | 33 } |
| 34 | 34 |
| 35 UCharsDictionaryMatcher::~UCharsDictionaryMatcher() { | 35 UCharsDictionaryMatcher::~UCharsDictionaryMatcher() { |
| 36 udata_close(file); | 36 udata_close(file); |
| 37 } | 37 } |
| 38 | 38 |
| 39 int32_t UCharsDictionaryMatcher::getType() const { | 39 int32_t UCharsDictionaryMatcher::getType() const { |
| 40 return DictionaryData::TRIE_TYPE_UCHARS; | 40 return DictionaryData::TRIE_TYPE_UCHARS; |
| 41 } | 41 } |
| 42 | 42 |
| 43 int32_t UCharsDictionaryMatcher::matches(UText *text, int32_t maxLength, int32_t *lengths, int32_t &count, int32_t limit, int32_t *values) const { | 43 int32_t UCharsDictionaryMatcher::matches(UText *text, int32_t maxLength, int32_t limit, |
| 44 int32_t *lengths, int32_t *cpLengths, int32_t *values, |
| 45 int32_t *prefix) const { |
| 46 |
| 44 UCharsTrie uct(characters); | 47 UCharsTrie uct(characters); |
| 45 UChar32 c = utext_next32(text); | 48 int32_t startingTextIndex = utext_getNativeIndex(text); |
| 46 if (c < 0) { | 49 int32_t wordCount = 0; |
| 47 return 0; | 50 int32_t codePointsMatched = 0; |
| 48 } | 51 |
| 49 UStringTrieResult result = uct.first(c); | 52 for (UChar32 c = utext_next32(text); c >= 0; c=utext_next32(text)) { |
| 50 int32_t numChars = 1; | 53 UStringTrieResult result = (codePointsMatched == 0) ? uct.first(c) : uct.next(c); |
| 51 count = 0; | 54 int32_t lengthMatched = utext_getNativeIndex(text) - startingTextIndex; |
| 52 for (;;) { | 55 codePointsMatched += 1; |
| 53 if (USTRINGTRIE_HAS_VALUE(result)) { | 56 if (USTRINGTRIE_HAS_VALUE(result)) { |
| 54 if (count < limit) { | 57 if (wordCount < limit) { |
| 55 if (values != NULL) { | 58 if (values != NULL) { |
| 56 values[count] = uct.getValue(); | 59 values[wordCount] = uct.getValue(); |
| 57 } | 60 } |
| 58 lengths[count++] = numChars; | 61 if (lengths != NULL) { |
| 62 lengths[wordCount] = lengthMatched; |
| 63 } |
| 64 if (cpLengths != NULL) { |
| 65 cpLengths[wordCount] = codePointsMatched; |
| 66 } |
| 67 ++wordCount; |
| 59 } | 68 } |
| 60 if (result == USTRINGTRIE_FINAL_VALUE) { | 69 if (result == USTRINGTRIE_FINAL_VALUE) { |
| 61 break; | 70 break; |
| 62 } | 71 } |
| 63 } | 72 } |
| 64 else if (result == USTRINGTRIE_NO_MATCH) { | 73 else if (result == USTRINGTRIE_NO_MATCH) { |
| 65 break; | 74 break; |
| 66 } | 75 } |
| 67 | 76 if (lengthMatched >= maxLength) { |
| 68 // TODO: why do we have a text limit if the UText knows its length? | |
| 69 if (numChars >= maxLength) { | |
| 70 break; | 77 break; |
| 71 } | 78 } |
| 79 } |
| 72 | 80 |
| 73 c = utext_next32(text); | 81 if (prefix != NULL) { |
| 74 if (c < 0) { | 82 *prefix = codePointsMatched; |
| 75 break; | |
| 76 } | |
| 77 ++numChars; | |
| 78 result = uct.next(c); | |
| 79 } | 83 } |
| 80 return numChars; | 84 return wordCount; |
| 81 } | 85 } |
| 82 | 86 |
| 83 BytesDictionaryMatcher::~BytesDictionaryMatcher() { | 87 BytesDictionaryMatcher::~BytesDictionaryMatcher() { |
| 84 udata_close(file); | 88 udata_close(file); |
| 85 } | 89 } |
| 86 | 90 |
| 87 UChar32 BytesDictionaryMatcher::transform(UChar32 c) const { | 91 UChar32 BytesDictionaryMatcher::transform(UChar32 c) const { |
| 88 if ((transformConstant & DictionaryData::TRANSFORM_TYPE_MASK) == DictionaryData::TRANSFORM_TYPE_OFFSET) { | 92 if ((transformConstant & DictionaryData::TRANSFORM_TYPE_MASK) == DictionaryData::TRANSFORM_TYPE_OFFSET) { |
| 89 if (c == 0x200D) { | 93 if (c == 0x200D) { |
| 90 return 0xFF; | 94 return 0xFF; |
| 91 } else if (c == 0x200C) { | 95 } else if (c == 0x200C) { |
| 92 return 0xFE; | 96 return 0xFE; |
| 93 } | 97 } |
| 94 int32_t delta = c - (transformConstant & DictionaryData::TRANSFORM_OFFSET_MASK); | 98 int32_t delta = c - (transformConstant & DictionaryData::TRANSFORM_OFFSET_MASK); |
| 95 if (delta < 0 || 0xFD < delta) { | 99 if (delta < 0 || 0xFD < delta) { |
| 96 return U_SENTINEL; | 100 return U_SENTINEL; |
| 97 } | 101 } |
| 98 return (UChar32)delta; | 102 return (UChar32)delta; |
| 99 } | 103 } |
| 100 return c; | 104 return c; |
| 101 } | 105 } |
| 102 | 106 |
| 103 int32_t BytesDictionaryMatcher::getType() const { | 107 int32_t BytesDictionaryMatcher::getType() const { |
| 104 return DictionaryData::TRIE_TYPE_BYTES; | 108 return DictionaryData::TRIE_TYPE_BYTES; |
| 105 } | 109 } |
| 106 | 110 |
| 107 int32_t BytesDictionaryMatcher::matches(UText *text, int32_t maxLength, int32_t *lengths, int32_t &count, int32_t limit, int32_t *values) const { | 111 int32_t BytesDictionaryMatcher::matches(UText *text, int32_t maxLength, int32_t limit, |
| 112 int32_t *lengths, int32_t *cpLengths, int32_t *values, |
| 113 int32_t *prefix) const { |
| 108 BytesTrie bt(characters); | 114 BytesTrie bt(characters); |
| 109 UChar32 c = utext_next32(text); | 115 int32_t startingTextIndex = utext_getNativeIndex(text); |
| 110 if (c < 0) { | 116 int32_t wordCount = 0; |
| 111 return 0; | 117 int32_t codePointsMatched = 0; |
| 112 } | 118 |
| 113 UStringTrieResult result = bt.first(transform(c)); | 119 for (UChar32 c = utext_next32(text); c >= 0; c=utext_next32(text)) { |
| 114 int32_t numChars = 1; | 120 UStringTrieResult result = (codePointsMatched == 0) ? bt.first(transform(c)) : bt.next(transform(c)); |
| 115 count = 0; | 121 int32_t lengthMatched = utext_getNativeIndex(text) - startingTextIndex; |
| 116 for (;;) { | 122 codePointsMatched += 1; |
| 117 if (USTRINGTRIE_HAS_VALUE(result)) { | 123 if (USTRINGTRIE_HAS_VALUE(result)) { |
| 118 if (count < limit) { | 124 if (wordCount < limit) { |
| 119 if (values != NULL) { | 125 if (values != NULL) { |
| 120 values[count] = bt.getValue(); | 126 values[wordCount] = bt.getValue(); |
| 121 } | 127 } |
| 122 lengths[count++] = numChars; | 128 if (lengths != NULL) { |
| 129 lengths[wordCount] = lengthMatched; |
| 130 } |
| 131 if (cpLengths != NULL) { |
| 132 cpLengths[wordCount] = codePointsMatched; |
| 133 } |
| 134 ++wordCount; |
| 123 } | 135 } |
| 124 if (result == USTRINGTRIE_FINAL_VALUE) { | 136 if (result == USTRINGTRIE_FINAL_VALUE) { |
| 125 break; | 137 break; |
| 126 } | 138 } |
| 127 } | 139 } |
| 128 else if (result == USTRINGTRIE_NO_MATCH) { | 140 else if (result == USTRINGTRIE_NO_MATCH) { |
| 129 break; | 141 break; |
| 130 } | 142 } |
| 131 | 143 if (lengthMatched >= maxLength) { |
| 132 // TODO: why do we have a text limit if the UText knows its length? | |
| 133 if (numChars >= maxLength) { | |
| 134 break; | 144 break; |
| 135 } | 145 } |
| 146 } |
| 136 | 147 |
| 137 c = utext_next32(text); | 148 if (prefix != NULL) { |
| 138 if (c < 0) { | 149 *prefix = codePointsMatched; |
| 139 break; | |
| 140 } | |
| 141 ++numChars; | |
| 142 result = bt.next(transform(c)); | |
| 143 } | 150 } |
| 144 return numChars; | 151 return wordCount; |
| 145 } | 152 } |
| 146 | 153 |
| 147 | 154 |
| 148 U_NAMESPACE_END | 155 U_NAMESPACE_END |
| 149 | 156 |
| 150 U_NAMESPACE_USE | 157 U_NAMESPACE_USE |
| 151 | 158 |
| 152 U_CAPI int32_t U_EXPORT2 | 159 U_CAPI int32_t U_EXPORT2 |
| 153 udict_swap(const UDataSwapper *ds, const void *inData, int32_t length, | 160 udict_swap(const UDataSwapper *ds, const void *inData, int32_t length, |
| 154 void *outData, UErrorCode *pErrorCode) { | 161 void *outData, UErrorCode *pErrorCode) { |
| (...skipping 69 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 224 // but may be used later. | 231 // but may be used later. |
| 225 offset = nextOffset; | 232 offset = nextOffset; |
| 226 nextOffset = indexes[DictionaryData::IX_RESERVED2_OFFSET]; | 233 nextOffset = indexes[DictionaryData::IX_RESERVED2_OFFSET]; |
| 227 offset = nextOffset; | 234 offset = nextOffset; |
| 228 nextOffset = indexes[DictionaryData::IX_TOTAL_SIZE]; | 235 nextOffset = indexes[DictionaryData::IX_TOTAL_SIZE]; |
| 229 offset = nextOffset; | 236 offset = nextOffset; |
| 230 } | 237 } |
| 231 return headerSize + size; | 238 return headerSize + size; |
| 232 } | 239 } |
| 233 #endif | 240 #endif |
| OLD | NEW |