OLD | NEW |
1 /* | 1 /* |
2 ******************************************************************************* | 2 ******************************************************************************* |
3 * Copyright (C) 2013, International Business Machines | 3 * Copyright (C) 2014, International Business Machines |
4 * Corporation and others. All Rights Reserved. | 4 * Corporation and others. All Rights Reserved. |
5 ******************************************************************************* | 5 ******************************************************************************* |
6 * dictionarydata.h | 6 * dictionarydata.h |
7 * | 7 * |
8 * created on: 2012may31 | 8 * created on: 2012may31 |
9 * created by: Markus W. Scherer & Maxime Serrano | 9 * created by: Markus W. Scherer & Maxime Serrano |
10 */ | 10 */ |
11 | 11 |
12 #include "dictionarydata.h" | 12 #include "dictionarydata.h" |
13 #include "unicode/ucharstrie.h" | 13 #include "unicode/ucharstrie.h" |
(...skipping 19 matching lines...) Expand all Loading... |
33 } | 33 } |
34 | 34 |
35 UCharsDictionaryMatcher::~UCharsDictionaryMatcher() { | 35 UCharsDictionaryMatcher::~UCharsDictionaryMatcher() { |
36 udata_close(file); | 36 udata_close(file); |
37 } | 37 } |
38 | 38 |
39 int32_t UCharsDictionaryMatcher::getType() const { | 39 int32_t UCharsDictionaryMatcher::getType() const { |
40 return DictionaryData::TRIE_TYPE_UCHARS; | 40 return DictionaryData::TRIE_TYPE_UCHARS; |
41 } | 41 } |
42 | 42 |
43 int32_t UCharsDictionaryMatcher::matches(UText *text, int32_t maxLength, int32_t
*lengths, int32_t &count, int32_t limit, int32_t *values) const { | 43 int32_t UCharsDictionaryMatcher::matches(UText *text, int32_t maxLength, int32_t
limit, |
| 44 int32_t *lengths, int32_t *cpLengths, int32_t *value
s, |
| 45 int32_t *prefix) const { |
| 46 |
44 UCharsTrie uct(characters); | 47 UCharsTrie uct(characters); |
45 UChar32 c = utext_next32(text); | 48 int32_t startingTextIndex = utext_getNativeIndex(text); |
46 if (c < 0) { | 49 int32_t wordCount = 0; |
47 return 0; | 50 int32_t codePointsMatched = 0; |
48 } | 51 |
49 UStringTrieResult result = uct.first(c); | 52 for (UChar32 c = utext_next32(text); c >= 0; c=utext_next32(text)) { |
50 int32_t numChars = 1; | 53 UStringTrieResult result = (codePointsMatched == 0) ? uct.first(c) : uct
.next(c); |
51 count = 0; | 54 int32_t lengthMatched = utext_getNativeIndex(text) - startingTextIndex; |
52 for (;;) { | 55 codePointsMatched += 1; |
53 if (USTRINGTRIE_HAS_VALUE(result)) { | 56 if (USTRINGTRIE_HAS_VALUE(result)) { |
54 if (count < limit) { | 57 if (wordCount < limit) { |
55 if (values != NULL) { | 58 if (values != NULL) { |
56 values[count] = uct.getValue(); | 59 values[wordCount] = uct.getValue(); |
57 } | 60 } |
58 lengths[count++] = numChars; | 61 if (lengths != NULL) { |
| 62 lengths[wordCount] = lengthMatched; |
| 63 } |
| 64 if (cpLengths != NULL) { |
| 65 cpLengths[wordCount] = codePointsMatched; |
| 66 } |
| 67 ++wordCount; |
59 } | 68 } |
60 if (result == USTRINGTRIE_FINAL_VALUE) { | 69 if (result == USTRINGTRIE_FINAL_VALUE) { |
61 break; | 70 break; |
62 } | 71 } |
63 } | 72 } |
64 else if (result == USTRINGTRIE_NO_MATCH) { | 73 else if (result == USTRINGTRIE_NO_MATCH) { |
65 break; | 74 break; |
66 } | 75 } |
67 | 76 if (lengthMatched >= maxLength) { |
68 // TODO: why do we have a text limit if the UText knows its length? | |
69 if (numChars >= maxLength) { | |
70 break; | 77 break; |
71 } | 78 } |
| 79 } |
72 | 80 |
73 c = utext_next32(text); | 81 if (prefix != NULL) { |
74 if (c < 0) { | 82 *prefix = codePointsMatched; |
75 break; | |
76 } | |
77 ++numChars; | |
78 result = uct.next(c); | |
79 } | 83 } |
80 return numChars; | 84 return wordCount; |
81 } | 85 } |
82 | 86 |
83 BytesDictionaryMatcher::~BytesDictionaryMatcher() { | 87 BytesDictionaryMatcher::~BytesDictionaryMatcher() { |
84 udata_close(file); | 88 udata_close(file); |
85 } | 89 } |
86 | 90 |
87 UChar32 BytesDictionaryMatcher::transform(UChar32 c) const { | 91 UChar32 BytesDictionaryMatcher::transform(UChar32 c) const { |
88 if ((transformConstant & DictionaryData::TRANSFORM_TYPE_MASK) == DictionaryD
ata::TRANSFORM_TYPE_OFFSET) { | 92 if ((transformConstant & DictionaryData::TRANSFORM_TYPE_MASK) == DictionaryD
ata::TRANSFORM_TYPE_OFFSET) { |
89 if (c == 0x200D) { | 93 if (c == 0x200D) { |
90 return 0xFF; | 94 return 0xFF; |
91 } else if (c == 0x200C) { | 95 } else if (c == 0x200C) { |
92 return 0xFE; | 96 return 0xFE; |
93 } | 97 } |
94 int32_t delta = c - (transformConstant & DictionaryData::TRANSFORM_OFFSE
T_MASK); | 98 int32_t delta = c - (transformConstant & DictionaryData::TRANSFORM_OFFSE
T_MASK); |
95 if (delta < 0 || 0xFD < delta) { | 99 if (delta < 0 || 0xFD < delta) { |
96 return U_SENTINEL; | 100 return U_SENTINEL; |
97 } | 101 } |
98 return (UChar32)delta; | 102 return (UChar32)delta; |
99 } | 103 } |
100 return c; | 104 return c; |
101 } | 105 } |
102 | 106 |
103 int32_t BytesDictionaryMatcher::getType() const { | 107 int32_t BytesDictionaryMatcher::getType() const { |
104 return DictionaryData::TRIE_TYPE_BYTES; | 108 return DictionaryData::TRIE_TYPE_BYTES; |
105 } | 109 } |
106 | 110 |
107 int32_t BytesDictionaryMatcher::matches(UText *text, int32_t maxLength, int32_t
*lengths, int32_t &count, int32_t limit, int32_t *values) const { | 111 int32_t BytesDictionaryMatcher::matches(UText *text, int32_t maxLength, int32_t
limit, |
| 112 int32_t *lengths, int32_t *cpLengths, int32_t *value
s, |
| 113 int32_t *prefix) const { |
108 BytesTrie bt(characters); | 114 BytesTrie bt(characters); |
109 UChar32 c = utext_next32(text); | 115 int32_t startingTextIndex = utext_getNativeIndex(text); |
110 if (c < 0) { | 116 int32_t wordCount = 0; |
111 return 0; | 117 int32_t codePointsMatched = 0; |
112 } | 118 |
113 UStringTrieResult result = bt.first(transform(c)); | 119 for (UChar32 c = utext_next32(text); c >= 0; c=utext_next32(text)) { |
114 int32_t numChars = 1; | 120 UStringTrieResult result = (codePointsMatched == 0) ? bt.first(transform
(c)) : bt.next(transform(c)); |
115 count = 0; | 121 int32_t lengthMatched = utext_getNativeIndex(text) - startingTextIndex; |
116 for (;;) { | 122 codePointsMatched += 1; |
117 if (USTRINGTRIE_HAS_VALUE(result)) { | 123 if (USTRINGTRIE_HAS_VALUE(result)) { |
118 if (count < limit) { | 124 if (wordCount < limit) { |
119 if (values != NULL) { | 125 if (values != NULL) { |
120 values[count] = bt.getValue(); | 126 values[wordCount] = bt.getValue(); |
121 } | 127 } |
122 lengths[count++] = numChars; | 128 if (lengths != NULL) { |
| 129 lengths[wordCount] = lengthMatched; |
| 130 } |
| 131 if (cpLengths != NULL) { |
| 132 cpLengths[wordCount] = codePointsMatched; |
| 133 } |
| 134 ++wordCount; |
123 } | 135 } |
124 if (result == USTRINGTRIE_FINAL_VALUE) { | 136 if (result == USTRINGTRIE_FINAL_VALUE) { |
125 break; | 137 break; |
126 } | 138 } |
127 } | 139 } |
128 else if (result == USTRINGTRIE_NO_MATCH) { | 140 else if (result == USTRINGTRIE_NO_MATCH) { |
129 break; | 141 break; |
130 } | 142 } |
131 | 143 if (lengthMatched >= maxLength) { |
132 // TODO: why do we have a text limit if the UText knows its length? | |
133 if (numChars >= maxLength) { | |
134 break; | 144 break; |
135 } | 145 } |
| 146 } |
136 | 147 |
137 c = utext_next32(text); | 148 if (prefix != NULL) { |
138 if (c < 0) { | 149 *prefix = codePointsMatched; |
139 break; | |
140 } | |
141 ++numChars; | |
142 result = bt.next(transform(c)); | |
143 } | 150 } |
144 return numChars; | 151 return wordCount; |
145 } | 152 } |
146 | 153 |
147 | 154 |
148 U_NAMESPACE_END | 155 U_NAMESPACE_END |
149 | 156 |
150 U_NAMESPACE_USE | 157 U_NAMESPACE_USE |
151 | 158 |
152 U_CAPI int32_t U_EXPORT2 | 159 U_CAPI int32_t U_EXPORT2 |
153 udict_swap(const UDataSwapper *ds, const void *inData, int32_t length, | 160 udict_swap(const UDataSwapper *ds, const void *inData, int32_t length, |
154 void *outData, UErrorCode *pErrorCode) { | 161 void *outData, UErrorCode *pErrorCode) { |
(...skipping 69 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
224 // but may be used later. | 231 // but may be used later. |
225 offset = nextOffset; | 232 offset = nextOffset; |
226 nextOffset = indexes[DictionaryData::IX_RESERVED2_OFFSET]; | 233 nextOffset = indexes[DictionaryData::IX_RESERVED2_OFFSET]; |
227 offset = nextOffset; | 234 offset = nextOffset; |
228 nextOffset = indexes[DictionaryData::IX_TOTAL_SIZE]; | 235 nextOffset = indexes[DictionaryData::IX_TOTAL_SIZE]; |
229 offset = nextOffset; | 236 offset = nextOffset; |
230 } | 237 } |
231 return headerSize + size; | 238 return headerSize + size; |
232 } | 239 } |
233 #endif | 240 #endif |
OLD | NEW |