OLD | NEW |
1 /** | 1 /** |
2 ******************************************************************************* | 2 ******************************************************************************* |
3 * Copyright (C) 2006, International Business Machines Corporation and others. * | 3 * Copyright (C) 2006, International Business Machines Corporation and others. * |
4 * All Rights Reserved. * | 4 * All Rights Reserved. * |
5 ******************************************************************************* | 5 ******************************************************************************* |
6 */ | 6 */ |
7 | 7 |
8 #ifndef TRIEDICT_H | 8 #ifndef TRIEDICT_H |
9 #define TRIEDICT_H | 9 #define TRIEDICT_H |
10 | 10 |
(...skipping 29 matching lines...) Expand all Loading... |
40 */ | 40 */ |
41 | 41 |
42 U_CAPI int32_t U_EXPORT2 | 42 U_CAPI int32_t U_EXPORT2 |
43 triedict_swap(const UDataSwapper *ds, | 43 triedict_swap(const UDataSwapper *ds, |
44 const void *inData, int32_t length, void *outData, | 44 const void *inData, int32_t length, void *outData, |
45 UErrorCode *pErrorCode); | 45 UErrorCode *pErrorCode); |
46 | 46 |
47 U_NAMESPACE_BEGIN | 47 U_NAMESPACE_BEGIN |
48 | 48 |
49 class StringEnumeration; | 49 class StringEnumeration; |
50 struct CompactTrieHeader; | |
51 | 50 |
52 /******************************************************************* | 51 /******************************************************************* |
53 * TrieWordDictionary | 52 * TrieWordDictionary |
54 */ | 53 */ |
55 | 54 |
56 /** | 55 /** |
57 * <p>TrieWordDictionary is an abstract class that represents a word | 56 * <p>TrieWordDictionary is an abstract class that represents a word |
58 * dictionary based on a trie. The base protocol is read-only. | 57 * dictionary based on a trie. The base protocol is read-only. |
59 * Subclasses may allow writing.</p> | 58 * Subclasses may allow writing.</p> |
60 */ | 59 */ |
61 class U_COMMON_API TrieWordDictionary : public UMemory { | 60 class U_COMMON_API TrieWordDictionary : public UMemory { |
62 public: | 61 public: |
63 | 62 |
64 /** | 63 /** |
65 * <p>Default constructor.</p> | 64 * <p>Default constructor.</p> |
66 * | 65 * |
67 */ | 66 */ |
68 TrieWordDictionary(); | 67 TrieWordDictionary(); |
69 | 68 |
70 /** | 69 /** |
71 * <p>Virtual destructor.</p> | 70 * <p>Virtual destructor.</p> |
72 */ | 71 */ |
73 virtual ~TrieWordDictionary(); | 72 virtual ~TrieWordDictionary(); |
74 | 73 |
| 74 /** |
| 75 * <p>Returns true if the dictionary contains values associated with each word
.</p> |
| 76 */ |
| 77 virtual UBool getValued() const = 0; |
| 78 |
75 /** | 79 /** |
76 * <p>Find dictionary words that match the text.</p> | 80 * <p>Find dictionary words that match the text.</p> |
77 * | 81 * |
78 * @param text A UText representing the text. The | 82 * @param text A UText representing the text. The |
79 * iterator is left after the longest prefix match in the dictionary. | 83 * iterator is left after the longest prefix match in the dictionary. |
80 * @param start The current position in text. | |
81 * @param maxLength The maximum number of code units to match. | 84 * @param maxLength The maximum number of code units to match. |
82 * @param lengths An array that is filled with the lengths of words that matche
d. | 85 * @param lengths An array that is filled with the lengths of words that matche
d. |
83 * @param count Filled with the number of elements output in lengths. | 86 * @param count Filled with the number of elements output in lengths. |
84 * @param limit The size of the lengths array; this limits the number of words
output. | 87 * @param limit The size of the lengths array; this limits the number of words
output. |
| 88 * @param values An array that is filled with the values associated with the ma
tched words. |
85 * @return The number of characters in text that were matched. | 89 * @return The number of characters in text that were matched. |
86 */ | 90 */ |
87 virtual int32_t matches( UText *text, | 91 virtual int32_t matches( UText *text, |
88 int32_t maxLength, | 92 int32_t maxLength, |
89 int32_t *lengths, | 93 int32_t *lengths, |
90 int &count, | 94 int &count, |
91 int limit ) const = 0; | 95 int limit, |
| 96 uint16_t *values = NULL) const = 0; |
92 | 97 |
93 /** | 98 /** |
94 * <p>Return a StringEnumeration for iterating all the words in the dictionary
.</p> | 99 * <p>Return a StringEnumeration for iterating all the words in the dictionary
.</p> |
95 * | 100 * |
96 * @param status A status code recording the success of the call. | 101 * @param status A status code recording the success of the call. |
97 * @return A StringEnumeration that will iterate through the whole dictionary. | 102 * @return A StringEnumeration that will iterate through the whole dictionary. |
98 * The caller is responsible for closing it. The order is unspecified. | 103 * The caller is responsible for closing it. The order is unspecified. |
99 */ | 104 */ |
100 virtual StringEnumeration *openWords( UErrorCode &status ) const = 0; | 105 virtual StringEnumeration *openWords( UErrorCode &status ) const = 0; |
101 | 106 |
(...skipping 19 matching lines...) Expand all Loading... |
121 | 126 |
122 TernaryNode *fTrie; | 127 TernaryNode *fTrie; |
123 | 128 |
124 /** | 129 /** |
125 * A UText for internal use | 130 * A UText for internal use |
126 * @internal | 131 * @internal |
127 */ | 132 */ |
128 | 133 |
129 UText *fIter; | 134 UText *fIter; |
130 | 135 |
| 136 /** |
| 137 * A UBool indicating whether the dictionary stores a value with each word |
| 138 * @internal |
| 139 */ |
| 140 UBool fValued; |
| 141 |
131 friend class CompactTrieDictionary; // For fast conversion | 142 friend class CompactTrieDictionary; // For fast conversion |
132 | 143 |
133 public: | 144 public: |
134 | 145 |
135 /** | 146 /** |
136 * <p>Constructor.</p> | 147 * <p>Constructor.</p> |
137 * | 148 * |
138 * @param median A UChar around which to balance the trie. Ideally, it should | 149 * @param median A UChar around which to balance the trie. Ideally, it should |
139 * begin at least one word that is near the median of the set in the dictionary | 150 * begin at least one word that is near the median of the set in the dictionary |
140 * @param status A status code recording the success of the call. | 151 * @param status A status code recording the success of the call. |
| 152 * @param containsValue True if the dictionary stores values associated with ea
ch word. |
141 */ | 153 */ |
142 MutableTrieDictionary( UChar median, UErrorCode &status ); | 154 MutableTrieDictionary( UChar median, UErrorCode &status, UBool containsValue =
FALSE ); |
143 | 155 |
144 /** | 156 /** |
145 * <p>Virtual destructor.</p> | 157 * <p>Virtual destructor.</p> |
146 */ | 158 */ |
147 virtual ~MutableTrieDictionary(); | 159 virtual ~MutableTrieDictionary(); |
148 | 160 |
| 161 /** |
| 162 * Indicate whether the MutableTrieDictionary stores values associated with ea
ch word |
| 163 */ |
| 164 void setValued(UBool valued){ |
| 165 fValued = valued; |
| 166 } |
| 167 |
| 168 /** |
| 169 * <p>Returns true if the dictionary contains values associated with each word
.</p> |
| 170 */ |
| 171 virtual UBool getValued() const { |
| 172 return fValued; |
| 173 } |
| 174 |
149 /** | 175 /** |
150 * <p>Find dictionary words that match the text.</p> | 176 * <p>Find dictionary words that match the text.</p> |
151 * | 177 * |
152 * @param text A UText representing the text. The | 178 * @param text A UText representing the text. The |
153 * iterator is left after the longest prefix match in the dictionary. | 179 * iterator is left after the longest prefix match in the dictionary. |
154 * @param maxLength The maximum number of code units to match. | 180 * @param maxLength The maximum number of code units to match. |
155 * @param lengths An array that is filled with the lengths of words that matche
d. | 181 * @param lengths An array that is filled with the lengths of words that matche
d. |
156 * @param count Filled with the number of elements output in lengths. | 182 * @param count Filled with the number of elements output in lengths. |
157 * @param limit The size of the lengths array; this limits the number of words
output. | 183 * @param limit The size of the lengths array; this limits the number of words
output. |
| 184 * @param values An array that is filled with the values associated with the ma
tched words. |
158 * @return The number of characters in text that were matched. | 185 * @return The number of characters in text that were matched. |
159 */ | 186 */ |
160 virtual int32_t matches( UText *text, | 187 virtual int32_t matches( UText *text, |
161 int32_t maxLength, | 188 int32_t maxLength, |
162 int32_t *lengths, | 189 int32_t *lengths, |
163 int &count, | 190 int &count, |
164 int limit ) const; | 191 int limit, |
| 192 uint16_t *values = NULL) const; |
165 | 193 |
166 /** | 194 /** |
167 * <p>Return a StringEnumeration for iterating all the words in the dictionary
.</p> | 195 * <p>Return a StringEnumeration for iterating all the words in the dictionary
.</p> |
168 * | 196 * |
169 * @param status A status code recording the success of the call. | 197 * @param status A status code recording the success of the call. |
170 * @return A StringEnumeration that will iterate through the whole dictionary. | 198 * @return A StringEnumeration that will iterate through the whole dictionary. |
171 * The caller is responsible for closing it. The order is unspecified. | 199 * The caller is responsible for closing it. The order is unspecified. |
172 */ | 200 */ |
173 virtual StringEnumeration *openWords( UErrorCode &status ) const; | 201 virtual StringEnumeration *openWords( UErrorCode &status ) const; |
174 | 202 |
175 /** | 203 /** |
176 * <p>Add one word to the dictionary.</p> | 204 * <p>Add one word to the dictionary with an optional associated value.</p> |
177 * | 205 * |
178 * @param word A UChar buffer containing the word. | 206 * @param word A UChar buffer containing the word. |
179 * @param length The length of the word. | 207 * @param length The length of the word. |
180 * @param status The resultant status | 208 * @param status The resultant status. |
| 209 * @param value The nonzero value associated with this word. |
181 */ | 210 */ |
182 virtual void addWord( const UChar *word, | 211 virtual void addWord( const UChar *word, |
183 int32_t length, | 212 int32_t length, |
184 UErrorCode &status); | 213 UErrorCode &status, |
| 214 uint16_t value = 0); |
185 | 215 |
186 #if 0 | 216 #if 0 |
187 /** | 217 /** |
188 * <p>Add all strings from a UEnumeration to the dictionary.</p> | 218 * <p>Add all strings from a UEnumeration to the dictionary.</p> |
189 * | 219 * |
190 * @param words A UEnumeration that will return the desired words. | 220 * @param words A UEnumeration that will return the desired words. |
191 * @param status The resultant status | 221 * @param status The resultant status |
192 */ | 222 */ |
193 virtual void addWords( UEnumeration *words, UErrorCode &status ); | 223 virtual void addWords( UEnumeration *words, UErrorCode &status ); |
194 #endif | 224 #endif |
195 | 225 |
196 protected: | 226 protected: |
197 /** | 227 /** |
198 * <p>Search the dictionary for matches.</p> | 228 * <p>Search the dictionary for matches.</p> |
199 * | 229 * |
200 * @param text A UText representing the text. The | 230 * @param text A UText representing the text. The |
201 * iterator is left after the longest prefix match in the dictionary. | 231 * iterator is left after the longest prefix match in the dictionary. |
202 * @param maxLength The maximum number of code units to match. | 232 * @param maxLength The maximum number of code units to match. |
203 * @param lengths An array that is filled with the lengths of words that matche
d. | 233 * @param lengths An array that is filled with the lengths of words that matche
d. |
204 * @param count Filled with the number of elements output in lengths. | 234 * @param count Filled with the number of elements output in lengths. |
205 * @param limit The size of the lengths array; this limits the number of words
output. | 235 * @param limit The size of the lengths array; this limits the number of words
output. |
206 * @param parent The parent of the current node | 236 * @param parent The parent of the current node. |
207 * @param pMatched The returned parent node matched the input | 237 * @param pMatched The returned parent node matched the input. |
| 238 * @param values An array that is filled with the values associated with the ma
tched words. |
208 * @return The number of characters in text that were matched. | 239 * @return The number of characters in text that were matched. |
209 */ | 240 */ |
210 virtual int32_t search( UText *text, | 241 virtual int32_t search( UText *text, |
211 int32_t maxLength, | 242 int32_t maxLength, |
212 int32_t *lengths, | 243 int32_t *lengths, |
213 int &count, | 244 int &count, |
214 int limit, | 245 int limit, |
215 TernaryNode *&parent, | 246 TernaryNode *&parent, |
216 UBool &pMatched ) const; | 247 UBool &pMatched, |
| 248 uint16_t *values = NULL) const; |
217 | 249 |
218 private: | 250 private: |
219 /** | 251 /** |
220 * <p>Private constructor. The root node it not allocated.</p> | 253 * <p>Private constructor. The root node is not allocated.</p> |
221 * | 253 * |
222 * @param status A status code recording the success of the call. | 254 * @param status A status code recording the success of the call. |
| 255 * @param containsValues True if the dictionary will store a value associated |
| 256 * with each word added. |
223 */ | 257 */ |
224 MutableTrieDictionary( UErrorCode &status ); | 258 MutableTrieDictionary( UErrorCode &status, UBool containsValues = false ); |
225 }; | 259 }; |
226 | 260 |
227 /******************************************************************* | 261 /******************************************************************* |
228 * CompactTrieDictionary | 262 * CompactTrieDictionary |
229 */ | 263 */ |
230 | 264 |
| 265 //forward declarations |
| 266 struct CompactTrieHeader; |
| 267 struct CompactTrieInfo; |
| 268 |
231 /** | 269 /** |
232 * <p>CompactTrieDictionary is a TrieWordDictionary that has been compacted | 270 * <p>CompactTrieDictionary is a TrieWordDictionary that has been compacted |
233 * to save space.</p> | 271 * to save space.</p> |
234 */ | 272 */ |
235 class U_COMMON_API CompactTrieDictionary : public TrieWordDictionary { | 273 class U_COMMON_API CompactTrieDictionary : public TrieWordDictionary { |
236 private: | 274 private: |
237 /** | 275 /** |
238 * The root node of the trie | 276 * The header of the CompactTrieDictionary which contains all info |
239 */ | 277 */ |
240 | 278 |
241 const CompactTrieHeader *fData; | 279 CompactTrieInfo *fInfo; |
242 | 280 |
243 /** | 281 /** |
244 * A UBool indicating whether or not we own the fData. | 282 * A UBool indicating whether or not we own the trie data. |
245 */ | 283 */ |
246 | |
247 UBool fOwnData; | 284 UBool fOwnData; |
248 | 285 |
249 UDataMemory *fUData; | 286 UDataMemory *fUData; |
250 public: | 287 public: |
251 /** | 288 /** |
252 * <p>Construct a dictionary from a UDataMemory.</p> | 289 * <p>Construct a dictionary from a UDataMemory.</p> |
253 * | 290 * |
254 * @param data A pointer to a UDataMemory, which is adopted | 291 * @param dataObj A pointer to a UDataMemory, which is adopted |
255 * @param status A status code giving the result of the constructor | 292 * @param status A status code giving the result of the constructor |
256 */ | 293 */ |
257 CompactTrieDictionary(UDataMemory *dataObj, UErrorCode &status); | 294 CompactTrieDictionary(UDataMemory *dataObj, UErrorCode &status); |
258 | 295 |
259 /** | 296 /** |
(...skipping 10 matching lines...) Expand all Loading... |
270 * @param dict The dictionary to use as input. | 307 * @param dict The dictionary to use as input. |
271 * @param status A status code recording the success of the call. | 308 * @param status A status code recording the success of the call. |
272 */ | 309 */ |
273 CompactTrieDictionary( const MutableTrieDictionary &dict, UErrorCode &status )
; | 310 CompactTrieDictionary( const MutableTrieDictionary &dict, UErrorCode &status )
; |
274 | 311 |
275 /** | 312 /** |
276 * <p>Virtual destructor.</p> | 313 * <p>Virtual destructor.</p> |
277 */ | 314 */ |
278 virtual ~CompactTrieDictionary(); | 315 virtual ~CompactTrieDictionary(); |
279 | 316 |
| 317 /** |
| 318 * <p>Returns true if the dictionary contains values associated with each word
.</p> |
| 319 */ |
| 320 virtual UBool getValued() const; |
| 321 |
280 /** | 322 /** |
281 * <p>Find dictionary words that match the text.</p> | 323 * <p>Find dictionary words that match the text.</p> |
282 * | 324 * |
283 * @param text A UText representing the text. The | 325 * @param text A UText representing the text. The |
284 * iterator is left after the longest prefix match in the dictionary. | 326 * iterator is left after the longest prefix match in the dictionary. |
285 * @param maxLength The maximum number of code units to match. | 327 * @param maxLength The maximum number of code units to match. |
286 * @param lengths An array that is filled with the lengths of words that matche
d. | 328 * @param lengths An array that is filled with the lengths of words that matche
d. |
287 * @param count Filled with the number of elements output in lengths. | 329 * @param count Filled with the number of elements output in lengths. |
288 * @param limit The size of the lengths array; this limits the number of words
output. | 330 * @param limit The size of the lengths array; this limits the number of words
output. |
| 331 * @param values An array that is filled with the values associated with the ma
tched words. |
289 * @return The number of characters in text that were matched. | 332 * @return The number of characters in text that were matched. |
290 */ | 333 */ |
291 virtual int32_t matches( UText *text, | 334 virtual int32_t matches( UText *text, |
292 int32_t rangeEnd, | 335 int32_t maxLength, |
293 int32_t *lengths, | 336 int32_t *lengths, |
294 int &count, | 337 int &count, |
295 int limit ) const; | 338 int limit, |
| 339 uint16_t *values = NULL) const; |
296 | 340 |
297 /** | 341 /** |
298 * <p>Return a StringEnumeration for iterating all the words in the dictionary
.</p> | 342 * <p>Return a StringEnumeration for iterating all the words in the dictionary
.</p> |
299 * | 343 * |
300 * @param status A status code recording the success of the call. | 344 * @param status A status code recording the success of the call. |
301 * @return A StringEnumeration that will iterate through the whole dictionary. | 345 * @return A StringEnumeration that will iterate through the whole dictionary. |
302 * The caller is responsible for closing it. The order is unspecified. | 346 * The caller is responsible for closing it. The order is unspecified. |
303 */ | 347 */ |
304 virtual StringEnumeration *openWords( UErrorCode &status ) const; | 348 virtual StringEnumeration *openWords( UErrorCode &status ) const; |
305 | 349 |
306 /** | 350 /** |
307 * <p>Return the size of the compact data.</p> | 351 * <p>Return the size of the compact data.</p> |
308 * | 352 * |
309 * @return The size of the dictionary's compact data. | 353 * @return The size of the dictionary's compact data. |
310 */ | 354 */ |
311 virtual uint32_t dataSize() const; | 355 virtual uint32_t dataSize() const; |
312 | 356 |
313 /** | 357 /** |
314 * <p>Return a void * pointer to the compact data, platform-endian.</p> | 358 * <p>Return a void * pointer to the (unmanaged) compact data, platform-endian.
</p> |
315 * | 359 * |
316 * @return The data for the compact dictionary, suitable for passing to the | 360 * @return The data for the compact dictionary, suitable for passing to the |
317 * constructor. | 361 * constructor. |
318 */ | 362 */ |
319 virtual const void *data() const; | 363 virtual const void *data() const; |
320 | 364 |
321 /** | 365 /** |
322 * <p>Return a MutableTrieDictionary clone of this dictionary.</p> | 366 * <p>Return a MutableTrieDictionary clone of this dictionary.</p> |
323 * | 367 * |
324 * @param status A status code recording the success of the call. | 368 * @param status A status code recording the success of the call. |
(...skipping 10 matching lines...) Expand all Loading... |
335 * @param status A status code recording the success of the call. | 379 * @param status A status code recording the success of the call. |
336 * @return A single data blob starting with a CompactTrieHeader. | 380 * @return A single data blob starting with a CompactTrieHeader. |
337 */ | 381 */ |
338 static CompactTrieHeader *compactMutableTrieDictionary( const MutableTrieDicti
onary &dict, | 382 static CompactTrieHeader *compactMutableTrieDictionary( const MutableTrieDicti
onary &dict, |
339 UErrorCode &status ); | 383 UErrorCode &status ); |
340 | 384 |
341 }; | 385 }; |
342 | 386 |
343 U_NAMESPACE_END | 387 U_NAMESPACE_END |
344 | 388 |
345 /* TRIEDICT_H */ | 389 /* TRIEDICT_H */ |
346 #endif | 390 #endif |
OLD | NEW |