| OLD | NEW |
| 1 /** | 1 /** |
| 2 ******************************************************************************* | 2 ******************************************************************************* |
| 3 * Copyright (C) 2006, International Business Machines Corporation and others. * | 3 * Copyright (C) 2006, International Business Machines Corporation and others. * |
| 4 * All Rights Reserved. * | 4 * All Rights Reserved. * |
| 5 ******************************************************************************* | 5 ******************************************************************************* |
| 6 */ | 6 */ |
| 7 | 7 |
| 8 #ifndef TRIEDICT_H | 8 #ifndef TRIEDICT_H |
| 9 #define TRIEDICT_H | 9 #define TRIEDICT_H |
| 10 | 10 |
| (...skipping 29 matching lines...) Expand all Loading... |
| 40 */ | 40 */ |
| 41 | 41 |
| 42 U_CAPI int32_t U_EXPORT2 | 42 U_CAPI int32_t U_EXPORT2 |
| 43 triedict_swap(const UDataSwapper *ds, | 43 triedict_swap(const UDataSwapper *ds, |
| 44 const void *inData, int32_t length, void *outData, | 44 const void *inData, int32_t length, void *outData, |
| 45 UErrorCode *pErrorCode); | 45 UErrorCode *pErrorCode); |
| 46 | 46 |
| 47 U_NAMESPACE_BEGIN | 47 U_NAMESPACE_BEGIN |
| 48 | 48 |
| 49 class StringEnumeration; | 49 class StringEnumeration; |
| 50 struct CompactTrieHeader; | |
| 51 | 50 |
| 52 /******************************************************************* | 51 /******************************************************************* |
| 53 * TrieWordDictionary | 52 * TrieWordDictionary |
| 54 */ | 53 */ |
| 55 | 54 |
| 56 /** | 55 /** |
| 57 * <p>TrieWordDictionary is an abstract class that represents a word | 56 * <p>TrieWordDictionary is an abstract class that represents a word |
| 58 * dictionary based on a trie. The base protocol is read-only. | 57 * dictionary based on a trie. The base protocol is read-only. |
| 59 * Subclasses may allow writing.</p> | 58 * Subclasses may allow writing.</p> |
| 60 */ | 59 */ |
| 61 class U_COMMON_API TrieWordDictionary : public UMemory { | 60 class U_COMMON_API TrieWordDictionary : public UMemory { |
| 62 public: | 61 public: |
| 63 | 62 |
| 64 /** | 63 /** |
| 65 * <p>Default constructor.</p> | 64 * <p>Default constructor.</p> |
| 66 * | 65 * |
| 67 */ | 66 */ |
| 68 TrieWordDictionary(); | 67 TrieWordDictionary(); |
| 69 | 68 |
| 70 /** | 69 /** |
| 71 * <p>Virtual destructor.</p> | 70 * <p>Virtual destructor.</p> |
| 72 */ | 71 */ |
| 73 virtual ~TrieWordDictionary(); | 72 virtual ~TrieWordDictionary(); |
| 74 | 73 |
| 74 /** |
| 75 * <p>Returns true if the dictionary contains values associated with each word
.</p> |
| 76 */ |
| 77 virtual UBool getValued() const = 0; |
| 78 |
| 75 /** | 79 /** |
| 76 * <p>Find dictionary words that match the text.</p> | 80 * <p>Find dictionary words that match the text.</p> |
| 77 * | 81 * |
| 78 * @param text A UText representing the text. The | 82 * @param text A UText representing the text. The |
| 79 * iterator is left after the longest prefix match in the dictionary. | 83 * iterator is left after the longest prefix match in the dictionary. |
| 80 * @param start The current position in text. | |
| 81 * @param maxLength The maximum number of code units to match. | 84 * @param maxLength The maximum number of code units to match. |
| 82 * @param lengths An array that is filled with the lengths of words that matche
d. | 85 * @param lengths An array that is filled with the lengths of words that matche
d. |
| 83 * @param count Filled with the number of elements output in lengths. | 86 * @param count Filled with the number of elements output in lengths. |
| 84 * @param limit The size of the lengths array; this limits the number of words
output. | 87 * @param limit The size of the lengths array; this limits the number of words
output. |
| 88 * @param values An array that is filled with the values associated with the ma
tched words. |
| 85 * @return The number of characters in text that were matched. | 89 * @return The number of characters in text that were matched. |
| 86 */ | 90 */ |
| 87 virtual int32_t matches( UText *text, | 91 virtual int32_t matches( UText *text, |
| 88 int32_t maxLength, | 92 int32_t maxLength, |
| 89 int32_t *lengths, | 93 int32_t *lengths, |
| 90 int &count, | 94 int &count, |
| 91 int limit ) const = 0; | 95 int limit, |
| 96 uint16_t *values = NULL) const = 0; |
| 92 | 97 |
| 93 /** | 98 /** |
| 94 * <p>Return a StringEnumeration for iterating all the words in the dictionary
.</p> | 99 * <p>Return a StringEnumeration for iterating all the words in the dictionary
.</p> |
| 95 * | 100 * |
| 96 * @param status A status code recording the success of the call. | 101 * @param status A status code recording the success of the call. |
| 97 * @return A StringEnumeration that will iterate through the whole dictionary. | 102 * @return A StringEnumeration that will iterate through the whole dictionary. |
| 98 * The caller is responsible for closing it. The order is unspecified. | 103 * The caller is responsible for closing it. The order is unspecified. |
| 99 */ | 104 */ |
| 100 virtual StringEnumeration *openWords( UErrorCode &status ) const = 0; | 105 virtual StringEnumeration *openWords( UErrorCode &status ) const = 0; |
| 101 | 106 |
| (...skipping 19 matching lines...) Expand all Loading... |
| 121 | 126 |
| 122 TernaryNode *fTrie; | 127 TernaryNode *fTrie; |
| 123 | 128 |
| 124 /** | 129 /** |
| 125 * A UText for internal use | 130 * A UText for internal use |
| 126 * @internal | 131 * @internal |
| 127 */ | 132 */ |
| 128 | 133 |
| 129 UText *fIter; | 134 UText *fIter; |
| 130 | 135 |
| 136 /** |
| 137 * A UBool recording whether the dictionary stores a value associated with each word |
| 138 * @internal |
| 139 */ |
| 140 UBool fValued; |
| 141 |
| 131 friend class CompactTrieDictionary; // For fast conversion | 142 friend class CompactTrieDictionary; // For fast conversion |
| 132 | 143 |
| 133 public: | 144 public: |
| 134 | 145 |
| 135 /** | 146 /** |
| 136 * <p>Constructor.</p> | 147 * <p>Constructor.</p> |
| 137 * | 148 * |
| 138 * @param median A UChar around which to balance the trie. Ideally, it should | 149 * @param median A UChar around which to balance the trie. Ideally, it should |
| 139 * begin at least one word that is near the median of the set in the dictionary | 150 * begin at least one word that is near the median of the set in the dictionary |
| 140 * @param status A status code recording the success of the call. | 151 * @param status A status code recording the success of the call. |
| 152 * @param containsValue True if the dictionary stores values associated with ea
ch word. |
| 141 */ | 153 */ |
| 142 MutableTrieDictionary( UChar median, UErrorCode &status ); | 154 MutableTrieDictionary( UChar median, UErrorCode &status, UBool containsValue =
FALSE ); |
| 143 | 155 |
| 144 /** | 156 /** |
| 145 * <p>Virtual destructor.</p> | 157 * <p>Virtual destructor.</p> |
| 146 */ | 158 */ |
| 147 virtual ~MutableTrieDictionary(); | 159 virtual ~MutableTrieDictionary(); |
| 148 | 160 |
| 161 /** |
| 162 * Indicate whether the MutableTrieDictionary stores values associated with ea
ch word |
| 163 */ |
| 164 void setValued(UBool valued){ |
| 165 fValued = valued; |
| 166 } |
| 167 |
| 168 /** |
| 169 * <p>Returns true if the dictionary contains values associated with each word
.</p> |
| 170 */ |
| 171 virtual UBool getValued() const { |
| 172 return fValued; |
| 173 } |
| 174 |
| 149 /** | 175 /** |
| 150 * <p>Find dictionary words that match the text.</p> | 176 * <p>Find dictionary words that match the text.</p> |
| 151 * | 177 * |
| 152 * @param text A UText representing the text. The | 178 * @param text A UText representing the text. The |
| 153 * iterator is left after the longest prefix match in the dictionary. | 179 * iterator is left after the longest prefix match in the dictionary. |
| 154 * @param maxLength The maximum number of code units to match. | 180 * @param maxLength The maximum number of code units to match. |
| 155 * @param lengths An array that is filled with the lengths of words that matche
d. | 181 * @param lengths An array that is filled with the lengths of words that matche
d. |
| 156 * @param count Filled with the number of elements output in lengths. | 182 * @param count Filled with the number of elements output in lengths. |
| 157 * @param limit The size of the lengths array; this limits the number of words
output. | 183 * @param limit The size of the lengths array; this limits the number of words
output. |
| 184 * @param values An array that is filled with the values associated with the ma
tched words. |
| 158 * @return The number of characters in text that were matched. | 185 * @return The number of characters in text that were matched. |
| 159 */ | 186 */ |
| 160 virtual int32_t matches( UText *text, | 187 virtual int32_t matches( UText *text, |
| 161 int32_t maxLength, | 188 int32_t maxLength, |
| 162 int32_t *lengths, | 189 int32_t *lengths, |
| 163 int &count, | 190 int &count, |
| 164 int limit ) const; | 191 int limit, |
| 192 uint16_t *values = NULL) const; |
| 165 | 193 |
| 166 /** | 194 /** |
| 167 * <p>Return a StringEnumeration for iterating all the words in the dictionary
.</p> | 195 * <p>Return a StringEnumeration for iterating all the words in the dictionary
.</p> |
| 168 * | 196 * |
| 169 * @param status A status code recording the success of the call. | 197 * @param status A status code recording the success of the call. |
| 170 * @return A StringEnumeration that will iterate through the whole dictionary. | 198 * @return A StringEnumeration that will iterate through the whole dictionary. |
| 171 * The caller is responsible for closing it. The order is unspecified. | 199 * The caller is responsible for closing it. The order is unspecified. |
| 172 */ | 200 */ |
| 173 virtual StringEnumeration *openWords( UErrorCode &status ) const; | 201 virtual StringEnumeration *openWords( UErrorCode &status ) const; |
| 174 | 202 |
| 175 /** | 203 /** |
| 176 * <p>Add one word to the dictionary.</p> | 204 * <p>Add one word to the dictionary with an optional associated value.</p> |
| 177 * | 205 * |
| 178 * @param word A UChar buffer containing the word. | 206 * @param word A UChar buffer containing the word. |
| 179 * @param length The length of the word. | 207 * @param length The length of the word. |
| 180 * @param status The resultant status | 208 * @param status The resultant status. |
| 209 * @param value The nonzero value associated with this word. |
| 181 */ | 210 */ |
| 182 virtual void addWord( const UChar *word, | 211 virtual void addWord( const UChar *word, |
| 183 int32_t length, | 212 int32_t length, |
| 184 UErrorCode &status); | 213 UErrorCode &status, |
| 214 uint16_t value = 0); |
| 185 | 215 |
| 186 #if 0 | 216 #if 0 |
| 187 /** | 217 /** |
| 188 * <p>Add all strings from a UEnumeration to the dictionary.</p> | 218 * <p>Add all strings from a UEnumeration to the dictionary.</p> |
| 189 * | 219 * |
| 190 * @param words A UEnumeration that will return the desired words. | 220 * @param words A UEnumeration that will return the desired words. |
| 191 * @param status The resultant status | 221 * @param status The resultant status |
| 192 */ | 222 */ |
| 193 virtual void addWords( UEnumeration *words, UErrorCode &status ); | 223 virtual void addWords( UEnumeration *words, UErrorCode &status ); |
| 194 #endif | 224 #endif |
| 195 | 225 |
| 196 protected: | 226 protected: |
| 197 /** | 227 /** |
| 198 * <p>Search the dictionary for matches.</p> | 228 * <p>Search the dictionary for matches.</p> |
| 199 * | 229 * |
| 200 * @param text A UText representing the text. The | 230 * @param text A UText representing the text. The |
| 201 * iterator is left after the longest prefix match in the dictionary. | 231 * iterator is left after the longest prefix match in the dictionary. |
| 202 * @param maxLength The maximum number of code units to match. | 232 * @param maxLength The maximum number of code units to match. |
| 203 * @param lengths An array that is filled with the lengths of words that matche
d. | 233 * @param lengths An array that is filled with the lengths of words that matche
d. |
| 204 * @param count Filled with the number of elements output in lengths. | 234 * @param count Filled with the number of elements output in lengths. |
| 205 * @param limit The size of the lengths array; this limits the number of words
output. | 235 * @param limit The size of the lengths array; this limits the number of words
output. |
| 206 * @param parent The parent of the current node | 236 * @param parent The parent of the current node. |
| 207 * @param pMatched The returned parent node matched the input | 237 * @param pMatched The returned parent node matched the input. |
| 238 * @param values An array that is filled with the values associated with the ma
tched words. |
| 208 * @return The number of characters in text that were matched. | 239 * @return The number of characters in text that were matched. |
| 209 */ | 240 */ |
| 210 virtual int32_t search( UText *text, | 241 virtual int32_t search( UText *text, |
| 211 int32_t maxLength, | 242 int32_t maxLength, |
| 212 int32_t *lengths, | 243 int32_t *lengths, |
| 213 int &count, | 244 int &count, |
| 214 int limit, | 245 int limit, |
| 215 TernaryNode *&parent, | 246 TernaryNode *&parent, |
| 216 UBool &pMatched ) const; | 247 UBool &pMatched, |
| 248 uint16_t *values = NULL) const; |
| 217 | 249 |
| 218 private: | 250 private: |
| 219 /** | 251 /** |
| 220 * <p>Private constructor. The root node is not allocated.</p> | 252 * <p>Private constructor. The root node is not allocated.</p> |
| 221 * | 253 * |
| 222 * @param status A status code recording the success of the call. | 254 * @param status A status code recording the success of the call. |
| 255 * @param containsValues True if the dictionary will store a value associated |
| 256 * with each word added. |
| 223 */ | 257 */ |
| 224 MutableTrieDictionary( UErrorCode &status ); | 258 MutableTrieDictionary( UErrorCode &status, UBool containsValues = false ); |
| 225 }; | 259 }; |
| 226 | 260 |
| 227 /******************************************************************* | 261 /******************************************************************* |
| 228 * CompactTrieDictionary | 262 * CompactTrieDictionary |
| 229 */ | 263 */ |
| 230 | 264 |
| 265 //forward declarations |
| 266 struct CompactTrieHeader; |
| 267 struct CompactTrieInfo; |
| 268 |
| 231 /** | 269 /** |
| 232 * <p>CompactTrieDictionary is a TrieWordDictionary that has been compacted | 270 * <p>CompactTrieDictionary is a TrieWordDictionary that has been compacted |
| 233 * to save space.</p> | 271 * to save space.</p> |
| 234 */ | 272 */ |
| 235 class U_COMMON_API CompactTrieDictionary : public TrieWordDictionary { | 273 class U_COMMON_API CompactTrieDictionary : public TrieWordDictionary { |
| 236 private: | 274 private: |
| 237 /** | 275 /** |
| 238 * The root node of the trie | 276 * The header of the CompactTrieDictionary which contains all info |
| 239 */ | 277 */ |
| 240 | 278 |
| 241 const CompactTrieHeader *fData; | 279 CompactTrieInfo *fInfo; |
| 242 | 280 |
| 243 /** | 281 /** |
| 244 * A UBool indicating whether or not we own the fData. | 282 * A UBool indicating whether or not we own the underlying data. |
| 245 */ | 283 */ |
| 246 | |
| 247 UBool fOwnData; | 284 UBool fOwnData; |
| 248 | 285 |
| 249 UDataMemory *fUData; | 286 UDataMemory *fUData; |
| 250 public: | 287 public: |
| 251 /** | 288 /** |
| 252 * <p>Construct a dictionary from a UDataMemory.</p> | 289 * <p>Construct a dictionary from a UDataMemory.</p> |
| 253 * | 290 * |
| 254 * @param dataObj A pointer to a UDataMemory, which is adopted | 291 * @param dataObj A pointer to a UDataMemory, which is adopted |
| 255 * @param status A status code giving the result of the constructor | 292 * @param status A status code giving the result of the constructor |
| 256 */ | 293 */ |
| 257 CompactTrieDictionary(UDataMemory *dataObj, UErrorCode &status); | 294 CompactTrieDictionary(UDataMemory *dataObj, UErrorCode &status); |
| 258 | 295 |
| 259 /** | 296 /** |
| (...skipping 10 matching lines...) Expand all Loading... |
| 270 * @param dict The dictionary to use as input. | 307 * @param dict The dictionary to use as input. |
| 271 * @param status A status code recording the success of the call. | 308 * @param status A status code recording the success of the call. |
| 272 */ | 309 */ |
| 273 CompactTrieDictionary( const MutableTrieDictionary &dict, UErrorCode &status )
; | 310 CompactTrieDictionary( const MutableTrieDictionary &dict, UErrorCode &status )
; |
| 274 | 311 |
| 275 /** | 312 /** |
| 276 * <p>Virtual destructor.</p> | 313 * <p>Virtual destructor.</p> |
| 277 */ | 314 */ |
| 278 virtual ~CompactTrieDictionary(); | 315 virtual ~CompactTrieDictionary(); |
| 279 | 316 |
| 317 /** |
| 318 * <p>Returns true if the dictionary contains values associated with each word
.</p> |
| 319 */ |
| 320 virtual UBool getValued() const; |
| 321 |
| 280 /** | 322 /** |
| 281 * <p>Find dictionary words that match the text.</p> | 323 * <p>Find dictionary words that match the text.</p> |
| 282 * | 324 * |
| 283 * @param text A UText representing the text. The | 325 * @param text A UText representing the text. The |
| 284 * iterator is left after the longest prefix match in the dictionary. | 326 * iterator is left after the longest prefix match in the dictionary. |
| 285 * @param maxLength The maximum number of code units to match. | 327 * @param maxLength The maximum number of code units to match. |
| 286 * @param lengths An array that is filled with the lengths of words that matche
d. | 328 * @param lengths An array that is filled with the lengths of words that matche
d. |
| 287 * @param count Filled with the number of elements output in lengths. | 329 * @param count Filled with the number of elements output in lengths. |
| 288 * @param limit The size of the lengths array; this limits the number of words
output. | 330 * @param limit The size of the lengths array; this limits the number of words
output. |
| 331 * @param values An array that is filled with the values associated with the ma
tched words. |
| 289 * @return The number of characters in text that were matched. | 332 * @return The number of characters in text that were matched. |
| 290 */ | 333 */ |
| 291 virtual int32_t matches( UText *text, | 334 virtual int32_t matches( UText *text, |
| 292 int32_t rangeEnd, | 335 int32_t maxLength, |
| 293 int32_t *lengths, | 336 int32_t *lengths, |
| 294 int &count, | 337 int &count, |
| 295 int limit ) const; | 338 int limit, |
| 339 uint16_t *values = NULL) const; |
| 296 | 340 |
| 297 /** | 341 /** |
| 298 * <p>Return a StringEnumeration for iterating all the words in the dictionary
.</p> | 342 * <p>Return a StringEnumeration for iterating all the words in the dictionary
.</p> |
| 299 * | 343 * |
| 300 * @param status A status code recording the success of the call. | 344 * @param status A status code recording the success of the call. |
| 301 * @return A StringEnumeration that will iterate through the whole dictionary. | 345 * @return A StringEnumeration that will iterate through the whole dictionary. |
| 302 * The caller is responsible for closing it. The order is unspecified. | 346 * The caller is responsible for closing it. The order is unspecified. |
| 303 */ | 347 */ |
| 304 virtual StringEnumeration *openWords( UErrorCode &status ) const; | 348 virtual StringEnumeration *openWords( UErrorCode &status ) const; |
| 305 | 349 |
| 306 /** | 350 /** |
| 307 * <p>Return the size of the compact data.</p> | 351 * <p>Return the size of the compact data.</p> |
| 308 * | 352 * |
| 309 * @return The size of the dictionary's compact data. | 353 * @return The size of the dictionary's compact data. |
| 310 */ | 354 */ |
| 311 virtual uint32_t dataSize() const; | 355 virtual uint32_t dataSize() const; |
| 312 | 356 |
| 313 /** | 357 /** |
| 314 * <p>Return a void * pointer to the compact data, platform-endian.</p> | 358 * <p>Return a void * pointer to the (unmanaged) compact data, platform-endian.
</p> |
| 315 * | 359 * |
| 316 * @return The data for the compact dictionary, suitable for passing to the | 360 * @return The data for the compact dictionary, suitable for passing to the |
| 317 * constructor. | 361 * constructor. |
| 318 */ | 362 */ |
| 319 virtual const void *data() const; | 363 virtual const void *data() const; |
| 320 | 364 |
| 321 /** | 365 /** |
| 322 * <p>Return a MutableTrieDictionary clone of this dictionary.</p> | 366 * <p>Return a MutableTrieDictionary clone of this dictionary.</p> |
| 323 * | 367 * |
| 324 * @param status A status code recording the success of the call. | 368 * @param status A status code recording the success of the call. |
| (...skipping 10 matching lines...) Expand all Loading... |
| 335 * @param status A status code recording the success of the call. | 379 * @param status A status code recording the success of the call. |
| 336 * @return A single data blob starting with a CompactTrieHeader. | 380 * @return A single data blob starting with a CompactTrieHeader. |
| 337 */ | 381 */ |
| 338 static CompactTrieHeader *compactMutableTrieDictionary( const MutableTrieDicti
onary &dict, | 382 static CompactTrieHeader *compactMutableTrieDictionary( const MutableTrieDicti
onary &dict, |
| 339 UErrorCode &status ); | 383 UErrorCode &status ); |
| 340 | 384 |
| 341 }; | 385 }; |
| 342 | 386 |
| 343 U_NAMESPACE_END | 387 U_NAMESPACE_END |
| 344 | 388 |
| 345 /* TRIEDICT_H */ | 389 /* TRIEDICT_H */ |
| 346 #endif | 390 #endif |
| OLD | NEW |