OLD | NEW |
(Empty) | |
| 1 /** |
| 2 ******************************************************************************* |
| 3 * Copyright (C) 2006, International Business Machines Corporation and others. * |
| 4 * All Rights Reserved. * |
| 5 ******************************************************************************* |
| 6 */ |
| 7 |
| 8 #ifndef TRIEDICT_H |
| 9 #define TRIEDICT_H |
| 10 |
| 11 #include "unicode/utypes.h" |
| 12 #include "unicode/uobject.h" |
| 13 #include "unicode/utext.h" |
| 14 |
| 15 struct UEnumeration; |
| 16 struct UDataSwapper; |
| 17 struct UDataMemory; |
| 18 |
| 19 /** |
| 20 * <p>UDataSwapFn function for use in swapping a compact dictionary.</p> |
| 21 * |
| 22 * @param ds Pointer to UDataSwapper containing global data about the |
| 23 * transformation and function pointers for handling primitive |
| 24 * types. |
| 25 * @param inData Pointer to the input data to be transformed or examined. |
| 26 * @param length Length of the data, counting bytes. May be -1 for preflighting
. |
| 27 * If length>=0, then transform the data. |
| 28 * If length==-1, then only determine the length of the data. |
| 29 * The length cannot be determined from the data itself for all |
| 30 * types of data (e.g., not for simple arrays of integers). |
| 31 * @param outData Pointer to the output data buffer. |
| 32 * If length>=0 (transformation), then the output buffer must |
| 33 * have a capacity of at least length. |
| 34 * If length==-1, then outData will not be used and can be NULL. |
| 35 * @param pErrorCode ICU UErrorCode parameter, must not be NULL and must |
| 36 * fulfill U_SUCCESS on input. |
| 37 * @return The actual length of the data. |
| 38 * |
| 39 * @see UDataSwapper |
| 40 */ |
| 41 |
| 42 U_CAPI int32_t U_EXPORT2 |
| 43 triedict_swap(const UDataSwapper *ds, |
| 44 const void *inData, int32_t length, void *outData, |
| 45 UErrorCode *pErrorCode); |
| 46 |
| 47 U_NAMESPACE_BEGIN |
| 48 |
| 49 class StringEnumeration; |
| 50 struct CompactTrieHeader; |
| 51 |
| 52 /******************************************************************* |
| 53 * TrieWordDictionary |
| 54 */ |
| 55 |
| 56 /** |
| 57 * <p>TrieWordDictionary is an abstract class that represents a word |
| 58 * dictionary based on a trie. The base protocol is read-only. |
| 59 * Subclasses may allow writing.</p> |
| 60 */ |
| 61 class U_COMMON_API TrieWordDictionary : public UMemory { |
| 62 public: |
| 63 |
| 64 /** |
| 65 * <p>Default constructor.</p> |
| 66 * |
| 67 */ |
| 68 TrieWordDictionary(); |
| 69 |
| 70 /** |
| 71 * <p>Virtual destructor.</p> |
| 72 */ |
| 73 virtual ~TrieWordDictionary(); |
| 74 |
| 75 /** |
| 76 * <p>Find dictionary words that match the text.</p> |
| 77 * |
| 78 * @param text A UText representing the text. The |
| 79 * iterator is left after the longest prefix match in the dictionary. |
| 80 * @param start The current position in text. |
| 81 * @param maxLength The maximum number of code units to match. |
| 82 * @param lengths An array that is filled with the lengths of words that matche
d. |
| 83 * @param count Filled with the number of elements output in lengths. |
| 84 * @param limit The size of the lengths array; this limits the number of words
output. |
| 85 * @return The number of characters in text that were matched. |
| 86 */ |
| 87 virtual int32_t matches( UText *text, |
| 88 int32_t maxLength, |
| 89 int32_t *lengths, |
| 90 int &count, |
| 91 int limit ) const = 0; |
| 92 |
| 93 /** |
| 94 * <p>Return a StringEnumeration for iterating all the words in the dictionary
.</p> |
| 95 * |
| 96 * @param status A status code recording the success of the call. |
| 97 * @return A StringEnumeration that will iterate through the whole dictionary. |
| 98 * The caller is responsible for closing it. The order is unspecified. |
| 99 */ |
| 100 virtual StringEnumeration *openWords( UErrorCode &status ) const = 0; |
| 101 |
| 102 }; |
| 103 |
| 104 /******************************************************************* |
| 105 * MutableTrieDictionary |
| 106 */ |
| 107 |
| 108 /** |
| 109 * <p>MutableTrieDictionary is a TrieWordDictionary that allows words to be |
| 110 * added.</p> |
| 111 */ |
| 112 |
| 113 struct TernaryNode; // Forwards declaration |
| 114 |
| 115 class U_COMMON_API MutableTrieDictionary : public TrieWordDictionary { |
| 116 private: |
| 117 /** |
| 118 * The root node of the trie |
| 119 * @internal |
| 120 */ |
| 121 |
| 122 TernaryNode *fTrie; |
| 123 |
| 124 /** |
| 125 * A UText for internal use |
| 126 * @internal |
| 127 */ |
| 128 |
| 129 UText *fIter; |
| 130 |
| 131 friend class CompactTrieDictionary; // For fast conversion |
| 132 |
| 133 public: |
| 134 |
| 135 /** |
| 136 * <p>Constructor.</p> |
| 137 * |
| 138 * @param median A UChar around which to balance the trie. Ideally, it should |
| 139 * begin at least one word that is near the median of the set in the dictionary |
| 140 * @param status A status code recording the success of the call. |
| 141 */ |
| 142 MutableTrieDictionary( UChar median, UErrorCode &status ); |
| 143 |
| 144 /** |
| 145 * <p>Virtual destructor.</p> |
| 146 */ |
| 147 virtual ~MutableTrieDictionary(); |
| 148 |
| 149 /** |
| 150 * <p>Find dictionary words that match the text.</p> |
| 151 * |
| 152 * @param text A UText representing the text. The |
| 153 * iterator is left after the longest prefix match in the dictionary. |
| 154 * @param maxLength The maximum number of code units to match. |
| 155 * @param lengths An array that is filled with the lengths of words that matche
d. |
| 156 * @param count Filled with the number of elements output in lengths. |
| 157 * @param limit The size of the lengths array; this limits the number of words
output. |
| 158 * @return The number of characters in text that were matched. |
| 159 */ |
| 160 virtual int32_t matches( UText *text, |
| 161 int32_t maxLength, |
| 162 int32_t *lengths, |
| 163 int &count, |
| 164 int limit ) const; |
| 165 |
| 166 /** |
| 167 * <p>Return a StringEnumeration for iterating all the words in the dictionary
.</p> |
| 168 * |
| 169 * @param status A status code recording the success of the call. |
| 170 * @return A StringEnumeration that will iterate through the whole dictionary. |
| 171 * The caller is responsible for closing it. The order is unspecified. |
| 172 */ |
| 173 virtual StringEnumeration *openWords( UErrorCode &status ) const; |
| 174 |
| 175 /** |
| 176 * <p>Add one word to the dictionary.</p> |
| 177 * |
| 178 * @param word A UChar buffer containing the word. |
| 179 * @param length The length of the word. |
| 180 * @param status The resultant status |
| 181 */ |
| 182 virtual void addWord( const UChar *word, |
| 183 int32_t length, |
| 184 UErrorCode &status); |
| 185 |
| 186 #if 0 |
| 187 /** |
| 188 * <p>Add all strings from a UEnumeration to the dictionary.</p> |
| 189 * |
| 190 * @param words A UEnumeration that will return the desired words. |
| 191 * @param status The resultant status |
| 192 */ |
| 193 virtual void addWords( UEnumeration *words, UErrorCode &status ); |
| 194 #endif |
| 195 |
| 196 protected: |
| 197 /** |
| 198 * <p>Search the dictionary for matches.</p> |
| 199 * |
| 200 * @param text A UText representing the text. The |
| 201 * iterator is left after the longest prefix match in the dictionary. |
| 202 * @param maxLength The maximum number of code units to match. |
| 203 * @param lengths An array that is filled with the lengths of words that matche
d. |
| 204 * @param count Filled with the number of elements output in lengths. |
| 205 * @param limit The size of the lengths array; this limits the number of words
output. |
| 206 * @param parent The parent of the current node |
| 207 * @param pMatched The returned parent node matched the input |
| 208 * @return The number of characters in text that were matched. |
| 209 */ |
| 210 virtual int32_t search( UText *text, |
| 211 int32_t maxLength, |
| 212 int32_t *lengths, |
| 213 int &count, |
| 214 int limit, |
| 215 TernaryNode *&parent, |
| 216 UBool &pMatched ) const; |
| 217 |
| 218 private: |
| 219 /** |
| 220 * <p>Private constructor. The root node it not allocated.</p> |
| 221 * |
| 222 * @param status A status code recording the success of the call. |
| 223 */ |
| 224 MutableTrieDictionary( UErrorCode &status ); |
| 225 }; |
| 226 |
| 227 /******************************************************************* |
| 228 * CompactTrieDictionary |
| 229 */ |
| 230 |
| 231 /** |
| 232 * <p>CompactTrieDictionary is a TrieWordDictionary that has been compacted |
| 233 * to save space.</p> |
| 234 */ |
| 235 class U_COMMON_API CompactTrieDictionary : public TrieWordDictionary { |
| 236 private: |
| 237 /** |
| 238 * The root node of the trie |
| 239 */ |
| 240 |
| 241 const CompactTrieHeader *fData; |
| 242 |
| 243 /** |
| 244 * A UBool indicating whether or not we own the fData. |
| 245 */ |
| 246 |
| 247 UBool fOwnData; |
| 248 |
| 249 UDataMemory *fUData; |
| 250 public: |
| 251 /** |
| 252 * <p>Construct a dictionary from a UDataMemory.</p> |
| 253 * |
| 254 * @param data A pointer to a UDataMemory, which is adopted |
| 255 * @param status A status code giving the result of the constructor |
| 256 */ |
| 257 CompactTrieDictionary(UDataMemory *dataObj, UErrorCode &status); |
| 258 |
| 259 /** |
| 260 * <p>Construct a dictionary from raw saved data.</p> |
| 261 * |
| 262 * @param data A pointer to the raw data, which is still owned by the caller |
| 263 * @param status A status code giving the result of the constructor |
| 264 */ |
| 265 CompactTrieDictionary(const void *dataObj, UErrorCode &status); |
| 266 |
| 267 /** |
| 268 * <p>Construct a dictionary from a MutableTrieDictionary.</p> |
| 269 * |
| 270 * @param dict The dictionary to use as input. |
| 271 * @param status A status code recording the success of the call. |
| 272 */ |
| 273 CompactTrieDictionary( const MutableTrieDictionary &dict, UErrorCode &status )
; |
| 274 |
| 275 /** |
| 276 * <p>Virtual destructor.</p> |
| 277 */ |
| 278 virtual ~CompactTrieDictionary(); |
| 279 |
| 280 /** |
| 281 * <p>Find dictionary words that match the text.</p> |
| 282 * |
| 283 * @param text A UText representing the text. The |
| 284 * iterator is left after the longest prefix match in the dictionary. |
| 285 * @param maxLength The maximum number of code units to match. |
| 286 * @param lengths An array that is filled with the lengths of words that matche
d. |
| 287 * @param count Filled with the number of elements output in lengths. |
| 288 * @param limit The size of the lengths array; this limits the number of words
output. |
| 289 * @return The number of characters in text that were matched. |
| 290 */ |
| 291 virtual int32_t matches( UText *text, |
| 292 int32_t rangeEnd, |
| 293 int32_t *lengths, |
| 294 int &count, |
| 295 int limit ) const; |
| 296 |
| 297 /** |
| 298 * <p>Return a StringEnumeration for iterating all the words in the dictionary
.</p> |
| 299 * |
| 300 * @param status A status code recording the success of the call. |
| 301 * @return A StringEnumeration that will iterate through the whole dictionary. |
| 302 * The caller is responsible for closing it. The order is unspecified. |
| 303 */ |
| 304 virtual StringEnumeration *openWords( UErrorCode &status ) const; |
| 305 |
| 306 /** |
| 307 * <p>Return the size of the compact data.</p> |
| 308 * |
| 309 * @return The size of the dictionary's compact data. |
| 310 */ |
| 311 virtual uint32_t dataSize() const; |
| 312 |
| 313 /** |
| 314 * <p>Return a void * pointer to the compact data, platform-endian.</p> |
| 315 * |
| 316 * @return The data for the compact dictionary, suitable for passing to the |
| 317 * constructor. |
| 318 */ |
| 319 virtual const void *data() const; |
| 320 |
| 321 /** |
| 322 * <p>Return a MutableTrieDictionary clone of this dictionary.</p> |
| 323 * |
| 324 * @param status A status code recording the success of the call. |
| 325 * @return A MutableTrieDictionary with the same data as this dictionary |
| 326 */ |
| 327 virtual MutableTrieDictionary *cloneMutable( UErrorCode &status ) const; |
| 328 |
| 329 private: |
| 330 |
| 331 /** |
| 332 * <p>Convert a MutableTrieDictionary into a compact data blob.</p> |
| 333 * |
| 334 * @param dict The dictionary to convert. |
| 335 * @param status A status code recording the success of the call. |
| 336 * @return A single data blob starting with a CompactTrieHeader. |
| 337 */ |
| 338 static CompactTrieHeader *compactMutableTrieDictionary( const MutableTrieDicti
onary &dict, |
| 339 UErrorCode &status ); |
| 340 |
| 341 }; |
| 342 |
| 343 U_NAMESPACE_END |
| 344 |
| 345 /* TRIEDICT_H */ |
| 346 #endif |
OLD | NEW |