Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(145)

Side by Side Diff: icu46/source/common/triedict.h

Issue 6370014: CJK segmentation patch for ICU 4.6... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/
Patch Set: Created 9 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « icu46/source/common/rbbi.cpp ('k') | icu46/source/common/triedict.cpp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /** 1 /**
2 ******************************************************************************* 2 *******************************************************************************
3 * Copyright (C) 2006, International Business Machines Corporation and others. * 3 * Copyright (C) 2006, International Business Machines Corporation and others. *
4 * All Rights Reserved. * 4 * All Rights Reserved. *
5 ******************************************************************************* 5 *******************************************************************************
6 */ 6 */
7 7
8 #ifndef TRIEDICT_H 8 #ifndef TRIEDICT_H
9 #define TRIEDICT_H 9 #define TRIEDICT_H
10 10
(...skipping 29 matching lines...) Expand all
40 */ 40 */
41 41
42 U_CAPI int32_t U_EXPORT2 42 U_CAPI int32_t U_EXPORT2
43 triedict_swap(const UDataSwapper *ds, 43 triedict_swap(const UDataSwapper *ds,
44 const void *inData, int32_t length, void *outData, 44 const void *inData, int32_t length, void *outData,
45 UErrorCode *pErrorCode); 45 UErrorCode *pErrorCode);
46 46
47 U_NAMESPACE_BEGIN 47 U_NAMESPACE_BEGIN
48 48
49 class StringEnumeration; 49 class StringEnumeration;
50 struct CompactTrieHeader;
51 50
52 /******************************************************************* 51 /*******************************************************************
53 * TrieWordDictionary 52 * TrieWordDictionary
54 */ 53 */
55 54
56 /** 55 /**
57 * <p>TrieWordDictionary is an abstract class that represents a word 56 * <p>TrieWordDictionary is an abstract class that represents a word
58 * dictionary based on a trie. The base protocol is read-only. 57 * dictionary based on a trie. The base protocol is read-only.
59 * Subclasses may allow writing.</p> 58 * Subclasses may allow writing.</p>
60 */ 59 */
61 class U_COMMON_API TrieWordDictionary : public UMemory { 60 class U_COMMON_API TrieWordDictionary : public UMemory {
62 public: 61 public:
63 62
64 /** 63 /**
65 * <p>Default constructor.</p> 64 * <p>Default constructor.</p>
66 * 65 *
67 */ 66 */
68 TrieWordDictionary(); 67 TrieWordDictionary();
69 68
70 /** 69 /**
71 * <p>Virtual destructor.</p> 70 * <p>Virtual destructor.</p>
72 */ 71 */
73 virtual ~TrieWordDictionary(); 72 virtual ~TrieWordDictionary();
74 73
74 /**
75 * <p>Returns true if the dictionary contains values associated with each word.</p>
76 */
77 virtual UBool getValued() const = 0;
78
75 /** 79 /**
76 * <p>Find dictionary words that match the text.</p> 80 * <p>Find dictionary words that match the text.</p>
77 * 81 *
78 * @param text A UText representing the text. The 82 * @param text A UText representing the text. The
79 * iterator is left after the longest prefix match in the dictionary. 83 * iterator is left after the longest prefix match in the dictionary.
80 * @param start The current position in text.
81 * @param maxLength The maximum number of code units to match. 84 * @param maxLength The maximum number of code units to match.
82 * @param lengths An array that is filled with the lengths of words that matched. 85 * @param lengths An array that is filled with the lengths of words that matched.
83 * @param count Filled with the number of elements output in lengths. 86 * @param count Filled with the number of elements output in lengths.
84 * @param limit The size of the lengths array; this limits the number of words output. 87 * @param limit The size of the lengths array; this limits the number of words output.
88 * @param values An array that is filled with the values associated with the matched words.
85 * @return The number of characters in text that were matched. 89 * @return The number of characters in text that were matched.
86 */ 90 */
87 virtual int32_t matches( UText *text, 91 virtual int32_t matches( UText *text,
88 int32_t maxLength, 92 int32_t maxLength,
89 int32_t *lengths, 93 int32_t *lengths,
90 int &count, 94 int &count,
91 int limit ) const = 0; 95 int limit,
96 uint16_t *values = NULL) const = 0;
92 97
93 /** 98 /**
94 * <p>Return a StringEnumeration for iterating all the words in the dictionary.</p> 99 * <p>Return a StringEnumeration for iterating all the words in the dictionary.</p>
95 * 100 *
96 * @param status A status code recording the success of the call. 101 * @param status A status code recording the success of the call.
97 * @return A StringEnumeration that will iterate through the whole dictionary. 102 * @return A StringEnumeration that will iterate through the whole dictionary.
98 * The caller is responsible for closing it. The order is unspecified. 103 * The caller is responsible for closing it. The order is unspecified.
99 */ 104 */
100 virtual StringEnumeration *openWords( UErrorCode &status ) const = 0; 105 virtual StringEnumeration *openWords( UErrorCode &status ) const = 0;
101 106
(...skipping 19 matching lines...) Expand all
121 126
122 TernaryNode *fTrie; 127 TernaryNode *fTrie;
123 128
124 /** 129 /**
125 * A UText for internal use 130 * A UText for internal use
126 * @internal 131 * @internal
127 */ 132 */
128 133
129 UText *fIter; 134 UText *fIter;
130 135
136 /**
137 * A UBool indicating whether the dictionary stores a value associated with each word
138 * @internal
139 */
140 UBool fValued;
141
131 friend class CompactTrieDictionary; // For fast conversion 142 friend class CompactTrieDictionary; // For fast conversion
132 143
133 public: 144 public:
134 145
135 /** 146 /**
136 * <p>Constructor.</p> 147 * <p>Constructor.</p>
137 * 148 *
138 * @param median A UChar around which to balance the trie. Ideally, it should 149 * @param median A UChar around which to balance the trie. Ideally, it should
139 * begin at least one word that is near the median of the set in the dictionary 150 * begin at least one word that is near the median of the set in the dictionary
140 * @param status A status code recording the success of the call. 151 * @param status A status code recording the success of the call.
152 * @param containsValue True if the dictionary stores values associated with each word.
141 */ 153 */
142 MutableTrieDictionary( UChar median, UErrorCode &status ); 154 MutableTrieDictionary( UChar median, UErrorCode &status, UBool containsValue = FALSE );
143 155
144 /** 156 /**
145 * <p>Virtual destructor.</p> 157 * <p>Virtual destructor.</p>
146 */ 158 */
147 virtual ~MutableTrieDictionary(); 159 virtual ~MutableTrieDictionary();
148 160
161 /**
162 * Indicate whether the MutableTrieDictionary stores values associated with each word
163 */
164 void setValued(UBool valued){
165 fValued = valued;
166 }
167
168 /**
169 * <p>Returns true if the dictionary contains values associated with each word.</p>
170 */
171 virtual UBool getValued() const {
172 return fValued;
173 }
174
149 /** 175 /**
150 * <p>Find dictionary words that match the text.</p> 176 * <p>Find dictionary words that match the text.</p>
151 * 177 *
152 * @param text A UText representing the text. The 178 * @param text A UText representing the text. The
153 * iterator is left after the longest prefix match in the dictionary. 179 * iterator is left after the longest prefix match in the dictionary.
154 * @param maxLength The maximum number of code units to match. 180 * @param maxLength The maximum number of code units to match.
155 * @param lengths An array that is filled with the lengths of words that matched. 181 * @param lengths An array that is filled with the lengths of words that matched.
156 * @param count Filled with the number of elements output in lengths. 182 * @param count Filled with the number of elements output in lengths.
157 * @param limit The size of the lengths array; this limits the number of words output. 183 * @param limit The size of the lengths array; this limits the number of words output.
184 * @param values An array that is filled with the values associated with the matched words.
158 * @return The number of characters in text that were matched. 185 * @return The number of characters in text that were matched.
159 */ 186 */
160 virtual int32_t matches( UText *text, 187 virtual int32_t matches( UText *text,
161 int32_t maxLength, 188 int32_t maxLength,
162 int32_t *lengths, 189 int32_t *lengths,
163 int &count, 190 int &count,
164 int limit ) const; 191 int limit,
192 uint16_t *values = NULL) const;
165 193
166 /** 194 /**
167 * <p>Return a StringEnumeration for iterating all the words in the dictionary.</p> 195 * <p>Return a StringEnumeration for iterating all the words in the dictionary.</p>
168 * 196 *
169 * @param status A status code recording the success of the call. 197 * @param status A status code recording the success of the call.
170 * @return A StringEnumeration that will iterate through the whole dictionary. 198 * @return A StringEnumeration that will iterate through the whole dictionary.
171 * The caller is responsible for closing it. The order is unspecified. 199 * The caller is responsible for closing it. The order is unspecified.
172 */ 200 */
173 virtual StringEnumeration *openWords( UErrorCode &status ) const; 201 virtual StringEnumeration *openWords( UErrorCode &status ) const;
174 202
175 /** 203 /**
176 * <p>Add one word to the dictionary.</p> 204 * <p>Add one word to the dictionary with an optional associated value.</p>
177 * 205 *
178 * @param word A UChar buffer containing the word. 206 * @param word A UChar buffer containing the word.
179 * @param length The length of the word. 207 * @param length The length of the word.
180 * @param status The resultant status 208 * @param status The resultant status.
209 * @param value The nonzero value associated with this word.
181 */ 210 */
182 virtual void addWord( const UChar *word, 211 virtual void addWord( const UChar *word,
183 int32_t length, 212 int32_t length,
184 UErrorCode &status); 213 UErrorCode &status,
214 uint16_t value = 0);
185 215
186 #if 0 216 #if 0
187 /** 217 /**
188 * <p>Add all strings from a UEnumeration to the dictionary.</p> 218 * <p>Add all strings from a UEnumeration to the dictionary.</p>
189 * 219 *
190 * @param words A UEnumeration that will return the desired words. 220 * @param words A UEnumeration that will return the desired words.
191 * @param status The resultant status 221 * @param status The resultant status
192 */ 222 */
193 virtual void addWords( UEnumeration *words, UErrorCode &status ); 223 virtual void addWords( UEnumeration *words, UErrorCode &status );
194 #endif 224 #endif
195 225
196 protected: 226 protected:
197 /** 227 /**
198 * <p>Search the dictionary for matches.</p> 228 * <p>Search the dictionary for matches.</p>
199 * 229 *
200 * @param text A UText representing the text. The 230 * @param text A UText representing the text. The
201 * iterator is left after the longest prefix match in the dictionary. 231 * iterator is left after the longest prefix match in the dictionary.
202 * @param maxLength The maximum number of code units to match. 232 * @param maxLength The maximum number of code units to match.
203 * @param lengths An array that is filled with the lengths of words that matched. 233 * @param lengths An array that is filled with the lengths of words that matched.
204 * @param count Filled with the number of elements output in lengths. 234 * @param count Filled with the number of elements output in lengths.
205 * @param limit The size of the lengths array; this limits the number of words output. 235 * @param limit The size of the lengths array; this limits the number of words output.
206 * @param parent The parent of the current node 236 * @param parent The parent of the current node.
207 * @param pMatched The returned parent node matched the input 237 * @param pMatched The returned parent node matched the input.
238 * @param values An array that is filled with the values associated with the matched words.
208 * @return The number of characters in text that were matched. 239 * @return The number of characters in text that were matched.
209 */ 240 */
210 virtual int32_t search( UText *text, 241 virtual int32_t search( UText *text,
211 int32_t maxLength, 242 int32_t maxLength,
212 int32_t *lengths, 243 int32_t *lengths,
213 int &count, 244 int &count,
214 int limit, 245 int limit,
215 TernaryNode *&parent, 246 TernaryNode *&parent,
216 UBool &pMatched ) const; 247 UBool &pMatched,
248 uint16_t *values = NULL) const;
217 249
218 private: 250 private:
219 /** 251 /**
220 * <p>Private constructor. The root node is not allocated.</p> 252 * <p>Private constructor. The root node is not allocated.</p>
221 * 253 *
222 * @param status A status code recording the success of the call. 254 * @param status A status code recording the success of the call.
255 * @param containsValues True if the dictionary will store a value associated
256 * with each word added.
223 */ 257 */
224 MutableTrieDictionary( UErrorCode &status ); 258 MutableTrieDictionary( UErrorCode &status, UBool containsValues = false );
225 }; 259 };
226 260
227 /******************************************************************* 261 /*******************************************************************
228 * CompactTrieDictionary 262 * CompactTrieDictionary
229 */ 263 */
230 264
265 //forward declarations
266 struct CompactTrieHeader;
267 struct CompactTrieInfo;
268
231 /** 269 /**
232 * <p>CompactTrieDictionary is a TrieWordDictionary that has been compacted 270 * <p>CompactTrieDictionary is a TrieWordDictionary that has been compacted
233 * to save space.</p> 271 * to save space.</p>
234 */ 272 */
235 class U_COMMON_API CompactTrieDictionary : public TrieWordDictionary { 273 class U_COMMON_API CompactTrieDictionary : public TrieWordDictionary {
236 private: 274 private:
237 /** 275 /**
238 * The root node of the trie 276 * The header of the CompactTrieDictionary which contains all info
239 */ 277 */
240 278
241 const CompactTrieHeader *fData; 279 CompactTrieInfo *fInfo;
242 280
243 /** 281 /**
244 * A UBool indicating whether or not we own the fData. 282 * A UBool indicating whether or not we own the fData.
245 */ 283 */
246
247 UBool fOwnData; 284 UBool fOwnData;
248 285
249 UDataMemory *fUData; 286 UDataMemory *fUData;
250 public: 287 public:
251 /** 288 /**
252 * <p>Construct a dictionary from a UDataMemory.</p> 289 * <p>Construct a dictionary from a UDataMemory.</p>
253 * 290 *
254 * @param data A pointer to a UDataMemory, which is adopted 291 * @param data A pointer to a UDataMemory, which is adopted
255 * @param status A status code giving the result of the constructor 292 * @param status A status code giving the result of the constructor
256 */ 293 */
257 CompactTrieDictionary(UDataMemory *dataObj, UErrorCode &status); 294 CompactTrieDictionary(UDataMemory *dataObj, UErrorCode &status);
258 295
259 /** 296 /**
(...skipping 10 matching lines...) Expand all
270 * @param dict The dictionary to use as input. 307 * @param dict The dictionary to use as input.
271 * @param status A status code recording the success of the call. 308 * @param status A status code recording the success of the call.
272 */ 309 */
273 CompactTrieDictionary( const MutableTrieDictionary &dict, UErrorCode &status ) ; 310 CompactTrieDictionary( const MutableTrieDictionary &dict, UErrorCode &status ) ;
274 311
275 /** 312 /**
276 * <p>Virtual destructor.</p> 313 * <p>Virtual destructor.</p>
277 */ 314 */
278 virtual ~CompactTrieDictionary(); 315 virtual ~CompactTrieDictionary();
279 316
317 /**
318 * <p>Returns true if the dictionary contains values associated with each word.</p>
319 */
320 virtual UBool getValued() const;
321
280 /** 322 /**
281 * <p>Find dictionary words that match the text.</p> 323 * <p>Find dictionary words that match the text.</p>
282 * 324 *
283 * @param text A UText representing the text. The 325 * @param text A UText representing the text. The
284 * iterator is left after the longest prefix match in the dictionary. 326 * iterator is left after the longest prefix match in the dictionary.
285 * @param maxLength The maximum number of code units to match. 327 * @param maxLength The maximum number of code units to match.
286 * @param lengths An array that is filled with the lengths of words that matched. 328 * @param lengths An array that is filled with the lengths of words that matched.
287 * @param count Filled with the number of elements output in lengths. 329 * @param count Filled with the number of elements output in lengths.
288 * @param limit The size of the lengths array; this limits the number of words output. 330 * @param limit The size of the lengths array; this limits the number of words output.
331 * @param values An array that is filled with the values associated with the matched words.
289 * @return The number of characters in text that were matched. 332 * @return The number of characters in text that were matched.
290 */ 333 */
291 virtual int32_t matches( UText *text, 334 virtual int32_t matches( UText *text,
292 int32_t rangeEnd, 335 int32_t maxLength,
293 int32_t *lengths, 336 int32_t *lengths,
294 int &count, 337 int &count,
295 int limit ) const; 338 int limit,
339 uint16_t *values = NULL) const;
296 340
297 /** 341 /**
298 * <p>Return a StringEnumeration for iterating all the words in the dictionary.</p> 342 * <p>Return a StringEnumeration for iterating all the words in the dictionary.</p>
299 * 343 *
300 * @param status A status code recording the success of the call. 344 * @param status A status code recording the success of the call.
301 * @return A StringEnumeration that will iterate through the whole dictionary. 345 * @return A StringEnumeration that will iterate through the whole dictionary.
302 * The caller is responsible for closing it. The order is unspecified. 346 * The caller is responsible for closing it. The order is unspecified.
303 */ 347 */
304 virtual StringEnumeration *openWords( UErrorCode &status ) const; 348 virtual StringEnumeration *openWords( UErrorCode &status ) const;
305 349
306 /** 350 /**
307 * <p>Return the size of the compact data.</p> 351 * <p>Return the size of the compact data.</p>
308 * 352 *
309 * @return The size of the dictionary's compact data. 353 * @return The size of the dictionary's compact data.
310 */ 354 */
311 virtual uint32_t dataSize() const; 355 virtual uint32_t dataSize() const;
312 356
313 /** 357 /**
314 * <p>Return a void * pointer to the compact data, platform-endian.</p> 358 * <p>Return a void * pointer to the (unmanaged) compact data, platform-endian.</p>
315 * 359 *
316 * @return The data for the compact dictionary, suitable for passing to the 360 * @return The data for the compact dictionary, suitable for passing to the
317 * constructor. 361 * constructor.
318 */ 362 */
319 virtual const void *data() const; 363 virtual const void *data() const;
320 364
321 /** 365 /**
322 * <p>Return a MutableTrieDictionary clone of this dictionary.</p> 366 * <p>Return a MutableTrieDictionary clone of this dictionary.</p>
323 * 367 *
324 * @param status A status code recording the success of the call. 368 * @param status A status code recording the success of the call.
(...skipping 10 matching lines...) Expand all
335 * @param status A status code recording the success of the call. 379 * @param status A status code recording the success of the call.
336 * @return A single data blob starting with a CompactTrieHeader. 380 * @return A single data blob starting with a CompactTrieHeader.
337 */ 381 */
338 static CompactTrieHeader *compactMutableTrieDictionary( const MutableTrieDicti onary &dict, 382 static CompactTrieHeader *compactMutableTrieDictionary( const MutableTrieDicti onary &dict,
339 UErrorCode &status ); 383 UErrorCode &status );
340 384
341 }; 385 };
342 386
343 U_NAMESPACE_END 387 U_NAMESPACE_END
344 388
345 /* TRIEDICT_H */ 389 /* TRIEDICT_H */
346 #endif 390 #endif
OLDNEW
« no previous file with comments | « icu46/source/common/rbbi.cpp ('k') | icu46/source/common/triedict.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698