OLD | NEW |
| (Empty) |
1 /* | |
2 ****************************************************************************** | |
3 * Copyright (C) 1996-2010, International Business Machines * | |
4 * Corporation and others. All Rights Reserved. * | |
5 ****************************************************************************** | |
6 */ | |
7 | |
8 /** | |
9 * \file | |
10 * \brief C++ API: Collation data used to compute minLengthInChars. | |
11 * \internal | |
12 */ | |
13 | |
14 #ifndef COLL_DATA_H | |
15 #define COLL_DATA_H | |
16 | |
17 #include "unicode/utypes.h" | |
18 | |
19 #if !UCONFIG_NO_COLLATION | |
20 | |
21 #include "unicode/uobject.h" | |
22 #include "unicode/ucol.h" | |
23 | |
24 U_NAMESPACE_BEGIN | |
25 | |
26 /** | |
27 * The size, in chars, of the internal buffer for the Collator's short description string. | |
28 * @internal ICU 4.0.1 technology preview | |
29 */ | |
30 #define KEY_BUFFER_SIZE 64 | |
31 | |
32 /** | |
33 * The number of CEs that fit in the internal CE buffer of a <code>CEList</code> object. | |
34 * @internal ICU 4.0.1 technology preview | |
35 */ | |
36 #define CELIST_BUFFER_SIZE 4 | |
37 | |
38 /** | |
39 * \def INSTRUMENT_CELIST | |
40 * Define this to enable the <code>CEList</code> objects to collect | |
41 * statistics. It is left undefined (commented out) by default. | |
42 * @internal ICU 4.0.1 technology preview | |
43 */ | |
44 //#define INSTRUMENT_CELIST | |
45 | |
46 /** | |
47 * The size of the initial list in a <code>StringList</code> object. | |
48 * @internal ICU 4.0.1 technology preview | |
49 */ | |
50 #define STRING_LIST_BUFFER_SIZE 16 | |
51 | |
52 /** | |
53 * \def INSTRUMENT_STRING_LIST | |
54 * Define this to enable the <code>StringList</code> objects to | |
55 * collect statistics. It is left undefined (commented out) by default. | |
56 * @internal ICU 4.0.1 technology preview | |
57 */ | |
58 //#define INSTRUMENT_STRING_LIST | |
59 | |
60 /** | |
61 * This object holds a list of CEs generated from a particular | |
62 * <code>UnicodeString</code> | |
63 * | |
64 * @internal ICU 4.0.1 technology preview | |
65 */ | |
66 class U_I18N_API CEList : public UObject | |
67 { | |
68 public: | |
69 /** | |
70 * Construct a <code>CEList</code> object. | |
71 * | |
72 * @param coll - the Collator used to collect the CEs. | |
73 * @param string - the string for which to collect the CEs. | |
74 * @param status - will be set if any errors occur. | |
75 * | |
76 * Note: if on return, status is set to an error code, | |
77 * the only safe thing to do with this object is to call | |
78 * the destructor. | |
79 * | |
80 * @internal ICU 4.0.1 technology preview | |
81 */ | |
82 CEList(UCollator *coll, const UnicodeString &string, UErrorCode &status); | |
83 | |
84 /** | |
85 * The destructor. | |
86 * @internal ICU 4.0.1 technology preview | |
87 */ | |
88 ~CEList(); | |
89 | |
90 /** | |
91 * Return the number of CEs in the list. | |
92 * | |
93 * @return the number of CEs in the list. | |
94 * | |
95 * @internal ICU 4.0.1 technology preview | |
96 */ | |
97 int32_t size() const; | |
98 | |
99 /** | |
100 * Get a particular CE from the list. | |
101 * | |
102 * @param index - the index of the CE to return | |
103 * | |
104 * @return the CE, or <code>0</code> if <code>index</code> is out of range | |
105 * | |
106 * @internal ICU 4.0.1 technology preview | |
107 */ | |
108 uint32_t get(int32_t index) const; | |
109 | |
110 /** | |
111 * Check if the CEs in another <code>CEList</code> match the | |
112 * suffix of this list starting at a given offset. | |
113 * | |
114 * @param offset - the offset of the suffix | |
115 * @param other - the other <code>CEList</code> | |
116 * | |
117 * @return <code>TRUE</code> if the CEs match, <code>FALSE</code> otherwise. | |
118 * | |
119 * @internal ICU 4.0.1 technology preview | |
120 */ | |
121 UBool matchesAt(int32_t offset, const CEList *other) const; | |
122 | |
123 /** | |
124 * The index operator. NOTE(review): declared const yet returns a non-const reference. | |
125 * | |
126 * @param index - the index | |
127 * | |
128 * @return a reference to the given CE in the list | |
129 * | |
130 * @internal ICU 4.0.1 technology preview | |
131 */ | |
132 uint32_t &operator[](int32_t index) const; | |
133 | |
134 /** | |
135 * UObject glue... | |
136 * @internal ICU 4.0.1 technology preview | |
137 */ | |
138 virtual UClassID getDynamicClassID() const; | |
139 /** | |
140 * UObject glue... | |
141 * @internal ICU 4.0.1 technology preview | |
142 */ | |
143 static UClassID getStaticClassID(); | |
144 | |
145 private: | |
146 void add(uint32_t ce, UErrorCode &status); | |
147 | |
148 uint32_t ceBuffer[CELIST_BUFFER_SIZE]; /* fixed-size storage embedded in the object */ | |
149 uint32_t *ces; /* NOTE(review): presumably ceBuffer or a larger array - confirm in the implementation */ | |
150 int32_t listMax; /* NOTE(review): presumably the capacity of ces - confirm */ | |
151 int32_t listSize; /* NOTE(review): presumably the number of CEs stored - confirm */ | |
152 | |
153 #ifdef INSTRUMENT_CELIST | |
154 static int32_t _active; | |
155 static int32_t _histogram[10]; | |
156 #endif | |
157 }; | |
158 | |
159 /** | |
160 * StringList | |
161 * | |
162 * This object holds a list of <code>UnicodeString</code> objects. | |
163 * | |
164 * @internal ICU 4.0.1 technology preview | |
165 */ | |
166 class U_I18N_API StringList : public UObject | |
167 { | |
168 public: | |
169 /** | |
170 * Construct an empty <code>StringList</code> | |
171 * | |
172 * @param status - will be set if any errors occur. | |
173 * | |
174 * Note: if on return, status is set to an error code, | |
175 * the only safe thing to do with this object is to call | |
176 * the destructor. | |
177 * | |
178 * @internal ICU 4.0.1 technology preview | |
179 */ | |
180 StringList(UErrorCode &status); | |
181 | |
182 /** | |
183 * The destructor. | |
184 * | |
185 * @internal ICU 4.0.1 technology preview | |
186 */ | |
187 ~StringList(); | |
188 | |
189 /** | |
190 * Add a string to the list. | |
191 * | |
192 * @param string - the string to add | |
193 * @param status - will be set if any errors occur. | |
194 * | |
195 * @internal ICU 4.0.1 technology preview | |
196 */ | |
197 void add(const UnicodeString *string, UErrorCode &status); | |
198 | |
199 /** | |
200 * Add an array of Unicode code points to the list. | |
201 * | |
202 * @param chars - the address of the array of code points | |
203 * @param count - the number of code points in the array | |
204 * @param status - will be set if any errors occur. | |
205 * | |
206 * @internal ICU 4.0.1 technology preview | |
207 */ | |
208 void add(const UChar *chars, int32_t count, UErrorCode &status); | |
209 | |
210 /** | |
211 * Get a particular string from the list. | |
212 * | |
213 * @param index - the index of the string | |
214 * | |
215 * @return a pointer to the <code>UnicodeString</code> or <code>NULL</code> | |
216 * if <code>index</code> is out of bounds. | |
217 * | |
218 * @internal ICU 4.0.1 technology preview | |
219 */ | |
220 const UnicodeString *get(int32_t index) const; | |
221 | |
222 /** | |
223 * Get the number of strings in the list. | |
224 * | |
225 * @return the number of strings in the list. | |
226 * | |
227 * @internal ICU 4.0.1 technology preview | |
228 */ | |
229 int32_t size() const; | |
230 | |
231 /** | |
232 * the UObject glue... | |
233 * @internal ICU 4.0.1 technology preview | |
234 */ | |
235 virtual UClassID getDynamicClassID() const; | |
236 /** | |
237 * the UObject glue... | |
238 * @internal ICU 4.0.1 technology preview | |
239 */ | |
240 static UClassID getStaticClassID(); | |
241 | |
242 private: | |
243 UnicodeString *strings; /* NOTE(review): presumably the array holding the list's strings - confirm */ | |
244 int32_t listMax; /* NOTE(review): presumably the capacity of strings - confirm */ | |
245 int32_t listSize; /* NOTE(review): presumably the number of strings stored - confirm */ | |
246 | |
247 #ifdef INSTRUMENT_STRING_LIST | |
248 static int32_t _lists; | |
249 static int32_t _strings; | |
250 static int32_t _histogram[101]; | |
251 #endif | |
252 }; | |
253 | |
254 /* | |
255 * Forward declarations of internal classes that this header refers to only by pointer. | |
256 */ | |
257 class StringToCEsMap; | |
258 class CEToStringsMap; | |
259 class CollDataCache; | |
260 | |
261 /** | |
262 * CollData | |
263 * | |
264 * This class holds the Collator-specific data needed to | |
265 * compute the length of the shortest string that can | |
266 * generate a particular list of CEs. | |
267 * | |
268 * <code>CollData</code> objects are quite expensive to compute. Because | |
269 * of this, they are cached. When you call <code>CollData::open</code> it | |
270 * returns a reference counted cached object. When you call <code>CollData::clos
e</code> | |
271 * the reference count on the object is decremented but the object is not delete
d. | |
272 * | |
273 * If you do not need to reuse any unreferenced objects in the cache, you can ca
ll | |
274 * <code>CollData::flushCollDataCache</code>. If you no longer need any <code>Co
llData</code> | |
275 * objects, you can call <code>CollData::freeCollDataCache</code> | |
276 * | |
277 * @internal ICU 4.0.1 technology preview | |
278 */ | |
279 class U_I18N_API CollData : public UObject | |
280 { | |
281 public: | |
282 /** | |
283 * Construct a <code>CollData</code> object. | |
284 * | |
285 * @param collator - the collator | |
286 * @param status - will be set if any errors occur. | |
287 * | |
288 * @return the <code>CollData</code> object. You must call | |
289 * <code>close</code> when you are done using the object. | |
290 * | |
291 * Note: if on return, status is set to an error code, | |
292 * the only safe thing to do with this object is to call | |
293 * <code>CollData::close</code>. | |
294 * | |
295 * @internal ICU 4.0.1 technology preview | |
296 */ | |
297 static CollData *open(UCollator *collator, UErrorCode &status); | |
298 | |
299 /** | |
300 * Release a <code>CollData</code> object. | |
301 * | |
302 * @param collData - the object | |
303 * | |
304 * @internal ICU 4.0.1 technology preview | |
305 */ | |
306 static void close(CollData *collData); | |
307 | |
308 /** | |
309 * Get the <code>UCollator</code> object used to create this object. | |
310 * The object returned may not be the exact object that was used to | |
311 * create this object, but it will have the same behavior. | |
312 * @internal ICU 4.0.1 technology preview | |
313 */ | |
314 UCollator *getCollator() const; | |
315 | |
316 /** | |
317 * Get a list of all the strings which generate a list | |
318 * of CEs starting with a given CE. | |
319 * | |
320 * @param ce - the CE (NOTE(review): typed int32_t here, while CEList exposes CEs as uint32_t) | |
321 * | |
322 * @return a <code>StringList</code> object containing all | |
323 * the strings, or <code>NULL</code> if there are | |
324 * no such strings. | |
325 * | |
326 * @internal ICU 4.0.1 technology preview. | |
327 */ | |
328 const StringList *getStringList(int32_t ce) const; | |
329 | |
330 /** | |
331 * Get a list of the CEs generated by a particular string. | |
332 * | |
333 * @param string - the string | |
334 * | |
335 * @return a <code>CEList</code> object containing the CEs. You | |
336 * must call <code>freeCEList</code> when you are finished | |
337 * using the <code>CEList</code>. | |
338 * | |
339 * @internal ICU 4.0.1 technology preview. | |
340 */ | |
341 const CEList *getCEList(const UnicodeString *string) const; | |
342 | |
343 /** | |
344 * Release a <code>CEList</code> returned by <code>getCEList</code>. | |
345 * | |
346 * @param list - the <code>CEList</code> to free. | |
347 * | |
348 * @internal ICU 4.0.1 technology preview | |
349 */ | |
350 void freeCEList(const CEList *list); | |
351 | |
352 /** | |
353 * Return the length of the shortest string that will generate | |
354 * the given list of CEs. | |
355 * | |
356 * @param ces - the CEs | |
357 * @param offset - the offset of the first CE in the list to use. | |
358 * | |
359 * @return the length of the shortest string. | |
360 * | |
361 * @internal ICU 4.0.1 technology preview | |
362 */ | |
363 int32_t minLengthInChars(const CEList *ces, int32_t offset) const; | |
364 | |
365 | |
366 /** | |
367 * Return the length of the shortest string that will generate | |
368 * the given list of CEs. | |
369 * | |
370 * Note: the algorithm used to do this computation is recursive. To | |
371 * limit the amount of recursion, a "history" list is used to record | |
372 * the best answer starting at a particular offset in the list of CEs. | |
373 * If the same offset is visited again during the recursion, the answer | |
374 * in the history list is used. | |
375 * | |
376 * @param ces - the CEs | |
377 * @param offset - the offset of the first CE in the list to use. | |
378 * @param history - the history list. Must be at least as long as | |
379 * the number of CEs in the <code>CEList</code> | |
380 * | |
381 * @return the length of the shortest string. | |
382 * | |
383 * @internal ICU 4.0.1 technology preview | |
384 */ | |
385 int32_t minLengthInChars(const CEList *ces, int32_t offset, int32_t *history)
const; | |
386 | |
387 /** | |
388 * UObject glue... | |
389 * @internal ICU 4.0.1 technology preview | |
390 */ | |
391 virtual UClassID getDynamicClassID() const; | |
392 /** | |
393 * UObject glue... | |
394 * @internal ICU 4.0.1 technology preview | |
395 */ | |
396 static UClassID getStaticClassID(); | |
397 | |
398 /** | |
399 * <code>CollData</code> objects are expensive to compute, and so | |
400 * may be cached. This routine will free the cached objects and delete | |
401 * the cache. | |
402 * | |
403 * WARNING: Don't call this until you have called <code>close</code> | |
404 * for each <code>CollData</code> object that you have used. Also, | |
405 * DO NOT call this if another thread may be calling <code>flushCollDataCach
e</code> | |
406 * at the same time. | |
407 * | |
408 * @internal 4.0.1 technology preview | |
409 */ | |
410 static void freeCollDataCache(); | |
411 | |
412 /** | |
413 * <code>CollData</code> objects are expensive to compute, and so | |
414 * may be cached. This routine will remove any unused <code>CollData</code> | |
415 * objects from the cache. | |
416 * | |
417 * @internal 4.0.1 technology preview | |
418 */ | |
419 static void flushCollDataCache(); | |
420 | |
421 private: | |
422 friend class CollDataCache; | |
423 friend class CollDataCacheEntry; | |
424 | |
425 CollData(UCollator *collator, char *cacheKey, int32_t cachekeyLength, UError
Code &status); | |
426 ~CollData(); | |
427 | |
428 CollData(); | |
429 | |
430 static char *getCollatorKey(UCollator *collator, char *buffer, int32_t buffe
rLength); | |
431 | |
432 static CollDataCache *getCollDataCache(); | |
433 | |
434 UCollator *coll; | |
435 StringToCEsMap *charsToCEList; | |
436 CEToStringsMap *ceToCharsStartingWith; | |
437 | |
438 char keyBuffer[KEY_BUFFER_SIZE]; /* fixed-size storage embedded in the object */ | |
439 char *key; /* NOTE(review): presumably keyBuffer or a larger buffer holding the cache key - confirm */ | |
440 | |
441 static CollDataCache *collDataCache; | |
442 | |
443 uint32_t minHan; | |
444 uint32_t maxHan; | |
445 | |
446 uint32_t jamoLimits[4]; | |
447 }; | |
448 | |
449 U_NAMESPACE_END | |
450 | |
451 #endif // #if !UCONFIG_NO_COLLATION | |
452 #endif // #ifndef COLL_DATA_H | |
OLD | NEW |