OLD | NEW |
| (Empty) |
1 /* | |
2 ****************************************************************************** | |
3 * Copyright (C) 1996-2012, International Business Machines * | |
4 * Corporation and others. All Rights Reserved. * | |
5 ****************************************************************************** | |
6 */ | |
7 | |
8 /** | |
9 * \file | |
10 * \brief Originally, added as C++ API for Collation data used to compute minLen
gthInChars | |
11 * \internal | |
12 */ | |
13 | |
14 /* | |
15 * Note: This module was incldued in ICU 4.0.1 as @internal technology preview f
or supporting | |
16 * Boyer-Moore string search API. For now, only SSearchTest depends on this modu
le. I temporaly | |
17 * moved the module from i18n directory to intltest, because we have no plan to
publish this | |
18 * as public API. (2012-12-18 yoshito) | |
19 */ | |
20 | |
21 #ifndef COLL_DATA_H | |
22 #define COLL_DATA_H | |
23 | |
24 #include "unicode/utypes.h" | |
25 | |
26 #if !UCONFIG_NO_COLLATION | |
27 | |
28 #include "unicode/ucol.h" | |
29 #include "unicode/unistr.h" | |
30 | |
31 /** | |
32 * The size of the internal CE buffer in a <code>CEList</code> object | |
33 */ | |
34 #define CELIST_BUFFER_SIZE 4 | |
35 | |
36 /** | |
37 * \def INSTRUMENT_CELIST | |
38 * Define this to enable the <code>CEList</code> objects to collect | |
39 * statistics. | |
40 */ | |
41 | |
42 /** | |
43 * The size of the initial list in a <code>StringList</code> object. | |
44 */ | |
45 #define STRING_LIST_BUFFER_SIZE 16 | |
46 | |
47 U_NAMESPACE_USE | |
48 | |
49 /** | |
50 * This object holds a list of CEs generated from a particular | |
51 * <code>UnicodeString</code> | |
52 * | |
53 */ | |
54 class CEList | |
55 { | |
56 public: | |
57 /** | |
58 * Construct a <code>CEList</code> object. | |
59 * | |
60 * @param coll - the Collator used to collect the CEs. | |
61 * @param string - the string for which to collect the CEs. | |
62 * @param status - will be set if any errors occur. | |
63 * | |
64 * Note: if on return, status is set to an error code, | |
65 * the only safe thing to do with this object is to call | |
66 * the destructor. | |
67 */ | |
68 CEList(UCollator *coll, const UnicodeString &string, UErrorCode &status); | |
69 | |
70 /** | |
71 * The destructor. | |
72 */ | |
73 ~CEList(); | |
74 | |
75 /** | |
76 * Return the number of CEs in the list. | |
77 * | |
78 * @return the number of CEs in the list. | |
79 */ | |
80 int32_t size() const; | |
81 | |
82 /** | |
83 * Get a particular CE from the list. | |
84 * | |
85 * @param index - the index of the CE to return | |
86 * | |
87 * @return the CE, or <code>0</code> if <code>index</code> is out of range | |
88 */ | |
89 uint32_t get(int32_t index) const; | |
90 | |
91 /** | |
92 * Check if the CEs in another <code>CEList</code> match the | |
93 * suffix of this list starting at a give offset. | |
94 * | |
95 * @param offset - the offset of the suffix | |
96 * @param other - the other <code>CEList</code> | |
97 * | |
98 * @return <code>TRUE</code> if the CEs match, <code>FALSE</code> otherwise. | |
99 */ | |
100 UBool matchesAt(int32_t offset, const CEList *other) const; | |
101 | |
102 /** | |
103 * The index operator. | |
104 * | |
105 * @param index - the index | |
106 * | |
107 * @return a reference to the given CE in the list | |
108 */ | |
109 uint32_t &operator[](int32_t index) const; | |
110 | |
111 private: | |
112 void add(uint32_t ce, UErrorCode &status); | |
113 | |
114 uint32_t ceBuffer[CELIST_BUFFER_SIZE]; | |
115 uint32_t *ces; | |
116 int32_t listMax; | |
117 int32_t listSize; | |
118 }; | |
119 | |
120 /** | |
121 * StringList | |
122 * | |
123 * This object holds a list of <code>UnicodeString</code> objects. | |
124 */ | |
125 class StringList | |
126 { | |
127 public: | |
128 /** | |
129 * Construct an empty <code>StringList</code> | |
130 * | |
131 * @param status - will be set if any errors occur. | |
132 * | |
133 * Note: if on return, status is set to an error code, | |
134 * the only safe thing to do with this object is to call | |
135 * the destructor. | |
136 */ | |
137 StringList(UErrorCode &status); | |
138 | |
139 /** | |
140 * The destructor. | |
141 */ | |
142 ~StringList(); | |
143 | |
144 /** | |
145 * Add a string to the list. | |
146 * | |
147 * @param string - the string to add | |
148 * @param status - will be set if any errors occur. | |
149 */ | |
150 void add(const UnicodeString *string, UErrorCode &status); | |
151 | |
152 /** | |
153 * Add an array of Unicode code points to the list. | |
154 * | |
155 * @param chars - the address of the array of code points | |
156 * @param count - the number of code points in the array | |
157 * @param status - will be set if any errors occur. | |
158 */ | |
159 void add(const UChar *chars, int32_t count, UErrorCode &status); | |
160 | |
161 /** | |
162 * Get a particular string from the list. | |
163 * | |
164 * @param index - the index of the string | |
165 * | |
166 * @return a pointer to the <code>UnicodeString</code> or <code>NULL</code> | |
167 * if <code>index</code> is out of bounds. | |
168 */ | |
169 const UnicodeString *get(int32_t index) const; | |
170 | |
171 /** | |
172 * Get the number of stings in the list. | |
173 * | |
174 * @return the number of strings in the list. | |
175 */ | |
176 int32_t size() const; | |
177 | |
178 private: | |
179 UnicodeString *strings; | |
180 int32_t listMax; | |
181 int32_t listSize; | |
182 }; | |
183 | |
184 | |
185 /* | |
186 * Forward references to internal classes. | |
187 */ | |
188 class StringToCEsMap; | |
189 class CEToStringsMap; | |
190 | |
191 /** | |
192 * CollData | |
193 * | |
194 * This class holds the Collator-specific data needed to | |
195 * compute the length of the shortest string that can | |
196 * generate a partcular list of CEs. | |
197 * | |
198 * <code>CollData</code> objects are quite expensive to compute. Because | |
199 * of this, they are cached. When you call <code>CollData::open</code> it | |
200 * returns a reference counted cached object. When you call <code>CollData::clos
e</code> | |
201 * the reference count on the object is decremented but the object is not delete
d. | |
202 * | |
203 * If you do not need to reuse any unreferenced objects in the cache, you can ca
ll | |
204 * <code>CollData::flushCollDataCache</code>. If you no longer need any <code>Co
llData</code> | |
205 * objects, you can call <code>CollData::freeCollDataCache</code> | |
206 */ | |
207 class CollData | |
208 { | |
209 public: | |
210 /** | |
211 * Construct a <code>CollData</code> object. | |
212 * | |
213 * @param collator - the collator | |
214 * @param status - will be set if any errors occur. | |
215 */ | |
216 CollData(UCollator *collator, UErrorCode &status); | |
217 | |
218 /** | |
219 * The destructor. | |
220 */ | |
221 ~CollData(); | |
222 | |
223 /** | |
224 * Get the <code>UCollator</code> object used to create this object. | |
225 * The object returned may not be the exact object that was used to | |
226 * create this object, but it will have the same behavior. | |
227 */ | |
228 UCollator *getCollator() const; | |
229 | |
230 /** | |
231 * Get a list of all the strings which generate a list | |
232 * of CEs starting with a given CE. | |
233 * | |
234 * @param ce - the CE | |
235 * | |
236 * return a <code>StringList</code> object containing all | |
237 * the stirngs, or <code>NULL</code> if there are | |
238 * no such strings. | |
239 */ | |
240 const StringList *getStringList(int32_t ce) const; | |
241 | |
242 /** | |
243 * Get a list of the CEs generated by a partcular stirng. | |
244 * | |
245 * @param string - the string | |
246 * | |
247 * @return a <code>CEList</code> object containt the CEs. You | |
248 * must call <code>freeCEList</code> when you are finished | |
249 * using the <code>CEList</code>/ | |
250 */ | |
251 const CEList *getCEList(const UnicodeString *string) const; | |
252 | |
253 /** | |
254 * Release a <code>CEList</code> returned by <code>getCEList</code>. | |
255 * | |
256 * @param list - the <code>CEList</code> to free. | |
257 */ | |
258 void freeCEList(const CEList *list); | |
259 | |
260 /** | |
261 * Return the length of the shortest string that will generate | |
262 * the given list of CEs. | |
263 * | |
264 * @param ces - the CEs | |
265 * @param offset - the offset of the first CE in the list to use. | |
266 * | |
267 * @return the length of the shortest string. | |
268 */ | |
269 int32_t minLengthInChars(const CEList *ces, int32_t offset) const; | |
270 | |
271 | |
272 /** | |
273 * Return the length of the shortest string that will generate | |
274 * the given list of CEs. | |
275 * | |
276 * Note: the algorithm used to do this computation is recursive. To | |
277 * limit the amount of recursion, a "history" list is used to record | |
278 * the best answer starting at a particular offset in the list of CEs. | |
279 * If the same offset is visited again during the recursion, the answer | |
280 * in the history list is used. | |
281 * | |
282 * @param ces - the CEs | |
283 * @param offset - the offset of the first CE in the list to use. | |
284 * @param history - the history list. Must be at least as long as | |
285 * the number of cEs in the <code>CEList</code> | |
286 * | |
287 * @return the length of the shortest string. | |
288 */ | |
289 int32_t minLengthInChars(const CEList *ces, int32_t offset, int32_t *history)
const; | |
290 | |
291 private: | |
292 UCollator *coll; | |
293 CEToStringsMap *ceToCharsStartingWith; | |
294 | |
295 uint32_t minHan; | |
296 uint32_t maxHan; | |
297 | |
298 uint32_t jamoLimits[4]; | |
299 }; | |
300 | |
301 #endif // #if !UCONFIG_NO_COLLATION | |
302 #endif // #ifndef COLL_DATA_H | |
OLD | NEW |