OLD | NEW |
| (Empty) |
1 /* | |
2 ******************************************************************************* | |
3 * | |
4 * Copyright (C) 2005-2010, International Business Machines | |
5 * Corporation and others. All Rights Reserved. | |
6 * | |
7 ******************************************************************************* | |
8 * file name: ucasemap.h | |
9 * encoding: US-ASCII | |
10 * tab size: 8 (not used) | |
11 * indentation:4 | |
12 * | |
13 * created on: 2005may06 | |
14 * created by: Markus W. Scherer | |
15 * | |
16 * Case mapping service object and functions using it. | |
17 */ | |
18 | |
19 #ifndef __UCASEMAP_H__ | |
20 #define __UCASEMAP_H__ | |
21 | |
22 #include "unicode/utypes.h" | |
23 #include "unicode/ustring.h" | |
24 #include "unicode/localpointer.h" | |
25 | |
26 /** | |
27 * \file | |
28 * \brief C API: Unicode case mapping functions using a UCaseMap service object. | |
29 * | |
30 * The service object takes care of memory allocations, data loading, and setup | |
31 * for the attributes, as usual. | |
32 * | |
33 * Currently, the functionality provided here does not overlap with uchar.h | |
34 * and ustring.h, except for ucasemap_toTitle(). | |
35 * | |
36 * ucasemap_utf8XYZ() functions operate directly on UTF-8 strings. | |
37 */ | |
38 | |
39 /** | |
40 * UCaseMap is an opaque service object for newer ICU case mapping functions. | |
41 * Older functions did not use a service object. | |
42 * @stable ICU 3.4 | |
43 */ | |
44 struct UCaseMap; | |
45 typedef struct UCaseMap UCaseMap; /**< C typedef for struct UCaseMap. @stable IC
U 3.4 */ | |
46 | |
47 /** | |
48 * Open a UCaseMap service object for a locale and a set of options. | |
49 * The locale ID and options are preprocessed so that functions using the | |
50 * service object need not process them in each call. | |
51 * | |
52 * @param locale ICU locale ID, used for language-dependent | |
53 * upper-/lower-/title-casing according to the Unicode standard. | |
54 * Usual semantics: ""=root, NULL=default locale, etc. | |
55 * @param options Options bit set, used for case folding and string comparisons. | |
56 * Same flags as for u_foldCase(), u_strFoldCase(), | |
57 * u_strCaseCompare(), etc. | |
58 * Use 0 or U_FOLD_CASE_DEFAULT for default behavior. | |
59 * @param pErrorCode Must be a valid pointer to an error code value, | |
60 * which must not indicate a failure before the function call. | |
61 * @return Pointer to a UCaseMap service object, if successful. | |
62 * | |
63 * @see U_FOLD_CASE_DEFAULT | |
64 * @see U_FOLD_CASE_EXCLUDE_SPECIAL_I | |
65 * @see U_TITLECASE_NO_LOWERCASE | |
66 * @see U_TITLECASE_NO_BREAK_ADJUSTMENT | |
67 * @stable ICU 3.4 | |
68 */ | |
69 U_STABLE UCaseMap * U_EXPORT2 | |
70 ucasemap_open(const char *locale, uint32_t options, UErrorCode *pErrorCode); | |
71 | |
72 /** | |
73 * Close a UCaseMap service object. | |
74 * @param csm Object to be closed. | |
75 * @stable ICU 3.4 | |
76 */ | |
77 U_STABLE void U_EXPORT2 | |
78 ucasemap_close(UCaseMap *csm); | |
79 | |
80 #if U_SHOW_CPLUSPLUS_API | |
81 | |
82 U_NAMESPACE_BEGIN | |
83 | |
84 /** | |
85 * \class LocalUCaseMapPointer | |
86 * "Smart pointer" class, closes a UCaseMap via ucasemap_close(). | |
87 * For most methods see the LocalPointerBase base class. | |
88 * | |
89 * @see LocalPointerBase | |
90 * @see LocalPointer | |
91 * @stable ICU 4.4 | |
92 */ | |
93 U_DEFINE_LOCAL_OPEN_POINTER(LocalUCaseMapPointer, UCaseMap, ucasemap_close); | |
94 | |
95 U_NAMESPACE_END | |
96 | |
97 #endif | |
98 | |
99 /** | |
100 * Get the locale ID that is used for language-dependent case mappings. | |
101 * @param csm UCaseMap service object. | |
102 * @return locale ID | |
103 * @stable ICU 3.4 | |
104 */ | |
105 U_STABLE const char * U_EXPORT2 | |
106 ucasemap_getLocale(const UCaseMap *csm); | |
107 | |
108 /** | |
109 * Get the options bit set that is used for case folding and string comparisons. | |
110 * @param csm UCaseMap service object. | |
111 * @return options bit set | |
112 * @stable ICU 3.4 | |
113 */ | |
114 U_STABLE uint32_t U_EXPORT2 | |
115 ucasemap_getOptions(const UCaseMap *csm); | |
116 | |
117 /** | |
118 * Set the locale ID that is used for language-dependent case mappings. | |
119 * | |
120 * @param csm UCaseMap service object. | |
121 * @param locale Locale ID, see ucasemap_open(). | |
122 * @param pErrorCode Must be a valid pointer to an error code value, | |
123 * which must not indicate a failure before the function call. | |
124 * | |
125 * @see ucasemap_open | |
126 * @stable ICU 3.4 | |
127 */ | |
128 U_STABLE void U_EXPORT2 | |
129 ucasemap_setLocale(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode); | |
130 | |
131 /** | |
132 * Set the options bit set that is used for case folding and string comparisons. | |
133 * | |
134 * @param csm UCaseMap service object. | |
135 * @param options Options bit set, see ucasemap_open(). | |
136 * @param pErrorCode Must be a valid pointer to an error code value, | |
137 * which must not indicate a failure before the function call. | |
138 * | |
139 * @see ucasemap_open | |
140 * @stable ICU 3.4 | |
141 */ | |
142 U_STABLE void U_EXPORT2 | |
143 ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode *pErrorCode); | |
144 | |
145 /** | |
146 * Do not lowercase non-initial parts of words when titlecasing. | |
147 * Option bit for titlecasing APIs that take an options bit set. | |
148 * | |
149 * By default, titlecasing will titlecase the first cased character | |
150 * of a word and lowercase all other characters. | |
151 * With this option, the other characters will not be modified. | |
152 * | |
153 * @see ucasemap_setOptions | |
154 * @see ucasemap_toTitle | |
155 * @see ucasemap_utf8ToTitle | |
156 * @see UnicodeString::toTitle | |
157 * @stable ICU 3.8 | |
158 */ | |
159 #define U_TITLECASE_NO_LOWERCASE 0x100 | |
160 | |
161 /** | |
162 * Do not adjust the titlecasing indexes from BreakIterator::next() indexes; | |
163 * titlecase exactly the characters at breaks from the iterator. | |
164 * Option bit for titlecasing APIs that take an options bit set. | |
165 * | |
166 * By default, titlecasing will take each break iterator index, | |
167 * adjust it by looking for the next cased character, and titlecase that one. | |
168 * Other characters are lowercased. | |
169 * | |
170 * This follows Unicode 4 & 5 section 3.13 Default Case Operations: | |
171 * | |
172 * R3 toTitlecase(X): Find the word boundaries based on Unicode Standard Annex | |
173 * #29, "Text Boundaries." Between each pair of word boundaries, find the first | |
174 * cased character F. If F exists, map F to default_title(F); then map each | |
175 * subsequent character C to default_lower(C). | |
176 * | |
177 * @see ucasemap_setOptions | |
178 * @see ucasemap_toTitle | |
179 * @see ucasemap_utf8ToTitle | |
180 * @see UnicodeString::toTitle | |
181 * @see U_TITLECASE_NO_LOWERCASE | |
182 * @stable ICU 3.8 | |
183 */ | |
184 #define U_TITLECASE_NO_BREAK_ADJUSTMENT 0x200 | |
185 | |
186 #if !UCONFIG_NO_BREAK_ITERATION | |
187 | |
188 /** | |
189 * Get the break iterator that is used for titlecasing. | |
190 * Do not modify the returned break iterator. | |
191 * @param csm UCaseMap service object. | |
192 * @return titlecasing break iterator | |
193 * @stable ICU 3.8 | |
194 */ | |
195 U_STABLE const UBreakIterator * U_EXPORT2 | |
196 ucasemap_getBreakIterator(const UCaseMap *csm); | |
197 | |
198 /** | |
199 * Set the break iterator that is used for titlecasing. | |
200 * The UCaseMap service object releases a previously set break iterator | |
201 * and "adopts" this new one, taking ownership of it. | |
202 * It will be released in a subsequent call to ucasemap_setBreakIterator() | |
203 * or ucasemap_close(). | |
204 * | |
205 * Break iterator operations are not thread-safe. Therefore, titlecasing | |
206 * functions use non-const UCaseMap objects. It is not possible to titlecase | |
207 * strings concurrently using the same UCaseMap. | |
208 * | |
209 * @param csm UCaseMap service object. | |
210 * @param iterToAdopt Break iterator to be adopted for titlecasing. | |
211 * @param pErrorCode Must be a valid pointer to an error code value, | |
212 * which must not indicate a failure before the function call. | |
213 * | |
214 * @see ucasemap_toTitle | |
215 * @see ucasemap_utf8ToTitle | |
216 * @stable ICU 3.8 | |
217 */ | |
218 U_STABLE void U_EXPORT2 | |
219 ucasemap_setBreakIterator(UCaseMap *csm, UBreakIterator *iterToAdopt, UErrorCode
*pErrorCode); | |
220 | |
221 /** | |
222 * Titlecase a UTF-16 string. This function is almost a duplicate of u_strToTitle(), | |
223 * except that it takes ucasemap_setOptions() into account and has performance | |
224 * advantages from being able to use a UCaseMap object for multiple case mapping | |
225 * operations, saving setup time. | |
226 * | |
227 * Casing is locale-dependent and context-sensitive. | |
228 * Titlecasing uses a break iterator to find the first characters of words | |
229 * that are to be titlecased. It titlecases those characters and lowercases | |
230 * all others. (This can be modified with ucasemap_setOptions().) | |
231 * | |
232 * Note: This function takes a non-const UCaseMap pointer because it will | |
233 * open a default break iterator if no break iterator was set yet, | |
234 * and effectively call ucasemap_setBreakIterator(); | |
235 * also because the break iterator is stateful and will be modified during | |
236 * the iteration. | |
237 * | |
238 * The titlecase break iterator can be provided to customize for arbitrary | |
239 * styles, using rules and dictionaries beyond the standard iterators. | |
240 * The standard titlecase iterator for the root locale implements the | |
241 * algorithm of Unicode TR 21. | |
242 * | |
243 * This function uses only the setUText(), first(), next() and close() methods of the | |
244 * provided break iterator. | |
245 * | |
246 * The result may be longer or shorter than the original. | |
247 * The source string and the destination buffer must not overlap. | |
248 * | |
249 * @param csm UCaseMap service object. This pointer is non-const! | |
250 * See the note above for details. | |
251 * @param dest A buffer for the result string. The result will be NUL-terminated if | |
252 * the buffer is large enough. | |
253 * The contents are undefined in case of failure. | |
254 * @param destCapacity The size of the buffer (number of UChars). If it is 0, then | |
255 * dest may be NULL and the function will only return the length of the result | |
256 * without writing any of the result string. | |
257 * @param src The original string. | |
258 * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. | |
259 * @param pErrorCode Must be a valid pointer to an error code value, | |
260 * which must not indicate a failure before the function call. | |
261 * @return The length of the result string, if successful - or in case of a buffer overflow, | |
262 * in which case it will be greater than destCapacity. | |
263 * | |
264 * @see u_strToTitle | |
265 * @stable ICU 3.8 | |
266 */ | |
267 U_STABLE int32_t U_EXPORT2 | |
268 ucasemap_toTitle(UCaseMap *csm, | |
269 UChar *dest, int32_t destCapacity, | |
270 const UChar *src, int32_t srcLength, | |
271 UErrorCode *pErrorCode); | |
272 | |
273 #endif | |
274 | |
275 /** | |
276 * Lowercase the characters in a UTF-8 string. | |
277 * Casing is locale-dependent and context-sensitive. | |
278 * The result may be longer or shorter than the original. | |
279 * The source string and the destination buffer must not overlap. | |
280 * | |
281 * @param csm UCaseMap service object. | |
282 * @param dest A buffer for the result string. The result will be NUL-terminated if | |
283 * the buffer is large enough. | |
284 * The contents are undefined in case of failure. | |
285 * @param destCapacity The size of the buffer (number of bytes). If it is 0, then | |
286 * dest may be NULL and the function will only return the length of the result | |
287 * without writing any of the result string. | |
288 * @param src The original string. | |
289 * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. | |
290 * @param pErrorCode Must be a valid pointer to an error code value, | |
291 * which must not indicate a failure before the function call. | |
292 * @return The length of the result string, if successful - or in case of a buffer overflow, | |
293 * in which case it will be greater than destCapacity. | |
294 * | |
295 * @see u_strToLower | |
296 * @stable ICU 3.4 | |
297 */ | |
298 U_STABLE int32_t U_EXPORT2 | |
299 ucasemap_utf8ToLower(const UCaseMap *csm, | |
300 char *dest, int32_t destCapacity, | |
301 const char *src, int32_t srcLength, | |
302 UErrorCode *pErrorCode); | |
303 | |
304 /** | |
305 * Uppercase the characters in a UTF-8 string. | |
306 * Casing is locale-dependent and context-sensitive. | |
307 * The result may be longer or shorter than the original. | |
308 * The source string and the destination buffer must not overlap. | |
309 * | |
310 * @param csm UCaseMap service object. | |
311 * @param dest A buffer for the result string. The result will be NUL-terminated if | |
312 * the buffer is large enough. | |
313 * The contents are undefined in case of failure. | |
314 * @param destCapacity The size of the buffer (number of bytes). If it is 0, then | |
315 * dest may be NULL and the function will only return the length of the result | |
316 * without writing any of the result string. | |
317 * @param src The original string. | |
318 * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. | |
319 * @param pErrorCode Must be a valid pointer to an error code value, | |
320 * which must not indicate a failure before the function call. | |
321 * @return The length of the result string, if successful - or in case of a buffer overflow, | |
322 * in which case it will be greater than destCapacity. | |
323 * | |
324 * @see u_strToUpper | |
325 * @stable ICU 3.4 | |
326 */ | |
327 U_STABLE int32_t U_EXPORT2 | |
328 ucasemap_utf8ToUpper(const UCaseMap *csm, | |
329 char *dest, int32_t destCapacity, | |
330 const char *src, int32_t srcLength, | |
331 UErrorCode *pErrorCode); | |
332 | |
333 #if !UCONFIG_NO_BREAK_ITERATION | |
334 | |
335 /** | |
336 * Titlecase a UTF-8 string. | |
337 * Casing is locale-dependent and context-sensitive. | |
338 * Titlecasing uses a break iterator to find the first characters of words | |
339 * that are to be titlecased. It titlecases those characters and lowercases | |
340 * all others. (This can be modified with ucasemap_setOptions().) | |
341 * | |
342 * Note: This function takes a non-const UCaseMap pointer because it will | |
343 * open a default break iterator if no break iterator was set yet, | |
344 * and effectively call ucasemap_setBreakIterator(); | |
345 * also because the break iterator is stateful and will be modified during | |
346 * the iteration. | |
347 * | |
348 * The titlecase break iterator can be provided to customize for arbitrary | |
349 * styles, using rules and dictionaries beyond the standard iterators. | |
350 * The standard titlecase iterator for the root locale implements the | |
351 * algorithm of Unicode TR 21. | |
352 * | |
353 * This function uses only the setUText(), first(), next() and close() methods of the | |
354 * provided break iterator. | |
355 * | |
356 * The result may be longer or shorter than the original. | |
357 * The source string and the destination buffer must not overlap. | |
358 * | |
359 * @param csm UCaseMap service object. This pointer is non-const! | |
360 * See the note above for details. | |
361 * @param dest A buffer for the result string. The result will be NUL-terminated if | |
362 * the buffer is large enough. | |
363 * The contents are undefined in case of failure. | |
364 * @param destCapacity The size of the buffer (number of bytes). If it is 0, then | |
365 * dest may be NULL and the function will only return the length of the result | |
366 * without writing any of the result string. | |
367 * @param src The original string. | |
368 * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. | |
369 * @param pErrorCode Must be a valid pointer to an error code value, | |
370 * which must not indicate a failure before the function call. | |
371 * @return The length of the result string, if successful - or in case of a buffer overflow, | |
372 * in which case it will be greater than destCapacity. | |
373 * | |
374 * @see u_strToTitle | |
375 * @see U_TITLECASE_NO_LOWERCASE | |
376 * @see U_TITLECASE_NO_BREAK_ADJUSTMENT | |
377 * @stable ICU 3.8 | |
378 */ | |
379 U_STABLE int32_t U_EXPORT2 | |
380 ucasemap_utf8ToTitle(UCaseMap *csm, | |
381 char *dest, int32_t destCapacity, | |
382 const char *src, int32_t srcLength, | |
383 UErrorCode *pErrorCode); | |
384 | |
385 #endif | |
386 | |
387 /** | |
388 * Case-fold the characters in a UTF-8 string. | |
389 * Case-folding is locale-independent and not context-sensitive, | |
390 * but there is an option for whether to include or exclude mappings for dotted I | |
391 * and dotless i that are marked with 'I' in CaseFolding.txt. | |
392 * The result may be longer or shorter than the original. | |
393 * The source string and the destination buffer must not overlap. | |
394 * | |
395 * @param csm UCaseMap service object. | |
396 * @param dest A buffer for the result string. The result will be NUL-terminated if | |
397 * the buffer is large enough. | |
398 * The contents are undefined in case of failure. | |
399 * @param destCapacity The size of the buffer (number of bytes). If it is 0, then | |
400 * dest may be NULL and the function will only return the length of the result | |
401 * without writing any of the result string. | |
402 * @param src The original string. | |
403 * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. | |
404 * @param pErrorCode Must be a valid pointer to an error code value, | |
405 * which must not indicate a failure before the function call. | |
406 * @return The length of the result string, if successful - or in case of a buffer overflow, | |
407 * in which case it will be greater than destCapacity. | |
408 * | |
409 * @see u_strFoldCase | |
410 * @see ucasemap_setOptions | |
411 * @see U_FOLD_CASE_DEFAULT | |
412 * @see U_FOLD_CASE_EXCLUDE_SPECIAL_I | |
413 * @stable ICU 3.8 | |
414 */ | |
415 U_STABLE int32_t U_EXPORT2 | |
416 ucasemap_utf8FoldCase(const UCaseMap *csm, | |
417 char *dest, int32_t destCapacity, | |
418 const char *src, int32_t srcLength, | |
419 UErrorCode *pErrorCode); | |
420 | |
421 #endif | |
OLD | NEW |