Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(225)

Side by Side Diff: source/common/unicode/normlzr.h

Issue 1621843002: ICU 56 update step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@561
Patch Set: Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/common/unicode/locid.h ('k') | source/common/unicode/platform.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 ******************************************************************** 2 ********************************************************************
3 * COPYRIGHT: 3 * COPYRIGHT:
4 * Copyright (c) 1996-2011, International Business Machines Corporation and 4 * Copyright (c) 1996-2015, International Business Machines Corporation and
5 * others. All Rights Reserved. 5 * others. All Rights Reserved.
6 ******************************************************************** 6 ********************************************************************
7 */ 7 */
8 8
9 #ifndef NORMLZR_H 9 #ifndef NORMLZR_H
10 #define NORMLZR_H 10 #define NORMLZR_H
11 11
12 #include "unicode/utypes.h" 12 #include "unicode/utypes.h"
13 13
14 /** 14 /**
15 * \file 15 * \file
16 * \brief C++ API: Unicode Normalization 16 * \brief C++ API: Unicode Normalization
17 */ 17 */
18 18
19 #if !UCONFIG_NO_NORMALIZATION 19 #if !UCONFIG_NO_NORMALIZATION
20 20
21 #include "unicode/chariter.h" 21 #include "unicode/chariter.h"
22 #include "unicode/normalizer2.h" 22 #include "unicode/normalizer2.h"
23 #include "unicode/unistr.h" 23 #include "unicode/unistr.h"
24 #include "unicode/unorm.h" 24 #include "unicode/unorm.h"
25 #include "unicode/uobject.h" 25 #include "unicode/uobject.h"
26 26
27 U_NAMESPACE_BEGIN 27 U_NAMESPACE_BEGIN
28 /** 28 /**
29 * Old Unicode normalization API.
30 *
31 * This API has been replaced by the Normalizer2 class and is only available
32 * for backward compatibility. This class simply delegates to the Normalizer2 class.
33 * There is one exception: The new API does not provide a replacement for Normalizer::compare().
34 *
29 * The Normalizer class supports the standard normalization forms described in 35 * The Normalizer class supports the standard normalization forms described in
30 * <a href="http://www.unicode.org/unicode/reports/tr15/" target="unicode"> 36 * <a href="http://www.unicode.org/unicode/reports/tr15/" target="unicode">
31 * Unicode Standard Annex #15: Unicode Normalization Forms</a>. 37 * Unicode Standard Annex #15: Unicode Normalization Forms</a>.
32 * 38 *
33 * Note: This API has been replaced by the Normalizer2 class and is only available
34 * for backward compatibility. This class simply delegates to the Normalizer2 class.
35 * There is one exception: The new API does not provide a replacement for Normalizer::compare().
36 *
37 * The Normalizer class consists of two parts: 39 * The Normalizer class consists of two parts:
38 * - static functions that normalize strings or test if strings are normalized 40 * - static functions that normalize strings or test if strings are normalized
39 * - a Normalizer object is an iterator that takes any kind of text and 41 * - a Normalizer object is an iterator that takes any kind of text and
40 * provides iteration over its normalized form 42 * provides iteration over its normalized form
41 * 43 *
42 * The Normalizer class is not suitable for subclassing. 44 * The Normalizer class is not suitable for subclassing.
43 * 45 *
44 * For basic information about normalization forms and details about the C API 46 * For basic information about normalization forms and details about the C API
45 * please see the documentation in unorm.h. 47 * please see the documentation in unorm.h.
46 * 48 *
(...skipping 75 matching lines...) Expand 10 before | Expand all | Expand 10 after
122 * This means that there is not necessarily a one-to-one correspondence 124 * This means that there is not necessarily a one-to-one correspondence
123 * between characters returned by next() and previous() and the indices 125 * between characters returned by next() and previous() and the indices
124 * passed to and returned from setIndex() and getIndex(). 126 * passed to and returned from setIndex() and getIndex().
125 * It is for this reason that Normalizer does not implement the CharacterIterator interface. 127 * It is for this reason that Normalizer does not implement the CharacterIterator interface.
126 * 128 *
127 * @author Laura Werner, Mark Davis, Markus Scherer 129 * @author Laura Werner, Mark Davis, Markus Scherer
128 * @stable ICU 2.0 130 * @stable ICU 2.0
129 */ 131 */
130 class U_COMMON_API Normalizer : public UObject { 132 class U_COMMON_API Normalizer : public UObject {
131 public: 133 public:
134 #ifndef U_HIDE_DEPRECATED_API
132 /** 135 /**
133 * If DONE is returned from an iteration function that returns a code point, 136 * If DONE is returned from an iteration function that returns a code point,
134 * then there are no more normalization results available. 137 * then there are no more normalization results available.
135 * @stable ICU 2.0 138 * @deprecated ICU 56 Use Normalizer2 instead.
136 */ 139 */
137 enum { 140 enum {
138 DONE=0xffff 141 DONE=0xffff
139 }; 142 };
140 143
141 // Constructors 144 // Constructors
142 145
143 /** 146 /**
144 * Creates a new <code>Normalizer</code> object for iterating over the 147 * Creates a new <code>Normalizer</code> object for iterating over the
145 * normalized form of a given string. 148 * normalized form of a given string.
146 * <p> 149 * <p>
147 * @param str The string to be normalized. The normalization 150 * @param str The string to be normalized. The normalization
148 * will start at the beginning of the string. 151 * will start at the beginning of the string.
149 * 152 *
150 * @param mode The normalization mode. 153 * @param mode The normalization mode.
151 * @stable ICU 2.0 154 * @deprecated ICU 56 Use Normalizer2 instead.
152 */ 155 */
153 Normalizer(const UnicodeString& str, UNormalizationMode mode); 156 Normalizer(const UnicodeString& str, UNormalizationMode mode);
154 157
155 /** 158 /**
156 * Creates a new <code>Normalizer</code> object for iterating over the 159 * Creates a new <code>Normalizer</code> object for iterating over the
157 * normalized form of a given string. 160 * normalized form of a given string.
158 * <p> 161 * <p>
159 * @param str The string to be normalized. The normalization 162 * @param str The string to be normalized. The normalization
160 * will start at the beginning of the string. 163 * will start at the beginning of the string.
161 * 164 *
162 * @param length Length of the string, or -1 if NUL-terminated. 165 * @param length Length of the string, or -1 if NUL-terminated.
163 * @param mode The normalization mode. 166 * @param mode The normalization mode.
164 * @stable ICU 2.0 167 * @deprecated ICU 56 Use Normalizer2 instead.
165 */ 168 */
166 Normalizer(const UChar* str, int32_t length, UNormalizationMode mode); 169 Normalizer(const UChar* str, int32_t length, UNormalizationMode mode);
167 170
168 /** 171 /**
169 * Creates a new <code>Normalizer</code> object for iterating over the 172 * Creates a new <code>Normalizer</code> object for iterating over the
170 * normalized form of the given text. 173 * normalized form of the given text.
171 * <p> 174 * <p>
172 * @param iter The input text to be normalized. The normalization 175 * @param iter The input text to be normalized. The normalization
173 * will start at the beginning of the string. 176 * will start at the beginning of the string.
174 * 177 *
175 * @param mode The normalization mode. 178 * @param mode The normalization mode.
176 * @stable ICU 2.0 179 * @deprecated ICU 56 Use Normalizer2 instead.
177 */ 180 */
178 Normalizer(const CharacterIterator& iter, UNormalizationMode mode); 181 Normalizer(const CharacterIterator& iter, UNormalizationMode mode);
179 182
180 /** 183 /**
181 * Copy constructor. 184 * Copy constructor.
182 * @param copy The object to be copied. 185 * @param copy The object to be copied.
183 * @stable ICU 2.0 186 * @deprecated ICU 56 Use Normalizer2 instead.
184 */ 187 */
185 Normalizer(const Normalizer& copy); 188 Normalizer(const Normalizer& copy);
189 #endif /* U_HIDE_DEPRECATED_API */
186 190
187 /** 191 /**
188 * Destructor 192 * Destructor
189 * @stable ICU 2.0 193 * @deprecated ICU 56 Use Normalizer2 instead.
190 */ 194 */
191 virtual ~Normalizer(); 195 virtual ~Normalizer();
192 196
193 197
194 //------------------------------------------------------------------------- 198 //-------------------------------------------------------------------------
195 // Static utility methods 199 // Static utility methods
196 //------------------------------------------------------------------------- 200 //-------------------------------------------------------------------------
197 201
202 #ifndef U_HIDE_DEPRECATED_API
198 /** 203 /**
199 * Normalizes a <code>UnicodeString</code> according to the specified normalization mode. 204 * Normalizes a <code>UnicodeString</code> according to the specified normalization mode.
200 * This is a wrapper for unorm_normalize(), using UnicodeString's. 205 * This is a wrapper for unorm_normalize(), using UnicodeString's.
201 * 206 *
202 * The <code>options</code> parameter specifies which optional 207 * The <code>options</code> parameter specifies which optional
203 * <code>Normalizer</code> features are to be enabled for this operation. 208 * <code>Normalizer</code> features are to be enabled for this operation.
204 * 209 *
205 * @param source the input string to be normalized. 210 * @param source the input string to be normalized.
206 * @param mode the normalization mode 211 * @param mode the normalization mode
207 * @param options the optional features to be enabled (0 for no options) 212 * @param options the optional features to be enabled (0 for no options)
208 * @param result The normalized string (on output). 213 * @param result The normalized string (on output).
209 * @param status The error code. 214 * @param status The error code.
210 * @stable ICU 2.0 215 * @deprecated ICU 56 Use Normalizer2 instead.
211 */ 216 */
212 static void U_EXPORT2 normalize(const UnicodeString& source, 217 static void U_EXPORT2 normalize(const UnicodeString& source,
213 UNormalizationMode mode, int32_t options, 218 UNormalizationMode mode, int32_t options,
214 UnicodeString& result, 219 UnicodeString& result,
215 UErrorCode &status); 220 UErrorCode &status);
216 221
217 /** 222 /**
218 * Compose a <code>UnicodeString</code>. 223 * Compose a <code>UnicodeString</code>.
219 * This is equivalent to normalize() with mode UNORM_NFC or UNORM_NFKC. 224 * This is equivalent to normalize() with mode UNORM_NFC or UNORM_NFKC.
220 * This is a wrapper for unorm_normalize(), using UnicodeString's. 225 * This is a wrapper for unorm_normalize(), using UnicodeString's.
221 * 226 *
222 * The <code>options</code> parameter specifies which optional 227 * The <code>options</code> parameter specifies which optional
223 * <code>Normalizer</code> features are to be enabled for this operation. 228 * <code>Normalizer</code> features are to be enabled for this operation.
224 * 229 *
225 * @param source the string to be composed. 230 * @param source the string to be composed.
226 * @param compat Perform compatibility decomposition before composition. 231 * @param compat Perform compatibility decomposition before composition.
227 * If this argument is <code>FALSE</code>, only canonical 232 * If this argument is <code>FALSE</code>, only canonical
228 * decomposition will be performed. 233 * decomposition will be performed.
229 * @param options the optional features to be enabled (0 for no options) 234 * @param options the optional features to be enabled (0 for no options)
230 * @param result The composed string (on output). 235 * @param result The composed string (on output).
231 * @param status The error code. 236 * @param status The error code.
232 * @stable ICU 2.0 237 * @deprecated ICU 56 Use Normalizer2 instead.
233 */ 238 */
234 static void U_EXPORT2 compose(const UnicodeString& source, 239 static void U_EXPORT2 compose(const UnicodeString& source,
235 UBool compat, int32_t options, 240 UBool compat, int32_t options,
236 UnicodeString& result, 241 UnicodeString& result,
237 UErrorCode &status); 242 UErrorCode &status);
238 243
239 /** 244 /**
240 * Static method to decompose a <code>UnicodeString</code>. 245 * Static method to decompose a <code>UnicodeString</code>.
241 * This is equivalent to normalize() with mode UNORM_NFD or UNORM_NFKD. 246 * This is equivalent to normalize() with mode UNORM_NFD or UNORM_NFKD.
242 * This is a wrapper for unorm_normalize(), using UnicodeString's. 247 * This is a wrapper for unorm_normalize(), using UnicodeString's.
243 * 248 *
244 * The <code>options</code> parameter specifies which optional 249 * The <code>options</code> parameter specifies which optional
245 * <code>Normalizer</code> features are to be enabled for this operation. 250 * <code>Normalizer</code> features are to be enabled for this operation.
246 * 251 *
247 * @param source the string to be decomposed. 252 * @param source the string to be decomposed.
248 * @param compat Perform compatibility decomposition. 253 * @param compat Perform compatibility decomposition.
249 * If this argument is <code>FALSE</code>, only canonical 254 * If this argument is <code>FALSE</code>, only canonical
250 * decomposition will be performed. 255 * decomposition will be performed.
251 * @param options the optional features to be enabled (0 for no options) 256 * @param options the optional features to be enabled (0 for no options)
252 * @param result The decomposed string (on output). 257 * @param result The decomposed string (on output).
253 * @param status The error code. 258 * @param status The error code.
254 * @stable ICU 2.0 259 * @deprecated ICU 56 Use Normalizer2 instead.
255 */ 260 */
256 static void U_EXPORT2 decompose(const UnicodeString& source, 261 static void U_EXPORT2 decompose(const UnicodeString& source,
257 UBool compat, int32_t options, 262 UBool compat, int32_t options,
258 UnicodeString& result, 263 UnicodeString& result,
259 UErrorCode &status); 264 UErrorCode &status);
260 265
261 /** 266 /**
262 * Performing quick check on a string, to quickly determine if the string is 267 * Performing quick check on a string, to quickly determine if the string is
263 * in a particular normalization format. 268 * in a particular normalization format.
264 * This is a wrapper for unorm_quickCheck(), using a UnicodeString. 269 * This is a wrapper for unorm_quickCheck(), using a UnicodeString.
265 * 270 *
266 * Three types of result can be returned UNORM_YES, UNORM_NO or 271 * Three types of result can be returned UNORM_YES, UNORM_NO or
267 * UNORM_MAYBE. Result UNORM_YES indicates that the argument 272 * UNORM_MAYBE. Result UNORM_YES indicates that the argument
268 * string is in the desired normalized format, UNORM_NO determines that 273 * string is in the desired normalized format, UNORM_NO determines that
269 * argument string is not in the desired normalized format. A 274 * argument string is not in the desired normalized format. A
270 * UNORM_MAYBE result indicates that a more thorough check is required, 275 * UNORM_MAYBE result indicates that a more thorough check is required,
271 * the user may have to put the string in its normalized form and compare the 276 * the user may have to put the string in its normalized form and compare the
272 * results. 277 * results.
273 * @param source string for determining if it is in a normalized format 278 * @param source string for determining if it is in a normalized format
274 * @param mode normalization format 279 * @param mode normalization format
275 * @param status A reference to a UErrorCode to receive any errors 280 * @param status A reference to a UErrorCode to receive any errors
276 * @return UNORM_YES, UNORM_NO or UNORM_MAYBE 281 * @return UNORM_YES, UNORM_NO or UNORM_MAYBE
277 * 282 *
278 * @see isNormalized 283 * @see isNormalized
279 * @stable ICU 2.0 284 * @deprecated ICU 56 Use Normalizer2 instead.
280 */ 285 */
281 static inline UNormalizationCheckResult 286 static inline UNormalizationCheckResult
282 quickCheck(const UnicodeString &source, UNormalizationMode mode, UErrorCode &s tatus); 287 quickCheck(const UnicodeString &source, UNormalizationMode mode, UErrorCode &s tatus);
283 288
284 /** 289 /**
285 * Performing quick check on a string; same as the other version of quickCheck 290 * Performing quick check on a string; same as the other version of quickCheck
286 * but takes an extra options parameter like most normalization functions. 291 * but takes an extra options parameter like most normalization functions.
287 * 292 *
288 * @param source string for determining if it is in a normalized format 293 * @param source string for determining if it is in a normalized format
289 * @param mode normalization format 294 * @param mode normalization format
290 * @param options the optional features to be enabled (0 for no options) 295 * @param options the optional features to be enabled (0 for no options)
291 * @param status A reference to a UErrorCode to receive any errors 296 * @param status A reference to a UErrorCode to receive any errors
292 * @return UNORM_YES, UNORM_NO or UNORM_MAYBE 297 * @return UNORM_YES, UNORM_NO or UNORM_MAYBE
293 * 298 *
294 * @see isNormalized 299 * @see isNormalized
295 * @stable ICU 2.6 300 * @deprecated ICU 56 Use Normalizer2 instead.
296 */ 301 */
297 static UNormalizationCheckResult 302 static UNormalizationCheckResult
298 quickCheck(const UnicodeString &source, UNormalizationMode mode, int32_t optio ns, UErrorCode &status); 303 quickCheck(const UnicodeString &source, UNormalizationMode mode, int32_t optio ns, UErrorCode &status);
299 304
300 /** 305 /**
301 * Test if a string is in a given normalization form. 306 * Test if a string is in a given normalization form.
302 * This is semantically equivalent to source.equals(normalize(source, mode)). 307 * This is semantically equivalent to source.equals(normalize(source, mode)).
303 * 308 *
304 * Unlike unorm_quickCheck(), this function returns a definitive result, 309 * Unlike unorm_quickCheck(), this function returns a definitive result,
305 * never a "maybe". 310 * never a "maybe".
306 * For NFD, NFKD, and FCD, both functions work exactly the same. 311 * For NFD, NFKD, and FCD, both functions work exactly the same.
307 * For NFC and NFKC where quickCheck may return "maybe", this function will 312 * For NFC and NFKC where quickCheck may return "maybe", this function will
308 * perform further tests to arrive at a TRUE/FALSE result. 313 * perform further tests to arrive at a TRUE/FALSE result.
309 * 314 *
310 * @param src String that is to be tested if it is in a normalization format. 315 * @param src String that is to be tested if it is in a normalization format.
311 * @param mode Which normalization form to test for. 316 * @param mode Which normalization form to test for.
312 * @param errorCode ICU error code in/out parameter. 317 * @param errorCode ICU error code in/out parameter.
313 * Must fulfill U_SUCCESS before the function call. 318 * Must fulfill U_SUCCESS before the function call.
314 * @return Boolean value indicating whether the source string is in the 319 * @return Boolean value indicating whether the source string is in the
315 * "mode" normalization form. 320 * "mode" normalization form.
316 * 321 *
317 * @see quickCheck 322 * @see quickCheck
318 * @stable ICU 2.2 323 * @deprecated ICU 56 Use Normalizer2 instead.
319 */ 324 */
320 static inline UBool 325 static inline UBool
321 isNormalized(const UnicodeString &src, UNormalizationMode mode, UErrorCode &er rorCode); 326 isNormalized(const UnicodeString &src, UNormalizationMode mode, UErrorCode &er rorCode);
322 327
323 /** 328 /**
324 * Test if a string is in a given normalization form; same as the other version of isNormalized 329 * Test if a string is in a given normalization form; same as the other version of isNormalized
325 * but takes an extra options parameter like most normalization functions. 330 * but takes an extra options parameter like most normalization functions.
326 * 331 *
327 * @param src String that is to be tested if it is in a normalization format. 332 * @param src String that is to be tested if it is in a normalization format.
328 * @param mode Which normalization form to test for. 333 * @param mode Which normalization form to test for.
329 * @param options the optional features to be enabled (0 for no options) 334 * @param options the optional features to be enabled (0 for no options)
330 * @param errorCode ICU error code in/out parameter. 335 * @param errorCode ICU error code in/out parameter.
331 * Must fulfill U_SUCCESS before the function call. 336 * Must fulfill U_SUCCESS before the function call.
332 * @return Boolean value indicating whether the source string is in the 337 * @return Boolean value indicating whether the source string is in the
333 * "mode" normalization form. 338 * "mode" normalization form.
334 * 339 *
335 * @see quickCheck 340 * @see quickCheck
336 * @stable ICU 2.6 341 * @deprecated ICU 56 Use Normalizer2 instead.
337 */ 342 */
338 static UBool 343 static UBool
339 isNormalized(const UnicodeString &src, UNormalizationMode mode, int32_t option s, UErrorCode &errorCode); 344 isNormalized(const UnicodeString &src, UNormalizationMode mode, int32_t option s, UErrorCode &errorCode);
340 345
341 /** 346 /**
342 * Concatenate normalized strings, making sure that the result is normalized as well. 347 * Concatenate normalized strings, making sure that the result is normalized as well.
343 * 348 *
344 * If both the left and the right strings are in 349 * If both the left and the right strings are in
345 * the normalization form according to "mode/options", 350 * the normalization form according to "mode/options",
346 * then the result will be 351 * then the result will be
(...skipping 11 matching lines...) Expand all
358 * @param options A bit set of normalization options. 363 * @param options A bit set of normalization options.
359 * @param errorCode ICU error code in/out parameter. 364 * @param errorCode ICU error code in/out parameter.
360 * Must fulfill U_SUCCESS before the function call. 365 * Must fulfill U_SUCCESS before the function call.
361 * @return result 366 * @return result
362 * 367 *
363 * @see unorm_concatenate 368 * @see unorm_concatenate
364 * @see normalize 369 * @see normalize
365 * @see unorm_next 370 * @see unorm_next
366 * @see unorm_previous 371 * @see unorm_previous
367 * 372 *
368 * @stable ICU 2.1 373 * @deprecated ICU 56 Use Normalizer2 instead.
369 */ 374 */
370 static UnicodeString & 375 static UnicodeString &
371 U_EXPORT2 concatenate(const UnicodeString &left, const UnicodeString &right, 376 U_EXPORT2 concatenate(const UnicodeString &left, const UnicodeString &right,
372 UnicodeString &result, 377 UnicodeString &result,
373 UNormalizationMode mode, int32_t options, 378 UNormalizationMode mode, int32_t options,
374 UErrorCode &errorCode); 379 UErrorCode &errorCode);
380 #endif /* U_HIDE_DEPRECATED_API */
375 381
376 /** 382 /**
377 * Compare two strings for canonical equivalence. 383 * Compare two strings for canonical equivalence.
378 * Further options include case-insensitive comparison and 384 * Further options include case-insensitive comparison and
379 * code point order (as opposed to code unit order). 385 * code point order (as opposed to code unit order).
380 * 386 *
381 * Canonical equivalence between two strings is defined as their normalized 387 * Canonical equivalence between two strings is defined as their normalized
382 * forms (NFD or NFC) being identical. 388 * forms (NFD or NFC) being identical.
383 * This function compares strings incrementally instead of normalizing 389 * This function compares strings incrementally instead of normalizing
384 * (and optionally case-folding) both strings entirely, 390 * (and optionally case-folding) both strings entirely,
(...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after
435 * @see u_strCompare 441 * @see u_strCompare
436 * @see u_strCaseCompare 442 * @see u_strCaseCompare
437 * 443 *
438 * @stable ICU 2.2 444 * @stable ICU 2.2
439 */ 445 */
440 static inline int32_t 446 static inline int32_t
441 compare(const UnicodeString &s1, const UnicodeString &s2, 447 compare(const UnicodeString &s1, const UnicodeString &s2,
442 uint32_t options, 448 uint32_t options,
443 UErrorCode &errorCode); 449 UErrorCode &errorCode);
444 450
451 #ifndef U_HIDE_DEPRECATED_API
445 //------------------------------------------------------------------------- 452 //-------------------------------------------------------------------------
446 // Iteration API 453 // Iteration API
447 //------------------------------------------------------------------------- 454 //-------------------------------------------------------------------------
448 455
449 /** 456 /**
450 * Return the current character in the normalized text. 457 * Return the current character in the normalized text.
451 * current() may need to normalize some text at getIndex(). 458 * current() may need to normalize some text at getIndex().
452 * The getIndex() is not changed. 459 * The getIndex() is not changed.
453 * 460 *
454 * @return the current normalized code point 461 * @return the current normalized code point
455 * @stable ICU 2.0 462 * @deprecated ICU 56 Use Normalizer2 instead.
456 */ 463 */
457 UChar32 current(void); 464 UChar32 current(void);
458 465
459 /** 466 /**
460 * Return the first character in the normalized text. 467 * Return the first character in the normalized text.
461 * This is equivalent to setIndexOnly(startIndex()) followed by next(). 468 * This is equivalent to setIndexOnly(startIndex()) followed by next().
462 * (Post-increment semantics.) 469 * (Post-increment semantics.)
463 * 470 *
464 * @return the first normalized code point 471 * @return the first normalized code point
465 * @stable ICU 2.0 472 * @deprecated ICU 56 Use Normalizer2 instead.
466 */ 473 */
467 UChar32 first(void); 474 UChar32 first(void);
468 475
469 /** 476 /**
470 * Return the last character in the normalized text. 477 * Return the last character in the normalized text.
471 * This is equivalent to setIndexOnly(endIndex()) followed by previous(). 478 * This is equivalent to setIndexOnly(endIndex()) followed by previous().
472 * (Pre-decrement semantics.) 479 * (Pre-decrement semantics.)
473 * 480 *
474 * @return the last normalized code point 481 * @return the last normalized code point
475 * @stable ICU 2.0 482 * @deprecated ICU 56 Use Normalizer2 instead.
476 */ 483 */
477 UChar32 last(void); 484 UChar32 last(void);
478 485
479 /** 486 /**
480 * Return the next character in the normalized text. 487 * Return the next character in the normalized text.
481 * (Post-increment semantics.) 488 * (Post-increment semantics.)
482 * If the end of the text has already been reached, DONE is returned. 489 * If the end of the text has already been reached, DONE is returned.
483 * The DONE value could be confused with a U+FFFF non-character code point 490 * The DONE value could be confused with a U+FFFF non-character code point
484 * in the text. If this is possible, you can test getIndex()<endIndex() 491 * in the text. If this is possible, you can test getIndex()<endIndex()
485 * before calling next(), or (getIndex()<endIndex() || last()!=DONE) 492 * before calling next(), or (getIndex()<endIndex() || last()!=DONE)
486 * after calling next(). (Calling last() will change the iterator state!) 493 * after calling next(). (Calling last() will change the iterator state!)
487 * 494 *
488 * The C API unorm_next() is more efficient and does not have this ambiguity. 495 * The C API unorm_next() is more efficient and does not have this ambiguity.
489 * 496 *
490 * @return the next normalized code point 497 * @return the next normalized code point
491 * @stable ICU 2.0 498 * @deprecated ICU 56 Use Normalizer2 instead.
492 */ 499 */
493 UChar32 next(void); 500 UChar32 next(void);
494 501
495 /** 502 /**
496 * Return the previous character in the normalized text and decrement. 503 * Return the previous character in the normalized text and decrement.
497 * (Pre-decrement semantics.) 504 * (Pre-decrement semantics.)
498 * If the beginning of the text has already been reached, DONE is returned. 505 * If the beginning of the text has already been reached, DONE is returned.
499 * The DONE value could be confused with a U+FFFF non-character code point 506 * The DONE value could be confused with a U+FFFF non-character code point
500 * in the text. If this is possible, you can test 507 * in the text. If this is possible, you can test
501 * (getIndex()>startIndex() || first()!=DONE). (Calling first() will change 508 * (getIndex()>startIndex() || first()!=DONE). (Calling first() will change
502 * the iterator state!) 509 * the iterator state!)
503 * 510 *
504 * The C API unorm_previous() is more efficient and does not have this ambiguity. 511 * The C API unorm_previous() is more efficient and does not have this ambiguity.
505 * 512 *
506 * @return the previous normalized code point 513 * @return the previous normalized code point
507 * @stable ICU 2.0 514 * @deprecated ICU 56 Use Normalizer2 instead.
508 */ 515 */
509 UChar32 previous(void); 516 UChar32 previous(void);
510 517
511 /** 518 /**
512 * Set the iteration position in the input text that is being normalized, 519 * Set the iteration position in the input text that is being normalized,
513 * without any immediate normalization. 520 * without any immediate normalization.
514 * After setIndexOnly(), getIndex() will return the same index that is 521 * After setIndexOnly(), getIndex() will return the same index that is
515 * specified here. 522 * specified here.
516 * 523 *
517 * @param index the desired index in the input text. 524 * @param index the desired index in the input text.
518 * @stable ICU 2.0 525 * @deprecated ICU 56 Use Normalizer2 instead.
519 */ 526 */
520 void setIndexOnly(int32_t index); 527 void setIndexOnly(int32_t index);
521 528
522 /** 529 /**
523 * Reset the index to the beginning of the text. 530 * Reset the index to the beginning of the text.
524 * This is equivalent to setIndexOnly(startIndex)). 531 * This is equivalent to setIndexOnly(startIndex)).
525 * @stable ICU 2.0 532 * @deprecated ICU 56 Use Normalizer2 instead.
526 */ 533 */
527 void reset(void); 534 void reset(void);
528 535
529 /** 536 /**
530 * Retrieve the current iteration position in the input text that is 537 * Retrieve the current iteration position in the input text that is
531 * being normalized. 538 * being normalized.
532 * 539 *
533 * A following call to next() will return a normalized code point from 540 * A following call to next() will return a normalized code point from
534 * the input text at or after this index. 541 * the input text at or after this index.
535 * 542 *
536 * After a call to previous(), getIndex() will point at or before the 543 * After a call to previous(), getIndex() will point at or before the
537 * position in the input text where the normalized code point 544 * position in the input text where the normalized code point
538 * was returned from with previous(). 545 * was returned from with previous().
539 * 546 *
540 * @return the current index in the input text 547 * @return the current index in the input text
541 * @stable ICU 2.0 548 * @deprecated ICU 56 Use Normalizer2 instead.
542 */ 549 */
543 int32_t getIndex(void) const; 550 int32_t getIndex(void) const;
544 551
545 /** 552 /**
546 * Retrieve the index of the start of the input text. This is the begin index 553 * Retrieve the index of the start of the input text. This is the begin index
547 * of the <code>CharacterIterator</code> or the start (i.e. index 0) of the string 554 * of the <code>CharacterIterator</code> or the start (i.e. index 0) of the string
548 * over which this <code>Normalizer</code> is iterating. 555 * over which this <code>Normalizer</code> is iterating.
549 * 556 *
550 * @return the smallest index in the input text where the Normalizer operates 557 * @return the smallest index in the input text where the Normalizer operates
551 * @stable ICU 2.0 558 * @deprecated ICU 56 Use Normalizer2 instead.
552 */ 559 */
553 int32_t startIndex(void) const; 560 int32_t startIndex(void) const;
554 561
555 /** 562 /**
556 * Retrieve the index of the end of the input text. This is the end index 563 * Retrieve the index of the end of the input text. This is the end index
557 * of the <code>CharacterIterator</code> or the length of the string 564 * of the <code>CharacterIterator</code> or the length of the string
558 * over which this <code>Normalizer</code> is iterating. 565 * over which this <code>Normalizer</code> is iterating.
559 * This end index is exclusive, i.e., the Normalizer operates only on characters 566 * This end index is exclusive, i.e., the Normalizer operates only on characters
560 * before this index. 567 * before this index.
561 * 568 *
562 * @return the first index in the input text where the Normalizer does not operate 569 * @return the first index in the input text where the Normalizer does not operate
563 * @stable ICU 2.0 570 * @deprecated ICU 56 Use Normalizer2 instead.
564 */ 571 */
565 int32_t endIndex(void) const; 572 int32_t endIndex(void) const;
566 573
567 /** 574 /**
568 * Returns TRUE when both iterators refer to the same character in the same 575 * Returns TRUE when both iterators refer to the same character in the same
569 * input text. 576 * input text.
570 * 577 *
571 * @param that a Normalizer object to compare this one to 578 * @param that a Normalizer object to compare this one to
572 * @return comparison result 579 * @return comparison result
573 * @stable ICU 2.0 580 * @deprecated ICU 56 Use Normalizer2 instead.
574 */ 581 */
575 UBool operator==(const Normalizer& that) const; 582 UBool operator==(const Normalizer& that) const;
576 583
577 /** 584 /**
578 * Returns FALSE when both iterators refer to the same character in the same 585 * Returns FALSE when both iterators refer to the same character in the same
579 * input text. 586 * input text.
580 * 587 *
581 * @param that a Normalizer object to compare this one to 588 * @param that a Normalizer object to compare this one to
582 * @return comparison result 589 * @return comparison result
583 * @stable ICU 2.0 590 * @deprecated ICU 56 Use Normalizer2 instead.
584 */ 591 */
585 inline UBool operator!=(const Normalizer& that) const; 592 inline UBool operator!=(const Normalizer& that) const;
586 593
587 /** 594 /**
588 * Returns a pointer to a new Normalizer that is a clone of this one. 595 * Returns a pointer to a new Normalizer that is a clone of this one.
589 * The caller is responsible for deleting the new clone. 596 * The caller is responsible for deleting the new clone.
590 * @return a pointer to a new Normalizer 597 * @return a pointer to a new Normalizer
591 * @stable ICU 2.0 598 * @deprecated ICU 56 Use Normalizer2 instead.
592 */ 599 */
593 Normalizer* clone(void) const; 600 Normalizer* clone(void) const;
594 601
595 /** 602 /**
596 * Generates a hash code for this iterator. 603 * Generates a hash code for this iterator.
597 * 604 *
598 * @return the hash code 605 * @return the hash code
599 * @stable ICU 2.0 606 * @deprecated ICU 56 Use Normalizer2 instead.
600 */ 607 */
601 int32_t hashCode(void) const; 608 int32_t hashCode(void) const;
602 609
603 //------------------------------------------------------------------------- 610 //-------------------------------------------------------------------------
604 // Property access methods 611 // Property access methods
605 //------------------------------------------------------------------------- 612 //-------------------------------------------------------------------------
606 613
607 /** 614 /**
608 * Set the normalization mode for this object. 615 * Set the normalization mode for this object.
609 * <p> 616 * <p>
610 * <b>Note:</b>If the normalization mode is changed while iterating 617 * <b>Note:</b>If the normalization mode is changed while iterating
611 * over a string, calls to {@link #next() } and {@link #previous() } may 618 * over a string, calls to {@link #next() } and {@link #previous() } may
612 * return previously buffers characters in the old normalization mode 619 * return previously buffers characters in the old normalization mode
613 * until the iteration is able to re-sync at the next base character. 620 * until the iteration is able to re-sync at the next base character.
614 * It is safest to call {@link #setIndexOnly }, {@link #reset() }, 621 * It is safest to call {@link #setIndexOnly }, {@link #reset() },
615 * {@link #setText }, {@link #first() }, 622 * {@link #setText }, {@link #first() },
616 * {@link #last() }, etc. after calling <code>setMode</code>. 623 * {@link #last() }, etc. after calling <code>setMode</code>.
617 * <p> 624 * <p>
618 * @param newMode the new mode for this <code>Normalizer</code>. 625 * @param newMode the new mode for this <code>Normalizer</code>.
619 * @see #getUMode 626 * @see #getUMode
620 * @stable ICU 2.0 627 * @deprecated ICU 56 Use Normalizer2 instead.
621 */ 628 */
622 void setMode(UNormalizationMode newMode); 629 void setMode(UNormalizationMode newMode);
623 630
624 /** 631 /**
625 * Return the normalization mode for this object. 632 * Return the normalization mode for this object.
626 * 633 *
627 * This is an unusual name because there used to be a getMode() that 634 * This is an unusual name because there used to be a getMode() that
628 * returned a different type. 635 * returned a different type.
629 * 636 *
630 * @return the mode for this <code>Normalizer</code> 637 * @return the mode for this <code>Normalizer</code>
631 * @see #setMode 638 * @see #setMode
632 * @stable ICU 2.0 639 * @deprecated ICU 56 Use Normalizer2 instead.
633 */ 640 */
634 UNormalizationMode getUMode(void) const; 641 UNormalizationMode getUMode(void) const;
635 642
636 /** 643 /**
637 * Set options that affect this <code>Normalizer</code>'s operation. 644 * Set options that affect this <code>Normalizer</code>'s operation.
638 * Options do not change the basic composition or decomposition operation 645 * Options do not change the basic composition or decomposition operation
639 * that is being performed, but they control whether 646 * that is being performed, but they control whether
640 * certain optional portions of the operation are done. 647 * certain optional portions of the operation are done.
641 * Currently the only available option is obsolete. 648 * Currently the only available option is obsolete.
642 * 649 *
643 * It is possible to specify multiple options that are all turned on or off. 650 * It is possible to specify multiple options that are all turned on or off.
644 * 651 *
645 * @param option the option(s) whose value is/are to be set. 652 * @param option the option(s) whose value is/are to be set.
646 * @param value the new setting for the option. Use <code>TRUE</code> to 653 * @param value the new setting for the option. Use <code>TRUE</code> to
647 * turn the option(s) on and <code>FALSE</code> to turn it/them off. 654 * turn the option(s) on and <code>FALSE</code> to turn it/them off.
648 * 655 *
649 * @see #getOption 656 * @see #getOption
650 * @stable ICU 2.0 657 * @deprecated ICU 56 Use Normalizer2 instead.
651 */ 658 */
652 void setOption(int32_t option, 659 void setOption(int32_t option,
653 UBool value); 660 UBool value);
654 661
655 /** 662 /**
656 * Determine whether an option is turned on or off. 663 * Determine whether an option is turned on or off.
657 * If multiple options are specified, then the result is TRUE if any 664 * If multiple options are specified, then the result is TRUE if any
658 * of them are set. 665 * of them are set.
659 * <p> 666 * <p>
660 * @param option the option(s) that are to be checked 667 * @param option the option(s) that are to be checked
661 * @return TRUE if any of the option(s) are set 668 * @return TRUE if any of the option(s) are set
662 * @see #setOption 669 * @see #setOption
663 * @stable ICU 2.0 670 * @deprecated ICU 56 Use Normalizer2 instead.
664 */ 671 */
665 UBool getOption(int32_t option) const; 672 UBool getOption(int32_t option) const;
666 673
667 /** 674 /**
668 * Set the input text over which this <code>Normalizer</code> will iterate. 675 * Set the input text over which this <code>Normalizer</code> will iterate.
669 * The iteration position is set to the beginning. 676 * The iteration position is set to the beginning.
670 * 677 *
671 * @param newText a string that replaces the current input text 678 * @param newText a string that replaces the current input text
672 * @param status a UErrorCode 679 * @param status a UErrorCode
673 * @stable ICU 2.0 680 * @deprecated ICU 56 Use Normalizer2 instead.
674 */ 681 */
675 void setText(const UnicodeString& newText, 682 void setText(const UnicodeString& newText,
676 UErrorCode &status); 683 UErrorCode &status);
677 684
678 /** 685 /**
679 * Set the input text over which this <code>Normalizer</code> will iterate. 686 * Set the input text over which this <code>Normalizer</code> will iterate.
680 * The iteration position is set to the beginning. 687 * The iteration position is set to the beginning.
681 * 688 *
682 * @param newText a CharacterIterator object that replaces the current input text 689 * @param newText a CharacterIterator object that replaces the current input text
683 * @param status a UErrorCode 690 * @param status a UErrorCode
684 * @stable ICU 2.0 691 * @deprecated ICU 56 Use Normalizer2 instead.
685 */ 692 */
686 void setText(const CharacterIterator& newText, 693 void setText(const CharacterIterator& newText,
687 UErrorCode &status); 694 UErrorCode &status);
688 695
689 /** 696 /**
690 * Set the input text over which this <code>Normalizer</code> will iterate. 697 * Set the input text over which this <code>Normalizer</code> will iterate.
691 * The iteration position is set to the beginning. 698 * The iteration position is set to the beginning.
692 * 699 *
693 * @param newText a string that replaces the current input text 700 * @param newText a string that replaces the current input text
694 * @param length the length of the string, or -1 if NUL-terminated 701 * @param length the length of the string, or -1 if NUL-terminated
695 * @param status a UErrorCode 702 * @param status a UErrorCode
696 * @stable ICU 2.0 703 * @deprecated ICU 56 Use Normalizer2 instead.
697 */ 704 */
698 void setText(const UChar* newText, 705 void setText(const UChar* newText,
699 int32_t length, 706 int32_t length,
700 UErrorCode &status); 707 UErrorCode &status);
701 /** 708 /**
702 * Copies the input text into the UnicodeString argument. 709 * Copies the input text into the UnicodeString argument.
703 * 710 *
704 * @param result Receives a copy of the text under iteration. 711 * @param result Receives a copy of the text under iteration.
705 * @stable ICU 2.0 712 * @deprecated ICU 56 Use Normalizer2 instead.
706 */ 713 */
707 void getText(UnicodeString& result); 714 void getText(UnicodeString& result);
708 715
709 /** 716 /**
710 * ICU "poor man's RTTI", returns a UClassID for this class. 717 * ICU "poor man's RTTI", returns a UClassID for this class.
711 * @returns a UClassID for this class. 718 * @returns a UClassID for this class.
712 * @stable ICU 2.2 719 * @deprecated ICU 56 Use Normalizer2 instead.
713 */ 720 */
714 static UClassID U_EXPORT2 getStaticClassID(); 721 static UClassID U_EXPORT2 getStaticClassID();
722 #endif /* U_HIDE_DEPRECATED_API */
715 723
716 /** 724 /**
717 * ICU "poor man's RTTI", returns a UClassID for the actual class. 725 * ICU "poor man's RTTI", returns a UClassID for the actual class.
718 * @return a UClassID for the actual class. 726 * @return a UClassID for the actual class.
719 * @stable ICU 2.2 727 * @deprecated ICU 56 Use Normalizer2 instead.
720 */ 728 */
721 virtual UClassID getDynamicClassID() const; 729 virtual UClassID getDynamicClassID() const;
722 730
723 private: 731 private:
724 //------------------------------------------------------------------------- 732 //-------------------------------------------------------------------------
725 // Private functions 733 // Private functions
726 //------------------------------------------------------------------------- 734 //-------------------------------------------------------------------------
727 735
728 Normalizer(); // default constructor not implemented 736 Normalizer(); // default constructor not implemented
729 Normalizer &operator=(const Normalizer &that); // assignment operator not implemented 737 Normalizer &operator=(const Normalizer &that); // assignment operator not implemented
730 738
731 // Private utility methods for iteration 739 // Private utility methods for iteration
732 // For documentation, see the source code 740 // For documentation, see the source code
733 UBool nextNormalize(); 741 UBool nextNormalize();
734 UBool previousNormalize(); 742 UBool previousNormalize();
735 743
736 void init(); 744 void init();
737 void clearBuffer(void); 745 void clearBuffer(void);
738 746
739 //------------------------------------------------------------------------- 747 //-------------------------------------------------------------------------
740 // Private data 748 // Private data
741 //------------------------------------------------------------------------- 749 //-------------------------------------------------------------------------
742 750
743 FilteredNormalizer2*fFilteredNorm2; // owned if not NULL 751 FilteredNormalizer2*fFilteredNorm2; // owned if not NULL
744 const Normalizer2 *fNorm2; // not owned; may be equal to fFilteredNorm2 752 const Normalizer2 *fNorm2; // not owned; may be equal to fFilteredNorm2
753 #ifndef U_HIDE_DEPRECATED_API
745 UNormalizationMode fUMode; 754 UNormalizationMode fUMode;
755 #endif /* U_HIDE_DEPRECATED_API */
746 int32_t fOptions; 756 int32_t fOptions;
747 757
748 // The input text and our position in it 758 // The input text and our position in it
749 CharacterIterator *text; 759 CharacterIterator *text;
750 760
751 // The normalization buffer is the result of normalization 761 // The normalization buffer is the result of normalization
752 // of the source in [currentIndex..nextIndex[ . 762 // of the source in [currentIndex..nextIndex[ .
753 int32_t currentIndex, nextIndex; 763 int32_t currentIndex, nextIndex;
754 764
755 // A buffer for holding intermediate results 765 // A buffer for holding intermediate results
756 UnicodeString buffer; 766 UnicodeString buffer;
757 int32_t bufferPos; 767 int32_t bufferPos;
758 }; 768 };
759 769
760 //------------------------------------------------------------------------- 770 //-------------------------------------------------------------------------
761 // Inline implementations 771 // Inline implementations
762 //------------------------------------------------------------------------- 772 //-------------------------------------------------------------------------
763 773
774 #ifndef U_HIDE_DEPRECATED_API
764 inline UBool 775 inline UBool
765 Normalizer::operator!= (const Normalizer& other) const 776 Normalizer::operator!= (const Normalizer& other) const
766 { return ! operator==(other); } 777 { return ! operator==(other); }
767 778
768 inline UNormalizationCheckResult 779 inline UNormalizationCheckResult
769 Normalizer::quickCheck(const UnicodeString& source, 780 Normalizer::quickCheck(const UnicodeString& source,
770 UNormalizationMode mode, 781 UNormalizationMode mode,
771 UErrorCode &status) { 782 UErrorCode &status) {
772 return quickCheck(source, mode, 0, status); 783 return quickCheck(source, mode, 0, status);
773 } 784 }
774 785
775 inline UBool 786 inline UBool
776 Normalizer::isNormalized(const UnicodeString& source, 787 Normalizer::isNormalized(const UnicodeString& source,
777 UNormalizationMode mode, 788 UNormalizationMode mode,
778 UErrorCode &status) { 789 UErrorCode &status) {
779 return isNormalized(source, mode, 0, status); 790 return isNormalized(source, mode, 0, status);
780 } 791 }
792 #endif /* U_HIDE_DEPRECATED_API */
781 793
782 inline int32_t 794 inline int32_t
783 Normalizer::compare(const UnicodeString &s1, const UnicodeString &s2, 795 Normalizer::compare(const UnicodeString &s1, const UnicodeString &s2,
784 uint32_t options, 796 uint32_t options,
785 UErrorCode &errorCode) { 797 UErrorCode &errorCode) {
786 // all argument checking is done in unorm_compare 798 // all argument checking is done in unorm_compare
787 return unorm_compare(s1.getBuffer(), s1.length(), 799 return unorm_compare(s1.getBuffer(), s1.length(),
788 s2.getBuffer(), s2.length(), 800 s2.getBuffer(), s2.length(),
789 options, 801 options,
790 &errorCode); 802 &errorCode);
791 } 803 }
792 804
793 U_NAMESPACE_END 805 U_NAMESPACE_END
794 806
795 #endif /* #if !UCONFIG_NO_NORMALIZATION */ 807 #endif /* #if !UCONFIG_NO_NORMALIZATION */
796 808
797 #endif // NORMLZR_H 809 #endif // NORMLZR_H
OLDNEW
« no previous file with comments | « source/common/unicode/locid.h ('k') | source/common/unicode/platform.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698