source/common/unicode/normlzr.h - Issue 1621843002: ICU 56 update step 1

Side by Side Diff: source/common/unicode/normlzr.h

Issue 1621843002: ICU 56 update step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@561

Patch Set: Created 4 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 ********************************************************************	2 ********************************************************************

3 * COPYRIGHT:	3 * COPYRIGHT:

4 * Copyright (c) 1996-2011, International Business Machines Corporation and	4 * Copyright (c) 1996-2015, International Business Machines Corporation and

5 * others. All Rights Reserved.	5 * others. All Rights Reserved.

6 ********************************************************************	6 ********************************************************************

7 */	7 */

8	8

9 #ifndef NORMLZR_H	9 #ifndef NORMLZR_H

10 #define NORMLZR_H	10 #define NORMLZR_H

11	11

12 #include "unicode/utypes.h"	12 #include "unicode/utypes.h"

13	13

14 /**	14 /**

15 * \file	15 * \file

16 * \brief C++ API: Unicode Normalization	16 * \brief C++ API: Unicode Normalization

17 */	17 */

18	18

19 #if !UCONFIG_NO_NORMALIZATION	19 #if !UCONFIG_NO_NORMALIZATION

20	20

21 #include "unicode/chariter.h"	21 #include "unicode/chariter.h"

22 #include "unicode/normalizer2.h"	22 #include "unicode/normalizer2.h"

23 #include "unicode/unistr.h"	23 #include "unicode/unistr.h"

24 #include "unicode/unorm.h"	24 #include "unicode/unorm.h"

25 #include "unicode/uobject.h"	25 #include "unicode/uobject.h"

26	26

27 U_NAMESPACE_BEGIN	27 U_NAMESPACE_BEGIN

28 /**	28 /**

	29 * Old Unicode normalization API.

	30 *

	31 * This API has been replaced by the Normalizer2 class and is only available

	32 * for backward compatibility. This class simply delegates to the Normalizer2 class.

	33 * There is one exception: The new API does not provide a replacement for Normalizer::compare().

	34 *

29 * The Normalizer class supports the standard normalization forms described in	35 * The Normalizer class supports the standard normalization forms described in

30 * <a href="http://www.unicode.org/unicode/reports/tr15/" target="unicode">	36 * <a href="http://www.unicode.org/unicode/reports/tr15/" target="unicode">

31 * Unicode Standard Annex #15: Unicode Normalization Forms</a>.	37 * Unicode Standard Annex #15: Unicode Normalization Forms</a>.

32 *	38 *

33 * Note: This API has been replaced by the Normalizer2 class and is only available

34 * for backward compatibility. This class simply delegates to the Normalizer2 class.

35 * There is one exception: The new API does not provide a replacement for Normalizer::compare().

36 *

37 * The Normalizer class consists of two parts:	39 * The Normalizer class consists of two parts:

38 * - static functions that normalize strings or test if strings are normalized	40 * - static functions that normalize strings or test if strings are normalized

39 * - a Normalizer object is an iterator that takes any kind of text and	41 * - a Normalizer object is an iterator that takes any kind of text and

40 * provides iteration over its normalized form	42 * provides iteration over its normalized form

41 *	43 *

42 * The Normalizer class is not suitable for subclassing.	44 * The Normalizer class is not suitable for subclassing.

43 *	45 *

44 * For basic information about normalization forms and details about the C API	46 * For basic information about normalization forms and details about the C API

45 * please see the documentation in unorm.h.	47 * please see the documentation in unorm.h.

46 *	48 *

(...skipping 75 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
122 * This means that there is not necessarily a one-to-one correspondence	124 * This means that there is not necessarily a one-to-one correspondence

123 * between characters returned by next() and previous() and the indices	125 * between characters returned by next() and previous() and the indices

124 * passed to and returned from setIndex() and getIndex().	126 * passed to and returned from setIndex() and getIndex().

125 * It is for this reason that Normalizer does not implement the CharacterIterator interface.	127 * It is for this reason that Normalizer does not implement the CharacterIterator interface.

126 *	128 *

127 * @author Laura Werner, Mark Davis, Markus Scherer	129 * @author Laura Werner, Mark Davis, Markus Scherer

128 * @stable ICU 2.0	130 * @stable ICU 2.0

129 */	131 */

130 class U_COMMON_API Normalizer : public UObject {	132 class U_COMMON_API Normalizer : public UObject {

131 public:	133 public:

	134 #ifndef U_HIDE_DEPRECATED_API

132 /**	135 /**

133 * If DONE is returned from an iteration function that returns a code point,	136 * If DONE is returned from an iteration function that returns a code point,

134 * then there are no more normalization results available.	137 * then there are no more normalization results available.

135 * @stable ICU 2.0	138 * @deprecated ICU 56 Use Normalizer2 instead.

136 */	139 */

137 enum {	140 enum {

138 DONE=0xffff	141 DONE=0xffff

139 };	142 };

140	143

141 // Constructors	144 // Constructors

142	145

143 /**	146 /**

144 * Creates a new <code>Normalizer</code> object for iterating over the	147 * Creates a new <code>Normalizer</code> object for iterating over the

145 * normalized form of a given string.	148 * normalized form of a given string.

146 * <p>	149 * <p>

147 * @param str The string to be normalized. The normalization	150 * @param str The string to be normalized. The normalization

148 * will start at the beginning of the string.	151 * will start at the beginning of the string.

149 *	152 *

150 * @param mode The normalization mode.	153 * @param mode The normalization mode.

151 * @stable ICU 2.0	154 * @deprecated ICU 56 Use Normalizer2 instead.

152 */	155 */

153 Normalizer(const UnicodeString& str, UNormalizationMode mode);	156 Normalizer(const UnicodeString& str, UNormalizationMode mode);

154	157

155 /**	158 /**

156 * Creates a new <code>Normalizer</code> object for iterating over the	159 * Creates a new <code>Normalizer</code> object for iterating over the

157 * normalized form of a given string.	160 * normalized form of a given string.

158 * <p>	161 * <p>

159 * @param str The string to be normalized. The normalization	162 * @param str The string to be normalized. The normalization

160 * will start at the beginning of the string.	163 * will start at the beginning of the string.

161 *	164 *

162 * @param length Length of the string, or -1 if NUL-terminated.	165 * @param length Length of the string, or -1 if NUL-terminated.

163 * @param mode The normalization mode.	166 * @param mode The normalization mode.

164 * @stable ICU 2.0	167 * @deprecated ICU 56 Use Normalizer2 instead.

165 */	168 */

166 Normalizer(const UChar* str, int32_t length, UNormalizationMode mode);	169 Normalizer(const UChar* str, int32_t length, UNormalizationMode mode);

167	170

168 /**	171 /**

169 * Creates a new <code>Normalizer</code> object for iterating over the	172 * Creates a new <code>Normalizer</code> object for iterating over the

170 * normalized form of the given text.	173 * normalized form of the given text.

171 * <p>	174 * <p>

172 * @param iter The input text to be normalized. The normalization	175 * @param iter The input text to be normalized. The normalization

173 * will start at the beginning of the string.	176 * will start at the beginning of the string.

174 *	177 *

175 * @param mode The normalization mode.	178 * @param mode The normalization mode.

176 * @stable ICU 2.0	179 * @deprecated ICU 56 Use Normalizer2 instead.

177 */	180 */

178 Normalizer(const CharacterIterator& iter, UNormalizationMode mode);	181 Normalizer(const CharacterIterator& iter, UNormalizationMode mode);

179	182

180 /**	183 /**

181 * Copy constructor.	184 * Copy constructor.

182 * @param copy The object to be copied.	185 * @param copy The object to be copied.

183 * @stable ICU 2.0	186 * @deprecated ICU 56 Use Normalizer2 instead.

184 */	187 */

185 Normalizer(const Normalizer& copy);	188 Normalizer(const Normalizer& copy);

	189 #endif /* U_HIDE_DEPRECATED_API */

186	190

187 /**	191 /**

188 * Destructor	192 * Destructor

189 * @stable ICU 2.0	193 * @deprecated ICU 56 Use Normalizer2 instead.

190 */	194 */

191 virtual ~Normalizer();	195 virtual ~Normalizer();

192	196

193	197

194 //-------------------------------------------------------------------------	198 //-------------------------------------------------------------------------

195 // Static utility methods	199 // Static utility methods

196 //-------------------------------------------------------------------------	200 //-------------------------------------------------------------------------

197	201

	202 #ifndef U_HIDE_DEPRECATED_API

198 /**	203 /**

199 * Normalizes a <code>UnicodeString</code> according to the specified normalization mode.	204 * Normalizes a <code>UnicodeString</code> according to the specified normalization mode.

200 * This is a wrapper for unorm_normalize(), using UnicodeString's.	205 * This is a wrapper for unorm_normalize(), using UnicodeString's.

201 *	206 *

202 * The <code>options</code> parameter specifies which optional	207 * The <code>options</code> parameter specifies which optional

203 * <code>Normalizer</code> features are to be enabled for this operation.	208 * <code>Normalizer</code> features are to be enabled for this operation.

204 *	209 *

205 * @param source the input string to be normalized.	210 * @param source the input string to be normalized.

206 * @param mode the normalization mode	211 * @param mode the normalization mode

207 * @param options the optional features to be enabled (0 for no options)	212 * @param options the optional features to be enabled (0 for no options)

208 * @param result The normalized string (on output).	213 * @param result The normalized string (on output).

209 * @param status The error code.	214 * @param status The error code.

210 * @stable ICU 2.0	215 * @deprecated ICU 56 Use Normalizer2 instead.

211 */	216 */

212 static void U_EXPORT2 normalize(const UnicodeString& source,	217 static void U_EXPORT2 normalize(const UnicodeString& source,

213 UNormalizationMode mode, int32_t options,	218 UNormalizationMode mode, int32_t options,

214 UnicodeString& result,	219 UnicodeString& result,

215 UErrorCode &status);	220 UErrorCode &status);

216	221

217 /**	222 /**

218 * Compose a <code>UnicodeString</code>.	223 * Compose a <code>UnicodeString</code>.

219 * This is equivalent to normalize() with mode UNORM_NFC or UNORM_NFKC.	224 * This is equivalent to normalize() with mode UNORM_NFC or UNORM_NFKC.

220 * This is a wrapper for unorm_normalize(), using UnicodeString's.	225 * This is a wrapper for unorm_normalize(), using UnicodeString's.

221 *	226 *

222 * The <code>options</code> parameter specifies which optional	227 * The <code>options</code> parameter specifies which optional

223 * <code>Normalizer</code> features are to be enabled for this operation.	228 * <code>Normalizer</code> features are to be enabled for this operation.

224 *	229 *

225 * @param source the string to be composed.	230 * @param source the string to be composed.

226 * @param compat Perform compatibility decomposition before composition.	231 * @param compat Perform compatibility decomposition before composition.

227 * If this argument is <code>FALSE</code>, only canonical	232 * If this argument is <code>FALSE</code>, only canonical

228 * decomposition will be performed.	233 * decomposition will be performed.

229 * @param options the optional features to be enabled (0 for no options)	234 * @param options the optional features to be enabled (0 for no options)

230 * @param result The composed string (on output).	235 * @param result The composed string (on output).

231 * @param status The error code.	236 * @param status The error code.

232 * @stable ICU 2.0	237 * @deprecated ICU 56 Use Normalizer2 instead.

233 */	238 */

234 static void U_EXPORT2 compose(const UnicodeString& source,	239 static void U_EXPORT2 compose(const UnicodeString& source,

235 UBool compat, int32_t options,	240 UBool compat, int32_t options,

236 UnicodeString& result,	241 UnicodeString& result,

237 UErrorCode &status);	242 UErrorCode &status);

238	243

239 /**	244 /**

240 * Static method to decompose a <code>UnicodeString</code>.	245 * Static method to decompose a <code>UnicodeString</code>.

241 * This is equivalent to normalize() with mode UNORM_NFD or UNORM_NFKD.	246 * This is equivalent to normalize() with mode UNORM_NFD or UNORM_NFKD.

242 * This is a wrapper for unorm_normalize(), using UnicodeString's.	247 * This is a wrapper for unorm_normalize(), using UnicodeString's.

243 *	248 *

244 * The <code>options</code> parameter specifies which optional	249 * The <code>options</code> parameter specifies which optional

245 * <code>Normalizer</code> features are to be enabled for this operation.	250 * <code>Normalizer</code> features are to be enabled for this operation.

246 *	251 *

247 * @param source the string to be decomposed.	252 * @param source the string to be decomposed.

248 * @param compat Perform compatibility decomposition.	253 * @param compat Perform compatibility decomposition.

249 * If this argument is <code>FALSE</code>, only canonical	254 * If this argument is <code>FALSE</code>, only canonical

250 * decomposition will be performed.	255 * decomposition will be performed.

251 * @param options the optional features to be enabled (0 for no options)	256 * @param options the optional features to be enabled (0 for no options)

252 * @param result The decomposed string (on output).	257 * @param result The decomposed string (on output).

253 * @param status The error code.	258 * @param status The error code.

254 * @stable ICU 2.0	259 * @deprecated ICU 56 Use Normalizer2 instead.

255 */	260 */

256 static void U_EXPORT2 decompose(const UnicodeString& source,	261 static void U_EXPORT2 decompose(const UnicodeString& source,

257 UBool compat, int32_t options,	262 UBool compat, int32_t options,

258 UnicodeString& result,	263 UnicodeString& result,

259 UErrorCode &status);	264 UErrorCode &status);

260	265

261 /**	266 /**

262 * Performing quick check on a string, to quickly determine if the string is	267 * Performing quick check on a string, to quickly determine if the string is

263 * in a particular normalization format.	268 * in a particular normalization format.

264 * This is a wrapper for unorm_quickCheck(), using a UnicodeString.	269 * This is a wrapper for unorm_quickCheck(), using a UnicodeString.

265 *	270 *

266 * Three types of result can be returned UNORM_YES, UNORM_NO or	271 * Three types of result can be returned UNORM_YES, UNORM_NO or

267 * UNORM_MAYBE. Result UNORM_YES indicates that the argument	272 * UNORM_MAYBE. Result UNORM_YES indicates that the argument

268 * string is in the desired normalized format, UNORM_NO determines that	273 * string is in the desired normalized format, UNORM_NO determines that

269 * argument string is not in the desired normalized format. A	274 * argument string is not in the desired normalized format. A

270 * UNORM_MAYBE result indicates that a more thorough check is required,	275 * UNORM_MAYBE result indicates that a more thorough check is required,

271 * the user may have to put the string in its normalized form and compare the	276 * the user may have to put the string in its normalized form and compare the

272 * results.	277 * results.

273 * @param source string for determining if it is in a normalized format	278 * @param source string for determining if it is in a normalized format

274 * @param mode normalization format	279 * @param mode normalization format

275 * @param status A reference to a UErrorCode to receive any errors	280 * @param status A reference to a UErrorCode to receive any errors

276 * @return UNORM_YES, UNORM_NO or UNORM_MAYBE	281 * @return UNORM_YES, UNORM_NO or UNORM_MAYBE

277 *	282 *

278 * @see isNormalized	283 * @see isNormalized

279 * @stable ICU 2.0	284 * @deprecated ICU 56 Use Normalizer2 instead.

280 */	285 */

281 static inline UNormalizationCheckResult	286 static inline UNormalizationCheckResult

282 quickCheck(const UnicodeString &source, UNormalizationMode mode, UErrorCode &status);	287 quickCheck(const UnicodeString &source, UNormalizationMode mode, UErrorCode &status);

283	288

284 /**	289 /**

285 * Performing quick check on a string; same as the other version of quickCheck	290 * Performing quick check on a string; same as the other version of quickCheck

286 * but takes an extra options parameter like most normalization functions.	291 * but takes an extra options parameter like most normalization functions.

287 *	292 *

288 * @param source string for determining if it is in a normalized format	293 * @param source string for determining if it is in a normalized format

289 * @param mode normalization format	294 * @param mode normalization format

290 * @param options the optional features to be enabled (0 for no options)	295 * @param options the optional features to be enabled (0 for no options)

291 * @param status A reference to a UErrorCode to receive any errors	296 * @param status A reference to a UErrorCode to receive any errors

292 * @return UNORM_YES, UNORM_NO or UNORM_MAYBE	297 * @return UNORM_YES, UNORM_NO or UNORM_MAYBE

293 *	298 *

294 * @see isNormalized	299 * @see isNormalized

295 * @stable ICU 2.6	300 * @deprecated ICU 56 Use Normalizer2 instead.

296 */	301 */

297 static UNormalizationCheckResult	302 static UNormalizationCheckResult

298 quickCheck(const UnicodeString &source, UNormalizationMode mode, int32_t options, UErrorCode &status);	303 quickCheck(const UnicodeString &source, UNormalizationMode mode, int32_t options, UErrorCode &status);

299	304

300 /**	305 /**

301 * Test if a string is in a given normalization form.	306 * Test if a string is in a given normalization form.

302 * This is semantically equivalent to source.equals(normalize(source, mode)).	307 * This is semantically equivalent to source.equals(normalize(source, mode)).

303 *	308 *

304 * Unlike unorm_quickCheck(), this function returns a definitive result,	309 * Unlike unorm_quickCheck(), this function returns a definitive result,

305 * never a "maybe".	310 * never a "maybe".

306 * For NFD, NFKD, and FCD, both functions work exactly the same.	311 * For NFD, NFKD, and FCD, both functions work exactly the same.

307 * For NFC and NFKC where quickCheck may return "maybe", this function will	312 * For NFC and NFKC where quickCheck may return "maybe", this function will

308 * perform further tests to arrive at a TRUE/FALSE result.	313 * perform further tests to arrive at a TRUE/FALSE result.

309 *	314 *

310 * @param src String that is to be tested if it is in a normalization format.	315 * @param src String that is to be tested if it is in a normalization format.

311 * @param mode Which normalization form to test for.	316 * @param mode Which normalization form to test for.

312 * @param errorCode ICU error code in/out parameter.	317 * @param errorCode ICU error code in/out parameter.

313 * Must fulfill U_SUCCESS before the function call.	318 * Must fulfill U_SUCCESS before the function call.

314 * @return Boolean value indicating whether the source string is in the	319 * @return Boolean value indicating whether the source string is in the

315 * "mode" normalization form.	320 * "mode" normalization form.

316 *	321 *

317 * @see quickCheck	322 * @see quickCheck

318 * @stable ICU 2.2	323 * @deprecated ICU 56 Use Normalizer2 instead.

319 */	324 */

320 static inline UBool	325 static inline UBool

321 isNormalized(const UnicodeString &src, UNormalizationMode mode, UErrorCode &errorCode);	326 isNormalized(const UnicodeString &src, UNormalizationMode mode, UErrorCode &errorCode);

322	327

323 /**	328 /**

324 * Test if a string is in a given normalization form; same as the other version of isNormalized	329 * Test if a string is in a given normalization form; same as the other version of isNormalized

325 * but takes an extra options parameter like most normalization functions.	330 * but takes an extra options parameter like most normalization functions.

326 *	331 *

327 * @param src String that is to be tested if it is in a normalization format.	332 * @param src String that is to be tested if it is in a normalization format.

328 * @param mode Which normalization form to test for.	333 * @param mode Which normalization form to test for.

329 * @param options the optional features to be enabled (0 for no options)	334 * @param options the optional features to be enabled (0 for no options)

330 * @param errorCode ICU error code in/out parameter.	335 * @param errorCode ICU error code in/out parameter.

331 * Must fulfill U_SUCCESS before the function call.	336 * Must fulfill U_SUCCESS before the function call.

332 * @return Boolean value indicating whether the source string is in the	337 * @return Boolean value indicating whether the source string is in the

333 * "mode" normalization form.	338 * "mode" normalization form.

334 *	339 *

335 * @see quickCheck	340 * @see quickCheck

336 * @stable ICU 2.6	341 * @deprecated ICU 56 Use Normalizer2 instead.

337 */	342 */

338 static UBool	343 static UBool

339 isNormalized(const UnicodeString &src, UNormalizationMode mode, int32_t options, UErrorCode &errorCode);	344 isNormalized(const UnicodeString &src, UNormalizationMode mode, int32_t options, UErrorCode &errorCode);

340	345

341 /**	346 /**

342 * Concatenate normalized strings, making sure that the result is normalized as well.	347 * Concatenate normalized strings, making sure that the result is normalized as well.

343 *	348 *

344 * If both the left and the right strings are in	349 * If both the left and the right strings are in

345 * the normalization form according to "mode/options",	350 * the normalization form according to "mode/options",

346 * then the result will be	351 * then the result will be

(...skipping 11 matching lines...) Expand all Loading...
358 * @param options A bit set of normalization options.	363 * @param options A bit set of normalization options.

359 * @param errorCode ICU error code in/out parameter.	364 * @param errorCode ICU error code in/out parameter.

360 * Must fulfill U_SUCCESS before the function call.	365 * Must fulfill U_SUCCESS before the function call.

361 * @return result	366 * @return result

362 *	367 *

363 * @see unorm_concatenate	368 * @see unorm_concatenate

364 * @see normalize	369 * @see normalize

365 * @see unorm_next	370 * @see unorm_next

366 * @see unorm_previous	371 * @see unorm_previous

367 *	372 *

368 * @stable ICU 2.1	373 * @deprecated ICU 56 Use Normalizer2 instead.

369 */	374 */

370 static UnicodeString &	375 static UnicodeString &

371 U_EXPORT2 concatenate(const UnicodeString &left, const UnicodeString &right,	376 U_EXPORT2 concatenate(const UnicodeString &left, const UnicodeString &right,

372 UnicodeString &result,	377 UnicodeString &result,

373 UNormalizationMode mode, int32_t options,	378 UNormalizationMode mode, int32_t options,

374 UErrorCode &errorCode);	379 UErrorCode &errorCode);

	380 #endif /* U_HIDE_DEPRECATED_API */

375	381

376 /**	382 /**

377 * Compare two strings for canonical equivalence.	383 * Compare two strings for canonical equivalence.

378 * Further options include case-insensitive comparison and	384 * Further options include case-insensitive comparison and

379 * code point order (as opposed to code unit order).	385 * code point order (as opposed to code unit order).

380 *	386 *

381 * Canonical equivalence between two strings is defined as their normalized	387 * Canonical equivalence between two strings is defined as their normalized

382 * forms (NFD or NFC) being identical.	388 * forms (NFD or NFC) being identical.

383 * This function compares strings incrementally instead of normalizing	389 * This function compares strings incrementally instead of normalizing

384 * (and optionally case-folding) both strings entirely,	390 * (and optionally case-folding) both strings entirely,

(...skipping 50 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
435 * @see u_strCompare	441 * @see u_strCompare

436 * @see u_strCaseCompare	442 * @see u_strCaseCompare

437 *	443 *

438 * @stable ICU 2.2	444 * @stable ICU 2.2

439 */	445 */

440 static inline int32_t	446 static inline int32_t

441 compare(const UnicodeString &s1, const UnicodeString &s2,	447 compare(const UnicodeString &s1, const UnicodeString &s2,

442 uint32_t options,	448 uint32_t options,

443 UErrorCode &errorCode);	449 UErrorCode &errorCode);

444	450

	451 #ifndef U_HIDE_DEPRECATED_API

445 //-------------------------------------------------------------------------	452 //-------------------------------------------------------------------------

446 // Iteration API	453 // Iteration API

447 //-------------------------------------------------------------------------	454 //-------------------------------------------------------------------------

448	455

449 /**	456 /**

450 * Return the current character in the normalized text.	457 * Return the current character in the normalized text.

451 * current() may need to normalize some text at getIndex().	458 * current() may need to normalize some text at getIndex().

452 * The getIndex() is not changed.	459 * The getIndex() is not changed.

453 *	460 *

454 * @return the current normalized code point	461 * @return the current normalized code point

455 * @stable ICU 2.0	462 * @deprecated ICU 56 Use Normalizer2 instead.

456 */	463 */

457 UChar32 current(void);	464 UChar32 current(void);

458	465

459 /**	466 /**

460 * Return the first character in the normalized text.	467 * Return the first character in the normalized text.

461 * This is equivalent to setIndexOnly(startIndex()) followed by next().	468 * This is equivalent to setIndexOnly(startIndex()) followed by next().

462 * (Post-increment semantics.)	469 * (Post-increment semantics.)

463 *	470 *

464 * @return the first normalized code point	471 * @return the first normalized code point

465 * @stable ICU 2.0	472 * @deprecated ICU 56 Use Normalizer2 instead.

466 */	473 */

467 UChar32 first(void);	474 UChar32 first(void);

468	475

469 /**	476 /**

470 * Return the last character in the normalized text.	477 * Return the last character in the normalized text.

471 * This is equivalent to setIndexOnly(endIndex()) followed by previous().	478 * This is equivalent to setIndexOnly(endIndex()) followed by previous().

472 * (Pre-decrement semantics.)	479 * (Pre-decrement semantics.)

473 *	480 *

474 * @return the last normalized code point	481 * @return the last normalized code point

475 * @stable ICU 2.0	482 * @deprecated ICU 56 Use Normalizer2 instead.

476 */	483 */

477 UChar32 last(void);	484 UChar32 last(void);

478	485

479 /**	486 /**

480 * Return the next character in the normalized text.	487 * Return the next character in the normalized text.

481 * (Post-increment semantics.)	488 * (Post-increment semantics.)

482 * If the end of the text has already been reached, DONE is returned.	489 * If the end of the text has already been reached, DONE is returned.

483 * The DONE value could be confused with a U+FFFF non-character code point	490 * The DONE value could be confused with a U+FFFF non-character code point

484 * in the text. If this is possible, you can test getIndex()<endIndex()	491 * in the text. If this is possible, you can test getIndex()<endIndex()

485 * before calling next(), or (getIndex()<endIndex() || last()!=DONE)	492 * before calling next(), or (getIndex()<endIndex() || last()!=DONE)

486 * after calling next(). (Calling last() will change the iterator state!)	493 * after calling next(). (Calling last() will change the iterator state!)

487 *	494 *

488 * The C API unorm_next() is more efficient and does not have this ambiguity.	495 * The C API unorm_next() is more efficient and does not have this ambiguity.

489 *	496 *

490 * @return the next normalized code point	497 * @return the next normalized code point

491 * @stable ICU 2.0	498 * @deprecated ICU 56 Use Normalizer2 instead.

492 */	499 */

493 UChar32 next(void);	500 UChar32 next(void);

494	501

495 /**	502 /**

496 * Return the previous character in the normalized text and decrement.	503 * Return the previous character in the normalized text and decrement.

497 * (Pre-decrement semantics.)	504 * (Pre-decrement semantics.)

498 * If the beginning of the text has already been reached, DONE is returned.	505 * If the beginning of the text has already been reached, DONE is returned.

499 * The DONE value could be confused with a U+FFFF non-character code point	506 * The DONE value could be confused with a U+FFFF non-character code point

500 * in the text. If this is possible, you can test	507 * in the text. If this is possible, you can test

501 * (getIndex()>startIndex() || first()!=DONE). (Calling first() will change	508 * (getIndex()>startIndex() || first()!=DONE). (Calling first() will change

502 * the iterator state!)	509 * the iterator state!)

503 *	510 *

504 * The C API unorm_previous() is more efficient and does not have this ambiguity.	511 * The C API unorm_previous() is more efficient and does not have this ambiguity.

505 *	512 *

506 * @return the previous normalized code point	513 * @return the previous normalized code point

507 * @stable ICU 2.0	514 * @deprecated ICU 56 Use Normalizer2 instead.

508 */	515 */

509 UChar32 previous(void);	516 UChar32 previous(void);

510	517

511 /**	518 /**

512 * Set the iteration position in the input text that is being normalized,	519 * Set the iteration position in the input text that is being normalized,

513 * without any immediate normalization.	520 * without any immediate normalization.

514 * After setIndexOnly(), getIndex() will return the same index that is	521 * After setIndexOnly(), getIndex() will return the same index that is

515 * specified here.	522 * specified here.

516 *	523 *

517 * @param index the desired index in the input text.	524 * @param index the desired index in the input text.

518 * @stable ICU 2.0	525 * @deprecated ICU 56 Use Normalizer2 instead.

519 */	526 */

520 void setIndexOnly(int32_t index);	527 void setIndexOnly(int32_t index);

521	528

522 /**	529 /**

523 * Reset the index to the beginning of the text.	530 * Reset the index to the beginning of the text.

524 * This is equivalent to setIndexOnly(startIndex()).	531 * This is equivalent to setIndexOnly(startIndex()).

525 * @stable ICU 2.0	532 * @deprecated ICU 56 Use Normalizer2 instead.

526 */	533 */

527 void reset(void);	534 void reset(void);

528	535

529 /**	536 /**

530 * Retrieve the current iteration position in the input text that is	537 * Retrieve the current iteration position in the input text that is

531 * being normalized.	538 * being normalized.

532 *	539 *

533 * A following call to next() will return a normalized code point from	540 * A following call to next() will return a normalized code point from

534 * the input text at or after this index.	541 * the input text at or after this index.

535 *	542 *

536 * After a call to previous(), getIndex() will point at or before the	543 * After a call to previous(), getIndex() will point at or before the

537 * position in the input text where the normalized code point	544 * position in the input text where the normalized code point

538 * was returned from with previous().	545 * was returned from with previous().

539 *	546 *

540 * @return the current index in the input text	547 * @return the current index in the input text

541 * @stable ICU 2.0	548 * @deprecated ICU 56 Use Normalizer2 instead.

542 */	549 */

543 int32_t getIndex(void) const;	550 int32_t getIndex(void) const;

544	551

545 /**	552 /**

546 * Retrieve the index of the start of the input text. This is the begin index	553 * Retrieve the index of the start of the input text. This is the begin index

547 * of the <code>CharacterIterator</code> or the start (i.e. index 0) of the string	554 * of the <code>CharacterIterator</code> or the start (i.e. index 0) of the string

548 * over which this <code>Normalizer</code> is iterating.	555 * over which this <code>Normalizer</code> is iterating.

549 *	556 *

550 * @return the smallest index in the input text where the Normalizer operates	557 * @return the smallest index in the input text where the Normalizer operates

551 * @stable ICU 2.0	558 * @deprecated ICU 56 Use Normalizer2 instead.

552 */	559 */

553 int32_t startIndex(void) const;	560 int32_t startIndex(void) const;

554	561

555 /**	562 /**

556 * Retrieve the index of the end of the input text. This is the end index	563 * Retrieve the index of the end of the input text. This is the end index

557 * of the <code>CharacterIterator</code> or the length of the string	564 * of the <code>CharacterIterator</code> or the length of the string

558 * over which this <code>Normalizer</code> is iterating.	565 * over which this <code>Normalizer</code> is iterating.

559 * This end index is exclusive, i.e., the Normalizer operates only on characters	566 * This end index is exclusive, i.e., the Normalizer operates only on characters

560 * before this index.	567 * before this index.

561 *	568 *

562 * @return the first index in the input text where the Normalizer does not operate	569 * @return the first index in the input text where the Normalizer does not operate

563 * @stable ICU 2.0	570 * @deprecated ICU 56 Use Normalizer2 instead.

564 */	571 */

565 int32_t endIndex(void) const;	572 int32_t endIndex(void) const;

566	573

567 /**	574 /**

568 * Returns TRUE when both iterators refer to the same character in the same	575 * Returns TRUE when both iterators refer to the same character in the same

569 * input text.	576 * input text.

570 *	577 *

571 * @param that a Normalizer object to compare this one to	578 * @param that a Normalizer object to compare this one to

572 * @return comparison result	579 * @return comparison result

573 * @stable ICU 2.0	580 * @deprecated ICU 56 Use Normalizer2 instead.

574 */	581 */

575 UBool operator==(const Normalizer& that) const;	582 UBool operator==(const Normalizer& that) const;

576	583

577 /**	584 /**

578 * Returns FALSE when both iterators refer to the same character in the same	585 * Returns FALSE when both iterators refer to the same character in the same

579 * input text.	586 * input text.

580 *	587 *

581 * @param that a Normalizer object to compare this one to	588 * @param that a Normalizer object to compare this one to

582 * @return comparison result	589 * @return comparison result

583 * @stable ICU 2.0	590 * @deprecated ICU 56 Use Normalizer2 instead.

584 */	591 */

585 inline UBool operator!=(const Normalizer& that) const;	592 inline UBool operator!=(const Normalizer& that) const;

586	593

587 /**	594 /**

588 * Returns a pointer to a new Normalizer that is a clone of this one.	595 * Returns a pointer to a new Normalizer that is a clone of this one.

589 * The caller is responsible for deleting the new clone.	596 * The caller is responsible for deleting the new clone.

590 * @return a pointer to a new Normalizer	597 * @return a pointer to a new Normalizer

591 * @stable ICU 2.0	598 * @deprecated ICU 56 Use Normalizer2 instead.

592 */	599 */

593 Normalizer* clone(void) const;	600 Normalizer* clone(void) const;

594	601

595 /**	602 /**

596 * Generates a hash code for this iterator.	603 * Generates a hash code for this iterator.

597 *	604 *

598 * @return the hash code	605 * @return the hash code

599 * @stable ICU 2.0	606 * @deprecated ICU 56 Use Normalizer2 instead.

600 */	607 */

601 int32_t hashCode(void) const;	608 int32_t hashCode(void) const;

602	609

603 //-------------------------------------------------------------------------	610 //-------------------------------------------------------------------------

604 // Property access methods	611 // Property access methods

605 //-------------------------------------------------------------------------	612 //-------------------------------------------------------------------------

606	613

607 /**	614 /**

608 * Set the normalization mode for this object.	615 * Set the normalization mode for this object.

609 * <p>	616 * <p>

610 * <b>Note:</b>If the normalization mode is changed while iterating	617 * <b>Note:</b>If the normalization mode is changed while iterating

611 * over a string, calls to {@link #next() } and {@link #previous() } may	618 * over a string, calls to {@link #next() } and {@link #previous() } may

612 * return previously buffers characters in the old normalization mode	619 * return previously buffers characters in the old normalization mode

613 * until the iteration is able to re-sync at the next base character.	620 * until the iteration is able to re-sync at the next base character.

614 * It is safest to call {@link #setIndexOnly }, {@link #reset() },	621 * It is safest to call {@link #setIndexOnly }, {@link #reset() },

615 * {@link #setText }, {@link #first() },	622 * {@link #setText }, {@link #first() },

616 * {@link #last() }, etc. after calling <code>setMode</code>.	623 * {@link #last() }, etc. after calling <code>setMode</code>.

617 * <p>	624 * <p>

618 * @param newMode the new mode for this <code>Normalizer</code>.	625 * @param newMode the new mode for this <code>Normalizer</code>.

619 * @see #getUMode	626 * @see #getUMode

620 * @stable ICU 2.0	627 * @deprecated ICU 56 Use Normalizer2 instead.

621 */	628 */

622 void setMode(UNormalizationMode newMode);	629 void setMode(UNormalizationMode newMode);

623	630

624 /**	631 /**

625 * Return the normalization mode for this object.	632 * Return the normalization mode for this object.

626 *	633 *

627 * This is an unusual name because there used to be a getMode() that	634 * This is an unusual name because there used to be a getMode() that

628 * returned a different type.	635 * returned a different type.

629 *	636 *

630 * @return the mode for this <code>Normalizer</code>	637 * @return the mode for this <code>Normalizer</code>

631 * @see #setMode	638 * @see #setMode

632 * @stable ICU 2.0	639 * @deprecated ICU 56 Use Normalizer2 instead.

633 */	640 */

634 UNormalizationMode getUMode(void) const;	641 UNormalizationMode getUMode(void) const;

635	642

636 /**	643 /**

637 * Set options that affect this <code>Normalizer</code>'s operation.	644 * Set options that affect this <code>Normalizer</code>'s operation.

638 * Options do not change the basic composition or decomposition operation	645 * Options do not change the basic composition or decomposition operation

639 * that is being performed, but they control whether	646 * that is being performed, but they control whether

640 * certain optional portions of the operation are done.	647 * certain optional portions of the operation are done.

641 * Currently the only available option is obsolete.	648 * Currently the only available option is obsolete.

642 *	649 *

643 * It is possible to specify multiple options that are all turned on or off.	650 * It is possible to specify multiple options that are all turned on or off.

644 *	651 *

645 * @param option the option(s) whose value is/are to be set.	652 * @param option the option(s) whose value is/are to be set.

646 * @param value the new setting for the option. Use <code>TRUE</code> to	653 * @param value the new setting for the option. Use <code>TRUE</code> to

647 * turn the option(s) on and <code>FALSE</code> to turn it/them off.	654 * turn the option(s) on and <code>FALSE</code> to turn it/them off.

648 *	655 *

649 * @see #getOption	656 * @see #getOption

650 * @stable ICU 2.0	657 * @deprecated ICU 56 Use Normalizer2 instead.

651 */	658 */

652 void setOption(int32_t option,	659 void setOption(int32_t option,

653 UBool value);	660 UBool value);

654	661

655 /**	662 /**

656 * Determine whether an option is turned on or off.	663 * Determine whether an option is turned on or off.

657 * If multiple options are specified, then the result is TRUE if any	664 * If multiple options are specified, then the result is TRUE if any

658 * of them are set.	665 * of them are set.

659 * <p>	666 * <p>

660 * @param option the option(s) that are to be checked	667 * @param option the option(s) that are to be checked

661 * @return TRUE if any of the option(s) are set	668 * @return TRUE if any of the option(s) are set

662 * @see #setOption	669 * @see #setOption

663 * @stable ICU 2.0	670 * @deprecated ICU 56 Use Normalizer2 instead.

664 */	671 */

665 UBool getOption(int32_t option) const;	672 UBool getOption(int32_t option) const;

666	673

667 /**	674 /**

668 * Set the input text over which this <code>Normalizer</code> will iterate.	675 * Set the input text over which this <code>Normalizer</code> will iterate.

669 * The iteration position is set to the beginning.	676 * The iteration position is set to the beginning.

670 *	677 *

671 * @param newText a string that replaces the current input text	678 * @param newText a string that replaces the current input text

672 * @param status a UErrorCode	679 * @param status a UErrorCode

673 * @stable ICU 2.0	680 * @deprecated ICU 56 Use Normalizer2 instead.

674 */	681 */

675 void setText(const UnicodeString& newText,	682 void setText(const UnicodeString& newText,

676 UErrorCode &status);	683 UErrorCode &status);

677	684

678 /**	685 /**

679 * Set the input text over which this <code>Normalizer</code> will iterate.	686 * Set the input text over which this <code>Normalizer</code> will iterate.

680 * The iteration position is set to the beginning.	687 * The iteration position is set to the beginning.

681 *	688 *

682 * @param newText a CharacterIterator object that replaces the current input text	689 * @param newText a CharacterIterator object that replaces the current input text

683 * @param status a UErrorCode	690 * @param status a UErrorCode

684 * @stable ICU 2.0	691 * @deprecated ICU 56 Use Normalizer2 instead.

685 */	692 */

686 void setText(const CharacterIterator& newText,	693 void setText(const CharacterIterator& newText,

687 UErrorCode &status);	694 UErrorCode &status);

688	695

689 /**	696 /**

690 * Set the input text over which this <code>Normalizer</code> will iterate.	697 * Set the input text over which this <code>Normalizer</code> will iterate.

691 * The iteration position is set to the beginning.	698 * The iteration position is set to the beginning.

692 *	699 *

693 * @param newText a string that replaces the current input text	700 * @param newText a string that replaces the current input text

694 * @param length the length of the string, or -1 if NUL-terminated	701 * @param length the length of the string, or -1 if NUL-terminated

695 * @param status a UErrorCode	702 * @param status a UErrorCode

696 * @stable ICU 2.0	703 * @deprecated ICU 56 Use Normalizer2 instead.

697 */	704 */

698 void setText(const UChar* newText,	705 void setText(const UChar* newText,

699 int32_t length,	706 int32_t length,

700 UErrorCode &status);	707 UErrorCode &status);

701 /**	708 /**

702 * Copies the input text into the UnicodeString argument.	709 * Copies the input text into the UnicodeString argument.

703 *	710 *

704 * @param result Receives a copy of the text under iteration.	711 * @param result Receives a copy of the text under iteration.

705 * @stable ICU 2.0	712 * @deprecated ICU 56 Use Normalizer2 instead.

706 */	713 */

707 void getText(UnicodeString& result);	714 void getText(UnicodeString& result);

708	715

709 /**	716 /**

710 * ICU "poor man's RTTI", returns a UClassID for this class.	717 * ICU "poor man's RTTI", returns a UClassID for this class.

711 * @returns a UClassID for this class.	718 * @returns a UClassID for this class.

712 * @stable ICU 2.2	719 * @deprecated ICU 56 Use Normalizer2 instead.

713 */	720 */

714 static UClassID U_EXPORT2 getStaticClassID();	721 static UClassID U_EXPORT2 getStaticClassID();

	722 #endif /* U_HIDE_DEPRECATED_API */

715	723

716 /**	724 /**

717 * ICU "poor man's RTTI", returns a UClassID for the actual class.	725 * ICU "poor man's RTTI", returns a UClassID for the actual class.

718 * @return a UClassID for the actual class.	726 * @return a UClassID for the actual class.

719 * @stable ICU 2.2	727 * @deprecated ICU 56 Use Normalizer2 instead.

720 */	728 */

721 virtual UClassID getDynamicClassID() const;	729 virtual UClassID getDynamicClassID() const;

722	730

723 private:	731 private:

724 //-------------------------------------------------------------------------	732 //-------------------------------------------------------------------------

725 // Private functions	733 // Private functions

726 //-------------------------------------------------------------------------	734 //-------------------------------------------------------------------------

727	735

728 Normalizer(); // default constructor not implemented	736 Normalizer(); // default constructor not implemented

729 Normalizer &operator=(const Normalizer &that); // assignment operator not implemented	737 Normalizer &operator=(const Normalizer &that); // assignment operator not implemented

730	738

731 // Private utility methods for iteration	739 // Private utility methods for iteration

732 // For documentation, see the source code	740 // For documentation, see the source code

733 UBool nextNormalize();	741 UBool nextNormalize();

734 UBool previousNormalize();	742 UBool previousNormalize();

735	743

736 void init();	744 void init();

737 void clearBuffer(void);	745 void clearBuffer(void);

738	746

739 //-------------------------------------------------------------------------	747 //-------------------------------------------------------------------------

740 // Private data	748 // Private data

741 //-------------------------------------------------------------------------	749 //-------------------------------------------------------------------------

742	750

743 FilteredNormalizer2*fFilteredNorm2; // owned if not NULL	751 FilteredNormalizer2*fFilteredNorm2; // owned if not NULL

744 const Normalizer2 *fNorm2; // not owned; may be equal to fFilteredNorm2	752 const Normalizer2 *fNorm2; // not owned; may be equal to fFilteredNorm2

	753 #ifndef U_HIDE_DEPRECATED_API

745 UNormalizationMode fUMode;	754 UNormalizationMode fUMode;

	755 #endif /* U_HIDE_DEPRECATED_API */

746 int32_t fOptions;	756 int32_t fOptions;

747	757

748 // The input text and our position in it	758 // The input text and our position in it

749 CharacterIterator *text;	759 CharacterIterator *text;

750	760

751 // The normalization buffer is the result of normalization	761 // The normalization buffer is the result of normalization

752 // of the source in [currentIndex..nextIndex[ .	762 // of the source in [currentIndex..nextIndex[ .

753 int32_t currentIndex, nextIndex;	763 int32_t currentIndex, nextIndex;

754	764

755 // A buffer for holding intermediate results	765 // A buffer for holding intermediate results

756 UnicodeString buffer;	766 UnicodeString buffer;

757 int32_t bufferPos;	767 int32_t bufferPos;

758 };	768 };

759	769

760 //-------------------------------------------------------------------------	770 //-------------------------------------------------------------------------

761 // Inline implementations	771 // Inline implementations

762 //-------------------------------------------------------------------------	772 //-------------------------------------------------------------------------

763	773

	774 #ifndef U_HIDE_DEPRECATED_API

764 inline UBool	775 inline UBool

765 Normalizer::operator!= (const Normalizer& other) const	776 Normalizer::operator!= (const Normalizer& other) const

766 { return ! operator==(other); }	777 { return ! operator==(other); }

767	778

768 inline UNormalizationCheckResult	779 inline UNormalizationCheckResult

769 Normalizer::quickCheck(const UnicodeString& source,	780 Normalizer::quickCheck(const UnicodeString& source,

770 UNormalizationMode mode,	781 UNormalizationMode mode,

771 UErrorCode &status) {	782 UErrorCode &status) {

772 return quickCheck(source, mode, 0, status);	783 return quickCheck(source, mode, 0, status);

773 }	784 }

774	785

775 inline UBool	786 inline UBool

776 Normalizer::isNormalized(const UnicodeString& source,	787 Normalizer::isNormalized(const UnicodeString& source,

777 UNormalizationMode mode,	788 UNormalizationMode mode,

778 UErrorCode &status) {	789 UErrorCode &status) {

779 return isNormalized(source, mode, 0, status);	790 return isNormalized(source, mode, 0, status);

780 }	791 }

	792 #endif /* U_HIDE_DEPRECATED_API */

781	793

782 inline int32_t	794 inline int32_t

783 Normalizer::compare(const UnicodeString &s1, const UnicodeString &s2,	795 Normalizer::compare(const UnicodeString &s1, const UnicodeString &s2,

784 uint32_t options,	796 uint32_t options,

785 UErrorCode &errorCode) {	797 UErrorCode &errorCode) {

786 // all argument checking is done in unorm_compare	798 // all argument checking is done in unorm_compare

787 return unorm_compare(s1.getBuffer(), s1.length(),	799 return unorm_compare(s1.getBuffer(), s1.length(),

788 s2.getBuffer(), s2.length(),	800 s2.getBuffer(), s2.length(),

789 options,	801 options,

790 &errorCode);	802 &errorCode);

791 }	803 }

792	804

793 U_NAMESPACE_END	805 U_NAMESPACE_END

794	806

795 #endif /* #if !UCONFIG_NO_NORMALIZATION */	807 #endif /* #if !UCONFIG_NO_NORMALIZATION */

796	808

797 #endif // NORMLZR_H	809 #endif // NORMLZR_H

OLD	NEW

« no previous file with comments | « source/common/unicode/locid.h ('k') | source/common/unicode/platform.h » ('j') | no next file with comments »