source/common/normalizer2impl.h - Issue 845603002: Update ICU to 54.1 step 1

Side by Side Diff: source/common/normalizer2impl.h

Issue 845603002: Update ICU to 54.1 step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@master

Patch Set: remove unused directories Created 5 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 *******************************************************************************	2 *******************************************************************************

3 *	3 *

4 * Copyright (C) 2009-2013, International Business Machines	4 * Copyright (C) 2009-2014, International Business Machines

5 * Corporation and others. All Rights Reserved.	5 * Corporation and others. All Rights Reserved.

6 *	6 *

7 *******************************************************************************	7 *******************************************************************************

8 * file name: normalizer2impl.h	8 * file name: normalizer2impl.h

9 * encoding: US-ASCII	9 * encoding: US-ASCII

10 * tab size: 8 (not used)	10 * tab size: 8 (not used)

11 * indentation:4	11 * indentation:4

12 *	12 *

13 * created on: 2009nov22	13 * created on: 2009nov22

14 * created by: Markus W. Scherer	14 * created by: Markus W. Scherer

15 */	15 */

16	16

17 #ifndef __NORMALIZER2IMPL_H__	17 #ifndef __NORMALIZER2IMPL_H__

18 #define __NORMALIZER2IMPL_H__	18 #define __NORMALIZER2IMPL_H__

19	19

20 #include "unicode/utypes.h"	20 #include "unicode/utypes.h"

21	21

22 #if !UCONFIG_NO_NORMALIZATION	22 #if !UCONFIG_NO_NORMALIZATION

23	23

24 #include "unicode/normalizer2.h"	24 #include "unicode/normalizer2.h"

25 #include "unicode/udata.h"

26 #include "unicode/unistr.h"	25 #include "unicode/unistr.h"

27 #include "unicode/unorm.h"	26 #include "unicode/unorm.h"

28 #include "unicode/utf16.h"	27 #include "unicode/utf16.h"

29 #include "mutex.h"	28 #include "mutex.h"

30 #include "uset_imp.h"	29 #include "uset_imp.h"

31 #include "utrie2.h"	30 #include "utrie2.h"

32	31

33 U_NAMESPACE_BEGIN	32 U_NAMESPACE_BEGIN

34	33

35 struct CanonIterData;	34 struct CanonIterData;

36	35

37 class Hangul {	36 class U_COMMON_API Hangul {

38 public:	37 public:

39 /* Korean Hangul and Jamo constants */	38 /* Korean Hangul and Jamo constants */

40 enum {	39 enum {

41 JAMO_L_BASE=0x1100, /* "lead" jamo */	40 JAMO_L_BASE=0x1100, /* "lead" jamo */

	41 JAMO_L_END=0x1112,

42 JAMO_V_BASE=0x1161, /* "vowel" jamo */	42 JAMO_V_BASE=0x1161, /* "vowel" jamo */

	43 JAMO_V_END=0x1175,

43 JAMO_T_BASE=0x11a7, /* "trail" jamo */	44 JAMO_T_BASE=0x11a7, /* "trail" jamo */

	45 JAMO_T_END=0x11c2,

44	46

45 HANGUL_BASE=0xac00,	47 HANGUL_BASE=0xac00,

	48 HANGUL_END=0xd7a3,

46	49

47 JAMO_L_COUNT=19,	50 JAMO_L_COUNT=19,

48 JAMO_V_COUNT=21,	51 JAMO_V_COUNT=21,

49 JAMO_T_COUNT=28,	52 JAMO_T_COUNT=28,

50	53

51 JAMO_VT_COUNT=JAMO_V_COUNT*JAMO_T_COUNT,	54 JAMO_VT_COUNT=JAMO_V_COUNT*JAMO_T_COUNT,

52	55

53 HANGUL_COUNT=JAMO_L_COUNT*JAMO_V_COUNT*JAMO_T_COUNT,	56 HANGUL_COUNT=JAMO_L_COUNT*JAMO_V_COUNT*JAMO_T_COUNT,

54 HANGUL_LIMIT=HANGUL_BASE+HANGUL_COUNT	57 HANGUL_LIMIT=HANGUL_BASE+HANGUL_COUNT

55 };	58 };

(...skipping 47 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
103 buffer[0]=orig-c2; // LV syllable	106 buffer[0]=orig-c2; // LV syllable

104 buffer[1]=(UChar)(JAMO_T_BASE+c2);	107 buffer[1]=(UChar)(JAMO_T_BASE+c2);

105 }	108 }

106 }	109 }

107 private:	110 private:

108 Hangul(); // no instantiation	111 Hangul(); // no instantiation

109 };	112 };

110	113

111 class Normalizer2Impl;	114 class Normalizer2Impl;

112	115

113 class ReorderingBuffer : public UMemory {	116 class U_COMMON_API ReorderingBuffer : public UMemory {

114 public:	117 public:

115 ReorderingBuffer(const Normalizer2Impl &ni, UnicodeString &dest) :	118 ReorderingBuffer(const Normalizer2Impl &ni, UnicodeString &dest) :

116 impl(ni), str(dest),	119 impl(ni), str(dest),

117 start(NULL), reorderStart(NULL), limit(NULL),	120 start(NULL), reorderStart(NULL), limit(NULL),

118 remainingCapacity(0), lastCC(0) {}	121 remainingCapacity(0), lastCC(0) {}

119 ~ReorderingBuffer() {	122 ~ReorderingBuffer() {

120 if(start!=NULL) {	123 if(start!=NULL) {

121 str.releaseBuffer((int32_t)(limit-start));	124 str.releaseBuffer((int32_t)(limit-start));

122 }	125 }

123 }	126 }

(...skipping 82 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
206 uint8_t lastCC;	209 uint8_t lastCC;

207	210

208 // private backward iterator	211 // private backward iterator

209 void setIterator() { codePointStart=limit; }	212 void setIterator() { codePointStart=limit; }

210 void skipPrevious(); // Requires start<codePointStart.	213 void skipPrevious(); // Requires start<codePointStart.

211 uint8_t previousCC(); // Returns 0 if there is no previous character.	214 uint8_t previousCC(); // Returns 0 if there is no previous character.

212	215

213 UChar *codePointStart, *codePointLimit;	216 UChar *codePointStart, *codePointLimit;

214 };	217 };

215	218

216 class U_COMMON_API Normalizer2Impl : public UMemory {	219 class U_COMMON_API Normalizer2Impl : public UObject {

217 public:	220 public:

218 Normalizer2Impl() : memory(NULL), normTrie(NULL), fCanonIterData(NULL) {	221 Normalizer2Impl() : normTrie(NULL), fCanonIterData(NULL) {

219 fCanonIterDataInitOnce.reset();	222 fCanonIterDataInitOnce.reset();

220 }	223 }

221 ~Normalizer2Impl();	224 virtual ~Normalizer2Impl();

222	225

223 void load(const char *packageName, const char *name, UErrorCode &errorCode);	226 void init(const int32_t *inIndexes, const UTrie2 *inTrie,

	227 const uint16_t *inExtraData, const uint8_t *inSmallFCD);

224	228

	229 void addLcccChars(UnicodeSet &set) const;

225 void addPropertyStarts(const USetAdder *sa, UErrorCode &errorCode) const;	230 void addPropertyStarts(const USetAdder *sa, UErrorCode &errorCode) const;

226 void addCanonIterPropertyStarts(const USetAdder *sa, UErrorCode &errorCode) const;	231 void addCanonIterPropertyStarts(const USetAdder *sa, UErrorCode &errorCode) const;

227	232

228 // low-level properties ------------------------------------------------ ***	233 // low-level properties ------------------------------------------------ ***

229	234

230 const UTrie2 *getNormTrie() const { return normTrie; }	235 const UTrie2 *getNormTrie() const { return normTrie; }

231	236

232 UBool ensureCanonIterData(UErrorCode &errorCode) const;	237 UBool ensureCanonIterData(UErrorCode &errorCode) const;

233	238

234 uint16_t getNorm16(UChar32 c) const { return UTRIE2_GET16(normTrie, c); }	239 uint16_t getNorm16(UChar32 c) const { return UTRIE2_GET16(normTrie, c); }

235	240

236 UNormalizationCheckResult getCompQuickCheck(uint16_t norm16) const {	241 UNormalizationCheckResult getCompQuickCheck(uint16_t norm16) const {

237 if(norm16<minNoNo \|\| MIN_YES_YES_WITH_CC<=norm16) {	242 if(norm16<minNoNo \|\| MIN_YES_YES_WITH_CC<=norm16) {

238 return UNORM_YES;	243 return UNORM_YES;

239 } else if(minMaybeYes<=norm16) {	244 } else if(minMaybeYes<=norm16) {

240 return UNORM_MAYBE;	245 return UNORM_MAYBE;

241 } else {	246 } else {

242 return UNORM_NO;	247 return UNORM_NO;

243 }	248 }

244 }	249 }

	250 UBool isAlgorithmicNoNo(uint16_t norm16) const { return limitNoNo<=norm16 && norm16<minMaybeYes; }

245 UBool isCompNo(uint16_t norm16) const { return minNoNo<=norm16 && norm16<minMaybeYes; }	251 UBool isCompNo(uint16_t norm16) const { return minNoNo<=norm16 && norm16<minMaybeYes; }

246 UBool isDecompYes(uint16_t norm16) const { return norm16<minYesNo \|\| minMaybeYes<=norm16; }	252 UBool isDecompYes(uint16_t norm16) const { return norm16<minYesNo \|\| minMaybeYes<=norm16; }

247	253

248 uint8_t getCC(uint16_t norm16) const {	254 uint8_t getCC(uint16_t norm16) const {

249 if(norm16>=MIN_NORMAL_MAYBE_YES) {	255 if(norm16>=MIN_NORMAL_MAYBE_YES) {

250 return (uint8_t)norm16;	256 return (uint8_t)norm16;

251 }	257 }

252 if(norm16<minNoNo \|\| limitNoNo<=norm16) {	258 if(norm16<minNoNo \|\| limitNoNo<=norm16) {

253 return 0;	259 return 0;

254 }	260 }

(...skipping 154 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
409 COMP_1_TRIPLE=1,	415 COMP_1_TRIPLE=1,

410 COMP_1_TRAIL_LIMIT=0x3400,	416 COMP_1_TRAIL_LIMIT=0x3400,

411 COMP_1_TRAIL_MASK=0x7ffe,	417 COMP_1_TRAIL_MASK=0x7ffe,

412 COMP_1_TRAIL_SHIFT=9, // 10-1 for the "triple" bit	418 COMP_1_TRAIL_SHIFT=9, // 10-1 for the "triple" bit

413 COMP_2_TRAIL_SHIFT=6,	419 COMP_2_TRAIL_SHIFT=6,

414 COMP_2_TRAIL_MASK=0xffc0	420 COMP_2_TRAIL_MASK=0xffc0

415 };	421 };

416	422

417 // higher-level functionality ------------------------------------------ ***	423 // higher-level functionality ------------------------------------------ ***

418	424

	425 // NFD without an NFD Normalizer2 instance.

	426 UnicodeString &decompose(const UnicodeString &src, UnicodeString &dest,

	427 UErrorCode &errorCode) const;

	428 /**

	429 * Decomposes [src, limit[ and writes the result to dest.

	430 * limit can be NULL if src is NUL-terminated.

	431 * destLengthEstimate is the initial dest buffer capacity and can be -1.

	432 */

	433 void decompose(const UChar *src, const UChar *limit,

	434 UnicodeString &dest, int32_t destLengthEstimate,

	435 UErrorCode &errorCode) const;

	436

419 const UChar *decompose(const UChar *src, const UChar *limit,	437 const UChar *decompose(const UChar *src, const UChar *limit,

420 ReorderingBuffer *buffer, UErrorCode &errorCode) const;	438 ReorderingBuffer *buffer, UErrorCode &errorCode) const;

421 void decomposeAndAppend(const UChar *src, const UChar *limit,	439 void decomposeAndAppend(const UChar *src, const UChar *limit,

422 UBool doDecompose,	440 UBool doDecompose,

423 UnicodeString &safeMiddle,	441 UnicodeString &safeMiddle,

424 ReorderingBuffer &buffer,	442 ReorderingBuffer &buffer,

425 UErrorCode &errorCode) const;	443 UErrorCode &errorCode) const;

426 UBool compose(const UChar *src, const UChar *limit,	444 UBool compose(const UChar *src, const UChar *limit,

427 UBool onlyContiguous,	445 UBool onlyContiguous,

428 UBool doCompose,	446 UBool doCompose,

(...skipping 24 matching lines...) Expand all Loading...
453 }	471 }

454 UBool hasCompBoundaryAfter(UChar32 c, UBool onlyContiguous, UBool testInert) const;	472 UBool hasCompBoundaryAfter(UChar32 c, UBool onlyContiguous, UBool testInert) const;

455	473

456 UBool hasFCDBoundaryBefore(UChar32 c) const { return c<MIN_CCC_LCCC_CP \|\| getFCD16(c)<=0xff; }	474 UBool hasFCDBoundaryBefore(UChar32 c) const { return c<MIN_CCC_LCCC_CP \|\| getFCD16(c)<=0xff; }

457 UBool hasFCDBoundaryAfter(UChar32 c) const {	475 UBool hasFCDBoundaryAfter(UChar32 c) const {

458 uint16_t fcd16=getFCD16(c);	476 uint16_t fcd16=getFCD16(c);

459 return fcd16<=1 \|\| (fcd16&0xff)==0;	477 return fcd16<=1 \|\| (fcd16&0xff)==0;

460 }	478 }

461 UBool isFCDInert(UChar32 c) const { return getFCD16(c)<=1; }	479 UBool isFCDInert(UChar32 c) const { return getFCD16(c)<=1; }

462 private:	480 private:

463 static UBool U_CALLCONV

464 isAcceptable(void *context, const char *type, const char *name, const UDataInfo *pInfo);

465

466 UBool isMaybe(uint16_t norm16) const { return minMaybeYes<=norm16 && norm16<=JAMO_VT; }	481 UBool isMaybe(uint16_t norm16) const { return minMaybeYes<=norm16 && norm16<=JAMO_VT; }

467 UBool isMaybeOrNonZeroCC(uint16_t norm16) const { return norm16>=minMaybeYes; }	482 UBool isMaybeOrNonZeroCC(uint16_t norm16) const { return norm16>=minMaybeYes; }

468 static UBool isInert(uint16_t norm16) { return norm16==0; }	483 static UBool isInert(uint16_t norm16) { return norm16==0; }

469 static UBool isJamoL(uint16_t norm16) { return norm16==1; }	484 static UBool isJamoL(uint16_t norm16) { return norm16==1; }

470 static UBool isJamoVT(uint16_t norm16) { return norm16==JAMO_VT; }	485 static UBool isJamoVT(uint16_t norm16) { return norm16==JAMO_VT; }

471 UBool isHangul(uint16_t norm16) const { return norm16==minYesNo; }	486 UBool isHangul(uint16_t norm16) const { return norm16==minYesNo; }

472 UBool isCompYesAndZeroCC(uint16_t norm16) const { return norm16<minNoNo; }	487 UBool isCompYesAndZeroCC(uint16_t norm16) const { return norm16<minNoNo; }

473 // UBool isCompYes(uint16_t norm16) const {	488 // UBool isCompYes(uint16_t norm16) const {

474 // return norm16>=MIN_YES_YES_WITH_CC \|\| norm16<minNoNo;	489 // return norm16>=MIN_YES_YES_WITH_CC \|\| norm16<minNoNo;

475 // }	490 // }

(...skipping 83 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
559 UBool hasCompBoundaryBefore(UChar32 c, uint16_t norm16) const;	574 UBool hasCompBoundaryBefore(UChar32 c, uint16_t norm16) const;

560 const UChar *findPreviousCompBoundary(const UChar *start, const UChar *p) const;	575 const UChar *findPreviousCompBoundary(const UChar *start, const UChar *p) const;

561 const UChar *findNextCompBoundary(const UChar *p, const UChar *limit) const;	576 const UChar *findNextCompBoundary(const UChar *p, const UChar *limit) const;

562	577

563 const UChar *findPreviousFCDBoundary(const UChar *start, const UChar *p) const;	578 const UChar *findPreviousFCDBoundary(const UChar *start, const UChar *p) const;

564 const UChar *findNextFCDBoundary(const UChar *p, const UChar *limit) const;	579 const UChar *findNextFCDBoundary(const UChar *p, const UChar *limit) const;

565	580

566 int32_t getCanonValue(UChar32 c) const;	581 int32_t getCanonValue(UChar32 c) const;

567 const UnicodeSet &getCanonStartSet(int32_t n) const;	582 const UnicodeSet &getCanonStartSet(int32_t n) const;

568	583

569 UDataMemory *memory;	584 // UVersionInfo dataVersion;

570 UVersionInfo dataVersion;

571	585

572 // Code point thresholds for quick check codes.	586 // Code point thresholds for quick check codes.

573 UChar32 minDecompNoCP;	587 UChar32 minDecompNoCP;

574 UChar32 minCompNoMaybeCP;	588 UChar32 minCompNoMaybeCP;

575	589

576 // Norm16 value thresholds for quick check combinations and types of extra data.	590 // Norm16 value thresholds for quick check combinations and types of extra data.

577 uint16_t minYesNo;	591 uint16_t minYesNo;

578 uint16_t minYesNoMappingsOnly;	592 uint16_t minYesNoMappingsOnly;

579 uint16_t minNoNo;	593 uint16_t minNoNo;

580 uint16_t limitNoNo;	594 uint16_t limitNoNo;

581 uint16_t minMaybeYes;	595 uint16_t minMaybeYes;

582	596

583 UTrie2 *normTrie;	597 const UTrie2 *normTrie;

584 const uint16_t *maybeYesCompositions;	598 const uint16_t *maybeYesCompositions;

585 const uint16_t *extraData; // mappings and/or compositions for yesYes, yesNo & noNo characters	599 const uint16_t *extraData; // mappings and/or compositions for yesYes, yesNo & noNo characters

586 const uint8_t *smallFCD; // [0x100] one bit per 32 BMP code points, set if any FCD!=0	600 const uint8_t *smallFCD; // [0x100] one bit per 32 BMP code points, set if any FCD!=0

587 uint8_t tccc180[0x180]; // tccc values for U+0000..U+017F	601 uint8_t tccc180[0x180]; // tccc values for U+0000..U+017F

588	602

589 public: // CanonIterData is public to allow access from C callback functions.	603 public: // CanonIterData is public to allow access from C callback functions.

590 UInitOnce fCanonIterDataInitOnce;	604 UInitOnce fCanonIterDataInitOnce;

591 CanonIterData *fCanonIterData;	605 CanonIterData *fCanonIterData;

592 };	606 };

593	607

594 // bits in canonIterData	608 // bits in canonIterData

595 #define CANON_NOT_SEGMENT_STARTER 0x80000000	609 #define CANON_NOT_SEGMENT_STARTER 0x80000000

596 #define CANON_HAS_COMPOSITIONS 0x40000000	610 #define CANON_HAS_COMPOSITIONS 0x40000000

597 #define CANON_HAS_SET 0x200000	611 #define CANON_HAS_SET 0x200000

598 #define CANON_VALUE_MASK 0x1fffff	612 #define CANON_VALUE_MASK 0x1fffff

599	613

600 /**	614 /**

601 * ICU-internal shortcut for quick access to standard Unicode normalization.	615 * ICU-internal shortcut for quick access to standard Unicode normalization.

602 */	616 */

603 class U_COMMON_API Normalizer2Factory {	617 class U_COMMON_API Normalizer2Factory {

604 public:	618 public:

605 static const Normalizer2 *getNFCInstance(UErrorCode &errorCode);

606 static const Normalizer2 *getNFDInstance(UErrorCode &errorCode);

607 static const Normalizer2 *getFCDInstance(UErrorCode &errorCode);	619 static const Normalizer2 *getFCDInstance(UErrorCode &errorCode);

608 static const Normalizer2 *getFCCInstance(UErrorCode &errorCode);	620 static const Normalizer2 *getFCCInstance(UErrorCode &errorCode);

609 static const Normalizer2 *getNFKCInstance(UErrorCode &errorCode);

610 static const Normalizer2 *getNFKDInstance(UErrorCode &errorCode);

611 static const Normalizer2 *getNFKC_CFInstance(UErrorCode &errorCode);

612 static const Normalizer2 *getNoopInstance(UErrorCode &errorCode);	621 static const Normalizer2 *getNoopInstance(UErrorCode &errorCode);

613	622

614 static const Normalizer2 *getInstance(UNormalizationMode mode, UErrorCode &errorCode);	623 static const Normalizer2 *getInstance(UNormalizationMode mode, UErrorCode &errorCode);

615	624

616 static const Normalizer2Impl *getNFCImpl(UErrorCode &errorCode);	625 static const Normalizer2Impl *getNFCImpl(UErrorCode &errorCode);

617 static const Normalizer2Impl *getNFKCImpl(UErrorCode &errorCode);	626 static const Normalizer2Impl *getNFKCImpl(UErrorCode &errorCode);

618 static const Normalizer2Impl *getNFKC_CFImpl(UErrorCode &errorCode);	627 static const Normalizer2Impl *getNFKC_CFImpl(UErrorCode &errorCode);

619	628

620 // Get the Impl instance of the Normalizer2.	629 // Get the Impl instance of the Normalizer2.

621 // Must be used only when it is known that norm2 is a Normalizer2WithImpl instance.	630 // Must be used only when it is known that norm2 is a Normalizer2WithImpl instance.

(...skipping 148 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
770 * - Addition of indexes[IX_MIN_YES_NO_MAPPINGS_ONLY] and separation of the yesNo extraData into	779 * - Addition of indexes[IX_MIN_YES_NO_MAPPINGS_ONLY] and separation of the yesNo extraData into

771 * distinct ranges (combines-forward vs. not)	780 * distinct ranges (combines-forward vs. not)

772 * so that a range check can be used to find out if there is a compositions list.	781 * so that a range check can be used to find out if there is a compositions list.

773 * This is fully equivalent with formatVersion 1's MAPPING_PLUS_COMPOSITION_LIST flag.	782 * This is fully equivalent with formatVersion 1's MAPPING_PLUS_COMPOSITION_LIST flag.

774 * It is needed for the new (in ICU 49) composePair(), not for other normalization.	783 * It is needed for the new (in ICU 49) composePair(), not for other normalization.

775 * - Addition of the smallFCD[] bit set.	784 * - Addition of the smallFCD[] bit set.

776 */	785 */

777	786

778 #endif /* !UCONFIG_NO_NORMALIZATION */	787 #endif /* !UCONFIG_NO_NORMALIZATION */

779 #endif /* __NORMALIZER2IMPL_H__ */	788 #endif /* __NORMALIZER2IMPL_H__ */

OLD	NEW

« no previous file with comments | « source/common/normalizer2.cpp ('k') | source/common/normalizer2impl.cpp » ('j') | no next file with comments »