Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(19)

Side by Side Diff: source/common/normalizer2impl.h

Issue 845603002: Update ICU to 54.1 step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@master
Patch Set: remove unused directories Created 5 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/common/normalizer2.cpp ('k') | source/common/normalizer2impl.cpp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 ******************************************************************************* 2 *******************************************************************************
3 * 3 *
4 * Copyright (C) 2009-2013, International Business Machines 4 * Copyright (C) 2009-2014, International Business Machines
5 * Corporation and others. All Rights Reserved. 5 * Corporation and others. All Rights Reserved.
6 * 6 *
7 ******************************************************************************* 7 *******************************************************************************
8 * file name: normalizer2impl.h 8 * file name: normalizer2impl.h
9 * encoding: US-ASCII 9 * encoding: US-ASCII
10 * tab size: 8 (not used) 10 * tab size: 8 (not used)
11 * indentation:4 11 * indentation:4
12 * 12 *
13 * created on: 2009nov22 13 * created on: 2009nov22
14 * created by: Markus W. Scherer 14 * created by: Markus W. Scherer
15 */ 15 */
16 16
17 #ifndef __NORMALIZER2IMPL_H__ 17 #ifndef __NORMALIZER2IMPL_H__
18 #define __NORMALIZER2IMPL_H__ 18 #define __NORMALIZER2IMPL_H__
19 19
20 #include "unicode/utypes.h" 20 #include "unicode/utypes.h"
21 21
22 #if !UCONFIG_NO_NORMALIZATION 22 #if !UCONFIG_NO_NORMALIZATION
23 23
24 #include "unicode/normalizer2.h" 24 #include "unicode/normalizer2.h"
25 #include "unicode/udata.h"
26 #include "unicode/unistr.h" 25 #include "unicode/unistr.h"
27 #include "unicode/unorm.h" 26 #include "unicode/unorm.h"
28 #include "unicode/utf16.h" 27 #include "unicode/utf16.h"
29 #include "mutex.h" 28 #include "mutex.h"
30 #include "uset_imp.h" 29 #include "uset_imp.h"
31 #include "utrie2.h" 30 #include "utrie2.h"
32 31
33 U_NAMESPACE_BEGIN 32 U_NAMESPACE_BEGIN
34 33
35 struct CanonIterData; 34 struct CanonIterData;
36 35
37 class Hangul { 36 class U_COMMON_API Hangul {
38 public: 37 public:
39 /* Korean Hangul and Jamo constants */ 38 /* Korean Hangul and Jamo constants */
40 enum { 39 enum {
41 JAMO_L_BASE=0x1100, /* "lead" jamo */ 40 JAMO_L_BASE=0x1100, /* "lead" jamo */
41 JAMO_L_END=0x1112,
42 JAMO_V_BASE=0x1161, /* "vowel" jamo */ 42 JAMO_V_BASE=0x1161, /* "vowel" jamo */
43 JAMO_V_END=0x1175,
43 JAMO_T_BASE=0x11a7, /* "trail" jamo */ 44 JAMO_T_BASE=0x11a7, /* "trail" jamo */
45 JAMO_T_END=0x11c2,
44 46
45 HANGUL_BASE=0xac00, 47 HANGUL_BASE=0xac00,
48 HANGUL_END=0xd7a3,
46 49
47 JAMO_L_COUNT=19, 50 JAMO_L_COUNT=19,
48 JAMO_V_COUNT=21, 51 JAMO_V_COUNT=21,
49 JAMO_T_COUNT=28, 52 JAMO_T_COUNT=28,
50 53
51 JAMO_VT_COUNT=JAMO_V_COUNT*JAMO_T_COUNT, 54 JAMO_VT_COUNT=JAMO_V_COUNT*JAMO_T_COUNT,
52 55
53 HANGUL_COUNT=JAMO_L_COUNT*JAMO_V_COUNT*JAMO_T_COUNT, 56 HANGUL_COUNT=JAMO_L_COUNT*JAMO_V_COUNT*JAMO_T_COUNT,
54 HANGUL_LIMIT=HANGUL_BASE+HANGUL_COUNT 57 HANGUL_LIMIT=HANGUL_BASE+HANGUL_COUNT
55 }; 58 };
(...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after
103 buffer[0]=orig-c2; // LV syllable 106 buffer[0]=orig-c2; // LV syllable
104 buffer[1]=(UChar)(JAMO_T_BASE+c2); 107 buffer[1]=(UChar)(JAMO_T_BASE+c2);
105 } 108 }
106 } 109 }
107 private: 110 private:
108 Hangul(); // no instantiation 111 Hangul(); // no instantiation
109 }; 112 };
110 113
111 class Normalizer2Impl; 114 class Normalizer2Impl;
112 115
113 class ReorderingBuffer : public UMemory { 116 class U_COMMON_API ReorderingBuffer : public UMemory {
114 public: 117 public:
115 ReorderingBuffer(const Normalizer2Impl &ni, UnicodeString &dest) : 118 ReorderingBuffer(const Normalizer2Impl &ni, UnicodeString &dest) :
116 impl(ni), str(dest), 119 impl(ni), str(dest),
117 start(NULL), reorderStart(NULL), limit(NULL), 120 start(NULL), reorderStart(NULL), limit(NULL),
118 remainingCapacity(0), lastCC(0) {} 121 remainingCapacity(0), lastCC(0) {}
119 ~ReorderingBuffer() { 122 ~ReorderingBuffer() {
120 if(start!=NULL) { 123 if(start!=NULL) {
121 str.releaseBuffer((int32_t)(limit-start)); 124 str.releaseBuffer((int32_t)(limit-start));
122 } 125 }
123 } 126 }
(...skipping 82 matching lines...) Expand 10 before | Expand all | Expand 10 after
206 uint8_t lastCC; 209 uint8_t lastCC;
207 210
208 // private backward iterator 211 // private backward iterator
209 void setIterator() { codePointStart=limit; } 212 void setIterator() { codePointStart=limit; }
210 void skipPrevious(); // Requires start<codePointStart. 213 void skipPrevious(); // Requires start<codePointStart.
211 uint8_t previousCC(); // Returns 0 if there is no previous character. 214 uint8_t previousCC(); // Returns 0 if there is no previous character.
212 215
213 UChar *codePointStart, *codePointLimit; 216 UChar *codePointStart, *codePointLimit;
214 }; 217 };
215 218
216 class U_COMMON_API Normalizer2Impl : public UMemory { 219 class U_COMMON_API Normalizer2Impl : public UObject {
217 public: 220 public:
218 Normalizer2Impl() : memory(NULL), normTrie(NULL), fCanonIterData(NULL) { 221 Normalizer2Impl() : normTrie(NULL), fCanonIterData(NULL) {
219 fCanonIterDataInitOnce.reset(); 222 fCanonIterDataInitOnce.reset();
220 } 223 }
221 ~Normalizer2Impl(); 224 virtual ~Normalizer2Impl();
222 225
223 void load(const char *packageName, const char *name, UErrorCode &errorCode); 226 void init(const int32_t *inIndexes, const UTrie2 *inTrie,
227 const uint16_t *inExtraData, const uint8_t *inSmallFCD);
224 228
229 void addLcccChars(UnicodeSet &set) const;
225 void addPropertyStarts(const USetAdder *sa, UErrorCode &errorCode) const; 230 void addPropertyStarts(const USetAdder *sa, UErrorCode &errorCode) const;
226 void addCanonIterPropertyStarts(const USetAdder *sa, UErrorCode &errorCode) const; 231 void addCanonIterPropertyStarts(const USetAdder *sa, UErrorCode &errorCode) const;
227 232
228 // low-level properties ------------------------------------------------ *** 233 // low-level properties ------------------------------------------------ ***
229 234
230 const UTrie2 *getNormTrie() const { return normTrie; } 235 const UTrie2 *getNormTrie() const { return normTrie; }
231 236
232 UBool ensureCanonIterData(UErrorCode &errorCode) const; 237 UBool ensureCanonIterData(UErrorCode &errorCode) const;
233 238
234 uint16_t getNorm16(UChar32 c) const { return UTRIE2_GET16(normTrie, c); } 239 uint16_t getNorm16(UChar32 c) const { return UTRIE2_GET16(normTrie, c); }
235 240
236 UNormalizationCheckResult getCompQuickCheck(uint16_t norm16) const { 241 UNormalizationCheckResult getCompQuickCheck(uint16_t norm16) const {
237 if(norm16<minNoNo || MIN_YES_YES_WITH_CC<=norm16) { 242 if(norm16<minNoNo || MIN_YES_YES_WITH_CC<=norm16) {
238 return UNORM_YES; 243 return UNORM_YES;
239 } else if(minMaybeYes<=norm16) { 244 } else if(minMaybeYes<=norm16) {
240 return UNORM_MAYBE; 245 return UNORM_MAYBE;
241 } else { 246 } else {
242 return UNORM_NO; 247 return UNORM_NO;
243 } 248 }
244 } 249 }
250 UBool isAlgorithmicNoNo(uint16_t norm16) const { return limitNoNo<=norm16 && norm16<minMaybeYes; }
245 UBool isCompNo(uint16_t norm16) const { return minNoNo<=norm16 && norm16<minMaybeYes; } 251 UBool isCompNo(uint16_t norm16) const { return minNoNo<=norm16 && norm16<minMaybeYes; }
246 UBool isDecompYes(uint16_t norm16) const { return norm16<minYesNo || minMaybeYes<=norm16; } 252 UBool isDecompYes(uint16_t norm16) const { return norm16<minYesNo || minMaybeYes<=norm16; }
247 253
248 uint8_t getCC(uint16_t norm16) const { 254 uint8_t getCC(uint16_t norm16) const {
249 if(norm16>=MIN_NORMAL_MAYBE_YES) { 255 if(norm16>=MIN_NORMAL_MAYBE_YES) {
250 return (uint8_t)norm16; 256 return (uint8_t)norm16;
251 } 257 }
252 if(norm16<minNoNo || limitNoNo<=norm16) { 258 if(norm16<minNoNo || limitNoNo<=norm16) {
253 return 0; 259 return 0;
254 } 260 }
(...skipping 154 matching lines...) Expand 10 before | Expand all | Expand 10 after
409 COMP_1_TRIPLE=1, 415 COMP_1_TRIPLE=1,
410 COMP_1_TRAIL_LIMIT=0x3400, 416 COMP_1_TRAIL_LIMIT=0x3400,
411 COMP_1_TRAIL_MASK=0x7ffe, 417 COMP_1_TRAIL_MASK=0x7ffe,
412 COMP_1_TRAIL_SHIFT=9, // 10-1 for the "triple" bit 418 COMP_1_TRAIL_SHIFT=9, // 10-1 for the "triple" bit
413 COMP_2_TRAIL_SHIFT=6, 419 COMP_2_TRAIL_SHIFT=6,
414 COMP_2_TRAIL_MASK=0xffc0 420 COMP_2_TRAIL_MASK=0xffc0
415 }; 421 };
416 422
417 // higher-level functionality ------------------------------------------ *** 423 // higher-level functionality ------------------------------------------ ***
418 424
425 // NFD without an NFD Normalizer2 instance.
426 UnicodeString &decompose(const UnicodeString &src, UnicodeString &dest,
427 UErrorCode &errorCode) const;
428 /**
429 * Decomposes [src, limit[ and writes the result to dest.
430 * limit can be NULL if src is NUL-terminated.
431 * destLengthEstimate is the initial dest buffer capacity and can be -1.
432 */
433 void decompose(const UChar *src, const UChar *limit,
434 UnicodeString &dest, int32_t destLengthEstimate,
435 UErrorCode &errorCode) const;
436
419 const UChar *decompose(const UChar *src, const UChar *limit, 437 const UChar *decompose(const UChar *src, const UChar *limit,
420 ReorderingBuffer *buffer, UErrorCode &errorCode) const; 438 ReorderingBuffer *buffer, UErrorCode &errorCode) const;
421 void decomposeAndAppend(const UChar *src, const UChar *limit, 439 void decomposeAndAppend(const UChar *src, const UChar *limit,
422 UBool doDecompose, 440 UBool doDecompose,
423 UnicodeString &safeMiddle, 441 UnicodeString &safeMiddle,
424 ReorderingBuffer &buffer, 442 ReorderingBuffer &buffer,
425 UErrorCode &errorCode) const; 443 UErrorCode &errorCode) const;
426 UBool compose(const UChar *src, const UChar *limit, 444 UBool compose(const UChar *src, const UChar *limit,
427 UBool onlyContiguous, 445 UBool onlyContiguous,
428 UBool doCompose, 446 UBool doCompose,
(...skipping 24 matching lines...) Expand all
453 } 471 }
454 UBool hasCompBoundaryAfter(UChar32 c, UBool onlyContiguous, UBool testInert) const; 472 UBool hasCompBoundaryAfter(UChar32 c, UBool onlyContiguous, UBool testInert) const;
455 473
456 UBool hasFCDBoundaryBefore(UChar32 c) const { return c<MIN_CCC_LCCC_CP || getFCD16(c)<=0xff; } 474 UBool hasFCDBoundaryBefore(UChar32 c) const { return c<MIN_CCC_LCCC_CP || getFCD16(c)<=0xff; }
457 UBool hasFCDBoundaryAfter(UChar32 c) const { 475 UBool hasFCDBoundaryAfter(UChar32 c) const {
458 uint16_t fcd16=getFCD16(c); 476 uint16_t fcd16=getFCD16(c);
459 return fcd16<=1 || (fcd16&0xff)==0; 477 return fcd16<=1 || (fcd16&0xff)==0;
460 } 478 }
461 UBool isFCDInert(UChar32 c) const { return getFCD16(c)<=1; } 479 UBool isFCDInert(UChar32 c) const { return getFCD16(c)<=1; }
462 private: 480 private:
463 static UBool U_CALLCONV
464 isAcceptable(void *context, const char *type, const char *name, const UDataInfo *pInfo);
465
466 UBool isMaybe(uint16_t norm16) const { return minMaybeYes<=norm16 && norm16<=JAMO_VT; } 481 UBool isMaybe(uint16_t norm16) const { return minMaybeYes<=norm16 && norm16<=JAMO_VT; }
467 UBool isMaybeOrNonZeroCC(uint16_t norm16) const { return norm16>=minMaybeYes; } 482 UBool isMaybeOrNonZeroCC(uint16_t norm16) const { return norm16>=minMaybeYes; }
468 static UBool isInert(uint16_t norm16) { return norm16==0; } 483 static UBool isInert(uint16_t norm16) { return norm16==0; }
469 static UBool isJamoL(uint16_t norm16) { return norm16==1; } 484 static UBool isJamoL(uint16_t norm16) { return norm16==1; }
470 static UBool isJamoVT(uint16_t norm16) { return norm16==JAMO_VT; } 485 static UBool isJamoVT(uint16_t norm16) { return norm16==JAMO_VT; }
471 UBool isHangul(uint16_t norm16) const { return norm16==minYesNo; } 486 UBool isHangul(uint16_t norm16) const { return norm16==minYesNo; }
472 UBool isCompYesAndZeroCC(uint16_t norm16) const { return norm16<minNoNo; } 487 UBool isCompYesAndZeroCC(uint16_t norm16) const { return norm16<minNoNo; }
473 // UBool isCompYes(uint16_t norm16) const { 488 // UBool isCompYes(uint16_t norm16) const {
474 // return norm16>=MIN_YES_YES_WITH_CC || norm16<minNoNo; 489 // return norm16>=MIN_YES_YES_WITH_CC || norm16<minNoNo;
475 // } 490 // }
(...skipping 83 matching lines...) Expand 10 before | Expand all | Expand 10 after
559 UBool hasCompBoundaryBefore(UChar32 c, uint16_t norm16) const; 574 UBool hasCompBoundaryBefore(UChar32 c, uint16_t norm16) const;
560 const UChar *findPreviousCompBoundary(const UChar *start, const UChar *p) const; 575 const UChar *findPreviousCompBoundary(const UChar *start, const UChar *p) const;
561 const UChar *findNextCompBoundary(const UChar *p, const UChar *limit) const; 576 const UChar *findNextCompBoundary(const UChar *p, const UChar *limit) const;
562 577
563 const UChar *findPreviousFCDBoundary(const UChar *start, const UChar *p) const; 578 const UChar *findPreviousFCDBoundary(const UChar *start, const UChar *p) const;
564 const UChar *findNextFCDBoundary(const UChar *p, const UChar *limit) const; 579 const UChar *findNextFCDBoundary(const UChar *p, const UChar *limit) const;
565 580
566 int32_t getCanonValue(UChar32 c) const; 581 int32_t getCanonValue(UChar32 c) const;
567 const UnicodeSet &getCanonStartSet(int32_t n) const; 582 const UnicodeSet &getCanonStartSet(int32_t n) const;
568 583
569 UDataMemory *memory; 584 // UVersionInfo dataVersion;
570 UVersionInfo dataVersion;
571 585
572 // Code point thresholds for quick check codes. 586 // Code point thresholds for quick check codes.
573 UChar32 minDecompNoCP; 587 UChar32 minDecompNoCP;
574 UChar32 minCompNoMaybeCP; 588 UChar32 minCompNoMaybeCP;
575 589
576 // Norm16 value thresholds for quick check combinations and types of extra data. 590 // Norm16 value thresholds for quick check combinations and types of extra data.
577 uint16_t minYesNo; 591 uint16_t minYesNo;
578 uint16_t minYesNoMappingsOnly; 592 uint16_t minYesNoMappingsOnly;
579 uint16_t minNoNo; 593 uint16_t minNoNo;
580 uint16_t limitNoNo; 594 uint16_t limitNoNo;
581 uint16_t minMaybeYes; 595 uint16_t minMaybeYes;
582 596
583 UTrie2 *normTrie; 597 const UTrie2 *normTrie;
584 const uint16_t *maybeYesCompositions; 598 const uint16_t *maybeYesCompositions;
585 const uint16_t *extraData; // mappings and/or compositions for yesYes, yesNo & noNo characters 599 const uint16_t *extraData; // mappings and/or compositions for yesYes, yesNo & noNo characters
586 const uint8_t *smallFCD; // [0x100] one bit per 32 BMP code points, set if any FCD!=0 600 const uint8_t *smallFCD; // [0x100] one bit per 32 BMP code points, set if any FCD!=0
587 uint8_t tccc180[0x180]; // tccc values for U+0000..U+017F 601 uint8_t tccc180[0x180]; // tccc values for U+0000..U+017F
588 602
589 public: // CanonIterData is public to allow access from C callback functions. 603 public: // CanonIterData is public to allow access from C callback functions.
590 UInitOnce fCanonIterDataInitOnce; 604 UInitOnce fCanonIterDataInitOnce;
591 CanonIterData *fCanonIterData; 605 CanonIterData *fCanonIterData;
592 }; 606 };
593 607
594 // bits in canonIterData 608 // bits in canonIterData
595 #define CANON_NOT_SEGMENT_STARTER 0x80000000 609 #define CANON_NOT_SEGMENT_STARTER 0x80000000
596 #define CANON_HAS_COMPOSITIONS 0x40000000 610 #define CANON_HAS_COMPOSITIONS 0x40000000
597 #define CANON_HAS_SET 0x200000 611 #define CANON_HAS_SET 0x200000
598 #define CANON_VALUE_MASK 0x1fffff 612 #define CANON_VALUE_MASK 0x1fffff
599 613
600 /** 614 /**
601 * ICU-internal shortcut for quick access to standard Unicode normalization. 615 * ICU-internal shortcut for quick access to standard Unicode normalization.
602 */ 616 */
603 class U_COMMON_API Normalizer2Factory { 617 class U_COMMON_API Normalizer2Factory {
604 public: 618 public:
605 static const Normalizer2 *getNFCInstance(UErrorCode &errorCode);
606 static const Normalizer2 *getNFDInstance(UErrorCode &errorCode);
607 static const Normalizer2 *getFCDInstance(UErrorCode &errorCode); 619 static const Normalizer2 *getFCDInstance(UErrorCode &errorCode);
608 static const Normalizer2 *getFCCInstance(UErrorCode &errorCode); 620 static const Normalizer2 *getFCCInstance(UErrorCode &errorCode);
609 static const Normalizer2 *getNFKCInstance(UErrorCode &errorCode);
610 static const Normalizer2 *getNFKDInstance(UErrorCode &errorCode);
611 static const Normalizer2 *getNFKC_CFInstance(UErrorCode &errorCode);
612 static const Normalizer2 *getNoopInstance(UErrorCode &errorCode); 621 static const Normalizer2 *getNoopInstance(UErrorCode &errorCode);
613 622
614 static const Normalizer2 *getInstance(UNormalizationMode mode, UErrorCode &errorCode); 623 static const Normalizer2 *getInstance(UNormalizationMode mode, UErrorCode &errorCode);
615 624
616 static const Normalizer2Impl *getNFCImpl(UErrorCode &errorCode); 625 static const Normalizer2Impl *getNFCImpl(UErrorCode &errorCode);
617 static const Normalizer2Impl *getNFKCImpl(UErrorCode &errorCode); 626 static const Normalizer2Impl *getNFKCImpl(UErrorCode &errorCode);
618 static const Normalizer2Impl *getNFKC_CFImpl(UErrorCode &errorCode); 627 static const Normalizer2Impl *getNFKC_CFImpl(UErrorCode &errorCode);
619 628
620 // Get the Impl instance of the Normalizer2. 629 // Get the Impl instance of the Normalizer2.
621 // Must be used only when it is known that norm2 is a Normalizer2WithImpl instance. 630 // Must be used only when it is known that norm2 is a Normalizer2WithImpl instance.
(...skipping 148 matching lines...) Expand 10 before | Expand all | Expand 10 after
770 * - Addition of indexes[IX_MIN_YES_NO_MAPPINGS_ONLY] and separation of the yesNo extraData into 779 * - Addition of indexes[IX_MIN_YES_NO_MAPPINGS_ONLY] and separation of the yesNo extraData into
771 * distinct ranges (combines-forward vs. not) 780 * distinct ranges (combines-forward vs. not)
772 * so that a range check can be used to find out if there is a compositions list. 781 * so that a range check can be used to find out if there is a compositions list.
773 * This is fully equivalent with formatVersion 1's MAPPING_PLUS_COMPOSITION_LIST flag. 782 * This is fully equivalent with formatVersion 1's MAPPING_PLUS_COMPOSITION_LIST flag.
774 * It is needed for the new (in ICU 49) composePair(), not for other normalization. 783 * It is needed for the new (in ICU 49) composePair(), not for other normalization.
775 * - Addition of the smallFCD[] bit set. 784 * - Addition of the smallFCD[] bit set.
776 */ 785 */
777 786
778 #endif /* !UCONFIG_NO_NORMALIZATION */ 787 #endif /* !UCONFIG_NO_NORMALIZATION */
779 #endif /* __NORMALIZER2IMPL_H__ */ 788 #endif /* __NORMALIZER2IMPL_H__ */
OLDNEW
« no previous file with comments | « source/common/normalizer2.cpp ('k') | source/common/normalizer2impl.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698