| OLD | NEW | 
|---|
| (Empty) |  | 
|  | 1 /* | 
|  | 2 ******************************************************************************* | 
|  | 3 * Copyright (C) 2010-2014, International Business Machines | 
|  | 4 * Corporation and others.  All Rights Reserved. | 
|  | 5 ******************************************************************************* | 
|  | 6 * collation.h | 
|  | 7 * | 
|  | 8 * created on: 2010oct27 | 
|  | 9 * created by: Markus W. Scherer | 
|  | 10 */ | 
|  | 11 | 
|  | 12 #ifndef __COLLATION_H__ | 
|  | 13 #define __COLLATION_H__ | 
|  | 14 | 
|  | 15 #include "unicode/utypes.h" | 
|  | 16 | 
|  | 17 #if !UCONFIG_NO_COLLATION | 
|  | 18 | 
|  | 19 U_NAMESPACE_BEGIN | 
|  | 20 | 
|  | 21 /** | 
|  | 22  * Collation v2 basic definitions and static helper functions. | 
|  | 23  * | 
|  | 24  * Data structures except for expansion tables store 32-bit CEs which are | 
|  | 25  * either specials (see tags below) or are compact forms of 64-bit CEs. | 
|  | 26  */ | 
|  | 27 class U_I18N_API Collation { | 
|  | 28 public: | 
|  | 29     // Special sort key bytes for all levels. | 
|  | 30     static const uint8_t TERMINATOR_BYTE = 0; | 
|  | 31     static const uint8_t LEVEL_SEPARATOR_BYTE = 1; | 
|  | 32     /** | 
|  | 33      * Merge-sort-key separator. | 
|  | 34      * Must not be used as the lead byte of any CE weight, | 
|  | 35      * nor as primary compression low terminator. | 
|  | 36      * Otherwise usable. | 
|  | 37      */ | 
|  | 38     static const uint8_t MERGE_SEPARATOR_BYTE = 2; | 
|  | 39     static const uint32_t MERGE_SEPARATOR_PRIMARY = 0x02000000;  // U+FFFE | 
|  | 40     static const uint32_t MERGE_SEPARATOR_WEIGHT16 = 0x0200;  // U+FFFE | 
|  | 41     static const uint32_t MERGE_SEPARATOR_LOWER32 = 0x02000200;  // U+FFFE | 
|  | 42     static const uint32_t MERGE_SEPARATOR_CE32 = 0x02000202;  // U+FFFE | 
|  | 43 | 
|  | 44     /** | 
|  | 45      * Primary compression low terminator, must be greater than MERGE_SEPARATOR_
     BYTE. | 
|  | 46      * Reserved value in primary second byte if the lead byte is compressible. | 
|  | 47      * Otherwise usable in all CE weight bytes. | 
|  | 48      */ | 
|  | 49     static const uint8_t PRIMARY_COMPRESSION_LOW_BYTE = 3; | 
|  | 50     /** | 
|  | 51      * Primary compression high terminator. | 
|  | 52      * Reserved value in primary second byte if the lead byte is compressible. | 
|  | 53      * Otherwise usable in all CE weight bytes. | 
|  | 54      */ | 
|  | 55     static const uint8_t PRIMARY_COMPRESSION_HIGH_BYTE = 0xff; | 
|  | 56 | 
|  | 57     /** Default secondary/tertiary weight lead byte. */ | 
|  | 58     static const uint8_t COMMON_BYTE = 5; | 
|  | 59     static const uint32_t COMMON_WEIGHT16 = 0x0500; | 
|  | 60     /** Middle 16 bits of a CE with a common secondary weight. */ | 
|  | 61     static const uint32_t COMMON_SECONDARY_CE = 0x05000000; | 
|  | 62     /** Lower 16 bits of a CE with a common tertiary weight. */ | 
|  | 63     static const uint32_t COMMON_TERTIARY_CE = 0x0500; | 
|  | 64     /** Lower 32 bits of a CE with common secondary and tertiary weights. */ | 
|  | 65     static const uint32_t COMMON_SEC_AND_TER_CE = 0x05000500; | 
|  | 66 | 
|  | 67     static const uint32_t SECONDARY_MASK = 0xffff0000; | 
|  | 68     static const uint32_t CASE_MASK = 0xc000; | 
|  | 69     static const uint32_t SECONDARY_AND_CASE_MASK = SECONDARY_MASK | CASE_MASK; | 
|  | 70     /** Only the 2*6 bits for the pure tertiary weight. */ | 
|  | 71     static const uint32_t ONLY_TERTIARY_MASK = 0x3f3f; | 
|  | 72     /** Only the secondary & tertiary bits; no case, no quaternary. */ | 
|  | 73     static const uint32_t ONLY_SEC_TER_MASK = SECONDARY_MASK | ONLY_TERTIARY_MAS
     K; | 
|  | 74     /** Case bits and tertiary bits. */ | 
|  | 75     static const uint32_t CASE_AND_TERTIARY_MASK = CASE_MASK | ONLY_TERTIARY_MAS
     K; | 
|  | 76     static const uint32_t QUATERNARY_MASK = 0xc0; | 
|  | 77     /** Case bits and quaternary bits. */ | 
|  | 78     static const uint32_t CASE_AND_QUATERNARY_MASK = CASE_MASK | QUATERNARY_MASK
     ; | 
|  | 79 | 
|  | 80     static const uint8_t UNASSIGNED_IMPLICIT_BYTE = 0xfe;  // compressible | 
|  | 81     /** | 
|  | 82      * First unassigned: AlphabeticIndex overflow boundary. | 
|  | 83      * We want a 3-byte primary so that it fits into the root elements table. | 
|  | 84      * | 
|  | 85      * This 3-byte primary will not collide with | 
|  | 86      * any unassigned-implicit 4-byte primaries because | 
|  | 87      * the first few hundred Unicode code points all have real mappings. | 
|  | 88      */ | 
|  | 89     static const uint32_t FIRST_UNASSIGNED_PRIMARY = 0xfe040200; | 
|  | 90 | 
|  | 91     static const uint8_t TRAIL_WEIGHT_BYTE = 0xff;  // not compressible | 
|  | 92     static const uint32_t FIRST_TRAILING_PRIMARY = 0xff020200;  // [first traili
     ng] | 
|  | 93     static const uint32_t MAX_PRIMARY = 0xffff0000;  // U+FFFF | 
|  | 94     static const uint32_t MAX_REGULAR_CE32 = 0xffff0505;  // U+FFFF | 
|  | 95 | 
|  | 96     // CE32 value for U+FFFD as well as illegal UTF-8 byte sequences (which beha
     ve like U+FFFD). | 
|  | 97     // We use the third-highest primary weight for U+FFFD (as in UCA 6.3+). | 
|  | 98     static const uint32_t FFFD_PRIMARY = MAX_PRIMARY - 0x20000; | 
|  | 99     static const uint32_t FFFD_CE32 = MAX_REGULAR_CE32 - 0x20000; | 
|  | 100 | 
|  | 101     /** | 
|  | 102      * A CE32 is special if its low byte is this or greater. | 
|  | 103      * Impossible case bits 11 mark special CE32s. | 
|  | 104      * This value itself is used to indicate a fallback to the base collator. | 
|  | 105      */ | 
|  | 106     static const uint8_t SPECIAL_CE32_LOW_BYTE = 0xc0; | 
|  | 107     static const uint32_t FALLBACK_CE32 = SPECIAL_CE32_LOW_BYTE; | 
|  | 108     /** | 
|  | 109      * Low byte of a long-primary special CE32. | 
|  | 110      */ | 
|  | 111     static const uint8_t LONG_PRIMARY_CE32_LOW_BYTE = 0xc1;  // SPECIAL_CE32_LOW
     _BYTE | LONG_PRIMARY_TAG | 
|  | 112 | 
|  | 113     static const uint32_t UNASSIGNED_CE32 = 0xffffffff;  // Compute an unassigne
     d-implicit CE. | 
|  | 114 | 
|  | 115     static const uint32_t NO_CE32 = 1; | 
|  | 116 | 
|  | 117     /** No CE: End of input. Only used in runtime code, not stored in data. */ | 
|  | 118     static const uint32_t NO_CE_PRIMARY = 1;  // not a left-adjusted weight | 
|  | 119     static const uint32_t NO_CE_WEIGHT16 = 0x0100;  // weight of LEVEL_SEPARATOR
     _BYTE | 
|  | 120     static const int64_t NO_CE = INT64_C(0x101000100);  // NO_CE_PRIMARY, NO_CE_
     WEIGHT16, NO_CE_WEIGHT16 | 
|  | 121 | 
|  | 122     /** Sort key levels. */ | 
|  | 123     enum Level { | 
|  | 124         /** Unspecified level. */ | 
|  | 125         NO_LEVEL, | 
|  | 126         PRIMARY_LEVEL, | 
|  | 127         SECONDARY_LEVEL, | 
|  | 128         CASE_LEVEL, | 
|  | 129         TERTIARY_LEVEL, | 
|  | 130         QUATERNARY_LEVEL, | 
|  | 131         IDENTICAL_LEVEL, | 
|  | 132         /** Beyond sort key bytes. */ | 
|  | 133         ZERO_LEVEL | 
|  | 134     }; | 
|  | 135 | 
|  | 136     /** | 
|  | 137      * Sort key level flags: xx_FLAG = 1 << xx_LEVEL. | 
|  | 138      * In Java, use enum Level with flag() getters, or use EnumSet rather than h
     and-made bit sets. | 
|  | 139      */ | 
|  | 140     static const uint32_t NO_LEVEL_FLAG = 1; | 
|  | 141     static const uint32_t PRIMARY_LEVEL_FLAG = 2; | 
|  | 142     static const uint32_t SECONDARY_LEVEL_FLAG = 4; | 
|  | 143     static const uint32_t CASE_LEVEL_FLAG = 8; | 
|  | 144     static const uint32_t TERTIARY_LEVEL_FLAG = 0x10; | 
|  | 145     static const uint32_t QUATERNARY_LEVEL_FLAG = 0x20; | 
|  | 146     static const uint32_t IDENTICAL_LEVEL_FLAG = 0x40; | 
|  | 147     static const uint32_t ZERO_LEVEL_FLAG = 0x80; | 
|  | 148 | 
|  | 149     /** | 
|  | 150      * Special-CE32 tags, from bits 3..0 of a special 32-bit CE. | 
|  | 151      * Bits 31..8 are available for tag-specific data. | 
|  | 152      * Bits  5..4: Reserved. May be used in the future to indicate lccc!=0 and t
     ccc!=0. | 
|  | 153      */ | 
|  | 154     enum { | 
|  | 155         /** | 
|  | 156          * Fall back to the base collator. | 
|  | 157          * This is the tag value in SPECIAL_CE32_LOW_BYTE and FALLBACK_CE32. | 
|  | 158          * Bits 31..8: Unused, 0. | 
|  | 159          */ | 
|  | 160         FALLBACK_TAG = 0, | 
|  | 161         /** | 
|  | 162          * Long-primary CE with COMMON_SEC_AND_TER_CE. | 
|  | 163          * Bits 31..8: Three-byte primary. | 
|  | 164          */ | 
|  | 165         LONG_PRIMARY_TAG = 1, | 
|  | 166         /** | 
|  | 167          * Long-secondary CE with zero primary. | 
|  | 168          * Bits 31..16: Secondary weight. | 
|  | 169          * Bits 15.. 8: Tertiary weight. | 
|  | 170          */ | 
|  | 171         LONG_SECONDARY_TAG = 2, | 
|  | 172         /** | 
|  | 173          * Unused. | 
|  | 174          * May be used in the future for single-byte secondary CEs (SHORT_SECOND
     ARY_TAG), | 
|  | 175          * storing the secondary in bits 31..24, the ccc in bits 23..16, | 
|  | 176          * and the tertiary in bits 15..8. | 
|  | 177          */ | 
|  | 178         RESERVED_TAG_3 = 3, | 
|  | 179         /** | 
|  | 180          * Latin mini expansions of two simple CEs [pp, 05, tt] [00, ss, 05]. | 
|  | 181          * Bits 31..24: Single-byte primary weight pp of the first CE. | 
|  | 182          * Bits 23..16: Tertiary weight tt of the first CE. | 
|  | 183          * Bits 15.. 8: Secondary weight ss of the second CE. | 
|  | 184          */ | 
|  | 185         LATIN_EXPANSION_TAG = 4, | 
|  | 186         /** | 
|  | 187          * Points to one or more simple/long-primary/long-secondary 32-bit CE32s
     . | 
|  | 188          * Bits 31..13: Index into uint32_t table. | 
|  | 189          * Bits 12.. 8: Length=1..31. | 
|  | 190          */ | 
|  | 191         EXPANSION32_TAG = 5, | 
|  | 192         /** | 
|  | 193          * Points to one or more 64-bit CEs. | 
|  | 194          * Bits 31..13: Index into CE table. | 
|  | 195          * Bits 12.. 8: Length=1..31. | 
|  | 196          */ | 
|  | 197         EXPANSION_TAG = 6, | 
|  | 198         /** | 
|  | 199          * Builder data, used only in the CollationDataBuilder, not in runtime d
     ata. | 
|  | 200          * | 
|  | 201          * If bit 8 is 0: Builder context, points to a list of context-sensitive
      mappings. | 
|  | 202          * Bits 31..13: Index to the builder's list of ConditionalCE32 for this 
     character. | 
|  | 203          * Bits 12.. 9: Unused, 0. | 
|  | 204          * | 
|  | 205          * If bit 8 is 1 (IS_BUILDER_JAMO_CE32): Builder-only jamoCE32 value. | 
|  | 206          * The builder fetches the Jamo CE32 from the trie. | 
|  | 207          * Bits 31..13: Jamo code point. | 
|  | 208          * Bits 12.. 9: Unused, 0. | 
|  | 209          */ | 
|  | 210         BUILDER_DATA_TAG = 7, | 
|  | 211         /** | 
|  | 212          * Points to prefix trie. | 
|  | 213          * Bits 31..13: Index into prefix/contraction data. | 
|  | 214          * Bits 12.. 8: Unused, 0. | 
|  | 215          */ | 
|  | 216         PREFIX_TAG = 8, | 
|  | 217         /** | 
|  | 218          * Points to contraction data. | 
|  | 219          * Bits 31..13: Index into prefix/contraction data. | 
|  | 220          * Bits 12..11: Unused, 0. | 
|  | 221          * Bit      10: CONTRACT_TRAILING_CCC flag. | 
|  | 222          * Bit       9: CONTRACT_NEXT_CCC flag. | 
|  | 223          * Bit       8: CONTRACT_SINGLE_CP_NO_MATCH flag. | 
|  | 224          */ | 
|  | 225         CONTRACTION_TAG = 9, | 
|  | 226         /** | 
|  | 227          * Decimal digit. | 
|  | 228          * Bits 31..13: Index into uint32_t table for non-numeric-collation CE32
     . | 
|  | 229          * Bit      12: Unused, 0. | 
|  | 230          * Bits 11.. 8: Digit value 0..9. | 
|  | 231          */ | 
|  | 232         DIGIT_TAG = 10, | 
|  | 233         /** | 
|  | 234          * Tag for U+0000, for moving the NUL-termination handling | 
|  | 235          * from the regular fastpath into specials-handling code. | 
|  | 236          * Bits 31..8: Unused, 0. | 
|  | 237          */ | 
|  | 238         U0000_TAG = 11, | 
|  | 239         /** | 
|  | 240          * Tag for a Hangul syllable. | 
|  | 241          * Bits 31..9: Unused, 0. | 
|  | 242          * Bit      8: HANGUL_NO_SPECIAL_JAMO flag. | 
|  | 243          */ | 
|  | 244         HANGUL_TAG = 12, | 
|  | 245         /** | 
|  | 246          * Tag for a lead surrogate code unit. | 
|  | 247          * Optional optimization for UTF-16 string processing. | 
|  | 248          * Bits 31..10: Unused, 0. | 
|  | 249          *       9.. 8: =0: All associated supplementary code points are unassig
     ned-implict. | 
|  | 250          *              =1: All associated supplementary code points fall back t
     o the base data. | 
|  | 251          *              else: (Normally 2) Look up the data for the supplementar
     y code point. | 
|  | 252          */ | 
|  | 253         LEAD_SURROGATE_TAG = 13, | 
|  | 254         /** | 
|  | 255          * Tag for CEs with primary weights in code point order. | 
|  | 256          * Bits 31..13: Index into CE table, for one data "CE". | 
|  | 257          * Bits 12.. 8: Unused, 0. | 
|  | 258          * | 
|  | 259          * This data "CE" has the following bit fields: | 
|  | 260          * Bits 63..32: Three-byte primary pppppp00. | 
|  | 261          *      31.. 8: Start/base code point of the in-order range. | 
|  | 262          *           7: Flag isCompressible primary. | 
|  | 263          *       6.. 0: Per-code point primary-weight increment. | 
|  | 264          */ | 
|  | 265         OFFSET_TAG = 14, | 
|  | 266         /** | 
|  | 267          * Implicit CE tag. Compute an unassigned-implicit CE. | 
|  | 268          * All bits are set (UNASSIGNED_CE32=0xffffffff). | 
|  | 269          */ | 
|  | 270         IMPLICIT_TAG = 15 | 
|  | 271     }; | 
|  | 272 | 
|  | 273     static UBool isAssignedCE32(uint32_t ce32) { | 
|  | 274         return ce32 != FALLBACK_CE32 && ce32 != UNASSIGNED_CE32; | 
|  | 275     } | 
|  | 276 | 
|  | 277     /** | 
|  | 278      * We limit the number of CEs in an expansion | 
|  | 279      * so that we can use a small number of length bits in the data structure, | 
|  | 280      * and so that an implementation can copy CEs at runtime without growing a d
     estination buffer. | 
|  | 281      */ | 
|  | 282     static const int32_t MAX_EXPANSION_LENGTH = 31; | 
|  | 283     static const int32_t MAX_INDEX = 0x7ffff; | 
|  | 284 | 
|  | 285     /** | 
|  | 286      * Set if there is no match for the single (no-suffix) character itself. | 
|  | 287      * This is only possible if there is a prefix. | 
|  | 288      * In this case, discontiguous contraction matching cannot add combining mar
     ks | 
|  | 289      * starting from an empty suffix. | 
|  | 290      * The default CE32 is used anyway if there is no suffix match. | 
|  | 291      */ | 
|  | 292     static const uint32_t CONTRACT_SINGLE_CP_NO_MATCH = 0x100; | 
|  | 293     /** Set if the first character of every contraction suffix has lccc!=0. */ | 
|  | 294     static const uint32_t CONTRACT_NEXT_CCC = 0x200; | 
|  | 295     /** Set if any contraction suffix ends with lccc!=0. */ | 
|  | 296     static const uint32_t CONTRACT_TRAILING_CCC = 0x400; | 
|  | 297 | 
|  | 298     /** For HANGUL_TAG: None of its Jamo CE32s isSpecialCE32(). */ | 
|  | 299     static const uint32_t HANGUL_NO_SPECIAL_JAMO = 0x100; | 
|  | 300 | 
|  | 301     static const uint32_t LEAD_ALL_UNASSIGNED = 0; | 
|  | 302     static const uint32_t LEAD_ALL_FALLBACK = 0x100; | 
|  | 303     static const uint32_t LEAD_MIXED = 0x200; | 
|  | 304     static const uint32_t LEAD_TYPE_MASK = 0x300; | 
|  | 305 | 
|  | 306     static uint32_t makeLongPrimaryCE32(uint32_t p) { return p | LONG_PRIMARY_CE
     32_LOW_BYTE; } | 
|  | 307 | 
|  | 308     /** Turns the long-primary CE32 into a primary weight pppppp00. */ | 
|  | 309     static inline uint32_t primaryFromLongPrimaryCE32(uint32_t ce32) { | 
|  | 310         return ce32 & 0xffffff00; | 
|  | 311     } | 
|  | 312     static inline int64_t ceFromLongPrimaryCE32(uint32_t ce32) { | 
|  | 313         return ((int64_t)(ce32 & 0xffffff00) << 32) | COMMON_SEC_AND_TER_CE; | 
|  | 314     } | 
|  | 315 | 
|  | 316     static uint32_t makeLongSecondaryCE32(uint32_t lower32) { | 
|  | 317         return lower32 | SPECIAL_CE32_LOW_BYTE | LONG_SECONDARY_TAG; | 
|  | 318     } | 
|  | 319     static inline int64_t ceFromLongSecondaryCE32(uint32_t ce32) { | 
|  | 320         return ce32 & 0xffffff00; | 
|  | 321     } | 
|  | 322 | 
|  | 323     /** Makes a special CE32 with tag, index and length. */ | 
|  | 324     static uint32_t makeCE32FromTagIndexAndLength(int32_t tag, int32_t index, in
     t32_t length) { | 
|  | 325         return (index << 13) | (length << 8) | SPECIAL_CE32_LOW_BYTE | tag; | 
|  | 326     } | 
|  | 327     /** Makes a special CE32 with only tag and index. */ | 
|  | 328     static uint32_t makeCE32FromTagAndIndex(int32_t tag, int32_t index) { | 
|  | 329         return (index << 13) | SPECIAL_CE32_LOW_BYTE | tag; | 
|  | 330     } | 
|  | 331 | 
|  | 332     static inline UBool isSpecialCE32(uint32_t ce32) { | 
|  | 333         return (ce32 & 0xff) >= SPECIAL_CE32_LOW_BYTE; | 
|  | 334     } | 
|  | 335 | 
|  | 336     static inline int32_t tagFromCE32(uint32_t ce32) { | 
|  | 337         return (int32_t)(ce32 & 0xf); | 
|  | 338     } | 
|  | 339 | 
|  | 340     static inline UBool hasCE32Tag(uint32_t ce32, int32_t tag) { | 
|  | 341         return isSpecialCE32(ce32) && tagFromCE32(ce32) == tag; | 
|  | 342     } | 
|  | 343 | 
|  | 344     static inline UBool isLongPrimaryCE32(uint32_t ce32) { | 
|  | 345         return hasCE32Tag(ce32, LONG_PRIMARY_TAG); | 
|  | 346     } | 
|  | 347 | 
|  | 348     static UBool isSimpleOrLongCE32(uint32_t ce32) { | 
|  | 349         return !isSpecialCE32(ce32) || | 
|  | 350                 tagFromCE32(ce32) == LONG_PRIMARY_TAG || | 
|  | 351                 tagFromCE32(ce32) == LONG_SECONDARY_TAG; | 
|  | 352     } | 
|  | 353 | 
|  | 354     /** | 
|  | 355      * @return TRUE if the ce32 yields one or more CEs without further data look
     ups | 
|  | 356      */ | 
|  | 357     static UBool isSelfContainedCE32(uint32_t ce32) { | 
|  | 358         return !isSpecialCE32(ce32) || | 
|  | 359                 tagFromCE32(ce32) == LONG_PRIMARY_TAG || | 
|  | 360                 tagFromCE32(ce32) == LONG_SECONDARY_TAG || | 
|  | 361                 tagFromCE32(ce32) == LATIN_EXPANSION_TAG; | 
|  | 362     } | 
|  | 363 | 
|  | 364     static inline UBool isPrefixCE32(uint32_t ce32) { | 
|  | 365         return hasCE32Tag(ce32, PREFIX_TAG); | 
|  | 366     } | 
|  | 367 | 
|  | 368     static inline UBool isContractionCE32(uint32_t ce32) { | 
|  | 369         return hasCE32Tag(ce32, CONTRACTION_TAG); | 
|  | 370     } | 
|  | 371 | 
|  | 372     static inline UBool ce32HasContext(uint32_t ce32) { | 
|  | 373         return isSpecialCE32(ce32) && | 
|  | 374                 (tagFromCE32(ce32) == PREFIX_TAG || | 
|  | 375                 tagFromCE32(ce32) == CONTRACTION_TAG); | 
|  | 376     } | 
|  | 377 | 
|  | 378     /** | 
|  | 379      * Get the first of the two Latin-expansion CEs encoded in ce32. | 
|  | 380      * @see LATIN_EXPANSION_TAG | 
|  | 381      */ | 
|  | 382     static inline int64_t latinCE0FromCE32(uint32_t ce32) { | 
|  | 383         return ((int64_t)(ce32 & 0xff000000) << 32) | COMMON_SECONDARY_CE | ((ce
     32 & 0xff0000) >> 8); | 
|  | 384     } | 
|  | 385 | 
|  | 386     /** | 
|  | 387      * Get the second of the two Latin-expansion CEs encoded in ce32. | 
|  | 388      * @see LATIN_EXPANSION_TAG | 
|  | 389      */ | 
|  | 390     static inline int64_t latinCE1FromCE32(uint32_t ce32) { | 
|  | 391         return ((ce32 & 0xff00) << 16) | COMMON_TERTIARY_CE; | 
|  | 392     } | 
|  | 393 | 
|  | 394     /** | 
|  | 395      * Returns the data index from a special CE32. | 
|  | 396      */ | 
|  | 397     static inline int32_t indexFromCE32(uint32_t ce32) { | 
|  | 398         return (int32_t)(ce32 >> 13); | 
|  | 399     } | 
|  | 400 | 
|  | 401     /** | 
|  | 402      * Returns the data length from a ce32. | 
|  | 403      */ | 
|  | 404     static inline int32_t lengthFromCE32(uint32_t ce32) { | 
|  | 405         return (ce32 >> 8) & 31; | 
|  | 406     } | 
|  | 407 | 
|  | 408     /** | 
|  | 409      * Returns the digit value from a DIGIT_TAG ce32. | 
|  | 410      */ | 
|  | 411     static inline char digitFromCE32(uint32_t ce32) { | 
|  | 412         return (char)((ce32 >> 8) & 0xf); | 
|  | 413     } | 
|  | 414 | 
|  | 415     /** Returns a 64-bit CE from a simple CE32 (not special). */ | 
|  | 416     static inline int64_t ceFromSimpleCE32(uint32_t ce32) { | 
|  | 417         // normal form ppppsstt -> pppp0000ss00tt00 | 
|  | 418         // assert (ce32 & 0xff) < SPECIAL_CE32_LOW_BYTE | 
|  | 419         return ((int64_t)(ce32 & 0xffff0000) << 32) | ((ce32 & 0xff00) << 16) | 
     ((ce32 & 0xff) << 8); | 
|  | 420     } | 
|  | 421 | 
|  | 422     /** Returns a 64-bit CE from a simple/long-primary/long-secondary CE32. */ | 
|  | 423     static inline int64_t ceFromCE32(uint32_t ce32) { | 
|  | 424         uint32_t tertiary = ce32 & 0xff; | 
|  | 425         if(tertiary < SPECIAL_CE32_LOW_BYTE) { | 
|  | 426             // normal form ppppsstt -> pppp0000ss00tt00 | 
|  | 427             return ((int64_t)(ce32 & 0xffff0000) << 32) | ((ce32 & 0xff00) << 16
     ) | (tertiary << 8); | 
|  | 428         } else { | 
|  | 429             ce32 -= tertiary; | 
|  | 430             if((tertiary & 0xf) == LONG_PRIMARY_TAG) { | 
|  | 431                 // long-primary form ppppppC1 -> pppppp00050000500 | 
|  | 432                 return ((int64_t)ce32 << 32) | COMMON_SEC_AND_TER_CE; | 
|  | 433             } else { | 
|  | 434                 // long-secondary form ssssttC2 -> 00000000sssstt00 | 
|  | 435                 // assert (tertiary & 0xf) == LONG_SECONDARY_TAG | 
|  | 436                 return ce32; | 
|  | 437             } | 
|  | 438         } | 
|  | 439     } | 
|  | 440 | 
|  | 441     /** Creates a CE from a primary weight. */ | 
|  | 442     static inline int64_t makeCE(uint32_t p) { | 
|  | 443         return ((int64_t)p << 32) | COMMON_SEC_AND_TER_CE; | 
|  | 444     } | 
|  | 445     /** | 
|  | 446      * Creates a CE from a primary weight, | 
|  | 447      * 16-bit secondary/tertiary weights, and a 2-bit quaternary. | 
|  | 448      */ | 
|  | 449     static inline int64_t makeCE(uint32_t p, uint32_t s, uint32_t t, uint32_t q)
      { | 
|  | 450         return ((int64_t)p << 32) | (s << 16) | t | (q << 6); | 
|  | 451     } | 
|  | 452 | 
|  | 453     /** | 
|  | 454      * Increments a 2-byte primary by a code point offset. | 
|  | 455      */ | 
|  | 456     static uint32_t incTwoBytePrimaryByOffset(uint32_t basePrimary, UBool isComp
     ressible, | 
|  | 457                                               int32_t offset); | 
|  | 458 | 
|  | 459     /** | 
|  | 460      * Increments a 3-byte primary by a code point offset. | 
|  | 461      */ | 
|  | 462     static uint32_t incThreeBytePrimaryByOffset(uint32_t basePrimary, UBool isCo
     mpressible, | 
|  | 463                                                 int32_t offset); | 
|  | 464 | 
|  | 465     /** | 
|  | 466      * Decrements a 2-byte primary by one range step (1..0x7f). | 
|  | 467      */ | 
|  | 468     static uint32_t decTwoBytePrimaryByOneStep(uint32_t basePrimary, UBool isCom
     pressible, int32_t step); | 
|  | 469 | 
|  | 470     /** | 
|  | 471      * Decrements a 3-byte primary by one range step (1..0x7f). | 
|  | 472      */ | 
|  | 473     static uint32_t decThreeBytePrimaryByOneStep(uint32_t basePrimary, UBool isC
     ompressible, int32_t step); | 
|  | 474 | 
|  | 475     /** | 
|  | 476      * Computes a 3-byte primary for c's OFFSET_TAG data "CE". | 
|  | 477      */ | 
|  | 478     static uint32_t getThreeBytePrimaryForOffsetData(UChar32 c, int64_t dataCE); | 
|  | 479 | 
|  | 480     /** | 
|  | 481      * Returns the unassigned-character implicit primary weight for any valid co
     de point c. | 
|  | 482      */ | 
|  | 483     static uint32_t unassignedPrimaryFromCodePoint(UChar32 c); | 
|  | 484 | 
|  | 485     static inline int64_t unassignedCEFromCodePoint(UChar32 c) { | 
|  | 486         return makeCE(unassignedPrimaryFromCodePoint(c)); | 
|  | 487     } | 
|  | 488 | 
|  | 489     static inline uint32_t reorder(const uint8_t reorderTable[256], uint32_t pri
     mary) { | 
|  | 490         return ((uint32_t)reorderTable[primary >> 24] << 24) | (primary & 0xffff
     ff); | 
|  | 491     } | 
|  | 492 | 
|  | 493 private: | 
|  | 494     Collation();  // No instantiation. | 
|  | 495 }; | 
|  | 496 | 
|  | 497 U_NAMESPACE_END | 
|  | 498 | 
|  | 499 #endif  // !UCONFIG_NO_COLLATION | 
|  | 500 #endif  // __COLLATION_H__ | 
| OLD | NEW | 
|---|