OLD | NEW |
(Empty) | |
| 1 /* |
| 2 ******************************************************************************* |
| 3 * Copyright (C) 2012-2014, International Business Machines |
| 4 * Corporation and others. All Rights Reserved. |
| 5 ******************************************************************************* |
| 6 * collationbasedatabuilder.h |
| 7 * |
| 8 * created on: 2012aug11 |
| 9 * created by: Markus W. Scherer |
| 10 */ |
| 11 |
| 12 #ifndef __COLLATIONBASEDATABUILDER_H__ |
| 13 #define __COLLATIONBASEDATABUILDER_H__ |
| 14 |
| 15 #include "unicode/utypes.h" |
| 16 |
| 17 #if !UCONFIG_NO_COLLATION |
| 18 |
| 19 #include "unicode/uniset.h" |
| 20 #include "unicode/unistr.h" |
| 21 #include "collation.h" |
| 22 #include "collationdata.h" |
| 23 #include "collationdatabuilder.h" |
| 24 #include "normalizer2impl.h" |
| 25 #include "utrie2.h" |
| 26 #include "uvectr32.h" |
| 27 #include "uvectr64.h" |
| 28 #include "uvector.h" |
| 29 |
| 30 U_NAMESPACE_BEGIN |
| 31 |
| 32 /** |
| 33 * Low-level base CollationData builder. |
| 34 */ |
| 35 class U_I18N_API CollationBaseDataBuilder : public CollationDataBuilder { |
| 36 public: |
| 37 CollationBaseDataBuilder(UErrorCode &errorCode); |
| 38 |
| 39 virtual ~CollationBaseDataBuilder(); |
| 40 |
| 41 void init(UErrorCode &errorCode); |
| 42 |
| 43 /** |
| 44 * Sets the Han ranges as ranges of offset CE32s. |
| 45 * Note: Unihan extension A sorts after the other BMP ranges. |
| 46 * See http://www.unicode.org/reports/tr10/#Implicit_Weights |
| 47 * |
| 48 * @param ranges array of ranges of [:Unified_Ideograph:] in collation order
, |
| 49 * as (start, end) code point pairs |
| 50 * @param length number of code points (not pairs) |
| 51 * @param errorCode in/out error code |
| 52 */ |
| 53 void initHanRanges(const UChar32 ranges[], int32_t length, UErrorCode &error
Code); |
| 54 |
| 55 void setNumericPrimary(uint32_t np) { numericPrimary = np; } |
| 56 |
| 57 virtual UBool isCompressibleLeadByte(uint32_t b) const; |
| 58 |
| 59 void setCompressibleLeadByte(uint32_t b); |
| 60 |
| 61 static int32_t diffTwoBytePrimaries(uint32_t p1, uint32_t p2, UBool isCompre
ssible); |
| 62 static int32_t diffThreeBytePrimaries(uint32_t p1, uint32_t p2, UBool isComp
ressible); |
| 63 |
| 64 virtual uint32_t encodeCEs(const int64_t ces[], int32_t cesLength, UErrorCod
e &errorCode); |
| 65 |
| 66 void addRootElements(const int64_t ces[], int32_t cesLength, UErrorCode &err
orCode); |
| 67 void addRootElement(int64_t ce, UErrorCode &errorCode); |
| 68 |
| 69 void addReorderingGroup(uint32_t firstByte, uint32_t lastByte, |
| 70 const UnicodeString &groupScripts, |
| 71 UErrorCode &errorCode); |
| 72 |
| 73 virtual void build(CollationData &data, UErrorCode &errorCode); |
| 74 |
| 75 void buildRootElementsTable(UVector32 &table, UErrorCode &errorCode); |
| 76 |
| 77 private: |
| 78 int32_t writeRootElementsRange( |
| 79 uint32_t prevPrimary, uint32_t p, int32_t i, |
| 80 UVector32 &table, UErrorCode &errorCode); |
| 81 |
| 82 // Flags for which primary-weight lead bytes are compressible. |
| 83 UBool compressibleBytes[256]; |
| 84 uint32_t numericPrimary; |
| 85 uint32_t firstHanPrimary; |
| 86 uint32_t lastHanPrimary; |
| 87 int32_t hanStep; |
| 88 UVector64 rootElements; |
| 89 UnicodeString scripts; |
| 90 }; |
| 91 |
| 92 U_NAMESPACE_END |
| 93 |
| 94 #endif // !UCONFIG_NO_COLLATION |
| 95 #endif // __COLLATIONBASEDATABUILDER_H__ |
OLD | NEW |