OLD | NEW |
| (Empty) |
1 /* | |
2 ******************************************************************************* | |
3 * Copyright (C) 2012-2014, International Business Machines | |
4 * Corporation and others. All Rights Reserved. | |
5 ******************************************************************************* | |
6 * collationbasedatabuilder.h | |
7 * | |
8 * created on: 2012aug11 | |
9 * created by: Markus W. Scherer | |
10 */ | |
11 | |
12 #ifndef __COLLATIONBASEDATABUILDER_H__ | |
13 #define __COLLATIONBASEDATABUILDER_H__ | |
14 | |
15 #include "unicode/utypes.h" | |
16 | |
17 #if !UCONFIG_NO_COLLATION | |
18 | |
19 #include "unicode/uniset.h" | |
20 #include "unicode/unistr.h" | |
21 #include "collation.h" | |
22 #include "collationdata.h" | |
23 #include "collationdatabuilder.h" | |
24 #include "normalizer2impl.h" | |
25 #include "utrie2.h" | |
26 #include "uvectr32.h" | |
27 #include "uvectr64.h" | |
28 #include "uvector.h" | |
29 | |
30 U_NAMESPACE_BEGIN | |
31 | |
32 /** | |
33 * Low-level base CollationData builder. | |
34 */ | |
35 class U_I18N_API CollationBaseDataBuilder : public CollationDataBuilder { | |
36 public: | |
37 CollationBaseDataBuilder(UErrorCode &errorCode); | |
38 | |
39 virtual ~CollationBaseDataBuilder(); | |
40 | |
41 void init(UErrorCode &errorCode); | |
42 | |
43 /** | |
44 * Sets the Han ranges as ranges of offset CE32s. | |
45 * Note: Unihan extension A sorts after the other BMP ranges. | |
46 * See http://www.unicode.org/reports/tr10/#Implicit_Weights | |
47 * | |
48 * @param ranges array of ranges of [:Unified_Ideograph:] in collation order
, | |
49 * as (start, end) code point pairs | |
50 * @param length number of code points (not pairs) | |
51 * @param errorCode in/out error code | |
52 */ | |
53 void initHanRanges(const UChar32 ranges[], int32_t length, UErrorCode &error
Code); | |
54 | |
55 void setNumericPrimary(uint32_t np) { numericPrimary = np; } | |
56 | |
57 virtual UBool isCompressibleLeadByte(uint32_t b) const; | |
58 | |
59 void setCompressibleLeadByte(uint32_t b); | |
60 | |
61 static int32_t diffTwoBytePrimaries(uint32_t p1, uint32_t p2, UBool isCompre
ssible); | |
62 static int32_t diffThreeBytePrimaries(uint32_t p1, uint32_t p2, UBool isComp
ressible); | |
63 | |
64 virtual uint32_t encodeCEs(const int64_t ces[], int32_t cesLength, UErrorCod
e &errorCode); | |
65 | |
66 void addRootElements(const int64_t ces[], int32_t cesLength, UErrorCode &err
orCode); | |
67 void addRootElement(int64_t ce, UErrorCode &errorCode); | |
68 | |
69 void addReorderingGroup(uint32_t firstByte, uint32_t lastByte, | |
70 const UnicodeString &groupScripts, | |
71 UErrorCode &errorCode); | |
72 | |
73 virtual void build(CollationData &data, UErrorCode &errorCode); | |
74 | |
75 void buildRootElementsTable(UVector32 &table, UErrorCode &errorCode); | |
76 | |
77 private: | |
78 int32_t writeRootElementsRange( | |
79 uint32_t prevPrimary, uint32_t p, int32_t i, | |
80 UVector32 &table, UErrorCode &errorCode); | |
81 | |
82 // Flags for which primary-weight lead bytes are compressible. | |
83 UBool compressibleBytes[256]; | |
84 uint32_t numericPrimary; | |
85 uint32_t firstHanPrimary; | |
86 uint32_t lastHanPrimary; | |
87 int32_t hanStep; | |
88 UVector64 rootElements; | |
89 UnicodeString scripts; | |
90 }; | |
91 | |
92 U_NAMESPACE_END | |
93 | |
94 #endif // !UCONFIG_NO_COLLATION | |
95 #endif // __COLLATIONBASEDATABUILDER_H__ | |
OLD | NEW |