OLD | NEW |
(Empty) | |
| 1 /* |
| 2 ******************************************************************************* |
| 3 * Copyright (C) 2013-2014, International Business Machines |
| 4 * Corporation and others. All Rights Reserved. |
| 5 ******************************************************************************* |
| 6 * collationsets.h |
| 7 * |
| 8 * created on: 2013feb09 |
| 9 * created by: Markus W. Scherer |
| 10 */ |
| 11 |
| 12 #ifndef __COLLATIONSETS_H__ |
| 13 #define __COLLATIONSETS_H__ |
| 14 |
| 15 #include "unicode/utypes.h" |
| 16 |
| 17 #if !UCONFIG_NO_COLLATION |
| 18 |
| 19 #include "unicode/uniset.h" |
| 20 #include "collation.h" |
| 21 |
| 22 U_NAMESPACE_BEGIN |
| 23 |
// Forward declaration only: this header refers to CollationData solely through
// "const CollationData *" parameters and members, so the full definition is not needed here.
| 24 struct CollationData; |
| 25 |
| 26 /** |
| 27 * Finds the set of characters and strings that sort differently in the tailorin
g |
| 28 * from the base data. |
| 29 * |
| 30 * Every mapping in the tailoring needs to be compared to the base, |
| 31 * because some mappings are copied for optimization, and |
| 32 * all contractions for a character are copied if any contractions for that char
acter |
| 33 * are added, modified or removed. |
| 34 * |
| 35 * It might be simpler to re-parse the rule string, but: |
| 36 * - That would require duplicating some of the from-rules builder code. |
| 37 * - That would make the runtime code depend on the builder. |
| 38 * - That would only work if we have the rule string, and we allow users to |
| 39 * omit the rule string from data files. |
| 40 */ |
| 41 class TailoredSet : public UMemory { |
| 42 public: |
| 43 TailoredSet(UnicodeSet *t) |
| 44 : data(NULL), baseData(NULL), |
| 45 tailored(t), |
| 46 suffix(NULL), |
| 47 errorCode(U_ZERO_ERROR) {} |
| 48 |
| 49 void forData(const CollationData *d, UErrorCode &errorCode); |
| 50 |
| 51 /** |
| 52 * @return U_SUCCESS(errorCode) in C++, void in Java |
| 53 * @internal only public for access by callback |
| 54 */ |
| 55 UBool handleCE32(UChar32 start, UChar32 end, uint32_t ce32); |
| 56 |
| 57 private: |
| 58 void compare(UChar32 c, uint32_t ce32, uint32_t baseCE32); |
| 59 void comparePrefixes(UChar32 c, const UChar *p, const UChar *q); |
| 60 void compareContractions(UChar32 c, const UChar *p, const UChar *q); |
| 61 |
| 62 void addPrefixes(const CollationData *d, UChar32 c, const UChar *p); |
| 63 void addPrefix(const CollationData *d, const UnicodeString &pfx, UChar32 c,
uint32_t ce32); |
| 64 void addContractions(UChar32 c, const UChar *p); |
| 65 void addSuffix(UChar32 c, const UnicodeString &sfx); |
| 66 void add(UChar32 c); |
| 67 |
| 68 /** Prefixes are reversed in the data structure. */ |
| 69 void setPrefix(const UnicodeString &pfx) { |
| 70 unreversedPrefix = pfx; |
| 71 unreversedPrefix.reverse(); |
| 72 } |
| 73 void resetPrefix() { |
| 74 unreversedPrefix.remove(); |
| 75 } |
| 76 |
| 77 const CollationData *data; |
| 78 const CollationData *baseData; |
| 79 UnicodeSet *tailored; |
| 80 UnicodeString unreversedPrefix; |
| 81 const UnicodeString *suffix; |
| 82 UErrorCode errorCode; |
| 83 }; |
| 84 |
| 85 class ContractionsAndExpansions : public UMemory { |
| 86 public: |
| 87 class CESink : public UMemory { |
| 88 public: |
| 89 virtual ~CESink(); |
| 90 virtual void handleCE(int64_t ce) = 0; |
| 91 virtual void handleExpansion(const int64_t ces[], int32_t length) = 0; |
| 92 }; |
| 93 |
| 94 ContractionsAndExpansions(UnicodeSet *con, UnicodeSet *exp, CESink *s, UBool
prefixes) |
| 95 : data(NULL), |
| 96 contractions(con), expansions(exp), |
| 97 sink(s), |
| 98 addPrefixes(prefixes), |
| 99 checkTailored(0), |
| 100 suffix(NULL), |
| 101 errorCode(U_ZERO_ERROR) {} |
| 102 |
| 103 void forData(const CollationData *d, UErrorCode &errorCode); |
| 104 void forCodePoint(const CollationData *d, UChar32 c, UErrorCode &ec); |
| 105 |
| 106 // all following: @internal, only public for access by callback |
| 107 |
| 108 void handleCE32(UChar32 start, UChar32 end, uint32_t ce32); |
| 109 |
| 110 void handlePrefixes(UChar32 start, UChar32 end, uint32_t ce32); |
| 111 void handleContractions(UChar32 start, UChar32 end, uint32_t ce32); |
| 112 |
| 113 void addExpansions(UChar32 start, UChar32 end); |
| 114 void addStrings(UChar32 start, UChar32 end, UnicodeSet *set); |
| 115 |
| 116 /** Prefixes are reversed in the data structure. */ |
| 117 void setPrefix(const UnicodeString &pfx) { |
| 118 unreversedPrefix = pfx; |
| 119 unreversedPrefix.reverse(); |
| 120 } |
| 121 void resetPrefix() { |
| 122 unreversedPrefix.remove(); |
| 123 } |
| 124 |
| 125 const CollationData *data; |
| 126 UnicodeSet *contractions; |
| 127 UnicodeSet *expansions; |
| 128 CESink *sink; |
| 129 UBool addPrefixes; |
| 130 int8_t checkTailored; // -1: collected tailored +1: exclude tailored |
| 131 UnicodeSet tailored; |
| 132 UnicodeSet ranges; |
| 133 UnicodeString unreversedPrefix; |
| 134 const UnicodeString *suffix; |
| 135 int64_t ces[Collation::MAX_EXPANSION_LENGTH]; |
| 136 UErrorCode errorCode; |
| 137 }; |
| 138 |
| 139 U_NAMESPACE_END |
| 140 |
| 141 #endif // !UCONFIG_NO_COLLATION |
| 142 #endif // __COLLATIONSETS_H__ |
OLD | NEW |