OLD | NEW |
| (Empty) |
1 /* | |
2 ******************************************************************************* | |
3 * | |
4 * Copyright (C) 2001-2011, International Business Machines | |
5 * Corporation and others. All Rights Reserved. | |
6 * | |
7 ******************************************************************************* | |
8 * file name: ucol_tok.h | |
9 * encoding: US-ASCII | |
10 * tab size: 8 (not used) | |
11 * indentation:4 | |
12 * | |
13 * created 02/22/2001 | |
14 * created by: Vladimir Weinstein | |
15 * | |
16 * This module reads a tailoring rule string and produces a list of | |
17 * tokens that will be turned into collation elements | |
18 * | |
19 */ | |
20 | |
21 #ifndef UCOL_TOKENS_H | |
22 #define UCOL_TOKENS_H | |
23 | |
24 #include "unicode/utypes.h" | |
25 #include "unicode/uset.h" | |
26 | |
27 #if !UCONFIG_NO_COLLATION | |
28 | |
29 #include "ucol_imp.h" | |
30 #include "uhash.h" | |
31 #include "unicode/parseerr.h" | |
32 | |
/* Marker values. NOTE(review): presumably sentinels for an unset value and for a reset; their users are not visible in this header - confirm. */
33 #define UCOL_TOK_UNSET 0xFFFFFFFF | |
34 #define UCOL_TOK_RESET 0xDEADBEEF | |
35 | |
/* Polarity constants. Note that the negative polarity is defined as 0, not -1. */
36 #define UCOL_TOK_POLARITY_NEGATIVE 0 | |
37 #define UCOL_TOK_POLARITY_POSITIVE 1 | |
38 | |
/* NOTE(review): presumably bit values for the 16-bit flags members of UColToken and UColParsedToken; */
/* UCOL_TOK_BEFORE (0x03) covers the two lowest bits. TODO confirm against the parser implementation. */
39 #define UCOL_TOK_TOP 0x04 | |
40 #define UCOL_TOK_VARIABLE_TOP 0x08 | |
41 #define UCOL_TOK_BEFORE 0x03 | |
42 #define UCOL_TOK_SUCCESS 0x10 | |
43 | |
44 /* Size of the space reserved for the extra strings that need to be unquoted */ | |
45 /* during the parsing of the rules. */ | |
46 #define UCOL_TOK_EXTRA_RULE_SPACE_SIZE 4096 | |
/* Forward declaration: UColTokListHeader (below) stores UColToken pointers, and UColToken itself holds a UColTokListHeader pointer. */
47 typedef struct UColToken UColToken; | |
48 | |
/*
 * Header of a list of UColToken entries: holds pointers to the first and
 * last token and to a reset token, plus CE values and per-strength
 * bookkeeping arrays. UColToken.listHeader and UColTokenParser.lh point to
 * objects of this type.
 * Note the two different array bounds: pos is sized by UCOL_STRENGTH_LIMIT,
 * the other arrays by UCOL_CE_STRENGTH_LIMIT (gapsLo and gapsHi have
 * 3 * UCOL_CE_STRENGTH_LIMIT entries each).
 * NOTE(review): the exact meaning of the CE and gap fields is not visible
 * in this header - see the implementation before relying on them.
 */
49 typedef struct { | |
50 UColToken* first; | |
51 UColToken* last; | |
52 UColToken* reset; | |
53 UBool indirect; | |
54 uint32_t baseCE; | |
55 uint32_t baseContCE; | |
56 uint32_t nextCE; | |
57 uint32_t nextContCE; | |
58 uint32_t previousCE; | |
59 uint32_t previousContCE; | |
60 int32_t pos[UCOL_STRENGTH_LIMIT]; | |
61 uint32_t gapsLo[3*UCOL_CE_STRENGTH_LIMIT]; | |
62 uint32_t gapsHi[3*UCOL_CE_STRENGTH_LIMIT]; | |
63 uint32_t numStr[UCOL_CE_STRENGTH_LIMIT]; | |
64 UColToken* fStrToken[UCOL_CE_STRENGTH_LIMIT]; | |
65 UColToken* lStrToken[UCOL_CE_STRENGTH_LIMIT]; | |
66 } UColTokListHeader; | |
67 | |
/*
 * One token of the tailoring. Carries two fixed-size CE arrays (CEs and
 * expCEs, 128 entries each, with noOfCEs / noOfExpCEs alongside), previous
 * and next pointers to other tokens, and a pointer to the
 * UColTokListHeader it belongs to.
 * NOTE(review): noOfCEs / noOfExpCEs presumably count the used entries of
 * the corresponding arrays - confirm in the implementation.
 */
68 struct UColToken { | |
69 UChar debugSource; | |
70 UChar debugExpansion; | |
71 UChar debugPrefix; | |
72 uint32_t CEs[128]; | |
73 uint32_t noOfCEs; | |
74 uint32_t expCEs[128]; | |
75 uint32_t noOfExpCEs; | |
76 uint32_t source; | |
77 uint32_t expansion; | |
78 uint32_t prefix; | |
79 uint32_t strength; | |
80 uint32_t toInsert; | |
81 uint32_t polarity; /* 1 (UCOL_TOK_POLARITY_POSITIVE) for <, <<, <<<, comma and semicolon; NOTE(review): the negative case for >, >>, >>> is presumably UCOL_TOK_POLARITY_NEGATIVE (0) rather than -1, since the field is unsigned - confirm */ | |
82 UColTokListHeader *listHeader; | |
83 UColToken* previous; | |
84 UColToken* next; | |
85 UChar **rulesToParseHdl; | |
86 uint16_t flags; | |
87 }; | |
88 | |
89 /* | |
90 * This is a token that has been parsed | |
91 * but not yet processed. Used to reduce | |
92 * the number of arguments in the parser. | |
 * It is embedded by value in UColTokenParser as parsedToken.
 * The fields form three offset/length pairs (chars, extension, prefix)
 * plus a strength, a 16-bit flags word and an indirect index.
 * NOTE(review): which buffer the offsets index into is not visible in
 * this header - presumably the rule text; confirm in the parser.
93 */ | |
94 typedef struct { | |
95 uint32_t strength; | |
96 uint32_t charsOffset; | |
97 uint32_t charsLen; | |
98 uint32_t extensionOffset; | |
99 uint32_t extensionLen; | |
100 uint32_t prefixOffset; | |
101 uint32_t prefixLen; | |
102 uint16_t flags; | |
103 uint16_t indirectIndex; | |
104 } UColParsedToken; | |
105 | |
106 | |
/*
 * Parser state. It is passed as src to ucol_tok_initTokenList,
 * ucol_tok_assembleTokenList, ucol_tok_parseNextToken and
 * ucol_tok_closeTokenList, and as a const pointer to ucol_inv_getNextCE
 * and ucol_inv_getPrevCE.
 * Contents, in declaration order: the most recently parsed token, a group
 * of UChar pointers (source, end, current, sourceCurrent, extraCurrent,
 * extraEnd), the UCA collator and inverse UCA table, a hashtable named
 * tailored, the option set, list bookkeeping (resultLen, listCapacity, lh),
 * the varTop token, two USets, and state for starred lists, ranges and
 * reordering.
 * NOTE(review): ownership of the pointed-to buffers and sets is not
 * visible in this header - see ucol_tok_closeTokenList in the
 * implementation.
 */
107 typedef struct { | |
108 UColParsedToken parsedToken; | |
109 UChar *source; | |
110 UChar *end; | |
111 const UChar *current; | |
112 UChar *sourceCurrent; | |
113 UChar *extraCurrent; | |
114 UChar *extraEnd; | |
115 const InverseUCATableHeader *invUCA; | |
116 const UCollator *UCA; | |
117 UHashtable *tailored; | |
118 UColOptionSet *opts; | |
119 uint32_t resultLen; | |
120 uint32_t listCapacity; | |
121 UColTokListHeader *lh; | |
122 UColToken *varTop; | |
123 USet *copySet; | |
124 USet *removeSet; | |
125 UBool buildCCTabFlag; /* Tailoring rule requires building combining class table. */ | |
126 | |
127 UChar32 previousCp; /* Previous code point. */ | |
128 /* For processing starred lists. */ | |
129 UBool isStarred; /* Are we processing a starred token? */ | |
130 UBool savedIsStarred; | |
131 uint32_t currentStarredCharIndex; /* Index of the current character in the starred expression. */ | |
132 uint32_t lastStarredCharIndex; /* Index of the last character in the starred expression. */ | |
133 | |
134 /* For processing ranges. */ | |
135 UBool inRange; /* Are we in a range? */ | |
136 UChar32 currentRangeCp; /* Current code point in the range. */ | |
137 UChar32 lastRangeCp; /* The last code point in the range. */ | |
138 | |
139 /* Reorder codes for collation reordering; reorderCodesLength accompanies the reorderCodes array. */ | |
140 int32_t* reorderCodes; | |
141 int32_t reorderCodesLength; | |
142 | |
143 } UColTokenParser; | |
144 | |
/*
 * One suboption: a name (subName with its length subLen) paired with a
 * UColAttributeValue. ucolTokOption.subopts points to entries of this type.
 * NOTE(review): subLen is presumably counted in UChars - confirm.
 */
145 typedef struct { | |
146 const UChar *subName; | |
147 int32_t subLen; | |
148 UColAttributeValue attrVal; | |
149 } ucolTokSuboption; | |
150 | |
/*
 * One option: a name (optionName with its length optionLen), a pointer to
 * ucolTokSuboption entries with subSize alongside, and the UColAttribute
 * the option corresponds to.
 * NOTE(review): subSize is presumably the number of entries reachable via
 * subopts - confirm against the option tables in the implementation.
 */
151 typedef struct { | |
152 const UChar *optionName; | |
153 int32_t optionLen; | |
154 const ucolTokSuboption *subopts; | |
155 int32_t subSize; | |
156 UColAttribute attr; | |
157 } ucolTokOption; | |
158 | |
/*
 * Evaluates to non-zero when ch lies in one of these ASCII ranges:
 *   0x0020-0x002F, 0x003A-0x003F, 0x005B-0x0060, 0x007D-0x007E,
 * or is exactly 0x007B. Digits, letters, 0x0040 and 0x007C are therefore
 * not matched.
 * The argument is evaluated several times, so do not pass an expression
 * with side effects.
 */
159 #define ucol_tok_isSpecialChar(ch) \ | |
160 (((((ch) <= 0x002F) && ((ch) >= 0x0020)) || \ | |
161 (((ch) <= 0x003F) && ((ch) >= 0x003A)) || \ | |
162 (((ch) <= 0x0060) && ((ch) >= 0x005B)) || \ | |
163 (((ch) <= 0x007E) && ((ch) >= 0x007D)) || \ | |
164 (ch) == 0x007B)) | |
165 | |
166 | |
/*
 * Token-list entry points operating on a UColTokenParser (src).
 * All three are declared U_CFUNC.
 *
 * ucol_tok_assembleTokenList: takes the parser state, a UParseError to
 * fill in and a UErrorCode pointer; returns a uint32_t.
 * NOTE(review): the return value is presumably the number of token lists
 * produced (compare UColTokenParser.resultLen) - confirm.
 */
167 U_CFUNC | |
168 uint32_t ucol_tok_assembleTokenList(UColTokenParser *src, | |
169 UParseError *parseError, | |
170 UErrorCode *status); | |
171 | |
/*
 * ucol_tok_initTokenList: initializes src from the rule text (rules with
 * its length rulesLength) and the UCA collator. importFunc and context are
 * passed together; NOTE(review): presumably context is handed back to
 * importFunc when rules have to be imported - confirm.
 */
172 U_CFUNC | |
173 void ucol_tok_initTokenList(UColTokenParser *src, | |
174 const UChar *rules, | |
175 const uint32_t rulesLength, | |
176 const UCollator *UCA, | |
177 GetCollationRulesFunction importFunc, | |
178 void* context, | |
179 UErrorCode *status); | |
180 | |
/* ucol_tok_closeTokenList: counterpart of ucol_tok_initTokenList; takes only the parser state and reports no status. */
181 U_CFUNC void ucol_tok_closeTokenList(UColTokenParser *src); | |
182 | |
/*
 * ucol_tok_parseNextToken: parses the next token using the state in src.
 * startOfRules is a flag supplied by the caller; parse problems can be
 * described through parseError and status. Returns a const UChar pointer.
 * NOTE(review): the result presumably lands in src->parsedToken and the
 * returned pointer presumably marks a position in the rule text (NULL when
 * done) - confirm in the implementation.
 */
183 U_CAPI const UChar* U_EXPORT2 ucol_tok_parseNextToken(UColTokenParser *src, | |
184 UBool startOfRules, | |
185 UParseError *parseError, | |
186 UErrorCode *status); | |
187 | |
188 | |
/*
 * ucol_tok_getNextArgument: works on the text delimited by start and end
 * and writes an attribute and its value through attrib and value.
 * Returns a const UChar pointer; NOTE(review): presumably the position
 * after the consumed argument - confirm.
 * The parameter row below is followed by a stray row that holds only the
 * column residue; this is a line wrap in the capture, not a code change.
 */
189 U_CAPI const UChar * U_EXPORT2 | |
190 ucol_tok_getNextArgument(const UChar *start, const UChar *end, | |
191 UColAttribute *attrib, UColAttributeValue *value,
| |
192 UErrorCode *status); | |
/*
 * ucol_inv_getNextCE / ucol_inv_getPrevCE: given a CE and its continuation
 * (CE, contCE) and a strength, write a neighbouring CE pair through the
 * two output pointers and return an int32_t. src is not modified (const).
 * NOTE(review): the return value is presumably an index into the inverse
 * UCA table (src->invUCA) - confirm.
 * NOTE(review): the two declarations use different linkage macros
 * (U_CAPI vs U_CFUNC) although both carry U_EXPORT2 - check whether that
 * is intentional.
 * NOTE(review): in this capture the last output parameter name of each
 * function is split across two rows by a line wrap; the intended names
 * are nextContCE and prevContCE.
 */
193 U_CAPI int32_t U_EXPORT2 ucol_inv_getNextCE(const UColTokenParser *src, | |
194 uint32_t CE, uint32_t contCE, | |
195 uint32_t *nextCE, uint32_t *nextCont
CE, | |
196 uint32_t strength); | |
197 U_CFUNC int32_t U_EXPORT2 ucol_inv_getPrevCE(const UColTokenParser *src, | |
198 uint32_t CE, uint32_t contCE, | |
199 uint32_t *prevCE, uint32_t *prevCont
CE, | |
200 uint32_t strength); | |
201 | |
/*
 * ucol_tok_getRulesFromBundle: takes an opaque context, a locale and a
 * type string, and returns a const UChar pointer, with a length written
 * through pLength and an error code through status.
 * NOTE(review): presumably intended to be used as the
 * GetCollationRulesFunction passed to ucol_tok_initTokenList - confirm
 * that the signatures match.
 * NOTE(review): unlike the other declarations in this header it has no
 * U_CFUNC / U_CAPI linkage macro, only U_CALLCONV - check whether that is
 * intentional.
 */
202 const UChar* U_CALLCONV ucol_tok_getRulesFromBundle( | |
203 void* context, | |
204 const char* locale, | |
205 const char* type, | |
206 int32_t* pLength, | |
207 UErrorCode* status); | |
208 | |
209 #endif /* #if !UCONFIG_NO_COLLATION */ | |
210 | |
211 #endif | |
OLD | NEW |