OLD | NEW |
(Empty) | |
| 1 /* |
| 2 ******************************************************************************* |
| 3 * |
| 4 * Copyright (C) 2001-2010, International Business Machines |
| 5 * Corporation and others. All Rights Reserved. |
| 6 * |
| 7 ******************************************************************************* |
| 8 * file name: ucol_tok.h |
| 9 * encoding: US-ASCII |
| 10 * tab size: 8 (not used) |
| 11 * indentation:4 |
| 12 * |
| 13 * created 02/22/2001 |
| 14 * created by: Vladimir Weinstein |
| 15 * |
| 16 * This module reads a tailoring rule string and produces a list of |
| 17 * tokens that will be turned into collation elements |
| 18 * |
| 19 */ |
| 20 |
| 21 #ifndef UCOL_TOKENS_H |
| 22 #define UCOL_TOKENS_H |
| 23 |
| 24 #include "unicode/utypes.h" |
| 25 #include "unicode/uset.h" |
| 26 |
| 27 #if !UCONFIG_NO_COLLATION |
| 28 |
| 29 #include "ucol_imp.h" |
| 30 #include "uhash.h" |
| 31 #include "unicode/parseerr.h" |
| 32 |
| 33 #define UCOL_TOK_UNSET 0xFFFFFFFF /* all-ones 32-bit marker; going by the name it flags a value that has not been set -- usage is not visible in this header */ |
| 34 #define UCOL_TOK_RESET 0xDEADBEEF /* distinctive 32-bit marker; presumably identifies a reset -- confirm in the parser source */ |
| 35 |
| 36 #define UCOL_TOK_POLARITY_NEGATIVE 0 /* NOTE(review): presumably the values stored in UColToken.polarity -- confirm */ |
| 37 #define UCOL_TOK_POLARITY_POSITIVE 1 |
| 38 |
| 39 #define UCOL_TOK_TOP 0x04 /* 0x04, 0x08 and 0x10 are distinct single bits; presumably combined in the uint16_t flags fields -- TODO confirm */ |
| 40 #define UCOL_TOK_VARIABLE_TOP 0x08 |
| 41 #define UCOL_TOK_BEFORE 0x03 /* unlike its neighbours this covers the two low bits (a 2-bit field rather than a single flag) */ |
| 42 #define UCOL_TOK_SUCCESS 0x10 |
| 43 |
| 44 /* this is space for the extra strings that need to be unquoted */ |
| 45 /* during the parsing of the rules */ |
| 46 #define UCOL_TOK_EXTRA_RULE_SPACE_SIZE 4096 |
| 47 typedef struct UColToken UColToken; /* forward declaration: UColTokListHeader uses UColToken* before struct UColToken is defined */ |
| 48 |
| 49 typedef struct { /* Per-list record; UColToken.listHeader and UColTokenParser.lh point at this type. Field notes are read off names and types only -- TODO confirm against the parser implementation. */ |
| 50 UColToken* first; /* presumably the first token of the list */ |
| 51 UColToken* last; /* presumably the last token of the list */ |
| 52 UColToken* reset; /* presumably the token the list was reset to */ |
| 53 UBool indirect; |
| 54 uint32_t baseCE; /* this and the five fields below form three value pairs (base, next, previous); "ContCE" presumably means continuation CE -- confirm */ |
| 55 uint32_t baseContCE; |
| 56 uint32_t nextCE; |
| 57 uint32_t nextContCE; |
| 58 uint32_t previousCE; |
| 59 uint32_t previousContCE; |
| 60 int32_t pos[UCOL_STRENGTH_LIMIT]; /* sized by UCOL_STRENGTH_LIMIT, whereas the arrays below use UCOL_CE_STRENGTH_LIMIT */ |
| 61 uint32_t gapsLo[3*UCOL_CE_STRENGTH_LIMIT]; /* 3 * UCOL_CE_STRENGTH_LIMIT entries; the layout is not visible in this header */ |
| 62 uint32_t gapsHi[3*UCOL_CE_STRENGTH_LIMIT]; /* same size as gapsLo */ |
| 63 uint32_t numStr[UCOL_CE_STRENGTH_LIMIT]; /* one entry per CE strength, as are fStrToken and lStrToken */ |
| 64 UColToken* fStrToken[UCOL_CE_STRENGTH_LIMIT]; |
| 65 UColToken* lStrToken[UCOL_CE_STRENGTH_LIMIT]; |
| 66 } UColTokListHeader; |
| 67 |
| 68 struct UColToken { /* One token. Tokens reference each other through previous/next and reference a UColTokListHeader through listHeader. */ |
| 69 UChar debugSource; /* the three debug* fields hold one UChar each; presumably debugging aids -- confirm */ |
| 70 UChar debugExpansion; |
| 71 UChar debugPrefix; |
| 72 uint32_t CEs[128]; /* fixed capacity of 128; noOfCEs is presumably the number in use */ |
| 73 uint32_t noOfCEs; |
| 74 uint32_t expCEs[128]; /* fixed capacity of 128; noOfExpCEs is presumably the number in use */ |
| 75 uint32_t noOfExpCEs; |
| 76 uint32_t source; /* NOTE(review): source, expansion and prefix are 32-bit integers, not pointers; their encoding is not visible in this header */ |
| 77 uint32_t expansion; |
| 78 uint32_t prefix; |
| 79 uint32_t strength; |
| 80 uint32_t toInsert; |
| 81 uint32_t polarity; /* 1 for <, <<, <<<, , ; -- NOTE(review): this comment used to say -1 for >, >>, >>>, but the field is unsigned and UCOL_TOK_POLARITY_NEGATIVE is defined as 0, so presumably 0 is stored for those -- confirm */ |
| 82 UColTokListHeader *listHeader; |
| 83 UColToken* previous; |
| 84 UColToken* next; |
| 85 UChar **rulesToParseHdl; /* pointer to a UChar* (a handle); which object owns the pointed-to buffer is not visible here */ |
| 86 uint16_t flags; /* presumably UCOL_TOK_* flag bits -- confirm */ |
| 87 }; |
| 88 |
| 89 /* |
| 90 * This is a token that has been parsed |
| 91 * but not yet processed. Used to reduce |
| 92 * the number of arguments in the parser |
| 93 */ |
| 94 typedef struct { /* Plain-integer record for one parsed token; UColTokenParser embeds one as its parsedToken member. */ |
| 95 uint32_t strength; |
| 96 uint32_t charsOffset; /* chars, extension and prefix are each described by an Offset/Len pair; what the offsets are relative to is not visible here -- presumably the rules buffer, confirm */ |
| 97 uint32_t charsLen; |
| 98 uint32_t extensionOffset; |
| 99 uint32_t extensionLen; |
| 100 uint32_t prefixOffset; |
| 101 uint32_t prefixLen; |
| 102 uint16_t flags; /* presumably UCOL_TOK_* flag bits -- confirm */ |
| 103 uint16_t indirectIndex; |
| 104 } UColParsedToken; |
| 105 |
| 106 |
| 107 typedef struct { /* Parser state; passed as the src argument to the ucol_tok_* and ucol_inv_* functions declared in this header. Uncommented field notes are inferred from names and types -- TODO confirm. */ |
| 108 UColParsedToken parsedToken; /* embedded by value, not a pointer */ |
| 109 UChar *source; |
| 110 UChar *end; |
| 111 const UChar *current; /* the only const-qualified cursor among the UChar pointers here */ |
| 112 UChar *sourceCurrent; |
| 113 UChar *extraCurrent; /* extraCurrent/extraEnd presumably delimit the extra space sized by UCOL_TOK_EXTRA_RULE_SPACE_SIZE -- confirm */ |
| 114 UChar *extraEnd; |
| 115 const InverseUCATableHeader *invUCA; |
| 116 const UCollator *UCA; |
| 117 UHashtable *tailored; |
| 118 UColOptionSet *opts; |
| 119 uint32_t resultLen; |
| 120 uint32_t listCapacity; |
| 121 UColTokListHeader *lh; /* presumably an array of list headers tracked by resultLen/listCapacity -- confirm */ |
| 122 UColToken *varTop; |
| 123 USet *copySet; |
| 124 USet *removeSet; |
| 125 UBool buildCCTabFlag; /* Tailoring rule requires building combining class table. */ |
| 126 |
| 127 UChar32 previousCp; /* Previous code point. */ |
| 128 /* For processing starred lists. */ |
| 129 UBool isStarred; /* Are we processing a starred token? */ |
| 130 UBool savedIsStarred; |
| 131 uint32_t currentStarredCharIndex; /* Index of the current character in the starred expression. */ |
| 132 uint32_t lastStarredCharIndex; /* Index of the last character in the starred expression. */ |
| 133 |
| 134 /* For processing ranges. */ |
| 135 UBool inRange; /* Are we in a range? */ |
| 136 UChar32 currentRangeCp; /* Current code point in the range. */ |
| 137 UChar32 lastRangeCp; /* The last code point in the range. */ |
| 138 |
| 139 /* reorder codes for collation reordering */ |
| 140 int32_t* reorderCodes; |
| 141 int32_t reorderCodesLength; /* presumably the number of entries in reorderCodes -- confirm */ |
| 142 |
| 143 } UColTokenParser; |
| 144 |
| 145 typedef struct { /* One sub-option entry: a name (subName with subLen) paired with a UColAttributeValue. ucolTokOption.subopts points at entries of this type. */ |
| 146 const UChar *subName; |
| 147 int32_t subLen; /* presumably the length of subName in UChars -- confirm */ |
| 148 UColAttributeValue attrVal; |
| 149 } ucolTokSuboption; |
| 150 |
| 151 typedef struct { /* One option entry: a name (optionName with optionLen), a pointer to ucolTokSuboption entries, and a UColAttribute. */ |
| 152 const UChar *optionName; |
| 153 int32_t optionLen; /* presumably the length of optionName in UChars -- confirm */ |
| 154 const ucolTokSuboption *subopts; |
| 155 int32_t subSize; /* presumably the number of entries behind subopts -- confirm */ |
| 156 UColAttribute attr; |
| 157 } ucolTokOption; |
| 158 |
| 159 #define ucol_tok_isSpecialChar(ch) /* Non-zero when ch is in 0x20-0x2F, 0x3A-0x3F, 0x5B-0x60 or 0x7D-0x7E, or equals 0x7B; 0x7C is not matched. ch appears nine times in the expansion, so pass an expression without side effects. */ \ |
| 160 (((((ch) <= 0x002F) && ((ch) >= 0x0020)) || \ |
| 161 (((ch) <= 0x003F) && ((ch) >= 0x003A)) || \ |
| 162 (((ch) <= 0x0060) && ((ch) >= 0x005B)) || \ |
| 163 (((ch) <= 0x007E) && ((ch) >= 0x007D)) || \ |
| 164 (ch) == 0x007B)) |
| 165 |
| 166 |
| 167 U_CFUNC /* ucol_tok_assembleTokenList: takes the parser state plus parse-error and status out-parameters and returns a uint32_t; the meaning of the return value is not visible in this header -- see the implementation */ |
| 168 uint32_t ucol_tok_assembleTokenList(UColTokenParser *src, |
| 169 UParseError *parseError, |
| 170 UErrorCode *status); |
| 171 |
| 172 U_CFUNC /* ucol_tok_initTokenList: takes the parser state, a rule string with explicit length, a collator (UCA), an import callback with its context pointer, and a status out-parameter; presumably fills in *src -- confirm */ |
| 173 void ucol_tok_initTokenList(UColTokenParser *src, |
| 174 const UChar *rules, |
| 175 const uint32_t rulesLength, |
| 176 const UCollator *UCA, |
| 177 GetCollationRulesFunction importFunc, |
| 178 void* context, /* opaque pointer; presumably handed back to importFunc -- confirm */ |
| 179 UErrorCode *status); |
| 180 |
| 181 U_CFUNC void ucol_tok_closeTokenList(UColTokenParser *src); /* presumably the counterpart of ucol_tok_initTokenList; what it releases is not visible here */ |
| 182 |
| 183 U_CAPI const UChar* U_EXPORT2 ucol_tok_parseNextToken(UColTokenParser *src, /* declared U_CAPI/U_EXPORT2, unlike the U_CFUNC declarations above; returns a const UChar* whose meaning is not visible here */ |
| 184 UBool startOfRules, |
| 185 UParseError *parseError, |
| 186 UErrorCode *status); |
| 187 |
| 188 |
| 189 U_CAPI const UChar * U_EXPORT2 /* ucol_tok_getNextArgument: takes a [start, end) pair of const UChar pointers plus attrib/value out-pointers and a status out-parameter; returns a const UChar* -- presumably the position after the parsed argument, confirm */ |
| 190 ucol_tok_getNextArgument(const UChar *start, const UChar *end, |
| 191 UColAttribute *attrib, UColAttributeValue *value, |
| 192 UErrorCode *status); |
| 193 U_CAPI int32_t U_EXPORT2 ucol_inv_getNextCE(const UColTokenParser *src, /* takes read-only parser state, a CE/contCE input pair, non-const nextCE/nextContCE pointers (presumably outputs) and a strength; the int32_t return meaning is not visible here */ |
| 194 uint32_t CE, uint32_t contCE, |
| 195 uint32_t *nextCE, uint32_t *nextContCE, |
| 196 uint32_t strength); |
| 197 U_CFUNC int32_t U_EXPORT2 ucol_inv_getPrevCE(const UColTokenParser *src, /* same parameter shape as ucol_inv_getNextCE with prevCE/prevContCE pointers (presumably outputs). NOTE(review): declared U_CFUNC while ucol_inv_getNextCE is U_CAPI -- confirm this is intended */ |
| 198 uint32_t CE, uint32_t contCE, |
| 199 uint32_t *prevCE, uint32_t *prevContCE, |
| 200 uint32_t strength); |
| 201 |
| 202 U_CFUNC const UChar* ucol_tok_getRulesFromBundle( /* returns a const UChar* and takes a length out-pointer and a status out-pointer; presumably usable as the GetCollationRulesFunction importFunc of ucol_tok_initTokenList -- confirm the typedef */ |
| 203 void* context, |
| 204 const char* locale, |
| 205 const char* type, |
| 206 int32_t* pLength, /* presumably receives the length of the returned rules -- confirm */ |
| 207 UErrorCode* status); |
| 208 |
| 209 #endif /* #if !UCONFIG_NO_COLLATION */ |
| 210 |
| 211 #endif |
OLD | NEW |