OLD | NEW |
1 // | 1 // |
2 // regexst.h | 2 // regexst.h |
3 // | 3 // |
4 // Copyright (C) 2004-2013, International Business Machines Corporation and oth
ers. | 4 // Copyright (C) 2004-2015, International Business Machines Corporation and oth
ers. |
5 // All Rights Reserved. | 5 // All Rights Reserved. |
6 // | 6 // |
7 // This file contains class RegexStaticSets | 7 // This file contains class RegexStaticSets |
8 // | 8 // |
9 // This class is internal to the regular expression implementation. | 9 // This class is internal to the regular expression implementation. |
10 // For the public Regular Expression API, see the file "unicode/regex.h" | 10 // For the public Regular Expression API, see the file "unicode/regex.h" |
11 // | 11 // |
12 // RegexStaticSets groups together the common UnicodeSets that are needed | 12 // RegexStaticSets groups together the common UnicodeSets that are needed |
13 // for compiling or executing RegularExpressions. This grouping simplifies | 13 // for compiling or executing RegularExpressions. This grouping simplifies |
14 // the thread safe lazy creation and sharing of these sets across | 14 // the thread safe lazy creation and sharing of these sets across |
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
48 | 48 |
49 // "Rule Char" Characters are those with no special meaning, and therefore do no
t | 49 // "Rule Char" Characters are those with no special meaning, and therefore do no
t |
50 // need to be escaped to appear as literals in a regexp. Expressed | 50 // need to be escaped to appear as literals in a regexp. Expressed |
51 // as the inverse of those needing escaping -- [^\*\?\+\[\(\)\{\}\^\$\|\\\.] | 51 // as the inverse of those needing escaping -- [^\*\?\+\[\(\)\{\}\^\$\|\\\.] |
52 static const UChar gRuleSet_rule_char_pattern[] = { | 52 static const UChar gRuleSet_rule_char_pattern[] = { |
53 // [ ^ \ * \ ? \ + \ [ \ ( /
) | 53 // [ ^ \ * \ ? \ + \ [ \ ( /
) |
54 0x5b, 0x5e, 0x5c, 0x2a, 0x5c, 0x3f, 0x5c, 0x2b, 0x5c, 0x5b, 0x5c, 0x28, 0x5c
, 0x29, | 54 0x5b, 0x5e, 0x5c, 0x2a, 0x5c, 0x3f, 0x5c, 0x2b, 0x5c, 0x5b, 0x5c, 0x28, 0x5c
, 0x29, |
55 // \ { \ } \ ^ \ $ \ | \ \ \
. ] | 55 // \ { \ } \ ^ \ $ \ | \ \ \
. ] |
56 0x5c, 0x7b,0x5c, 0x7d, 0x5c, 0x5e, 0x5c, 0x24, 0x5c, 0x7c, 0x5c, 0x5c, 0x5c,
0x2e, 0x5d, 0}; | 56 0x5c, 0x7b,0x5c, 0x7d, 0x5c, 0x5e, 0x5c, 0x24, 0x5c, 0x7c, 0x5c, 0x5c, 0x5c,
0x2e, 0x5d, 0}; |
57 | 57 |
58 | |
59 static const UChar gRuleSet_digit_char_pattern[] = { | |
60 // [ 0 - 9 ] | |
61 0x5b, 0x30, 0x2d, 0x39, 0x5d, 0}; | |
62 | |
63 // | 58 // |
64 // Here are the backslash escape characters that ICU's unescape() function | 59 // Here are the backslash escape characters that ICU's unescape() function |
65 // will handle. | 60 // will handle. |
66 // | 61 // |
67 static const UChar gUnescapeCharPattern[] = { | 62 static const UChar gUnescapeCharPattern[] = { |
68 // [ a c e f n r t u U x ] | 63 // [ a c e f n r t u U x ] |
69 0x5b, 0x61, 0x63, 0x65, 0x66, 0x6e, 0x72, 0x74, 0x75, 0x55, 0x78, 0x5d, 0}; | 64 0x5b, 0x61, 0x63, 0x65, 0x66, 0x6e, 0x72, 0x74, 0x75, 0x55, 0x78, 0x5d, 0}; |
70 | 65 |
71 | 66 |
72 // | 67 // |
(...skipping 133 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
206 // UnicodeSets. | 201 // UnicodeSets. |
207 for (i=0; i<URX_LAST_SET; i++) { | 202 for (i=0; i<URX_LAST_SET; i++) { |
208 if (fPropSets[i]) { | 203 if (fPropSets[i]) { |
209 fPropSets[i]->compact(); | 204 fPropSets[i]->compact(); |
210 fPropSets8[i].init(fPropSets[i]); | 205 fPropSets8[i].init(fPropSets[i]); |
211 } | 206 } |
212 } | 207 } |
213 | 208 |
214 // Sets used while parsing rules, but not referenced from the parse state ta
ble | 209 // Sets used while parsing rules, but not referenced from the parse state ta
ble |
215 fRuleSets[kRuleSet_rule_char-128] = UnicodeSet(UnicodeString(TRUE, gRuleSe
t_rule_char_pattern, -1), *status); | 210 fRuleSets[kRuleSet_rule_char-128] = UnicodeSet(UnicodeString(TRUE, gRuleSe
t_rule_char_pattern, -1), *status); |
216 fRuleSets[kRuleSet_digit_char-128] = UnicodeSet(UnicodeString(TRUE, gRuleSe
t_digit_char_pattern, -1), *status); | 211 fRuleSets[kRuleSet_digit_char-128].add((UChar)0x30, (UChar)0x39); // [0-9
] |
| 212 fRuleSets[kRuleSet_ascii_letter-128].add((UChar)0x41, (UChar)0x5A); // [A-Z
] |
| 213 fRuleSets[kRuleSet_ascii_letter-128].add((UChar)0x61, (UChar)0x7A); // [a-z
] |
217 fRuleDigitsAlias = &fRuleSets[kRuleSet_digit_char-128]; | 214 fRuleDigitsAlias = &fRuleSets[kRuleSet_digit_char-128]; |
218 for (i=0; i<(int32_t)(sizeof(fRuleSets)/sizeof(fRuleSets[0])); i++) { | 215 for (i=0; i<UPRV_LENGTHOF(fRuleSets); i++) { |
219 fRuleSets[i].compact(); | 216 fRuleSets[i].compact(); |
220 } | 217 } |
221 | 218 |
222 // Finally, initialize an empty string for utility purposes | 219 // Finally, initialize an empty string for utility purposes |
223 fEmptyText = utext_openUChars(NULL, NULL, 0, status); | 220 fEmptyText = utext_openUChars(NULL, NULL, 0, status); |
224 | 221 |
225 return; // If we reached this point, everything is fine so just exit | 222 if (U_SUCCESS(*status)) { |
| 223 return; |
| 224 } |
226 | 225 |
227 ExitConstrDeleteAll: // Remove fPropSets and fRuleSets and return error | 226 ExitConstrDeleteAll: // Remove fPropSets and fRuleSets and return error |
228 for (i=0; i<URX_LAST_SET; i++) { | 227 for (i=0; i<URX_LAST_SET; i++) { |
229 delete fPropSets[i]; | 228 delete fPropSets[i]; |
230 fPropSets[i] = NULL; | 229 fPropSets[i] = NULL; |
231 } | 230 } |
232 *status = U_MEMORY_ALLOCATION_ERROR; | 231 if (U_SUCCESS(*status)) { |
| 232 *status = U_MEMORY_ALLOCATION_ERROR; |
| 233 } |
233 } | 234 } |
234 | 235 |
235 | 236 |
236 RegexStaticSets::~RegexStaticSets() { | 237 RegexStaticSets::~RegexStaticSets() { |
237 int32_t i; | 238 int32_t i; |
238 | 239 |
239 for (i=0; i<URX_LAST_SET; i++) { | 240 for (i=0; i<URX_LAST_SET; i++) { |
240 delete fPropSets[i]; | 241 delete fPropSets[i]; |
241 fPropSets[i] = NULL; | 242 fPropSets[i] = NULL; |
242 } | 243 } |
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
279 } | 280 } |
280 } | 281 } |
281 U_CDECL_END | 282 U_CDECL_END |
282 | 283 |
283 void RegexStaticSets::initGlobals(UErrorCode *status) { | 284 void RegexStaticSets::initGlobals(UErrorCode *status) { |
284 umtx_initOnce(gStaticSetsInitOnce, &initStaticSets, *status); | 285 umtx_initOnce(gStaticSetsInitOnce, &initStaticSets, *status); |
285 } | 286 } |
286 | 287 |
287 U_NAMESPACE_END | 288 U_NAMESPACE_END |
288 #endif // !UCONFIG_NO_REGULAR_EXPRESSIONS | 289 #endif // !UCONFIG_NO_REGULAR_EXPRESSIONS |
OLD | NEW |