OLD | NEW |
1 // | 1 // |
2 // regexcmp.h | 2 // regexcmp.h |
3 // | 3 // |
4 // Copyright (C) 2002-2012, International Business Machines Corporation and oth
ers. | 4 // Copyright (C) 2002-2014, International Business Machines Corporation and oth
ers. |
5 // All Rights Reserved. | 5 // All Rights Reserved. |
6 // | 6 // |
7 // This file contains declarations for the class RegexCompile | 7 // This file contains declarations for the class RegexCompile |
8 // | 8 // |
9 // This class is internal to the regular expression implementation. | 9 // This class is internal to the regular expression implementation. |
10 // For the public Regular Expression API, see the file "unicode/regex.h" | 10 // For the public Regular Expression API, see the file "unicode/regex.h" |
11 // | 11 // |
12 | 12 |
13 | 13 |
14 #ifndef RBBISCAN_H | 14 #ifndef RBBISCAN_H |
15 #define RBBISCAN_H | 15 #define RBBISCAN_H |
16 | 16 |
17 #include "unicode/utypes.h" | 17 #include "unicode/utypes.h" |
18 #if !UCONFIG_NO_REGULAR_EXPRESSIONS | 18 #if !UCONFIG_NO_REGULAR_EXPRESSIONS |
19 | 19 |
20 #include "unicode/uobject.h" | 20 #include "unicode/uobject.h" |
21 #include "unicode/uniset.h" | 21 #include "unicode/uniset.h" |
22 #include "unicode/parseerr.h" | 22 #include "unicode/parseerr.h" |
23 #include "uhash.h" | 23 #include "uhash.h" |
24 #include "uvector.h" | 24 #include "uvector.h" |
| 25 #include "uvectr32.h" |
25 | 26 |
26 | 27 |
27 | 28 |
28 U_NAMESPACE_BEGIN | 29 U_NAMESPACE_BEGIN |
29 | 30 |
30 | 31 |
31 //------------------------------------------------------------------------------
-- | 32 //------------------------------------------------------------------------------
-- |
32 // | 33 // |
33 // class RegexCompile Contains the regular expression compiler. | 34 // class RegexCompile Contains the regular expression compiler. |
34 // | 35 // |
35 //------------------------------------------------------------------------------
-- | 36 //------------------------------------------------------------------------------
-- |
36 struct RegexTableEl; | 37 struct RegexTableEl; |
37 class RegexPattern; | 38 class RegexPattern; |
38 | 39 |
39 | 40 |
40 class RegexCompile : public UMemory { | 41 class U_I18N_API RegexCompile : public UMemory { |
41 public: | 42 public: |
42 | 43 |
43 enum { | 44 enum { |
44 kStackSize = 100 // The size of the state stack for | 45 kStackSize = 100 // The size of the state stack for |
45 }; // pattern parsing. Corresponds roughly | 46 }; // pattern parsing. Corresponds roughly |
46 // to the depth of parentheses nesting | 47 // to the depth of parentheses nesting |
47 // that is allowed in the rules. | 48 // that is allowed in the rules. |
48 | 49 |
49 struct RegexPatternChar { | 50 struct RegexPatternChar { |
50 UChar32 fChar; | 51 UChar32 fChar; |
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
96 // there is space to add a
n opcode there. | 97 // there is space to add a
n opcode there. |
97 void compileSet(UnicodeSet *theSet); // Generate the compiled pa
ttern for | 98 void compileSet(UnicodeSet *theSet); // Generate the compiled pa
ttern for |
98 // a reference to a Unico
deSet. | 99 // a reference to a Unico
deSet. |
99 void compileInterval(int32_t InitOp, // Generate the code for a
{min,max} quantifier. | 100 void compileInterval(int32_t InitOp, // Generate the code for a
{min,max} quantifier. |
100 int32_t LoopOp); | 101 int32_t LoopOp); |
101 UBool compileInlineInterval(); // Generate inline code for
a {min,max} quantifier | 102 UBool compileInlineInterval(); // Generate inline code for
a {min,max} quantifier |
102 void literalChar(UChar32 c); // Compile a literal char | 103 void literalChar(UChar32 c); // Compile a literal char |
103 void fixLiterals(UBool split=FALSE); // Generate code for pendin
g literal characters. | 104 void fixLiterals(UBool split=FALSE); // Generate code for pendin
g literal characters. |
104 void insertOp(int32_t where); // Open up a slot for a new
op in the | 105 void insertOp(int32_t where); // Open up a slot for a new
op in the |
105 // generated code at the
specified location. | 106 // generated code at the
specified location. |
106 void appendOp(int32_t op); // Append a new op to the c
ompiled pattern. | |
107 int32_t allocateData(int32_t size); // Allocate space in the ma
tcher data area. | |
108 // Return index of the ne
wly allocated data. | |
109 int32_t allocateStackData(int32_t size); // Allocate space in the ma
tch back-track stack frame. | |
110 // Return offset index in
the frame. | |
111 int32_t minMatchLength(int32_t start, | 107 int32_t minMatchLength(int32_t start, |
112 int32_t end); | 108 int32_t end); |
113 int32_t maxMatchLength(int32_t start, | 109 int32_t maxMatchLength(int32_t start, |
114 int32_t end); | 110 int32_t end); |
115 void matchStartType(); | 111 void matchStartType(); |
116 void stripNOPs(); | 112 void stripNOPs(); |
117 | 113 |
118 void setEval(int32_t op); | 114 void setEval(int32_t op); |
119 void setPushOp(int32_t op); | 115 void setPushOp(int32_t op); |
120 UChar32 scanNamedChar(); | 116 UChar32 scanNamedChar(); |
121 UnicodeSet *createSetForProperty(const UnicodeString &propName, UBool negate
d); | 117 UnicodeSet *createSetForProperty(const UnicodeString &propName, UBool negate
d); |
122 | 118 |
| 119 public: // Public for testing only. |
| 120 static void U_EXPORT2 findCaseInsensitiveStarters(UChar32 c, UnicodeSet *sta
rterChars); |
| 121 private: |
| 122 |
123 | 123 |
124 UErrorCode *fStatus; | 124 UErrorCode *fStatus; |
125 RegexPattern *fRXPat; | 125 RegexPattern *fRXPat; |
126 UParseError *fParseErr; | 126 UParseError *fParseErr; |
127 | 127 |
128 // | 128 // |
129 // Data associated with low level character scanning | 129 // Data associated with low level character scanning |
130 // | 130 // |
131 int64_t fScanIndex; // Index of current charact
er being processed | 131 int64_t fScanIndex; // Index of current charact
er being processed |
132 // in the rule input stri
ng. | 132 // in the rule input stri
ng. |
(...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
180 // spot reserved for use
when a quantifier | 180 // spot reserved for use
when a quantifier |
181 // needs to add a SAVE at
the start of a (block) | 181 // needs to add a SAVE at
the start of a (block) |
182 // The negative value (-1
, -2,...) indicates | 182 // The negative value (-1
, -2,...) indicates |
183 // the kind of paren that
opened the frame. Some | 183 // the kind of paren that
opened the frame. Some |
184 // need special handling
on close. | 184 // need special handling
on close. |
185 | 185 |
186 | 186 |
187 int32_t fMatchOpenParen; // The position in the comp
iled pattern | 187 int32_t fMatchOpenParen; // The position in the comp
iled pattern |
188 // of the slot reserved f
or a state save | 188 // of the slot reserved f
or a state save |
189 // at the start of the mo
st recently processed | 189 // at the start of the mo
st recently processed |
190 // parenthesized block. U
pdated when processing | 190 // parenthesized block. |
191 // a close to the locatio
n for the corresponding open. | |
192 | |
193 int32_t fMatchCloseParen; // The position in the patt
ern of the first | 191 int32_t fMatchCloseParen; // The position in the patt
ern of the first |
194 // location after the mos
t recently processed | 192 // location after the mos
t recently processed |
195 // parenthesized block. | 193 // parenthesized block. |
196 | 194 |
197 int32_t fIntervalLow; // {lower, upper} interval
quantifier values. | 195 int32_t fIntervalLow; // {lower, upper} interval
quantifier values. |
198 int32_t fIntervalUpper; // Placed here temporarily,
when pattern is | 196 int32_t fIntervalUpper; // Placed here temporarily,
when pattern is |
199 // initially scanned. Ea
ch new interval | 197 // initially scanned. Ea
ch new interval |
200 // encountered overwrites
these values. | 198 // encountered overwrites
these values. |
201 // -1 for the upper inter
val value means none | 199 // -1 for the upper inter
val value means none |
202 // was specified (unlimit
ed occurrences.) | 200 // was specified (unlimit
ed occurrences.) |
(...skipping 23 matching lines...) Expand all Loading... |
226 setDifference2 = 3 << 16 | 4, // '--' set difference operator | 224 setDifference2 = 3 << 16 | 4, // '--' set difference operator |
227 setIntersection2 = 3 << 16 | 5, // '&&' set intersection operator | 225 setIntersection2 = 3 << 16 | 5, // '&&' set intersection operator |
228 setUnion = 4 << 16 | 6, // implicit union of adjacent items | 226 setUnion = 4 << 16 | 6, // implicit union of adjacent items |
229 setDifference1 = 4 << 16 | 7, // '-', single dash difference op, for co
mpatibility with old UnicodeSet. | 227 setDifference1 = 4 << 16 | 7, // '-', single dash difference op, for co
mpatibility with old UnicodeSet. |
230 setIntersection1 = 4 << 16 | 8 // '&', single amp intersection op, for c
ompatibility with old UnicodeSet. | 228 setIntersection1 = 4 << 16 | 8 // '&', single amp intersection op, for c
ompatibility with old UnicodeSet. |
231 }; | 229 }; |
232 | 230 |
233 U_NAMESPACE_END | 231 U_NAMESPACE_END |
234 #endif // !UCONFIG_NO_REGULAR_EXPRESSIONS | 232 #endif // !UCONFIG_NO_REGULAR_EXPRESSIONS |
235 #endif // RBBISCAN_H | 233 #endif // RBBISCAN_H |
OLD | NEW |