| OLD | NEW |
| 1 // | 1 // |
| 2 // regexcmp.h | 2 // regexcmp.h |
| 3 // | 3 // |
| 4 // Copyright (C) 2002-2012, International Business Machines Corporation and oth
ers. | 4 // Copyright (C) 2002-2014, International Business Machines Corporation and oth
ers. |
| 5 // All Rights Reserved. | 5 // All Rights Reserved. |
| 6 // | 6 // |
| 7 // This file contains declarations for the class RegexCompile | 7 // This file contains declarations for the class RegexCompile |
| 8 // | 8 // |
| 9 // This class is internal to the regular expression implementation. | 9 // This class is internal to the regular expression implementation. |
| 10 // For the public Regular Expression API, see the file "unicode/regex.h" | 10 // For the public Regular Expression API, see the file "unicode/regex.h" |
| 11 // | 11 // |
| 12 | 12 |
| 13 | 13 |
| 14 #ifndef RBBISCAN_H | 14 #ifndef RBBISCAN_H |
| 15 #define RBBISCAN_H | 15 #define RBBISCAN_H |
| 16 | 16 |
| 17 #include "unicode/utypes.h" | 17 #include "unicode/utypes.h" |
| 18 #if !UCONFIG_NO_REGULAR_EXPRESSIONS | 18 #if !UCONFIG_NO_REGULAR_EXPRESSIONS |
| 19 | 19 |
| 20 #include "unicode/uobject.h" | 20 #include "unicode/uobject.h" |
| 21 #include "unicode/uniset.h" | 21 #include "unicode/uniset.h" |
| 22 #include "unicode/parseerr.h" | 22 #include "unicode/parseerr.h" |
| 23 #include "uhash.h" | 23 #include "uhash.h" |
| 24 #include "uvector.h" | 24 #include "uvector.h" |
| 25 #include "uvectr32.h" |
| 25 | 26 |
| 26 | 27 |
| 27 | 28 |
| 28 U_NAMESPACE_BEGIN | 29 U_NAMESPACE_BEGIN |
| 29 | 30 |
| 30 | 31 |
| 31 //------------------------------------------------------------------------------
-- | 32 //------------------------------------------------------------------------------
-- |
| 32 // | 33 // |
| 33 // class RegexCompile Contains the regular expression compiler. | 34 // class RegexCompile Contains the regular expression compiler. |
| 34 // | 35 // |
| 35 //------------------------------------------------------------------------------
-- | 36 //------------------------------------------------------------------------------
-- |
| 36 struct RegexTableEl; | 37 struct RegexTableEl; |
| 37 class RegexPattern; | 38 class RegexPattern; |
| 38 | 39 |
| 39 | 40 |
| 40 class RegexCompile : public UMemory { | 41 class U_I18N_API RegexCompile : public UMemory { |
| 41 public: | 42 public: |
| 42 | 43 |
| 43 enum { | 44 enum { |
| 44 kStackSize = 100 // The size of the state stack for | 45 kStackSize = 100 // The size of the state stack for |
| 45 }; // pattern parsing. Corresponds roughly | 46 }; // pattern parsing. Corresponds roughly |
| 46 // to the depth of parentheses nesting | 47 // to the depth of parentheses nesting |
| 47 // that is allowed in the pattern. | 48 // that is allowed in the pattern. |
| 48 | 49 |
| 49 struct RegexPatternChar { | 50 struct RegexPatternChar { |
| 50 UChar32 fChar; | 51 UChar32 fChar; |
| (...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 96 // there is space to add a
n opcode there. | 97 // there is space to add a
n opcode there. |
| 97 void compileSet(UnicodeSet *theSet); // Generate the compiled pa
ttern for | 98 void compileSet(UnicodeSet *theSet); // Generate the compiled pa
ttern for |
| 98 // a reference to a Unico
deSet. | 99 // a reference to a Unico
deSet. |
| 99 void compileInterval(int32_t InitOp, // Generate the code for a
{min,max} quantifier. | 100 void compileInterval(int32_t InitOp, // Generate the code for a
{min,max} quantifier. |
| 100 int32_t LoopOp); | 101 int32_t LoopOp); |
| 101 UBool compileInlineInterval(); // Generate inline code for
a {min,max} quantifier | 102 UBool compileInlineInterval(); // Generate inline code for
a {min,max} quantifier |
| 102 void literalChar(UChar32 c); // Compile a literal char | 103 void literalChar(UChar32 c); // Compile a literal char |
| 103 void fixLiterals(UBool split=FALSE); // Generate code for pendin
g literal characters. | 104 void fixLiterals(UBool split=FALSE); // Generate code for pendin
g literal characters. |
| 104 void insertOp(int32_t where); // Open up a slot for a new
op in the | 105 void insertOp(int32_t where); // Open up a slot for a new
op in the |
| 105 // generated code at the
specified location. | 106 // generated code at the
specified location. |
| 106 void appendOp(int32_t op); // Append a new op to the c
ompiled pattern. | |
| 107 int32_t allocateData(int32_t size); // Allocate space in the ma
tcher data area. | |
| 108 // Return index of the ne
wly allocated data. | |
| 109 int32_t allocateStackData(int32_t size); // Allocate space in the ma
tch back-track stack frame. | |
| 110 // Return offset index in
the frame. | |
| 111 int32_t minMatchLength(int32_t start, | 107 int32_t minMatchLength(int32_t start, |
| 112 int32_t end); | 108 int32_t end); |
| 113 int32_t maxMatchLength(int32_t start, | 109 int32_t maxMatchLength(int32_t start, |
| 114 int32_t end); | 110 int32_t end); |
| 115 void matchStartType(); | 111 void matchStartType(); |
| 116 void stripNOPs(); | 112 void stripNOPs(); |
| 117 | 113 |
| 118 void setEval(int32_t op); | 114 void setEval(int32_t op); |
| 119 void setPushOp(int32_t op); | 115 void setPushOp(int32_t op); |
| 120 UChar32 scanNamedChar(); | 116 UChar32 scanNamedChar(); |
| 121 UnicodeSet *createSetForProperty(const UnicodeString &propName, UBool negate
d); | 117 UnicodeSet *createSetForProperty(const UnicodeString &propName, UBool negate
d); |
| 122 | 118 |
| 119 public: // Public for testing only. |
| 120 static void U_EXPORT2 findCaseInsensitiveStarters(UChar32 c, UnicodeSet *sta
rterChars); |
| 121 private: |
| 122 |
| 123 | 123 |
| 124 UErrorCode *fStatus; | 124 UErrorCode *fStatus; |
| 125 RegexPattern *fRXPat; | 125 RegexPattern *fRXPat; |
| 126 UParseError *fParseErr; | 126 UParseError *fParseErr; |
| 127 | 127 |
| 128 // | 128 // |
| 129 // Data associated with low level character scanning | 129 // Data associated with low level character scanning |
| 130 // | 130 // |
| 131 int64_t fScanIndex; // Index of current charact
er being processed | 131 int64_t fScanIndex; // Index of current charact
er being processed |
| 132 // in the rule input stri
ng. | 132 // in the rule input stri
ng. |
| (...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 180 // spot reserved for use
when a quantifier | 180 // spot reserved for use
when a quantifier |
| 181 // needs to add a SAVE at
the start of a (block) | 181 // needs to add a SAVE at
the start of a (block) |
| 182 // The negative value (-1
, -2,...) indicates | 182 // The negative value (-1
, -2,...) indicates |
| 183 // the kind of paren that
opened the frame. Some | 183 // the kind of paren that
opened the frame. Some |
| 184 // need special handling
on close. | 184 // need special handling
on close. |
| 185 | 185 |
| 186 | 186 |
| 187 int32_t fMatchOpenParen; // The position in the comp
iled pattern | 187 int32_t fMatchOpenParen; // The position in the comp
iled pattern |
| 188 // of the slot reserved f
or a state save | 188 // of the slot reserved f
or a state save |
| 189 // at the start of the mo
st recently processed | 189 // at the start of the mo
st recently processed |
| 190 // parenthesized block. U
pdated when processing | 190 // parenthesized block. |
| 191 // a close to the locatio
n for the corresponding open. | |
| 192 | |
| 193 int32_t fMatchCloseParen; // The position in the patt
ern of the first | 191 int32_t fMatchCloseParen; // The position in the patt
ern of the first |
| 194 // location after the mos
t recently processed | 192 // location after the mos
t recently processed |
| 195 // parenthesized block. | 193 // parenthesized block. |
| 196 | 194 |
| 197 int32_t fIntervalLow; // {lower, upper} interval
quantifier values. | 195 int32_t fIntervalLow; // {lower, upper} interval
quantifier values. |
| 198 int32_t fIntervalUpper; // Placed here temporarily,
when pattern is | 196 int32_t fIntervalUpper; // Placed here temporarily,
when pattern is |
| 199 // initially scanned. Ea
ch new interval | 197 // initially scanned. Ea
ch new interval |
| 200 // encountered overwrites
these values. | 198 // encountered overwrites
these values. |
| 201 // -1 for the upper inter
val value means none | 199 // -1 for the upper inter
val value means none |
| 202 // was specified (unlimit
ed occurrences.) | 200 // was specified (unlimit
ed occurrences.) |
| (...skipping 23 matching lines...) Expand all Loading... |
| 226 setDifference2 = 3 << 16 | 4, // '--' set difference operator | 224 setDifference2 = 3 << 16 | 4, // '--' set difference operator |
| 227 setIntersection2 = 3 << 16 | 5, // '&&' set intersection operator | 225 setIntersection2 = 3 << 16 | 5, // '&&' set intersection operator |
| 228 setUnion = 4 << 16 | 6, // implicit union of adjacent items | 226 setUnion = 4 << 16 | 6, // implicit union of adjacent items |
| 229 setDifference1 = 4 << 16 | 7, // '-', single dash difference op, for co
mpatibility with old UnicodeSet. | 227 setDifference1 = 4 << 16 | 7, // '-', single dash difference op, for co
mpatibility with old UnicodeSet. |
| 230 setIntersection1 = 4 << 16 | 8 // '&', single amp intersection op, for c
ompatibility with old UnicodeSet. | 228 setIntersection1 = 4 << 16 | 8 // '&', single amp intersection op, for c
ompatibility with old UnicodeSet. |
| 231 }; | 229 }; |
| 232 | 230 |
| 233 U_NAMESPACE_END | 231 U_NAMESPACE_END |
| 234 #endif // !UCONFIG_NO_REGULAR_EXPRESSIONS | 232 #endif // !UCONFIG_NO_REGULAR_EXPRESSIONS |
| 235 #endif // RBBISCAN_H | 233 #endif // RBBISCAN_H |
| OLD | NEW |