OLD | NEW |
(Empty) | |
| 1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE file. |
| 4 |
| 5 #ifndef VM_REGEXP_PARSER_H_ |
| 6 #define VM_REGEXP_PARSER_H_ |
| 7 |
| 8 // SNIP |
| 9 |
| 10 namespace dart { |
| 11 |
| 12 // SNIP |
| 13 |
| 14 // Accumulates RegExp atoms and assertions into lists of terms and alternatives. |
| 15 class RegExpBuilder: public ZoneObject { |
| 16 public: |
| 17 explicit RegExpBuilder(Zone* zone); |
| 18 void AddCharacter(uc16 character); |
| 19 // "Adds" an empty expression. Does nothing except consume a |
| 20 // following quantifier |
| 21 void AddEmpty(); |
| 22 void AddAtom(RegExpTree* tree); |
| 23 void AddAssertion(RegExpTree* tree); |
| 24 void NewAlternative(); // '|' |
| 25 void AddQuantifierToAtom( |
| 26 int min, int max, RegExpQuantifier::QuantifierType type); |
| 27 RegExpTree* ToRegExp(); |
| 28 |
| 29 private: |
| 30 void FlushCharacters(); |
| 31 void FlushText(); |
| 32 void FlushTerms(); |
| 33 Zone* zone() const { return zone_; } |
| 34 |
| 35 Zone* zone_; |
| 36 bool pending_empty_; |
| 37 ZoneList<uc16>* characters_; |
| 38 BufferedZoneList<RegExpTree, 2> terms_; |
| 39 BufferedZoneList<RegExpTree, 2> text_; |
| 40 BufferedZoneList<RegExpTree, 2> alternatives_; |
| 41 #ifdef DEBUG |
| 42 enum {ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM} last_added_; |
| 43 #define LAST(x) last_added_ = x; |
| 44 #else |
| 45 #define LAST(x) |
| 46 #endif |
| 47 }; |
| 48 |
| 49 |
| 50 class RegExpParser BASE_EMBEDDED { |
| 51 public: |
| 52 RegExpParser(FlatStringReader* in, |
| 53 Handle<String>* error, |
| 54 bool multiline_mode, |
| 55 Zone* zone); |
| 56 |
| 57 static bool ParseRegExp(FlatStringReader* input, |
| 58 bool multiline, |
| 59 RegExpCompileData* result, |
| 60 Zone* zone); |
| 61 |
| 62 RegExpTree* ParsePattern(); |
| 63 RegExpTree* ParseDisjunction(); |
| 64 RegExpTree* ParseGroup(); |
| 65 RegExpTree* ParseCharacterClass(); |
| 66 |
| 67 // Parses a {...,...} quantifier and stores the range in the given |
| 68 // out parameters. |
| 69 bool ParseIntervalQuantifier(int* min_out, int* max_out); |
| 70 |
| 71 // Parses and returns a single escaped character. The character |
| 72 // must not be 'b' or 'B' since they are usually handle specially. |
| 73 uc32 ParseClassCharacterEscape(); |
| 74 |
| 75 // Checks whether the following is a length-digit hexadecimal number, |
| 76 // and sets the value if it is. |
| 77 bool ParseHexEscape(int length, uc32* value); |
| 78 |
| 79 uc32 ParseOctalLiteral(); |
| 80 |
| 81 // Tries to parse the input as a back reference. If successful it |
| 82 // stores the result in the output parameter and returns true. If |
| 83 // it fails it will push back the characters read so the same characters |
| 84 // can be reparsed. |
| 85 bool ParseBackReferenceIndex(int* index_out); |
| 86 |
| 87 CharacterRange ParseClassAtom(uc16* char_class); |
| 88 RegExpTree* ReportError(Vector<const char> message); |
| 89 void Advance(); |
| 90 void Advance(int dist); |
| 91 void Reset(int pos); |
| 92 |
| 93 // Reports whether the pattern might be used as a literal search string. |
| 94 // Only use if the result of the parse is a single atom node. |
| 95 bool simple(); |
| 96 bool contains_anchor() { return contains_anchor_; } |
| 97 void set_contains_anchor() { contains_anchor_ = true; } |
| 98 int captures_started() { return captures_ == NULL ? 0 : captures_->length(); } |
| 99 int position() { return next_pos_ - 1; } |
| 100 bool failed() { return failed_; } |
| 101 |
| 102 static const int kMaxCaptures = 1 << 16; |
| 103 static const uc32 kEndMarker = (1 << 21); |
| 104 |
| 105 private: |
| 106 enum SubexpressionType { |
| 107 INITIAL, |
| 108 CAPTURE, // All positive values represent captures. |
| 109 POSITIVE_LOOKAHEAD, |
| 110 NEGATIVE_LOOKAHEAD, |
| 111 GROUPING |
| 112 }; |
| 113 |
| 114 class RegExpParserState : public ZoneObject { |
| 115 public: |
| 116 RegExpParserState(RegExpParserState* previous_state, |
| 117 SubexpressionType group_type, |
| 118 int disjunction_capture_index, |
| 119 Zone* zone) |
| 120 : previous_state_(previous_state), |
| 121 builder_(new(zone) RegExpBuilder(zone)), |
| 122 group_type_(group_type), |
| 123 disjunction_capture_index_(disjunction_capture_index) {} |
| 124 // Parser state of containing expression, if any. |
| 125 RegExpParserState* previous_state() { return previous_state_; } |
| 126 bool IsSubexpression() { return previous_state_ != NULL; } |
| 127 // RegExpBuilder building this regexp's AST. |
| 128 RegExpBuilder* builder() { return builder_; } |
| 129 // Type of regexp being parsed (parenthesized group or entire regexp). |
| 130 SubexpressionType group_type() { return group_type_; } |
| 131 // Index in captures array of first capture in this sub-expression, if any. |
| 132 // Also the capture index of this sub-expression itself, if group_type |
| 133 // is CAPTURE. |
| 134 int capture_index() { return disjunction_capture_index_; } |
| 135 |
| 136 private: |
| 137 // Linked list implementation of stack of states. |
| 138 RegExpParserState* previous_state_; |
| 139 // Builder for the stored disjunction. |
| 140 RegExpBuilder* builder_; |
| 141 // Stored disjunction type (capture, look-ahead or grouping), if any. |
| 142 SubexpressionType group_type_; |
| 143 // Stored disjunction's capture index (if any). |
| 144 int disjunction_capture_index_; |
| 145 }; |
| 146 |
| 147 Isolate* isolate() { return isolate_; } |
| 148 Zone* zone() const { return zone_; } |
| 149 |
| 150 uc32 current() { return current_; } |
| 151 bool has_more() { return has_more_; } |
| 152 bool has_next() { return next_pos_ < in()->length(); } |
| 153 uc32 Next(); |
| 154 FlatStringReader* in() { return in_; } |
| 155 void ScanForCaptures(); |
| 156 |
| 157 Isolate* isolate_; |
| 158 Zone* zone_; |
| 159 Handle<String>* error_; |
| 160 ZoneList<RegExpCapture*>* captures_; |
| 161 FlatStringReader* in_; |
| 162 uc32 current_; |
| 163 int next_pos_; |
| 164 // The capture count is only valid after we have scanned for captures. |
| 165 int capture_count_; |
| 166 bool has_more_; |
| 167 bool multiline_; |
| 168 bool simple_; |
| 169 bool contains_anchor_; |
| 170 bool is_scanned_for_captures_; |
| 171 bool failed_; |
| 172 }; |
| 173 |
| 174 // SNIP |
| 175 |
| 176 } // namespace dart |
| 177 |
| 178 #endif // VM_REGEXP_PARSER_H_ |
OLD | NEW |