OLD | NEW |
1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file |
2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
4 | 4 |
5 #ifndef RUNTIME_VM_REGEXP_PARSER_H_ | 5 #ifndef RUNTIME_VM_REGEXP_PARSER_H_ |
6 #define RUNTIME_VM_REGEXP_PARSER_H_ | 6 #define RUNTIME_VM_REGEXP_PARSER_H_ |
7 | 7 |
8 #include "vm/allocation.h" | 8 #include "vm/allocation.h" |
9 #include "vm/growable_array.h" | 9 #include "vm/growable_array.h" |
10 #include "vm/regexp_ast.h" | 10 #include "vm/regexp_ast.h" |
11 | 11 |
12 namespace dart { | 12 namespace dart { |
13 | 13 |
14 // Accumulates RegExp atoms and assertions into lists of terms and alternatives. | 14 // Accumulates RegExp atoms and assertions into lists of terms and alternatives. |
15 class RegExpBuilder: public ZoneAllocated { | 15 class RegExpBuilder : public ZoneAllocated { |
16 public: | 16 public: |
17 RegExpBuilder(); | 17 RegExpBuilder(); |
18 | 18 |
19 void AddCharacter(uint16_t character); | 19 void AddCharacter(uint16_t character); |
20 // "Adds" an empty expression. Does nothing except consume a | 20 // "Adds" an empty expression. Does nothing except consume a |
21 // following quantifier | 21 // following quantifier |
22 void AddEmpty(); | 22 void AddEmpty(); |
23 void AddAtom(RegExpTree* tree); | 23 void AddAtom(RegExpTree* tree); |
24 void AddAssertion(RegExpTree* tree); | 24 void AddAssertion(RegExpTree* tree); |
25 void NewAlternative(); // '|' | 25 void NewAlternative(); // '|' |
26 void AddQuantifierToAtom( | 26 void AddQuantifierToAtom(intptr_t min, |
27 intptr_t min, intptr_t max, RegExpQuantifier::QuantifierType type); | 27 intptr_t max, |
| 28 RegExpQuantifier::QuantifierType type); |
28 RegExpTree* ToRegExp(); | 29 RegExpTree* ToRegExp(); |
29 | 30 |
30 private: | 31 private: |
31 void FlushCharacters(); | 32 void FlushCharacters(); |
32 void FlushText(); | 33 void FlushText(); |
33 void FlushTerms(); | 34 void FlushTerms(); |
34 | 35 |
35 Zone* zone() const { return zone_; } | 36 Zone* zone() const { return zone_; } |
36 | 37 |
37 Zone* zone_; | 38 Zone* zone_; |
38 bool pending_empty_; | 39 bool pending_empty_; |
39 ZoneGrowableArray<uint16_t>* characters_; | 40 ZoneGrowableArray<uint16_t>* characters_; |
40 GrowableArray<RegExpTree*> terms_; | 41 GrowableArray<RegExpTree*> terms_; |
41 GrowableArray<RegExpTree*> text_; | 42 GrowableArray<RegExpTree*> text_; |
42 GrowableArray<RegExpTree*> alternatives_; | 43 GrowableArray<RegExpTree*> alternatives_; |
43 #ifdef DEBUG | 44 #ifdef DEBUG |
44 enum {ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM} last_added_; | 45 enum { ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM } last_added_; |
45 #define LAST(x) last_added_ = x; | 46 #define LAST(x) last_added_ = x; |
46 #else | 47 #else |
47 #define LAST(x) | 48 #define LAST(x) |
48 #endif | 49 #endif |
49 }; | 50 }; |
50 | 51 |
51 class RegExpParser : public ValueObject { | 52 class RegExpParser : public ValueObject { |
52 public: | 53 public: |
53 RegExpParser(const String& in, | 54 RegExpParser(const String& in, String* error, bool multiline_mode); |
54 String* error, | |
55 bool multiline_mode); | |
56 | 55 |
57 static bool ParseFunction(ParsedFunction* parsed_function); | 56 static bool ParseFunction(ParsedFunction* parsed_function); |
58 | 57 |
59 static bool ParseRegExp(const String& input, | 58 static bool ParseRegExp(const String& input, |
60 bool multiline, | 59 bool multiline, |
61 RegExpCompileData* result); | 60 RegExpCompileData* result); |
62 | 61 |
63 RegExpTree* ParsePattern(); | 62 RegExpTree* ParsePattern(); |
64 RegExpTree* ParseDisjunction(); | 63 RegExpTree* ParseDisjunction(); |
65 RegExpTree* ParseGroup(); | 64 RegExpTree* ParseGroup(); |
(...skipping 23 matching lines...) Expand all Loading... |
89 void ReportError(const char* message); | 88 void ReportError(const char* message); |
90 void Advance(); | 89 void Advance(); |
91 void Advance(intptr_t dist); | 90 void Advance(intptr_t dist); |
92 void Reset(intptr_t pos); | 91 void Reset(intptr_t pos); |
93 | 92 |
94 // Reports whether the pattern might be used as a literal search string. | 93 // Reports whether the pattern might be used as a literal search string. |
95 // Only use if the result of the parse is a single atom node. | 94 // Only use if the result of the parse is a single atom node. |
96 bool simple(); | 95 bool simple(); |
97 bool contains_anchor() { return contains_anchor_; } | 96 bool contains_anchor() { return contains_anchor_; } |
98 void set_contains_anchor() { contains_anchor_ = true; } | 97 void set_contains_anchor() { contains_anchor_ = true; } |
99 intptr_t captures_started() { return captures_ == NULL ? | 98 intptr_t captures_started() { |
100 0 : captures_->length(); } | 99 return captures_ == NULL ? 0 : captures_->length(); |
| 100 } |
101 intptr_t position() { return next_pos_ - 1; } | 101 intptr_t position() { return next_pos_ - 1; } |
102 bool failed() { return failed_; } | 102 bool failed() { return failed_; } |
103 | 103 |
104 static const intptr_t kMaxCaptures = 1 << 16; | 104 static const intptr_t kMaxCaptures = 1 << 16; |
105 static const uint32_t kEndMarker = (1 << 21); | 105 static const uint32_t kEndMarker = (1 << 21); |
106 | 106 |
107 private: | 107 private: |
108 enum SubexpressionType { | 108 enum SubexpressionType { |
109 INITIAL, | 109 INITIAL, |
110 CAPTURE, // All positive values represent captures. | 110 CAPTURE, // All positive values represent captures. |
111 POSITIVE_LOOKAHEAD, | 111 POSITIVE_LOOKAHEAD, |
112 NEGATIVE_LOOKAHEAD, | 112 NEGATIVE_LOOKAHEAD, |
113 GROUPING | 113 GROUPING |
114 }; | 114 }; |
115 | 115 |
116 class RegExpParserState : public ZoneAllocated { | 116 class RegExpParserState : public ZoneAllocated { |
117 public: | 117 public: |
118 RegExpParserState(RegExpParserState* previous_state, | 118 RegExpParserState(RegExpParserState* previous_state, |
119 SubexpressionType group_type, | 119 SubexpressionType group_type, |
120 intptr_t disjunction_capture_index, | 120 intptr_t disjunction_capture_index, |
121 Zone *zone) | 121 Zone* zone) |
122 : previous_state_(previous_state), | 122 : previous_state_(previous_state), |
123 builder_(new(zone) RegExpBuilder()), | 123 builder_(new (zone) RegExpBuilder()), |
124 group_type_(group_type), | 124 group_type_(group_type), |
125 disjunction_capture_index_(disjunction_capture_index) {} | 125 disjunction_capture_index_(disjunction_capture_index) {} |
126 // Parser state of containing expression, if any. | 126 // Parser state of containing expression, if any. |
127 RegExpParserState* previous_state() { return previous_state_; } | 127 RegExpParserState* previous_state() { return previous_state_; } |
128 bool IsSubexpression() { return previous_state_ != NULL; } | 128 bool IsSubexpression() { return previous_state_ != NULL; } |
129 // RegExpBuilder building this regexp's AST. | 129 // RegExpBuilder building this regexp's AST. |
130 RegExpBuilder* builder() { return builder_; } | 130 RegExpBuilder* builder() { return builder_; } |
131 // Type of regexp being parsed (parenthesized group or entire regexp). | 131 // Type of regexp being parsed (parenthesized group or entire regexp). |
132 SubexpressionType group_type() { return group_type_; } | 132 SubexpressionType group_type() { return group_type_; } |
133 // Index in captures array of first capture in this sub-expression, if any. | 133 // Index in captures array of first capture in this sub-expression, if any. |
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
167 bool multiline_; | 167 bool multiline_; |
168 bool simple_; | 168 bool simple_; |
169 bool contains_anchor_; | 169 bool contains_anchor_; |
170 bool is_scanned_for_captures_; | 170 bool is_scanned_for_captures_; |
171 bool failed_; | 171 bool failed_; |
172 }; | 172 }; |
173 | 173 |
174 } // namespace dart | 174 } // namespace dart |
175 | 175 |
176 #endif // RUNTIME_VM_REGEXP_PARSER_H_ | 176 #endif // RUNTIME_VM_REGEXP_PARSER_H_ |
OLD | NEW |