OLD | NEW |
1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file |
2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
4 | 4 |
5 #ifndef VM_REGEXP_PARSER_H_ | 5 #ifndef VM_REGEXP_PARSER_H_ |
6 #define VM_REGEXP_PARSER_H_ | 6 #define VM_REGEXP_PARSER_H_ |
7 | 7 |
8 // SNIP | 8 #include "vm/allocation.h" |
| 9 #include "vm/growable_array.h" |
| 10 #include "vm/regexp_ast.h" |
9 | 11 |
10 namespace dart { | 12 namespace dart { |
11 | 13 |
12 // SNIP | 14 // Accumulates RegExp atoms and assertions into lists of terms and alternatives. |
| 15 class RegExpBuilder: public ZoneAllocated { |
| 16 public: |
| 17 RegExpBuilder(); |
13 | 18 |
14 // Accumulates RegExp atoms and assertions into lists of terms and alternatives. | 19 void AddCharacter(uint16_t character); |
15 class RegExpBuilder: public ZoneObject { | |
16 public: | |
17 explicit RegExpBuilder(Zone* zone); | |
18 void AddCharacter(uc16 character); | |
19 // "Adds" an empty expression. Does nothing except consume a | 20 // "Adds" an empty expression. Does nothing except consume a |
20 // following quantifier. | 21 // following quantifier. |
21 void AddEmpty(); | 22 void AddEmpty(); |
22 void AddAtom(RegExpTree* tree); | 23 void AddAtom(RegExpTree* tree); |
23 void AddAssertion(RegExpTree* tree); | 24 void AddAssertion(RegExpTree* tree); |
24 void NewAlternative(); // '|' | 25 void NewAlternative(); // '|' |
25 void AddQuantifierToAtom( | 26 void AddQuantifierToAtom( |
26 int min, int max, RegExpQuantifier::QuantifierType type); | 27 intptr_t min, intptr_t max, RegExpQuantifier::QuantifierType type); |
27 RegExpTree* ToRegExp(); | 28 RegExpTree* ToRegExp(); |
28 | 29 |
29 private: | 30 private: |
30 void FlushCharacters(); | 31 void FlushCharacters(); |
31 void FlushText(); | 32 void FlushText(); |
32 void FlushTerms(); | 33 void FlushTerms(); |
33 Zone* zone() const { return zone_; } | |
34 | 34 |
35 Zone* zone_; | 35 Isolate* isolate() const { return isolate_; } |
| 36 |
| 37 Isolate* isolate_; |
36 bool pending_empty_; | 38 bool pending_empty_; |
37 ZoneList<uc16>* characters_; | 39 ZoneGrowableArray<uint16_t>* characters_; |
38 BufferedZoneList<RegExpTree, 2> terms_; | 40 GrowableArray<RegExpTree*> terms_; |
39 BufferedZoneList<RegExpTree, 2> text_; | 41 GrowableArray<RegExpTree*> text_; |
40 BufferedZoneList<RegExpTree, 2> alternatives_; | 42 GrowableArray<RegExpTree*> alternatives_; |
41 #ifdef DEBUG | 43 #ifdef DEBUG |
42 enum {ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM} last_added_; | 44 enum {ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM} last_added_; |
43 #define LAST(x) last_added_ = x; | 45 #define LAST(x) last_added_ = x; |
44 #else | 46 #else |
45 #define LAST(x) | 47 #define LAST(x) |
46 #endif | 48 #endif |
47 }; | 49 }; |
48 | 50 |
| 51 class RegExpParser : public ValueObject { |
| 52 public: |
| 53 RegExpParser(const String& in, |
| 54 String* error, |
| 55 bool multiline_mode); |
49 | 56 |
50 class RegExpParser BASE_EMBEDDED { | 57 static bool ParseFunction(ParsedFunction* parsed_function); |
51 public: | |
52 RegExpParser(FlatStringReader* in, | |
53 Handle<String>* error, | |
54 bool multiline_mode, | |
55 Zone* zone); | |
56 | 58 |
57 static bool ParseRegExp(FlatStringReader* input, | 59 static bool ParseRegExp(const String& input, |
58 bool multiline, | 60 bool multiline, |
59 RegExpCompileData* result, | 61 RegExpCompileData* result); |
60 Zone* zone); | |
61 | 62 |
62 RegExpTree* ParsePattern(); | 63 RegExpTree* ParsePattern(); |
63 RegExpTree* ParseDisjunction(); | 64 RegExpTree* ParseDisjunction(); |
64 RegExpTree* ParseGroup(); | 65 RegExpTree* ParseGroup(); |
65 RegExpTree* ParseCharacterClass(); | 66 RegExpTree* ParseCharacterClass(); |
66 | 67 |
67 // Parses a {...,...} quantifier and stores the range in the given | 68 // Parses a {...,...} quantifier and stores the range in the given |
68 // out parameters. | 69 // out parameters. |
69 bool ParseIntervalQuantifier(int* min_out, int* max_out); | 70 bool ParseIntervalQuantifier(intptr_t* min_out, intptr_t* max_out); |
70 | 71 |
71 // Parses and returns a single escaped character. The character | 72 // Parses and returns a single escaped character. The character |
72 // must not be 'b' or 'B' since they are usually handled specially. | 73 // must not be 'b' or 'B' since they are usually handled specially. |
73 uc32 ParseClassCharacterEscape(); | 74 uint32_t ParseClassCharacterEscape(); |
74 | 75 |
75 // Checks whether the following is a length-digit hexadecimal number, | 76 // Checks whether the following is a length-digit hexadecimal number, |
76 // and sets the value if it is. | 77 // and sets the value if it is. |
77 bool ParseHexEscape(int length, uc32* value); | 78 bool ParseHexEscape(intptr_t length, uint32_t* value); |
78 | 79 |
79 uc32 ParseOctalLiteral(); | 80 uint32_t ParseOctalLiteral(); |
80 | 81 |
81 // Tries to parse the input as a back reference. If successful it | 82 // Tries to parse the input as a back reference. If successful it |
82 // stores the result in the output parameter and returns true. If | 83 // stores the result in the output parameter and returns true. If |
83 // it fails it will push back the characters read so the same characters | 84 // it fails it will push back the characters read so the same characters |
84 // can be reparsed. | 85 // can be reparsed. |
85 bool ParseBackReferenceIndex(int* index_out); | 86 bool ParseBackReferenceIndex(intptr_t* index_out); |
86 | 87 |
87 CharacterRange ParseClassAtom(uc16* char_class); | 88 CharacterRange ParseClassAtom(uint16_t* char_class); |
88 RegExpTree* ReportError(Vector<const char> message); | 89 void ReportError(const char* message); |
89 void Advance(); | 90 void Advance(); |
90 void Advance(int dist); | 91 void Advance(intptr_t dist); |
91 void Reset(int pos); | 92 void Reset(intptr_t pos); |
92 | 93 |
93 // Reports whether the pattern might be used as a literal search string. | 94 // Reports whether the pattern might be used as a literal search string. |
94 // Only use if the result of the parse is a single atom node. | 95 // Only use if the result of the parse is a single atom node. |
95 bool simple(); | 96 bool simple(); |
96 bool contains_anchor() { return contains_anchor_; } | 97 bool contains_anchor() { return contains_anchor_; } |
97 void set_contains_anchor() { contains_anchor_ = true; } | 98 void set_contains_anchor() { contains_anchor_ = true; } |
98 int captures_started() { return captures_ == NULL ? 0 : captures_->length(); } | 99 intptr_t captures_started() { return captures_ == NULL ? |
99 int position() { return next_pos_ - 1; } | 100 0 : captures_->length(); } |
| 101 intptr_t position() { return next_pos_ - 1; } |
100 bool failed() { return failed_; } | 102 bool failed() { return failed_; } |
101 | 103 |
102 static const int kMaxCaptures = 1 << 16; | 104 static const intptr_t kMaxCaptures = 1 << 16; |
103 static const uc32 kEndMarker = (1 << 21); | 105 static const uint32_t kEndMarker = (1 << 21); |
104 | 106 |
105 private: | 107 private: |
106 enum SubexpressionType { | 108 enum SubexpressionType { |
107 INITIAL, | 109 INITIAL, |
108 CAPTURE, // All positive values represent captures. | 110 CAPTURE, // All positive values represent captures. |
109 POSITIVE_LOOKAHEAD, | 111 POSITIVE_LOOKAHEAD, |
110 NEGATIVE_LOOKAHEAD, | 112 NEGATIVE_LOOKAHEAD, |
111 GROUPING | 113 GROUPING |
112 }; | 114 }; |
113 | 115 |
114 class RegExpParserState : public ZoneObject { | 116 class RegExpParserState : public ZoneAllocated { |
115 public: | 117 public: |
116 RegExpParserState(RegExpParserState* previous_state, | 118 RegExpParserState(RegExpParserState* previous_state, |
117 SubexpressionType group_type, | 119 SubexpressionType group_type, |
118 int disjunction_capture_index, | 120 intptr_t disjunction_capture_index, |
119 Zone* zone) | 121 Isolate *isolate) |
120 : previous_state_(previous_state), | 122 : previous_state_(previous_state), |
121 builder_(new(zone) RegExpBuilder(zone)), | 123 builder_(new(isolate) RegExpBuilder()), |
122 group_type_(group_type), | 124 group_type_(group_type), |
123 disjunction_capture_index_(disjunction_capture_index) {} | 125 disjunction_capture_index_(disjunction_capture_index) {} |
124 // Parser state of containing expression, if any. | 126 // Parser state of containing expression, if any. |
125 RegExpParserState* previous_state() { return previous_state_; } | 127 RegExpParserState* previous_state() { return previous_state_; } |
126 bool IsSubexpression() { return previous_state_ != NULL; } | 128 bool IsSubexpression() { return previous_state_ != NULL; } |
127 // RegExpBuilder building this regexp's AST. | 129 // RegExpBuilder building this regexp's AST. |
128 RegExpBuilder* builder() { return builder_; } | 130 RegExpBuilder* builder() { return builder_; } |
129 // Type of regexp being parsed (parenthesized group or entire regexp). | 131 // Type of regexp being parsed (parenthesized group or entire regexp). |
130 SubexpressionType group_type() { return group_type_; } | 132 SubexpressionType group_type() { return group_type_; } |
131 // Index in captures array of first capture in this sub-expression, if any. | 133 // Index in captures array of first capture in this sub-expression, if any. |
132 // Also the capture index of this sub-expression itself, if group_type | 134 // Also the capture index of this sub-expression itself, if group_type |
133 // is CAPTURE. | 135 // is CAPTURE. |
134 int capture_index() { return disjunction_capture_index_; } | 136 intptr_t capture_index() { return disjunction_capture_index_; } |
135 | 137 |
136 private: | 138 private: |
137 // Linked list implementation of stack of states. | 139 // Linked list implementation of stack of states. |
138 RegExpParserState* previous_state_; | 140 RegExpParserState* previous_state_; |
139 // Builder for the stored disjunction. | 141 // Builder for the stored disjunction. |
140 RegExpBuilder* builder_; | 142 RegExpBuilder* builder_; |
141 // Stored disjunction type (capture, look-ahead or grouping), if any. | 143 // Stored disjunction type (capture, look-ahead or grouping), if any. |
142 SubexpressionType group_type_; | 144 SubexpressionType group_type_; |
143 // Stored disjunction's capture index (if any). | 145 // Stored disjunction's capture index (if any). |
144 int disjunction_capture_index_; | 146 intptr_t disjunction_capture_index_; |
145 }; | 147 }; |
146 | 148 |
147 Isolate* isolate() { return isolate_; } | 149 Isolate* isolate() { return isolate_; } |
148 Zone* zone() const { return zone_; } | |
149 | 150 |
150 uc32 current() { return current_; } | 151 uint32_t current() { return current_; } |
151 bool has_more() { return has_more_; } | 152 bool has_more() { return has_more_; } |
152 bool has_next() { return next_pos_ < in()->length(); } | 153 bool has_next() { return next_pos_ < in().Length(); } |
153 uc32 Next(); | 154 uint32_t Next(); |
154 FlatStringReader* in() { return in_; } | 155 const String& in() { return in_; } |
155 void ScanForCaptures(); | 156 void ScanForCaptures(); |
156 | 157 |
157 Isolate* isolate_; | 158 Isolate* isolate_; |
158 Zone* zone_; | 159 String* error_; |
159 Handle<String>* error_; | 160 ZoneGrowableArray<RegExpCapture*>* captures_; |
160 ZoneList<RegExpCapture*>* captures_; | 161 const String& in_; |
161 FlatStringReader* in_; | 162 uint32_t current_; |
162 uc32 current_; | 163 intptr_t next_pos_; |
163 int next_pos_; | |
164 // The capture count is only valid after we have scanned for captures. | 164 // The capture count is only valid after we have scanned for captures. |
165 int capture_count_; | 165 intptr_t capture_count_; |
166 bool has_more_; | 166 bool has_more_; |
167 bool multiline_; | 167 bool multiline_; |
168 bool simple_; | 168 bool simple_; |
169 bool contains_anchor_; | 169 bool contains_anchor_; |
170 bool is_scanned_for_captures_; | 170 bool is_scanned_for_captures_; |
171 bool failed_; | 171 bool failed_; |
172 }; | 172 }; |
173 | 173 |
174 // SNIP | |
175 | |
176 } // namespace dart | 174 } // namespace dart |
177 | 175 |
178 #endif // VM_REGEXP_PARSER_H_ | 176 #endif // VM_REGEXP_PARSER_H_ |
OLD | NEW |