OLD | NEW |
1 // Copyright 2016 the V8 project authors. All rights reserved. | 1 // Copyright 2016 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef V8_REGEXP_REGEXP_PARSER_H_ | 5 #ifndef V8_REGEXP_REGEXP_PARSER_H_ |
6 #define V8_REGEXP_REGEXP_PARSER_H_ | 6 #define V8_REGEXP_REGEXP_PARSER_H_ |
7 | 7 |
8 #include "src/objects.h" | 8 #include "src/objects.h" |
9 #include "src/regexp/regexp-ast.h" | 9 #include "src/regexp/regexp-ast.h" |
10 #include "src/zone.h" | 10 #include "src/zone.h" |
(...skipping 81 matching lines...) |
92 | 92 |
93 private: | 93 private: |
94 ZoneList<T*>* list_; | 94 ZoneList<T*>* list_; |
95 T* last_; | 95 T* last_; |
96 }; | 96 }; |
97 | 97 |
98 | 98 |
99 // Accumulates RegExp atoms and assertions into lists of terms and alternatives. | 99 // Accumulates RegExp atoms and assertions into lists of terms and alternatives. |
100 class RegExpBuilder : public ZoneObject { | 100 class RegExpBuilder : public ZoneObject { |
101 public: | 101 public: |
102 explicit RegExpBuilder(Zone* zone); | 102 RegExpBuilder(Zone* zone, JSRegExp::Flags flags); |
103 void AddCharacter(uc16 character); | 103 void AddCharacter(uc16 character); |
104 void AddUnicodeCharacter(uc32 character); | 104 void AddUnicodeCharacter(uc32 character); |
105 // "Adds" an empty expression. Does nothing except consume a | 105 // "Adds" an empty expression. Does nothing except consume a |
106 // following quantifier. | 106 // following quantifier. |
107 void AddEmpty(); | 107 void AddEmpty(); |
| 108 void AddCharacterClass(RegExpCharacterClass* cc); |
108 void AddAtom(RegExpTree* tree); | 109 void AddAtom(RegExpTree* tree); |
| 110 void AddTerm(RegExpTree* tree); |
109 void AddAssertion(RegExpTree* tree); | 111 void AddAssertion(RegExpTree* tree); |
110 void NewAlternative(); // '|' | 112 void NewAlternative(); // '|' |
111 void AddQuantifierToAtom(int min, int max, | 113 void AddQuantifierToAtom(int min, int max, |
112 RegExpQuantifier::QuantifierType type); | 114 RegExpQuantifier::QuantifierType type); |
113 RegExpTree* ToRegExp(); | 115 RegExpTree* ToRegExp(); |
114 | 116 |
115 private: | 117 private: |
| 118 static const uc16 kNoPendingSurrogate = 0; |
| 119 void AddLeadSurrogate(uc16 lead_surrogate); |
| 120 void AddTrailSurrogate(uc16 trail_surrogate); |
| 121 void FlushPendingSurrogate(); |
116 void FlushCharacters(); | 122 void FlushCharacters(); |
117 void FlushText(); | 123 void FlushText(); |
118 void FlushTerms(); | 124 void FlushTerms(); |
119 Zone* zone() const { return zone_; } | 125 Zone* zone() const { return zone_; } |
| 126 bool unicode() const { return flags_ & JSRegExp::kUnicode; } |
120 | 127 |
121 Zone* zone_; | 128 Zone* zone_; |
122 bool pending_empty_; | 129 bool pending_empty_; |
| 130 JSRegExp::Flags flags_; |
123 ZoneList<uc16>* characters_; | 131 ZoneList<uc16>* characters_; |
| 132 uc16 pending_surrogate_; |
124 BufferedZoneList<RegExpTree, 2> terms_; | 133 BufferedZoneList<RegExpTree, 2> terms_; |
125 BufferedZoneList<RegExpTree, 2> text_; | 134 BufferedZoneList<RegExpTree, 2> text_; |
126 BufferedZoneList<RegExpTree, 2> alternatives_; | 135 BufferedZoneList<RegExpTree, 2> alternatives_; |
127 #ifdef DEBUG | 136 #ifdef DEBUG |
128 enum { ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM } last_added_; | 137 enum { ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM } last_added_; |
129 #define LAST(x) last_added_ = x; | 138 #define LAST(x) last_added_ = x; |
130 #else | 139 #else |
131 #define LAST(x) | 140 #define LAST(x) |
132 #endif | 141 #endif |
133 }; | 142 }; |
134 | 143 |
135 | 144 |
136 class RegExpParser BASE_EMBEDDED { | 145 class RegExpParser BASE_EMBEDDED { |
137 public: | 146 public: |
138 RegExpParser(FlatStringReader* in, Handle<String>* error, bool multiline_mode, | 147 RegExpParser(FlatStringReader* in, Handle<String>* error, |
139 bool unicode, Isolate* isolate, Zone* zone); | 148 JSRegExp::Flags flags, Isolate* isolate, Zone* zone); |
140 | 149 |
141 static bool ParseRegExp(Isolate* isolate, Zone* zone, FlatStringReader* input, | 150 static bool ParseRegExp(Isolate* isolate, Zone* zone, FlatStringReader* input, |
142 bool multiline, bool unicode, | 151 JSRegExp::Flags flags, RegExpCompileData* result); |
143 RegExpCompileData* result); | |
144 | 152 |
145 RegExpTree* ParsePattern(); | 153 RegExpTree* ParsePattern(); |
146 RegExpTree* ParseDisjunction(); | 154 RegExpTree* ParseDisjunction(); |
147 RegExpTree* ParseGroup(); | 155 RegExpTree* ParseGroup(); |
148 RegExpTree* ParseCharacterClass(); | 156 RegExpTree* ParseCharacterClass(); |
149 | 157 |
150 // Parses a {...,...} quantifier and stores the range in the given | 158 // Parses a {...,...} quantifier and stores the range in the given |
151 // out parameters. | 159 // out parameters. |
152 bool ParseIntervalQuantifier(int* min_out, int* max_out); | 160 bool ParseIntervalQuantifier(int* min_out, int* max_out); |
153 | 161 |
(...skipping 22 matching lines...) |
176 void Reset(int pos); | 184 void Reset(int pos); |
177 | 185 |
178 // Reports whether the pattern might be used as a literal search string. | 186 // Reports whether the pattern might be used as a literal search string. |
179 // Only use if the result of the parse is a single atom node. | 187 // Only use if the result of the parse is a single atom node. |
180 bool simple(); | 188 bool simple(); |
181 bool contains_anchor() { return contains_anchor_; } | 189 bool contains_anchor() { return contains_anchor_; } |
182 void set_contains_anchor() { contains_anchor_ = true; } | 190 void set_contains_anchor() { contains_anchor_ = true; } |
183 int captures_started() { return captures_started_; } | 191 int captures_started() { return captures_started_; } |
184 int position() { return next_pos_ - 1; } | 192 int position() { return next_pos_ - 1; } |
185 bool failed() { return failed_; } | 193 bool failed() { return failed_; } |
| 194 bool unicode() const { return flags_ & JSRegExp::kUnicode; } |
| 195 bool multiline() const { return flags_ & JSRegExp::kMultiline; } |
186 | 196 |
187 static bool IsSyntaxCharacter(uc32 c); | 197 static bool IsSyntaxCharacter(uc32 c); |
188 | 198 |
189 static const int kMaxCaptures = 1 << 16; | 199 static const int kMaxCaptures = 1 << 16; |
190 static const uc32 kEndMarker = (1 << 21); | 200 static const uc32 kEndMarker = (1 << 21); |
191 | 201 |
192 private: | 202 private: |
193 enum SubexpressionType { | 203 enum SubexpressionType { |
194 INITIAL, | 204 INITIAL, |
195 CAPTURE, // All positive values represent captures. | 205 CAPTURE, // All positive values represent captures. |
196 POSITIVE_LOOKAROUND, | 206 POSITIVE_LOOKAROUND, |
197 NEGATIVE_LOOKAROUND, | 207 NEGATIVE_LOOKAROUND, |
198 GROUPING | 208 GROUPING |
199 }; | 209 }; |
200 | 210 |
201 class RegExpParserState : public ZoneObject { | 211 class RegExpParserState : public ZoneObject { |
202 public: | 212 public: |
203 RegExpParserState(RegExpParserState* previous_state, | 213 RegExpParserState(RegExpParserState* previous_state, |
204 SubexpressionType group_type, | 214 SubexpressionType group_type, |
205 RegExpLookaround::Type lookaround_type, | 215 RegExpLookaround::Type lookaround_type, |
206 int disjunction_capture_index, Zone* zone) | 216 int disjunction_capture_index, JSRegExp::Flags flags, |
| 217 Zone* zone) |
207 : previous_state_(previous_state), | 218 : previous_state_(previous_state), |
208 builder_(new (zone) RegExpBuilder(zone)), | 219 builder_(new (zone) RegExpBuilder(zone, flags)), |
209 group_type_(group_type), | 220 group_type_(group_type), |
210 lookaround_type_(lookaround_type), | 221 lookaround_type_(lookaround_type), |
211 disjunction_capture_index_(disjunction_capture_index) {} | 222 disjunction_capture_index_(disjunction_capture_index) {} |
212 // Parser state of containing expression, if any. | 223 // Parser state of containing expression, if any. |
213 RegExpParserState* previous_state() { return previous_state_; } | 224 RegExpParserState* previous_state() { return previous_state_; } |
214 bool IsSubexpression() { return previous_state_ != NULL; } | 225 bool IsSubexpression() { return previous_state_ != NULL; } |
215 // RegExpBuilder building this regexp's AST. | 226 // RegExpBuilder building this regexp's AST. |
216 RegExpBuilder* builder() { return builder_; } | 227 RegExpBuilder* builder() { return builder_; } |
217 // Type of regexp being parsed (parenthesized group or entire regexp). | 228 // Type of regexp being parsed (parenthesized group or entire regexp). |
218 SubexpressionType group_type() { return group_type_; } | 229 SubexpressionType group_type() { return group_type_; } |
(...skipping 23 matching lines...) |
242 // Return the 1-indexed RegExpCapture object, allocate if necessary. | 253 // Return the 1-indexed RegExpCapture object, allocate if necessary. |
243 RegExpCapture* GetCapture(int index); | 254 RegExpCapture* GetCapture(int index); |
244 | 255 |
245 Isolate* isolate() { return isolate_; } | 256 Isolate* isolate() { return isolate_; } |
246 Zone* zone() const { return zone_; } | 257 Zone* zone() const { return zone_; } |
247 | 258 |
248 uc32 current() { return current_; } | 259 uc32 current() { return current_; } |
249 bool has_more() { return has_more_; } | 260 bool has_more() { return has_more_; } |
250 bool has_next() { return next_pos_ < in()->length(); } | 261 bool has_next() { return next_pos_ < in()->length(); } |
251 uc32 Next(); | 262 uc32 Next(); |
| 263 template <bool update_position> |
| 264 uc32 ReadNext(); |
252 FlatStringReader* in() { return in_; } | 265 FlatStringReader* in() { return in_; } |
253 void ScanForCaptures(); | 266 void ScanForCaptures(); |
254 | 267 |
255 Isolate* isolate_; | 268 Isolate* isolate_; |
256 Zone* zone_; | 269 Zone* zone_; |
257 Handle<String>* error_; | 270 Handle<String>* error_; |
258 ZoneList<RegExpCapture*>* captures_; | 271 ZoneList<RegExpCapture*>* captures_; |
259 FlatStringReader* in_; | 272 FlatStringReader* in_; |
260 uc32 current_; | 273 uc32 current_; |
| 274 JSRegExp::Flags flags_; |
261 int next_pos_; | 275 int next_pos_; |
262 int captures_started_; | 276 int captures_started_; |
263 // The capture count is only valid after we have scanned for captures. | 277 // The capture count is only valid after we have scanned for captures. |
264 int capture_count_; | 278 int capture_count_; |
265 bool has_more_; | 279 bool has_more_; |
266 bool multiline_; | |
267 bool unicode_; | |
268 bool simple_; | 280 bool simple_; |
269 bool contains_anchor_; | 281 bool contains_anchor_; |
270 bool is_scanned_for_captures_; | 282 bool is_scanned_for_captures_; |
271 bool failed_; | 283 bool failed_; |
272 }; | 284 }; |
273 | 285 |
274 } // namespace internal | 286 } // namespace internal |
275 } // namespace v8 | 287 } // namespace v8 |
276 | 288 |
277 #endif // V8_REGEXP_REGEXP_PARSER_H_ | 289 #endif // V8_REGEXP_REGEXP_PARSER_H_ |
OLD | NEW |