| OLD | NEW |
| 1 // Copyright 2016 the V8 project authors. All rights reserved. | 1 // Copyright 2016 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #ifndef V8_REGEXP_REGEXP_PARSER_H_ | 5 #ifndef V8_REGEXP_REGEXP_PARSER_H_ |
| 6 #define V8_REGEXP_REGEXP_PARSER_H_ | 6 #define V8_REGEXP_REGEXP_PARSER_H_ |
| 7 | 7 |
| 8 #include "src/objects.h" | 8 #include "src/objects.h" |
| 9 #include "src/regexp/regexp-ast.h" | 9 #include "src/regexp/regexp-ast.h" |
| 10 #include "src/zone.h" | 10 #include "src/zone.h" |
| (...skipping 204 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 215 POSITIVE_LOOKAROUND, | 215 POSITIVE_LOOKAROUND, |
| 216 NEGATIVE_LOOKAROUND, | 216 NEGATIVE_LOOKAROUND, |
| 217 GROUPING | 217 GROUPING |
| 218 }; | 218 }; |
| 219 | 219 |
| 220 class RegExpParserState : public ZoneObject { | 220 class RegExpParserState : public ZoneObject { |
| 221 public: | 221 public: |
| 222 RegExpParserState(RegExpParserState* previous_state, | 222 RegExpParserState(RegExpParserState* previous_state, |
| 223 SubexpressionType group_type, | 223 SubexpressionType group_type, |
| 224 RegExpLookaround::Type lookaround_type, | 224 RegExpLookaround::Type lookaround_type, |
| 225 int disjunction_capture_index, bool ignore_case, | 225 int disjunction_capture_index, |
| 226 const ZoneVector<uc16>* capture_name, bool ignore_case, |
| 226 bool unicode, Zone* zone) | 227 bool unicode, Zone* zone) |
| 227 : previous_state_(previous_state), | 228 : previous_state_(previous_state), |
| 228 builder_(new (zone) RegExpBuilder(zone, ignore_case, unicode)), | 229 builder_(new (zone) RegExpBuilder(zone, ignore_case, unicode)), |
| 229 group_type_(group_type), | 230 group_type_(group_type), |
| 230 lookaround_type_(lookaround_type), | 231 lookaround_type_(lookaround_type), |
| 231 disjunction_capture_index_(disjunction_capture_index) {} | 232 disjunction_capture_index_(disjunction_capture_index), |
| 233 capture_name_(capture_name) {} |
| 232 // Parser state of containing expression, if any. | 234 // Parser state of containing expression, if any. |
| 233 RegExpParserState* previous_state() { return previous_state_; } | 235 RegExpParserState* previous_state() { return previous_state_; } |
| 234 bool IsSubexpression() { return previous_state_ != NULL; } | 236 bool IsSubexpression() { return previous_state_ != NULL; } |
| 235 // RegExpBuilder building this regexp's AST. | 237 // RegExpBuilder building this regexp's AST. |
| 236 RegExpBuilder* builder() { return builder_; } | 238 RegExpBuilder* builder() { return builder_; } |
| 237 // Type of regexp being parsed (parenthesized group or entire regexp). | 239 // Type of regexp being parsed (parenthesized group or entire regexp). |
| 238 SubexpressionType group_type() { return group_type_; } | 240 SubexpressionType group_type() { return group_type_; } |
| 239 // Lookahead or Lookbehind. | 241 // Lookahead or Lookbehind. |
| 240 RegExpLookaround::Type lookaround_type() { return lookaround_type_; } | 242 RegExpLookaround::Type lookaround_type() { return lookaround_type_; } |
| 241 // Index in captures array of first capture in this sub-expression, if any. | 243 // Index in captures array of first capture in this sub-expression, if any. |
| 242 // Also the capture index of this sub-expression itself, if group_type | 244 // Also the capture index of this sub-expression itself, if group_type |
| 243 // is CAPTURE. | 245 // is CAPTURE. |
| 244 int capture_index() { return disjunction_capture_index_; } | 246 int capture_index() { return disjunction_capture_index_; } |
| 247 // The name of the current sub-expression, if group_type is CAPTURE. Only |
| 248 // used for named captures. |
| 249 const ZoneVector<uc16>* capture_name() { return capture_name_; } |
| 250 |
| 251 bool IsNamedCapture() const { return capture_name_ != nullptr; } |
| 245 | 252 |
| 246 // Check whether the parser is inside a capture group with the given index. | 253 // Check whether the parser is inside a capture group with the given index. |
| 247 bool IsInsideCaptureGroup(int index); | 254 bool IsInsideCaptureGroup(int index); |
| 255 // Check whether the parser is inside a capture group with the given name. |
| 256 bool IsInsideCaptureGroup(const ZoneVector<uc16>* name); |
| 248 | 257 |
| 249 private: | 258 private: |
| 250 // Linked list implementation of stack of states. | 259 // Linked list implementation of stack of states. |
| 251 RegExpParserState* previous_state_; | 260 RegExpParserState* previous_state_; |
| 252 // Builder for the stored disjunction. | 261 // Builder for the stored disjunction. |
| 253 RegExpBuilder* builder_; | 262 RegExpBuilder* builder_; |
| 254 // Stored disjunction type (capture, look-ahead or grouping), if any. | 263 // Stored disjunction type (capture, look-ahead or grouping), if any. |
| 255 SubexpressionType group_type_; | 264 SubexpressionType group_type_; |
| 256 // Stored read direction. | 265 // Stored read direction. |
| 257 RegExpLookaround::Type lookaround_type_; | 266 RegExpLookaround::Type lookaround_type_; |
| 258 // Stored disjunction's capture index (if any). | 267 // Stored disjunction's capture index (if any). |
| 259 int disjunction_capture_index_; | 268 int disjunction_capture_index_; |
| 269 // Stored capture name (if any). |
| 270 const ZoneVector<uc16>* capture_name_; |
| 260 }; | 271 }; |
| 261 | 272 |
| 262 // Return the 1-indexed RegExpCapture object, allocate if necessary. | 273 // Return the 1-indexed RegExpCapture object, allocate if necessary. |
| 263 RegExpCapture* GetCapture(int index); | 274 RegExpCapture* GetCapture(int index); |
| 264 | 275 |
| 276 // Creates a new named capture at the specified index. Must be called exactly |
| 277 // once for each named capture. Fails if a capture with the same name is |
| 278 // encountered. |
| 279 bool CreateNamedCaptureAtIndex(const ZoneVector<uc16>* name, int index); |
| 280 |
| 281 // Parses the name of a capture group (?<name>pattern). The name must adhere |
| 282 // to IdentifierName in the ECMAScript standard. |
| 283 const ZoneVector<uc16>* ParseCaptureGroupName(); |
| 284 |
| 285 bool ParseNamedBackReference(RegExpBuilder* builder, |
| 286 RegExpParserState* state); |
| 287 |
| 288 // After the initial parsing pass, patch corresponding RegExpCapture objects |
| 289 // into all RegExpBackReferences. This is done after initial parsing in order |
| 290 // to avoid complicating cases in which references come before the capture. |
| 291 void PatchNamedBackReferences(); |
| 292 |
| 293 Handle<FixedArray> CreateCaptureNameMap(); |
| 294 |
| 265 Isolate* isolate() { return isolate_; } | 295 Isolate* isolate() { return isolate_; } |
| 266 Zone* zone() const { return zone_; } | 296 Zone* zone() const { return zone_; } |
| 267 | 297 |
| 268 uc32 current() { return current_; } | 298 uc32 current() { return current_; } |
| 269 bool has_more() { return has_more_; } | 299 bool has_more() { return has_more_; } |
| 270 bool has_next() { return next_pos_ < in()->length(); } | 300 bool has_next() { return next_pos_ < in()->length(); } |
| 271 uc32 Next(); | 301 uc32 Next(); |
| 272 template <bool update_position> | 302 template <bool update_position> |
| 273 uc32 ReadNext(); | 303 uc32 ReadNext(); |
| 274 FlatStringReader* in() { return in_; } | 304 FlatStringReader* in() { return in_; } |
| 275 void ScanForCaptures(); | 305 void ScanForCaptures(); |
| 276 | 306 |
| 277 Isolate* isolate_; | 307 Isolate* isolate_; |
| 278 Zone* zone_; | 308 Zone* zone_; |
| 279 Handle<String>* error_; | 309 Handle<String>* error_; |
| 280 ZoneList<RegExpCapture*>* captures_; | 310 ZoneList<RegExpCapture*>* captures_; |
| 311 ZoneList<RegExpCapture*>* named_captures_; |
| 312 ZoneList<RegExpBackReference*>* named_back_references_; |
| 281 FlatStringReader* in_; | 313 FlatStringReader* in_; |
| 282 uc32 current_; | 314 uc32 current_; |
| 283 bool ignore_case_; | 315 bool ignore_case_; |
| 284 bool multiline_; | 316 bool multiline_; |
| 285 bool unicode_; | 317 bool unicode_; |
| 286 int next_pos_; | 318 int next_pos_; |
| 287 int captures_started_; | 319 int captures_started_; |
| 288 // The capture count is only valid after we have scanned for captures. | 320 // The capture count is only valid after we have scanned for captures. |
| 289 int capture_count_; | 321 int capture_count_; |
| 290 bool has_more_; | 322 bool has_more_; |
| 291 bool simple_; | 323 bool simple_; |
| 292 bool contains_anchor_; | 324 bool contains_anchor_; |
| 293 bool is_scanned_for_captures_; | 325 bool is_scanned_for_captures_; |
| 294 bool failed_; | 326 bool failed_; |
| 295 }; | 327 }; |
| 296 | 328 |
| 297 } // namespace internal | 329 } // namespace internal |
| 298 } // namespace v8 | 330 } // namespace v8 |
| 299 | 331 |
| 300 #endif // V8_REGEXP_REGEXP_PARSER_H_ | 332 #endif // V8_REGEXP_REGEXP_PARSER_H_ |
| OLD | NEW |