OLD | NEW |
1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef V8_PARSING_PARSER_H_ | 5 #ifndef V8_PARSING_PARSER_H_ |
6 #define V8_PARSING_PARSER_H_ | 6 #define V8_PARSING_PARSER_H_ |
7 | 7 |
8 #include "src/allocation.h" | 8 #include "src/allocation.h" |
9 #include "src/ast/ast.h" | 9 #include "src/ast/ast.h" |
10 #include "src/ast/scopes.h" | 10 #include "src/ast/scopes.h" |
(...skipping 271 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
282 return script_data_->length() / sizeof(unsigned); | 282 return script_data_->length() / sizeof(unsigned); |
283 } | 283 } |
284 | 284 |
285 ScriptData* script_data_; | 285 ScriptData* script_data_; |
286 int function_index_; | 286 int function_index_; |
287 | 287 |
288 DISALLOW_COPY_AND_ASSIGN(ParseData); | 288 DISALLOW_COPY_AND_ASSIGN(ParseData); |
289 }; | 289 }; |
290 | 290 |
291 // ---------------------------------------------------------------------------- | 291 // ---------------------------------------------------------------------------- |
292 // REGEXP PARSING | |
293 | |
294 // A BufferedZoneList is an automatically growing list, just like (and backed | |
295 // by) a ZoneList, that is optimized for the case of adding and removing | |
296 // a single element. The last element added is stored outside the backing list, | |
297 // and if no more than one element is ever added, the ZoneList isn't even | |
298 // allocated. | |
299 // Elements must not be NULL pointers. | |
// (A NULL last_ is how this class represents "no buffered element", so a | |
// NULL element would be indistinguishable from an empty buffer.) | |
// Template parameters: T is the pointee type of the stored T* elements; | |
// initial_size is the first argument handed to the ZoneList constructor when | |
// the backing list is created. | |
300 template <typename T, int initial_size> | |
301 class BufferedZoneList { | |
302 public: | |
// Starts empty: no backing list and no buffered element. | |
303 BufferedZoneList() : list_(NULL), last_(NULL) {} | |
304 | |
305 // Adds element at end of list. This element is buffered and can | |
306 // be read using last() or removed using RemoveLast until a new Add or until | |
307 // RemoveLast or GetList has been called. | |
// If an element is already buffered, it is first appended to the backing | |
// list, which is created in |zone| on demand. | |
308 void Add(T* value, Zone* zone) { | |
309 if (last_ != NULL) { | |
310 if (list_ == NULL) { | |
311 list_ = new(zone) ZoneList<T*>(initial_size, zone); | |
312 } | |
313 list_->Add(last_, zone); | |
314 } | |
315 last_ = value; | |
316 } | |
317 | |
// Returns the buffered element; DCHECKs that one is present. | |
318 T* last() { | |
319 DCHECK(last_ != NULL); | |
320 return last_; | |
321 } | |
322 | |
// Removes and returns the buffered element. The buffer is then refilled | |
// with the last element of the backing list if that list exists and is | |
// non-empty; otherwise the buffer becomes empty (NULL). | |
323 T* RemoveLast() { | |
324 DCHECK(last_ != NULL); | |
325 T* result = last_; | |
326 if ((list_ != NULL) && (list_->length() > 0)) | |
327 last_ = list_->RemoveLast(); | |
328 else | |
329 last_ = NULL; | |
330 return result; | |
331 } | |
332 | |
// Returns the element at index i, where 0 <= i < length() (DCHECKed). | |
// Indices below list_->length() come from the backing list; the index equal | |
// to list_->length() (or index 0 when there is no backing list) is the | |
// buffered element. | |
333 T* Get(int i) { | |
334 DCHECK((0 <= i) && (i < length())); | |
335 if (list_ == NULL) { | |
336 DCHECK_EQ(0, i); | |
337 return last_; | |
338 } else { | |
339 if (i == list_->length()) { | |
340 DCHECK(last_ != NULL); | |
341 return last_; | |
342 } else { | |
343 return list_->at(i); | |
344 } | |
345 } | |
346 } | |
347 | |
// Empties the container by resetting both pointers to NULL; the backing | |
// list object itself is not touched here. | |
348 void Clear() { | |
349 list_ = NULL; | |
350 last_ = NULL; | |
351 } | |
352 | |
// Number of elements: the backing list's length (0 if it was never | |
// created) plus one if an element is currently buffered. | |
353 int length() { | |
354 int length = (list_ == NULL) ? 0 : list_->length(); | |
355 return length + ((last_ == NULL) ? 0 : 1); | |
356 } | |
357 | |
// Returns the backing list with all elements in it: creates the list in | |
// |zone| if it does not exist yet (so this never returns NULL), appends the | |
// buffered element if there is one, and leaves the buffer empty. | |
358 ZoneList<T*>* GetList(Zone* zone) { | |
359 if (list_ == NULL) { | |
360 list_ = new(zone) ZoneList<T*>(initial_size, zone); | |
361 } | |
362 if (last_ != NULL) { | |
363 list_->Add(last_, zone); | |
364 last_ = NULL; | |
365 } | |
366 return list_; | |
367 } | |
368 | |
369 private: | |
// Backing list holding every element except the buffered one; NULL until | |
// it is first needed. | |
370 ZoneList<T*>* list_; | |
// Most recently added element, kept outside list_; NULL when nothing is | |
// buffered. | |
371 T* last_; | |
372 }; | |
373 | |
374 | |
375 // Accumulates RegExp atoms and assertions into lists of terms and alternatives. | |
// Only the interface is declared here; the member functions are defined | |
// elsewhere. | |
376 class RegExpBuilder: public ZoneObject { | |
377 public: | |
378 explicit RegExpBuilder(Zone* zone); | |
379 void AddCharacter(uc16 character); | |
380 // "Adds" an empty expression. Does nothing except consume a | |
381 // following quantifier. | |
382 void AddEmpty(); | |
383 void AddAtom(RegExpTree* tree); | |
384 void AddAssertion(RegExpTree* tree); | |
385 void NewAlternative(); // '|' | |
// Applies a quantifier with bounds [min, max] and the given type. | |
// NOTE(review): presumably targets the most recently added atom -- confirm | |
// against the definition. | |
386 void AddQuantifierToAtom( | |
387 int min, int max, RegExpQuantifier::QuantifierType type); | |
// Returns the accumulated expression as a RegExpTree. | |
388 RegExpTree* ToRegExp(); | |
389 | |
390 private: | |
// NOTE(review): the Flush* helpers presumably move pending characters_ / | |
// text_ / terms_ into the next-level list -- confirm in the implementation. | |
391 void FlushCharacters(); | |
392 void FlushText(); | |
393 void FlushTerms(); | |
394 Zone* zone() const { return zone_; } | |
395 | |
396 Zone* zone_; | |
397 bool pending_empty_; | |
398 ZoneList<uc16>* characters_; | |
399 BufferedZoneList<RegExpTree, 2> terms_; | |
400 BufferedZoneList<RegExpTree, 2> text_; | |
401 BufferedZoneList<RegExpTree, 2> alternatives_; | |
// Debug-only bookkeeping: in DEBUG builds LAST(x) stores x in last_added_; | |
// otherwise LAST(x) expands to nothing and last_added_ does not exist. | |
// Although defined inside the class body, LAST is a preprocessor macro and | |
// therefore stays defined for the rest of the translation unit. | |
402 #ifdef DEBUG | |
403 enum {ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM} last_added_; | |
404 #define LAST(x) last_added_ = x; | |
405 #else | |
406 #define LAST(x) | |
407 #endif | |
408 }; | |
409 | |
410 | |
// Parser for regular expression patterns. Input is read through a | |
// FlatStringReader and the Parse* functions return RegExpTree nodes. Only the | |
// interface and trivial accessors are defined here. | |
411 class RegExpParser BASE_EMBEDDED { | |
412 public: | |
413 RegExpParser(FlatStringReader* in, Handle<String>* error, bool multiline_mode, | |
414 bool unicode, Isolate* isolate, Zone* zone); | |
415 | |
// Static entry point taking the input, the multiline/unicode flags and a | |
// RegExpCompileData out-parameter. | |
// NOTE(review): presumably returns true on success and fills |result| -- | |
// confirm against the definition. | |
416 static bool ParseRegExp(Isolate* isolate, Zone* zone, FlatStringReader* input, | |
417 bool multiline, bool unicode, | |
418 RegExpCompileData* result); | |
419 | |
420 RegExpTree* ParsePattern(); | |
421 RegExpTree* ParseDisjunction(); | |
422 RegExpTree* ParseGroup(); | |
423 RegExpTree* ParseCharacterClass(); | |
424 | |
425 // Parses a {...,...} quantifier and stores the range in the given | |
426 // out parameters. | |
427 bool ParseIntervalQuantifier(int* min_out, int* max_out); | |
428 | |
429 // Parses and returns a single escaped character. The character | |
430 // must not be 'b' or 'B' since they are usually handled specially. | |
431 uc32 ParseClassCharacterEscape(); | |
432 | |
433 // Checks whether the following is a length-digit hexadecimal number, | |
434 // and sets the value if it is. | |
435 bool ParseHexEscape(int length, uc32* value); | |
436 bool ParseUnicodeEscape(uc32* value); | |
437 bool ParseUnlimitedLengthHexNumber(int max_value, uc32* value); | |
438 | |
439 uc32 ParseOctalLiteral(); | |
440 | |
441 // Tries to parse the input as a back reference. If successful it | |
442 // stores the result in the output parameter and returns true. If | |
443 // it fails it will push back the characters read so the same characters | |
444 // can be reparsed. | |
445 bool ParseBackReferenceIndex(int* index_out); | |
446 | |
447 CharacterRange ParseClassAtom(uc16* char_class); | |
448 RegExpTree* ReportError(Vector<const char> message); | |
449 void Advance(); | |
450 void Advance(int dist); | |
451 void Reset(int pos); | |
452 | |
453 // Reports whether the pattern might be used as a literal search string. | |
454 // Only use if the result of the parse is a single atom node. | |
455 bool simple(); | |
456 bool contains_anchor() { return contains_anchor_; } | |
457 void set_contains_anchor() { contains_anchor_ = true; } | |
458 int captures_started() { return captures_started_; } | |
// One less than next_pos_. | |
// NOTE(review): presumably the index of the character held in current_ -- | |
// confirm. | |
459 int position() { return next_pos_ - 1; } | |
460 bool failed() { return failed_; } | |
461 | |
462 static bool IsSyntaxCharacter(uc32 c); | |
463 | |
// 1 << 16 == 65536. | |
464 static const int kMaxCaptures = 1 << 16; | |
// 1 << 21 == 0x200000, which lies above the largest Unicode code point | |
// (0x10FFFF). | |
// NOTE(review): presumably the value of current() at end of input -- confirm. | |
465 static const uc32 kEndMarker = (1 << 21); | |
466 | |
467 private: | |
468 enum SubexpressionType { | |
469 INITIAL, | |
470 CAPTURE, // All positive values represent captures. | |
471 POSITIVE_LOOKAROUND, | |
472 NEGATIVE_LOOKAROUND, | |
473 GROUPING | |
474 }; | |
475 | |
// One entry in a singly linked stack of parser states (linked through | |
// previous_state_). Constructing a state also allocates a fresh | |
// RegExpBuilder in |zone| for it. | |
476 class RegExpParserState : public ZoneObject { | |
477 public: | |
478 RegExpParserState(RegExpParserState* previous_state, | |
479 SubexpressionType group_type, | |
480 RegExpLookaround::Type lookaround_type, | |
481 int disjunction_capture_index, Zone* zone) | |
482 : previous_state_(previous_state), | |
483 builder_(new (zone) RegExpBuilder(zone)), | |
484 group_type_(group_type), | |
485 lookaround_type_(lookaround_type), | |
486 disjunction_capture_index_(disjunction_capture_index) {} | |
487 // Parser state of containing expression, if any. | |
488 RegExpParserState* previous_state() { return previous_state_; } | |
// True when there is a containing state, i.e. previous_state_ is non-NULL. | |
489 bool IsSubexpression() { return previous_state_ != NULL; } | |
490 // RegExpBuilder building this regexp's AST. | |
491 RegExpBuilder* builder() { return builder_; } | |
492 // Type of regexp being parsed (parenthesized group or entire regexp). | |
493 SubexpressionType group_type() { return group_type_; } | |
494 // Lookahead or Lookbehind. | |
495 RegExpLookaround::Type lookaround_type() { return lookaround_type_; } | |
496 // Index in captures array of first capture in this sub-expression, if any. | |
497 // Also the capture index of this sub-expression itself, if group_type | |
498 // is CAPTURE. | |
499 int capture_index() { return disjunction_capture_index_; } | |
500 | |
501 // Check whether the parser is inside a capture group with the given index. | |
502 bool IsInsideCaptureGroup(int index); | |
503 | |
504 private: | |
505 // Linked list implementation of stack of states. | |
506 RegExpParserState* previous_state_; | |
507 // Builder for the stored disjunction. | |
508 RegExpBuilder* builder_; | |
509 // Stored disjunction type (capture, look-ahead or grouping), if any. | |
510 SubexpressionType group_type_; | |
511 // Stored read direction. | |
512 RegExpLookaround::Type lookaround_type_; | |
513 // Stored disjunction's capture index (if any). | |
514 int disjunction_capture_index_; | |
515 }; | |
516 | |
517 // Return the 1-indexed RegExpCapture object, allocate if necessary. | |
518 RegExpCapture* GetCapture(int index); | |
519 | |
520 Isolate* isolate() { return isolate_; } | |
521 Zone* zone() const { return zone_; } | |
522 | |
523 uc32 current() { return current_; } | |
524 bool has_more() { return has_more_; } | |
// True while next_pos_ is still within the input's length. | |
525 bool has_next() { return next_pos_ < in()->length(); } | |
526 uc32 Next(); | |
527 FlatStringReader* in() { return in_; } | |
528 void ScanForCaptures(); | |
529 | |
530 Isolate* isolate_; | |
531 Zone* zone_; | |
532 Handle<String>* error_; | |
533 ZoneList<RegExpCapture*>* captures_; | |
534 FlatStringReader* in_; | |
535 uc32 current_; | |
536 int next_pos_; | |
537 int captures_started_; | |
538 // The capture count is only valid after we have scanned for captures. | |
539 int capture_count_; | |
540 bool has_more_; | |
541 bool multiline_; | |
542 bool unicode_; | |
543 bool simple_; | |
544 bool contains_anchor_; | |
545 bool is_scanned_for_captures_; | |
546 bool failed_; | |
547 }; | |
548 | |
549 // ---------------------------------------------------------------------------- | |
550 // JAVASCRIPT PARSING | 292 // JAVASCRIPT PARSING |
551 | 293 |
552 class Parser; | 294 class Parser; |
553 class SingletonLogger; | 295 class SingletonLogger; |
554 | 296 |
555 | 297 |
556 struct ParserFormalParameters : FormalParametersBase { | 298 struct ParserFormalParameters : FormalParametersBase { |
557 struct Parameter { | 299 struct Parameter { |
558 Parameter(const AstRawString* name, Expression* pattern, | 300 Parameter(const AstRawString* name, Expression* pattern, |
559 Expression* initializer, int initializer_end_position, | 301 Expression* initializer, int initializer_end_position, |
(...skipping 890 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1450 | 1192 |
// Forwards to parser_->ParseDoExpression(), passing |ok| through unchanged. | // Forwards to parser_->ParseDoExpression(), passing |ok| through unchanged. |
1451 DoExpression* ParserTraits::ParseDoExpression(bool* ok) { | 1193 DoExpression* ParserTraits::ParseDoExpression(bool* ok) { |
1452 return parser_->ParseDoExpression(ok); | 1194 return parser_->ParseDoExpression(ok); |
1453 } | 1195 } |
1454 | 1196 |
1455 | 1197 |
1456 } // namespace internal | 1198 } // namespace internal |
1457 } // namespace v8 | 1199 } // namespace v8 |
1458 | 1200 |
1459 #endif // V8_PARSING_PARSER_H_ | 1201 #endif // V8_PARSING_PARSER_H_ |
OLD | NEW |