OLD | NEW |
1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "src/parser.h" | 5 #include "src/parser.h" |
6 | 6 |
7 #include "src/api.h" | 7 #include "src/api.h" |
8 #include "src/ast.h" | 8 #include "src/ast.h" |
9 #include "src/ast-literal-reindexer.h" | 9 #include "src/ast-literal-reindexer.h" |
10 #include "src/bailout-reason.h" | 10 #include "src/bailout-reason.h" |
(...skipping 5175 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
// Constructs a regexp parser reading from |in|. Stores the isolate, zone and
// error handle, records the |multiline| and |unicode| flags, starts with no
// capture list (captures_ is NULL), a zero capture count, position 0 and
// current_ set to kEndMarker, clears the simple_/contains_anchor_/
// is_scanned_for_captures_/failed_ flags, and then calls Advance() once.
// NOTE(review): presumably the Advance() call loads the first input character
// into current_ -- confirm against Advance().
// In this side-by-side view only the OLD column initializes captures_started_
// (to 0); the NEW column has no such member initializer.
5186 RegExpParser::RegExpParser(FlatStringReader* in, Handle<String>* error, | 5186 RegExpParser::RegExpParser(FlatStringReader* in, Handle<String>* error, |
5187 bool multiline, bool unicode, Isolate* isolate, | 5187 bool multiline, bool unicode, Isolate* isolate, |
5188 Zone* zone) | 5188 Zone* zone) |
5189 : isolate_(isolate), | 5189 : isolate_(isolate), |
5190 zone_(zone), | 5190 zone_(zone), |
5191 error_(error), | 5191 error_(error), |
5192 captures_(NULL), | 5192 captures_(NULL), |
5193 in_(in), | 5193 in_(in), |
5194 current_(kEndMarker), | 5194 current_(kEndMarker), |
5195 next_pos_(0), | 5195 next_pos_(0), |
5196 captures_started_(0), | |
5197 capture_count_(0), | 5196 capture_count_(0), |
5198 has_more_(true), | 5197 has_more_(true), |
5199 multiline_(multiline), | 5198 multiline_(multiline), |
5200 unicode_(unicode), | 5199 unicode_(unicode), |
5201 simple_(false), | 5200 simple_(false), |
5202 contains_anchor_(false), | 5201 contains_anchor_(false), |
5203 is_scanned_for_captures_(false), | 5202 is_scanned_for_captures_(false), |
5204 failed_(false) { | 5203 failed_(false) { |
5205 Advance(); | 5204 Advance(); |
5206 } | 5205 } |
(...skipping 83 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5290 // Alternative | Disjunction | 5289 // Alternative | Disjunction |
5291 // Alternative :: | 5290 // Alternative :: |
5292 // [empty] | 5291 // [empty] |
5293 // Term Alternative | 5292 // Term Alternative |
5294 // Term :: | 5293 // Term :: |
5295 // Assertion | 5294 // Assertion |
5296 // Atom | 5295 // Atom |
5297 // Atom Quantifier | 5296 // Atom Quantifier |
5298 RegExpTree* RegExpParser::ParseDisjunction() { | 5297 RegExpTree* RegExpParser::ParseDisjunction() { |
5299 // Used to store current state while parsing subexpressions. | 5298 // Used to store current state while parsing subexpressions. |
5300 RegExpParserState initial_state(NULL, INITIAL, RegExpLookaround::LOOKAHEAD, 0, | 5299 RegExpParserState initial_state(NULL, INITIAL, 0, zone()); |
5301 zone()); | 5300 RegExpParserState* stored_state = &initial_state; |
5302 RegExpParserState* state = &initial_state; | |
5303 // Cache the builder in a local variable for quick access. | 5301 // Cache the builder in a local variable for quick access. |
5304 RegExpBuilder* builder = initial_state.builder(); | 5302 RegExpBuilder* builder = initial_state.builder(); |
5305 while (true) { | 5303 while (true) { |
5306 switch (current()) { | 5304 switch (current()) { |
5307 case kEndMarker: | 5305 case kEndMarker: |
5308 if (state->IsSubexpression()) { | 5306 if (stored_state->IsSubexpression()) { |
5309 // Inside a parenthesized group when hitting end of input. | 5307 // Inside a parenthesized group when hitting end of input. |
5310 ReportError(CStrVector("Unterminated group") CHECK_FAILED); | 5308 ReportError(CStrVector("Unterminated group") CHECK_FAILED); |
5311 } | 5309 } |
5312 DCHECK_EQ(INITIAL, state->group_type()); | 5310 DCHECK_EQ(INITIAL, stored_state->group_type()); |
5313 // Parsing completed successfully. | 5311 // Parsing completed successfully. |
5314 return builder->ToRegExp(); | 5312 return builder->ToRegExp(); |
5315 case ')': { | 5313 case ')': { |
5316 if (!state->IsSubexpression()) { | 5314 if (!stored_state->IsSubexpression()) { |
5317 ReportError(CStrVector("Unmatched ')'") CHECK_FAILED); | 5315 ReportError(CStrVector("Unmatched ')'") CHECK_FAILED); |
5318 } | 5316 } |
5319 DCHECK_NE(INITIAL, state->group_type()); | 5317 DCHECK_NE(INITIAL, stored_state->group_type()); |
5320 | 5318 |
5321 Advance(); | 5319 Advance(); |
5322 // End disjunction parsing and convert builder content to new single | 5320 // End disjunction parsing and convert builder content to new single |
5323 // regexp atom. | 5321 // regexp atom. |
5324 RegExpTree* body = builder->ToRegExp(); | 5322 RegExpTree* body = builder->ToRegExp(); |
5325 | 5323 |
5326 int end_capture_index = captures_started(); | 5324 int end_capture_index = captures_started(); |
5327 | 5325 |
5328 int capture_index = state->capture_index(); | 5326 int capture_index = stored_state->capture_index(); |
5329 SubexpressionType group_type = state->group_type(); | 5327 SubexpressionType group_type = stored_state->group_type(); |
| 5328 |
| 5329 // Restore previous state. |
| 5330 stored_state = stored_state->previous_state(); |
| 5331 builder = stored_state->builder(); |
5330 | 5332 |
5331 // Build result of subexpression. | 5333 // Build result of subexpression. |
5332 if (group_type == CAPTURE) { | 5334 if (group_type == CAPTURE) { |
5333 RegExpCapture* capture = GetCapture(capture_index); | 5335 RegExpCapture* capture = new(zone()) RegExpCapture(body, capture_index); |
5334 capture->set_body(body); | 5336 captures_->at(capture_index - 1) = capture; |
5335 body = capture; | 5337 body = capture; |
5336 } else if (group_type != GROUPING) { | 5338 } else if (group_type != GROUPING) { |
5337 DCHECK(group_type == POSITIVE_LOOKAROUND || | 5339 DCHECK(group_type == POSITIVE_LOOKAHEAD || |
5338 group_type == NEGATIVE_LOOKAROUND); | 5340 group_type == NEGATIVE_LOOKAHEAD); |
5339 bool is_positive = (group_type == POSITIVE_LOOKAROUND); | 5341 bool is_positive = (group_type == POSITIVE_LOOKAHEAD); |
5340 body = new (zone()) RegExpLookaround( | 5342 body = new(zone()) RegExpLookahead(body, |
5341 body, is_positive, end_capture_index - capture_index, capture_index, | 5343 is_positive, |
5342 state->lookaround_type()); | 5344 end_capture_index - capture_index, |
| 5345 capture_index); |
5343 } | 5346 } |
5344 | |
5345 // Restore previous state. | |
5346 state = state->previous_state(); | |
5347 builder = state->builder(); | |
5348 | |
5349 builder->AddAtom(body); | 5347 builder->AddAtom(body); |
5350 // For compatibility with JSC and ES3, we allow quantifiers after | 5348 // For compatibility with JSC and ES3, we allow quantifiers after |
5351 // lookaheads, and break in all cases. | 5349 // lookaheads, and break in all cases. |
5352 break; | 5350 break; |
5353 } | 5351 } |
5354 case '|': { | 5352 case '|': { |
5355 Advance(); | 5353 Advance(); |
5356 builder->NewAlternative(); | 5354 builder->NewAlternative(); |
5357 continue; | 5355 continue; |
5358 } | 5356 } |
(...skipping 26 matching lines...) Expand all Loading... |
5385 // everything except \x0a, \x0d, \u2028 and \u2029 | 5383 // everything except \x0a, \x0d, \u2028 and \u2029 |
5386 ZoneList<CharacterRange>* ranges = | 5384 ZoneList<CharacterRange>* ranges = |
5387 new(zone()) ZoneList<CharacterRange>(2, zone()); | 5385 new(zone()) ZoneList<CharacterRange>(2, zone()); |
5388 CharacterRange::AddClassEscape('.', ranges, zone()); | 5386 CharacterRange::AddClassEscape('.', ranges, zone()); |
5389 RegExpTree* atom = new(zone()) RegExpCharacterClass(ranges, false); | 5387 RegExpTree* atom = new(zone()) RegExpCharacterClass(ranges, false); |
5390 builder->AddAtom(atom); | 5388 builder->AddAtom(atom); |
5391 break; | 5389 break; |
5392 } | 5390 } |
5393 case '(': { | 5391 case '(': { |
5394 SubexpressionType subexpr_type = CAPTURE; | 5392 SubexpressionType subexpr_type = CAPTURE; |
5395 RegExpLookaround::Type lookaround_type = state->lookaround_type(); | |
5396 Advance(); | 5393 Advance(); |
5397 if (current() == '?') { | 5394 if (current() == '?') { |
5398 switch (Next()) { | 5395 switch (Next()) { |
5399 case ':': | 5396 case ':': |
5400 subexpr_type = GROUPING; | 5397 subexpr_type = GROUPING; |
5401 break; | 5398 break; |
5402 case '=': | 5399 case '=': |
5403 lookaround_type = RegExpLookaround::LOOKAHEAD; | 5400 subexpr_type = POSITIVE_LOOKAHEAD; |
5404 subexpr_type = POSITIVE_LOOKAROUND; | |
5405 break; | 5401 break; |
5406 case '!': | 5402 case '!': |
5407 lookaround_type = RegExpLookaround::LOOKAHEAD; | 5403 subexpr_type = NEGATIVE_LOOKAHEAD; |
5408 subexpr_type = NEGATIVE_LOOKAROUND; | |
5409 break; | 5404 break; |
5410 case '<': | |
5411 if (FLAG_harmony_regexp_lookbehind) { | |
5412 Advance(); | |
5413 lookaround_type = RegExpLookaround::LOOKBEHIND; | |
5414 if (Next() == '=') { | |
5415 subexpr_type = POSITIVE_LOOKAROUND; | |
5416 break; | |
5417 } else if (Next() == '!') { | |
5418 subexpr_type = NEGATIVE_LOOKAROUND; | |
5419 break; | |
5420 } | |
5421 } | |
5422 // Fall through. | |
5423 default: | 5405 default: |
5424 ReportError(CStrVector("Invalid group") CHECK_FAILED); | 5406 ReportError(CStrVector("Invalid group") CHECK_FAILED); |
5425 break; | 5407 break; |
5426 } | 5408 } |
5427 Advance(2); | 5409 Advance(2); |
5428 } else { | 5410 } else { |
5429 if (captures_started_ >= kMaxCaptures) { | 5411 if (captures_ == NULL) { |
| 5412 captures_ = new(zone()) ZoneList<RegExpCapture*>(2, zone()); |
| 5413 } |
| 5414 if (captures_started() >= kMaxCaptures) { |
5430 ReportError(CStrVector("Too many captures") CHECK_FAILED); | 5415 ReportError(CStrVector("Too many captures") CHECK_FAILED); |
5431 } | 5416 } |
5432 captures_started_++; | 5417 captures_->Add(NULL, zone()); |
5433 } | 5418 } |
5434 // Store current state and begin new disjunction parsing. | 5419 // Store current state and begin new disjunction parsing. |
5435 state = new (zone()) RegExpParserState( | 5420 stored_state = new(zone()) RegExpParserState(stored_state, subexpr_type, |
5436 state, subexpr_type, lookaround_type, captures_started_, zone()); | 5421 captures_started(), zone()); |
5437 builder = state->builder(); | 5422 builder = stored_state->builder(); |
5438 continue; | 5423 continue; |
5439 } | 5424 } |
5440 case '[': { | 5425 case '[': { |
5441 RegExpTree* atom = ParseCharacterClass(CHECK_FAILED); | 5426 RegExpTree* atom = ParseCharacterClass(CHECK_FAILED); |
5442 builder->AddAtom(atom); | 5427 builder->AddAtom(atom); |
5443 break; | 5428 break; |
5444 } | 5429 } |
5445 // Atom :: | 5430 // Atom :: |
5446 // \ AtomEscape | 5431 // \ AtomEscape |
5447 case '\\': | 5432 case '\\': |
(...skipping 22 matching lines...) Expand all Loading... |
5470 new(zone()) ZoneList<CharacterRange>(2, zone()); | 5455 new(zone()) ZoneList<CharacterRange>(2, zone()); |
5471 CharacterRange::AddClassEscape(c, ranges, zone()); | 5456 CharacterRange::AddClassEscape(c, ranges, zone()); |
5472 RegExpTree* atom = new(zone()) RegExpCharacterClass(ranges, false); | 5457 RegExpTree* atom = new(zone()) RegExpCharacterClass(ranges, false); |
5473 builder->AddAtom(atom); | 5458 builder->AddAtom(atom); |
5474 break; | 5459 break; |
5475 } | 5460 } |
5476 case '1': case '2': case '3': case '4': case '5': case '6': | 5461 case '1': case '2': case '3': case '4': case '5': case '6': |
5477 case '7': case '8': case '9': { | 5462 case '7': case '8': case '9': { |
5478 int index = 0; | 5463 int index = 0; |
5479 if (ParseBackReferenceIndex(&index)) { | 5464 if (ParseBackReferenceIndex(&index)) { |
5480 if (state->IsInsideCaptureGroup(index)) { | 5465 RegExpCapture* capture = NULL; |
5481 // The backreference is inside the capture group it refers to. | 5466 if (captures_ != NULL && index <= captures_->length()) { |
5482 // Nothing can possibly have been captured yet. | 5467 capture = captures_->at(index - 1); |
| 5468 } |
| 5469 if (capture == NULL) { |
5483 builder->AddEmpty(); | 5470 builder->AddEmpty(); |
5484 } else { | 5471 break; |
5485 RegExpCapture* capture = GetCapture(index); | |
5486 RegExpTree* atom = new (zone()) RegExpBackReference(capture); | |
5487 builder->AddAtom(atom); | |
5488 } | 5472 } |
| 5473 RegExpTree* atom = new(zone()) RegExpBackReference(capture); |
| 5474 builder->AddAtom(atom); |
5489 break; | 5475 break; |
5490 } | 5476 } |
5491 uc32 first_digit = Next(); | 5477 uc32 first_digit = Next(); |
5492 if (first_digit == '8' || first_digit == '9') { | 5478 if (first_digit == '8' || first_digit == '9') { |
5493 // If the 'u' flag is present, only syntax characters can be escaped, | 5479 // If the 'u' flag is present, only syntax characters can be escaped, |
5494 // no other identity escapes are allowed. If the 'u' flag is not | 5480 // no other identity escapes are allowed. If the 'u' flag is not |
5495 // present, all identity escapes are allowed. | 5481 // present, all identity escapes are allowed. |
5496 if (!FLAG_harmony_unicode_regexps || !unicode_) { | 5482 if (!FLAG_harmony_unicode_regexps || !unicode_) { |
5497 builder->AddCharacter(first_digit); | 5483 builder->AddCharacter(first_digit); |
5498 Advance(2); | 5484 Advance(2); |
(...skipping 240 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5739 if (value > capture_count_) { | 5725 if (value > capture_count_) { |
5740 Reset(start); | 5726 Reset(start); |
5741 return false; | 5727 return false; |
5742 } | 5728 } |
5743 } | 5729 } |
5744 *index_out = value; | 5730 *index_out = value; |
5745 return true; | 5731 return true; |
5746 } | 5732 } |
5747 | 5733 |
5748 | 5734 |
// Present in the OLD column only (the NEW column has no counterpart).
// Returns the RegExpCapture stored for the one-based capture |index|.
// The capture list is created on first use and then padded with freshly
// allocated RegExpCapture nodes (numbered length + 1) until it holds as many
// entries as there are known captures: capture_count_ when
// is_scanned_for_captures_ is set, otherwise captures_started_.
// |index| must not exceed that known count (DCHECK).
5749 RegExpCapture* RegExpParser::GetCapture(int index) { | |
5750 // Capture group indices are one-based, whereas positions in the captures_ | |
5751 // list are zero-based. | |
5752 int know_captures = | |
5753 is_scanned_for_captures_ ? capture_count_ : captures_started_; | |
5754 DCHECK(index <= know_captures); | |
5755 if (captures_ == NULL) { | |
5756 captures_ = new (zone()) ZoneList<RegExpCapture*>(know_captures, zone()); | |
5757 } | |
5758 while (captures_->length() < know_captures) { | |
5759 captures_->Add(new (zone()) RegExpCapture(captures_->length() + 1), zone()); | |
5760 } | |
5761 return captures_->at(index - 1); | |
5762 } | |
5763 | |
5764 | |
// Present in the OLD column only (the NEW column has no counterpart).
// Walks from this parser state outwards through previous_state() and returns
// true if some state of group type CAPTURE has capture_index() equal to
// |index|. States of any other group type are skipped. The walk stops with
// false as soon as a CAPTURE state whose capture index is smaller than
// |index| is reached, or when the chain of states is exhausted.
5765 bool RegExpParser::RegExpParserState::IsInsideCaptureGroup(int index) { | |
5766 for (RegExpParserState* s = this; s != NULL; s = s->previous_state()) { | |
5767 if (s->group_type() != CAPTURE) continue; | |
5768 // Return true if we found the matching capture index. | |
5769 if (index == s->capture_index()) return true; | |
5770 // Abort if index is larger than what has been parsed up till this state. | |
5771 if (index > s->capture_index()) return false; | |
5772 } | |
5773 return false; | |
5774 } | |
5775 | |
5776 | |
5777 // QuantifierPrefix :: | 5735 // QuantifierPrefix :: |
5778 // { DecimalDigits } | 5736 // { DecimalDigits } |
5779 // { DecimalDigits , } | 5737 // { DecimalDigits , } |
5780 // { DecimalDigits , DecimalDigits } | 5738 // { DecimalDigits , DecimalDigits } |
5781 // | 5739 // |
5782 // Returns true if parsing succeeds, and sets the min_out and max_out | 5740 // Returns true if parsing succeeds, and sets the min_out and max_out |
5783 // values. Values are truncated to RegExpTree::kInfinity if they overflow. | 5741 // values. Values are truncated to RegExpTree::kInfinity if they overflow. |
5784 bool RegExpParser::ParseIntervalQuantifier(int* min_out, int* max_out) { | 5742 bool RegExpParser::ParseIntervalQuantifier(int* min_out, int* max_out) { |
5785 DCHECK_EQ(current(), '{'); | 5743 DCHECK_EQ(current(), '{'); |
5786 int start = position(); | 5744 int start = position(); |
(...skipping 311 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
6098 } | 6056 } |
6099 } | 6057 } |
6100 if (!has_more()) { | 6058 if (!has_more()) { |
6101 return ReportError(CStrVector(kUnterminated) CHECK_FAILED); | 6059 return ReportError(CStrVector(kUnterminated) CHECK_FAILED); |
6102 } | 6060 } |
6103 Advance(); | 6061 Advance(); |
6104 if (ranges->length() == 0) { | 6062 if (ranges->length() == 0) { |
6105 ranges->Add(CharacterRange::Everything(), zone()); | 6063 ranges->Add(CharacterRange::Everything(), zone()); |
6106 is_negated = !is_negated; | 6064 is_negated = !is_negated; |
6107 } | 6065 } |
6108 return new (zone()) RegExpCharacterClass(ranges, is_negated); | 6066 return new(zone()) RegExpCharacterClass(ranges, is_negated); |
6109 } | 6067 } |
6110 | 6068 |
6111 | 6069 |
6112 // ---------------------------------------------------------------------------- | 6070 // ---------------------------------------------------------------------------- |
6113 // The Parser interface. | 6071 // The Parser interface. |
6114 | 6072 |
6115 bool RegExpParser::ParseRegExp(Isolate* isolate, Zone* zone, | 6073 bool RegExpParser::ParseRegExp(Isolate* isolate, Zone* zone, |
6116 FlatStringReader* input, bool multiline, | 6074 FlatStringReader* input, bool multiline, |
6117 bool unicode, RegExpCompileData* result) { | 6075 bool unicode, RegExpCompileData* result) { |
6118 DCHECK(result != NULL); | 6076 DCHECK(result != NULL); |
(...skipping 348 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
6467 } | 6425 } |
6468 | 6426 |
6469 | 6427 |
// Updates the language mode of scope_ to the bitwise OR of its current
// language mode and |mode|, applying the result through SetLanguageMode().
// NOTE(review): this relies on LanguageMode values combining meaningfully
// under bitwise OR -- confirm against the LanguageMode definition.
6470 void Parser::RaiseLanguageMode(LanguageMode mode) { | 6428 void Parser::RaiseLanguageMode(LanguageMode mode) { |
6471 SetLanguageMode(scope_, | 6429 SetLanguageMode(scope_, |
6472 static_cast<LanguageMode>(scope_->language_mode() | mode)); | 6430 static_cast<LanguageMode>(scope_->language_mode() | mode)); |
6473 } | 6431 } |
6474 | 6432 |
6475 } // namespace internal | 6433 } // namespace internal |
6476 } // namespace v8 | 6434 } // namespace v8 |
OLD | NEW |