| OLD | NEW |
| 1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "src/parser.h" | 5 #include "src/parser.h" |
| 6 | 6 |
| 7 #include "src/api.h" | 7 #include "src/api.h" |
| 8 #include "src/ast.h" | 8 #include "src/ast.h" |
| 9 #include "src/ast-literal-reindexer.h" | 9 #include "src/ast-literal-reindexer.h" |
| 10 #include "src/bailout-reason.h" | 10 #include "src/bailout-reason.h" |
| (...skipping 5175 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5186 RegExpParser::RegExpParser(FlatStringReader* in, Handle<String>* error, | 5186 RegExpParser::RegExpParser(FlatStringReader* in, Handle<String>* error, |
| 5187 bool multiline, bool unicode, Isolate* isolate, | 5187 bool multiline, bool unicode, Isolate* isolate, |
| 5188 Zone* zone) | 5188 Zone* zone) |
| 5189 : isolate_(isolate), | 5189 : isolate_(isolate), |
| 5190 zone_(zone), | 5190 zone_(zone), |
| 5191 error_(error), | 5191 error_(error), |
| 5192 captures_(NULL), | 5192 captures_(NULL), |
| 5193 in_(in), | 5193 in_(in), |
| 5194 current_(kEndMarker), | 5194 current_(kEndMarker), |
| 5195 next_pos_(0), | 5195 next_pos_(0), |
| 5196 captures_started_(0), | |
| 5197 capture_count_(0), | 5196 capture_count_(0), |
| 5198 has_more_(true), | 5197 has_more_(true), |
| 5199 multiline_(multiline), | 5198 multiline_(multiline), |
| 5200 unicode_(unicode), | 5199 unicode_(unicode), |
| 5201 simple_(false), | 5200 simple_(false), |
| 5202 contains_anchor_(false), | 5201 contains_anchor_(false), |
| 5203 is_scanned_for_captures_(false), | 5202 is_scanned_for_captures_(false), |
| 5204 failed_(false) { | 5203 failed_(false) { |
| 5205 Advance(); | 5204 Advance(); |
| 5206 } | 5205 } |
| (...skipping 83 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5290 // Alternative | Disjunction | 5289 // Alternative | Disjunction |
| 5291 // Alternative :: | 5290 // Alternative :: |
| 5292 // [empty] | 5291 // [empty] |
| 5293 // Term Alternative | 5292 // Term Alternative |
| 5294 // Term :: | 5293 // Term :: |
| 5295 // Assertion | 5294 // Assertion |
| 5296 // Atom | 5295 // Atom |
| 5297 // Atom Quantifier | 5296 // Atom Quantifier |
| 5298 RegExpTree* RegExpParser::ParseDisjunction() { | 5297 RegExpTree* RegExpParser::ParseDisjunction() { |
| 5299 // Used to store current state while parsing subexpressions. | 5298 // Used to store current state while parsing subexpressions. |
| 5300 RegExpParserState initial_state(NULL, INITIAL, RegExpLookaround::LOOKAHEAD, 0, | 5299 RegExpParserState initial_state(NULL, INITIAL, 0, zone()); |
| 5301 zone()); | 5300 RegExpParserState* stored_state = &initial_state; |
| 5302 RegExpParserState* state = &initial_state; | |
| 5303 // Cache the builder in a local variable for quick access. | 5301 // Cache the builder in a local variable for quick access. |
| 5304 RegExpBuilder* builder = initial_state.builder(); | 5302 RegExpBuilder* builder = initial_state.builder(); |
| 5305 while (true) { | 5303 while (true) { |
| 5306 switch (current()) { | 5304 switch (current()) { |
| 5307 case kEndMarker: | 5305 case kEndMarker: |
| 5308 if (state->IsSubexpression()) { | 5306 if (stored_state->IsSubexpression()) { |
| 5309 // Inside a parenthesized group when hitting end of input. | 5307 // Inside a parenthesized group when hitting end of input. |
| 5310 ReportError(CStrVector("Unterminated group") CHECK_FAILED); | 5308 ReportError(CStrVector("Unterminated group") CHECK_FAILED); |
| 5311 } | 5309 } |
| 5312 DCHECK_EQ(INITIAL, state->group_type()); | 5310 DCHECK_EQ(INITIAL, stored_state->group_type()); |
| 5313 // Parsing completed successfully. | 5311 // Parsing completed successfully. |
| 5314 return builder->ToRegExp(); | 5312 return builder->ToRegExp(); |
| 5315 case ')': { | 5313 case ')': { |
| 5316 if (!state->IsSubexpression()) { | 5314 if (!stored_state->IsSubexpression()) { |
| 5317 ReportError(CStrVector("Unmatched ')'") CHECK_FAILED); | 5315 ReportError(CStrVector("Unmatched ')'") CHECK_FAILED); |
| 5318 } | 5316 } |
| 5319 DCHECK_NE(INITIAL, state->group_type()); | 5317 DCHECK_NE(INITIAL, stored_state->group_type()); |
| 5320 | 5318 |
| 5321 Advance(); | 5319 Advance(); |
| 5322 // End disjunction parsing and convert builder content to new single | 5320 // End disjunction parsing and convert builder content to new single |
| 5323 // regexp atom. | 5321 // regexp atom. |
| 5324 RegExpTree* body = builder->ToRegExp(); | 5322 RegExpTree* body = builder->ToRegExp(); |
| 5325 | 5323 |
| 5326 int end_capture_index = captures_started(); | 5324 int end_capture_index = captures_started(); |
| 5327 | 5325 |
| 5328 int capture_index = state->capture_index(); | 5326 int capture_index = stored_state->capture_index(); |
| 5329 SubexpressionType group_type = state->group_type(); | 5327 SubexpressionType group_type = stored_state->group_type(); |
| 5328 |
| 5329 // Restore previous state. |
| 5330 stored_state = stored_state->previous_state(); |
| 5331 builder = stored_state->builder(); |
| 5330 | 5332 |
| 5331 // Build result of subexpression. | 5333 // Build result of subexpression. |
| 5332 if (group_type == CAPTURE) { | 5334 if (group_type == CAPTURE) { |
| 5333 RegExpCapture* capture = GetCapture(capture_index); | 5335 RegExpCapture* capture = new(zone()) RegExpCapture(body, capture_index); |
| 5334 capture->set_body(body); | 5336 captures_->at(capture_index - 1) = capture; |
| 5335 body = capture; | 5337 body = capture; |
| 5336 } else if (group_type != GROUPING) { | 5338 } else if (group_type != GROUPING) { |
| 5337 DCHECK(group_type == POSITIVE_LOOKAROUND || | 5339 DCHECK(group_type == POSITIVE_LOOKAHEAD || |
| 5338 group_type == NEGATIVE_LOOKAROUND); | 5340 group_type == NEGATIVE_LOOKAHEAD); |
| 5339 bool is_positive = (group_type == POSITIVE_LOOKAROUND); | 5341 bool is_positive = (group_type == POSITIVE_LOOKAHEAD); |
| 5340 body = new (zone()) RegExpLookaround( | 5342 body = new(zone()) RegExpLookahead(body, |
| 5341 body, is_positive, end_capture_index - capture_index, capture_index, | 5343 is_positive, |
| 5342 state->lookaround_type()); | 5344 end_capture_index - capture_index, |
| 5345 capture_index); |
| 5343 } | 5346 } |
| 5344 | |
| 5345 // Restore previous state. | |
| 5346 state = state->previous_state(); | |
| 5347 builder = state->builder(); | |
| 5348 | |
| 5349 builder->AddAtom(body); | 5347 builder->AddAtom(body); |
| 5350 // For compatibility with JSC and ES3, we allow quantifiers after | 5348 // For compatibility with JSC and ES3, we allow quantifiers after |
| 5351 // lookaheads, and break in all cases. | 5349 // lookaheads, and break in all cases. |
| 5352 break; | 5350 break; |
| 5353 } | 5351 } |
| 5354 case '|': { | 5352 case '|': { |
| 5355 Advance(); | 5353 Advance(); |
| 5356 builder->NewAlternative(); | 5354 builder->NewAlternative(); |
| 5357 continue; | 5355 continue; |
| 5358 } | 5356 } |
| (...skipping 26 matching lines...) Expand all Loading... |
| 5385 // everything except \x0a, \x0d, \u2028 and \u2029 | 5383 // everything except \x0a, \x0d, \u2028 and \u2029 |
| 5386 ZoneList<CharacterRange>* ranges = | 5384 ZoneList<CharacterRange>* ranges = |
| 5387 new(zone()) ZoneList<CharacterRange>(2, zone()); | 5385 new(zone()) ZoneList<CharacterRange>(2, zone()); |
| 5388 CharacterRange::AddClassEscape('.', ranges, zone()); | 5386 CharacterRange::AddClassEscape('.', ranges, zone()); |
| 5389 RegExpTree* atom = new(zone()) RegExpCharacterClass(ranges, false); | 5387 RegExpTree* atom = new(zone()) RegExpCharacterClass(ranges, false); |
| 5390 builder->AddAtom(atom); | 5388 builder->AddAtom(atom); |
| 5391 break; | 5389 break; |
| 5392 } | 5390 } |
| 5393 case '(': { | 5391 case '(': { |
| 5394 SubexpressionType subexpr_type = CAPTURE; | 5392 SubexpressionType subexpr_type = CAPTURE; |
| 5395 RegExpLookaround::Type lookaround_type = state->lookaround_type(); | |
| 5396 Advance(); | 5393 Advance(); |
| 5397 if (current() == '?') { | 5394 if (current() == '?') { |
| 5398 switch (Next()) { | 5395 switch (Next()) { |
| 5399 case ':': | 5396 case ':': |
| 5400 subexpr_type = GROUPING; | 5397 subexpr_type = GROUPING; |
| 5401 break; | 5398 break; |
| 5402 case '=': | 5399 case '=': |
| 5403 lookaround_type = RegExpLookaround::LOOKAHEAD; | 5400 subexpr_type = POSITIVE_LOOKAHEAD; |
| 5404 subexpr_type = POSITIVE_LOOKAROUND; | |
| 5405 break; | 5401 break; |
| 5406 case '!': | 5402 case '!': |
| 5407 lookaround_type = RegExpLookaround::LOOKAHEAD; | 5403 subexpr_type = NEGATIVE_LOOKAHEAD; |
| 5408 subexpr_type = NEGATIVE_LOOKAROUND; | |
| 5409 break; | 5404 break; |
| 5410 case '<': | |
| 5411 if (FLAG_harmony_regexp_lookbehind) { | |
| 5412 Advance(); | |
| 5413 lookaround_type = RegExpLookaround::LOOKBEHIND; | |
| 5414 if (Next() == '=') { | |
| 5415 subexpr_type = POSITIVE_LOOKAROUND; | |
| 5416 break; | |
| 5417 } else if (Next() == '!') { | |
| 5418 subexpr_type = NEGATIVE_LOOKAROUND; | |
| 5419 break; | |
| 5420 } | |
| 5421 } | |
| 5422 // Fall through. | |
| 5423 default: | 5405 default: |
| 5424 ReportError(CStrVector("Invalid group") CHECK_FAILED); | 5406 ReportError(CStrVector("Invalid group") CHECK_FAILED); |
| 5425 break; | 5407 break; |
| 5426 } | 5408 } |
| 5427 Advance(2); | 5409 Advance(2); |
| 5428 } else { | 5410 } else { |
| 5429 if (captures_started_ >= kMaxCaptures) { | 5411 if (captures_ == NULL) { |
| 5412 captures_ = new(zone()) ZoneList<RegExpCapture*>(2, zone()); |
| 5413 } |
| 5414 if (captures_started() >= kMaxCaptures) { |
| 5430 ReportError(CStrVector("Too many captures") CHECK_FAILED); | 5415 ReportError(CStrVector("Too many captures") CHECK_FAILED); |
| 5431 } | 5416 } |
| 5432 captures_started_++; | 5417 captures_->Add(NULL, zone()); |
| 5433 } | 5418 } |
| 5434 // Store current state and begin new disjunction parsing. | 5419 // Store current state and begin new disjunction parsing. |
| 5435 state = new (zone()) RegExpParserState( | 5420 stored_state = new(zone()) RegExpParserState(stored_state, subexpr_type, |
| 5436 state, subexpr_type, lookaround_type, captures_started_, zone()); | 5421 captures_started(), zone()); |
| 5437 builder = state->builder(); | 5422 builder = stored_state->builder(); |
| 5438 continue; | 5423 continue; |
| 5439 } | 5424 } |
| 5440 case '[': { | 5425 case '[': { |
| 5441 RegExpTree* atom = ParseCharacterClass(CHECK_FAILED); | 5426 RegExpTree* atom = ParseCharacterClass(CHECK_FAILED); |
| 5442 builder->AddAtom(atom); | 5427 builder->AddAtom(atom); |
| 5443 break; | 5428 break; |
| 5444 } | 5429 } |
| 5445 // Atom :: | 5430 // Atom :: |
| 5446 // \ AtomEscape | 5431 // \ AtomEscape |
| 5447 case '\\': | 5432 case '\\': |
| (...skipping 22 matching lines...) Expand all Loading... |
| 5470 new(zone()) ZoneList<CharacterRange>(2, zone()); | 5455 new(zone()) ZoneList<CharacterRange>(2, zone()); |
| 5471 CharacterRange::AddClassEscape(c, ranges, zone()); | 5456 CharacterRange::AddClassEscape(c, ranges, zone()); |
| 5472 RegExpTree* atom = new(zone()) RegExpCharacterClass(ranges, false); | 5457 RegExpTree* atom = new(zone()) RegExpCharacterClass(ranges, false); |
| 5473 builder->AddAtom(atom); | 5458 builder->AddAtom(atom); |
| 5474 break; | 5459 break; |
| 5475 } | 5460 } |
| 5476 case '1': case '2': case '3': case '4': case '5': case '6': | 5461 case '1': case '2': case '3': case '4': case '5': case '6': |
| 5477 case '7': case '8': case '9': { | 5462 case '7': case '8': case '9': { |
| 5478 int index = 0; | 5463 int index = 0; |
| 5479 if (ParseBackReferenceIndex(&index)) { | 5464 if (ParseBackReferenceIndex(&index)) { |
| 5480 if (state->IsInsideCaptureGroup(index)) { | 5465 RegExpCapture* capture = NULL; |
| 5481 // The backreference is inside the capture group it refers to. | 5466 if (captures_ != NULL && index <= captures_->length()) { |
| 5482 // Nothing can possibly have been captured yet. | 5467 capture = captures_->at(index - 1); |
| 5468 } |
| 5469 if (capture == NULL) { |
| 5483 builder->AddEmpty(); | 5470 builder->AddEmpty(); |
| 5484 } else { | 5471 break; |
| 5485 RegExpCapture* capture = GetCapture(index); | |
| 5486 RegExpTree* atom = new (zone()) RegExpBackReference(capture); | |
| 5487 builder->AddAtom(atom); | |
| 5488 } | 5472 } |
| 5473 RegExpTree* atom = new(zone()) RegExpBackReference(capture); |
| 5474 builder->AddAtom(atom); |
| 5489 break; | 5475 break; |
| 5490 } | 5476 } |
| 5491 uc32 first_digit = Next(); | 5477 uc32 first_digit = Next(); |
| 5492 if (first_digit == '8' || first_digit == '9') { | 5478 if (first_digit == '8' || first_digit == '9') { |
| 5493 // If the 'u' flag is present, only syntax characters can be escaped, | 5479 // If the 'u' flag is present, only syntax characters can be escaped, |
| 5494 // no other identity escapes are allowed. If the 'u' flag is not | 5480 // no other identity escapes are allowed. If the 'u' flag is not |
| 5495 // present, all identity escapes are allowed. | 5481 // present, all identity escapes are allowed. |
| 5496 if (!FLAG_harmony_unicode_regexps || !unicode_) { | 5482 if (!FLAG_harmony_unicode_regexps || !unicode_) { |
| 5497 builder->AddCharacter(first_digit); | 5483 builder->AddCharacter(first_digit); |
| 5498 Advance(2); | 5484 Advance(2); |
| (...skipping 240 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5739 if (value > capture_count_) { | 5725 if (value > capture_count_) { |
| 5740 Reset(start); | 5726 Reset(start); |
| 5741 return false; | 5727 return false; |
| 5742 } | 5728 } |
| 5743 } | 5729 } |
| 5744 *index_out = value; | 5730 *index_out = value; |
| 5745 return true; | 5731 return true; |
| 5746 } | 5732 } |
| 5747 | 5733 |
| 5748 | 5734 |
| 5749 RegExpCapture* RegExpParser::GetCapture(int index) { | |
| 5750 // The indices of the capture groups are one-based. Their indices in the | |
| 5751 // list are zero-based. | |
| 5752 int know_captures = | |
| 5753 is_scanned_for_captures_ ? capture_count_ : captures_started_; | |
| 5754 DCHECK(index <= know_captures); | |
| 5755 if (captures_ == NULL) { | |
| 5756 captures_ = new (zone()) ZoneList<RegExpCapture*>(know_captures, zone()); | |
| 5757 } | |
| 5758 while (captures_->length() < know_captures) { | |
| 5759 captures_->Add(new (zone()) RegExpCapture(captures_->length() + 1), zone()); | |
| 5760 } | |
| 5761 return captures_->at(index - 1); | |
| 5762 } | |
| 5763 | |
| 5764 | |
| 5765 bool RegExpParser::RegExpParserState::IsInsideCaptureGroup(int index) { | |
| 5766 for (RegExpParserState* s = this; s != NULL; s = s->previous_state()) { | |
| 5767 if (s->group_type() != CAPTURE) continue; | |
| 5768 // Return true if we found the matching capture index. | |
| 5769 if (index == s->capture_index()) return true; | |
| 5770 // Abort if index is larger than what has been parsed up till this state. | |
| 5771 if (index > s->capture_index()) return false; | |
| 5772 } | |
| 5773 return false; | |
| 5774 } | |
| 5775 | |
| 5776 | |
| 5777 // QuantifierPrefix :: | 5735 // QuantifierPrefix :: |
| 5778 // { DecimalDigits } | 5736 // { DecimalDigits } |
| 5779 // { DecimalDigits , } | 5737 // { DecimalDigits , } |
| 5780 // { DecimalDigits , DecimalDigits } | 5738 // { DecimalDigits , DecimalDigits } |
| 5781 // | 5739 // |
| 5782 // Returns true if parsing succeeds, and sets the min_out and max_out | 5740 // Returns true if parsing succeeds, and sets the min_out and max_out |
| 5783 // values. Values are truncated to RegExpTree::kInfinity if they overflow. | 5741 // values. Values are truncated to RegExpTree::kInfinity if they overflow. |
| 5784 bool RegExpParser::ParseIntervalQuantifier(int* min_out, int* max_out) { | 5742 bool RegExpParser::ParseIntervalQuantifier(int* min_out, int* max_out) { |
| 5785 DCHECK_EQ(current(), '{'); | 5743 DCHECK_EQ(current(), '{'); |
| 5786 int start = position(); | 5744 int start = position(); |
| (...skipping 311 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 6098 } | 6056 } |
| 6099 } | 6057 } |
| 6100 if (!has_more()) { | 6058 if (!has_more()) { |
| 6101 return ReportError(CStrVector(kUnterminated) CHECK_FAILED); | 6059 return ReportError(CStrVector(kUnterminated) CHECK_FAILED); |
| 6102 } | 6060 } |
| 6103 Advance(); | 6061 Advance(); |
| 6104 if (ranges->length() == 0) { | 6062 if (ranges->length() == 0) { |
| 6105 ranges->Add(CharacterRange::Everything(), zone()); | 6063 ranges->Add(CharacterRange::Everything(), zone()); |
| 6106 is_negated = !is_negated; | 6064 is_negated = !is_negated; |
| 6107 } | 6065 } |
| 6108 return new (zone()) RegExpCharacterClass(ranges, is_negated); | 6066 return new(zone()) RegExpCharacterClass(ranges, is_negated); |
| 6109 } | 6067 } |
| 6110 | 6068 |
| 6111 | 6069 |
| 6112 // ---------------------------------------------------------------------------- | 6070 // ---------------------------------------------------------------------------- |
| 6113 // The Parser interface. | 6071 // The Parser interface. |
| 6114 | 6072 |
| 6115 bool RegExpParser::ParseRegExp(Isolate* isolate, Zone* zone, | 6073 bool RegExpParser::ParseRegExp(Isolate* isolate, Zone* zone, |
| 6116 FlatStringReader* input, bool multiline, | 6074 FlatStringReader* input, bool multiline, |
| 6117 bool unicode, RegExpCompileData* result) { | 6075 bool unicode, RegExpCompileData* result) { |
| 6118 DCHECK(result != NULL); | 6076 DCHECK(result != NULL); |
| (...skipping 348 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 6467 } | 6425 } |
| 6468 | 6426 |
| 6469 | 6427 |
| 6470 void Parser::RaiseLanguageMode(LanguageMode mode) { | 6428 void Parser::RaiseLanguageMode(LanguageMode mode) { |
| 6471 SetLanguageMode(scope_, | 6429 SetLanguageMode(scope_, |
| 6472 static_cast<LanguageMode>(scope_->language_mode() | mode)); | 6430 static_cast<LanguageMode>(scope_->language_mode() | mode)); |
| 6473 } | 6431 } |
| 6474 | 6432 |
| 6475 } // namespace internal | 6433 } // namespace internal |
| 6476 } // namespace v8 | 6434 } // namespace v8 |
| OLD | NEW |