OLD | NEW |
1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "src/parser.h" | 5 #include "src/parser.h" |
6 | 6 |
7 #include "src/api.h" | 7 #include "src/api.h" |
8 #include "src/ast.h" | 8 #include "src/ast.h" |
9 #include "src/ast-literal-reindexer.h" | 9 #include "src/ast-literal-reindexer.h" |
10 #include "src/bailout-reason.h" | 10 #include "src/bailout-reason.h" |
(...skipping 5164 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5175 RegExpParser::RegExpParser(FlatStringReader* in, Handle<String>* error, | 5175 RegExpParser::RegExpParser(FlatStringReader* in, Handle<String>* error, |
5176 bool multiline, bool unicode, Isolate* isolate, | 5176 bool multiline, bool unicode, Isolate* isolate, |
5177 Zone* zone) | 5177 Zone* zone) |
5178 : isolate_(isolate), | 5178 : isolate_(isolate), |
5179 zone_(zone), | 5179 zone_(zone), |
5180 error_(error), | 5180 error_(error), |
5181 captures_(NULL), | 5181 captures_(NULL), |
5182 in_(in), | 5182 in_(in), |
5183 current_(kEndMarker), | 5183 current_(kEndMarker), |
5184 next_pos_(0), | 5184 next_pos_(0), |
| 5185 captures_started_(0), |
5185 capture_count_(0), | 5186 capture_count_(0), |
5186 has_more_(true), | 5187 has_more_(true), |
5187 multiline_(multiline), | 5188 multiline_(multiline), |
5188 unicode_(unicode), | 5189 unicode_(unicode), |
5189 simple_(false), | 5190 simple_(false), |
5190 contains_anchor_(false), | 5191 contains_anchor_(false), |
5191 is_scanned_for_captures_(false), | 5192 is_scanned_for_captures_(false), |
5192 failed_(false) { | 5193 failed_(false) { |
5193 Advance(); | 5194 Advance(); |
5194 } | 5195 } |
(...skipping 83 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5278 // Alternative | Disjunction | 5279 // Alternative | Disjunction |
5279 // Alternative :: | 5280 // Alternative :: |
5280 // [empty] | 5281 // [empty] |
5281 // Term Alternative | 5282 // Term Alternative |
5282 // Term :: | 5283 // Term :: |
5283 // Assertion | 5284 // Assertion |
5284 // Atom | 5285 // Atom |
5285 // Atom Quantifier | 5286 // Atom Quantifier |
5286 RegExpTree* RegExpParser::ParseDisjunction() { | 5287 RegExpTree* RegExpParser::ParseDisjunction() { |
5287 // Used to store current state while parsing subexpressions. | 5288 // Used to store current state while parsing subexpressions. |
5288 RegExpParserState initial_state(NULL, INITIAL, 0, zone()); | 5289 RegExpParserState initial_state(NULL, INITIAL, RegExpLookaround::LOOKAHEAD, 0, |
5289 RegExpParserState* stored_state = &initial_state; | 5290 zone()); |
| 5291 RegExpParserState* state = &initial_state; |
5290 // Cache the builder in a local variable for quick access. | 5292 // Cache the builder in a local variable for quick access. |
5291 RegExpBuilder* builder = initial_state.builder(); | 5293 RegExpBuilder* builder = initial_state.builder(); |
5292 while (true) { | 5294 while (true) { |
5293 switch (current()) { | 5295 switch (current()) { |
5294 case kEndMarker: | 5296 case kEndMarker: |
5295 if (stored_state->IsSubexpression()) { | 5297 if (state->IsSubexpression()) { |
5296 // Inside a parenthesized group when hitting end of input. | 5298 // Inside a parenthesized group when hitting end of input. |
5297 ReportError(CStrVector("Unterminated group") CHECK_FAILED); | 5299 ReportError(CStrVector("Unterminated group") CHECK_FAILED); |
5298 } | 5300 } |
5299 DCHECK_EQ(INITIAL, stored_state->group_type()); | 5301 DCHECK_EQ(INITIAL, state->group_type()); |
5300 // Parsing completed successfully. | 5302 // Parsing completed successfully. |
5301 return builder->ToRegExp(); | 5303 return builder->ToRegExp(); |
5302 case ')': { | 5304 case ')': { |
5303 if (!stored_state->IsSubexpression()) { | 5305 if (!state->IsSubexpression()) { |
5304 ReportError(CStrVector("Unmatched ')'") CHECK_FAILED); | 5306 ReportError(CStrVector("Unmatched ')'") CHECK_FAILED); |
5305 } | 5307 } |
5306 DCHECK_NE(INITIAL, stored_state->group_type()); | 5308 DCHECK_NE(INITIAL, state->group_type()); |
5307 | 5309 |
5308 Advance(); | 5310 Advance(); |
5309 // End disjunction parsing and convert builder content to new single | 5311 // End disjunction parsing and convert builder content to new single |
5310 // regexp atom. | 5312 // regexp atom. |
5311 RegExpTree* body = builder->ToRegExp(); | 5313 RegExpTree* body = builder->ToRegExp(); |
5312 | 5314 |
5313 int end_capture_index = captures_started(); | 5315 int end_capture_index = captures_started(); |
5314 | 5316 |
5315 int capture_index = stored_state->capture_index(); | 5317 int capture_index = state->capture_index(); |
5316 SubexpressionType group_type = stored_state->group_type(); | 5318 SubexpressionType group_type = state->group_type(); |
5317 | |
5318 // Restore previous state. | |
5319 stored_state = stored_state->previous_state(); | |
5320 builder = stored_state->builder(); | |
5321 | 5319 |
5322 // Build result of subexpression. | 5320 // Build result of subexpression. |
5323 if (group_type == CAPTURE) { | 5321 if (group_type == CAPTURE) { |
5324 RegExpCapture* capture = new(zone()) RegExpCapture(body, capture_index); | 5322 RegExpCapture* capture = GetCapture(capture_index); |
5325 captures_->at(capture_index - 1) = capture; | 5323 capture->set_body(body); |
5326 body = capture; | 5324 body = capture; |
5327 } else if (group_type != GROUPING) { | 5325 } else if (group_type != GROUPING) { |
5328 DCHECK(group_type == POSITIVE_LOOKAHEAD || | 5326 DCHECK(group_type == POSITIVE_LOOKAROUND || |
5329 group_type == NEGATIVE_LOOKAHEAD); | 5327 group_type == NEGATIVE_LOOKAROUND); |
5330 bool is_positive = (group_type == POSITIVE_LOOKAHEAD); | 5328 bool is_positive = (group_type == POSITIVE_LOOKAROUND); |
5331 body = new(zone()) RegExpLookahead(body, | 5329 body = new (zone()) RegExpLookaround( |
5332 is_positive, | 5330 body, is_positive, end_capture_index - capture_index, capture_index, |
5333 end_capture_index - capture_index, | 5331 state->lookaround_type()); |
5334 capture_index); | |
5335 } | 5332 } |
| 5333 |
| 5334 // Restore previous state. |
| 5335 state = state->previous_state(); |
| 5336 builder = state->builder(); |
| 5337 |
5336 builder->AddAtom(body); | 5338 builder->AddAtom(body); |
5337 // For compatability with JSC and ES3, we allow quantifiers after | 5339 // For compatability with JSC and ES3, we allow quantifiers after |
5338 // lookaheads, and break in all cases. | 5340 // lookaheads, and break in all cases. |
5339 break; | 5341 break; |
5340 } | 5342 } |
5341 case '|': { | 5343 case '|': { |
5342 Advance(); | 5344 Advance(); |
5343 builder->NewAlternative(); | 5345 builder->NewAlternative(); |
5344 continue; | 5346 continue; |
5345 } | 5347 } |
(...skipping 26 matching lines...) Expand all Loading... |
5372 // everything except \x0a, \x0d, \u2028 and \u2029 | 5374 // everything except \x0a, \x0d, \u2028 and \u2029 |
5373 ZoneList<CharacterRange>* ranges = | 5375 ZoneList<CharacterRange>* ranges = |
5374 new(zone()) ZoneList<CharacterRange>(2, zone()); | 5376 new(zone()) ZoneList<CharacterRange>(2, zone()); |
5375 CharacterRange::AddClassEscape('.', ranges, zone()); | 5377 CharacterRange::AddClassEscape('.', ranges, zone()); |
5376 RegExpTree* atom = new(zone()) RegExpCharacterClass(ranges, false); | 5378 RegExpTree* atom = new(zone()) RegExpCharacterClass(ranges, false); |
5377 builder->AddAtom(atom); | 5379 builder->AddAtom(atom); |
5378 break; | 5380 break; |
5379 } | 5381 } |
5380 case '(': { | 5382 case '(': { |
5381 SubexpressionType subexpr_type = CAPTURE; | 5383 SubexpressionType subexpr_type = CAPTURE; |
| 5384 RegExpLookaround::Type lookaround_type = state->lookaround_type(); |
5382 Advance(); | 5385 Advance(); |
5383 if (current() == '?') { | 5386 if (current() == '?') { |
5384 switch (Next()) { | 5387 switch (Next()) { |
5385 case ':': | 5388 case ':': |
5386 subexpr_type = GROUPING; | 5389 subexpr_type = GROUPING; |
5387 break; | 5390 break; |
5388 case '=': | 5391 case '=': |
5389 subexpr_type = POSITIVE_LOOKAHEAD; | 5392 lookaround_type = RegExpLookaround::LOOKAHEAD; |
| 5393 subexpr_type = POSITIVE_LOOKAROUND; |
5390 break; | 5394 break; |
5391 case '!': | 5395 case '!': |
5392 subexpr_type = NEGATIVE_LOOKAHEAD; | 5396 lookaround_type = RegExpLookaround::LOOKAHEAD; |
| 5397 subexpr_type = NEGATIVE_LOOKAROUND; |
5393 break; | 5398 break; |
| 5399 case '<': |
| 5400 if (FLAG_harmony_regexp_lookbehind) { |
| 5401 Advance(); |
| 5402 lookaround_type = RegExpLookaround::LOOKBEHIND; |
| 5403 if (Next() == '=') { |
| 5404 subexpr_type = POSITIVE_LOOKAROUND; |
| 5405 break; |
| 5406 } else if (Next() == '!') { |
| 5407 subexpr_type = NEGATIVE_LOOKAROUND; |
| 5408 break; |
| 5409 } |
| 5410 } |
| 5411 // Fall through. |
5394 default: | 5412 default: |
5395 ReportError(CStrVector("Invalid group") CHECK_FAILED); | 5413 ReportError(CStrVector("Invalid group") CHECK_FAILED); |
5396 break; | 5414 break; |
5397 } | 5415 } |
5398 Advance(2); | 5416 Advance(2); |
5399 } else { | 5417 } else { |
5400 if (captures_ == NULL) { | 5418 if (captures_started_ >= kMaxCaptures) { |
5401 captures_ = new(zone()) ZoneList<RegExpCapture*>(2, zone()); | |
5402 } | |
5403 if (captures_started() >= kMaxCaptures) { | |
5404 ReportError(CStrVector("Too many captures") CHECK_FAILED); | 5419 ReportError(CStrVector("Too many captures") CHECK_FAILED); |
5405 } | 5420 } |
5406 captures_->Add(NULL, zone()); | 5421 captures_started_++; |
5407 } | 5422 } |
5408 // Store current state and begin new disjunction parsing. | 5423 // Store current state and begin new disjunction parsing. |
5409 stored_state = new(zone()) RegExpParserState(stored_state, subexpr_type, | 5424 state = new (zone()) RegExpParserState( |
5410 captures_started(), zone()); | 5425 state, subexpr_type, lookaround_type, captures_started_, zone()); |
5411 builder = stored_state->builder(); | 5426 builder = state->builder(); |
5412 continue; | 5427 continue; |
5413 } | 5428 } |
5414 case '[': { | 5429 case '[': { |
5415 RegExpTree* atom = ParseCharacterClass(CHECK_FAILED); | 5430 RegExpTree* atom = ParseCharacterClass(CHECK_FAILED); |
5416 builder->AddAtom(atom); | 5431 builder->AddAtom(atom); |
5417 break; | 5432 break; |
5418 } | 5433 } |
5419 // Atom :: | 5434 // Atom :: |
5420 // \ AtomEscape | 5435 // \ AtomEscape |
5421 case '\\': | 5436 case '\\': |
(...skipping 22 matching lines...) Expand all Loading... |
5444 new(zone()) ZoneList<CharacterRange>(2, zone()); | 5459 new(zone()) ZoneList<CharacterRange>(2, zone()); |
5445 CharacterRange::AddClassEscape(c, ranges, zone()); | 5460 CharacterRange::AddClassEscape(c, ranges, zone()); |
5446 RegExpTree* atom = new(zone()) RegExpCharacterClass(ranges, false); | 5461 RegExpTree* atom = new(zone()) RegExpCharacterClass(ranges, false); |
5447 builder->AddAtom(atom); | 5462 builder->AddAtom(atom); |
5448 break; | 5463 break; |
5449 } | 5464 } |
5450 case '1': case '2': case '3': case '4': case '5': case '6': | 5465 case '1': case '2': case '3': case '4': case '5': case '6': |
5451 case '7': case '8': case '9': { | 5466 case '7': case '8': case '9': { |
5452 int index = 0; | 5467 int index = 0; |
5453 if (ParseBackReferenceIndex(&index)) { | 5468 if (ParseBackReferenceIndex(&index)) { |
5454 RegExpCapture* capture = NULL; | 5469 if (state->IsInsideCaptureGroup(index)) { |
5455 if (captures_ != NULL && index <= captures_->length()) { | 5470 // The backreference is inside the capture group it refers to. |
5456 capture = captures_->at(index - 1); | 5471 // Nothing can possibly have been captured yet. |
| 5472 builder->AddEmpty(); |
| 5473 } else { |
| 5474 RegExpCapture* capture = GetCapture(index); |
| 5475 RegExpTree* atom = new (zone()) RegExpBackReference(capture); |
| 5476 builder->AddAtom(atom); |
5457 } | 5477 } |
5458 if (capture == NULL) { | |
5459 builder->AddEmpty(); | |
5460 break; | |
5461 } | |
5462 RegExpTree* atom = new(zone()) RegExpBackReference(capture); | |
5463 builder->AddAtom(atom); | |
5464 break; | 5478 break; |
5465 } | 5479 } |
5466 uc32 first_digit = Next(); | 5480 uc32 first_digit = Next(); |
5467 if (first_digit == '8' || first_digit == '9') { | 5481 if (first_digit == '8' || first_digit == '9') { |
5468 // If the 'u' flag is present, only syntax characters can be escaped, | 5482 // If the 'u' flag is present, only syntax characters can be escaped, |
5469 // no other identity escapes are allowed. If the 'u' flag is not | 5483 // no other identity escapes are allowed. If the 'u' flag is not |
5470 // present, all identity escapes are allowed. | 5484 // present, all identity escapes are allowed. |
5471 if (!FLAG_harmony_unicode_regexps || !unicode_) { | 5485 if (!FLAG_harmony_unicode_regexps || !unicode_) { |
5472 builder->AddCharacter(first_digit); | 5486 builder->AddCharacter(first_digit); |
5473 Advance(2); | 5487 Advance(2); |
(...skipping 240 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5714 if (value > capture_count_) { | 5728 if (value > capture_count_) { |
5715 Reset(start); | 5729 Reset(start); |
5716 return false; | 5730 return false; |
5717 } | 5731 } |
5718 } | 5732 } |
5719 *index_out = value; | 5733 *index_out = value; |
5720 return true; | 5734 return true; |
5721 } | 5735 } |
5722 | 5736 |
5723 | 5737 |
| 5738 RegExpCapture* RegExpParser::GetCapture(int index) { |
| 5739 // The index for the capture groups are one-based. Its index in the list is |
| 5740 // zero-based. |
| 5741 int know_captures = |
| 5742 is_scanned_for_captures_ ? capture_count_ : captures_started_; |
| 5743 DCHECK(index <= know_captures); |
| 5744 if (captures_ == NULL) { |
| 5745 captures_ = new (zone()) ZoneList<RegExpCapture*>(know_captures, zone()); |
| 5746 } |
| 5747 while (captures_->length() < know_captures) { |
| 5748 captures_->Add(new (zone()) RegExpCapture(captures_->length() + 1), zone()); |
| 5749 } |
| 5750 return captures_->at(index - 1); |
| 5751 } |
| 5752 |
| 5753 |
| 5754 bool RegExpParser::RegExpParserState::IsInsideCaptureGroup(int index) { |
| 5755 for (RegExpParserState* s = this; s != NULL; s = s->previous_state()) { |
| 5756 if (s->group_type() != CAPTURE) continue; |
| 5757 // Return true if we found the matching capture index. |
| 5758 if (index == s->capture_index()) return true; |
| 5759 // Abort if index is larger than what has been parsed up till this state. |
| 5760 if (index > s->capture_index()) return false; |
| 5761 } |
| 5762 return false; |
| 5763 } |
| 5764 |
| 5765 |
5724 // QuantifierPrefix :: | 5766 // QuantifierPrefix :: |
5725 // { DecimalDigits } | 5767 // { DecimalDigits } |
5726 // { DecimalDigits , } | 5768 // { DecimalDigits , } |
5727 // { DecimalDigits , DecimalDigits } | 5769 // { DecimalDigits , DecimalDigits } |
5728 // | 5770 // |
5729 // Returns true if parsing succeeds, and set the min_out and max_out | 5771 // Returns true if parsing succeeds, and set the min_out and max_out |
5730 // values. Values are truncated to RegExpTree::kInfinity if they overflow. | 5772 // values. Values are truncated to RegExpTree::kInfinity if they overflow. |
5731 bool RegExpParser::ParseIntervalQuantifier(int* min_out, int* max_out) { | 5773 bool RegExpParser::ParseIntervalQuantifier(int* min_out, int* max_out) { |
5732 DCHECK_EQ(current(), '{'); | 5774 DCHECK_EQ(current(), '{'); |
5733 int start = position(); | 5775 int start = position(); |
(...skipping 311 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
6045 } | 6087 } |
6046 } | 6088 } |
6047 if (!has_more()) { | 6089 if (!has_more()) { |
6048 return ReportError(CStrVector(kUnterminated) CHECK_FAILED); | 6090 return ReportError(CStrVector(kUnterminated) CHECK_FAILED); |
6049 } | 6091 } |
6050 Advance(); | 6092 Advance(); |
6051 if (ranges->length() == 0) { | 6093 if (ranges->length() == 0) { |
6052 ranges->Add(CharacterRange::Everything(), zone()); | 6094 ranges->Add(CharacterRange::Everything(), zone()); |
6053 is_negated = !is_negated; | 6095 is_negated = !is_negated; |
6054 } | 6096 } |
6055 return new(zone()) RegExpCharacterClass(ranges, is_negated); | 6097 return new (zone()) RegExpCharacterClass(ranges, is_negated); |
6056 } | 6098 } |
6057 | 6099 |
6058 | 6100 |
6059 // ---------------------------------------------------------------------------- | 6101 // ---------------------------------------------------------------------------- |
6060 // The Parser interface. | 6102 // The Parser interface. |
6061 | 6103 |
6062 bool RegExpParser::ParseRegExp(Isolate* isolate, Zone* zone, | 6104 bool RegExpParser::ParseRegExp(Isolate* isolate, Zone* zone, |
6063 FlatStringReader* input, bool multiline, | 6105 FlatStringReader* input, bool multiline, |
6064 bool unicode, RegExpCompileData* result) { | 6106 bool unicode, RegExpCompileData* result) { |
6065 DCHECK(result != NULL); | 6107 DCHECK(result != NULL); |
(...skipping 348 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
6414 } | 6456 } |
6415 | 6457 |
6416 | 6458 |
6417 void Parser::RaiseLanguageMode(LanguageMode mode) { | 6459 void Parser::RaiseLanguageMode(LanguageMode mode) { |
6418 SetLanguageMode(scope_, | 6460 SetLanguageMode(scope_, |
6419 static_cast<LanguageMode>(scope_->language_mode() | mode)); | 6461 static_cast<LanguageMode>(scope_->language_mode() | mode)); |
6420 } | 6462 } |
6421 | 6463 |
6422 } // namespace internal | 6464 } // namespace internal |
6423 } // namespace v8 | 6465 } // namespace v8 |
OLD | NEW |