| OLD | NEW |
| 1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "src/parser.h" | 5 #include "src/parser.h" |
| 6 | 6 |
| 7 #include "src/api.h" | 7 #include "src/api.h" |
| 8 #include "src/ast.h" | 8 #include "src/ast.h" |
| 9 #include "src/ast-literal-reindexer.h" | 9 #include "src/ast-literal-reindexer.h" |
| 10 #include "src/bailout-reason.h" | 10 #include "src/bailout-reason.h" |
| (...skipping 5175 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5186 RegExpParser::RegExpParser(FlatStringReader* in, Handle<String>* error, | 5186 RegExpParser::RegExpParser(FlatStringReader* in, Handle<String>* error, |
| 5187 bool multiline, bool unicode, Isolate* isolate, | 5187 bool multiline, bool unicode, Isolate* isolate, |
| 5188 Zone* zone) | 5188 Zone* zone) |
| 5189 : isolate_(isolate), | 5189 : isolate_(isolate), |
| 5190 zone_(zone), | 5190 zone_(zone), |
| 5191 error_(error), | 5191 error_(error), |
| 5192 captures_(NULL), | 5192 captures_(NULL), |
| 5193 in_(in), | 5193 in_(in), |
| 5194 current_(kEndMarker), | 5194 current_(kEndMarker), |
| 5195 next_pos_(0), | 5195 next_pos_(0), |
| 5196 captures_started_(0), |
| 5196 capture_count_(0), | 5197 capture_count_(0), |
| 5197 has_more_(true), | 5198 has_more_(true), |
| 5198 multiline_(multiline), | 5199 multiline_(multiline), |
| 5199 unicode_(unicode), | 5200 unicode_(unicode), |
| 5200 simple_(false), | 5201 simple_(false), |
| 5201 contains_anchor_(false), | 5202 contains_anchor_(false), |
| 5202 is_scanned_for_captures_(false), | 5203 is_scanned_for_captures_(false), |
| 5203 failed_(false) { | 5204 failed_(false) { |
| 5204 Advance(); | 5205 Advance(); |
| 5205 } | 5206 } |
| (...skipping 83 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5289 // Alternative | Disjunction | 5290 // Alternative | Disjunction |
| 5290 // Alternative :: | 5291 // Alternative :: |
| 5291 // [empty] | 5292 // [empty] |
| 5292 // Term Alternative | 5293 // Term Alternative |
| 5293 // Term :: | 5294 // Term :: |
| 5294 // Assertion | 5295 // Assertion |
| 5295 // Atom | 5296 // Atom |
| 5296 // Atom Quantifier | 5297 // Atom Quantifier |
| 5297 RegExpTree* RegExpParser::ParseDisjunction() { | 5298 RegExpTree* RegExpParser::ParseDisjunction() { |
| 5298 // Used to store current state while parsing subexpressions. | 5299 // Used to store current state while parsing subexpressions. |
| 5299 RegExpParserState initial_state(NULL, INITIAL, 0, zone()); | 5300 RegExpParserState initial_state(NULL, INITIAL, RegExpLookaround::LOOKAHEAD, 0, |
| 5300 RegExpParserState* stored_state = &initial_state; | 5301 zone()); |
| 5302 RegExpParserState* state = &initial_state; |
| 5301 // Cache the builder in a local variable for quick access. | 5303 // Cache the builder in a local variable for quick access. |
| 5302 RegExpBuilder* builder = initial_state.builder(); | 5304 RegExpBuilder* builder = initial_state.builder(); |
| 5303 while (true) { | 5305 while (true) { |
| 5304 switch (current()) { | 5306 switch (current()) { |
| 5305 case kEndMarker: | 5307 case kEndMarker: |
| 5306 if (stored_state->IsSubexpression()) { | 5308 if (state->IsSubexpression()) { |
| 5307 // Inside a parenthesized group when hitting end of input. | 5309 // Inside a parenthesized group when hitting end of input. |
| 5308 ReportError(CStrVector("Unterminated group") CHECK_FAILED); | 5310 ReportError(CStrVector("Unterminated group") CHECK_FAILED); |
| 5309 } | 5311 } |
| 5310 DCHECK_EQ(INITIAL, stored_state->group_type()); | 5312 DCHECK_EQ(INITIAL, state->group_type()); |
| 5311 // Parsing completed successfully. | 5313 // Parsing completed successfully. |
| 5312 return builder->ToRegExp(); | 5314 return builder->ToRegExp(); |
| 5313 case ')': { | 5315 case ')': { |
| 5314 if (!stored_state->IsSubexpression()) { | 5316 if (!state->IsSubexpression()) { |
| 5315 ReportError(CStrVector("Unmatched ')'") CHECK_FAILED); | 5317 ReportError(CStrVector("Unmatched ')'") CHECK_FAILED); |
| 5316 } | 5318 } |
| 5317 DCHECK_NE(INITIAL, stored_state->group_type()); | 5319 DCHECK_NE(INITIAL, state->group_type()); |
| 5318 | 5320 |
| 5319 Advance(); | 5321 Advance(); |
| 5320 // End disjunction parsing and convert builder content to new single | 5322 // End disjunction parsing and convert builder content to new single |
| 5321 // regexp atom. | 5323 // regexp atom. |
| 5322 RegExpTree* body = builder->ToRegExp(); | 5324 RegExpTree* body = builder->ToRegExp(); |
| 5323 | 5325 |
| 5324 int end_capture_index = captures_started(); | 5326 int end_capture_index = captures_started(); |
| 5325 | 5327 |
| 5326 int capture_index = stored_state->capture_index(); | 5328 int capture_index = state->capture_index(); |
| 5327 SubexpressionType group_type = stored_state->group_type(); | 5329 SubexpressionType group_type = state->group_type(); |
| 5328 | |
| 5329 // Restore previous state. | |
| 5330 stored_state = stored_state->previous_state(); | |
| 5331 builder = stored_state->builder(); | |
| 5332 | 5330 |
| 5333 // Build result of subexpression. | 5331 // Build result of subexpression. |
| 5334 if (group_type == CAPTURE) { | 5332 if (group_type == CAPTURE) { |
| 5335 RegExpCapture* capture = new(zone()) RegExpCapture(body, capture_index); | 5333 RegExpCapture* capture = GetCapture(capture_index); |
| 5336 captures_->at(capture_index - 1) = capture; | 5334 capture->set_body(body); |
| 5337 body = capture; | 5335 body = capture; |
| 5338 } else if (group_type != GROUPING) { | 5336 } else if (group_type != GROUPING) { |
| 5339 DCHECK(group_type == POSITIVE_LOOKAHEAD || | 5337 DCHECK(group_type == POSITIVE_LOOKAROUND || |
| 5340 group_type == NEGATIVE_LOOKAHEAD); | 5338 group_type == NEGATIVE_LOOKAROUND); |
| 5341 bool is_positive = (group_type == POSITIVE_LOOKAHEAD); | 5339 bool is_positive = (group_type == POSITIVE_LOOKAROUND); |
| 5342 body = new(zone()) RegExpLookahead(body, | 5340 body = new (zone()) RegExpLookaround( |
| 5343 is_positive, | 5341 body, is_positive, end_capture_index - capture_index, capture_index, |
| 5344 end_capture_index - capture_index, | 5342 state->lookaround_type()); |
| 5345 capture_index); | |
| 5346 } | 5343 } |
| 5344 |
| 5345 // Restore previous state. |
| 5346 state = state->previous_state(); |
| 5347 builder = state->builder(); |
| 5348 |
| 5347 builder->AddAtom(body); | 5349 builder->AddAtom(body); |
| 5348 // For compatibility with JSC and ES3, we allow quantifiers after | 5350 // For compatibility with JSC and ES3, we allow quantifiers after |
| 5349 // lookaheads, and break in all cases. | 5351 // lookaheads, and break in all cases. |
| 5350 break; | 5352 break; |
| 5351 } | 5353 } |
| 5352 case '|': { | 5354 case '|': { |
| 5353 Advance(); | 5355 Advance(); |
| 5354 builder->NewAlternative(); | 5356 builder->NewAlternative(); |
| 5355 continue; | 5357 continue; |
| 5356 } | 5358 } |
| (...skipping 26 matching lines...) Expand all Loading... |
| 5383 // everything except \x0a, \x0d, \u2028 and \u2029 | 5385 // everything except \x0a, \x0d, \u2028 and \u2029 |
| 5384 ZoneList<CharacterRange>* ranges = | 5386 ZoneList<CharacterRange>* ranges = |
| 5385 new(zone()) ZoneList<CharacterRange>(2, zone()); | 5387 new(zone()) ZoneList<CharacterRange>(2, zone()); |
| 5386 CharacterRange::AddClassEscape('.', ranges, zone()); | 5388 CharacterRange::AddClassEscape('.', ranges, zone()); |
| 5387 RegExpTree* atom = new(zone()) RegExpCharacterClass(ranges, false); | 5389 RegExpTree* atom = new(zone()) RegExpCharacterClass(ranges, false); |
| 5388 builder->AddAtom(atom); | 5390 builder->AddAtom(atom); |
| 5389 break; | 5391 break; |
| 5390 } | 5392 } |
| 5391 case '(': { | 5393 case '(': { |
| 5392 SubexpressionType subexpr_type = CAPTURE; | 5394 SubexpressionType subexpr_type = CAPTURE; |
| 5395 RegExpLookaround::Type lookaround_type = state->lookaround_type(); |
| 5393 Advance(); | 5396 Advance(); |
| 5394 if (current() == '?') { | 5397 if (current() == '?') { |
| 5395 switch (Next()) { | 5398 switch (Next()) { |
| 5396 case ':': | 5399 case ':': |
| 5397 subexpr_type = GROUPING; | 5400 subexpr_type = GROUPING; |
| 5398 break; | 5401 break; |
| 5399 case '=': | 5402 case '=': |
| 5400 subexpr_type = POSITIVE_LOOKAHEAD; | 5403 lookaround_type = RegExpLookaround::LOOKAHEAD; |
| 5404 subexpr_type = POSITIVE_LOOKAROUND; |
| 5401 break; | 5405 break; |
| 5402 case '!': | 5406 case '!': |
| 5403 subexpr_type = NEGATIVE_LOOKAHEAD; | 5407 lookaround_type = RegExpLookaround::LOOKAHEAD; |
| 5408 subexpr_type = NEGATIVE_LOOKAROUND; |
| 5404 break; | 5409 break; |
| 5410 case '<': |
| 5411 if (FLAG_harmony_regexp_lookbehind) { |
| 5412 Advance(); |
| 5413 lookaround_type = RegExpLookaround::LOOKBEHIND; |
| 5414 if (Next() == '=') { |
| 5415 subexpr_type = POSITIVE_LOOKAROUND; |
| 5416 break; |
| 5417 } else if (Next() == '!') { |
| 5418 subexpr_type = NEGATIVE_LOOKAROUND; |
| 5419 break; |
| 5420 } |
| 5421 } |
| 5422 // Fall through. |
| 5405 default: | 5423 default: |
| 5406 ReportError(CStrVector("Invalid group") CHECK_FAILED); | 5424 ReportError(CStrVector("Invalid group") CHECK_FAILED); |
| 5407 break; | 5425 break; |
| 5408 } | 5426 } |
| 5409 Advance(2); | 5427 Advance(2); |
| 5410 } else { | 5428 } else { |
| 5411 if (captures_ == NULL) { | 5429 if (captures_started_ >= kMaxCaptures) { |
| 5412 captures_ = new(zone()) ZoneList<RegExpCapture*>(2, zone()); | |
| 5413 } | |
| 5414 if (captures_started() >= kMaxCaptures) { | |
| 5415 ReportError(CStrVector("Too many captures") CHECK_FAILED); | 5430 ReportError(CStrVector("Too many captures") CHECK_FAILED); |
| 5416 } | 5431 } |
| 5417 captures_->Add(NULL, zone()); | 5432 captures_started_++; |
| 5418 } | 5433 } |
| 5419 // Store current state and begin new disjunction parsing. | 5434 // Store current state and begin new disjunction parsing. |
| 5420 stored_state = new(zone()) RegExpParserState(stored_state, subexpr_type, | 5435 state = new (zone()) RegExpParserState( |
| 5421 captures_started(), zone()); | 5436 state, subexpr_type, lookaround_type, captures_started_, zone()); |
| 5422 builder = stored_state->builder(); | 5437 builder = state->builder(); |
| 5423 continue; | 5438 continue; |
| 5424 } | 5439 } |
| 5425 case '[': { | 5440 case '[': { |
| 5426 RegExpTree* atom = ParseCharacterClass(CHECK_FAILED); | 5441 RegExpTree* atom = ParseCharacterClass(CHECK_FAILED); |
| 5427 builder->AddAtom(atom); | 5442 builder->AddAtom(atom); |
| 5428 break; | 5443 break; |
| 5429 } | 5444 } |
| 5430 // Atom :: | 5445 // Atom :: |
| 5431 // \ AtomEscape | 5446 // \ AtomEscape |
| 5432 case '\\': | 5447 case '\\': |
| (...skipping 22 matching lines...) Expand all Loading... |
| 5455 new(zone()) ZoneList<CharacterRange>(2, zone()); | 5470 new(zone()) ZoneList<CharacterRange>(2, zone()); |
| 5456 CharacterRange::AddClassEscape(c, ranges, zone()); | 5471 CharacterRange::AddClassEscape(c, ranges, zone()); |
| 5457 RegExpTree* atom = new(zone()) RegExpCharacterClass(ranges, false); | 5472 RegExpTree* atom = new(zone()) RegExpCharacterClass(ranges, false); |
| 5458 builder->AddAtom(atom); | 5473 builder->AddAtom(atom); |
| 5459 break; | 5474 break; |
| 5460 } | 5475 } |
| 5461 case '1': case '2': case '3': case '4': case '5': case '6': | 5476 case '1': case '2': case '3': case '4': case '5': case '6': |
| 5462 case '7': case '8': case '9': { | 5477 case '7': case '8': case '9': { |
| 5463 int index = 0; | 5478 int index = 0; |
| 5464 if (ParseBackReferenceIndex(&index)) { | 5479 if (ParseBackReferenceIndex(&index)) { |
| 5465 RegExpCapture* capture = NULL; | 5480 RegExpCapture* capture = GetCapture(index); |
| 5466 if (captures_ != NULL && index <= captures_->length()) { | 5481 RegExpTree* atom = new (zone()) RegExpBackReference(capture); |
| 5467 capture = captures_->at(index - 1); | |
| 5468 } | |
| 5469 if (capture == NULL) { | |
| 5470 builder->AddEmpty(); | |
| 5471 break; | |
| 5472 } | |
| 5473 RegExpTree* atom = new(zone()) RegExpBackReference(capture); | |
| 5474 builder->AddAtom(atom); | 5482 builder->AddAtom(atom); |
| 5475 break; | 5483 break; |
| 5476 } | 5484 } |
| 5477 uc32 first_digit = Next(); | 5485 uc32 first_digit = Next(); |
| 5478 if (first_digit == '8' || first_digit == '9') { | 5486 if (first_digit == '8' || first_digit == '9') { |
| 5479 // If the 'u' flag is present, only syntax characters can be escaped, | 5487 // If the 'u' flag is present, only syntax characters can be escaped, |
| 5480 // no other identity escapes are allowed. If the 'u' flag is not | 5488 // no other identity escapes are allowed. If the 'u' flag is not |
| 5481 // present, all identity escapes are allowed. | 5489 // present, all identity escapes are allowed. |
| 5482 if (!FLAG_harmony_unicode_regexps || !unicode_) { | 5490 if (!FLAG_harmony_unicode_regexps || !unicode_) { |
| 5483 builder->AddCharacter(first_digit); | 5491 builder->AddCharacter(first_digit); |
| (...skipping 241 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5725 if (value > capture_count_) { | 5733 if (value > capture_count_) { |
| 5726 Reset(start); | 5734 Reset(start); |
| 5727 return false; | 5735 return false; |
| 5728 } | 5736 } |
| 5729 } | 5737 } |
| 5730 *index_out = value; | 5738 *index_out = value; |
| 5731 return true; | 5739 return true; |
| 5732 } | 5740 } |
| 5733 | 5741 |
| 5734 | 5742 |
| 5743 RegExpCapture* RegExpParser::GetCapture(int index) { |
| 5744 // Capture group indices are one-based; a capture's position in the list is |
| 5745 // zero-based. |
| 5746 int know_captures = |
| 5747 is_scanned_for_captures_ ? capture_count_ : captures_started_; |
| 5748 DCHECK(index <= know_captures); |
| 5749 if (captures_ == NULL) { |
| 5750 captures_ = new (zone()) ZoneList<RegExpCapture*>(know_captures, zone()); |
| 5751 } |
| 5752 while (captures_->length() < know_captures) { |
| 5753 captures_->Add(new (zone()) RegExpCapture(captures_->length() + 1), zone()); |
| 5754 } |
| 5755 return captures_->at(index - 1); |
| 5756 } |
| 5757 |
| 5758 |
| 5735 // QuantifierPrefix :: | 5759 // QuantifierPrefix :: |
| 5736 // { DecimalDigits } | 5760 // { DecimalDigits } |
| 5737 // { DecimalDigits , } | 5761 // { DecimalDigits , } |
| 5738 // { DecimalDigits , DecimalDigits } | 5762 // { DecimalDigits , DecimalDigits } |
| 5739 // | 5763 // |
| 5740 // Returns true if parsing succeeds, and sets the min_out and max_out | 5764 // Returns true if parsing succeeds, and sets the min_out and max_out |
| 5741 // values. Values are truncated to RegExpTree::kInfinity if they overflow. | 5765 // values. Values are truncated to RegExpTree::kInfinity if they overflow. |
| 5742 bool RegExpParser::ParseIntervalQuantifier(int* min_out, int* max_out) { | 5766 bool RegExpParser::ParseIntervalQuantifier(int* min_out, int* max_out) { |
| 5743 DCHECK_EQ(current(), '{'); | 5767 DCHECK_EQ(current(), '{'); |
| 5744 int start = position(); | 5768 int start = position(); |
| (...skipping 311 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 6056 } | 6080 } |
| 6057 } | 6081 } |
| 6058 if (!has_more()) { | 6082 if (!has_more()) { |
| 6059 return ReportError(CStrVector(kUnterminated) CHECK_FAILED); | 6083 return ReportError(CStrVector(kUnterminated) CHECK_FAILED); |
| 6060 } | 6084 } |
| 6061 Advance(); | 6085 Advance(); |
| 6062 if (ranges->length() == 0) { | 6086 if (ranges->length() == 0) { |
| 6063 ranges->Add(CharacterRange::Everything(), zone()); | 6087 ranges->Add(CharacterRange::Everything(), zone()); |
| 6064 is_negated = !is_negated; | 6088 is_negated = !is_negated; |
| 6065 } | 6089 } |
| 6066 return new(zone()) RegExpCharacterClass(ranges, is_negated); | 6090 return new (zone()) RegExpCharacterClass(ranges, is_negated); |
| 6067 } | 6091 } |
| 6068 | 6092 |
| 6069 | 6093 |
| 6070 // ---------------------------------------------------------------------------- | 6094 // ---------------------------------------------------------------------------- |
| 6071 // The Parser interface. | 6095 // The Parser interface. |
| 6072 | 6096 |
| 6073 bool RegExpParser::ParseRegExp(Isolate* isolate, Zone* zone, | 6097 bool RegExpParser::ParseRegExp(Isolate* isolate, Zone* zone, |
| 6074 FlatStringReader* input, bool multiline, | 6098 FlatStringReader* input, bool multiline, |
| 6075 bool unicode, RegExpCompileData* result) { | 6099 bool unicode, RegExpCompileData* result) { |
| 6076 DCHECK(result != NULL); | 6100 DCHECK(result != NULL); |
| (...skipping 348 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 6425 } | 6449 } |
| 6426 | 6450 |
| 6427 | 6451 |
| 6428 void Parser::RaiseLanguageMode(LanguageMode mode) { | 6452 void Parser::RaiseLanguageMode(LanguageMode mode) { |
| 6429 SetLanguageMode(scope_, | 6453 SetLanguageMode(scope_, |
| 6430 static_cast<LanguageMode>(scope_->language_mode() | mode)); | 6454 static_cast<LanguageMode>(scope_->language_mode() | mode)); |
| 6431 } | 6455 } |
| 6432 | 6456 |
| 6433 } // namespace internal | 6457 } // namespace internal |
| 6434 } // namespace v8 | 6458 } // namespace v8 |
| OLD | NEW |