OLD | NEW |
1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "src/parser.h" | 5 #include "src/parser.h" |
6 | 6 |
7 #include "src/api.h" | 7 #include "src/api.h" |
8 #include "src/ast.h" | 8 #include "src/ast.h" |
9 #include "src/ast-literal-reindexer.h" | 9 #include "src/ast-literal-reindexer.h" |
10 #include "src/bailout-reason.h" | 10 #include "src/bailout-reason.h" |
(...skipping 5175 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5186 RegExpParser::RegExpParser(FlatStringReader* in, Handle<String>* error, | 5186 RegExpParser::RegExpParser(FlatStringReader* in, Handle<String>* error, |
5187 bool multiline, bool unicode, Isolate* isolate, | 5187 bool multiline, bool unicode, Isolate* isolate, |
5188 Zone* zone) | 5188 Zone* zone) |
5189 : isolate_(isolate), | 5189 : isolate_(isolate), |
5190 zone_(zone), | 5190 zone_(zone), |
5191 error_(error), | 5191 error_(error), |
5192 captures_(NULL), | 5192 captures_(NULL), |
5193 in_(in), | 5193 in_(in), |
5194 current_(kEndMarker), | 5194 current_(kEndMarker), |
5195 next_pos_(0), | 5195 next_pos_(0), |
| 5196 captures_started_(0), |
5196 capture_count_(0), | 5197 capture_count_(0), |
5197 has_more_(true), | 5198 has_more_(true), |
5198 multiline_(multiline), | 5199 multiline_(multiline), |
5199 unicode_(unicode), | 5200 unicode_(unicode), |
5200 simple_(false), | 5201 simple_(false), |
5201 contains_anchor_(false), | 5202 contains_anchor_(false), |
5202 is_scanned_for_captures_(false), | 5203 is_scanned_for_captures_(false), |
5203 failed_(false) { | 5204 failed_(false) { |
5204 Advance(); | 5205 Advance(); |
5205 } | 5206 } |
(...skipping 83 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5289 // Alternative | Disjunction | 5290 // Alternative | Disjunction |
5290 // Alternative :: | 5291 // Alternative :: |
5291 // [empty] | 5292 // [empty] |
5292 // Term Alternative | 5293 // Term Alternative |
5293 // Term :: | 5294 // Term :: |
5294 // Assertion | 5295 // Assertion |
5295 // Atom | 5296 // Atom |
5296 // Atom Quantifier | 5297 // Atom Quantifier |
5297 RegExpTree* RegExpParser::ParseDisjunction() { | 5298 RegExpTree* RegExpParser::ParseDisjunction() { |
5298 // Used to store current state while parsing subexpressions. | 5299 // Used to store current state while parsing subexpressions. |
5299 RegExpParserState initial_state(NULL, INITIAL, 0, zone()); | 5300 RegExpParserState initial_state(NULL, INITIAL, RegExpLookaround::LOOKAHEAD, 0, |
5300 RegExpParserState* stored_state = &initial_state; | 5301 zone()); |
| 5302 RegExpParserState* state = &initial_state; |
5301 // Cache the builder in a local variable for quick access. | 5303 // Cache the builder in a local variable for quick access. |
5302 RegExpBuilder* builder = initial_state.builder(); | 5304 RegExpBuilder* builder = initial_state.builder(); |
5303 while (true) { | 5305 while (true) { |
5304 switch (current()) { | 5306 switch (current()) { |
5305 case kEndMarker: | 5307 case kEndMarker: |
5306 if (stored_state->IsSubexpression()) { | 5308 if (state->IsSubexpression()) { |
5307 // Inside a parenthesized group when hitting end of input. | 5309 // Inside a parenthesized group when hitting end of input. |
5308 ReportError(CStrVector("Unterminated group") CHECK_FAILED); | 5310 ReportError(CStrVector("Unterminated group") CHECK_FAILED); |
5309 } | 5311 } |
5310 DCHECK_EQ(INITIAL, stored_state->group_type()); | 5312 DCHECK_EQ(INITIAL, state->group_type()); |
5311 // Parsing completed successfully. | 5313 // Parsing completed successfully. |
5312 return builder->ToRegExp(); | 5314 return builder->ToRegExp(); |
5313 case ')': { | 5315 case ')': { |
5314 if (!stored_state->IsSubexpression()) { | 5316 if (!state->IsSubexpression()) { |
5315 ReportError(CStrVector("Unmatched ')'") CHECK_FAILED); | 5317 ReportError(CStrVector("Unmatched ')'") CHECK_FAILED); |
5316 } | 5318 } |
5317 DCHECK_NE(INITIAL, stored_state->group_type()); | 5319 DCHECK_NE(INITIAL, state->group_type()); |
5318 | 5320 |
5319 Advance(); | 5321 Advance(); |
5320 // End disjunction parsing and convert builder content to new single | 5322 // End disjunction parsing and convert builder content to new single |
5321 // regexp atom. | 5323 // regexp atom. |
5322 RegExpTree* body = builder->ToRegExp(); | 5324 RegExpTree* body = builder->ToRegExp(); |
5323 | 5325 |
5324 int end_capture_index = captures_started(); | 5326 int end_capture_index = captures_started(); |
5325 | 5327 |
5326 int capture_index = stored_state->capture_index(); | 5328 int capture_index = state->capture_index(); |
5327 SubexpressionType group_type = stored_state->group_type(); | 5329 SubexpressionType group_type = state->group_type(); |
5328 | |
5329 // Restore previous state. | |
5330 stored_state = stored_state->previous_state(); | |
5331 builder = stored_state->builder(); | |
5332 | 5330 |
5333 // Build result of subexpression. | 5331 // Build result of subexpression. |
5334 if (group_type == CAPTURE) { | 5332 if (group_type == CAPTURE) { |
5335 RegExpCapture* capture = new(zone()) RegExpCapture(body, capture_index); | 5333 RegExpCapture* capture = GetCapture(capture_index); |
5336 captures_->at(capture_index - 1) = capture; | 5334 capture->set_body(body); |
5337 body = capture; | 5335 body = capture; |
5338 } else if (group_type != GROUPING) { | 5336 } else if (group_type != GROUPING) { |
5339 DCHECK(group_type == POSITIVE_LOOKAHEAD || | 5337 DCHECK(group_type == POSITIVE_LOOKAROUND || |
5340 group_type == NEGATIVE_LOOKAHEAD); | 5338 group_type == NEGATIVE_LOOKAROUND); |
5341 bool is_positive = (group_type == POSITIVE_LOOKAHEAD); | 5339 bool is_positive = (group_type == POSITIVE_LOOKAROUND); |
5342 body = new(zone()) RegExpLookahead(body, | 5340 body = new (zone()) RegExpLookaround( |
5343 is_positive, | 5341 body, is_positive, end_capture_index - capture_index, capture_index, |
5344 end_capture_index - capture_index, | 5342 state->lookaround_type()); |
5345 capture_index); | |
5346 } | 5343 } |
| 5344 |
| 5345 // Restore previous state. |
| 5346 state = state->previous_state(); |
| 5347 builder = state->builder(); |
| 5348 |
5347 builder->AddAtom(body); | 5349 builder->AddAtom(body); |
5348 // For compatibility with JSC and ES3, we allow quantifiers after | 5350 // For compatibility with JSC and ES3, we allow quantifiers after |
5349 // lookaheads, and break in all cases. | 5351 // lookaheads, and break in all cases. |
5350 break; | 5352 break; |
5351 } | 5353 } |
5352 case '|': { | 5354 case '|': { |
5353 Advance(); | 5355 Advance(); |
5354 builder->NewAlternative(); | 5356 builder->NewAlternative(); |
5355 continue; | 5357 continue; |
5356 } | 5358 } |
(...skipping 26 matching lines...) Expand all Loading... |
5383 // everything except \x0a, \x0d, \u2028 and \u2029 | 5385 // everything except \x0a, \x0d, \u2028 and \u2029 |
5384 ZoneList<CharacterRange>* ranges = | 5386 ZoneList<CharacterRange>* ranges = |
5385 new(zone()) ZoneList<CharacterRange>(2, zone()); | 5387 new(zone()) ZoneList<CharacterRange>(2, zone()); |
5386 CharacterRange::AddClassEscape('.', ranges, zone()); | 5388 CharacterRange::AddClassEscape('.', ranges, zone()); |
5387 RegExpTree* atom = new(zone()) RegExpCharacterClass(ranges, false); | 5389 RegExpTree* atom = new(zone()) RegExpCharacterClass(ranges, false); |
5388 builder->AddAtom(atom); | 5390 builder->AddAtom(atom); |
5389 break; | 5391 break; |
5390 } | 5392 } |
5391 case '(': { | 5393 case '(': { |
5392 SubexpressionType subexpr_type = CAPTURE; | 5394 SubexpressionType subexpr_type = CAPTURE; |
| 5395 RegExpLookaround::Type lookaround_type = state->lookaround_type(); |
5393 Advance(); | 5396 Advance(); |
5394 if (current() == '?') { | 5397 if (current() == '?') { |
5395 switch (Next()) { | 5398 switch (Next()) { |
5396 case ':': | 5399 case ':': |
5397 subexpr_type = GROUPING; | 5400 subexpr_type = GROUPING; |
5398 break; | 5401 break; |
5399 case '=': | 5402 case '=': |
5400 subexpr_type = POSITIVE_LOOKAHEAD; | 5403 lookaround_type = RegExpLookaround::LOOKAHEAD; |
| 5404 subexpr_type = POSITIVE_LOOKAROUND; |
5401 break; | 5405 break; |
5402 case '!': | 5406 case '!': |
5403 subexpr_type = NEGATIVE_LOOKAHEAD; | 5407 lookaround_type = RegExpLookaround::LOOKAHEAD; |
| 5408 subexpr_type = NEGATIVE_LOOKAROUND; |
5404 break; | 5409 break; |
| 5410 case '<': |
| 5411 if (FLAG_harmony_regexp_lookbehind) { |
| 5412 Advance(); |
| 5413 lookaround_type = RegExpLookaround::LOOKBEHIND; |
| 5414 if (Next() == '=') { |
| 5415 subexpr_type = POSITIVE_LOOKAROUND; |
| 5416 break; |
| 5417 } else if (Next() == '!') { |
| 5418 subexpr_type = NEGATIVE_LOOKAROUND; |
| 5419 break; |
| 5420 } |
| 5421 } |
| 5422 // Fall through. |
5405 default: | 5423 default: |
5406 ReportError(CStrVector("Invalid group") CHECK_FAILED); | 5424 ReportError(CStrVector("Invalid group") CHECK_FAILED); |
5407 break; | 5425 break; |
5408 } | 5426 } |
5409 Advance(2); | 5427 Advance(2); |
5410 } else { | 5428 } else { |
5411 if (captures_ == NULL) { | 5429 if (captures_started_ >= kMaxCaptures) { |
5412 captures_ = new(zone()) ZoneList<RegExpCapture*>(2, zone()); | |
5413 } | |
5414 if (captures_started() >= kMaxCaptures) { | |
5415 ReportError(CStrVector("Too many captures") CHECK_FAILED); | 5430 ReportError(CStrVector("Too many captures") CHECK_FAILED); |
5416 } | 5431 } |
5417 captures_->Add(NULL, zone()); | 5432 captures_started_++; |
5418 } | 5433 } |
5419 // Store current state and begin new disjunction parsing. | 5434 // Store current state and begin new disjunction parsing. |
5420 stored_state = new(zone()) RegExpParserState(stored_state, subexpr_type, | 5435 state = new (zone()) RegExpParserState( |
5421 captures_started(), zone()); | 5436 state, subexpr_type, lookaround_type, captures_started_, zone()); |
5422 builder = stored_state->builder(); | 5437 builder = state->builder(); |
5423 continue; | 5438 continue; |
5424 } | 5439 } |
5425 case '[': { | 5440 case '[': { |
5426 RegExpTree* atom = ParseCharacterClass(CHECK_FAILED); | 5441 RegExpTree* atom = ParseCharacterClass(CHECK_FAILED); |
5427 builder->AddAtom(atom); | 5442 builder->AddAtom(atom); |
5428 break; | 5443 break; |
5429 } | 5444 } |
5430 // Atom :: | 5445 // Atom :: |
5431 // \ AtomEscape | 5446 // \ AtomEscape |
5432 case '\\': | 5447 case '\\': |
(...skipping 22 matching lines...) Expand all Loading... |
5455 new(zone()) ZoneList<CharacterRange>(2, zone()); | 5470 new(zone()) ZoneList<CharacterRange>(2, zone()); |
5456 CharacterRange::AddClassEscape(c, ranges, zone()); | 5471 CharacterRange::AddClassEscape(c, ranges, zone()); |
5457 RegExpTree* atom = new(zone()) RegExpCharacterClass(ranges, false); | 5472 RegExpTree* atom = new(zone()) RegExpCharacterClass(ranges, false); |
5458 builder->AddAtom(atom); | 5473 builder->AddAtom(atom); |
5459 break; | 5474 break; |
5460 } | 5475 } |
5461 case '1': case '2': case '3': case '4': case '5': case '6': | 5476 case '1': case '2': case '3': case '4': case '5': case '6': |
5462 case '7': case '8': case '9': { | 5477 case '7': case '8': case '9': { |
5463 int index = 0; | 5478 int index = 0; |
5464 if (ParseBackReferenceIndex(&index)) { | 5479 if (ParseBackReferenceIndex(&index)) { |
5465 RegExpCapture* capture = NULL; | 5480 RegExpCapture* capture = GetCapture(index); |
5466 if (captures_ != NULL && index <= captures_->length()) { | 5481 RegExpTree* atom = new (zone()) RegExpBackReference(capture); |
5467 capture = captures_->at(index - 1); | |
5468 } | |
5469 if (capture == NULL) { | |
5470 builder->AddEmpty(); | |
5471 break; | |
5472 } | |
5473 RegExpTree* atom = new(zone()) RegExpBackReference(capture); | |
5474 builder->AddAtom(atom); | 5482 builder->AddAtom(atom); |
5475 break; | 5483 break; |
5476 } | 5484 } |
5477 uc32 first_digit = Next(); | 5485 uc32 first_digit = Next(); |
5478 if (first_digit == '8' || first_digit == '9') { | 5486 if (first_digit == '8' || first_digit == '9') { |
5479 // If the 'u' flag is present, only syntax characters can be escaped, | 5487 // If the 'u' flag is present, only syntax characters can be escaped, |
5480 // no other identity escapes are allowed. If the 'u' flag is not | 5488 // no other identity escapes are allowed. If the 'u' flag is not |
5481 // present, all identity escapes are allowed. | 5489 // present, all identity escapes are allowed. |
5482 if (!FLAG_harmony_unicode_regexps || !unicode_) { | 5490 if (!FLAG_harmony_unicode_regexps || !unicode_) { |
5483 builder->AddCharacter(first_digit); | 5491 builder->AddCharacter(first_digit); |
(...skipping 241 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5725 if (value > capture_count_) { | 5733 if (value > capture_count_) { |
5726 Reset(start); | 5734 Reset(start); |
5727 return false; | 5735 return false; |
5728 } | 5736 } |
5729 } | 5737 } |
5730 *index_out = value; | 5738 *index_out = value; |
5731 return true; | 5739 return true; |
5732 } | 5740 } |
5733 | 5741 |
5734 | 5742 |
| 5743 RegExpCapture* RegExpParser::GetCapture(int index) { |
| 5744 // Capture group indices are one-based, whereas a capture's position in |
| 5745 // the list is zero-based. |
| 5746 int know_captures = |
| 5747 is_scanned_for_captures_ ? capture_count_ : captures_started_; |
| 5748 DCHECK(index <= know_captures); |
| 5749 if (captures_ == NULL) { |
| 5750 captures_ = new (zone()) ZoneList<RegExpCapture*>(know_captures, zone()); |
| 5751 } |
| 5752 while (captures_->length() < know_captures) { |
| 5753 captures_->Add(new (zone()) RegExpCapture(captures_->length() + 1), zone()); |
| 5754 } |
| 5755 return captures_->at(index - 1); |
| 5756 } |
| 5757 |
| 5758 |
5735 // QuantifierPrefix :: | 5759 // QuantifierPrefix :: |
5736 // { DecimalDigits } | 5760 // { DecimalDigits } |
5737 // { DecimalDigits , } | 5761 // { DecimalDigits , } |
5738 // { DecimalDigits , DecimalDigits } | 5762 // { DecimalDigits , DecimalDigits } |
5739 // | 5763 // |
5740 // Returns true if parsing succeeds, and set the min_out and max_out | 5764 // Returns true if parsing succeeds, and set the min_out and max_out |
5741 // values. Values are truncated to RegExpTree::kInfinity if they overflow. | 5765 // values. Values are truncated to RegExpTree::kInfinity if they overflow. |
5742 bool RegExpParser::ParseIntervalQuantifier(int* min_out, int* max_out) { | 5766 bool RegExpParser::ParseIntervalQuantifier(int* min_out, int* max_out) { |
5743 DCHECK_EQ(current(), '{'); | 5767 DCHECK_EQ(current(), '{'); |
5744 int start = position(); | 5768 int start = position(); |
(...skipping 311 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
6056 } | 6080 } |
6057 } | 6081 } |
6058 if (!has_more()) { | 6082 if (!has_more()) { |
6059 return ReportError(CStrVector(kUnterminated) CHECK_FAILED); | 6083 return ReportError(CStrVector(kUnterminated) CHECK_FAILED); |
6060 } | 6084 } |
6061 Advance(); | 6085 Advance(); |
6062 if (ranges->length() == 0) { | 6086 if (ranges->length() == 0) { |
6063 ranges->Add(CharacterRange::Everything(), zone()); | 6087 ranges->Add(CharacterRange::Everything(), zone()); |
6064 is_negated = !is_negated; | 6088 is_negated = !is_negated; |
6065 } | 6089 } |
6066 return new(zone()) RegExpCharacterClass(ranges, is_negated); | 6090 return new (zone()) RegExpCharacterClass(ranges, is_negated); |
6067 } | 6091 } |
6068 | 6092 |
6069 | 6093 |
6070 // ---------------------------------------------------------------------------- | 6094 // ---------------------------------------------------------------------------- |
6071 // The Parser interface. | 6095 // The Parser interface. |
6072 | 6096 |
6073 bool RegExpParser::ParseRegExp(Isolate* isolate, Zone* zone, | 6097 bool RegExpParser::ParseRegExp(Isolate* isolate, Zone* zone, |
6074 FlatStringReader* input, bool multiline, | 6098 FlatStringReader* input, bool multiline, |
6075 bool unicode, RegExpCompileData* result) { | 6099 bool unicode, RegExpCompileData* result) { |
6076 DCHECK(result != NULL); | 6100 DCHECK(result != NULL); |
(...skipping 348 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
6425 } | 6449 } |
6426 | 6450 |
6427 | 6451 |
6428 void Parser::RaiseLanguageMode(LanguageMode mode) { | 6452 void Parser::RaiseLanguageMode(LanguageMode mode) { |
6429 SetLanguageMode(scope_, | 6453 SetLanguageMode(scope_, |
6430 static_cast<LanguageMode>(scope_->language_mode() | mode)); | 6454 static_cast<LanguageMode>(scope_->language_mode() | mode)); |
6431 } | 6455 } |
6432 | 6456 |
6433 } // namespace internal | 6457 } // namespace internal |
6434 } // namespace v8 | 6458 } // namespace v8 |
OLD | NEW |