src/parser.cc - Issue 1451373003: Revert of Experimental support for RegExp lookbehind.

Side by Side Diff: src/parser.cc

Issue 1451373003: Revert of Experimental support for RegExp lookbehind. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master

Patch Set: Created 5 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright 2012 the V8 project authors. All rights reserved.	1 // Copyright 2012 the V8 project authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "src/parser.h"	5 #include "src/parser.h"

6	6

7 #include "src/api.h"	7 #include "src/api.h"

8 #include "src/ast.h"	8 #include "src/ast.h"

9 #include "src/ast-literal-reindexer.h"	9 #include "src/ast-literal-reindexer.h"

10 #include "src/bailout-reason.h"	10 #include "src/bailout-reason.h"

(...skipping 5175 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
5186 RegExpParser::RegExpParser(FlatStringReader* in, Handle<String>* error,	5186 RegExpParser::RegExpParser(FlatStringReader* in, Handle<String>* error,

5187 bool multiline, bool unicode, Isolate* isolate,	5187 bool multiline, bool unicode, Isolate* isolate,

5188 Zone* zone)	5188 Zone* zone)

5189 : isolate_(isolate),	5189 : isolate_(isolate),

5190 zone_(zone),	5190 zone_(zone),

5191 error_(error),	5191 error_(error),

5192 captures_(NULL),	5192 captures_(NULL),

5193 in_(in),	5193 in_(in),

5194 current_(kEndMarker),	5194 current_(kEndMarker),

5195 next_pos_(0),	5195 next_pos_(0),

5196 captures_started_(0),

5197 capture_count_(0),	5196 capture_count_(0),

5198 has_more_(true),	5197 has_more_(true),

5199 multiline_(multiline),	5198 multiline_(multiline),

5200 unicode_(unicode),	5199 unicode_(unicode),

5201 simple_(false),	5200 simple_(false),

5202 contains_anchor_(false),	5201 contains_anchor_(false),

5203 is_scanned_for_captures_(false),	5202 is_scanned_for_captures_(false),

5204 failed_(false) {	5203 failed_(false) {

5205 Advance();	5204 Advance();

5206 }	5205 }

(...skipping 83 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
5290 // Alternative | Disjunction	5289 // Alternative | Disjunction

5291 // Alternative ::	5290 // Alternative ::

5292 // [empty]	5291 // [empty]

5293 // Term Alternative	5292 // Term Alternative

5294 // Term ::	5293 // Term ::

5295 // Assertion	5294 // Assertion

5296 // Atom	5295 // Atom

5297 // Atom Quantifier	5296 // Atom Quantifier

5298 RegExpTree* RegExpParser::ParseDisjunction() {	5297 RegExpTree* RegExpParser::ParseDisjunction() {

5299 // Used to store current state while parsing subexpressions.	5298 // Used to store current state while parsing subexpressions.

5300 RegExpParserState initial_state(NULL, INITIAL, RegExpLookaround::LOOKAHEAD, 0,	5299 RegExpParserState initial_state(NULL, INITIAL, 0, zone());

5301 zone());	5300 RegExpParserState* stored_state = &initial_state;

5302 RegExpParserState* state = &initial_state;

5303 // Cache the builder in a local variable for quick access.	5301 // Cache the builder in a local variable for quick access.

5304 RegExpBuilder* builder = initial_state.builder();	5302 RegExpBuilder* builder = initial_state.builder();

5305 while (true) {	5303 while (true) {

5306 switch (current()) {	5304 switch (current()) {

5307 case kEndMarker:	5305 case kEndMarker:

5308 if (state->IsSubexpression()) {	5306 if (stored_state->IsSubexpression()) {

5309 // Inside a parenthesized group when hitting end of input.	5307 // Inside a parenthesized group when hitting end of input.

5310 ReportError(CStrVector("Unterminated group") CHECK_FAILED);	5308 ReportError(CStrVector("Unterminated group") CHECK_FAILED);

5311 }	5309 }

5312 DCHECK_EQ(INITIAL, state->group_type());	5310 DCHECK_EQ(INITIAL, stored_state->group_type());

5313 // Parsing completed successfully.	5311 // Parsing completed successfully.

5314 return builder->ToRegExp();	5312 return builder->ToRegExp();

5315 case ')': {	5313 case ')': {

5316 if (!state->IsSubexpression()) {	5314 if (!stored_state->IsSubexpression()) {

5317 ReportError(CStrVector("Unmatched ')'") CHECK_FAILED);	5315 ReportError(CStrVector("Unmatched ')'") CHECK_FAILED);

5318 }	5316 }

5319 DCHECK_NE(INITIAL, state->group_type());	5317 DCHECK_NE(INITIAL, stored_state->group_type());

5320	5318

5321 Advance();	5319 Advance();

5322 // End disjunction parsing and convert builder content to new single	5320 // End disjunction parsing and convert builder content to new single

5323 // regexp atom.	5321 // regexp atom.

5324 RegExpTree* body = builder->ToRegExp();	5322 RegExpTree* body = builder->ToRegExp();

5325	5323

5326 int end_capture_index = captures_started();	5324 int end_capture_index = captures_started();

5327	5325

5328 int capture_index = state->capture_index();	5326 int capture_index = stored_state->capture_index();

5329 SubexpressionType group_type = state->group_type();	5327 SubexpressionType group_type = stored_state->group_type();

	5328

	5329 // Restore previous state.

	5330 stored_state = stored_state->previous_state();

	5331 builder = stored_state->builder();

5330	5332

5331 // Build result of subexpression.	5333 // Build result of subexpression.

5332 if (group_type == CAPTURE) {	5334 if (group_type == CAPTURE) {

5333 RegExpCapture* capture = GetCapture(capture_index);	5335 RegExpCapture* capture = new(zone()) RegExpCapture(body, capture_index);

5334 capture->set_body(body);	5336 captures_->at(capture_index - 1) = capture;

5335 body = capture;	5337 body = capture;

5336 } else if (group_type != GROUPING) {	5338 } else if (group_type != GROUPING) {

5337 DCHECK(group_type == POSITIVE_LOOKAROUND ||	5339 DCHECK(group_type == POSITIVE_LOOKAHEAD ||

5338 group_type == NEGATIVE_LOOKAROUND);	5340 group_type == NEGATIVE_LOOKAHEAD);

5339 bool is_positive = (group_type == POSITIVE_LOOKAROUND);	5341 bool is_positive = (group_type == POSITIVE_LOOKAHEAD);

5340 body = new (zone()) RegExpLookaround(	5342 body = new(zone()) RegExpLookahead(body,

5341 body, is_positive, end_capture_index - capture_index, capture_index,	5343 is_positive,

5342 state->lookaround_type());	5344 end_capture_index - capture_index,

	5345 capture_index);

5343 }	5346 }

5344

5345 // Restore previous state.

5346 state = state->previous_state();

5347 builder = state->builder();

5348

5349 builder->AddAtom(body);	5347 builder->AddAtom(body);

5350 // For compatibility with JSC and ES3, we allow quantifiers after	5348 // For compatibility with JSC and ES3, we allow quantifiers after

5351 // lookaheads, and break in all cases.	5349 // lookaheads, and break in all cases.

5352 break;	5350 break;

5353 }	5351 }

5354 case '|': {	5352 case '|': {

5355 Advance();	5353 Advance();

5356 builder->NewAlternative();	5354 builder->NewAlternative();

5357 continue;	5355 continue;

5358 }	5356 }

(...skipping 26 matching lines...) Expand all Loading...
5385 // everything except \x0a, \x0d, \u2028 and \u2029	5383 // everything except \x0a, \x0d, \u2028 and \u2029

5386 ZoneList<CharacterRange>* ranges =	5384 ZoneList<CharacterRange>* ranges =

5387 new(zone()) ZoneList<CharacterRange>(2, zone());	5385 new(zone()) ZoneList<CharacterRange>(2, zone());

5388 CharacterRange::AddClassEscape('.', ranges, zone());	5386 CharacterRange::AddClassEscape('.', ranges, zone());

5389 RegExpTree* atom = new(zone()) RegExpCharacterClass(ranges, false);	5387 RegExpTree* atom = new(zone()) RegExpCharacterClass(ranges, false);

5390 builder->AddAtom(atom);	5388 builder->AddAtom(atom);

5391 break;	5389 break;

5392 }	5390 }

5393 case '(': {	5391 case '(': {

5394 SubexpressionType subexpr_type = CAPTURE;	5392 SubexpressionType subexpr_type = CAPTURE;

5395 RegExpLookaround::Type lookaround_type = state->lookaround_type();

5396 Advance();	5393 Advance();

5397 if (current() == '?') {	5394 if (current() == '?') {

5398 switch (Next()) {	5395 switch (Next()) {

5399 case ':':	5396 case ':':

5400 subexpr_type = GROUPING;	5397 subexpr_type = GROUPING;

5401 break;	5398 break;

5402 case '=':	5399 case '=':

5403 lookaround_type = RegExpLookaround::LOOKAHEAD;	5400 subexpr_type = POSITIVE_LOOKAHEAD;

5404 subexpr_type = POSITIVE_LOOKAROUND;

5405 break;	5401 break;

5406 case '!':	5402 case '!':

5407 lookaround_type = RegExpLookaround::LOOKAHEAD;	5403 subexpr_type = NEGATIVE_LOOKAHEAD;

5408 subexpr_type = NEGATIVE_LOOKAROUND;

5409 break;	5404 break;

5410 case '<':

5411 if (FLAG_harmony_regexp_lookbehind) {

5412 Advance();

5413 lookaround_type = RegExpLookaround::LOOKBEHIND;

5414 if (Next() == '=') {

5415 subexpr_type = POSITIVE_LOOKAROUND;

5416 break;

5417 } else if (Next() == '!') {

5418 subexpr_type = NEGATIVE_LOOKAROUND;

5419 break;

5420 }

5421 }

5422 // Fall through.

5423 default:	5405 default:

5424 ReportError(CStrVector("Invalid group") CHECK_FAILED);	5406 ReportError(CStrVector("Invalid group") CHECK_FAILED);

5425 break;	5407 break;

5426 }	5408 }

5427 Advance(2);	5409 Advance(2);

5428 } else {	5410 } else {

5429 if (captures_started_ >= kMaxCaptures) {	5411 if (captures_ == NULL) {

	5412 captures_ = new(zone()) ZoneList<RegExpCapture*>(2, zone());

	5413 }

	5414 if (captures_started() >= kMaxCaptures) {

5430 ReportError(CStrVector("Too many captures") CHECK_FAILED);	5415 ReportError(CStrVector("Too many captures") CHECK_FAILED);

5431 }	5416 }

5432 captures_started_++;	5417 captures_->Add(NULL, zone());

5433 }	5418 }

5434 // Store current state and begin new disjunction parsing.	5419 // Store current state and begin new disjunction parsing.

5435 state = new (zone()) RegExpParserState(	5420 stored_state = new(zone()) RegExpParserState(stored_state, subexpr_type,

5436 state, subexpr_type, lookaround_type, captures_started_, zone());	5421 captures_started(), zone());

5437 builder = state->builder();	5422 builder = stored_state->builder();

5438 continue;	5423 continue;

5439 }	5424 }

5440 case '[': {	5425 case '[': {

5441 RegExpTree* atom = ParseCharacterClass(CHECK_FAILED);	5426 RegExpTree* atom = ParseCharacterClass(CHECK_FAILED);

5442 builder->AddAtom(atom);	5427 builder->AddAtom(atom);

5443 break;	5428 break;

5444 }	5429 }

5445 // Atom ::	5430 // Atom ::

5446 // \ AtomEscape	5431 // \ AtomEscape

5447 case '\\':	5432 case '\\':

(...skipping 22 matching lines...) Expand all Loading...
5470 new(zone()) ZoneList<CharacterRange>(2, zone());	5455 new(zone()) ZoneList<CharacterRange>(2, zone());

5471 CharacterRange::AddClassEscape(c, ranges, zone());	5456 CharacterRange::AddClassEscape(c, ranges, zone());

5472 RegExpTree* atom = new(zone()) RegExpCharacterClass(ranges, false);	5457 RegExpTree* atom = new(zone()) RegExpCharacterClass(ranges, false);

5473 builder->AddAtom(atom);	5458 builder->AddAtom(atom);

5474 break;	5459 break;

5475 }	5460 }

5476 case '1': case '2': case '3': case '4': case '5': case '6':	5461 case '1': case '2': case '3': case '4': case '5': case '6':

5477 case '7': case '8': case '9': {	5462 case '7': case '8': case '9': {

5478 int index = 0;	5463 int index = 0;

5479 if (ParseBackReferenceIndex(&index)) {	5464 if (ParseBackReferenceIndex(&index)) {

5480 if (state->IsInsideCaptureGroup(index)) {	5465 RegExpCapture* capture = NULL;

5481 // The backreference is inside the capture group it refers to.	5466 if (captures_ != NULL && index <= captures_->length()) {

5482 // Nothing can possibly have been captured yet.	5467 capture = captures_->at(index - 1);

	5468 }

	5469 if (capture == NULL) {

5483 builder->AddEmpty();	5470 builder->AddEmpty();

5484 } else {	5471 break;

5485 RegExpCapture* capture = GetCapture(index);

5486 RegExpTree* atom = new (zone()) RegExpBackReference(capture);

5487 builder->AddAtom(atom);

5488 }	5472 }

	5473 RegExpTree* atom = new(zone()) RegExpBackReference(capture);

	5474 builder->AddAtom(atom);

5489 break;	5475 break;

5490 }	5476 }

5491 uc32 first_digit = Next();	5477 uc32 first_digit = Next();

5492 if (first_digit == '8' || first_digit == '9') {	5478 if (first_digit == '8' || first_digit == '9') {

5493 // If the 'u' flag is present, only syntax characters can be escaped,	5479 // If the 'u' flag is present, only syntax characters can be escaped,

5494 // no other identity escapes are allowed. If the 'u' flag is not	5480 // no other identity escapes are allowed. If the 'u' flag is not

5495 // present, all identity escapes are allowed.	5481 // present, all identity escapes are allowed.

5496 if (!FLAG_harmony_unicode_regexps || !unicode_) {	5482 if (!FLAG_harmony_unicode_regexps || !unicode_) {

5497 builder->AddCharacter(first_digit);	5483 builder->AddCharacter(first_digit);

5498 Advance(2);	5484 Advance(2);

(...skipping 240 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
5739 if (value > capture_count_) {	5725 if (value > capture_count_) {

5740 Reset(start);	5726 Reset(start);

5741 return false;	5727 return false;

5742 }	5728 }

5743 }	5729 }

5744 *index_out = value;	5730 *index_out = value;

5745 return true;	5731 return true;

5746 }	5732 }

5747	5733

5748	5734

5749 RegExpCapture* RegExpParser::GetCapture(int index) {

5750 // The index of a capture group is one-based. Its index in the list is

5751 // zero-based.

5752 int know_captures =

5753 is_scanned_for_captures_ ? capture_count_ : captures_started_;

5754 DCHECK(index <= know_captures);

5755 if (captures_ == NULL) {

5756 captures_ = new (zone()) ZoneList<RegExpCapture*>(know_captures, zone());

5757 }

5758 while (captures_->length() < know_captures) {

5759 captures_->Add(new (zone()) RegExpCapture(captures_->length() + 1), zone());

5760 }

5761 return captures_->at(index - 1);

5762 }

5763

5764

5765 bool RegExpParser::RegExpParserState::IsInsideCaptureGroup(int index) {

5766 for (RegExpParserState* s = this; s != NULL; s = s->previous_state()) {

5767 if (s->group_type() != CAPTURE) continue;

5768 // Return true if we found the matching capture index.

5769 if (index == s->capture_index()) return true;

5770 // Abort if index is larger than what has been parsed up till this state.

5771 if (index > s->capture_index()) return false;

5772 }

5773 return false;

5774 }

5775

5776

5777 // QuantifierPrefix ::	5735 // QuantifierPrefix ::

5778 // { DecimalDigits }	5736 // { DecimalDigits }

5779 // { DecimalDigits , }	5737 // { DecimalDigits , }

5780 // { DecimalDigits , DecimalDigits }	5738 // { DecimalDigits , DecimalDigits }

5781 //	5739 //

5782 // Returns true if parsing succeeds, and sets the min_out and max_out	5740 // Returns true if parsing succeeds, and sets the min_out and max_out

5783 // values. Values are truncated to RegExpTree::kInfinity if they overflow.	5741 // values. Values are truncated to RegExpTree::kInfinity if they overflow.

5784 bool RegExpParser::ParseIntervalQuantifier(int* min_out, int* max_out) {	5742 bool RegExpParser::ParseIntervalQuantifier(int* min_out, int* max_out) {

5785 DCHECK_EQ(current(), '{');	5743 DCHECK_EQ(current(), '{');

5786 int start = position();	5744 int start = position();

(...skipping 311 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
6098 }	6056 }

6099 }	6057 }

6100 if (!has_more()) {	6058 if (!has_more()) {

6101 return ReportError(CStrVector(kUnterminated) CHECK_FAILED);	6059 return ReportError(CStrVector(kUnterminated) CHECK_FAILED);

6102 }	6060 }

6103 Advance();	6061 Advance();

6104 if (ranges->length() == 0) {	6062 if (ranges->length() == 0) {

6105 ranges->Add(CharacterRange::Everything(), zone());	6063 ranges->Add(CharacterRange::Everything(), zone());

6106 is_negated = !is_negated;	6064 is_negated = !is_negated;

6107 }	6065 }

6108 return new (zone()) RegExpCharacterClass(ranges, is_negated);	6066 return new(zone()) RegExpCharacterClass(ranges, is_negated);

6109 }	6067 }

6110	6068

6111	6069

6112 // ----------------------------------------------------------------------------	6070 // ----------------------------------------------------------------------------

6113 // The Parser interface.	6071 // The Parser interface.

6114	6072

6115 bool RegExpParser::ParseRegExp(Isolate* isolate, Zone* zone,	6073 bool RegExpParser::ParseRegExp(Isolate* isolate, Zone* zone,

6116 FlatStringReader* input, bool multiline,	6074 FlatStringReader* input, bool multiline,

6117 bool unicode, RegExpCompileData* result) {	6075 bool unicode, RegExpCompileData* result) {

6118 DCHECK(result != NULL);	6076 DCHECK(result != NULL);

(...skipping 348 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
6467 }	6425 }

6468	6426

6469	6427

6470 void Parser::RaiseLanguageMode(LanguageMode mode) {	6428 void Parser::RaiseLanguageMode(LanguageMode mode) {

6471 SetLanguageMode(scope_,	6429 SetLanguageMode(scope_,

6472 static_cast<LanguageMode>(scope_->language_mode() | mode));	6430 static_cast<LanguageMode>(scope_->language_mode() | mode));

6473 }	6431 }

6474	6432

6475 } // namespace internal	6433 } // namespace internal

6476 } // namespace v8	6434 } // namespace v8

OLD	NEW

« no previous file with comments | « src/parser.h ('k') | src/regexp/arm/regexp-macro-assembler-arm.h » ('j') | no next file with comments »