Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(294)

Side by Side Diff: src/parser.cc

Issue 1451373003: Revert of Experimental support for RegExp lookbehind. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master
Patch Set: Created 5 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/parser.h ('k') | src/regexp/arm/regexp-macro-assembler-arm.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "src/parser.h" 5 #include "src/parser.h"
6 6
7 #include "src/api.h" 7 #include "src/api.h"
8 #include "src/ast.h" 8 #include "src/ast.h"
9 #include "src/ast-literal-reindexer.h" 9 #include "src/ast-literal-reindexer.h"
10 #include "src/bailout-reason.h" 10 #include "src/bailout-reason.h"
(...skipping 5175 matching lines...) Expand 10 before | Expand all | Expand 10 after
5186 RegExpParser::RegExpParser(FlatStringReader* in, Handle<String>* error, 5186 RegExpParser::RegExpParser(FlatStringReader* in, Handle<String>* error,
5187 bool multiline, bool unicode, Isolate* isolate, 5187 bool multiline, bool unicode, Isolate* isolate,
5188 Zone* zone) 5188 Zone* zone)
5189 : isolate_(isolate), 5189 : isolate_(isolate),
5190 zone_(zone), 5190 zone_(zone),
5191 error_(error), 5191 error_(error),
5192 captures_(NULL), 5192 captures_(NULL),
5193 in_(in), 5193 in_(in),
5194 current_(kEndMarker), 5194 current_(kEndMarker),
5195 next_pos_(0), 5195 next_pos_(0),
5196 captures_started_(0),
5197 capture_count_(0), 5196 capture_count_(0),
5198 has_more_(true), 5197 has_more_(true),
5199 multiline_(multiline), 5198 multiline_(multiline),
5200 unicode_(unicode), 5199 unicode_(unicode),
5201 simple_(false), 5200 simple_(false),
5202 contains_anchor_(false), 5201 contains_anchor_(false),
5203 is_scanned_for_captures_(false), 5202 is_scanned_for_captures_(false),
5204 failed_(false) { 5203 failed_(false) {
5205 Advance(); 5204 Advance();
5206 } 5205 }
(...skipping 83 matching lines...) Expand 10 before | Expand all | Expand 10 after
5290 // Alternative | Disjunction 5289 // Alternative | Disjunction
5291 // Alternative :: 5290 // Alternative ::
5292 // [empty] 5291 // [empty]
5293 // Term Alternative 5292 // Term Alternative
5294 // Term :: 5293 // Term ::
5295 // Assertion 5294 // Assertion
5296 // Atom 5295 // Atom
5297 // Atom Quantifier 5296 // Atom Quantifier
5298 RegExpTree* RegExpParser::ParseDisjunction() { 5297 RegExpTree* RegExpParser::ParseDisjunction() {
5299 // Used to store current state while parsing subexpressions. 5298 // Used to store current state while parsing subexpressions.
5300 RegExpParserState initial_state(NULL, INITIAL, RegExpLookaround::LOOKAHEAD, 0, 5299 RegExpParserState initial_state(NULL, INITIAL, 0, zone());
5301 zone()); 5300 RegExpParserState* stored_state = &initial_state;
5302 RegExpParserState* state = &initial_state;
5303 // Cache the builder in a local variable for quick access. 5301 // Cache the builder in a local variable for quick access.
5304 RegExpBuilder* builder = initial_state.builder(); 5302 RegExpBuilder* builder = initial_state.builder();
5305 while (true) { 5303 while (true) {
5306 switch (current()) { 5304 switch (current()) {
5307 case kEndMarker: 5305 case kEndMarker:
5308 if (state->IsSubexpression()) { 5306 if (stored_state->IsSubexpression()) {
5309 // Inside a parenthesized group when hitting end of input. 5307 // Inside a parenthesized group when hitting end of input.
5310 ReportError(CStrVector("Unterminated group") CHECK_FAILED); 5308 ReportError(CStrVector("Unterminated group") CHECK_FAILED);
5311 } 5309 }
5312 DCHECK_EQ(INITIAL, state->group_type()); 5310 DCHECK_EQ(INITIAL, stored_state->group_type());
5313 // Parsing completed successfully. 5311 // Parsing completed successfully.
5314 return builder->ToRegExp(); 5312 return builder->ToRegExp();
5315 case ')': { 5313 case ')': {
5316 if (!state->IsSubexpression()) { 5314 if (!stored_state->IsSubexpression()) {
5317 ReportError(CStrVector("Unmatched ')'") CHECK_FAILED); 5315 ReportError(CStrVector("Unmatched ')'") CHECK_FAILED);
5318 } 5316 }
5319 DCHECK_NE(INITIAL, state->group_type()); 5317 DCHECK_NE(INITIAL, stored_state->group_type());
5320 5318
5321 Advance(); 5319 Advance();
5322 // End disjunction parsing and convert builder content to new single 5320 // End disjunction parsing and convert builder content to new single
5323 // regexp atom. 5321 // regexp atom.
5324 RegExpTree* body = builder->ToRegExp(); 5322 RegExpTree* body = builder->ToRegExp();
5325 5323
5326 int end_capture_index = captures_started(); 5324 int end_capture_index = captures_started();
5327 5325
5328 int capture_index = state->capture_index(); 5326 int capture_index = stored_state->capture_index();
5329 SubexpressionType group_type = state->group_type(); 5327 SubexpressionType group_type = stored_state->group_type();
5328
5329 // Restore previous state.
5330 stored_state = stored_state->previous_state();
5331 builder = stored_state->builder();
5330 5332
5331 // Build result of subexpression. 5333 // Build result of subexpression.
5332 if (group_type == CAPTURE) { 5334 if (group_type == CAPTURE) {
5333 RegExpCapture* capture = GetCapture(capture_index); 5335 RegExpCapture* capture = new(zone()) RegExpCapture(body, capture_index);
5334 capture->set_body(body); 5336 captures_->at(capture_index - 1) = capture;
5335 body = capture; 5337 body = capture;
5336 } else if (group_type != GROUPING) { 5338 } else if (group_type != GROUPING) {
5337 DCHECK(group_type == POSITIVE_LOOKAROUND || 5339 DCHECK(group_type == POSITIVE_LOOKAHEAD ||
5338 group_type == NEGATIVE_LOOKAROUND); 5340 group_type == NEGATIVE_LOOKAHEAD);
5339 bool is_positive = (group_type == POSITIVE_LOOKAROUND); 5341 bool is_positive = (group_type == POSITIVE_LOOKAHEAD);
5340 body = new (zone()) RegExpLookaround( 5342 body = new(zone()) RegExpLookahead(body,
5341 body, is_positive, end_capture_index - capture_index, capture_index, 5343 is_positive,
5342 state->lookaround_type()); 5344 end_capture_index - capture_index,
5345 capture_index);
5343 } 5346 }
5344
5345 // Restore previous state.
5346 state = state->previous_state();
5347 builder = state->builder();
5348
5349 builder->AddAtom(body); 5347 builder->AddAtom(body);
5350 // For compatibility with JSC and ES3, we allow quantifiers after 5348 // For compatibility with JSC and ES3, we allow quantifiers after
5351 // lookaheads, and break in all cases. 5349 // lookaheads, and break in all cases.
5352 break; 5350 break;
5353 } 5351 }
5354 case '|': { 5352 case '|': {
5355 Advance(); 5353 Advance();
5356 builder->NewAlternative(); 5354 builder->NewAlternative();
5357 continue; 5355 continue;
5358 } 5356 }
(...skipping 26 matching lines...) Expand all
5385 // everything except \x0a, \x0d, \u2028 and \u2029 5383 // everything except \x0a, \x0d, \u2028 and \u2029
5386 ZoneList<CharacterRange>* ranges = 5384 ZoneList<CharacterRange>* ranges =
5387 new(zone()) ZoneList<CharacterRange>(2, zone()); 5385 new(zone()) ZoneList<CharacterRange>(2, zone());
5388 CharacterRange::AddClassEscape('.', ranges, zone()); 5386 CharacterRange::AddClassEscape('.', ranges, zone());
5389 RegExpTree* atom = new(zone()) RegExpCharacterClass(ranges, false); 5387 RegExpTree* atom = new(zone()) RegExpCharacterClass(ranges, false);
5390 builder->AddAtom(atom); 5388 builder->AddAtom(atom);
5391 break; 5389 break;
5392 } 5390 }
5393 case '(': { 5391 case '(': {
5394 SubexpressionType subexpr_type = CAPTURE; 5392 SubexpressionType subexpr_type = CAPTURE;
5395 RegExpLookaround::Type lookaround_type = state->lookaround_type();
5396 Advance(); 5393 Advance();
5397 if (current() == '?') { 5394 if (current() == '?') {
5398 switch (Next()) { 5395 switch (Next()) {
5399 case ':': 5396 case ':':
5400 subexpr_type = GROUPING; 5397 subexpr_type = GROUPING;
5401 break; 5398 break;
5402 case '=': 5399 case '=':
5403 lookaround_type = RegExpLookaround::LOOKAHEAD; 5400 subexpr_type = POSITIVE_LOOKAHEAD;
5404 subexpr_type = POSITIVE_LOOKAROUND;
5405 break; 5401 break;
5406 case '!': 5402 case '!':
5407 lookaround_type = RegExpLookaround::LOOKAHEAD; 5403 subexpr_type = NEGATIVE_LOOKAHEAD;
5408 subexpr_type = NEGATIVE_LOOKAROUND;
5409 break; 5404 break;
5410 case '<':
5411 if (FLAG_harmony_regexp_lookbehind) {
5412 Advance();
5413 lookaround_type = RegExpLookaround::LOOKBEHIND;
5414 if (Next() == '=') {
5415 subexpr_type = POSITIVE_LOOKAROUND;
5416 break;
5417 } else if (Next() == '!') {
5418 subexpr_type = NEGATIVE_LOOKAROUND;
5419 break;
5420 }
5421 }
5422 // Fall through.
5423 default: 5405 default:
5424 ReportError(CStrVector("Invalid group") CHECK_FAILED); 5406 ReportError(CStrVector("Invalid group") CHECK_FAILED);
5425 break; 5407 break;
5426 } 5408 }
5427 Advance(2); 5409 Advance(2);
5428 } else { 5410 } else {
5429 if (captures_started_ >= kMaxCaptures) { 5411 if (captures_ == NULL) {
5412 captures_ = new(zone()) ZoneList<RegExpCapture*>(2, zone());
5413 }
5414 if (captures_started() >= kMaxCaptures) {
5430 ReportError(CStrVector("Too many captures") CHECK_FAILED); 5415 ReportError(CStrVector("Too many captures") CHECK_FAILED);
5431 } 5416 }
5432 captures_started_++; 5417 captures_->Add(NULL, zone());
5433 } 5418 }
5434 // Store current state and begin new disjunction parsing. 5419 // Store current state and begin new disjunction parsing.
5435 state = new (zone()) RegExpParserState( 5420 stored_state = new(zone()) RegExpParserState(stored_state, subexpr_type,
5436 state, subexpr_type, lookaround_type, captures_started_, zone()); 5421 captures_started(), zone());
5437 builder = state->builder(); 5422 builder = stored_state->builder();
5438 continue; 5423 continue;
5439 } 5424 }
5440 case '[': { 5425 case '[': {
5441 RegExpTree* atom = ParseCharacterClass(CHECK_FAILED); 5426 RegExpTree* atom = ParseCharacterClass(CHECK_FAILED);
5442 builder->AddAtom(atom); 5427 builder->AddAtom(atom);
5443 break; 5428 break;
5444 } 5429 }
5445 // Atom :: 5430 // Atom ::
5446 // \ AtomEscape 5431 // \ AtomEscape
5447 case '\\': 5432 case '\\':
(...skipping 22 matching lines...) Expand all
5470 new(zone()) ZoneList<CharacterRange>(2, zone()); 5455 new(zone()) ZoneList<CharacterRange>(2, zone());
5471 CharacterRange::AddClassEscape(c, ranges, zone()); 5456 CharacterRange::AddClassEscape(c, ranges, zone());
5472 RegExpTree* atom = new(zone()) RegExpCharacterClass(ranges, false); 5457 RegExpTree* atom = new(zone()) RegExpCharacterClass(ranges, false);
5473 builder->AddAtom(atom); 5458 builder->AddAtom(atom);
5474 break; 5459 break;
5475 } 5460 }
5476 case '1': case '2': case '3': case '4': case '5': case '6': 5461 case '1': case '2': case '3': case '4': case '5': case '6':
5477 case '7': case '8': case '9': { 5462 case '7': case '8': case '9': {
5478 int index = 0; 5463 int index = 0;
5479 if (ParseBackReferenceIndex(&index)) { 5464 if (ParseBackReferenceIndex(&index)) {
5480 if (state->IsInsideCaptureGroup(index)) { 5465 RegExpCapture* capture = NULL;
5481 // The backreference is inside the capture group it refers to. 5466 if (captures_ != NULL && index <= captures_->length()) {
5482 // Nothing can possibly have been captured yet. 5467 capture = captures_->at(index - 1);
5468 }
5469 if (capture == NULL) {
5483 builder->AddEmpty(); 5470 builder->AddEmpty();
5484 } else { 5471 break;
5485 RegExpCapture* capture = GetCapture(index);
5486 RegExpTree* atom = new (zone()) RegExpBackReference(capture);
5487 builder->AddAtom(atom);
5488 } 5472 }
5473 RegExpTree* atom = new(zone()) RegExpBackReference(capture);
5474 builder->AddAtom(atom);
5489 break; 5475 break;
5490 } 5476 }
5491 uc32 first_digit = Next(); 5477 uc32 first_digit = Next();
5492 if (first_digit == '8' || first_digit == '9') { 5478 if (first_digit == '8' || first_digit == '9') {
5493 // If the 'u' flag is present, only syntax characters can be escaped, 5479 // If the 'u' flag is present, only syntax characters can be escaped,
5494 // no other identity escapes are allowed. If the 'u' flag is not 5480 // no other identity escapes are allowed. If the 'u' flag is not
5495 // present, all identity escapes are allowed. 5481 // present, all identity escapes are allowed.
5496 if (!FLAG_harmony_unicode_regexps || !unicode_) { 5482 if (!FLAG_harmony_unicode_regexps || !unicode_) {
5497 builder->AddCharacter(first_digit); 5483 builder->AddCharacter(first_digit);
5498 Advance(2); 5484 Advance(2);
(...skipping 240 matching lines...) Expand 10 before | Expand all | Expand 10 after
5739 if (value > capture_count_) { 5725 if (value > capture_count_) {
5740 Reset(start); 5726 Reset(start);
5741 return false; 5727 return false;
5742 } 5728 }
5743 } 5729 }
5744 *index_out = value; 5730 *index_out = value;
5745 return true; 5731 return true;
5746 } 5732 }
5747 5733
5748 5734
5749 RegExpCapture* RegExpParser::GetCapture(int index) {
5750 // The indices of the capture groups are one-based. A capture's index in the list is
5751 // zero-based.
5752 int know_captures =
5753 is_scanned_for_captures_ ? capture_count_ : captures_started_;
5754 DCHECK(index <= know_captures);
5755 if (captures_ == NULL) {
5756 captures_ = new (zone()) ZoneList<RegExpCapture*>(know_captures, zone());
5757 }
5758 while (captures_->length() < know_captures) {
5759 captures_->Add(new (zone()) RegExpCapture(captures_->length() + 1), zone());
5760 }
5761 return captures_->at(index - 1);
5762 }
5763
5764
5765 bool RegExpParser::RegExpParserState::IsInsideCaptureGroup(int index) {
5766 for (RegExpParserState* s = this; s != NULL; s = s->previous_state()) {
5767 if (s->group_type() != CAPTURE) continue;
5768 // Return true if we found the matching capture index.
5769 if (index == s->capture_index()) return true;
5770 // Abort if index is larger than what has been parsed up till this state.
5771 if (index > s->capture_index()) return false;
5772 }
5773 return false;
5774 }
5775
5776
5777 // QuantifierPrefix :: 5735 // QuantifierPrefix ::
5778 // { DecimalDigits } 5736 // { DecimalDigits }
5779 // { DecimalDigits , } 5737 // { DecimalDigits , }
5780 // { DecimalDigits , DecimalDigits } 5738 // { DecimalDigits , DecimalDigits }
5781 // 5739 //
5782 // Returns true if parsing succeeds, and sets the min_out and max_out 5740 // Returns true if parsing succeeds, and sets the min_out and max_out
5783 // values. Values are truncated to RegExpTree::kInfinity if they overflow. 5741 // values. Values are truncated to RegExpTree::kInfinity if they overflow.
5784 bool RegExpParser::ParseIntervalQuantifier(int* min_out, int* max_out) { 5742 bool RegExpParser::ParseIntervalQuantifier(int* min_out, int* max_out) {
5785 DCHECK_EQ(current(), '{'); 5743 DCHECK_EQ(current(), '{');
5786 int start = position(); 5744 int start = position();
(...skipping 311 matching lines...) Expand 10 before | Expand all | Expand 10 after
6098 } 6056 }
6099 } 6057 }
6100 if (!has_more()) { 6058 if (!has_more()) {
6101 return ReportError(CStrVector(kUnterminated) CHECK_FAILED); 6059 return ReportError(CStrVector(kUnterminated) CHECK_FAILED);
6102 } 6060 }
6103 Advance(); 6061 Advance();
6104 if (ranges->length() == 0) { 6062 if (ranges->length() == 0) {
6105 ranges->Add(CharacterRange::Everything(), zone()); 6063 ranges->Add(CharacterRange::Everything(), zone());
6106 is_negated = !is_negated; 6064 is_negated = !is_negated;
6107 } 6065 }
6108 return new (zone()) RegExpCharacterClass(ranges, is_negated); 6066 return new(zone()) RegExpCharacterClass(ranges, is_negated);
6109 } 6067 }
6110 6068
6111 6069
6112 // ---------------------------------------------------------------------------- 6070 // ----------------------------------------------------------------------------
6113 // The Parser interface. 6071 // The Parser interface.
6114 6072
6115 bool RegExpParser::ParseRegExp(Isolate* isolate, Zone* zone, 6073 bool RegExpParser::ParseRegExp(Isolate* isolate, Zone* zone,
6116 FlatStringReader* input, bool multiline, 6074 FlatStringReader* input, bool multiline,
6117 bool unicode, RegExpCompileData* result) { 6075 bool unicode, RegExpCompileData* result) {
6118 DCHECK(result != NULL); 6076 DCHECK(result != NULL);
(...skipping 348 matching lines...) Expand 10 before | Expand all | Expand 10 after
6467 } 6425 }
6468 6426
6469 6427
6470 void Parser::RaiseLanguageMode(LanguageMode mode) { 6428 void Parser::RaiseLanguageMode(LanguageMode mode) {
6471 SetLanguageMode(scope_, 6429 SetLanguageMode(scope_,
6472 static_cast<LanguageMode>(scope_->language_mode() | mode)); 6430 static_cast<LanguageMode>(scope_->language_mode() | mode));
6473 } 6431 }
6474 6432
6475 } // namespace internal 6433 } // namespace internal
6476 } // namespace v8 6434 } // namespace v8
OLDNEW
« no previous file with comments | « src/parser.h ('k') | src/regexp/arm/regexp-macro-assembler-arm.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698