Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(2)

Side by Side Diff: src/parser.cc

Issue 1418963009: Experimental support for RegExp lookbehind. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master
Patch Set: fix arm64 debug code assertion Created 5 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/parser.h ('k') | src/regexp/arm/regexp-macro-assembler-arm.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "src/parser.h" 5 #include "src/parser.h"
6 6
7 #include "src/api.h" 7 #include "src/api.h"
8 #include "src/ast.h" 8 #include "src/ast.h"
9 #include "src/ast-literal-reindexer.h" 9 #include "src/ast-literal-reindexer.h"
10 #include "src/bailout-reason.h" 10 #include "src/bailout-reason.h"
(...skipping 5164 matching lines...) Expand 10 before | Expand all | Expand 10 after
5175 RegExpParser::RegExpParser(FlatStringReader* in, Handle<String>* error, 5175 RegExpParser::RegExpParser(FlatStringReader* in, Handle<String>* error,
5176 bool multiline, bool unicode, Isolate* isolate, 5176 bool multiline, bool unicode, Isolate* isolate,
5177 Zone* zone) 5177 Zone* zone)
5178 : isolate_(isolate), 5178 : isolate_(isolate),
5179 zone_(zone), 5179 zone_(zone),
5180 error_(error), 5180 error_(error),
5181 captures_(NULL), 5181 captures_(NULL),
5182 in_(in), 5182 in_(in),
5183 current_(kEndMarker), 5183 current_(kEndMarker),
5184 next_pos_(0), 5184 next_pos_(0),
5185 captures_started_(0),
5185 capture_count_(0), 5186 capture_count_(0),
5186 has_more_(true), 5187 has_more_(true),
5187 multiline_(multiline), 5188 multiline_(multiline),
5188 unicode_(unicode), 5189 unicode_(unicode),
5189 simple_(false), 5190 simple_(false),
5190 contains_anchor_(false), 5191 contains_anchor_(false),
5191 is_scanned_for_captures_(false), 5192 is_scanned_for_captures_(false),
5192 failed_(false) { 5193 failed_(false) {
5193 Advance(); 5194 Advance();
5194 } 5195 }
(...skipping 83 matching lines...) Expand 10 before | Expand all | Expand 10 after
5278 // Alternative | Disjunction 5279 // Alternative | Disjunction
5279 // Alternative :: 5280 // Alternative ::
5280 // [empty] 5281 // [empty]
5281 // Term Alternative 5282 // Term Alternative
5282 // Term :: 5283 // Term ::
5283 // Assertion 5284 // Assertion
5284 // Atom 5285 // Atom
5285 // Atom Quantifier 5286 // Atom Quantifier
5286 RegExpTree* RegExpParser::ParseDisjunction() { 5287 RegExpTree* RegExpParser::ParseDisjunction() {
5287 // Used to store current state while parsing subexpressions. 5288 // Used to store current state while parsing subexpressions.
5288 RegExpParserState initial_state(NULL, INITIAL, 0, zone()); 5289 RegExpParserState initial_state(NULL, INITIAL, RegExpLookaround::LOOKAHEAD, 0,
5289 RegExpParserState* stored_state = &initial_state; 5290 zone());
5291 RegExpParserState* state = &initial_state;
5290 // Cache the builder in a local variable for quick access. 5292 // Cache the builder in a local variable for quick access.
5291 RegExpBuilder* builder = initial_state.builder(); 5293 RegExpBuilder* builder = initial_state.builder();
5292 while (true) { 5294 while (true) {
5293 switch (current()) { 5295 switch (current()) {
5294 case kEndMarker: 5296 case kEndMarker:
5295 if (stored_state->IsSubexpression()) { 5297 if (state->IsSubexpression()) {
5296 // Inside a parenthesized group when hitting end of input. 5298 // Inside a parenthesized group when hitting end of input.
5297 ReportError(CStrVector("Unterminated group") CHECK_FAILED); 5299 ReportError(CStrVector("Unterminated group") CHECK_FAILED);
5298 } 5300 }
5299 DCHECK_EQ(INITIAL, stored_state->group_type()); 5301 DCHECK_EQ(INITIAL, state->group_type());
5300 // Parsing completed successfully. 5302 // Parsing completed successfully.
5301 return builder->ToRegExp(); 5303 return builder->ToRegExp();
5302 case ')': { 5304 case ')': {
5303 if (!stored_state->IsSubexpression()) { 5305 if (!state->IsSubexpression()) {
5304 ReportError(CStrVector("Unmatched ')'") CHECK_FAILED); 5306 ReportError(CStrVector("Unmatched ')'") CHECK_FAILED);
5305 } 5307 }
5306 DCHECK_NE(INITIAL, stored_state->group_type()); 5308 DCHECK_NE(INITIAL, state->group_type());
5307 5309
5308 Advance(); 5310 Advance();
5309 // End disjunction parsing and convert builder content to new single 5311 // End disjunction parsing and convert builder content to new single
5310 // regexp atom. 5312 // regexp atom.
5311 RegExpTree* body = builder->ToRegExp(); 5313 RegExpTree* body = builder->ToRegExp();
5312 5314
5313 int end_capture_index = captures_started(); 5315 int end_capture_index = captures_started();
5314 5316
5315 int capture_index = stored_state->capture_index(); 5317 int capture_index = state->capture_index();
5316 SubexpressionType group_type = stored_state->group_type(); 5318 SubexpressionType group_type = state->group_type();
5317
5318 // Restore previous state.
5319 stored_state = stored_state->previous_state();
5320 builder = stored_state->builder();
5321 5319
5322 // Build result of subexpression. 5320 // Build result of subexpression.
5323 if (group_type == CAPTURE) { 5321 if (group_type == CAPTURE) {
5324 RegExpCapture* capture = new(zone()) RegExpCapture(body, capture_index); 5322 RegExpCapture* capture = GetCapture(capture_index);
5325 captures_->at(capture_index - 1) = capture; 5323 capture->set_body(body);
5326 body = capture; 5324 body = capture;
5327 } else if (group_type != GROUPING) { 5325 } else if (group_type != GROUPING) {
5328 DCHECK(group_type == POSITIVE_LOOKAHEAD || 5326 DCHECK(group_type == POSITIVE_LOOKAROUND ||
5329 group_type == NEGATIVE_LOOKAHEAD); 5327 group_type == NEGATIVE_LOOKAROUND);
5330 bool is_positive = (group_type == POSITIVE_LOOKAHEAD); 5328 bool is_positive = (group_type == POSITIVE_LOOKAROUND);
5331 body = new(zone()) RegExpLookahead(body, 5329 body = new (zone()) RegExpLookaround(
5332 is_positive, 5330 body, is_positive, end_capture_index - capture_index, capture_index,
5333 end_capture_index - capture_index, 5331 state->lookaround_type());
5334 capture_index);
5335 } 5332 }
5333
5334 // Restore previous state.
5335 state = state->previous_state();
5336 builder = state->builder();
5337
5336 builder->AddAtom(body); 5338 builder->AddAtom(body);
5337 // For compatibility with JSC and ES3, we allow quantifiers after 5339 // For compatibility with JSC and ES3, we allow quantifiers after
5338 // lookaheads, and break in all cases. 5340 // lookaheads, and break in all cases.
5339 break; 5341 break;
5340 } 5342 }
5341 case '|': { 5343 case '|': {
5342 Advance(); 5344 Advance();
5343 builder->NewAlternative(); 5345 builder->NewAlternative();
5344 continue; 5346 continue;
5345 } 5347 }
(...skipping 26 matching lines...) Expand all
5372 // everything except \x0a, \x0d, \u2028 and \u2029 5374 // everything except \x0a, \x0d, \u2028 and \u2029
5373 ZoneList<CharacterRange>* ranges = 5375 ZoneList<CharacterRange>* ranges =
5374 new(zone()) ZoneList<CharacterRange>(2, zone()); 5376 new(zone()) ZoneList<CharacterRange>(2, zone());
5375 CharacterRange::AddClassEscape('.', ranges, zone()); 5377 CharacterRange::AddClassEscape('.', ranges, zone());
5376 RegExpTree* atom = new(zone()) RegExpCharacterClass(ranges, false); 5378 RegExpTree* atom = new(zone()) RegExpCharacterClass(ranges, false);
5377 builder->AddAtom(atom); 5379 builder->AddAtom(atom);
5378 break; 5380 break;
5379 } 5381 }
5380 case '(': { 5382 case '(': {
5381 SubexpressionType subexpr_type = CAPTURE; 5383 SubexpressionType subexpr_type = CAPTURE;
5384 RegExpLookaround::Type lookaround_type = state->lookaround_type();
5382 Advance(); 5385 Advance();
5383 if (current() == '?') { 5386 if (current() == '?') {
5384 switch (Next()) { 5387 switch (Next()) {
5385 case ':': 5388 case ':':
5386 subexpr_type = GROUPING; 5389 subexpr_type = GROUPING;
5387 break; 5390 break;
5388 case '=': 5391 case '=':
5389 subexpr_type = POSITIVE_LOOKAHEAD; 5392 lookaround_type = RegExpLookaround::LOOKAHEAD;
5393 subexpr_type = POSITIVE_LOOKAROUND;
5390 break; 5394 break;
5391 case '!': 5395 case '!':
5392 subexpr_type = NEGATIVE_LOOKAHEAD; 5396 lookaround_type = RegExpLookaround::LOOKAHEAD;
5397 subexpr_type = NEGATIVE_LOOKAROUND;
5393 break; 5398 break;
5399 case '<':
5400 if (FLAG_harmony_regexp_lookbehind) {
5401 Advance();
5402 lookaround_type = RegExpLookaround::LOOKBEHIND;
5403 if (Next() == '=') {
5404 subexpr_type = POSITIVE_LOOKAROUND;
5405 break;
5406 } else if (Next() == '!') {
5407 subexpr_type = NEGATIVE_LOOKAROUND;
5408 break;
5409 }
5410 }
5411 // Fall through.
5394 default: 5412 default:
5395 ReportError(CStrVector("Invalid group") CHECK_FAILED); 5413 ReportError(CStrVector("Invalid group") CHECK_FAILED);
5396 break; 5414 break;
5397 } 5415 }
5398 Advance(2); 5416 Advance(2);
5399 } else { 5417 } else {
5400 if (captures_ == NULL) { 5418 if (captures_started_ >= kMaxCaptures) {
5401 captures_ = new(zone()) ZoneList<RegExpCapture*>(2, zone());
5402 }
5403 if (captures_started() >= kMaxCaptures) {
5404 ReportError(CStrVector("Too many captures") CHECK_FAILED); 5419 ReportError(CStrVector("Too many captures") CHECK_FAILED);
5405 } 5420 }
5406 captures_->Add(NULL, zone()); 5421 captures_started_++;
5407 } 5422 }
5408 // Store current state and begin new disjunction parsing. 5423 // Store current state and begin new disjunction parsing.
5409 stored_state = new(zone()) RegExpParserState(stored_state, subexpr_type, 5424 state = new (zone()) RegExpParserState(
5410 captures_started(), zone()); 5425 state, subexpr_type, lookaround_type, captures_started_, zone());
5411 builder = stored_state->builder(); 5426 builder = state->builder();
5412 continue; 5427 continue;
5413 } 5428 }
5414 case '[': { 5429 case '[': {
5415 RegExpTree* atom = ParseCharacterClass(CHECK_FAILED); 5430 RegExpTree* atom = ParseCharacterClass(CHECK_FAILED);
5416 builder->AddAtom(atom); 5431 builder->AddAtom(atom);
5417 break; 5432 break;
5418 } 5433 }
5419 // Atom :: 5434 // Atom ::
5420 // \ AtomEscape 5435 // \ AtomEscape
5421 case '\\': 5436 case '\\':
(...skipping 22 matching lines...) Expand all
5444 new(zone()) ZoneList<CharacterRange>(2, zone()); 5459 new(zone()) ZoneList<CharacterRange>(2, zone());
5445 CharacterRange::AddClassEscape(c, ranges, zone()); 5460 CharacterRange::AddClassEscape(c, ranges, zone());
5446 RegExpTree* atom = new(zone()) RegExpCharacterClass(ranges, false); 5461 RegExpTree* atom = new(zone()) RegExpCharacterClass(ranges, false);
5447 builder->AddAtom(atom); 5462 builder->AddAtom(atom);
5448 break; 5463 break;
5449 } 5464 }
5450 case '1': case '2': case '3': case '4': case '5': case '6': 5465 case '1': case '2': case '3': case '4': case '5': case '6':
5451 case '7': case '8': case '9': { 5466 case '7': case '8': case '9': {
5452 int index = 0; 5467 int index = 0;
5453 if (ParseBackReferenceIndex(&index)) { 5468 if (ParseBackReferenceIndex(&index)) {
5454 RegExpCapture* capture = NULL; 5469 if (state->IsInsideCaptureGroup(index)) {
5455 if (captures_ != NULL && index <= captures_->length()) { 5470 // The backreference is inside the capture group it refers to.
5456 capture = captures_->at(index - 1); 5471 // Nothing can possibly have been captured yet.
5472 builder->AddEmpty();
5473 } else {
5474 RegExpCapture* capture = GetCapture(index);
5475 RegExpTree* atom = new (zone()) RegExpBackReference(capture);
5476 builder->AddAtom(atom);
5457 } 5477 }
5458 if (capture == NULL) {
5459 builder->AddEmpty();
5460 break;
5461 }
5462 RegExpTree* atom = new(zone()) RegExpBackReference(capture);
5463 builder->AddAtom(atom);
5464 break; 5478 break;
5465 } 5479 }
5466 uc32 first_digit = Next(); 5480 uc32 first_digit = Next();
5467 if (first_digit == '8' || first_digit == '9') { 5481 if (first_digit == '8' || first_digit == '9') {
5468 // If the 'u' flag is present, only syntax characters can be escaped, 5482 // If the 'u' flag is present, only syntax characters can be escaped,
5469 // no other identity escapes are allowed. If the 'u' flag is not 5483 // no other identity escapes are allowed. If the 'u' flag is not
5470 // present, all identity escapes are allowed. 5484 // present, all identity escapes are allowed.
5471 if (!FLAG_harmony_unicode_regexps || !unicode_) { 5485 if (!FLAG_harmony_unicode_regexps || !unicode_) {
5472 builder->AddCharacter(first_digit); 5486 builder->AddCharacter(first_digit);
5473 Advance(2); 5487 Advance(2);
(...skipping 240 matching lines...) Expand 10 before | Expand all | Expand 10 after
5714 if (value > capture_count_) { 5728 if (value > capture_count_) {
5715 Reset(start); 5729 Reset(start);
5716 return false; 5730 return false;
5717 } 5731 }
5718 } 5732 }
5719 *index_out = value; 5733 *index_out = value;
5720 return true; 5734 return true;
5721 } 5735 }
5722 5736
5723 5737
5738 RegExpCapture* RegExpParser::GetCapture(int index) {
5739 // The index of a capture group is one-based. Its index in the list is
5740 // zero-based.
5741 int know_captures =
5742 is_scanned_for_captures_ ? capture_count_ : captures_started_;
5743 DCHECK(index <= know_captures);
5744 if (captures_ == NULL) {
5745 captures_ = new (zone()) ZoneList<RegExpCapture*>(know_captures, zone());
5746 }
5747 while (captures_->length() < know_captures) {
5748 captures_->Add(new (zone()) RegExpCapture(captures_->length() + 1), zone());
5749 }
5750 return captures_->at(index - 1);
5751 }
5752
5753
5754 bool RegExpParser::RegExpParserState::IsInsideCaptureGroup(int index) {
5755 for (RegExpParserState* s = this; s != NULL; s = s->previous_state()) {
5756 if (s->group_type() != CAPTURE) continue;
5757 // Return true if we found the matching capture index.
5758 if (index == s->capture_index()) return true;
5759 // Abort if index is larger than what has been parsed up till this state.
5760 if (index > s->capture_index()) return false;
5761 }
5762 return false;
5763 }
5764
5765
5724 // QuantifierPrefix :: 5766 // QuantifierPrefix ::
5725 // { DecimalDigits } 5767 // { DecimalDigits }
5726 // { DecimalDigits , } 5768 // { DecimalDigits , }
5727 // { DecimalDigits , DecimalDigits } 5769 // { DecimalDigits , DecimalDigits }
5728 // 5770 //
5729 // Returns true if parsing succeeds, and set the min_out and max_out 5771 // Returns true if parsing succeeds, and set the min_out and max_out
5730 // values. Values are truncated to RegExpTree::kInfinity if they overflow. 5772 // values. Values are truncated to RegExpTree::kInfinity if they overflow.
5731 bool RegExpParser::ParseIntervalQuantifier(int* min_out, int* max_out) { 5773 bool RegExpParser::ParseIntervalQuantifier(int* min_out, int* max_out) {
5732 DCHECK_EQ(current(), '{'); 5774 DCHECK_EQ(current(), '{');
5733 int start = position(); 5775 int start = position();
(...skipping 311 matching lines...) Expand 10 before | Expand all | Expand 10 after
6045 } 6087 }
6046 } 6088 }
6047 if (!has_more()) { 6089 if (!has_more()) {
6048 return ReportError(CStrVector(kUnterminated) CHECK_FAILED); 6090 return ReportError(CStrVector(kUnterminated) CHECK_FAILED);
6049 } 6091 }
6050 Advance(); 6092 Advance();
6051 if (ranges->length() == 0) { 6093 if (ranges->length() == 0) {
6052 ranges->Add(CharacterRange::Everything(), zone()); 6094 ranges->Add(CharacterRange::Everything(), zone());
6053 is_negated = !is_negated; 6095 is_negated = !is_negated;
6054 } 6096 }
6055 return new(zone()) RegExpCharacterClass(ranges, is_negated); 6097 return new (zone()) RegExpCharacterClass(ranges, is_negated);
6056 } 6098 }
6057 6099
6058 6100
6059 // ---------------------------------------------------------------------------- 6101 // ----------------------------------------------------------------------------
6060 // The Parser interface. 6102 // The Parser interface.
6061 6103
6062 bool RegExpParser::ParseRegExp(Isolate* isolate, Zone* zone, 6104 bool RegExpParser::ParseRegExp(Isolate* isolate, Zone* zone,
6063 FlatStringReader* input, bool multiline, 6105 FlatStringReader* input, bool multiline,
6064 bool unicode, RegExpCompileData* result) { 6106 bool unicode, RegExpCompileData* result) {
6065 DCHECK(result != NULL); 6107 DCHECK(result != NULL);
(...skipping 348 matching lines...) Expand 10 before | Expand all | Expand 10 after
6414 } 6456 }
6415 6457
6416 6458
6417 void Parser::RaiseLanguageMode(LanguageMode mode) { 6459 void Parser::RaiseLanguageMode(LanguageMode mode) {
6418 SetLanguageMode(scope_, 6460 SetLanguageMode(scope_,
6419 static_cast<LanguageMode>(scope_->language_mode() | mode)); 6461 static_cast<LanguageMode>(scope_->language_mode() | mode));
6420 } 6462 }
6421 6463
6422 } // namespace internal 6464 } // namespace internal
6423 } // namespace v8 6465 } // namespace v8
OLDNEW
« no previous file with comments | « src/parser.h ('k') | src/regexp/arm/regexp-macro-assembler-arm.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698