Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(57)

Side by Side Diff: src/parser.cc

Issue 1418963009: Experimental support for RegExp lookbehind. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master
Patch Set: fixed test cases Created 5 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "src/parser.h" 5 #include "src/parser.h"
6 6
7 #include "src/api.h" 7 #include "src/api.h"
8 #include "src/ast.h" 8 #include "src/ast.h"
9 #include "src/ast-literal-reindexer.h" 9 #include "src/ast-literal-reindexer.h"
10 #include "src/bailout-reason.h" 10 #include "src/bailout-reason.h"
(...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after
87 set_stack_limit(isolate_->stack_guard()->real_climit()); 87 set_stack_limit(isolate_->stack_guard()->real_climit());
88 set_unicode_cache(isolate_->unicode_cache()); 88 set_unicode_cache(isolate_->unicode_cache());
89 set_script(script); 89 set_script(script);
90 90
91 if (script->type() == Script::TYPE_NATIVE) { 91 if (script->type() == Script::TYPE_NATIVE) {
92 set_native(); 92 set_native();
93 } 93 }
94 } 94 }
95 95
96 96
97 RegExpBuilder::RegExpBuilder(Zone* zone) 97 RegExpBuilder::RegExpBuilder(Zone* zone,
98 RegExpTree::ReadDirection read_direction)
98 : zone_(zone), 99 : zone_(zone),
99 pending_empty_(false), 100 pending_empty_(false),
100 characters_(NULL), 101 characters_(NULL),
101 terms_(), 102 terms_(),
102 alternatives_() 103 alternatives_(),
104 read_direction_(read_direction)
103 #ifdef DEBUG 105 #ifdef DEBUG
104 , last_added_(ADD_NONE) 106 , last_added_(ADD_NONE)
105 #endif 107 #endif
106 {} 108 {}
107 109
108 110
109 void RegExpBuilder::FlushCharacters() { 111 void RegExpBuilder::FlushCharacters() {
110 pending_empty_ = false; 112 pending_empty_ = false;
111 if (characters_ != NULL) { 113 if (characters_ != NULL) {
112 RegExpTree* atom = new(zone()) RegExpAtom(characters_->ToConstVector()); 114 RegExpTree* atom =
115 new (zone()) RegExpAtom(characters_->ToConstVector(), read_direction_);
113 characters_ = NULL; 116 characters_ = NULL;
114 text_.Add(atom, zone()); 117 text_.Add(atom, zone());
115 LAST(ADD_ATOM); 118 LAST(ADD_ATOM);
116 } 119 }
117 } 120 }
118 121
119 122
120 void RegExpBuilder::FlushText() { 123 void RegExpBuilder::FlushText() {
121 FlushCharacters(); 124 FlushCharacters();
122 int num_text = text_.length(); 125 int num_text = text_.length();
123 if (num_text == 0) { 126 if (num_text == 0) {
124 return; 127 return;
125 } else if (num_text == 1) { 128 } else if (num_text == 1) {
126 terms_.Add(text_.last(), zone()); 129 terms_.Add(text_.last(), zone());
127 } else { 130 } else {
128 RegExpText* text = new(zone()) RegExpText(zone()); 131 RegExpText* text = new (zone()) RegExpText(zone(), read_direction_);
129 for (int i = 0; i < num_text; i++) 132 for (int i = 0; i < num_text; i++)
130 text_.Get(i)->AppendToText(text, zone()); 133 text_.Get(i)->AppendToText(text, zone());
131 terms_.Add(text, zone()); 134 terms_.Add(text, zone());
132 } 135 }
133 text_.Clear(); 136 text_.Clear();
134 } 137 }
135 138
136 139
137 void RegExpBuilder::AddCharacter(uc16 c) { 140 void RegExpBuilder::AddCharacter(uc16 c) {
138 pending_empty_ = false; 141 pending_empty_ = false;
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after
179 182
180 void RegExpBuilder::FlushTerms() { 183 void RegExpBuilder::FlushTerms() {
181 FlushText(); 184 FlushText();
182 int num_terms = terms_.length(); 185 int num_terms = terms_.length();
183 RegExpTree* alternative; 186 RegExpTree* alternative;
184 if (num_terms == 0) { 187 if (num_terms == 0) {
185 alternative = new (zone()) RegExpEmpty(); 188 alternative = new (zone()) RegExpEmpty();
186 } else if (num_terms == 1) { 189 } else if (num_terms == 1) {
187 alternative = terms_.last(); 190 alternative = terms_.last();
188 } else { 191 } else {
189 alternative = new(zone()) RegExpAlternative(terms_.GetList(zone())); 192 alternative =
193 new (zone()) RegExpAlternative(terms_.GetList(zone()), read_direction_);
190 } 194 }
191 alternatives_.Add(alternative, zone()); 195 alternatives_.Add(alternative, zone());
192 terms_.Clear(); 196 terms_.Clear();
193 LAST(ADD_NONE); 197 LAST(ADD_NONE);
194 } 198 }
195 199
196 200
197 RegExpTree* RegExpBuilder::ToRegExp() { 201 RegExpTree* RegExpBuilder::ToRegExp() {
198 FlushTerms(); 202 FlushTerms();
199 int num_alternatives = alternatives_.length(); 203 int num_alternatives = alternatives_.length();
200 if (num_alternatives == 0) return new (zone()) RegExpEmpty(); 204 if (num_alternatives == 0) return new (zone()) RegExpEmpty();
201 if (num_alternatives == 1) return alternatives_.last(); 205 if (num_alternatives == 1) return alternatives_.last();
202 return new(zone()) RegExpDisjunction(alternatives_.GetList(zone())); 206 return new (zone())
207 RegExpDisjunction(alternatives_.GetList(zone()), read_direction_);
203 } 208 }
204 209
205 210
206 void RegExpBuilder::AddQuantifierToAtom( 211 void RegExpBuilder::AddQuantifierToAtom(
207 int min, int max, RegExpQuantifier::QuantifierType quantifier_type) { 212 int min, int max, RegExpQuantifier::QuantifierType quantifier_type) {
208 if (pending_empty_) { 213 if (pending_empty_) {
209 pending_empty_ = false; 214 pending_empty_ = false;
210 return; 215 return;
211 } 216 }
212 RegExpTree* atom; 217 RegExpTree* atom;
213 if (characters_ != NULL) { 218 if (characters_ != NULL) {
214 DCHECK(last_added_ == ADD_CHAR); 219 DCHECK(last_added_ == ADD_CHAR);
215 // Last atom was character. 220 // Last atom was character.
216 Vector<const uc16> char_vector = characters_->ToConstVector(); 221 Vector<const uc16> char_vector = characters_->ToConstVector();
217 int num_chars = char_vector.length(); 222 int num_chars = char_vector.length();
218 if (num_chars > 1) { 223 if (num_chars > 1) {
219 Vector<const uc16> prefix = char_vector.SubVector(0, num_chars - 1); 224 Vector<const uc16> prefix = char_vector.SubVector(0, num_chars - 1);
220 text_.Add(new(zone()) RegExpAtom(prefix), zone()); 225 text_.Add(new (zone()) RegExpAtom(prefix, read_direction_), zone());
221 char_vector = char_vector.SubVector(num_chars - 1, num_chars); 226 char_vector = char_vector.SubVector(num_chars - 1, num_chars);
222 } 227 }
223 characters_ = NULL; 228 characters_ = NULL;
224 atom = new(zone()) RegExpAtom(char_vector); 229 atom = new (zone()) RegExpAtom(char_vector, read_direction_);
225 FlushText(); 230 FlushText();
226 } else if (text_.length() > 0) { 231 } else if (text_.length() > 0) {
227 DCHECK(last_added_ == ADD_ATOM); 232 DCHECK(last_added_ == ADD_ATOM);
228 atom = text_.RemoveLast(); 233 atom = text_.RemoveLast();
229 FlushText(); 234 FlushText();
230 } else if (terms_.length() > 0) { 235 } else if (terms_.length() > 0) {
231 DCHECK(last_added_ == ADD_ATOM); 236 DCHECK(last_added_ == ADD_ATOM);
232 atom = terms_.RemoveLast(); 237 atom = terms_.RemoveLast();
233 if (atom->max_match() == 0) { 238 if (atom->max_match() == 0) {
234 // Guaranteed to only match an empty string. 239 // Guaranteed to only match an empty string.
235 LAST(ADD_TERM); 240 LAST(ADD_TERM);
236 if (min == 0) { 241 if (min == 0) {
237 return; 242 return;
238 } 243 }
239 terms_.Add(atom, zone()); 244 terms_.Add(atom, zone());
240 return; 245 return;
241 } 246 }
242 } else { 247 } else {
243 // Only call immediately after adding an atom or character! 248 // Only call immediately after adding an atom or character!
244 UNREACHABLE(); 249 UNREACHABLE();
245 return; 250 return;
246 } 251 }
247 terms_.Add( 252 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom,
248 new(zone()) RegExpQuantifier(min, max, quantifier_type, atom), zone()); 253 read_direction_),
254 zone());
249 LAST(ADD_TERM); 255 LAST(ADD_TERM);
250 } 256 }
251 257
252 258
253 FunctionEntry ParseData::GetFunctionEntry(int start) { 259 FunctionEntry ParseData::GetFunctionEntry(int start) {
254 // The current pre-data entry must be a FunctionEntry with the given 260 // The current pre-data entry must be a FunctionEntry with the given
255 // start position. 261 // start position.
256 if ((function_index_ + FunctionEntry::kSize <= Length()) && 262 if ((function_index_ + FunctionEntry::kSize <= Length()) &&
257 (static_cast<int>(Data()[function_index_]) == start)) { 263 (static_cast<int>(Data()[function_index_]) == start)) {
258 int index = function_index_; 264 int index = function_index_;
(...skipping 4955 matching lines...) Expand 10 before | Expand all | Expand 10 after
5214 RegExpParser::RegExpParser(FlatStringReader* in, Handle<String>* error, 5220 RegExpParser::RegExpParser(FlatStringReader* in, Handle<String>* error,
5215 bool multiline, bool unicode, Isolate* isolate, 5221 bool multiline, bool unicode, Isolate* isolate,
5216 Zone* zone) 5222 Zone* zone)
5217 : isolate_(isolate), 5223 : isolate_(isolate),
5218 zone_(zone), 5224 zone_(zone),
5219 error_(error), 5225 error_(error),
5220 captures_(NULL), 5226 captures_(NULL),
5221 in_(in), 5227 in_(in),
5222 current_(kEndMarker), 5228 current_(kEndMarker),
5223 next_pos_(0), 5229 next_pos_(0),
5230 captures_started_(0),
5224 capture_count_(0), 5231 capture_count_(0),
5225 has_more_(true), 5232 has_more_(true),
5226 multiline_(multiline), 5233 multiline_(multiline),
5227 unicode_(unicode), 5234 unicode_(unicode),
5228 simple_(false), 5235 simple_(false),
5229 contains_anchor_(false), 5236 contains_anchor_(false),
5230 is_scanned_for_captures_(false), 5237 is_scanned_for_captures_(false),
5231 failed_(false) { 5238 failed_(false) {
5232 Advance(); 5239 Advance();
5233 } 5240 }
(...skipping 61 matching lines...) Expand 10 before | Expand all | Expand 10 after
5295 current_ = kEndMarker; 5302 current_ = kEndMarker;
5296 next_pos_ = in()->length(); 5303 next_pos_ = in()->length();
5297 return NULL; 5304 return NULL;
5298 } 5305 }
5299 5306
5300 5307
5301 // Pattern :: 5308 // Pattern ::
5302 // Disjunction 5309 // Disjunction
5303 RegExpTree* RegExpParser::ParsePattern() { 5310 RegExpTree* RegExpParser::ParsePattern() {
5304 RegExpTree* result = ParseDisjunction(CHECK_FAILED); 5311 RegExpTree* result = ParseDisjunction(CHECK_FAILED);
5312
5305 DCHECK(!has_more()); 5313 DCHECK(!has_more());
5306 // If the result of parsing is a literal string atom, and it has the 5314 // If the result of parsing is a literal string atom, and it has the
5307 // same length as the input, then the atom is identical to the input. 5315 // same length as the input, then the atom is identical to the input.
5308 if (result->IsAtom() && result->AsAtom()->length() == in()->length()) { 5316 if (result->IsAtom() && result->AsAtom()->length() == in()->length()) {
5309 simple_ = true; 5317 simple_ = true;
5310 } 5318 }
5311 return result; 5319 return result;
5312 } 5320 }
5313 5321
5314 5322
5315 // Disjunction :: 5323 // Disjunction ::
5316 // Alternative 5324 // Alternative
5317 // Alternative | Disjunction 5325 // Alternative | Disjunction
5318 // Alternative :: 5326 // Alternative ::
5319 // [empty] 5327 // [empty]
5320 // Term Alternative 5328 // Term Alternative
5321 // Term :: 5329 // Term ::
5322 // Assertion 5330 // Assertion
5323 // Atom 5331 // Atom
5324 // Atom Quantifier 5332 // Atom Quantifier
5325 RegExpTree* RegExpParser::ParseDisjunction() { 5333 RegExpTree* RegExpParser::ParseDisjunction() {
5326 // Used to store current state while parsing subexpressions. 5334 // Used to store current state while parsing subexpressions.
5327 RegExpParserState initial_state(NULL, INITIAL, 0, zone()); 5335 RegExpParserState initial_state(NULL, INITIAL, RegExpTree::READ_FORWARD, 0,
5328 RegExpParserState* stored_state = &initial_state; 5336 zone());
5337 RegExpParserState* state = &initial_state;
5329 // Cache the builder in a local variable for quick access. 5338 // Cache the builder in a local variable for quick access.
5330 RegExpBuilder* builder = initial_state.builder(); 5339 RegExpBuilder* builder = initial_state.builder();
5331 while (true) { 5340 while (true) {
5332 switch (current()) { 5341 switch (current()) {
5333 case kEndMarker: 5342 case kEndMarker:
5334 if (stored_state->IsSubexpression()) { 5343 if (state->IsSubexpression()) {
5335 // Inside a parenthesized group when hitting end of input. 5344 // Inside a parenthesized group when hitting end of input.
5336 ReportError(CStrVector("Unterminated group") CHECK_FAILED); 5345 ReportError(CStrVector("Unterminated group") CHECK_FAILED);
5337 } 5346 }
5338 DCHECK_EQ(INITIAL, stored_state->group_type()); 5347 DCHECK_EQ(INITIAL, state->group_type());
5339 // Parsing completed successfully. 5348 // Parsing completed successfully.
5340 return builder->ToRegExp(); 5349 return builder->ToRegExp();
5341 case ')': { 5350 case ')': {
5342 if (!stored_state->IsSubexpression()) { 5351 if (!state->IsSubexpression()) {
5343 ReportError(CStrVector("Unmatched ')'") CHECK_FAILED); 5352 ReportError(CStrVector("Unmatched ')'") CHECK_FAILED);
5344 } 5353 }
5345 DCHECK_NE(INITIAL, stored_state->group_type()); 5354 DCHECK_NE(INITIAL, state->group_type());
5346 5355
5347 Advance(); 5356 Advance();
5348 // End disjunction parsing and convert builder content to new single 5357 // End disjunction parsing and convert builder content to new single
5349 // regexp atom. 5358 // regexp atom.
5350 RegExpTree* body = builder->ToRegExp(); 5359 RegExpTree* body = builder->ToRegExp();
5351 5360
5352 int end_capture_index = captures_started(); 5361 int end_capture_index = captures_started_;
5353 5362
5354 int capture_index = stored_state->capture_index(); 5363 int capture_index = state->capture_index();
5355 SubexpressionType group_type = stored_state->group_type(); 5364 SubexpressionType group_type = state->group_type();
5356
5357 // Restore previous state.
5358 stored_state = stored_state->previous_state();
5359 builder = stored_state->builder();
5360 5365
5361 // Build result of subexpression. 5366 // Build result of subexpression.
5362 if (group_type == CAPTURE) { 5367 if (group_type == CAPTURE) {
5363 RegExpCapture* capture = new(zone()) RegExpCapture(body, capture_index); 5368 RegExpCapture* capture = GetCapture(capture_index);
5364 captures_->at(capture_index - 1) = capture; 5369 capture->set_body(body);
5370 capture->set_read_direction(state->read_direction());
5365 body = capture; 5371 body = capture;
5366 } else if (group_type != GROUPING) { 5372 } else if (group_type != GROUPING) {
5367 DCHECK(group_type == POSITIVE_LOOKAHEAD || 5373 DCHECK(group_type == POSITIVE_LOOKAHEAD ||
5368 group_type == NEGATIVE_LOOKAHEAD); 5374 group_type == NEGATIVE_LOOKAHEAD);
5369 bool is_positive = (group_type == POSITIVE_LOOKAHEAD); 5375 bool is_positive = (group_type == POSITIVE_LOOKAHEAD);
5370 body = new(zone()) RegExpLookahead(body, 5376 body = new (zone()) RegExpLookaround(
5371 is_positive, 5377 body, is_positive, end_capture_index - capture_index, capture_index,
5372 end_capture_index - capture_index, 5378 state->read_direction());
5373 capture_index);
5374 } 5379 }
5380
5381 // Restore previous state.
5382 state = state->previous_state();
5383 builder = state->builder();
5384
5375 builder->AddAtom(body); 5385 builder->AddAtom(body);
5376 // For compatibility with JSC and ES3, we allow quantifiers after 5386 // For compatibility with JSC and ES3, we allow quantifiers after
5377 // lookaheads, and break in all cases. 5387 // lookaheads, and break in all cases.
5378 break; 5388 break;
5379 } 5389 }
5380 case '|': { 5390 case '|': {
5381 Advance(); 5391 Advance();
5382 builder->NewAlternative(); 5392 builder->NewAlternative();
5383 continue; 5393 continue;
5384 } 5394 }
5385 case '*': 5395 case '*':
5386 case '+': 5396 case '+':
5387 case '?': 5397 case '?':
5388 return ReportError(CStrVector("Nothing to repeat")); 5398 return ReportError(CStrVector("Nothing to repeat"));
5389 case '^': { 5399 case '^': {
5390 Advance(); 5400 Advance();
5391 if (multiline_) { 5401 if (multiline_) {
5392 builder->AddAssertion( 5402 builder->AddAssertion(new (zone()) RegExpAssertion(
5393 new(zone()) RegExpAssertion(RegExpAssertion::START_OF_LINE)); 5403 RegExpAssertion::START_OF_LINE, state->read_direction()));
5394 } else { 5404 } else {
5395 builder->AddAssertion( 5405 builder->AddAssertion(new (zone()) RegExpAssertion(
5396 new(zone()) RegExpAssertion(RegExpAssertion::START_OF_INPUT)); 5406 RegExpAssertion::START_OF_INPUT, state->read_direction()));
5397 set_contains_anchor(); 5407 set_contains_anchor();
5398 } 5408 }
5399 continue; 5409 continue;
5400 } 5410 }
5401 case '$': { 5411 case '$': {
5402 Advance(); 5412 Advance();
5403 RegExpAssertion::AssertionType assertion_type = 5413 RegExpAssertion::AssertionType assertion_type =
5404 multiline_ ? RegExpAssertion::END_OF_LINE : 5414 multiline_ ? RegExpAssertion::END_OF_LINE :
5405 RegExpAssertion::END_OF_INPUT; 5415 RegExpAssertion::END_OF_INPUT;
5406 builder->AddAssertion(new(zone()) RegExpAssertion(assertion_type)); 5416 builder->AddAssertion(new (zone()) RegExpAssertion(
5417 assertion_type, state->read_direction()));
5407 continue; 5418 continue;
5408 } 5419 }
5409 case '.': { 5420 case '.': {
5410 Advance(); 5421 Advance();
5411 // everything except \x0a, \x0d, \u2028 and \u2029 5422 // everything except \x0a, \x0d, \u2028 and \u2029
5412 ZoneList<CharacterRange>* ranges = 5423 ZoneList<CharacterRange>* ranges =
5413 new(zone()) ZoneList<CharacterRange>(2, zone()); 5424 new(zone()) ZoneList<CharacterRange>(2, zone());
5414 CharacterRange::AddClassEscape('.', ranges, zone()); 5425 CharacterRange::AddClassEscape('.', ranges, zone());
5415 RegExpTree* atom = new(zone()) RegExpCharacterClass(ranges, false); 5426 RegExpTree* atom = new (zone())
5427 RegExpCharacterClass(ranges, false, state->read_direction());
5416 builder->AddAtom(atom); 5428 builder->AddAtom(atom);
5417 break; 5429 break;
5418 } 5430 }
5419 case '(': { 5431 case '(': {
5420 SubexpressionType subexpr_type = CAPTURE; 5432 SubexpressionType subexpr_type = CAPTURE;
5433 RegExpTree::ReadDirection read_direction = state->read_direction();
5421 Advance(); 5434 Advance();
5422 if (current() == '?') { 5435 if (current() == '?') {
5423 switch (Next()) { 5436 switch (Next()) {
5424 case ':': 5437 case ':':
5425 subexpr_type = GROUPING; 5438 subexpr_type = GROUPING;
5426 break; 5439 break;
5427 case '=': 5440 case '=':
5441 read_direction = RegExpTree::READ_FORWARD;
5428 subexpr_type = POSITIVE_LOOKAHEAD; 5442 subexpr_type = POSITIVE_LOOKAHEAD;
5429 break; 5443 break;
5430 case '!': 5444 case '!':
5445 read_direction = RegExpTree::READ_FORWARD;
5431 subexpr_type = NEGATIVE_LOOKAHEAD; 5446 subexpr_type = NEGATIVE_LOOKAHEAD;
5432 break; 5447 break;
5448 case '<':
5449 if (FLAG_harmony_regexp_lookbehind) {
5450 Advance();
5451 read_direction = RegExpTree::READ_BACKWARD;
5452 if (Next() == '=') {
5453 subexpr_type = POSITIVE_LOOKAHEAD;
5454 break;
5455 } else if (Next() == '!') {
5456 subexpr_type = NEGATIVE_LOOKAHEAD;
5457 break;
5458 }
5459 }
5460 // Fall through.
5433 default: 5461 default:
5434 ReportError(CStrVector("Invalid group") CHECK_FAILED); 5462 ReportError(CStrVector("Invalid group") CHECK_FAILED);
5435 break; 5463 break;
5436 } 5464 }
5437 Advance(2); 5465 Advance(2);
5438 } else { 5466 } else {
5439 if (captures_ == NULL) { 5467 if (captures_started_ >= kMaxCaptures) {
5440 captures_ = new(zone()) ZoneList<RegExpCapture*>(2, zone());
5441 }
5442 if (captures_started() >= kMaxCaptures) {
5443 ReportError(CStrVector("Too many captures") CHECK_FAILED); 5468 ReportError(CStrVector("Too many captures") CHECK_FAILED);
5444 } 5469 }
5445 captures_->Add(NULL, zone()); 5470 captures_started_++;
5446 } 5471 }
5447 // Store current state and begin new disjunction parsing. 5472 // Store current state and begin new disjunction parsing.
5448 stored_state = new(zone()) RegExpParserState(stored_state, subexpr_type, 5473 state = new (zone()) RegExpParserState(
5449 captures_started(), zone()); 5474 state, subexpr_type, read_direction, captures_started_, zone());
5450 builder = stored_state->builder(); 5475 builder = state->builder();
5451 continue; 5476 continue;
5452 } 5477 }
5453 case '[': { 5478 case '[': {
5454 RegExpTree* atom = ParseCharacterClass(CHECK_FAILED); 5479 RegExpTree* atom =
5480 ParseCharacterClass(state->read_direction() CHECK_FAILED);
5455 builder->AddAtom(atom); 5481 builder->AddAtom(atom);
5456 break; 5482 break;
5457 } 5483 }
5458 // Atom :: 5484 // Atom ::
5459 // \ AtomEscape 5485 // \ AtomEscape
5460 case '\\': 5486 case '\\':
5461 switch (Next()) { 5487 switch (Next()) {
5462 case kEndMarker: 5488 case kEndMarker:
5463 return ReportError(CStrVector("\\ at end of pattern")); 5489 return ReportError(CStrVector("\\ at end of pattern"));
5464 case 'b': 5490 case 'b':
5465 Advance(2); 5491 Advance(2);
5466 builder->AddAssertion( 5492 builder->AddAssertion(new (zone()) RegExpAssertion(
5467 new(zone()) RegExpAssertion(RegExpAssertion::BOUNDARY)); 5493 RegExpAssertion::BOUNDARY, state->read_direction()));
5468 continue; 5494 continue;
5469 case 'B': 5495 case 'B':
5470 Advance(2); 5496 Advance(2);
5471 builder->AddAssertion( 5497 builder->AddAssertion(new (zone()) RegExpAssertion(
5472 new(zone()) RegExpAssertion(RegExpAssertion::NON_BOUNDARY)); 5498 RegExpAssertion::NON_BOUNDARY, state->read_direction()));
5473 continue; 5499 continue;
5474 // AtomEscape :: 5500 // AtomEscape ::
5475 // CharacterClassEscape 5501 // CharacterClassEscape
5476 // 5502 //
5477 // CharacterClassEscape :: one of 5503 // CharacterClassEscape :: one of
5478 // d D s S w W 5504 // d D s S w W
5479 case 'd': case 'D': case 's': case 'S': case 'w': case 'W': { 5505 case 'd': case 'D': case 's': case 'S': case 'w': case 'W': {
5480 uc32 c = Next(); 5506 uc32 c = Next();
5481 Advance(2); 5507 Advance(2);
5482 ZoneList<CharacterRange>* ranges = 5508 ZoneList<CharacterRange>* ranges =
5483 new(zone()) ZoneList<CharacterRange>(2, zone()); 5509 new(zone()) ZoneList<CharacterRange>(2, zone());
5484 CharacterRange::AddClassEscape(c, ranges, zone()); 5510 CharacterRange::AddClassEscape(c, ranges, zone());
5485 RegExpTree* atom = new(zone()) RegExpCharacterClass(ranges, false); 5511 RegExpTree* atom = new (zone())
5512 RegExpCharacterClass(ranges, false, state->read_direction());
5486 builder->AddAtom(atom); 5513 builder->AddAtom(atom);
5487 break; 5514 break;
5488 } 5515 }
5489 case '1': case '2': case '3': case '4': case '5': case '6': 5516 case '1': case '2': case '3': case '4': case '5': case '6':
5490 case '7': case '8': case '9': { 5517 case '7': case '8': case '9': {
5491 int index = 0; 5518 int index = 0;
5492 if (ParseBackReferenceIndex(&index)) { 5519 if (ParseBackReferenceIndex(&index)) {
5493 RegExpCapture* capture = NULL; 5520 RegExpCapture* capture = GetCapture(index);
5494 if (captures_ != NULL && index <= captures_->length()) { 5521 RegExpTree* atom = new (zone())
5495 capture = captures_->at(index - 1); 5522 RegExpBackReference(capture, state->read_direction());
5496 }
5497 if (capture == NULL) {
5498 builder->AddEmpty();
5499 break;
5500 }
5501 RegExpTree* atom = new(zone()) RegExpBackReference(capture);
5502 builder->AddAtom(atom); 5523 builder->AddAtom(atom);
5503 break; 5524 break;
5504 } 5525 }
5505 uc32 first_digit = Next(); 5526 uc32 first_digit = Next();
5506 if (first_digit == '8' || first_digit == '9') { 5527 if (first_digit == '8' || first_digit == '9') {
5507 // If the 'u' flag is present, only syntax characters can be escaped, 5528 // If the 'u' flag is present, only syntax characters can be escaped,
5508 // no other identity escapes are allowed. If the 'u' flag is not 5529 // no other identity escapes are allowed. If the 'u' flag is not
5509 // present, all identity escapes are allowed. 5530 // present, all identity escapes are allowed.
5510 if (!FLAG_harmony_unicode_regexps || !unicode_) { 5531 if (!FLAG_harmony_unicode_regexps || !unicode_) {
5511 builder->AddCharacter(first_digit); 5532 builder->AddCharacter(first_digit);
(...skipping 173 matching lines...) Expand 10 before | Expand all | Expand 10 after
5685 5706
5686 5707
5687 // In order to know whether an escape is a backreference or not we have to scan 5708 // In order to know whether an escape is a backreference or not we have to scan
5688 // the entire regexp and find the number of capturing parentheses. However we 5709 // the entire regexp and find the number of capturing parentheses. However we
5689 // don't want to scan the regexp twice unless it is necessary. This mini-parser 5710 // don't want to scan the regexp twice unless it is necessary. This mini-parser
5690 // is called when needed. It can see the difference between capturing and 5711 // is called when needed. It can see the difference between capturing and
5691 // noncapturing parentheses and can skip character classes and backslash-escaped 5712 // noncapturing parentheses and can skip character classes and backslash-escaped
5692 // characters. 5713 // characters.
5693 void RegExpParser::ScanForCaptures() { 5714 void RegExpParser::ScanForCaptures() {
5694 // Start with the captures started before the current position. 5715 // Start with the captures started before the current position.
5695 int capture_count = captures_started(); 5716 int capture_count = captures_started_;
5696 // Add count of captures after this position. 5717 // Add count of captures after this position.
5697 int n; 5718 int n;
5698 while ((n = current()) != kEndMarker) { 5719 while ((n = current()) != kEndMarker) {
5699 Advance(); 5720 Advance();
5700 switch (n) { 5721 switch (n) {
5701 case '\\': 5722 case '\\':
5702 Advance(); 5723 Advance();
5703 break; 5724 break;
5704 case '[': { 5725 case '[': {
5705 int c; 5726 int c;
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after
5737 value = 10 * value + (c - '0'); 5758 value = 10 * value + (c - '0');
5738 if (value > kMaxCaptures) { 5759 if (value > kMaxCaptures) {
5739 Reset(start); 5760 Reset(start);
5740 return false; 5761 return false;
5741 } 5762 }
5742 Advance(); 5763 Advance();
5743 } else { 5764 } else {
5744 break; 5765 break;
5745 } 5766 }
5746 } 5767 }
5747 if (value > captures_started()) { 5768 if (value > captures_started_) {
5748 if (!is_scanned_for_captures_) { 5769 if (!is_scanned_for_captures_) {
5749 int saved_position = position(); 5770 int saved_position = position();
5750 ScanForCaptures(); 5771 ScanForCaptures();
5751 Reset(saved_position); 5772 Reset(saved_position);
5752 } 5773 }
5753 if (value > capture_count_) { 5774 if (value > capture_count_) {
5754 Reset(start); 5775 Reset(start);
5755 return false; 5776 return false;
5756 } 5777 }
5757 } 5778 }
5758 *index_out = value; 5779 *index_out = value;
5759 return true; 5780 return true;
5760 } 5781 }
5761 5782
5762 5783
5784 RegExpCapture* RegExpParser::GetCapture(int index) {
5785 // Capture group indices are one-based, whereas a capture's position in the
5786 // captures_ list is zero-based.
5787 int know_captures =
5788 is_scanned_for_captures_ ? capture_count_ : captures_started_;
5789 DCHECK(index <= know_captures);
5790 if (captures_ == NULL) {
5791 captures_ = new (zone()) ZoneList<RegExpCapture*>(know_captures, zone());
5792 }
5793 while (captures_->length() < know_captures) {
5794 captures_->Add(new (zone()) RegExpCapture(captures_->length() + 1), zone());
5795 }
5796 return captures_->at(index - 1);
5797 }
5798
5799
5763 // QuantifierPrefix :: 5800 // QuantifierPrefix ::
5764 // { DecimalDigits } 5801 // { DecimalDigits }
5765 // { DecimalDigits , } 5802 // { DecimalDigits , }
5766 // { DecimalDigits , DecimalDigits } 5803 // { DecimalDigits , DecimalDigits }
5767 // 5804 //
5768 // Returns true if parsing succeeds, and sets the min_out and max_out 5805 // Returns true if parsing succeeds, and sets the min_out and max_out
5769 // values. Values are truncated to RegExpTree::kInfinity if they overflow. 5806 // values. Values are truncated to RegExpTree::kInfinity if they overflow.
5770 bool RegExpParser::ParseIntervalQuantifier(int* min_out, int* max_out) { 5807 bool RegExpParser::ParseIntervalQuantifier(int* min_out, int* max_out) {
5771 DCHECK_EQ(current(), '{'); 5808 DCHECK_EQ(current(), '{');
5772 int start = position(); 5809 int start = position();
(...skipping 259 matching lines...) Expand 10 before | Expand all | Expand 10 after
6032 CharacterRange range, 6069 CharacterRange range,
6033 Zone* zone) { 6070 Zone* zone) {
6034 if (char_class != kNoCharClass) { 6071 if (char_class != kNoCharClass) {
6035 CharacterRange::AddClassEscape(char_class, ranges, zone); 6072 CharacterRange::AddClassEscape(char_class, ranges, zone);
6036 } else { 6073 } else {
6037 ranges->Add(range, zone); 6074 ranges->Add(range, zone);
6038 } 6075 }
6039 } 6076 }
6040 6077
6041 6078
6042 RegExpTree* RegExpParser::ParseCharacterClass() { 6079 RegExpTree* RegExpParser::ParseCharacterClass(
6080 RegExpTree::ReadDirection read_direction) {
6043 static const char* kUnterminated = "Unterminated character class"; 6081 static const char* kUnterminated = "Unterminated character class";
6044 static const char* kRangeOutOfOrder = "Range out of order in character class"; 6082 static const char* kRangeOutOfOrder = "Range out of order in character class";
6045 6083
6046 DCHECK_EQ(current(), '['); 6084 DCHECK_EQ(current(), '[');
6047 Advance(); 6085 Advance();
6048 bool is_negated = false; 6086 bool is_negated = false;
6049 if (current() == '^') { 6087 if (current() == '^') {
6050 is_negated = true; 6088 is_negated = true;
6051 Advance(); 6089 Advance();
6052 } 6090 }
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after
6084 } 6122 }
6085 } 6123 }
6086 if (!has_more()) { 6124 if (!has_more()) {
6087 return ReportError(CStrVector(kUnterminated) CHECK_FAILED); 6125 return ReportError(CStrVector(kUnterminated) CHECK_FAILED);
6088 } 6126 }
6089 Advance(); 6127 Advance();
6090 if (ranges->length() == 0) { 6128 if (ranges->length() == 0) {
6091 ranges->Add(CharacterRange::Everything(), zone()); 6129 ranges->Add(CharacterRange::Everything(), zone());
6092 is_negated = !is_negated; 6130 is_negated = !is_negated;
6093 } 6131 }
6094 return new(zone()) RegExpCharacterClass(ranges, is_negated); 6132 return new (zone()) RegExpCharacterClass(ranges, is_negated, read_direction);
6095 } 6133 }
6096 6134
6097 6135
6098 // ---------------------------------------------------------------------------- 6136 // ----------------------------------------------------------------------------
6099 // The Parser interface. 6137 // The Parser interface.
6100 6138
6101 bool RegExpParser::ParseRegExp(Isolate* isolate, Zone* zone, 6139 bool RegExpParser::ParseRegExp(Isolate* isolate, Zone* zone,
6102 FlatStringReader* input, bool multiline, 6140 FlatStringReader* input, bool multiline,
6103 bool unicode, RegExpCompileData* result) { 6141 bool unicode, RegExpCompileData* result) {
6104 DCHECK(result != NULL); 6142 DCHECK(result != NULL);
(...skipping 348 matching lines...) Expand 10 before | Expand all | Expand 10 after
6453 } 6491 }
6454 6492
6455 6493
6456 void Parser::RaiseLanguageMode(LanguageMode mode) { 6494 void Parser::RaiseLanguageMode(LanguageMode mode) {
6457 SetLanguageMode(scope_, 6495 SetLanguageMode(scope_,
6458 static_cast<LanguageMode>(scope_->language_mode() | mode)); 6496 static_cast<LanguageMode>(scope_->language_mode() | mode));
6459 } 6497 }
6460 6498
6461 } // namespace internal 6499 } // namespace internal
6462 } // namespace v8 6500 } // namespace v8
OLDNEW
« no previous file with comments | « src/parser.h ('k') | src/regexp/bytecodes-irregexp.h » ('j') | src/regexp/jsregexp.cc » ('J')

Powered by Google App Engine
This is Rietveld 408576698