src/parser.cc - Issue 1418963009: Experimental support for RegExp lookbehind.

Side by Side Diff: src/parser.cc

Issue 1418963009: Experimental support for RegExp lookbehind. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master

Patch Set: fixed test cases Created 5 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright 2012 the V8 project authors. All rights reserved.	1 // Copyright 2012 the V8 project authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "src/parser.h"	5 #include "src/parser.h"

6	6

7 #include "src/api.h"	7 #include "src/api.h"

8 #include "src/ast.h"	8 #include "src/ast.h"

9 #include "src/ast-literal-reindexer.h"	9 #include "src/ast-literal-reindexer.h"

10 #include "src/bailout-reason.h"	10 #include "src/bailout-reason.h"

(...skipping 76 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
87 set_stack_limit(isolate_->stack_guard()->real_climit());	87 set_stack_limit(isolate_->stack_guard()->real_climit());

88 set_unicode_cache(isolate_->unicode_cache());	88 set_unicode_cache(isolate_->unicode_cache());

89 set_script(script);	89 set_script(script);

90	90

91 if (script->type() == Script::TYPE_NATIVE) {	91 if (script->type() == Script::TYPE_NATIVE) {

92 set_native();	92 set_native();

93 }	93 }

94 }	94 }

95	95

96	96

97 RegExpBuilder::RegExpBuilder(Zone* zone)	97 RegExpBuilder::RegExpBuilder(Zone* zone,

	98 RegExpTree::ReadDirection read_direction)

98 : zone_(zone),	99 : zone_(zone),

99 pending_empty_(false),	100 pending_empty_(false),

100 characters_(NULL),	101 characters_(NULL),

101 terms_(),	102 terms_(),

102 alternatives_()	103 alternatives_(),

	104 read_direction_(read_direction)

103 #ifdef DEBUG	105 #ifdef DEBUG

104 , last_added_(ADD_NONE)	106 , last_added_(ADD_NONE)

105 #endif	107 #endif

106 {}	108 {}

107	109

108	110

109 void RegExpBuilder::FlushCharacters() {	111 void RegExpBuilder::FlushCharacters() {

110 pending_empty_ = false;	112 pending_empty_ = false;

111 if (characters_ != NULL) {	113 if (characters_ != NULL) {

112 RegExpTree* atom = new(zone()) RegExpAtom(characters_->ToConstVector());	114 RegExpTree* atom =

	115 new (zone()) RegExpAtom(characters_->ToConstVector(), read_direction_);

113 characters_ = NULL;	116 characters_ = NULL;

114 text_.Add(atom, zone());	117 text_.Add(atom, zone());

115 LAST(ADD_ATOM);	118 LAST(ADD_ATOM);

116 }	119 }

117 }	120 }

118	121

119	122

120 void RegExpBuilder::FlushText() {	123 void RegExpBuilder::FlushText() {

121 FlushCharacters();	124 FlushCharacters();

122 int num_text = text_.length();	125 int num_text = text_.length();

123 if (num_text == 0) {	126 if (num_text == 0) {

124 return;	127 return;

125 } else if (num_text == 1) {	128 } else if (num_text == 1) {

126 terms_.Add(text_.last(), zone());	129 terms_.Add(text_.last(), zone());

127 } else {	130 } else {

128 RegExpText* text = new(zone()) RegExpText(zone());	131 RegExpText* text = new (zone()) RegExpText(zone(), read_direction_);

129 for (int i = 0; i < num_text; i++)	132 for (int i = 0; i < num_text; i++)

130 text_.Get(i)->AppendToText(text, zone());	133 text_.Get(i)->AppendToText(text, zone());

131 terms_.Add(text, zone());	134 terms_.Add(text, zone());

132 }	135 }

133 text_.Clear();	136 text_.Clear();

134 }	137 }

135	138

136	139

137 void RegExpBuilder::AddCharacter(uc16 c) {	140 void RegExpBuilder::AddCharacter(uc16 c) {

138 pending_empty_ = false;	141 pending_empty_ = false;

(...skipping 40 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
179	182

180 void RegExpBuilder::FlushTerms() {	183 void RegExpBuilder::FlushTerms() {

181 FlushText();	184 FlushText();

182 int num_terms = terms_.length();	185 int num_terms = terms_.length();

183 RegExpTree* alternative;	186 RegExpTree* alternative;

184 if (num_terms == 0) {	187 if (num_terms == 0) {

185 alternative = new (zone()) RegExpEmpty();	188 alternative = new (zone()) RegExpEmpty();

186 } else if (num_terms == 1) {	189 } else if (num_terms == 1) {

187 alternative = terms_.last();	190 alternative = terms_.last();

188 } else {	191 } else {

189 alternative = new(zone()) RegExpAlternative(terms_.GetList(zone()));	192 alternative =

	193 new (zone()) RegExpAlternative(terms_.GetList(zone()), read_direction_);

190 }	194 }

191 alternatives_.Add(alternative, zone());	195 alternatives_.Add(alternative, zone());

192 terms_.Clear();	196 terms_.Clear();

193 LAST(ADD_NONE);	197 LAST(ADD_NONE);

194 }	198 }

195	199

196	200

197 RegExpTree* RegExpBuilder::ToRegExp() {	201 RegExpTree* RegExpBuilder::ToRegExp() {

198 FlushTerms();	202 FlushTerms();

199 int num_alternatives = alternatives_.length();	203 int num_alternatives = alternatives_.length();

200 if (num_alternatives == 0) return new (zone()) RegExpEmpty();	204 if (num_alternatives == 0) return new (zone()) RegExpEmpty();

201 if (num_alternatives == 1) return alternatives_.last();	205 if (num_alternatives == 1) return alternatives_.last();

202 return new(zone()) RegExpDisjunction(alternatives_.GetList(zone()));	206 return new (zone())

	207 RegExpDisjunction(alternatives_.GetList(zone()), read_direction_);

203 }	208 }

204	209

205	210

206 void RegExpBuilder::AddQuantifierToAtom(	211 void RegExpBuilder::AddQuantifierToAtom(

207 int min, int max, RegExpQuantifier::QuantifierType quantifier_type) {	212 int min, int max, RegExpQuantifier::QuantifierType quantifier_type) {

208 if (pending_empty_) {	213 if (pending_empty_) {

209 pending_empty_ = false;	214 pending_empty_ = false;

210 return;	215 return;

211 }	216 }

212 RegExpTree* atom;	217 RegExpTree* atom;

213 if (characters_ != NULL) {	218 if (characters_ != NULL) {

214 DCHECK(last_added_ == ADD_CHAR);	219 DCHECK(last_added_ == ADD_CHAR);

215 // Last atom was character.	220 // Last atom was character.

216 Vector<const uc16> char_vector = characters_->ToConstVector();	221 Vector<const uc16> char_vector = characters_->ToConstVector();

217 int num_chars = char_vector.length();	222 int num_chars = char_vector.length();

218 if (num_chars > 1) {	223 if (num_chars > 1) {

219 Vector<const uc16> prefix = char_vector.SubVector(0, num_chars - 1);	224 Vector<const uc16> prefix = char_vector.SubVector(0, num_chars - 1);

220 text_.Add(new(zone()) RegExpAtom(prefix), zone());	225 text_.Add(new (zone()) RegExpAtom(prefix, read_direction_), zone());

221 char_vector = char_vector.SubVector(num_chars - 1, num_chars);	226 char_vector = char_vector.SubVector(num_chars - 1, num_chars);

222 }	227 }

223 characters_ = NULL;	228 characters_ = NULL;

224 atom = new(zone()) RegExpAtom(char_vector);	229 atom = new (zone()) RegExpAtom(char_vector, read_direction_);

225 FlushText();	230 FlushText();

226 } else if (text_.length() > 0) {	231 } else if (text_.length() > 0) {

227 DCHECK(last_added_ == ADD_ATOM);	232 DCHECK(last_added_ == ADD_ATOM);

228 atom = text_.RemoveLast();	233 atom = text_.RemoveLast();

229 FlushText();	234 FlushText();

230 } else if (terms_.length() > 0) {	235 } else if (terms_.length() > 0) {

231 DCHECK(last_added_ == ADD_ATOM);	236 DCHECK(last_added_ == ADD_ATOM);

232 atom = terms_.RemoveLast();	237 atom = terms_.RemoveLast();

233 if (atom->max_match() == 0) {	238 if (atom->max_match() == 0) {

234 // Guaranteed to only match an empty string.	239 // Guaranteed to only match an empty string.

235 LAST(ADD_TERM);	240 LAST(ADD_TERM);

236 if (min == 0) {	241 if (min == 0) {

237 return;	242 return;

238 }	243 }

239 terms_.Add(atom, zone());	244 terms_.Add(atom, zone());

240 return;	245 return;

241 }	246 }

242 } else {	247 } else {

243 // Only call immediately after adding an atom or character!	248 // Only call immediately after adding an atom or character!

244 UNREACHABLE();	249 UNREACHABLE();

245 return;	250 return;

246 }	251 }

247 terms_.Add(	252 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom,

248 new(zone()) RegExpQuantifier(min, max, quantifier_type, atom), zone());	253 read_direction_),

	254 zone());

249 LAST(ADD_TERM);	255 LAST(ADD_TERM);

250 }	256 }

251	257

252	258

253 FunctionEntry ParseData::GetFunctionEntry(int start) {	259 FunctionEntry ParseData::GetFunctionEntry(int start) {

254 // The current pre-data entry must be a FunctionEntry with the given	260 // The current pre-data entry must be a FunctionEntry with the given

255 // start position.	261 // start position.

256 if ((function_index_ + FunctionEntry::kSize <= Length()) &&	262 if ((function_index_ + FunctionEntry::kSize <= Length()) &&

257 (static_cast<int>(Data()[function_index_]) == start)) {	263 (static_cast<int>(Data()[function_index_]) == start)) {

258 int index = function_index_;	264 int index = function_index_;

(...skipping 4955 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
5214 RegExpParser::RegExpParser(FlatStringReader* in, Handle<String>* error,	5220 RegExpParser::RegExpParser(FlatStringReader* in, Handle<String>* error,

5215 bool multiline, bool unicode, Isolate* isolate,	5221 bool multiline, bool unicode, Isolate* isolate,

5216 Zone* zone)	5222 Zone* zone)

5217 : isolate_(isolate),	5223 : isolate_(isolate),

5218 zone_(zone),	5224 zone_(zone),

5219 error_(error),	5225 error_(error),

5220 captures_(NULL),	5226 captures_(NULL),

5221 in_(in),	5227 in_(in),

5222 current_(kEndMarker),	5228 current_(kEndMarker),

5223 next_pos_(0),	5229 next_pos_(0),

	5230 captures_started_(0),

5224 capture_count_(0),	5231 capture_count_(0),

5225 has_more_(true),	5232 has_more_(true),

5226 multiline_(multiline),	5233 multiline_(multiline),

5227 unicode_(unicode),	5234 unicode_(unicode),

5228 simple_(false),	5235 simple_(false),

5229 contains_anchor_(false),	5236 contains_anchor_(false),

5230 is_scanned_for_captures_(false),	5237 is_scanned_for_captures_(false),

5231 failed_(false) {	5238 failed_(false) {

5232 Advance();	5239 Advance();

5233 }	5240 }

(...skipping 61 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
5295 current_ = kEndMarker;	5302 current_ = kEndMarker;

5296 next_pos_ = in()->length();	5303 next_pos_ = in()->length();

5297 return NULL;	5304 return NULL;

5298 }	5305 }

5299	5306

5300	5307

5301 // Pattern ::	5308 // Pattern ::

5302 // Disjunction	5309 // Disjunction

5303 RegExpTree* RegExpParser::ParsePattern() {	5310 RegExpTree* RegExpParser::ParsePattern() {

5304 RegExpTree* result = ParseDisjunction(CHECK_FAILED);	5311 RegExpTree* result = ParseDisjunction(CHECK_FAILED);

	5312

5305 DCHECK(!has_more());	5313 DCHECK(!has_more());

5306 // If the result of parsing is a literal string atom, and it has the	5314 // If the result of parsing is a literal string atom, and it has the

5307 // same length as the input, then the atom is identical to the input.	5315 // same length as the input, then the atom is identical to the input.

5308 if (result->IsAtom() && result->AsAtom()->length() == in()->length()) {	5316 if (result->IsAtom() && result->AsAtom()->length() == in()->length()) {

5309 simple_ = true;	5317 simple_ = true;

5310 }	5318 }

5311 return result;	5319 return result;

5312 }	5320 }

5313	5321

5314	5322

5315 // Disjunction ::	5323 // Disjunction ::

5316 // Alternative	5324 // Alternative

5317 // Alternative \| Disjunction	5325 // Alternative \| Disjunction

5318 // Alternative ::	5326 // Alternative ::

5319 // [empty]	5327 // [empty]

5320 // Term Alternative	5328 // Term Alternative

5321 // Term ::	5329 // Term ::

5322 // Assertion	5330 // Assertion

5323 // Atom	5331 // Atom

5324 // Atom Quantifier	5332 // Atom Quantifier

5325 RegExpTree* RegExpParser::ParseDisjunction() {	5333 RegExpTree* RegExpParser::ParseDisjunction() {

5326 // Used to store current state while parsing subexpressions.	5334 // Used to store current state while parsing subexpressions.

5327 RegExpParserState initial_state(NULL, INITIAL, 0, zone());	5335 RegExpParserState initial_state(NULL, INITIAL, RegExpTree::READ_FORWARD, 0,

5328 RegExpParserState* stored_state = &initial_state;	5336 zone());

	5337 RegExpParserState* state = &initial_state;

5329 // Cache the builder in a local variable for quick access.	5338 // Cache the builder in a local variable for quick access.

5330 RegExpBuilder* builder = initial_state.builder();	5339 RegExpBuilder* builder = initial_state.builder();

5331 while (true) {	5340 while (true) {

5332 switch (current()) {	5341 switch (current()) {

5333 case kEndMarker:	5342 case kEndMarker:

5334 if (stored_state->IsSubexpression()) {	5343 if (state->IsSubexpression()) {

5335 // Inside a parenthesized group when hitting end of input.	5344 // Inside a parenthesized group when hitting end of input.

5336 ReportError(CStrVector("Unterminated group") CHECK_FAILED);	5345 ReportError(CStrVector("Unterminated group") CHECK_FAILED);

5337 }	5346 }

5338 DCHECK_EQ(INITIAL, stored_state->group_type());	5347 DCHECK_EQ(INITIAL, state->group_type());

5339 // Parsing completed successfully.	5348 // Parsing completed successfully.

5340 return builder->ToRegExp();	5349 return builder->ToRegExp();

5341 case ')': {	5350 case ')': {

5342 if (!stored_state->IsSubexpression()) {	5351 if (!state->IsSubexpression()) {

5343 ReportError(CStrVector("Unmatched ')'") CHECK_FAILED);	5352 ReportError(CStrVector("Unmatched ')'") CHECK_FAILED);

5344 }	5353 }

5345 DCHECK_NE(INITIAL, stored_state->group_type());	5354 DCHECK_NE(INITIAL, state->group_type());

5346	5355

5347 Advance();	5356 Advance();

5348 // End disjunction parsing and convert builder content to new single	5357 // End disjunction parsing and convert builder content to new single

5349 // regexp atom.	5358 // regexp atom.

5350 RegExpTree* body = builder->ToRegExp();	5359 RegExpTree* body = builder->ToRegExp();

5351	5360

5352 int end_capture_index = captures_started();	5361 int end_capture_index = captures_started_;

5353	5362

5354 int capture_index = stored_state->capture_index();	5363 int capture_index = state->capture_index();

5355 SubexpressionType group_type = stored_state->group_type();	5364 SubexpressionType group_type = state->group_type();

5356

5357 // Restore previous state.

5358 stored_state = stored_state->previous_state();

5359 builder = stored_state->builder();

5360	5365

5361 // Build result of subexpression.	5366 // Build result of subexpression.

5362 if (group_type == CAPTURE) {	5367 if (group_type == CAPTURE) {

5363 RegExpCapture* capture = new(zone()) RegExpCapture(body, capture_index);	5368 RegExpCapture* capture = GetCapture(capture_index);

5364 captures_->at(capture_index - 1) = capture;	5369 capture->set_body(body);

	5370 capture->set_read_direction(state->read_direction());

5365 body = capture;	5371 body = capture;

5366 } else if (group_type != GROUPING) {	5372 } else if (group_type != GROUPING) {

5367 DCHECK(group_type == POSITIVE_LOOKAHEAD \|\|	5373 DCHECK(group_type == POSITIVE_LOOKAHEAD \|\|

5368 group_type == NEGATIVE_LOOKAHEAD);	5374 group_type == NEGATIVE_LOOKAHEAD);

5369 bool is_positive = (group_type == POSITIVE_LOOKAHEAD);	5375 bool is_positive = (group_type == POSITIVE_LOOKAHEAD);

5370 body = new(zone()) RegExpLookahead(body,	5376 body = new (zone()) RegExpLookaround(

5371 is_positive,	5377 body, is_positive, end_capture_index - capture_index, capture_index,

5372 end_capture_index - capture_index,	5378 state->read_direction());

5373 capture_index);

5374 }	5379 }

	5380

	5381 // Restore previous state.

	5382 state = state->previous_state();

	5383 builder = state->builder();

	5384

5375 builder->AddAtom(body);	5385 builder->AddAtom(body);

5376 // For compatibility with JSC and ES3, we allow quantifiers after	5386 // For compatibility with JSC and ES3, we allow quantifiers after

5377 // lookaheads, and break in all cases.	5387 // lookaheads, and break in all cases.

5378 break;	5388 break;

5379 }	5389 }

5380 case '\|': {	5390 case '\|': {

5381 Advance();	5391 Advance();

5382 builder->NewAlternative();	5392 builder->NewAlternative();

5383 continue;	5393 continue;

5384 }	5394 }

5385 case '*':	5395 case '*':

5386 case '+':	5396 case '+':

5387 case '?':	5397 case '?':

5388 return ReportError(CStrVector("Nothing to repeat"));	5398 return ReportError(CStrVector("Nothing to repeat"));

5389 case '^': {	5399 case '^': {

5390 Advance();	5400 Advance();

5391 if (multiline_) {	5401 if (multiline_) {

5392 builder->AddAssertion(	5402 builder->AddAssertion(new (zone()) RegExpAssertion(

5393 new(zone()) RegExpAssertion(RegExpAssertion::START_OF_LINE));	5403 RegExpAssertion::START_OF_LINE, state->read_direction()));

5394 } else {	5404 } else {

5395 builder->AddAssertion(	5405 builder->AddAssertion(new (zone()) RegExpAssertion(

5396 new(zone()) RegExpAssertion(RegExpAssertion::START_OF_INPUT));	5406 RegExpAssertion::START_OF_INPUT, state->read_direction()));

5397 set_contains_anchor();	5407 set_contains_anchor();

5398 }	5408 }

5399 continue;	5409 continue;

5400 }	5410 }

5401 case '$': {	5411 case '$': {

5402 Advance();	5412 Advance();

5403 RegExpAssertion::AssertionType assertion_type =	5413 RegExpAssertion::AssertionType assertion_type =

5404 multiline_ ? RegExpAssertion::END_OF_LINE :	5414 multiline_ ? RegExpAssertion::END_OF_LINE :

5405 RegExpAssertion::END_OF_INPUT;	5415 RegExpAssertion::END_OF_INPUT;

5406 builder->AddAssertion(new(zone()) RegExpAssertion(assertion_type));	5416 builder->AddAssertion(new (zone()) RegExpAssertion(

	5417 assertion_type, state->read_direction()));

5407 continue;	5418 continue;

5408 }	5419 }

5409 case '.': {	5420 case '.': {

5410 Advance();	5421 Advance();

5411 // everything except \x0a, \x0d, \u2028 and \u2029	5422 // everything except \x0a, \x0d, \u2028 and \u2029

5412 ZoneList<CharacterRange>* ranges =	5423 ZoneList<CharacterRange>* ranges =

5413 new(zone()) ZoneList<CharacterRange>(2, zone());	5424 new(zone()) ZoneList<CharacterRange>(2, zone());

5414 CharacterRange::AddClassEscape('.', ranges, zone());	5425 CharacterRange::AddClassEscape('.', ranges, zone());

5415 RegExpTree* atom = new(zone()) RegExpCharacterClass(ranges, false);	5426 RegExpTree* atom = new (zone())

	5427 RegExpCharacterClass(ranges, false, state->read_direction());

5416 builder->AddAtom(atom);	5428 builder->AddAtom(atom);

5417 break;	5429 break;

5418 }	5430 }

5419 case '(': {	5431 case '(': {

5420 SubexpressionType subexpr_type = CAPTURE;	5432 SubexpressionType subexpr_type = CAPTURE;

	5433 RegExpTree::ReadDirection read_direction = state->read_direction();

5421 Advance();	5434 Advance();

5422 if (current() == '?') {	5435 if (current() == '?') {

5423 switch (Next()) {	5436 switch (Next()) {

5424 case ':':	5437 case ':':

5425 subexpr_type = GROUPING;	5438 subexpr_type = GROUPING;

5426 break;	5439 break;

5427 case '=':	5440 case '=':

	5441 read_direction = RegExpTree::READ_FORWARD;

5428 subexpr_type = POSITIVE_LOOKAHEAD;	5442 subexpr_type = POSITIVE_LOOKAHEAD;

5429 break;	5443 break;

5430 case '!':	5444 case '!':

	5445 read_direction = RegExpTree::READ_FORWARD;

5431 subexpr_type = NEGATIVE_LOOKAHEAD;	5446 subexpr_type = NEGATIVE_LOOKAHEAD;

5432 break;	5447 break;

	5448 case '<':

	5449 if (FLAG_harmony_regexp_lookbehind) {

	5450 Advance();

	5451 read_direction = RegExpTree::READ_BACKWARD;

	5452 if (Next() == '=') {

	5453 subexpr_type = POSITIVE_LOOKAHEAD;

	5454 break;

	5455 } else if (Next() == '!') {

	5456 subexpr_type = NEGATIVE_LOOKAHEAD;

	5457 break;

	5458 }

	5459 }

	5460 // Fall through.

5433 default:	5461 default:

5434 ReportError(CStrVector("Invalid group") CHECK_FAILED);	5462 ReportError(CStrVector("Invalid group") CHECK_FAILED);

5435 break;	5463 break;

5436 }	5464 }

5437 Advance(2);	5465 Advance(2);

5438 } else {	5466 } else {

5439 if (captures_ == NULL) {	5467 if (captures_started_ >= kMaxCaptures) {

5440 captures_ = new(zone()) ZoneList<RegExpCapture*>(2, zone());

5441 }

5442 if (captures_started() >= kMaxCaptures) {

5443 ReportError(CStrVector("Too many captures") CHECK_FAILED);	5468 ReportError(CStrVector("Too many captures") CHECK_FAILED);

5444 }	5469 }

5445 captures_->Add(NULL, zone());	5470 captures_started_++;

5446 }	5471 }

5447 // Store current state and begin new disjunction parsing.	5472 // Store current state and begin new disjunction parsing.

5448 stored_state = new(zone()) RegExpParserState(stored_state, subexpr_type,	5473 state = new (zone()) RegExpParserState(

5449 captures_started(), zone());	5474 state, subexpr_type, read_direction, captures_started_, zone());

5450 builder = stored_state->builder();	5475 builder = state->builder();

5451 continue;	5476 continue;

5452 }	5477 }

5453 case '[': {	5478 case '[': {

5454 RegExpTree* atom = ParseCharacterClass(CHECK_FAILED);	5479 RegExpTree* atom =

	5480 ParseCharacterClass(state->read_direction() CHECK_FAILED);

5455 builder->AddAtom(atom);	5481 builder->AddAtom(atom);

5456 break;	5482 break;

5457 }	5483 }

5458 // Atom ::	5484 // Atom ::

5459 // \ AtomEscape	5485 // \ AtomEscape

5460 case '\\':	5486 case '\\':

5461 switch (Next()) {	5487 switch (Next()) {

5462 case kEndMarker:	5488 case kEndMarker:

5463 return ReportError(CStrVector("\\ at end of pattern"));	5489 return ReportError(CStrVector("\\ at end of pattern"));

5464 case 'b':	5490 case 'b':

5465 Advance(2);	5491 Advance(2);

5466 builder->AddAssertion(	5492 builder->AddAssertion(new (zone()) RegExpAssertion(

5467 new(zone()) RegExpAssertion(RegExpAssertion::BOUNDARY));	5493 RegExpAssertion::BOUNDARY, state->read_direction()));

5468 continue;	5494 continue;

5469 case 'B':	5495 case 'B':

5470 Advance(2);	5496 Advance(2);

5471 builder->AddAssertion(	5497 builder->AddAssertion(new (zone()) RegExpAssertion(

5472 new(zone()) RegExpAssertion(RegExpAssertion::NON_BOUNDARY));	5498 RegExpAssertion::NON_BOUNDARY, state->read_direction()));

5473 continue;	5499 continue;

5474 // AtomEscape ::	5500 // AtomEscape ::

5475 // CharacterClassEscape	5501 // CharacterClassEscape

5476 //	5502 //

5477 // CharacterClassEscape :: one of	5503 // CharacterClassEscape :: one of

5478 // d D s S w W	5504 // d D s S w W

5479 case 'd': case 'D': case 's': case 'S': case 'w': case 'W': {	5505 case 'd': case 'D': case 's': case 'S': case 'w': case 'W': {

5480 uc32 c = Next();	5506 uc32 c = Next();

5481 Advance(2);	5507 Advance(2);

5482 ZoneList<CharacterRange>* ranges =	5508 ZoneList<CharacterRange>* ranges =

5483 new(zone()) ZoneList<CharacterRange>(2, zone());	5509 new(zone()) ZoneList<CharacterRange>(2, zone());

5484 CharacterRange::AddClassEscape(c, ranges, zone());	5510 CharacterRange::AddClassEscape(c, ranges, zone());

5485 RegExpTree* atom = new(zone()) RegExpCharacterClass(ranges, false);	5511 RegExpTree* atom = new (zone())

	5512 RegExpCharacterClass(ranges, false, state->read_direction());

5486 builder->AddAtom(atom);	5513 builder->AddAtom(atom);

5487 break;	5514 break;

5488 }	5515 }

5489 case '1': case '2': case '3': case '4': case '5': case '6':	5516 case '1': case '2': case '3': case '4': case '5': case '6':

5490 case '7': case '8': case '9': {	5517 case '7': case '8': case '9': {

5491 int index = 0;	5518 int index = 0;

5492 if (ParseBackReferenceIndex(&index)) {	5519 if (ParseBackReferenceIndex(&index)) {

5493 RegExpCapture* capture = NULL;	5520 RegExpCapture* capture = GetCapture(index);

5494 if (captures_ != NULL && index <= captures_->length()) {	5521 RegExpTree* atom = new (zone())

5495 capture = captures_->at(index - 1);	5522 RegExpBackReference(capture, state->read_direction());

5496 }

5497 if (capture == NULL) {

5498 builder->AddEmpty();

5499 break;

5500 }

5501 RegExpTree* atom = new(zone()) RegExpBackReference(capture);

5502 builder->AddAtom(atom);	5523 builder->AddAtom(atom);

5503 break;	5524 break;

5504 }	5525 }

5505 uc32 first_digit = Next();	5526 uc32 first_digit = Next();

5506 if (first_digit == '8' \|\| first_digit == '9') {	5527 if (first_digit == '8' \|\| first_digit == '9') {

5507 // If the 'u' flag is present, only syntax characters can be escaped,	5528 // If the 'u' flag is present, only syntax characters can be escaped,

5508 // no other identity escapes are allowed. If the 'u' flag is not	5529 // no other identity escapes are allowed. If the 'u' flag is not

5509 // present, all identity escapes are allowed.	5530 // present, all identity escapes are allowed.

5510 if (!FLAG_harmony_unicode_regexps \|\| !unicode_) {	5531 if (!FLAG_harmony_unicode_regexps \|\| !unicode_) {

5511 builder->AddCharacter(first_digit);	5532 builder->AddCharacter(first_digit);

(...skipping 173 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
5685	5706

5686	5707

5687 // In order to know whether an escape is a backreference or not we have to scan	5708 // In order to know whether an escape is a backreference or not we have to scan

5688 // the entire regexp and find the number of capturing parentheses. However we	5709 // the entire regexp and find the number of capturing parentheses. However we

5689 // don't want to scan the regexp twice unless it is necessary. This mini-parser	5710 // don't want to scan the regexp twice unless it is necessary. This mini-parser

5690 // is called when needed. It can see the difference between capturing and	5711 // is called when needed. It can see the difference between capturing and

5691 // noncapturing parentheses and can skip character classes and backslash-escaped	5712 // noncapturing parentheses and can skip character classes and backslash-escaped

5692 // characters.	5713 // characters.

5693 void RegExpParser::ScanForCaptures() {	5714 void RegExpParser::ScanForCaptures() {

5694 // Start with captures started previous to current position	5715 // Start with captures started previous to current position

5695 int capture_count = captures_started();	5716 int capture_count = captures_started_;

5696 // Add count of captures after this position.	5717 // Add count of captures after this position.

5697 int n;	5718 int n;

5698 while ((n = current()) != kEndMarker) {	5719 while ((n = current()) != kEndMarker) {

5699 Advance();	5720 Advance();

5700 switch (n) {	5721 switch (n) {

5701 case '\\':	5722 case '\\':

5702 Advance();	5723 Advance();

5703 break;	5724 break;

5704 case '[': {	5725 case '[': {

5705 int c;	5726 int c;

(...skipping 31 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
5737 value = 10 * value + (c - '0');	5758 value = 10 * value + (c - '0');

5738 if (value > kMaxCaptures) {	5759 if (value > kMaxCaptures) {

5739 Reset(start);	5760 Reset(start);

5740 return false;	5761 return false;

5741 }	5762 }

5742 Advance();	5763 Advance();

5743 } else {	5764 } else {

5744 break;	5765 break;

5745 }	5766 }

5746 }	5767 }

5747 if (value > captures_started()) {	5768 if (value > captures_started_) {

5748 if (!is_scanned_for_captures_) {	5769 if (!is_scanned_for_captures_) {

5749 int saved_position = position();	5770 int saved_position = position();

5750 ScanForCaptures();	5771 ScanForCaptures();

5751 Reset(saved_position);	5772 Reset(saved_position);

5752 }	5773 }

5753 if (value > capture_count_) {	5774 if (value > capture_count_) {

5754 Reset(start);	5775 Reset(start);

5755 return false;	5776 return false;

5756 }	5777 }

5757 }	5778 }

5758 *index_out = value;	5779 *index_out = value;

5759 return true;	5780 return true;

5760 }	5781 }

5761	5782

5762	5783

	5784 RegExpCapture* RegExpParser::GetCapture(int index) {

	5785 // The index of a capture group is one-based. Its index in the list is

	5786 // zero-based.

	5787 int know_captures =

	5788 is_scanned_for_captures_ ? capture_count_ : captures_started_;

	5789 DCHECK(index <= know_captures);

	5790 if (captures_ == NULL) {

	5791 captures_ = new (zone()) ZoneList<RegExpCapture*>(know_captures, zone());

	5792 }

	5793 while (captures_->length() < know_captures) {

	5794 captures_->Add(new (zone()) RegExpCapture(captures_->length() + 1), zone());

	5795 }

	5796 return captures_->at(index - 1);

	5797 }

	5798

	5799

5763 // QuantifierPrefix ::	5800 // QuantifierPrefix ::

5764 // { DecimalDigits }	5801 // { DecimalDigits }

5765 // { DecimalDigits , }	5802 // { DecimalDigits , }

5766 // { DecimalDigits , DecimalDigits }	5803 // { DecimalDigits , DecimalDigits }

5767 //	5804 //

5768 // Returns true if parsing succeeds, and sets the min_out and max_out	5805 // Returns true if parsing succeeds, and sets the min_out and max_out

5769 // values. Values are truncated to RegExpTree::kInfinity if they overflow.	5806 // values. Values are truncated to RegExpTree::kInfinity if they overflow.

5770 bool RegExpParser::ParseIntervalQuantifier(int* min_out, int* max_out) {	5807 bool RegExpParser::ParseIntervalQuantifier(int* min_out, int* max_out) {

5771 DCHECK_EQ(current(), '{');	5808 DCHECK_EQ(current(), '{');

5772 int start = position();	5809 int start = position();

(...skipping 259 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
6032 CharacterRange range,	6069 CharacterRange range,

6033 Zone* zone) {	6070 Zone* zone) {

6034 if (char_class != kNoCharClass) {	6071 if (char_class != kNoCharClass) {

6035 CharacterRange::AddClassEscape(char_class, ranges, zone);	6072 CharacterRange::AddClassEscape(char_class, ranges, zone);

6036 } else {	6073 } else {

6037 ranges->Add(range, zone);	6074 ranges->Add(range, zone);

6038 }	6075 }

6039 }	6076 }

6040	6077

6041	6078

6042 RegExpTree* RegExpParser::ParseCharacterClass() {	6079 RegExpTree* RegExpParser::ParseCharacterClass(

	6080 RegExpTree::ReadDirection read_direction) {

6043 static const char* kUnterminated = "Unterminated character class";	6081 static const char* kUnterminated = "Unterminated character class";

6044 static const char* kRangeOutOfOrder = "Range out of order in character class";	6082 static const char* kRangeOutOfOrder = "Range out of order in character class";

6045	6083

6046 DCHECK_EQ(current(), '[');	6084 DCHECK_EQ(current(), '[');

6047 Advance();	6085 Advance();

6048 bool is_negated = false;	6086 bool is_negated = false;

6049 if (current() == '^') {	6087 if (current() == '^') {

6050 is_negated = true;	6088 is_negated = true;

6051 Advance();	6089 Advance();

6052 }	6090 }

(...skipping 31 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
6084 }	6122 }

6085 }	6123 }

6086 if (!has_more()) {	6124 if (!has_more()) {

6087 return ReportError(CStrVector(kUnterminated) CHECK_FAILED);	6125 return ReportError(CStrVector(kUnterminated) CHECK_FAILED);

6088 }	6126 }

6089 Advance();	6127 Advance();

6090 if (ranges->length() == 0) {	6128 if (ranges->length() == 0) {

6091 ranges->Add(CharacterRange::Everything(), zone());	6129 ranges->Add(CharacterRange::Everything(), zone());

6092 is_negated = !is_negated;	6130 is_negated = !is_negated;

6093 }	6131 }

6094 return new(zone()) RegExpCharacterClass(ranges, is_negated);	6132 return new (zone()) RegExpCharacterClass(ranges, is_negated, read_direction);

6095 }	6133 }

6096	6134

6097	6135

6098 // ----------------------------------------------------------------------------	6136 // ----------------------------------------------------------------------------

6099 // The Parser interface.	6137 // The Parser interface.

6100	6138

6101 bool RegExpParser::ParseRegExp(Isolate* isolate, Zone* zone,	6139 bool RegExpParser::ParseRegExp(Isolate* isolate, Zone* zone,

6102 FlatStringReader* input, bool multiline,	6140 FlatStringReader* input, bool multiline,

6103 bool unicode, RegExpCompileData* result) {	6141 bool unicode, RegExpCompileData* result) {

6104 DCHECK(result != NULL);	6142 DCHECK(result != NULL);

(...skipping 348 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
6453 }	6491 }

6454	6492

6455	6493

6456 void Parser::RaiseLanguageMode(LanguageMode mode) {	6494 void Parser::RaiseLanguageMode(LanguageMode mode) {

6457 SetLanguageMode(scope_,	6495 SetLanguageMode(scope_,

6458 static_cast<LanguageMode>(scope_->language_mode() \| mode));	6496 static_cast<LanguageMode>(scope_->language_mode() \| mode));

6459 }	6497 }

6460	6498

6461 } // namespace internal	6499 } // namespace internal

6462 } // namespace v8	6500 } // namespace v8

OLD	NEW

« no previous file with comments | « src/parser.h ('k') | src/regexp/bytecodes-irregexp.h » ('j') | src/regexp/jsregexp.cc » ('J')