regexp2000/src/parser.cc - Issue 9110: Experimental: Fixed bug in RegExp Parser. Added feature counting in parser.

Side by Side Diff: regexp2000/src/parser.cc

Issue 9110: Experimental: Fixed bug in RegExp Parser. Added feature counting in parser. (Closed)

Patch Set: Merged changes to tip of experimental branch. Created 12 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved.	1 // Copyright 2006-2008 the V8 project authors. All rights reserved.

2 // Redistribution and use in source and binary forms, with or without	2 // Redistribution and use in source and binary forms, with or without

3 // modification, are permitted provided that the following conditions are	3 // modification, are permitted provided that the following conditions are

4 // met:	4 // met:

5 //	5 //

6 // * Redistributions of source code must retain the above copyright	6 // * Redistributions of source code must retain the above copyright

7 // notice, this list of conditions and the following disclaimer.	7 // notice, this list of conditions and the following disclaimer.

8 // * Redistributions in binary form must reproduce the above	8 // * Redistributions in binary form must reproduce the above

9 // copyright notice, this list of conditions and the following	9 // copyright notice, this list of conditions and the following

10 // disclaimer in the documentation and/or other materials provided	10 // disclaimer in the documentation and/or other materials provided

(...skipping 210 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
221 Handle<String> type,	221 Handle<String> type,

222 Vector< Handle<Object> > arguments);	222 Vector< Handle<Object> > arguments);

223	223

224 friend class Target;	224 friend class Target;

225 friend class TargetScope;	225 friend class TargetScope;

226 friend class LexicalScope;	226 friend class LexicalScope;

227 friend class TemporaryScope;	227 friend class TemporaryScope;

228 };	228 };

229	229

230	230

	231 template <typename T, int initial_size>

	232 class BufferedZoneList {

	233 public:

	234

	235 BufferedZoneList() :

	236 list_(NULL), last_(NULL) {}

	237

	238 // Adds element at end of list. This element is buffered and can

	239 // be read using last() or removed using RemoveLast until a new Add or until

	240 // RemoveLast or GetList has been called.

	241 void Add(T* value) {

	242 if (last_ != NULL) {

	243 if (list_ == NULL) {

	244 list_ = new ZoneList<T*>(initial_size);

	245 }

	246 list_->Add(last_);

	247 }

	248 last_ = value;

	249 }

	250

	251 T* last() {

	252 ASSERT(last_ != NULL);

	253 return last_;

	254 }

	255

	256 T* RemoveLast() {

	257 ASSERT(last_ != NULL);

	258 T* result = last_;

	259 last_ = NULL;

	260 return result;

	261 }

	262

	263 void Clear() {

	264 list_ = NULL;

	265 last_ = NULL;

	266 }

	267

	268 int length() {

	269 int length = (list_ == NULL) ? 0 : list_->length();

	270 return length + ((last_ == NULL) ? 0 : 1);

	271 }

	272

	273 ZoneList<T*>* GetList() {

	274 if (list_ == NULL) {

	275 list_ = new ZoneList<T*>(initial_size);

	276 }

	277 if (last_ != NULL) {

	278 list_->Add(last_);

	279 last_ = NULL;

	280 }

	281 return list_;

	282 }

	283

	284 private:

	285 ZoneList<T*>* list_;

	286 T* last_;

	287 };

	288

	289 // Accumulates RegExp atoms and assertions into lists of terms and alternatives.

	290 class RegExpBuilder {

	291 public:

	292 RegExpBuilder();

	293 void AddCharacter(uc16 character);

	294 void AddAtom(RegExpTree* tree);

	295 void AddAssertion(RegExpTree* tree);

	296 void NewAlternative(); // '|'

	297 void AddQuantifierToAtom(int min, int max, bool is_greedy);

	298 RegExpTree* ToRegExp();

	299 private:

	300 void FlushCharacters();

	301 bool FlushTerms();

	302 ZoneList<uc16>* characters_;

	303 BufferedZoneList<RegExpTree, 2> terms_;

	304 BufferedZoneList<RegExpTree, 2> alternatives_;

	305 #ifdef DEBUG

	306 enum {ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM} last_added_;

	307 #define LAST(x) last_added_ = x;

	308 #else

	309 #define LAST(x)

	310 #endif

	311 };

	312

	313

	314 RegExpBuilder::RegExpBuilder() : characters_(NULL), terms_(), alternatives_()

	315 #ifdef DEBUG

	316 , last_added_(ADD_NONE)

	317 #endif

	318 {}

	319

	320

	321 void RegExpBuilder::FlushCharacters() {

	322 if (characters_ != NULL) {

	323 RegExpTree* atom = new RegExpAtom(characters_->ToConstVector());

	324 characters_ = NULL;

	325 terms_.Add(atom);

	326 LAST(ADD_ATOM);

	327 }

	328 }

	329

	330

	331 void RegExpBuilder::AddCharacter(uc16 c) {

	332 if (characters_ == NULL) {

	333 characters_ = new ZoneList<uc16>(4);

	334 }

	335 characters_->Add(c);

	336 LAST(ADD_CHAR);

	337 }

	338

	339

	340 void RegExpBuilder::AddAtom(RegExpTree* atom) {

	341 FlushCharacters();

	342 terms_.Add(atom);

	343 LAST(ADD_ATOM);

	344 }

	345

	346

	347 void RegExpBuilder::AddAssertion(RegExpTree* assert) {

	348 FlushCharacters();

	349 terms_.Add(assert);

	350 LAST(ADD_ASSERT);

	351 }

	352

	353

	354 void RegExpBuilder::NewAlternative() {

	355 if (!FlushTerms()) {

	356 alternatives_.Add(RegExpEmpty::GetInstance());

	357 }

	358 }

	359

	360

	361 bool RegExpBuilder::FlushTerms() {

	362 FlushCharacters();

	363 int num_terms = terms_.length();

	364 if (num_terms == 0) {

	365 return false;

	366 }

	367 RegExpTree* alternative;

	368 if (num_terms == 1) {

	369 alternative = terms_.last();

	370 } else {

	371 alternative = new RegExpAlternative(terms_.GetList());

	372 }

	373 alternatives_.Add(alternative);

	374 terms_.Clear();

	375 LAST(ADD_NONE);

	376 return true;

	377 }

	378

	379

	380 RegExpTree* RegExpBuilder::ToRegExp() {

	381 FlushTerms();

	382 int num_alternatives = alternatives_.length();

	383 if (num_alternatives == 0) {

	384 return RegExpEmpty::GetInstance();

	385 }

	386 if (num_alternatives == 1) {

	387 return alternatives_.last();

	388 }

	389 return new RegExpDisjunction(alternatives_.GetList());

	390 }

	391

	392

	393 void RegExpBuilder::AddQuantifierToAtom(int min, int max, bool is_greedy) {

	394 RegExpTree* atom;

	395 if (characters_ != NULL) {

	396 ASSERT(last_added_ == ADD_CHAR);

	397 // Last atom was character.

	398 Vector<const uc16> char_vector = characters_->ToConstVector();

	399 int num_chars = char_vector.length();

	400 if (num_chars > 1) {

	401 Vector<const uc16> prefix = char_vector.SubVector(0, num_chars - 1);

	402 terms_.Add(new RegExpAtom(prefix));

	403 char_vector = char_vector.SubVector(num_chars - 1, num_chars);

	404 }

	405 characters_ = NULL;

	406 atom = new RegExpAtom(char_vector);

	407 } else if (terms_.length() > 0) {

	408 ASSERT(last_added_ == ADD_ATOM);

	409 atom = terms_.RemoveLast();

	410 } else {

	411 // Only call immediately after adding an atom or character!

	412 UNREACHABLE();

	413 return;

	414 }

	415 terms_.Add(new RegExpQuantifier(min, max, is_greedy, atom));

	416 LAST(ADD_TERM);

	417 }

	418

	419

231 class RegExpParser {	420 class RegExpParser {

232 public:	421 public:

233 RegExpParser(unibrow::CharacterStream* in,	422 RegExpParser(unibrow::CharacterStream* in,

234 Handle<String>* error,	423 Handle<String>* error,

235 bool multiline_mode);	424 bool multiline_mode);

236 RegExpTree* ParsePattern(bool* ok);	425 RegExpTree* ParsePattern(bool* ok);

237 RegExpTree* ParseDisjunction(bool* ok);	426 RegExpTree* ParseDisjunction(bool* ok);

238 RegExpTree* ParseAlternative(bool* ok);

239 RegExpTree* ParseTerm(bool* ok);

240 RegExpTree* ParseAtom(bool* ok);

241 RegExpTree* ParseGroup(bool* ok);	427 RegExpTree* ParseGroup(bool* ok);

242 RegExpTree* ParseCharacterClass(bool* ok);	428 RegExpTree* ParseCharacterClass(bool* ok);

243	429

244 // Parses a {...,...} quantifier and stores the range in the given	430 // Parses a {...,...} quantifier and stores the range in the given

245 // out parameters.	431 // out parameters.

246 void* ParseIntervalQuantifier(int* min_out, int* max_out, bool* ok);	432 void* ParseIntervalQuantifier(int* min_out, int* max_out, bool* ok);

247	433

248 // Parses and returns a single escaped character. The character	434 // Parses and returns a single escaped character. The character

248 // must not be 'b' or 'B' since they are usually handled specially.	435 // must not be 'b' or 'B' since they are usually handled specially.

250 uc32 ParseCharacterEscape(bool* ok);	436 uc32 ParseClassCharacterEscape(bool* ok);

251	437

252 // Checks whether the following is a length-digit hexadecimal number,	438 // Checks whether the following is a length-digit hexadecimal number,

253 // and sets the value if it is.	439 // and sets the value if it is.

254 bool ParseHexEscape(int length, uc32* value);	440 bool ParseHexEscape(int length, uc32* value);

255	441

256 uc32 ParseControlEscape(bool* ok);	442 uc32 ParseControlLetterEscape(bool* ok);

257 uc32 ParseOctalLiteral(bool* ok);	443 uc32 ParseOctalLiteral();

258	444

259 // Tries to parse the input as a backreference. If successful it	445 // Tries to parse the input as a backreference. If successful it

260 // stores the result in the output parameter and returns true. If	446 // stores the result in the output parameter and returns true. If

261 // it fails it will push back the characters read so the same characters	447 // it fails it will push back the characters read so the same characters

262 // can be reparsed.	448 // can be reparsed.

263 bool ParseBackreferenceIndex(int* index_out);	449 bool ParseBackreferenceIndex(int* index_out);

264	450

265 CharacterRange ParseClassAtom(bool* is_char_class,	451 CharacterRange ParseClassAtom(bool* is_char_class,

266 ZoneList<CharacterRange>* ranges,	452 ZoneList<CharacterRange>* ranges,

267 bool* ok);	453 bool* ok);

268 RegExpTree* ReportError(Vector<const char> message, bool* ok);	454 RegExpTree* ReportError(Vector<const char> message, bool* ok);

269 void Advance();	455 void Advance();

270 void Advance(int dist);	456 void Advance(int dist);

271 // Pushes a read character (or potentially some other character) back	457 // Pushes a read character (or potentially some other character) back

272 // on the input stream. After pushing it back, it becomes the character	458 // on the input stream. After pushing it back, it becomes the character

273 // returned by current(). There is a limited amount of push-back buffer.	459 // returned by current(). There is a limited amount of push-back buffer.

274 // A function using PushBack should check that it doesn't push back more	460 // A function using PushBack should check that it doesn't push back more

275 // than kMaxPushback characters, and it should not push back more characters	461 // than kMaxPushback characters, and it should not push back more characters

276 // than it has read, or that it knows had been read prior to calling it.	462 // than it has read.

277 void PushBack(uc32 character);	463 void PushBack(uc32 character);

278 bool CanPushBack();	464 bool CanPushBack();

	465

	466 bool HasCharacterEscapes();

	467

279 static const uc32 kEndMarker = unibrow::Utf8::kBadChar;	468 static const uc32 kEndMarker = unibrow::Utf8::kBadChar;

280 private:	469 private:

281 uc32 current() { return current_; }	470 uc32 current() { return current_; }

282 uc32 next() { return next_; }	471 uc32 next() { return next_; }

283 bool has_more() { return has_more_; }	472 bool has_more() { return has_more_; }

284 bool has_next() { return has_next_; }	473 bool has_next() { return has_next_; }

285 unibrow::CharacterStream* in() { return in_; }	474 unibrow::CharacterStream* in() { return in_; }

286 uc32 current_;	475 uc32 current_;

287 uc32 next_;	476 uc32 next_;

288 bool has_more_;	477 bool has_more_;

289 bool has_next_;	478 bool has_next_;

290 bool multiline_mode_;	479 bool multiline_mode_;

291 int captures_seen_;	480 int captures_started_;

292 unibrow::CharacterStream* in_;	481 unibrow::CharacterStream* in_;

293 Handle<String>* error_;	482 Handle<String>* error_;

294 static const int kMaxPushback = 5;	483 static const int kMaxPushback = 5;

295 int pushback_count_;	484 int pushback_count_;

296 uc32 pushback_buffer_[kMaxPushback];	485 uc32 pushback_buffer_[kMaxPushback];

	486 bool has_character_escapes_;

297 };	487 };

298	488

299	489

300 // A temporary scope stores information during parsing, just like	490 // A temporary scope stores information during parsing, just like

301 // a plain scope. However, temporary scopes are not kept around	491 // a plain scope. However, temporary scopes are not kept around

302 // after parsing or referenced by syntax trees so they can be stack-	492 // after parsing or referenced by syntax trees so they can be stack-

303 // allocated and hence used by the pre-parser.	493 // allocated and hence used by the pre-parser.

304 class TemporaryScope BASE_EMBEDDED {	494 class TemporaryScope BASE_EMBEDDED {

305 public:	495 public:

306 explicit TemporaryScope(Parser* parser);	496 explicit TemporaryScope(Parser* parser);

(...skipping 2931 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
3238	3428

3239	3429

3240 RegExpParser::RegExpParser(unibrow::CharacterStream* in,	3430 RegExpParser::RegExpParser(unibrow::CharacterStream* in,

3241 Handle<String>* error,	3431 Handle<String>* error,

3242 bool multiline_mode)	3432 bool multiline_mode)

3243 : current_(kEndMarker),	3433 : current_(kEndMarker),

3244 next_(kEndMarker),	3434 next_(kEndMarker),

3245 has_more_(true),	3435 has_more_(true),

3246 has_next_(true),	3436 has_next_(true),

3247 multiline_mode_(multiline_mode),	3437 multiline_mode_(multiline_mode),

3248 captures_seen_(0),	3438 captures_started_(0),

3249 in_(in),	3439 in_(in),

3250 error_(error),	3440 error_(error),

3251 pushback_count_(0) {	3441 pushback_count_(0),

	3442 has_character_escapes_(false) {

3252 Advance(2);	3443 Advance(2);

3253 }	3444 }

3254	3445

3255	3446

3256 void RegExpParser::Advance() {	3447 void RegExpParser::Advance() {

3257 current_ = next_;	3448 current_ = next_;

3258 has_more_ = has_next_;	3449 has_more_ = has_next_;

3259 if (pushback_count_ > 0) {	3450 if (pushback_count_ > 0) {

3260 pushback_count_--;	3451 pushback_count_--;

3261 next_ = pushback_buffer_[pushback_count_];	3452 next_ = pushback_buffer_[pushback_count_];

3262 has_next_ = true;

3263 } else if (in()->has_more()) {	3453 } else if (in()->has_more()) {

3264 next_ = in()->GetNext();	3454 next_ = in()->GetNext();

3265 } else {	3455 } else {

3266 next_ = kEndMarker;	3456 next_ = kEndMarker;

3267 has_next_ = false;	3457 has_next_ = false;

3268 }	3458 }

3269 }	3459 }

3270	3460

3271	3461

3272 void RegExpParser::Advance(int dist) {	3462 void RegExpParser::Advance(int dist) {

3273 for (int i = 0; i < dist; i++)	3463 for (int i = 0; i < dist; i++)

3274 Advance();	3464 Advance();

3275 }	3465 }

3276	3466

3277	3467

3278 void RegExpParser::PushBack(uc32 character) {	3468 void RegExpParser::PushBack(uc32 character) {

3279 if (has_next_) {	3469 if (has_next_) {

3280 ASSERT(pushback_count_ < kMaxPushback);	3470 ASSERT(pushback_count_ < kMaxPushback);

3281 pushback_buffer_[pushback_count_] = next_;	3471 pushback_buffer_[pushback_count_] = next_;

3282 pushback_count_++;	3472 pushback_count_++;

3283 }	3473 }

3284 if (has_more_) {	3474

3285 next_ = current_;	3475 next_ = current_;

3286 has_next_ = true;	3476 has_next_ = has_more_;

3287 }	3477

3288 current_ = character;	3478 current_ = character;

3289 has_more_ = true;	3479 has_more_ = true;

3290 }	3480 }

3291	3481

3292	3482

3293 bool RegExpParser::CanPushBack() {	3483 bool RegExpParser::CanPushBack() {

3294 return (pushback_count_ < kMaxPushback);	3484 return (pushback_count_ < kMaxPushback);

3295 }	3485 }

3296	3486

	3487 // Reports whether the parsed string atoms contain any characters that were

	3488 // escaped in the original pattern. If not, all atoms are proper substrings

	3489 // of the original pattern.

	3490 bool RegExpParser::HasCharacterEscapes() {

	3491 return has_character_escapes_;

	3492 }

3297	3493

3298 RegExpTree* RegExpParser::ReportError(Vector<const char> message, bool* ok) {	3494 RegExpTree* RegExpParser::ReportError(Vector<const char> message, bool* ok) {

3299 *ok = false;	3495 *ok = false;

3300 *error_ = Factory::NewStringFromAscii(message, NOT_TENURED);	3496 *error_ = Factory::NewStringFromAscii(message, NOT_TENURED);

3301 return NULL;	3497 return NULL;

3302 }	3498 }

3303	3499

3304	3500

3305 // Pattern ::	3501 // Pattern ::

3306 // Disjunction	3502 // Disjunction

3307 RegExpTree* RegExpParser::ParsePattern(bool* ok) {	3503 RegExpTree* RegExpParser::ParsePattern(bool* ok) {

3308 return ParseDisjunction(ok);	3504 RegExpTree* result = ParseDisjunction(CHECK_OK);

	3505 if (has_more()) {

	3506 ReportError(CStrVector("Unmatched ')'"), CHECK_OK);

	3507 }

	3508 return result;

3309 }	3509 }

3310	3510

3311	3511

3312 // Disjunction ::	3512 // Disjunction ::

3313 // Alternative	3513 // Alternative

3314 // Alternative | Disjunction	3514 // Alternative | Disjunction

	3515 // Alternative ::

	3516 // [empty]

	3517 // Term Alternative

	3518 // Term ::

	3519 // Assertion

	3520 // Atom

	3521 // Atom Quantifier

3315 RegExpTree* RegExpParser::ParseDisjunction(bool* ok) {	3522 RegExpTree* RegExpParser::ParseDisjunction(bool* ok) {

3316 RegExpTree* first = ParseAlternative(CHECK_OK);	3523 RegExpBuilder builder;

3317 if (current() == '|') {	3524 while (true) {

3318 ZoneList<RegExpTree*>* nodes = new ZoneList<RegExpTree*>(2);	3525 switch (current()) {

3319 nodes->Add(first);	3526 case kEndMarker:

3320 while (current() == '|') {	3527 case ')':

3321 Advance();	3528 return builder.ToRegExp();

3322 RegExpTree* next = ParseAlternative(CHECK_OK);	3529 case '|':

3323 nodes->Add(next);	3530 Advance();

3324 }	3531 builder.NewAlternative();

3325 return new RegExpDisjunction(nodes);	3532 continue;

3326 } else {	3533 case '*':

3327 return first;	3534 case '+':

	3535 case '?':

	3536 case '{':

	3537 ReportError(CStrVector("Nothing to repeat."), CHECK_OK);

	3538 case '^': {

	3539 Advance();

	3540 RegExpAssertion::Type type =

	3541 multiline_mode_ ? RegExpAssertion::START_OF_LINE :

	3542 RegExpAssertion::START_OF_INPUT;

	3543 builder.AddAssertion(new RegExpAssertion(type));

	3544 continue;

	3545 }

	3546 case '$': {

	3547 Advance();

	3548 RegExpAssertion::Type type =

	3549 multiline_mode_ ? RegExpAssertion::END_OF_LINE :

	3550 RegExpAssertion::END_OF_INPUT;

	3551 builder.AddAssertion(new RegExpAssertion(type));

	3552 continue;

	3553 }

	3554 case '.': {

	3555 Advance();

	3556 // everything except \x0a, \x0d, \u2028 and \u2029

	3557 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2);

	3558 CharacterRange::AddClassEscape('.', ranges);

	3559 RegExpTree* atom = new RegExpCharacterClass(ranges, false);

	3560 builder.AddAtom(atom);

	3561 break;

	3562 }

	3563 case '(': {

	3564 RegExpTree* atom = ParseGroup(CHECK_OK);

	3565 builder.AddAtom(atom);

	3566 break;

	3567 }

	3568 case '[': {

	3569 RegExpTree* atom = ParseCharacterClass(CHECK_OK);

	3570 builder.AddAtom(atom);

	3571 break;

	3572 }

	3573 // Atom ::

	3574 // \ AtomEscape

	3575 case '\\':

	3576 switch (next()) {

	3577 case kEndMarker:

	3578 ReportError(CStrVector("\\ at end of pattern"), CHECK_OK);

	3579 case 'b':

	3580 Advance(2);

	3581 builder.AddAssertion(

	3582 new RegExpAssertion(RegExpAssertion::BOUNDARY));

	3583 continue;

	3584 case 'B':

	3585 Advance(2);

	3586 builder.AddAssertion(

	3587 new RegExpAssertion(RegExpAssertion::NON_BOUNDARY));

	3588 continue;

	3589 // AtomEscape ::

	3590 // CharacterClassEscape

	3591 //

	3592 // CharacterClassEscape :: one of

	3593 // d D s S w W

	3594 case 'd': case 'D': case 's': case 'S': case 'w': case 'W': {

	3595 uc32 c = next();

	3596 Advance(2);

	3597 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2);

	3598 CharacterRange::AddClassEscape(c, ranges);

	3599 RegExpTree* atom = new RegExpCharacterClass(ranges, false);

	3600 builder.AddAtom(atom);

	3601 goto has_read_atom; // Avoid setting has_character_escapes_.

	3602 }

	3603 case '1': case '2': case '3': case '4': case '5': case '6':

	3604 case '7': case '8': case '9': {

	3605 int index = 0;

	3606 if (ParseBackreferenceIndex(&index)) {

	3607 RegExpTree* atom = new RegExpBackreference(index);

	3608 builder.AddAtom(atom);

	3609 goto has_read_atom; // Avoid setting has_character_escapes_.

	3610 }

	3611 uc32 first_digit = next();

	3612 if (first_digit == '8' || first_digit == '9') {

	3613 // Treat as identity escape

	3614 builder.AddCharacter(first_digit);

	3615 Advance(2);

	3616 break;

	3617 }

	3618 }

	3619 // FALLTHROUGH

	3620 case '0': {

	3621 Advance();

	3622 uc32 octal = ParseOctalLiteral();

	3623 builder.AddCharacter(octal);

	3624 break;

	3625 }

	3626 // ControlEscape :: one of

	3627 // f n r t v

	3628 case 'f':

	3629 Advance(2);

	3630 builder.AddCharacter('\f');

	3631 break;

	3632 case 'n':

	3633 Advance(2);

	3634 builder.AddCharacter('\n');

	3635 break;

	3636 case 'r':

	3637 Advance(2);

	3638 builder.AddCharacter('\r');

	3639 break;

	3640 case 't':

	3641 Advance(2);

	3642 builder.AddCharacter('\t');

	3643 break;

	3644 case 'v':

	3645 Advance(2);

	3646 builder.AddCharacter('\v');

	3647 break;

	3648 case 'c': {

	3649 Advance(2);

	3650 uc32 control = ParseControlLetterEscape(ok);

	3651 builder.AddCharacter(control);

	3652 break;

	3653 }

	3654 case 'x': {

	3655 Advance(2);

	3656 uc32 value;

	3657 if (ParseHexEscape(2, &value)) {

	3658 builder.AddCharacter(value);

	3659 } else {

	3660 builder.AddCharacter('x');

	3661 }

	3662 break;

	3663 }

	3664 case 'u': {

	3665 Advance(2);

	3666 uc32 value;

	3667 if (ParseHexEscape(4, &value)) {

	3668 builder.AddCharacter(value);

	3669 } else {

	3670 builder.AddCharacter('u');

	3671 }

	3672 break;

	3673 }

	3674 default:

	3675 // Identity escape.

	3676 builder.AddCharacter(next());

	3677 Advance(2);

	3678 break;

	3679 }

	3680 has_character_escapes_ = true;

	3681 break;

	3682 default:

	3683 builder.AddCharacter(current());

	3684 Advance();

	3685 break;

	3686 } // end switch(current())

	3687

	3688 has_read_atom:

	3689 int min;

	3690 int max;

	3691 switch (current()) {

	3692 // QuantifierPrefix ::

	3693 // *

	3694 // +

	3695 // ?

	3696 // {

	3697 case '*':

	3698 min = 0;

	3699 max = RegExpQuantifier::kInfinity;

	3700 Advance();

	3701 break;

	3702 case '+':

	3703 min = 1;

	3704 max = RegExpQuantifier::kInfinity;

	3705 Advance();

	3706 break;

	3707 case '?':

	3708 min = 0;

	3709 max = 1;

	3710 Advance();

	3711 break;

	3712 case '{':

	3713 ParseIntervalQuantifier(&min, &max, CHECK_OK);

	3714 break;

	3715 default:

	3716 continue;

	3717 }

	3718 bool is_greedy = true;

	3719 if (current() == '?') {

	3720 is_greedy = false;

	3721 Advance();

	3722 }

	3723 builder.AddQuantifierToAtom(min, max, is_greedy);

3328 }	3724 }

3329 }	3725 }

3330	3726

3331

3332 static bool IsAlternativeTerminator(uc32 c) {

3333 return c == '|' || c == ')' || c == RegExpParser::kEndMarker;

3334 }

3335

3336

3337 // Alternative ::

3338 // [empty]

3339 // Alternative Term

3340 RegExpTree* RegExpParser::ParseAlternative(bool* ok) {

3341 if (!IsAlternativeTerminator(current())) {

3342 RegExpTree* first = ParseTerm(CHECK_OK);

3343 if (!IsAlternativeTerminator(current())) {

3344 ZoneList<RegExpTree*>* nodes = new ZoneList<RegExpTree*>(2);

3345 nodes->Add(first);

3346 while (!IsAlternativeTerminator(current())) {

3347 RegExpTree* next = ParseTerm(CHECK_OK);

3348 nodes->Add(next);

3349 }

3350 return new RegExpAlternative(nodes);

3351 } else {

3352 return first;

3353 }

3354 } else {

3355 return RegExpEmpty::GetInstance();

3356 }

3357 }

3358

3359

3360 class SourceCharacter {	3727 class SourceCharacter {

3361 public:	3728 public:

3362 static bool Is(uc32 c) {	3729 static bool Is(uc32 c) {

3363 switch (c) {	3730 switch (c) {

3364 // case ']': case '}':	3731 // case ']': case '}':

3365 // In spidermonkey and jsc these are treated as source characters	3732 // In spidermonkey and jsc these are treated as source characters

3366 // so we do too.	3733 // so we do too.

3367 case '^': case '$': case '\\': case '.': case '*': case '+':	3734 case '^': case '$': case '\\': case '.': case '*': case '+':

3368 case '?': case '(': case ')': case '[': case '{': case '|':	3735 case '?': case '(': case ')': case '[': case '{': case '|':

3369 case RegExpParser::kEndMarker:	3736 case RegExpParser::kEndMarker:

3370 return false;	3737 return false;

3371 default:	3738 default:

3372 return true;	3739 return true;

3373 }	3740 }

3374 }	3741 }

3375 };	3742 };

3376	3743

3377	3744

3378 static unibrow::Predicate<SourceCharacter> source_character;	3745 static unibrow::Predicate<SourceCharacter> source_character;

3379	3746

3380	3747

3381 static inline bool IsSourceCharacter(uc32 c) {	3748 static inline bool IsSourceCharacter(uc32 c) {

3382 return source_character.get(c);	3749 return source_character.get(c);

3383 }	3750 }

3384	3751

3385	3752 #ifdef DEBUG

3386 static bool IsSpecialEscape(uc32 c) {	3753 // Currently only used in an ASSERT.

	3754 static bool IsSpecialClassEscape(uc32 c) {

3387 switch (c) {	3755 switch (c) {

3388 case 'b': case 'B': case 'd': case 'D': case 's': case 'S':	3756 case 'd': case 'D':

	3757 case 's': case 'S':

3389 case 'w': case 'W':	3758 case 'w': case 'W':

3390 return true;	3759 return true;

3391 default:	3760 default:

3392 return false;	3761 return false;

3393 }	3762 }

3394 }	3763 }

	3764 #endif

3395	3765

3396	3766

3397 bool RegExpParser::ParseBackreferenceIndex(int* index_out) {	3767 bool RegExpParser::ParseBackreferenceIndex(int* index_out) {

3398 ASSERT_EQ('\\', current());	3768 ASSERT_EQ('\\', current());

3399 ASSERT('1' <= next() && next() <= '9');	3769 ASSERT('1' <= next() && next() <= '9');

3400 ASSERT_EQ(0, pushback_count_);	3770 ASSERT_EQ(0, pushback_count_);

3401 // Try to parse a decimal literal that is less than then number	3771 // Try to parse a decimal literal that is no greater than the number

3402 // of previously encountered left capturing parentheses.	3772 // of previously encountered left capturing parentheses.

3403 // This is not according to the ECMAScript specification. According to	3773 // This is not according to the ECMAScript specification. According to

3404 // that, one must accept values up to the total number of left capturing	3774 // that, one must accept values up to the total number of left capturing

3405 // parentheses in the entire input, even if they are meaningless.	3775 // parentheses in the entire input, even if they are meaningless.

3406 if (captures_seen_ == 0)	3776 if (captures_started_ == 0)

3407 return false;	3777 return false;

3408 int value = next() - '0';	3778 int value = next() - '0';

3409 if (value > captures_seen_)	3779 if (value > captures_started_)

3410 return false;	3780 return false;

3411 static const int kMaxChars = kMaxPushback - 2;	3781 static const int kMaxChars = kMaxPushback - 2;

3412 EmbeddedVector<uc32, kMaxChars> chars_seen;	3782 EmbeddedVector<uc32, kMaxChars> chars_seen;

3413 chars_seen[0] = next();	3783 chars_seen[0] = next();

3414 int char_count = 1;	3784 int char_count = 1;

3415 Advance(2);	3785 Advance(2);

3416 while (true) {	3786 while (true) {

3417 uc32 c = current();	3787 uc32 c = current();

3418 if (IsDecimalDigit(c)) {	3788 if (IsDecimalDigit(c)) {

3419 int next_value = 10 * value + (c - '0');	3789 value = 10 * value + (c - '0');

3420 // To avoid reading past the end of the stack-allocated pushback	3790 // To avoid reading past the end of the stack-allocated pushback

3421 // buffers we only read kMaxChars before giving up.	3791 // buffers we only read kMaxChars before giving up.

3422 if (next_value > captures_seen_ || char_count > kMaxChars) {	3792 if (value > captures_started_ || char_count > kMaxChars) {

3423 // If we give up we have to push the characters we read back	3793 // If we give up we have to push the characters we read back

3424 // onto the pushback buffer in the reverse order.	3794 // onto the pushback buffer in the reverse order.

3425 for (int i = 0; i < char_count; i++) {	3795 for (int i = 0; i < char_count; i++) {

3426 PushBack(chars_seen[char_count - i - 1]);	3796 PushBack(chars_seen[char_count - i - 1]);

3427 }	3797 }

3428 PushBack('\\');	3798 PushBack('\\');

3429 return false;	3799 return false;

3430 }	3800 }

3431 value = next_value;

3432 chars_seen[char_count++] = current();	3801 chars_seen[char_count++] = current();

3433 Advance();	3802 Advance();

3434 } else {	3803 } else {

3435 *index_out = value;	3804 break;

3436 return true;

3437 }	3805 }

3438 }	3806 }

	3807 *index_out = value;

	3808 return true;

3439 }	3809 }

3440	3810

3441	3811

3442 // Term ::

3443 // Assertion

3444 // Atom

3445 // Atom Quantifier

3446 RegExpTree* RegExpParser::ParseTerm(bool* ok) {

3447 RegExpTree* atom = NULL;

3448 switch (current()) {

3449 // Assertion ::

3450 // ^

3451 // $

3452 // \ b

3453 // \ B

3454 case '^':

3455 Advance();

3456 return new RegExpAssertion(

3457 multiline_mode_ ? RegExpAssertion::START_OF_LINE

3458 : RegExpAssertion::START_OF_INPUT);

3459 case '$':

3460 Advance();

3461 return new RegExpAssertion(

3462 multiline_mode_ ? RegExpAssertion::END_OF_LINE

3463 : RegExpAssertion::END_OF_INPUT);

3464 case '.': {

3465 Advance();

3466 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2);

3467 CharacterRange::AddClassEscape('.', ranges);

3468 atom = new RegExpCharacterClass(ranges, false);

3469 break;

3470 }

3471 case '(':

3472 atom = ParseGroup(CHECK_OK);

3473 break;

3474 case '[':

3475 atom = ParseCharacterClass(CHECK_OK);

3476 break;

3477 // Atom ::

3478 // \ AtomEscape

3479 case '\\':

3480 if (has_next()) {

3481 switch (next()) {

3482 case 'b':

3483 Advance(2);

3484 return new RegExpAssertion(RegExpAssertion::BOUNDARY);

3485 case 'B':

3486 Advance(2);

3487 return new RegExpAssertion(RegExpAssertion::NON_BOUNDARY);

3488 // AtomEscape ::

3489 // CharacterClassEscape

3490 //

3491 // CharacterClassEscape :: one of

3492 // d D s S w W

3493 case 'd': case 'D': case 's': case 'S': case 'w': case 'W': {

3494 uc32 c = next();

3495 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2);

3496 CharacterRange::AddClassEscape(c, ranges);

3497 Advance(2);

3498 atom = new RegExpCharacterClass(ranges, false);

3499 goto has_read_atom;

3500 }

3501 case '1': case '2': case '3': case '4': case '5': case '6':

3502 case '7': case '8': case '9': {

3503 int index = 0;

3504 if (ParseBackreferenceIndex(&index)) {

3505 atom = new RegExpBackreference(index);

3506 goto has_read_atom;

3507 } else {

3508 // If this is not a backreference we go to the atom parser

3509 // which will read it as an octal escape or identity escape.

3510 goto parse_atom;

3511 }

3512 }

3513 default:

3514 goto parse_atom;

3515 }

3516 }

3517 // All other escapes fall through to the default case since

3518 // they correspond to single characters that can be

3519 // represented within atoms.

3520 default: {

3521 parse_atom:

3522 atom = ParseAtom(CHECK_OK);

3523 break;

3524 }

3525 }

3526 has_read_atom:

3527 int min;

3528 int max;

3529 switch (current()) {

3530 // QuantifierPrefix ::

3531 // *

3532 // +

3533 // ?

3534 // {

3535 case '*':

3536 min = 0;

3537 max = RegExpQuantifier::kInfinity;

3538 Advance();

3539 break;

3540 case '+':

3541 min = 1;

3542 max = RegExpQuantifier::kInfinity;

3543 Advance();

3544 break;

3545 case '?':

3546 min = 0;

3547 max = 1;

3548 Advance();

3549 break;

3550 case '{':

3551 ParseIntervalQuantifier(&min, &max, CHECK_OK);

3552 break;

3553 default:

3554 return atom;

3555 }

3556 bool is_greedy = true;

3557 if (current() == '?') {

3558 is_greedy = false;

3559 Advance();

3560 }

3561 return new RegExpQuantifier(min, max, is_greedy, atom);

3562 }

3563

3564

3565 // QuantifierPrefix ::	3812 // QuantifierPrefix ::

3566 // { DecimalDigits }	3813 // { DecimalDigits }

3567 // { DecimalDigits , }	3814 // { DecimalDigits , }

3568 // { DecimalDigits , DecimalDigits }	3815 // { DecimalDigits , DecimalDigits }

3569 void* RegExpParser::ParseIntervalQuantifier(int* min_out,	3816 void* RegExpParser::ParseIntervalQuantifier(int* min_out,

3570 int* max_out,	3817 int* max_out,

3571 bool* ok) {	3818 bool* ok) {

3572 ASSERT_EQ(current(), '{');	3819 ASSERT_EQ(current(), '{');

3573 static const char* kInvalidQuantifier = "Invalid quantifier";	3820 static const char* kInvalidQuantifier = "Invalid quantifier";

3574 Advance();	3821 Advance();

(...skipping 30 matching lines...) Expand all Loading...
3605 }	3852 }

3606 } else {	3853 } else {

3607 ReportError(CStrVector(kInvalidQuantifier), CHECK_OK);	3854 ReportError(CStrVector(kInvalidQuantifier), CHECK_OK);

3608 }	3855 }

3609 *min_out = min;	3856 *min_out = min;

3610 *max_out = max;	3857 *max_out = max;

3611 return NULL;	3858 return NULL;

3612 }	3859 }

3613	3860

3614	3861

3615 RegExpTree* RegExpParser::ParseAtom(bool* ok) {

3616 ASSERT(current() == '\\' || IsSourceCharacter(current()));

3617 ZoneList<uc16>* buf = new ZoneList<uc16>(4);

3618 while (true) {

3619 if (IsSourceCharacter(current())) {

3620 buf->Add(current());

3621 Advance();

3622 } else if (current() == '\\') {

3623 if (!has_next()) {

3624 ReportError(CStrVector("\\ at end of pattern"), CHECK_OK);

3625 } else if (IsSpecialEscape(next())) {

3626 // If the next thing we see is a special escape we stop

3627 // reading this atom.

3628 break;

3629 } else {

3630 uc32 escape = ParseCharacterEscape(CHECK_OK);

3631 buf->Add(escape);

3632 }

3633 } else {

3634 break;

3635 }

3636 }

3637 return new RegExpAtom(buf->ToConstVector());

3638 }

3639

3640 // Upper and lower case letters differ by one bit.	3862 // Upper and lower case letters differ by one bit.

3641 STATIC_CHECK('a'^'A' == 0x20);	3863 STATIC_CHECK('a'^'A' == 0x20);

3642	3864

3643 uc32 RegExpParser::ParseControlEscape(bool* ok) {	3865 uc32 RegExpParser::ParseControlLetterEscape(bool* ok) {

3644 ASSERT(current() == 'c');

3645 Advance();

3646 if (!has_more()) {	3866 if (!has_more()) {

3647 ReportError(CStrVector("\\c at end of pattern"), ok);	3867 ReportError(CStrVector("\\c at end of pattern"), ok);

3648 return '\0';	3868 return '\0';

3649 }	3869 }

3650 uc32 letter = current() & ~(0x20); // Collapse upper and lower case letters.	3870 uc32 letter = current() & ~(0x20); // Collapse upper and lower case letters.

3651 if (letter < 'A' || 'Z' < letter) {	3871 if (letter < 'A' || 'Z' < letter) {

3652 // Non-spec error-correction: "\c" followed by non-control letter is	3872 // Non-spec error-correction: "\c" followed by non-control letter is

3653 // interpreted as an IdentityEscape.	3873 // interpreted as an IdentityEscape of 'c'.

3654 return 'c';	3874 return 'c';

3655 }	3875 }

3656 Advance();	3876 Advance();

3657 return letter & 0x1f; // Remainder modulo 32, per specification.	3877 return letter & 0x1f; // Remainder modulo 32, per specification.

3658 }	3878 }

3659	3879

3660	3880

3661 uc32 RegExpParser::ParseOctalLiteral(bool* ok) {	3881 uc32 RegExpParser::ParseOctalLiteral() {

3662 ASSERT('0' <= current() && current() <= '7');	3882 ASSERT('0' <= current() && current() <= '7');

3663 // For compatibility with some other browsers (not all), we parse	3883 // For compatibility with some other browsers (not all), we parse

3664 // up to three octal digits with a value below 256.	3884 // up to three octal digits with a value below 256.

3665 uc32 value = current() - '0';	3885 uc32 value = current() - '0';

3666 Advance();	3886 Advance();

3667 if ('0' <= current() && current() <= '7') {	3887 if ('0' <= current() && current() <= '7') {

3668 value = value * 8 + current() - '0';	3888 value = value * 8 + current() - '0';

3669 Advance();	3889 Advance();

3670 if (value < 32 && '0' <= current() && current() <= '7') {	3890 if (value < 32 && '0' <= current() && current() <= '7') {

3671 value = value * 8 + current() - '0';	3891 value = value * 8 + current() - '0';

3672 Advance();	3892 Advance();

3673 }	3893 }

3674 }	3894 }

3675 return value;	3895 return value;

3676 }	3896 }

3677	3897

	3898

3678 bool RegExpParser::ParseHexEscape(int length, uc32 *value) {	3899 bool RegExpParser::ParseHexEscape(int length, uc32 *value) {

3679 static const int kMaxChars = kMaxPushback;	3900 static const int kMaxChars = kMaxPushback;

3680 EmbeddedVector<uc32, kMaxChars> chars_seen;	3901 EmbeddedVector<uc32, kMaxChars> chars_seen;

3681 ASSERT(length <= kMaxChars);	3902 ASSERT(length <= kMaxChars);

3682 uc32 val = 0;	3903 uc32 val = 0;

3683 bool done = false;	3904 bool done = false;

3684 for (int i = 0; !done; i++) {	3905 for (int i = 0; !done; i++) {

3685 uc32 c = current();	3906 uc32 c = current();

3686 int d = HexValue(c);	3907 int d = HexValue(c);

3687 if (d < 0) {	3908 if (d < 0) {

3688 while (i > 0) {	3909 while (i > 0) {

3689 i--;	3910 i--;

3690 PushBack(chars_seen[i]);	3911 PushBack(chars_seen[i]);

3691 }	3912 }

3692 return false;	3913 return false;

3693 }	3914 }

3694 val = val * 16 + d;	3915 val = val * 16 + d;

3695 Advance();	3916 Advance();

3696 if (i < length - 1) {	3917 if (i < length - 1) {

3697 chars_seen[i] = c;	3918 chars_seen[i] = c;

3698 } else {	3919 } else {

3699 done = true;	3920 done = true;

3700 }	3921 }

3701 }	3922 }

3702 *value = val;	3923 *value = val;

3703 return true;	3924 return true;

3704 }	3925 }

3705	3926

3706	3927

3707 uc32 RegExpParser::ParseCharacterEscape(bool* ok) {	3928 uc32 RegExpParser::ParseClassCharacterEscape(bool* ok) {

3708 ASSERT(current() == '\\');	3929 ASSERT(current() == '\\');

3709 ASSERT(has_next() && !IsSpecialEscape(next()));	3930 ASSERT(has_next() && !IsSpecialClassEscape(next()));

3710 Advance();	3931 Advance();

3711 ASSERT(current() != 'b' && current() != 'B');

3712 switch (current()) {	3932 switch (current()) {

3713 // ControlEscape :: one of	3933 // ControlEscape :: one of

3714 // f n r t v	3934 // f n r t v

3715 case 'f':	3935 case 'f':

3716 Advance();	3936 Advance();

3717 return '\f';	3937 return '\f';

3718 case 'n':	3938 case 'n':

3719 Advance();	3939 Advance();

3720 return '\n';	3940 return '\n';

3721 case 'r':	3941 case 'r':

3722 Advance();	3942 Advance();

3723 return '\r';	3943 return '\r';

3724 case 't':	3944 case 't':

3725 Advance();	3945 Advance();

3726 return '\t';	3946 return '\t';

3727 case 'v':	3947 case 'v':

3728 Advance();	3948 Advance();

3729 return '\v';	3949 return '\v';

3730 case 'c':	3950 case 'c':

3731 // Spec mandates that next character is ASCII letter.	3951 return ParseControlLetterEscape(ok);

3732 // If not, we error-correct by interpreting "\c" as "c".

3733 return ParseControlEscape(ok);

3734 case '0': case '1': case '2': case '3': case '4': case '5':	3952 case '0': case '1': case '2': case '3': case '4': case '5':

3735 case '6': case '7':	3953 case '6': case '7':

3736 // For compatibility, we interpret a decimal escape that isn't	3954 // For compatibility, we interpret a decimal escape that isn't

3737 // a back reference (and therefore either \0 or not valid according	3955 // a back reference (and therefore either \0 or not valid according

3738 // to the specification) as a 1..3 digit octal character code.	3956 // to the specification) as a 1..3 digit octal character code.

3739 return ParseOctalLiteral(ok);	3957 return ParseOctalLiteral();

3740 case 'x': {	3958 case 'x': {

3741 Advance();	3959 Advance();

3742 uc32 value;	3960 uc32 value;

3743 if (ParseHexEscape(2, &value)) {	3961 if (ParseHexEscape(2, &value)) {

3744 return value;	3962 return value;

3745 }	3963 }

3746 // If \x is not followed by a two-digit hexadecimal, treat it	3964 // If \x is not followed by a two-digit hexadecimal, treat it

3747 // as an identity escape.	3965 // as an identity escape.

3748 return 'x';	3966 return 'x';

3749 }	3967 }

(...skipping 27 matching lines...) Expand all Loading...
3777 if (current() == '?') {	3995 if (current() == '?') {

3778 switch (next()) {	3996 switch (next()) {

3779 case ':': case '=': case '!':	3997 case ':': case '=': case '!':

3780 type = next();	3998 type = next();

3781 Advance(2);	3999 Advance(2);

3782 break;	4000 break;

3783 default:	4001 default:

3784 ReportError(CStrVector("Invalid group"), CHECK_OK);	4002 ReportError(CStrVector("Invalid group"), CHECK_OK);

3785 break;	4003 break;

3786 }	4004 }

	4005 } else {

	4006 captures_started_++;

3787 }	4007 }

	4008 int capture_index = captures_started_;

3788 RegExpTree* body = ParseDisjunction(CHECK_OK);	4009 RegExpTree* body = ParseDisjunction(CHECK_OK);

3789 if (current() != ')') {	4010 if (current() != ')') {

3790 ReportError(CStrVector("Unterminated group"), CHECK_OK);	4011 ReportError(CStrVector("Unterminated group"), CHECK_OK);

3791 }	4012 }

3792 Advance();	4013 Advance();

3793 if (type == '(') {	4014 if (type == '(') {

3794 captures_seen_++;	4015 return new RegExpCapture(body, capture_index);

3795 return new RegExpCapture(body);

3796 } else if (type == ':') {	4016 } else if (type == ':') {

3797 return body;	4017 return body;

3798 } else {	4018 } else {

3799 ASSERT(type == '=' || type == '!');	4019 ASSERT(type == '=' || type == '!');

3800 bool is_positive = (type == '=');	4020 bool is_positive = (type == '=');

3801 return new RegExpLookahead(body, is_positive);	4021 return new RegExpLookahead(body, is_positive);

3802 }	4022 }

3803 }	4023 }

3804	4024

3805	4025

3806 CharacterRange RegExpParser::ParseClassAtom(bool* is_char_class,	4026 CharacterRange RegExpParser::ParseClassAtom(bool* is_char_class,

3807 ZoneList<CharacterRange>* ranges,	4027 ZoneList<CharacterRange>* ranges,

3808 bool* ok) {	4028 bool* ok) {

3809 ASSERT_EQ(false, *is_char_class);	4029 ASSERT_EQ(false, *is_char_class);

3810 uc32 first = current();	4030 uc32 first = current();

3811 if (first == '\\') {	4031 if (first == '\\') {

3812 switch (next()) {	4032 switch (next()) {

3813 case 'b':

3814 Advance(2);

3815 return CharacterRange::Singleton('\b');

3816 case 'w': case 'W': case 'd': case 'D': case 's': case 'S': {	4033 case 'w': case 'W': case 'd': case 'D': case 's': case 'S': {

3817 *is_char_class = true;	4034 *is_char_class = true;

3818 uc32 c = next();	4035 uc32 c = next();

3819 CharacterRange::AddClassEscape(c, ranges);	4036 CharacterRange::AddClassEscape(c, ranges);

3820 Advance(2);	4037 Advance(2);

3821 return NULL;	4038 return NULL;

3822 }	4039 }

3823 default:	4040 default:

3824 uc32 c = ParseCharacterEscape(CHECK_OK);	4041 uc32 c = ParseClassCharacterEscape(CHECK_OK);

3825 return CharacterRange::Singleton(c);	4042 return CharacterRange::Singleton(c);

3826 }	4043 }

3827 } else {	4044 } else {

3828 Advance();	4045 Advance();

3829 return CharacterRange::Singleton(first);	4046 return CharacterRange::Singleton(first);

3830 }	4047 }

3831 }	4048 }

3832	4049

3833	4050

3834 RegExpTree* RegExpParser::ParseCharacterClass(bool* ok) {	4051 RegExpTree* RegExpParser::ParseCharacterClass(bool* ok) {

(...skipping 12 matching lines...) Expand all Loading...
3847 while (has_more() && current() != ']') {	4064 while (has_more() && current() != ']') {

3848 if (current() == '-') {	4065 if (current() == '-') {

3849 Advance();	4066 Advance();

3850 ranges->Add(CharacterRange::Singleton('-'));	4067 ranges->Add(CharacterRange::Singleton('-'));

3851 } else {	4068 } else {

3852 bool is_char_class = false;	4069 bool is_char_class = false;

3853 CharacterRange first = ParseClassAtom(&is_char_class, ranges, CHECK_OK);	4070 CharacterRange first = ParseClassAtom(&is_char_class, ranges, CHECK_OK);

3854 if (!is_char_class) {	4071 if (!is_char_class) {

3855 if (current() == '-') {	4072 if (current() == '-') {

3856 Advance();	4073 Advance();

3857 CharacterRange next = ParseClassAtom(&is_char_class, ranges, CHECK_OK) ;	4074 CharacterRange next =

	4075 ParseClassAtom(&is_char_class, ranges, CHECK_OK);

3858 if (is_char_class) {	4076 if (is_char_class) {

3859 return ReportError(CStrVector(kIllegal), CHECK_OK);	4077 return ReportError(CStrVector(kIllegal), CHECK_OK);

3860 }	4078 }

3861 if (first.from() > next.to()) {	4079 if (first.from() > next.to()) {

3862 return ReportError(CStrVector(kRangeOutOfOrder), CHECK_OK);	4080 return ReportError(CStrVector(kRangeOutOfOrder), CHECK_OK);

3863 }	4081 }

3864 ranges->Add(CharacterRange::Range(first.from(), next.to()));	4082 ranges->Add(CharacterRange::Range(first.from(), next.to()));

3865 } else {	4083 } else {

3866 ranges->Add(first);	4084 ranges->Add(first);

3867 }	4085 }

(...skipping 54 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3922 if (!parser.PreParseProgram(stream)) return NULL;	4140 if (!parser.PreParseProgram(stream)) return NULL;

3923 // The list owns the backing store so we need to clone the vector.	4141 // The list owns the backing store so we need to clone the vector.

3924 // That way, the result will be exactly the right size rather than	4142 // That way, the result will be exactly the right size rather than

3925 // the expected 50% too large.	4143 // the expected 50% too large.

3926 Vector<unsigned> store = parser.recorder()->store()->ToVector().Clone();	4144 Vector<unsigned> store = parser.recorder()->store()->ToVector().Clone();

3927 return new ScriptDataImpl(store);	4145 return new ScriptDataImpl(store);

3928 }	4146 }

3929	4147

3930	4148

3931 RegExpTree* ParseRegExp(unibrow::CharacterStream* stream,	4149 RegExpTree* ParseRegExp(unibrow::CharacterStream* stream,

3932 Handle<String>* error) {	4150 Handle<String>* error,

	4151 bool* has_character_escapes) {

3933 ASSERT(error->is_null());	4152 ASSERT(error->is_null());

3934 RegExpParser parser(stream, error, false); // Get multiline flag somehow	4153 RegExpParser parser(stream, error, false); // Get multiline flag somehow

3935 bool ok = true;	4154 bool ok = true;

3936 RegExpTree* result = parser.ParsePattern(&ok);	4155 RegExpTree* result = parser.ParsePattern(&ok);

3937 if (!ok) {	4156 if (!ok) {

3938 ASSERT(result == NULL);	4157 ASSERT(result == NULL);

3939 ASSERT(!error->is_null());	4158 ASSERT(!error->is_null());

3940 } else {	4159 } else {

3941 ASSERT(result != NULL);	4160 ASSERT(result != NULL);

3942 ASSERT(error->is_null());	4161 ASSERT(error->is_null());

3943 }	4162 }

	4163 if (ok && has_character_escapes != NULL) {

	4164 *has_character_escapes = parser.HasCharacterEscapes();

	4165 }

3944 return result;	4166 return result;

3945 }	4167 }

3946	4168

3947	4169

3948 FunctionLiteral* MakeAST(bool compile_in_global_context,	4170 FunctionLiteral* MakeAST(bool compile_in_global_context,

3949 Handle<Script> script,	4171 Handle<Script> script,

3950 v8::Extension* extension,	4172 v8::Extension* extension,

3951 ScriptDataImpl* pre_data) {	4173 ScriptDataImpl* pre_data) {

3952 bool allow_natives_syntax =	4174 bool allow_natives_syntax =

3953 always_allow_natives_syntax \|\|	4175 always_allow_natives_syntax \|\|

(...skipping 36 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3990 start_position,	4212 start_position,

3991 is_expression);	4213 is_expression);

3992 return result;	4214 return result;

3993 }	4215 }

3994	4216

3995	4217

3996 #undef NEW	4218 #undef NEW

3997	4219

3998	4220

3999 } } // namespace v8::internal	4221 } } // namespace v8::internal

OLD	NEW

« no previous file with comments | « regexp2000/src/parser.h ('k') | regexp2000/test/cctest/test-regexp.cc » ('j') | no next file with comments »