regexp2000/src/parser.cc - Issue 8871: Experimental RegExp: changed handling of non-standard escape sequences.

Side by Side Diff: regexp2000/src/parser.cc

Issue 8871: Experimental RegExp: changed handling of non-standard escape sequences. (Closed)

Patch Set: RegExp escape handling, with review comments. Created 12 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved.	1 // Copyright 2006-2008 the V8 project authors. All rights reserved.

2 // Redistribution and use in source and binary forms, with or without	2 // Redistribution and use in source and binary forms, with or without

3 // modification, are permitted provided that the following conditions are	3 // modification, are permitted provided that the following conditions are

4 // met:	4 // met:

5 //	5 //

6 // * Redistributions of source code must retain the above copyright	6 // * Redistributions of source code must retain the above copyright

7 // notice, this list of conditions and the following disclaimer.	7 // notice, this list of conditions and the following disclaimer.

8 // * Redistributions in binary form must reproduce the above	8 // * Redistributions in binary form must reproduce the above

9 // copyright notice, this list of conditions and the following	9 // copyright notice, this list of conditions and the following

10 // disclaimer in the documentation and/or other materials provided	10 // disclaimer in the documentation and/or other materials provided

(...skipping 212 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
223	223

224 friend class Target;	224 friend class Target;

225 friend class TargetScope;	225 friend class TargetScope;

226 friend class LexicalScope;	226 friend class LexicalScope;

227 friend class TemporaryScope;	227 friend class TemporaryScope;

228 };	228 };

229	229

230	230

231 class RegExpParser {	231 class RegExpParser {

232 public:	232 public:

233 RegExpParser(unibrow::CharacterStream* in, Handle<String>* error);	233 RegExpParser(unibrow::CharacterStream* in,

	234 Handle<String>* error,

	235 bool multiline_mode);

234 RegExpTree* ParsePattern(bool* ok);	236 RegExpTree* ParsePattern(bool* ok);

235 RegExpTree* ParseDisjunction(bool* ok);	237 RegExpTree* ParseDisjunction(bool* ok);

236 RegExpTree* ParseAlternative(bool* ok);	238 RegExpTree* ParseAlternative(bool* ok);

237 RegExpTree* ParseTerm(bool* ok);	239 RegExpTree* ParseTerm(bool* ok);

238 RegExpTree* ParseAtom(bool* ok);	240 RegExpTree* ParseAtom(bool* ok);

239 RegExpTree* ParseGroup(bool* ok);	241 RegExpTree* ParseGroup(bool* ok);

240 RegExpTree* ParseCharacterClass(bool* ok);	242 RegExpTree* ParseCharacterClass(bool* ok);

241	243

242 // Parses a {...,...} quantifier and stores the range in the given	244 // Parses a {...,...} quantifier and stores the range in the given

243 // out parameters.	245 // out parameters.

244 void* ParseIntervalQuantifier(int* min_out, int* max_out, bool* ok);	246 void* ParseIntervalQuantifier(int* min_out, int* max_out, bool* ok);

245	247

246 // Parses and returns a single escaped character. The character	248 // Parses and returns a single escaped character. The character

247 // must not be 'b' or 'B' since they are usually handled specially.	249 // must not be 'b' or 'B' since they are usually handled specially.

248 uc32 ParseCharacterEscape(bool* ok);	250 uc32 ParseCharacterEscape(bool* ok);

249	251

250 uc32 ParseHexEscape(int length);	252 // Checks whether the following is a length-digit hexadecimal number,

	253 // and sets the value if it is.

	254 bool ParseHexEscape(int length, uc32* value);

251	255

252 uc32 ParseControlEscape(bool* ok);	256 uc32 ParseControlEscape(bool* ok);

253 uc32 ParseOctalLiteral(bool* ok);	257 uc32 ParseOctalLiteral(bool* ok);

254	258

255 // Tries to parse the input as a backreference. If successful it	259 // Tries to parse the input as a backreference. If successful it

256 // stores the result in the output parameter and returns true. If	260 // stores the result in the output parameter and returns true. If

257 // it fails it will push back the characters read so the same characters	261 // it fails it will push back the characters read so the same characters

258 // can be reparsed.	262 // can be reparsed.

259 bool ParseBackreferenceIndex(int* index_out);	263 bool ParseBackreferenceIndex(int* index_out);

260	264

261 CharacterRange ParseClassAtom(bool* ok);	265 CharacterRange ParseClassAtom(bool* ok);

262 RegExpTree* ReportError(Vector<const char> message, bool* ok);	266 RegExpTree* ReportError(Vector<const char> message, bool* ok);

263 void Advance();	267 void Advance();

264 void Advance(int dist);	268 void Advance(int dist);

	269 // Pushes a read character (or potentially some other character) back

	270 // on the input stream. After pushing it back, it becomes the character

	271 // returned by current(). There is a limited amount of push-back buffer.

	272 // A function using PushBack should check that it doesn't push back more

	273 // than kMaxPushback characters, and it should not push back more characters

	274 // than it has read, or that it knows had been read prior to calling it.

	275 void PushBack(uc32 character);

	276 bool CanPushBack();

265 static const uc32 kEndMarker = unibrow::Utf8::kBadChar;	277 static const uc32 kEndMarker = unibrow::Utf8::kBadChar;

266 private:	278 private:

267 uc32 current() { return current_; }	279 uc32 current() { return current_; }

268 uc32 next() { return next_; }	280 uc32 next() { return next_; }

269 bool has_more() { return has_more_; }	281 bool has_more() { return has_more_; }

270 bool has_next() { return has_next_; }	282 bool has_next() { return has_next_; }

271 unibrow::CharacterStream* in() { return in_; }	283 unibrow::CharacterStream* in() { return in_; }

272 uc32 current_;	284 uc32 current_;

273 uc32 next_;	285 uc32 next_;

274 bool has_more_;	286 bool has_more_;

275 bool has_next_;	287 bool has_next_;

	288 bool multiline_mode_;

276 int captures_seen_;	289 int captures_seen_;

277 unibrow::CharacterStream* in_;	290 unibrow::CharacterStream* in_;

278 Handle<String>* error_;	291 Handle<String>* error_;

279 static const int kMaxPushback = 5;	292 static const int kMaxPushback = 5;

280 int pushback_count_;	293 int pushback_count_;

281 uc32 pushback_buffer_[kMaxPushback];	294 uc32 pushback_buffer_[kMaxPushback];

282 };	295 };

283	296

284	297

285 // A temporary scope stores information during parsing, just like	298 // A temporary scope stores information during parsing, just like

(...skipping 2927 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3213 args->Add(new Literal(array));	3226 args->Add(new Literal(array));

3214 return new Throw(new CallRuntime(constructor, NULL, args),	3227 return new Throw(new CallRuntime(constructor, NULL, args),

3215 scanner().location().beg_pos);	3228 scanner().location().beg_pos);

3216 }	3229 }

3217	3230

3218	3231

3219 // ----------------------------------------------------------------------------	3232 // ----------------------------------------------------------------------------

3220 // Regular expressions	3233 // Regular expressions

3221	3234

3222	3235

3223 RegExpParser::RegExpParser(unibrow::CharacterStream* in, Handle<String>* error)	3236 RegExpParser::RegExpParser(unibrow::CharacterStream* in,

	3237 Handle<String>* error,

	3238 bool multiline_mode)

3224 : current_(kEndMarker),	3239 : current_(kEndMarker),

3225 next_(kEndMarker),	3240 next_(kEndMarker),

3226 has_more_(true),	3241 has_more_(true),

3227 has_next_(true),	3242 has_next_(true),

	3243 multiline_mode_(multiline_mode),

3228 captures_seen_(0),	3244 captures_seen_(0),

3229 in_(in),	3245 in_(in),

3230 error_(error),	3246 error_(error),

3231 pushback_count_(0) {	3247 pushback_count_(0) {

3232 Advance(2);	3248 Advance(2);

3233 }	3249 }

3234	3250

3235	3251

3236 void RegExpParser::Advance() {	3252 void RegExpParser::Advance() {

3237 current_ = next_;	3253 current_ = next_;

(...skipping 10 matching lines...) Expand all Loading...
3248 }	3264 }

3249 }	3265 }

3250	3266

3251	3267

3252 void RegExpParser::Advance(int dist) {	3268 void RegExpParser::Advance(int dist) {

3253 for (int i = 0; i < dist; i++)	3269 for (int i = 0; i < dist; i++)

3254 Advance();	3270 Advance();

3255 }	3271 }

3256	3272

3257	3273

	3274 void RegExpParser::PushBack(uc32 character) {

	3275 if (has_next_) {

	3276 ASSERT(pushback_count_ < kMaxPushback);

	3277 pushback_buffer_[pushback_count_] = next_;

	3278 pushback_count_++;

	3279 }

	3280 if (has_more_) {

	3281 next_ = current_;

	3282 has_next_ = true;

	3283 }

	3284 current_ = character;

	3285 has_more_ = true;

	3286 }

	3287

	3288

	3289 bool RegExpParser::CanPushBack() {

	3290 return (pushback_count_ < kMaxPushback);

	3291 }

	3292

	3293

3258 RegExpTree* RegExpParser::ReportError(Vector<const char> message, bool* ok) {	3294 RegExpTree* RegExpParser::ReportError(Vector<const char> message, bool* ok) {

3259 *ok = false;	3295 *ok = false;

3260 *error_ = Factory::NewStringFromAscii(message, NOT_TENURED);	3296 *error_ = Factory::NewStringFromAscii(message, NOT_TENURED);

3261 return NULL;	3297 return NULL;

3262 }	3298 }

3263	3299

3264	3300

3265 // Pattern ::	3301 // Pattern ::

3266 // Disjunction	3302 // Disjunction

3267 RegExpTree* RegExpParser::ParsePattern(bool* ok) {	3303 RegExpTree* RegExpParser::ParsePattern(bool* ok) {

(...skipping 83 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3351 default:	3387 default:

3352 return false;	3388 return false;

3353 }	3389 }

3354 }	3390 }

3355	3391

3356	3392

3357 bool RegExpParser::ParseBackreferenceIndex(int* index_out) {	3393 bool RegExpParser::ParseBackreferenceIndex(int* index_out) {

3358 ASSERT_EQ('\\', current());	3394 ASSERT_EQ('\\', current());

3359 ASSERT('1' <= next() && next() <= '9');	3395 ASSERT('1' <= next() && next() <= '9');

3360 ASSERT_EQ(0, pushback_count_);	3396 ASSERT_EQ(0, pushback_count_);

	3397 // Try to parse a decimal literal that is no greater than the number

	3398 // of previously encountered left capturing parentheses.

	3399 // This is not according to the ECMAScript specification. According to

	3400 // that, one must accept values up to the total number of left capturing

	3401 // parentheses in the entire input, even if they are meaningless.

3361 if (captures_seen_ == 0)	3402 if (captures_seen_ == 0)

3362 return false;	3403 return false;

3363 int value = next() - '0';	3404 int value = next() - '0';

3364 if (value > captures_seen_)	3405 if (value > captures_seen_)

3365 return false;	3406 return false;

3366 static const int kMaxChars = kMaxPushback - 2;	3407 static const int kMaxChars = kMaxPushback - 2;

3367 EmbeddedVector<uc32, kMaxChars> chars_seen;	3408 EmbeddedVector<uc32, kMaxChars> chars_seen;

3368 chars_seen[0] = next();	3409 chars_seen[0] = next();

3369 int char_count = 1;	3410 int char_count = 1;

3370 Advance(2);	3411 Advance(2);

3371 while (true) {	3412 while (true) {

3372 uc32 c = current();	3413 uc32 c = current();

3373 if (IsDecimalDigit(c)) {	3414 if (IsDecimalDigit(c)) {

3374 int next_value = 10 * value + (c - '0');	3415 int next_value = 10 * value + (c - '0');

3375 // To avoid reading past the end of the stack-allocated pushback	3416 // To avoid reading past the end of the stack-allocated pushback

3376 // buffers we only read kMaxChars before giving up.	3417 // buffers we only read kMaxChars before giving up.

3377 if (next_value > captures_seen_ \|\| char_count > kMaxChars) {	3418 if (next_value > captures_seen_ \|\| char_count > kMaxChars) {

3378 // If we give up we have to push the characters we read back	3419 // If we give up we have to push the characters we read back

3379 // onto the pushback buffer in the reverse order.	3420 // onto the pushback buffer in the reverse order.

3380 pushback_buffer_[0] = current();	3421 for (int i = 0; i < char_count; i++) {

3381 for (int i = 0; i < char_count; i++)	3422 PushBack(chars_seen[char_count - i - 1]);

3382 pushback_buffer_[i + 1] = chars_seen[char_count - i - 1];	3423 }

3383 pushback_buffer_[char_count + 1] = '\\';	3424 PushBack('\\');

3384 pushback_count_ = char_count + 2;

3385 // Then, once we've filled up the buffer, we read the two

3386 // first characters into the lookahead. This is a roundabout

3387 // way of doing it but makes the code simpler.

3388 Advance(2);

3389 return false;	3425 return false;

3390 } else {

3391 value = next_value;

3392 chars_seen[char_count++] = current();

3393 Advance();

3394 }	3426 }

	3427 value = next_value;

	3428 chars_seen[char_count++] = current();

	3429 Advance();

3395 } else {	3430 } else {

3396 *index_out = value;	3431 *index_out = value;

3397 return true;	3432 return true;

3398 }	3433 }

3399 }	3434 }

3400 }	3435 }

3401	3436

3402	3437

3403 // Term ::	3438 // Term ::

3404 // Assertion	3439 // Assertion

3405 // Atom	3440 // Atom

3406 // Atom Quantifier	3441 // Atom Quantifier

3407 RegExpTree* RegExpParser::ParseTerm(bool* ok) {	3442 RegExpTree* RegExpParser::ParseTerm(bool* ok) {

3408 RegExpTree* atom = NULL;	3443 RegExpTree* atom = NULL;

3409 switch (current()) {	3444 switch (current()) {

3410 // Assertion ::	3445 // Assertion ::

3411 // ^	3446 // ^

3412 // $	3447 // $

3413 // \ b	3448 // \ b

3414 // \ B	3449 // \ B

3415 case '^':	3450 case '^':

3416 Advance();	3451 Advance();

3417 // Make the type of assertion dependent on multi/nonmultiline.	3452 return new RegExpAssertion(

3418 return new RegExpAssertion(RegExpAssertion::START_OF_INPUT);	3453 multiline_mode_ ? RegExpAssertion::START_OF_LINE

	3454 : RegExpAssertion::START_OF_INPUT);

3419 case '$':	3455 case '$':

3420 Advance();	3456 Advance();

3421 // Make the type of assertion dependent on multi/nonmultiline.	3457 return new RegExpAssertion(

3422 return new RegExpAssertion(RegExpAssertion::END_OF_INPUT);	3458 multiline_mode_ ? RegExpAssertion::END_OF_LINE

	3459 : RegExpAssertion::END_OF_INPUT);

3423 case '.':	3460 case '.':

3424 Advance();	3461 Advance();

3425 atom = new RegExpCharacterClass(CharacterRange::CharacterClass('.'));	3462 atom = new RegExpCharacterClass(CharacterRange::CharacterClass('.'));

3426 break;	3463 break;

3427 case '(':	3464 case '(':

3428 atom = ParseGroup(CHECK_OK);	3465 atom = ParseGroup(CHECK_OK);

3429 break;	3466 break;

3430 case '[':	3467 case '[':

3431 atom = ParseCharacterClass(CHECK_OK);	3468 atom = ParseCharacterClass(CHECK_OK);

3432 break;	3469 break;

(...skipping 20 matching lines...) Expand all Loading...
3453 goto has_read_atom;	3490 goto has_read_atom;

3454 }	3491 }

3455 case '1': case '2': case '3': case '4': case '5': case '6':	3492 case '1': case '2': case '3': case '4': case '5': case '6':

3456 case '7': case '8': case '9': {	3493 case '7': case '8': case '9': {

3457 int index = 0;	3494 int index = 0;

3458 if (ParseBackreferenceIndex(&index)) {	3495 if (ParseBackreferenceIndex(&index)) {

3459 atom = new RegExpBackreference(index);	3496 atom = new RegExpBackreference(index);

3460 goto has_read_atom;	3497 goto has_read_atom;

3461 } else {	3498 } else {

3462 // If this is not a backreference we go to the atom parser	3499 // If this is not a backreference we go to the atom parser

3463 // which will read it as an octal escape.	3500 // which will read it as an octal escape or identity escape.

3464 goto parse_atom;	3501 goto parse_atom;

3465 }	3502 }

3466 }	3503 }

3467 default:	3504 default:

3468 goto parse_atom;	3505 goto parse_atom;

3469 }	3506 }

3470 }	3507 }

3471 // All other escapes fall through to the default case since	3508 // All other escapes fall through to the default case since

3472 // they correspond to single characters that can be	3509 // they correspond to single characters that can be

3473 // represented within atoms.	3510 // represented within atoms.

(...skipping 110 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3584 uc32 escape = ParseCharacterEscape(CHECK_OK);	3621 uc32 escape = ParseCharacterEscape(CHECK_OK);

3585 buf->Add(escape);	3622 buf->Add(escape);

3586 }	3623 }

3587 } else {	3624 } else {

3588 break;	3625 break;

3589 }	3626 }

3590 }	3627 }

3591 return new RegExpAtom(buf->ToConstVector());	3628 return new RegExpAtom(buf->ToConstVector());

3592 }	3629 }

3593	3630

	3631 // Upper and lower case letters differ by one bit.

	3632 STATIC_CHECK('a'^'A' == 0x20);

3594	3633

3595 uc32 RegExpParser::ParseControlEscape(bool* ok) {	3634 uc32 RegExpParser::ParseControlEscape(bool* ok) {

3596 ASSERT(current() == 'c');	3635 ASSERT(current() == 'c');

3597 Advance();	3636 Advance();

3598 if (!has_more()) {	3637 if (!has_more()) {

3599 ReportError(CStrVector("\\c at end of pattern"), ok);	3638 ReportError(CStrVector("\\c at end of pattern"), ok);

3600 return '\0';	3639 return '\0';

3601 } else {

3602 uc32 letter = current();

3603 if (!('a' <= letter && letter <= 'z') &&

3604 !('A' <= letter && letter <= 'Z')) {

3605 ReportError(CStrVector("Illegal control letter"), ok);

3606 return '\0';

3607 }

3608 Advance();

3609 return letter & ((1 << 5) - 1);

3610 }	3640 }

	3641 uc32 letter = current() & ~(0x20); // Collapse upper and lower case letters.

	3642 if (letter < 'A' \|\| 'Z' < letter) {

	3643 // Non-spec error-correction: "\c" followed by non-control letter is

	3644 // interpreted as an IdentityEscape.

	3645 return 'c';

	3646 }

	3647 Advance();

	3648 return letter & 0x1f; // Remainder modulo 32, per specification.

3611 }	3649 }

3612	3650

3613	3651

3614 uc32 RegExpParser::ParseOctalLiteral(bool* ok) {	3652 uc32 RegExpParser::ParseOctalLiteral(bool* ok) {

3615 ASSERT('0' <= current() && current() <= '7');	3653 ASSERT('0' <= current() && current() <= '7');

3616 // Here we're really supposed to break out after the first digit	3654 // For compatibility with some other browsers (not all), we parse

3617 // if it is '0' but the other implementations don't do that so	3655 // up to three octal digits with a value below 256.

3618 // neither do we. Is this deviation from the spec error prone?	3656 uc32 value = current() - '0';

3619 // Yes, it's probably as error prone as it's possible to get. Isn't	3657 Advance();

3620 // JavaScript wonderful?	3658 if ('0' <= current() && current() <= '7') {

3621 uc32 value = 0;	3659 value = value * 8 + current() - '0';

3622 while ('0' <= current() && current() <= '7') {	3660 Advance();

3623 int next = (8 * value) + (current() - '0');	3661 if (value < 32 && '0' <= current() && current() <= '7') {

3624 if (next >= 256) {	3662 value = value * 8 + current() - '0';

3625 break;

3626 } else {

3627 value = next;

3628 Advance();	3663 Advance();

3629 }	3664 }

3630 }	3665 }

3631 return value;	3666 return value;

3632 }	3667 }

3633	3668

3634	3669 bool RegExpParser::ParseHexEscape(int length, uc32 *value) {

3635 uc32 RegExpParser::ParseHexEscape(int length) {	3670 static const int kMaxChars = kMaxPushback;

3636 uc32 value = 0;	3671 EmbeddedVector<uc32, kMaxChars> chars_seen;

3637 for (int i = 0; i < length; i++) {	3672 ASSERT(length <= kMaxChars);

3638 int d = HexValue(current());	3673 uc32 val = 0;

3639 if (d < 0)	3674 bool done = false;

3640 return value;	3675 for (int i = 0; !done; i++) {

3641 value = value * 16 + d;	3676 uc32 c = current();

	3677 int d = HexValue(c);

	3678 if (d < 0) {

	3679 while (i > 0) {

	3680 i--;

	3681 PushBack(chars_seen[i]);

	3682 }

	3683 return false;

	3684 }

	3685 val = val * 16 + d;

3642 Advance();	3686 Advance();

	3687 if (i < length - 1) {

	3688 chars_seen[i] = c;

	3689 } else {

	3690 done = true;

	3691 }

3643 }	3692 }

3644	3693 *value = val;

3645 return value;	3694 return true;

3646 }	3695 }

3647	3696

3648	3697

3649 uc32 RegExpParser::ParseCharacterEscape(bool* ok) {	3698 uc32 RegExpParser::ParseCharacterEscape(bool* ok) {

3650 ASSERT(current() == '\\');	3699 ASSERT(current() == '\\');

3651 ASSERT(has_next() && !IsSpecialEscape(next()));	3700 ASSERT(has_next() && !IsSpecialEscape(next()));

3652 Advance();	3701 Advance();

3653 ASSERT(current() != 'b' && current() != 'B');	3702 ASSERT(current() != 'b' && current() != 'B');

3654 switch (current()) {	3703 switch (current()) {

3655 // ControlEscape :: one of	3704 // ControlEscape :: one of

3656 // f n r t v	3705 // f n r t v

3657 case 'f':	3706 case 'f':

3658 Advance();	3707 Advance();

3659 return '\f';	3708 return '\f';

3660 case 'n':	3709 case 'n':

3661 Advance();	3710 Advance();

3662 return '\n';	3711 return '\n';

3663 case 'r':	3712 case 'r':

3664 Advance();	3713 Advance();

3665 return '\r';	3714 return '\r';

3666 case 't':	3715 case 't':

3667 Advance();	3716 Advance();

3668 return '\t';	3717 return '\t';

3669 case 'v':	3718 case 'v':

3670 Advance();	3719 Advance();

3671 return '\v';	3720 return '\v';

3672 case 'c':	3721 case 'c':

	3722 // Spec mandates that next character is ASCII letter.

	3723 // If not, we error-correct by interpreting "\c" as "c".

3673 return ParseControlEscape(ok);	3724 return ParseControlEscape(ok);

3674 case '0': case '1': case '2': case '3': case '4': case '5':	3725 case '0': case '1': case '2': case '3': case '4': case '5':

3675 case '6': case '7':	3726 case '6': case '7':

3676 // We're really supposed to read this as a decimal integer	3727 // For compatibility, we interpret a decimal escape that isn't

3677 // literal which is base 10 but for whatever reason the other	3728 // a back reference (and therefore either \0 or not valid according

3678 // implementations read base 8. It's hard to believe that the	3729 // to the specification) as a 1..3 digit octal character code.

3679 // spec was written by some of the same people that wrote the

3680 // other implementations...

3681 return ParseOctalLiteral(ok);	3730 return ParseOctalLiteral(ok);

3682 case 'x':	3731 case 'x': {

3683 Advance();	3732 Advance();

3684 return ParseHexEscape(2);	3733 uc32 value;

3685 case 'A': case 'Z': {	3734 if (ParseHexEscape(2, &value)) {

	3735 return value;

	3736 }

	3737 // If \x is not followed by a two-digit hexadecimal, treat it

	3738 // as an identity escape.

	3739 return 'x';

	3740 }

	3741 case 'u': {

	3742 Advance();

	3743 uc32 value;

	3744 if (ParseHexEscape(4, &value)) {

	3745 return value;

	3746 }

	3747 // If \u is not followed by a four-digit hexadecimal, treat it

	3748 // as an identity escape.

	3749 return 'u';

	3750 }

	3751 default: {

	3752 // Extended identity escape. We accept any character that hasn't

	3753 // been matched by a more specific case, not just the subset required

	3754 // by the ECMAScript specification.

3686 uc32 result = current();	3755 uc32 result = current();

3687 Advance();	3756 Advance();

3688 return result;	3757 return result;

3689 }

3690 default: {

3691 ASSERT(!Scanner::kIsIdentifierPart.get(current()));

3692 uc32 result = current();

3693 Advance();

3694 return result;

3695 }	3758 }

3696 }	3759 }

3697 return 0;	3760 return 0;

3698 }	3761 }

3699	3762

3700	3763

3701 RegExpTree* RegExpParser::ParseGroup(bool* ok) {	3764 RegExpTree* RegExpParser::ParseGroup(bool* ok) {

3702 ASSERT_EQ(current(), '(');	3765 ASSERT_EQ(current(), '(');

3703 char type = '(';	3766 char type = '(';

3704 Advance();	3767 Advance();

(...skipping 135 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3840 // That way, the result will be exactly the right size rather than	3903 // That way, the result will be exactly the right size rather than

3841 // the expected 50% too large.	3904 // the expected 50% too large.

3842 Vector<unsigned> store = parser.recorder()->store()->ToVector().Clone();	3905 Vector<unsigned> store = parser.recorder()->store()->ToVector().Clone();

3843 return new ScriptDataImpl(store);	3906 return new ScriptDataImpl(store);

3844 }	3907 }

3845	3908

3846	3909

3847 RegExpTree* ParseRegExp(unibrow::CharacterStream* stream,	3910 RegExpTree* ParseRegExp(unibrow::CharacterStream* stream,

3848 Handle<String>* error) {	3911 Handle<String>* error) {

3849 ASSERT(error->is_null());	3912 ASSERT(error->is_null());

3850 RegExpParser parser(stream, error);	3913 RegExpParser parser(stream, error, false); // Get multiline flag somehow

3851 bool ok = true;	3914 bool ok = true;

3852 RegExpTree* result = parser.ParsePattern(&ok);	3915 RegExpTree* result = parser.ParsePattern(&ok);

3853 if (!ok) {	3916 if (!ok) {

3854 ASSERT(result == NULL);	3917 ASSERT(result == NULL);

3855 ASSERT(!error->is_null());	3918 ASSERT(!error->is_null());

3856 } else {	3919 } else {

3857 ASSERT(result != NULL);	3920 ASSERT(result != NULL);

3858 ASSERT(error->is_null());	3921 ASSERT(error->is_null());

3859 }	3922 }

3860 return result;	3923 return result;

(...skipping 45 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3906 start_position,	3969 start_position,

3907 is_expression);	3970 is_expression);

3908 return result;	3971 return result;

3909 }	3972 }

3910	3973

3911	3974

3912 #undef NEW	3975 #undef NEW

3913	3976

3914	3977

3915 } } // namespace v8::internal	3978 } } // namespace v8::internal

OLD	NEW

« no previous file with comments | « no previous file | regexp2000/test/cctest/test-regexp.cc » ('j') | no next file with comments »