src/parser.cc - Issue 8188: Some new regexp infrastructure.

Side by Side Diff: src/parser.cc

Issue 8188: Some new regexp infrastructure. (Closed)

Patch Set: Created 12 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved.	1 // Copyright 2006-2008 the V8 project authors. All rights reserved.

2 // Redistribution and use in source and binary forms, with or without	2 // Redistribution and use in source and binary forms, with or without

3 // modification, are permitted provided that the following conditions are	3 // modification, are permitted provided that the following conditions are

4 // met:	4 // met:

5 //	5 //

6 // * Redistributions of source code must retain the above copyright	6 // * Redistributions of source code must retain the above copyright

7 // notice, this list of conditions and the following disclaimer.	7 // notice, this list of conditions and the following disclaimer.

8 // * Redistributions in binary form must reproduce the above	8 // * Redistributions in binary form must reproduce the above

9 // copyright notice, this list of conditions and the following	9 // copyright notice, this list of conditions and the following

10 // disclaimer in the documentation and/or other materials provided	10 // disclaimer in the documentation and/or other materials provided

(...skipping 16 matching lines...) Expand all Loading...
27	27

28 #include "v8.h"	28 #include "v8.h"

29	29

30 #include "api.h"	30 #include "api.h"

31 #include "ast.h"	31 #include "ast.h"

32 #include "bootstrapper.h"	32 #include "bootstrapper.h"

33 #include "platform.h"	33 #include "platform.h"

34 #include "runtime.h"	34 #include "runtime.h"

35 #include "parser.h"	35 #include "parser.h"

36 #include "scopes.h"	36 #include "scopes.h"

	37 #include "string-stream.h"

37	38

38 namespace v8 { namespace internal {	39 namespace v8 { namespace internal {

39	40

40 class ParserFactory;	41 class ParserFactory;

41 class ParserLog;	42 class ParserLog;

42 class TemporaryScope;	43 class TemporaryScope;

43 template <typename T> class ZoneListWrapper;	44 template <typename T> class ZoneListWrapper;

44	45

45	46

46 class Parser {	47 class Parser {

(...skipping 173 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
220 Handle<String> type,	221 Handle<String> type,

221 Vector< Handle<Object> > arguments);	222 Vector< Handle<Object> > arguments);

222	223

223 friend class Target;	224 friend class Target;

224 friend class TargetScope;	225 friend class TargetScope;

225 friend class LexicalScope;	226 friend class LexicalScope;

226 friend class TemporaryScope;	227 friend class TemporaryScope;

227 };	228 };

228	229

229	230

	231 class RegExpParser {

	232 public:

	233 RegExpParser(unibrow::CharacterStream* in, Handle<String>* error);

	234 RegExpTree* ParsePattern(bool* ok);

	235 RegExpTree* ParseDisjunction(bool* ok);

	236 RegExpTree* ParseAlternative(bool* ok);

	237 RegExpTree* ParseTerm(bool* ok);

	238 RegExpTree* ParseAtom(bool* ok);

	239 RegExpTree* ParseGroup(bool* ok);

	240 RegExpTree* ParseCharacterClass(bool* ok);

	241

	242 // Parses a {...,...} quantifier and stores the range in the given

	243 // out parameters.

	244 void* ParseIntervalQuantifier(int* min_out, int* max_out, bool* ok);

	245

	246 // Parses and returns a single escaped character. The character

	247 // must not be 'b' or 'B' since they are usually handled specially.

	248 uc32 ParseCharacterEscape(bool* ok);

	249

	250 uc32 ParseHexEscape(int length);

	251

	252 uc32 ParseControlEscape(bool* ok);

	253 uc32 ParseOctalLiteral(bool* ok);

	254

	255 CharacterRange ParseClassAtom(bool* ok);

	256 RegExpTree* ReportError(Vector<const char> message, bool* ok);

	257 void Advance();

	258 void Advance(int dist);

	259 private:

	260 uc32 current() { return current_; }

	261 uc32 next() { return next_; }

	262 bool has_more() { return has_more_; }

	263 bool has_next() { return has_next_; }

	264 unibrow::CharacterStream* in() { return in_; }

	265 uc32 current_;

	266 uc32 next_;

	267 bool has_more_;

	268 bool has_next_;

	269 int captures_seen_;

	270 unibrow::CharacterStream* in_;

	271 Handle<String>* error_;

	272 };

	273

	274

230 // A temporary scope stores information during parsing, just like	275 // A temporary scope stores information during parsing, just like

231 // a plain scope. However, temporary scopes are not kept around	276 // a plain scope. However, temporary scopes are not kept around

232 // after parsing or referenced by syntax trees so they can be stack-	277 // after parsing or referenced by syntax trees so they can be stack-

233 // allocated and hence used by the pre-parser.	278 // allocated and hence used by the pre-parser.

234 class TemporaryScope BASE_EMBEDDED {	279 class TemporaryScope BASE_EMBEDDED {

235 public:	280 public:

236 explicit TemporaryScope(Parser* parser);	281 explicit TemporaryScope(Parser* parser);

237 ~TemporaryScope();	282 ~TemporaryScope();

238	283

239 int NextMaterializedLiteralIndex() {	284 int NextMaterializedLiteralIndex() {

(...skipping 2915 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3155 }	3200 }

3156 ZoneList<Expression> args = new ZoneList<Expression*>(2);	3201 ZoneList<Expression> args = new ZoneList<Expression*>(2);

3157 args->Add(new Literal(type));	3202 args->Add(new Literal(type));

3158 args->Add(new Literal(array));	3203 args->Add(new Literal(array));

3159 return new Throw(new CallRuntime(constructor, NULL, args),	3204 return new Throw(new CallRuntime(constructor, NULL, args),

3160 scanner().location().beg_pos);	3205 scanner().location().beg_pos);

3161 }	3206 }

3162	3207

3163	3208

3164 // ----------------------------------------------------------------------------	3209 // ----------------------------------------------------------------------------

	3210 // Regular expressions.

	3211

	3212 RegExpParser::RegExpParser(unibrow::CharacterStream* in, Handle<String>* error)

	3213 : current_('\0'),

	3214 next_('\0'),

	3215 has_more_(true),

	3216 has_next_(true),

	3217 captures_seen_(0),

	3218 in_(in),

	3219 error_(error) {

	3220 Advance(2);

	3221 }

	3222

	3223 void RegExpParser::Advance() {

	3224 current_ = next_;

	3225 has_more_ = has_next_;

	3226 if (in()->has_more()) {

	3227 next_ = in()->GetNext();

	3228 } else {

	3229 next_ = '\0';

	3230 has_next_ = false;

	3231 }

	3232 }

	3233

	3234 void RegExpParser::Advance(int dist) {

	3235 for (int i = 0; i < dist; i++)

	3236 Advance();

	3237 }

	3238

	3239 RegExpTree* RegExpParser::ReportError(Vector<const char> message, bool* ok) {

	3240 *ok = false;

	3241 *error_ = Factory::NewStringFromAscii(message, NOT_TENURED);

	3242 return NULL;

	3243 }

	3244

	3245 // Pattern ::

	3246 // Disjunction

	3247 RegExpTree* RegExpParser::ParsePattern(bool* ok) {

	3248 return ParseDisjunction(ok);

	3249 }

	3250

	3251 // Disjunction ::

	3252 // Alternative

	3253 // Alternative \| Disjunction

	3254 RegExpTree* RegExpParser::ParseDisjunction(bool* ok) {

	3255 RegExpTree* first = ParseAlternative(CHECK_OK);

	3256 if (current() == '\|') {

	3257 ZoneList<RegExpTree> nodes = new ZoneList<RegExpTree*>(2);

	3258 nodes->Add(first);

	3259 while (current() == '\|') {

	3260 Advance();

	3261 RegExpTree* next = ParseAlternative(CHECK_OK);

	3262 nodes->Add(next);

	3263 }

	3264 return new RegExpDisjunction(nodes);

	3265 } else {

	3266 return first;

	3267 }

	3268 }

	3269

	3270 static bool IsAlternativeTerminator(uc32 c) {

	3271 return c == '\|' \|\| c == ')' \|\| c == '\0';
	Erik Corry 2008/10/27 14:58:44 This probably won't work in the long run, can't JS strings contain nulls? Christian Plesner Hansen 2008/10/27 18:57:02 Hmm. I've replaced it with a special kEndMarker which is just a different name for kBadChar.
	3272 }

	3273

	3274 // Alternative ::

	3275 // [empty]

	3276 // Alternative Term

	3277 RegExpTree* RegExpParser::ParseAlternative(bool* ok) {

	3278 if (!IsAlternativeTerminator(current())) {

	3279 RegExpTree* first = ParseTerm(CHECK_OK);

	3280 if (!IsAlternativeTerminator(current())) {

	3281 ZoneList<RegExpTree> nodes = new ZoneList<RegExpTree*>(2);

	3282 nodes->Add(first);

	3283 while (!IsAlternativeTerminator(current())) {

	3284 RegExpTree* next = ParseTerm(CHECK_OK);

	3285 nodes->Add(next);

	3286 }

	3287 return new RegExpAlternative(nodes);

	3288 } else {

	3289 return first;

	3290 }

	3291 } else {

	3292 return RegExpEmpty::GetInstance();

	3293 }

	3294 }

	3295

	3296

	3297 class SourceCharacter {

	3298 public:

	3299 static bool Is(uc32 c) {

	3300 switch (c) {

	3301 // case ']': case '}':

	3302 // In spidermonkey and jsc these are treated as source characters

	3303 // so we do too.

	3304 case '^': case '$': case '\\': case '.': case '*': case '+':

	3305 case '?': case '(': case ')': case '[': case '{': case '\|':

	3306 return false;

	3307 default:

	3308 return true;

	3309 }

	3310 }

	3311 };

	3312

	3313

	3314 static unibrow::Predicate<SourceCharacter> source_character;

	3315

	3316

	3317 static inline bool IsSourceCharacter(uc32 c) {

	3318 return source_character.get(c);

	3319 }

	3320

	3321

	3322 static bool IsSpecialEscape(uc32 c) {

	3323 switch (c) {

	3324 case 'b': case 'B': case 'd': case 'D': case 's': case 'S':

	3325 case 'w': case 'W':

	3326 return true;

	3327 default:

	3328 return false;

	3329 }

	3330 }

	3331

	3332

	3333 // Term ::

	3334 // Assertion

	3335 // Atom

	3336 // Atom Quantifier

	3337 RegExpTree* RegExpParser::ParseTerm(bool* ok) {

	3338 RegExpTree* atom = NULL;

	3339 switch (current()) {

	3340 // Assertion ::

	3341 // ^

	3342 // $

	3343 // \ b

	3344 // \ B

	3345 case '^':

	3346 Advance();

	3347 return new RegExpAssertion(RegExpAssertion::START);

	3348 case '$':

	3349 Advance();

	3350 return new RegExpAssertion(RegExpAssertion::END);

	3351 case '.':

	3352 Advance();

	3353 atom = new RegExpCharacterClass(CharacterRange::Special('.'));

	3354 break;

	3355 case '(':

	3356 atom = ParseGroup(CHECK_OK);

	3357 break;

	3358 case '[':

	3359 atom = ParseCharacterClass(CHECK_OK);

	3360 break;

	3361 // Atom ::

	3362 // \ AtomEscape

	3363 case '\\':

	3364 if (has_next()) {

	3365 switch (next()) {

	3366 case 'b':

	3367 Advance(2);

	3368 return new RegExpAssertion(RegExpAssertion::BOUNDARY);

	3369 case 'B':

	3370 Advance(2);

	3371 return new RegExpAssertion(RegExpAssertion::NON_BOUNDARY);

	3372 // AtomEscape ::

	3373 // CharacterClassEscape

	3374 //

	3375 // CharacterClassEscape :: one of

	3376 // d D s S w W

	3377 case 'd': case 'D': case 's': case 'S': case 'w': case 'W': {

	3378 uc32 c = next();

	3379 Advance(2);

	3380 atom = new RegExpCharacterClass(CharacterRange::Special(c));

	3381 goto has_read_atom;

	3382 }

	3383 // TODO: backreferences

	3384 default:

	3385 break;

	3386 }

	3387 }

	3388 // All other escapes fall through to the default case since

	3389 // they correspond to single characters that can be

	3390 // represented within atoms.

	3391 default: {

	3392 atom = ParseAtom(CHECK_OK);

	3393 break;

	3394 }

	3395 }

	3396 has_read_atom:

	3397 int min;

	3398 int max;

	3399 switch (current()) {

	3400 // QuantifierPrefix ::

	3401 // *

	3402 // +

	3403 // ?

	3404 case '*':

	3405 min = 0;

	3406 max = RegExpQuantifier::kInfinity;

	3407 Advance();

	3408 break;

	3409 case '+':

	3410 min = 1;

	3411 max = RegExpQuantifier::kInfinity;

	3412 Advance();

	3413 break;

	3414 case '?':

	3415 min = 0;

	3416 max = 1;

	3417 Advance();

	3418 break;

	3419 case '{':

	3420 ParseIntervalQuantifier(&min, &max, CHECK_OK);

	3421 break;

	3422 default:

	3423 return atom;

	3424 }

	3425 bool is_greedy = true;

	3426 if (current() == '?') {

	3427 is_greedy = false;

	3428 Advance();

	3429 }

	3430 return new RegExpQuantifier(min, max, is_greedy, atom);

	3431 }

	3432

	3433

	3434 // QuantifierPrefix ::

	3435 // { DecimalDigits }

	3436 // { DecimalDigits , }

	3437 // { DecimalDigits , DecimalDigits }

	3438 void* RegExpParser::ParseIntervalQuantifier(int* min_out,

	3439 int* max_out,

	3440 bool* ok) {

	3441 ASSERT_EQ(current(), '{');

	3442 static const char* kInvalidQuantifier = "Invalid quantifier";

	3443 Advance();

	3444 int min = 0;

	3445 if (!IsDecimalDigit(current())) {

	3446 // JSC allows {} and {,} as quantifiers (and { and } and all

	3447 // sorts of crazy stuff) but my puny human brain has been unable

	3448 // to figure out what they mean exactly, if anything. For now

	3449 // we follow the spec and report a syntax error.

	3450 ReportError(CStrVector(kInvalidQuantifier), CHECK_OK);

	3451 }

	3452 while (IsDecimalDigit(current())) {

	3453 min = 10 * min + (current() - '0');

	3454 Advance();

	3455 }

	3456 int max = 0;

	3457 if (current() == '}') {

	3458 max = min;

	3459 Advance();

	3460 } else if (current() == ',') {

	3461 Advance();

	3462 if (current() == '}') {

	3463 Advance();

	3464 max = RegExpQuantifier::kInfinity;
	Lasse Reichstein 2008/10/27 13:12:58 All the other cases have the Advance() call after the assignment. Swapping the two lines might cause slightly less confusion.
	3465 } else {

	3466 while (IsDecimalDigit(current())) {

	3467 max = 10 * max + (current() - '0');

	3468 Advance();

	3469 }

	3470 if (current() != '}') {

	3471 ReportError(CStrVector(kInvalidQuantifier), CHECK_OK);

	3472 }

	3473 Advance();

	3474 }

	3475 } else {

	3476 ReportError(CStrVector(kInvalidQuantifier), CHECK_OK);

	3477 }

	3478 *min_out = min;

	3479 *max_out = max;

	3480 return NULL;

	3481 }

	3482

	3483

	3484 RegExpTree* RegExpParser::ParseAtom(bool* ok) {

	3485 ASSERT(current() == '\\' \|\| IsSourceCharacter(current()));

	3486 ZoneList<uc16>* buf = new ZoneList<uc16>(4);

	3487 while (true) {

	3488 if (IsSourceCharacter(current())) {

	3489 buf->Add(current());

	3490 Advance();

	3491 } else if (current() == '\\') {

	3492 if (!has_next()) {

	3493 ReportError(CStrVector("\\ at end of pattern"), CHECK_OK);

	3494 } else if (IsSpecialEscape(next())) {

	3495 // If the next thing we see is a special escape we stop

	3496 // reading this atom.

	3497 break;

	3498 } else {

	3499 uc32 escape = ParseCharacterEscape(CHECK_OK);

	3500 buf->Add(escape);

	3501 }

	3502 } else {

	3503 break;

	3504 }

	3505 }

	3506 return new RegExpAtom(buf->ToConstVector());

	3507 }

	3508

	3509

	3510 uc32 RegExpParser::ParseControlEscape(bool* ok) {

	3511 ASSERT(current() == 'c');

	3512 Advance();

	3513 if (!has_more()) {

	3514 ReportError(CStrVector("\\c at end of pattern"), ok);

	3515 return '\0';

	3516 } else {

	3517 uc32 letter = current();

	3518 if (!('a' <= letter && letter <= 'z') &&

	3519 !('A' <= letter && letter <= 'Z')) {

	3520 ReportError(CStrVector("Illegal control letter"), ok);

	3521 return '\0';

	3522 }

	3523 Advance();

	3524 return letter & ((1 << 5) - 1);

	3525 }

	3526 }

	3527

	3528

	3529 uc32 RegExpParser::ParseOctalLiteral(bool* ok) {

	3530 ASSERT('0' <= current() && current() <= '7');

	3531 // Here we're really supposed to break out after the first digit

	3532 // if it is '0' but the other implementations don't do that so

	3533 // neither do we. Is this deviation from the spec error prone?

	3534 // Yes, it's probably as error prone as it's possible to get. Isn't

	3535 // JavaScript wonderful?

	3536 uc32 value = 0;

	3537 while ('0' <= current() && current() <= '7') {

	3538 int next = (8 * value) + (current() - '0');

	3539 if (next >= 256) {

	3540 break;

	3541 } else {

	3542 value = next;

	3543 Advance();

	3544 }

	3545 }

	3546 return value;

	3547 }

	3548

	3549

	3550 uc32 RegExpParser::ParseHexEscape(int length) {

	3551 uc32 value = 0;

	3552 for (int i = 0; i < length; i++) {

	3553 int d = HexValue(current());

	3554 if (d < 0)

	3555 return value;

	3556 value = value * 16 + d;

	3557 Advance();

	3558 }

	3559

	3560 return value;

	3561 }

	3562

	3563

	3564 uc32 RegExpParser::ParseCharacterEscape(bool* ok) {

	3565 ASSERT(current() == '\\');

	3566 ASSERT(has_next() && !IsSpecialEscape(next()));

	3567 Advance();

	3568 ASSERT(current() != 'b' && current() != 'B');

	3569 switch (current()) {

	3570 // ControlEscape :: one of

	3571 // f n r t v

	3572 case 'f':

	3573 Advance();

	3574 return '\f';

	3575 case 'n':

	3576 Advance();

	3577 return '\n';

	3578 case 'r':

	3579 Advance();

	3580 return '\r';

	3581 case 't':

	3582 Advance();

	3583 return '\t';

	3584 case 'v':

	3585 Advance();

	3586 return '\v';

	3587 case 'c':

	3588 return ParseControlEscape(ok);

	3589 case '0': case '1': case '2': case '3': case '4': case '5':

	3590 case '6': case '7':

	3591 // We're really supposed to read this as a decimal integer

	3592 // literal which is base 10 but for whatever reason the other

	3593 // implementations read base 8. It's hard to believe that the

	3594 // spec was written by some of the same people that wrote the

	3595 // other implementations...
	Lasse Reichstein 2008/10/27 13:12:58 So bitter! ;P Spec says to read as decimal literal and treat as back-reference (if valid as such). It doesn't say to treat it as a character escape at all. What appears to happen, in at least one browser, is to parse the number as decimal if it could be a back-reference, and as an octal character escape if it can't be a back-reference but is an octal number (using as many octal digits as are available), and as a literal backslash if the first digit is 8 or 9. Seems like a deliberate strategy to never give an error! Other browsers have different strategies. Maybe we shouldn't try to be bug-compatible in this particular case. Christian Plesner Hansen 2008/10/27 18:57:02 I think we're free to do whatever we want with \8 and \9 and I suggest we treat them as '8' and '9'. Otherwise we're probably stuck with the octal/decimal-back-reference confusion. By the way, you forgot to mention leading zeros. The spec explicitly disallows them but lo and behold: js> /\000011/.test("\x09") true
	3596 return ParseOctalLiteral(ok);

	3597 case 'x':

	3598 Advance();

	3599 return ParseHexEscape(2);

	3600 case 'A': case 'Z': {

	3601 uc32 result = current();

	3602 Advance();

	3603 return result;

	3604 }

	3605 default: {

	3606 ASSERT(!Scanner::kIsIdentifierPart.get(current()));

	3607 uc32 result = current();

	3608 Advance();

	3609 return result;

	3610 }

	3611 }

	3612 return 0;

	3613 }

	3614

	3615

	3616 RegExpTree* RegExpParser::ParseGroup(bool* ok) {

	3617 ASSERT_EQ(current(), '(');

	3618 char type = '(';

	3619 Advance();

	3620 if (current() == '?') {

	3621 switch (next()) {

	3622 case ':': case '=': case '!':

	3623 type = next();

	3624 Advance(2);

	3625 break;

	3626 default:

	3627 ReportError(CStrVector("Invalid group"), CHECK_OK);

	3628 break;

	3629 }

	3630 }

	3631 RegExpTree* body = ParseDisjunction(CHECK_OK);

	3632 if (current() != ')') {

	3633 ReportError(CStrVector("Unterminated group"), CHECK_OK);

	3634 }

	3635 Advance();

	3636 if (type == '(') {

	3637 captures_seen_++;

	3638 return new RegExpCapture(body);

	3639 } else if (type == ':') {

	3640 return body;

	3641 } else {

	3642 ASSERT(type == '=' \|\| type == '!');

	3643 bool is_positive = (type == '=');

	3644 return new RegExpLookahead(body, is_positive);

	3645 }

	3646 }

	3647

	3648

	3649 CharacterRange RegExpParser::ParseClassAtom(bool* ok) {

	3650 uc32 first = current();

	3651 if (first == '\\') {

	3652 switch (next()) {

	3653 case 'b':

	3654 Advance(2);

	3655 return CharacterRange::Singleton('\b');

	3656 case 'w': case 'W': case 'd': case 'D': case 's': case 'S': {

	3657 uc32 c = next();

	3658 Advance(2);

	3659 return CharacterRange::Special(c);

	3660 }

	3661 default:

	3662 uc32 c = ParseCharacterEscape(CHECK_OK);
	Lasse Reichstein 2008/10/27 13:12:58 If we add parsing of back-references, ParseCharacterEscape will not be the same outside and inside a character class definition. Again, browsers don't agree what to do anyway. Christian Plesner Hansen 2008/10/27 18:57:02 Right, I expect what we'll do is check for backreferences first in the atom parser and then if we can verify that it isn't one fall through to here.
	3663 return CharacterRange::Singleton(c);

	3664 }

	3665 } else {

	3666 Advance();

	3667 return CharacterRange::Singleton(first);

	3668 }

	3669 }

	3670

	3671

	3672 RegExpTree* RegExpParser::ParseCharacterClass(bool* ok) {

	3673 static const char* kUnterminated = "Unterminated character class";

	3674 static const char* kIllegal = "Illegal character class";

	3675

	3676 ASSERT_EQ(current(), '[');

	3677 Advance();

	3678 bool is_negated = false;

	3679 if (current() == '^') {

	3680 is_negated = true;

	3681 Advance();

	3682 }

	3683 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2);

	3684 while (has_more() && current() != ']') {

	3685 if (current() == '-') {

	3686 Advance();

	3687 ranges->Add(CharacterRange::Singleton('-'));

	3688 } else {

	3689 CharacterRange first = ParseClassAtom(CHECK_OK);

	3690 if (!first.is_special() && current() == '-') {

	3691 Advance();

	3692 CharacterRange next = ParseClassAtom(CHECK_OK);

	3693 if (next.is_special()) {

	3694 return ReportError(CStrVector(kIllegal), CHECK_OK);

	3695 }

	3696 ranges->Add(CharacterRange::Range(first.from(), next.to()));

	3697 } else {

	3698 ranges->Add(first);

	3699 }

	3700 }

	3701 }

	3702 if (!has_more()) {

	3703 return ReportError(CStrVector(kUnterminated), CHECK_OK);

	3704 }

	3705 Advance();

	3706 if (ranges->length() == 0) {

	3707 return RegExpEmpty::GetInstance();

	3708 } else {

	3709 return new RegExpCharacterClass(ranges, is_negated);

	3710 }

	3711 }

	3712

	3713

	3714 // ----------------------------------------------------------------------------

3165 // The Parser interface.	3715 // The Parser interface.

3166	3716

3167 // MakeAST() is just a wrapper for the corresponding Parser calls	3717 // MakeAST() is just a wrapper for the corresponding Parser calls

3168 // so we don't have to expose the entire Parser class in the .h file.	3718 // so we don't have to expose the entire Parser class in the .h file.

3169	3719

3170 static bool always_allow_natives_syntax = false;	3720 static bool always_allow_natives_syntax = false;

3171	3721

3172	3722

3173 ParserMessage::~ParserMessage() {	3723 ParserMessage::~ParserMessage() {

3174 for (int i = 0; i < args().length(); i++)	3724 for (int i = 0; i < args().length(); i++)

(...skipping 27 matching lines...) Expand all Loading...
3202 PreParser parser(no_script, allow_natives_syntax, extension);	3752 PreParser parser(no_script, allow_natives_syntax, extension);

3203 if (!parser.PreParseProgram(stream)) return NULL;	3753 if (!parser.PreParseProgram(stream)) return NULL;

3204 // The list owns the backing store so we need to clone the vector.	3754 // The list owns the backing store so we need to clone the vector.

3205 // That way, the result will be exactly the right size rather than	3755 // That way, the result will be exactly the right size rather than

3206 // the expected 50% too large.	3756 // the expected 50% too large.

3207 Vector<unsigned> store = parser.recorder()->store()->ToVector().Clone();	3757 Vector<unsigned> store = parser.recorder()->store()->ToVector().Clone();

3208 return new ScriptDataImpl(store);	3758 return new ScriptDataImpl(store);

3209 }	3759 }

3210	3760

3211	3761

	3762 RegExpTree* ParseRegExp(unibrow::CharacterStream* stream,

	3763 Handle<String>* error) {

	3764 ASSERT(error->is_null());

	3765 RegExpParser parser(stream, error);

	3766 bool ok = true;

	3767 RegExpTree* result = parser.ParsePattern(&ok);

	3768 if (!ok) {

	3769 ASSERT(result == NULL);

	3770 ASSERT(!error->is_null());

	3771 } else {

	3772 ASSERT(result != NULL);

	3773 ASSERT(error->is_null());

	3774 }

	3775 return result;

	3776 }

	3777

	3778

3212 FunctionLiteral* MakeAST(bool compile_in_global_context,	3779 FunctionLiteral* MakeAST(bool compile_in_global_context,

3213 Handle<Script> script,	3780 Handle<Script> script,

3214 v8::Extension* extension,	3781 v8::Extension* extension,

3215 ScriptDataImpl* pre_data) {	3782 ScriptDataImpl* pre_data) {

3216 bool allow_natives_syntax =	3783 bool allow_natives_syntax =

3217 always_allow_natives_syntax \|\|	3784 always_allow_natives_syntax \|\|

3218 FLAG_allow_natives_syntax \|\|	3785 FLAG_allow_natives_syntax \|\|

3219 Bootstrapper::IsActive();	3786 Bootstrapper::IsActive();

3220 AstBuildingParser parser(script, allow_natives_syntax, extension, pre_data);	3787 AstBuildingParser parser(script, allow_natives_syntax, extension, pre_data);

3221 if (pre_data != NULL && pre_data->has_error()) {	3788 if (pre_data != NULL && pre_data->has_error()) {

(...skipping 32 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3254 start_position,	3821 start_position,

3255 is_expression);	3822 is_expression);

3256 return result;	3823 return result;

3257 }	3824 }

3258	3825

3259	3826

3260 #undef NEW	3827 #undef NEW

3261	3828

3262	3829

3263 } } // namespace v8::internal	3830 } } // namespace v8::internal

OLD	NEW

« src/jsregexp.cc ('K') | « src/parser.h ('k') | src/string-stream.h » ('j') | no next file with comments »