Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 16 matching lines...) Expand all Loading... | |
| 27 | 27 |
| 28 #include "v8.h" | 28 #include "v8.h" |
| 29 | 29 |
| 30 #include "api.h" | 30 #include "api.h" |
| 31 #include "ast.h" | 31 #include "ast.h" |
| 32 #include "bootstrapper.h" | 32 #include "bootstrapper.h" |
| 33 #include "platform.h" | 33 #include "platform.h" |
| 34 #include "runtime.h" | 34 #include "runtime.h" |
| 35 #include "parser.h" | 35 #include "parser.h" |
| 36 #include "scopes.h" | 36 #include "scopes.h" |
| 37 #include "string-stream.h" | |
| 37 | 38 |
| 38 namespace v8 { namespace internal { | 39 namespace v8 { namespace internal { |
| 39 | 40 |
| 40 class ParserFactory; | 41 class ParserFactory; |
| 41 class ParserLog; | 42 class ParserLog; |
| 42 class TemporaryScope; | 43 class TemporaryScope; |
| 43 template <typename T> class ZoneListWrapper; | 44 template <typename T> class ZoneListWrapper; |
| 44 | 45 |
| 45 | 46 |
| 46 class Parser { | 47 class Parser { |
| (...skipping 173 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 220 Handle<String> type, | 221 Handle<String> type, |
| 221 Vector< Handle<Object> > arguments); | 222 Vector< Handle<Object> > arguments); |
| 222 | 223 |
| 223 friend class Target; | 224 friend class Target; |
| 224 friend class TargetScope; | 225 friend class TargetScope; |
| 225 friend class LexicalScope; | 226 friend class LexicalScope; |
| 226 friend class TemporaryScope; | 227 friend class TemporaryScope; |
| 227 }; | 228 }; |
| 228 | 229 |
| 229 | 230 |
| 231 class RegExpParser { | |
| 232 public: | |
| 233 RegExpParser(unibrow::CharacterStream* in, Handle<String>* error); | |
| 234 RegExpTree* ParsePattern(bool* ok); | |
| 235 RegExpTree* ParseDisjunction(bool* ok); | |
| 236 RegExpTree* ParseAlternative(bool* ok); | |
| 237 RegExpTree* ParseTerm(bool* ok); | |
| 238 RegExpTree* ParseAtom(bool* ok); | |
| 239 RegExpTree* ParseGroup(bool* ok); | |
| 240 RegExpTree* ParseCharacterClass(bool* ok); | |
| 241 | |
| 242 // Parses a {...,...} quantifier and stores the range in the given | |
| 243 // out parameters. | |
| 244 void* ParseIntervalQuantifier(int* min_out, int* max_out, bool* ok); | |
| 245 | |
| 246 // Parses and returns a single escaped character. The character | |
| 247 // must not be 'b' or 'B' since they are usually handled specially. | |
| 248 uc32 ParseCharacterEscape(bool* ok); | |
| 249 | |
| 250 uc32 ParseHexEscape(int length); | |
| 251 | |
| 252 uc32 ParseControlEscape(bool* ok); | |
| 253 uc32 ParseOctalLiteral(bool* ok); | |
| 254 | |
| 255 CharacterRange ParseClassAtom(bool* ok); | |
| 256 RegExpTree* ReportError(Vector<const char> message, bool* ok); | |
| 257 void Advance(); | |
| 258 void Advance(int dist); | |
| 259 private: | |
| 260 uc32 current() { return current_; } | |
| 261 uc32 next() { return next_; } | |
| 262 bool has_more() { return has_more_; } | |
| 263 bool has_next() { return has_next_; } | |
| 264 unibrow::CharacterStream* in() { return in_; } | |
| 265 uc32 current_; | |
| 266 uc32 next_; | |
| 267 bool has_more_; | |
| 268 bool has_next_; | |
| 269 int captures_seen_; | |
| 270 unibrow::CharacterStream* in_; | |
| 271 Handle<String>* error_; | |
| 272 }; | |
| 273 | |
| 274 | |
| 230 // A temporary scope stores information during parsing, just like | 275 // A temporary scope stores information during parsing, just like |
| 231 // a plain scope. However, temporary scopes are not kept around | 276 // a plain scope. However, temporary scopes are not kept around |
| 232 // after parsing or referenced by syntax trees so they can be stack- | 277 // after parsing or referenced by syntax trees so they can be stack- |
| 233 // allocated and hence used by the pre-parser. | 278 // allocated and hence used by the pre-parser. |
| 234 class TemporaryScope BASE_EMBEDDED { | 279 class TemporaryScope BASE_EMBEDDED { |
| 235 public: | 280 public: |
| 236 explicit TemporaryScope(Parser* parser); | 281 explicit TemporaryScope(Parser* parser); |
| 237 ~TemporaryScope(); | 282 ~TemporaryScope(); |
| 238 | 283 |
| 239 int NextMaterializedLiteralIndex() { | 284 int NextMaterializedLiteralIndex() { |
| (...skipping 2915 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3155 } | 3200 } |
| 3156 ZoneList<Expression*>* args = new ZoneList<Expression*>(2); | 3201 ZoneList<Expression*>* args = new ZoneList<Expression*>(2); |
| 3157 args->Add(new Literal(type)); | 3202 args->Add(new Literal(type)); |
| 3158 args->Add(new Literal(array)); | 3203 args->Add(new Literal(array)); |
| 3159 return new Throw(new CallRuntime(constructor, NULL, args), | 3204 return new Throw(new CallRuntime(constructor, NULL, args), |
| 3160 scanner().location().beg_pos); | 3205 scanner().location().beg_pos); |
| 3161 } | 3206 } |
| 3162 | 3207 |
| 3163 | 3208 |
| 3164 // ---------------------------------------------------------------------------- | 3209 // ---------------------------------------------------------------------------- |
| 3210 // Regular expressions. | |
| 3211 | |
| 3212 RegExpParser::RegExpParser(unibrow::CharacterStream* in, Handle<String>* error) | |
| 3213 : current_('\0'), | |
| 3214 next_('\0'), | |
| 3215 has_more_(true), | |
| 3216 has_next_(true), | |
| 3217 captures_seen_(0), | |
| 3218 in_(in), | |
| 3219 error_(error) { | |
| 3220 Advance(2); | |
| 3221 } | |
| 3222 | |
| 3223 void RegExpParser::Advance() { | |
| 3224 current_ = next_; | |
| 3225 has_more_ = has_next_; | |
| 3226 if (in()->has_more()) { | |
| 3227 next_ = in()->GetNext(); | |
| 3228 } else { | |
| 3229 next_ = '\0'; | |
| 3230 has_next_ = false; | |
| 3231 } | |
| 3232 } | |
| 3233 | |
| 3234 void RegExpParser::Advance(int dist) { | |
| 3235 for (int i = 0; i < dist; i++) | |
| 3236 Advance(); | |
| 3237 } | |
| 3238 | |
| 3239 RegExpTree* RegExpParser::ReportError(Vector<const char> message, bool* ok) { | |
| 3240 *ok = false; | |
| 3241 *error_ = Factory::NewStringFromAscii(message, NOT_TENURED); | |
| 3242 return NULL; | |
| 3243 } | |
| 3244 | |
| 3245 // Pattern :: | |
| 3246 // Disjunction | |
| 3247 RegExpTree* RegExpParser::ParsePattern(bool* ok) { | |
| 3248 return ParseDisjunction(ok); | |
| 3249 } | |
| 3250 | |
| 3251 // Disjunction :: | |
| 3252 // Alternative | |
| 3253 // Alternative | Disjunction | |
| 3254 RegExpTree* RegExpParser::ParseDisjunction(bool* ok) { | |
| 3255 RegExpTree* first = ParseAlternative(CHECK_OK); | |
| 3256 if (current() == '|') { | |
| 3257 ZoneList<RegExpTree*>* nodes = new ZoneList<RegExpTree*>(2); | |
| 3258 nodes->Add(first); | |
| 3259 while (current() == '|') { | |
| 3260 Advance(); | |
| 3261 RegExpTree* next = ParseAlternative(CHECK_OK); | |
| 3262 nodes->Add(next); | |
| 3263 } | |
| 3264 return new RegExpDisjunction(nodes); | |
| 3265 } else { | |
| 3266 return first; | |
| 3267 } | |
| 3268 } | |
| 3269 | |
| 3270 static bool IsAlternativeTerminator(uc32 c) { | |
| 3271 return c == '|' || c == ')' || c == '\0'; | |
|
Erik Corry
2008/10/27 14:58:44
This probably won't work in the long run, can't JS
Christian Plesner Hansen
2008/10/27 18:57:02
Hmm. I've replaced it with a special kEndMarker w
| |
| 3272 } | |
| 3273 | |
| 3274 // Alternative :: | |
| 3275 // [empty] | |
| 3276 // Alternative Term | |
| 3277 RegExpTree* RegExpParser::ParseAlternative(bool* ok) { | |
| 3278 if (!IsAlternativeTerminator(current())) { | |
| 3279 RegExpTree* first = ParseTerm(CHECK_OK); | |
| 3280 if (!IsAlternativeTerminator(current())) { | |
| 3281 ZoneList<RegExpTree*>* nodes = new ZoneList<RegExpTree*>(2); | |
| 3282 nodes->Add(first); | |
| 3283 while (!IsAlternativeTerminator(current())) { | |
| 3284 RegExpTree* next = ParseTerm(CHECK_OK); | |
| 3285 nodes->Add(next); | |
| 3286 } | |
| 3287 return new RegExpAlternative(nodes); | |
| 3288 } else { | |
| 3289 return first; | |
| 3290 } | |
| 3291 } else { | |
| 3292 return RegExpEmpty::GetInstance(); | |
| 3293 } | |
| 3294 } | |
| 3295 | |
| 3296 | |
| 3297 class SourceCharacter { | |
| 3298 public: | |
| 3299 static bool Is(uc32 c) { | |
| 3300 switch (c) { | |
| 3301 // case ']': case '}': | |
| 3302 // In spidermonkey and jsc these are treated as source characters | |
| 3303 // so we do too. | |
| 3304 case '^': case '$': case '\\': case '.': case '*': case '+': | |
| 3305 case '?': case '(': case ')': case '[': case '{': case '|': | |
| 3306 return false; | |
| 3307 default: | |
| 3308 return true; | |
| 3309 } | |
| 3310 } | |
| 3311 }; | |
| 3312 | |
| 3313 | |
| 3314 static unibrow::Predicate<SourceCharacter> source_character; | |
| 3315 | |
| 3316 | |
| 3317 static inline bool IsSourceCharacter(uc32 c) { | |
| 3318 return source_character.get(c); | |
| 3319 } | |
| 3320 | |
| 3321 | |
| 3322 static bool IsSpecialEscape(uc32 c) { | |
| 3323 switch (c) { | |
| 3324 case 'b': case 'B': case 'd': case 'D': case 's': case 'S': | |
| 3325 case 'w': case 'W': | |
| 3326 return true; | |
| 3327 default: | |
| 3328 return false; | |
| 3329 } | |
| 3330 } | |
| 3331 | |
| 3332 | |
| 3333 // Term :: | |
| 3334 // Assertion | |
| 3335 // Atom | |
| 3336 // Atom Quantifier | |
| 3337 RegExpTree* RegExpParser::ParseTerm(bool* ok) { | |
| 3338 RegExpTree* atom = NULL; | |
| 3339 switch (current()) { | |
| 3340 // Assertion :: | |
| 3341 // ^ | |
| 3342 // $ | |
| 3343 // \ b | |
| 3344 // \ B | |
| 3345 case '^': | |
| 3346 Advance(); | |
| 3347 return new RegExpAssertion(RegExpAssertion::START); | |
| 3348 case '$': | |
| 3349 Advance(); | |
| 3350 return new RegExpAssertion(RegExpAssertion::END); | |
| 3351 case '.': | |
| 3352 Advance(); | |
| 3353 atom = new RegExpCharacterClass(CharacterRange::Special('.')); | |
| 3354 break; | |
| 3355 case '(': | |
| 3356 atom = ParseGroup(CHECK_OK); | |
| 3357 break; | |
| 3358 case '[': | |
| 3359 atom = ParseCharacterClass(CHECK_OK); | |
| 3360 break; | |
| 3361 // Atom :: | |
| 3362 // \ AtomEscape | |
| 3363 case '\\': | |
| 3364 if (has_next()) { | |
| 3365 switch (next()) { | |
| 3366 case 'b': | |
| 3367 Advance(2); | |
| 3368 return new RegExpAssertion(RegExpAssertion::BOUNDARY); | |
| 3369 case 'B': | |
| 3370 Advance(2); | |
| 3371 return new RegExpAssertion(RegExpAssertion::NON_BOUNDARY); | |
| 3372 // AtomEscape :: | |
| 3373 // CharacterClassEscape | |
| 3374 // | |
| 3375 // CharacterClassEscape :: one of | |
| 3376 // d D s S w W | |
| 3377 case 'd': case 'D': case 's': case 'S': case 'w': case 'W': { | |
| 3378 uc32 c = next(); | |
| 3379 Advance(2); | |
| 3380 atom = new RegExpCharacterClass(CharacterRange::Special(c)); | |
| 3381 goto has_read_atom; | |
| 3382 } | |
| 3383 // TODO: backreferences | |
| 3384 default: | |
| 3385 break; | |
| 3386 } | |
| 3387 } | |
| 3388 // All other escapes fall through to the default case since | |
| 3389 // they correspond to single characters that can be | |
| 3390 // represented within atoms. | |
| 3391 default: { | |
| 3392 atom = ParseAtom(CHECK_OK); | |
| 3393 break; | |
| 3394 } | |
| 3395 } | |
| 3396 has_read_atom: | |
| 3397 int min; | |
| 3398 int max; | |
| 3399 switch (current()) { | |
| 3400 // QuantifierPrefix :: | |
| 3401 // * | |
| 3402 // + | |
| 3403 // ? | |
| 3404 case '*': | |
| 3405 min = 0; | |
| 3406 max = RegExpQuantifier::kInfinity; | |
| 3407 Advance(); | |
| 3408 break; | |
| 3409 case '+': | |
| 3410 min = 1; | |
| 3411 max = RegExpQuantifier::kInfinity; | |
| 3412 Advance(); | |
| 3413 break; | |
| 3414 case '?': | |
| 3415 min = 0; | |
| 3416 max = 1; | |
| 3417 Advance(); | |
| 3418 break; | |
| 3419 case '{': | |
| 3420 ParseIntervalQuantifier(&min, &max, CHECK_OK); | |
| 3421 break; | |
| 3422 default: | |
| 3423 return atom; | |
| 3424 } | |
| 3425 bool is_greedy = true; | |
| 3426 if (current() == '?') { | |
| 3427 is_greedy = false; | |
| 3428 Advance(); | |
| 3429 } | |
| 3430 return new RegExpQuantifier(min, max, is_greedy, atom); | |
| 3431 } | |
| 3432 | |
| 3433 | |
| 3434 // QuantifierPrefix :: | |
| 3435 // { DecimalDigits } | |
| 3436 // { DecimalDigits , } | |
| 3437 // { DecimalDigits , DecimalDigits } | |
| 3438 void* RegExpParser::ParseIntervalQuantifier(int* min_out, | |
| 3439 int* max_out, | |
| 3440 bool* ok) { | |
| 3441 ASSERT_EQ(current(), '{'); | |
| 3442 static const char* kInvalidQuantifier = "Invalid quantifier"; | |
| 3443 Advance(); | |
| 3444 int min = 0; | |
| 3445 if (!IsDecimalDigit(current())) { | |
| 3446 // JSC allows {} and {,} as quantifiers (and { and } and all | |
| 3447 // sorts of crazy stuff) but my puny human brain has been unable | |
| 3448 // to figure out what they mean exactly, if anything. For now | |
| 3449 // we follow the spec and report a syntax error. | |
| 3450 ReportError(CStrVector(kInvalidQuantifier), CHECK_OK); | |
| 3451 } | |
| 3452 while (IsDecimalDigit(current())) { | |
| 3453 min = 10 * min + (current() - '0'); | |
| 3454 Advance(); | |
| 3455 } | |
| 3456 int max = 0; | |
| 3457 if (current() == '}') { | |
| 3458 max = min; | |
| 3459 Advance(); | |
| 3460 } else if (current() == ',') { | |
| 3461 Advance(); | |
| 3462 if (current() == '}') { | |
| 3463 Advance(); | |
| 3464 max = RegExpQuantifier::kInfinity; | |
|
Lasse Reichstein
2008/10/27 13:12:58
All the other cases has the Advance() call after t
| |
| 3465 } else { | |
| 3466 while (IsDecimalDigit(current())) { | |
| 3467 max = 10 * max + (current() - '0'); | |
| 3468 Advance(); | |
| 3469 } | |
| 3470 if (current() != '}') { | |
| 3471 ReportError(CStrVector(kInvalidQuantifier), CHECK_OK); | |
| 3472 } | |
| 3473 Advance(); | |
| 3474 } | |
| 3475 } else { | |
| 3476 ReportError(CStrVector(kInvalidQuantifier), CHECK_OK); | |
| 3477 } | |
| 3478 *min_out = min; | |
| 3479 *max_out = max; | |
| 3480 return NULL; | |
| 3481 } | |
| 3482 | |
| 3483 | |
| 3484 RegExpTree* RegExpParser::ParseAtom(bool* ok) { | |
| 3485 ASSERT(current() == '\\' || IsSourceCharacter(current())); | |
| 3486 ZoneList<uc16>* buf = new ZoneList<uc16>(4); | |
| 3487 while (true) { | |
| 3488 if (IsSourceCharacter(current())) { | |
| 3489 buf->Add(current()); | |
| 3490 Advance(); | |
| 3491 } else if (current() == '\\') { | |
| 3492 if (!has_next()) { | |
| 3493 ReportError(CStrVector("\\ at end of pattern"), CHECK_OK); | |
| 3494 } else if (IsSpecialEscape(next())) { | |
| 3495 // If the next thing we see is a special escape we stop | |
| 3496 // reading this atom. | |
| 3497 break; | |
| 3498 } else { | |
| 3499 uc32 escape = ParseCharacterEscape(CHECK_OK); | |
| 3500 buf->Add(escape); | |
| 3501 } | |
| 3502 } else { | |
| 3503 break; | |
| 3504 } | |
| 3505 } | |
| 3506 return new RegExpAtom(buf->ToConstVector()); | |
| 3507 } | |
| 3508 | |
| 3509 | |
| 3510 uc32 RegExpParser::ParseControlEscape(bool* ok) { | |
| 3511 ASSERT(current() == 'c'); | |
| 3512 Advance(); | |
| 3513 if (!has_more()) { | |
| 3514 ReportError(CStrVector("\\c at end of pattern"), ok); | |
| 3515 return '\0'; | |
| 3516 } else { | |
| 3517 uc32 letter = current(); | |
| 3518 if (!('a' <= letter && letter <= 'z') && | |
| 3519 !('A' <= letter && letter <= 'Z')) { | |
| 3520 ReportError(CStrVector("Illegal control letter"), ok); | |
| 3521 return '\0'; | |
| 3522 } | |
| 3523 Advance(); | |
| 3524 return letter & ((1 << 5) - 1); | |
| 3525 } | |
| 3526 } | |
| 3527 | |
| 3528 | |
| 3529 uc32 RegExpParser::ParseOctalLiteral(bool* ok) { | |
| 3530 ASSERT('0' <= current() && current() <= '7'); | |
| 3531 // Here we're really supposed to break out after the first digit | |
| 3532 // if it is '0' but the other implementations don't do that so | |
| 3533 // neither do we. Is this deviation from the spec error prone? | |
| 3534 // Yes, it's probably as error prone as it's possible to get. Isn't | |
| 3535 // JavaScript wonderful? | |
| 3536 uc32 value = 0; | |
| 3537 while ('0' <= current() && current() <= '7') { | |
| 3538 int next = (8 * value) + (current() - '0'); | |
| 3539 if (next >= 256) { | |
| 3540 break; | |
| 3541 } else { | |
| 3542 value = next; | |
| 3543 Advance(); | |
| 3544 } | |
| 3545 } | |
| 3546 return value; | |
| 3547 } | |
| 3548 | |
| 3549 | |
| 3550 uc32 RegExpParser::ParseHexEscape(int length) { | |
| 3551 uc32 value = 0; | |
| 3552 for (int i = 0; i < length; i++) { | |
| 3553 int d = HexValue(current()); | |
| 3554 if (d < 0) | |
| 3555 return value; | |
| 3556 value = value * 16 + d; | |
| 3557 Advance(); | |
| 3558 } | |
| 3559 | |
| 3560 return value; | |
| 3561 } | |
| 3562 | |
| 3563 | |
| 3564 uc32 RegExpParser::ParseCharacterEscape(bool* ok) { | |
| 3565 ASSERT(current() == '\\'); | |
| 3566 ASSERT(has_next() && !IsSpecialEscape(next())); | |
| 3567 Advance(); | |
| 3568 ASSERT(current() != 'b' && current() != 'B'); | |
| 3569 switch (current()) { | |
| 3570 // ControlEscape :: one of | |
| 3571 // f n r t v | |
| 3572 case 'f': | |
| 3573 Advance(); | |
| 3574 return '\f'; | |
| 3575 case 'n': | |
| 3576 Advance(); | |
| 3577 return '\n'; | |
| 3578 case 'r': | |
| 3579 Advance(); | |
| 3580 return '\r'; | |
| 3581 case 't': | |
| 3582 Advance(); | |
| 3583 return '\t'; | |
| 3584 case 'v': | |
| 3585 Advance(); | |
| 3586 return '\v'; | |
| 3587 case 'c': | |
| 3588 return ParseControlEscape(ok); | |
| 3589 case '0': case '1': case '2': case '3': case '4': case '5': | |
| 3590 case '6': case '7': | |
| 3591 // We're really supposed to read this as a decimal integer | |
| 3592 // literal which is base 10 but for whatever reason the other | |
| 3593 // implementations read base 8. It's hard to believe that the | |
| 3594 // spec was written by some of the same people that wrote the | |
| 3595 // other implementations... | |
|
Lasse Reichstein
2008/10/27 13:12:58
So bitter! ;P
Spec says to read as decimal literal
Christian Plesner Hansen
2008/10/27 18:57:02
I think we're free to do whatever we want with \8
| |
| 3596 return ParseOctalLiteral(ok); | |
| 3597 case 'x': | |
| 3598 Advance(); | |
| 3599 return ParseHexEscape(2); | |
| 3600 case 'A': case 'Z': { | |
| 3601 uc32 result = current(); | |
| 3602 Advance(); | |
| 3603 return result; | |
| 3604 } | |
| 3605 default: { | |
| 3606 ASSERT(!Scanner::kIsIdentifierPart.get(current())); | |
| 3607 uc32 result = current(); | |
| 3608 Advance(); | |
| 3609 return result; | |
| 3610 } | |
| 3611 } | |
| 3612 return 0; | |
| 3613 } | |
| 3614 | |
| 3615 | |
| 3616 RegExpTree* RegExpParser::ParseGroup(bool* ok) { | |
| 3617 ASSERT_EQ(current(), '('); | |
| 3618 char type = '('; | |
| 3619 Advance(); | |
| 3620 if (current() == '?') { | |
| 3621 switch (next()) { | |
| 3622 case ':': case '=': case '!': | |
| 3623 type = next(); | |
| 3624 Advance(2); | |
| 3625 break; | |
| 3626 default: | |
| 3627 ReportError(CStrVector("Invalid group"), CHECK_OK); | |
| 3628 break; | |
| 3629 } | |
| 3630 } | |
| 3631 RegExpTree* body = ParseDisjunction(CHECK_OK); | |
| 3632 if (current() != ')') { | |
| 3633 ReportError(CStrVector("Unterminated group"), CHECK_OK); | |
| 3634 } | |
| 3635 Advance(); | |
| 3636 if (type == '(') { | |
| 3637 captures_seen_++; | |
| 3638 return new RegExpCapture(body); | |
| 3639 } else if (type == ':') { | |
| 3640 return body; | |
| 3641 } else { | |
| 3642 ASSERT(type == '=' || type == '!'); | |
| 3643 bool is_positive = (type == '='); | |
| 3644 return new RegExpLookahead(body, is_positive); | |
| 3645 } | |
| 3646 } | |
| 3647 | |
| 3648 | |
| 3649 CharacterRange RegExpParser::ParseClassAtom(bool* ok) { | |
| 3650 uc32 first = current(); | |
| 3651 if (first == '\\') { | |
| 3652 switch (next()) { | |
| 3653 case 'b': | |
| 3654 Advance(2); | |
| 3655 return CharacterRange::Singleton('\b'); | |
| 3656 case 'w': case 'W': case 'd': case 'D': case 's': case 'S': { | |
| 3657 uc32 c = next(); | |
| 3658 Advance(2); | |
| 3659 return CharacterRange::Special(c); | |
| 3660 } | |
| 3661 default: | |
| 3662 uc32 c = ParseCharacterEscape(CHECK_OK); | |
|
Lasse Reichstein
2008/10/27 13:12:58
If we add parsing of back-references, ParseCharact
Christian Plesner Hansen
2008/10/27 18:57:02
Right, I expect what we'll do is check for backref
| |
| 3663 return CharacterRange::Singleton(c); | |
| 3664 } | |
| 3665 } else { | |
| 3666 Advance(); | |
| 3667 return CharacterRange::Singleton(first); | |
| 3668 } | |
| 3669 } | |
| 3670 | |
| 3671 | |
| 3672 RegExpTree* RegExpParser::ParseCharacterClass(bool* ok) { | |
| 3673 static const char* kUnterminated = "Unterminated character class"; | |
| 3674 static const char* kIllegal = "Illegal character class"; | |
| 3675 | |
| 3676 ASSERT_EQ(current(), '['); | |
| 3677 Advance(); | |
| 3678 bool is_negated = false; | |
| 3679 if (current() == '^') { | |
| 3680 is_negated = true; | |
| 3681 Advance(); | |
| 3682 } | |
| 3683 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2); | |
| 3684 while (has_more() && current() != ']') { | |
| 3685 if (current() == '-') { | |
| 3686 Advance(); | |
| 3687 ranges->Add(CharacterRange::Singleton('-')); | |
| 3688 } else { | |
| 3689 CharacterRange first = ParseClassAtom(CHECK_OK); | |
| 3690 if (!first.is_special() && current() == '-') { | |
| 3691 Advance(); | |
| 3692 CharacterRange next = ParseClassAtom(CHECK_OK); | |
| 3693 if (next.is_special()) { | |
| 3694 return ReportError(CStrVector(kIllegal), CHECK_OK); | |
| 3695 } | |
| 3696 ranges->Add(CharacterRange::Range(first.from(), next.to())); | |
| 3697 } else { | |
| 3698 ranges->Add(first); | |
| 3699 } | |
| 3700 } | |
| 3701 } | |
| 3702 if (!has_more()) { | |
| 3703 return ReportError(CStrVector(kUnterminated), CHECK_OK); | |
| 3704 } | |
| 3705 Advance(); | |
| 3706 if (ranges->length() == 0) { | |
| 3707 return RegExpEmpty::GetInstance(); | |
| 3708 } else { | |
| 3709 return new RegExpCharacterClass(ranges, is_negated); | |
| 3710 } | |
| 3711 } | |
| 3712 | |
| 3713 | |
| 3714 // ---------------------------------------------------------------------------- | |
| 3165 // The Parser interface. | 3715 // The Parser interface. |
| 3166 | 3716 |
| 3167 // MakeAST() is just a wrapper for the corresponding Parser calls | 3717 // MakeAST() is just a wrapper for the corresponding Parser calls |
| 3168 // so we don't have to expose the entire Parser class in the .h file. | 3718 // so we don't have to expose the entire Parser class in the .h file. |
| 3169 | 3719 |
| 3170 static bool always_allow_natives_syntax = false; | 3720 static bool always_allow_natives_syntax = false; |
| 3171 | 3721 |
| 3172 | 3722 |
| 3173 ParserMessage::~ParserMessage() { | 3723 ParserMessage::~ParserMessage() { |
| 3174 for (int i = 0; i < args().length(); i++) | 3724 for (int i = 0; i < args().length(); i++) |
| (...skipping 27 matching lines...) Expand all Loading... | |
| 3202 PreParser parser(no_script, allow_natives_syntax, extension); | 3752 PreParser parser(no_script, allow_natives_syntax, extension); |
| 3203 if (!parser.PreParseProgram(stream)) return NULL; | 3753 if (!parser.PreParseProgram(stream)) return NULL; |
| 3204 // The list owns the backing store so we need to clone the vector. | 3754 // The list owns the backing store so we need to clone the vector. |
| 3205 // That way, the result will be exactly the right size rather than | 3755 // That way, the result will be exactly the right size rather than |
| 3206 // the expected 50% too large. | 3756 // the expected 50% too large. |
| 3207 Vector<unsigned> store = parser.recorder()->store()->ToVector().Clone(); | 3757 Vector<unsigned> store = parser.recorder()->store()->ToVector().Clone(); |
| 3208 return new ScriptDataImpl(store); | 3758 return new ScriptDataImpl(store); |
| 3209 } | 3759 } |
| 3210 | 3760 |
| 3211 | 3761 |
| 3762 RegExpTree* ParseRegExp(unibrow::CharacterStream* stream, | |
| 3763 Handle<String>* error) { | |
| 3764 ASSERT(error->is_null()); | |
| 3765 RegExpParser parser(stream, error); | |
| 3766 bool ok = true; | |
| 3767 RegExpTree* result = parser.ParsePattern(&ok); | |
| 3768 if (!ok) { | |
| 3769 ASSERT(result == NULL); | |
| 3770 ASSERT(!error->is_null()); | |
| 3771 } else { | |
| 3772 ASSERT(result != NULL); | |
| 3773 ASSERT(error->is_null()); | |
| 3774 } | |
| 3775 return result; | |
| 3776 } | |
| 3777 | |
| 3778 | |
| 3212 FunctionLiteral* MakeAST(bool compile_in_global_context, | 3779 FunctionLiteral* MakeAST(bool compile_in_global_context, |
| 3213 Handle<Script> script, | 3780 Handle<Script> script, |
| 3214 v8::Extension* extension, | 3781 v8::Extension* extension, |
| 3215 ScriptDataImpl* pre_data) { | 3782 ScriptDataImpl* pre_data) { |
| 3216 bool allow_natives_syntax = | 3783 bool allow_natives_syntax = |
| 3217 always_allow_natives_syntax || | 3784 always_allow_natives_syntax || |
| 3218 FLAG_allow_natives_syntax || | 3785 FLAG_allow_natives_syntax || |
| 3219 Bootstrapper::IsActive(); | 3786 Bootstrapper::IsActive(); |
| 3220 AstBuildingParser parser(script, allow_natives_syntax, extension, pre_data); | 3787 AstBuildingParser parser(script, allow_natives_syntax, extension, pre_data); |
| 3221 if (pre_data != NULL && pre_data->has_error()) { | 3788 if (pre_data != NULL && pre_data->has_error()) { |
| (...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3254 start_position, | 3821 start_position, |
| 3255 is_expression); | 3822 is_expression); |
| 3256 return result; | 3823 return result; |
| 3257 } | 3824 } |
| 3258 | 3825 |
| 3259 | 3826 |
| 3260 #undef NEW | 3827 #undef NEW |
| 3261 | 3828 |
| 3262 | 3829 |
| 3263 } } // namespace v8::internal | 3830 } } // namespace v8::internal |
| OLD | NEW |