| OLD | NEW |
| 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 212 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 223 | 223 |
| 224 friend class Target; | 224 friend class Target; |
| 225 friend class TargetScope; | 225 friend class TargetScope; |
| 226 friend class LexicalScope; | 226 friend class LexicalScope; |
| 227 friend class TemporaryScope; | 227 friend class TemporaryScope; |
| 228 }; | 228 }; |
| 229 | 229 |
| 230 | 230 |
| 231 class RegExpParser { | 231 class RegExpParser { |
| 232 public: | 232 public: |
| 233 RegExpParser(unibrow::CharacterStream* in, Handle<String>* error); | 233 RegExpParser(unibrow::CharacterStream* in, |
| 234 Handle<String>* error, |
| 235 bool multiline_mode); |
| 234 RegExpTree* ParsePattern(bool* ok); | 236 RegExpTree* ParsePattern(bool* ok); |
| 235 RegExpTree* ParseDisjunction(bool* ok); | 237 RegExpTree* ParseDisjunction(bool* ok); |
| 236 RegExpTree* ParseAlternative(bool* ok); | 238 RegExpTree* ParseAlternative(bool* ok); |
| 237 RegExpTree* ParseTerm(bool* ok); | 239 RegExpTree* ParseTerm(bool* ok); |
| 238 RegExpTree* ParseAtom(bool* ok); | 240 RegExpTree* ParseAtom(bool* ok); |
| 239 RegExpTree* ParseGroup(bool* ok); | 241 RegExpTree* ParseGroup(bool* ok); |
| 240 RegExpTree* ParseCharacterClass(bool* ok); | 242 RegExpTree* ParseCharacterClass(bool* ok); |
| 241 | 243 |
| 242 // Parses a {...,...} quantifier and stores the range in the given | 244 // Parses a {...,...} quantifier and stores the range in the given |
| 243 // out parameters. | 245 // out parameters. |
| 244 void* ParseIntervalQuantifier(int* min_out, int* max_out, bool* ok); | 246 void* ParseIntervalQuantifier(int* min_out, int* max_out, bool* ok); |
| 245 | 247 |
| 246 // Parses and returns a single escaped character. The character | 248 // Parses and returns a single escaped character. The character |
| 247 // must not be 'b' or 'B' since they are usually handle specially. | 249 // must not be 'b' or 'B' since they are usually handle specially. |
| 248 uc32 ParseCharacterEscape(bool* ok); | 250 uc32 ParseCharacterEscape(bool* ok); |
| 249 | 251 |
| 250 uc32 ParseHexEscape(int length); | 252 // Checks whether the following is a length-digit hexadecimal number, |
| 253 // and sets the value if it is. |
| 254 bool ParseHexEscape(int length, uc32* value); |
| 251 | 255 |
| 252 uc32 ParseControlEscape(bool* ok); | 256 uc32 ParseControlEscape(bool* ok); |
| 253 uc32 ParseOctalLiteral(bool* ok); | 257 uc32 ParseOctalLiteral(bool* ok); |
| 254 | 258 |
| 255 // Tries to parse the input as a backreference. If successful it | 259 // Tries to parse the input as a backreference. If successful it |
| 256 // stores the result in the output parameter and returns true. If | 260 // stores the result in the output parameter and returns true. If |
| 257 // it fails it will push back the characters read so the same characters | 261 // it fails it will push back the characters read so the same characters |
| 258 // can be reparsed. | 262 // can be reparsed. |
| 259 bool ParseBackreferenceIndex(int* index_out); | 263 bool ParseBackreferenceIndex(int* index_out); |
| 260 | 264 |
| 261 CharacterRange ParseClassAtom(bool* ok); | 265 CharacterRange ParseClassAtom(bool* ok); |
| 262 RegExpTree* ReportError(Vector<const char> message, bool* ok); | 266 RegExpTree* ReportError(Vector<const char> message, bool* ok); |
| 263 void Advance(); | 267 void Advance(); |
| 264 void Advance(int dist); | 268 void Advance(int dist); |
| 269 // Pushes a read character (or potentially some other character) back |
| 270 // on the input stream. After pushing it back, it becomes the character |
| 271 // returned by current(). There is a limited amount of push-back buffer. |
| 272 // A function using PushBack should check that it doesn't push back more |
| 273 // than kMaxPushback characters, and it should not push back more characters |
| 274 // than it has read, or knows to have been read before it was called. |
| 275 void PushBack(uc32 character); |
| 276 bool CanPushBack(); |
| 265 static const uc32 kEndMarker = unibrow::Utf8::kBadChar; | 277 static const uc32 kEndMarker = unibrow::Utf8::kBadChar; |
| 266 private: | 278 private: |
| 267 uc32 current() { return current_; } | 279 uc32 current() { return current_; } |
| 268 uc32 next() { return next_; } | 280 uc32 next() { return next_; } |
| 269 bool has_more() { return has_more_; } | 281 bool has_more() { return has_more_; } |
| 270 bool has_next() { return has_next_; } | 282 bool has_next() { return has_next_; } |
| 271 unibrow::CharacterStream* in() { return in_; } | 283 unibrow::CharacterStream* in() { return in_; } |
| 272 uc32 current_; | 284 uc32 current_; |
| 273 uc32 next_; | 285 uc32 next_; |
| 274 bool has_more_; | 286 bool has_more_; |
| 275 bool has_next_; | 287 bool has_next_; |
| 288 bool multiline_mode_; |
| 276 int captures_seen_; | 289 int captures_seen_; |
| 277 unibrow::CharacterStream* in_; | 290 unibrow::CharacterStream* in_; |
| 278 Handle<String>* error_; | 291 Handle<String>* error_; |
| 279 static const int kMaxPushback = 5; | 292 static const int kMaxPushback = 5; |
| 280 int pushback_count_; | 293 int pushback_count_; |
| 281 uc32 pushback_buffer_[kMaxPushback]; | 294 uc32 pushback_buffer_[kMaxPushback]; |
| 282 }; | 295 }; |
| 283 | 296 |
| 284 | 297 |
| 285 // A temporary scope stores information during parsing, just like | 298 // A temporary scope stores information during parsing, just like |
| (...skipping 2927 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3213 args->Add(new Literal(array)); | 3226 args->Add(new Literal(array)); |
| 3214 return new Throw(new CallRuntime(constructor, NULL, args), | 3227 return new Throw(new CallRuntime(constructor, NULL, args), |
| 3215 scanner().location().beg_pos); | 3228 scanner().location().beg_pos); |
| 3216 } | 3229 } |
| 3217 | 3230 |
| 3218 | 3231 |
| 3219 // ---------------------------------------------------------------------------- | 3232 // ---------------------------------------------------------------------------- |
| 3220 // Regular expressions | 3233 // Regular expressions |
| 3221 | 3234 |
| 3222 | 3235 |
| 3223 RegExpParser::RegExpParser(unibrow::CharacterStream* in, Handle<String>* error) | 3236 RegExpParser::RegExpParser(unibrow::CharacterStream* in, |
| 3237 Handle<String>* error, |
| 3238 bool multiline_mode) |
| 3224 : current_(kEndMarker), | 3239 : current_(kEndMarker), |
| 3225 next_(kEndMarker), | 3240 next_(kEndMarker), |
| 3226 has_more_(true), | 3241 has_more_(true), |
| 3227 has_next_(true), | 3242 has_next_(true), |
| 3243 multiline_mode_(multiline_mode), |
| 3228 captures_seen_(0), | 3244 captures_seen_(0), |
| 3229 in_(in), | 3245 in_(in), |
| 3230 error_(error), | 3246 error_(error), |
| 3231 pushback_count_(0) { | 3247 pushback_count_(0) { |
| 3232 Advance(2); | 3248 Advance(2); |
| 3233 } | 3249 } |
| 3234 | 3250 |
| 3235 | 3251 |
| 3236 void RegExpParser::Advance() { | 3252 void RegExpParser::Advance() { |
| 3237 current_ = next_; | 3253 current_ = next_; |
| (...skipping 10 matching lines...) Expand all Loading... |
| 3248 } | 3264 } |
| 3249 } | 3265 } |
| 3250 | 3266 |
| 3251 | 3267 |
| 3252 void RegExpParser::Advance(int dist) { | 3268 void RegExpParser::Advance(int dist) { |
| 3253 for (int i = 0; i < dist; i++) | 3269 for (int i = 0; i < dist; i++) |
| 3254 Advance(); | 3270 Advance(); |
| 3255 } | 3271 } |
| 3256 | 3272 |
| 3257 | 3273 |
| 3274 void RegExpParser::PushBack(uc32 character) { |
| 3275 if (has_next_) { |
| 3276 ASSERT(pushback_count_ < kMaxPushback); |
| 3277 pushback_buffer_[pushback_count_] = next_; |
| 3278 pushback_count_++; |
| 3279 } |
| 3280 if (has_more_) { |
| 3281 next_ = current_; |
| 3282 has_next_ = true; |
| 3283 } |
| 3284 current_ = character; |
| 3285 has_more_ = true; |
| 3286 } |
| 3287 |
| 3288 |
| 3289 bool RegExpParser::CanPushBack() { |
| 3290 return (pushback_count_ < kMaxPushback); |
| 3291 } |
| 3292 |
| 3293 |
| 3258 RegExpTree* RegExpParser::ReportError(Vector<const char> message, bool* ok) { | 3294 RegExpTree* RegExpParser::ReportError(Vector<const char> message, bool* ok) { |
| 3259 *ok = false; | 3295 *ok = false; |
| 3260 *error_ = Factory::NewStringFromAscii(message, NOT_TENURED); | 3296 *error_ = Factory::NewStringFromAscii(message, NOT_TENURED); |
| 3261 return NULL; | 3297 return NULL; |
| 3262 } | 3298 } |
| 3263 | 3299 |
| 3264 | 3300 |
| 3265 // Pattern :: | 3301 // Pattern :: |
| 3266 // Disjunction | 3302 // Disjunction |
| 3267 RegExpTree* RegExpParser::ParsePattern(bool* ok) { | 3303 RegExpTree* RegExpParser::ParsePattern(bool* ok) { |
| (...skipping 83 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3351 default: | 3387 default: |
| 3352 return false; | 3388 return false; |
| 3353 } | 3389 } |
| 3354 } | 3390 } |
| 3355 | 3391 |
| 3356 | 3392 |
| 3357 bool RegExpParser::ParseBackreferenceIndex(int* index_out) { | 3393 bool RegExpParser::ParseBackreferenceIndex(int* index_out) { |
| 3358 ASSERT_EQ('\\', current()); | 3394 ASSERT_EQ('\\', current()); |
| 3359 ASSERT('1' <= next() && next() <= '9'); | 3395 ASSERT('1' <= next() && next() <= '9'); |
| 3360 ASSERT_EQ(0, pushback_count_); | 3396 ASSERT_EQ(0, pushback_count_); |
| 3397 // Try to parse a decimal literal that is no greater than the number |
| 3398 // of previously encountered left capturing parentheses. |
| 3399 // This is not according to the ECMAScript specification. According to |
| 3400 // that, one must accept values up to the total number of left capturing |
| 3401 // parentheses in the entire input, even if they are meaningless. |
| 3361 if (captures_seen_ == 0) | 3402 if (captures_seen_ == 0) |
| 3362 return false; | 3403 return false; |
| 3363 int value = next() - '0'; | 3404 int value = next() - '0'; |
| 3364 if (value > captures_seen_) | 3405 if (value > captures_seen_) |
| 3365 return false; | 3406 return false; |
| 3366 static const int kMaxChars = kMaxPushback - 2; | 3407 static const int kMaxChars = kMaxPushback - 2; |
| 3367 EmbeddedVector<uc32, kMaxChars> chars_seen; | 3408 EmbeddedVector<uc32, kMaxChars> chars_seen; |
| 3368 chars_seen[0] = next(); | 3409 chars_seen[0] = next(); |
| 3369 int char_count = 1; | 3410 int char_count = 1; |
| 3370 Advance(2); | 3411 Advance(2); |
| 3371 while (true) { | 3412 while (true) { |
| 3372 uc32 c = current(); | 3413 uc32 c = current(); |
| 3373 if (IsDecimalDigit(c)) { | 3414 if (IsDecimalDigit(c)) { |
| 3374 int next_value = 10 * value + (c - '0'); | 3415 int next_value = 10 * value + (c - '0'); |
| 3375 // To avoid reading past the end of the stack-allocated pushback | 3416 // chars_seen holds exactly kMaxChars digits, so give up once it is full |
| 3376 // buffers we only read kMaxChars before giving up. | 3417 // (>=, not >) instead of storing one more digit past its end. |
| 3377 if (next_value > captures_seen_ || char_count > kMaxChars) { | 3418 if (next_value > captures_seen_ || char_count >= kMaxChars) { |
| 3378 // If we give up we have to push the characters we read back | 3419 // If we give up we have to push the characters we read back |
| 3379 // onto the pushback buffer in the reverse order. | 3420 // onto the pushback buffer in the reverse order. |
| 3380 pushback_buffer_[0] = current(); | 3421 for (int i = 0; i < char_count; i++) { |
| 3381 for (int i = 0; i < char_count; i++) | 3422 PushBack(chars_seen[char_count - i - 1]); |
| 3382 pushback_buffer_[i + 1] = chars_seen[char_count - i - 1]; | 3423 } |
| 3383 pushback_buffer_[char_count + 1] = '\\'; | 3424 PushBack('\\'); |
| 3384 pushback_count_ = char_count + 2; | |
| 3385 // Then, once we've filled up the buffer, we read the two | |
| 3386 // first characters into the lookahead. This is a roundabout | |
| 3387 // way of doing it but makes the code simpler. | |
| 3388 Advance(2); | |
| 3389 return false; | 3425 return false; |
| 3390 } else { | |
| 3391 value = next_value; | |
| 3392 chars_seen[char_count++] = current(); | |
| 3393 Advance(); | |
| 3394 } | 3426 } |
| 3427 value = next_value; |
| 3428 chars_seen[char_count++] = current(); |
| 3429 Advance(); |
| 3395 } else { | 3430 } else { |
| 3396 *index_out = value; | 3431 *index_out = value; |
| 3397 return true; | 3432 return true; |
| 3398 } | 3433 } |
| 3399 } | 3434 } |
| 3400 } | 3435 } |
| 3401 | 3436 |
| 3402 | 3437 |
| 3403 // Term :: | 3438 // Term :: |
| 3404 // Assertion | 3439 // Assertion |
| 3405 // Atom | 3440 // Atom |
| 3406 // Atom Quantifier | 3441 // Atom Quantifier |
| 3407 RegExpTree* RegExpParser::ParseTerm(bool* ok) { | 3442 RegExpTree* RegExpParser::ParseTerm(bool* ok) { |
| 3408 RegExpTree* atom = NULL; | 3443 RegExpTree* atom = NULL; |
| 3409 switch (current()) { | 3444 switch (current()) { |
| 3410 // Assertion :: | 3445 // Assertion :: |
| 3411 // ^ | 3446 // ^ |
| 3412 // $ | 3447 // $ |
| 3413 // \ b | 3448 // \ b |
| 3414 // \ B | 3449 // \ B |
| 3415 case '^': | 3450 case '^': |
| 3416 Advance(); | 3451 Advance(); |
| 3417 // Make the type of assertion dependent on multi/nonmultiline. | 3452 return new RegExpAssertion( |
| 3418 return new RegExpAssertion(RegExpAssertion::START_OF_INPUT); | 3453 multiline_mode_ ? RegExpAssertion::START_OF_LINE |
| 3454 : RegExpAssertion::START_OF_INPUT); |
| 3419 case '$': | 3455 case '$': |
| 3420 Advance(); | 3456 Advance(); |
| 3421 // Make the type of assertion dependent on multi/nonmultiline. | 3457 return new RegExpAssertion( |
| 3422 return new RegExpAssertion(RegExpAssertion::END_OF_INPUT); | 3458 multiline_mode_ ? RegExpAssertion::END_OF_LINE |
| 3459 : RegExpAssertion::END_OF_INPUT); |
| 3423 case '.': | 3460 case '.': |
| 3424 Advance(); | 3461 Advance(); |
| 3425 atom = new RegExpCharacterClass(CharacterRange::CharacterClass('.')); | 3462 atom = new RegExpCharacterClass(CharacterRange::CharacterClass('.')); |
| 3426 break; | 3463 break; |
| 3427 case '(': | 3464 case '(': |
| 3428 atom = ParseGroup(CHECK_OK); | 3465 atom = ParseGroup(CHECK_OK); |
| 3429 break; | 3466 break; |
| 3430 case '[': | 3467 case '[': |
| 3431 atom = ParseCharacterClass(CHECK_OK); | 3468 atom = ParseCharacterClass(CHECK_OK); |
| 3432 break; | 3469 break; |
| (...skipping 20 matching lines...) Expand all Loading... |
| 3453 goto has_read_atom; | 3490 goto has_read_atom; |
| 3454 } | 3491 } |
| 3455 case '1': case '2': case '3': case '4': case '5': case '6': | 3492 case '1': case '2': case '3': case '4': case '5': case '6': |
| 3456 case '7': case '8': case '9': { | 3493 case '7': case '8': case '9': { |
| 3457 int index = 0; | 3494 int index = 0; |
| 3458 if (ParseBackreferenceIndex(&index)) { | 3495 if (ParseBackreferenceIndex(&index)) { |
| 3459 atom = new RegExpBackreference(index); | 3496 atom = new RegExpBackreference(index); |
| 3460 goto has_read_atom; | 3497 goto has_read_atom; |
| 3461 } else { | 3498 } else { |
| 3462 // If this is not a backreference we go to the atom parser | 3499 // If this is not a backreference we go to the atom parser |
| 3463 // which will read it as an octal escape. | 3500 // which will read it as an octal escape or identity escape. |
| 3464 goto parse_atom; | 3501 goto parse_atom; |
| 3465 } | 3502 } |
| 3466 } | 3503 } |
| 3467 default: | 3504 default: |
| 3468 goto parse_atom; | 3505 goto parse_atom; |
| 3469 } | 3506 } |
| 3470 } | 3507 } |
| 3471 // All other escapes fall through to the default case since | 3508 // All other escapes fall through to the default case since |
| 3472 // they correspond to single characters that can be | 3509 // they correspond to single characters that can be |
| 3473 // represented within atoms. | 3510 // represented within atoms. |
| (...skipping 110 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3584 uc32 escape = ParseCharacterEscape(CHECK_OK); | 3621 uc32 escape = ParseCharacterEscape(CHECK_OK); |
| 3585 buf->Add(escape); | 3622 buf->Add(escape); |
| 3586 } | 3623 } |
| 3587 } else { | 3624 } else { |
| 3588 break; | 3625 break; |
| 3589 } | 3626 } |
| 3590 } | 3627 } |
| 3591 return new RegExpAtom(buf->ToConstVector()); | 3628 return new RegExpAtom(buf->ToConstVector()); |
| 3592 } | 3629 } |
| 3593 | 3630 |
| 3631 // Upper and lower case letters differ by one bit. Parenthesized: == binds tighter than ^. |
| 3632 STATIC_CHECK(('a' ^ 'A') == 0x20); |
| 3594 | 3633 |
| 3595 uc32 RegExpParser::ParseControlEscape(bool* ok) { | 3634 uc32 RegExpParser::ParseControlEscape(bool* ok) { |
| 3596 ASSERT(current() == 'c'); | 3635 ASSERT(current() == 'c'); |
| 3597 Advance(); | 3636 Advance(); |
| 3598 if (!has_more()) { | 3637 if (!has_more()) { |
| 3599 ReportError(CStrVector("\\c at end of pattern"), ok); | 3638 ReportError(CStrVector("\\c at end of pattern"), ok); |
| 3600 return '\0'; | 3639 return '\0'; |
| 3601 } else { | |
| 3602 uc32 letter = current(); | |
| 3603 if (!('a' <= letter && letter <= 'z') && | |
| 3604 !('A' <= letter && letter <= 'Z')) { | |
| 3605 ReportError(CStrVector("Illegal control letter"), ok); | |
| 3606 return '\0'; | |
| 3607 } | |
| 3608 Advance(); | |
| 3609 return letter & ((1 << 5) - 1); | |
| 3610 } | 3640 } |
| 3641 uc32 letter = current() & ~(0x20); // Collapse upper and lower case letters. |
| 3642 if (letter < 'A' || 'Z' < letter) { |
| 3643 // Non-spec error-correction: "\c" followed by non-control letter is |
| 3644 // interpreted as an IdentityEscape. |
| 3645 return 'c'; |
| 3646 } |
| 3647 Advance(); |
| 3648 return letter & 0x1f; // Remainder modulo 32, per specification. |
| 3611 } | 3649 } |
| 3612 | 3650 |
| 3613 | 3651 |
| 3614 uc32 RegExpParser::ParseOctalLiteral(bool* ok) { | 3652 uc32 RegExpParser::ParseOctalLiteral(bool* ok) { |
| 3615 ASSERT('0' <= current() && current() <= '7'); | 3653 ASSERT('0' <= current() && current() <= '7'); |
| 3616 // Here we're really supposed to break out after the first digit | 3654 // For compatibility with some other browsers (not all), we parse |
| 3617 // if it is '0' but the other implementations don't do that so | 3655 // up to three octal digits with a value below 256. |
| 3618 // neither do we. Is this deviation from the spec error prone? | 3656 uc32 value = current() - '0'; |
| 3619 // Yes, it's probably as error prone as it's possible to get. Isn't | 3657 Advance(); |
| 3620 // JavaScript wonderful? | 3658 if ('0' <= current() && current() <= '7') { |
| 3621 uc32 value = 0; | 3659 value = value * 8 + current() - '0'; |
| 3622 while ('0' <= current() && current() <= '7') { | 3660 Advance(); |
| 3623 int next = (8 * value) + (current() - '0'); | 3661 if (value < 32 && '0' <= current() && current() <= '7') { |
| 3624 if (next >= 256) { | 3662 value = value * 8 + current() - '0'; |
| 3625 break; | |
| 3626 } else { | |
| 3627 value = next; | |
| 3628 Advance(); | 3663 Advance(); |
| 3629 } | 3664 } |
| 3630 } | 3665 } |
| 3631 return value; | 3666 return value; |
| 3632 } | 3667 } |
| 3633 | 3668 |
| 3634 | 3669 bool RegExpParser::ParseHexEscape(int length, uc32 *value) { |
| 3635 uc32 RegExpParser::ParseHexEscape(int length) { | 3670 static const int kMaxChars = kMaxPushback; |
| 3636 uc32 value = 0; | 3671 EmbeddedVector<uc32, kMaxChars> chars_seen; |
| 3637 for (int i = 0; i < length; i++) { | 3672 ASSERT(length <= kMaxChars); |
| 3638 int d = HexValue(current()); | 3673 uc32 val = 0; |
| 3639 if (d < 0) | 3674 bool done = false; |
| 3640 return value; | 3675 for (int i = 0; !done; i++) { |
| 3641 value = value * 16 + d; | 3676 uc32 c = current(); |
| 3677 int d = HexValue(c); |
| 3678 if (d < 0) { |
| 3679 while (i > 0) { |
| 3680 i--; |
| 3681 PushBack(chars_seen[i]); |
| 3682 } |
| 3683 return false; |
| 3684 } |
| 3685 val = val * 16 + d; |
| 3642 Advance(); | 3686 Advance(); |
| 3687 if (i < length - 1) { |
| 3688 chars_seen[i] = c; |
| 3689 } else { |
| 3690 done = true; |
| 3691 } |
| 3643 } | 3692 } |
| 3644 | 3693 *value = val; |
| 3645 return value; | 3694 return true; |
| 3646 } | 3695 } |
| 3647 | 3696 |
| 3648 | 3697 |
| 3649 uc32 RegExpParser::ParseCharacterEscape(bool* ok) { | 3698 uc32 RegExpParser::ParseCharacterEscape(bool* ok) { |
| 3650 ASSERT(current() == '\\'); | 3699 ASSERT(current() == '\\'); |
| 3651 ASSERT(has_next() && !IsSpecialEscape(next())); | 3700 ASSERT(has_next() && !IsSpecialEscape(next())); |
| 3652 Advance(); | 3701 Advance(); |
| 3653 ASSERT(current() != 'b' && current() != 'B'); | 3702 ASSERT(current() != 'b' && current() != 'B'); |
| 3654 switch (current()) { | 3703 switch (current()) { |
| 3655 // ControlEscape :: one of | 3704 // ControlEscape :: one of |
| 3656 // f n r t v | 3705 // f n r t v |
| 3657 case 'f': | 3706 case 'f': |
| 3658 Advance(); | 3707 Advance(); |
| 3659 return '\f'; | 3708 return '\f'; |
| 3660 case 'n': | 3709 case 'n': |
| 3661 Advance(); | 3710 Advance(); |
| 3662 return '\n'; | 3711 return '\n'; |
| 3663 case 'r': | 3712 case 'r': |
| 3664 Advance(); | 3713 Advance(); |
| 3665 return '\r'; | 3714 return '\r'; |
| 3666 case 't': | 3715 case 't': |
| 3667 Advance(); | 3716 Advance(); |
| 3668 return '\t'; | 3717 return '\t'; |
| 3669 case 'v': | 3718 case 'v': |
| 3670 Advance(); | 3719 Advance(); |
| 3671 return '\v'; | 3720 return '\v'; |
| 3672 case 'c': | 3721 case 'c': |
| 3722 // Spec mandates that next character is ASCII letter. |
| 3723 // If not, we error-correct by interpreting "\c" as "c". |
| 3673 return ParseControlEscape(ok); | 3724 return ParseControlEscape(ok); |
| 3674 case '0': case '1': case '2': case '3': case '4': case '5': | 3725 case '0': case '1': case '2': case '3': case '4': case '5': |
| 3675 case '6': case '7': | 3726 case '6': case '7': |
| 3676 // We're really supposed to read this as a decimal integer | 3727 // For compatibility, we interpret a decimal escape that isn't |
| 3677 // literal which is base 10 but for whatever reason the other | 3728 // a back reference (and therefore either \0 or not valid according |
| 3678 // implementations read base 8. It's hard to believe that the | 3729 // to the specification) as a 1..3 digit octal character code. |
| 3679 // spec was written by some ofthe same people that wrote the | |
| 3680 // other implementations... | |
| 3681 return ParseOctalLiteral(ok); | 3730 return ParseOctalLiteral(ok); |
| 3682 case 'x': | 3731 case 'x': { |
| 3683 Advance(); | 3732 Advance(); |
| 3684 return ParseHexEscape(2); | 3733 uc32 value; |
| 3685 case 'A': case 'Z': { | 3734 if (ParseHexEscape(2, &value)) { |
| 3735 return value; |
| 3736 } |
| 3737 // If \x is not followed by a two-digit hexadecimal, treat it |
| 3738 // as an identity escape. |
| 3739 return 'x'; |
| 3740 } |
| 3741 case 'u': { |
| 3742 Advance(); |
| 3743 uc32 value; |
| 3744 if (ParseHexEscape(4, &value)) { |
| 3745 return value; |
| 3746 } |
| 3747 // If \u is not followed by a four-digit hexadecimal, treat it |
| 3748 // as an identity escape. |
| 3749 return 'u'; |
| 3750 } |
| 3751 default: { |
| 3752 // Extended identity escape. We accept any character that hasn't |
| 3753 // been matched by a more specific case, not just the subset required |
| 3754 // by the ECMAScript specification. |
| 3686 uc32 result = current(); | 3755 uc32 result = current(); |
| 3687 Advance(); | 3756 Advance(); |
| 3688 return result; | 3757 return result; |
| 3689 } | |
| 3690 default: { | |
| 3691 ASSERT(!Scanner::kIsIdentifierPart.get(current())); | |
| 3692 uc32 result = current(); | |
| 3693 Advance(); | |
| 3694 return result; | |
| 3695 } | 3758 } |
| 3696 } | 3759 } |
| 3697 return 0; | 3760 return 0; |
| 3698 } | 3761 } |
| 3699 | 3762 |
| 3700 | 3763 |
| 3701 RegExpTree* RegExpParser::ParseGroup(bool* ok) { | 3764 RegExpTree* RegExpParser::ParseGroup(bool* ok) { |
| 3702 ASSERT_EQ(current(), '('); | 3765 ASSERT_EQ(current(), '('); |
| 3703 char type = '('; | 3766 char type = '('; |
| 3704 Advance(); | 3767 Advance(); |
| (...skipping 135 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3840 // That way, the result will be exactly the right size rather than | 3903 // That way, the result will be exactly the right size rather than |
| 3841 // the expected 50% too large. | 3904 // the expected 50% too large. |
| 3842 Vector<unsigned> store = parser.recorder()->store()->ToVector().Clone(); | 3905 Vector<unsigned> store = parser.recorder()->store()->ToVector().Clone(); |
| 3843 return new ScriptDataImpl(store); | 3906 return new ScriptDataImpl(store); |
| 3844 } | 3907 } |
| 3845 | 3908 |
| 3846 | 3909 |
| 3847 RegExpTree* ParseRegExp(unibrow::CharacterStream* stream, | 3910 RegExpTree* ParseRegExp(unibrow::CharacterStream* stream, |
| 3848 Handle<String>* error) { | 3911 Handle<String>* error) { |
| 3849 ASSERT(error->is_null()); | 3912 ASSERT(error->is_null()); |
| 3850 RegExpParser parser(stream, error); | 3913 RegExpParser parser(stream, error, false); // Get multiline flag somehow |
| 3851 bool ok = true; | 3914 bool ok = true; |
| 3852 RegExpTree* result = parser.ParsePattern(&ok); | 3915 RegExpTree* result = parser.ParsePattern(&ok); |
| 3853 if (!ok) { | 3916 if (!ok) { |
| 3854 ASSERT(result == NULL); | 3917 ASSERT(result == NULL); |
| 3855 ASSERT(!error->is_null()); | 3918 ASSERT(!error->is_null()); |
| 3856 } else { | 3919 } else { |
| 3857 ASSERT(result != NULL); | 3920 ASSERT(result != NULL); |
| 3858 ASSERT(error->is_null()); | 3921 ASSERT(error->is_null()); |
| 3859 } | 3922 } |
| 3860 return result; | 3923 return result; |
| (...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3906 start_position, | 3969 start_position, |
| 3907 is_expression); | 3970 is_expression); |
| 3908 return result; | 3971 return result; |
| 3909 } | 3972 } |
| 3910 | 3973 |
| 3911 | 3974 |
| 3912 #undef NEW | 3975 #undef NEW |
| 3913 | 3976 |
| 3914 | 3977 |
| 3915 } } // namespace v8::internal | 3978 } } // namespace v8::internal |
| OLD | NEW |