| OLD | NEW |
| 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 210 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 221 Handle<String> type, | 221 Handle<String> type, |
| 222 Vector< Handle<Object> > arguments); | 222 Vector< Handle<Object> > arguments); |
| 223 | 223 |
| 224 friend class Target; | 224 friend class Target; |
| 225 friend class TargetScope; | 225 friend class TargetScope; |
| 226 friend class LexicalScope; | 226 friend class LexicalScope; |
| 227 friend class TemporaryScope; | 227 friend class TemporaryScope; |
| 228 }; | 228 }; |
| 229 | 229 |
| 230 | 230 |
| 231 template <typename T, int initial_size> |
| 232 class BufferedZoneList { |
| 233 public: |
| 234 |
| 235 BufferedZoneList() : |
| 236 list_(NULL), last_(NULL) {} |
| 237 |
| 238 // Adds element at end of list. This element is buffered and can |
| 239 // be read using last() or removed using RemoveLast until a new Add or until |
| 240 // RemoveLast or GetList has been called. |
| 241 void Add(T* value) { |
| 242 if (last_ != NULL) { |
| 243 if (list_ == NULL) { |
| 244 list_ = new ZoneList<T*>(initial_size); |
| 245 } |
| 246 list_->Add(last_); |
| 247 } |
| 248 last_ = value; |
| 249 } |
| 250 |
| 251 T* last() { |
| 252 ASSERT(last_ != NULL); |
| 253 return last_; |
| 254 } |
| 255 |
| 256 T* RemoveLast() { |
| 257 ASSERT(last_ != NULL); |
| 258 T* result = last_; |
| 259 last_ = NULL; |
| 260 return result; |
| 261 } |
| 262 |
| 263 void Clear() { |
| 264 list_ = NULL; |
| 265 last_ = NULL; |
| 266 } |
| 267 |
| 268 int length() { |
| 269 int length = (list_ == NULL) ? 0 : list_->length(); |
| 270 return length + ((last_ == NULL) ? 0 : 1); |
| 271 } |
| 272 |
| 273 ZoneList<T*>* GetList() { |
| 274 if (list_ == NULL) { |
| 275 list_ = new ZoneList<T*>(initial_size); |
| 276 } |
| 277 if (last_ != NULL) { |
| 278 list_->Add(last_); |
| 279 last_ = NULL; |
| 280 } |
| 281 return list_; |
| 282 } |
| 283 |
| 284 private: |
| 285 ZoneList<T*>* list_; |
| 286 T* last_; |
| 287 }; |
| 288 |
| 289 // Accumulates RegExp atoms and assertions into lists of terms and alternatives. |
| 290 class RegExpBuilder { |
| 291 public: |
| 292 RegExpBuilder(); |
| 293 void AddCharacter(uc16 character); |
| 294 void AddAtom(RegExpTree* tree); |
| 295 void AddAssertion(RegExpTree* tree); |
| 296 void NewAlternative(); // '|' |
| 297 void AddQuantifierToAtom(int min, int max, bool is_greedy); |
| 298 RegExpTree* ToRegExp(); |
| 299 private: |
| 300 void FlushCharacters(); |
| 301 bool FlushTerms(); |
| 302 ZoneList<uc16>* characters_; |
| 303 BufferedZoneList<RegExpTree, 2> terms_; |
| 304 BufferedZoneList<RegExpTree, 2> alternatives_; |
| 305 #ifdef DEBUG |
| 306 enum {ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM} last_added_; |
| 307 #define LAST(x) last_added_ = x; |
| 308 #else |
| 309 #define LAST(x) |
| 310 #endif |
| 311 }; |
| 312 |
| 313 |
| 314 RegExpBuilder::RegExpBuilder() : characters_(NULL), terms_(), alternatives_() |
| 315 #ifdef DEBUG |
| 316 , last_added_(ADD_NONE) |
| 317 #endif |
| 318 {} |
| 319 |
| 320 |
| 321 void RegExpBuilder::FlushCharacters() { |
| 322 if (characters_ != NULL) { |
| 323 RegExpTree* atom = new RegExpAtom(characters_->ToConstVector()); |
| 324 characters_ = NULL; |
| 325 terms_.Add(atom); |
| 326 LAST(ADD_ATOM); |
| 327 } |
| 328 } |
| 329 |
| 330 |
| 331 void RegExpBuilder::AddCharacter(uc16 c) { |
| 332 if (characters_ == NULL) { |
| 333 characters_ = new ZoneList<uc16>(4); |
| 334 } |
| 335 characters_->Add(c); |
| 336 LAST(ADD_CHAR); |
| 337 } |
| 338 |
| 339 |
| 340 void RegExpBuilder::AddAtom(RegExpTree* atom) { |
| 341 FlushCharacters(); |
| 342 terms_.Add(atom); |
| 343 LAST(ADD_ATOM); |
| 344 } |
| 345 |
| 346 |
| 347 void RegExpBuilder::AddAssertion(RegExpTree* assert) { |
| 348 FlushCharacters(); |
| 349 terms_.Add(assert); |
| 350 LAST(ADD_ASSERT); |
| 351 } |
| 352 |
| 353 |
| 354 void RegExpBuilder::NewAlternative() { |
| 355 if (!FlushTerms()) { |
| 356 alternatives_.Add(RegExpEmpty::GetInstance()); |
| 357 } |
| 358 } |
| 359 |
| 360 |
| 361 bool RegExpBuilder::FlushTerms() { |
| 362 FlushCharacters(); |
| 363 int num_terms = terms_.length(); |
| 364 if (num_terms == 0) { |
| 365 return false; |
| 366 } |
| 367 RegExpTree* alternative; |
| 368 if (num_terms == 1) { |
| 369 alternative = terms_.last(); |
| 370 } else { |
| 371 alternative = new RegExpAlternative(terms_.GetList()); |
| 372 } |
| 373 alternatives_.Add(alternative); |
| 374 terms_.Clear(); |
| 375 LAST(ADD_NONE); |
| 376 return true; |
| 377 } |
| 378 |
| 379 |
| 380 RegExpTree* RegExpBuilder::ToRegExp() { |
| 381 FlushTerms(); |
| 382 int num_alternatives = alternatives_.length(); |
| 383 if (num_alternatives == 0) { |
| 384 return RegExpEmpty::GetInstance(); |
| 385 } |
| 386 if (num_alternatives == 1) { |
| 387 return alternatives_.last(); |
| 388 } |
| 389 return new RegExpDisjunction(alternatives_.GetList()); |
| 390 } |
| 391 |
| 392 |
| 393 void RegExpBuilder::AddQuantifierToAtom(int min, int max, bool is_greedy) { |
| 394 RegExpTree* atom; |
| 395 if (characters_ != NULL) { |
| 396 ASSERT(last_added_ == ADD_CHAR); |
| 397 // Last atom was character. |
| 398 Vector<const uc16> char_vector = characters_->ToConstVector(); |
| 399 int num_chars = char_vector.length(); |
| 400 if (num_chars > 1) { |
| 401 Vector<const uc16> prefix = char_vector.SubVector(0, num_chars - 1); |
| 402 terms_.Add(new RegExpAtom(prefix)); |
| 403 char_vector = char_vector.SubVector(num_chars - 1, num_chars); |
| 404 } |
| 405 characters_ = NULL; |
| 406 atom = new RegExpAtom(char_vector); |
| 407 } else if (terms_.length() > 0) { |
| 408 ASSERT(last_added_ == ADD_ATOM); |
| 409 atom = terms_.RemoveLast(); |
| 410 } else { |
| 411 // Only call immediately after adding an atom or character! |
| 412 UNREACHABLE(); |
| 413 return; |
| 414 } |
| 415 terms_.Add(new RegExpQuantifier(min, max, is_greedy, atom)); |
| 416 LAST(ADD_TERM); |
| 417 } |
| 418 |
| 419 |
| 231 class RegExpParser { | 420 class RegExpParser { |
| 232 public: | 421 public: |
| 233 RegExpParser(unibrow::CharacterStream* in, | 422 RegExpParser(unibrow::CharacterStream* in, |
| 234 Handle<String>* error, | 423 Handle<String>* error, |
| 235 bool multiline_mode); | 424 bool multiline_mode); |
| 236 RegExpTree* ParsePattern(bool* ok); | 425 RegExpTree* ParsePattern(bool* ok); |
| 237 RegExpTree* ParseDisjunction(bool* ok); | 426 RegExpTree* ParseDisjunction(bool* ok); |
| 238 RegExpTree* ParseAlternative(bool* ok); | |
| 239 RegExpTree* ParseTerm(bool* ok); | |
| 240 RegExpTree* ParseAtom(bool* ok); | |
| 241 RegExpTree* ParseGroup(bool* ok); | 427 RegExpTree* ParseGroup(bool* ok); |
| 242 RegExpTree* ParseCharacterClass(bool* ok); | 428 RegExpTree* ParseCharacterClass(bool* ok); |
| 243 | 429 |
| 244 // Parses a {...,...} quantifier and stores the range in the given | 430 // Parses a {...,...} quantifier and stores the range in the given |
| 245 // out parameters. | 431 // out parameters. |
| 246 void* ParseIntervalQuantifier(int* min_out, int* max_out, bool* ok); | 432 void* ParseIntervalQuantifier(int* min_out, int* max_out, bool* ok); |
| 247 | 433 |
| 248 // Parses and returns a single escaped character. The character | 434 // Parses and returns a single escaped character. The character |
| 249 // must not be 'b' or 'B' since they are usually handled specially. | 435 // must not be 'b' or 'B' since they are usually handled specially. |
| 250 uc32 ParseCharacterEscape(bool* ok); | 436 uc32 ParseClassCharacterEscape(bool* ok); |
| 251 | 437 |
| 252 // Checks whether the following is a length-digit hexadecimal number, | 438 // Checks whether the following is a length-digit hexadecimal number, |
| 253 // and sets the value if it is. | 439 // and sets the value if it is. |
| 254 bool ParseHexEscape(int length, uc32* value); | 440 bool ParseHexEscape(int length, uc32* value); |
| 255 | 441 |
| 256 uc32 ParseControlEscape(bool* ok); | 442 uc32 ParseControlLetterEscape(bool* ok); |
| 257 uc32 ParseOctalLiteral(bool* ok); | 443 uc32 ParseOctalLiteral(); |
| 258 | 444 |
| 259 // Tries to parse the input as a backreference. If successful it | 445 // Tries to parse the input as a backreference. If successful it |
| 260 // stores the result in the output parameter and returns true. If | 446 // stores the result in the output parameter and returns true. If |
| 261 // it fails it will push back the characters read so the same characters | 447 // it fails it will push back the characters read so the same characters |
| 262 // can be reparsed. | 448 // can be reparsed. |
| 263 bool ParseBackreferenceIndex(int* index_out); | 449 bool ParseBackreferenceIndex(int* index_out); |
| 264 | 450 |
| 265 CharacterRange ParseClassAtom(bool* is_char_class, | 451 CharacterRange ParseClassAtom(bool* is_char_class, |
| 266 ZoneList<CharacterRange>* ranges, | 452 ZoneList<CharacterRange>* ranges, |
| 267 bool* ok); | 453 bool* ok); |
| 268 RegExpTree* ReportError(Vector<const char> message, bool* ok); | 454 RegExpTree* ReportError(Vector<const char> message, bool* ok); |
| 269 void Advance(); | 455 void Advance(); |
| 270 void Advance(int dist); | 456 void Advance(int dist); |
| 271 // Pushes a read character (or potentially some other character) back | 457 // Pushes a read character (or potentially some other character) back |
| 272 // on the input stream. After pushing it back, it becomes the character | 458 // on the input stream. After pushing it back, it becomes the character |
| 273 // returned by current(). There is a limited amount of push-back buffer. | 459 // returned by current(). There is a limited amount of push-back buffer. |
| 274 // A function using PushBack should check that it doesn't push back more | 460 // A function using PushBack should check that it doesn't push back more |
| 275 // than kMaxPushback characters, and it should not push back more characters | 461 // than kMaxPushback characters, and it should not push back more characters |
| 276 // than it has read, or that it knows had been read prior to calling it. | 462 // than it has read. |
| 277 void PushBack(uc32 character); | 463 void PushBack(uc32 character); |
| 278 bool CanPushBack(); | 464 bool CanPushBack(); |
| 465 |
| 466 bool HasCharacterEscapes(); |
| 467 |
| 279 static const uc32 kEndMarker = unibrow::Utf8::kBadChar; | 468 static const uc32 kEndMarker = unibrow::Utf8::kBadChar; |
| 280 private: | 469 private: |
| 281 uc32 current() { return current_; } | 470 uc32 current() { return current_; } |
| 282 uc32 next() { return next_; } | 471 uc32 next() { return next_; } |
| 283 bool has_more() { return has_more_; } | 472 bool has_more() { return has_more_; } |
| 284 bool has_next() { return has_next_; } | 473 bool has_next() { return has_next_; } |
| 285 unibrow::CharacterStream* in() { return in_; } | 474 unibrow::CharacterStream* in() { return in_; } |
| 286 uc32 current_; | 475 uc32 current_; |
| 287 uc32 next_; | 476 uc32 next_; |
| 288 bool has_more_; | 477 bool has_more_; |
| 289 bool has_next_; | 478 bool has_next_; |
| 290 bool multiline_mode_; | 479 bool multiline_mode_; |
| 291 int captures_seen_; | 480 int captures_started_; |
| 292 unibrow::CharacterStream* in_; | 481 unibrow::CharacterStream* in_; |
| 293 Handle<String>* error_; | 482 Handle<String>* error_; |
| 294 static const int kMaxPushback = 5; | 483 static const int kMaxPushback = 5; |
| 295 int pushback_count_; | 484 int pushback_count_; |
| 296 uc32 pushback_buffer_[kMaxPushback]; | 485 uc32 pushback_buffer_[kMaxPushback]; |
| 486 bool has_character_escapes_; |
| 297 }; | 487 }; |
| 298 | 488 |
| 299 | 489 |
| 300 // A temporary scope stores information during parsing, just like | 490 // A temporary scope stores information during parsing, just like |
| 301 // a plain scope. However, temporary scopes are not kept around | 491 // a plain scope. However, temporary scopes are not kept around |
| 302 // after parsing or referenced by syntax trees so they can be stack- | 492 // after parsing or referenced by syntax trees so they can be stack- |
| 303 // allocated and hence used by the pre-parser. | 493 // allocated and hence used by the pre-parser. |
| 304 class TemporaryScope BASE_EMBEDDED { | 494 class TemporaryScope BASE_EMBEDDED { |
| 305 public: | 495 public: |
| 306 explicit TemporaryScope(Parser* parser); | 496 explicit TemporaryScope(Parser* parser); |
| (...skipping 2931 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3238 | 3428 |
| 3239 | 3429 |
| 3240 RegExpParser::RegExpParser(unibrow::CharacterStream* in, | 3430 RegExpParser::RegExpParser(unibrow::CharacterStream* in, |
| 3241 Handle<String>* error, | 3431 Handle<String>* error, |
| 3242 bool multiline_mode) | 3432 bool multiline_mode) |
| 3243 : current_(kEndMarker), | 3433 : current_(kEndMarker), |
| 3244 next_(kEndMarker), | 3434 next_(kEndMarker), |
| 3245 has_more_(true), | 3435 has_more_(true), |
| 3246 has_next_(true), | 3436 has_next_(true), |
| 3247 multiline_mode_(multiline_mode), | 3437 multiline_mode_(multiline_mode), |
| 3248 captures_seen_(0), | 3438 captures_started_(0), |
| 3249 in_(in), | 3439 in_(in), |
| 3250 error_(error), | 3440 error_(error), |
| 3251 pushback_count_(0) { | 3441 pushback_count_(0), |
| 3442 has_character_escapes_(false) { |
| 3252 Advance(2); | 3443 Advance(2); |
| 3253 } | 3444 } |
| 3254 | 3445 |
| 3255 | 3446 |
| 3256 void RegExpParser::Advance() { | 3447 void RegExpParser::Advance() { |
| 3257 current_ = next_; | 3448 current_ = next_; |
| 3258 has_more_ = has_next_; | 3449 has_more_ = has_next_; |
| 3259 if (pushback_count_ > 0) { | 3450 if (pushback_count_ > 0) { |
| 3260 pushback_count_--; | 3451 pushback_count_--; |
| 3261 next_ = pushback_buffer_[pushback_count_]; | 3452 next_ = pushback_buffer_[pushback_count_]; |
| 3262 has_next_ = true; | |
| 3263 } else if (in()->has_more()) { | 3453 } else if (in()->has_more()) { |
| 3264 next_ = in()->GetNext(); | 3454 next_ = in()->GetNext(); |
| 3265 } else { | 3455 } else { |
| 3266 next_ = kEndMarker; | 3456 next_ = kEndMarker; |
| 3267 has_next_ = false; | 3457 has_next_ = false; |
| 3268 } | 3458 } |
| 3269 } | 3459 } |
| 3270 | 3460 |
| 3271 | 3461 |
| 3272 void RegExpParser::Advance(int dist) { | 3462 void RegExpParser::Advance(int dist) { |
| 3273 for (int i = 0; i < dist; i++) | 3463 for (int i = 0; i < dist; i++) |
| 3274 Advance(); | 3464 Advance(); |
| 3275 } | 3465 } |
| 3276 | 3466 |
| 3277 | 3467 |
| 3278 void RegExpParser::PushBack(uc32 character) { | 3468 void RegExpParser::PushBack(uc32 character) { |
| 3279 if (has_next_) { | 3469 if (has_next_) { |
| 3280 ASSERT(pushback_count_ < kMaxPushback); | 3470 ASSERT(pushback_count_ < kMaxPushback); |
| 3281 pushback_buffer_[pushback_count_] = next_; | 3471 pushback_buffer_[pushback_count_] = next_; |
| 3282 pushback_count_++; | 3472 pushback_count_++; |
| 3283 } | 3473 } |
| 3284 if (has_more_) { | 3474 |
| 3285 next_ = current_; | 3475 next_ = current_; |
| 3286 has_next_ = true; | 3476 has_next_ = has_more_; |
| 3287 } | 3477 |
| 3288 current_ = character; | 3478 current_ = character; |
| 3289 has_more_ = true; | 3479 has_more_ = true; |
| 3290 } | 3480 } |
| 3291 | 3481 |
| 3292 | 3482 |
| 3293 bool RegExpParser::CanPushBack() { | 3483 bool RegExpParser::CanPushBack() { |
| 3294 return (pushback_count_ < kMaxPushback); | 3484 return (pushback_count_ < kMaxPushback); |
| 3295 } | 3485 } |
| 3296 | 3486 |
| 3487 // Reports whether the parsed string atoms contain any characters that were |
| 3488 // escaped in the original pattern. If not, all atoms are proper substrings |
| 3489 // of the original pattern. |
| 3490 bool RegExpParser::HasCharacterEscapes() { |
| 3491 return has_character_escapes_; |
| 3492 } |
| 3297 | 3493 |
| 3298 RegExpTree* RegExpParser::ReportError(Vector<const char> message, bool* ok) { | 3494 RegExpTree* RegExpParser::ReportError(Vector<const char> message, bool* ok) { |
| 3299 *ok = false; | 3495 *ok = false; |
| 3300 *error_ = Factory::NewStringFromAscii(message, NOT_TENURED); | 3496 *error_ = Factory::NewStringFromAscii(message, NOT_TENURED); |
| 3301 return NULL; | 3497 return NULL; |
| 3302 } | 3498 } |
| 3303 | 3499 |
| 3304 | 3500 |
| 3305 // Pattern :: | 3501 // Pattern :: |
| 3306 // Disjunction | 3502 // Disjunction |
| 3307 RegExpTree* RegExpParser::ParsePattern(bool* ok) { | 3503 RegExpTree* RegExpParser::ParsePattern(bool* ok) { |
| 3308 return ParseDisjunction(ok); | 3504 RegExpTree* result = ParseDisjunction(CHECK_OK); |
| 3505 if (has_more()) { |
| 3506 ReportError(CStrVector("Unmatched ')'"), CHECK_OK); |
| 3507 } |
| 3508 return result; |
| 3309 } | 3509 } |
| 3310 | 3510 |
| 3311 | 3511 |
| 3312 // Disjunction :: | 3512 // Disjunction :: |
| 3313 // Alternative | 3513 // Alternative |
| 3314 // Alternative | Disjunction | 3514 // Alternative | Disjunction |
| 3515 // Alternative :: |
| 3516 // [empty] |
| 3517 // Term Alternative |
| 3518 // Term :: |
| 3519 // Assertion |
| 3520 // Atom |
| 3521 // Atom Quantifier |
| 3315 RegExpTree* RegExpParser::ParseDisjunction(bool* ok) { | 3522 RegExpTree* RegExpParser::ParseDisjunction(bool* ok) { |
| 3316 RegExpTree* first = ParseAlternative(CHECK_OK); | 3523 RegExpBuilder builder; |
| 3317 if (current() == '|') { | 3524 while (true) { |
| 3318 ZoneList<RegExpTree*>* nodes = new ZoneList<RegExpTree*>(2); | 3525 switch (current()) { |
| 3319 nodes->Add(first); | 3526 case kEndMarker: |
| 3320 while (current() == '|') { | 3527 case ')': |
| 3321 Advance(); | 3528 return builder.ToRegExp(); |
| 3322 RegExpTree* next = ParseAlternative(CHECK_OK); | 3529 case '|': |
| 3323 nodes->Add(next); | 3530 Advance(); |
| 3324 } | 3531 builder.NewAlternative(); |
| 3325 return new RegExpDisjunction(nodes); | 3532 continue; |
| 3326 } else { | 3533 case '*': |
| 3327 return first; | 3534 case '+': |
| 3535 case '?': |
| 3536 case '{': |
| 3537 ReportError(CStrVector("Nothing to repeat."), CHECK_OK); |
| 3538 case '^': { |
| 3539 Advance(); |
| 3540 RegExpAssertion::Type type = |
| 3541 multiline_mode_ ? RegExpAssertion::START_OF_LINE : |
| 3542 RegExpAssertion::START_OF_INPUT; |
| 3543 builder.AddAssertion(new RegExpAssertion(type)); |
| 3544 continue; |
| 3545 } |
| 3546 case '$': { |
| 3547 Advance(); |
| 3548 RegExpAssertion::Type type = |
| 3549 multiline_mode_ ? RegExpAssertion::END_OF_LINE : |
| 3550 RegExpAssertion::END_OF_INPUT; |
| 3551 builder.AddAssertion(new RegExpAssertion(type)); |
| 3552 continue; |
| 3553 } |
| 3554 case '.': { |
| 3555 Advance(); |
| 3556 // everything except \x0a, \x0d, \u2028 and \u2029 |
| 3557 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2); |
| 3558 CharacterRange::AddClassEscape('.', ranges); |
| 3559 RegExpTree* atom = new RegExpCharacterClass(ranges, false); |
| 3560 builder.AddAtom(atom); |
| 3561 break; |
| 3562 } |
| 3563 case '(': { |
| 3564 RegExpTree* atom = ParseGroup(CHECK_OK); |
| 3565 builder.AddAtom(atom); |
| 3566 break; |
| 3567 } |
| 3568 case '[': { |
| 3569 RegExpTree* atom = ParseCharacterClass(CHECK_OK); |
| 3570 builder.AddAtom(atom); |
| 3571 break; |
| 3572 } |
| 3573 // Atom :: |
| 3574 // \ AtomEscape |
| 3575 case '\\': |
| 3576 switch (next()) { |
| 3577 case kEndMarker: |
| 3578 ReportError(CStrVector("\\ at end of pattern"), CHECK_OK); |
| 3579 case 'b': |
| 3580 Advance(2); |
| 3581 builder.AddAssertion( |
| 3582 new RegExpAssertion(RegExpAssertion::BOUNDARY)); |
| 3583 continue; |
| 3584 case 'B': |
| 3585 Advance(2); |
| 3586 builder.AddAssertion( |
| 3587 new RegExpAssertion(RegExpAssertion::NON_BOUNDARY)); |
| 3588 continue; |
| 3589 // AtomEscape :: |
| 3590 // CharacterClassEscape |
| 3591 // |
| 3592 // CharacterClassEscape :: one of |
| 3593 // d D s S w W |
| 3594 case 'd': case 'D': case 's': case 'S': case 'w': case 'W': { |
| 3595 uc32 c = next(); |
| 3596 Advance(2); |
| 3597 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2); |
| 3598 CharacterRange::AddClassEscape(c, ranges); |
| 3599 RegExpTree* atom = new RegExpCharacterClass(ranges, false); |
| 3600 builder.AddAtom(atom); |
| 3601 goto has_read_atom; // Avoid setting has_character_escapes_. |
| 3602 } |
| 3603 case '1': case '2': case '3': case '4': case '5': case '6': |
| 3604 case '7': case '8': case '9': { |
| 3605 int index = 0; |
| 3606 if (ParseBackreferenceIndex(&index)) { |
| 3607 RegExpTree* atom = new RegExpBackreference(index); |
| 3608 builder.AddAtom(atom); |
| 3609 goto has_read_atom; // Avoid setting has_character_escapes_. |
| 3610 } |
| 3611 uc32 first_digit = next(); |
| 3612 if (first_digit == '8' || first_digit == '9') { |
| 3613 // Treat as identity escape |
| 3614 builder.AddCharacter(first_digit); |
| 3615 Advance(2); |
| 3616 break; |
| 3617 } |
| 3618 } |
| 3619 // FALLTHROUGH |
| 3620 case '0': { |
| 3621 Advance(); |
| 3622 uc32 octal = ParseOctalLiteral(); |
| 3623 builder.AddCharacter(octal); |
| 3624 break; |
| 3625 } |
| 3626 // ControlEscape :: one of |
| 3627 // f n r t v |
| 3628 case 'f': |
| 3629 Advance(2); |
| 3630 builder.AddCharacter('\f'); |
| 3631 break; |
| 3632 case 'n': |
| 3633 Advance(2); |
| 3634 builder.AddCharacter('\n'); |
| 3635 break; |
| 3636 case 'r': |
| 3637 Advance(2); |
| 3638 builder.AddCharacter('\r'); |
| 3639 break; |
| 3640 case 't': |
| 3641 Advance(2); |
| 3642 builder.AddCharacter('\t'); |
| 3643 break; |
| 3644 case 'v': |
| 3645 Advance(2); |
| 3646 builder.AddCharacter('\v'); |
| 3647 break; |
| 3648 case 'c': { |
| 3649 Advance(2); |
| 3650 uc32 control = ParseControlLetterEscape(ok); |
| 3651 builder.AddCharacter(control); |
| 3652 break; |
| 3653 } |
| 3654 case 'x': { |
| 3655 Advance(2); |
| 3656 uc32 value; |
| 3657 if (ParseHexEscape(2, &value)) { |
| 3658 builder.AddCharacter(value); |
| 3659 } else { |
| 3660 builder.AddCharacter('x'); |
| 3661 } |
| 3662 break; |
| 3663 } |
| 3664 case 'u': { |
| 3665 Advance(2); |
| 3666 uc32 value; |
| 3667 if (ParseHexEscape(4, &value)) { |
| 3668 builder.AddCharacter(value); |
| 3669 } else { |
| 3670 builder.AddCharacter('u'); |
| 3671 } |
| 3672 break; |
| 3673 } |
| 3674 default: |
| 3675 // Identity escape. |
| 3676 builder.AddCharacter(next()); |
| 3677 Advance(2); |
| 3678 break; |
| 3679 } |
| 3680 has_character_escapes_ = true; |
| 3681 break; |
| 3682 default: |
| 3683 builder.AddCharacter(current()); |
| 3684 Advance(); |
| 3685 break; |
| 3686 } // end switch(current()) |
| 3687 |
| 3688 has_read_atom: |
| 3689 int min; |
| 3690 int max; |
| 3691 switch (current()) { |
| 3692 // QuantifierPrefix :: |
| 3693 // * |
| 3694 // + |
| 3695 // ? |
| 3696 // { |
| 3697 case '*': |
| 3698 min = 0; |
| 3699 max = RegExpQuantifier::kInfinity; |
| 3700 Advance(); |
| 3701 break; |
| 3702 case '+': |
| 3703 min = 1; |
| 3704 max = RegExpQuantifier::kInfinity; |
| 3705 Advance(); |
| 3706 break; |
| 3707 case '?': |
| 3708 min = 0; |
| 3709 max = 1; |
| 3710 Advance(); |
| 3711 break; |
| 3712 case '{': |
| 3713 ParseIntervalQuantifier(&min, &max, CHECK_OK); |
| 3714 break; |
| 3715 default: |
| 3716 continue; |
| 3717 } |
| 3718 bool is_greedy = true; |
| 3719 if (current() == '?') { |
| 3720 is_greedy = false; |
| 3721 Advance(); |
| 3722 } |
| 3723 builder.AddQuantifierToAtom(min, max, is_greedy); |
| 3328 } | 3724 } |
| 3329 } | 3725 } |
| 3330 | 3726 |
| 3331 | |
| 3332 static bool IsAlternativeTerminator(uc32 c) { | |
| 3333 return c == '|' || c == ')' || c == RegExpParser::kEndMarker; | |
| 3334 } | |
| 3335 | |
| 3336 | |
| 3337 // Alternative :: | |
| 3338 // [empty] | |
| 3339 // Alternative Term | |
| 3340 RegExpTree* RegExpParser::ParseAlternative(bool* ok) { | |
| 3341 if (!IsAlternativeTerminator(current())) { | |
| 3342 RegExpTree* first = ParseTerm(CHECK_OK); | |
| 3343 if (!IsAlternativeTerminator(current())) { | |
| 3344 ZoneList<RegExpTree*>* nodes = new ZoneList<RegExpTree*>(2); | |
| 3345 nodes->Add(first); | |
| 3346 while (!IsAlternativeTerminator(current())) { | |
| 3347 RegExpTree* next = ParseTerm(CHECK_OK); | |
| 3348 nodes->Add(next); | |
| 3349 } | |
| 3350 return new RegExpAlternative(nodes); | |
| 3351 } else { | |
| 3352 return first; | |
| 3353 } | |
| 3354 } else { | |
| 3355 return RegExpEmpty::GetInstance(); | |
| 3356 } | |
| 3357 } | |
| 3358 | |
| 3359 | |
| 3360 class SourceCharacter { | 3727 class SourceCharacter { |
| 3361 public: | 3728 public: |
| 3362 static bool Is(uc32 c) { | 3729 static bool Is(uc32 c) { |
| 3363 switch (c) { | 3730 switch (c) { |
| 3364 // case ']': case '}': | 3731 // case ']': case '}': |
| 3365 // In spidermonkey and jsc these are treated as source characters | 3732 // In spidermonkey and jsc these are treated as source characters |
| 3366 // so we do too. | 3733 // so we do too. |
| 3367 case '^': case '$': case '\\': case '.': case '*': case '+': | 3734 case '^': case '$': case '\\': case '.': case '*': case '+': |
| 3368 case '?': case '(': case ')': case '[': case '{': case '|': | 3735 case '?': case '(': case ')': case '[': case '{': case '|': |
| 3369 case RegExpParser::kEndMarker: | 3736 case RegExpParser::kEndMarker: |
| 3370 return false; | 3737 return false; |
| 3371 default: | 3738 default: |
| 3372 return true; | 3739 return true; |
| 3373 } | 3740 } |
| 3374 } | 3741 } |
| 3375 }; | 3742 }; |
| 3376 | 3743 |
| 3377 | 3744 |
| 3378 static unibrow::Predicate<SourceCharacter> source_character; | 3745 static unibrow::Predicate<SourceCharacter> source_character; |
| 3379 | 3746 |
| 3380 | 3747 |
| 3381 static inline bool IsSourceCharacter(uc32 c) { | 3748 static inline bool IsSourceCharacter(uc32 c) { |
| 3382 return source_character.get(c); | 3749 return source_character.get(c); |
| 3383 } | 3750 } |
| 3384 | 3751 |
| 3385 | 3752 #ifdef DEBUG |
| 3386 static bool IsSpecialEscape(uc32 c) { | 3753 // Currently only used in an ASSERT. |
| 3754 static bool IsSpecialClassEscape(uc32 c) { |
| 3387 switch (c) { | 3755 switch (c) { |
| 3388 case 'b': case 'B': case 'd': case 'D': case 's': case 'S': | 3756 case 'd': case 'D': |
| 3757 case 's': case 'S': |
| 3389 case 'w': case 'W': | 3758 case 'w': case 'W': |
| 3390 return true; | 3759 return true; |
| 3391 default: | 3760 default: |
| 3392 return false; | 3761 return false; |
| 3393 } | 3762 } |
| 3394 } | 3763 } |
| 3764 #endif |
| 3395 | 3765 |
| 3396 | 3766 |
| 3397 bool RegExpParser::ParseBackreferenceIndex(int* index_out) { | 3767 bool RegExpParser::ParseBackreferenceIndex(int* index_out) { |
| 3398 ASSERT_EQ('\\', current()); | 3768 ASSERT_EQ('\\', current()); |
| 3399 ASSERT('1' <= next() && next() <= '9'); | 3769 ASSERT('1' <= next() && next() <= '9'); |
| 3400 ASSERT_EQ(0, pushback_count_); | 3770 ASSERT_EQ(0, pushback_count_); |
| 3401 // Try to parse a decimal literal that is less than then number | 3771 // Try to parse a decimal literal that is no greater than the number |
| 3402 // of previously encountered left capturing parentheses. | 3772 // of previously encountered left capturing parentheses. |
| 3403 // This is not according to the ECMAScript specification. According to | 3773 // This is not according to the ECMAScript specification. According to |
| 3404 // that, one must accept values up to the total number of left capturing | 3774 // that, one must accept values up to the total number of left capturing |
| 3405 // parentheses in the entire input, even if they are meaningless. | 3775 // parentheses in the entire input, even if they are meaningless. |
| 3406 if (captures_seen_ == 0) | 3776 if (captures_started_ == 0) |
| 3407 return false; | 3777 return false; |
| 3408 int value = next() - '0'; | 3778 int value = next() - '0'; |
| 3409 if (value > captures_seen_) | 3779 if (value > captures_started_) |
| 3410 return false; | 3780 return false; |
| 3411 static const int kMaxChars = kMaxPushback - 2; | 3781 static const int kMaxChars = kMaxPushback - 2; |
| 3412 EmbeddedVector<uc32, kMaxChars> chars_seen; | 3782 EmbeddedVector<uc32, kMaxChars> chars_seen; |
| 3413 chars_seen[0] = next(); | 3783 chars_seen[0] = next(); |
| 3414 int char_count = 1; | 3784 int char_count = 1; |
| 3415 Advance(2); | 3785 Advance(2); |
| 3416 while (true) { | 3786 while (true) { |
| 3417 uc32 c = current(); | 3787 uc32 c = current(); |
| 3418 if (IsDecimalDigit(c)) { | 3788 if (IsDecimalDigit(c)) { |
| 3419 int next_value = 10 * value + (c - '0'); | 3789 value = 10 * value + (c - '0'); |
| 3420 // To avoid reading past the end of the stack-allocated pushback | 3790 // To avoid reading past the end of the stack-allocated pushback |
| 3421 // buffers we only read kMaxChars before giving up. | 3791 // buffers we only read kMaxChars before giving up. |
| 3422 if (next_value > captures_seen_ || char_count > kMaxChars) { | 3792 if (value > captures_started_ || char_count > kMaxChars) { |
| 3423 // If we give up we have to push the characters we read back | 3793 // If we give up we have to push the characters we read back |
| 3424 // onto the pushback buffer in the reverse order. | 3794 // onto the pushback buffer in the reverse order. |
| 3425 for (int i = 0; i < char_count; i++) { | 3795 for (int i = 0; i < char_count; i++) { |
| 3426 PushBack(chars_seen[char_count - i - 1]); | 3796 PushBack(chars_seen[char_count - i - 1]); |
| 3427 } | 3797 } |
| 3428 PushBack('\\'); | 3798 PushBack('\\'); |
| 3429 return false; | 3799 return false; |
| 3430 } | 3800 } |
| 3431 value = next_value; | |
| 3432 chars_seen[char_count++] = current(); | 3801 chars_seen[char_count++] = current(); |
| 3433 Advance(); | 3802 Advance(); |
| 3434 } else { | 3803 } else { |
| 3435 *index_out = value; | 3804 break; |
| 3436 return true; | |
| 3437 } | 3805 } |
| 3438 } | 3806 } |
| 3807 *index_out = value; |
| 3808 return true; |
| 3439 } | 3809 } |
| 3440 | 3810 |
| 3441 | 3811 |
| 3442 // Term :: | |
| 3443 // Assertion | |
| 3444 // Atom | |
| 3445 // Atom Quantifier | |
| 3446 RegExpTree* RegExpParser::ParseTerm(bool* ok) { | |
| 3447 RegExpTree* atom = NULL; | |
| 3448 switch (current()) { | |
| 3449 // Assertion :: | |
| 3450 // ^ | |
| 3451 // $ | |
| 3452 // \ b | |
| 3453 // \ B | |
| 3454 case '^': | |
| 3455 Advance(); | |
| 3456 return new RegExpAssertion( | |
| 3457 multiline_mode_ ? RegExpAssertion::START_OF_LINE | |
| 3458 : RegExpAssertion::START_OF_INPUT); | |
| 3459 case '$': | |
| 3460 Advance(); | |
| 3461 return new RegExpAssertion( | |
| 3462 multiline_mode_ ? RegExpAssertion::END_OF_LINE | |
| 3463 : RegExpAssertion::END_OF_INPUT); | |
| 3464 case '.': { | |
| 3465 Advance(); | |
| 3466 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2); | |
| 3467 CharacterRange::AddClassEscape('.', ranges); | |
| 3468 atom = new RegExpCharacterClass(ranges, false); | |
| 3469 break; | |
| 3470 } | |
| 3471 case '(': | |
| 3472 atom = ParseGroup(CHECK_OK); | |
| 3473 break; | |
| 3474 case '[': | |
| 3475 atom = ParseCharacterClass(CHECK_OK); | |
| 3476 break; | |
| 3477 // Atom :: | |
| 3478 // \ AtomEscape | |
| 3479 case '\\': | |
| 3480 if (has_next()) { | |
| 3481 switch (next()) { | |
| 3482 case 'b': | |
| 3483 Advance(2); | |
| 3484 return new RegExpAssertion(RegExpAssertion::BOUNDARY); | |
| 3485 case 'B': | |
| 3486 Advance(2); | |
| 3487 return new RegExpAssertion(RegExpAssertion::NON_BOUNDARY); | |
| 3488 // AtomEscape :: | |
| 3489 // CharacterClassEscape | |
| 3490 // | |
| 3491 // CharacterClassEscape :: one of | |
| 3492 // d D s S w W | |
| 3493 case 'd': case 'D': case 's': case 'S': case 'w': case 'W': { | |
| 3494 uc32 c = next(); | |
| 3495 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2); | |
| 3496 CharacterRange::AddClassEscape(c, ranges); | |
| 3497 Advance(2); | |
| 3498 atom = new RegExpCharacterClass(ranges, false); | |
| 3499 goto has_read_atom; | |
| 3500 } | |
| 3501 case '1': case '2': case '3': case '4': case '5': case '6': | |
| 3502 case '7': case '8': case '9': { | |
| 3503 int index = 0; | |
| 3504 if (ParseBackreferenceIndex(&index)) { | |
| 3505 atom = new RegExpBackreference(index); | |
| 3506 goto has_read_atom; | |
| 3507 } else { | |
| 3508 // If this is not a backreference we go to the atom parser | |
| 3509 // which will read it as an octal escape or identity escape. | |
| 3510 goto parse_atom; | |
| 3511 } | |
| 3512 } | |
| 3513 default: | |
| 3514 goto parse_atom; | |
| 3515 } | |
| 3516 } | |
| 3517 // All other escapes fall through to the default case since | |
| 3518 // they correspond to single characters that can be | |
| 3519 // represented within atoms. | |
| 3520 default: { | |
| 3521 parse_atom: | |
| 3522 atom = ParseAtom(CHECK_OK); | |
| 3523 break; | |
| 3524 } | |
| 3525 } | |
| 3526 has_read_atom: | |
| 3527 int min; | |
| 3528 int max; | |
| 3529 switch (current()) { | |
| 3530 // QuantifierPrefix :: | |
| 3531 // * | |
| 3532 // + | |
| 3533 // ? | |
| 3534 // { | |
| 3535 case '*': | |
| 3536 min = 0; | |
| 3537 max = RegExpQuantifier::kInfinity; | |
| 3538 Advance(); | |
| 3539 break; | |
| 3540 case '+': | |
| 3541 min = 1; | |
| 3542 max = RegExpQuantifier::kInfinity; | |
| 3543 Advance(); | |
| 3544 break; | |
| 3545 case '?': | |
| 3546 min = 0; | |
| 3547 max = 1; | |
| 3548 Advance(); | |
| 3549 break; | |
| 3550 case '{': | |
| 3551 ParseIntervalQuantifier(&min, &max, CHECK_OK); | |
| 3552 break; | |
| 3553 default: | |
| 3554 return atom; | |
| 3555 } | |
| 3556 bool is_greedy = true; | |
| 3557 if (current() == '?') { | |
| 3558 is_greedy = false; | |
| 3559 Advance(); | |
| 3560 } | |
| 3561 return new RegExpQuantifier(min, max, is_greedy, atom); | |
| 3562 } | |
| 3563 | |
| 3564 | |
| 3565 // QuantifierPrefix :: | 3812 // QuantifierPrefix :: |
| 3566 // { DecimalDigits } | 3813 // { DecimalDigits } |
| 3567 // { DecimalDigits , } | 3814 // { DecimalDigits , } |
| 3568 // { DecimalDigits , DecimalDigits } | 3815 // { DecimalDigits , DecimalDigits } |
| 3569 void* RegExpParser::ParseIntervalQuantifier(int* min_out, | 3816 void* RegExpParser::ParseIntervalQuantifier(int* min_out, |
| 3570 int* max_out, | 3817 int* max_out, |
| 3571 bool* ok) { | 3818 bool* ok) { |
| 3572 ASSERT_EQ(current(), '{'); | 3819 ASSERT_EQ(current(), '{'); |
| 3573 static const char* kInvalidQuantifier = "Invalid quantifier"; | 3820 static const char* kInvalidQuantifier = "Invalid quantifier"; |
| 3574 Advance(); | 3821 Advance(); |
| (...skipping 30 matching lines...) Expand all Loading... |
| 3605 } | 3852 } |
| 3606 } else { | 3853 } else { |
| 3607 ReportError(CStrVector(kInvalidQuantifier), CHECK_OK); | 3854 ReportError(CStrVector(kInvalidQuantifier), CHECK_OK); |
| 3608 } | 3855 } |
| 3609 *min_out = min; | 3856 *min_out = min; |
| 3610 *max_out = max; | 3857 *max_out = max; |
| 3611 return NULL; | 3858 return NULL; |
| 3612 } | 3859 } |
| 3613 | 3860 |
| 3614 | 3861 |
| 3615 RegExpTree* RegExpParser::ParseAtom(bool* ok) { | |
| 3616 ASSERT(current() == '\\' || IsSourceCharacter(current())); | |
| 3617 ZoneList<uc16>* buf = new ZoneList<uc16>(4); | |
| 3618 while (true) { | |
| 3619 if (IsSourceCharacter(current())) { | |
| 3620 buf->Add(current()); | |
| 3621 Advance(); | |
| 3622 } else if (current() == '\\') { | |
| 3623 if (!has_next()) { | |
| 3624 ReportError(CStrVector("\\ at end of pattern"), CHECK_OK); | |
| 3625 } else if (IsSpecialEscape(next())) { | |
| 3626 // If the next thing we see is a special escape we stop | |
| 3627 // reading this atom. | |
| 3628 break; | |
| 3629 } else { | |
| 3630 uc32 escape = ParseCharacterEscape(CHECK_OK); | |
| 3631 buf->Add(escape); | |
| 3632 } | |
| 3633 } else { | |
| 3634 break; | |
| 3635 } | |
| 3636 } | |
| 3637 return new RegExpAtom(buf->ToConstVector()); | |
| 3638 } | |
| 3639 | |
| 3640 // Upper and lower case letters differ by one bit. | 3862 // Upper and lower case letters differ by one bit. |
| 3641 STATIC_CHECK('a'^'A' == 0x20); | 3863 STATIC_CHECK('a'^'A' == 0x20); |
| 3642 | 3864 |
| 3643 uc32 RegExpParser::ParseControlEscape(bool* ok) { | 3865 uc32 RegExpParser::ParseControlLetterEscape(bool* ok) { |
| 3644 ASSERT(current() == 'c'); | |
| 3645 Advance(); | |
| 3646 if (!has_more()) { | 3866 if (!has_more()) { |
| 3647 ReportError(CStrVector("\\c at end of pattern"), ok); | 3867 ReportError(CStrVector("\\c at end of pattern"), ok); |
| 3648 return '\0'; | 3868 return '\0'; |
| 3649 } | 3869 } |
| 3650 uc32 letter = current() & ~(0x20); // Collapse upper and lower case letters. | 3870 uc32 letter = current() & ~(0x20); // Collapse upper and lower case letters. |
| 3651 if (letter < 'A' || 'Z' < letter) { | 3871 if (letter < 'A' || 'Z' < letter) { |
| 3652 // Non-spec error-correction: "\c" followed by non-control letter is | 3872 // Non-spec error-correction: "\c" followed by non-control letter is |
| 3653 // interpreted as an IdentityEscape. | 3873 // interpreted as an IdentityEscape of 'c'. |
| 3654 return 'c'; | 3874 return 'c'; |
| 3655 } | 3875 } |
| 3656 Advance(); | 3876 Advance(); |
| 3657 return letter & 0x1f; // Remainder modulo 32, per specification. | 3877 return letter & 0x1f; // Remainder modulo 32, per specification. |
| 3658 } | 3878 } |
| 3659 | 3879 |
| 3660 | 3880 |
| 3661 uc32 RegExpParser::ParseOctalLiteral(bool* ok) { | 3881 uc32 RegExpParser::ParseOctalLiteral() { |
| 3662 ASSERT('0' <= current() && current() <= '7'); | 3882 ASSERT('0' <= current() && current() <= '7'); |
| 3663 // For compatibility with some other browsers (not all), we parse | 3883 // For compatibility with some other browsers (not all), we parse |
| 3664 // up to three octal digits with a value below 256. | 3884 // up to three octal digits with a value below 256. |
| 3665 uc32 value = current() - '0'; | 3885 uc32 value = current() - '0'; |
| 3666 Advance(); | 3886 Advance(); |
| 3667 if ('0' <= current() && current() <= '7') { | 3887 if ('0' <= current() && current() <= '7') { |
| 3668 value = value * 8 + current() - '0'; | 3888 value = value * 8 + current() - '0'; |
| 3669 Advance(); | 3889 Advance(); |
| 3670 if (value < 32 && '0' <= current() && current() <= '7') { | 3890 if (value < 32 && '0' <= current() && current() <= '7') { |
| 3671 value = value * 8 + current() - '0'; | 3891 value = value * 8 + current() - '0'; |
| 3672 Advance(); | 3892 Advance(); |
| 3673 } | 3893 } |
| 3674 } | 3894 } |
| 3675 return value; | 3895 return value; |
| 3676 } | 3896 } |
| 3677 | 3897 |
| 3898 |
| 3678 bool RegExpParser::ParseHexEscape(int length, uc32 *value) { | 3899 bool RegExpParser::ParseHexEscape(int length, uc32 *value) { |
| 3679 static const int kMaxChars = kMaxPushback; | 3900 static const int kMaxChars = kMaxPushback; |
| 3680 EmbeddedVector<uc32, kMaxChars> chars_seen; | 3901 EmbeddedVector<uc32, kMaxChars> chars_seen; |
| 3681 ASSERT(length <= kMaxChars); | 3902 ASSERT(length <= kMaxChars); |
| 3682 uc32 val = 0; | 3903 uc32 val = 0; |
| 3683 bool done = false; | 3904 bool done = false; |
| 3684 for (int i = 0; !done; i++) { | 3905 for (int i = 0; !done; i++) { |
| 3685 uc32 c = current(); | 3906 uc32 c = current(); |
| 3686 int d = HexValue(c); | 3907 int d = HexValue(c); |
| 3687 if (d < 0) { | 3908 if (d < 0) { |
| 3688 while (i > 0) { | 3909 while (i > 0) { |
| 3689 i--; | 3910 i--; |
| 3690 PushBack(chars_seen[i]); | 3911 PushBack(chars_seen[i]); |
| 3691 } | 3912 } |
| 3692 return false; | 3913 return false; |
| 3693 } | 3914 } |
| 3694 val = val * 16 + d; | 3915 val = val * 16 + d; |
| 3695 Advance(); | 3916 Advance(); |
| 3696 if (i < length - 1) { | 3917 if (i < length - 1) { |
| 3697 chars_seen[i] = c; | 3918 chars_seen[i] = c; |
| 3698 } else { | 3919 } else { |
| 3699 done = true; | 3920 done = true; |
| 3700 } | 3921 } |
| 3701 } | 3922 } |
| 3702 *value = val; | 3923 *value = val; |
| 3703 return true; | 3924 return true; |
| 3704 } | 3925 } |
| 3705 | 3926 |
| 3706 | 3927 |
| 3707 uc32 RegExpParser::ParseCharacterEscape(bool* ok) { | 3928 uc32 RegExpParser::ParseClassCharacterEscape(bool* ok) { |
| 3708 ASSERT(current() == '\\'); | 3929 ASSERT(current() == '\\'); |
| 3709 ASSERT(has_next() && !IsSpecialEscape(next())); | 3930 ASSERT(has_next() && !IsSpecialClassEscape(next())); |
| 3710 Advance(); | 3931 Advance(); |
| 3711 ASSERT(current() != 'b' && current() != 'B'); | |
| 3712 switch (current()) { | 3932 switch (current()) { |
| 3713 // ControlEscape :: one of | 3933 // ControlEscape :: one of |
| 3714 // f n r t v | 3934 // f n r t v |
| 3715 case 'f': | 3935 case 'f': |
| 3716 Advance(); | 3936 Advance(); |
| 3717 return '\f'; | 3937 return '\f'; |
| 3718 case 'n': | 3938 case 'n': |
| 3719 Advance(); | 3939 Advance(); |
| 3720 return '\n'; | 3940 return '\n'; |
| 3721 case 'r': | 3941 case 'r': |
| 3722 Advance(); | 3942 Advance(); |
| 3723 return '\r'; | 3943 return '\r'; |
| 3724 case 't': | 3944 case 't': |
| 3725 Advance(); | 3945 Advance(); |
| 3726 return '\t'; | 3946 return '\t'; |
| 3727 case 'v': | 3947 case 'v': |
| 3728 Advance(); | 3948 Advance(); |
| 3729 return '\v'; | 3949 return '\v'; |
| 3730 case 'c': | 3950 case 'c': |
| 3731 // Spec mandates that next character is ASCII letter. | 3951 return ParseControlLetterEscape(ok); |
| 3732 // If not, we error-correct by interpreting "\c" as "c". | |
| 3733 return ParseControlEscape(ok); | |
| 3734 case '0': case '1': case '2': case '3': case '4': case '5': | 3952 case '0': case '1': case '2': case '3': case '4': case '5': |
| 3735 case '6': case '7': | 3953 case '6': case '7': |
| 3736 // For compatibility, we interpret a decimal escape that isn't | 3954 // For compatibility, we interpret a decimal escape that isn't |
| 3737 // a back reference (and therefore either \0 or not valid according | 3955 // a back reference (and therefore either \0 or not valid according |
| 3738 // to the specification) as a 1..3 digit octal character code. | 3956 // to the specification) as a 1..3 digit octal character code. |
| 3739 return ParseOctalLiteral(ok); | 3957 return ParseOctalLiteral(); |
| 3740 case 'x': { | 3958 case 'x': { |
| 3741 Advance(); | 3959 Advance(); |
| 3742 uc32 value; | 3960 uc32 value; |
| 3743 if (ParseHexEscape(2, &value)) { | 3961 if (ParseHexEscape(2, &value)) { |
| 3744 return value; | 3962 return value; |
| 3745 } | 3963 } |
| 3746 // If \x is not followed by a two-digit hexadecimal, treat it | 3964 // If \x is not followed by a two-digit hexadecimal, treat it |
| 3747 // as an identity escape. | 3965 // as an identity escape. |
| 3748 return 'x'; | 3966 return 'x'; |
| 3749 } | 3967 } |
| (...skipping 27 matching lines...) Expand all Loading... |
| 3777 if (current() == '?') { | 3995 if (current() == '?') { |
| 3778 switch (next()) { | 3996 switch (next()) { |
| 3779 case ':': case '=': case '!': | 3997 case ':': case '=': case '!': |
| 3780 type = next(); | 3998 type = next(); |
| 3781 Advance(2); | 3999 Advance(2); |
| 3782 break; | 4000 break; |
| 3783 default: | 4001 default: |
| 3784 ReportError(CStrVector("Invalid group"), CHECK_OK); | 4002 ReportError(CStrVector("Invalid group"), CHECK_OK); |
| 3785 break; | 4003 break; |
| 3786 } | 4004 } |
| 4005 } else { |
| 4006 captures_started_++; |
| 3787 } | 4007 } |
| 4008 int capture_index = captures_started_; |
| 3788 RegExpTree* body = ParseDisjunction(CHECK_OK); | 4009 RegExpTree* body = ParseDisjunction(CHECK_OK); |
| 3789 if (current() != ')') { | 4010 if (current() != ')') { |
| 3790 ReportError(CStrVector("Unterminated group"), CHECK_OK); | 4011 ReportError(CStrVector("Unterminated group"), CHECK_OK); |
| 3791 } | 4012 } |
| 3792 Advance(); | 4013 Advance(); |
| 3793 if (type == '(') { | 4014 if (type == '(') { |
| 3794 captures_seen_++; | 4015 return new RegExpCapture(body, capture_index); |
| 3795 return new RegExpCapture(body); | |
| 3796 } else if (type == ':') { | 4016 } else if (type == ':') { |
| 3797 return body; | 4017 return body; |
| 3798 } else { | 4018 } else { |
| 3799 ASSERT(type == '=' || type == '!'); | 4019 ASSERT(type == '=' || type == '!'); |
| 3800 bool is_positive = (type == '='); | 4020 bool is_positive = (type == '='); |
| 3801 return new RegExpLookahead(body, is_positive); | 4021 return new RegExpLookahead(body, is_positive); |
| 3802 } | 4022 } |
| 3803 } | 4023 } |
| 3804 | 4024 |
| 3805 | 4025 |
| 3806 CharacterRange RegExpParser::ParseClassAtom(bool* is_char_class, | 4026 CharacterRange RegExpParser::ParseClassAtom(bool* is_char_class, |
| 3807 ZoneList<CharacterRange>* ranges, | 4027 ZoneList<CharacterRange>* ranges, |
| 3808 bool* ok) { | 4028 bool* ok) { |
| 3809 ASSERT_EQ(false, *is_char_class); | 4029 ASSERT_EQ(false, *is_char_class); |
| 3810 uc32 first = current(); | 4030 uc32 first = current(); |
| 3811 if (first == '\\') { | 4031 if (first == '\\') { |
| 3812 switch (next()) { | 4032 switch (next()) { |
| 3813 case 'b': | |
| 3814 Advance(2); | |
| 3815 return CharacterRange::Singleton('\b'); | |
| 3816 case 'w': case 'W': case 'd': case 'D': case 's': case 'S': { | 4033 case 'w': case 'W': case 'd': case 'D': case 's': case 'S': { |
| 3817 *is_char_class = true; | 4034 *is_char_class = true; |
| 3818 uc32 c = next(); | 4035 uc32 c = next(); |
| 3819 CharacterRange::AddClassEscape(c, ranges); | 4036 CharacterRange::AddClassEscape(c, ranges); |
| 3820 Advance(2); | 4037 Advance(2); |
| 3821 return NULL; | 4038 return NULL; |
| 3822 } | 4039 } |
| 3823 default: | 4040 default: |
| 3824 uc32 c = ParseCharacterEscape(CHECK_OK); | 4041 uc32 c = ParseClassCharacterEscape(CHECK_OK); |
| 3825 return CharacterRange::Singleton(c); | 4042 return CharacterRange::Singleton(c); |
| 3826 } | 4043 } |
| 3827 } else { | 4044 } else { |
| 3828 Advance(); | 4045 Advance(); |
| 3829 return CharacterRange::Singleton(first); | 4046 return CharacterRange::Singleton(first); |
| 3830 } | 4047 } |
| 3831 } | 4048 } |
| 3832 | 4049 |
| 3833 | 4050 |
| 3834 RegExpTree* RegExpParser::ParseCharacterClass(bool* ok) { | 4051 RegExpTree* RegExpParser::ParseCharacterClass(bool* ok) { |
| (...skipping 12 matching lines...) Expand all Loading... |
| 3847 while (has_more() && current() != ']') { | 4064 while (has_more() && current() != ']') { |
| 3848 if (current() == '-') { | 4065 if (current() == '-') { |
| 3849 Advance(); | 4066 Advance(); |
| 3850 ranges->Add(CharacterRange::Singleton('-')); | 4067 ranges->Add(CharacterRange::Singleton('-')); |
| 3851 } else { | 4068 } else { |
| 3852 bool is_char_class = false; | 4069 bool is_char_class = false; |
| 3853 CharacterRange first = ParseClassAtom(&is_char_class, ranges, CHECK_OK); | 4070 CharacterRange first = ParseClassAtom(&is_char_class, ranges, CHECK_OK); |
| 3854 if (!is_char_class) { | 4071 if (!is_char_class) { |
| 3855 if (current() == '-') { | 4072 if (current() == '-') { |
| 3856 Advance(); | 4073 Advance(); |
| 3857 CharacterRange next = ParseClassAtom(&is_char_class, ranges, CHECK_OK); | 4074 CharacterRange next = |
| 4075 ParseClassAtom(&is_char_class, ranges, CHECK_OK); |
| 3858 if (is_char_class) { | 4076 if (is_char_class) { |
| 3859 return ReportError(CStrVector(kIllegal), CHECK_OK); | 4077 return ReportError(CStrVector(kIllegal), CHECK_OK); |
| 3860 } | 4078 } |
| 3861 if (first.from() > next.to()) { | 4079 if (first.from() > next.to()) { |
| 3862 return ReportError(CStrVector(kRangeOutOfOrder), CHECK_OK); | 4080 return ReportError(CStrVector(kRangeOutOfOrder), CHECK_OK); |
| 3863 } | 4081 } |
| 3864 ranges->Add(CharacterRange::Range(first.from(), next.to())); | 4082 ranges->Add(CharacterRange::Range(first.from(), next.to())); |
| 3865 } else { | 4083 } else { |
| 3866 ranges->Add(first); | 4084 ranges->Add(first); |
| 3867 } | 4085 } |
| (...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3922 if (!parser.PreParseProgram(stream)) return NULL; | 4140 if (!parser.PreParseProgram(stream)) return NULL; |
| 3923 // The list owns the backing store so we need to clone the vector. | 4141 // The list owns the backing store so we need to clone the vector. |
| 3924 // That way, the result will be exactly the right size rather than | 4142 // That way, the result will be exactly the right size rather than |
| 3925 // the expected 50% too large. | 4143 // the expected 50% too large. |
| 3926 Vector<unsigned> store = parser.recorder()->store()->ToVector().Clone(); | 4144 Vector<unsigned> store = parser.recorder()->store()->ToVector().Clone(); |
| 3927 return new ScriptDataImpl(store); | 4145 return new ScriptDataImpl(store); |
| 3928 } | 4146 } |
| 3929 | 4147 |
| 3930 | 4148 |
| 3931 RegExpTree* ParseRegExp(unibrow::CharacterStream* stream, | 4149 RegExpTree* ParseRegExp(unibrow::CharacterStream* stream, |
| 3932 Handle<String>* error) { | 4150 Handle<String>* error, |
| 4151 bool* has_character_escapes) { |
| 3933 ASSERT(error->is_null()); | 4152 ASSERT(error->is_null()); |
| 3934 RegExpParser parser(stream, error, false); // Get multiline flag somehow | 4153 RegExpParser parser(stream, error, false); // Get multiline flag somehow |
| 3935 bool ok = true; | 4154 bool ok = true; |
| 3936 RegExpTree* result = parser.ParsePattern(&ok); | 4155 RegExpTree* result = parser.ParsePattern(&ok); |
| 3937 if (!ok) { | 4156 if (!ok) { |
| 3938 ASSERT(result == NULL); | 4157 ASSERT(result == NULL); |
| 3939 ASSERT(!error->is_null()); | 4158 ASSERT(!error->is_null()); |
| 3940 } else { | 4159 } else { |
| 3941 ASSERT(result != NULL); | 4160 ASSERT(result != NULL); |
| 3942 ASSERT(error->is_null()); | 4161 ASSERT(error->is_null()); |
| 3943 } | 4162 } |
| 4163 if (ok && has_character_escapes != NULL) { |
| 4164 *has_character_escapes = parser.HasCharacterEscapes(); |
| 4165 } |
| 3944 return result; | 4166 return result; |
| 3945 } | 4167 } |
| 3946 | 4168 |
| 3947 | 4169 |
| 3948 FunctionLiteral* MakeAST(bool compile_in_global_context, | 4170 FunctionLiteral* MakeAST(bool compile_in_global_context, |
| 3949 Handle<Script> script, | 4171 Handle<Script> script, |
| 3950 v8::Extension* extension, | 4172 v8::Extension* extension, |
| 3951 ScriptDataImpl* pre_data) { | 4173 ScriptDataImpl* pre_data) { |
| 3952 bool allow_natives_syntax = | 4174 bool allow_natives_syntax = |
| 3953 always_allow_natives_syntax || | 4175 always_allow_natives_syntax || |
| (...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3990 start_position, | 4212 start_position, |
| 3991 is_expression); | 4213 is_expression); |
| 3992 return result; | 4214 return result; |
| 3993 } | 4215 } |
| 3994 | 4216 |
| 3995 | 4217 |
| 3996 #undef NEW | 4218 #undef NEW |
| 3997 | 4219 |
| 3998 | 4220 |
| 3999 } } // namespace v8::internal | 4221 } } // namespace v8::internal |
| OLD | NEW |