Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 16 matching lines...) Expand all Loading... | |
| 27 | 27 |
| 28 #include "v8.h" | 28 #include "v8.h" |
| 29 | 29 |
| 30 #include "api.h" | 30 #include "api.h" |
| 31 #include "ast.h" | 31 #include "ast.h" |
| 32 #include "bootstrapper.h" | 32 #include "bootstrapper.h" |
| 33 #include "platform.h" | 33 #include "platform.h" |
| 34 #include "runtime.h" | 34 #include "runtime.h" |
| 35 #include "parser.h" | 35 #include "parser.h" |
| 36 #include "scopes.h" | 36 #include "scopes.h" |
| 37 #include "string-stream.h" | |
| 37 | 38 |
| 38 namespace v8 { namespace internal { | 39 namespace v8 { namespace internal { |
| 39 | 40 |
| 40 class ParserFactory; | 41 class ParserFactory; |
| 41 class ParserLog; | 42 class ParserLog; |
| 42 class TemporaryScope; | 43 class TemporaryScope; |
| 43 template <typename T> class ZoneListWrapper; | 44 template <typename T> class ZoneListWrapper; |
| 44 | 45 |
| 45 | 46 |
| 46 class Parser { | 47 class Parser { |
| (...skipping 173 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 220 Handle<String> type, | 221 Handle<String> type, |
| 221 Vector< Handle<Object> > arguments); | 222 Vector< Handle<Object> > arguments); |
| 222 | 223 |
| 223 friend class Target; | 224 friend class Target; |
| 224 friend class TargetScope; | 225 friend class TargetScope; |
| 225 friend class LexicalScope; | 226 friend class LexicalScope; |
| 226 friend class TemporaryScope; | 227 friend class TemporaryScope; |
| 227 }; | 228 }; |
| 228 | 229 |
| 229 | 230 |
| 231 template <typename T, int initial_size> | |
| 232 class BufferedZoneList { | |
| 233 public: | |
| 234 | |
| 235 BufferedZoneList() : | |
|
Mads Ager (chromium)
2008/11/25 21:09:41
All on one line.
| |
| 236 list_(NULL), last_(NULL) {} | |
| 237 | |
| 238 // Adds element at end of list. This element is buffered and can | |
| 239 // be read using last() or removed using RemoveLast until a new Add or until | |
| 240 // RemoveLast or GetList has been called. | |
| 241 void Add(T* value) { | |
| 242 if (last_ != NULL) { | |
| 243 if (list_ == NULL) { | |
| 244 list_ = new ZoneList<T*>(initial_size); | |
| 245 } | |
| 246 list_->Add(last_); | |
| 247 } | |
| 248 last_ = value; | |
| 249 } | |
| 250 | |
| 251 T* last() { | |
| 252 ASSERT(last_ != NULL); | |
| 253 return last_; | |
| 254 } | |
| 255 | |
| 256 T* RemoveLast() { | |
| 257 ASSERT(last_ != NULL); | |
| 258 T* result = last_; | |
| 259 if (list_ != NULL && list_->length() > 0) | |
| 260 last_ = list_->RemoveLast(); | |
| 261 else | |
| 262 last_ = NULL; | |
| 263 return result; | |
| 264 } | |
| 265 | |
| 266 T* Get(int i) { | |
| 267 ASSERT(0 <= i && i < length()); | |
| 268 if (list_ == NULL) { | |
| 269 ASSERT_EQ(0, i); | |
| 270 return last_; | |
| 271 } else { | |
| 272 if (i == list_->length()) { | |
| 273 ASSERT(last_ != NULL); | |
| 274 return last_; | |
| 275 } else { | |
| 276 return list_->at(i); | |
| 277 } | |
| 278 } | |
| 279 } | |
| 280 | |
| 281 void Clear() { | |
| 282 list_ = NULL; | |
| 283 last_ = NULL; | |
| 284 } | |
| 285 | |
| 286 int length() { | |
| 287 int length = (list_ == NULL) ? 0 : list_->length(); | |
| 288 return length + ((last_ == NULL) ? 0 : 1); | |
| 289 } | |
| 290 | |
| 291 ZoneList<T*>* GetList() { | |
| 292 if (list_ == NULL) { | |
| 293 list_ = new ZoneList<T*>(initial_size); | |
| 294 } | |
| 295 if (last_ != NULL) { | |
| 296 list_->Add(last_); | |
| 297 last_ = NULL; | |
| 298 } | |
| 299 return list_; | |
| 300 } | |
| 301 | |
| 302 private: | |
| 303 ZoneList<T*>* list_; | |
| 304 T* last_; | |
| 305 }; | |
| 306 | |
| 307 // Accumulates RegExp atoms and assertions into lists of terms and alternatives. | |
| 308 class RegExpBuilder { | |
| 309 public: | |
| 310 RegExpBuilder(); | |
| 311 void AddCharacter(uc16 character); | |
| 312 // "Adds" an empty expression. Does nothing except consume a | |
| 313 // following quantifier | |
| 314 void AddEmpty(); | |
| 315 void AddAtom(RegExpTree* tree); | |
| 316 void AddAssertion(RegExpTree* tree); | |
| 317 void NewAlternative(); // '|' | |
| 318 void AddQuantifierToAtom(int min, int max, bool is_greedy); | |
| 319 RegExpTree* ToRegExp(); | |
| 320 private: | |
| 321 void FlushCharacters(); | |
| 322 void FlushText(); | |
| 323 void FlushTerms(); | |
| 324 bool pending_empty_; | |
| 325 ZoneList<uc16>* characters_; | |
| 326 BufferedZoneList<RegExpTree, 2> terms_; | |
| 327 BufferedZoneList<RegExpTree, 2> text_; | |
| 328 BufferedZoneList<RegExpTree, 2> alternatives_; | |
| 329 #ifdef DEBUG | |
| 330 enum {ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM} last_added_; | |
| 331 #define LAST(x) last_added_ = x; | |
| 332 #else | |
| 333 #define LAST(x) | |
| 334 #endif | |
| 335 }; | |
| 336 | |
| 337 | |
| 338 RegExpBuilder::RegExpBuilder() | |
| 339 : pending_empty_(false), characters_(NULL), terms_(), alternatives_() | |
|
Mads Ager (chromium)
2008/11/25 21:09:41
Four space indent. More occurrences below.
| |
| 340 #ifdef DEBUG | |
| 341 , last_added_(ADD_NONE) | |
| 342 #endif | |
| 343 {} | |
| 344 | |
| 345 | |
| 346 void RegExpBuilder::FlushCharacters() { | |
| 347 pending_empty_ = false; | |
| 348 if (characters_ != NULL) { | |
| 349 RegExpTree* atom = new RegExpAtom(characters_->ToConstVector()); | |
| 350 characters_ = NULL; | |
| 351 text_.Add(atom); | |
| 352 LAST(ADD_ATOM); | |
| 353 } | |
| 354 } | |
| 355 | |
| 356 | |
| 357 void RegExpBuilder::FlushText() { | |
| 358 FlushCharacters(); | |
| 359 int num_text = text_.length(); | |
| 360 if (num_text == 0) { | |
| 361 return; | |
| 362 } else if (num_text == 1) { | |
| 363 terms_.Add(text_.last()); | |
| 364 } else { | |
| 365 RegExpText* text = new RegExpText(); | |
| 366 for (int i = 0; i < num_text; i++) | |
| 367 text_.Get(i)->AppendToText(text); | |
| 368 terms_.Add(text); | |
| 369 } | |
| 370 text_.Clear(); | |
| 371 } | |
| 372 | |
| 373 | |
| 374 void RegExpBuilder::AddCharacter(uc16 c) { | |
| 375 pending_empty_ = false; | |
| 376 if (characters_ == NULL) { | |
| 377 characters_ = new ZoneList<uc16>(4); | |
| 378 } | |
| 379 characters_->Add(c); | |
| 380 LAST(ADD_CHAR); | |
| 381 } | |
| 382 | |
| 383 | |
| 384 void RegExpBuilder::AddEmpty() { | |
| 385 pending_empty_ = true; | |
| 386 } | |
| 387 | |
| 388 | |
| 389 void RegExpBuilder::AddAtom(RegExpTree* term) { | |
| 390 if (term->IsEmpty()) { | |
| 391 AddEmpty(); | |
| 392 return; | |
| 393 } | |
| 394 if (term->IsTextElement()) { | |
| 395 FlushCharacters(); | |
| 396 text_.Add(term); | |
| 397 } else { | |
| 398 FlushText(); | |
| 399 terms_.Add(term); | |
| 400 } | |
| 401 LAST(ADD_ATOM); | |
| 402 } | |
| 403 | |
| 404 | |
| 405 void RegExpBuilder::AddAssertion(RegExpTree* assert) { | |
| 406 FlushText(); | |
| 407 terms_.Add(assert); | |
| 408 LAST(ADD_ASSERT); | |
| 409 } | |
| 410 | |
| 411 | |
| 412 void RegExpBuilder::NewAlternative() { | |
| 413 FlushTerms(); | |
| 414 } | |
| 415 | |
| 416 | |
| 417 void RegExpBuilder::FlushTerms() { | |
| 418 FlushText(); | |
| 419 int num_terms = terms_.length(); | |
| 420 RegExpTree* alternative; | |
| 421 if (num_terms == 0) { | |
| 422 alternative = RegExpEmpty::GetInstance(); | |
| 423 } else if (num_terms == 1) { | |
| 424 alternative = terms_.last(); | |
| 425 } else { | |
| 426 alternative = new RegExpAlternative(terms_.GetList()); | |
| 427 } | |
| 428 alternatives_.Add(alternative); | |
| 429 terms_.Clear(); | |
| 430 LAST(ADD_NONE); | |
| 431 } | |
| 432 | |
| 433 | |
| 434 RegExpTree* RegExpBuilder::ToRegExp() { | |
| 435 FlushTerms(); | |
| 436 int num_alternatives = alternatives_.length(); | |
| 437 if (num_alternatives == 0) { | |
| 438 return RegExpEmpty::GetInstance(); | |
| 439 } | |
| 440 if (num_alternatives == 1) { | |
| 441 return alternatives_.last(); | |
| 442 } | |
| 443 return new RegExpDisjunction(alternatives_.GetList()); | |
| 444 } | |
| 445 | |
| 446 | |
| 447 void RegExpBuilder::AddQuantifierToAtom(int min, int max, bool is_greedy) { | |
| 448 if (pending_empty_) { | |
| 449 pending_empty_ = false; | |
| 450 return; | |
| 451 } | |
| 452 RegExpTree* atom; | |
| 453 if (characters_ != NULL) { | |
| 454 ASSERT(last_added_ == ADD_CHAR); | |
| 455 // Last atom was character. | |
| 456 Vector<const uc16> char_vector = characters_->ToConstVector(); | |
| 457 int num_chars = char_vector.length(); | |
| 458 if (num_chars > 1) { | |
| 459 Vector<const uc16> prefix = char_vector.SubVector(0, num_chars - 1); | |
| 460 text_.Add(new RegExpAtom(prefix)); | |
| 461 char_vector = char_vector.SubVector(num_chars - 1, num_chars); | |
| 462 } | |
| 463 characters_ = NULL; | |
| 464 atom = new RegExpAtom(char_vector); | |
| 465 FlushText(); | |
| 466 } else if (text_.length() > 0) { | |
| 467 ASSERT(last_added_ == ADD_ATOM); | |
| 468 atom = text_.RemoveLast(); | |
| 469 FlushText(); | |
| 470 } else if (terms_.length() > 0) { | |
| 471 ASSERT(last_added_ == ADD_ATOM); | |
| 472 atom = terms_.RemoveLast(); | |
| 473 if (atom->IsLookahead() || atom->IsAssertion()) { | |
| 474 // Guaranteed not to match a non-empty string. | |
| 475 // Assertion as an atom can happen as, e.g., (?:\b) | |
| 476 LAST(ADD_TERM); | |
| 477 if (min == 0) { | |
| 478 return; | |
| 479 } | |
| 480 terms_.Add(atom); | |
| 481 return; | |
| 482 } | |
| 483 } else { | |
| 484 // Only call immediately after adding an atom or character! | |
| 485 UNREACHABLE(); | |
| 486 return; | |
| 487 } | |
| 488 terms_.Add(new RegExpQuantifier(min, max, is_greedy, atom)); | |
| 489 LAST(ADD_TERM); | |
| 490 } | |
| 491 | |
| 492 | |
| 493 class RegExpParser { | |
| 494 public: | |
| 495 RegExpParser(FlatStringReader* in, | |
| 496 Handle<String>* error, | |
| 497 bool multiline_mode); | |
| 498 RegExpTree* ParsePattern(bool* ok); | |
| 499 RegExpTree* ParseDisjunction(bool* ok); | |
| 500 RegExpTree* ParseGroup(bool* ok); | |
| 501 RegExpTree* ParseCharacterClass(bool* ok); | |
| 502 | |
| 503 // Parses a {...,...} quantifier and stores the range in the given | |
| 504 // out parameters. | |
| 505 bool ParseIntervalQuantifier(int* min_out, int* max_out); | |
| 506 | |
| 507 // Parses and returns a single escaped character. The character | |
| 508 // must not be 'b' or 'B' since they are usually handle specially. | |
| 509 uc32 ParseClassCharacterEscape(bool* ok); | |
| 510 | |
| 511 // Checks whether the following is a length-digit hexadecimal number, | |
| 512 // and sets the value if it is. | |
| 513 bool ParseHexEscape(int length, uc32* value); | |
| 514 | |
| 515 uc32 ParseControlLetterEscape(bool* ok); | |
| 516 uc32 ParseOctalLiteral(); | |
| 517 | |
| 518 // Tries to parse the input as a back reference. If successful it | |
| 519 // stores the result in the output parameter and returns true. If | |
| 520 // it fails it will push back the characters read so the same characters | |
| 521 // can be reparsed. | |
| 522 bool ParseBackReferenceIndex(int* index_out); | |
| 523 | |
| 524 CharacterRange ParseClassAtom(bool* is_char_class, | |
| 525 ZoneList<CharacterRange>* ranges, | |
| 526 bool* ok); | |
| 527 RegExpTree* ReportError(Vector<const char> message, bool* ok); | |
| 528 void Advance(); | |
| 529 void Advance(int dist); | |
| 530 void Reset(int pos); | |
| 531 | |
| 532 bool HasCharacterEscapes(); | |
| 533 | |
| 534 int captures_started() { return captures_ == NULL ? 0 : captures_->length(); } | |
| 535 int position() { return next_pos_ - 1; } | |
| 536 | |
| 537 static const uc32 kEndMarker = (1 << 21); | |
| 538 private: | |
| 539 uc32 current() { return current_; } | |
| 540 bool has_more() { return has_more_; } | |
| 541 bool has_next() { return next_pos_ < in()->length(); } | |
| 542 uc32 Next(); | |
| 543 FlatStringReader* in() { return in_; } | |
| 544 void ScanForCaptures(); | |
| 545 bool CaptureAvailable(int index); | |
| 546 uc32 current_; | |
| 547 bool has_more_; | |
| 548 bool multiline_mode_; | |
| 549 int next_pos_; | |
| 550 FlatStringReader* in_; | |
| 551 Handle<String>* error_; | |
| 552 bool has_character_escapes_; | |
| 553 bool is_scanned_for_captures_; | |
| 554 ZoneList<RegExpCapture*>* captures_; | |
| 555 int capture_count_; | |
| 556 }; | |
| 557 | |
| 558 | |
| 230 // A temporary scope stores information during parsing, just like | 559 // A temporary scope stores information during parsing, just like |
| 231 // a plain scope. However, temporary scopes are not kept around | 560 // a plain scope. However, temporary scopes are not kept around |
| 232 // after parsing or referenced by syntax trees so they can be stack- | 561 // after parsing or referenced by syntax trees so they can be stack- |
| 233 // allocated and hence used by the pre-parser. | 562 // allocated and hence used by the pre-parser. |
| 234 class TemporaryScope BASE_EMBEDDED { | 563 class TemporaryScope BASE_EMBEDDED { |
| 235 public: | 564 public: |
| 236 explicit TemporaryScope(Parser* parser); | 565 explicit TemporaryScope(Parser* parser); |
| 237 ~TemporaryScope(); | 566 ~TemporaryScope(); |
| 238 | 567 |
| 239 int NextMaterializedLiteralIndex() { | 568 int NextMaterializedLiteralIndex() { |
| (...skipping 2917 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3157 } | 3486 } |
| 3158 ZoneList<Expression*>* args = new ZoneList<Expression*>(2); | 3487 ZoneList<Expression*>* args = new ZoneList<Expression*>(2); |
| 3159 args->Add(new Literal(type)); | 3488 args->Add(new Literal(type)); |
| 3160 args->Add(new Literal(array)); | 3489 args->Add(new Literal(array)); |
| 3161 return new Throw(new CallRuntime(constructor, NULL, args), | 3490 return new Throw(new CallRuntime(constructor, NULL, args), |
| 3162 scanner().location().beg_pos); | 3491 scanner().location().beg_pos); |
| 3163 } | 3492 } |
| 3164 | 3493 |
| 3165 | 3494 |
| 3166 // ---------------------------------------------------------------------------- | 3495 // ---------------------------------------------------------------------------- |
| 3496 // Regular expressions | |
| 3497 | |
| 3498 | |
| 3499 RegExpParser::RegExpParser(FlatStringReader* in, | |
| 3500 Handle<String>* error, | |
| 3501 bool multiline_mode) | |
| 3502 : current_(kEndMarker), | |
| 3503 has_more_(true), | |
| 3504 multiline_mode_(multiline_mode), | |
| 3505 next_pos_(0), | |
| 3506 in_(in), | |
| 3507 error_(error), | |
| 3508 has_character_escapes_(false), | |
| 3509 is_scanned_for_captures_(false), | |
| 3510 captures_(NULL), | |
| 3511 capture_count_(0) { | |
| 3512 Advance(1); | |
| 3513 } | |
| 3514 | |
| 3515 | |
| 3516 uc32 RegExpParser::Next() { | |
| 3517 if (has_next()) { | |
| 3518 return in()->Get(next_pos_); | |
| 3519 } else { | |
| 3520 return kEndMarker; | |
| 3521 } | |
| 3522 } | |
| 3523 | |
| 3524 | |
| 3525 void RegExpParser::Advance() { | |
| 3526 if (next_pos_ < in()->length()) { | |
| 3527 current_ = in()->Get(next_pos_); | |
| 3528 next_pos_++; | |
| 3529 } else { | |
| 3530 current_ = kEndMarker; | |
| 3531 has_more_ = false; | |
| 3532 } | |
| 3533 } | |
| 3534 | |
| 3535 | |
| 3536 void RegExpParser::Reset(int pos) { | |
| 3537 next_pos_ = pos; | |
| 3538 Advance(); | |
| 3539 } | |
| 3540 | |
| 3541 | |
| 3542 void RegExpParser::Advance(int dist) { | |
| 3543 for (int i = 0; i < dist; i++) | |
| 3544 Advance(); | |
| 3545 } | |
| 3546 | |
| 3547 | |
| 3548 // Reports whether the parsed string atoms contain any characters that were | |
| 3549 // escaped in the original pattern. If not, all atoms are proper substrings | |
| 3550 // of the original pattern. | |
| 3551 bool RegExpParser::HasCharacterEscapes() { | |
| 3552 return has_character_escapes_; | |
| 3553 } | |
| 3554 | |
| 3555 RegExpTree* RegExpParser::ReportError(Vector<const char> message, bool* ok) { | |
| 3556 *ok = false; | |
| 3557 *error_ = Factory::NewStringFromAscii(message, NOT_TENURED); | |
| 3558 return NULL; | |
| 3559 } | |
| 3560 | |
| 3561 | |
| 3562 // Pattern :: | |
| 3563 // Disjunction | |
| 3564 RegExpTree* RegExpParser::ParsePattern(bool* ok) { | |
| 3565 RegExpTree* result = ParseDisjunction(CHECK_OK); | |
| 3566 if (has_more()) { | |
| 3567 ReportError(CStrVector("Unmatched ')'"), CHECK_OK); | |
| 3568 } | |
| 3569 return result; | |
| 3570 } | |
| 3571 | |
| 3572 | |
| 3573 bool RegExpParser::CaptureAvailable(int index) { | |
| 3574 if (captures_ == NULL) return false; | |
| 3575 if (index >= captures_->length()) return false; | |
| 3576 RegExpCapture* capture = captures_->at(index); | |
| 3577 return capture != NULL && capture->available() == CAPTURE_AVAILABLE; | |
| 3578 } | |
| 3579 | |
| 3580 | |
| 3581 // Disjunction :: | |
| 3582 // Alternative | |
| 3583 // Alternative | Disjunction | |
| 3584 // Alternative :: | |
| 3585 // [empty] | |
| 3586 // Term Alternative | |
| 3587 // Term :: | |
| 3588 // Assertion | |
| 3589 // Atom | |
| 3590 // Atom Quantifier | |
| 3591 RegExpTree* RegExpParser::ParseDisjunction(bool* ok) { | |
| 3592 RegExpBuilder builder; | |
| 3593 int capture_start_index = captures_started(); | |
| 3594 while (true) { | |
| 3595 switch (current()) { | |
| 3596 case kEndMarker: | |
| 3597 case ')': | |
| 3598 return builder.ToRegExp(); | |
| 3599 case '|': { | |
| 3600 Advance(); | |
| 3601 builder.NewAlternative(); | |
| 3602 int capture_new_alt_start_index = captures_started(); | |
| 3603 for (int i = capture_start_index; i < capture_new_alt_start_index; i++) { | |
| 3604 RegExpCapture* capture = captures_->at(i); | |
| 3605 if (capture->available() == CAPTURE_AVAILABLE) { | |
| 3606 capture->set_available(CAPTURE_UNREACHABLE); | |
| 3607 } | |
| 3608 } | |
| 3609 capture_start_index = capture_new_alt_start_index; | |
| 3610 continue; | |
| 3611 } | |
| 3612 case '*': | |
| 3613 case '+': | |
| 3614 case '?': | |
| 3615 ReportError(CStrVector("Nothing to repeat"), CHECK_OK); | |
| 3616 case '^': { | |
| 3617 Advance(); | |
| 3618 RegExpAssertion::Type type = | |
| 3619 multiline_mode_ ? RegExpAssertion::START_OF_LINE : | |
| 3620 RegExpAssertion::START_OF_INPUT; | |
| 3621 builder.AddAssertion(new RegExpAssertion(type)); | |
| 3622 continue; | |
| 3623 } | |
| 3624 case '$': { | |
| 3625 Advance(); | |
| 3626 RegExpAssertion::Type type = | |
| 3627 multiline_mode_ ? RegExpAssertion::END_OF_LINE : | |
| 3628 RegExpAssertion::END_OF_INPUT; | |
| 3629 builder.AddAssertion(new RegExpAssertion(type)); | |
| 3630 continue; | |
| 3631 } | |
| 3632 case '.': { | |
| 3633 Advance(); | |
| 3634 // everything except \x0a, \x0d, \u2028 and \u2029 | |
| 3635 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2); | |
| 3636 CharacterRange::AddClassEscape('.', ranges); | |
| 3637 RegExpTree* atom = new RegExpCharacterClass(ranges, false); | |
| 3638 builder.AddAtom(atom); | |
| 3639 break; | |
| 3640 } | |
| 3641 case '(': { | |
| 3642 RegExpTree* atom = ParseGroup(CHECK_OK); | |
| 3643 builder.AddAtom(atom); | |
| 3644 break; | |
| 3645 } | |
| 3646 case '[': { | |
| 3647 RegExpTree* atom = ParseCharacterClass(CHECK_OK); | |
| 3648 builder.AddAtom(atom); | |
| 3649 break; | |
| 3650 } | |
| 3651 // Atom :: | |
| 3652 // \ AtomEscape | |
| 3653 case '\\': | |
| 3654 switch (Next()) { | |
| 3655 case kEndMarker: | |
| 3656 ReportError(CStrVector("\\ at end of pattern"), CHECK_OK); | |
| 3657 case 'b': | |
| 3658 Advance(2); | |
| 3659 builder.AddAssertion( | |
| 3660 new RegExpAssertion(RegExpAssertion::BOUNDARY)); | |
| 3661 continue; | |
| 3662 case 'B': | |
| 3663 Advance(2); | |
| 3664 builder.AddAssertion( | |
| 3665 new RegExpAssertion(RegExpAssertion::NON_BOUNDARY)); | |
| 3666 continue; | |
| 3667 // AtomEscape :: | |
| 3668 // CharacterClassEscape | |
| 3669 // | |
| 3670 // CharacterClassEscape :: one of | |
| 3671 // d D s S w W | |
| 3672 case 'd': case 'D': case 's': case 'S': case 'w': case 'W': { | |
| 3673 uc32 c = Next(); | |
| 3674 Advance(2); | |
| 3675 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2); | |
| 3676 CharacterRange::AddClassEscape(c, ranges); | |
| 3677 RegExpTree* atom = new RegExpCharacterClass(ranges, false); | |
| 3678 builder.AddAtom(atom); | |
| 3679 goto has_read_atom; // Avoid setting has_character_escapes_. | |
| 3680 } | |
| 3681 case '1': case '2': case '3': case '4': case '5': case '6': | |
| 3682 case '7': case '8': case '9': { | |
| 3683 int index = 0; | |
| 3684 if (ParseBackReferenceIndex(&index)) { | |
| 3685 if (!CaptureAvailable(index - 1)) { | |
| 3686 // Prepare to ignore a following quantifier | |
| 3687 builder.AddEmpty(); | |
| 3688 goto has_read_atom; | |
| 3689 } | |
| 3690 RegExpCapture* capture = captures_->at(index - 1); | |
| 3691 RegExpTree* atom = new RegExpBackReference(capture); | |
| 3692 builder.AddAtom(atom); | |
| 3693 goto has_read_atom; // Avoid setting has_character_escapes_. | |
| 3694 } | |
| 3695 uc32 first_digit = Next(); | |
| 3696 if (first_digit == '8' || first_digit == '9') { | |
| 3697 // Treat as identity escape | |
| 3698 builder.AddCharacter(first_digit); | |
| 3699 Advance(2); | |
| 3700 break; | |
| 3701 } | |
| 3702 } | |
| 3703 // FALLTHROUGH | |
| 3704 case '0': { | |
| 3705 Advance(); | |
| 3706 uc32 octal = ParseOctalLiteral(); | |
| 3707 builder.AddCharacter(octal); | |
| 3708 break; | |
| 3709 } | |
| 3710 // ControlEscape :: one of | |
| 3711 // f n r t v | |
| 3712 case 'f': | |
| 3713 Advance(2); | |
| 3714 builder.AddCharacter('\f'); | |
| 3715 break; | |
| 3716 case 'n': | |
| 3717 Advance(2); | |
| 3718 builder.AddCharacter('\n'); | |
| 3719 break; | |
| 3720 case 'r': | |
| 3721 Advance(2); | |
| 3722 builder.AddCharacter('\r'); | |
| 3723 break; | |
| 3724 case 't': | |
| 3725 Advance(2); | |
| 3726 builder.AddCharacter('\t'); | |
| 3727 break; | |
| 3728 case 'v': | |
| 3729 Advance(2); | |
| 3730 builder.AddCharacter('\v'); | |
| 3731 break; | |
| 3732 case 'c': { | |
| 3733 Advance(2); | |
| 3734 uc32 control = ParseControlLetterEscape(ok); | |
| 3735 builder.AddCharacter(control); | |
| 3736 break; | |
| 3737 } | |
| 3738 case 'x': { | |
| 3739 Advance(2); | |
| 3740 uc32 value; | |
| 3741 if (ParseHexEscape(2, &value)) { | |
| 3742 builder.AddCharacter(value); | |
| 3743 } else { | |
| 3744 builder.AddCharacter('x'); | |
| 3745 } | |
| 3746 break; | |
| 3747 } | |
| 3748 case 'u': { | |
| 3749 Advance(2); | |
| 3750 uc32 value; | |
| 3751 if (ParseHexEscape(4, &value)) { | |
| 3752 builder.AddCharacter(value); | |
| 3753 } else { | |
| 3754 builder.AddCharacter('u'); | |
| 3755 } | |
| 3756 break; | |
| 3757 } | |
| 3758 default: | |
| 3759 // Identity escape. | |
| 3760 builder.AddCharacter(Next()); | |
| 3761 Advance(2); | |
| 3762 break; | |
| 3763 } | |
| 3764 has_character_escapes_ = true; | |
| 3765 break; | |
| 3766 case '{': { | |
| 3767 int dummy; | |
| 3768 if (ParseIntervalQuantifier(&dummy, &dummy)) { | |
| 3769 ReportError(CStrVector("Nothing to repeat"), CHECK_OK); | |
| 3770 } | |
| 3771 // fallthrough | |
| 3772 } | |
| 3773 default: | |
| 3774 builder.AddCharacter(current()); | |
| 3775 Advance(); | |
| 3776 break; | |
| 3777 } // end switch(current()) | |
| 3778 | |
| 3779 has_read_atom: | |
| 3780 int min; | |
| 3781 int max; | |
| 3782 switch (current()) { | |
| 3783 // QuantifierPrefix :: | |
| 3784 // * | |
| 3785 // + | |
| 3786 // ? | |
| 3787 // { | |
| 3788 case '*': | |
| 3789 min = 0; | |
| 3790 max = RegExpQuantifier::kInfinity; | |
| 3791 Advance(); | |
| 3792 break; | |
| 3793 case '+': | |
| 3794 min = 1; | |
| 3795 max = RegExpQuantifier::kInfinity; | |
| 3796 Advance(); | |
| 3797 break; | |
| 3798 case '?': | |
| 3799 min = 0; | |
| 3800 max = 1; | |
| 3801 Advance(); | |
| 3802 break; | |
| 3803 case '{': | |
| 3804 if (ParseIntervalQuantifier(&min, &max)) { | |
| 3805 break; | |
| 3806 } else { | |
| 3807 continue; | |
| 3808 } | |
| 3809 default: | |
| 3810 continue; | |
| 3811 } | |
| 3812 bool is_greedy = true; | |
| 3813 if (current() == '?') { | |
| 3814 is_greedy = false; | |
| 3815 Advance(); | |
| 3816 } | |
| 3817 builder.AddQuantifierToAtom(min, max, is_greedy); | |
| 3818 } | |
| 3819 } | |
| 3820 | |
| 3821 class SourceCharacter { | |
| 3822 public: | |
| 3823 static bool Is(uc32 c) { | |
| 3824 switch (c) { | |
| 3825 // case ']': case '}': | |
| 3826 // In spidermonkey and jsc these are treated as source characters | |
| 3827 // so we do too. | |
| 3828 case '^': case '$': case '\\': case '.': case '*': case '+': | |
| 3829 case '?': case '(': case ')': case '[': case '{': case '|': | |
| 3830 case RegExpParser::kEndMarker: | |
| 3831 return false; | |
| 3832 default: | |
| 3833 return true; | |
| 3834 } | |
| 3835 } | |
| 3836 }; | |
| 3837 | |
| 3838 | |
| 3839 static unibrow::Predicate<SourceCharacter> source_character; | |
| 3840 | |
| 3841 | |
| 3842 static inline bool IsSourceCharacter(uc32 c) { | |
| 3843 return source_character.get(c); | |
| 3844 } | |
| 3845 | |
| 3846 #ifdef DEBUG | |
| 3847 // Currently only used in an ASSERT. | |
| 3848 static bool IsSpecialClassEscape(uc32 c) { | |
| 3849 switch (c) { | |
| 3850 case 'd': case 'D': | |
| 3851 case 's': case 'S': | |
| 3852 case 'w': case 'W': | |
| 3853 return true; | |
| 3854 default: | |
| 3855 return false; | |
| 3856 } | |
| 3857 } | |
| 3858 #endif | |
| 3859 | |
| 3860 | |
| 3861 // In order to know whether an escape is a backreference or not we have to scan | |
| 3862 // the entire regexp and find the number of capturing parentheses. However we | |
| 3863 // don't want to scan the regexp twice unless it is necessary. This mini-parser | |
| 3864 // is called when needed. It can see the difference between capturing and | |
| 3865 // noncapturing parentheses and can skip character classes and backslash-escaped | |
| 3866 // characters. | |
| 3867 void RegExpParser::ScanForCaptures() { | |
| 3868 int n; | |
| 3869 while ((n = current()) != kEndMarker) { | |
| 3870 Advance(); | |
| 3871 switch (n) { | |
| 3872 case '\\': | |
| 3873 Advance(); | |
| 3874 break; | |
| 3875 case '[': { | |
| 3876 int c; | |
| 3877 while ((c = current()) != kEndMarker) { | |
| 3878 Advance(); | |
| 3879 if (c == '\\') { | |
| 3880 Advance(); | |
| 3881 } else { | |
| 3882 if (c == ']') break; | |
| 3883 } | |
| 3884 } | |
| 3885 break; | |
| 3886 } | |
| 3887 case '(': | |
| 3888 if (current() != '?') capture_count_++; | |
| 3889 break; | |
| 3890 } | |
| 3891 } | |
| 3892 is_scanned_for_captures_ = true; | |
| 3893 } | |
| 3894 | |
| 3895 | |
| 3896 bool RegExpParser::ParseBackReferenceIndex(int* index_out) { | |
| 3897 ASSERT_EQ('\\', current()); | |
| 3898 ASSERT('1' <= Next() && Next() <= '9'); | |
| 3899 // Try to parse a decimal literal that is no greater than the number | |
| 3900 // of previously encountered left capturing parentheses. | |
| 3901 // This is a not according the the ECMAScript specification. According to | |
| 3902 // that, one must accept values up to the total number of left capturing | |
| 3903 // parentheses in the entire input, even if they are meaningless. | |
| 3904 if (!is_scanned_for_captures_) { | |
| 3905 int saved_position = position(); | |
| 3906 ScanForCaptures(); | |
| 3907 Reset(saved_position); | |
| 3908 } | |
| 3909 if (capture_count_ == 0) return false; | |
| 3910 int start = position(); | |
| 3911 int value = Next() - '0'; | |
| 3912 if (value > capture_count_) return false; | |
| 3913 Advance(2); | |
| 3914 while (true) { | |
| 3915 uc32 c = current(); | |
| 3916 if (IsDecimalDigit(c)) { | |
| 3917 value = 10 * value + (c - '0'); | |
| 3918 if (value > capture_count_) { | |
| 3919 Reset(start); | |
| 3920 return false; | |
| 3921 } | |
| 3922 Advance(); | |
| 3923 } else { | |
| 3924 break; | |
| 3925 } | |
| 3926 } | |
| 3927 *index_out = value; | |
| 3928 return true; | |
| 3929 } | |
| 3930 | |
| 3931 | |
| 3932 // QuantifierPrefix :: | |
| 3933 // { DecimalDigits } | |
| 3934 // { DecimalDigits , } | |
| 3935 // { DecimalDigits , DecimalDigits } | |
| 3936 bool RegExpParser::ParseIntervalQuantifier(int* min_out, int* max_out) { | |
| 3937 ASSERT_EQ(current(), '{'); | |
| 3938 int start = position(); | |
| 3939 Advance(); | |
| 3940 int min = 0; | |
| 3941 if (!IsDecimalDigit(current())) { | |
| 3942 Reset(start); | |
| 3943 return false; | |
| 3944 } | |
| 3945 while (IsDecimalDigit(current())) { | |
| 3946 min = 10 * min + (current() - '0'); | |
| 3947 Advance(); | |
| 3948 } | |
| 3949 int max = 0; | |
| 3950 if (current() == '}') { | |
| 3951 max = min; | |
| 3952 Advance(); | |
| 3953 } else if (current() == ',') { | |
| 3954 Advance(); | |
| 3955 if (current() == '}') { | |
| 3956 max = RegExpQuantifier::kInfinity; | |
| 3957 Advance(); | |
| 3958 } else { | |
| 3959 while (IsDecimalDigit(current())) { | |
| 3960 max = 10 * max + (current() - '0'); | |
| 3961 Advance(); | |
| 3962 } | |
| 3963 if (current() != '}') { | |
| 3964 Reset(start); | |
| 3965 return false; | |
| 3966 } | |
| 3967 Advance(); | |
| 3968 } | |
| 3969 } else { | |
| 3970 Reset(start); | |
| 3971 return false; | |
| 3972 } | |
| 3973 *min_out = min; | |
| 3974 *max_out = max; | |
| 3975 return true; | |
| 3976 } | |
| 3977 | |
| 3978 | |
| 3979 // Upper and lower case letters differ by one bit. | |
| 3980 STATIC_CHECK(('a' ^ 'A') == 0x20); | |
| 3981 | |
| 3982 uc32 RegExpParser::ParseControlLetterEscape(bool* ok) { | |
| 3983 if (!has_more()) { | |
| 3984 ReportError(CStrVector("\\c at end of pattern"), ok); | |
| 3985 return '\0'; | |
| 3986 } | |
| 3987 uc32 letter = current() & ~(0x20); // Collapse upper and lower case letters. | |
| 3988 if (letter < 'A' || 'Z' < letter) { | |
| 3989 // Non-spec error-correction: "\c" followed by non-control letter is | |
| 3990 // interpreted as an IdentityEscape of 'c'. | |
| 3991 return 'c'; | |
| 3992 } | |
| 3993 Advance(); | |
| 3994 return letter & 0x1f; // Remainder modulo 32, per specification. | |
| 3995 } | |
| 3996 | |
| 3997 | |
| 3998 uc32 RegExpParser::ParseOctalLiteral() { | |
| 3999 ASSERT('0' <= current() && current() <= '7'); | |
| 4000 // For compatibility with some other browsers (not all), we parse | |
| 4001 // up to three octal digits with a value below 256. | |
| 4002 uc32 value = current() - '0'; | |
| 4003 Advance(); | |
| 4004 if ('0' <= current() && current() <= '7') { | |
| 4005 value = value * 8 + current() - '0'; | |
| 4006 Advance(); | |
| 4007 if (value < 32 && '0' <= current() && current() <= '7') { | |
| 4008 value = value * 8 + current() - '0'; | |
| 4009 Advance(); | |
| 4010 } | |
| 4011 } | |
| 4012 return value; | |
| 4013 } | |
| 4014 | |
| 4015 | |
| 4016 bool RegExpParser::ParseHexEscape(int length, uc32 *value) { | |
| 4017 int start = position(); | |
| 4018 uc32 val = 0; | |
| 4019 bool done = false; | |
| 4020 for (int i = 0; !done; i++) { | |
| 4021 uc32 c = current(); | |
| 4022 int d = HexValue(c); | |
| 4023 if (d < 0) { | |
| 4024 Reset(start); | |
| 4025 return false; | |
| 4026 } | |
| 4027 val = val * 16 + d; | |
| 4028 Advance(); | |
| 4029 if (i == length - 1) { | |
| 4030 done = true; | |
| 4031 } | |
| 4032 } | |
| 4033 *value = val; | |
| 4034 return true; | |
| 4035 } | |
| 4036 | |
| 4037 | |
| 4038 uc32 RegExpParser::ParseClassCharacterEscape(bool* ok) { | |
| 4039 ASSERT(current() == '\\'); | |
| 4040 ASSERT(has_next() && !IsSpecialClassEscape(Next())); | |
| 4041 Advance(); | |
| 4042 switch (current()) { | |
| 4043 case 'b': | |
| 4044 Advance(); | |
| 4045 return '\b'; | |
| 4046 // ControlEscape :: one of | |
| 4047 // f n r t v | |
| 4048 case 'f': | |
| 4049 Advance(); | |
| 4050 return '\f'; | |
| 4051 case 'n': | |
| 4052 Advance(); | |
| 4053 return '\n'; | |
| 4054 case 'r': | |
| 4055 Advance(); | |
| 4056 return '\r'; | |
| 4057 case 't': | |
| 4058 Advance(); | |
| 4059 return '\t'; | |
| 4060 case 'v': | |
| 4061 Advance(); | |
| 4062 return '\v'; | |
| 4063 case 'c': | |
| 4064 return ParseControlLetterEscape(ok); | |
| 4065 case '0': case '1': case '2': case '3': case '4': case '5': | |
| 4066 case '6': case '7': | |
| 4067 // For compatibility, we interpret a decimal escape that isn't | |
| 4068 // a back reference (and therefore either \0 or not valid according | |
| 4069 // to the specification) as a 1..3 digit octal character code. | |
| 4070 return ParseOctalLiteral(); | |
| 4071 case 'x': { | |
| 4072 Advance(); | |
| 4073 uc32 value; | |
| 4074 if (ParseHexEscape(2, &value)) { | |
| 4075 return value; | |
| 4076 } | |
| 4077 // If \x is not followed by a two-digit hexadecimal, treat it | |
| 4078 // as an identity escape. | |
| 4079 return 'x'; | |
| 4080 } | |
| 4081 case 'u': { | |
| 4082 Advance(); | |
| 4083 uc32 value; | |
| 4084 if (ParseHexEscape(4, &value)) { | |
| 4085 return value; | |
| 4086 } | |
| 4087 // If \u is not followed by a four-digit hexadecimal, treat it | |
| 4088 // as an identity escape. | |
| 4089 return 'u'; | |
| 4090 } | |
| 4091 default: { | |
| 4092 // Extended identity escape. We accept any character that hasn't | |
| 4093 // been matched by a more specific case, not just the subset required | |
| 4094 // by the ECMAScript specification. | |
| 4095 uc32 result = current(); | |
| 4096 Advance(); | |
| 4097 return result; | |
| 4098 } | |
| 4099 } | |
| 4100 return 0; | |
| 4101 } | |
| 4102 | |
| 4103 | |
| 4104 RegExpTree* RegExpParser::ParseGroup(bool* ok) { | |
| 4105 ASSERT_EQ(current(), '('); | |
| 4106 char type = '('; | |
| 4107 Advance(); | |
| 4108 if (current() == '?') { | |
| 4109 switch (Next()) { | |
| 4110 case ':': case '=': case '!': | |
| 4111 type = Next(); | |
| 4112 Advance(2); | |
| 4113 break; | |
| 4114 default: | |
| 4115 ReportError(CStrVector("Invalid group"), CHECK_OK); | |
| 4116 break; | |
| 4117 } | |
| 4118 } else { | |
| 4119 if (captures_ == NULL) { | |
| 4120 captures_ = new ZoneList<RegExpCapture*>(2); | |
| 4121 } | |
| 4122 captures_->Add(NULL); | |
| 4123 if (!is_scanned_for_captures_) capture_count_++; | |
| 4124 } | |
| 4125 int capture_index = captures_started(); | |
| 4126 RegExpTree* body = ParseDisjunction(CHECK_OK); | |
| 4127 if (current() != ')') { | |
| 4128 ReportError(CStrVector("Unterminated group"), CHECK_OK); | |
| 4129 } | |
| 4130 Advance(); | |
| 4131 | |
| 4132 int end_capture_index = captures_started(); | |
| 4133 if (type == '!') { | |
| 4134 // Captures inside a negative lookahead are never available outside it. | |
| 4135 for (int i = capture_index; i < end_capture_index; i++) { | |
| 4136 RegExpCapture* capture = captures_->at(i); | |
| 4137 ASSERT(capture != NULL); | |
| 4138 capture->set_available(CAPTURE_PERMANENTLY_UNREACHABLE); | |
| 4139 } | |
| 4140 } else { | |
| 4141 // Captures temporarily unavailable because they are in different | |
| 4142 // alternatives are all available after the disjunction. | |
| 4143 for (int i = capture_index; i < end_capture_index; i++) { | |
| 4144 RegExpCapture* capture = captures_->at(i); | |
| 4145 ASSERT(capture != NULL); | |
| 4146 if (capture->available() == CAPTURE_UNREACHABLE) { | |
| 4147 capture->set_available(CAPTURE_AVAILABLE); | |
| 4148 } | |
| 4149 } | |
| 4150 } | |
| 4151 | |
| 4152 if (type == '(') { | |
| 4153 RegExpCapture* capture = new RegExpCapture(body, capture_index); | |
| 4154 captures_->at(capture_index - 1) = capture; | |
| 4155 return capture; | |
| 4156 } else if (type == ':') { | |
| 4157 return body; | |
| 4158 } else { | |
| 4159 ASSERT(type == '=' || type == '!'); | |
| 4160 bool is_positive = (type == '='); | |
| 4161 return new RegExpLookahead(body, is_positive); | |
| 4162 } | |
| 4163 } | |
| 4164 | |
| 4165 | |
| 4166 CharacterRange RegExpParser::ParseClassAtom(bool* is_char_class, | |
| 4167 ZoneList<CharacterRange>* ranges, | |
| 4168 bool* ok) { | |
| 4169 ASSERT_EQ(false, *is_char_class); | |
| 4170 uc32 first = current(); | |
| 4171 if (first == '\\') { | |
| 4172 switch (Next()) { | |
| 4173 case 'w': case 'W': case 'd': case 'D': case 's': case 'S': { | |
| 4174 *is_char_class = true; | |
| 4175 uc32 c = Next(); | |
| 4176 CharacterRange::AddClassEscape(c, ranges); | |
| 4177 Advance(2); | |
| 4178 return NULL; | |
| 4179 } | |
| 4180 default: | |
| 4181 uc32 c = ParseClassCharacterEscape(CHECK_OK); | |
| 4182 return CharacterRange::Singleton(c); | |
| 4183 } | |
| 4184 } else { | |
| 4185 Advance(); | |
| 4186 return CharacterRange::Singleton(first); | |
| 4187 } | |
| 4188 } | |
| 4189 | |
| 4190 | |
| 4191 RegExpTree* RegExpParser::ParseCharacterClass(bool* ok) { | |
| 4192 static const char* kUnterminated = "Unterminated character class"; | |
| 4193 static const char* kIllegal = "Illegal character class"; | |
| 4194 static const char* kRangeOutOfOrder = "Range out of order in character class"; | |
| 4195 | |
| 4196 ASSERT_EQ(current(), '['); | |
| 4197 Advance(); | |
| 4198 bool is_negated = false; | |
| 4199 if (current() == '^') { | |
| 4200 is_negated = true; | |
| 4201 Advance(); | |
| 4202 } | |
| 4203 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2); | |
| 4204 while (has_more() && current() != ']') { | |
| 4205 bool is_char_class = false; | |
| 4206 CharacterRange first = ParseClassAtom(&is_char_class, ranges, CHECK_OK); | |
| 4207 if (!is_char_class) { | |
| 4208 if (current() == '-') { | |
| 4209 Advance(); | |
| 4210 if (current() == kEndMarker) { | |
| 4211 // If we reach the end we break out of the loop and let the | |
| 4212 // following code report an error. | |
| 4213 break; | |
| 4214 } else if (current() == ']') { | |
| 4215 ranges->Add(first); | |
| 4216 ranges->Add(CharacterRange::Singleton('-')); | |
| 4217 break; | |
| 4218 } | |
| 4219 CharacterRange next = | |
| 4220 ParseClassAtom(&is_char_class, ranges, CHECK_OK); | |
| 4221 if (is_char_class) { | |
| 4222 return ReportError(CStrVector(kIllegal), CHECK_OK); | |
| 4223 } | |
| 4224 if (first.from() > next.to()) { | |
| 4225 return ReportError(CStrVector(kRangeOutOfOrder), CHECK_OK); | |
| 4226 } | |
| 4227 ranges->Add(CharacterRange::Range(first.from(), next.to())); | |
| 4228 } else { | |
| 4229 ranges->Add(first); | |
| 4230 } | |
| 4231 } | |
| 4232 } | |
| 4233 if (!has_more()) { | |
| 4234 return ReportError(CStrVector(kUnterminated), CHECK_OK); | |
| 4235 } | |
| 4236 Advance(); | |
| 4237 if (ranges->length() == 0) { | |
| 4238 ranges->Add(CharacterRange::Range(0, 0xffff)); | |
| 4239 is_negated = !is_negated; | |
| 4240 } | |
| 4241 return new RegExpCharacterClass(ranges, is_negated); | |
| 4242 } | |
| 4243 | |
| 4244 | |
| 4245 // ---------------------------------------------------------------------------- | |
| 3167 // The Parser interface. | 4246 // The Parser interface. |
| 3168 | 4247 |
| 3169 // MakeAST() is just a wrapper for the corresponding Parser calls | 4248 // MakeAST() is just a wrapper for the corresponding Parser calls |
| 3170 // so we don't have to expose the entire Parser class in the .h file. | 4249 // so we don't have to expose the entire Parser class in the .h file. |
| 3171 | 4250 |
| 3172 static bool always_allow_natives_syntax = false; | 4251 static bool always_allow_natives_syntax = false; |
| 3173 | 4252 |
| 3174 | 4253 |
| 3175 ParserMessage::~ParserMessage() { | 4254 ParserMessage::~ParserMessage() { |
| 3176 for (int i = 0; i < args().length(); i++) | 4255 for (int i = 0; i < args().length(); i++) |
| (...skipping 27 matching lines...) Expand all Loading... | |
| 3204 PreParser parser(no_script, allow_natives_syntax, extension); | 4283 PreParser parser(no_script, allow_natives_syntax, extension); |
| 3205 if (!parser.PreParseProgram(stream)) return NULL; | 4284 if (!parser.PreParseProgram(stream)) return NULL; |
| 3206 // The list owns the backing store so we need to clone the vector. | 4285 // The list owns the backing store so we need to clone the vector. |
| 3207 // That way, the result will be exactly the right size rather than | 4286 // That way, the result will be exactly the right size rather than |
| 3208 // the expected 50% too large. | 4287 // the expected 50% too large. |
| 3209 Vector<unsigned> store = parser.recorder()->store()->ToVector().Clone(); | 4288 Vector<unsigned> store = parser.recorder()->store()->ToVector().Clone(); |
| 3210 return new ScriptDataImpl(store); | 4289 return new ScriptDataImpl(store); |
| 3211 } | 4290 } |
| 3212 | 4291 |
| 3213 | 4292 |
| 4293 bool ParseRegExp(FlatStringReader* input, RegExpParseResult* result) { | |
| 4294 ASSERT(result != NULL); | |
| 4295 // Get multiline flag somehow | |
|
Mads Ager (chromium)
2008/11/25 21:09:41
Is this a TODO or a comment on what the code is do
Erik Corry
2008/11/26 12:18:36
It's a TODO. Fixed.
| |
| 4296 RegExpParser parser(input, &result->error, false); | |
| 4297 bool ok = true; | |
| 4298 result->tree = parser.ParsePattern(&ok); | |
| 4299 if (!ok) { | |
| 4300 ASSERT(result->tree == NULL); | |
| 4301 ASSERT(!result->error.is_null()); | |
| 4302 } else { | |
| 4303 ASSERT(result->tree != NULL); | |
| 4304 ASSERT(result->error.is_null()); | |
| 4305 } | |
| 4306 if (ok) { | |
| 4307 result->has_character_escapes = parser.HasCharacterEscapes(); | |
| 4308 result->capture_count = parser.captures_started(); | |
| 4309 } | |
| 4310 return ok; | |
| 4311 } | |
| 4312 | |
| 4313 | |
| 3214 FunctionLiteral* MakeAST(bool compile_in_global_context, | 4314 FunctionLiteral* MakeAST(bool compile_in_global_context, |
| 3215 Handle<Script> script, | 4315 Handle<Script> script, |
| 3216 v8::Extension* extension, | 4316 v8::Extension* extension, |
| 3217 ScriptDataImpl* pre_data) { | 4317 ScriptDataImpl* pre_data) { |
| 3218 bool allow_natives_syntax = | 4318 bool allow_natives_syntax = |
| 3219 always_allow_natives_syntax || | 4319 always_allow_natives_syntax || |
| 3220 FLAG_allow_natives_syntax || | 4320 FLAG_allow_natives_syntax || |
| 3221 Bootstrapper::IsActive(); | 4321 Bootstrapper::IsActive(); |
| 3222 AstBuildingParser parser(script, allow_natives_syntax, extension, pre_data); | 4322 AstBuildingParser parser(script, allow_natives_syntax, extension, pre_data); |
| 3223 if (pre_data != NULL && pre_data->has_error()) { | 4323 if (pre_data != NULL && pre_data->has_error()) { |
| (...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3256 start_position, | 4356 start_position, |
| 3257 is_expression); | 4357 is_expression); |
| 3258 return result; | 4358 return result; |
| 3259 } | 4359 } |
| 3260 | 4360 |
| 3261 | 4361 |
| 3262 #undef NEW | 4362 #undef NEW |
| 3263 | 4363 |
| 3264 | 4364 |
| 3265 } } // namespace v8::internal | 4365 } } // namespace v8::internal |
| OLD | NEW |