| OLD | NEW |
| 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 16 matching lines...) Expand all Loading... |
| 27 | 27 |
| 28 #include "v8.h" | 28 #include "v8.h" |
| 29 | 29 |
| 30 #include "api.h" | 30 #include "api.h" |
| 31 #include "ast.h" | 31 #include "ast.h" |
| 32 #include "bootstrapper.h" | 32 #include "bootstrapper.h" |
| 33 #include "platform.h" | 33 #include "platform.h" |
| 34 #include "runtime.h" | 34 #include "runtime.h" |
| 35 #include "parser.h" | 35 #include "parser.h" |
| 36 #include "scopes.h" | 36 #include "scopes.h" |
| 37 #include "string-stream.h" |
| 37 | 38 |
| 38 namespace v8 { namespace internal { | 39 namespace v8 { namespace internal { |
| 39 | 40 |
| 40 class ParserFactory; | 41 class ParserFactory; |
| 41 class ParserLog; | 42 class ParserLog; |
| 42 class TemporaryScope; | 43 class TemporaryScope; |
| 43 template <typename T> class ZoneListWrapper; | 44 template <typename T> class ZoneListWrapper; |
| 44 | 45 |
| 45 | 46 |
| 46 class Parser { | 47 class Parser { |
| (...skipping 173 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 220 Handle<String> type, | 221 Handle<String> type, |
| 221 Vector< Handle<Object> > arguments); | 222 Vector< Handle<Object> > arguments); |
| 222 | 223 |
| 223 friend class Target; | 224 friend class Target; |
| 224 friend class TargetScope; | 225 friend class TargetScope; |
| 225 friend class LexicalScope; | 226 friend class LexicalScope; |
| 226 friend class TemporaryScope; | 227 friend class TemporaryScope; |
| 227 }; | 228 }; |
| 228 | 229 |
| 229 | 230 |
| 231 template <typename T, int initial_size> |
| 232 class BufferedZoneList { |
| 233 public: |
| 234 |
| 235 BufferedZoneList() : |
| 236 list_(NULL), last_(NULL) {} |
| 237 |
| 238 // Adds an element at the end of the list. The element stays buffered, and |
| 239 // can be read using last() or removed using RemoveLast(), until the next |
| 240 // call to Add(), RemoveLast() or GetList(). |
| 241 void Add(T* value) { |
| 242 if (last_ != NULL) { |
| 243 if (list_ == NULL) { |
| 244 list_ = new ZoneList<T*>(initial_size); |
| 245 } |
| 246 list_->Add(last_); |
| 247 } |
| 248 last_ = value; |
| 249 } |
| 250 |
| 251 T* last() { |
| 252 ASSERT(last_ != NULL); |
| 253 return last_; |
| 254 } |
| 255 |
| 256 T* RemoveLast() { |
| 257 ASSERT(last_ != NULL); |
| 258 T* result = last_; |
| 259 if (list_ != NULL && list_->length() > 0) |
| 260 last_ = list_->RemoveLast(); |
| 261 else |
| 262 last_ = NULL; |
| 263 return result; |
| 264 } |
| 265 |
| 266 T* Get(int i) { |
| 267 ASSERT(0 <= i && i < length()); |
| 268 if (list_ == NULL) { |
| 269 ASSERT_EQ(0, i); |
| 270 return last_; |
| 271 } else { |
| 272 if (i == list_->length()) { |
| 273 ASSERT(last_ != NULL); |
| 274 return last_; |
| 275 } else { |
| 276 return list_->at(i); |
| 277 } |
| 278 } |
| 279 } |
| 280 |
| 281 void Clear() { |
| 282 list_ = NULL; |
| 283 last_ = NULL; |
| 284 } |
| 285 |
| 286 int length() { |
| 287 int length = (list_ == NULL) ? 0 : list_->length(); |
| 288 return length + ((last_ == NULL) ? 0 : 1); |
| 289 } |
| 290 |
| 291 ZoneList<T*>* GetList() { |
| 292 if (list_ == NULL) { |
| 293 list_ = new ZoneList<T*>(initial_size); |
| 294 } |
| 295 if (last_ != NULL) { |
| 296 list_->Add(last_); |
| 297 last_ = NULL; |
| 298 } |
| 299 return list_; |
| 300 } |
| 301 |
| 302 private: |
| 303 ZoneList<T*>* list_; |
| 304 T* last_; |
| 305 }; |
| 306 |
| 307 // Accumulates RegExp atoms and assertions into lists of terms and alternatives. |
| 308 class RegExpBuilder { |
| 309 public: |
| 310 RegExpBuilder(); |
| 311 void AddCharacter(uc16 character); |
| 312 // "Adds" an empty expression. Does nothing except cause a |
| 313 // following quantifier to be consumed and ignored. |
| 314 void AddEmpty(); |
| 315 void AddAtom(RegExpTree* tree); |
| 316 void AddAssertion(RegExpTree* tree); |
| 317 void NewAlternative(); // '|' |
| 318 void AddQuantifierToAtom(int min, int max, bool is_greedy); |
| 319 RegExpTree* ToRegExp(); |
| 320 private: |
| 321 void FlushCharacters(); |
| 322 void FlushText(); |
| 323 void FlushTerms(); |
| 324 bool pending_empty_; |
| 325 ZoneList<uc16>* characters_; |
| 326 BufferedZoneList<RegExpTree, 2> terms_; |
| 327 BufferedZoneList<RegExpTree, 2> text_; |
| 328 BufferedZoneList<RegExpTree, 2> alternatives_; |
| 329 #ifdef DEBUG |
| 330 enum {ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM} last_added_; |
| 331 #define LAST(x) last_added_ = x; |
| 332 #else |
| 333 #define LAST(x) |
| 334 #endif |
| 335 }; |
| 336 |
| 337 |
| 338 RegExpBuilder::RegExpBuilder() |
| 339 : pending_empty_(false), characters_(NULL), terms_(), alternatives_() |
| 340 #ifdef DEBUG |
| 341 , last_added_(ADD_NONE) |
| 342 #endif |
| 343 {} |
| 344 |
| 345 |
| 346 void RegExpBuilder::FlushCharacters() { |
| 347 pending_empty_ = false; |
| 348 if (characters_ != NULL) { |
| 349 RegExpTree* atom = new RegExpAtom(characters_->ToConstVector()); |
| 350 characters_ = NULL; |
| 351 text_.Add(atom); |
| 352 LAST(ADD_ATOM); |
| 353 } |
| 354 } |
| 355 |
| 356 |
| 357 void RegExpBuilder::FlushText() { |
| 358 FlushCharacters(); |
| 359 int num_text = text_.length(); |
| 360 if (num_text == 0) { |
| 361 return; |
| 362 } else if (num_text == 1) { |
| 363 terms_.Add(text_.last()); |
| 364 } else { |
| 365 RegExpText* text = new RegExpText(); |
| 366 for (int i = 0; i < num_text; i++) |
| 367 text_.Get(i)->AppendToText(text); |
| 368 terms_.Add(text); |
| 369 } |
| 370 text_.Clear(); |
| 371 } |
| 372 |
| 373 |
| 374 void RegExpBuilder::AddCharacter(uc16 c) { |
| 375 pending_empty_ = false; |
| 376 if (characters_ == NULL) { |
| 377 characters_ = new ZoneList<uc16>(4); |
| 378 } |
| 379 characters_->Add(c); |
| 380 LAST(ADD_CHAR); |
| 381 } |
| 382 |
| 383 |
| 384 void RegExpBuilder::AddEmpty() { |
| 385 pending_empty_ = true; |
| 386 } |
| 387 |
| 388 |
| 389 void RegExpBuilder::AddAtom(RegExpTree* term) { |
| 390 if (term->IsEmpty()) { |
| 391 AddEmpty(); |
| 392 return; |
| 393 } |
| 394 if (term->IsTextElement()) { |
| 395 FlushCharacters(); |
| 396 text_.Add(term); |
| 397 } else { |
| 398 FlushText(); |
| 399 terms_.Add(term); |
| 400 } |
| 401 LAST(ADD_ATOM); |
| 402 } |
| 403 |
| 404 |
| 405 void RegExpBuilder::AddAssertion(RegExpTree* assert) { |
| 406 FlushText(); |
| 407 terms_.Add(assert); |
| 408 LAST(ADD_ASSERT); |
| 409 } |
| 410 |
| 411 |
| 412 void RegExpBuilder::NewAlternative() { |
| 413 FlushTerms(); |
| 414 } |
| 415 |
| 416 |
| 417 void RegExpBuilder::FlushTerms() { |
| 418 FlushText(); |
| 419 int num_terms = terms_.length(); |
| 420 RegExpTree* alternative; |
| 421 if (num_terms == 0) { |
| 422 alternative = RegExpEmpty::GetInstance(); |
| 423 } else if (num_terms == 1) { |
| 424 alternative = terms_.last(); |
| 425 } else { |
| 426 alternative = new RegExpAlternative(terms_.GetList()); |
| 427 } |
| 428 alternatives_.Add(alternative); |
| 429 terms_.Clear(); |
| 430 LAST(ADD_NONE); |
| 431 } |
| 432 |
| 433 |
| 434 RegExpTree* RegExpBuilder::ToRegExp() { |
| 435 FlushTerms(); |
| 436 int num_alternatives = alternatives_.length(); |
| 437 if (num_alternatives == 0) { |
| 438 return RegExpEmpty::GetInstance(); |
| 439 } |
| 440 if (num_alternatives == 1) { |
| 441 return alternatives_.last(); |
| 442 } |
| 443 return new RegExpDisjunction(alternatives_.GetList()); |
| 444 } |
| 445 |
| 446 |
| 447 void RegExpBuilder::AddQuantifierToAtom(int min, int max, bool is_greedy) { |
| 448 if (pending_empty_) { |
| 449 pending_empty_ = false; |
| 450 return; |
| 451 } |
| 452 RegExpTree* atom; |
| 453 if (characters_ != NULL) { |
| 454 ASSERT(last_added_ == ADD_CHAR); |
| 455 // Last atom was character. |
| 456 Vector<const uc16> char_vector = characters_->ToConstVector(); |
| 457 int num_chars = char_vector.length(); |
| 458 if (num_chars > 1) { |
| 459 Vector<const uc16> prefix = char_vector.SubVector(0, num_chars - 1); |
| 460 text_.Add(new RegExpAtom(prefix)); |
| 461 char_vector = char_vector.SubVector(num_chars - 1, num_chars); |
| 462 } |
| 463 characters_ = NULL; |
| 464 atom = new RegExpAtom(char_vector); |
| 465 FlushText(); |
| 466 } else if (text_.length() > 0) { |
| 467 ASSERT(last_added_ == ADD_ATOM); |
| 468 atom = text_.RemoveLast(); |
| 469 FlushText(); |
| 470 } else if (terms_.length() > 0) { |
| 471 ASSERT(last_added_ == ADD_ATOM); |
| 472 atom = terms_.RemoveLast(); |
| 473 if (atom->IsLookahead() || atom->IsAssertion()) { |
| 474 // Guaranteed not to match a non-empty string. |
| 475 // Assertion as an atom can happen as, e.g., (?:\b) |
| 476 LAST(ADD_TERM); |
| 477 if (min == 0) { |
| 478 return; |
| 479 } |
| 480 terms_.Add(atom); |
| 481 return; |
| 482 } |
| 483 } else { |
| 484 // Only call immediately after adding an atom or character! |
| 485 UNREACHABLE(); |
| 486 return; |
| 487 } |
| 488 terms_.Add(new RegExpQuantifier(min, max, is_greedy, atom)); |
| 489 LAST(ADD_TERM); |
| 490 } |
| 491 |
| 492 |
| 493 class RegExpParser { |
| 494 public: |
| 495 RegExpParser(FlatStringReader* in, |
| 496 Handle<String>* error, |
| 497 bool multiline_mode); |
| 498 RegExpTree* ParsePattern(bool* ok); |
| 499 RegExpTree* ParseDisjunction(bool* ok); |
| 500 RegExpTree* ParseGroup(bool* ok); |
| 501 RegExpTree* ParseCharacterClass(bool* ok); |
| 502 |
| 503 // Parses a {...,...} quantifier and stores the range in the given |
| 504 // out parameters. |
| 505 bool ParseIntervalQuantifier(int* min_out, int* max_out); |
| 506 |
| 507 // Parses and returns a single escaped character. The character |
| 508 // must not be 'b' or 'B' since they are usually handled specially. |
| 509 uc32 ParseClassCharacterEscape(bool* ok); |
| 510 |
| 511 // Checks whether the following is a length-digit hexadecimal number, |
| 512 // and sets the value if it is. |
| 513 bool ParseHexEscape(int length, uc32* value); |
| 514 |
| 515 uc32 ParseControlLetterEscape(bool* ok); |
| 516 uc32 ParseOctalLiteral(); |
| 517 |
| 518 // Tries to parse the input as a back reference. If successful it |
| 519 // stores the result in the output parameter and returns true. If |
| 520 // it fails it will push back the characters read so the same characters |
| 521 // can be reparsed. |
| 522 bool ParseBackReferenceIndex(int* index_out); |
| 523 |
| 524 CharacterRange ParseClassAtom(bool* is_char_class, |
| 525 ZoneList<CharacterRange>* ranges, |
| 526 bool* ok); |
| 527 RegExpTree* ReportError(Vector<const char> message, bool* ok); |
| 528 void Advance(); |
| 529 void Advance(int dist); |
| 530 void Reset(int pos); |
| 531 |
| 532 bool HasCharacterEscapes(); |
| 533 |
| 534 int captures_started() { return captures_ == NULL ? 0 : captures_->length(); } |
| 535 int position() { return next_pos_ - 1; } |
| 536 |
| 537 static const uc32 kEndMarker = (1 << 21); |
| 538 private: |
| 539 uc32 current() { return current_; } |
| 540 bool has_more() { return has_more_; } |
| 541 bool has_next() { return next_pos_ < in()->length(); } |
| 542 uc32 Next(); |
| 543 FlatStringReader* in() { return in_; } |
| 544 void ScanForCaptures(); |
| 545 bool CaptureAvailable(int index); |
| 546 uc32 current_; |
| 547 bool has_more_; |
| 548 bool multiline_; |
| 549 int next_pos_; |
| 550 FlatStringReader* in_; |
| 551 Handle<String>* error_; |
| 552 bool has_character_escapes_; |
| 553 ZoneList<RegExpCapture*>* captures_; |
| 554 bool is_scanned_for_captures_; |
| 555 // The capture count is only valid after we have scanned for captures. |
| 556 int capture_count_; |
| 557 }; |
| 558 |
| 559 |
| 230 // A temporary scope stores information during parsing, just like | 560 // A temporary scope stores information during parsing, just like |
| 231 // a plain scope. However, temporary scopes are not kept around | 561 // a plain scope. However, temporary scopes are not kept around |
| 232 // after parsing or referenced by syntax trees so they can be stack- | 562 // after parsing or referenced by syntax trees so they can be stack- |
| 233 // allocated and hence used by the pre-parser. | 563 // allocated and hence used by the pre-parser. |
| 234 class TemporaryScope BASE_EMBEDDED { | 564 class TemporaryScope BASE_EMBEDDED { |
| 235 public: | 565 public: |
| 236 explicit TemporaryScope(Parser* parser); | 566 explicit TemporaryScope(Parser* parser); |
| 237 ~TemporaryScope(); | 567 ~TemporaryScope(); |
| 238 | 568 |
| 239 int NextMaterializedLiteralIndex() { | 569 int NextMaterializedLiteralIndex() { |
| (...skipping 90 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 330 virtual Expression* NewProperty(Expression* obj, Expression* key, int pos) { | 660 virtual Expression* NewProperty(Expression* obj, Expression* key, int pos) { |
| 331 if (obj == VariableProxySentinel::this_proxy()) { | 661 if (obj == VariableProxySentinel::this_proxy()) { |
| 332 return Property::this_property(); | 662 return Property::this_property(); |
| 333 } else { | 663 } else { |
| 334 return ValidLeftHandSideSentinel::instance(); | 664 return ValidLeftHandSideSentinel::instance(); |
| 335 } | 665 } |
| 336 } | 666 } |
| 337 | 667 |
| 338 virtual Expression* NewCall(Expression* expression, | 668 virtual Expression* NewCall(Expression* expression, |
| 339 ZoneList<Expression*>* arguments, | 669 ZoneList<Expression*>* arguments, |
| 340 bool is_eval, int pos) { | 670 int pos) { |
| 341 return Call::sentinel(); | 671 return Call::sentinel(); |
| 342 } | 672 } |
| 343 | 673 |
| 674 virtual Expression* NewCallEval(Expression* expression, |
| 675 ZoneList<Expression*>* arguments, |
| 676 int pos) { |
| 677 return CallEval::sentinel(); |
| 678 } |
| 679 |
| 344 virtual Statement* EmptyStatement() { | 680 virtual Statement* EmptyStatement() { |
| 345 return NULL; | 681 return NULL; |
| 346 } | 682 } |
| 347 | 683 |
| 348 template <typename T> ZoneListWrapper<T> NewList(int size) { | 684 template <typename T> ZoneListWrapper<T> NewList(int size) { |
| 349 return is_pre_parsing_ ? ZoneListWrapper<T>() : ZoneListWrapper<T>(size); | 685 return is_pre_parsing_ ? ZoneListWrapper<T>() : ZoneListWrapper<T>(size); |
| 350 } | 686 } |
| 351 | 687 |
| 352 private: | 688 private: |
| 353 bool is_pre_parsing_; | 689 bool is_pre_parsing_; |
| (...skipping 26 matching lines...) Expand all Loading... |
| 380 virtual Handle<String> EmptySymbol() { | 716 virtual Handle<String> EmptySymbol() { |
| 381 return Factory::empty_symbol(); | 717 return Factory::empty_symbol(); |
| 382 } | 718 } |
| 383 | 719 |
| 384 virtual Expression* NewProperty(Expression* obj, Expression* key, int pos) { | 720 virtual Expression* NewProperty(Expression* obj, Expression* key, int pos) { |
| 385 return new Property(obj, key, pos); | 721 return new Property(obj, key, pos); |
| 386 } | 722 } |
| 387 | 723 |
| 388 virtual Expression* NewCall(Expression* expression, | 724 virtual Expression* NewCall(Expression* expression, |
| 389 ZoneList<Expression*>* arguments, | 725 ZoneList<Expression*>* arguments, |
| 390 bool is_eval, int pos) { | 726 int pos) { |
| 391 return new Call(expression, arguments, is_eval, pos); | 727 return new Call(expression, arguments, pos); |
| 728 } |
| 729 |
| 730 virtual Expression* NewCallEval(Expression* expression, |
| 731 ZoneList<Expression*>* arguments, |
| 732 int pos) { |
| 733 return new CallEval(expression, arguments, pos); |
| 392 } | 734 } |
| 393 | 735 |
| 394 virtual Statement* EmptyStatement() { | 736 virtual Statement* EmptyStatement() { |
| 395 // Use a statically allocated empty statement singleton to avoid | 737 // Use a statically allocated empty statement singleton to avoid |
| 396 // allocating lots and lots of empty statements. | 738 // allocating lots and lots of empty statements. |
| 397 static v8::internal::EmptyStatement empty; | 739 static v8::internal::EmptyStatement empty; |
| 398 return &empty; | 740 return &empty; |
| 399 } | 741 } |
| 400 }; | 742 }; |
| 401 | 743 |
| (...skipping 604 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1006 // is always the function scope. | 1348 // is always the function scope. |
| 1007 | 1349 |
| 1008 // If a function scope exists, then we can statically declare this | 1350 // If a function scope exists, then we can statically declare this |
| 1009 // variable and also set its mode. In any case, a Declaration node | 1351 // variable and also set its mode. In any case, a Declaration node |
| 1010 // will be added to the scope so that the declaration can be added | 1352 // will be added to the scope so that the declaration can be added |
| 1011 // to the corresponding activation frame at runtime if necessary. | 1353 // to the corresponding activation frame at runtime if necessary. |
| 1012 // For instance declarations inside an eval scope need to be added | 1354 // For instance declarations inside an eval scope need to be added |
| 1013 // to the calling function context. | 1355 // to the calling function context. |
| 1014 if (top_scope_->is_function_scope()) { | 1356 if (top_scope_->is_function_scope()) { |
| 1015 // Declare the variable in the function scope. | 1357 // Declare the variable in the function scope. |
| 1016 var = top_scope_->Lookup(name); | 1358 var = top_scope_->LookupLocal(name); |
| 1017 if (var == NULL) { | 1359 if (var == NULL) { |
| 1018 // Declare the name. | 1360 // Declare the name. |
| 1019 var = top_scope_->Declare(name, mode); | 1361 var = top_scope_->Declare(name, mode); |
| 1020 } else { | 1362 } else { |
| 1021 // The name was declared before; check for conflicting | 1363 // The name was declared before; check for conflicting |
| 1022 // re-declarations. If the previous declaration was a const or the | 1364 // re-declarations. If the previous declaration was a const or the |
| 1023 // current declaration is a const then we have a conflict. There is | 1365 // current declaration is a const then we have a conflict. There is |
| 1024 // similar code in runtime.cc in the Declare functions. | 1366 // similar code in runtime.cc in the Declare functions. |
| 1025 if ((mode == Variable::CONST) || (var->mode() == Variable::CONST)) { | 1367 if ((mode == Variable::CONST) || (var->mode() == Variable::CONST)) { |
| 1026 // We only have vars and consts in declarations. | 1368 // We only have vars and consts in declarations. |
| (...skipping 1259 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2286 result = factory()->NewProperty(result, index, pos); | 2628 result = factory()->NewProperty(result, index, pos); |
| 2287 Expect(Token::RBRACK, CHECK_OK); | 2629 Expect(Token::RBRACK, CHECK_OK); |
| 2288 break; | 2630 break; |
| 2289 } | 2631 } |
| 2290 | 2632 |
| 2291 case Token::LPAREN: { | 2633 case Token::LPAREN: { |
| 2292 int pos = scanner().location().beg_pos; | 2634 int pos = scanner().location().beg_pos; |
| 2293 ZoneList<Expression*>* args = ParseArguments(CHECK_OK); | 2635 ZoneList<Expression*>* args = ParseArguments(CHECK_OK); |
| 2294 | 2636 |
| 2295 // Keep track of eval() calls since they disable all local variable | 2637 // Keep track of eval() calls since they disable all local variable |
| 2296 // optimizations. We can ignore locally declared variables with | 2638 // optimizations. |
| 2297 // name 'eval' since they override the global 'eval' function. We | 2639 // The calls that need special treatment are the |
| 2298 // only need to look at unresolved variables (VariableProxies). | 2640 // direct (i.e. not aliased) eval calls. These calls are all of the |
| 2641 // form eval(...) with no explicit receiver object where eval is not |
| 2642 // declared in the current scope chain. These calls are marked as |
| 2643 // potentially direct eval calls. Whether they are actually direct calls |
| 2644 // to eval is determined at run time. |
| 2299 | 2645 |
| 2646 bool is_potentially_direct_eval = false; |
| 2300 if (!is_pre_parsing_) { | 2647 if (!is_pre_parsing_) { |
| 2301 // We assume that only a function called 'eval' can be used | |
| 2302 // to invoke the global eval() implementation. This permits | |
| 2303 // for massive optimizations. | |
| 2304 VariableProxy* callee = result->AsVariableProxy(); | 2648 VariableProxy* callee = result->AsVariableProxy(); |
| 2305 if (callee != NULL && callee->IsVariable(Factory::eval_symbol())) { | 2649 if (callee != NULL && callee->IsVariable(Factory::eval_symbol())) { |
| 2306 // We do not allow direct calls to 'eval' in our internal | 2650 Handle<String> name = callee->name(); |
| 2307 // JS files. Use builtin functions instead. | 2651 Variable* var = top_scope_->Lookup(name); |
| 2308 ASSERT(!Bootstrapper::IsActive()); | 2652 if (var == NULL) { |
| 2309 top_scope_->RecordEvalCall(); | 2653 // We do not allow direct calls to 'eval' in our internal |
| 2310 } else { | 2654 // JS files. Use builtin functions instead. |
| 2311 // This is rather convoluted code to check if we're calling | 2655 ASSERT(!Bootstrapper::IsActive()); |
| 2312 // a function named 'eval' through a property access. If so, | 2656 top_scope_->RecordEvalCall(); |
| 2313 // we mark it as a possible eval call (we don't know if the | 2657 is_potentially_direct_eval = true; |
| 2314 // receiver will resolve to the global object or not), but | |
| 2315 // we do not treat the call as an eval() call - we let the | |
| 2316 // call get through to the JavaScript eval code defined in | |
| 2317 // v8natives.js. | |
| 2318 Property* property = result->AsProperty(); | |
| 2319 if (property != NULL) { | |
| 2320 Literal* key = property->key()->AsLiteral(); | |
| 2321 if (key != NULL && | |
| 2322 key->handle().is_identical_to(Factory::eval_symbol())) { | |
| 2323 // We do not allow direct calls to 'eval' in our | |
| 2324 // internal JS files. Use builtin functions instead. | |
| 2325 ASSERT(!Bootstrapper::IsActive()); | |
| 2326 top_scope_->RecordEvalCall(); | |
| 2327 } | |
| 2328 } | 2658 } |
| 2329 } | 2659 } |
| 2330 } | 2660 } |
| 2331 | 2661 |
| 2332 // Optimize the eval() case w/o arguments so we | 2662 if (is_potentially_direct_eval) { |
| 2333 // don't need to handle it every time at runtime. | 2663 result = factory()->NewCallEval(result, args, pos); |
| 2334 // | |
| 2335 // Note: For now we don't do static eval analysis | |
| 2336 // as it appears that we need to be able to call | |
| 2337 // eval() via alias names. We leave the code as | |
| 2338 // is, in case we want to enable this again in the | |
| 2339 // future. | |
| 2340 const bool is_eval = false; | |
| 2341 if (is_eval && args->length() == 0) { | |
| 2342 result = NEW(Literal(Factory::undefined_value())); | |
| 2343 } else { | 2664 } else { |
| 2344 result = factory()->NewCall(result, args, is_eval, pos); | 2665 result = factory()->NewCall(result, args, pos); |
| 2345 } | 2666 } |
| 2346 break; | 2667 break; |
| 2347 } | 2668 } |
| 2348 | 2669 |
| 2349 case Token::PERIOD: { | 2670 case Token::PERIOD: { |
| 2350 Consume(Token::PERIOD); | 2671 Consume(Token::PERIOD); |
| 2351 int pos = scanner().location().beg_pos; | 2672 int pos = scanner().location().beg_pos; |
| 2352 Handle<String> name = ParseIdentifier(CHECK_OK); | 2673 Handle<String> name = ParseIdentifier(CHECK_OK); |
| 2353 result = factory()->NewProperty(result, NEW(Literal(name)), pos); | 2674 result = factory()->NewProperty(result, NEW(Literal(name)), pos); |
| 2354 break; | 2675 break; |
| (...skipping 801 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3156 } | 3477 } |
| 3157 ZoneList<Expression*>* args = new ZoneList<Expression*>(2); | 3478 ZoneList<Expression*>* args = new ZoneList<Expression*>(2); |
| 3158 args->Add(new Literal(type)); | 3479 args->Add(new Literal(type)); |
| 3159 args->Add(new Literal(array)); | 3480 args->Add(new Literal(array)); |
| 3160 return new Throw(new CallRuntime(constructor, NULL, args), | 3481 return new Throw(new CallRuntime(constructor, NULL, args), |
| 3161 scanner().location().beg_pos); | 3482 scanner().location().beg_pos); |
| 3162 } | 3483 } |
| 3163 | 3484 |
| 3164 | 3485 |
| 3165 // ---------------------------------------------------------------------------- | 3486 // ---------------------------------------------------------------------------- |
| 3487 // Regular expressions |
| 3488 |
| 3489 |
| 3490 RegExpParser::RegExpParser(FlatStringReader* in, |
| 3491 Handle<String>* error, |
| 3492 bool multiline) |
| 3493 : current_(kEndMarker), |
| 3494 has_more_(true), |
| 3495 multiline_(multiline), |
| 3496 next_pos_(0), |
| 3497 in_(in), |
| 3498 error_(error), |
| 3499 has_character_escapes_(false), |
| 3500 captures_(NULL), |
| 3501 is_scanned_for_captures_(false), |
| 3502 capture_count_(0) { |
| 3503 Advance(1); |
| 3504 } |
| 3505 |
| 3506 |
| 3507 uc32 RegExpParser::Next() { |
| 3508 if (has_next()) { |
| 3509 return in()->Get(next_pos_); |
| 3510 } else { |
| 3511 return kEndMarker; |
| 3512 } |
| 3513 } |
| 3514 |
| 3515 |
| 3516 void RegExpParser::Advance() { |
| 3517 if (next_pos_ < in()->length()) { |
| 3518 current_ = in()->Get(next_pos_); |
| 3519 next_pos_++; |
| 3520 } else { |
| 3521 current_ = kEndMarker; |
| 3522 has_more_ = false; |
| 3523 } |
| 3524 } |
| 3525 |
| 3526 |
| 3527 void RegExpParser::Reset(int pos) { |
| 3528 next_pos_ = pos; |
| 3529 Advance(); |
| 3530 } |
| 3531 |
| 3532 |
| 3533 void RegExpParser::Advance(int dist) { |
| 3534 for (int i = 0; i < dist; i++) |
| 3535 Advance(); |
| 3536 } |
| 3537 |
| 3538 |
| 3539 // Reports whether the parsed string atoms contain any characters that were |
| 3540 // escaped in the original pattern. If not, all atoms are proper substrings |
| 3541 // of the original pattern. |
| 3542 bool RegExpParser::HasCharacterEscapes() { |
| 3543 return has_character_escapes_; |
| 3544 } |
| 3545 |
| 3546 RegExpTree* RegExpParser::ReportError(Vector<const char> message, bool* ok) { |
| 3547 *ok = false; |
| 3548 *error_ = Factory::NewStringFromAscii(message, NOT_TENURED); |
| 3549 return NULL; |
| 3550 } |
| 3551 |
| 3552 |
| 3553 // Pattern :: |
| 3554 // Disjunction |
| 3555 RegExpTree* RegExpParser::ParsePattern(bool* ok) { |
| 3556 RegExpTree* result = ParseDisjunction(CHECK_OK); |
| 3557 if (has_more()) { |
| 3558 ReportError(CStrVector("Unmatched ')'"), CHECK_OK); |
| 3559 } |
| 3560 return result; |
| 3561 } |
| 3562 |
| 3563 |
| 3564 bool RegExpParser::CaptureAvailable(int index) { |
| 3565 if (captures_ == NULL) return false; |
| 3566 if (index >= captures_->length()) return false; |
| 3567 RegExpCapture* capture = captures_->at(index); |
| 3568 return capture != NULL && capture->available() == CAPTURE_AVAILABLE; |
| 3569 } |
| 3570 |
| 3571 |
| 3572 // Disjunction :: |
| 3573 // Alternative |
| 3574 // Alternative | Disjunction |
| 3575 // Alternative :: |
| 3576 // [empty] |
| 3577 // Term Alternative |
| 3578 // Term :: |
| 3579 // Assertion |
| 3580 // Atom |
| 3581 // Atom Quantifier |
| 3582 RegExpTree* RegExpParser::ParseDisjunction(bool* ok) { |
| 3583 RegExpBuilder builder; |
| 3584 int capture_start_index = captures_started(); |
| 3585 while (true) { |
| 3586 switch (current()) { |
| 3587 case kEndMarker: |
| 3588 case ')': |
| 3589 return builder.ToRegExp(); |
| 3590 case '|': { |
| 3591 Advance(); |
| 3592 builder.NewAlternative(); |
| 3593 int capture_new_alt_start_index = captures_started(); |
| 3594 for (int i = capture_start_index; i < capture_new_alt_start_index; i++) { |
| 3595 RegExpCapture* capture = captures_->at(i); |
| 3596 if (capture->available() == CAPTURE_AVAILABLE) { |
| 3597 capture->set_available(CAPTURE_UNREACHABLE); |
| 3598 } |
| 3599 } |
| 3600 capture_start_index = capture_new_alt_start_index; |
| 3601 continue; |
| 3602 } |
| 3603 case '*': |
| 3604 case '+': |
| 3605 case '?': |
| 3606 ReportError(CStrVector("Nothing to repeat"), CHECK_OK); |
| 3607 case '^': { |
| 3608 Advance(); |
| 3609 RegExpAssertion::Type type = |
| 3610 multiline_ ? RegExpAssertion::START_OF_LINE : |
| 3611 RegExpAssertion::START_OF_INPUT; |
| 3612 builder.AddAssertion(new RegExpAssertion(type)); |
| 3613 continue; |
| 3614 } |
| 3615 case '$': { |
| 3616 Advance(); |
| 3617 RegExpAssertion::Type type = |
| 3618 multiline_ ? RegExpAssertion::END_OF_LINE : |
| 3619 RegExpAssertion::END_OF_INPUT; |
| 3620 builder.AddAssertion(new RegExpAssertion(type)); |
| 3621 continue; |
| 3622 } |
| 3623 case '.': { |
| 3624 Advance(); |
| 3625 // everything except \x0a, \x0d, \u2028 and \u2029 |
| 3626 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2); |
| 3627 CharacterRange::AddClassEscape('.', ranges); |
| 3628 RegExpTree* atom = new RegExpCharacterClass(ranges, false); |
| 3629 builder.AddAtom(atom); |
| 3630 break; |
| 3631 } |
| 3632 case '(': { |
| 3633 RegExpTree* atom = ParseGroup(CHECK_OK); |
| 3634 builder.AddAtom(atom); |
| 3635 break; |
| 3636 } |
| 3637 case '[': { |
| 3638 RegExpTree* atom = ParseCharacterClass(CHECK_OK); |
| 3639 builder.AddAtom(atom); |
| 3640 break; |
| 3641 } |
| 3642 // Atom :: |
| 3643 // \ AtomEscape |
| 3644 case '\\': |
| 3645 switch (Next()) { |
| 3646 case kEndMarker: |
| 3647 ReportError(CStrVector("\\ at end of pattern"), CHECK_OK); |
| 3648 case 'b': |
| 3649 Advance(2); |
| 3650 builder.AddAssertion( |
| 3651 new RegExpAssertion(RegExpAssertion::BOUNDARY)); |
| 3652 continue; |
| 3653 case 'B': |
| 3654 Advance(2); |
| 3655 builder.AddAssertion( |
| 3656 new RegExpAssertion(RegExpAssertion::NON_BOUNDARY)); |
| 3657 continue; |
| 3658 // AtomEscape :: |
| 3659 // CharacterClassEscape |
| 3660 // |
| 3661 // CharacterClassEscape :: one of |
| 3662 // d D s S w W |
| 3663 case 'd': case 'D': case 's': case 'S': case 'w': case 'W': { |
| 3664 uc32 c = Next(); |
| 3665 Advance(2); |
| 3666 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2); |
| 3667 CharacterRange::AddClassEscape(c, ranges); |
| 3668 RegExpTree* atom = new RegExpCharacterClass(ranges, false); |
| 3669 builder.AddAtom(atom); |
| 3670 goto has_read_atom; // Avoid setting has_character_escapes_. |
| 3671 } |
| 3672 case '1': case '2': case '3': case '4': case '5': case '6': |
| 3673 case '7': case '8': case '9': { |
| 3674 int index = 0; |
| 3675 if (ParseBackReferenceIndex(&index)) { |
| 3676 if (!CaptureAvailable(index - 1)) { |
| 3677 // Prepare to ignore a following quantifier |
| 3678 builder.AddEmpty(); |
| 3679 goto has_read_atom; |
| 3680 } |
| 3681 RegExpCapture* capture = captures_->at(index - 1); |
| 3682 RegExpTree* atom = new RegExpBackReference(capture); |
| 3683 builder.AddAtom(atom); |
| 3684 goto has_read_atom; // Avoid setting has_character_escapes_. |
| 3685 } |
| 3686 uc32 first_digit = Next(); |
| 3687 if (first_digit == '8' || first_digit == '9') { |
| 3688 // Treat as identity escape |
| 3689 builder.AddCharacter(first_digit); |
| 3690 Advance(2); |
| 3691 break; |
| 3692 } |
| 3693 } |
| 3694 // FALLTHROUGH |
| 3695 case '0': { |
| 3696 Advance(); |
| 3697 uc32 octal = ParseOctalLiteral(); |
| 3698 builder.AddCharacter(octal); |
| 3699 break; |
| 3700 } |
| 3701 // ControlEscape :: one of |
| 3702 // f n r t v |
| 3703 case 'f': |
| 3704 Advance(2); |
| 3705 builder.AddCharacter('\f'); |
| 3706 break; |
| 3707 case 'n': |
| 3708 Advance(2); |
| 3709 builder.AddCharacter('\n'); |
| 3710 break; |
| 3711 case 'r': |
| 3712 Advance(2); |
| 3713 builder.AddCharacter('\r'); |
| 3714 break; |
| 3715 case 't': |
| 3716 Advance(2); |
| 3717 builder.AddCharacter('\t'); |
| 3718 break; |
| 3719 case 'v': |
| 3720 Advance(2); |
| 3721 builder.AddCharacter('\v'); |
| 3722 break; |
| 3723 case 'c': { |
| 3724 Advance(2); |
| 3725 uc32 control = ParseControlLetterEscape(ok); |
| 3726 builder.AddCharacter(control); |
| 3727 break; |
| 3728 } |
| 3729 case 'x': { |
| 3730 Advance(2); |
| 3731 uc32 value; |
| 3732 if (ParseHexEscape(2, &value)) { |
| 3733 builder.AddCharacter(value); |
| 3734 } else { |
| 3735 builder.AddCharacter('x'); |
| 3736 } |
| 3737 break; |
| 3738 } |
| 3739 case 'u': { |
| 3740 Advance(2); |
| 3741 uc32 value; |
| 3742 if (ParseHexEscape(4, &value)) { |
| 3743 builder.AddCharacter(value); |
| 3744 } else { |
| 3745 builder.AddCharacter('u'); |
| 3746 } |
| 3747 break; |
| 3748 } |
| 3749 default: |
| 3750 // Identity escape. |
| 3751 builder.AddCharacter(Next()); |
| 3752 Advance(2); |
| 3753 break; |
| 3754 } |
| 3755 has_character_escapes_ = true; |
| 3756 break; |
| 3757 case '{': { |
| 3758 int dummy; |
| 3759 if (ParseIntervalQuantifier(&dummy, &dummy)) { |
| 3760 ReportError(CStrVector("Nothing to repeat"), CHECK_OK); |
| 3761 } |
| 3762 // fallthrough |
| 3763 } |
| 3764 default: |
| 3765 builder.AddCharacter(current()); |
| 3766 Advance(); |
| 3767 break; |
| 3768 } // end switch(current()) |
| 3769 |
| 3770 has_read_atom: |
| 3771 int min; |
| 3772 int max; |
| 3773 switch (current()) { |
| 3774 // QuantifierPrefix :: |
| 3775 // * |
| 3776 // + |
| 3777 // ? |
| 3778 // { |
| 3779 case '*': |
| 3780 min = 0; |
| 3781 max = RegExpQuantifier::kInfinity; |
| 3782 Advance(); |
| 3783 break; |
| 3784 case '+': |
| 3785 min = 1; |
| 3786 max = RegExpQuantifier::kInfinity; |
| 3787 Advance(); |
| 3788 break; |
| 3789 case '?': |
| 3790 min = 0; |
| 3791 max = 1; |
| 3792 Advance(); |
| 3793 break; |
| 3794 case '{': |
| 3795 if (ParseIntervalQuantifier(&min, &max)) { |
| 3796 break; |
| 3797 } else { |
| 3798 continue; |
| 3799 } |
| 3800 default: |
| 3801 continue; |
| 3802 } |
| 3803 bool is_greedy = true; |
| 3804 if (current() == '?') { |
| 3805 is_greedy = false; |
| 3806 Advance(); |
| 3807 } |
| 3808 builder.AddQuantifierToAtom(min, max, is_greedy); |
| 3809 } |
| 3810 } |
| 3811 |
| 3812 class SourceCharacter { |
| 3813 public: |
| 3814 static bool Is(uc32 c) { |
| 3815 switch (c) { |
| 3816 // case ']': case '}': |
| 3817 // In spidermonkey and jsc these are treated as source characters |
| 3818 // so we do too. |
| 3819 case '^': case '$': case '\\': case '.': case '*': case '+': |
| 3820 case '?': case '(': case ')': case '[': case '{': case '|': |
| 3821 case RegExpParser::kEndMarker: |
| 3822 return false; |
| 3823 default: |
| 3824 return true; |
| 3825 } |
| 3826 } |
| 3827 }; |
| 3828 |
| 3829 |
| 3830 static unibrow::Predicate<SourceCharacter> source_character; |
| 3831 |
| 3832 |
| 3833 static inline bool IsSourceCharacter(uc32 c) { |
| 3834 return source_character.get(c); |
| 3835 } |
| 3836 |
#ifdef DEBUG
// Currently only used in an ASSERT.
// Returns true for the letters of the character class escapes
// \d \D \s \S \w \W.
static bool IsSpecialClassEscape(uc32 c) {
  return c == 'd' || c == 'D' ||
         c == 's' || c == 'S' ||
         c == 'w' || c == 'W';
}
#endif
| 3850 |
| 3851 |
| 3852 // In order to know whether an escape is a backreference or not we have to scan |
| 3853 // the entire regexp and find the number of capturing parentheses. However we |
| 3854 // don't want to scan the regexp twice unless it is necessary. This mini-parser |
| 3855 // is called when needed. It can see the difference between capturing and |
| 3856 // noncapturing parentheses and can skip character classes and backslash-escaped |
| 3857 // characters. |
| 3858 void RegExpParser::ScanForCaptures() { |
| 3859 // Start with captures started previous to current position |
| 3860 int capture_count = captures_started(); |
| 3861 // Add count of captures after this position. |
| 3862 int n; |
| 3863 while ((n = current()) != kEndMarker) { |
| 3864 Advance(); |
| 3865 switch (n) { |
| 3866 case '\\': |
| 3867 Advance(); |
| 3868 break; |
| 3869 case '[': { |
| 3870 int c; |
| 3871 while ((c = current()) != kEndMarker) { |
| 3872 Advance(); |
| 3873 if (c == '\\') { |
| 3874 Advance(); |
| 3875 } else { |
| 3876 if (c == ']') break; |
| 3877 } |
| 3878 } |
| 3879 break; |
| 3880 } |
| 3881 case '(': |
| 3882 if (current() != '?') capture_count++; |
| 3883 break; |
| 3884 } |
| 3885 } |
| 3886 capture_count_ = capture_count; |
| 3887 is_scanned_for_captures_ = true; |
| 3888 } |
| 3889 |
| 3890 |
| 3891 bool RegExpParser::ParseBackReferenceIndex(int* index_out) { |
| 3892 ASSERT_EQ('\\', current()); |
| 3893 ASSERT('1' <= Next() && Next() <= '9'); |
| 3894 // Try to parse a decimal literal that is no greater than the number |
| 3895 // of previously encountered left capturing parentheses. |
| 3896 // This is a not according the the ECMAScript specification. According to |
| 3897 // that, one must accept values up to the total number of left capturing |
| 3898 // parentheses in the entire input, even if they are meaningless. |
| 3899 int start = position(); |
| 3900 int value = Next() - '0'; |
| 3901 Advance(2); |
| 3902 while (true) { |
| 3903 uc32 c = current(); |
| 3904 if (IsDecimalDigit(c)) { |
| 3905 value = 10 * value + (c - '0'); |
| 3906 Advance(); |
| 3907 } else { |
| 3908 break; |
| 3909 } |
| 3910 } |
| 3911 if (value > captures_started()) { |
| 3912 if (!is_scanned_for_captures_) { |
| 3913 int saved_position = position(); |
| 3914 ScanForCaptures(); |
| 3915 Reset(saved_position); |
| 3916 } |
| 3917 if (value > capture_count_) { |
| 3918 Reset(start); |
| 3919 return false; |
| 3920 } |
| 3921 } |
| 3922 *index_out = value; |
| 3923 return true; |
| 3924 } |
| 3925 |
| 3926 |
| 3927 // QuantifierPrefix :: |
| 3928 // { DecimalDigits } |
| 3929 // { DecimalDigits , } |
| 3930 // { DecimalDigits , DecimalDigits } |
| 3931 bool RegExpParser::ParseIntervalQuantifier(int* min_out, int* max_out) { |
| 3932 ASSERT_EQ(current(), '{'); |
| 3933 int start = position(); |
| 3934 Advance(); |
| 3935 int min = 0; |
| 3936 if (!IsDecimalDigit(current())) { |
| 3937 Reset(start); |
| 3938 return false; |
| 3939 } |
| 3940 while (IsDecimalDigit(current())) { |
| 3941 min = 10 * min + (current() - '0'); |
| 3942 Advance(); |
| 3943 } |
| 3944 int max = 0; |
| 3945 if (current() == '}') { |
| 3946 max = min; |
| 3947 Advance(); |
| 3948 } else if (current() == ',') { |
| 3949 Advance(); |
| 3950 if (current() == '}') { |
| 3951 max = RegExpQuantifier::kInfinity; |
| 3952 Advance(); |
| 3953 } else { |
| 3954 while (IsDecimalDigit(current())) { |
| 3955 max = 10 * max + (current() - '0'); |
| 3956 Advance(); |
| 3957 } |
| 3958 if (current() != '}') { |
| 3959 Reset(start); |
| 3960 return false; |
| 3961 } |
| 3962 Advance(); |
| 3963 } |
| 3964 } else { |
| 3965 Reset(start); |
| 3966 return false; |
| 3967 } |
| 3968 *min_out = min; |
| 3969 *max_out = max; |
| 3970 return true; |
| 3971 } |
| 3972 |
| 3973 |
| 3974 // Upper and lower case letters differ by one bit. |
| 3975 STATIC_CHECK(('a' ^ 'A') == 0x20); |
| 3976 |
| 3977 uc32 RegExpParser::ParseControlLetterEscape(bool* ok) { |
| 3978 if (!has_more()) { |
| 3979 ReportError(CStrVector("\\c at end of pattern"), ok); |
| 3980 return '\0'; |
| 3981 } |
| 3982 uc32 letter = current() & ~(0x20); // Collapse upper and lower case letters. |
| 3983 if (letter < 'A' || 'Z' < letter) { |
| 3984 // Non-spec error-correction: "\c" followed by non-control letter is |
| 3985 // interpreted as an IdentityEscape of 'c'. |
| 3986 return 'c'; |
| 3987 } |
| 3988 Advance(); |
| 3989 return letter & 0x1f; // Remainder modulo 32, per specification. |
| 3990 } |
| 3991 |
| 3992 |
| 3993 uc32 RegExpParser::ParseOctalLiteral() { |
| 3994 ASSERT('0' <= current() && current() <= '7'); |
| 3995 // For compatibility with some other browsers (not all), we parse |
| 3996 // up to three octal digits with a value below 256. |
| 3997 uc32 value = current() - '0'; |
| 3998 Advance(); |
| 3999 if ('0' <= current() && current() <= '7') { |
| 4000 value = value * 8 + current() - '0'; |
| 4001 Advance(); |
| 4002 if (value < 32 && '0' <= current() && current() <= '7') { |
| 4003 value = value * 8 + current() - '0'; |
| 4004 Advance(); |
| 4005 } |
| 4006 } |
| 4007 return value; |
| 4008 } |
| 4009 |
| 4010 |
| 4011 bool RegExpParser::ParseHexEscape(int length, uc32 *value) { |
| 4012 int start = position(); |
| 4013 uc32 val = 0; |
| 4014 bool done = false; |
| 4015 for (int i = 0; !done; i++) { |
| 4016 uc32 c = current(); |
| 4017 int d = HexValue(c); |
| 4018 if (d < 0) { |
| 4019 Reset(start); |
| 4020 return false; |
| 4021 } |
| 4022 val = val * 16 + d; |
| 4023 Advance(); |
| 4024 if (i == length - 1) { |
| 4025 done = true; |
| 4026 } |
| 4027 } |
| 4028 *value = val; |
| 4029 return true; |
| 4030 } |
| 4031 |
| 4032 |
| 4033 uc32 RegExpParser::ParseClassCharacterEscape(bool* ok) { |
| 4034 ASSERT(current() == '\\'); |
| 4035 ASSERT(has_next() && !IsSpecialClassEscape(Next())); |
| 4036 Advance(); |
| 4037 switch (current()) { |
| 4038 case 'b': |
| 4039 Advance(); |
| 4040 return '\b'; |
| 4041 // ControlEscape :: one of |
| 4042 // f n r t v |
| 4043 case 'f': |
| 4044 Advance(); |
| 4045 return '\f'; |
| 4046 case 'n': |
| 4047 Advance(); |
| 4048 return '\n'; |
| 4049 case 'r': |
| 4050 Advance(); |
| 4051 return '\r'; |
| 4052 case 't': |
| 4053 Advance(); |
| 4054 return '\t'; |
| 4055 case 'v': |
| 4056 Advance(); |
| 4057 return '\v'; |
| 4058 case 'c': |
| 4059 return ParseControlLetterEscape(ok); |
| 4060 case '0': case '1': case '2': case '3': case '4': case '5': |
| 4061 case '6': case '7': |
| 4062 // For compatibility, we interpret a decimal escape that isn't |
| 4063 // a back reference (and therefore either \0 or not valid according |
| 4064 // to the specification) as a 1..3 digit octal character code. |
| 4065 return ParseOctalLiteral(); |
| 4066 case 'x': { |
| 4067 Advance(); |
| 4068 uc32 value; |
| 4069 if (ParseHexEscape(2, &value)) { |
| 4070 return value; |
| 4071 } |
| 4072 // If \x is not followed by a two-digit hexadecimal, treat it |
| 4073 // as an identity escape. |
| 4074 return 'x'; |
| 4075 } |
| 4076 case 'u': { |
| 4077 Advance(); |
| 4078 uc32 value; |
| 4079 if (ParseHexEscape(4, &value)) { |
| 4080 return value; |
| 4081 } |
| 4082 // If \u is not followed by a four-digit hexadecimal, treat it |
| 4083 // as an identity escape. |
| 4084 return 'u'; |
| 4085 } |
| 4086 default: { |
| 4087 // Extended identity escape. We accept any character that hasn't |
| 4088 // been matched by a more specific case, not just the subset required |
| 4089 // by the ECMAScript specification. |
| 4090 uc32 result = current(); |
| 4091 Advance(); |
| 4092 return result; |
| 4093 } |
| 4094 } |
| 4095 return 0; |
| 4096 } |
| 4097 |
| 4098 |
| 4099 RegExpTree* RegExpParser::ParseGroup(bool* ok) { |
| 4100 ASSERT_EQ(current(), '('); |
| 4101 char type = '('; |
| 4102 Advance(); |
| 4103 if (current() == '?') { |
| 4104 switch (Next()) { |
| 4105 case ':': case '=': case '!': |
| 4106 type = Next(); |
| 4107 Advance(2); |
| 4108 break; |
| 4109 default: |
| 4110 ReportError(CStrVector("Invalid group"), CHECK_OK); |
| 4111 break; |
| 4112 } |
| 4113 } else { |
| 4114 if (captures_ == NULL) { |
| 4115 captures_ = new ZoneList<RegExpCapture*>(2); |
| 4116 } |
| 4117 captures_->Add(NULL); |
| 4118 } |
| 4119 int capture_index = captures_started(); |
| 4120 RegExpTree* body = ParseDisjunction(CHECK_OK); |
| 4121 if (current() != ')') { |
| 4122 ReportError(CStrVector("Unterminated group"), CHECK_OK); |
| 4123 } |
| 4124 Advance(); |
| 4125 |
| 4126 int end_capture_index = captures_started(); |
| 4127 if (type == '!') { |
| 4128 // Captures inside a negative lookahead are never available outside it. |
| 4129 for (int i = capture_index; i < end_capture_index; i++) { |
| 4130 RegExpCapture* capture = captures_->at(i); |
| 4131 ASSERT(capture != NULL); |
| 4132 capture->set_available(CAPTURE_PERMANENTLY_UNREACHABLE); |
| 4133 } |
| 4134 } else { |
| 4135 // Captures temporarily unavailable because they are in different |
| 4136 // alternatives are all available after the disjunction. |
| 4137 for (int i = capture_index; i < end_capture_index; i++) { |
| 4138 RegExpCapture* capture = captures_->at(i); |
| 4139 ASSERT(capture != NULL); |
| 4140 if (capture->available() == CAPTURE_UNREACHABLE) { |
| 4141 capture->set_available(CAPTURE_AVAILABLE); |
| 4142 } |
| 4143 } |
| 4144 } |
| 4145 |
| 4146 if (type == '(') { |
| 4147 RegExpCapture* capture = new RegExpCapture(body, capture_index); |
| 4148 captures_->at(capture_index - 1) = capture; |
| 4149 return capture; |
| 4150 } else if (type == ':') { |
| 4151 return body; |
| 4152 } else { |
| 4153 ASSERT(type == '=' || type == '!'); |
| 4154 bool is_positive = (type == '='); |
| 4155 return new RegExpLookahead(body, is_positive); |
| 4156 } |
| 4157 } |
| 4158 |
| 4159 |
| 4160 CharacterRange RegExpParser::ParseClassAtom(bool* is_char_class, |
| 4161 ZoneList<CharacterRange>* ranges, |
| 4162 bool* ok) { |
| 4163 ASSERT_EQ(false, *is_char_class); |
| 4164 uc32 first = current(); |
| 4165 if (first == '\\') { |
| 4166 switch (Next()) { |
| 4167 case 'w': case 'W': case 'd': case 'D': case 's': case 'S': { |
| 4168 *is_char_class = true; |
| 4169 uc32 c = Next(); |
| 4170 CharacterRange::AddClassEscape(c, ranges); |
| 4171 Advance(2); |
| 4172 return NULL; |
| 4173 } |
| 4174 default: |
| 4175 uc32 c = ParseClassCharacterEscape(CHECK_OK); |
| 4176 return CharacterRange::Singleton(c); |
| 4177 } |
| 4178 } else { |
| 4179 Advance(); |
| 4180 return CharacterRange::Singleton(first); |
| 4181 } |
| 4182 } |
| 4183 |
| 4184 |
| 4185 RegExpTree* RegExpParser::ParseCharacterClass(bool* ok) { |
| 4186 static const char* kUnterminated = "Unterminated character class"; |
| 4187 static const char* kIllegal = "Illegal character class"; |
| 4188 static const char* kRangeOutOfOrder = "Range out of order in character class"; |
| 4189 |
| 4190 ASSERT_EQ(current(), '['); |
| 4191 Advance(); |
| 4192 bool is_negated = false; |
| 4193 if (current() == '^') { |
| 4194 is_negated = true; |
| 4195 Advance(); |
| 4196 } |
| 4197 ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2); |
| 4198 while (has_more() && current() != ']') { |
| 4199 bool is_char_class = false; |
| 4200 CharacterRange first = ParseClassAtom(&is_char_class, ranges, CHECK_OK); |
| 4201 if (!is_char_class) { |
| 4202 if (current() == '-') { |
| 4203 Advance(); |
| 4204 if (current() == kEndMarker) { |
| 4205 // If we reach the end we break out of the loop and let the |
| 4206 // following code report an error. |
| 4207 break; |
| 4208 } else if (current() == ']') { |
| 4209 ranges->Add(first); |
| 4210 ranges->Add(CharacterRange::Singleton('-')); |
| 4211 break; |
| 4212 } |
| 4213 CharacterRange next = |
| 4214 ParseClassAtom(&is_char_class, ranges, CHECK_OK); |
| 4215 if (is_char_class) { |
| 4216 return ReportError(CStrVector(kIllegal), CHECK_OK); |
| 4217 } |
| 4218 if (first.from() > next.to()) { |
| 4219 return ReportError(CStrVector(kRangeOutOfOrder), CHECK_OK); |
| 4220 } |
| 4221 ranges->Add(CharacterRange::Range(first.from(), next.to())); |
| 4222 } else { |
| 4223 ranges->Add(first); |
| 4224 } |
| 4225 } |
| 4226 } |
| 4227 if (!has_more()) { |
| 4228 return ReportError(CStrVector(kUnterminated), CHECK_OK); |
| 4229 } |
| 4230 Advance(); |
| 4231 if (ranges->length() == 0) { |
| 4232 ranges->Add(CharacterRange::Range(0, 0xffff)); |
| 4233 is_negated = !is_negated; |
| 4234 } |
| 4235 return new RegExpCharacterClass(ranges, is_negated); |
| 4236 } |
| 4237 |
| 4238 |
| 4239 // ---------------------------------------------------------------------------- |
| 3166 // The Parser interface. | 4240 // The Parser interface. |
| 3167 | 4241 |
| 3168 // MakeAST() is just a wrapper for the corresponding Parser calls | 4242 // MakeAST() is just a wrapper for the corresponding Parser calls |
| 3169 // so we don't have to expose the entire Parser class in the .h file. | 4243 // so we don't have to expose the entire Parser class in the .h file. |
| 3170 | 4244 |
| 3171 static bool always_allow_natives_syntax = false; | 4245 static bool always_allow_natives_syntax = false; |
| 3172 | 4246 |
| 3173 | 4247 |
| 3174 ParserMessage::~ParserMessage() { | 4248 ParserMessage::~ParserMessage() { |
| 3175 for (int i = 0; i < args().length(); i++) | 4249 for (int i = 0; i < args().length(); i++) |
| (...skipping 27 matching lines...) Expand all Loading... |
| 3203 PreParser parser(no_script, allow_natives_syntax, extension); | 4277 PreParser parser(no_script, allow_natives_syntax, extension); |
| 3204 if (!parser.PreParseProgram(stream)) return NULL; | 4278 if (!parser.PreParseProgram(stream)) return NULL; |
| 3205 // The list owns the backing store so we need to clone the vector. | 4279 // The list owns the backing store so we need to clone the vector. |
| 3206 // That way, the result will be exactly the right size rather than | 4280 // That way, the result will be exactly the right size rather than |
| 3207 // the expected 50% too large. | 4281 // the expected 50% too large. |
| 3208 Vector<unsigned> store = parser.recorder()->store()->ToVector().Clone(); | 4282 Vector<unsigned> store = parser.recorder()->store()->ToVector().Clone(); |
| 3209 return new ScriptDataImpl(store); | 4283 return new ScriptDataImpl(store); |
| 3210 } | 4284 } |
| 3211 | 4285 |
| 3212 | 4286 |
| 4287 bool ParseRegExp(FlatStringReader* input, |
| 4288 bool multiline, |
| 4289 RegExpParseResult* result) { |
| 4290 ASSERT(result != NULL); |
| 4291 RegExpParser parser(input, &result->error, multiline); |
| 4292 bool ok = true; |
| 4293 result->tree = parser.ParsePattern(&ok); |
| 4294 if (!ok) { |
| 4295 ASSERT(result->tree == NULL); |
| 4296 ASSERT(!result->error.is_null()); |
| 4297 } else { |
| 4298 ASSERT(result->tree != NULL); |
| 4299 ASSERT(result->error.is_null()); |
| 4300 } |
| 4301 if (ok) { |
| 4302 result->has_character_escapes = parser.HasCharacterEscapes(); |
| 4303 result->capture_count = parser.captures_started(); |
| 4304 } |
| 4305 return ok; |
| 4306 } |
| 4307 |
| 4308 |
| 3213 FunctionLiteral* MakeAST(bool compile_in_global_context, | 4309 FunctionLiteral* MakeAST(bool compile_in_global_context, |
| 3214 Handle<Script> script, | 4310 Handle<Script> script, |
| 3215 v8::Extension* extension, | 4311 v8::Extension* extension, |
| 3216 ScriptDataImpl* pre_data) { | 4312 ScriptDataImpl* pre_data) { |
| 3217 bool allow_natives_syntax = | 4313 bool allow_natives_syntax = |
| 3218 always_allow_natives_syntax || | 4314 always_allow_natives_syntax || |
| 3219 FLAG_allow_natives_syntax || | 4315 FLAG_allow_natives_syntax || |
| 3220 Bootstrapper::IsActive(); | 4316 Bootstrapper::IsActive(); |
| 3221 AstBuildingParser parser(script, allow_natives_syntax, extension, pre_data); | 4317 AstBuildingParser parser(script, allow_natives_syntax, extension, pre_data); |
| 3222 if (pre_data != NULL && pre_data->has_error()) { | 4318 if (pre_data != NULL && pre_data->has_error()) { |
| (...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3255 start_position, | 4351 start_position, |
| 3256 is_expression); | 4352 is_expression); |
| 3257 return result; | 4353 return result; |
| 3258 } | 4354 } |
| 3259 | 4355 |
| 3260 | 4356 |
| 3261 #undef NEW | 4357 #undef NEW |
| 3262 | 4358 |
| 3263 | 4359 |
| 3264 } } // namespace v8::internal | 4360 } } // namespace v8::internal |
| OLD | NEW |