Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2016 the V8 project authors. All rights reserved. | 1 // Copyright 2016 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "src/regexp/regexp-parser.h" | 5 #include "src/regexp/regexp-parser.h" |
| 6 | 6 |
| 7 #include "src/char-predicates-inl.h" | 7 #include "src/char-predicates-inl.h" |
| 8 #include "src/factory.h" | 8 #include "src/factory.h" |
| 9 #include "src/isolate.h" | 9 #include "src/isolate.h" |
| 10 #include "src/objects-inl.h" | 10 #include "src/objects-inl.h" |
| (...skipping 84 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 95 | 95 |
| 96 | 96 |
| 97 void RegExpParser::Advance(int dist) { | 97 void RegExpParser::Advance(int dist) { |
| 98 next_pos_ += dist - 1; | 98 next_pos_ += dist - 1; |
| 99 Advance(); | 99 Advance(); |
| 100 } | 100 } |
| 101 | 101 |
| 102 | 102 |
| 103 bool RegExpParser::simple() { return simple_; } | 103 bool RegExpParser::simple() { return simple_; } |
| 104 | 104 |
| 105 | 105 bool RegExpParser::IsSyntaxCharacterOrSlash(uc32 c) { |
| 106 bool RegExpParser::IsSyntaxCharacter(uc32 c) { | 106 switch (c) { |
| 107 return c == '^' || c == '$' || c == '\\' || c == '.' || c == '*' || | 107 case '^': |
| 108 c == '+' || c == '?' || c == '(' || c == ')' || c == '[' || c == ']' || | 108 case '$': |
| 109 c == '{' || c == '}' || c == '|'; | 109 case '\\': |
| 110 case '.': | |
| 111 case '*': | |
| 112 case '+': | |
| 113 case '?': | |
| 114 case '(': | |
| 115 case ')': | |
| 116 case '[': | |
| 117 case ']': | |
| 118 case '{': | |
| 119 case '}': | |
| 120 case '|': | |
| 121 case '/': | |
| 122 return true; | |
| 123 default: | |
| 124 break; | |
| 125 } | |
| 126 return false; | |
| 110 } | 127 } |
| 111 | 128 |
| 112 | 129 |
| 113 RegExpTree* RegExpParser::ReportError(Vector<const char> message) { | 130 RegExpTree* RegExpParser::ReportError(Vector<const char> message) { |
| 114 failed_ = true; | 131 failed_ = true; |
| 115 *error_ = isolate()->factory()->NewStringFromAscii(message).ToHandleChecked(); | 132 *error_ = isolate()->factory()->NewStringFromAscii(message).ToHandleChecked(); |
| 116 // Zip to the end to make sure that no more input is read. | 133 // Zip to the end to make sure that no more input is read. |
| 117 current_ = kEndMarker; | 134 current_ = kEndMarker; |
| 118 next_pos_ = in()->length(); | 135 next_pos_ = in()->length(); |
| 119 return NULL; | 136 return NULL; |
| (...skipping 233 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 353 // the capture registers of the referenced capture are either | 370 // the capture registers of the referenced capture are either |
| 354 // both set or both cleared. | 371 // both set or both cleared. |
| 355 builder->AddEmpty(); | 372 builder->AddEmpty(); |
| 356 } else { | 373 } else { |
| 357 RegExpCapture* capture = GetCapture(index); | 374 RegExpCapture* capture = GetCapture(index); |
| 358 RegExpTree* atom = new (zone()) RegExpBackReference(capture); | 375 RegExpTree* atom = new (zone()) RegExpBackReference(capture); |
| 359 builder->AddAtom(atom); | 376 builder->AddAtom(atom); |
| 360 } | 377 } |
| 361 break; | 378 break; |
| 362 } | 379 } |
| 380 // With /u, no identity escapes except for syntax characters | |
| 381 // are allowed. Otherwise, all identity escapes are allowed. | |
| 382 if (unicode()) { | |
| 383 return ReportError(CStrVector("Invalid escape")); | |
| 384 } | |
| 363 uc32 first_digit = Next(); | 385 uc32 first_digit = Next(); |
| 364 if (first_digit == '8' || first_digit == '9') { | 386 if (first_digit == '8' || first_digit == '9') { |
| 365 // If the 'u' flag is present, only syntax characters can be | 387 builder->AddCharacter(first_digit); |
| 366 // escaped, | 388 Advance(2); |
| 367 // no other identity escapes are allowed. If the 'u' flag is not | |
| 368 // present, all identity escapes are allowed. | |
| 369 if (!unicode()) { | |
| 370 builder->AddCharacter(first_digit); | |
| 371 Advance(2); | |
| 372 } else { | |
| 373 return ReportError(CStrVector("Invalid escape")); | |
| 374 } | |
| 375 break; | 389 break; |
| 376 } | 390 } |
| 377 } | 391 } |
| 378 // FALLTHROUGH | 392 // FALLTHROUGH |
| 379 case '0': { | 393 case '0': { |
| 380 Advance(); | 394 Advance(); |
| 395 if (unicode() && Next() >= '0' && Next() <= '9') { | |
| 396 // With /u, decimal escapes with a leading 0 are not parsed as octal. | |
| 397 return ReportError(CStrVector("Invalid decimal escape")); | |
| 398 } | |
| 381 uc32 octal = ParseOctalLiteral(); | 399 uc32 octal = ParseOctalLiteral(); |
| 382 builder->AddCharacter(octal); | 400 builder->AddCharacter(octal); |
| 383 break; | 401 break; |
| 384 } | 402 } |
| 385 // ControlEscape :: one of | 403 // ControlEscape :: one of |
| 386 // f n r t v | 404 // f n r t v |
| 387 case 'f': | 405 case 'f': |
| 388 Advance(2); | 406 Advance(2); |
| 389 builder->AddCharacter('\f'); | 407 builder->AddCharacter('\f'); |
| 390 break; | 408 break; |
| (...skipping 17 matching lines...) Expand all Loading... | |
| 408 Advance(); | 426 Advance(); |
| 409 uc32 controlLetter = Next(); | 427 uc32 controlLetter = Next(); |
| 410 // Special case if it is an ASCII letter. | 428 // Special case if it is an ASCII letter. |
| 411 // Convert lower case letters to uppercase. | 429 // Convert lower case letters to uppercase. |
| 412 uc32 letter = controlLetter & ~('a' ^ 'A'); | 430 uc32 letter = controlLetter & ~('a' ^ 'A'); |
| 413 if (letter < 'A' || 'Z' < letter) { | 431 if (letter < 'A' || 'Z' < letter) { |
| 414 // controlLetter is not in range 'A'-'Z' or 'a'-'z'. | 432 // controlLetter is not in range 'A'-'Z' or 'a'-'z'. |
| 415 // This is outside the specification. We match JSC in | 433 // This is outside the specification. We match JSC in |
| 416 // reading the backslash as a literal character instead | 434 // reading the backslash as a literal character instead |
| 417 // of as starting an escape. | 435 // of as starting an escape. |
| 436 if (unicode()) { | |
| 437 // With /u, invalid escapes are not treated as identity escapes. | |
| 438 return ReportError(CStrVector("Invalid unicode escape")); | |
| 439 } | |
| 418 builder->AddCharacter('\\'); | 440 builder->AddCharacter('\\'); |
| 419 } else { | 441 } else { |
| 420 Advance(2); | 442 Advance(2); |
| 421 builder->AddCharacter(controlLetter & 0x1f); | 443 builder->AddCharacter(controlLetter & 0x1f); |
| 422 } | 444 } |
| 423 break; | 445 break; |
| 424 } | 446 } |
| 425 case 'x': { | 447 case 'x': { |
| 426 Advance(2); | 448 Advance(2); |
| 427 uc32 value; | 449 uc32 value; |
| 428 if (ParseHexEscape(2, &value)) { | 450 if (ParseHexEscape(2, &value)) { |
| 429 builder->AddCharacter(value); | 451 builder->AddCharacter(value); |
| 430 } else if (!unicode()) { | 452 } else if (!unicode()) { |
| 431 builder->AddCharacter('x'); | 453 builder->AddCharacter('x'); |
| 432 } else { | 454 } else { |
| 433 // If the 'u' flag is present, invalid escapes are not treated as | 455 // With /u, invalid escapes are not treated as identity escapes. |
| 434 // identity escapes. | |
| 435 return ReportError(CStrVector("Invalid escape")); | 456 return ReportError(CStrVector("Invalid escape")); |
| 436 } | 457 } |
| 437 break; | 458 break; |
| 438 } | 459 } |
| 439 case 'u': { | 460 case 'u': { |
| 440 Advance(2); | 461 Advance(2); |
| 441 uc32 value; | 462 uc32 value; |
| 442 if (ParseUnicodeEscape(&value)) { | 463 if (ParseUnicodeEscape(&value)) { |
| 443 builder->AddUnicodeCharacter(value); | 464 builder->AddUnicodeCharacter(value); |
| 444 } else if (!unicode()) { | 465 } else if (!unicode()) { |
| 445 builder->AddCharacter('u'); | 466 builder->AddCharacter('u'); |
| 446 } else { | 467 } else { |
| 447 // If the 'u' flag is present, invalid escapes are not treated as | 468 // With /u, invalid escapes are not treated as identity escapes. |
| 448 // identity escapes. | |
| 449 return ReportError(CStrVector("Invalid unicode escape")); | 469 return ReportError(CStrVector("Invalid unicode escape")); |
| 450 } | 470 } |
| 451 break; | 471 break; |
| 452 } | 472 } |
| 453 default: | 473 default: |
| 454 Advance(); | 474 Advance(); |
| 455 // If the 'u' flag is present, only syntax characters can be | 475 // With /u, no identity escapes except for syntax characters |
| 456 // escaped, no | 476 // are allowed. Otherwise, all identity escapes are allowed. |
| 457 // other identity escapes are allowed. If the 'u' flag is not | 477 if (!unicode() || IsSyntaxCharacterOrSlash(current())) { |
| 458 // present, | |
| 459 // all identity escapes are allowed. | |
| 460 if (!unicode() || IsSyntaxCharacter(current())) { | |
| 461 builder->AddCharacter(current()); | 478 builder->AddCharacter(current()); |
| 462 Advance(); | 479 Advance(); |
| 463 } else { | 480 } else { |
| 464 return ReportError(CStrVector("Invalid escape")); | 481 return ReportError(CStrVector("Invalid escape")); |
| 465 } | 482 } |
| 466 break; | 483 break; |
| 467 } | 484 } |
| 468 break; | 485 break; |
| 469 case '{': { | 486 case '{': { |
| 470 int dummy; | 487 int dummy; |
| 471 if (ParseIntervalQuantifier(&dummy, &dummy)) { | 488 if (ParseIntervalQuantifier(&dummy, &dummy)) { |
| 472 ReportError(CStrVector("Nothing to repeat") CHECK_FAILED); | 489 ReportError(CStrVector("Nothing to repeat") CHECK_FAILED); |
| 473 } | 490 } |
| 474 // fallthrough | 491 // fallthrough |
| 475 } | 492 } |
| 493 case '}': | |
| 494 case ']': | |
| 495 if (unicode()) { | |
| 496 ReportError(CStrVector("Lone quantifier brackets") CHECK_FAILED); | |
|
vogelheim
2016/01/28 13:38:21
I don't get the point of the ReportError(... CHECK
Yang
2016/01/28 14:01:07
Done.
| |
| 497 } | |
| 498 // fallthrough | |
| 476 default: | 499 default: |
| 477 builder->AddUnicodeCharacter(current()); | 500 builder->AddUnicodeCharacter(current()); |
| 478 Advance(); | 501 Advance(); |
| 479 break; | 502 break; |
| 480 } // end switch(current()) | 503 } // end switch(current()) |
| 481 | 504 |
| 482 int min; | 505 int min; |
| 483 int max; | 506 int max; |
| 484 switch (current()) { | 507 switch (current()) { |
| 485 // QuantifierPrefix :: | 508 // QuantifierPrefix :: |
| (...skipping 12 matching lines...) Expand all Loading... | |
| 498 Advance(); | 521 Advance(); |
| 499 break; | 522 break; |
| 500 case '?': | 523 case '?': |
| 501 min = 0; | 524 min = 0; |
| 502 max = 1; | 525 max = 1; |
| 503 Advance(); | 526 Advance(); |
| 504 break; | 527 break; |
| 505 case '{': | 528 case '{': |
| 506 if (ParseIntervalQuantifier(&min, &max)) { | 529 if (ParseIntervalQuantifier(&min, &max)) { |
| 507 if (max < min) { | 530 if (max < min) { |
| 508 ReportError(CStrVector("numbers out of order in {} quantifier.") | 531 ReportError(CStrVector("numbers out of order in {} quantifier") |
| 509 CHECK_FAILED); | 532 CHECK_FAILED); |
| 510 } | 533 } |
| 511 break; | 534 break; |
| 512 } else { | 535 } else { |
| 536 if (unicode()) { | |
| 537 // With /u, incomplete quantifiers are not allowed. | |
| 538 ReportError(CStrVector("Incomplete quantifier") CHECK_FAILED); | |
| 539 } | |
| 513 continue; | 540 continue; |
| 514 } | 541 } |
| 515 default: | 542 default: |
| 516 continue; | 543 continue; |
| 517 } | 544 } |
| 518 RegExpQuantifier::QuantifierType quantifier_type = RegExpQuantifier::GREEDY; | 545 RegExpQuantifier::QuantifierType quantifier_type = RegExpQuantifier::GREEDY; |
| 519 if (current() == '?') { | 546 if (current() == '?') { |
| 520 quantifier_type = RegExpQuantifier::NON_GREEDY; | 547 quantifier_type = RegExpQuantifier::NON_GREEDY; |
| 521 Advance(); | 548 Advance(); |
| 522 } else if (FLAG_regexp_possessive_quantifier && current() == '+') { | 549 } else if (FLAG_regexp_possessive_quantifier && current() == '+') { |
| 523 // FLAG_regexp_possessive_quantifier is a debug-only flag. | 550 // FLAG_regexp_possessive_quantifier is a debug-only flag. |
| 524 quantifier_type = RegExpQuantifier::POSSESSIVE; | 551 quantifier_type = RegExpQuantifier::POSSESSIVE; |
| 525 Advance(); | 552 Advance(); |
| 526 } | 553 } |
| 527 builder->AddQuantifierToAtom(min, max, quantifier_type); | 554 if (!builder->AddQuantifierToAtom(min, max, quantifier_type)) { |
| 555 ReportError(CStrVector("Invalid quantifier") CHECK_FAILED); | |
| 556 } | |
| 528 } | 557 } |
| 529 } | 558 } |
| 530 | 559 |
| 531 | 560 |
| 532 #ifdef DEBUG | 561 #ifdef DEBUG |
| 533 // Currently only used in a DCHECK. | 562 // Currently only used in a DCHECK. |
| 534 static bool IsSpecialClassEscape(uc32 c) { | 563 static bool IsSpecialClassEscape(uc32 c) { |
| 535 switch (c) { | 564 switch (c) { |
| 536 case 'd': | 565 case 'd': |
| 537 case 'D': | 566 case 'D': |
| (...skipping 277 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 815 return '\r'; | 844 return '\r'; |
| 816 case 't': | 845 case 't': |
| 817 Advance(); | 846 Advance(); |
| 818 return '\t'; | 847 return '\t'; |
| 819 case 'v': | 848 case 'v': |
| 820 Advance(); | 849 Advance(); |
| 821 return '\v'; | 850 return '\v'; |
| 822 case 'c': { | 851 case 'c': { |
| 823 uc32 controlLetter = Next(); | 852 uc32 controlLetter = Next(); |
| 824 uc32 letter = controlLetter & ~('A' ^ 'a'); | 853 uc32 letter = controlLetter & ~('A' ^ 'a'); |
| 825 // For compatibility with JSC, inside a character class | 854 // For compatibility with JSC, inside a character class we also accept |
| 826 // we also accept digits and underscore as control characters. | 855 // digits and underscore as control characters, unless with /u. |
| 827 if ((controlLetter >= '0' && controlLetter <= '9') || | 856 if (letter >= 'A' && letter <= 'Z') { |
| 828 controlLetter == '_' || (letter >= 'A' && letter <= 'Z')) { | |
| 829 Advance(2); | 857 Advance(2); |
| 830 // Control letters mapped to ASCII control characters in the range | 858 // Control letters mapped to ASCII control characters in the range |
| 831 // 0x00-0x1f. | 859 // 0x00-0x1f. |
| 832 return controlLetter & 0x1f; | 860 return controlLetter & 0x1f; |
| 833 } | 861 } |
| 862 if (unicode()) { | |
| 863 // With /u, invalid escapes are not treated as identity escapes. | |
| 864 ReportError(CStrVector("Invalid class escape")); | |
| 865 return 0; | |
| 866 } | |
| 867 if ((controlLetter >= '0' && controlLetter <= '9') || | |
| 868 controlLetter == '_') { | |
| 869 Advance(2); | |
| 870 return controlLetter & 0x1f; | |
| 871 } | |
| 834 // We match JSC in reading the backslash as a literal | 872 // We match JSC in reading the backslash as a literal |
| 835 // character instead of as starting an escape. | 873 // character instead of as starting an escape. |
| 836 return '\\'; | 874 return '\\'; |
| 837 } | 875 } |
| 838 case '0': | 876 case '0': |
| 839 case '1': | 877 case '1': |
| 840 case '2': | 878 case '2': |
| 841 case '3': | 879 case '3': |
| 842 case '4': | 880 case '4': |
| 843 case '5': | 881 case '5': |
| 844 case '6': | 882 case '6': |
| 845 case '7': | 883 case '7': |
| 846 // For compatibility, we interpret a decimal escape that isn't | 884 // For compatibility, we interpret a decimal escape that isn't |
| 847 // a back reference (and therefore either \0 or not valid according | 885 // a back reference (and therefore either \0 or not valid according |
| 848 // to the specification) as a 1..3 digit octal character code. | 886 // to the specification) as a 1..3 digit octal character code. |
| 887 if (unicode()) { | |
| 888 // With /u, decimal escape is not interpreted as octal character code. | |
| 889 ReportError(CStrVector("Invalid class escape")); | |
| 890 return 0; | |
| 891 } | |
| 849 return ParseOctalLiteral(); | 892 return ParseOctalLiteral(); |
| 850 case 'x': { | 893 case 'x': { |
| 851 Advance(); | 894 Advance(); |
| 852 uc32 value; | 895 uc32 value; |
| 853 if (ParseHexEscape(2, &value)) { | 896 if (ParseHexEscape(2, &value)) return value; |
| 854 return value; | 897 if (unicode()) { |
| 898 // With /u, invalid escapes are not treated as identity escapes. | |
| 899 ReportError(CStrVector("Invalid escape")); | |
| 900 return 0; | |
| 855 } | 901 } |
| 856 if (!unicode()) { | 902 // If \x is not followed by a two-digit hexadecimal, treat it |
| 857 // If \x is not followed by a two-digit hexadecimal, treat it | 903 // as an identity escape. |
| 858 // as an identity escape. | 904 return 'x'; |
| 859 return 'x'; | |
| 860 } | |
| 861 // If the 'u' flag is present, invalid escapes are not treated as | |
| 862 // identity escapes. | |
| 863 ReportError(CStrVector("Invalid escape")); | |
| 864 return 0; | |
| 865 } | 905 } |
| 866 case 'u': { | 906 case 'u': { |
| 867 Advance(); | 907 Advance(); |
| 868 uc32 value; | 908 uc32 value; |
| 869 if (ParseUnicodeEscape(&value)) { | 909 if (ParseUnicodeEscape(&value)) return value; |
| 870 return value; | 910 if (unicode()) { |
| 911 // With /u, invalid escapes are not treated as identity escapes. | |
| 912 ReportError(CStrVector("Invalid unicode escape")); | |
| 913 return 0; | |
| 871 } | 914 } |
| 872 if (!unicode()) { | 915 // If \u is not followed by a valid unicode escape, treat it |
| 873 return 'u'; | 916 // as an identity escape. |
| 874 } | 917 return 'u'; |
| 875 // If the 'u' flag is present, invalid escapes are not treated as | |
| 876 // identity escapes. | |
| 877 ReportError(CStrVector("Invalid unicode escape")); | |
| 878 return 0; | |
| 879 } | 918 } |
| 880 default: { | 919 default: { |
| 881 uc32 result = current(); | 920 uc32 result = current(); |
| 882 // If the 'u' flag is present, only syntax characters can be escaped, no | 921 // With /u, no identity escapes except for syntax characters are |
| 883 // other identity escapes are allowed. If the 'u' flag is not present, all | 922 // allowed. Otherwise, all identity escapes are allowed. |
| 884 // identity escapes are allowed. | 923 if (!unicode() || IsSyntaxCharacterOrSlash(result)) { |
| 885 if (!unicode() || IsSyntaxCharacter(result)) { | |
| 886 Advance(); | 924 Advance(); |
| 887 return result; | 925 return result; |
| 888 } | 926 } |
| 889 ReportError(CStrVector("Invalid escape")); | 927 ReportError(CStrVector("Invalid escape")); |
| 890 return 0; | 928 return 0; |
| 891 } | 929 } |
| 892 } | 930 } |
| 893 return 0; | 931 return 0; |
| 894 } | 932 } |
| 895 | 933 |
| (...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 949 if (char_class != kNoCharClass) { | 987 if (char_class != kNoCharClass) { |
| 950 CharacterRange::AddClassEscape(char_class, ranges, zone); | 988 CharacterRange::AddClassEscape(char_class, ranges, zone); |
| 951 } else { | 989 } else { |
| 952 ranges->Add(range, zone); | 990 ranges->Add(range, zone); |
| 953 } | 991 } |
| 954 } | 992 } |
| 955 | 993 |
| 956 | 994 |
| 957 RegExpTree* RegExpParser::ParseCharacterClass() { | 995 RegExpTree* RegExpParser::ParseCharacterClass() { |
| 958 static const char* kUnterminated = "Unterminated character class"; | 996 static const char* kUnterminated = "Unterminated character class"; |
| 997 static const char* kRangeInvalid = "Invalid character class"; | |
| 959 static const char* kRangeOutOfOrder = "Range out of order in character class"; | 998 static const char* kRangeOutOfOrder = "Range out of order in character class"; |
| 960 | 999 |
| 961 DCHECK_EQ(current(), '['); | 1000 DCHECK_EQ(current(), '['); |
| 962 Advance(); | 1001 Advance(); |
| 963 bool is_negated = false; | 1002 bool is_negated = false; |
| 964 if (current() == '^') { | 1003 if (current() == '^') { |
| 965 is_negated = true; | 1004 is_negated = true; |
| 966 Advance(); | 1005 Advance(); |
| 967 } | 1006 } |
| 968 ZoneList<CharacterRange>* ranges = | 1007 ZoneList<CharacterRange>* ranges = |
| 969 new (zone()) ZoneList<CharacterRange>(2, zone()); | 1008 new (zone()) ZoneList<CharacterRange>(2, zone()); |
| 970 while (has_more() && current() != ']') { | 1009 while (has_more() && current() != ']') { |
| 971 uc16 char_class = kNoCharClass; | 1010 uc16 char_class = kNoCharClass; |
| 972 CharacterRange first = ParseClassAtom(&char_class CHECK_FAILED); | 1011 CharacterRange first = ParseClassAtom(&char_class CHECK_FAILED); |
| 973 if (current() == '-') { | 1012 if (current() == '-') { |
| 974 Advance(); | 1013 Advance(); |
| 975 if (current() == kEndMarker) { | 1014 if (current() == kEndMarker) { |
| 976 // If we reach the end we break out of the loop and let the | 1015 // If we reach the end we break out of the loop and let the |
| 977 // following code report an error. | 1016 // following code report an error. |
| 978 break; | 1017 break; |
| 979 } else if (current() == ']') { | 1018 } else if (current() == ']') { |
| 980 AddRangeOrEscape(ranges, char_class, first, zone()); | 1019 AddRangeOrEscape(ranges, char_class, first, zone()); |
| 981 ranges->Add(CharacterRange::Singleton('-'), zone()); | 1020 ranges->Add(CharacterRange::Singleton('-'), zone()); |
| 982 break; | 1021 break; |
| 983 } | 1022 } |
| 984 uc16 char_class_2 = kNoCharClass; | 1023 uc16 char_class_2 = kNoCharClass; |
| 985 CharacterRange next = ParseClassAtom(&char_class_2 CHECK_FAILED); | 1024 CharacterRange next = ParseClassAtom(&char_class_2 CHECK_FAILED); |
| 986 if (char_class != kNoCharClass || char_class_2 != kNoCharClass) { | 1025 if (char_class != kNoCharClass || char_class_2 != kNoCharClass) { |
| 987 // Either end is an escaped character class. Treat the '-' verbatim. | 1026 // Either end is an escaped character class. Treat the '-' verbatim. |
| 1027 if (unicode()) { | |
| 1028 // ES2015 21.2.2.15.1 step 1. | |
| 1029 return ReportError(CStrVector(kRangeInvalid) CHECK_FAILED); | |
|
vogelheim
2016/01/28 13:38:21
CHECK_FAILED adds a return, after the unconditiona
Yang
2016/01/28 14:01:07
You are completely right. I simply copied the code
| |
| 1030 } | |
| 988 AddRangeOrEscape(ranges, char_class, first, zone()); | 1031 AddRangeOrEscape(ranges, char_class, first, zone()); |
| 989 ranges->Add(CharacterRange::Singleton('-'), zone()); | 1032 ranges->Add(CharacterRange::Singleton('-'), zone()); |
| 990 AddRangeOrEscape(ranges, char_class_2, next, zone()); | 1033 AddRangeOrEscape(ranges, char_class_2, next, zone()); |
| 991 continue; | 1034 continue; |
| 992 } | 1035 } |
| 1036 // ES2015 21.2.2.15.1 step 6. | |
| 993 if (first.from() > next.to()) { | 1037 if (first.from() > next.to()) { |
| 994 return ReportError(CStrVector(kRangeOutOfOrder) CHECK_FAILED); | 1038 return ReportError(CStrVector(kRangeOutOfOrder) CHECK_FAILED); |
| 995 } | 1039 } |
| 996 ranges->Add(CharacterRange::Range(first.from(), next.to()), zone()); | 1040 ranges->Add(CharacterRange::Range(first.from(), next.to()), zone()); |
| 997 } else { | 1041 } else { |
| 998 AddRangeOrEscape(ranges, char_class, first, zone()); | 1042 AddRangeOrEscape(ranges, char_class, first, zone()); |
| 999 } | 1043 } |
| 1000 } | 1044 } |
| 1001 if (!has_more()) { | 1045 if (!has_more()) { |
| 1002 return ReportError(CStrVector(kUnterminated) CHECK_FAILED); | 1046 return ReportError(CStrVector(kUnterminated) CHECK_FAILED); |
| (...skipping 152 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1155 AddCharacter(static_cast<uc16>(c)); | 1199 AddCharacter(static_cast<uc16>(c)); |
| 1156 } | 1200 } |
| 1157 } | 1201 } |
| 1158 | 1202 |
| 1159 | 1203 |
| 1160 void RegExpBuilder::AddEmpty() { pending_empty_ = true; } | 1204 void RegExpBuilder::AddEmpty() { pending_empty_ = true; } |
| 1161 | 1205 |
| 1162 | 1206 |
| 1163 void RegExpBuilder::AddCharacterClass(RegExpCharacterClass* cc) { | 1207 void RegExpBuilder::AddCharacterClass(RegExpCharacterClass* cc) { |
| 1164 if (NeedsDesugaringForUnicode(cc)) { | 1208 if (NeedsDesugaringForUnicode(cc)) { |
| 1165 // In unicode mode, character class needs to be desugared, so it | 1209 // With /u, character class needs to be desugared, so it |
| 1166 // must be a standalone term instead of being part of a RegExpText. | 1210 // must be a standalone term instead of being part of a RegExpText. |
| 1167 AddTerm(cc); | 1211 AddTerm(cc); |
| 1168 } else { | 1212 } else { |
| 1169 AddAtom(cc); | 1213 AddAtom(cc); |
| 1170 } | 1214 } |
| 1171 } | 1215 } |
| 1172 | 1216 |
| 1173 void RegExpBuilder::AddCharacterClassForDesugaring(uc32 c) { | 1217 void RegExpBuilder::AddCharacterClassForDesugaring(uc32 c) { |
| 1174 AddTerm(new (zone()) RegExpCharacterClass( | 1218 AddTerm(new (zone()) RegExpCharacterClass( |
| 1175 CharacterRange::List(zone(), CharacterRange::Singleton(c)), false)); | 1219 CharacterRange::List(zone(), CharacterRange::Singleton(c)), false)); |
| (...skipping 92 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1268 | 1312 |
| 1269 | 1313 |
| 1270 RegExpTree* RegExpBuilder::ToRegExp() { | 1314 RegExpTree* RegExpBuilder::ToRegExp() { |
| 1271 FlushTerms(); | 1315 FlushTerms(); |
| 1272 int num_alternatives = alternatives_.length(); | 1316 int num_alternatives = alternatives_.length(); |
| 1273 if (num_alternatives == 0) return new (zone()) RegExpEmpty(); | 1317 if (num_alternatives == 0) return new (zone()) RegExpEmpty(); |
| 1274 if (num_alternatives == 1) return alternatives_.last(); | 1318 if (num_alternatives == 1) return alternatives_.last(); |
| 1275 return new (zone()) RegExpDisjunction(alternatives_.GetList(zone())); | 1319 return new (zone()) RegExpDisjunction(alternatives_.GetList(zone())); |
| 1276 } | 1320 } |
| 1277 | 1321 |
| 1278 | 1322 bool RegExpBuilder::AddQuantifierToAtom( |
| 1279 void RegExpBuilder::AddQuantifierToAtom( | |
| 1280 int min, int max, RegExpQuantifier::QuantifierType quantifier_type) { | 1323 int min, int max, RegExpQuantifier::QuantifierType quantifier_type) { |
| 1281 FlushPendingSurrogate(); | 1324 FlushPendingSurrogate(); |
| 1282 if (pending_empty_) { | 1325 if (pending_empty_) { |
| 1283 pending_empty_ = false; | 1326 pending_empty_ = false; |
| 1284 return; | 1327 return true; |
| 1285 } | 1328 } |
| 1286 RegExpTree* atom; | 1329 RegExpTree* atom; |
| 1287 if (characters_ != NULL) { | 1330 if (characters_ != NULL) { |
| 1288 DCHECK(last_added_ == ADD_CHAR); | 1331 DCHECK(last_added_ == ADD_CHAR); |
| 1289 // Last atom was character. | 1332 // Last atom was character. |
| 1290 Vector<const uc16> char_vector = characters_->ToConstVector(); | 1333 Vector<const uc16> char_vector = characters_->ToConstVector(); |
| 1291 int num_chars = char_vector.length(); | 1334 int num_chars = char_vector.length(); |
| 1292 if (num_chars > 1) { | 1335 if (num_chars > 1) { |
| 1293 Vector<const uc16> prefix = char_vector.SubVector(0, num_chars - 1); | 1336 Vector<const uc16> prefix = char_vector.SubVector(0, num_chars - 1); |
| 1294 text_.Add(new (zone()) RegExpAtom(prefix), zone()); | 1337 text_.Add(new (zone()) RegExpAtom(prefix), zone()); |
| 1295 char_vector = char_vector.SubVector(num_chars - 1, num_chars); | 1338 char_vector = char_vector.SubVector(num_chars - 1, num_chars); |
| 1296 } | 1339 } |
| 1297 characters_ = NULL; | 1340 characters_ = NULL; |
| 1298 atom = new (zone()) RegExpAtom(char_vector); | 1341 atom = new (zone()) RegExpAtom(char_vector); |
| 1299 FlushText(); | 1342 FlushText(); |
| 1300 } else if (text_.length() > 0) { | 1343 } else if (text_.length() > 0) { |
| 1301 DCHECK(last_added_ == ADD_ATOM); | 1344 DCHECK(last_added_ == ADD_ATOM); |
| 1302 atom = text_.RemoveLast(); | 1345 atom = text_.RemoveLast(); |
| 1303 FlushText(); | 1346 FlushText(); |
| 1304 } else if (terms_.length() > 0) { | 1347 } else if (terms_.length() > 0) { |
| 1305 DCHECK(last_added_ == ADD_ATOM); | 1348 DCHECK(last_added_ == ADD_ATOM); |
| 1306 atom = terms_.RemoveLast(); | 1349 atom = terms_.RemoveLast(); |
| 1350 // With /u, lookarounds are not quantifiable. | |
| 1351 if (unicode() && atom->IsLookaround()) return false; | |
| 1307 if (atom->max_match() == 0) { | 1352 if (atom->max_match() == 0) { |
| 1308 // Guaranteed to only match an empty string. | 1353 // Guaranteed to only match an empty string. |
| 1309 LAST(ADD_TERM); | 1354 LAST(ADD_TERM); |
| 1310 if (min == 0) { | 1355 if (min == 0) { |
| 1311 return; | 1356 return true; |
| 1312 } | 1357 } |
| 1313 terms_.Add(atom, zone()); | 1358 terms_.Add(atom, zone()); |
| 1314 return; | 1359 return true; |
| 1315 } | 1360 } |
| 1316 } else { | 1361 } else { |
| 1317 // Only call immediately after adding an atom or character! | 1362 // Only call immediately after adding an atom or character! |
| 1318 UNREACHABLE(); | 1363 UNREACHABLE(); |
| 1319 return; | 1364 return false; |
| 1320 } | 1365 } |
| 1321 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom), | 1366 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom), |
| 1322 zone()); | 1367 zone()); |
| 1323 LAST(ADD_TERM); | 1368 LAST(ADD_TERM); |
| 1369 return true; | |
| 1324 } | 1370 } |
| 1325 | 1371 |
| 1326 } // namespace internal | 1372 } // namespace internal |
| 1327 } // namespace v8 | 1373 } // namespace v8 |
| OLD | NEW |