Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(7)

Side by Side Diff: src/regexp/regexp-parser.cc

Issue 1645573002: [regexp] restrict pattern syntax for unicode mode. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@stage
Patch Set: allow forward slash as identity escape Created 4 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/regexp/regexp-parser.h ('k') | test/mjsunit/harmony/unicode-regexp-restricted-syntax.js » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2016 the V8 project authors. All rights reserved. 1 // Copyright 2016 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "src/regexp/regexp-parser.h" 5 #include "src/regexp/regexp-parser.h"
6 6
7 #include "src/char-predicates-inl.h" 7 #include "src/char-predicates-inl.h"
8 #include "src/factory.h" 8 #include "src/factory.h"
9 #include "src/isolate.h" 9 #include "src/isolate.h"
10 #include "src/objects-inl.h" 10 #include "src/objects-inl.h"
(...skipping 84 matching lines...) Expand 10 before | Expand all | Expand 10 after
95 95
96 96
97 void RegExpParser::Advance(int dist) { 97 void RegExpParser::Advance(int dist) {
98 next_pos_ += dist - 1; 98 next_pos_ += dist - 1;
99 Advance(); 99 Advance();
100 } 100 }
101 101
102 102
103 bool RegExpParser::simple() { return simple_; } 103 bool RegExpParser::simple() { return simple_; }
104 104
105 105 bool RegExpParser::IsSyntaxCharacterOrSlash(uc32 c) {
106 bool RegExpParser::IsSyntaxCharacter(uc32 c) { 106 switch (c) {
107 return c == '^' || c == '$' || c == '\\' || c == '.' || c == '*' || 107 case '^':
108 c == '+' || c == '?' || c == '(' || c == ')' || c == '[' || c == ']' || 108 case '$':
109 c == '{' || c == '}' || c == '|'; 109 case '\\':
110 case '.':
111 case '*':
112 case '+':
113 case '?':
114 case '(':
115 case ')':
116 case '[':
117 case ']':
118 case '{':
119 case '}':
120 case '|':
121 case '/':
122 return true;
123 default:
124 break;
125 }
126 return false;
110 } 127 }
111 128
112 129
113 RegExpTree* RegExpParser::ReportError(Vector<const char> message) { 130 RegExpTree* RegExpParser::ReportError(Vector<const char> message) {
114 failed_ = true; 131 failed_ = true;
115 *error_ = isolate()->factory()->NewStringFromAscii(message).ToHandleChecked(); 132 *error_ = isolate()->factory()->NewStringFromAscii(message).ToHandleChecked();
116 // Zip to the end to make sure that no more input is read. 133 // Zip to the end to make sure that no more input is read.
117 current_ = kEndMarker; 134 current_ = kEndMarker;
118 next_pos_ = in()->length(); 135 next_pos_ = in()->length();
119 return NULL; 136 return NULL;
(...skipping 233 matching lines...) Expand 10 before | Expand all | Expand 10 after
353 // the capture registers of the referenced capture are either 370 // the capture registers of the referenced capture are either
354 // both set or both cleared. 371 // both set or both cleared.
355 builder->AddEmpty(); 372 builder->AddEmpty();
356 } else { 373 } else {
357 RegExpCapture* capture = GetCapture(index); 374 RegExpCapture* capture = GetCapture(index);
358 RegExpTree* atom = new (zone()) RegExpBackReference(capture); 375 RegExpTree* atom = new (zone()) RegExpBackReference(capture);
359 builder->AddAtom(atom); 376 builder->AddAtom(atom);
360 } 377 }
361 break; 378 break;
362 } 379 }
380 // With /u, no identity escapes except for syntax characters
381 // are allowed. Otherwise, all identity escapes are allowed.
382 if (unicode()) {
383 return ReportError(CStrVector("Invalid escape"));
384 }
363 uc32 first_digit = Next(); 385 uc32 first_digit = Next();
364 if (first_digit == '8' || first_digit == '9') { 386 if (first_digit == '8' || first_digit == '9') {
365 // If the 'u' flag is present, only syntax characters can be 387 builder->AddCharacter(first_digit);
366 // escaped, 388 Advance(2);
367 // no other identity escapes are allowed. If the 'u' flag is not
368 // present, all identity escapes are allowed.
369 if (!unicode()) {
370 builder->AddCharacter(first_digit);
371 Advance(2);
372 } else {
373 return ReportError(CStrVector("Invalid escape"));
374 }
375 break; 389 break;
376 } 390 }
377 } 391 }
378 // FALLTHROUGH 392 // FALLTHROUGH
379 case '0': { 393 case '0': {
380 Advance(); 394 Advance();
395 if (unicode() && Next() >= '0' && Next() <= '9') {
396 // With /u, decimal escapes with a leading 0 are not parsed as octal.
397 return ReportError(CStrVector("Invalid decimal escape"));
398 }
381 uc32 octal = ParseOctalLiteral(); 399 uc32 octal = ParseOctalLiteral();
382 builder->AddCharacter(octal); 400 builder->AddCharacter(octal);
383 break; 401 break;
384 } 402 }
385 // ControlEscape :: one of 403 // ControlEscape :: one of
386 // f n r t v 404 // f n r t v
387 case 'f': 405 case 'f':
388 Advance(2); 406 Advance(2);
389 builder->AddCharacter('\f'); 407 builder->AddCharacter('\f');
390 break; 408 break;
(...skipping 17 matching lines...) Expand all
408 Advance(); 426 Advance();
409 uc32 controlLetter = Next(); 427 uc32 controlLetter = Next();
410 // Special case if it is an ASCII letter. 428 // Special case if it is an ASCII letter.
411 // Convert lower case letters to uppercase. 429 // Convert lower case letters to uppercase.
412 uc32 letter = controlLetter & ~('a' ^ 'A'); 430 uc32 letter = controlLetter & ~('a' ^ 'A');
413 if (letter < 'A' || 'Z' < letter) { 431 if (letter < 'A' || 'Z' < letter) {
414 // controlLetter is not in range 'A'-'Z' or 'a'-'z'. 432 // controlLetter is not in range 'A'-'Z' or 'a'-'z'.
415 // This is outside the specification. We match JSC in 433 // This is outside the specification. We match JSC in
416 // reading the backslash as a literal character instead 434 // reading the backslash as a literal character instead
417 // of as starting an escape. 435 // of as starting an escape.
436 if (unicode()) {
437 // With /u, invalid escapes are not treated as identity escapes.
438 return ReportError(CStrVector("Invalid unicode escape"));
439 }
418 builder->AddCharacter('\\'); 440 builder->AddCharacter('\\');
419 } else { 441 } else {
420 Advance(2); 442 Advance(2);
421 builder->AddCharacter(controlLetter & 0x1f); 443 builder->AddCharacter(controlLetter & 0x1f);
422 } 444 }
423 break; 445 break;
424 } 446 }
425 case 'x': { 447 case 'x': {
426 Advance(2); 448 Advance(2);
427 uc32 value; 449 uc32 value;
428 if (ParseHexEscape(2, &value)) { 450 if (ParseHexEscape(2, &value)) {
429 builder->AddCharacter(value); 451 builder->AddCharacter(value);
430 } else if (!unicode()) { 452 } else if (!unicode()) {
431 builder->AddCharacter('x'); 453 builder->AddCharacter('x');
432 } else { 454 } else {
433 // If the 'u' flag is present, invalid escapes are not treated as 455 // With /u, invalid escapes are not treated as identity escapes.
434 // identity escapes.
435 return ReportError(CStrVector("Invalid escape")); 456 return ReportError(CStrVector("Invalid escape"));
436 } 457 }
437 break; 458 break;
438 } 459 }
439 case 'u': { 460 case 'u': {
440 Advance(2); 461 Advance(2);
441 uc32 value; 462 uc32 value;
442 if (ParseUnicodeEscape(&value)) { 463 if (ParseUnicodeEscape(&value)) {
443 builder->AddUnicodeCharacter(value); 464 builder->AddUnicodeCharacter(value);
444 } else if (!unicode()) { 465 } else if (!unicode()) {
445 builder->AddCharacter('u'); 466 builder->AddCharacter('u');
446 } else { 467 } else {
447 // If the 'u' flag is present, invalid escapes are not treated as 468 // With /u, invalid escapes are not treated as identity escapes.
448 // identity escapes.
449 return ReportError(CStrVector("Invalid unicode escape")); 469 return ReportError(CStrVector("Invalid unicode escape"));
450 } 470 }
451 break; 471 break;
452 } 472 }
453 default: 473 default:
454 Advance(); 474 Advance();
455 // If the 'u' flag is present, only syntax characters can be 475 // With /u, no identity escapes except for syntax characters
456 // escaped, no 476 // are allowed. Otherwise, all identity escapes are allowed.
457 // other identity escapes are allowed. If the 'u' flag is not 477 if (!unicode() || IsSyntaxCharacterOrSlash(current())) {
458 // present,
459 // all identity escapes are allowed.
460 if (!unicode() || IsSyntaxCharacter(current())) {
461 builder->AddCharacter(current()); 478 builder->AddCharacter(current());
462 Advance(); 479 Advance();
463 } else { 480 } else {
464 return ReportError(CStrVector("Invalid escape")); 481 return ReportError(CStrVector("Invalid escape"));
465 } 482 }
466 break; 483 break;
467 } 484 }
468 break; 485 break;
469 case '{': { 486 case '{': {
470 int dummy; 487 int dummy;
471 if (ParseIntervalQuantifier(&dummy, &dummy)) { 488 if (ParseIntervalQuantifier(&dummy, &dummy)) {
472 ReportError(CStrVector("Nothing to repeat") CHECK_FAILED); 489 ReportError(CStrVector("Nothing to repeat") CHECK_FAILED);
473 } 490 }
474 // fallthrough 491 // fallthrough
475 } 492 }
493 case '}':
494 case ']':
495 if (unicode()) {
496 ReportError(CStrVector("Lone quantifier brackets") CHECK_FAILED);
vogelheim 2016/01/28 13:38:21 I don't get the point of the ReportError(... CHECK
Yang 2016/01/28 14:01:07 Done.
497 }
498 // fallthrough
476 default: 499 default:
477 builder->AddUnicodeCharacter(current()); 500 builder->AddUnicodeCharacter(current());
478 Advance(); 501 Advance();
479 break; 502 break;
480 } // end switch(current()) 503 } // end switch(current())
481 504
482 int min; 505 int min;
483 int max; 506 int max;
484 switch (current()) { 507 switch (current()) {
485 // QuantifierPrefix :: 508 // QuantifierPrefix ::
(...skipping 12 matching lines...) Expand all
498 Advance(); 521 Advance();
499 break; 522 break;
500 case '?': 523 case '?':
501 min = 0; 524 min = 0;
502 max = 1; 525 max = 1;
503 Advance(); 526 Advance();
504 break; 527 break;
505 case '{': 528 case '{':
506 if (ParseIntervalQuantifier(&min, &max)) { 529 if (ParseIntervalQuantifier(&min, &max)) {
507 if (max < min) { 530 if (max < min) {
508 ReportError(CStrVector("numbers out of order in {} quantifier.") 531 ReportError(CStrVector("numbers out of order in {} quantifier")
509 CHECK_FAILED); 532 CHECK_FAILED);
510 } 533 }
511 break; 534 break;
512 } else { 535 } else {
536 if (unicode()) {
537 // With /u, incomplete quantifiers are not allowed.
538 ReportError(CStrVector("Incomplete quantifier") CHECK_FAILED);
539 }
513 continue; 540 continue;
514 } 541 }
515 default: 542 default:
516 continue; 543 continue;
517 } 544 }
518 RegExpQuantifier::QuantifierType quantifier_type = RegExpQuantifier::GREEDY; 545 RegExpQuantifier::QuantifierType quantifier_type = RegExpQuantifier::GREEDY;
519 if (current() == '?') { 546 if (current() == '?') {
520 quantifier_type = RegExpQuantifier::NON_GREEDY; 547 quantifier_type = RegExpQuantifier::NON_GREEDY;
521 Advance(); 548 Advance();
522 } else if (FLAG_regexp_possessive_quantifier && current() == '+') { 549 } else if (FLAG_regexp_possessive_quantifier && current() == '+') {
523 // FLAG_regexp_possessive_quantifier is a debug-only flag. 550 // FLAG_regexp_possessive_quantifier is a debug-only flag.
524 quantifier_type = RegExpQuantifier::POSSESSIVE; 551 quantifier_type = RegExpQuantifier::POSSESSIVE;
525 Advance(); 552 Advance();
526 } 553 }
527 builder->AddQuantifierToAtom(min, max, quantifier_type); 554 if (!builder->AddQuantifierToAtom(min, max, quantifier_type)) {
555 ReportError(CStrVector("Invalid quantifier") CHECK_FAILED);
556 }
528 } 557 }
529 } 558 }
530 559
531 560
532 #ifdef DEBUG 561 #ifdef DEBUG
533 // Currently only used in a DCHECK. 562 // Currently only used in a DCHECK.
534 static bool IsSpecialClassEscape(uc32 c) { 563 static bool IsSpecialClassEscape(uc32 c) {
535 switch (c) { 564 switch (c) {
536 case 'd': 565 case 'd':
537 case 'D': 566 case 'D':
(...skipping 277 matching lines...) Expand 10 before | Expand all | Expand 10 after
815 return '\r'; 844 return '\r';
816 case 't': 845 case 't':
817 Advance(); 846 Advance();
818 return '\t'; 847 return '\t';
819 case 'v': 848 case 'v':
820 Advance(); 849 Advance();
821 return '\v'; 850 return '\v';
822 case 'c': { 851 case 'c': {
823 uc32 controlLetter = Next(); 852 uc32 controlLetter = Next();
824 uc32 letter = controlLetter & ~('A' ^ 'a'); 853 uc32 letter = controlLetter & ~('A' ^ 'a');
825 // For compatibility with JSC, inside a character class 854 // For compatibility with JSC, inside a character class we also accept
826 // we also accept digits and underscore as control characters. 855 // digits and underscore as control characters, unless with /u.
827 if ((controlLetter >= '0' && controlLetter <= '9') || 856 if (letter >= 'A' && letter <= 'Z') {
828 controlLetter == '_' || (letter >= 'A' && letter <= 'Z')) {
829 Advance(2); 857 Advance(2);
830 // Control letters mapped to ASCII control characters in the range 858 // Control letters mapped to ASCII control characters in the range
831 // 0x00-0x1f. 859 // 0x00-0x1f.
832 return controlLetter & 0x1f; 860 return controlLetter & 0x1f;
833 } 861 }
862 if (unicode()) {
863 // With /u, invalid escapes are not treated as identity escapes.
864 ReportError(CStrVector("Invalid class escape"));
865 return 0;
866 }
867 if ((controlLetter >= '0' && controlLetter <= '9') ||
868 controlLetter == '_') {
869 Advance(2);
870 return controlLetter & 0x1f;
871 }
834 // We match JSC in reading the backslash as a literal 872 // We match JSC in reading the backslash as a literal
835 // character instead of as starting an escape. 873 // character instead of as starting an escape.
836 return '\\'; 874 return '\\';
837 } 875 }
838 case '0': 876 case '0':
839 case '1': 877 case '1':
840 case '2': 878 case '2':
841 case '3': 879 case '3':
842 case '4': 880 case '4':
843 case '5': 881 case '5':
844 case '6': 882 case '6':
845 case '7': 883 case '7':
846 // For compatibility, we interpret a decimal escape that isn't 884 // For compatibility, we interpret a decimal escape that isn't
847 // a back reference (and therefore either \0 or not valid according 885 // a back reference (and therefore either \0 or not valid according
848 // to the specification) as a 1..3 digit octal character code. 886 // to the specification) as a 1..3 digit octal character code.
887 if (unicode()) {
888 // With /u, decimal escape is not interpreted as octal character code.
889 ReportError(CStrVector("Invalid class escape"));
890 return 0;
891 }
849 return ParseOctalLiteral(); 892 return ParseOctalLiteral();
850 case 'x': { 893 case 'x': {
851 Advance(); 894 Advance();
852 uc32 value; 895 uc32 value;
853 if (ParseHexEscape(2, &value)) { 896 if (ParseHexEscape(2, &value)) return value;
854 return value; 897 if (unicode()) {
898 // With /u, invalid escapes are not treated as identity escapes.
899 ReportError(CStrVector("Invalid escape"));
900 return 0;
855 } 901 }
856 if (!unicode()) { 902 // If \x is not followed by a two-digit hexadecimal, treat it
857 // If \x is not followed by a two-digit hexadecimal, treat it 903 // as an identity escape.
858 // as an identity escape. 904 return 'x';
859 return 'x';
860 }
861 // If the 'u' flag is present, invalid escapes are not treated as
862 // identity escapes.
863 ReportError(CStrVector("Invalid escape"));
864 return 0;
865 } 905 }
866 case 'u': { 906 case 'u': {
867 Advance(); 907 Advance();
868 uc32 value; 908 uc32 value;
869 if (ParseUnicodeEscape(&value)) { 909 if (ParseUnicodeEscape(&value)) return value;
870 return value; 910 if (unicode()) {
911 // With /u, invalid escapes are not treated as identity escapes.
912 ReportError(CStrVector("Invalid unicode escape"));
913 return 0;
871 } 914 }
872 if (!unicode()) { 915 // If \u is not followed by a four-digit hexadecimal, treat it
873 return 'u'; 916 // as an identity escape.
874 } 917 return 'u';
875 // If the 'u' flag is present, invalid escapes are not treated as
876 // identity escapes.
877 ReportError(CStrVector("Invalid unicode escape"));
878 return 0;
879 } 918 }
880 default: { 919 default: {
881 uc32 result = current(); 920 uc32 result = current();
882 // If the 'u' flag is present, only syntax characters can be escaped, no 921 // With /u, no identity escapes except for syntax characters are
883 // other identity escapes are allowed. If the 'u' flag is not present, all 922 // allowed. Otherwise, all identity escapes are allowed.
884 // identity escapes are allowed. 923 if (!unicode() || IsSyntaxCharacterOrSlash(result)) {
885 if (!unicode() || IsSyntaxCharacter(result)) {
886 Advance(); 924 Advance();
887 return result; 925 return result;
888 } 926 }
889 ReportError(CStrVector("Invalid escape")); 927 ReportError(CStrVector("Invalid escape"));
890 return 0; 928 return 0;
891 } 929 }
892 } 930 }
893 return 0; 931 return 0;
894 } 932 }
895 933
(...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after
949 if (char_class != kNoCharClass) { 987 if (char_class != kNoCharClass) {
950 CharacterRange::AddClassEscape(char_class, ranges, zone); 988 CharacterRange::AddClassEscape(char_class, ranges, zone);
951 } else { 989 } else {
952 ranges->Add(range, zone); 990 ranges->Add(range, zone);
953 } 991 }
954 } 992 }
955 993
956 994
957 RegExpTree* RegExpParser::ParseCharacterClass() { 995 RegExpTree* RegExpParser::ParseCharacterClass() {
958 static const char* kUnterminated = "Unterminated character class"; 996 static const char* kUnterminated = "Unterminated character class";
997 static const char* kRangeInvalid = "Invalid character class";
959 static const char* kRangeOutOfOrder = "Range out of order in character class"; 998 static const char* kRangeOutOfOrder = "Range out of order in character class";
960 999
961 DCHECK_EQ(current(), '['); 1000 DCHECK_EQ(current(), '[');
962 Advance(); 1001 Advance();
963 bool is_negated = false; 1002 bool is_negated = false;
964 if (current() == '^') { 1003 if (current() == '^') {
965 is_negated = true; 1004 is_negated = true;
966 Advance(); 1005 Advance();
967 } 1006 }
968 ZoneList<CharacterRange>* ranges = 1007 ZoneList<CharacterRange>* ranges =
969 new (zone()) ZoneList<CharacterRange>(2, zone()); 1008 new (zone()) ZoneList<CharacterRange>(2, zone());
970 while (has_more() && current() != ']') { 1009 while (has_more() && current() != ']') {
971 uc16 char_class = kNoCharClass; 1010 uc16 char_class = kNoCharClass;
972 CharacterRange first = ParseClassAtom(&char_class CHECK_FAILED); 1011 CharacterRange first = ParseClassAtom(&char_class CHECK_FAILED);
973 if (current() == '-') { 1012 if (current() == '-') {
974 Advance(); 1013 Advance();
975 if (current() == kEndMarker) { 1014 if (current() == kEndMarker) {
976 // If we reach the end we break out of the loop and let the 1015 // If we reach the end we break out of the loop and let the
977 // following code report an error. 1016 // following code report an error.
978 break; 1017 break;
979 } else if (current() == ']') { 1018 } else if (current() == ']') {
980 AddRangeOrEscape(ranges, char_class, first, zone()); 1019 AddRangeOrEscape(ranges, char_class, first, zone());
981 ranges->Add(CharacterRange::Singleton('-'), zone()); 1020 ranges->Add(CharacterRange::Singleton('-'), zone());
982 break; 1021 break;
983 } 1022 }
984 uc16 char_class_2 = kNoCharClass; 1023 uc16 char_class_2 = kNoCharClass;
985 CharacterRange next = ParseClassAtom(&char_class_2 CHECK_FAILED); 1024 CharacterRange next = ParseClassAtom(&char_class_2 CHECK_FAILED);
986 if (char_class != kNoCharClass || char_class_2 != kNoCharClass) { 1025 if (char_class != kNoCharClass || char_class_2 != kNoCharClass) {
987 // Either end is an escaped character class. Treat the '-' verbatim. 1026 // Either end is an escaped character class. Treat the '-' verbatim.
1027 if (unicode()) {
1028 // ES2015 21.2.2.15.1 step 1.
1029 return ReportError(CStrVector(kRangeInvalid) CHECK_FAILED);
vogelheim 2016/01/28 13:38:21 CHECK_FAILED adds a return, after the unconditiona
Yang 2016/01/28 14:01:07 You are completely right. I simply copied the code
1030 }
988 AddRangeOrEscape(ranges, char_class, first, zone()); 1031 AddRangeOrEscape(ranges, char_class, first, zone());
989 ranges->Add(CharacterRange::Singleton('-'), zone()); 1032 ranges->Add(CharacterRange::Singleton('-'), zone());
990 AddRangeOrEscape(ranges, char_class_2, next, zone()); 1033 AddRangeOrEscape(ranges, char_class_2, next, zone());
991 continue; 1034 continue;
992 } 1035 }
1036 // ES2015 21.2.2.15.1 step 6.
993 if (first.from() > next.to()) { 1037 if (first.from() > next.to()) {
994 return ReportError(CStrVector(kRangeOutOfOrder) CHECK_FAILED); 1038 return ReportError(CStrVector(kRangeOutOfOrder) CHECK_FAILED);
995 } 1039 }
996 ranges->Add(CharacterRange::Range(first.from(), next.to()), zone()); 1040 ranges->Add(CharacterRange::Range(first.from(), next.to()), zone());
997 } else { 1041 } else {
998 AddRangeOrEscape(ranges, char_class, first, zone()); 1042 AddRangeOrEscape(ranges, char_class, first, zone());
999 } 1043 }
1000 } 1044 }
1001 if (!has_more()) { 1045 if (!has_more()) {
1002 return ReportError(CStrVector(kUnterminated) CHECK_FAILED); 1046 return ReportError(CStrVector(kUnterminated) CHECK_FAILED);
(...skipping 152 matching lines...) Expand 10 before | Expand all | Expand 10 after
1155 AddCharacter(static_cast<uc16>(c)); 1199 AddCharacter(static_cast<uc16>(c));
1156 } 1200 }
1157 } 1201 }
1158 1202
1159 1203
1160 void RegExpBuilder::AddEmpty() { pending_empty_ = true; } 1204 void RegExpBuilder::AddEmpty() { pending_empty_ = true; }
1161 1205
1162 1206
1163 void RegExpBuilder::AddCharacterClass(RegExpCharacterClass* cc) { 1207 void RegExpBuilder::AddCharacterClass(RegExpCharacterClass* cc) {
1164 if (NeedsDesugaringForUnicode(cc)) { 1208 if (NeedsDesugaringForUnicode(cc)) {
1165 // In unicode mode, character class needs to be desugared, so it 1209 // With /u, character class needs to be desugared, so it
1166 // must be a standalone term instead of being part of a RegExpText. 1210 // must be a standalone term instead of being part of a RegExpText.
1167 AddTerm(cc); 1211 AddTerm(cc);
1168 } else { 1212 } else {
1169 AddAtom(cc); 1213 AddAtom(cc);
1170 } 1214 }
1171 } 1215 }
1172 1216
1173 void RegExpBuilder::AddCharacterClassForDesugaring(uc32 c) { 1217 void RegExpBuilder::AddCharacterClassForDesugaring(uc32 c) {
1174 AddTerm(new (zone()) RegExpCharacterClass( 1218 AddTerm(new (zone()) RegExpCharacterClass(
1175 CharacterRange::List(zone(), CharacterRange::Singleton(c)), false)); 1219 CharacterRange::List(zone(), CharacterRange::Singleton(c)), false));
(...skipping 92 matching lines...) Expand 10 before | Expand all | Expand 10 after
1268 1312
1269 1313
1270 RegExpTree* RegExpBuilder::ToRegExp() { 1314 RegExpTree* RegExpBuilder::ToRegExp() {
1271 FlushTerms(); 1315 FlushTerms();
1272 int num_alternatives = alternatives_.length(); 1316 int num_alternatives = alternatives_.length();
1273 if (num_alternatives == 0) return new (zone()) RegExpEmpty(); 1317 if (num_alternatives == 0) return new (zone()) RegExpEmpty();
1274 if (num_alternatives == 1) return alternatives_.last(); 1318 if (num_alternatives == 1) return alternatives_.last();
1275 return new (zone()) RegExpDisjunction(alternatives_.GetList(zone())); 1319 return new (zone()) RegExpDisjunction(alternatives_.GetList(zone()));
1276 } 1320 }
1277 1321
1278 1322 bool RegExpBuilder::AddQuantifierToAtom(
1279 void RegExpBuilder::AddQuantifierToAtom(
1280 int min, int max, RegExpQuantifier::QuantifierType quantifier_type) { 1323 int min, int max, RegExpQuantifier::QuantifierType quantifier_type) {
1281 FlushPendingSurrogate(); 1324 FlushPendingSurrogate();
1282 if (pending_empty_) { 1325 if (pending_empty_) {
1283 pending_empty_ = false; 1326 pending_empty_ = false;
1284 return; 1327 return true;
1285 } 1328 }
1286 RegExpTree* atom; 1329 RegExpTree* atom;
1287 if (characters_ != NULL) { 1330 if (characters_ != NULL) {
1288 DCHECK(last_added_ == ADD_CHAR); 1331 DCHECK(last_added_ == ADD_CHAR);
1289 // Last atom was character. 1332 // Last atom was character.
1290 Vector<const uc16> char_vector = characters_->ToConstVector(); 1333 Vector<const uc16> char_vector = characters_->ToConstVector();
1291 int num_chars = char_vector.length(); 1334 int num_chars = char_vector.length();
1292 if (num_chars > 1) { 1335 if (num_chars > 1) {
1293 Vector<const uc16> prefix = char_vector.SubVector(0, num_chars - 1); 1336 Vector<const uc16> prefix = char_vector.SubVector(0, num_chars - 1);
1294 text_.Add(new (zone()) RegExpAtom(prefix), zone()); 1337 text_.Add(new (zone()) RegExpAtom(prefix), zone());
1295 char_vector = char_vector.SubVector(num_chars - 1, num_chars); 1338 char_vector = char_vector.SubVector(num_chars - 1, num_chars);
1296 } 1339 }
1297 characters_ = NULL; 1340 characters_ = NULL;
1298 atom = new (zone()) RegExpAtom(char_vector); 1341 atom = new (zone()) RegExpAtom(char_vector);
1299 FlushText(); 1342 FlushText();
1300 } else if (text_.length() > 0) { 1343 } else if (text_.length() > 0) {
1301 DCHECK(last_added_ == ADD_ATOM); 1344 DCHECK(last_added_ == ADD_ATOM);
1302 atom = text_.RemoveLast(); 1345 atom = text_.RemoveLast();
1303 FlushText(); 1346 FlushText();
1304 } else if (terms_.length() > 0) { 1347 } else if (terms_.length() > 0) {
1305 DCHECK(last_added_ == ADD_ATOM); 1348 DCHECK(last_added_ == ADD_ATOM);
1306 atom = terms_.RemoveLast(); 1349 atom = terms_.RemoveLast();
1350 // With /u, lookarounds are not quantifiable.
1351 if (unicode() && atom->IsLookaround()) return false;
1307 if (atom->max_match() == 0) { 1352 if (atom->max_match() == 0) {
1308 // Guaranteed to only match an empty string. 1353 // Guaranteed to only match an empty string.
1309 LAST(ADD_TERM); 1354 LAST(ADD_TERM);
1310 if (min == 0) { 1355 if (min == 0) {
1311 return; 1356 return true;
1312 } 1357 }
1313 terms_.Add(atom, zone()); 1358 terms_.Add(atom, zone());
1314 return; 1359 return true;
1315 } 1360 }
1316 } else { 1361 } else {
1317 // Only call immediately after adding an atom or character! 1362 // Only call immediately after adding an atom or character!
1318 UNREACHABLE(); 1363 UNREACHABLE();
1319 return; 1364 return false;
1320 } 1365 }
1321 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom), 1366 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom),
1322 zone()); 1367 zone());
1323 LAST(ADD_TERM); 1368 LAST(ADD_TERM);
1369 return true;
1324 } 1370 }
1325 1371
1326 } // namespace internal 1372 } // namespace internal
1327 } // namespace v8 1373 } // namespace v8
OLDNEW
« no previous file with comments | « src/regexp/regexp-parser.h ('k') | test/mjsunit/harmony/unicode-regexp-restricted-syntax.js » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698