OLD | NEW |
1 // Copyright 2016 the V8 project authors. All rights reserved. | 1 // Copyright 2016 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "src/regexp/regexp-parser.h" | 5 #include "src/regexp/regexp-parser.h" |
6 | 6 |
7 #include "src/char-predicates-inl.h" | 7 #include "src/char-predicates-inl.h" |
8 #include "src/factory.h" | 8 #include "src/factory.h" |
9 #include "src/isolate.h" | 9 #include "src/isolate.h" |
10 #include "src/objects-inl.h" | 10 #include "src/objects-inl.h" |
(...skipping 340 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
351 RegExpCharacterClass* cc = | 351 RegExpCharacterClass* cc = |
352 new (zone()) RegExpCharacterClass(ranges, false); | 352 new (zone()) RegExpCharacterClass(ranges, false); |
353 builder->AddCharacterClass(cc); | 353 builder->AddCharacterClass(cc); |
354 break; | 354 break; |
355 } | 355 } |
356 case 'p': | 356 case 'p': |
357 case 'P': { | 357 case 'P': { |
358 uc32 p = Next(); | 358 uc32 p = Next(); |
359 Advance(2); | 359 Advance(2); |
360 if (unicode()) { | 360 if (unicode()) { |
361 ZoneList<CharacterRange>* ranges = ParsePropertyClass(); | 361 if (FLAG_harmony_regexp_property) { |
362 if (ranges == nullptr) { | 362 ZoneList<CharacterRange>* ranges = |
363 return ReportError(CStrVector("Invalid property name")); | 363 new (zone()) ZoneList<CharacterRange>(2, zone()); |
| 364 if (!ParsePropertyClass(ranges)) { |
| 365 return ReportError(CStrVector("Invalid property name")); |
| 366 } |
| 367 RegExpCharacterClass* cc = |
| 368 new (zone()) RegExpCharacterClass(ranges, p == 'P'); |
| 369 builder->AddCharacterClass(cc); |
| 370 } else { |
| 371 // With /u, no identity escapes except for syntax characters |
| 372 // are allowed. Otherwise, all identity escapes are allowed. |
| 373 return ReportError(CStrVector("Invalid escape")); |
364 } | 374 } |
365 RegExpCharacterClass* cc = | |
366 new (zone()) RegExpCharacterClass(ranges, p == 'P'); | |
367 builder->AddCharacterClass(cc); | |
368 } else { | 375 } else { |
369 builder->AddCharacter(p); | 376 builder->AddCharacter(p); |
370 } | 377 } |
371 break; | 378 break; |
372 } | 379 } |
373 case '1': | 380 case '1': |
374 case '2': | 381 case '2': |
375 case '3': | 382 case '3': |
376 case '4': | 383 case '4': |
377 case '5': | 384 case '5': |
(...skipping 451 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
829 *value = unibrow::Utf16::CombineSurrogatePair(static_cast<uc16>(*value), | 836 *value = unibrow::Utf16::CombineSurrogatePair(static_cast<uc16>(*value), |
830 static_cast<uc16>(trail)); | 837 static_cast<uc16>(trail)); |
831 return true; | 838 return true; |
832 } | 839 } |
833 } | 840 } |
834 Reset(start); | 841 Reset(start); |
835 } | 842 } |
836 return result; | 843 return result; |
837 } | 844 } |
838 | 845 |
839 ZoneList<CharacterRange>* RegExpParser::ParsePropertyClass() { | 846 bool RegExpParser::ParsePropertyClass(ZoneList<CharacterRange>* result) { |
840 #ifdef V8_I18N_SUPPORT | 847 #ifdef V8_I18N_SUPPORT |
841 ZoneList<char> property_name(0, zone()); | 848 ZoneList<char> property_name(0, zone()); |
842 if (current() == '{') { | 849 if (current() == '{') { |
843 for (Advance(); current() != '}'; Advance()) { | 850 for (Advance(); current() != '}'; Advance()) { |
844 if (!has_next()) return nullptr; | 851 if (!has_next()) return false; |
845 property_name.Add(static_cast<char>(current()), zone()); | 852 property_name.Add(static_cast<char>(current()), zone()); |
846 } | 853 } |
847 } else if (current() != kEndMarker) { | 854 } else if (current() != kEndMarker) { |
848 property_name.Add(static_cast<char>(current()), zone()); | 855 property_name.Add(static_cast<char>(current()), zone()); |
849 } else { | 856 } else { |
850 return nullptr; | 857 return false; |
851 } | 858 } |
852 Advance(); | 859 Advance(); |
853 property_name.Add(0, zone()); // null-terminate string. | 860 property_name.Add(0, zone()); // null-terminate string. |
854 | 861 |
855 // Property names are defined in unicode database files. For aliases of | 862 // Property names are defined in unicode database files. For aliases of |
856 // these property names, see PropertyValueAliases.txt. | 863 // these property names, see PropertyValueAliases.txt. |
857 UProperty kPropertyClasses[] = { | 864 UProperty kPropertyClasses[] = { |
858 // General_Category (gc) found in PropertyValueAliases.txt | 865 // General_Category (gc) found in PropertyValueAliases.txt |
859 UCHAR_GENERAL_CATEGORY_MASK, | 866 UCHAR_GENERAL_CATEGORY_MASK, |
860 // Script (sc) found in Scripts.txt | 867 // Script (sc) found in Scripts.txt |
861 UCHAR_SCRIPT, | 868 UCHAR_SCRIPT, |
862 }; | 869 }; |
863 | 870 |
864 for (int i = 0; i < arraysize(kPropertyClasses); i++) { | 871 for (int i = 0; i < arraysize(kPropertyClasses); i++) { |
865 UProperty property_class = kPropertyClasses[i]; | 872 UProperty property_class = kPropertyClasses[i]; |
866 int32_t category = u_getPropertyValueEnum( | 873 int32_t category = u_getPropertyValueEnum( |
867 property_class, property_name.ToConstVector().start()); | 874 property_class, property_name.ToConstVector().start()); |
868 if (category == UCHAR_INVALID_CODE) continue; | 875 if (category == UCHAR_INVALID_CODE) continue; |
869 | 876 |
870 USet* set = uset_openEmpty(); | 877 USet* set = uset_openEmpty(); |
871 UErrorCode ec = U_ZERO_ERROR; | 878 UErrorCode ec = U_ZERO_ERROR; |
872 uset_applyIntPropertyValue(set, property_class, category, &ec); | 879 uset_applyIntPropertyValue(set, property_class, category, &ec); |
873 ZoneList<CharacterRange>* ranges = nullptr; | |
874 if (ec == U_ZERO_ERROR && !uset_isEmpty(set)) { | 880 if (ec == U_ZERO_ERROR && !uset_isEmpty(set)) { |
875 uset_removeAllStrings(set); | 881 uset_removeAllStrings(set); |
876 int item_count = uset_getItemCount(set); | 882 int item_count = uset_getItemCount(set); |
877 ranges = new (zone()) ZoneList<CharacterRange>(item_count, zone()); | |
878 int item_result = 0; | 883 int item_result = 0; |
879 for (int i = 0; i < item_count; i++) { | 884 for (int i = 0; i < item_count; i++) { |
880 uc32 start = 0; | 885 uc32 start = 0; |
881 uc32 end = 0; | 886 uc32 end = 0; |
882 item_result += uset_getItem(set, i, &start, &end, nullptr, 0, &ec); | 887 item_result += uset_getItem(set, i, &start, &end, nullptr, 0, &ec); |
883 ranges->Add(CharacterRange::Range(start, end), zone()); | 888 result->Add(CharacterRange::Range(start, end), zone()); |
884 } | 889 } |
885 DCHECK_EQ(U_ZERO_ERROR, ec); | 890 DCHECK_EQ(U_ZERO_ERROR, ec); |
886 DCHECK_EQ(0, item_result); | 891 DCHECK_EQ(0, item_result); |
887 } | 892 } |
888 uset_close(set); | 893 uset_close(set); |
889 return ranges; | 894 return true; |
890 } | 895 } |
891 #endif // V8_I18N_SUPPORT | 896 #endif // V8_I18N_SUPPORT |
892 | 897 |
893 return nullptr; | 898 return false; |
894 } | 899 } |
895 | 900 |
896 bool RegExpParser::ParseUnlimitedLengthHexNumber(int max_value, uc32* value) { | 901 bool RegExpParser::ParseUnlimitedLengthHexNumber(int max_value, uc32* value) { |
897 uc32 x = 0; | 902 uc32 x = 0; |
898 int d = HexValue(current()); | 903 int d = HexValue(current()); |
899 if (d < 0) { | 904 if (d < 0) { |
900 return false; | 905 return false; |
901 } | 906 } |
902 while (d >= 0) { | 907 while (d >= 0) { |
903 x = x * 16 + d; | 908 x = x * 16 + d; |
(...skipping 159 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1063 static inline void AddRangeOrEscape(ZoneList<CharacterRange>* ranges, | 1068 static inline void AddRangeOrEscape(ZoneList<CharacterRange>* ranges, |
1064 uc16 char_class, CharacterRange range, | 1069 uc16 char_class, CharacterRange range, |
1065 Zone* zone) { | 1070 Zone* zone) { |
1066 if (char_class != kNoCharClass) { | 1071 if (char_class != kNoCharClass) { |
1067 CharacterRange::AddClassEscape(char_class, ranges, zone); | 1072 CharacterRange::AddClassEscape(char_class, ranges, zone); |
1068 } else { | 1073 } else { |
1069 ranges->Add(range, zone); | 1074 ranges->Add(range, zone); |
1070 } | 1075 } |
1071 } | 1076 } |
1072 | 1077 |
| 1078 bool RegExpParser::ParseClassProperty(ZoneList<CharacterRange>* ranges) { |
| 1079 if (!FLAG_harmony_regexp_property) return false; |
| 1080 if (!unicode()) return false; |
| 1081 if (current() != '\\') return false; |
| 1082 uc32 next = Next(); |
| 1083 bool parse_success = false; |
| 1084 if (next == 'p') { |
| 1085 Advance(2); |
| 1086 parse_success = ParsePropertyClass(ranges); |
| 1087 } else if (next == 'P') { |
| 1088 Advance(2); |
| 1089 ZoneList<CharacterRange>* property_class = |
| 1090 new (zone()) ZoneList<CharacterRange>(2, zone()); |
| 1091 parse_success = ParsePropertyClass(property_class); |
| 1092 if (parse_success) { |
| 1093 ZoneList<CharacterRange>* negated = |
| 1094 new (zone()) ZoneList<CharacterRange>(2, zone()); |
| 1095 CharacterRange::Negate(property_class, negated, zone()); |
| 1096 const Vector<CharacterRange> negated_vector = negated->ToVector(); |
| 1097 ranges->AddAll(negated_vector, zone()); |
| 1098 } |
| 1099 } else { |
| 1100 return false; |
| 1101 } |
| 1102 if (!parse_success) |
| 1103 ReportError(CStrVector("Invalid property name in character class")); |
| 1104 return parse_success; |
| 1105 } |
1073 | 1106 |
1074 RegExpTree* RegExpParser::ParseCharacterClass() { | 1107 RegExpTree* RegExpParser::ParseCharacterClass() { |
1075 static const char* kUnterminated = "Unterminated character class"; | 1108 static const char* kUnterminated = "Unterminated character class"; |
1076 static const char* kRangeInvalid = "Invalid character class"; | 1109 static const char* kRangeInvalid = "Invalid character class"; |
1077 static const char* kRangeOutOfOrder = "Range out of order in character class"; | 1110 static const char* kRangeOutOfOrder = "Range out of order in character class"; |
1078 | 1111 |
1079 DCHECK_EQ(current(), '['); | 1112 DCHECK_EQ(current(), '['); |
1080 Advance(); | 1113 Advance(); |
1081 bool is_negated = false; | 1114 bool is_negated = false; |
1082 if (current() == '^') { | 1115 if (current() == '^') { |
1083 is_negated = true; | 1116 is_negated = true; |
1084 Advance(); | 1117 Advance(); |
1085 } | 1118 } |
1086 ZoneList<CharacterRange>* ranges = | 1119 ZoneList<CharacterRange>* ranges = |
1087 new (zone()) ZoneList<CharacterRange>(2, zone()); | 1120 new (zone()) ZoneList<CharacterRange>(2, zone()); |
1088 while (has_more() && current() != ']') { | 1121 while (has_more() && current() != ']') { |
| 1122 bool parsed_property = ParseClassProperty(ranges CHECK_FAILED); |
| 1123 if (parsed_property) continue; |
1089 uc16 char_class = kNoCharClass; | 1124 uc16 char_class = kNoCharClass; |
1090 CharacterRange first = ParseClassAtom(&char_class CHECK_FAILED); | 1125 CharacterRange first = ParseClassAtom(&char_class CHECK_FAILED); |
1091 if (current() == '-') { | 1126 if (current() == '-') { |
1092 Advance(); | 1127 Advance(); |
1093 if (current() == kEndMarker) { | 1128 if (current() == kEndMarker) { |
1094 // If we reach the end we break out of the loop and let the | 1129 // If we reach the end we break out of the loop and let the |
1095 // following code report an error. | 1130 // following code report an error. |
1096 break; | 1131 break; |
1097 } else if (current() == ']') { | 1132 } else if (current() == ']') { |
1098 AddRangeOrEscape(ranges, char_class, first, zone()); | 1133 AddRangeOrEscape(ranges, char_class, first, zone()); |
(...skipping 351 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1450 return false; | 1485 return false; |
1451 } | 1486 } |
1452 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom), | 1487 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom), |
1453 zone()); | 1488 zone()); |
1454 LAST(ADD_TERM); | 1489 LAST(ADD_TERM); |
1455 return true; | 1490 return true; |
1456 } | 1491 } |
1457 | 1492 |
1458 } // namespace internal | 1493 } // namespace internal |
1459 } // namespace v8 | 1494 } // namespace v8 |
OLD | NEW |