| OLD | NEW |
| 1 // Copyright 2016 the V8 project authors. All rights reserved. | 1 // Copyright 2016 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "src/regexp/regexp-parser.h" | 5 #include "src/regexp/regexp-parser.h" |
| 6 | 6 |
| 7 #include "src/char-predicates-inl.h" | 7 #include "src/char-predicates-inl.h" |
| 8 #include "src/factory.h" | 8 #include "src/factory.h" |
| 9 #include "src/isolate.h" | 9 #include "src/isolate.h" |
| 10 #include "src/objects-inl.h" | 10 #include "src/objects-inl.h" |
| (...skipping 340 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 351 RegExpCharacterClass* cc = | 351 RegExpCharacterClass* cc = |
| 352 new (zone()) RegExpCharacterClass(ranges, false); | 352 new (zone()) RegExpCharacterClass(ranges, false); |
| 353 builder->AddCharacterClass(cc); | 353 builder->AddCharacterClass(cc); |
| 354 break; | 354 break; |
| 355 } | 355 } |
| 356 case 'p': | 356 case 'p': |
| 357 case 'P': { | 357 case 'P': { |
| 358 uc32 p = Next(); | 358 uc32 p = Next(); |
| 359 Advance(2); | 359 Advance(2); |
| 360 if (unicode()) { | 360 if (unicode()) { |
| 361 ZoneList<CharacterRange>* ranges = ParsePropertyClass(); | 361 if (FLAG_harmony_regexp_property) { |
| 362 if (ranges == nullptr) { | 362 ZoneList<CharacterRange>* ranges = |
| 363 return ReportError(CStrVector("Invalid property name")); | 363 new (zone()) ZoneList<CharacterRange>(2, zone()); |
| 364 if (!ParsePropertyClass(ranges)) { |
| 365 return ReportError(CStrVector("Invalid property name")); |
| 366 } |
| 367 RegExpCharacterClass* cc = |
| 368 new (zone()) RegExpCharacterClass(ranges, p == 'P'); |
| 369 builder->AddCharacterClass(cc); |
| 370 } else { |
| 371 // With /u, no identity escapes except for syntax characters |
| 372 // are allowed. Otherwise, all identity escapes are allowed. |
| 373 return ReportError(CStrVector("Invalid escape")); |
| 364 } | 374 } |
| 365 RegExpCharacterClass* cc = | |
| 366 new (zone()) RegExpCharacterClass(ranges, p == 'P'); | |
| 367 builder->AddCharacterClass(cc); | |
| 368 } else { | 375 } else { |
| 369 builder->AddCharacter(p); | 376 builder->AddCharacter(p); |
| 370 } | 377 } |
| 371 break; | 378 break; |
| 372 } | 379 } |
| 373 case '1': | 380 case '1': |
| 374 case '2': | 381 case '2': |
| 375 case '3': | 382 case '3': |
| 376 case '4': | 383 case '4': |
| 377 case '5': | 384 case '5': |
| (...skipping 451 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 829 *value = unibrow::Utf16::CombineSurrogatePair(static_cast<uc16>(*value), | 836 *value = unibrow::Utf16::CombineSurrogatePair(static_cast<uc16>(*value), |
| 830 static_cast<uc16>(trail)); | 837 static_cast<uc16>(trail)); |
| 831 return true; | 838 return true; |
| 832 } | 839 } |
| 833 } | 840 } |
| 834 Reset(start); | 841 Reset(start); |
| 835 } | 842 } |
| 836 return result; | 843 return result; |
| 837 } | 844 } |
| 838 | 845 |
| 839 ZoneList<CharacterRange>* RegExpParser::ParsePropertyClass() { | 846 bool RegExpParser::ParsePropertyClass(ZoneList<CharacterRange>* result) { |
| 840 #ifdef V8_I18N_SUPPORT | 847 #ifdef V8_I18N_SUPPORT |
| 841 ZoneList<char> property_name(0, zone()); | 848 ZoneList<char> property_name(0, zone()); |
| 842 if (current() == '{') { | 849 if (current() == '{') { |
| 843 for (Advance(); current() != '}'; Advance()) { | 850 for (Advance(); current() != '}'; Advance()) { |
| 844 if (!has_next()) return nullptr; | 851 if (!has_next()) return false; |
| 845 property_name.Add(static_cast<char>(current()), zone()); | 852 property_name.Add(static_cast<char>(current()), zone()); |
| 846 } | 853 } |
| 847 } else if (current() != kEndMarker) { | 854 } else if (current() != kEndMarker) { |
| 848 property_name.Add(static_cast<char>(current()), zone()); | 855 property_name.Add(static_cast<char>(current()), zone()); |
| 849 } else { | 856 } else { |
| 850 return nullptr; | 857 return false; |
| 851 } | 858 } |
| 852 Advance(); | 859 Advance(); |
| 853 property_name.Add(0, zone()); // null-terminate string. | 860 property_name.Add(0, zone()); // null-terminate string. |
| 854 | 861 |
| 855 // Property names are defined in unicode database files. For aliases of | 862 // Property names are defined in unicode database files. For aliases of |
| 856 // these property names, see PropertyValueAliases.txt. | 863 // these property names, see PropertyValueAliases.txt. |
| 857 UProperty kPropertyClasses[] = { | 864 UProperty kPropertyClasses[] = { |
| 858 // General_Category (gc) found in PropertyValueAliases.txt | 865 // General_Category (gc) found in PropertyValueAliases.txt |
| 859 UCHAR_GENERAL_CATEGORY_MASK, | 866 UCHAR_GENERAL_CATEGORY_MASK, |
| 860 // Script (sc) found in Scripts.txt | 867 // Script (sc) found in Scripts.txt |
| 861 UCHAR_SCRIPT, | 868 UCHAR_SCRIPT, |
| 862 }; | 869 }; |
| 863 | 870 |
| 864 for (int i = 0; i < arraysize(kPropertyClasses); i++) { | 871 for (int i = 0; i < arraysize(kPropertyClasses); i++) { |
| 865 UProperty property_class = kPropertyClasses[i]; | 872 UProperty property_class = kPropertyClasses[i]; |
| 866 int32_t category = u_getPropertyValueEnum( | 873 int32_t category = u_getPropertyValueEnum( |
| 867 property_class, property_name.ToConstVector().start()); | 874 property_class, property_name.ToConstVector().start()); |
| 868 if (category == UCHAR_INVALID_CODE) continue; | 875 if (category == UCHAR_INVALID_CODE) continue; |
| 869 | 876 |
| 870 USet* set = uset_openEmpty(); | 877 USet* set = uset_openEmpty(); |
| 871 UErrorCode ec = U_ZERO_ERROR; | 878 UErrorCode ec = U_ZERO_ERROR; |
| 872 uset_applyIntPropertyValue(set, property_class, category, &ec); | 879 uset_applyIntPropertyValue(set, property_class, category, &ec); |
| 873 ZoneList<CharacterRange>* ranges = nullptr; | |
| 874 if (ec == U_ZERO_ERROR && !uset_isEmpty(set)) { | 880 if (ec == U_ZERO_ERROR && !uset_isEmpty(set)) { |
| 875 uset_removeAllStrings(set); | 881 uset_removeAllStrings(set); |
| 876 int item_count = uset_getItemCount(set); | 882 int item_count = uset_getItemCount(set); |
| 877 ranges = new (zone()) ZoneList<CharacterRange>(item_count, zone()); | |
| 878 int item_result = 0; | 883 int item_result = 0; |
| 879 for (int i = 0; i < item_count; i++) { | 884 for (int i = 0; i < item_count; i++) { |
| 880 uc32 start = 0; | 885 uc32 start = 0; |
| 881 uc32 end = 0; | 886 uc32 end = 0; |
| 882 item_result += uset_getItem(set, i, &start, &end, nullptr, 0, &ec); | 887 item_result += uset_getItem(set, i, &start, &end, nullptr, 0, &ec); |
| 883 ranges->Add(CharacterRange::Range(start, end), zone()); | 888 result->Add(CharacterRange::Range(start, end), zone()); |
| 884 } | 889 } |
| 885 DCHECK_EQ(U_ZERO_ERROR, ec); | 890 DCHECK_EQ(U_ZERO_ERROR, ec); |
| 886 DCHECK_EQ(0, item_result); | 891 DCHECK_EQ(0, item_result); |
| 887 } | 892 } |
| 888 uset_close(set); | 893 uset_close(set); |
| 889 return ranges; | 894 return true; |
| 890 } | 895 } |
| 891 #endif // V8_I18N_SUPPORT | 896 #endif // V8_I18N_SUPPORT |
| 892 | 897 |
| 893 return nullptr; | 898 return false; |
| 894 } | 899 } |
| 895 | 900 |
| 896 bool RegExpParser::ParseUnlimitedLengthHexNumber(int max_value, uc32* value) { | 901 bool RegExpParser::ParseUnlimitedLengthHexNumber(int max_value, uc32* value) { |
| 897 uc32 x = 0; | 902 uc32 x = 0; |
| 898 int d = HexValue(current()); | 903 int d = HexValue(current()); |
| 899 if (d < 0) { | 904 if (d < 0) { |
| 900 return false; | 905 return false; |
| 901 } | 906 } |
| 902 while (d >= 0) { | 907 while (d >= 0) { |
| 903 x = x * 16 + d; | 908 x = x * 16 + d; |
| (...skipping 159 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1063 static inline void AddRangeOrEscape(ZoneList<CharacterRange>* ranges, | 1068 static inline void AddRangeOrEscape(ZoneList<CharacterRange>* ranges, |
| 1064 uc16 char_class, CharacterRange range, | 1069 uc16 char_class, CharacterRange range, |
| 1065 Zone* zone) { | 1070 Zone* zone) { |
| 1066 if (char_class != kNoCharClass) { | 1071 if (char_class != kNoCharClass) { |
| 1067 CharacterRange::AddClassEscape(char_class, ranges, zone); | 1072 CharacterRange::AddClassEscape(char_class, ranges, zone); |
| 1068 } else { | 1073 } else { |
| 1069 ranges->Add(range, zone); | 1074 ranges->Add(range, zone); |
| 1070 } | 1075 } |
| 1071 } | 1076 } |
| 1072 | 1077 |
| 1078 bool RegExpParser::ParseClassProperty(ZoneList<CharacterRange>* ranges) { |
| 1079 if (!FLAG_harmony_regexp_property) return false; |
| 1080 if (!unicode()) return false; |
| 1081 if (current() != '\\') return false; |
| 1082 uc32 next = Next(); |
| 1083 bool parse_success = false; |
| 1084 if (next == 'p') { |
| 1085 Advance(2); |
| 1086 parse_success = ParsePropertyClass(ranges); |
| 1087 } else if (next == 'P') { |
| 1088 Advance(2); |
| 1089 ZoneList<CharacterRange>* property_class = |
| 1090 new (zone()) ZoneList<CharacterRange>(2, zone()); |
| 1091 parse_success = ParsePropertyClass(property_class); |
| 1092 if (parse_success) { |
| 1093 ZoneList<CharacterRange>* negated = |
| 1094 new (zone()) ZoneList<CharacterRange>(2, zone()); |
| 1095 CharacterRange::Negate(property_class, negated, zone()); |
| 1096 const Vector<CharacterRange> negated_vector = negated->ToVector(); |
| 1097 ranges->AddAll(negated_vector, zone()); |
| 1098 } |
| 1099 } else { |
| 1100 return false; |
| 1101 } |
| 1102 if (!parse_success) |
| 1103 ReportError(CStrVector("Invalid property name in character class")); |
| 1104 return parse_success; |
| 1105 } |
| 1073 | 1106 |
| 1074 RegExpTree* RegExpParser::ParseCharacterClass() { | 1107 RegExpTree* RegExpParser::ParseCharacterClass() { |
| 1075 static const char* kUnterminated = "Unterminated character class"; | 1108 static const char* kUnterminated = "Unterminated character class"; |
| 1076 static const char* kRangeInvalid = "Invalid character class"; | 1109 static const char* kRangeInvalid = "Invalid character class"; |
| 1077 static const char* kRangeOutOfOrder = "Range out of order in character class"; | 1110 static const char* kRangeOutOfOrder = "Range out of order in character class"; |
| 1078 | 1111 |
| 1079 DCHECK_EQ(current(), '['); | 1112 DCHECK_EQ(current(), '['); |
| 1080 Advance(); | 1113 Advance(); |
| 1081 bool is_negated = false; | 1114 bool is_negated = false; |
| 1082 if (current() == '^') { | 1115 if (current() == '^') { |
| 1083 is_negated = true; | 1116 is_negated = true; |
| 1084 Advance(); | 1117 Advance(); |
| 1085 } | 1118 } |
| 1086 ZoneList<CharacterRange>* ranges = | 1119 ZoneList<CharacterRange>* ranges = |
| 1087 new (zone()) ZoneList<CharacterRange>(2, zone()); | 1120 new (zone()) ZoneList<CharacterRange>(2, zone()); |
| 1088 while (has_more() && current() != ']') { | 1121 while (has_more() && current() != ']') { |
| 1122 bool parsed_property = ParseClassProperty(ranges CHECK_FAILED); |
| 1123 if (parsed_property) continue; |
| 1089 uc16 char_class = kNoCharClass; | 1124 uc16 char_class = kNoCharClass; |
| 1090 CharacterRange first = ParseClassAtom(&char_class CHECK_FAILED); | 1125 CharacterRange first = ParseClassAtom(&char_class CHECK_FAILED); |
| 1091 if (current() == '-') { | 1126 if (current() == '-') { |
| 1092 Advance(); | 1127 Advance(); |
| 1093 if (current() == kEndMarker) { | 1128 if (current() == kEndMarker) { |
| 1094 // If we reach the end we break out of the loop and let the | 1129 // If we reach the end we break out of the loop and let the |
| 1095 // following code report an error. | 1130 // following code report an error. |
| 1096 break; | 1131 break; |
| 1097 } else if (current() == ']') { | 1132 } else if (current() == ']') { |
| 1098 AddRangeOrEscape(ranges, char_class, first, zone()); | 1133 AddRangeOrEscape(ranges, char_class, first, zone()); |
| (...skipping 351 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1450 return false; | 1485 return false; |
| 1451 } | 1486 } |
| 1452 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom), | 1487 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom), |
| 1453 zone()); | 1488 zone()); |
| 1454 LAST(ADD_TERM); | 1489 LAST(ADD_TERM); |
| 1455 return true; | 1490 return true; |
| 1456 } | 1491 } |
| 1457 | 1492 |
| 1458 } // namespace internal | 1493 } // namespace internal |
| 1459 } // namespace v8 | 1494 } // namespace v8 |
| OLD | NEW |