| OLD | NEW |
| 1 // Copyright 2016 the V8 project authors. All rights reserved. | 1 // Copyright 2016 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "src/regexp/regexp-parser.h" | 5 #include "src/regexp/regexp-parser.h" |
| 6 | 6 |
| 7 #include "src/char-predicates-inl.h" | 7 #include "src/char-predicates-inl.h" |
| 8 #include "src/factory.h" | 8 #include "src/factory.h" |
| 9 #include "src/isolate.h" | 9 #include "src/isolate.h" |
| 10 #include "src/objects-inl.h" | 10 #include "src/objects-inl.h" |
| (...skipping 344 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 355 break; | 355 break; |
| 356 } | 356 } |
| 357 case 'p': | 357 case 'p': |
| 358 case 'P': { | 358 case 'P': { |
| 359 uc32 p = Next(); | 359 uc32 p = Next(); |
| 360 Advance(2); | 360 Advance(2); |
| 361 if (unicode()) { | 361 if (unicode()) { |
| 362 if (FLAG_harmony_regexp_property) { | 362 if (FLAG_harmony_regexp_property) { |
| 363 ZoneList<CharacterRange>* ranges = | 363 ZoneList<CharacterRange>* ranges = |
| 364 new (zone()) ZoneList<CharacterRange>(2, zone()); | 364 new (zone()) ZoneList<CharacterRange>(2, zone()); |
| 365 if (!ParsePropertyClass(ranges)) { | 365 if (!ParsePropertyClass(ranges, p == 'P')) { |
| 366 return ReportError(CStrVector("Invalid property name")); | 366 return ReportError(CStrVector("Invalid property name")); |
| 367 } | 367 } |
| 368 RegExpCharacterClass* cc = | 368 RegExpCharacterClass* cc = |
| 369 new (zone()) RegExpCharacterClass(ranges, p == 'P'); | 369 new (zone()) RegExpCharacterClass(ranges, false); |
| 370 builder->AddCharacterClass(cc); | 370 builder->AddCharacterClass(cc); |
| 371 } else { | 371 } else { |
| 372 // With /u, no identity escapes except for syntax characters | 372 // With /u, no identity escapes except for syntax characters |
| 373 // are allowed. Otherwise, all identity escapes are allowed. | 373 // are allowed. Otherwise, all identity escapes are allowed. |
| 374 return ReportError(CStrVector("Invalid escape")); | 374 return ReportError(CStrVector("Invalid escape")); |
| 375 } | 375 } |
| 376 } else { | 376 } else { |
| 377 builder->AddCharacter(p); | 377 builder->AddCharacter(p); |
| 378 } | 378 } |
| 379 break; | 379 break; |
| (...skipping 458 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 838 static_cast<uc16>(trail)); | 838 static_cast<uc16>(trail)); |
| 839 return true; | 839 return true; |
| 840 } | 840 } |
| 841 } | 841 } |
| 842 Reset(start); | 842 Reset(start); |
| 843 } | 843 } |
| 844 return result; | 844 return result; |
| 845 } | 845 } |
| 846 | 846 |
| 847 #ifdef V8_I18N_SUPPORT | 847 #ifdef V8_I18N_SUPPORT |
| 848 |
| 849 namespace { |
| 850 |
| 848 bool IsExactPropertyAlias(const char* property_name, UProperty property) { | 851 bool IsExactPropertyAlias(const char* property_name, UProperty property) { |
| 849 const char* short_name = u_getPropertyName(property, U_SHORT_PROPERTY_NAME); | 852 const char* short_name = u_getPropertyName(property, U_SHORT_PROPERTY_NAME); |
| 850 if (short_name != NULL && strcmp(property_name, short_name) == 0) return true; | 853 if (short_name != NULL && strcmp(property_name, short_name) == 0) return true; |
| 851 for (int i = 0;; i++) { | 854 for (int i = 0;; i++) { |
| 852 const char* long_name = u_getPropertyName( | 855 const char* long_name = u_getPropertyName( |
| 853 property, static_cast<UPropertyNameChoice>(U_LONG_PROPERTY_NAME + i)); | 856 property, static_cast<UPropertyNameChoice>(U_LONG_PROPERTY_NAME + i)); |
| 854 if (long_name == NULL) break; | 857 if (long_name == NULL) break; |
| 855 if (strcmp(property_name, long_name) == 0) return true; | 858 if (strcmp(property_name, long_name) == 0) return true; |
| 856 } | 859 } |
| 857 return false; | 860 return false; |
| (...skipping 10 matching lines...) Expand all Loading... |
| 868 const char* long_name = u_getPropertyValueName( | 871 const char* long_name = u_getPropertyValueName( |
| 869 property, property_value, | 872 property, property_value, |
| 870 static_cast<UPropertyNameChoice>(U_LONG_PROPERTY_NAME + i)); | 873 static_cast<UPropertyNameChoice>(U_LONG_PROPERTY_NAME + i)); |
| 871 if (long_name == NULL) break; | 874 if (long_name == NULL) break; |
| 872 if (strcmp(property_value_name, long_name) == 0) return true; | 875 if (strcmp(property_value_name, long_name) == 0) return true; |
| 873 } | 876 } |
| 874 return false; | 877 return false; |
| 875 } | 878 } |
| 876 | 879 |
| 877 bool LookupPropertyValueName(UProperty property, | 880 bool LookupPropertyValueName(UProperty property, |
| 878 const char* property_value_name, | 881 const char* property_value_name, bool negate, |
| 879 ZoneList<CharacterRange>* result, Zone* zone) { | 882 ZoneList<CharacterRange>* result, Zone* zone) { |
| 880 int32_t property_value = | 883 int32_t property_value = |
| 881 u_getPropertyValueEnum(property, property_value_name); | 884 u_getPropertyValueEnum(property, property_value_name); |
| 882 if (property_value == UCHAR_INVALID_CODE) return false; | 885 if (property_value == UCHAR_INVALID_CODE) return false; |
| 883 | 886 |
| 884 // We require the property name to match exactly to one of the property value | 887 // We require the property name to match exactly to one of the property value |
| 885 // aliases. However, u_getPropertyValueEnum uses loose matching. | 888 // aliases. However, u_getPropertyValueEnum uses loose matching. |
| 886 if (!IsExactPropertyValueAlias(property_value_name, property, | 889 if (!IsExactPropertyValueAlias(property_value_name, property, |
| 887 property_value)) { | 890 property_value)) { |
| 888 return false; | 891 return false; |
| 889 } | 892 } |
| 890 | 893 |
| 891 USet* set = uset_openEmpty(); | 894 USet* set = uset_openEmpty(); |
| 892 UErrorCode ec = U_ZERO_ERROR; | 895 UErrorCode ec = U_ZERO_ERROR; |
| 893 uset_applyIntPropertyValue(set, property, property_value, &ec); | 896 uset_applyIntPropertyValue(set, property, property_value, &ec); |
| 894 bool success = ec == U_ZERO_ERROR && !uset_isEmpty(set); | 897 bool success = ec == U_ZERO_ERROR && !uset_isEmpty(set); |
| 895 | 898 |
| 896 if (success) { | 899 if (success) { |
| 897 uset_removeAllStrings(set); | 900 uset_removeAllStrings(set); |
| 901 if (negate) uset_complement(set); |
| 898 int item_count = uset_getItemCount(set); | 902 int item_count = uset_getItemCount(set); |
| 899 int item_result = 0; | 903 int item_result = 0; |
| 900 for (int i = 0; i < item_count; i++) { | 904 for (int i = 0; i < item_count; i++) { |
| 901 uc32 start = 0; | 905 uc32 start = 0; |
| 902 uc32 end = 0; | 906 uc32 end = 0; |
| 903 item_result += uset_getItem(set, i, &start, &end, nullptr, 0, &ec); | 907 item_result += uset_getItem(set, i, &start, &end, nullptr, 0, &ec); |
| 904 result->Add(CharacterRange::Range(start, end), zone); | 908 result->Add(CharacterRange::Range(start, end), zone); |
| 905 } | 909 } |
| 906 DCHECK_EQ(U_ZERO_ERROR, ec); | 910 DCHECK_EQ(U_ZERO_ERROR, ec); |
| 907 DCHECK_EQ(0, item_result); | 911 DCHECK_EQ(0, item_result); |
| 908 } | 912 } |
| 909 uset_close(set); | 913 uset_close(set); |
| 910 return success; | 914 return success; |
| 911 } | 915 } |
| 912 | 916 |
// Returns true iff the NUL-terminated string |name| is exactly equal to the
// string literal |literal| (no prefix or loose matching).
//
// N is the extent of the literal's array and therefore already counts its
// terminating NUL. Comparing N characters thus includes the terminator, which
// rejects any |name| that merely starts with |literal|, and keeps the
// comparison bound within the array.
template <size_t N>
inline bool NameEquals(const char* name, const char (&literal)[N]) {
  return strncmp(name, literal, N) == 0;
}
| 921 |
| 922 bool LookupSpecialPropertyValueName(const char* name, |
| 923 ZoneList<CharacterRange>* result, |
| 924 bool negate, Zone* zone) { |
| 925 if (NameEquals(name, "Any")) { |
| 926 if (!negate) result->Add(CharacterRange::Everything(), zone); |
| 927 } else if (NameEquals(name, "ASCII")) { |
| 928 result->Add(negate ? CharacterRange::Range(0x80, String::kMaxCodePoint) |
| 929 : CharacterRange::Range(0x0, 0x7f), |
| 930 zone); |
| 931 } else if (NameEquals(name, "Assigned")) { |
| 932 return LookupPropertyValueName(UCHAR_GENERAL_CATEGORY, "Unassigned", |
| 933 !negate, result, zone); |
| 934 } else { |
| 935 return false; |
| 936 } |
| 937 return true; |
| 938 } |
| 939 |
| 940 } // anonymous namespace |
| 941 |
| 942 bool RegExpParser::ParsePropertyClass(ZoneList<CharacterRange>* result, |
| 943 bool negate) { |
| 914 // Parse the property class as follows: | 944 // Parse the property class as follows: |
| 915 // - In \p{name}, 'name' is interpreted | 945 // - In \p{name}, 'name' is interpreted |
| 916 // - either as a general category property value name. | 946 // - either as a general category property value name. |
| 917 // - or as a binary property name. | 947 // - or as a binary property name. |
| 918 // - In \p{name=value}, 'name' is interpreted as an enumerated property name, | 948 // - In \p{name=value}, 'name' is interpreted as an enumerated property name, |
| 919 // and 'value' is interpreted as one of the available property value names. | 949 // and 'value' is interpreted as one of the available property value names. |
| 920 // - Aliases in PropertyAlias.txt and PropertyValueAlias.txt can be used. | 950 // - Aliases in PropertyAlias.txt and PropertyValueAlias.txt can be used. |
| 921 // - Loose matching is not applied. | 951 // - Loose matching is not applied. |
| 922 List<char> first_part; | 952 List<char> first_part; |
| 923 List<char> second_part; | 953 List<char> second_part; |
| (...skipping 12 matching lines...) Expand all Loading... |
| 936 } | 966 } |
| 937 } else { | 967 } else { |
| 938 return false; | 968 return false; |
| 939 } | 969 } |
| 940 Advance(); | 970 Advance(); |
| 941 first_part.Add(0); // null-terminate string. | 971 first_part.Add(0); // null-terminate string. |
| 942 | 972 |
| 943 if (second_part.is_empty()) { | 973 if (second_part.is_empty()) { |
| 944 // First attempt to interpret as general category property value name. | 974 // First attempt to interpret as general category property value name. |
| 945 const char* name = first_part.ToConstVector().start(); | 975 const char* name = first_part.ToConstVector().start(); |
| 946 if (LookupPropertyValueName(UCHAR_GENERAL_CATEGORY_MASK, name, result, | 976 if (LookupPropertyValueName(UCHAR_GENERAL_CATEGORY_MASK, name, negate, |
| 947 zone())) { | 977 result, zone())) { |
| 978 return true; |
| 979 } |
| 980 // Interpret "Any", "ASCII", and "Assigned". |
| 981 if (LookupSpecialPropertyValueName(name, result, negate, zone())) { |
| 948 return true; | 982 return true; |
| 949 } | 983 } |
| 950 // Then attempt to interpret as binary property name with value name 'Y'. | 984 // Then attempt to interpret as binary property name with value name 'Y'. |
| 951 UProperty property = u_getPropertyEnum(name); | 985 UProperty property = u_getPropertyEnum(name); |
| 952 if (property < UCHAR_BINARY_START) return false; | 986 if (property < UCHAR_BINARY_START) return false; |
| 953 if (property >= UCHAR_BINARY_LIMIT) return false; | 987 if (property >= UCHAR_BINARY_LIMIT) return false; |
| 954 if (!IsExactPropertyAlias(name, property)) return false; | 988 if (!IsExactPropertyAlias(name, property)) return false; |
| 955 return LookupPropertyValueName(property, "Y", result, zone()); | 989 return LookupPropertyValueName(property, negate ? "N" : "Y", false, result, |
| 990 zone()); |
| 956 } else { | 991 } else { |
| 957 // Both property name and value name are specified. Attempt to interpret | 992 // Both property name and value name are specified. Attempt to interpret |
| 958 // the property name as enumerated property. | 993 // the property name as enumerated property. |
| 959 const char* property_name = first_part.ToConstVector().start(); | 994 const char* property_name = first_part.ToConstVector().start(); |
| 960 const char* value_name = second_part.ToConstVector().start(); | 995 const char* value_name = second_part.ToConstVector().start(); |
| 961 UProperty property = u_getPropertyEnum(property_name); | 996 UProperty property = u_getPropertyEnum(property_name); |
| 962 if (property < UCHAR_INT_START) return false; | 997 if (property < UCHAR_INT_START) return false; |
| 963 if (property >= UCHAR_INT_LIMIT) return false; | 998 if (property >= UCHAR_INT_LIMIT) return false; |
| 964 if (!IsExactPropertyAlias(property_name, property)) return false; | 999 if (!IsExactPropertyAlias(property_name, property)) return false; |
| 965 return LookupPropertyValueName(property, value_name, result, zone()); | 1000 return LookupPropertyValueName(property, value_name, negate, result, |
| 1001 zone()); |
| 966 } | 1002 } |
| 967 } | 1003 } |
| 968 | 1004 |
| 969 #else // V8_I18N_SUPPORT | 1005 #else // V8_I18N_SUPPORT |
| 970 | 1006 |
| 971 bool RegExpParser::ParsePropertyClass(ZoneList<CharacterRange>* result) { | 1007 bool RegExpParser::ParsePropertyClass(ZoneList<CharacterRange>* result, |
| 1008 bool negate) { |
| 972 return false; | 1009 return false; |
| 973 } | 1010 } |
| 974 | 1011 |
| 975 #endif // V8_I18N_SUPPORT | 1012 #endif // V8_I18N_SUPPORT |
| 976 | 1013 |
| 977 bool RegExpParser::ParseUnlimitedLengthHexNumber(int max_value, uc32* value) { | 1014 bool RegExpParser::ParseUnlimitedLengthHexNumber(int max_value, uc32* value) { |
| 978 uc32 x = 0; | 1015 uc32 x = 0; |
| 979 int d = HexValue(current()); | 1016 int d = HexValue(current()); |
| 980 if (d < 0) { | 1017 if (d < 0) { |
| 981 return false; | 1018 return false; |
| (...skipping 170 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1152 } | 1189 } |
| 1153 | 1190 |
| 1154 bool RegExpParser::ParseClassProperty(ZoneList<CharacterRange>* ranges) { | 1191 bool RegExpParser::ParseClassProperty(ZoneList<CharacterRange>* ranges) { |
| 1155 if (!FLAG_harmony_regexp_property) return false; | 1192 if (!FLAG_harmony_regexp_property) return false; |
| 1156 if (!unicode()) return false; | 1193 if (!unicode()) return false; |
| 1157 if (current() != '\\') return false; | 1194 if (current() != '\\') return false; |
| 1158 uc32 next = Next(); | 1195 uc32 next = Next(); |
| 1159 bool parse_success = false; | 1196 bool parse_success = false; |
| 1160 if (next == 'p') { | 1197 if (next == 'p') { |
| 1161 Advance(2); | 1198 Advance(2); |
| 1162 parse_success = ParsePropertyClass(ranges); | 1199 parse_success = ParsePropertyClass(ranges, false); |
| 1163 } else if (next == 'P') { | 1200 } else if (next == 'P') { |
| 1164 Advance(2); | 1201 Advance(2); |
| 1165 ZoneList<CharacterRange>* property_class = | 1202 parse_success = ParsePropertyClass(ranges, true); |
| 1166 new (zone()) ZoneList<CharacterRange>(2, zone()); | |
| 1167 parse_success = ParsePropertyClass(property_class); | |
| 1168 if (parse_success) { | |
| 1169 ZoneList<CharacterRange>* negated = | |
| 1170 new (zone()) ZoneList<CharacterRange>(2, zone()); | |
| 1171 CharacterRange::Negate(property_class, negated, zone()); | |
| 1172 const Vector<CharacterRange> negated_vector = negated->ToVector(); | |
| 1173 ranges->AddAll(negated_vector, zone()); | |
| 1174 } | |
| 1175 } else { | 1203 } else { |
| 1176 return false; | 1204 return false; |
| 1177 } | 1205 } |
| 1178 if (!parse_success) | 1206 if (!parse_success) |
| 1179 ReportError(CStrVector("Invalid property name in character class")); | 1207 ReportError(CStrVector("Invalid property name in character class")); |
| 1180 return parse_success; | 1208 return parse_success; |
| 1181 } | 1209 } |
| 1182 | 1210 |
| 1183 RegExpTree* RegExpParser::ParseCharacterClass() { | 1211 RegExpTree* RegExpParser::ParseCharacterClass() { |
| 1184 static const char* kUnterminated = "Unterminated character class"; | 1212 static const char* kUnterminated = "Unterminated character class"; |
| (...skipping 372 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1557 return false; | 1585 return false; |
| 1558 } | 1586 } |
| 1559 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom), | 1587 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom), |
| 1560 zone()); | 1588 zone()); |
| 1561 LAST(ADD_TERM); | 1589 LAST(ADD_TERM); |
| 1562 return true; | 1590 return true; |
| 1563 } | 1591 } |
| 1564 | 1592 |
| 1565 } // namespace internal | 1593 } // namespace internal |
| 1566 } // namespace v8 | 1594 } // namespace v8 |
| OLD | NEW |