OLD | NEW |
1 // Copyright 2016 the V8 project authors. All rights reserved. | 1 // Copyright 2016 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "src/regexp/regexp-parser.h" | 5 #include "src/regexp/regexp-parser.h" |
6 | 6 |
7 #include "src/char-predicates-inl.h" | 7 #include "src/char-predicates-inl.h" |
8 #include "src/factory.h" | 8 #include "src/factory.h" |
9 #include "src/isolate.h" | 9 #include "src/isolate.h" |
10 #include "src/objects-inl.h" | 10 #include "src/objects-inl.h" |
(...skipping 344 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
355 break; | 355 break; |
356 } | 356 } |
357 case 'p': | 357 case 'p': |
358 case 'P': { | 358 case 'P': { |
359 uc32 p = Next(); | 359 uc32 p = Next(); |
360 Advance(2); | 360 Advance(2); |
361 if (unicode()) { | 361 if (unicode()) { |
362 if (FLAG_harmony_regexp_property) { | 362 if (FLAG_harmony_regexp_property) { |
363 ZoneList<CharacterRange>* ranges = | 363 ZoneList<CharacterRange>* ranges = |
364 new (zone()) ZoneList<CharacterRange>(2, zone()); | 364 new (zone()) ZoneList<CharacterRange>(2, zone()); |
365 if (!ParsePropertyClass(ranges)) { | 365 if (!ParsePropertyClass(ranges, p == 'P')) { |
366 return ReportError(CStrVector("Invalid property name")); | 366 return ReportError(CStrVector("Invalid property name")); |
367 } | 367 } |
368 RegExpCharacterClass* cc = | 368 RegExpCharacterClass* cc = |
369 new (zone()) RegExpCharacterClass(ranges, p == 'P'); | 369 new (zone()) RegExpCharacterClass(ranges, false); |
370 builder->AddCharacterClass(cc); | 370 builder->AddCharacterClass(cc); |
371 } else { | 371 } else { |
372 // With /u, no identity escapes except for syntax characters | 372 // With /u, no identity escapes except for syntax characters |
373 // are allowed. Otherwise, all identity escapes are allowed. | 373 // are allowed. Otherwise, all identity escapes are allowed. |
374 return ReportError(CStrVector("Invalid escape")); | 374 return ReportError(CStrVector("Invalid escape")); |
375 } | 375 } |
376 } else { | 376 } else { |
377 builder->AddCharacter(p); | 377 builder->AddCharacter(p); |
378 } | 378 } |
379 break; | 379 break; |
(...skipping 458 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
838 static_cast<uc16>(trail)); | 838 static_cast<uc16>(trail)); |
839 return true; | 839 return true; |
840 } | 840 } |
841 } | 841 } |
842 Reset(start); | 842 Reset(start); |
843 } | 843 } |
844 return result; | 844 return result; |
845 } | 845 } |
846 | 846 |
847 #ifdef V8_I18N_SUPPORT | 847 #ifdef V8_I18N_SUPPORT |
| 848 |
| 849 namespace { |
| 850 |
848 bool IsExactPropertyAlias(const char* property_name, UProperty property) { | 851 bool IsExactPropertyAlias(const char* property_name, UProperty property) { |
849 const char* short_name = u_getPropertyName(property, U_SHORT_PROPERTY_NAME); | 852 const char* short_name = u_getPropertyName(property, U_SHORT_PROPERTY_NAME); |
850 if (short_name != NULL && strcmp(property_name, short_name) == 0) return true; | 853 if (short_name != NULL && strcmp(property_name, short_name) == 0) return true; |
851 for (int i = 0;; i++) { | 854 for (int i = 0;; i++) { |
852 const char* long_name = u_getPropertyName( | 855 const char* long_name = u_getPropertyName( |
853 property, static_cast<UPropertyNameChoice>(U_LONG_PROPERTY_NAME + i)); | 856 property, static_cast<UPropertyNameChoice>(U_LONG_PROPERTY_NAME + i)); |
854 if (long_name == NULL) break; | 857 if (long_name == NULL) break; |
855 if (strcmp(property_name, long_name) == 0) return true; | 858 if (strcmp(property_name, long_name) == 0) return true; |
856 } | 859 } |
857 return false; | 860 return false; |
(...skipping 10 matching lines...) Expand all Loading... |
868 const char* long_name = u_getPropertyValueName( | 871 const char* long_name = u_getPropertyValueName( |
869 property, property_value, | 872 property, property_value, |
870 static_cast<UPropertyNameChoice>(U_LONG_PROPERTY_NAME + i)); | 873 static_cast<UPropertyNameChoice>(U_LONG_PROPERTY_NAME + i)); |
871 if (long_name == NULL) break; | 874 if (long_name == NULL) break; |
872 if (strcmp(property_value_name, long_name) == 0) return true; | 875 if (strcmp(property_value_name, long_name) == 0) return true; |
873 } | 876 } |
874 return false; | 877 return false; |
875 } | 878 } |
876 | 879 |
877 bool LookupPropertyValueName(UProperty property, | 880 bool LookupPropertyValueName(UProperty property, |
878 const char* property_value_name, | 881 const char* property_value_name, bool negate, |
879 ZoneList<CharacterRange>* result, Zone* zone) { | 882 ZoneList<CharacterRange>* result, Zone* zone) { |
880 int32_t property_value = | 883 int32_t property_value = |
881 u_getPropertyValueEnum(property, property_value_name); | 884 u_getPropertyValueEnum(property, property_value_name); |
882 if (property_value == UCHAR_INVALID_CODE) return false; | 885 if (property_value == UCHAR_INVALID_CODE) return false; |
883 | 886 |
884 // We require the property name to match exactly to one of the property value | 887 // We require the property name to match exactly to one of the property value |
885 // aliases. However, u_getPropertyValueEnum uses loose matching. | 888 // aliases. However, u_getPropertyValueEnum uses loose matching. |
886 if (!IsExactPropertyValueAlias(property_value_name, property, | 889 if (!IsExactPropertyValueAlias(property_value_name, property, |
887 property_value)) { | 890 property_value)) { |
888 return false; | 891 return false; |
889 } | 892 } |
890 | 893 |
891 USet* set = uset_openEmpty(); | 894 USet* set = uset_openEmpty(); |
892 UErrorCode ec = U_ZERO_ERROR; | 895 UErrorCode ec = U_ZERO_ERROR; |
893 uset_applyIntPropertyValue(set, property, property_value, &ec); | 896 uset_applyIntPropertyValue(set, property, property_value, &ec); |
894 bool success = ec == U_ZERO_ERROR && !uset_isEmpty(set); | 897 bool success = ec == U_ZERO_ERROR && !uset_isEmpty(set); |
895 | 898 |
896 if (success) { | 899 if (success) { |
897 uset_removeAllStrings(set); | 900 uset_removeAllStrings(set); |
| 901 if (negate) uset_complement(set); |
898 int item_count = uset_getItemCount(set); | 902 int item_count = uset_getItemCount(set); |
899 int item_result = 0; | 903 int item_result = 0; |
900 for (int i = 0; i < item_count; i++) { | 904 for (int i = 0; i < item_count; i++) { |
901 uc32 start = 0; | 905 uc32 start = 0; |
902 uc32 end = 0; | 906 uc32 end = 0; |
903 item_result += uset_getItem(set, i, &start, &end, nullptr, 0, &ec); | 907 item_result += uset_getItem(set, i, &start, &end, nullptr, 0, &ec); |
904 result->Add(CharacterRange::Range(start, end), zone); | 908 result->Add(CharacterRange::Range(start, end), zone); |
905 } | 909 } |
906 DCHECK_EQ(U_ZERO_ERROR, ec); | 910 DCHECK_EQ(U_ZERO_ERROR, ec); |
907 DCHECK_EQ(0, item_result); | 911 DCHECK_EQ(0, item_result); |
908 } | 912 } |
909 uset_close(set); | 913 uset_close(set); |
910 return success; | 914 return success; |
911 } | 915 } |
912 | 916 |
913 bool RegExpParser::ParsePropertyClass(ZoneList<CharacterRange>* result) { | 917 template <size_t N> |
| 918 inline bool NameEquals(const char* name, const char (&literal)[N]) { |
| 919 return strncmp(name, literal, N + 1) == 0; |
| 920 } |
| 921 |
| 922 bool LookupSpecialPropertyValueName(const char* name, |
| 923 ZoneList<CharacterRange>* result, |
| 924 bool negate, Zone* zone) { |
| 925 if (NameEquals(name, "Any")) { |
| 926 if (!negate) result->Add(CharacterRange::Everything(), zone); |
| 927 } else if (NameEquals(name, "ASCII")) { |
| 928 result->Add(negate ? CharacterRange::Range(0x80, String::kMaxCodePoint) |
| 929 : CharacterRange::Range(0x0, 0x7f), |
| 930 zone); |
| 931 } else if (NameEquals(name, "Assigned")) { |
| 932 return LookupPropertyValueName(UCHAR_GENERAL_CATEGORY, "Unassigned", |
| 933 !negate, result, zone); |
| 934 } else { |
| 935 return false; |
| 936 } |
| 937 return true; |
| 938 } |
| 939 |
| 940 } // anonymous namespace |
| 941 |
| 942 bool RegExpParser::ParsePropertyClass(ZoneList<CharacterRange>* result, |
| 943 bool negate) { |
914 // Parse the property class as follows: | 944 // Parse the property class as follows: |
915 // - In \p{name}, 'name' is interpreted | 945 // - In \p{name}, 'name' is interpreted |
916 // - either as a general category property value name. | 946 // - either as a general category property value name. |
917 // - or as a binary property name. | 947 // - or as a binary property name. |
918 // - In \p{name=value}, 'name' is interpreted as an enumerated property name, | 948 // - In \p{name=value}, 'name' is interpreted as an enumerated property name, |
919 // and 'value' is interpreted as one of the available property value names. | 949 // and 'value' is interpreted as one of the available property value names. |
920 // - Aliases in PropertyAlias.txt and PropertyValueAlias.txt can be used. | 950 // - Aliases in PropertyAlias.txt and PropertyValueAlias.txt can be used. |
921 // - Loose matching is not applied. | 951 // - Loose matching is not applied. |
922 List<char> first_part; | 952 List<char> first_part; |
923 List<char> second_part; | 953 List<char> second_part; |
(...skipping 12 matching lines...) Expand all Loading... |
936 } | 966 } |
937 } else { | 967 } else { |
938 return false; | 968 return false; |
939 } | 969 } |
940 Advance(); | 970 Advance(); |
941 first_part.Add(0); // null-terminate string. | 971 first_part.Add(0); // null-terminate string. |
942 | 972 |
943 if (second_part.is_empty()) { | 973 if (second_part.is_empty()) { |
944 // First attempt to interpret as general category property value name. | 974 // First attempt to interpret as general category property value name. |
945 const char* name = first_part.ToConstVector().start(); | 975 const char* name = first_part.ToConstVector().start(); |
946 if (LookupPropertyValueName(UCHAR_GENERAL_CATEGORY_MASK, name, result, | 976 if (LookupPropertyValueName(UCHAR_GENERAL_CATEGORY_MASK, name, negate, |
947 zone())) { | 977 result, zone())) { |
| 978 return true; |
| 979 } |
| 980 // Interpret "Any", "ASCII", and "Assigned". |
| 981 if (LookupSpecialPropertyValueName(name, result, negate, zone())) { |
948 return true; | 982 return true; |
949 } | 983 } |
950 // Then attempt to interpret as binary property name with value name 'Y'. | 984 // Then attempt to interpret as binary property name with value name 'Y'. |
951 UProperty property = u_getPropertyEnum(name); | 985 UProperty property = u_getPropertyEnum(name); |
952 if (property < UCHAR_BINARY_START) return false; | 986 if (property < UCHAR_BINARY_START) return false; |
953 if (property >= UCHAR_BINARY_LIMIT) return false; | 987 if (property >= UCHAR_BINARY_LIMIT) return false; |
954 if (!IsExactPropertyAlias(name, property)) return false; | 988 if (!IsExactPropertyAlias(name, property)) return false; |
955 return LookupPropertyValueName(property, "Y", result, zone()); | 989 return LookupPropertyValueName(property, negate ? "N" : "Y", false, result, |
| 990 zone()); |
956 } else { | 991 } else { |
957 // Both property name and value name are specified. Attempt to interpret | 992 // Both property name and value name are specified. Attempt to interpret |
958 // the property name as enumerated property. | 993 // the property name as enumerated property. |
959 const char* property_name = first_part.ToConstVector().start(); | 994 const char* property_name = first_part.ToConstVector().start(); |
960 const char* value_name = second_part.ToConstVector().start(); | 995 const char* value_name = second_part.ToConstVector().start(); |
961 UProperty property = u_getPropertyEnum(property_name); | 996 UProperty property = u_getPropertyEnum(property_name); |
962 if (property < UCHAR_INT_START) return false; | 997 if (property < UCHAR_INT_START) return false; |
963 if (property >= UCHAR_INT_LIMIT) return false; | 998 if (property >= UCHAR_INT_LIMIT) return false; |
964 if (!IsExactPropertyAlias(property_name, property)) return false; | 999 if (!IsExactPropertyAlias(property_name, property)) return false; |
965 return LookupPropertyValueName(property, value_name, result, zone()); | 1000 return LookupPropertyValueName(property, value_name, negate, result, |
| 1001 zone()); |
966 } | 1002 } |
967 } | 1003 } |
968 | 1004 |
969 #else // V8_I18N_SUPPORT | 1005 #else // V8_I18N_SUPPORT |
970 | 1006 |
971 bool RegExpParser::ParsePropertyClass(ZoneList<CharacterRange>* result) { | 1007 bool RegExpParser::ParsePropertyClass(ZoneList<CharacterRange>* result, |
| 1008 bool negate) { |
972 return false; | 1009 return false; |
973 } | 1010 } |
974 | 1011 |
975 #endif // V8_I18N_SUPPORT | 1012 #endif // V8_I18N_SUPPORT |
976 | 1013 |
977 bool RegExpParser::ParseUnlimitedLengthHexNumber(int max_value, uc32* value) { | 1014 bool RegExpParser::ParseUnlimitedLengthHexNumber(int max_value, uc32* value) { |
978 uc32 x = 0; | 1015 uc32 x = 0; |
979 int d = HexValue(current()); | 1016 int d = HexValue(current()); |
980 if (d < 0) { | 1017 if (d < 0) { |
981 return false; | 1018 return false; |
(...skipping 170 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1152 } | 1189 } |
1153 | 1190 |
1154 bool RegExpParser::ParseClassProperty(ZoneList<CharacterRange>* ranges) { | 1191 bool RegExpParser::ParseClassProperty(ZoneList<CharacterRange>* ranges) { |
1155 if (!FLAG_harmony_regexp_property) return false; | 1192 if (!FLAG_harmony_regexp_property) return false; |
1156 if (!unicode()) return false; | 1193 if (!unicode()) return false; |
1157 if (current() != '\\') return false; | 1194 if (current() != '\\') return false; |
1158 uc32 next = Next(); | 1195 uc32 next = Next(); |
1159 bool parse_success = false; | 1196 bool parse_success = false; |
1160 if (next == 'p') { | 1197 if (next == 'p') { |
1161 Advance(2); | 1198 Advance(2); |
1162 parse_success = ParsePropertyClass(ranges); | 1199 parse_success = ParsePropertyClass(ranges, false); |
1163 } else if (next == 'P') { | 1200 } else if (next == 'P') { |
1164 Advance(2); | 1201 Advance(2); |
1165 ZoneList<CharacterRange>* property_class = | 1202 parse_success = ParsePropertyClass(ranges, true); |
1166 new (zone()) ZoneList<CharacterRange>(2, zone()); | |
1167 parse_success = ParsePropertyClass(property_class); | |
1168 if (parse_success) { | |
1169 ZoneList<CharacterRange>* negated = | |
1170 new (zone()) ZoneList<CharacterRange>(2, zone()); | |
1171 CharacterRange::Negate(property_class, negated, zone()); | |
1172 const Vector<CharacterRange> negated_vector = negated->ToVector(); | |
1173 ranges->AddAll(negated_vector, zone()); | |
1174 } | |
1175 } else { | 1203 } else { |
1176 return false; | 1204 return false; |
1177 } | 1205 } |
1178 if (!parse_success) | 1206 if (!parse_success) |
1179 ReportError(CStrVector("Invalid property name in character class")); | 1207 ReportError(CStrVector("Invalid property name in character class")); |
1180 return parse_success; | 1208 return parse_success; |
1181 } | 1209 } |
1182 | 1210 |
1183 RegExpTree* RegExpParser::ParseCharacterClass() { | 1211 RegExpTree* RegExpParser::ParseCharacterClass() { |
1184 static const char* kUnterminated = "Unterminated character class"; | 1212 static const char* kUnterminated = "Unterminated character class"; |
(...skipping 372 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1557 return false; | 1585 return false; |
1558 } | 1586 } |
1559 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom), | 1587 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom), |
1560 zone()); | 1588 zone()); |
1561 LAST(ADD_TERM); | 1589 LAST(ADD_TERM); |
1562 return true; | 1590 return true; |
1563 } | 1591 } |
1564 | 1592 |
1565 } // namespace internal | 1593 } // namespace internal |
1566 } // namespace v8 | 1594 } // namespace v8 |
OLD | NEW |