OLD | NEW |
1 // Copyright 2016 the V8 project authors. All rights reserved. | 1 // Copyright 2016 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "src/regexp/regexp-parser.h" | 5 #include "src/regexp/regexp-parser.h" |
6 | 6 |
7 #include "src/char-predicates-inl.h" | 7 #include "src/char-predicates-inl.h" |
8 #include "src/factory.h" | 8 #include "src/factory.h" |
9 #include "src/isolate.h" | 9 #include "src/isolate.h" |
10 #include "src/objects-inl.h" | 10 #include "src/objects-inl.h" |
(...skipping 827 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
838 static_cast<uc16>(trail)); | 838 static_cast<uc16>(trail)); |
839 return true; | 839 return true; |
840 } | 840 } |
841 } | 841 } |
842 Reset(start); | 842 Reset(start); |
843 } | 843 } |
844 return result; | 844 return result; |
845 } | 845 } |
846 | 846 |
847 #ifdef V8_I18N_SUPPORT | 847 #ifdef V8_I18N_SUPPORT |
848 bool IsExactPropertyValueAlias(const char* property_name, UProperty property, | 848 bool IsExactPropertyAlias(const char* property_name, UProperty property) { |
849 int32_t property_value) { | 849 const char* short_name = u_getPropertyName(property, U_SHORT_PROPERTY_NAME); |
850 const char* short_name = | |
851 u_getPropertyValueName(property, property_value, U_SHORT_PROPERTY_NAME); | |
852 if (short_name != NULL && strcmp(property_name, short_name) == 0) return true; | 850 if (short_name != NULL && strcmp(property_name, short_name) == 0) return true; |
853 for (int i = 0;; i++) { | 851 for (int i = 0;; i++) { |
854 const char* long_name = u_getPropertyValueName( | 852 const char* long_name = u_getPropertyName( |
855 property, property_value, | 853 property, static_cast<UPropertyNameChoice>(U_LONG_PROPERTY_NAME + i)); |
856 static_cast<UPropertyNameChoice>(U_LONG_PROPERTY_NAME + i)); | |
857 if (long_name == NULL) break; | 854 if (long_name == NULL) break; |
858 if (strcmp(property_name, long_name) == 0) return true; | 855 if (strcmp(property_name, long_name) == 0) return true; |
859 } | 856 } |
860 return false; | 857 return false; |
861 } | 858 } |
862 | 859 |
863 bool LookupPropertyClass(UProperty property, const char* property_name, | 860 bool IsExactPropertyValueAlias(const char* property_value_name, |
864 ZoneList<CharacterRange>* result, Zone* zone) { | 861 UProperty property, int32_t property_value) { |
865 int32_t property_value = u_getPropertyValueEnum(property, property_name); | 862 const char* short_name = |
| 863 u_getPropertyValueName(property, property_value, U_SHORT_PROPERTY_NAME); |
| 864 if (short_name != NULL && strcmp(property_value_name, short_name) == 0) { |
| 865 return true; |
| 866 } |
| 867 for (int i = 0;; i++) { |
| 868 const char* long_name = u_getPropertyValueName( |
| 869 property, property_value, |
| 870 static_cast<UPropertyNameChoice>(U_LONG_PROPERTY_NAME + i)); |
| 871 if (long_name == NULL) break; |
| 872 if (strcmp(property_value_name, long_name) == 0) return true; |
| 873 } |
| 874 return false; |
| 875 } |
| 876 |
| 877 bool LookupPropertyValueName(UProperty property, |
| 878 const char* property_value_name, |
| 879 ZoneList<CharacterRange>* result, Zone* zone) { |
| 880 int32_t property_value = |
| 881 u_getPropertyValueEnum(property, property_value_name); |
866 if (property_value == UCHAR_INVALID_CODE) return false; | 882 if (property_value == UCHAR_INVALID_CODE) return false; |
867 | 883 |
868 // We require the property name to match exactly to one of the property value | 884 // We require the property name to match exactly to one of the property value |
869 // aliases. However, u_getPropertyValueEnum uses loose matching. | 885 // aliases. However, u_getPropertyValueEnum uses loose matching. |
870 if (!IsExactPropertyValueAlias(property_name, property, property_value)) { | 886 if (!IsExactPropertyValueAlias(property_value_name, property, |
| 887 property_value)) { |
871 return false; | 888 return false; |
872 } | 889 } |
873 | 890 |
874 USet* set = uset_openEmpty(); | 891 USet* set = uset_openEmpty(); |
875 UErrorCode ec = U_ZERO_ERROR; | 892 UErrorCode ec = U_ZERO_ERROR; |
876 uset_applyIntPropertyValue(set, property, property_value, &ec); | 893 uset_applyIntPropertyValue(set, property, property_value, &ec); |
877 bool success = ec == U_ZERO_ERROR && !uset_isEmpty(set); | 894 bool success = ec == U_ZERO_ERROR && !uset_isEmpty(set); |
878 | 895 |
879 if (success) { | 896 if (success) { |
880 uset_removeAllStrings(set); | 897 uset_removeAllStrings(set); |
881 int item_count = uset_getItemCount(set); | 898 int item_count = uset_getItemCount(set); |
882 int item_result = 0; | 899 int item_result = 0; |
883 for (int i = 0; i < item_count; i++) { | 900 for (int i = 0; i < item_count; i++) { |
884 uc32 start = 0; | 901 uc32 start = 0; |
885 uc32 end = 0; | 902 uc32 end = 0; |
886 item_result += uset_getItem(set, i, &start, &end, nullptr, 0, &ec); | 903 item_result += uset_getItem(set, i, &start, &end, nullptr, 0, &ec); |
887 result->Add(CharacterRange::Range(start, end), zone); | 904 result->Add(CharacterRange::Range(start, end), zone); |
888 } | 905 } |
889 DCHECK_EQ(U_ZERO_ERROR, ec); | 906 DCHECK_EQ(U_ZERO_ERROR, ec); |
890 DCHECK_EQ(0, item_result); | 907 DCHECK_EQ(0, item_result); |
891 } | 908 } |
892 uset_close(set); | 909 uset_close(set); |
893 return success; | 910 return success; |
894 } | 911 } |
895 #endif // V8_I18N_SUPPORT | |
896 | 912 |
897 bool RegExpParser::ParsePropertyClass(ZoneList<CharacterRange>* result) { | 913 bool RegExpParser::ParsePropertyClass(ZoneList<CharacterRange>* result) { |
898 #ifdef V8_I18N_SUPPORT | 914 // Parse the property class as follows: |
899 List<char> property_name_list; | 915 // - \pN with a single-character N is equivalent to \p{N} |
| 916 // - In \p{name}, 'name' is interpreted |
| 917 // - either as a general category property value name. |
| 918 // - or as a binary property name. |
| 919 // - In \p{name=value}, 'name' is interpreted as an enumerated property name, |
| 920 // and 'value' is interpreted as one of the available property value names. |
| 921 // - Aliases in PropertyAliases.txt and PropertyValueAliases.txt can be used. |
| 922 // - Loose matching is not applied. |
| 923 List<char> first_part; |
| 924 List<char> second_part; |
900 if (current() == '{') { | 925 if (current() == '{') { |
901 for (Advance(); current() != '}'; Advance()) { | 926 // Parse \p{[PropertyName=]PropertyValueName} |
| 927 for (Advance(); current() != '}' && current() != '='; Advance()) { |
902 if (!has_next()) return false; | 928 if (!has_next()) return false; |
903 property_name_list.Add(static_cast<char>(current())); | 929 first_part.Add(static_cast<char>(current())); |
| 930 } |
| 931 if (current() == '=') { |
| 932 for (Advance(); current() != '}'; Advance()) { |
| 933 if (!has_next()) return false; |
| 934 second_part.Add(static_cast<char>(current())); |
| 935 } |
| 936 second_part.Add(0); // null-terminate string. |
904 } | 937 } |
905 } else if (current() != kEndMarker) { | 938 } else if (current() != kEndMarker) { |
906 property_name_list.Add(static_cast<char>(current())); | 939 // Parse \pN, where N is a single-character property value name. |
| 940 first_part.Add(static_cast<char>(current())); |
907 } else { | 941 } else { |
908 return false; | 942 return false; |
909 } | 943 } |
910 Advance(); | 944 Advance(); |
911 property_name_list.Add(0); // null-terminate string. | 945 first_part.Add(0); // null-terminate string. |
912 | 946 |
913 const char* property_name = property_name_list.ToConstVector().start(); | 947 if (second_part.is_empty()) { |
| 948 // First attempt to interpret as general category property value name. |
| 949 const char* name = first_part.ToConstVector().start(); |
| 950 if (LookupPropertyValueName(UCHAR_GENERAL_CATEGORY_MASK, name, result, |
| 951 zone())) { |
| 952 return true; |
| 953 } |
| 954 // Then attempt to interpret as binary property name with value name 'Y'. |
| 955 UProperty property = u_getPropertyEnum(name); |
| 956 if (property < UCHAR_BINARY_START) return false; |
| 957 if (property >= UCHAR_BINARY_LIMIT) return false; |
| 958 if (!IsExactPropertyAlias(name, property)) return false; |
| 959 return LookupPropertyValueName(property, "Y", result, zone()); |
| 960 } else { |
| 961 // Both property name and value name are specified. Attempt to interpret |
| 962 // the property name as enumerated property. |
| 963 const char* property_name = first_part.ToConstVector().start(); |
| 964 const char* value_name = second_part.ToConstVector().start(); |
| 965 UProperty property = u_getPropertyEnum(property_name); |
| 966 if (property < UCHAR_INT_START) return false; |
| 967 if (property >= UCHAR_INT_LIMIT) return false; |
| 968 if (!IsExactPropertyAlias(property_name, property)) return false; |
| 969 return LookupPropertyValueName(property, value_name, result, zone()); |
| 970 } |
| 971 } |
914 | 972 |
915 #define PROPERTY_NAME_LOOKUP(PROPERTY) \ | 973 #else // V8_I18N_SUPPORT |
916 do { \ | |
917 if (LookupPropertyClass(PROPERTY, property_name, result, zone())) { \ | |
918 return true; \ | |
919 } \ | |
920 } while (false) | |
921 | 974 |
922 // General_Category (gc) found in PropertyValueAliases.txt | 975 bool RegExpParser::ParsePropertyClass(ZoneList<CharacterRange>* result) { |
923 PROPERTY_NAME_LOOKUP(UCHAR_GENERAL_CATEGORY_MASK); | |
924 // Script (sc) found in Scripts.txt | |
925 PROPERTY_NAME_LOOKUP(UCHAR_SCRIPT); | |
926 // To disambiguate from script names, block names have an "In"-prefix. | |
927 if (property_name_list.length() > 3 && property_name[0] == 'I' && | |
928 property_name[1] == 'n') { | |
929 // Block (blk) found in Blocks.txt | |
930 property_name += 2; | |
931 PROPERTY_NAME_LOOKUP(UCHAR_BLOCK); | |
932 } | |
933 #undef PROPERTY_NAME_LOOKUP | |
934 #endif // V8_I18N_SUPPORT | |
935 return false; | 976 return false; |
936 } | 977 } |
937 | 978 |
| 979 #endif // V8_I18N_SUPPORT |
| 980 |
938 bool RegExpParser::ParseUnlimitedLengthHexNumber(int max_value, uc32* value) { | 981 bool RegExpParser::ParseUnlimitedLengthHexNumber(int max_value, uc32* value) { |
939 uc32 x = 0; | 982 uc32 x = 0; |
940 int d = HexValue(current()); | 983 int d = HexValue(current()); |
941 if (d < 0) { | 984 if (d < 0) { |
942 return false; | 985 return false; |
943 } | 986 } |
944 while (d >= 0) { | 987 while (d >= 0) { |
945 x = x * 16 + d; | 988 x = x * 16 + d; |
946 if (x > max_value) { | 989 if (x > max_value) { |
947 return false; | 990 return false; |
(...skipping 574 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1522 return false; | 1565 return false; |
1523 } | 1566 } |
1524 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom), | 1567 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom), |
1525 zone()); | 1568 zone()); |
1526 LAST(ADD_TERM); | 1569 LAST(ADD_TERM); |
1527 return true; | 1570 return true; |
1528 } | 1571 } |
1529 | 1572 |
1530 } // namespace internal | 1573 } // namespace internal |
1531 } // namespace v8 | 1574 } // namespace v8 |
OLD | NEW |