Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(455)

Side by Side Diff: src/regexp/regexp-parser.cc

Issue 2065083002: Revert of [regexp] implement \p{Any}, \p{Ascii}, and \p{Assigned}. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master
Patch Set: Created 4 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/regexp/regexp-parser.h ('k') | test/mjsunit/harmony/regexp-property-general-category.js » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2016 the V8 project authors. All rights reserved. 1 // Copyright 2016 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "src/regexp/regexp-parser.h" 5 #include "src/regexp/regexp-parser.h"
6 6
7 #include "src/char-predicates-inl.h" 7 #include "src/char-predicates-inl.h"
8 #include "src/factory.h" 8 #include "src/factory.h"
9 #include "src/isolate.h" 9 #include "src/isolate.h"
10 #include "src/objects-inl.h" 10 #include "src/objects-inl.h"
(...skipping 344 matching lines...) Expand 10 before | Expand all | Expand 10 after
355 break; 355 break;
356 } 356 }
357 case 'p': 357 case 'p':
358 case 'P': { 358 case 'P': {
359 uc32 p = Next(); 359 uc32 p = Next();
360 Advance(2); 360 Advance(2);
361 if (unicode()) { 361 if (unicode()) {
362 if (FLAG_harmony_regexp_property) { 362 if (FLAG_harmony_regexp_property) {
363 ZoneList<CharacterRange>* ranges = 363 ZoneList<CharacterRange>* ranges =
364 new (zone()) ZoneList<CharacterRange>(2, zone()); 364 new (zone()) ZoneList<CharacterRange>(2, zone());
365 if (!ParsePropertyClass(ranges, p == 'P')) { 365 if (!ParsePropertyClass(ranges)) {
366 return ReportError(CStrVector("Invalid property name")); 366 return ReportError(CStrVector("Invalid property name"));
367 } 367 }
368 RegExpCharacterClass* cc = 368 RegExpCharacterClass* cc =
369 new (zone()) RegExpCharacterClass(ranges, false); 369 new (zone()) RegExpCharacterClass(ranges, p == 'P');
370 builder->AddCharacterClass(cc); 370 builder->AddCharacterClass(cc);
371 } else { 371 } else {
372 // With /u, no identity escapes except for syntax characters 372 // With /u, no identity escapes except for syntax characters
373 // are allowed. Otherwise, all identity escapes are allowed. 373 // are allowed. Otherwise, all identity escapes are allowed.
374 return ReportError(CStrVector("Invalid escape")); 374 return ReportError(CStrVector("Invalid escape"));
375 } 375 }
376 } else { 376 } else {
377 builder->AddCharacter(p); 377 builder->AddCharacter(p);
378 } 378 }
379 break; 379 break;
(...skipping 458 matching lines...) Expand 10 before | Expand all | Expand 10 after
838 static_cast<uc16>(trail)); 838 static_cast<uc16>(trail));
839 return true; 839 return true;
840 } 840 }
841 } 841 }
842 Reset(start); 842 Reset(start);
843 } 843 }
844 return result; 844 return result;
845 } 845 }
846 846
847 #ifdef V8_I18N_SUPPORT 847 #ifdef V8_I18N_SUPPORT
848
849 namespace {
850
851 bool IsExactPropertyAlias(const char* property_name, UProperty property) { 848 bool IsExactPropertyAlias(const char* property_name, UProperty property) {
852 const char* short_name = u_getPropertyName(property, U_SHORT_PROPERTY_NAME); 849 const char* short_name = u_getPropertyName(property, U_SHORT_PROPERTY_NAME);
853 if (short_name != NULL && strcmp(property_name, short_name) == 0) return true; 850 if (short_name != NULL && strcmp(property_name, short_name) == 0) return true;
854 for (int i = 0;; i++) { 851 for (int i = 0;; i++) {
855 const char* long_name = u_getPropertyName( 852 const char* long_name = u_getPropertyName(
856 property, static_cast<UPropertyNameChoice>(U_LONG_PROPERTY_NAME + i)); 853 property, static_cast<UPropertyNameChoice>(U_LONG_PROPERTY_NAME + i));
857 if (long_name == NULL) break; 854 if (long_name == NULL) break;
858 if (strcmp(property_name, long_name) == 0) return true; 855 if (strcmp(property_name, long_name) == 0) return true;
859 } 856 }
860 return false; 857 return false;
(...skipping 10 matching lines...) Expand all
871 const char* long_name = u_getPropertyValueName( 868 const char* long_name = u_getPropertyValueName(
872 property, property_value, 869 property, property_value,
873 static_cast<UPropertyNameChoice>(U_LONG_PROPERTY_NAME + i)); 870 static_cast<UPropertyNameChoice>(U_LONG_PROPERTY_NAME + i));
874 if (long_name == NULL) break; 871 if (long_name == NULL) break;
875 if (strcmp(property_value_name, long_name) == 0) return true; 872 if (strcmp(property_value_name, long_name) == 0) return true;
876 } 873 }
877 return false; 874 return false;
878 } 875 }
879 876
880 bool LookupPropertyValueName(UProperty property, 877 bool LookupPropertyValueName(UProperty property,
881 const char* property_value_name, bool negate, 878 const char* property_value_name,
882 ZoneList<CharacterRange>* result, Zone* zone) { 879 ZoneList<CharacterRange>* result, Zone* zone) {
883 int32_t property_value = 880 int32_t property_value =
884 u_getPropertyValueEnum(property, property_value_name); 881 u_getPropertyValueEnum(property, property_value_name);
885 if (property_value == UCHAR_INVALID_CODE) return false; 882 if (property_value == UCHAR_INVALID_CODE) return false;
886 883
887 // We require the property name to match exactly to one of the property value 884 // We require the property name to match exactly to one of the property value
888 // aliases. However, u_getPropertyValueEnum uses loose matching. 885 // aliases. However, u_getPropertyValueEnum uses loose matching.
889 if (!IsExactPropertyValueAlias(property_value_name, property, 886 if (!IsExactPropertyValueAlias(property_value_name, property,
890 property_value)) { 887 property_value)) {
891 return false; 888 return false;
892 } 889 }
893 890
894 USet* set = uset_openEmpty(); 891 USet* set = uset_openEmpty();
895 UErrorCode ec = U_ZERO_ERROR; 892 UErrorCode ec = U_ZERO_ERROR;
896 uset_applyIntPropertyValue(set, property, property_value, &ec); 893 uset_applyIntPropertyValue(set, property, property_value, &ec);
897 bool success = ec == U_ZERO_ERROR && !uset_isEmpty(set); 894 bool success = ec == U_ZERO_ERROR && !uset_isEmpty(set);
898 895
899 if (success) { 896 if (success) {
900 uset_removeAllStrings(set); 897 uset_removeAllStrings(set);
901 if (negate) uset_complement(set);
902 int item_count = uset_getItemCount(set); 898 int item_count = uset_getItemCount(set);
903 int item_result = 0; 899 int item_result = 0;
904 for (int i = 0; i < item_count; i++) { 900 for (int i = 0; i < item_count; i++) {
905 uc32 start = 0; 901 uc32 start = 0;
906 uc32 end = 0; 902 uc32 end = 0;
907 item_result += uset_getItem(set, i, &start, &end, nullptr, 0, &ec); 903 item_result += uset_getItem(set, i, &start, &end, nullptr, 0, &ec);
908 result->Add(CharacterRange::Range(start, end), zone); 904 result->Add(CharacterRange::Range(start, end), zone);
909 } 905 }
910 DCHECK_EQ(U_ZERO_ERROR, ec); 906 DCHECK_EQ(U_ZERO_ERROR, ec);
911 DCHECK_EQ(0, item_result); 907 DCHECK_EQ(0, item_result);
912 } 908 }
913 uset_close(set); 909 uset_close(set);
914 return success; 910 return success;
915 } 911 }
916 912
917 template <size_t N> 913 bool RegExpParser::ParsePropertyClass(ZoneList<CharacterRange>* result) {
918 inline bool NameEquals(const char* name, const char (&literal)[N]) {
919 return strncmp(name, literal, N + 1) == 0;
920 }
921
922 bool LookupSpecialPropertyValueName(const char* name,
923 ZoneList<CharacterRange>* result,
924 bool negate, Zone* zone) {
925 if (NameEquals(name, "Any")) {
926 if (!negate) result->Add(CharacterRange::Everything(), zone);
927 } else if (NameEquals(name, "ASCII")) {
928 result->Add(negate ? CharacterRange::Range(0x80, String::kMaxCodePoint)
929 : CharacterRange::Range(0x0, 0x7f),
930 zone);
931 } else if (NameEquals(name, "Assigned")) {
932 return LookupPropertyValueName(UCHAR_GENERAL_CATEGORY, "Unassigned",
933 !negate, result, zone);
934 } else {
935 return false;
936 }
937 return true;
938 }
939
940 } // anonymous namespace
941
942 bool RegExpParser::ParsePropertyClass(ZoneList<CharacterRange>* result,
943 bool negate) {
944 // Parse the property class as follows: 914 // Parse the property class as follows:
945 // - In \p{name}, 'name' is interpreted 915 // - In \p{name}, 'name' is interpreted
946 // - either as a general category property value name. 916 // - either as a general category property value name.
947 // - or as a binary property name. 917 // - or as a binary property name.
948 // - In \p{name=value}, 'name' is interpreted as an enumerated property name, 918 // - In \p{name=value}, 'name' is interpreted as an enumerated property name,
949 // and 'value' is interpreted as one of the available property value names. 919 // and 'value' is interpreted as one of the available property value names.
950 // - Aliases in PropertyAlias.txt and PropertyValueAlias.txt can be used. 920 // - Aliases in PropertyAlias.txt and PropertyValueAlias.txt can be used.
951 // - Loose matching is not applied. 921 // - Loose matching is not applied.
952 List<char> first_part; 922 List<char> first_part;
953 List<char> second_part; 923 List<char> second_part;
(...skipping 12 matching lines...) Expand all
966 } 936 }
967 } else { 937 } else {
968 return false; 938 return false;
969 } 939 }
970 Advance(); 940 Advance();
971 first_part.Add(0); // null-terminate string. 941 first_part.Add(0); // null-terminate string.
972 942
973 if (second_part.is_empty()) { 943 if (second_part.is_empty()) {
974 // First attempt to interpret as general category property value name. 944 // First attempt to interpret as general category property value name.
975 const char* name = first_part.ToConstVector().start(); 945 const char* name = first_part.ToConstVector().start();
976 if (LookupPropertyValueName(UCHAR_GENERAL_CATEGORY_MASK, name, negate, 946 if (LookupPropertyValueName(UCHAR_GENERAL_CATEGORY_MASK, name, result,
977 result, zone())) { 947 zone())) {
978 return true;
979 }
980 // Interpret "Any", "ASCII", and "Assigned".
981 if (LookupSpecialPropertyValueName(name, result, negate, zone())) {
982 return true; 948 return true;
983 } 949 }
984 // Then attempt to interpret as binary property name with value name 'Y'. 950 // Then attempt to interpret as binary property name with value name 'Y'.
985 UProperty property = u_getPropertyEnum(name); 951 UProperty property = u_getPropertyEnum(name);
986 if (property < UCHAR_BINARY_START) return false; 952 if (property < UCHAR_BINARY_START) return false;
987 if (property >= UCHAR_BINARY_LIMIT) return false; 953 if (property >= UCHAR_BINARY_LIMIT) return false;
988 if (!IsExactPropertyAlias(name, property)) return false; 954 if (!IsExactPropertyAlias(name, property)) return false;
989 return LookupPropertyValueName(property, negate ? "N" : "Y", false, result, 955 return LookupPropertyValueName(property, "Y", result, zone());
990 zone());
991 } else { 956 } else {
992 // Both property name and value name are specified. Attempt to interpret 957 // Both property name and value name are specified. Attempt to interpret
993 // the property name as enumerated property. 958 // the property name as enumerated property.
994 const char* property_name = first_part.ToConstVector().start(); 959 const char* property_name = first_part.ToConstVector().start();
995 const char* value_name = second_part.ToConstVector().start(); 960 const char* value_name = second_part.ToConstVector().start();
996 UProperty property = u_getPropertyEnum(property_name); 961 UProperty property = u_getPropertyEnum(property_name);
997 if (property < UCHAR_INT_START) return false; 962 if (property < UCHAR_INT_START) return false;
998 if (property >= UCHAR_INT_LIMIT) return false; 963 if (property >= UCHAR_INT_LIMIT) return false;
999 if (!IsExactPropertyAlias(property_name, property)) return false; 964 if (!IsExactPropertyAlias(property_name, property)) return false;
1000 return LookupPropertyValueName(property, value_name, negate, result, 965 return LookupPropertyValueName(property, value_name, result, zone());
1001 zone());
1002 } 966 }
1003 } 967 }
1004 968
1005 #else // V8_I18N_SUPPORT 969 #else // V8_I18N_SUPPORT
1006 970
1007 bool RegExpParser::ParsePropertyClass(ZoneList<CharacterRange>* result) { 971 bool RegExpParser::ParsePropertyClass(ZoneList<CharacterRange>* result) {
1008 return false; 972 return false;
1009 } 973 }
1010 974
1011 #endif // V8_I18N_SUPPORT 975 #endif // V8_I18N_SUPPORT
(...skipping 176 matching lines...) Expand 10 before | Expand all | Expand 10 after
1188 } 1152 }
1189 1153
1190 bool RegExpParser::ParseClassProperty(ZoneList<CharacterRange>* ranges) { 1154 bool RegExpParser::ParseClassProperty(ZoneList<CharacterRange>* ranges) {
1191 if (!FLAG_harmony_regexp_property) return false; 1155 if (!FLAG_harmony_regexp_property) return false;
1192 if (!unicode()) return false; 1156 if (!unicode()) return false;
1193 if (current() != '\\') return false; 1157 if (current() != '\\') return false;
1194 uc32 next = Next(); 1158 uc32 next = Next();
1195 bool parse_success = false; 1159 bool parse_success = false;
1196 if (next == 'p') { 1160 if (next == 'p') {
1197 Advance(2); 1161 Advance(2);
1198 parse_success = ParsePropertyClass(ranges, false); 1162 parse_success = ParsePropertyClass(ranges);
1199 } else if (next == 'P') { 1163 } else if (next == 'P') {
1200 Advance(2); 1164 Advance(2);
1201 parse_success = ParsePropertyClass(ranges, true); 1165 ZoneList<CharacterRange>* property_class =
1166 new (zone()) ZoneList<CharacterRange>(2, zone());
1167 parse_success = ParsePropertyClass(property_class);
1168 if (parse_success) {
1169 ZoneList<CharacterRange>* negated =
1170 new (zone()) ZoneList<CharacterRange>(2, zone());
1171 CharacterRange::Negate(property_class, negated, zone());
1172 const Vector<CharacterRange> negated_vector = negated->ToVector();
1173 ranges->AddAll(negated_vector, zone());
1174 }
1202 } else { 1175 } else {
1203 return false; 1176 return false;
1204 } 1177 }
1205 if (!parse_success) 1178 if (!parse_success)
1206 ReportError(CStrVector("Invalid property name in character class")); 1179 ReportError(CStrVector("Invalid property name in character class"));
1207 return parse_success; 1180 return parse_success;
1208 } 1181 }
1209 1182
1210 RegExpTree* RegExpParser::ParseCharacterClass() { 1183 RegExpTree* RegExpParser::ParseCharacterClass() {
1211 static const char* kUnterminated = "Unterminated character class"; 1184 static const char* kUnterminated = "Unterminated character class";
(...skipping 372 matching lines...) Expand 10 before | Expand all | Expand 10 after
1584 return false; 1557 return false;
1585 } 1558 }
1586 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom), 1559 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom),
1587 zone()); 1560 zone());
1588 LAST(ADD_TERM); 1561 LAST(ADD_TERM);
1589 return true; 1562 return true;
1590 } 1563 }
1591 1564
1592 } // namespace internal 1565 } // namespace internal
1593 } // namespace v8 1566 } // namespace v8
OLDNEW
« no previous file with comments | « src/regexp/regexp-parser.h ('k') | test/mjsunit/harmony/regexp-property-general-category.js » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698