Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(116)

Side by Side Diff: src/regexp/regexp-parser.cc

Issue 1780183002: [regexp] extend property classes by unicode blocks. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master
Patch Set: . Created 4 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | test/mjsunit/harmony/regexp-property-blocks.js » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2016 the V8 project authors. All rights reserved. 1 // Copyright 2016 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "src/regexp/regexp-parser.h" 5 #include "src/regexp/regexp-parser.h"
6 6
7 #include "src/char-predicates-inl.h" 7 #include "src/char-predicates-inl.h"
8 #include "src/factory.h" 8 #include "src/factory.h"
9 #include "src/isolate.h" 9 #include "src/isolate.h"
10 #include "src/objects-inl.h" 10 #include "src/objects-inl.h"
(...skipping 818 matching lines...) Expand 10 before | Expand all | Expand 10 after
829 *value = unibrow::Utf16::CombineSurrogatePair(static_cast<uc16>(*value), 829 *value = unibrow::Utf16::CombineSurrogatePair(static_cast<uc16>(*value),
830 static_cast<uc16>(trail)); 830 static_cast<uc16>(trail));
831 return true; 831 return true;
832 } 832 }
833 } 833 }
834 Reset(start); 834 Reset(start);
835 } 835 }
836 return result; 836 return result;
837 } 837 }
838 838
839 ZoneList<CharacterRange>* LookupPropertyClass(UProperty property,
840 const char* property_name,
841 Zone* zone) {
842 int32_t property_value = u_getPropertyValueEnum(property, property_name);
843 if (property_value == UCHAR_INVALID_CODE) return nullptr;
844
845 USet* set = uset_openEmpty();
846 UErrorCode ec = U_ZERO_ERROR;
847 uset_applyIntPropertyValue(set, property, property_value, &ec);
848 ZoneList<CharacterRange>* ranges = nullptr;
849 if (ec == U_ZERO_ERROR && !uset_isEmpty(set)) {
850 uset_removeAllStrings(set);
851 int item_count = uset_getItemCount(set);
852 ranges = new (zone) ZoneList<CharacterRange>(item_count, zone);
853 int item_result = 0;
854 for (int i = 0; i < item_count; i++) {
855 uc32 start = 0;
856 uc32 end = 0;
857 item_result += uset_getItem(set, i, &start, &end, nullptr, 0, &ec);
858 ranges->Add(CharacterRange::Range(start, end), zone);
859 }
860 DCHECK_EQ(U_ZERO_ERROR, ec);
861 DCHECK_EQ(0, item_result);
862 }
863 uset_close(set);
864 return ranges;
865 }
866
839 ZoneList<CharacterRange>* RegExpParser::ParsePropertyClass() { 867 ZoneList<CharacterRange>* RegExpParser::ParsePropertyClass() {
840 #ifdef V8_I18N_SUPPORT 868 #ifdef V8_I18N_SUPPORT
841 ZoneList<char> property_name(0, zone()); 869 List<char> property_name_list(0);
842 if (current() == '{') { 870 if (current() == '{') {
843 for (Advance(); current() != '}'; Advance()) { 871 for (Advance(); current() != '}'; Advance()) {
844 if (!has_next()) return nullptr; 872 if (!has_next()) return nullptr;
845 property_name.Add(static_cast<char>(current()), zone()); 873 property_name_list.Add(static_cast<char>(current()));
846 } 874 }
847 } else if (current() != kEndMarker) { 875 } else if (current() != kEndMarker) {
848 property_name.Add(static_cast<char>(current()), zone()); 876 property_name_list.Add(static_cast<char>(current()));
849 } else { 877 } else {
850 return nullptr; 878 return nullptr;
851 } 879 }
852 Advance(); 880 Advance();
853 property_name.Add(0, zone()); // null-terminate string. 881 property_name_list.Add(0); // null-terminate string.
882
883 const char* property_name = property_name_list.ToConstVector().start();
884
885 ZoneList<CharacterRange>* ranges = nullptr;
886
887 #define PROPERTY_NAME_LOOKUP(PROPERTY) \
888 do { \
889 ranges = LookupPropertyClass(PROPERTY, property_name, zone()); \
890 if (ranges != nullptr) return ranges; \
891 } while (false)
854 892
855 // Property names are defined in unicode database files. For aliases of 893 // Property names are defined in unicode database files. For aliases of
856 // these property names, see PropertyValueAliases.txt. 894 // these property names, see PropertyValueAliases.txt.
857 UProperty kPropertyClasses[] = { 895 // General_Category (gc) found in PropertyValueAliases.txt
858 // General_Category (gc) found in PropertyValueAliases.txt 896 PROPERTY_NAME_LOOKUP(UCHAR_GENERAL_CATEGORY_MASK);
859 UCHAR_GENERAL_CATEGORY_MASK, 897 // Script (sc) found in Scripts.txt
860 // Script (sc) found in Scripts.txt 898 PROPERTY_NAME_LOOKUP(UCHAR_SCRIPT);
861 UCHAR_SCRIPT, 899 // Block (blk) found in Blocks.txt
862 }; 900 PROPERTY_NAME_LOOKUP(UCHAR_BLOCK);
863 901 // For disambiguation, script names may have an "Is"-prefix and block names
864 for (int i = 0; i < arraysize(kPropertyClasses); i++) { 902 // may have an "In"-prefix. This convention is up for debate and for the spec
865 UProperty property_class = kPropertyClasses[i]; 903 // to settle.
Dan Ehrenberg 2016/03/10 18:43:20 I don't like this mechanism of letting them overl…
866 int32_t category = u_getPropertyValueEnum( 904 if (property_name_list.length() > 3 && property_name[0] == 'I') {
867 property_class, property_name.ToConstVector().start()); 905 char second_char = property_name[1];
868 if (category == UCHAR_INVALID_CODE) continue; 906 property_name += 2;
869 907 if (second_char == 's') PROPERTY_NAME_LOOKUP(UCHAR_SCRIPT);
870 USet* set = uset_openEmpty(); 908 if (second_char == 'n') PROPERTY_NAME_LOOKUP(UCHAR_BLOCK);
871 UErrorCode ec = U_ZERO_ERROR;
872 uset_applyIntPropertyValue(set, property_class, category, &ec);
873 ZoneList<CharacterRange>* ranges = nullptr;
874 if (ec == U_ZERO_ERROR && !uset_isEmpty(set)) {
875 uset_removeAllStrings(set);
876 int item_count = uset_getItemCount(set);
877 ranges = new (zone()) ZoneList<CharacterRange>(item_count, zone());
878 int item_result = 0;
879 for (int i = 0; i < item_count; i++) {
880 uc32 start = 0;
881 uc32 end = 0;
882 item_result += uset_getItem(set, i, &start, &end, nullptr, 0, &ec);
883 ranges->Add(CharacterRange::Range(start, end), zone());
884 }
885 DCHECK_EQ(U_ZERO_ERROR, ec);
886 DCHECK_EQ(0, item_result);
887 }
888 uset_close(set);
889 return ranges;
890 } 909 }
910 #undef PROPERTY_NAME_LOOKUP
891 #endif // V8_I18N_SUPPORT 911 #endif // V8_I18N_SUPPORT
892
893 return nullptr; 912 return nullptr;
894 } 913 }
895 914
896 bool RegExpParser::ParseUnlimitedLengthHexNumber(int max_value, uc32* value) { 915 bool RegExpParser::ParseUnlimitedLengthHexNumber(int max_value, uc32* value) {
897 uc32 x = 0; 916 uc32 x = 0;
898 int d = HexValue(current()); 917 int d = HexValue(current());
899 if (d < 0) { 918 if (d < 0) {
900 return false; 919 return false;
901 } 920 }
902 while (d >= 0) { 921 while (d >= 0) {
(...skipping 547 matching lines...) Expand 10 before | Expand all | Expand 10 after
1450 return false; 1469 return false;
1451 } 1470 }
1452 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom), 1471 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom),
1453 zone()); 1472 zone());
1454 LAST(ADD_TERM); 1473 LAST(ADD_TERM);
1455 return true; 1474 return true;
1456 } 1475 }
1457 1476
1458 } // namespace internal 1477 } // namespace internal
1459 } // namespace v8 1478 } // namespace v8
OLDNEW
« no previous file with comments | « no previous file | test/mjsunit/harmony/regexp-property-blocks.js » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698