Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(63)

Side by Side Diff: src/regexp/regexp-parser.cc

Issue 1780183002: [regexp] extend property classes by unicode blocks. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master
Patch Set: rebase Created 4 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | test/mjsunit/harmony/regexp-property-blocks.js » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2016 the V8 project authors. All rights reserved. 1 // Copyright 2016 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "src/regexp/regexp-parser.h" 5 #include "src/regexp/regexp-parser.h"
6 6
7 #include "src/char-predicates-inl.h" 7 #include "src/char-predicates-inl.h"
8 #include "src/factory.h" 8 #include "src/factory.h"
9 #include "src/isolate.h" 9 #include "src/isolate.h"
10 #include "src/objects-inl.h" 10 #include "src/objects-inl.h"
(...skipping 825 matching lines...) Expand 10 before | Expand all | Expand 10 after
836 *value = unibrow::Utf16::CombineSurrogatePair(static_cast<uc16>(*value), 836 *value = unibrow::Utf16::CombineSurrogatePair(static_cast<uc16>(*value),
837 static_cast<uc16>(trail)); 837 static_cast<uc16>(trail));
838 return true; 838 return true;
839 } 839 }
840 } 840 }
841 Reset(start); 841 Reset(start);
842 } 842 }
843 return result; 843 return result;
844 } 844 }
845 845
846 bool LookupPropertyClass(UProperty property, const char* property_name,
847 ZoneList<CharacterRange>* result, Zone* zone) {
848 int32_t property_value = u_getPropertyValueEnum(property, property_name);
849 if (property_value == UCHAR_INVALID_CODE) return false;
850
851 USet* set = uset_openEmpty();
852 UErrorCode ec = U_ZERO_ERROR;
853 uset_applyIntPropertyValue(set, property, property_value, &ec);
854 bool success = ec == U_ZERO_ERROR && !uset_isEmpty(set);
855
856 if (success) {
857 uset_removeAllStrings(set);
858 int item_count = uset_getItemCount(set);
859 int item_result = 0;
860 for (int i = 0; i < item_count; i++) {
861 uc32 start = 0;
862 uc32 end = 0;
863 item_result += uset_getItem(set, i, &start, &end, nullptr, 0, &ec);
864 result->Add(CharacterRange::Range(start, end), zone);
865 }
866 DCHECK_EQ(U_ZERO_ERROR, ec);
867 DCHECK_EQ(0, item_result);
868 }
869 uset_close(set);
870 return success;
871 }
872
846 bool RegExpParser::ParsePropertyClass(ZoneList<CharacterRange>* result) { 873 bool RegExpParser::ParsePropertyClass(ZoneList<CharacterRange>* result) {
847 #ifdef V8_I18N_SUPPORT 874 #ifdef V8_I18N_SUPPORT
848 ZoneList<char> property_name(0, zone()); 875 List<char> property_name_list;
849 if (current() == '{') { 876 if (current() == '{') {
850 for (Advance(); current() != '}'; Advance()) { 877 for (Advance(); current() != '}'; Advance()) {
851 if (!has_next()) return false; 878 if (!has_next()) return false;
852 property_name.Add(static_cast<char>(current()), zone()); 879 property_name_list.Add(static_cast<char>(current()));
853 } 880 }
854 } else if (current() != kEndMarker) { 881 } else if (current() != kEndMarker) {
855 property_name.Add(static_cast<char>(current()), zone()); 882 property_name_list.Add(static_cast<char>(current()));
856 } else { 883 } else {
857 return false; 884 return false;
858 } 885 }
859 Advance(); 886 Advance();
860 property_name.Add(0, zone()); // null-terminate string. 887 property_name_list.Add(0); // null-terminate string.
861 888
862 // Property names are defined in unicode database files. For aliases of 889 const char* property_name = property_name_list.ToConstVector().start();
863 // these property names, see PropertyValueAliases.txt.
864 UProperty kPropertyClasses[] = {
865 // General_Category (gc) found in PropertyValueAliases.txt
866 UCHAR_GENERAL_CATEGORY_MASK,
867 // Script (sc) found in Scripts.txt
868 UCHAR_SCRIPT,
869 };
870 890
871 for (int i = 0; i < arraysize(kPropertyClasses); i++) { 891 #define PROPERTY_NAME_LOOKUP(PROPERTY) \
872 UProperty property_class = kPropertyClasses[i]; 892 do { \
873 int32_t category = u_getPropertyValueEnum( 893 if (LookupPropertyClass(PROPERTY, property_name, result, zone())) { \
874 property_class, property_name.ToConstVector().start()); 894 return true; \
875 if (category == UCHAR_INVALID_CODE) continue; 895 } \
896 } while (false)
876 897
877 USet* set = uset_openEmpty(); 898 // General_Category (gc) found in PropertyValueAliases.txt
878 UErrorCode ec = U_ZERO_ERROR; 899 PROPERTY_NAME_LOOKUP(UCHAR_GENERAL_CATEGORY_MASK);
879 uset_applyIntPropertyValue(set, property_class, category, &ec); 900 // Script (sc) found in Scripts.txt
880 if (ec == U_ZERO_ERROR && !uset_isEmpty(set)) { 901 PROPERTY_NAME_LOOKUP(UCHAR_SCRIPT);
881 uset_removeAllStrings(set); 902 // To disambiguate from script names, block names have an "In"-prefix.
882 int item_count = uset_getItemCount(set); 903 if (property_name_list.length() > 3 && property_name[0] == 'I' &&
883 int item_result = 0; 904 property_name[1] == 'n') {
884 for (int i = 0; i < item_count; i++) { 905 // Block (blk) found in Blocks.txt
885 uc32 start = 0; 906 property_name += 2;
886 uc32 end = 0; 907 PROPERTY_NAME_LOOKUP(UCHAR_BLOCK);
887 item_result += uset_getItem(set, i, &start, &end, nullptr, 0, &ec);
888 result->Add(CharacterRange::Range(start, end), zone());
889 }
890 DCHECK_EQ(U_ZERO_ERROR, ec);
891 DCHECK_EQ(0, item_result);
892 }
893 uset_close(set);
894 return true;
895 } 908 }
909 #undef PROPERTY_NAME_LOOKUP
896 #endif // V8_I18N_SUPPORT 910 #endif // V8_I18N_SUPPORT
897
898 return false; 911 return false;
899 } 912 }
900 913
901 bool RegExpParser::ParseUnlimitedLengthHexNumber(int max_value, uc32* value) { 914 bool RegExpParser::ParseUnlimitedLengthHexNumber(int max_value, uc32* value) {
902 uc32 x = 0; 915 uc32 x = 0;
903 int d = HexValue(current()); 916 int d = HexValue(current());
904 if (d < 0) { 917 if (d < 0) {
905 return false; 918 return false;
906 } 919 }
907 while (d >= 0) { 920 while (d >= 0) {
(...skipping 577 matching lines...) Expand 10 before | Expand all | Expand 10 after
1485 return false; 1498 return false;
1486 } 1499 }
1487 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom), 1500 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom),
1488 zone()); 1501 zone());
1489 LAST(ADD_TERM); 1502 LAST(ADD_TERM);
1490 return true; 1503 return true;
1491 } 1504 }
1492 1505
1493 } // namespace internal 1506 } // namespace internal
1494 } // namespace v8 1507 } // namespace v8
OLDNEW
« no previous file with comments | « no previous file | test/mjsunit/harmony/regexp-property-blocks.js » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698