Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(911)

Side by Side Diff: src/regexp/regexp-parser.cc

Issue 1774513002: [regexp] extend property classes by script category. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master
Patch Set: adapt expectations Created 4 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/char-predicates-inl.h ('k') | test/mjsunit/harmony/regexp-property-general-category.js » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2016 the V8 project authors. All rights reserved. 1 // Copyright 2016 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "src/regexp/regexp-parser.h" 5 #include "src/regexp/regexp-parser.h"
6 6
7 #include "src/char-predicates-inl.h" 7 #include "src/char-predicates-inl.h"
8 #include "src/factory.h" 8 #include "src/factory.h"
9 #include "src/isolate.h" 9 #include "src/isolate.h"
10 #include "src/objects-inl.h" 10 #include "src/objects-inl.h"
(...skipping 820 matching lines...) Expand 10 before | Expand all | Expand 10 after
831 return true; 831 return true;
832 } 832 }
833 } 833 }
834 Reset(start); 834 Reset(start);
835 } 835 }
836 return result; 836 return result;
837 } 837 }
838 838
839 ZoneList<CharacterRange>* RegExpParser::ParsePropertyClass() { 839 ZoneList<CharacterRange>* RegExpParser::ParsePropertyClass() {
840 #ifdef V8_I18N_SUPPORT 840 #ifdef V8_I18N_SUPPORT
841 char property_name[3]; 841 ZoneList<char> property_name(0, zone());
842 memset(property_name, 0, sizeof(property_name));
843 if (current() == '{') { 842 if (current() == '{') {
844 Advance(); 843 for (Advance(); IsAlpha(current()); Advance()) {
845 if (current() < 'A' || current() > 'Z') return nullptr; 844 property_name.Add(static_cast<char>(current()), zone());
846 property_name[0] = static_cast<char>(current());
847 Advance();
848 if (current() >= 'a' && current() <= 'z') {
849 property_name[1] = static_cast<char>(current());
850 Advance();
851 } 845 }
852 if (current() != '}') return nullptr; 846 if (current() != '}') return nullptr;
853 } else if (current() >= 'A' && current() <= 'Z') { 847 } else if (IsAlpha(current())) {
854 property_name[0] = static_cast<char>(current()); 848 property_name.Add(static_cast<char>(current()), zone());
855 } else { 849 } else {
856 return nullptr; 850 return nullptr;
857 } 851 }
858 Advance(); 852 Advance();
853 property_name.Add(0, zone()); // null-terminate string.
859 854
860 int32_t category = 855 // Property names are defined in unicode database files. For aliases of
861 u_getPropertyValueEnum(UCHAR_GENERAL_CATEGORY_MASK, property_name); 856 // these property names, see PropertyValueAliases.txt.
862 if (category == UCHAR_INVALID_CODE) return nullptr; 857 UProperty kPropertyClasses[] = {
858 // General_Category (gc) found in PropertyValueAliases.txt
859 UCHAR_GENERAL_CATEGORY_MASK,
860 // Script (sc) found in Scripts.txt
861 UCHAR_SCRIPT,
862 };
863 863
864 USet* set = uset_openEmpty(); 864 for (int i = 0; i < arraysize(kPropertyClasses); i++) {
865 UErrorCode ec = U_ZERO_ERROR; 865 UProperty property_class = kPropertyClasses[i];
866 uset_applyIntPropertyValue(set, UCHAR_GENERAL_CATEGORY_MASK, category, &ec); 866 int32_t category = u_getPropertyValueEnum(
867 ZoneList<CharacterRange>* ranges = nullptr; 867 property_class, property_name.ToConstVector().start());
868 if (ec == U_ZERO_ERROR && !uset_isEmpty(set)) { 868 if (category == UCHAR_INVALID_CODE) continue;
869 uset_removeAllStrings(set); 869
870 int item_count = uset_getItemCount(set); 870 USet* set = uset_openEmpty();
871 ranges = new (zone()) ZoneList<CharacterRange>(item_count, zone()); 871 UErrorCode ec = U_ZERO_ERROR;
872 int item_result = 0; 872 uset_applyIntPropertyValue(set, property_class, category, &ec);
873 for (int i = 0; i < item_count; i++) { 873 ZoneList<CharacterRange>* ranges = nullptr;
874 uc32 start = 0; 874 if (ec == U_ZERO_ERROR && !uset_isEmpty(set)) {
875 uc32 end = 0; 875 uset_removeAllStrings(set);
876 item_result += uset_getItem(set, i, &start, &end, nullptr, 0, &ec); 876 int item_count = uset_getItemCount(set);
877 ranges->Add(CharacterRange::Range(start, end), zone()); 877 ranges = new (zone()) ZoneList<CharacterRange>(item_count, zone());
878 int item_result = 0;
879 for (int i = 0; i < item_count; i++) {
880 uc32 start = 0;
881 uc32 end = 0;
882 item_result += uset_getItem(set, i, &start, &end, nullptr, 0, &ec);
883 ranges->Add(CharacterRange::Range(start, end), zone());
884 }
885 DCHECK_EQ(U_ZERO_ERROR, ec);
886 DCHECK_EQ(0, item_result);
878 } 887 }
879 DCHECK_EQ(U_ZERO_ERROR, ec); 888 uset_close(set);
880 DCHECK_EQ(0, item_result); 889 return ranges;
881 } 890 }
882 uset_close(set); 891 #endif // V8_I18N_SUPPORT
883 return ranges; 892
884 #else // V8_I18N_SUPPORT
885 return nullptr; 893 return nullptr;
886 #endif // V8_I18N_SUPPORT
887 } 894 }
888 895
889 bool RegExpParser::ParseUnlimitedLengthHexNumber(int max_value, uc32* value) { 896 bool RegExpParser::ParseUnlimitedLengthHexNumber(int max_value, uc32* value) {
890 uc32 x = 0; 897 uc32 x = 0;
891 int d = HexValue(current()); 898 int d = HexValue(current());
892 if (d < 0) { 899 if (d < 0) {
893 return false; 900 return false;
894 } 901 }
895 while (d >= 0) { 902 while (d >= 0) {
896 x = x * 16 + d; 903 x = x * 16 + d;
(...skipping 546 matching lines...) Expand 10 before | Expand all | Expand 10 after
1443 return false; 1450 return false;
1444 } 1451 }
1445 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom), 1452 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom),
1446 zone()); 1453 zone());
1447 LAST(ADD_TERM); 1454 LAST(ADD_TERM);
1448 return true; 1455 return true;
1449 } 1456 }
1450 1457
1451 } // namespace internal 1458 } // namespace internal
1452 } // namespace v8 1459 } // namespace v8
OLDNEW
« no previous file with comments | « src/char-predicates-inl.h ('k') | test/mjsunit/harmony/regexp-property-general-category.js » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698