OLD | NEW |
1 // Copyright 2016 the V8 project authors. All rights reserved. | 1 // Copyright 2016 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "src/regexp/regexp-parser.h" | 5 #include "src/regexp/regexp-parser.h" |
6 | 6 |
7 #include "src/char-predicates-inl.h" | 7 #include "src/char-predicates-inl.h" |
8 #include "src/factory.h" | 8 #include "src/factory.h" |
9 #include "src/isolate.h" | 9 #include "src/isolate.h" |
10 #include "src/objects-inl.h" | 10 #include "src/objects-inl.h" |
(...skipping 825 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
836 *value = unibrow::Utf16::CombineSurrogatePair(static_cast<uc16>(*value), | 836 *value = unibrow::Utf16::CombineSurrogatePair(static_cast<uc16>(*value), |
837 static_cast<uc16>(trail)); | 837 static_cast<uc16>(trail)); |
838 return true; | 838 return true; |
839 } | 839 } |
840 } | 840 } |
841 Reset(start); | 841 Reset(start); |
842 } | 842 } |
843 return result; | 843 return result; |
844 } | 844 } |
845 | 845 |
#ifdef V8_I18N_SUPPORT
// Resolves |property_name| as a value name of the ICU property |property| and,
// on success, appends the code point ranges of the matching set to |result|
// (allocated in |zone|).
//
// Returns false without touching |result| when the name is not a known value
// of |property|, when ICU reports anything other than U_ZERO_ERROR while
// building the set, or when the resulting set is empty.
//
// This helper relies on ICU types and functions, so it is only compiled when
// V8_I18N_SUPPORT is defined; its caller uses it under the same guard.
bool LookupPropertyClass(UProperty property, const char* property_name,
                         ZoneList<CharacterRange>* result, Zone* zone) {
  int32_t property_value = u_getPropertyValueEnum(property, property_name);
  if (property_value == UCHAR_INVALID_CODE) return false;

  USet* set = uset_openEmpty();
  UErrorCode ec = U_ZERO_ERROR;
  uset_applyIntPropertyValue(set, property, property_value, &ec);
  bool success = ec == U_ZERO_ERROR && !uset_isEmpty(set);

  if (success) {
    // Drop multi-character strings so that every remaining item is a plain
    // [start, end] code point range.
    uset_removeAllStrings(set);
    int item_count = uset_getItemCount(set);
    // Accumulates the return values of uset_getItem; expected to stay 0
    // because only range items remain (checked below).
    int item_result = 0;
    for (int i = 0; i < item_count; i++) {
      uc32 start = 0;
      uc32 end = 0;
      item_result += uset_getItem(set, i, &start, &end, nullptr, 0, &ec);
      result->Add(CharacterRange::Range(start, end), zone);
    }
    DCHECK_EQ(U_ZERO_ERROR, ec);
    DCHECK_EQ(0, item_result);
  }
  // The set is closed on both the success and the failure path.
  uset_close(set);
  return success;
}
#endif  // V8_I18N_SUPPORT
| 872 |
846 bool RegExpParser::ParsePropertyClass(ZoneList<CharacterRange>* result) { | 873 bool RegExpParser::ParsePropertyClass(ZoneList<CharacterRange>* result) { |
847 #ifdef V8_I18N_SUPPORT | 874 #ifdef V8_I18N_SUPPORT |
848 ZoneList<char> property_name(0, zone()); | 875 List<char> property_name_list; |
849 if (current() == '{') { | 876 if (current() == '{') { |
850 for (Advance(); current() != '}'; Advance()) { | 877 for (Advance(); current() != '}'; Advance()) { |
851 if (!has_next()) return false; | 878 if (!has_next()) return false; |
852 property_name.Add(static_cast<char>(current()), zone()); | 879 property_name_list.Add(static_cast<char>(current())); |
853 } | 880 } |
854 } else if (current() != kEndMarker) { | 881 } else if (current() != kEndMarker) { |
855 property_name.Add(static_cast<char>(current()), zone()); | 882 property_name_list.Add(static_cast<char>(current())); |
856 } else { | 883 } else { |
857 return false; | 884 return false; |
858 } | 885 } |
859 Advance(); | 886 Advance(); |
860 property_name.Add(0, zone()); // null-terminate string. | 887 property_name_list.Add(0); // null-terminate string. |
861 | 888 |
862 // Property names are defined in unicode database files. For aliases of | 889 const char* property_name = property_name_list.ToConstVector().start(); |
863 // these property names, see PropertyValueAliases.txt. | |
864 UProperty kPropertyClasses[] = { | |
865 // General_Category (gc) found in PropertyValueAliases.txt | |
866 UCHAR_GENERAL_CATEGORY_MASK, | |
867 // Script (sc) found in Scripts.txt | |
868 UCHAR_SCRIPT, | |
869 }; | |
870 | 890 |
871 for (int i = 0; i < arraysize(kPropertyClasses); i++) { | 891 #define PROPERTY_NAME_LOOKUP(PROPERTY) \ |
872 UProperty property_class = kPropertyClasses[i]; | 892 do { \ |
873 int32_t category = u_getPropertyValueEnum( | 893 if (LookupPropertyClass(PROPERTY, property_name, result, zone())) { \ |
874 property_class, property_name.ToConstVector().start()); | 894 return true; \ |
875 if (category == UCHAR_INVALID_CODE) continue; | 895 } \ |
| 896 } while (false) |
876 | 897 |
877 USet* set = uset_openEmpty(); | 898 // General_Category (gc) found in PropertyValueAliases.txt |
878 UErrorCode ec = U_ZERO_ERROR; | 899 PROPERTY_NAME_LOOKUP(UCHAR_GENERAL_CATEGORY_MASK); |
879 uset_applyIntPropertyValue(set, property_class, category, &ec); | 900 // Script (sc) found in Scripts.txt |
880 if (ec == U_ZERO_ERROR && !uset_isEmpty(set)) { | 901 PROPERTY_NAME_LOOKUP(UCHAR_SCRIPT); |
881 uset_removeAllStrings(set); | 902 // To disambiguate from script names, block names have an "In"-prefix. |
882 int item_count = uset_getItemCount(set); | 903 if (property_name_list.length() > 3 && property_name[0] == 'I' && |
883 int item_result = 0; | 904 property_name[1] == 'n') { |
884 for (int i = 0; i < item_count; i++) { | 905 // Block (blk) found in Blocks.txt |
885 uc32 start = 0; | 906 property_name += 2; |
886 uc32 end = 0; | 907 PROPERTY_NAME_LOOKUP(UCHAR_BLOCK); |
887 item_result += uset_getItem(set, i, &start, &end, nullptr, 0, &ec); | |
888 result->Add(CharacterRange::Range(start, end), zone()); | |
889 } | |
890 DCHECK_EQ(U_ZERO_ERROR, ec); | |
891 DCHECK_EQ(0, item_result); | |
892 } | |
893 uset_close(set); | |
894 return true; | |
895 } | 908 } |
| 909 #undef PROPERTY_NAME_LOOKUP |
896 #endif // V8_I18N_SUPPORT | 910 #endif // V8_I18N_SUPPORT |
897 | |
898 return false; | 911 return false; |
899 } | 912 } |
900 | 913 |
901 bool RegExpParser::ParseUnlimitedLengthHexNumber(int max_value, uc32* value) { | 914 bool RegExpParser::ParseUnlimitedLengthHexNumber(int max_value, uc32* value) { |
902 uc32 x = 0; | 915 uc32 x = 0; |
903 int d = HexValue(current()); | 916 int d = HexValue(current()); |
904 if (d < 0) { | 917 if (d < 0) { |
905 return false; | 918 return false; |
906 } | 919 } |
907 while (d >= 0) { | 920 while (d >= 0) { |
(...skipping 577 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1485 return false; | 1498 return false; |
1486 } | 1499 } |
1487 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom), | 1500 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom), |
1488 zone()); | 1501 zone()); |
1489 LAST(ADD_TERM); | 1502 LAST(ADD_TERM); |
1490 return true; | 1503 return true; |
1491 } | 1504 } |
1492 | 1505 |
1493 } // namespace internal | 1506 } // namespace internal |
1494 } // namespace v8 | 1507 } // namespace v8 |
OLD | NEW |