Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2016 the V8 project authors. All rights reserved. | 1 // Copyright 2016 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "src/regexp/regexp-parser.h" | 5 #include "src/regexp/regexp-parser.h" |
| 6 | 6 |
| 7 #include "src/char-predicates-inl.h" | 7 #include "src/char-predicates-inl.h" |
| 8 #include "src/factory.h" | 8 #include "src/factory.h" |
| 9 #include "src/isolate.h" | 9 #include "src/isolate.h" |
| 10 #include "src/objects-inl.h" | 10 #include "src/objects-inl.h" |
| (...skipping 818 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 829 *value = unibrow::Utf16::CombineSurrogatePair(static_cast<uc16>(*value), | 829 *value = unibrow::Utf16::CombineSurrogatePair(static_cast<uc16>(*value), |
| 830 static_cast<uc16>(trail)); | 830 static_cast<uc16>(trail)); |
| 831 return true; | 831 return true; |
| 832 } | 832 } |
| 833 } | 833 } |
| 834 Reset(start); | 834 Reset(start); |
| 835 } | 835 } |
| 836 return result; | 836 return result; |
| 837 } | 837 } |
| 838 | 838 |
| 839 ZoneList<CharacterRange>* LookupPropertyClass(UProperty property, | |
| 840 const char* property_name, | |
| 841 Zone* zone) { | |
| 842 int32_t property_value = u_getPropertyValueEnum(property, property_name); | |
| 843 if (property_value == UCHAR_INVALID_CODE) return nullptr; | |
| 844 | |
| 845 USet* set = uset_openEmpty(); | |
| 846 UErrorCode ec = U_ZERO_ERROR; | |
| 847 uset_applyIntPropertyValue(set, property, property_value, &ec); | |
| 848 ZoneList<CharacterRange>* ranges = nullptr; | |
| 849 if (ec == U_ZERO_ERROR && !uset_isEmpty(set)) { | |
| 850 uset_removeAllStrings(set); | |
| 851 int item_count = uset_getItemCount(set); | |
| 852 ranges = new (zone) ZoneList<CharacterRange>(item_count, zone); | |
| 853 int item_result = 0; | |
| 854 for (int i = 0; i < item_count; i++) { | |
| 855 uc32 start = 0; | |
| 856 uc32 end = 0; | |
| 857 item_result += uset_getItem(set, i, &start, &end, nullptr, 0, &ec); | |
| 858 ranges->Add(CharacterRange::Range(start, end), zone); | |
| 859 } | |
| 860 DCHECK_EQ(U_ZERO_ERROR, ec); | |
| 861 DCHECK_EQ(0, item_result); | |
| 862 } | |
| 863 uset_close(set); | |
| 864 return ranges; | |
| 865 } | |
| 866 | |
| 839 ZoneList<CharacterRange>* RegExpParser::ParsePropertyClass() { | 867 ZoneList<CharacterRange>* RegExpParser::ParsePropertyClass() { |
| 840 #ifdef V8_I18N_SUPPORT | 868 #ifdef V8_I18N_SUPPORT |
| 841 ZoneList<char> property_name(0, zone()); | 869 List<char> property_name_list(0); |
| 842 if (current() == '{') { | 870 if (current() == '{') { |
| 843 for (Advance(); current() != '}'; Advance()) { | 871 for (Advance(); current() != '}'; Advance()) { |
| 844 if (!has_next()) return nullptr; | 872 if (!has_next()) return nullptr; |
| 845 property_name.Add(static_cast<char>(current()), zone()); | 873 property_name_list.Add(static_cast<char>(current())); |
| 846 } | 874 } |
| 847 } else if (current() != kEndMarker) { | 875 } else if (current() != kEndMarker) { |
| 848 property_name.Add(static_cast<char>(current()), zone()); | 876 property_name_list.Add(static_cast<char>(current())); |
| 849 } else { | 877 } else { |
| 850 return nullptr; | 878 return nullptr; |
| 851 } | 879 } |
| 852 Advance(); | 880 Advance(); |
| 853 property_name.Add(0, zone()); // null-terminate string. | 881 property_name_list.Add(0); // null-terminate string. |
| 882 | |
| 883 const char* property_name = property_name_list.ToConstVector().start(); | |
| 884 | |
| 885 ZoneList<CharacterRange>* ranges = nullptr; | |
| 886 | |
| 887 #define PROPERTY_NAME_LOOKUP(PROPERTY) \ | |
| 888 do { \ | |
| 889 ranges = LookupPropertyClass(PROPERTY, property_name, zone()); \ | |
| 890 if (ranges != nullptr) return ranges; \ | |
| 891 } while (false) | |
| 854 | 892 |
| 855 // Property names are defined in unicode database files. For aliases of | 893 // Property names are defined in unicode database files. For aliases of |
| 856 // these property names, see PropertyValueAliases.txt. | 894 // these property names, see PropertyValueAliases.txt. |
| 857 UProperty kPropertyClasses[] = { | 895 // General_Category (gc) found in PropertyValueAliases.txt |
| 858 // General_Category (gc) found in PropertyValueAliases.txt | 896 PROPERTY_NAME_LOOKUP(UCHAR_GENERAL_CATEGORY_MASK); |
| 859 UCHAR_GENERAL_CATEGORY_MASK, | 897 // Script (sc) found in Scripts.txt |
| 860 // Script (sc) found in Scripts.txt | 898 PROPERTY_NAME_LOOKUP(UCHAR_SCRIPT); |
| 861 UCHAR_SCRIPT, | 899 // Block (blk) found in Blocks.txt |
| 862 }; | 900 PROPERTY_NAME_LOOKUP(UCHAR_BLOCK); |
| 863 | 901 // For disambiguation, script names may have an "Is"-prefix and block names |
| 864 for (int i = 0; i < arraysize(kPropertyClasses); i++) { | 902 // may have an "In"-prefix. This convention is up for debate and for the spec |
| 865 UProperty property_class = kPropertyClasses[i]; | 903 // to settle. |
|
Dan Ehrenberg
2016/03/10 18:43:20
I don't like this mechanism, of letting them overl
| |
| 866 int32_t category = u_getPropertyValueEnum( | 904 if (property_name_list.length() > 3 && property_name[0] == 'I') { |
| 867 property_class, property_name.ToConstVector().start()); | 905 char second_char = property_name[1]; |
| 868 if (category == UCHAR_INVALID_CODE) continue; | 906 property_name += 2; |
| 869 | 907 if (second_char == 's') PROPERTY_NAME_LOOKUP(UCHAR_SCRIPT); |
| 870 USet* set = uset_openEmpty(); | 908 if (second_char == 'n') PROPERTY_NAME_LOOKUP(UCHAR_BLOCK); |
| 871 UErrorCode ec = U_ZERO_ERROR; | |
| 872 uset_applyIntPropertyValue(set, property_class, category, &ec); | |
| 873 ZoneList<CharacterRange>* ranges = nullptr; | |
| 874 if (ec == U_ZERO_ERROR && !uset_isEmpty(set)) { | |
| 875 uset_removeAllStrings(set); | |
| 876 int item_count = uset_getItemCount(set); | |
| 877 ranges = new (zone()) ZoneList<CharacterRange>(item_count, zone()); | |
| 878 int item_result = 0; | |
| 879 for (int i = 0; i < item_count; i++) { | |
| 880 uc32 start = 0; | |
| 881 uc32 end = 0; | |
| 882 item_result += uset_getItem(set, i, &start, &end, nullptr, 0, &ec); | |
| 883 ranges->Add(CharacterRange::Range(start, end), zone()); | |
| 884 } | |
| 885 DCHECK_EQ(U_ZERO_ERROR, ec); | |
| 886 DCHECK_EQ(0, item_result); | |
| 887 } | |
| 888 uset_close(set); | |
| 889 return ranges; | |
| 890 } | 909 } |
| 910 #undef PROPERTY_NAME_LOOKUP | |
| 891 #endif // V8_I18N_SUPPORT | 911 #endif // V8_I18N_SUPPORT |
| 892 | |
| 893 return nullptr; | 912 return nullptr; |
| 894 } | 913 } |
| 895 | 914 |
| 896 bool RegExpParser::ParseUnlimitedLengthHexNumber(int max_value, uc32* value) { | 915 bool RegExpParser::ParseUnlimitedLengthHexNumber(int max_value, uc32* value) { |
| 897 uc32 x = 0; | 916 uc32 x = 0; |
| 898 int d = HexValue(current()); | 917 int d = HexValue(current()); |
| 899 if (d < 0) { | 918 if (d < 0) { |
| 900 return false; | 919 return false; |
| 901 } | 920 } |
| 902 while (d >= 0) { | 921 while (d >= 0) { |
| (...skipping 547 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1450 return false; | 1469 return false; |
| 1451 } | 1470 } |
| 1452 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom), | 1471 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom), |
| 1453 zone()); | 1472 zone()); |
| 1454 LAST(ADD_TERM); | 1473 LAST(ADD_TERM); |
| 1455 return true; | 1474 return true; |
| 1456 } | 1475 } |
| 1457 | 1476 |
| 1458 } // namespace internal | 1477 } // namespace internal |
| 1459 } // namespace v8 | 1478 } // namespace v8 |
| OLD | NEW |