Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(123)

Side by Side Diff: src/regexp/regexp-parser.cc

Issue 2514333002: [regexp] use C++ API for unicode set over C API. (Closed)
Patch Set: addressed comment Created 4 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/regexp/jsregexp.cc ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2016 the V8 project authors. All rights reserved. 1 // Copyright 2016 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "src/regexp/regexp-parser.h" 5 #include "src/regexp/regexp-parser.h"
6 6
7 #include "src/char-predicates-inl.h" 7 #include "src/char-predicates-inl.h"
8 #include "src/factory.h" 8 #include "src/factory.h"
9 #include "src/isolate.h" 9 #include "src/isolate.h"
10 #include "src/objects-inl.h" 10 #include "src/objects-inl.h"
11 #include "src/ostreams.h" 11 #include "src/ostreams.h"
12 #include "src/regexp/jsregexp.h" 12 #include "src/regexp/jsregexp.h"
13 #include "src/utils.h" 13 #include "src/utils.h"
14 14
15 #ifdef V8_I18N_SUPPORT 15 #ifdef V8_I18N_SUPPORT
16 #include "unicode/uset.h" 16 #include "unicode/uniset.h"
17 #endif // V8_I18N_SUPPORT 17 #endif // V8_I18N_SUPPORT
18 18
19 namespace v8 { 19 namespace v8 {
20 namespace internal { 20 namespace internal {
21 21
22 RegExpParser::RegExpParser(FlatStringReader* in, Handle<String>* error, 22 RegExpParser::RegExpParser(FlatStringReader* in, Handle<String>* error,
23 JSRegExp::Flags flags, Isolate* isolate, Zone* zone) 23 JSRegExp::Flags flags, Isolate* isolate, Zone* zone)
24 : isolate_(isolate), 24 : isolate_(isolate),
25 zone_(zone), 25 zone_(zone),
26 error_(error), 26 error_(error),
(...skipping 1066 matching lines...) Expand 10 before | Expand all | Expand 10 after
1093 u_getPropertyValueEnum(property_for_lookup, property_value_name); 1093 u_getPropertyValueEnum(property_for_lookup, property_value_name);
1094 if (property_value == UCHAR_INVALID_CODE) return false; 1094 if (property_value == UCHAR_INVALID_CODE) return false;
1095 1095
1096 // We require the property name to match exactly to one of the property value 1096 // We require the property name to match exactly to one of the property value
1097 // aliases. However, u_getPropertyValueEnum uses loose matching. 1097 // aliases. However, u_getPropertyValueEnum uses loose matching.
1098 if (!IsExactPropertyValueAlias(property_value_name, property_for_lookup, 1098 if (!IsExactPropertyValueAlias(property_value_name, property_for_lookup,
1099 property_value)) { 1099 property_value)) {
1100 return false; 1100 return false;
1101 } 1101 }
1102 1102
1103 USet* set = uset_openEmpty();
1104 UErrorCode ec = U_ZERO_ERROR; 1103 UErrorCode ec = U_ZERO_ERROR;
1105 uset_applyIntPropertyValue(set, property, property_value, &ec); 1104 icu::UnicodeSet set;
1106 bool success = ec == U_ZERO_ERROR && !uset_isEmpty(set); 1105 set.applyIntPropertyValue(property, property_value, ec);
1106 bool success = ec == U_ZERO_ERROR && !set.isEmpty();
1107 1107
1108 if (success) { 1108 if (success) {
1109 uset_removeAllStrings(set); 1109 set.removeAllStrings();
1110 if (negate) uset_complement(set); 1110 if (negate) set.complement();
1111 int item_count = uset_getItemCount(set); 1111 for (int i = 0; i < set.getRangeCount(); i++) {
1112 int item_result = 0; 1112 result->Add(
1113 for (int i = 0; i < item_count; i++) { 1113 CharacterRange::Range(set.getRangeStart(i), set.getRangeEnd(i)),
1114 uc32 start = 0; 1114 zone);
1115 uc32 end = 0;
1116 item_result += uset_getItem(set, i, &start, &end, nullptr, 0, &ec);
1117 result->Add(CharacterRange::Range(start, end), zone);
1118 } 1115 }
1119 DCHECK_EQ(U_ZERO_ERROR, ec);
1120 DCHECK_EQ(0, item_result);
1121 } 1116 }
1122 uset_close(set);
1123 return success; 1117 return success;
1124 } 1118 }
1125 1119
1126 template <size_t N> 1120 template <size_t N>
1127 inline bool NameEquals(const char* name, const char (&literal)[N]) { 1121 inline bool NameEquals(const char* name, const char (&literal)[N]) {
1128 return strncmp(name, literal, N + 1) == 0; 1122 return strncmp(name, literal, N + 1) == 0;
1129 } 1123 }
1130 1124
1131 bool LookupSpecialPropertyValueName(const char* name, 1125 bool LookupSpecialPropertyValueName(const char* name,
1132 ZoneList<CharacterRange>* result, 1126 ZoneList<CharacterRange>* result,
(...skipping 592 matching lines...) Expand 10 before | Expand all | Expand 10 after
1725 // Check for lone surrogates. 1719 // Check for lone surrogates.
1726 if (from <= kTrailSurrogateEnd && to >= kLeadSurrogateStart) return true; 1720 if (from <= kTrailSurrogateEnd && to >= kLeadSurrogateStart) return true;
1727 } 1721 }
1728 return false; 1722 return false;
1729 } 1723 }
1730 1724
1731 1725
1732 bool RegExpBuilder::NeedsDesugaringForIgnoreCase(uc32 c) { 1726 bool RegExpBuilder::NeedsDesugaringForIgnoreCase(uc32 c) {
1733 #ifdef V8_I18N_SUPPORT 1727 #ifdef V8_I18N_SUPPORT
1734 if (unicode() && ignore_case()) { 1728 if (unicode() && ignore_case()) {
1735 USet* set = uset_open(c, c); 1729 icu::UnicodeSet set(c, c);
1736 uset_closeOver(set, USET_CASE_INSENSITIVE); 1730 set.closeOver(USET_CASE_INSENSITIVE);
1737 uset_removeAllStrings(set); 1731 set.removeAllStrings();
1738 bool result = uset_size(set) > 1; 1732 return set.size() > 1;
1739 uset_close(set);
1740 return result;
1741 } 1733 }
1742 // In the case where ICU is not included, we act as if the unicode flag is 1734 // In the case where ICU is not included, we act as if the unicode flag is
1743 // not set, and do not desugar. 1735 // not set, and do not desugar.
1744 #endif // V8_I18N_SUPPORT 1736 #endif // V8_I18N_SUPPORT
1745 return false; 1737 return false;
1746 } 1738 }
1747 1739
1748 1740
1749 RegExpTree* RegExpBuilder::ToRegExp() { 1741 RegExpTree* RegExpBuilder::ToRegExp() {
1750 FlushTerms(); 1742 FlushTerms();
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after
1799 return false; 1791 return false;
1800 } 1792 }
1801 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom), 1793 terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom),
1802 zone()); 1794 zone());
1803 LAST(ADD_TERM); 1795 LAST(ADD_TERM);
1804 return true; 1796 return true;
1805 } 1797 }
1806 1798
1807 } // namespace internal 1799 } // namespace internal
1808 } // namespace v8 1800 } // namespace v8
OLDNEW
« no previous file with comments | « src/regexp/jsregexp.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698