Index: src/regexp/regexp-parser.cc |
diff --git a/src/regexp/regexp-parser.cc b/src/regexp/regexp-parser.cc |
index 21992e42a64ee337a5448444f30e6f9cb050bf44..cfbdb6b3e416f7f06ef01bb29bb2bec8768b96b8 100644 |
--- a/src/regexp/regexp-parser.cc |
+++ b/src/regexp/regexp-parser.cc |
@@ -836,60 +836,79 @@ bool RegExpParser::ParseUnicodeEscape(uc32* value) { |
return result; |
} |
// Looks up property_name as a value name of the given ICU property and, if
// found, returns the matching code point ranges as a list allocated in zone.
// Returns nullptr when u_getPropertyValueEnum reports UCHAR_INVALID_CODE,
// when applying the property value leaves ec != U_ZERO_ERROR, or when the
// resulting set is empty.
// NOTE(review): this helper uses ICU types (UProperty, USet) but, within this
// hunk, sits above the #ifdef V8_I18N_SUPPORT that guards its only visible
// caller -- confirm that builds without i18n support still compile.
+ZoneList<CharacterRange>* LookupPropertyClass(UProperty property, |
+ const char* property_name, |
+ Zone* zone) { |
+ int32_t property_value = u_getPropertyValueEnum(property, property_name); |
+ if (property_value == UCHAR_INVALID_CODE) return nullptr; |
+ |
+ USet* set = uset_openEmpty(); |
+ UErrorCode ec = U_ZERO_ERROR; |
+ uset_applyIntPropertyValue(set, property, property_value, &ec); |
+ ZoneList<CharacterRange>* ranges = nullptr; |
// NOTE(review): the check is an equality test against U_ZERO_ERROR, so any
// other status value is treated as failure (presumably including ICU warning
// codes -- confirm that is intended).
+ if (ec == U_ZERO_ERROR && !uset_isEmpty(set)) { |
// Strings are dropped first, so the loop below expects only range items.
+ uset_removeAllStrings(set); |
+ int item_count = uset_getItemCount(set); |
+ ranges = new (zone) ZoneList<CharacterRange>(item_count, zone); |
// Sum of the uset_getItem return values; checked to be 0 after the loop.
+ int item_result = 0; |
+ for (int i = 0; i < item_count; i++) { |
+ uc32 start = 0; |
+ uc32 end = 0; |
+ item_result += uset_getItem(set, i, &start, &end, nullptr, 0, &ec); |
// NOTE(review): the range is added before ec is inspected; a failing
// uset_getItem is only caught by the DCHECKs below, so without DCHECKs the
// zero-initialized start/end would be added as a range.
+ ranges->Add(CharacterRange::Range(start, end), zone); |
+ } |
+ DCHECK_EQ(U_ZERO_ERROR, ec); |
+ DCHECK_EQ(0, item_result); |
+ } |
// The set is closed on every path that opened it; ranges holds copies of the
// start/end values, not references into the set.
+ uset_close(set); |
+ return ranges; |
+} |
+ |
// Reads a property name at the current position -- either a '{'...'}'
// delimited name or a single character -- and returns the character ranges
// of the first property lookup that succeeds. Returns nullptr when no lookup
// matches, when the input ends before the name is complete, or when
// V8_I18N_SUPPORT is not defined.
ZoneList<CharacterRange>* RegExpParser::ParsePropertyClass() { |
#ifdef V8_I18N_SUPPORT |
- ZoneList<char> property_name(0, zone()); |
+ List<char> property_name_list(0); |
if (current() == '{') { |
for (Advance(); current() != '}'; Advance()) { |
if (!has_next()) return nullptr; |
- property_name.Add(static_cast<char>(current()), zone()); |
// NOTE(review): current() is narrowed to char here; presumably it can hold
// values outside the char range, which would be truncated -- confirm.
+ property_name_list.Add(static_cast<char>(current())); |
} |
} else if (current() != kEndMarker) { |
- property_name.Add(static_cast<char>(current()), zone()); |
+ property_name_list.Add(static_cast<char>(current())); |
} else { |
return nullptr; |
} |
Advance(); |
- property_name.Add(0, zone()); // null-terminate string. |
+ property_name_list.Add(0); // null-terminate string. |
+ |
// Presumably points into property_name_list's backing store, so it is only
// meaningful while that list is alive -- confirm against List's API.
+ const char* property_name = property_name_list.ToConstVector().start(); |
+ |
+ ZoneList<CharacterRange>* ranges = nullptr; |
+ |
// Tries one property via LookupPropertyClass. The macro expands to a return
// statement, so the first non-null result leaves ParsePropertyClass itself.
+#define PROPERTY_NAME_LOOKUP(PROPERTY) \ |
+ do { \ |
+ ranges = LookupPropertyClass(PROPERTY, property_name, zone()); \ |
+ if (ranges != nullptr) return ranges; \ |
+ } while (false) |
// Property names are defined in unicode database files. For aliases of |
// these property names, see PropertyValueAliases.txt. |
- UProperty kPropertyClasses[] = { |
- // General_Category (gc) found in PropertyValueAliases.txt |
- UCHAR_GENERAL_CATEGORY_MASK, |
- // Script (sc) found in Scripts.txt |
- UCHAR_SCRIPT, |
- }; |
- |
- for (int i = 0; i < arraysize(kPropertyClasses); i++) { |
- UProperty property_class = kPropertyClasses[i]; |
- int32_t category = u_getPropertyValueEnum( |
- property_class, property_name.ToConstVector().start()); |
- if (category == UCHAR_INVALID_CODE) continue; |
- |
- USet* set = uset_openEmpty(); |
- UErrorCode ec = U_ZERO_ERROR; |
- uset_applyIntPropertyValue(set, property_class, category, &ec); |
- ZoneList<CharacterRange>* ranges = nullptr; |
- if (ec == U_ZERO_ERROR && !uset_isEmpty(set)) { |
- uset_removeAllStrings(set); |
- int item_count = uset_getItemCount(set); |
- ranges = new (zone()) ZoneList<CharacterRange>(item_count, zone()); |
- int item_result = 0; |
- for (int i = 0; i < item_count; i++) { |
- uc32 start = 0; |
- uc32 end = 0; |
- item_result += uset_getItem(set, i, &start, &end, nullptr, 0, &ec); |
- ranges->Add(CharacterRange::Range(start, end), zone()); |
- } |
- DCHECK_EQ(U_ZERO_ERROR, ec); |
- DCHECK_EQ(0, item_result); |
- } |
- uset_close(set); |
- return ranges; |
+ // General_Category (gc) found in PropertyValueAliases.txt |
+ PROPERTY_NAME_LOOKUP(UCHAR_GENERAL_CATEGORY_MASK); |
+ // Script (sc) found in Scripts.txt |
+ PROPERTY_NAME_LOOKUP(UCHAR_SCRIPT); |
+ // Block (blk) found in Blocks.txt |
+ PROPERTY_NAME_LOOKUP(UCHAR_BLOCK); |
+ // For disambiguation, script names may have an "Is"-prefix and block names |
+ // may have an "In"-prefix. This convention is up for debate and for the spec |
+ // to settle. |
Dan Ehrenberg
2016/03/10 18:43:20
I don't like this mechanism, of letting them overl
|
// length() counts the 0 terminator appended above, so "> 3" requires the
// two-character prefix plus at least one further character of name.
+ if (property_name_list.length() > 3 && property_name[0] == 'I') { |
+ char second_char = property_name[1]; |
// Skip the prefix; the lookups below see only the remainder of the name.
+ property_name += 2; |
+ if (second_char == 's') PROPERTY_NAME_LOOKUP(UCHAR_SCRIPT); |
+ if (second_char == 'n') PROPERTY_NAME_LOOKUP(UCHAR_BLOCK); |
} |
+#undef PROPERTY_NAME_LOOKUP |
#endif // V8_I18N_SUPPORT |
- |
return nullptr; |
} |