Chromium Code Reviews| Index: src/regexp/regexp-parser.cc |
| diff --git a/src/regexp/regexp-parser.cc b/src/regexp/regexp-parser.cc |
| index 21992e42a64ee337a5448444f30e6f9cb050bf44..cfbdb6b3e416f7f06ef01bb29bb2bec8768b96b8 100644 |
| --- a/src/regexp/regexp-parser.cc |
| +++ b/src/regexp/regexp-parser.cc |
| @@ -836,60 +836,79 @@ bool RegExpParser::ParseUnicodeEscape(uc32* value) { |
| return result; |
| } |
| +ZoneList<CharacterRange>* LookupPropertyClass(UProperty property, |
| + const char* property_name, |
| + Zone* zone) { |
| + int32_t property_value = u_getPropertyValueEnum(property, property_name); |
| + if (property_value == UCHAR_INVALID_CODE) return nullptr; |
| + |
| + USet* set = uset_openEmpty(); |
| + UErrorCode ec = U_ZERO_ERROR; |
| + uset_applyIntPropertyValue(set, property, property_value, &ec); |
| + ZoneList<CharacterRange>* ranges = nullptr; |
| + if (ec == U_ZERO_ERROR && !uset_isEmpty(set)) { |
| + uset_removeAllStrings(set); |
| + int item_count = uset_getItemCount(set); |
| + ranges = new (zone) ZoneList<CharacterRange>(item_count, zone); |
| + int item_result = 0; |
| + for (int i = 0; i < item_count; i++) { |
| + uc32 start = 0; |
| + uc32 end = 0; |
| + item_result += uset_getItem(set, i, &start, &end, nullptr, 0, &ec); |
| + ranges->Add(CharacterRange::Range(start, end), zone); |
| + } |
| + DCHECK_EQ(U_ZERO_ERROR, ec); |
| + DCHECK_EQ(0, item_result); |
| + } |
| + uset_close(set); |
| + return ranges; |
| +} |
| + |
| ZoneList<CharacterRange>* RegExpParser::ParsePropertyClass() { |
| #ifdef V8_I18N_SUPPORT |
| - ZoneList<char> property_name(0, zone()); |
| + List<char> property_name_list(0); |
| if (current() == '{') { |
| for (Advance(); current() != '}'; Advance()) { |
| if (!has_next()) return nullptr; |
| - property_name.Add(static_cast<char>(current()), zone()); |
| + property_name_list.Add(static_cast<char>(current())); |
| } |
| } else if (current() != kEndMarker) { |
| - property_name.Add(static_cast<char>(current()), zone()); |
| + property_name_list.Add(static_cast<char>(current())); |
| } else { |
| return nullptr; |
| } |
| Advance(); |
| - property_name.Add(0, zone()); // null-terminate string. |
| + property_name_list.Add(0); // Null-terminate: the buffer is used as a C string below. |
| + |
| + const char* property_name = property_name_list.ToConstVector().start(); |
| + |
| + ZoneList<CharacterRange>* ranges = nullptr; |
| + |
| +#define PROPERTY_NAME_LOOKUP(PROPERTY) \ |
| + do { \ |
| + ranges = LookupPropertyClass(PROPERTY, property_name, zone()); \ |
| + if (ranges != nullptr) return ranges; \ |
| + } while (false) |
| // Property value names are defined in the Unicode database files. For aliases |
| // of these names, see PropertyValueAliases.txt. The first lookup that matches wins. |
| - UProperty kPropertyClasses[] = { |
| - // General_Category (gc) found in PropertyValueAliases.txt |
| - UCHAR_GENERAL_CATEGORY_MASK, |
| - // Script (sc) found in Scripts.txt |
| - UCHAR_SCRIPT, |
| - }; |
| - |
| - for (int i = 0; i < arraysize(kPropertyClasses); i++) { |
| - UProperty property_class = kPropertyClasses[i]; |
| - int32_t category = u_getPropertyValueEnum( |
| - property_class, property_name.ToConstVector().start()); |
| - if (category == UCHAR_INVALID_CODE) continue; |
| - |
| - USet* set = uset_openEmpty(); |
| - UErrorCode ec = U_ZERO_ERROR; |
| - uset_applyIntPropertyValue(set, property_class, category, &ec); |
| - ZoneList<CharacterRange>* ranges = nullptr; |
| - if (ec == U_ZERO_ERROR && !uset_isEmpty(set)) { |
| - uset_removeAllStrings(set); |
| - int item_count = uset_getItemCount(set); |
| - ranges = new (zone()) ZoneList<CharacterRange>(item_count, zone()); |
| - int item_result = 0; |
| - for (int i = 0; i < item_count; i++) { |
| - uc32 start = 0; |
| - uc32 end = 0; |
| - item_result += uset_getItem(set, i, &start, &end, nullptr, 0, &ec); |
| - ranges->Add(CharacterRange::Range(start, end), zone()); |
| - } |
| - DCHECK_EQ(U_ZERO_ERROR, ec); |
| - DCHECK_EQ(0, item_result); |
| - } |
| - uset_close(set); |
| - return ranges; |
| + // General_Category (gc) found in PropertyValueAliases.txt |
| + PROPERTY_NAME_LOOKUP(UCHAR_GENERAL_CATEGORY_MASK); |
| + // Script (sc) found in Scripts.txt |
| + PROPERTY_NAME_LOOKUP(UCHAR_SCRIPT); |
| + // Block (blk) found in Blocks.txt |
| + PROPERTY_NAME_LOOKUP(UCHAR_BLOCK); |
| + // For disambiguation, script names may carry an "Is"-prefix and block names |
| + // an "In"-prefix; the two prefix characters are skipped before retrying. This |
| + // convention is up for debate and for the spec to settle. |

Dan Ehrenberg
2016/03/10 18:43:20
I don't like this mechanism, of letting them overl
|
| + if (property_name_list.length() > 3 && property_name[0] == 'I') { |
| + char second_char = property_name[1]; |
| + property_name += 2; |
| + if (second_char == 's') PROPERTY_NAME_LOOKUP(UCHAR_SCRIPT); |
| + if (second_char == 'n') PROPERTY_NAME_LOOKUP(UCHAR_BLOCK); |
| } |
| +#undef PROPERTY_NAME_LOOKUP |
| #endif // V8_I18N_SUPPORT |
| - |
| return nullptr; |
| } |