Chromium Code Reviews| Index: src/regexp/regexp-parser.cc |
| diff --git a/src/regexp/regexp-parser.cc b/src/regexp/regexp-parser.cc |
| index bdfa13f719e09416f4af414d696100d2e6101690..323c378cb6c13e4b0548595de17ecc6a72679da0 100644 |
| --- a/src/regexp/regexp-parser.cc |
| +++ b/src/regexp/regexp-parser.cc |
| @@ -362,11 +362,11 @@ RegExpTree* RegExpParser::ParseDisjunction() { |
| if (FLAG_harmony_regexp_property) { |
| ZoneList<CharacterRange>* ranges = |
| new (zone()) ZoneList<CharacterRange>(2, zone()); |
| - if (!ParsePropertyClass(ranges)) { |
| + if (!ParsePropertyClass(ranges, p == 'P')) { |
|
Dan Ehrenberg
2016/06/13 20:23:15
I'm a fan of this refactoring to match the spec be
mathias
2016/06/13 20:43:57
It wouldn’t match all code points, since non-Lette
|
| return ReportError(CStrVector("Invalid property name")); |
| } |
| RegExpCharacterClass* cc = |
| - new (zone()) RegExpCharacterClass(ranges, p == 'P'); |
| + new (zone()) RegExpCharacterClass(ranges, false); |
| builder->AddCharacterClass(cc); |
| } else { |
| // With /u, no identity escapes except for syntax characters |
| @@ -845,6 +845,9 @@ bool RegExpParser::ParseUnicodeEscape(uc32* value) { |
| } |
| #ifdef V8_I18N_SUPPORT |
| + |
| +namespace { |
| + |
| bool IsExactPropertyAlias(const char* property_name, UProperty property) { |
| const char* short_name = u_getPropertyName(property, U_SHORT_PROPERTY_NAME); |
| if (short_name != NULL && strcmp(property_name, short_name) == 0) return true; |
| @@ -875,7 +878,7 @@ bool IsExactPropertyValueAlias(const char* property_value_name, |
| } |
| bool LookupPropertyValueName(UProperty property, |
| - const char* property_value_name, |
| + const char* property_value_name, bool negate, |
| ZoneList<CharacterRange>* result, Zone* zone) { |
| int32_t property_value = |
| u_getPropertyValueEnum(property, property_value_name); |
| @@ -895,6 +898,7 @@ bool LookupPropertyValueName(UProperty property, |
| if (success) { |
| uset_removeAllStrings(set); |
| + if (negate) uset_complement(set); |
| int item_count = uset_getItemCount(set); |
| int item_result = 0; |
| for (int i = 0; i < item_count; i++) { |
| @@ -910,7 +914,33 @@ bool LookupPropertyValueName(UProperty property, |
| return success; |
| } |
| -bool RegExpParser::ParsePropertyClass(ZoneList<CharacterRange>* result) { |
| +template <size_t N> |
| +inline bool NameEquals(const char* name, const char (&literal)[N]) { |
| + return strncmp(name, literal, N + 1) == 0; |
| +} |
| + |
| +bool LookupSpecialPropertyValueName(const char* name, |
| + ZoneList<CharacterRange>* result, |
| + bool negate, Zone* zone) { |
| + if (NameEquals(name, "Any")) { |
| + if (!negate) result->Add(CharacterRange::Everything(), zone); |
| + } else if (NameEquals(name, "ASCII")) { |
| + result->Add(negate ? CharacterRange::Range(0x80, String::kMaxCodePoint) |
| + : CharacterRange::Range(0x0, 0x7f), |
| + zone); |
| + } else if (NameEquals(name, "Assigned")) { |
| + return LookupPropertyValueName(UCHAR_GENERAL_CATEGORY, "Unassigned", |
| + !negate, result, zone); |
| + } else { |
| + return false; |
| + } |
| + return true; |
| +} |
| + |
| +} // anonymous namespace |
| + |
| +bool RegExpParser::ParsePropertyClass(ZoneList<CharacterRange>* result, |
| + bool negate) { |
| // Parse the property class as follows: |
| // - In \p{name}, 'name' is interpreted |
| // - either as a general category property value name. |
| @@ -943,8 +973,12 @@ bool RegExpParser::ParsePropertyClass(ZoneList<CharacterRange>* result) { |
| if (second_part.is_empty()) { |
| // First attempt to interpret as general category property value name. |
| const char* name = first_part.ToConstVector().start(); |
| - if (LookupPropertyValueName(UCHAR_GENERAL_CATEGORY_MASK, name, result, |
| - zone())) { |
| + if (LookupPropertyValueName(UCHAR_GENERAL_CATEGORY_MASK, name, negate, |
| + result, zone())) { |
| + return true; |
| + } |
| + // Interpret "Any", "ASCII", and "Assigned". |
| + if (LookupSpecialPropertyValueName(name, result, negate, zone())) { |
| return true; |
| } |
| // Then attempt to interpret as binary property name with value name 'Y'. |
| @@ -952,7 +986,8 @@ bool RegExpParser::ParsePropertyClass(ZoneList<CharacterRange>* result) { |
| if (property < UCHAR_BINARY_START) return false; |
| if (property >= UCHAR_BINARY_LIMIT) return false; |
| if (!IsExactPropertyAlias(name, property)) return false; |
| - return LookupPropertyValueName(property, "Y", result, zone()); |
| + return LookupPropertyValueName(property, negate ? "N" : "Y", false, result, |
| + zone()); |
| } else { |
| // Both property name and value name are specified. Attempt to interpret |
| // the property name as enumerated property. |
| @@ -962,7 +997,8 @@ bool RegExpParser::ParsePropertyClass(ZoneList<CharacterRange>* result) { |
| if (property < UCHAR_INT_START) return false; |
| if (property >= UCHAR_INT_LIMIT) return false; |
| if (!IsExactPropertyAlias(property_name, property)) return false; |
| - return LookupPropertyValueName(property, value_name, result, zone()); |
| + return LookupPropertyValueName(property, value_name, negate, result, |
| + zone()); |
| } |
| } |
| @@ -1159,19 +1195,10 @@ bool RegExpParser::ParseClassProperty(ZoneList<CharacterRange>* ranges) { |
| bool parse_success = false; |
| if (next == 'p') { |
| Advance(2); |
| - parse_success = ParsePropertyClass(ranges); |
| + parse_success = ParsePropertyClass(ranges, false); |
| } else if (next == 'P') { |
| Advance(2); |
| - ZoneList<CharacterRange>* property_class = |
| - new (zone()) ZoneList<CharacterRange>(2, zone()); |
| - parse_success = ParsePropertyClass(property_class); |
| - if (parse_success) { |
| - ZoneList<CharacterRange>* negated = |
| - new (zone()) ZoneList<CharacterRange>(2, zone()); |
| - CharacterRange::Negate(property_class, negated, zone()); |
| - const Vector<CharacterRange> negated_vector = negated->ToVector(); |
| - ranges->AddAll(negated_vector, zone()); |
| - } |
| + parse_success = ParsePropertyClass(ranges, true); |
| } else { |
| return false; |
| } |