Index: src/regexp/regexp-parser.cc |
diff --git a/src/regexp/regexp-parser.cc b/src/regexp/regexp-parser.cc |
index bdfa13f719e09416f4af414d696100d2e6101690..5f4a7cb1b22215fc31e275fc32b578586f9dd3b2 100644 |
--- a/src/regexp/regexp-parser.cc |
+++ b/src/regexp/regexp-parser.cc |
@@ -362,11 +362,11 @@ RegExpTree* RegExpParser::ParseDisjunction() { |
if (FLAG_harmony_regexp_property) { |
ZoneList<CharacterRange>* ranges = |
new (zone()) ZoneList<CharacterRange>(2, zone()); |
- if (!ParsePropertyClass(ranges)) { |
+ if (!ParsePropertyClass(ranges, p == 'P')) { |
return ReportError(CStrVector("Invalid property name")); |
} |
RegExpCharacterClass* cc = |
- new (zone()) RegExpCharacterClass(ranges, p == 'P'); |
+ new (zone()) RegExpCharacterClass(ranges, false); |
builder->AddCharacterClass(cc); |
} else { |
// With /u, no identity escapes except for syntax characters |
@@ -845,6 +845,9 @@ bool RegExpParser::ParseUnicodeEscape(uc32* value) { |
} |
#ifdef V8_I18N_SUPPORT |
+ |
+namespace { |
+ |
bool IsExactPropertyAlias(const char* property_name, UProperty property) { |
const char* short_name = u_getPropertyName(property, U_SHORT_PROPERTY_NAME); |
if (short_name != NULL && strcmp(property_name, short_name) == 0) return true; |
@@ -875,7 +878,7 @@ bool IsExactPropertyValueAlias(const char* property_value_name, |
} |
bool LookupPropertyValueName(UProperty property, |
- const char* property_value_name, |
+ const char* property_value_name, bool negate, |
ZoneList<CharacterRange>* result, Zone* zone) { |
int32_t property_value = |
u_getPropertyValueEnum(property, property_value_name); |
@@ -895,6 +898,7 @@ bool LookupPropertyValueName(UProperty property, |
if (success) { |
uset_removeAllStrings(set); |
+ if (negate) uset_complement(set); |
int item_count = uset_getItemCount(set); |
int item_result = 0; |
for (int i = 0; i < item_count; i++) { |
@@ -910,7 +914,33 @@ bool LookupPropertyValueName(UProperty property, |
return success; |
} |
-bool RegExpParser::ParsePropertyClass(ZoneList<CharacterRange>* result) { |
+// Returns true iff the NUL-terminated string 'name' is exactly equal to the |
+// string literal 'literal'. |
+// N is the size of the literal's array and therefore already counts the |
+// terminating NUL: comparing N characters covers the whole literal plus its |
+// terminator, and never looks beyond the end of the array. |
+template <size_t N> |
+inline bool NameEquals(const char* name, const char (&literal)[N]) { |
+  return strncmp(name, literal, N) == 0; |
+} |
+ |
+// Handles the special property names "Any", "ASCII" and "Assigned". |
+// On a recognized name the matching code point ranges (complemented when |
+// 'negate' is set) are appended to 'result'. Returns false if 'name' is none |
+// of the three; for "Assigned" the return value is that of the underlying |
+// general category lookup. |
+bool LookupSpecialPropertyValueName(const char* name, |
+                                    ZoneList<CharacterRange>* result, |
+                                    bool negate, Zone* zone) { |
+  if (NameEquals(name, "Any")) { |
+    // The complement of "Any" is empty, so nothing is added when negating. |
+    if (negate) return true; |
+    result->Add(CharacterRange::Everything(), zone); |
+    return true; |
+  } |
+ |
+  if (NameEquals(name, "ASCII")) { |
+    if (negate) { |
+      result->Add(CharacterRange::Range(0x80, String::kMaxCodePoint), zone); |
+    } else { |
+      result->Add(CharacterRange::Range(0x0, 0x7f), zone); |
+    } |
+    return true; |
+  } |
+ |
+  if (NameEquals(name, "Assigned")) { |
+    // "Assigned" is expressed as the inverse of general category |
+    // "Unassigned", hence the flipped negation flag. |
+    const bool negate_unassigned = !negate; |
+    return LookupPropertyValueName(UCHAR_GENERAL_CATEGORY, "Unassigned", |
+                                   negate_unassigned, result, zone); |
+  } |
+ |
+  return false; |
+} |
+ |
+} // anonymous namespace |
+ |
+bool RegExpParser::ParsePropertyClass(ZoneList<CharacterRange>* result, |
+ bool negate) { |
// Parse the property class as follows: |
// - In \p{name}, 'name' is interpreted |
// - either as a general category property value name. |
@@ -943,8 +973,12 @@ bool RegExpParser::ParsePropertyClass(ZoneList<CharacterRange>* result) { |
if (second_part.is_empty()) { |
// First attempt to interpret as general category property value name. |
const char* name = first_part.ToConstVector().start(); |
- if (LookupPropertyValueName(UCHAR_GENERAL_CATEGORY_MASK, name, result, |
- zone())) { |
+ if (LookupPropertyValueName(UCHAR_GENERAL_CATEGORY_MASK, name, negate, |
+ result, zone())) { |
+ return true; |
+ } |
+ // Interpret "Any", "ASCII", and "Assigned". |
+ if (LookupSpecialPropertyValueName(name, result, negate, zone())) { |
return true; |
} |
// Then attempt to interpret as binary property name with value name 'Y'. |
@@ -952,7 +986,8 @@ bool RegExpParser::ParsePropertyClass(ZoneList<CharacterRange>* result) { |
if (property < UCHAR_BINARY_START) return false; |
if (property >= UCHAR_BINARY_LIMIT) return false; |
if (!IsExactPropertyAlias(name, property)) return false; |
- return LookupPropertyValueName(property, "Y", result, zone()); |
+ return LookupPropertyValueName(property, negate ? "N" : "Y", false, result, |
+ zone()); |
} else { |
// Both property name and value name are specified. Attempt to interpret |
// the property name as enumerated property. |
@@ -962,13 +997,15 @@ bool RegExpParser::ParsePropertyClass(ZoneList<CharacterRange>* result) { |
if (property < UCHAR_INT_START) return false; |
if (property >= UCHAR_INT_LIMIT) return false; |
if (!IsExactPropertyAlias(property_name, property)) return false; |
- return LookupPropertyValueName(property, value_name, result, zone()); |
+ return LookupPropertyValueName(property, value_name, negate, result, |
+ zone()); |
} |
} |
#else // V8_I18N_SUPPORT |
-bool RegExpParser::ParsePropertyClass(ZoneList<CharacterRange>* result) { |
+bool RegExpParser::ParsePropertyClass(ZoneList<CharacterRange>* result, |
+ bool negate) { |
return false; |
} |
@@ -1159,19 +1196,10 @@ bool RegExpParser::ParseClassProperty(ZoneList<CharacterRange>* ranges) { |
bool parse_success = false; |
if (next == 'p') { |
Advance(2); |
- parse_success = ParsePropertyClass(ranges); |
+ parse_success = ParsePropertyClass(ranges, false); |
} else if (next == 'P') { |
Advance(2); |
- ZoneList<CharacterRange>* property_class = |
- new (zone()) ZoneList<CharacterRange>(2, zone()); |
- parse_success = ParsePropertyClass(property_class); |
- if (parse_success) { |
- ZoneList<CharacterRange>* negated = |
- new (zone()) ZoneList<CharacterRange>(2, zone()); |
- CharacterRange::Negate(property_class, negated, zone()); |
- const Vector<CharacterRange> negated_vector = negated->ToVector(); |
- ranges->AddAll(negated_vector, zone()); |
- } |
+ parse_success = ParsePropertyClass(ranges, true); |
} else { |
return false; |
} |