Index: src/regexp/regexp-parser.cc |
diff --git a/src/regexp/regexp-parser.cc b/src/regexp/regexp-parser.cc |
index 3d2261a9199f02638c710579b083369451ba581a..22e0eb12d3b9a055362dc02f3a2c485bc0bda1e6 100644 |
--- a/src/regexp/regexp-parser.cc |
+++ b/src/regexp/regexp-parser.cc |
@@ -1143,9 +1143,9 @@ bool IsExactPropertyValueAlias(const char* property_value_name, |
return false; |
} |
-bool LookupPropertyValueName(UProperty property, |
- const char* property_value_name, bool negate, |
- ZoneList<CharacterRange>* result, Zone* zone) { |
+bool LookupPropertyName(UProperty property, const char* property_value_name, |
+ bool negate, ZoneList<CharacterRange>* result, |
+ Zone* zone) { |
UProperty property_for_lookup = property; |
if (property_for_lookup == UCHAR_SCRIPT_EXTENSIONS) { |
// For the property Script_Extensions, we have to do the property value |
@@ -1185,9 +1185,9 @@ inline bool NameEquals(const char* name, const char (&literal)[N]) { |
return strncmp(name, literal, N + 1) == 0; |
} |
-bool LookupSpecialPropertyValueName(const char* name, |
- ZoneList<CharacterRange>* result, |
- bool negate, Zone* zone) { |
+bool LookupSpecialPropertyName(const char* name, |
+ ZoneList<CharacterRange>* result, bool negate, |
+ Zone* zone) { |
if (NameEquals(name, "Any")) { |
if (!negate) result->Add(CharacterRange::Everything(), zone); |
} else if (NameEquals(name, "ASCII")) { |
@@ -1195,8 +1195,45 @@ bool LookupSpecialPropertyValueName(const char* name, |
: CharacterRange::Range(0x0, 0x7f), |
zone); |
} else if (NameEquals(name, "Assigned")) { |
- return LookupPropertyValueName(UCHAR_GENERAL_CATEGORY, "Unassigned", |
- !negate, result, zone); |
+ return LookupPropertyName(UCHAR_GENERAL_CATEGORY, "Unassigned", !negate, |
+ result, zone); |
+ } else if (NameEquals(name, "Other_ID_Start") || NameEquals(name, "OIDS")) { |
+ // From Unicode 9.0.0 PropList.txt |
+ // 1885..1886 ; Other_ID_Start |
+ // 2118 ; Other_ID_Start |
+ // 212E ; Other_ID_Start |
+ // 309B..309C ; Other_ID_Start |
+ if (negate) { |
+ result->Add(CharacterRange::Range(0x0000, 0x1884), zone); |
+ result->Add(CharacterRange::Range(0x1887, 0x2117), zone); |
+ result->Add(CharacterRange::Range(0x2119, 0x212D), zone); |
+ result->Add(CharacterRange::Range(0x212F, 0x309A), zone); |
+ result->Add(CharacterRange::Range(0x309D, String::kMaxCodePoint), zone); |
+ } else { |
+ result->Add(CharacterRange::Range(0x1885, 0x1886), zone); |
+ result->Add(CharacterRange::Singleton(0x2118), zone); |
+ result->Add(CharacterRange::Singleton(0x212E), zone); |
+ result->Add(CharacterRange::Range(0x309B, 0x309C), zone); |
+ } |
+ } else if (NameEquals(name, "Other_ID_Continue") || |
+ NameEquals(name, "OIDC")) { |
+ // From Unicode 9.0.0 PropList.txt |
+ // 00B7 ; Other_ID_Continue |
+ // 0387 ; Other_ID_Continue |
+ // 1369..1371 ; Other_ID_Continue |
+ // 19DA ; Other_ID_Continue |
+ if (negate) { |
+ result->Add(CharacterRange::Range(0x0000, 0x00B6), zone); |
+ result->Add(CharacterRange::Range(0x00B8, 0x0386), zone); |
+ result->Add(CharacterRange::Range(0x0388, 0x1368), zone); |
+ result->Add(CharacterRange::Range(0x1372, 0x19D9), zone); |
+ result->Add(CharacterRange::Range(0x19DB, String::kMaxCodePoint), zone); |
+ } else { |
+ result->Add(CharacterRange::Singleton(0x00B7), zone); |
+ result->Add(CharacterRange::Singleton(0x0387), zone); |
+ result->Add(CharacterRange::Range(0x1369, 0x1371), zone); |
+ result->Add(CharacterRange::Singleton(0x19DA), zone); |
+ } |
} else { |
return false; |
} |
@@ -1239,21 +1276,23 @@ bool RegExpParser::ParsePropertyClass(ZoneList<CharacterRange>* result, |
if (second_part.is_empty()) { |
// First attempt to interpret as general category property value name. |
const char* name = first_part.ToConstVector().start(); |
- if (LookupPropertyValueName(UCHAR_GENERAL_CATEGORY_MASK, name, negate, |
- result, zone())) { |
- return true; |
- } |
- // Interpret "Any", "ASCII", and "Assigned". |
- if (LookupSpecialPropertyValueName(name, result, negate, zone())) { |
+ if (LookupPropertyName(UCHAR_GENERAL_CATEGORY_MASK, name, negate, result, |
+ zone())) { |
return true; |
} |
+ |
// Then attempt to interpret as binary property name with value name 'Y'. |
UProperty property = u_getPropertyEnum(name); |
+ if (property == UCHAR_INVALID_CODE) { |
+ // Interpret "Any", "ASCII", "Assigned", "Other_ID_Start", and |
+ // "Other_ID_Continue". |
+ return LookupSpecialPropertyName(name, result, negate, zone()); |
+ } |
if (property < UCHAR_BINARY_START) return false; |
if (property >= UCHAR_BINARY_LIMIT) return false; |
if (!IsExactPropertyAlias(name, property)) return false; |
- return LookupPropertyValueName(property, negate ? "N" : "Y", false, result, |
- zone()); |
+ return LookupPropertyName(property, negate ? "N" : "Y", false, result, |
+ zone()); |
} else { |
// Both property name and value name are specified. Attempt to interpret |
// the property name as enumerated property. |
@@ -1268,8 +1307,7 @@ bool RegExpParser::ParsePropertyClass(ZoneList<CharacterRange>* result, |
property != UCHAR_SCRIPT_EXTENSIONS) { |
return false; |
} |
- return LookupPropertyValueName(property, value_name, negate, result, |
- zone()); |
+ return LookupPropertyName(property, value_name, negate, result, zone()); |
} |
} |