Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(97)

Unified Diff: src/regexp/regexp-parser.cc

Issue 2808803002: [regexp] implement \p{Other_ID_Start} and \p{Other_ID_Continue}. (Closed)
Patch Set: address comments | Created 3 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | test/mjsunit/harmony/regexp-property-special.js » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
Index: src/regexp/regexp-parser.cc
diff --git a/src/regexp/regexp-parser.cc b/src/regexp/regexp-parser.cc
index 3d2261a9199f02638c710579b083369451ba581a..22e0eb12d3b9a055362dc02f3a2c485bc0bda1e6 100644
--- a/src/regexp/regexp-parser.cc
+++ b/src/regexp/regexp-parser.cc
@@ -1143,9 +1143,9 @@ bool IsExactPropertyValueAlias(const char* property_value_name,
return false;
}
-bool LookupPropertyValueName(UProperty property,
- const char* property_value_name, bool negate,
- ZoneList<CharacterRange>* result, Zone* zone) {
+bool LookupPropertyName(UProperty property, const char* property_value_name,
+ bool negate, ZoneList<CharacterRange>* result,
+ Zone* zone) {
UProperty property_for_lookup = property;
if (property_for_lookup == UCHAR_SCRIPT_EXTENSIONS) {
// For the property Script_Extensions, we have to do the property value
@@ -1185,9 +1185,9 @@ inline bool NameEquals(const char* name, const char (&literal)[N]) {
return strncmp(name, literal, N + 1) == 0;
}
-bool LookupSpecialPropertyValueName(const char* name,
- ZoneList<CharacterRange>* result,
- bool negate, Zone* zone) {
+bool LookupSpecialPropertyName(const char* name,
+ ZoneList<CharacterRange>* result, bool negate,
+ Zone* zone) {
if (NameEquals(name, "Any")) {
if (!negate) result->Add(CharacterRange::Everything(), zone);
} else if (NameEquals(name, "ASCII")) {
@@ -1195,8 +1195,45 @@ bool LookupSpecialPropertyValueName(const char* name,
: CharacterRange::Range(0x0, 0x7f),
zone);
} else if (NameEquals(name, "Assigned")) {
- return LookupPropertyValueName(UCHAR_GENERAL_CATEGORY, "Unassigned",
- !negate, result, zone);
+ return LookupPropertyName(UCHAR_GENERAL_CATEGORY, "Unassigned", !negate,
+ result, zone);
+ } else if (NameEquals(name, "Other_ID_Start") || NameEquals(name, "OIDS")) {
+ // From Unicode 9.0.0 PropList.txt
+ // 1885..1886 ; Other_ID_Start
+ // 2118 ; Other_ID_Start
+ // 212E ; Other_ID_Start
+ // 309B..309C ; Other_ID_Start
+ if (negate) {
+ result->Add(CharacterRange::Range(0x0000, 0x1884), zone);
+ result->Add(CharacterRange::Range(0x1887, 0x2117), zone);
+ result->Add(CharacterRange::Range(0x2119, 0x212D), zone);
+ result->Add(CharacterRange::Range(0x212F, 0x309A), zone);
+ result->Add(CharacterRange::Range(0x309D, String::kMaxCodePoint), zone);
+ } else {
+ result->Add(CharacterRange::Range(0x1885, 0x1886), zone);
+ result->Add(CharacterRange::Singleton(0x2118), zone);
+ result->Add(CharacterRange::Singleton(0x212E), zone);
+ result->Add(CharacterRange::Range(0x309B, 0x309C), zone);
+ }
+ } else if (NameEquals(name, "Other_ID_Continue") ||
+ NameEquals(name, "OIDC")) {
+ // From Unicode 9.0.0 PropList.txt
+ // 00B7 ; Other_ID_Continue
+ // 0387 ; Other_ID_Continue
+ // 1369..1371 ; Other_ID_Continue
+ // 19DA ; Other_ID_Continue
+ if (negate) {
+ result->Add(CharacterRange::Range(0x0000, 0x00B6), zone);
+ result->Add(CharacterRange::Range(0x00B8, 0x0386), zone);
+ result->Add(CharacterRange::Range(0x0388, 0x1368), zone);
+ result->Add(CharacterRange::Range(0x1372, 0x19D9), zone);
+ result->Add(CharacterRange::Range(0x19DB, String::kMaxCodePoint), zone);
+ } else {
+ result->Add(CharacterRange::Singleton(0x00B7), zone);
+ result->Add(CharacterRange::Singleton(0x0387), zone);
+ result->Add(CharacterRange::Range(0x1369, 0x1371), zone);
+ result->Add(CharacterRange::Singleton(0x19DA), zone);
+ }
} else {
return false;
}
@@ -1239,21 +1276,23 @@ bool RegExpParser::ParsePropertyClass(ZoneList<CharacterRange>* result,
if (second_part.is_empty()) {
// First attempt to interpret as general category property value name.
const char* name = first_part.ToConstVector().start();
- if (LookupPropertyValueName(UCHAR_GENERAL_CATEGORY_MASK, name, negate,
- result, zone())) {
- return true;
- }
- // Interpret "Any", "ASCII", and "Assigned".
- if (LookupSpecialPropertyValueName(name, result, negate, zone())) {
+ if (LookupPropertyName(UCHAR_GENERAL_CATEGORY_MASK, name, negate, result,
+ zone())) {
return true;
}
+
// Then attempt to interpret as binary property name with value name 'Y'.
UProperty property = u_getPropertyEnum(name);
+ if (property == UCHAR_INVALID_CODE) {
+ // Interpret "Any", "ASCII", "Assigned", "Other_ID_Start", and
+ // "Other_ID_Continue".
+ return LookupSpecialPropertyName(name, result, negate, zone());
+ }
if (property < UCHAR_BINARY_START) return false;
if (property >= UCHAR_BINARY_LIMIT) return false;
if (!IsExactPropertyAlias(name, property)) return false;
- return LookupPropertyValueName(property, negate ? "N" : "Y", false, result,
- zone());
+ return LookupPropertyName(property, negate ? "N" : "Y", false, result,
+ zone());
} else {
// Both property name and value name are specified. Attempt to interpret
// the property name as enumerated property.
@@ -1268,8 +1307,7 @@ bool RegExpParser::ParsePropertyClass(ZoneList<CharacterRange>* result,
property != UCHAR_SCRIPT_EXTENSIONS) {
return false;
}
- return LookupPropertyValueName(property, value_name, negate, result,
- zone());
+ return LookupPropertyName(property, value_name, negate, result, zone());
}
}
« no previous file with comments | « no previous file | test/mjsunit/harmony/regexp-property-special.js » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698