Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(91)

Unified Diff: src/regexp/regexp-parser.cc

Issue 1845243002: [regexp] extend \p syntax to binary and enumerated properties. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master
Patch Set: Created 4 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: src/regexp/regexp-parser.cc
diff --git a/src/regexp/regexp-parser.cc b/src/regexp/regexp-parser.cc
index b0dcaa8ee95b27cc29959202cd52dc73a4dc8adf..8a9e2a81c114dcc49fb1cae46e0e2ac07afa4567 100644
--- a/src/regexp/regexp-parser.cc
+++ b/src/regexp/regexp-parser.cc
@@ -845,29 +845,46 @@ bool RegExpParser::ParseUnicodeEscape(uc32* value) {
}
#ifdef V8_I18N_SUPPORT
-bool IsExactPropertyValueAlias(const char* property_name, UProperty property,
- int32_t property_value) {
+bool IsExactPropertyAlias(const char* property_name, UProperty property) {
+ const char* short_name = u_getPropertyName(property, U_SHORT_PROPERTY_NAME);
+ if (short_name != NULL && strcmp(property_name, short_name) == 0) return true;
+ for (int i = 0;; i++) {
+ const char* long_name = u_getPropertyName(
+ property, static_cast<UPropertyNameChoice>(U_LONG_PROPERTY_NAME + i));
+ if (long_name == NULL) break;
+ if (strcmp(property_name, long_name) == 0) return true;
+ }
+ return false;
+}
+
+bool IsExactPropertyValueAlias(const char* property_value_name,
+ UProperty property, int32_t property_value) {
const char* short_name =
u_getPropertyValueName(property, property_value, U_SHORT_PROPERTY_NAME);
- if (short_name != NULL && strcmp(property_name, short_name) == 0) return true;
+ if (short_name != NULL && strcmp(property_value_name, short_name) == 0) {
+ return true;
+ }
for (int i = 0;; i++) {
const char* long_name = u_getPropertyValueName(
property, property_value,
static_cast<UPropertyNameChoice>(U_LONG_PROPERTY_NAME + i));
if (long_name == NULL) break;
- if (strcmp(property_name, long_name) == 0) return true;
+ if (strcmp(property_value_name, long_name) == 0) return true;
}
return false;
}
-bool LookupPropertyClass(UProperty property, const char* property_name,
- ZoneList<CharacterRange>* result, Zone* zone) {
- int32_t property_value = u_getPropertyValueEnum(property, property_name);
+bool LookupPropertyValueName(UProperty property,
+ const char* property_value_name,
+ ZoneList<CharacterRange>* result, Zone* zone) {
+ int32_t property_value =
+ u_getPropertyValueEnum(property, property_value_name);
if (property_value == UCHAR_INVALID_CODE) return false;
// We require the property name to match exactly to one of the property value
// aliases. However, u_getPropertyValueEnum uses loose matching.
- if (!IsExactPropertyValueAlias(property_name, property, property_value)) {
+ if (!IsExactPropertyValueAlias(property_value_name, property,
+ property_value)) {
return false;
}
@@ -892,49 +909,75 @@ bool LookupPropertyClass(UProperty property, const char* property_name,
uset_close(set);
return success;
}
-#endif // V8_I18N_SUPPORT
bool RegExpParser::ParsePropertyClass(ZoneList<CharacterRange>* result) {
-#ifdef V8_I18N_SUPPORT
- List<char> property_name_list;
+ // Parse the property class as follows:
+ // - \pN with a single-character N is equivalent to \p{N}
+ // - In \p{name}, 'name' is interpreted
+ // - either as a general category property value name.
+ // - or as a binary property name.
+ // - In \p{name=value}, 'name' is interpreted as an enumerated property name,
+ // and 'value' is interpreted as one of the available property value names.
+ // - Aliases in PropertyAliases.txt and PropertyValueAliases.txt can be used.
+ // - Loose matching is not applied.
+ List<char> first_part;
+ List<char> second_part;
if (current() == '{') {
- for (Advance(); current() != '}'; Advance()) {
+ // Parse \p{[PropertyName=]PropertyValueName}
+ for (Advance(); current() != '}' && current() != '='; Advance()) {
if (!has_next()) return false;
- property_name_list.Add(static_cast<char>(current()));
+ first_part.Add(static_cast<char>(current()));
+ }
+ if (current() == '=') {
+ for (Advance(); current() != '}'; Advance()) {
+ if (!has_next()) return false;
+ second_part.Add(static_cast<char>(current()));
+ }
+ second_part.Add(0); // null-terminate string.
}
} else if (current() != kEndMarker) {
- property_name_list.Add(static_cast<char>(current()));
+ // Parse \pN, where N is a single-character property value name.
+ first_part.Add(static_cast<char>(current()));
} else {
return false;
}
Advance();
- property_name_list.Add(0); // null-terminate string.
-
- const char* property_name = property_name_list.ToConstVector().start();
-
-#define PROPERTY_NAME_LOOKUP(PROPERTY) \
- do { \
- if (LookupPropertyClass(PROPERTY, property_name, result, zone())) { \
- return true; \
- } \
- } while (false)
-
- // General_Category (gc) found in PropertyValueAliases.txt
- PROPERTY_NAME_LOOKUP(UCHAR_GENERAL_CATEGORY_MASK);
- // Script (sc) found in Scripts.txt
- PROPERTY_NAME_LOOKUP(UCHAR_SCRIPT);
- // To disambiguate from script names, block names have an "In"-prefix.
- if (property_name_list.length() > 3 && property_name[0] == 'I' &&
- property_name[1] == 'n') {
- // Block (blk) found in Blocks.txt
- property_name += 2;
- PROPERTY_NAME_LOOKUP(UCHAR_BLOCK);
+ first_part.Add(0); // null-terminate string.
+
+ if (second_part.is_empty()) {
+ // First attempt to interpret as general category property value name.
+ const char* name = first_part.ToConstVector().start();
+ if (LookupPropertyValueName(UCHAR_GENERAL_CATEGORY_MASK, name, result,
+ zone())) {
+ return true;
+ }
+ // Then attempt to interpret as binary property name with value name 'Y'.
+ UProperty property = u_getPropertyEnum(name);
+ if (property < UCHAR_BINARY_START) return false;
+ if (property >= UCHAR_BINARY_LIMIT) return false;
+ if (!IsExactPropertyAlias(name, property)) return false;
+ return LookupPropertyValueName(property, "Y", result, zone());
+ } else {
+ // Both property name and value name are specified. Attempt to interpret
+ // the property name as an enumerated property.
+ const char* property_name = first_part.ToConstVector().start();
+ const char* value_name = second_part.ToConstVector().start();
+ UProperty property = u_getPropertyEnum(property_name);
+ if (property < UCHAR_INT_START) return false;
+ if (property >= UCHAR_INT_LIMIT) return false;
+ if (!IsExactPropertyAlias(property_name, property)) return false;
+ return LookupPropertyValueName(property, value_name, result, zone());
}
-#undef PROPERTY_NAME_LOOKUP
-#endif // V8_I18N_SUPPORT
+}
+
+#else // V8_I18N_SUPPORT
+
+bool RegExpParser::ParsePropertyClass(ZoneList<CharacterRange>* result) {
return false;
}
+#endif // V8_I18N_SUPPORT
+
bool RegExpParser::ParseUnlimitedLengthHexNumber(int max_value, uc32* value) {
uc32 x = 0;
int d = HexValue(current());
« no previous file with comments | « no previous file | test/mjsunit/harmony/regexp-property-binary.js » ('j') | test/mjsunit/harmony/regexp-property-exact-match.js » ('J')

Powered by Google App Engine
This is Rietveld 408576698