Index: src/regexp/regexp-parser.cc |
diff --git a/src/regexp/regexp-parser.cc b/src/regexp/regexp-parser.cc |
index b0dcaa8ee95b27cc29959202cd52dc73a4dc8adf..8a9e2a81c114dcc49fb1cae46e0e2ac07afa4567 100644 |
--- a/src/regexp/regexp-parser.cc |
+++ b/src/regexp/regexp-parser.cc |
@@ -845,29 +845,46 @@ bool RegExpParser::ParseUnicodeEscape(uc32* value) { |
} |
#ifdef V8_I18N_SUPPORT |
-bool IsExactPropertyValueAlias(const char* property_name, UProperty property, |
- int32_t property_value) { |
+// Returns true if property_name is spelled exactly like one of the names that |
+// u_getPropertyName reports for property: the short name, or any long name |
+// obtained with U_LONG_PROPERTY_NAME + i for i = 0, 1, ... until NULL is |
+// returned. Names are compared with strcmp, so no loose matching is applied. |
+bool IsExactPropertyAlias(const char* property_name, UProperty property) { |
+ const char* short_name = u_getPropertyName(property, U_SHORT_PROPERTY_NAME); |
+ if (short_name != NULL && strcmp(property_name, short_name) == 0) return true; |
+ for (int i = 0;; i++) { |
+ const char* long_name = u_getPropertyName( |
+ property, static_cast<UPropertyNameChoice>(U_LONG_PROPERTY_NAME + i)); |
+ if (long_name == NULL) break; |
+ if (strcmp(property_name, long_name) == 0) return true; |
+ } |
+ return false; |
+} |
+ |
+// Returns true if property_value_name is spelled exactly like one of the names |
+// that u_getPropertyValueName reports for property_value of property: the |
+// short name, or any long name obtained with U_LONG_PROPERTY_NAME + i for |
+// i = 0, 1, ... until NULL is returned. Names are compared with strcmp. |
+bool IsExactPropertyValueAlias(const char* property_value_name, |
+ UProperty property, int32_t property_value) { |
const char* short_name = |
u_getPropertyValueName(property, property_value, U_SHORT_PROPERTY_NAME); |
- if (short_name != NULL && strcmp(property_name, short_name) == 0) return true; |
+ if (short_name != NULL && strcmp(property_value_name, short_name) == 0) { |
+ return true; |
+ } |
for (int i = 0;; i++) { |
const char* long_name = u_getPropertyValueName( |
property, property_value, |
static_cast<UPropertyNameChoice>(U_LONG_PROPERTY_NAME + i)); |
if (long_name == NULL) break; |
- if (strcmp(property_name, long_name) == 0) return true; |
+ if (strcmp(property_value_name, long_name) == 0) return true; |
} |
return false; |
} |
-bool LookupPropertyClass(UProperty property, const char* property_name, |
- ZoneList<CharacterRange>* result, Zone* zone) { |
- int32_t property_value = u_getPropertyValueEnum(property, property_name); |
+bool LookupPropertyValueName(UProperty property, |
+ const char* property_value_name, |
+ ZoneList<CharacterRange>* result, Zone* zone) { |
+ int32_t property_value = |
+ u_getPropertyValueEnum(property, property_value_name); |
if (property_value == UCHAR_INVALID_CODE) return false; |
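- // We require the property name to match exactly to one of the property value |
- // aliases. However, u_getPropertyValueEnum uses loose matching. |
+ // We require the property value name to match one of the property value |
+ // aliases exactly, because u_getPropertyValueEnum uses loose matching. |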
- if (!IsExactPropertyValueAlias(property_name, property, property_value)) { |
+ if (!IsExactPropertyValueAlias(property_value_name, property, |
+ property_value)) { |
return false; |
} |
@@ -892,49 +909,75 @@ bool LookupPropertyClass(UProperty property, const char* property_name, |
uset_close(set); |
return success; |
} |
-#endif // V8_I18N_SUPPORT |
bool RegExpParser::ParsePropertyClass(ZoneList<CharacterRange>* result) { |
-#ifdef V8_I18N_SUPPORT |
- List<char> property_name_list; |
+ // Parse the property class as follows: |
+ // - \pN with a single-character N is equivalent to \p{N}. |
+ // - In \p{name}, 'name' is interpreted, in this order, |
+ // - either as a General_Category property value name, |
+ // - or as a binary property name (looked up with value name 'Y'). |
+ // - In \p{name=value}, 'name' is interpreted as an enumerated property name, |
+ // and 'value' is interpreted as one of the available property value names. |
+ // - Aliases in PropertyAliases.txt and PropertyValueAliases.txt can be used. |
+ // - Loose matching is not applied: a name must equal an alias exactly. |
+ List<char> first_part; |
+ List<char> second_part; |
if (current() == '{') { |
- for (Advance(); current() != '}'; Advance()) { |
+ // Parse \p{[PropertyName=]PropertyValueName}. NOTE(review): static_cast<char> may truncate wide code units - confirm. |
+ for (Advance(); current() != '}' && current() != '='; Advance()) { |
if (!has_next()) return false; |
- property_name_list.Add(static_cast<char>(current())); |
+ first_part.Add(static_cast<char>(current())); |
+ } |
+ if (current() == '=') { |
+ for (Advance(); current() != '}'; Advance()) { |
+ if (!has_next()) return false; |
+ second_part.Add(static_cast<char>(current())); |
+ } |
+ second_part.Add(0); // null-terminate string. |
} |
} else if (current() != kEndMarker) { |
- property_name_list.Add(static_cast<char>(current())); |
+ // Parse \pN, where N is a single-character property value name. |
+ first_part.Add(static_cast<char>(current())); |
} else { |
return false; |
} |
Advance(); |
- property_name_list.Add(0); // null-terminate string. |
- |
- const char* property_name = property_name_list.ToConstVector().start(); |
- |
-#define PROPERTY_NAME_LOOKUP(PROPERTY) \ |
- do { \ |
- if (LookupPropertyClass(PROPERTY, property_name, result, zone())) { \ |
- return true; \ |
- } \ |
- } while (false) |
- |
- // General_Category (gc) found in PropertyValueAliases.txt |
- PROPERTY_NAME_LOOKUP(UCHAR_GENERAL_CATEGORY_MASK); |
- // Script (sc) found in Scripts.txt |
- PROPERTY_NAME_LOOKUP(UCHAR_SCRIPT); |
- // To disambiguate from script names, block names have an "In"-prefix. |
- if (property_name_list.length() > 3 && property_name[0] == 'I' && |
- property_name[1] == 'n') { |
- // Block (blk) found in Blocks.txt |
- property_name += 2; |
- PROPERTY_NAME_LOOKUP(UCHAR_BLOCK); |
+ first_part.Add(0); // null-terminate string. |
+ |
+ if (second_part.is_empty()) { |
+ // First attempt to interpret as general category property value name. |
+ const char* name = first_part.ToConstVector().start(); |
+ if (LookupPropertyValueName(UCHAR_GENERAL_CATEGORY_MASK, name, result, |
+ zone())) { |
+ return true; |
+ } |
+ // Then attempt to interpret as binary property name with value name 'Y'. |
+ UProperty property = u_getPropertyEnum(name); |
+ if (property < UCHAR_BINARY_START) return false; |
+ if (property >= UCHAR_BINARY_LIMIT) return false; |
+ if (!IsExactPropertyAlias(name, property)) return false; |
+ return LookupPropertyValueName(property, "Y", result, zone()); |
+ } else { |
+ // Both property name and value name are specified. Attempt to interpret |
+ // the property name as an enumerated property. |
+ const char* property_name = first_part.ToConstVector().start(); |
+ const char* value_name = second_part.ToConstVector().start(); |
+ UProperty property = u_getPropertyEnum(property_name); |
+ if (property < UCHAR_INT_START) return false; |
+ if (property >= UCHAR_INT_LIMIT) return false; |
+ if (!IsExactPropertyAlias(property_name, property)) return false; |
+ return LookupPropertyValueName(property, value_name, result, zone()); |
} |
-#undef PROPERTY_NAME_LOOKUP |
-#endif // V8_I18N_SUPPORT |
+} |
+ |
+#else // V8_I18N_SUPPORT |
+ |
+// Variant compiled when V8_I18N_SUPPORT is not defined: always returns false. |
+bool RegExpParser::ParsePropertyClass(ZoneList<CharacterRange>* result) { |
return false; |
} |
+#endif // V8_I18N_SUPPORT |
+ |
bool RegExpParser::ParseUnlimitedLengthHexNumber(int max_value, uc32* value) { |
uc32 x = 0; |
int d = HexValue(current()); |