Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(83)

Unified Diff: src/regexp/regexp-parser.cc

Issue 1780183002: [regexp] extend property classes by unicode blocks. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master
Patch Set: rebase Created 4 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | test/mjsunit/harmony/regexp-property-blocks.js » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/regexp/regexp-parser.cc
diff --git a/src/regexp/regexp-parser.cc b/src/regexp/regexp-parser.cc
index cfcc1b43f9bcbb20c1dec7e73c48cdc59a83bcf0..7bf35c4a92b0e1970fab036dfec3e62190738584 100644
--- a/src/regexp/regexp-parser.cc
+++ b/src/regexp/regexp-parser.cc
@@ -843,58 +843,71 @@ bool RegExpParser::ParseUnicodeEscape(uc32* value) {
return result;
}
+bool LookupPropertyClass(UProperty property, const char* property_name,  // Resolves |property_name| as a value of |property| via ICU and appends the
+ ZoneList<CharacterRange>* result, Zone* zone) {  // resulting code point ranges to |result|. Returns true only if the name resolved and ICU built a non-empty set with status U_ZERO_ERROR.
+ int32_t property_value = u_getPropertyValueEnum(property, property_name);  // Map the textual name to ICU's integer value for this property.
+ if (property_value == UCHAR_INVALID_CODE) return false;  // The name is not a value of this property; |result| is left untouched.
+
+ USet* set = uset_openEmpty();  // Released by uset_close() below; there is no return between here and that call.
+ UErrorCode ec = U_ZERO_ERROR;
+ uset_applyIntPropertyValue(set, property, property_value, &ec);
+ bool success = ec == U_ZERO_ERROR && !uset_isEmpty(set);  // Any status other than U_ZERO_ERROR, or an empty set, counts as failure.
+
+ if (success) {
+ uset_removeAllStrings(set);  // Drop string items so that only code point ranges remain.
+ int item_count = uset_getItemCount(set);
+ int item_result = 0;
+ for (int i = 0; i < item_count; i++) {
+ uc32 start = 0;
+ uc32 end = 0;
+ item_result += uset_getItem(set, i, &start, &end, nullptr, 0, &ec);  // Per ICU docs this yields 0 for a range item; summed so one DCHECK covers all items.
+ result->Add(CharacterRange::Range(start, end), zone);
+ }
+ DCHECK_EQ(U_ZERO_ERROR, ec);  // No ICU status was raised while reading the items.
+ DCHECK_EQ(0, item_result);  // Every item was a range, none a string.
+ }
+ uset_close(set);
+ return success;
+}  // NOTE(review): this function uses ICU types but appears before the V8_I18N_SUPPORT guard opened in ParsePropertyClass - confirm builds without i18n support still compile.
+
bool RegExpParser::ParsePropertyClass(ZoneList<CharacterRange>* result) {  // Reads a property name (one character, or a {braced} name) and fills |result| with its ranges; returns false if the name is malformed or matches no property.
#ifdef V8_I18N_SUPPORT
- ZoneList<char> property_name(0, zone());
+ List<char> property_name_list;  // Characters of the property name; a terminating 0 is appended below before it is used as a C string.
if (current() == '{') {
for (Advance(); current() != '}'; Advance()) {
if (!has_next()) return false;
- property_name.Add(static_cast<char>(current()), zone());
+ property_name_list.Add(static_cast<char>(current()));  // Each parsed character is narrowed to char.
}
} else if (current() != kEndMarker) {
- property_name.Add(static_cast<char>(current()), zone());
+ property_name_list.Add(static_cast<char>(current()));  // Unbraced form: the name is the single current character.
} else {
return false;
}
Advance();
- property_name.Add(0, zone()); // null-terminate string.
-
- // Property names are defined in unicode database files. For aliases of
- // these property names, see PropertyValueAliases.txt.
- UProperty kPropertyClasses[] = {
- // General_Category (gc) found in PropertyValueAliases.txt
- UCHAR_GENERAL_CATEGORY_MASK,
- // Script (sc) found in Scripts.txt
- UCHAR_SCRIPT,
- };
-
- for (int i = 0; i < arraysize(kPropertyClasses); i++) {
- UProperty property_class = kPropertyClasses[i];
- int32_t category = u_getPropertyValueEnum(
- property_class, property_name.ToConstVector().start());
- if (category == UCHAR_INVALID_CODE) continue;
-
- USet* set = uset_openEmpty();
- UErrorCode ec = U_ZERO_ERROR;
- uset_applyIntPropertyValue(set, property_class, category, &ec);
- if (ec == U_ZERO_ERROR && !uset_isEmpty(set)) {
- uset_removeAllStrings(set);
- int item_count = uset_getItemCount(set);
- int item_result = 0;
- for (int i = 0; i < item_count; i++) {
- uc32 start = 0;
- uc32 end = 0;
- item_result += uset_getItem(set, i, &start, &end, nullptr, 0, &ec);
- result->Add(CharacterRange::Range(start, end), zone());
- }
- DCHECK_EQ(U_ZERO_ERROR, ec);
- DCHECK_EQ(0, item_result);
- }
- uset_close(set);
- return true;
+ property_name_list.Add(0); // null-terminate string.
+
+ const char* property_name = property_name_list.ToConstVector().start();  // Points into property_name_list's storage; advanced past "In" for the block lookup below.
+
+#define PROPERTY_NAME_LOOKUP(PROPERTY) \
+ do { \
+ if (LookupPropertyClass(PROPERTY, property_name, result, zone())) { \
+ return true; \
+ } \
+ } while (false)  // Expands to an early "return true" from ParsePropertyClass on the first successful lookup.
+
+ // General_Category (gc) found in PropertyValueAliases.txt
+ PROPERTY_NAME_LOOKUP(UCHAR_GENERAL_CATEGORY_MASK);
+ // Script (sc) found in Scripts.txt
+ PROPERTY_NAME_LOOKUP(UCHAR_SCRIPT);
+ // To disambiguate from script names, block names have an "In"-prefix.
+ if (property_name_list.length() > 3 && property_name[0] == 'I' &&  // length() includes the terminating 0, so this requires "In" plus at least one more character.
+ property_name[1] == 'n') {
+ // Block (blk) found in Blocks.txt
+ property_name += 2;  // Skip the "In" prefix so only the block name is looked up.
+ PROPERTY_NAME_LOOKUP(UCHAR_BLOCK);
}
+#undef PROPERTY_NAME_LOOKUP
#endif // V8_I18N_SUPPORT
-
return false;
}
« no previous file with comments | « no previous file | test/mjsunit/harmony/regexp-property-blocks.js » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698