Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(567)

Unified Diff: src/regexp/regexp-parser.cc

Issue 1780183002: [regexp] extend property classes by unicode blocks. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master
Patch Set: . Created 4 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | test/mjsunit/harmony/regexp-property-blocks.js » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/regexp/regexp-parser.cc
diff --git a/src/regexp/regexp-parser.cc b/src/regexp/regexp-parser.cc
index 21992e42a64ee337a5448444f30e6f9cb050bf44..cfbdb6b3e416f7f06ef01bb29bb2bec8768b96b8 100644
--- a/src/regexp/regexp-parser.cc
+++ b/src/regexp/regexp-parser.cc
@@ -836,60 +836,79 @@ bool RegExpParser::ParseUnicodeEscape(uc32* value) {
return result;
}
+ZoneList<CharacterRange>* LookupPropertyClass(UProperty property,  // Resolves |property_name| as a value of |property| and returns
+ const char* property_name,  // the matching code point ranges in a list allocated in |zone|; returns nullptr when the
+ Zone* zone) {  // name is not a value of |property|, ICU does not report U_ZERO_ERROR, or the set is empty.
+ int32_t property_value = u_getPropertyValueEnum(property, property_name);
+ if (property_value == UCHAR_INVALID_CODE) return nullptr;  // Name is not a known value of this property.
+
+ USet* set = uset_openEmpty();  // NOTE(review): ICU types are used here outside the V8_I18N_SUPPORT guard that wraps the caller - confirm non-i18n builds still compile.
+ UErrorCode ec = U_ZERO_ERROR;
+ uset_applyIntPropertyValue(set, property, property_value, &ec);
+ ZoneList<CharacterRange>* ranges = nullptr;  // Stays nullptr unless the set was built without error and is non-empty.
+ if (ec == U_ZERO_ERROR && !uset_isEmpty(set)) {
+ uset_removeAllStrings(set);  // Drop string items so that only code point ranges are left to enumerate.
+ int item_count = uset_getItemCount(set);
+ ranges = new (zone) ZoneList<CharacterRange>(item_count, zone);
+ int item_result = 0;  // Sum of uset_getItem return values; expected to remain 0 (checked below).
+ for (int i = 0; i < item_count; i++) {
+ uc32 start = 0;
+ uc32 end = 0;
+ item_result += uset_getItem(set, i, &start, &end, nullptr, 0, &ec);
+ ranges->Add(CharacterRange::Range(start, end), zone);  // NOTE(review): added unconditionally; a failed uset_getItem is only caught by the DCHECKs below.
+ }
+ DCHECK_EQ(U_ZERO_ERROR, ec);
+ DCHECK_EQ(0, item_result);
+ }
+ uset_close(set);  // The set is closed on every path that opened it.
+ return ranges;
+}
+
ZoneList<CharacterRange>* RegExpParser::ParsePropertyClass() {  // Reads a property name at the current position and returns its character ranges, or nullptr.
#ifdef V8_I18N_SUPPORT
- ZoneList<char> property_name(0, zone());
+ List<char> property_name_list(0);  // Accumulates the property name; NUL-terminated further down.
 if (current() == '{') {  // Braced form: the name is everything up to the closing '}'.
 for (Advance(); current() != '}'; Advance()) {
 if (!has_next()) return nullptr;  // Give up when has_next() is false before a '}' was seen.
- property_name.Add(static_cast<char>(current()), zone());
+ property_name_list.Add(static_cast<char>(current()));
 }
 } else if (current() != kEndMarker) {  // Unbraced form: the current character alone is the name.
- property_name.Add(static_cast<char>(current()), zone());
+ property_name_list.Add(static_cast<char>(current()));
 } else {
 return nullptr;
 }
 Advance();  // Step past the closing '}' or the single-character name.
- property_name.Add(0, zone()); // null-terminate string.
+ property_name_list.Add(0); // null-terminate string.
+
+ const char* property_name = property_name_list.ToConstVector().start();  // C-string view of the collected name (presumably the list's own storage).
+
+ ZoneList<CharacterRange>* ranges = nullptr;  // Scratch result assigned by PROPERTY_NAME_LOOKUP.
+
+#define PROPERTY_NAME_LOOKUP(PROPERTY) \
+ do { \
+ ranges = LookupPropertyClass(PROPERTY, property_name, zone()); \
+ if (ranges != nullptr) return ranges; \
+ } while (false)  // Returns from ParsePropertyClass as soon as a lookup yields non-null ranges.
 // Property names are defined in unicode database files. For aliases of
 // these property names, see PropertyValueAliases.txt.
- UProperty kPropertyClasses[] = {
- // General_Category (gc) found in PropertyValueAliases.txt
- UCHAR_GENERAL_CATEGORY_MASK,
- // Script (sc) found in Scripts.txt
- UCHAR_SCRIPT,
- };
-
- for (int i = 0; i < arraysize(kPropertyClasses); i++) {
- UProperty property_class = kPropertyClasses[i];
- int32_t category = u_getPropertyValueEnum(
- property_class, property_name.ToConstVector().start());
- if (category == UCHAR_INVALID_CODE) continue;
-
- USet* set = uset_openEmpty();
- UErrorCode ec = U_ZERO_ERROR;
- uset_applyIntPropertyValue(set, property_class, category, &ec);
- ZoneList<CharacterRange>* ranges = nullptr;
- if (ec == U_ZERO_ERROR && !uset_isEmpty(set)) {
- uset_removeAllStrings(set);
- int item_count = uset_getItemCount(set);
- ranges = new (zone()) ZoneList<CharacterRange>(item_count, zone());
- int item_result = 0;
- for (int i = 0; i < item_count; i++) {
- uc32 start = 0;
- uc32 end = 0;
- item_result += uset_getItem(set, i, &start, &end, nullptr, 0, &ec);
- ranges->Add(CharacterRange::Range(start, end), zone());
- }
- DCHECK_EQ(U_ZERO_ERROR, ec);
- DCHECK_EQ(0, item_result);
- }
- uset_close(set);
- return ranges;
+ // General_Category (gc) found in PropertyValueAliases.txt
+ PROPERTY_NAME_LOOKUP(UCHAR_GENERAL_CATEGORY_MASK);  // Lookups run in this order; the first one that matches wins.
+ // Script (sc) found in Scripts.txt
+ PROPERTY_NAME_LOOKUP(UCHAR_SCRIPT);
+ // Block (blk) found in Blocks.txt
+ PROPERTY_NAME_LOOKUP(UCHAR_BLOCK);
+ // For disambiguation, script names may have an "Is"-prefix and block names
+ // may have an "In"-prefix. This convention is up for debate and for the spec
+ // to settle.
Dan Ehrenberg 2016/03/10 18:43:20 I don't like this mechanism, of letting them overl
+ if (property_name_list.length() > 3 && property_name[0] == 'I') {  // length() includes the trailing NUL, so the name has at least 3 characters.
+ char second_char = property_name[1];
+ property_name += 2;  // Skip the two-character prefix before retrying the lookup.
+ if (second_char == 's') PROPERTY_NAME_LOOKUP(UCHAR_SCRIPT);
+ if (second_char == 'n') PROPERTY_NAME_LOOKUP(UCHAR_BLOCK);
 }
+#undef PROPERTY_NAME_LOOKUP
#endif // V8_I18N_SUPPORT
-
 return nullptr;  // No lookup matched, or the build has no V8_I18N_SUPPORT.
}
« no previous file with comments | « no previous file | test/mjsunit/harmony/regexp-property-blocks.js » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698