Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(108)

Unified Diff: src/regexp/jsregexp.cc

Issue 1599303002: [regexp] implement case-insensitive unicode regexps. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@unicodeclass
Patch Set: fix mips Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: src/regexp/jsregexp.cc
diff --git a/src/regexp/jsregexp.cc b/src/regexp/jsregexp.cc
index 3559bcd111a4c19eb5755dd264d6a4a8cf39a88a..2229dd8e175cc9483741ad75bff773a0c0a7797f 100644
--- a/src/regexp/jsregexp.cc
+++ b/src/regexp/jsregexp.cc
@@ -25,6 +25,11 @@
#include "src/string-search.h"
#include "src/unicode-decoder.h"
+#ifdef V8_I18N_SUPPORT
+#include "unicode/uset.h"
+#include "unicode/utypes.h"
+#endif // V8_I18N_SUPPORT
+
#ifndef V8_INTERPRETED_REGEXP
#if V8_TARGET_ARCH_IA32
#include "src/regexp/ia32/regexp-macro-assembler-ia32.h"
@@ -4390,8 +4395,8 @@ void BackReferenceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
DCHECK_EQ(start_reg_ + 1, end_reg_);
if (compiler->ignore_case()) {
- assembler->CheckNotBackReferenceIgnoreCase(start_reg_, read_backward(),
- trace->backtrack());
+ assembler->CheckNotBackReferenceIgnoreCase(
+ start_reg_, read_backward(), compiler->unicode(), trace->backtrack());
} else {
assembler->CheckNotBackReference(start_reg_, read_backward(),
trace->backtrack());
@@ -4861,21 +4866,6 @@ bool RegExpCharacterClass::is_standard(Zone* zone) {
}
-bool RegExpCharacterClass::NeedsDesugaringForUnicode(Zone* zone) {
- ZoneList<CharacterRange>* ranges = this->ranges(zone);
- CharacterRange::Canonicalize(ranges);
- for (int i = ranges->length() - 1; i >= 0; i--) {
- uc32 from = ranges->at(i).from();
- uc32 to = ranges->at(i).to();
- // Check for non-BMP characters.
- if (to >= kNonBmpStart) return true;
- // Check for lone surrogates.
- if (from <= kTrailSurrogateEnd && to >= kLeadSurrogateStart) return true;
- }
- return false;
-}
-
-
UnicodeRangeSplitter::UnicodeRangeSplitter(Zone* zone,
ZoneList<CharacterRange>* base)
: zone_(zone),
@@ -5086,11 +5076,49 @@ void AddLoneTrailSurrogates(RegExpCompiler* compiler, ChoiceNode* result,
}
+void AddUnicodeCaseEquivalents(RegExpCompiler* compiler,
+ ZoneList<CharacterRange>* ranges) {
+#ifdef V8_I18N_SUPPORT
+ // Use ICU to replace |ranges| in place with its case-insensitive closure.
+ DCHECK(compiler->unicode());
+ DCHECK(compiler->ignore_case());
+ USet* set = uset_openEmpty();
+ for (int i = 0; i < ranges->length(); i++) {
+ uset_addRange(set, ranges->at(i).from(), ranges->at(i).to());
+ }
+ ranges->Clear();
+ uset_closeOver(set, USET_CASE_INSENSITIVE);
+ // Full case mappings map single characters to multiple characters.
+ // Those are represented as strings in the set. Remove them so that
+ // we end up with only simple and common case mappings.
+ uset_removeAllStrings(set);
+ int item_count = uset_getItemCount(set);
+ int item_result = 0;
+ UErrorCode ec = U_ZERO_ERROR;
+ Zone* zone = compiler->zone();
+ for (int i = 0; i < item_count; i++) {
+ uc32 start = 0;
+ uc32 end = 0;
+ item_result += uset_getItem(set, i, &start, &end, nullptr, 0, &ec);
+ ranges->Add(CharacterRange::Range(start, end), zone);
+ }
+ // Expect no ICU error, and every collected item to have been a range.
+ DCHECK_EQ(U_ZERO_ERROR, ec);
+ DCHECK_EQ(0, item_result);
+ uset_close(set);
+ CharacterRange::Canonicalize(ranges);
+#endif // V8_I18N_SUPPORT
+}
+
+
RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,
RegExpNode* on_success) {
set_.Canonicalize();
Zone* zone = compiler->zone();
ZoneList<CharacterRange>* ranges = this->ranges(zone);
+ if (compiler->unicode() && compiler->ignore_case()) {
+ AddUnicodeCaseEquivalents(compiler, ranges);
+ }
if (compiler->unicode() && !compiler->one_byte()) {
if (is_negated()) {
ZoneList<CharacterRange>* negated =

Powered by Google App Engine
This is Rietveld 408576698