Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(199)

Unified Diff: src/regexp/interpreter-irregexp.cc

Issue 1599303002: [regexp] implement case-insensitive unicode regexps. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@unicodeclass
Patch Set: fixes Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: src/regexp/interpreter-irregexp.cc
diff --git a/src/regexp/interpreter-irregexp.cc b/src/regexp/interpreter-irregexp.cc
index 67cd025953f300166956b4b2476e6da8163b40fd..14834d512a5c9aba83d1740f6e5d645375035673 100644
--- a/src/regexp/interpreter-irregexp.cc
+++ b/src/regexp/interpreter-irregexp.cc
@@ -15,37 +15,32 @@
#include "src/unicode.h"
#include "src/utils.h"
+#ifdef V8_I18N_SUPPORT
+#include "unicode/uchar.h"
+#endif // V8_I18N_SUPPORT
+
namespace v8 {
namespace internal {
typedef unibrow::Mapping<unibrow::Ecma262Canonicalize> Canonicalize;
-static bool BackRefMatchesNoCase(Canonicalize* interp_canonicalize,
- int from,
- int current,
- int len,
- Vector<const uc16> subject) {
- for (int i = 0; i < len; i++) {
- unibrow::uchar old_char = subject[from++];
- unibrow::uchar new_char = subject[current++];
- if (old_char == new_char) continue;
- unibrow::uchar old_string[1] = { old_char };
- unibrow::uchar new_string[1] = { new_char };
- interp_canonicalize->get(old_char, '\0', old_string);
- interp_canonicalize->get(new_char, '\0', new_string);
- if (old_string[0] != new_string[0]) {
- return false;
- }
- }
- return true;
+static bool BackRefMatchesNoCase(Isolate* isolate, int from, int current,
+ int len, Vector<const uc16> subject,
+ bool unicode) {
+ Address offset_a =
+ reinterpret_cast<Address>(const_cast<uc16*>(&subject.at(from)));
+ Address offset_b =
+ reinterpret_cast<Address>(const_cast<uc16*>(&subject.at(current)));
+ size_t length = len * kUC16Size;
+ return RegExpMacroAssembler::CaseInsensitiveCompareUC16(
+ offset_a, offset_b, length, unicode ? nullptr : isolate) == 1;
}
-static bool BackRefMatchesNoCase(Canonicalize* interp_canonicalize,
- int from,
- int current,
- int len,
- Vector<const uint8_t> subject) {
+static bool BackRefMatchesNoCase(Isolate* isolate, int from, int current,
+ int len, Vector<const uint8_t> subject,
+ bool unicode) {
+ // For Latin1 characters the unicode flag makes no difference.
for (int i = 0; i < len; i++) {
unsigned int old_char = subject[from++];
unsigned int new_char = subject[current++];
@@ -523,13 +518,16 @@ static RegExpImpl::IrregexpResult RawMatch(Isolate* isolate,
pc += BC_CHECK_NOT_BACK_REF_BACKWARD_LENGTH;
break;
}
+ BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE)
BYTECODE(CHECK_NOT_BACK_REF_NO_CASE) {
+ bool unicode =
+ (insn & BYTECODE_MASK) == BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE;
int from = registers[insn >> BYTECODE_SHIFT];
int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
if (from >= 0 && len > 0) {
if (current + len > subject.length() ||
- !BackRefMatchesNoCase(isolate->interp_canonicalize_mapping(),
- from, current, len, subject)) {
+ !BackRefMatchesNoCase(isolate, from, current, len, subject,
+ unicode)) {
pc = code_base + Load32Aligned(pc + 4);
break;
}
@@ -538,13 +536,16 @@ static RegExpImpl::IrregexpResult RawMatch(Isolate* isolate,
pc += BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH;
break;
}
+ BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD)
BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD) {
+ bool unicode = (insn & BYTECODE_MASK) ==
+ BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD;
int from = registers[insn >> BYTECODE_SHIFT];
int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
if (from >= 0 && len > 0) {
if (current - len < 0 ||
- !BackRefMatchesNoCase(isolate->interp_canonicalize_mapping(),
- from, current - len, len, subject)) {
+ !BackRefMatchesNoCase(isolate, from, current - len, len, subject,
+ unicode)) {
pc = code_base + Load32Aligned(pc + 4);
break;
}

Powered by Google App Engine
This is Rietveld 408576698