Index: src/regexp/regexp-macro-assembler.cc |
diff --git a/src/regexp/regexp-macro-assembler.cc b/src/regexp/regexp-macro-assembler.cc |
index caf8b51fe548d6a9e68365654110473ddbdea9b5..21f4f8a1b011b069857a09af97966ec2bdb18d72 100644 |
--- a/src/regexp/regexp-macro-assembler.cc |
+++ b/src/regexp/regexp-macro-assembler.cc |
@@ -9,6 +9,10 @@ |
#include "src/regexp/regexp-stack.h" |
#include "src/simulator.h" |
+#ifdef V8_I18N_SUPPORT |
+#include "unicode/uchar.h" |
+#endif // V8_I18N_SUPPORT |
+ |
namespace v8 { |
namespace internal { |
@@ -23,6 +27,67 @@ RegExpMacroAssembler::~RegExpMacroAssembler() { |
} |
+// Case-insensitive comparison of two UC16 substrings of byte_length bytes. |
+// Returns 1 on a match, 0 otherwise. Must not cause a GC: a GC might move |
+// the calling generated code and invalidate the return address on the stack. |
+int RegExpMacroAssembler::CaseInsensitiveCompareUC16(Address byte_offset1, |
+                                                     Address byte_offset2, |
+                                                     size_t byte_length, |
+                                                     Isolate* isolate) { |
+  DCHECK(byte_length % 2 == 0); |
+  uc16* substring1 = reinterpret_cast<uc16*>(byte_offset1); |
+  uc16* substring2 = reinterpret_cast<uc16*>(byte_offset2); |
+  size_t length = byte_length >> 1; |
+ |
+#ifdef V8_I18N_SUPPORT |
+  if (isolate == nullptr) { |
+    // A null isolate selects ICU case folding over whole code points. |
+    for (size_t i = 0; i < length; i++) { |
+      uc32 c1 = substring1[i]; |
+      uc32 c2 = substring2[i]; |
+      if (unibrow::Utf16::IsLeadSurrogate(c1)) { |
+        // Non-BMP characters do not have case-equivalents in the BMP. |
+        // Both have to be non-BMP for them to be able to match. |
+        if (!unibrow::Utf16::IsLeadSurrogate(c2)) return 0; |
+        if (i + 1 < length) { |
+          uc16 c1t = substring1[i + 1]; |
+          uc16 c2t = substring2[i + 1]; |
+          if (unibrow::Utf16::IsTrailSurrogate(c1t) && |
+              unibrow::Utf16::IsTrailSurrogate(c2t)) { |
+            c1 = unibrow::Utf16::CombineSurrogatePair(c1, c1t); |
+            c2 = unibrow::Utf16::CombineSurrogatePair(c2, c2t); |
+            i++; |
+          } |
+        } |
+      } |
+      c1 = u_foldCase(c1, U_FOLD_CASE_DEFAULT); |
+      c2 = u_foldCase(c2, U_FOLD_CASE_DEFAULT); |
+      if (c1 != c2) return 0; |
+    } |
+    return 1; |
+  } |
+#endif  // V8_I18N_SUPPORT |
+  DCHECK_NOT_NULL(isolate); |
+  // Looked up only here: the null-isolate path above has already returned. |
+  unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize = |
+      isolate->regexp_macro_assembler_canonicalize(); |
+  for (size_t i = 0; i < length; i++) { |
+    unibrow::uchar c1 = substring1[i]; |
+    unibrow::uchar c2 = substring2[i]; |
+    if (c1 != c2) { |
+      unibrow::uchar s1[1] = {c1}; |
+      canonicalize->get(c1, '\0', s1); |
+      if (s1[0] != c2) { |
+        unibrow::uchar s2[1] = {c2}; |
+        canonicalize->get(c2, '\0', s2); |
+        if (s1[0] != s2[0]) return 0; |
+      } |
+    } |
+  } |
+  return 1; |
+} |
+ |
+ |
#ifndef V8_INTERPRETED_REGEXP // Avoid unused code, e.g., on ARM. |
NativeRegExpMacroAssembler::NativeRegExpMacroAssembler(Isolate* isolate, |
@@ -245,40 +310,6 @@ const byte NativeRegExpMacroAssembler::word_character_map[] = { |
}; |
-int NativeRegExpMacroAssembler::CaseInsensitiveCompareUC16( |
- Address byte_offset1, |
- Address byte_offset2, |
- size_t byte_length, |
- Isolate* isolate) { |
- unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize = |
- isolate->regexp_macro_assembler_canonicalize(); |
- // This function is not allowed to cause a garbage collection. |
- // A GC might move the calling generated code and invalidate the |
- // return address on the stack. |
- DCHECK(byte_length % 2 == 0); |
- uc16* substring1 = reinterpret_cast<uc16*>(byte_offset1); |
- uc16* substring2 = reinterpret_cast<uc16*>(byte_offset2); |
- size_t length = byte_length >> 1; |
- |
- for (size_t i = 0; i < length; i++) { |
- unibrow::uchar c1 = substring1[i]; |
- unibrow::uchar c2 = substring2[i]; |
- if (c1 != c2) { |
- unibrow::uchar s1[1] = { c1 }; |
- canonicalize->get(c1, '\0', s1); |
- if (s1[0] != c2) { |
- unibrow::uchar s2[1] = { c2 }; |
- canonicalize->get(c2, '\0', s2); |
- if (s1[0] != s2[0]) { |
- return 0; |
- } |
- } |
- } |
- } |
- return 1; |
-} |
- |
- |
Address NativeRegExpMacroAssembler::GrowStack(Address stack_pointer, |
Address* stack_base, |
Isolate* isolate) { |