Chromium Code Reviews| Index: src/regexp/regexp-macro-assembler.cc |
| diff --git a/src/regexp/regexp-macro-assembler.cc b/src/regexp/regexp-macro-assembler.cc |
| index caf8b51fe548d6a9e68365654110473ddbdea9b5..5d4158d2f29467b7d8a72a2bea7cdcd55ae7cfa9 100644 |
| --- a/src/regexp/regexp-macro-assembler.cc |
| +++ b/src/regexp/regexp-macro-assembler.cc |
| @@ -9,6 +9,10 @@ |
| #include "src/regexp/regexp-stack.h" |
| #include "src/simulator.h" |
| +#ifdef V8_I18N_SUPPORT |
| +#include "unicode/uchar.h" |
| +#endif // V8_I18N_SUPPORT |
| + |
| namespace v8 { |
| namespace internal { |
| @@ -23,6 +27,65 @@ RegExpMacroAssembler::~RegExpMacroAssembler() { |
| } |
| +// Case-insensitively compares two UC16 substrings of byte_length bytes each. |
| +// Returns 1 if the substrings match and 0 otherwise. |
| +// |
| +// With V8_I18N_SUPPORT, a null isolate selects the ICU path: a lead surrogate |
| +// followed by a trail surrogate in both inputs is combined into one code |
| +// point, and the u_foldCase() results are compared. In every other case the |
| +// isolate must be non-null and its Ecma262Canonicalize mapping is applied to |
| +// each code unit. |
| +int RegExpMacroAssembler::CaseInsensitiveCompareUC16(Address byte_offset1, |
| + Address byte_offset2, |
| + size_t byte_length, |
| + Isolate* isolate) { |
| + // This function is not allowed to cause a garbage collection. |
| + // A GC might move the calling generated code and invalidate the |
| + // return address on the stack. |
| + DCHECK(byte_length % 2 == 0); |
| + uc16* substring1 = reinterpret_cast<uc16*>(byte_offset1); |
| + uc16* substring2 = reinterpret_cast<uc16*>(byte_offset2); |
| + size_t length = byte_length >> 1; |
| + |
| +#ifdef V8_I18N_SUPPORT |
| + if (isolate == nullptr) { |
| + for (size_t i = 0; i < length; i++) { |
| + uc32 c1 = substring1[i]; |
| + uc32 c2 = substring2[i]; |
| + if (unibrow::Utf16::IsLeadSurrogate(c1)) { |

erikcorry
2016/01/25 10:26:37
This looks incomplete wrt unpaired surrogates at t
Yang
2016/01/25 11:46:37
I think this is correct. Unpaired lead surrogate a
erikcorry
2016/01/25 11:57:47
I guess we are here making use of the fact that th
Yang
2016/01/25 12:09:02
I added a comment about the fact that we rely on N
|
| + if (!unibrow::Utf16::IsLeadSurrogate(c2)) return 0; |
| + if (i + 1 < length) { |
| + uc16 c1t = substring1[i + 1]; |
| + uc16 c2t = substring2[i + 1]; |
| + if (unibrow::Utf16::IsTrailSurrogate(c1t) && |
| + unibrow::Utf16::IsTrailSurrogate(c2t)) { |
| + c1 = unibrow::Utf16::CombineSurrogatePair(c1, c1t); |
| + c2 = unibrow::Utf16::CombineSurrogatePair(c2, c2t); |
| + i++; |
| + } |
| + } |
| + } |
| + c1 = u_foldCase(c1, U_FOLD_CASE_DEFAULT); |
| + c2 = u_foldCase(c2, U_FOLD_CASE_DEFAULT); |
| + if (c1 != c2) return 0; |
| + } |
| + return 1; |
| + } |
| +#endif // V8_I18N_SUPPORT |

erikcorry
2016/01/25 10:26:37
Should this be a #else ?
Yang
2016/01/25 11:46:37
No. If we have ICU, we decide on whether to take t
|
| + DCHECK_NOT_NULL(isolate); |
| + // Fetch the mapping only after the null check: the ICU path above is |
| + // entered with a null isolate, which must never be dereferenced. |
| + unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize = |
| + isolate->regexp_macro_assembler_canonicalize(); |
| + for (size_t i = 0; i < length; i++) { |
| + unibrow::uchar c1 = substring1[i]; |
| + unibrow::uchar c2 = substring2[i]; |
| + if (c1 != c2) { |
| + unibrow::uchar s1[1] = { c1 }; |
| + canonicalize->get(c1, '\0', s1); |
| + if (s1[0] != c2) { |
| + unibrow::uchar s2[1] = { c2 }; |
| + canonicalize->get(c2, '\0', s2); |
| + if (s1[0] != s2[0]) { |
| + return 0; |
| + } |
| + } |
| + } |
| + } |
| + return 1; |
| +} |
| + |
| + |
| #ifndef V8_INTERPRETED_REGEXP // Avoid unused code, e.g., on ARM. |
| NativeRegExpMacroAssembler::NativeRegExpMacroAssembler(Isolate* isolate, |
| @@ -245,40 +308,6 @@ const byte NativeRegExpMacroAssembler::word_character_map[] = { |
| }; |
| -int NativeRegExpMacroAssembler::CaseInsensitiveCompareUC16( |
| - Address byte_offset1, |
| - Address byte_offset2, |
| - size_t byte_length, |
| - Isolate* isolate) { |
| - unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize = |
| - isolate->regexp_macro_assembler_canonicalize(); |
| - // This function is not allowed to cause a garbage collection. |
| - // A GC might move the calling generated code and invalidate the |
| - // return address on the stack. |
| - DCHECK(byte_length % 2 == 0); |
| - uc16* substring1 = reinterpret_cast<uc16*>(byte_offset1); |
| - uc16* substring2 = reinterpret_cast<uc16*>(byte_offset2); |
| - size_t length = byte_length >> 1; |
| - |
| - for (size_t i = 0; i < length; i++) { |
| - unibrow::uchar c1 = substring1[i]; |
| - unibrow::uchar c2 = substring2[i]; |
| - if (c1 != c2) { |
| - unibrow::uchar s1[1] = { c1 }; |
| - canonicalize->get(c1, '\0', s1); |
| - if (s1[0] != c2) { |
| - unibrow::uchar s2[1] = { c2 }; |
| - canonicalize->get(c2, '\0', s2); |
| - if (s1[0] != s2[0]) { |
| - return 0; |
| - } |
| - } |
| - } |
| - } |
| - return 1; |
| -} |
| - |
| - |
| Address NativeRegExpMacroAssembler::GrowStack(Address stack_pointer, |
| Address* stack_base, |
| Isolate* isolate) { |