| Index: src/regexp/jsregexp.cc
|
| diff --git a/src/regexp/jsregexp.cc b/src/regexp/jsregexp.cc
|
| index 3559bcd111a4c19eb5755dd264d6a4a8cf39a88a..2229dd8e175cc9483741ad75bff773a0c0a7797f 100644
|
| --- a/src/regexp/jsregexp.cc
|
| +++ b/src/regexp/jsregexp.cc
|
| @@ -25,6 +25,11 @@
|
| #include "src/string-search.h"
|
| #include "src/unicode-decoder.h"
|
|
|
| +#ifdef V8_I18N_SUPPORT
|
| +#include "unicode/uset.h"
|
| +#include "unicode/utypes.h"
|
| +#endif // V8_I18N_SUPPORT
|
| +
|
| #ifndef V8_INTERPRETED_REGEXP
|
| #if V8_TARGET_ARCH_IA32
|
| #include "src/regexp/ia32/regexp-macro-assembler-ia32.h"
|
| @@ -4390,8 +4395,8 @@ void BackReferenceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
|
|
|
| DCHECK_EQ(start_reg_ + 1, end_reg_);
|
| if (compiler->ignore_case()) {
|
| - assembler->CheckNotBackReferenceIgnoreCase(start_reg_, read_backward(),
|
| - trace->backtrack());
|
| + assembler->CheckNotBackReferenceIgnoreCase(
|
| + start_reg_, read_backward(), compiler->unicode(), trace->backtrack());
|
| } else {
|
| assembler->CheckNotBackReference(start_reg_, read_backward(),
|
| trace->backtrack());
|
| @@ -4861,21 +4866,6 @@ bool RegExpCharacterClass::is_standard(Zone* zone) {
|
| }
|
|
|
|
|
| -bool RegExpCharacterClass::NeedsDesugaringForUnicode(Zone* zone) {
|
| - ZoneList<CharacterRange>* ranges = this->ranges(zone);
|
| - CharacterRange::Canonicalize(ranges);
|
| - for (int i = ranges->length() - 1; i >= 0; i--) {
|
| - uc32 from = ranges->at(i).from();
|
| - uc32 to = ranges->at(i).to();
|
| - // Check for non-BMP characters.
|
| - if (to >= kNonBmpStart) return true;
|
| - // Check for lone surrogates.
|
| - if (from <= kTrailSurrogateEnd && to >= kLeadSurrogateStart) return true;
|
| - }
|
| - return false;
|
| -}
|
| -
|
| -
|
| UnicodeRangeSplitter::UnicodeRangeSplitter(Zone* zone,
|
| ZoneList<CharacterRange>* base)
|
| : zone_(zone),
|
| @@ -5086,11 +5076,49 @@ void AddLoneTrailSurrogates(RegExpCompiler* compiler, ChoiceNode* result,
|
| }
|
|
|
|
|
| +void AddUnicodeCaseEquivalents(RegExpCompiler* compiler,
|
| + ZoneList<CharacterRange>* ranges) {
|
| +#ifdef V8_I18N_SUPPORT
|
| + // Use ICU to compute the case fold closure over the ranges.
|
| + DCHECK(compiler->unicode());
|
| + DCHECK(compiler->ignore_case());
|
| + USet* set = uset_openEmpty();
|
| + for (int i = 0; i < ranges->length(); i++) {
|
| + uset_addRange(set, ranges->at(i).from(), ranges->at(i).to());
|
| + }
|
| + ranges->Clear();
|
| + uset_closeOver(set, USET_CASE_INSENSITIVE);
|
| + // Full case mappings map single characters to multiple characters.
|
| + // Those are represented as strings in the set. Remove them so that
|
| + // we end up with only simple and common case mappings.
|
| + uset_removeAllStrings(set);
|
| + int item_count = uset_getItemCount(set);
|
| + int item_result = 0;
|
| + UErrorCode ec = U_ZERO_ERROR;
|
| + Zone* zone = compiler->zone();
|
| + for (int i = 0; i < item_count; i++) {
|
| + uc32 start = 0;
|
| + uc32 end = 0;
|
| + item_result += uset_getItem(set, i, &start, &end, nullptr, 0, &ec);
|
| + ranges->Add(CharacterRange::Range(start, end), zone);
|
| + }
|
| + // No errors, and everything we collected has been a range.
|
| + DCHECK_EQ(U_ZERO_ERROR, ec);
|
| + DCHECK_EQ(0, item_result);
|
| + uset_close(set);
|
| + CharacterRange::Canonicalize(ranges);
|
| +#endif // V8_I18N_SUPPORT
|
| +}
|
| +
|
| +
|
| RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,
|
| RegExpNode* on_success) {
|
| set_.Canonicalize();
|
| Zone* zone = compiler->zone();
|
| ZoneList<CharacterRange>* ranges = this->ranges(zone);
|
| + if (compiler->unicode() && compiler->ignore_case()) {
|
| + AddUnicodeCaseEquivalents(compiler, ranges);
|
| + }
|
| if (compiler->unicode() && !compiler->one_byte()) {
|
| if (is_negated()) {
|
| ZoneList<CharacterRange>* negated =
|
|
|