| Index: src/jsregexp.cc
|
| diff --git a/src/jsregexp.cc b/src/jsregexp.cc
|
| index 49a2998fed5c7d7222a36c6b9da8ad553921ea90..e284e8cb15f3233eee2533bc9efda8b6735bd1e3 100644
|
| --- a/src/jsregexp.cc
|
| +++ b/src/jsregexp.cc
|
| @@ -4828,34 +4828,6 @@
|
| }
|
|
|
|
|
| -static unibrow::uchar Canonical(
|
| - unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize,
|
| - unibrow::uchar c) {
|
| - unibrow::uchar chars[unibrow::Ecma262Canonicalize::kMaxWidth];
|
| - int length = canonicalize->get(c, '\0', chars);
|
| - DCHECK_LE(length, 1);
|
| - unibrow::uchar canonical = c;
|
| - if (length == 1) canonical = chars[0];
|
| - return canonical;
|
| -}
|
| -
|
| -
|
| -int CompareFirstCharCaseIndependent(
|
| - unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize,
|
| - RegExpTree* const* a, RegExpTree* const* b) {
|
| - RegExpAtom* atom1 = (*a)->AsAtom();
|
| - RegExpAtom* atom2 = (*b)->AsAtom();
|
| - unibrow::uchar character1 = atom1->data().at(0);
|
| - unibrow::uchar character2 = atom2->data().at(0);
|
| - if (character1 == character2) return 0;
|
| - if (character1 >= 'a' || character2 >= 'a') {
|
| - character1 = Canonical(canonicalize, character1);
|
| - character2 = Canonical(canonicalize, character2);
|
| - }
|
| - return static_cast<int>(character1) - static_cast<int>(character2);
|
| -}
|
| -
|
| -
|
| // We can stable sort runs of atoms, since the order does not matter if they
|
| // start with different characters.
|
| // Returns true if any consecutive atoms were found.
|
| @@ -4879,23 +4851,15 @@
|
| i++;
|
| }
|
| // Sort atoms to get ones with common prefixes together.
|
| - // This step is more tricky if we are in a case-independent regexp,
|
| + // This step is not valid if we are in a case-independent regexp,
|
| // because it would change /is|I/ to /I|is/, and order matters when
|
| // the regexp parts don't match only disjoint starting points. To fix
|
| - // this we have a version of CompareFirstChar that uses case-
|
| + // this would need a version of CompareFirstChar that uses case-
|
| // independent character classes for comparison.
|
| - DCHECK_LT(first_atom, alternatives->length());
|
| - DCHECK_LE(i, alternatives->length());
|
| - DCHECK_LE(first_atom, i);
|
| - if (compiler->ignore_case()) {
|
| - unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize =
|
| - compiler->isolate()->regexp_macro_assembler_canonicalize();
|
| - auto compare_closure =
|
| - [canonicalize](RegExpTree* const* a, RegExpTree* const* b) {
|
| - return CompareFirstCharCaseIndependent(canonicalize, a, b);
|
| - };
|
| - alternatives->StableSort(compare_closure, first_atom, i - first_atom);
|
| - } else {
|
| + if (!compiler->ignore_case()) {
|
| + DCHECK_LT(first_atom, alternatives->length());
|
| + DCHECK_LE(i, alternatives->length());
|
| + DCHECK_LE(first_atom, i);
|
| alternatives->StableSort(CompareFirstChar, first_atom, i - first_atom);
|
| }
|
| if (i - first_atom > 1) found_consecutive_atoms = true;
|
| @@ -4920,7 +4884,7 @@
|
| continue;
|
| }
|
| RegExpAtom* atom = alternative->AsAtom();
|
| - unibrow::uchar common_prefix = atom->data().at(0);
|
| + uc16 common_prefix = atom->data().at(0);
|
| int first_with_prefix = i;
|
| int prefix_length = atom->length();
|
| i++;
|
| @@ -4928,15 +4892,7 @@
|
| alternative = alternatives->at(i);
|
| if (!alternative->IsAtom()) break;
|
| atom = alternative->AsAtom();
|
| - unibrow::uchar new_prefix = atom->data().at(0);
|
| - if (new_prefix != common_prefix) {
|
| - if (!compiler->ignore_case()) break;
|
| - unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize =
|
| - compiler->isolate()->regexp_macro_assembler_canonicalize();
|
| - new_prefix = Canonical(canonicalize, new_prefix);
|
| - common_prefix = Canonical(canonicalize, common_prefix);
|
| - if (new_prefix != common_prefix) break;
|
| - }
|
| + if (atom->data().at(0) != common_prefix) break;
|
| prefix_length = Min(prefix_length, atom->length());
|
| i++;
|
| }
|
| @@ -4952,10 +4908,7 @@
|
| RegExpAtom* old_atom =
|
| alternatives->at(j + first_with_prefix)->AsAtom();
|
| for (int k = 1; k < prefix_length; k++) {
|
| - if (atom->data().at(k) != old_atom->data().at(k)) {
|
| - prefix_length = k;
|
| - break;
|
| - }
|
| + if (atom->data().at(k) != old_atom->data().at(k)) prefix_length = k;
|
| }
|
| }
|
| RegExpAtom* prefix =
|
|
|