Chromium Code Reviews| Index: src/jsregexp.cc |
| diff --git a/src/jsregexp.cc b/src/jsregexp.cc |
| index e284e8cb15f3233eee2533bc9efda8b6735bd1e3..d1a979e5ffeaef86b4f7895664526e1a513dc007 100644 |
| --- a/src/jsregexp.cc |
| +++ b/src/jsregexp.cc |
| @@ -4828,6 +4828,34 @@ int CompareFirstChar(RegExpTree* const* a, RegExpTree* const* b) { |
| } |
| +// Returns a case-independent representative for |c|: the smaller of |c| and |
| +// the single character that canonicalize->get() writes for it. If get() |
| +// reports a length other than 1, |c| is returned unchanged. |
| +static unibrow::uchar Canonical( |
| + unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize, |
| + unibrow::uchar c) { |
| + unibrow::uchar mapped[unibrow::Ecma262Canonicalize::kMaxWidth]; |
| + const int mapped_count = canonicalize->get(c, '\0', mapped); |
| + DCHECK_LE(mapped_count, 1); |
| + if (mapped_count != 1) return c; |
| + return Min(c, mapped[0]); |
| +} |
| + |
| + |
| +// Comparator that orders two atoms by their first character, using the |
| +// Canonical() value of each character whenever at least one of them is |
| +// 'a' or above. Returns 0 for identical first characters, otherwise the |
| +// signed difference of the two (possibly canonicalized) characters. |
| +int CompareFirstCharCaseIndependent( |
| + unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize, |
| + RegExpTree* const* a, RegExpTree* const* b) { |
| + unibrow::uchar lhs = (*a)->AsAtom()->data().at(0); |
| + unibrow::uchar rhs = (*b)->AsAtom()->data().at(0); |
| + if (lhs == rhs) return 0; |
| + // Characters that are both below 'a' are compared without canonicalizing. |
| + const bool both_below_a = lhs < 'a' && rhs < 'a'; |
| + if (!both_below_a) { |
| + lhs = Canonical(canonicalize, lhs); |
| + rhs = Canonical(canonicalize, rhs); |
| + } |
| + return static_cast<int>(lhs) - static_cast<int>(rhs); |
| +} |
| + |
| + |
| // We can stable sort runs of atoms, since the order does not matter if they |
| // start with different characters. |
| // Returns true if any consecutive atoms were found. |
| @@ -4856,10 +4884,18 @@ bool RegExpDisjunction::SortConsecutiveAtoms(RegExpCompiler* compiler) { |
| // the regexp parts don't match only disjoint starting points. To fix |
| // this would need a version of CompareFirstChar that uses case- |
| // independent character classes for comparison. |
| - if (!compiler->ignore_case()) { |
| - DCHECK_LT(first_atom, alternatives->length()); |
| - DCHECK_LE(i, alternatives->length()); |
| - DCHECK_LE(first_atom, i); |
| + DCHECK_LT(first_atom, alternatives->length()); |
| + DCHECK_LE(i, alternatives->length()); |
| + DCHECK_LE(first_atom, i); |
| + if (compiler->ignore_case()) { |
| + unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize = |
| + compiler->isolate()->regexp_macro_assembler_canonicalize(); |
| + auto compare_closure = |
| + [canonicalize](RegExpTree* const* a, RegExpTree* const* b) { |
| + return CompareFirstCharCaseIndependent(canonicalize, a, b); |
| + }; |
| + alternatives->StableSort(compare_closure, first_atom, i - first_atom); |
|
Erik Corry Chromium.org
2015/06/16 20:17:28
Closures have a different type from function pointers.
|
| + } else { |
| alternatives->StableSort(CompareFirstChar, first_atom, i - first_atom); |
| } |
| if (i - first_atom > 1) found_consecutive_atoms = true; |
| @@ -4884,7 +4920,7 @@ void RegExpDisjunction::RationalizeConsecutiveAtoms(RegExpCompiler* compiler) { |
| continue; |
| } |
| RegExpAtom* atom = alternative->AsAtom(); |
| - uc16 common_prefix = atom->data().at(0); |
| + unibrow::uchar common_prefix = atom->data().at(0); |
| int first_with_prefix = i; |
| int prefix_length = atom->length(); |
| i++; |
| @@ -4892,7 +4928,15 @@ void RegExpDisjunction::RationalizeConsecutiveAtoms(RegExpCompiler* compiler) { |
| alternative = alternatives->at(i); |
| if (!alternative->IsAtom()) break; |
| atom = alternative->AsAtom(); |
| - if (atom->data().at(0) != common_prefix) break; |
| + unibrow::uchar new_prefix = atom->data().at(0); |
| + if (new_prefix != common_prefix) { |
| + if (!compiler->ignore_case()) break; |
| + unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize = |
| + compiler->isolate()->regexp_macro_assembler_canonicalize(); |
| + new_prefix = Canonical(canonicalize, new_prefix); |
| + common_prefix = Canonical(canonicalize, common_prefix); |
| + if (new_prefix != common_prefix) break; |
| + } |
| prefix_length = Min(prefix_length, atom->length()); |
| i++; |
| } |
| @@ -4908,7 +4952,10 @@ void RegExpDisjunction::RationalizeConsecutiveAtoms(RegExpCompiler* compiler) { |
| RegExpAtom* old_atom = |
| alternatives->at(j + first_with_prefix)->AsAtom(); |
| for (int k = 1; k < prefix_length; k++) { |
| - if (atom->data().at(k) != old_atom->data().at(k)) prefix_length = k; |
| + if (atom->data().at(k) != old_atom->data().at(k)) { |
| + prefix_length = k; |
| + break; |
| + } |
| } |
| } |
| RegExpAtom* prefix = |