src/jsregexp.cc - Issue 1204013003: Revert of Extend big-disjunction optimization to case-independent regexps

Unified Diff: src/jsregexp.cc

Issue 1204013003: Revert of Extend big-disjunction optimization to case-independent regexps (Closed). Base URL: https://chromium.googlesource.com/v8/v8.git@master

Patch Set: Created 5 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: src/jsregexp.cc

diff --git a/src/jsregexp.cc b/src/jsregexp.cc

index 49a2998fed5c7d7222a36c6b9da8ad553921ea90..e284e8cb15f3233eee2533bc9efda8b6735bd1e3 100644

--- a/src/jsregexp.cc

+++ b/src/jsregexp.cc

@@ -4828,34 +4828,6 @@

}

-static unibrow::uchar Canonical(

- unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize,

- unibrow::uchar c) {

- unibrow::uchar chars[unibrow::Ecma262Canonicalize::kMaxWidth];

- int length = canonicalize->get(c, '\0', chars);

- DCHECK_LE(length, 1);

- unibrow::uchar canonical = c;

- if (length == 1) canonical = chars[0];

- return canonical;

-int CompareFirstCharCaseIndependent(

- unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize,

- RegExpTree* const* a, RegExpTree* const* b) {

- RegExpAtom* atom1 = (*a)->AsAtom();

- RegExpAtom* atom2 = (*b)->AsAtom();

- unibrow::uchar character1 = atom1->data().at(0);

- unibrow::uchar character2 = atom2->data().at(0);

- if (character1 == character2) return 0;

- if (character1 >= 'a' || character2 >= 'a') {

- character1 = Canonical(canonicalize, character1);

- character2 = Canonical(canonicalize, character2);

- }

- return static_cast<int>(character1) - static_cast<int>(character2);

// We can stable sort runs of atoms, since the order does not matter if they

// start with different characters.

// Returns true if any consecutive atoms were found.

@@ -4879,23 +4851,15 @@

i++;

}

// Sort atoms to get ones with common prefixes together.

- // This step is more tricky if we are in a case-independent regexp,

+ // This step is not valid if we are in a case-independent regexp,

// because it would change /is|I/ to /I|is/, and order matters when

// the regexp parts don't match only disjoint starting points. To fix

- // this we have a version of CompareFirstChar that uses case-

+ // this would need a version of CompareFirstChar that uses case-

// independent character classes for comparison.

- DCHECK_LT(first_atom, alternatives->length());

- DCHECK_LE(i, alternatives->length());

- DCHECK_LE(first_atom, i);

- if (compiler->ignore_case()) {

- unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize =

- compiler->isolate()->regexp_macro_assembler_canonicalize();

- auto compare_closure =

- [canonicalize](RegExpTree* const* a, RegExpTree* const* b) {

- return CompareFirstCharCaseIndependent(canonicalize, a, b);

- };

- alternatives->StableSort(compare_closure, first_atom, i - first_atom);

- } else {

+ if (!compiler->ignore_case()) {

+ DCHECK_LT(first_atom, alternatives->length());

+ DCHECK_LE(i, alternatives->length());

+ DCHECK_LE(first_atom, i);

alternatives->StableSort(CompareFirstChar, first_atom, i - first_atom);

}

if (i - first_atom > 1) found_consecutive_atoms = true;

@@ -4920,7 +4884,7 @@

continue;

}

RegExpAtom* atom = alternative->AsAtom();

- unibrow::uchar common_prefix = atom->data().at(0);

+ uc16 common_prefix = atom->data().at(0);

int first_with_prefix = i;

int prefix_length = atom->length();

i++;

@@ -4928,15 +4892,7 @@

alternative = alternatives->at(i);

if (!alternative->IsAtom()) break;

atom = alternative->AsAtom();

- unibrow::uchar new_prefix = atom->data().at(0);

- if (new_prefix != common_prefix) {

- if (!compiler->ignore_case()) break;

- unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize =

- compiler->isolate()->regexp_macro_assembler_canonicalize();

- new_prefix = Canonical(canonicalize, new_prefix);

- common_prefix = Canonical(canonicalize, common_prefix);

- if (new_prefix != common_prefix) break;

- }

+ if (atom->data().at(0) != common_prefix) break;

prefix_length = Min(prefix_length, atom->length());

i++;

}

@@ -4952,10 +4908,7 @@

RegExpAtom* old_atom =

alternatives->at(j + first_with_prefix)->AsAtom();

for (int k = 1; k < prefix_length; k++) {

- if (atom->data().at(k) != old_atom->data().at(k)) {

- prefix_length = k;

- break;

- }

+ if (atom->data().at(k) != old_atom->data().at(k)) prefix_length = k;

}

RegExpAtom* prefix =

« no previous file with comments | « src/heap-snapshot-generator.cc ('k') | src/list.h » ('j') | no next file with comments »