src/jsregexp.cc - Issue 361033: Fix bug 486, Cyrillic character ranges in case-independent regexps....

Keyboard Shortcuts

	File
u :	up to issue
j / k :	jump to file after / before current file
J / K :	jump to next file with a comment after / before current file
	Side-by-side diff
i :	toggle intra-line diffs
e :	expand all comments
c :	collapse all comments
s :	toggle showing all comments
n / p :	next / previous diff chunk or comment
N / P :	next / previous comment
<Up> / <Down> :	next / previous line

	Issue
u :	up to list of issues
j / k :	jump to patch after / before current patch
o / <Enter> :	open current patch in side-by-side view
i :	open current patch in unified diff view

	Issue List
j / k :	jump to issue after / before current issue
o / <Enter> :	open current issue

Unified Diff: src/jsregexp.cc

Issue 361033: Fix bug 486, Cyrillic character ranges in case-independent regexps.... (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/

Patch Set: '' Created 11 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: src/jsregexp.cc

===================================================================

--- src/jsregexp.cc (revision 3220)

+++ src/jsregexp.cc (working copy)

@@ -2440,8 +2440,8 @@

RegExpCharacterClass* cc = elm.data.u_char_class;

ZoneList<CharacterRange>* ranges = cc->ranges();

int range_count = ranges->length();

- for (int i = 0; i < range_count; i++) {

- ranges->at(i).AddCaseEquivalents(ranges);

+ for (int j = 0; j < range_count; j++) {

+ ranges->at(j).AddCaseEquivalents(ranges);

}

@@ -3961,7 +3961,7 @@

} else {

start = pos;

}

- // Then we add the ranges on at a time, incrementing the current

+ // Then we add the ranges one at a time, incrementing the current

// position to be after the last block each time. The position

// always points to the start of a block.

while (pos < to()) {

@@ -3987,8 +3987,45 @@

}

start = pos = block_end + 1;

}

- } else {

- // TODO(plesner) when we've fixed the 2^11 bug in unibrow.

+ } else if (from() > 0 || to() < String::kMaxUC16CharCode) {

+ // Unibrow ranges don't work for high characters due to the "2^11 bug".

+ // Therefore we do something dumber for these ranges. We don't bother

+ // if the range is 0-max (as encountered at the start of an unanchored

+ // regexp).

+ ZoneList<unibrow::uchar> *characters = new ZoneList<unibrow::uchar>(100);

+ int bottom = from();

+ int top = to();

+ for (int i = bottom; i <= top; i++) {

+ int length = uncanonicalize.get(i, '\0', chars);

+ for (int j = 0; j < length; j++) {

+ uc32 chr = chars[j];

+ if (chr != i && chr < bottom || chr > top) {

+ characters->Add(chr);

+ }

+ if (characters->length() > 0) {

+ int new_from = characters->at(0);

+ int new_to = new_from;

+ for (int i = 1; i < characters->length(); i++) {

+ int chr = characters->at(i);

+ if (chr == new_to + 1) {

+ new_to++;

+ } else {

+ if (new_to == new_from) {

+ ranges->Add(CharacterRange::Singleton(new_from));

+ } else {

+ ranges->Add(CharacterRange(new_from, new_to));

+ }

+ new_from = new_to = chr;

+ }

+ if (new_to == new_from) {

+ ranges->Add(CharacterRange::Singleton(new_from));

+ } else {

+ ranges->Add(CharacterRange(new_from, new_to));

+ }

}

« no previous file with comments | « no previous file | test/mjsunit/cyrillic.js » ('j') | test/mjsunit/regress/regress-486.js » ('J')