Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(899)

Unified Diff: src/jsregexp.cc

Issue 361033: Fix bug 486, Cyrillic character ranges in case independent regexps.... (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/
Patch Set: '' Created 11 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | test/mjsunit/cyrillic.js » ('j') | test/mjsunit/regress/regress-486.js » ('J')
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/jsregexp.cc
===================================================================
--- src/jsregexp.cc (revision 3220)
+++ src/jsregexp.cc (working copy)
@@ -2440,8 +2440,8 @@
RegExpCharacterClass* cc = elm.data.u_char_class;
ZoneList<CharacterRange>* ranges = cc->ranges();
int range_count = ranges->length();
- for (int i = 0; i < range_count; i++) {
- ranges->at(i).AddCaseEquivalents(ranges);
+ for (int j = 0; j < range_count; j++) {
+ ranges->at(j).AddCaseEquivalents(ranges);
}
}
}
@@ -3961,7 +3961,7 @@
} else {
start = pos;
}
- // Then we add the ranges on at a time, incrementing the current
+ // Then we add the ranges one at a time, incrementing the current
// position to be after the last block each time. The position
// always points to the start of a block.
while (pos < to()) {
@@ -3987,8 +3987,45 @@
}
start = pos = block_end + 1;
}
- } else {
- // TODO(plesner) when we've fixed the 2^11 bug in unibrow.
+ } else if (from() > 0 || to() < String::kMaxUC16CharCode) {
+ // Unibrow ranges don't work for high characters due to the "2^11 bug".
+ // Therefore we fall back to uncanonicalizing each character in the range
+ // individually and adding the results as ranges. We don't bother if the
+ // range is 0-max (as encountered at the start of an unanchored regexp).
+ ZoneList<unibrow::uchar> *characters = new ZoneList<unibrow::uchar>(100);
+ int bottom = from();
+ int top = to();
+ for (int i = bottom; i <= top; i++) {
+ int length = uncanonicalize.get(i, '\0', chars);
+ for (int j = 0; j < length; j++) {
+ uc32 chr = chars[j];
+ if (chr != i && chr < bottom || chr > top) {
+ characters->Add(chr);
+ }
+ }
+ }
+ if (characters->length() > 0) {
+ int new_from = characters->at(0);
+ int new_to = new_from;
+ for (int i = 1; i < characters->length(); i++) {
+ int chr = characters->at(i);
+ if (chr == new_to + 1) {
+ new_to++;
+ } else {
+ if (new_to == new_from) {
+ ranges->Add(CharacterRange::Singleton(new_from));
+ } else {
+ ranges->Add(CharacterRange(new_from, new_to));
+ }
+ new_from = new_to = chr;
+ }
+ }
+ if (new_to == new_from) {
+ ranges->Add(CharacterRange::Singleton(new_from));
+ } else {
+ ranges->Add(CharacterRange(new_from, new_to));
+ }
+ }
}
}
« no previous file with comments | « no previous file | test/mjsunit/cyrillic.js » ('j') | test/mjsunit/regress/regress-486.js » ('J')

Powered by Google App Engine
This is Rietveld 408576698