Index: src/jsregexp.cc |
=================================================================== |
--- src/jsregexp.cc (revision 3220) |
+++ src/jsregexp.cc (working copy) |
@@ -2440,8 +2440,8 @@ |
RegExpCharacterClass* cc = elm.data.u_char_class; |
ZoneList<CharacterRange>* ranges = cc->ranges(); |
int range_count = ranges->length(); |
- for (int i = 0; i < range_count; i++) { |
- ranges->at(i).AddCaseEquivalents(ranges); |
+ for (int j = 0; j < range_count; j++) { |
+ ranges->at(j).AddCaseEquivalents(ranges); |
} |
} |
} |
@@ -3961,7 +3961,7 @@ |
} else { |
start = pos; |
} |
- // Then we add the ranges on at a time, incrementing the current |
+ // Then we add the ranges one at a time, incrementing the current |
// position to be after the last block each time. The position |
// always points to the start of a block. |
while (pos < to()) { |
@@ -3987,8 +3987,45 @@ |
} |
start = pos = block_end + 1; |
} |
- } else { |
- // TODO(plesner) when we've fixed the 2^11 bug in unibrow. |
+ } else if (from() > 0 || to() < String::kMaxUC16CharCode) { |
+ // Unibrow ranges don't work for high characters due to the "2^11 bug". |
+ // Therefore we do something dumber for these ranges. We don't bother |
+ // if the range is 0-max (as encountered at the start of an unanchored |
+ // regexp). |
+ ZoneList<unibrow::uchar> *characters = new ZoneList<unibrow::uchar>(100); |
+ int bottom = from(); |
+ int top = to(); |
+ for (int i = bottom; i <= top; i++) { |
+ int length = uncanonicalize.get(i, '\0', chars); |
+ for (int j = 0; j < length; j++) { |
+ uc32 chr = chars[j]; |
+ if (chr != i && chr < bottom || chr > top) { |
+ characters->Add(chr); |
+ } |
+ } |
+ } |
+ if (characters->length() > 0) { |
+ int new_from = characters->at(0); |
+ int new_to = new_from; |
+ for (int i = 1; i < characters->length(); i++) { |
+ int chr = characters->at(i); |
+ if (chr == new_to + 1) { |
+ new_to++; |
+ } else { |
+ if (new_to == new_from) { |
+ ranges->Add(CharacterRange::Singleton(new_from)); |
+ } else { |
+ ranges->Add(CharacterRange(new_from, new_to)); |
+ } |
+ new_from = new_to = chr; |
+ } |
+ } |
+ if (new_to == new_from) { |
+ ranges->Add(CharacterRange::Singleton(new_from)); |
+ } else { |
+ ranges->Add(CharacterRange(new_from, new_to)); |
+ } |
+ } |
} |
} |