| Index: src/regexp/jsregexp.cc
|
| diff --git a/src/regexp/jsregexp.cc b/src/regexp/jsregexp.cc
|
| index 1c463f648a2b3be9d17300bd517ed1dcb84e386d..7b510b072b0bba0d6e66726397213c52ead70a9f 100644
|
| --- a/src/regexp/jsregexp.cc
|
| +++ b/src/regexp/jsregexp.cc
|
| @@ -5904,58 +5904,58 @@ void CharacterRange::AddCaseEquivalents(Isolate* isolate, Zone* zone,
|
| }
|
| unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
|
| if (top == bottom) {
|
| - // If this is a singleton we just expand the one character.
|
| - int length = isolate->jsregexp_uncanonicalize()->get(bottom, '\0', chars);
|
| - for (int i = 0; i < length; i++) {
|
| - uc32 chr = chars[i];
|
| - if (chr != bottom) {
|
| - ranges->Add(CharacterRange::Singleton(chars[i]), zone);
|
| - }
|
| - }
|
| - } else {
|
| - // If this is a range we expand the characters block by block,
|
| - // expanding contiguous subranges (blocks) one at a time.
|
| - // The approach is as follows. For a given start character we
|
| - // look up the remainder of the block that contains it (represented
|
| - // by the end point), for instance we find 'z' if the character
|
| - // is 'c'. A block is characterized by the property
|
| - // that all characters uncanonicalize in the same way, except that
|
| - // each entry in the result is incremented by the distance from the first
|
| - // element. So a-z is a block because 'a' uncanonicalizes to ['a', 'A'] and
|
| - // the k'th letter uncanonicalizes to ['a' + k, 'A' + k].
|
| - // Once we've found the end point we look up its uncanonicalization
|
| - // and produce a range for each element. For instance for [c-f]
|
| - // we look up ['z', 'Z'] and produce [c-f] and [C-F]. We then only
|
| - // add a range if it is not already contained in the input, so [c-f]
|
| - // will be skipped but [C-F] will be added. If this range is not
|
| - // completely contained in a block we do this for all the blocks
|
| - // covered by the range (handling characters that is not in a block
|
| - // as a "singleton block").
|
| - unibrow::uchar range[unibrow::Ecma262UnCanonicalize::kMaxWidth];
|
| - int pos = bottom;
|
| - while (pos <= top) {
|
| - int length = isolate->jsregexp_canonrange()->get(pos, '\0', range);
|
| - uc32 block_end;
|
| - if (length == 0) {
|
| - block_end = pos;
|
| - } else {
|
| - DCHECK_EQ(1, length);
|
| - block_end = range[0];
|
| - }
|
| - int end = (block_end > top) ? top : block_end;
|
| - length = isolate->jsregexp_uncanonicalize()->get(block_end, '\0', range);
|
| + // If this is a singleton we just expand the one character.
|
| + int length = isolate->jsregexp_uncanonicalize()->get(bottom, '\0', chars);
|
| for (int i = 0; i < length; i++) {
|
| - uc32 c = range[i];
|
| - uc32 range_from = c - (block_end - pos);
|
| - uc32 range_to = c - (block_end - end);
|
| - if (!(bottom <= range_from && range_to <= top)) {
|
| - ranges->Add(CharacterRange(range_from, range_to), zone);
|
| + uc32 chr = chars[i];
|
| + if (chr != bottom) {
|
| + ranges->Add(CharacterRange::Singleton(chars[i]), zone);
|
| }
|
| }
|
| - pos = end + 1;
|
| + } else {
|
| + // If this is a range we expand the characters block by block, expanding
|
| + // contiguous subranges (blocks) one at a time. The approach is as
|
| + // follows. For a given start character we look up the remainder of the
|
| + // block that contains it (represented by the end point), for instance we
|
| + // find 'z' if the character is 'c'. A block is characterized by the
|
| + // property that all characters uncanonicalize in the same way, except
|
| + // that each entry in the result is incremented by the distance from the
|
| + // first element. So a-z is a block because 'a' uncanonicalizes to ['a',
|
| + // 'A'] and the k'th letter uncanonicalizes to ['a' + k, 'A' + k]. Once
|
| + // we've found the end point we look up its uncanonicalization and
|
| + // produce a range for each element. For instance for [c-f] we look up
|
| + // ['z', 'Z'] and produce [c-f] and [C-F]. We then only add a range if
|
| + // it is not already contained in the input, so [c-f] will be skipped but
|
| + // [C-F] will be added. If this range is not completely contained in a
|
| + // block we do this for all the blocks covered by the range (handling
|
| + // characters that are not in a block as a "singleton block").
|
| + unibrow::uchar equivalents[unibrow::Ecma262UnCanonicalize::kMaxWidth];
|
| + int pos = bottom;
|
| + while (pos <= top) {
|
| + int length =
|
| + isolate->jsregexp_canonrange()->get(pos, '\0', equivalents);
|
| + uc32 block_end;
|
| + if (length == 0) {
|
| + block_end = pos;
|
| + } else {
|
| + DCHECK_EQ(1, length);
|
| + block_end = equivalents[0];
|
| + }
|
| + int end = (block_end > top) ? top : block_end;
|
| + length = isolate->jsregexp_uncanonicalize()->get(block_end, '\0',
|
| + equivalents);
|
| + for (int i = 0; i < length; i++) {
|
| + uc32 c = equivalents[i];
|
| + uc32 range_from = c - (block_end - pos);
|
| + uc32 range_to = c - (block_end - end);
|
| + if (!(bottom <= range_from && range_to <= top)) {
|
| + ranges->Add(CharacterRange(range_from, range_to), zone);
|
| + }
|
| + }
|
| + pos = end + 1;
|
| + }
|
| }
|
| }
|
| - }
|
| }
|
|
|
|
|
|
|