| Index: test/cctest/test-regexp.cc
|
| diff --git a/test/cctest/test-regexp.cc b/test/cctest/test-regexp.cc
|
| index 00abab47f8b34014d741963b435d043512669941..186350be3d0bf9d7c3dcefed4c89932328008c0a 100644
|
| --- a/test/cctest/test-regexp.cc
|
| +++ b/test/cctest/test-regexp.cc
|
| @@ -1399,7 +1399,8 @@ TEST(LatinCanonicalize) {
|
| for (uc32 c = 128; c < (1 << 21); c++)
|
| CHECK_GE(canonicalize(c), 128);
|
| unibrow::Mapping<unibrow::ToUppercase> to_upper;
|
| - for (uc32 c = 0; c < (1 << 21); c++) {
|
| + // Canonicalization is only defined for the Basic Multilingual Plane.
|
| + for (uc32 c = 0; c < (1 << 16); c++) {
|
| unibrow::uchar upper[unibrow::ToUppercase::kMaxWidth];
|
| int length = to_upper.get(c, '\0', upper);
|
| if (length == 0) {
|
| @@ -1414,7 +1415,7 @@ TEST(LatinCanonicalize) {
|
| }
|
|
|
|
|
| -static uc32 CanonRange(uc32 c) {
|
| +static uc32 CanonRangeEnd(uc32 c) {
|
| unibrow::uchar canon[unibrow::CanonicalizationRange::kMaxWidth];
|
| int count = unibrow::CanonicalizationRange::Convert(c, '\0', canon, NULL);
|
| if (count == 0) {
|
| @@ -1427,47 +1428,29 @@ static uc32 CanonRange(uc32 c) {
|
|
|
|
|
| TEST(RangeCanonicalization) {
|
| - CHECK_NE(CanonRange(0) & CharacterRange::kStartMarker, 0);
|
| // Check that we arrive at the same result when using the basic
|
| // range canonicalization primitives as when using immediate
|
| // canonicalization.
|
| unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize;
|
| - for (int i = 0; i < CharacterRange::kRangeCanonicalizeMax; i++) {
|
| - int range = CanonRange(i);
|
| - int indirect_length = 0;
|
| - unibrow::uchar indirect[unibrow::Ecma262UnCanonicalize::kMaxWidth];
|
| - if ((range & CharacterRange::kStartMarker) == 0) {
|
| - indirect_length = un_canonicalize.get(i - range, '\0', indirect);
|
| - for (int i = 0; i < indirect_length; i++)
|
| - indirect[i] += range;
|
| - } else {
|
| - indirect_length = un_canonicalize.get(i, '\0', indirect);
|
| - }
|
| - unibrow::uchar direct[unibrow::Ecma262UnCanonicalize::kMaxWidth];
|
| - int direct_length = un_canonicalize.get(i, '\0', direct);
|
| - CHECK_EQ(direct_length, indirect_length);
|
| - }
|
| - // Check that we arrive at the same results when skipping over
|
| - // canonicalization ranges.
|
| - int next_block = 0;
|
| - while (next_block < CharacterRange::kRangeCanonicalizeMax) {
|
| - uc32 start = CanonRange(next_block);
|
| - CHECK_NE((start & CharacterRange::kStartMarker), 0);
|
| - unsigned dist = start & CharacterRange::kPayloadMask;
|
| - unibrow::uchar first[unibrow::Ecma262UnCanonicalize::kMaxWidth];
|
| - int first_length = un_canonicalize.get(next_block, '\0', first);
|
| - for (unsigned i = 1; i < dist; i++) {
|
| - CHECK_EQ(i, CanonRange(next_block + i));
|
| - unibrow::uchar succ[unibrow::Ecma262UnCanonicalize::kMaxWidth];
|
| - int succ_length = un_canonicalize.get(next_block + i, '\0', succ);
|
| - CHECK_EQ(first_length, succ_length);
|
| - for (int j = 0; j < succ_length; j++) {
|
| - int calc = first[j] + i;
|
| - int found = succ[j];
|
| - CHECK_EQ(calc, found);
|
| + int block_start = 0;  // First character of the block under test.
|
| + while (block_start <= 0xFFFF) {  // Walk 0..0xFFFF one block at a time.
|
| + uc32 block_end = CanonRangeEnd(block_start);  // Used below as the inclusive block end.
|
| + unsigned block_length = block_end - block_start + 1;
|
| + if (block_length > 1) {  // A one-element block has no successors to compare.
|
| + unibrow::uchar first[unibrow::Ecma262UnCanonicalize::kMaxWidth];
|
| + int first_length = un_canonicalize.get(block_start, '\0', first);  // Reference result for the block's first character.
|
| + for (unsigned i = 1; i < block_length; i++) {  // i is the offset from block_start.
|
| + unibrow::uchar succ[unibrow::Ecma262UnCanonicalize::kMaxWidth];
|
| + int succ_length = un_canonicalize.get(block_start + i, '\0', succ);
|
| + CHECK_EQ(first_length, succ_length);  // Lengths must match across the whole block.
|
| + for (int j = 0; j < succ_length; j++) {
|
| + int calc = first[j] + i;  // Expected: the first character's entry shifted by the same offset.
|
| + int found = succ[j];
|
| + CHECK_EQ(calc, found);
|
| + }
|
| }
|
| }
|
| - next_block = next_block + dist;
|
| + block_start = block_start + block_length;  // Continue at the character after block_end.
|
| }
|
| }
|
|
|
|
|