Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(53)

Side by Side Diff: test/cctest/test-regexp.cc

Issue 3030026: Updated unicode library. (Closed)
Patch Set: Removed outdated comments. Created 10 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/unicode-inl.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // Copyright 2008 the V8 project authors. All rights reserved. 1 // Copyright 2008 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 1381 matching lines...) Expand 10 before | Expand all | Expand 10 after
1392 CHECK_EQ(canonicalize(lower), canonicalize(upper)); 1392 CHECK_EQ(canonicalize(lower), canonicalize(upper));
1393 unibrow::uchar uncanon[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 1393 unibrow::uchar uncanon[unibrow::Ecma262UnCanonicalize::kMaxWidth];
1394 int length = un_canonicalize.get(lower, '\0', uncanon); 1394 int length = un_canonicalize.get(lower, '\0', uncanon);
1395 CHECK_EQ(2, length); 1395 CHECK_EQ(2, length);
1396 CHECK_EQ(upper, uncanon[0]); 1396 CHECK_EQ(upper, uncanon[0]);
1397 CHECK_EQ(lower, uncanon[1]); 1397 CHECK_EQ(lower, uncanon[1]);
1398 } 1398 }
1399 for (uc32 c = 128; c < (1 << 21); c++) 1399 for (uc32 c = 128; c < (1 << 21); c++)
1400 CHECK_GE(canonicalize(c), 128); 1400 CHECK_GE(canonicalize(c), 128);
1401 unibrow::Mapping<unibrow::ToUppercase> to_upper; 1401 unibrow::Mapping<unibrow::ToUppercase> to_upper;
1402 for (uc32 c = 0; c < (1 << 21); c++) { 1402 // Canonicalization is only defined for the Basic Multilingual Plane.
1403 for (uc32 c = 0; c < (1 << 16); c++) {
1403 unibrow::uchar upper[unibrow::ToUppercase::kMaxWidth]; 1404 unibrow::uchar upper[unibrow::ToUppercase::kMaxWidth];
1404 int length = to_upper.get(c, '\0', upper); 1405 int length = to_upper.get(c, '\0', upper);
1405 if (length == 0) { 1406 if (length == 0) {
1406 length = 1; 1407 length = 1;
1407 upper[0] = c; 1408 upper[0] = c;
1408 } 1409 }
1409 uc32 u = upper[0]; 1410 uc32 u = upper[0];
1410 if (length > 1 || (c >= 128 && u < 128)) 1411 if (length > 1 || (c >= 128 && u < 128))
1411 u = c; 1412 u = c;
1412 CHECK_EQ(u, canonicalize(c)); 1413 CHECK_EQ(u, canonicalize(c));
1413 } 1414 }
1414 } 1415 }
1415 1416
1416 1417
1417 static uc32 CanonRange(uc32 c) { 1418 static uc32 CanonRangeEnd(uc32 c) {
1418 unibrow::uchar canon[unibrow::CanonicalizationRange::kMaxWidth]; 1419 unibrow::uchar canon[unibrow::CanonicalizationRange::kMaxWidth];
1419 int count = unibrow::CanonicalizationRange::Convert(c, '\0', canon, NULL); 1420 int count = unibrow::CanonicalizationRange::Convert(c, '\0', canon, NULL);
1420 if (count == 0) { 1421 if (count == 0) {
1421 return c; 1422 return c;
1422 } else { 1423 } else {
1423 CHECK_EQ(1, count); 1424 CHECK_EQ(1, count);
1424 return canon[0]; 1425 return canon[0];
1425 } 1426 }
1426 } 1427 }
1427 1428
1428 1429
1429 TEST(RangeCanonicalization) { 1430 TEST(RangeCanonicalization) {
1430 CHECK_NE(CanonRange(0) & CharacterRange::kStartMarker, 0);
1431 // Check that we arrive at the same result when using the basic 1431 // Check that we arrive at the same result when using the basic
1432 // range canonicalization primitives as when using immediate 1432 // range canonicalization primitives as when using immediate
1433 // canonicalization. 1433 // canonicalization.
1434 unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize; 1434 unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize;
1435 for (int i = 0; i < CharacterRange::kRangeCanonicalizeMax; i++) { 1435 int block_start = 0;
1436 int range = CanonRange(i); 1436 while (block_start <= 0xFFFF) {
1437 int indirect_length = 0; 1437 uc32 block_end = CanonRangeEnd(block_start);
1438 unibrow::uchar indirect[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 1438 unsigned block_length = block_end - block_start + 1;
1439 if ((range & CharacterRange::kStartMarker) == 0) { 1439 if (block_length > 1) {
1440 indirect_length = un_canonicalize.get(i - range, '\0', indirect); 1440 unibrow::uchar first[unibrow::Ecma262UnCanonicalize::kMaxWidth];
1441 for (int i = 0; i < indirect_length; i++) 1441 int first_length = un_canonicalize.get(block_start, '\0', first);
1442 indirect[i] += range; 1442 for (unsigned i = 1; i < block_length; i++) {
1443 } else { 1443 unibrow::uchar succ[unibrow::Ecma262UnCanonicalize::kMaxWidth];
1444 indirect_length = un_canonicalize.get(i, '\0', indirect); 1444 int succ_length = un_canonicalize.get(block_start + i, '\0', succ);
1445 } 1445 CHECK_EQ(first_length, succ_length);
1446 unibrow::uchar direct[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 1446 for (int j = 0; j < succ_length; j++) {
1447 int direct_length = un_canonicalize.get(i, '\0', direct); 1447 int calc = first[j] + i;
1448 CHECK_EQ(direct_length, indirect_length); 1448 int found = succ[j];
1449 } 1449 CHECK_EQ(calc, found);
1450 // Check that we arrive at the same results when skipping over 1450 }
1451 // canonicalization ranges.
1452 int next_block = 0;
1453 while (next_block < CharacterRange::kRangeCanonicalizeMax) {
1454 uc32 start = CanonRange(next_block);
1455 CHECK_NE((start & CharacterRange::kStartMarker), 0);
1456 unsigned dist = start & CharacterRange::kPayloadMask;
1457 unibrow::uchar first[unibrow::Ecma262UnCanonicalize::kMaxWidth];
1458 int first_length = un_canonicalize.get(next_block, '\0', first);
1459 for (unsigned i = 1; i < dist; i++) {
1460 CHECK_EQ(i, CanonRange(next_block + i));
1461 unibrow::uchar succ[unibrow::Ecma262UnCanonicalize::kMaxWidth];
1462 int succ_length = un_canonicalize.get(next_block + i, '\0', succ);
1463 CHECK_EQ(first_length, succ_length);
1464 for (int j = 0; j < succ_length; j++) {
1465 int calc = first[j] + i;
1466 int found = succ[j];
1467 CHECK_EQ(calc, found);
1468 } 1451 }
1469 } 1452 }
1470 next_block = next_block + dist; 1453 block_start = block_start + block_length;
1471 } 1454 }
1472 } 1455 }
1473 1456
1474 1457
1475 TEST(UncanonicalizeEquivalence) { 1458 TEST(UncanonicalizeEquivalence) {
1476 unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize; 1459 unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize;
1477 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; 1460 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
1478 for (int i = 0; i < (1 << 16); i++) { 1461 for (int i = 0; i < (1 << 16); i++) {
1479 int length = un_canonicalize.get(i, '\0', chars); 1462 int length = un_canonicalize.get(i, '\0', chars);
1480 for (int j = 0; j < length; j++) { 1463 for (int j = 0; j < length; j++) {
(...skipping 311 matching lines...) Expand 10 before | Expand all | Expand 10 after
1792 bool in_second = CharacterInSet(&l2, i); 1775 bool in_second = CharacterInSet(&l2, i);
1793 CHECK((in_first || in_second) == CharacterInSet(&all, i)); 1776 CHECK((in_first || in_second) == CharacterInSet(&all, i));
1794 } 1777 }
1795 } 1778 }
1796 1779
1797 1780
1798 TEST(Graph) { 1781 TEST(Graph) {
1799 V8::Initialize(NULL); 1782 V8::Initialize(NULL);
1800 Execute("\\b\\w+\\b", false, true, true); 1783 Execute("\\b\\w+\\b", false, true, true);
1801 } 1784 }
OLD | NEW
« no previous file with comments | « src/unicode-inl.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698