Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(689)

Side by Side Diff: src/unicode.cc

Issue 2331303002: Use ICU for ID_START and ID_CONTINUE for Unicode 9 data (Closed)
Patch Set: undo the speculative fix to see if it's really necessary Created 3 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 // 4 //
5 // This file was generated at 2014-10-08 15:25:47.940335 5 // This file was generated at 2014-10-08 15:25:47.940335
6 6
7 #include "src/unicode.h" 7 #include "src/unicode.h"
8 #include "src/unicode-inl.h" 8 #include "src/unicode-inl.h"
9 #include <stdio.h> 9 #include <stdio.h>
10 #include <stdlib.h> 10 #include <stdlib.h>
11 11
12 #ifdef V8_INTL_SUPPORT
13 #include "unicode/uchar.h"
14 #endif
15
12 namespace unibrow { 16 namespace unibrow {
13 17
14 static const int kStartBit = (1 << 30); 18 static const int kStartBit = (1 << 30);
15 static const int kChunkBits = (1 << 13); 19 static const int kChunkBits = (1 << 13);
16 static const uchar kSentinel = static_cast<uchar>(-1); 20 static const uchar kSentinel = static_cast<uchar>(-1);
17 21
18 /** 22 /**
19 * \file 23 * \file
20 * Implementations of functions for working with unicode. 24 * Implementations of functions for working with unicode.
21 */ 25 */
(...skipping 13 matching lines...) Expand all
35 static inline uchar GetEntry(int32_t entry) { 39 static inline uchar GetEntry(int32_t entry) {
36 return entry & (kStartBit - 1); 40 return entry & (kStartBit - 1);
37 } 41 }
38 42
39 43
40 static inline bool IsStart(int32_t entry) { 44 static inline bool IsStart(int32_t entry) {
41 return (entry & kStartBit) != 0; 45 return (entry & kStartBit) != 0;
42 } 46 }
43 47
44 48
49 #ifndef V8_INTL_SUPPORT
45 /** 50 /**
46 * Look up a character in the unicode table using a mix of binary and 51 * Look up a character in the unicode table using a mix of binary and
47 * interpolation search. For a uniformly distributed array 52 * interpolation search. For a uniformly distributed array
48 * interpolation search beats binary search by a wide margin. However, 53 * interpolation search beats binary search by a wide margin. However,
49 * in this case interpolation search degenerates because of some very 54 * in this case interpolation search degenerates because of some very
50 * high values in the lower end of the table so this function uses a 55 * high values in the lower end of the table so this function uses a
51 * combination. The average number of steps to look up the information 56 * combination. The average number of steps to look up the information
52 * about a character is around 10, slightly higher if there is no 57 * about a character is around 10, slightly higher if there is no
53 * information available about the character. 58 * information available about the character.
54 */ 59 */
(...skipping 19 matching lines...) Expand all
74 // the one we're looking for, we're done. 79 // the one we're looking for, we're done.
75 if (mid == 0) break; 80 if (mid == 0) break;
76 high = mid - 1; 81 high = mid - 1;
77 } 82 }
78 } 83 }
79 int32_t field = TableGet<kEntryDist>(table, low); 84 int32_t field = TableGet<kEntryDist>(table, low);
80 uchar entry = GetEntry(field); 85 uchar entry = GetEntry(field);
81 bool is_start = IsStart(field); 86 bool is_start = IsStart(field);
82 return (entry == value) || (entry < value && is_start); 87 return (entry == value) || (entry < value && is_start);
83 } 88 }
89 #endif // !V8_INTL_SUPPORT
84 90
85 template <int kW> 91 template <int kW>
86 struct MultiCharacterSpecialCase { 92 struct MultiCharacterSpecialCase {
87 static const uchar kEndOfEncoding = kSentinel; 93 static const uchar kEndOfEncoding = kSentinel;
88 uchar chars[kW]; 94 uchar chars[kW];
89 }; 95 };
90 96
91 97
98 // TODO(jshin): Once icu_case_mapping flag is removed, enclose LookupMapping
99 // with #ifndef V8_INTL_SUPPORT.
100
92 // Look up the mapping for the given character in the specified table, 101 // Look up the mapping for the given character in the specified table,
93 // which is of the specified length and uses the specified special case 102 // which is of the specified length and uses the specified special case
94 // mapping for multi-char mappings. The next parameter is the character 103 // mapping for multi-char mappings. The next parameter is the character
95 // following the one to map. The result will be written into the result 104 // following the one to map. The result will be written into the result
96 // buffer and the number of characters written will be returned. Finally, 105 // buffer and the number of characters written will be returned. Finally,
97 // if the allow_caching_ptr is non-null then false will be stored in 106 // if the allow_caching_ptr is non-null then false will be stored in
98 // it if the result contains multiple characters or depends on the 107 // it if the result contains multiple characters or depends on the
99 // context. 108 // context.
100 // If ranges are linear, a match between a start and end point is 109 // If ranges are linear, a match between a start and end point is
101 // offset by the distance between the match and the start. Otherwise 110 // offset by the distance between the match and the start. Otherwise
(...skipping 268 matching lines...) Expand 10 before | Expand all | Expand 10 after
370 } 379 }
371 380
372 while (cursor < length) { 381 while (cursor < length) {
373 uchar c = ValueOf(bytes + cursor, length - cursor, &cursor); 382 uchar c = ValueOf(bytes + cursor, length - cursor, &cursor);
374 if (!IsValidCharacter(c)) return false; 383 if (!IsValidCharacter(c)) return false;
375 } 384 }
376 return true; 385 return true;
377 } 386 }
378 387
379 // Uppercase: point.category == 'Lu' 388 // Uppercase: point.category == 'Lu'
380 389 // TODO(jshin): Check if it's ok to exclude Other_Uppercase characters.
390 #ifdef V8_INTL_SUPPORT
391 bool Uppercase::Is(uchar c) { return static_cast<bool>(u_isupper(c)); }
392 #else
381 static const uint16_t kUppercaseTable0Size = 455; 393 static const uint16_t kUppercaseTable0Size = 455;
382 static const int32_t kUppercaseTable0[455] = { 394 static const int32_t kUppercaseTable0[455] = {
383 1073741889, 90, 1073742016, 214, 395 1073741889, 90, 1073742016, 214,
384 1073742040, 222, 256, 258, // NOLINT 396 1073742040, 222, 256, 258, // NOLINT
385 260, 262, 264, 266, 397 260, 262, 264, 266,
386 268, 270, 272, 274, // NOLINT 398 268, 270, 272, 274, // NOLINT
387 276, 278, 280, 282, 399 276, 278, 280, 282,
388 284, 286, 288, 290, // NOLINT 400 284, 286, 288, 290, // NOLINT
389 292, 294, 296, 298, 401 292, 294, 296, 298,
390 300, 302, 304, 306, // NOLINT 402 300, 302, 304, 306, // NOLINT
(...skipping 145 matching lines...) Expand 10 before | Expand all | Expand 10 after
536 c); 548 c);
537 case 5: return LookupPredicate(kUppercaseTable5, 549 case 5: return LookupPredicate(kUppercaseTable5,
538 kUppercaseTable5Size, 550 kUppercaseTable5Size,
539 c); 551 c);
540 case 7: return LookupPredicate(kUppercaseTable7, 552 case 7: return LookupPredicate(kUppercaseTable7,
541 kUppercaseTable7Size, 553 kUppercaseTable7Size,
542 c); 554 c);
543 default: return false; 555 default: return false;
544 } 556 }
545 } 557 }
546 558 #endif // V8_INTL_SUPPORT
547
548 // Lowercase: point.category == 'Ll'
549
550 static const uint16_t kLowercaseTable0Size = 467;
551 static const int32_t kLowercaseTable0[467] = {
552 1073741921, 122, 181, 1073742047,
553 246, 1073742072, 255, 257, // NOLINT
554 259, 261, 263, 265,
555 267, 269, 271, 273, // NOLINT
556 275, 277, 279, 281,
557 283, 285, 287, 289, // NOLINT
558 291, 293, 295, 297,
559 299, 301, 303, 305, // NOLINT
560 307, 309, 1073742135, 312,
561 314, 316, 318, 320, // NOLINT
562 322, 324, 326, 1073742152,
563 329, 331, 333, 335, // NOLINT
564 337, 339, 341, 343,
565 345, 347, 349, 351, // NOLINT
566 353, 355, 357, 359,
567 361, 363, 365, 367, // NOLINT
568 369, 371, 373, 375,
569 378, 380, 1073742206, 384, // NOLINT
570 387, 389, 392, 1073742220,
571 397, 402, 405, 1073742233, // NOLINT
572 411, 414, 417, 419,
573 421, 424, 1073742250, 427, // NOLINT
574 429, 432, 436, 438,
575 1073742265, 442, 1073742269, 447, // NOLINT
576 454, 457, 460, 462,
577 464, 466, 468, 470, // NOLINT
578 472, 474, 1073742300, 477,
579 479, 481, 483, 485, // NOLINT
580 487, 489, 491, 493,
581 1073742319, 496, 499, 501, // NOLINT
582 505, 507, 509, 511,
583 513, 515, 517, 519, // NOLINT
584 521, 523, 525, 527,
585 529, 531, 533, 535, // NOLINT
586 537, 539, 541, 543,
587 545, 547, 549, 551, // NOLINT
588 553, 555, 557, 559,
589 561, 1073742387, 569, 572, // NOLINT
590 1073742399, 576, 578, 583,
591 585, 587, 589, 1073742415, // NOLINT
592 659, 1073742485, 687, 881,
593 883, 887, 1073742715, 893, // NOLINT
594 912, 1073742764, 974, 1073742800,
595 977, 1073742805, 983, 985, // NOLINT
596 987, 989, 991, 993,
597 995, 997, 999, 1001, // NOLINT
598 1003, 1005, 1073742831, 1011,
599 1013, 1016, 1073742843, 1020, // NOLINT
600 1073742896, 1119, 1121, 1123,
601 1125, 1127, 1129, 1131, // NOLINT
602 1133, 1135, 1137, 1139,
603 1141, 1143, 1145, 1147, // NOLINT
604 1149, 1151, 1153, 1163,
605 1165, 1167, 1169, 1171, // NOLINT
606 1173, 1175, 1177, 1179,
607 1181, 1183, 1185, 1187, // NOLINT
608 1189, 1191, 1193, 1195,
609 1197, 1199, 1201, 1203, // NOLINT
610 1205, 1207, 1209, 1211,
611 1213, 1215, 1218, 1220, // NOLINT
612 1222, 1224, 1226, 1228,
613 1073743054, 1231, 1233, 1235, // NOLINT
614 1237, 1239, 1241, 1243,
615 1245, 1247, 1249, 1251, // NOLINT
616 1253, 1255, 1257, 1259,
617 1261, 1263, 1265, 1267, // NOLINT
618 1269, 1271, 1273, 1275,
619 1277, 1279, 1281, 1283, // NOLINT
620 1285, 1287, 1289, 1291,
621 1293, 1295, 1297, 1299, // NOLINT
622 1301, 1303, 1305, 1307,
623 1309, 1311, 1313, 1315, // NOLINT
624 1317, 1319, 1321, 1323,
625 1325, 1327, 1073743201, 1415, // NOLINT
626 1073749248, 7467, 1073749355, 7543,
627 1073749369, 7578, 7681, 7683, // NOLINT
628 7685, 7687, 7689, 7691,
629 7693, 7695, 7697, 7699, // NOLINT
630 7701, 7703, 7705, 7707,
631 7709, 7711, 7713, 7715, // NOLINT
632 7717, 7719, 7721, 7723,
633 7725, 7727, 7729, 7731, // NOLINT
634 7733, 7735, 7737, 7739,
635 7741, 7743, 7745, 7747, // NOLINT
636 7749, 7751, 7753, 7755,
637 7757, 7759, 7761, 7763, // NOLINT
638 7765, 7767, 7769, 7771,
639 7773, 7775, 7777, 7779, // NOLINT
640 7781, 7783, 7785, 7787,
641 7789, 7791, 7793, 7795, // NOLINT
642 7797, 7799, 7801, 7803,
643 7805, 7807, 7809, 7811, // NOLINT
644 7813, 7815, 7817, 7819,
645 7821, 7823, 7825, 7827, // NOLINT
646 1073749653, 7837, 7839, 7841,
647 7843, 7845, 7847, 7849, // NOLINT
648 7851, 7853, 7855, 7857,
649 7859, 7861, 7863, 7865, // NOLINT
650 7867, 7869, 7871, 7873,
651 7875, 7877, 7879, 7881, // NOLINT
652 7883, 7885, 7887, 7889,
653 7891, 7893, 7895, 7897, // NOLINT
654 7899, 7901, 7903, 7905,
655 7907, 7909, 7911, 7913, // NOLINT
656 7915, 7917, 7919, 7921,
657 7923, 7925, 7927, 7929, // NOLINT
658 7931, 7933, 1073749759, 7943,
659 1073749776, 7957, 1073749792, 7975, // NOLINT
660 1073749808, 7991, 1073749824, 8005,
661 1073749840, 8023, 1073749856, 8039, // NOLINT
662 1073749872, 8061, 1073749888, 8071,
663 1073749904, 8087, 1073749920, 8103, // NOLINT
664 1073749936, 8116, 1073749942, 8119,
665 8126, 1073749954, 8132, 1073749958, // NOLINT
666 8135, 1073749968, 8147, 1073749974,
667 8151, 1073749984, 8167, 1073750002, // NOLINT
668 8180, 1073750006, 8183}; // NOLINT
669 static const uint16_t kLowercaseTable1Size = 84;
670 static const int32_t kLowercaseTable1[84] = {
671 266, 1073742094, 271, 275, 303, 308, 313, 1073742140, // NOLINT
672 317, 1073742150, 329, 334, 388, 1073744944, 3166, 3169, // NOLINT
673 1073744997, 3174, 3176, 3178, 3180, 3185, 1073745011, 3188, // NOLINT
674 1073745014, 3195, 3201, 3203, 3205, 3207, 3209, 3211, // NOLINT
675 3213, 3215, 3217, 3219, 3221, 3223, 3225, 3227, // NOLINT
676 3229, 3231, 3233, 3235, 3237, 3239, 3241, 3243, // NOLINT
677 3245, 3247, 3249, 3251, 3253, 3255, 3257, 3259, // NOLINT
678 3261, 3263, 3265, 3267, 3269, 3271, 3273, 3275, // NOLINT
679 3277, 3279, 3281, 3283, 3285, 3287, 3289, 3291, // NOLINT
680 3293, 3295, 3297, 1073745123, 3300, 3308, 3310, 3315, // NOLINT
681 1073745152, 3365, 3367, 3373 }; // NOLINT
682 static const uint16_t kLowercaseTable5Size = 105;
683 static const int32_t kLowercaseTable5[105] = {
684 1601, 1603, 1605, 1607,
685 1609, 1611, 1613, 1615, // NOLINT
686 1617, 1619, 1621, 1623,
687 1625, 1627, 1629, 1631, // NOLINT
688 1633, 1635, 1637, 1639,
689 1641, 1643, 1645, 1665, // NOLINT
690 1667, 1669, 1671, 1673,
691 1675, 1677, 1679, 1681, // NOLINT
692 1683, 1685, 1687, 1689,
693 1691, 1827, 1829, 1831, // NOLINT
694 1833, 1835, 1837, 1073743663,
695 1841, 1843, 1845, 1847, // NOLINT
696 1849, 1851, 1853, 1855,
697 1857, 1859, 1861, 1863, // NOLINT
698 1865, 1867, 1869, 1871,
699 1873, 1875, 1877, 1879, // NOLINT
700 1881, 1883, 1885, 1887,
701 1889, 1891, 1893, 1895, // NOLINT
702 1897, 1899, 1901, 1903,
703 1073743729, 1912, 1914, 1916, // NOLINT
704 1919, 1921, 1923, 1925,
705 1927, 1932, 1934, 1937, // NOLINT
706 1073743763, 1941, 1943, 1945,
707 1947, 1949, 1951, 1953, // NOLINT
708 1955, 1957, 1959, 1961,
709 2042, 1073744688, 2906, 1073744740, // NOLINT
710 2917}; // NOLINT
711 static const uint16_t kLowercaseTable7Size = 6;
712 static const int32_t kLowercaseTable7[6] = {
713 1073748736, 6918, 1073748755, 6935, 1073749825, 8026 }; // NOLINT
714 bool Lowercase::Is(uchar c) {
715 int chunk_index = c >> 13;
716 switch (chunk_index) {
717 case 0: return LookupPredicate(kLowercaseTable0,
718 kLowercaseTable0Size,
719 c);
720 case 1: return LookupPredicate(kLowercaseTable1,
721 kLowercaseTable1Size,
722 c);
723 case 5: return LookupPredicate(kLowercaseTable5,
724 kLowercaseTable5Size,
725 c);
726 case 7: return LookupPredicate(kLowercaseTable7,
727 kLowercaseTable7Size,
728 c);
729 default: return false;
730 }
731 }
732
733 559
734 // Letter: point.category in ['Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl'] 560 // Letter: point.category in ['Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl']
735 561 #ifdef V8_INTL_SUPPORT
562 bool Letter::Is(uchar c) { return static_cast<bool>(u_isalpha(c)); }
563 #else
736 static const uint16_t kLetterTable0Size = 431; 564 static const uint16_t kLetterTable0Size = 431;
737 static const int32_t kLetterTable0[431] = { 565 static const int32_t kLetterTable0[431] = {
738 1073741889, 90, 1073741921, 122, 566 1073741889, 90, 1073741921, 122,
739 170, 181, 186, 1073742016, // NOLINT 567 170, 181, 186, 1073742016, // NOLINT
740 214, 1073742040, 246, 1073742072, 568 214, 1073742040, 246, 1073742072,
741 705, 1073742534, 721, 1073742560, // NOLINT 569 705, 1073742534, 721, 1073742560, // NOLINT
742 740, 748, 750, 1073742704, 570 740, 748, 750, 1073742704,
743 884, 1073742710, 887, 1073742714, // NOLINT 571 884, 1073742710, 887, 1073742714, // NOLINT
744 893, 895, 902, 1073742728, 572 893, 895, 902, 1073742728,
745 906, 908, 1073742734, 929, // NOLINT 573 906, 908, 1073742734, 929, // NOLINT
(...skipping 180 matching lines...) Expand 10 before | Expand all | Expand 10 after
926 c); 754 c);
927 case 6: return LookupPredicate(kLetterTable6, 755 case 6: return LookupPredicate(kLetterTable6,
928 kLetterTable6Size, 756 kLetterTable6Size,
929 c); 757 c);
930 case 7: return LookupPredicate(kLetterTable7, 758 case 7: return LookupPredicate(kLetterTable7,
931 kLetterTable7Size, 759 kLetterTable7Size,
932 c); 760 c);
933 default: return false; 761 default: return false;
934 } 762 }
935 } 763 }
764 #endif
936 765
937 766 #ifndef V8_INTL_SUPPORT
938 // ID_Start: ((point.category in ['Lu', 'Ll', 'Lt', 'Lm', 'Lo', 767 // ID_Start: ((point.category in ['Lu', 'Ll', 'Lt', 'Lm', 'Lo',
939 // 'Nl'] or 'Other_ID_Start' in point.properties) and ('Pattern_Syntax' not in 768 // 'Nl'] or 'Other_ID_Start' in point.properties) and ('Pattern_Syntax' not in
940 // point.properties) and ('Pattern_White_Space' not in point.properties)) or 769 // point.properties) and ('Pattern_White_Space' not in point.properties)) or
941 // ('JS_ID_Start' in point.properties) 770 // ('JS_ID_Start' in point.properties)
942 771
943 static const uint16_t kID_StartTable0Size = 434; 772 static const uint16_t kID_StartTable0Size = 434;
944 static const int32_t kID_StartTable0[434] = { 773 static const int32_t kID_StartTable0[434] = {
945 36, 1073741889, 90, 92, 774 36, 1073741889, 90, 92,
946 95, 1073741921, 122, 170, // NOLINT 775 95, 1073741921, 122, 170, // NOLINT
947 181, 186, 1073742016, 214, 776 181, 186, 1073742016, 214,
(...skipping 325 matching lines...) Expand 10 before | Expand all | Expand 10 after
1273 case 1: 1102 case 1:
1274 return LookupPredicate(kID_ContinueTable1, kID_ContinueTable1Size, c); 1103 return LookupPredicate(kID_ContinueTable1, kID_ContinueTable1Size, c);
1275 case 5: 1104 case 5:
1276 return LookupPredicate(kID_ContinueTable5, kID_ContinueTable5Size, c); 1105 return LookupPredicate(kID_ContinueTable5, kID_ContinueTable5Size, c);
1277 case 7: 1106 case 7:
1278 return LookupPredicate(kID_ContinueTable7, kID_ContinueTable7Size, c); 1107 return LookupPredicate(kID_ContinueTable7, kID_ContinueTable7Size, c);
1279 default: return false; 1108 default: return false;
1280 } 1109 }
1281 } 1110 }
1282 1111
1283
1284 // WhiteSpace: (point.category == 'Zs') or ('JS_White_Space' in 1112 // WhiteSpace: (point.category == 'Zs') or ('JS_White_Space' in
1285 // point.properties) 1113 // point.properties)
1286 1114
1287 static const uint16_t kWhiteSpaceTable0Size = 6; 1115 static const uint16_t kWhiteSpaceTable0Size = 6;
1288 static const int32_t kWhiteSpaceTable0[6] = {9, 1073741835, 12, 1116 static const int32_t kWhiteSpaceTable0[6] = {9, 1073741835, 12,
1289 32, 160, 5760}; // NOLINT 1117 32, 160, 5760}; // NOLINT
1290 static const uint16_t kWhiteSpaceTable1Size = 5; 1118 static const uint16_t kWhiteSpaceTable1Size = 5;
1291 static const int32_t kWhiteSpaceTable1[5] = { 1119 static const int32_t kWhiteSpaceTable1[5] = {
1292 1073741824, 10, 47, 95, 4096 }; // NOLINT 1120 1073741824, 10, 47, 95, 4096 }; // NOLINT
1293 static const uint16_t kWhiteSpaceTable7Size = 1; 1121 static const uint16_t kWhiteSpaceTable7Size = 1;
1294 static const int32_t kWhiteSpaceTable7[1] = {7935}; // NOLINT 1122 static const int32_t kWhiteSpaceTable7[1] = {7935}; // NOLINT
1295 bool WhiteSpace::Is(uchar c) { 1123 bool WhiteSpace::Is(uchar c) {
1296 int chunk_index = c >> 13; 1124 int chunk_index = c >> 13;
1297 switch (chunk_index) { 1125 switch (chunk_index) {
1298 case 0: return LookupPredicate(kWhiteSpaceTable0, 1126 case 0: return LookupPredicate(kWhiteSpaceTable0,
1299 kWhiteSpaceTable0Size, 1127 kWhiteSpaceTable0Size,
1300 c); 1128 c);
1301 case 1: return LookupPredicate(kWhiteSpaceTable1, 1129 case 1: return LookupPredicate(kWhiteSpaceTable1,
1302 kWhiteSpaceTable1Size, 1130 kWhiteSpaceTable1Size,
1303 c); 1131 c);
1304 case 7: 1132 case 7:
1305 return LookupPredicate(kWhiteSpaceTable7, kWhiteSpaceTable7Size, c); 1133 return LookupPredicate(kWhiteSpaceTable7, kWhiteSpaceTable7Size, c);
1306 default: return false; 1134 default: return false;
1307 } 1135 }
1308 } 1136 }
1309 1137 #endif // !V8_INTL_SUPPORT
1310 1138
1311 // LineTerminator: 'JS_Line_Terminator' in point.properties 1139 // LineTerminator: 'JS_Line_Terminator' in point.properties
1140 // ECMA-262 7.0, section 11.3 lists exactly 4 code points:
1141 // LF (U+000A), CR (U+000D), LS (U+2028), PS (U+2029)
1312 1142
1313 static const uint16_t kLineTerminatorTable0Size = 2;
1314 static const int32_t kLineTerminatorTable0[2] = {
1315 10, 13 }; // NOLINT
1316 static const uint16_t kLineTerminatorTable1Size = 2;
1317 static const int32_t kLineTerminatorTable1[2] = {
1318 1073741864, 41 }; // NOLINT
1319 bool LineTerminator::Is(uchar c) { 1143 bool LineTerminator::Is(uchar c) {
1320 int chunk_index = c >> 13; 1144 return c == 0xA || c == 0xD || c == 0x2028 || c == 0x2029;
1321 switch (chunk_index) {
1322 case 0: return LookupPredicate(kLineTerminatorTable0,
1323 kLineTerminatorTable0Size,
1324 c);
1325 case 1: return LookupPredicate(kLineTerminatorTable1,
1326 kLineTerminatorTable1Size,
1327 c);
1328 default: return false;
1329 }
1330 } 1145 }
1331 1146
1147 // TODO(jshin): Once icu_case_mapping flag is removed, enclose case-mapping
1148 // related tables with #ifndef V8_INTL_SUPPORT.
1332 static const MultiCharacterSpecialCase<2> kToLowercaseMultiStrings0[2] = { // NOLINT 1149 static const MultiCharacterSpecialCase<2> kToLowercaseMultiStrings0[2] = { // NOLINT
1333 {{105, 775}}, {{kSentinel}} }; // NOLINT 1150 {{105, 775}}, {{kSentinel}} }; // NOLINT
1334 static const uint16_t kToLowercaseTable0Size = 488; // NOLINT 1151 static const uint16_t kToLowercaseTable0Size = 488; // NOLINT
1335 static const int32_t kToLowercaseTable0[976] = { 1152 static const int32_t kToLowercaseTable0[976] = {
1336 1073741889, 128, 90, 128, 1073742016, 128, 1153 1073741889, 128, 90, 128, 1073742016, 128,
1337 214, 128, 1073742040, 128, 222, 128, 1154 214, 128, 1073742040, 128, 222, 128,
1338 256, 4, 258, 4, // NOLINT 1155 256, 4, 258, 4, // NOLINT
1339 260, 4, 262, 4, 264, 4, 1156 260, 4, 262, 4, 264, 4,
1340 266, 4, 268, 4, 270, 4, 1157 266, 4, 268, 4, 270, 4,
1341 272, 4, 274, 4, // NOLINT 1158 272, 4, 274, 4, // NOLINT
(...skipping 2068 matching lines...) Expand 10 before | Expand all | Expand 10 after
3410 result, 3227 result,
3411 allow_caching_ptr); 3228 allow_caching_ptr);
3412 default: return 0; 3229 default: return 0;
3413 } 3230 }
3414 } 3231 }
3415 3232
3416 3233
3417 const uchar UnicodeData::kMaxCodePoint = 65533; 3234 const uchar UnicodeData::kMaxCodePoint = 65533;
3418 3235
3419 int UnicodeData::GetByteCount() { 3236 int UnicodeData::GetByteCount() {
3237 #ifndef V8_INTL_SUPPORT // NOLINT
3420 return kUppercaseTable0Size * sizeof(int32_t) // NOLINT 3238 return kUppercaseTable0Size * sizeof(int32_t) // NOLINT
3421 + kUppercaseTable1Size * sizeof(int32_t) // NOLINT 3239 + kUppercaseTable1Size * sizeof(int32_t) // NOLINT
3422 + kUppercaseTable5Size * sizeof(int32_t) // NOLINT 3240 + kUppercaseTable5Size * sizeof(int32_t) // NOLINT
3423 + kUppercaseTable7Size * sizeof(int32_t) // NOLINT 3241 + kUppercaseTable7Size * sizeof(int32_t) // NOLINT
3424 + kLowercaseTable0Size * sizeof(int32_t) // NOLINT
3425 + kLowercaseTable1Size * sizeof(int32_t) // NOLINT
3426 + kLowercaseTable5Size * sizeof(int32_t) // NOLINT
3427 + kLowercaseTable7Size * sizeof(int32_t) // NOLINT
3428 + kLetterTable0Size * sizeof(int32_t) // NOLINT 3242 + kLetterTable0Size * sizeof(int32_t) // NOLINT
3429 + kLetterTable1Size * sizeof(int32_t) // NOLINT 3243 + kLetterTable1Size * sizeof(int32_t) // NOLINT
3430 + kLetterTable2Size * sizeof(int32_t) // NOLINT 3244 + kLetterTable2Size * sizeof(int32_t) // NOLINT
3431 + kLetterTable3Size * sizeof(int32_t) // NOLINT 3245 + kLetterTable3Size * sizeof(int32_t) // NOLINT
3432 + kLetterTable4Size * sizeof(int32_t) // NOLINT 3246 + kLetterTable4Size * sizeof(int32_t) // NOLINT
3433 + kLetterTable5Size * sizeof(int32_t) // NOLINT 3247 + kLetterTable5Size * sizeof(int32_t) // NOLINT
3434 + kLetterTable6Size * sizeof(int32_t) // NOLINT 3248 + kLetterTable6Size * sizeof(int32_t) // NOLINT
3435 + kLetterTable7Size * sizeof(int32_t) // NOLINT 3249 + kLetterTable7Size * sizeof(int32_t) // NOLINT
3436 + kID_StartTable0Size * sizeof(int32_t) // NOLINT 3250 + kID_StartTable0Size * sizeof(int32_t) // NOLINT
3437 + kID_StartTable1Size * sizeof(int32_t) // NOLINT 3251 + kID_StartTable1Size * sizeof(int32_t) // NOLINT
3438 + kID_StartTable2Size * sizeof(int32_t) // NOLINT 3252 + kID_StartTable2Size * sizeof(int32_t) // NOLINT
3439 + kID_StartTable3Size * sizeof(int32_t) // NOLINT 3253 + kID_StartTable3Size * sizeof(int32_t) // NOLINT
3440 + kID_StartTable4Size * sizeof(int32_t) // NOLINT 3254 + kID_StartTable4Size * sizeof(int32_t) // NOLINT
3441 + kID_StartTable5Size * sizeof(int32_t) // NOLINT 3255 + kID_StartTable5Size * sizeof(int32_t) // NOLINT
3442 + kID_StartTable6Size * sizeof(int32_t) // NOLINT 3256 + kID_StartTable6Size * sizeof(int32_t) // NOLINT
3443 + kID_StartTable7Size * sizeof(int32_t) // NOLINT 3257 + kID_StartTable7Size * sizeof(int32_t) // NOLINT
3444 + kID_ContinueTable0Size * sizeof(int32_t) // NOLINT 3258 + kID_ContinueTable0Size * sizeof(int32_t) // NOLINT
3445 + kID_ContinueTable1Size * sizeof(int32_t) // NOLINT 3259 + kID_ContinueTable1Size * sizeof(int32_t) // NOLINT
3446 + kID_ContinueTable5Size * sizeof(int32_t) // NOLINT 3260 + kID_ContinueTable5Size * sizeof(int32_t) // NOLINT
3447 + kID_ContinueTable7Size * sizeof(int32_t) // NOLINT 3261 + kID_ContinueTable7Size * sizeof(int32_t) // NOLINT
3448 + kWhiteSpaceTable0Size * sizeof(int32_t) // NOLINT 3262 + kWhiteSpaceTable0Size * sizeof(int32_t) // NOLINT
3449 + kWhiteSpaceTable1Size * sizeof(int32_t) // NOLINT 3263 + kWhiteSpaceTable1Size * sizeof(int32_t) // NOLINT
3450 + kWhiteSpaceTable7Size * sizeof(int32_t) // NOLINT 3264 + kWhiteSpaceTable7Size * sizeof(int32_t) // NOLINT
3451 + kLineTerminatorTable0Size * sizeof(int32_t) // NOLINT 3265 #else
3452 + kLineTerminatorTable1Size * sizeof(int32_t) // NOLINT 3266 return
3267 #endif // !V8_INTL_SUPPORT
3453 + 3268 +
3454 kToLowercaseMultiStrings0Size * 3269 kToLowercaseMultiStrings0Size *
3455 sizeof(MultiCharacterSpecialCase<2>) // NOLINT 3270 sizeof(MultiCharacterSpecialCase<2>) // NOLINT
3456 + 3271 +
3457 kToLowercaseMultiStrings1Size * 3272 kToLowercaseMultiStrings1Size *
3458 sizeof(MultiCharacterSpecialCase<1>) // NOLINT 3273 sizeof(MultiCharacterSpecialCase<1>) // NOLINT
3459 + 3274 +
3460 kToLowercaseMultiStrings5Size * 3275 kToLowercaseMultiStrings5Size *
3461 sizeof(MultiCharacterSpecialCase<1>) // NOLINT 3276 sizeof(MultiCharacterSpecialCase<1>) // NOLINT
3462 + 3277 +
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after
3503 sizeof(MultiCharacterSpecialCase<1>) // NOLINT 3318 sizeof(MultiCharacterSpecialCase<1>) // NOLINT
3504 + 3319 +
3505 kCanonicalizationRangeMultiStrings1Size * 3320 kCanonicalizationRangeMultiStrings1Size *
3506 sizeof(MultiCharacterSpecialCase<1>) // NOLINT 3321 sizeof(MultiCharacterSpecialCase<1>) // NOLINT
3507 + 3322 +
3508 kCanonicalizationRangeMultiStrings7Size * 3323 kCanonicalizationRangeMultiStrings7Size *
3509 sizeof(MultiCharacterSpecialCase<1>); // NOLINT 3324 sizeof(MultiCharacterSpecialCase<1>); // NOLINT
3510 } 3325 }
3511 3326
3512 } // namespace unibrow 3327 } // namespace unibrow
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698