Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(88)

Side by Side Diff: src/unicode.cc

Issue 2331303002: Use ICU for ID_START and ID_CONTINUE for Unicode 9 data (Closed)
Patch Set: drop an unnecessary todo Created 3 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/unicode.h ('k') | src/v8.gyp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD NEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 // 4 //
5 // This file was generated at 2014-10-08 15:25:47.940335 5 // This file was generated at 2014-10-08 15:25:47.940335
6 6
7 #include "src/unicode.h" 7 #include "src/unicode.h"
8 #include "src/unicode-inl.h" 8 #include "src/unicode-inl.h"
9 #include <stdio.h> 9 #include <stdio.h>
10 #include <stdlib.h> 10 #include <stdlib.h>
11 11
12 #ifdef V8_INTL_SUPPORT
13 #include "unicode/uchar.h"
14 #endif
15
12 namespace unibrow { 16 namespace unibrow {
13 17
14 static const int kStartBit = (1 << 30); 18 static const int kStartBit = (1 << 30);
15 static const int kChunkBits = (1 << 13); 19 static const int kChunkBits = (1 << 13);
16 static const uchar kSentinel = static_cast<uchar>(-1); 20 static const uchar kSentinel = static_cast<uchar>(-1);
17 21
18 /** 22 /**
19 * \file 23 * \file
20 * Implementations of functions for working with unicode. 24 * Implementations of functions for working with unicode.
21 */ 25 */
22 26
23 typedef signed short int16_t; // NOLINT 27 typedef signed short int16_t; // NOLINT
24 typedef unsigned short uint16_t; // NOLINT 28 typedef unsigned short uint16_t; // NOLINT
25 typedef int int32_t; // NOLINT 29 typedef int int32_t; // NOLINT
26 30
27 31
28 // All access to the character table should go through this function. 32 // All access to the character table should go through this function.
29 template <int D> 33 template <int D>
30 static inline uchar TableGet(const int32_t* table, int index) { 34 static inline uchar TableGet(const int32_t* table, int index) {
31 return table[D * index]; 35 return table[D * index];
32 } 36 }
33 37
34 38
35 static inline uchar GetEntry(int32_t entry) { 39 static inline uchar GetEntry(int32_t entry) {
36 return entry & (kStartBit - 1); 40 return entry & (kStartBit - 1);
37 } 41 }
38 42
39
40 static inline bool IsStart(int32_t entry) { 43 static inline bool IsStart(int32_t entry) {
41 return (entry & kStartBit) != 0; 44 return (entry & kStartBit) != 0;
42 } 45 }
43 46
44 47 #ifndef V8_INTL_SUPPORT
45 /** 48 /**
46 * Look up a character in the unicode table using a mix of binary and 49 * Look up a character in the unicode table using a mix of binary and
47 * interpolation search. For a uniformly distributed array 50 * interpolation search. For a uniformly distributed array
48 * interpolation search beats binary search by a wide margin. However, 51 * interpolation search beats binary search by a wide margin. However,
49 * in this case interpolation search degenerates because of some very 52 * in this case interpolation search degenerates because of some very
50 * high values in the lower end of the table so this function uses a 53 * high values in the lower end of the table so this function uses a
51 * combination. The average number of steps to look up the information 54 * combination. The average number of steps to look up the information
52 * about a character is around 10, slightly higher if there is no 55 * about a character is around 10, slightly higher if there is no
53 * information available about the character. 56 * information available about the character.
54 */ 57 */
(...skipping 19 matching lines...) Expand all
74 // the one we're looking for, we're done. 77 // the one we're looking for, we're done.
75 if (mid == 0) break; 78 if (mid == 0) break;
76 high = mid - 1; 79 high = mid - 1;
77 } 80 }
78 } 81 }
79 int32_t field = TableGet<kEntryDist>(table, low); 82 int32_t field = TableGet<kEntryDist>(table, low);
80 uchar entry = GetEntry(field); 83 uchar entry = GetEntry(field);
81 bool is_start = IsStart(field); 84 bool is_start = IsStart(field);
82 return (entry == value) || (entry < value && is_start); 85 return (entry == value) || (entry < value && is_start);
83 } 86 }
87 #endif // !V8_INTL_SUPPORT
84 88
85 template <int kW> 89 template <int kW>
86 struct MultiCharacterSpecialCase { 90 struct MultiCharacterSpecialCase {
87 static const uchar kEndOfEncoding = kSentinel; 91 static const uchar kEndOfEncoding = kSentinel;
88 uchar chars[kW]; 92 uchar chars[kW];
89 }; 93 };
90 94
91
92 // Look up the mapping for the given character in the specified table, 95 // Look up the mapping for the given character in the specified table,
93 // which is of the specified length and uses the specified special case 96 // which is of the specified length and uses the specified special case
94 // mapping for multi-char mappings. The next parameter is the character 97 // mapping for multi-char mappings. The next parameter is the character
95 // following the one to map. The result will be written in to the result 98 // following the one to map. The result will be written in to the result
96 // buffer and the number of characters written will be returned. Finally, 99 // buffer and the number of characters written will be returned. Finally,
97 // if the allow_caching_ptr is non-null then false will be stored in 100 // if the allow_caching_ptr is non-null then false will be stored in
98 // it if the result contains multiple characters or depends on the 101 // it if the result contains multiple characters or depends on the
99 // context. 102 // context.
100 // If ranges are linear, a match between a start and end point is 103 // If ranges are linear, a match between a start and end point is
101 // offset by the distance between the match and the start. Otherwise 104 // offset by the distance between the match and the start. Otherwise
(...skipping 301 matching lines...) Expand 10 before | Expand all | Expand 10 after
403 // Code points outside of the unicode range. 406 // Code points outside of the unicode range.
404 return false; 407 return false;
405 } 408 }
406 } 409 }
407 cursor = char_end; 410 cursor = char_end;
408 } 411 }
409 return true; 412 return true;
410 } 413 }
411 414
412 // Uppercase: point.category == 'Lu' 415 // Uppercase: point.category == 'Lu'
413 416 // TODO(jshin): Check if it's ok to exclude Other_Uppercase characters.
417 #ifdef V8_INTL_SUPPORT
418 bool Uppercase::Is(uchar c) { return static_cast<bool>(u_isupper(c)); }
419 #else
414 static const uint16_t kUppercaseTable0Size = 455; 420 static const uint16_t kUppercaseTable0Size = 455;
415 static const int32_t kUppercaseTable0[455] = { 421 static const int32_t kUppercaseTable0[455] = {
416 1073741889, 90, 1073742016, 214, 422 1073741889, 90, 1073742016, 214,
417 1073742040, 222, 256, 258, // NOLINT 423 1073742040, 222, 256, 258, // NOLINT
418 260, 262, 264, 266, 424 260, 262, 264, 266,
419 268, 270, 272, 274, // NOLINT 425 268, 270, 272, 274, // NOLINT
420 276, 278, 280, 282, 426 276, 278, 280, 282,
421 284, 286, 288, 290, // NOLINT 427 284, 286, 288, 290, // NOLINT
422 292, 294, 296, 298, 428 292, 294, 296, 298,
423 300, 302, 304, 306, // NOLINT 429 300, 302, 304, 306, // NOLINT
(...skipping 145 matching lines...) Expand 10 before | Expand all | Expand 10 after
569 c); 575 c);
570 case 5: return LookupPredicate(kUppercaseTable5, 576 case 5: return LookupPredicate(kUppercaseTable5,
571 kUppercaseTable5Size, 577 kUppercaseTable5Size,
572 c); 578 c);
573 case 7: return LookupPredicate(kUppercaseTable7, 579 case 7: return LookupPredicate(kUppercaseTable7,
574 kUppercaseTable7Size, 580 kUppercaseTable7Size,
575 c); 581 c);
576 default: return false; 582 default: return false;
577 } 583 }
578 } 584 }
579 585 #endif // V8_INTL_SUPPORT
580
581 // Lowercase: point.category == 'Ll'
582
583 static const uint16_t kLowercaseTable0Size = 467;
584 static const int32_t kLowercaseTable0[467] = {
585 1073741921, 122, 181, 1073742047,
586 246, 1073742072, 255, 257, // NOLINT
587 259, 261, 263, 265,
588 267, 269, 271, 273, // NOLINT
589 275, 277, 279, 281,
590 283, 285, 287, 289, // NOLINT
591 291, 293, 295, 297,
592 299, 301, 303, 305, // NOLINT
593 307, 309, 1073742135, 312,
594 314, 316, 318, 320, // NOLINT
595 322, 324, 326, 1073742152,
596 329, 331, 333, 335, // NOLINT
597 337, 339, 341, 343,
598 345, 347, 349, 351, // NOLINT
599 353, 355, 357, 359,
600 361, 363, 365, 367, // NOLINT
601 369, 371, 373, 375,
602 378, 380, 1073742206, 384, // NOLINT
603 387, 389, 392, 1073742220,
604 397, 402, 405, 1073742233, // NOLINT
605 411, 414, 417, 419,
606 421, 424, 1073742250, 427, // NOLINT
607 429, 432, 436, 438,
608 1073742265, 442, 1073742269, 447, // NOLINT
609 454, 457, 460, 462,
610 464, 466, 468, 470, // NOLINT
611 472, 474, 1073742300, 477,
612 479, 481, 483, 485, // NOLINT
613 487, 489, 491, 493,
614 1073742319, 496, 499, 501, // NOLINT
615 505, 507, 509, 511,
616 513, 515, 517, 519, // NOLINT
617 521, 523, 525, 527,
618 529, 531, 533, 535, // NOLINT
619 537, 539, 541, 543,
620 545, 547, 549, 551, // NOLINT
621 553, 555, 557, 559,
622 561, 1073742387, 569, 572, // NOLINT
623 1073742399, 576, 578, 583,
624 585, 587, 589, 1073742415, // NOLINT
625 659, 1073742485, 687, 881,
626 883, 887, 1073742715, 893, // NOLINT
627 912, 1073742764, 974, 1073742800,
628 977, 1073742805, 983, 985, // NOLINT
629 987, 989, 991, 993,
630 995, 997, 999, 1001, // NOLINT
631 1003, 1005, 1073742831, 1011,
632 1013, 1016, 1073742843, 1020, // NOLINT
633 1073742896, 1119, 1121, 1123,
634 1125, 1127, 1129, 1131, // NOLINT
635 1133, 1135, 1137, 1139,
636 1141, 1143, 1145, 1147, // NOLINT
637 1149, 1151, 1153, 1163,
638 1165, 1167, 1169, 1171, // NOLINT
639 1173, 1175, 1177, 1179,
640 1181, 1183, 1185, 1187, // NOLINT
641 1189, 1191, 1193, 1195,
642 1197, 1199, 1201, 1203, // NOLINT
643 1205, 1207, 1209, 1211,
644 1213, 1215, 1218, 1220, // NOLINT
645 1222, 1224, 1226, 1228,
646 1073743054, 1231, 1233, 1235, // NOLINT
647 1237, 1239, 1241, 1243,
648 1245, 1247, 1249, 1251, // NOLINT
649 1253, 1255, 1257, 1259,
650 1261, 1263, 1265, 1267, // NOLINT
651 1269, 1271, 1273, 1275,
652 1277, 1279, 1281, 1283, // NOLINT
653 1285, 1287, 1289, 1291,
654 1293, 1295, 1297, 1299, // NOLINT
655 1301, 1303, 1305, 1307,
656 1309, 1311, 1313, 1315, // NOLINT
657 1317, 1319, 1321, 1323,
658 1325, 1327, 1073743201, 1415, // NOLINT
659 1073749248, 7467, 1073749355, 7543,
660 1073749369, 7578, 7681, 7683, // NOLINT
661 7685, 7687, 7689, 7691,
662 7693, 7695, 7697, 7699, // NOLINT
663 7701, 7703, 7705, 7707,
664 7709, 7711, 7713, 7715, // NOLINT
665 7717, 7719, 7721, 7723,
666 7725, 7727, 7729, 7731, // NOLINT
667 7733, 7735, 7737, 7739,
668 7741, 7743, 7745, 7747, // NOLINT
669 7749, 7751, 7753, 7755,
670 7757, 7759, 7761, 7763, // NOLINT
671 7765, 7767, 7769, 7771,
672 7773, 7775, 7777, 7779, // NOLINT
673 7781, 7783, 7785, 7787,
674 7789, 7791, 7793, 7795, // NOLINT
675 7797, 7799, 7801, 7803,
676 7805, 7807, 7809, 7811, // NOLINT
677 7813, 7815, 7817, 7819,
678 7821, 7823, 7825, 7827, // NOLINT
679 1073749653, 7837, 7839, 7841,
680 7843, 7845, 7847, 7849, // NOLINT
681 7851, 7853, 7855, 7857,
682 7859, 7861, 7863, 7865, // NOLINT
683 7867, 7869, 7871, 7873,
684 7875, 7877, 7879, 7881, // NOLINT
685 7883, 7885, 7887, 7889,
686 7891, 7893, 7895, 7897, // NOLINT
687 7899, 7901, 7903, 7905,
688 7907, 7909, 7911, 7913, // NOLINT
689 7915, 7917, 7919, 7921,
690 7923, 7925, 7927, 7929, // NOLINT
691 7931, 7933, 1073749759, 7943,
692 1073749776, 7957, 1073749792, 7975, // NOLINT
693 1073749808, 7991, 1073749824, 8005,
694 1073749840, 8023, 1073749856, 8039, // NOLINT
695 1073749872, 8061, 1073749888, 8071,
696 1073749904, 8087, 1073749920, 8103, // NOLINT
697 1073749936, 8116, 1073749942, 8119,
698 8126, 1073749954, 8132, 1073749958, // NOLINT
699 8135, 1073749968, 8147, 1073749974,
700 8151, 1073749984, 8167, 1073750002, // NOLINT
701 8180, 1073750006, 8183}; // NOLINT
702 static const uint16_t kLowercaseTable1Size = 84;
703 static const int32_t kLowercaseTable1[84] = {
704 266, 1073742094, 271, 275, 303, 308, 313, 1073742140, // NOLINT
705 317, 1073742150, 329, 334, 388, 1073744944, 3166, 3169, // NOLINT
706 1073744997, 3174, 3176, 3178, 3180, 3185, 1073745011, 3188, // NOLINT
707 1073745014, 3195, 3201, 3203, 3205, 3207, 3209, 3211, // NOLINT
708 3213, 3215, 3217, 3219, 3221, 3223, 3225, 3227, // NOLINT
709 3229, 3231, 3233, 3235, 3237, 3239, 3241, 3243, // NOLINT
710 3245, 3247, 3249, 3251, 3253, 3255, 3257, 3259, // NOLINT
711 3261, 3263, 3265, 3267, 3269, 3271, 3273, 3275, // NOLINT
712 3277, 3279, 3281, 3283, 3285, 3287, 3289, 3291, // NOLINT
713 3293, 3295, 3297, 1073745123, 3300, 3308, 3310, 3315, // NOLINT
714 1073745152, 3365, 3367, 3373 }; // NOLINT
715 static const uint16_t kLowercaseTable5Size = 105;
716 static const int32_t kLowercaseTable5[105] = {
717 1601, 1603, 1605, 1607,
718 1609, 1611, 1613, 1615, // NOLINT
719 1617, 1619, 1621, 1623,
720 1625, 1627, 1629, 1631, // NOLINT
721 1633, 1635, 1637, 1639,
722 1641, 1643, 1645, 1665, // NOLINT
723 1667, 1669, 1671, 1673,
724 1675, 1677, 1679, 1681, // NOLINT
725 1683, 1685, 1687, 1689,
726 1691, 1827, 1829, 1831, // NOLINT
727 1833, 1835, 1837, 1073743663,
728 1841, 1843, 1845, 1847, // NOLINT
729 1849, 1851, 1853, 1855,
730 1857, 1859, 1861, 1863, // NOLINT
731 1865, 1867, 1869, 1871,
732 1873, 1875, 1877, 1879, // NOLINT
733 1881, 1883, 1885, 1887,
734 1889, 1891, 1893, 1895, // NOLINT
735 1897, 1899, 1901, 1903,
736 1073743729, 1912, 1914, 1916, // NOLINT
737 1919, 1921, 1923, 1925,
738 1927, 1932, 1934, 1937, // NOLINT
739 1073743763, 1941, 1943, 1945,
740 1947, 1949, 1951, 1953, // NOLINT
741 1955, 1957, 1959, 1961,
742 2042, 1073744688, 2906, 1073744740, // NOLINT
743 2917}; // NOLINT
744 static const uint16_t kLowercaseTable7Size = 6;
745 static const int32_t kLowercaseTable7[6] = {
746 1073748736, 6918, 1073748755, 6935, 1073749825, 8026 }; // NOLINT
747 bool Lowercase::Is(uchar c) {
748 int chunk_index = c >> 13;
749 switch (chunk_index) {
750 case 0: return LookupPredicate(kLowercaseTable0,
751 kLowercaseTable0Size,
752 c);
753 case 1: return LookupPredicate(kLowercaseTable1,
754 kLowercaseTable1Size,
755 c);
756 case 5: return LookupPredicate(kLowercaseTable5,
757 kLowercaseTable5Size,
758 c);
759 case 7: return LookupPredicate(kLowercaseTable7,
760 kLowercaseTable7Size,
761 c);
762 default: return false;
763 }
764 }
765
766 586
767 // Letter: point.category in ['Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl'] 587 // Letter: point.category in ['Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl']
768 588 #ifdef V8_INTL_SUPPORT
589 bool Letter::Is(uchar c) { return static_cast<bool>(u_isalpha(c)); }
590 #else
769 static const uint16_t kLetterTable0Size = 431; 591 static const uint16_t kLetterTable0Size = 431;
770 static const int32_t kLetterTable0[431] = { 592 static const int32_t kLetterTable0[431] = {
771 1073741889, 90, 1073741921, 122, 593 1073741889, 90, 1073741921, 122,
772 170, 181, 186, 1073742016, // NOLINT 594 170, 181, 186, 1073742016, // NOLINT
773 214, 1073742040, 246, 1073742072, 595 214, 1073742040, 246, 1073742072,
774 705, 1073742534, 721, 1073742560, // NOLINT 596 705, 1073742534, 721, 1073742560, // NOLINT
775 740, 748, 750, 1073742704, 597 740, 748, 750, 1073742704,
776 884, 1073742710, 887, 1073742714, // NOLINT 598 884, 1073742710, 887, 1073742714, // NOLINT
777 893, 895, 902, 1073742728, 599 893, 895, 902, 1073742728,
778 906, 908, 1073742734, 929, // NOLINT 600 906, 908, 1073742734, 929, // NOLINT
(...skipping 180 matching lines...) Expand 10 before | Expand all | Expand 10 after
959 c); 781 c);
960 case 6: return LookupPredicate(kLetterTable6, 782 case 6: return LookupPredicate(kLetterTable6,
961 kLetterTable6Size, 783 kLetterTable6Size,
962 c); 784 c);
963 case 7: return LookupPredicate(kLetterTable7, 785 case 7: return LookupPredicate(kLetterTable7,
964 kLetterTable7Size, 786 kLetterTable7Size,
965 c); 787 c);
966 default: return false; 788 default: return false;
967 } 789 }
968 } 790 }
791 #endif
969 792
970 793 #ifndef V8_INTL_SUPPORT
971 // ID_Start: ((point.category in ['Lu', 'Ll', 'Lt', 'Lm', 'Lo', 794 // ID_Start: ((point.category in ['Lu', 'Ll', 'Lt', 'Lm', 'Lo',
972 // 'Nl'] or 'Other_ID_Start' in point.properties) and ('Pattern_Syntax' not in 795 // 'Nl'] or 'Other_ID_Start' in point.properties) and ('Pattern_Syntax' not in
973 // point.properties) and ('Pattern_White_Space' not in point.properties)) or 796 // point.properties) and ('Pattern_White_Space' not in point.properties)) or
974 // ('JS_ID_Start' in point.properties) 797 // ('JS_ID_Start' in point.properties)
975 798
976 static const uint16_t kID_StartTable0Size = 434; 799 static const uint16_t kID_StartTable0Size = 434;
977 static const int32_t kID_StartTable0[434] = { 800 static const int32_t kID_StartTable0[434] = {
978 36, 1073741889, 90, 92, 801 36, 1073741889, 90, 92,
979 95, 1073741921, 122, 170, // NOLINT 802 95, 1073741921, 122, 170, // NOLINT
980 181, 186, 1073742016, 214, 803 181, 186, 1073742016, 214,
(...skipping 325 matching lines...) Expand 10 before | Expand all | Expand 10 after
1306 case 1: 1129 case 1:
1307 return LookupPredicate(kID_ContinueTable1, kID_ContinueTable1Size, c); 1130 return LookupPredicate(kID_ContinueTable1, kID_ContinueTable1Size, c);
1308 case 5: 1131 case 5:
1309 return LookupPredicate(kID_ContinueTable5, kID_ContinueTable5Size, c); 1132 return LookupPredicate(kID_ContinueTable5, kID_ContinueTable5Size, c);
1310 case 7: 1133 case 7:
1311 return LookupPredicate(kID_ContinueTable7, kID_ContinueTable7Size, c); 1134 return LookupPredicate(kID_ContinueTable7, kID_ContinueTable7Size, c);
1312 default: return false; 1135 default: return false;
1313 } 1136 }
1314 } 1137 }
1315 1138
1316
1317 // WhiteSpace: (point.category == 'Zs') or ('JS_White_Space' in 1139 // WhiteSpace: (point.category == 'Zs') or ('JS_White_Space' in
1318 // point.properties) 1140 // point.properties)
1319 1141
1320 static const uint16_t kWhiteSpaceTable0Size = 6; 1142 static const uint16_t kWhiteSpaceTable0Size = 6;
1321 static const int32_t kWhiteSpaceTable0[6] = {9, 1073741835, 12, 1143 static const int32_t kWhiteSpaceTable0[6] = {9, 1073741835, 12,
1322 32, 160, 5760}; // NOLINT 1144 32, 160, 5760}; // NOLINT
1323 static const uint16_t kWhiteSpaceTable1Size = 5; 1145 static const uint16_t kWhiteSpaceTable1Size = 5;
1324 static const int32_t kWhiteSpaceTable1[5] = { 1146 static const int32_t kWhiteSpaceTable1[5] = {
1325 1073741824, 10, 47, 95, 4096 }; // NOLINT 1147 1073741824, 10, 47, 95, 4096 }; // NOLINT
1326 static const uint16_t kWhiteSpaceTable7Size = 1; 1148 static const uint16_t kWhiteSpaceTable7Size = 1;
1327 static const int32_t kWhiteSpaceTable7[1] = {7935}; // NOLINT 1149 static const int32_t kWhiteSpaceTable7[1] = {7935}; // NOLINT
1328 bool WhiteSpace::Is(uchar c) { 1150 bool WhiteSpace::Is(uchar c) {
1329 int chunk_index = c >> 13; 1151 int chunk_index = c >> 13;
1330 switch (chunk_index) { 1152 switch (chunk_index) {
1331 case 0: return LookupPredicate(kWhiteSpaceTable0, 1153 case 0: return LookupPredicate(kWhiteSpaceTable0,
1332 kWhiteSpaceTable0Size, 1154 kWhiteSpaceTable0Size,
1333 c); 1155 c);
1334 case 1: return LookupPredicate(kWhiteSpaceTable1, 1156 case 1: return LookupPredicate(kWhiteSpaceTable1,
1335 kWhiteSpaceTable1Size, 1157 kWhiteSpaceTable1Size,
1336 c); 1158 c);
1337 case 7: 1159 case 7:
1338 return LookupPredicate(kWhiteSpaceTable7, kWhiteSpaceTable7Size, c); 1160 return LookupPredicate(kWhiteSpaceTable7, kWhiteSpaceTable7Size, c);
1339 default: return false; 1161 default: return false;
1340 } 1162 }
1341 } 1163 }
1342 1164 #endif // !V8_INTL_SUPPORT
1343 1165
1344 // LineTerminator: 'JS_Line_Terminator' in point.properties 1166 // LineTerminator: 'JS_Line_Terminator' in point.properties
1167 // ES#sec-line-terminators lists exactly 4 code points:
1168 // LF (U+000A), CR (U+000D), LS(U+2028), PS(U+2029)
1345 1169
1346 static const uint16_t kLineTerminatorTable0Size = 2;
1347 static const int32_t kLineTerminatorTable0[2] = {
1348 10, 13 }; // NOLINT
1349 static const uint16_t kLineTerminatorTable1Size = 2;
1350 static const int32_t kLineTerminatorTable1[2] = {
1351 1073741864, 41 }; // NOLINT
1352 bool LineTerminator::Is(uchar c) { 1170 bool LineTerminator::Is(uchar c) {
1353 int chunk_index = c >> 13; 1171 return c == 0xA || c == 0xD || c == 0x2028 || c == 0x2029;
1354 switch (chunk_index) {
1355 case 0: return LookupPredicate(kLineTerminatorTable0,
1356 kLineTerminatorTable0Size,
1357 c);
1358 case 1: return LookupPredicate(kLineTerminatorTable1,
1359 kLineTerminatorTable1Size,
1360 c);
1361 default: return false;
1362 }
1363 } 1172 }
1364 1173
1174 // TODO(jshin): Once icu_case_mapping flag is removed, enclose case-mapping
1175 // related tables with #ifndef V8_INTL_SUPPORT.
1365 static const MultiCharacterSpecialCase<2> kToLowercaseMultiStrings0[2] = { // NOLINT 1176 static const MultiCharacterSpecialCase<2> kToLowercaseMultiStrings0[2] = { // NOLINT
1366 {{105, 775}}, {{kSentinel}} }; // NOLINT 1177 {{105, 775}}, {{kSentinel}} }; // NOLINT
1367 static const uint16_t kToLowercaseTable0Size = 488; // NOLINT 1178 static const uint16_t kToLowercaseTable0Size = 488; // NOLINT
1368 static const int32_t kToLowercaseTable0[976] = { 1179 static const int32_t kToLowercaseTable0[976] = {
1369 1073741889, 128, 90, 128, 1073742016, 128, 1180 1073741889, 128, 90, 128, 1073742016, 128,
1370 214, 128, 1073742040, 128, 222, 128, 1181 214, 128, 1073742040, 128, 222, 128,
1371 256, 4, 258, 4, // NOLINT 1182 256, 4, 258, 4, // NOLINT
1372 260, 4, 262, 4, 264, 4, 1183 260, 4, 262, 4, 264, 4,
1373 266, 4, 268, 4, 270, 4, 1184 266, 4, 268, 4, 270, 4,
1374 272, 4, 274, 4, // NOLINT 1185 272, 4, 274, 4, // NOLINT
(...skipping 2068 matching lines...) Expand 10 before | Expand all | Expand 10 after
3443 result, 3254 result,
3444 allow_caching_ptr); 3255 allow_caching_ptr);
3445 default: return 0; 3256 default: return 0;
3446 } 3257 }
3447 } 3258 }
3448 3259
3449 3260
3450 const uchar UnicodeData::kMaxCodePoint = 65533; 3261 const uchar UnicodeData::kMaxCodePoint = 65533;
3451 3262
3452 int UnicodeData::GetByteCount() { 3263 int UnicodeData::GetByteCount() {
3264 #ifndef V8_INTL_SUPPORT // NOLINT
3453 return kUppercaseTable0Size * sizeof(int32_t) // NOLINT 3265 return kUppercaseTable0Size * sizeof(int32_t) // NOLINT
3454 + kUppercaseTable1Size * sizeof(int32_t) // NOLINT 3266 + kUppercaseTable1Size * sizeof(int32_t) // NOLINT
3455 + kUppercaseTable5Size * sizeof(int32_t) // NOLINT 3267 + kUppercaseTable5Size * sizeof(int32_t) // NOLINT
3456 + kUppercaseTable7Size * sizeof(int32_t) // NOLINT 3268 + kUppercaseTable7Size * sizeof(int32_t) // NOLINT
3457 + kLowercaseTable0Size * sizeof(int32_t) // NOLINT
3458 + kLowercaseTable1Size * sizeof(int32_t) // NOLINT
3459 + kLowercaseTable5Size * sizeof(int32_t) // NOLINT
3460 + kLowercaseTable7Size * sizeof(int32_t) // NOLINT
3461 + kLetterTable0Size * sizeof(int32_t) // NOLINT 3269 + kLetterTable0Size * sizeof(int32_t) // NOLINT
3462 + kLetterTable1Size * sizeof(int32_t) // NOLINT 3270 + kLetterTable1Size * sizeof(int32_t) // NOLINT
3463 + kLetterTable2Size * sizeof(int32_t) // NOLINT 3271 + kLetterTable2Size * sizeof(int32_t) // NOLINT
3464 + kLetterTable3Size * sizeof(int32_t) // NOLINT 3272 + kLetterTable3Size * sizeof(int32_t) // NOLINT
3465 + kLetterTable4Size * sizeof(int32_t) // NOLINT 3273 + kLetterTable4Size * sizeof(int32_t) // NOLINT
3466 + kLetterTable5Size * sizeof(int32_t) // NOLINT 3274 + kLetterTable5Size * sizeof(int32_t) // NOLINT
3467 + kLetterTable6Size * sizeof(int32_t) // NOLINT 3275 + kLetterTable6Size * sizeof(int32_t) // NOLINT
3468 + kLetterTable7Size * sizeof(int32_t) // NOLINT 3276 + kLetterTable7Size * sizeof(int32_t) // NOLINT
3469 + kID_StartTable0Size * sizeof(int32_t) // NOLINT 3277 + kID_StartTable0Size * sizeof(int32_t) // NOLINT
3470 + kID_StartTable1Size * sizeof(int32_t) // NOLINT 3278 + kID_StartTable1Size * sizeof(int32_t) // NOLINT
3471 + kID_StartTable2Size * sizeof(int32_t) // NOLINT 3279 + kID_StartTable2Size * sizeof(int32_t) // NOLINT
3472 + kID_StartTable3Size * sizeof(int32_t) // NOLINT 3280 + kID_StartTable3Size * sizeof(int32_t) // NOLINT
3473 + kID_StartTable4Size * sizeof(int32_t) // NOLINT 3281 + kID_StartTable4Size * sizeof(int32_t) // NOLINT
3474 + kID_StartTable5Size * sizeof(int32_t) // NOLINT 3282 + kID_StartTable5Size * sizeof(int32_t) // NOLINT
3475 + kID_StartTable6Size * sizeof(int32_t) // NOLINT 3283 + kID_StartTable6Size * sizeof(int32_t) // NOLINT
3476 + kID_StartTable7Size * sizeof(int32_t) // NOLINT 3284 + kID_StartTable7Size * sizeof(int32_t) // NOLINT
3477 + kID_ContinueTable0Size * sizeof(int32_t) // NOLINT 3285 + kID_ContinueTable0Size * sizeof(int32_t) // NOLINT
3478 + kID_ContinueTable1Size * sizeof(int32_t) // NOLINT 3286 + kID_ContinueTable1Size * sizeof(int32_t) // NOLINT
3479 + kID_ContinueTable5Size * sizeof(int32_t) // NOLINT 3287 + kID_ContinueTable5Size * sizeof(int32_t) // NOLINT
3480 + kID_ContinueTable7Size * sizeof(int32_t) // NOLINT 3288 + kID_ContinueTable7Size * sizeof(int32_t) // NOLINT
3481 + kWhiteSpaceTable0Size * sizeof(int32_t) // NOLINT 3289 + kWhiteSpaceTable0Size * sizeof(int32_t) // NOLINT
3482 + kWhiteSpaceTable1Size * sizeof(int32_t) // NOLINT 3290 + kWhiteSpaceTable1Size * sizeof(int32_t) // NOLINT
3483 + kWhiteSpaceTable7Size * sizeof(int32_t) // NOLINT 3291 + kWhiteSpaceTable7Size * sizeof(int32_t) // NOLINT
3484 + kLineTerminatorTable0Size * sizeof(int32_t) // NOLINT 3292 #else
3485 + kLineTerminatorTable1Size * sizeof(int32_t) // NOLINT 3293 return
3294 #endif // !V8_INTL_SUPPORT
3486 + 3295 +
3487 kToLowercaseMultiStrings0Size * 3296 kToLowercaseMultiStrings0Size *
3488 sizeof(MultiCharacterSpecialCase<2>) // NOLINT 3297 sizeof(MultiCharacterSpecialCase<2>) // NOLINT
3489 + 3298 +
3490 kToLowercaseMultiStrings1Size * 3299 kToLowercaseMultiStrings1Size *
3491 sizeof(MultiCharacterSpecialCase<1>) // NOLINT 3300 sizeof(MultiCharacterSpecialCase<1>) // NOLINT
3492 + 3301 +
3493 kToLowercaseMultiStrings5Size * 3302 kToLowercaseMultiStrings5Size *
3494 sizeof(MultiCharacterSpecialCase<1>) // NOLINT 3303 sizeof(MultiCharacterSpecialCase<1>) // NOLINT
3495 + 3304 +
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after
3536 sizeof(MultiCharacterSpecialCase<1>) // NOLINT 3345 sizeof(MultiCharacterSpecialCase<1>) // NOLINT
3537 + 3346 +
3538 kCanonicalizationRangeMultiStrings1Size * 3347 kCanonicalizationRangeMultiStrings1Size *
3539 sizeof(MultiCharacterSpecialCase<1>) // NOLINT 3348 sizeof(MultiCharacterSpecialCase<1>) // NOLINT
3540 + 3349 +
3541 kCanonicalizationRangeMultiStrings7Size * 3350 kCanonicalizationRangeMultiStrings7Size *
3542 sizeof(MultiCharacterSpecialCase<1>); // NOLINT 3351 sizeof(MultiCharacterSpecialCase<1>); // NOLINT
3543 } 3352 }
3544 3353
3545 } // namespace unibrow 3354 } // namespace unibrow
OLD NEW
« no previous file with comments | « src/unicode.h ('k') | src/v8.gyp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698