OLD | NEW |
1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
45 typedef signed short int16_t; // NOLINT | 45 typedef signed short int16_t; // NOLINT |
46 typedef unsigned short uint16_t; // NOLINT | 46 typedef unsigned short uint16_t; // NOLINT |
47 typedef int int32_t; // NOLINT | 47 typedef int int32_t; // NOLINT |
48 | 48 |
49 // All access to the character table should go through this function. | 49 // All access to the character table should go through this function. |
// Returns the element stored at raw position D * index of `table`, implicitly
// converted from int32_t to uchar. D is a compile-time stride, so `index`
// counts logical entries that are D array slots apart rather than raw slots.
// No bounds checking is performed; the caller must keep D * index in range.
50 template <int D> | 50 template <int D> |
51 static inline uchar TableGet(const int32_t* table, int index) { | 51 static inline uchar TableGet(const int32_t* table, int index) { |
52 return table[D * index]; | 52 return table[D * index]; |
53 } | 53 } |
54 | 54 |
| 55 |
// Returns `entry` masked with (kStartBit - 1), i.e. the entry with the
// kStartBit flag and anything above it removed.
// NOTE(review): kStartBit is defined outside this view; the mask only keeps
// "all lower bits" if kStartBit is a single set bit - confirm.
55 static inline uchar GetEntry(int32_t entry) { | 56 static inline uchar GetEntry(int32_t entry) { |
56 return entry & (kStartBit - 1); | 57 return entry & (kStartBit - 1); |
57 } | 58 } |
58 | 59 |
| 60 |
// Returns true when the kStartBit flag is set in `entry`.
// NOTE(review): the visible tail of the lookup routine accepts a value that
// is greater than a flagged entry, which suggests the flag marks the first
// element of a range - confirm against the full LookupPredicate body.
59 static inline bool IsStart(int32_t entry) { | 61 static inline bool IsStart(int32_t entry) { |
60 return (entry & kStartBit) != 0; | 62 return (entry & kStartBit) != 0; |
61 } | 63 } |
62 | 64 |
| 65 |
63 /** | 66 /** |
64 * Look up a character in the unicode table using a mix of binary and | 67 * Look up a character in the unicode table using a mix of binary and |
65 * interpolation search. For a uniformly distributed array | 68 * interpolation search. For a uniformly distributed array |
66 * interpolation search beats binary search by a wide margin. However, | 69 * interpolation search beats binary search by a wide margin. However, |
67 * in this case interpolation search degenerates because of some very | 70 * in this case interpolation search degenerates because of some very |
68 * high values in the lower end of the table so this function uses a | 71 * high values in the lower end of the table so this function uses a |
69 * combination. The average number of steps to look up the information | 72 * combination. The average number of steps to look up the information |
70 * about a character is around 10, slightly higher if there is no | 73 * about a character is around 10, slightly higher if there is no |
71 * information available about the character. | 74 * information available about the character. |
72 */ | 75 */ |
(...skipping 26 matching lines...) Expand all Loading... |
99 bool is_start = IsStart(field); | 102 bool is_start = IsStart(field); |
100 return (entry == value) || (entry < value && is_start); | 103 return (entry == value) || (entry < value && is_start); |
101 } | 104 } |
102 | 105 |
// Fixed-size record holding up to kW uchar values in `chars`; kW is the
// compile-time capacity chosen per table.
// kEndOfEncoding is an alias for kSentinel (defined outside this view).
// NOTE(review): presumably it terminates sequences shorter than kW - confirm
// against the mapping lookup code, which is not visible here.
103 template <int kW> | 106 template <int kW> |
104 struct MultiCharacterSpecialCase { | 107 struct MultiCharacterSpecialCase { |
105 static const uchar kEndOfEncoding = kSentinel; | 108 static const uchar kEndOfEncoding = kSentinel; |
106 uchar chars[kW]; | 109 uchar chars[kW]; |
107 }; | 110 }; |
108 | 111 |
| 112 |
109 // Look up the mapping for the given character in the specified table, | 113 // Look up the mapping for the given character in the specified table, |
110 // which is of the specified length and uses the specified special case | 114 // which is of the specified length and uses the specified special case |
111 // mapping for multi-char mappings. The next parameter is the character | 115 // mapping for multi-char mappings. The next parameter is the character |
112 // following the one to map. The result will be written into the result | 116 // following the one to map. The result will be written into the result |
113 // buffer and the number of characters written will be returned. Finally, | 117 // buffer and the number of characters written will be returned. Finally, |
114 // if the allow_caching_ptr is non-null then false will be stored in | 118 // if the allow_caching_ptr is non-null then false will be stored in |
115 // it if the result contains multiple characters or depends on the | 119 // it if the result contains multiple characters or depends on the |
116 // context. | 120 // context. |
117 // If ranges are linear, a match between a start and end point is | 121 // If ranges are linear, a match between a start and end point is |
118 // offset by the distance between the match and the start. Otherwise | 122 // offset by the distance between the match and the start. Otherwise |
(...skipping 330 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
449 case 5: return LookupPredicate(kUppercaseTable5, | 453 case 5: return LookupPredicate(kUppercaseTable5, |
450 kUppercaseTable5Size, | 454 kUppercaseTable5Size, |
451 c); | 455 c); |
452 case 7: return LookupPredicate(kUppercaseTable7, | 456 case 7: return LookupPredicate(kUppercaseTable7, |
453 kUppercaseTable7Size, | 457 kUppercaseTable7Size, |
454 c); | 458 c); |
455 default: return false; | 459 default: return false; |
456 } | 460 } |
457 } | 461 } |
458 | 462 |
| 463 |
459 // Lowercase: point.category == 'Ll' | 464 // Lowercase: point.category == 'Ll' |
460 | 465 |
461 static const uint16_t kLowercaseTable0Size = 463; | 466 static const uint16_t kLowercaseTable0Size = 463; |
462 static const int32_t kLowercaseTable0[463] = { | 467 static const int32_t kLowercaseTable0[463] = { |
463 1073741921, 122, 181, 1073742047, 246, 1073742072, 255, 257, // NOLINT | 468 1073741921, 122, 181, 1073742047, 246, 1073742072, 255, 257, // NOLINT |
464 259, 261, 263, 265, 267, 269, 271, 273, // NOLINT | 469 259, 261, 263, 265, 267, 269, 271, 273, // NOLINT |
465 275, 277, 279, 281, 283, 285, 287, 289, // NOLINT | 470 275, 277, 279, 281, 283, 285, 287, 289, // NOLINT |
466 291, 293, 295, 297, 299, 301, 303, 305, // NOLINT | 471 291, 293, 295, 297, 299, 301, 303, 305, // NOLINT |
467 307, 309, 1073742135, 312, 314, 316, 318, 320, // NOLINT | 472 307, 309, 1073742135, 312, 314, 316, 318, 320, // NOLINT |
468 322, 324, 326, 1073742152, 329, 331, 333, 335, // NOLINT | 473 322, 324, 326, 1073742152, 329, 331, 333, 335, // NOLINT |
(...skipping 91 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
560 case 5: return LookupPredicate(kLowercaseTable5, | 565 case 5: return LookupPredicate(kLowercaseTable5, |
561 kLowercaseTable5Size, | 566 kLowercaseTable5Size, |
562 c); | 567 c); |
563 case 7: return LookupPredicate(kLowercaseTable7, | 568 case 7: return LookupPredicate(kLowercaseTable7, |
564 kLowercaseTable7Size, | 569 kLowercaseTable7Size, |
565 c); | 570 c); |
566 default: return false; | 571 default: return false; |
567 } | 572 } |
568 } | 573 } |
569 | 574 |
| 575 |
570 // Letter: point.category in ['Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl' ] | 576 // Letter: point.category in ['Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl' ] |
571 | 577 |
572 static const uint16_t kLetterTable0Size = 435; | 578 static const uint16_t kLetterTable0Size = 435; |
573 static const int32_t kLetterTable0[435] = { | 579 static const int32_t kLetterTable0[435] = { |
574 1073741889, 90, 1073741921, 122, 170, 181, 186, 1073742016, // NOLINT | 580 1073741889, 90, 1073741921, 122, 170, 181, 186, 1073742016, // NOLINT |
575 214, 1073742040, 246, 1073742072, 705, 1073742534, 721, 1073742560, // NOLINT | 581 214, 1073742040, 246, 1073742072, 705, 1073742534, 721, 1073742560, // NOLINT |
576 740, 748, 750, 1073742704, 884, 1073742710, 887, 1073742714, // NOLINT | 582 740, 748, 750, 1073742704, 884, 1073742710, 887, 1073742714, // NOLINT |
577 893, 902, 1073742728, 906, 908, 1073742734, 929, 1073742755, // NOLINT | 583 893, 902, 1073742728, 906, 908, 1073742734, 929, 1073742755, // NOLINT |
578 1013, 1073742839, 1153, 1073742986, 1319, 1073743153, 1366, 1369, // NOLINT | 584 1013, 1073742839, 1153, 1073742986, 1319, 1073743153, 1366, 1369, // NOLINT |
579 1073743201, 1415, 1073743312, 1514, 1073743344, 1522, 1073743392, 1610, // NO
LINT | 585 1073743201, 1415, 1073743312, 1514, 1073743344, 1522, 1073743392, 1610, // NO
LINT |
(...skipping 116 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
696 case 6: return LookupPredicate(kLetterTable6, | 702 case 6: return LookupPredicate(kLetterTable6, |
697 kLetterTable6Size, | 703 kLetterTable6Size, |
698 c); | 704 c); |
699 case 7: return LookupPredicate(kLetterTable7, | 705 case 7: return LookupPredicate(kLetterTable7, |
700 kLetterTable7Size, | 706 kLetterTable7Size, |
701 c); | 707 c); |
702 default: return false; | 708 default: return false; |
703 } | 709 } |
704 } | 710 } |
705 | 711 |
| 712 |
706 // Space: point.category == 'Zs' | 713 // Space: point.category == 'Zs' |
707 | 714 |
// Lookup tables for the Space predicate, one per chunk that has members.
// Each kSpaceTableNSize constant repeats the element count of its array.
// NOTE(review): 1073741824 is 1 << 30; an entry carrying that bit followed by
// a plain entry looks like an inclusive [start, end] range, and the values in
// table 1 look like offsets from 8192 (4096 would then be U+3000). The body
// of LookupPredicate is not visible here - confirm.
708 static const uint16_t kSpaceTable0Size = 4; | 715 static const uint16_t kSpaceTable0Size = 4; |
709 static const int32_t kSpaceTable0[4] = { | 716 static const int32_t kSpaceTable0[4] = { |
710 32, 160, 5760, 6158 }; // NOLINT | 717 32, 160, 5760, 6158 }; // NOLINT |
711 static const uint16_t kSpaceTable1Size = 5; | 718 static const uint16_t kSpaceTable1Size = 5; |
712 static const int32_t kSpaceTable1[5] = { | 719 static const int32_t kSpaceTable1[5] = { |
713 1073741824, 10, 47, 95, 4096 }; // NOLINT | 720 1073741824, 10, 47, 95, 4096 }; // NOLINT |
// Reports whether `c` is in the Space set.
// c >> 13 picks the chunk containing c (each chunk spans 1 << 13 = 8192
// values); the matching table is searched with LookupPredicate. Only chunks
// 0 and 1 have tables, so any other chunk yields false.
714 bool Space::Is(uchar c) { | 721 bool Space::Is(uchar c) { |
715 int chunk_index = c >> 13; | 722 int chunk_index = c >> 13; |
716 switch (chunk_index) { | 723 switch (chunk_index) { |
717 case 0: return LookupPredicate(kSpaceTable0, | 724 case 0: return LookupPredicate(kSpaceTable0, |
718 kSpaceTable0Size, | 725 kSpaceTable0Size, |
719 c); | 726 c); |
720 case 1: return LookupPredicate(kSpaceTable1, | 727 case 1: return LookupPredicate(kSpaceTable1, |
721 kSpaceTable1Size, | 728 kSpaceTable1Size, |
722 c); | 729 c); |
723 default: return false; | 730 default: return false; |
724 } | 731 } |
725 } | 732 } |
726 | 733 |
| 734 |
727 // Number: point.category == 'Nd' | 735 // Number: point.category == 'Nd' |
728 | 736 |
729 static const uint16_t kNumberTable0Size = 56; | 737 static const uint16_t kNumberTable0Size = 56; |
730 static const int32_t kNumberTable0[56] = { | 738 static const int32_t kNumberTable0[56] = { |
731 1073741872, 57, 1073743456, 1641, 1073743600, 1785, 1073743808, 1993, // NOLI
NT | 739 1073741872, 57, 1073743456, 1641, 1073743600, 1785, 1073743808, 1993, // NOLI
NT |
732 1073744230, 2415, 1073744358, 2543, 1073744486, 2671, 1073744614, 2799, // NO
LINT | 740 1073744230, 2415, 1073744358, 2543, 1073744486, 2671, 1073744614, 2799, // NO
LINT |
733 1073744742, 2927, 1073744870, 3055, 1073744998, 3183, 1073745126, 3311, // NO
LINT | 741 1073744742, 2927, 1073744870, 3055, 1073744998, 3183, 1073745126, 3311, // NO
LINT |
734 1073745254, 3439, 1073745488, 3673, 1073745616, 3801, 1073745696, 3881, // NO
LINT | 742 1073745254, 3439, 1073745488, 3673, 1073745616, 3801, 1073745696, 3881, // NO
LINT |
735 1073745984, 4169, 1073746064, 4249, 1073747936, 6121, 1073747984, 6169, // NO
LINT | 743 1073745984, 4169, 1073746064, 4249, 1073747936, 6121, 1073747984, 6169, // NO
LINT |
736 1073748294, 6479, 1073748432, 6617, 1073748608, 6793, 1073748624, 6809, // NO
LINT | 744 1073748294, 6479, 1073748432, 6617, 1073748608, 6793, 1073748624, 6809, // NO
LINT |
(...skipping 14 matching lines...) Expand all Loading... |
751 case 5: return LookupPredicate(kNumberTable5, | 759 case 5: return LookupPredicate(kNumberTable5, |
752 kNumberTable5Size, | 760 kNumberTable5Size, |
753 c); | 761 c); |
754 case 7: return LookupPredicate(kNumberTable7, | 762 case 7: return LookupPredicate(kNumberTable7, |
755 kNumberTable7Size, | 763 kNumberTable7Size, |
756 c); | 764 c); |
757 default: return false; | 765 default: return false; |
758 } | 766 } |
759 } | 767 } |
760 | 768 |
| 769 |
761 // WhiteSpace: 'Ws' in point.properties | 770 // WhiteSpace: 'Ws' in point.properties |
762 | 771 |
// Lookup tables for the WhiteSpace predicate, one per chunk that has members.
// Each kWhiteSpaceTableNSize constant repeats the element count of its array.
// NOTE(review): entries above 1 << 30 (1073741833 = (1 << 30) + 9,
// 1073741864 = (1 << 30) + 40) look like range starts paired with the entry
// that follows them (9..13 in table 0; 0..10 and 40..41 in table 1), and
// table 1 values look like offsets from 8192. LookupPredicate's body is not
// visible here - confirm.
763 static const uint16_t kWhiteSpaceTable0Size = 7; | 772 static const uint16_t kWhiteSpaceTable0Size = 7; |
764 static const int32_t kWhiteSpaceTable0[7] = { | 773 static const int32_t kWhiteSpaceTable0[7] = { |
765 1073741833, 13, 32, 133, 160, 5760, 6158 }; // NOLINT | 774 1073741833, 13, 32, 133, 160, 5760, 6158 }; // NOLINT |
766 static const uint16_t kWhiteSpaceTable1Size = 7; | 775 static const uint16_t kWhiteSpaceTable1Size = 7; |
767 static const int32_t kWhiteSpaceTable1[7] = { | 776 static const int32_t kWhiteSpaceTable1[7] = { |
768 1073741824, 10, 1073741864, 41, 47, 95, 4096 }; // NOLINT | 777 1073741824, 10, 1073741864, 41, 47, 95, 4096 }; // NOLINT |
// Reports whether `c` is in the WhiteSpace set.
// c >> 13 picks the chunk containing c (each chunk spans 8192 values); the
// matching table is searched with LookupPredicate. Only chunks 0 and 1 have
// tables, so any other chunk yields false.
769 bool WhiteSpace::Is(uchar c) { | 778 bool WhiteSpace::Is(uchar c) { |
770 int chunk_index = c >> 13; | 779 int chunk_index = c >> 13; |
771 switch (chunk_index) { | 780 switch (chunk_index) { |
772 case 0: return LookupPredicate(kWhiteSpaceTable0, | 781 case 0: return LookupPredicate(kWhiteSpaceTable0, |
773 kWhiteSpaceTable0Size, | 782 kWhiteSpaceTable0Size, |
774 c); | 783 c); |
775 case 1: return LookupPredicate(kWhiteSpaceTable1, | 784 case 1: return LookupPredicate(kWhiteSpaceTable1, |
776 kWhiteSpaceTable1Size, | 785 kWhiteSpaceTable1Size, |
777 c); | 786 c); |
778 default: return false; | 787 default: return false; |
779 } | 788 } |
780 } | 789 } |
781 | 790 |
| 791 |
782 // LineTerminator: 'Lt' in point.properties | 792 // LineTerminator: 'Lt' in point.properties |
// NOTE(review): 'Lt' above is looked up in point.properties, whereas the
// Letter predicate in this file lists 'Lt' as a point.category value; the two
// appear to be unrelated tags that share a spelling - confirm with the table
// generator.
783 | 793 |
// Lookup tables for the LineTerminator predicate. Table 0 holds 10 and 13.
// NOTE(review): 1073741864 is (1 << 30) + 40, so table 1 looks like a single
// range 40..41 expressed as offsets from 8192 (which would be U+2028 and
// U+2029). LookupPredicate's body is not visible here - confirm.
784 static const uint16_t kLineTerminatorTable0Size = 2; | 794 static const uint16_t kLineTerminatorTable0Size = 2; |
785 static const int32_t kLineTerminatorTable0[2] = { | 795 static const int32_t kLineTerminatorTable0[2] = { |
786 10, 13 }; // NOLINT | 796 10, 13 }; // NOLINT |
787 static const uint16_t kLineTerminatorTable1Size = 2; | 797 static const uint16_t kLineTerminatorTable1Size = 2; |
788 static const int32_t kLineTerminatorTable1[2] = { | 798 static const int32_t kLineTerminatorTable1[2] = { |
789 1073741864, 41 }; // NOLINT | 799 1073741864, 41 }; // NOLINT |
// Reports whether `c` is in the LineTerminator set.
// c >> 13 picks the chunk containing c (each chunk spans 8192 values); the
// matching table is searched with LookupPredicate. Only chunks 0 and 1 have
// tables, so any other chunk yields false.
790 bool LineTerminator::Is(uchar c) { | 800 bool LineTerminator::Is(uchar c) { |
791 int chunk_index = c >> 13; | 801 int chunk_index = c >> 13; |
792 switch (chunk_index) { | 802 switch (chunk_index) { |
793 case 0: return LookupPredicate(kLineTerminatorTable0, | 803 case 0: return LookupPredicate(kLineTerminatorTable0, |
794 kLineTerminatorTable0Size, | 804 kLineTerminatorTable0Size, |
795 c); | 805 c); |
796 case 1: return LookupPredicate(kLineTerminatorTable1, | 806 case 1: return LookupPredicate(kLineTerminatorTable1, |
797 kLineTerminatorTable1Size, | 807 kLineTerminatorTable1Size, |
798 c); | 808 c); |
799 default: return false; | 809 default: return false; |
800 } | 810 } |
801 } | 811 } |
802 | 812 |
| 813 |
803 // CombiningMark: point.category in ['Mn', 'Mc'] | 814 // CombiningMark: point.category in ['Mn', 'Mc'] |
804 | 815 |
805 static const uint16_t kCombiningMarkTable0Size = 258; | 816 static const uint16_t kCombiningMarkTable0Size = 258; |
806 static const int32_t kCombiningMarkTable0[258] = { | 817 static const int32_t kCombiningMarkTable0[258] = { |
807 1073742592, 879, 1073742979, 1159, 1073743249, 1469, 1471, 1073743297, // NOL
INT | 818 1073742592, 879, 1073742979, 1159, 1073743249, 1469, 1471, 1073743297, // NOL
INT |
808 1474, 1073743300, 1477, 1479, 1073743376, 1562, 1073743435, 1631, // NOLINT | 819 1474, 1073743300, 1477, 1479, 1073743376, 1562, 1073743435, 1631, // NOLINT |
809 1648, 1073743574, 1756, 1073743583, 1764, 1073743591, 1768, 1073743594, // NO
LINT | 820 1648, 1073743574, 1756, 1073743583, 1764, 1073743591, 1768, 1073743594, // NO
LINT |
810 1773, 1809, 1073743664, 1866, 1073743782, 1968, 1073743851, 2035, // NOLINT | 821 1773, 1809, 1073743664, 1866, 1073743782, 1968, 1073743851, 2035, // NOLINT |
811 1073743894, 2073, 1073743899, 2083, 1073743909, 2087, 1073743913, 2093, // NO
LINT | 822 1073743894, 2073, 1073743899, 2083, 1073743909, 2087, 1073743913, 2093, // NO
LINT |
812 1073743961, 2139, 1073744100, 2302, 1073744128, 2307, 1073744186, 2364, // NO
LINT | 823 1073743961, 2139, 1073744100, 2302, 1073744128, 2307, 1073744186, 2364, // NO
LINT |
(...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
864 case 5: return LookupPredicate(kCombiningMarkTable5, | 875 case 5: return LookupPredicate(kCombiningMarkTable5, |
865 kCombiningMarkTable5Size, | 876 kCombiningMarkTable5Size, |
866 c); | 877 c); |
867 case 7: return LookupPredicate(kCombiningMarkTable7, | 878 case 7: return LookupPredicate(kCombiningMarkTable7, |
868 kCombiningMarkTable7Size, | 879 kCombiningMarkTable7Size, |
869 c); | 880 c); |
870 default: return false; | 881 default: return false; |
871 } | 882 } |
872 } | 883 } |
873 | 884 |
| 885 |
874 // ConnectorPunctuation: point.category == 'Pc' | 886 // ConnectorPunctuation: point.category == 'Pc' |
875 | 887 |
// Lookup tables for the ConnectorPunctuation predicate; each
// kConnectorPunctuationTableNSize constant repeats the element count of its
// array. Table 0 holds the single value 95.
// NOTE(review): 1073741887 is (1 << 30) + 63, so table 1 looks like a range
// 63..64 plus the single value 84, all presumably offsets within the table's
// chunk. The lookup code that interprets these entries is not visible here -
// confirm.
876 static const uint16_t kConnectorPunctuationTable0Size = 1; | 888 static const uint16_t kConnectorPunctuationTable0Size = 1; |
877 static const int32_t kConnectorPunctuationTable0[1] = { | 889 static const int32_t kConnectorPunctuationTable0[1] = { |
878 95 }; // NOLINT | 890 95 }; // NOLINT |
879 static const uint16_t kConnectorPunctuationTable1Size = 3; | 891 static const uint16_t kConnectorPunctuationTable1Size = 3; |
880 static const int32_t kConnectorPunctuationTable1[3] = { | 892 static const int32_t kConnectorPunctuationTable1[3] = { |
881 1073741887, 64, 84 }; // NOLINT | 893 1073741887, 64, 84 }; // NOLINT |
882 static const uint16_t kConnectorPunctuationTable7Size = 5; | 894 static const uint16_t kConnectorPunctuationTable7Size = 5; |
883 static const int32_t kConnectorPunctuationTable7[5] = { | 895 static const int32_t kConnectorPunctuationTable7[5] = { |
(...skipping 968 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1852 + kEcma262UnCanonicalizeMultiStrings0Size * sizeof(MultiCharacterSpecialCa
se<4>) // NOLINT | 1864 + kEcma262UnCanonicalizeMultiStrings0Size * sizeof(MultiCharacterSpecialCa
se<4>) // NOLINT |
1853 + kEcma262UnCanonicalizeMultiStrings1Size * sizeof(MultiCharacterSpecialCa
se<2>) // NOLINT | 1865 + kEcma262UnCanonicalizeMultiStrings1Size * sizeof(MultiCharacterSpecialCa
se<2>) // NOLINT |
1854 + kEcma262UnCanonicalizeMultiStrings5Size * sizeof(MultiCharacterSpecialCa
se<2>) // NOLINT | 1866 + kEcma262UnCanonicalizeMultiStrings5Size * sizeof(MultiCharacterSpecialCa
se<2>) // NOLINT |
1855 + kEcma262UnCanonicalizeMultiStrings7Size * sizeof(MultiCharacterSpecialCa
se<2>) // NOLINT | 1867 + kEcma262UnCanonicalizeMultiStrings7Size * sizeof(MultiCharacterSpecialCa
se<2>) // NOLINT |
1856 + kCanonicalizationRangeMultiStrings0Size * sizeof(MultiCharacterSpecialCa
se<1>) // NOLINT | 1868 + kCanonicalizationRangeMultiStrings0Size * sizeof(MultiCharacterSpecialCa
se<1>) // NOLINT |
1857 + kCanonicalizationRangeMultiStrings1Size * sizeof(MultiCharacterSpecialCa
se<1>) // NOLINT | 1869 + kCanonicalizationRangeMultiStrings1Size * sizeof(MultiCharacterSpecialCa
se<1>) // NOLINT |
1858 + kCanonicalizationRangeMultiStrings7Size * sizeof(MultiCharacterSpecialCa
se<1>); // NOLINT | 1870 + kCanonicalizationRangeMultiStrings7Size * sizeof(MultiCharacterSpecialCa
se<1>); // NOLINT |
1859 } | 1871 } |
1860 | 1872 |
1861 } // namespace unicode | 1873 } // namespace unicode |
OLD | NEW |