OLD | NEW |
1 // Copyright 2014 the V8 project authors. All rights reserved. | 1 // Copyright 2014 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "src/char-predicates.h" | 5 #include "src/char-predicates.h" |
6 #include "src/unicode.h" | 6 #include "src/unicode.h" |
7 #include "testing/gtest/include/gtest/gtest.h" | 7 #include "testing/gtest/include/gtest/gtest.h" |
8 | 8 |
9 namespace v8 { | 9 namespace v8 { |
10 namespace internal { | 10 namespace internal { |
11 | 11 |
12 TEST(CharPredicatesTest, WhiteSpace) { | 12 TEST(CharPredicatesTest, WhiteSpace) { |
13 // As of Unicode 6.3.0, \u180E is no longer a white space. We still consider | |
14 // it to be one though, since JS recognizes all white spaces in Unicode 5.1. | |
15 EXPECT_TRUE(WhiteSpace::Is(0x0009)); | 13 EXPECT_TRUE(WhiteSpace::Is(0x0009)); |
16 EXPECT_TRUE(WhiteSpace::Is(0x000B)); | 14 EXPECT_TRUE(WhiteSpace::Is(0x000B)); |
17 EXPECT_TRUE(WhiteSpace::Is(0x000C)); | 15 EXPECT_TRUE(WhiteSpace::Is(0x000C)); |
18 EXPECT_TRUE(WhiteSpace::Is(' ')); | 16 EXPECT_TRUE(WhiteSpace::Is(' ')); |
19 EXPECT_TRUE(WhiteSpace::Is(0x00A0)); | 17 EXPECT_TRUE(WhiteSpace::Is(0x00A0)); |
| 18 EXPECT_TRUE(WhiteSpace::Is(0x1680)); |
| 19 EXPECT_TRUE(WhiteSpace::Is(0x2000)); |
| 20 EXPECT_TRUE(WhiteSpace::Is(0x2007)); |
| 21 EXPECT_TRUE(WhiteSpace::Is(0x202F)); |
| 22 EXPECT_TRUE(WhiteSpace::Is(0x205F)); |
| 23 EXPECT_TRUE(WhiteSpace::Is(0x3000)); |
20 EXPECT_TRUE(WhiteSpace::Is(0xFEFF)); | 24 EXPECT_TRUE(WhiteSpace::Is(0xFEFF)); |
| 25 EXPECT_FALSE(WhiteSpace::Is(0x180E)); |
21 } | 26 } |
22 | 27 |
23 | 28 |
24 TEST(CharPredicatesTest, WhiteSpaceOrLineTerminator) { | 29 TEST(CharPredicatesTest, WhiteSpaceOrLineTerminator) { |
25 // As of Unicode 6.3.0, \u180E is no longer a white space. We still consider | |
26 // it to be one though, since JS recognizes all white spaces in Unicode 5.1. | |
27 // White spaces | |
28 EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x0009)); | 30 EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x0009)); |
29 EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x000B)); | 31 EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x000B)); |
30 EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x000C)); | 32 EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x000C)); |
31 EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(' ')); | 33 EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(' ')); |
32 EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x00A0)); | 34 EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x00A0)); |
| 35 EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x1680)); |
| 36 EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x2000)); |
| 37 EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x2007)); |
| 38 EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x202F)); |
| 39 EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x205F)); |
33 EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0xFEFF)); | 40 EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0xFEFF)); |
34 // Line terminators | 41 // Line terminators |
35 EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x000A)); | 42 EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x000A)); |
36 EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x000D)); | 43 EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x000D)); |
37 EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x2028)); | 44 EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x2028)); |
38 EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x2029)); | 45 EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x2029)); |
| 46 EXPECT_FALSE(WhiteSpaceOrLineTerminator::Is(0x180E)); |
39 } | 47 } |
40 | 48 |
41 | 49 |
42 TEST(CharPredicatesTest, IdentifierStart) { | 50 TEST(CharPredicatesTest, IdentifierStart) { |
43 EXPECT_TRUE(IdentifierStart::Is('$')); | 51 EXPECT_TRUE(IdentifierStart::Is('$')); |
44 EXPECT_TRUE(IdentifierStart::Is('_')); | 52 EXPECT_TRUE(IdentifierStart::Is('_')); |
45 EXPECT_TRUE(IdentifierStart::Is('\\')); | 53 EXPECT_TRUE(IdentifierStart::Is('\\')); |
46 | 54 |
47 // http://www.unicode.org/reports/tr31/ | 55 // http://www.unicode.org/reports/tr31/ |
| 56 // curl ftp://ftp.unicode.org/Public/9.0.0/ucd/DerivedCoreProperties.txt | |
| 57 // grep '; ID_Start # [^NL]' |
48 // Other_ID_Start | 58 // Other_ID_Start |
| 59 EXPECT_TRUE(IdentifierStart::Is(0x1885)); |
| 60 EXPECT_TRUE(IdentifierStart::Is(0x1886)); |
49 EXPECT_TRUE(IdentifierStart::Is(0x2118)); | 61 EXPECT_TRUE(IdentifierStart::Is(0x2118)); |
50 EXPECT_TRUE(IdentifierStart::Is(0x212E)); | 62 EXPECT_TRUE(IdentifierStart::Is(0x212E)); |
51 EXPECT_TRUE(IdentifierStart::Is(0x309B)); | 63 EXPECT_TRUE(IdentifierStart::Is(0x309B)); |
52 EXPECT_TRUE(IdentifierStart::Is(0x309C)); | 64 EXPECT_TRUE(IdentifierStart::Is(0x309C)); |
53 | 65 |
54 // Issue 2892: | 66 // Issue 2892: |
55 // \u2E2F has the Pattern_Syntax property, excluding it from ID_Start. | 67 // \u2E2F has the Pattern_Syntax property, excluding it from ID_Start. |
56 EXPECT_FALSE(unibrow::ID_Start::Is(0x2E2F)); | 68 EXPECT_FALSE(IdentifierStart::Is(0x2E2F)); |
| 69 |
| 70 #ifdef V8_INTL_SUPPORT |
| 71 // New in Unicode 8.0 (6,847 code points) |
| 72 // [:ID_Start:] & [[:Age=8.0:] - [:Age=7.0:]] |
| 73 EXPECT_TRUE(IdentifierStart::Is(0x08B3)); |
| 74 EXPECT_TRUE(IdentifierStart::Is(0x0AF9)); |
| 75 EXPECT_TRUE(IdentifierStart::Is(0x13F8)); |
| 76 EXPECT_TRUE(IdentifierStart::Is(0x9FCD)); |
| 77 EXPECT_TRUE(IdentifierStart::Is(0xAB60)); |
| 78 EXPECT_TRUE(IdentifierStart::Is(0x10CC0)); |
| 79 EXPECT_TRUE(IdentifierStart::Is(0x108E0)); |
| 80 EXPECT_TRUE(IdentifierStart::Is(0x2B820)); |
| 81 |
| 82 // New in Unicode 9.0 (7,177 code points) |
| 83 // [:ID_Start:] & [[:Age=9.0:] - [:Age=8.0:]] |
| 84 |
| 85 EXPECT_TRUE(IdentifierStart::Is(0x1C80)); |
| 86 EXPECT_TRUE(IdentifierStart::Is(0x104DB)); |
| 87 EXPECT_TRUE(IdentifierStart::Is(0x1E922)); |
| 88 #endif |
57 } | 89 } |
58 | 90 |
59 | 91 |
60 TEST(CharPredicatesTest, IdentifierPart) { | 92 TEST(CharPredicatesTest, IdentifierPart) { |
61 EXPECT_TRUE(IdentifierPart::Is('$')); | 93 EXPECT_TRUE(IdentifierPart::Is('$')); |
62 EXPECT_TRUE(IdentifierPart::Is('_')); | 94 EXPECT_TRUE(IdentifierPart::Is('_')); |
63 EXPECT_TRUE(IdentifierPart::Is('\\')); | 95 EXPECT_TRUE(IdentifierPart::Is('\\')); |
64 EXPECT_TRUE(IdentifierPart::Is(0x200C)); | 96 EXPECT_TRUE(IdentifierPart::Is(0x200C)); |
65 EXPECT_TRUE(IdentifierPart::Is(0x200D)); | 97 EXPECT_TRUE(IdentifierPart::Is(0x200D)); |
66 | 98 |
| 99 #ifdef V8_INTL_SUPPORT |
| 100 // New in Unicode 8.0 (6,847 code points) |
| 101 // [:ID_Start:] & [[:Age=8.0:] - [:Age=7.0:]] |
| 102 EXPECT_TRUE(IdentifierPart::Is(0x08B3)); |
| 103 EXPECT_TRUE(IdentifierPart::Is(0x0AF9)); |
| 104 EXPECT_TRUE(IdentifierPart::Is(0x13F8)); |
| 105 EXPECT_TRUE(IdentifierPart::Is(0x9FCD)); |
| 106 EXPECT_TRUE(IdentifierPart::Is(0xAB60)); |
| 107 EXPECT_TRUE(IdentifierPart::Is(0x10CC0)); |
| 108 EXPECT_TRUE(IdentifierPart::Is(0x108E0)); |
| 109 EXPECT_TRUE(IdentifierPart::Is(0x2B820)); |
| 110 |
| 111 // [[:ID_Continue:]-[:ID_Start:]] & [[:Age=8.0:]-[:Age=7.0:]] |
| 112 // 162 code points |
| 113 EXPECT_TRUE(IdentifierPart::Is(0x08E3)); |
| 114 EXPECT_TRUE(IdentifierPart::Is(0xA69E)); |
| 115 EXPECT_TRUE(IdentifierPart::Is(0x11730)); |
| 116 |
| 117 |
| 118 // New in Unicode 9.0 (7,177 code points) |
| 119 // [:ID_Start:] & [[:Age=9.0:] - [:Age=8.0:]] |
| 120 EXPECT_TRUE(IdentifierPart::Is(0x1C80)); |
| 121 EXPECT_TRUE(IdentifierPart::Is(0x104DB)); |
| 122 EXPECT_TRUE(IdentifierPart::Is(0x1E922)); |
| 123 |
| 124 // [[:ID_Continue:]-[:ID_Start:]] & [[:Age=9.0:]-[:Age=8.0:]] |
| 125 // 162 code points |
| 126 EXPECT_TRUE(IdentifierPart::Is(0x08D4)); |
| 127 EXPECT_TRUE(IdentifierPart::Is(0x1DFB)); |
| 128 EXPECT_TRUE(IdentifierPart::Is(0xA8C5)); |
| 129 EXPECT_TRUE(IdentifierPart::Is(0x11450)); |
| 130 #endif |
| 131 |
67 // http://www.unicode.org/reports/tr31/ | 132 // http://www.unicode.org/reports/tr31/ |
| 133 // curl ftp://ftp.unicode.org/Public/9.0.0/ucd/DerivedCoreProperties.txt | |
| 134 // grep '; ID_Continue' | egrep -v '# (M[nc]|L|Nl|Nd|Pc)' |
| 135 |
68 // Other_ID_Start | 136 // Other_ID_Start |
| 137 EXPECT_TRUE(IdentifierPart::Is(0x1885)); |
| 138 EXPECT_TRUE(IdentifierPart::Is(0x1886)); |
69 EXPECT_TRUE(IdentifierPart::Is(0x2118)); | 139 EXPECT_TRUE(IdentifierPart::Is(0x2118)); |
70 EXPECT_TRUE(IdentifierPart::Is(0x212E)); | 140 EXPECT_TRUE(IdentifierPart::Is(0x212E)); |
71 EXPECT_TRUE(IdentifierPart::Is(0x309B)); | 141 EXPECT_TRUE(IdentifierPart::Is(0x309B)); |
72 EXPECT_TRUE(IdentifierPart::Is(0x309C)); | 142 EXPECT_TRUE(IdentifierPart::Is(0x309C)); |
73 | 143 |
74 // Other_ID_Continue | 144 // Other_ID_Continue - Other_ID_Start |
75 EXPECT_TRUE(IdentifierPart::Is(0x00B7)); | 145 EXPECT_TRUE(IdentifierPart::Is(0x00B7)); |
76 EXPECT_TRUE(IdentifierPart::Is(0x0387)); | 146 EXPECT_TRUE(IdentifierPart::Is(0x0387)); |
77 EXPECT_TRUE(IdentifierPart::Is(0x1369)); | 147 EXPECT_TRUE(IdentifierPart::Is(0x1369)); |
78 EXPECT_TRUE(IdentifierPart::Is(0x1370)); | 148 EXPECT_TRUE(IdentifierPart::Is(0x1370)); |
79 EXPECT_TRUE(IdentifierPart::Is(0x1371)); | 149 EXPECT_TRUE(IdentifierPart::Is(0x1371)); |
80 EXPECT_TRUE(IdentifierPart::Is(0x19DA)); | 150 EXPECT_TRUE(IdentifierPart::Is(0x19DA)); |
81 | 151 |
82 // Issue 2892: | 152 // Issue 2892: |
83 // \u2E2F has the Pattern_Syntax property, excluding it from ID_Start. | 153 // \u2E2F has the Pattern_Syntax property, excluding it from ID_Start. |
84 EXPECT_FALSE(IdentifierPart::Is(0x2E2F)); | 154 EXPECT_FALSE(IdentifierPart::Is(0x2E2F)); |
85 } | 155 } |
86 | 156 |
87 #ifdef V8_INTL_SUPPORT | 157 #ifdef V8_INTL_SUPPORT |
88 TEST(CharPredicatesTest, SupplementaryPlaneIdentifiers) { | 158 TEST(CharPredicatesTest, SupplementaryPlaneIdentifiers) { |
89 // Both ID_Start and ID_Continue. | 159 // Both ID_Start and ID_Continue. |
90 EXPECT_TRUE(IdentifierStart::Is(0x10403)); // Category Lu | 160 EXPECT_TRUE(IdentifierStart::Is(0x10403)); // Category Lu |
91 EXPECT_TRUE(IdentifierPart::Is(0x10403)); | 161 EXPECT_TRUE(IdentifierPart::Is(0x10403)); |
92 EXPECT_TRUE(IdentifierStart::Is(0x1043C)); // Category Ll | 162 EXPECT_TRUE(IdentifierStart::Is(0x1043C)); // Category Ll |
93 EXPECT_TRUE(IdentifierPart::Is(0x1043C)); | 163 EXPECT_TRUE(IdentifierPart::Is(0x1043C)); |
94 EXPECT_TRUE(IdentifierStart::Is(0x16F9C)); // Category Lm | 164 EXPECT_TRUE(IdentifierStart::Is(0x16F9C)); // Category Lm |
95 EXPECT_TRUE(IdentifierPart::Is(0x16F9C)); | 165 EXPECT_TRUE(IdentifierPart::Is(0x16F9C)); |
96 EXPECT_TRUE(IdentifierStart::Is(0x10048)); // Category Lo | 166 EXPECT_TRUE(IdentifierStart::Is(0x10048)); // Category Lo |
97 EXPECT_TRUE(IdentifierPart::Is(0x10048)); | 167 EXPECT_TRUE(IdentifierPart::Is(0x10048)); |
98 EXPECT_TRUE(IdentifierStart::Is(0x1014D)); // Category Nl | 168 EXPECT_TRUE(IdentifierStart::Is(0x1014D)); // Category Nl |
99 EXPECT_TRUE(IdentifierPart::Is(0x1014D)); | 169 EXPECT_TRUE(IdentifierPart::Is(0x1014D)); |
100 | 170 |
| 171 // New in Unicode 8.0 |
| 172 // [ [:ID_Start=Yes:] & [:Age=8.0:]] - [:Age=7.0:] |
| 173 EXPECT_TRUE(IdentifierStart::Is(0x108E0)); |
| 174 EXPECT_TRUE(IdentifierStart::Is(0x10C80)); |
| 175 |
101 // Only ID_Continue. | 176 // Only ID_Continue. |
102 EXPECT_FALSE(IdentifierStart::Is(0x101FD)); // Category Mn | 177 EXPECT_FALSE(IdentifierStart::Is(0x101FD)); // Category Mn |
103 EXPECT_TRUE(IdentifierPart::Is(0x101FD)); | 178 EXPECT_TRUE(IdentifierPart::Is(0x101FD)); |
104 EXPECT_FALSE(IdentifierStart::Is(0x11002)); // Category Mc | 179 EXPECT_FALSE(IdentifierStart::Is(0x11002)); // Category Mc |
105 EXPECT_TRUE(IdentifierPart::Is(0x11002)); | 180 EXPECT_TRUE(IdentifierPart::Is(0x11002)); |
106 EXPECT_FALSE(IdentifierStart::Is(0x104A9)); // Category Nd | 181 EXPECT_FALSE(IdentifierStart::Is(0x104A9)); // Category Nd |
107 EXPECT_TRUE(IdentifierPart::Is(0x104A9)); | 182 EXPECT_TRUE(IdentifierPart::Is(0x104A9)); |
108 | 183 |
109 // Neither. | 184 // Neither. |
110 EXPECT_FALSE(IdentifierStart::Is(0x10111)); // Category No | 185 EXPECT_FALSE(IdentifierStart::Is(0x10111)); // Category No |
111 EXPECT_FALSE(IdentifierPart::Is(0x10111)); | 186 EXPECT_FALSE(IdentifierPart::Is(0x10111)); |
112 EXPECT_FALSE(IdentifierStart::Is(0x1F4A9)); // Category So | 187 EXPECT_FALSE(IdentifierStart::Is(0x1F4A9)); // Category So |
113 EXPECT_FALSE(IdentifierPart::Is(0x1F4A9)); | 188 EXPECT_FALSE(IdentifierPart::Is(0x1F4A9)); |
114 } | 189 } |
115 #endif // V8_INTL_SUPPORT | 190 #endif // V8_INTL_SUPPORT |
116 | 191 |
117 } // namespace internal | 192 } // namespace internal |
118 } // namespace v8 | 193 } // namespace v8 |
OLD | NEW |