| OLD | NEW |
| 1 // Copyright 2014 the V8 project authors. All rights reserved. | 1 // Copyright 2014 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "src/char-predicates.h" | 5 #include "src/char-predicates.h" |
| 6 #include "src/unicode.h" | 6 #include "src/unicode.h" |
| 7 #include "testing/gtest/include/gtest/gtest.h" | 7 #include "testing/gtest/include/gtest/gtest.h" |
| 8 | 8 |
| 9 namespace v8 { | 9 namespace v8 { |
| 10 namespace internal { | 10 namespace internal { |
| 11 | 11 |
| 12 TEST(CharPredicatesTest, WhiteSpace) { | 12 TEST(CharPredicatesTest, WhiteSpace) { |
| 13 // As of Unicode 6.3.0, \u180E is no longer a white space. We still consider | |
| 14 // it to be one though, since JS recognizes all white spaces in Unicode 5.1. | |
| 15 EXPECT_TRUE(WhiteSpace::Is(0x0009)); | 13 EXPECT_TRUE(WhiteSpace::Is(0x0009)); |
| 16 EXPECT_TRUE(WhiteSpace::Is(0x000B)); | 14 EXPECT_TRUE(WhiteSpace::Is(0x000B)); |
| 17 EXPECT_TRUE(WhiteSpace::Is(0x000C)); | 15 EXPECT_TRUE(WhiteSpace::Is(0x000C)); |
| 18 EXPECT_TRUE(WhiteSpace::Is(' ')); | 16 EXPECT_TRUE(WhiteSpace::Is(' ')); |
| 19 EXPECT_TRUE(WhiteSpace::Is(0x00A0)); | 17 EXPECT_TRUE(WhiteSpace::Is(0x00A0)); |
| 18 EXPECT_TRUE(WhiteSpace::Is(0x1680)); |
| 19 EXPECT_TRUE(WhiteSpace::Is(0x2000)); |
| 20 EXPECT_TRUE(WhiteSpace::Is(0x2007)); |
| 21 EXPECT_TRUE(WhiteSpace::Is(0x202F)); |
| 22 EXPECT_TRUE(WhiteSpace::Is(0x205F)); |
| 23 EXPECT_TRUE(WhiteSpace::Is(0x3000)); |
| 20 EXPECT_TRUE(WhiteSpace::Is(0xFEFF)); | 24 EXPECT_TRUE(WhiteSpace::Is(0xFEFF)); |
| 25 EXPECT_FALSE(WhiteSpace::Is(0x180E)); |
| 21 } | 26 } |
| 22 | 27 |
| 23 | 28 |
| 24 TEST(CharPredicatesTest, WhiteSpaceOrLineTerminator) { | 29 TEST(CharPredicatesTest, WhiteSpaceOrLineTerminator) { |
| 25 // As of Unicode 6.3.0, \u180E is no longer a white space. We still consider | |
| 26 // it to be one though, since JS recognizes all white spaces in Unicode 5.1. | |
| 27 // White spaces | |
| 28 EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x0009)); | 30 EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x0009)); |
| 29 EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x000B)); | 31 EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x000B)); |
| 30 EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x000C)); | 32 EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x000C)); |
| 31 EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(' ')); | 33 EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(' ')); |
| 32 EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x00A0)); | 34 EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x00A0)); |
| 35 EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x1680)); |
| 36 EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x2000)); |
| 37 EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x2007)); |
| 38 EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x202F)); |
| 39 EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x205F)); |
| 33 EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0xFEFF)); | 40 EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0xFEFF)); |
| 34 // Line terminators | 41 // Line terminators |
| 35 EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x000A)); | 42 EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x000A)); |
| 36 EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x000D)); | 43 EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x000D)); |
| 37 EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x2028)); | 44 EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x2028)); |
| 38 EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x2029)); | 45 EXPECT_TRUE(WhiteSpaceOrLineTerminator::Is(0x2029)); |
| 46 EXPECT_FALSE(WhiteSpaceOrLineTerminator::Is(0x180E)); |
| 39 } | 47 } |
| 40 | 48 |
| 41 | 49 |
| 42 TEST(CharPredicatesTest, IdentifierStart) { | 50 TEST(CharPredicatesTest, IdentifierStart) { |
| 43 EXPECT_TRUE(IdentifierStart::Is('$')); | 51 EXPECT_TRUE(IdentifierStart::Is('$')); |
| 44 EXPECT_TRUE(IdentifierStart::Is('_')); | 52 EXPECT_TRUE(IdentifierStart::Is('_')); |
| 45 EXPECT_TRUE(IdentifierStart::Is('\\')); | 53 EXPECT_TRUE(IdentifierStart::Is('\\')); |
| 46 | 54 |
| 47 // http://www.unicode.org/reports/tr31/ | 55 // http://www.unicode.org/reports/tr31/ |
| 56 // curl http://www.unicode.org/Public/UCD/latest/ucd/PropList.txt | |
| 57 // grep 'Other_ID_Start' |
| 48 // Other_ID_Start | 58 // Other_ID_Start |
| 59 EXPECT_TRUE(IdentifierStart::Is(0x1885)); |
| 60 EXPECT_TRUE(IdentifierStart::Is(0x1886)); |
| 49 EXPECT_TRUE(IdentifierStart::Is(0x2118)); | 61 EXPECT_TRUE(IdentifierStart::Is(0x2118)); |
| 50 EXPECT_TRUE(IdentifierStart::Is(0x212E)); | 62 EXPECT_TRUE(IdentifierStart::Is(0x212E)); |
| 51 EXPECT_TRUE(IdentifierStart::Is(0x309B)); | 63 EXPECT_TRUE(IdentifierStart::Is(0x309B)); |
| 52 EXPECT_TRUE(IdentifierStart::Is(0x309C)); | 64 EXPECT_TRUE(IdentifierStart::Is(0x309C)); |
| 53 | 65 |
| 54 // Issue 2892: | 66 // Issue 2892: |
| 55 // \u2E2F has the Pattern_Syntax property, excluding it from ID_Start. | 67 // \u2E2F has the Pattern_Syntax property, excluding it from ID_Start. |
| 56 EXPECT_FALSE(unibrow::ID_Start::Is(0x2E2F)); | 68 EXPECT_FALSE(IdentifierStart::Is(0x2E2F)); |
| 69 |
| 70 #ifdef V8_INTL_SUPPORT |
| 71 // New in Unicode 8.0 (6,847 code points) |
| 72 // [:ID_Start:] & [[:Age=8.0:] - [:Age=7.0:]] |
| 73 EXPECT_TRUE(IdentifierStart::Is(0x08B3)); |
| 74 EXPECT_TRUE(IdentifierStart::Is(0x0AF9)); |
| 75 EXPECT_TRUE(IdentifierStart::Is(0x13F8)); |
| 76 EXPECT_TRUE(IdentifierStart::Is(0x9FCD)); |
| 77 EXPECT_TRUE(IdentifierStart::Is(0xAB60)); |
| 78 EXPECT_TRUE(IdentifierStart::Is(0x10CC0)); |
| 79 EXPECT_TRUE(IdentifierStart::Is(0x108E0)); |
| 80 EXPECT_TRUE(IdentifierStart::Is(0x2B820)); |
| 81 |
| 82 // New in Unicode 9.0 (7,177 code points) |
| 83 // [:ID_Start:] & [[:Age=9.0:] - [:Age=8.0:]] |
| 84 |
| 85 EXPECT_TRUE(IdentifierStart::Is(0x1C80)); |
| 86 EXPECT_TRUE(IdentifierStart::Is(0x104DB)); |
| 87 EXPECT_TRUE(IdentifierStart::Is(0x1E922)); |
| 88 #endif |
| 57 } | 89 } |
| 58 | 90 |
| 59 | 91 |
| 60 TEST(CharPredicatesTest, IdentifierPart) { | 92 TEST(CharPredicatesTest, IdentifierPart) { |
| 61 EXPECT_TRUE(IdentifierPart::Is('$')); | 93 EXPECT_TRUE(IdentifierPart::Is('$')); |
| 62 EXPECT_TRUE(IdentifierPart::Is('_')); | 94 EXPECT_TRUE(IdentifierPart::Is('_')); |
| 63 EXPECT_TRUE(IdentifierPart::Is('\\')); | 95 EXPECT_TRUE(IdentifierPart::Is('\\')); |
| 64 EXPECT_TRUE(IdentifierPart::Is(0x200C)); | 96 EXPECT_TRUE(IdentifierPart::Is(0x200C)); |
| 65 EXPECT_TRUE(IdentifierPart::Is(0x200D)); | 97 EXPECT_TRUE(IdentifierPart::Is(0x200D)); |
| 66 | 98 |
| 99 #ifdef V8_INTL_SUPPORT |
| 100 // New in Unicode 8.0 (6,847 code points) |
| 101 // [:ID_Start:] & [[:Age=8.0:] - [:Age=7.0:]] |
| 102 EXPECT_TRUE(IdentifierPart::Is(0x08B3)); |
| 103 EXPECT_TRUE(IdentifierPart::Is(0x0AF9)); |
| 104 EXPECT_TRUE(IdentifierPart::Is(0x13F8)); |
| 105 EXPECT_TRUE(IdentifierPart::Is(0x9FCD)); |
| 106 EXPECT_TRUE(IdentifierPart::Is(0xAB60)); |
| 107 EXPECT_TRUE(IdentifierPart::Is(0x10CC0)); |
| 108 EXPECT_TRUE(IdentifierPart::Is(0x108E0)); |
| 109 EXPECT_TRUE(IdentifierPart::Is(0x2B820)); |
| 110 |
| 111 // [[:ID_Continue:]-[:ID_Start:]] & [[:Age=8.0:]-[:Age=7.0:]] |
| 112 // 162 code points |
| 113 EXPECT_TRUE(IdentifierPart::Is(0x08E3)); |
| 114 EXPECT_TRUE(IdentifierPart::Is(0xA69E)); |
| 115 EXPECT_TRUE(IdentifierPart::Is(0x11730)); |
| 116 |
| 117 // New in Unicode 9.0 (7,177 code points) |
| 118 // [:ID_Start:] & [[:Age=9.0:] - [:Age=8.0:]] |
| 119 EXPECT_TRUE(IdentifierPart::Is(0x1C80)); |
| 120 EXPECT_TRUE(IdentifierPart::Is(0x104DB)); |
| 121 EXPECT_TRUE(IdentifierPart::Is(0x1E922)); |
| 122 |
| 123 // [[:ID_Continue:]-[:ID_Start:]] & [[:Age=9.0:]-[:Age=8.0:]] |
| 124 // 162 code points |
| 125 EXPECT_TRUE(IdentifierPart::Is(0x08D4)); |
| 126 EXPECT_TRUE(IdentifierPart::Is(0x1DFB)); |
| 127 EXPECT_TRUE(IdentifierPart::Is(0xA8C5)); |
| 128 EXPECT_TRUE(IdentifierPart::Is(0x11450)); |
| 129 #endif |
| 130 |
| 67 // http://www.unicode.org/reports/tr31/ | 131 // http://www.unicode.org/reports/tr31/ |
| 132 // curl http://www.unicode.org/Public/UCD/latest/ucd/PropList.txt | |
| 133 // grep 'Other_ID_(Continue|Start)' |
| 134 |
| 68 // Other_ID_Start | 135 // Other_ID_Start |
| 136 EXPECT_TRUE(IdentifierPart::Is(0x1885)); |
| 137 EXPECT_TRUE(IdentifierPart::Is(0x1886)); |
| 69 EXPECT_TRUE(IdentifierPart::Is(0x2118)); | 138 EXPECT_TRUE(IdentifierPart::Is(0x2118)); |
| 70 EXPECT_TRUE(IdentifierPart::Is(0x212E)); | 139 EXPECT_TRUE(IdentifierPart::Is(0x212E)); |
| 71 EXPECT_TRUE(IdentifierPart::Is(0x309B)); | 140 EXPECT_TRUE(IdentifierPart::Is(0x309B)); |
| 72 EXPECT_TRUE(IdentifierPart::Is(0x309C)); | 141 EXPECT_TRUE(IdentifierPart::Is(0x309C)); |
| 73 | 142 |
| 74 // Other_ID_Continue | 143 // Other_ID_Continue |
| 75 EXPECT_TRUE(IdentifierPart::Is(0x00B7)); | 144 EXPECT_TRUE(IdentifierPart::Is(0x00B7)); |
| 76 EXPECT_TRUE(IdentifierPart::Is(0x0387)); | 145 EXPECT_TRUE(IdentifierPart::Is(0x0387)); |
| 77 EXPECT_TRUE(IdentifierPart::Is(0x1369)); | 146 EXPECT_TRUE(IdentifierPart::Is(0x1369)); |
| 78 EXPECT_TRUE(IdentifierPart::Is(0x1370)); | 147 EXPECT_TRUE(IdentifierPart::Is(0x1370)); |
| (...skipping 12 matching lines...) |
| 91 EXPECT_TRUE(IdentifierPart::Is(0x10403)); | 160 EXPECT_TRUE(IdentifierPart::Is(0x10403)); |
| 92 EXPECT_TRUE(IdentifierStart::Is(0x1043C)); // Category Ll | 161 EXPECT_TRUE(IdentifierStart::Is(0x1043C)); // Category Ll |
| 93 EXPECT_TRUE(IdentifierPart::Is(0x1043C)); | 162 EXPECT_TRUE(IdentifierPart::Is(0x1043C)); |
| 94 EXPECT_TRUE(IdentifierStart::Is(0x16F9C)); // Category Lm | 163 EXPECT_TRUE(IdentifierStart::Is(0x16F9C)); // Category Lm |
| 95 EXPECT_TRUE(IdentifierPart::Is(0x16F9C)); | 164 EXPECT_TRUE(IdentifierPart::Is(0x16F9C)); |
| 96 EXPECT_TRUE(IdentifierStart::Is(0x10048)); // Category Lo | 165 EXPECT_TRUE(IdentifierStart::Is(0x10048)); // Category Lo |
| 97 EXPECT_TRUE(IdentifierPart::Is(0x10048)); | 166 EXPECT_TRUE(IdentifierPart::Is(0x10048)); |
| 98 EXPECT_TRUE(IdentifierStart::Is(0x1014D)); // Category Nl | 167 EXPECT_TRUE(IdentifierStart::Is(0x1014D)); // Category Nl |
| 99 EXPECT_TRUE(IdentifierPart::Is(0x1014D)); | 168 EXPECT_TRUE(IdentifierPart::Is(0x1014D)); |
| 100 | 169 |
| 170 // New in Unicode 8.0 |
| 171 // [ [:ID_Start=Yes:] & [:Age=8.0:]] - [:Age=7.0:] |
| 172 EXPECT_TRUE(IdentifierStart::Is(0x108E0)); |
| 173 EXPECT_TRUE(IdentifierStart::Is(0x10C80)); |
| 174 |
| 101 // Only ID_Continue. | 175 // Only ID_Continue. |
| 102 EXPECT_FALSE(IdentifierStart::Is(0x101FD)); // Category Mn | 176 EXPECT_FALSE(IdentifierStart::Is(0x101FD)); // Category Mn |
| 103 EXPECT_TRUE(IdentifierPart::Is(0x101FD)); | 177 EXPECT_TRUE(IdentifierPart::Is(0x101FD)); |
| 104 EXPECT_FALSE(IdentifierStart::Is(0x11002)); // Category Mc | 178 EXPECT_FALSE(IdentifierStart::Is(0x11002)); // Category Mc |
| 105 EXPECT_TRUE(IdentifierPart::Is(0x11002)); | 179 EXPECT_TRUE(IdentifierPart::Is(0x11002)); |
| 106 EXPECT_FALSE(IdentifierStart::Is(0x104A9)); // Category Nd | 180 EXPECT_FALSE(IdentifierStart::Is(0x104A9)); // Category Nd |
| 107 EXPECT_TRUE(IdentifierPart::Is(0x104A9)); | 181 EXPECT_TRUE(IdentifierPart::Is(0x104A9)); |
| 108 | 182 |
| 109 // Neither. | 183 // Neither. |
| 110 EXPECT_FALSE(IdentifierStart::Is(0x10111)); // Category No | 184 EXPECT_FALSE(IdentifierStart::Is(0x10111)); // Category No |
| 111 EXPECT_FALSE(IdentifierPart::Is(0x10111)); | 185 EXPECT_FALSE(IdentifierPart::Is(0x10111)); |
| 112 EXPECT_FALSE(IdentifierStart::Is(0x1F4A9)); // Category So | 186 EXPECT_FALSE(IdentifierStart::Is(0x1F4A9)); // Category So |
| 113 EXPECT_FALSE(IdentifierPart::Is(0x1F4A9)); | 187 EXPECT_FALSE(IdentifierPart::Is(0x1F4A9)); |
| 114 } | 188 } |
| 115 #endif // V8_INTL_SUPPORT | 189 #endif // V8_INTL_SUPPORT |
| 116 | 190 |
| 117 } // namespace internal | 191 } // namespace internal |
| 118 } // namespace v8 | 192 } // namespace v8 |
| OLD | NEW |