OLD | NEW |
1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef V8_CHAR_PREDICATES_H_ | 5 #ifndef V8_CHAR_PREDICATES_H_ |
6 #define V8_CHAR_PREDICATES_H_ | 6 #define V8_CHAR_PREDICATES_H_ |
7 | 7 |
8 #include "src/unicode.h" | 8 #include "src/unicode.h" |
9 | 9 |
10 namespace v8 { | 10 namespace v8 { |
11 namespace internal { | 11 namespace internal { |
12 | 12 |
13 // Unicode character predicates as defined by ECMA-262, 3rd edition, | 13 // Unicode character predicates as defined by ECMA-262, 3rd edition, |
14 // used for lexical analysis. | 14 // used for lexical analysis. |
15 | 15 |
16 inline bool IsCarriageReturn(uc32 c); | 16 inline bool IsCarriageReturn(uc32 c); |
17 inline bool IsLineFeed(uc32 c); | 17 inline bool IsLineFeed(uc32 c); |
18 inline bool IsDecimalDigit(uc32 c); | 18 inline bool IsDecimalDigit(uc32 c); |
19 inline bool IsHexDigit(uc32 c); | 19 inline bool IsHexDigit(uc32 c); |
20 inline bool IsOctalDigit(uc32 c); | 20 inline bool IsOctalDigit(uc32 c); |
21 inline bool IsBinaryDigit(uc32 c); | 21 inline bool IsBinaryDigit(uc32 c); |
22 inline bool IsRegExpWord(uc32 c); | 22 inline bool IsRegExpWord(uc32 c); |
23 inline bool IsRegExpNewline(uc32 c); | 23 inline bool IsRegExpNewline(uc32 c); |
24 | 24 |
| 25 |
| 26 struct SupplementaryPlanes { |
| 27 static bool IsIDStart(uc32 c); |
| 28 static bool IsIDPart(uc32 c); |
| 29 }; |
| 30 |
| 31 |
25 // ES6 draft section 11.6 | 32 // ES6 draft section 11.6 |
26 // This includes '_', '$' and '\', and ID_Start according to | 33 // This includes '_', '$' and '\', and ID_Start according to |
27 // http://www.unicode.org/reports/tr31/, which consists of categories | 34 // http://www.unicode.org/reports/tr31/, which consists of categories |
28 // 'Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl', but excluding properties | 35 // 'Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl', but excluding properties |
29 // 'Pattern_Syntax' or 'Pattern_White_Space'. | 36 // 'Pattern_Syntax' or 'Pattern_White_Space'. |
| 37 // For code points in the supplementary planes (above U+FFFF), we can resort to ICU (if available). |
30 struct IdentifierStart { | 38 struct IdentifierStart { |
31 static inline bool Is(uc32 c) { return unibrow::ID_Start::Is(c); } | 39 static inline bool Is(uc32 c) { |
| 40 if (c > 0xFFFF) return SupplementaryPlanes::IsIDStart(c); |
| 41 return unibrow::ID_Start::Is(c); |
| 42 } |
32 }; | 43 }; |
33 | 44 |
34 | 45 |
35 // ES6 draft section 11.6 | 46 // ES6 draft section 11.6 |
36 // This includes \u200c and \u200d, and ID_Continue according to | 47 // This includes \u200c and \u200d, and ID_Continue according to |
37 // http://www.unicode.org/reports/tr31/, which consists of ID_Start, | 48 // http://www.unicode.org/reports/tr31/, which consists of ID_Start, |
38 // the categories 'Mn', 'Mc', 'Nd', 'Pc', but excluding properties | 49 // the categories 'Mn', 'Mc', 'Nd', 'Pc', but excluding properties |
39 // 'Pattern_Syntax' or 'Pattern_White_Space'. | 50 // 'Pattern_Syntax' or 'Pattern_White_Space'. |
| 51 // For code points in the supplementary planes (above U+FFFF), we can resort to ICU (if available). |
40 struct IdentifierPart { | 52 struct IdentifierPart { |
41 static inline bool Is(uc32 c) { | 53 static inline bool Is(uc32 c) { |
| 54 if (c > 0xFFFF) return SupplementaryPlanes::IsIDPart(c); |
42 return unibrow::ID_Start::Is(c) || unibrow::ID_Continue::Is(c); | 55 return unibrow::ID_Start::Is(c) || unibrow::ID_Continue::Is(c); |
43 } | 56 } |
44 }; | 57 }; |
45 | 58 |
46 | 59 |
47 // ES6 draft section 11.2 | 60 // ES6 draft section 11.2 |
48 // This includes all code points of Unicode category 'Zs'. | 61 // This includes all code points of Unicode category 'Zs'. |
49 // \u180e stops being one as of Unicode 6.3.0, but ES6 adheres to Unicode 5.1, | 62 // \u180e stops being one as of Unicode 6.3.0, but ES6 adheres to Unicode 5.1, |
50 // so it is also included. | 63 // so it is also included. |
51 // Further included are \u0009, \u000b, \u0020, \u00a0, \u000c, and \ufeff. | 64 // Further included are \u0009, \u000b, \u0020, \u00a0, \u000c, and \ufeff. |
| 65 // There are no category 'Zs' code points in the supplementary planes (above U+FFFF). |
52 struct WhiteSpace { | 66 struct WhiteSpace { |
53 static inline bool Is(uc32 c) { return unibrow::WhiteSpace::Is(c); } | 67 static inline bool Is(uc32 c) { return unibrow::WhiteSpace::Is(c); } |
54 }; | 68 }; |
55 | 69 |
56 | 70 |
57 // WhiteSpace and LineTerminator according to ES6 draft section 11.2 and 11.3 | 71 // WhiteSpace and LineTerminator according to ES6 draft section 11.2 and 11.3 |
58 // LineTerminator consists of \u000a, \u000d, \u2028, and \u2029. | 72 // LineTerminator consists of \u000a, \u000d, \u2028, and \u2029. |
59 struct WhiteSpaceOrLineTerminator { | 73 struct WhiteSpaceOrLineTerminator { |
60 static inline bool Is(uc32 c) { | 74 static inline bool Is(uc32 c) { |
61 return WhiteSpace::Is(c) || unibrow::LineTerminator::Is(c); | 75 return WhiteSpace::Is(c) || unibrow::LineTerminator::Is(c); |
62 } | 76 } |
63 }; | 77 }; |
64 | 78 |
65 } } // namespace v8::internal | 79 } } // namespace v8::internal |
66 | 80 |
67 #endif // V8_CHAR_PREDICATES_H_ | 81 #endif // V8_CHAR_PREDICATES_H_ |
OLD | NEW |