| OLD | NEW |
| 1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #ifndef V8_CHAR_PREDICATES_H_ | 5 #ifndef V8_CHAR_PREDICATES_H_ |
| 6 #define V8_CHAR_PREDICATES_H_ | 6 #define V8_CHAR_PREDICATES_H_ |
| 7 | 7 |
| 8 #include "src/unicode.h" | 8 #include "src/unicode.h" |
| 9 | 9 |
| 10 namespace v8 { | 10 namespace v8 { |
| 11 namespace internal { | 11 namespace internal { |
| 12 | 12 |
| 13 // Unicode character predicates as defined by ECMA-262, 3rd edition, | 13 // Unicode character predicates as defined by ECMA-262, 3rd edition, |
| 14 // used for lexical analysis. | 14 // used for lexical analysis. |
| 15 | 15 |
| 16 inline bool IsCarriageReturn(uc32 c); | 16 inline bool IsCarriageReturn(uc32 c); |
| 17 inline bool IsLineFeed(uc32 c); | 17 inline bool IsLineFeed(uc32 c); |
| 18 inline bool IsDecimalDigit(uc32 c); | 18 inline bool IsDecimalDigit(uc32 c); |
| 19 inline bool IsHexDigit(uc32 c); | 19 inline bool IsHexDigit(uc32 c); |
| 20 inline bool IsOctalDigit(uc32 c); | 20 inline bool IsOctalDigit(uc32 c); |
| 21 inline bool IsBinaryDigit(uc32 c); | 21 inline bool IsBinaryDigit(uc32 c); |
| 22 inline bool IsRegExpWord(uc32 c); | 22 inline bool IsRegExpWord(uc32 c); |
| 23 inline bool IsRegExpNewline(uc32 c); | 23 inline bool IsRegExpNewline(uc32 c); |
| 24 | 24 |
| 25 |
| 26 struct SupplementaryPlanes { |
| 27 static bool IsIDStart(uc32 c); |
| 28 static bool IsIDPart(uc32 c); |
| 29 }; |
| 30 |
| 31 |
| 25 // ES6 draft section 11.6 | 32 // ES6 draft section 11.6 |
| 26 // This includes '_', '$' and '\', and ID_Start according to | 33 // This includes '_', '$' and '\', and ID_Start according to |
| 27 // http://www.unicode.org/reports/tr31/, which consists of categories | 34 // http://www.unicode.org/reports/tr31/, which consists of categories |
| 28 // 'Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl', but excluding properties | 35 // 'Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl', but excluding properties |
| 29 // 'Pattern_Syntax' or 'Pattern_White_Space'. | 36 // 'Pattern_Syntax' or 'Pattern_White_Space'. |
| 37 // For code points in the supplementary planes (above U+FFFF), we can resort to ICU (if available). |
| 30 struct IdentifierStart { | 38 struct IdentifierStart { |
| 31 static inline bool Is(uc32 c) { return unibrow::ID_Start::Is(c); } | 39 static inline bool Is(uc32 c) { |
| 40 if (c > 0xFFFF) return SupplementaryPlanes::IsIDStart(c); |
| 41 return unibrow::ID_Start::Is(c); |
| 42 } |
| 32 }; | 43 }; |
| 33 | 44 |
| 34 | 45 |
| 35 // ES6 draft section 11.6 | 46 // ES6 draft section 11.6 |
| 36 // This includes \u200c and \u200d, and ID_Continue according to | 47 // This includes \u200c and \u200d, and ID_Continue according to |
| 37 // http://www.unicode.org/reports/tr31/, which consists of ID_Start, | 48 // http://www.unicode.org/reports/tr31/, which consists of ID_Start, |
| 38 // the categories 'Mn', 'Mc', 'Nd', 'Pc', but excluding properties | 49 // the categories 'Mn', 'Mc', 'Nd', 'Pc', but excluding properties |
| 39 // 'Pattern_Syntax' or 'Pattern_White_Space'. | 50 // 'Pattern_Syntax' or 'Pattern_White_Space'. |
| 51 // For code points in the supplementary planes (above U+FFFF), we can resort to ICU (if available). |
| 40 struct IdentifierPart { | 52 struct IdentifierPart { |
| 41 static inline bool Is(uc32 c) { | 53 static inline bool Is(uc32 c) { |
| 54 if (c > 0xFFFF) return SupplementaryPlanes::IsIDPart(c); |
| 42 return unibrow::ID_Start::Is(c) || unibrow::ID_Continue::Is(c); | 55 return unibrow::ID_Start::Is(c) || unibrow::ID_Continue::Is(c); |
| 43 } | 56 } |
| 44 }; | 57 }; |
| 45 | 58 |
| 46 | 59 |
| 47 // ES6 draft section 11.2 | 60 // ES6 draft section 11.2 |
| 48 // This includes all code points of Unicode category 'Zs'. | 61 // This includes all code points of Unicode category 'Zs'. |
| 49 // \u180e stops being one as of Unicode 6.3.0, but ES6 adheres to Unicode 5.1, | 62 // \u180e stops being one as of Unicode 6.3.0, but ES6 adheres to Unicode 5.1, |
| 50 // so it is also included. | 63 // so it is also included. |
| 51 // Further included are \u0009, \u000b, \u0020, \u00a0, \u000c, and \ufeff. | 64 // Further included are \u0009, \u000b, \u0020, \u00a0, \u000c, and \ufeff. |
| 65 // There are no category 'Zs' code points in the supplementary planes. |
| 52 struct WhiteSpace { | 66 struct WhiteSpace { |
| 53 static inline bool Is(uc32 c) { return unibrow::WhiteSpace::Is(c); } | 67 static inline bool Is(uc32 c) { return unibrow::WhiteSpace::Is(c); } |
| 54 }; | 68 }; |
| 55 | 69 |
| 56 | 70 |
| 57 // WhiteSpace and LineTerminator according to ES6 draft section 11.2 and 11.3 | 71 // WhiteSpace and LineTerminator according to ES6 draft section 11.2 and 11.3 |
| 58 // This consists of \u000a, \u000d, \u2028, and \u2029. | 72 // This consists of \u000a, \u000d, \u2028, and \u2029. |
| 59 struct WhiteSpaceOrLineTerminator { | 73 struct WhiteSpaceOrLineTerminator { |
| 60 static inline bool Is(uc32 c) { | 74 static inline bool Is(uc32 c) { |
| 61 return WhiteSpace::Is(c) || unibrow::LineTerminator::Is(c); | 75 return WhiteSpace::Is(c) || unibrow::LineTerminator::Is(c); |
| 62 } | 76 } |
| 63 }; | 77 }; |
| 64 | 78 |
| 65 } } // namespace v8::internal | 79 } } // namespace v8::internal |
| 66 | 80 |
| 67 #endif // V8_CHAR_PREDICATES_H_ | 81 #endif // V8_CHAR_PREDICATES_H_ |
| OLD | NEW |