| OLD | NEW |
| 1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #ifndef V8_CHAR_PREDICATES_H_ | 5 #ifndef V8_CHAR_PREDICATES_H_ |
| 6 #define V8_CHAR_PREDICATES_H_ | 6 #define V8_CHAR_PREDICATES_H_ |
| 7 | 7 |
| 8 #include "src/globals.h" | 8 #include "src/globals.h" |
| 9 #include "src/unicode.h" | 9 #include "src/unicode.h" |
| 10 | 10 |
| 11 namespace v8 { | 11 namespace v8 { |
| 12 namespace internal { | 12 namespace internal { |
| 13 | 13 |
| 14 // Unicode character predicates as defined by ECMA-262, 3rd, | 14 // Unicode character predicates as defined by ECMA-262, 3rd, |
| 15 // used for lexical analysis. | 15 // used for lexical analysis. |
| 16 | 16 |
| 17 inline int AsciiAlphaToLower(uc32 c); | 17 inline int AsciiAlphaToLower(uc32 c); |
| 18 inline bool IsCarriageReturn(uc32 c); | 18 inline bool IsCarriageReturn(uc32 c); |
| 19 inline bool IsLineFeed(uc32 c); | 19 inline bool IsLineFeed(uc32 c); |
| 20 inline bool IsAsciiIdentifier(uc32 c); | 20 inline bool IsAsciiIdentifier(uc32 c); |
| 21 inline bool IsAlphaNumeric(uc32 c); | 21 inline bool IsAlphaNumeric(uc32 c); |
| 22 inline bool IsDecimalDigit(uc32 c); | 22 inline bool IsDecimalDigit(uc32 c); |
| 23 inline bool IsHexDigit(uc32 c); | 23 inline bool IsHexDigit(uc32 c); |
| 24 inline bool IsOctalDigit(uc32 c); | 24 inline bool IsOctalDigit(uc32 c); |
| 25 inline bool IsBinaryDigit(uc32 c); | 25 inline bool IsBinaryDigit(uc32 c); |
| 26 inline bool IsRegExpWord(uc32 c); | 26 inline bool IsRegExpWord(uc32 c); |
| 27 inline bool IsRegExpNewline(uc32 c); | 27 inline bool IsRegExpNewline(uc32 c); |
| 28 | 28 |
| 29 struct V8_EXPORT_PRIVATE SupplementaryPlanes { | |
| 30 static bool IsIDStart(uc32 c); | |
| 31 static bool IsIDPart(uc32 c); | |
| 32 }; | |
| 33 | |
| 34 | |
| 35 // ES6 draft section 11.6 | 29 // ES6 draft section 11.6 |
| 36 // This includes '_', '$' and '\', and ID_Start according to | 30 // This includes '_', '$' and '\', and ID_Start according to |
| 37 // http://www.unicode.org/reports/tr31/, which consists of categories | 31 // http://www.unicode.org/reports/tr31/, which consists of categories |
| 38 // 'Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl', but excluding properties | 32 // 'Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl', but excluding properties |
| 39 // 'Pattern_Syntax' or 'Pattern_White_Space'. | 33 // 'Pattern_Syntax' or 'Pattern_White_Space'. |
| 40 // For code points in the SMPs, we can resort to ICU (if available). | 34 #ifdef V8_INTL_SUPPORT |
| 35 struct V8_EXPORT_PRIVATE IdentifierStart { |
| 36 static bool Is(uc32 c); |
| 37 #else |
| 41 struct IdentifierStart { | 38 struct IdentifierStart { |
| 39 // Non-BMP characters are not supported without I18N. |
| 42 static inline bool Is(uc32 c) { | 40 static inline bool Is(uc32 c) { |
| 43 if (c > 0xFFFF) return SupplementaryPlanes::IsIDStart(c); | 41 return (c <= 0xFFFF) ? unibrow::ID_Start::Is(c) : false; |
| 44 return unibrow::ID_Start::Is(c); | |
| 45 } | 42 } |
| 43 #endif |
| 46 }; | 44 }; |
| 47 | 45 |
| 48 | |
| 49 // ES6 draft section 11.6 | 46 // ES6 draft section 11.6 |
| 50 // This includes \u200c and \u200d, and ID_Continue according to | 47 // This includes \u200c and \u200d, and ID_Continue according to |
| 51 // http://www.unicode.org/reports/tr31/, which consists of ID_Start, | 48 // http://www.unicode.org/reports/tr31/, which consists of ID_Start, |
| 52 // the categories 'Mn', 'Mc', 'Nd', 'Pc', but excluding properties | 49 // the categories 'Mn', 'Mc', 'Nd', 'Pc', but excluding properties |
| 53 // 'Pattern_Syntax' or 'Pattern_White_Space'. | 50 // 'Pattern_Syntax' or 'Pattern_White_Space'. |
| 54 // For code points in the SMPs, we can resort to ICU (if available). | 51 #ifdef V8_INTL_SUPPORT |
| 52 struct V8_EXPORT_PRIVATE IdentifierPart { |
| 53 static bool Is(uc32 c); |
| 54 #else |
| 55 struct IdentifierPart { | 55 struct IdentifierPart { |
| 56 static inline bool Is(uc32 c) { | 56 static inline bool Is(uc32 c) { |
| 57 if (c > 0xFFFF) return SupplementaryPlanes::IsIDPart(c); | 57 // Non-BMP characters are not supported without I18N. |
| 58 return unibrow::ID_Start::Is(c) || unibrow::ID_Continue::Is(c); | 58 if (c <= 0xFFFF) { |
| 59 return unibrow::ID_Start::Is(c) || unibrow::ID_Continue::Is(c); |
| 60 } |
| 61 return false; |
| 59 } | 62 } |
| 63 #endif |
| 60 }; | 64 }; |
| 61 | 65 |
| 62 | |
| 63 // ES6 draft section 11.2 | 66 // ES6 draft section 11.2 |
| 64 // This includes all code points of Unicode category 'Zs'. | 67 // This includes all code points of Unicode category 'Zs'. |
| 65 // \u180e stops being one as of Unicode 6.3.0, but ES6 adheres to Unicode 5.1, | 68 // Further included are \u0009, \u000b, \u000c, and \ufeff. |
| 66 // so it is also included. | 69 #ifdef V8_INTL_SUPPORT |
| 67 // Further included are \u0009, \u000b, \u0020, \u00a0, \u000c, and \ufeff. | 70 struct V8_EXPORT_PRIVATE WhiteSpace { |
| 68 // There are no category 'Zs' code points in the SMPs. | 71 static bool Is(uc32 c); |
| 72 #else |
| 69 struct WhiteSpace { | 73 struct WhiteSpace { |
| 70 static inline bool Is(uc32 c) { return unibrow::WhiteSpace::Is(c); } | 74 static inline bool Is(uc32 c) { return unibrow::WhiteSpace::Is(c); } |
| 75 #endif |
| 71 }; | 76 }; |
| 72 | 77 |
| 73 | |
| 74 // WhiteSpace and LineTerminator according to ES6 draft section 11.2 and 11.3 | 78 // WhiteSpace and LineTerminator according to ES6 draft section 11.2 and 11.3 |
| 75 // This consists of \000a, \000d, \u2028, and \u2029. | 79 // This includes all the characters with Unicode category 'Z' (= Zs+Zl+Zp) |
| 80 // as well as \u0009 - \u000d and \ufeff. |
| 76 struct WhiteSpaceOrLineTerminator { | 81 struct WhiteSpaceOrLineTerminator { |
| 77 static inline bool Is(uc32 c) { | 82 static inline bool Is(uc32 c) { |
| 78 return WhiteSpace::Is(c) || unibrow::LineTerminator::Is(c); | 83 return WhiteSpace::Is(c) || unibrow::LineTerminator::Is(c); |
| 79 } | 84 } |
| 80 }; | 85 }; |
| 81 | 86 |
| 82 } // namespace internal | 87 } // namespace internal |
| 83 } // namespace v8 | 88 } // namespace v8 |
| 84 | 89 |
| 85 #endif // V8_CHAR_PREDICATES_H_ | 90 #endif // V8_CHAR_PREDICATES_H_ |
| OLD | NEW |