Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(479)

Side by Side Diff: src/char-predicates.h

Issue 2331303002: Use ICU for ID_START and ID_CONTINUE for Unicode 9 data (Closed)
Patch Set: drop an unnecessary todo Created 3 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « BUILD.gn ('k') | src/char-predicates.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2011 the V8 project authors. All rights reserved. 1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #ifndef V8_CHAR_PREDICATES_H_ 5 #ifndef V8_CHAR_PREDICATES_H_
6 #define V8_CHAR_PREDICATES_H_ 6 #define V8_CHAR_PREDICATES_H_
7 7
8 #include "src/globals.h" 8 #include "src/globals.h"
9 #include "src/unicode.h" 9 #include "src/unicode.h"
10 10
11 namespace v8 { 11 namespace v8 {
12 namespace internal { 12 namespace internal {
13 13
14 // Unicode character predicates as defined by ECMA-262, 3rd, 14 // Unicode character predicates as defined by ECMA-262, 3rd,
15 // used for lexical analysis. 15 // used for lexical analysis.
16 16
17 inline int AsciiAlphaToLower(uc32 c); 17 inline int AsciiAlphaToLower(uc32 c);
18 inline bool IsCarriageReturn(uc32 c); 18 inline bool IsCarriageReturn(uc32 c);
19 inline bool IsLineFeed(uc32 c); 19 inline bool IsLineFeed(uc32 c);
20 inline bool IsAsciiIdentifier(uc32 c); 20 inline bool IsAsciiIdentifier(uc32 c);
21 inline bool IsAlphaNumeric(uc32 c); 21 inline bool IsAlphaNumeric(uc32 c);
22 inline bool IsDecimalDigit(uc32 c); 22 inline bool IsDecimalDigit(uc32 c);
23 inline bool IsHexDigit(uc32 c); 23 inline bool IsHexDigit(uc32 c);
24 inline bool IsOctalDigit(uc32 c); 24 inline bool IsOctalDigit(uc32 c);
25 inline bool IsBinaryDigit(uc32 c); 25 inline bool IsBinaryDigit(uc32 c);
26 inline bool IsRegExpWord(uc32 c); 26 inline bool IsRegExpWord(uc32 c);
27 inline bool IsRegExpNewline(uc32 c); 27 inline bool IsRegExpNewline(uc32 c);
28 28
29 struct V8_EXPORT_PRIVATE SupplementaryPlanes { 29 // ES#sec-names-and-keywords
30 static bool IsIDStart(uc32 c);
31 static bool IsIDPart(uc32 c);
32 };
33
34
35 // ES6 draft section 11.6
36 // This includes '_', '$' and '\', and ID_Start according to 30 // This includes '_', '$' and '\', and ID_Start according to
37 // http://www.unicode.org/reports/tr31/, which consists of categories 31 // http://www.unicode.org/reports/tr31/, which consists of categories
38 // 'Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl', but excluding properties 32 // 'Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl', but excluding properties
39 // 'Pattern_Syntax' or 'Pattern_White_Space'. 33 // 'Pattern_Syntax' or 'Pattern_White_Space'.
40 // For code points in the SMPs, we can resort to ICU (if available). 34 #ifdef V8_INTL_SUPPORT
35 struct V8_EXPORT_PRIVATE IdentifierStart {
36 static bool Is(uc32 c);
37 #else
41 struct IdentifierStart { 38 struct IdentifierStart {
39 // Non-BMP characters are not supported without I18N.
42 static inline bool Is(uc32 c) { 40 static inline bool Is(uc32 c) {
43 if (c > 0xFFFF) return SupplementaryPlanes::IsIDStart(c); 41 return (c <= 0xFFFF) ? unibrow::ID_Start::Is(c) : false;
44 return unibrow::ID_Start::Is(c);
45 } 42 }
43 #endif
46 }; 44 };
47 45
48 46 // ES#sec-names-and-keywords
49 // ES6 draft section 11.6
50 // This includes \u200c and \u200d, and ID_Continue according to 47 // This includes \u200c and \u200d, and ID_Continue according to
51 // http://www.unicode.org/reports/tr31/, which consists of ID_Start, 48 // http://www.unicode.org/reports/tr31/, which consists of ID_Start,
52 // the categories 'Mn', 'Mc', 'Nd', 'Pc', but excluding properties 49 // the categories 'Mn', 'Mc', 'Nd', 'Pc', but excluding properties
53 // 'Pattern_Syntax' or 'Pattern_White_Space'. 50 // 'Pattern_Syntax' or 'Pattern_White_Space'.
54 // For code points in the SMPs, we can resort to ICU (if available). 51 #ifdef V8_INTL_SUPPORT
52 struct V8_EXPORT_PRIVATE IdentifierPart {
53 static bool Is(uc32 c);
54 #else
55 struct IdentifierPart { 55 struct IdentifierPart {
56 static inline bool Is(uc32 c) { 56 static inline bool Is(uc32 c) {
57 if (c > 0xFFFF) return SupplementaryPlanes::IsIDPart(c); 57 // Non-BMP characters are not supported without I18N.
58 return unibrow::ID_Start::Is(c) || unibrow::ID_Continue::Is(c); 58 if (c <= 0xFFFF) {
59 return unibrow::ID_Start::Is(c) || unibrow::ID_Continue::Is(c);
60 }
61 return false;
59 } 62 }
63 #endif
60 }; 64 };
61 65
62
63 // ES6 draft section 11.2 66 // ES6 draft section 11.2
64 // This includes all code points of Unicode category 'Zs'. 67 // This includes all code points of Unicode category 'Zs'.
65 // \u180e stops being one as of Unicode 6.3.0, but ES6 adheres to Unicode 5.1, 68 // Further included are \u0009, \u000b, \u000c, and \ufeff.
66 // so it is also included. 69 #ifdef V8_INTL_SUPPORT
67 // Further included are \u0009, \u000b, \u0020, \u00a0, \u000c, and \ufeff. 70 struct V8_EXPORT_PRIVATE WhiteSpace {
68 // There are no category 'Zs' code points in the SMPs. 71 static bool Is(uc32 c);
72 #else
69 struct WhiteSpace { 73 struct WhiteSpace {
70 static inline bool Is(uc32 c) { return unibrow::WhiteSpace::Is(c); } 74 static inline bool Is(uc32 c) { return unibrow::WhiteSpace::Is(c); }
75 #endif
71 }; 76 };
72 77
73
74 // WhiteSpace and LineTerminator according to ES6 draft section 11.2 and 11.3 78 // WhiteSpace and LineTerminator according to ES6 draft section 11.2 and 11.3
75 // This consists of \u000a, \u000d, \u2028, and \u2029. 79 // This includes all the characters with Unicode category 'Z' (= Zs+Zl+Zp)
80 // as well as \u0009 - \u000d and \ufeff.
76 struct WhiteSpaceOrLineTerminator { 81 struct WhiteSpaceOrLineTerminator {
77 static inline bool Is(uc32 c) { 82 static inline bool Is(uc32 c) {
78 return WhiteSpace::Is(c) || unibrow::LineTerminator::Is(c); 83 return WhiteSpace::Is(c) || unibrow::LineTerminator::Is(c);
79 } 84 }
80 }; 85 };
81 86
82 } // namespace internal 87 } // namespace internal
83 } // namespace v8 88 } // namespace v8
84 89
85 #endif // V8_CHAR_PREDICATES_H_ 90 #endif // V8_CHAR_PREDICATES_H_
OLDNEW
« no previous file with comments | « BUILD.gn ('k') | src/char-predicates.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698