Index: src/char-predicates.h |
diff --git a/src/char-predicates.h b/src/char-predicates.h |
index bfe7fe18bf6f048ee85d57d1fb38134ea2b0035c..5ecb07de992a6f62b1ad605ee97ce7f81c04a624 100644 |
--- a/src/char-predicates.h |
+++ b/src/char-predicates.h |
@@ -22,13 +22,24 @@ inline bool IsBinaryDigit(uc32 c); |
inline bool IsRegExpWord(uc32 c); |
inline bool IsRegExpNewline(uc32 c); |
+ |
+struct SupplementaryPlanes { |
+ static bool IsIDStart(uc32 c); |
+ static bool IsIDPart(uc32 c); |
+}; |
+ |
+ |
// ES6 draft section 11.6 |
// This includes '_', '$' and '\', and ID_Start according to |
// http://www.unicode.org/reports/tr31/, which consists of categories |
// 'Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl', but excluding properties |
// 'Pattern_Syntax' or 'Pattern_White_Space'. |
+// For code points above U+FFFF, we can resort to ICU (if available). |
struct IdentifierStart { |
- static inline bool Is(uc32 c) { return unibrow::ID_Start::Is(c); } |
+ static inline bool Is(uc32 c) { |
+ if (c > 0xFFFF) return SupplementaryPlanes::IsIDStart(c); |
+ return unibrow::ID_Start::Is(c); |
+ } |
}; |
@@ -37,8 +48,10 @@ struct IdentifierStart { |
// http://www.unicode.org/reports/tr31/, which consists of ID_Start, |
// the categories 'Mn', 'Mc', 'Nd', 'Pc', but excluding properties |
// 'Pattern_Syntax' or 'Pattern_White_Space'. |
+// For code points above U+FFFF, we can resort to ICU (if available). |
struct IdentifierPart { |
static inline bool Is(uc32 c) { |
+ if (c > 0xFFFF) return SupplementaryPlanes::IsIDPart(c); |
return unibrow::ID_Start::Is(c) || unibrow::ID_Continue::Is(c); |
} |
}; |
@@ -49,6 +62,7 @@ struct IdentifierPart { |
// \u180e stops being one as of Unicode 6.3.0, but ES6 adheres to Unicode 5.1, |
// so it is also included. |
// Further included are \u0009, \u000b, \u0020, \u00a0, \u000c, and \ufeff. |
+// There are no category 'Zs' code points in the supplementary planes. |
struct WhiteSpace { |
static inline bool Is(uc32 c) { return unibrow::WhiteSpace::Is(c); } |
}; |