OLD | NEW |
1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef V8_UNICODE_H_ | 5 #ifndef V8_UNICODE_H_ |
6 #define V8_UNICODE_H_ | 6 #define V8_UNICODE_H_ |
7 | 7 |
8 #include <sys/types.h> | 8 #include <sys/types.h> |
9 #include "src/globals.h" | 9 #include "src/globals.h" |
10 #include "src/utils.h" | 10 #include "src/utils.h" |
(...skipping 137 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
148 static const unsigned kMaxFourByteChar = 0x1fffff; | 148 static const unsigned kMaxFourByteChar = 0x1fffff; |
149 | 149 |
150 // A single surrogate is coded as a 3 byte UTF-8 sequence, but two together | 150 // A single surrogate is coded as a 3 byte UTF-8 sequence, but two together |
151 // that match are coded as a 4 byte UTF-8 sequence. | 151 // that match are coded as a 4 byte UTF-8 sequence. |
152 static const unsigned kBytesSavedByCombiningSurrogates = 2; | 152 static const unsigned kBytesSavedByCombiningSurrogates = 2; |
153 static const unsigned kSizeOfUnmatchedSurrogate = 3; | 153 static const unsigned kSizeOfUnmatchedSurrogate = 3; |
154 // The maximum size a single UTF-16 code unit may take up when encoded as | 154 // The maximum size a single UTF-16 code unit may take up when encoded as |
155 // UTF-8. | 155 // UTF-8. |
156 static const unsigned kMax16BitCodeUnitSize = 3; | 156 static const unsigned kMax16BitCodeUnitSize = 3; |
// Static inline member taking a byte buffer `str`, its `length`, and a
// pointer to a `cursor` position; returns a uchar. Only the declaration is
// visible in this view.
// NOTE(review): presumably decodes the character at *cursor and advances the
// cursor past it -- confirm against the inline definition.
157 static inline uchar ValueOf(const byte* str, size_t length, size_t* cursor); | 157 static inline uchar ValueOf(const byte* str, size_t length, size_t* cursor); |
| 158 |
| 159 // Excludes non-characters from the set of valid code points. |
| 160 static inline bool IsValidCharacter(uchar c); |
| 161 |
// Static member taking a byte buffer `str` and its `length`; returns a bool.
// Declared only on the NEW side of this diff; the definition is not visible.
// NOTE(review): presumably returns true when the buffer is well-formed UTF-8
// -- confirm against the definition, including how an empty buffer is treated.
| 162 static bool Validate(const byte* str, size_t length); |
158 }; | 163 }; |
159 | 164 |
// Holder for a single static predicate, Is(), which takes a uchar and returns
// a bool. Only the declaration is visible in this header view.
// NOTE(review): going by the struct name, Is() presumably reports whether `c`
// is an uppercase character -- confirm against the definition.
160 struct Uppercase { | 165 struct Uppercase { |
161 static bool Is(uchar c); | 166 static bool Is(uchar c); |
162 }; | 167 }; |
// Holder for a single static predicate, Is(), which takes a uchar and returns
// a bool. Only the declaration is visible in this header view.
// NOTE(review): going by the struct name, Is() presumably reports whether `c`
// is a lowercase character -- confirm against the definition.
163 struct Lowercase { | 168 struct Lowercase { |
164 static bool Is(uchar c); | 169 static bool Is(uchar c); |
165 }; | 170 }; |
166 struct Letter { | 171 struct Letter { |
167 static bool Is(uchar c); | 172 static bool Is(uchar c); |
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
212 static const int kMaxWidth = 1; | 217 static const int kMaxWidth = 1; |
213 static int Convert(uchar c, | 218 static int Convert(uchar c, |
214 uchar n, | 219 uchar n, |
215 uchar* result, | 220 uchar* result, |
216 bool* allow_caching_ptr); | 221 bool* allow_caching_ptr); |
217 }; | 222 }; |
218 | 223 |
219 } // namespace unibrow | 224 } // namespace unibrow |
220 | 225 |
221 #endif // V8_UNICODE_H_ | 226 #endif // V8_UNICODE_H_ |
OLD | NEW |