OLD | NEW |
1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef V8_UNICODE_H_ | 5 #ifndef V8_UNICODE_H_ |
6 #define V8_UNICODE_H_ | 6 #define V8_UNICODE_H_ |
7 | 7 |
8 #include <sys/types.h> | 8 #include <sys/types.h> |
9 #include "src/globals.h" | 9 #include "src/globals.h" |
10 #include "src/utils.h" | 10 #include "src/utils.h" |
11 /** | 11 /** |
12 * \file | 12 * \file |
13 * Definitions and convenience functions for working with unicode. | 13 * Definitions and convenience functions for working with unicode. |
14 */ | 14 */ |
15 | 15 |
16 namespace unibrow { | 16 namespace unibrow { |
17 | 17 |
// Diff note: OLD declares uchar as int32_t (signed); NEW declares it as
// unsigned int. OLD declares byte as uint8_t; NEW declares it as unsigned char.
// NOTE(review): NEW drops the fixed-width types; presumably unsigned int is
// 32 bits on every supported target -- confirm.
18 typedef int32_t uchar; | 18 typedef unsigned int uchar; |
19 typedef uint8_t byte; | 19 typedef unsigned char byte; |
20 | 20 |
21 /** | 21 /** |
22 * The max length of the result of converting the case of a single | 22 * The max length of the result of converting the case of a single |
23 * character. | 23 * character. |
24 */ | 24 */ |
// Diff note: the value (4) is identical in the OLD and NEW columns.
25 const int kMaxMappingSize = 4; | 25 const int kMaxMappingSize = 4; |
26 | 26 |
27 template <class T, int size = 256> | 27 template <class T, int size = 256> |
28 class Predicate { | 28 class Predicate { |
29 public: | 29 public: |
(...skipping 93 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
123 return 0xd800 + (((char_code - 0x10000) >> 10) & 0x3ff); | 123 return 0xd800 + (((char_code - 0x10000) >> 10) & 0x3ff); |
124 } | 124 } |
// Returns 0xdc00 plus the low 10 bits of char_code (char_code & 0x3ff),
// narrowed to uint16_t. Identical in the OLD and NEW columns.
125 static inline uint16_t TrailSurrogate(uint32_t char_code) { | 125 static inline uint16_t TrailSurrogate(uint32_t char_code) { |
126 return 0xdc00 + (char_code & 0x3ff); | 126 return 0xdc00 + (char_code & 0x3ff); |
127 } | 127 } |
128 }; | 128 }; |
129 | 129 |
130 | 130 |
131 class Utf8 { | 131 class Utf8 { |
132 public: | 132 public: |
// Static helpers declared on Utf8; their definitions are not part of this
// excerpt. Only Length differs between the columns: OLD returns unsigned,
// NEW returns uchar.
// NOTE(review): the name suggests Length yields a byte count rather than a
// character value -- confirm that uchar is the intended return type.
133 static inline unsigned Length(uchar chr, int previous); | 133 static inline uchar Length(uchar chr, int previous); |
134 static inline unsigned EncodeOneByte(char* out, uint8_t c); | 134 static inline unsigned EncodeOneByte(char* out, uint8_t c); |
// Encode: replace_invalid defaults to false (see the last parameter below).
135 static inline unsigned Encode(char* out, | 135 static inline unsigned Encode(char* out, |
136 uchar c, | 136 uchar c, |
137 int previous, | 137 int previous, |
138 bool replace_invalid = false); | 138 bool replace_invalid = false); |
// CalculateValue: takes a byte buffer, its length, and a cursor passed by
// pointer, and returns a uchar.
// NOTE(review): presumably advances *cursor past the bytes it consumes --
// verify against the definition.
139 static uchar CalculateValue(const byte* str, size_t length, size_t* cursor); | 139 static uchar CalculateValue(const byte* str, size_t length, size_t* cursor); |
140 | 140 |
141 // The unicode replacement character, used to signal invalid unicode | 141 // The unicode replacement character, used to signal invalid unicode |
142 // sequences (e.g. an orphan surrogate) when converting to a UTF-8 encoding. | 142 // sequences (e.g. an orphan surrogate) when converting to a UTF-8 encoding. |
143 static const uchar kBadChar = 0xFFFD; | 143 static const uchar kBadChar = 0xFFFD; |
(...skipping 68 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
// Diff note: kMaxWidth (1) and the Convert declaration are identical in the
// OLD and NEW columns; the enclosing type and Convert's definition are not
// part of this excerpt.
// NOTE(review): presumably Convert writes at most kMaxWidth values to result
// and uses allow_caching_ptr to report whether the mapping may be cached --
// confirm against the definition.
212 static const int kMaxWidth = 1; | 212 static const int kMaxWidth = 1; |
213 static int Convert(uchar c, | 213 static int Convert(uchar c, |
214 uchar n, | 214 uchar n, |
215 uchar* result, | 215 uchar* result, |
216 bool* allow_caching_ptr); | 216 bool* allow_caching_ptr); |
217 }; | 217 }; |
218 | 218 |
219 } // namespace unibrow | 219 } // namespace unibrow |
220 | 220 |
221 #endif // V8_UNICODE_H_ | 221 #endif // V8_UNICODE_H_ |
OLD | NEW |