OLD | NEW |
1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef V8_UNICODE_H_ | 5 #ifndef V8_UNICODE_H_ |
6 #define V8_UNICODE_H_ | 6 #define V8_UNICODE_H_ |
7 | 7 |
8 #include <sys/types.h> | 8 #include <sys/types.h> |
9 #include "src/globals.h" | 9 #include "src/globals.h" |
10 #include "src/utils.h" | 10 #include "src/utils.h" |
(...skipping 118 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
129 | 129 |
130 | 130 |
131 class Utf8 { | 131 class Utf8 { |
132 public: | 132 public: |
133 static inline uchar Length(uchar chr, int previous); | 133 static inline uchar Length(uchar chr, int previous); |
134 static inline unsigned EncodeOneByte(char* out, uint8_t c); | 134 static inline unsigned EncodeOneByte(char* out, uint8_t c); |
135 static inline unsigned Encode(char* out, | 135 static inline unsigned Encode(char* out, |
136 uchar c, | 136 uchar c, |
137 int previous, | 137 int previous, |
138 bool replace_invalid = false); | 138 bool replace_invalid = false); |
139 static uchar CalculateValue(const byte* str, | 139 static uchar CalculateValue(const byte* str, size_t length, size_t* cursor); |
140 unsigned length, | |
141 unsigned* cursor); | |
142 | 140 |
143 // The unicode replacement character, used to signal invalid unicode | 141 // The unicode replacement character, used to signal invalid unicode |
144 // sequences (e.g. an orphan surrogate) when converting to a UTF-8 encoding. | 142 // sequences (e.g. an orphan surrogate) when converting to a UTF-8 encoding. |
145 static const uchar kBadChar = 0xFFFD; | 143 static const uchar kBadChar = 0xFFFD; |
146 static const unsigned kMaxEncodedSize = 4; | 144 static const unsigned kMaxEncodedSize = 4; |
147 static const unsigned kMaxOneByteChar = 0x7f; | 145 static const unsigned kMaxOneByteChar = 0x7f; |
148 static const unsigned kMaxTwoByteChar = 0x7ff; | 146 static const unsigned kMaxTwoByteChar = 0x7ff; |
149 static const unsigned kMaxThreeByteChar = 0xffff; | 147 static const unsigned kMaxThreeByteChar = 0xffff; |
150 static const unsigned kMaxFourByteChar = 0x1fffff; | 148 static const unsigned kMaxFourByteChar = 0x1fffff; |
151 | 149 |
152 // A single surrogate is coded as a 3 byte UTF-8 sequence, but two together | 150 // A single surrogate is coded as a 3 byte UTF-8 sequence, but two together |
153 // that match are coded as a 4 byte UTF-8 sequence. | 151 // that match are coded as a 4 byte UTF-8 sequence. |
154 static const unsigned kBytesSavedByCombiningSurrogates = 2; | 152 static const unsigned kBytesSavedByCombiningSurrogates = 2; |
155 static const unsigned kSizeOfUnmatchedSurrogate = 3; | 153 static const unsigned kSizeOfUnmatchedSurrogate = 3; |
156 // The maximum size a single UTF-16 code unit may take up when encoded as | 154 // The maximum size a single UTF-16 code unit may take up when encoded as |
157 // UTF-8. | 155 // UTF-8. |
158 static const unsigned kMax16BitCodeUnitSize = 3; | 156 static const unsigned kMax16BitCodeUnitSize = 3; |
159 static inline uchar ValueOf(const byte* str, | 157 static inline uchar ValueOf(const byte* str, size_t length, size_t* cursor); |
160 unsigned length, | |
161 unsigned* cursor); | |
162 }; | 158 }; |
163 | 159 |
164 struct Uppercase { | 160 struct Uppercase { |
165 static bool Is(uchar c); | 161 static bool Is(uchar c); |
166 }; | 162 }; |
167 struct Lowercase { | 163 struct Lowercase { |
168 static bool Is(uchar c); | 164 static bool Is(uchar c); |
169 }; | 165 }; |
170 struct Letter { | 166 struct Letter { |
171 static bool Is(uchar c); | 167 static bool Is(uchar c); |
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
216 static const int kMaxWidth = 1; | 212 static const int kMaxWidth = 1; |
217 static int Convert(uchar c, | 213 static int Convert(uchar c, |
218 uchar n, | 214 uchar n, |
219 uchar* result, | 215 uchar* result, |
220 bool* allow_caching_ptr); | 216 bool* allow_caching_ptr); |
221 }; | 217 }; |
222 | 218 |
223 } // namespace unibrow | 219 } // namespace unibrow |
224 | 220 |
225 #endif // V8_UNICODE_H_ | 221 #endif // V8_UNICODE_H_ |
OLD | NEW |