OLD | NEW |
1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef V8_UNICODE_H_ | 5 #ifndef V8_UNICODE_H_ |
6 #define V8_UNICODE_H_ | 6 #define V8_UNICODE_H_ |
7 | 7 |
8 #include <sys/types.h> | 8 #include <sys/types.h> |
9 #include "src/globals.h" | 9 #include "src/globals.h" |
10 #include "src/utils.h" | 10 #include "src/utils.h" |
(...skipping 123 matching lines...) |
134 static inline unsigned EncodeOneByte(char* out, uint8_t c); | 134 static inline unsigned EncodeOneByte(char* out, uint8_t c); |
135 static inline unsigned Encode(char* out, | 135 static inline unsigned Encode(char* out, |
136 uchar c, | 136 uchar c, |
137 int previous, | 137 int previous, |
138 bool replace_invalid = false); | 138 bool replace_invalid = false); |
139 static uchar CalculateValue(const byte* str, size_t length, size_t* cursor); | 139 static uchar CalculateValue(const byte* str, size_t length, size_t* cursor); |
140 | 140 |
141 // The unicode replacement character, used to signal invalid unicode | 141 // The unicode replacement character, used to signal invalid unicode |
142 // sequences (e.g. an orphan surrogate) when converting to a UTF-8 encoding. | 142 // sequences (e.g. an orphan surrogate) when converting to a UTF-8 encoding. |
143 static const uchar kBadChar = 0xFFFD; | 143 static const uchar kBadChar = 0xFFFD; |
| 144 static const uchar kBufferEmpty = 0x0; |
| 145 static const uchar kIncomplete = 0xFFFFFFFC; // Any value that is not a valid code point. |
144 static const unsigned kMaxEncodedSize = 4; | 146 static const unsigned kMaxEncodedSize = 4; |
145 static const unsigned kMaxOneByteChar = 0x7f; | 147 static const unsigned kMaxOneByteChar = 0x7f; |
146 static const unsigned kMaxTwoByteChar = 0x7ff; | 148 static const unsigned kMaxTwoByteChar = 0x7ff; |
147 static const unsigned kMaxThreeByteChar = 0xffff; | 149 static const unsigned kMaxThreeByteChar = 0xffff; |
148 static const unsigned kMaxFourByteChar = 0x1fffff; | 150 static const unsigned kMaxFourByteChar = 0x1fffff; |
149 | 151 |
150 // A single surrogate is coded as a 3 byte UTF-8 sequence, but two together | 152 // A single surrogate is coded as a 3 byte UTF-8 sequence, but two together |
151 // that match are coded as a 4 byte UTF-8 sequence. | 153 // that match are coded as a 4 byte UTF-8 sequence. |
152 static const unsigned kBytesSavedByCombiningSurrogates = 2; | 154 static const unsigned kBytesSavedByCombiningSurrogates = 2; |
153 static const unsigned kSizeOfUnmatchedSurrogate = 3; | 155 static const unsigned kSizeOfUnmatchedSurrogate = 3; |
154 // The maximum size a single UTF-16 code unit may take up when encoded as | 156 // The maximum size a single UTF-16 code unit may take up when encoded as |
155 // UTF-8. | 157 // UTF-8. |
156 static const unsigned kMax16BitCodeUnitSize = 3; | 158 static const unsigned kMax16BitCodeUnitSize = 3; |
157 static inline uchar ValueOf(const byte* str, size_t length, size_t* cursor); | 159 static inline uchar ValueOf(const byte* str, size_t length, size_t* cursor); |
158 | 160 |
| 161 typedef uint32_t Utf8IncrementalBuffer; |
| 162 static uchar ValueOfIncremental(byte next_byte, |
| 163 Utf8IncrementalBuffer& buffer); |
| 164 |
159 // Excludes non-characters from the set of valid code points. | 165 // Excludes non-characters from the set of valid code points. |
160 static inline bool IsValidCharacter(uchar c); | 166 static inline bool IsValidCharacter(uchar c); |
161 | 167 |
162 static bool Validate(const byte* str, size_t length); | 168 static bool Validate(const byte* str, size_t length); |
163 }; | 169 }; |
164 | 170 |
165 struct Uppercase { | 171 struct Uppercase { |
166 static bool Is(uchar c); | 172 static bool Is(uchar c); |
167 }; | 173 }; |
168 struct Lowercase { | 174 struct Lowercase { |
(...skipping 48 matching lines...) |
217 static const int kMaxWidth = 1; | 223 static const int kMaxWidth = 1; |
218 static int Convert(uchar c, | 224 static int Convert(uchar c, |
219 uchar n, | 225 uchar n, |
220 uchar* result, | 226 uchar* result, |
221 bool* allow_caching_ptr); | 227 bool* allow_caching_ptr); |
222 }; | 228 }; |
223 | 229 |
224 } // namespace unibrow | 230 } // namespace unibrow |
225 | 231 |
226 #endif // V8_UNICODE_H_ | 232 #endif // V8_UNICODE_H_ |
OLD | NEW |