| OLD | NEW |
| 1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #ifndef V8_UNICODE_H_ | 5 #ifndef V8_UNICODE_H_ |
| 6 #define V8_UNICODE_H_ | 6 #define V8_UNICODE_H_ |
| 7 | 7 |
| 8 #include <sys/types.h> | 8 #include <sys/types.h> |
| 9 #include "src/globals.h" | 9 #include "src/globals.h" |
| 10 #include "src/utils.h" | 10 #include "src/utils.h" |
| (...skipping 123 matching lines...) | (...skipping 123 matching lines...) |
| 134 static inline unsigned EncodeOneByte(char* out, uint8_t c); | 134 static inline unsigned EncodeOneByte(char* out, uint8_t c); |
| 135 static inline unsigned Encode(char* out, | 135 static inline unsigned Encode(char* out, |
| 136 uchar c, | 136 uchar c, |
| 137 int previous, | 137 int previous, |
| 138 bool replace_invalid = false); | 138 bool replace_invalid = false); |
| 139 static uchar CalculateValue(const byte* str, size_t length, size_t* cursor); | 139 static uchar CalculateValue(const byte* str, size_t length, size_t* cursor); |
| 140 | 140 |
| 141 // The Unicode replacement character, used to signal invalid Unicode | 141 // The Unicode replacement character, used to signal invalid Unicode |
| 142 // sequences (e.g. an orphan surrogate) when converting to a UTF-8 encoding. | 142 // sequences (e.g. an orphan surrogate) when converting to a UTF-8 encoding. |
| 143 static const uchar kBadChar = 0xFFFD; | 143 static const uchar kBadChar = 0xFFFD; |
| 144 static const uchar kBufferEmpty = 0x0; |
| 145 static const uchar kIncomplete = 0xFFFFFFFC; // any non-valid code point. |
| 144 static const unsigned kMaxEncodedSize = 4; | 146 static const unsigned kMaxEncodedSize = 4; |
| 145 static const unsigned kMaxOneByteChar = 0x7f; | 147 static const unsigned kMaxOneByteChar = 0x7f; |
| 146 static const unsigned kMaxTwoByteChar = 0x7ff; | 148 static const unsigned kMaxTwoByteChar = 0x7ff; |
| 147 static const unsigned kMaxThreeByteChar = 0xffff; | 149 static const unsigned kMaxThreeByteChar = 0xffff; |
| 148 static const unsigned kMaxFourByteChar = 0x1fffff; | 150 static const unsigned kMaxFourByteChar = 0x1fffff; |
| 149 | 151 |
| 150 // A single surrogate is coded as a 3-byte UTF-8 sequence, but two together | 152 // A single surrogate is coded as a 3-byte UTF-8 sequence, but two together |
| 151 // that match are coded as a 4-byte UTF-8 sequence. | 153 // that match are coded as a 4-byte UTF-8 sequence. |
| 152 static const unsigned kBytesSavedByCombiningSurrogates = 2; | 154 static const unsigned kBytesSavedByCombiningSurrogates = 2; |
| 153 static const unsigned kSizeOfUnmatchedSurrogate = 3; | 155 static const unsigned kSizeOfUnmatchedSurrogate = 3; |
| 154 // The maximum size a single UTF-16 code unit may take up when encoded as | 156 // The maximum size a single UTF-16 code unit may take up when encoded as |
| 155 // UTF-8. | 157 // UTF-8. |
| 156 static const unsigned kMax16BitCodeUnitSize = 3; | 158 static const unsigned kMax16BitCodeUnitSize = 3; |
| 157 static inline uchar ValueOf(const byte* str, size_t length, size_t* cursor); | 159 static inline uchar ValueOf(const byte* str, size_t length, size_t* cursor); |
| 158 | 160 |
| 161 typedef uint32_t Utf8IncrementalBuffer; |
| 162 static uchar ValueOfIncremental(byte next_byte, |
| 163 Utf8IncrementalBuffer& buffer); |
| 164 |
| 159 // Excludes non-characters from the set of valid code points. | 165 // Excludes non-characters from the set of valid code points. |
| 160 static inline bool IsValidCharacter(uchar c); | 166 static inline bool IsValidCharacter(uchar c); |
| 161 | 167 |
| 162 static bool Validate(const byte* str, size_t length); | 168 static bool Validate(const byte* str, size_t length); |
| 163 }; | 169 }; |
| 164 | 170 |
| 165 struct Uppercase { | 171 struct Uppercase { |
| 166 static bool Is(uchar c); | 172 static bool Is(uchar c); |
| 167 }; | 173 }; |
| 168 struct Lowercase { | 174 struct Lowercase { |
| (...skipping 48 matching lines...) | (...skipping 48 matching lines...) |
| 217 static const int kMaxWidth = 1; | 223 static const int kMaxWidth = 1; |
| 218 static int Convert(uchar c, | 224 static int Convert(uchar c, |
| 219 uchar n, | 225 uchar n, |
| 220 uchar* result, | 226 uchar* result, |
| 221 bool* allow_caching_ptr); | 227 bool* allow_caching_ptr); |
| 222 }; | 228 }; |
| 223 | 229 |
| 224 } // namespace unibrow | 230 } // namespace unibrow |
| 225 | 231 |
| 226 #endif // V8_UNICODE_H_ | 232 #endif // V8_UNICODE_H_ |
| OLD | NEW |