| OLD | NEW |
| 1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #ifndef V8_UNICODE_H_ | 5 #ifndef V8_UNICODE_H_ |
| 6 #define V8_UNICODE_H_ | 6 #define V8_UNICODE_H_ |
| 7 | 7 |
| 8 #include <sys/types.h> | 8 #include <sys/types.h> |
| 9 #include "src/globals.h" | 9 #include "src/globals.h" |
| 10 #include "src/utils.h" | 10 #include "src/utils.h" |
| (...skipping 118 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 129 | 129 |
| 130 | 130 |
| 131 class Utf8 { | 131 class Utf8 { |
| 132 public: | 132 public: |
| 133 static inline uchar Length(uchar chr, int previous); | 133 static inline uchar Length(uchar chr, int previous); |
| 134 static inline unsigned EncodeOneByte(char* out, uint8_t c); | 134 static inline unsigned EncodeOneByte(char* out, uint8_t c); |
| 135 static inline unsigned Encode(char* out, | 135 static inline unsigned Encode(char* out, |
| 136 uchar c, | 136 uchar c, |
| 137 int previous, | 137 int previous, |
| 138 bool replace_invalid = false); | 138 bool replace_invalid = false); |
| 139 static uchar CalculateValue(const byte* str, | 139 static uchar CalculateValue(const byte* str, size_t length, size_t* cursor); |
| 140 unsigned length, | |
| 141 unsigned* cursor); | |
| 142 | 140 |
| 143 // The unicode replacement character, used to signal invalid unicode | 141 // The unicode replacement character, used to signal invalid unicode |
| 144 // sequences (e.g. an orphan surrogate) when converting to a UTF-8 encoding. | 142 // sequences (e.g. an orphan surrogate) when converting to a UTF-8 encoding. |
| 145 static const uchar kBadChar = 0xFFFD; | 143 static const uchar kBadChar = 0xFFFD; |
| 146 static const unsigned kMaxEncodedSize = 4; | 144 static const unsigned kMaxEncodedSize = 4; |
| 147 static const unsigned kMaxOneByteChar = 0x7f; | 145 static const unsigned kMaxOneByteChar = 0x7f; |
| 148 static const unsigned kMaxTwoByteChar = 0x7ff; | 146 static const unsigned kMaxTwoByteChar = 0x7ff; |
| 149 static const unsigned kMaxThreeByteChar = 0xffff; | 147 static const unsigned kMaxThreeByteChar = 0xffff; |
| 150 static const unsigned kMaxFourByteChar = 0x1fffff; | 148 static const unsigned kMaxFourByteChar = 0x1fffff; |
| 151 | 149 |
| 152 // A single surrogate is coded as a 3 byte UTF-8 sequence, but two together | 150 // A single surrogate is coded as a 3 byte UTF-8 sequence, but two together |
| 153 // that match are coded as a 4 byte UTF-8 sequence. | 151 // that match are coded as a 4 byte UTF-8 sequence. |
| 154 static const unsigned kBytesSavedByCombiningSurrogates = 2; | 152 static const unsigned kBytesSavedByCombiningSurrogates = 2; |
| 155 static const unsigned kSizeOfUnmatchedSurrogate = 3; | 153 static const unsigned kSizeOfUnmatchedSurrogate = 3; |
| 156 // The maximum size a single UTF-16 code unit may take up when encoded as | 154 // The maximum size a single UTF-16 code unit may take up when encoded as |
| 157 // UTF-8. | 155 // UTF-8. |
| 158 static const unsigned kMax16BitCodeUnitSize = 3; | 156 static const unsigned kMax16BitCodeUnitSize = 3; |
| 159 static inline uchar ValueOf(const byte* str, | 157 static inline uchar ValueOf(const byte* str, size_t length, size_t* cursor); |
| 160 unsigned length, | |
| 161 unsigned* cursor); | |
| 162 }; | 158 }; |
| 163 | 159 |
| 164 struct Uppercase { | 160 struct Uppercase { |
| 165 static bool Is(uchar c); | 161 static bool Is(uchar c); |
| 166 }; | 162 }; |
| 167 struct Lowercase { | 163 struct Lowercase { |
| 168 static bool Is(uchar c); | 164 static bool Is(uchar c); |
| 169 }; | 165 }; |
| 170 struct Letter { | 166 struct Letter { |
| 171 static bool Is(uchar c); | 167 static bool Is(uchar c); |
| (...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 216 static const int kMaxWidth = 1; | 212 static const int kMaxWidth = 1; |
| 217 static int Convert(uchar c, | 213 static int Convert(uchar c, |
| 218 uchar n, | 214 uchar n, |
| 219 uchar* result, | 215 uchar* result, |
| 220 bool* allow_caching_ptr); | 216 bool* allow_caching_ptr); |
| 221 }; | 217 }; |
| 222 | 218 |
| 223 } // namespace unibrow | 219 } // namespace unibrow |
| 224 | 220 |
| 225 #endif // V8_UNICODE_H_ | 221 #endif // V8_UNICODE_H_ |
| OLD | NEW |