OLD | NEW |
---|---|
1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 84 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
95 | 95 |
96 class UnicodeData { | 96 class UnicodeData { |
97 private: | 97 private: |
98 friend class Test; | 98 friend class Test; |
99 static int GetByteCount(); | 99 static int GetByteCount(); |
100 static const uchar kMaxCodePoint; | 100 static const uchar kMaxCodePoint; |
101 }; | 101 }; |
102 | 102 |
103 class Utf16 { | 103 class Utf16 { |
104 public: | 104 public: |
105 static inline bool IsSurrogatePair(int lead, int trail) { | |
106 return IsLeadSurrogate(lead) && IsTrailSurrogate(trail); | |
107 } | |
105 static inline bool IsLeadSurrogate(int code) { | 108 static inline bool IsLeadSurrogate(int code) { |
106 if (code == kNoPreviousCharacter) return false; | 109 if (code == kNoPreviousCharacter) return false; |
107 return (code & 0xfc00) == 0xd800; | 110 return (code & 0xfc00) == 0xd800; |
108 } | 111 } |
109 static inline bool IsTrailSurrogate(int code) { | 112 static inline bool IsTrailSurrogate(int code) { |
110 if (code == kNoPreviousCharacter) return false; | 113 if (code == kNoPreviousCharacter) return false; |
111 return (code & 0xfc00) == 0xdc00; | 114 return (code & 0xfc00) == 0xdc00; |
112 } | 115 } |
113 | 116 |
114 static inline int CombineSurrogatePair(uchar lead, uchar trail) { | 117 static inline int CombineSurrogatePair(uchar lead, uchar trail) { |
(...skipping 24 matching lines...) Expand all Loading... | |
139 // Returns 0 if character does not convert to single latin-1 character | 142 // Returns 0 if character does not convert to single latin-1 character |
140 // or if the character does not convert back to latin-1 via inverse | 143 // or if the character does not convert back to latin-1 via inverse |
141 // operation (upper to lower, etc). | 144 // operation (upper to lower, etc). |
142 static inline uint16_t ConvertNonLatin1ToLatin1(uint16_t); | 145 static inline uint16_t ConvertNonLatin1ToLatin1(uint16_t); |
143 }; | 146 }; |
144 | 147 |
145 class Utf8 { | 148 class Utf8 { |
146 public: | 149 public: |
147 static inline uchar Length(uchar chr, int previous); | 150 static inline uchar Length(uchar chr, int previous); |
148 static inline unsigned EncodeOneByte(char* out, uint8_t c); | 151 static inline unsigned EncodeOneByte(char* out, uint8_t c); |
149 static inline unsigned Encode( | 152 static inline unsigned Encode(char* out, |
150 char* out, uchar c, int previous); | 153 uchar c, |
154 int previous, | |
155 bool allow_invalid); | |
dcarney
2014/01/13 09:19:56
needs default value
haimuiba
2014/01/15 10:52:34
Done.
| |
151 static uchar CalculateValue(const byte* str, | 156 static uchar CalculateValue(const byte* str, |
152 unsigned length, | 157 unsigned length, |
153 unsigned* cursor); | 158 unsigned* cursor); |
159 | |
160 // The Unicode replacement character, used to signal invalid Unicode | |
161 // sequences (e.g. an orphan surrogate) when converting to a UTF-8 encoding. | |
154 static const uchar kBadChar = 0xFFFD; | 162 static const uchar kBadChar = 0xFFFD; |
155 static const unsigned kMaxEncodedSize = 4; | 163 static const unsigned kMaxEncodedSize = 4; |
156 static const unsigned kMaxOneByteChar = 0x7f; | 164 static const unsigned kMaxOneByteChar = 0x7f; |
157 static const unsigned kMaxTwoByteChar = 0x7ff; | 165 static const unsigned kMaxTwoByteChar = 0x7ff; |
158 static const unsigned kMaxThreeByteChar = 0xffff; | 166 static const unsigned kMaxThreeByteChar = 0xffff; |
159 static const unsigned kMaxFourByteChar = 0x1fffff; | 167 static const unsigned kMaxFourByteChar = 0x1fffff; |
160 | 168 |
161 // A single surrogate is coded as a 3 byte UTF-8 sequence, but two together | 169 // A single surrogate is coded as a 3 byte UTF-8 sequence, but two together |
162 // that match are coded as a 4 byte UTF-8 sequence. | 170 // that match are coded as a 4 byte UTF-8 sequence. |
163 static const unsigned kBytesSavedByCombiningSurrogates = 2; | 171 static const unsigned kBytesSavedByCombiningSurrogates = 2; |
164 static const unsigned kSizeOfUnmatchedSurrogate = 3; | 172 static const unsigned kSizeOfUnmatchedSurrogate = 3; |
173 // The maximum size a single UTF-16 code unit may take up when encoded as | |
174 // UTF-8. | |
175 static const unsigned kMax16BitCodeUnitSize = 3; | |
165 static inline uchar ValueOf(const byte* str, | 176 static inline uchar ValueOf(const byte* str, |
166 unsigned length, | 177 unsigned length, |
167 unsigned* cursor); | 178 unsigned* cursor); |
168 }; | 179 }; |
169 | 180 |
170 | 181 |
171 class Utf8DecoderBase { | 182 class Utf8DecoderBase { |
172 public: | 183 public: |
173 // Initialization done in subclass. | 184 // Initialization done in subclass. |
174 inline Utf8DecoderBase(); | 185 inline Utf8DecoderBase(); |
(...skipping 92 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
267 static const int kMaxWidth = 1; | 278 static const int kMaxWidth = 1; |
268 static int Convert(uchar c, | 279 static int Convert(uchar c, |
269 uchar n, | 280 uchar n, |
270 uchar* result, | 281 uchar* result, |
271 bool* allow_caching_ptr); | 282 bool* allow_caching_ptr); |
272 }; | 283 }; |
273 | 284 |
274 } // namespace unibrow | 285 } // namespace unibrow |
275 | 286 |
276 #endif // V8_UNICODE_H_ | 287 #endif // V8_UNICODE_H_ |
OLD | NEW |