Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(628)

Side by Side Diff: src/unicode.h

Issue 2314663002: Rework scanner-character-streams. (Closed)
Patch Set: Some fixes, and marching down the very long road to make all compilers happy. Created 4 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 // Copyright 2011 the V8 project authors. All rights reserved. 1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #ifndef V8_UNICODE_H_ 5 #ifndef V8_UNICODE_H_
6 #define V8_UNICODE_H_ 6 #define V8_UNICODE_H_
7 7
8 #include <sys/types.h> 8 #include <sys/types.h>
9 #include "src/globals.h" 9 #include "src/globals.h"
10 #include "src/utils.h" 10 #include "src/utils.h"
(...skipping 123 matching lines...) Expand 10 before | Expand all | Expand 10 after
134 static inline unsigned EncodeOneByte(char* out, uint8_t c); 134 static inline unsigned EncodeOneByte(char* out, uint8_t c);
135 static inline unsigned Encode(char* out, 135 static inline unsigned Encode(char* out,
136 uchar c, 136 uchar c,
137 int previous, 137 int previous,
138 bool replace_invalid = false); 138 bool replace_invalid = false);
139 static uchar CalculateValue(const byte* str, size_t length, size_t* cursor); 139 static uchar CalculateValue(const byte* str, size_t length, size_t* cursor);
140 140
141 // The unicode replacement character, used to signal invalid unicode 141 // The unicode replacement character, used to signal invalid unicode
142 // sequences (e.g. an orphan surrogate) when converting to a UTF-8 encoding. 142 // sequences (e.g. an orphan surrogate) when converting to a UTF-8 encoding.
143 static const uchar kBadChar = 0xFFFD; 143 static const uchar kBadChar = 0xFFFD;
144 static const uchar kBufferEmpty = 0x0;
145 static const uchar kIncomplete = 0xFFFFFFFC; // any non-valid code point.
144 static const unsigned kMaxEncodedSize = 4; 146 static const unsigned kMaxEncodedSize = 4;
145 static const unsigned kMaxOneByteChar = 0x7f; 147 static const unsigned kMaxOneByteChar = 0x7f;
146 static const unsigned kMaxTwoByteChar = 0x7ff; 148 static const unsigned kMaxTwoByteChar = 0x7ff;
147 static const unsigned kMaxThreeByteChar = 0xffff; 149 static const unsigned kMaxThreeByteChar = 0xffff;
148 static const unsigned kMaxFourByteChar = 0x1fffff; 150 static const unsigned kMaxFourByteChar = 0x1fffff;
149 151
150 // A single surrogate is coded as a 3 byte UTF-8 sequence, but two together 152 // A single surrogate is coded as a 3 byte UTF-8 sequence, but two together
151 // that match are coded as a 4 byte UTF-8 sequence. 153 // that match are coded as a 4 byte UTF-8 sequence.
152 static const unsigned kBytesSavedByCombiningSurrogates = 2; 154 static const unsigned kBytesSavedByCombiningSurrogates = 2;
153 static const unsigned kSizeOfUnmatchedSurrogate = 3; 155 static const unsigned kSizeOfUnmatchedSurrogate = 3;
154 // The maximum size a single UTF-16 code unit may take up when encoded as 156 // The maximum size a single UTF-16 code unit may take up when encoded as
155 // UTF-8. 157 // UTF-8.
156 static const unsigned kMax16BitCodeUnitSize = 3; 158 static const unsigned kMax16BitCodeUnitSize = 3;
157 static inline uchar ValueOf(const byte* str, size_t length, size_t* cursor); 159 static inline uchar ValueOf(const byte* str, size_t length, size_t* cursor);
158 160
161 typedef uint32_t Utf8IncrementalBuffer;
162 static uchar ValueOfIncremental(byte next_byte,
163 Utf8IncrementalBuffer& buffer);
164
159 // Excludes non-characters from the set of valid code points. 165 // Excludes non-characters from the set of valid code points.
160 static inline bool IsValidCharacter(uchar c); 166 static inline bool IsValidCharacter(uchar c);
161 167
162 static bool Validate(const byte* str, size_t length); 168 static bool Validate(const byte* str, size_t length);
163 }; 169 };
164 170
165 struct Uppercase { 171 struct Uppercase {
166 static bool Is(uchar c); 172 static bool Is(uchar c);
167 }; 173 };
168 struct Lowercase { 174 struct Lowercase {
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after
217 static const int kMaxWidth = 1; 223 static const int kMaxWidth = 1;
218 static int Convert(uchar c, 224 static int Convert(uchar c,
219 uchar n, 225 uchar n,
220 uchar* result, 226 uchar* result,
221 bool* allow_caching_ptr); 227 bool* allow_caching_ptr);
222 }; 228 };
223 229
224 } // namespace unibrow 230 } // namespace unibrow
225 231
226 #endif // V8_UNICODE_H_ 232 #endif // V8_UNICODE_H_
OLD | NEW

Powered by Google App Engine
This is Rietveld 408576698