src/unicode.h - Issue 864273005: Scanner / Unicode decoding: use size_t instead of unsigned.

Side by Side Diff: src/unicode.h

Issue 864273005: Scanner / Unicode decoding: use size_t instead of unsigned. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master

Patch Set: tentative Created 5 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright 2011 the V8 project authors. All rights reserved.	1 // Copyright 2011 the V8 project authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #ifndef V8_UNICODE_H_	5 #ifndef V8_UNICODE_H_

6 #define V8_UNICODE_H_	6 #define V8_UNICODE_H_

7	7

8 #include <sys/types.h>	8 #include <sys/types.h>

9 #include "src/globals.h"	9 #include "src/globals.h"

10 #include "src/utils.h"	10 #include "src/utils.h"

(...skipping 118 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
129	129

130	130

131 class Utf8 {	131 class Utf8 {

132 public:	132 public:

133 static inline uchar Length(uchar chr, int previous);	133 static inline uchar Length(uchar chr, int previous);

134 static inline unsigned EncodeOneByte(char* out, uint8_t c);	134 static inline unsigned EncodeOneByte(char* out, uint8_t c);

135 static inline unsigned Encode(char* out,	135 static inline unsigned Encode(char* out,

136 uchar c,	136 uchar c,

137 int previous,	137 int previous,

138 bool replace_invalid = false);	138 bool replace_invalid = false);

139 static uchar CalculateValue(const byte* str,	139 static uchar CalculateValue(const byte* str, size_t length, size_t* cursor);

140 unsigned length,

141 unsigned* cursor);

142	140

143 // The unicode replacement character, used to signal invalid unicode	141 // The unicode replacement character, used to signal invalid unicode

144 // sequences (e.g. an orphan surrogate) when converting to a UTF-8 encoding.	142 // sequences (e.g. an orphan surrogate) when converting to a UTF-8 encoding.

145 static const uchar kBadChar = 0xFFFD;	143 static const uchar kBadChar = 0xFFFD;

146 static const unsigned kMaxEncodedSize = 4;	144 static const unsigned kMaxEncodedSize = 4;

147 static const unsigned kMaxOneByteChar = 0x7f;	145 static const unsigned kMaxOneByteChar = 0x7f;

148 static const unsigned kMaxTwoByteChar = 0x7ff;	146 static const unsigned kMaxTwoByteChar = 0x7ff;

149 static const unsigned kMaxThreeByteChar = 0xffff;	147 static const unsigned kMaxThreeByteChar = 0xffff;

150 static const unsigned kMaxFourByteChar = 0x1fffff;	148 static const unsigned kMaxFourByteChar = 0x1fffff;

151	149

152 // A single surrogate is coded as a 3 byte UTF-8 sequence, but two together	150 // A single surrogate is coded as a 3 byte UTF-8 sequence, but two together

153 // that match are coded as a 4 byte UTF-8 sequence.	151 // that match are coded as a 4 byte UTF-8 sequence.

154 static const unsigned kBytesSavedByCombiningSurrogates = 2;	152 static const unsigned kBytesSavedByCombiningSurrogates = 2;

155 static const unsigned kSizeOfUnmatchedSurrogate = 3;	153 static const unsigned kSizeOfUnmatchedSurrogate = 3;

156 // The maximum size a single UTF-16 code unit may take up when encoded as	154 // The maximum size a single UTF-16 code unit may take up when encoded as

157 // UTF-8.	155 // UTF-8.

158 static const unsigned kMax16BitCodeUnitSize = 3;	156 static const unsigned kMax16BitCodeUnitSize = 3;

159 static inline uchar ValueOf(const byte* str,	157 static inline uchar ValueOf(const byte* str, size_t length, size_t* cursor);

160 unsigned length,

161 unsigned* cursor);

162 };	158 };

163	159

164 struct Uppercase {	160 struct Uppercase {

165 static bool Is(uchar c);	161 static bool Is(uchar c);

166 };	162 };

167 struct Lowercase {	163 struct Lowercase {

168 static bool Is(uchar c);	164 static bool Is(uchar c);

169 };	165 };

170 struct Letter {	166 struct Letter {

171 static bool Is(uchar c);	167 static bool Is(uchar c);

(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
216 static const int kMaxWidth = 1;	212 static const int kMaxWidth = 1;

217 static int Convert(uchar c,	213 static int Convert(uchar c,

218 uchar n,	214 uchar n,

219 uchar* result,	215 uchar* result,

220 bool* allow_caching_ptr);	216 bool* allow_caching_ptr);

221 };	217 };

222	218

223 } // namespace unibrow	219 } // namespace unibrow

224	220

225 #endif // V8_UNICODE_H_	221 #endif // V8_UNICODE_H_

OLD	NEW

« no previous file with comments | « src/scanner-character-streams.cc ('k') | src/unicode.cc » ('j') | no next file with comments »