src/unicode.cc - Issue 2521933002: Merged: Fix out-of-range access in unibrow::Utf8::CalculateValue.

Side by Side Diff: src/unicode.cc

Issue 2521933002: Merged: Fix out-of-range access in unibrow::Utf8::CalculateValue. (Closed)

Patch Set: Created 4 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright 2012 the V8 project authors. All rights reserved.	1 // Copyright 2012 the V8 project authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4 //	4 //

5 // This file was generated at 2014-10-08 15:25:47.940335	5 // This file was generated at 2014-10-08 15:25:47.940335

6	6

7 #include "src/unicode.h"	7 #include "src/unicode.h"

8 #include "src/unicode-inl.h"	8 #include "src/unicode-inl.h"

9 #include <stdio.h>	9 #include <stdio.h>

10 #include <stdlib.h>	10 #include <stdlib.h>

(...skipping 217 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
228 // This method decodes an UTF-8 value according to RFC 3629.	228 // This method decodes an UTF-8 value according to RFC 3629.

229 uchar Utf8::CalculateValue(const byte* str, size_t max_length, size_t* cursor) {	229 uchar Utf8::CalculateValue(const byte* str, size_t max_length, size_t* cursor) {

230 size_t length = NonASCIISequenceLength(str[0]);	230 size_t length = NonASCIISequenceLength(str[0]);

231	231

232 // Check continuation characters.	232 // Check continuation characters.

233 size_t max_count = std::min(length, max_length);	233 size_t max_count = std::min(length, max_length);

234 size_t count = 1;	234 size_t count = 1;

235 while (count < max_count && IsContinuationCharacter(str[count])) {	235 while (count < max_count && IsContinuationCharacter(str[count])) {

236 count++;	236 count++;

237 }	237 }

	238 *cursor += count;

238	239

239 // Check overly long sequences & other conditions. Use length as error	240 // There must be enough continuation characters.

240 // indicator.	241 if (count != length) return kBadChar;

	242

	243 // Check overly long sequences & other conditions.

241 if (length == 3) {	244 if (length == 3) {

242 if (str[0] == 0xE0 && (str[1] < 0xA0 || str[1] > 0xBF)) {	245 if (str[0] == 0xE0 && (str[1] < 0xA0 || str[1] > 0xBF)) {

243 // Overlong three-byte sequence?	246 // Overlong three-byte sequence?

244 length = 0;	247 return kBadChar;

245 } else if (str[0] == 0xED && (str[1] < 0x80 || str[1] > 0x9F)) {	248 } else if (str[0] == 0xED && (str[1] < 0x80 || str[1] > 0x9F)) {

246 // High and low surrogate halves?	249 // High and low surrogate halves?

247 length = 0;	250 return kBadChar;

248 }	251 }

249 } else if (length == 4) {	252 } else if (length == 4) {

250 if (str[0] == 0xF0 && (str[1] < 0x90 || str[1] > 0xBF)) {	253 if (str[0] == 0xF0 && (str[1] < 0x90 || str[1] > 0xBF)) {

251 // Overlong four-byte sequence.	254 // Overlong four-byte sequence.

252 length = 0;	255 return kBadChar;

253 } else if (str[0] == 0xF4 && (str[1] < 0x80 || str[1] > 0x8F)) {	256 } else if (str[0] == 0xF4 && (str[1] < 0x80 || str[1] > 0x8F)) {

254 // Code points outside of the unicode range.	257 // Code points outside of the unicode range.

255 length = 0;	258 return kBadChar;

256 }	259 }

257 }	260 }

258	261

259 if (count != length) {

260 // All invalid encodings should land here.

261 *cursor += count;

262 return kBadChar;

263 }

264

265 // All errors have been handled, so we only have to assemble the result.	262 // All errors have been handled, so we only have to assemble the result.

266 *cursor += length;

267 switch (length) {	263 switch (length) {

268 case 1:	264 case 1:

269 return str[0];	265 return str[0];

270 case 2:	266 case 2:

271 return ((str[0] << 6) + str[1]) - 0x00003080;	267 return ((str[0] << 6) + str[1]) - 0x00003080;

272 case 3:	268 case 3:

273 return ((str[0] << 12) + (str[1] << 6) + str[2]) - 0x000E2080;	269 return ((str[0] << 12) + (str[1] << 6) + str[2]) - 0x000E2080;

274 case 4:	270 case 4:

275 return ((str[0] << 18) + (str[1] << 12) + (str[2] << 6) + str[3]) -	271 return ((str[0] << 18) + (str[1] << 12) + (str[2] << 6) + str[3]) -

276 0x03C82080;	272 0x03C82080;

(...skipping 3230 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
3507 sizeof(MultiCharacterSpecialCase<1>) // NOLINT	3503 sizeof(MultiCharacterSpecialCase<1>) // NOLINT

3508 +	3504 +

3509 kCanonicalizationRangeMultiStrings1Size *	3505 kCanonicalizationRangeMultiStrings1Size *

3510 sizeof(MultiCharacterSpecialCase<1>) // NOLINT	3506 sizeof(MultiCharacterSpecialCase<1>) // NOLINT

3511 +	3507 +

3512 kCanonicalizationRangeMultiStrings7Size *	3508 kCanonicalizationRangeMultiStrings7Size *

3513 sizeof(MultiCharacterSpecialCase<1>); // NOLINT	3509 sizeof(MultiCharacterSpecialCase<1>); // NOLINT

3514 }	3510 }

3515	3511

3516 } // namespace unibrow	3512 } // namespace unibrow

OLD	NEW

« no previous file with comments | « no previous file | src/unicode-decoder.h » ('j') | no next file with comments »