Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(4)

Side by Side Diff: src/unicode.cc

Issue 2521933002: Merged: Fix out-of-range access in unibrow::Utf8::CalculateValue. (Closed)
Patch Set: Created 4 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | src/unicode-decoder.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 // 4 //
5 // This file was generated at 2014-10-08 15:25:47.940335 5 // This file was generated at 2014-10-08 15:25:47.940335
6 6
7 #include "src/unicode.h" 7 #include "src/unicode.h"
8 #include "src/unicode-inl.h" 8 #include "src/unicode-inl.h"
9 #include <stdio.h> 9 #include <stdio.h>
10 #include <stdlib.h> 10 #include <stdlib.h>
(...skipping 217 matching lines...) Expand 10 before | Expand all | Expand 10 after
228 // This method decodes an UTF-8 value according to RFC 3629. 228 // This method decodes an UTF-8 value according to RFC 3629.
229 uchar Utf8::CalculateValue(const byte* str, size_t max_length, size_t* cursor) { 229 uchar Utf8::CalculateValue(const byte* str, size_t max_length, size_t* cursor) {
230 size_t length = NonASCIISequenceLength(str[0]); 230 size_t length = NonASCIISequenceLength(str[0]);
231 231
232 // Check continuation characters. 232 // Check continuation characters.
233 size_t max_count = std::min(length, max_length); 233 size_t max_count = std::min(length, max_length);
234 size_t count = 1; 234 size_t count = 1;
235 while (count < max_count && IsContinuationCharacter(str[count])) { 235 while (count < max_count && IsContinuationCharacter(str[count])) {
236 count++; 236 count++;
237 } 237 }
238 *cursor += count;
238 239
239 // Check overly long sequences & other conditions. Use length as error 240 // There must be enough continuation characters.
240 // indicator. 241 if (count != length) return kBadChar;
242
243 // Check overly long sequences & other conditions.
241 if (length == 3) { 244 if (length == 3) {
242 if (str[0] == 0xE0 && (str[1] < 0xA0 || str[1] > 0xBF)) { 245 if (str[0] == 0xE0 && (str[1] < 0xA0 || str[1] > 0xBF)) {
243 // Overlong three-byte sequence? 246 // Overlong three-byte sequence?
244 length = 0; 247 return kBadChar;
245 } else if (str[0] == 0xED && (str[1] < 0x80 || str[1] > 0x9F)) { 248 } else if (str[0] == 0xED && (str[1] < 0x80 || str[1] > 0x9F)) {
246 // High and low surrogate halves? 249 // High and low surrogate halves?
247 length = 0; 250 return kBadChar;
248 } 251 }
249 } else if (length == 4) { 252 } else if (length == 4) {
250 if (str[0] == 0xF0 && (str[1] < 0x90 || str[1] > 0xBF)) { 253 if (str[0] == 0xF0 && (str[1] < 0x90 || str[1] > 0xBF)) {
251 // Overlong four-byte sequence. 254 // Overlong four-byte sequence.
252 length = 0; 255 return kBadChar;
253 } else if (str[0] == 0xF4 && (str[1] < 0x80 || str[1] > 0x8F)) { 256 } else if (str[0] == 0xF4 && (str[1] < 0x80 || str[1] > 0x8F)) {
254 // Code points outside of the unicode range. 257 // Code points outside of the unicode range.
255 length = 0; 258 return kBadChar;
256 } 259 }
257 } 260 }
258 261
259 if (count != length) {
260 // All invalid encodings should land here.
261 *cursor += count;
262 return kBadChar;
263 }
264
265 // All errors have been handled, so we only have to assemble the result. 262 // All errors have been handled, so we only have to assemble the result.
266 *cursor += length;
267 switch (length) { 263 switch (length) {
268 case 1: 264 case 1:
269 return str[0]; 265 return str[0];
270 case 2: 266 case 2:
271 return ((str[0] << 6) + str[1]) - 0x00003080; 267 return ((str[0] << 6) + str[1]) - 0x00003080;
272 case 3: 268 case 3:
273 return ((str[0] << 12) + (str[1] << 6) + str[2]) - 0x000E2080; 269 return ((str[0] << 12) + (str[1] << 6) + str[2]) - 0x000E2080;
274 case 4: 270 case 4:
275 return ((str[0] << 18) + (str[1] << 12) + (str[2] << 6) + str[3]) - 271 return ((str[0] << 18) + (str[1] << 12) + (str[2] << 6) + str[3]) -
276 0x03C82080; 272 0x03C82080;
(...skipping 3230 matching lines...) Expand 10 before | Expand all | Expand 10 after
3507 sizeof(MultiCharacterSpecialCase<1>) // NOLINT 3503 sizeof(MultiCharacterSpecialCase<1>) // NOLINT
3508 + 3504 +
3509 kCanonicalizationRangeMultiStrings1Size * 3505 kCanonicalizationRangeMultiStrings1Size *
3510 sizeof(MultiCharacterSpecialCase<1>) // NOLINT 3506 sizeof(MultiCharacterSpecialCase<1>) // NOLINT
3511 + 3507 +
3512 kCanonicalizationRangeMultiStrings7Size * 3508 kCanonicalizationRangeMultiStrings7Size *
3513 sizeof(MultiCharacterSpecialCase<1>); // NOLINT 3509 sizeof(MultiCharacterSpecialCase<1>); // NOLINT
3514 } 3510 }
3515 3511
3516 } // namespace unibrow 3512 } // namespace unibrow
OLD | NEW
« no previous file with comments | « no previous file | src/unicode-decoder.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698