OLD | NEW |
1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 // | 4 // |
5 // This file was generated at 2014-10-08 15:25:47.940335 | 5 // This file was generated at 2014-10-08 15:25:47.940335 |
6 | 6 |
7 #include "src/unicode.h" | 7 #include "src/unicode.h" |
8 #include "src/unicode-inl.h" | 8 #include "src/unicode-inl.h" |
9 #include <stdio.h> | 9 #include <stdio.h> |
10 #include <stdlib.h> | 10 #include <stdlib.h> |
(...skipping 315 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
326 // Store the kind - 1 (i.e., remaining bytes) in the top byte, value | 326 // Store the kind - 1 (i.e., remaining bytes) in the top byte, value |
327 // in the bottom three. | 327 // in the bottom three. |
328 *buffer = (kind - 1) << 24 | (next & mask); | 328 *buffer = (kind - 1) << 24 | (next & mask); |
329 return kIncomplete; | 329 return kIncomplete; |
330 } else { | 330 } else { |
331 // No buffer, and not the start of a 1-byte char (handled at the | 331 // No buffer, and not the start of a 1-byte char (handled at the |
332 // beginning), and not the start of a 2..4 byte char? Bad char. | 332 // beginning), and not the start of a 2..4 byte char? Bad char. |
333 *buffer = 0; | 333 *buffer = 0; |
334 return kBadChar; | 334 return kBadChar; |
335 } | 335 } |
| 336 } else if (*buffer <= 0xff) { |
| 337 // We have one unprocessed byte left (from the last else case in this if |
| 338 // statement). |
| 339 uchar previous = *buffer; |
| 340 *buffer = 0; |
| 341 uchar t = ValueOfIncremental(previous, buffer); |
| 342 if (t == kIncomplete) { |
| 343 // If we have an incomplete character, process both the previous and the |
| 344 // next byte at once. |
| 345 return ValueOfIncremental(next, buffer); |
| 346 } else { |
| 347 // Otherwise, process the previous byte and save the next byte for next |
| 348 // time. |
| 349 DCHECK_EQ(0, *buffer); |
| 350 *buffer = next; |
| 351 return t; |
| 352 } |
| 353 } else if (IsContinuationCharacter(next)) { |
| 354 // We're inside of a character, as described by buffer. |
| 355 |
| 356 // How many bytes (excluding this one) do we still expect? |
| 357 uint8_t count = (*buffer >> 24) - 1; |
| 358 // Update the value. |
| 359 uint32_t value = ((*buffer & 0xffffff) << 6) | (next & 0x3F); |
| 360 if (count) { |
| 361 *buffer = count << 24 | value; |
| 362 return kIncomplete; |
| 363 } else { |
| 364 *buffer = 0; |
| 365 return value; |
| 366 } |
336 } else { | 367 } else { |
337 // We're inside of a character, as described by buffer. | 368 // Within a character, but not a continuation character? Then the |
338 if (IsContinuationCharacter(next)) { | 369 // previous char was a bad char. But we need to save the current |
339 // How many bytes (excluding this one) do we still expect? | 370 // one. |
340 uint8_t count = (*buffer >> 24) - 1; | 371 *buffer = next; |
341 // Update the value. | 372 return kBadChar; |
342 uint32_t value = ((*buffer & 0xffffff) << 6) | (next & 0x3F); | |
343 if (count) { | |
344 *buffer = count << 24 | value; | |
345 return kIncomplete; | |
346 } else { | |
347 *buffer = 0; | |
348 return value; | |
349 } | |
350 } else { | |
351 // Within a character, but not a continuation character? Bad char. | |
352 *buffer = 0; | |
353 return kBadChar; | |
354 } | |
355 } | 373 } |
356 } | 374 } |
357 | 375 |
// Flushes the state still pending in the incremental buffer once the input
// has ended. Returns kBufferEmpty when nothing is pending (*buffer == 0).
// Otherwise it feeds a 0 byte to ValueOfIncremental and returns that result,
// except that kIncomplete is reported as kBadChar. The buffer pointer must
// not be null (checked with DCHECK_NOT_NULL).
| 376 uchar Utf8::ValueOfIncrementalFinish(Utf8IncrementalBuffer* buffer) { |
| 377 DCHECK_NOT_NULL(buffer); |
| 378 if (*buffer == 0) { |
| 379 return kBufferEmpty; |
| 380 } else { |
| 381 // Process left-over chars by pushing a 0 byte through the decoder: a saved unprocessed byte gets decoded, while a partially decoded multi-byte char is terminated (presumably 0 is not a continuation byte -- confirm against IsContinuationCharacter). An incomplete char at the end maps to kBadChar. |
| 382 uchar t = ValueOfIncremental(0, buffer); |
| 383 return (t == kIncomplete) ? kBadChar : t; |
| 384 } |
| 385 } |
| 386 |
358 bool Utf8::Validate(const byte* bytes, size_t length) { | 387 bool Utf8::Validate(const byte* bytes, size_t length) { |
359 size_t cursor = 0; | 388 size_t cursor = 0; |
360 | 389 |
361 // Performance optimization: Skip over single-byte values first. | 390 // Performance optimization: Skip over single-byte values first. |
362 while (cursor < length && bytes[cursor] <= kMaxOneByteChar) { | 391 while (cursor < length && bytes[cursor] <= kMaxOneByteChar) { |
363 ++cursor; | 392 ++cursor; |
364 } | 393 } |
365 | 394 |
366 while (cursor < length) { | 395 while (cursor < length) { |
367 uchar c = ValueOf(bytes + cursor, length - cursor, &cursor); | 396 uchar c = ValueOf(bytes + cursor, length - cursor, &cursor); |
(...skipping 3129 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3497 sizeof(MultiCharacterSpecialCase<1>) // NOLINT | 3526 sizeof(MultiCharacterSpecialCase<1>) // NOLINT |
3498 + | 3527 + |
3499 kCanonicalizationRangeMultiStrings1Size * | 3528 kCanonicalizationRangeMultiStrings1Size * |
3500 sizeof(MultiCharacterSpecialCase<1>) // NOLINT | 3529 sizeof(MultiCharacterSpecialCase<1>) // NOLINT |
3501 + | 3530 + |
3502 kCanonicalizationRangeMultiStrings7Size * | 3531 kCanonicalizationRangeMultiStrings7Size * |
3503 sizeof(MultiCharacterSpecialCase<1>); // NOLINT | 3532 sizeof(MultiCharacterSpecialCase<1>); // NOLINT |
3504 } | 3533 } |
3505 | 3534 |
3506 } // namespace unibrow | 3535 } // namespace unibrow |
OLD | NEW |