| OLD | NEW |
| 1 // Copyright 2014 the V8 project authors. All rights reserved. | 1 // Copyright 2014 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 | 5 |
| 6 #include "src/unicode-inl.h" | 6 #include "src/unicode-inl.h" |
| 7 #include "src/unicode-decoder.h" | 7 #include "src/unicode-decoder.h" |
| 8 #include <stdio.h> | 8 #include <stdio.h> |
| 9 #include <stdlib.h> | 9 #include <stdlib.h> |
| 10 | 10 |
| 11 namespace unibrow { | 11 namespace unibrow { |
| 12 | 12 |
| 13 void Utf8DecoderBase::Reset(uint16_t* buffer, unsigned buffer_length, | 13 void Utf8DecoderBase::Reset(uint16_t* buffer, size_t buffer_length, |
| 14 const uint8_t* stream, unsigned stream_length) { | 14 const uint8_t* stream, size_t stream_length) { |
| 15 // Assume everything will fit in the buffer and stream won't be needed. | 15 // Assume everything will fit in the buffer and stream won't be needed. |
| 16 last_byte_of_buffer_unused_ = false; | 16 last_byte_of_buffer_unused_ = false; |
| 17 unbuffered_start_ = NULL; | 17 unbuffered_start_ = NULL; |
| 18 bool writing_to_buffer = true; | 18 bool writing_to_buffer = true; |
| 19 // Loop until stream is read, writing to buffer as long as buffer has space. | 19 // Loop until stream is read, writing to buffer as long as buffer has space. |
| 20 unsigned utf16_length = 0; | 20 size_t utf16_length = 0; |
| 21 while (stream_length != 0) { | 21 while (stream_length != 0) { |
| 22 unsigned cursor = 0; | 22 size_t cursor = 0; |
| 23 uint32_t character = Utf8::ValueOf(stream, stream_length, &cursor); | 23 uint32_t character = Utf8::ValueOf(stream, stream_length, &cursor); |
| 24 DCHECK(cursor > 0 && cursor <= stream_length); | 24 DCHECK(cursor > 0 && cursor <= stream_length); |
| 25 stream += cursor; | 25 stream += cursor; |
| 26 stream_length -= cursor; | 26 stream_length -= cursor; |
| 27 bool is_two_characters = character > Utf16::kMaxNonSurrogateCharCode; | 27 bool is_two_characters = character > Utf16::kMaxNonSurrogateCharCode; |
| 28 utf16_length += is_two_characters ? 2 : 1; | 28 utf16_length += is_two_characters ? 2 : 1; |
| 29 // Don't need to write to the buffer, but still need utf16_length. | 29 // Don't need to write to the buffer, but still need utf16_length. |
| 30 if (!writing_to_buffer) continue; | 30 if (!writing_to_buffer) continue; |
| 31 // Write out the characters to the buffer. | 31 // Write out the characters to the buffer. |
| 32 // Must check for equality with buffer_length as utf16_length has already been updated. | 32 // Must check for equality with buffer_length as utf16_length has already been updated. |
| (...skipping 16 matching lines...) |
| 49 DCHECK(is_two_characters); | 49 DCHECK(is_two_characters); |
| 50 writing_to_buffer = false; | 50 writing_to_buffer = false; |
| 51 last_byte_of_buffer_unused_ = true; | 51 last_byte_of_buffer_unused_ = true; |
| 52 unbuffered_start_ = stream - cursor; | 52 unbuffered_start_ = stream - cursor; |
| 53 } | 53 } |
| 54 utf16_length_ = utf16_length; | 54 utf16_length_ = utf16_length; |
| 55 } | 55 } |
| 56 | 56 |
| 57 | 57 |
| 58 void Utf8DecoderBase::WriteUtf16Slow(const uint8_t* stream, uint16_t* data, | 58 void Utf8DecoderBase::WriteUtf16Slow(const uint8_t* stream, uint16_t* data, |
| 59 unsigned data_length) { | 59 size_t data_length) { |
| 60 while (data_length != 0) { | 60 while (data_length != 0) { |
| 61 unsigned cursor = 0; | 61 size_t cursor = 0; |
| 62 uint32_t character = Utf8::ValueOf(stream, Utf8::kMaxEncodedSize, &cursor); | 62 uint32_t character = Utf8::ValueOf(stream, Utf8::kMaxEncodedSize, &cursor); |
| 63 // No bounds checking is done on stream here, | 63 // No bounds checking is done on stream here, |
| 64 // as that was already done in Reset. | 64 // as that was already done in Reset. |
| 65 stream += cursor; | 65 stream += cursor; |
| 66 if (character > unibrow::Utf16::kMaxNonSurrogateCharCode) { | 66 if (character > unibrow::Utf16::kMaxNonSurrogateCharCode) { |
| 67 *data++ = Utf16::LeadSurrogate(character); | 67 *data++ = Utf16::LeadSurrogate(character); |
| 68 *data++ = Utf16::TrailSurrogate(character); | 68 *data++ = Utf16::TrailSurrogate(character); |
| 69 DCHECK(data_length > 1); | 69 DCHECK(data_length > 1); |
| 70 data_length -= 2; | 70 data_length -= 2; |
| 71 } else { | 71 } else { |
| 72 *data++ = character; | 72 *data++ = character; |
| 73 data_length -= 1; | 73 data_length -= 1; |
| 74 } | 74 } |
| 75 } | 75 } |
| 76 } | 76 } |
| 77 | 77 |
| 78 } // namespace unibrow | 78 } // namespace unibrow |
| OLD | NEW |