OLD | NEW |
1 // Copyright 2014 the V8 project authors. All rights reserved. | 1 // Copyright 2014 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 | 5 |
6 #include "src/unicode-inl.h" | 6 #include "src/unicode-inl.h" |
7 #include "src/unicode-decoder.h" | 7 #include "src/unicode-decoder.h" |
8 #include <stdio.h> | 8 #include <stdio.h> |
9 #include <stdlib.h> | 9 #include <stdlib.h> |
10 | 10 |
11 namespace unibrow { | 11 namespace unibrow { |
12 | 12 |
13 void Utf8DecoderBase::Reset(uint16_t* buffer, size_t buffer_length, | 13 void Utf8DecoderBase::Reset(uint16_t* buffer, size_t buffer_length, |
14 const uint8_t* stream, size_t stream_length) { | 14 const uint8_t* stream, size_t stream_length) { |
15 // Assume everything will fit in the buffer and stream won't be needed. | 15 // Assume everything will fit in the buffer and stream won't be needed. |
16 last_byte_of_buffer_unused_ = false; | 16 last_byte_of_buffer_unused_ = false; |
17 unbuffered_start_ = NULL; | 17 unbuffered_start_ = NULL; |
| 18 unbuffered_length_ = 0; |
18 bool writing_to_buffer = true; | 19 bool writing_to_buffer = true; |
19 // Loop until stream is read, writing to buffer as long as buffer has space. | 20 // Loop until stream is read, writing to buffer as long as buffer has space. |
20 size_t utf16_length = 0; | 21 size_t utf16_length = 0; |
21 while (stream_length != 0) { | 22 while (stream_length != 0) { |
22 size_t cursor = 0; | 23 size_t cursor = 0; |
23 uint32_t character = Utf8::ValueOf(stream, stream_length, &cursor); | 24 uint32_t character = Utf8::ValueOf(stream, stream_length, &cursor); |
24 DCHECK(cursor > 0 && cursor <= stream_length); | 25 DCHECK(cursor > 0 && cursor <= stream_length); |
25 stream += cursor; | 26 stream += cursor; |
26 stream_length -= cursor; | 27 stream_length -= cursor; |
27 bool is_two_characters = character > Utf16::kMaxNonSurrogateCharCode; | 28 bool is_two_characters = character > Utf16::kMaxNonSurrogateCharCode; |
28 utf16_length += is_two_characters ? 2 : 1; | 29 utf16_length += is_two_characters ? 2 : 1; |
29 // Don't need to write to the buffer, but still need utf16_length. | 30 // Don't need to write to the buffer, but still need utf16_length. |
30 if (!writing_to_buffer) continue; | 31 if (!writing_to_buffer) continue; |
31 // Write out the characters to the buffer. | 32 // Write out the characters to the buffer. |
32 // Must check for equality with buffer_length as we've already updated it. | 33 // Must check for equality with buffer_length as we've already updated it. |
33 if (utf16_length <= buffer_length) { | 34 if (utf16_length <= buffer_length) { |
34 if (is_two_characters) { | 35 if (is_two_characters) { |
35 *buffer++ = Utf16::LeadSurrogate(character); | 36 *buffer++ = Utf16::LeadSurrogate(character); |
36 *buffer++ = Utf16::TrailSurrogate(character); | 37 *buffer++ = Utf16::TrailSurrogate(character); |
37 } else { | 38 } else { |
38 *buffer++ = character; | 39 *buffer++ = character; |
39 } | 40 } |
40 if (utf16_length == buffer_length) { | 41 if (utf16_length == buffer_length) { |
41 // Just wrote last character of buffer | 42 // Just wrote last character of buffer |
42 writing_to_buffer = false; | 43 writing_to_buffer = false; |
43 unbuffered_start_ = stream; | 44 unbuffered_start_ = stream; |
| 45 unbuffered_length_ = stream_length; |
44 } | 46 } |
45 continue; | 47 continue; |
46 } | 48 } |
47 // Have gone over buffer. | 49 // Have gone over buffer. |
48 // Last char of buffer is unused, set cursor back. | 50 // Last char of buffer is unused, set cursor back. |
49 DCHECK(is_two_characters); | 51 DCHECK(is_two_characters); |
50 writing_to_buffer = false; | 52 writing_to_buffer = false; |
51 last_byte_of_buffer_unused_ = true; | 53 last_byte_of_buffer_unused_ = true; |
52 unbuffered_start_ = stream - cursor; | 54 unbuffered_start_ = stream - cursor; |
| 55 unbuffered_length_ = stream_length + cursor; |
53 } | 56 } |
54 utf16_length_ = utf16_length; | 57 utf16_length_ = utf16_length; |
55 } | 58 } |
56 | 59 |
57 | 60 |
58 void Utf8DecoderBase::WriteUtf16Slow(const uint8_t* stream, uint16_t* data, | 61 void Utf8DecoderBase::WriteUtf16Slow(const uint8_t* stream, |
| 62 size_t stream_length, uint16_t* data, |
59 size_t data_length) { | 63 size_t data_length) { |
60 while (data_length != 0) { | 64 while (data_length != 0) { |
61 size_t cursor = 0; | 65 size_t cursor = 0; |
62 uint32_t character = Utf8::ValueOf(stream, Utf8::kMaxEncodedSize, &cursor); | 66 uint32_t character = Utf8::ValueOf(stream, stream_length, &cursor); |
63 // There's a total lack of bounds checking for stream | 67 // There's a total lack of bounds checking for stream |
64 // as it was already done in Reset. | 68 // as it was already done in Reset. |
65 stream += cursor; | 69 stream += cursor; |
| 70 DCHECK(stream_length >= cursor); |
| 71 stream_length -= cursor; |
66 if (character > unibrow::Utf16::kMaxNonSurrogateCharCode) { | 72 if (character > unibrow::Utf16::kMaxNonSurrogateCharCode) { |
67 *data++ = Utf16::LeadSurrogate(character); | 73 *data++ = Utf16::LeadSurrogate(character); |
68 *data++ = Utf16::TrailSurrogate(character); | 74 *data++ = Utf16::TrailSurrogate(character); |
69 DCHECK(data_length > 1); | 75 DCHECK(data_length > 1); |
70 data_length -= 2; | 76 data_length -= 2; |
71 } else { | 77 } else { |
72 *data++ = character; | 78 *data++ = character; |
73 data_length -= 1; | 79 data_length -= 1; |
74 } | 80 } |
75 } | 81 } |
76 } | 82 } |
77 | 83 |
78 } // namespace unibrow | 84 } // namespace unibrow |
OLD | NEW |