| OLD | NEW | 
| (Empty) |  | 
 |   1 // Copyright 2014 the V8 project authors. All rights reserved. | 
 |   2 // Use of this source code is governed by a BSD-style license that can be | 
 |   3 // found in the LICENSE file. | 
 |   4  | 
 |   5  | 
 |   6 #include "src/unicode-inl.h" | 
 |   7 #include "src/unicode-decoder.h" | 
 |   8 #include <stdio.h> | 
 |   9 #include <stdlib.h> | 
 |  10  | 
 |  11 namespace unibrow { | 
 |  12  | 
 |  13 void Utf8DecoderBase::Reset(uint16_t* buffer, unsigned buffer_length, | 
 |  14                             const uint8_t* stream, unsigned stream_length) { | 
 |  15   // Assume everything will fit in the buffer and stream won't be needed. | 
 |  16   last_byte_of_buffer_unused_ = false; | 
 |  17   unbuffered_start_ = NULL; | 
 |  18   bool writing_to_buffer = true; | 
 |  19   // Loop until stream is read, writing to buffer as long as buffer has space. | 
 |  20   unsigned utf16_length = 0; | 
 |  21   while (stream_length != 0) { | 
 |  22     unsigned cursor = 0; | 
 |  23     uint32_t character = Utf8::ValueOf(stream, stream_length, &cursor); | 
 |  24     DCHECK(cursor > 0 && cursor <= stream_length); | 
 |  25     stream += cursor; | 
 |  26     stream_length -= cursor; | 
 |  27     bool is_two_characters = character > Utf16::kMaxNonSurrogateCharCode; | 
 |  28     utf16_length += is_two_characters ? 2 : 1; | 
 |  29     // Don't need to write to the buffer, but still need utf16_length. | 
 |  30     if (!writing_to_buffer) continue; | 
 |  31     // Write out the characters to the buffer. | 
 |  32     // Must check for equality with buffer_length as we've already updated it. | 
 |  33     if (utf16_length <= buffer_length) { | 
 |  34       if (is_two_characters) { | 
 |  35         *buffer++ = Utf16::LeadSurrogate(character); | 
 |  36         *buffer++ = Utf16::TrailSurrogate(character); | 
 |  37       } else { | 
 |  38         *buffer++ = character; | 
 |  39       } | 
 |  40       if (utf16_length == buffer_length) { | 
 |  41         // Just wrote last character of buffer | 
 |  42         writing_to_buffer = false; | 
 |  43         unbuffered_start_ = stream; | 
 |  44       } | 
 |  45       continue; | 
 |  46     } | 
 |  47     // Have gone over buffer. | 
 |  48     // Last char of buffer is unused, set cursor back. | 
 |  49     DCHECK(is_two_characters); | 
 |  50     writing_to_buffer = false; | 
 |  51     last_byte_of_buffer_unused_ = true; | 
 |  52     unbuffered_start_ = stream - cursor; | 
 |  53   } | 
 |  54   utf16_length_ = utf16_length; | 
 |  55 } | 
 |  56  | 
 |  57  | 
 |  58 void Utf8DecoderBase::WriteUtf16Slow(const uint8_t* stream, uint16_t* data, | 
 |  59                                      unsigned data_length) { | 
 |  60   while (data_length != 0) { | 
 |  61     unsigned cursor = 0; | 
 |  62     uint32_t character = Utf8::ValueOf(stream, Utf8::kMaxEncodedSize, &cursor); | 
 |  63     // There's a total lack of bounds checking for stream | 
 |  64     // as it was already done in Reset. | 
 |  65     stream += cursor; | 
 |  66     if (character > unibrow::Utf16::kMaxNonSurrogateCharCode) { | 
 |  67       *data++ = Utf16::LeadSurrogate(character); | 
 |  68       *data++ = Utf16::TrailSurrogate(character); | 
 |  69       DCHECK(data_length > 1); | 
 |  70       data_length -= 2; | 
 |  71     } else { | 
 |  72       *data++ = character; | 
 |  73       data_length -= 1; | 
 |  74     } | 
 |  75   } | 
 |  76 } | 
 |  77  | 
 |  78 }  // namespace unibrow | 
| OLD | NEW |