| Index: src/unicode-decoder.cc
 | 
| diff --git a/src/unicode-decoder.cc b/src/unicode-decoder.cc
 | 
| new file mode 100644
 | 
| index 0000000000000000000000000000000000000000..88eff3ad2660d3b230bfe04922b5c9dfede8a2f9
 | 
| --- /dev/null
 | 
| +++ b/src/unicode-decoder.cc
 | 
| @@ -0,0 +1,78 @@
 | 
| +// Copyright 2014 the V8 project authors. All rights reserved.
 | 
| +// Use of this source code is governed by a BSD-style license that can be
 | 
| +// found in the LICENSE file.
 | 
| +
 | 
| +
 | 
| +#include "src/unicode-inl.h"
 | 
| +#include "src/unicode-decoder.h"
 | 
| +#include <stdio.h>
 | 
| +#include <stdlib.h>
 | 
| +
 | 
| +namespace unibrow {
 | 
| +
 | 
| +// Decodes the UTF-8 bytes in |stream| and caches the leading UTF-16 code
 | 
| +// units in |buffer|, which has room for |buffer_length| units. The entire
 | 
| +// stream is always scanned so that utf16_length_ ends up holding the total
 | 
| +// UTF-16 length. If the buffer fills up, unbuffered_start_ records where in
 | 
| +// the stream the data that is not in the buffer begins.
 | 
| +void Utf8DecoderBase::Reset(uint16_t* buffer, unsigned buffer_length,
 | 
| +                            const uint8_t* stream, unsigned stream_length) {
 | 
| +  // Optimistic defaults: the buffer holds everything and no part of the
 | 
| +  // stream has to be decoded again later.
 | 
| +  last_byte_of_buffer_unused_ = false;
 | 
| +  unbuffered_start_ = NULL;
 | 
| +  bool buffer_has_room = true;
 | 
| +  unsigned units_so_far = 0;
 | 
| +  // Consume the whole stream; the buffer is only filled while it has room.
 | 
| +  while (stream_length != 0) {
 | 
| +    unsigned consumed = 0;
 | 
| +    const uint32_t code_point =
 | 
| +        Utf8::ValueOf(stream, stream_length, &consumed);
 | 
| +    DCHECK(consumed > 0 && consumed <= stream_length);
 | 
| +    stream += consumed;
 | 
| +    stream_length -= consumed;
 | 
| +    const bool needs_pair = code_point > Utf16::kMaxNonSurrogateCharCode;
 | 
| +    units_so_far += needs_pair ? 2 : 1;
 | 
| +    // Once the buffer is closed only the running length is maintained.
 | 
| +    if (buffer_has_room) {
 | 
| +      // units_so_far already includes this character, so equality with
 | 
| +      // buffer_length still means that it fits.
 | 
| +      if (units_so_far > buffer_length) {
 | 
| +        // Expected only for a surrogate pair (see the DCHECK). The final
 | 
| +        // buffer slot stays empty and unbuffered_start_ points back at the
 | 
| +        // first byte of this character.
 | 
| +        DCHECK(needs_pair);
 | 
| +        buffer_has_room = false;
 | 
| +        last_byte_of_buffer_unused_ = true;
 | 
| +        unbuffered_start_ = stream - consumed;
 | 
| +      } else {
 | 
| +        if (needs_pair) {
 | 
| +          buffer[0] = Utf16::LeadSurrogate(code_point);
 | 
| +          buffer[1] = Utf16::TrailSurrogate(code_point);
 | 
| +          buffer += 2;
 | 
| +        } else {
 | 
| +          *buffer++ = code_point;
 | 
| +        }
 | 
| +        if (units_so_far == buffer_length) {
 | 
| +          // The buffer is now exactly full; anything after this point in
 | 
| +          // the stream is not buffered.
 | 
| +          buffer_has_room = false;
 | 
| +          unbuffered_start_ = stream;
 | 
| +        }
 | 
| +      }
 | 
| +    }
 | 
| +  }
 | 
| +  utf16_length_ = units_so_far;
 | 
| +}
 | 
| +
 | 
| +
 | 
| +// Decodes UTF-8 from |stream| and stores |data_length| UTF-16 code units in
 | 
| +// |data|. Characters above Utf16::kMaxNonSurrogateCharCode are stored as a
 | 
| +// lead/trail surrogate pair and therefore take two units.
 | 
| +// The length of |stream| is not passed in: every Utf8::ValueOf call is given
 | 
| +// Utf8::kMaxEncodedSize as the available byte count, relying on Reset having
 | 
| +// scanned the same bytes already.
 | 
| +// NOTE(review): this presumably lets ValueOf look past the real end of the
 | 
| +// stream when it ends in a truncated sequence -- confirm against
 | 
| +// Utf8::ValueOf and the callers.
 | 
| +void Utf8DecoderBase::WriteUtf16Slow(const uint8_t* stream, uint16_t* data,
 | 
| +                                     unsigned data_length) {
 | 
| +  while (data_length != 0) {
 | 
| +    unsigned cursor = 0;
 | 
| +    uint32_t character = Utf8::ValueOf(stream, Utf8::kMaxEncodedSize, &cursor);
 | 
| +    // There's a total lack of bounds checking for stream
 | 
| +    // as it was already done in Reset.
 | 
| +    stream += cursor;
 | 
| +    if (character > unibrow::Utf16::kMaxNonSurrogateCharCode) {
 | 
| +      // A surrogate pair needs two free slots. Check this before writing so
 | 
| +      // that a violated precondition is reported ahead of the out-of-bounds
 | 
| +      // store and the unsigned wrap-around of data_length.
 | 
| +      DCHECK(data_length > 1);
 | 
| +      *data++ = Utf16::LeadSurrogate(character);
 | 
| +      *data++ = Utf16::TrailSurrogate(character);
 | 
| +      data_length -= 2;
 | 
| +    } else {
 | 
| +      *data++ = character;
 | 
| +      data_length -= 1;
 | 
| +    }
 | 
| +  }
 | 
| +}
 | 
| +
 | 
| +}  // namespace unibrow
 | 
| 
 |