src/unicode-decoder.cc - Issue 638643002: Update unicode to 7.0.0.

Unified Diff: src/unicode-decoder.cc

Issue 638643002: Update unicode to 7.0.0. (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge

Patch Set: addressed comment Created 6 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: src/unicode-decoder.cc

diff --git a/src/unicode-decoder.cc b/src/unicode-decoder.cc

new file mode 100644

index 0000000000000000000000000000000000000000..88eff3ad2660d3b230bfe04922b5c9dfede8a2f9

--- /dev/null

+++ b/src/unicode-decoder.cc

@@ -0,0 +1,78 @@

+// Use of this source code is governed by a BSD-style license that can be

+// found in the LICENSE file.

+#include "src/unicode-inl.h"

+#include "src/unicode-decoder.h"

+#include <stdio.h>

+#include <stdlib.h>

+namespace unibrow {

+void Utf8DecoderBase::Reset(uint16_t* buffer, unsigned buffer_length,

+ const uint8_t* stream, unsigned stream_length) {  // Decodes UTF-8 |stream| into UTF-16 units in |buffer| and counts the total UTF-16 length.

+ // Assume everything will fit in the buffer: no unbuffered tail, no wasted slot.

+ last_byte_of_buffer_unused_ = false;

+ unbuffered_start_ = NULL;

+ bool writing_to_buffer = true;

+ // Consume the whole stream; write to the buffer only while it has space.

+ unsigned utf16_length = 0;  // UTF-16 code units needed for the stream so far.

+ while (stream_length != 0) {

+ unsigned cursor = 0;  // Receives the byte count used to advance |stream| below.

+ uint32_t character = Utf8::ValueOf(stream, stream_length, &cursor);

+ DCHECK(cursor > 0 && cursor <= stream_length);

+ stream += cursor;

+ stream_length -= cursor;

+ bool is_two_characters = character > Utf16::kMaxNonSurrogateCharCode;  // Written as a lead/trail surrogate pair.

+ utf16_length += is_two_characters ? 2 : 1;

+ // Buffer writing has stopped: keep decoding only to finish utf16_length.

+ if (!writing_to_buffer) continue;

+ // Write this character's UTF-16 unit(s) to the buffer if they fit.

+ // Use <= (not <): utf16_length already includes this character (updated above).

+ if (utf16_length <= buffer_length) {

+ if (is_two_characters) {

+ *buffer++ = Utf16::LeadSurrogate(character);

+ *buffer++ = Utf16::TrailSurrogate(character);

+ } else {

+ *buffer++ = character;

+ }

+ if (utf16_length == buffer_length) {

+ // Buffer is now exactly full; whatever remains in stream stays unbuffered.

+ writing_to_buffer = false;

+ unbuffered_start_ = stream;  // |stream| was already advanced past this character.

+ }

+ continue;

+ }

+ // This character does not fit in the remaining buffer space.

+ // Leave the last buffer slot unused and rewind to this character's first byte.

+ DCHECK(is_two_characters);  // NOTE(review): with buffer_length == 0 a one-unit character also reaches here - presumably callers never pass an empty buffer; confirm.

+ writing_to_buffer = false;

+ last_byte_of_buffer_unused_ = true;

+ unbuffered_start_ = stream - cursor;

+ }

+ utf16_length_ = utf16_length;  // Total for the whole stream, buffered or not.

+void Utf8DecoderBase::WriteUtf16Slow(const uint8_t* stream, uint16_t* data,

+ unsigned data_length) {  // Decodes UTF-8 from |stream| until |data_length| UTF-16 units have been written to |data|.

+ while (data_length != 0) {

+ unsigned cursor = 0;  // Receives the byte count used to advance |stream| below.

+ uint32_t character = Utf8::ValueOf(stream, Utf8::kMaxEncodedSize, &cursor);

+ // No bounds checking is done on stream here (kMaxEncodedSize is passed as

+ // the length), on the premise that it was already done in Reset.

+ stream += cursor;

+ if (character > unibrow::Utf16::kMaxNonSurrogateCharCode) {

+ *data++ = Utf16::LeadSurrogate(character);

+ *data++ = Utf16::TrailSurrogate(character);

+ DCHECK(data_length > 1);  // NOTE(review): checked only after both units were already written above.

+ data_length -= 2;

+ } else {

+ *data++ = character;

+ data_length -= 1;

+ }

+} // namespace unibrow

« no previous file with comments | « src/unicode-decoder.h ('k') | src/unicode-inl.h » ('j') | no next file with comments »