src/unicode-decoder.cc - Issue 638643002: Update unicode to 7.0.0.

Side by Side Diff

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Keyboard Shortcuts

	File
u :	up to issue
j / k :	jump to file after / before current file
J / K :	jump to next file with a comment after / before current file
	Side-by-side diff
i :	toggle intra-line diffs
e :	expand all comments
c :	collapse all comments
s :	toggle showing all comments
n / p :	next / previous diff chunk or comment
N / P :	next / previous comment
<Up> / <Down> :	next / previous line

	Issue
u :	up to list of issues
j / k :	jump to patch after / before current patch
o / <Enter> :	open current patch in side-by-side view
i :	open current patch in unified diff view

	Issue List
j / k :	jump to issue after / before current issue
o / <Enter> :	open current issue

Side by Side Diff: src/unicode-decoder.cc

Issue 638643002: Update unicode to 7.0.0. (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge

Patch Set: addressed comment Created 6 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
(Empty)
	1 // Copyright 2014 the V8 project authors. All rights reserved.

	2 // Use of this source code is governed by a BSD-style license that can be

	3 // found in the LICENSE file.

	4

	5

	6 #include "src/unicode-inl.h"

	7 #include "src/unicode-decoder.h"

	8 #include <stdio.h>

	9 #include <stdlib.h>

	10

	11 namespace unibrow {

	12

	13 void Utf8DecoderBase::Reset(uint16_t* buffer, unsigned buffer_length,

	14 const uint8_t* stream, unsigned stream_length) {

	15 // Assume everything will fit in the buffer and stream won't be needed.

	16 last_byte_of_buffer_unused_ = false;

	17 unbuffered_start_ = NULL;

	18 bool writing_to_buffer = true;

	19 // Loop until stream is read, writing to buffer as long as buffer has space.

	20 unsigned utf16_length = 0;

	21 while (stream_length != 0) {

	22 unsigned cursor = 0;

	23 uint32_t character = Utf8::ValueOf(stream, stream_length, &cursor);

	24 DCHECK(cursor > 0 && cursor <= stream_length);

	25 stream += cursor;

	26 stream_length -= cursor;

	27 bool is_two_characters = character > Utf16::kMaxNonSurrogateCharCode;

	28 utf16_length += is_two_characters ? 2 : 1;

	29 // Don't need to write to the buffer, but still need utf16_length.

	30 if (!writing_to_buffer) continue;

	31 // Write out the characters to the buffer.

	32 // Must check for equality with buffer_length as we've already updated it.

	33 if (utf16_length <= buffer_length) {

	34 if (is_two_characters) {

	35 *buffer++ = Utf16::LeadSurrogate(character);

	36 *buffer++ = Utf16::TrailSurrogate(character);

	37 } else {

	38 *buffer++ = character;

	39 }

	40 if (utf16_length == buffer_length) {

	41 // Just wrote last character of buffer

	42 writing_to_buffer = false;

	43 unbuffered_start_ = stream;

	44 }

	45 continue;

	46 }

	47 // Have gone over buffer.

	48 // Last char of buffer is unused, set cursor back.

	49 DCHECK(is_two_characters);

	50 writing_to_buffer = false;

	51 last_byte_of_buffer_unused_ = true;

	52 unbuffered_start_ = stream - cursor;

	53 }

	54 utf16_length_ = utf16_length;

	55 }

	56

	57

	58 void Utf8DecoderBase::WriteUtf16Slow(const uint8_t* stream, uint16_t* data,

	59 unsigned data_length) {

	60 while (data_length != 0) {

	61 unsigned cursor = 0;

	62 uint32_t character = Utf8::ValueOf(stream, Utf8::kMaxEncodedSize, &cursor);

	63 // There's a total lack of bounds checking for stream

	64 // as it was already done in Reset.

	65 stream += cursor;

	66 if (character > unibrow::Utf16::kMaxNonSurrogateCharCode) {

	67 *data++ = Utf16::LeadSurrogate(character);

	68 *data++ = Utf16::TrailSurrogate(character);

	69 DCHECK(data_length > 1);

	70 data_length -= 2;

	71 } else {

	72 *data++ = character;

	73 data_length -= 1;

	74 }

	75 }

	76 }

	77

	78 } // namespace unibrow

OLD	NEW

« no previous file with comments | « src/unicode-decoder.h ('k') | src/unicode-inl.h » ('j') | no next file with comments »