Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(227)

Side by Side Diff: src/unicode-decoder.cc

Issue 638643002: Update unicode to 7.0.0. (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge
Patch Set: addressed comment Created 6 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « src/unicode-decoder.h ('k') | src/unicode-inl.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright 2014 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5
6 #include "src/unicode-inl.h"
7 #include "src/unicode-decoder.h"
8 #include <stdio.h>
9 #include <stdlib.h>
10
11 namespace unibrow {
12
13 void Utf8DecoderBase::Reset(uint16_t* buffer, unsigned buffer_length,
14 const uint8_t* stream, unsigned stream_length) {
15 // Assume everything will fit in the buffer and stream won't be needed.
16 last_byte_of_buffer_unused_ = false;
17 unbuffered_start_ = NULL;
18 bool writing_to_buffer = true;
19 // Loop until stream is read, writing to buffer as long as buffer has space.
20 unsigned utf16_length = 0;
21 while (stream_length != 0) {
22 unsigned cursor = 0;
23 uint32_t character = Utf8::ValueOf(stream, stream_length, &cursor);
24 DCHECK(cursor > 0 && cursor <= stream_length);
25 stream += cursor;
26 stream_length -= cursor;
27 bool is_two_characters = character > Utf16::kMaxNonSurrogateCharCode;
28 utf16_length += is_two_characters ? 2 : 1;
29 // Don't need to write to the buffer, but still need utf16_length.
30 if (!writing_to_buffer) continue;
31 // Write out the characters to the buffer.
32 // Must check for equality with buffer_length as we've already updated it.
33 if (utf16_length <= buffer_length) {
34 if (is_two_characters) {
35 *buffer++ = Utf16::LeadSurrogate(character);
36 *buffer++ = Utf16::TrailSurrogate(character);
37 } else {
38 *buffer++ = character;
39 }
40 if (utf16_length == buffer_length) {
41 // Just wrote last character of buffer
42 writing_to_buffer = false;
43 unbuffered_start_ = stream;
44 }
45 continue;
46 }
47 // Have gone over buffer.
48 // Last char of buffer is unused, set cursor back.
49 DCHECK(is_two_characters);
50 writing_to_buffer = false;
51 last_byte_of_buffer_unused_ = true;
52 unbuffered_start_ = stream - cursor;
53 }
54 utf16_length_ = utf16_length;
55 }
56
57
58 void Utf8DecoderBase::WriteUtf16Slow(const uint8_t* stream, uint16_t* data,
59 unsigned data_length) {
60 while (data_length != 0) {
61 unsigned cursor = 0;
62 uint32_t character = Utf8::ValueOf(stream, Utf8::kMaxEncodedSize, &cursor);
63 // There's a total lack of bounds checking for stream
64 // as it was already done in Reset.
65 stream += cursor;
66 if (character > unibrow::Utf16::kMaxNonSurrogateCharCode) {
67 *data++ = Utf16::LeadSurrogate(character);
68 *data++ = Utf16::TrailSurrogate(character);
69 DCHECK(data_length > 1);
70 data_length -= 2;
71 } else {
72 *data++ = character;
73 data_length -= 1;
74 }
75 }
76 }
77
78 } // namespace unibrow
OLDNEW
« no previous file with comments | « src/unicode-decoder.h ('k') | src/unicode-inl.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698