Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(145)

Unified Diff: src/unicode-decoder.cc

Issue 638643002: Update unicode to 7.0.0. (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge
Patch Set: addressed comment Created 6 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/unicode-decoder.h ('k') | src/unicode-inl.h » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/unicode-decoder.cc
diff --git a/src/unicode-decoder.cc b/src/unicode-decoder.cc
new file mode 100644
index 0000000000000000000000000000000000000000..88eff3ad2660d3b230bfe04922b5c9dfede8a2f9
--- /dev/null
+++ b/src/unicode-decoder.cc
@@ -0,0 +1,78 @@
+// Copyright 2014 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+
+#include "src/unicode-inl.h"
+#include "src/unicode-decoder.h"
+#include <stdio.h>
+#include <stdlib.h>
+
+namespace unibrow {
+
+void Utf8DecoderBase::Reset(uint16_t* buffer, unsigned buffer_length,
+ const uint8_t* stream, unsigned stream_length) {
+ // Decode stream into buffer; assume it all fits and stream won't be needed.
+ last_byte_of_buffer_unused_ = false;
+ unbuffered_start_ = NULL;
+ bool writing_to_buffer = true;
+ // Consume the whole stream; write UTF-16 to buffer only while it has space.
+ unsigned utf16_length = 0;
+ while (stream_length != 0) {
+ unsigned cursor = 0;
+ uint32_t character = Utf8::ValueOf(stream, stream_length, &cursor);
+ DCHECK(cursor > 0 && cursor <= stream_length);
+ stream += cursor;
+ stream_length -= cursor;
+ bool is_two_characters = character > Utf16::kMaxNonSurrogateCharCode;
+ utf16_length += is_two_characters ? 2 : 1;
+ // Buffer writes have stopped; keep decoding only to total up utf16_length.
+ if (!writing_to_buffer) continue;
+ // Write out the characters to the buffer.
+ // utf16_length already counts this character, so <= (not <) means it fits.
+ if (utf16_length <= buffer_length) {
+ if (is_two_characters) {
+ *buffer++ = Utf16::LeadSurrogate(character);
+ *buffer++ = Utf16::TrailSurrogate(character);
+ } else {
+ *buffer++ = character;
+ }
+ if (utf16_length == buffer_length) {
+ // Buffer is now exactly full; record where the unbuffered rest begins.
+ writing_to_buffer = false;
+ unbuffered_start_ = stream;
+ }
+ continue;
+ }
+ // Went over the buffer: per the DCHECK, a surrogate pair met a single slot.
+ // That last slot stays unused; rewind to the start of this character.
+ DCHECK(is_two_characters);
+ writing_to_buffer = false;
+ last_byte_of_buffer_unused_ = true;
+ unbuffered_start_ = stream - cursor;
+ }
+ utf16_length_ = utf16_length;
+}
+
+
+void Utf8DecoderBase::WriteUtf16Slow(const uint8_t* stream, uint16_t* data,
+ unsigned data_length) {
+ while (data_length != 0) {
+ unsigned cursor = 0;
+ uint32_t character = Utf8::ValueOf(stream, Utf8::kMaxEncodedSize, &cursor);
+ // No bounds checking for stream here, as it was already done in Reset.
+ stream += cursor;
+ if (character > unibrow::Utf16::kMaxNonSurrogateCharCode) {
+ // A surrogate pair needs two slots: assert before writing, not after.
+ DCHECK(data_length > 1);
+ *data++ = Utf16::LeadSurrogate(character);
+ *data++ = Utf16::TrailSurrogate(character);
+ data_length -= 2;
+ } else {
+ *data++ = character;
+ data_length -= 1;
+ }
+ }
+}
+
+} // namespace unibrow
« no previous file with comments | « src/unicode-decoder.h ('k') | src/unicode-inl.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698