Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(119)

Unified Diff: src/unicode-decoder.h

Issue 638643002: Update unicode to 7.0.0. (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge
Patch Set: addressed comment Created 6 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/unicode.cc ('k') | src/unicode-decoder.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/unicode-decoder.h
diff --git a/src/unicode-decoder.h b/src/unicode-decoder.h
new file mode 100644
index 0000000000000000000000000000000000000000..35ea30cf1a5ad63ef02a41a9cbc377264edc2904
--- /dev/null
+++ b/src/unicode-decoder.h
@@ -0,0 +1,121 @@
+// Copyright 2014 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef V8_UNICODE_DECODER_H_
+#define V8_UNICODE_DECODER_H_
+
+#include <sys/types.h>
+#include "src/globals.h"
+
+namespace unibrow {
+
+class Utf8DecoderBase {  // Base of Utf8Decoder<kBufferSize>; owns no buffer.
+ public:
+ // Default-constructs an empty decoder; the subclass supplies the buffer.
+ inline Utf8DecoderBase();
+ inline Utf8DecoderBase(uint16_t* buffer, unsigned buffer_length,
+ const uint8_t* stream, unsigned stream_length);  // Forwards to Reset().
+ inline unsigned Utf16Length() const { return utf16_length_; }
+
+ protected:
+ // Reads all characters of the stream and sets utf16_length_.
+ // The first buffer_length utf16 chars are cached in the buffer.
+ void Reset(uint16_t* buffer, unsigned buffer_length, const uint8_t* stream,
+ unsigned stream_length);
+ static void WriteUtf16Slow(const uint8_t* stream, uint16_t* data,
+ unsigned length);  // Used by WriteUtf16 for output past the buffer.
+ const uint8_t* unbuffered_start_;  // Stream passed to WriteUtf16Slow().
+ unsigned utf16_length_;  // Set by Reset(); 0 when default-constructed.
+ bool last_byte_of_buffer_unused_;  // If set, WriteUtf16 uses kBufferSize - 1.
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(Utf8DecoderBase);
+};
+
+template <unsigned kBufferSize>  // Number of uint16_t entries in buffer_.
+class Utf8Decoder : public Utf8DecoderBase {
+ public:
+ inline Utf8Decoder() {}
+ inline Utf8Decoder(const char* stream, unsigned length);  // Resets at once.
+ inline void Reset(const char* stream, unsigned length);
+ inline unsigned WriteUtf16(uint16_t* data, unsigned length) const;  // Count.
+
+ private:
+ uint16_t buffer_[kBufferSize];  // Cache handed to Utf8DecoderBase::Reset().
+};
+
+
+Utf8DecoderBase::Utf8DecoderBase()  // Empty state: no stream, zero length.
+ : unbuffered_start_(NULL),
+ utf16_length_(0),
+ last_byte_of_buffer_unused_(false) {}
+
+
+Utf8DecoderBase::Utf8DecoderBase(uint16_t* buffer, unsigned buffer_length,
+ const uint8_t* stream,
+ unsigned stream_length) {  // No member-initializer list here.
+ Reset(buffer, buffer_length, stream, stream_length);  // NOTE(review): assumes Reset() assigns every member - confirm.
+}
+
+
+template <unsigned kBufferSize>
+Utf8Decoder<kBufferSize>::Utf8Decoder(const char* stream, unsigned length)
+ : Utf8DecoderBase(buffer_, kBufferSize,  // Base ctor calls Reset() on these.
+ reinterpret_cast<const uint8_t*>(stream), length) {}
+
+
+template <unsigned kBufferSize>
+void Utf8Decoder<kBufferSize>::Reset(const char* stream, unsigned length) {
+ Utf8DecoderBase::Reset(buffer_, kBufferSize,  // Own buffer_ is the cache.
+ reinterpret_cast<const uint8_t*>(stream), length);
+}
+
+
+template <unsigned kBufferSize>
+unsigned Utf8Decoder<kBufferSize>::WriteUtf16(uint16_t* data,
+ unsigned length) const {
+ DCHECK(length > 0);  // Callers must ask for at least one code unit.
+ if (length > utf16_length_) length = utf16_length_;  // Clamp to decoded size.
+ // memcpy the part of the request that is cached in buffer_.
+ unsigned buffer_length =
+ last_byte_of_buffer_unused_ ? kBufferSize - 1 : kBufferSize;
+ unsigned memcpy_length = length <= buffer_length ? length : buffer_length;
+ v8::internal::MemCopy(data, buffer_, memcpy_length * sizeof(uint16_t));
+ if (length <= buffer_length) return length;  // Served entirely from buffer_.
+ DCHECK(unbuffered_start_ != NULL);
+ // Copy the rest the slow way, starting from unbuffered_start_.
+ WriteUtf16Slow(unbuffered_start_, data + buffer_length,
+ length - buffer_length);
+ return length;
+}
+
+class Latin1 {
+ public:
+ static const unsigned kMaxChar = 0xff;  // Largest Latin-1 code point.
+ // Returns 0 if the character does not convert to a single latin-1 character
+ // or if the character does not convert back to latin-1 via the inverse
+ // operation (upper to lower, etc).
+ static inline uint16_t ConvertNonLatin1ToLatin1(uint16_t);
+};
+
+
+uint16_t Latin1::ConvertNonLatin1ToLatin1(uint16_t c) {
+ DCHECK(c > Latin1::kMaxChar);  // Only meaningful for non-Latin-1 input.
+ switch (c) {
+ // These are equivalent characters in unicode.
+ case 0x39c:  // GREEK CAPITAL LETTER MU
+ case 0x3bc:  // GREEK SMALL LETTER MU
+ return 0xb5;  // MICRO SIGN
+ // This is an uppercase of a Latin-1 character
+ // outside of Latin-1.
+ case 0x178:  // LATIN CAPITAL LETTER Y WITH DIAERESIS
+ return 0xff;  // LATIN SMALL LETTER Y WITH DIAERESIS
+ }
+ return 0;  // No Latin-1 equivalent.
+}
+
+
+} // namespace unibrow
+
+#endif // V8_UNICODE_DECODER_H_
« no previous file with comments | « src/unicode.cc ('k') | src/unicode-decoder.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698