src/unicode-decoder.h - Issue 638643002: Update unicode to 7.0.0.

Unified Diff: src/unicode-decoder.h

Issue 638643002: Update unicode to 7.0.0. (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge

Patch Set: addressed comment Created 6 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: src/unicode-decoder.h

diff --git a/src/unicode-decoder.h b/src/unicode-decoder.h

new file mode 100644

index 0000000000000000000000000000000000000000..35ea30cf1a5ad63ef02a41a9cbc377264edc2904

--- /dev/null

+++ b/src/unicode-decoder.h

@@ -0,0 +1,121 @@

+// Use of this source code is governed by a BSD-style license that can be

+// found in the LICENSE file.

+#ifndef V8_UNICODE_DECODER_H_

+#define V8_UNICODE_DECODER_H_

+#include <sys/types.h>

+#include "src/globals.h"

+namespace unibrow {

+// Non-template core shared by all Utf8Decoder<kBufferSize> instantiations.
+// It records the UTF-16 length of a UTF-8 stream together with the state
+// Utf8Decoder::WriteUtf16 needs to continue past the cached prefix.
+class Utf8DecoderBase {
+ public:
+  // Creates an empty decoder; the real initialization is done in a subclass.
+  inline Utf8DecoderBase();
+  // Decodes |stream| immediately by forwarding all four arguments to Reset().
+  inline Utf8DecoderBase(uint16_t* buffer, unsigned buffer_length,
+                         const uint8_t* stream, unsigned stream_length);
+  // UTF-16 length recorded for the current stream.
+  unsigned Utf16Length() const { return utf16_length_; }
+
+ protected:
+  // Reads every character of the stream and stores the result in
+  // utf16_length_. The first |buffer_length| UTF-16 chars are cached in
+  // |buffer|.
+  void Reset(uint16_t* buffer, unsigned buffer_length, const uint8_t* stream,
+             unsigned stream_length);
+  // Out-of-line helper that Utf8Decoder::WriteUtf16 calls for the code units
+  // beyond the cached prefix.
+  static void WriteUtf16Slow(const uint8_t* stream, uint16_t* data,
+                             unsigned length);
+
+  // Stream position passed to WriteUtf16Slow; NULL after default construction.
+  const uint8_t* unbuffered_start_;
+  // Value reported by Utf16Length().
+  unsigned utf16_length_;
+  // When true, WriteUtf16 treats the cache as holding one entry fewer than
+  // its capacity.
+  bool last_byte_of_buffer_unused_;
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(Utf8DecoderBase);
+};

+// Utf8DecoderBase combined with inline storage for kBufferSize cached UTF-16
+// code units.
+template <unsigned kBufferSize>
+class Utf8Decoder : public Utf8DecoderBase {
+ public:
+  // Empty decoder; call Reset() before reading from it.
+  Utf8Decoder() {}
+  // Decodes the |length| bytes starting at |stream|.
+  inline Utf8Decoder(const char* stream, unsigned length);
+  // Re-targets the decoder at a new stream of |length| bytes.
+  inline void Reset(const char* stream, unsigned length);
+  // Writes at most |length| UTF-16 code units to |data| and returns the
+  // number written. |length| must be greater than zero.
+  inline unsigned WriteUtf16(uint16_t* data, unsigned length) const;
+
+ private:
+  // Cache handed to Utf8DecoderBase::Reset().
+  uint16_t buffer_[kBufferSize];
+};

+// Empty decoder: no resume position, zero UTF-16 length, and the
+// "last buffer slot unused" flag cleared.
+Utf8DecoderBase::Utf8DecoderBase()
+    : unbuffered_start_(NULL), utf16_length_(0),
+      last_byte_of_buffer_unused_(false) {
+}

+// Builds a decoder and immediately decodes |stream| via Reset(), which caches
+// the first |buffer_length| UTF-16 chars in |buffer|.
+Utf8DecoderBase::Utf8DecoderBase(uint16_t* buffer, unsigned buffer_length,
+                                 const uint8_t* stream,
+                                 unsigned stream_length)
+    : unbuffered_start_(NULL),
+      utf16_length_(0),
+      last_byte_of_buffer_unused_(false) {
+  // Reset() is defined outside this header, so start from the same defined
+  // state as the default constructor rather than from indeterminate members.
+  Reset(buffer, buffer_length, stream, stream_length);
+}

+// Decodes |length| bytes of |stream|, using the inline buffer_ of kBufferSize
+// entries as the cache passed to the base-class constructor.
+template <unsigned kBufferSize>
+Utf8Decoder<kBufferSize>::Utf8Decoder(const char* stream, unsigned length)
+    : Utf8DecoderBase(buffer_,
+                      kBufferSize,
+                      reinterpret_cast<const uint8_t*>(stream),
+                      length) {
+}

+// Points the decoder at a new UTF-8 input of |length| bytes. The inline
+// buffer_ (kBufferSize entries) is passed to Utf8DecoderBase::Reset() as the
+// cache.
+template <unsigned kBufferSize>
+void Utf8Decoder<kBufferSize>::Reset(const char* stream, unsigned length) {
+  Utf8DecoderBase::Reset(buffer_, kBufferSize,
+                         reinterpret_cast<const uint8_t*>(stream), length);
+}

+// Writes at most |length| UTF-16 code units of the decoded stream to |data|
+// and returns the number written, which is |length| clamped to
+// utf16_length_. |length| must be greater than zero. Code units held in
+// buffer_ are copied directly; anything beyond them is produced by
+// WriteUtf16Slow() starting at unbuffered_start_.
+template <unsigned kBufferSize>
+unsigned Utf8Decoder<kBufferSize>::WriteUtf16(uint16_t* data,
+                                              unsigned length) const {
+  DCHECK(length > 0);
+  // Never report more code units than the stream decodes to.
+  if (length > utf16_length_) length = utf16_length_;
+  // memcpy everything in buffer. When last_byte_of_buffer_unused_ is set,
+  // only kBufferSize - 1 cached entries are treated as valid.
+  const unsigned buffer_length =
+      last_byte_of_buffer_unused_ ? kBufferSize - 1 : kBufferSize;
+  const unsigned memcpy_length =
+      length <= buffer_length ? length : buffer_length;
+  v8::internal::MemCopy(data, buffer_, memcpy_length * sizeof(uint16_t));
+  if (length <= buffer_length) return length;
+  DCHECK(unbuffered_start_ != NULL);
+  // Copy the rest the slow way.
+  WriteUtf16Slow(unbuffered_start_, data + buffer_length,
+                 length - buffer_length);
+  return length;
+}

+class Latin1 {
+ public:
+  // Largest character value that is still Latin-1.
+  static const unsigned kMaxChar = 0xff;
+  // Returns 0 if |c| does not convert to a single Latin-1 character, or if
+  // it does not convert back to Latin-1 via the inverse operation (upper to
+  // lower, etc).
+  static inline uint16_t ConvertNonLatin1ToLatin1(uint16_t c);
+};

+// Maps the few characters above Latin1::kMaxChar that are handled here to a
+// Latin-1 character; every other input yields 0. |c| must be greater than
+// Latin1::kMaxChar.
+uint16_t Latin1::ConvertNonLatin1ToLatin1(uint16_t c) {
+  DCHECK(c > Latin1::kMaxChar);
+  switch (c) {
+    // These are equivalent characters in Unicode: U+039C (Greek capital mu)
+    // and U+03BC (Greek small mu) both map to U+00B5 (micro sign).
+    case 0x39c:
+    case 0x3bc:
+      return 0xb5;
+    // This is an uppercase of a Latin-1 character
+    // outside of Latin-1: U+0178 is the uppercase of U+00FF.
+    case 0x178:
+      return 0xff;
+  }
+  return 0;
+}

+} // namespace unibrow

+#endif // V8_UNICODE_DECODER_H_

« no previous file with comments | « src/unicode.cc ('k') | src/unicode-decoder.cc » ('j') | no next file with comments »