OLD | NEW |
(Empty) | |
| 1 // Copyright 2014 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 |
| 5 #ifndef V8_UNICODE_DECODER_H_ |
| 6 #define V8_UNICODE_DECODER_H_ |
| 7 |
| 8 #include <sys/types.h> |
| 9 #include "src/globals.h" |
| 10 |
| 11 namespace unibrow { |
| 12 |
| 13 class Utf8DecoderBase { |
| 14 public: |
| 15 // Initialization done in subclass. |
| 16 inline Utf8DecoderBase(); |
| 17 inline Utf8DecoderBase(uint16_t* buffer, unsigned buffer_length, |
| 18 const uint8_t* stream, unsigned stream_length); |
| 19 inline unsigned Utf16Length() const { return utf16_length_; } |
| 20 |
| 21 protected: |
| 22 // This reads all characters and sets the utf16_length_. |
| 23 // The first buffer_length utf16 chars are cached in the buffer. |
| 24 void Reset(uint16_t* buffer, unsigned buffer_length, const uint8_t* stream, |
| 25 unsigned stream_length); |
| 26 static void WriteUtf16Slow(const uint8_t* stream, uint16_t* data, |
| 27 unsigned length); |
| 28 const uint8_t* unbuffered_start_; |
| 29 unsigned utf16_length_; |
| 30 bool last_byte_of_buffer_unused_; |
| 31 |
| 32 private: |
| 33 DISALLOW_COPY_AND_ASSIGN(Utf8DecoderBase); |
| 34 }; |
| 35 |
| 36 template <unsigned kBufferSize> |
| 37 class Utf8Decoder : public Utf8DecoderBase { |
| 38 public: |
| 39 inline Utf8Decoder() {} |
| 40 inline Utf8Decoder(const char* stream, unsigned length); |
| 41 inline void Reset(const char* stream, unsigned length); |
| 42 inline unsigned WriteUtf16(uint16_t* data, unsigned length) const; |
| 43 |
| 44 private: |
| 45 uint16_t buffer_[kBufferSize]; |
| 46 }; |
| 47 |
| 48 |
| 49 Utf8DecoderBase::Utf8DecoderBase() |
| 50 : unbuffered_start_(NULL), |
| 51 utf16_length_(0), |
| 52 last_byte_of_buffer_unused_(false) {} |
| 53 |
| 54 |
| 55 Utf8DecoderBase::Utf8DecoderBase(uint16_t* buffer, unsigned buffer_length, |
| 56 const uint8_t* stream, |
| 57 unsigned stream_length) { |
| 58 Reset(buffer, buffer_length, stream, stream_length); |
| 59 } |
| 60 |
| 61 |
| 62 template <unsigned kBufferSize> |
| 63 Utf8Decoder<kBufferSize>::Utf8Decoder(const char* stream, unsigned length) |
| 64 : Utf8DecoderBase(buffer_, kBufferSize, |
| 65 reinterpret_cast<const uint8_t*>(stream), length) {} |
| 66 |
| 67 |
| 68 template <unsigned kBufferSize> |
| 69 void Utf8Decoder<kBufferSize>::Reset(const char* stream, unsigned length) { |
| 70 Utf8DecoderBase::Reset(buffer_, kBufferSize, |
| 71 reinterpret_cast<const uint8_t*>(stream), length); |
| 72 } |
| 73 |
| 74 |
| 75 template <unsigned kBufferSize> |
| 76 unsigned Utf8Decoder<kBufferSize>::WriteUtf16(uint16_t* data, |
| 77 unsigned length) const { |
| 78 DCHECK(length > 0); |
| 79 if (length > utf16_length_) length = utf16_length_; |
| 80 // memcpy everything in buffer. |
| 81 unsigned buffer_length = |
| 82 last_byte_of_buffer_unused_ ? kBufferSize - 1 : kBufferSize; |
| 83 unsigned memcpy_length = length <= buffer_length ? length : buffer_length; |
| 84 v8::internal::MemCopy(data, buffer_, memcpy_length * sizeof(uint16_t)); |
| 85 if (length <= buffer_length) return length; |
| 86 DCHECK(unbuffered_start_ != NULL); |
| 87 // Copy the rest the slow way. |
| 88 WriteUtf16Slow(unbuffered_start_, data + buffer_length, |
| 89 length - buffer_length); |
| 90 return length; |
| 91 } |
| 92 |
// Helpers for the Latin-1 range (code units up to kMaxChar).
class Latin1 {
 public:
  // Largest code unit that is still a Latin-1 character.
  static const unsigned kMaxChar = 0xff;

  // Maps a code unit above kMaxChar to a Latin-1 character. Returns 0 if the
  // character does not convert to a single Latin-1 character, or if it does
  // not convert back to Latin-1 via the inverse operation (upper to lower,
  // etc.).
  static inline uint16_t ConvertNonLatin1ToLatin1(uint16_t);
};
| 101 |
| 102 |
| 103 uint16_t Latin1::ConvertNonLatin1ToLatin1(uint16_t c) { |
| 104 DCHECK(c > Latin1::kMaxChar); |
| 105 switch (c) { |
| 106 // This are equivalent characters in unicode. |
| 107 case 0x39c: |
| 108 case 0x3bc: |
| 109 return 0xb5; |
| 110 // This is an uppercase of a Latin-1 character |
| 111 // outside of Latin-1. |
| 112 case 0x178: |
| 113 return 0xff; |
| 114 } |
| 115 return 0; |
| 116 } |
| 117 |
| 118 |
| 119 } // namespace unibrow |
| 120 |
| 121 #endif // V8_UNICODE_DECODER_H_ |
OLD | NEW |