| OLD | NEW |
| 1 // Copyright 2014 the V8 project authors. All rights reserved. | 1 // Copyright 2014 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #ifndef V8_UNICODE_DECODER_H_ | 5 #ifndef V8_UNICODE_DECODER_H_ |
| 6 #define V8_UNICODE_DECODER_H_ | 6 #define V8_UNICODE_DECODER_H_ |
| 7 | 7 |
| 8 #include <sys/types.h> | 8 #include <sys/types.h> |
| 9 #include "src/globals.h" | 9 #include "src/globals.h" |
| 10 | 10 |
| 11 namespace unibrow { | 11 namespace unibrow { |
| 12 | 12 |
| 13 class Utf8DecoderBase { | 13 class Utf8DecoderBase { |
| 14 public: | 14 public: |
| 15 // Initialization done in subclass. | 15 // Initialization done in subclass. |
| 16 inline Utf8DecoderBase(); | 16 inline Utf8DecoderBase(); |
| 17 inline Utf8DecoderBase(uint16_t* buffer, size_t buffer_length, | 17 inline Utf8DecoderBase(uint16_t* buffer, size_t buffer_length, |
| 18 const uint8_t* stream, size_t stream_length); | 18 const uint8_t* stream, size_t stream_length); |
| 19 inline size_t Utf16Length() const { return utf16_length_; } | 19 inline size_t Utf16Length() const { return utf16_length_; } |
| 20 | 20 |
| 21 protected: | 21 protected: |
| 22 // This reads all characters and sets the utf16_length_. | 22 // This reads all characters and sets the utf16_length_. |
| 23 // The first buffer_length utf16 chars are cached in the buffer. | 23 // The first buffer_length utf16 chars are cached in the buffer. |
| 24 void Reset(uint16_t* buffer, size_t buffer_length, const uint8_t* stream, | 24 void Reset(uint16_t* buffer, size_t buffer_length, const uint8_t* stream, |
| 25 size_t stream_length); | 25 size_t stream_length); |
| 26 static void WriteUtf16Slow(const uint8_t* stream, uint16_t* data, | 26 static void WriteUtf16Slow(const uint8_t* stream, size_t stream_length, |
| 27 size_t length); | 27 uint16_t* data, size_t length); |
| 28 const uint8_t* unbuffered_start_; | 28 const uint8_t* unbuffered_start_; |
| 29 size_t unbuffered_length_; |
| 29 size_t utf16_length_; | 30 size_t utf16_length_; |
| 30 bool last_byte_of_buffer_unused_; | 31 bool last_byte_of_buffer_unused_; |
| 31 | 32 |
| 32 private: | 33 private: |
| 33 DISALLOW_COPY_AND_ASSIGN(Utf8DecoderBase); | 34 DISALLOW_COPY_AND_ASSIGN(Utf8DecoderBase); |
| 34 }; | 35 }; |
| 35 | 36 |
| 36 template <size_t kBufferSize> | 37 template <size_t kBufferSize> |
| 37 class Utf8Decoder : public Utf8DecoderBase { | 38 class Utf8Decoder : public Utf8DecoderBase { |
| 38 public: | 39 public: |
| 39 inline Utf8Decoder() {} | 40 inline Utf8Decoder() {} |
| 40 inline Utf8Decoder(const char* stream, size_t length); | 41 inline Utf8Decoder(const char* stream, size_t length); |
| 41 inline void Reset(const char* stream, size_t length); | 42 inline void Reset(const char* stream, size_t length); |
| 42 inline size_t WriteUtf16(uint16_t* data, size_t length) const; | 43 inline size_t WriteUtf16(uint16_t* data, size_t length) const; |
| 43 | 44 |
| 44 private: | 45 private: |
| 45 uint16_t buffer_[kBufferSize]; | 46 uint16_t buffer_[kBufferSize]; |
| 46 }; | 47 }; |
| 47 | 48 |
| 48 | 49 |
| 49 Utf8DecoderBase::Utf8DecoderBase() | 50 Utf8DecoderBase::Utf8DecoderBase() |
| 50 : unbuffered_start_(NULL), | 51 : unbuffered_start_(NULL), |
| 52 unbuffered_length_(0), |
| 51 utf16_length_(0), | 53 utf16_length_(0), |
| 52 last_byte_of_buffer_unused_(false) {} | 54 last_byte_of_buffer_unused_(false) {} |
| 53 | 55 |
| 54 | 56 |
| 55 Utf8DecoderBase::Utf8DecoderBase(uint16_t* buffer, size_t buffer_length, | 57 Utf8DecoderBase::Utf8DecoderBase(uint16_t* buffer, size_t buffer_length, |
| 56 const uint8_t* stream, size_t stream_length) { | 58 const uint8_t* stream, size_t stream_length) { |
| 57 Reset(buffer, buffer_length, stream, stream_length); | 59 Reset(buffer, buffer_length, stream, stream_length); |
| 58 } | 60 } |
| 59 | 61 |
| 60 | 62 |
| (...skipping 16 matching lines...) |
| 77 DCHECK(length > 0); | 79 DCHECK(length > 0); |
| 78 if (length > utf16_length_) length = utf16_length_; | 80 if (length > utf16_length_) length = utf16_length_; |
| 79 // memcpy everything in buffer. | 81 // memcpy everything in buffer. |
| 80 size_t buffer_length = | 82 size_t buffer_length = |
| 81 last_byte_of_buffer_unused_ ? kBufferSize - 1 : kBufferSize; | 83 last_byte_of_buffer_unused_ ? kBufferSize - 1 : kBufferSize; |
| 82 size_t memcpy_length = length <= buffer_length ? length : buffer_length; | 84 size_t memcpy_length = length <= buffer_length ? length : buffer_length; |
| 83 v8::internal::MemCopy(data, buffer_, memcpy_length * sizeof(uint16_t)); | 85 v8::internal::MemCopy(data, buffer_, memcpy_length * sizeof(uint16_t)); |
| 84 if (length <= buffer_length) return length; | 86 if (length <= buffer_length) return length; |
| 85 DCHECK(unbuffered_start_ != NULL); | 87 DCHECK(unbuffered_start_ != NULL); |
| 86 // Copy the rest the slow way. | 88 // Copy the rest the slow way. |
| 87 WriteUtf16Slow(unbuffered_start_, data + buffer_length, | 89 WriteUtf16Slow(unbuffered_start_, unbuffered_length_, data + buffer_length, |
| 88 length - buffer_length); | 90 length - buffer_length); |
| 89 return length; | 91 return length; |
| 90 } | 92 } |
| 91 | 93 |
| 92 class Latin1 { | 94 class Latin1 { |
| 93 public: | 95 public: |
| 94 static const unsigned kMaxChar = 0xff; | 96 static const unsigned kMaxChar = 0xff; |
| 95 // Returns 0 if character does not convert to single latin-1 character | 97 // Returns 0 if character does not convert to single latin-1 character |
| 96 // or if the character does not convert back to latin-1 via inverse | 98 // or if the character does not convert back to latin-1 via inverse |
| 97 // operation (upper to lower, etc). | 99 // operation (upper to lower, etc). |
| (...skipping 13 matching lines...) |
| 111 case 0x178: | 113 case 0x178: |
| 112 return 0xff; | 114 return 0xff; |
| 113 } | 115 } |
| 114 return 0; | 116 return 0; |
| 115 } | 117 } |
| 116 | 118 |
| 117 | 119 |
| 118 } // namespace unibrow | 120 } // namespace unibrow |
| 119 | 121 |
| 120 #endif // V8_UNICODE_DECODER_H_ | 122 #endif // V8_UNICODE_DECODER_H_ |
| OLD | NEW |