OLD | NEW |
1 // Copyright 2014 the V8 project authors. All rights reserved. | 1 // Copyright 2014 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef V8_UNICODE_DECODER_H_ | 5 #ifndef V8_UNICODE_DECODER_H_ |
6 #define V8_UNICODE_DECODER_H_ | 6 #define V8_UNICODE_DECODER_H_ |
7 | 7 |
8 #include <sys/types.h> | 8 #include <sys/types.h> |
9 #include "src/globals.h" | 9 #include "src/globals.h" |
10 | 10 |
11 namespace unibrow { | 11 namespace unibrow { |
12 | 12 |
13 class Utf8DecoderBase { | 13 class Utf8DecoderBase { |
14 public: | 14 public: |
15 // Initialization done in subclass. | 15 // Initialization done in subclass. |
16 inline Utf8DecoderBase(); | 16 inline Utf8DecoderBase(); |
17 inline Utf8DecoderBase(uint16_t* buffer, size_t buffer_length, | 17 inline Utf8DecoderBase(uint16_t* buffer, size_t buffer_length, |
18 const uint8_t* stream, size_t stream_length); | 18 const uint8_t* stream, size_t stream_length); |
19 inline size_t Utf16Length() const { return utf16_length_; } | 19 inline size_t Utf16Length() const { return utf16_length_; } |
20 | 20 |
21 protected: | 21 protected: |
22 // This reads all characters and sets the utf16_length_. | 22 // This reads all characters and sets the utf16_length_. |
23 // The first buffer_length utf16 chars are cached in the buffer. | 23 // The first buffer_length utf16 chars are cached in the buffer. |
24 void Reset(uint16_t* buffer, size_t buffer_length, const uint8_t* stream, | 24 void Reset(uint16_t* buffer, size_t buffer_length, const uint8_t* stream, |
25 size_t stream_length); | 25 size_t stream_length); |
26 static void WriteUtf16Slow(const uint8_t* stream, uint16_t* data, | 26 static void WriteUtf16Slow(const uint8_t* stream, size_t stream_length, |
27 size_t length); | 27 uint16_t* data, size_t length); |
28 const uint8_t* unbuffered_start_; | 28 const uint8_t* unbuffered_start_; |
| 29 size_t unbuffered_length_; |
29 size_t utf16_length_; | 30 size_t utf16_length_; |
30 bool last_byte_of_buffer_unused_; | 31 bool last_byte_of_buffer_unused_; |
31 | 32 |
32 private: | 33 private: |
33 DISALLOW_COPY_AND_ASSIGN(Utf8DecoderBase); | 34 DISALLOW_COPY_AND_ASSIGN(Utf8DecoderBase); |
34 }; | 35 }; |
35 | 36 |
36 template <size_t kBufferSize> | 37 template <size_t kBufferSize> |
37 class Utf8Decoder : public Utf8DecoderBase { | 38 class Utf8Decoder : public Utf8DecoderBase { |
38 public: | 39 public: |
39 inline Utf8Decoder() {} | 40 inline Utf8Decoder() {} |
40 inline Utf8Decoder(const char* stream, size_t length); | 41 inline Utf8Decoder(const char* stream, size_t length); |
41 inline void Reset(const char* stream, size_t length); | 42 inline void Reset(const char* stream, size_t length); |
42 inline size_t WriteUtf16(uint16_t* data, size_t length) const; | 43 inline size_t WriteUtf16(uint16_t* data, size_t length) const; |
43 | 44 |
44 private: | 45 private: |
45 uint16_t buffer_[kBufferSize]; | 46 uint16_t buffer_[kBufferSize]; |
46 }; | 47 }; |
47 | 48 |
48 | 49 |
49 Utf8DecoderBase::Utf8DecoderBase() | 50 Utf8DecoderBase::Utf8DecoderBase() |
50 : unbuffered_start_(NULL), | 51 : unbuffered_start_(NULL), |
| 52 unbuffered_length_(0), |
51 utf16_length_(0), | 53 utf16_length_(0), |
52 last_byte_of_buffer_unused_(false) {} | 54 last_byte_of_buffer_unused_(false) {} |
53 | 55 |
54 | 56 |
55 Utf8DecoderBase::Utf8DecoderBase(uint16_t* buffer, size_t buffer_length, | 57 Utf8DecoderBase::Utf8DecoderBase(uint16_t* buffer, size_t buffer_length, |
56 const uint8_t* stream, size_t stream_length) { | 58 const uint8_t* stream, size_t stream_length) { |
57 Reset(buffer, buffer_length, stream, stream_length); | 59 Reset(buffer, buffer_length, stream, stream_length); |
58 } | 60 } |
59 | 61 |
60 | 62 |
(...skipping 16 matching lines...) Expand all Loading... |
77 DCHECK(length > 0); | 79 DCHECK(length > 0); |
78 if (length > utf16_length_) length = utf16_length_; | 80 if (length > utf16_length_) length = utf16_length_; |
79 // memcpy everything in buffer. | 81 // memcpy everything in buffer. |
80 size_t buffer_length = | 82 size_t buffer_length = |
81 last_byte_of_buffer_unused_ ? kBufferSize - 1 : kBufferSize; | 83 last_byte_of_buffer_unused_ ? kBufferSize - 1 : kBufferSize; |
82 size_t memcpy_length = length <= buffer_length ? length : buffer_length; | 84 size_t memcpy_length = length <= buffer_length ? length : buffer_length; |
83 v8::internal::MemCopy(data, buffer_, memcpy_length * sizeof(uint16_t)); | 85 v8::internal::MemCopy(data, buffer_, memcpy_length * sizeof(uint16_t)); |
84 if (length <= buffer_length) return length; | 86 if (length <= buffer_length) return length; |
85 DCHECK(unbuffered_start_ != NULL); | 87 DCHECK(unbuffered_start_ != NULL); |
86 // Copy the rest the slow way. | 88 // Copy the rest the slow way. |
87 WriteUtf16Slow(unbuffered_start_, data + buffer_length, | 89 WriteUtf16Slow(unbuffered_start_, unbuffered_length_, data + buffer_length, |
88 length - buffer_length); | 90 length - buffer_length); |
89 return length; | 91 return length; |
90 } | 92 } |
91 | 93 |
92 class Latin1 { | 94 class Latin1 { |
93 public: | 95 public: |
94 static const unsigned kMaxChar = 0xff; | 96 static const unsigned kMaxChar = 0xff; |
95 // Returns 0 if character does not convert to single latin-1 character | 97 // Returns 0 if character does not convert to single latin-1 character |
96 // or if the character does not convert back to latin-1 via inverse | 98 // or if the character does not convert back to latin-1 via inverse |
97 // operation (upper to lower, etc). | 99 // operation (upper to lower, etc). |
(...skipping 13 matching lines...) Expand all Loading... |
111 case 0x178: | 113 case 0x178: |
112 return 0xff; | 114 return 0xff; |
113 } | 115 } |
114 return 0; | 116 return 0; |
115 } | 117 } |
116 | 118 |
117 | 119 |
118 } // namespace unibrow | 120 } // namespace unibrow |
119 | 121 |
120 #endif // V8_UNICODE_DECODER_H_ | 122 #endif // V8_UNICODE_DECODER_H_ |
OLD | NEW |