OLD | NEW |
1 // Copyright 2014 the V8 project authors. All rights reserved. | 1 // Copyright 2014 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef V8_UNICODE_DECODER_H_ | 5 #ifndef V8_UNICODE_DECODER_H_ |
6 #define V8_UNICODE_DECODER_H_ | 6 #define V8_UNICODE_DECODER_H_ |
7 | 7 |
8 #include <sys/types.h> | 8 #include <sys/types.h> |
9 #include "src/globals.h" | 9 #include "src/globals.h" |
10 | 10 |
11 namespace unibrow { | 11 namespace unibrow { |
12 | 12 |
13 class Utf8DecoderBase { | 13 class Utf8DecoderBase { |
14 public: | 14 public: |
15 // Initialization done in subclass. | 15 // Initialization done in subclass. |
16 inline Utf8DecoderBase(); | 16 inline Utf8DecoderBase(); |
17 inline Utf8DecoderBase(uint16_t* buffer, unsigned buffer_length, | 17 inline Utf8DecoderBase(uint16_t* buffer, size_t buffer_length, |
18 const uint8_t* stream, unsigned stream_length); | 18 const uint8_t* stream, size_t stream_length); |
19 inline unsigned Utf16Length() const { return utf16_length_; } | 19 inline size_t Utf16Length() const { return utf16_length_; } |
20 | 20 |
21 protected: | 21 protected: |
22 // This reads all characters and sets the utf16_length_. | 22 // This reads all characters and sets the utf16_length_. |
23 // The first buffer_length utf16 chars are cached in the buffer. | 23 // The first buffer_length utf16 chars are cached in the buffer. |
24 void Reset(uint16_t* buffer, unsigned buffer_length, const uint8_t* stream, | 24 void Reset(uint16_t* buffer, size_t buffer_length, const uint8_t* stream, |
25 unsigned stream_length); | 25 size_t stream_length); |
26 static void WriteUtf16Slow(const uint8_t* stream, uint16_t* data, | 26 static void WriteUtf16Slow(const uint8_t* stream, uint16_t* data, |
27 unsigned length); | 27 size_t length); |
28 const uint8_t* unbuffered_start_; | 28 const uint8_t* unbuffered_start_; |
29 unsigned utf16_length_; | 29 size_t utf16_length_; |
30 bool last_byte_of_buffer_unused_; | 30 bool last_byte_of_buffer_unused_; |
31 | 31 |
32 private: | 32 private: |
33 DISALLOW_COPY_AND_ASSIGN(Utf8DecoderBase); | 33 DISALLOW_COPY_AND_ASSIGN(Utf8DecoderBase); |
34 }; | 34 }; |
35 | 35 |
36 template <unsigned kBufferSize> | 36 template <size_t kBufferSize> |
37 class Utf8Decoder : public Utf8DecoderBase { | 37 class Utf8Decoder : public Utf8DecoderBase { |
38 public: | 38 public: |
39 inline Utf8Decoder() {} | 39 inline Utf8Decoder() {} |
40 inline Utf8Decoder(const char* stream, unsigned length); | 40 inline Utf8Decoder(const char* stream, size_t length); |
41 inline void Reset(const char* stream, unsigned length); | 41 inline void Reset(const char* stream, size_t length); |
42 inline unsigned WriteUtf16(uint16_t* data, unsigned length) const; | 42 inline size_t WriteUtf16(uint16_t* data, size_t length) const; |
43 | 43 |
44 private: | 44 private: |
45 uint16_t buffer_[kBufferSize]; | 45 uint16_t buffer_[kBufferSize]; |
46 }; | 46 }; |
47 | 47 |
48 | 48 |
49 Utf8DecoderBase::Utf8DecoderBase() | 49 Utf8DecoderBase::Utf8DecoderBase() |
50 : unbuffered_start_(NULL), | 50 : unbuffered_start_(NULL), |
51 utf16_length_(0), | 51 utf16_length_(0), |
52 last_byte_of_buffer_unused_(false) {} | 52 last_byte_of_buffer_unused_(false) {} |
53 | 53 |
54 | 54 |
55 Utf8DecoderBase::Utf8DecoderBase(uint16_t* buffer, unsigned buffer_length, | 55 Utf8DecoderBase::Utf8DecoderBase(uint16_t* buffer, size_t buffer_length, |
56 const uint8_t* stream, | 56 const uint8_t* stream, size_t stream_length) { |
57 unsigned stream_length) { | |
58 Reset(buffer, buffer_length, stream, stream_length); | 57 Reset(buffer, buffer_length, stream, stream_length); |
59 } | 58 } |
60 | 59 |
61 | 60 |
62 template <unsigned kBufferSize> | 61 template <size_t kBufferSize> |
63 Utf8Decoder<kBufferSize>::Utf8Decoder(const char* stream, unsigned length) | 62 Utf8Decoder<kBufferSize>::Utf8Decoder(const char* stream, size_t length) |
64 : Utf8DecoderBase(buffer_, kBufferSize, | 63 : Utf8DecoderBase(buffer_, kBufferSize, |
65 reinterpret_cast<const uint8_t*>(stream), length) {} | 64 reinterpret_cast<const uint8_t*>(stream), length) {} |
66 | 65 |
67 | 66 |
68 template <unsigned kBufferSize> | 67 template <size_t kBufferSize> |
69 void Utf8Decoder<kBufferSize>::Reset(const char* stream, unsigned length) { | 68 void Utf8Decoder<kBufferSize>::Reset(const char* stream, size_t length) { |
70 Utf8DecoderBase::Reset(buffer_, kBufferSize, | 69 Utf8DecoderBase::Reset(buffer_, kBufferSize, |
71 reinterpret_cast<const uint8_t*>(stream), length); | 70 reinterpret_cast<const uint8_t*>(stream), length); |
72 } | 71 } |
73 | 72 |
74 | 73 |
75 template <unsigned kBufferSize> | 74 template <size_t kBufferSize> |
76 unsigned Utf8Decoder<kBufferSize>::WriteUtf16(uint16_t* data, | 75 size_t Utf8Decoder<kBufferSize>::WriteUtf16(uint16_t* data, |
77 unsigned length) const { | 76 size_t length) const { |
78 DCHECK(length > 0); | 77 DCHECK(length > 0); |
79 if (length > utf16_length_) length = utf16_length_; | 78 if (length > utf16_length_) length = utf16_length_; |
80 // memcpy everything in buffer. | 79 // memcpy everything in buffer. |
81 unsigned buffer_length = | 80 size_t buffer_length = |
82 last_byte_of_buffer_unused_ ? kBufferSize - 1 : kBufferSize; | 81 last_byte_of_buffer_unused_ ? kBufferSize - 1 : kBufferSize; |
83 unsigned memcpy_length = length <= buffer_length ? length : buffer_length; | 82 size_t memcpy_length = length <= buffer_length ? length : buffer_length; |
84 v8::internal::MemCopy(data, buffer_, memcpy_length * sizeof(uint16_t)); | 83 v8::internal::MemCopy(data, buffer_, memcpy_length * sizeof(uint16_t)); |
85 if (length <= buffer_length) return length; | 84 if (length <= buffer_length) return length; |
86 DCHECK(unbuffered_start_ != NULL); | 85 DCHECK(unbuffered_start_ != NULL); |
87 // Copy the rest the slow way. | 86 // Copy the rest the slow way. |
88 WriteUtf16Slow(unbuffered_start_, data + buffer_length, | 87 WriteUtf16Slow(unbuffered_start_, data + buffer_length, |
89 length - buffer_length); | 88 length - buffer_length); |
90 return length; | 89 return length; |
91 } | 90 } |
92 | 91 |
93 class Latin1 { | 92 class Latin1 { |
(...skipping 18 matching lines...) Expand all Loading... |
112 case 0x178: | 111 case 0x178: |
113 return 0xff; | 112 return 0xff; |
114 } | 113 } |
115 return 0; | 114 return 0; |
116 } | 115 } |
117 | 116 |
118 | 117 |
119 } // namespace unibrow | 118 } // namespace unibrow |
120 | 119 |
121 #endif // V8_UNICODE_DECODER_H_ | 120 #endif // V8_UNICODE_DECODER_H_ |
OLD | NEW |