Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(791)

Side by Side Diff: src/unicode-decoder.h

Issue 638643002: Update unicode to 7.0.0. (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge
Patch Set: addressed comment Created 6 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « src/unicode.cc ('k') | src/unicode-decoder.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright 2014 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #ifndef V8_UNICODE_DECODER_H_
6 #define V8_UNICODE_DECODER_H_
7
8 #include <sys/types.h>
9 #include "src/globals.h"
10
11 namespace unibrow {
12
13 class Utf8DecoderBase {
14 public:
15 // Initialization done in subclass.
16 inline Utf8DecoderBase();
17 inline Utf8DecoderBase(uint16_t* buffer, unsigned buffer_length,
18 const uint8_t* stream, unsigned stream_length);
19 inline unsigned Utf16Length() const { return utf16_length_; }
20
21 protected:
22 // This reads all characters and sets the utf16_length_.
23 // The first buffer_length utf16 chars are cached in the buffer.
24 void Reset(uint16_t* buffer, unsigned buffer_length, const uint8_t* stream,
25 unsigned stream_length);
26 static void WriteUtf16Slow(const uint8_t* stream, uint16_t* data,
27 unsigned length);
28 const uint8_t* unbuffered_start_;
29 unsigned utf16_length_;
30 bool last_byte_of_buffer_unused_;
31
32 private:
33 DISALLOW_COPY_AND_ASSIGN(Utf8DecoderBase);
34 };
35
36 template <unsigned kBufferSize>
37 class Utf8Decoder : public Utf8DecoderBase {
38 public:
39 inline Utf8Decoder() {}
40 inline Utf8Decoder(const char* stream, unsigned length);
41 inline void Reset(const char* stream, unsigned length);
42 inline unsigned WriteUtf16(uint16_t* data, unsigned length) const;
43
44 private:
45 uint16_t buffer_[kBufferSize];
46 };
47
48
49 Utf8DecoderBase::Utf8DecoderBase()
50 : unbuffered_start_(NULL),
51 utf16_length_(0),
52 last_byte_of_buffer_unused_(false) {}
53
54
55 Utf8DecoderBase::Utf8DecoderBase(uint16_t* buffer, unsigned buffer_length,
56 const uint8_t* stream,
57 unsigned stream_length) {
58 Reset(buffer, buffer_length, stream, stream_length);
59 }
60
61
62 template <unsigned kBufferSize>
63 Utf8Decoder<kBufferSize>::Utf8Decoder(const char* stream, unsigned length)
64 : Utf8DecoderBase(buffer_, kBufferSize,
65 reinterpret_cast<const uint8_t*>(stream), length) {}
66
67
68 template <unsigned kBufferSize>
69 void Utf8Decoder<kBufferSize>::Reset(const char* stream, unsigned length) {
70 Utf8DecoderBase::Reset(buffer_, kBufferSize,
71 reinterpret_cast<const uint8_t*>(stream), length);
72 }
73
74
75 template <unsigned kBufferSize>
76 unsigned Utf8Decoder<kBufferSize>::WriteUtf16(uint16_t* data,
77 unsigned length) const {
78 DCHECK(length > 0);
79 if (length > utf16_length_) length = utf16_length_;
80 // memcpy everything in buffer.
81 unsigned buffer_length =
82 last_byte_of_buffer_unused_ ? kBufferSize - 1 : kBufferSize;
83 unsigned memcpy_length = length <= buffer_length ? length : buffer_length;
84 v8::internal::MemCopy(data, buffer_, memcpy_length * sizeof(uint16_t));
85 if (length <= buffer_length) return length;
86 DCHECK(unbuffered_start_ != NULL);
87 // Copy the rest the slow way.
88 WriteUtf16Slow(unbuffered_start_, data + buffer_length,
89 length - buffer_length);
90 return length;
91 }
92
// Helpers for the Latin-1 range (code units 0x00..0xff).
class Latin1 {
 public:
  // Largest code unit that is still Latin-1.
  static const unsigned kMaxChar = 0xff;
  // Maps a character above kMaxChar to a single Latin-1 character. Returns 0
  // if the character does not convert to a single Latin-1 character, or if
  // it does not convert back to Latin-1 via the inverse operation (upper to
  // lower, etc).
  static inline uint16_t ConvertNonLatin1ToLatin1(uint16_t c);
};
101
102
103 uint16_t Latin1::ConvertNonLatin1ToLatin1(uint16_t c) {
104 DCHECK(c > Latin1::kMaxChar);
105 switch (c) {
106 // This are equivalent characters in unicode.
107 case 0x39c:
108 case 0x3bc:
109 return 0xb5;
110 // This is an uppercase of a Latin-1 character
111 // outside of Latin-1.
112 case 0x178:
113 return 0xff;
114 }
115 return 0;
116 }
117
118
119 } // namespace unibrow
120
121 #endif // V8_UNICODE_DECODER_H_
OLDNEW
« no previous file with comments | « src/unicode.cc ('k') | src/unicode-decoder.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698