Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(289)

Side by Side Diff: tools/servicec/lib/src/resources/cc/unicode.cc

Issue 2035023003: Remove service-compiler related code. (Closed) Base URL: git@github.com:dartino/sdk.git@master
Patch Set: Created 4 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file.
4
5 #include "unicode.h"
6
7 const int8_t Utf8::kTrailBytes[256] = {
8 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
9 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
10 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
11 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
12 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
13 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
14 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
15 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
16 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
17 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
18 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
19 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
20 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
21 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
22 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
23 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 0, 0
24 };
25
26
27 const uint32_t Utf8::kMagicBits[7] = {
28 0, // Padding.
29 0x00000000,
30 0x00003080,
31 0x000E2080,
32 0x03C82080,
33 0xFA082080,
34 0x82082080
35 };
36
37
38 // Minimum values of code points used to check shortest form.
39 const uint32_t Utf8::kOverlongMinimum[7] = {
40 0, // Padding.
41 0x0,
42 0x80,
43 0x800,
44 0x10000,
45 0xFFFFFFFF,
46 0xFFFFFFFF
47 };
48
49 class CodePointIterator {
50 public:
51 explicit CodePointIterator(List<uint16_t> str)
52 : str_(str),
53 ch_(0),
54 index_(-1),
55 end_(str.length()) {
56 }
57
58 int32_t Current() const {
59 return ch_;
60 }
61
62 bool Next() {
63 intptr_t length = Utf16::Length(ch_);
64 if (index_ < (end_ - length)) {
65 index_ += length;
66 ch_ = str_[index_];
67 if (Utf16::IsLeadSurrogate(ch_) && (index_ < (end_ - 1))) {
68 int32_t ch2 = str_[index_ + 1];
69 if (Utf16::IsTrailSurrogate(ch2)) {
70 ch_ = Utf16::Decode(ch_, ch2);
71 }
72 }
73 return true;
74 }
75 index_ = end_;
76 return false;
77 }
78
79 private:
80 List<uint16_t> str_;
81 int32_t ch_;
82 intptr_t index_;
83 intptr_t end_;
84 };
85
86 // Returns the most restricted coding form in which the sequence of utf8
87 // characters in 'utf8_array' can be represented in, and the number of
88 // code units needed in that form.
89 intptr_t Utf8::CodeUnitCount(const char* utf8_array,
90 intptr_t array_len,
91 Type* type) {
92 intptr_t len = 0;
93 Type char_type = kLatin1;
94 for (intptr_t i = 0; i < array_len; i++) {
95 uint8_t code_unit = utf8_array[i];
96 if (!IsTrailByte(code_unit)) {
97 ++len;
98 if (!IsLatin1SequenceStart(code_unit)) { // > U+00FF
99 if (IsSupplementarySequenceStart(code_unit)) { // >= U+10000
100 char_type = kSupplementary;
101 ++len;
102 } else if (char_type == kLatin1) {
103 char_type = kBMP;
104 }
105 }
106 }
107 }
108 *type = char_type;
109 return len;
110 }
111
112 intptr_t Utf8::Length(int32_t ch) {
113 if (ch <= kMaxOneByteChar) {
114 return 1;
115 } else if (ch <= kMaxTwoByteChar) {
116 return 2;
117 } else if (ch <= kMaxThreeByteChar) {
118 return 3;
119 }
120 return 4;
121 }
122
123 intptr_t Utf8::Length(List<uint16_t> str) {
124 intptr_t length = 0;
125 CodePointIterator it(str);
126 while (it.Next()) {
127 int32_t ch = it.Current();
128 length += Utf8::Length(ch);
129 }
130 return length;
131 }
132
133 intptr_t Utf8::Encode(int32_t ch, char* dst) {
134 static const int kMask = ~(1 << 6);
135 if (ch <= kMaxOneByteChar) {
136 dst[0] = ch;
137 return 1;
138 }
139 if (ch <= kMaxTwoByteChar) {
140 dst[0] = 0xC0 | (ch >> 6);
141 dst[1] = 0x80 | (ch & kMask);
142 return 2;
143 }
144 if (ch <= kMaxThreeByteChar) {
145 dst[0] = 0xE0 | (ch >> 12);
146 dst[1] = 0x80 | ((ch >> 6) & kMask);
147 dst[2] = 0x80 | (ch & kMask);
148 return 3;
149 }
150 dst[0] = 0xF0 | (ch >> 18);
151 dst[1] = 0x80 | ((ch >> 12) & kMask);
152 dst[2] = 0x80 | ((ch >> 6) & kMask);
153 dst[3] = 0x80 | (ch & kMask);
154 return 4;
155 }
156
157 intptr_t Utf8::Encode(List<uint16_t> src, char* dst, intptr_t len) {
158 intptr_t pos = 0;
159 CodePointIterator it(src);
160 while (it.Next()) {
161 int32_t ch = it.Current();
162 intptr_t num_bytes = Utf8::Length(ch);
163 if (pos + num_bytes > len) {
164 break;
165 }
166 Utf8::Encode(ch, &dst[pos]);
167 pos += num_bytes;
168 }
169 return pos;
170 }
171
172 intptr_t Utf8::Decode(const char* utf8_array,
173 intptr_t array_len,
174 int32_t* dst) {
175 uint32_t ch = utf8_array[0] & 0xFF;
176 intptr_t i = 1;
177 if (ch >= 0x80) {
178 intptr_t num_trail_bytes = kTrailBytes[ch];
179 bool is_malformed = false;
180 for (; i < num_trail_bytes; ++i) {
181 if (i < array_len) {
182 uint8_t code_unit = utf8_array[i];
183 is_malformed |= !IsTrailByte(code_unit);
184 ch = (ch << 6) + code_unit;
185 } else {
186 *dst = -1;
187 return 0;
188 }
189 }
190 ch -= kMagicBits[num_trail_bytes];
191 if (!((is_malformed == false) &&
192 (i == num_trail_bytes) &&
193 !Utf::IsOutOfRange(ch) &&
194 !IsNonShortestForm(ch, i) &&
195 !Utf16::IsSurrogate(ch))) {
196 *dst = -1;
197 return 0;
198 }
199 }
200 *dst = ch;
201 return i;
202 }
203
204 bool Utf8::DecodeToUTF16(const char* utf8_array,
205 intptr_t array_len,
206 uint16_t* dst,
207 intptr_t len) {
208 intptr_t i = 0;
209 intptr_t j = 0;
210 intptr_t num_bytes;
211 for (; (i < array_len) && (j < len); i += num_bytes, ++j) {
212 int32_t ch;
213 bool is_supplementary = IsSupplementarySequenceStart(utf8_array[i]);
214 num_bytes = Utf8::Decode(&utf8_array[i], (array_len - i), &ch);
215 if (ch == -1) {
216 return false; // Invalid input.
217 }
218 if (is_supplementary) {
219 Utf16::Encode(ch, &dst[j]);
220 j = j + 1;
221 } else {
222 dst[j] = ch;
223 }
224 }
225 if ((i < array_len) && (j == len)) {
226 return false; // Output overflow.
227 }
228 return true; // Success.
229 }
230
231 void Utf16::Encode(int32_t codepoint, uint16_t* dst) {
232 dst[0] = (Utf16::kLeadSurrogateOffset + (codepoint >> 10));
233 dst[1] = (0xDC00 + (codepoint & 0x3FF));
234 }
OLDNEW
« no previous file with comments | « tools/servicec/lib/src/resources/cc/unicode.h ('k') | tools/servicec/lib/src/resources/dart/struct.dart » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698