OLD | NEW |
| (Empty) |
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file | |
2 // for details. All rights reserved. Use of this source code is governed by a | |
3 // BSD-style license that can be found in the LICENSE file. | |
4 | |
5 part of dart.utf; | |
6 | |
// ---------------------------------------------------------------------------
// Upper bounds of the code point ranges that fit in 1, 2 and 3 encoded bytes.
// ---------------------------------------------------------------------------

/**
 * Largest code point that is encoded as a single byte.
 *
 * *DEPRECATED*: Use `package:utf/utf.dart` or, when applicable, `dart:convert`
 * instead.
 */
@deprecated
const int _UTF8_ONE_BYTE_MAX = 0x7f;

/**
 * Largest code point that is encoded as two bytes.
 *
 * *DEPRECATED*: Use `package:utf/utf.dart` or, when applicable, `dart:convert`
 * instead.
 */
@deprecated
const int _UTF8_TWO_BYTE_MAX = 0x7ff;

/**
 * Largest code point that is encoded as three bytes.
 *
 * *DEPRECATED*: Use `package:utf/utf.dart` or, when applicable, `dart:convert`
 * instead.
 */
@deprecated
const int _UTF8_THREE_BYTE_MAX = 0xffff;

/**
 * Mask selecting the six payload bits carried by a continuation byte.
 *
 * *DEPRECATED*: Use `package:utf/utf.dart` or, when applicable, `dart:convert`
 * instead.
 */
@deprecated
const int _UTF8_LO_SIX_BIT_MASK = 0x3f;

// ---------------------------------------------------------------------------
// Marker bits of the first byte of a multi-byte sequence. The decoder also
// uses each value as the lower bound of the lead bytes of that length.
// ---------------------------------------------------------------------------

/**
 * Marker bits of the first byte of a two-byte sequence.
 *
 * *DEPRECATED*: Use `package:utf/utf.dart` or, when applicable, `dart:convert`
 * instead.
 */
@deprecated
const int _UTF8_FIRST_BYTE_OF_TWO_BASE = 0xc0;

/**
 * Marker bits of the first byte of a three-byte sequence.
 *
 * *DEPRECATED*: Use `package:utf/utf.dart` or, when applicable, `dart:convert`
 * instead.
 */
@deprecated
const int _UTF8_FIRST_BYTE_OF_THREE_BASE = 0xe0;

/**
 * Marker bits of the first byte of a four-byte sequence.
 *
 * *DEPRECATED*: Use `package:utf/utf.dart` or, when applicable, `dart:convert`
 * instead.
 */
@deprecated
const int _UTF8_FIRST_BYTE_OF_FOUR_BASE = 0xf0;

/**
 * Lower bound of the lead bytes announcing a five-byte sequence. The decoder
 * recognises such sequences only in order to reject them.
 *
 * *DEPRECATED*: Use `package:utf/utf.dart` or, when applicable, `dart:convert`
 * instead.
 */
@deprecated
const int _UTF8_FIRST_BYTE_OF_FIVE_BASE = 0xf8;

/**
 * Lower bound of the lead bytes announcing a six-byte sequence. The decoder
 * recognises such sequences only in order to reject them.
 *
 * *DEPRECATED*: Use `package:utf/utf.dart` or, when applicable, `dart:convert`
 * instead.
 */
@deprecated
const int _UTF8_FIRST_BYTE_OF_SIX_BASE = 0xfc;

// ---------------------------------------------------------------------------
// Masks applied by the encoder to the payload bits stored in a lead byte.
// ---------------------------------------------------------------------------

/**
 * Payload mask for the first byte of a two-byte sequence.
 *
 * *DEPRECATED*: Use `package:utf/utf.dart` or, when applicable, `dart:convert`
 * instead.
 */
@deprecated
const int _UTF8_FIRST_BYTE_OF_TWO_MASK = 0x1f;

/**
 * Payload mask for the first byte of a three-byte sequence.
 *
 * *DEPRECATED*: Use `package:utf/utf.dart` or, when applicable, `dart:convert`
 * instead.
 */
@deprecated
const int _UTF8_FIRST_BYTE_OF_THREE_MASK = 0xf;

/**
 * Payload mask for the first byte of a four-byte sequence.
 *
 * *DEPRECATED*: Use `package:utf/utf.dart` or, when applicable, `dart:convert`
 * instead.
 */
@deprecated
const int _UTF8_FIRST_BYTE_OF_FOUR_MASK = 0x7;

/**
 * Exclusive upper bound for a lead byte: the decoder treats any first byte
 * at or above this value as invalid.
 *
 * *DEPRECATED*: Use `package:utf/utf.dart` or, when applicable, `dart:convert`
 * instead.
 */
@deprecated
const int _UTF8_FIRST_BYTE_BOUND_EXCL = 0xfe;

/**
 * Marker bits OR-ed into every continuation byte written by the encoder.
 *
 * *DEPRECATED*: Use `package:utf/utf.dart` or, when applicable, `dart:convert`
 * instead.
 */
@deprecated
const int _UTF8_SUBSEQUENT_BYTE_BASE = 0x80;
95 | |
/**
 * Wraps [bytes] in an [IterableUtf8Decoder] so the code points can be decoded
 * on demand while iterating.
 *
 * [offset], [length] and [replacementCodepoint] are handed to the
 * [IterableUtf8Decoder] constructor unchanged.
 *
 * *DEPRECATED*: Use `package:utf/utf.dart` or, when applicable, `dart:convert`
 * instead.
 */
@deprecated
IterableUtf8Decoder decodeUtf8AsIterable(List<int> bytes, [int offset = 0,
    int length,
    int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) =>
    new IterableUtf8Decoder(bytes, offset, length, replacementCodepoint);
106 | |
/**
 * Decodes the UTF-8 [bytes] into a [String].
 *
 * A [Utf8Decoder] is created over [bytes] with the given [offset], [length]
 * and [replacementCodepoint]; everything it yields is turned into a string
 * with [String.fromCharCodes].
 *
 * *DEPRECATED*: Use `package:utf/utf.dart` or, when applicable, `dart:convert`
 * instead.
 */
@deprecated
String decodeUtf8(List<int> bytes, [int offset = 0, int length,
    int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
  Utf8Decoder decoder =
      new Utf8Decoder(bytes, offset, length, replacementCodepoint);
  List<int> decoded = decoder.decodeRest();
  return new String.fromCharCodes(decoded);
}
118 | |
/**
 * Encodes [str] as a list of UTF-8 bytes.
 *
 * The string is first converted to code points with [stringToCodepoints];
 * the result is then encoded by [codepointsToUtf8].
 *
 * *DEPRECATED*: Use `package:utf/utf.dart` or, when applicable, `dart:convert`
 * instead.
 */
@deprecated
List<int> encodeUtf8(String str) {
  List<int> codepoints = stringToCodepoints(str);
  return codepointsToUtf8(codepoints);
}
126 | |
/**
 * Writes [bytes] continuation bytes for [value] into [buffer], filling the
 * slots `offset + bytes` down to `offset + 1` (the slot at [offset] itself is
 * left for the caller's lead byte).
 *
 * Each continuation byte carries the lowest six bits still remaining in
 * [value]. Returns what is left of [value] after those bits have been
 * shifted out, i.e. the payload for the lead byte.
 */
int _addToEncoding(int offset, int bytes, int value, List<int> buffer) {
  // Fill from the last continuation byte backwards: the least significant
  // six bits belong in the final byte of the sequence.
  for (int slot = offset + bytes; slot > offset; slot--) {
    int payload = value & _UTF8_LO_SIX_BIT_MASK;
    buffer[slot] = _UTF8_SUBSEQUENT_BYTE_BASE | payload;
    value >>= 6;
  }
  return value;
}
136 | |
/**
 * Encodes a range of [codepoints] as UTF-8 bytes.
 *
 * The range is the one described by [offset] and [length], as interpreted by
 * `_ListRange`. The input is traversed twice: once to compute the exact size
 * of the output list and once to fill it.
 *
 * Code points that have no well-formed UTF-8 encoding are written as the
 * three-byte encoding of U+FFFD (`0xef 0xbf 0xbd`). This covers negative
 * values, values above [UNICODE_VALID_RANGE_MAX] and UTF-16 surrogates
 * ([UNICODE_UTF16_RESERVED_LO] through [UNICODE_UTF16_RESERVED_HI]).
 * Surrogates must not be encoded directly: UTF-8 forbids them, and
 * [Utf8Decoder] rejects the resulting bytes.
 *
 * *DEPRECATED*: Use `package:utf/utf.dart` or, when applicable, `dart:convert`
 * instead.
 */
@deprecated
List<int> codepointsToUtf8(
    List<int> codepoints, [int offset = 0, int length]) {
  // UTF-8 encoding of U+FFFD, emitted for every unencodable code point.
  const List<int> replacementBytes = const [0xef, 0xbf, 0xbd];

  // True for values that cannot be represented in well-formed UTF-8.
  bool needsReplacement(int value) =>
      value < 0 ||
      value > UNICODE_VALID_RANGE_MAX ||
      (value >= UNICODE_UTF16_RESERVED_LO &&
       value <= UNICODE_UTF16_RESERVED_HI);

  _ListRange source = new _ListRange(codepoints, offset, length);

  // First pass: determine the exact number of output bytes.
  int encodedLength = 0;
  for (int value in source) {
    if (needsReplacement(value)) {
      encodedLength += replacementBytes.length;
    } else if (value <= _UTF8_ONE_BYTE_MAX) {
      encodedLength++;
    } else if (value <= _UTF8_TWO_BYTE_MAX) {
      encodedLength += 2;
    } else if (value <= _UTF8_THREE_BYTE_MAX) {
      encodedLength += 3;
    } else {
      // Anything above the three-byte range and still valid needs four bytes.
      encodedLength += 4;
    }
  }

  // Second pass: write the bytes. For multi-byte sequences _addToEncoding
  // fills the continuation bytes and returns the bits left for the lead byte.
  List<int> encoded = new List<int>(encodedLength);
  int insertAt = 0;
  for (int value in source) {
    if (needsReplacement(value)) {
      encoded.setRange(
          insertAt, insertAt + replacementBytes.length, replacementBytes);
      insertAt += replacementBytes.length;
    } else if (value <= _UTF8_ONE_BYTE_MAX) {
      encoded[insertAt] = value;
      insertAt++;
    } else if (value <= _UTF8_TWO_BYTE_MAX) {
      encoded[insertAt] = _UTF8_FIRST_BYTE_OF_TWO_BASE | (
          _UTF8_FIRST_BYTE_OF_TWO_MASK &
          _addToEncoding(insertAt, 1, value, encoded));
      insertAt += 2;
    } else if (value <= _UTF8_THREE_BYTE_MAX) {
      encoded[insertAt] = _UTF8_FIRST_BYTE_OF_THREE_BASE | (
          _UTF8_FIRST_BYTE_OF_THREE_MASK &
          _addToEncoding(insertAt, 2, value, encoded));
      insertAt += 3;
    } else {
      encoded[insertAt] = _UTF8_FIRST_BYTE_OF_FOUR_BASE | (
          _UTF8_FIRST_BYTE_OF_FOUR_MASK &
          _addToEncoding(insertAt, 3, value, encoded));
      insertAt += 4;
    }
  }
  return encoded;
}
189 | |
/**
 * Decodes [utf8EncodedBytes] into a list of code points.
 *
 * A [Utf8Decoder] is created with the given [offset], [length] and
 * [replacementCodepoint] and drained with [Utf8Decoder.decodeRest].
 *
 * *DEPRECATED*: Use `package:utf/utf.dart` or, when applicable, `dart:convert`
 * instead.
 */
@deprecated
List<int> utf8ToCodepoints(
    List<int> utf8EncodedBytes, [int offset = 0, int length,
    int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
  Utf8Decoder decoder = new Utf8Decoder(
      utf8EncodedBytes, offset, length, replacementCodepoint);
  return decoder.decodeRest();
}
201 | |
/**
 * An [Iterable] view over UTF-8 encoded bytes that yields code points.
 *
 * The object only stores the decoding parameters. Each read of [iterator]
 * creates a fresh [Utf8Decoder] over the same [bytes].
 *
 * *DEPRECATED*: Use `package:utf/utf.dart` or, when applicable, `dart:convert`
 * instead.
 */
@deprecated
class IterableUtf8Decoder extends IterableBase<int> {
  /** The UTF-8 encoded input. */
  final List<int> bytes;

  /** Offset passed on to [Utf8Decoder]; defaults to 0. */
  final int offset;

  /** Length passed on to [Utf8Decoder]; `null` unless given. */
  final int length;

  /** Replacement code point passed on to [Utf8Decoder]. */
  final int replacementCodepoint;

  IterableUtf8Decoder(this.bytes, [this.offset = 0, this.length,
      this.replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]);

  /** Returns a new decoder positioned at the start of the configured range. */
  Utf8Decoder get iterator {
    return new Utf8Decoder(bytes, offset, length, replacementCodepoint);
  }
}
219 | |
/**
 * An [Iterator] that decodes UTF-8 bytes into code points, one per call to
 * [moveNext].
 *
 * Invalid input is reported in one of two ways: if [replacementCodepoint] is
 * not `null` it is produced in place of the invalid data, otherwise an
 * [ArgumentError] is thrown.
 *
 * *DEPRECATED*: Use `package:utf/utf.dart` or, when applicable, `dart:convert`
 * instead.
 */
@deprecated
class Utf8Decoder implements Iterator<int> {
  final _ListRangeIterator utf8EncodedBytesIterator;
  final int replacementCodepoint;
  int _current;

  Utf8Decoder(List<int> utf8EncodedBytes, [int offset = 0, int length,
      this.replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT])
      : utf8EncodedBytesIterator =
            (new _ListRange(utf8EncodedBytes, offset, length)).iterator;

  Utf8Decoder._fromListRangeIterator(_ListRange source,
      [this.replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT])
      : utf8EncodedBytesIterator = source.iterator;

  /**
   * Decodes the remainder of the input and returns the code points as a
   * [List<int>].
   */
  List<int> decodeRest() {
    // The number of remaining bytes is an upper bound on the number of code
    // points, so size the buffer by it and trim afterwards if necessary.
    List<int> buffer = new List<int>(utf8EncodedBytesIterator.remaining);
    int count = 0;
    while (moveNext()) {
      buffer[count] = current;
      count++;
    }
    if (count == buffer.length) {
      return buffer;
    }
    List<int> trimmed = new List<int>(count);
    trimmed.setRange(0, count, buffer);
    return trimmed;
  }

  /** The code point produced by the last successful [moveNext]. */
  int get current => _current;

  /**
   * Decodes the next code point.
   *
   * Returns `false` once the input is exhausted. A sequence is rejected when
   * its lead byte is invalid, a continuation byte is missing, the value is a
   * UTF-16 surrogate, the encoding is longer than necessary (which includes
   * every five- and six-byte form), or the value exceeds
   * [UNICODE_VALID_RANGE_MAX].
   */
  bool moveNext() {
    _current = null;

    if (!utf8EncodedBytesIterator.moveNext()) return false;

    int lead = utf8EncodedBytesIterator.current;

    // Negative values are never valid input.
    if (lead < 0) {
      return _substituteOrFail(0);
    }
    // A single-byte code point stands for itself.
    if (lead <= _UTF8_ONE_BYTE_MAX) {
      _current = lead;
      return true;
    }
    // A continuation byte in lead position, or a value too large to be a
    // lead byte.
    if (lead < _UTF8_FIRST_BYTE_OF_TWO_BASE ||
        lead >= _UTF8_FIRST_BYTE_BOUND_EXCL) {
      return _substituteOrFail(0);
    }

    // The lead byte announces how many continuation bytes follow and
    // contributes the leading payload bits.
    int expected;
    int codepoint;
    if (lead < _UTF8_FIRST_BYTE_OF_THREE_BASE) {
      expected = 1;
      codepoint = lead - _UTF8_FIRST_BYTE_OF_TWO_BASE;
    } else if (lead < _UTF8_FIRST_BYTE_OF_FOUR_BASE) {
      expected = 2;
      codepoint = lead - _UTF8_FIRST_BYTE_OF_THREE_BASE;
    } else if (lead < _UTF8_FIRST_BYTE_OF_FIVE_BASE) {
      expected = 3;
      codepoint = lead - _UTF8_FIRST_BYTE_OF_FOUR_BASE;
    } else if (lead < _UTF8_FIRST_BYTE_OF_SIX_BASE) {
      expected = 4;
      codepoint = lead - _UTF8_FIRST_BYTE_OF_FIVE_BASE;
    } else {
      expected = 5;
      codepoint = lead - _UTF8_FIRST_BYTE_OF_SIX_BASE;
    }

    int consumed = 0;
    while (consumed < expected && utf8EncodedBytesIterator.moveNext()) {
      int unit = utf8EncodedBytesIterator.current;
      bool isContinuation =
          unit > _UTF8_ONE_BYTE_MAX && unit < _UTF8_FIRST_BYTE_OF_TWO_BASE;
      if (!isContinuation) {
        // A byte that could start a new sequence is handed back so the next
        // call decodes it; any other unexpected byte stays consumed.
        if (unit >= _UTF8_FIRST_BYTE_OF_TWO_BASE) {
          utf8EncodedBytesIterator.backup();
        }
        break;
      }
      codepoint = (codepoint << 6) | (unit & _UTF8_LO_SIX_BIT_MASK);
      consumed++;
    }

    bool complete = consumed == expected;
    bool isSurrogate = codepoint >= UNICODE_UTF16_RESERVED_LO &&
        codepoint <= UNICODE_UTF16_RESERVED_HI;
    // Only one to three continuation bytes can ever pass this check, so the
    // five- and six-byte forms are always rejected here.
    bool shortestForm =
        (expected == 1 && codepoint > _UTF8_ONE_BYTE_MAX) ||
        (expected == 2 && codepoint > _UTF8_TWO_BYTE_MAX) ||
        (expected == 3 && codepoint > _UTF8_THREE_BYTE_MAX);
    bool inRange = codepoint <= UNICODE_VALID_RANGE_MAX;

    if (complete && !isSurrogate && shortestForm && inRange) {
      _current = codepoint;
      return true;
    }
    return _substituteOrFail(consumed);
  }

  /**
   * Handles invalid input: yields [replacementCodepoint] when one is
   * configured, otherwise throws an [ArgumentError]. The position in the
   * error message is the iterator position minus [rewind].
   */
  bool _substituteOrFail(int rewind) {
    if (replacementCodepoint != null) {
      _current = replacementCodepoint;
      return true;
    }
    throw new ArgumentError(
        "Invalid UTF8 at ${utf8EncodedBytesIterator.position - rewind}");
  }
}
OLD | NEW |