Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(691)

Side by Side Diff: packages/utf/lib/src/utf/utf32.dart

Issue 2989763002: Update charted to 0.4.8 and roll (Closed)
Patch Set: Removed Cutch from list of reviewers Created 3 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « packages/utf/lib/src/utf/utf16.dart ('k') | packages/utf/lib/src/utf/utf8.dart » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file.
4
5 part of utf;
6
/**
 * Lazily decodes UTF-32 [bytes] into an iterable of codepoints, so that a
 * consumer only converts as much of the input as it actually iterates over.
 *
 * The byte order is taken from a leading BOM when one is present; without a
 * BOM the input is read as big-endian. A leading BOM is always stripped.
 * Pass `null` as [replacementCodepoint] to have an ArgumentError thrown for
 * a bad value instead of substituting a replacement.
 */
IterableUtf32Decoder decodeUtf32AsIterable(List<int> bytes, [
    int offset = 0, int length,
    int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
  // A fresh decoder is built every time the iterable is asked for an iterator.
  Utf32BytesDecoder makeDecoder() {
    return new Utf32BytesDecoder(bytes, offset, length, replacementCodepoint);
  }
  return new IterableUtf32Decoder._(makeDecoder);
}
20
/**
 * Lazily decodes UTF-32BE [bytes] into an iterable of codepoints, so that a
 * consumer only converts as much of the input as it actually iterates over.
 *
 * A leading big-endian BOM is stripped by default; pass `false` for
 * [stripBom] to keep it. Pass `null` as [replacementCodepoint] to have an
 * ArgumentError thrown for a bad value instead of substituting a replacement.
 */
IterableUtf32Decoder decodeUtf32beAsIterable(List<int> bytes, [
    int offset = 0, int length, bool stripBom = true,
    int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
  // A fresh decoder is built every time the iterable is asked for an iterator.
  Utf32BytesDecoder makeDecoder() {
    return new Utf32beBytesDecoder(
        bytes, offset, length, stripBom, replacementCodepoint);
  }
  return new IterableUtf32Decoder._(makeDecoder);
}
35
/**
 * Lazily decodes UTF-32LE [bytes] into an iterable of codepoints, so that a
 * consumer only converts as much of the input as it actually iterates over.
 *
 * A leading little-endian BOM is stripped by default; pass `false` for
 * [stripBom] to keep it. Pass `null` as [replacementCodepoint] to have an
 * ArgumentError thrown for a bad value instead of substituting a replacement.
 */
IterableUtf32Decoder decodeUtf32leAsIterable(List<int> bytes, [
    int offset = 0, int length, bool stripBom = true,
    int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
  // A fresh decoder is built every time the iterable is asked for an iterator.
  Utf32BytesDecoder makeDecoder() {
    return new Utf32leBytesDecoder(
        bytes, offset, length, stripBom, replacementCodepoint);
  }
  return new IterableUtf32Decoder._(makeDecoder);
}
50
/**
 * Builds a String from UTF-32 encoded [bytes].
 *
 * [offset] and [length] select the part of [bytes] to decode, and
 * [replacementCodepoint] overrides the default Unicode replacement
 * character. Pass `null` as [replacementCodepoint] to have an ArgumentError
 * thrown for a bad value instead of substituting a replacement.
 */
String decodeUtf32(List<int> bytes, [int offset = 0, int length,
    int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
  final Utf32BytesDecoder decoder =
      new Utf32BytesDecoder(bytes, offset, length, replacementCodepoint);
  final List<int> codepoints = decoder.decodeRest();
  return new String.fromCharCodes(codepoints);
}
/**
 * Builds a String from UTF-32BE encoded [bytes].
 *
 * [offset] and [length] select the part of [bytes] to decode, [stripBom]
 * (true by default) controls whether a leading big-endian BOM is dropped,
 * and [replacementCodepoint] overrides the default Unicode replacement
 * character. Pass `null` as [replacementCodepoint] to have an ArgumentError
 * thrown for a bad value instead of substituting a replacement.
 */
String decodeUtf32be(
    List<int> bytes, [int offset = 0, int length, bool stripBom = true,
    int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
  final Utf32BytesDecoder decoder = new Utf32beBytesDecoder(
      bytes, offset, length, stripBom, replacementCodepoint);
  final List<int> codepoints = decoder.decodeRest();
  return new String.fromCharCodes(codepoints);
}
75
/**
 * Builds a String from UTF-32LE encoded [bytes].
 *
 * [offset] and [length] select the part of [bytes] to decode, [stripBom]
 * (true by default) controls whether a leading little-endian BOM is dropped,
 * and [replacementCodepoint] overrides the default Unicode replacement
 * character. Pass `null` as [replacementCodepoint] to have an ArgumentError
 * thrown for a bad value instead of substituting a replacement.
 */
String decodeUtf32le(
    List<int> bytes, [int offset = 0, int length, bool stripBom = true,
    int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
  final Utf32BytesDecoder decoder = new Utf32leBytesDecoder(
      bytes, offset, length, stripBom, replacementCodepoint);
  final List<int> codepoints = decoder.decodeRest();
  return new String.fromCharCodes(codepoints);
}
88
/**
 * Encodes [str] as a list of UTF-32 bytes. The output is big-endian and is
 * prefixed with a big-endian byte-order marker.
 */
List<int> encodeUtf32(String str) {
  // Delegate to the big-endian encoder with BOM output switched on.
  return encodeUtf32be(str, true);
}
95
/**
 * Encodes [str] as a list of UTF-32BE bytes. No BOM is written unless
 * [writeBOM] is true, in which case the output starts with the big-endian
 * byte-order marker.
 */
List<int> encodeUtf32be(String str, [bool writeBOM = false]) {
  final List<int> codepoints = stringToCodepoints(str);
  final int bomLength = writeBOM ? 4 : 0;
  final List<int> bytes = new List<int>(4 * codepoints.length + bomLength);
  int pos = 0;
  if (writeBOM) {
    // Big-endian BOM: 00 00 <hi> <lo>.
    for (int marker in [0, 0, UNICODE_UTF_BOM_HI, UNICODE_UTF_BOM_LO]) {
      bytes[pos++] = marker;
    }
  }
  for (int codepoint in codepoints) {
    // Most significant byte first.
    for (int shift = 24; shift >= 0; shift -= 8) {
      bytes[pos++] = (codepoint >> shift) & UNICODE_BYTE_ZERO_MASK;
    }
  }
  return bytes;
}
119
/**
 * Encodes [str] as a list of UTF-32LE bytes. No BOM is written unless
 * [writeBOM] is true, in which case the output starts with the little-endian
 * byte-order marker.
 */
List<int> encodeUtf32le(String str, [bool writeBOM = false]) {
  final List<int> codepoints = stringToCodepoints(str);
  final int bomLength = writeBOM ? 4 : 0;
  final List<int> bytes = new List<int>(4 * codepoints.length + bomLength);
  int pos = 0;
  if (writeBOM) {
    // Little-endian BOM: <lo> <hi> 00 00.
    for (int marker in [UNICODE_UTF_BOM_LO, UNICODE_UTF_BOM_HI, 0, 0]) {
      bytes[pos++] = marker;
    }
  }
  for (int codepoint in codepoints) {
    // Least significant byte first.
    for (int shift = 0; shift <= 24; shift += 8) {
      bytes[pos++] = (codepoint >> shift) & UNICODE_BYTE_ZERO_MASK;
    }
  }
  return bytes;
}
143
/**
 * Reports whether [utf32EncodedBytes], starting at [offset], begins with a
 * UTF-32 byte-order marker (BOM) of either endianness.
 */
bool hasUtf32Bom(
    List<int> utf32EncodedBytes, [int offset = 0, int length]) {
  // Big-endian is checked first; little-endian only if that fails.
  if (hasUtf32beBom(utf32EncodedBytes, offset, length)) {
    return true;
  }
  return hasUtf32leBom(utf32EncodedBytes, offset, length);
}
153
/**
 * Reports whether [utf32EncodedBytes], starting at [offset], begins with a
 * big-endian UTF-32 byte-order marker (BOM). When [length] is omitted the
 * range runs to the end of the list.
 */
bool hasUtf32beBom(List<int> utf32EncodedBytes, [int offset = 0, int length]) {
  final int end =
      (length == null) ? utf32EncodedBytes.length : offset + length;
  // A BOM needs four bytes inside the range.
  if (offset + 4 > end) {
    return false;
  }
  return utf32EncodedBytes[offset] == 0 &&
      utf32EncodedBytes[offset + 1] == 0 &&
      utf32EncodedBytes[offset + 2] == UNICODE_UTF_BOM_HI &&
      utf32EncodedBytes[offset + 3] == UNICODE_UTF_BOM_LO;
}
165
/**
 * Reports whether [utf32EncodedBytes], starting at [offset], begins with a
 * little-endian UTF-32 byte-order marker (BOM). When [length] is omitted the
 * range runs to the end of the list.
 */
bool hasUtf32leBom(List<int> utf32EncodedBytes, [int offset = 0, int length]) {
  final int end =
      (length == null) ? utf32EncodedBytes.length : offset + length;
  // A BOM needs four bytes inside the range.
  if (offset + 4 > end) {
    return false;
  }
  return utf32EncodedBytes[offset] == UNICODE_UTF_BOM_LO &&
      utf32EncodedBytes[offset + 1] == UNICODE_UTF_BOM_HI &&
      utf32EncodedBytes[offset + 2] == 0 &&
      utf32EncodedBytes[offset + 3] == 0;
}
177
/** Signature of a callback that creates a new [Utf32BytesDecoder]. */
typedef Utf32BytesDecoder Utf32BytesDecoderProvider();

/**
 * Return type of [decodeUtf32AsIterable] and its variants. Each request for
 * an iterator invokes [codeunitsProvider] to obtain a decoder, and that
 * decoder only translates bytes as the caller advances it.
 * (Note: results are not cached.)
 */
// TODO(floitsch): Consider removing the extend and switch to implements since
// that's cheaper to allocate.
class IterableUtf32Decoder extends IterableBase<int> {
  final Utf32BytesDecoderProvider codeunitsProvider;

  IterableUtf32Decoder._(this.codeunitsProvider);

  Utf32BytesDecoder get iterator {
    return codeunitsProvider();
  }
}
194
/**
 * Abstract parent class that converts UTF-32 encoded bytes to codepoints.
 *
 * Subclasses supply [decode], which reads the next four bytes from
 * [utf32EncodedBytesIterator] in their byte order. Positions and counts
 * exposed by this class are expressed in codepoints (groups of four bytes).
 */
abstract class Utf32BytesDecoder implements ListRangeIterator {
  // TODO(kevmoo): should this field be private?
  final ListRangeIterator utf32EncodedBytesIterator;
  final int replacementCodepoint;
  int _current;

  Utf32BytesDecoder._fromListRangeIterator(
      this.utf32EncodedBytesIterator, this.replacementCodepoint);

  /**
   * Picks a concrete decoder from the BOM at [offset]: big-endian or
   * little-endian when the matching BOM is present (the BOM is skipped),
   * and big-endian when there is no BOM.
   */
  factory Utf32BytesDecoder(List<int> utf32EncodedBytes, [
      int offset = 0, int length,
      int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
    final int span =
        (length == null) ? utf32EncodedBytes.length - offset : length;
    if (hasUtf32beBom(utf32EncodedBytes, offset, span)) {
      return new Utf32beBytesDecoder(
          utf32EncodedBytes, offset + 4, span - 4, false, replacementCodepoint);
    }
    if (hasUtf32leBom(utf32EncodedBytes, offset, span)) {
      return new Utf32leBytesDecoder(
          utf32EncodedBytes, offset + 4, span - 4, false, replacementCodepoint);
    }
    return new Utf32beBytesDecoder(
        utf32EncodedBytes, offset, span, false, replacementCodepoint);
  }

  /** Decodes every remaining codepoint into a fixed-length list. */
  List<int> decodeRest() {
    final List<int> codepoints = new List<int>(remaining);
    for (int i = 0; moveNext(); i++) {
      codepoints[i] = current;
    }
    return codepoints;
  }

  int get current => _current;

  /**
   * Advances to the next codepoint. Returns false once the input is
   * exhausted. A trailing group of fewer than four bytes, or an invalid
   * decoded value, yields [replacementCodepoint], or throws an
   * ArgumentError when [replacementCodepoint] is null.
   */
  bool moveNext() {
    _current = null;
    final int bytesLeft = utf32EncodedBytesIterator.remaining;
    if (bytesLeft == 0) {
      return false;
    }
    if (bytesLeft < 4) {
      // Truncated trailing group: consume it before reporting the problem.
      utf32EncodedBytesIterator.skip(utf32EncodedBytesIterator.remaining);
      return _substituteOrThrow();
    }
    final int codepoint = decode();
    if (_validCodepoint(codepoint)) {
      _current = codepoint;
      return true;
    }
    return _substituteOrThrow();
  }

  /**
   * Makes [replacementCodepoint] the current value and returns true, or
   * throws an ArgumentError when no replacement is configured.
   */
  bool _substituteOrThrow() {
    if (replacementCodepoint == null) {
      throw new ArgumentError(
          "Invalid UTF32 at ${utf32EncodedBytesIterator.position}");
    }
    _current = replacementCodepoint;
    return true;
  }

  /** Position of the underlying byte iterator, divided by four. */
  int get position => utf32EncodedBytesIterator.position ~/ 4;

  /** Moves back [by] codepoints (four bytes each). */
  void backup([int by = 1]) {
    utf32EncodedBytesIterator.backup(4 * by);
  }

  /** Codepoints left to read; a partial trailing group counts as one. */
  int get remaining => (utf32EncodedBytesIterator.remaining + 3) ~/ 4;

  /** Moves forward [count] codepoints (four bytes each). */
  void skip([int count = 1]) {
    utf32EncodedBytesIterator.skip(4 * count);
  }

  /** Reads the next four bytes and combines them into one value. */
  int decode();
}
280
/**
 * Converts UTF-32BE encoded bytes to codepoints by combining each group of
 * four bytes, most significant byte first, into one unicode codepoint.
 */
class Utf32beBytesDecoder extends Utf32BytesDecoder {
  Utf32beBytesDecoder(List<int> utf32EncodedBytes, [int offset = 0,
      int length, bool stripBom = true,
      int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT])
      : super._fromListRangeIterator(
            (new ListRange(utf32EncodedBytes, offset, length)).iterator,
            replacementCodepoint) {
    if (stripBom) {
      // Step over a leading big-endian BOM so it is not decoded as text.
      if (hasUtf32beBom(utf32EncodedBytes, offset, length)) {
        skip();
      }
    }
  }

  int decode() {
    final ListRangeIterator source = utf32EncodedBytesIterator;
    source.moveNext();
    int codepoint = source.current;
    // Shift in the three remaining bytes, each less significant than the last.
    for (int k = 0; k < 3; k++) {
      source.moveNext();
      codepoint = (codepoint << 8) + source.current;
    }
    return codepoint;
  }
}
309
/**
 * Converts UTF-32LE encoded bytes to codepoints by combining each group of
 * four bytes, least significant byte first, into one unicode codepoint.
 */
class Utf32leBytesDecoder extends Utf32BytesDecoder {
  Utf32leBytesDecoder(List<int> utf32EncodedBytes, [int offset = 0,
      int length, bool stripBom = true,
      int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT])
      : super._fromListRangeIterator(
            (new ListRange(utf32EncodedBytes, offset, length)).iterator,
            replacementCodepoint) {
    if (stripBom) {
      // Step over a leading little-endian BOM so it is not decoded as text.
      if (hasUtf32leBom(utf32EncodedBytes, offset, length)) {
        skip();
      }
    }
  }

  int decode() {
    final ListRangeIterator source = utf32EncodedBytesIterator;
    source.moveNext();
    int codepoint = source.current;
    // Add the three remaining bytes, each more significant than the last.
    for (int shift = 8; shift <= 24; shift += 8) {
      source.moveNext();
      codepoint += (source.current << shift);
    }
    return codepoint;
  }
}
338
/**
 * Reports whether [codepoint] may be emitted by the decoder: it must be
 * non-negative, must not exceed UNICODE_VALID_RANGE_MAX, and must lie
 * outside the UTF-16 surrogate range UNICODE_UTF16_RESERVED_LO through
 * UNICODE_UTF16_RESERVED_HI.
 *
 * The upper bound is inclusive so that the last code point is accepted.
 * NOTE(review): this assumes UNICODE_VALID_RANGE_MAX is the largest valid
 * code point (U+10FFFF), as its name suggests; it is defined outside this
 * file, so confirm against its declaration.
 */
bool _validCodepoint(int codepoint) {
  if (codepoint < 0 || codepoint > UNICODE_VALID_RANGE_MAX) {
    return false;
  }
  return codepoint < UNICODE_UTF16_RESERVED_LO ||
      codepoint > UNICODE_UTF16_RESERVED_HI;
}
OLDNEW
« no previous file with comments | « packages/utf/lib/src/utf/utf16.dart ('k') | packages/utf/lib/src/utf/utf8.dart » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698