Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(61)

Side by Side Diff: packages/utf/lib/src/utf16.dart

Issue 2989763002: Update charted to 0.4.8 and roll (Closed)
Patch Set: Removed Cutch from list of reviewers Created 3 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « packages/utf/lib/src/utf/utf_stream.dart ('k') | packages/utf/lib/src/utf32.dart » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 part of utf; 5 library utf.utf16;
6 6
7 // TODO(jmesserly): would be nice to have this on String (dartbug.com/6501). 7 import "dart:collection";
8 /** 8
9 * Provide a list of Unicode codepoints for a given string. 9 import 'constants.dart';
10 */ 10 import 'list_range.dart';
11 List<int> stringToCodepoints(String str) { 11 import 'utf_16_code_unit_decoder.dart';
12 // Note: str.codeUnits gives us 16-bit code units on all Dart implementations. 12 import 'util.dart';
13 // So we need to convert.
14 return utf16CodeUnitsToCodepoints(str.codeUnits);
15 }
16 13
17 /** 14 /**
18 * Generate a string from the provided Unicode codepoints. 15 * Generate a string from the provided Unicode codepoints.
19 * 16 *
20 * *Deprecated* Use [String.fromCharCodes] instead. 17 * *Deprecated* Use [String.fromCharCodes] instead.
21 */ 18 */
22 @deprecated 19 @deprecated
23 String codepointsToString(List<int> codepoints) { 20 String codepointsToString(List<int> codepoints) {
24 return new String.fromCharCodes(codepoints); 21 return new String.fromCharCodes(codepoints);
25 } 22 }
23
26 /** 24 /**
27 * Decodes the UTF-16 bytes as an iterable. Thus, the consumer can only convert 25 * Decodes the UTF-16 bytes as an iterable. Thus, the consumer can only convert
28 * as much of the input as needed. Determines the byte order from the BOM, 26 * as much of the input as needed. Determines the byte order from the BOM,
29 * or uses big-endian as a default. This method always strips a leading BOM. 27 * or uses big-endian as a default. This method always strips a leading BOM.
30 * Set the [replacementCodepoint] to null to throw an ArgumentError 28 * Set the [replacementCodepoint] to null to throw an ArgumentError
31 * rather than replace the bad value. The default value for 29 * rather than replace the bad value. The default value for
32 * [replacementCodepoint] is U+FFFD. 30 * [replacementCodepoint] is U+FFFD.
33 */ 31 */
34 IterableUtf16Decoder decodeUtf16AsIterable(List<int> bytes, [int offset = 0, 32 IterableUtf16Decoder decodeUtf16AsIterable(List<int> bytes,
35 int length, int replacementCodepoint = 33 [int offset = 0,
36 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { 34 int length,
35 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
37 return new IterableUtf16Decoder._( 36 return new IterableUtf16Decoder._(
38 () => new Utf16BytesToCodeUnitsDecoder(bytes, offset, length, 37 () => new Utf16BytesToCodeUnitsDecoder(
39 replacementCodepoint), replacementCodepoint); 38 bytes, offset, length, replacementCodepoint),
39 replacementCodepoint);
40 } 40 }
41 41
42 /** 42 /**
43 * Decodes the UTF-16BE bytes as an iterable. Thus, the consumer can only 43 * Decodes the UTF-16BE bytes as an iterable. Thus, the consumer can only
44 * convert as much of the input as needed. This method strips a leading BOM by 44 * convert as much of the input as needed. This method strips a leading BOM by
45 * default, but can be overridden by setting the optional parameter [stripBom] 45 * default, but can be overridden by setting the optional parameter [stripBom]
46 * to false. Set the [replacementCodepoint] to null to throw an 46 * to false. Set the [replacementCodepoint] to null to throw an
47 * ArgumentError rather than replace the bad value. The default 47 * ArgumentError rather than replace the bad value. The default
48 * value for the [replacementCodepoint] is U+FFFD. 48 * value for the [replacementCodepoint] is U+FFFD.
49 */ 49 */
50 IterableUtf16Decoder decodeUtf16beAsIterable(List<int> bytes, [int offset = 0, 50 IterableUtf16Decoder decodeUtf16beAsIterable(List<int> bytes,
51 int length, bool stripBom = true, int replacementCodepoint = 51 [int offset = 0,
52 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { 52 int length,
53 bool stripBom = true,
54 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
53 return new IterableUtf16Decoder._( 55 return new IterableUtf16Decoder._(
54 () => new Utf16beBytesToCodeUnitsDecoder(bytes, offset, length, stripBom, 56 () => new Utf16beBytesToCodeUnitsDecoder(
55 replacementCodepoint), replacementCodepoint); 57 bytes, offset, length, stripBom, replacementCodepoint),
58 replacementCodepoint);
56 } 59 }
57 60
58 /** 61 /**
59 * Decodes the UTF-16LE bytes as an iterable. Thus, the consumer can only 62 * Decodes the UTF-16LE bytes as an iterable. Thus, the consumer can only
60 * convert as much of the input as needed. This method strips a leading BOM by 63 * convert as much of the input as needed. This method strips a leading BOM by
61 * default, but can be overridden by setting the optional parameter [stripBom] 64 * default, but can be overridden by setting the optional parameter [stripBom]
62 * to false. Set the [replacementCodepoint] to null to throw an 65 * to false. Set the [replacementCodepoint] to null to throw an
63 * ArgumentError rather than replace the bad value. The default 66 * ArgumentError rather than replace the bad value. The default
64 * value for the [replacementCodepoint] is U+FFFD. 67 * value for the [replacementCodepoint] is U+FFFD.
65 */ 68 */
66 IterableUtf16Decoder decodeUtf16leAsIterable(List<int> bytes, [int offset = 0, 69 IterableUtf16Decoder decodeUtf16leAsIterable(List<int> bytes,
67 int length, bool stripBom = true, int replacementCodepoint = 70 [int offset = 0,
68 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { 71 int length,
72 bool stripBom = true,
73 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
69 return new IterableUtf16Decoder._( 74 return new IterableUtf16Decoder._(
70 () => new Utf16leBytesToCodeUnitsDecoder(bytes, offset, length, stripBom, 75 () => new Utf16leBytesToCodeUnitsDecoder(
71 replacementCodepoint), replacementCodepoint); 76 bytes, offset, length, stripBom, replacementCodepoint),
77 replacementCodepoint);
72 } 78 }
73 79
74 /** 80 /**
75 * Produce a String from a sequence of UTF-16 encoded bytes. This method always 81 * Produce a String from a sequence of UTF-16 encoded bytes. This method always
76 * strips a leading BOM. Set the [replacementCodepoint] to null to throw an 82 * strips a leading BOM. Set the [replacementCodepoint] to null to throw an
77 * ArgumentError rather than replace the bad value. The default 83 * ArgumentError rather than replace the bad value. The default
78 * value for the [replacementCodepoint] is U+FFFD. 84 * value for the [replacementCodepoint] is U+FFFD.
79 */ 85 */
80 String decodeUtf16(List<int> bytes, [int offset = 0, int length, 86 String decodeUtf16(List<int> bytes,
87 [int offset = 0,
88 int length,
81 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { 89 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
82 Utf16BytesToCodeUnitsDecoder decoder = new Utf16BytesToCodeUnitsDecoder(bytes, 90 Utf16BytesToCodeUnitsDecoder decoder = new Utf16BytesToCodeUnitsDecoder(
83 offset, length, replacementCodepoint); 91 bytes, offset, length, replacementCodepoint);
84 List<int> codeunits = decoder.decodeRest(); 92 List<int> codeunits = decoder.decodeRest();
85 return new String.fromCharCodes( 93 return new String.fromCharCodes(
86 utf16CodeUnitsToCodepoints(codeunits, 0, null, replacementCodepoint)); 94 utf16CodeUnitsToCodepoints(codeunits, 0, null, replacementCodepoint));
87 } 95 }
88 96
89 /** 97 /**
90 * Produce a String from a sequence of UTF-16BE encoded bytes. This method 98 * Produce a String from a sequence of UTF-16BE encoded bytes. This method
91 * strips a leading BOM by default, but can be overridden by setting the 99 * strips a leading BOM by default, but can be overridden by setting the
92 * optional parameter [stripBom] to false. Set the [replacementCodepoint] to 100 * optional parameter [stripBom] to false. Set the [replacementCodepoint] to
93 * null to throw an ArgumentError rather than replace the bad value. 101 * null to throw an ArgumentError rather than replace the bad value.
94 * The default value for the [replacementCodepoint] is U+FFFD. 102 * The default value for the [replacementCodepoint] is U+FFFD.
95 */ 103 */
96 String decodeUtf16be(List<int> bytes, [int offset = 0, int length, 104 String decodeUtf16be(List<int> bytes,
105 [int offset = 0,
106 int length,
97 bool stripBom = true, 107 bool stripBom = true,
98 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { 108 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
99 List<int> codeunits = (new Utf16beBytesToCodeUnitsDecoder(bytes, offset, 109 List<int> codeunits = (new Utf16beBytesToCodeUnitsDecoder(
100 length, stripBom, replacementCodepoint)).decodeRest(); 110 bytes, offset, length, stripBom, replacementCodepoint))
111 .decodeRest();
101 return new String.fromCharCodes( 112 return new String.fromCharCodes(
102 utf16CodeUnitsToCodepoints(codeunits, 0, null, replacementCodepoint)); 113 utf16CodeUnitsToCodepoints(codeunits, 0, null, replacementCodepoint));
103 } 114 }
104 115
105 /** 116 /**
106 * Produce a String from a sequence of UTF-16LE encoded bytes. This method 117 * Produce a String from a sequence of UTF-16LE encoded bytes. This method
107 * strips a leading BOM by default, but can be overridden by setting the 118 * strips a leading BOM by default, but can be overridden by setting the
108 * optional parameter [stripBom] to false. Set the [replacementCodepoint] to 119 * optional parameter [stripBom] to false. Set the [replacementCodepoint] to
109 * null to throw an ArgumentError rather than replace the bad value. 120 * null to throw an ArgumentError rather than replace the bad value.
110 * The default value for the [replacementCodepoint] is U+FFFD. 121 * The default value for the [replacementCodepoint] is U+FFFD.
111 */ 122 */
112 String decodeUtf16le(List<int> bytes, [int offset = 0, int length, 123 String decodeUtf16le(List<int> bytes,
124 [int offset = 0,
125 int length,
113 bool stripBom = true, 126 bool stripBom = true,
114 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { 127 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
115 List<int> codeunits = (new Utf16leBytesToCodeUnitsDecoder(bytes, offset, 128 List<int> codeunits = (new Utf16leBytesToCodeUnitsDecoder(
116 length, stripBom, replacementCodepoint)).decodeRest(); 129 bytes, offset, length, stripBom, replacementCodepoint))
130 .decodeRest();
117 return new String.fromCharCodes( 131 return new String.fromCharCodes(
118 utf16CodeUnitsToCodepoints(codeunits, 0, null, replacementCodepoint)); 132 utf16CodeUnitsToCodepoints(codeunits, 0, null, replacementCodepoint));
119 } 133 }
120 134
121 /** 135 /**
122 * Produce a list of UTF-16 encoded bytes. This method prefixes the resulting 136 * Produce a list of UTF-16 encoded bytes. This method prefixes the resulting
123 * bytes with a big-endian byte-order-marker. 137 * bytes with a big-endian byte-order-marker.
124 */ 138 */
125 List<int> encodeUtf16(String str) => 139 List<int> encodeUtf16(String str) => encodeUtf16be(str, true);
126 encodeUtf16be(str, true);
127 140
128 /** 141 /**
129 * Produce a list of UTF-16BE encoded bytes. By default, this method produces 142 * Produce a list of UTF-16BE encoded bytes. By default, this method produces
130 * UTF-16BE bytes with no BOM. 143 * UTF-16BE bytes with no BOM.
131 */ 144 */
132 List<int> encodeUtf16be(String str, [bool writeBOM = false]) { 145 List<int> encodeUtf16be(String str, [bool writeBOM = false]) {
133 List<int> utf16CodeUnits = _stringToUtf16CodeUnits(str); 146 List<int> utf16CodeUnits = _stringToUtf16CodeUnits(str);
134 List<int> encoding = 147 List<int> encoding =
135 new List<int>(2 * utf16CodeUnits.length + (writeBOM ? 2 : 0)); 148 new List<int>(2 * utf16CodeUnits.length + (writeBOM ? 2 : 0));
136 int i = 0; 149 int i = 0;
(...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after
209 */ 222 */
210 // TODO(floitsch): Consider removing the extend and switch to implements since 223 // TODO(floitsch): Consider removing the extend and switch to implements since
211 // that's cheaper to allocate. 224 // that's cheaper to allocate.
212 class IterableUtf16Decoder extends IterableBase<int> { 225 class IterableUtf16Decoder extends IterableBase<int> {
213 final _CodeUnitsProvider codeunitsProvider; 226 final _CodeUnitsProvider codeunitsProvider;
214 final int replacementCodepoint; 227 final int replacementCodepoint;
215 228
216 IterableUtf16Decoder._(this.codeunitsProvider, this.replacementCodepoint); 229 IterableUtf16Decoder._(this.codeunitsProvider, this.replacementCodepoint);
217 230
218 Utf16CodeUnitDecoder get iterator => 231 Utf16CodeUnitDecoder get iterator =>
219 new Utf16CodeUnitDecoder.fromListRangeIterator(codeunitsProvider(), 232 new Utf16CodeUnitDecoder.fromListRangeIterator(
220 replacementCodepoint); 233 codeunitsProvider(), replacementCodepoint);
221 } 234 }
222 235
223 /** 236 /**
224 * Convert UTF-16 encoded bytes to UTF-16 code units by grouping 1-2 bytes 237 * Convert UTF-16 encoded bytes to UTF-16 code units by grouping 1-2 bytes
225 * to produce the code unit (0-(2^16)-1). Relies on BOM to determine 238 * to produce the code unit (0-(2^16)-1). Relies on BOM to determine
226 * endian-ness, and defaults to BE. 239 * endian-ness, and defaults to BE.
227 */ 240 */
228 abstract class Utf16BytesToCodeUnitsDecoder implements ListRangeIterator { 241 abstract class Utf16BytesToCodeUnitsDecoder implements ListRangeIterator {
229 // TODO(kevmoo): should this field be private? 242 // TODO(kevmoo): should this field be private?
230 final ListRangeIterator utf16EncodedBytesIterator; 243 final ListRangeIterator utf16EncodedBytesIterator;
231 final int replacementCodepoint; 244 final int replacementCodepoint;
232 int _current = null; 245 int _current = null;
233 246
234 Utf16BytesToCodeUnitsDecoder._fromListRangeIterator( 247 Utf16BytesToCodeUnitsDecoder._fromListRangeIterator(
235 this.utf16EncodedBytesIterator, this.replacementCodepoint); 248 this.utf16EncodedBytesIterator, this.replacementCodepoint);
236 249
237 factory Utf16BytesToCodeUnitsDecoder(List<int> utf16EncodedBytes, [ 250 factory Utf16BytesToCodeUnitsDecoder(List<int> utf16EncodedBytes,
238 int offset = 0, int length, 251 [int offset = 0,
252 int length,
239 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { 253 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
240 if (length == null) { 254 if (length == null) {
241 length = utf16EncodedBytes.length - offset; 255 length = utf16EncodedBytes.length - offset;
242 } 256 }
243 if (hasUtf16beBom(utf16EncodedBytes, offset, length)) { 257 if (hasUtf16beBom(utf16EncodedBytes, offset, length)) {
244 return new Utf16beBytesToCodeUnitsDecoder(utf16EncodedBytes, offset + 2, 258 return new Utf16beBytesToCodeUnitsDecoder(utf16EncodedBytes, offset + 2,
245 length - 2, false, replacementCodepoint); 259 length - 2, false, replacementCodepoint);
246 } else if (hasUtf16leBom(utf16EncodedBytes, offset, length)) { 260 } else if (hasUtf16leBom(utf16EncodedBytes, offset, length)) {
247 return new Utf16leBytesToCodeUnitsDecoder(utf16EncodedBytes, offset + 2, 261 return new Utf16leBytesToCodeUnitsDecoder(utf16EncodedBytes, offset + 2,
248 length - 2, false, replacementCodepoint); 262 length - 2, false, replacementCodepoint);
249 } else { 263 } else {
250 return new Utf16beBytesToCodeUnitsDecoder(utf16EncodedBytes, offset, 264 return new Utf16beBytesToCodeUnitsDecoder(
251 length, false, replacementCodepoint); 265 utf16EncodedBytes, offset, length, false, replacementCodepoint);
252 } 266 }
253 } 267 }
254 268
255 /** 269 /**
256 * Provides a fast way to decode the rest of the source bytes in a single 270 * Provides a fast way to decode the rest of the source bytes in a single
257 * call. This method trades memory for improved speed in that it potentially 271 * call. This method trades memory for improved speed in that it potentially
258 * over-allocates the List containing results. 272 * over-allocates the List containing results.
259 */ 273 */
260 List<int> decodeRest() { 274 List<int> decodeRest() {
261 List<int> codeunits = new List<int>(remaining); 275 List<int> codeunits = new List<int>(remaining);
(...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after
308 } 322 }
309 323
310 int decode(); 324 int decode();
311 } 325 }
312 326
313 /** 327 /**
314 * Convert UTF-16BE encoded bytes to utf16 code units by grouping 1-2 bytes 328 * Convert UTF-16BE encoded bytes to utf16 code units by grouping 1-2 bytes
315 * to produce the code unit (0-(2^16)-1). 329 * to produce the code unit (0-(2^16)-1).
316 */ 330 */
317 class Utf16beBytesToCodeUnitsDecoder extends Utf16BytesToCodeUnitsDecoder { 331 class Utf16beBytesToCodeUnitsDecoder extends Utf16BytesToCodeUnitsDecoder {
318 Utf16beBytesToCodeUnitsDecoder(List<int> utf16EncodedBytes, [ 332 Utf16beBytesToCodeUnitsDecoder(List<int> utf16EncodedBytes,
319 int offset = 0, int length, bool stripBom = true, 333 [int offset = 0,
320 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) : 334 int length,
321 super._fromListRangeIterator( 335 bool stripBom = true,
322 (new ListRange(utf16EncodedBytes, offset, length)).iterator, 336 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT])
323 replacementCodepoint) { 337 : super._fromListRangeIterator(
338 (new ListRange(utf16EncodedBytes, offset, length)).iterator,
339 replacementCodepoint) {
324 if (stripBom && hasUtf16beBom(utf16EncodedBytes, offset, length)) { 340 if (stripBom && hasUtf16beBom(utf16EncodedBytes, offset, length)) {
325 skip(); 341 skip();
326 } 342 }
327 } 343 }
328 344
329 int decode() { 345 int decode() {
330 utf16EncodedBytesIterator.moveNext(); 346 utf16EncodedBytesIterator.moveNext();
331 int hi = utf16EncodedBytesIterator.current; 347 int hi = utf16EncodedBytesIterator.current;
332 utf16EncodedBytesIterator.moveNext(); 348 utf16EncodedBytesIterator.moveNext();
333 int lo = utf16EncodedBytesIterator.current; 349 int lo = utf16EncodedBytesIterator.current;
334 return (hi << 8) + lo; 350 return (hi << 8) + lo;
335 } 351 }
336 } 352 }
337 353
338 /** 354 /**
339 * Convert UTF-16LE encoded bytes to utf16 code units by grouping 1-2 bytes 355 * Convert UTF-16LE encoded bytes to utf16 code units by grouping 1-2 bytes
340 * to produce the code unit (0-(2^16)-1). 356 * to produce the code unit (0-(2^16)-1).
341 */ 357 */
342 class Utf16leBytesToCodeUnitsDecoder extends Utf16BytesToCodeUnitsDecoder { 358 class Utf16leBytesToCodeUnitsDecoder extends Utf16BytesToCodeUnitsDecoder {
343 Utf16leBytesToCodeUnitsDecoder(List<int> utf16EncodedBytes, [ 359 Utf16leBytesToCodeUnitsDecoder(List<int> utf16EncodedBytes,
344 int offset = 0, int length, bool stripBom = true, 360 [int offset = 0,
345 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) : 361 int length,
346 super._fromListRangeIterator( 362 bool stripBom = true,
347 (new ListRange(utf16EncodedBytes, offset, length)).iterator, 363 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT])
348 replacementCodepoint) { 364 : super._fromListRangeIterator(
365 (new ListRange(utf16EncodedBytes, offset, length)).iterator,
366 replacementCodepoint) {
349 if (stripBom && hasUtf16leBom(utf16EncodedBytes, offset, length)) { 367 if (stripBom && hasUtf16leBom(utf16EncodedBytes, offset, length)) {
350 skip(); 368 skip();
351 } 369 }
352 } 370 }
353 371
354 int decode() { 372 int decode() {
355 utf16EncodedBytesIterator.moveNext(); 373 utf16EncodedBytesIterator.moveNext();
356 int lo = utf16EncodedBytesIterator.current; 374 int lo = utf16EncodedBytesIterator.current;
357 utf16EncodedBytesIterator.moveNext(); 375 utf16EncodedBytesIterator.moveNext();
358 int hi = utf16EncodedBytesIterator.current; 376 int hi = utf16EncodedBytesIterator.current;
359 return (hi << 8) + lo; 377 return (hi << 8) + lo;
360 } 378 }
361 } 379 }
OLDNEW
« no previous file with comments | « packages/utf/lib/src/utf/utf_stream.dart ('k') | packages/utf/lib/src/utf32.dart » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698