| OLD | NEW |
| 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
| 4 | 4 |
| 5 part of utf; | 5 part of utf; |
| 6 | 6 |
| 7 // TODO(jmesserly): would be nice to have this on String (dartbug.com/6501). | 7 // TODO(jmesserly): would be nice to have this on String (dartbug.com/6501). |
| 8 /** | 8 /** |
| 9 * Provide a list of Unicode codepoints for a given string. | 9 * Provide a list of Unicode codepoints for a given string. |
| 10 */ | 10 */ |
| 11 List<int> stringToCodepoints(String str) { | 11 List<int> stringToCodepoints(String str) { |
| 12 // Note: str.codeUnits gives us 16-bit code units on all Dart implementations. | 12 // Note: str.codeUnits gives us 16-bit code units on all Dart implementations. |
| 13 // So we need to convert. | 13 // So we need to convert. |
| 14 return _utf16CodeUnitsToCodepoints(str.codeUnits); | 14 return utf16CodeUnitsToCodepoints(str.codeUnits); |
| 15 } | 15 } |
| 16 | 16 |
| 17 /** | 17 /** |
| 18 * Generate a string from the provided Unicode codepoints. | 18 * Generate a string from the provided Unicode codepoints. |
| 19 * | 19 * |
| 20 * *Deprecated* Use [String.fromCharCodes] instead. | 20 * *Deprecated* Use [String.fromCharCodes] instead. |
| 21 */ | 21 */ |
| 22 @deprecated |
| 22 String codepointsToString(List<int> codepoints) { | 23 String codepointsToString(List<int> codepoints) { |
| 23 return new String.fromCharCodes(codepoints); | 24 return new String.fromCharCodes(codepoints); |
| 24 } | 25 } |
| 25 | |
| 26 /** | |
| 27 * An Iterator<int> of codepoints built on an Iterator of UTF-16 code units. | |
| 28 * The parameters can override the default Unicode replacement character. Set | |
| 29 * the replacementCharacter to null to throw an ArgumentError | |
| 30 * rather than replace the bad value. | |
| 31 */ | |
| 32 class Utf16CodeUnitDecoder implements Iterator<int> { | |
| 33 final _ListRangeIterator utf16CodeUnitIterator; | |
| 34 final int replacementCodepoint; | |
| 35 int _current = null; | |
| 36 | |
| 37 Utf16CodeUnitDecoder(List<int> utf16CodeUnits, [int offset = 0, int length, | |
| 38 int this.replacementCodepoint = | |
| 39 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) : | |
| 40 utf16CodeUnitIterator = | |
| 41 (new _ListRange(utf16CodeUnits, offset, length)).iterator; | |
| 42 | |
| 43 Utf16CodeUnitDecoder.fromListRangeIterator( | |
| 44 _ListRangeIterator this.utf16CodeUnitIterator, | |
| 45 int this.replacementCodepoint); | |
| 46 | |
| 47 Iterator<int> get iterator => this; | |
| 48 | |
| 49 int get current => _current; | |
| 50 | |
| 51 bool moveNext() { | |
| 52 _current = null; | |
| 53 if (!utf16CodeUnitIterator.moveNext()) return false; | |
| 54 | |
| 55 int value = utf16CodeUnitIterator.current; | |
| 56 if (value < 0) { | |
| 57 if (replacementCodepoint != null) { | |
| 58 _current = replacementCodepoint; | |
| 59 } else { | |
| 60 throw new ArgumentError( | |
| 61 "Invalid UTF16 at ${utf16CodeUnitIterator.position}"); | |
| 62 } | |
| 63 } else if (value < UNICODE_UTF16_RESERVED_LO || | |
| 64 (value > UNICODE_UTF16_RESERVED_HI && value <= UNICODE_PLANE_ONE_MAX)) { | |
| 65 // transfer directly | |
| 66 _current = value; | |
| 67 } else if (value < UNICODE_UTF16_SURROGATE_UNIT_1_BASE && | |
| 68 utf16CodeUnitIterator.moveNext()) { | |
| 69 // merge surrogate pair | |
| 70 int nextValue = utf16CodeUnitIterator.current; | |
| 71 if (nextValue >= UNICODE_UTF16_SURROGATE_UNIT_1_BASE && | |
| 72 nextValue <= UNICODE_UTF16_RESERVED_HI) { | |
| 73 value = (value - UNICODE_UTF16_SURROGATE_UNIT_0_BASE) << 10; | |
| 74 value += UNICODE_UTF16_OFFSET + | |
| 75 (nextValue - UNICODE_UTF16_SURROGATE_UNIT_1_BASE); | |
| 76 _current = value; | |
| 77 } else { | |
| 78 if (nextValue >= UNICODE_UTF16_SURROGATE_UNIT_0_BASE && | |
| 79 nextValue < UNICODE_UTF16_SURROGATE_UNIT_1_BASE) { | |
| 80 utf16CodeUnitIterator.backup(); | |
| 81 } | |
| 82 if (replacementCodepoint != null) { | |
| 83 _current = replacementCodepoint; | |
| 84 } else { | |
| 85 throw new ArgumentError( | |
| 86 "Invalid UTF16 at ${utf16CodeUnitIterator.position}"); | |
| 87 } | |
| 88 } | |
| 89 } else if (replacementCodepoint != null) { | |
| 90 _current = replacementCodepoint; | |
| 91 } else { | |
| 92 throw new ArgumentError( | |
| 93 "Invalid UTF16 at ${utf16CodeUnitIterator.position}"); | |
| 94 } | |
| 95 return true; | |
| 96 } | |
| 97 } | |
| 98 | |
| 99 /** | |
| 100 * Encode code points as UTF16 code units. | |
| 101 */ | |
| 102 List<int> _codepointsToUtf16CodeUnits( | |
| 103 List<int> codepoints, | |
| 104 [int offset = 0, | |
| 105 int length, | |
| 106 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { | |
| 107 | |
| 108 _ListRange listRange = new _ListRange(codepoints, offset, length); | |
| 109 int encodedLength = 0; | |
| 110 for (int value in listRange) { | |
| 111 if ((value >= 0 && value < UNICODE_UTF16_RESERVED_LO) || | |
| 112 (value > UNICODE_UTF16_RESERVED_HI && value <= UNICODE_PLANE_ONE_MAX)) { | |
| 113 encodedLength++; | |
| 114 } else if (value > UNICODE_PLANE_ONE_MAX && | |
| 115 value <= UNICODE_VALID_RANGE_MAX) { | |
| 116 encodedLength += 2; | |
| 117 } else { | |
| 118 encodedLength++; | |
| 119 } | |
| 120 } | |
| 121 | |
| 122 List<int> codeUnitsBuffer = new List<int>(encodedLength); | |
| 123 int j = 0; | |
| 124 for (int value in listRange) { | |
| 125 if ((value >= 0 && value < UNICODE_UTF16_RESERVED_LO) || | |
| 126 (value > UNICODE_UTF16_RESERVED_HI && value <= UNICODE_PLANE_ONE_MAX)) { | |
| 127 codeUnitsBuffer[j++] = value; | |
| 128 } else if (value > UNICODE_PLANE_ONE_MAX && | |
| 129 value <= UNICODE_VALID_RANGE_MAX) { | |
| 130 int base = value - UNICODE_UTF16_OFFSET; | |
| 131 codeUnitsBuffer[j++] = UNICODE_UTF16_SURROGATE_UNIT_0_BASE + | |
| 132 ((base & UNICODE_UTF16_HI_MASK) >> 10); | |
| 133 codeUnitsBuffer[j++] = UNICODE_UTF16_SURROGATE_UNIT_1_BASE + | |
| 134 (base & UNICODE_UTF16_LO_MASK); | |
| 135 } else if (replacementCodepoint != null) { | |
| 136 codeUnitsBuffer[j++] = replacementCodepoint; | |
| 137 } else { | |
| 138 throw new ArgumentError("Invalid encoding"); | |
| 139 } | |
| 140 } | |
| 141 return codeUnitsBuffer; | |
| 142 } | |
| 143 | |
| 144 /** | |
| 145 * Decodes the utf16 codeunits to codepoints. | |
| 146 */ | |
| 147 List<int> _utf16CodeUnitsToCodepoints( | |
| 148 List<int> utf16CodeUnits, [int offset = 0, int length, | |
| 149 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { | |
| 150 _ListRangeIterator source = | |
| 151 (new _ListRange(utf16CodeUnits, offset, length)).iterator; | |
| 152 Utf16CodeUnitDecoder decoder = new Utf16CodeUnitDecoder | |
| 153 .fromListRangeIterator(source, replacementCodepoint); | |
| 154 List<int> codepoints = new List<int>(source.remaining); | |
| 155 int i = 0; | |
| 156 while (decoder.moveNext()) { | |
| 157 codepoints[i++] = decoder.current; | |
| 158 } | |
| 159 if (i == codepoints.length) { | |
| 160 return codepoints; | |
| 161 } else { | |
| 162 List<int> codepointTrunc = new List<int>(i); | |
| 163 codepointTrunc.setRange(0, i, codepoints); | |
| 164 return codepointTrunc; | |
| 165 } | |
| 166 } | |
| 167 | |
| 168 /** | 26 /** |
| 169 * Decodes the UTF-16 bytes as an iterable. Thus, the consumer can only convert | 27 * Decodes the UTF-16 bytes as an iterable. Thus, the consumer can only convert |
| 170 * as much of the input as needed. Determines the byte order from the BOM, | 28 * as much of the input as needed. Determines the byte order from the BOM, |
| 171 * or uses big-endian as a default. This method always strips a leading BOM. | 29 * or uses big-endian as a default. This method always strips a leading BOM. |
| 172 * Set the [replacementCodepoint] to null to throw an ArgumentError | 30 * Set the [replacementCodepoint] to null to throw an ArgumentError |
| 173 * rather than replace the bad value. The default value for | 31 * rather than replace the bad value. The default value for |
| 174 * [replacementCodepoint] is U+FFFD. | 32 * [replacementCodepoint] is U+FFFD. |
| 175 */ | 33 */ |
| 176 IterableUtf16Decoder decodeUtf16AsIterable(List<int> bytes, [int offset = 0, | 34 IterableUtf16Decoder decodeUtf16AsIterable(List<int> bytes, [int offset = 0, |
| 177 int length, int replacementCodepoint = | 35 int length, int replacementCodepoint = |
| (...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 218 * strips a leading BOM. Set the [replacementCodepoint] to null to throw an | 76 * strips a leading BOM. Set the [replacementCodepoint] to null to throw an |
| 219 * ArgumentError rather than replace the bad value. The default | 77 * ArgumentError rather than replace the bad value. The default |
| 220 * value for the [replacementCodepoint] is U+FFFD. | 78 * value for the [replacementCodepoint] is U+FFFD. |
| 221 */ | 79 */ |
| 222 String decodeUtf16(List<int> bytes, [int offset = 0, int length, | 80 String decodeUtf16(List<int> bytes, [int offset = 0, int length, |
| 223 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { | 81 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { |
| 224 Utf16BytesToCodeUnitsDecoder decoder = new Utf16BytesToCodeUnitsDecoder(bytes, | 82 Utf16BytesToCodeUnitsDecoder decoder = new Utf16BytesToCodeUnitsDecoder(bytes, |
| 225 offset, length, replacementCodepoint); | 83 offset, length, replacementCodepoint); |
| 226 List<int> codeunits = decoder.decodeRest(); | 84 List<int> codeunits = decoder.decodeRest(); |
| 227 return new String.fromCharCodes( | 85 return new String.fromCharCodes( |
| 228 _utf16CodeUnitsToCodepoints(codeunits, 0, null, replacementCodepoint)); | 86 utf16CodeUnitsToCodepoints(codeunits, 0, null, replacementCodepoint)); |
| 229 } | 87 } |
| 230 | 88 |
| 231 /** | 89 /** |
| 232 * Produce a String from a sequence of UTF-16BE encoded bytes. This method | 90 * Produce a String from a sequence of UTF-16BE encoded bytes. This method |
| 233 * strips a leading BOM by default, but can be overridden by setting the | 91 * strips a leading BOM by default, but can be overridden by setting the |
| 234 * optional parameter [stripBom] to false. Set the [replacementCodepoint] to | 92 * optional parameter [stripBom] to false. Set the [replacementCodepoint] to |
| 235 * null to throw an ArgumentError rather than replace the bad value. | 93 * null to throw an ArgumentError rather than replace the bad value. |
| 236 * The default value for the [replacementCodepoint] is U+FFFD. | 94 * The default value for the [replacementCodepoint] is U+FFFD. |
| 237 */ | 95 */ |
| 238 String decodeUtf16be(List<int> bytes, [int offset = 0, int length, | 96 String decodeUtf16be(List<int> bytes, [int offset = 0, int length, |
| 239 bool stripBom = true, | 97 bool stripBom = true, |
| 240 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { | 98 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { |
| 241 List<int> codeunits = (new Utf16beBytesToCodeUnitsDecoder(bytes, offset, | 99 List<int> codeunits = (new Utf16beBytesToCodeUnitsDecoder(bytes, offset, |
| 242 length, stripBom, replacementCodepoint)).decodeRest(); | 100 length, stripBom, replacementCodepoint)).decodeRest(); |
| 243 return new String.fromCharCodes( | 101 return new String.fromCharCodes( |
| 244 _utf16CodeUnitsToCodepoints(codeunits, 0, null, replacementCodepoint)); | 102 utf16CodeUnitsToCodepoints(codeunits, 0, null, replacementCodepoint)); |
| 245 } | 103 } |
| 246 | 104 |
| 247 /** | 105 /** |
| 248 * Produce a String from a sequence of UTF-16LE encoded bytes. This method | 106 * Produce a String from a sequence of UTF-16LE encoded bytes. This method |
| 249 * strips a leading BOM by default, but can be overridden by setting the | 107 * strips a leading BOM by default, but can be overridden by setting the |
| 250 * optional parameter [stripBom] to false. Set the [replacementCodepoint] to | 108 * optional parameter [stripBom] to false. Set the [replacementCodepoint] to |
| 251 * null to throw an ArgumentError rather than replace the bad value. | 109 * null to throw an ArgumentError rather than replace the bad value. |
| 252 * The default value for the [replacementCodepoint] is U+FFFD. | 110 * The default value for the [replacementCodepoint] is U+FFFD. |
| 253 */ | 111 */ |
| 254 String decodeUtf16le(List<int> bytes, [int offset = 0, int length, | 112 String decodeUtf16le(List<int> bytes, [int offset = 0, int length, |
| 255 bool stripBom = true, | 113 bool stripBom = true, |
| 256 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { | 114 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { |
| 257 List<int> codeunits = (new Utf16leBytesToCodeUnitsDecoder(bytes, offset, | 115 List<int> codeunits = (new Utf16leBytesToCodeUnitsDecoder(bytes, offset, |
| 258 length, stripBom, replacementCodepoint)).decodeRest(); | 116 length, stripBom, replacementCodepoint)).decodeRest(); |
| 259 return new String.fromCharCodes( | 117 return new String.fromCharCodes( |
| 260 _utf16CodeUnitsToCodepoints(codeunits, 0, null, replacementCodepoint)); | 118 utf16CodeUnitsToCodepoints(codeunits, 0, null, replacementCodepoint)); |
| 261 } | 119 } |
| 262 | 120 |
| 263 /** | 121 /** |
| 264 * Produce a list of UTF-16 encoded bytes. This method prefixes the resulting | 122 * Produce a list of UTF-16 encoded bytes. This method prefixes the resulting |
| 265 * bytes with a big-endian byte-order-marker. | 123 * bytes with a big-endian byte-order-marker. |
| 266 */ | 124 */ |
| 267 List<int> encodeUtf16(String str) => | 125 List<int> encodeUtf16(String str) => |
| 268 encodeUtf16be(str, true); | 126 encodeUtf16be(str, true); |
| 269 | 127 |
| 270 /** | 128 /** |
| (...skipping 61 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 332 * little-endian byte-order marker (BOM). | 190 * little-endian byte-order marker (BOM). |
| 333 */ | 191 */ |
| 334 bool hasUtf16leBom(List<int> utf16EncodedBytes, [int offset = 0, int length]) { | 192 bool hasUtf16leBom(List<int> utf16EncodedBytes, [int offset = 0, int length]) { |
| 335 int end = length != null ? offset + length : utf16EncodedBytes.length; | 193 int end = length != null ? offset + length : utf16EncodedBytes.length; |
| 336 return (offset + 2) <= end && | 194 return (offset + 2) <= end && |
| 337 utf16EncodedBytes[offset] == UNICODE_UTF_BOM_LO && | 195 utf16EncodedBytes[offset] == UNICODE_UTF_BOM_LO && |
| 338 utf16EncodedBytes[offset + 1] == UNICODE_UTF_BOM_HI; | 196 utf16EncodedBytes[offset + 1] == UNICODE_UTF_BOM_HI; |
| 339 } | 197 } |
| 340 | 198 |
| 341 List<int> _stringToUtf16CodeUnits(String str) { | 199 List<int> _stringToUtf16CodeUnits(String str) { |
| 342 return _codepointsToUtf16CodeUnits(str.codeUnits); | 200 return codepointsToUtf16CodeUnits(str.codeUnits); |
| 343 } | 201 } |
| 344 | 202 |
| 345 typedef _ListRangeIterator _CodeUnitsProvider(); | 203 typedef ListRangeIterator _CodeUnitsProvider(); |
| 346 | 204 |
| 347 /** | 205 /** |
| 348 * Return type of [decodeUtf16AsIterable] and variants. The Iterable type | 206 * Return type of [decodeUtf16AsIterable] and variants. The Iterable type |
| 349 * provides an iterator on demand and the iterator will only translate bytes | 207 * provides an iterator on demand and the iterator will only translate bytes |
| 350 * as requested by the user of the iterator. (Note: results are not cached.) | 208 * as requested by the user of the iterator. (Note: results are not cached.) |
| 351 */ | 209 */ |
| 352 // TODO(floitsch): Consider removing the extend and switch to implements since | 210 // TODO(floitsch): Consider removing the extend and switch to implements since |
| 353 // that's cheaper to allocate. | 211 // that's cheaper to allocate. |
| 354 class IterableUtf16Decoder extends IterableBase<int> { | 212 class IterableUtf16Decoder extends IterableBase<int> { |
| 355 final _CodeUnitsProvider codeunitsProvider; | 213 final _CodeUnitsProvider codeunitsProvider; |
| 356 final int replacementCodepoint; | 214 final int replacementCodepoint; |
| 357 | 215 |
| 358 IterableUtf16Decoder._(this.codeunitsProvider, this.replacementCodepoint); | 216 IterableUtf16Decoder._(this.codeunitsProvider, this.replacementCodepoint); |
| 359 | 217 |
| 360 Utf16CodeUnitDecoder get iterator => | 218 Utf16CodeUnitDecoder get iterator => |
| 361 new Utf16CodeUnitDecoder.fromListRangeIterator(codeunitsProvider(), | 219 new Utf16CodeUnitDecoder.fromListRangeIterator(codeunitsProvider(), |
| 362 replacementCodepoint); | 220 replacementCodepoint); |
| 363 } | 221 } |
| 364 | 222 |
| 365 /** | 223 /** |
| 366 * Convert UTF-16 encoded bytes to UTF-16 code units by grouping 1-2 bytes | 224 * Convert UTF-16 encoded bytes to UTF-16 code units by grouping 1-2 bytes |
| 367 * to produce the code unit (0-(2^16)-1). Relies on BOM to determine | 225 * to produce the code unit (0-(2^16)-1). Relies on BOM to determine |
| 368 * endian-ness, and defaults to BE. | 226 * endian-ness, and defaults to BE. |
| 369 */ | 227 */ |
| 370 abstract class Utf16BytesToCodeUnitsDecoder implements _ListRangeIterator { | 228 abstract class Utf16BytesToCodeUnitsDecoder implements ListRangeIterator { |
| 371 final _ListRangeIterator utf16EncodedBytesIterator; | 229 // TODO(kevmoo): should this field be private? |
| 230 final ListRangeIterator utf16EncodedBytesIterator; |
| 372 final int replacementCodepoint; | 231 final int replacementCodepoint; |
| 373 int _current = null; | 232 int _current = null; |
| 374 | 233 |
| 375 Utf16BytesToCodeUnitsDecoder._fromListRangeIterator( | 234 Utf16BytesToCodeUnitsDecoder._fromListRangeIterator( |
| 376 this.utf16EncodedBytesIterator, this.replacementCodepoint); | 235 this.utf16EncodedBytesIterator, this.replacementCodepoint); |
| 377 | 236 |
| 378 factory Utf16BytesToCodeUnitsDecoder(List<int> utf16EncodedBytes, [ | 237 factory Utf16BytesToCodeUnitsDecoder(List<int> utf16EncodedBytes, [ |
| 379 int offset = 0, int length, | 238 int offset = 0, int length, |
| 380 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { | 239 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { |
| 381 if (length == null) { | 240 if (length == null) { |
| (...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 453 | 312 |
| 454 /** | 313 /** |
| 455 * Convert UTF-16BE encoded bytes to utf16 code units by grouping 1-2 bytes | 314 * Convert UTF-16BE encoded bytes to utf16 code units by grouping 1-2 bytes |
| 456 * to produce the code unit (0-(2^16)-1). | 315 * to produce the code unit (0-(2^16)-1). |
| 457 */ | 316 */ |
| 458 class Utf16beBytesToCodeUnitsDecoder extends Utf16BytesToCodeUnitsDecoder { | 317 class Utf16beBytesToCodeUnitsDecoder extends Utf16BytesToCodeUnitsDecoder { |
| 459 Utf16beBytesToCodeUnitsDecoder(List<int> utf16EncodedBytes, [ | 318 Utf16beBytesToCodeUnitsDecoder(List<int> utf16EncodedBytes, [ |
| 460 int offset = 0, int length, bool stripBom = true, | 319 int offset = 0, int length, bool stripBom = true, |
| 461 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) : | 320 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) : |
| 462 super._fromListRangeIterator( | 321 super._fromListRangeIterator( |
| 463 (new _ListRange(utf16EncodedBytes, offset, length)).iterator, | 322 (new ListRange(utf16EncodedBytes, offset, length)).iterator, |
| 464 replacementCodepoint) { | 323 replacementCodepoint) { |
| 465 if (stripBom && hasUtf16beBom(utf16EncodedBytes, offset, length)) { | 324 if (stripBom && hasUtf16beBom(utf16EncodedBytes, offset, length)) { |
| 466 skip(); | 325 skip(); |
| 467 } | 326 } |
| 468 } | 327 } |
| 469 | 328 |
| 470 int decode() { | 329 int decode() { |
| 471 utf16EncodedBytesIterator.moveNext(); | 330 utf16EncodedBytesIterator.moveNext(); |
| 472 int hi = utf16EncodedBytesIterator.current; | 331 int hi = utf16EncodedBytesIterator.current; |
| 473 utf16EncodedBytesIterator.moveNext(); | 332 utf16EncodedBytesIterator.moveNext(); |
| 474 int lo = utf16EncodedBytesIterator.current; | 333 int lo = utf16EncodedBytesIterator.current; |
| 475 return (hi << 8) + lo; | 334 return (hi << 8) + lo; |
| 476 } | 335 } |
| 477 } | 336 } |
| 478 | 337 |
| 479 /** | 338 /** |
| 480 * Convert UTF-16LE encoded bytes to utf16 code units by grouping 1-2 bytes | 339 * Convert UTF-16LE encoded bytes to utf16 code units by grouping 1-2 bytes |
| 481 * to produce the code unit (0-(2^16)-1). | 340 * to produce the code unit (0-(2^16)-1). |
| 482 */ | 341 */ |
| 483 class Utf16leBytesToCodeUnitsDecoder extends Utf16BytesToCodeUnitsDecoder { | 342 class Utf16leBytesToCodeUnitsDecoder extends Utf16BytesToCodeUnitsDecoder { |
| 484 Utf16leBytesToCodeUnitsDecoder(List<int> utf16EncodedBytes, [ | 343 Utf16leBytesToCodeUnitsDecoder(List<int> utf16EncodedBytes, [ |
| 485 int offset = 0, int length, bool stripBom = true, | 344 int offset = 0, int length, bool stripBom = true, |
| 486 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) : | 345 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) : |
| 487 super._fromListRangeIterator( | 346 super._fromListRangeIterator( |
| 488 (new _ListRange(utf16EncodedBytes, offset, length)).iterator, | 347 (new ListRange(utf16EncodedBytes, offset, length)).iterator, |
| 489 replacementCodepoint) { | 348 replacementCodepoint) { |
| 490 if (stripBom && hasUtf16leBom(utf16EncodedBytes, offset, length)) { | 349 if (stripBom && hasUtf16leBom(utf16EncodedBytes, offset, length)) { |
| 491 skip(); | 350 skip(); |
| 492 } | 351 } |
| 493 } | 352 } |
| 494 | 353 |
| 495 int decode() { | 354 int decode() { |
| 496 utf16EncodedBytesIterator.moveNext(); | 355 utf16EncodedBytesIterator.moveNext(); |
| 497 int lo = utf16EncodedBytesIterator.current; | 356 int lo = utf16EncodedBytesIterator.current; |
| 498 utf16EncodedBytesIterator.moveNext(); | 357 utf16EncodedBytesIterator.moveNext(); |
| 499 int hi = utf16EncodedBytesIterator.current; | 358 int hi = utf16EncodedBytesIterator.current; |
| 500 return (hi << 8) + lo; | 359 return (hi << 8) + lo; |
| 501 } | 360 } |
| 502 } | 361 } |
| OLD | NEW |