| OLD | NEW | 
|---|
|  | (Empty) | 
| 1 // Copyright (c) 2012, the Dart project authors.  Please see the AUTHORS file |  | 
| 2 // for details. All rights reserved. Use of this source code is governed by a |  | 
| 3 // BSD-style license that can be found in the LICENSE file. |  | 
| 4 |  | 
| 5 part of utf; |  | 
| 6 |  | 
| 7 // TODO(jmesserly): would be nice to have this on String (dartbug.com/6501). |  | 
/**
 * Returns the Unicode codepoints that make up [str].
 *
 * The string's code units are passed to [_utf16CodeUnitsToCodepoints] with
 * that function's default offset, length and replacement codepoint.
 */
List<int> stringToCodepoints(String str) {
  // String.codeUnits yields 16-bit code units on every Dart implementation,
  // not codepoints, so they still have to be decoded.
  List<int> codeUnits = str.codeUnits;
  return _utf16CodeUnitsToCodepoints(codeUnits);
}
| 16 |  | 
/**
 * Builds a string out of the given Unicode [codepoints].
 *
 * *Deprecated* Use [String.fromCharCodes] instead; this function simply
 * forwards to that constructor.
 */
String codepointsToString(List<int> codepoints) =>
    new String.fromCharCodes(codepoints);
| 25 |  | 
/**
 * An Iterator<int> of codepoints built on an Iterator of UTF-16 code units.
 *
 * Each call to [moveNext] consumes one code unit, or two when they form a
 * surrogate pair, and exposes the resulting codepoint through [current].
 * Invalid input is reported as [replacementCodepoint]; when that field is
 * null an [ArgumentError] is thrown instead.
 */
class Utf16CodeUnitDecoder implements Iterator<int> {
  /** Source of the UTF-16 code units being decoded. */
  final _ListRangeIterator utf16CodeUnitIterator;

  /** Codepoint emitted for invalid input, or null to throw instead. */
  final int replacementCodepoint;

  int _current = null;

  /**
   * Decodes the [length] code units of [utf16CodeUnits] starting at [offset].
   * The range is handed to `_ListRange` unchanged.
   */
  Utf16CodeUnitDecoder(List<int> utf16CodeUnits, [int offset = 0, int length,
      int this.replacementCodepoint =
      UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) :
      utf16CodeUnitIterator =
          (new _ListRange(utf16CodeUnits, offset, length)).iterator;

  /** Decodes the code units produced by an existing iterator. */
  Utf16CodeUnitDecoder.fromListRangeIterator(
      _ListRangeIterator this.utf16CodeUnitIterator,
      int this.replacementCodepoint);

  Iterator<int> get iterator => this;

  /** The most recently decoded codepoint; null before/after iteration. */
  int get current => _current;

  /**
   * Stores [replacementCodepoint] in [_current], or throws an
   * [ArgumentError] when no replacement codepoint is configured.
   */
  void _replaceOrThrow() {
    if (replacementCodepoint == null) {
      throw new ArgumentError(
          "Invalid UTF16 at ${utf16CodeUnitIterator.position}");
    }
    _current = replacementCodepoint;
  }

  /**
   * Advances to the next codepoint. Returns false, leaving [current] null,
   * once the underlying code unit iterator is exhausted.
   */
  bool moveNext() {
    _current = null;
    if (!utf16CodeUnitIterator.moveNext()) return false;

    int value = utf16CodeUnitIterator.current;
    if (value < 0) {
      _replaceOrThrow();
    } else if (value < UNICODE_UTF16_RESERVED_LO ||
        (value > UNICODE_UTF16_RESERVED_HI && value <= UNICODE_PLANE_ONE_MAX)) {
      // Outside the reserved (surrogate) range: the code unit is the codepoint.
      _current = value;
    } else if (value < UNICODE_UTF16_SURROGATE_UNIT_1_BASE &&
        utf16CodeUnitIterator.moveNext()) {
      // Leading surrogate with a following unit available.
      int nextValue = utf16CodeUnitIterator.current;
      if (nextValue >= UNICODE_UTF16_SURROGATE_UNIT_1_BASE &&
          nextValue <= UNICODE_UTF16_RESERVED_HI) {
        // Well-formed pair: merge both halves into a single codepoint.
        _current = ((value - UNICODE_UTF16_SURROGATE_UNIT_0_BASE) << 10) +
            UNICODE_UTF16_OFFSET +
            (nextValue - UNICODE_UTF16_SURROGATE_UNIT_1_BASE);
      } else {
        // Only the leading surrogate is invalid. Un-read the following unit
        // so the next call decodes it on its own; previously it was only
        // re-read when it was itself a leading surrogate, which silently
        // dropped any other unit (e.g. an ordinary character).
        utf16CodeUnitIterator.backup();
        _replaceOrThrow();
      }
    } else {
      // Trailing surrogate without a leading one, leading surrogate at the
      // end of the input, or a value above UNICODE_PLANE_ONE_MAX.
      _replaceOrThrow();
    }
    return true;
  }
}
| 98 |  | 
/**
 * Encodes codepoints as UTF-16 code units.
 *
 * Only the [length] entries of [codepoints] starting at [offset] are encoded
 * (the range is handed to `_ListRange`). Codepoints above
 * UNICODE_PLANE_ONE_MAX and up to UNICODE_VALID_RANGE_MAX become a surrogate
 * pair. Any value that is neither directly representable nor encodable as a
 * pair is written as [replacementCodepoint]; if that is null an
 * [ArgumentError] is thrown instead.
 */
List<int> _codepointsToUtf16CodeUnits(
    List<int> codepoints,
    [int offset = 0,
     int length,
     int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {

  // True when [cp] can be stored unchanged in a single code unit.
  bool isSingleUnit(int cp) =>
      (cp >= 0 && cp < UNICODE_UTF16_RESERVED_LO) ||
      (cp > UNICODE_UTF16_RESERVED_HI && cp <= UNICODE_PLANE_ONE_MAX);

  // True when [cp] has to be split into a surrogate pair.
  bool needsSurrogatePair(int cp) =>
      cp > UNICODE_PLANE_ONE_MAX && cp <= UNICODE_VALID_RANGE_MAX;

  _ListRange range = new _ListRange(codepoints, offset, length);

  // First pass: work out the exact output size. Everything that is not a
  // surrogate pair (including invalid values) occupies exactly one unit.
  int unitCount = 0;
  for (int cp in range) {
    unitCount += needsSurrogatePair(cp) ? 2 : 1;
  }

  // Second pass: fill the fixed-length result.
  List<int> units = new List<int>(unitCount);
  int next = 0;
  for (int cp in range) {
    if (isSingleUnit(cp)) {
      units[next++] = cp;
    } else if (needsSurrogatePair(cp)) {
      int base = cp - UNICODE_UTF16_OFFSET;
      units[next++] = UNICODE_UTF16_SURROGATE_UNIT_0_BASE +
          ((base & UNICODE_UTF16_HI_MASK) >> 10);
      units[next++] = UNICODE_UTF16_SURROGATE_UNIT_1_BASE +
          (base & UNICODE_UTF16_LO_MASK);
    } else if (replacementCodepoint == null) {
      throw new ArgumentError("Invalid encoding");
    } else {
      units[next++] = replacementCodepoint;
    }
  }
  return units;
}
| 143 |  | 
/**
 * Decodes UTF-16 code units into a list of codepoints.
 *
 * Only the [length] entries of [utf16CodeUnits] starting at [offset] are
 * decoded. Decoding is delegated to [Utf16CodeUnitDecoder], which receives
 * [replacementCodepoint] unchanged.
 */
List<int> _utf16CodeUnitsToCodepoints(
    List<int> utf16CodeUnits, [int offset = 0, int length,
    int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
  _ListRangeIterator units =
      (new _ListRange(utf16CodeUnits, offset, length)).iterator;
  Utf16CodeUnitDecoder decoder =
      new Utf16CodeUnitDecoder.fromListRangeIterator(
          units, replacementCodepoint);

  // One slot per remaining code unit; surrogate pairs leave some unused.
  List<int> decoded = new List<int>(units.remaining);
  int count = 0;
  while (decoder.moveNext()) {
    decoded[count] = decoder.current;
    count++;
  }

  if (count != decoded.length) {
    // Fewer codepoints than code units: copy into an exactly-sized list.
    List<int> exact = new List<int>(count);
    exact.setRange(0, count, decoded);
    return exact;
  }
  return decoded;
}
| 167 |  | 
/**
 * Lazily decodes UTF-16 [bytes] into codepoints, so a consumer only pays for
 * the part of the input it actually iterates over.
 *
 * The byte order is taken from a leading BOM; without one, big-endian is
 * assumed. A leading BOM is always stripped. Pass null as
 * [replacementCodepoint] to get an ArgumentError for bad input instead of a
 * replacement value; it defaults to UNICODE_REPLACEMENT_CHARACTER_CODEPOINT.
 */
IterableUtf16Decoder decodeUtf16AsIterable(List<int> bytes, [int offset = 0,
    int length, int replacementCodepoint =
    UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
  // Invoked every time the iterable is asked for an iterator.
  _ListRangeIterator makeCodeUnits() => new Utf16BytesToCodeUnitsDecoder(
      bytes, offset, length, replacementCodepoint);

  return new IterableUtf16Decoder._(makeCodeUnits, replacementCodepoint);
}
| 183 |  | 
/**
 * Lazily decodes UTF-16BE [bytes] into codepoints, so a consumer only pays
 * for the part of the input it actually iterates over.
 *
 * A leading big-endian BOM is stripped unless [stripBom] is false. Pass null
 * as [replacementCodepoint] to get an ArgumentError for bad input instead of
 * a replacement value; it defaults to
 * UNICODE_REPLACEMENT_CHARACTER_CODEPOINT.
 */
IterableUtf16Decoder decodeUtf16beAsIterable(List<int> bytes, [int offset = 0,
    int length, bool stripBom = true, int replacementCodepoint =
    UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
  // Invoked every time the iterable is asked for an iterator.
  _ListRangeIterator makeCodeUnits() => new Utf16beBytesToCodeUnitsDecoder(
      bytes, offset, length, stripBom, replacementCodepoint);

  return new IterableUtf16Decoder._(makeCodeUnits, replacementCodepoint);
}
| 199 |  | 
/**
 * Lazily decodes UTF-16LE [bytes] into codepoints, so a consumer only pays
 * for the part of the input it actually iterates over.
 *
 * A leading little-endian BOM is stripped unless [stripBom] is false. Pass
 * null as [replacementCodepoint] to get an ArgumentError for bad input
 * instead of a replacement value; it defaults to
 * UNICODE_REPLACEMENT_CHARACTER_CODEPOINT.
 */
IterableUtf16Decoder decodeUtf16leAsIterable(List<int> bytes, [int offset = 0,
    int length, bool stripBom = true, int replacementCodepoint =
    UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
  // Invoked every time the iterable is asked for an iterator.
  _ListRangeIterator makeCodeUnits() => new Utf16leBytesToCodeUnitsDecoder(
      bytes, offset, length, stripBom, replacementCodepoint);

  return new IterableUtf16Decoder._(makeCodeUnits, replacementCodepoint);
}
| 215 |  | 
/**
 * Builds a String from UTF-16 encoded [bytes].
 *
 * The byte order is chosen by [Utf16BytesToCodeUnitsDecoder] (BOM if present,
 * otherwise big-endian) and a leading BOM is always stripped. Pass null as
 * [replacementCodepoint] to get an ArgumentError for bad input instead of a
 * replacement value; it defaults to UNICODE_REPLACEMENT_CHARACTER_CODEPOINT.
 */
String decodeUtf16(List<int> bytes, [int offset = 0, int length,
    int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
  // Step 1: bytes -> 16-bit code units.
  List<int> codeunits = new Utf16BytesToCodeUnitsDecoder(
      bytes, offset, length, replacementCodepoint).decodeRest();
  // Step 2: code units -> codepoints (merging surrogate pairs).
  List<int> codepoints =
      _utf16CodeUnitsToCodepoints(codeunits, 0, null, replacementCodepoint);
  return new String.fromCharCodes(codepoints);
}
| 230 |  | 
/**
 * Builds a String from UTF-16BE encoded [bytes].
 *
 * A leading big-endian BOM is stripped unless [stripBom] is false. Pass null
 * as [replacementCodepoint] to get an ArgumentError for bad input instead of
 * a replacement value; it defaults to
 * UNICODE_REPLACEMENT_CHARACTER_CODEPOINT.
 */
String decodeUtf16be(List<int> bytes, [int offset = 0, int length,
    bool stripBom = true,
    int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
  // Step 1: bytes -> 16-bit code units.
  Utf16beBytesToCodeUnitsDecoder byteDecoder =
      new Utf16beBytesToCodeUnitsDecoder(
          bytes, offset, length, stripBom, replacementCodepoint);
  List<int> codeunits = byteDecoder.decodeRest();
  // Step 2: code units -> codepoints (merging surrogate pairs).
  List<int> codepoints =
      _utf16CodeUnitsToCodepoints(codeunits, 0, null, replacementCodepoint);
  return new String.fromCharCodes(codepoints);
}
| 246 |  | 
/**
 * Builds a String from UTF-16LE encoded [bytes].
 *
 * A leading little-endian BOM is stripped unless [stripBom] is false. Pass
 * null as [replacementCodepoint] to get an ArgumentError for bad input
 * instead of a replacement value; it defaults to
 * UNICODE_REPLACEMENT_CHARACTER_CODEPOINT.
 */
String decodeUtf16le(List<int> bytes, [int offset = 0, int length,
    bool stripBom = true,
    int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
  // Step 1: bytes -> 16-bit code units.
  Utf16leBytesToCodeUnitsDecoder byteDecoder =
      new Utf16leBytesToCodeUnitsDecoder(
          bytes, offset, length, stripBom, replacementCodepoint);
  List<int> codeunits = byteDecoder.decodeRest();
  // Step 2: code units -> codepoints (merging surrogate pairs).
  List<int> codepoints =
      _utf16CodeUnitsToCodepoints(codeunits, 0, null, replacementCodepoint);
  return new String.fromCharCodes(codepoints);
}
| 262 |  | 
/**
 * Encodes [str] as UTF-16 bytes, prefixed with a big-endian byte-order
 * marker. Equivalent to calling [encodeUtf16be] with `writeBOM` set to true.
 */
List<int> encodeUtf16(String str) {
  return encodeUtf16be(str, true);
}
| 269 |  | 
/**
 * Encodes [str] as UTF-16BE bytes: for every code unit the high byte is
 * written first, then the low byte. No BOM is written unless [writeBOM] is
 * true, in which case UNICODE_UTF_BOM_HI and UNICODE_UTF_BOM_LO come first.
 */
List<int> encodeUtf16be(String str, [bool writeBOM = false]) {
  List<int> units = _stringToUtf16CodeUnits(str);
  int bomLength = writeBOM ? 2 : 0;
  List<int> bytes = new List<int>(bomLength + 2 * units.length);

  if (writeBOM) {
    bytes[0] = UNICODE_UTF_BOM_HI;
    bytes[1] = UNICODE_UTF_BOM_LO;
  }

  // Two output bytes per code unit, starting right after the optional BOM.
  int out = bomLength;
  for (int k = 0; k < units.length; k++) {
    int unit = units[k];
    bytes[out] = (unit & UNICODE_BYTE_ONE_MASK) >> 8;
    bytes[out + 1] = unit & UNICODE_BYTE_ZERO_MASK;
    out += 2;
  }
  return bytes;
}
| 289 |  | 
/**
 * Encodes [str] as UTF-16LE bytes: for every code unit the low byte is
 * written first, then the high byte. No BOM is written unless [writeBOM] is
 * true, in which case UNICODE_UTF_BOM_LO and UNICODE_UTF_BOM_HI come first.
 */
List<int> encodeUtf16le(String str, [bool writeBOM = false]) {
  List<int> units = _stringToUtf16CodeUnits(str);
  int bomLength = writeBOM ? 2 : 0;
  List<int> bytes = new List<int>(bomLength + 2 * units.length);

  if (writeBOM) {
    bytes[0] = UNICODE_UTF_BOM_LO;
    bytes[1] = UNICODE_UTF_BOM_HI;
  }

  // Two output bytes per code unit, starting right after the optional BOM.
  int out = bomLength;
  for (int k = 0; k < units.length; k++) {
    int unit = units[k];
    bytes[out] = unit & UNICODE_BYTE_ZERO_MASK;
    bytes[out + 1] = (unit & UNICODE_BYTE_ONE_MASK) >> 8;
    out += 2;
  }
  return bytes;
}
| 309 |  | 
/**
 * Identifies whether a List of bytes starts (based on [offset]) with a
 * UTF-16 byte-order marker (BOM) of either endianness.
 *
 * Returns true when [hasUtf16beBom] or [hasUtf16leBom] accepts the same
 * arguments. [length] limits how many bytes after [offset] are considered;
 * when it is omitted the rest of the list is used.
 */
bool hasUtf16Bom(List<int> utf16EncodedBytes, [int offset = 0, int length]) {
  // The parameter used to be called utf32EncodedBytes, which was misleading
  // for a UTF-16 check. It is positional, so call sites are unaffected.
  return hasUtf16beBom(utf16EncodedBytes, offset, length) ||
      hasUtf16leBom(utf16EncodedBytes, offset, length);
}
| 318 |  | 
/**
 * Identifies whether a List of bytes starts (based on [offset]) with a
 * big-endian byte-order marker (BOM), i.e. UNICODE_UTF_BOM_HI followed by
 * UNICODE_UTF_BOM_LO.
 *
 * [length] limits how many bytes after [offset] are considered; when it is
 * omitted the rest of the list is used. Fewer than two available bytes
 * yields false.
 */
bool hasUtf16beBom(List<int> utf16EncodedBytes, [int offset = 0, int length]) {
  int end = (length == null) ? utf16EncodedBytes.length : offset + length;
  // Not enough room for a two-byte marker.
  if (offset + 2 > end) return false;
  return utf16EncodedBytes[offset] == UNICODE_UTF_BOM_HI &&
      utf16EncodedBytes[offset + 1] == UNICODE_UTF_BOM_LO;
}
| 329 |  | 
/**
 * Identifies whether a List of bytes starts (based on [offset]) with a
 * little-endian byte-order marker (BOM), i.e. UNICODE_UTF_BOM_LO followed by
 * UNICODE_UTF_BOM_HI.
 *
 * [length] limits how many bytes after [offset] are considered; when it is
 * omitted the rest of the list is used. Fewer than two available bytes
 * yields false.
 */
bool hasUtf16leBom(List<int> utf16EncodedBytes, [int offset = 0, int length]) {
  int end = (length == null) ? utf16EncodedBytes.length : offset + length;
  // Not enough room for a two-byte marker.
  if (offset + 2 > end) return false;
  return utf16EncodedBytes[offset] == UNICODE_UTF_BOM_LO &&
      utf16EncodedBytes[offset + 1] == UNICODE_UTF_BOM_HI;
}
| 340 |  | 
/**
 * Returns the UTF-16 code units used to encode [str].
 *
 * The string is first decoded to codepoints and then re-encoded. Passing
 * `str.codeUnits` straight to [_codepointsToUtf16CodeUnits] would be wrong:
 * that function expects codepoints and treats every value in the surrogate
 * range as invalid, so each half of a surrogate pair would be replaced by
 * the replacement codepoint. Going through [stringToCodepoints] keeps valid
 * pairs intact; unpaired surrogates are still replaced.
 */
List<int> _stringToUtf16CodeUnits(String str) {
  return _codepointsToUtf16CodeUnits(stringToCodepoints(str));
}
| 344 |  | 
/**
 * Signature of a zero-argument callback that returns a [_ListRangeIterator].
 * [IterableUtf16Decoder] stores one and calls it from its `iterator` getter.
 */
typedef _ListRangeIterator _CodeUnitsProvider();
| 346 |  | 
/**
 * Return type of [decodeUtf16AsIterable] and variants.
 *
 * An iterator is created on demand, and it only translates bytes as its
 * user asks for them. Results are not cached: every access to [iterator]
 * calls [codeunitsProvider] again and wraps the result in a new
 * [Utf16CodeUnitDecoder].
 */
// TODO(floitsch): Consider removing the extend and switch to implements since
// that's cheaper to allocate.
class IterableUtf16Decoder extends IterableBase<int> {
  /** Callback producing the code unit source for each new iterator. */
  final _CodeUnitsProvider codeunitsProvider;

  /** Passed to every [Utf16CodeUnitDecoder] this iterable creates. */
  final int replacementCodepoint;

  IterableUtf16Decoder._(this.codeunitsProvider, this.replacementCodepoint);

  Utf16CodeUnitDecoder get iterator {
    _ListRangeIterator codeUnits = codeunitsProvider();
    return new Utf16CodeUnitDecoder.fromListRangeIterator(
        codeUnits, replacementCodepoint);
  }
}
| 364 |  | 
/**
 * Convert UTF-16 encoded bytes to UTF-16 code units by grouping 1-2 bytes
 * to produce the code unit (0-(2^16)-1).
 *
 * The factory constructor relies on a BOM to determine endian-ness and
 * defaults to BE. Subclasses supply [decode], which reads the next two bytes
 * in their byte order. [position], [remaining], [skip] and [backup] are
 * expressed in code units and translated to bytes on the wrapped iterator.
 */
abstract class Utf16BytesToCodeUnitsDecoder implements _ListRangeIterator {
  /** Iterator over the raw encoded bytes. */
  final _ListRangeIterator utf16EncodedBytesIterator;

  /** Value emitted for a dangling trailing byte, or null to throw instead. */
  final int replacementCodepoint;

  int _current = null;

  Utf16BytesToCodeUnitsDecoder._fromListRangeIterator(
      this.utf16EncodedBytesIterator, this.replacementCodepoint);

  /**
   * Picks the decoder matching the BOM found at [offset]: big-endian BOM ->
   * BE decoder, little-endian BOM -> LE decoder, no BOM -> BE decoder. When
   * a BOM is found, the chosen decoder starts two bytes later so the marker
   * is never decoded.
   */
  factory Utf16BytesToCodeUnitsDecoder(List<int> utf16EncodedBytes, [
      int offset = 0, int length,
      int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
    int byteCount =
        (length == null) ? utf16EncodedBytes.length - offset : length;

    if (hasUtf16beBom(utf16EncodedBytes, offset, byteCount)) {
      return new Utf16beBytesToCodeUnitsDecoder(utf16EncodedBytes, offset + 2,
          byteCount - 2, false, replacementCodepoint);
    }
    if (hasUtf16leBom(utf16EncodedBytes, offset, byteCount)) {
      return new Utf16leBytesToCodeUnitsDecoder(utf16EncodedBytes, offset + 2,
          byteCount - 2, false, replacementCodepoint);
    }
    return new Utf16beBytesToCodeUnitsDecoder(utf16EncodedBytes, offset,
        byteCount, false, replacementCodepoint);
  }

  /**
   * Provides a fast way to decode the rest of the source bytes in a single
   * call. This method trades memory for improved speed in that it potentially
   * over-allocates the List containing results.
   */
  List<int> decodeRest() {
    List<int> decoded = new List<int>(remaining);
    int count = 0;
    while (moveNext()) {
      decoded[count] = current;
      count++;
    }
    if (count != decoded.length) {
      // Fewer units than estimated: copy into an exactly-sized list.
      List<int> exact = new List<int>(count);
      exact.setRange(0, count, decoded);
      return exact;
    }
    return decoded;
  }

  int get current => _current;

  /**
   * Decodes the next code unit. A single left-over byte is consumed and
   * reported as [replacementCodepoint], or as an ArgumentError when that is
   * null. Returns false once no bytes remain.
   */
  bool moveNext() {
    _current = null;
    int bytesLeft = utf16EncodedBytesIterator.remaining;
    if (bytesLeft == 0) return false;

    if (bytesLeft != 1) {
      _current = decode();
      return true;
    }

    // Odd trailing byte: it cannot form a complete code unit.
    utf16EncodedBytesIterator.moveNext();
    if (replacementCodepoint == null) {
      throw new ArgumentError(
          "Invalid UTF16 at ${utf16EncodedBytesIterator.position}");
    }
    _current = replacementCodepoint;
    return true;
  }

  /** Position in code units: the byte position divided by two. */
  int get position => utf16EncodedBytesIterator.position ~/ 2;

  /** Moves back [by] code units, i.e. twice as many bytes. */
  void backup([int by = 1]) {
    utf16EncodedBytesIterator.backup(2 * by);
  }

  /** Code units left, counting a dangling trailing byte as one unit. */
  int get remaining => (utf16EncodedBytesIterator.remaining + 1) ~/ 2;

  /** Skips [count] code units, i.e. twice as many bytes. */
  void skip([int count = 1]) {
    utf16EncodedBytesIterator.skip(2 * count);
  }

  /** Reads two bytes and combines them into one code unit. */
  int decode();
}
| 453 |  | 
/**
 * Convert UTF-16BE encoded bytes to utf16 code units by grouping 1-2 bytes
 * to produce the code unit (0-(2^16)-1).
 *
 * When [stripBom] is true and the range starts with a big-endian BOM, the
 * constructor skips that first code unit.
 */
class Utf16beBytesToCodeUnitsDecoder extends Utf16BytesToCodeUnitsDecoder {
  Utf16beBytesToCodeUnitsDecoder(List<int> utf16EncodedBytes, [
      int offset = 0, int length, bool stripBom = true,
      int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) :
      super._fromListRangeIterator(
          (new _ListRange(utf16EncodedBytes, offset, length)).iterator,
          replacementCodepoint) {
    bool startsWithBom = stripBom &&
        hasUtf16beBom(utf16EncodedBytes, offset, length);
    if (startsWithBom) {
      skip();
    }
  }

  /** Reads the high byte, then the low byte, and combines them. */
  int decode() {
    _ListRangeIterator bytes = utf16EncodedBytesIterator;
    bytes.moveNext();
    int high = bytes.current;
    bytes.moveNext();
    int low = bytes.current;
    return (high << 8) + low;
  }
}
| 478 |  | 
/**
 * Convert UTF-16LE encoded bytes to utf16 code units by grouping 1-2 bytes
 * to produce the code unit (0-(2^16)-1).
 *
 * When [stripBom] is true and the range starts with a little-endian BOM, the
 * constructor skips that first code unit.
 */
class Utf16leBytesToCodeUnitsDecoder extends Utf16BytesToCodeUnitsDecoder {
  Utf16leBytesToCodeUnitsDecoder(List<int> utf16EncodedBytes, [
      int offset = 0, int length, bool stripBom = true,
      int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) :
      super._fromListRangeIterator(
          (new _ListRange(utf16EncodedBytes, offset, length)).iterator,
          replacementCodepoint) {
    bool startsWithBom = stripBom &&
        hasUtf16leBom(utf16EncodedBytes, offset, length);
    if (startsWithBom) {
      skip();
    }
  }

  /** Reads the low byte, then the high byte, and combines them. */
  int decode() {
    _ListRangeIterator bytes = utf16EncodedBytesIterator;
    bytes.moveNext();
    int low = bytes.current;
    bytes.moveNext();
    int high = bytes.current;
    return (high << 8) + low;
  }
}
| OLD | NEW | 
|---|