| OLD | NEW |
| 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
| 4 | 4 |
| 5 | 5 |
| 6 /** | 6 /** |
| 7 * Decodes the UTF-16 [bytes] as an iterable, so the consumer converts only | 7 * Decodes the UTF-16 [bytes] as an iterable, so the consumer converts only |
| 8 * as much of the input as it needs. Determines the byte order from the BOM, | 8 * as much of the input as it needs. Determines the byte order from the BOM, |
| 9 * or uses big-endian as a default. This method always strips a leading BOM. | 9 * or uses big-endian as a default. This method always strips a leading BOM. |
| 10 * Set [replacementCodepoint] to null to throw an IllegalArgumentException | 10 * Set [replacementCodepoint] to null to throw an ArgumentError |
| 11 * rather than replace the bad value; its default value is U+FFFD. [bytes], | 11 * rather than replace the bad value; its default value is U+FFFD. [bytes], |
| 12 * [offset] and [length] are forwarded to Utf16BytesToCodeUnitsDecoder. | 12 * [offset] and [length] are forwarded to Utf16BytesToCodeUnitsDecoder. |
| 13 */ | 13 */ |
| 14 IterableUtf16Decoder decodeUtf16AsIterable(List<int> bytes, [int offset = 0, | 14 IterableUtf16Decoder decodeUtf16AsIterable(List<int> bytes, [int offset = 0, |
| 15 int length, int replacementCodepoint = | 15 int length, int replacementCodepoint = |
| 16 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { | 16 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { |
| 17 return new IterableUtf16Decoder._( /* receives a closure that builds the decoder, plus the replacement value */ | 17 return new IterableUtf16Decoder._( /* receives a closure that builds the decoder, plus the replacement value */ |
| 18 () => new Utf16BytesToCodeUnitsDecoder(bytes, offset, length, | 18 () => new Utf16BytesToCodeUnitsDecoder(bytes, offset, length, |
| 19 replacementCodepoint), replacementCodepoint); | 19 replacementCodepoint), replacementCodepoint); |
| 20 } | 20 } |
| 21 | 21 |
| 22 /** | 22 /** |
| 23 * Decodes the UTF-16BE [bytes] as an iterable, so the consumer converts only | 23 * Decodes the UTF-16BE [bytes] as an iterable, so the consumer converts only |
| 24 * as much of the input as it needs. This method strips a leading BOM by | 24 * as much of the input as it needs. This method strips a leading BOM by |
| 25 * default; this can be overridden by setting the optional parameter [stripBom] | 25 * default; this can be overridden by setting the optional parameter [stripBom] |
| 26 * to false. Set [replacementCodepoint] to null to throw an | 26 * to false. Set [replacementCodepoint] to null to throw an |
| 27 * IllegalArgumentException rather than replace the bad value. The default | 27 * ArgumentError rather than replace the bad value. The default |
| 28 * value for [replacementCodepoint] is U+FFFD. | 28 * value for [replacementCodepoint] is U+FFFD. |
| 29 */ | 29 */ |
| 30 IterableUtf16Decoder decodeUtf16beAsIterable(List<int> bytes, [int offset = 0, | 30 IterableUtf16Decoder decodeUtf16beAsIterable(List<int> bytes, [int offset = 0, |
| 31 int length, bool stripBom = true, int replacementCodepoint = | 31 int length, bool stripBom = true, int replacementCodepoint = |
| 32 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { | 32 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { |
| 33 return new IterableUtf16Decoder._( /* receives a closure that builds the big-endian decoder, plus the replacement value */ | 33 return new IterableUtf16Decoder._( /* receives a closure that builds the big-endian decoder, plus the replacement value */ |
| 34 () => new Utf16beBytesToCodeUnitsDecoder(bytes, offset, length, stripBom, | 34 () => new Utf16beBytesToCodeUnitsDecoder(bytes, offset, length, stripBom, |
| 35 replacementCodepoint), replacementCodepoint); | 35 replacementCodepoint), replacementCodepoint); |
| 36 } | 36 } |
| 37 | 37 |
| 38 /** | 38 /** |
| 39 * Decodes the UTF-16LE [bytes] as an iterable, so the consumer converts only | 39 * Decodes the UTF-16LE [bytes] as an iterable, so the consumer converts only |
| 40 * as much of the input as it needs. This method strips a leading BOM by | 40 * as much of the input as it needs. This method strips a leading BOM by |
| 41 * default; this can be overridden by setting the optional parameter [stripBom] | 41 * default; this can be overridden by setting the optional parameter [stripBom] |
| 42 * to false. Set [replacementCodepoint] to null to throw an | 42 * to false. Set [replacementCodepoint] to null to throw an |
| 43 * IllegalArgumentException rather than replace the bad value. The default | 43 * ArgumentError rather than replace the bad value. The default |
| 44 * value for [replacementCodepoint] is U+FFFD. | 44 * value for [replacementCodepoint] is U+FFFD. |
| 45 */ | 45 */ |
| 46 IterableUtf16Decoder decodeUtf16leAsIterable(List<int> bytes, [int offset = 0, | 46 IterableUtf16Decoder decodeUtf16leAsIterable(List<int> bytes, [int offset = 0, |
| 47 int length, bool stripBom = true, int replacementCodepoint = | 47 int length, bool stripBom = true, int replacementCodepoint = |
| 48 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { | 48 UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { |
| 49 return new IterableUtf16Decoder._( /* receives a closure that builds the little-endian decoder, plus the replacement value */ | 49 return new IterableUtf16Decoder._( /* receives a closure that builds the little-endian decoder, plus the replacement value */ |
| 50 () => new Utf16leBytesToCodeUnitsDecoder(bytes, offset, length, stripBom, | 50 () => new Utf16leBytesToCodeUnitsDecoder(bytes, offset, length, stripBom, |
| 51 replacementCodepoint), replacementCodepoint); | 51 replacementCodepoint), replacementCodepoint); |
| 52 } | 52 } |
| 53 | 53 |
| 54 /** | 54 /** |
| 55 * Produces a String from a sequence of UTF-16 encoded [bytes]. This method always | 55 * Produces a String from a sequence of UTF-16 encoded [bytes]. This method always |
| 56 * strips a leading BOM. Set [replacementCodepoint] to null to throw an | 56 * strips a leading BOM. Set [replacementCodepoint] to null to throw an |
| 57 * IllegalArgumentException rather than replace the bad value. The default | 57 * ArgumentError rather than replace the bad value. The default |
| 58 * value for [replacementCodepoint] is U+FFFD. | 58 * value for [replacementCodepoint] is U+FFFD. |
| 59 */ | 59 */ |
| 60 String decodeUtf16(List<int> bytes, [int offset = 0, int length, | 60 String decodeUtf16(List<int> bytes, [int offset = 0, int length, |
| 61 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { | 61 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { |
| 62 Utf16BytesToCodeUnitsDecoder decoder = new Utf16BytesToCodeUnitsDecoder(bytes, | 62 Utf16BytesToCodeUnitsDecoder decoder = new Utf16BytesToCodeUnitsDecoder(bytes, |
| 63 offset, length, replacementCodepoint); | 63 offset, length, replacementCodepoint); |
| 64 List<int> codeunits = decoder.decodeRest(); | 64 List<int> codeunits = decoder.decodeRest(); |
| 65 // TODO: _is16BitCodeUnit() is used to work around a bug with dart2js | 65 // TODO: _is16BitCodeUnit() is used to work around a bug with dart2js |
| 66 // (http://code.google.com/p/dart/issues/detail?id=1357). Consider | 66 // (http://code.google.com/p/dart/issues/detail?id=1357). Consider |
| 67 // removing the check once that issue is resolved. | 67 // removing the check once that issue is resolved. |
| 68 if (_is16BitCodeUnit()) { | 68 if (_is16BitCodeUnit()) { |
| 69 return new String.fromCharCodes(codeunits); /* code units go to the String constructor unchanged */ | 69 return new String.fromCharCodes(codeunits); /* code units go to the String constructor unchanged */ |
| 70 } else { | 70 } else { |
| 71 return new String.fromCharCodes( /* otherwise the code units are first run through _utf16CodeUnitsToCodepoints */ | 71 return new String.fromCharCodes( /* otherwise the code units are first run through _utf16CodeUnitsToCodepoints */ |
| 72 _utf16CodeUnitsToCodepoints(codeunits, 0, null, replacementCodepoint)); | 72 _utf16CodeUnitsToCodepoints(codeunits, 0, null, replacementCodepoint)); |
| 73 } | 73 } |
| 74 } | 74 } |
| 75 | 75 |
| 76 /** | 76 /** |
| 77 * Produces a String from a sequence of UTF-16BE encoded [bytes]. This method | 77 * Produces a String from a sequence of UTF-16BE encoded [bytes]. This method |
| 78 * strips a leading BOM by default; this can be overridden by setting the | 78 * strips a leading BOM by default; this can be overridden by setting the |
| 79 * optional parameter [stripBom] to false. Set [replacementCodepoint] to | 79 * optional parameter [stripBom] to false. Set [replacementCodepoint] to |
| 80 * null to throw an IllegalArgumentException rather than replace the bad value. | 80 * null to throw an ArgumentError rather than replace the bad value. |
| 81 * The default value for [replacementCodepoint] is U+FFFD. | 81 * The default value for [replacementCodepoint] is U+FFFD. |
| 82 */ | 82 */ |
| 83 String decodeUtf16be(List<int> bytes, [int offset = 0, int length, | 83 String decodeUtf16be(List<int> bytes, [int offset = 0, int length, |
| 84 bool stripBom = true, | 84 bool stripBom = true, |
| 85 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { | 85 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { |
| 86 List<int> codeunits = (new Utf16beBytesToCodeUnitsDecoder(bytes, offset, | 86 List<int> codeunits = (new Utf16beBytesToCodeUnitsDecoder(bytes, offset, |
| 87 length, stripBom, replacementCodepoint)).decodeRest(); | 87 length, stripBom, replacementCodepoint)).decodeRest(); |
| 88 // TODO: _is16BitCodeUnit() is used to work around a bug with dart2js | 88 // TODO: _is16BitCodeUnit() is used to work around a bug with dart2js |
| 89 // (http://code.google.com/p/dart/issues/detail?id=1357). Consider | 89 // (http://code.google.com/p/dart/issues/detail?id=1357). Consider |
| 90 // removing the check once that issue is resolved. | 90 // removing the check once that issue is resolved. |
| 91 if (_is16BitCodeUnit()) { | 91 if (_is16BitCodeUnit()) { |
| 92 return new String.fromCharCodes(codeunits); /* code units go to the String constructor unchanged */ | 92 return new String.fromCharCodes(codeunits); /* code units go to the String constructor unchanged */ |
| 93 } else { | 93 } else { |
| 94 return new String.fromCharCodes( /* otherwise the code units are first run through _utf16CodeUnitsToCodepoints */ | 94 return new String.fromCharCodes( /* otherwise the code units are first run through _utf16CodeUnitsToCodepoints */ |
| 95 _utf16CodeUnitsToCodepoints(codeunits, 0, null, replacementCodepoint)); | 95 _utf16CodeUnitsToCodepoints(codeunits, 0, null, replacementCodepoint)); |
| 96 } | 96 } |
| 97 } | 97 } |
| 98 | 98 |
| 99 /** | 99 /** |
| 100 * Produce a String from a sequence of UTF-16LE encoded bytes. This method | 100 * Produce a String from a sequence of UTF-16LE encoded bytes. This method |
| 101 * strips a leading BOM by default, but can be overridden by setting the | 101 * strips a leading BOM by default, but can be overridden by setting the |
| 102 * optional parameter [stripBom] to false. Set the [replacementCodepoint] to | 102 * optional parameter [stripBom] to false. Set the [replacementCodepoint] to |
| 103 * null to throw an IllegalArgumentException rather than replace the bad value. | 103 * null to throw an ArgumentError rather than replace the bad value. |
| 104 * The default value for the [replacementCodepoint] is U+FFFD. | 104 * The default value for the [replacementCodepoint] is U+FFFD. |
| 105 */ | 105 */ |
| 106 String decodeUtf16le(List<int> bytes, [int offset = 0, int length, | 106 String decodeUtf16le(List<int> bytes, [int offset = 0, int length, |
| 107 bool stripBom = true, | 107 bool stripBom = true, |
| 108 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { | 108 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { |
| 109 List<int> codeunits = (new Utf16leBytesToCodeUnitsDecoder(bytes, offset, | 109 List<int> codeunits = (new Utf16leBytesToCodeUnitsDecoder(bytes, offset, |
| 110 length, stripBom, replacementCodepoint)).decodeRest(); | 110 length, stripBom, replacementCodepoint)).decodeRest(); |
| 111 // TODO is16BitCodeUnit() is used to work around a bug with dart2js | 111 // TODO is16BitCodeUnit() is used to work around a bug with dart2js |
| 112 // (http://code.google.com/p/dart/issues/detail?id=1357). Consider | 112 // (http://code.google.com/p/dart/issues/detail?id=1357). Consider |
| 113 // removing after this issue is resolved. | 113 // removing after this issue is resolved. |
| (...skipping 163 matching lines...) | (...skipping 163 matching lines...) |
| 277 } | 277 } |
| 278 | 278 |
| 279 bool hasNext() => utf16EncodedBytesIterator.hasNext(); /* delegates to the underlying byte iterator */ | 279 bool hasNext() => utf16EncodedBytesIterator.hasNext(); /* delegates to the underlying byte iterator */ |
| 280 | 280 |
| 281 int next() { /* returns the next decoded value; a trailing partial unit is replaced or reported as an error */ | 281 int next() { /* returns the next decoded value; a trailing partial unit is replaced or reported as an error */ |
| 282 if (utf16EncodedBytesIterator.remaining < 2) { /* fewer than two values left, so decode() cannot read a full pair */ | 282 if (utf16EncodedBytesIterator.remaining < 2) { /* fewer than two values left, so decode() cannot read a full pair */ |
| 283 utf16EncodedBytesIterator.next(); /* advance the underlying iterator once; the value read is discarded */ | 283 utf16EncodedBytesIterator.next(); /* advance the underlying iterator once; the value read is discarded */ |
| 284 if (replacementCodepoint != null) { | 284 if (replacementCodepoint != null) { |
| 285 return replacementCodepoint; /* substitute the configured replacement value */ | 285 return replacementCodepoint; /* substitute the configured replacement value */ |
| 286 } else { | 286 } else { |
| 287 throw new IllegalArgumentException( /* no replacement configured: fail, reporting the iterator position */ | 287 throw new ArgumentError( /* no replacement configured: fail, reporting the iterator position */ |
| 288 "Invalid UTF16 at ${utf16EncodedBytesIterator.position}"); | 288 "Invalid UTF16 at ${utf16EncodedBytesIterator.position}"); |
| 289 } | 289 } |
| 290 } else { | 290 } else { |
| 291 return decode(); /* at least two values remain */ | 291 return decode(); /* at least two values remain */ |
| 292 } | 292 } |
| 293 } | 293 } |
| 294 | 294 |
| 295 int get position => utf16EncodedBytesIterator.position ~/ 2; /* underlying iterator position, integer-divided by 2; presumably a byte offset turned into a code-unit index - TODO confirm */ | 295 int get position => utf16EncodedBytesIterator.position ~/ 2; /* underlying iterator position, integer-divided by 2; presumably a byte offset turned into a code-unit index - TODO confirm */ |
| 296 | 296 |
| 297 void backup([int by = 1]) { | 297 void backup([int by = 1]) { |
| (...skipping 45 matching lines...) | (...skipping 45 matching lines...) |
| 343 skip(); | 343 skip(); |
| 344 } | 344 } |
| 345 } | 345 } |
| 346 | 346 |
| 347 int decode() { /* reads two values from the byte iterator and combines them into one 16-bit value, low-order byte first */ | 347 int decode() { /* reads two values from the byte iterator and combines them into one 16-bit value, low-order byte first */ |
| 348 int lo = utf16EncodedBytesIterator.next(); /* first value read is the low-order byte */ | 348 int lo = utf16EncodedBytesIterator.next(); /* first value read is the low-order byte */ |
| 349 int hi = utf16EncodedBytesIterator.next(); /* second value read is the high-order byte */ | 349 int hi = utf16EncodedBytesIterator.next(); /* second value read is the high-order byte */ |
| 350 return (hi << 8) + lo; /* hi is shifted into bits 8 and above, lo fills the low 8 bits */ | 350 return (hi << 8) + lo; /* hi is shifted into bits 8 and above, lo fills the low 8 bits */ |
| 351 } | 351 } |
| 352 } | 352 } |
| OLD | NEW |