| OLD | NEW |
| 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
| 4 | 4 |
| 5 | 5 |
| 6 /** | 6 /** |
| 7 * Decodes the UTF-16 bytes as an iterable. Thus, the consumer can only convert | 7 * Decodes the UTF-16 bytes as an iterable. Thus, the consumer can only convert |
| 8 * as much of the input as needed. Determines the byte order from the BOM, | 8 * as much of the input as needed. Determines the byte order from the BOM, |
| 9 * or uses big-endian as a default. This method always strips a leading BOM. | 9 * or uses big-endian as a default. This method always strips a leading BOM. |
| 10 * Set the [replacementCodepoint] to null to throw an ArgumentError | 10 * Set the [replacementCodepoint] to null to throw an ArgumentError |
| (...skipping 44 matching lines...) | (...skipping 44 matching lines...) |
| 55 * Produce a String from a sequence of UTF-16 encoded bytes. This method always | 55 * Produce a String from a sequence of UTF-16 encoded bytes. This method always |
| 56 * strips a leading BOM. Set the [replacementCodepoint] to null to throw an | 56 * strips a leading BOM. Set the [replacementCodepoint] to null to throw an |
| 57 * ArgumentError rather than replace the bad value. The default | 57 * ArgumentError rather than replace the bad value. The default |
| 58 * value for the [replacementCodepoint] is U+FFFD. | 58 * value for the [replacementCodepoint] is U+FFFD. |
| 59 */ | 59 */ |
| 60 String decodeUtf16(List<int> bytes, [int offset = 0, int length, | 60 String decodeUtf16(List<int> bytes, [int offset = 0, int length, |
| 61 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { | 61 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { |
| 62 Utf16BytesToCodeUnitsDecoder decoder = new Utf16BytesToCodeUnitsDecoder(bytes, | 62 Utf16BytesToCodeUnitsDecoder decoder = new Utf16BytesToCodeUnitsDecoder(bytes, |
| 63 offset, length, replacementCodepoint); | 63 offset, length, replacementCodepoint); |
| 64 List<int> codeunits = decoder.decodeRest(); | 64 List<int> codeunits = decoder.decodeRest(); |
| 65 // TODO is16BitCodeUnit() is used to work around a bug with dart2js | 65 return new String.fromCharCodes( |
| 66 // (http://code.google.com/p/dart/issues/detail?id=1357). Consider | 66 _utf16CodeUnitsToCodepoints(codeunits, 0, null, replacementCodepoint)); |
| 67 // removing after this issue is resolved. | |
| 68 if (_is16BitCodeUnit()) { | |
| 69 return new String.fromCharCodes(codeunits); | |
| 70 } else { | |
| 71 return new String.fromCharCodes( | |
| 72 _utf16CodeUnitsToCodepoints(codeunits, 0, null, replacementCodepoint)); | |
| 73 } | |
| 74 } | 67 } |
| 75 | 68 |
| 76 /** | 69 /** |
| 77 * Produce a String from a sequence of UTF-16BE encoded bytes. This method | 70 * Produce a String from a sequence of UTF-16BE encoded bytes. This method |
| 78 * strips a leading BOM by default, but can be overridden by setting the | 71 * strips a leading BOM by default, but can be overridden by setting the |
| 79 * optional parameter [stripBom] to false. Set the [replacementCodepoint] to | 72 * optional parameter [stripBom] to false. Set the [replacementCodepoint] to |
| 80 * null to throw an ArgumentError rather than replace the bad value. | 73 * null to throw an ArgumentError rather than replace the bad value. |
| 81 * The default value for the [replacementCodepoint] is U+FFFD. | 74 * The default value for the [replacementCodepoint] is U+FFFD. |
| 82 */ | 75 */ |
| 83 String decodeUtf16be(List<int> bytes, [int offset = 0, int length, | 76 String decodeUtf16be(List<int> bytes, [int offset = 0, int length, |
| 84 bool stripBom = true, | 77 bool stripBom = true, |
| 85 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { | 78 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { |
| 86 List<int> codeunits = (new Utf16beBytesToCodeUnitsDecoder(bytes, offset, | 79 List<int> codeunits = (new Utf16beBytesToCodeUnitsDecoder(bytes, offset, |
| 87 length, stripBom, replacementCodepoint)).decodeRest(); | 80 length, stripBom, replacementCodepoint)).decodeRest(); |
| 88 // TODO is16BitCodeUnit() is used to work around a bug with dart2js | 81 return new String.fromCharCodes( |
| 89 // (http://code.google.com/p/dart/issues/detail?id=1357). Consider | 82 _utf16CodeUnitsToCodepoints(codeunits, 0, null, replacementCodepoint)); |
| 90 // removing after this issue is resolved. | |
| 91 if (_is16BitCodeUnit()) { | |
| 92 return new String.fromCharCodes(codeunits); | |
| 93 } else { | |
| 94 return new String.fromCharCodes( | |
| 95 _utf16CodeUnitsToCodepoints(codeunits, 0, null, replacementCodepoint)); | |
| 96 } | |
| 97 } | 83 } |
| 98 | 84 |
| 99 /** | 85 /** |
| 100 * Produce a String from a sequence of UTF-16LE encoded bytes. This method | 86 * Produce a String from a sequence of UTF-16LE encoded bytes. This method |
| 101 * strips a leading BOM by default, but can be overridden by setting the | 87 * strips a leading BOM by default, but can be overridden by setting the |
| 102 * optional parameter [stripBom] to false. Set the [replacementCodepoint] to | 88 * optional parameter [stripBom] to false. Set the [replacementCodepoint] to |
| 103 * null to throw an ArgumentError rather than replace the bad value. | 89 * null to throw an ArgumentError rather than replace the bad value. |
| 104 * The default value for the [replacementCodepoint] is U+FFFD. | 90 * The default value for the [replacementCodepoint] is U+FFFD. |
| 105 */ | 91 */ |
| 106 String decodeUtf16le(List<int> bytes, [int offset = 0, int length, | 92 String decodeUtf16le(List<int> bytes, [int offset = 0, int length, |
| 107 bool stripBom = true, | 93 bool stripBom = true, |
| 108 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { | 94 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { |
| 109 List<int> codeunits = (new Utf16leBytesToCodeUnitsDecoder(bytes, offset, | 95 List<int> codeunits = (new Utf16leBytesToCodeUnitsDecoder(bytes, offset, |
| 110 length, stripBom, replacementCodepoint)).decodeRest(); | 96 length, stripBom, replacementCodepoint)).decodeRest(); |
| 111 // TODO is16BitCodeUnit() is used to work around a bug with dart2js | 97 return new String.fromCharCodes( |
| 112 // (http://code.google.com/p/dart/issues/detail?id=1357). Consider | 98 _utf16CodeUnitsToCodepoints(codeunits, 0, null, replacementCodepoint)); |
| 113 // removing after this issue is resolved. | |
| 114 if (_is16BitCodeUnit()) { | |
| 115 return new String.fromCharCodes(codeunits); | |
| 116 } else { | |
| 117 return new String.fromCharCodes( | |
| 118 _utf16CodeUnitsToCodepoints(codeunits, 0, null, replacementCodepoint)); | |
| 119 } | |
| 120 } | 99 } |
| 121 | 100 |
| 122 /** | 101 /** |
| 123 * Produce a list of UTF-16 encoded bytes. This method prefixes the resulting | 102 * Produce a list of UTF-16 encoded bytes. This method prefixes the resulting |
| 124 * bytes with a big-endian byte-order-marker. | 103 * bytes with a big-endian byte-order-marker. |
| 125 */ | 104 */ |
| 126 List<int> encodeUtf16(String str) => | 105 List<int> encodeUtf16(String str) => |
| 127 encodeUtf16be(str, true); | 106 encodeUtf16be(str, true); |
| 128 | 107 |
| 129 /** | 108 /** |
| (...skipping 61 matching lines...) | (...skipping 61 matching lines...) |
| 191 * little-endian byte-order marker (BOM). | 170 * little-endian byte-order marker (BOM). |
| 192 */ | 171 */ |
| 193 bool hasUtf16leBom(List<int> utf16EncodedBytes, [int offset = 0, int length]) { | 172 bool hasUtf16leBom(List<int> utf16EncodedBytes, [int offset = 0, int length]) { |
| 194 int end = length != null ? offset + length : utf16EncodedBytes.length; | 173 int end = length != null ? offset + length : utf16EncodedBytes.length; |
| 195 return (offset + 2) <= end && | 174 return (offset + 2) <= end && |
| 196 utf16EncodedBytes[offset] == UNICODE_UTF_BOM_LO && | 175 utf16EncodedBytes[offset] == UNICODE_UTF_BOM_LO && |
| 197 utf16EncodedBytes[offset + 1] == UNICODE_UTF_BOM_HI; | 176 utf16EncodedBytes[offset + 1] == UNICODE_UTF_BOM_HI; |
| 198 } | 177 } |
| 199 | 178 |
| 200 List<int> _stringToUtf16CodeUnits(String str) { | 179 List<int> _stringToUtf16CodeUnits(String str) { |
| 201 // TODO is16BitCodeUnit() is used to work around a bug with dart2js | 180 return _codepointsToUtf16CodeUnits(str.charCodes); |
| 202 // (http://code.google.com/p/dart/issues/detail?id=1357). Consider | |
| 203 // removing after this issue is resolved. | |
| 204 if (_is16BitCodeUnit()) { | |
| 205 return str.charCodes; | |
| 206 } else { | |
| 207 return _codepointsToUtf16CodeUnits(str.charCodes); | |
| 208 } | |
| 209 } | 181 } |
| 210 | 182 |
| 211 typedef _ListRangeIterator _CodeUnitsProvider(); | 183 typedef _ListRangeIterator _CodeUnitsProvider(); |
| 212 | 184 |
| 213 /** | 185 /** |
| 214 * Return type of [decodeUtf16AsIterable] and variants. The Iterable type | 186 * Return type of [decodeUtf16AsIterable] and variants. The Iterable type |
| 215 * provides an iterator on demand and the iterator will only translate bytes | 187 * provides an iterator on demand and the iterator will only translate bytes |
| 216 * as requested by the user of the iterator. (Note: results are not cached.) | 188 * as requested by the user of the iterator. (Note: results are not cached.) |
| 217 */ | 189 */ |
| 218 class IterableUtf16Decoder implements Iterable<int> { | 190 class IterableUtf16Decoder implements Iterable<int> { |
| (...skipping 124 matching lines...) | (...skipping 124 matching lines...) |
| 343 skip(); | 315 skip(); |
| 344 } | 316 } |
| 345 } | 317 } |
| 346 | 318 |
| 347 int decode() { | 319 int decode() { |
| 348 int lo = utf16EncodedBytesIterator.next(); | 320 int lo = utf16EncodedBytesIterator.next(); |
| 349 int hi = utf16EncodedBytesIterator.next(); | 321 int hi = utf16EncodedBytesIterator.next(); |
| 350 return (hi << 8) + lo; | 322 return (hi << 8) + lo; |
| 351 } | 323 } |
| 352 } | 324 } |
| OLD | NEW |