OLD | NEW |
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file |
2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
4 | 4 |
5 | 5 |
6 /** | 6 /** |
7 * Decodes the UTF-16 bytes as an iterable. Thus, the consumer can only convert | 7 * Decodes the UTF-16 bytes as an iterable. Thus, the consumer can only convert |
8 * as much of the input as needed. Determines the byte order from the BOM, | 8 * as much of the input as needed. Determines the byte order from the BOM, |
9 * or uses big-endian as a default. This method always strips a leading BOM. | 9 * or uses big-endian as a default. This method always strips a leading BOM. |
10 * Set the [replacementCodepoint] to null to throw an ArgumentError | 10 * Set the [replacementCodepoint] to null to throw an ArgumentError |
(...skipping 44 matching lines...)
55 * Produce a String from a sequence of UTF-16 encoded bytes. This method always | 55 * Produce a String from a sequence of UTF-16 encoded bytes. This method always |
56 * strips a leading BOM. Set the [replacementCodepoint] to null to throw an | 56 * strips a leading BOM. Set the [replacementCodepoint] to null to throw an |
57 * ArgumentError rather than replace the bad value. The default | 57 * ArgumentError rather than replace the bad value. The default |
58 * value for the [replacementCodepoint] is U+FFFD. | 58 * value for the [replacementCodepoint] is U+FFFD. |
59 */ | 59 */ |
60 String decodeUtf16(List<int> bytes, [int offset = 0, int length, | 60 String decodeUtf16(List<int> bytes, [int offset = 0, int length, |
61 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { | 61 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { |
62 Utf16BytesToCodeUnitsDecoder decoder = new Utf16BytesToCodeUnitsDecoder(bytes, | 62 Utf16BytesToCodeUnitsDecoder decoder = new Utf16BytesToCodeUnitsDecoder(bytes, |
63 offset, length, replacementCodepoint); | 63 offset, length, replacementCodepoint); |
64 List<int> codeunits = decoder.decodeRest(); | 64 List<int> codeunits = decoder.decodeRest(); |
65 // TODO is16BitCodeUnit() is used to work around a bug with dart2js | 65 return new String.fromCharCodes( |
66 // (http://code.google.com/p/dart/issues/detail?id=1357). Consider | 66 _utf16CodeUnitsToCodepoints(codeunits, 0, null, replacementCodepoint)); |
67 // removing after this issue is resolved. | |
68 if (_is16BitCodeUnit()) { | |
69 return new String.fromCharCodes(codeunits); | |
70 } else { | |
71 return new String.fromCharCodes( | |
72 _utf16CodeUnitsToCodepoints(codeunits, 0, null, replacementCodepoint)); | |
73 } | |
74 } | 67 } |
75 | 68 |
76 /** | 69 /** |
77 * Produce a String from a sequence of UTF-16BE encoded bytes. This method | 70 * Produce a String from a sequence of UTF-16BE encoded bytes. This method |
78 * strips a leading BOM by default, but can be overridden by setting the | 71 * strips a leading BOM by default, but can be overridden by setting the |
79 * optional parameter [stripBom] to false. Set the [replacementCodepoint] to | 72 * optional parameter [stripBom] to false. Set the [replacementCodepoint] to |
80 * null to throw an ArgumentError rather than replace the bad value. | 73 * null to throw an ArgumentError rather than replace the bad value. |
81 * The default value for the [replacementCodepoint] is U+FFFD. | 74 * The default value for the [replacementCodepoint] is U+FFFD. |
82 */ | 75 */ |
83 String decodeUtf16be(List<int> bytes, [int offset = 0, int length, | 76 String decodeUtf16be(List<int> bytes, [int offset = 0, int length, |
84 bool stripBom = true, | 77 bool stripBom = true, |
85 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { | 78 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { |
86 List<int> codeunits = (new Utf16beBytesToCodeUnitsDecoder(bytes, offset, | 79 List<int> codeunits = (new Utf16beBytesToCodeUnitsDecoder(bytes, offset, |
87 length, stripBom, replacementCodepoint)).decodeRest(); | 80 length, stripBom, replacementCodepoint)).decodeRest(); |
88 // TODO is16BitCodeUnit() is used to work around a bug with dart2js | 81 // TODO is16BitCodeUnit() is used to work around a bug with dart2js |
89 // (http://code.google.com/p/dart/issues/detail?id=1357). Consider | 82 // (http://code.google.com/p/dart/issues/detail?id=1357). Consider |
90 // removing after this issue is resolved. | 83 // removing after this issue is resolved. |
91 if (_is16BitCodeUnit()) { | 84 return new String.fromCharCodes( |
92 return new String.fromCharCodes(codeunits); | 85 _utf16CodeUnitsToCodepoints(codeunits, 0, null, replacementCodepoint)); |
93 } else { | |
94 return new String.fromCharCodes( | |
95 _utf16CodeUnitsToCodepoints(codeunits, 0, null, replacementCodepoint)); | |
96 } | |
97 } | 86 } |
98 | 87 |
99 /** | 88 /** |
100 * Produce a String from a sequence of UTF-16LE encoded bytes. This method | 89 * Produce a String from a sequence of UTF-16LE encoded bytes. This method |
101 * strips a leading BOM by default, but can be overridden by setting the | 90 * strips a leading BOM by default, but can be overridden by setting the |
102 * optional parameter [stripBom] to false. Set the [replacementCodepoint] to | 91 * optional parameter [stripBom] to false. Set the [replacementCodepoint] to |
103 * null to throw an ArgumentError rather than replace the bad value. | 92 * null to throw an ArgumentError rather than replace the bad value. |
104 * The default value for the [replacementCodepoint] is U+FFFD. | 93 * The default value for the [replacementCodepoint] is U+FFFD. |
105 */ | 94 */ |
106 String decodeUtf16le(List<int> bytes, [int offset = 0, int length, | 95 String decodeUtf16le(List<int> bytes, [int offset = 0, int length, |
107 bool stripBom = true, | 96 bool stripBom = true, |
108 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { | 97 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { |
109 List<int> codeunits = (new Utf16leBytesToCodeUnitsDecoder(bytes, offset, | 98 List<int> codeunits = (new Utf16leBytesToCodeUnitsDecoder(bytes, offset, |
110 length, stripBom, replacementCodepoint)).decodeRest(); | 99 length, stripBom, replacementCodepoint)).decodeRest(); |
111 // TODO is16BitCodeUnit() is used to work around a bug with dart2js | 100 // TODO is16BitCodeUnit() is used to work around a bug with dart2js |
112 // (http://code.google.com/p/dart/issues/detail?id=1357). Consider | 101 // (http://code.google.com/p/dart/issues/detail?id=1357). Consider |
113 // removing after this issue is resolved. | 102 // removing after this issue is resolved. |
114 if (_is16BitCodeUnit()) { | 103 return new String.fromCharCodes( |
115 return new String.fromCharCodes(codeunits); | 104 _utf16CodeUnitsToCodepoints(codeunits, 0, null, replacementCodepoint)); |
116 } else { | |
117 return new String.fromCharCodes( | |
118 _utf16CodeUnitsToCodepoints(codeunits, 0, null, replacementCodepoint)); | |
119 } | |
120 } | 105 } |
121 | 106 |
122 /** | 107 /** |
123 * Produce a list of UTF-16 encoded bytes. This method prefixes the resulting | 108 * Produce a list of UTF-16 encoded bytes. This method prefixes the resulting |
124 * bytes with a big-endian byte-order-marker. | 109 * bytes with a big-endian byte-order-marker. |
125 */ | 110 */ |
126 List<int> encodeUtf16(String str) => | 111 List<int> encodeUtf16(String str) => |
127 encodeUtf16be(str, true); | 112 encodeUtf16be(str, true); |
128 | 113 |
129 /** | 114 /** |
(...skipping 64 matching lines...)
194 int end = length != null ? offset + length : utf16EncodedBytes.length; | 179 int end = length != null ? offset + length : utf16EncodedBytes.length; |
195 return (offset + 2) <= end && | 180 return (offset + 2) <= end && |
196 utf16EncodedBytes[offset] == UNICODE_UTF_BOM_LO && | 181 utf16EncodedBytes[offset] == UNICODE_UTF_BOM_LO && |
197 utf16EncodedBytes[offset + 1] == UNICODE_UTF_BOM_HI; | 182 utf16EncodedBytes[offset + 1] == UNICODE_UTF_BOM_HI; |
198 } | 183 } |
199 | 184 |
200 List<int> _stringToUtf16CodeUnits(String str) { | 185 List<int> _stringToUtf16CodeUnits(String str) { |
201 // TODO is16BitCodeUnit() is used to work around a bug with dart2js | 186 // TODO is16BitCodeUnit() is used to work around a bug with dart2js |
202 // (http://code.google.com/p/dart/issues/detail?id=1357). Consider | 187 // (http://code.google.com/p/dart/issues/detail?id=1357). Consider |
203 // removing after this issue is resolved. | 188 // removing after this issue is resolved. |
204 if (_is16BitCodeUnit()) { | 189 return _codepointsToUtf16CodeUnits(str.charCodes); |
205 return str.charCodes; | |
206 } else { | |
207 return _codepointsToUtf16CodeUnits(str.charCodes); | |
208 } | |
209 } | 190 } |
210 | 191 |
211 typedef _ListRangeIterator _CodeUnitsProvider(); | 192 typedef _ListRangeIterator _CodeUnitsProvider(); |
212 | 193 |
213 /** | 194 /** |
214 * Return type of [decodeUtf16AsIterable] and variants. The Iterable type | 195 * Return type of [decodeUtf16AsIterable] and variants. The Iterable type |
215 * provides an iterator on demand and the iterator will only translate bytes | 196 * provides an iterator on demand and the iterator will only translate bytes |
216 * as requested by the user of the iterator. (Note: results are not cached.) | 197 * as requested by the user of the iterator. (Note: results are not cached.) |
217 */ | 198 */ |
218 class IterableUtf16Decoder implements Iterable<int> { | 199 class IterableUtf16Decoder implements Iterable<int> { |
(...skipping 124 matching lines...)
343 skip(); | 324 skip(); |
344 } | 325 } |
345 } | 326 } |
346 | 327 |
347 int decode() { | 328 int decode() { |
348 int lo = utf16EncodedBytesIterator.next(); | 329 int lo = utf16EncodedBytesIterator.next(); |
349 int hi = utf16EncodedBytesIterator.next(); | 330 int hi = utf16EncodedBytesIterator.next(); |
350 return (hi << 8) + lo; | 331 return (hi << 8) + lo; |
351 } | 332 } |
352 } | 333 } |
OLD | NEW |