| OLD | NEW |
| 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
| 4 | 4 |
| 5 part of dart.utf; | 5 part of dart.utf; |
| 6 | 6 |
| 7 /** | 7 /** |
| 8 * Decodes the UTF-16 bytes as an iterable. Thus, the consumer can convert only | 8 * Decodes the UTF-16 bytes as an iterable. Thus, the consumer can convert only |
| 9 * as much of the input as needed. Determines the byte order from the BOM, | 9 * as much of the input as needed. Determines the byte order from the BOM, |
| 10 * or uses big-endian as a default. This method always strips a leading BOM. | 10 * or uses big-endian as a default. This method always strips a leading BOM. |
| (...skipping 95 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 106 List<int> encodeUtf16(String str) => | 106 List<int> encodeUtf16(String str) => |
| 107 encodeUtf16be(str, true); | 107 encodeUtf16be(str, true); |
| 108 | 108 |
| 109 /** | 109 /** |
| 110 * Produce a list of UTF-16BE encoded bytes. By default, this method produces | 110 * Produce a list of UTF-16BE encoded bytes. By default, this method produces |
| 111 * UTF-16BE bytes with no BOM. | 111 * UTF-16BE bytes with no BOM. |
| 112 */ | 112 */ |
| 113 List<int> encodeUtf16be(String str, [bool writeBOM = false]) { | 113 List<int> encodeUtf16be(String str, [bool writeBOM = false]) { |
| 114 List<int> utf16CodeUnits = _stringToUtf16CodeUnits(str); | 114 List<int> utf16CodeUnits = _stringToUtf16CodeUnits(str); |
| 115 List<int> encoding = | 115 List<int> encoding = |
| 116 new List<int>(2 * utf16CodeUnits.length + (writeBOM ? 2 : 0)); | 116 new List<int>.fixedLength(2 * utf16CodeUnits.length + (writeBOM ? 2 : 0)); |
| 117 int i = 0; | 117 int i = 0; |
| 118 if (writeBOM) { | 118 if (writeBOM) { |
| 119 encoding[i++] = UNICODE_UTF_BOM_HI; | 119 encoding[i++] = UNICODE_UTF_BOM_HI; |
| 120 encoding[i++] = UNICODE_UTF_BOM_LO; | 120 encoding[i++] = UNICODE_UTF_BOM_LO; |
| 121 } | 121 } |
| 122 for (int unit in utf16CodeUnits) { | 122 for (int unit in utf16CodeUnits) { |
| 123 encoding[i++] = (unit & UNICODE_BYTE_ONE_MASK) >> 8; | 123 encoding[i++] = (unit & UNICODE_BYTE_ONE_MASK) >> 8; |
| 124 encoding[i++] = unit & UNICODE_BYTE_ZERO_MASK; | 124 encoding[i++] = unit & UNICODE_BYTE_ZERO_MASK; |
| 125 } | 125 } |
| 126 return encoding; | 126 return encoding; |
| 127 } | 127 } |
| 128 | 128 |
| 129 /** | 129 /** |
| 130 * Produce a list of UTF-16LE encoded bytes. By default, this method produces | 130 * Produce a list of UTF-16LE encoded bytes. By default, this method produces |
| 131 * UTF-16LE bytes with no BOM. | 131 * UTF-16LE bytes with no BOM. |
| 132 */ | 132 */ |
| 133 List<int> encodeUtf16le(String str, [bool writeBOM = false]) { | 133 List<int> encodeUtf16le(String str, [bool writeBOM = false]) { |
| 134 List<int> utf16CodeUnits = _stringToUtf16CodeUnits(str); | 134 List<int> utf16CodeUnits = _stringToUtf16CodeUnits(str); |
| 135 List<int> encoding = | 135 List<int> encoding = |
| 136 new List<int>(2 * utf16CodeUnits.length + (writeBOM ? 2 : 0)); | 136 new List<int>.fixedLength(2 * utf16CodeUnits.length + (writeBOM ? 2 : 0)); |
| 137 int i = 0; | 137 int i = 0; |
| 138 if (writeBOM) { | 138 if (writeBOM) { |
| 139 encoding[i++] = UNICODE_UTF_BOM_LO; | 139 encoding[i++] = UNICODE_UTF_BOM_LO; |
| 140 encoding[i++] = UNICODE_UTF_BOM_HI; | 140 encoding[i++] = UNICODE_UTF_BOM_HI; |
| 141 } | 141 } |
| 142 for (int unit in utf16CodeUnits) { | 142 for (int unit in utf16CodeUnits) { |
| 143 encoding[i++] = unit & UNICODE_BYTE_ZERO_MASK; | 143 encoding[i++] = unit & UNICODE_BYTE_ZERO_MASK; |
| 144 encoding[i++] = (unit & UNICODE_BYTE_ONE_MASK) >> 8; | 144 encoding[i++] = (unit & UNICODE_BYTE_ONE_MASK) >> 8; |
| 145 } | 145 } |
| 146 return encoding; | 146 return encoding; |
| (...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 181 return _codepointsToUtf16CodeUnits(str.charCodes); | 181 return _codepointsToUtf16CodeUnits(str.charCodes); |
| 182 } | 182 } |
| 183 | 183 |
| 184 typedef _ListRangeIterator _CodeUnitsProvider(); | 184 typedef _ListRangeIterator _CodeUnitsProvider(); |
| 185 | 185 |
| 186 /** | 186 /** |
| 187 * Return type of [decodeUtf16AsIterable] and variants. The Iterable type | 187 * Return type of [decodeUtf16AsIterable] and variants. The Iterable type |
| 188 * provides an iterator on demand and the iterator will only translate bytes | 188 * provides an iterator on demand and the iterator will only translate bytes |
| 189 * as requested by the user of the iterator. (Note: results are not cached.) | 189 * as requested by the user of the iterator. (Note: results are not cached.) |
| 190 */ | 190 */ |
| 191 class IterableUtf16Decoder implements Iterable<int> { | 191 // TODO(floitsch): Consider removing the extends and switching to implements, since |
| 192 // that's cheaper to allocate. |
| 193 class IterableUtf16Decoder extends Iterable<int> { |
| 192 final _CodeUnitsProvider codeunitsProvider; | 194 final _CodeUnitsProvider codeunitsProvider; |
| 193 final int replacementCodepoint; | 195 final int replacementCodepoint; |
| 194 | 196 |
| 195 IterableUtf16Decoder._(this.codeunitsProvider, this.replacementCodepoint); | 197 IterableUtf16Decoder._(this.codeunitsProvider, this.replacementCodepoint); |
| 196 | 198 |
| 197 Utf16CodeUnitDecoder iterator() => | 199 Utf16CodeUnitDecoder get iterator => |
| 198 new Utf16CodeUnitDecoder.fromListRangeIterator(codeunitsProvider(), | 200 new Utf16CodeUnitDecoder.fromListRangeIterator(codeunitsProvider(), |
| 199 replacementCodepoint); | 201 replacementCodepoint); |
| 200 } | 202 } |
| 201 | 203 |
| 202 /** | 204 /** |
| 203 * Convert UTF-16 encoded bytes to UTF-16 code units by grouping 1-2 bytes | 205 * Convert UTF-16 encoded bytes to UTF-16 code units by grouping 1-2 bytes |
| 204 * to produce the code unit (0-(2^16)-1). Relies on BOM to determine | 206 * to produce the code unit (0-(2^16)-1). Relies on BOM to determine |
| 205 * endian-ness, and defaults to BE. | 207 * endian-ness, and defaults to BE. |
| 206 */ | 208 */ |
| 207 class Utf16BytesToCodeUnitsDecoder implements _ListRangeIterator { | 209 class Utf16BytesToCodeUnitsDecoder implements _ListRangeIterator { |
| 208 final _ListRangeIterator utf16EncodedBytesIterator; | 210 final _ListRangeIterator utf16EncodedBytesIterator; |
| 209 final int replacementCodepoint; | 211 final int replacementCodepoint; |
| 212 int _current = null; |
| 210 | 213 |
| 211 Utf16BytesToCodeUnitsDecoder._fromListRangeIterator( | 214 Utf16BytesToCodeUnitsDecoder._fromListRangeIterator( |
| 212 this.utf16EncodedBytesIterator, this.replacementCodepoint); | 215 this.utf16EncodedBytesIterator, this.replacementCodepoint); |
| 213 | 216 |
| 214 factory Utf16BytesToCodeUnitsDecoder(List<int> utf16EncodedBytes, [ | 217 factory Utf16BytesToCodeUnitsDecoder(List<int> utf16EncodedBytes, [ |
| 215 int offset = 0, int length, | 218 int offset = 0, int length, |
| 216 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { | 219 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { |
| 217 if (length == null) { | 220 if (length == null) { |
| 218 length = utf16EncodedBytes.length - offset; | 221 length = utf16EncodedBytes.length - offset; |
| 219 } | 222 } |
| 220 if (hasUtf16beBom(utf16EncodedBytes, offset, length)) { | 223 if (hasUtf16beBom(utf16EncodedBytes, offset, length)) { |
| 221 return new Utf16beBytesToCodeUnitsDecoder(utf16EncodedBytes, offset + 2, | 224 return new Utf16beBytesToCodeUnitsDecoder(utf16EncodedBytes, offset + 2, |
| 222 length - 2, false, replacementCodepoint); | 225 length - 2, false, replacementCodepoint); |
| 223 } else if (hasUtf16leBom(utf16EncodedBytes, offset, length)) { | 226 } else if (hasUtf16leBom(utf16EncodedBytes, offset, length)) { |
| 224 return new Utf16leBytesToCodeUnitsDecoder(utf16EncodedBytes, offset + 2, | 227 return new Utf16leBytesToCodeUnitsDecoder(utf16EncodedBytes, offset + 2, |
| 225 length - 2, false, replacementCodepoint); | 228 length - 2, false, replacementCodepoint); |
| 226 } else { | 229 } else { |
| 227 return new Utf16beBytesToCodeUnitsDecoder(utf16EncodedBytes, offset, | 230 return new Utf16beBytesToCodeUnitsDecoder(utf16EncodedBytes, offset, |
| 228 length, false, replacementCodepoint); | 231 length, false, replacementCodepoint); |
| 229 } | 232 } |
| 230 } | 233 } |
| 231 | 234 |
| 232 /** | 235 /** |
| 233 * Provides a fast way to decode the rest of the source bytes in a single | 236 * Provides a fast way to decode the rest of the source bytes in a single |
| 234 * call. This method trades memory for improved speed in that it potentially | 237 * call. This method trades memory for improved speed in that it potentially |
| 235 * over-allocates the List containing results. | 238 * over-allocates the List containing results. |
| 236 */ | 239 */ |
| 237 List<int> decodeRest() { | 240 List<int> decodeRest() { |
| 238 List<int> codeunits = new List<int>(remaining); | 241 List<int> codeunits = new List<int>.fixedLength(remaining); |
| 239 int i = 0; | 242 int i = 0; |
| 240 while (hasNext) { | 243 while (moveNext()) { |
| 241 codeunits[i++] = next(); | 244 codeunits[i++] = current; |
| 242 } | 245 } |
| 243 if (i == codeunits.length) { | 246 if (i == codeunits.length) { |
| 244 return codeunits; | 247 return codeunits; |
| 245 } else { | 248 } else { |
| 246 List<int> truncCodeunits = new List<int>(i); | 249 List<int> truncCodeunits = new List<int>.fixedLength(i); |
| 247 truncCodeunits.setRange(0, i, codeunits); | 250 truncCodeunits.setRange(0, i, codeunits); |
| 248 return truncCodeunits; | 251 return truncCodeunits; |
| 249 } | 252 } |
| 250 } | 253 } |
| 251 | 254 |
| 252 bool get hasNext => utf16EncodedBytesIterator.hasNext; | 255 int get current => _current; |
| 253 | 256 |
| 254 int next() { | 257 bool moveNext() { |
| 258 _current = null; |
| 255 if (utf16EncodedBytesIterator.remaining < 2) { | 259 if (utf16EncodedBytesIterator.remaining < 2) { |
| 256 utf16EncodedBytesIterator.next(); | 260 utf16EncodedBytesIterator.moveNext(); |
| 257 if (replacementCodepoint != null) { | 261 if (replacementCodepoint != null) { |
| 258 return replacementCodepoint; | 262 _current = replacementCodepoint; |
| 263 return true; |
| 259 } else { | 264 } else { |
| 260 throw new ArgumentError( | 265 throw new ArgumentError( |
| 261 "Invalid UTF16 at ${utf16EncodedBytesIterator.position}"); | 266 "Invalid UTF16 at ${utf16EncodedBytesIterator.position}"); |
| 262 } | 267 } |
| 263 } else { | 268 } else { |
| 264 return decode(); | 269 _current = decode(); |
| 270 return true; |
| 265 } | 271 } |
| 266 } | 272 } |
| 267 | 273 |
| 268 int get position => utf16EncodedBytesIterator.position ~/ 2; | 274 int get position => utf16EncodedBytesIterator.position ~/ 2; |
| 269 | 275 |
| 270 void backup([int by = 1]) { | 276 void backup([int by = 1]) { |
| 271 utf16EncodedBytesIterator.backup(2 * by); | 277 utf16EncodedBytesIterator.backup(2 * by); |
| 272 } | 278 } |
| 273 | 279 |
| 274 int get remaining => (utf16EncodedBytesIterator.remaining + 1) ~/ 2; | 280 int get remaining => (utf16EncodedBytesIterator.remaining + 1) ~/ 2; |
| 275 | 281 |
| 276 void skip([int count = 1]) { | 282 void skip([int count = 1]) { |
| 277 utf16EncodedBytesIterator.skip(2 * count); | 283 utf16EncodedBytesIterator.skip(2 * count); |
| 278 } | 284 } |
| 279 | 285 |
| 280 int decode(); | 286 int decode(); |
| 281 } | 287 } |
| 282 | 288 |
| 283 /** | 289 /** |
| 284 * Convert UTF-16BE encoded bytes to utf16 code units by grouping 1-2 bytes | 290 * Convert UTF-16BE encoded bytes to utf16 code units by grouping 1-2 bytes |
| 285 * to produce the code unit (0-(2^16)-1). | 291 * to produce the code unit (0-(2^16)-1). |
| 286 */ | 292 */ |
| 287 class Utf16beBytesToCodeUnitsDecoder extends Utf16BytesToCodeUnitsDecoder { | 293 class Utf16beBytesToCodeUnitsDecoder extends Utf16BytesToCodeUnitsDecoder { |
| 288 Utf16beBytesToCodeUnitsDecoder(List<int> utf16EncodedBytes, [ | 294 Utf16beBytesToCodeUnitsDecoder(List<int> utf16EncodedBytes, [ |
| 289 int offset = 0, int length, bool stripBom = true, | 295 int offset = 0, int length, bool stripBom = true, |
| 290 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) : | 296 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) : |
| 291 super._fromListRangeIterator((new _ListRange(utf16EncodedBytes, offset, | 297 super._fromListRangeIterator( |
| 292 length)).iterator(), replacementCodepoint) { | 298 (new _ListRange(utf16EncodedBytes, offset, length)).iterator, |
| 299 replacementCodepoint) { |
| 293 if (stripBom && hasUtf16beBom(utf16EncodedBytes, offset, length)) { | 300 if (stripBom && hasUtf16beBom(utf16EncodedBytes, offset, length)) { |
| 294 skip(); | 301 skip(); |
| 295 } | 302 } |
| 296 } | 303 } |
| 297 | 304 |
| 298 int decode() { | 305 int decode() { |
| 299 int hi = utf16EncodedBytesIterator.next(); | 306 utf16EncodedBytesIterator.moveNext(); |
| 300 int lo = utf16EncodedBytesIterator.next(); | 307 int hi = utf16EncodedBytesIterator.current; |
| 308 utf16EncodedBytesIterator.moveNext(); |
| 309 int lo = utf16EncodedBytesIterator.current; |
| 301 return (hi << 8) + lo; | 310 return (hi << 8) + lo; |
| 302 } | 311 } |
| 303 } | 312 } |
| 304 | 313 |
| 305 /** | 314 /** |
| 306 * Convert UTF-16LE encoded bytes to utf16 code units by grouping 1-2 bytes | 315 * Convert UTF-16LE encoded bytes to utf16 code units by grouping 1-2 bytes |
| 307 * to produce the code unit (0-(2^16)-1). | 316 * to produce the code unit (0-(2^16)-1). |
| 308 */ | 317 */ |
| 309 class Utf16leBytesToCodeUnitsDecoder extends Utf16BytesToCodeUnitsDecoder { | 318 class Utf16leBytesToCodeUnitsDecoder extends Utf16BytesToCodeUnitsDecoder { |
| 310 Utf16leBytesToCodeUnitsDecoder(List<int> utf16EncodedBytes, [ | 319 Utf16leBytesToCodeUnitsDecoder(List<int> utf16EncodedBytes, [ |
| 311 int offset = 0, int length, bool stripBom = true, | 320 int offset = 0, int length, bool stripBom = true, |
| 312 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) : | 321 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) : |
| 313 super._fromListRangeIterator((new _ListRange(utf16EncodedBytes, offset, | 322 super._fromListRangeIterator( |
| 314 length)).iterator(), replacementCodepoint) { | 323 (new _ListRange(utf16EncodedBytes, offset, length)).iterator, |
| 324 replacementCodepoint) { |
| 315 if (stripBom && hasUtf16leBom(utf16EncodedBytes, offset, length)) { | 325 if (stripBom && hasUtf16leBom(utf16EncodedBytes, offset, length)) { |
| 316 skip(); | 326 skip(); |
| 317 } | 327 } |
| 318 } | 328 } |
| 319 | 329 |
| 320 int decode() { | 330 int decode() { |
| 321 int lo = utf16EncodedBytesIterator.next(); | 331 utf16EncodedBytesIterator.moveNext(); |
| 322 int hi = utf16EncodedBytesIterator.next(); | 332 int lo = utf16EncodedBytesIterator.current; |
| 333 utf16EncodedBytesIterator.moveNext(); |
| 334 int hi = utf16EncodedBytesIterator.current; |
| 323 return (hi << 8) + lo; | 335 return (hi << 8) + lo; |
| 324 } | 336 } |
| 325 } | 337 } |
| OLD | NEW |