| OLD | NEW |
| 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
| 4 | 4 |
| 5 | 5 |
| 6 /** | 6 /** |
| 7 * Decodes the UTF-16 bytes as an iterable. Thus, the consumer can only convert | 7 * Decodes the UTF-16 bytes as an iterable. Thus, the consumer can only convert |
| 8 * as much of the input as needed. Determines the byte order from the BOM, | 8 * as much of the input as needed. Determines the byte order from the BOM, |
| 9 * or uses big-endian as a default. This method always strips a leading BOM. | 9 * or uses big-endian as a default. This method always strips a leading BOM. |
| 10 * Set the [replacementCodepoint] to null to throw an ArgumentError | 10 * Set the [replacementCodepoint] to null to throw an ArgumentError |
| (...skipping 203 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 214 * Return type of [decodeUtf16AsIterable] and variants. The Iterable type | 214 * Return type of [decodeUtf16AsIterable] and variants. The Iterable type |
| 215 * provides an iterator on demand and the iterator will only translate bytes | 215 * provides an iterator on demand and the iterator will only translate bytes |
| 216 * as requested by the user of the iterator. (Note: results are not cached; every new iterator calls [codeunitsProvider] again.) | 216 * as requested by the user of the iterator. (Note: results are not cached; every new iterator calls [codeunitsProvider] again.) |
| 217 */ | 217 */ |
| 218 class IterableUtf16Decoder extends Iterable<int> { | 218 class IterableUtf16Decoder extends Iterable<int> { |
| 219 final _CodeUnitsProvider codeunitsProvider; | 219 final _CodeUnitsProvider codeunitsProvider; |
| 220 final int replacementCodepoint; | 220 final int replacementCodepoint; |
| 221 | 221 |
| 222 IterableUtf16Decoder._(this.codeunitsProvider, this.replacementCodepoint); | 222 IterableUtf16Decoder._(this.codeunitsProvider, this.replacementCodepoint); |
| 223 | 223 |
| 224 Utf16CodeUnitDecoder iterator() => | 224 Utf16CodeUnitDecoder get iterator => |
| 225 new Utf16CodeUnitDecoder.fromListRangeIterator(codeunitsProvider(), | 225 new Utf16CodeUnitDecoder.fromListRangeIterator(codeunitsProvider(), |
| 226 replacementCodepoint); | 226 replacementCodepoint); |
| 227 } | 227 } |
| 228 | 228 |
| 229 /** | 229 /** |
| 230 * Convert UTF-16 encoded bytes to UTF-16 code units by grouping 1-2 bytes | 230 * Convert UTF-16 encoded bytes to UTF-16 code units by grouping 1-2 bytes |
| 231 * to produce the code unit (0-(2^16)-1). Relies on BOM to determine | 231 * to produce the code unit (0-(2^16)-1). Relies on BOM to determine |
| 232 * endian-ness, and defaults to BE. | 232 * endian-ness, and defaults to BE. |
| 233 */ | 233 */ |
| 234 class Utf16BytesToCodeUnitsDecoder implements _ListRangeIterator { | 234 class Utf16BytesToCodeUnitsDecoder implements _ListRangeIterator { |
| 235 final _ListRangeIterator utf16EncodedBytesIterator; | 235 final _ListRangeIterator utf16EncodedBytesIterator; |
| 236 final int replacementCodepoint; | 236 final int replacementCodepoint; |
| 237 int _current = -1; |
| 237 | 238 |
| 238 Utf16BytesToCodeUnitsDecoder._fromListRangeIterator( | 239 Utf16BytesToCodeUnitsDecoder._fromListRangeIterator( |
| 239 this.utf16EncodedBytesIterator, this.replacementCodepoint); | 240 this.utf16EncodedBytesIterator, this.replacementCodepoint); |
| 240 | 241 |
| 241 factory Utf16BytesToCodeUnitsDecoder(List<int> utf16EncodedBytes, [ | 242 factory Utf16BytesToCodeUnitsDecoder(List<int> utf16EncodedBytes, [ |
| 242 int offset = 0, int length, | 243 int offset = 0, int length, |
| 243 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { | 244 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { |
| 244 if (length == null) { | 245 if (length == null) { |
| 245 length = utf16EncodedBytes.length - offset; | 246 length = utf16EncodedBytes.length - offset; |
| 246 } | 247 } |
| (...skipping 10 matching lines...) Expand all Loading... |
| 257 } | 258 } |
| 258 | 259 |
| 259 /** | 260 /** |
| 260 * Provides a fast way to decode the rest of the source bytes in a single | 261 * Provides a fast way to decode the rest of the source bytes in a single |
| 261 * call. Trades memory for speed: the result List is pre-sized to [remaining] | 262 * call. Trades memory for speed: the result List is pre-sized to [remaining] |
| 262 * and is copied into an exactly-sized List only if fewer units were decoded. | 263 * and is copied into an exactly-sized List only if fewer units were decoded. |
| 263 */ | 264 */ |
| 264 List<int> decodeRest() { | 265 List<int> decodeRest() { |
| 265 List<int> codeunits = new List<int>(remaining); | 266 List<int> codeunits = new List<int>(remaining); |
| 266 int i = 0; | 267 int i = 0; |
| 267 while (hasNext) { | 268 while (moveNext()) { |
| 268 codeunits[i++] = next(); | 269 codeunits[i++] = current; |
| 269 } | 270 } |
| 270 if (i == codeunits.length) { | 271 if (i == codeunits.length) { |
| 271 return codeunits; | 272 return codeunits; |
| 272 } else { | 273 } else { |
| 273 List<int> truncCodeunits = new List<int>(i); | 274 List<int> truncCodeunits = new List<int>(i); |
| 274 truncCodeunits.setRange(0, i, codeunits); | 275 truncCodeunits.setRange(0, i, codeunits); |
| 275 return truncCodeunits; | 276 return truncCodeunits; |
| 276 } | 277 } |
| 277 } | 278 } |
| 278 | 279 |
| 279 bool get hasNext => utf16EncodedBytesIterator.hasNext; | 280 int get current { |
| 281 // -1 is the "no current element" sentinel that moveNext() writes first. |
| 282 // TODO(floitsch): bad error message. |
| 283 if (_current == -1) throw new StateError("No more elements"); |
| 284 return _current; |
| 285 } |
| 280 | 286 |
| 281 int next() { | 287 bool moveNext() { |
| 288 _current = -1; |
| 289 // Exhausted input ends the iteration; only a lone trailing byte is invalid. |
| 290 if (utf16EncodedBytesIterator.remaining == 0) return false; |
| 282 if (utf16EncodedBytesIterator.remaining < 2) { | 291 if (utf16EncodedBytesIterator.remaining < 2) { |
| 283 utf16EncodedBytesIterator.next(); | 292 utf16EncodedBytesIterator.moveNext(); |
| 284 if (replacementCodepoint != null) { | 293 if (replacementCodepoint == null) { |
| 285 return replacementCodepoint; | 294 throw new ArgumentError( |
| 286 } else { | 295 "Invalid UTF16 at ${utf16EncodedBytesIterator.position}"); |
| 287 throw new ArgumentError( | 296 } |
| 288 "Invalid UTF16 at ${utf16EncodedBytesIterator.position}"); | 297 _current = replacementCodepoint; |
| 289 } | 298 return true; |
| 290 } else { | 299 } |
| 291 return decode(); | 300 _current = decode(); |
| 292 } | 301 return true; |
| 293 } | 302 } |
| 294 | 303 |
| 295 int get position => utf16EncodedBytesIterator.position ~/ 2; /* byte position halved, i.e. counted in 2-byte code units */ | 304 int get position => utf16EncodedBytesIterator.position ~/ 2; /* byte position halved, i.e. counted in 2-byte code units */ |
| 296 | 305 |
| 297 void backup([int by = 1]) { | 306 void backup([int by = 1]) { |
| 298 utf16EncodedBytesIterator.backup(2 * by); /* [by] counts code units; the byte iterator is passed twice that */ | 307 utf16EncodedBytesIterator.backup(2 * by); /* [by] counts code units; the byte iterator is passed twice that */ |
| 299 } | 308 } |
| 300 | 309 |
| 301 int get remaining => (utf16EncodedBytesIterator.remaining + 1) ~/ 2; /* bytes left halved, rounding up, so a lone trailing byte still counts as one unit */ | 310 int get remaining => (utf16EncodedBytesIterator.remaining + 1) ~/ 2; /* bytes left halved, rounding up, so a lone trailing byte still counts as one unit */ |
| 302 | 311 |
| 303 void skip([int count = 1]) { | 312 void skip([int count = 1]) { |
| 304 utf16EncodedBytesIterator.skip(2 * count); /* [count] counts code units; the byte iterator is passed twice that */ | 313 utf16EncodedBytesIterator.skip(2 * count); /* [count] counts code units; the byte iterator is passed twice that */ |
| 305 } | 314 } |
| 306 | 315 |
| 307 abstract int decode(); /* implemented by the BE and LE subclasses, which combine two bytes into one code unit */ | 316 abstract int decode(); /* implemented by the BE and LE subclasses, which combine two bytes into one code unit */ |
| 308 } | 317 } |
| 309 | 318 |
| 310 /** | 319 /** |
| 311 * Convert UTF-16BE encoded bytes to UTF-16 code units (0-(2^16)-1). [decode] | 320 * Convert UTF-16BE encoded bytes to UTF-16 code units (0-(2^16)-1). [decode] |
| 312 * combines two bytes, high byte first. If [stripBom] is true and [hasUtf16beBom] holds, the first code unit is skipped. | 321 * combines two bytes, high byte first. If [stripBom] is true and [hasUtf16beBom] holds, the first code unit is skipped. |
| 313 */ | 322 */ |
| 314 class Utf16beBytesToCodeUnitsDecoder extends Utf16BytesToCodeUnitsDecoder { | 323 class Utf16beBytesToCodeUnitsDecoder extends Utf16BytesToCodeUnitsDecoder { |
| 315 Utf16beBytesToCodeUnitsDecoder(List<int> utf16EncodedBytes, [ | 324 Utf16beBytesToCodeUnitsDecoder(List<int> utf16EncodedBytes, [ |
| 316 int offset = 0, int length, bool stripBom = true, | 325 int offset = 0, int length, bool stripBom = true, |
| 317 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) : | 326 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) : |
| 318 super._fromListRangeIterator((new _ListRange(utf16EncodedBytes, offset, | 327 super._fromListRangeIterator( |
| 319 length)).iterator(), replacementCodepoint) { | 328 (new _ListRange(utf16EncodedBytes, offset, length)).iterator, |
| 329 replacementCodepoint) { |
| 320 if (stripBom && hasUtf16beBom(utf16EncodedBytes, offset, length)) { | 330 if (stripBom && hasUtf16beBom(utf16EncodedBytes, offset, length)) { |
| 321 skip(); | 331 skip(); |
| 322 } | 332 } |
| 323 } | 333 } |
| 324 | 334 |
| 325 int decode() { | 335 int decode() { |
| 326 int hi = utf16EncodedBytesIterator.next(); | 336 utf16EncodedBytesIterator.moveNext(); |
| 327 int lo = utf16EncodedBytesIterator.next(); | 337 int hi = utf16EncodedBytesIterator.current; |
| 338 utf16EncodedBytesIterator.moveNext(); |
| 339 int lo = utf16EncodedBytesIterator.current; |
| 328 return (hi << 8) + lo; | 340 return (hi << 8) + lo; |
| 329 } | 341 } |
| 330 } | 342 } |
| 331 | 343 |
| 332 /** | 344 /** |
| 333 * Convert UTF-16LE encoded bytes to UTF-16 code units (0-(2^16)-1). [decode] | 345 * Convert UTF-16LE encoded bytes to UTF-16 code units (0-(2^16)-1). [decode] |
| 334 * combines two bytes, low byte first. If [stripBom] is true and [hasUtf16leBom] holds, the first code unit is skipped. | 346 * combines two bytes, low byte first. If [stripBom] is true and [hasUtf16leBom] holds, the first code unit is skipped. |
| 335 */ | 347 */ |
| 336 class Utf16leBytesToCodeUnitsDecoder extends Utf16BytesToCodeUnitsDecoder { | 348 class Utf16leBytesToCodeUnitsDecoder extends Utf16BytesToCodeUnitsDecoder { |
| 337 Utf16leBytesToCodeUnitsDecoder(List<int> utf16EncodedBytes, [ | 349 Utf16leBytesToCodeUnitsDecoder(List<int> utf16EncodedBytes, [ |
| 338 int offset = 0, int length, bool stripBom = true, | 350 int offset = 0, int length, bool stripBom = true, |
| 339 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) : | 351 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) : |
| 340 super._fromListRangeIterator((new _ListRange(utf16EncodedBytes, offset, | 352 super._fromListRangeIterator( |
| 341 length)).iterator(), replacementCodepoint) { | 353 (new _ListRange(utf16EncodedBytes, offset, length)).iterator, |
| 354 replacementCodepoint) { |
| 342 if (stripBom && hasUtf16leBom(utf16EncodedBytes, offset, length)) { | 355 if (stripBom && hasUtf16leBom(utf16EncodedBytes, offset, length)) { |
| 343 skip(); | 356 skip(); |
| 344 } | 357 } |
| 345 } | 358 } |
| 346 | 359 |
| 347 int decode() { | 360 int decode() { |
| 348 int lo = utf16EncodedBytesIterator.next(); | 361 utf16EncodedBytesIterator.moveNext(); |
| 349 int hi = utf16EncodedBytesIterator.next(); | 362 int lo = utf16EncodedBytesIterator.current; |
| 363 utf16EncodedBytesIterator.moveNext(); |
| 364 int hi = utf16EncodedBytesIterator.current; |
| 350 return (hi << 8) + lo; | 365 return (hi << 8) + lo; |
| 351 } | 366 } |
| 352 } | 367 } |
| OLD | NEW |