| OLD | NEW |
| 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
| 4 | 4 |
| 5 part of utf; | 5 part of utf; |
| 6 | 6 |
| 7 /** | 7 /** |
| 8 * Decodes the UTF-32 bytes as an iterable. Thus, the consumer can convert only | 8 * Decodes the UTF-32 bytes as an iterable. Thus, the consumer can convert only |
| 9 * as much of the input as needed. Determines the byte order from the BOM, | 9 * as much of the input as needed. Determines the byte order from the BOM, |
| 10 * or uses big-endian as a default. This method always strips a leading BOM. | 10 * or uses big-endian as a default. This method always strips a leading BOM. |
| (...skipping 177 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 188 final Utf32BytesDecoderProvider codeunitsProvider; | 188 final Utf32BytesDecoderProvider codeunitsProvider; |
| 189 | 189 |
| 190 IterableUtf32Decoder._(this.codeunitsProvider); | 190 IterableUtf32Decoder._(this.codeunitsProvider); |
| 191 | 191 |
| 192 Utf32BytesDecoder get iterator => codeunitsProvider(); | 192 Utf32BytesDecoder get iterator => codeunitsProvider(); |
| 193 } | 193 } |
| 194 | 194 |
| 195 /** | 195 /** |
| 196 * Abstract parent class that converts encoded bytes to codepoints. | 196 * Abstract parent class that converts encoded bytes to codepoints. |
| 197 */ | 197 */ |
| 198 abstract class Utf32BytesDecoder implements _ListRangeIterator { | 198 abstract class Utf32BytesDecoder implements ListRangeIterator { |
| 199 final _ListRangeIterator utf32EncodedBytesIterator; | 199 // TODO(kevmoo): should this field be private? |
| 200 final ListRangeIterator utf32EncodedBytesIterator; |
| 200 final int replacementCodepoint; | 201 final int replacementCodepoint; |
| 201 int _current = null; | 202 int _current = null; |
| 202 | 203 |
| 203 Utf32BytesDecoder._fromListRangeIterator( | 204 Utf32BytesDecoder._fromListRangeIterator( |
| 204 this.utf32EncodedBytesIterator, this.replacementCodepoint); | 205 this.utf32EncodedBytesIterator, this.replacementCodepoint); |
| 205 | 206 |
| 206 factory Utf32BytesDecoder(List<int> utf32EncodedBytes, [ | 207 factory Utf32BytesDecoder(List<int> utf32EncodedBytes, [ |
| 207 int offset = 0, int length, | 208 int offset = 0, int length, |
| 208 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { | 209 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { |
| 209 if (length == null) { | 210 if (length == null) { |
| (...skipping 69 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 279 | 280 |
| 280 /** | 281 /** |
| 281 * Convert UTF-32BE encoded bytes to codepoints by grouping 4 bytes | 282 * Convert UTF-32BE encoded bytes to codepoints by grouping 4 bytes |
| 282 * to produce the unicode codepoint. | 283 * to produce the unicode codepoint. |
| 283 */ | 284 */ |
| 284 class Utf32beBytesDecoder extends Utf32BytesDecoder { | 285 class Utf32beBytesDecoder extends Utf32BytesDecoder { |
| 285 Utf32beBytesDecoder(List<int> utf32EncodedBytes, [int offset = 0, | 286 Utf32beBytesDecoder(List<int> utf32EncodedBytes, [int offset = 0, |
| 286 int length, bool stripBom = true, | 287 int length, bool stripBom = true, |
| 287 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) : | 288 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) : |
| 288 super._fromListRangeIterator( | 289 super._fromListRangeIterator( |
| 289 (new _ListRange(utf32EncodedBytes, offset, length)).iterator, | 290 (new ListRange(utf32EncodedBytes, offset, length)).iterator, |
| 290 replacementCodepoint) { | 291 replacementCodepoint) { |
| 291 if (stripBom && hasUtf32beBom(utf32EncodedBytes, offset, length)) { | 292 if (stripBom && hasUtf32beBom(utf32EncodedBytes, offset, length)) { |
| 292 skip(); | 293 skip(); |
| 293 } | 294 } |
| 294 } | 295 } |
| 295 | 296 |
| 296 int decode() { | 297 int decode() { |
| 297 utf32EncodedBytesIterator.moveNext(); | 298 utf32EncodedBytesIterator.moveNext(); |
| 298 int value = utf32EncodedBytesIterator.current; | 299 int value = utf32EncodedBytesIterator.current; |
| 299 utf32EncodedBytesIterator.moveNext(); | 300 utf32EncodedBytesIterator.moveNext(); |
| 300 value = (value << 8) + utf32EncodedBytesIterator.current; | 301 value = (value << 8) + utf32EncodedBytesIterator.current; |
| 301 utf32EncodedBytesIterator.moveNext(); | 302 utf32EncodedBytesIterator.moveNext(); |
| 302 value = (value << 8) + utf32EncodedBytesIterator.current; | 303 value = (value << 8) + utf32EncodedBytesIterator.current; |
| 303 utf32EncodedBytesIterator.moveNext(); | 304 utf32EncodedBytesIterator.moveNext(); |
| 304 value = (value << 8) + utf32EncodedBytesIterator.current; | 305 value = (value << 8) + utf32EncodedBytesIterator.current; |
| 305 return value; | 306 return value; |
| 306 } | 307 } |
| 307 } | 308 } |
| 308 | 309 |
| 309 /** | 310 /** |
| 310 * Convert UTF-32LE encoded bytes to codepoints by grouping 4 bytes | 311 * Convert UTF-32LE encoded bytes to codepoints by grouping 4 bytes |
| 311 * to produce the unicode codepoint. | 312 * to produce the unicode codepoint. |
| 312 */ | 313 */ |
| 313 class Utf32leBytesDecoder extends Utf32BytesDecoder { | 314 class Utf32leBytesDecoder extends Utf32BytesDecoder { |
| 314 Utf32leBytesDecoder(List<int> utf32EncodedBytes, [int offset = 0, | 315 Utf32leBytesDecoder(List<int> utf32EncodedBytes, [int offset = 0, |
| 315 int length, bool stripBom = true, | 316 int length, bool stripBom = true, |
| 316 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) : | 317 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) : |
| 317 super._fromListRangeIterator( | 318 super._fromListRangeIterator( |
| 318 (new _ListRange(utf32EncodedBytes, offset, length)).iterator, | 319 (new ListRange(utf32EncodedBytes, offset, length)).iterator, |
| 319 replacementCodepoint) { | 320 replacementCodepoint) { |
| 320 if (stripBom && hasUtf32leBom(utf32EncodedBytes, offset, length)) { | 321 if (stripBom && hasUtf32leBom(utf32EncodedBytes, offset, length)) { |
| 321 skip(); | 322 skip(); |
| 322 } | 323 } |
| 323 } | 324 } |
| 324 | 325 |
| 325 int decode() { | 326 int decode() { |
| 326 utf32EncodedBytesIterator.moveNext(); | 327 utf32EncodedBytesIterator.moveNext(); |
| 327 int value = utf32EncodedBytesIterator.current; | 328 int value = utf32EncodedBytesIterator.current; |
| 328 utf32EncodedBytesIterator.moveNext(); | 329 utf32EncodedBytesIterator.moveNext(); |
| 329 value += (utf32EncodedBytesIterator.current << 8); | 330 value += (utf32EncodedBytesIterator.current << 8); |
| 330 utf32EncodedBytesIterator.moveNext(); | 331 utf32EncodedBytesIterator.moveNext(); |
| 331 value += (utf32EncodedBytesIterator.current << 16); | 332 value += (utf32EncodedBytesIterator.current << 16); |
| 332 utf32EncodedBytesIterator.moveNext(); | 333 utf32EncodedBytesIterator.moveNext(); |
| 333 value += (utf32EncodedBytesIterator.current << 24); | 334 value += (utf32EncodedBytesIterator.current << 24); |
| 334 return value; | 335 return value; |
| 335 } | 336 } |
| 336 } | 337 } |
| 337 | 338 |
| 338 bool _validCodepoint(int codepoint) { | 339 bool _validCodepoint(int codepoint) { |
| 339 return (codepoint >= 0 && codepoint < UNICODE_UTF16_RESERVED_LO) || | 340 return (codepoint >= 0 && codepoint < UNICODE_UTF16_RESERVED_LO) || |
| 340 (codepoint > UNICODE_UTF16_RESERVED_HI && | 341 (codepoint > UNICODE_UTF16_RESERVED_HI && |
| 341 codepoint < UNICODE_VALID_RANGE_MAX); | 342 codepoint < UNICODE_VALID_RANGE_MAX); |
| 342 } | 343 } |
| OLD | NEW |