OLD | NEW |
(Empty) | |
| 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE file. |
| 4 |
| 5 library utf.utf_16_code_unit_decoder; |
| 6 |
| 7 import 'constants.dart'; |
| 8 import 'list_range.dart'; |
| 9 |
/// An Iterator<int> of codepoints built on an Iterator of UTF-16 code units.
///
/// The parameters can override the default Unicode replacement character. Set
/// the replacementCharacter to null to throw an ArgumentError
/// rather than replace the bad value.
class Utf16CodeUnitDecoder implements Iterator<int> {
  // TODO(kevmoo): should this field be private?
  /// The source of UTF-16 code units consumed by [moveNext].
  final ListRangeIterator utf16CodeUnitIterator;

  /// The codepoint emitted in place of an invalid code unit or unpaired
  /// surrogate. When null, [moveNext] throws an [ArgumentError] instead.
  final int replacementCodepoint;

  int _current = null;

  /// Creates a decoder over the code units of [utf16CodeUnits] selected by
  /// [offset] and [length], which are forwarded unchanged to [ListRange].
  Utf16CodeUnitDecoder(List<int> utf16CodeUnits, [int offset = 0, int length,
      int this.replacementCodepoint =
      UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) :
      utf16CodeUnitIterator =
          (new ListRange(utf16CodeUnits, offset, length)).iterator;

  /// Creates a decoder that reads from an existing [ListRangeIterator].
  Utf16CodeUnitDecoder.fromListRangeIterator(
      ListRangeIterator this.utf16CodeUnitIterator,
      int this.replacementCodepoint);

  /// Returns this decoder itself.
  Iterator<int> get iterator => this;

  /// The codepoint produced by the most recent successful [moveNext], or null
  /// if [moveNext] has not been called, returned false, or threw.
  int get current => _current;

  /// Decodes the next codepoint and stores it in [current].
  ///
  /// Returns false once the underlying code unit iterator is exhausted.
  /// Invalid input (a negative unit, a unit above UNICODE_PLANE_ONE_MAX, or an
  /// unpaired surrogate) yields [replacementCodepoint], or throws an
  /// [ArgumentError] when [replacementCodepoint] is null.
  bool moveNext() {
    _current = null;
    if (!utf16CodeUnitIterator.moveNext()) return false;

    int value = utf16CodeUnitIterator.current;
    if (value < 0) {
      _replaceOrThrow();
    } else if (value < UNICODE_UTF16_RESERVED_LO ||
        (value > UNICODE_UTF16_RESERVED_HI && value <= UNICODE_PLANE_ONE_MAX)) {
      // Outside the reserved surrogate range: transfer directly.
      _current = value;
    } else if (value < UNICODE_UTF16_SURROGATE_UNIT_1_BASE &&
        utf16CodeUnitIterator.moveNext()) {
      // First unit of a surrogate pair with at least one more unit available.
      int nextValue = utf16CodeUnitIterator.current;
      if (nextValue >= UNICODE_UTF16_SURROGATE_UNIT_1_BASE &&
          nextValue <= UNICODE_UTF16_RESERVED_HI) {
        // Merge the surrogate pair into a single codepoint.
        _current = ((value - UNICODE_UTF16_SURROGATE_UNIT_0_BASE) << 10) +
            UNICODE_UTF16_OFFSET +
            (nextValue - UNICODE_UTF16_SURROGATE_UNIT_1_BASE);
      } else {
        // The following unit does not complete the pair. Un-read it so the
        // next call decodes it on its own; otherwise a valid unit that merely
        // follows an unpaired first surrogate would be silently dropped.
        utf16CodeUnitIterator.backup();
        _replaceOrThrow();
      }
    } else {
      // Second-half surrogate without a first half, a first-half surrogate at
      // the end of the input, or a value above UNICODE_PLANE_ONE_MAX.
      _replaceOrThrow();
    }
    return true;
  }

  /// Sets [current] to [replacementCodepoint], or throws an [ArgumentError]
  /// reporting the iterator position when no replacement is configured.
  void _replaceOrThrow() {
    if (replacementCodepoint == null) {
      throw new ArgumentError(
          "Invalid UTF16 at ${utf16CodeUnitIterator.position}");
    }
    _current = replacementCodepoint;
  }
}
| 83 |
OLD | NEW |