OLD | NEW |
| (Empty) |
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file | |
2 // for details. All rights reserved. Use of this source code is governed by a | |
3 // BSD-style license that can be found in the LICENSE file. | |
4 | |
5 library utf.utf_16_code_unit_decoder; | |
6 | |
7 import 'constants.dart'; | |
8 import 'list_range.dart'; | |
9 | |
/**
 * An [Iterator] of Unicode codepoints decoded from UTF-16 code units.
 *
 * Each call to [moveNext] consumes one code unit (or two, when they form a
 * surrogate pair) from [utf16CodeUnitIterator] and exposes the decoded
 * codepoint through [current].
 *
 * Malformed input (a negative value, an unpaired surrogate, or any other
 * value that is neither a directly transferable code unit nor the start of
 * a pair) is replaced by [replacementCodepoint]. Pass `null` as the
 * replacement codepoint to have an [ArgumentError] thrown instead.
 */
class Utf16CodeUnitDecoder implements Iterator<int> {
  // TODO(kevmoo): should this field be private?
  /** The underlying source of UTF-16 code units. */
  final ListRangeIterator utf16CodeUnitIterator;

  /**
   * The codepoint emitted in place of malformed input, or `null` to throw an
   * [ArgumentError] on malformed input.
   */
  final int replacementCodepoint;

  // The most recently decoded codepoint; null before the first successful
  // moveNext() and after the input is exhausted.
  int _current;

  /**
   * Decodes [length] code units of [utf16CodeUnits] starting at [offset].
   *
   * [offset], [length] and the list are handed unchanged to [ListRange],
   * which determines how an omitted [length] is interpreted.
   */
  Utf16CodeUnitDecoder(List<int> utf16CodeUnits, [int offset = 0, int length,
      int this.replacementCodepoint =
      UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) :
      utf16CodeUnitIterator =
          (new ListRange(utf16CodeUnits, offset, length)).iterator;

  /**
   * Decodes from an existing [ListRangeIterator], continuing from wherever
   * that iterator currently stands.
   */
  Utf16CodeUnitDecoder.fromListRangeIterator(
      ListRangeIterator this.utf16CodeUnitIterator,
      int this.replacementCodepoint);

  /** Returns this decoder itself. */
  Iterator<int> get iterator => this;

  /**
   * The codepoint produced by the last successful [moveNext], or `null` if
   * there is none.
   */
  int get current => _current;

  /**
   * Decodes the next codepoint.
   *
   * Returns `false` once the underlying code units are exhausted, `true`
   * otherwise. Throws an [ArgumentError] on malformed input when
   * [replacementCodepoint] is `null`.
   */
  bool moveNext() {
    _current = null;
    if (!utf16CodeUnitIterator.moveNext()) return false;

    int value = utf16CodeUnitIterator.current;
    if (value < 0) {
      _replaceOrThrow();
    } else if (value < UNICODE_UTF16_RESERVED_LO ||
        (value > UNICODE_UTF16_RESERVED_HI && value <= UNICODE_PLANE_ONE_MAX)) {
      // Outside the reserved (surrogate) range: transfer directly.
      _current = value;
    } else if (value < UNICODE_UTF16_SURROGATE_UNIT_1_BASE &&
        utf16CodeUnitIterator.moveNext()) {
      // Leading surrogate with at least one more unit available.
      int nextValue = utf16CodeUnitIterator.current;
      if (nextValue >= UNICODE_UTF16_SURROGATE_UNIT_1_BASE &&
          nextValue <= UNICODE_UTF16_RESERVED_HI) {
        // Merge the surrogate pair into a single codepoint.
        _current = ((value - UNICODE_UTF16_SURROGATE_UNIT_0_BASE) << 10) +
            UNICODE_UTF16_OFFSET +
            (nextValue - UNICODE_UTF16_SURROGATE_UNIT_1_BASE);
      } else {
        // The leading surrogate is unpaired. Un-read the following unit,
        // whatever it is, so the next call decodes it on its own instead of
        // it being silently dropped along with the bad surrogate.
        utf16CodeUnitIterator.backup();
        _replaceOrThrow();
      }
    } else {
      // Everything not handled above, including a leading surrogate with no
      // unit left to pair with.
      _replaceOrThrow();
    }
    return true;
  }

  // Reports malformed input: emits [replacementCodepoint] when one is
  // configured, otherwise throws an ArgumentError naming the iterator's
  // current position.
  void _replaceOrThrow() {
    if (replacementCodepoint == null) {
      throw new ArgumentError(
          "Invalid UTF16 at ${utf16CodeUnitIterator.position}");
    }
    _current = replacementCodepoint;
  }
}
83 | |
OLD | NEW |