OLD | NEW |
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file |
2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
4 | 4 |
5 part of utf; | 5 library utf.utf32; |
| 6 |
| 7 import "dart:collection"; |
| 8 |
| 9 import 'constants.dart'; |
| 10 import 'list_range.dart'; |
| 11 import 'shared.dart'; |
6 | 12 |
/**
 * Lazily decodes UTF-32 encoded [bytes] into Unicode code points.
 *
 * The result is an iterable, so a consumer converts only as much of the
 * input as it actually reads. Decoding is delegated to [Utf32BytesDecoder]:
 * the byte order is taken from a leading BOM when one is present and
 * defaults to big-endian otherwise, and a leading BOM is always stripped.
 *
 * [offset] and [length] select the part of [bytes] to decode. Set
 * [replacementCodepoint] to null to get an [ArgumentError] for malformed
 * input instead of a replacement value.
 */
IterableUtf32Decoder decodeUtf32AsIterable(List<int> bytes,
    [int offset = 0,
    int length,
    int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
  // The decoder is built by a provider rather than up front, so nothing is
  // constructed until the returned iterable asks for it.
  Utf32BytesDecoder createDecoder() =>
      new Utf32BytesDecoder(bytes, offset, length, replacementCodepoint);
  return new IterableUtf32Decoder._(createDecoder);
}
20 | 27 |
/**
 * Lazily decodes UTF-32BE encoded [bytes] into Unicode code points.
 *
 * The result is an iterable, so a consumer converts only as much of the
 * input as it actually reads. A leading big-endian BOM is stripped by
 * default; pass false for [stripBom] to keep it.
 *
 * [offset] and [length] select the part of [bytes] to decode. Set
 * [replacementCodepoint] to null to get an [ArgumentError] for malformed
 * input instead of a replacement value.
 */
IterableUtf32Decoder decodeUtf32beAsIterable(List<int> bytes,
    [int offset = 0,
    int length,
    bool stripBom = true,
    int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
  // The decoder is built by a provider rather than up front, so nothing is
  // constructed until the returned iterable asks for it.
  Utf32BytesDecoder createDecoder() => new Utf32beBytesDecoder(
      bytes, offset, length, stripBom, replacementCodepoint);
  return new IterableUtf32Decoder._(createDecoder);
}
35 | 43 |
/**
 * Lazily decodes UTF-32LE encoded [bytes] into Unicode code points.
 *
 * The result is an iterable, so a consumer converts only as much of the
 * input as it actually reads. A leading little-endian BOM is stripped by
 * default; pass false for [stripBom] to keep it.
 *
 * [offset] and [length] select the part of [bytes] to decode. Set
 * [replacementCodepoint] to null to get an [ArgumentError] for malformed
 * input instead of a replacement value.
 */
IterableUtf32Decoder decodeUtf32leAsIterable(List<int> bytes,
    [int offset = 0,
    int length,
    bool stripBom = true,
    int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
  // The decoder is built by a provider rather than up front, so nothing is
  // constructed until the returned iterable asks for it.
  Utf32BytesDecoder createDecoder() => new Utf32leBytesDecoder(
      bytes, offset, length, stripBom, replacementCodepoint);
  return new IterableUtf32Decoder._(createDecoder);
}
50 | 59 |
/**
 * Decodes a sequence of UTF-32 encoded [bytes] into a [String].
 *
 * [offset] and [length] restrict decoding to a sub-range of [bytes]. The
 * byte order is chosen by [Utf32BytesDecoder] (BOM when present, otherwise
 * big-endian). [replacementCodepoint] overrides the default Unicode
 * replacement character; set it to null to get an [ArgumentError] for
 * malformed input instead of a replacement value.
 */
String decodeUtf32(List<int> bytes,
    [int offset = 0,
    int length,
    int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
  final Utf32BytesDecoder decoder =
      new Utf32BytesDecoder(bytes, offset, length, replacementCodepoint);
  final List<int> codepoints = decoder.decodeRest();
  return new String.fromCharCodes(codepoints);
}
| 75 |
/**
 * Decodes a sequence of UTF-32BE encoded [bytes] into a [String].
 *
 * [offset] and [length] restrict decoding to a sub-range of [bytes]. A
 * leading big-endian BOM is dropped unless [stripBom] is false.
 * [replacementCodepoint] overrides the default Unicode replacement
 * character; set it to null to get an [ArgumentError] for malformed input
 * instead of a replacement value.
 */
String decodeUtf32be(List<int> bytes,
    [int offset = 0,
    int length,
    bool stripBom = true,
    int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
  final Utf32beBytesDecoder decoder = new Utf32beBytesDecoder(
      bytes, offset, length, stripBom, replacementCodepoint);
  final List<int> codepoints = decoder.decodeRest();
  return new String.fromCharCodes(codepoints);
}
75 | 91 |
/**
 * Decodes a sequence of UTF-32LE encoded [bytes] into a [String].
 *
 * [offset] and [length] restrict decoding to a sub-range of [bytes]. A
 * leading little-endian BOM is dropped unless [stripBom] is false.
 * [replacementCodepoint] overrides the default Unicode replacement
 * character; set it to null to get an [ArgumentError] for malformed input
 * instead of a replacement value.
 */
String decodeUtf32le(List<int> bytes,
    [int offset = 0,
    int length,
    bool stripBom = true,
    int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
  final Utf32leBytesDecoder decoder = new Utf32leBytesDecoder(
      bytes, offset, length, stripBom, replacementCodepoint);
  final List<int> codepoints = decoder.decodeRest();
  return new String.fromCharCodes(codepoints);
}
88 | 107 |
/**
 * Encodes [str] as a list of UTF-32 bytes.
 *
 * The output is big-endian and always starts with a big-endian
 * byte-order marker; this is [encodeUtf32be] with BOM writing enabled.
 */
List<int> encodeUtf32(String str) {
  return encodeUtf32be(str, true);
}
95 | 113 |
/**
 * Encodes [str] as a list of UTF-32BE bytes.
 *
 * Each code point of [str] becomes four bytes, most significant byte first.
 * No BOM is written unless [writeBOM] is true, in which case the four-byte
 * big-endian byte-order marker precedes the encoded text.
 */
List<int> encodeUtf32be(String str, [bool writeBOM = false]) {
  final List<int> codepoints = stringToCodepoints(str);
  final int bomSize = writeBOM ? 4 : 0;
  final List<int> bytes = new List<int>(bomSize + 4 * codepoints.length);
  if (writeBOM) {
    bytes[0] = 0;
    bytes[1] = 0;
    bytes[2] = UNICODE_UTF_BOM_HI;
    bytes[3] = UNICODE_UTF_BOM_LO;
  }
  int pos = bomSize;
  for (final int codepoint in codepoints) {
    // Walk from the highest byte (shift 24) down to the lowest (shift 0).
    for (int shift = 24; shift >= 0; shift -= 8) {
      bytes[pos++] = (codepoint >> shift) & UNICODE_BYTE_ZERO_MASK;
    }
  }
  return bytes;
}
119 | 137 |
/**
 * Encodes [str] as a list of UTF-32LE bytes.
 *
 * Each code point of [str] becomes four bytes, least significant byte
 * first. No BOM is written unless [writeBOM] is true, in which case the
 * four-byte little-endian byte-order marker precedes the encoded text.
 */
List<int> encodeUtf32le(String str, [bool writeBOM = false]) {
  final List<int> codepoints = stringToCodepoints(str);
  final int bomSize = writeBOM ? 4 : 0;
  final List<int> bytes = new List<int>(bomSize + 4 * codepoints.length);
  if (writeBOM) {
    bytes[0] = UNICODE_UTF_BOM_LO;
    bytes[1] = UNICODE_UTF_BOM_HI;
    bytes[2] = 0;
    bytes[3] = 0;
  }
  int pos = bomSize;
  for (final int codepoint in codepoints) {
    // Walk from the lowest byte (shift 0) up to the highest (shift 24).
    for (int shift = 0; shift <= 24; shift += 8) {
      bytes[pos++] = (codepoint >> shift) & UNICODE_BYTE_ZERO_MASK;
    }
  }
  return bytes;
}
143 | 161 |
/**
 * Reports whether the bytes starting at [offset] begin with a UTF-32
 * byte-order marker (BOM) of either endianness.
 *
 * The big-endian form is tested first, then the little-endian form.
 */
bool hasUtf32Bom(List<int> utf32EncodedBytes, [int offset = 0, int length]) {
  if (hasUtf32beBom(utf32EncodedBytes, offset, length)) {
    return true;
  }
  return hasUtf32leBom(utf32EncodedBytes, offset, length);
}
153 | 170 |
/**
 * Reports whether the bytes starting at [offset] begin with a big-endian
 * UTF-32 byte-order marker (BOM).
 *
 * When [length] is null the range extends to the end of the list. A range
 * shorter than four bytes never contains a BOM.
 */
bool hasUtf32beBom(List<int> utf32EncodedBytes, [int offset = 0, int length]) {
  final int end = length == null ? utf32EncodedBytes.length : offset + length;
  if (offset + 4 > end) {
    // Not enough room for the four BOM bytes.
    return false;
  }
  return utf32EncodedBytes[offset] == 0 &&
      utf32EncodedBytes[offset + 1] == 0 &&
      utf32EncodedBytes[offset + 2] == UNICODE_UTF_BOM_HI &&
      utf32EncodedBytes[offset + 3] == UNICODE_UTF_BOM_LO;
}
165 | 183 |
/**
 * Reports whether the bytes starting at [offset] begin with a little-endian
 * UTF-32 byte-order marker (BOM).
 *
 * When [length] is null the range extends to the end of the list. A range
 * shorter than four bytes never contains a BOM.
 */
bool hasUtf32leBom(List<int> utf32EncodedBytes, [int offset = 0, int length]) {
  final int end = length == null ? utf32EncodedBytes.length : offset + length;
  if (offset + 4 > end) {
    // Not enough room for the four BOM bytes.
    return false;
  }
  return utf32EncodedBytes[offset] == UNICODE_UTF_BOM_LO &&
      utf32EncodedBytes[offset + 1] == UNICODE_UTF_BOM_HI &&
      utf32EncodedBytes[offset + 2] == 0 &&
      utf32EncodedBytes[offset + 3] == 0;
}
177 | 196 |
/**
 * Signature of a zero-argument function that produces a [Utf32BytesDecoder].
 */
typedef Utf32BytesDecoder Utf32BytesDecoderProvider();
179 | 198 |
180 /** | 199 /** |
181 * Return type of [decodeUtf32AsIterable] and variants. The Iterable type | 200 * Return type of [decodeUtf32AsIterable] and variants. The Iterable type |
182 * provides an iterator on demand and the iterator will only translate bytes | 201 * provides an iterator on demand and the iterator will only translate bytes |
183 * as requested by the user of the iterator. (Note: results are not cached.) | 202 * as requested by the user of the iterator. (Note: results are not cached.) |
184 */ | 203 */ |
185 // TODO(floitsch): Consider removing the extend and switch to implements since | 204 // TODO(floitsch): Consider removing the extend and switch to implements since |
(...skipping 11 matching lines...) Expand all Loading... |
197 */ | 216 */ |
198 abstract class Utf32BytesDecoder implements ListRangeIterator { | 217 abstract class Utf32BytesDecoder implements ListRangeIterator { |
199 // TODO(kevmoo): should this field be private? | 218 // TODO(kevmoo): should this field be private? |
200 final ListRangeIterator utf32EncodedBytesIterator; | 219 final ListRangeIterator utf32EncodedBytesIterator; |
201 final int replacementCodepoint; | 220 final int replacementCodepoint; |
202 int _current = null; | 221 int _current = null; |
203 | 222 |
204 Utf32BytesDecoder._fromListRangeIterator( | 223 Utf32BytesDecoder._fromListRangeIterator( |
205 this.utf32EncodedBytesIterator, this.replacementCodepoint); | 224 this.utf32EncodedBytesIterator, this.replacementCodepoint); |
206 | 225 |
207 factory Utf32BytesDecoder(List<int> utf32EncodedBytes, [ | 226 factory Utf32BytesDecoder(List<int> utf32EncodedBytes, |
208 int offset = 0, int length, | 227 [int offset = 0, |
| 228 int length, |
209 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { | 229 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { |
210 if (length == null) { | 230 if (length == null) { |
211 length = utf32EncodedBytes.length - offset; | 231 length = utf32EncodedBytes.length - offset; |
212 } | 232 } |
213 if (hasUtf32beBom(utf32EncodedBytes, offset, length)) { | 233 if (hasUtf32beBom(utf32EncodedBytes, offset, length)) { |
214 return new Utf32beBytesDecoder(utf32EncodedBytes, offset + 4, length - 4, | 234 return new Utf32beBytesDecoder(utf32EncodedBytes, offset + 4, length - 4, |
215 false, replacementCodepoint); | 235 false, replacementCodepoint); |
216 } else if (hasUtf32leBom(utf32EncodedBytes, offset, length)) { | 236 } else if (hasUtf32leBom(utf32EncodedBytes, offset, length)) { |
217 return new Utf32leBytesDecoder(utf32EncodedBytes, offset + 4, length - 4, | 237 return new Utf32leBytesDecoder(utf32EncodedBytes, offset + 4, length - 4, |
218 false, replacementCodepoint); | 238 false, replacementCodepoint); |
219 } else { | 239 } else { |
220 return new Utf32beBytesDecoder(utf32EncodedBytes, offset, length, false, | 240 return new Utf32beBytesDecoder( |
221 replacementCodepoint); | 241 utf32EncodedBytes, offset, length, false, replacementCodepoint); |
222 } | 242 } |
223 } | 243 } |
224 | 244 |
225 List<int> decodeRest() { | 245 List<int> decodeRest() { |
226 List<int> codeunits = new List<int>(remaining); | 246 List<int> codeunits = new List<int>(remaining); |
227 int i = 0; | 247 int i = 0; |
228 while (moveNext()) { | 248 while (moveNext()) { |
229 codeunits[i++] = current; | 249 codeunits[i++] = current; |
230 } | 250 } |
231 return codeunits; | 251 return codeunits; |
232 } | 252 } |
233 | 253 |
234 int get current => _current; | 254 int get current => _current; |
235 | 255 |
236 bool moveNext() { | 256 bool moveNext() { |
237 _current = null; | 257 _current = null; |
238 int remaining = utf32EncodedBytesIterator.remaining; | 258 int remaining = utf32EncodedBytesIterator.remaining; |
239 if (remaining == 0) { | 259 if (remaining == 0) { |
240 _current = null; | 260 _current = null; |
241 return false; | 261 return false; |
242 } | 262 } |
243 if (remaining < 4) { | 263 if (remaining < 4) { |
244 utf32EncodedBytesIterator.skip(utf32EncodedBytesIterator.remaining); | 264 utf32EncodedBytesIterator.skip(utf32EncodedBytesIterator.remaining); |
245 if (replacementCodepoint != null) { | 265 if (replacementCodepoint != null) { |
246 _current = replacementCodepoint; | 266 _current = replacementCodepoint; |
247 return true; | 267 return true; |
248 } else { | 268 } else { |
249 throw new ArgumentError( | 269 throw new ArgumentError( |
250 "Invalid UTF32 at ${utf32EncodedBytesIterator.position}"); | 270 "Invalid UTF32 at ${utf32EncodedBytesIterator.position}"); |
251 } | 271 } |
252 } | 272 } |
253 int codepoint = decode(); | 273 int codepoint = decode(); |
254 if (_validCodepoint(codepoint)) { | 274 if (_validCodepoint(codepoint)) { |
255 _current = codepoint; | 275 _current = codepoint; |
256 return true; | 276 return true; |
257 } else if (replacementCodepoint != null) { | 277 } else if (replacementCodepoint != null) { |
(...skipping 18 matching lines...) Expand all Loading... |
276 } | 296 } |
277 | 297 |
278 int decode(); | 298 int decode(); |
279 } | 299 } |
280 | 300 |
/**
 * Decodes UTF-32BE encoded bytes into code points, combining each group of
 * four bytes (most significant byte first) into one Unicode code point.
 */
class Utf32beBytesDecoder extends Utf32BytesDecoder {
  /**
   * Creates a decoder over [length] bytes of [utf32EncodedBytes] starting at
   * [offset].
   *
   * When [stripBom] is true and the range starts with a big-endian BOM, the
   * decoder skips past it before any code point is produced.
   */
  Utf32beBytesDecoder(List<int> utf32EncodedBytes,
      [int offset = 0,
      int length,
      bool stripBom = true,
      int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT])
      : super._fromListRangeIterator(
            new ListRange(utf32EncodedBytes, offset, length).iterator,
            replacementCodepoint) {
    if (stripBom) {
      if (hasUtf32beBom(utf32EncodedBytes, offset, length)) {
        skip();
      }
    }
  }

  /**
   * Reads the next four bytes from the underlying iterator and assembles
   * them, big-endian, into a single value.
   */
  int decode() {
    final ListRangeIterator source = utf32EncodedBytesIterator;
    source.moveNext();
    int codepoint = source.current;
    // Each further byte is less significant than the ones already read.
    for (int i = 0; i < 3; i++) {
      source.moveNext();
      codepoint = (codepoint << 8) + source.current;
    }
    return codepoint;
  }
}
309 | 331 |
/**
 * Decodes UTF-32LE encoded bytes into code points, combining each group of
 * four bytes (least significant byte first) into one Unicode code point.
 */
class Utf32leBytesDecoder extends Utf32BytesDecoder {
  /**
   * Creates a decoder over [length] bytes of [utf32EncodedBytes] starting at
   * [offset].
   *
   * When [stripBom] is true and the range starts with a little-endian BOM,
   * the decoder skips past it before any code point is produced.
   */
  Utf32leBytesDecoder(List<int> utf32EncodedBytes,
      [int offset = 0,
      int length,
      bool stripBom = true,
      int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT])
      : super._fromListRangeIterator(
            new ListRange(utf32EncodedBytes, offset, length).iterator,
            replacementCodepoint) {
    if (stripBom) {
      if (hasUtf32leBom(utf32EncodedBytes, offset, length)) {
        skip();
      }
    }
  }

  /**
   * Reads the next four bytes from the underlying iterator and assembles
   * them, little-endian, into a single value.
   */
  int decode() {
    final ListRangeIterator source = utf32EncodedBytesIterator;
    source.moveNext();
    int codepoint = source.current;
    // Each further byte is more significant than the ones already read.
    for (int shift = 8; shift <= 24; shift += 8) {
      source.moveNext();
      codepoint += (source.current << shift);
    }
    return codepoint;
  }
}
338 | 362 |
/**
 * Reports whether [codepoint] is acceptable as a decoded value: either
 * non-negative and below UNICODE_UTF16_RESERVED_LO, or strictly between
 * UNICODE_UTF16_RESERVED_HI and UNICODE_VALID_RANGE_MAX.
 */
// NOTE(review): the upper bound is exclusive. If UNICODE_VALID_RANGE_MAX is
// the last valid code point rather than one past it, that code point is
// rejected here - confirm against constants.dart.
bool _validCodepoint(int codepoint) {
  final bool belowReservedRange =
      codepoint >= 0 && codepoint < UNICODE_UTF16_RESERVED_LO;
  if (belowReservedRange) {
    return true;
  }
  return codepoint > UNICODE_UTF16_RESERVED_HI &&
      codepoint < UNICODE_VALID_RANGE_MAX;
}
OLD | NEW |