| OLD | NEW |
| 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
| 4 | 4 |
| 5 /** | 5 /** |
| 6 * Decodes the UTF-32 bytes as an iterable. Thus, the consumer can only convert | 6 * Decodes the UTF-32 bytes as an iterable. Thus, the consumer can only convert |
| 7 * as much of the input as needed. Determines the byte order from the BOM, | 7 * as much of the input as needed. Determines the byte order from the BOM, |
| 8 * or uses big-endian as a default. This method always strips a leading BOM. | 8 * or uses big-endian as a default. This method always strips a leading BOM. |
| 9 * Set the replacementCharacter to null to throw an ArgumentError | 9 * Set the replacementCharacter to null to throw an ArgumentError |
| 10 * rather than replace the bad value. | 10 * rather than replace the bad value. |
| (...skipping 167 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 178 /** | 178 /** |
| 179 * Return type of [decodeUtf32AsIterable] and variants. The Iterable type | 179 * Return type of [decodeUtf32AsIterable] and variants. The Iterable type |
| 180 * provides an iterator on demand and the iterator will only translate bytes | 180 * provides an iterator on demand and the iterator will only translate bytes |
| 181 * as requested by the user of the iterator. (Note: results are not cached.) | 181 * as requested by the user of the iterator. (Note: results are not cached.) |
| 182 */ | 182 */ |
| 183 class IterableUtf32Decoder extends Iterable<int> { | 183 class IterableUtf32Decoder extends Iterable<int> { |
| 184 final Utf32BytesDecoderProvider codeunitsProvider; | 184 final Utf32BytesDecoderProvider codeunitsProvider; |
| 185 | 185 |
| 186 IterableUtf32Decoder._(this.codeunitsProvider); | 186 IterableUtf32Decoder._(this.codeunitsProvider); |
| 187 | 187 |
| 188 Utf32BytesDecoder iterator() => codeunitsProvider(); | 188 Utf32BytesDecoder get iterator => codeunitsProvider(); |
| 189 } | 189 } |
| 190 | 190 |
| 191 /** | 191 /** |
| 192 * Abstract parent class that converts encoded bytes to codepoints. | 192 * Abstract parent class that converts encoded bytes to codepoints. |
| 193 */ | 193 */ |
| 194 class Utf32BytesDecoder implements _ListRangeIterator { | 194 class Utf32BytesDecoder implements _ListRangeIterator { |
| 195 final _ListRangeIterator utf32EncodedBytesIterator; | 195 final _ListRangeIterator utf32EncodedBytesIterator; |
| 196 final int replacementCodepoint; | 196 final int replacementCodepoint; |
| 197 int _current = -1; |
| 197 | 198 |
| 198 Utf32BytesDecoder._fromListRangeIterator( | 199 Utf32BytesDecoder._fromListRangeIterator( |
| 199 this.utf32EncodedBytesIterator, this.replacementCodepoint); | 200 this.utf32EncodedBytesIterator, this.replacementCodepoint); |
| 200 | 201 |
| 201 factory Utf32BytesDecoder(List<int> utf32EncodedBytes, [ | 202 factory Utf32BytesDecoder(List<int> utf32EncodedBytes, [ |
| 202 int offset = 0, int length, | 203 int offset = 0, int length, |
| 203 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { | 204 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { |
| 204 if (length == null) { | 205 if (length == null) { |
| 205 length = utf32EncodedBytes.length - offset; | 206 length = utf32EncodedBytes.length - offset; |
| 206 } | 207 } |
| 207 if (hasUtf32beBom(utf32EncodedBytes, offset, length)) { | 208 if (hasUtf32beBom(utf32EncodedBytes, offset, length)) { |
| 208 return new Utf32beBytesDecoder(utf32EncodedBytes, offset + 4, length - 4, | 209 return new Utf32beBytesDecoder(utf32EncodedBytes, offset + 4, length - 4, |
| 209 false, replacementCodepoint); | 210 false, replacementCodepoint); |
| 210 } else if (hasUtf32leBom(utf32EncodedBytes, offset, length)) { | 211 } else if (hasUtf32leBom(utf32EncodedBytes, offset, length)) { |
| 211 return new Utf32leBytesDecoder(utf32EncodedBytes, offset + 4, length - 4, | 212 return new Utf32leBytesDecoder(utf32EncodedBytes, offset + 4, length - 4, |
| 212 false, replacementCodepoint); | 213 false, replacementCodepoint); |
| 213 } else { | 214 } else { |
| 214 return new Utf32beBytesDecoder(utf32EncodedBytes, offset, length, false, | 215 return new Utf32beBytesDecoder(utf32EncodedBytes, offset, length, false, |
| 215 replacementCodepoint); | 216 replacementCodepoint); |
| 216 } | 217 } |
| 217 } | 218 } |
| 218 | 219 |
| 219 List<int> decodeRest() { | 220 List<int> decodeRest() { |
| 220 List<int> codeunits = new List<int>(remaining); | 221 List<int> codeunits = new List<int>(remaining); |
| 221 int i = 0; | 222 int i = 0; |
| 222 while (hasNext) { | 223 while (moveNext()) { |
| 223 codeunits[i++] = next(); | 224 codeunits[i++] = current; |
| 224 } | 225 } |
| 225 return codeunits; | 226 return codeunits; |
| 226 } | 227 } |
| 227 | 228 |
| 228 bool get hasNext => utf32EncodedBytesIterator.hasNext; | 229 int get current { |
| 230 if (_current == -1) { |
| 231 // TODO(floitsch): bad error message. |
| 232 throw new StateError("No more elements"); |
| 233 } |
| 234 return _current; |
| 235 } |
| 229 | 236 |
| 230 int next() { | 237 bool moveNext() { |
| 238 _current = -1; |
| 231 if (utf32EncodedBytesIterator.remaining < 4) { | 239 if (utf32EncodedBytesIterator.remaining < 4) { |
| 232 utf32EncodedBytesIterator.skip(utf32EncodedBytesIterator.remaining); | 240 utf32EncodedBytesIterator.skip(utf32EncodedBytesIterator.remaining); |
| 233 if (replacementCodepoint != null) { | 241 if (replacementCodepoint != null) { |
| 234 return replacementCodepoint; | 242 _current = replacementCodepoint; |
| 243 return true; |
| 235 } else { | 244 } else { |
| 236 throw new ArgumentError( | 245 throw new ArgumentError( |
| 237 "Invalid UTF32 at ${utf32EncodedBytesIterator.position}"); | 246 "Invalid UTF32 at ${utf32EncodedBytesIterator.position}"); |
| 238 } | 247 } |
| 239 } else { | 248 } else { |
| 240 int codepoint = decode(); | 249 int codepoint = decode(); |
| 241 if (_validCodepoint(codepoint)) { | 250 if (_validCodepoint(codepoint)) { |
| 242 return codepoint; | 251 _current = codepoint; |
| 252 return true; |
| 243 } else if (replacementCodepoint != null) { | 253 } else if (replacementCodepoint != null) { |
| 244 return replacementCodepoint; | 254 _current = replacementCodepoint; |
| 255 return true; |
| 245 } else { | 256 } else { |
| 246 throw new ArgumentError( | 257 throw new ArgumentError( |
| 247 "Invalid UTF32 at ${utf32EncodedBytesIterator.position}"); | 258 "Invalid UTF32 at ${utf32EncodedBytesIterator.position}"); |
| 248 } | 259 } |
| 249 } | 260 } |
| 250 } | 261 } |
| 251 | 262 |
| 252 int get position => utf32EncodedBytesIterator.position ~/ 4; | 263 int get position => utf32EncodedBytesIterator.position ~/ 4; |
| 253 | 264 |
| 254 void backup([int by = 1]) { | 265 void backup([int by = 1]) { |
| (...skipping 10 matching lines...) Expand all Loading... |
| 265 } | 276 } |
| 266 | 277 |
| 267 /** | 278 /** |
| 268 * Convert UTF-32BE encoded bytes to codepoints by grouping 4 bytes | 279 * Convert UTF-32BE encoded bytes to codepoints by grouping 4 bytes |
| 269 * to produce the unicode codepoint. | 280 * to produce the unicode codepoint. |
| 270 */ | 281 */ |
| 271 class Utf32beBytesDecoder extends Utf32BytesDecoder { | 282 class Utf32beBytesDecoder extends Utf32BytesDecoder { |
| 272 Utf32beBytesDecoder(List<int> utf32EncodedBytes, [int offset = 0, | 283 Utf32beBytesDecoder(List<int> utf32EncodedBytes, [int offset = 0, |
| 273 int length, bool stripBom = true, | 284 int length, bool stripBom = true, |
| 274 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) : | 285 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) : |
| 275 super._fromListRangeIterator((new _ListRange(utf32EncodedBytes, offset, | 286 super._fromListRangeIterator( |
| 276 length)).iterator(), replacementCodepoint) { | 287 (new _ListRange(utf32EncodedBytes, offset, length)).iterator, |
| 288 replacementCodepoint) { |
| 277 if (stripBom && hasUtf32beBom(utf32EncodedBytes, offset, length)) { | 289 if (stripBom && hasUtf32beBom(utf32EncodedBytes, offset, length)) { |
| 278 skip(); | 290 skip(); |
| 279 } | 291 } |
| 280 } | 292 } |
| 281 | 293 |
| 282 int decode() { | 294 int decode() { |
| 283 int value = utf32EncodedBytesIterator.next(); | 295 utf32EncodedBytesIterator.moveNext(); |
| 284 value = (value << 8) + utf32EncodedBytesIterator.next(); | 296 int value = utf32EncodedBytesIterator.current; |
| 285 value = (value << 8) + utf32EncodedBytesIterator.next(); | 297 utf32EncodedBytesIterator.moveNext(); |
| 286 value = (value << 8) + utf32EncodedBytesIterator.next(); | 298 value = (value << 8) + utf32EncodedBytesIterator.current; |
| 299 utf32EncodedBytesIterator.moveNext(); |
| 300 value = (value << 8) + utf32EncodedBytesIterator.current; |
| 301 utf32EncodedBytesIterator.moveNext(); |
| 302 value = (value << 8) + utf32EncodedBytesIterator.current; |
| 287 return value; | 303 return value; |
| 288 } | 304 } |
| 289 } | 305 } |
| 290 | 306 |
| 291 /** | 307 /** |
| 292 * Convert UTF-32LE encoded bytes to codepoints by grouping 4 bytes | 308 * Convert UTF-32LE encoded bytes to codepoints by grouping 4 bytes |
| 293 * to produce the unicode codepoint. | 309 * to produce the unicode codepoint. |
| 294 */ | 310 */ |
| 295 class Utf32leBytesDecoder extends Utf32BytesDecoder { | 311 class Utf32leBytesDecoder extends Utf32BytesDecoder { |
| 296 Utf32leBytesDecoder(List<int> utf32EncodedBytes, [int offset = 0, | 312 Utf32leBytesDecoder(List<int> utf32EncodedBytes, [int offset = 0, |
| 297 int length, bool stripBom = true, | 313 int length, bool stripBom = true, |
| 298 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) : | 314 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) : |
| 299 super._fromListRangeIterator((new _ListRange(utf32EncodedBytes, offset, | 315 super._fromListRangeIterator( |
| 300 length)).iterator(), replacementCodepoint) { | 316 (new _ListRange(utf32EncodedBytes, offset, length)).iterator, |
| 317 replacementCodepoint) { |
| 301 if (stripBom && hasUtf32leBom(utf32EncodedBytes, offset, length)) { | 318 if (stripBom && hasUtf32leBom(utf32EncodedBytes, offset, length)) { |
| 302 skip(); | 319 skip(); |
| 303 } | 320 } |
| 304 } | 321 } |
| 305 | 322 |
| 306 int decode() { | 323 int decode() { |
| 307 int value = (utf32EncodedBytesIterator.next()); | 324 utf32EncodedBytesIterator.moveNext(); |
| 308 value += (utf32EncodedBytesIterator.next() << 8); | 325 int value = utf32EncodedBytesIterator.current; |
| 309 value += (utf32EncodedBytesIterator.next() << 16); | 326 utf32EncodedBytesIterator.moveNext(); |
| 310 value += (utf32EncodedBytesIterator.next() << 24); | 327 value += (utf32EncodedBytesIterator.current << 8); |
| 328 utf32EncodedBytesIterator.moveNext(); |
| 329 value += (utf32EncodedBytesIterator.current << 16); |
| 330 utf32EncodedBytesIterator.moveNext(); |
| 331 value += (utf32EncodedBytesIterator.current << 24); |
| 311 return value; | 332 return value; |
| 312 } | 333 } |
| 313 } | 334 } |
| 314 | 335 |
| 315 bool _validCodepoint(int codepoint) { | 336 bool _validCodepoint(int codepoint) { |
| 316 return (codepoint >= 0 && codepoint < UNICODE_UTF16_RESERVED_LO) || | 337 return (codepoint >= 0 && codepoint < UNICODE_UTF16_RESERVED_LO) || |
| 317 (codepoint > UNICODE_UTF16_RESERVED_HI && | 338 (codepoint > UNICODE_UTF16_RESERVED_HI && |
| 318 codepoint < UNICODE_VALID_RANGE_MAX); | 339 codepoint < UNICODE_VALID_RANGE_MAX); |
| 319 } | 340 } |
| OLD | NEW |