| OLD | NEW |
| 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
| 4 | 4 |
| 5 part of dart.utf; | 5 part of dart.utf; |
| 6 | 6 |
| 7 /** | 7 /** |
| 8 * Decodes the UTF-32 bytes as an iterable. Thus, the consumer can only convert | 8 * Decodes the UTF-32 bytes as an iterable. Thus, the consumer can only convert |
| 9 * as much of the input as needed. Determines the byte order from the BOM, | 9 * as much of the input as needed. Determines the byte order from the BOM, |
| 10 * or uses big-endian as a default. This method always strips a leading BOM. | 10 * or uses big-endian as a default. This method always strips a leading BOM. |
| (...skipping 164 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 175 utf32EncodedBytes[offset + 2] == 0 && utf32EncodedBytes[offset + 3] == 0; | 175 utf32EncodedBytes[offset + 2] == 0 && utf32EncodedBytes[offset + 3] == 0; |
| 176 } | 176 } |
| 177 | 177 |
| 178 typedef Utf32BytesDecoder Utf32BytesDecoderProvider(); | 178 typedef Utf32BytesDecoder Utf32BytesDecoderProvider(); |
| 179 | 179 |
| 180 /** | 180 /** |
| 181 * Return type of [decodeUtf32AsIterable] and variants. The Iterable type | 181 * Return type of [decodeUtf32AsIterable] and variants. The Iterable type |
| 182 * provides an iterator on demand and the iterator will only translate bytes | 182 * provides an iterator on demand and the iterator will only translate bytes |
| 183 * as requested by the user of the iterator. (Note: results are not cached.) | 183 * as requested by the user of the iterator. (Note: results are not cached.) |
| 184 */ | 184 */ |
| 185 class IterableUtf32Decoder implements Iterable<int> { | 185 // TODO(floitsch): Consider removing the `extends` and switching to |
| 186 // `implements`, since that's cheaper to allocate. |
| 187 class IterableUtf32Decoder extends Iterable<int> { |
| 186 final Utf32BytesDecoderProvider codeunitsProvider; | 188 final Utf32BytesDecoderProvider codeunitsProvider; |
| 187 | 189 |
| 188 IterableUtf32Decoder._(this.codeunitsProvider); | 190 IterableUtf32Decoder._(this.codeunitsProvider); |
| 189 | 191 |
| 190 Utf32BytesDecoder iterator() => codeunitsProvider(); | 192 Utf32BytesDecoder get iterator => codeunitsProvider(); |
| 191 } | 193 } |
| 192 | 194 |
| 193 /** | 195 /** |
| 194 * Abstract parent class that converts encoded bytes to codepoints. | 196 * Abstract parent class that converts encoded bytes to codepoints. |
| 195 */ | 197 */ |
| 196 class Utf32BytesDecoder implements _ListRangeIterator { | 198 class Utf32BytesDecoder implements _ListRangeIterator { |
| 197 final _ListRangeIterator utf32EncodedBytesIterator; | 199 final _ListRangeIterator utf32EncodedBytesIterator; |
| 198 final int replacementCodepoint; | 200 final int replacementCodepoint; |
| 201 int _current = null; |
| 199 | 202 |
| 200 Utf32BytesDecoder._fromListRangeIterator( | 203 Utf32BytesDecoder._fromListRangeIterator( |
| 201 this.utf32EncodedBytesIterator, this.replacementCodepoint); | 204 this.utf32EncodedBytesIterator, this.replacementCodepoint); |
| 202 | 205 |
| 203 factory Utf32BytesDecoder(List<int> utf32EncodedBytes, [ | 206 factory Utf32BytesDecoder(List<int> utf32EncodedBytes, [ |
| 204 int offset = 0, int length, | 207 int offset = 0, int length, |
| 205 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { | 208 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { |
| 206 if (length == null) { | 209 if (length == null) { |
| 207 length = utf32EncodedBytes.length - offset; | 210 length = utf32EncodedBytes.length - offset; |
| 208 } | 211 } |
| 209 if (hasUtf32beBom(utf32EncodedBytes, offset, length)) { | 212 if (hasUtf32beBom(utf32EncodedBytes, offset, length)) { |
| 210 return new Utf32beBytesDecoder(utf32EncodedBytes, offset + 4, length - 4, | 213 return new Utf32beBytesDecoder(utf32EncodedBytes, offset + 4, length - 4, |
| 211 false, replacementCodepoint); | 214 false, replacementCodepoint); |
| 212 } else if (hasUtf32leBom(utf32EncodedBytes, offset, length)) { | 215 } else if (hasUtf32leBom(utf32EncodedBytes, offset, length)) { |
| 213 return new Utf32leBytesDecoder(utf32EncodedBytes, offset + 4, length - 4, | 216 return new Utf32leBytesDecoder(utf32EncodedBytes, offset + 4, length - 4, |
| 214 false, replacementCodepoint); | 217 false, replacementCodepoint); |
| 215 } else { | 218 } else { |
| 216 return new Utf32beBytesDecoder(utf32EncodedBytes, offset, length, false, | 219 return new Utf32beBytesDecoder(utf32EncodedBytes, offset, length, false, |
| 217 replacementCodepoint); | 220 replacementCodepoint); |
| 218 } | 221 } |
| 219 } | 222 } |
| 220 | 223 |
| 221 List<int> decodeRest() { | 224 List<int> decodeRest() { |
| 222 List<int> codeunits = new List<int>(remaining); | 225 List<int> codeunits = new List<int>.fixedLength(remaining); |
| 223 int i = 0; | 226 int i = 0; |
| 224 while (hasNext) { | 227 while (moveNext()) { |
| 225 codeunits[i++] = next(); | 228 codeunits[i++] = current; |
| 226 } | 229 } |
| 227 return codeunits; | 230 return codeunits; |
| 228 } | 231 } |
| 229 | 232 |
| 230 bool get hasNext => utf32EncodedBytesIterator.hasNext; | 233 int get current => _current; |
| 231 | 234 |
| 232 int next() { | 235 bool moveNext() { |
| 236 _current = null; |
| 233 if (utf32EncodedBytesIterator.remaining < 4) { | 237 if (utf32EncodedBytesIterator.remaining < 4) { |
| 234 utf32EncodedBytesIterator.skip(utf32EncodedBytesIterator.remaining); | 238 utf32EncodedBytesIterator.skip(utf32EncodedBytesIterator.remaining); |
| 235 if (replacementCodepoint != null) { | 239 if (replacementCodepoint != null) { |
| 236 return replacementCodepoint; | 240 _current = replacementCodepoint; |
| 241 return true; |
| 237 } else { | 242 } else { |
| 238 throw new ArgumentError( | 243 throw new ArgumentError( |
| 239 "Invalid UTF32 at ${utf32EncodedBytesIterator.position}"); | 244 "Invalid UTF32 at ${utf32EncodedBytesIterator.position}"); |
| 240 } | 245 } |
| 241 } else { | 246 } else { |
| 242 int codepoint = decode(); | 247 int codepoint = decode(); |
| 243 if (_validCodepoint(codepoint)) { | 248 if (_validCodepoint(codepoint)) { |
| 244 return codepoint; | 249 _current = codepoint; |
| 250 return true; |
| 245 } else if (replacementCodepoint != null) { | 251 } else if (replacementCodepoint != null) { |
| 246 return replacementCodepoint; | 252 _current = replacementCodepoint; |
| 253 return true; |
| 247 } else { | 254 } else { |
| 248 throw new ArgumentError( | 255 throw new ArgumentError( |
| 249 "Invalid UTF32 at ${utf32EncodedBytesIterator.position}"); | 256 "Invalid UTF32 at ${utf32EncodedBytesIterator.position}"); |
| 250 } | 257 } |
| 251 } | 258 } |
| 252 } | 259 } |
| 253 | 260 |
| 254 int get position => utf32EncodedBytesIterator.position ~/ 4; | 261 int get position => utf32EncodedBytesIterator.position ~/ 4; |
| 255 | 262 |
| 256 void backup([int by = 1]) { | 263 void backup([int by = 1]) { |
| (...skipping 10 matching lines...) Expand all Loading... |
| 267 } | 274 } |
| 268 | 275 |
| 269 /** | 276 /** |
| 270 * Convert UTF-32BE encoded bytes to codepoints by grouping 4 bytes | 277 * Convert UTF-32BE encoded bytes to codepoints by grouping 4 bytes |
| 271 * to produce the unicode codepoint. | 278 * to produce the unicode codepoint. |
| 272 */ | 279 */ |
| 273 class Utf32beBytesDecoder extends Utf32BytesDecoder { | 280 class Utf32beBytesDecoder extends Utf32BytesDecoder { |
| 274 Utf32beBytesDecoder(List<int> utf32EncodedBytes, [int offset = 0, | 281 Utf32beBytesDecoder(List<int> utf32EncodedBytes, [int offset = 0, |
| 275 int length, bool stripBom = true, | 282 int length, bool stripBom = true, |
| 276 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) : | 283 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) : |
| 277 super._fromListRangeIterator((new _ListRange(utf32EncodedBytes, offset, | 284 super._fromListRangeIterator( |
| 278 length)).iterator(), replacementCodepoint) { | 285 (new _ListRange(utf32EncodedBytes, offset, length)).iterator, |
| 286 replacementCodepoint) { |
| 279 if (stripBom && hasUtf32beBom(utf32EncodedBytes, offset, length)) { | 287 if (stripBom && hasUtf32beBom(utf32EncodedBytes, offset, length)) { |
| 280 skip(); | 288 skip(); |
| 281 } | 289 } |
| 282 } | 290 } |
| 283 | 291 |
| 284 int decode() { | 292 int decode() { |
| 285 int value = utf32EncodedBytesIterator.next(); | 293 utf32EncodedBytesIterator.moveNext(); |
| 286 value = (value << 8) + utf32EncodedBytesIterator.next(); | 294 int value = utf32EncodedBytesIterator.current; |
| 287 value = (value << 8) + utf32EncodedBytesIterator.next(); | 295 utf32EncodedBytesIterator.moveNext(); |
| 288 value = (value << 8) + utf32EncodedBytesIterator.next(); | 296 value = (value << 8) + utf32EncodedBytesIterator.current; |
| 297 utf32EncodedBytesIterator.moveNext(); |
| 298 value = (value << 8) + utf32EncodedBytesIterator.current; |
| 299 utf32EncodedBytesIterator.moveNext(); |
| 300 value = (value << 8) + utf32EncodedBytesIterator.current; |
| 289 return value; | 301 return value; |
| 290 } | 302 } |
| 291 } | 303 } |
| 292 | 304 |
| 293 /** | 305 /** |
| 294 * Convert UTF-32LE encoded bytes to codepoints by grouping 4 bytes | 306 * Convert UTF-32LE encoded bytes to codepoints by grouping 4 bytes |
| 295 * to produce the unicode codepoint. | 307 * to produce the unicode codepoint. |
| 296 */ | 308 */ |
| 297 class Utf32leBytesDecoder extends Utf32BytesDecoder { | 309 class Utf32leBytesDecoder extends Utf32BytesDecoder { |
| 298 Utf32leBytesDecoder(List<int> utf32EncodedBytes, [int offset = 0, | 310 Utf32leBytesDecoder(List<int> utf32EncodedBytes, [int offset = 0, |
| 299 int length, bool stripBom = true, | 311 int length, bool stripBom = true, |
| 300 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) : | 312 int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) : |
| 301 super._fromListRangeIterator((new _ListRange(utf32EncodedBytes, offset, | 313 super._fromListRangeIterator( |
| 302 length)).iterator(), replacementCodepoint) { | 314 (new _ListRange(utf32EncodedBytes, offset, length)).iterator, |
| 315 replacementCodepoint) { |
| 303 if (stripBom && hasUtf32leBom(utf32EncodedBytes, offset, length)) { | 316 if (stripBom && hasUtf32leBom(utf32EncodedBytes, offset, length)) { |
| 304 skip(); | 317 skip(); |
| 305 } | 318 } |
| 306 } | 319 } |
| 307 | 320 |
| 308 int decode() { | 321 int decode() { |
| 309 int value = (utf32EncodedBytesIterator.next()); | 322 utf32EncodedBytesIterator.moveNext(); |
| 310 value += (utf32EncodedBytesIterator.next() << 8); | 323 int value = utf32EncodedBytesIterator.current; |
| 311 value += (utf32EncodedBytesIterator.next() << 16); | 324 utf32EncodedBytesIterator.moveNext(); |
| 312 value += (utf32EncodedBytesIterator.next() << 24); | 325 value += (utf32EncodedBytesIterator.current << 8); |
| 326 utf32EncodedBytesIterator.moveNext(); |
| 327 value += (utf32EncodedBytesIterator.current << 16); |
| 328 utf32EncodedBytesIterator.moveNext(); |
| 329 value += (utf32EncodedBytesIterator.current << 24); |
| 313 return value; | 330 return value; |
| 314 } | 331 } |
| 315 } | 332 } |
| 316 | 333 |
| 317 bool _validCodepoint(int codepoint) { | 334 bool _validCodepoint(int codepoint) { |
| 318 return (codepoint >= 0 && codepoint < UNICODE_UTF16_RESERVED_LO) || | 335 return (codepoint >= 0 && codepoint < UNICODE_UTF16_RESERVED_LO) || |
| 319 (codepoint > UNICODE_UTF16_RESERVED_HI && | 336 (codepoint > UNICODE_UTF16_RESERVED_HI && |
| 320 codepoint < UNICODE_VALID_RANGE_MAX); | 337 codepoint < UNICODE_VALID_RANGE_MAX); |
| 321 } | 338 } |
| OLD | NEW |