| OLD | NEW |
| (Empty) |
| 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file | |
| 2 // for details. All rights reserved. Use of this source code is governed by a | |
| 3 // BSD-style license that can be found in the LICENSE file. | |
| 4 | |
| 5 #library('utf8'); | |
| 6 | |
/**
 * Decodes UTF-8 encoded bytes into Unicode code points.
 *
 * The decoder reads the window of [bytes] that starts at the offset passed to
 * the constructor and spans `length` bytes. It is its own iterator
 * ([iterator] returns `this`), so the code points can be traversed only once.
 *
 * Malformed input (stray or missing continuation bytes, overlong encodings,
 * surrogate code points, values above U+10FFFF, sequences cut off by the end
 * of the window) is reported by throwing an [Exception].
 */
class Utf8Decoder implements Iterable<int>, Iterator<int> {
  /** The list holding the encoded bytes. */
  final List<int> bytes;

  /** Index in [bytes] of the next byte to be read. */
  int offset;

  /** Index one past the last byte of the window being decoded. */
  final int end;

  /**
   * Creates a decoder over the [length] bytes of [bytes] that start at
   * index [offset].
   */
  Utf8Decoder(List<int> this.bytes, int offset, int length)
      : this.offset = offset, end = offset + length;

  /**
   * Decodes the remainder of the characters in this decoder
   * into a [List<int>] of code points.
   */
  List<int> decodeRest() {
    List<int> result = <int>[];
    for (int char in this) result.add(char);
    return result;
  }

  /** Returns this decoder; it serves as its own iterator. */
  Iterator<int> iterator() => this;

  /** Whether there are bytes left in the window. */
  bool hasNext() => offset < end;

  /**
   * Decodes and returns the next code point, advancing [offset] past the
   * bytes that encode it.
   *
   * Throws an [Exception] if the bytes at the current position are not a
   * well-formed UTF-8 sequence lying entirely inside the window.
   */
  int next() {
    assert(hasNext());
    int byte = bytes[offset++];
    if (byte < 0x80) {
      // Single-byte (ASCII) sequence.
      return byte;
    }
    // Position reported in error messages: the index just past the lead byte.
    final int position = offset;
    if (byte < 0xC2) {
      // 0x80..0xBF are continuation bytes and cannot start a sequence;
      // 0xC0 and 0xC1 could only start overlong encodings of ASCII.
      throw new Exception('Cannot decode UTF-8 @ $position');
    }
    if (byte < 0xE0) {
      // Two-byte sequence. The lead byte is at least 0xC2, so the result is
      // always >= 0x80 and no overlong check is needed.
      int char = (byte & 0x1F) << 6;
      char += _nextTrailing(position);
      return char;
    }
    if (byte < 0xF0) {
      // Three-byte sequence.
      int char = (byte & 0x0F) << 6;
      char += _nextTrailing(position);
      char <<= 6;
      char += _nextTrailing(position);
      // Reject overlong encodings and UTF-16 surrogate code points.
      if (char < 0x800 || (0xD800 <= char && char <= 0xDFFF)) {
        throw new Exception('Cannot decode UTF-8 @ $position');
      }
      return char;
    }
    if (byte < 0xF8) {
      // Four-byte sequence.
      int char = (byte & 0x07) << 6;
      char += _nextTrailing(position);
      char <<= 6;
      char += _nextTrailing(position);
      char <<= 6;
      char += _nextTrailing(position);
      // Reject overlong encodings and values beyond the last Unicode code
      // point (U+10FFFF).
      if (char < 0x10000 || 0x10FFFF < char) {
        throw new Exception('Cannot decode UTF-8 @ $position');
      }
      return char;
    }
    throw new Exception('Cannot decode UTF-8 @ $position');
  }

  /**
   * Consumes the next byte as a continuation byte and returns its six
   * payload bits.
   *
   * Throws an [Exception] mentioning [position] if the window is exhausted,
   * so that a truncated sequence never reads bytes at or beyond [end].
   */
  int _nextTrailing(int position) {
    if (offset >= end) {
      throw new Exception('Cannot decode UTF-8 @ $position');
    }
    return decodeTrailing(bytes[offset++]);
  }

  /**
   * Returns the six payload bits of the continuation byte [byte].
   *
   * Throws an [Exception] if [byte] is outside the range 0x80..0xBF.
   */
  static int decodeTrailing(int byte) {
    if (byte < 0x80 || 0xBF < byte) {
      throw new Exception('Cannot decode UTF-8 $byte');
    } else {
      return byte & 0x3F;
    }
  }

  /** Decodes all of [bytes] as UTF-8 and returns the list of code points. */
  static List<int> decodeUtf8(List<int> bytes) {
    return new Utf8Decoder(bytes, 0, bytes.length).decodeRest();
  }
}
| OLD | NEW |