OLD | NEW |
| (Empty) |
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file | |
2 // for details. All rights reserved. Use of this source code is governed by a | |
3 // BSD-style license that can be found in the LICENSE file. | |
4 | |
5 #library('utf8'); | |
6 | |
/**
 * Decodes a window of UTF-8 encoded [bytes] into Unicode code points.
 *
 * The decoder is its own iterator: [hasNext] and [next] walk the window
 * `[offset, end)` of [bytes], yielding one code point per call to [next].
 * Malformed input (invalid lead or continuation bytes, overlong encodings,
 * surrogate code points, values above U+10FFFF, or a sequence cut off by
 * the end of the window) is reported by throwing an [Exception].
 */
class Utf8Decoder implements Iterable<int>, Iterator<int> {
  /** The list holding the encoded bytes. */
  final List<int> bytes;

  /** Index in [bytes] of the next byte to be consumed. */
  int offset;

  /** Index one past the last byte of the window being decoded. */
  final int end;

  /**
   * Creates a decoder over the [length] bytes of [bytes] that start at
   * [offset].
   */
  Utf8Decoder(List<int> this.bytes, int offset, int length)
      : this.offset = offset, end = offset + length;

  /**
   * Decodes the remainder of the characters in this decoder
   * into a [List<int>] of code points.
   */
  List<int> decodeRest() {
    List<int> result = <int>[];
    for (int char in this) result.add(char);
    return result;
  }

  Iterator<int> iterator() => this;

  /** Whether at least one undecoded byte remains in the window. */
  bool hasNext() => offset < end;

  /**
   * Decodes and returns the next code point, advancing [offset] past the
   * bytes that encode it.
   *
   * Throws an [Exception] if the bytes at the current position are not a
   * well-formed UTF-8 sequence that lies entirely inside the window.
   */
  int next() {
    assert(hasNext());
    int lead = bytes[offset++];
    if (lead < 0x80) {
      // Single-byte (ASCII) sequence.
      return lead;
    }
    // Every "@" error below reports the index just past the lead byte.
    final int errorOffset = offset;
    if (lead < 0xC2) {
      // 0x80..0xBF are stray continuation bytes; 0xC0 and 0xC1 could only
      // start overlong two-byte encodings.
      throw new Exception('Cannot decode UTF-8 @ $errorOffset');
    }
    if (lead < 0xE0) {
      // Two-byte sequence. The lead is at least 0xC2, so the result is
      // always >= 0x80 and no overlong check is needed.
      return ((lead & 0x1F) << 6) | _nextTrailing();
    }
    if (lead < 0xF0) {
      // Three-byte sequence.
      int codePoint = ((lead & 0x0F) << 6) | _nextTrailing();
      codePoint = (codePoint << 6) | _nextTrailing();
      // Reject overlong encodings and UTF-16 surrogate code points.
      if (codePoint < 0x800 || (0xD800 <= codePoint && codePoint <= 0xDFFF)) {
        throw new Exception('Cannot decode UTF-8 @ $errorOffset');
      }
      return codePoint;
    }
    if (lead < 0xF8) {
      // Four-byte sequence.
      int codePoint = ((lead & 0x07) << 6) | _nextTrailing();
      codePoint = (codePoint << 6) | _nextTrailing();
      codePoint = (codePoint << 6) | _nextTrailing();
      // Reject overlong encodings and values beyond the last Unicode code
      // point (lead bytes 0xF5..0xF7, or 0xF4 followed by a high trailer).
      if (codePoint < 0x10000 || codePoint > 0x10FFFF) {
        throw new Exception('Cannot decode UTF-8 @ $errorOffset');
      }
      return codePoint;
    }
    throw new Exception('Cannot decode UTF-8 @ $errorOffset');
  }

  /**
   * Consumes the next byte of the window as a continuation byte and returns
   * its six payload bits.
   *
   * Throws an [Exception] if the window is exhausted, so that a truncated
   * sequence never reads bytes at or beyond [end].
   */
  int _nextTrailing() {
    if (offset >= end) {
      throw new Exception('Cannot decode UTF-8 @ $offset');
    }
    return decodeTrailing(bytes[offset++]);
  }

  /**
   * Returns the low six bits of the continuation byte [byte].
   *
   * Throws an [Exception] if [byte] is outside the range 0x80..0xBF.
   */
  static int decodeTrailing(int byte) {
    if (byte < 0x80 || 0xBF < byte) {
      throw new Exception('Cannot decode UTF-8 $byte');
    } else {
      return byte & 0x3F;
    }
  }

  /** Decodes all of [bytes] and returns the resulting code points. */
  static List<int> decodeUtf8(List<int> bytes) {
    return new Utf8Decoder(bytes, 0, bytes.length).decodeRest();
  }
}
OLD | NEW |