OLD | NEW |
1 // Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file |
2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
4 | 4 |
5 /** | 5 #library('utf8'); |
6 * An abstract string representation. | 6 |
7 */ | 7 class Utf8Decoder { |
8 class ByteString implements SourceString { | |
9 final List<int> bytes; | 8 final List<int> bytes; |
10 final int offset; | 9 final int offset; |
11 final int length; | 10 final int length; |
12 int _hashCode; | |
13 | 11 |
14 ByteString(List<int> this.bytes, int this.offset, int this.length); | 12 Utf8Decoder(List<int> this.bytes, int this.offset, int this.length); |
15 | |
16 abstract String get charset(); | |
17 | 13 |
18 String toString() { | 14 String toString() { |
19 var list; | 15 return new String.fromCharCodes(decodeUtf8(bytes.getRange(offset, length))); |
20 try { | |
21 list = bytes.getRange(offset, length); | |
22 } catch (var ignored) { | |
23 // An exception may occur when running this on node. This is | |
24 // because [bytes] really is a buffer (or typed array). | |
25 list = new List<int>(length); | |
26 for (int i = 0; i < length; i++) { | |
27 list[i] = bytes[i + offset]; | |
28 } | |
29 } | |
30 return new String.fromCharCodes(decodeUtf8(list)); | |
31 } | 16 } |
32 | 17 |
33 static int decodeTrailing(int byte) { | 18 static int decodeTrailing(int byte) { |
34 if (byte < 0x80 || 0xBF < byte) { | 19 if (byte < 0x80 || 0xBF < byte) { |
35 throw new MalformedInputException('Cannot decode UTF-8 $byte'); | 20 throw new Exception('Cannot decode UTF-8 $byte'); |
36 } else { | 21 } else { |
37 return byte & 0x3F; | 22 return byte & 0x3F; |
38 } | 23 } |
39 } | 24 } |
40 | 25 |
41 static List<int> decodeUtf8(List<int> bytes) { | 26 static List<int> decodeUtf8(List<int> bytes) { |
42 List<int> result = new List<int>(); | 27 List<int> result = new List<int>(); |
43 for (int i = 0; i < bytes.length; i++) { | 28 for (int i = 0; i < bytes.length; i++) { |
44 if (bytes[i] < 0x80) { | 29 if (bytes[i] < 0x80) { |
45 result.add(bytes[i]); | 30 result.add(bytes[i]); |
46 } else if (bytes[i] < 0xC2) { | 31 } else if (bytes[i] < 0xC2) { |
47 throw new MalformedInputException('Cannot decode UTF-8 @ $i'); | 32 throw new Exception('Cannot decode UTF-8 @ $i'); |
48 } else if (bytes[i] < 0xE0) { | 33 } else if (bytes[i] < 0xE0) { |
49 int char = (bytes[i++] & 0x1F) << 6; | 34 int char = (bytes[i++] & 0x1F) << 6; |
50 char += decodeTrailing(bytes[i]); | 35 char += decodeTrailing(bytes[i]); |
51 if (char < 0x80) { | 36 if (char < 0x80) { |
52 throw new MalformedInputException('Cannot decode UTF-8 @ ${i-1}'); | 37 throw new Exception('Cannot decode UTF-8 @ ${i-1}'); |
53 } else { | 38 } else { |
54 result.add(char); | 39 result.add(char); |
55 } | 40 } |
56 } else if (bytes[i] < 0xF0) { | 41 } else if (bytes[i] < 0xF0) { |
57 int char = (bytes[i++] & 0x0F) << 6; | 42 int char = (bytes[i++] & 0x0F) << 6; |
58 char += decodeTrailing(bytes[i++]); | 43 char += decodeTrailing(bytes[i++]); |
59 char <<= 6; | 44 char <<= 6; |
60 char += decodeTrailing(bytes[i]); | 45 char += decodeTrailing(bytes[i]); |
61 if (char < 0x800 || (0xD800 <= char && char <= 0xDFFF)) { | 46 if (char < 0x800 || (0xD800 <= char && char <= 0xDFFF)) { |
62 throw new MalformedInputException('Cannot decode UTF-8 @ ${i-2}'); | 47 throw new Exception('Cannot decode UTF-8 @ ${i-2}'); |
63 } else { | 48 } else { |
64 result.add(char); | 49 result.add(char); |
65 } | 50 } |
66 } else if (bytes[i] < 0xF8) { | 51 } else if (bytes[i] < 0xF8) { |
67 int char = (bytes[i++] & 0x07) << 6; | 52 int char = (bytes[i++] & 0x07) << 6; |
68 char += decodeTrailing(bytes[i++]); | 53 char += decodeTrailing(bytes[i++]); |
69 char <<= 6; | 54 char <<= 6; |
70 char += decodeTrailing(bytes[i++]); | 55 char += decodeTrailing(bytes[i++]); |
71 char <<= 6; | 56 char <<= 6; |
72 char += decodeTrailing(bytes[i]); | 57 char += decodeTrailing(bytes[i]); |
73 if (char < 0x10000) { | 58 if (char < 0x10000) { |
74 throw new MalformedInputException('Cannot decode UTF-8 @ ${i-3}'); | 59 throw new Exception('Cannot decode UTF-8 @ ${i-3}'); |
75 } else { | 60 } else { |
76 result.add(char); | 61 result.add(char); |
77 } | 62 } |
78 } else { | 63 } else { |
79 throw new MalformedInputException('Cannot decode UTF-8 @ $i'); | 64 throw new Exception('Cannot decode UTF-8 @ $i'); |
80 } | 65 } |
81 } | 66 } |
82 return result; | 67 return result; |
83 } | 68 } |
84 | |
85 bool operator ==(other) { | |
86 throw "should be overridden in subclass"; | |
87 } | |
88 | |
89 int hashCode() { | |
90 if (_hashCode === null) { | |
91 _hashCode = computeHashCode(); | |
92 } | |
93 return _hashCode; | |
94 } | |
95 | |
96 int computeHashCode() { | |
97 int code = 1; | |
98 int end = offset + length; | |
99 for (int i = offset; i < end; i++) { | |
100 code += 19 * code + bytes[i]; | |
101 } | |
102 return code; | |
103 } | |
104 | |
105 printOn(StringBuffer sb) { | |
106 sb.add(toString()); | |
107 } | |
108 } | 69 } |
109 | |
110 /** | |
111 * A string that consists purely of 7bit ASCII characters. | |
112 */ | |
113 class AsciiString extends ByteString { | |
114 final String charset = "ASCII"; | |
115 | |
116 AsciiString(List<int> bytes, int offset, int length) | |
117 : super(bytes, offset, length); | |
118 | |
119 static AsciiString of(List<int> bytes, int offset, int length) { | |
120 AsciiString string = new AsciiString(bytes, offset, length); | |
121 return string; | |
122 } | |
123 | |
124 static AsciiString fromString(String string) { | |
125 List<int> bytes = string.charCodes(); | |
126 return AsciiString.of(bytes, 0, bytes.length); | |
127 } | |
128 } | |
129 | |
130 /** | |
131 * A string that consists of characters that can be encoded as UTF-8. | |
132 */ | |
133 class Utf8String extends ByteString { | |
134 final String charset = "UTF8"; | |
135 | |
136 Utf8String(List<int> bytes, int offset, int length) | |
137 : super(bytes, offset, length); | |
138 | |
139 static Utf8String of(List<int> bytes, int offset, int length) { | |
140 return new Utf8String(bytes, offset, length); | |
141 } | |
142 | |
143 static Utf8String fromString(String string) { | |
144 throw "not implemented yet"; | |
145 } | |
146 } | |
147 | |
148 /** | |
149 * A ByteString-valued token. | |
150 */ | |
151 class ByteStringToken extends Token { | |
152 final ByteString value; | |
153 | |
154 ByteStringToken(PrecedenceInfo info, ByteString this.value, int charOffset) | |
155 : super(info, charOffset); | |
156 | |
157 String toString() => value.toString(); | |
158 } | |
OLD | NEW |