OLD | NEW |
| (Empty) |
1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file | |
2 // for details. All rights reserved. Use of this source code is governed by a | |
3 // BSD-style license that can be found in the LICENSE file. | |
4 | |
5 part of dart.convert; | |
6 | |
/**
 * The shared, default instance of [AsciiCodec].
 *
 * It offers convenient access to the most common ASCII use cases.
 *
 * Examples:
 *
 *     var encoded = ASCII.encode("This is ASCII!");
 *     var decoded = ASCII.decode([0x54, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73,
 *                                 0x20, 0x41, 0x53, 0x43, 0x49, 0x49, 0x21]);
 */
const AsciiCodec ASCII = const AsciiCodec();

// Bit mask covering the seven value bits of an ASCII code unit. A value with
// any bit set outside of this mask is rejected as non-ASCII.
const int _ASCII_MASK = 0x7F;
22 | |
/**
 * A codec that encodes strings as ASCII bytes and decodes ASCII bytes
 * back into strings.
 */
class AsciiCodec extends Encoding {
  // Default leniency used by [decode] and [decoder] when the caller does not
  // state a preference.
  final bool _allowInvalid;

  /**
   * Creates a new [AsciiCodec].
   *
   * The [allowInvalid] flag is the default used by the [decode] method and
   * by the converter returned from [decoder]. When invalid values are
   * allowed they are decoded into the Unicode Replacement character
   * (U+FFFD); otherwise an exception is thrown.
   * Individual calls to [decode] may override this default.
   *
   * Encoders never accept invalid (non-ASCII) characters.
   */
  const AsciiCodec({bool allowInvalid: false}) : _allowInvalid = allowInvalid;

  String get name => "us-ascii";

  /**
   * Decodes the ASCII [bytes] (a list of unsigned 7-bit integers) into the
   * corresponding string.
   *
   * If [bytes] contains a value outside the range 0 .. 127 and invalid
   * values are not allowed, the decoder throws a [FormatException].
   *
   * When [allowInvalid] is omitted, the value this [AsciiCodec] was created
   * with is used instead.
   */
  String decode(List<int> bytes, {bool allowInvalid}) {
    bool lenient = (allowInvalid == null) ? _allowInvalid : allowInvalid;
    // Both decoders are compile-time constants, so nothing is allocated here.
    AsciiDecoder converter = lenient
        ? const AsciiDecoder(allowInvalid: true)
        : const AsciiDecoder(allowInvalid: false);
    return converter.convert(bytes);
  }

  AsciiEncoder get encoder => const AsciiEncoder();

  AsciiDecoder get decoder {
    if (_allowInvalid) return const AsciiDecoder(allowInvalid: true);
    return const AsciiDecoder(allowInvalid: false);
  }
}
69 | |
// Shared base class of [AsciiEncoder] and [Latin1Encoder].
// The encoders differ only in the bit mask of the code units they accept.
class _UnicodeSubsetEncoder extends Converter<String, List<int>> {
  final int _subsetMask;

  const _UnicodeSubsetEncoder(this._subsetMask);

  /**
   * Encodes [string] as a list containing one byte per code unit.
   *
   * When [start] and [end] are given, only the substring
   * `string.substring(start, end)` is encoded.
   *
   * Throws an [ArgumentError] if a code unit in that range has a bit set
   * outside of the subset mask.
   */
  List<int> convert(String string, [int start = 0, int end]) {
    int stringLength = string.length;
    RangeError.checkValidRange(start, end, stringLength);
    if (end == null) end = stringLength;
    // Any bit outside the mask marks a code unit as not encodable.
    int rejectedBits = ~_subsetMask;
    List<int> bytes = new Uint8List(end - start);
    for (int index = start; index < end; index++) {
      int unit = string.codeUnitAt(index);
      if ((unit & rejectedBits) != 0) {
        throw new ArgumentError("String contains invalid characters.");
      }
      bytes[index - start] = unit;
    }
    return bytes;
  }

  /**
   * Begins a chunked conversion that writes its output to [sink].
   *
   * A [sink] that already is a [ByteConversionSink] is used directly, which
   * is the more efficient case; any other sink is wrapped in one first.
   */
  StringConversionSink startChunkedConversion(Sink<List<int>> sink) {
    ByteConversionSink byteSink =
        (sink is ByteConversionSink) ? sink : new ByteConversionSink.from(sink);
    return new _UnicodeSubsetEncoderSink(_subsetMask, byteSink);
  }

  // Overridden only to narrow the static type declared by the base class.
  Stream<List<int>> bind(Stream<String> stream) {
    return super.bind(stream);
  }
}
115 | |
/**
 * Converter from strings to bytes that accepts only ASCII characters.
 *
 * All of the work is done by the superclass, configured with the 7-bit
 * ASCII mask.
 */
class AsciiEncoder extends _UnicodeSubsetEncoder {
  const AsciiEncoder() : super(_ASCII_MASK);
}
122 | |
/**
 * Sink used for chunked encoding of strings to bytes (unsigned 8-bit
 * integers).
 *
 * Each slice is validated against the subset mask and then forwarded, as
 * code units, to the wrapped byte sink.
 */
class _UnicodeSubsetEncoderSink extends StringConversionSinkBase {
  final ByteConversionSink _sink;
  final int _subsetMask;

  _UnicodeSubsetEncoderSink(this._subsetMask, this._sink);

  void close() {
    _sink.close();
  }

  void addSlice(String source, int start, int end, bool isLast) {
    RangeError.checkValidRange(start, end, source.length);
    // Validate the entire slice first so that nothing reaches the sink when
    // the slice contains a code unit outside of the subset.
    int rejectedBits = ~_subsetMask;
    int index = start;
    while (index < end) {
      int codeUnit = source.codeUnitAt(index);
      if ((codeUnit & rejectedBits) != 0) {
        throw new ArgumentError(
            "Source contains invalid character with code point: $codeUnit.");
      }
      index++;
    }
    List<int> slice = source.codeUnits.sublist(start, end);
    _sink.add(slice);
    if (isLast) close();
  }
}
152 | |
/**
 * Shared base class for decoders that convert bytes from a subset of
 * Unicode, selected by a bit mask, into a string.
 */
abstract class _UnicodeSubsetDecoder extends Converter<List<int>, String> {
  final bool _allowInvalid;
  final int _subsetMask;

  /**
   * Creates a new decoder.
   *
   * The [_allowInvalid] argument controls how [convert] treats invalid
   * bytes: if it is `true` they are replaced with the Unicode Replacement
   * character `U+FFFD` (�), otherwise a [FormatException] is thrown.
   *
   * The [_subsetMask] argument is the bit mask that defines the subset of
   * Unicode being decoded. Use [_LATIN1_MASK] for Latin-1 (8-bit) or
   * [_ASCII_MASK] for ASCII (7-bit).
   */
  const _UnicodeSubsetDecoder(this._allowInvalid, this._subsetMask);

  /**
   * Decodes [bytes] (a list of unsigned 7- or 8-bit integers) into the
   * corresponding string.
   *
   * When [start] and [end] are given, only the bytes from `start` up to,
   * but not including, `end` are decoded.
   */
  String convert(List<int> bytes, [int start = 0, int end]) {
    int byteCount = bytes.length;
    RangeError.checkValidRange(start, end, byteCount);
    if (end == null) end = byteCount;

    int rejectedBits = ~_subsetMask;
    for (int index = start; index < end; index++) {
      int byte = bytes[index];
      if ((byte & rejectedBits) == 0) continue;
      // First invalid byte found: either switch to the replacing slow path
      // for the whole range, or reject the input.
      if (_allowInvalid) return _convertInvalid(bytes, start, end);
      throw new FormatException("Invalid value in input: $byte");
    }
    // Every byte is inside the subset, so the range converts directly.
    return new String.fromCharCodes(bytes, start, end);
  }

  // Slow path of [convert]: builds the result one code unit at a time,
  // writing U+FFFD in place of every byte outside of the subset.
  String _convertInvalid(List<int> bytes, int start, int end) {
    StringBuffer out = new StringBuffer();
    int rejectedBits = ~_subsetMask;
    for (int index = start; index < end; index++) {
      int unit = bytes[index];
      out.writeCharCode((unit & rejectedBits) == 0 ? unit : 0xFFFD);
    }
    return out.toString();
  }

  /**
   * Begins a chunked conversion that writes its output to [sink].
   *
   * The converter works more efficiently if the given [sink] is a
   * [StringConversionSink].
   */
  ByteConversionSink startChunkedConversion(Sink<String> sink);

  // Overridden only to narrow the static type declared by the base class.
  Stream<String> bind(Stream<List<int>> stream) {
    return super.bind(stream);
  }
}
222 | |
/**
 * Converter from ASCII bytes (unsigned 7-bit integers) to a string.
 *
 * If created with `allowInvalid: true`, bytes outside of the ASCII range are
 * replaced with U+FFFD instead of being rejected.
 */
class AsciiDecoder extends _UnicodeSubsetDecoder {
  const AsciiDecoder({bool allowInvalid: false})
      : super(allowInvalid, _ASCII_MASK);

  /**
   * Begins a chunked conversion that writes its output to [sink].
   *
   * A [sink] that already is a [StringConversionSink] is used directly,
   * which is the more efficient case; any other sink is wrapped in one.
   */
  ByteConversionSink startChunkedConversion(Sink<String> sink) {
    StringConversionSink stringSink = (sink is StringConversionSink)
        ? sink
        : new StringConversionSink.from(sink);
    // Strict decoding only needs to validate and forward each chunk.
    if (!_allowInvalid) return new _SimpleAsciiDecoderSink(stringSink);
    // TODO(lrn): Use asUtf16Sink when it becomes available. It
    // works just as well, is likely to have less decoding overhead,
    // and make adding U+FFFD easier.
    // At that time, merge this with _Latin1DecoderSink;
    return new _ErrorHandlingAsciiDecoderSink(stringSink.asUtf8Sink(false));
  }
}
251 | |
// Lenient chunked ASCII decoder sink: valid runs of bytes are forwarded
// unchanged to a UTF-8 sink, and every non-ASCII byte is replaced by the
// UTF-8 encoding of U+FFFD.
class _ErrorHandlingAsciiDecoderSink extends ByteConversionSinkBase {
  ByteConversionSink _utf8Sink;
  _ErrorHandlingAsciiDecoderSink(this._utf8Sink);

  void close() {
    _utf8Sink.close();
  }

  void add(List<int> source) {
    addSlice(source, 0, source.length, false);
  }

  void addSlice(List<int> source, int start, int end, bool isLast) {
    RangeError.checkValidRange(start, end, source.length);
    // Index of the first byte that has not been forwarded yet.
    int pending = start;
    for (int i = start; i < end; i++) {
      if ((source[i] & ~_ASCII_MASK) == 0) continue;
      // Flush the valid run that precedes the offending byte, if any.
      if (i > pending) _utf8Sink.addSlice(source, pending, i, false);
      // Add UTF-8 encoding of U+FFFD.
      _utf8Sink.add(const <int>[0xEF, 0xBF, 0xBD]);
      pending = i + 1;
    }
    if (pending < end) {
      // The trailing valid run carries the isLast flag to the UTF-8 sink.
      _utf8Sink.addSlice(source, pending, end, isLast);
    } else if (isLast) {
      close();
    }
  }
}
281 | |
// Strict chunked ASCII decoder sink: each chunk is checked for non-ASCII
// bytes and, when clean, forwarded to the wrapped sink as a string.
class _SimpleAsciiDecoderSink extends ByteConversionSinkBase {
  Sink _sink;
  _SimpleAsciiDecoderSink(this._sink);

  void close() {
    _sink.close();
  }

  void add(List<int> source) {
    // Reject the whole chunk before anything is written to the sink.
    int index = 0;
    while (index < source.length) {
      if ((source[index] & ~_ASCII_MASK) != 0) {
        throw new FormatException("Source contains non-ASCII bytes.");
      }
      index++;
    }
    _sink.add(new String.fromCharCodes(source));
  }

  void addSlice(List<int> source, int start, int end, bool isLast) {
    final int length = source.length;
    RangeError.checkValidRange(start, end, length);
    if (start < end) {
      // Copy only when the slice is a proper sub-range of the list.
      bool coversWholeList = (start == 0 && end == length);
      add(coversWholeList ? source : source.sublist(start, end));
    }
    if (isLast) close();
  }
}
OLD | NEW |