Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1038)

Side by Side Diff: sdk/lib/io/string_transformer.dart

Issue 22872012: Remove Encoding-enum from dart:io and add interface in dart:convert. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Fix typo. Created 7 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « sdk/lib/io/stdio.dart ('k') | sdk/lib/io/websocket_impl.dart » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 part of dart.io; 5 part of dart.io;
6 6
7 const SYSTEM_ENCODING = const SystemEncoding();
8
7 /** 9 /**
8 * String encodings. 10 * The system encoding is the current code page on Windows and UTF-8 on
11 * Linux and Mac.
9 */ 12 */
10 class Encoding { 13 class SystemEncoding extends Encoding {
11 static const Encoding UTF_8 = const Encoding._internal("utf-8"); 14 const SystemEncoding();
12 static const Encoding ISO_8859_1 = const Encoding._internal("iso-8859-1");
13 static const Encoding ASCII = const Encoding._internal("us-ascii");
14 15
15 /** 16 List<int> encode(String input) => encoder.convert(input);
16 * SYSTEM encoding is the current code page on Windows and UTF-8 on 17 String decode(List<int> encoded) => decoder.convert(encoded);
17 * Linux and Mac.
18 */
19 static const Encoding SYSTEM = const Encoding._internal("system");
20 18
21 // All aliases (in lowercase) of supported encodings from 19 Converter<String, List<int>> get encoder {
22 // http://www.iana.org/assignments/character-sets/character-sets.xml. 20 if (Platform.operatingSystem == "windows") {
23 static Map<String, Encoding> _nameToEncoding = <String, Encoding> { 21 return const _WindowsCodePageEncoder();
24 // ISO_8859-1:1987. 22 } else {
25 "iso_8859-1:1987": ISO_8859_1, 23 return const Utf8Encoder();
26 "iso-ir-100": ISO_8859_1, 24 }
27 "iso_8859-1": ISO_8859_1, 25 }
28 "iso-8859-1": ISO_8859_1,
29 "latin1": ISO_8859_1,
30 "l1": ISO_8859_1,
31 "ibm819": ISO_8859_1,
32 "cp819": ISO_8859_1,
33 "csisolatin1": ISO_8859_1,
34 26
35 // US-ASCII. 27 Converter<List<int>, String> get decoder {
36 "iso-ir-6": ASCII, 28 if (Platform.operatingSystem == "windows") {
37 "ansi_x3.4-1968": ASCII, 29 return const _WindowsCodePageDecoder();
38 "ansi_x3.4-1986": ASCII, 30 } else {
39 "iso_646.irv:1991": ASCII, 31 return const Utf8Decoder();
40 "iso646-us": ASCII, 32 }
41 "us-ascii": ASCII, 33 }
42 "us": ASCII, 34 }
43 "ibm367": ASCII,
44 "cp367": ASCII,
45 "csascii": ASCII,
46 "ascii": ASCII, // This is not in the IANA official names.
47 35
48 // UTF-8. 36 class _WindowsCodePageEncoder extends Converter<String, List<int>> {
49 "csutf8": UTF_8,
50 "utf-8": UTF_8
51 };
52 37
53 /** 38 const _WindowsCodePageEncoder();
54 * Gets an [Encoding] object from the name of the character set 39
55 * name. The names used are the IANA official names for the 40 List<int> convert(String input) {
56 * character set (see 41 List<int> encoded = _encodeString(input);
57 * http://www.iana.org/assignments/character-sets/character-sets.xml). 42 if (encoded == null) {
58 * 43 throw new FormatException("Invalid character for encoding");
59 * The [name] passed is case insensitive. 44 }
60 * 45 return encoded;
61 * If character set is not supported [:null:] is returned.
62 */
63 static Encoding fromName(String name) {
64 if (name == null) return null;
65 name = name.toLowerCase();
66 return _nameToEncoding[name];
67 } 46 }
68 47
69 /** 48 /**
70 * Name of the encoding. This will be the lower-case version of one of the 49 * Starts a chunked conversion.
71 * IANA official names for the character set (see
72 * http://www.iana.org/assignments/character-sets/character-sets.xml)
73 */ 50 */
74 final String name; 51 StringConversionSink startChunkedConversion(
52 ChunkedConversionSink<List<int>> sink) {
53 return new _WindowsCodePageEncoderSink(sink);
54 }
75 55
76 const Encoding._internal(String this.name); 56 // Override the base-class' bind, to provide a better type.
57 Stream<List<int>> bind(Stream<String> stream) => super.bind(stream);
58
59 external static List<int> _encodeString(String string);
77 } 60 }
78 61
79 const UTF_8 = Encoding.UTF_8; 62 class _WindowsCodePageEncoderSink extends StringConversionSinkBase {
80 const ISO_8859_1 = Encoding.ISO_8859_1; 63 // TODO(floitsch): provide more efficient conversions when the input is
81 const ASCII = Encoding.ASCII; 64 // not a String.
82 65
83 /** 66 final ByteConversionSink _sink;
84 * Stream transformer that can decode a stream of bytes into a stream of
85 * strings using [encoding].
86 *
87 * Invalid or forbidden byte-sequences will not produce errors, but will instead
88 * insert [replacementChar] in the decoded strings.
89 */
90 class StringDecoder implements StreamTransformer<List<int>, String> {
91 var _decoder;
92 67
93 static const _UNICODE_REPLACEMENT_CHARACTER_CODEPOINT = 0xFFFD; 68 _WindowsCodePageEncoderSink(this._sink);
94 69
95 /** 70 void close() {
96 * Decodes a stream of bytes into a `String` with an optional 71 _sink.close();
97 * [encoding] and [replacementChar].
98 *
99 * The default value for [encoding] is [Encoding.UTF_8].
100 *
101 * The default value for [replacementChar] is code point U+FFFD.
102 *
103 * Completes with the decoded `String` when the stream is done.
104 */
105 static Future<String> decode(
106 Stream<List<int>> stream,
107 [Encoding encoding = Encoding.UTF_8,
108 int replacementChar = _UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
109 if (replacementChar != null &&
110 replacementChar != _UNICODE_REPLACEMENT_CHARACTER_CODEPOINT) {
111 throw new UnsupportedError("replacement character must be null or "
112 "the Unicode replacement character");
113 }
114 return stream
115 .transform(new StringDecoder(encoding, replacementChar))
116 .fold(
117 new StringBuffer(),
118 (prev, data) => prev..write(data))
119 .then((sb) => sb.toString());
120 } 72 }
121 73
122 /** 74 void add(String string) {
123 * Create a new [StringDecoder] with an optional [encoding] and 75 List<int> encoded = _WindowsCodePageEncoder._encodeString(string);
124 * [replacementChar]. 76 if (encoded == null) {
125 * 77 throw new FormatException("Invalid character for encoding");
126 * The default value for [encoding] is [Encoding.UTF_8].
127 *
128 * The default value for [replacementChar] is code point U+FFFD.
129 */
130 StringDecoder([Encoding encoding = Encoding.UTF_8, int replacementChar]) {
131 switch (encoding) {
132 case Encoding.UTF_8:
133 if (replacementChar != null &&
134 replacementChar != _UNICODE_REPLACEMENT_CHARACTER_CODEPOINT) {
135 throw new UnsupportedError("replacement character must be null or "
136 "the Unicode replacement character");
137 }
138 _decoder = new Utf8Decoder(allowMalformed: true);
139 break;
140 case Encoding.ASCII:
141 if (replacementChar == null) {
142 replacementChar = '?'.codeUnitAt(0);
143 } else if (replacementChar > 127) {
144 throw new ArgumentError("Invalid replacement character for ASCII");
145 }
146 _decoder = new _AsciiDecoder(replacementChar);
147 break;
148 case Encoding.ISO_8859_1:
149 if (replacementChar == null) {
150 replacementChar = '?'.codeUnitAt(0);
151 } else if (replacementChar > 255) {
152 throw new ArgumentError(
153 "Invalid replacement character for ISO_8859_1");
154 }
155 _decoder = new _Latin1Decoder(replacementChar);
156 break;
157 case Encoding.SYSTEM:
158 if (Platform.operatingSystem == "windows") {
159 _decoder = new _WindowsCodePageDecoder();
160 } else {
161 if (replacementChar != null) {
162 // TODO(ajohnsen): Handle replacement character.
163 throw new UnsupportedError(
164 "Replacement character is not supported for SYSTEM encoding");
165 }
166 _decoder = new Utf8Decoder(allowMalformed: true);
167 }
168 break;
169 default:
170 throw new ArgumentError("Unsupported encoding '$encoding'");
171 } 78 }
79 _sink.add(encoded);
172 } 80 }
173 81
174 Stream<String> bind(Stream<List<int>> stream) => _decoder.bind(stream); 82 void addSlice(String source, int start, int end, bool isLast) {
175 } 83 if (start != 0 || end != source.length) {
176 84 source = source.substring(start, end);
177
178 /**
179 * Stream transformer that can encode a stream of strings into a stream of
180 * bytes using [encoding].
181 *
182 * Strings that cannot be represented in the given encoding will result in an
183 * error and a close event on the stream.
184 */
185 class StringEncoder implements StreamTransformer<String, List<int>> {
186 var _encoder;
187
188 /**
189 * Create a new [StringEncoder] with an optional [encoding]. The default
190 * value for [encoding] is [Encoding.UTF_8].
191 */
192 StringEncoder([Encoding encoding = Encoding.UTF_8]) {
193 switch (encoding) {
194 case Encoding.UTF_8:
195 _encoder = new Utf8Encoder();
196 break;
197 case Encoding.ASCII:
198 _encoder = new _AsciiEncoder();
199 break;
200 case Encoding.ISO_8859_1:
201 _encoder = new _Latin1Encoder();
202 break;
203 case Encoding.SYSTEM:
204 if (Platform.operatingSystem == "windows") {
205 _encoder = new _WindowsCodePageEncoder();
206 } else {
207 _encoder = new Utf8Encoder();
208 }
209 break;
210 default:
211 throw new ArgumentError("Unsupported encoding '$encoding'");
212 } 85 }
213 } 86 add(source);
214 87 if (isLast) close();
215 Stream<List<int>> bind(Stream<String> stream) => _encoder.bind(stream);
216 }
217
218
219 // Utility function to synchronously decode a list of bytes.
220 String _decodeString(List<int> bytes, [Encoding encoding = Encoding.UTF_8]) {
221 if (bytes.length == 0) return "";
222 if (encoding == Encoding.UTF_8) {
223 return UTF8.decode(bytes, allowMalformed: true);
224 }
225 var string;
226 var error;
227 var controller = new StreamController(sync: true);
228 controller.stream
229 .transform(new StringDecoder(encoding))
230 .listen((data) {
231 // The StringDecoder decodes every encoding (except UTF-8) in one go.
232 assert(string == null);
233 string = data;
234 }, onError: (e) => error = e);
235 controller.add(bytes);
236 controller.close();
237 if (error != null) throw error;
238 assert(string != null);
239 return string;
240 }
241
242
243 // Utility function to synchronously encode a String.
244 // Will throw an exception if the encoding is invalid.
245 List<int> _encodeString(String string, [Encoding encoding = Encoding.UTF_8]) {
246 if (string.length == 0) return [];
247 if (encoding == Encoding.UTF_8) return UTF8.encode(string);
248 var bytes;
249 var controller = new StreamController(sync: true);
250 controller.stream
251 .transform(new StringEncoder(encoding))
252 .listen((data) {
253 // The StringEncoder encodes every encoding (except UTF-8) in one go.
254 assert(bytes == null);
255 bytes = data;
256 });
257 controller.add(string);
258 controller.close();
259 assert(bytes != null);
260 return bytes;
261 }
262
263
264 abstract class _SingleByteDecoder
265 extends StreamEventTransformer<List<int>, String> {
266 final int _replacementChar;
267
268 _SingleByteDecoder(this._replacementChar);
269
270 void handleData(List<int> data, EventSink<String> sink) {
271 var buffer = new List<int>(data.length);
272 for (int i = 0; i < data.length; i++) {
273 int char = _decodeByte(data[i]);
274 if (char < 0) char = _replacementChar;
275 buffer[i] = char;
276 }
277 sink.add(new String.fromCharCodes(buffer));
278 }
279
280 int _decodeByte(int byte);
281 }
282
283
284 // Utility class for decoding ascii data delivered as a stream of
285 // bytes.
286 class _AsciiDecoder extends _SingleByteDecoder {
287 _AsciiDecoder(int replacementChar) : super(replacementChar);
288
289 int _decodeByte(int byte) => ((byte & 0x7f) == byte) ? byte : -1;
290 }
291
292
293 // Utility class for decoding Latin-1 data delivered as a stream of
294 // bytes.
295 class _Latin1Decoder extends _SingleByteDecoder {
296 _Latin1Decoder(int replacementChar) : super(replacementChar);
297
298 int _decodeByte(int byte) => ((byte & 0xFF) == byte) ? byte : -1;
299 }
300
301
302 abstract class _SingleByteEncoder
303 extends StreamEventTransformer<String, List<int>> {
304 void handleData(String data, EventSink<List<int>> sink) {
305 var bytes = _encode(data);
306 if (bytes == null) {
307 sink.addError(new FormatException("Invalid character for encoding"));
308 sink.close();
309 } else {
310 sink.add(bytes);
311 }
312 }
313
314 List<int> _encode(String string);
315 }
316
317
318 // Utility class for encoding a string into an ASCII byte stream.
319 class _AsciiEncoder extends _SingleByteEncoder {
320 List<int> _encode(String string) {
321 var bytes = string.codeUnits;
322 for (var byte in bytes) {
323 if (byte > 127) return null;
324 }
325 return bytes;
326 } 88 }
327 } 89 }
328 90
329 91
330 // Utility class for encoding a string into a Latin1 byte stream. 92 class _WindowsCodePageDecoder extends Converter<List<int>, String> {
331 class _Latin1Encoder extends _SingleByteEncoder { 93
332 List<int> _encode(String string) { 94 const _WindowsCodePageDecoder();
333 var bytes = string.codeUnits; 95
334 for (var byte in bytes) { 96 String convert(List<int> input) {
335 if (byte > 255) return null; 97 return _decodeBytes(input);
336 }
337 return bytes;
338 } 98 }
339 }
340 99
100 /**
101 * Starts a chunked conversion.
102 */
103 ByteConversionSink startChunkedConversion(
104 ChunkedConversionSink<String> sink) {
105 return new _WindowsCodePageDecoderSink(sink);
106 }
341 107
342 // Utility class for encoding a string into a current windows 108 // Override the base-class' bind, to provide a better type.
343 // code page byte list. 109 Stream<String> bind(Stream<List<int>> stream) => super.bind(stream);
344 // Implemented on top of a _SingleByteEncoder, even though it's not really a
345 // single byte encoder, to avoid copying boilerplate.
346 class _WindowsCodePageEncoder extends _SingleByteEncoder {
347 List<int> _encode(String string) => _encodeString(string);
348
349 external static List<int> _encodeString(String string);
350 }
351
352
353 // Utility class for decoding Windows current code page data delivered
354 // as a stream of bytes.
355 class _WindowsCodePageDecoder extends StreamEventTransformer<List<int>, String> {
356 void handleData(List<int> data, EventSink<String> sink) {
357 sink.add(_decodeBytes(data));
358 }
359 110
360 external static String _decodeBytes(List<int> bytes); 111 external static String _decodeBytes(List<int> bytes);
361 } 112 }
113
114 class _WindowsCodePageDecoderSink extends ByteConversionSinkBase {
115 // TODO(floitsch): provide more efficient conversions when the input is
116 // a slice.
117
118 final StringConversionSink _sink;
119
120 _WindowsCodePageDecoderSink(this._sink);
121
122 void close() {
123 _sink.close();
124 }
125
126 void add(List<int> bytes) {
127 _sink.add(_WindowsCodePageDecoder._decodeBytes(bytes));
128 }
129 }
OLDNEW
« no previous file with comments | « sdk/lib/io/stdio.dart ('k') | sdk/lib/io/websocket_impl.dart » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698