| OLD | NEW |
| 1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
| 4 | 4 |
| 5 part of dart.io; | 5 part of dart.io; |
| 6 | 6 |
| 7 const SYSTEM_ENCODING = const SystemEncoding(); |
| 8 |
| 7 /** | 9 /** |
| 8 * String encodings. | 10 * The system encoding is the current code page on Windows and UTF-8 on |
| 11 * Linux and Mac. |
| 9 */ | 12 */ |
| 10 class Encoding { | 13 class SystemEncoding extends Encoding { |
| 11 static const Encoding UTF_8 = const Encoding._internal("utf-8"); | 14 const SystemEncoding(); |
| 12 static const Encoding ISO_8859_1 = const Encoding._internal("iso-8859-1"); | |
| 13 static const Encoding ASCII = const Encoding._internal("us-ascii"); | |
| 14 | 15 |
| 15 /** | 16 List<int> encode(String input) => encoder.convert(input); |
| 16 * SYSTEM encoding is the current code page on Windows and UTF-8 on | 17 String decode(List<int> encoded) => decoder.convert(encoded); |
| 17 * Linux and Mac. | |
| 18 */ | |
| 19 static const Encoding SYSTEM = const Encoding._internal("system"); | |
| 20 | 18 |
| 21 // All aliasses (in lowercase) of supported encoding from | 19 Converter<String, List<int>> get encoder { |
| 22 // http://www.iana.org/assignments/character-sets/character-sets.xml. | 20 if (Platform.operatingSystem == "windows") { |
| 23 static Map<String, Encoding> _nameToEncoding = <String, Encoding> { | 21 return const _WindowsCodePageEncoder(); |
| 24 // ISO_8859-1:1987. | 22 } else { |
| 25 "iso_8859-1:1987": ISO_8859_1, | 23 return const Utf8Encoder(); |
| 26 "iso-ir-100": ISO_8859_1, | 24 } |
| 27 "iso_8859-1": ISO_8859_1, | 25 } |
| 28 "iso-8859-1": ISO_8859_1, | |
| 29 "latin1": ISO_8859_1, | |
| 30 "l1": ISO_8859_1, | |
| 31 "ibm819": ISO_8859_1, | |
| 32 "cp819": ISO_8859_1, | |
| 33 "csisolatin1": ISO_8859_1, | |
| 34 | 26 |
| 35 // US-ASCII. | 27 Converter<List<int>, String> get decoder { |
| 36 "iso-ir-6": ASCII, | 28 if (Platform.operatingSystem == "windows") { |
| 37 "ansi_x3.4-1968": ASCII, | 29 return const _WindowsCodePageDecoder(); |
| 38 "ansi_x3.4-1986": ASCII, | 30 } else { |
| 39 "iso_646.irv:1991": ASCII, | 31 return const Utf8Decoder(); |
| 40 "iso646-us": ASCII, | 32 } |
| 41 "us-ascii": ASCII, | 33 } |
| 42 "us": ASCII, | 34 } |
| 43 "ibm367": ASCII, | |
| 44 "cp367": ASCII, | |
| 45 "csascii": ASCII, | |
| 46 "ascii": ASCII, // This is not in the IANA official names. | |
| 47 | 35 |
| 48 // UTF-8. | 36 class _WindowsCodePageEncoder extends Converter<String, List<int>> { |
| 49 "csutf8": UTF_8, | |
| 50 "utf-8": UTF_8 | |
| 51 }; | |
| 52 | 37 |
| 53 /** | 38 const _WindowsCodePageEncoder(); |
| 54 * Gets an [Encoding] object from the name of the character set | 39 |
| 55 * name. The names used are the IANA official names for the | 40 List<int> convert(String input) { |
| 56 * character set (see | 41 List<int> encoded = _encodeString(input); |
| 57 * http://www.iana.org/assignments/character-sets/character-sets.xml). | 42 if (encoded == null) { |
| 58 * | 43 throw new FormatException("Invalid character for encoding"); |
| 59 * The [name] passed is case insensitive. | 44 } |
| 60 * | 45 return encoded; |
| 61 * If character set is not supported [:null:] is returned. | |
| 62 */ | |
| 63 static Encoding fromName(String name) { | |
| 64 if (name == null) return null; | |
| 65 name = name.toLowerCase(); | |
| 66 return _nameToEncoding[name]; | |
| 67 } | 46 } |
| 68 | 47 |
| 69 /** | 48 /** |
| 70 * Name of the encoding. This will be the lower-case version of one of the | 49 * Starts a chunked conversion. |
| 71 * IANA official names for the character set (see | |
| 72 * http://www.iana.org/assignments/character-sets/character-sets.xml) | |
| 73 */ | 50 */ |
| 74 final String name; | 51 StringConversionSink startChunkedConversion( |
| 52 ChunkedConversionSink<List<int>> sink) { |
| 53 return new _WindowsCodePageEncoderSink(sink); |
| 54 } |
| 75 | 55 |
| 76 const Encoding._internal(String this.name); | 56 // Override the base-class' bind, to provide a better type. |
| 57 Stream<List<int>> bind(Stream<String> stream) => super.bind(stream); |
| 58 |
| 59 external static List<int> _encodeString(String string); |
| 77 } | 60 } |
| 78 | 61 |
| 79 const UTF_8 = Encoding.UTF_8; | 62 class _WindowsCodePageEncoderSink extends StringConversionSinkBase { |
| 80 const ISO_8859_1 = Encoding.ISO_8859_1; | 63 // TODO(floitsch): provide more efficient conversions when the input is |
| 81 const ASCII = Encoding.ASCII; | 64 // not a String. |
| 82 | 65 |
| 83 /** | 66 final ByteConversionSink _sink; |
| 84 * Stream transformer that can decode a stream of bytes into a stream of | |
| 85 * strings using [encoding]. | |
| 86 * | |
| 87 * Invalid or forbidden byte-sequences will not produce errors, but will instead | |
| 88 * insert [replacementChar] in the decoded strings. | |
| 89 */ | |
| 90 class StringDecoder implements StreamTransformer<List<int>, String> { | |
| 91 var _decoder; | |
| 92 | 67 |
| 93 static const _UNICODE_REPLACEMENT_CHARACTER_CODEPOINT = 0xFFFD; | 68 _WindowsCodePageEncoderSink(this._sink); |
| 94 | 69 |
| 95 /** | 70 void close() { |
| 96 * Decodes a stream of bytes into a `String` with an optional | 71 _sink.close(); |
| 97 * [encoding] and [replacementChar]. | |
| 98 * | |
| 99 * The default value for [encoding] is [Encoding.UTF_8]. | |
| 100 * | |
| 101 * The default value for [replacementChar] is code point U+FFFD. | |
| 102 * | |
| 103 * Completes with the decoded `String` when the stream is done. | |
| 104 */ | |
| 105 static Future<String> decode( | |
| 106 Stream<List<int>> stream, | |
| 107 [Encoding encoding = Encoding.UTF_8, | |
| 108 int replacementChar = _UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { | |
| 109 if (replacementChar != null && | |
| 110 replacementChar != _UNICODE_REPLACEMENT_CHARACTER_CODEPOINT) { | |
| 111 throw new UnsupportedError("replacement character must be null or " | |
| 112 "the Unicode replacement character"); | |
| 113 } | |
| 114 return stream | |
| 115 .transform(new StringDecoder(encoding, replacementChar)) | |
| 116 .fold( | |
| 117 new StringBuffer(), | |
| 118 (prev, data) => prev..write(data)) | |
| 119 .then((sb) => sb.toString()); | |
| 120 } | 72 } |
| 121 | 73 |
| 122 /** | 74 void add(String string) { |
| 123 * Create a new [StringDecoder] with an optional [encoding] and | 75 List<int> encoded = _WindowsCodePageEncoder._encodeString(string); |
| 124 * [replacementChar]. | 76 if (encoded == null) { |
| 125 * | 77 throw new FormatException("Invalid character for encoding"); |
| 126 * The default value for [encoding] is [Encoding.UTF_8]. | |
| 127 * | |
| 128 * The default value for [replacementChar] is code point U+FFFD. | |
| 129 */ | |
| 130 StringDecoder([Encoding encoding = Encoding.UTF_8, int replacementChar]) { | |
| 131 switch (encoding) { | |
| 132 case Encoding.UTF_8: | |
| 133 if (replacementChar != null && | |
| 134 replacementChar != _UNICODE_REPLACEMENT_CHARACTER_CODEPOINT) { | |
| 135 throw new UnsupportedError("replacement character must be null or " | |
| 136 "the Unicode replacement character"); | |
| 137 } | |
| 138 _decoder = new Utf8Decoder(allowMalformed: true); | |
| 139 break; | |
| 140 case Encoding.ASCII: | |
| 141 if (replacementChar == null) { | |
| 142 replacementChar = '?'.codeUnitAt(0); | |
| 143 } else if (replacementChar > 127) { | |
| 144 throw new ArgumentError("Invalid replacement character for ASCII"); | |
| 145 } | |
| 146 _decoder = new _AsciiDecoder(replacementChar); | |
| 147 break; | |
| 148 case Encoding.ISO_8859_1: | |
| 149 if (replacementChar == null) { | |
| 150 replacementChar = '?'.codeUnitAt(0); | |
| 151 } else if (replacementChar > 255) { | |
| 152 throw new ArgumentError( | |
| 153 "Invalid replacement character for ISO_8859_1"); | |
| 154 } | |
| 155 _decoder = new _Latin1Decoder(replacementChar); | |
| 156 break; | |
| 157 case Encoding.SYSTEM: | |
| 158 if (Platform.operatingSystem == "windows") { | |
| 159 _decoder = new _WindowsCodePageDecoder(); | |
| 160 } else { | |
| 161 if (replacementChar != null) { | |
| 162 // TODO(ajohnsen): Handle replacement character. | |
| 163 throw new UnsupportedError( | |
| 164 "Replacement character is not supported for SYSTEM encoding"); | |
| 165 } | |
| 166 _decoder = new Utf8Decoder(allowMalformed: true); | |
| 167 } | |
| 168 break; | |
| 169 default: | |
| 170 throw new ArgumentError("Unsupported encoding '$encoding'"); | |
| 171 } | 78 } |
| 79 _sink.add(encoded); |
| 172 } | 80 } |
| 173 | 81 |
| 174 Stream<String> bind(Stream<List<int>> stream) => _decoder.bind(stream); | 82 void addSlice(String source, int start, int end, bool isLast) { |
| 175 } | 83 if (start != 0 || end != source.length) { |
| 176 | 84 source = source.substring(start, end); |
| 177 | |
| 178 /** | |
| 179 * Stream transformer that can encode a stream of strings info a stream of | |
| 180 * bytes using [encoding]. | |
| 181 * | |
| 182 * Strings that cannot be represented in the given encoding will result in an | |
| 183 * error and a close event on the stream. | |
| 184 */ | |
| 185 class StringEncoder implements StreamTransformer<String, List<int>> { | |
| 186 var _encoder; | |
| 187 | |
| 188 /** | |
| 189 * Create a new [StringDecoder] with an optional [encoding] and | |
| 190 * [replacementChar]. | |
| 191 */ | |
| 192 StringEncoder([Encoding encoding = Encoding.UTF_8]) { | |
| 193 switch (encoding) { | |
| 194 case Encoding.UTF_8: | |
| 195 _encoder = new Utf8Encoder(); | |
| 196 break; | |
| 197 case Encoding.ASCII: | |
| 198 _encoder = new _AsciiEncoder(); | |
| 199 break; | |
| 200 case Encoding.ISO_8859_1: | |
| 201 _encoder = new _Latin1Encoder(); | |
| 202 break; | |
| 203 case Encoding.SYSTEM: | |
| 204 if (Platform.operatingSystem == "windows") { | |
| 205 _encoder = new _WindowsCodePageEncoder(); | |
| 206 } else { | |
| 207 _encoder = new Utf8Encoder(); | |
| 208 } | |
| 209 break; | |
| 210 default: | |
| 211 throw new ArgumentError("Unsupported encoding '$encoding'"); | |
| 212 } | 85 } |
| 213 } | 86 add(source); |
| 214 | 87 if (isLast) close(); |
| 215 Stream<List<int>> bind(Stream<String> stream) => _encoder.bind(stream); | |
| 216 } | |
| 217 | |
| 218 | |
| 219 // Utility function to synchronously decode a list of bytes. | |
| 220 String _decodeString(List<int> bytes, [Encoding encoding = Encoding.UTF_8]) { | |
| 221 if (bytes.length == 0) return ""; | |
| 222 if (encoding == Encoding.UTF_8) { | |
| 223 return UTF8.decode(bytes, allowMalformed: true); | |
| 224 } | |
| 225 var string; | |
| 226 var error; | |
| 227 var controller = new StreamController(sync: true); | |
| 228 controller.stream | |
| 229 .transform(new StringDecoder(encoding)) | |
| 230 .listen((data) { | |
| 231 // The StringEncoder decodes every encoding (except UTF-8) in one go. | |
| 232 assert(string == null); | |
| 233 string = data; | |
| 234 }, onError: (e) => error = e); | |
| 235 controller.add(bytes); | |
| 236 controller.close(); | |
| 237 if (error != null) throw error; | |
| 238 assert(string != null); | |
| 239 return string; | |
| 240 } | |
| 241 | |
| 242 | |
| 243 // Utility function to synchronously encode a String. | |
| 244 // Will throw an exception if the encoding is invalid. | |
| 245 List<int> _encodeString(String string, [Encoding encoding = Encoding.UTF_8]) { | |
| 246 if (string.length == 0) return []; | |
| 247 if (encoding == Encoding.UTF_8) return UTF8.encode(string); | |
| 248 var bytes; | |
| 249 var controller = new StreamController(sync: true); | |
| 250 controller.stream | |
| 251 .transform(new StringEncoder(encoding)) | |
| 252 .listen((data) { | |
| 253 // The StringEncoder encodes every encoding (except UTF-8) in one go. | |
| 254 assert(bytes == null); | |
| 255 bytes = data; | |
| 256 }); | |
| 257 controller.add(string); | |
| 258 controller.close(); | |
| 259 assert(bytes != null); | |
| 260 return bytes; | |
| 261 } | |
| 262 | |
| 263 | |
| 264 abstract class _SingleByteDecoder | |
| 265 extends StreamEventTransformer<List<int>, String> { | |
| 266 final int _replacementChar; | |
| 267 | |
| 268 _SingleByteDecoder(this._replacementChar); | |
| 269 | |
| 270 void handleData(List<int> data, EventSink<String> sink) { | |
| 271 var buffer = new List<int>(data.length); | |
| 272 for (int i = 0; i < data.length; i++) { | |
| 273 int char = _decodeByte(data[i]); | |
| 274 if (char < 0) char = _replacementChar; | |
| 275 buffer[i] = char; | |
| 276 } | |
| 277 sink.add(new String.fromCharCodes(buffer)); | |
| 278 } | |
| 279 | |
| 280 int _decodeByte(int byte); | |
| 281 } | |
| 282 | |
| 283 | |
| 284 // Utility class for decoding ascii data delivered as a stream of | |
| 285 // bytes. | |
| 286 class _AsciiDecoder extends _SingleByteDecoder { | |
| 287 _AsciiDecoder(int replacementChar) : super(replacementChar); | |
| 288 | |
| 289 int _decodeByte(int byte) => ((byte & 0x7f) == byte) ? byte : -1; | |
| 290 } | |
| 291 | |
| 292 | |
| 293 // Utility class for decoding Latin-1 data delivered as a stream of | |
| 294 // bytes. | |
| 295 class _Latin1Decoder extends _SingleByteDecoder { | |
| 296 _Latin1Decoder(int replacementChar) : super(replacementChar); | |
| 297 | |
| 298 int _decodeByte(int byte) => ((byte & 0xFF) == byte) ? byte : -1; | |
| 299 } | |
| 300 | |
| 301 | |
| 302 abstract class _SingleByteEncoder | |
| 303 extends StreamEventTransformer<String, List<int>> { | |
| 304 void handleData(String data, EventSink<List<int>> sink) { | |
| 305 var bytes = _encode(data); | |
| 306 if (bytes == null) { | |
| 307 sink.addError(new FormatException("Invalid character for encoding")); | |
| 308 sink.close(); | |
| 309 } else { | |
| 310 sink.add(bytes); | |
| 311 } | |
| 312 } | |
| 313 | |
| 314 List<int> _encode(String string); | |
| 315 } | |
| 316 | |
| 317 | |
| 318 // Utility class for encoding a string into an ASCII byte stream. | |
| 319 class _AsciiEncoder extends _SingleByteEncoder { | |
| 320 List<int> _encode(String string) { | |
| 321 var bytes = string.codeUnits; | |
| 322 for (var byte in bytes) { | |
| 323 if (byte > 127) return null; | |
| 324 } | |
| 325 return bytes; | |
| 326 } | 88 } |
| 327 } | 89 } |
| 328 | 90 |
| 329 | 91 |
| 330 // Utility class for encoding a string into a Latin1 byte stream. | 92 class _WindowsCodePageDecoder extends Converter<List<int>, String> { |
| 331 class _Latin1Encoder extends _SingleByteEncoder { | 93 |
| 332 List<int> _encode(String string) { | 94 const _WindowsCodePageDecoder(); |
| 333 var bytes = string.codeUnits; | 95 |
| 334 for (var byte in bytes) { | 96 String convert(List<int> input) { |
| 335 if (byte > 255) return null; | 97 return _decodeBytes(input); |
| 336 } | |
| 337 return bytes; | |
| 338 } | 98 } |
| 339 } | |
| 340 | 99 |
| 100 /** |
| 101 * Starts a chunked conversion. |
| 102 */ |
| 103 ByteConversionSink startChunkedConversion( |
| 104 ChunkedConversionSink<String> sink) { |
| 105 return new _WindowsCodePageDecoderSink(sink); |
| 106 } |
| 341 | 107 |
| 342 // Utility class for encoding a string into a current windows | 108 // Override the base-class' bind, to provide a better type. |
| 343 // code page byte list. | 109 Stream<String> bind(Stream<List<int>> stream) => super.bind(stream); |
| 344 // Implemented on top of a _SingleByteEncoder, even though it's not really a | |
| 345 // single byte encoder, to avoid copying boilerplate. | |
| 346 class _WindowsCodePageEncoder extends _SingleByteEncoder { | |
| 347 List<int> _encode(String string) => _encodeString(string); | |
| 348 | |
| 349 external static List<int> _encodeString(String string); | |
| 350 } | |
| 351 | |
| 352 | |
| 353 // Utility class for decoding Windows current code page data delivered | |
| 354 // as a stream of bytes. | |
| 355 class _WindowsCodePageDecoder extends StreamEventTransformer<List<int>, String>
{ | |
| 356 void handleData(List<int> data, EventSink<String> sink) { | |
| 357 sink.add(_decodeBytes(data)); | |
| 358 } | |
| 359 | 110 |
| 360 external static String _decodeBytes(List<int> bytes); | 111 external static String _decodeBytes(List<int> bytes); |
| 361 } | 112 } |
| 113 |
| 114 class _WindowsCodePageDecoderSink extends ByteConversionSinkBase { |
| 115 // TODO(floitsch): provide more efficient conversions when the input is |
| 116 // a slice. |
| 117 |
| 118 final StringConversionSink _sink; |
| 119 |
| 120 _WindowsCodePageDecoderSink(this._sink); |
| 121 |
| 122 void close() { |
| 123 _sink.close(); |
| 124 } |
| 125 |
| 126 void add(List<int> bytes) { |
| 127 _sink.add(_WindowsCodePageDecoder._decodeBytes(bytes)); |
| 128 } |
| 129 } |
| OLD | NEW |