Chromium Code Reviews| Index: sdk/lib/io/string_transformer.dart |
| diff --git a/sdk/lib/io/string_transformer.dart b/sdk/lib/io/string_transformer.dart |
| index 0f2a6c39ea1baaf034464e2bd70c96419cc61e23..e68342bb73c081f775a97034ee2bdf72a7b646d2 100644 |
| --- a/sdk/lib/io/string_transformer.dart |
| +++ b/sdk/lib/io/string_transformer.dart |
| @@ -4,358 +4,174 @@ |
| part of dart.io; |
| +// All aliases (in lowercase) of the supported encodings, taken from |
| +// http://www.iana.org/assignments/character-sets/character-sets.xml. |
| +Map<String, Encoding> _nameToEncoding = <String, Encoding> { |
| + // ISO_8859-1:1987. |
| + "iso_8859-1:1987": LATIN1, |
| + "iso-ir-100": LATIN1, |
| + "iso_8859-1": LATIN1, |
| + "iso-8859-1": LATIN1, |
| + "latin1": LATIN1, |
| + "l1": LATIN1, |
| + "ibm819": LATIN1, |
| + "cp819": LATIN1, |
| + "csisolatin1": LATIN1, |
| + |
| + // US-ASCII. |
| + "iso-ir-6": ASCII, |
| + "ansi_x3.4-1968": ASCII, |
| + "ansi_x3.4-1986": ASCII, |
| + "iso_646.irv:1991": ASCII, |
| + "iso646-us": ASCII, |
| + "us-ascii": ASCII, |
| + "us": ASCII, |
| + "ibm367": ASCII, |
| + "cp367": ASCII, |
| + "csascii": ASCII, |
| + "ascii": ASCII, // This is not in the IANA official names. |
| + |
| + // UTF-8. |
| + "csutf8": UTF8, |
| + "utf-8": UTF8 |
| +}; |
| + |
| /** |
| - * String encodings. |
| + * Gets an [Encoding] object from the name of a character set. |
| + * The names used are the IANA official names for the |
| + * character set (see |
| + * http://www.iana.org/assignments/character-sets/character-sets.xml). |
| + * |
| + * The [name] passed is case insensitive. |
| + * |
| + * If [name] is null or the character set is not supported, [:null:] is returned. |
| */ |
| -class Encoding { |
| - static const Encoding UTF_8 = const Encoding._internal("utf-8"); |
| - static const Encoding ISO_8859_1 = const Encoding._internal("iso-8859-1"); |
| - static const Encoding ASCII = const Encoding._internal("us-ascii"); |
| - |
| - /** |
| - * SYSTEM encoding is the current code page on Windows and UTF-8 on |
| - * Linux and Mac. |
| - */ |
| - static const Encoding SYSTEM = const Encoding._internal("system"); |
| - |
| - // All aliasses (in lowercase) of supported encoding from |
| - // http://www.iana.org/assignments/character-sets/character-sets.xml. |
| - static Map<String, Encoding> _nameToEncoding = <String, Encoding> { |
| - // ISO_8859-1:1987. |
| - "iso_8859-1:1987": ISO_8859_1, |
| - "iso-ir-100": ISO_8859_1, |
| - "iso_8859-1": ISO_8859_1, |
| - "iso-8859-1": ISO_8859_1, |
| - "latin1": ISO_8859_1, |
| - "l1": ISO_8859_1, |
| - "ibm819": ISO_8859_1, |
| - "cp819": ISO_8859_1, |
| - "csisolatin1": ISO_8859_1, |
| - |
| - // US-ASCII. |
| - "iso-ir-6": ASCII, |
| - "ansi_x3.4-1968": ASCII, |
| - "ansi_x3.4-1986": ASCII, |
| - "iso_646.irv:1991": ASCII, |
| - "iso646-us": ASCII, |
| - "us-ascii": ASCII, |
| - "us": ASCII, |
| - "ibm367": ASCII, |
| - "cp367": ASCII, |
| - "csascii": ASCII, |
| - "ascii": ASCII, // This is not in the IANA official names. |
| - |
| - // UTF-8. |
| - "csutf8": UTF_8, |
| - "utf-8": UTF_8 |
| - }; |
| - |
| - /** |
| - * Gets an [Encoding] object from the name of the character set |
| - * name. The names used are the IANA official names for the |
| - * character set (see |
| - * http://www.iana.org/assignments/character-sets/character-sets.xml). |
| - * |
| - * The [name] passed is case insensitive. |
| - * |
| - * If character set is not supported [:null:] is returned. |
| - */ |
| - static Encoding fromName(String name) { |
| +Encoding encodingFromName(String name) { |
|
Søren Gjesse
2013/08/26 08:03:15
Should this move to dart:convert as well? With the…
floitsch
2013/08/26 09:33:40
Moved to Encoding.getByName.
No option to register…
|
| if (name == null) return null; |
| name = name.toLowerCase(); |
| return _nameToEncoding[name]; |
| - } |
| - |
| - /** |
| - * Name of the encoding. This will be the lower-case version of one of the |
| - * IANA official names for the character set (see |
| - * http://www.iana.org/assignments/character-sets/character-sets.xml) |
| - */ |
| - final String name; |
| - |
| - const Encoding._internal(String this.name); |
| } |
| -const UTF_8 = Encoding.UTF_8; |
| -const ISO_8859_1 = Encoding.ISO_8859_1; |
| -const ASCII = Encoding.ASCII; |
| +const SYSTEM_ENCODING = const SystemEncoding(); |
| /** |
| - * Stream transformer that can decode a stream of bytes into a stream of |
| - * strings using [encoding]. |
| - * |
| - * Invalid or forbidden byte-sequences will not produce errors, but will instead |
| - * insert [replacementChar] in the decoded strings. |
| + * The system encoding is the current code page on Windows and UTF-8 on |
| + * Linux and Mac. |
| */ |
| -class StringDecoder implements StreamTransformer<List<int>, String> { |
| - var _decoder; |
| +class SystemEncoding extends Encoding { |
| + const SystemEncoding(); |
| - static const _UNICODE_REPLACEMENT_CHARACTER_CODEPOINT = 0xFFFD; |
| + List<int> encode(String input) => encoder.convert(input); |
| + String decode(List<int> encoded) => decoder.convert(encoded); |
| - /** |
| - * Decodes a stream of bytes into a `String` with an optional |
| - * [encoding] and [replacementChar]. |
| - * |
| - * The default value for [encoding] is [Encoding.UTF_8]. |
| - * |
| - * The default value for [replacementChar] is code point U+FFFD. |
| - * |
| - * Completes with the decoded `String` when the stream is done. |
| - */ |
| - static Future<String> decode( |
| - Stream<List<int>> stream, |
| - [Encoding encoding = Encoding.UTF_8, |
| - int replacementChar = _UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { |
| - if (replacementChar != null && |
| - replacementChar != _UNICODE_REPLACEMENT_CHARACTER_CODEPOINT) { |
| - throw new UnsupportedError("replacement character must be null or " |
| - "the Unicode replacement character"); |
| + Converter<String, List<int>> get encoder { |
| + if (Platform.operatingSystem == "windows") { |
| + return const _WindowsCodePageEncoder(); |
| + } else { |
| + return const Utf8Encoder(); |
| } |
| - return stream |
| - .transform(new StringDecoder(encoding, replacementChar)) |
| - .fold( |
| - new StringBuffer(), |
| - (prev, data) => prev..write(data)) |
| - .then((sb) => sb.toString()); |
| } |
| - /** |
| - * Create a new [StringDecoder] with an optional [encoding] and |
| - * [replacementChar]. |
| - * |
| - * The default value for [encoding] is [Encoding.UTF_8]. |
| - * |
| - * The default value for [replacementChar] is code point U+FFFD. |
| - */ |
| - StringDecoder([Encoding encoding = Encoding.UTF_8, int replacementChar]) { |
| - switch (encoding) { |
| - case Encoding.UTF_8: |
| - if (replacementChar != null && |
| - replacementChar != _UNICODE_REPLACEMENT_CHARACTER_CODEPOINT) { |
| - throw new UnsupportedError("replacement character must be null or " |
| - "the Unicode replacement character"); |
| - } |
| - _decoder = new Utf8Decoder(allowMalformed: true); |
| - break; |
| - case Encoding.ASCII: |
| - if (replacementChar == null) { |
| - replacementChar = '?'.codeUnitAt(0); |
| - } else if (replacementChar > 127) { |
| - throw new ArgumentError("Invalid replacement character for ASCII"); |
| - } |
| - _decoder = new _AsciiDecoder(replacementChar); |
| - break; |
| - case Encoding.ISO_8859_1: |
| - if (replacementChar == null) { |
| - replacementChar = '?'.codeUnitAt(0); |
| - } else if (replacementChar > 255) { |
| - throw new ArgumentError( |
| - "Invalid replacement character for ISO_8859_1"); |
| - } |
| - _decoder = new _Latin1Decoder(replacementChar); |
| - break; |
| - case Encoding.SYSTEM: |
| - if (Platform.operatingSystem == "windows") { |
| - _decoder = new _WindowsCodePageDecoder(); |
| - } else { |
| - if (replacementChar != null) { |
| - // TODO(ajohnsen): Handle replacement character. |
| - throw new UnsupportedError( |
| - "Replacement character is not supported for SYSTEM encoding"); |
| - } |
| - _decoder = new Utf8Decoder(allowMalformed: true); |
| - } |
| - break; |
| - default: |
| - throw new ArgumentError("Unsupported encoding '$encoding'"); |
| + Converter<List<int>, String> get decoder { |
| + if (Platform.operatingSystem == "windows") { |
| + return const _WindowsCodePageDecoder(); |
| + } else { |
| + return const Utf8Decoder(); |
| } |
| } |
| - |
| - Stream<String> bind(Stream<List<int>> stream) => _decoder.bind(stream); |
| } |
| +class _WindowsCodePageEncoder extends Converter<String, List<int>> { |
| -/** |
| - * Stream transformer that can encode a stream of strings info a stream of |
| - * bytes using [encoding]. |
| - * |
| - * Strings that cannot be represented in the given encoding will result in an |
| - * error and a close event on the stream. |
| - */ |
| -class StringEncoder implements StreamTransformer<String, List<int>> { |
| - var _encoder; |
| + const _WindowsCodePageEncoder(); |
| - /** |
| - * Create a new [StringDecoder] with an optional [encoding] and |
| - * [replacementChar]. |
| - */ |
| - StringEncoder([Encoding encoding = Encoding.UTF_8]) { |
| - switch (encoding) { |
| - case Encoding.UTF_8: |
| - _encoder = new Utf8Encoder(); |
| - break; |
| - case Encoding.ASCII: |
| - _encoder = new _AsciiEncoder(); |
| - break; |
| - case Encoding.ISO_8859_1: |
| - _encoder = new _Latin1Encoder(); |
| - break; |
| - case Encoding.SYSTEM: |
| - if (Platform.operatingSystem == "windows") { |
| - _encoder = new _WindowsCodePageEncoder(); |
| - } else { |
| - _encoder = new Utf8Encoder(); |
| - } |
| - break; |
| - default: |
| - throw new ArgumentError("Unsupported encoding '$encoding'"); |
| + List<int> convert(String input) { |
| + List<int> encoded = _encodeString(input); |
| + if (encoded == null) { |
| + throw new FormatException("Invalid character for encoding"); |
| } |
| + return encoded; |
| } |
| - Stream<List<int>> bind(Stream<String> stream) => _encoder.bind(stream); |
| -} |
| - |
| - |
| -// Utility function to synchronously decode a list of bytes. |
| -String _decodeString(List<int> bytes, [Encoding encoding = Encoding.UTF_8]) { |
| - if (bytes.length == 0) return ""; |
| - if (encoding == Encoding.UTF_8) { |
| - return UTF8.decode(bytes, allowMalformed: true); |
| + /** |
| + * Starts a chunked conversion. |
| + */ |
| + StringConversionSink startChunkedConversion( |
| + ChunkedConversionSink<List<int>> sink) { |
| + return new _WindowsCodePageEncoderSink(sink); |
| } |
| - var string; |
| - var error; |
| - var controller = new StreamController(sync: true); |
| - controller.stream |
| - .transform(new StringDecoder(encoding)) |
| - .listen((data) { |
| - // The StringEncoder decodes every encoding (except UTF-8) in one go. |
| - assert(string == null); |
| - string = data; |
| - }, onError: (e) => error = e); |
| - controller.add(bytes); |
| - controller.close(); |
| - if (error != null) throw error; |
| - assert(string != null); |
| - return string; |
| -} |
| + // Override the base-class' bind, to provide a better type. |
| + Stream<List<int>> bind(Stream<String> stream) => super.bind(stream); |
| -// Utility function to synchronously encode a String. |
| -// Will throw an exception if the encoding is invalid. |
| -List<int> _encodeString(String string, [Encoding encoding = Encoding.UTF_8]) { |
| - if (string.length == 0) return []; |
| - if (encoding == Encoding.UTF_8) return UTF8.encode(string); |
| - var bytes; |
| - var controller = new StreamController(sync: true); |
| - controller.stream |
| - .transform(new StringEncoder(encoding)) |
| - .listen((data) { |
| - // The StringEncoder encodes every encoding (except UTF-8) in one go. |
| - assert(bytes == null); |
| - bytes = data; |
| - }); |
| - controller.add(string); |
| - controller.close(); |
| - assert(bytes != null); |
| - return bytes; |
| + external static List<int> _encodeString(String string); |
| } |
| +class _WindowsCodePageEncoderSink extends StringConversionSinkBase { |
| + // TODO(floitsch): provide more efficient conversions when the input is |
| + // not a String. |
| -abstract class _SingleByteDecoder |
| - extends StreamEventTransformer<List<int>, String> { |
| - final int _replacementChar; |
| + // Typed as the general chunked sink: startChunkedConversion receives a |
| + // ChunkedConversionSink<List<int>>, and only add() and close() are used. |
| + final ChunkedConversionSink<List<int>> _sink; |
| - _SingleByteDecoder(this._replacementChar); |
| + _WindowsCodePageEncoderSink(this._sink); |
| - void handleData(List<int> data, EventSink<String> sink) { |
| - var buffer = new List<int>(data.length); |
| - for (int i = 0; i < data.length; i++) { |
| - int char = _decodeByte(data[i]); |
| - if (char < 0) char = _replacementChar; |
| - buffer[i] = char; |
| - } |
| - sink.add(new String.fromCharCodes(buffer)); |
| + void close() { |
| + _sink.close(); |
| } |
| - int _decodeByte(int byte); |
| -} |
| - |
| - |
| -// Utility class for decoding ascii data delivered as a stream of |
| -// bytes. |
| -class _AsciiDecoder extends _SingleByteDecoder { |
| - _AsciiDecoder(int replacementChar) : super(replacementChar); |
| + /** |
| + * Encodes [string] with the native Windows code page encoder and |
| + * forwards the resulting bytes to the wrapped sink. |
| + * |
| + * Throws a [FormatException] if the native encoder returns null. |
| + */ |
| + void add(String string) { |
| + // The external _encodeString helper is declared on |
| + // _WindowsCodePageEncoder, so it must be referenced through that class. |
| + List<int> encoded = _WindowsCodePageEncoder._encodeString(string); |
| + if (encoded == null) { |
| + throw new FormatException("Invalid character for encoding"); |
| + } |
| + _sink.add(encoded); |
| + } |
| - int _decodeByte(int byte) => ((byte & 0x7f) == byte) ? byte : -1; |
| + void addSlice(String source, int start, int end, bool isLast) { |
| + if (start != 0 || end != source.length) { |
| + source = source.substring(start, end); |
| + } |
| + add(source); |
| + if (isLast) close(); |
| + } |
| } |
| -// Utility class for decoding Latin-1 data delivered as a stream of |
| -// bytes. |
| -class _Latin1Decoder extends _SingleByteDecoder { |
| - _Latin1Decoder(int replacementChar) : super(replacementChar); |
| +class _WindowsCodePageDecoder extends Converter<List<int>, String> { |
| - int _decodeByte(int byte) => ((byte & 0xFF) == byte) ? byte : -1; |
| -} |
| + const _WindowsCodePageDecoder(); |
| - |
| -abstract class _SingleByteEncoder |
| - extends StreamEventTransformer<String, List<int>> { |
| - void handleData(String data, EventSink<List<int>> sink) { |
| - var bytes = _encode(data); |
| - if (bytes == null) { |
| - sink.addError(new FormatException("Invalid character for encoding")); |
| - sink.close(); |
| - } else { |
| - sink.add(bytes); |
| - } |
| + String convert(List<int> input) { |
| + return _decodeBytes(input); |
| } |
| - List<int> _encode(String string); |
| -} |
| - |
| - |
| -// Utility class for encoding a string into an ASCII byte stream. |
| -class _AsciiEncoder extends _SingleByteEncoder { |
| - List<int> _encode(String string) { |
| - var bytes = string.codeUnits; |
| - for (var byte in bytes) { |
| - if (byte > 127) return null; |
| - } |
| - return bytes; |
| + /** |
| + * Starts a chunked conversion. |
| + */ |
| + ByteConversionSink startChunkedConversion( |
| + ChunkedConversionSink<String> sink) { |
| + return new _WindowsCodePageDecoderSink(sink); |
| } |
| -} |
| + // Override the base-class' bind, to provide a better type. |
| + Stream<String> bind(Stream<List<int>> stream) => super.bind(stream); |
| -// Utility class for encoding a string into a Latin1 byte stream. |
| -class _Latin1Encoder extends _SingleByteEncoder { |
| - List<int> _encode(String string) { |
| - var bytes = string.codeUnits; |
| - for (var byte in bytes) { |
| - if (byte > 255) return null; |
| - } |
| - return bytes; |
| - } |
| + external static String _decodeBytes(List<int> bytes); |
| } |
| +class _WindowsCodePageDecoderSink extends ByteConversionSinkBase { |
| + // TODO(floitsch): provide more efficient conversions when the input is |
| + // a slice. |
| -// Utility class for encoding a string into a current windows |
| -// code page byte list. |
| -// Implemented on top of a _SingleByteEncoder, even though it's not really a |
| -// single byte encoder, to avoid copying boilerplate. |
| -class _WindowsCodePageEncoder extends _SingleByteEncoder { |
| - List<int> _encode(String string) => _encodeString(string); |
| + // Typed as the general chunked sink: startChunkedConversion receives a |
| + // ChunkedConversionSink<String>, and only add() and close() are used. |
| + final ChunkedConversionSink<String> _sink; |
| - external static List<int> _encodeString(String string); |
| -} |
| + _WindowsCodePageDecoderSink(this._sink); |
| - |
| -// Utility class for decoding Windows current code page data delivered |
| -// as a stream of bytes. |
| -class _WindowsCodePageDecoder extends StreamEventTransformer<List<int>, String> { |
| - void handleData(List<int> data, EventSink<String> sink) { |
| - sink.add(_decodeBytes(data)); |
| + void close() { |
| + _sink.close(); |
| } |
| - external static String _decodeBytes(List<int> bytes); |
| + void add(List<int> bytes) { |
| + _sink.add(_WindowsCodePageDecoder._decodeBytes(bytes)); |
| + } |
| } |