| Index: sdk/lib/io/string_transformer.dart
|
| diff --git a/sdk/lib/io/string_transformer.dart b/sdk/lib/io/string_transformer.dart
|
| index 0f2a6c39ea1baaf034464e2bd70c96419cc61e23..d4f13fc8337f4a3d4349c8064d4e7a8824db564f 100644
|
| --- a/sdk/lib/io/string_transformer.dart
|
| +++ b/sdk/lib/io/string_transformer.dart
|
| @@ -4,358 +4,126 @@
|
|
|
| part of dart.io;
|
|
|
| -/**
|
| - * String encodings.
|
| - */
|
| -class Encoding {
|
| - static const Encoding UTF_8 = const Encoding._internal("utf-8");
|
| - static const Encoding ISO_8859_1 = const Encoding._internal("iso-8859-1");
|
| - static const Encoding ASCII = const Encoding._internal("us-ascii");
|
| -
|
| - /**
|
| - * SYSTEM encoding is the current code page on Windows and UTF-8 on
|
| - * Linux and Mac.
|
| - */
|
| - static const Encoding SYSTEM = const Encoding._internal("system");
|
| -
|
| - // All aliasses (in lowercase) of supported encoding from
|
| - // http://www.iana.org/assignments/character-sets/character-sets.xml.
|
| - static Map<String, Encoding> _nameToEncoding = <String, Encoding> {
|
| - // ISO_8859-1:1987.
|
| - "iso_8859-1:1987": ISO_8859_1,
|
| - "iso-ir-100": ISO_8859_1,
|
| - "iso_8859-1": ISO_8859_1,
|
| - "iso-8859-1": ISO_8859_1,
|
| - "latin1": ISO_8859_1,
|
| - "l1": ISO_8859_1,
|
| - "ibm819": ISO_8859_1,
|
| - "cp819": ISO_8859_1,
|
| - "csisolatin1": ISO_8859_1,
|
| -
|
| - // US-ASCII.
|
| - "iso-ir-6": ASCII,
|
| - "ansi_x3.4-1968": ASCII,
|
| - "ansi_x3.4-1986": ASCII,
|
| - "iso_646.irv:1991": ASCII,
|
| - "iso646-us": ASCII,
|
| - "us-ascii": ASCII,
|
| - "us": ASCII,
|
| - "ibm367": ASCII,
|
| - "cp367": ASCII,
|
| - "csascii": ASCII,
|
| - "ascii": ASCII, // This is not in the IANA official names.
|
| -
|
| - // UTF-8.
|
| - "csutf8": UTF_8,
|
| - "utf-8": UTF_8
|
| - };
|
| -
|
| - /**
|
| - * Gets an [Encoding] object from the name of the character set
|
| - * name. The names used are the IANA official names for the
|
| - * character set (see
|
| - * http://www.iana.org/assignments/character-sets/character-sets.xml).
|
| - *
|
| - * The [name] passed is case insensitive.
|
| - *
|
| - * If character set is not supported [:null:] is returned.
|
| - */
|
| - static Encoding fromName(String name) {
|
| - if (name == null) return null;
|
| - name = name.toLowerCase();
|
| - return _nameToEncoding[name];
|
| - }
|
| -
|
| - /**
|
| - * Name of the encoding. This will be the lower-case version of one of the
|
| - * IANA official names for the character set (see
|
| - * http://www.iana.org/assignments/character-sets/character-sets.xml)
|
| - */
|
| - final String name;
|
| -
|
| - const Encoding._internal(String this.name);
|
| -}
|
| -
|
| -const UTF_8 = Encoding.UTF_8;
|
| -const ISO_8859_1 = Encoding.ISO_8859_1;
|
| -const ASCII = Encoding.ASCII;
|
| +const SYSTEM_ENCODING = const SystemEncoding();
|
|
|
| /**
|
| - * Stream transformer that can decode a stream of bytes into a stream of
|
| - * strings using [encoding].
|
| - *
|
| - * Invalid or forbidden byte-sequences will not produce errors, but will instead
|
| - * insert [replacementChar] in the decoded strings.
|
| + * The system encoding is the current code page on Windows and UTF-8 on
|
| + * Linux and Mac.
|
| */
|
| -class StringDecoder implements StreamTransformer<List<int>, String> {
|
| - var _decoder;
|
| +class SystemEncoding extends Encoding {
|
| + const SystemEncoding();
|
|
|
| - static const _UNICODE_REPLACEMENT_CHARACTER_CODEPOINT = 0xFFFD;
|
| + List<int> encode(String input) => encoder.convert(input);
|
| + String decode(List<int> encoded) => decoder.convert(encoded);
|
|
|
| - /**
|
| - * Decodes a stream of bytes into a `String` with an optional
|
| - * [encoding] and [replacementChar].
|
| - *
|
| - * The default value for [encoding] is [Encoding.UTF_8].
|
| - *
|
| - * The default value for [replacementChar] is code point U+FFFD.
|
| - *
|
| - * Completes with the decoded `String` when the stream is done.
|
| - */
|
| - static Future<String> decode(
|
| - Stream<List<int>> stream,
|
| - [Encoding encoding = Encoding.UTF_8,
|
| - int replacementChar = _UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
|
| - if (replacementChar != null &&
|
| - replacementChar != _UNICODE_REPLACEMENT_CHARACTER_CODEPOINT) {
|
| - throw new UnsupportedError("replacement character must be null or "
|
| - "the Unicode replacement character");
|
| + Converter<String, List<int>> get encoder {
|
| + if (Platform.operatingSystem == "windows") {
|
| + return const _WindowsCodePageEncoder();
|
| + } else {
|
| + return const Utf8Encoder();
|
| }
|
| - return stream
|
| - .transform(new StringDecoder(encoding, replacementChar))
|
| - .fold(
|
| - new StringBuffer(),
|
| - (prev, data) => prev..write(data))
|
| - .then((sb) => sb.toString());
|
| }
|
|
|
| - /**
|
| - * Create a new [StringDecoder] with an optional [encoding] and
|
| - * [replacementChar].
|
| - *
|
| - * The default value for [encoding] is [Encoding.UTF_8].
|
| - *
|
| - * The default value for [replacementChar] is code point U+FFFD.
|
| - */
|
| - StringDecoder([Encoding encoding = Encoding.UTF_8, int replacementChar]) {
|
| - switch (encoding) {
|
| - case Encoding.UTF_8:
|
| - if (replacementChar != null &&
|
| - replacementChar != _UNICODE_REPLACEMENT_CHARACTER_CODEPOINT) {
|
| - throw new UnsupportedError("replacement character must be null or "
|
| - "the Unicode replacement character");
|
| - }
|
| - _decoder = new Utf8Decoder(allowMalformed: true);
|
| - break;
|
| - case Encoding.ASCII:
|
| - if (replacementChar == null) {
|
| - replacementChar = '?'.codeUnitAt(0);
|
| - } else if (replacementChar > 127) {
|
| - throw new ArgumentError("Invalid replacement character for ASCII");
|
| - }
|
| - _decoder = new _AsciiDecoder(replacementChar);
|
| - break;
|
| - case Encoding.ISO_8859_1:
|
| - if (replacementChar == null) {
|
| - replacementChar = '?'.codeUnitAt(0);
|
| - } else if (replacementChar > 255) {
|
| - throw new ArgumentError(
|
| - "Invalid replacement character for ISO_8859_1");
|
| - }
|
| - _decoder = new _Latin1Decoder(replacementChar);
|
| - break;
|
| - case Encoding.SYSTEM:
|
| - if (Platform.operatingSystem == "windows") {
|
| - _decoder = new _WindowsCodePageDecoder();
|
| - } else {
|
| - if (replacementChar != null) {
|
| - // TODO(ajohnsen): Handle replacement character.
|
| - throw new UnsupportedError(
|
| - "Replacement character is not supported for SYSTEM encoding");
|
| - }
|
| - _decoder = new Utf8Decoder(allowMalformed: true);
|
| - }
|
| - break;
|
| - default:
|
| - throw new ArgumentError("Unsupported encoding '$encoding'");
|
| + Converter<List<int>, String> get decoder {
|
| + if (Platform.operatingSystem == "windows") {
|
| + return const _WindowsCodePageDecoder();
|
| + } else {
|
| + return const Utf8Decoder();
|
| }
|
| }
|
| -
|
| - Stream<String> bind(Stream<List<int>> stream) => _decoder.bind(stream);
|
| }
|
|
|
| +class _WindowsCodePageEncoder extends Converter<String, List<int>> {
|
|
|
| -/**
|
| - * Stream transformer that can encode a stream of strings info a stream of
|
| - * bytes using [encoding].
|
| - *
|
| - * Strings that cannot be represented in the given encoding will result in an
|
| - * error and a close event on the stream.
|
| - */
|
| -class StringEncoder implements StreamTransformer<String, List<int>> {
|
| - var _encoder;
|
| + const _WindowsCodePageEncoder();
|
|
|
| - /**
|
| - * Create a new [StringDecoder] with an optional [encoding] and
|
| - * [replacementChar].
|
| - */
|
| - StringEncoder([Encoding encoding = Encoding.UTF_8]) {
|
| - switch (encoding) {
|
| - case Encoding.UTF_8:
|
| - _encoder = new Utf8Encoder();
|
| - break;
|
| - case Encoding.ASCII:
|
| - _encoder = new _AsciiEncoder();
|
| - break;
|
| - case Encoding.ISO_8859_1:
|
| - _encoder = new _Latin1Encoder();
|
| - break;
|
| - case Encoding.SYSTEM:
|
| - if (Platform.operatingSystem == "windows") {
|
| - _encoder = new _WindowsCodePageEncoder();
|
| - } else {
|
| - _encoder = new Utf8Encoder();
|
| - }
|
| - break;
|
| - default:
|
| - throw new ArgumentError("Unsupported encoding '$encoding'");
|
| + List<int> convert(String input) {
|
| + List<int> encoded = _encodeString(input);
|
| + if (encoded == null) {
|
| + throw new FormatException("Invalid character for encoding");
|
| }
|
| + return encoded;
|
| }
|
|
|
| - Stream<List<int>> bind(Stream<String> stream) => _encoder.bind(stream);
|
| -}
|
| -
|
| -
|
| -// Utility function to synchronously decode a list of bytes.
|
| -String _decodeString(List<int> bytes, [Encoding encoding = Encoding.UTF_8]) {
|
| - if (bytes.length == 0) return "";
|
| - if (encoding == Encoding.UTF_8) {
|
| - return UTF8.decode(bytes, allowMalformed: true);
|
| + /**
|
| + * Starts a chunked conversion.
|
| + */
|
| + StringConversionSink startChunkedConversion(
|
| + ChunkedConversionSink<List<int>> sink) {
|
| + return new _WindowsCodePageEncoderSink(sink);
|
| }
|
| - var string;
|
| - var error;
|
| - var controller = new StreamController(sync: true);
|
| - controller.stream
|
| - .transform(new StringDecoder(encoding))
|
| - .listen((data) {
|
| - // The StringEncoder decodes every encoding (except UTF-8) in one go.
|
| - assert(string == null);
|
| - string = data;
|
| - }, onError: (e) => error = e);
|
| - controller.add(bytes);
|
| - controller.close();
|
| - if (error != null) throw error;
|
| - assert(string != null);
|
| - return string;
|
| -}
|
|
|
| + // Override the base-class' bind, to provide a better type.
|
| + Stream<List<int>> bind(Stream<String> stream) => super.bind(stream);
|
|
|
| -// Utility function to synchronously encode a String.
|
| -// Will throw an exception if the encoding is invalid.
|
| -List<int> _encodeString(String string, [Encoding encoding = Encoding.UTF_8]) {
|
| - if (string.length == 0) return [];
|
| - if (encoding == Encoding.UTF_8) return UTF8.encode(string);
|
| - var bytes;
|
| - var controller = new StreamController(sync: true);
|
| - controller.stream
|
| - .transform(new StringEncoder(encoding))
|
| - .listen((data) {
|
| - // The StringEncoder encodes every encoding (except UTF-8) in one go.
|
| - assert(bytes == null);
|
| - bytes = data;
|
| - });
|
| - controller.add(string);
|
| - controller.close();
|
| - assert(bytes != null);
|
| - return bytes;
|
| + external static List<int> _encodeString(String string);
|
| }
|
|
|
| +class _WindowsCodePageEncoderSink extends StringConversionSinkBase {
|
| + // TODO(floitsch): provide more efficient conversions when the input is
|
| + // not a String.
|
|
|
| -abstract class _SingleByteDecoder
|
| - extends StreamEventTransformer<List<int>, String> {
|
| - final int _replacementChar;
|
| + final ByteConversionSink _sink;
|
|
|
| - _SingleByteDecoder(this._replacementChar);
|
| + _WindowsCodePageEncoderSink(this._sink);
|
|
|
| - void handleData(List<int> data, EventSink<String> sink) {
|
| - var buffer = new List<int>(data.length);
|
| - for (int i = 0; i < data.length; i++) {
|
| - int char = _decodeByte(data[i]);
|
| - if (char < 0) char = _replacementChar;
|
| - buffer[i] = char;
|
| - }
|
| - sink.add(new String.fromCharCodes(buffer));
|
| + void close() {
|
| + _sink.close();
|
| }
|
|
|
| - int _decodeByte(int byte);
|
| -}
|
| -
|
| -
|
| -// Utility class for decoding ascii data delivered as a stream of
|
| -// bytes.
|
| -class _AsciiDecoder extends _SingleByteDecoder {
|
| - _AsciiDecoder(int replacementChar) : super(replacementChar);
|
| + void add(String string) {
|
| + List<int> encoded = _WindowsCodePageEncoder._encodeString(string);
|
| + if (encoded == null) {
|
| + throw new FormatException("Invalid character for encoding");
|
| + }
|
| + _sink.add(encoded);
|
| + }
|
|
|
| - int _decodeByte(int byte) => ((byte & 0x7f) == byte) ? byte : -1;
|
| + void addSlice(String source, int start, int end, bool isLast) {
|
| + if (start != 0 || end != source.length) {
|
| + source = source.substring(start, end);
|
| + }
|
| + add(source);
|
| + if (isLast) close();
|
| + }
|
| }
|
|
|
|
|
| -// Utility class for decoding Latin-1 data delivered as a stream of
|
| -// bytes.
|
| -class _Latin1Decoder extends _SingleByteDecoder {
|
| - _Latin1Decoder(int replacementChar) : super(replacementChar);
|
| -
|
| - int _decodeByte(int byte) => ((byte & 0xFF) == byte) ? byte : -1;
|
| -}
|
| +class _WindowsCodePageDecoder extends Converter<List<int>, String> {
|
|
|
| + const _WindowsCodePageDecoder();
|
|
|
| -abstract class _SingleByteEncoder
|
| - extends StreamEventTransformer<String, List<int>> {
|
| - void handleData(String data, EventSink<List<int>> sink) {
|
| - var bytes = _encode(data);
|
| - if (bytes == null) {
|
| - sink.addError(new FormatException("Invalid character for encoding"));
|
| - sink.close();
|
| - } else {
|
| - sink.add(bytes);
|
| - }
|
| + String convert(List<int> input) {
|
| + return _decodeBytes(input);
|
| }
|
|
|
| - List<int> _encode(String string);
|
| -}
|
| -
|
| -
|
| -// Utility class for encoding a string into an ASCII byte stream.
|
| -class _AsciiEncoder extends _SingleByteEncoder {
|
| - List<int> _encode(String string) {
|
| - var bytes = string.codeUnits;
|
| - for (var byte in bytes) {
|
| - if (byte > 127) return null;
|
| - }
|
| - return bytes;
|
| + /**
|
| + * Starts a chunked conversion.
|
| + */
|
| + ByteConversionSink startChunkedConversion(
|
| + ChunkedConversionSink<String> sink) {
|
| + return new _WindowsCodePageDecoderSink(sink);
|
| }
|
| -}
|
|
|
| + // Override the base-class' bind, to provide a better type.
|
| + Stream<String> bind(Stream<List<int>> stream) => super.bind(stream);
|
|
|
| -// Utility class for encoding a string into a Latin1 byte stream.
|
| -class _Latin1Encoder extends _SingleByteEncoder {
|
| - List<int> _encode(String string) {
|
| - var bytes = string.codeUnits;
|
| - for (var byte in bytes) {
|
| - if (byte > 255) return null;
|
| - }
|
| - return bytes;
|
| - }
|
| + external static String _decodeBytes(List<int> bytes);
|
| }
|
|
|
| +class _WindowsCodePageDecoderSink extends ByteConversionSinkBase {
|
| + // TODO(floitsch): provide more efficient conversions when the input is
|
| + // a slice.
|
|
|
| -// Utility class for encoding a string into a current windows
|
| -// code page byte list.
|
| -// Implemented on top of a _SingleByteEncoder, even though it's not really a
|
| -// single byte encoder, to avoid copying boilerplate.
|
| -class _WindowsCodePageEncoder extends _SingleByteEncoder {
|
| - List<int> _encode(String string) => _encodeString(string);
|
| + final StringConversionSink _sink;
|
|
|
| - external static List<int> _encodeString(String string);
|
| -}
|
| + _WindowsCodePageDecoderSink(this._sink);
|
|
|
| -
|
| -// Utility class for decoding Windows current code page data delivered
|
| -// as a stream of bytes.
|
| -class _WindowsCodePageDecoder extends StreamEventTransformer<List<int>, String> {
|
| - void handleData(List<int> data, EventSink<String> sink) {
|
| - sink.add(_decodeBytes(data));
|
| + void close() {
|
| + _sink.close();
|
| }
|
|
|
| - external static String _decodeBytes(List<int> bytes);
|
| + void add(List<int> bytes) {
|
| + _sink.add(_WindowsCodePageDecoder._decodeBytes(bytes));
|
| + }
|
| }
|
|
|