sdk/lib/io/string_transformer.dart - Issue 22872012: Remove Encoding-enum from dart:io and add interface in dart:convert.

Unified Diff: sdk/lib/io/string_transformer.dart

Issue 22872012: Remove Encoding-enum from dart:io and add interface in dart:convert. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart

Patch Set: Fix ddbg. Created 7 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: sdk/lib/io/string_transformer.dart

diff --git a/sdk/lib/io/string_transformer.dart b/sdk/lib/io/string_transformer.dart

index 0f2a6c39ea1baaf034464e2bd70c96419cc61e23..e68342bb73c081f775a97034ee2bdf72a7b646d2 100644

--- a/sdk/lib/io/string_transformer.dart

+++ b/sdk/lib/io/string_transformer.dart

@@ -4,358 +4,174 @@

part of dart.io;

+// All aliases (in lowercase) of supported encoding from

+// http://www.iana.org/assignments/character-sets/character-sets.xml.

+Map<String, Encoding> _nameToEncoding = <String, Encoding> {

+ // ISO_8859-1:1987.

+ "iso_8859-1:1987": LATIN1,

+ "iso-ir-100": LATIN1,

+ "iso_8859-1": LATIN1,

+ "iso-8859-1": LATIN1,

+ "latin1": LATIN1,

+ "l1": LATIN1,

+ "ibm819": LATIN1,

+ "cp819": LATIN1,

+ "csisolatin1": LATIN1,

+ // US-ASCII.

+ "iso-ir-6": ASCII,

+ "ansi_x3.4-1968": ASCII,

+ "ansi_x3.4-1986": ASCII,

+ "iso_646.irv:1991": ASCII,

+ "iso646-us": ASCII,

+ "us-ascii": ASCII,

+ "us": ASCII,

+ "ibm367": ASCII,

+ "cp367": ASCII,

+ "csascii": ASCII,

+ "ascii": ASCII, // This is not in the IANA official names.

+ // UTF-8.

+ "csutf8": UTF8,

+ "utf-8": UTF8

+};

/**

- * String encodings.

+ * Gets an [Encoding] object from the name of the character set.

+ * The names used are the IANA official names for the

+ * character set (see

+ * http://www.iana.org/assignments/character-sets/character-sets.xml).

+ *

+ * The [name] passed is case insensitive.

+ *

+ * If character set is not supported [:null:] is returned.

*/

-class Encoding {

- static const Encoding UTF_8 = const Encoding._internal("utf-8");

- static const Encoding ISO_8859_1 = const Encoding._internal("iso-8859-1");

- static const Encoding ASCII = const Encoding._internal("us-ascii");

- /**

- * SYSTEM encoding is the current code page on Windows and UTF-8 on

- * Linux and Mac.

- */

- static const Encoding SYSTEM = const Encoding._internal("system");

- // All aliasses (in lowercase) of supported encoding from

- // http://www.iana.org/assignments/character-sets/character-sets.xml.

- static Map<String, Encoding> _nameToEncoding = <String, Encoding> {

- // ISO_8859-1:1987.

- "iso_8859-1:1987": ISO_8859_1,

- "iso-ir-100": ISO_8859_1,

- "iso_8859-1": ISO_8859_1,

- "iso-8859-1": ISO_8859_1,

- "latin1": ISO_8859_1,

- "l1": ISO_8859_1,

- "ibm819": ISO_8859_1,

- "cp819": ISO_8859_1,

- "csisolatin1": ISO_8859_1,

- // US-ASCII.

- "iso-ir-6": ASCII,

- "ansi_x3.4-1968": ASCII,

- "ansi_x3.4-1986": ASCII,

- "iso_646.irv:1991": ASCII,

- "iso646-us": ASCII,

- "us-ascii": ASCII,

- "us": ASCII,

- "ibm367": ASCII,

- "cp367": ASCII,

- "csascii": ASCII,

- "ascii": ASCII, // This is not in the IANA official names.

- // UTF-8.

- "csutf8": UTF_8,

- "utf-8": UTF_8

- };

- /**

- * Gets an [Encoding] object from the name of the character set

- * name. The names used are the IANA official names for the

- * character set (see

- * http://www.iana.org/assignments/character-sets/character-sets.xml).

- *

- * The [name] passed is case insensitive.

- *

- * If character set is not supported [:null:] is returned.

- */

- static Encoding fromName(String name) {

+Encoding encodingFromName(String name) {

Søren Gjesse 2013/08/26 08:03:15 Should this move to dart:convert as well? With the

floitsch 2013/08/26 09:33:40 Moved to Encoding.getByName. No option to registe

if (name == null) return null;

name = name.toLowerCase();

return _nameToEncoding[name];

- }

- /**

- * Name of the encoding. This will be the lower-case version of one of the

- * IANA official names for the character set (see

- * http://www.iana.org/assignments/character-sets/character-sets.xml)

- */

- final String name;

- const Encoding._internal(String this.name);

}

-const UTF_8 = Encoding.UTF_8;

-const ISO_8859_1 = Encoding.ISO_8859_1;

-const ASCII = Encoding.ASCII;

+const SYSTEM_ENCODING = const SystemEncoding();

/**

- * Stream transformer that can decode a stream of bytes into a stream of

- * strings using [encoding].

- *

- * Invalid or forbidden byte-sequences will not produce errors, but will instead

- * insert [replacementChar] in the decoded strings.

+ * The system encoding is the current code page on Windows and UTF-8 on

+ * Linux and Mac.

*/

-class StringDecoder implements StreamTransformer<List<int>, String> {

- var _decoder;

+class SystemEncoding extends Encoding {

+ const SystemEncoding();

- static const _UNICODE_REPLACEMENT_CHARACTER_CODEPOINT = 0xFFFD;

+ List<int> encode(String input) => encoder.convert(input);

+ String decode(List<int> encoded) => decoder.convert(encoded);

- /**

- * Decodes a stream of bytes into a `String` with an optional

- * [encoding] and [replacementChar].

- *

- * The default value for [encoding] is [Encoding.UTF_8].

- *

- * The default value for [replacementChar] is code point U+FFFD.

- *

- * Completes with the decoded `String` when the stream is done.

- */

- static Future<String> decode(

- Stream<List<int>> stream,

- [Encoding encoding = Encoding.UTF_8,

- int replacementChar = _UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {

- if (replacementChar != null &&

- replacementChar != _UNICODE_REPLACEMENT_CHARACTER_CODEPOINT) {

- throw new UnsupportedError("replacement character must be null or "

- "the Unicode replacement character");

+ Converter<String, List<int>> get encoder {

+ if (Platform.operatingSystem == "windows") {

+ return const _WindowsCodePageEncoder();

+ } else {

+ return const Utf8Encoder();

}

- return stream

- .transform(new StringDecoder(encoding, replacementChar))

- .fold(

- new StringBuffer(),

- (prev, data) => prev..write(data))

- .then((sb) => sb.toString());

}

- /**

- * Create a new [StringDecoder] with an optional [encoding] and

- * [replacementChar].

- *

- * The default value for [encoding] is [Encoding.UTF_8].

- *

- * The default value for [replacementChar] is code point U+FFFD.

- */

- StringDecoder([Encoding encoding = Encoding.UTF_8, int replacementChar]) {

- switch (encoding) {

- case Encoding.UTF_8:

- if (replacementChar != null &&

- replacementChar != _UNICODE_REPLACEMENT_CHARACTER_CODEPOINT) {

- throw new UnsupportedError("replacement character must be null or "

- "the Unicode replacement character");

- }

- _decoder = new Utf8Decoder(allowMalformed: true);

- break;

- case Encoding.ASCII:

- if (replacementChar == null) {

- replacementChar = '?'.codeUnitAt(0);

- } else if (replacementChar > 127) {

- throw new ArgumentError("Invalid replacement character for ASCII");

- }

- _decoder = new _AsciiDecoder(replacementChar);

- break;

- case Encoding.ISO_8859_1:

- if (replacementChar == null) {

- replacementChar = '?'.codeUnitAt(0);

- } else if (replacementChar > 255) {

- throw new ArgumentError(

- "Invalid replacement character for ISO_8859_1");

- }

- _decoder = new _Latin1Decoder(replacementChar);

- break;

- case Encoding.SYSTEM:

- if (Platform.operatingSystem == "windows") {

- _decoder = new _WindowsCodePageDecoder();

- } else {

- if (replacementChar != null) {

- // TODO(ajohnsen): Handle replacement character.

- throw new UnsupportedError(

- "Replacement character is not supported for SYSTEM encoding");

- }

- _decoder = new Utf8Decoder(allowMalformed: true);

- }

- break;

- default:

- throw new ArgumentError("Unsupported encoding '$encoding'");

+ Converter<List<int>, String> get decoder {

+ if (Platform.operatingSystem == "windows") {

+ return const _WindowsCodePageDecoder();

+ } else {

+ return const Utf8Decoder();

}

- Stream<String> bind(Stream<List<int>> stream) => _decoder.bind(stream);

}

+class _WindowsCodePageEncoder extends Converter<String, List<int>> {

-/**

- * Stream transformer that can encode a stream of strings info a stream of

- * bytes using [encoding].

- *

- * Strings that cannot be represented in the given encoding will result in an

- * error and a close event on the stream.

- */

-class StringEncoder implements StreamTransformer<String, List<int>> {

- var _encoder;

+ const _WindowsCodePageEncoder();

- /**

- * Create a new [StringDecoder] with an optional [encoding] and

- * [replacementChar].

- */

- StringEncoder([Encoding encoding = Encoding.UTF_8]) {

- switch (encoding) {

- case Encoding.UTF_8:

- _encoder = new Utf8Encoder();

- break;

- case Encoding.ASCII:

- _encoder = new _AsciiEncoder();

- break;

- case Encoding.ISO_8859_1:

- _encoder = new _Latin1Encoder();

- break;

- case Encoding.SYSTEM:

- if (Platform.operatingSystem == "windows") {

- _encoder = new _WindowsCodePageEncoder();

- } else {

- _encoder = new Utf8Encoder();

- }

- break;

- default:

- throw new ArgumentError("Unsupported encoding '$encoding'");

+ List<int> convert(String input) {

+ List<int> encoded = _encodeString(input);

+ if (encoded == null) {

+ throw new FormatException("Invalid character for encoding");

}

+ return encoded;

}

- Stream<List<int>> bind(Stream<String> stream) => _encoder.bind(stream);

-// Utility function to synchronously decode a list of bytes.

-String _decodeString(List<int> bytes, [Encoding encoding = Encoding.UTF_8]) {

- if (bytes.length == 0) return "";

- if (encoding == Encoding.UTF_8) {

- return UTF8.decode(bytes, allowMalformed: true);

+ /**

+ * Starts a chunked conversion.

+ */

+ StringConversionSink startChunkedConversion(

+ ChunkedConversionSink<List<int>> sink) {

+ return new _WindowsCodePageEncoderSink(sink);

}

- var string;

- var error;

- var controller = new StreamController(sync: true);

- controller.stream

- .transform(new StringDecoder(encoding))

- .listen((data) {

- // The StringEncoder decodes every encoding (except UTF-8) in one go.

- assert(string == null);

- string = data;

- }, onError: (e) => error = e);

- controller.add(bytes);

- controller.close();

- if (error != null) throw error;

- assert(string != null);

- return string;

+ // Override the base-class' bind, to provide a better type.

+ Stream<List<int>> bind(Stream<String> stream) => super.bind(stream);

-// Utility function to synchronously encode a String.

-// Will throw an exception if the encoding is invalid.

-List<int> _encodeString(String string, [Encoding encoding = Encoding.UTF_8]) {

- if (string.length == 0) return [];

- if (encoding == Encoding.UTF_8) return UTF8.encode(string);

- var bytes;

- var controller = new StreamController(sync: true);

- controller.stream

- .transform(new StringEncoder(encoding))

- .listen((data) {

- // The StringEncoder encodes every encoding (except UTF-8) in one go.

- assert(bytes == null);

- bytes = data;

- });

- controller.add(string);

- controller.close();

- assert(bytes != null);

- return bytes;

+ external static List<int> _encodeString(String string);

}

+class _WindowsCodePageEncoderSink extends StringConversionSinkBase {

+ // TODO(floitsch): provide more efficient conversions when the input is

+ // not a String.

-abstract class _SingleByteDecoder

- extends StreamEventTransformer<List<int>, String> {

- final int _replacementChar;

+ // Destination for encoded bytes. Typed as the general chunked sink because

+ // only add() and close() are called on it.

+ final ChunkedConversionSink<List<int>> _sink;

- _SingleByteDecoder(this._replacementChar);

+ _WindowsCodePageEncoderSink(this._sink);

- void handleData(List<int> data, EventSink<String> sink) {

- var buffer = new List<int>(data.length);

- for (int i = 0; i < data.length; i++) {

- int char = _decodeByte(data[i]);

- if (char < 0) char = _replacementChar;

- buffer[i] = char;

- }

- sink.add(new String.fromCharCodes(buffer));

+ void close() {

+ _sink.close();

}

- int _decodeByte(int byte);

-// Utility class for decoding ascii data delivered as a stream of

-// bytes.

-class _AsciiDecoder extends _SingleByteDecoder {

- _AsciiDecoder(int replacementChar) : super(replacementChar);

+ // Encodes [string] and forwards the resulting bytes to the wrapped sink.

+ // Throws a FormatException when the encoder returns null.

+ void add(String string) {

+ // The external _encodeString helper is declared on _WindowsCodePageEncoder.

+ List<int> encoded = _WindowsCodePageEncoder._encodeString(string);

+ if (encoded == null) {

+ throw new FormatException("Invalid character for encoding");

+ }

+ _sink.add(encoded);

+ }

- int _decodeByte(int byte) => ((byte & 0x7f) == byte) ? byte : -1;

+ void addSlice(String source, int start, int end, bool isLast) {

+ if (start != 0 || end != source.length) {

+ source = source.substring(start, end);

+ }

+ add(source);

+ if (isLast) close();

+ }

}

-// Utility class for decoding Latin-1 data delivered as a stream of

-// bytes.

-class _Latin1Decoder extends _SingleByteDecoder {

- _Latin1Decoder(int replacementChar) : super(replacementChar);

+class _WindowsCodePageDecoder extends Converter<List<int>, String> {

- int _decodeByte(int byte) => ((byte & 0xFF) == byte) ? byte : -1;

+ const _WindowsCodePageDecoder();

-abstract class _SingleByteEncoder

- extends StreamEventTransformer<String, List<int>> {

- void handleData(String data, EventSink<List<int>> sink) {

- var bytes = _encode(data);

- if (bytes == null) {

- sink.addError(new FormatException("Invalid character for encoding"));

- sink.close();

- } else {

- sink.add(bytes);

- }

+ String convert(List<int> input) {

+ return _decodeBytes(input);

}

- List<int> _encode(String string);

-// Utility class for encoding a string into an ASCII byte stream.

-class _AsciiEncoder extends _SingleByteEncoder {

- List<int> _encode(String string) {

- var bytes = string.codeUnits;

- for (var byte in bytes) {

- if (byte > 127) return null;

- }

- return bytes;

+ /**

+ * Starts a chunked conversion.

+ */

+ ByteConversionSink startChunkedConversion(

+ ChunkedConversionSink<String> sink) {

+ return new _WindowsCodePageDecoderSink(sink);

}

+ // Override the base-class' bind, to provide a better type.

+ Stream<String> bind(Stream<List<int>> stream) => super.bind(stream);

-// Utility class for encoding a string into a Latin1 byte stream.

-class _Latin1Encoder extends _SingleByteEncoder {

- List<int> _encode(String string) {

- var bytes = string.codeUnits;

- for (var byte in bytes) {

- if (byte > 255) return null;

- }

- return bytes;

- }

+ external static String _decodeBytes(List<int> bytes);

}

+class _WindowsCodePageDecoderSink extends ByteConversionSinkBase {

+ // TODO(floitsch): provide more efficient conversions when the input is

+ // a slice.

-// Utility class for encoding a string into a current windows

-// code page byte list.

-// Implemented on top of a _SingleByteEncoder, even though it's not really a

-// single byte encoder, to avoid copying boilerplate.

-class _WindowsCodePageEncoder extends _SingleByteEncoder {

- List<int> _encode(String string) => _encodeString(string);

+ // Destination for decoded strings. Typed as the general chunked sink because

+ // only add() and close() are called on it.

+ final ChunkedConversionSink<String> _sink;

- external static List<int> _encodeString(String string);

+ _WindowsCodePageDecoderSink(this._sink);

-// Utility class for decoding Windows current code page data delivered

-// as a stream of bytes.

-class _WindowsCodePageDecoder extends StreamEventTransformer<List<int>, String> {

- void handleData(List<int> data, EventSink<String> sink) {

- sink.add(_decodeBytes(data));

+ void close() {

+ _sink.close();

}

- external static String _decodeBytes(List<int> bytes);

+ void add(List<int> bytes) {

+ _sink.add(_WindowsCodePageDecoder._decodeBytes(bytes));

+ }

}

« pkg/http/test/request_test.dart ('K') | « sdk/lib/io/stdio.dart ('k') | sdk/lib/io/websocket_impl.dart » ('j') | no next file with comments »