sdk/lib/io/string_transformer.dart - Issue 22872012: Remove Encoding-enum from dart:io and add interface in dart:convert.

Side by Side Diff: sdk/lib/io/string_transformer.dart

Issue 22872012: Remove Encoding-enum from dart:io and add interface in dart:convert. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart

Patch Set: Fix ddbg. Created 7 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file	1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file

2 // for details. All rights reserved. Use of this source code is governed by a	2 // for details. All rights reserved. Use of this source code is governed by a

3 // BSD-style license that can be found in the LICENSE file.	3 // BSD-style license that can be found in the LICENSE file.

4	4

5 part of dart.io;	5 part of dart.io;

6	6

	7 // All aliases (in lowercase) of supported encoding from

	8 // http://www.iana.org/assignments/character-sets/character-sets.xml.

	9 Map<String, Encoding> _nameToEncoding = <String, Encoding> {

	10 // ISO_8859-1:1987.

	11 "iso_8859-1:1987": LATIN1,

	12 "iso-ir-100": LATIN1,

	13 "iso_8859-1": LATIN1,

	14 "iso-8859-1": LATIN1,

	15 "latin1": LATIN1,

	16 "l1": LATIN1,

	17 "ibm819": LATIN1,

	18 "cp819": LATIN1,

	19 "csisolatin1": LATIN1,

	20

	21 // US-ASCII.

	22 "iso-ir-6": ASCII,

	23 "ansi_x3.4-1968": ASCII,

	24 "ansi_x3.4-1986": ASCII,

	25 "iso_646.irv:1991": ASCII,

	26 "iso646-us": ASCII,

	27 "us-ascii": ASCII,

	28 "us": ASCII,

	29 "ibm367": ASCII,

	30 "cp367": ASCII,

	31 "csascii": ASCII,

	32 "ascii": ASCII, // This is not in the IANA official names.

	33

	34 // UTF-8.

	35 "csutf8": UTF8,

	36 "utf-8": UTF8

	37 };

	38

7 /**	39 /**

8 * String encodings.	40 * Gets an [Encoding] object from the name of the character set

	41 * name. The names used are the IANA official names for the

	42 * character set (see

	43 * http://www.iana.org/assignments/character-sets/character-sets.xml).

	44 *

	45 * The [name] passed is case insensitive.

	46 *

	47 * If character set is not supported [:null:] is returned.

9 */	48 */

10 class Encoding {	49 Encoding encodingFromName(String name) {
	Søren Gjesse 2013/08/26 08:03:15: Should this move to dart:convert as well? With the option of registration of additional converters?
	floitsch 2013/08/26 09:33:40: On 2013/08/26 08:03:15, Søren Gjesse wrote: > Should this move to dart:convert as well? With the option of registration of > additional converters? Moved to Encoding.getByName. No option to register additional converters yet, but we will think about it. Filed issue 12741 to track this.
11 static const Encoding UTF_8 = const Encoding._internal("utf-8");

12 static const Encoding ISO_8859_1 = const Encoding._internal("iso-8859-1");

13 static const Encoding ASCII = const Encoding._internal("us-ascii");

14

15 /**

16 * SYSTEM encoding is the current code page on Windows and UTF-8 on

17 * Linux and Mac.

18 */

19 static const Encoding SYSTEM = const Encoding._internal("system");

20

21 // All aliasses (in lowercase) of supported encoding from

22 // http://www.iana.org/assignments/character-sets/character-sets.xml.

23 static Map<String, Encoding> _nameToEncoding = <String, Encoding> {

24 // ISO_8859-1:1987.

25 "iso_8859-1:1987": ISO_8859_1,

26 "iso-ir-100": ISO_8859_1,

27 "iso_8859-1": ISO_8859_1,

28 "iso-8859-1": ISO_8859_1,

29 "latin1": ISO_8859_1,

30 "l1": ISO_8859_1,

31 "ibm819": ISO_8859_1,

32 "cp819": ISO_8859_1,

33 "csisolatin1": ISO_8859_1,

34

35 // US-ASCII.

36 "iso-ir-6": ASCII,

37 "ansi_x3.4-1968": ASCII,

38 "ansi_x3.4-1986": ASCII,

39 "iso_646.irv:1991": ASCII,

40 "iso646-us": ASCII,

41 "us-ascii": ASCII,

42 "us": ASCII,

43 "ibm367": ASCII,

44 "cp367": ASCII,

45 "csascii": ASCII,

46 "ascii": ASCII, // This is not in the IANA official names.

47

48 // UTF-8.

49 "csutf8": UTF_8,

50 "utf-8": UTF_8

51 };

52

53 /**

54 * Gets an [Encoding] object from the name of the character set

55 * name. The names used are the IANA official names for the

56 * character set (see

57 * http://www.iana.org/assignments/character-sets/character-sets.xml).

58 *

59 * The [name] passed is case insensitive.

60 *

61 * If character set is not supported [:null:] is returned.

62 */

63 static Encoding fromName(String name) {

64 if (name == null) return null;	50 if (name == null) return null;

65 name = name.toLowerCase();	51 name = name.toLowerCase();

66 return _nameToEncoding[name];	52 return _nameToEncoding[name];

	53 }

	54

	55 const SYSTEM_ENCODING = const SystemEncoding();

	56

	57 /**

	58 * The system encoding is the current code page on Windows and UTF-8 on

	59 * Linux and Mac.

	60 */

	61 class SystemEncoding extends Encoding {

	62 const SystemEncoding();

	63

	64 List<int> encode(String input) => encoder.convert(input);

	65 String decode(List<int> encoded) => decoder.convert(encoded);

	66

	67 Converter<String, List<int>> get encoder {

	68 if (Platform.operatingSystem == "windows") {

	69 return const _WindowsCodePageEncoder();

	70 } else {

	71 return const Utf8Encoder();

	72 }

	73 }

	74

	75 Converter<List<int>, String> get decoder {

	76 if (Platform.operatingSystem == "windows") {

	77 return const _WindowsCodePageDecoder();

	78 } else {

	79 return const Utf8Decoder();

	80 }

	81 }

	82 }

	83

	84 class _WindowsCodePageEncoder extends Converter<String, List<int>> {

	85

	86 const _WindowsCodePageEncoder();

	87

	88 List<int> convert(String input) {

	89 List<int> encoded = _encodeString(input);

	90 if (encoded == null) {

	91 throw new FormatException("Invalid character for encoding");

	92 }

	93 return encoded;

67 }	94 }

68	95

69 /**	96 /**

70 * Name of the encoding. This will be the lower-case version of one of the	97 * Starts a chunked conversion.

71 * IANA official names for the character set (see

72 * http://www.iana.org/assignments/character-sets/character-sets.xml)

73 */	98 */

74 final String name;	99 StringConversionSink startChunkedConversion(

	100 ChunkedConversionSink<List<int>> sink) {

	101 return new _WindowsCodePageEncoderSink(sink);

	102 }

75	103

76 const Encoding._internal(String this.name);	104 // Override the base-class' bind, to provide a better type.

	105 Stream<List<int>> bind(Stream<String> stream) => super.bind(stream);

	106

	107 external static List<int> _encodeString(String string);

77 }	108 }

78	109

79 const UTF_8 = Encoding.UTF_8;	110 class _WindowsCodePageEncoderSink extends StringConversionSinkBase {

80 const ISO_8859_1 = Encoding.ISO_8859_1;	111 // TODO(floitsch): provide more efficient conversions when the input is

81 const ASCII = Encoding.ASCII;	112 // not a String.

82	113

83 /**	114 final ByteConversionSink _sink;

84 * Stream transformer that can decode a stream of bytes into a stream of

85 * strings using [encoding].

86 *

87 * Invalid or forbidden byte-sequences will not produce errors, but will instead

88 * insert [replacementChar] in the decoded strings.

89 */

90 class StringDecoder implements StreamTransformer<List<int>, String> {

91 var _decoder;

92	115

93 static const _UNICODE_REPLACEMENT_CHARACTER_CODEPOINT = 0xFFFD;	116 _WindowsCodePageEncoderSink(this._sink);

94	117

95 /**	118 void close() {

96 * Decodes a stream of bytes into a `String` with an optional	119 _sink.close();

97 * [encoding] and [replacementChar].

98 *

99 * The default value for [encoding] is [Encoding.UTF_8].

100 *

101 * The default value for [replacementChar] is code point U+FFFD.

102 *

103 * Completes with the decoded `String` when the stream is done.

104 */

105 static Future<String> decode(

106 Stream<List<int>> stream,

107 [Encoding encoding = Encoding.UTF_8,

108 int replacementChar = _UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {

109 if (replacementChar != null &&

110 replacementChar != _UNICODE_REPLACEMENT_CHARACTER_CODEPOINT) {

111 throw new UnsupportedError("replacement character must be null or "

112 "the Unicode replacement character");

113 }

114 return stream

115 .transform(new StringDecoder(encoding, replacementChar))

116 .fold(

117 new StringBuffer(),

118 (prev, data) => prev..write(data))

119 .then((sb) => sb.toString());

120 }	120 }

121	121

122 /**	122 void add(String string) {

123 * Create a new [StringDecoder] with an optional [encoding] and	123 List<int> encoded = _WindowsCodePageByteEncoder._encodeString(string);

124 * [replacementChar].	124 if (encoded == null) {

125 *	125 throw new FormatException("Invalid character for encoding");

126 * The default value for [encoding] is [Encoding.UTF_8].

127 *

128 * The default value for [replacementChar] is code point U+FFFD.

129 */

130 StringDecoder([Encoding encoding = Encoding.UTF_8, int replacementChar]) {

131 switch (encoding) {

132 case Encoding.UTF_8:

133 if (replacementChar != null &&

134 replacementChar != _UNICODE_REPLACEMENT_CHARACTER_CODEPOINT) {

135 throw new UnsupportedError("replacement character must be null or "

136 "the Unicode replacement character");

137 }

138 _decoder = new Utf8Decoder(allowMalformed: true);

139 break;

140 case Encoding.ASCII:

141 if (replacementChar == null) {

142 replacementChar = '?'.codeUnitAt(0);

143 } else if (replacementChar > 127) {

144 throw new ArgumentError("Invalid replacement character for ASCII");

145 }

146 _decoder = new _AsciiDecoder(replacementChar);

147 break;

148 case Encoding.ISO_8859_1:

149 if (replacementChar == null) {

150 replacementChar = '?'.codeUnitAt(0);

151 } else if (replacementChar > 255) {

152 throw new ArgumentError(

153 "Invalid replacement character for ISO_8859_1");

154 }

155 _decoder = new _Latin1Decoder(replacementChar);

156 break;

157 case Encoding.SYSTEM:

158 if (Platform.operatingSystem == "windows") {

159 _decoder = new _WindowsCodePageDecoder();

160 } else {

161 if (replacementChar != null) {

162 // TODO(ajohnsen): Handle replacement character.

163 throw new UnsupportedError(

164 "Replacement character is not supported for SYSTEM encoding");

165 }

166 _decoder = new Utf8Decoder(allowMalformed: true);

167 }

168 break;

169 default:

170 throw new ArgumentError("Unsupported encoding '$encoding'");

171 }	126 }

	127 _sink.add(encoded);

172 }	128 }

173	129

174 Stream<String> bind(Stream<List<int>> stream) => _decoder.bind(stream);	130 void addSlice(String source, int start, int end, bool isLast) {

175 }	131 if (start != 0 || end != source.length) {

176	132 source = source.substring(start, end);

177

178 /**

179 * Stream transformer that can encode a stream of strings info a stream of

180 * bytes using [encoding].

181 *

182 * Strings that cannot be represented in the given encoding will result in an

183 * error and a close event on the stream.

184 */

185 class StringEncoder implements StreamTransformer<String, List<int>> {

186 var _encoder;

187

188 /**

189 * Create a new [StringDecoder] with an optional [encoding] and

190 * [replacementChar].

191 */

192 StringEncoder([Encoding encoding = Encoding.UTF_8]) {

193 switch (encoding) {

194 case Encoding.UTF_8:

195 _encoder = new Utf8Encoder();

196 break;

197 case Encoding.ASCII:

198 _encoder = new _AsciiEncoder();

199 break;

200 case Encoding.ISO_8859_1:

201 _encoder = new _Latin1Encoder();

202 break;

203 case Encoding.SYSTEM:

204 if (Platform.operatingSystem == "windows") {

205 _encoder = new _WindowsCodePageEncoder();

206 } else {

207 _encoder = new Utf8Encoder();

208 }

209 break;

210 default:

211 throw new ArgumentError("Unsupported encoding '$encoding'");

212 }	133 }

213 }	134 add(source);

214	135 if (isLast) close();

215 Stream<List<int>> bind(Stream<String> stream) => _encoder.bind(stream);

216 }

217

218

219 // Utility function to synchronously decode a list of bytes.

220 String _decodeString(List<int> bytes, [Encoding encoding = Encoding.UTF_8]) {

221 if (bytes.length == 0) return "";

222 if (encoding == Encoding.UTF_8) {

223 return UTF8.decode(bytes, allowMalformed: true);

224 }

225 var string;

226 var error;

227 var controller = new StreamController(sync: true);

228 controller.stream

229 .transform(new StringDecoder(encoding))

230 .listen((data) {

231 // The StringEncoder decodes every encoding (except UTF-8) in one go.

232 assert(string == null);

233 string = data;

234 }, onError: (e) => error = e);

235 controller.add(bytes);

236 controller.close();

237 if (error != null) throw error;

238 assert(string != null);

239 return string;

240 }

241

242

243 // Utility function to synchronously encode a String.

244 // Will throw an exception if the encoding is invalid.

245 List<int> _encodeString(String string, [Encoding encoding = Encoding.UTF_8]) {

246 if (string.length == 0) return [];

247 if (encoding == Encoding.UTF_8) return UTF8.encode(string);

248 var bytes;

249 var controller = new StreamController(sync: true);

250 controller.stream

251 .transform(new StringEncoder(encoding))

252 .listen((data) {

253 // The StringEncoder encodes every encoding (except UTF-8) in one go.

254 assert(bytes == null);

255 bytes = data;

256 });

257 controller.add(string);

258 controller.close();

259 assert(bytes != null);

260 return bytes;

261 }

262

263

264 abstract class _SingleByteDecoder

265 extends StreamEventTransformer<List<int>, String> {

266 final int _replacementChar;

267

268 _SingleByteDecoder(this._replacementChar);

269

270 void handleData(List<int> data, EventSink<String> sink) {

271 var buffer = new List<int>(data.length);

272 for (int i = 0; i < data.length; i++) {

273 int char = _decodeByte(data[i]);

274 if (char < 0) char = _replacementChar;

275 buffer[i] = char;

276 }

277 sink.add(new String.fromCharCodes(buffer));

278 }

279

280 int _decodeByte(int byte);

281 }

282

283

284 // Utility class for decoding ascii data delivered as a stream of

285 // bytes.

286 class _AsciiDecoder extends _SingleByteDecoder {

287 _AsciiDecoder(int replacementChar) : super(replacementChar);

288

289 int _decodeByte(int byte) => ((byte & 0x7f) == byte) ? byte : -1;

290 }

291

292

293 // Utility class for decoding Latin-1 data delivered as a stream of

294 // bytes.

295 class _Latin1Decoder extends _SingleByteDecoder {

296 _Latin1Decoder(int replacementChar) : super(replacementChar);

297

298 int _decodeByte(int byte) => ((byte & 0xFF) == byte) ? byte : -1;

299 }

300

301

302 abstract class _SingleByteEncoder

303 extends StreamEventTransformer<String, List<int>> {

304 void handleData(String data, EventSink<List<int>> sink) {

305 var bytes = _encode(data);

306 if (bytes == null) {

307 sink.addError(new FormatException("Invalid character for encoding"));

308 sink.close();

309 } else {

310 sink.add(bytes);

311 }

312 }

313

314 List<int> _encode(String string);

315 }

316

317

318 // Utility class for encoding a string into an ASCII byte stream.

319 class _AsciiEncoder extends _SingleByteEncoder {

320 List<int> _encode(String string) {

321 var bytes = string.codeUnits;

322 for (var byte in bytes) {

323 if (byte > 127) return null;

324 }

325 return bytes;

326 }	136 }

327 }	137 }

328	138

329	139

330 // Utility class for encoding a string into a Latin1 byte stream.	140 class _WindowsCodePageDecoder extends Converter<List<int>, String> {

331 class _Latin1Encoder extends _SingleByteEncoder {	141

332 List<int> _encode(String string) {	142 const _WindowsCodePageDecoder();

333 var bytes = string.codeUnits;	143

334 for (var byte in bytes) {	144 String convert(List<int> input) {

335 if (byte > 255) return null;	145 return _decodeBytes(input);

336 }

337 return bytes;

338 }	146 }

339 }

340	147

	148 /**

	149 * Starts a chunked conversion.

	150 */

	151 ByteConversionSink startChunkedConversion(

	152 ChunkedConversionSink<String> sink) {

	153 return new _WindowsCodePageDecoderSink(sink);

	154 }

341	155

342 // Utility class for encoding a string into a current windows	156 // Override the base-class' bind, to provide a better type.

343 // code page byte list.	157 Stream<String> bind(Stream<List<int>> stream) => super.bind(stream);

344 // Implemented on top of a _SingleByteEncoder, even though it's not really a

345 // single byte encoder, to avoid copying boilerplate.

346 class _WindowsCodePageEncoder extends _SingleByteEncoder {

347 List<int> _encode(String string) => _encodeString(string);

348

349 external static List<int> _encodeString(String string);

350 }

351

352

353 // Utility class for decoding Windows current code page data delivered

354 // as a stream of bytes.

355 class _WindowsCodePageDecoder extends StreamEventTransformer<List<int>, String> {

356 void handleData(List<int> data, EventSink<String> sink) {

357 sink.add(_decodeBytes(data));

358 }

359	158

360 external static String _decodeBytes(List<int> bytes);	159 external static String _decodeBytes(List<int> bytes);

361 }	160 }

	161

	162 class _WindowsCodePageDecoderSink extends ByteConversionSinkBase {

	163 // TODO(floitsch): provide more efficient conversions when the input is

	164 // a slice.

	165

	166 final StringConversionSink _sink;

	167

	168 _WindowsCodePageDecoderSink(this._sink);

	169

	170 void close() {

	171 _sink.close();

	172 }

	173

	174 void add(List<int> bytes) {

	175 _sink.add(_WindowsCodePageDecoder._decodeBytes(bytes));

	176 }

	177 }

OLD	NEW

« pkg/http/test/request_test.dart ('K') | « sdk/lib/io/stdio.dart ('k') | sdk/lib/io/websocket_impl.dart » ('j') | no next file with comments »