OLD | NEW |
1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file |
2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
4 | 4 |
5 part of dart.io; | 5 part of dart.io; |
6 | 6 |
7 /** | 7 /** |
8 * String encodings. | 8 * String encodings. |
9 */ | 9 */ |
10 class Encoding { | 10 class Encoding { |
(...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
83 /** | 83 /** |
84 * Stream transformer that can decode a stream of bytes into a stream of | 84 * Stream transformer that can decode a stream of bytes into a stream of |
85 * strings using [encoding]. | 85 * strings using [encoding]. |
86 * | 86 * |
87 * Invalid or forbidden byte-sequences will not produce errors, but will instead | 87 * Invalid or forbidden byte-sequences will not produce errors, but will instead |
88 * insert [replacementChar] in the decoded strings. | 88 * insert [replacementChar] in the decoded strings. |
89 */ | 89 */ |
90 class StringDecoder implements StreamTransformer<List<int>, String> { | 90 class StringDecoder implements StreamTransformer<List<int>, String> { |
91 var _decoder; | 91 var _decoder; |
92 | 92 |
| 93 static const _UNICODE_REPLACEMENT_CHARACTER_CODEPOINT = 0xFFFD; |
| 94 |
93 /** | 95 /** |
94 * Decodes a stream of bytes into a `String` with an optional | 96 * Decodes a stream of bytes into a `String` with an optional |
95 * [encoding] and [replacementChar]. | 97 * [encoding] and [replacementChar]. |
96 * | 98 * |
97 * The default value for [encoding] is [Encoding.UTF_8]. | 99 * The default value for [encoding] is [Encoding.UTF_8]. |
98 * | 100 * |
99 * The default value for [replacementChar] is code point U+FFFD. | 101 * The default value for [replacementChar] is code point U+FFFD. |
100 * | 102 * |
101 * Completes with the decoded `String` when the stream is done. | 103 * Completes with the decoded `String` when the stream is done. |
102 */ | 104 */ |
103 static Future<String> decode( | 105 static Future<String> decode( |
104 Stream<List<int>> stream, | 106 Stream<List<int>> stream, |
105 [Encoding encoding = Encoding.UTF_8, | 107 [Encoding encoding = Encoding.UTF_8, |
106 int replacementChar = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { | 108 int replacementChar = _UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) { |
| 109 if (replacementChar != null && |
| 110 replacementChar != _UNICODE_REPLACEMENT_CHARACTER_CODEPOINT) { |
| 111 throw new UnsupportedError("replacement character must be null or " |
| 112 "the Unicode replacement character"); |
| 113 } |
107 return stream | 114 return stream |
108 .transform(new StringDecoder(encoding, replacementChar)) | 115 .transform(new StringDecoder(encoding, replacementChar)) |
109 .fold( | 116 .fold( |
110 new StringBuffer(), | 117 new StringBuffer(), |
111 (prev, data) => prev..write(data)) | 118 (prev, data) => prev..write(data)) |
112 .then((sb) => sb.toString()); | 119 .then((sb) => sb.toString()); |
113 } | 120 } |
114 | 121 |
115 /** | 122 /** |
116 * Create a new [StringDecoder] with an optional [encoding] and | 123 * Create a new [StringDecoder] with an optional [encoding] and |
117 * [replacementChar]. | 124 * [replacementChar]. |
118 * | 125 * |
119 * The default value for [encoding] is [Encoding.UTF_8]. | 126 * The default value for [encoding] is [Encoding.UTF_8]. |
120 * | 127 * |
121 * The default value for [replacementChar] is code point U+FFFD. | 128 * The default value for [replacementChar] is code point U+FFFD. |
122 */ | 129 */ |
123 StringDecoder([Encoding encoding = Encoding.UTF_8, int replacementChar]) { | 130 StringDecoder([Encoding encoding = Encoding.UTF_8, int replacementChar]) { |
124 switch (encoding) { | 131 switch (encoding) { |
125 case Encoding.UTF_8: | 132 case Encoding.UTF_8: |
126 if (replacementChar == null) { | 133 if (replacementChar != null && |
127 replacementChar = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT; | 134 replacementChar != _UNICODE_REPLACEMENT_CHARACTER_CODEPOINT) { |
| 135 throw new UnsupportedError("replacement character must be null or " |
| 136 "the Unicode replacement character"); |
128 } | 137 } |
129 _decoder = new Utf8DecoderTransformer(replacementChar); | 138 _decoder = new Utf8Decoder(allowMalformed: true); |
130 break; | 139 break; |
131 case Encoding.ASCII: | 140 case Encoding.ASCII: |
132 if (replacementChar == null) { | 141 if (replacementChar == null) { |
133 replacementChar = '?'.codeUnitAt(0); | 142 replacementChar = '?'.codeUnitAt(0); |
134 } else if (replacementChar > 127) { | 143 } else if (replacementChar > 127) { |
135 throw new ArgumentError("Invalid replacement character for ASCII"); | 144 throw new ArgumentError("Invalid replacement character for ASCII"); |
136 } | 145 } |
137 _decoder = new _AsciiDecoder(replacementChar); | 146 _decoder = new _AsciiDecoder(replacementChar); |
138 break; | 147 break; |
139 case Encoding.ISO_8859_1: | 148 case Encoding.ISO_8859_1: |
140 if (replacementChar == null) { | 149 if (replacementChar == null) { |
141 replacementChar = '?'.codeUnitAt(0); | 150 replacementChar = '?'.codeUnitAt(0); |
142 } else if (replacementChar > 255) { | 151 } else if (replacementChar > 255) { |
143 throw new ArgumentError( | 152 throw new ArgumentError( |
144 "Invalid replacement character for ISO_8859_1"); | 153 "Invalid replacement character for ISO_8859_1"); |
145 } | 154 } |
146 _decoder = new _Latin1Decoder(replacementChar); | 155 _decoder = new _Latin1Decoder(replacementChar); |
147 break; | 156 break; |
148 case Encoding.SYSTEM: | 157 case Encoding.SYSTEM: |
149 if (Platform.operatingSystem == "windows") { | 158 if (Platform.operatingSystem == "windows") { |
150 _decoder = new _WindowsCodePageDecoder(); | 159 _decoder = new _WindowsCodePageDecoder(); |
151 } else { | 160 } else { |
152 if (replacementChar != null) { | 161 if (replacementChar != null) { |
153 // TODO(ajohnsen): Handle replacement character. | 162 // TODO(ajohnsen): Handle replacement character. |
154 throw new UnsupportedError( | 163 throw new UnsupportedError( |
155 "Replacement character is not supported for SYSTEM encoding"); | 164 "Replacement character is not supported for SYSTEM encoding"); |
156 } | 165 } |
157 _decoder = new Utf8DecoderTransformer(); | 166 _decoder = new Utf8Decoder(allowMalformed: true); |
158 } | 167 } |
159 break; | 168 break; |
160 default: | 169 default: |
161 throw new ArgumentError("Unsupported encoding '$encoding'"); | 170 throw new ArgumentError("Unsupported encoding '$encoding'"); |
162 } | 171 } |
163 } | 172 } |
164 | 173 |
165 Stream<String> bind(Stream<List<int>> stream) => _decoder.bind(stream); | 174 Stream<String> bind(Stream<List<int>> stream) => _decoder.bind(stream); |
166 } | 175 } |
167 | 176 |
168 | 177 |
169 /** | 178 /** |
170 * Stream transformer that can encode a stream of strings into a stream of | 179 * Stream transformer that can encode a stream of strings into a stream of |
171 * bytes using [encoding]. | 180 * bytes using [encoding]. |
172 * | 181 * |
173 * Strings that cannot be represented in the given encoding will result in an | 182 * Strings that cannot be represented in the given encoding will result in an |
174 * error and a close event on the stream. | 183 * error and a close event on the stream. |
175 */ | 184 */ |
176 class StringEncoder implements StreamTransformer<String, List<int>> { | 185 class StringEncoder implements StreamTransformer<String, List<int>> { |
177 var _encoder; | 186 var _encoder; |
178 | 187 |
179 /** | 188 /** |
180 * Create a new [StringEncoder] with an optional [encoding], which | 189 * Create a new [StringEncoder] with an optional [encoding], which |
181 * defaults to [Encoding.UTF_8]. | 190 * defaults to [Encoding.UTF_8]. |
182 */ | 191 */ |
183 StringEncoder([Encoding encoding = Encoding.UTF_8]) { | 192 StringEncoder([Encoding encoding = Encoding.UTF_8]) { |
184 switch (encoding) { | 193 switch (encoding) { |
185 case Encoding.UTF_8: | 194 case Encoding.UTF_8: |
186 _encoder = new Utf8EncoderTransformer(); | 195 _encoder = new Utf8Encoder(); |
187 break; | 196 break; |
188 case Encoding.ASCII: | 197 case Encoding.ASCII: |
189 _encoder = new _AsciiEncoder(); | 198 _encoder = new _AsciiEncoder(); |
190 break; | 199 break; |
191 case Encoding.ISO_8859_1: | 200 case Encoding.ISO_8859_1: |
192 _encoder = new _Latin1Encoder(); | 201 _encoder = new _Latin1Encoder(); |
193 break; | 202 break; |
194 case Encoding.SYSTEM: | 203 case Encoding.SYSTEM: |
195 if (Platform.operatingSystem == "windows") { | 204 if (Platform.operatingSystem == "windows") { |
196 _encoder = new _WindowsCodePageEncoder(); | 205 _encoder = new _WindowsCodePageEncoder(); |
197 } else { | 206 } else { |
198 _encoder = new Utf8EncoderTransformer(); | 207 _encoder = new Utf8Encoder(); |
199 } | 208 } |
200 break; | 209 break; |
201 default: | 210 default: |
202 throw new ArgumentError("Unsupported encoding '$encoding'"); | 211 throw new ArgumentError("Unsupported encoding '$encoding'"); |
203 } | 212 } |
204 } | 213 } |
205 | 214 |
206 Stream<List<int>> bind(Stream<String> stream) => _encoder.bind(stream); | 215 Stream<List<int>> bind(Stream<String> stream) => _encoder.bind(stream); |
207 } | 216 } |
208 | 217 |
209 | 218 |
210 // Utility function to synchronously decode a list of bytes. | 219 // Utility function to synchronously decode a list of bytes. |
211 String _decodeString(List<int> bytes, [Encoding encoding = Encoding.UTF_8]) { | 220 String _decodeString(List<int> bytes, [Encoding encoding = Encoding.UTF_8]) { |
212 if (bytes.length == 0) return ""; | 221 if (bytes.length == 0) return ""; |
| 222 if (encoding == Encoding.UTF_8) { |
| 223 return UTF8.decode(bytes, allowMalformed: true); |
| 224 } |
213 var string; | 225 var string; |
214 var error; | 226 var error; |
215 var controller = new StreamController(sync: true); | 227 var controller = new StreamController(sync: true); |
216 controller.stream | 228 controller.stream |
217 .transform(new StringDecoder(encoding)) | 229 .transform(new StringDecoder(encoding)) |
218 .listen((data) => string = data, | 230 .listen((data) { |
219 onError: (e) => error = e); | 231 // The StringDecoder decodes every encoding (except UTF-8) in one go. |
| 232 assert(string == null); |
| 233 string = data; |
| 234 }, onError: (e) => error = e); |
220 controller.add(bytes); | 235 controller.add(bytes); |
221 controller.close(); | 236 controller.close(); |
222 if (error != null) throw error; | 237 if (error != null) throw error; |
223 assert(string != null); | 238 assert(string != null); |
224 return string; | 239 return string; |
225 } | 240 } |
226 | 241 |
227 | 242 |
228 // Utility function to synchronously decode a utf8-encoded list of bytes, | 243 // Utility function to synchronously decode a utf8-encoded list of bytes, |
229 // throwing on error. | 244 // throwing on error. |
230 String _decodeUtf8Strict(List<int> bytes) { | 245 String _decodeUtf8Strict(List<int> bytes) { |
231 if (bytes.length == 0) return ""; | 246 if (bytes.length == 0) return ""; |
232 var string; | 247 return UTF8.decode(bytes); |
233 var error; | |
234 var controller = new StreamController(sync: true); | |
235 controller.stream | |
236 .transform(new Utf8DecoderTransformer(null)) | |
237 .listen((data) => string = data, | |
238 onError: (e) => error = e); | |
239 controller.add(bytes); | |
240 controller.close(); | |
241 if (error != null) throw error; | |
242 assert(string != null); | |
243 return string; | |
244 } | 248 } |
245 | 249 |
246 | 250 |
247 // Utility function to synchronously encode a String. | 251 // Utility function to synchronously encode a String. |
248 // Will throw an exception if the encoding is invalid. | 252 // Will throw an exception if the encoding is invalid. |
249 List<int> _encodeString(String string, [Encoding encoding = Encoding.UTF_8]) { | 253 List<int> _encodeString(String string, [Encoding encoding = Encoding.UTF_8]) { |
250 if (string.length == 0) return []; | 254 if (string.length == 0) return []; |
| 255 if (encoding == Encoding.UTF_8) return UTF8.encode(string); |
251 var bytes; | 256 var bytes; |
252 var controller = new StreamController(sync: true); | 257 var controller = new StreamController(sync: true); |
253 controller.stream | 258 controller.stream |
254 .transform(new StringEncoder(encoding)) | 259 .transform(new StringEncoder(encoding)) |
255 .listen((data) => bytes = data); | 260 .listen((data) { |
| 261 // The StringEncoder encodes every encoding (except UTF-8) in one go. |
| 262 assert(bytes == null); |
| 263 bytes = data; |
| 264 }); |
256 controller.add(string); | 265 controller.add(string); |
257 controller.close(); | 266 controller.close(); |
258 assert(bytes != null); | 267 assert(bytes != null); |
259 return bytes; | 268 return bytes; |
260 } | 269 } |
261 | 270 |
262 | 271 |
263 abstract class _SingleByteDecoder | 272 abstract class _SingleByteDecoder |
264 extends StreamEventTransformer<List<int>, String> { | 273 extends StreamEventTransformer<List<int>, String> { |
265 final int _replacementChar; | 274 final int _replacementChar; |
(...skipping 85 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
351 | 360 |
352 // Utility class for decoding Windows current code page data delivered | 361 // Utility class for decoding Windows current code page data delivered |
353 // as a stream of bytes. | 362 // as a stream of bytes. |
354 class _WindowsCodePageDecoder extends StreamEventTransformer<List<int>, String> { | 363 class _WindowsCodePageDecoder extends StreamEventTransformer<List<int>, String> { |
355 void handleData(List<int> data, EventSink<String> sink) { | 364 void handleData(List<int> data, EventSink<String> sink) { |
356 sink.add(_decodeBytes(data)); | 365 sink.add(_decodeBytes(data)); |
357 } | 366 } |
358 | 367 |
359 external static String _decodeBytes(List<int> bytes); | 368 external static String _decodeBytes(List<int> bytes); |
360 } | 369 } |
OLD | NEW |