OLD | NEW |
---|---|
(Empty) | |
1 // Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file | |
2 // for details. All rights reserved. Use of this source code is governed by a | |
3 // BSD-style license that can be found in the LICENSE file. | |
4 | |
5 import 'dart:convert'; | |
6 | |
7 import 'package:convert/convert.dart'; | |
8 import 'package:crypto/crypto.dart'; | |
9 import 'package:string_scanner/string_scanner.dart'; | |
10 | |
11 import 'media_type.dart'; | |
12 import 'scan.dart'; | |
13 import 'utils.dart'; | |
14 | |
// NOTE(review): nothing in this file references this pattern; confirm whether
// it is still needed.
/// A pattern for optional linear whitespace in its URI-encoded form.
///
/// This is the percent-encoded counterpart of [whitespace] from scan.dart: it
/// matches `%20` and `%09`, optionally preceded by `%0D%0A`, rather than the
/// literal characters.
final RegExp _whitespace = new RegExp(r'(?:(?:%0D%0A)?(?:%20|%09)+)*');
18 | |
/// A codec that percent-encodes strings by way of their UTF-8 bytes.
///
/// Built by fusing [UTF8] with [percent], so `encode` takes a string to its
/// percent-encoded form and `decode` reverses that.
final Codec<String, String> _utf8Percent = UTF8.fuse(percent);
21 | |
22 /// A class representing a `data:` URI that provides access to its [mediaType] | |
23 /// and the [data] it contains. | |
24 /// | |
25 /// Data can be encoded as a `data:` URI using [encode] or [encodeString], and | |
26 /// decoded using [decode]. | |
27 /// | |
28 /// This implementation is based on [RFC 2397][rfc], but as that RFC is | |
29 /// [notoriously ambiguous][ambiguities], some judgment calls have been made. | |
30 /// This class tries to match browsers' data URI logic, to ensure that it can | |
31 /// losslessly parse its own output, and to accept as much input as it can make | |
32 /// sense of. A balance has been struck between these goals so that while none | |
33 /// of them have been accomplished perfectly, all of them are close enough for | |
34 /// practical use. | |
35 /// | |
36 /// [rfc]: http://tools.ietf.org/html/rfc2397 | |
37 /// [ambiguities]: https://simonsapin.github.io/data-urls/ | |
38 /// | |
39 /// Some particular notes on the behavior: | |
40 /// | |
41 /// * When encoding, all characters that are not [unreserved][] in the type, | |
42 /// subtype, parameter names, and parameter values of media types are | |
43 /// percent-encoded using UTF-8. | |
44 /// | |
45 /// * When decoding, the type, subtype, parameter names, and parameter values of | |
46 /// media types are percent-decoded using UTF-8. Parameter values are allowed | |
47 /// to contain non-token characters once decoded, but the other tokens are | |
48 /// not. | |
49 /// | |
50 /// * As per the spec, quoted-string parameters are not supported when decoding. | |
51 /// | |
52 /// * Query components are included in the decoding algorithm, but fragments are | |
53 /// not. | |
54 /// | |
55 /// * Invalid media types and parameters will raise exceptions when decoding. | |
56 /// This is standard for Dart parsers but contrary to browser behavior. | |
57 /// | |
58 /// * The URL and filename-safe base64 alphabet is accepted when decoding but | |
59 /// never emitted when encoding, since browsers don't support it. | |
60 /// | |
61 /// [lws]: https://tools.ietf.org/html/rfc2616#section-2.2 | |
62 /// [unreserved]: https://tools.ietf.org/html/rfc3986#section-2.3 | |
63 class DataUri implements Uri { | |
64 /// The inner URI to which all [Uri] methods are forwarded. | |
65 final Uri _inner; | |
66 | |
67 /// The byte data contained in the data URI. | |
68 final List<int> data; | |
69 | |
70 /// The media type declared for the data URI. | |
71 /// | |
72 /// This defaults to `text/plain;charset=US-ASCII`. | |
73 final MediaType mediaType; | |
74 | |
/// The encoding named by the `charset` parameter of [mediaType].
///
/// Falls back to [ASCII] when [mediaType] has no `charset` parameter. When the
/// parameter names an encoding that [Encoding.getByName] doesn't recognize,
/// this is `null`.
Encoding get declaredEncoding {
  var charset = mediaType.parameters["charset"];
  if (charset == null) return ASCII;
  return Encoding.getByName(charset);
}
84 | |
/// Creates a data URI that carries the string [data] with the given
/// [mediaType].
///
/// The payload is base64-encoded when [base64] is `true` (the default) and
/// percent-encoded otherwise. [mediaType] defaults to `text/plain`.
///
/// The bytes are produced with [encoding] if it's passed, or else with the
/// encoding named by the `charset` parameter of [mediaType]. When neither is
/// available, pure-ASCII [data] is emitted as-is with no `charset` parameter,
/// and anything else is encoded as [UTF8] with `charset=utf-8` added.
///
/// Throws an [ArgumentError] if [mediaType] and [encoding] disagree on the
/// encoding, and an [UnsupportedError] if [mediaType] names a charset that
/// [Encoding.getByName] doesn't support.
factory DataUri.encodeString(String data, {bool base64: true,
    MediaType mediaType, Encoding encoding}) {
  mediaType ??= new MediaType("text", "plain");

  var charset = mediaType.parameters["charset"];
  if (encoding != null) {
    // An explicit encoding wins, but it must agree with any declared charset.
    if (charset == null) {
      mediaType = mediaType.change(parameters: {"charset": encoding.name});
    } else if (Encoding.getByName(charset) != encoding) {
      throw new ArgumentError("Media type charset '$charset' disagrees with "
          "encoding '${encoding.name}'.");
    }
  } else if (charset != null) {
    // No explicit encoding: honor the charset the media type declares.
    encoding = Encoding.getByName(charset);
    if (encoding == null) {
      throw new UnsupportedError(
          'Unsupported media type charset "$charset".');
    }
  } else if (data.codeUnits.every((codeUnit) => codeUnit < 0x80)) {
    // Pure ASCII needs neither an encoder nor an explicit charset.
    return new DataUri.encode(data.codeUnits,
        base64: base64, mediaType: mediaType);
  } else {
    // Anything beyond ASCII defaults to UTF-8, declared explicitly.
    encoding = UTF8;
    mediaType = mediaType.change(parameters: {"charset": "utf-8"});
  }

  List<int> bytes = encoding.encode(data);
  return new DataUri.encode(bytes, base64: base64, mediaType: mediaType);
}
130 | |
/// Creates a data URI that carries the bytes in [data] with the given
/// [mediaType].
///
/// The payload is base64-encoded when [base64] is `true` (the default) and
/// percent-encoded otherwise. [mediaType] defaults to `text/plain`.
factory DataUri.encode(List<int> data, {bool base64: true,
    MediaType mediaType}) {
  mediaType ??= new MediaType('text', 'plain');

  // The media type is stringified by hand because [section 3][rfc] wants
  // non-token characters in parameter values URL-escaped instead of wrapped in
  // quoted-strings. Doing it here also lets `text/plain` be left out.
  //
  // [rfc]: http://tools.ietf.org/html/rfc2397#section-3
  var path = new StringBuffer();
  var isPlainText = mediaType.type == 'text' && mediaType.subtype == 'plain';
  if (!isPlainText) {
    path
      ..write(_utf8Percent.encode(mediaType.type))
      ..write("/")
      ..write(_utf8Percent.encode(mediaType.subtype));
  }

  mediaType.parameters.forEach((attribute, value) {
    var name = _utf8Percent.encode(attribute);
    path.write(";$name=${_utf8Percent.encode(value)}");
  });

  if (base64) {
    // Deliberately the standard alphabet rather than the URL-safe one, since
    // browsers don't actually support the latter.
    path
      ..write(";base64,")
      ..write(CryptoUtils.bytesToBase64(data));
  } else {
    path
      ..write(",")
      ..write(percent.encode(data));
  }

  var inner = new Uri(scheme: 'data', path: path.toString());
  return new DataUri._(data, mediaType, inner);
}
171 | |
/// Parses [uri] as a `data:` URI, exposing its [data] and [mediaType].
///
/// [uri] may be either a [Uri] or a [String].
///
/// Throws an [ArgumentError] if [uri] is any other type or if its scheme is
/// not `data`. Throws a [FormatException] if parsing fails.
factory DataUri.decode(uri) {
  if (uri is String) {
    uri = Uri.parse(uri);
  } else if (uri is! Uri) {
    throw new ArgumentError.value(uri, "uri", "Must be a String or a Uri.");
  }

  if (uri.scheme != 'data') {
    throw new ArgumentError.value(uri, "uri", "Can only decode a data: URI.");
  }

  return wrapFormatException("data URI", uri.toString(), () {
    // Drop the fragment, as per https://simonsapin.github.io/data-urls/.
    // TODO(nweiz): Use Uri.removeFragment once sdk#24593 is fixed.
    var text = uri.toString();
    var hashIndex = text.indexOf('#');
    if (hashIndex != -1) text = text.substring(0, hashIndex);

    var scanner = new StringScanner(text)..expect('data:');

    // The media type is scanned by hand for three reasons:
    //
    // * Parameter values that aren't valid tokens are URL-encoded rather than
    //   quoted.
    //
    // * The type/subtype may be left out while parameters are still present.
    //
    // * Scanning has to stop at `;base64,`, which at first glance looks like
    //   just another parameter.
    var implicitType = !scanner.scan(token);
    var type = 'text';
    var subtype = 'plain';
    if (!implicitType) {
      type = _verifyToken(scanner);
      scanner.expect('/');
      subtype = _expectToken(scanner);
    }

    // Consume parameters up through ";base64" or the comma.
    var parameters = {};
    var base64 = false;
    while (scanner.scan(';')) {
      var attribute = _expectToken(scanner);

      if (attribute == 'base64') {
        // A bare "base64" marks the payload encoding and ends the parameter
        // list; "base64=..." is treated as an ordinary parameter.
        if (!scanner.scan('=')) {
          base64 = true;
          break;
        }
      } else {
        scanner.expect('=');
      }

      // [_expectToken] isn't used here because the value relies on
      // percent-encoding to escape non-token characters.
      scanner.expect(token);
      parameters[attribute] = _utf8Percent.decode(scanner.lastMatch[0]);
    }
    scanner.expect(',');

    // With neither a media type nor any parameters, apply the default charset.
    if (implicitType && parameters.isEmpty) {
      parameters = {"charset": "US-ASCII"};
    }

    var mediaType = new MediaType(type, subtype, parameters);

    var payload = scanner.rest;
    var data = base64
        ? CryptoUtils.base64StringToBytes(payload)
        : percent.decode(payload);

    return new DataUri._(data, mediaType, uri);
  });
}
253 | |
/// Returns the percent-decoded value of the last MIME token scanned by
/// [scanner].
///
/// The caller must have just matched a token with [scanner]; that match is
/// decoded as percent-encoded UTF-8.
///
/// Throws a [FormatException] if the result isn't a valid token after
/// percent-decoding.
static String _verifyToken(StringScanner scanner) {
  var value = _utf8Percent.decode(scanner.lastMatch[0]);

  // [StringScanner.error] always throws. Reporting the failure inside the
  // guard means every path that reaches the end of the function returns a
  // value, instead of falling off the end after the error call.
  if (value.contains(nonToken)) scanner.error("Invalid token.");
  return value;
}
264 | |
/// Consumes a MIME token from [scanner] and returns it percent-decoded.
///
/// Throws a [FormatException] if no token is present, or if the token is no
/// longer valid once it has been percent-decoded.
static String _expectToken(StringScanner scanner) {
  // Matching first leaves the token in `scanner.lastMatch`, which is what
  // [_verifyToken] reads.
  scanner.expect(token, name: "a token");
  var decoded = _verifyToken(scanner);
  return decoded;
}
274 | |
275 DataUri._(this.data, this.mediaType, this._inner); | |
276 | |
/// Returns [data] decoded as a string using [encoding].
///
/// [encoding] defaults to [declaredEncoding]. Throws an [UnsupportedError] if
/// [encoding] isn't passed and the declared charset isn't supported by
/// [Encoding.getByName].
String dataAsString({Encoding encoding}) {
  var codec = encoding ?? declaredEncoding;
  if (codec != null) return codec.decode(data);

  throw new UnsupportedError(
      'Unsupported media type charset '
      '"${mediaType.parameters["charset"]}".');
}
292 | |
// Everything below implements [Uri] by delegating to [_inner].

// URI components.
@override
String get scheme => _inner.scheme;
@override
String get authority => _inner.authority;
@override
String get userInfo => _inner.userInfo;
@override
String get host => _inner.host;
@override
int get port => _inner.port;
@override
String get path => _inner.path;
@override
String get query => _inner.query;
@override
String get fragment => _inner.fragment;

// Derived URIs and views. These return plain [Uri]s, not [DataUri]s.
@override
Uri replace(
    {String scheme,
    String userInfo,
    String host,
    int port,
    String path,
    Iterable<String> pathSegments,
    String query,
    Map<String, String> queryParameters,
    String fragment}) {
  return _inner.replace(
      scheme: scheme,
      userInfo: userInfo,
      host: host,
      port: port,
      path: path,
      pathSegments: pathSegments,
      query: query,
      queryParameters: queryParameters,
      fragment: fragment);
}

@override
Uri removeFragment() => _inner.removeFragment();
@override
List<String> get pathSegments => _inner.pathSegments;
@override
Map<String, String> get queryParameters => _inner.queryParameters;
@override
Uri normalizePath() => _inner.normalizePath();
@override
bool get isAbsolute => _inner.isAbsolute;
@override
Uri resolve(String reference) => _inner.resolve(reference);
@override
Uri resolveUri(Uri reference) => _inner.resolveUri(reference);

// Component presence checks.
@override
bool get hasScheme => _inner.hasScheme;
@override
bool get hasAuthority => _inner.hasAuthority;
@override
bool get hasPort => _inner.hasPort;
@override
bool get hasQuery => _inner.hasQuery;
@override
bool get hasFragment => _inner.hasFragment;
@override
bool get hasEmptyPath => _inner.hasEmptyPath;
@override
bool get hasAbsolutePath => _inner.hasAbsolutePath;

// Conversions, equality, and hashing.
@override
String get origin => _inner.origin;
@override
String toFilePath({bool windows}) => _inner.toFilePath(windows: windows);
@override
String toString() => _inner.toString();
@override
bool operator ==(other) => _inner == other;
@override
int get hashCode => _inner.hashCode;
327 } | |
OLD | NEW |