Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(199)

Unified Diff: lib/src/data_uri.dart

Issue 1390353008: Add a DataUri class. (Closed) Base URL: git@github.com:dart-lang/http_parser@master
Patch Set: Code review changes Created 5 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « lib/http_parser.dart ('k') | pubspec.yaml » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: lib/src/data_uri.dart
diff --git a/lib/src/data_uri.dart b/lib/src/data_uri.dart
new file mode 100644
index 0000000000000000000000000000000000000000..77f921032eb18335f42425f4b0c49492e304f21e
--- /dev/null
+++ b/lib/src/data_uri.dart
@@ -0,0 +1,327 @@
+// Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import 'dart:convert';
+
+import 'package:convert/convert.dart';
+import 'package:crypto/crypto.dart';
+import 'package:string_scanner/string_scanner.dart';
+
+import 'media_type.dart';
+import 'scan.dart';
+import 'utils.dart';
+
+/// Like [whitespace] from scan.dart, except that it matches URI-encoded
+/// whitespace rather than literal characters.
+final _whitespace = new RegExp(r'(?:(?:%0D%0A)?(?:%20|%09)+)*');
+
+/// A converter for percent encoding strings using UTF-8.
+final _utf8Percent = UTF8.fuse(percent);
+
+/// A class representing a `data:` URI that provides access to its [mediaType]
+/// and the [data] it contains.
+///
+/// Data can be encoded as a `data:` URI using [encode] or [encodeString], and
+/// decoded using [decode].
+///
+/// This implementation is based on [RFC 2397][rfc], but as that RFC is
+/// [notoriously ambiguous][ambiguities], some judgment calls have been made.
+/// This class tries to match browsers' data URI logic, to ensure that it can
+/// losslessly parse its own output, and to accept as much input as it can make
+/// sense of. A balance has been struck between these goals so that while none
+/// of them have been accomplished perfectly, all of them are close enough for
+/// practical use.
+///
+/// [rfc]: http://tools.ietf.org/html/rfc2397
+/// [ambiguities]: https://simonsapin.github.io/data-urls/
+///
+/// Some particular notes on the behavior:
+///
+/// * When encoding, all characters that are not [reserved][] in the type,
+/// subtype, parameter names, and parameter values of media types are
+/// percent-encoded using UTF-8.
+///
+/// * When decoding, the type, subtype, parameter names, and parameter values of
+/// media types are percent-decoded using UTF-8. Parameter values are allowed
+/// to contain non-token characters once decoded, but the other tokens are
+/// not.
+///
+/// * As per the spec, quoted-string parameters are not supported when decoding.
+///
+/// * Query components are included in the decoding algorithm, but fragments are
+/// not.
+///
+/// * Invalid media types and parameters will raise exceptions when decoding.
+/// This is standard for Dart parsers but contrary to browser behavior.
+///
+/// * The URL and filename-safe base64 alphabet is accepted when decoding but
+/// never emitted when encoding, since browsers don't support it.
+///
+/// [lws]: https://tools.ietf.org/html/rfc2616#section-2.2
+/// [reserved]: https://tools.ietf.org/html/rfc3986#section-2.2
+class DataUri implements Uri {
+ /// The inner URI to which all [Uri] methods are forwarded.
+ final Uri _inner;
+
+ /// The byte data contained in the data URI.
+ final List<int> data;
+
+ /// The media type declared for the data URI.
+ ///
+ /// This defaults to `text/plain;charset=US-ASCII`.
+ final MediaType mediaType;
+
+ /// The encoding declared by the `charset` parameter in [mediaType].
+ ///
+ /// If [mediaType] has no `charset` parameter, this defaults to [ASCII]. If
+ /// the `charset` parameter declares an encoding that can't be found using
+ /// [Encoding.getByName], this returns `null`.
+ Encoding get declaredEncoding {
+ var charset = mediaType.parameters["charset"];
+ return charset == null ? ASCII : Encoding.getByName(charset);
+ }
+
+ /// Creates a new data URI with the given [mediaType] and [data].
+ ///
+ /// If [base64] is `true` (the default), the data is base64-encoded;
+ /// otherwise, it's percent-encoded.
+ ///
+ /// If [encoding] is passed or [mediaType] declares a `charset` parameter,
+ /// [data] is encoded using that encoding. Otherwise, it's encoded using
+ /// [UTF8] or [ASCII] depending on whether it contains any non-ASCII
+ /// characters.
+ ///
+ /// Throws [ArgumentError] if [mediaType] and [encoding] disagree on the
+ /// encoding, and an [UnsupportedError] if [mediaType] defines an encoding
+ /// that's not supported by [Encoding.getByName].
+ factory DataUri.encodeString(String data, {bool base64: true,
+ MediaType mediaType, Encoding encoding}) {
+ if (mediaType == null) mediaType = new MediaType("text", "plain");
+
+ var charset = mediaType.parameters["charset"];
+ var bytes;
+ if (encoding != null) {
+ if (charset == null) {
+ mediaType = mediaType.change(parameters: {"charset": encoding.name});
+ } else if (Encoding.getByName(charset) != encoding) {
+ throw new ArgumentError("Media type charset '$charset' disagrees with "
+ "encoding '${encoding.name}'.");
+ }
+ bytes = encoding.encode(data);
+ } else if (charset != null) {
+ encoding = Encoding.getByName(charset);
+ if (encoding == null) {
+ throw new UnsupportedError(
+ 'Unsupported media type charset "$charset".');
+ }
+ bytes = encoding.encode(data);
+ } else if (data.codeUnits.every((codeUnit) => codeUnit < 0x80)) {
+ // If the data is pure ASCII, don't bother explicitly defining a charset.
+ bytes = data.codeUnits;
+ } else {
+ // If the data isn't pure ASCII, default to UTF-8.
+ bytes = UTF8.encode(data);
+ mediaType = mediaType.change(parameters: {"charset": "utf-8"});
+ }
+
+ return new DataUri.encode(bytes, base64: base64, mediaType: mediaType);
+ }
+
+ /// Creates a new data URI with the given [mediaType] and [data].
+ ///
+ /// If [base64] is `true` (the default), the data is base64-encoded;
+ /// otherwise, it's percent-encoded.
+ factory DataUri.encode(List<int> data, {bool base64: true,
+ MediaType mediaType}) {
+ mediaType ??= new MediaType('text', 'plain');
+
+ var buffer = new StringBuffer();
+
+ // Manually stringify the media type because [section 3][rfc] requires that
+ // parameter values should have non-token characters URL-escaped rather than
+ // emitting them as quoted-strings. This also allows us to omit text/plain
+ // if possible.
+ //
+ // [rfc]: http://tools.ietf.org/html/rfc2397#section-3
+ if (mediaType.type != 'text' || mediaType.subtype != 'plain') {
+ buffer.write(_utf8Percent.encode(mediaType.type));
+ buffer.write("/");
+ buffer.write(_utf8Percent.encode(mediaType.subtype));
+ }
+
+ mediaType.parameters.forEach((attribute, value) {
+ buffer.write(";${_utf8Percent.encode(attribute)}=");
+ buffer.write(_utf8Percent.encode(value));
+ });
+
+ if (base64) {
+ buffer.write(";base64,");
+ // *Don't* use the URL-safe encoding scheme, since browsers don't actually
+ // support it.
+ buffer.write(CryptoUtils.bytesToBase64(data));
+ } else {
+ buffer.write(",");
+ buffer.write(percent.encode(data));
+ }
+
+ return new DataUri._(data, mediaType,
+ new Uri(scheme: 'data', path: buffer.toString()));
+ }
+
+ /// Decodes [uri] to make its [data] and [mediaType] available.
+ ///
+ /// [uri] may be a [Uri] or a [String].
+ ///
+ /// Throws an [ArgumentError] if [uri] is an invalid type or has a scheme
+ /// other than `data:`. Throws a [FormatException] if parsing fails.
+ factory DataUri.decode(uri) {
+ if (uri is String) {
+ uri = Uri.parse(uri);
+ } else if (uri is! Uri) {
+ throw new ArgumentError.value(uri, "uri", "Must be a String or a Uri.");
+ }
+
+ if (uri.scheme != 'data') {
+ throw new ArgumentError.value(uri, "uri", "Can only decode a data: URI.");
+ }
+
+ return wrapFormatException("data URI", uri.toString(), () {
+ // Remove the fragment, as per https://simonsapin.github.io/data-urls/.
+ // TODO(nweiz): Use Uri.removeFragment once sdk#24593 is fixed.
+ var string = uri.toString();
+ var fragment = string.indexOf('#');
+ if (fragment != -1) string = string.substring(0, fragment);
+ var scanner = new StringScanner(string);
+ scanner.expect('data:');
+
+ // Manually scan the media type for three reasons:
+ //
+ // * Media type parameter values that aren't valid tokens are URL-encoded
+ // rather than quoted.
+ //
+ // * The media type may be omitted without omitting the parameters.
+ //
+ // * We need to be able to stop once we reach `;base64,`, even though at
+ // first it looks like a parameter.
+ var type;
+ var subtype;
+ var implicitType = false;
+ if (scanner.scan(token)) {
+ type = _verifyToken(scanner);
+ scanner.expect('/');
+ subtype = _expectToken(scanner);
+ } else {
+ type = 'text';
+ subtype = 'plain';
+ implicitType = true;
+ }
+
+ // Scan the parameters, up through ";base64" or a comma.
+ var parameters = {};
+ var base64 = false;
+ while (scanner.scan(';')) {
+ var attribute = _expectToken(scanner);
+
+ if (attribute != 'base64') {
+ scanner.expect('=');
+ } else if (!scanner.scan('=')) {
+ base64 = true;
+ break;
+ }
+
+ // Don't use [_expectToken] because the value uses percent-encoding to
+ // escape non-token characters.
+ scanner.expect(token);
+ parameters[attribute] = _utf8Percent.decode(scanner.lastMatch[0]);
+ }
+ scanner.expect(',');
+
+ if (implicitType && parameters.isEmpty) {
+ parameters = {"charset": "US-ASCII"};
+ }
+
kevmoo 2015/10/19 23:46:49 watch trailing whitespace
nweiz 2015/10/20 21:29:22 Done.
+ var mediaType = new MediaType(type, subtype, parameters);
+
+ var data = base64
+ ? CryptoUtils.base64StringToBytes(scanner.rest)
+ : percent.decode(scanner.rest);
+
+ return new DataUri._(data, mediaType, uri);
+ });
+ }
+
+ /// Returns the percent-decoded value of the last MIME token scanned by
+ /// [scanner].
+ ///
+ /// Throws a [FormatException] if it's not a valid token after
+ /// percent-decoding.
+ static String _verifyToken(StringScanner scanner) {
+ var value = _utf8Percent.decode(scanner.lastMatch[0]);
+ if (!value.contains(nonToken)) return value;
+ scanner.error("Invalid token.");
kevmoo 2015/10/19 23:46:49 I wish 'scanner.error' returned an error that was
nweiz 2015/10/20 21:29:22 I wish there were an @AlwaysThrows annotation that
+ }
+
+ /// Scans [scanner] through a MIME token and returns its percent-decoded
+ /// value.
+ ///
+ /// Throws a [FormatException] if it's not a valid token after
+ /// percent-decoding.
+ static String _expectToken(StringScanner scanner) {
+ scanner.expect(token, name: "a token");
+ return _verifyToken(scanner);
+ }
+
+ DataUri._(this.data, this.mediaType, this._inner);
+
+ /// Returns the decoded [data] decoded using [encoding].
+ ///
+ /// [encoding] defaults to [declaredEncoding]. If the declared encoding isn't
+ /// supported by [Encoding.getByName] and [encoding] isn't passed, this throws
+ /// an [UnsupportedError].
+ String dataAsString({Encoding encoding}) {
+ encoding ??= declaredEncoding;
+ if (encoding == null) {
+ throw new UnsupportedError(
+ 'Unsupported media type charset '
+ '"${mediaType.parameters["charset"]}".');
+ }
+
+ return encoding.decode(data);
+ }
+
+ String get scheme => _inner.scheme;
+ String get authority => _inner.authority;
+ String get userInfo => _inner.userInfo;
+ String get host => _inner.host;
+ int get port => _inner.port;
+ String get path => _inner.path;
+ String get query => _inner.query;
+ String get fragment => _inner.fragment;
+ Uri replace({String scheme, String userInfo, String host, int port,
+ String path, Iterable<String> pathSegments, String query,
+ Map<String, String> queryParameters, String fragment}) =>
+ _inner.replace(
+ scheme: scheme, userInfo: userInfo, host: host, port: port,
+ path: path, pathSegments: pathSegments, query: query,
+ queryParameters: queryParameters, fragment: fragment);
+ Uri removeFragment() => _inner.removeFragment();
+ List<String> get pathSegments => _inner.pathSegments;
+ Map<String, String> get queryParameters => _inner.queryParameters;
+ Uri normalizePath() => _inner.normalizePath();
+ bool get isAbsolute => _inner.isAbsolute;
+ Uri resolve(String reference) => _inner.resolve(reference);
+ Uri resolveUri(Uri reference) => _inner.resolveUri(reference);
+ bool get hasScheme => _inner.hasScheme;
+ bool get hasAuthority => _inner.hasAuthority;
+ bool get hasPort => _inner.hasPort;
+ bool get hasQuery => _inner.hasQuery;
+ bool get hasFragment => _inner.hasFragment;
+ bool get hasEmptyPath => _inner.hasEmptyPath;
+ bool get hasAbsolutePath => _inner.hasAbsolutePath;
+ String get origin => _inner.origin;
+ String toFilePath({bool windows}) => _inner.toFilePath(windows: windows);
+ String toString() => _inner.toString();
+ bool operator==(other) => _inner == other;
+ int get hashCode => _inner.hashCode;
+}
« no previous file with comments | « lib/http_parser.dart ('k') | pubspec.yaml » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698