Chromium Code Reviews| Index: sdk/lib/core/uri.dart |
| diff --git a/sdk/lib/core/uri.dart b/sdk/lib/core/uri.dart |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..f27f364210de186dcd13bdc26e40e6796258ef48 |
| --- /dev/null |
| +++ b/sdk/lib/core/uri.dart |
| @@ -0,0 +1,938 @@ |
| +// Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file |
| +// for details. All rights reserved. Use of this source code is governed by a |
| +// BSD-style license that can be found in the LICENSE file. |
| + |
| +part of dart.core; |
| + |
| +/** |
| + * A parsed URI, as specified by RFC-3986, http://tools.ietf.org/html/rfc3986. |
|
Lasse Reichstein Nielsen
2013/05/28 08:45:33
More documentation here, please.
Describe the part
Søren Gjesse
2013/05/28 13:33:26
I absolutely agree. However I will like to postpon
|
| + */ |
| +class Uri { |
| + int _port; |
| + |
| + /** |
| + * Returns the scheme. |
|
Lasse Reichstein Nielsen
2013/05/28 08:45:33
Returns the scheme component of this URI.
General
Søren Gjesse
2013/05/28 13:33:26
Done.
|
| + * |
| + * Returns the empty string if there is no scheme. |
| + */ |
| + final String scheme; |
| + |
| + /** |
| + * Returns the authority. |
| + * |
| + * The authority is formatted from the [userInfo], [host] and [port] |
| + * components. |
| + * |
| + * Returns the empty string if there is no authority. |
|
Lasse Reichstein Nielsen
2013/05/28 08:45:33
Why empty string and not, e.g., null?
(I assume it
Søren Gjesse
2013/05/28 13:33:26
This is not changed from the current behavior in d
|
| + */ |
| + String get authority { |
| + if (!hasAuthority) return ""; |
| + var sb = new StringBuffer(); |
|
Lasse Reichstein Nielsen
2013/05/28 08:45:33
Two spaces after "=".
Søren Gjesse
2013/05/28 13:33:26
Done.
|
| + _writeAuthority(sb); |
| + return sb.toString(); |
| + } |
|
Lasse Reichstein Nielsen
2013/05/28 08:45:33
Add empty line.
Søren Gjesse
2013/05/28 13:33:26
Done.
|
| + /** |
| + * Returns the user info part of the authority. |
| + * |
| + * Returns the empty string if there is no user info in the authority. |
| + */ |
| + final String userInfo; |
| + |
| + /** |
| + * Returns the host part of the authority. |
| + * |
| + * Returns the empty string if there is no authority and hence no host. |
| + */ |
| + final String host; |
| + |
| + /** |
| + * Returns the port part of the authority. |
| + * |
| + * Returns 0 if there is no port in the authority. |
|
Lasse Reichstein Nielsen
2013/05/28 08:45:33
I can see "port zero is a wildcard" as precedented
Søren Gjesse
2013/05/28 13:33:26
Again this is the behavior from the current dart:u
|
| + */ |
| + int get port => _port; |
| + |
| + /** |
| + * Returns the path. |
| + * |
| + * The returned path is encoded. To get direct access to the decoded |
|
Lasse Reichstein Nielsen
2013/05/28 08:45:33
Encoded how, and encoded from what?
The version th
Søren Gjesse
2013/05/28 13:33:26
I agree. I think adding a general comment above ab
|
| + * path use [pathSegments]. |
| + * |
| + * Returns the empty string if there is no path. |
| + */ |
| + final String path; |
| + |
| + /** |
| + * Returns the URI query. The returned query is encoded. To get |
|
Lasse Reichstein Nielsen
2013/05/28 08:45:33
Ditto for encoded: What has been encoded and how?
Søren Gjesse
2013/05/28 13:33:26
See above.
|
| + * direct access to the decoded query use [queryParameters]. |
| + * |
| + * Returns the empty string if there is no query. |
| + */ |
| + final String query; |
| + |
| + /** |
| + * Returns the fragment. |
| + * |
| + * Returns the empty string if there is no fragment. |
|
Lasse Reichstein Nielsen
2013/05/28 08:45:33
Is there a way to distinguish:
foo:bar#
from
f
Søren Gjesse
2013/05/28 13:33:26
No and I don't think so. We could use null vs. the
|
| + */ |
| + final String fragment; |
| + |
| + /** |
| + * Creates a new URI object by parsing a URI string. |
| + */ |
| + static Uri parse(String uri) => new Uri._fromMatch(_splitRe.firstMatch(uri)); |
| + |
| + Uri._fromMatch(Match m) : |
| + this(scheme: _emptyIfNull(m[_COMPONENT_SCHEME]), |
| + userInfo: _emptyIfNull(m[_COMPONENT_USER_INFO]), |
| + host: _eitherOf( |
| + m[_COMPONENT_HOST], m[_COMPONENT_HOST_IPV6]), |
| + port: _parseIntOrZero(m[_COMPONENT_PORT]), |
| + path: _emptyIfNull(m[_COMPONENT_PATH]), |
| + query: _emptyIfNull(m[_COMPONENT_QUERY_DATA]), |
| + fragment: _emptyIfNull(m[_COMPONENT_FRAGMENT])); |
| + |
| + /* |
| + * Create a new URI from its components. |
| + * |
| + * Each component is set through a named argument. Any number of |
| + * components can be provided. The default value for the components |
| + * not provided is the empry string, except for [port] which has a |
| + * default value of 0. The [path] and [query] components can be set |
| + * using two different named arguments. |
| + * |
| + * The scheme component is set through [scheme]. The scheme is |
| + * normalized to all lowercase letters. |
| + * |
| + * The user info part of the authority component is set through |
| + * [userInfo]. |
| + * |
| + * The host part of the authority component is set through |
| + * [host]. The host can either be a hostname, a IPv4 address or an |
| + * IPv6 address, contained in '[' and ']'. If the host contains a |
| + * ':' character, the '[' and ']' are added if not already provided. |
| + * |
| + * The port part of the authority component is set through |
| + * [port]. The port is normalized for scheme http and https where |
| + * port 80 and port 443 respectively is set. |
| + * |
| + * The path component is set through either [path] or |
| + * [pathSegments]. When [path] is used, the provided string is |
| + * expected to be fully percent-encoded, and is used in its literal |
| + * form. When [pathSegments] is used, each of the provided segments |
| + * is percent-encoded and joined using the forward slash |
| + * separator. The percent-encoding of the path segments encodes all |
| + * characters except for the unreserved characters and the following |
| + * list of characters: `!$&'()*+,;=:@`. |
| + * |
| + * The query component is set through either [query] or |
| + * [queryParameters]. When [query] is used the provided string is |
| + * expected to be fully percent-encoded and is used in its literal |
| + * form. When [queryParameters] is used the query is built from the |
| + * provided map. Each key and value in the map is percent-encoded |
| + * and joined using equal and ampersand characters. The |
| + * percent-encoding of the keys and values encodes all characters |
| + * except for the unreserved characters. |
| + * |
| + * The fragment component is set through [fragment]. |
| + */ |
| + Uri({scheme, |
| + this.userInfo: "", |
| + this.host: "", |
| + port: 0, |
| + String path, |
| + List<String> pathSegments, |
| + String query, |
| + Map<String, String> queryParameters, |
| + fragment: ""}) : |
| + scheme = _makeScheme(scheme), |
| + path = _makePath(path, pathSegments), |
| + query = _makeQuery(query, queryParameters), |
| + fragment = _makeFragment(fragment) { |
| + // Perform scheme specific normalization. |
| + if (scheme == "http" && port == 80) { |
| + _port = 0; |
| + } else if (scheme == "https" && port == 443) { |
| + _port = 0; |
| + } else { |
| + _port = port; |
| + } |
| + } |
| + |
| + /* |
| + * Returns the URI path split into its segments. Each of the |
| + * segments in the returned list have been decoded. If the path is |
| + * empty the empty list will be returned. |
| + */ |
| + List<String> get pathSegments { |
| + if (path == "") return const<String>[]; |
| + return path.split("/").map(Uri.decodeComponent).toList(growable: false); |
| + } |
| + |
| + /* |
| + * Returns the URI query split into a map according to the rules |
| + * specified for FORM post in the HTML 4.01 specification. Each key |
| + * and value in the returned map have been decoded. If there is no |
| + * query the empty map will be returned. |
| + */ |
| + Map<String, String> get queryParameters { |
| + return query.split("&").fold({}, (map, element) { |
| + int index = element.indexOf("="); |
| + if (index == -1) { |
| + if (!element.isEmpty) map[element] = ""; |
| + } else if (index != 0) { |
| + var key = element.substring(0, index); |
| + var value = element.substring(index + 1); |
| + map[Uri.decodeQueryComponent(key)] = decodeQueryComponent(value); |
| + } |
| + return map; |
| + }); |
| + } |
| + |
| + static String _makeScheme(String scheme) { |
| + bool isSchemeLowerCharacter(int ch) { |
| + return ch < 128 && |
| + ((_schemeLowerTable[ch >> 4] & (1 << (ch & 0x0f))) != 0); |
| + } |
| + |
| + bool isSchemeCharacter(int ch) { |
| + return ch < 128 && ((_schemeTable[ch >> 4] & (1 << (ch & 0x0f))) != 0); |
| + } |
| + |
| + if (scheme == null) return ""; |
| + bool allLowercase = true; |
| + int length = scheme.length; |
| + for (int i = 0; i < length; i++) { |
| + int codeUnit = scheme.codeUnitAt(i); |
| + if (!isSchemeLowerCharacter(codeUnit)) { |
| + if (isSchemeCharacter(codeUnit)) { |
| + allLowercase = false; |
| + } else { |
| + throw new ArgumentError('Illegal scheme: $scheme'); |
| + } |
| + } |
| + } |
| + |
| + return allLowercase ? scheme : scheme.toLowerCase(); |
| + } |
| + |
| + static String _makePath(String path, List<String> pathSegments) { |
| + if (path == null && pathSegments == null) return ""; |
| + if (path != null && pathSegments != null) { |
| + throw new ArgumentError('Both path and pathSegments specified'); |
| + } |
| + if (path != null) return _normalize(path); |
| + |
| + return pathSegments.map((s) => _uriEncode(_pathCharTable, s)).join("/"); |
| + } |
| + |
| + static String _makeQuery(String query, Map<String, String> queryParameters) { |
| + if (query == null && queryParameters == null) return ""; |
| + if (query != null && queryParameters != null) { |
| + throw new ArgumentError('Both query and queryParameters specified'); |
| + } |
| + if (query != null) return _normalize(query); |
| + |
| + var result = new StringBuffer(); |
| + var first = true; |
| + queryParameters.forEach((key, value) { |
| + if (!first) { |
| + result.write("&"); |
| + } |
| + first = false; |
| + result.write(Uri.encodeQueryComponent(key)); |
| + if (value != null && !value.isEmpty) { |
| + result.write("="); |
| + result.write(Uri.encodeQueryComponent(value)); |
| + } |
| + }); |
| + return result.toString(); |
| + } |
| + |
| + static String _makeFragment(String fragment) { |
| + if (fragment == null) return ""; |
| + return _normalize(fragment); |
| + } |
| + |
| + static String _normalize(String component) { |
| + bool isNormalizedHexDigit(int digit) { |
| + return (_ZERO <= digit && digit <= _NINE) || |
| + (_UPPER_CASE_A <= digit && digit <= _UPPER_CASE_F); |
| + } |
| + |
| + bool isLowerCaseHexDigit(int digit) { |
| + return _LOWER_CASE_A <= digit && digit <= _LOWER_CASE_F; |
| + } |
| + |
| + bool isUnreserved(int ch) { |
| + return ch < 128 && |
| + ((_unreservedTable[ch >> 4] & (1 << (ch & 0x0f))) != 0); |
| + } |
| + |
| + int normalizeHexDigit(int index) { |
| + var codeUnit = component.codeUnitAt(index); |
| + if (isLowerCaseHexDigit(codeUnit)) { |
| + return codeUnit - 0x20; |
| + } else if (!isNormalizedHexDigit(codeUnit)) { |
| + throw new ArgumentError("Invalid URI component: $component"); |
| + } else { |
| + return codeUnit; |
| + } |
| + } |
| + |
| + int decodeHexDigitPair(int index) { |
| + int byte = 0; |
| + for (int i = 0; i < 2; i++) { |
| + var codeUnit = component.codeUnitAt(index + i); |
| + if (_ZERO <= codeUnit && codeUnit <= _NINE) { |
| + byte = byte * 16 + codeUnit - _ZERO; |
| + } else { |
| + // Check ranges A-F (0x41-0x46) and a-f (0x61-0x66). |
| + codeUnit |= 0x20; |
| + if (_LOWER_CASE_A <= codeUnit && |
| + codeUnit <= _LOWER_CASE_F) { |
| + byte = byte * 16 + codeUnit - _LOWER_CASE_A + 10; |
| + } else { |
| + throw new ArgumentError( |
| + "Invalid percent-encoding in URI component: $component"); |
| + } |
| + } |
| + } |
| + return byte; |
| + } |
| + |
| + // Start building the normalized component string. |
| + StringBuffer result; |
| + int length = component.length; |
| + int index = 0; |
| + int prevIndex = 0; |
| + while (index < length) { |
| + |
| + // Copy a part of the component string to the result. |
| + fillResult() { |
| + if (result == null) { |
| + assert(prevIndex == 0); |
| + result = new StringBuffer(component.substring(prevIndex, index)); |
| + } else { |
| + result.write(component.substring(prevIndex, index)); |
| + } |
| + } |
| + |
| + // Normalize percent encoding to uppercase and don't encode |
| + // unreserved characters. |
| + if (component.codeUnitAt(index) == _PERCENT) { |
| + if (length < index + 2) { |
| + throw new ArgumentError( |
| + "Invalid percent-encoding in URI component: $component"); |
| + } |
| + |
| + var codeUnit1 = component.codeUnitAt(index + 1); |
| + var codeUnit2 = component.codeUnitAt(index + 2); |
| + var decodedCodeUnit = decodeHexDigitPair(index + 1); |
| + if (isNormalizedHexDigit(codeUnit1) && |
| + isNormalizedHexDigit(codeUnit2) && |
| + !isUnreserved(decodedCodeUnit)) { |
| + index += 3; |
| + } else { |
| + fillResult(); |
| + if (isUnreserved(decodedCodeUnit)) { |
| + result.writeCharCode(decodedCodeUnit); |
| + } else { |
| + result.write("%"); |
| + result.writeCharCode(normalizeHexDigit(index + 1)); |
| + result.writeCharCode(normalizeHexDigit(index + 2)); |
| + } |
| + index += 3; |
| + prevIndex = index; |
| + } |
| + } else { |
| + index++; |
| + } |
| + } |
| + assert(index == length); |
| + |
| + if (result == null) return component; |
| + return result.toString(); |
| + } |
| + |
| + static String _emptyIfNull(String val) => val != null ? val : ''; |
| + |
| + static int _parseIntOrZero(String val) { |
| + if (val != null && val != '') { |
| + return int.parse(val); |
| + } else { |
| + return 0; |
| + } |
| + } |
| + |
| + static String _eitherOf(String val1, String val2) { |
| + if (val1 != null) return val1; |
| + if (val2 != null) return val2; |
| + return ''; |
| + } |
| + |
| + // NOTE: This code was ported from: closure-library/closure/goog/uri/utils.js |
| + static final RegExp _splitRe = new RegExp( |
| + '^' |
| + '(?:' |
| + '([^:/?#.]+)' // scheme - ignore special characters |
| + // used by other URL parts such as :, |
| + // ?, /, #, and . |
| + ':)?' |
| + '(?://' |
| + '(?:([^/?#]*)@)?' // userInfo |
| + '(?:' |
| + r'([\w\d\-\u0100-\uffff.%]*)' |
| + // host - restrict to letters, |
| + // digits, dashes, dots, percent |
| + // escapes, and unicode characters. |
|
Lasse Reichstein Nielsen
2013/05/28 08:45:33
Why are we restricting more than the RegExp in the
Søren Gjesse
2013/05/28 13:33:26
Currently RegExp is also parsing the user info, ho
|
| + '|' |
| + // TODO(ajohnsen): Only allow a max number of parts? |
| + r'\[([A-Fa-f0-9:.]*)\])' |
| + // IPv6 host - restrict to hex, |
| + // dot and colon. |
| + '(?::([0-9]+))?' // port |
| + ')?' |
| + r'([^?#[]+)?' // path |
| + r'(?:\?([^#]*))?' // query |
| + '(?:#(.*))?' // fragment |
| + r'$'); |
| + |
| + static const _COMPONENT_SCHEME = 1; |
| + static const _COMPONENT_USER_INFO = 2; |
| + static const _COMPONENT_HOST = 3; |
| + static const _COMPONENT_HOST_IPV6 = 4; |
| + static const _COMPONENT_PORT = 5; |
| + static const _COMPONENT_PATH = 6; |
| + static const _COMPONENT_QUERY_DATA = 7; |
| + static const _COMPONENT_FRAGMENT = 8; |
| + |
| + /** |
| + * Returns `true` if the URI is absolute. |
| + */ |
| + bool get isAbsolute { |
| + if ("" == scheme) return false; |
| + if ("" != fragment) return false; |
| + return true; |
|
Lasse Reichstein Nielsen
2013/05/28 08:45:33
just:
return scheme != "" && fragment == "";
Mor
Søren Gjesse
2013/05/28 13:33:26
Change the formatting.
The spec says "...calls fo
|
| + } |
| + |
| + String _merge(String base, String reference) { |
| + if (base == "") return "/$reference"; |
| + return "${base.substring(0, base.lastIndexOf("/") + 1)}$reference"; |
| + } |
| + |
| + String _removeDotSegments(String path) { |
| + List<String> output = []; |
| + bool appendSlash = false; |
| + for (String segment in path.split("/")) { |
|
Lasse Reichstein Nielsen
2013/05/28 08:45:33
Seems like overkill to split and rebuild every tim
Søren Gjesse
2013/05/28 13:33:26
Done.
|
| + appendSlash = false; |
| + if (segment == "..") { |
| + if (!output.isEmpty && |
| + ((output.length != 1) || (output[0] != ""))) output.removeLast(); |
| + appendSlash = true; |
| + } else if ("." == segment) { |
| + appendSlash = true; |
| + } else { |
| + output.add(segment); |
| + } |
| + } |
| + if (appendSlash) output.add(""); |
| + return output.join("/"); |
| + } |
| + |
| + Uri resolve(String uri) { |
| + return resolveUri(Uri.parse(uri)); |
| + } |
| + |
| + Uri resolveUri(Uri reference) { |
| + // From RFC 3986. |
| + String targetScheme; |
| + String targetUserInfo; |
| + String targetHost; |
| + int targetPort; |
| + String targetPath; |
| + String targetQuery; |
| + if (reference.scheme != "") { |
| + targetScheme = reference.scheme; |
| + targetUserInfo = reference.userInfo; |
| + targetHost = reference.host; |
| + targetPort = reference.port; |
| + targetPath = _removeDotSegments(reference.path); |
| + targetQuery = reference.query; |
| + } else { |
| + if (reference.hasAuthority) { |
| + targetUserInfo = reference.userInfo; |
| + targetHost = reference.host; |
| + targetPort = reference.port; |
| + targetPath = _removeDotSegments(reference.path); |
| + targetQuery = reference.query; |
| + } else { |
| + if (reference.path == "") { |
| + targetPath = this.path; |
| + if (reference.query != "") { |
| + targetQuery = reference.query; |
| + } else { |
| + targetQuery = this.query; |
| + } |
| + } else { |
| + if (reference.path.startsWith("/")) { |
| + targetPath = _removeDotSegments(reference.path); |
| + } else { |
| + targetPath = _removeDotSegments(_merge(this.path, reference.path)); |
| + } |
| + targetQuery = reference.query; |
| + } |
| + targetUserInfo = this.userInfo; |
| + targetHost = this.host; |
| + targetPort = this.port; |
| + } |
| + targetScheme = this.scheme; |
| + } |
| + return new Uri(scheme: targetScheme, |
| + userInfo: targetUserInfo, |
| + host: targetHost, |
| + port: targetPort, |
| + path: targetPath, |
| + query: targetQuery, |
| + fragment: reference.fragment); |
| + } |
| + |
| + bool get hasAuthority => host != ""; |
| + |
| + /** |
| + * Returns the origin of the URI in the form scheme://host:port for the |
| + * schemes http and https. |
| + * |
| + * Throws StateError if the scheme is not http or https. |
|
Lasse Reichstein Nielsen
2013/05/28 08:45:33
Just say "It is an error if ...".
Quote "http" and
Søren Gjesse
2013/05/28 13:33:26
Done.
|
| + * |
| + * See: http://www.w3.org/TR/2011/WD-html5-20110405/origin-0.html#origin |
| + */ |
| + String get origin { |
| + if (scheme == "" || host == null || host == "") { |
| + throw new StateError("Cannot use origin without a scheme: $this"); |
| + } |
| + if (scheme != "http" && scheme != "https") { |
| + throw new StateError( |
| + "Origin is only applicable schemes http and https: $this"); |
| + } |
| + if (port == 0) return "$scheme://$host"; |
| + return "$scheme://$host:$port"; |
| + } |
| + |
| + void _writeAuthority(StringSink ss) { |
| + _addIfNonEmpty(ss, userInfo, userInfo, "@"); |
| + ss.write(host == null ? "null" : |
| + host.contains(':') ? '[$host]' : host); |
| + if (port != 0) { |
| + ss.write(":"); |
| + ss.write(port.toString()); |
| + } |
| + } |
| + |
| + String toString() { |
| + StringBuffer sb = new StringBuffer(); |
| + _addIfNonEmpty(sb, scheme, scheme, ':'); |
| + if (hasAuthority || (scheme == "file")) { |
| + sb.write("//"); |
| + _writeAuthority(sb); |
| + } |
| + sb.write(path); |
| + _addIfNonEmpty(sb, query, "?", query); |
| + _addIfNonEmpty(sb, fragment, "#", fragment); |
| + return sb.toString(); |
| + } |
| + |
| + bool operator==(other) { |
| + if (other is! Uri) return false; |
| + Uri uri = other; |
| + return scheme == uri.scheme && |
| + userInfo == uri.userInfo && |
| + host == uri.host && |
| + port == uri.port && |
| + path == uri.path && |
| + query == uri.query && |
| + fragment == uri.fragment; |
| + } |
| + |
| + int get hashCode { |
| + int combine(part, current) { |
| + // The sum is truncated to 30 bits to make sure it fits into a Smi. |
| + return (current * 31 + part.hashCode) & 0x3FFFFFFF; |
| + } |
| + return combine(scheme, combine(userInfo, combine(host, combine(port, |
| + combine(path, combine(query, combine(fragment, 1))))))); |
| + } |
| + |
| + static void _addIfNonEmpty(StringBuffer sb, String test, |
| + String first, String second) { |
| + if ("" != test) { |
| + sb.write(first == null ? "null" : first); |
|
Lasse Reichstein Nielsen
2013/05/28 08:45:33
Since null.toString() == "null", just write
sb.w
Søren Gjesse
2013/05/28 13:33:26
Done.
|
| + sb.write(second == null ? "null" : second); |
| + } |
| + } |
| + |
| + /** |
| + * Encode the string [component] using percent-encoding to make it |
|
Lasse Reichstein Nielsen
2013/05/28 08:45:33
I prefer "URL encoding" to "percent-encoding".
Is
Søren Gjesse
2013/05/28 13:33:26
I would like to address the comments in a separate
|
| + * safe for literal use as a URI component. |
| + * |
| + * All characters except uppercase and lowercase letters, digits and |
| + * the characters `!$&'()*+,;=:@` are percent-encoded. This is the |
| + * set of characters specified in RFC 2396 and the which is |
| + * specified for the encodeUriComponent in ECMA-262 version 5.1. |
| + * |
| + * When manually encoding path segments or query components remember |
| + * to encode each part separately before building the path or query |
| + * string. |
| + * |
| + * For encoding the query part consider using |
| + * [encodeQueryComponent]. |
| + * |
| + * To avoid the need for explicitly encoding use the [pathSegments] |
| + * and [queryParameters] optional named arguments when constructing |
| + * a Uri. |
|
Lasse Reichstein Nielsen
2013/05/28 08:45:33
Uri -> [Uri].
Søren Gjesse
2013/05/28 13:33:26
Done.
|
| + */ |
| + static String encodeComponent(String component) { |
| + return _uriEncode(_unreserved2396Table, component); |
| + } |
| + |
| + /* |
| + * Encode the string [component] according to the HTML 4.01 rules |
| + * for encoding the posting of a HTML form as a query string |
| + * component. |
| + * |
| + * Spaces will be replaced with plus and all characters except for |
| + * uppercase and lowercase letters, decimal digits and the |
| + * characters `-._~`. Note that the set of characters encoded is a |
| + * superset of what HTML 4.01 says as it refers to RFC 1738 for |
| + * reserved characters. |
| + * |
| + * When manually encoding query components remember to encode each |
| + * part separately before building the query string. |
| + * |
| + * To avoid the need for explicitly encoding the query use the |
| + * [queryParameters] optional named arguments when constructing a |
| + * URI. |
| + * |
| + * See http://www.w3.org/TR/html401/interact/forms.html#h-17.13.4.2 for more |
| + * details. |
| + */ |
| + static String encodeQueryComponent(String component) { |
| + return _uriEncode(_unreservedTable, component, spaceToPlus: true); |
| + } |
| + |
| + /** |
| + * Decodes the percent-encoding in [encodedComponent]. |
| + * |
| + * Note that decoding a URI component might change its meaning as |
| + * some of the decoded characters could be characters with are |
| + * delimiters for a given URI componene type. Always split a URI |
| + * component using the delimiters for the component before decoding |
| + * the individual parts. |
| + * |
| + * For handling the [path] and [query] components consider using |
| + * [pathSegments] and [queryParameters] to get the separated and |
| + * decoded component. |
| + */ |
| + static String decodeComponent(String encodedComponent) { |
| + return _uriDecode(encodedComponent); |
| + } |
| + |
| + static String decodeQueryComponent(String encodedComponent) { |
| + return _uriDecode(encodedComponent, plusToSpace: true); |
| + } |
| + |
| + /** |
| + * Encode the string [uri] using percent-encoding to make it |
| + * safe for literal use as a full URI. |
| + * |
| + * All characters except uppercase and lowercase letters, digits and |
| + * the characters `!#$&'()*+,-./:;=?@_~` are percent-encoded. This |
| + * is the set of characters specified in in ECMA-262 version 5.1 for |
| + * the encodeURI function . |
| + */ |
| + static String encodeFull(String uri) { |
| + return _uriEncode(_encodeFullTable, uri); |
| + } |
| + |
| + /** |
| + * Decodes the percent-encoding in [uri]. |
| + * |
| + * Note that decoding a full URI might change its meaning as some of |
| + * the decoded characters could be reserved characters. In most |
| + * cases an encoded URI should be parsed into components using |
| + * [Uri.parse] before decoding the separate components. |
| + */ |
| + static String decodeFull(String uri) { |
| + return _uriDecode(uri); |
| + } |
| + |
| + // Frequently used character codes. |
| + static const int _PERCENT = 0x25; |
| + static const int _ZERO = 0x30; |
| + static const int _NINE = 0x39; |
| + static const int _UPPER_CASE_A = 0x41; |
| + static const int _UPPER_CASE_F = 0x46; |
| + static const int _LOWER_CASE_A = 0x61; |
| + static const int _LOWER_CASE_F = 0x66; |
| + |
| + /** |
| + * This is the internal implementation of JavaScript's encodeURI function. |
| + * It encodes all characters in the string [text] except for those |
| + * that appear in [canonicalTable], and returns the escaped string. |
| + */ |
| + static String _uriEncode(List<int> canonicalTable, |
| + String text, |
| + {bool spaceToPlus: false}) { |
| + byteToHex(int v) { |
| + final String hex = '0123456789ABCDEF'; |
| + return '%${hex[v >> 4]}${hex[v & 0x0f]}'; |
| + } |
| + |
| + StringBuffer result = new StringBuffer(); |
| + for (int i = 0; i < text.length; i++) { |
| + int ch = text.codeUnitAt(i); |
| + if (ch < 128 && ((canonicalTable[ch >> 4] & (1 << (ch & 0x0f))) != 0)) { |
| + result.write(text[i]); |
| + } else if (spaceToPlus && text[i] == " ") { |
| + result.write("+"); |
| + } else { |
| + if (ch >= 0xD800 && ch < 0xDC00) { |
| + // Low surrogate. We expect a next char high surrogate. |
| + ++i; |
| + int nextCh = text.length == i ? 0 : text.codeUnitAt(i); |
| + if (nextCh >= 0xDC00 && nextCh < 0xE000) { |
| + // convert the pair to a U+10000 codepoint |
| + ch = 0x10000 + ((ch - 0xD800) << 10) + (nextCh - 0xDC00); |
| + } else { |
| + throw new ArgumentError('Malformed URI'); |
| + } |
| + } |
| + for (int codepoint in codepointsToUtf8([ch])) { |
| + result.write(byteToHex(codepoint)); |
| + } |
| + } |
| + } |
| + return result.toString(); |
| + } |
| + |
| + /** |
| + * Convert a byte (2 character hex sequence) in string [s] starting |
| + * at position [pos] to its ordinal value |
| + */ |
| + static int _hexCharPairToByte(String s, int pos) { |
| + int byte = 0; |
| + for (int i = 0; i < 2; i++) { |
| + var charCode = s.codeUnitAt(pos + i); |
| + if (0x30 <= charCode && charCode <= 0x39) { |
| + byte = byte * 16 + charCode - 0x30; |
| + } else { |
| + // Check ranges A-F (0x41-0x46) and a-f (0x61-0x66). |
| + charCode |= 0x20; |
| + if (0x61 <= charCode && charCode <= 0x66) { |
| + byte = byte * 16 + charCode - 0x57; |
| + } else { |
| + throw new ArgumentError("Invalid URL encoding"); |
| + } |
| + } |
| + } |
| + return byte; |
| + } |
| + |
| + /** |
| + * A JavaScript-like decodeURI function. It unescapes the string [text] and |
| + * returns the unescaped string. |
| + */ |
| + static String _uriDecode(String text, {bool plusToSpace: false}) { |
| + StringBuffer result = new StringBuffer(); |
| + List<int> codepoints = new List<int>(); |
| + for (int i = 0; i < text.length;) { |
| + String ch = text[i]; |
|
Lasse Reichstein Nielsen
2013/05/28 08:45:33
Consider using text.codeUnitAt(i) and work with co
Søren Gjesse
2013/05/28 13:33:26
Done.
|
| + if (ch != '%') { |
| + if (plusToSpace && ch == '+') { |
| + result.write(" "); |
| + } else { |
| + result.write(ch); |
| + } |
| + i++; |
| + } else { |
| + codepoints.clear(); |
| + while (ch == '%') { |
| + if (++i > text.length - 2) { |
| + throw new ArgumentError('Truncated URI'); |
| + } |
| + codepoints.add(_hexCharPairToByte(text, i)); |
| + i += 2; |
| + if (i == text.length) |
| + break; |
|
Lasse Reichstein Nielsen
2013/05/28 08:45:33
break on line above, or add braces.
Søren Gjesse
2013/05/28 13:33:26
Done.
|
| + ch = text[i]; |
| + } |
| + result.write(decodeUtf8(codepoints)); |
| + } |
| + } |
| + return result.toString(); |
| + } |
| + |
| + // Tables of char-codes organized as a bit vector of 128 bits where |
| + // each bit indicate whether a character code on the 0-127 needs to |
| + // be escaped or not. |
| + |
| + // The unreserved characters of RFC 3986. |
| + static const _unreservedTable = const [ |
| + // LSB MSB |
| + // | | |
| + 0x0000, // 0x00 - 0x0f 0000000000000000 |
| + 0x0000, // 0x10 - 0x1f 0000000000000000 |
| + // -. |
| + 0x6000, // 0x20 - 0x2f 0000000000000110 |
| + // 0123456789 |
| + 0x03ff, // 0x30 - 0x3f 1111111111000000 |
| + // ABCDEFGHIJKLMNO |
| + 0xfffe, // 0x40 - 0x4f 0111111111111111 |
| + // PQRSTUVWXYZ _ |
| + 0x87ff, // 0x50 - 0x5f 1111111111100001 |
| + // abcdefghijklmno |
| + 0xfffe, // 0x60 - 0x6f 0111111111111111 |
| + // pqrstuvwxyz ~ |
| + 0x47ff]; // 0x70 - 0x7f 1111111111100010 |
| + |
| + // The unreserved characters of RFC 2396. |
| + static const _unreserved2396Table = const [ |
| + // LSB MSB |
| + // | | |
| + 0x0000, // 0x00 - 0x0f 0000000000000000 |
| + 0x0000, // 0x10 - 0x1f 0000000000000000 |
| + // ! '()* -. |
| + 0x6782, // 0x20 - 0x2f 0100000111100110 |
| + // 0123456789 |
| + 0x03ff, // 0x30 - 0x3f 1111111111000000 |
| + // ABCDEFGHIJKLMNO |
| + 0xfffe, // 0x40 - 0x4f 0111111111111111 |
| + // PQRSTUVWXYZ _ |
| + 0x87ff, // 0x50 - 0x5f 1111111111100001 |
| + // abcdefghijklmno |
| + 0xfffe, // 0x60 - 0x6f 0111111111111111 |
| + // pqrstuvwxyz ~ |
| + 0x47ff]; // 0x70 - 0x7f 1111111111100010 |
| + |
| + // Table of reserved characters specified by ECMAScript 5. |
| + static const _encodeFullTable = const [ |
| + // LSB MSB |
| + // | | |
| + 0x0000, // 0x00 - 0x0f 0000000000000000 |
| + 0x0000, // 0x10 - 0x1f 0000000000000000 |
| + // ! #$ &'()*+,-./ |
| + 0xf7da, // 0x20 - 0x2f 0101101111101111 |
| + // 0123456789:; = ? |
| + 0xafff, // 0x30 - 0x3f 1111111111110101 |
| + // @ABCDEFGHIJKLMNO |
| + 0xffff, // 0x40 - 0x4f 1111111111111111 |
| + // PQRSTUVWXYZ _ |
| + 0x87ff, // 0x50 - 0x5f 1111111111100001 |
| + // abcdefghijklmno |
| + 0xfffe, // 0x60 - 0x6f 0111111111111111 |
| + // pqrstuvwxyz ~ |
| + 0x47ff]; // 0x70 - 0x7f 1111111111100010 |
| + |
| + // Characters allowed in the scheme. |
| + static const _schemeTable = const [ |
| + // LSB MSB |
| + // | | |
| + 0x0000, // 0x00 - 0x0f 0000000000000000 |
| + 0x0000, // 0x10 - 0x1f 0000000000000000 |
| + // + -. |
| + 0x6800, // 0x20 - 0x2f 0000000000010110 |
| + // 0123456789 |
| + 0x03ff, // 0x30 - 0x3f 1111111111000000 |
| + // ABCDEFGHIJKLMNO |
| + 0xfffe, // 0x40 - 0x4f 0111111111111111 |
| + // PQRSTUVWXYZ |
| + 0x07ff, // 0x50 - 0x5f 1111111111100001 |
| + // abcdefghijklmno |
| + 0xfffe, // 0x60 - 0x6f 0111111111111111 |
| + // pqrstuvwxyz |
| + 0x07ff]; // 0x70 - 0x7f 1111111111100010 |
| + |
| + // Characters allowed in scheme except for upper case letters. |
| + static const _schemeLowerTable = const [ |
| + // LSB MSB |
| + // | | |
| + 0x0000, // 0x00 - 0x0f 0000000000000000 |
| + 0x0000, // 0x10 - 0x1f 0000000000000000 |
| + // + -. |
| + 0x6800, // 0x20 - 0x2f 0000000000010110 |
| + // 0123456789 |
| + 0x03ff, // 0x30 - 0x3f 1111111111000000 |
| + // |
| + 0x0000, // 0x40 - 0x4f 0111111111111111 |
| + // |
| + 0x0000, // 0x50 - 0x5f 1111111111100001 |
| + // abcdefghijklmno |
| + 0xfffe, // 0x60 - 0x6f 0111111111111111 |
| + // pqrstuvwxyz |
| + 0x07ff]; // 0x70 - 0x7f 1111111111100010 |
| + |
| + // Sub delimiter characters combined with unreserved as of 3986. |
| + // sub-delims = "!" / "$" / "&" / "'" / "(" / ")" |
| + // / "*" / "+" / "," / ";" / "=" |
| + // RFC 3986 section 2.3. |
| + // unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" |
| + static const _subDelimitersTable = const [ |
| + // LSB MSB |
| + // | | |
| + 0x0000, // 0x00 - 0x0f 0000000000000000 |
| + 0x0000, // 0x10 - 0x1f 0000000000000000 |
| + // ! $ &'()*+,-. |
| + 0x7fd2, // 0x20 - 0x2f 0100101111111110 |
| + // 0123456789 ; = |
| + 0x2bff, // 0x30 - 0x3f 1111111111010100 |
| + // ABCDEFGHIJKLMNO |
| + 0xfffe, // 0x40 - 0x4f 0111111111111111 |
| + // PQRSTUVWXYZ _ |
| + 0x87ff, // 0x50 - 0x5f 1111111111100001 |
| + // abcdefghijklmno |
| + 0xfffe, // 0x60 - 0x6f 0111111111111111 |
| + // pqrstuvwxyz ~ |
| + 0x47ff]; // 0x70 - 0x7f 1111111111100010 |
| + |
| + // Characters allowed in the path as of RFC 3986. |
| + // RFC 3986 section 3.3. |
| + // pchar = unreserved / pct-encoded / sub-delims / ":" / "@" |
| + static const _pathCharTable = const [ |
| + // LSB MSB |
| + // | | |
| + 0x0000, // 0x00 - 0x0f 0000000000000000 |
| + 0x0000, // 0x10 - 0x1f 0000000000000000 |
| + // ! $ &'()*+,-. |
| + 0x7fd2, // 0x20 - 0x2f 0100101111111110 |
| + // 0123456789:; = |
| + 0x2fff, // 0x30 - 0x3f 1111111111110100 |
| + // @ABCDEFGHIJKLMNO |
| + 0xffff, // 0x40 - 0x4f 1111111111111111 |
| + // PQRSTUVWXYZ _ |
| + 0x87ff, // 0x50 - 0x5f 1111111111100001 |
| + // abcdefghijklmno |
| + 0xfffe, // 0x60 - 0x6f 0111111111111111 |
| + // pqrstuvwxyz ~ |
| + 0x47ff]; // 0x70 - 0x7f 1111111111100010 |
| + |
| + // Characters allowed in the query as of RFC 3986. |
| + // RFC 3986 section 3.4. |
| + // query = *( pchar / "/" / "?" ) |
| + static const _queryCharTable = const [ |
| + // LSB MSB |
| + // | | |
| + 0x0000, // 0x00 - 0x0f 0000000000000000 |
| + 0x0000, // 0x10 - 0x1f 0000000000000000 |
| + // ! $ &'()*+,-./ |
| + 0xffd2, // 0x20 - 0x2f 0100101111111111 |
| + // 0123456789:; = ? |
| + 0xafff, // 0x30 - 0x3f 1111111111110101 |
| + // @ABCDEFGHIJKLMNO |
| + 0xffff, // 0x40 - 0x4f 1111111111111111 |
| + // PQRSTUVWXYZ _ |
| + 0x87ff, // 0x50 - 0x5f 1111111111100001 |
| + // abcdefghijklmno |
| + 0xfffe, // 0x60 - 0x6f 0111111111111111 |
| + // pqrstuvwxyz ~ |
| + 0x47ff]; // 0x70 - 0x7f 1111111111100010 |
| +} |