| Index: sdk/lib/core/uri.dart
|
| diff --git a/sdk/lib/core/uri.dart b/sdk/lib/core/uri.dart
|
| index d254850eaa092845e67714d48ee809ecf0654b0a..331ac3b0448de6cc1b679f5edca925c36480e753 100644
|
| --- a/sdk/lib/core/uri.dart
|
| +++ b/sdk/lib/core/uri.dart
|
| @@ -116,7 +116,11 @@ class Uri {
|
| Map<String, String> _queryParameters;
|
|
|
| /**
|
| - * Creates a new URI object by parsing a URI string.
|
| + * Creates a new `Uri` object by parsing a URI string.
|
| + *
|
| + * If the string is not valid as a URI or URI reference,
|
| + * invalid characters will be percent escaped where possible.
|
| + * The resulting `Uri` will represent a valid URI or URI reference.
|
| */
|
| static Uri parse(String uri) {
|
| // This parsing will not validate percent-encoding, IPv6, etc. When done
|
| @@ -175,171 +179,232 @@ class Uri {
|
| bool isRegName(int ch) {
|
| return ch < 128 && ((_regNameTable[ch >> 4] & (1 << (ch & 0x0f))) != 0);
|
| }
|
| + const int EOI = -1;
|
|
|
| - int ipV6Address(int index) {
|
| - // IPv6. Skip to ']'.
|
| - index = uri.indexOf(']', index);
|
| - if (index == -1) {
|
| - throw new FormatException("Bad end of IPv6 host");
|
| - }
|
| - return index + 1;
|
| - }
|
| + String scheme = "";
|
| + String path;
|
| + String userinfo = "";
|
| + String host = "";
|
| + int port = 0;
|
| + String query = "";
|
| + String fragment = "";
|
|
|
| - int length = uri.length;
|
| int index = 0;
|
| -
|
| - int schemeEndIndex = 0;
|
| -
|
| - if (length == 0) {
|
| - return new Uri();
|
| - }
|
| -
|
| - if (uri.codeUnitAt(0) != _SLASH) {
|
| - // Can be scheme.
|
| - while (index < length) {
|
| - // Look for ':'. If found, continue from the post of ':'. If not (end
|
| - // reached or invalid scheme char found) back up one char, and continue
|
| - // to path.
|
| - // Note that scheme-chars is contained in path-chars.
|
| - int codeUnit = uri.codeUnitAt(index++);
|
| - if (!_isSchemeCharacter(codeUnit)) {
|
| - if (codeUnit == _COLON) {
|
| - schemeEndIndex = index;
|
| - } else {
|
| - // Back up one char, since we met an invalid scheme char.
|
| - index--;
|
| - }
|
| + int pathStart = 0;
|
| + // End of input-marker.
|
| + int char = EOI;
|
| +
|
| + void parseAuth() {
|
| + if (index == uri.length) {
|
| + char = EOI;
|
| + return;
|
| + }
|
| + int authStart = index;
|
| + int lastColon = -1;
|
| + int lastAt = -1;
|
| + char = uri.codeUnitAt(index);
|
| + while (index < uri.length) {
|
| + char = uri.codeUnitAt(index);
|
| + if (char == _SLASH || char == _QUESTION || char == _NUMBER_SIGN) {
|
| break;
|
| }
|
| - }
|
| - }
|
| -
|
| - int userInfoEndIndex = -1;
|
| - int portIndex = -1;
|
| - int authorityEndIndex = schemeEndIndex;
|
| - // If we see '//', there must be an authority.
|
| - if (authorityEndIndex == index &&
|
| - authorityEndIndex + 1 < length &&
|
| - uri.codeUnitAt(authorityEndIndex) == _SLASH &&
|
| - uri.codeUnitAt(authorityEndIndex + 1) == _SLASH) {
|
| - // Skip '//'.
|
| - authorityEndIndex += 2;
|
| - // It can both be host and userInfo.
|
| - while (authorityEndIndex < length) {
|
| - int codeUnit = uri.codeUnitAt(authorityEndIndex++);
|
| - if (!isRegName(codeUnit)) {
|
| - if (codeUnit == _LEFT_BRACKET) {
|
| - authorityEndIndex = ipV6Address(authorityEndIndex);
|
| - } else if (portIndex == -1 && codeUnit == _COLON) {
|
| - // First time ':'.
|
| - portIndex = authorityEndIndex;
|
| - } else if (codeUnit == _AT_SIGN || codeUnit == _COLON) {
|
| - // Second time ':' or first '@'. Must be userInfo.
|
| - userInfoEndIndex = uri.indexOf('@', authorityEndIndex - 1);
|
| - // Not found. Must be path then.
|
| - if (userInfoEndIndex == -1) {
|
| - authorityEndIndex = index;
|
| - break;
|
| - }
|
| - portIndex = -1;
|
| - authorityEndIndex = userInfoEndIndex + 1;
|
| - // Now it can only be host:port.
|
| - while (authorityEndIndex < length) {
|
| - int codeUnit = uri.codeUnitAt(authorityEndIndex++);
|
| - if (!isRegName(codeUnit)) {
|
| - if (codeUnit == _LEFT_BRACKET) {
|
| - authorityEndIndex = ipV6Address(authorityEndIndex);
|
| - } else if (codeUnit == _COLON) {
|
| - if (portIndex != -1) {
|
| - throw new FormatException("Double port in host");
|
| - }
|
| - portIndex = authorityEndIndex;
|
| - } else {
|
| - authorityEndIndex--;
|
| - break;
|
| - }
|
| - }
|
| - }
|
| + if (char == _AT_SIGN) {
|
| + lastAt = index;
|
| + lastColon = -1;
|
| + } else if (char == _COLON) {
|
| + lastColon = index;
|
| + } else if (char == _LEFT_BRACKET) {
|
| + lastColon = -1;
|
| + int endBracket = uri.indexOf(']', index + 1);
|
| + if (endBracket == -1) {
|
| + index = uri.length;
|
| + char = EOI;
|
| break;
|
| } else {
|
| - authorityEndIndex--;
|
| - break;
|
| + index = endBracket;
|
| }
|
| }
|
| + index++;
|
| + char = EOI;
|
| + }
|
| + int hostStart = authStart;
|
| + int hostEnd = index;
|
| + if (lastAt >= 0) {
|
| + userinfo = _makeUserInfo(uri, authStart, lastAt);
|
| + hostStart = lastAt + 1;
|
| + }
|
| + if (lastColon >= 0) {
|
| + if (lastColon + 1 == index) {
|
| + _fail(uri, index, "Invalid port number");
|
| + }
|
| + int portNumber = 0;
|
| + for (int i = lastColon + 1; i < index; i++) {
|
| + int digit = uri.codeUnitAt(i);
|
| + if (_ZERO > digit || _NINE < digit) {
|
| + _fail(uri, i, "Invalid port number");
|
| + }
|
| + portNumber = portNumber * 10 + (digit - _ZERO);
|
| + }
|
| + port = _makePort(portNumber, scheme);
|
| + hostEnd = lastColon;
|
| + }
|
| + host = _makeHost(uri, hostStart, hostEnd, true);
|
| + if (index < uri.length) {
|
| + char = uri.codeUnitAt(index);
|
| }
|
| - } else {
|
| - authorityEndIndex = schemeEndIndex;
|
| }
|
|
|
| - // At path now.
|
| - int pathEndIndex = authorityEndIndex;
|
| - while (pathEndIndex < length) {
|
| - int codeUnit = uri.codeUnitAt(pathEndIndex++);
|
| - if (codeUnit == _QUESTION || codeUnit == _NUMBER_SIGN) {
|
| - pathEndIndex--;
|
| + // When reaching path parsing, the current character is known to not
|
| + // be part of the path.
|
| + const int NOT_IN_PATH = 0;
|
| + // When reaching path parsing, the current character is part
|
| + // of the a non-empty path.
|
| + const int IN_PATH = 1;
|
| + // When reaching authority parsing, authority is possible.
|
| + // This is only true at start or right after scheme.
|
| + const int ALLOW_AUTH = 2;
|
| +
|
| + // Current state.
|
| + // Initialized to the default value that is used when exiting the
|
| + // scheme loop by reaching the end of input.
|
| + // All other breaks set their own state.
|
| + int state = NOT_IN_PATH;
|
| + while (index < uri.length) {
|
| + char = uri.codeUnitAt(index);
|
| + if (char == _QUESTION || char == _NUMBER_SIGN) {
|
| + state = NOT_IN_PATH;
|
| break;
|
| }
|
| + if (char == _SLASH) {
|
| + state = (index == 0) ? ALLOW_AUTH : IN_PATH;
|
| + break;
|
| + }
|
| + if (char == _COLON) {
|
| + if (index == 0) _fail(uri, 0, "Invalid empty scheme");
|
| + scheme = _makeScheme(uri, index);
|
| + index++;
|
| + pathStart = index;
|
| + if (index == uri.length) {
|
| + char = EOI;
|
| + state = NOT_IN_PATH;
|
| + } else {
|
| + char = uri.codeUnitAt(index);
|
| + if (char == _QUESTION || char == _NUMBER_SIGN) {
|
| + state = NOT_IN_PATH;
|
| + } else if (char == _SLASH) {
|
| + state = ALLOW_AUTH;
|
| + } else {
|
| + state = IN_PATH;
|
| + }
|
| + }
|
| + break;
|
| + }
|
| + index++;
|
| + char = EOI;
|
| + }
|
| +
|
| + if (state == ALLOW_AUTH) {
|
| + assert(char == _SLASH);
|
| + // Have seen one slash either at start or right after scheme.
|
| + // If two slashes, it's an authority, otherwise it's just the path.
|
| + index++;
|
| + if (index == uri.length) {
|
| + char = EOI;
|
| + state = NOT_IN_PATH;
|
| + } else {
|
| + char = uri.codeUnitAt(index);
|
| + if (char == _SLASH) {
|
| + index++;
|
| + parseAuth();
|
| + pathStart = index;
|
| + }
|
| + if (char == _QUESTION || char == _NUMBER_SIGN || char == EOI) {
|
| + state = NOT_IN_PATH;
|
| + } else {
|
| + state = IN_PATH;
|
| + }
|
| + }
|
| }
|
|
|
| - // Maybe query.
|
| - int queryEndIndex = pathEndIndex;
|
| - if (queryEndIndex < length && uri.codeUnitAt(queryEndIndex) == _QUESTION) {
|
| - while (queryEndIndex < length) {
|
| - int codeUnit = uri.codeUnitAt(queryEndIndex++);
|
| - if (codeUnit == _NUMBER_SIGN) {
|
| - queryEndIndex--;
|
| + assert(state == IN_PATH || state == NOT_IN_PATH);
|
| + if (state == IN_PATH) {
|
| + // Characters from pathStart to index (inclusive) are known
|
| + // to be part of the path.
|
| + while (++index < uri.length) {
|
| + char = uri.codeUnitAt(index);
|
| + if (char == _QUESTION || char == _NUMBER_SIGN) {
|
| break;
|
| }
|
| + char = EOI;
|
| }
|
| + state = NOT_IN_PATH;
|
| }
|
|
|
| - var scheme = null;
|
| - if (schemeEndIndex > 0) {
|
| - scheme = uri.substring(0, schemeEndIndex - 1);
|
| - }
|
| + assert(state == NOT_IN_PATH);
|
| + bool ensureLeadingSlash = (host != "" || scheme == "file");
|
| + path = _makePath(uri, pathStart, index, null, ensureLeadingSlash);
|
|
|
| - var host = "";
|
| - var userInfo = "";
|
| - var port = 0;
|
| - if (schemeEndIndex != authorityEndIndex) {
|
| - int startIndex = schemeEndIndex + 2;
|
| - if (userInfoEndIndex > 0) {
|
| - userInfo = uri.substring(startIndex, userInfoEndIndex);
|
| - startIndex = userInfoEndIndex + 1;
|
| - }
|
| - if (portIndex > 0) {
|
| - var portStr = uri.substring(portIndex, authorityEndIndex);
|
| - try {
|
| - port = int.parse(portStr);
|
| - } catch (_) {
|
| - throw new FormatException("Invalid port: '$portStr'");
|
| - }
|
| - host = uri.substring(startIndex, portIndex - 1);
|
| + if (char == _QUESTION) {
|
| + int numberSignIndex = uri.indexOf('#', index + 1);
|
| + if (numberSignIndex < 0) {
|
| + query = _makeQuery(uri, index + 1, uri.length, null);
|
| } else {
|
| - host = uri.substring(startIndex, authorityEndIndex);
|
| + query = _makeQuery(uri, index + 1, numberSignIndex, null);
|
| + fragment = _makeFragment(uri, numberSignIndex + 1, uri.length);
|
| }
|
| - }
|
| + } else if (char == _NUMBER_SIGN) {
|
| + fragment = _makeFragment(uri, index + 1, uri.length);
|
| + }
|
| + return new Uri._internal(scheme,
|
| + userinfo,
|
| + host,
|
| + port,
|
| + path,
|
| + query,
|
| + fragment);
|
| + }
|
|
|
| - var path = uri.substring(authorityEndIndex, pathEndIndex);
|
| - var query = "";
|
| - if (pathEndIndex < queryEndIndex) {
|
| - query = uri.substring(pathEndIndex + 1, queryEndIndex);
|
| - }
|
| - var fragment = "";
|
| - // If queryEndIndex is not at end (length), there is a fragment.
|
| - if (queryEndIndex < length) {
|
| - fragment = uri.substring(queryEndIndex + 1, length);
|
| + // Report a parse failure.
|
| + static void _fail(String uri, int index, String message) {
|
| + // TODO(lrn): Consider adding this to FormatException.
|
| + if (index == uri.length) {
|
| + message += " at end of input.";
|
| + } else {
|
| + message += " at position $index.\n";
|
| + // Pick a slice of uri containing index and, if
|
| + // necessary, truncate the ends to ensure the entire
|
| + // slice fits on one line.
|
| + int min = 0;
|
| + int max = uri.length;
|
| + String pre = "";
|
| + String post = "";
|
| + if (uri.length > 78) {
|
| + min = index - 10;
|
| + if (min < 0) min = 0;
|
| + int max = min + 72;
|
| + if (max > uri.length) {
|
| + max = uri.length;
|
| + min = max - 72;
|
| + }
|
| + if (min != 0) pre = "...";
|
| + if (max != uri.length) post = "...";
|
| + }
|
| + // Combine message, slice and a caret pointing to the error index.
|
| + message = "$message$pre${uri.substring(min, max)}$post\n"
|
| + "${' ' * (pre.length + index - min)}^";
|
| }
|
| -
|
| - return new Uri(scheme: scheme,
|
| - userInfo: userInfo,
|
| - host: host,
|
| - port: port,
|
| - path: path,
|
| - query: query,
|
| - fragment: fragment);
|
| + throw new FormatException(message);
|
| }
|
|
|
| + /// Internal non-verifying constructor. Only call with validated arguments.
|
| + Uri._internal(this.scheme,
|
| + this.userInfo,
|
| + this._host,
|
| + this._port,
|
| + this._path,
|
| + this.query,
|
| + this.fragment);
|
| +
|
| /**
|
| * Creates a new URI from its components.
|
| *
|
| @@ -359,6 +424,7 @@ class Uri {
|
| * [host]. The host can either be a hostname, an IPv4 address or an
|
| * IPv6 address, contained in '[' and ']'. If the host contains a
|
| * ':' character, the '[' and ']' are added if not already provided.
|
| + * The host is normalized to all lowercase letters.
|
| *
|
| * The port part of the authority component is set through
|
| * [port]. The port is normalized for scheme http and https where
|
| @@ -386,29 +452,27 @@ class Uri {
|
| *
|
| * The fragment component is set through [fragment].
|
| */
|
| - Uri({String scheme,
|
| - this.userInfo: "",
|
| + factory Uri({String scheme,
|
| + String userInfo: "",
|
| String host: "",
|
| port: 0,
|
| String path,
|
| Iterable<String> pathSegments,
|
| String query,
|
| Map<String, String> queryParameters,
|
| - fragment: ""}) :
|
| - scheme = _makeScheme(scheme),
|
| - _host = _makeHost(host),
|
| - query = _makeQuery(query, queryParameters),
|
| - fragment = _makeFragment(fragment) {
|
| - // Perform scheme specific normalization.
|
| - if (scheme == "http" && port == 80) {
|
| - _port = 0;
|
| - } else if (scheme == "https" && port == 443) {
|
| - _port = 0;
|
| - } else {
|
| - _port = port;
|
| - }
|
| - // Fill the path.
|
| - _path = _makePath(path, pathSegments);
|
| + fragment: ""}) {
|
| + scheme = _makeScheme(scheme, _stringOrNullLength(scheme));
|
| + userInfo = _makeUserInfo(userInfo, 0, _stringOrNullLength(userInfo));
|
| + host = _makeHost(host, 0, _stringOrNullLength(host), false);
|
| + query = _makeQuery(query, 0, _stringOrNullLength(query), queryParameters);
|
| + fragment = _makeFragment(fragment, 0, _stringOrNullLength(fragment));
|
| + port = _makePort(port, scheme);
|
| + bool ensureLeadingSlash = (host != "" || scheme == "file");
|
| + path = _makePath(path, 0, _stringOrNullLength(path), pathSegments,
|
| + ensureLeadingSlash);
|
| +
|
| + return new Uri._internal(scheme, userInfo, host, port,
|
| + path, query, fragment);
|
| }
|
|
|
| /**
|
| @@ -489,7 +553,7 @@ class Uri {
|
| if (hostEnd == authority.length) {
|
| throw new FormatException("Invalid IPv6 host entry.");
|
| }
|
| - parseIPv6Address(authority.substring(hostStart + 1, hostEnd));
|
| + parseIPv6Address(authority, hostStart + 1, hostEnd);
|
| hostEnd++; // Skip the closing bracket.
|
| if (hostEnd != authority.length &&
|
| authority.codeUnitAt(hostEnd) != _COLON) {
|
| @@ -765,75 +829,193 @@ class Uri {
|
| return _queryParameters;
|
| }
|
|
|
| - static String _makeHost(String host) {
|
| - if (host == null || host.isEmpty) return host;
|
| - if (host.codeUnitAt(0) == _LEFT_BRACKET) {
|
| - if (host.codeUnitAt(host.length - 1) != _RIGHT_BRACKET) {
|
| - throw new FormatException('Missing end `]` to match `[` in host');
|
| - }
|
| - parseIPv6Address(host.substring(1, host.length - 1));
|
| - return host;
|
| + static int _makePort(int port, String scheme) {
|
| + // Perform scheme specific normalization.
|
| + if (port == 80 && scheme == "http") {
|
| + return 0;
|
| }
|
| - for (int i = 0; i < host.length; i++) {
|
| - if (host.codeUnitAt(i) == _COLON) {
|
| - parseIPv6Address(host);
|
| - return '[$host]';
|
| + if (port == 443 && scheme == "https") {
|
| + return 0;
|
| + }
|
| + return port;
|
| + }
|
| +
|
| + /**
|
| + * Check and normalize a most name.
|
| + *
|
| + * If the host name starts and ends with '[' and ']', it is considered an
|
| + * IPv6 address. If [strictIPv6] is false, the address is also considered
|
| + * an IPv6 address if it contains any ':' character.
|
| + *
|
| + * If it is not an IPv6 address, it is case- and escape-normalized.
|
| + * This escapes all characters not valid in a reg-name,
|
| + * and converts all non-escape upper-case letters to lower-case.
|
| + */
|
| + static String _makeHost(String host, int start, int end, bool strictIPv6) {
|
| + // TODO(lrn): Should we normalize IPv6 addresses according to RFC 5952?
|
| +
|
| + if (host == null) return null;
|
| + if (start == end) return "";
|
| + // Host is an IPv6 address if it starts with '[' or contains a colon.
|
| + if (host.codeUnitAt(start) == _LEFT_BRACKET) {
|
| + if (host.codeUnitAt(end - 1) != _RIGHT_BRACKET) {
|
| + _fail(host, start, 'Missing end `]` to match `[` in host');
|
| + }
|
| + parseIPv6Address(host, start + 1, end - 1);
|
| + // RFC 5952 requires hex digits to be lower case.
|
| + return host.substring(start, end).toLowerCase();
|
| + }
|
| + if (!strictIPv6) {
|
| + // TODO(lrn): skip if too short to be a valid IPv6 address?
|
| + for (int i = start; i < end; i++) {
|
| + if (host.codeUnitAt(i) == _COLON) {
|
| + parseIPv6Address(host, start, end);
|
| + return '[$host]';
|
| + }
|
| }
|
| }
|
| - return host;
|
| + return _normalizeRegName(host, start, end);
|
| + }
|
| +
|
| + static bool _isRegNameChar(int char) {
|
| + return char < 127 && (_regNameTable[char >> 4] & (1 << (char & 0xf))) != 0;
|
| }
|
|
|
| - static String _makeScheme(String scheme) {
|
| - bool isSchemeLowerCharacter(int ch) {
|
| - return ch < 128 &&
|
| - ((_schemeLowerTable[ch >> 4] & (1 << (ch & 0x0f))) != 0);
|
| + /**
|
| + * Validates and does case- and percent-encoding normalization.
|
| + *
|
| + * The [host] must be an RFC3986 "reg-name". It is converted
|
| + * to lower case, and percent escapes are converted to either
|
| + * lower case unreserved characters or upper case escapes.
|
| + */
|
| + static String _normalizeRegName(String host, int start, int end) {
|
| + StringBuffer buffer;
|
| + int sectionStart = start;
|
| + int index = start;
|
| + // Whether all characters between sectionStart and index are normalized,
|
| + bool isNormalized = true;
|
| +
|
| + while (index < end) {
|
| + int char = host.codeUnitAt(index);
|
| + if (char == _PERCENT) {
|
| + // The _regNameTable contains "%", so we check that first.
|
| + String replacement = _normalizeEscape(host, index, true);
|
| + if (replacement == null && isNormalized) {
|
| + index += 3;
|
| + continue;
|
| + }
|
| + if (buffer == null) buffer = new StringBuffer();
|
| + String slice = host.substring(sectionStart, index);
|
| + if (!isNormalized) slice = slice.toLowerCase();
|
| + buffer.write(slice);
|
| + int sourceLength = 3;
|
| + if (replacement == null) {
|
| + replacement = host.substring(index, index + 3);
|
| + } else if (replacement == "%") {
|
| + replacement = "%25";
|
| + sourceLength = 1;
|
| + }
|
| + buffer.write(replacement);
|
| + index += sourceLength;
|
| + sectionStart = index;
|
| + isNormalized = true;
|
| + } else if (_isRegNameChar(char)) {
|
| + if (isNormalized && _UPPER_CASE_A <= char && _UPPER_CASE_Z >= char) {
|
| + // Put initial slice in buffer and continue in non-normalized mode
|
| + if (buffer == null) buffer = new StringBuffer();
|
| + if (sectionStart < index) {
|
| + buffer.write(host.substring(sectionStart, index));
|
| + sectionStart = index;
|
| + }
|
| + isNormalized = false;
|
| + }
|
| + index++;
|
| + } else if (_isGeneralDelimiter(char)) {
|
| + _fail(host, index, "Invalid character");
|
| + } else {
|
| + int sourceLength = 1;
|
| + if ((char & 0xFC00) == 0xD800 && (index + 1) < end) {
|
| + int tail = host.codeUnitAt(index + 1);
|
| + if ((tail & 0xFC00) == 0xDC00) {
|
| + char = 0x10000 | ((char & 0x3ff) << 10) | (tail & 0x3ff);
|
| + sourceLength = 2;
|
| + }
|
| + }
|
| + if (buffer == null) buffer = new StringBuffer();
|
| + String slice = host.substring(sectionStart, index);
|
| + if (!isNormalized) slice = slice.toLowerCase();
|
| + buffer.write(slice);
|
| + buffer.write(_escapeChar(char));
|
| + index += sourceLength;
|
| + sectionStart = index;
|
| + }
|
| + }
|
| + if (buffer == null) return host.substring(start, end);
|
| + if (sectionStart < end) {
|
| + String slice = host.substring(sectionStart, end);
|
| + if (!isNormalized) slice = slice.toLowerCase();
|
| + buffer.write(slice);
|
| }
|
| + return buffer.toString();
|
| + }
|
|
|
| - if (scheme == null) return "";
|
| - bool allLowercase = true;
|
| - int length = scheme.length;
|
| - for (int i = 0; i < length; i++) {
|
| + /**
|
| + * Validates scheme characters and does case-normalization.
|
| + *
|
| + * Schemes are converted to lower case. They cannot contain escapes.
|
| + */
|
| + static String _makeScheme(String scheme, int end) {
|
| + if (end == 0) return "";
|
| + int char = scheme.codeUnitAt(0);
|
| + if (!_isAlphabeticCharacter(char)) {
|
| + _fail(scheme, 0, "Scheme not starting with alphabetic character");
|
| + }
|
| + bool allLowercase = char >= _LOWER_CASE_A;
|
| + for (int i = 0; i < end; i++) {
|
| int codeUnit = scheme.codeUnitAt(i);
|
| - if (i == 0 && !_isAlphabeticCharacter(codeUnit)) {
|
| - // First code unit must be an alphabetic character.
|
| - throw new ArgumentError('Illegal scheme: $scheme');
|
| + if (!_isSchemeCharacter(codeUnit)) {
|
| + _fail(scheme, i, "Illegal scheme character");
|
| }
|
| - if (!isSchemeLowerCharacter(codeUnit)) {
|
| - if (_isSchemeCharacter(codeUnit)) {
|
| - allLowercase = false;
|
| - } else {
|
| - throw new ArgumentError('Illegal scheme: $scheme');
|
| - }
|
| + if (_LOWER_CASE_A <= char && _LOWER_CASE_Z >= char) {
|
| + allLowercase = false;
|
| }
|
| }
|
| + scheme = scheme.substring(0, end);
|
| + if (!allLowercase) scheme = scheme.toLowerCase();
|
| + return scheme;
|
| + }
|
|
|
| - return allLowercase ? scheme : scheme.toLowerCase();
|
| + static String _makeUserInfo(String userInfo, int start, int end) {
|
| + if (userInfo == null) return "null";
|
| + return _normalize(userInfo, start, end, _userinfoTable);
|
| }
|
|
|
| - String _makePath(String path, Iterable<String> pathSegments) {
|
| + static String _makePath(String path, int start, int end,
|
| + Iterable<String> pathSegments,
|
| + bool ensureLeadingSlash) {
|
| if (path == null && pathSegments == null) return "";
|
| if (path != null && pathSegments != null) {
|
| throw new ArgumentError('Both path and pathSegments specified');
|
| }
|
| var result;
|
| if (path != null) {
|
| - result = _normalize(path);
|
| + result = _normalize(path, start, end, _pathCharOrSlashTable);
|
| } else {
|
| result = pathSegments.map((s) => _uriEncode(_pathCharTable, s)).join("/");
|
| }
|
| - if ((hasAuthority || (scheme == "file")) &&
|
| - result.isNotEmpty && !result.startsWith("/")) {
|
| + if (ensureLeadingSlash && result.isNotEmpty && !result.startsWith("/")) {
|
| return "/$result";
|
| }
|
| return result;
|
| }
|
|
|
| - static String _makeQuery(String query, Map<String, String> queryParameters) {
|
| + static String _makeQuery(String query, int start, int end,
|
| + Map<String, String> queryParameters) {
|
| if (query == null && queryParameters == null) return "";
|
| if (query != null && queryParameters != null) {
|
| throw new ArgumentError('Both query and queryParameters specified');
|
| }
|
| - if (query != null) return _normalize(query);
|
| + if (query != null) return _normalize(query, start, end, _queryCharTable);
|
|
|
| var result = new StringBuffer();
|
| var first = true;
|
| @@ -851,123 +1033,184 @@ class Uri {
|
| return result.toString();
|
| }
|
|
|
| - static String _makeFragment(String fragment) {
|
| + static String _makeFragment(String fragment, int start, int end) {
|
| if (fragment == null) return "";
|
| - return _normalize(fragment);
|
| + return _normalize(fragment, start, end, _queryCharTable);
|
| }
|
|
|
| - static String _normalize(String component) {
|
| - int index = component.indexOf('%');
|
| - if (index < 0) return component;
|
| -
|
| - bool isNormalizedHexDigit(int digit) {
|
| - return (_ZERO <= digit && digit <= _NINE) ||
|
| - (_UPPER_CASE_A <= digit && digit <= _UPPER_CASE_F);
|
| - }
|
| + static int _stringOrNullLength(String s) => (s == null) ? 0 : s.length;
|
|
|
| - bool isLowerCaseHexDigit(int digit) {
|
| - return _LOWER_CASE_A <= digit && digit <= _LOWER_CASE_F;
|
| - }
|
| + static bool _isHexDigit(int char) {
|
| + if (_NINE >= char) return _ZERO <= char;
|
| + char |= 0x20;
|
| + return _LOWER_CASE_A <= char && _LOWER_CASE_F >= char;
|
| + }
|
|
|
| - bool isUnreserved(int ch) {
|
| - return ch < 128 &&
|
| - ((_unreservedTable[ch >> 4] & (1 << (ch & 0x0f))) != 0);
|
| - }
|
| + static int _hexValue(int char) {
|
| + assert(_isHexDigit(char));
|
| + if (_NINE >= char) return char - _ZERO;
|
| + char |= 0x20;
|
| + return char - (_LOWER_CASE_A - 10);
|
| + }
|
|
|
| - int normalizeHexDigit(int index) {
|
| - var codeUnit = component.codeUnitAt(index);
|
| - if (isLowerCaseHexDigit(codeUnit)) {
|
| - return codeUnit - 0x20;
|
| - } else if (!isNormalizedHexDigit(codeUnit)) {
|
| - throw new ArgumentError("Invalid URI component: $component");
|
| - } else {
|
| - return codeUnit;
|
| + /**
|
| + * Performs RFC 3986 Percent-Encoding Normalization.
|
| + *
|
| + * Returns a replacement string that should be replace the original escape.
|
| + * Returns null if no replacement is necessary because the escape is
|
| + * not for an unreserved character and is already non-lower-case.
|
| + *
|
| + * Returns "%" if the escape is invalid (not two valid hex digits following
|
| + * the percent sign). The calling code should replace the percent
|
| + * sign with "%25", but leave the following two characters unmodified.
|
| + *
|
| + * If [lowerCase] is true, a single character returned is always lower case,
|
| + */
|
| + static String _normalizeEscape(String source, int index, bool lowerCase) {
|
| + assert(source.codeUnitAt(index) == _PERCENT);
|
| + if (index + 2 >= source.length) {
|
| + return "%"; // Marks the escape as invalid.
|
| + }
|
| + int firstDigit = source.codeUnitAt(index + 1);
|
| + int secondDigit = source.codeUnitAt(index + 2);
|
| + if (!_isHexDigit(firstDigit) || !_isHexDigit(secondDigit)) {
|
| + return "%"; // Marks the escape as invalid.
|
| + }
|
| + int value = _hexValue(firstDigit) * 16 + _hexValue(secondDigit);
|
| + if (_isUnreservedChar(value)) {
|
| + if (lowerCase && _UPPER_CASE_A <= value && _UPPER_CASE_Z >= value) {
|
| + value |= 0x20;
|
| }
|
| + return new String.fromCharCode(value);
|
| }
|
| -
|
| - int decodeHexDigitPair(int index) {
|
| - int byte = 0;
|
| - for (int i = 0; i < 2; i++) {
|
| - var codeUnit = component.codeUnitAt(index + i);
|
| - if (_ZERO <= codeUnit && codeUnit <= _NINE) {
|
| - byte = byte * 16 + codeUnit - _ZERO;
|
| - } else {
|
| - // Check ranges A-F (0x41-0x46) and a-f (0x61-0x66).
|
| - codeUnit |= 0x20;
|
| - if (_LOWER_CASE_A <= codeUnit &&
|
| - codeUnit <= _LOWER_CASE_F) {
|
| - byte = byte * 16 + codeUnit - _LOWER_CASE_A + 10;
|
| - } else {
|
| - throw new ArgumentError(
|
| - "Invalid percent-encoding in URI component: $component");
|
| - }
|
| - }
|
| - }
|
| - return byte;
|
| + if (firstDigit >= _LOWER_CASE_A || secondDigit >= _LOWER_CASE_A) {
|
| + // Either digit is lower case.
|
| + return source.substring(index, index + 3).toUpperCase();
|
| }
|
| + // Escape is retained, and is already non-lower case, so return null to
|
| + // represent "no replacement necessary".
|
| + return null;
|
| + }
|
|
|
| - // Start building the normalized component string.
|
| - StringBuffer result;
|
| - int length = component.length;
|
| - int prevIndex = 0;
|
| + static bool _isUnreservedChar(int ch) {
|
| + return ch < 127 &&
|
| + ((_unreservedTable[ch >> 4] & (1 << (ch & 0x0f))) != 0);
|
| + }
|
|
|
| - // Copy a part of the component string to the result.
|
| - void fillResult() {
|
| - if (result == null) {
|
| - assert(prevIndex == 0);
|
| - result = new StringBuffer(component.substring(prevIndex, index));
|
| - } else {
|
| - result.write(component.substring(prevIndex, index));
|
| + static String _escapeChar(char) {
|
| + assert(char <= 0x10ffff); // It's a valid unicode code point.
|
| + const hexDigits = "0123456789ABCDEF";
|
| + List codeUnits;
|
| + if (char < 0x80) {
|
| + // ASCII, a single percent encoded sequence.
|
| + codeUnits = new List(3);
|
| + codeUnits[0] = _PERCENT;
|
| + codeUnits[1] = hexDigits.codeUnitAt(char >> 4);
|
| + codeUnits[2] = hexDigits.codeUnitAt(char & 0xf);
|
| + } else {
|
| + // Do UTF-8 encoding of character, then percent encode bytes.
|
| + int flag = 0xc0; // The high-bit markers on the first byte of UTF-8.
|
| + int encodedBytes = 2;
|
| + if (char > 0x7ff) {
|
| + flag = 0xe0;
|
| + encodedBytes = 3;
|
| + if (char > 0xffff) {
|
| + encodedBytes = 4;
|
| + flag = 0xf0;
|
| + }
|
| }
|
| - }
|
| -
|
| - while (index < length) {
|
| - // Normalize percent-encoding to uppercase and don't encode
|
| - // unreserved characters.
|
| - assert(component.codeUnitAt(index) == _PERCENT);
|
| - if (length < index + 2) {
|
| - throw new ArgumentError(
|
| - "Invalid percent-encoding in URI component: $component");
|
| + codeUnits = new List(3 * encodedBytes);
|
| + int index = 0;
|
| + while (--encodedBytes >= 0) {
|
| + int byte = ((char >> (6 * encodedBytes)) & 0x3f) | flag;
|
| + codeUnits[index] = _PERCENT;
|
| + codeUnits[index + 1] = hexDigits.codeUnitAt(byte >> 4);
|
| + codeUnits[index + 2] = hexDigits.codeUnitAt(byte & 0xf);
|
| + index += 3;
|
| + flag = 0x80; // Following bytes have only high bit set.
|
| }
|
| + }
|
| + return new String.fromCharCodes(codeUnits);
|
| + }
|
|
|
| - var codeUnit1 = component.codeUnitAt(index + 1);
|
| - var codeUnit2 = component.codeUnitAt(index + 2);
|
| - var decodedCodeUnit = decodeHexDigitPair(index + 1);
|
| - if (isNormalizedHexDigit(codeUnit1) &&
|
| - isNormalizedHexDigit(codeUnit2) &&
|
| - !isUnreserved(decodedCodeUnit)) {
|
| - index += 3;
|
| + /**
|
| + * Runs through component checking that each character is valid and
|
| + * normalize percent escapes.
|
| + *
|
| + * Uses [charTable] to check if a non-`%` character is allowed.
|
| + * Each `%` character must be followed by two hex digits.
|
| + * If the hex-digits are lower case letters, they are converted to
|
| + * upper case.
|
| + */
|
| + static String _normalize(String component, int start, int end,
|
| + List<int> charTable) {
|
| + StringBuffer buffer;
|
| + int sectionStart = start;
|
| + int index = start;
|
| + // Loop while characters are valid and escapes correct and upper-case.
|
| + while (index < end) {
|
| + int char = component.codeUnitAt(index);
|
| + if (char < 127 && (charTable[char >> 4] & (1 << (char & 0x0f))) != 0) {
|
| + index++;
|
| } else {
|
| - fillResult();
|
| - if (isUnreserved(decodedCodeUnit)) {
|
| - result.writeCharCode(decodedCodeUnit);
|
| + String replacement;
|
| + int sourceLength;
|
| + if (char == _PERCENT) {
|
| + replacement = _normalizeEscape(component, index, false);
|
| + // Returns null if we should keep the existing escape.
|
| + if (replacement == null) {
|
| + index += 3;
|
| + continue;
|
| + }
|
| + // Returns "%" if we should escape the existing percent.
|
| + if ("%" == replacement) {
|
| + replacement = "%25";
|
| + sourceLength = 1;
|
| + } else {
|
| + sourceLength = 3;
|
| + }
|
| + } else if (_isGeneralDelimiter(char)) {
|
| + _fail(component, index, "Invalid character");
|
| } else {
|
| - result.write("%");
|
| - result.writeCharCode(normalizeHexDigit(index + 1));
|
| - result.writeCharCode(normalizeHexDigit(index + 2));
|
| + sourceLength = 1;
|
| + if ((char & 0xFC00) == 0xD800) {
|
| + // Possible lead surrogate.
|
| + if (index + 1 < end) {
|
| + int tail = component.codeUnitAt(index + 1);
|
| + if ((tail & 0xFC00) == 0xDC00) {
|
| + // Tail surrogat.
|
| + sourceLength = 2;
|
| + char = 0x10000 | ((char & 0x3ff) << 10) | (tail & 0x3ff);
|
| + }
|
| + }
|
| + }
|
| + replacement = _escapeChar(char);
|
| }
|
| - index += 3;
|
| - prevIndex = index;
|
| - }
|
| - int next = component.indexOf('%', index);
|
| - if (next >= index) {
|
| - index = next;
|
| - } else {
|
| - index = length;
|
| + if (buffer == null) buffer = new StringBuffer();
|
| + buffer.write(component.substring(sectionStart, index));
|
| + buffer.write(replacement);
|
| + index += sourceLength;
|
| + sectionStart = index;
|
| }
|
| }
|
| - if (result == null) return component;
|
| -
|
| - if (result != null && prevIndex != index) fillResult();
|
| - assert(index == length);
|
| -
|
| - return result.toString();
|
| + if (buffer == null) {
|
| + // Makes no copy if start == 0 and end == component.length.
|
| + return component.substring(start, end);
|
| + }
|
| + if (sectionStart < end) {
|
| + buffer.write(component.substring(sectionStart, end));
|
| + }
|
| + return buffer.toString();
|
| }
|
|
|
| static bool _isSchemeCharacter(int ch) {
|
| return ch < 128 && ((_schemeTable[ch >> 4] & (1 << (ch & 0x0f))) != 0);
|
| }
|
|
|
| + static bool _isGeneralDelimiter(int ch) {
|
| + return ch <= 64 &&
|
| + ((_genDelimitersTable[ch >> 4] & (1 << (ch & 0x0f))) != 0);
|
| + }
|
|
|
| /**
|
| * Returns whether the URI is absolute.
|
| @@ -1465,6 +1708,9 @@ class Uri {
|
| * Throws a [FormatException] if [host] is not a valid IPv6 address
|
| * representation.
|
| *
|
| + * Acts on the substring from [start] to [end]. If [end] is omitted, it
|
| + * defaults ot the end of the string.
|
| + *
|
| * Some examples of IPv6 addresses:
|
| * * ::1
|
| * * FEDC:BA98:7654:3210:FEDC:BA98:7654:3210
|
| @@ -1472,7 +1718,8 @@ class Uri {
|
| * * ::FFFF:129.144.52.38
|
| * * 2010:836B:4179::836B:4179
|
| */
|
| - static List<int> parseIPv6Address(String host) {
|
| + static List<int> parseIPv6Address(String host, [int start = 0, int end]) {
|
| + if (end == null) end = host.length;
|
| // An IPv6 address consists of exactly 8 parts of 1-4 hex digits, seperated
|
| // by `:`'s, with the following exceptions:
|
| //
|
| @@ -1495,11 +1742,11 @@ class Uri {
|
| if (host.length < 2) error('address is too short');
|
| List<int> parts = [];
|
| bool wildcardSeen = false;
|
| - int partStart = 0;
|
| + int partStart = start;
|
| // Parse all parts, except a potential last one.
|
| - for (int i = 0; i < host.length; i++) {
|
| + for (int i = start; i < end; i++) {
|
| if (host.codeUnitAt(i) == _COLON) {
|
| - if (i == 0) {
|
| + if (i == start) {
|
| // If we see a `:` in the beginning, expect wildcard.
|
| i++;
|
| if (host.codeUnitAt(i) != _COLON) {
|
| @@ -1522,18 +1769,18 @@ class Uri {
|
| }
|
| }
|
| if (parts.length == 0) error('too few parts');
|
| - bool atEnd = partStart == host.length;
|
| - bool isLastWildcard = parts.last == -1;
|
| + bool atEnd = (partStart == end);
|
| + bool isLastWildcard = (parts.last == -1);
|
| if (atEnd && !isLastWildcard) {
|
| error('expected a part after last `:`');
|
| }
|
| if (!atEnd) {
|
| try {
|
| - parts.add(parseHex(partStart, host.length));
|
| + parts.add(parseHex(partStart, end));
|
| } catch (e) {
|
| // Failed to parse the last chunk as hex. Try IPv4.
|
| try {
|
| - List<int> last = parseIPv4Address(host.substring(partStart));
|
| + List<int> last = parseIPv4Address(host.substring(partStart, end));
|
| parts.add(last[0] << 8 | last[1]);
|
| parts.add(last[2] << 8 | last[3]);
|
| } catch (e) {
|
| @@ -1549,15 +1796,23 @@ class Uri {
|
| error('an address without a wildcard must contain exactly 8 parts');
|
| }
|
| // TODO(ajohnsen): Consider using Uint8List.
|
| - return parts
|
| - .expand((value) {
|
| - if (value == -1) {
|
| - return new List.filled((9 - parts.length) * 2, 0);
|
| - } else {
|
| - return [(value >> 8) & 0xFF, value & 0xFF];
|
| - }
|
| - })
|
| - .toList();
|
| + List bytes = new List<int>(16);
|
| + for (int i = 0, index = 0; i < parts.length; i++) {
|
| + int value = parts[i];
|
| + if (value == -1) {
|
| + int wildCardLength = 9 - parts.length;
|
| + for (int j = 0; j < wildCardLength; j++) {
|
| + bytes[index] = 0;
|
| + bytes[index + 1] = 0;
|
| + index += 2;
|
| + }
|
| + } else {
|
| + bytes[index] = value >> 8;
|
| + bytes[index + 1] = value & 0xff;
|
| + index += 2;
|
| + }
|
| + }
|
| + return bytes;
|
| }
|
|
|
| // Frequently used character codes.
|
| @@ -1820,6 +2075,48 @@ class Uri {
|
| // pqrstuvwxyz ~
|
| 0x47ff]; // 0x70 - 0x7f 1111111111100010
|
|
|
| + // General delimiter characters, RFC 3986 section 2.2.
|
| + // gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
|
| + //
|
| + static const _genDelimitersTable = const [
|
| + // LSB MSB
|
| + // | |
|
| + 0x0000, // 0x00 - 0x0f 0000000000000000
|
| + 0x0000, // 0x10 - 0x1f 0000000000000000
|
| + // # /
|
| + 0x8008, // 0x20 - 0x2f 0001000000000001
|
| + // : [ ]?
|
| + 0xe400, // 0x30 - 0x3f 0000000000101011
|
| + // @
|
| + 0x0001, // 0x40 - 0x4f 1000000000000000
|
| + //
|
| + 0x0000, // 0x50 - 0x5f 0000000000000000
|
| + //
|
| + 0x0000, // 0x60 - 0x6f 0000000000000000
|
| + //
|
| + 0x0000]; // 0x70 - 0x7f 0000000000000000
|
| +
|
| + // Characters allowed in the userinfo as of RFC 3986.
|
| + // RFC 3986 Apendix A
|
| + // userinfo = *( unreserved / pct-encoded / sub-delims / ':')
|
| + static const _userinfoTable = const [
|
| + // LSB MSB
|
| + // | |
|
| + 0x0000, // 0x00 - 0x0f 0000000000000000
|
| + 0x0000, // 0x10 - 0x1f 0000000000000000
|
| + // ! $ &'()*+,-.
|
| + 0x7fd2, // 0x20 - 0x2f 0100101111111110
|
| + // 0123456789:; =
|
| + 0x2fff, // 0x30 - 0x3f 1111111111110100
|
| + // ABCDEFGHIJKLMNO
|
| + 0xfffe, // 0x40 - 0x4f 0111111111111111
|
| + // PQRSTUVWXYZ _
|
| + 0x87ff, // 0x50 - 0x5f 1111111111100001
|
| + // abcdefghijklmno
|
| + 0xfffe, // 0x60 - 0x6f 0111111111111111
|
| + // pqrstuvwxyz ~
|
| + 0x47ff]; // 0x70 - 0x7f 1111111111100010
|
| +
|
| // Characters allowed in the reg-name as of RFC 3986.
|
| // RFC 3986 Apendix A
|
| // reg-name = *( unreserved / pct-encoded / sub-delims )
|
| @@ -1862,6 +2159,27 @@ class Uri {
|
| // pqrstuvwxyz ~
|
| 0x47ff]; // 0x70 - 0x7f 1111111111100010
|
|
|
| + // Characters allowed in the path as of RFC 3986.
|
| + // RFC 3986 section 3.3 *and* slash.
|
| + static const _pathCharOrSlashTable = const [
|
| + // LSB MSB
|
| + // | |
|
| + 0x0000, // 0x00 - 0x0f 0000000000000000
|
| + 0x0000, // 0x10 - 0x1f 0000000000000000
|
| + // ! $ &'()*+,-./
|
| + 0xffd2, // 0x20 - 0x2f 0100101111111111
|
| + // 0123456789:; =
|
| + 0x2fff, // 0x30 - 0x3f 1111111111110100
|
| + // @ABCDEFGHIJKLMNO
|
| + 0xffff, // 0x40 - 0x4f 1111111111111111
|
| +
|
| + // PQRSTUVWXYZ _
|
| + 0x87ff, // 0x50 - 0x5f 1111111111100001
|
| + // abcdefghijklmno
|
| + 0xfffe, // 0x60 - 0x6f 0111111111111111
|
| + // pqrstuvwxyz ~
|
| + 0x47ff]; // 0x70 - 0x7f 1111111111100010
|
| +
|
| // Characters allowed in the query as of RFC 3986.
|
| // RFC 3986 section 3.4.
|
| // query = *( pchar / "/" / "?" )
|
|
|