Chromium Code Reviews| Index: sdk/lib/core/uri.dart |
| diff --git a/sdk/lib/core/uri.dart b/sdk/lib/core/uri.dart |
| index bdaf08639b200222f667b76c3a888f3678393f99..36d94d05657c8ca86989f3cfc87f5906750f6493 100644 |
| --- a/sdk/lib/core/uri.dart |
| +++ b/sdk/lib/core/uri.dart |
| @@ -176,170 +176,202 @@ class Uri { |
| return ch < 128 && ((_regNameTable[ch >> 4] & (1 << (ch & 0x0f))) != 0); |
| } |
| - int ipV6Address(int index) { |
| - // IPv6. Skip to ']'. |
| - index = uri.indexOf(']', index); |
| - if (index == -1) { |
| - throw new FormatException("Bad end of IPv6 host"); |
| - } |
| - return index + 1; |
| - } |
| + String scheme = ""; |
| + String path; |
| + String userinfo = ""; |
| + String host = ""; |
| + int port = 0; |
| + String query = ""; |
| + String fragment = ""; |
| - int length = uri.length; |
| int index = 0; |
| + int pathStart = 0; |
| + int char = -1; |
| - int schemeEndIndex = 0; |
| - |
| - if (length == 0) { |
| - return new Uri(); |
| - } |
| - |
| - if (uri.codeUnitAt(0) != _SLASH) { |
| - // Can be scheme. |
| - while (index < length) { |
| - // Look for ':'. If found, continue from the post of ':'. If not (end |
| - // reached or invalid scheme char found) back up one char, and continue |
| - // to path. |
| - // Note that scheme-chars is contained in path-chars. |
| - int codeUnit = uri.codeUnitAt(index++); |
| - if (!_isSchemeCharacter(codeUnit)) { |
| - if (codeUnit == _COLON) { |
| - schemeEndIndex = index; |
| - } else { |
| - // Back up one char, since we met an invalid scheme char. |
| - index--; |
| - } |
| - break; |
| - } |
| + void parseAuth() { |
| + if (index == uri.length) { |
| + char = -1; |
| + return; |
| } |
| - } |
| - |
| - int userInfoEndIndex = -1; |
| - int portIndex = -1; |
| - int authorityEndIndex = schemeEndIndex; |
| - // If we see '//', there must be an authority. |
| - if (authorityEndIndex == index && |
| - authorityEndIndex + 1 < length && |
| - uri.codeUnitAt(authorityEndIndex) == _SLASH && |
| - uri.codeUnitAt(authorityEndIndex + 1) == _SLASH) { |
| - // Skip '//'. |
| - authorityEndIndex += 2; |
| - // It can both be host and userInfo. |
| - while (authorityEndIndex < length) { |
| - int codeUnit = uri.codeUnitAt(authorityEndIndex++); |
| - if (!isRegName(codeUnit)) { |
| - if (codeUnit == _LEFT_BRACKET) { |
| - authorityEndIndex = ipV6Address(authorityEndIndex); |
| - } else if (portIndex == -1 && codeUnit == _COLON) { |
| - // First time ':'. |
| - portIndex = authorityEndIndex; |
| - } else if (codeUnit == _AT_SIGN || codeUnit == _COLON) { |
| - // Second time ':' or first '@'. Must be userInfo. |
| - userInfoEndIndex = uri.indexOf('@', authorityEndIndex - 1); |
| - // Not found. Must be path then. |
| - if (userInfoEndIndex == -1) { |
| - authorityEndIndex = index; |
| + int authStart = index; |
| + int lastColon = -1; |
| + int lastAt = -1; |
| + char = uri.codeUnitAt(index); |
| + authEnd: { |
| + while (index < uri.length) { |
| + char = uri.codeUnitAt(index); |
| + if (char == _SLASH || char == _QUESTION || char == _NUMBER_SIGN) { |
| + break authEnd; |
|
Anders Johnsen
2014/06/17 13:07:03
Consider alternative without authEnd block.
Lasse Reichstein Nielsen
2014/06/17 14:31:45
Considered. See suggestion.
|
| + } |
| + if (char == _AT_SIGN) { |
| + lastAt = index; |
| + lastColon = -1; |
| + } else if (char == _COLON) { |
| + lastColon = index; |
| + } else if (char == _LEFT_BRACKET) { |
| + lastColon = -1; |
| + int endBracket = uri.indexOf(']', index + 1); |
| + if (endBracket == -1) { |
| + index = uri.length; |
| break; |
| + } else { |
| + index = endBracket; |
| } |
| - portIndex = -1; |
| - authorityEndIndex = userInfoEndIndex + 1; |
| - // Now it can only be host:port. |
| - while (authorityEndIndex < length) { |
| - int codeUnit = uri.codeUnitAt(authorityEndIndex++); |
| - if (!isRegName(codeUnit)) { |
| - if (codeUnit == _LEFT_BRACKET) { |
| - authorityEndIndex = ipV6Address(authorityEndIndex); |
| - } else if (codeUnit == _COLON) { |
| - if (portIndex != -1) { |
| - throw new FormatException("Double port in host"); |
| - } |
| - portIndex = authorityEndIndex; |
| - } else { |
| - authorityEndIndex--; |
| - break; |
| - } |
| - } |
| - } |
| - break; |
| - } else { |
| - authorityEndIndex--; |
| - break; |
| } |
| + index++; |
| } |
| + char = -1; |
| } |
| - } else { |
| - authorityEndIndex = schemeEndIndex; |
| - } |
| - |
| - // At path now. |
| - int pathEndIndex = authorityEndIndex; |
| - while (pathEndIndex < length) { |
| - int codeUnit = uri.codeUnitAt(pathEndIndex++); |
| - if (codeUnit == _QUESTION || codeUnit == _NUMBER_SIGN) { |
| - pathEndIndex--; |
| - break; |
| + int hostStart = authStart; |
| + int hostEnd = index; |
| + if (lastAt >= 0) { |
| + userinfo = _makeUserInfo(uri, authStart, lastAt); |
| + hostStart = lastAt + 1; |
| } |
| - } |
| - |
| - // Maybe query. |
| - int queryEndIndex = pathEndIndex; |
| - if (queryEndIndex < length && uri.codeUnitAt(queryEndIndex) == _QUESTION) { |
| - while (queryEndIndex < length) { |
| - int codeUnit = uri.codeUnitAt(queryEndIndex++); |
| - if (codeUnit == _NUMBER_SIGN) { |
| - queryEndIndex--; |
| - break; |
| + if (lastColon >= 0) { |
| + if (lastColon + 1 == index) { |
| + _fail(uri, index, "Invalid port number"); |
| + } |
| + int portNumber = 0; |
| + for (int i = lastColon + 1; i < index; i++) { |
| + int digit = uri.codeUnitAt(i); |
| + if (_ZERO > digit || _NINE < digit) { |
| + _fail(uri, i, "Invalid port number"); |
| + } |
| + portNumber = portNumber * 10 + (digit - _ZERO); |
| } |
| + port = _makePort(portNumber, scheme); |
| + hostEnd = lastColon; |
| + } |
| + host = _makeHost(uri, hostStart, hostEnd); |
| + if (index < uri.length) { |
| + char = uri.codeUnitAt(index); |
| } |
| } |
| - var scheme = null; |
| - if (schemeEndIndex > 0) { |
| - scheme = uri.substring(0, schemeEndIndex - 1); |
| - } |
| - |
| - var host = ""; |
| - var userInfo = ""; |
| - var port = 0; |
| - if (schemeEndIndex != authorityEndIndex) { |
| - int startIndex = schemeEndIndex + 2; |
| - if (userInfoEndIndex > 0) { |
| - userInfo = uri.substring(startIndex, userInfoEndIndex); |
| - startIndex = userInfoEndIndex + 1; |
| - } |
| - if (portIndex > 0) { |
| - var portStr = uri.substring(portIndex, authorityEndIndex); |
| - try { |
| - port = int.parse(portStr); |
| - } catch (_) { |
| - throw new FormatException("Invalid port: '$portStr'"); |
| + endPath: { |
| + toPath: { |
|
Anders Johnsen
2014/06/17 13:07:03
Consider doing:
bool hasPath = true;
while(...) {
Lasse Reichstein Nielsen
2014/06/17 14:31:44
Ack, my precious flow control!!!
See alternative
|
| + toAuthOpt: { |
| + while (index < uri.length) { |
| + char = uri.codeUnitAt(index); |
| + if (char == _QUESTION || char == _NUMBER_SIGN) break endPath; |
| + if (char == _SLASH) { |
| + if (index == 0) break toAuthOpt; |
| + break toPath; |
| + } |
| + if (char == _COLON) { |
| + if (index == 0) _fail(uri, 0, "Invalid empty scheme"); |
| + scheme = _makeScheme(uri, 0, index); |
| + index++; |
| + pathStart = index; |
| + if (index == uri.length) { |
| + char = -1; |
| + break endPath; |
| + } |
| + char = uri.codeUnitAt(index); |
| + if (char == _QUESTION || char == _NUMBER_SIGN) break endPath; |
| + if (char == _SLASH) break toAuthOpt; |
| + break toPath; |
| + } |
| + index++; |
| + } |
| + char = -1; |
| + break endPath; |
| + } // toAuthOpt. |
| + // Have seen one slash either at start or right after scheme. |
| + // If two slashes, it's an authority, otherwise it's just the path. |
| + assert(char == _SLASH); |
| + index++; |
| + if (index == uri.length) { |
| + char = -1; |
| + break endPath; |
| } |
| - host = uri.substring(startIndex, portIndex - 1); |
| + char = uri.codeUnitAt(index); |
| + if (char == _QUESTION || char == _NUMBER_SIGN) break endPath; |
| + if (char == _SLASH) { |
| + index++; |
| + parseAuth(); |
| + pathStart = index; |
| + if (char == _QUESTION || char == _NUMBER_SIGN) break endPath; |
| + } else { |
| + break toPath; |
| + } |
| + } // toPath |
| + |
| + // Characters from pathStart to index (inclusive) are known |
| + // to be part of the path. |
| + if (index < uri.length) { |
| + while (++index < uri.length) { |
| + char = uri.codeUnitAt(index); |
| + if (char== _QUESTION || char == _NUMBER_SIGN) break endPath; |
|
Anders Johnsen
2014/06/17 13:07:03
Add missing space.
Lasse Reichstein Nielsen
2014/06/17 14:31:45
Done.
|
| + } |
| + char = -1; |
| + } |
| + } // endPath |
| + bool ensureLeadingSlash = (host != "" || scheme == "file"); |
| + path = _makePath(uri, pathStart, index, null, ensureLeadingSlash); |
| + |
| + if (char == _QUESTION) { |
| + int numberSignIndex = uri.indexOf('#', index + 1); |
| + if (numberSignIndex < 0) { |
| + query = _makeQuery(uri, index + 1, uri.length, null); |
| } else { |
| - host = uri.substring(startIndex, authorityEndIndex); |
| + query = _makeQuery(uri, index + 1, numberSignIndex, null); |
| + fragment = _makeFragment(uri, numberSignIndex + 1, uri.length); |
| } |
| - } |
| + } else if (char == _NUMBER_SIGN) { |
| + fragment = _makeFragment(uri, index + 1, uri.length); |
| + } |
| + return new Uri._internal(scheme, |
| + userinfo, |
| + host, |
| + port, |
| + path, |
| + query, |
| + fragment); |
| + } |
| - var path = uri.substring(authorityEndIndex, pathEndIndex); |
| - var query = ""; |
| - if (pathEndIndex < queryEndIndex) { |
| - query = uri.substring(pathEndIndex + 1, queryEndIndex); |
| - } |
| - var fragment = ""; |
| - // If queryEndIndex is not at end (length), there is a fragment. |
| - if (queryEndIndex < length) { |
| - fragment = uri.substring(queryEndIndex + 1, length); |
| + // Report a parse failure. |
| + static void _fail(String uri, int index, String message) { |
| + // TODO(lrn): Consider adding this to FormatException. |
| + if (index == uri.length) { |
| + message += " at end of input."; |
| + } else { |
| + message += " at position $index.\n"; |
| + // Pick a slice of uri containing index and, if |
| + // necessary, truncate the ends to ensure the entire |
| + // slice fits on one line. |
| + int min = 0; |
| + int max = uri.length; |
| + String pre = ""; |
| + String post = ""; |
| + if (uri.length > 78) { |
| + min = index - 10; |
| + if (min < 0) min = 0; |
| + int max = min + 72; |
| + if (max > uri.length) { |
| + max = uri.length; |
| + min = max - 72; |
| + } |
| + if (min != 0) pre = "..."; |
| + if (max != uri.length) post = "..."; |
| + } |
| + // Combine message, slice and a caret pointing to the error index. |
| + message = "$message$pre${uri.substring(min, max)}$post\n" |
| + "${' ' * (pre.length + index - min)}^"; |
| } |
| - |
| - return new Uri(scheme: scheme, |
| - userInfo: userInfo, |
| - host: host, |
| - port: port, |
| - path: path, |
| - query: query, |
| - fragment: fragment); |
| + throw new FormatException(message); |
| } |
| + /// Internal non-verifying constructor. Only call with validated arguments. |
| + Uri._internal(this.scheme, |
| + this.userInfo, |
| + this._host, |
| + this._port, |
| + this._path, |
| + this.query, |
| + this.fragment); |
| + |
| /** |
| * Creates a new URI from its components. |
| * |
| @@ -386,29 +418,27 @@ class Uri { |
| * |
| * The fragment component is set through [fragment]. |
| */ |
| - Uri({String scheme, |
| - this.userInfo: "", |
| + factory Uri({String scheme, |
| + String userInfo: "", |
| String host: "", |
| port: 0, |
| String path, |
| Iterable<String> pathSegments, |
| String query, |
| Map<String, String> queryParameters, |
| - fragment: ""}) : |
| - scheme = _makeScheme(scheme), |
| - _host = _makeHost(host), |
| - query = _makeQuery(query, queryParameters), |
| - fragment = _makeFragment(fragment) { |
| - // Perform scheme specific normalization. |
| - if (scheme == "http" && port == 80) { |
| - _port = 0; |
| - } else if (scheme == "https" && port == 443) { |
| - _port = 0; |
| - } else { |
| - _port = port; |
| - } |
| - // Fill the path. |
| - _path = _makePath(path, pathSegments); |
| + fragment: ""}) { |
| + scheme = _makeScheme(scheme, 0, _stringOrNullLength(scheme)); |
| + userInfo = _makeUserInfo(userInfo, 0, _stringOrNullLength(userInfo)); |
| + host = _makeHost(host, 0, _stringOrNullLength(host)); |
| + query = _makeQuery(query, 0, _stringOrNullLength(query), queryParameters); |
| + fragment = _makeFragment(fragment, 0, _stringOrNullLength(fragment)); |
| + port = _makePort(port, scheme); |
| + bool ensureLeadingSlash = (host != "" || scheme == "file"); |
| + path = _makePath(path, 0, _stringOrNullLength(path), pathSegments, |
| + ensureLeadingSlash); |
| + |
| + return new Uri._internal(scheme, userInfo, host, port, |
| + path, query, fragment); |
| } |
| /** |
| @@ -489,7 +519,7 @@ class Uri { |
| if (hostEnd == authority.length) { |
| throw new FormatException("Invalid IPv6 host entry."); |
| } |
| - parseIPv6Address(authority.substring(hostStart + 1, hostEnd)); |
| + parseIPv6Address(authority, hostStart + 1, hostEnd); |
| hostEnd++; // Skip the closing bracket. |
| if (hostEnd != authority.length && |
| authority.codeUnitAt(hostEnd) != _COLON) { |
| @@ -765,75 +795,177 @@ class Uri { |
| return _queryParameters; |
| } |
| - static String _makeHost(String host) { |
| - if (host == null || host.isEmpty) return host; |
| - if (host.codeUnitAt(0) == _LEFT_BRACKET) { |
| - if (host.codeUnitAt(host.length - 1) != _RIGHT_BRACKET) { |
| - throw new FormatException('Missing end `]` to match `[` in host'); |
| + static int _makePort(int port, String scheme) { |
| + // Perform scheme specific normalization. |
| + if (port == 80 && scheme == "http") { |
| + return 0; |
| + } |
| + if (port == 443 && scheme == "https") { |
| + return 0; |
| + } |
| + return port; |
| + } |
| + |
| + static String _makeHost(String host, int start, int end) { |
| + if (host == null) return null; |
| + if (start == end) return ""; |
| + // Host is an IPv6 address if it starts with '[' or contains a colon. |
| + if (host.codeUnitAt(start) == _LEFT_BRACKET) { |
| + if (host.codeUnitAt(end - 1) != _RIGHT_BRACKET) { |
| + _fail(host, start, 'Missing end `]` to match `[` in host'); |
| } |
| - parseIPv6Address(host.substring(1, host.length - 1)); |
| - return host; |
| + parseIPv6Address(host, start + 1, end - 1); |
| + return host.substring(start, end); |
| } |
| - for (int i = 0; i < host.length; i++) { |
| + // TODO(lrn): skip if too short to be a valid IPv6 address. |
| + for (int i = start; i < end; i++) { |
| if (host.codeUnitAt(i) == _COLON) { |
| - parseIPv6Address(host); |
| + parseIPv6Address(host, start, end); |
| return '[$host]'; |
| } |
| } |
| - return host; |
| + return _normalizeRegName(host, start, end); |
| + } |
| + |
| + static bool _isRegNameChar(int char) { |
| + return char < 127 && (_regNameTable[char >> 4] & (1 << (char & 0xf))) != 0; |
| } |
| - static String _makeScheme(String scheme) { |
| - bool isSchemeLowerCharacter(int ch) { |
| - return ch < 128 && |
| - ((_schemeLowerTable[ch >> 4] & (1 << (ch & 0x0f))) != 0); |
| + /** |
| + * Validates and does case- and percent-encoding normalization. |
| + * |
| + * The [host] must be an RFC3986 "reg-name". It is converted |
| + * to lower case, and percent escapes are converted to either |
| + * lower case unreserved characters or upper case escapes. |
| + */ |
| + static String _normalizeRegName(String host, int start, int end) { |
| + StringBuffer buffer; |
| + int sectionStart = start; |
| + int index = start; |
| + // Whether all characters between sectionStart and index are normalized, |
| + bool isNormalized = true; |
| + |
| + while (index < end) { |
| + int char = host.codeUnitAt(index); |
| + if (char == _PERCENT) { |
| + // The _regNameTable contains "%", so we check that first. |
| + String replacement = _normalizeEscape(host, index, true); |
| + if (replacement == null && isNormalized) { |
| + index += 3; |
| + continue; |
| + } |
| + if (buffer == null) buffer = new StringBuffer(); |
| + String slice = host.substring(sectionStart, index); |
| + if (!isNormalized) slice = slice.toLowerCase(); |
| + buffer.write(slice); |
| + int sourceLength = 3; |
| + if (replacement == null) { |
| + replacement = host.substring(index, index + 3); |
| + } else if (replacement == "%") { |
| + replacement = "%25"; |
| + sourceLength = 1; |
| + } |
| + buffer.write(replacement); |
| + index += sourceLength; |
| + sectionStart = index; |
| + isNormalized = true; |
| + } else if (_isRegNameChar(char)) { |
| + if (isNormalized && _UPPER_CASE_A <= char && _UPPER_CASE_Z >= char) { |
| + // Put initial slice in buffer and continue in non-normalized mode |
| + if (buffer == null) buffer = new StringBuffer(); |
| + if (sectionStart < index) { |
| + buffer.write(host.substring(sectionStart, index)); |
| + sectionStart = index; |
| + } |
| + isNormalized = false; |
| + } |
| + index++; |
| + } else if (_isGeneralDelimiter(char)) { |
| + _fail(host, index, "Invalid character"); |
| + } else { |
| + int sourceLength = 1; |
| + if ((char & 0xFC00) == 0xD800 && (index + 1) < end) { |
| + int tail = host.codeUnitAt(index + 1); |
| + if ((tail & 0xFC00) == 0xDC00) { |
| + char = 0x10000 | ((char & 0x3ff) << 10) | (tail & 0x3ff); |
| + sourceLength = 2; |
| + } |
| + } |
| + if (buffer == null) buffer = new StringBuffer(); |
| + String slice = host.substring(sectionStart, index); |
| + if (!isNormalized) slice = slice.toLowerCase(); |
| + buffer.write(slice); |
| + buffer.write(_escapeChar(char)); |
| + index += sourceLength; |
| + sectionStart = index; |
| + } |
| } |
| + if (buffer == null) return host.substring(start, end); |
| + if (sectionStart < end) { |
| + String slice = host.substring(sectionStart, end); |
| + if (!isNormalized) slice = slice.toLowerCase(); |
| + buffer.write(slice); |
| + } |
| + return buffer.toString(); |
| + } |
| - if (scheme == null) return ""; |
| - bool allLowercase = true; |
| - int length = scheme.length; |
| - for (int i = 0; i < length; i++) { |
| + /** |
| + * Validates scheme characters and does case-normalization. |
| + * |
| + * Schemes are converted to lower case. They cannot contain escapes. |
| + */ |
| + static String _makeScheme(String scheme, int start, int end) { |
|
Anders Johnsen
2014/06/17 13:07:03
start is redundant, as it's always 0.
Lasse Reichstein Nielsen
2014/06/17 14:31:45
True. And I know.
I have a dream of making Uri.pa
|
| + if (start == end) return ""; |
| + int char = scheme.codeUnitAt(start); |
| + if (!_isAlphabeticCharacter(char)) { |
| + _fail(scheme, start, "Scheme not starting with alphabetic character"); |
| + } |
| + bool allLowercase = char >= _LOWER_CASE_A; |
| + for (int i = start; i < end; i++) { |
| int codeUnit = scheme.codeUnitAt(i); |
| - if (i == 0 && !_isAlphabeticCharacter(codeUnit)) { |
| - // First code unit must be an alphabetic character. |
| - throw new ArgumentError('Illegal scheme: $scheme'); |
| + if (!_isSchemeCharacter(codeUnit)) { |
| + _fail(scheme, i, "Illegal scheme character"); |
| } |
| - if (!isSchemeLowerCharacter(codeUnit)) { |
| - if (_isSchemeCharacter(codeUnit)) { |
| - allLowercase = false; |
| - } else { |
| - throw new ArgumentError('Illegal scheme: $scheme'); |
| - } |
| + if (_LOWER_CASE_A <= char && _LOWER_CASE_Z >= char) { |
| + allLowercase = false; |
| } |
| } |
| + scheme = scheme.substring(start, end); |
| + if (!allLowercase) scheme = scheme.toLowerCase(); |
| + return scheme; |
| + } |
| - return allLowercase ? scheme : scheme.toLowerCase(); |
| + static String _makeUserInfo(String userInfo, int start, int end) { |
| + if (userInfo == null) return "null"; |
| + return _normalize(userInfo, start, end, _userinfoTable); |
| } |
| - String _makePath(String path, Iterable<String> pathSegments) { |
| + static String _makePath(String path, int start, int end, |
| + Iterable<String> pathSegments, |
| + bool ensureLeadingSlash) { |
| if (path == null && pathSegments == null) return ""; |
| if (path != null && pathSegments != null) { |
| throw new ArgumentError('Both path and pathSegments specified'); |
| } |
| var result; |
| if (path != null) { |
| - result = _normalize(path); |
| + result = _normalize(path, start, end, _pathCharOrSlashTable); |
| } else { |
| result = pathSegments.map((s) => _uriEncode(_pathCharTable, s)).join("/"); |
| } |
| - if ((hasAuthority || (scheme == "file")) && |
| - result.isNotEmpty && !result.startsWith("/")) { |
| + if (ensureLeadingSlash && result.isNotEmpty && !result.startsWith("/")) { |
| return "/$result"; |
| } |
| return result; |
| } |
| - static String _makeQuery(String query, Map<String, String> queryParameters) { |
| + static String _makeQuery(String query, int start, int end, |
| + Map<String, String> queryParameters) { |
| if (query == null && queryParameters == null) return ""; |
| if (query != null && queryParameters != null) { |
| throw new ArgumentError('Both query and queryParameters specified'); |
| } |
| - if (query != null) return _normalize(query); |
| + if (query != null) return _normalize(query, start, end, _queryCharTable); |
| var result = new StringBuffer(); |
| var first = true; |
| @@ -851,123 +983,183 @@ class Uri { |
| return result.toString(); |
| } |
| - static String _makeFragment(String fragment) { |
| + static String _makeFragment(String fragment, int start, int end) { |
| if (fragment == null) return ""; |
| - return _normalize(fragment); |
| + return _normalize(fragment, start, end, _queryCharTable); |
| } |
| - static String _normalize(String component) { |
| - int index = component.indexOf('%'); |
| - if (index < 0) return component; |
| + static int _stringOrNullLength(String s) => (s == null) ? 0 : s.length; |
| - bool isNormalizedHexDigit(int digit) { |
| - return (_ZERO <= digit && digit <= _NINE) || |
| - (_UPPER_CASE_A <= digit && digit <= _UPPER_CASE_F); |
| - } |
| - |
| - bool isLowerCaseHexDigit(int digit) { |
| - return _LOWER_CASE_A <= digit && digit <= _LOWER_CASE_F; |
| - } |
| + static bool _isHexDigit(int char) { |
| + if (_NINE >= char) return _ZERO <= char; |
| + char |= 0x20; |
| + return _LOWER_CASE_A <= char && _LOWER_CASE_F >= char; |
| + } |
| - bool isUnreserved(int ch) { |
| - return ch < 128 && |
| - ((_unreservedTable[ch >> 4] & (1 << (ch & 0x0f))) != 0); |
| - } |
| + static int _hexValue(int char) { |
| + assert(_isHexDigit(char)); |
| + if (_NINE >= char) return char - _ZERO; |
| + char |= 0x20; |
| + return char - (_LOWER_CASE_A - 10); |
| + } |
| - int normalizeHexDigit(int index) { |
| - var codeUnit = component.codeUnitAt(index); |
| - if (isLowerCaseHexDigit(codeUnit)) { |
| - return codeUnit - 0x20; |
| - } else if (!isNormalizedHexDigit(codeUnit)) { |
| - throw new ArgumentError("Invalid URI component: $component"); |
| - } else { |
| - return codeUnit; |
| + /** |
| + * Performs RFC 3986 Percent-Encoding Normalization. |
| + * |
| + * Returns a replacement string that should be replace the original escape. |
| + * Returns null if no replacement is necessary because the escape is |
| + * not for an unreserved character and is already non-lower-case. |
| + * |
| + * Returns "%" if the escape is invalid (not two valid hex digits following |
| + * the percent sign). The calling code should replace the percent |
| + * sign with "%25", but leave the following two characters unmodified. |
| + * |
| + * If [lowerCase] is true, a single character returned is always lower case, |
| + */ |
| + static String _normalizeEscape(String source, int index, bool lowerCase) { |
| + assert(source.codeUnitAt(index) == _PERCENT); |
| + if (index + 2 >= source.length) { |
| + return "%"; // Marks the escape as invalid. |
| + } |
| + int firstDigit = source.codeUnitAt(index + 1); |
| + int secondDigit = source.codeUnitAt(index + 2); |
| + if (!_isHexDigit(firstDigit) || !_isHexDigit(secondDigit)) { |
| + return "%"; // Marks the escape as invalid. |
| + } |
| + int value = _hexValue(firstDigit) * 16 + _hexValue(secondDigit); |
| + if (_isUnreservedChar(value)) { |
| + if (lowerCase && _UPPER_CASE_A <= value && _UPPER_CASE_Z >= value) { |
| + value |= 0x20; |
| } |
| + return new String.fromCharCode(value); |
| } |
| - |
| - int decodeHexDigitPair(int index) { |
| - int byte = 0; |
| - for (int i = 0; i < 2; i++) { |
| - var codeUnit = component.codeUnitAt(index + i); |
| - if (_ZERO <= codeUnit && codeUnit <= _NINE) { |
| - byte = byte * 16 + codeUnit - _ZERO; |
| - } else { |
| - // Check ranges A-F (0x41-0x46) and a-f (0x61-0x66). |
| - codeUnit |= 0x20; |
| - if (_LOWER_CASE_A <= codeUnit && |
| - codeUnit <= _LOWER_CASE_F) { |
| - byte = byte * 16 + codeUnit - _LOWER_CASE_A + 10; |
| - } else { |
| - throw new ArgumentError( |
| - "Invalid percent-encoding in URI component: $component"); |
| - } |
| - } |
| - } |
| - return byte; |
| + if (firstDigit >= _LOWER_CASE_A || secondDigit >= _LOWER_CASE_A) { |
| + // Either digit is lower case. |
| + return source.substring(index, index + 3).toUpperCase(); |
| } |
| + // Escape is retained, and is already non-lower case, so return null to |
| + // represent "no replacement necessary". |
| + return null; |
| + } |
| - // Start building the normalized component string. |
| - StringBuffer result; |
| - int length = component.length; |
| - int prevIndex = 0; |
| + static bool _isUnreservedChar(int ch) { |
| + return ch < 127 && |
| + ((_unreservedTable[ch >> 4] & (1 << (ch & 0x0f))) != 0); |
| + } |
| - // Copy a part of the component string to the result. |
| - void fillResult() { |
| - if (result == null) { |
| - assert(prevIndex == 0); |
| - result = new StringBuffer(component.substring(prevIndex, index)); |
| - } else { |
| - result.write(component.substring(prevIndex, index)); |
| + static String _escapeChar(char) { |
| + const hexDigits = "0123456789ABCDEF"; |
| + List codeUnits; |
| + if (char < 0x80) { |
| + // ASCII, a single percent encoded sequence. |
| + codeUnits = new List(3); |
| + codeUnits[0] = _PERCENT; |
| + codeUnits[1] = hexDigits.codeUnitAt(char >> 4); |
| + codeUnits[2] = hexDigits.codeUnitAt(char & 0xf); |
| + } else { |
| + // Do UTF-8 encoding of character, then percent encode bytes. |
| + int flag = 0xc0; // The high-bit markers on the first byte of UTF-8. |
| + int encodedBytes = 2; |
| + if (char > 0x7ff) { |
| + flag = 0xe0; |
| + encodedBytes = 3; |
| + if (char > 0xffff) { |
| + encodedBytes = 4; |
| + flag = 0xf0; |
| + } |
| } |
| - } |
| - |
| - while (index < length) { |
| - // Normalize percent-encoding to uppercase and don't encode |
| - // unreserved characters. |
| - assert(component.codeUnitAt(index) == _PERCENT); |
| - if (length < index + 2) { |
| - throw new ArgumentError( |
| - "Invalid percent-encoding in URI component: $component"); |
| + codeUnits = new List(3 * encodedBytes); |
| + int index = 0; |
| + while (--encodedBytes >= 0) { |
| + int byte = ((char >> (6 * encodedBytes)) & 0x3f) | flag; |
| + codeUnits[index] = _PERCENT; |
| + codeUnits[index + 1] = hexDigits.codeUnitAt(byte >> 4); |
| + codeUnits[index + 2] = hexDigits.codeUnitAt(byte & 0xf); |
| + index += 3; |
| + flag = 0x80; // Following bytes have only high bit set. |
| } |
| + } |
| + return new String.fromCharCodes(codeUnits); |
| + } |
| - var codeUnit1 = component.codeUnitAt(index + 1); |
| - var codeUnit2 = component.codeUnitAt(index + 2); |
| - var decodedCodeUnit = decodeHexDigitPair(index + 1); |
| - if (isNormalizedHexDigit(codeUnit1) && |
| - isNormalizedHexDigit(codeUnit2) && |
| - !isUnreserved(decodedCodeUnit)) { |
| - index += 3; |
| + /** |
| + * Runs through component checking that each character is valid and |
| + * normalize percent escapes. |
| + * |
| + * Uses [charTable] to check if a non-`%` character is allowed. |
| + * Each `%` character must be followed by two hex digits. |
| + * If the hex-digits are lower case letters, they are converted to |
| + * upper case. |
| + */ |
| + static String _normalize(String component, int start, int end, |
| + List<int> charTable) { |
| + StringBuffer buffer; |
| + int sectionStart = start; |
| + int index = start; |
| + // Loop while characters are valid and escapes correct and upper-case. |
| + while (index < end) { |
| + int char = component.codeUnitAt(index); |
| + if (char < 127 && (charTable[char >> 4] & (1 << (char & 0x0f))) != 0) { |
| + index++; |
| } else { |
| - fillResult(); |
| - if (isUnreserved(decodedCodeUnit)) { |
| - result.writeCharCode(decodedCodeUnit); |
| + String replacement; |
| + int sourceLength; |
| + if (char == _PERCENT) { |
| + replacement = _normalizeEscape(component, index, false); |
| + // Returns null if we should keep the existing escape. |
| + if (replacement == null) { |
| + index += 3; |
| + continue; |
| + } |
| + // Returns "%" if we should escape the existing percent. |
| + if ("%" == replacement) { |
| + replacement = "%25"; |
| + sourceLength = 1; |
| + } else { |
| + sourceLength = 3; |
| + } |
| + } else if (_isGeneralDelimiter(char)) { |
| + _fail(component, index, "Invalid character"); |
| } else { |
| - result.write("%"); |
| - result.writeCharCode(normalizeHexDigit(index + 1)); |
| - result.writeCharCode(normalizeHexDigit(index + 2)); |
| + sourceLength = 1; |
| + if ((char & 0xFC00) == 0xD800) { |
| + // Possible lead surrogate. |
| + if (index + 1 < end) { |
| + int tail = component.codeUnitAt(index + 1); |
| + if ((tail & 0xFC00) == 0xDC00) { |
| + // Tail surrogat. |
| + sourceLength = 2; |
| + char = 0x10000 | ((char & 0x3ff) << 10) | (tail & 0x3ff); |
| + } |
| + } |
| + } |
| + replacement = _escapeChar(char); |
| } |
| - index += 3; |
| - prevIndex = index; |
| - } |
| - int next = component.indexOf('%', index); |
| - if (next >= index) { |
| - index = next; |
| - } else { |
| - index = length; |
| + if (buffer == null) buffer = new StringBuffer(); |
| + buffer.write(component.substring(sectionStart, index)); |
| + buffer.write(replacement); |
| + index += sourceLength; |
| + sectionStart = index; |
| } |
| } |
| - if (result == null) return component; |
| - |
| - if (result != null && prevIndex != index) fillResult(); |
| - assert(index == length); |
| - |
| - return result.toString(); |
| + if (buffer == null) { |
| + // Makes no copy if start == 0 and end == component.length. |
| + return component.substring(start, end); |
| + } |
| + if (sectionStart < end) { |
| + buffer.write(component.substring(sectionStart, end)); |
| + } |
| + return buffer.toString(); |
| } |
| static bool _isSchemeCharacter(int ch) { |
| return ch < 128 && ((_schemeTable[ch >> 4] & (1 << (ch & 0x0f))) != 0); |
| } |
| + static bool _isGeneralDelimiter(int ch) { |
| + return ch <= 64 && |
| + ((_genDelimitersTable[ch >> 4] & (1 << (ch & 0x0f))) != 0); |
| + } |
| /** |
| * Returns whether the URI is absolute. |
| @@ -1465,6 +1657,9 @@ class Uri { |
| * Throws a [FormatException] if [host] is not a valid IPv6 address |
| * representation. |
| * |
| + * Acts on the substring from [start] to [end]. If [end] is omitted, it |
| + * defaults ot the end of the string. |
| + * |
| * Some examples of IPv6 addresses: |
| * * ::1 |
| * * FEDC:BA98:7654:3210:FEDC:BA98:7654:3210 |
| @@ -1472,7 +1667,8 @@ class Uri { |
| * * ::FFFF:129.144.52.38 |
| * * 2010:836B:4179::836B:4179 |
| */ |
| - static List<int> parseIPv6Address(String host) { |
| + static List<int> parseIPv6Address(String host, [int start = 0, int end]) { |
|
Lasse Reichstein Nielsen
2014/06/17 14:31:44
This is a public method that I added start/end to.
|
| + if (end == null) end = host.length; |
| // An IPv6 address consists of exactly 8 parts of 1-4 hex digits, seperated |
| // by `:`'s, with the following exceptions: |
| // |
| @@ -1495,11 +1691,11 @@ class Uri { |
| if (host.length < 2) error('address is too short'); |
| List<int> parts = []; |
| bool wildcardSeen = false; |
| - int partStart = 0; |
| + int partStart = start; |
| // Parse all parts, except a potential last one. |
| - for (int i = 0; i < host.length; i++) { |
| + for (int i = start; i < end; i++) { |
| if (host.codeUnitAt(i) == _COLON) { |
| - if (i == 0) { |
| + if (i == start) { |
| // If we see a `:` in the beginning, expect wildcard. |
| i++; |
| if (host.codeUnitAt(i) != _COLON) { |
| @@ -1522,18 +1718,18 @@ class Uri { |
| } |
| } |
| if (parts.length == 0) error('too few parts'); |
| - bool atEnd = partStart == host.length; |
| - bool isLastWildcard = parts.last == -1; |
| + bool atEnd = (partStart == end); |
| + bool isLastWildcard = (parts.last == -1); |
| if (atEnd && !isLastWildcard) { |
| error('expected a part after last `:`'); |
| } |
| if (!atEnd) { |
| try { |
| - parts.add(parseHex(partStart, host.length)); |
| + parts.add(parseHex(partStart, end)); |
| } catch (e) { |
| // Failed to parse the last chunk as hex. Try IPv4. |
| try { |
| - List<int> last = parseIPv4Address(host.substring(partStart)); |
| + List<int> last = parseIPv4Address(host.substring(partStart, end)); |
| parts.add(last[0] << 8 | last[1]); |
| parts.add(last[2] << 8 | last[3]); |
| } catch (e) { |
| @@ -1549,15 +1745,23 @@ class Uri { |
| error('an address without a wildcard must contain exactly 8 parts'); |
| } |
| // TODO(ajohnsen): Consider using Uint8List. |
| - return parts |
| - .expand((value) { |
| - if (value == -1) { |
| - return new List.filled((9 - parts.length) * 2, 0); |
| - } else { |
| - return [(value >> 8) & 0xFF, value & 0xFF]; |
| - } |
| - }) |
| - .toList(); |
| + List bytes = new List<int>(16); |
| + for (int i = 0, index = 0; i < parts.length; i++) { |
| + int value = parts[i]; |
| + if (value == -1) { |
| + int wildCardLength = 9 - parts.length; |
| + for (int j = 0; j < wildCardLength; j++) { |
| + bytes[index] = 0; |
| + bytes[index + 1] = 0; |
| + index += 2; |
| + } |
| + } else { |
| + bytes[index] = value >> 8; |
| + bytes[index + 1] = value & 0xff; |
| + index += 2; |
| + } |
| + } |
| + return bytes; |
| } |
| // Frequently used character codes. |
| @@ -1820,6 +2024,48 @@ class Uri { |
| // pqrstuvwxyz ~ |
| 0x47ff]; // 0x70 - 0x7f 1111111111100010 |
| + // General delimiter characters, RFC 3986 section 2.2. |
| + // gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" |
| + // |
| + static const _genDelimitersTable = const [ |
| + // LSB MSB |
| + // | | |
| + 0x0000, // 0x00 - 0x0f 0000000000000000 |
| + 0x0000, // 0x10 - 0x1f 0000000000000000 |
| + // # / |
| + 0x8008, // 0x20 - 0x2f 0001000000000001 |
| + // : [ ]? |
| + 0xe400, // 0x30 - 0x3f 0000000000101011 |
| + // @ |
| + 0x0001, // 0x40 - 0x4f 1000000000000000 |
| + // |
| + 0x0000, // 0x50 - 0x5f 0000000000000000 |
| + // |
| + 0x0000, // 0x60 - 0x6f 0000000000000000 |
| + // |
| + 0x0000]; // 0x70 - 0x7f 0000000000000000 |
| + |
| + // Characters allowed in the userinfo as of RFC 3986. |
| + // RFC 3986 Apendix A |
| + // userinfo = *( unreserved / pct-encoded / sub-delims / ':') |
| + static const _userinfoTable = const [ |
| + // LSB MSB |
| + // | | |
| + 0x0000, // 0x00 - 0x0f 0000000000000000 |
| + 0x0000, // 0x10 - 0x1f 0000000000000000 |
| + // ! $ &'()*+,-. |
| + 0x7fd2, // 0x20 - 0x2f 0100101111111110 |
| + // 0123456789:; = |
| + 0x2fff, // 0x30 - 0x3f 1111111111110100 |
| + // ABCDEFGHIJKLMNO |
| + 0xfffe, // 0x40 - 0x4f 0111111111111111 |
| + // PQRSTUVWXYZ _ |
| + 0x87ff, // 0x50 - 0x5f 1111111111100001 |
| + // abcdefghijklmno |
| + 0xfffe, // 0x60 - 0x6f 0111111111111111 |
| + // pqrstuvwxyz ~ |
| + 0x47ff]; // 0x70 - 0x7f 1111111111100010 |
| + |
| // Characters allowed in the reg-name as of RFC 3986. |
| // RFC 3986 Apendix A |
| // reg-name = *( unreserved / pct-encoded / sub-delims ) |
| @@ -1862,6 +2108,27 @@ class Uri { |
| // pqrstuvwxyz ~ |
| 0x47ff]; // 0x70 - 0x7f 1111111111100010 |
| + // Characters allowed in the path as of RFC 3986. |
| + // RFC 3986 section 3.3 *and* slash. |
| + static const _pathCharOrSlashTable = const [ |
| + // LSB MSB |
| + // | | |
| + 0x0000, // 0x00 - 0x0f 0000000000000000 |
| + 0x0000, // 0x10 - 0x1f 0000000000000000 |
| + // ! $ &'()*+,-./ |
| + 0xffd2, // 0x20 - 0x2f 0100101111111111 |
| + // 0123456789:; = |
| + 0x2fff, // 0x30 - 0x3f 1111111111110100 |
| + // @ABCDEFGHIJKLMNO |
| + 0xffff, // 0x40 - 0x4f 1111111111111111 |
| + |
| + // PQRSTUVWXYZ _ |
| + 0x87ff, // 0x50 - 0x5f 1111111111100001 |
| + // abcdefghijklmno |
| + 0xfffe, // 0x60 - 0x6f 0111111111111111 |
| + // pqrstuvwxyz ~ |
| + 0x47ff]; // 0x70 - 0x7f 1111111111100010 |
| + |
| // Characters allowed in the query as of RFC 3986. |
| // RFC 3986 section 3.4. |
| // query = *( pchar / "/" / "?" ) |