Chromium Code Reviews| Index: sdk/lib/core/uri.dart |
| diff --git a/sdk/lib/core/uri.dart b/sdk/lib/core/uri.dart |
| index 091e1fece45903402fde1890ed9698afe85b10e8..8d3f589c7602eb2703828493f66289e2ff904b72 100644 |
| --- a/sdk/lib/core/uri.dart |
| +++ b/sdk/lib/core/uri.dart |
| @@ -172,174 +172,256 @@ class Uri { |
| // query = *( pchar / "/" / "?" ) |
| // |
| // fragment = *( pchar / "/" / "?" ) |
| - bool isRegName(int ch) { |
| - return ch < 128 && ((_regNameTable[ch >> 4] & (1 << (ch & 0x0f))) != 0); |
| - } |
| - |
| - int ipV6Address(int index) { |
| - // IPv6. Skip to ']'. |
| - index = uri.indexOf(']', index); |
| - if (index == -1) { |
| - throw new FormatException("Bad end of IPv6 host"); |
| - } |
| - return index + 1; |
| - } |
| - |
| - int length = uri.length; |
| - int index = 0; |
| - |
| - int schemeEndIndex = 0; |
| - |
| - if (length == 0) { |
| + if (uri.isEmpty) { |
| return new Uri(); |
| } |
| + String scheme; |
| + String userInfo = ""; |
| + String host = ""; |
| + int port = 0; |
| + String path = ""; |
| + String query; |
| + String fragment; |
| + bool allowColonInPath = false; |
| - if (uri.codeUnitAt(0) != _SLASH) { |
| - // Can be scheme. |
| - while (index < length) { |
| - // Look for ':'. If found, continue from the post of ':'. If not (end |
| - // reached or invalid scheme char found) back up one char, and continue |
| - // to path. |
| - // Note that scheme-chars is contained in path-chars. |
| - int codeUnit = uri.codeUnitAt(index++); |
| - if (!_isSchemeCharacter(codeUnit)) { |
| - if (codeUnit == _COLON) { |
| - schemeEndIndex = index; |
| - } else { |
| - // Back up one char, since we met an invalid scheme char. |
| - index--; |
| + /// Index after current char. |
|
Søren Gjesse
2014/06/11 08:33:15
Why dart-doc comments here?
Lasse Reichstein Nielsen
2014/06/12 08:07:31
Because I assume the editor will display that comm
|
| + int index = 0; |
| + /// Current char. |
| + int current = _EOI; |
| + /// Start index of current section being parsed. |
| + int start = 0; |
|
Anders Johnsen
2014/06/11 10:00:03
Start is a bit ambiguous. currentSectionStart ?
Lasse Reichstein Nielsen
2014/06/12 08:07:31
.... loooooong name.
But ok.
|
| + /// The position after the current character. If parsing a percent |
| + /// escape, this is index + 3, otherwise index + 1. |
| + int nextIndex = 0; |
| + |
| + void advance() { |
| + index = nextIndex; |
| + if (index < uri.length) { |
| + current = uri.codeUnitAt(index); |
| + if (_MAX_VALID_CHAR >= current) { |
| + if (_PERCENT != current) { |
| + nextIndex = nextIndex + 1; |
| + return; |
| + } |
| + if (index + 2 >= uri.length || |
| + !_isHexDigit(uri.codeUnitAt(index + 1)) || |
| + !_isHexDigit(uri.codeUnitAt(index + 2))) { |
| + _fail(uri, index, "Incomplete percent escape"); |
| } |
| - break; |
| + // Valid escape, returns _PERCENT as current. |
| + nextIndex = nextIndex + 3; |
| + return; |
| } |
| + _fail(uri, index, "Unexpected character"); |
| } |
| - } |
| - |
| - int userInfoEndIndex = -1; |
| - int portIndex = -1; |
| - int authorityEndIndex = schemeEndIndex; |
| - // If we see '//', there must be an authority. |
| - if (authorityEndIndex == index && |
| - authorityEndIndex + 1 < length && |
| - uri.codeUnitAt(authorityEndIndex) == _SLASH && |
| - uri.codeUnitAt(authorityEndIndex + 1) == _SLASH) { |
| - // Skip '//'. |
| - authorityEndIndex += 2; |
| - // It can both be host and userInfo. |
| - while (authorityEndIndex < length) { |
| - int codeUnit = uri.codeUnitAt(authorityEndIndex++); |
| - if (!isRegName(codeUnit)) { |
| - if (codeUnit == _LEFT_BRACKET) { |
| - authorityEndIndex = ipV6Address(authorityEndIndex); |
| - } else if (portIndex == -1 && codeUnit == _COLON) { |
| - // First time ':'. |
| - portIndex = authorityEndIndex; |
| - } else if (codeUnit == _AT_SIGN || codeUnit == _COLON) { |
| - // Second time ':' or first '@'. Must be userInfo. |
| - userInfoEndIndex = uri.indexOf('@', authorityEndIndex - 1); |
| - // Not found. Must be path then. |
| - if (userInfoEndIndex == -1) { |
| - authorityEndIndex = index; |
| - break; |
| - } |
| - portIndex = -1; |
| - authorityEndIndex = userInfoEndIndex + 1; |
| - // Now it can only be host:port. |
| - while (authorityEndIndex < length) { |
| - int codeUnit = uri.codeUnitAt(authorityEndIndex++); |
| - if (!isRegName(codeUnit)) { |
| - if (codeUnit == _LEFT_BRACKET) { |
| - authorityEndIndex = ipV6Address(authorityEndIndex); |
| - } else if (codeUnit == _COLON) { |
| - if (portIndex != -1) { |
| - throw new FormatException("Double port in host"); |
| - } |
| - portIndex = authorityEndIndex; |
| - } else { |
| - authorityEndIndex--; |
| - break; |
| - } |
| - } |
| - } |
| - break; |
| - } else { |
| - authorityEndIndex--; |
| - break; |
| + current = _EOI; |
| + } |
| + |
| + // Parse authority. |
| + void parseAuth() { |
| + start = index; |
| + void parseIpV6() { |
| + assert(current == _LEFT_BRACKET); |
| + assert(start == index); |
| + for (int i = index + 1; i < uri.length; i++) { |
| + if (uri.codeUnitAt(i) == _RIGHT_BRACKET) { |
| + nextIndex = i + 1; |
| + advance(); |
| + host = uri.substring(start, index); |
|
Søren Gjesse
2014/06/11 08:33:15
Are we doing proper IPv6 validation later, or is t
Lasse Reichstein Nielsen
2014/06/12 08:07:31
I believe we are doing it later.
I don't know if
|
| + return; |
| } |
| } |
| + _fail(uri, start, "Unmatched [ in host name"); |
|
kevmoo
2014/06/10 21:29:08
quote '['
|
| } |
| - } else { |
| - authorityEndIndex = schemeEndIndex; |
| - } |
| - // At path now. |
| - int pathEndIndex = authorityEndIndex; |
| - while (pathEndIndex < length) { |
| - int codeUnit = uri.codeUnitAt(pathEndIndex++); |
| - if (codeUnit == _QUESTION || codeUnit == _NUMBER_SIGN) { |
| - pathEndIndex--; |
| - break; |
| + void parseHost() { |
| + assert(start == index); |
| + if (current == _LEFT_BRACKET) { |
| + parseIpV6(); |
| + return; |
| + } |
| + while (_isRegNameChar(current)) { |
| + advance(); |
| + } |
| + host = uri.substring(start, index); |
| } |
| - } |
| - // Maybe query. |
| - int queryEndIndex = pathEndIndex; |
| - if (queryEndIndex < length && uri.codeUnitAt(queryEndIndex) == _QUESTION) { |
| - while (queryEndIndex < length) { |
| - int codeUnit = uri.codeUnitAt(queryEndIndex++); |
| - if (codeUnit == _NUMBER_SIGN) { |
| - queryEndIndex--; |
| - break; |
| + int parsePort() { |
| + assert(_isDigit(current)); |
| + int portVal = current - _ZERO; |
| + advance(); |
| + while (_isDigit(current)) { |
| + portVal = portVal * 10 + (current - _ZERO); |
|
Søren Gjesse
2014/06/11 08:33:15
Any limitations on the port value in the spec?
|
| + advance(); |
| } |
| + return portVal; |
| } |
| - } |
| - |
| - var scheme = null; |
| - if (schemeEndIndex > 0) { |
| - scheme = uri.substring(0, schemeEndIndex - 1); |
| - } |
| - var host = ""; |
| - var userInfo = ""; |
| - var port = 0; |
| - if (schemeEndIndex != authorityEndIndex) { |
| - int startIndex = schemeEndIndex + 2; |
| - if (userInfoEndIndex > 0) { |
| - userInfo = uri.substring(startIndex, userInfoEndIndex); |
| - startIndex = userInfoEndIndex + 1; |
| + maybeUserInfo: { |
| + // Break this when we know user-info is done or not there. |
| + // user-info or reg-name |
| + if (current == _LEFT_BRACKET) break maybeUserInfo; |
| + while (_isRegNameChar(current)) { |
| + advance(); |
| + } |
| + if (current == _SLASH) { |
| + host = uri.substring(start, index); |
| + return; |
| + } |
| + if (current == _AT_SIGN) { |
| + userInfo = uri.substring(start, index); |
| + advance(); |
| + start = index; |
| + break maybeUserInfo; |
| + } |
| + if (current == _COLON) { |
| + // First colon seen after what might be a host name. |
| + // Can be either part of user-info or preceding a port. |
| + int hostEnd = index; |
| + advance(); |
| + // user-info or port. |
| + if (_isDigit(current)) { |
| + int portVal = parsePort(); |
| + if (current == _SLASH || current == _EOI) { |
| + host = uri.substring(start, hostEnd); |
| + port = portVal; |
| + return; |
| + } |
| + } |
| + } |
| + if (current == _EOI) { |
| + host = uri.substring(start, index); |
| + return; |
| + } |
| + // A non-port character seen after a colon. |
| + // This must be user-info. |
| + while (_isUserInfoChar(current)) { |
| + advance(); |
| + } |
| + if (current != _AT_SIGN) { |
| + _fail(uri, index, "Expected @"); |
|
kevmoo
2014/06/10 21:29:08
quote '@'
|
| + } |
| + userInfo = uri.substring(start, index); |
| + advance(); |
| + start = index; |
| + } // end maybeUserInfo |
| + parseHost(); |
| + if (current == _COLON) { |
| + advance(); |
| + if (!_isDigit(current)) { |
| + _fail(uri, index, "Expected port number"); |
| + } |
| + port = parsePort(); |
| } |
| - if (portIndex > 0) { |
| - var portStr = uri.substring(portIndex, authorityEndIndex); |
| - try { |
| - port = int.parse(portStr); |
| - } catch (_) { |
| - throw new FormatException("Invalid port: '$portStr'"); |
| + } // end parseAuth(). |
| + |
| + // Start parsing. |
| + to_path: { // Break this block to go to parsing the path. |
| + advance(); |
| + if (_isAlpha(current)) { |
|
Anders Johnsen
2014/06/11 10:00:03
Do we need 'scheme != null' to prevent double-pars
Lasse Reichstein Nielsen
2014/06/12 08:07:31
No, there is no parsing before this point. The cod
|
| + // May be scheme or path. |
| + do { |
| + advance(); |
| + } while (_isSchemeCharacter(current)); |
| + if (current != _COLON) { |
| + break to_path; |
| + } |
| + allowColonInPath = true; |
| + scheme = uri.substring(0, index); |
| + advance(); |
| + start = index; |
| + } |
| + // Path or authority. |
| + if (current == _SLASH) { |
| + allowColonInPath = true; |
| + advance(); |
| + if (current == _SLASH) { |
| + advance(); |
| + parseAuth(); |
| + if (current == _EOI) { |
| + start = index; |
| + break to_path; |
| + } |
| + if (current != _SLASH) { |
| + _fail(uri, index, "Expected /"); |
|
kevmoo
2014/06/10 21:29:08
quote '/'
|
| + } |
| + start = index; |
| + advance(); |
| } |
| - host = uri.substring(startIndex, portIndex - 1); |
| - } else { |
| - host = uri.substring(startIndex, authorityEndIndex); |
| } |
| + } // end to_path. |
| + if (!allowColonInPath) { |
| + while (_isPathChar(current) || current == _PERCENT) { |
|
Anders Johnsen
2014/06/11 10:00:03
Will _isPathChar return true for '/' ?
Lasse Reichstein Nielsen
2014/06/12 08:07:31
No.
|
| + if (current == _COLON) { |
| + _fail(uri, index, "Colon in path before first '/'"); |
| + } |
| + advance(); |
| + } |
| + } |
| + // Start or continue path. May be empty. |
| + while (_isPathChar(current) || current == _SLASH || current == _PERCENT) { |
| + advance(); |
| } |
| + path = uri.substring(start, index); |
| - var path = uri.substring(authorityEndIndex, pathEndIndex); |
| - var query = ""; |
| - if (pathEndIndex < queryEndIndex) { |
| - query = uri.substring(pathEndIndex + 1, queryEndIndex); |
| + if (current == _QUESTION) { |
| + start = nextIndex; |
| + do { |
| + advance(); |
| + } while (_isQueryChar(current) || current == _PERCENT); |
| + query = uri.substring(start, index); |
| } |
| - var fragment = ""; |
| - // If queryEndIndex is not at end (length), there is a fragment. |
| - if (queryEndIndex < length) { |
| - fragment = uri.substring(queryEndIndex + 1, length); |
| + |
| + if (current == _NUMBER_SIGN) { |
| + // Fragment can contain same characters as query. |
|
Søren Gjesse
2014/06/11 08:33:15
No number sign in fragment?
Lasse Reichstein Nielsen
2014/06/12 08:07:32
No. At most one # in any URL.
|
| + start = nextIndex; |
| + do { |
| + advance(); |
| + } while (_isQueryChar(current) || current == _PERCENT); |
| + fragment = uri.substring(start, index); |
| + } |
| + |
| + if (current != _EOI) { |
| + _fail(uri, index, "Unexpected character"); |
| } |
| return new Uri(scheme: scheme, |
|
Anders Johnsen
2014/06/11 10:00:03
This constructor does some extra validation. Do we
Lasse Reichstein Nielsen
2014/06/12 08:07:31
As pointed out elsewhere, the validation needs to
|
| userInfo: userInfo, |
| - host: host, |
| port: port, |
|
kevmoo
2014/06/10 21:29:08
nit: might as well keep these params in the origin
Lasse Reichstein Nielsen
2014/06/12 08:07:31
ACK.
|
| + host: host, |
| path: path, |
| query: query, |
| fragment: fragment); |
| } |
| + // Reports a parse failure by throwing a [FormatException]. |
| + // |
| + // [uri] is the text being parsed, [index] the position at which parsing |
| + // failed and [message] the leading description of the problem. When [index] |
| + // equals `uri.length` the message only notes the unexpected end of input. |
| + // Otherwise it is followed by an excerpt of [uri] and, on the next line, a |
| + // caret placed under the offending position. |
| + static void _fail(uri, index, message) { |

kevmoo
2014/06/10 21:29:08
Add types to parameters
|
| + if (index == uri.length) { |
| + message += ": Unexpected end of input."; |

kevmoo
2014/06/10 21:29:08
Could this be changed so each message reads like a
|
| + } else { |
| + message += ": Unexpected character at position $index.\n"; |

kevmoo
2014/06/10 21:29:08
ditto for reading like a sentence from above.
|
| + int min = 0; |
| + int max = uri.length; |
| + String pre = ""; |
| + String post = ""; |

kevmoo
2014/06/10 21:29:08
Code comments for what you're doing here? Is this
Lasse Reichstein Nielsen
2014/06/12 08:07:31
Yes, at most 78 characters of the source will be d
|
| + if (uri.length > 78) { |
| + // Long input: show a 72-character window starting up to ten characters |
| + // before the failure, and mark elided text on either side with "...". |
| + min = index - 10; |
| + if (min < 0) min = 0; |
| + // Assign the outer `max`. Declaring a new local here would shadow it, |
| + // leaving the excerpt below untruncated at its end. |
| + max = min + 72; |
| + if (max > uri.length) { |
| + max = uri.length; |
| + min = max - 72; |
| + } |
| + if (min != 0) pre = "..."; |
| + if (max != uri.length) post = "..."; |
| + } |
| + message = "$message$pre${uri.substring(min, max)}$post\n" |
| + "${' ' * (pre.length + index - min)}^"; |
| + } |
| + throw new FormatException(message); |
| + } |
| + |
| + |
| /** |
| * Creates a new URI from its components. |
| * |
| @@ -934,6 +1016,22 @@ class Uri { |
| return ch < 128 && ((_schemeTable[ch >> 4] & (1 << (ch & 0x0f))) != 0); |
| } |
| + // Tests whether [ch] is below 128 and has bit `ch & 0x0f` set in entry |
| + // `ch >> 4` of [_pathCharTable]. |
| + static bool _isPathChar(int ch) { |
| + if (ch >= 128) return false; |
| + int entry = _pathCharTable[ch >> 4]; |
| + int mask = 1 << (ch & 0x0f); |
| + return (entry & mask) != 0; |
| + } |
| + |
| + // Tests whether [ch] is below 128 and has bit `ch & 0x0f` set in entry |
| + // `ch >> 4` of [_regNameTable]. |
| + static bool _isRegNameChar(int ch) { |
| + if (ch >= 128) return false; |
| + int entry = _regNameTable[ch >> 4]; |
| + int mask = 1 << (ch & 0x0f); |
| + return (entry & mask) != 0; |
| + } |
| + |
| + // Tests whether [ch] may appear in user-info: any code unit below 128 that |
| + // is either flagged in [_regNameTable] or equal to [_COLON]. |
| + static bool _isUserInfoChar(int ch) { |
| + if (ch >= 128) return false; |
| + bool inRegName = (_regNameTable[ch >> 4] & (1 << (ch & 0x0f))) != 0; |
| + return inRegName || ch == _COLON; |
| + } |
| + |
| + // Tests whether [ch] is below 128 and has bit `ch & 0x0f` set in entry |
| + // `ch >> 4` of [_queryCharTable]. |
| + static bool _isQueryChar(int ch) { |
| + if (ch >= 128) return false; |
| + int entry = _queryCharTable[ch >> 4]; |
| + int mask = 1 << (ch & 0x0f); |
| + return (entry & mask) != 0; |
| + } |
| /** |
| * Returns whether the URI is absolute. |
| @@ -1551,6 +1649,25 @@ class Uri { |
| static const int _LOWER_CASE_F = 0x66; |
| static const int _LOWER_CASE_Z = 0x7A; |
| static const int _BAR = 0x7C; |
| + static const int _MAX_VALID_CHAR = 0x7e; |
| + static const int _EOI = 0x10000; // Not a code unit. |
|
Anders Johnsen
2014/06/11 10:00:03
-1?
Lasse Reichstein Nielsen
2014/06/12 08:07:31
Breaks the _isPathChar function (or requires an ex
|
| + |
| + // Tests whether [char], with bit 0x20 set, lies between [_LOWER_CASE_A] and |
| + // [_LOWER_CASE_Z] inclusive. Setting the bit presumably folds upper-case |
| + // ASCII letters onto lower-case ones so one range check covers both. |
| + static bool _isAlpha(int char) { |
| + int folded = char | 0x20; |
| + if (folded < _LOWER_CASE_A) return false; |
| + return folded <= _LOWER_CASE_Z; |
| + // TODO: Test: |
| + // return ((char - _a) & 0xffff) <= (_z - _a); |
| + } |
| + |
| + // Tests whether [char] lies between [_ZERO] and [_NINE] inclusive. |
| + static bool _isDigit(int char) { |
| + if (char > _NINE) return false; |
| + return char >= _ZERO; |
| + } |
| + |
| + // Tests whether [char] is a hexadecimal digit: either within |
| + // [_ZERO]..[_NINE], or, once bit 0x20 is set, within |
| + // [_LOWER_CASE_A]..[_LOWER_CASE_F]. |
| + static bool _isHexDigit(int char) { |
| + if (char <= _NINE) { |
| + return char >= _ZERO; |
| + } |
| + int folded = char | 0x20; |
| + return folded >= _LOWER_CASE_A && folded <= _LOWER_CASE_F; |
| + } |
| /** |
| * This is the internal implementation of JavaScript's encodeURI function. |