Index: sdk/lib/core/uri.dart |
diff --git a/sdk/lib/core/uri.dart b/sdk/lib/core/uri.dart |
index 091e1fece45903402fde1890ed9698afe85b10e8..8d3f589c7602eb2703828493f66289e2ff904b72 100644 |
--- a/sdk/lib/core/uri.dart |
+++ b/sdk/lib/core/uri.dart |
@@ -172,174 +172,256 @@ class Uri { |
// query = *( pchar / "/" / "?" ) |
// |
// fragment = *( pchar / "/" / "?" ) |
- bool isRegName(int ch) { |
- return ch < 128 && ((_regNameTable[ch >> 4] & (1 << (ch & 0x0f))) != 0); |
- } |
- |
- int ipV6Address(int index) { |
- // IPv6. Skip to ']'. |
- index = uri.indexOf(']', index); |
- if (index == -1) { |
- throw new FormatException("Bad end of IPv6 host"); |
- } |
- return index + 1; |
- } |
- |
- int length = uri.length; |
- int index = 0; |
- |
- int schemeEndIndex = 0; |
- |
- if (length == 0) { |
+ if (uri.isEmpty) { |
return new Uri(); |
} |
+ String scheme; |
+ String userInfo = ""; |
+ String host = ""; |
+ int port = 0; |
+ String path = ""; |
+ String query; |
+ String fragment; |
+ bool allowColonInPath = false; |
- if (uri.codeUnitAt(0) != _SLASH) { |
- // Can be scheme. |
- while (index < length) { |
- // Look for ':'. If found, continue from the post of ':'. If not (end |
- // reached or invalid scheme char found) back up one char, and continue |
- // to path. |
- // Note that scheme-chars is contained in path-chars. |
- int codeUnit = uri.codeUnitAt(index++); |
- if (!_isSchemeCharacter(codeUnit)) { |
- if (codeUnit == _COLON) { |
- schemeEndIndex = index; |
- } else { |
- // Back up one char, since we met an invalid scheme char. |
- index--; |
+ /// Index after current char. |
Søren Gjesse
2014/06/11 08:33:15
Why dart-doc comments here?
Lasse Reichstein Nielsen
2014/06/12 08:07:31
Because I assume the editor will display that comm
|
+ int index = 0; |
+ /// Current char. |
+ int current = _EOI; |
+ /// Start index of current section being parsed. |
+ int start = 0; |
Anders Johnsen
2014/06/11 10:00:03
Start is a bit ambiguous. currentSectionStart ?
Lasse Reichstein Nielsen
2014/06/12 08:07:31
.... loooooong name.
But ok.
|
+ /// The position after the current character. If parsing a percent |
+ /// escape, this is index + 3, otherwise index + 1. |
+ int nextIndex = 0; |
+ |
+ void advance() { |
+ index = nextIndex; |
+ if (index < uri.length) { |
+ current = uri.codeUnitAt(index); |
+ if (_MAX_VALID_CHAR >= current) { |
+ if (_PERCENT != current) { |
+ nextIndex = nextIndex + 1; |
+ return; |
+ } |
+ if (index + 2 >= uri.length || |
+ !_isHexDigit(uri.codeUnitAt(index + 1)) || |
+ !_isHexDigit(uri.codeUnitAt(index + 2))) { |
+ _fail(uri, index, "Incomplete percent escape"); |
} |
- break; |
+ // Valid escape, returns _PERCENT as current. |
+ nextIndex = nextIndex + 3; |
+ return; |
} |
+ _fail(uri, index, "Unexpected character"); |
} |
- } |
- |
- int userInfoEndIndex = -1; |
- int portIndex = -1; |
- int authorityEndIndex = schemeEndIndex; |
- // If we see '//', there must be an authority. |
- if (authorityEndIndex == index && |
- authorityEndIndex + 1 < length && |
- uri.codeUnitAt(authorityEndIndex) == _SLASH && |
- uri.codeUnitAt(authorityEndIndex + 1) == _SLASH) { |
- // Skip '//'. |
- authorityEndIndex += 2; |
- // It can both be host and userInfo. |
- while (authorityEndIndex < length) { |
- int codeUnit = uri.codeUnitAt(authorityEndIndex++); |
- if (!isRegName(codeUnit)) { |
- if (codeUnit == _LEFT_BRACKET) { |
- authorityEndIndex = ipV6Address(authorityEndIndex); |
- } else if (portIndex == -1 && codeUnit == _COLON) { |
- // First time ':'. |
- portIndex = authorityEndIndex; |
- } else if (codeUnit == _AT_SIGN || codeUnit == _COLON) { |
- // Second time ':' or first '@'. Must be userInfo. |
- userInfoEndIndex = uri.indexOf('@', authorityEndIndex - 1); |
- // Not found. Must be path then. |
- if (userInfoEndIndex == -1) { |
- authorityEndIndex = index; |
- break; |
- } |
- portIndex = -1; |
- authorityEndIndex = userInfoEndIndex + 1; |
- // Now it can only be host:port. |
- while (authorityEndIndex < length) { |
- int codeUnit = uri.codeUnitAt(authorityEndIndex++); |
- if (!isRegName(codeUnit)) { |
- if (codeUnit == _LEFT_BRACKET) { |
- authorityEndIndex = ipV6Address(authorityEndIndex); |
- } else if (codeUnit == _COLON) { |
- if (portIndex != -1) { |
- throw new FormatException("Double port in host"); |
- } |
- portIndex = authorityEndIndex; |
- } else { |
- authorityEndIndex--; |
- break; |
- } |
- } |
- } |
- break; |
- } else { |
- authorityEndIndex--; |
- break; |
+ current = _EOI; |
+ } |
+ |
+ // Parse authority. |
+ void parseAuth() { |
+ start = index; |
+ void parseIpV6() { |
+ assert(current == _LEFT_BRACKET); |
+ assert(start == index); |
+ for (int i = index + 1; i < uri.length; i++) { |
+ if (uri.codeUnitAt(i) == _RIGHT_BRACKET) { |
+ nextIndex = i + 1; |
+ advance(); |
+ host = uri.substring(start, index); |
Søren Gjesse
2014/06/11 08:33:15
Are we doing proper IPv6 validation later, or is t
Lasse Reichstein Nielsen
2014/06/12 08:07:31
I believe we are doing it later.
I don't know if
|
+ return; |
} |
} |
+ _fail(uri, start, "Unmatched [ in host name"); |
kevmoo
2014/06/10 21:29:08
quote '['
|
} |
- } else { |
- authorityEndIndex = schemeEndIndex; |
- } |
- // At path now. |
- int pathEndIndex = authorityEndIndex; |
- while (pathEndIndex < length) { |
- int codeUnit = uri.codeUnitAt(pathEndIndex++); |
- if (codeUnit == _QUESTION || codeUnit == _NUMBER_SIGN) { |
- pathEndIndex--; |
- break; |
+ void parseHost() { |
+ assert(start == index); |
+ if (current == _LEFT_BRACKET) { |
+ parseIpV6(); |
+ return; |
+ } |
+ while (_isRegNameChar(current)) { |
+ advance(); |
+ } |
+ host = uri.substring(start, index); |
} |
- } |
- // Maybe query. |
- int queryEndIndex = pathEndIndex; |
- if (queryEndIndex < length && uri.codeUnitAt(queryEndIndex) == _QUESTION) { |
- while (queryEndIndex < length) { |
- int codeUnit = uri.codeUnitAt(queryEndIndex++); |
- if (codeUnit == _NUMBER_SIGN) { |
- queryEndIndex--; |
- break; |
+ int parsePort() { |
+ assert(_isDigit(current)); |
+ int portVal = current - _ZERO; |
+ advance(); |
+ while (_isDigit(current)) { |
+ portVal = portVal * 10 + (current - _ZERO); |
Søren Gjesse
2014/06/11 08:33:15
Any limitations on the port value in the spec?
|
+ advance(); |
} |
+ return portVal; |
} |
- } |
- |
- var scheme = null; |
- if (schemeEndIndex > 0) { |
- scheme = uri.substring(0, schemeEndIndex - 1); |
- } |
- var host = ""; |
- var userInfo = ""; |
- var port = 0; |
- if (schemeEndIndex != authorityEndIndex) { |
- int startIndex = schemeEndIndex + 2; |
- if (userInfoEndIndex > 0) { |
- userInfo = uri.substring(startIndex, userInfoEndIndex); |
- startIndex = userInfoEndIndex + 1; |
+ maybeUserInfo: { |
+ // Break this when we know user-info is done or not there. |
+ // user-info or reg-name |
+ if (current == _LEFT_BRACKET) break maybeUserInfo; |
+ while (_isRegNameChar(current)) { |
+ advance(); |
+ } |
+ if (current == _SLASH) { |
+ host = uri.substring(start, index); |
+ return; |
+ } |
+ if (current == _AT_SIGN) { |
+ userInfo = uri.substring(start, index); |
+ advance(); |
+ start = index; |
+ break maybeUserInfo; |
+ } |
+ if (current == _COLON) { |
+ // First colon seen after what might be a host name. |
+ // Can be either part of user-info or preceding a port. |
+ int hostEnd = index; |
+ advance(); |
+ // user-info or port. |
+ if (_isDigit(current)) { |
+ int portVal = parsePort(); |
+ if (current == _SLASH || current == _EOI) { |
+ host = uri.substring(start, hostEnd); |
+ port = portVal; |
+ return; |
+ } |
+ } |
+ } |
+ if (current == _EOI) { |
+ host = uri.substring(start, index); |
+ return; |
+ } |
+ // A non-port character seen after a colon. |
+ // This must be user-info. |
+ while (_isUserInfoChar(current)) { |
+ advance(); |
+ } |
+ if (current != _AT_SIGN) { |
+ _fail(uri, index, "Expected @"); |
kevmoo
2014/06/10 21:29:08
quote '@'
|
+ } |
+ userInfo = uri.substring(start, index); |
+ advance(); |
+ start = index; |
+ } // end maybeUserInfo |
+ parseHost(); |
+ if (current == _COLON) { |
+ advance(); |
+ if (!_isDigit(current)) { |
+ _fail(uri, index, "Expected port number"); |
+ } |
+ port = parsePort(); |
} |
- if (portIndex > 0) { |
- var portStr = uri.substring(portIndex, authorityEndIndex); |
- try { |
- port = int.parse(portStr); |
- } catch (_) { |
- throw new FormatException("Invalid port: '$portStr'"); |
+ } // end parseAuth(). |
+ |
+ // Start parsing. |
+ to_path: { // Break this block to go to parsing the path. |
+ advance(); |
+ if (_isAlpha(current)) { |
Anders Johnsen
2014/06/11 10:00:03
Do we need 'scheme != null' to prevent double-pars
Lasse Reichstein Nielsen
2014/06/12 08:07:31
No, there is no parsing before this point. The cod
|
+ // May be scheme or path. |
+ do { |
+ advance(); |
+ } while (_isSchemeCharacter(current)); |
+ if (current != _COLON) { |
+ break to_path; |
+ } |
+ allowColonInPath = true; |
+ scheme = uri.substring(0, index); |
+ advance(); |
+ start = index; |
+ } |
+ // Path or authority. |
+ if (current == _SLASH) { |
+ allowColonInPath = true; |
+ advance(); |
+ if (current == _SLASH) { |
+ advance(); |
+ parseAuth(); |
+ if (current == _EOI) { |
+ start = index; |
+ break to_path; |
+ } |
+ if (current != _SLASH) { |
+ _fail(uri, index, "Expected /"); |
kevmoo
2014/06/10 21:29:08
quote '/'
|
+ } |
+ start = index; |
+ advance(); |
} |
- host = uri.substring(startIndex, portIndex - 1); |
- } else { |
- host = uri.substring(startIndex, authorityEndIndex); |
} |
+ } // end to_path. |
+ if (!allowColonInPath) { |
+ while (_isPathChar(current) || current == _PERCENT) { |
Anders Johnsen
2014/06/11 10:00:03
Will _isPathChar return true for '/' ?
Lasse Reichstein Nielsen
2014/06/12 08:07:31
No.
|
+ if (current == _COLON) { |
+ _fail(uri, index, "Colon in path before first '/'"); |
+ } |
+ advance(); |
+ } |
+ } |
+ // Start or continue path. May be empty. |
+ while (_isPathChar(current) || current == _SLASH || current == _PERCENT) { |
+ advance(); |
} |
+ path = uri.substring(start, index); |
- var path = uri.substring(authorityEndIndex, pathEndIndex); |
- var query = ""; |
- if (pathEndIndex < queryEndIndex) { |
- query = uri.substring(pathEndIndex + 1, queryEndIndex); |
+ if (current == _QUESTION) { |
+ start = nextIndex; |
+ do { |
+ advance(); |
+ } while (_isQueryChar(current) || current == _PERCENT); |
+ query = uri.substring(start, index); |
} |
- var fragment = ""; |
- // If queryEndIndex is not at end (length), there is a fragment. |
- if (queryEndIndex < length) { |
- fragment = uri.substring(queryEndIndex + 1, length); |
+ |
+ if (current == _NUMBER_SIGN) { |
+ // Fragment can contain same characters as query. |
Søren Gjesse
2014/06/11 08:33:15
No number sign in fragment?
Lasse Reichstein Nielsen
2014/06/12 08:07:32
No. At most one # in any URL.
|
+ start = nextIndex; |
+ do { |
+ advance(); |
+ } while (_isQueryChar(current) || current == _PERCENT); |
+ fragment = uri.substring(start, index); |
+ } |
+ |
+ if (current != _EOI) { |
+ _fail(uri, index, "Unexpected character"); |
} |
return new Uri(scheme: scheme, |
Anders Johnsen
2014/06/11 10:00:03
This constructor does some extra validation. Do we
Lasse Reichstein Nielsen
2014/06/12 08:07:31
As pointed out elsewhere, the validation needs to
|
userInfo: userInfo, |
- host: host, |
port: port, |
kevmoo
2014/06/10 21:29:08
nit: might as well keep these params in the origin
Lasse Reichstein Nielsen
2014/06/12 08:07:31
ACK.
|
+ host: host, |
path: path, |
query: query, |
fragment: fragment); |
} |
+ // Reports a parse failure by throwing a FormatException. |
+ // |
+ // [uri] is the text being parsed, [index] is the position of the failure |
+ // and [message] is a short description of what went wrong. |
+ // |
+ // If [index] is the end of [uri], the message only notes the unexpected |
+ // end of input. Otherwise the position is added, followed by the source |
+ // text and a line with a caret ('^') below the offending position. Sources |
+ // longer than 78 characters are cut down to a 72 character window that |
+ // starts up to 10 characters before [index]; "..." marks each cut end. |
+ static void _fail(uri, index, message) { |
kevmoo
2014/06/10 21:29:08
Add types to parameters
|
+ if (index == uri.length) { |
+ message += ": Unexpected end of input."; |
kevmoo
2014/06/10 21:29:08
Could this be changed so each message reads like a
|
+ } else { |
+ message += ": Unexpected character at position $index.\n"; |
kevmoo
2014/06/10 21:29:08
ditto for reading like a sentence from above.
|
+ // Bounds of the part of the source that is shown in the message. |
+ int min = 0; |
+ int max = uri.length; |
+ String pre = ""; |
+ String post = ""; |
kevmoo
2014/06/10 21:29:08
Code comments for what you're doing here? Is this
Lasse Reichstein Nielsen
2014/06/12 08:07:31
Yes, at most 78 characters of the source will be d
|
+ if (uri.length > 78) { |
+ min = index - 10; |
+ if (min < 0) min = 0; |
+ // Assign the outer `max`. Declaring a new variable here would shadow |
+ // it and leave the substring below untruncated. |
+ max = min + 72; |
+ if (max > uri.length) { |
+ // Window runs past the end: slide it back so it still spans 72. |
+ max = uri.length; |
+ min = max - 72; |
+ } |
+ if (min != 0) pre = "..."; |
+ if (max != uri.length) post = "..."; |
+ } |
+ // The caret line is indented by the prefix plus the offset into the window. |
+ message = "$message$pre${uri.substring(min, max)}$post\n" |
+ "${' ' * (pre.length + index - min)}^"; |
+ } |
+ throw new FormatException(message); |
+ } |
+ |
+ |
/** |
* Creates a new URI from its components. |
* |
@@ -934,6 +1016,22 @@ class Uri { |
return ch < 128 && ((_schemeTable[ch >> 4] & (1 << (ch & 0x0f))) != 0); |
} |
+ // Whether [ch] is below 128 and has its bit set in [_pathCharTable]. |
+ // |
+ // The table entry is selected by `ch >> 4`; the low four bits of [ch] |
+ // select the bit inside that entry. |
+ static bool _isPathChar(int ch) { |
+ if (ch >= 128) return false; |
+ int bit = 1 << (ch & 0x0f); |
+ return (_pathCharTable[ch >> 4] & bit) != 0; |
+ } |
+ |
+ // Whether [ch] is below 128 and has its bit set in [_regNameTable]. |
+ // |
+ // The table entry is selected by `ch >> 4`; the low four bits of [ch] |
+ // select the bit inside that entry. |
+ static bool _isRegNameChar(int ch) { |
+ if (ch >= 128) return false; |
+ int bit = 1 << (ch & 0x0f); |
+ return (_regNameTable[ch >> 4] & bit) != 0; |
+ } |
+ |
+ // Whether [ch] may appear in the user-info part as parsed here: below 128 |
+ // and either flagged in [_regNameTable] or equal to [_COLON]. |
+ static bool _isUserInfoChar(int ch) { |
+ if (ch >= 128) return false; |
+ int bit = 1 << (ch & 0x0f); |
+ if ((_regNameTable[ch >> 4] & bit) != 0) return true; |
+ return ch == _COLON; |
+ } |
+ |
+ // Whether [ch] is below 128 and has its bit set in [_queryCharTable]. |
+ // |
+ // The table entry is selected by `ch >> 4`; the low four bits of [ch] |
+ // select the bit inside that entry. |
+ static bool _isQueryChar(int ch) { |
+ if (ch >= 128) return false; |
+ int bit = 1 << (ch & 0x0f); |
+ return (_queryCharTable[ch >> 4] & bit) != 0; |
+ } |
/** |
* Returns whether the URI is absolute. |
@@ -1551,6 +1649,25 @@ class Uri { |
static const int _LOWER_CASE_F = 0x66; |
static const int _LOWER_CASE_Z = 0x7A; |
static const int _BAR = 0x7C; |
+ // Largest code unit the parser's advance() step accepts; anything above |
+ // it is reported as "Unexpected character". |
+ static const int _MAX_VALID_CHAR = 0x7e; |
+ // Value the parser stores in `current` once it has moved past the end of |
+ // the input. |
+ static const int _EOI = 0x10000; // Not a code unit. |
Anders Johnsen
2014/06/11 10:00:03
-1?
Lasse Reichstein Nielsen
2014/06/12 08:07:31
Breaks the _isPathChar function (or requires an ex
|
+ |
+ // Whether [char], after folding with bit 0x20, lies in the range |
+ // [_LOWER_CASE_A].._[LOWER_CASE_Z]. |
+ static bool _isAlpha(int char) { |
+ // Setting bit 0x20 folds an upper-case ASCII letter onto its lower-case |
+ // form, so a single range check covers both cases. |
+ int folded = char | 0x20; |
+ if (folded < _LOWER_CASE_A) return false; |
+ return folded <= _LOWER_CASE_Z; |
+ // TODO: Test: |
+ // return ((char - _a) & 0xffff) <= (_z - _a); |
+ } |
+ |
+ // Whether [char] lies in the inclusive range [_ZERO].._[NINE]. |
+ static bool _isDigit(int char) { |
+ if (char < _ZERO) return false; |
+ return char <= _NINE; |
+ } |
+ |
+ // Whether [char] is a hexadecimal digit: in [_ZERO].._[NINE], or, after |
+ // folding with bit 0x20, in [_LOWER_CASE_A].._[LOWER_CASE_F]. |
+ static bool _isHexDigit(int char) { |
+ if (char > _NINE) { |
+ // Above the digits: fold case and test against the letter range. |
+ int folded = char | 0x20; |
+ return folded >= _LOWER_CASE_A && folded <= _LOWER_CASE_F; |
+ } |
+ return char >= _ZERO; |
+ } |
/** |
* This is the internal implementation of JavaScript's encodeURI function. |