Index: sdk/lib/core/uri.dart |
diff --git a/sdk/lib/core/uri.dart b/sdk/lib/core/uri.dart |
index bdaf08639b200222f667b76c3a888f3678393f99..313512cdd65faa0537963faeea685b6e421a004c 100644 |
--- a/sdk/lib/core/uri.dart |
+++ b/sdk/lib/core/uri.dart |
@@ -175,171 +175,233 @@ class Uri { |
bool isRegName(int ch) { |
return ch < 128 && ((_regNameTable[ch >> 4] & (1 << (ch & 0x0f))) != 0); |
} |
+ const int EOI = -1; |
- int ipV6Address(int index) { |
- // IPv6. Skip to ']'. |
- index = uri.indexOf(']', index); |
- if (index == -1) { |
- throw new FormatException("Bad end of IPv6 host"); |
- } |
- return index + 1; |
- } |
+ String scheme = ""; |
+ String path; |
+ String userinfo = ""; |
+ String host = ""; |
+ int port = 0; |
+ String query = ""; |
+ String fragment = ""; |
- int length = uri.length; |
int index = 0; |
- |
- int schemeEndIndex = 0; |
- |
- if (length == 0) { |
- return new Uri(); |
- } |
- |
- if (uri.codeUnitAt(0) != _SLASH) { |
- // Can be scheme. |
- while (index < length) { |
- // Look for ':'. If found, continue from the post of ':'. If not (end |
- // reached or invalid scheme char found) back up one char, and continue |
- // to path. |
- // Note that scheme-chars is contained in path-chars. |
- int codeUnit = uri.codeUnitAt(index++); |
- if (!_isSchemeCharacter(codeUnit)) { |
- if (codeUnit == _COLON) { |
- schemeEndIndex = index; |
- } else { |
- // Back up one char, since we met an invalid scheme char. |
- index--; |
- } |
+ int pathStart = 0; |
+ // End of input-marker. |
+ int char = EOI; |
+ |
+ void parseAuth() { |
+ if (index == uri.length) { |
+ char = EOI; |
+ return; |
+ } |
+ int authStart = index; |
+ int lastColon = -1; |
+ int lastAt = -1; |
+ char = uri.codeUnitAt(index); |
+ while (index < uri.length) { |
+ char = uri.codeUnitAt(index); |
+ if (char == _SLASH || char == _QUESTION || char == _NUMBER_SIGN) { |
break; |
} |
- } |
- } |
- |
- int userInfoEndIndex = -1; |
- int portIndex = -1; |
- int authorityEndIndex = schemeEndIndex; |
- // If we see '//', there must be an authority. |
- if (authorityEndIndex == index && |
- authorityEndIndex + 1 < length && |
- uri.codeUnitAt(authorityEndIndex) == _SLASH && |
- uri.codeUnitAt(authorityEndIndex + 1) == _SLASH) { |
- // Skip '//'. |
- authorityEndIndex += 2; |
- // It can both be host and userInfo. |
- while (authorityEndIndex < length) { |
- int codeUnit = uri.codeUnitAt(authorityEndIndex++); |
- if (!isRegName(codeUnit)) { |
- if (codeUnit == _LEFT_BRACKET) { |
- authorityEndIndex = ipV6Address(authorityEndIndex); |
- } else if (portIndex == -1 && codeUnit == _COLON) { |
- // First time ':'. |
- portIndex = authorityEndIndex; |
- } else if (codeUnit == _AT_SIGN || codeUnit == _COLON) { |
- // Second time ':' or first '@'. Must be userInfo. |
- userInfoEndIndex = uri.indexOf('@', authorityEndIndex - 1); |
- // Not found. Must be path then. |
- if (userInfoEndIndex == -1) { |
- authorityEndIndex = index; |
- break; |
- } |
- portIndex = -1; |
- authorityEndIndex = userInfoEndIndex + 1; |
- // Now it can only be host:port. |
- while (authorityEndIndex < length) { |
- int codeUnit = uri.codeUnitAt(authorityEndIndex++); |
- if (!isRegName(codeUnit)) { |
- if (codeUnit == _LEFT_BRACKET) { |
- authorityEndIndex = ipV6Address(authorityEndIndex); |
- } else if (codeUnit == _COLON) { |
- if (portIndex != -1) { |
- throw new FormatException("Double port in host"); |
- } |
- portIndex = authorityEndIndex; |
- } else { |
- authorityEndIndex--; |
- break; |
- } |
- } |
- } |
+ if (char == _AT_SIGN) { |
Søren Gjesse
2014/06/19 07:39:43
So you can have @ and : in the username?
Lasse Reichstein Nielsen
2014/06/19 08:44:58
You can have :, which you always could.
You can't
|
+ lastAt = index; |
+ lastColon = -1; |
+ } else if (char == _COLON) { |
+ lastColon = index; |
+ } else if (char == _LEFT_BRACKET) { |
+ lastColon = -1; |
+ int endBracket = uri.indexOf(']', index + 1); |
+ if (endBracket == -1) { |
+ index = uri.length; |
+ char = EOI; |
break; |
} else { |
- authorityEndIndex--; |
- break; |
+ index = endBracket; |
} |
} |
+ index++; |
+ char = EOI; |
+ } |
+ int hostStart = authStart; |
+ int hostEnd = index; |
+ if (lastAt >= 0) { |
+ userinfo = _makeUserInfo(uri, authStart, lastAt); |
+ hostStart = lastAt + 1; |
+ } |
+ if (lastColon >= 0) { |
+ if (lastColon + 1 == index) { |
+ _fail(uri, index, "Invalid port number"); |
+ } |
+ int portNumber = 0; |
+ for (int i = lastColon + 1; i < index; i++) { |
+ int digit = uri.codeUnitAt(i); |
+ if (_ZERO > digit || _NINE < digit) { |
+ _fail(uri, i, "Invalid port number"); |
+ } |
+ portNumber = portNumber * 10 + (digit - _ZERO); |
+ } |
+ port = _makePort(portNumber, scheme); |
+ hostEnd = lastColon; |
+ } |
+ host = _makeHost(uri, hostStart, hostEnd); |
+ if (index < uri.length) { |
+ char = uri.codeUnitAt(index); |
} |
- } else { |
- authorityEndIndex = schemeEndIndex; |
} |
- // At path now. |
- int pathEndIndex = authorityEndIndex; |
- while (pathEndIndex < length) { |
- int codeUnit = uri.codeUnitAt(pathEndIndex++); |
- if (codeUnit == _QUESTION || codeUnit == _NUMBER_SIGN) { |
- pathEndIndex--; |
+ // When reaching authority parsing, authority is possible. |
+ // This is only true at start or right after scheme. |
+ const int STATE_AUTH_OPT = 1; |
+ // When reaching path parsing, the current character is part |
+ // of the path. |
+ const int STATE_PATH = 2; |
+ // When reaching path parsing, the current character is known to not |
+ // be part of the path. |
+ const int STATE_PATH_END = 3; |
+ |
+ // Current state. |
+ // Initialized to the default value that is used when exiting the |
+ // scheme loop by reaching the end of input. |
+ // All other breaks set their own state. |
+ int state = STATE_PATH_END; |
Søren Gjesse
2014/06/19 07:39:43
It is somewhat confusing that this state value doe
Lasse Reichstein Nielsen
2014/06/19 08:44:58
I only set the state on a break. This is the state
|
+ while (index < uri.length) { |
+ char = uri.codeUnitAt(index); |
+ if (char == _QUESTION || char == _NUMBER_SIGN) { |
+ state = STATE_PATH_END; |
break; |
} |
+ if (char == _SLASH) { |
+ if (index == 0) { |
Søren Gjesse
2014/06/19 07:39:43
Conditional expression?
Lasse Reichstein Nielsen
2014/06/19 08:44:58
Ok.
I don't really like them, but I guess it's ok
|
+ state = STATE_AUTH_OPT; |
+ } else { |
+ state = STATE_PATH; |
+ } |
+ break; |
+ } |
+ if (char == _COLON) { |
+ if (index == 0) _fail(uri, 0, "Invalid empty scheme"); |
+ scheme = _makeScheme(uri, index); |
+ index++; |
+ pathStart = index; |
+ if (index == uri.length) { |
+ char = EOI; |
+ state = STATE_PATH_END; |
+ } else { |
+ char = uri.codeUnitAt(index); |
+ if (char == _QUESTION || char == _NUMBER_SIGN) { |
+ state = STATE_PATH_END; |
+ } else if (char == _SLASH) { |
+ state = STATE_AUTH_OPT; |
+ } else { |
+ state = STATE_PATH; |
+ } |
+ } |
+ break; |
+ } |
+ index++; |
+ char = EOI; |
+ } |
+ if (state == STATE_AUTH_OPT) { |
+ state = STATE_PATH; // Default value when leaving authority. |
+ // Have seen one slash either at start or right after scheme. |
+ // If two slashes, it's an authority, otherwise it's just the path. |
+ assert(char == _SLASH); |
+ index++; |
+ if (index == uri.length) { |
+ char = EOI; |
+ state = STATE_PATH_END; |
+ } else { |
+ char = uri.codeUnitAt(index); |
+ if (char == _QUESTION || char == _NUMBER_SIGN) { |
+ state = STATE_PATH_END; |
+ } else if (char == _SLASH) { |
+ index++; |
+ parseAuth(); |
+ pathStart = index; |
+ if (char == _QUESTION || char == _NUMBER_SIGN || char == EOI) { |
+ state = STATE_PATH_END; |
+ } |
+ } |
+ } |
} |
- |
- // Maybe query. |
- int queryEndIndex = pathEndIndex; |
- if (queryEndIndex < length && uri.codeUnitAt(queryEndIndex) == _QUESTION) { |
- while (queryEndIndex < length) { |
- int codeUnit = uri.codeUnitAt(queryEndIndex++); |
- if (codeUnit == _NUMBER_SIGN) { |
- queryEndIndex--; |
+ if (state == STATE_PATH) { |
+ // Characters from pathStart to index (inclusive) are known |
+ // to be part of the path. |
+ while (++index < uri.length) { |
+ char = uri.codeUnitAt(index); |
+ if (char == _QUESTION || char == _NUMBER_SIGN) { |
+ state = STATE_PATH_END; |
break; |
} |
+ char = EOI; |
} |
} |
+ assert(state == STATE_PATH_END); |
+ bool ensureLeadingSlash = (host != "" || scheme == "file"); |
+ path = _makePath(uri, pathStart, index, null, ensureLeadingSlash); |
- var scheme = null; |
- if (schemeEndIndex > 0) { |
- scheme = uri.substring(0, schemeEndIndex - 1); |
- } |
- |
- var host = ""; |
- var userInfo = ""; |
- var port = 0; |
- if (schemeEndIndex != authorityEndIndex) { |
- int startIndex = schemeEndIndex + 2; |
- if (userInfoEndIndex > 0) { |
- userInfo = uri.substring(startIndex, userInfoEndIndex); |
- startIndex = userInfoEndIndex + 1; |
- } |
- if (portIndex > 0) { |
- var portStr = uri.substring(portIndex, authorityEndIndex); |
- try { |
- port = int.parse(portStr); |
- } catch (_) { |
- throw new FormatException("Invalid port: '$portStr'"); |
- } |
- host = uri.substring(startIndex, portIndex - 1); |
+ if (char == _QUESTION) { |
+ int numberSignIndex = uri.indexOf('#', index + 1); |
+ if (numberSignIndex < 0) { |
+ query = _makeQuery(uri, index + 1, uri.length, null); |
} else { |
- host = uri.substring(startIndex, authorityEndIndex); |
+ query = _makeQuery(uri, index + 1, numberSignIndex, null); |
+ fragment = _makeFragment(uri, numberSignIndex + 1, uri.length); |
} |
- } |
+ } else if (char == _NUMBER_SIGN) { |
+ fragment = _makeFragment(uri, index + 1, uri.length); |
+ } |
+ return new Uri._internal(scheme, |
+ userinfo, |
+ host, |
+ port, |
+ path, |
+ query, |
+ fragment); |
+ } |
- var path = uri.substring(authorityEndIndex, pathEndIndex); |
- var query = ""; |
- if (pathEndIndex < queryEndIndex) { |
- query = uri.substring(pathEndIndex + 1, queryEndIndex); |
- } |
- var fragment = ""; |
- // If queryEndIndex is not at end (length), there is a fragment. |
- if (queryEndIndex < length) { |
- fragment = uri.substring(queryEndIndex + 1, length); |
+ // Report a parse failure. |
+ static void _fail(String uri, int index, String message) { |
+ // TODO(lrn): Consider adding this to FormatException. |
+ if (index == uri.length) { |
+ message += " at end of input."; |
+ } else { |
+ message += " at position $index.\n"; |
+ // Pick a slice of uri containing index and, if |
+ // necessary, truncate the ends to ensure the entire |
+ // slice fits on one line. |
+ int min = 0; |
+ int max = uri.length; |
+ String pre = ""; |
+ String post = ""; |
+ if (uri.length > 78) { |
+ min = index - 10; |
+ if (min < 0) min = 0; |
+ int max = min + 72; |
+ if (max > uri.length) { |
+ max = uri.length; |
+ min = max - 72; |
+ } |
+ if (min != 0) pre = "..."; |
+ if (max != uri.length) post = "..."; |
+ } |
+ // Combine message, slice and a caret pointing to the error index. |
+ message = "$message$pre${uri.substring(min, max)}$post\n" |
+ "${' ' * (pre.length + index - min)}^"; |
} |
- |
- return new Uri(scheme: scheme, |
- userInfo: userInfo, |
- host: host, |
- port: port, |
- path: path, |
- query: query, |
- fragment: fragment); |
+ throw new FormatException(message); |
} |
+ /// Internal non-verifying constructor. Only call with validated arguments. |
+ Uri._internal(this.scheme, |
+ this.userInfo, |
+ this._host, |
+ this._port, |
+ this._path, |
+ this.query, |
+ this.fragment); |
+ |
/** |
* Creates a new URI from its components. |
* |
@@ -386,29 +448,27 @@ class Uri { |
* |
* The fragment component is set through [fragment]. |
*/ |
- Uri({String scheme, |
- this.userInfo: "", |
+ factory Uri({String scheme, |
+ String userInfo: "", |
String host: "", |
port: 0, |
String path, |
Iterable<String> pathSegments, |
String query, |
Map<String, String> queryParameters, |
- fragment: ""}) : |
- scheme = _makeScheme(scheme), |
- _host = _makeHost(host), |
- query = _makeQuery(query, queryParameters), |
- fragment = _makeFragment(fragment) { |
- // Perform scheme specific normalization. |
- if (scheme == "http" && port == 80) { |
- _port = 0; |
- } else if (scheme == "https" && port == 443) { |
- _port = 0; |
- } else { |
- _port = port; |
- } |
- // Fill the path. |
- _path = _makePath(path, pathSegments); |
+ fragment: ""}) { |
+ scheme = _makeScheme(scheme, _stringOrNullLength(scheme)); |
+ userInfo = _makeUserInfo(userInfo, 0, _stringOrNullLength(userInfo)); |
+ host = _makeHost(host, 0, _stringOrNullLength(host)); |
+ query = _makeQuery(query, 0, _stringOrNullLength(query), queryParameters); |
+ fragment = _makeFragment(fragment, 0, _stringOrNullLength(fragment)); |
+ port = _makePort(port, scheme); |
+ bool ensureLeadingSlash = (host != "" || scheme == "file"); |
+ path = _makePath(path, 0, _stringOrNullLength(path), pathSegments, |
+ ensureLeadingSlash); |
+ |
+ return new Uri._internal(scheme, userInfo, host, port, |
+ path, query, fragment); |
} |
/** |
@@ -489,7 +549,7 @@ class Uri { |
if (hostEnd == authority.length) { |
throw new FormatException("Invalid IPv6 host entry."); |
} |
- parseIPv6Address(authority.substring(hostStart + 1, hostEnd)); |
+ parseIPv6Address(authority, hostStart + 1, hostEnd); |
hostEnd++; // Skip the closing bracket. |
if (hostEnd != authority.length && |
authority.codeUnitAt(hostEnd) != _COLON) { |
@@ -765,75 +825,177 @@ class Uri { |
return _queryParameters; |
} |
- static String _makeHost(String host) { |
- if (host == null || host.isEmpty) return host; |
- if (host.codeUnitAt(0) == _LEFT_BRACKET) { |
- if (host.codeUnitAt(host.length - 1) != _RIGHT_BRACKET) { |
- throw new FormatException('Missing end `]` to match `[` in host'); |
+ static int _makePort(int port, String scheme) { |
+ // Perform scheme specific normalization. |
+ if (port == 80 && scheme == "http") { |
+ return 0; |
+ } |
+ if (port == 443 && scheme == "https") { |
+ return 0; |
+ } |
+ return port; |
+ } |
+ |
+ static String _makeHost(String host, int start, int end) { |
+ if (host == null) return null; |
+ if (start == end) return ""; |
+ // Host is an IPv6 address if it starts with '[' or contains a colon. |
+ if (host.codeUnitAt(start) == _LEFT_BRACKET) { |
+ if (host.codeUnitAt(end - 1) != _RIGHT_BRACKET) { |
+ _fail(host, start, 'Missing end `]` to match `[` in host'); |
} |
- parseIPv6Address(host.substring(1, host.length - 1)); |
- return host; |
+ parseIPv6Address(host, start + 1, end - 1); |
+ return host.substring(start, end); |
} |
- for (int i = 0; i < host.length; i++) { |
+ // TODO(lrn): skip if too short to be a valid IPv6 address. |
+ for (int i = start; i < end; i++) { |
if (host.codeUnitAt(i) == _COLON) { |
- parseIPv6Address(host); |
+ parseIPv6Address(host, start, end); |
return '[$host]'; |
} |
} |
- return host; |
+ return _normalizeRegName(host, start, end); |
+ } |
+ |
+ static bool _isRegNameChar(int char) { |
+ return char < 127 && (_regNameTable[char >> 4] & (1 << (char & 0xf))) != 0; |
} |
- static String _makeScheme(String scheme) { |
- bool isSchemeLowerCharacter(int ch) { |
- return ch < 128 && |
- ((_schemeLowerTable[ch >> 4] & (1 << (ch & 0x0f))) != 0); |
+ /** |
+ * Validates and does case- and percent-encoding normalization. |
+ * |
+ * The [host] must be an RFC3986 "reg-name". It is converted |
+ * to lower case, and percent escapes are converted to either |
+ * lower case unreserved characters or upper case escapes. |
+ */ |
+ static String _normalizeRegName(String host, int start, int end) { |
+ StringBuffer buffer; |
+ int sectionStart = start; |
+ int index = start; |
+ // Whether all characters between sectionStart and index are normalized, |
+ bool isNormalized = true; |
+ |
+ while (index < end) { |
+ int char = host.codeUnitAt(index); |
+ if (char == _PERCENT) { |
+ // The _regNameTable contains "%", so we check that first. |
+ String replacement = _normalizeEscape(host, index, true); |
+ if (replacement == null && isNormalized) { |
+ index += 3; |
+ continue; |
+ } |
+ if (buffer == null) buffer = new StringBuffer(); |
+ String slice = host.substring(sectionStart, index); |
+ if (!isNormalized) slice = slice.toLowerCase(); |
+ buffer.write(slice); |
+ int sourceLength = 3; |
+ if (replacement == null) { |
+ replacement = host.substring(index, index + 3); |
+ } else if (replacement == "%") { |
+ replacement = "%25"; |
+ sourceLength = 1; |
+ } |
+ buffer.write(replacement); |
+ index += sourceLength; |
+ sectionStart = index; |
+ isNormalized = true; |
+ } else if (_isRegNameChar(char)) { |
+ if (isNormalized && _UPPER_CASE_A <= char && _UPPER_CASE_Z >= char) { |
+ // Put initial slice in buffer and continue in non-normalized mode |
+ if (buffer == null) buffer = new StringBuffer(); |
+ if (sectionStart < index) { |
+ buffer.write(host.substring(sectionStart, index)); |
+ sectionStart = index; |
+ } |
+ isNormalized = false; |
+ } |
+ index++; |
+ } else if (_isGeneralDelimiter(char)) { |
+ _fail(host, index, "Invalid character"); |
+ } else { |
+ int sourceLength = 1; |
+ if ((char & 0xFC00) == 0xD800 && (index + 1) < end) { |
+ int tail = host.codeUnitAt(index + 1); |
+ if ((tail & 0xFC00) == 0xDC00) { |
+ char = 0x10000 | ((char & 0x3ff) << 10) | (tail & 0x3ff); |
+ sourceLength = 2; |
+ } |
+ } |
+ if (buffer == null) buffer = new StringBuffer(); |
+ String slice = host.substring(sectionStart, index); |
+ if (!isNormalized) slice = slice.toLowerCase(); |
+ buffer.write(slice); |
+ buffer.write(_escapeChar(char)); |
+ index += sourceLength; |
+ sectionStart = index; |
+ } |
+ } |
+ if (buffer == null) return host.substring(start, end); |
+ if (sectionStart < end) { |
+ String slice = host.substring(sectionStart, end); |
+ if (!isNormalized) slice = slice.toLowerCase(); |
+ buffer.write(slice); |
} |
+ return buffer.toString(); |
+ } |
- if (scheme == null) return ""; |
- bool allLowercase = true; |
- int length = scheme.length; |
- for (int i = 0; i < length; i++) { |
+ /** |
+ * Validates scheme characters and does case-normalization. |
+ * |
+ * Schemes are converted to lower case. They cannot contain escapes. |
+ */ |
+ static String _makeScheme(String scheme, int end) { |
+ if (end == 0) return ""; |
+ int char = scheme.codeUnitAt(0); |
+ if (!_isAlphabeticCharacter(char)) { |
+ _fail(scheme, 0, "Scheme not starting with alphabetic character"); |
+ } |
+ bool allLowercase = char >= _LOWER_CASE_A; |
+ for (int i = 0; i < end; i++) { |
int codeUnit = scheme.codeUnitAt(i); |
- if (i == 0 && !_isAlphabeticCharacter(codeUnit)) { |
- // First code unit must be an alphabetic character. |
- throw new ArgumentError('Illegal scheme: $scheme'); |
+ if (!_isSchemeCharacter(codeUnit)) { |
+ _fail(scheme, i, "Illegal scheme character"); |
} |
- if (!isSchemeLowerCharacter(codeUnit)) { |
- if (_isSchemeCharacter(codeUnit)) { |
- allLowercase = false; |
- } else { |
- throw new ArgumentError('Illegal scheme: $scheme'); |
- } |
+ if (_LOWER_CASE_A <= char && _LOWER_CASE_Z >= char) { |
+ allLowercase = false; |
} |
} |
+ scheme = scheme.substring(0, end); |
+ if (!allLowercase) scheme = scheme.toLowerCase(); |
+ return scheme; |
+ } |
- return allLowercase ? scheme : scheme.toLowerCase(); |
+ static String _makeUserInfo(String userInfo, int start, int end) { |
+ if (userInfo == null) return "null"; |
+ return _normalize(userInfo, start, end, _userinfoTable); |
} |
- String _makePath(String path, Iterable<String> pathSegments) { |
+ static String _makePath(String path, int start, int end, |
+ Iterable<String> pathSegments, |
+ bool ensureLeadingSlash) { |
if (path == null && pathSegments == null) return ""; |
if (path != null && pathSegments != null) { |
throw new ArgumentError('Both path and pathSegments specified'); |
} |
var result; |
if (path != null) { |
- result = _normalize(path); |
+ result = _normalize(path, start, end, _pathCharOrSlashTable); |
} else { |
result = pathSegments.map((s) => _uriEncode(_pathCharTable, s)).join("/"); |
} |
- if ((hasAuthority || (scheme == "file")) && |
- result.isNotEmpty && !result.startsWith("/")) { |
+ if (ensureLeadingSlash && result.isNotEmpty && !result.startsWith("/")) { |
return "/$result"; |
} |
return result; |
} |
- static String _makeQuery(String query, Map<String, String> queryParameters) { |
+ static String _makeQuery(String query, int start, int end, |
+ Map<String, String> queryParameters) { |
if (query == null && queryParameters == null) return ""; |
if (query != null && queryParameters != null) { |
throw new ArgumentError('Both query and queryParameters specified'); |
} |
- if (query != null) return _normalize(query); |
+ if (query != null) return _normalize(query, start, end, _queryCharTable); |
var result = new StringBuffer(); |
var first = true; |
@@ -851,123 +1013,183 @@ class Uri { |
return result.toString(); |
} |
- static String _makeFragment(String fragment) { |
+ static String _makeFragment(String fragment, int start, int end) { |
if (fragment == null) return ""; |
- return _normalize(fragment); |
+ return _normalize(fragment, start, end, _queryCharTable); |
} |
- static String _normalize(String component) { |
- int index = component.indexOf('%'); |
- if (index < 0) return component; |
- |
- bool isNormalizedHexDigit(int digit) { |
- return (_ZERO <= digit && digit <= _NINE) || |
- (_UPPER_CASE_A <= digit && digit <= _UPPER_CASE_F); |
- } |
+ static int _stringOrNullLength(String s) => (s == null) ? 0 : s.length; |
- bool isLowerCaseHexDigit(int digit) { |
- return _LOWER_CASE_A <= digit && digit <= _LOWER_CASE_F; |
- } |
+ static bool _isHexDigit(int char) { |
+ if (_NINE >= char) return _ZERO <= char; |
+ char |= 0x20; |
+ return _LOWER_CASE_A <= char && _LOWER_CASE_F >= char; |
+ } |
- bool isUnreserved(int ch) { |
- return ch < 128 && |
- ((_unreservedTable[ch >> 4] & (1 << (ch & 0x0f))) != 0); |
- } |
+ static int _hexValue(int char) { |
+ assert(_isHexDigit(char)); |
+ if (_NINE >= char) return char - _ZERO; |
+ char |= 0x20; |
+ return char - (_LOWER_CASE_A - 10); |
+ } |
- int normalizeHexDigit(int index) { |
- var codeUnit = component.codeUnitAt(index); |
- if (isLowerCaseHexDigit(codeUnit)) { |
- return codeUnit - 0x20; |
- } else if (!isNormalizedHexDigit(codeUnit)) { |
- throw new ArgumentError("Invalid URI component: $component"); |
- } else { |
- return codeUnit; |
+ /** |
+ * Performs RFC 3986 Percent-Encoding Normalization. |
+ * |
+ * Returns a replacement string that should be replace the original escape. |
+ * Returns null if no replacement is necessary because the escape is |
+ * not for an unreserved character and is already non-lower-case. |
+ * |
+ * Returns "%" if the escape is invalid (not two valid hex digits following |
+ * the percent sign). The calling code should replace the percent |
+ * sign with "%25", but leave the following two characters unmodified. |
+ * |
+ * If [lowerCase] is true, a single character returned is always lower case, |
+ */ |
+ static String _normalizeEscape(String source, int index, bool lowerCase) { |
+ assert(source.codeUnitAt(index) == _PERCENT); |
+ if (index + 2 >= source.length) { |
+ return "%"; // Marks the escape as invalid. |
+ } |
+ int firstDigit = source.codeUnitAt(index + 1); |
+ int secondDigit = source.codeUnitAt(index + 2); |
+ if (!_isHexDigit(firstDigit) || !_isHexDigit(secondDigit)) { |
+ return "%"; // Marks the escape as invalid. |
+ } |
+ int value = _hexValue(firstDigit) * 16 + _hexValue(secondDigit); |
+ if (_isUnreservedChar(value)) { |
+ if (lowerCase && _UPPER_CASE_A <= value && _UPPER_CASE_Z >= value) { |
+ value |= 0x20; |
} |
+ return new String.fromCharCode(value); |
} |
- |
- int decodeHexDigitPair(int index) { |
- int byte = 0; |
- for (int i = 0; i < 2; i++) { |
- var codeUnit = component.codeUnitAt(index + i); |
- if (_ZERO <= codeUnit && codeUnit <= _NINE) { |
- byte = byte * 16 + codeUnit - _ZERO; |
- } else { |
- // Check ranges A-F (0x41-0x46) and a-f (0x61-0x66). |
- codeUnit |= 0x20; |
- if (_LOWER_CASE_A <= codeUnit && |
- codeUnit <= _LOWER_CASE_F) { |
- byte = byte * 16 + codeUnit - _LOWER_CASE_A + 10; |
- } else { |
- throw new ArgumentError( |
- "Invalid percent-encoding in URI component: $component"); |
- } |
- } |
- } |
- return byte; |
+ if (firstDigit >= _LOWER_CASE_A || secondDigit >= _LOWER_CASE_A) { |
+ // Either digit is lower case. |
+ return source.substring(index, index + 3).toUpperCase(); |
} |
+ // Escape is retained, and is already non-lower case, so return null to |
+ // represent "no replacement necessary". |
+ return null; |
+ } |
- // Start building the normalized component string. |
- StringBuffer result; |
- int length = component.length; |
- int prevIndex = 0; |
+ static bool _isUnreservedChar(int ch) { |
+ return ch < 127 && |
+ ((_unreservedTable[ch >> 4] & (1 << (ch & 0x0f))) != 0); |
+ } |
- // Copy a part of the component string to the result. |
- void fillResult() { |
- if (result == null) { |
- assert(prevIndex == 0); |
- result = new StringBuffer(component.substring(prevIndex, index)); |
- } else { |
- result.write(component.substring(prevIndex, index)); |
+ static String _escapeChar(char) { |
+ const hexDigits = "0123456789ABCDEF"; |
+ List codeUnits; |
+ if (char < 0x80) { |
+ // ASCII, a single percent encoded sequence. |
+ codeUnits = new List(3); |
+ codeUnits[0] = _PERCENT; |
+ codeUnits[1] = hexDigits.codeUnitAt(char >> 4); |
+ codeUnits[2] = hexDigits.codeUnitAt(char & 0xf); |
+ } else { |
+ // Do UTF-8 encoding of character, then percent encode bytes. |
+ int flag = 0xc0; // The high-bit markers on the first byte of UTF-8. |
+ int encodedBytes = 2; |
+ if (char > 0x7ff) { |
+ flag = 0xe0; |
+ encodedBytes = 3; |
+ if (char > 0xffff) { |
Søren Gjesse
2014/06/19 07:39:43
Are we sure that we cannot go beyond 4-byte encodi
Lasse Reichstein Nielsen
2014/06/19 08:44:58
Yes. We get at most 21 bits from a surrogate pair.
|
+ encodedBytes = 4; |
+ flag = 0xf0; |
+ } |
} |
- } |
- |
- while (index < length) { |
- // Normalize percent-encoding to uppercase and don't encode |
- // unreserved characters. |
- assert(component.codeUnitAt(index) == _PERCENT); |
- if (length < index + 2) { |
- throw new ArgumentError( |
- "Invalid percent-encoding in URI component: $component"); |
+ codeUnits = new List(3 * encodedBytes); |
+ int index = 0; |
+ while (--encodedBytes >= 0) { |
+ int byte = ((char >> (6 * encodedBytes)) & 0x3f) | flag; |
+ codeUnits[index] = _PERCENT; |
+ codeUnits[index + 1] = hexDigits.codeUnitAt(byte >> 4); |
+ codeUnits[index + 2] = hexDigits.codeUnitAt(byte & 0xf); |
+ index += 3; |
+ flag = 0x80; // Following bytes have only high bit set. |
} |
+ } |
+ return new String.fromCharCodes(codeUnits); |
+ } |
- var codeUnit1 = component.codeUnitAt(index + 1); |
- var codeUnit2 = component.codeUnitAt(index + 2); |
- var decodedCodeUnit = decodeHexDigitPair(index + 1); |
- if (isNormalizedHexDigit(codeUnit1) && |
- isNormalizedHexDigit(codeUnit2) && |
- !isUnreserved(decodedCodeUnit)) { |
- index += 3; |
+ /** |
+ * Runs through component checking that each character is valid and |
+ * normalize percent escapes. |
+ * |
+ * Uses [charTable] to check if a non-`%` character is allowed. |
+ * Each `%` character must be followed by two hex digits. |
+ * If the hex-digits are lower case letters, they are converted to |
+ * upper case. |
+ */ |
+ static String _normalize(String component, int start, int end, |
+ List<int> charTable) { |
+ StringBuffer buffer; |
+ int sectionStart = start; |
+ int index = start; |
+ // Loop while characters are valid and escapes correct and upper-case. |
+ while (index < end) { |
+ int char = component.codeUnitAt(index); |
+ if (char < 127 && (charTable[char >> 4] & (1 << (char & 0x0f))) != 0) { |
+ index++; |
} else { |
- fillResult(); |
- if (isUnreserved(decodedCodeUnit)) { |
- result.writeCharCode(decodedCodeUnit); |
+ String replacement; |
+ int sourceLength; |
+ if (char == _PERCENT) { |
+ replacement = _normalizeEscape(component, index, false); |
+ // Returns null if we should keep the existing escape. |
+ if (replacement == null) { |
+ index += 3; |
+ continue; |
+ } |
+ // Returns "%" if we should escape the existing percent. |
+ if ("%" == replacement) { |
+ replacement = "%25"; |
+ sourceLength = 1; |
+ } else { |
+ sourceLength = 3; |
+ } |
+ } else if (_isGeneralDelimiter(char)) { |
+ _fail(component, index, "Invalid character"); |
} else { |
- result.write("%"); |
- result.writeCharCode(normalizeHexDigit(index + 1)); |
- result.writeCharCode(normalizeHexDigit(index + 2)); |
+ sourceLength = 1; |
+ if ((char & 0xFC00) == 0xD800) { |
+ // Possible lead surrogate. |
+ if (index + 1 < end) { |
+ int tail = component.codeUnitAt(index + 1); |
+ if ((tail & 0xFC00) == 0xDC00) { |
+ // Tail surrogat. |
+ sourceLength = 2; |
+ char = 0x10000 | ((char & 0x3ff) << 10) | (tail & 0x3ff); |
+ } |
+ } |
+ } |
+ replacement = _escapeChar(char); |
} |
- index += 3; |
- prevIndex = index; |
- } |
- int next = component.indexOf('%', index); |
- if (next >= index) { |
- index = next; |
- } else { |
- index = length; |
+ if (buffer == null) buffer = new StringBuffer(); |
+ buffer.write(component.substring(sectionStart, index)); |
+ buffer.write(replacement); |
+ index += sourceLength; |
+ sectionStart = index; |
} |
} |
- if (result == null) return component; |
- |
- if (result != null && prevIndex != index) fillResult(); |
- assert(index == length); |
- |
- return result.toString(); |
+ if (buffer == null) { |
+ // Makes no copy if start == 0 and end == component.length. |
+ return component.substring(start, end); |
+ } |
+ if (sectionStart < end) { |
+ buffer.write(component.substring(sectionStart, end)); |
+ } |
+ return buffer.toString(); |
} |
static bool _isSchemeCharacter(int ch) { |
return ch < 128 && ((_schemeTable[ch >> 4] & (1 << (ch & 0x0f))) != 0); |
} |
+ static bool _isGeneralDelimiter(int ch) { |
+ return ch <= 64 && |
+ ((_genDelimitersTable[ch >> 4] & (1 << (ch & 0x0f))) != 0); |
+ } |
/** |
* Returns whether the URI is absolute. |
@@ -1465,6 +1687,9 @@ class Uri { |
* Throws a [FormatException] if [host] is not a valid IPv6 address |
* representation. |
* |
+ * Acts on the substring from [start] to [end]. If [end] is omitted, it |
+ * defaults ot the end of the string. |
+ * |
* Some examples of IPv6 addresses: |
* * ::1 |
* * FEDC:BA98:7654:3210:FEDC:BA98:7654:3210 |
@@ -1472,7 +1697,8 @@ class Uri { |
* * ::FFFF:129.144.52.38 |
* * 2010:836B:4179::836B:4179 |
*/ |
- static List<int> parseIPv6Address(String host) { |
+ static List<int> parseIPv6Address(String host, [int start = 0, int end]) { |
+ if (end == null) end = host.length; |
// An IPv6 address consists of exactly 8 parts of 1-4 hex digits, seperated |
// by `:`'s, with the following exceptions: |
// |
@@ -1495,11 +1721,11 @@ class Uri { |
if (host.length < 2) error('address is too short'); |
List<int> parts = []; |
bool wildcardSeen = false; |
- int partStart = 0; |
+ int partStart = start; |
// Parse all parts, except a potential last one. |
- for (int i = 0; i < host.length; i++) { |
+ for (int i = start; i < end; i++) { |
if (host.codeUnitAt(i) == _COLON) { |
- if (i == 0) { |
+ if (i == start) { |
// If we see a `:` in the beginning, expect wildcard. |
i++; |
if (host.codeUnitAt(i) != _COLON) { |
@@ -1522,18 +1748,18 @@ class Uri { |
} |
} |
if (parts.length == 0) error('too few parts'); |
- bool atEnd = partStart == host.length; |
- bool isLastWildcard = parts.last == -1; |
+ bool atEnd = (partStart == end); |
+ bool isLastWildcard = (parts.last == -1); |
if (atEnd && !isLastWildcard) { |
error('expected a part after last `:`'); |
} |
if (!atEnd) { |
try { |
- parts.add(parseHex(partStart, host.length)); |
+ parts.add(parseHex(partStart, end)); |
} catch (e) { |
// Failed to parse the last chunk as hex. Try IPv4. |
try { |
- List<int> last = parseIPv4Address(host.substring(partStart)); |
+ List<int> last = parseIPv4Address(host.substring(partStart, end)); |
parts.add(last[0] << 8 | last[1]); |
parts.add(last[2] << 8 | last[3]); |
} catch (e) { |
@@ -1549,15 +1775,23 @@ class Uri { |
error('an address without a wildcard must contain exactly 8 parts'); |
} |
// TODO(ajohnsen): Consider using Uint8List. |
- return parts |
- .expand((value) { |
- if (value == -1) { |
- return new List.filled((9 - parts.length) * 2, 0); |
- } else { |
- return [(value >> 8) & 0xFF, value & 0xFF]; |
- } |
- }) |
- .toList(); |
+ List bytes = new List<int>(16); |
+ for (int i = 0, index = 0; i < parts.length; i++) { |
+ int value = parts[i]; |
+ if (value == -1) { |
+ int wildCardLength = 9 - parts.length; |
+ for (int j = 0; j < wildCardLength; j++) { |
+ bytes[index] = 0; |
+ bytes[index + 1] = 0; |
+ index += 2; |
+ } |
+ } else { |
+ bytes[index] = value >> 8; |
+ bytes[index + 1] = value & 0xff; |
+ index += 2; |
+ } |
+ } |
+ return bytes; |
} |
// Frequently used character codes. |
@@ -1820,6 +2054,48 @@ class Uri { |
// pqrstuvwxyz ~ |
0x47ff]; // 0x70 - 0x7f 1111111111100010 |
+ // General delimiter characters, RFC 3986 section 2.2. |
+ // gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" |
+ // |
+ static const _genDelimitersTable = const [ |
+ // LSB MSB |
+ // | | |
+ 0x0000, // 0x00 - 0x0f 0000000000000000 |
+ 0x0000, // 0x10 - 0x1f 0000000000000000 |
+ // # / |
+ 0x8008, // 0x20 - 0x2f 0001000000000001 |
+ // : [ ]? |
+ 0xe400, // 0x30 - 0x3f 0000000000101011 |
+ // @ |
+ 0x0001, // 0x40 - 0x4f 1000000000000000 |
+ // |
+ 0x0000, // 0x50 - 0x5f 0000000000000000 |
+ // |
+ 0x0000, // 0x60 - 0x6f 0000000000000000 |
+ // |
+ 0x0000]; // 0x70 - 0x7f 0000000000000000 |
+ |
+ // Characters allowed in the userinfo as of RFC 3986. |
+ // RFC 3986 Apendix A |
+ // userinfo = *( unreserved / pct-encoded / sub-delims / ':') |
+ static const _userinfoTable = const [ |
+ // LSB MSB |
+ // | | |
+ 0x0000, // 0x00 - 0x0f 0000000000000000 |
+ 0x0000, // 0x10 - 0x1f 0000000000000000 |
+ // ! $ &'()*+,-. |
+ 0x7fd2, // 0x20 - 0x2f 0100101111111110 |
+ // 0123456789:; = |
+ 0x2fff, // 0x30 - 0x3f 1111111111110100 |
+ // ABCDEFGHIJKLMNO |
+ 0xfffe, // 0x40 - 0x4f 0111111111111111 |
+ // PQRSTUVWXYZ _ |
+ 0x87ff, // 0x50 - 0x5f 1111111111100001 |
+ // abcdefghijklmno |
+ 0xfffe, // 0x60 - 0x6f 0111111111111111 |
+ // pqrstuvwxyz ~ |
+ 0x47ff]; // 0x70 - 0x7f 1111111111100010 |
+ |
// Characters allowed in the reg-name as of RFC 3986. |
// RFC 3986 Apendix A |
// reg-name = *( unreserved / pct-encoded / sub-delims ) |
@@ -1862,6 +2138,27 @@ class Uri { |
// pqrstuvwxyz ~ |
0x47ff]; // 0x70 - 0x7f 1111111111100010 |
+ // Characters allowed in the path as of RFC 3986. |
+ // RFC 3986 section 3.3 *and* slash. |
+ static const _pathCharOrSlashTable = const [ |
+ // LSB MSB |
+ // | | |
+ 0x0000, // 0x00 - 0x0f 0000000000000000 |
+ 0x0000, // 0x10 - 0x1f 0000000000000000 |
+ // ! $ &'()*+,-./ |
+ 0xffd2, // 0x20 - 0x2f 0100101111111111 |
+ // 0123456789:; = |
+ 0x2fff, // 0x30 - 0x3f 1111111111110100 |
+ // @ABCDEFGHIJKLMNO |
+ 0xffff, // 0x40 - 0x4f 1111111111111111 |
+ |
+ // PQRSTUVWXYZ _ |
+ 0x87ff, // 0x50 - 0x5f 1111111111100001 |
+ // abcdefghijklmno |
+ 0xfffe, // 0x60 - 0x6f 0111111111111111 |
+ // pqrstuvwxyz ~ |
+ 0x47ff]; // 0x70 - 0x7f 1111111111100010 |
+ |
// Characters allowed in the query as of RFC 3986. |
// RFC 3986 section 3.4. |
// query = *( pchar / "/" / "?" ) |