Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(136)

Unified Diff: sdk/lib/core/uri.dart

Issue 335373003: New Uri.parse and validation. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: More test. Created 6 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: sdk/lib/core/uri.dart
diff --git a/sdk/lib/core/uri.dart b/sdk/lib/core/uri.dart
index d254850eaa092845e67714d48ee809ecf0654b0a..331ac3b0448de6cc1b679f5edca925c36480e753 100644
--- a/sdk/lib/core/uri.dart
+++ b/sdk/lib/core/uri.dart
@@ -116,7 +116,11 @@ class Uri {
Map<String, String> _queryParameters;
/**
- * Creates a new URI object by parsing a URI string.
+ * Creates a new `Uri` object by parsing a URI string.
+ *
+ * If the string is not valid as a URI or URI reference,
+ * invalid characters will be percent escaped where possible.
+ * The resulting `Uri` will represent a valid URI or URI reference.
*/
static Uri parse(String uri) {
// This parsing will not validate percent-encoding, IPv6, etc. When done
@@ -175,171 +179,232 @@ class Uri {
bool isRegName(int ch) {
return ch < 128 && ((_regNameTable[ch >> 4] & (1 << (ch & 0x0f))) != 0);
}
+ const int EOI = -1;
- int ipV6Address(int index) {
- // IPv6. Skip to ']'.
- index = uri.indexOf(']', index);
- if (index == -1) {
- throw new FormatException("Bad end of IPv6 host");
- }
- return index + 1;
- }
+ String scheme = "";
+ String path;
+ String userinfo = "";
+ String host = "";
+ int port = 0;
+ String query = "";
+ String fragment = "";
- int length = uri.length;
int index = 0;
-
- int schemeEndIndex = 0;
-
- if (length == 0) {
- return new Uri();
- }
-
- if (uri.codeUnitAt(0) != _SLASH) {
- // Can be scheme.
- while (index < length) {
- // Look for ':'. If found, continue from the post of ':'. If not (end
- // reached or invalid scheme char found) back up one char, and continue
- // to path.
- // Note that scheme-chars is contained in path-chars.
- int codeUnit = uri.codeUnitAt(index++);
- if (!_isSchemeCharacter(codeUnit)) {
- if (codeUnit == _COLON) {
- schemeEndIndex = index;
- } else {
- // Back up one char, since we met an invalid scheme char.
- index--;
- }
+ int pathStart = 0;
+ // End of input-marker.
+ int char = EOI;
+
+ void parseAuth() {
+ if (index == uri.length) {
+ char = EOI;
+ return;
+ }
+ int authStart = index;
+ int lastColon = -1;
+ int lastAt = -1;
+ char = uri.codeUnitAt(index);
+ while (index < uri.length) {
+ char = uri.codeUnitAt(index);
+ if (char == _SLASH || char == _QUESTION || char == _NUMBER_SIGN) {
break;
}
- }
- }
-
- int userInfoEndIndex = -1;
- int portIndex = -1;
- int authorityEndIndex = schemeEndIndex;
- // If we see '//', there must be an authority.
- if (authorityEndIndex == index &&
- authorityEndIndex + 1 < length &&
- uri.codeUnitAt(authorityEndIndex) == _SLASH &&
- uri.codeUnitAt(authorityEndIndex + 1) == _SLASH) {
- // Skip '//'.
- authorityEndIndex += 2;
- // It can both be host and userInfo.
- while (authorityEndIndex < length) {
- int codeUnit = uri.codeUnitAt(authorityEndIndex++);
- if (!isRegName(codeUnit)) {
- if (codeUnit == _LEFT_BRACKET) {
- authorityEndIndex = ipV6Address(authorityEndIndex);
- } else if (portIndex == -1 && codeUnit == _COLON) {
- // First time ':'.
- portIndex = authorityEndIndex;
- } else if (codeUnit == _AT_SIGN || codeUnit == _COLON) {
- // Second time ':' or first '@'. Must be userInfo.
- userInfoEndIndex = uri.indexOf('@', authorityEndIndex - 1);
- // Not found. Must be path then.
- if (userInfoEndIndex == -1) {
- authorityEndIndex = index;
- break;
- }
- portIndex = -1;
- authorityEndIndex = userInfoEndIndex + 1;
- // Now it can only be host:port.
- while (authorityEndIndex < length) {
- int codeUnit = uri.codeUnitAt(authorityEndIndex++);
- if (!isRegName(codeUnit)) {
- if (codeUnit == _LEFT_BRACKET) {
- authorityEndIndex = ipV6Address(authorityEndIndex);
- } else if (codeUnit == _COLON) {
- if (portIndex != -1) {
- throw new FormatException("Double port in host");
- }
- portIndex = authorityEndIndex;
- } else {
- authorityEndIndex--;
- break;
- }
- }
- }
+ if (char == _AT_SIGN) {
+ lastAt = index;
+ lastColon = -1;
+ } else if (char == _COLON) {
+ lastColon = index;
+ } else if (char == _LEFT_BRACKET) {
+ lastColon = -1;
+ int endBracket = uri.indexOf(']', index + 1);
+ if (endBracket == -1) {
+ index = uri.length;
+ char = EOI;
break;
} else {
- authorityEndIndex--;
- break;
+ index = endBracket;
}
}
+ index++;
+ char = EOI;
+ }
+ int hostStart = authStart;
+ int hostEnd = index;
+ if (lastAt >= 0) {
+ userinfo = _makeUserInfo(uri, authStart, lastAt);
+ hostStart = lastAt + 1;
+ }
+ if (lastColon >= 0) {
+ if (lastColon + 1 == index) {
+ _fail(uri, index, "Invalid port number");
+ }
+ int portNumber = 0;
+ for (int i = lastColon + 1; i < index; i++) {
+ int digit = uri.codeUnitAt(i);
+ if (_ZERO > digit || _NINE < digit) {
+ _fail(uri, i, "Invalid port number");
+ }
+ portNumber = portNumber * 10 + (digit - _ZERO);
+ }
+ port = _makePort(portNumber, scheme);
+ hostEnd = lastColon;
+ }
+ host = _makeHost(uri, hostStart, hostEnd, true);
+ if (index < uri.length) {
+ char = uri.codeUnitAt(index);
}
- } else {
- authorityEndIndex = schemeEndIndex;
}
- // At path now.
- int pathEndIndex = authorityEndIndex;
- while (pathEndIndex < length) {
- int codeUnit = uri.codeUnitAt(pathEndIndex++);
- if (codeUnit == _QUESTION || codeUnit == _NUMBER_SIGN) {
- pathEndIndex--;
+ // When reaching path parsing, the current character is known to not
+ // be part of the path.
+ const int NOT_IN_PATH = 0;
+ // When reaching path parsing, the current character is part
+ // of a non-empty path.
+ const int IN_PATH = 1;
+ // When reaching authority parsing, authority is possible.
+ // This is only true at start or right after scheme.
+ const int ALLOW_AUTH = 2;
+
+ // Current state.
+ // Initialized to the default value that is used when exiting the
+ // scheme loop by reaching the end of input.
+ // All other breaks set their own state.
+ int state = NOT_IN_PATH;
+ while (index < uri.length) {
+ char = uri.codeUnitAt(index);
+ if (char == _QUESTION || char == _NUMBER_SIGN) {
+ state = NOT_IN_PATH;
break;
}
+ if (char == _SLASH) {
+ state = (index == 0) ? ALLOW_AUTH : IN_PATH;
+ break;
+ }
+ if (char == _COLON) {
+ if (index == 0) _fail(uri, 0, "Invalid empty scheme");
+ scheme = _makeScheme(uri, index);
+ index++;
+ pathStart = index;
+ if (index == uri.length) {
+ char = EOI;
+ state = NOT_IN_PATH;
+ } else {
+ char = uri.codeUnitAt(index);
+ if (char == _QUESTION || char == _NUMBER_SIGN) {
+ state = NOT_IN_PATH;
+ } else if (char == _SLASH) {
+ state = ALLOW_AUTH;
+ } else {
+ state = IN_PATH;
+ }
+ }
+ break;
+ }
+ index++;
+ char = EOI;
+ }
+
+ if (state == ALLOW_AUTH) {
+ assert(char == _SLASH);
+ // Have seen one slash either at start or right after scheme.
+ // If two slashes, it's an authority, otherwise it's just the path.
+ index++;
+ if (index == uri.length) {
+ char = EOI;
+ state = NOT_IN_PATH;
+ } else {
+ char = uri.codeUnitAt(index);
+ if (char == _SLASH) {
+ index++;
+ parseAuth();
+ pathStart = index;
+ }
+ if (char == _QUESTION || char == _NUMBER_SIGN || char == EOI) {
+ state = NOT_IN_PATH;
+ } else {
+ state = IN_PATH;
+ }
+ }
}
- // Maybe query.
- int queryEndIndex = pathEndIndex;
- if (queryEndIndex < length && uri.codeUnitAt(queryEndIndex) == _QUESTION) {
- while (queryEndIndex < length) {
- int codeUnit = uri.codeUnitAt(queryEndIndex++);
- if (codeUnit == _NUMBER_SIGN) {
- queryEndIndex--;
+ assert(state == IN_PATH || state == NOT_IN_PATH);
+ if (state == IN_PATH) {
+ // Characters from pathStart to index (inclusive) are known
+ // to be part of the path.
+ while (++index < uri.length) {
+ char = uri.codeUnitAt(index);
+ if (char == _QUESTION || char == _NUMBER_SIGN) {
break;
}
+ char = EOI;
}
+ state = NOT_IN_PATH;
}
- var scheme = null;
- if (schemeEndIndex > 0) {
- scheme = uri.substring(0, schemeEndIndex - 1);
- }
+ assert(state == NOT_IN_PATH);
+ bool ensureLeadingSlash = (host != "" || scheme == "file");
+ path = _makePath(uri, pathStart, index, null, ensureLeadingSlash);
- var host = "";
- var userInfo = "";
- var port = 0;
- if (schemeEndIndex != authorityEndIndex) {
- int startIndex = schemeEndIndex + 2;
- if (userInfoEndIndex > 0) {
- userInfo = uri.substring(startIndex, userInfoEndIndex);
- startIndex = userInfoEndIndex + 1;
- }
- if (portIndex > 0) {
- var portStr = uri.substring(portIndex, authorityEndIndex);
- try {
- port = int.parse(portStr);
- } catch (_) {
- throw new FormatException("Invalid port: '$portStr'");
- }
- host = uri.substring(startIndex, portIndex - 1);
+ if (char == _QUESTION) {
+ int numberSignIndex = uri.indexOf('#', index + 1);
+ if (numberSignIndex < 0) {
+ query = _makeQuery(uri, index + 1, uri.length, null);
} else {
- host = uri.substring(startIndex, authorityEndIndex);
+ query = _makeQuery(uri, index + 1, numberSignIndex, null);
+ fragment = _makeFragment(uri, numberSignIndex + 1, uri.length);
}
- }
+ } else if (char == _NUMBER_SIGN) {
+ fragment = _makeFragment(uri, index + 1, uri.length);
+ }
+ return new Uri._internal(scheme,
+ userinfo,
+ host,
+ port,
+ path,
+ query,
+ fragment);
+ }
- var path = uri.substring(authorityEndIndex, pathEndIndex);
- var query = "";
- if (pathEndIndex < queryEndIndex) {
- query = uri.substring(pathEndIndex + 1, queryEndIndex);
- }
- var fragment = "";
- // If queryEndIndex is not at end (length), there is a fragment.
- if (queryEndIndex < length) {
- fragment = uri.substring(queryEndIndex + 1, length);
+ // Report a parse failure by throwing a [FormatException].
+ //
+ // The message is extended with the failing position. Unless [index] is the
+ // end of [uri], it is also followed by a slice of [uri] and, on the next
+ // line, a caret placed under the character at [index].
+ static void _fail(String uri, int index, String message) {
+ // TODO(lrn): Consider adding this to FormatException.
+ if (index == uri.length) {
+ message += " at end of input.";
+ } else {
+ message += " at position $index.\n";
+ // Pick a slice of uri containing index and, if
+ // necessary, truncate the ends to ensure the entire
+ // slice fits on one line.
+ int min = 0;
+ int max = uri.length;
+ String pre = "";
+ String post = "";
+ if (uri.length > 78) {
+ min = index - 10;
+ if (min < 0) min = 0;
+ // Assign the outer [max]; declaring a new local here would leave the
+ // slice untruncated on the right while still appending "...".
+ max = min + 72;
+ if (max > uri.length) {
+ max = uri.length;
+ min = max - 72;
+ }
+ if (min != 0) pre = "...";
+ if (max != uri.length) post = "...";
+ }
+ // Combine message, slice and a caret pointing to the error index.
+ message = "$message$pre${uri.substring(min, max)}$post\n"
+ "${' ' * (pre.length + index - min)}^";
}
-
- return new Uri(scheme: scheme,
- userInfo: userInfo,
- host: host,
- port: port,
- path: path,
- query: query,
- fragment: fragment);
+ throw new FormatException(message);
}
+ /// Internal non-verifying constructor. Only call with validated arguments.
+ ///
+ /// Each argument is stored directly in the corresponding field; nothing is
+ /// checked or normalized here.
+ Uri._internal(this.scheme,
+ this.userInfo,
+ this._host,
+ this._port,
+ this._path,
+ this.query,
+ this.fragment);
+
/**
* Creates a new URI from its components.
*
@@ -359,6 +424,7 @@ class Uri {
* [host]. The host can either be a hostname, an IPv4 address or an
* IPv6 address, contained in '[' and ']'. If the host contains a
* ':' character, the '[' and ']' are added if not already provided.
+ * The host is normalized to all lowercase letters.
*
* The port part of the authority component is set through
* [port]. The port is normalized for scheme http and https where
@@ -386,29 +452,27 @@ class Uri {
*
* The fragment component is set through [fragment].
*/
- Uri({String scheme,
- this.userInfo: "",
+ factory Uri({String scheme,
+ String userInfo: "",
String host: "",
port: 0,
String path,
Iterable<String> pathSegments,
String query,
Map<String, String> queryParameters,
- fragment: ""}) :
- scheme = _makeScheme(scheme),
- _host = _makeHost(host),
- query = _makeQuery(query, queryParameters),
- fragment = _makeFragment(fragment) {
- // Perform scheme specific normalization.
- if (scheme == "http" && port == 80) {
- _port = 0;
- } else if (scheme == "https" && port == 443) {
- _port = 0;
- } else {
- _port = port;
- }
- // Fill the path.
- _path = _makePath(path, pathSegments);
+ fragment: ""}) {
+ // Run every component through its _make* helper over its whole length
+ // (a null string counts as length 0) before calling the non-verifying
+ // internal constructor.
+ scheme = _makeScheme(scheme, _stringOrNullLength(scheme));
+ userInfo = _makeUserInfo(userInfo, 0, _stringOrNullLength(userInfo));
+ // The host is processed with strictIPv6 == false.
+ host = _makeHost(host, 0, _stringOrNullLength(host), false);
+ query = _makeQuery(query, 0, _stringOrNullLength(query), queryParameters);
+ fragment = _makeFragment(fragment, 0, _stringOrNullLength(fragment));
+ // The port is computed from the already normalized scheme.
+ port = _makePort(port, scheme);
+ // A leading slash is requested when there is a host or the scheme is "file".
+ bool ensureLeadingSlash = (host != "" || scheme == "file");
+ path = _makePath(path, 0, _stringOrNullLength(path), pathSegments,
+ ensureLeadingSlash);
+
+ return new Uri._internal(scheme, userInfo, host, port,
+ path, query, fragment);
}
/**
@@ -489,7 +553,7 @@ class Uri {
if (hostEnd == authority.length) {
throw new FormatException("Invalid IPv6 host entry.");
}
- parseIPv6Address(authority.substring(hostStart + 1, hostEnd));
+ parseIPv6Address(authority, hostStart + 1, hostEnd);
hostEnd++; // Skip the closing bracket.
if (hostEnd != authority.length &&
authority.codeUnitAt(hostEnd) != _COLON) {
@@ -765,75 +829,193 @@ class Uri {
return _queryParameters;
}
- static String _makeHost(String host) {
- if (host == null || host.isEmpty) return host;
- if (host.codeUnitAt(0) == _LEFT_BRACKET) {
- if (host.codeUnitAt(host.length - 1) != _RIGHT_BRACKET) {
- throw new FormatException('Missing end `]` to match `[` in host');
- }
- parseIPv6Address(host.substring(1, host.length - 1));
- return host;
+ // Returns the port value to store for [port] under [scheme]: 0 when [port]
+ // is 80 for "http" or 443 for "https", otherwise [port] unchanged.
+ static int _makePort(int port, String scheme) {
+ bool isHttpDefault = (scheme == "http" && port == 80);
+ bool isHttpsDefault = (scheme == "https" && port == 443);
+ if (isHttpDefault || isHttpsDefault) {
+ return 0;
}
- for (int i = 0; i < host.length; i++) {
- if (host.codeUnitAt(i) == _COLON) {
- parseIPv6Address(host);
- return '[$host]';
+ return port;
+ }
+
+ /**
+ * Check and normalize a host name.
+ *
+ * Only the characters of [host] from [start] (inclusive) to [end]
+ * (exclusive) are considered. Returns null if [host] is null and the
+ * empty string if the range is empty.
+ *
+ * If the host name starts with '[' it must end with ']' and is considered
+ * an IPv6 address. If [strictIPv6] is false, the address is also considered
+ * an IPv6 address if it contains any ':' character; it is then validated
+ * and returned wrapped in '[' and ']'.
+ *
+ * If it is not an IPv6 address, it is case- and escape-normalized.
+ * This escapes all characters not valid in a reg-name,
+ * and converts all non-escape upper-case letters to lower-case.
+ */
+ static String _makeHost(String host, int start, int end, bool strictIPv6) {
+ // TODO(lrn): Should we normalize IPv6 addresses according to RFC 5952?
+
+ if (host == null) return null;
+ if (start == end) return "";
+ // A leading '[' marks a bracketed IPv6 address.
+ if (host.codeUnitAt(start) == _LEFT_BRACKET) {
+ if (host.codeUnitAt(end - 1) != _RIGHT_BRACKET) {
+ _fail(host, start, 'Missing end `]` to match `[` in host');
+ }
+ parseIPv6Address(host, start + 1, end - 1);
+ // RFC 5952 requires hex digits to be lower case.
+ return host.substring(start, end).toLowerCase();
+ }
+ if (!strictIPv6) {
+ // TODO(lrn): skip if too short to be a valid IPv6 address?
+ for (int i = start; i < end; i++) {
+ if (host.codeUnitAt(i) == _COLON) {
+ parseIPv6Address(host, start, end);
+ // Wrap only the validated range, not the whole source string.
+ return '[${host.substring(start, end)}]';
+ }
}
}
- return host;
+ return _normalizeRegName(host, start, end);
+ }
+
+ // Whether [char] is below 127 and has its bit set in [_regNameTable].
+ static bool _isRegNameChar(int char) {
+ if (char >= 127) return false;
+ int rowMask = _regNameTable[char >> 4];
+ return (rowMask & (1 << (char & 0xf))) != 0;
}
- static String _makeScheme(String scheme) {
- bool isSchemeLowerCharacter(int ch) {
- return ch < 128 &&
- ((_schemeLowerTable[ch >> 4] & (1 << (ch & 0x0f))) != 0);
+ /**
+ * Validates and does case- and percent-encoding normalization.
+ *
+ * The [host] must be an RFC3986 "reg-name". It is converted
+ * to lower case, and percent escapes are converted to either
+ * lower case unreserved characters or upper case escapes.
+ *
+ * Only the range from [start] (inclusive) to [end] (exclusive) is
+ * processed. A `%` that does not start a valid escape is written as
+ * "%25". A general delimiter causes a failure. Any other character that is
+ * not a reg-name character is percent-escaped, with a lead and tail
+ * surrogate pair being escaped as a single code point.
+ */
+ static String _normalizeRegName(String host, int start, int end) {
+ StringBuffer buffer;
+ int sectionStart = start;
+ int index = start;
+ // Whether all characters between sectionStart and index are normalized.
+ bool isNormalized = true;
+
+ while (index < end) {
+ int char = host.codeUnitAt(index);
+ if (char == _PERCENT) {
+ // The _regNameTable contains "%", so we check that first.
+ String replacement = _normalizeEscape(host, index, true);
+ if (replacement == null && isNormalized) {
+ index += 3;
+ continue;
+ }
+ if (buffer == null) buffer = new StringBuffer();
+ String slice = host.substring(sectionStart, index);
+ if (!isNormalized) slice = slice.toLowerCase();
+ buffer.write(slice);
+ int sourceLength = 3;
+ if (replacement == null) {
+ // Keep the escape as written; only the preceding slice needed work.
+ replacement = host.substring(index, index + 3);
+ } else if (replacement == "%") {
+ // Invalid escape: encode the percent sign itself and move on by one.
+ replacement = "%25";
+ sourceLength = 1;
+ }
+ buffer.write(replacement);
+ index += sourceLength;
+ sectionStart = index;
+ isNormalized = true;
+ } else if (_isRegNameChar(char)) {
+ if (isNormalized && _UPPER_CASE_A <= char && _UPPER_CASE_Z >= char) {
+ // Put initial slice in buffer and continue in non-normalized mode
+ if (buffer == null) buffer = new StringBuffer();
+ if (sectionStart < index) {
+ buffer.write(host.substring(sectionStart, index));
+ sectionStart = index;
+ }
+ isNormalized = false;
+ }
+ index++;
+ } else if (_isGeneralDelimiter(char)) {
+ _fail(host, index, "Invalid character");
+ } else {
+ int sourceLength = 1;
+ if ((char & 0xFC00) == 0xD800 && (index + 1) < end) {
+ int tail = host.codeUnitAt(index + 1);
+ if ((tail & 0xFC00) == 0xDC00) {
+ // Combine the pair by adding 0x10000. OR-ing it in would lose the
+ // offset whenever bit 16 of the shifted lead bits is already set.
+ char = 0x10000 + (((char & 0x3ff) << 10) | (tail & 0x3ff));
+ sourceLength = 2;
+ }
+ }
+ if (buffer == null) buffer = new StringBuffer();
+ String slice = host.substring(sectionStart, index);
+ if (!isNormalized) slice = slice.toLowerCase();
+ buffer.write(slice);
+ buffer.write(_escapeChar(char));
+ index += sourceLength;
+ sectionStart = index;
+ }
+ }
+ // No buffer means nothing had to change.
+ if (buffer == null) return host.substring(start, end);
+ if (sectionStart < end) {
+ String slice = host.substring(sectionStart, end);
+ if (!isNormalized) slice = slice.toLowerCase();
+ buffer.write(slice);
}
+ return buffer.toString();
+ }
- if (scheme == null) return "";
- bool allLowercase = true;
- int length = scheme.length;
- for (int i = 0; i < length; i++) {
+ /**
+ * Validates scheme characters and does case-normalization.
+ *
+ * Only the first [end] characters of [scheme] are used; an [end] of zero
+ * gives the empty string. The first character must be alphabetic and all
+ * characters must be scheme characters, otherwise parsing fails.
+ *
+ * Schemes are converted to lower case. They cannot contain escapes.
+ */
+ static String _makeScheme(String scheme, int end) {
+ if (end == 0) return "";
+ int char = scheme.codeUnitAt(0);
+ if (!_isAlphabeticCharacter(char)) {
+ _fail(scheme, 0, "Scheme not starting with alphabetic character");
+ }
+ // Stays true until an upper-case letter is seen, so that an already
+ // lower-case scheme skips the toLowerCase call below.
+ bool allLowercase = true;
+ for (int i = 0; i < end; i++) {
int codeUnit = scheme.codeUnitAt(i);
- if (i == 0 && !_isAlphabeticCharacter(codeUnit)) {
- // First code unit must be an alphabetic character.
- throw new ArgumentError('Illegal scheme: $scheme');
+ if (!_isSchemeCharacter(codeUnit)) {
+ _fail(scheme, i, "Illegal scheme character");
}
- if (!isSchemeLowerCharacter(codeUnit)) {
- if (_isSchemeCharacter(codeUnit)) {
- allLowercase = false;
- } else {
- throw new ArgumentError('Illegal scheme: $scheme');
- }
+ // Test the current code unit (not the first one) for upper case.
+ if (_UPPER_CASE_A <= codeUnit && _UPPER_CASE_Z >= codeUnit) {
+ allLowercase = false;
}
}
+ scheme = scheme.substring(0, end);
+ if (!allLowercase) scheme = scheme.toLowerCase();
+ return scheme;
+ }
- return allLowercase ? scheme : scheme.toLowerCase();
+ // Normalizes the [start]..[end] range of [userInfo] against
+ // [_userinfoTable]. A null [userInfo] gives the empty string, the same
+ // value used when no user info is present.
+ static String _makeUserInfo(String userInfo, int start, int end) {
+ if (userInfo == null) return "";
+ return _normalize(userInfo, start, end, _userinfoTable);
+ }
- String _makePath(String path, Iterable<String> pathSegments) {
+ // Builds the path from either [path] or [pathSegments].
+ //
+ // Returns "" when both are null and throws an [ArgumentError] when both are
+ // given. [start] and [end] select the range of [path] to normalize; they are
+ // not used for [pathSegments], whose elements are encoded and joined with
+ // "/". When [ensureLeadingSlash] is true, a non-empty result that does not
+ // start with "/" gets one prepended.
+ static String _makePath(String path, int start, int end,
+ Iterable<String> pathSegments,
+ bool ensureLeadingSlash) {
if (path == null && pathSegments == null) return "";
if (path != null && pathSegments != null) {
throw new ArgumentError('Both path and pathSegments specified');
}
var result;
if (path != null) {
- result = _normalize(path);
+ result = _normalize(path, start, end, _pathCharOrSlashTable);
} else {
result = pathSegments.map((s) => _uriEncode(_pathCharTable, s)).join("/");
}
- if ((hasAuthority || (scheme == "file")) &&
- result.isNotEmpty && !result.startsWith("/")) {
+ if (ensureLeadingSlash && result.isNotEmpty && !result.startsWith("/")) {
return "/$result";
}
return result;
}
- static String _makeQuery(String query, Map<String, String> queryParameters) {
+ static String _makeQuery(String query, int start, int end,
+ Map<String, String> queryParameters) {
if (query == null && queryParameters == null) return "";
if (query != null && queryParameters != null) {
throw new ArgumentError('Both query and queryParameters specified');
}
- if (query != null) return _normalize(query);
+ if (query != null) return _normalize(query, start, end, _queryCharTable);
var result = new StringBuffer();
var first = true;
@@ -851,123 +1033,184 @@ class Uri {
return result.toString();
}
- static String _makeFragment(String fragment) {
+ // Normalizes the [start]..[end] range of [fragment], checking characters
+ // against [_queryCharTable]. A null [fragment] gives the empty string.
+ static String _makeFragment(String fragment, int start, int end) {
if (fragment == null) return "";
- return _normalize(fragment);
+ return _normalize(fragment, start, end, _queryCharTable);
}
- static String _normalize(String component) {
- int index = component.indexOf('%');
- if (index < 0) return component;
-
- bool isNormalizedHexDigit(int digit) {
- return (_ZERO <= digit && digit <= _NINE) ||
- (_UPPER_CASE_A <= digit && digit <= _UPPER_CASE_F);
- }
+ // Length of [s], with null counted as length zero.
+ static int _stringOrNullLength(String s) {
+ if (s == null) return 0;
+ return s.length;
+ }
- bool isLowerCaseHexDigit(int digit) {
- return _LOWER_CASE_A <= digit && digit <= _LOWER_CASE_F;
- }
+ // Whether [char] is one of 0-9, a-f or A-F.
+ static bool _isHexDigit(int char) {
+ if (char <= _NINE) {
+ return char >= _ZERO;
+ }
+ // Setting bit 0x20 maps upper-case ASCII letters onto lower case.
+ int folded = char | 0x20;
+ return folded >= _LOWER_CASE_A && folded <= _LOWER_CASE_F;
+ }
- bool isUnreserved(int ch) {
- return ch < 128 &&
- ((_unreservedTable[ch >> 4] & (1 << (ch & 0x0f))) != 0);
- }
+ // Numeric value (0-15) of the hex digit [char]; the assert requires that
+ // [char] is a hex digit.
+ static int _hexValue(int char) {
+ assert(_isHexDigit(char));
+ if (char <= _NINE) {
+ return char - _ZERO;
+ }
+ // Fold letters to lower case so a single offset covers both cases.
+ int folded = char | 0x20;
+ return folded - _LOWER_CASE_A + 10;
+ }
- int normalizeHexDigit(int index) {
- var codeUnit = component.codeUnitAt(index);
- if (isLowerCaseHexDigit(codeUnit)) {
- return codeUnit - 0x20;
- } else if (!isNormalizedHexDigit(codeUnit)) {
- throw new ArgumentError("Invalid URI component: $component");
- } else {
- return codeUnit;
+ /**
+ * Performs RFC 3986 Percent-Encoding Normalization.
+ *
+ * [index] must be the position of a `%` character in [source].
+ *
+ * Returns a replacement string that should replace the original escape.
+ * Returns null if no replacement is necessary because the escape is
+ * not for an unreserved character and is already non-lower-case.
+ *
+ * Returns "%" if the escape is invalid (not two valid hex digits following
+ * the percent sign). The calling code should replace the percent
+ * sign with "%25", but leave the following two characters unmodified.
+ *
+ * If the escape encodes an unreserved character, that character itself is
+ * returned. If [lowerCase] is true, a single character returned is always
+ * lower case.
+ */
+ static String _normalizeEscape(String source, int index, bool lowerCase) {
+ assert(source.codeUnitAt(index) == _PERCENT);
+ // NOTE(review): the bound is the end of [source], not the end of the
+ // component being normalized - confirm callers rely on the following
+ // delimiter failing the hex-digit test.
+ if (index + 2 >= source.length) {
+ return "%"; // Marks the escape as invalid.
+ }
+ int firstDigit = source.codeUnitAt(index + 1);
+ int secondDigit = source.codeUnitAt(index + 2);
+ if (!_isHexDigit(firstDigit) || !_isHexDigit(secondDigit)) {
+ return "%"; // Marks the escape as invalid.
+ }
+ int value = _hexValue(firstDigit) * 16 + _hexValue(secondDigit);
+ if (_isUnreservedChar(value)) {
+ if (lowerCase && _UPPER_CASE_A <= value && _UPPER_CASE_Z >= value) {
+ value |= 0x20;
}
+ return new String.fromCharCode(value);
}
-
- int decodeHexDigitPair(int index) {
- int byte = 0;
- for (int i = 0; i < 2; i++) {
- var codeUnit = component.codeUnitAt(index + i);
- if (_ZERO <= codeUnit && codeUnit <= _NINE) {
- byte = byte * 16 + codeUnit - _ZERO;
- } else {
- // Check ranges A-F (0x41-0x46) and a-f (0x61-0x66).
- codeUnit |= 0x20;
- if (_LOWER_CASE_A <= codeUnit &&
- codeUnit <= _LOWER_CASE_F) {
- byte = byte * 16 + codeUnit - _LOWER_CASE_A + 10;
- } else {
- throw new ArgumentError(
- "Invalid percent-encoding in URI component: $component");
- }
- }
- }
- return byte;
+ if (firstDigit >= _LOWER_CASE_A || secondDigit >= _LOWER_CASE_A) {
+ // Either digit is lower case.
+ return source.substring(index, index + 3).toUpperCase();
}
+ // Escape is retained, and is already non-lower case, so return null to
+ // represent "no replacement necessary".
+ return null;
+ }
- // Start building the normalized component string.
- StringBuffer result;
- int length = component.length;
- int prevIndex = 0;
+ // Whether [ch] is below 127 and has its bit set in [_unreservedTable].
+ static bool _isUnreservedChar(int ch) {
+ if (ch >= 127) return false;
+ int rowMask = _unreservedTable[ch >> 4];
+ return (rowMask & (1 << (ch & 0x0f))) != 0;
}
- // Copy a part of the component string to the result.
- void fillResult() {
- if (result == null) {
- assert(prevIndex == 0);
- result = new StringBuffer(component.substring(prevIndex, index));
- } else {
- result.write(component.substring(prevIndex, index));
+ // Returns the percent-encoded form of the code point [char].
+ //
+ // A value below 0x80 becomes a single "%XX" sequence. Larger values are
+ // split into two, three or four bytes (chosen by magnitude) and each byte
+ // is written as "%XX". Hex digits are taken from "0123456789ABCDEF".
+ static String _escapeChar(char) {
+ assert(char <= 0x10ffff); // It's a valid unicode code point.
+ const hexDigits = "0123456789ABCDEF";
+ List codeUnits;
+ if (char < 0x80) {
+ // ASCII, a single percent encoded sequence.
+ codeUnits = new List(3);
+ codeUnits[0] = _PERCENT;
+ codeUnits[1] = hexDigits.codeUnitAt(char >> 4);
+ codeUnits[2] = hexDigits.codeUnitAt(char & 0xf);
+ } else {
+ // Do UTF-8 encoding of character, then percent encode bytes.
+ int flag = 0xc0; // The high-bit markers on the first byte of UTF-8.
+ int encodedBytes = 2;
+ if (char > 0x7ff) {
+ flag = 0xe0;
+ encodedBytes = 3;
+ if (char > 0xffff) {
+ encodedBytes = 4;
+ flag = 0xf0;
+ }
}
- }
-
- while (index < length) {
- // Normalize percent-encoding to uppercase and don't encode
- // unreserved characters.
- assert(component.codeUnitAt(index) == _PERCENT);
- if (length < index + 2) {
- throw new ArgumentError(
- "Invalid percent-encoding in URI component: $component");
+ codeUnits = new List(3 * encodedBytes);
+ int index = 0;
+ // encodedBytes counts down and doubles as the 6-bit group selector:
+ // the most significant group is emitted first.
+ while (--encodedBytes >= 0) {
+ int byte = ((char >> (6 * encodedBytes)) & 0x3f) | flag;
+ codeUnits[index] = _PERCENT;
+ codeUnits[index + 1] = hexDigits.codeUnitAt(byte >> 4);
+ codeUnits[index + 2] = hexDigits.codeUnitAt(byte & 0xf);
+ index += 3;
+ flag = 0x80; // Following bytes have only high bit set.
}
+ }
+ return new String.fromCharCodes(codeUnits);
+ }
- var codeUnit1 = component.codeUnitAt(index + 1);
- var codeUnit2 = component.codeUnitAt(index + 2);
- var decodedCodeUnit = decodeHexDigitPair(index + 1);
- if (isNormalizedHexDigit(codeUnit1) &&
- isNormalizedHexDigit(codeUnit2) &&
- !isUnreserved(decodedCodeUnit)) {
- index += 3;
+ /**
+ * Runs through component checking that each character is valid and
+ * normalizes percent escapes.
+ *
+ * Only the range of [component] from [start] (inclusive) to [end]
+ * (exclusive) is processed, and the normalized range is returned.
+ *
+ * Uses [charTable] to check if a non-`%` character is allowed.
+ * A `%` followed by two hex digits is normalized as an escape; if the
+ * hex digits are lower case letters, they are converted to upper case.
+ * A `%` that is not followed by two hex digits is replaced by "%25".
+ *
+ * A general delimiter that is not allowed by [charTable] causes a failure.
+ * Any other disallowed character is percent-escaped, with a lead and tail
+ * surrogate pair being escaped as a single code point.
+ */
+ static String _normalize(String component, int start, int end,
+ List<int> charTable) {
+ StringBuffer buffer;
+ int sectionStart = start;
+ int index = start;
+ // Loop while characters are valid and escapes correct and upper-case.
+ while (index < end) {
+ int char = component.codeUnitAt(index);
+ if (char < 127 && (charTable[char >> 4] & (1 << (char & 0x0f))) != 0) {
+ index++;
} else {
- fillResult();
- if (isUnreserved(decodedCodeUnit)) {
- result.writeCharCode(decodedCodeUnit);
+ String replacement;
+ int sourceLength;
+ if (char == _PERCENT) {
+ replacement = _normalizeEscape(component, index, false);
+ // Returns null if we should keep the existing escape.
+ if (replacement == null) {
+ index += 3;
+ continue;
+ }
+ // Returns "%" if we should escape the existing percent.
+ if ("%" == replacement) {
+ replacement = "%25";
+ sourceLength = 1;
+ } else {
+ sourceLength = 3;
+ }
+ } else if (_isGeneralDelimiter(char)) {
+ _fail(component, index, "Invalid character");
} else {
- result.write("%");
- result.writeCharCode(normalizeHexDigit(index + 1));
- result.writeCharCode(normalizeHexDigit(index + 2));
+ sourceLength = 1;
+ if ((char & 0xFC00) == 0xD800) {
+ // Possible lead surrogate.
+ if (index + 1 < end) {
+ int tail = component.codeUnitAt(index + 1);
+ if ((tail & 0xFC00) == 0xDC00) {
+ // Tail surrogate. Combine the pair by adding 0x10000; OR-ing it
+ // in would lose the offset whenever bit 16 of the shifted lead
+ // bits is already set.
+ sourceLength = 2;
+ char = 0x10000 + (((char & 0x3ff) << 10) | (tail & 0x3ff));
+ }
+ }
+ }
+ replacement = _escapeChar(char);
}
- index += 3;
- prevIndex = index;
- }
- int next = component.indexOf('%', index);
- if (next >= index) {
- index = next;
- } else {
- index = length;
+ // Flush the unchanged section, then the replacement for this character.
+ if (buffer == null) buffer = new StringBuffer();
+ buffer.write(component.substring(sectionStart, index));
+ buffer.write(replacement);
+ index += sourceLength;
+ sectionStart = index;
}
}
- if (result == null) return component;
-
- if (result != null && prevIndex != index) fillResult();
- assert(index == length);
-
- return result.toString();
+ if (buffer == null) {
+ // Makes no copy if start == 0 and end == component.length.
+ return component.substring(start, end);
+ }
+ if (sectionStart < end) {
+ buffer.write(component.substring(sectionStart, end));
+ }
+ return buffer.toString();
}
+ // Whether [ch] is below 128 and has its bit set in [_schemeTable].
static bool _isSchemeCharacter(int ch) {
return ch < 128 && ((_schemeTable[ch >> 4] & (1 << (ch & 0x0f))) != 0);
}
+ // Whether [ch] is an RFC 3986 general delimiter:
+ // ":" / "/" / "?" / "#" / "[" / "]" / "@".
+ //
+ // '[' (0x5B) and ']' (0x5D) are tested directly so that they are always
+ // reported; the remaining delimiters are looked up in
+ // [_genDelimitersTable].
+ static bool _isGeneralDelimiter(int ch) {
+ if (ch == _LEFT_BRACKET || ch == _RIGHT_BRACKET) return true;
+ return ch < 128 &&
+ ((_genDelimitersTable[ch >> 4] & (1 << (ch & 0x0f))) != 0);
+ }
/**
* Returns whether the URI is absolute.
@@ -1465,6 +1708,9 @@ class Uri {
* Throws a [FormatException] if [host] is not a valid IPv6 address
* representation.
*
+ * Acts on the substring from [start] to [end]. If [end] is omitted, it
+ * defaults to the end of the string.
+ *
* Some examples of IPv6 addresses:
* * ::1
* * FEDC:BA98:7654:3210:FEDC:BA98:7654:3210
@@ -1472,7 +1718,8 @@ class Uri {
* * ::FFFF:129.144.52.38
* * 2010:836B:4179::836B:4179
*/
- static List<int> parseIPv6Address(String host) {
+ static List<int> parseIPv6Address(String host, [int start = 0, int end]) {
+ if (end == null) end = host.length;
// An IPv6 address consists of exactly 8 parts of 1-4 hex digits, seperated
// by `:`'s, with the following exceptions:
//
@@ -1495,11 +1742,11 @@ class Uri {
if (host.length < 2) error('address is too short');
List<int> parts = [];
bool wildcardSeen = false;
- int partStart = 0;
+ int partStart = start;
// Parse all parts, except a potential last one.
- for (int i = 0; i < host.length; i++) {
+ for (int i = start; i < end; i++) {
if (host.codeUnitAt(i) == _COLON) {
- if (i == 0) {
+ if (i == start) {
// If we see a `:` in the beginning, expect wildcard.
i++;
if (host.codeUnitAt(i) != _COLON) {
@@ -1522,18 +1769,18 @@ class Uri {
}
}
if (parts.length == 0) error('too few parts');
- bool atEnd = partStart == host.length;
- bool isLastWildcard = parts.last == -1;
+ bool atEnd = (partStart == end);
+ bool isLastWildcard = (parts.last == -1);
if (atEnd && !isLastWildcard) {
error('expected a part after last `:`');
}
if (!atEnd) {
try {
- parts.add(parseHex(partStart, host.length));
+ parts.add(parseHex(partStart, end));
} catch (e) {
// Failed to parse the last chunk as hex. Try IPv4.
try {
- List<int> last = parseIPv4Address(host.substring(partStart));
+ List<int> last = parseIPv4Address(host.substring(partStart, end));
parts.add(last[0] << 8 | last[1]);
parts.add(last[2] << 8 | last[3]);
} catch (e) {
@@ -1549,15 +1796,23 @@ class Uri {
error('an address without a wildcard must contain exactly 8 parts');
}
// TODO(ajohnsen): Consider using Uint8List.
- return parts
- .expand((value) {
- if (value == -1) {
- return new List.filled((9 - parts.length) * 2, 0);
- } else {
- return [(value >> 8) & 0xFF, value & 0xFF];
- }
- })
- .toList();
+ List bytes = new List<int>(16);
+ for (int i = 0, index = 0; i < parts.length; i++) {
+ int value = parts[i];
+ if (value == -1) {
+ int wildCardLength = 9 - parts.length;
+ for (int j = 0; j < wildCardLength; j++) {
+ bytes[index] = 0;
+ bytes[index + 1] = 0;
+ index += 2;
+ }
+ } else {
+ bytes[index] = value >> 8;
+ bytes[index + 1] = value & 0xff;
+ index += 2;
+ }
+ }
+ return bytes;
}
// Frequently used character codes.
@@ -1820,6 +2075,48 @@ class Uri {
// pqrstuvwxyz ~
0x47ff]; // 0x70 - 0x7f 1111111111100010
+ // General delimiter characters, RFC 3986 section 2.2.
+ // gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
+ //
+ // One 16-bit entry per row of 16 character codes; bit n of an entry stands
+ // for the character code (row start + n).
+ static const _genDelimitersTable = const [
+ // LSB MSB
+ // | |
+ 0x0000, // 0x00 - 0x0f 0000000000000000
+ 0x0000, // 0x10 - 0x1f 0000000000000000
+ // # /
+ 0x8008, // 0x20 - 0x2f 0001000000000001
+ // : ?
+ 0x8400, // 0x30 - 0x3f 0000000000100001
+ // @
+ 0x0001, // 0x40 - 0x4f 1000000000000000
+ // [ ]
+ 0x2800, // 0x50 - 0x5f 0000000000010100
+ //
+ 0x0000, // 0x60 - 0x6f 0000000000000000
+ //
+ 0x0000]; // 0x70 - 0x7f 0000000000000000
+
+ // Characters allowed in the userinfo as of RFC 3986.
+ // RFC 3986 Appendix A
+ // userinfo = *( unreserved / pct-encoded / sub-delims / ':')
+ //
+ // One 16-bit entry per row of 16 character codes; bit n of an entry stands
+ // for the character code (row start + n).
+ static const _userinfoTable = const [
+ // LSB MSB
+ // | |
+ 0x0000, // 0x00 - 0x0f 0000000000000000
+ 0x0000, // 0x10 - 0x1f 0000000000000000
+ // ! $ &'()*+,-.
+ 0x7fd2, // 0x20 - 0x2f 0100101111111110
+ // 0123456789:; =
+ 0x2fff, // 0x30 - 0x3f 1111111111110100
+ // ABCDEFGHIJKLMNO
+ 0xfffe, // 0x40 - 0x4f 0111111111111111
+ // PQRSTUVWXYZ _
+ 0x87ff, // 0x50 - 0x5f 1111111111100001
+ // abcdefghijklmno
+ 0xfffe, // 0x60 - 0x6f 0111111111111111
+ // pqrstuvwxyz ~
+ 0x47ff]; // 0x70 - 0x7f 1111111111100010
+
// Characters allowed in the reg-name as of RFC 3986.
// RFC 3986 Apendix A
// reg-name = *( unreserved / pct-encoded / sub-delims )
@@ -1862,6 +2159,27 @@ class Uri {
// pqrstuvwxyz ~
0x47ff]; // 0x70 - 0x7f 1111111111100010
+ // Characters allowed in the path as of RFC 3986.
+ // RFC 3986 section 3.3 *and* slash.
+ //
+ // One 16-bit entry per row of 16 character codes; bit n of an entry stands
+ // for the character code (row start + n). The bit for '/' (0x2f) is set in
+ // the 0x20 row so that a whole path can be checked with this one table.
+ static const _pathCharOrSlashTable = const [
+ // LSB MSB
+ // | |
+ 0x0000, // 0x00 - 0x0f 0000000000000000
+ 0x0000, // 0x10 - 0x1f 0000000000000000
+ // ! $ &'()*+,-./
+ 0xffd2, // 0x20 - 0x2f 0100101111111111
+ // 0123456789:; =
+ 0x2fff, // 0x30 - 0x3f 1111111111110100
+ // @ABCDEFGHIJKLMNO
+ 0xffff, // 0x40 - 0x4f 1111111111111111
+
+ // PQRSTUVWXYZ _
+ 0x87ff, // 0x50 - 0x5f 1111111111100001
+ // abcdefghijklmno
+ 0xfffe, // 0x60 - 0x6f 0111111111111111
+ // pqrstuvwxyz ~
+ 0x47ff]; // 0x70 - 0x7f 1111111111100010
+
// Characters allowed in the query as of RFC 3986.
// RFC 3986 section 3.4.
// query = *( pchar / "/" / "?" )

Powered by Google App Engine
This is Rietveld 408576698