sdk/lib/core/uri.dart - Issue 335373003: New Uri.parse and validation.

Unified Diff: sdk/lib/core/uri.dart

Issue 335373003: New Uri.parse and validation. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart

Patch Set: Address comments Created 6 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: sdk/lib/core/uri.dart

diff --git a/sdk/lib/core/uri.dart b/sdk/lib/core/uri.dart

index bdaf08639b200222f667b76c3a888f3678393f99..313512cdd65faa0537963faeea685b6e421a004c 100644

--- a/sdk/lib/core/uri.dart

+++ b/sdk/lib/core/uri.dart

@@ -175,171 +175,233 @@ class Uri {

bool isRegName(int ch) {

return ch < 128 && ((_regNameTable[ch >> 4] & (1 << (ch & 0x0f))) != 0);

}

+ const int EOI = -1;

- int ipV6Address(int index) {

- // IPv6. Skip to ']'.

- index = uri.indexOf(']', index);

- if (index == -1) {

- throw new FormatException("Bad end of IPv6 host");

- }

- return index + 1;

- }

+ String scheme = "";

+ String path;

+ String userinfo = "";

+ String host = "";

+ int port = 0;

+ String query = "";

+ String fragment = "";

- int length = uri.length;

int index = 0;

- int schemeEndIndex = 0;

- if (length == 0) {

- return new Uri();

- }

- if (uri.codeUnitAt(0) != _SLASH) {

- // Can be scheme.

- while (index < length) {

- // Look for ':'. If found, continue from the post of ':'. If not (end

- // reached or invalid scheme char found) back up one char, and continue

- // to path.

- // Note that scheme-chars is contained in path-chars.

- int codeUnit = uri.codeUnitAt(index++);

- if (!_isSchemeCharacter(codeUnit)) {

- if (codeUnit == _COLON) {

- schemeEndIndex = index;

- } else {

- // Back up one char, since we met an invalid scheme char.

- index--;

- }

+ int pathStart = 0;

+ // End of input-marker.

+ int char = EOI;

+ void parseAuth() {

+ if (index == uri.length) {

+ char = EOI;

+ return;

+ }

+ int authStart = index;

+ int lastColon = -1;

+ int lastAt = -1;

+ char = uri.codeUnitAt(index);

+ while (index < uri.length) {

+ char = uri.codeUnitAt(index);

+ if (char == _SLASH || char == _QUESTION || char == _NUMBER_SIGN) {

break;

}

- }

- int userInfoEndIndex = -1;

- int portIndex = -1;

- int authorityEndIndex = schemeEndIndex;

- // If we see '//', there must be an authority.

- if (authorityEndIndex == index &&

- authorityEndIndex + 1 < length &&

- uri.codeUnitAt(authorityEndIndex) == _SLASH &&

- uri.codeUnitAt(authorityEndIndex + 1) == _SLASH) {

- // Skip '//'.

- authorityEndIndex += 2;

- // It can both be host and userInfo.

- while (authorityEndIndex < length) {

- int codeUnit = uri.codeUnitAt(authorityEndIndex++);

- if (!isRegName(codeUnit)) {

- if (codeUnit == _LEFT_BRACKET) {

- authorityEndIndex = ipV6Address(authorityEndIndex);

- } else if (portIndex == -1 && codeUnit == _COLON) {

- // First time ':'.

- portIndex = authorityEndIndex;

- } else if (codeUnit == _AT_SIGN || codeUnit == _COLON) {

- // Second time ':' or first '@'. Must be userInfo.

- userInfoEndIndex = uri.indexOf('@', authorityEndIndex - 1);

- // Not found. Must be path then.

- if (userInfoEndIndex == -1) {

- authorityEndIndex = index;

- break;

- }

- portIndex = -1;

- authorityEndIndex = userInfoEndIndex + 1;

- // Now it can only be host:port.

- while (authorityEndIndex < length) {

- int codeUnit = uri.codeUnitAt(authorityEndIndex++);

- if (!isRegName(codeUnit)) {

- if (codeUnit == _LEFT_BRACKET) {

- authorityEndIndex = ipV6Address(authorityEndIndex);

- } else if (codeUnit == _COLON) {

- if (portIndex != -1) {

- throw new FormatException("Double port in host");

- }

- portIndex = authorityEndIndex;

- } else {

- authorityEndIndex--;

- break;

- }

+ if (char == _AT_SIGN) {

Søren Gjesse 2014/06/19 07:39:43 So you can have @ and : in the username?

Lasse Reichstein Nielsen 2014/06/19 08:44:58 You can have :, which you always could. You can't

+ lastAt = index;

+ lastColon = -1;

+ } else if (char == _COLON) {

+ lastColon = index;

+ } else if (char == _LEFT_BRACKET) {

+ lastColon = -1;

+ int endBracket = uri.indexOf(']', index + 1);

+ if (endBracket == -1) {

+ index = uri.length;

+ char = EOI;

break;

} else {

- authorityEndIndex--;

- break;

+ index = endBracket;

}

+ index++;

+ char = EOI;

+ }

+ int hostStart = authStart;

+ int hostEnd = index;

+ if (lastAt >= 0) {

+ userinfo = _makeUserInfo(uri, authStart, lastAt);

+ hostStart = lastAt + 1;

+ }

+ if (lastColon >= 0) {

+ if (lastColon + 1 == index) {

+ _fail(uri, index, "Invalid port number");

+ }

+ int portNumber = 0;

+ for (int i = lastColon + 1; i < index; i++) {

+ int digit = uri.codeUnitAt(i);

+ if (_ZERO > digit || _NINE < digit) {

+ _fail(uri, i, "Invalid port number");

+ }

+ portNumber = portNumber * 10 + (digit - _ZERO);

+ }

+ port = _makePort(portNumber, scheme);

+ hostEnd = lastColon;

+ }

+ host = _makeHost(uri, hostStart, hostEnd);

+ if (index < uri.length) {

+ char = uri.codeUnitAt(index);

}

- } else {

- authorityEndIndex = schemeEndIndex;

}

- // At path now.

- int pathEndIndex = authorityEndIndex;

- while (pathEndIndex < length) {

- int codeUnit = uri.codeUnitAt(pathEndIndex++);

- if (codeUnit == _QUESTION || codeUnit == _NUMBER_SIGN) {

- pathEndIndex--;

+ // When reaching authority parsing, authority is possible.

+ // This is only true at start or right after scheme.

+ const int STATE_AUTH_OPT = 1;

+ // When reaching path parsing, the current character is part

+ // of the path.

+ const int STATE_PATH = 2;

+ // When reaching path parsing, the current character is known to not

+ // be part of the path.

+ const int STATE_PATH_END = 3;

+ // Current state.

+ // Initialized to the default value that is used when exiting the

+ // scheme loop by reaching the end of input.

+ // All other breaks set their own state.

+ int state = STATE_PATH_END;

Søren Gjesse 2014/06/19 07:39:43 It is somewhat confusing that this state value doe

Lasse Reichstein Nielsen 2014/06/19 08:44:58 I only set the state on a break. This is the state

+ while (index < uri.length) {

+ char = uri.codeUnitAt(index);

+ if (char == _QUESTION || char == _NUMBER_SIGN) {

+ state = STATE_PATH_END;

break;

}

+ if (char == _SLASH) {

+ if (index == 0) {

Søren Gjesse 2014/06/19 07:39:43 Conditional expression?

Lasse Reichstein Nielsen 2014/06/19 08:44:58 Ok. I don't really like them, but I guess it's ok

+ state = STATE_AUTH_OPT;

+ } else {

+ state = STATE_PATH;

+ }

+ break;

+ }

+ if (char == _COLON) {

+ if (index == 0) _fail(uri, 0, "Invalid empty scheme");

+ scheme = _makeScheme(uri, index);

+ index++;

+ pathStart = index;

+ if (index == uri.length) {

+ char = EOI;

+ state = STATE_PATH_END;

+ } else {

+ char = uri.codeUnitAt(index);

+ if (char == _QUESTION || char == _NUMBER_SIGN) {

+ state = STATE_PATH_END;

+ } else if (char == _SLASH) {

+ state = STATE_AUTH_OPT;

+ } else {

+ state = STATE_PATH;

+ }

+ break;

+ }

+ index++;

+ char = EOI;

+ }

+ if (state == STATE_AUTH_OPT) {

+ state = STATE_PATH; // Default value when leaving authority.

+ // Have seen one slash either at start or right after scheme.

+ // If two slashes, it's an authority, otherwise it's just the path.

+ assert(char == _SLASH);

+ index++;

+ if (index == uri.length) {

+ char = EOI;

+ state = STATE_PATH_END;

+ } else {

+ char = uri.codeUnitAt(index);

+ if (char == _QUESTION || char == _NUMBER_SIGN) {

+ state = STATE_PATH_END;

+ } else if (char == _SLASH) {

+ index++;

+ parseAuth();

+ pathStart = index;

+ if (char == _QUESTION || char == _NUMBER_SIGN || char == EOI) {

+ state = STATE_PATH_END;

+ }

}

- // Maybe query.

- int queryEndIndex = pathEndIndex;

- if (queryEndIndex < length && uri.codeUnitAt(queryEndIndex) == _QUESTION) {

- while (queryEndIndex < length) {

- int codeUnit = uri.codeUnitAt(queryEndIndex++);

- if (codeUnit == _NUMBER_SIGN) {

- queryEndIndex--;

+ if (state == STATE_PATH) {

+ // Characters from pathStart to index (inclusive) are known

+ // to be part of the path.

+ while (++index < uri.length) {

+ char = uri.codeUnitAt(index);

+ if (char == _QUESTION || char == _NUMBER_SIGN) {

+ state = STATE_PATH_END;

break;

}

+ char = EOI;

}

+ assert(state == STATE_PATH_END);

+ bool ensureLeadingSlash = (host != "" || scheme == "file");

+ path = _makePath(uri, pathStart, index, null, ensureLeadingSlash);

- var scheme = null;

- if (schemeEndIndex > 0) {

- scheme = uri.substring(0, schemeEndIndex - 1);

- }

- var host = "";

- var userInfo = "";

- var port = 0;

- if (schemeEndIndex != authorityEndIndex) {

- int startIndex = schemeEndIndex + 2;

- if (userInfoEndIndex > 0) {

- userInfo = uri.substring(startIndex, userInfoEndIndex);

- startIndex = userInfoEndIndex + 1;

- }

- if (portIndex > 0) {

- var portStr = uri.substring(portIndex, authorityEndIndex);

- try {

- port = int.parse(portStr);

- } catch (_) {

- throw new FormatException("Invalid port: '$portStr'");

- }

- host = uri.substring(startIndex, portIndex - 1);

+ if (char == _QUESTION) {

+ int numberSignIndex = uri.indexOf('#', index + 1);

+ if (numberSignIndex < 0) {

+ query = _makeQuery(uri, index + 1, uri.length, null);

} else {

- host = uri.substring(startIndex, authorityEndIndex);

+ query = _makeQuery(uri, index + 1, numberSignIndex, null);

+ fragment = _makeFragment(uri, numberSignIndex + 1, uri.length);

}

- }

+ } else if (char == _NUMBER_SIGN) {

+ fragment = _makeFragment(uri, index + 1, uri.length);

+ }

+ return new Uri._internal(scheme,

+ userinfo,

+ host,

+ port,

+ path,

+ query,

+ fragment);

+ }

- var path = uri.substring(authorityEndIndex, pathEndIndex);

- var query = "";

- if (pathEndIndex < queryEndIndex) {

- query = uri.substring(pathEndIndex + 1, queryEndIndex);

- }

- var fragment = "";

- // If queryEndIndex is not at end (length), there is a fragment.

- if (queryEndIndex < length) {

- fragment = uri.substring(queryEndIndex + 1, length);

+ // Report a parse failure.

+ static void _fail(String uri, int index, String message) {

+ // TODO(lrn): Consider adding this to FormatException.

+ if (index == uri.length) {

+ message += " at end of input.";

+ } else {

+ message += " at position $index.\n";

+ // Pick a slice of uri containing index and, if

+ // necessary, truncate the ends to ensure the entire

+ // slice fits on one line.

+ int min = 0;

+ int max = uri.length;

+ String pre = "";

+ String post = "";

+ if (uri.length > 78) {

+ min = index - 10;

+ if (min < 0) min = 0;

+ int max = min + 72;

+ if (max > uri.length) {

+ max = uri.length;

+ min = max - 72;

+ }

+ if (min != 0) pre = "...";

+ if (max != uri.length) post = "...";

+ }

+ // Combine message, slice and a caret pointing to the error index.

+ message = "$message$pre${uri.substring(min, max)}$post\n"

+ "${' ' * (pre.length + index - min)}^";

}

- return new Uri(scheme: scheme,

- userInfo: userInfo,

- host: host,

- port: port,

- path: path,

- query: query,

- fragment: fragment);

+ throw new FormatException(message);

}

+ /// Internal non-verifying constructor. Only call with validated arguments.

+ Uri._internal(this.scheme,

+ this.userInfo,

+ this._host,

+ this._port,

+ this._path,

+ this.query,

+ this.fragment);

/**

* Creates a new URI from its components.

@@ -386,29 +448,27 @@ class Uri {

* The fragment component is set through [fragment].

- Uri({String scheme,

- this.userInfo: "",

+ factory Uri({String scheme,

+ String userInfo: "",

String host: "",

port: 0,

String path,

Iterable<String> pathSegments,

String query,

Map<String, String> queryParameters,

- fragment: ""}) :

- scheme = _makeScheme(scheme),

- _host = _makeHost(host),

- query = _makeQuery(query, queryParameters),

- fragment = _makeFragment(fragment) {

- // Perform scheme specific normalization.

- if (scheme == "http" && port == 80) {

- _port = 0;

- } else if (scheme == "https" && port == 443) {

- _port = 0;

- } else {

- _port = port;

- }

- // Fill the path.

- _path = _makePath(path, pathSegments);

+ fragment: ""}) {

+ scheme = _makeScheme(scheme, _stringOrNullLength(scheme));

+ userInfo = _makeUserInfo(userInfo, 0, _stringOrNullLength(userInfo));

+ host = _makeHost(host, 0, _stringOrNullLength(host));

+ query = _makeQuery(query, 0, _stringOrNullLength(query), queryParameters);

+ fragment = _makeFragment(fragment, 0, _stringOrNullLength(fragment));

+ port = _makePort(port, scheme);

+ bool ensureLeadingSlash = (host != "" || scheme == "file");

+ path = _makePath(path, 0, _stringOrNullLength(path), pathSegments,

+ ensureLeadingSlash);

+ return new Uri._internal(scheme, userInfo, host, port,

+ path, query, fragment);

}

/**

@@ -489,7 +549,7 @@ class Uri {

if (hostEnd == authority.length) {

throw new FormatException("Invalid IPv6 host entry.");

}

- parseIPv6Address(authority.substring(hostStart + 1, hostEnd));

+ parseIPv6Address(authority, hostStart + 1, hostEnd);

hostEnd++; // Skip the closing bracket.

if (hostEnd != authority.length &&

authority.codeUnitAt(hostEnd) != _COLON) {

@@ -765,75 +825,177 @@ class Uri {

return _queryParameters;

}

- static String _makeHost(String host) {

- if (host == null || host.isEmpty) return host;

- if (host.codeUnitAt(0) == _LEFT_BRACKET) {

- if (host.codeUnitAt(host.length - 1) != _RIGHT_BRACKET) {

- throw new FormatException('Missing end `]` to match `[` in host');

+ static int _makePort(int port, String scheme) {

+ // Perform scheme specific normalization.

+ if (port == 80 && scheme == "http") {

+ return 0;

+ }

+ if (port == 443 && scheme == "https") {

+ return 0;

+ }

+ return port;

+ }

+ static String _makeHost(String host, int start, int end) {

+ if (host == null) return null;

+ if (start == end) return "";

+ // Host is an IPv6 address if it starts with '[' or contains a colon.

+ if (host.codeUnitAt(start) == _LEFT_BRACKET) {

+ if (host.codeUnitAt(end - 1) != _RIGHT_BRACKET) {

+ _fail(host, start, 'Missing end `]` to match `[` in host');

}

- parseIPv6Address(host.substring(1, host.length - 1));

- return host;

+ parseIPv6Address(host, start + 1, end - 1);

+ return host.substring(start, end);

}

- for (int i = 0; i < host.length; i++) {

+ // TODO(lrn): skip if too short to be a valid IPv6 address.

+ for (int i = start; i < end; i++) {

if (host.codeUnitAt(i) == _COLON) {

- parseIPv6Address(host);

+ parseIPv6Address(host, start, end);

return '[$host]';

}

- return host;

+ return _normalizeRegName(host, start, end);

+ }

+ static bool _isRegNameChar(int char) {

+ return char < 127 && (_regNameTable[char >> 4] & (1 << (char & 0xf))) != 0;

}

- static String _makeScheme(String scheme) {

- bool isSchemeLowerCharacter(int ch) {

- return ch < 128 &&

- ((_schemeLowerTable[ch >> 4] & (1 << (ch & 0x0f))) != 0);

+ /**

+ * Validates and does case- and percent-encoding normalization.

+ *

+ * The [host] must be an RFC3986 "reg-name". It is converted

+ * to lower case, and percent escapes are converted to either

+ * lower case unreserved characters or upper case escapes.

+ */

+ static String _normalizeRegName(String host, int start, int end) {

+ StringBuffer buffer;

+ int sectionStart = start;

+ int index = start;

+ // Whether all characters between sectionStart and index are normalized.

+ bool isNormalized = true;

+ while (index < end) {

+ int char = host.codeUnitAt(index);

+ if (char == _PERCENT) {

+ // The _regNameTable contains "%", so we check that first.

+ String replacement = _normalizeEscape(host, index, true);

+ if (replacement == null && isNormalized) {

+ index += 3;

+ continue;

+ }

+ if (buffer == null) buffer = new StringBuffer();

+ String slice = host.substring(sectionStart, index);

+ if (!isNormalized) slice = slice.toLowerCase();

+ buffer.write(slice);

+ int sourceLength = 3;

+ if (replacement == null) {

+ replacement = host.substring(index, index + 3);

+ } else if (replacement == "%") {

+ replacement = "%25";

+ sourceLength = 1;

+ }

+ buffer.write(replacement);

+ index += sourceLength;

+ sectionStart = index;

+ isNormalized = true;

+ } else if (_isRegNameChar(char)) {

+ if (isNormalized && _UPPER_CASE_A <= char && _UPPER_CASE_Z >= char) {

+ // Put initial slice in buffer and continue in non-normalized mode

+ if (buffer == null) buffer = new StringBuffer();

+ if (sectionStart < index) {

+ buffer.write(host.substring(sectionStart, index));

+ sectionStart = index;

+ }

+ isNormalized = false;

+ }

+ index++;

+ } else if (_isGeneralDelimiter(char)) {

+ _fail(host, index, "Invalid character");

+ } else {

+ int sourceLength = 1;

+ if ((char & 0xFC00) == 0xD800 && (index + 1) < end) {

+ int tail = host.codeUnitAt(index + 1);

+ if ((tail & 0xFC00) == 0xDC00) {

+ char = 0x10000 | ((char & 0x3ff) << 10) | (tail & 0x3ff);

+ sourceLength = 2;

+ }

+ if (buffer == null) buffer = new StringBuffer();

+ String slice = host.substring(sectionStart, index);

+ if (!isNormalized) slice = slice.toLowerCase();

+ buffer.write(slice);

+ buffer.write(_escapeChar(char));

+ index += sourceLength;

+ sectionStart = index;

+ }

+ if (buffer == null) return host.substring(start, end);

+ if (sectionStart < end) {

+ String slice = host.substring(sectionStart, end);

+ if (!isNormalized) slice = slice.toLowerCase();

+ buffer.write(slice);

}

+ return buffer.toString();

+ }

- if (scheme == null) return "";

- bool allLowercase = true;

- int length = scheme.length;

- for (int i = 0; i < length; i++) {

+ /**

+ * Validates scheme characters and does case-normalization.

+ *

+ * Schemes are converted to lower case. They cannot contain escapes.

+ */

+ static String _makeScheme(String scheme, int end) {

+ if (end == 0) return "";

+ int char = scheme.codeUnitAt(0);

+ if (!_isAlphabeticCharacter(char)) {

+ _fail(scheme, 0, "Scheme not starting with alphabetic character");

+ }

+ bool allLowercase = char >= _LOWER_CASE_A;

+ for (int i = 0; i < end; i++) {

int codeUnit = scheme.codeUnitAt(i);

- if (i == 0 && !_isAlphabeticCharacter(codeUnit)) {

- // First code unit must be an alphabetic character.

- throw new ArgumentError('Illegal scheme: $scheme');

+ if (!_isSchemeCharacter(codeUnit)) {

+ _fail(scheme, i, "Illegal scheme character");

}

- if (!isSchemeLowerCharacter(codeUnit)) {

- if (_isSchemeCharacter(codeUnit)) {

- allLowercase = false;

- } else {

- throw new ArgumentError('Illegal scheme: $scheme');

- }

+ if (_LOWER_CASE_A <= char && _LOWER_CASE_Z >= char) {

+ allLowercase = false;

}

+ scheme = scheme.substring(0, end);

+ if (!allLowercase) scheme = scheme.toLowerCase();

+ return scheme;

+ }

- return allLowercase ? scheme : scheme.toLowerCase();

+ static String _makeUserInfo(String userInfo, int start, int end) {

+ if (userInfo == null) return "null";

+ return _normalize(userInfo, start, end, _userinfoTable);

}

- String _makePath(String path, Iterable<String> pathSegments) {

+ static String _makePath(String path, int start, int end,

+ Iterable<String> pathSegments,

+ bool ensureLeadingSlash) {

if (path == null && pathSegments == null) return "";

if (path != null && pathSegments != null) {

throw new ArgumentError('Both path and pathSegments specified');

}

var result;

if (path != null) {

- result = _normalize(path);

+ result = _normalize(path, start, end, _pathCharOrSlashTable);

} else {

result = pathSegments.map((s) => _uriEncode(_pathCharTable, s)).join("/");

}

- if ((hasAuthority || (scheme == "file")) &&

- result.isNotEmpty && !result.startsWith("/")) {

+ if (ensureLeadingSlash && result.isNotEmpty && !result.startsWith("/")) {

return "/$result";

}

return result;

}

- static String _makeQuery(String query, Map<String, String> queryParameters) {

+ static String _makeQuery(String query, int start, int end,

+ Map<String, String> queryParameters) {

if (query == null && queryParameters == null) return "";

if (query != null && queryParameters != null) {

throw new ArgumentError('Both query and queryParameters specified');

}

- if (query != null) return _normalize(query);

+ if (query != null) return _normalize(query, start, end, _queryCharTable);

var result = new StringBuffer();

var first = true;

@@ -851,123 +1013,183 @@ class Uri {

return result.toString();

}

- static String _makeFragment(String fragment) {

+ static String _makeFragment(String fragment, int start, int end) {

if (fragment == null) return "";

- return _normalize(fragment);

+ return _normalize(fragment, start, end, _queryCharTable);

}

- static String _normalize(String component) {

- int index = component.indexOf('%');

- if (index < 0) return component;

- bool isNormalizedHexDigit(int digit) {

- return (_ZERO <= digit && digit <= _NINE) ||

- (_UPPER_CASE_A <= digit && digit <= _UPPER_CASE_F);

- }

+ static int _stringOrNullLength(String s) => (s == null) ? 0 : s.length;

- bool isLowerCaseHexDigit(int digit) {

- return _LOWER_CASE_A <= digit && digit <= _LOWER_CASE_F;

- }

+ static bool _isHexDigit(int char) {

+ if (_NINE >= char) return _ZERO <= char;

+ char |= 0x20;

+ return _LOWER_CASE_A <= char && _LOWER_CASE_F >= char;

+ }

- bool isUnreserved(int ch) {

- return ch < 128 &&

- ((_unreservedTable[ch >> 4] & (1 << (ch & 0x0f))) != 0);

- }

+ static int _hexValue(int char) {

+ assert(_isHexDigit(char));

+ if (_NINE >= char) return char - _ZERO;

+ char |= 0x20;

+ return char - (_LOWER_CASE_A - 10);

+ }

- int normalizeHexDigit(int index) {

- var codeUnit = component.codeUnitAt(index);

- if (isLowerCaseHexDigit(codeUnit)) {

- return codeUnit - 0x20;

- } else if (!isNormalizedHexDigit(codeUnit)) {

- throw new ArgumentError("Invalid URI component: $component");

- } else {

- return codeUnit;

+ /**

+ * Performs RFC 3986 Percent-Encoding Normalization.

+ *

+ * Returns a replacement string that should replace the original escape.

+ * Returns null if no replacement is necessary because the escape is

+ * not for an unreserved character and is already non-lower-case.

+ *

+ * Returns "%" if the escape is invalid (not two valid hex digits following

+ * the percent sign). The calling code should replace the percent

+ * sign with "%25", but leave the following two characters unmodified.

+ *

+ * If [lowerCase] is true, a single character returned is always lower case.

+ */

+ static String _normalizeEscape(String source, int index, bool lowerCase) {

+ assert(source.codeUnitAt(index) == _PERCENT);

+ if (index + 2 >= source.length) {

+ return "%"; // Marks the escape as invalid.

+ }

+ int firstDigit = source.codeUnitAt(index + 1);

+ int secondDigit = source.codeUnitAt(index + 2);

+ if (!_isHexDigit(firstDigit) || !_isHexDigit(secondDigit)) {

+ return "%"; // Marks the escape as invalid.

+ }

+ int value = _hexValue(firstDigit) * 16 + _hexValue(secondDigit);

+ if (_isUnreservedChar(value)) {

+ if (lowerCase && _UPPER_CASE_A <= value && _UPPER_CASE_Z >= value) {

+ value |= 0x20;

}

+ return new String.fromCharCode(value);

}

- int decodeHexDigitPair(int index) {

- int byte = 0;

- for (int i = 0; i < 2; i++) {

- var codeUnit = component.codeUnitAt(index + i);

- if (_ZERO <= codeUnit && codeUnit <= _NINE) {

- byte = byte * 16 + codeUnit - _ZERO;

- } else {

- // Check ranges A-F (0x41-0x46) and a-f (0x61-0x66).

- codeUnit |= 0x20;

- if (_LOWER_CASE_A <= codeUnit &&

- codeUnit <= _LOWER_CASE_F) {

- byte = byte * 16 + codeUnit - _LOWER_CASE_A + 10;

- } else {

- throw new ArgumentError(

- "Invalid percent-encoding in URI component: $component");

- }

- return byte;

+ if (firstDigit >= _LOWER_CASE_A || secondDigit >= _LOWER_CASE_A) {

+ // Either digit is lower case.

+ return source.substring(index, index + 3).toUpperCase();

}

+ // Escape is retained, and is already non-lower case, so return null to

+ // represent "no replacement necessary".

+ return null;

+ }

- // Start building the normalized component string.

- StringBuffer result;

- int length = component.length;

- int prevIndex = 0;

+ static bool _isUnreservedChar(int ch) {

+ return ch < 127 &&

+ ((_unreservedTable[ch >> 4] & (1 << (ch & 0x0f))) != 0);

+ }

- // Copy a part of the component string to the result.

- void fillResult() {

- if (result == null) {

- assert(prevIndex == 0);

- result = new StringBuffer(component.substring(prevIndex, index));

- } else {

- result.write(component.substring(prevIndex, index));

+ static String _escapeChar(char) {

+ const hexDigits = "0123456789ABCDEF";

+ List codeUnits;

+ if (char < 0x80) {

+ // ASCII, a single percent encoded sequence.

+ codeUnits = new List(3);

+ codeUnits[0] = _PERCENT;

+ codeUnits[1] = hexDigits.codeUnitAt(char >> 4);

+ codeUnits[2] = hexDigits.codeUnitAt(char & 0xf);

+ } else {

+ // Do UTF-8 encoding of character, then percent encode bytes.

+ int flag = 0xc0; // The high-bit markers on the first byte of UTF-8.

+ int encodedBytes = 2;

+ if (char > 0x7ff) {

+ flag = 0xe0;

+ encodedBytes = 3;

+ if (char > 0xffff) {

Søren Gjesse 2014/06/19 07:39:43 Are we sure that we cannot go beyond 4-byte encodi

Lasse Reichstein Nielsen 2014/06/19 08:44:58 Yes. We get at most 21 bits from a surrogate pair.

+ encodedBytes = 4;

+ flag = 0xf0;

+ }

}

- }

- while (index < length) {

- // Normalize percent-encoding to uppercase and don't encode

- // unreserved characters.

- assert(component.codeUnitAt(index) == _PERCENT);

- if (length < index + 2) {

- throw new ArgumentError(

- "Invalid percent-encoding in URI component: $component");

+ codeUnits = new List(3 * encodedBytes);

+ int index = 0;

+ while (--encodedBytes >= 0) {

+ int byte = ((char >> (6 * encodedBytes)) & 0x3f) | flag;

+ codeUnits[index] = _PERCENT;

+ codeUnits[index + 1] = hexDigits.codeUnitAt(byte >> 4);

+ codeUnits[index + 2] = hexDigits.codeUnitAt(byte & 0xf);

+ index += 3;

+ flag = 0x80; // Following bytes have only high bit set.

}

+ }

+ return new String.fromCharCodes(codeUnits);

+ }

- var codeUnit1 = component.codeUnitAt(index + 1);

- var codeUnit2 = component.codeUnitAt(index + 2);

- var decodedCodeUnit = decodeHexDigitPair(index + 1);

- if (isNormalizedHexDigit(codeUnit1) &&

- isNormalizedHexDigit(codeUnit2) &&

- !isUnreserved(decodedCodeUnit)) {

- index += 3;

+ /**

+ * Runs through component checking that each character is valid and

+ * normalizes percent escapes.

+ *

+ * Uses [charTable] to check if a non-`%` character is allowed.

+ * Each `%` character must be followed by two hex digits.

+ * If the hex-digits are lower case letters, they are converted to

+ * upper case.

+ */

+ static String _normalize(String component, int start, int end,

+ List<int> charTable) {

+ StringBuffer buffer;

+ int sectionStart = start;

+ int index = start;

+ // Loop while characters are valid and escapes correct and upper-case.

+ while (index < end) {

+ int char = component.codeUnitAt(index);

+ if (char < 127 && (charTable[char >> 4] & (1 << (char & 0x0f))) != 0) {

+ index++;

} else {

- fillResult();

- if (isUnreserved(decodedCodeUnit)) {

- result.writeCharCode(decodedCodeUnit);

+ String replacement;

+ int sourceLength;

+ if (char == _PERCENT) {

+ replacement = _normalizeEscape(component, index, false);

+ // Returns null if we should keep the existing escape.

+ if (replacement == null) {

+ index += 3;

+ continue;

+ }

+ // Returns "%" if we should escape the existing percent.

+ if ("%" == replacement) {

+ replacement = "%25";

+ sourceLength = 1;

+ } else {

+ sourceLength = 3;

+ }

+ } else if (_isGeneralDelimiter(char)) {

+ _fail(component, index, "Invalid character");

} else {

- result.write("%");

- result.writeCharCode(normalizeHexDigit(index + 1));

- result.writeCharCode(normalizeHexDigit(index + 2));

+ sourceLength = 1;

+ if ((char & 0xFC00) == 0xD800) {

+ // Possible lead surrogate.

+ if (index + 1 < end) {

+ int tail = component.codeUnitAt(index + 1);

+ if ((tail & 0xFC00) == 0xDC00) {

+ // Tail surrogate.

+ sourceLength = 2;

+ char = 0x10000 | ((char & 0x3ff) << 10) | (tail & 0x3ff);

+ }

+ replacement = _escapeChar(char);

}

- index += 3;

- prevIndex = index;

- }

- int next = component.indexOf('%', index);

- if (next >= index) {

- index = next;

- } else {

- index = length;

+ if (buffer == null) buffer = new StringBuffer();

+ buffer.write(component.substring(sectionStart, index));

+ buffer.write(replacement);

+ index += sourceLength;

+ sectionStart = index;

}

- if (result == null) return component;

- if (result != null && prevIndex != index) fillResult();

- assert(index == length);

- return result.toString();

+ if (buffer == null) {

+ // Makes no copy if start == 0 and end == component.length.

+ return component.substring(start, end);

+ }

+ if (sectionStart < end) {

+ buffer.write(component.substring(sectionStart, end));

+ }

+ return buffer.toString();

}

static bool _isSchemeCharacter(int ch) {

return ch < 128 && ((_schemeTable[ch >> 4] & (1 << (ch & 0x0f))) != 0);

}

+ static bool _isGeneralDelimiter(int ch) {

+ return ch <= 64 &&

+ ((_genDelimitersTable[ch >> 4] & (1 << (ch & 0x0f))) != 0);

+ }

/**

* Returns whether the URI is absolute.

@@ -1465,6 +1687,9 @@ class Uri {

* Throws a [FormatException] if [host] is not a valid IPv6 address

* representation.

+ * Acts on the substring from [start] to [end]. If [end] is omitted, it

+ * defaults to the end of the string.

+ *

* Some examples of IPv6 addresses:

* * ::1

* * FEDC:BA98:7654:3210:FEDC:BA98:7654:3210

@@ -1472,7 +1697,8 @@ class Uri {

* * ::FFFF:129.144.52.38

* * 2010:836B:4179::836B:4179

- static List<int> parseIPv6Address(String host) {

+ static List<int> parseIPv6Address(String host, [int start = 0, int end]) {

+ if (end == null) end = host.length;

// An IPv6 address consists of exactly 8 parts of 1-4 hex digits, separated

// by `:`'s, with the following exceptions:

@@ -1495,11 +1721,11 @@ class Uri {

if (host.length < 2) error('address is too short');

List<int> parts = [];

bool wildcardSeen = false;

- int partStart = 0;

+ int partStart = start;

// Parse all parts, except a potential last one.

- for (int i = 0; i < host.length; i++) {

+ for (int i = start; i < end; i++) {

if (host.codeUnitAt(i) == _COLON) {

- if (i == 0) {

+ if (i == start) {

// If we see a `:` in the beginning, expect wildcard.

i++;

if (host.codeUnitAt(i) != _COLON) {

@@ -1522,18 +1748,18 @@ class Uri {

}

if (parts.length == 0) error('too few parts');

- bool atEnd = partStart == host.length;

- bool isLastWildcard = parts.last == -1;

+ bool atEnd = (partStart == end);

+ bool isLastWildcard = (parts.last == -1);

if (atEnd && !isLastWildcard) {

error('expected a part after last `:`');

}

if (!atEnd) {

try {

- parts.add(parseHex(partStart, host.length));

+ parts.add(parseHex(partStart, end));

} catch (e) {

// Failed to parse the last chunk as hex. Try IPv4.

try {

- List<int> last = parseIPv4Address(host.substring(partStart));

+ List<int> last = parseIPv4Address(host.substring(partStart, end));

parts.add(last[0] << 8 | last[1]);

parts.add(last[2] << 8 | last[3]);

} catch (e) {

@@ -1549,15 +1775,23 @@ class Uri {

error('an address without a wildcard must contain exactly 8 parts');

}

// TODO(ajohnsen): Consider using Uint8List.

- return parts

- .expand((value) {

- if (value == -1) {

- return new List.filled((9 - parts.length) * 2, 0);

- } else {

- return [(value >> 8) & 0xFF, value & 0xFF];

- }

- })

- .toList();

+ List bytes = new List<int>(16);

+ for (int i = 0, index = 0; i < parts.length; i++) {

+ int value = parts[i];

+ if (value == -1) {

+ int wildCardLength = 9 - parts.length;

+ for (int j = 0; j < wildCardLength; j++) {

+ bytes[index] = 0;

+ bytes[index + 1] = 0;

+ index += 2;

+ }

+ } else {

+ bytes[index] = value >> 8;

+ bytes[index + 1] = value & 0xff;

+ index += 2;

+ }

+ return bytes;

}

// Frequently used character codes.

@@ -1820,6 +2054,48 @@ class Uri {

// pqrstuvwxyz ~

0x47ff]; // 0x70 - 0x7f 1111111111100010

+ // General delimiter characters, RFC 3986 section 2.2.

+ // gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"

+ //

+ static const _genDelimitersTable = const [

+ // LSB MSB

+ // | |

+ 0x0000, // 0x00 - 0x0f 0000000000000000

+ 0x0000, // 0x10 - 0x1f 0000000000000000

+ // # /

+ 0x8008, // 0x20 - 0x2f 0001000000000001

+ // : [ ]?

+ 0xe400, // 0x30 - 0x3f 0000000000101011

+ // @

+ 0x0001, // 0x40 - 0x4f 1000000000000000

+ //

+ 0x0000, // 0x50 - 0x5f 0000000000000000

+ //

+ 0x0000, // 0x60 - 0x6f 0000000000000000

+ //

+ 0x0000]; // 0x70 - 0x7f 0000000000000000

+ // Characters allowed in the userinfo as of RFC 3986.

+ // RFC 3986 Appendix A

+ // userinfo = *( unreserved / pct-encoded / sub-delims / ':')

+ static const _userinfoTable = const [

+ // LSB MSB

+ // | |

+ 0x0000, // 0x00 - 0x0f 0000000000000000

+ 0x0000, // 0x10 - 0x1f 0000000000000000

+ // ! $ &'()*+,-.

+ 0x7fd2, // 0x20 - 0x2f 0100101111111110

+ // 0123456789:; =

+ 0x2fff, // 0x30 - 0x3f 1111111111110100

+ // ABCDEFGHIJKLMNO

+ 0xfffe, // 0x40 - 0x4f 0111111111111111

+ // PQRSTUVWXYZ _

+ 0x87ff, // 0x50 - 0x5f 1111111111100001

+ // abcdefghijklmno

+ 0xfffe, // 0x60 - 0x6f 0111111111111111

+ // pqrstuvwxyz ~

+ 0x47ff]; // 0x70 - 0x7f 1111111111100010

// Characters allowed in the reg-name as of RFC 3986.

// RFC 3986 Appendix A

// reg-name = *( unreserved / pct-encoded / sub-delims )

@@ -1862,6 +2138,27 @@ class Uri {

// pqrstuvwxyz ~

0x47ff]; // 0x70 - 0x7f 1111111111100010

+ // Characters allowed in the path as of RFC 3986.

+ // RFC 3986 section 3.3 *and* slash.

+ static const _pathCharOrSlashTable = const [

+ // LSB MSB

+ // | |

+ 0x0000, // 0x00 - 0x0f 0000000000000000

+ 0x0000, // 0x10 - 0x1f 0000000000000000

+ // ! $ &'()*+,-./

+ 0xffd2, // 0x20 - 0x2f 0100101111111111

+ // 0123456789:; =

+ 0x2fff, // 0x30 - 0x3f 1111111111110100

+ // @ABCDEFGHIJKLMNO

+ 0xffff, // 0x40 - 0x4f 1111111111111111

+ // PQRSTUVWXYZ _

+ 0x87ff, // 0x50 - 0x5f 1111111111100001

+ // abcdefghijklmno

+ 0xfffe, // 0x60 - 0x6f 0111111111111111

+ // pqrstuvwxyz ~

+ 0x47ff]; // 0x70 - 0x7f 1111111111100010

// Characters allowed in the query as of RFC 3986.

// RFC 3986 section 3.4.

// query = *( pchar / "/" / "?" )

« no previous file with comments | « pkg/json_rpc_2/test/server/parameters_test.dart ('k') | sdk/lib/io/http_impl.dart » ('j') | tests/corelib/uri_test.dart » ('J')