Index: runtime/vm/uri.cc |
diff --git a/runtime/vm/uri.cc b/runtime/vm/uri.cc |
new file mode 100644 |
index 0000000000000000000000000000000000000000..a9d8d733fabdfabda9dbd838850b873a2b57dc0c |
--- /dev/null |
+++ b/runtime/vm/uri.cc |
@@ -0,0 +1,528 @@ |
+// Copyright (c) 2016, the Dart project authors. Please see the AUTHORS file |
+// for details. All rights reserved. Use of this source code is governed by a |
+// BSD-style license that can be found in the LICENSE file. |
+ |
+#include "vm/uri.h" |
+ |
+#include "vm/zone.h" |
+ |
+namespace dart { |
+ |
// Converts the ASCII upper-case letters of a NUL-terminated string to
// lower case, in place.
//
// Only the characters 'A'..'Z' are modified; every other byte is left
// exactly as it was.  The conversion is done by hand rather than with
// libc's tolower() so that nothing but ASCII letters is ever changed.
static void StringLower(char* str) {
  for (char* cursor = str; *cursor != '\0'; ++cursor) {
    const bool is_ascii_upper = (*cursor >= 'A' && *cursor <= 'Z');
    if (is_ascii_upper) {
      *cursor = *cursor + ('a' - 'A');
    }
  }
}
+ |
+ |
// Returns true if |value| is an "unreserved" character: an ASCII
// letter, an ASCII digit, or one of '-', '.', '_' and '~'.
static bool IsUnreservedChar(intptr_t value) {
  if (value >= '0' && value <= '9') {
    return true;
  }
  if (value >= 'A' && value <= 'Z') {
    return true;
  }
  if (value >= 'a' && value <= 'z') {
    return true;
  }
  switch (value) {
    case '-':
    case '.':
    case '_':
    case '~':
      return true;
    default:
      return false;
  }
}
+ |
+ |
// Returns true if |value| is one of the delimiter characters
//   : / ? # [ ] @          (the "gen-delims" of RFC 3986 Section 2.2)
//   ! $ & ' ( ) * + , ; =  (the "sub-delims" of RFC 3986 Section 2.2)
static bool IsDelimiter(intptr_t value) {
  const bool is_gen_delim =
      (value == ':' || value == '/' || value == '?' || value == '#' ||
       value == '[' || value == ']' || value == '@');
  const bool is_sub_delim =
      (value == '!' || value == '$' || value == '&' || value == '\'' ||
       value == '(' || value == ')' || value == '*' || value == '+' ||
       value == ',' || value == ';' || value == '=');
  return is_gen_delim || is_sub_delim;
}
+ |
+ |
// Returns true if |value| is an ASCII hexadecimal digit: '0'..'9',
// 'A'..'F' or 'a'..'f'.
static bool IsHexDigit(char value) {
  return ((value >= '0' && value <= '9') ||
          (value >= 'A' && value <= 'F') ||
          (value >= 'a' && value <= 'f'));
}
+ |
+ |
+static int HexValue(char digit) { |
+ if ((digit >= '0' && digit <= '9')) { |
+ return digit - '0'; |
+ } |
+ if ((digit >= 'A' && digit <= 'F')) { |
+ return digit - 'A' + 10; |
+ } |
+ if ((digit >= 'a' && digit <= 'f')) { |
+ return digit - 'a' + 10; |
+ } |
+ UNREACHABLE(); |
+ return 0; |
+} |
+ |
+ |
+static int GetEscapedValue(const char* str, intptr_t pos, intptr_t len) { |
+ if (pos + 2 >= len) { |
+ // Not enough room for a valid escape sequence. |
+ return -1; |
+ } |
+ if (str[pos] != '%') { |
+ // Escape sequences start with '%'. |
+ return -1; |
+ } |
+ |
+ char digit1 = str[pos + 1]; |
+ char digit2 = str[pos + 2]; |
+ if (!IsHexDigit(digit1) || !IsHexDigit(digit2)) { |
+ // Invalid escape sequence. Ignore it. |
ahe
2016/05/31 21:44:42
What does the RFC say about this situation?
turnidge
2016/06/01 20:00:40
I couldn't exactly figure it out, but my best gues
|
+ return -1; |
+ } |
+ return HexValue(digit1) * 16 + HexValue(digit2); |
+} |
+ |
+ |
+static char* NormalizeEscapes(const char* str, intptr_t len) { |
+ // Allocate the buffer. |
+ Zone* zone = Thread::Current()->zone(); |
+ char* buffer = zone->Alloc<char>(len * 3 + 1); // +1 for '\0' |
Cutch
2016/05/24 23:53:46
nit: (len * 3) + 1
maybe a comment about the * 3
turnidge
2016/05/31 18:25:27
Done.
|
+ |
+ // Copy the string, normalizing as we go. |
+ intptr_t buffer_pos = 0; |
+ intptr_t pos = 0; |
+ while (pos < len) { |
+ int escaped_value = GetEscapedValue(str, pos, len); |
ahe
2016/05/25 12:44:52
I don't understand this code. As far as I can tell
turnidge
2016/05/27 21:40:05
I think you probably need to look at GetEscapedVal
ahe
2016/05/31 21:44:42
This is embarrassing, I spent a lot of time lookin
|
+ if (escaped_value >= 0) { |
+ // If one of the special "unreserved" characters has been |
+ // escaped, revert the escaping. Otherwise preserve the |
+ // escaping. |
+ if (IsUnreservedChar(escaped_value)) { |
+ buffer[buffer_pos] = escaped_value; |
+ buffer_pos++; |
+ } else { |
+ OS::SNPrint(buffer + buffer_pos, 4, "%%%02X", escaped_value); |
Cutch
2016/05/24 23:53:46
Would copying the the three characters from the or
turnidge
2016/05/27 21:40:06
By reprinting the escaped value we normalize it to
|
+ buffer_pos += 3; |
+ } |
+ pos += 3; |
+ } else { |
+ char c = str[pos]; |
+ // If a delimiter or unreserved character is currently not |
+ // escaped, preserve that. If there is a busted %-sequence in |
+ // the input, preserve that too. |
+ if (c == '%' || IsDelimiter(c) || IsUnreservedChar(c)) { |
+ buffer[buffer_pos] = c; |
+ buffer_pos++; |
+ } else { |
+ // Escape funky characters. |
+ OS::SNPrint(buffer + buffer_pos, 4, "%%%02X", c); |
+ buffer_pos += 3; |
+ } |
+ pos++; |
+ } |
+ } |
+ buffer[buffer_pos] = '\0'; |
+ return buffer; |
+} |
+ |
+ |
+static void ClearParsedUri(ParsedUri* parsed_uri) { |
+ parsed_uri->scheme = NULL; |
+ parsed_uri->userinfo = NULL; |
+ parsed_uri->host = NULL; |
+ parsed_uri->port = NULL; |
+ parsed_uri->path = NULL; |
+ parsed_uri->query = NULL; |
+ parsed_uri->fragment = NULL; |
+} |
+ |
+ |
+static intptr_t ParseAuthority(const char* authority, ParsedUri* parsed_uri) { |
+ Zone* zone = Thread::Current()->zone(); |
+ const char* current = authority; |
+ intptr_t len = 0; |
+ |
+ size_t userinfo_len = strcspn(current, "@/"); |
+ if (current[userinfo_len] == '@') { |
+ // The '@' character follows the optional userinfo string. |
+ parsed_uri->userinfo = NormalizeEscapes(current, userinfo_len); |
+ current += userinfo_len + 1; |
+ len += userinfo_len + 1; |
+ } else { |
+ parsed_uri->userinfo = NULL; |
+ } |
+ |
+ size_t host_len = strcspn(current, ":/"); |
+ char* host = NormalizeEscapes(current, host_len); |
+ StringLower(host); |
+ parsed_uri->host = host; |
+ len += host_len; |
+ |
+ if (current[host_len] == ':') { |
+ // The ':' character precedes the optional port string. |
+ const char* port_start = current + host_len + 1; // +1 for ':' |
+ size_t port_len = strcspn(port_start, "/"); |
+ parsed_uri->port = zone->MakeCopyOfStringN(port_start, port_len); |
+ len += 1 + port_len; // +1 for ':' |
+ } else { |
+ parsed_uri->port = NULL; |
+ } |
+ return len; |
+} |
+ |
+ |
+// Performs a simple parse of a uri into its components. |
+// See RFC 3986 Section 3: Syntax. |
+bool ParseUri(const char* uri, ParsedUri* parsed_uri) { |
+ Zone* zone = Thread::Current()->zone(); |
+ |
+ // The first ':' separates the scheme from the rest of the uri. If |
+ // a ':' occurs after the first '/' it doesn't count. |
+ size_t scheme_len = strcspn(uri, ":/"); |
+ const char* rest = uri; |
+ if (uri[scheme_len] == ':') { |
+ char* scheme = zone->MakeCopyOfStringN(uri, scheme_len); |
+ StringLower(scheme); |
+ parsed_uri->scheme = scheme; |
+ rest = uri + scheme_len + 1; |
+ } else { |
+ parsed_uri->scheme = NULL; |
+ } |
+ |
+ // The first '#' separates the optional fragment |
+ const char* hash_pos = rest + strcspn(rest, "#"); |
+ if (*hash_pos == '#') { |
+ // There is a fragment part. |
+ const char* fragment_start = hash_pos + 1; |
+ parsed_uri->fragment = |
+ NormalizeEscapes(fragment_start, strlen(fragment_start)); |
+ } else { |
+ parsed_uri->fragment = NULL; |
+ } |
+ |
+ // The first '?' or '#' separates the hierarchical part from the |
+ // optional query. |
+ const char* question_pos = rest + strcspn(rest, "?#"); |
+ if (*question_pos == '?') { |
+ // There is a query part. |
+ const char* query_start = question_pos + 1; |
+ parsed_uri->query = |
+ NormalizeEscapes(query_start, (hash_pos - query_start)); |
+ } else { |
+ parsed_uri->query = NULL; |
+ } |
+ |
+ const char* path_start = rest; |
+ if (rest[0] == '/' && rest[1] == '/') { |
+ // There is an authority part. |
+ const char* authority_start = rest + 2; // 2 for '//'. |
+ |
+ intptr_t authority_len = |
+ ParseAuthority(authority_start, parsed_uri); |
+ if (authority_len < 0) { |
+ ClearParsedUri(parsed_uri); |
+ return false; |
+ } |
+ path_start = authority_start + authority_len; |
+ } else { |
+ parsed_uri->userinfo = NULL; |
+ parsed_uri->host = NULL; |
+ parsed_uri->port = NULL; |
+ } |
+ |
+ // Double slashes in the path do not parse. |
+ bool saw_slash = false; |
+ for (const char* pos = path_start; pos < question_pos; pos++) { |
+ if (*pos == '/') { |
+ if (saw_slash) { |
+ ClearParsedUri(parsed_uri); |
+ return false; |
+ } |
+ saw_slash = true; |
+ } else { |
+ saw_slash = false; |
+ } |
+ } |
+ |
+ // The path is the substring between the authority and the query. |
+ parsed_uri->path = NormalizeEscapes(path_start, (question_pos - path_start)); |
+ return true; |
+} |
+ |
+ |
+static char* RemoveLastSegment(char* current, |
+ const char* base) { |
+ if (current == base) { |
+ return current; |
+ } |
+ ASSERT(current > base); |
+ for (current--; current > base; current--) { |
+ if (*current == '/') { |
+ // We have found the beginning of the last segment. |
+ return current; |
+ } |
+ } |
+ ASSERT(current == base); |
+ return current; |
+} |
+ |
+ |
// Returns the length of the first path segment of |input|.  A leading
// '/' is counted as part of the segment; the '/' that ends the segment
// (if any) is not.
static intptr_t SegmentLength(const char* input) {
  // Include initial slash in the segment, if any.
  intptr_t length = (input[0] == '/') ? 1 : 0;

  // Extend the segment up to, but not including, the next slash.
  while (input[length] != '\0' && input[length] != '/') {
    length++;
  }
  return length;
}
+ |
+ |
+// See RFC 3986 Section 5.2.4: Remove Dot Segments. |
+static const char* RemoveDotSegments(const char* path) { |
+ const char* input = path; |
+ |
+ // The output path will always be less than or equal to the size of |
+ // the input path. |
+ Zone* zone = Thread::Current()->zone(); |
+ char* buffer = zone->Alloc<char>(strlen(path) + 1); // +1 for '\0' |
+ char* output = buffer; |
+ |
+ while (*input != '\0') { |
+ if (strncmp("../", input, 3) == 0) { |
+ // Discard initial "../" from the input. It's junk. |
+ input += 3; |
+ |
+ } else if (strncmp("./", input, 3) == 0) { |
+ // Discard initial "./" from the input. It's junk. |
+ input += 2; |
+ |
+ } else if (strncmp("/./", input, 3) == 0) { |
+ // Advance past the "/." part of the input. |
+ input += 2; |
+ |
+ } else if (strcmp("/.", input) == 0) { |
+ // Pretend the input just contains a "/". |
+ input = "/"; |
+ |
+ } else if (strncmp("/../", input, 4) == 0) { |
+ // Advance past the "/.." part of the input and remove one |
+ // segment from the output. |
+ input += 3; |
+ output = RemoveLastSegment(output, buffer); |
+ |
+ } else if (strcmp("/..", input) == 0) { |
+ // Pretend the input contains a "/" and remove one segment from |
+ // the output. |
+ input = "/"; |
+ output = RemoveLastSegment(output, buffer); |
+ |
+ } else if (strcmp("..", input) == 0) { |
+ // The input has been reduced to nothing useful. |
+ input += 2; |
+ |
+ } else if (strcmp(".", input) == 0) { |
+ // The input has been reduced to nothing useful. |
+ input += 1; |
+ |
+ } else { |
+ intptr_t segment_len = SegmentLength(input); |
+ strncpy(output, input, segment_len); |
+ output += segment_len; |
+ input += segment_len; |
+ } |
+ } |
+ *output = '\0'; |
+ return buffer; |
+} |
+ |
+ |
+// See RFC 3986 Section 5.2.3: Merge Paths. |
+static const char* MergePaths(const char* base_path, const char* ref_path) { |
+ Zone* zone = Thread::Current()->zone(); |
+ if (base_path[0] == '\0') { |
+ // If the base_path is empty, we prepend '/'. |
+ return zone->PrintToString("/%s", ref_path); |
+ } |
+ |
+ // We need to find the last '/' in base_path. |
+ char* last_slash = strrchr(base_path, '/'); |
+ if (last_slash == NULL) { |
+ // There is no slash in the base_path. Return the ref_path unchanged. |
+ return ref_path; |
+ } |
+ |
+ // We found a '/' in the base_path. Cut off everything after it and |
+ // add the ref_path. |
+ intptr_t truncated_base_len = last_slash - base_path; |
+ intptr_t ref_path_len = strlen(ref_path); |
+ intptr_t len = truncated_base_len + ref_path_len + 1; // +1 for '/' |
+ char* buffer = zone->Alloc<char>(len + 1); // +1 for '\0' |
+ |
+ // Copy truncated base. |
+ strncpy(buffer, base_path, truncated_base_len); |
+ |
+ // Add a slash. |
+ buffer[truncated_base_len] = '/'; |
+ |
+ // Copy the ref_path. |
+ strncpy((buffer + truncated_base_len + 1), ref_path, ref_path_len); |
+ |
+ // Add the trailing '\0'. |
+ buffer[len] = '\0'; |
+ |
+ return buffer; |
+} |
+ |
+ |
+static char* BuildUri(const ParsedUri& uri) { |
+ Zone* zone = Thread::Current()->zone(); |
+ ASSERT(uri.path != NULL); |
+ |
+ const char* fragment = uri.fragment == NULL ? "" : uri.fragment; |
+ const char* fragment_separator = uri.fragment == NULL ? "" : "#"; |
+ const char* query = uri.query == NULL ? "" : uri.query; |
+ const char* query_separator = uri.query == NULL ? "" : "?"; |
+ |
+ // If there is no scheme for this uri, just build a relative uri of |
+ // the form: "path[?query][#fragment]". This is sort of a |
+ // degenerate case, but it occurs when we resolve relative urls |
ahe
2016/05/31 21:44:42
I wouldn't call this a degenerate case. It's quite
turnidge
2016/06/01 20:00:40
Corrected the comment.
|
+ // inside a "dart:" library. |
+ if (uri.scheme == NULL) { |
+ ASSERT(uri.userinfo == NULL && uri.host == NULL && uri.port == NULL); |
+ ASSERT(uri.query == NULL); |
+ return zone->PrintToString("%s%s%s%s%s", |
+ uri.path, query_separator, query, |
+ fragment_separator, fragment); |
+ } |
+ |
+ // Uri with no authority: "scheme:path[?query][#fragment]" |
+ if (uri.host == NULL) { |
+ ASSERT(uri.userinfo == NULL && uri.port == NULL); |
+ return zone->PrintToString("%s:%s%s%s%s%s", |
+ uri.scheme, uri.path, query_separator, query, |
+ fragment_separator, fragment); |
+ } |
+ |
+ const char* user = uri.userinfo == NULL ? "" : uri.userinfo; |
+ const char* user_separator = uri.userinfo == NULL ? "" : "@"; |
+ const char* port = uri.port == NULL ? "" : uri.port; |
+ const char* port_separator = uri.port == NULL ? "" : ":"; |
+ |
+ // If the path doesn't start with a '/', add one. We need it to |
+ // separate the path from the authority. |
+ const char* path_separator = ((uri.path[0] == '\0' || uri.path[0] == '/') |
+ ? "" : "/"); |
+ |
+ // Uri with authority: |
+ // "scheme://[userinfo@]host[:port][/]path[?query][#fragment]" |
+ return zone->PrintToString( |
+ "%s://%s%s%s%s%s%s%s%s%s%s%s", // There is *nothing* wrong with this. |
+ uri.scheme, user, user_separator, uri.host, port_separator, port, |
+ path_separator, uri.path, query_separator, query, |
+ fragment_separator, fragment); |
+} |
+ |
+ |
+// See RFC 3986 Section 5: Reference Resolution |
+bool ResolveUri(const char* ref_uri, |
+ const char* base_uri, |
+ const char** target_uri) { |
+ // Parse the reference uri. |
+ ParsedUri ref; |
+ if (!ParseUri(ref_uri, &ref)) { |
+ *target_uri = NULL; |
+ return false; |
+ } |
+ |
+ ParsedUri target; |
+ if (ref.scheme != NULL) { |
+ if (strcmp(ref.scheme, "dart") == 0) { |
+ Zone* zone = Thread::Current()->zone(); |
+ *target_uri = zone->MakeCopyOfString(ref_uri); |
+ return true; |
+ } |
+ |
+ // When the ref_uri specifies a scheme, the base_uri is ignored. |
+ target.scheme = ref.scheme; |
+ target.userinfo = ref.userinfo; |
+ target.host = ref.host; |
+ target.port = ref.port; |
+ target.path = RemoveDotSegments(ref.path); |
+ target.query = ref.query; |
+ target.fragment = ref.fragment; |
+ *target_uri = BuildUri(target); |
+ return true; |
+ } |
+ |
+ // Parse the base uri. |
+ ParsedUri base; |
+ if (!ParseUri(base_uri, &base)) { |
+ *target_uri = NULL; |
+ return false; |
+ } |
+ |
+ if (base.scheme != NULL && strcmp(base.scheme, "dart") == 0) { |
Cutch
2016/05/24 23:53:46
nits about parenthesis:
(base.scheme != NULL) &&
turnidge
2016/05/27 21:40:06
Done.
ahe
2016/05/31 21:44:42
This is odd. Why is there a special case for the d
turnidge
2016/06/01 20:00:40
Discussed offline. Sometimes we resolve a relativ
|
+ Zone* zone = Thread::Current()->zone(); |
+ *target_uri = zone->MakeCopyOfString(ref_uri); |
+ return true; |
+ } |
+ |
+ if (ref.host != NULL) { |
+ // When the ref_uri specifies an authority, we only use the base scheme. |
+ target.scheme = base.scheme; |
+ target.userinfo = ref.userinfo; |
+ target.host = ref.host; |
+ target.port = ref.port; |
+ target.path = RemoveDotSegments(ref.path); |
+ target.query = ref.query; |
+ target.fragment = ref.fragment; |
+ *target_uri = BuildUri(target); |
+ return true; |
+ } |
+ |
+ if (ref.path[0] == '\0') { |
+ // Empty path. Use most parts of base_uri. |
+ target.scheme = base.scheme; |
+ target.userinfo = base.userinfo; |
+ target.host = base.host; |
+ target.port = base.port; |
+ target.path = base.path; |
+ target.query = ((ref.query == NULL) ? base.query : ref.query); |
+ target.fragment = ref.fragment; |
+ *target_uri = BuildUri(target); |
+ return true; |
+ |
+ } else if (ref.path[0] == '/') { |
+ // Absolute path. ref_path wins. |
+ target.scheme = base.scheme; |
+ target.userinfo = base.userinfo; |
+ target.host = base.host; |
+ target.port = base.port; |
+ target.path = RemoveDotSegments(ref.path); |
+ target.query = ref.query; |
+ target.fragment = ref.fragment; |
+ *target_uri = BuildUri(target); |
+ return true; |
+ |
+ } else { |
+ // Relative path. We need to merge base_path and ref_path. |
+ target.scheme = base.scheme; |
+ target.userinfo = base.userinfo; |
+ target.host = base.host; |
+ target.port = base.port; |
+ target.path = RemoveDotSegments(MergePaths(base.path, ref.path)); |
+ target.query = ref.query; |
+ target.fragment = ref.fragment; |
+ *target_uri = BuildUri(target); |
+ return true; |
+ } |
+} |
+ |
+} // namespace dart |