OLD | NEW |
(Empty) | |
| 1 // Copyright (c) 2016, the Dart project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE file. |
| 4 |
| 5 #include "vm/uri.h" |
| 6 |
| 7 #include "vm/zone.h" |
| 8 |
| 9 namespace dart { |
| 10 |
// Converts every ASCII upper-case letter ('A'..'Z') of the NUL-terminated
// string |str| to its lower-case counterpart. The string is modified in
// place; all other bytes are left untouched.
static void StringLower(char* str) {
  for (char* cursor = str; *cursor != '\0'; ++cursor) {
    const char ch = *cursor;
    const bool is_upper = (ch >= 'A') && (ch <= 'Z');
    if (is_upper) {
      // Shift by the distance between the upper- and lower-case ranges.
      *cursor = ch + ('a' - 'A');
    }
  }
}
| 20 |
| 21 |
// Returns true if |value| is one of the "unreserved" URI characters:
// an ASCII letter, an ASCII digit, or one of '-', '.', '_', '~'.
static bool IsUnreservedChar(intptr_t value) {
  const bool is_lower = (value >= 'a') && (value <= 'z');
  const bool is_upper = (value >= 'A') && (value <= 'Z');
  const bool is_digit = (value >= '0') && (value <= '9');
  if (is_lower || is_upper || is_digit) {
    return true;
  }
  // The four punctuation marks that never need escaping.
  switch (value) {
    case '-':
    case '.':
    case '_':
    case '~':
      return true;
    default:
      return false;
  }
}
| 31 |
| 32 |
// Returns true if |value| is one of the URI delimiter characters, i.e. a
// member of either group below (the "gen-delims" and "sub-delims" sets of
// RFC 3986 Section 2.2).
static bool IsDelimiter(intptr_t value) {
  // ":" "/" "?" "#" "[" "]" "@"
  const bool is_gen_delim =
      (value == ':') || (value == '/') || (value == '?') || (value == '#') ||
      (value == '[') || (value == ']') || (value == '@');
  // "!" "$" "&" "'" "(" ")" "*" "+" "," ";" "="
  const bool is_sub_delim =
      (value == '!') || (value == '$') || (value == '&') || (value == '\'') ||
      (value == '(') || (value == ')') || (value == '*') || (value == '+') ||
      (value == ',') || (value == ';') || (value == '=');
  return is_gen_delim || is_sub_delim;
}
| 45 |
| 46 |
// Returns true if |value| is an ASCII hexadecimal digit: '0'..'9',
// 'A'..'F' or 'a'..'f'.
//
// The lower bound of the decimal range must be an inclusive ">=" check.
// Written as ">-" it parses as "value > -'0'", which accepts every
// character from -47 up to '9' (for example '/', '!' and ' ') as a digit.
static bool IsHexDigit(char value) {
  return ((value >= '0' && value <= '9') ||
          (value >= 'A' && value <= 'F') ||
          (value >= 'a' && value <= 'f'));
}
| 52 |
| 53 |
| 54 static int HexValue(char digit) { |
| 55 if ((digit >= '0' && digit <= '9')) { |
| 56 return digit - '0'; |
| 57 } |
| 58 if ((digit >= 'A' && digit <= 'F')) { |
| 59 return digit - 'A' + 10; |
| 60 } |
| 61 if ((digit >= 'a' && digit <= 'f')) { |
| 62 return digit - 'a' + 10; |
| 63 } |
| 64 UNREACHABLE(); |
| 65 return 0; |
| 66 } |
| 67 |
| 68 |
| 69 static int GetEscapedValue(const char* str, intptr_t pos, intptr_t len) { |
| 70 if (pos + 2 >= len) { |
| 71 // Not enough room for a valid escape sequence. |
| 72 return -1; |
| 73 } |
| 74 if (str[pos] != '%') { |
| 75 // Escape sequences start with '%'. |
| 76 return -1; |
| 77 } |
| 78 |
| 79 char digit1 = str[pos + 1]; |
| 80 char digit2 = str[pos + 2]; |
| 81 if (!IsHexDigit(digit1) || !IsHexDigit(digit2)) { |
| 82 // Invalid escape sequence. Ignore it. |
| 83 return -1; |
| 84 } |
| 85 return HexValue(digit1) * 16 + HexValue(digit2); |
| 86 } |
| 87 |
| 88 |
| 89 static char* NormalizeEscapes(const char* str, intptr_t len) { |
| 90 // Allocate the buffer. |
| 91 Zone* zone = Thread::Current()->zone(); |
| 92 // We multiply len by three because a percent-escape sequence is |
| 93 // three characters long (e.g. ' ' -> '%20). +1 for '\0'. We could |
| 94 // take two passes through the string and avoid the excess |
| 95 // allocation, but it's zone-memory so it doesn't seem necessary. |
| 96 char* buffer = zone->Alloc<char>(len * 3 + 1); |
| 97 |
| 98 // Copy the string, normalizing as we go. |
| 99 intptr_t buffer_pos = 0; |
| 100 intptr_t pos = 0; |
| 101 while (pos < len) { |
| 102 int escaped_value = GetEscapedValue(str, pos, len); |
| 103 if (escaped_value >= 0) { |
| 104 // If one of the special "unreserved" characters has been |
| 105 // escaped, revert the escaping. Otherwise preserve the |
| 106 // escaping. |
| 107 if (IsUnreservedChar(escaped_value)) { |
| 108 buffer[buffer_pos] = escaped_value; |
| 109 buffer_pos++; |
| 110 } else { |
| 111 OS::SNPrint(buffer + buffer_pos, 4, "%%%02X", escaped_value); |
| 112 buffer_pos += 3; |
| 113 } |
| 114 pos += 3; |
| 115 } else { |
| 116 char c = str[pos]; |
| 117 // If a delimiter or unreserved character is currently not |
| 118 // escaped, preserve that. If there is a busted %-sequence in |
| 119 // the input, preserve that too. |
| 120 if (c == '%' || IsDelimiter(c) || IsUnreservedChar(c)) { |
| 121 buffer[buffer_pos] = c; |
| 122 buffer_pos++; |
| 123 } else { |
| 124 // Escape funky characters. |
| 125 OS::SNPrint(buffer + buffer_pos, 4, "%%%02X", c); |
| 126 buffer_pos += 3; |
| 127 } |
| 128 pos++; |
| 129 } |
| 130 } |
| 131 buffer[buffer_pos] = '\0'; |
| 132 return buffer; |
| 133 } |
| 134 |
| 135 |
| 136 static void ClearParsedUri(ParsedUri* parsed_uri) { |
| 137 parsed_uri->scheme = NULL; |
| 138 parsed_uri->userinfo = NULL; |
| 139 parsed_uri->host = NULL; |
| 140 parsed_uri->port = NULL; |
| 141 parsed_uri->path = NULL; |
| 142 parsed_uri->query = NULL; |
| 143 parsed_uri->fragment = NULL; |
| 144 } |
| 145 |
| 146 |
| 147 static intptr_t ParseAuthority(const char* authority, ParsedUri* parsed_uri) { |
| 148 Zone* zone = Thread::Current()->zone(); |
| 149 const char* current = authority; |
| 150 intptr_t len = 0; |
| 151 |
| 152 size_t userinfo_len = strcspn(current, "@/"); |
| 153 if (current[userinfo_len] == '@') { |
| 154 // The '@' character follows the optional userinfo string. |
| 155 parsed_uri->userinfo = NormalizeEscapes(current, userinfo_len); |
| 156 current += userinfo_len + 1; |
| 157 len += userinfo_len + 1; |
| 158 } else { |
| 159 parsed_uri->userinfo = NULL; |
| 160 } |
| 161 |
| 162 size_t host_len = strcspn(current, ":/"); |
| 163 char* host = NormalizeEscapes(current, host_len); |
| 164 StringLower(host); |
| 165 parsed_uri->host = host; |
| 166 len += host_len; |
| 167 |
| 168 if (current[host_len] == ':') { |
| 169 // The ':' character precedes the optional port string. |
| 170 const char* port_start = current + host_len + 1; // +1 for ':' |
| 171 size_t port_len = strcspn(port_start, "/"); |
| 172 parsed_uri->port = zone->MakeCopyOfStringN(port_start, port_len); |
| 173 len += 1 + port_len; // +1 for ':' |
| 174 } else { |
| 175 parsed_uri->port = NULL; |
| 176 } |
| 177 return len; |
| 178 } |
| 179 |
| 180 |
| 181 // Performs a simple parse of a uri into its components. |
| 182 // See RFC 3986 Section 3: Syntax. |
| 183 bool ParseUri(const char* uri, ParsedUri* parsed_uri) { |
| 184 Zone* zone = Thread::Current()->zone(); |
| 185 |
| 186 // The first ':' separates the scheme from the rest of the uri. If |
| 187 // a ':' occurs after the first '/' it doesn't count. |
| 188 size_t scheme_len = strcspn(uri, ":/"); |
| 189 const char* rest = uri; |
| 190 if (uri[scheme_len] == ':') { |
| 191 char* scheme = zone->MakeCopyOfStringN(uri, scheme_len); |
| 192 StringLower(scheme); |
| 193 parsed_uri->scheme = scheme; |
| 194 rest = uri + scheme_len + 1; |
| 195 } else { |
| 196 parsed_uri->scheme = NULL; |
| 197 } |
| 198 |
| 199 // The first '#' separates the optional fragment |
| 200 const char* hash_pos = rest + strcspn(rest, "#"); |
| 201 if (*hash_pos == '#') { |
| 202 // There is a fragment part. |
| 203 const char* fragment_start = hash_pos + 1; |
| 204 parsed_uri->fragment = |
| 205 NormalizeEscapes(fragment_start, strlen(fragment_start)); |
| 206 } else { |
| 207 parsed_uri->fragment = NULL; |
| 208 } |
| 209 |
| 210 // The first '?' or '#' separates the hierarchical part from the |
| 211 // optional query. |
| 212 const char* question_pos = rest + strcspn(rest, "?#"); |
| 213 if (*question_pos == '?') { |
| 214 // There is a query part. |
| 215 const char* query_start = question_pos + 1; |
| 216 parsed_uri->query = |
| 217 NormalizeEscapes(query_start, (hash_pos - query_start)); |
| 218 } else { |
| 219 parsed_uri->query = NULL; |
| 220 } |
| 221 |
| 222 const char* path_start = rest; |
| 223 if (rest[0] == '/' && rest[1] == '/') { |
| 224 // There is an authority part. |
| 225 const char* authority_start = rest + 2; // 2 for '//'. |
| 226 |
| 227 intptr_t authority_len = |
| 228 ParseAuthority(authority_start, parsed_uri); |
| 229 if (authority_len < 0) { |
| 230 ClearParsedUri(parsed_uri); |
| 231 return false; |
| 232 } |
| 233 path_start = authority_start + authority_len; |
| 234 } else { |
| 235 parsed_uri->userinfo = NULL; |
| 236 parsed_uri->host = NULL; |
| 237 parsed_uri->port = NULL; |
| 238 } |
| 239 |
| 240 // Double slashes in the path do not parse. |
| 241 bool saw_slash = false; |
| 242 for (const char* pos = path_start; pos < question_pos; pos++) { |
| 243 if (*pos == '/') { |
| 244 if (saw_slash) { |
| 245 ClearParsedUri(parsed_uri); |
| 246 return false; |
| 247 } |
| 248 saw_slash = true; |
| 249 } else { |
| 250 saw_slash = false; |
| 251 } |
| 252 } |
| 253 |
| 254 // The path is the substring between the authority and the query. |
| 255 parsed_uri->path = NormalizeEscapes(path_start, (question_pos - path_start)); |
| 256 return true; |
| 257 } |
| 258 |
| 259 |
| 260 static char* RemoveLastSegment(char* current, |
| 261 char* base, |
| 262 bool relative) { |
| 263 if (relative) { |
| 264 // If we are removing segments from a relative url, do not remove |
| 265 // any initial ".." segments. |
| 266 if (current == base || |
| 267 (((current - base) == 1) && (base[0] == '/'))) { |
| 268 strncpy(base, "/..", 3); |
| 269 return base + 3; |
| 270 } else if (((current - base) >= 3) && |
| 271 (strncmp(current - 3, "/..", 3) == 0)) { |
| 272 // We have run out of segments to remove. Since the base is |
| 273 // relative, start adding ".."s onto the output buffer. |
| 274 strncpy(current, "/..", 3); |
| 275 return current + 3; |
| 276 } |
| 277 } |
| 278 if (current == base) { |
| 279 return current; |
| 280 } |
| 281 ASSERT(current > base); |
| 282 for (current--; current > base; current--) { |
| 283 if (*current == '/') { |
| 284 // We have found the beginning of the last segment. |
| 285 return current; |
| 286 } |
| 287 } |
| 288 ASSERT(current == base); |
| 289 return current; |
| 290 } |
| 291 |
| 292 |
// Returns the length of the path segment at the start of |input|. A
// leading '/' counts as part of the segment; the segment ends right
// before the next '/' or at the end of the string.
static intptr_t SegmentLength(const char* input) {
  // 1 when the segment starts with a slash, 0 otherwise.
  const intptr_t leading_slash = (input[0] == '/') ? 1 : 0;
  // Everything up to (but not including) the following slash.
  const intptr_t body_len =
      static_cast<intptr_t>(strcspn(input + leading_slash, "/"));
  return leading_slash + body_len;
}
| 305 |
| 306 |
| 307 // See RFC 3986 Section 5.2.4: Remove Dot Segments. |
| 308 static const char* RemoveDotSegments(const char* path, bool relative) { |
| 309 const char* input = path; |
| 310 |
| 311 // The output path will always be less than or equal to the size of |
| 312 // the input path. |
| 313 Zone* zone = Thread::Current()->zone(); |
| 314 char* buffer = zone->Alloc<char>(strlen(path) + 1); // +1 for '\0' |
| 315 char* output = buffer; |
| 316 |
| 317 while (*input != '\0') { |
| 318 if (strncmp("../", input, 3) == 0) { |
| 319 // Discard initial "../" from the input. It's junk. |
| 320 if (relative) { |
| 321 output = RemoveLastSegment(output, buffer, relative); |
| 322 } |
| 323 input += 3; |
| 324 |
| 325 } else if (strncmp("./", input, 3) == 0) { |
| 326 // Discard initial "./" from the input. It's junk. |
| 327 input += 2; |
| 328 |
| 329 } else if (strncmp("/./", input, 3) == 0) { |
| 330 // Advance past the "/." part of the input. |
| 331 input += 2; |
| 332 |
| 333 } else if (strcmp("/.", input) == 0) { |
| 334 // Pretend the input just contains a "/". |
| 335 input = "/"; |
| 336 |
| 337 } else if (strncmp("/../", input, 4) == 0) { |
| 338 // Advance past the "/.." part of the input and remove one |
| 339 // segment from the output. |
| 340 input += 3; |
| 341 output = RemoveLastSegment(output, buffer, relative); |
| 342 |
| 343 } else if (strcmp("/..", input) == 0) { |
| 344 // Pretend the input contains a "/" and remove one segment from |
| 345 // the output. |
| 346 input = "/"; |
| 347 output = RemoveLastSegment(output, buffer, relative); |
| 348 |
| 349 } else if (strcmp("..", input) == 0) { |
| 350 // The input has been reduced to nothing useful. |
| 351 input += 2; |
| 352 |
| 353 } else if (strcmp(".", input) == 0) { |
| 354 // The input has been reduced to nothing useful. |
| 355 input += 1; |
| 356 |
| 357 } else { |
| 358 intptr_t segment_len = SegmentLength(input); |
| 359 if (input[0] != '/' && output != buffer) { |
| 360 *output = '/'; |
| 361 output++; |
| 362 } |
| 363 strncpy(output, input, segment_len); |
| 364 output += segment_len; |
| 365 input += segment_len; |
| 366 } |
| 367 } |
| 368 *output = '\0'; |
| 369 if (relative) { |
| 370 // When resolving against a relative base path, the result should |
| 371 // be relative. |
| 372 if (buffer[0] == '/') { |
| 373 buffer = buffer + 1; |
| 374 } |
| 375 // If we have wittled the path down to nothing, normalize to "./". |
| 376 if (buffer[0] == '\0') { |
| 377 return "./"; |
| 378 } |
| 379 } |
| 380 return buffer; |
| 381 } |
| 382 |
| 383 |
| 384 // See RFC 3986 Section 5.2.3: Merge Paths. |
| 385 static const char* MergePaths(const char* base_path, const char* ref_path) { |
| 386 Zone* zone = Thread::Current()->zone(); |
| 387 if (base_path[0] == '\0') { |
| 388 // If the base_path is empty, we prepend '/'. |
| 389 return zone->PrintToString("/%s", ref_path); |
| 390 } |
| 391 |
| 392 // We need to find the last '/' in base_path. |
| 393 char* last_slash = strrchr(base_path, '/'); |
| 394 if (last_slash == NULL) { |
| 395 // There is no slash in the base_path. Return the ref_path unchanged. |
| 396 return ref_path; |
| 397 } |
| 398 |
| 399 // We found a '/' in the base_path. Cut off everything after it and |
| 400 // add the ref_path. |
| 401 intptr_t truncated_base_len = last_slash - base_path; |
| 402 intptr_t ref_path_len = strlen(ref_path); |
| 403 intptr_t len = truncated_base_len + ref_path_len + 1; // +1 for '/' |
| 404 char* buffer = zone->Alloc<char>(len + 1); // +1 for '\0' |
| 405 |
| 406 // Copy truncated base. |
| 407 strncpy(buffer, base_path, truncated_base_len); |
| 408 |
| 409 // Add a slash. |
| 410 buffer[truncated_base_len] = '/'; |
| 411 |
| 412 // Copy the ref_path. |
| 413 strncpy((buffer + truncated_base_len + 1), ref_path, ref_path_len); |
| 414 |
| 415 // Add the trailing '\0'. |
| 416 buffer[len] = '\0'; |
| 417 |
| 418 return buffer; |
| 419 } |
| 420 |
| 421 |
| 422 static char* BuildUri(const ParsedUri& uri) { |
| 423 Zone* zone = Thread::Current()->zone(); |
| 424 ASSERT(uri.path != NULL); |
| 425 |
| 426 const char* fragment = uri.fragment == NULL ? "" : uri.fragment; |
| 427 const char* fragment_separator = uri.fragment == NULL ? "" : "#"; |
| 428 const char* query = uri.query == NULL ? "" : uri.query; |
| 429 const char* query_separator = uri.query == NULL ? "" : "?"; |
| 430 |
| 431 // If there is no scheme for this uri, just build a relative uri of |
| 432 // the form: "path[?query][#fragment]". This is sort of a |
| 433 // degenerate case, but it occurs when we resolve relative urls |
| 434 // inside a "dart:" library. |
| 435 if (uri.scheme == NULL) { |
| 436 ASSERT(uri.userinfo == NULL && uri.host == NULL && uri.port == NULL); |
| 437 return zone->PrintToString("%s%s%s%s%s", |
| 438 uri.path, query_separator, query, |
| 439 fragment_separator, fragment); |
| 440 } |
| 441 |
| 442 // Uri with no authority: "scheme:path[?query][#fragment]" |
| 443 if (uri.host == NULL) { |
| 444 ASSERT(uri.userinfo == NULL && uri.port == NULL); |
| 445 return zone->PrintToString("%s:%s%s%s%s%s", |
| 446 uri.scheme, uri.path, query_separator, query, |
| 447 fragment_separator, fragment); |
| 448 } |
| 449 |
| 450 const char* user = uri.userinfo == NULL ? "" : uri.userinfo; |
| 451 const char* user_separator = uri.userinfo == NULL ? "" : "@"; |
| 452 const char* port = uri.port == NULL ? "" : uri.port; |
| 453 const char* port_separator = uri.port == NULL ? "" : ":"; |
| 454 |
| 455 // If the path doesn't start with a '/', add one. We need it to |
| 456 // separate the path from the authority. |
| 457 const char* path_separator = ((uri.path[0] == '\0' || uri.path[0] == '/') |
| 458 ? "" : "/"); |
| 459 |
| 460 // Uri with authority: |
| 461 // "scheme://[userinfo@]host[:port][/]path[?query][#fragment]" |
| 462 return zone->PrintToString( |
| 463 "%s://%s%s%s%s%s%s%s%s%s%s%s", // There is *nothing* wrong with this. |
| 464 uri.scheme, user, user_separator, uri.host, port_separator, port, |
| 465 path_separator, uri.path, query_separator, query, |
| 466 fragment_separator, fragment); |
| 467 } |
| 468 |
| 469 |
| 470 // See RFC 3986 Section 5: Reference Resolution |
| 471 // |
| 472 // If the base uri is a relative path with no scheme or authority |
| 473 // specified, then we diverge from the spec and instead we resolve the |
| 474 // uri in a manner which is consistent with the dart:core Uri |
| 475 // implementation. Nicely, this makes uri resolution associative: |
| 476 // |
| 477 // resolve(resolve(absUrl, relUrl1), relUrl2) |
| 478 // == |
| 479 // resolve((absUrl, resolve(relUrl1, relUrl2)) |
| 480 // |
| 481 // In practice, during uri canonicalization the base uri is always |
| 482 // absolute, so the issue is moot, but consistency is nice to have. |
| 483 bool ResolveUri(const char* ref_uri, |
| 484 const char* base_uri, |
| 485 const char** target_uri) { |
| 486 // Parse the reference uri. |
| 487 ParsedUri ref; |
| 488 if (!ParseUri(ref_uri, &ref)) { |
| 489 *target_uri = NULL; |
| 490 return false; |
| 491 } |
| 492 |
| 493 ParsedUri target; |
| 494 if (ref.scheme != NULL) { |
| 495 if (strcmp(ref.scheme, "dart") == 0) { |
| 496 Zone* zone = Thread::Current()->zone(); |
| 497 *target_uri = zone->MakeCopyOfString(ref_uri); |
| 498 return true; |
| 499 } |
| 500 |
| 501 // When the ref_uri specifies a scheme, the base_uri is ignored. |
| 502 target.scheme = ref.scheme; |
| 503 target.userinfo = ref.userinfo; |
| 504 target.host = ref.host; |
| 505 target.port = ref.port; |
| 506 target.path = RemoveDotSegments(ref.path, false); |
| 507 target.query = ref.query; |
| 508 target.fragment = ref.fragment; |
| 509 *target_uri = BuildUri(target); |
| 510 return true; |
| 511 } |
| 512 |
| 513 // Parse the base uri. |
| 514 ParsedUri base; |
| 515 if (!ParseUri(base_uri, &base)) { |
| 516 *target_uri = NULL; |
| 517 return false; |
| 518 } |
| 519 |
| 520 if ((base.scheme != NULL) && strcmp(base.scheme, "dart") == 0) { |
| 521 Zone* zone = Thread::Current()->zone(); |
| 522 *target_uri = zone->MakeCopyOfString(ref_uri); |
| 523 return true; |
| 524 } |
| 525 |
| 526 if (ref.host != NULL) { |
| 527 // When the ref_uri specifies an authority, we only use the base scheme. |
| 528 target.scheme = base.scheme; |
| 529 target.userinfo = ref.userinfo; |
| 530 target.host = ref.host; |
| 531 target.port = ref.port; |
| 532 target.path = RemoveDotSegments(ref.path, false); |
| 533 target.query = ref.query; |
| 534 target.fragment = ref.fragment; |
| 535 *target_uri = BuildUri(target); |
| 536 return true; |
| 537 } |
| 538 |
| 539 if (ref.path[0] == '\0') { |
| 540 // Empty path. Use most parts of base_uri. |
| 541 target.scheme = base.scheme; |
| 542 target.userinfo = base.userinfo; |
| 543 target.host = base.host; |
| 544 target.port = base.port; |
| 545 target.path = base.path; |
| 546 target.query = ((ref.query == NULL) ? base.query : ref.query); |
| 547 target.fragment = ref.fragment; |
| 548 *target_uri = BuildUri(target); |
| 549 return true; |
| 550 |
| 551 } else if (ref.path[0] == '/') { |
| 552 // Absolute path. ref_path wins. |
| 553 target.scheme = base.scheme; |
| 554 target.userinfo = base.userinfo; |
| 555 target.host = base.host; |
| 556 target.port = base.port; |
| 557 target.path = RemoveDotSegments(ref.path, false); |
| 558 target.query = ref.query; |
| 559 target.fragment = ref.fragment; |
| 560 *target_uri = BuildUri(target); |
| 561 return true; |
| 562 |
| 563 } else { |
| 564 // Relative path. We need to merge base_path and ref_path. |
| 565 bool relative_base = (base.scheme == NULL && |
| 566 base.host == NULL && |
| 567 base.path[0] != '/'); |
| 568 |
| 569 target.scheme = base.scheme; |
| 570 target.userinfo = base.userinfo; |
| 571 target.host = base.host; |
| 572 target.port = base.port; |
| 573 target.path = RemoveDotSegments(MergePaths(base.path, ref.path), |
| 574 relative_base); |
| 575 target.query = ref.query; |
| 576 target.fragment = ref.fragment; |
| 577 *target_uri = BuildUri(target); |
| 578 return true; |
| 579 } |
| 580 } |
| 581 |
| 582 } // namespace dart |
OLD | NEW |