| OLD | NEW |
| 1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // Canonicalizer functions for working with and resolving relative URLs. | 5 // Canonicalizer functions for working with and resolving relative URLs. |
| 6 | 6 |
| 7 #include "base/logging.h" | 7 #include "base/logging.h" |
| 8 #include "url/url_canon.h" | 8 #include "url/url_canon.h" |
| 9 #include "url/url_canon_internal.h" | 9 #include "url/url_canon_internal.h" |
| 10 #include "url/url_constants.h" | 10 #include "url/url_constants.h" |
| 11 #include "url/url_file.h" | 11 #include "url/url_file.h" |
| 12 #include "url/url_parse_internal.h" | 12 #include "url/url_parse_internal.h" |
| 13 #include "url/url_util_internal.h" | 13 #include "url/url_util_internal.h" |
| 14 | 14 |
| 15 namespace url { | 15 namespace url { |
| 16 | 16 |
| 17 namespace { | 17 namespace { |
| 18 | 18 |
| 19 // Firefox does a case-sensitive compare (which is probably wrong--Mozilla bug | 19 // Firefox does a case-sensitive compare (which is probably wrong--Mozilla bug |
| 20 // 379034), whereas IE is case-insensetive. | 20 // 379034), whereas IE is case-insensitive. |
| 21 // | 21 // |
| 22 // We choose to be more permissive like IE. We don't need to worry about | 22 // We choose to be more permissive like IE. We don't need to worry about |
| 23 // unescaping or anything here: neither IE or Firefox allow this. We also | 23 // unescaping or anything here: neither IE nor Firefox allows this. We also |
| 24 // don't have to worry about invalid scheme characters since we are comparing | 24 // don't have to worry about invalid scheme characters since we are comparing |
| 25 // against the canonical scheme of the base. | 25 // against the canonical scheme of the base. |
| 26 // | 26 // |
| 27 // The base URL should always be canonical, therefore is ASCII. | 27 // The base URL should always be canonical, therefore it should be ASCII. |
| 28 template<typename CHAR> | 28 template<typename CHAR> |
| 29 bool AreSchemesEqual(const char* base, | 29 bool AreSchemesEqual(const char* base, |
| 30 const Component& base_scheme, | 30 const Component& base_scheme, |
| 31 const CHAR* cmp, | 31 const CHAR* cmp, |
| 32 const Component& cmp_scheme) { | 32 const Component& cmp_scheme) { |
| 33 if (base_scheme.len != cmp_scheme.len) | 33 if (base_scheme.len != cmp_scheme.len) |
| 34 return false; | 34 return false; |
| 35 for (int i = 0; i < base_scheme.len; i++) { | 35 for (int i = 0; i < base_scheme.len; i++) { |
| 36 // We assume the base is already canonical, so we don't have to | 36 // We assume the base is already canonical, so we don't have to |
| 37 // canonicalize it. | 37 // canonicalize it. |
| (...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 75 TrimURL(url, &begin, &url_len); | 75 TrimURL(url, &begin, &url_len); |
| 76 if (begin >= url_len) { | 76 if (begin >= url_len) { |
| 77 // Empty URLs are relative, but do nothing. | 77 // Empty URLs are relative, but do nothing. |
| 78 *relative_component = Component(begin, 0); | 78 *relative_component = Component(begin, 0); |
| 79 *is_relative = true; | 79 *is_relative = true; |
| 80 return true; | 80 return true; |
| 81 } | 81 } |
| 82 | 82 |
| 83 #ifdef WIN32 | 83 #ifdef WIN32 |
| 84 // We special case paths like "C:\foo" so they can link directly to the | 84 // We special case paths like "C:\foo" so they can link directly to the |
| 85 // file on Windows (IE compatability). The security domain stuff should | 85 // file on Windows (IE compatibility). The security domain stuff should |
| 86 // prevent a link like this from actually being followed if its on a | 86 // prevent a link like this from actually being followed if it's on a |
| 87 // web page. | 87 // web page. |
| 88 // | 88 // |
| 89 // We treat "C:/foo" as an absolute URL. We can go ahead and treat "/c:/" | 89 // We treat "C:/foo" as an absolute URL. We can go ahead and treat "/c:/" |
| 90 // as relative, as this will just replace the path when the base scheme | 90 // as relative, as this will just replace the path when the base scheme |
| 91 // is a file and the answer will still be correct. | 91 // is a file and the answer will still be correct. |
| 92 // | 92 // |
| 93 // We require strict backslashes when detecting UNC since two forward | 93 // We require strict backslashes when detecting UNC since two forward |
| 94 // shashes should be treated a a relative URL with a hostname. | 94 // slashes should be treated as a relative URL with a hostname. |
| 95 if (DoesBeginWindowsDriveSpec(url, begin, url_len) || | 95 if (DoesBeginWindowsDriveSpec(url, begin, url_len) || |
| 96 DoesBeginUNCPath(url, begin, url_len, true)) | 96 DoesBeginUNCPath(url, begin, url_len, true)) |
| 97 return true; | 97 return true; |
| 98 #endif // WIN32 | 98 #endif // WIN32 |
| 99 | 99 |
| 100 // See if we've got a scheme, if not, we know this is a relative URL. | 100 // See if we've got a scheme, if not, we know this is a relative URL. |
| 101 // BUT: Just because we have a scheme, doesn't make it absolute. | 101 // BUT, just because we have a scheme, doesn't make it absolute. |
| 102 // "http:foo.html" is a relative URL with path "foo.html". If the scheme is | 102 // "http:foo.html" is a relative URL with path "foo.html". If the scheme is |
| 103 // empty, we treat it as relative (":foo") like IE does. | 103 // empty, we treat it as relative (":foo"), like IE does. |
| 104 Component scheme; | 104 Component scheme; |
| 105 const bool scheme_is_empty = | 105 const bool scheme_is_empty = |
| 106 !ExtractScheme(url, url_len, &scheme) || scheme.len == 0; | 106 !ExtractScheme(url, url_len, &scheme) || scheme.len == 0; |
| 107 if (scheme_is_empty) { | 107 if (scheme_is_empty) { |
| 108 if (url[begin] == '#') { | 108 if (url[begin] == '#') { |
| 109 // |url| is a bare fragement (e.g. "#foo"). This can be resolved against | 109 // |url| is a bare fragment (e.g. "#foo"). This can be resolved against |
| 110 // any base. Fall-through. | 110 // any base. Fall-through. |
| 111 } else if (!is_base_hierarchical) { | 111 } else if (!is_base_hierarchical) { |
| 112 // Don't allow relative URLs if the base scheme doesn't support it. | 112 // Don't allow relative URLs if the base scheme doesn't support it. |
| 113 return false; | 113 return false; |
| 114 } | 114 } |
| 115 | 115 |
| 116 *relative_component = MakeRange(begin, url_len); | 116 *relative_component = MakeRange(begin, url_len); |
| 117 *is_relative = true; | 117 *is_relative = true; |
| 118 return true; | 118 return true; |
| 119 } | 119 } |
| (...skipping 18 matching lines...) Expand all Loading... |
| 138 | 138 |
| 139 // When the scheme that they both share is not hierarchical, treat the | 139 // When the scheme that they both share is not hierarchical, treat the |
| 140 // incoming scheme as absolute (this way with the base of "data:foo", | 140 // incoming scheme as absolute (this way with the base of "data:foo", |
| 141 // "data:bar" will be reported as absolute. | 141 // "data:bar" will be reported as absolute). |
| 142 if (!is_base_hierarchical) | 142 if (!is_base_hierarchical) |
| 143 return true; | 143 return true; |
| 144 | 144 |
| 145 int colon_offset = scheme.end(); | 145 int colon_offset = scheme.end(); |
| 146 | 146 |
| 147 // If it's a filesystem URL, the only valid way to make it relative is not to | 147 // If it's a filesystem URL, the only valid way to make it relative is not to |
| 148 // supply a scheme. There's no equivalent to e.g. http:index.html. | 148 // supply a scheme. There's no equivalent to e.g. http:index.html. |
| 149 if (CompareSchemeComponent(url, scheme, kFileSystemScheme)) | 149 if (CompareSchemeComponent(url, scheme, kFileSystemScheme)) |
| 150 return true; | 150 return true; |
| 151 | 151 |
| 152 // ExtractScheme guarantees that the colon immediately follows what it | 152 // ExtractScheme guarantees that the colon immediately follows what it |
| 153 // considers to be the scheme. CountConsecutiveSlashes will handle the | 153 // considers to be the scheme. CountConsecutiveSlashes will handle the |
| 154 // case where the begin offset is the end of the input. | 154 // case where the begin offset is the end of the input. |
| 155 int num_slashes = CountConsecutiveSlashes(url, colon_offset + 1, url_len); | 155 int num_slashes = CountConsecutiveSlashes(url, colon_offset + 1, url_len); |
| 156 | 156 |
| 157 if (num_slashes == 0 || num_slashes == 1) { | 157 if (num_slashes == 0 || num_slashes == 1) { |
| 158 // No slashes means it's a relative path like "http:foo.html". One slash | 158 // No slashes means it's a relative path like "http:foo.html". One slash |
| (...skipping 228 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 387 replacements.SetHost(relative_url, relative_parsed.host); | 387 replacements.SetHost(relative_url, relative_parsed.host); |
| 388 replacements.SetPort(relative_url, relative_parsed.port); | 388 replacements.SetPort(relative_url, relative_parsed.port); |
| 389 replacements.SetPath(relative_url, relative_parsed.path); | 389 replacements.SetPath(relative_url, relative_parsed.path); |
| 390 replacements.SetQuery(relative_url, relative_parsed.query); | 390 replacements.SetQuery(relative_url, relative_parsed.query); |
| 391 replacements.SetRef(relative_url, relative_parsed.ref); | 391 replacements.SetRef(relative_url, relative_parsed.ref); |
| 392 | 392 |
| 393 return ReplaceStandardURL(base_url, base_parsed, replacements, | 393 return ReplaceStandardURL(base_url, base_parsed, replacements, |
| 394 query_converter, output, out_parsed); | 394 query_converter, output, out_parsed); |
| 395 } | 395 } |
| 396 | 396 |
| 397 // Resolves a relative URL that happens to be an absolute file path. Examples | 397 // Resolves a relative URL that happens to be an absolute file path. Examples |
| 398 // include: "//hostname/path", "/c:/foo", and "//hostname/c:/foo". | 398 // include: "//hostname/path", "/c:/foo", and "//hostname/c:/foo". |
| 399 template<typename CHAR> | 399 template<typename CHAR> |
| 400 bool DoResolveAbsoluteFile(const CHAR* relative_url, | 400 bool DoResolveAbsoluteFile(const CHAR* relative_url, |
| 401 const Component& relative_component, | 401 const Component& relative_component, |
| 402 CharsetConverter* query_converter, | 402 CharsetConverter* query_converter, |
| 403 CanonOutput* output, | 403 CanonOutput* output, |
| 404 Parsed* out_parsed) { | 404 Parsed* out_parsed) { |
| 405 // Parse the file URL. The file URl parsing function uses the same logic | 405 // Parse the file URL. The file URL parsing function uses the same logic |
| 406 // as we do for determining if the file is absolute, in which case it will | 406 // as we do for determining if the file is absolute, in which case it will |
| 407 // not bother to look for a scheme. | 407 // not bother to look for a scheme. |
| (...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 453 int num_slashes = CountConsecutiveSlashes( | 453 int num_slashes = CountConsecutiveSlashes( |
| 454 relative_url, relative_component.begin, relative_component.end()); | 454 relative_url, relative_component.begin, relative_component.end()); |
| 455 | 455 |
| 456 #ifdef WIN32 | 456 #ifdef WIN32 |
| 457 // On Windows, two slashes for a file path (regardless of which direction | 457 // On Windows, two slashes for a file path (regardless of which direction |
| 458 // they are) means that it's UNC. Two backslashes on any base scheme mean | 458 // they are) means that it's UNC. Two backslashes on any base scheme mean |
| 459 // that it's an absolute UNC path (we use the base_is_file flag to control | 459 // that it's an absolute UNC path (we use the base_is_file flag to control |
| 460 // how strict the UNC finder is). | 460 // how strict the UNC finder is). |
| 461 // | 461 // |
| 462 // We also allow Windows absolute drive specs on any scheme (for example | 462 // We also allow Windows absolute drive specs on any scheme (for example |
| 463 // "c:\foo") like IE does. There must be no preceeding slashes in this | 463 // "c:\foo") like IE does. There must be no preceding slashes in this |
| 464 // case (we reject anything like "/c:/foo") because that should be treated | 464 // case (we reject anything like "/c:/foo") because that should be treated |
| 465 // as a path. For file URLs, we allow any number of slashes since that would | 465 // as a path. For file URLs, we allow any number of slashes since that would |
| 466 // be setting the path. | 466 // be setting the path. |
| 467 // | 467 // |
| 468 // This assumes the absolute path resolver handles absolute URLs like this | 468 // This assumes the absolute path resolver handles absolute URLs like this |
| 469 // properly. DoCanonicalize does this. | 469 // properly. DoCanonicalize does this. |
| 470 int after_slashes = relative_component.begin + num_slashes; | 470 int after_slashes = relative_component.begin + num_slashes; |
| 471 if (DoesBeginUNCPath(relative_url, relative_component.begin, | 471 if (DoesBeginUNCPath(relative_url, relative_component.begin, |
| 472 relative_component.end(), !base_is_file) || | 472 relative_component.end(), !base_is_file) || |
| 473 ((num_slashes == 0 || base_is_file) && | 473 ((num_slashes == 0 || base_is_file) && |
| (...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 551 const Component& relative_component, | 551 const Component& relative_component, |
| 552 CharsetConverter* query_converter, | 552 CharsetConverter* query_converter, |
| 553 CanonOutput* output, | 553 CanonOutput* output, |
| 554 Parsed* out_parsed) { | 554 Parsed* out_parsed) { |
| 555 return DoResolveRelativeURL<base::char16>( | 555 return DoResolveRelativeURL<base::char16>( |
| 556 base_url, base_parsed, base_is_file, relative_url, | 556 base_url, base_parsed, base_is_file, relative_url, |
| 557 relative_component, query_converter, output, out_parsed); | 557 relative_component, query_converter, output, out_parsed); |
| 558 } | 558 } |
| 559 | 559 |
| 560 } // namespace url | 560 } // namespace url |
| OLD | NEW |