OLD | NEW |
1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // Canonicalizer functions for working with and resolving relative URLs. | 5 // Canonicalizer functions for working with and resolving relative URLs. |
6 | 6 |
7 #include "base/logging.h" | 7 #include "base/logging.h" |
8 #include "url/url_canon.h" | 8 #include "url/url_canon.h" |
9 #include "url/url_canon_internal.h" | 9 #include "url/url_canon_internal.h" |
10 #include "url/url_constants.h" | 10 #include "url/url_constants.h" |
11 #include "url/url_file.h" | 11 #include "url/url_file.h" |
12 #include "url/url_parse_internal.h" | 12 #include "url/url_parse_internal.h" |
13 #include "url/url_util_internal.h" | 13 #include "url/url_util_internal.h" |
14 | 14 |
15 namespace url { | 15 namespace url { |
16 | 16 |
17 namespace { | 17 namespace { |
18 | 18 |
19 // Firefox does a case-sensitive compare (which is probably wrong--Mozilla bug | 19 // Firefox does a case-sensitive compare (which is probably wrong--Mozilla bug |
20 // 379034), whereas IE is case-insensetive. | 20 // 379034), whereas IE is case-insensitive. |
21 // | 21 // |
22 // We choose to be more permissive like IE. We don't need to worry about | 22 // We choose to be more permissive like IE. We don't need to worry about |
23 // unescaping or anything here: neither IE or Firefox allow this. We also | 23 // unescaping or anything here: neither IE nor Firefox allows this. We also |
24 // don't have to worry about invalid scheme characters since we are comparing | 24 // don't have to worry about invalid scheme characters since we are comparing |
25 // against the canonical scheme of the base. | 25 // against the canonical scheme of the base. |
26 // | 26 // |
27 // The base URL should always be canonical, therefore is ASCII. | 27 // The base URL should always be canonical, therefore it should be ASCII. |
28 template<typename CHAR> | 28 template<typename CHAR> |
29 bool AreSchemesEqual(const char* base, | 29 bool AreSchemesEqual(const char* base, |
30 const Component& base_scheme, | 30 const Component& base_scheme, |
31 const CHAR* cmp, | 31 const CHAR* cmp, |
32 const Component& cmp_scheme) { | 32 const Component& cmp_scheme) { |
33 if (base_scheme.len != cmp_scheme.len) | 33 if (base_scheme.len != cmp_scheme.len) |
34 return false; | 34 return false; |
35 for (int i = 0; i < base_scheme.len; i++) { | 35 for (int i = 0; i < base_scheme.len; i++) { |
36 // We assume the base is already canonical, so we don't have to | 36 // We assume the base is already canonical, so we don't have to |
37 // canonicalize it. | 37 // canonicalize it. |
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
75 TrimURL(url, &begin, &url_len); | 75 TrimURL(url, &begin, &url_len); |
76 if (begin >= url_len) { | 76 if (begin >= url_len) { |
77 // Empty URLs are relative, but do nothing. | 77 // Empty URLs are relative, but do nothing. |
78 *relative_component = Component(begin, 0); | 78 *relative_component = Component(begin, 0); |
79 *is_relative = true; | 79 *is_relative = true; |
80 return true; | 80 return true; |
81 } | 81 } |
82 | 82 |
83 #ifdef WIN32 | 83 #ifdef WIN32 |
84 // We special case paths like "C:\foo" so they can link directly to the | 84 // We special case paths like "C:\foo" so they can link directly to the |
85 // file on Windows (IE compatability). The security domain stuff should | 85 // file on Windows (IE compatibility). The security domain stuff should |
86 // prevent a link like this from actually being followed if its on a | 86 // prevent a link like this from actually being followed if it's on a |
87 // web page. | 87 // web page. |
88 // | 88 // |
89 // We treat "C:/foo" as an absolute URL. We can go ahead and treat "/c:/" | 89 // We treat "C:/foo" as an absolute URL. We can go ahead and treat "/c:/" |
90 // as relative, as this will just replace the path when the base scheme | 90 // as relative, as this will just replace the path when the base scheme |
91 // is a file and the answer will still be correct. | 91 // is a file and the answer will still be correct. |
92 // | 92 // |
93 // We require strict backslashes when detecting UNC since two forward | 93 // We require strict backslashes when detecting UNC since two forward |
94 // shashes should be treated a a relative URL with a hostname. | 94 // slashes should be treated as a relative URL with a hostname. |
95 if (DoesBeginWindowsDriveSpec(url, begin, url_len) || | 95 if (DoesBeginWindowsDriveSpec(url, begin, url_len) || |
96 DoesBeginUNCPath(url, begin, url_len, true)) | 96 DoesBeginUNCPath(url, begin, url_len, true)) |
97 return true; | 97 return true; |
98 #endif // WIN32 | 98 #endif // WIN32 |
99 | 99 |
100 // See if we've got a scheme, if not, we know this is a relative URL. | 100 // See if we've got a scheme, if not, we know this is a relative URL. |
101 // BUT: Just because we have a scheme, doesn't make it absolute. | 101 // BUT, just because we have a scheme, doesn't make it absolute. |
102 // "http:foo.html" is a relative URL with path "foo.html". If the scheme is | 102 // "http:foo.html" is a relative URL with path "foo.html". If the scheme is |
103 // empty, we treat it as relative (":foo") like IE does. | 103 // empty, we treat it as relative (":foo"), like IE does. |
104 Component scheme; | 104 Component scheme; |
105 const bool scheme_is_empty = | 105 const bool scheme_is_empty = |
106 !ExtractScheme(url, url_len, &scheme) || scheme.len == 0; | 106 !ExtractScheme(url, url_len, &scheme) || scheme.len == 0; |
107 if (scheme_is_empty) { | 107 if (scheme_is_empty) { |
108 if (url[begin] == '#') { | 108 if (url[begin] == '#') { |
109 // |url| is a bare fragement (e.g. "#foo"). This can be resolved against | 109 // |url| is a bare fragment (e.g. "#foo"). This can be resolved against |
110 // any base. Fall-through. | 110 // any base. Fall-through. |
111 } else if (!is_base_hierarchical) { | 111 } else if (!is_base_hierarchical) { |
112 // Don't allow relative URLs if the base scheme doesn't support it. | 112 // Don't allow relative URLs if the base scheme doesn't support it. |
113 return false; | 113 return false; |
114 } | 114 } |
115 | 115 |
116 *relative_component = MakeRange(begin, url_len); | 116 *relative_component = MakeRange(begin, url_len); |
117 *is_relative = true; | 117 *is_relative = true; |
118 return true; | 118 return true; |
119 } | 119 } |
(...skipping 18 matching lines...) Expand all Loading... |
138 | 138 |
139 // When the scheme that they both share is not hierarchical, treat the | 139 // When the scheme that they both share is not hierarchical, treat the |
140 // incoming scheme as absolute (this way with the base of "data:foo", | 140 // incoming scheme as absolute (this way with the base of "data:foo", |
141 // "data:bar" will be reported as absolute. | 141 // "data:bar" will be reported as absolute). |
142 if (!is_base_hierarchical) | 142 if (!is_base_hierarchical) |
143 return true; | 143 return true; |
144 | 144 |
145 int colon_offset = scheme.end(); | 145 int colon_offset = scheme.end(); |
146 | 146 |
147 // If it's a filesystem URL, the only valid way to make it relative is not to | 147 // If it's a filesystem URL, the only valid way to make it relative is not to |
148 // supply a scheme. There's no equivalent to e.g. http:index.html. | 148 // supply a scheme. There's no equivalent to e.g. http:index.html. |
149 if (CompareSchemeComponent(url, scheme, kFileSystemScheme)) | 149 if (CompareSchemeComponent(url, scheme, kFileSystemScheme)) |
150 return true; | 150 return true; |
151 | 151 |
152 // ExtractScheme guarantees that the colon immediately follows what it | 152 // ExtractScheme guarantees that the colon immediately follows what it |
153 // considers to be the scheme. CountConsecutiveSlashes will handle the | 153 // considers to be the scheme. CountConsecutiveSlashes will handle the |
154 // case where the begin offset is the end of the input. | 154 // case where the begin offset is the end of the input. |
155 int num_slashes = CountConsecutiveSlashes(url, colon_offset + 1, url_len); | 155 int num_slashes = CountConsecutiveSlashes(url, colon_offset + 1, url_len); |
156 | 156 |
157 if (num_slashes == 0 || num_slashes == 1) { | 157 if (num_slashes == 0 || num_slashes == 1) { |
158 // No slashes means it's a relative path like "http:foo.html". One slash | 158 // No slashes means it's a relative path like "http:foo.html". One slash |
(...skipping 228 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
387 replacements.SetHost(relative_url, relative_parsed.host); | 387 replacements.SetHost(relative_url, relative_parsed.host); |
388 replacements.SetPort(relative_url, relative_parsed.port); | 388 replacements.SetPort(relative_url, relative_parsed.port); |
389 replacements.SetPath(relative_url, relative_parsed.path); | 389 replacements.SetPath(relative_url, relative_parsed.path); |
390 replacements.SetQuery(relative_url, relative_parsed.query); | 390 replacements.SetQuery(relative_url, relative_parsed.query); |
391 replacements.SetRef(relative_url, relative_parsed.ref); | 391 replacements.SetRef(relative_url, relative_parsed.ref); |
392 | 392 |
393 return ReplaceStandardURL(base_url, base_parsed, replacements, | 393 return ReplaceStandardURL(base_url, base_parsed, replacements, |
394 query_converter, output, out_parsed); | 394 query_converter, output, out_parsed); |
395 } | 395 } |
396 | 396 |
397 // Resolves a relative URL that happens to be an absolute file path. Examples | 397 // Resolves a relative URL that happens to be an absolute file path. Examples |
398 // include: "//hostname/path", "/c:/foo", and "//hostname/c:/foo". | 398 // include: "//hostname/path", "/c:/foo", and "//hostname/c:/foo". |
399 template<typename CHAR> | 399 template<typename CHAR> |
400 bool DoResolveAbsoluteFile(const CHAR* relative_url, | 400 bool DoResolveAbsoluteFile(const CHAR* relative_url, |
401 const Component& relative_component, | 401 const Component& relative_component, |
402 CharsetConverter* query_converter, | 402 CharsetConverter* query_converter, |
403 CanonOutput* output, | 403 CanonOutput* output, |
404 Parsed* out_parsed) { | 404 Parsed* out_parsed) { |
405 // Parse the file URL. The file URl parsing function uses the same logic | 405 // Parse the file URL. The file URL parsing function uses the same logic |
406 // as we do for determining if the file is absolute, in which case it will | 406 // as we do for determining if the file is absolute, in which case it will |
407 // not bother to look for a scheme. | 407 // not bother to look for a scheme. |
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
453 int num_slashes = CountConsecutiveSlashes( | 453 int num_slashes = CountConsecutiveSlashes( |
454 relative_url, relative_component.begin, relative_component.end()); | 454 relative_url, relative_component.begin, relative_component.end()); |
455 | 455 |
456 #ifdef WIN32 | 456 #ifdef WIN32 |
457 // On Windows, two slashes for a file path (regardless of which direction | 457 // On Windows, two slashes for a file path (regardless of which direction |
458 // they are) means that it's UNC. Two backslashes on any base scheme mean | 458 // they are) means that it's UNC. Two backslashes on any base scheme mean |
459 // that it's an absolute UNC path (we use the base_is_file flag to control | 459 // that it's an absolute UNC path (we use the base_is_file flag to control |
460 // how strict the UNC finder is). | 460 // how strict the UNC finder is). |
461 // | 461 // |
462 // We also allow Windows absolute drive specs on any scheme (for example | 462 // We also allow Windows absolute drive specs on any scheme (for example |
463 // "c:\foo") like IE does. There must be no preceeding slashes in this | 463 // "c:\foo") like IE does. There must be no preceding slashes in this |
464 // case (we reject anything like "/c:/foo") because that should be treated | 464 // case (we reject anything like "/c:/foo") because that should be treated |
465 // as a path. For file URLs, we allow any number of slashes since that would | 465 // as a path. For file URLs, we allow any number of slashes since that would |
466 // be setting the path. | 466 // be setting the path. |
467 // | 467 // |
468 // This assumes the absolute path resolver handles absolute URLs like this | 468 // This assumes the absolute path resolver handles absolute URLs like this |
469 // properly. DoCanonicalize does this. | 469 // properly. DoCanonicalize does this. |
470 int after_slashes = relative_component.begin + num_slashes; | 470 int after_slashes = relative_component.begin + num_slashes; |
471 if (DoesBeginUNCPath(relative_url, relative_component.begin, | 471 if (DoesBeginUNCPath(relative_url, relative_component.begin, |
472 relative_component.end(), !base_is_file) || | 472 relative_component.end(), !base_is_file) || |
473 ((num_slashes == 0 || base_is_file) && | 473 ((num_slashes == 0 || base_is_file) && |
(...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
551 const Component& relative_component, | 551 const Component& relative_component, |
552 CharsetConverter* query_converter, | 552 CharsetConverter* query_converter, |
553 CanonOutput* output, | 553 CanonOutput* output, |
554 Parsed* out_parsed) { | 554 Parsed* out_parsed) { |
555 return DoResolveRelativeURL<base::char16>( | 555 return DoResolveRelativeURL<base::char16>( |
556 base_url, base_parsed, base_is_file, relative_url, | 556 base_url, base_parsed, base_is_file, relative_url, |
557 relative_component, query_converter, output, out_parsed); | 557 relative_component, query_converter, output, out_parsed); |
558 } | 558 } |
559 | 559 |
560 } // namespace url | 560 } // namespace url |
OLD | NEW |