Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "url/url_util.h" | 5 #include "url/url_util.h" |
| 6 | 6 |
| 7 #include <stddef.h> | 7 #include <stddef.h> |
| 8 #include <string.h> | 8 #include <string.h> |
| 9 #include <vector> | 9 #include <vector> |
| 10 | 10 |
| 11 #include "base/debug/leak_annotations.h" | 11 #include "base/debug/leak_annotations.h" |
| 12 #include "base/logging.h" | 12 #include "base/logging.h" |
| 13 #include "base/strings/string_util.h" | 13 #include "base/strings/string_util.h" |
| 14 #include "url/url_canon_internal.h" | 14 #include "url/url_canon_internal.h" |
| 15 #include "url/url_file.h" | 15 #include "url/url_file.h" |
| 16 #include "url/url_util_internal.h" | 16 #include "url/url_util_internal.h" |
| 17 | 17 |
| 18 namespace url { | 18 namespace url { |
| 19 | 19 |
| 20 namespace { | 20 namespace { |
| 21 | 21 |
| 22 // Pass this enum through for methods which would like to know if whitespace | |
| 23 // removal is necessary. | |
| 24 enum WhitespaceRemovalPolicy { | |
| 25 REMOVE_WHITESPACE, | |
| 26 DONT_REMOVE_WHITESPACE, | |
|
> **Mike West** (2016/12/01 09:39:36): Nit: I'd prefer `DO_NOT` to `DONT`. The lack of a […]
>
> **Charlie Harrison** (2016/12/01 13:27:37): Done.
| |
| 27 }; | |
| 28 | |
| 22 const int kNumStandardURLSchemes = 10; | 29 const int kNumStandardURLSchemes = 10; |
| 23 const SchemeWithType kStandardURLSchemes[kNumStandardURLSchemes] = { | 30 const SchemeWithType kStandardURLSchemes[kNumStandardURLSchemes] = { |
| 24 {kHttpScheme, SCHEME_WITH_PORT}, | 31 {kHttpScheme, SCHEME_WITH_PORT}, |
| 25 {kHttpsScheme, SCHEME_WITH_PORT}, | 32 {kHttpsScheme, SCHEME_WITH_PORT}, |
| 26 // Yes, file URLs can have a hostname, so file URLs should be handled as | 33 // Yes, file URLs can have a hostname, so file URLs should be handled as |
| 27 // "standard". File URLs never have a port as specified by the SchemeType | 34 // "standard". File URLs never have a port as specified by the SchemeType |
| 28 // field. | 35 // field. |
| 29 {kFileScheme, SCHEME_WITHOUT_PORT}, | 36 {kFileScheme, SCHEME_WITHOUT_PORT}, |
| 30 {kFtpScheme, SCHEME_WITH_PORT}, | 37 {kFtpScheme, SCHEME_WITH_PORT}, |
| 31 {kGopherScheme, SCHEME_WITH_PORT}, | 38 {kGopherScheme, SCHEME_WITH_PORT}, |
| (...skipping 115 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 147 // No scheme. | 154 // No scheme. |
| 148 if (found_scheme) | 155 if (found_scheme) |
| 149 *found_scheme = Component(); | 156 *found_scheme = Component(); |
| 150 return false; | 157 return false; |
| 151 } | 158 } |
| 152 if (found_scheme) | 159 if (found_scheme) |
| 153 *found_scheme = our_scheme; | 160 *found_scheme = our_scheme; |
| 154 return DoCompareSchemeComponent(spec, our_scheme, compare); | 161 return DoCompareSchemeComponent(spec, our_scheme, compare); |
| 155 } | 162 } |
| 156 | 163 |
| 157 template<typename CHAR> | 164 template <typename CHAR> |
| 158 bool DoCanonicalize(const CHAR* in_spec, | 165 bool DoCanonicalize(const CHAR* spec, |
| 159 int in_spec_len, | 166 int spec_len, |
| 160 bool trim_path_end, | 167 bool trim_path_end, |
| 168 WhitespaceRemovalPolicy whitespace_policy, | |
| 161 CharsetConverter* charset_converter, | 169 CharsetConverter* charset_converter, |
| 162 CanonOutput* output, | 170 CanonOutput* output, |
| 163 Parsed* output_parsed) { | 171 Parsed* output_parsed) { |
| 164 // Remove any whitespace from the middle of the relative URL, possibly | 172 // Remove any whitespace from the middle of the relative URL if necessary. |
| 165 // copying to the new buffer. | 173 // Possibly this will result in copying to the new buffer. |
| 166 RawCanonOutputT<CHAR> whitespace_buffer; | 174 RawCanonOutputT<CHAR> whitespace_buffer; |
| 167 int spec_len; | 175 if (whitespace_policy == REMOVE_WHITESPACE) |
| 168 const CHAR* spec = RemoveURLWhitespace(in_spec, in_spec_len, | 176 spec = RemoveURLWhitespace(spec, spec_len, &whitespace_buffer, &spec_len); |
| 169 &whitespace_buffer, &spec_len); | |
| 170 | 177 |
| 171 Parsed parsed_input; | 178 Parsed parsed_input; |
| 172 #ifdef WIN32 | 179 #ifdef WIN32 |
| 173 // For Windows, we allow things that look like absolute Windows paths to be | 180 // For Windows, we allow things that look like absolute Windows paths to be |
| 174 // fixed up magically to file URLs. This is done for IE compatibility. For | 181 // fixed up magically to file URLs. This is done for IE compatibility. For |
| 175 // example, this will change "c:/foo" into a file URL rather than treating | 182 // example, this will change "c:/foo" into a file URL rather than treating |
| 176 // it as a URL with the protocol "c". It also works for UNC ("\\foo\bar.txt"). | 183 // it as a URL with the protocol "c". It also works for UNC ("\\foo\bar.txt"). |
| 177 // There is similar logic in url_canon_relative.cc for | 184 // There is similar logic in url_canon_relative.cc for |
| 178 // | 185 // |
| 179 // For Max & Unix, we don't do this (the equivalent would be "/foo/bar" which | 186 // For Max & Unix, we don't do this (the equivalent would be "/foo/bar" which |
| (...skipping 100 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 280 if (base_parsed_authority.host.is_nonempty()) { | 287 if (base_parsed_authority.host.is_nonempty()) { |
| 281 RawCanonOutputT<char> temporary_output; | 288 RawCanonOutputT<char> temporary_output; |
| 282 bool did_resolve_succeed = | 289 bool did_resolve_succeed = |
| 283 ResolveRelativeURL(base_spec, base_parsed_authority, false, relative, | 290 ResolveRelativeURL(base_spec, base_parsed_authority, false, relative, |
| 284 relative_component, charset_converter, | 291 relative_component, charset_converter, |
| 285 &temporary_output, output_parsed); | 292 &temporary_output, output_parsed); |
| 286 // The output_parsed is incorrect at this point (because it was built | 293 // The output_parsed is incorrect at this point (because it was built |
| 287 // based on base_parsed_authority instead of base_parsed) and needs to be | 294 // based on base_parsed_authority instead of base_parsed) and needs to be |
| 288 // re-created. | 295 // re-created. |
| 289 DoCanonicalize(temporary_output.data(), temporary_output.length(), true, | 296 DoCanonicalize(temporary_output.data(), temporary_output.length(), true, |
| 290 charset_converter, output, output_parsed); | 297 REMOVE_WHITESPACE, charset_converter, output, |
| 298 output_parsed); | |
| 291 return did_resolve_succeed; | 299 return did_resolve_succeed; |
| 292 } | 300 } |
| 293 } else if (is_relative) { | 301 } else if (is_relative) { |
| 294 // Relative, resolve and canonicalize. | 302 // Relative, resolve and canonicalize. |
| 295 bool file_base_scheme = base_parsed.scheme.is_nonempty() && | 303 bool file_base_scheme = base_parsed.scheme.is_nonempty() && |
| 296 DoCompareSchemeComponent(base_spec, base_parsed.scheme, kFileScheme); | 304 DoCompareSchemeComponent(base_spec, base_parsed.scheme, kFileScheme); |
| 297 return ResolveRelativeURL(base_spec, base_parsed, file_base_scheme, relative, | 305 return ResolveRelativeURL(base_spec, base_parsed, file_base_scheme, relative, |
| 298 relative_component, charset_converter, output, | 306 relative_component, charset_converter, output, |
| 299 output_parsed); | 307 output_parsed); |
| 300 } | 308 } |
| 301 | 309 |
| 302 // Not relative, canonicalize the input. | 310 // Not relative, canonicalize the input. |
| 303 return DoCanonicalize(relative, relative_length, true, charset_converter, | 311 return DoCanonicalize(relative, relative_length, true, DONT_REMOVE_WHITESPACE, |
| 304 output, output_parsed); | 312 charset_converter, output, output_parsed); |
| 305 } | 313 } |
| 306 | 314 |
| 307 template<typename CHAR> | 315 template<typename CHAR> |
| 308 bool DoReplaceComponents(const char* spec, | 316 bool DoReplaceComponents(const char* spec, |
| 309 int spec_len, | 317 int spec_len, |
| 310 const Parsed& parsed, | 318 const Parsed& parsed, |
| 311 const Replacements<CHAR>& replacements, | 319 const Replacements<CHAR>& replacements, |
| 312 CharsetConverter* charset_converter, | 320 CharsetConverter* charset_converter, |
| 313 CanonOutput* output, | 321 CanonOutput* output, |
| 314 Parsed* out_parsed) { | 322 Parsed* out_parsed) { |
| (...skipping 26 matching lines...) Expand all Loading... | |
| 341 if (spec_len - spec_after_colon > 0) { | 349 if (spec_len - spec_after_colon > 0) { |
| 342 scheme_replaced.Append(&spec[spec_after_colon], | 350 scheme_replaced.Append(&spec[spec_after_colon], |
| 343 spec_len - spec_after_colon); | 351 spec_len - spec_after_colon); |
| 344 } | 352 } |
| 345 | 353 |
| 346 // We now need to completely re-parse the resulting string since its meaning | 354 // We now need to completely re-parse the resulting string since its meaning |
| 347 // may have changed with the different scheme. | 355 // may have changed with the different scheme. |
| 348 RawCanonOutput<128> recanonicalized; | 356 RawCanonOutput<128> recanonicalized; |
| 349 Parsed recanonicalized_parsed; | 357 Parsed recanonicalized_parsed; |
| 350 DoCanonicalize(scheme_replaced.data(), scheme_replaced.length(), true, | 358 DoCanonicalize(scheme_replaced.data(), scheme_replaced.length(), true, |
| 351 charset_converter, | 359 REMOVE_WHITESPACE, charset_converter, &recanonicalized, |
| 352 &recanonicalized, &recanonicalized_parsed); | 360 &recanonicalized_parsed); |
| 353 | 361 |
| 354 // Recurse using the version with the scheme already replaced. This will now | 362 // Recurse using the version with the scheme already replaced. This will now |
| 355 // use the replacement rules for the new scheme. | 363 // use the replacement rules for the new scheme. |
| 356 // | 364 // |
| 357 // Warning: this code assumes that ReplaceComponents will re-check all | 365 // Warning: this code assumes that ReplaceComponents will re-check all |
| 358 // components for validity. This is because we can't fail if DoCanonicalize | 366 // components for validity. This is because we can't fail if DoCanonicalize |
| 359 // failed above since theoretically the thing making it fail could be | 367 // failed above since theoretically the thing making it fail could be |
| 360 // getting replaced here. If ReplaceComponents didn't re-check everything, | 368 // getting replaced here. If ReplaceComponents didn't re-check everything, |
| 361 // we wouldn't know if something *not* getting replaced is a problem. | 369 // we wouldn't know if something *not* getting replaced is a problem. |
| 362 // If the scheme-specific replacers are made more intelligent so they don't | 370 // If the scheme-specific replacers are made more intelligent so they don't |
| (...skipping 165 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 528 | 536 |
| 529 return true; | 537 return true; |
| 530 } | 538 } |
| 531 | 539 |
| 532 bool Canonicalize(const char* spec, | 540 bool Canonicalize(const char* spec, |
| 533 int spec_len, | 541 int spec_len, |
| 534 bool trim_path_end, | 542 bool trim_path_end, |
| 535 CharsetConverter* charset_converter, | 543 CharsetConverter* charset_converter, |
| 536 CanonOutput* output, | 544 CanonOutput* output, |
| 537 Parsed* output_parsed) { | 545 Parsed* output_parsed) { |
| 538 return DoCanonicalize(spec, spec_len, trim_path_end, charset_converter, | 546 return DoCanonicalize(spec, spec_len, trim_path_end, REMOVE_WHITESPACE, |
| 539 output, output_parsed); | 547 charset_converter, output, output_parsed); |
| 540 } | 548 } |
| 541 | 549 |
| 542 bool Canonicalize(const base::char16* spec, | 550 bool Canonicalize(const base::char16* spec, |
| 543 int spec_len, | 551 int spec_len, |
| 544 bool trim_path_end, | 552 bool trim_path_end, |
| 545 CharsetConverter* charset_converter, | 553 CharsetConverter* charset_converter, |
| 546 CanonOutput* output, | 554 CanonOutput* output, |
| 547 Parsed* output_parsed) { | 555 Parsed* output_parsed) { |
| 548 return DoCanonicalize(spec, spec_len, trim_path_end, charset_converter, | 556 return DoCanonicalize(spec, spec_len, trim_path_end, REMOVE_WHITESPACE, |
| 549 output, output_parsed); | 557 charset_converter, output, output_parsed); |
| 550 } | 558 } |
| 551 | 559 |
| 552 bool ResolveRelative(const char* base_spec, | 560 bool ResolveRelative(const char* base_spec, |
| 553 int base_spec_len, | 561 int base_spec_len, |
| 554 const Parsed& base_parsed, | 562 const Parsed& base_parsed, |
| 555 const char* relative, | 563 const char* relative, |
| 556 int relative_length, | 564 int relative_length, |
| 557 CharsetConverter* charset_converter, | 565 CharsetConverter* charset_converter, |
| 558 CanonOutput* output, | 566 CanonOutput* output, |
| 559 Parsed* output_parsed) { | 567 Parsed* output_parsed) { |
| (...skipping 103 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 663 return DoCompareSchemeComponent(spec, component, compare_to); | 671 return DoCompareSchemeComponent(spec, component, compare_to); |
| 664 } | 672 } |
| 665 | 673 |
| 666 bool CompareSchemeComponent(const base::char16* spec, | 674 bool CompareSchemeComponent(const base::char16* spec, |
| 667 const Component& component, | 675 const Component& component, |
| 668 const char* compare_to) { | 676 const char* compare_to) { |
| 669 return DoCompareSchemeComponent(spec, component, compare_to); | 677 return DoCompareSchemeComponent(spec, component, compare_to); |
| 670 } | 678 } |
| 671 | 679 |
| 672 } // namespace url | 680 } // namespace url |
| OLD | NEW |