| OLD | NEW |
| 1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "url/url_util.h" | 5 #include "url/url_util.h" |
| 6 | 6 |
| 7 #include <stddef.h> | 7 #include <stddef.h> |
| 8 #include <string.h> | 8 #include <string.h> |
| 9 #include <vector> | 9 #include <vector> |
| 10 | 10 |
| 11 #include "base/debug/leak_annotations.h" | 11 #include "base/debug/leak_annotations.h" |
| 12 #include "base/logging.h" | 12 #include "base/logging.h" |
| 13 #include "base/strings/string_util.h" | 13 #include "base/strings/string_util.h" |
| 14 #include "url/url_canon_internal.h" | 14 #include "url/url_canon_internal.h" |
| 15 #include "url/url_file.h" | 15 #include "url/url_file.h" |
| 16 #include "url/url_util_internal.h" | 16 #include "url/url_util_internal.h" |
| 17 | 17 |
| 18 namespace url { | 18 namespace url { |
| 19 | 19 |
| 20 namespace { | 20 namespace { |
| 21 | 21 |
| 22 // Pass this enum through for methods which would like to know if whitespace |
| 23 // removal is necessary. |
| 24 enum WhitespaceRemovalPolicy { |
| 25 REMOVE_WHITESPACE, |
| 26 DO_NOT_REMOVE_WHITESPACE, |
| 27 }; |
| 28 |
| 22 const int kNumStandardURLSchemes = 10; | 29 const int kNumStandardURLSchemes = 10; |
| 23 const SchemeWithType kStandardURLSchemes[kNumStandardURLSchemes] = { | 30 const SchemeWithType kStandardURLSchemes[kNumStandardURLSchemes] = { |
| 24 {kHttpScheme, SCHEME_WITH_PORT}, | 31 {kHttpScheme, SCHEME_WITH_PORT}, |
| 25 {kHttpsScheme, SCHEME_WITH_PORT}, | 32 {kHttpsScheme, SCHEME_WITH_PORT}, |
| 26 // Yes, file URLs can have a hostname, so file URLs should be handled as | 33 // Yes, file URLs can have a hostname, so file URLs should be handled as |
| 27 // "standard". File URLs never have a port as specified by the SchemeType | 34 // "standard". File URLs never have a port as specified by the SchemeType |
| 28 // field. | 35 // field. |
| 29 {kFileScheme, SCHEME_WITHOUT_PORT}, | 36 {kFileScheme, SCHEME_WITHOUT_PORT}, |
| 30 {kFtpScheme, SCHEME_WITH_PORT}, | 37 {kFtpScheme, SCHEME_WITH_PORT}, |
| 31 {kGopherScheme, SCHEME_WITH_PORT}, | 38 {kGopherScheme, SCHEME_WITH_PORT}, |
| (...skipping 115 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 147 // No scheme. | 154 // No scheme. |
| 148 if (found_scheme) | 155 if (found_scheme) |
| 149 *found_scheme = Component(); | 156 *found_scheme = Component(); |
| 150 return false; | 157 return false; |
| 151 } | 158 } |
| 152 if (found_scheme) | 159 if (found_scheme) |
| 153 *found_scheme = our_scheme; | 160 *found_scheme = our_scheme; |
| 154 return DoCompareSchemeComponent(spec, our_scheme, compare); | 161 return DoCompareSchemeComponent(spec, our_scheme, compare); |
| 155 } | 162 } |
| 156 | 163 |
| 157 template<typename CHAR> | 164 template <typename CHAR> |
| 158 bool DoCanonicalize(const CHAR* in_spec, | 165 bool DoCanonicalize(const CHAR* spec, |
| 159 int in_spec_len, | 166 int spec_len, |
| 160 bool trim_path_end, | 167 bool trim_path_end, |
| 168 WhitespaceRemovalPolicy whitespace_policy, |
| 161 CharsetConverter* charset_converter, | 169 CharsetConverter* charset_converter, |
| 162 CanonOutput* output, | 170 CanonOutput* output, |
| 163 Parsed* output_parsed) { | 171 Parsed* output_parsed) { |
| 164 // Remove any whitespace from the middle of the relative URL, possibly | 172 // Remove any whitespace from the middle of the relative URL if necessary. |
| 165 // copying to the new buffer. | 173 // Possibly this will result in copying to the new buffer. |
| 166 RawCanonOutputT<CHAR> whitespace_buffer; | 174 RawCanonOutputT<CHAR> whitespace_buffer; |
| 167 int spec_len; | 175 if (whitespace_policy == REMOVE_WHITESPACE) |
| 168 const CHAR* spec = RemoveURLWhitespace(in_spec, in_spec_len, | 176 spec = RemoveURLWhitespace(spec, spec_len, &whitespace_buffer, &spec_len); |
| 169 &whitespace_buffer, &spec_len); | |
| 170 | 177 |
| 171 Parsed parsed_input; | 178 Parsed parsed_input; |
| 172 #ifdef WIN32 | 179 #ifdef WIN32 |
| 173 // For Windows, we allow things that look like absolute Windows paths to be | 180 // For Windows, we allow things that look like absolute Windows paths to be |
| 174 // fixed up magically to file URLs. This is done for IE compatibility. For | 181 // fixed up magically to file URLs. This is done for IE compatibility. For |
| 175 // example, this will change "c:/foo" into a file URL rather than treating | 182 // example, this will change "c:/foo" into a file URL rather than treating |
| 176 // it as a URL with the protocol "c". It also works for UNC ("\\foo\bar.txt"). | 183 // it as a URL with the protocol "c". It also works for UNC ("\\foo\bar.txt"). |
| 177 // There is similar logic in url_canon_relative.cc. | 184 // There is similar logic in url_canon_relative.cc. |
| 178 // | 185 // |
| 179 // For Mac & Unix, we don't do this (the equivalent would be "/foo/bar" which | 186 // For Mac & Unix, we don't do this (the equivalent would be "/foo/bar" which |
| (...skipping 100 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 280 if (base_parsed_authority.host.is_nonempty()) { | 287 if (base_parsed_authority.host.is_nonempty()) { |
| 281 RawCanonOutputT<char> temporary_output; | 288 RawCanonOutputT<char> temporary_output; |
| 282 bool did_resolve_succeed = | 289 bool did_resolve_succeed = |
| 283 ResolveRelativeURL(base_spec, base_parsed_authority, false, relative, | 290 ResolveRelativeURL(base_spec, base_parsed_authority, false, relative, |
| 284 relative_component, charset_converter, | 291 relative_component, charset_converter, |
| 285 &temporary_output, output_parsed); | 292 &temporary_output, output_parsed); |
| 286 // The output_parsed is incorrect at this point (because it was built | 293 // The output_parsed is incorrect at this point (because it was built |
| 287 // based on base_parsed_authority instead of base_parsed) and needs to be | 294 // based on base_parsed_authority instead of base_parsed) and needs to be |
| 288 // re-created. | 295 // re-created. |
| 289 DoCanonicalize(temporary_output.data(), temporary_output.length(), true, | 296 DoCanonicalize(temporary_output.data(), temporary_output.length(), true, |
| 290 charset_converter, output, output_parsed); | 297 REMOVE_WHITESPACE, charset_converter, output, |
| 298 output_parsed); |
| 291 return did_resolve_succeed; | 299 return did_resolve_succeed; |
| 292 } | 300 } |
| 293 } else if (is_relative) { | 301 } else if (is_relative) { |
| 294 // Relative, resolve and canonicalize. | 302 // Relative, resolve and canonicalize. |
| 295 bool file_base_scheme = base_parsed.scheme.is_nonempty() && | 303 bool file_base_scheme = base_parsed.scheme.is_nonempty() && |
| 296 DoCompareSchemeComponent(base_spec, base_parsed.scheme, kFileScheme); | 304 DoCompareSchemeComponent(base_spec, base_parsed.scheme, kFileScheme); |
| 297 return ResolveRelativeURL(base_spec, base_parsed, file_base_scheme, relative, | 305 return ResolveRelativeURL(base_spec, base_parsed, file_base_scheme, relative, |
| 298 relative_component, charset_converter, output, | 306 relative_component, charset_converter, output, |
| 299 output_parsed); | 307 output_parsed); |
| 300 } | 308 } |
| 301 | 309 |
| 302 // Not relative, canonicalize the input. | 310 // Not relative, canonicalize the input. |
| 303 return DoCanonicalize(relative, relative_length, true, charset_converter, | 311 return DoCanonicalize(relative, relative_length, true, |
| 304 output, output_parsed); | 312 DO_NOT_REMOVE_WHITESPACE, charset_converter, output, |
| 313 output_parsed); |
| 305 } | 314 } |
| 306 | 315 |
| 307 template<typename CHAR> | 316 template<typename CHAR> |
| 308 bool DoReplaceComponents(const char* spec, | 317 bool DoReplaceComponents(const char* spec, |
| 309 int spec_len, | 318 int spec_len, |
| 310 const Parsed& parsed, | 319 const Parsed& parsed, |
| 311 const Replacements<CHAR>& replacements, | 320 const Replacements<CHAR>& replacements, |
| 312 CharsetConverter* charset_converter, | 321 CharsetConverter* charset_converter, |
| 313 CanonOutput* output, | 322 CanonOutput* output, |
| 314 Parsed* out_parsed) { | 323 Parsed* out_parsed) { |
| (...skipping 26 matching lines...) Expand all Loading... |
| 341 if (spec_len - spec_after_colon > 0) { | 350 if (spec_len - spec_after_colon > 0) { |
| 342 scheme_replaced.Append(&spec[spec_after_colon], | 351 scheme_replaced.Append(&spec[spec_after_colon], |
| 343 spec_len - spec_after_colon); | 352 spec_len - spec_after_colon); |
| 344 } | 353 } |
| 345 | 354 |
| 346 // We now need to completely re-parse the resulting string since its meaning | 355 // We now need to completely re-parse the resulting string since its meaning |
| 347 // may have changed with the different scheme. | 356 // may have changed with the different scheme. |
| 348 RawCanonOutput<128> recanonicalized; | 357 RawCanonOutput<128> recanonicalized; |
| 349 Parsed recanonicalized_parsed; | 358 Parsed recanonicalized_parsed; |
| 350 DoCanonicalize(scheme_replaced.data(), scheme_replaced.length(), true, | 359 DoCanonicalize(scheme_replaced.data(), scheme_replaced.length(), true, |
| 351 charset_converter, | 360 REMOVE_WHITESPACE, charset_converter, &recanonicalized, |
| 352 &recanonicalized, &recanonicalized_parsed); | 361 &recanonicalized_parsed); |
| 353 | 362 |
| 354 // Recurse using the version with the scheme already replaced. This will now | 363 // Recurse using the version with the scheme already replaced. This will now |
| 355 // use the replacement rules for the new scheme. | 364 // use the replacement rules for the new scheme. |
| 356 // | 365 // |
| 357 // Warning: this code assumes that ReplaceComponents will re-check all | 366 // Warning: this code assumes that ReplaceComponents will re-check all |
| 358 // components for validity. This is because we can't fail if DoCanonicalize | 367 // components for validity. This is because we can't fail if DoCanonicalize |
| 359 // failed above since theoretically the thing making it fail could be | 368 // failed above since theoretically the thing making it fail could be |
| 360 // getting replaced here. If ReplaceComponents didn't re-check everything, | 369 // getting replaced here. If ReplaceComponents didn't re-check everything, |
| 361 // we wouldn't know if something *not* getting replaced is a problem. | 370 // we wouldn't know if something *not* getting replaced is a problem. |
| 362 // If the scheme-specific replacers are made more intelligent so they don't | 371 // If the scheme-specific replacers are made more intelligent so they don't |
| (...skipping 165 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 528 | 537 |
| 529 return true; | 538 return true; |
| 530 } | 539 } |
| 531 | 540 |
| 532 bool Canonicalize(const char* spec, | 541 bool Canonicalize(const char* spec, |
| 533 int spec_len, | 542 int spec_len, |
| 534 bool trim_path_end, | 543 bool trim_path_end, |
| 535 CharsetConverter* charset_converter, | 544 CharsetConverter* charset_converter, |
| 536 CanonOutput* output, | 545 CanonOutput* output, |
| 537 Parsed* output_parsed) { | 546 Parsed* output_parsed) { |
| 538 return DoCanonicalize(spec, spec_len, trim_path_end, charset_converter, | 547 return DoCanonicalize(spec, spec_len, trim_path_end, REMOVE_WHITESPACE, |
| 539 output, output_parsed); | 548 charset_converter, output, output_parsed); |
| 540 } | 549 } |
| 541 | 550 |
| 542 bool Canonicalize(const base::char16* spec, | 551 bool Canonicalize(const base::char16* spec, |
| 543 int spec_len, | 552 int spec_len, |
| 544 bool trim_path_end, | 553 bool trim_path_end, |
| 545 CharsetConverter* charset_converter, | 554 CharsetConverter* charset_converter, |
| 546 CanonOutput* output, | 555 CanonOutput* output, |
| 547 Parsed* output_parsed) { | 556 Parsed* output_parsed) { |
| 548 return DoCanonicalize(spec, spec_len, trim_path_end, charset_converter, | 557 return DoCanonicalize(spec, spec_len, trim_path_end, REMOVE_WHITESPACE, |
| 549 output, output_parsed); | 558 charset_converter, output, output_parsed); |
| 550 } | 559 } |
| 551 | 560 |
| 552 bool ResolveRelative(const char* base_spec, | 561 bool ResolveRelative(const char* base_spec, |
| 553 int base_spec_len, | 562 int base_spec_len, |
| 554 const Parsed& base_parsed, | 563 const Parsed& base_parsed, |
| 555 const char* relative, | 564 const char* relative, |
| 556 int relative_length, | 565 int relative_length, |
| 557 CharsetConverter* charset_converter, | 566 CharsetConverter* charset_converter, |
| 558 CanonOutput* output, | 567 CanonOutput* output, |
| 559 Parsed* output_parsed) { | 568 Parsed* output_parsed) { |
| (...skipping 103 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 663 return DoCompareSchemeComponent(spec, component, compare_to); | 672 return DoCompareSchemeComponent(spec, component, compare_to); |
| 664 } | 673 } |
| 665 | 674 |
| 666 bool CompareSchemeComponent(const base::char16* spec, | 675 bool CompareSchemeComponent(const base::char16* spec, |
| 667 const Component& component, | 676 const Component& component, |
| 668 const char* compare_to) { | 677 const char* compare_to) { |
| 669 return DoCompareSchemeComponent(spec, component, compare_to); | 678 return DoCompareSchemeComponent(spec, component, compare_to); |
| 670 } | 679 } |
| 671 | 680 |
| 672 } // namespace url | 681 } // namespace url |
| OLD | NEW |