OLD | NEW |
1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "url/url_util.h" | 5 #include "url/url_util.h" |
6 | 6 |
7 #include <stddef.h> | 7 #include <stddef.h> |
8 #include <string.h> | 8 #include <string.h> |
9 #include <vector> | 9 #include <vector> |
10 | 10 |
11 #include "base/debug/leak_annotations.h" | 11 #include "base/debug/leak_annotations.h" |
12 #include "base/logging.h" | 12 #include "base/logging.h" |
13 #include "base/strings/string_util.h" | 13 #include "base/strings/string_util.h" |
14 #include "url/url_canon_internal.h" | 14 #include "url/url_canon_internal.h" |
15 #include "url/url_file.h" | 15 #include "url/url_file.h" |
16 #include "url/url_util_internal.h" | 16 #include "url/url_util_internal.h" |
17 | 17 |
18 namespace url { | 18 namespace url { |
19 | 19 |
20 namespace { | 20 namespace { |
21 | 21 |
| 22 // Pass this enum through for methods which would like to know if whitespace |
| 23 // removal is necessary. |
| 24 enum WhitespaceRemovalPolicy { |
| 25 REMOVE_WHITESPACE, |
| 26 DO_NOT_REMOVE_WHITESPACE, |
| 27 }; |
| 28 |
22 const int kNumStandardURLSchemes = 10; | 29 const int kNumStandardURLSchemes = 10; |
23 const SchemeWithType kStandardURLSchemes[kNumStandardURLSchemes] = { | 30 const SchemeWithType kStandardURLSchemes[kNumStandardURLSchemes] = { |
24 {kHttpScheme, SCHEME_WITH_PORT}, | 31 {kHttpScheme, SCHEME_WITH_PORT}, |
25 {kHttpsScheme, SCHEME_WITH_PORT}, | 32 {kHttpsScheme, SCHEME_WITH_PORT}, |
26 // Yes, file URLs can have a hostname, so file URLs should be handled as | 33 // Yes, file URLs can have a hostname, so file URLs should be handled as |
27 // "standard". File URLs never have a port as specified by the SchemeType | 34 // "standard". File URLs never have a port as specified by the SchemeType |
28 // field. | 35 // field. |
29 {kFileScheme, SCHEME_WITHOUT_PORT}, | 36 {kFileScheme, SCHEME_WITHOUT_PORT}, |
30 {kFtpScheme, SCHEME_WITH_PORT}, | 37 {kFtpScheme, SCHEME_WITH_PORT}, |
31 {kGopherScheme, SCHEME_WITH_PORT}, | 38 {kGopherScheme, SCHEME_WITH_PORT}, |
(...skipping 115 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
147 // No scheme. | 154 // No scheme. |
148 if (found_scheme) | 155 if (found_scheme) |
149 *found_scheme = Component(); | 156 *found_scheme = Component(); |
150 return false; | 157 return false; |
151 } | 158 } |
152 if (found_scheme) | 159 if (found_scheme) |
153 *found_scheme = our_scheme; | 160 *found_scheme = our_scheme; |
154 return DoCompareSchemeComponent(spec, our_scheme, compare); | 161 return DoCompareSchemeComponent(spec, our_scheme, compare); |
155 } | 162 } |
156 | 163 |
157 template<typename CHAR> | 164 template <typename CHAR> |
158 bool DoCanonicalize(const CHAR* in_spec, | 165 bool DoCanonicalize(const CHAR* spec, |
159 int in_spec_len, | 166 int spec_len, |
160 bool trim_path_end, | 167 bool trim_path_end, |
| 168 WhitespaceRemovalPolicy whitespace_policy, |
161 CharsetConverter* charset_converter, | 169 CharsetConverter* charset_converter, |
162 CanonOutput* output, | 170 CanonOutput* output, |
163 Parsed* output_parsed) { | 171 Parsed* output_parsed) { |
164 // Remove any whitespace from the middle of the relative URL, possibly | 172 // Remove any whitespace from the middle of the relative URL if necessary. |
165 // copying to the new buffer. | 173 // Possibly this will result in copying to the new buffer. |
166 RawCanonOutputT<CHAR> whitespace_buffer; | 174 RawCanonOutputT<CHAR> whitespace_buffer; |
167 int spec_len; | 175 if (whitespace_policy == REMOVE_WHITESPACE) |
168 const CHAR* spec = RemoveURLWhitespace(in_spec, in_spec_len, | 176 spec = RemoveURLWhitespace(spec, spec_len, &whitespace_buffer, &spec_len); |
169 &whitespace_buffer, &spec_len); | |
170 | 177 |
171 Parsed parsed_input; | 178 Parsed parsed_input; |
172 #ifdef WIN32 | 179 #ifdef WIN32 |
173 // For Windows, we allow things that look like absolute Windows paths to be | 180 // For Windows, we allow things that look like absolute Windows paths to be |
174 // fixed up magically to file URLs. This is done for IE compatibility. For | 181 // fixed up magically to file URLs. This is done for IE compatibility. For |
175 // example, this will change "c:/foo" into a file URL rather than treating | 182 // example, this will change "c:/foo" into a file URL rather than treating |
176 // it as a URL with the protocol "c". It also works for UNC ("\\foo\bar.txt"). | 183 // it as a URL with the protocol "c". It also works for UNC ("\\foo\bar.txt"). |
177 // There is similar logic in url_canon_relative.cc for relative URL resolution. | 184 // There is similar logic in url_canon_relative.cc for relative URL resolution. |
178 // | 185 // |
179 // For Mac & Unix, we don't do this (the equivalent would be "/foo/bar" which | 186 // For Mac & Unix, we don't do this (the equivalent would be "/foo/bar" which |
(...skipping 100 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
280 if (base_parsed_authority.host.is_nonempty()) { | 287 if (base_parsed_authority.host.is_nonempty()) { |
281 RawCanonOutputT<char> temporary_output; | 288 RawCanonOutputT<char> temporary_output; |
282 bool did_resolve_succeed = | 289 bool did_resolve_succeed = |
283 ResolveRelativeURL(base_spec, base_parsed_authority, false, relative, | 290 ResolveRelativeURL(base_spec, base_parsed_authority, false, relative, |
284 relative_component, charset_converter, | 291 relative_component, charset_converter, |
285 &temporary_output, output_parsed); | 292 &temporary_output, output_parsed); |
286 // The output_parsed is incorrect at this point (because it was built | 293 // The output_parsed is incorrect at this point (because it was built |
287 // based on base_parsed_authority instead of base_parsed) and needs to be | 294 // based on base_parsed_authority instead of base_parsed) and needs to be |
288 // re-created. | 295 // re-created. |
289 DoCanonicalize(temporary_output.data(), temporary_output.length(), true, | 296 DoCanonicalize(temporary_output.data(), temporary_output.length(), true, |
290 charset_converter, output, output_parsed); | 297 REMOVE_WHITESPACE, charset_converter, output, |
| 298 output_parsed); |
291 return did_resolve_succeed; | 299 return did_resolve_succeed; |
292 } | 300 } |
293 } else if (is_relative) { | 301 } else if (is_relative) { |
294 // Relative, resolve and canonicalize. | 302 // Relative, resolve and canonicalize. |
295 bool file_base_scheme = base_parsed.scheme.is_nonempty() && | 303 bool file_base_scheme = base_parsed.scheme.is_nonempty() && |
296 DoCompareSchemeComponent(base_spec, base_parsed.scheme, kFileScheme); | 304 DoCompareSchemeComponent(base_spec, base_parsed.scheme, kFileScheme); |
297 return ResolveRelativeURL(base_spec, base_parsed, file_base_scheme, relative, | 305 return ResolveRelativeURL(base_spec, base_parsed, file_base_scheme, relative, |
298 relative_component, charset_converter, output, | 306 relative_component, charset_converter, output, |
299 output_parsed); | 307 output_parsed); |
300 } | 308 } |
301 | 309 |
302 // Not relative, canonicalize the input. | 310 // Not relative, canonicalize the input. |
303 return DoCanonicalize(relative, relative_length, true, charset_converter, | 311 return DoCanonicalize(relative, relative_length, true, |
304 output, output_parsed); | 312 DO_NOT_REMOVE_WHITESPACE, charset_converter, output, |
| 313 output_parsed); |
305 } | 314 } |
306 | 315 |
307 template<typename CHAR> | 316 template<typename CHAR> |
308 bool DoReplaceComponents(const char* spec, | 317 bool DoReplaceComponents(const char* spec, |
309 int spec_len, | 318 int spec_len, |
310 const Parsed& parsed, | 319 const Parsed& parsed, |
311 const Replacements<CHAR>& replacements, | 320 const Replacements<CHAR>& replacements, |
312 CharsetConverter* charset_converter, | 321 CharsetConverter* charset_converter, |
313 CanonOutput* output, | 322 CanonOutput* output, |
314 Parsed* out_parsed) { | 323 Parsed* out_parsed) { |
(...skipping 26 matching lines...) Expand all Loading... |
341 if (spec_len - spec_after_colon > 0) { | 350 if (spec_len - spec_after_colon > 0) { |
342 scheme_replaced.Append(&spec[spec_after_colon], | 351 scheme_replaced.Append(&spec[spec_after_colon], |
343 spec_len - spec_after_colon); | 352 spec_len - spec_after_colon); |
344 } | 353 } |
345 | 354 |
346 // We now need to completely re-parse the resulting string since its meaning | 355 // We now need to completely re-parse the resulting string since its meaning |
347 // may have changed with the different scheme. | 356 // may have changed with the different scheme. |
348 RawCanonOutput<128> recanonicalized; | 357 RawCanonOutput<128> recanonicalized; |
349 Parsed recanonicalized_parsed; | 358 Parsed recanonicalized_parsed; |
350 DoCanonicalize(scheme_replaced.data(), scheme_replaced.length(), true, | 359 DoCanonicalize(scheme_replaced.data(), scheme_replaced.length(), true, |
351 charset_converter, | 360 REMOVE_WHITESPACE, charset_converter, &recanonicalized, |
352 &recanonicalized, &recanonicalized_parsed); | 361 &recanonicalized_parsed); |
353 | 362 |
354 // Recurse using the version with the scheme already replaced. This will now | 363 // Recurse using the version with the scheme already replaced. This will now |
355 // use the replacement rules for the new scheme. | 364 // use the replacement rules for the new scheme. |
356 // | 365 // |
357 // Warning: this code assumes that ReplaceComponents will re-check all | 366 // Warning: this code assumes that ReplaceComponents will re-check all |
358 // components for validity. This is because we can't fail if DoCanonicalize | 367 // components for validity. This is because we can't fail if DoCanonicalize |
359 // failed above since theoretically the thing making it fail could be | 368 // failed above since theoretically the thing making it fail could be |
360 // getting replaced here. If ReplaceComponents didn't re-check everything, | 369 // getting replaced here. If ReplaceComponents didn't re-check everything, |
361 // we wouldn't know if something *not* getting replaced is a problem. | 370 // we wouldn't know if something *not* getting replaced is a problem. |
362 // If the scheme-specific replacers are made more intelligent so they don't | 371 // If the scheme-specific replacers are made more intelligent so they don't |
(...skipping 165 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
528 | 537 |
529 return true; | 538 return true; |
530 } | 539 } |
531 | 540 |
532 bool Canonicalize(const char* spec, | 541 bool Canonicalize(const char* spec, |
533 int spec_len, | 542 int spec_len, |
534 bool trim_path_end, | 543 bool trim_path_end, |
535 CharsetConverter* charset_converter, | 544 CharsetConverter* charset_converter, |
536 CanonOutput* output, | 545 CanonOutput* output, |
537 Parsed* output_parsed) { | 546 Parsed* output_parsed) { |
538 return DoCanonicalize(spec, spec_len, trim_path_end, charset_converter, | 547 return DoCanonicalize(spec, spec_len, trim_path_end, REMOVE_WHITESPACE, |
539 output, output_parsed); | 548 charset_converter, output, output_parsed); |
540 } | 549 } |
541 | 550 |
542 bool Canonicalize(const base::char16* spec, | 551 bool Canonicalize(const base::char16* spec, |
543 int spec_len, | 552 int spec_len, |
544 bool trim_path_end, | 553 bool trim_path_end, |
545 CharsetConverter* charset_converter, | 554 CharsetConverter* charset_converter, |
546 CanonOutput* output, | 555 CanonOutput* output, |
547 Parsed* output_parsed) { | 556 Parsed* output_parsed) { |
548 return DoCanonicalize(spec, spec_len, trim_path_end, charset_converter, | 557 return DoCanonicalize(spec, spec_len, trim_path_end, REMOVE_WHITESPACE, |
549 output, output_parsed); | 558 charset_converter, output, output_parsed); |
550 } | 559 } |
551 | 560 |
552 bool ResolveRelative(const char* base_spec, | 561 bool ResolveRelative(const char* base_spec, |
553 int base_spec_len, | 562 int base_spec_len, |
554 const Parsed& base_parsed, | 563 const Parsed& base_parsed, |
555 const char* relative, | 564 const char* relative, |
556 int relative_length, | 565 int relative_length, |
557 CharsetConverter* charset_converter, | 566 CharsetConverter* charset_converter, |
558 CanonOutput* output, | 567 CanonOutput* output, |
559 Parsed* output_parsed) { | 568 Parsed* output_parsed) { |
(...skipping 103 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
663 return DoCompareSchemeComponent(spec, component, compare_to); | 672 return DoCompareSchemeComponent(spec, component, compare_to); |
664 } | 673 } |
665 | 674 |
666 bool CompareSchemeComponent(const base::char16* spec, | 675 bool CompareSchemeComponent(const base::char16* spec, |
667 const Component& component, | 676 const Component& component, |
668 const char* compare_to) { | 677 const char* compare_to) { |
669 return DoCompareSchemeComponent(spec, component, compare_to); | 678 return DoCompareSchemeComponent(spec, component, compare_to); |
670 } | 679 } |
671 | 680 |
672 } // namespace url | 681 } // namespace url |
OLD | NEW |