OLD | NEW |
---|---|
1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "url/url_util.h" | 5 #include "url/url_util.h" |
6 | 6 |
7 #include <stddef.h> | 7 #include <stddef.h> |
8 #include <string.h> | 8 #include <string.h> |
9 #include <vector> | 9 #include <vector> |
10 | 10 |
11 #include "base/debug/leak_annotations.h" | 11 #include "base/debug/leak_annotations.h" |
12 #include "base/logging.h" | 12 #include "base/logging.h" |
13 #include "base/strings/string_util.h" | 13 #include "base/strings/string_util.h" |
14 #include "url/url_canon_internal.h" | 14 #include "url/url_canon_internal.h" |
15 #include "url/url_file.h" | 15 #include "url/url_file.h" |
16 #include "url/url_util_internal.h" | 16 #include "url/url_util_internal.h" |
17 | 17 |
18 namespace url { | 18 namespace url { |
19 | 19 |
20 namespace { | 20 namespace { |
21 | 21 |
22 // Pass this enum through for methods which would like to know if whitespace | |
23 // removal is necessary. | |
24 enum WhitespaceRemovalPolicy { | |
25 REMOVE_WHITESPACE, | |
26 DONT_REMOVE_WHITESPACE, | |
Mike West
2016/12/01 09:39:36
Nit: I'd prefer `DO_NOT` to `DONT`. The lack of a…
Charlie Harrison
2016/12/01 13:27:37
Done.
| |
27 }; | |
28 | |
22 const int kNumStandardURLSchemes = 10; | 29 const int kNumStandardURLSchemes = 10; |
23 const SchemeWithType kStandardURLSchemes[kNumStandardURLSchemes] = { | 30 const SchemeWithType kStandardURLSchemes[kNumStandardURLSchemes] = { |
24 {kHttpScheme, SCHEME_WITH_PORT}, | 31 {kHttpScheme, SCHEME_WITH_PORT}, |
25 {kHttpsScheme, SCHEME_WITH_PORT}, | 32 {kHttpsScheme, SCHEME_WITH_PORT}, |
26 // Yes, file URLs can have a hostname, so file URLs should be handled as | 33 // Yes, file URLs can have a hostname, so file URLs should be handled as |
27 // "standard". File URLs never have a port as specified by the SchemeType | 34 // "standard". File URLs never have a port as specified by the SchemeType |
28 // field. | 35 // field. |
29 {kFileScheme, SCHEME_WITHOUT_PORT}, | 36 {kFileScheme, SCHEME_WITHOUT_PORT}, |
30 {kFtpScheme, SCHEME_WITH_PORT}, | 37 {kFtpScheme, SCHEME_WITH_PORT}, |
31 {kGopherScheme, SCHEME_WITH_PORT}, | 38 {kGopherScheme, SCHEME_WITH_PORT}, |
(...skipping 115 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
147 // No scheme. | 154 // No scheme. |
148 if (found_scheme) | 155 if (found_scheme) |
149 *found_scheme = Component(); | 156 *found_scheme = Component(); |
150 return false; | 157 return false; |
151 } | 158 } |
152 if (found_scheme) | 159 if (found_scheme) |
153 *found_scheme = our_scheme; | 160 *found_scheme = our_scheme; |
154 return DoCompareSchemeComponent(spec, our_scheme, compare); | 161 return DoCompareSchemeComponent(spec, our_scheme, compare); |
155 } | 162 } |
156 | 163 |
157 template<typename CHAR> | 164 template <typename CHAR> |
158 bool DoCanonicalize(const CHAR* in_spec, | 165 bool DoCanonicalize(const CHAR* spec, |
159 int in_spec_len, | 166 int spec_len, |
160 bool trim_path_end, | 167 bool trim_path_end, |
168 WhitespaceRemovalPolicy whitespace_policy, | |
161 CharsetConverter* charset_converter, | 169 CharsetConverter* charset_converter, |
162 CanonOutput* output, | 170 CanonOutput* output, |
163 Parsed* output_parsed) { | 171 Parsed* output_parsed) { |
164 // Remove any whitespace from the middle of the relative URL, possibly | 172 // Remove any whitespace from the middle of the relative URL if necessary. |
165 // copying to the new buffer. | 173 // Possibly this will result in copying to the new buffer. |
166 RawCanonOutputT<CHAR> whitespace_buffer; | 174 RawCanonOutputT<CHAR> whitespace_buffer; |
167 int spec_len; | 175 if (whitespace_policy == REMOVE_WHITESPACE) |
168 const CHAR* spec = RemoveURLWhitespace(in_spec, in_spec_len, | 176 spec = RemoveURLWhitespace(spec, spec_len, &whitespace_buffer, &spec_len); |
169 &whitespace_buffer, &spec_len); | |
170 | 177 |
171 Parsed parsed_input; | 178 Parsed parsed_input; |
172 #ifdef WIN32 | 179 #ifdef WIN32 |
173 // For Windows, we allow things that look like absolute Windows paths to be | 180 // For Windows, we allow things that look like absolute Windows paths to be |
174 // fixed up magically to file URLs. This is done for IE compatibility. For | 181 // fixed up magically to file URLs. This is done for IE compatibility. For |
175 // example, this will change "c:/foo" into a file URL rather than treating | 182 // example, this will change "c:/foo" into a file URL rather than treating |
176 // it as a URL with the protocol "c". It also works for UNC ("\\foo\bar.txt"). | 183 // it as a URL with the protocol "c". It also works for UNC ("\\foo\bar.txt"). |
177 // There is similar logic in url_canon_relative.cc. | 184 // There is similar logic in url_canon_relative.cc. |
178 // | 185 // |
179 // For Mac & Unix, we don't do this (the equivalent would be "/foo/bar" which | 186 // For Mac & Unix, we don't do this (the equivalent would be "/foo/bar" which |
(...skipping 100 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
280 if (base_parsed_authority.host.is_nonempty()) { | 287 if (base_parsed_authority.host.is_nonempty()) { |
281 RawCanonOutputT<char> temporary_output; | 288 RawCanonOutputT<char> temporary_output; |
282 bool did_resolve_succeed = | 289 bool did_resolve_succeed = |
283 ResolveRelativeURL(base_spec, base_parsed_authority, false, relative, | 290 ResolveRelativeURL(base_spec, base_parsed_authority, false, relative, |
284 relative_component, charset_converter, | 291 relative_component, charset_converter, |
285 &temporary_output, output_parsed); | 292 &temporary_output, output_parsed); |
286 // The output_parsed is incorrect at this point (because it was built | 293 // The output_parsed is incorrect at this point (because it was built |
287 // based on base_parsed_authority instead of base_parsed) and needs to be | 294 // based on base_parsed_authority instead of base_parsed) and needs to be |
288 // re-created. | 295 // re-created. |
289 DoCanonicalize(temporary_output.data(), temporary_output.length(), true, | 296 DoCanonicalize(temporary_output.data(), temporary_output.length(), true, |
290 charset_converter, output, output_parsed); | 297 REMOVE_WHITESPACE, charset_converter, output, |
298 output_parsed); | |
291 return did_resolve_succeed; | 299 return did_resolve_succeed; |
292 } | 300 } |
293 } else if (is_relative) { | 301 } else if (is_relative) { |
294 // Relative, resolve and canonicalize. | 302 // Relative, resolve and canonicalize. |
295 bool file_base_scheme = base_parsed.scheme.is_nonempty() && | 303 bool file_base_scheme = base_parsed.scheme.is_nonempty() && |
296 DoCompareSchemeComponent(base_spec, base_parsed.scheme, kFileScheme); | 304 DoCompareSchemeComponent(base_spec, base_parsed.scheme, kFileScheme); |
297 return ResolveRelativeURL(base_spec, base_parsed, file_base_scheme, relative , | 305 return ResolveRelativeURL(base_spec, base_parsed, file_base_scheme, relative , |
298 relative_component, charset_converter, output, | 306 relative_component, charset_converter, output, |
299 output_parsed); | 307 output_parsed); |
300 } | 308 } |
301 | 309 |
302 // Not relative, canonicalize the input. | 310 // Not relative, canonicalize the input. |
303 return DoCanonicalize(relative, relative_length, true, charset_converter, | 311 return DoCanonicalize(relative, relative_length, true, DONT_REMOVE_WHITESPACE, |
304 output, output_parsed); | 312 charset_converter, output, output_parsed); |
305 } | 313 } |
306 | 314 |
307 template<typename CHAR> | 315 template<typename CHAR> |
308 bool DoReplaceComponents(const char* spec, | 316 bool DoReplaceComponents(const char* spec, |
309 int spec_len, | 317 int spec_len, |
310 const Parsed& parsed, | 318 const Parsed& parsed, |
311 const Replacements<CHAR>& replacements, | 319 const Replacements<CHAR>& replacements, |
312 CharsetConverter* charset_converter, | 320 CharsetConverter* charset_converter, |
313 CanonOutput* output, | 321 CanonOutput* output, |
314 Parsed* out_parsed) { | 322 Parsed* out_parsed) { |
(...skipping 26 matching lines...) Expand all Loading... | |
341 if (spec_len - spec_after_colon > 0) { | 349 if (spec_len - spec_after_colon > 0) { |
342 scheme_replaced.Append(&spec[spec_after_colon], | 350 scheme_replaced.Append(&spec[spec_after_colon], |
343 spec_len - spec_after_colon); | 351 spec_len - spec_after_colon); |
344 } | 352 } |
345 | 353 |
346 // We now need to completely re-parse the resulting string since its meaning | 354 // We now need to completely re-parse the resulting string since its meaning |
347 // may have changed with the different scheme. | 355 // may have changed with the different scheme. |
348 RawCanonOutput<128> recanonicalized; | 356 RawCanonOutput<128> recanonicalized; |
349 Parsed recanonicalized_parsed; | 357 Parsed recanonicalized_parsed; |
350 DoCanonicalize(scheme_replaced.data(), scheme_replaced.length(), true, | 358 DoCanonicalize(scheme_replaced.data(), scheme_replaced.length(), true, |
351 charset_converter, | 359 REMOVE_WHITESPACE, charset_converter, &recanonicalized, |
352 &recanonicalized, &recanonicalized_parsed); | 360 &recanonicalized_parsed); |
353 | 361 |
354 // Recurse using the version with the scheme already replaced. This will now | 362 // Recurse using the version with the scheme already replaced. This will now |
355 // use the replacement rules for the new scheme. | 363 // use the replacement rules for the new scheme. |
356 // | 364 // |
357 // Warning: this code assumes that ReplaceComponents will re-check all | 365 // Warning: this code assumes that ReplaceComponents will re-check all |
358 // components for validity. This is because we can't fail if DoCanonicalize | 366 // components for validity. This is because we can't fail if DoCanonicalize |
359 // failed above since theoretically the thing making it fail could be | 367 // failed above since theoretically the thing making it fail could be |
360 // getting replaced here. If ReplaceComponents didn't re-check everything, | 368 // getting replaced here. If ReplaceComponents didn't re-check everything, |
361 // we wouldn't know if something *not* getting replaced is a problem. | 369 // we wouldn't know if something *not* getting replaced is a problem. |
362 // If the scheme-specific replacers are made more intelligent so they don't | 370 // If the scheme-specific replacers are made more intelligent so they don't |
(...skipping 165 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
528 | 536 |
529 return true; | 537 return true; |
530 } | 538 } |
531 | 539 |
532 bool Canonicalize(const char* spec, | 540 bool Canonicalize(const char* spec, |
533 int spec_len, | 541 int spec_len, |
534 bool trim_path_end, | 542 bool trim_path_end, |
535 CharsetConverter* charset_converter, | 543 CharsetConverter* charset_converter, |
536 CanonOutput* output, | 544 CanonOutput* output, |
537 Parsed* output_parsed) { | 545 Parsed* output_parsed) { |
538 return DoCanonicalize(spec, spec_len, trim_path_end, charset_converter, | 546 return DoCanonicalize(spec, spec_len, trim_path_end, REMOVE_WHITESPACE, |
539 output, output_parsed); | 547 charset_converter, output, output_parsed); |
540 } | 548 } |
541 | 549 |
542 bool Canonicalize(const base::char16* spec, | 550 bool Canonicalize(const base::char16* spec, |
543 int spec_len, | 551 int spec_len, |
544 bool trim_path_end, | 552 bool trim_path_end, |
545 CharsetConverter* charset_converter, | 553 CharsetConverter* charset_converter, |
546 CanonOutput* output, | 554 CanonOutput* output, |
547 Parsed* output_parsed) { | 555 Parsed* output_parsed) { |
548 return DoCanonicalize(spec, spec_len, trim_path_end, charset_converter, | 556 return DoCanonicalize(spec, spec_len, trim_path_end, REMOVE_WHITESPACE, |
549 output, output_parsed); | 557 charset_converter, output, output_parsed); |
550 } | 558 } |
551 | 559 |
552 bool ResolveRelative(const char* base_spec, | 560 bool ResolveRelative(const char* base_spec, |
553 int base_spec_len, | 561 int base_spec_len, |
554 const Parsed& base_parsed, | 562 const Parsed& base_parsed, |
555 const char* relative, | 563 const char* relative, |
556 int relative_length, | 564 int relative_length, |
557 CharsetConverter* charset_converter, | 565 CharsetConverter* charset_converter, |
558 CanonOutput* output, | 566 CanonOutput* output, |
559 Parsed* output_parsed) { | 567 Parsed* output_parsed) { |
(...skipping 103 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
663 return DoCompareSchemeComponent(spec, component, compare_to); | 671 return DoCompareSchemeComponent(spec, component, compare_to); |
664 } | 672 } |
665 | 673 |
666 bool CompareSchemeComponent(const base::char16* spec, | 674 bool CompareSchemeComponent(const base::char16* spec, |
667 const Component& component, | 675 const Component& component, |
668 const char* compare_to) { | 676 const char* compare_to) { |
669 return DoCompareSchemeComponent(spec, component, compare_to); | 677 return DoCompareSchemeComponent(spec, component, compare_to); |
670 } | 678 } |
671 | 679 |
672 } // namespace url | 680 } // namespace url |
OLD | NEW |