Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1658)

Side by Side Diff: url/url_util.cc

Issue 2540893004: [url] Avoid scanning for whitespace twice during ResolveRelative (Closed)
Patch Set: minor tweaks + one sanity test Created 4 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « url/gurl_unittest.cc ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2013 The Chromium Authors. All rights reserved. 1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "url/url_util.h" 5 #include "url/url_util.h"
6 6
7 #include <stddef.h> 7 #include <stddef.h>
8 #include <string.h> 8 #include <string.h>
9 #include <vector> 9 #include <vector>
10 10
11 #include "base/debug/leak_annotations.h" 11 #include "base/debug/leak_annotations.h"
12 #include "base/logging.h" 12 #include "base/logging.h"
13 #include "base/strings/string_util.h" 13 #include "base/strings/string_util.h"
14 #include "url/url_canon_internal.h" 14 #include "url/url_canon_internal.h"
15 #include "url/url_file.h" 15 #include "url/url_file.h"
16 #include "url/url_util_internal.h" 16 #include "url/url_util_internal.h"
17 17
18 namespace url { 18 namespace url {
19 19
20 namespace { 20 namespace {
21 21
22 // Pass this enum through for methods which would like to know if whitespace
23 // removal is necessary.
24 enum WhitespaceRemovalPolicy {
25 REMOVE_WHITESPACE,
26 DONT_REMOVE_WHITESPACE,
Mike West 2016/12/01 09:39:36 Nit: I'd prefer `DO_NOT` to `DONT`. The lack of a
Charlie Harrison 2016/12/01 13:27:37 Done.
27 };
28
22 const int kNumStandardURLSchemes = 10; 29 const int kNumStandardURLSchemes = 10;
23 const SchemeWithType kStandardURLSchemes[kNumStandardURLSchemes] = { 30 const SchemeWithType kStandardURLSchemes[kNumStandardURLSchemes] = {
24 {kHttpScheme, SCHEME_WITH_PORT}, 31 {kHttpScheme, SCHEME_WITH_PORT},
25 {kHttpsScheme, SCHEME_WITH_PORT}, 32 {kHttpsScheme, SCHEME_WITH_PORT},
26 // Yes, file URLs can have a hostname, so file URLs should be handled as 33 // Yes, file URLs can have a hostname, so file URLs should be handled as
27 // "standard". File URLs never have a port as specified by the SchemeType 34 // "standard". File URLs never have a port as specified by the SchemeType
28 // field. 35 // field.
29 {kFileScheme, SCHEME_WITHOUT_PORT}, 36 {kFileScheme, SCHEME_WITHOUT_PORT},
30 {kFtpScheme, SCHEME_WITH_PORT}, 37 {kFtpScheme, SCHEME_WITH_PORT},
31 {kGopherScheme, SCHEME_WITH_PORT}, 38 {kGopherScheme, SCHEME_WITH_PORT},
(...skipping 115 matching lines...) Expand 10 before | Expand all | Expand 10 after
147 // No scheme. 154 // No scheme.
148 if (found_scheme) 155 if (found_scheme)
149 *found_scheme = Component(); 156 *found_scheme = Component();
150 return false; 157 return false;
151 } 158 }
152 if (found_scheme) 159 if (found_scheme)
153 *found_scheme = our_scheme; 160 *found_scheme = our_scheme;
154 return DoCompareSchemeComponent(spec, our_scheme, compare); 161 return DoCompareSchemeComponent(spec, our_scheme, compare);
155 } 162 }
156 163
157 template<typename CHAR> 164 template <typename CHAR>
158 bool DoCanonicalize(const CHAR* in_spec, 165 bool DoCanonicalize(const CHAR* spec,
159 int in_spec_len, 166 int spec_len,
160 bool trim_path_end, 167 bool trim_path_end,
168 WhitespaceRemovalPolicy whitespace_policy,
161 CharsetConverter* charset_converter, 169 CharsetConverter* charset_converter,
162 CanonOutput* output, 170 CanonOutput* output,
163 Parsed* output_parsed) { 171 Parsed* output_parsed) {
164 // Remove any whitespace from the middle of the relative URL, possibly 172 // Remove any whitespace from the middle of the relative URL if necessary.
165 // copying to the new buffer. 173 // Possibly this will result in copying to the new buffer.
166 RawCanonOutputT<CHAR> whitespace_buffer; 174 RawCanonOutputT<CHAR> whitespace_buffer;
167 int spec_len; 175 if (whitespace_policy == REMOVE_WHITESPACE)
168 const CHAR* spec = RemoveURLWhitespace(in_spec, in_spec_len, 176 spec = RemoveURLWhitespace(spec, spec_len, &whitespace_buffer, &spec_len);
169 &whitespace_buffer, &spec_len);
170 177
171 Parsed parsed_input; 178 Parsed parsed_input;
172 #ifdef WIN32 179 #ifdef WIN32
173 // For Windows, we allow things that look like absolute Windows paths to be 180 // For Windows, we allow things that look like absolute Windows paths to be
174 // fixed up magically to file URLs. This is done for IE compatibility. For 181 // fixed up magically to file URLs. This is done for IE compatibility. For
175 // example, this will change "c:/foo" into a file URL rather than treating 182 // example, this will change "c:/foo" into a file URL rather than treating
176 // it as a URL with the protocol "c". It also works for UNC ("\\foo\bar.txt"). 183 // it as a URL with the protocol "c". It also works for UNC ("\\foo\bar.txt").
177 // There is similar logic in url_canon_relative.cc for 184 // There is similar logic in url_canon_relative.cc for
178 // 185 //
179 // For Mac & Unix, we don't do this (the equivalent would be "/foo/bar" which 186 // For Mac & Unix, we don't do this (the equivalent would be "/foo/bar" which
(...skipping 100 matching lines...) Expand 10 before | Expand all | Expand 10 after
280 if (base_parsed_authority.host.is_nonempty()) { 287 if (base_parsed_authority.host.is_nonempty()) {
281 RawCanonOutputT<char> temporary_output; 288 RawCanonOutputT<char> temporary_output;
282 bool did_resolve_succeed = 289 bool did_resolve_succeed =
283 ResolveRelativeURL(base_spec, base_parsed_authority, false, relative, 290 ResolveRelativeURL(base_spec, base_parsed_authority, false, relative,
284 relative_component, charset_converter, 291 relative_component, charset_converter,
285 &temporary_output, output_parsed); 292 &temporary_output, output_parsed);
286 // The output_parsed is incorrect at this point (because it was built 293 // The output_parsed is incorrect at this point (because it was built
287 // based on base_parsed_authority instead of base_parsed) and needs to be 294 // based on base_parsed_authority instead of base_parsed) and needs to be
288 // re-created. 295 // re-created.
289 DoCanonicalize(temporary_output.data(), temporary_output.length(), true, 296 DoCanonicalize(temporary_output.data(), temporary_output.length(), true,
290 charset_converter, output, output_parsed); 297 REMOVE_WHITESPACE, charset_converter, output,
298 output_parsed);
291 return did_resolve_succeed; 299 return did_resolve_succeed;
292 } 300 }
293 } else if (is_relative) { 301 } else if (is_relative) {
294 // Relative, resolve and canonicalize. 302 // Relative, resolve and canonicalize.
295 bool file_base_scheme = base_parsed.scheme.is_nonempty() && 303 bool file_base_scheme = base_parsed.scheme.is_nonempty() &&
296 DoCompareSchemeComponent(base_spec, base_parsed.scheme, kFileScheme); 304 DoCompareSchemeComponent(base_spec, base_parsed.scheme, kFileScheme);
297 return ResolveRelativeURL(base_spec, base_parsed, file_base_scheme, relative, 305 return ResolveRelativeURL(base_spec, base_parsed, file_base_scheme, relative,
298 relative_component, charset_converter, output, 306 relative_component, charset_converter, output,
299 output_parsed); 307 output_parsed);
300 } 308 }
301 309
302 // Not relative, canonicalize the input. 310 // Not relative, canonicalize the input.
303 return DoCanonicalize(relative, relative_length, true, charset_converter, 311 return DoCanonicalize(relative, relative_length, true, DONT_REMOVE_WHITESPACE,
304 output, output_parsed); 312 charset_converter, output, output_parsed);
305 } 313 }
306 314
307 template<typename CHAR> 315 template<typename CHAR>
308 bool DoReplaceComponents(const char* spec, 316 bool DoReplaceComponents(const char* spec,
309 int spec_len, 317 int spec_len,
310 const Parsed& parsed, 318 const Parsed& parsed,
311 const Replacements<CHAR>& replacements, 319 const Replacements<CHAR>& replacements,
312 CharsetConverter* charset_converter, 320 CharsetConverter* charset_converter,
313 CanonOutput* output, 321 CanonOutput* output,
314 Parsed* out_parsed) { 322 Parsed* out_parsed) {
(...skipping 26 matching lines...) Expand all
341 if (spec_len - spec_after_colon > 0) { 349 if (spec_len - spec_after_colon > 0) {
342 scheme_replaced.Append(&spec[spec_after_colon], 350 scheme_replaced.Append(&spec[spec_after_colon],
343 spec_len - spec_after_colon); 351 spec_len - spec_after_colon);
344 } 352 }
345 353
346 // We now need to completely re-parse the resulting string since its meaning 354 // We now need to completely re-parse the resulting string since its meaning
347 // may have changed with the different scheme. 355 // may have changed with the different scheme.
348 RawCanonOutput<128> recanonicalized; 356 RawCanonOutput<128> recanonicalized;
349 Parsed recanonicalized_parsed; 357 Parsed recanonicalized_parsed;
350 DoCanonicalize(scheme_replaced.data(), scheme_replaced.length(), true, 358 DoCanonicalize(scheme_replaced.data(), scheme_replaced.length(), true,
351 charset_converter, 359 REMOVE_WHITESPACE, charset_converter, &recanonicalized,
352 &recanonicalized, &recanonicalized_parsed); 360 &recanonicalized_parsed);
353 361
354 // Recurse using the version with the scheme already replaced. This will now 362 // Recurse using the version with the scheme already replaced. This will now
355 // use the replacement rules for the new scheme. 363 // use the replacement rules for the new scheme.
356 // 364 //
357 // Warning: this code assumes that ReplaceComponents will re-check all 365 // Warning: this code assumes that ReplaceComponents will re-check all
358 // components for validity. This is because we can't fail if DoCanonicalize 366 // components for validity. This is because we can't fail if DoCanonicalize
359 // failed above since theoretically the thing making it fail could be 367 // failed above since theoretically the thing making it fail could be
360 // getting replaced here. If ReplaceComponents didn't re-check everything, 368 // getting replaced here. If ReplaceComponents didn't re-check everything,
361 // we wouldn't know if something *not* getting replaced is a problem. 369 // we wouldn't know if something *not* getting replaced is a problem.
362 // If the scheme-specific replacers are made more intelligent so they don't 370 // If the scheme-specific replacers are made more intelligent so they don't
(...skipping 165 matching lines...) Expand 10 before | Expand all | Expand 10 after
528 536
529 return true; 537 return true;
530 } 538 }
531 539
532 bool Canonicalize(const char* spec, 540 bool Canonicalize(const char* spec,
533 int spec_len, 541 int spec_len,
534 bool trim_path_end, 542 bool trim_path_end,
535 CharsetConverter* charset_converter, 543 CharsetConverter* charset_converter,
536 CanonOutput* output, 544 CanonOutput* output,
537 Parsed* output_parsed) { 545 Parsed* output_parsed) {
538 return DoCanonicalize(spec, spec_len, trim_path_end, charset_converter, 546 return DoCanonicalize(spec, spec_len, trim_path_end, REMOVE_WHITESPACE,
539 output, output_parsed); 547 charset_converter, output, output_parsed);
540 } 548 }
541 549
542 bool Canonicalize(const base::char16* spec, 550 bool Canonicalize(const base::char16* spec,
543 int spec_len, 551 int spec_len,
544 bool trim_path_end, 552 bool trim_path_end,
545 CharsetConverter* charset_converter, 553 CharsetConverter* charset_converter,
546 CanonOutput* output, 554 CanonOutput* output,
547 Parsed* output_parsed) { 555 Parsed* output_parsed) {
548 return DoCanonicalize(spec, spec_len, trim_path_end, charset_converter, 556 return DoCanonicalize(spec, spec_len, trim_path_end, REMOVE_WHITESPACE,
549 output, output_parsed); 557 charset_converter, output, output_parsed);
550 } 558 }
551 559
552 bool ResolveRelative(const char* base_spec, 560 bool ResolveRelative(const char* base_spec,
553 int base_spec_len, 561 int base_spec_len,
554 const Parsed& base_parsed, 562 const Parsed& base_parsed,
555 const char* relative, 563 const char* relative,
556 int relative_length, 564 int relative_length,
557 CharsetConverter* charset_converter, 565 CharsetConverter* charset_converter,
558 CanonOutput* output, 566 CanonOutput* output,
559 Parsed* output_parsed) { 567 Parsed* output_parsed) {
(...skipping 103 matching lines...) Expand 10 before | Expand all | Expand 10 after
663 return DoCompareSchemeComponent(spec, component, compare_to); 671 return DoCompareSchemeComponent(spec, component, compare_to);
664 } 672 }
665 673
666 bool CompareSchemeComponent(const base::char16* spec, 674 bool CompareSchemeComponent(const base::char16* spec,
667 const Component& component, 675 const Component& component,
668 const char* compare_to) { 676 const char* compare_to) {
669 return DoCompareSchemeComponent(spec, component, compare_to); 677 return DoCompareSchemeComponent(spec, component, compare_to);
670 } 678 }
671 679
672 } // namespace url 680 } // namespace url
OLDNEW
« no previous file with comments | « url/gurl_unittest.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698