url/url_util.cc - Issue 2540893004: [url] Avoid scanning for whitespace twice during ResolveRelative

Side by Side Diff: url/url_util.cc

Issue 2540893004: [url] Avoid scanning for whitespace twice during ResolveRelative (Closed)

Patch Set: minor tweaks + one sanity test Created 4 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright 2013 The Chromium Authors. All rights reserved.	1 // Copyright 2013 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "url/url_util.h"	5 #include "url/url_util.h"

6	6

7 #include <stddef.h>	7 #include <stddef.h>

8 #include <string.h>	8 #include <string.h>

9 #include <vector>	9 #include <vector>

10	10

11 #include "base/debug/leak_annotations.h"	11 #include "base/debug/leak_annotations.h"

12 #include "base/logging.h"	12 #include "base/logging.h"

13 #include "base/strings/string_util.h"	13 #include "base/strings/string_util.h"

14 #include "url/url_canon_internal.h"	14 #include "url/url_canon_internal.h"

15 #include "url/url_file.h"	15 #include "url/url_file.h"

16 #include "url/url_util_internal.h"	16 #include "url/url_util_internal.h"

17	17

18 namespace url {	18 namespace url {

19	19

20 namespace {	20 namespace {

21	21

	22 // Pass this enum through for methods which would like to know if whitespace

	23 // removal is necessary.

	24 enum WhitespaceRemovalPolicy {

	25 REMOVE_WHITESPACE,

	26 DONT_REMOVE_WHITESPACE,
	Mike West 2016/12/01 09:39:36: Nit: I'd prefer `DO_NOT` to `DONT`. The lack of a quote just bothers me aesthetically. :) — Charlie Harrison 2016/12/01 13:27:37: Done.
	27 };

	28

22 const int kNumStandardURLSchemes = 10;	29 const int kNumStandardURLSchemes = 10;

23 const SchemeWithType kStandardURLSchemes[kNumStandardURLSchemes] = {	30 const SchemeWithType kStandardURLSchemes[kNumStandardURLSchemes] = {

24 {kHttpScheme, SCHEME_WITH_PORT},	31 {kHttpScheme, SCHEME_WITH_PORT},

25 {kHttpsScheme, SCHEME_WITH_PORT},	32 {kHttpsScheme, SCHEME_WITH_PORT},

26 // Yes, file URLs can have a hostname, so file URLs should be handled as	33 // Yes, file URLs can have a hostname, so file URLs should be handled as

27 // "standard". File URLs never have a port as specified by the SchemeType	34 // "standard". File URLs never have a port as specified by the SchemeType

28 // field.	35 // field.

29 {kFileScheme, SCHEME_WITHOUT_PORT},	36 {kFileScheme, SCHEME_WITHOUT_PORT},

30 {kFtpScheme, SCHEME_WITH_PORT},	37 {kFtpScheme, SCHEME_WITH_PORT},

31 {kGopherScheme, SCHEME_WITH_PORT},	38 {kGopherScheme, SCHEME_WITH_PORT},

(...skipping 115 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
147 // No scheme.	154 // No scheme.

148 if (found_scheme)	155 if (found_scheme)

149 *found_scheme = Component();	156 *found_scheme = Component();

150 return false;	157 return false;

151 }	158 }

152 if (found_scheme)	159 if (found_scheme)

153 *found_scheme = our_scheme;	160 *found_scheme = our_scheme;

154 return DoCompareSchemeComponent(spec, our_scheme, compare);	161 return DoCompareSchemeComponent(spec, our_scheme, compare);

155 }	162 }

156	163

157 template<typename CHAR>	164 template <typename CHAR>

158 bool DoCanonicalize(const CHAR* in_spec,	165 bool DoCanonicalize(const CHAR* spec,

159 int in_spec_len,	166 int spec_len,

160 bool trim_path_end,	167 bool trim_path_end,

	168 WhitespaceRemovalPolicy whitespace_policy,

161 CharsetConverter* charset_converter,	169 CharsetConverter* charset_converter,

162 CanonOutput* output,	170 CanonOutput* output,

163 Parsed* output_parsed) {	171 Parsed* output_parsed) {

164 // Remove any whitespace from the middle of the relative URL, possibly	172 // Remove any whitespace from the middle of the relative URL if necessary.

165 // copying to the new buffer.	173 // Possibly this will result in copying to the new buffer.

166 RawCanonOutputT<CHAR> whitespace_buffer;	174 RawCanonOutputT<CHAR> whitespace_buffer;

167 int spec_len;	175 if (whitespace_policy == REMOVE_WHITESPACE)

168 const CHAR* spec = RemoveURLWhitespace(in_spec, in_spec_len,	176 spec = RemoveURLWhitespace(spec, spec_len, &whitespace_buffer, &spec_len);

169 &whitespace_buffer, &spec_len);

170	177

171 Parsed parsed_input;	178 Parsed parsed_input;

172 #ifdef WIN32	179 #ifdef WIN32

173 // For Windows, we allow things that look like absolute Windows paths to be	180 // For Windows, we allow things that look like absolute Windows paths to be

174 // fixed up magically to file URLs. This is done for IE compatibility. For	181 // fixed up magically to file URLs. This is done for IE compatibility. For

175 // example, this will change "c:/foo" into a file URL rather than treating	182 // example, this will change "c:/foo" into a file URL rather than treating

176 // it as a URL with the protocol "c". It also works for UNC ("\\foo\bar.txt").	183 // it as a URL with the protocol "c". It also works for UNC ("\\foo\bar.txt").

177 // There is similar logic in url_canon_relative.cc for	184 // There is similar logic in url_canon_relative.cc for

178 //	185 //

179 // For Mac & Unix, we don't do this (the equivalent would be "/foo/bar" which	186 // For Mac & Unix, we don't do this (the equivalent would be "/foo/bar" which

(...skipping 100 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
280 if (base_parsed_authority.host.is_nonempty()) {	287 if (base_parsed_authority.host.is_nonempty()) {

281 RawCanonOutputT<char> temporary_output;	288 RawCanonOutputT<char> temporary_output;

282 bool did_resolve_succeed =	289 bool did_resolve_succeed =

283 ResolveRelativeURL(base_spec, base_parsed_authority, false, relative,	290 ResolveRelativeURL(base_spec, base_parsed_authority, false, relative,

284 relative_component, charset_converter,	291 relative_component, charset_converter,

285 &temporary_output, output_parsed);	292 &temporary_output, output_parsed);

286 // The output_parsed is incorrect at this point (because it was built	293 // The output_parsed is incorrect at this point (because it was built

287 // based on base_parsed_authority instead of base_parsed) and needs to be	294 // based on base_parsed_authority instead of base_parsed) and needs to be

288 // re-created.	295 // re-created.

289 DoCanonicalize(temporary_output.data(), temporary_output.length(), true,	296 DoCanonicalize(temporary_output.data(), temporary_output.length(), true,

290 charset_converter, output, output_parsed);	297 REMOVE_WHITESPACE, charset_converter, output,

	298 output_parsed);

291 return did_resolve_succeed;	299 return did_resolve_succeed;

292 }	300 }

293 } else if (is_relative) {	301 } else if (is_relative) {

294 // Relative, resolve and canonicalize.	302 // Relative, resolve and canonicalize.

295 bool file_base_scheme = base_parsed.scheme.is_nonempty() &&	303 bool file_base_scheme = base_parsed.scheme.is_nonempty() &&

296 DoCompareSchemeComponent(base_spec, base_parsed.scheme, kFileScheme);	304 DoCompareSchemeComponent(base_spec, base_parsed.scheme, kFileScheme);

297 return ResolveRelativeURL(base_spec, base_parsed, file_base_scheme, relative,	305 return ResolveRelativeURL(base_spec, base_parsed, file_base_scheme, relative,

298 relative_component, charset_converter, output,	306 relative_component, charset_converter, output,

299 output_parsed);	307 output_parsed);

300 }	308 }

301	309

302 // Not relative, canonicalize the input.	310 // Not relative, canonicalize the input.

303 return DoCanonicalize(relative, relative_length, true, charset_converter,	311 return DoCanonicalize(relative, relative_length, true, DONT_REMOVE_WHITESPACE,

304 output, output_parsed);	312 charset_converter, output, output_parsed);

305 }	313 }

306	314

307 template<typename CHAR>	315 template<typename CHAR>

308 bool DoReplaceComponents(const char* spec,	316 bool DoReplaceComponents(const char* spec,

309 int spec_len,	317 int spec_len,

310 const Parsed& parsed,	318 const Parsed& parsed,

311 const Replacements<CHAR>& replacements,	319 const Replacements<CHAR>& replacements,

312 CharsetConverter* charset_converter,	320 CharsetConverter* charset_converter,

313 CanonOutput* output,	321 CanonOutput* output,

314 Parsed* out_parsed) {	322 Parsed* out_parsed) {

(...skipping 26 matching lines...) Expand all Loading...
341 if (spec_len - spec_after_colon > 0) {	349 if (spec_len - spec_after_colon > 0) {

342 scheme_replaced.Append(&spec[spec_after_colon],	350 scheme_replaced.Append(&spec[spec_after_colon],

343 spec_len - spec_after_colon);	351 spec_len - spec_after_colon);

344 }	352 }

345	353

346 // We now need to completely re-parse the resulting string since its meaning	354 // We now need to completely re-parse the resulting string since its meaning

347 // may have changed with the different scheme.	355 // may have changed with the different scheme.

348 RawCanonOutput<128> recanonicalized;	356 RawCanonOutput<128> recanonicalized;

349 Parsed recanonicalized_parsed;	357 Parsed recanonicalized_parsed;

350 DoCanonicalize(scheme_replaced.data(), scheme_replaced.length(), true,	358 DoCanonicalize(scheme_replaced.data(), scheme_replaced.length(), true,

351 charset_converter,	359 REMOVE_WHITESPACE, charset_converter, &recanonicalized,

352 &recanonicalized, &recanonicalized_parsed);	360 &recanonicalized_parsed);

353	361

354 // Recurse using the version with the scheme already replaced. This will now	362 // Recurse using the version with the scheme already replaced. This will now

355 // use the replacement rules for the new scheme.	363 // use the replacement rules for the new scheme.

356 //	364 //

357 // Warning: this code assumes that ReplaceComponents will re-check all	365 // Warning: this code assumes that ReplaceComponents will re-check all

358 // components for validity. This is because we can't fail if DoCanonicalize	366 // components for validity. This is because we can't fail if DoCanonicalize

359 // failed above since theoretically the thing making it fail could be	367 // failed above since theoretically the thing making it fail could be

360 // getting replaced here. If ReplaceComponents didn't re-check everything,	368 // getting replaced here. If ReplaceComponents didn't re-check everything,

361 // we wouldn't know if something not getting replaced is a problem.	369 // we wouldn't know if something not getting replaced is a problem.

362 // If the scheme-specific replacers are made more intelligent so they don't	370 // If the scheme-specific replacers are made more intelligent so they don't

(...skipping 165 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
528	536

529 return true;	537 return true;

530 }	538 }

531	539

532 bool Canonicalize(const char* spec,	540 bool Canonicalize(const char* spec,

533 int spec_len,	541 int spec_len,

534 bool trim_path_end,	542 bool trim_path_end,

535 CharsetConverter* charset_converter,	543 CharsetConverter* charset_converter,

536 CanonOutput* output,	544 CanonOutput* output,

537 Parsed* output_parsed) {	545 Parsed* output_parsed) {

538 return DoCanonicalize(spec, spec_len, trim_path_end, charset_converter,	546 return DoCanonicalize(spec, spec_len, trim_path_end, REMOVE_WHITESPACE,

539 output, output_parsed);	547 charset_converter, output, output_parsed);

540 }	548 }

541	549

542 bool Canonicalize(const base::char16* spec,	550 bool Canonicalize(const base::char16* spec,

543 int spec_len,	551 int spec_len,

544 bool trim_path_end,	552 bool trim_path_end,

545 CharsetConverter* charset_converter,	553 CharsetConverter* charset_converter,

546 CanonOutput* output,	554 CanonOutput* output,

547 Parsed* output_parsed) {	555 Parsed* output_parsed) {

548 return DoCanonicalize(spec, spec_len, trim_path_end, charset_converter,	556 return DoCanonicalize(spec, spec_len, trim_path_end, REMOVE_WHITESPACE,

549 output, output_parsed);	557 charset_converter, output, output_parsed);

550 }	558 }

551	559

552 bool ResolveRelative(const char* base_spec,	560 bool ResolveRelative(const char* base_spec,

553 int base_spec_len,	561 int base_spec_len,

554 const Parsed& base_parsed,	562 const Parsed& base_parsed,

555 const char* relative,	563 const char* relative,

556 int relative_length,	564 int relative_length,

557 CharsetConverter* charset_converter,	565 CharsetConverter* charset_converter,

558 CanonOutput* output,	566 CanonOutput* output,

559 Parsed* output_parsed) {	567 Parsed* output_parsed) {

(...skipping 103 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
663 return DoCompareSchemeComponent(spec, component, compare_to);	671 return DoCompareSchemeComponent(spec, component, compare_to);

664 }	672 }

665	673

666 bool CompareSchemeComponent(const base::char16* spec,	674 bool CompareSchemeComponent(const base::char16* spec,

667 const Component& component,	675 const Component& component,

668 const char* compare_to) {	676 const char* compare_to) {

669 return DoCompareSchemeComponent(spec, component, compare_to);	677 return DoCompareSchemeComponent(spec, component, compare_to);

670 }	678 }

671	679

672 } // namespace url	680 } // namespace url

OLD	NEW

« no previous file with comments | « url/gurl_unittest.cc ('k') | no next file » | no next file with comments »