url/url_util.cc - Issue 2540893004: [url] Avoid scanning for whitespace twice during ResolveRelative

Side by Side Diff: url/url_util.cc

Issue 2540893004: [url] Avoid scanning for whitespace twice during ResolveRelative (Closed)

Patch Set: DONT -> DO_NOT Created 4 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright 2013 The Chromium Authors. All rights reserved.	1 // Copyright 2013 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "url/url_util.h"	5 #include "url/url_util.h"

6	6

7 #include <stddef.h>	7 #include <stddef.h>

8 #include <string.h>	8 #include <string.h>

9 #include <vector>	9 #include <vector>

10	10

11 #include "base/debug/leak_annotations.h"	11 #include "base/debug/leak_annotations.h"

12 #include "base/logging.h"	12 #include "base/logging.h"

13 #include "base/strings/string_util.h"	13 #include "base/strings/string_util.h"

14 #include "url/url_canon_internal.h"	14 #include "url/url_canon_internal.h"

15 #include "url/url_file.h"	15 #include "url/url_file.h"

16 #include "url/url_util_internal.h"	16 #include "url/url_util_internal.h"

17	17

18 namespace url {	18 namespace url {

19	19

20 namespace {	20 namespace {

21	21

	22 // Pass this enum through for methods which would like to know if whitespace

	23 // removal is necessary.

	24 enum WhitespaceRemovalPolicy {

	25 REMOVE_WHITESPACE,

	26 DO_NOT_REMOVE_WHITESPACE,

	27 };

	28

22 const int kNumStandardURLSchemes = 10;	29 const int kNumStandardURLSchemes = 10;

23 const SchemeWithType kStandardURLSchemes[kNumStandardURLSchemes] = {	30 const SchemeWithType kStandardURLSchemes[kNumStandardURLSchemes] = {

24 {kHttpScheme, SCHEME_WITH_PORT},	31 {kHttpScheme, SCHEME_WITH_PORT},

25 {kHttpsScheme, SCHEME_WITH_PORT},	32 {kHttpsScheme, SCHEME_WITH_PORT},

26 // Yes, file URLs can have a hostname, so file URLs should be handled as	33 // Yes, file URLs can have a hostname, so file URLs should be handled as

27 // "standard". File URLs never have a port as specified by the SchemeType	34 // "standard". File URLs never have a port as specified by the SchemeType

28 // field.	35 // field.

29 {kFileScheme, SCHEME_WITHOUT_PORT},	36 {kFileScheme, SCHEME_WITHOUT_PORT},

30 {kFtpScheme, SCHEME_WITH_PORT},	37 {kFtpScheme, SCHEME_WITH_PORT},

31 {kGopherScheme, SCHEME_WITH_PORT},	38 {kGopherScheme, SCHEME_WITH_PORT},

(...skipping 115 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
147 // No scheme.	154 // No scheme.

148 if (found_scheme)	155 if (found_scheme)

149 *found_scheme = Component();	156 *found_scheme = Component();

150 return false;	157 return false;

151 }	158 }

152 if (found_scheme)	159 if (found_scheme)

153 *found_scheme = our_scheme;	160 *found_scheme = our_scheme;

154 return DoCompareSchemeComponent(spec, our_scheme, compare);	161 return DoCompareSchemeComponent(spec, our_scheme, compare);

155 }	162 }

156	163

157 template<typename CHAR>	164 template <typename CHAR>

158 bool DoCanonicalize(const CHAR* in_spec,	165 bool DoCanonicalize(const CHAR* spec,

159 int in_spec_len,	166 int spec_len,

160 bool trim_path_end,	167 bool trim_path_end,

	168 WhitespaceRemovalPolicy whitespace_policy,

161 CharsetConverter* charset_converter,	169 CharsetConverter* charset_converter,

162 CanonOutput* output,	170 CanonOutput* output,

163 Parsed* output_parsed) {	171 Parsed* output_parsed) {

164 // Remove any whitespace from the middle of the relative URL, possibly	172 // Remove any whitespace from the middle of the relative URL if necessary.

165 // copying to the new buffer.	173 // Possibly this will result in copying to the new buffer.

166 RawCanonOutputT<CHAR> whitespace_buffer;	174 RawCanonOutputT<CHAR> whitespace_buffer;

167 int spec_len;	175 if (whitespace_policy == REMOVE_WHITESPACE)

168 const CHAR* spec = RemoveURLWhitespace(in_spec, in_spec_len,	176 spec = RemoveURLWhitespace(spec, spec_len, &whitespace_buffer, &spec_len);

169 &whitespace_buffer, &spec_len);

170	177

171 Parsed parsed_input;	178 Parsed parsed_input;

172 #ifdef WIN32	179 #ifdef WIN32

173 // For Windows, we allow things that look like absolute Windows paths to be	180 // For Windows, we allow things that look like absolute Windows paths to be

174 // fixed up magically to file URLs. This is done for IE compatibility. For	181 // fixed up magically to file URLs. This is done for IE compatibility. For

175 // example, this will change "c:/foo" into a file URL rather than treating	182 // example, this will change "c:/foo" into a file URL rather than treating

176 // it as a URL with the protocol "c". It also works for UNC ("\\foo\bar.txt").	183 // it as a URL with the protocol "c". It also works for UNC ("\\foo\bar.txt").

177 // There is similar logic in url_canon_relative.cc for	184 // There is similar logic in url_canon_relative.cc for

178 //	185 //

179 // For Mac & Unix, we don't do this (the equivalent would be "/foo/bar" which	186 // For Mac & Unix, we don't do this (the equivalent would be "/foo/bar" which

(...skipping 100 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
280 if (base_parsed_authority.host.is_nonempty()) {	287 if (base_parsed_authority.host.is_nonempty()) {

281 RawCanonOutputT<char> temporary_output;	288 RawCanonOutputT<char> temporary_output;

282 bool did_resolve_succeed =	289 bool did_resolve_succeed =

283 ResolveRelativeURL(base_spec, base_parsed_authority, false, relative,	290 ResolveRelativeURL(base_spec, base_parsed_authority, false, relative,

284 relative_component, charset_converter,	291 relative_component, charset_converter,

285 &temporary_output, output_parsed);	292 &temporary_output, output_parsed);

286 // The output_parsed is incorrect at this point (because it was built	293 // The output_parsed is incorrect at this point (because it was built

287 // based on base_parsed_authority instead of base_parsed) and needs to be	294 // based on base_parsed_authority instead of base_parsed) and needs to be

288 // re-created.	295 // re-created.

289 DoCanonicalize(temporary_output.data(), temporary_output.length(), true,	296 DoCanonicalize(temporary_output.data(), temporary_output.length(), true,

290 charset_converter, output, output_parsed);	297 REMOVE_WHITESPACE, charset_converter, output,

	298 output_parsed);

291 return did_resolve_succeed;	299 return did_resolve_succeed;

292 }	300 }

293 } else if (is_relative) {	301 } else if (is_relative) {

294 // Relative, resolve and canonicalize.	302 // Relative, resolve and canonicalize.

295 bool file_base_scheme = base_parsed.scheme.is_nonempty() &&	303 bool file_base_scheme = base_parsed.scheme.is_nonempty() &&

296 DoCompareSchemeComponent(base_spec, base_parsed.scheme, kFileScheme);	304 DoCompareSchemeComponent(base_spec, base_parsed.scheme, kFileScheme);

297 return ResolveRelativeURL(base_spec, base_parsed, file_base_scheme, relative ,	305 return ResolveRelativeURL(base_spec, base_parsed, file_base_scheme, relative ,

298 relative_component, charset_converter, output,	306 relative_component, charset_converter, output,

299 output_parsed);	307 output_parsed);

300 }	308 }

301	309

302 // Not relative, canonicalize the input.	310 // Not relative, canonicalize the input.

303 return DoCanonicalize(relative, relative_length, true, charset_converter,	311 return DoCanonicalize(relative, relative_length, true,

304 output, output_parsed);	312 DO_NOT_REMOVE_WHITESPACE, charset_converter, output,

	313 output_parsed);

305 }	314 }

306	315

307 template<typename CHAR>	316 template<typename CHAR>

308 bool DoReplaceComponents(const char* spec,	317 bool DoReplaceComponents(const char* spec,

309 int spec_len,	318 int spec_len,

310 const Parsed& parsed,	319 const Parsed& parsed,

311 const Replacements<CHAR>& replacements,	320 const Replacements<CHAR>& replacements,

312 CharsetConverter* charset_converter,	321 CharsetConverter* charset_converter,

313 CanonOutput* output,	322 CanonOutput* output,

314 Parsed* out_parsed) {	323 Parsed* out_parsed) {

(...skipping 26 matching lines...) Expand all Loading...
341 if (spec_len - spec_after_colon > 0) {	350 if (spec_len - spec_after_colon > 0) {

342 scheme_replaced.Append(&spec[spec_after_colon],	351 scheme_replaced.Append(&spec[spec_after_colon],

343 spec_len - spec_after_colon);	352 spec_len - spec_after_colon);

344 }	353 }

345	354

346 // We now need to completely re-parse the resulting string since its meaning	355 // We now need to completely re-parse the resulting string since its meaning

347 // may have changed with the different scheme.	356 // may have changed with the different scheme.

348 RawCanonOutput<128> recanonicalized;	357 RawCanonOutput<128> recanonicalized;

349 Parsed recanonicalized_parsed;	358 Parsed recanonicalized_parsed;

350 DoCanonicalize(scheme_replaced.data(), scheme_replaced.length(), true,	359 DoCanonicalize(scheme_replaced.data(), scheme_replaced.length(), true,

351 charset_converter,	360 REMOVE_WHITESPACE, charset_converter, &recanonicalized,

352 &recanonicalized, &recanonicalized_parsed);	361 &recanonicalized_parsed);

353	362

354 // Recurse using the version with the scheme already replaced. This will now	363 // Recurse using the version with the scheme already replaced. This will now

355 // use the replacement rules for the new scheme.	364 // use the replacement rules for the new scheme.

356 //	365 //

357 // Warning: this code assumes that ReplaceComponents will re-check all	366 // Warning: this code assumes that ReplaceComponents will re-check all

358 // components for validity. This is because we can't fail if DoCanonicalize	367 // components for validity. This is because we can't fail if DoCanonicalize

359 // failed above since theoretically the thing making it fail could be	368 // failed above since theoretically the thing making it fail could be

360 // getting replaced here. If ReplaceComponents didn't re-check everything,	369 // getting replaced here. If ReplaceComponents didn't re-check everything,

361 // we wouldn't know if something not getting replaced is a problem.	370 // we wouldn't know if something not getting replaced is a problem.

362 // If the scheme-specific replacers are made more intelligent so they don't	371 // If the scheme-specific replacers are made more intelligent so they don't

(...skipping 165 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
528	537

529 return true;	538 return true;

530 }	539 }

531	540

532 bool Canonicalize(const char* spec,	541 bool Canonicalize(const char* spec,

533 int spec_len,	542 int spec_len,

534 bool trim_path_end,	543 bool trim_path_end,

535 CharsetConverter* charset_converter,	544 CharsetConverter* charset_converter,

536 CanonOutput* output,	545 CanonOutput* output,

537 Parsed* output_parsed) {	546 Parsed* output_parsed) {

538 return DoCanonicalize(spec, spec_len, trim_path_end, charset_converter,	547 return DoCanonicalize(spec, spec_len, trim_path_end, REMOVE_WHITESPACE,

539 output, output_parsed);	548 charset_converter, output, output_parsed);

540 }	549 }

541	550

542 bool Canonicalize(const base::char16* spec,	551 bool Canonicalize(const base::char16* spec,

543 int spec_len,	552 int spec_len,

544 bool trim_path_end,	553 bool trim_path_end,

545 CharsetConverter* charset_converter,	554 CharsetConverter* charset_converter,

546 CanonOutput* output,	555 CanonOutput* output,

547 Parsed* output_parsed) {	556 Parsed* output_parsed) {

548 return DoCanonicalize(spec, spec_len, trim_path_end, charset_converter,	557 return DoCanonicalize(spec, spec_len, trim_path_end, REMOVE_WHITESPACE,

549 output, output_parsed);	558 charset_converter, output, output_parsed);

550 }	559 }

551	560

552 bool ResolveRelative(const char* base_spec,	561 bool ResolveRelative(const char* base_spec,

553 int base_spec_len,	562 int base_spec_len,

554 const Parsed& base_parsed,	563 const Parsed& base_parsed,

555 const char* relative,	564 const char* relative,

556 int relative_length,	565 int relative_length,

557 CharsetConverter* charset_converter,	566 CharsetConverter* charset_converter,

558 CanonOutput* output,	567 CanonOutput* output,

559 Parsed* output_parsed) {	568 Parsed* output_parsed) {

(...skipping 103 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
663 return DoCompareSchemeComponent(spec, component, compare_to);	672 return DoCompareSchemeComponent(spec, component, compare_to);

664 }	673 }

665	674

666 bool CompareSchemeComponent(const base::char16* spec,	675 bool CompareSchemeComponent(const base::char16* spec,

667 const Component& component,	676 const Component& component,

668 const char* compare_to) {	677 const char* compare_to) {

669 return DoCompareSchemeComponent(spec, component, compare_to);	678 return DoCompareSchemeComponent(spec, component, compare_to);

670 }	679 }

671	680

672 } // namespace url	681 } // namespace url

OLD	NEW

« no previous file with comments | « url/gurl_unittest.cc ('k') | no next file » | no next file with comments »