url_util.cc - Issue 2029803003: Update to Chromium //url at Chromium commit 79dc59ac7602413181079ecb463873e29a1d7d0a.

Side by Side Diff: url_util.cc

Issue 2029803003: Update to Chromium //url at Chromium commit 79dc59ac7602413181079ecb463873e29a1d7d0a. (Closed) Base URL: https://chromium.googlesource.com/external/github.com/domokit/gurl@master

Patch Set: Created 4 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright 2013 The Chromium Authors. All rights reserved.	1 // Copyright 2013 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "url/url_util.h"	5 #include "url/url_util.h"

6	6

7 #include <string.h>	7 #include <string.h>

8 #include <vector>	8 #include <vector>

9	9

10 #include "base/debug/leak_annotations.h"	10 #include "base/debug/leak_annotations.h"

11 #include "base/logging.h"	11 #include "base/logging.h"

	12 #include "base/strings/string_util.h"

12 #include "url/url_canon_internal.h"	13 #include "url/url_canon_internal.h"

13 #include "url/url_file.h"	14 #include "url/url_file.h"

14 #include "url/url_util_internal.h"	15 #include "url/url_util_internal.h"

15	16

16 namespace url {	17 namespace url {

17	18

18 namespace {	19 namespace {

19	20

20 // ASCII-specific tolower. The standard library's tolower is locale sensitive,

21 // so we don't want to use it here.

22 template<class Char>

23 inline Char ToLowerASCII(Char c) {

24 return (c >= 'A' && c <= 'Z') ? (c + ('a' - 'A')) : c;

25 }

26

27 // Backend for LowerCaseEqualsASCII.

28 template<typename Iter>

29 inline bool DoLowerCaseEqualsASCII(Iter a_begin, Iter a_end, const char* b) {

30 for (Iter it = a_begin; it != a_end; ++it, ++b) {

31 if (!*b || ToLowerASCII(*it) != *b)

32 return false;

33 }

34 return *b == 0;

35 }

36

37 const int kNumStandardURLSchemes = 8;	21 const int kNumStandardURLSchemes = 8;

38 const char* kStandardURLSchemes[kNumStandardURLSchemes] = {	22 const char* kStandardURLSchemes[kNumStandardURLSchemes] = {

39 kHttpScheme,	23 kHttpScheme,

40 kHttpsScheme,	24 kHttpsScheme,

41 kFileScheme, // Yes, file urls can have a hostname!	25 kFileScheme, // Yes, file URLs can have a hostname!

42 kFtpScheme,	26 kFtpScheme,

43 kGopherScheme,	27 kGopherScheme,

44 kWsScheme, // WebSocket.	28 kWsScheme, // WebSocket.

45 kWssScheme, // WebSocket secure.	29 kWssScheme, // WebSocket secure.

46 kFileSystemScheme,	30 kFileSystemScheme,

47 };	31 };

48	32

49 // List of the currently installed standard schemes. This list is lazily	33 // List of the currently installed standard schemes. This list is lazily

50 // initialized by InitStandardSchemes and is leaked on shutdown to prevent	34 // initialized by InitStandardSchemes and is leaked on shutdown to prevent

51 // any destructors from being called that will slow us down or cause problems.	35 // any destructors from being called that will slow us down or cause problems.

52 std::vector<const char*>* standard_schemes = NULL;	36 std::vector<const char*>* standard_schemes = NULL;

53	37

54 // See the LockStandardSchemes declaration in the header.	38 // See the LockStandardSchemes declaration in the header.

55 bool standard_schemes_locked = false;	39 bool standard_schemes_locked = false;

56	40

	41 // This template converts a given character type to the corresponding

	42 // StringPiece type.

	43 template<typename CHAR> struct CharToStringPiece {

	44 };

	45 template<> struct CharToStringPiece<char> {

	46 typedef base::StringPiece Piece;

	47 };

	48 template<> struct CharToStringPiece<base::char16> {

	49 typedef base::StringPiece16 Piece;

	50 };

	51

57 // Ensures that the standard_schemes list is initialized, does nothing if it	52 // Ensures that the standard_schemes list is initialized, does nothing if it

58 // already has values.	53 // already has values.

59 void InitStandardSchemes() {	54 void InitStandardSchemes() {

60 if (standard_schemes)	55 if (standard_schemes)

61 return;	56 return;

62 standard_schemes = new std::vector<const char*>;	57 standard_schemes = new std::vector<const char*>;

63 for (int i = 0; i < kNumStandardURLSchemes; i++)	58 for (int i = 0; i < kNumStandardURLSchemes; i++)

64 standard_schemes->push_back(kStandardURLSchemes[i]);	59 standard_schemes->push_back(kStandardURLSchemes[i]);

65 }	60 }

66	61

67 // Given a string and a range inside the string, compares it to the given	62 // Given a string and a range inside the string, compares it to the given

68 // lower-case |compare_to| buffer.	63 // lower-case |compare_to| buffer.

69 template<typename CHAR>	64 template<typename CHAR>

70 inline bool DoCompareSchemeComponent(const CHAR* spec,	65 inline bool DoCompareSchemeComponent(const CHAR* spec,

71 const Component& component,	66 const Component& component,

72 const char* compare_to) {	67 const char* compare_to) {

73 if (!component.is_nonempty())	68 if (!component.is_nonempty())

74 return compare_to[0] == 0; // When component is empty, match empty scheme.	69 return compare_to[0] == 0; // When component is empty, match empty scheme.

75 return LowerCaseEqualsASCII(&spec[component.begin],	70 return base::LowerCaseEqualsASCII(

76 &spec[component.end()],	71 typename CharToStringPiece<CHAR>::Piece(

77 compare_to);	72 &spec[component.begin], component.len),

	73 compare_to);

78 }	74 }

79	75

80 // Returns true if the given scheme identified by |scheme| within |spec| is one	76 // Returns true if the given scheme identified by |scheme| within |spec| is one

81 // of the registered "standard" schemes.	77 // of the registered "standard" schemes.

82 template<typename CHAR>	78 template<typename CHAR>

83 bool DoIsStandard(const CHAR* spec, const Component& scheme) {	79 bool DoIsStandard(const CHAR* spec, const Component& scheme) {

84 if (!scheme.is_nonempty())	80 if (!scheme.is_nonempty())

85 return false; // Empty or invalid schemes are non-standard.	81 return false; // Empty or invalid schemes are non-standard.

86	82

87 InitStandardSchemes();	83 InitStandardSchemes();

88 for (size_t i = 0; i < standard_schemes->size(); i++) {	84 for (size_t i = 0; i < standard_schemes->size(); i++) {

89 if (LowerCaseEqualsASCII(&spec[scheme.begin], &spec[scheme.end()],	85 if (base::LowerCaseEqualsASCII(

90 standard_schemes->at(i)))	86 typename CharToStringPiece<CHAR>::Piece(

	87 &spec[scheme.begin], scheme.len),

	88 standard_schemes->at(i)))

91 return true;	89 return true;

92 }	90 }

93 return false;	91 return false;

94 }	92 }

95	93

96 template<typename CHAR>	94 template<typename CHAR>

97 bool DoFindAndCompareScheme(const CHAR* str,	95 bool DoFindAndCompareScheme(const CHAR* str,

98 int str_len,	96 int str_len,

99 const char* compare,	97 const char* compare,

100 Component* found_scheme) {	98 Component* found_scheme) {

(...skipping 26 matching lines...) Expand all Loading...
127 // Remove any whitespace from the middle of the relative URL, possibly	125 // Remove any whitespace from the middle of the relative URL, possibly

128 // copying to the new buffer.	126 // copying to the new buffer.

129 RawCanonOutputT<CHAR> whitespace_buffer;	127 RawCanonOutputT<CHAR> whitespace_buffer;

130 int spec_len;	128 int spec_len;

131 const CHAR* spec = RemoveURLWhitespace(in_spec, in_spec_len,	129 const CHAR* spec = RemoveURLWhitespace(in_spec, in_spec_len,

132 &whitespace_buffer, &spec_len);	130 &whitespace_buffer, &spec_len);

133	131

134 Parsed parsed_input;	132 Parsed parsed_input;

135 #ifdef WIN32	133 #ifdef WIN32

136 // For Windows, we allow things that look like absolute Windows paths to be	134 // For Windows, we allow things that look like absolute Windows paths to be

137 // fixed up magically to file URLs. This is done for IE compatability. For	135 // fixed up magically to file URLs. This is done for IE compatibility. For

138 // example, this will change "c:/foo" into a file URL rather than treating	136 // example, this will change "c:/foo" into a file URL rather than treating

139 // it as a URL with the protocol "c". It also works for UNC ("\\foo\bar.txt").	137 // it as a URL with the protocol "c". It also works for UNC ("\\foo\bar.txt").

140 // There is similar logic in url_canon_relative.cc for	138 // There is similar logic in url_canon_relative.cc for

141 //	139 //

142 // For Mac & Unix, we don't do this (the equivalent would be "/foo/bar" which	140 // For Mac & Unix, we don't do this (the equivalent would be "/foo/bar" which

143 // has no meaning as an absolute path name. This is because browsers on Mac	141 // has no meaning as an absolute path name. This is because browsers on Mac

144 // & Unix don't generally do this, so there is no compatibility reason for	142 // & Unix don't generally do this, so there is no compatibility reason for

145 // doing so.	143 // doing so.

146 if (DoesBeginUNCPath(spec, 0, spec_len, false) ||	144 if (DoesBeginUNCPath(spec, 0, spec_len, false) ||

147 DoesBeginWindowsDriveSpec(spec, 0, spec_len)) {	145 DoesBeginWindowsDriveSpec(spec, 0, spec_len)) {

(...skipping 22 matching lines...) Expand all Loading...
170 charset_converter, output,	168 charset_converter, output,

171 output_parsed);	169 output_parsed);

172	170

173 } else if (DoIsStandard(spec, scheme)) {	171 } else if (DoIsStandard(spec, scheme)) {

174 // All "normal" URLs.	172 // All "normal" URLs.

175 ParseStandardURL(spec, spec_len, &parsed_input);	173 ParseStandardURL(spec, spec_len, &parsed_input);

176 success = CanonicalizeStandardURL(spec, spec_len, parsed_input,	174 success = CanonicalizeStandardURL(spec, spec_len, parsed_input,

177 charset_converter, output, output_parsed);	175 charset_converter, output, output_parsed);

178	176

179 } else if (DoCompareSchemeComponent(spec, scheme, url::kMailToScheme)) {	177 } else if (DoCompareSchemeComponent(spec, scheme, url::kMailToScheme)) {

180 // Mailto are treated like a standard url with only a scheme, path, query	178 // Mailto URLs are treated like standard URLs, with only a scheme, path,

	179 // and query.

181 ParseMailtoURL(spec, spec_len, &parsed_input);	180 ParseMailtoURL(spec, spec_len, &parsed_input);

182 success = CanonicalizeMailtoURL(spec, spec_len, parsed_input, output,	181 success = CanonicalizeMailtoURL(spec, spec_len, parsed_input, output,

183 output_parsed);	182 output_parsed);

184	183

185 } else {	184 } else {

186 // "Weird" URLs like data: and javascript:	185 // "Weird" URLs like data: and javascript:.

187 ParsePathURL(spec, spec_len, trim_path_end, &parsed_input);	186 ParsePathURL(spec, spec_len, trim_path_end, &parsed_input);

188 success = CanonicalizePathURL(spec, spec_len, parsed_input, output,	187 success = CanonicalizePathURL(spec, spec_len, parsed_input, output,

189 output_parsed);	188 output_parsed);

190 }	189 }

191 return success;	190 return success;

192 }	191 }

193	192

194 template<typename CHAR>	193 template<typename CHAR>

195 bool DoResolveRelative(const char* base_spec,	194 bool DoResolveRelative(const char* base_spec,

196 int base_spec_len,	195 int base_spec_len,

(...skipping 69 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
266	265

267 template<typename CHAR>	266 template<typename CHAR>

268 bool DoReplaceComponents(const char* spec,	267 bool DoReplaceComponents(const char* spec,

269 int spec_len,	268 int spec_len,

270 const Parsed& parsed,	269 const Parsed& parsed,

271 const Replacements<CHAR>& replacements,	270 const Replacements<CHAR>& replacements,

272 CharsetConverter* charset_converter,	271 CharsetConverter* charset_converter,

273 CanonOutput* output,	272 CanonOutput* output,

274 Parsed* out_parsed) {	273 Parsed* out_parsed) {

275 // If the scheme is overridden, just do a simple string substitution and	274 // If the scheme is overridden, just do a simple string substitution and

276 // reparse the whole thing. There are lots of edge cases that we really don't	275 // re-parse the whole thing. There are lots of edge cases that we really don't

277 // want to deal with. Like what happens if I replace "http://e:8080/foo"	276 // want to deal with. Like what happens if I replace "http://e:8080/foo"

278 // with a file. Does it become "file:///E:/8080/foo" where the port number	277 // with a file. Does it become "file:///E:/8080/foo" where the port number

279 // becomes part of the path? Parsing that string as a file URL says "yes"	278 // becomes part of the path? Parsing that string as a file URL says "yes"

280 // but almost no sane rule for dealing with the components individually would	279 // but almost no sane rule for dealing with the components individually would

281 // come up with that.	280 // come up with that.

282 //	281 //

283 // Why allow these crazy cases at all? Programmatically, there is almost no	282 // Why allow these crazy cases at all? Programmatically, there is almost no

284 // case for replacing the scheme. The most common case for hitting this is	283 // case for replacing the scheme. The most common case for hitting this is

285 // in JS when building up a URL using the location object. In this case, the	284 // in JS when building up a URL using the location object. In this case, the

286 // JS code expects the string substitution behavior:	285 // JS code expects the string substitution behavior:

(...skipping 26 matching lines...) Expand all Loading...
313	312

314 // Recurse using the version with the scheme already replaced. This will now	313 // Recurse using the version with the scheme already replaced. This will now

315 // use the replacement rules for the new scheme.	314 // use the replacement rules for the new scheme.

316 //	315 //

317 // Warning: this code assumes that ReplaceComponents will re-check all	316 // Warning: this code assumes that ReplaceComponents will re-check all

318 // components for validity. This is because we can't fail if DoCanonicalize	317 // components for validity. This is because we can't fail if DoCanonicalize

319 // failed above since theoretically the thing making it fail could be	318 // failed above since theoretically the thing making it fail could be

320 // getting replaced here. If ReplaceComponents didn't re-check everything,	319 // getting replaced here. If ReplaceComponents didn't re-check everything,

321 // we wouldn't know if something not getting replaced is a problem.	320 // we wouldn't know if something not getting replaced is a problem.

322 // If the scheme-specific replacers are made more intelligent so they don't	321 // If the scheme-specific replacers are made more intelligent so they don't

323 // re-check everything, we should instead recanonicalize the whole thing	322 // re-check everything, we should instead re-canonicalize the whole thing

324 // after this call to check validity (this assumes replacing the scheme is	323 // after this call to check validity (this assumes replacing the scheme is

325 // much much less common than other types of replacements, like clearing the	324 // much much less common than other types of replacements, like clearing the

326 // ref).	325 // ref).

327 Replacements<CHAR> replacements_no_scheme = replacements;	326 Replacements<CHAR> replacements_no_scheme = replacements;

328 replacements_no_scheme.SetScheme(NULL, Component());	327 replacements_no_scheme.SetScheme(NULL, Component());

329 return DoReplaceComponents(recanonicalized.data(), recanonicalized.length(),	328 return DoReplaceComponents(recanonicalized.data(), recanonicalized.length(),

330 recanonicalized_parsed, replacements_no_scheme,	329 recanonicalized_parsed, replacements_no_scheme,

331 charset_converter, output, out_parsed);	330 charset_converter, output, out_parsed);

332 }	331 }

333	332

(...skipping 32 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
366 }	365 }

367 }	366 }

368	367

369 void AddStandardScheme(const char* new_scheme) {	368 void AddStandardScheme(const char* new_scheme) {

370 // If this assert triggers, it means you've called AddStandardScheme after	369 // If this assert triggers, it means you've called AddStandardScheme after

371 // LockStandardSchemes has been called (see the header file for	370 // LockStandardSchemes has been called (see the header file for

372 // LockStandardSchemes for more).	371 // LockStandardSchemes for more).

373 //	372 //

374 // This normally means you're trying to set up a new standard scheme too late	373 // This normally means you're trying to set up a new standard scheme too late

375 // in your application's init process. Locate where your app does this	374 // in your application's init process. Locate where your app does this

376 // initialization and calls LockStandardScheme, and add your new standard	375 // initialization and calls LockStandardSchemes, and add your new standard

377 // scheme there.	376 // scheme there.

378 DCHECK(!standard_schemes_locked) <<	377 DCHECK(!standard_schemes_locked) <<

379 "Trying to add a standard scheme after the list has been locked.";	378 "Trying to add a standard scheme after the list has been locked.";

380	379

381 size_t scheme_len = strlen(new_scheme);	380 size_t scheme_len = strlen(new_scheme);

382 if (scheme_len == 0)	381 if (scheme_len == 0)

383 return;	382 return;

384	383

385 // Dulicate the scheme into a new buffer and add it to the list of standard	384 // Duplicate the scheme into a new buffer and add it to the list of standard

386 // schemes. This pointer will be leaked on shutdown.	385 // schemes. This pointer will be leaked on shutdown.

387 char* dup_scheme = new char[scheme_len + 1];	386 char* dup_scheme = new char[scheme_len + 1];

388 ANNOTATE_LEAKING_OBJECT_PTR(dup_scheme);	387 ANNOTATE_LEAKING_OBJECT_PTR(dup_scheme);

389 memcpy(dup_scheme, new_scheme, scheme_len + 1);	388 memcpy(dup_scheme, new_scheme, scheme_len + 1);

390	389

391 InitStandardSchemes();	390 InitStandardSchemes();

392 standard_schemes->push_back(dup_scheme);	391 standard_schemes->push_back(dup_scheme);

393 }	392 }

394	393

395 void LockStandardSchemes() {	394 void LockStandardSchemes() {

(...skipping 83 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
479 int spec_len,	478 int spec_len,

480 const Parsed& parsed,	479 const Parsed& parsed,

481 const Replacements<base::char16>& replacements,	480 const Replacements<base::char16>& replacements,

482 CharsetConverter* charset_converter,	481 CharsetConverter* charset_converter,

483 CanonOutput* output,	482 CanonOutput* output,

484 Parsed* out_parsed) {	483 Parsed* out_parsed) {

485 return DoReplaceComponents(spec, spec_len, parsed, replacements,	484 return DoReplaceComponents(spec, spec_len, parsed, replacements,

486 charset_converter, output, out_parsed);	485 charset_converter, output, out_parsed);

487 }	486 }

488	487

489 // Front-ends for LowerCaseEqualsASCII.

490 bool LowerCaseEqualsASCII(const char* a_begin,

491 const char* a_end,

492 const char* b) {

493 return DoLowerCaseEqualsASCII(a_begin, a_end, b);

494 }

495

496 bool LowerCaseEqualsASCII(const char* a_begin,

497 const char* a_end,

498 const char* b_begin,

499 const char* b_end) {

500 while (a_begin != a_end && b_begin != b_end &&

501 ToLowerASCII(*a_begin) == *b_begin) {

502 a_begin++;

503 b_begin++;

504 }

505 return a_begin == a_end && b_begin == b_end;

506 }

507

508 bool LowerCaseEqualsASCII(const base::char16* a_begin,

509 const base::char16* a_end,

510 const char* b) {

511 return DoLowerCaseEqualsASCII(a_begin, a_end, b);

512 }

513

514 void DecodeURLEscapeSequences(const char* input,	488 void DecodeURLEscapeSequences(const char* input,

515 int length,	489 int length,

516 CanonOutputW* output) {	490 CanonOutputW* output) {

517 RawCanonOutputT<char> unescaped_chars;	491 RawCanonOutputT<char> unescaped_chars;

518 for (int i = 0; i < length; i++) {	492 for (int i = 0; i < length; i++) {

519 if (input[i] == '%') {	493 if (input[i] == '%') {

520 unsigned char ch;	494 unsigned char ch;

521 if (DecodeEscaped(input, &i, length, &ch)) {	495 if (DecodeEscaped(input, &i, length, &ch)) {

522 unescaped_chars.push_back(ch);	496 unescaped_chars.push_back(ch);

523 } else {	497 } else {

(...skipping 53 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
577 return DoCompareSchemeComponent(spec, component, compare_to);	551 return DoCompareSchemeComponent(spec, component, compare_to);

578 }	552 }

579	553

580 bool CompareSchemeComponent(const base::char16* spec,	554 bool CompareSchemeComponent(const base::char16* spec,

581 const Component& component,	555 const Component& component,

582 const char* compare_to) {	556 const char* compare_to) {

583 return DoCompareSchemeComponent(spec, component, compare_to);	557 return DoCompareSchemeComponent(spec, component, compare_to);

584 }	558 }

585	559

586 } // namespace url	560 } // namespace url

OLD	NEW

« no previous file with comments | « url_util.h ('k') | url_util_internal.h » ('j') | no next file with comments »