OLD | NEW |
1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "url/url_util.h" | 5 #include "url/url_util.h" |
6 | 6 |
7 #include <string.h> | 7 #include <string.h> |
8 #include <vector> | 8 #include <vector> |
9 | 9 |
10 #include "base/debug/leak_annotations.h" | 10 #include "base/debug/leak_annotations.h" |
11 #include "base/logging.h" | 11 #include "base/logging.h" |
12 #include "base/strings/string_util.h" | 12 #include "base/strings/string_util.h" |
13 #include "url/url_canon_internal.h" | 13 #include "url/url_canon_internal.h" |
14 #include "url/url_file.h" | 14 #include "url/url_file.h" |
15 #include "url/url_util_internal.h" | 15 #include "url/url_util_internal.h" |
16 | 16 |
17 namespace url { | 17 namespace url { |
18 | 18 |
19 namespace { | 19 namespace { |
20 | 20 |
21 const int kNumStandardURLSchemes = 8; | 21 const int kNumStandardURLSchemes = 8; |
22 const SchemeWithType kStandardURLSchemes[kNumStandardURLSchemes] = { | 22 const char* kStandardURLSchemes[kNumStandardURLSchemes] = { |
23 {kHttpScheme, SCHEME_WITH_PORT}, | 23 kHttpScheme, |
24 {kHttpsScheme, SCHEME_WITH_PORT}, | 24 kHttpsScheme, |
25 // Yes, file URLs can have a hostname, so file URLs should be handled as | 25 kFileScheme, // Yes, file URLs can have a hostname! |
26 // "standard". File URLs never have a port as specified by the SchemeType | 26 kFtpScheme, |
27 // field. | 27 kGopherScheme, |
28 {kFileScheme, SCHEME_WITHOUT_PORT}, | 28 kWsScheme, // WebSocket. |
29 {kFtpScheme, SCHEME_WITH_PORT}, | 29 kWssScheme, // WebSocket secure. |
30 {kGopherScheme, SCHEME_WITH_PORT}, | 30 kFileSystemScheme, |
31 {kWsScheme, SCHEME_WITH_PORT}, // WebSocket. | |
32 {kWssScheme, SCHEME_WITH_PORT}, // WebSocket secure. | |
33 {kFileSystemScheme, SCHEME_WITHOUT_AUTHORITY}, | |
34 }; | 31 }; |
35 | 32 |
36 // List of the currently installed standard schemes. This list is lazily | 33 // List of the currently installed standard schemes. This list is lazily |
37 // initialized by InitStandardSchemes and is leaked on shutdown to prevent | 34 // initialized by InitStandardSchemes and is leaked on shutdown to prevent |
38 // any destructors from being called that will slow us down or cause problems. | 35 // any destructors from being called that will slow us down or cause problems. |
39 std::vector<SchemeWithType>* standard_schemes = NULL; | 36 std::vector<const char*>* standard_schemes = NULL; |
40 | 37 |
41 // See the LockStandardSchemes declaration in the header. | 38 // See the LockStandardSchemes declaration in the header. |
42 bool standard_schemes_locked = false; | 39 bool standard_schemes_locked = false; |
43 | 40 |
44 // This template converts a given character type to the corresponding | 41 // This template converts a given character type to the corresponding |
45 // StringPiece type. | 42 // StringPiece type. |
46 template<typename CHAR> struct CharToStringPiece { | 43 template<typename CHAR> struct CharToStringPiece { |
47 }; | 44 }; |
48 template<> struct CharToStringPiece<char> { | 45 template<> struct CharToStringPiece<char> { |
49 typedef base::StringPiece Piece; | 46 typedef base::StringPiece Piece; |
50 }; | 47 }; |
51 template<> struct CharToStringPiece<base::char16> { | 48 template<> struct CharToStringPiece<base::char16> { |
52 typedef base::StringPiece16 Piece; | 49 typedef base::StringPiece16 Piece; |
53 }; | 50 }; |
54 | 51 |
55 // Ensures that the standard_schemes list is initialized, does nothing if it | 52 // Ensures that the standard_schemes list is initialized, does nothing if it |
56 // already has values. | 53 // already has values. |
57 void InitStandardSchemes() { | 54 void InitStandardSchemes() { |
58 if (standard_schemes) | 55 if (standard_schemes) |
59 return; | 56 return; |
60 standard_schemes = new std::vector<SchemeWithType>; | 57 standard_schemes = new std::vector<const char*>; |
61 for (int i = 0; i < kNumStandardURLSchemes; i++) | 58 for (int i = 0; i < kNumStandardURLSchemes; i++) |
62 standard_schemes->push_back(kStandardURLSchemes[i]); | 59 standard_schemes->push_back(kStandardURLSchemes[i]); |
63 } | 60 } |
64 | 61 |
65 // Given a string and a range inside the string, compares it to the given | 62 // Given a string and a range inside the string, compares it to the given |
66 // lower-case |compare_to| buffer. | 63 // lower-case |compare_to| buffer. |
67 template<typename CHAR> | 64 template<typename CHAR> |
68 inline bool DoCompareSchemeComponent(const CHAR* spec, | 65 inline bool DoCompareSchemeComponent(const CHAR* spec, |
69 const Component& component, | 66 const Component& component, |
70 const char* compare_to) { | 67 const char* compare_to) { |
71 if (!component.is_nonempty()) | 68 if (!component.is_nonempty()) |
72 return compare_to[0] == 0; // When component is empty, match empty scheme. | 69 return compare_to[0] == 0; // When component is empty, match empty scheme. |
73 return base::LowerCaseEqualsASCII( | 70 return base::LowerCaseEqualsASCII( |
74 typename CharToStringPiece<CHAR>::Piece( | 71 typename CharToStringPiece<CHAR>::Piece( |
75 &spec[component.begin], component.len), | 72 &spec[component.begin], component.len), |
76 compare_to); | 73 compare_to); |
77 } | 74 } |
78 | 75 |
79 // Returns true and sets |type| to the SchemeType of the given scheme | 76 // Returns true if the given scheme identified by |scheme| within |spec| is one |
80 // identified by |scheme| within |spec| if the scheme is one of the registered | 77 // of the registered "standard" schemes. |
81 // "standard" schemes. | |
82 template<typename CHAR> | 78 template<typename CHAR> |
83 bool DoIsStandard(const CHAR* spec, | 79 bool DoIsStandard(const CHAR* spec, const Component& scheme) { |
84 const Component& scheme, | |
85 SchemeType* type) { | |
86 if (!scheme.is_nonempty()) | 80 if (!scheme.is_nonempty()) |
87 return false; // Empty or invalid schemes are non-standard. | 81 return false; // Empty or invalid schemes are non-standard. |
88 | 82 |
89 InitStandardSchemes(); | 83 InitStandardSchemes(); |
90 for (size_t i = 0; i < standard_schemes->size(); i++) { | 84 for (size_t i = 0; i < standard_schemes->size(); i++) { |
91 if (base::LowerCaseEqualsASCII( | 85 if (base::LowerCaseEqualsASCII( |
92 typename CharToStringPiece<CHAR>::Piece( | 86 typename CharToStringPiece<CHAR>::Piece( |
93 &spec[scheme.begin], scheme.len), | 87 &spec[scheme.begin], scheme.len), |
94 standard_schemes->at(i).scheme)) { | 88 standard_schemes->at(i))) |
95 *type = standard_schemes->at(i).type; | |
96 return true; | 89 return true; |
97 } | |
98 } | 90 } |
99 return false; | 91 return false; |
100 } | 92 } |
101 | 93 |
102 template<typename CHAR> | 94 template<typename CHAR> |
103 bool DoFindAndCompareScheme(const CHAR* str, | 95 bool DoFindAndCompareScheme(const CHAR* str, |
104 int str_len, | 96 int str_len, |
105 const char* compare, | 97 const char* compare, |
106 Component* found_scheme) { | 98 Component* found_scheme) { |
107 // Before extracting scheme, canonicalize the URL to remove any whitespace. | 99 // Before extracting scheme, canonicalize the URL to remove any whitespace. |
(...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
157 } | 149 } |
158 #endif | 150 #endif |
159 | 151 |
160 Component scheme; | 152 Component scheme; |
161 if (!ExtractScheme(spec, spec_len, &scheme)) | 153 if (!ExtractScheme(spec, spec_len, &scheme)) |
162 return false; | 154 return false; |
163 | 155 |
164 // This is the parsed version of the input URL; we have to canonicalize it | 156 // This is the parsed version of the input URL; we have to canonicalize it |
165 // before storing it in our object. | 157 // before storing it in our object. |
166 bool success; | 158 bool success; |
167 SchemeType unused_scheme_type = SCHEME_WITH_PORT; | |
168 if (DoCompareSchemeComponent(spec, scheme, url::kFileScheme)) { | 159 if (DoCompareSchemeComponent(spec, scheme, url::kFileScheme)) { |
169 // File URLs are special. | 160 // File URLs are special. |
170 ParseFileURL(spec, spec_len, &parsed_input); | 161 ParseFileURL(spec, spec_len, &parsed_input); |
171 success = CanonicalizeFileURL(spec, spec_len, parsed_input, | 162 success = CanonicalizeFileURL(spec, spec_len, parsed_input, |
172 charset_converter, output, output_parsed); | 163 charset_converter, output, output_parsed); |
173 } else if (DoCompareSchemeComponent(spec, scheme, url::kFileSystemScheme)) { | 164 } else if (DoCompareSchemeComponent(spec, scheme, url::kFileSystemScheme)) { |
174 // Filesystem URLs are special. | 165 // Filesystem URLs are special. |
175 ParseFileSystemURL(spec, spec_len, &parsed_input); | 166 ParseFileSystemURL(spec, spec_len, &parsed_input); |
176 success = CanonicalizeFileSystemURL(spec, spec_len, parsed_input, | 167 success = CanonicalizeFileSystemURL(spec, spec_len, parsed_input, |
177 charset_converter, output, | 168 charset_converter, output, |
178 output_parsed); | 169 output_parsed); |
179 | 170 |
180 } else if (DoIsStandard(spec, scheme, &unused_scheme_type)) { | 171 } else if (DoIsStandard(spec, scheme)) { |
181 // All "normal" URLs. | 172 // All "normal" URLs. |
182 ParseStandardURL(spec, spec_len, &parsed_input); | 173 ParseStandardURL(spec, spec_len, &parsed_input); |
183 success = CanonicalizeStandardURL(spec, spec_len, parsed_input, | 174 success = CanonicalizeStandardURL(spec, spec_len, parsed_input, |
184 charset_converter, output, output_parsed); | 175 charset_converter, output, output_parsed); |
185 | 176 |
186 } else if (DoCompareSchemeComponent(spec, scheme, url::kMailToScheme)) { | 177 } else if (DoCompareSchemeComponent(spec, scheme, url::kMailToScheme)) { |
187 // Mailto URLs are treated like standard URLs, with only a scheme, path, | 178 // Mailto URLs are treated like standard URLs, with only a scheme, path, |
188 // and query. | 179 // and query. |
189 ParseMailtoURL(spec, spec_len, &parsed_input); | 180 ParseMailtoURL(spec, spec_len, &parsed_input); |
190 success = CanonicalizeMailtoURL(spec, spec_len, parsed_input, output, | 181 success = CanonicalizeMailtoURL(spec, spec_len, parsed_input, output, |
(...skipping 28 matching lines...) Expand all Loading... |
219 bool base_is_hierarchical = false; | 210 bool base_is_hierarchical = false; |
220 if (base_spec && | 211 if (base_spec && |
221 base_parsed.scheme.is_nonempty()) { | 212 base_parsed.scheme.is_nonempty()) { |
222 int after_scheme = base_parsed.scheme.end() + 1; // Skip past the colon. | 213 int after_scheme = base_parsed.scheme.end() + 1; // Skip past the colon. |
223 int num_slashes = CountConsecutiveSlashes(base_spec, after_scheme, | 214 int num_slashes = CountConsecutiveSlashes(base_spec, after_scheme, |
224 base_spec_len); | 215 base_spec_len); |
225 base_is_authority_based = num_slashes > 1; | 216 base_is_authority_based = num_slashes > 1; |
226 base_is_hierarchical = num_slashes > 0; | 217 base_is_hierarchical = num_slashes > 0; |
227 } | 218 } |
228 | 219 |
229 SchemeType unused_scheme_type = SCHEME_WITH_PORT; | |
230 bool standard_base_scheme = | 220 bool standard_base_scheme = |
231 base_parsed.scheme.is_nonempty() && | 221 base_parsed.scheme.is_nonempty() && |
232 DoIsStandard(base_spec, base_parsed.scheme, &unused_scheme_type); | 222 DoIsStandard(base_spec, base_parsed.scheme); |
233 | 223 |
234 bool is_relative; | 224 bool is_relative; |
235 Component relative_component; | 225 Component relative_component; |
236 if (!IsRelativeURL(base_spec, base_parsed, relative, relative_length, | 226 if (!IsRelativeURL(base_spec, base_parsed, relative, relative_length, |
237 (base_is_hierarchical || standard_base_scheme), | 227 (base_is_hierarchical || standard_base_scheme), |
238 &is_relative, &relative_component)) { | 228 &is_relative, &relative_component)) { |
239 // Error resolving. | 229 // Error resolving. |
240 return false; | 230 return false; |
241 } | 231 } |
242 | 232 |
(...skipping 100 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
343 // If we get here, then we know the scheme doesn't need to be replaced, so can | 333 // If we get here, then we know the scheme doesn't need to be replaced, so can |
344 // just key off the scheme in the spec to know how to do the replacements. | 334 // just key off the scheme in the spec to know how to do the replacements. |
345 if (DoCompareSchemeComponent(spec, parsed.scheme, url::kFileScheme)) { | 335 if (DoCompareSchemeComponent(spec, parsed.scheme, url::kFileScheme)) { |
346 return ReplaceFileURL(spec, parsed, replacements, charset_converter, output, | 336 return ReplaceFileURL(spec, parsed, replacements, charset_converter, output, |
347 out_parsed); | 337 out_parsed); |
348 } | 338 } |
349 if (DoCompareSchemeComponent(spec, parsed.scheme, url::kFileSystemScheme)) { | 339 if (DoCompareSchemeComponent(spec, parsed.scheme, url::kFileSystemScheme)) { |
350 return ReplaceFileSystemURL(spec, parsed, replacements, charset_converter, | 340 return ReplaceFileSystemURL(spec, parsed, replacements, charset_converter, |
351 output, out_parsed); | 341 output, out_parsed); |
352 } | 342 } |
353 SchemeType unused_scheme_type = SCHEME_WITH_PORT; | 343 if (DoIsStandard(spec, parsed.scheme)) { |
354 if (DoIsStandard(spec, parsed.scheme, &unused_scheme_type)) { | |
355 return ReplaceStandardURL(spec, parsed, replacements, charset_converter, | 344 return ReplaceStandardURL(spec, parsed, replacements, charset_converter, |
356 output, out_parsed); | 345 output, out_parsed); |
357 } | 346 } |
358 if (DoCompareSchemeComponent(spec, parsed.scheme, url::kMailToScheme)) { | 347 if (DoCompareSchemeComponent(spec, parsed.scheme, url::kMailToScheme)) { |
359 return ReplaceMailtoURL(spec, parsed, replacements, output, out_parsed); | 348 return ReplaceMailtoURL(spec, parsed, replacements, output, out_parsed); |
360 } | 349 } |
361 | 350 |
362 // Default is a path URL. | 351 // Default is a path URL. |
363 return ReplacePathURL(spec, parsed, replacements, output, out_parsed); | 352 return ReplacePathURL(spec, parsed, replacements, output, out_parsed); |
364 } | 353 } |
365 | 354 |
366 } // namespace | 355 } // namespace |
367 | 356 |
368 void Initialize() { | 357 void Initialize() { |
369 InitStandardSchemes(); | 358 InitStandardSchemes(); |
370 } | 359 } |
371 | 360 |
372 void Shutdown() { | 361 void Shutdown() { |
373 if (standard_schemes) { | 362 if (standard_schemes) { |
374 delete standard_schemes; | 363 delete standard_schemes; |
375 standard_schemes = NULL; | 364 standard_schemes = NULL; |
376 } | 365 } |
377 } | 366 } |
378 | 367 |
379 void AddStandardScheme(const char* new_scheme, | 368 void AddStandardScheme(const char* new_scheme) { |
380 SchemeType type) { | |
381 // If this assert triggers, it means you've called AddStandardScheme after | 369 // If this assert triggers, it means you've called AddStandardScheme after |
382 // LockStandardSchemes has been called (see the header file for | 370 // LockStandardSchemes has been called (see the header file for |
383 // LockStandardSchemes for more). | 371 // LockStandardSchemes for more). |
384 // | 372 // |
385 // This normally means you're trying to set up a new standard scheme too late | 373 // This normally means you're trying to set up a new standard scheme too late |
386 // in your application's init process. Locate where your app does this | 374 // in your application's init process. Locate where your app does this |
387 // initialization and calls LockStandardSchemes, and add your new standard | 375 // initialization and calls LockStandardSchemes, and add your new standard |
388 // scheme there. | 376 // scheme there. |
389 DCHECK(!standard_schemes_locked) << | 377 DCHECK(!standard_schemes_locked) << |
390 "Trying to add a standard scheme after the list has been locked."; | 378 "Trying to add a standard scheme after the list has been locked."; |
391 | 379 |
392 size_t scheme_len = strlen(new_scheme); | 380 size_t scheme_len = strlen(new_scheme); |
393 if (scheme_len == 0) | 381 if (scheme_len == 0) |
394 return; | 382 return; |
395 | 383 |
396 // Duplicate the scheme into a new buffer and add it to the list of standard | 384 // Duplicate the scheme into a new buffer and add it to the list of standard |
397 // schemes. This pointer will be leaked on shutdown. | 385 // schemes. This pointer will be leaked on shutdown. |
398 char* dup_scheme = new char[scheme_len + 1]; | 386 char* dup_scheme = new char[scheme_len + 1]; |
399 ANNOTATE_LEAKING_OBJECT_PTR(dup_scheme); | 387 ANNOTATE_LEAKING_OBJECT_PTR(dup_scheme); |
400 memcpy(dup_scheme, new_scheme, scheme_len + 1); | 388 memcpy(dup_scheme, new_scheme, scheme_len + 1); |
401 | 389 |
402 InitStandardSchemes(); | 390 InitStandardSchemes(); |
403 SchemeWithType scheme_with_type; | 391 standard_schemes->push_back(dup_scheme); |
404 scheme_with_type.scheme = dup_scheme; | |
405 scheme_with_type.type = type; | |
406 standard_schemes->push_back(scheme_with_type); | |
407 } | 392 } |
408 | 393 |
409 void LockStandardSchemes() { | 394 void LockStandardSchemes() { |
410 standard_schemes_locked = true; | 395 standard_schemes_locked = true; |
411 } | 396 } |
412 | 397 |
413 bool IsStandard(const char* spec, const Component& scheme) { | 398 bool IsStandard(const char* spec, const Component& scheme) { |
414 SchemeType unused_scheme_type; | 399 return DoIsStandard(spec, scheme); |
415 return DoIsStandard(spec, scheme, &unused_scheme_type); | |
416 } | |
417 | |
418 bool GetStandardSchemeType(const char* spec, | |
419 const Component& scheme, | |
420 SchemeType* type) { | |
421 return DoIsStandard(spec, scheme, type); | |
422 } | 400 } |
423 | 401 |
424 bool IsStandard(const base::char16* spec, const Component& scheme) { | 402 bool IsStandard(const base::char16* spec, const Component& scheme) { |
425 SchemeType unused_scheme_type; | 403 return DoIsStandard(spec, scheme); |
426 return DoIsStandard(spec, scheme, &unused_scheme_type); | |
427 } | 404 } |
428 | 405 |
429 bool FindAndCompareScheme(const char* str, | 406 bool FindAndCompareScheme(const char* str, |
430 int str_len, | 407 int str_len, |
431 const char* compare, | 408 const char* compare, |
432 Component* found_scheme) { | 409 Component* found_scheme) { |
433 return DoFindAndCompareScheme(str, str_len, compare, found_scheme); | 410 return DoFindAndCompareScheme(str, str_len, compare, found_scheme); |
434 } | 411 } |
435 | 412 |
436 bool FindAndCompareScheme(const base::char16* str, | 413 bool FindAndCompareScheme(const base::char16* str, |
(...skipping 137 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
574 return DoCompareSchemeComponent(spec, component, compare_to); | 551 return DoCompareSchemeComponent(spec, component, compare_to); |
575 } | 552 } |
576 | 553 |
577 bool CompareSchemeComponent(const base::char16* spec, | 554 bool CompareSchemeComponent(const base::char16* spec, |
578 const Component& component, | 555 const Component& component, |
579 const char* compare_to) { | 556 const char* compare_to) { |
580 return DoCompareSchemeComponent(spec, component, compare_to); | 557 return DoCompareSchemeComponent(spec, component, compare_to); |
581 } | 558 } |
582 | 559 |
583 } // namespace url | 560 } // namespace url |
OLD | NEW |