OLD | NEW |
1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifdef WIN32 | 5 #ifdef WIN32 |
6 #include <windows.h> | 6 #include <windows.h> |
7 #else | 7 #else |
8 #include <pthread.h> | 8 #include <pthread.h> |
9 #endif | 9 #endif |
10 | 10 |
11 #include <algorithm> | 11 #include <algorithm> |
12 #include <ostream> | 12 #include <ostream> |
13 | 13 |
14 #include "url/gurl.h" | 14 #include "url/gurl.h" |
15 | 15 |
16 #include "base/logging.h" | 16 #include "base/logging.h" |
| 17 #include "base/strings/string_piece.h" |
| 18 #include "base/strings/string_util.h" |
17 #include "url/url_canon_stdstring.h" | 19 #include "url/url_canon_stdstring.h" |
18 #include "url/url_util.h" | 20 #include "url/url_util.h" |
19 | 21 |
20 namespace { | 22 namespace { |
21 | 23 |
22 static std::string* empty_string = NULL; | 24 static std::string* empty_string = NULL; |
23 static GURL* empty_gurl = NULL; | 25 static GURL* empty_gurl = NULL; |
24 | 26 |
25 #ifdef WIN32 | 27 #ifdef WIN32 |
26 | 28 |
(...skipping 25 matching lines...) Expand all Loading... |
52 } | 54 } |
53 | 55 |
54 const std::string& EmptyStringForGURL() { | 56 const std::string& EmptyStringForGURL() { |
55 // Avoid static object construction/destruction on startup/shutdown. | 57 // Avoid static object construction/destruction on startup/shutdown. |
56 pthread_once(&empty_string_once, EmptyStringForGURLOnce); | 58 pthread_once(&empty_string_once, EmptyStringForGURLOnce); |
57 return *empty_string; | 59 return *empty_string; |
58 } | 60 } |
59 | 61 |
60 #endif // WIN32 | 62 #endif // WIN32 |
61 | 63 |
62 } // namespace | 64 } // namespace |
63 | 65 |
64 GURL::GURL() : is_valid_(false) { | 66 GURL::GURL() : is_valid_(false) { |
65 } | 67 } |
66 | 68 |
67 GURL::GURL(const GURL& other) | 69 GURL::GURL(const GURL& other) |
68 : spec_(other.spec_), | 70 : spec_(other.spec_), |
69 is_valid_(other.is_valid_), | 71 is_valid_(other.is_valid_), |
70 parsed_(other.parsed_) { | 72 parsed_(other.parsed_) { |
71 if (other.inner_url_) | 73 if (other.inner_url_) |
72 inner_url_.reset(new GURL(*other.inner_url_)); | 74 inner_url_.reset(new GURL(*other.inner_url_)); |
(...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
123 void GURL::InitializeFromCanonicalSpec() { | 125 void GURL::InitializeFromCanonicalSpec() { |
124 if (is_valid_ && SchemeIsFileSystem()) { | 126 if (is_valid_ && SchemeIsFileSystem()) { |
125 inner_url_.reset( | 127 inner_url_.reset( |
126 new GURL(spec_.data(), parsed_.Length(), | 128 new GURL(spec_.data(), parsed_.Length(), |
127 *parsed_.inner_parsed(), true)); | 129 *parsed_.inner_parsed(), true)); |
128 } | 130 } |
129 | 131 |
130 #ifndef NDEBUG | 132 #ifndef NDEBUG |
131 // For testing purposes, check that the parsed canonical URL is identical to | 133 // For testing purposes, check that the parsed canonical URL is identical to |
132 // what we would have produced. Skip checking for invalid URLs have no meaning | 134 // what we would have produced. Skip checking for invalid URLs have no meaning |
133 // and we can't always canonicalize then reproducabely. | 135 // and we can't always canonicalize then reproducibly. |
134 if (is_valid_) { | 136 if (is_valid_) { |
135 url::Component scheme; | 137 url::Component scheme; |
136 // We can't do this check on the inner_url of a filesystem URL, as | 138 // We can't do this check on the inner_url of a filesystem URL, as |
137 // canonical_spec actually points to the start of the outer URL, so we'd | 139 // canonical_spec actually points to the start of the outer URL, so we'd |
138 // end up with infinite recursion in this constructor. | 140 // end up with infinite recursion in this constructor. |
139 if (!url::FindAndCompareScheme(spec_.data(), spec_.length(), | 141 if (!url::FindAndCompareScheme(spec_.data(), spec_.length(), |
140 url::kFileSystemScheme, &scheme) || | 142 url::kFileSystemScheme, &scheme) || |
141 scheme.begin == parsed_.scheme.begin) { | 143 scheme.begin == parsed_.scheme.begin) { |
142 // We need to retain trailing whitespace on path URLs, as the |parsed_| | 144 // We need to retain trailing whitespace on path URLs, as the |parsed_| |
143 // spec we originally received may legitimately contain trailing white- | 145 // spec we originally received may legitimately contain trailing white- |
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
186 } | 188 } |
187 | 189 |
188 bool GURL::operator<(const GURL& other) const { | 190 bool GURL::operator<(const GURL& other) const { |
189 return spec_ < other.spec_; | 191 return spec_ < other.spec_; |
190 } | 192 } |
191 | 193 |
192 bool GURL::operator>(const GURL& other) const { | 194 bool GURL::operator>(const GURL& other) const { |
193 return spec_ > other.spec_; | 195 return spec_ > other.spec_; |
194 } | 196 } |
195 | 197 |
| 198 // Note: code duplicated below (it's inconvenient to use a template here). |
196 GURL GURL::Resolve(const std::string& relative) const { | 199 GURL GURL::Resolve(const std::string& relative) const { |
197 return ResolveWithCharsetConverter(relative, NULL); | |
198 } | |
199 GURL GURL::Resolve(const base::string16& relative) const { | |
200 return ResolveWithCharsetConverter(relative, NULL); | |
201 } | |
202 | |
203 // Note: code duplicated below (it's inconvenient to use a template here). | |
204 GURL GURL::ResolveWithCharsetConverter( | |
205 const std::string& relative, | |
206 url::CharsetConverter* charset_converter) const { | |
207 // Not allowed for invalid URLs. | 200 // Not allowed for invalid URLs. |
208 if (!is_valid_) | 201 if (!is_valid_) |
209 return GURL(); | 202 return GURL(); |
210 | 203 |
211 GURL result; | 204 GURL result; |
212 | 205 |
213 // Reserve enough room in the output for the input, plus some extra so that | 206 // Reserve enough room in the output for the input, plus some extra so that |
214 // we have room if we have to escape a few things without reallocating. | 207 // we have room if we have to escape a few things without reallocating. |
215 result.spec_.reserve(spec_.size() + 32); | 208 result.spec_.reserve(spec_.size() + 32); |
216 url::StdStringCanonOutput output(&result.spec_); | 209 url::StdStringCanonOutput output(&result.spec_); |
217 | 210 |
218 if (!url::ResolveRelative(spec_.data(), static_cast<int>(spec_.length()), | 211 if (!url::ResolveRelative(spec_.data(), static_cast<int>(spec_.length()), |
219 parsed_, relative.data(), | 212 parsed_, relative.data(), |
220 static_cast<int>(relative.length()), | 213 static_cast<int>(relative.length()), |
221 charset_converter, &output, &result.parsed_)) { | 214 nullptr, &output, &result.parsed_)) { |
222 // Error resolving, return an empty URL. | 215 // Error resolving, return an empty URL. |
223 return GURL(); | 216 return GURL(); |
224 } | 217 } |
225 | 218 |
226 output.Complete(); | 219 output.Complete(); |
227 result.is_valid_ = true; | 220 result.is_valid_ = true; |
228 if (result.SchemeIsFileSystem()) { | 221 if (result.SchemeIsFileSystem()) { |
229 result.inner_url_.reset( | 222 result.inner_url_.reset( |
230 new GURL(result.spec_.data(), result.parsed_.Length(), | 223 new GURL(result.spec_.data(), result.parsed_.Length(), |
231 *result.parsed_.inner_parsed(), true)); | 224 *result.parsed_.inner_parsed(), true)); |
232 } | 225 } |
233 return result; | 226 return result; |
234 } | 227 } |
235 | 228 |
236 // Note: code duplicated above (it's inconvenient to use a template here). | 229 // Note: code duplicated above (it's inconvenient to use a template here). |
237 GURL GURL::ResolveWithCharsetConverter( | 230 GURL GURL::Resolve(const base::string16& relative) const { |
238 const base::string16& relative, | |
239 url::CharsetConverter* charset_converter) const { | |
240 // Not allowed for invalid URLs. | 231 // Not allowed for invalid URLs. |
241 if (!is_valid_) | 232 if (!is_valid_) |
242 return GURL(); | 233 return GURL(); |
243 | 234 |
244 GURL result; | 235 GURL result; |
245 | 236 |
246 // Reserve enough room in the output for the input, plus some extra so that | 237 // Reserve enough room in the output for the input, plus some extra so that |
247 // we have room if we have to escape a few things without reallocating. | 238 // we have room if we have to escape a few things without reallocating. |
248 result.spec_.reserve(spec_.size() + 32); | 239 result.spec_.reserve(spec_.size() + 32); |
249 url::StdStringCanonOutput output(&result.spec_); | 240 url::StdStringCanonOutput output(&result.spec_); |
250 | 241 |
251 if (!url::ResolveRelative(spec_.data(), static_cast<int>(spec_.length()), | 242 if (!url::ResolveRelative(spec_.data(), static_cast<int>(spec_.length()), |
252 parsed_, relative.data(), | 243 parsed_, relative.data(), |
253 static_cast<int>(relative.length()), | 244 static_cast<int>(relative.length()), |
254 charset_converter, &output, &result.parsed_)) { | 245 nullptr, &output, &result.parsed_)) { |
255 // Error resolving, return an empty URL. | 246 // Error resolving, return an empty URL. |
256 return GURL(); | 247 return GURL(); |
257 } | 248 } |
258 | 249 |
259 output.Complete(); | 250 output.Complete(); |
260 result.is_valid_ = true; | 251 result.is_valid_ = true; |
261 if (result.SchemeIsFileSystem()) { | 252 if (result.SchemeIsFileSystem()) { |
262 result.inner_url_.reset( | 253 result.inner_url_.reset( |
263 new GURL(result.spec_.data(), result.parsed_.Length(), | 254 new GURL(result.spec_.data(), result.parsed_.Length(), |
264 *result.parsed_.inner_parsed(), true)); | 255 *result.parsed_.inner_parsed(), true)); |
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
313 output.Complete(); | 304 output.Complete(); |
314 if (result.is_valid_ && result.SchemeIsFileSystem()) { | 305 if (result.is_valid_ && result.SchemeIsFileSystem()) { |
315 result.inner_url_.reset(new GURL(spec_.data(), result.parsed_.Length(), | 306 result.inner_url_.reset(new GURL(spec_.data(), result.parsed_.Length(), |
316 *result.parsed_.inner_parsed(), true)); | 307 *result.parsed_.inner_parsed(), true)); |
317 } | 308 } |
318 return result; | 309 return result; |
319 } | 310 } |
320 | 311 |
321 GURL GURL::GetOrigin() const { | 312 GURL GURL::GetOrigin() const { |
322 // This doesn't make sense for invalid or nonstandard URLs, so return | 313 // This doesn't make sense for invalid or nonstandard URLs, so return |
323 // the empty URL | 314 // the empty URL. |
324 if (!is_valid_ || !IsStandard()) | 315 if (!is_valid_ || !IsStandard()) |
325 return GURL(); | 316 return GURL(); |
326 | 317 |
327 if (SchemeIsFileSystem()) | 318 if (SchemeIsFileSystem()) |
328 return inner_url_->GetOrigin(); | 319 return inner_url_->GetOrigin(); |
329 | 320 |
330 url::Replacements<char> replacements; | 321 url::Replacements<char> replacements; |
331 replacements.ClearUsername(); | 322 replacements.ClearUsername(); |
332 replacements.ClearPassword(); | 323 replacements.ClearPassword(); |
333 replacements.ClearPath(); | 324 replacements.ClearPath(); |
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
375 return other; | 366 return other; |
376 } | 367 } |
377 | 368 |
378 bool GURL::IsStandard() const { | 369 bool GURL::IsStandard() const { |
379 return url::IsStandard(spec_.data(), parsed_.scheme); | 370 return url::IsStandard(spec_.data(), parsed_.scheme); |
380 } | 371 } |
381 | 372 |
382 bool GURL::SchemeIs(const char* lower_ascii_scheme) const { | 373 bool GURL::SchemeIs(const char* lower_ascii_scheme) const { |
383 if (parsed_.scheme.len <= 0) | 374 if (parsed_.scheme.len <= 0) |
384 return lower_ascii_scheme == NULL; | 375 return lower_ascii_scheme == NULL; |
385 return url::LowerCaseEqualsASCII(spec_.data() + parsed_.scheme.begin, | 376 return base::LowerCaseEqualsASCII( |
386 spec_.data() + parsed_.scheme.end(), | 377 base::StringPiece(spec_.data() + parsed_.scheme.begin, |
387 lower_ascii_scheme); | 378 parsed_.scheme.len), |
| 379 lower_ascii_scheme); |
388 } | 380 } |
389 | 381 |
390 bool GURL::SchemeIsHTTPOrHTTPS() const { | 382 bool GURL::SchemeIsHTTPOrHTTPS() const { |
391 return SchemeIs(url::kHttpScheme) || SchemeIs(url::kHttpsScheme); | 383 return SchemeIs(url::kHttpScheme) || SchemeIs(url::kHttpsScheme); |
392 } | 384 } |
393 | 385 |
394 bool GURL::SchemeIsWSOrWSS() const { | 386 bool GURL::SchemeIsWSOrWSS() const { |
395 return SchemeIs(url::kWsScheme) || SchemeIs(url::kWssScheme); | 387 return SchemeIs(url::kWsScheme) || SchemeIs(url::kWssScheme); |
396 } | 388 } |
397 | 389 |
(...skipping 11 matching lines...) Expand all Loading... |
409 return int_port; | 401 return int_port; |
410 } | 402 } |
411 | 403 |
412 std::string GURL::ExtractFileName() const { | 404 std::string GURL::ExtractFileName() const { |
413 url::Component file_component; | 405 url::Component file_component; |
414 url::ExtractFileName(spec_.data(), parsed_.path, &file_component); | 406 url::ExtractFileName(spec_.data(), parsed_.path, &file_component); |
415 return ComponentString(file_component); | 407 return ComponentString(file_component); |
416 } | 408 } |
417 | 409 |
418 std::string GURL::PathForRequest() const { | 410 std::string GURL::PathForRequest() const { |
419 DCHECK(parsed_.path.len > 0) << "Canonical path for requests should be non-empty"; | 411 DCHECK(parsed_.path.len > 0) |
| 412 << "Canonical path for requests should be non-empty"; |
420 if (parsed_.ref.len >= 0) { | 413 if (parsed_.ref.len >= 0) { |
421 // Clip off the reference when it exists. The reference starts after the # | 414 // Clip off the reference when it exists. The reference starts after the |
422 // sign, so we have to subtract one to also remove it. | 415 // #-sign, so we have to subtract one to also remove it. |
423 return std::string(spec_, parsed_.path.begin, | 416 return std::string(spec_, parsed_.path.begin, |
424 parsed_.ref.begin - parsed_.path.begin - 1); | 417 parsed_.ref.begin - parsed_.path.begin - 1); |
425 } | 418 } |
426 // Compute the actual path length, rather than depending on the spec's | 419 // Compute the actual path length, rather than depending on the spec's |
427 // terminator. If we're an inner_url, our spec continues on into our outer | 420 // terminator. If we're an inner_url, our spec continues on into our outer |
428 // url's path/query/ref. | 421 // URL's path/query/ref. |
429 int path_len = parsed_.path.len; | 422 int path_len = parsed_.path.len; |
430 if (parsed_.query.is_valid()) | 423 if (parsed_.query.is_valid()) |
431 path_len = parsed_.query.end() - parsed_.path.begin; | 424 path_len = parsed_.query.end() - parsed_.path.begin; |
432 | 425 |
433 return std::string(spec_, parsed_.path.begin, path_len); | 426 return std::string(spec_, parsed_.path.begin, path_len); |
434 } | 427 } |
435 | 428 |
436 std::string GURL::HostNoBrackets() const { | 429 std::string GURL::HostNoBrackets() const { |
437 // If host looks like an IPv6 literal, strip the square brackets. | 430 // If host looks like an IPv6 literal, strip the square brackets. |
438 url::Component h(parsed_.host); | 431 url::Component h(parsed_.host); |
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
483 } | 476 } |
484 | 477 |
485 const GURL& GURL::EmptyGURL() { | 478 const GURL& GURL::EmptyGURL() { |
486 // Avoid static object construction/destruction on startup/shutdown. | 479 // Avoid static object construction/destruction on startup/shutdown. |
487 pthread_once(&empty_gurl_once, EmptyGURLOnce); | 480 pthread_once(&empty_gurl_once, EmptyGURLOnce); |
488 return *empty_gurl; | 481 return *empty_gurl; |
489 } | 482 } |
490 | 483 |
491 #endif // WIN32 | 484 #endif // WIN32 |
492 | 485 |
493 bool GURL::DomainIs(const char* lower_ascii_domain, | 486 bool GURL::DomainIs(base::StringPiece lower_ascii_domain) const { |
494 int domain_len) const { | 487 if (!is_valid_ || lower_ascii_domain.empty()) |
495 // Return false if this URL is not valid or domain is empty. | |
496 if (!is_valid_ || !domain_len) | |
497 return false; | 488 return false; |
498 | 489 |
499 // FileSystem URLs have empty parsed_.host, so check this first. | 490 // FileSystem URLs have empty parsed_.host, so check this first. |
500 if (SchemeIsFileSystem() && inner_url_) | 491 if (SchemeIsFileSystem() && inner_url_) |
501 return inner_url_->DomainIs(lower_ascii_domain, domain_len); | 492 return inner_url_->DomainIs(lower_ascii_domain); |
502 | 493 |
503 if (!parsed_.host.is_nonempty()) | 494 if (!parsed_.host.is_nonempty()) |
504 return false; | 495 return false; |
505 | 496 |
506 // Check whether the host name is end with a dot. If yes, treat it | 497 // If the host name ends with a dot but the input domain doesn't, |
507 // the same as no-dot unless the input comparison domain is end | 498 // then we ignore the dot in the host name. |
508 // with dot. | 499 const char* host_last_pos = spec_.data() + parsed_.host.end() - 1; |
509 const char* last_pos = spec_.data() + parsed_.host.end() - 1; | |
510 int host_len = parsed_.host.len; | 500 int host_len = parsed_.host.len; |
511 if ('.' == *last_pos && '.' != lower_ascii_domain[domain_len - 1]) { | 501 int domain_len = lower_ascii_domain.length(); |
512 last_pos--; | 502 if ('.' == *host_last_pos && '.' != lower_ascii_domain[domain_len - 1]) { |
| 503 host_last_pos--; |
513 host_len--; | 504 host_len--; |
514 } | 505 } |
515 | 506 |
516 // Return false if host's length is less than domain's length. | |
517 if (host_len < domain_len) | 507 if (host_len < domain_len) |
518 return false; | 508 return false; |
519 | 509 |
520 // Compare this url whether belong specific domain. | 510 // |host_first_pos| is the start of the compared part of the host name, not |
521 const char* start_pos = spec_.data() + parsed_.host.begin + | 511 // start of the whole host name. |
522 host_len - domain_len; | 512 const char* host_first_pos = spec_.data() + parsed_.host.begin + |
| 513 host_len - domain_len; |
523 | 514 |
524 if (!url::LowerCaseEqualsASCII(start_pos, | 515 if (!base::LowerCaseEqualsASCII( |
525 last_pos + 1, | 516 base::StringPiece(host_first_pos, domain_len), lower_ascii_domain)) |
526 lower_ascii_domain, | |
527 lower_ascii_domain + domain_len)) | |
528 return false; | 517 return false; |
529 | 518 |
530 // Check whether host has right domain start with dot, make sure we got | 519 // Make sure there aren't extra characters in host before the compared part; |
531 // right domain range. For example www.google.com has domain | 520 // if the host name is longer than the input domain name, then the character |
532 // "google.com" but www.iamnotgoogle.com does not. | 521 // immediately before the compared part should be a dot. For example, |
| 522 // www.google.com has domain "google.com", but www.iamnotgoogle.com does not. |
533 if ('.' != lower_ascii_domain[0] && host_len > domain_len && | 523 if ('.' != lower_ascii_domain[0] && host_len > domain_len && |
534 '.' != *(start_pos - 1)) | 524 '.' != *(host_first_pos - 1)) |
535 return false; | 525 return false; |
536 | 526 |
537 return true; | 527 return true; |
538 } | 528 } |
539 | 529 |
540 void GURL::Swap(GURL* other) { | 530 void GURL::Swap(GURL* other) { |
541 spec_.swap(other->spec_); | 531 spec_.swap(other->spec_); |
542 std::swap(is_valid_, other->is_valid_); | 532 std::swap(is_valid_, other->is_valid_); |
543 std::swap(parsed_, other->parsed_); | 533 std::swap(parsed_, other->parsed_); |
544 inner_url_.swap(other->inner_url_); | 534 inner_url_.swap(other->inner_url_); |
545 } | 535 } |
546 | 536 |
547 std::ostream& operator<<(std::ostream& out, const GURL& url) { | 537 std::ostream& operator<<(std::ostream& out, const GURL& url) { |
548 return out << url.possibly_invalid_spec(); | 538 return out << url.possibly_invalid_spec(); |
549 } | 539 } |
OLD | NEW |