Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(463)

Side by Side Diff: extensions/common/url_pattern.cc

Issue 2455373002: Add implicit trailing dot domain matching support to URLPattern. (Closed)
Patch Set: Created 4 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « extensions/common/url_pattern.h ('k') | extensions/common/url_pattern_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "extensions/common/url_pattern.h" 5 #include "extensions/common/url_pattern.h"
6 6
7 #include <stddef.h> 7 #include <stddef.h>
8 8
9 #include <ostream> 9 #include <ostream>
10 10
11 #include "base/macros.h" 11 #include "base/macros.h"
12 #include "base/strings/pattern.h" 12 #include "base/strings/pattern.h"
13 #include "base/strings/string_number_conversions.h" 13 #include "base/strings/string_number_conversions.h"
14 #include "base/strings/string_piece.h" 14 #include "base/strings/string_piece.h"
15 #include "base/strings/string_split.h" 15 #include "base/strings/string_split.h"
16 #include "base/strings/string_util.h" 16 #include "base/strings/string_util.h"
17 #include "base/strings/stringprintf.h" 17 #include "base/strings/stringprintf.h"
18 #include "content/public/common/url_constants.h" 18 #include "content/public/common/url_constants.h"
19 #include "extensions/common/constants.h" 19 #include "extensions/common/constants.h"
20 #include "net/base/registry_controlled_domains/registry_controlled_domain.h" 20 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"
21 #include "net/base/url_util.h"
21 #include "url/gurl.h" 22 #include "url/gurl.h"
22 #include "url/url_util.h" 23 #include "url/url_util.h"
23 24
24 const char URLPattern::kAllUrlsPattern[] = "<all_urls>"; 25 const char URLPattern::kAllUrlsPattern[] = "<all_urls>";
25 26
26 namespace { 27 namespace {
27 28
28 // TODO(aa): What about more obscure schemes like data: and javascript: ? 29 // TODO(aa): What about more obscure schemes like data: and javascript: ?
29 // Note: keep this array in sync with kValidSchemeMasks. 30 // Note: keep this array in sync with kValidSchemeMasks.
30 const char* const kValidSchemes[] = { 31 const char* const kValidSchemes[] = {
(...skipping 75 matching lines...) Expand 10 before | Expand all | Expand 10 after
106 // called for the patterns inside URLPatternSet. In those cases, we know that 107 // called for the patterns inside URLPatternSet. In those cases, we know that
107 // the path will have only a single wildcard at the end. This makes figuring 108 // the path will have only a single wildcard at the end. This makes figuring
108 // out overlap much easier. It seems like there is probably a computer-sciency 109 // out overlap much easier. It seems like there is probably a computer-sciency
109 // way to solve the general case, but we don't need that yet. 110 // way to solve the general case, but we don't need that yet.
110 std::string StripTrailingWildcard(const std::string& path) { 111 std::string StripTrailingWildcard(const std::string& path) {
111 size_t wildcard_index = path.find('*'); 112 size_t wildcard_index = path.find('*');
112 size_t path_last = path.size() - 1; 113 size_t path_last = path.size() - 1;
113 return wildcard_index == path_last ? path.substr(0, path_last) : path; 114 return wildcard_index == path_last ? path.substr(0, path_last) : path;
114 } 115 }
115 116
117 bool EndsWithDot(base::StringPiece str) {
118 return base::EndsWith(str, ".", base::CompareCase::SENSITIVE);
119 }
120
116 } // namespace 121 } // namespace
117 122
118 // static 123 // static
119 bool URLPattern::IsValidSchemeForExtensions(const std::string& scheme) { 124 bool URLPattern::IsValidSchemeForExtensions(const std::string& scheme) {
120 for (size_t i = 0; i < arraysize(kValidSchemes); ++i) { 125 for (size_t i = 0; i < arraysize(kValidSchemes); ++i) {
121 if (scheme == kValidSchemes[i]) 126 if (scheme == kValidSchemes[i])
122 return true; 127 return true;
123 } 128 }
124 return false; 129 return false;
125 } 130 }
(...skipping 271 matching lines...) Expand 10 before | Expand all | Expand 10 after
397 402
398 bool URLPattern::MatchesHost(const std::string& host) const { 403 bool URLPattern::MatchesHost(const std::string& host) const {
399 std::string test(url::kHttpScheme); 404 std::string test(url::kHttpScheme);
400 test += url::kStandardSchemeSeparator; 405 test += url::kStandardSchemeSeparator;
401 test += host; 406 test += host;
402 test += "/"; 407 test += "/";
403 return MatchesHost(GURL(test)); 408 return MatchesHost(GURL(test));
404 } 409 }
405 410
406 bool URLPattern::MatchesHost(const GURL& test) const { 411 bool URLPattern::MatchesHost(const GURL& test) const {
412 const std::string test_host(CanonicalizeHostForMatching(test));
Devlin 2016/10/28 19:17:27 This code is called under some pretty performance-… [remainder of comment truncated in this view]
413
407 // If the hosts are exactly equal, we have a match. 414 // If the hosts are exactly equal, we have a match.
408 if (test.host() == host_) 415 if (test_host == host_)
409 return true; 416 return true;
410 417
411 // If we're matching subdomains, and we have no host in the match pattern, 418 // If we're matching subdomains, and we have no host in the match pattern,
412 // that means that we're matching all hosts, which means we have a match no 419 // that means that we're matching all hosts, which means we have a match no
413 // matter what the test host is. 420 // matter what the test host is.
414 if (match_subdomains_ && host_.empty()) 421 if (match_subdomains_ && host_.empty())
415 return true; 422 return true;
416 423
417 // Otherwise, we can only match if our match pattern matches subdomains. 424 // Otherwise, we can only match if our match pattern matches subdomains.
418 if (!match_subdomains_) 425 if (!match_subdomains_)
419 return false; 426 return false;
420 427
421 // We don't do subdomain matching against IP addresses, so we can give up now 428 // We don't do subdomain matching against IP addresses, so we can give up now
422 // if the test host is an IP address. 429 // if the test host is an IP address.
423 if (test.HostIsIPAddress()) 430 if (test.HostIsIPAddress())
424 return false; 431 return false;
425 432
426 // Check if the test host is a subdomain of our host. 433 // Check if the test host is a subdomain of our host.
427 if (test.host().length() <= (host_.length() + 1)) 434 if (test_host.length() <= (host_.length() + 1))
428 return false; 435 return false;
429 436
430 if (test.host().compare(test.host().length() - host_.length(), 437 if (test_host.compare(test_host.length() - host_.length(), host_.length(),
431 host_.length(), host_) != 0) 438 host_) != 0)
432 return false; 439 return false;
433 440
434 return test.host()[test.host().length() - host_.length() - 1] == '.'; 441 return test_host[test_host.length() - host_.length() - 1] == '.';
435 } 442 }
436 443
437 bool URLPattern::ImpliesAllHosts() const { 444 bool URLPattern::ImpliesAllHosts() const {
438 // Check if it matches all urls or is a pattern like http://*/*. 445 // Check if it matches all urls or is a pattern like http://*/*.
439 if (match_all_urls_ || 446 if (match_all_urls_ ||
440 (match_subdomains_ && host_.empty() && port_ == "*" && path_ == "/*")) { 447 (match_subdomains_ && host_.empty() && port_ == "*" && path_ == "/*")) {
441 return true; 448 return true;
442 } 449 }
443 450
444 // If this doesn't even match subdomains, it can't possibly imply all hosts. 451 // If this doesn't even match subdomains, it can't possibly imply all hosts.
(...skipping 155 matching lines...) Expand 10 before | Expand all | Expand 10 after
600 i != explicit_schemes.end(); ++i) { 607 i != explicit_schemes.end(); ++i) {
601 URLPattern temp = *this; 608 URLPattern temp = *this;
602 temp.SetScheme(*i); 609 temp.SetScheme(*i);
603 temp.SetMatchAllURLs(false); 610 temp.SetMatchAllURLs(false);
604 result.push_back(temp); 611 result.push_back(temp);
605 } 612 }
606 613
607 return result; 614 return result;
608 } 615 }
609 616
617 std::string URLPattern::CanonicalizeHostForMatching(const GURL& url) const {
618 std::string url_host = url.host();
619 if (url_host.empty() || url.HostIsIPAddress())
620 return url_host;
621
622 const bool pattern_host_ends_with_dot = EndsWithDot(host_);
623 const bool url_host_ends_with_dot = EndsWithDot(url_host);
624 if (pattern_host_ends_with_dot != url_host_ends_with_dot) {
625 if (url_host_ends_with_dot)
626 url_host = net::TrimEndingDot(url_host);
627 else
628 url_host += ".";
629 }
630
631 return url_host;
632 }
633
610 // static 634 // static
611 const char* URLPattern::GetParseResultString( 635 const char* URLPattern::GetParseResultString(
612 URLPattern::ParseResult parse_result) { 636 URLPattern::ParseResult parse_result) {
613 return kParseResultMessages[parse_result]; 637 return kParseResultMessages[parse_result];
614 } 638 }
OLD | NEW
« no previous file with comments | « extensions/common/url_pattern.h ('k') | extensions/common/url_pattern_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698