extensions/common/url_pattern.cc - Issue 2455373002: Add implicit trailing dot domain matching support to URLPattern.

Side by Side Diff: extensions/common/url_pattern.cc

Issue 2455373002: Add implicit trailing dot domain matching support to URLPattern. (Closed)

Patch Set: Created 4 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "extensions/common/url_pattern.h"	5 #include "extensions/common/url_pattern.h"

6	6

7 #include <stddef.h>	7 #include <stddef.h>

8	8

9 #include <ostream>	9 #include <ostream>

10	10

11 #include "base/macros.h"	11 #include "base/macros.h"

12 #include "base/strings/pattern.h"	12 #include "base/strings/pattern.h"

13 #include "base/strings/string_number_conversions.h"	13 #include "base/strings/string_number_conversions.h"

14 #include "base/strings/string_piece.h"	14 #include "base/strings/string_piece.h"

15 #include "base/strings/string_split.h"	15 #include "base/strings/string_split.h"

16 #include "base/strings/string_util.h"	16 #include "base/strings/string_util.h"

17 #include "base/strings/stringprintf.h"	17 #include "base/strings/stringprintf.h"

18 #include "content/public/common/url_constants.h"	18 #include "content/public/common/url_constants.h"

19 #include "extensions/common/constants.h"	19 #include "extensions/common/constants.h"

20 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"	20 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"

	21 #include "net/base/url_util.h"

21 #include "url/gurl.h"	22 #include "url/gurl.h"

22 #include "url/url_util.h"	23 #include "url/url_util.h"

23	24

24 const char URLPattern::kAllUrlsPattern[] = "<all_urls>";	25 const char URLPattern::kAllUrlsPattern[] = "<all_urls>";

25	26

26 namespace {	27 namespace {

27	28

28 // TODO(aa): What about more obscure schemes like data: and javascript: ?	29 // TODO(aa): What about more obscure schemes like data: and javascript: ?

29 // Note: keep this array in sync with kValidSchemeMasks.	30 // Note: keep this array in sync with kValidSchemeMasks.

30 const char* const kValidSchemes[] = {	31 const char* const kValidSchemes[] = {

(...skipping 75 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
106 // called for the patterns inside URLPatternSet. In those cases, we know that	107 // called for the patterns inside URLPatternSet. In those cases, we know that

107 // the path will have only a single wildcard at the end. This makes figuring	108 // the path will have only a single wildcard at the end. This makes figuring

108 // out overlap much easier. It seems like there is probably a computer-sciency	109 // out overlap much easier. It seems like there is probably a computer-sciency

109 // way to solve the general case, but we don't need that yet.	110 // way to solve the general case, but we don't need that yet.

110 std::string StripTrailingWildcard(const std::string& path) {	111 std::string StripTrailingWildcard(const std::string& path) {

111 size_t wildcard_index = path.find('*');	112 size_t wildcard_index = path.find('*');

112 size_t path_last = path.size() - 1;	113 size_t path_last = path.size() - 1;

113 return wildcard_index == path_last ? path.substr(0, path_last) : path;	114 return wildcard_index == path_last ? path.substr(0, path_last) : path;

114 }	115 }

115	116

	117 bool EndsWithDot(base::StringPiece str) {

	118 return base::EndsWith(str, ".", base::CompareCase::SENSITIVE);

	119 }

	120

116 } // namespace	121 } // namespace

117	122

118 // static	123 // static

119 bool URLPattern::IsValidSchemeForExtensions(const std::string& scheme) {	124 bool URLPattern::IsValidSchemeForExtensions(const std::string& scheme) {

120 for (size_t i = 0; i < arraysize(kValidSchemes); ++i) {	125 for (size_t i = 0; i < arraysize(kValidSchemes); ++i) {

121 if (scheme == kValidSchemes[i])	126 if (scheme == kValidSchemes[i])

122 return true;	127 return true;

123 }	128 }

124 return false;	129 return false;

125 }	130 }

(...skipping 271 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
397	402

398 bool URLPattern::MatchesHost(const std::string& host) const {	403 bool URLPattern::MatchesHost(const std::string& host) const {

399 std::string test(url::kHttpScheme);	404 std::string test(url::kHttpScheme);

400 test += url::kStandardSchemeSeparator;	405 test += url::kStandardSchemeSeparator;

401 test += host;	406 test += host;

402 test += "/";	407 test += "/";

403 return MatchesHost(GURL(test));	408 return MatchesHost(GURL(test));

404 }	409 }

405	410

406 bool URLPattern::MatchesHost(const GURL& test) const {	411 bool URLPattern::MatchesHost(const GURL& test) const {

	412 const std::string test_host(CanonicalizeHostForMatching(test));
	Devlin 2016/10/28 19:17:27 This code is called under some pretty performance-sensitive times (e.g. web request matching, etc). We need to do a pretty big overhaul to make it all faster, but I wonder if we can at least avoid a string copy here. Could we instead use a StringPiece test_host and StringPiece pattern_host that both have the trailing dot (if any) removed, and thus avoid the copies?
	413

407 // If the hosts are exactly equal, we have a match.	414 // If the hosts are exactly equal, we have a match.

408 if (test.host() == host_)	415 if (test_host == host_)

409 return true;	416 return true;

410	417

411 // If we're matching subdomains, and we have no host in the match pattern,	418 // If we're matching subdomains, and we have no host in the match pattern,

412 // that means that we're matching all hosts, which means we have a match no	419 // that means that we're matching all hosts, which means we have a match no

413 // matter what the test host is.	420 // matter what the test host is.

414 if (match_subdomains_ && host_.empty())	421 if (match_subdomains_ && host_.empty())

415 return true;	422 return true;

416	423

417 // Otherwise, we can only match if our match pattern matches subdomains.	424 // Otherwise, we can only match if our match pattern matches subdomains.

418 if (!match_subdomains_)	425 if (!match_subdomains_)

419 return false;	426 return false;

420	427

421 // We don't do subdomain matching against IP addresses, so we can give up now	428 // We don't do subdomain matching against IP addresses, so we can give up now

422 // if the test host is an IP address.	429 // if the test host is an IP address.

423 if (test.HostIsIPAddress())	430 if (test.HostIsIPAddress())

424 return false;	431 return false;

425	432

426 // Check if the test host is a subdomain of our host.	433 // Check if the test host is a subdomain of our host.

427 if (test.host().length() <= (host_.length() + 1))	434 if (test_host.length() <= (host_.length() + 1))

428 return false;	435 return false;

429	436

430 if (test.host().compare(test.host().length() - host_.length(),	437 if (test_host.compare(test_host.length() - host_.length(), host_.length(),

431 host_.length(), host_) != 0)	438 host_) != 0)

432 return false;	439 return false;

433	440

434 return test.host()[test.host().length() - host_.length() - 1] == '.';	441 return test_host[test_host.length() - host_.length() - 1] == '.';

435 }	442 }

436	443

437 bool URLPattern::ImpliesAllHosts() const {	444 bool URLPattern::ImpliesAllHosts() const {

438 // Check if it matches all urls or is a pattern like http://*/*.	445 // Check if it matches all urls or is a pattern like http://*/*.

439 if (match_all_urls_ ||	446 if (match_all_urls_ ||

440 (match_subdomains_ && host_.empty() && port_ == "" && path_ == "/")) {	447 (match_subdomains_ && host_.empty() && port_ == "" && path_ == "/")) {

441 return true;	448 return true;

442 }	449 }

443	450

444 // If this doesn't even match subdomains, it can't possibly imply all hosts.	451 // If this doesn't even match subdomains, it can't possibly imply all hosts.

(...skipping 155 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
600 i != explicit_schemes.end(); ++i) {	607 i != explicit_schemes.end(); ++i) {

601 URLPattern temp = *this;	608 URLPattern temp = *this;

602 temp.SetScheme(*i);	609 temp.SetScheme(*i);

603 temp.SetMatchAllURLs(false);	610 temp.SetMatchAllURLs(false);

604 result.push_back(temp);	611 result.push_back(temp);

605 }	612 }

606	613

607 return result;	614 return result;

608 }	615 }

609	616

	617 std::string URLPattern::CanonicalizeHostForMatching(const GURL& url) const {

	618 std::string url_host = url.host();

	619 if (url_host.empty() || url.HostIsIPAddress())

	620 return url_host;

	621

	622 const bool pattern_host_ends_with_dot = EndsWithDot(host_);

	623 const bool url_host_ends_with_dot = EndsWithDot(url_host);

	624 if (pattern_host_ends_with_dot != url_host_ends_with_dot) {

	625 if (url_host_ends_with_dot)

	626 url_host = net::TrimEndingDot(url_host);

	627 else

	628 url_host += ".";

	629 }

	630

	631 return url_host;

	632 }

	633

610 // static	634 // static

611 const char* URLPattern::GetParseResultString(	635 const char* URLPattern::GetParseResultString(

612 URLPattern::ParseResult parse_result) {	636 URLPattern::ParseResult parse_result) {

613 return kParseResultMessages[parse_result];	637 return kParseResultMessages[parse_result];

614 }	638 }

OLD	NEW

« no previous file with comments | « extensions/common/url_pattern.h ('k') | extensions/common/url_pattern_unittest.cc » ('j') | no next file with comments »