chrome/common/extensions/url_pattern.cc - Issue 7229012: Use extension match pattern syntax in content settings extension API

Side by Side Diff: chrome/common/extensions/url_pattern.cc

Issue 7229012: Use extension match pattern syntax in content settings extension API (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: fix Created 9 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

« chrome/common/extensions/url_pattern.h ('K') | « chrome/common/extensions/url_pattern.h ('k') | chrome/common/extensions/url_pattern_unittest.cc » ('j') | chrome/common/extensions/url_pattern_unittest.cc » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "chrome/common/extensions/url_pattern.h"	5 #include "chrome/common/extensions/url_pattern.h"

6	6

	7 #include "base/string_number_conversions.h"

7 #include "base/string_piece.h"	8 #include "base/string_piece.h"

8 #include "base/string_split.h"	9 #include "base/string_split.h"

9 #include "base/string_util.h"	10 #include "base/string_util.h"

10 #include "chrome/common/url_constants.h"	11 #include "chrome/common/url_constants.h"

11 #include "googleurl/src/gurl.h"	12 #include "googleurl/src/gurl.h"

12 #include "googleurl/src/url_util.h"	13 #include "googleurl/src/url_util.h"

13	14

14 const char URLPattern::kAllUrlsPattern[] = "<all_urls>";	15 const char URLPattern::kAllUrlsPattern[] = "<all_urls>";

15	16

16 namespace {	17 namespace {

(...skipping 24 matching lines...) Expand all Loading...
41	42

42 const char* kParseSuccess = "Success.";	43 const char* kParseSuccess = "Success.";

43 const char* kParseErrorMissingSchemeSeparator = "Missing scheme separator.";	44 const char* kParseErrorMissingSchemeSeparator = "Missing scheme separator.";

44 const char* kParseErrorInvalidScheme = "Invalid scheme.";	45 const char* kParseErrorInvalidScheme = "Invalid scheme.";

45 const char* kParseErrorWrongSchemeType = "Wrong scheme type.";	46 const char* kParseErrorWrongSchemeType = "Wrong scheme type.";

46 const char* kParseErrorEmptyHost = "Host can not be empty.";	47 const char* kParseErrorEmptyHost = "Host can not be empty.";

47 const char* kParseErrorInvalidHostWildcard = "Invalid host wildcard.";	48 const char* kParseErrorInvalidHostWildcard = "Invalid host wildcard.";

48 const char* kParseErrorEmptyPath = "Empty path.";	49 const char* kParseErrorEmptyPath = "Empty path.";

49 const char* kParseErrorHasColon =	50 const char* kParseErrorHasColon =

50 "Ports are not supported in URL patterns. ':' may not be used in a host.";	51 "Ports are not supported in URL patterns. ':' may not be used in a host.";

	52 const char* kParseErrorInvalidPort =

	53 "Invalid port.";

51	54

52 // Message explaining each URLPattern::ParseResult.	55 // Message explaining each URLPattern::ParseResult.

53 const char* kParseResultMessages[] = {	56 const char* kParseResultMessages[] = {

54 kParseSuccess,	57 kParseSuccess,

55 kParseErrorMissingSchemeSeparator,	58 kParseErrorMissingSchemeSeparator,

56 kParseErrorInvalidScheme,	59 kParseErrorInvalidScheme,

57 kParseErrorWrongSchemeType,	60 kParseErrorWrongSchemeType,

58 kParseErrorEmptyHost,	61 kParseErrorEmptyHost,

59 kParseErrorInvalidHostWildcard,	62 kParseErrorInvalidHostWildcard,

60 kParseErrorEmptyPath,	63 kParseErrorEmptyPath,

61 kParseErrorHasColon	64 kParseErrorHasColon,

	65 kParseErrorInvalidPort,

62 };	66 };

63	67

64 COMPILE_ASSERT(URLPattern::NUM_PARSE_RESULTS == arraysize(kParseResultMessages),	68 COMPILE_ASSERT(URLPattern::NUM_PARSE_RESULTS == arraysize(kParseResultMessages),

65 must_add_message_for_each_parse_result);	69 must_add_message_for_each_parse_result);

66	70

67 const char kPathSeparator[] = "/";	71 const char kPathSeparator[] = "/";

68	72

69 bool IsStandardScheme(const std::string& scheme) {	73 bool IsStandardScheme(const std::string& scheme) {

70 // "*" gets the same treatment as a standard scheme.	74 // "*" gets the same treatment as a standard scheme.

71 if (scheme == "*")	75 if (scheme == "*")

72 return true;	76 return true;

73	77

74 return url_util::IsStandard(scheme.c_str(),	78 return url_util::IsStandard(scheme.c_str(),

75 url_parse::Component(0, static_cast<int>(scheme.length())));	79 url_parse::Component(0, static_cast<int>(scheme.length())));

76 }	80 }

77	81

	82 bool IsValidPort(const std::string& port) {

	83 if (port.empty() \|\| port == "*")

	84 return true;

	85 int parsed_port;
	Sam Kerner (Chrome) 2011/06/28 18:12:39 I think you need to set an initial value here (int I think you need to set an initial value here (int parsed_port = -1) to avoid one of our builders giving a compile error. bauerb at google 2011/06/28 22:36:52 No, StringToInt always sets its argument (to zero Show quoted text On 2011/06/28 18:12:39, Sam Kerner (Chrome) wrote: > I think you need to set an initial value here (int parsed_port = -1) to avoid one > of our builders giving a compile error. No, StringToInt always sets its argument (to zero if there's a parsing error). Sam Kerner (Chrome) 2011/06/29 13:36:34 I am not saying your code is wrong. I am saying t Show quoted text On 2011/06/28 22:36:52, please use chromium account wrote: > On 2011/06/28 18:12:39, Sam Kerner (Chrome) wrote: > > I think you need to set an initial value here (int parsed_port = -1) to avoid > one > > of our builders giving a compile error. > > No, StringToInt always sets its argument (to zero if there's a parsing error). I am not saying your code is wrong. I am saying that some compilers are not smart enough to see that, and when you commit you will find that a builder breaks with an error about parsed_port being used without being initialized. Bernhard Bauer 2011/06/29 13:53:18 Hm, fair enough. Show quoted text On 2011/06/29 13:36:34, Sam Kerner (Chrome) wrote: > On 2011/06/28 22:36:52, please use chromium account wrote: > > On 2011/06/28 18:12:39, Sam Kerner (Chrome) wrote: > > > I think you need to set an initial value here (int parsed_port = -1) to avoid > > one > > > of our builders giving a compile error. > > > > No, StringToInt always sets its argument (to zero if there's a parsing error). > > I am not saying your code is wrong. I am saying that some compilers are not > smart enough to see that, and when you commit you will find that a builder > breaks with an error about parsed_port being used without being initialized. Hm, fair enough.
	86 if (!base::StringToInt(port, &parsed_port))

	87 return false;

	88 return (parsed_port >= 0) && (parsed_port < 65536);

	89 }

	90

78 } // namespace	91 } // namespace

79	92

80 URLPattern::URLPattern()	93 URLPattern::URLPattern()

81 : valid_schemes_(SCHEME_NONE),	94 : valid_schemes_(SCHEME_NONE),

82 match_all_urls_(false),	95 match_all_urls_(false),

83 match_subdomains_(false) {}	96 match_subdomains_(false),

	97 port_("*") {}

84	98

85 URLPattern::URLPattern(int valid_schemes)	99 URLPattern::URLPattern(int valid_schemes)

86 : valid_schemes_(valid_schemes), match_all_urls_(false),	100 : valid_schemes_(valid_schemes),

87 match_subdomains_(false) {}	101 match_all_urls_(false),

	102 match_subdomains_(false),

	103 port_("*") {}

88	104

89 URLPattern::URLPattern(int valid_schemes, const std::string& pattern)	105 URLPattern::URLPattern(int valid_schemes, const std::string& pattern)

90 : valid_schemes_(valid_schemes), match_all_urls_(false),	106 : valid_schemes_(valid_schemes),

91 match_subdomains_(false) {	107 match_all_urls_(false),

	108 match_subdomains_(false),

	109 port_("*") {

92	110

93 // Strict error checking is used, because this constructor is only	111 // Strict error checking is used, because this constructor is only

94 // appropriate when we know \|pattern\| is valid.	112 // appropriate when we know \|pattern\| is valid.

95 if (PARSE_SUCCESS != Parse(pattern, PARSE_STRICT))	113 if (PARSE_SUCCESS != Parse(pattern, ERROR_ON_PORTS))

96 NOTREACHED() << "URLPattern is invalid: " << pattern;	114 NOTREACHED() << "URLPattern is invalid: " << pattern;

97 }	115 }

98	116

99 URLPattern::~URLPattern() {	117 URLPattern::~URLPattern() {

100 }	118 }

101	119

102 URLPattern::ParseResult URLPattern::Parse(const std::string& pattern,	120 URLPattern::ParseResult URLPattern::Parse(const std::string& pattern,

103 ParseOption strictness) {	121 ParseOption strictness) {

104 CHECK(strictness == PARSE_LENIENT \|\|

105 strictness == PARSE_STRICT);

106

107 // Special case pattern to match every valid URL.	122 // Special case pattern to match every valid URL.

108 if (pattern == kAllUrlsPattern) {	123 if (pattern == kAllUrlsPattern) {

109 match_all_urls_ = true;	124 match_all_urls_ = true;

110 match_subdomains_ = true;	125 match_subdomains_ = true;

111 scheme_ = "*";	126 scheme_ = "*";

112 host_.clear();	127 host_.clear();

113 SetPath("/*");	128 SetPath("/*");

114 return PARSE_SUCCESS;	129 return PARSE_SUCCESS;

115 }	130 }

116	131

(...skipping 56 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
173 // The first component can optionally be '*' to match all subdomains.	188 // The first component can optionally be '*' to match all subdomains.

174 std::vector<std::string> host_components;	189 std::vector<std::string> host_components;

175 base::SplitString(host_, '.', &host_components);	190 base::SplitString(host_, '.', &host_components);

176 if (host_components[0] == "*") {	191 if (host_components[0] == "*") {

177 match_subdomains_ = true;	192 match_subdomains_ = true;

178 host_components.erase(host_components.begin(),	193 host_components.erase(host_components.begin(),

179 host_components.begin() + 1);	194 host_components.begin() + 1);

180 }	195 }

181 host_ = JoinString(host_components, '.');	196 host_ = JoinString(host_components, '.');

182	197

183 // No other '*' can occur in the host, though. This isn't necessary, but is

184 // done as a convenience to developers who might otherwise be confused and

185 // think '*' works as a glob in the host.

186 if (host_.find('*') != std::string::npos)

187 return PARSE_ERROR_INVALID_HOST_WILDCARD;

188

189 path_start_pos = host_end_pos;	198 path_start_pos = host_end_pos;

190 }	199 }

191	200

192 SetPath(pattern.substr(path_start_pos));	201 SetPath(pattern.substr(path_start_pos));

193	202

194 if (strictness == PARSE_STRICT && host_.find(':') != std::string::npos)	203 size_t port_pos = host_.find(':');
	Sam Kerner (Chrome) 2011/06/28 18:12:39 Pattern http://foo.com:123/* has a host of "foo.co Pattern http://foo.com:123/* has a host of "foo.com:123" with PARSE_LENIENT, and "foo.com" with IGNORE_PORTS. Because ':' is not a valid character in a DNS host name, the old behavior means the pattern never matched any host. I am nervous about changing behavior, because it means installed extensions will suddenly start doing things they did not do before. On the other hand, any developer who tried to load them in chrome for the last few releases got an error. Matt, what do you think? The case where this matters is a pattern in the manifest, such as a host permission. I think all the new code uses USE_PORTS, so you could change IGNORE_PORTS to the old behavior and we could figure this out in a separate change. Matt Perry 2011/06/28 21:37:07 I agree. It seems unlikely to matter, but to be on Show quoted text On 2011/06/28 18:12:39, Sam Kerner (Chrome) wrote: > Pattern http://foo.com:123/* has a host of "foo.com:123" with PARSE_LENIENT, and > "foo.com" with IGNORE_PORTS. > > Because ':' is not a valid character in a DNS host name, the old behavior means > the pattern never matched any host. I am nervous about changing behavior, > because it means installed extensions will suddenly start doing things they did > not do before. On the other hand, any developer who tried to load them in > chrome for the last few releases got an error. > > Matt, what do you think? The case where this matters is a pattern in the manifest, > such as a host permission. > > I think all the new code uses USE_PORTS, so you could change IGNORE_PORTS to the > old behavior and we could figure this out in a separate change. I agree. It seems unlikely to matter, but to be on the safe side I think we should keep the behavior of IGNORE_PORTS. Bernhard Bauer 2011/06/29 13:53:18 Done. 
Show quoted text On 2011/06/28 21:37:07, Matt Perry wrote: > On 2011/06/28 18:12:39, Sam Kerner (Chrome) wrote: > > Pattern http://foo.com:123/* has a host of "foo.com:123" with PARSE_LENIENT, > and > > "foo.com" with IGNORE_PORTS. > > > > Because ':' is not a valid character in a DNS host name, the old behavior > means > > the pattern never matched any host. I am nervous about changing behavior, > > because it means installed extensions will suddenly start doing things they > did > > not do before. On the other hand, any developer who tried to load them in > > chrome for the last few releases got an error. > > > > Matt, what do you think? The case where this matters is a pattern in the manifest, > > such as a host permission. > > > > I think all the new code uses USE_PORTS, so you could change IGNORE_PORTS to > the > > old behavior and we could figure this out in a separate change. > > I agree. It seems unlikely to matter, but to be on the safe side I think we > should keep the behavior of IGNORE_PORTS. Done.
195 return PARSE_ERROR_HAS_COLON;	204 if (port_pos != std::string::npos) {

	205 if (strictness == ERROR_ON_PORTS)

	206 return PARSE_ERROR_HAS_COLON;

	207

	208 if (strictness == USE_PORTS) {

	209 std::string port = host_.substr(port_pos + 1);

	210 if (!SetPort(port))

	211 return PARSE_ERROR_INVALID_PORT;

	212 }

	213

	214 host_ = host_.substr(0, port_pos);

	215 }

	216

	217 // No other '*' can occur in the host, though. This isn't necessary, but is

	218 // done as a convenience to developers who might otherwise be confused and

	219 // think '*' works as a glob in the host.

	220 if (host_.find('*') != std::string::npos)

	221 return PARSE_ERROR_INVALID_HOST_WILDCARD;

196	222

197 return PARSE_SUCCESS;	223 return PARSE_SUCCESS;

198 }	224 }

199	225

200 bool URLPattern::SetScheme(const std::string& scheme) {	226 bool URLPattern::SetScheme(const std::string& scheme) {

201 scheme_ = scheme;	227 scheme_ = scheme;

202 if (scheme_ == "*") {	228 if (scheme_ == "*") {

203 valid_schemes_ &= (SCHEME_HTTP \| SCHEME_HTTPS);	229 valid_schemes_ &= (SCHEME_HTTP \| SCHEME_HTTPS);

204 } else if (!IsValidScheme(scheme_)) {	230 } else if (!IsValidScheme(scheme_)) {

205 return false;	231 return false;

(...skipping 13 matching lines...) Expand all Loading...
219 return false;	245 return false;

220 }	246 }

221	247

222 void URLPattern::SetPath(const std::string& path) {	248 void URLPattern::SetPath(const std::string& path) {

223 path_ = path;	249 path_ = path;

224 path_escaped_ = path_;	250 path_escaped_ = path_;

225 ReplaceSubstringsAfterOffset(&path_escaped_, 0, "\\", "\\\\");	251 ReplaceSubstringsAfterOffset(&path_escaped_, 0, "\\", "\\\\");

226 ReplaceSubstringsAfterOffset(&path_escaped_, 0, "?", "\\?");	252 ReplaceSubstringsAfterOffset(&path_escaped_, 0, "?", "\\?");

227 }	253 }

228	254

	255 bool URLPattern::SetPort(const std::string& port) {

	256 if (IsValidPort(port)) {

	257 port_ = port;

	258 return true;

	259 }

	260 return false;

	261 }

	262

229 bool URLPattern::MatchesURL(const GURL &test) const {	263 bool URLPattern::MatchesURL(const GURL &test) const {

230 if (!MatchesScheme(test.scheme()))	264 if (!MatchesScheme(test.scheme()))

231 return false;	265 return false;

232	266

233 if (match_all_urls_)	267 if (match_all_urls_)

234 return true;	268 return true;

235	269

236 // Ignore hostname if scheme is file://.	270 // Ignore hostname if scheme is file://.

237 if (scheme_ != chrome::kFileScheme && !MatchesHost(test))	271 if (scheme_ != chrome::kFileScheme && !MatchesHost(test))

238 return false;	272 return false;

239	273

240 if (!MatchesPath(test.PathForRequest()))	274 if (!MatchesPath(test.PathForRequest()))

241 return false;	275 return false;

242	276

	277 if (!MatchesPort(test.port()))
	Matt Perry 2011/06/28 21:37:07 do you want to handle matching a "default port"? l do you want to handle matching a "default port"? like if the pattern was http://google.com:80/foo, does MatchesURL("http://google.com/foo") match it? If so, you have to check if test.port() == -1 (or use EffectiveIntPort()). bauerb at google 2011/06/28 22:36:52 Ooh! Yes, I want that, thanks! Show quoted text On 2011/06/28 21:37:07, Matt Perry wrote: > do you want to handle matching a "default port"? like if the pattern was > http://google.com:80/foo, does MatchesURL("http://google.com/foo") match it? > > If so, you have to check if test.port() == -1 (or use EffectiveIntPort()). Ooh! Yes, I want that, thanks!
	278 return false;

	279

243 return true;	280 return true;

244 }	281 }

245	282

246 bool URLPattern::MatchesScheme(const std::string& test) const {	283 bool URLPattern::MatchesScheme(const std::string& test) const {

247 if (!IsValidScheme(test))	284 if (!IsValidScheme(test))

248 return false;	285 return false;

249	286

250 return scheme_ == "*" \|\| test == scheme_;	287 return scheme_ == "*" \|\| test == scheme_;

251 }	288 }

252	289

(...skipping 36 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
289 return test.host()[test.host().length() - host_.length() - 1] == '.';	326 return test.host()[test.host().length() - host_.length() - 1] == '.';

290 }	327 }

291	328

292 bool URLPattern::MatchesPath(const std::string& test) const {	329 bool URLPattern::MatchesPath(const std::string& test) const {

293 if (!MatchPattern(test, path_escaped_))	330 if (!MatchPattern(test, path_escaped_))

294 return false;	331 return false;

295	332

296 return true;	333 return true;

297 }	334 }

298	335

	336 bool URLPattern::MatchesPort(const std::string& test) const {

	337 if (!IsValidPort(test))
	Matt Perry 2011/06/28 21:37:07 this test seems unnecessary. port_ must be a valid this test seems unnecessary. port_ must be a valid port, since we don't allow it to be set otherwise, so if port_ == test, test is a valid port. Bernhard Bauer 2011/06/29 13:53:18 But we don't want a port wildcard to match an inva Show quoted text On 2011/06/28 21:37:07, Matt Perry wrote: > this test seems unnecessary. port_ must be a valid port, since we don't allow it > to be set otherwise, so if port_ == test, test is a valid port. But we don't want a port wildcard to match an invalid port? Matt Perry 2011/06/29 17:18:47 Oh OK, fair enough. Show quoted text On 2011/06/29 13:53:18, Bernhard Bauer wrote: > On 2011/06/28 21:37:07, Matt Perry wrote: > > this test seems unnecessary. port_ must be a valid port, since we don't allow > it > > to be set otherwise, so if port_ == test, test is a valid port. > > But we don't want a port wildcard to match an invalid port? Oh OK, fair enough.
	338 return false;

	339

	340 return port_ == "*" \|\| port_ == test;

	341 }

	342

299 std::string URLPattern::GetAsString() const {	343 std::string URLPattern::GetAsString() const {

300 if (match_all_urls_)	344 if (match_all_urls_)

301 return kAllUrlsPattern;	345 return kAllUrlsPattern;

302	346

303 bool standard_scheme = IsStandardScheme(scheme_);	347 bool standard_scheme = IsStandardScheme(scheme_);

304	348

305 std::string spec = scheme_ +	349 std::string spec = scheme_ +

306 (standard_scheme ? chrome::kStandardSchemeSeparator : ":");	350 (standard_scheme ? chrome::kStandardSchemeSeparator : ":");

307	351

308 if (scheme_ != chrome::kFileScheme && standard_scheme) {	352 if (scheme_ != chrome::kFileScheme && standard_scheme) {

309 if (match_subdomains_) {	353 if (match_subdomains_) {

310 spec += "*";	354 spec += "*";

311 if (!host_.empty())	355 if (!host_.empty())

312 spec += ".";	356 spec += ".";

313 }	357 }

314	358

315 if (!host_.empty())	359 if (!host_.empty())

316 spec += host_;	360 spec += host_;

	361

	362 if (port_ != "*") {

	363 spec += ":";

	364 spec += port_;

	365 }

317 }	366 }

318	367

319 if (!path_.empty())	368 if (!path_.empty())

320 spec += path_;	369 spec += path_;

321	370

322 return spec;	371 return spec;

323 }	372 }

324	373

325 bool URLPattern::OverlapsWith(const URLPattern& other) const {	374 bool URLPattern::OverlapsWith(const URLPattern& other) const {

326 if (!MatchesAnyScheme(other.GetExplicitSchemes()) &&	375 if (!MatchesAnyScheme(other.GetExplicitSchemes()) &&

327 !other.MatchesAnyScheme(GetExplicitSchemes())) {	376 !other.MatchesAnyScheme(GetExplicitSchemes())) {

328 return false;	377 return false;

329 }	378 }

330	379

331 if (!MatchesHost(other.host()) && !other.MatchesHost(host_))	380 if (!MatchesHost(other.host()) && !other.MatchesHost(host_))

332 return false;	381 return false;

333	382

	383 if (port_ != "" && other.port() != "" && port_ != other.port())

	384 return false;

	385

334 // We currently only use OverlapsWith() for the patterns inside	386 // We currently only use OverlapsWith() for the patterns inside

335 // URLPatternSet. In those cases, we know that the path will have only a	387 // URLPatternSet. In those cases, we know that the path will have only a

336 // single wildcard at the end. This makes figuring out overlap much easier. It	388 // single wildcard at the end. This makes figuring out overlap much easier. It

337 // seems like there is probably a computer-sciency way to solve the general	389 // seems like there is probably a computer-sciency way to solve the general

338 // case, but we don't need that yet.	390 // case, but we don't need that yet.

339 DCHECK(path_.find('*') == path_.size() - 1);	391 DCHECK(path_.find('*') == path_.size() - 1);

340 DCHECK(other.path().find('*') == other.path().size() - 1);	392 DCHECK(other.path().find('*') == other.path().size() - 1);

341	393

342 if (!MatchesPath(other.path().substr(0, other.path().size() - 1)) &&	394 if (!MatchesPath(other.path().substr(0, other.path().size() - 1)) &&

343 !other.MatchesPath(path_.substr(0, path_.size() - 1)))	395 !other.MatchesPath(path_.substr(0, path_.size() - 1)))

(...skipping 43 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
387 }	439 }

388	440

389 return result;	441 return result;

390 }	442 }

391	443

392 // static	444 // static

393 const char* URLPattern::GetParseResultString(	445 const char* URLPattern::GetParseResultString(

394 URLPattern::ParseResult parse_result) {	446 URLPattern::ParseResult parse_result) {

395 return kParseResultMessages[parse_result];	447 return kParseResultMessages[parse_result];

396 }	448 }

OLD	NEW