Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(132)

Side by Side Diff: chrome/common/extensions/url_pattern.cc

Issue 7229012: Use extension match pattern syntax in content settings extension API (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: fix Created 9 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/common/extensions/url_pattern.h" 5 #include "chrome/common/extensions/url_pattern.h"
6 6
7 #include "base/string_number_conversions.h"
7 #include "base/string_piece.h" 8 #include "base/string_piece.h"
8 #include "base/string_split.h" 9 #include "base/string_split.h"
9 #include "base/string_util.h" 10 #include "base/string_util.h"
10 #include "chrome/common/url_constants.h" 11 #include "chrome/common/url_constants.h"
11 #include "googleurl/src/gurl.h" 12 #include "googleurl/src/gurl.h"
12 #include "googleurl/src/url_util.h" 13 #include "googleurl/src/url_util.h"
13 14
14 const char URLPattern::kAllUrlsPattern[] = "<all_urls>"; 15 const char URLPattern::kAllUrlsPattern[] = "<all_urls>";
15 16
16 namespace { 17 namespace {
(...skipping 24 matching lines...) Expand all
41 42
42 const char* kParseSuccess = "Success."; 43 const char* kParseSuccess = "Success.";
43 const char* kParseErrorMissingSchemeSeparator = "Missing scheme separator."; 44 const char* kParseErrorMissingSchemeSeparator = "Missing scheme separator.";
44 const char* kParseErrorInvalidScheme = "Invalid scheme."; 45 const char* kParseErrorInvalidScheme = "Invalid scheme.";
45 const char* kParseErrorWrongSchemeType = "Wrong scheme type."; 46 const char* kParseErrorWrongSchemeType = "Wrong scheme type.";
46 const char* kParseErrorEmptyHost = "Host can not be empty."; 47 const char* kParseErrorEmptyHost = "Host can not be empty.";
47 const char* kParseErrorInvalidHostWildcard = "Invalid host wildcard."; 48 const char* kParseErrorInvalidHostWildcard = "Invalid host wildcard.";
48 const char* kParseErrorEmptyPath = "Empty path."; 49 const char* kParseErrorEmptyPath = "Empty path.";
49 const char* kParseErrorHasColon = 50 const char* kParseErrorHasColon =
50 "Ports are not supported in URL patterns. ':' may not be used in a host."; 51 "Ports are not supported in URL patterns. ':' may not be used in a host.";
52 const char* kParseErrorInvalidPort =
53 "Invalid port.";
51 54
52 // Message explaining each URLPattern::ParseResult. 55 // Message explaining each URLPattern::ParseResult.
53 const char* kParseResultMessages[] = { 56 const char* kParseResultMessages[] = {
54 kParseSuccess, 57 kParseSuccess,
55 kParseErrorMissingSchemeSeparator, 58 kParseErrorMissingSchemeSeparator,
56 kParseErrorInvalidScheme, 59 kParseErrorInvalidScheme,
57 kParseErrorWrongSchemeType, 60 kParseErrorWrongSchemeType,
58 kParseErrorEmptyHost, 61 kParseErrorEmptyHost,
59 kParseErrorInvalidHostWildcard, 62 kParseErrorInvalidHostWildcard,
60 kParseErrorEmptyPath, 63 kParseErrorEmptyPath,
61 kParseErrorHasColon 64 kParseErrorHasColon,
65 kParseErrorInvalidPort,
62 }; 66 };
63 67
64 COMPILE_ASSERT(URLPattern::NUM_PARSE_RESULTS == arraysize(kParseResultMessages), 68 COMPILE_ASSERT(URLPattern::NUM_PARSE_RESULTS == arraysize(kParseResultMessages),
65 must_add_message_for_each_parse_result); 69 must_add_message_for_each_parse_result);
66 70
67 const char kPathSeparator[] = "/"; 71 const char kPathSeparator[] = "/";
68 72
69 bool IsStandardScheme(const std::string& scheme) { 73 bool IsStandardScheme(const std::string& scheme) {
70 // "*" gets the same treatment as a standard scheme. 74 // "*" gets the same treatment as a standard scheme.
71 if (scheme == "*") 75 if (scheme == "*")
72 return true; 76 return true;
73 77
74 return url_util::IsStandard(scheme.c_str(), 78 return url_util::IsStandard(scheme.c_str(),
75 url_parse::Component(0, static_cast<int>(scheme.length()))); 79 url_parse::Component(0, static_cast<int>(scheme.length())));
76 } 80 }
77 81
82 bool IsValidPort(const std::string& port) {
83 if (port.empty() || port == "*")
84 return true;
85 int parsed_port;
Sam Kerner (Chrome) 2011/06/28 18:12:39 I think you need to set an initial value here (int
bauerb at google 2011/06/28 22:36:52 No, StringToInt always sets its argument (to zero
Sam Kerner (Chrome) 2011/06/29 13:36:34 I am not saying your code is wrong. I am saying t
Bernhard Bauer 2011/06/29 13:53:18 Hm, fair enough.
86 if (!base::StringToInt(port, &parsed_port))
87 return false;
88 return (parsed_port >= 0) && (parsed_port < 65536);
89 }
90
78 } // namespace 91 } // namespace
79 92
80 URLPattern::URLPattern() 93 URLPattern::URLPattern()
81 : valid_schemes_(SCHEME_NONE), 94 : valid_schemes_(SCHEME_NONE),
82 match_all_urls_(false), 95 match_all_urls_(false),
83 match_subdomains_(false) {} 96 match_subdomains_(false),
97 port_("*") {}
84 98
85 URLPattern::URLPattern(int valid_schemes) 99 URLPattern::URLPattern(int valid_schemes)
86 : valid_schemes_(valid_schemes), match_all_urls_(false), 100 : valid_schemes_(valid_schemes),
87 match_subdomains_(false) {} 101 match_all_urls_(false),
102 match_subdomains_(false),
103 port_("*") {}
88 104
89 URLPattern::URLPattern(int valid_schemes, const std::string& pattern) 105 URLPattern::URLPattern(int valid_schemes, const std::string& pattern)
90 : valid_schemes_(valid_schemes), match_all_urls_(false), 106 : valid_schemes_(valid_schemes),
91 match_subdomains_(false) { 107 match_all_urls_(false),
108 match_subdomains_(false),
109 port_("*") {
92 110
93 // Strict error checking is used, because this constructor is only 111 // Strict error checking is used, because this constructor is only
94 // appropriate when we know |pattern| is valid. 112 // appropriate when we know |pattern| is valid.
95 if (PARSE_SUCCESS != Parse(pattern, PARSE_STRICT)) 113 if (PARSE_SUCCESS != Parse(pattern, ERROR_ON_PORTS))
96 NOTREACHED() << "URLPattern is invalid: " << pattern; 114 NOTREACHED() << "URLPattern is invalid: " << pattern;
97 } 115 }
98 116
99 URLPattern::~URLPattern() { 117 URLPattern::~URLPattern() {
100 } 118 }
101 119
102 URLPattern::ParseResult URLPattern::Parse(const std::string& pattern, 120 URLPattern::ParseResult URLPattern::Parse(const std::string& pattern,
103 ParseOption strictness) { 121 ParseOption strictness) {
104 CHECK(strictness == PARSE_LENIENT ||
105 strictness == PARSE_STRICT);
106
107 // Special case pattern to match every valid URL. 122 // Special case pattern to match every valid URL.
108 if (pattern == kAllUrlsPattern) { 123 if (pattern == kAllUrlsPattern) {
109 match_all_urls_ = true; 124 match_all_urls_ = true;
110 match_subdomains_ = true; 125 match_subdomains_ = true;
111 scheme_ = "*"; 126 scheme_ = "*";
112 host_.clear(); 127 host_.clear();
113 SetPath("/*"); 128 SetPath("/*");
114 return PARSE_SUCCESS; 129 return PARSE_SUCCESS;
115 } 130 }
116 131
(...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after
173 // The first component can optionally be '*' to match all subdomains. 188 // The first component can optionally be '*' to match all subdomains.
174 std::vector<std::string> host_components; 189 std::vector<std::string> host_components;
175 base::SplitString(host_, '.', &host_components); 190 base::SplitString(host_, '.', &host_components);
176 if (host_components[0] == "*") { 191 if (host_components[0] == "*") {
177 match_subdomains_ = true; 192 match_subdomains_ = true;
178 host_components.erase(host_components.begin(), 193 host_components.erase(host_components.begin(),
179 host_components.begin() + 1); 194 host_components.begin() + 1);
180 } 195 }
181 host_ = JoinString(host_components, '.'); 196 host_ = JoinString(host_components, '.');
182 197
183 // No other '*' can occur in the host, though. This isn't necessary, but is
184 // done as a convenience to developers who might otherwise be confused and
185 // think '*' works as a glob in the host.
186 if (host_.find('*') != std::string::npos)
187 return PARSE_ERROR_INVALID_HOST_WILDCARD;
188
189 path_start_pos = host_end_pos; 198 path_start_pos = host_end_pos;
190 } 199 }
191 200
192 SetPath(pattern.substr(path_start_pos)); 201 SetPath(pattern.substr(path_start_pos));
193 202
194 if (strictness == PARSE_STRICT && host_.find(':') != std::string::npos) 203 size_t port_pos = host_.find(':');
Sam Kerner (Chrome) 2011/06/28 18:12:39 Pattern http://foo.com:123/* has a host of "foo.co
Matt Perry 2011/06/28 21:37:07 I agree. It seems unlikely to matter, but to be on
Bernhard Bauer 2011/06/29 13:53:18 Done.
195 return PARSE_ERROR_HAS_COLON; 204 if (port_pos != std::string::npos) {
205 if (strictness == ERROR_ON_PORTS)
206 return PARSE_ERROR_HAS_COLON;
207
208 if (strictness == USE_PORTS) {
209 std::string port = host_.substr(port_pos + 1);
210 if (!SetPort(port))
211 return PARSE_ERROR_INVALID_PORT;
212 }
213
214 host_ = host_.substr(0, port_pos);
215 }
216
217 // No other '*' can occur in the host, though. This isn't necessary, but is
218 // done as a convenience to developers who might otherwise be confused and
219 // think '*' works as a glob in the host.
220 if (host_.find('*') != std::string::npos)
221 return PARSE_ERROR_INVALID_HOST_WILDCARD;
196 222
197 return PARSE_SUCCESS; 223 return PARSE_SUCCESS;
198 } 224 }
199 225
200 bool URLPattern::SetScheme(const std::string& scheme) { 226 bool URLPattern::SetScheme(const std::string& scheme) {
201 scheme_ = scheme; 227 scheme_ = scheme;
202 if (scheme_ == "*") { 228 if (scheme_ == "*") {
203 valid_schemes_ &= (SCHEME_HTTP | SCHEME_HTTPS); 229 valid_schemes_ &= (SCHEME_HTTP | SCHEME_HTTPS);
204 } else if (!IsValidScheme(scheme_)) { 230 } else if (!IsValidScheme(scheme_)) {
205 return false; 231 return false;
(...skipping 13 matching lines...) Expand all
219 return false; 245 return false;
220 } 246 }
221 247
222 void URLPattern::SetPath(const std::string& path) { 248 void URLPattern::SetPath(const std::string& path) {
223 path_ = path; 249 path_ = path;
224 path_escaped_ = path_; 250 path_escaped_ = path_;
225 ReplaceSubstringsAfterOffset(&path_escaped_, 0, "\\", "\\\\"); 251 ReplaceSubstringsAfterOffset(&path_escaped_, 0, "\\", "\\\\");
226 ReplaceSubstringsAfterOffset(&path_escaped_, 0, "?", "\\?"); 252 ReplaceSubstringsAfterOffset(&path_escaped_, 0, "?", "\\?");
227 } 253 }
228 254
255 bool URLPattern::SetPort(const std::string& port) {
256 if (IsValidPort(port)) {
257 port_ = port;
258 return true;
259 }
260 return false;
261 }
262
229 bool URLPattern::MatchesURL(const GURL &test) const { 263 bool URLPattern::MatchesURL(const GURL &test) const {
230 if (!MatchesScheme(test.scheme())) 264 if (!MatchesScheme(test.scheme()))
231 return false; 265 return false;
232 266
233 if (match_all_urls_) 267 if (match_all_urls_)
234 return true; 268 return true;
235 269
236 // Ignore hostname if scheme is file://. 270 // Ignore hostname if scheme is file://.
237 if (scheme_ != chrome::kFileScheme && !MatchesHost(test)) 271 if (scheme_ != chrome::kFileScheme && !MatchesHost(test))
238 return false; 272 return false;
239 273
240 if (!MatchesPath(test.PathForRequest())) 274 if (!MatchesPath(test.PathForRequest()))
241 return false; 275 return false;
242 276
277 if (!MatchesPort(test.port()))
Matt Perry 2011/06/28 21:37:07 do you want to handle matching a "default port"? l
bauerb at google 2011/06/28 22:36:52 Ooh! Yes, I want that, thanks!
278 return false;
279
243 return true; 280 return true;
244 } 281 }
245 282
246 bool URLPattern::MatchesScheme(const std::string& test) const { 283 bool URLPattern::MatchesScheme(const std::string& test) const {
247 if (!IsValidScheme(test)) 284 if (!IsValidScheme(test))
248 return false; 285 return false;
249 286
250 return scheme_ == "*" || test == scheme_; 287 return scheme_ == "*" || test == scheme_;
251 } 288 }
252 289
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
289 return test.host()[test.host().length() - host_.length() - 1] == '.'; 326 return test.host()[test.host().length() - host_.length() - 1] == '.';
290 } 327 }
291 328
292 bool URLPattern::MatchesPath(const std::string& test) const { 329 bool URLPattern::MatchesPath(const std::string& test) const {
293 if (!MatchPattern(test, path_escaped_)) 330 if (!MatchPattern(test, path_escaped_))
294 return false; 331 return false;
295 332
296 return true; 333 return true;
297 } 334 }
298 335
336 bool URLPattern::MatchesPort(const std::string& test) const {
337 if (!IsValidPort(test))
Matt Perry 2011/06/28 21:37:07 this test seems unnecessary. port_ must be a valid
Bernhard Bauer 2011/06/29 13:53:18 But we don't want a port wildcard to match an inva
Matt Perry 2011/06/29 17:18:47 Oh OK, fair enough.
338 return false;
339
340 return port_ == "*" || port_ == test;
341 }
342
299 std::string URLPattern::GetAsString() const { 343 std::string URLPattern::GetAsString() const {
300 if (match_all_urls_) 344 if (match_all_urls_)
301 return kAllUrlsPattern; 345 return kAllUrlsPattern;
302 346
303 bool standard_scheme = IsStandardScheme(scheme_); 347 bool standard_scheme = IsStandardScheme(scheme_);
304 348
305 std::string spec = scheme_ + 349 std::string spec = scheme_ +
306 (standard_scheme ? chrome::kStandardSchemeSeparator : ":"); 350 (standard_scheme ? chrome::kStandardSchemeSeparator : ":");
307 351
308 if (scheme_ != chrome::kFileScheme && standard_scheme) { 352 if (scheme_ != chrome::kFileScheme && standard_scheme) {
309 if (match_subdomains_) { 353 if (match_subdomains_) {
310 spec += "*"; 354 spec += "*";
311 if (!host_.empty()) 355 if (!host_.empty())
312 spec += "."; 356 spec += ".";
313 } 357 }
314 358
315 if (!host_.empty()) 359 if (!host_.empty())
316 spec += host_; 360 spec += host_;
361
362 if (port_ != "*") {
363 spec += ":";
364 spec += port_;
365 }
317 } 366 }
318 367
319 if (!path_.empty()) 368 if (!path_.empty())
320 spec += path_; 369 spec += path_;
321 370
322 return spec; 371 return spec;
323 } 372 }
324 373
325 bool URLPattern::OverlapsWith(const URLPattern& other) const { 374 bool URLPattern::OverlapsWith(const URLPattern& other) const {
326 if (!MatchesAnyScheme(other.GetExplicitSchemes()) && 375 if (!MatchesAnyScheme(other.GetExplicitSchemes()) &&
327 !other.MatchesAnyScheme(GetExplicitSchemes())) { 376 !other.MatchesAnyScheme(GetExplicitSchemes())) {
328 return false; 377 return false;
329 } 378 }
330 379
331 if (!MatchesHost(other.host()) && !other.MatchesHost(host_)) 380 if (!MatchesHost(other.host()) && !other.MatchesHost(host_))
332 return false; 381 return false;
333 382
383 if (port_ != "*" && other.port() != "*" && port_ != other.port())
384 return false;
385
334 // We currently only use OverlapsWith() for the patterns inside 386 // We currently only use OverlapsWith() for the patterns inside
335 // URLPatternSet. In those cases, we know that the path will have only a 387 // URLPatternSet. In those cases, we know that the path will have only a
336 // single wildcard at the end. This makes figuring out overlap much easier. It 388 // single wildcard at the end. This makes figuring out overlap much easier. It
337 // seems like there is probably a computer-sciency way to solve the general 389 // seems like there is probably a computer-sciency way to solve the general
338 // case, but we don't need that yet. 390 // case, but we don't need that yet.
339 DCHECK(path_.find('*') == path_.size() - 1); 391 DCHECK(path_.find('*') == path_.size() - 1);
340 DCHECK(other.path().find('*') == other.path().size() - 1); 392 DCHECK(other.path().find('*') == other.path().size() - 1);
341 393
342 if (!MatchesPath(other.path().substr(0, other.path().size() - 1)) && 394 if (!MatchesPath(other.path().substr(0, other.path().size() - 1)) &&
343 !other.MatchesPath(path_.substr(0, path_.size() - 1))) 395 !other.MatchesPath(path_.substr(0, path_.size() - 1)))
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after
387 } 439 }
388 440
389 return result; 441 return result;
390 } 442 }
391 443
392 // static 444 // static
393 const char* URLPattern::GetParseResultString( 445 const char* URLPattern::GetParseResultString(
394 URLPattern::ParseResult parse_result) { 446 URLPattern::ParseResult parse_result) {
395 return kParseResultMessages[parse_result]; 447 return kParseResultMessages[parse_result];
396 } 448 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698