 Chromium Code Reviews
 Chromium Code Reviews Issue 7229012:
  Use extension match pattern syntax in content settings extension API  (Closed) 
  Base URL: svn://svn.chromium.org/chrome/trunk/src
    
  
    Issue 7229012:
  Use extension match pattern syntax in content settings extension API  (Closed) 
  Base URL: svn://svn.chromium.org/chrome/trunk/src

| OLD | NEW | 
|---|---|
| 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be | 
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. | 
| 4 | 4 | 
| 5 #include "chrome/common/extensions/url_pattern.h" | 5 #include "chrome/common/extensions/url_pattern.h" | 
| 6 | 6 | 
| 7 #include "base/string_number_conversions.h" | |
| 7 #include "base/string_piece.h" | 8 #include "base/string_piece.h" | 
| 8 #include "base/string_split.h" | 9 #include "base/string_split.h" | 
| 9 #include "base/string_util.h" | 10 #include "base/string_util.h" | 
| 10 #include "chrome/common/url_constants.h" | 11 #include "chrome/common/url_constants.h" | 
| 11 #include "googleurl/src/gurl.h" | 12 #include "googleurl/src/gurl.h" | 
| 12 #include "googleurl/src/url_util.h" | 13 #include "googleurl/src/url_util.h" | 
| 13 | 14 | 
| 14 const char URLPattern::kAllUrlsPattern[] = "<all_urls>"; | 15 const char URLPattern::kAllUrlsPattern[] = "<all_urls>"; | 
| 15 | 16 | 
| 16 namespace { | 17 namespace { | 
| (...skipping 24 matching lines...) Expand all Loading... | |
| 41 | 42 | 
| 42 const char* kParseSuccess = "Success."; | 43 const char* kParseSuccess = "Success."; | 
| 43 const char* kParseErrorMissingSchemeSeparator = "Missing scheme separator."; | 44 const char* kParseErrorMissingSchemeSeparator = "Missing scheme separator."; | 
| 44 const char* kParseErrorInvalidScheme = "Invalid scheme."; | 45 const char* kParseErrorInvalidScheme = "Invalid scheme."; | 
| 45 const char* kParseErrorWrongSchemeType = "Wrong scheme type."; | 46 const char* kParseErrorWrongSchemeType = "Wrong scheme type."; | 
| 46 const char* kParseErrorEmptyHost = "Host can not be empty."; | 47 const char* kParseErrorEmptyHost = "Host can not be empty."; | 
| 47 const char* kParseErrorInvalidHostWildcard = "Invalid host wildcard."; | 48 const char* kParseErrorInvalidHostWildcard = "Invalid host wildcard."; | 
| 48 const char* kParseErrorEmptyPath = "Empty path."; | 49 const char* kParseErrorEmptyPath = "Empty path."; | 
| 49 const char* kParseErrorHasColon = | 50 const char* kParseErrorHasColon = | 
| 50 "Ports are not supported in URL patterns. ':' may not be used in a host."; | 51 "Ports are not supported in URL patterns. ':' may not be used in a host."; | 
| 52 const char* kParseErrorInvalidPort = | |
| 53 "Invalid port."; | |
| 51 | 54 | 
| 52 // Message explaining each URLPattern::ParseResult. | 55 // Message explaining each URLPattern::ParseResult. | 
| 53 const char* kParseResultMessages[] = { | 56 const char* kParseResultMessages[] = { | 
| 54 kParseSuccess, | 57 kParseSuccess, | 
| 55 kParseErrorMissingSchemeSeparator, | 58 kParseErrorMissingSchemeSeparator, | 
| 56 kParseErrorInvalidScheme, | 59 kParseErrorInvalidScheme, | 
| 57 kParseErrorWrongSchemeType, | 60 kParseErrorWrongSchemeType, | 
| 58 kParseErrorEmptyHost, | 61 kParseErrorEmptyHost, | 
| 59 kParseErrorInvalidHostWildcard, | 62 kParseErrorInvalidHostWildcard, | 
| 60 kParseErrorEmptyPath, | 63 kParseErrorEmptyPath, | 
| 61 kParseErrorHasColon | 64 kParseErrorHasColon, | 
| 65 kParseErrorInvalidPort, | |
| 62 }; | 66 }; | 
| 63 | 67 | 
| 64 COMPILE_ASSERT(URLPattern::NUM_PARSE_RESULTS == arraysize(kParseResultMessages), | 68 COMPILE_ASSERT(URLPattern::NUM_PARSE_RESULTS == arraysize(kParseResultMessages), | 
| 65 must_add_message_for_each_parse_result); | 69 must_add_message_for_each_parse_result); | 
| 66 | 70 | 
| 67 const char kPathSeparator[] = "/"; | 71 const char kPathSeparator[] = "/"; | 
| 68 | 72 | 
| 69 bool IsStandardScheme(const std::string& scheme) { | 73 bool IsStandardScheme(const std::string& scheme) { | 
| 70 // "*" gets the same treatment as a standard scheme. | 74 // "*" gets the same treatment as a standard scheme. | 
| 71 if (scheme == "*") | 75 if (scheme == "*") | 
| 72 return true; | 76 return true; | 
| 73 | 77 | 
| 74 return url_util::IsStandard(scheme.c_str(), | 78 return url_util::IsStandard(scheme.c_str(), | 
| 75 url_parse::Component(0, static_cast<int>(scheme.length()))); | 79 url_parse::Component(0, static_cast<int>(scheme.length()))); | 
| 76 } | 80 } | 
| 77 | 81 | 
| 82 bool IsValidPort(const std::string& port) { | |
| 83 if (port.empty() || port == "*") | |
| 84 return true; | |
| 85 int parsed_port; | |
| 
Sam Kerner (Chrome)
2011/06/28 18:12:39
I think you need to set an initial value here (int
 
bauerb at google
2011/06/28 22:36:52
No, StringToInt always sets its argument (to zero
 
Sam Kerner (Chrome)
2011/06/29 13:36:34
I am not saying your code is wrong.  I am saying t
 
Bernhard Bauer
2011/06/29 13:53:18
Hm, fair enough.
 | |
| 86 if (!base::StringToInt(port, &parsed_port)) | |
| 87 return false; | |
| 88 return (parsed_port >= 0) && (parsed_port < 65536); | |
| 89 } | |
| 90 | |
| 78 } // namespace | 91 } // namespace | 
| 79 | 92 | 
| 80 URLPattern::URLPattern() | 93 URLPattern::URLPattern() | 
| 81 : valid_schemes_(SCHEME_NONE), | 94 : valid_schemes_(SCHEME_NONE), | 
| 82 match_all_urls_(false), | 95 match_all_urls_(false), | 
| 83 match_subdomains_(false) {} | 96 match_subdomains_(false), | 
| 97 port_("*") {} | |
| 84 | 98 | 
| 85 URLPattern::URLPattern(int valid_schemes) | 99 URLPattern::URLPattern(int valid_schemes) | 
| 86 : valid_schemes_(valid_schemes), match_all_urls_(false), | 100 : valid_schemes_(valid_schemes), | 
| 87 match_subdomains_(false) {} | 101 match_all_urls_(false), | 
| 102 match_subdomains_(false), | |
| 103 port_("*") {} | |
| 88 | 104 | 
| 89 URLPattern::URLPattern(int valid_schemes, const std::string& pattern) | 105 URLPattern::URLPattern(int valid_schemes, const std::string& pattern) | 
| 90 : valid_schemes_(valid_schemes), match_all_urls_(false), | 106 : valid_schemes_(valid_schemes), | 
| 91 match_subdomains_(false) { | 107 match_all_urls_(false), | 
| 108 match_subdomains_(false), | |
| 109 port_("*") { | |
| 92 | 110 | 
| 93 // Strict error checking is used, because this constructor is only | 111 // Strict error checking is used, because this constructor is only | 
| 94 // appropriate when we know |pattern| is valid. | 112 // appropriate when we know |pattern| is valid. | 
| 95 if (PARSE_SUCCESS != Parse(pattern, PARSE_STRICT)) | 113 if (PARSE_SUCCESS != Parse(pattern, ERROR_ON_PORTS)) | 
| 96 NOTREACHED() << "URLPattern is invalid: " << pattern; | 114 NOTREACHED() << "URLPattern is invalid: " << pattern; | 
| 97 } | 115 } | 
| 98 | 116 | 
| 99 URLPattern::~URLPattern() { | 117 URLPattern::~URLPattern() { | 
| 100 } | 118 } | 
| 101 | 119 | 
| 102 URLPattern::ParseResult URLPattern::Parse(const std::string& pattern, | 120 URLPattern::ParseResult URLPattern::Parse(const std::string& pattern, | 
| 103 ParseOption strictness) { | 121 ParseOption strictness) { | 
| 104 CHECK(strictness == PARSE_LENIENT || | |
| 105 strictness == PARSE_STRICT); | |
| 106 | |
| 107 // Special case pattern to match every valid URL. | 122 // Special case pattern to match every valid URL. | 
| 108 if (pattern == kAllUrlsPattern) { | 123 if (pattern == kAllUrlsPattern) { | 
| 109 match_all_urls_ = true; | 124 match_all_urls_ = true; | 
| 110 match_subdomains_ = true; | 125 match_subdomains_ = true; | 
| 111 scheme_ = "*"; | 126 scheme_ = "*"; | 
| 112 host_.clear(); | 127 host_.clear(); | 
| 113 SetPath("/*"); | 128 SetPath("/*"); | 
| 114 return PARSE_SUCCESS; | 129 return PARSE_SUCCESS; | 
| 115 } | 130 } | 
| 116 | 131 | 
| (...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 173 // The first component can optionally be '*' to match all subdomains. | 188 // The first component can optionally be '*' to match all subdomains. | 
| 174 std::vector<std::string> host_components; | 189 std::vector<std::string> host_components; | 
| 175 base::SplitString(host_, '.', &host_components); | 190 base::SplitString(host_, '.', &host_components); | 
| 176 if (host_components[0] == "*") { | 191 if (host_components[0] == "*") { | 
| 177 match_subdomains_ = true; | 192 match_subdomains_ = true; | 
| 178 host_components.erase(host_components.begin(), | 193 host_components.erase(host_components.begin(), | 
| 179 host_components.begin() + 1); | 194 host_components.begin() + 1); | 
| 180 } | 195 } | 
| 181 host_ = JoinString(host_components, '.'); | 196 host_ = JoinString(host_components, '.'); | 
| 182 | 197 | 
| 183 // No other '*' can occur in the host, though. This isn't necessary, but is | |
| 184 // done as a convenience to developers who might otherwise be confused and | |
| 185 // think '*' works as a glob in the host. | |
| 186 if (host_.find('*') != std::string::npos) | |
| 187 return PARSE_ERROR_INVALID_HOST_WILDCARD; | |
| 188 | |
| 189 path_start_pos = host_end_pos; | 198 path_start_pos = host_end_pos; | 
| 190 } | 199 } | 
| 191 | 200 | 
| 192 SetPath(pattern.substr(path_start_pos)); | 201 SetPath(pattern.substr(path_start_pos)); | 
| 193 | 202 | 
| 194 if (strictness == PARSE_STRICT && host_.find(':') != std::string::npos) | 203 size_t port_pos = host_.find(':'); | 
| 
Sam Kerner (Chrome)
2011/06/28 18:12:39
Pattern http://foo.com:123/* has a host of "foo.co
 
Matt Perry
2011/06/28 21:37:07
I agree. It seems unlikely to matter, but to be on
 
Bernhard Bauer
2011/06/29 13:53:18
Done.
 | |
| 195 return PARSE_ERROR_HAS_COLON; | 204 if (port_pos != std::string::npos) { | 
| 205 if (strictness == ERROR_ON_PORTS) | |
| 206 return PARSE_ERROR_HAS_COLON; | |
| 207 | |
| 208 if (strictness == USE_PORTS) { | |
| 209 std::string port = host_.substr(port_pos + 1); | |
| 210 if (!SetPort(port)) | |
| 211 return PARSE_ERROR_INVALID_PORT; | |
| 212 } | |
| 213 | |
| 214 host_ = host_.substr(0, port_pos); | |
| 215 } | |
| 216 | |
| 217 // No other '*' can occur in the host, though. This isn't necessary, but is | |
| 218 // done as a convenience to developers who might otherwise be confused and | |
| 219 // think '*' works as a glob in the host. | |
| 220 if (host_.find('*') != std::string::npos) | |
| 221 return PARSE_ERROR_INVALID_HOST_WILDCARD; | |
| 196 | 222 | 
| 197 return PARSE_SUCCESS; | 223 return PARSE_SUCCESS; | 
| 198 } | 224 } | 
| 199 | 225 | 
| 200 bool URLPattern::SetScheme(const std::string& scheme) { | 226 bool URLPattern::SetScheme(const std::string& scheme) { | 
| 201 scheme_ = scheme; | 227 scheme_ = scheme; | 
| 202 if (scheme_ == "*") { | 228 if (scheme_ == "*") { | 
| 203 valid_schemes_ &= (SCHEME_HTTP | SCHEME_HTTPS); | 229 valid_schemes_ &= (SCHEME_HTTP | SCHEME_HTTPS); | 
| 204 } else if (!IsValidScheme(scheme_)) { | 230 } else if (!IsValidScheme(scheme_)) { | 
| 205 return false; | 231 return false; | 
| (...skipping 13 matching lines...) Expand all Loading... | |
| 219 return false; | 245 return false; | 
| 220 } | 246 } | 
| 221 | 247 | 
| 222 void URLPattern::SetPath(const std::string& path) { | 248 void URLPattern::SetPath(const std::string& path) { | 
| 223 path_ = path; | 249 path_ = path; | 
| 224 path_escaped_ = path_; | 250 path_escaped_ = path_; | 
| 225 ReplaceSubstringsAfterOffset(&path_escaped_, 0, "\\", "\\\\"); | 251 ReplaceSubstringsAfterOffset(&path_escaped_, 0, "\\", "\\\\"); | 
| 226 ReplaceSubstringsAfterOffset(&path_escaped_, 0, "?", "\\?"); | 252 ReplaceSubstringsAfterOffset(&path_escaped_, 0, "?", "\\?"); | 
| 227 } | 253 } | 
| 228 | 254 | 
| 255 bool URLPattern::SetPort(const std::string& port) { | |
| 256 if (IsValidPort(port)) { | |
| 257 port_ = port; | |
| 258 return true; | |
| 259 } | |
| 260 return false; | |
| 261 } | |
| 262 | |
| 229 bool URLPattern::MatchesURL(const GURL &test) const { | 263 bool URLPattern::MatchesURL(const GURL &test) const { | 
| 230 if (!MatchesScheme(test.scheme())) | 264 if (!MatchesScheme(test.scheme())) | 
| 231 return false; | 265 return false; | 
| 232 | 266 | 
| 233 if (match_all_urls_) | 267 if (match_all_urls_) | 
| 234 return true; | 268 return true; | 
| 235 | 269 | 
| 236 // Ignore hostname if scheme is file://. | 270 // Ignore hostname if scheme is file://. | 
| 237 if (scheme_ != chrome::kFileScheme && !MatchesHost(test)) | 271 if (scheme_ != chrome::kFileScheme && !MatchesHost(test)) | 
| 238 return false; | 272 return false; | 
| 239 | 273 | 
| 240 if (!MatchesPath(test.PathForRequest())) | 274 if (!MatchesPath(test.PathForRequest())) | 
| 241 return false; | 275 return false; | 
| 242 | 276 | 
| 277 if (!MatchesPort(test.port())) | |
| 
Matt Perry
2011/06/28 21:37:07
do you want to handle matching a "default port"? l
 
bauerb at google
2011/06/28 22:36:52
Ooh! Yes, I want that, thanks!
 | |
| 278 return false; | |
| 279 | |
| 243 return true; | 280 return true; | 
| 244 } | 281 } | 
| 245 | 282 | 
| 246 bool URLPattern::MatchesScheme(const std::string& test) const { | 283 bool URLPattern::MatchesScheme(const std::string& test) const { | 
| 247 if (!IsValidScheme(test)) | 284 if (!IsValidScheme(test)) | 
| 248 return false; | 285 return false; | 
| 249 | 286 | 
| 250 return scheme_ == "*" || test == scheme_; | 287 return scheme_ == "*" || test == scheme_; | 
| 251 } | 288 } | 
| 252 | 289 | 
| (...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 289 return test.host()[test.host().length() - host_.length() - 1] == '.'; | 326 return test.host()[test.host().length() - host_.length() - 1] == '.'; | 
| 290 } | 327 } | 
| 291 | 328 | 
| 292 bool URLPattern::MatchesPath(const std::string& test) const { | 329 bool URLPattern::MatchesPath(const std::string& test) const { | 
| 293 if (!MatchPattern(test, path_escaped_)) | 330 if (!MatchPattern(test, path_escaped_)) | 
| 294 return false; | 331 return false; | 
| 295 | 332 | 
| 296 return true; | 333 return true; | 
| 297 } | 334 } | 
| 298 | 335 | 
| 336 bool URLPattern::MatchesPort(const std::string& test) const { | |
| 337 if (!IsValidPort(test)) | |
| 
Matt Perry
2011/06/28 21:37:07
this test seems unnecessary. port_ must be a valid
 
Bernhard Bauer
2011/06/29 13:53:18
But we don't want a port wildcard to match an inva
 
Matt Perry
2011/06/29 17:18:47
Oh OK, fair enough.
 | |
| 338 return false; | |
| 339 | |
| 340 return port_ == "*" || port_ == test; | |
| 341 } | |
| 342 | |
| 299 std::string URLPattern::GetAsString() const { | 343 std::string URLPattern::GetAsString() const { | 
| 300 if (match_all_urls_) | 344 if (match_all_urls_) | 
| 301 return kAllUrlsPattern; | 345 return kAllUrlsPattern; | 
| 302 | 346 | 
| 303 bool standard_scheme = IsStandardScheme(scheme_); | 347 bool standard_scheme = IsStandardScheme(scheme_); | 
| 304 | 348 | 
| 305 std::string spec = scheme_ + | 349 std::string spec = scheme_ + | 
| 306 (standard_scheme ? chrome::kStandardSchemeSeparator : ":"); | 350 (standard_scheme ? chrome::kStandardSchemeSeparator : ":"); | 
| 307 | 351 | 
| 308 if (scheme_ != chrome::kFileScheme && standard_scheme) { | 352 if (scheme_ != chrome::kFileScheme && standard_scheme) { | 
| 309 if (match_subdomains_) { | 353 if (match_subdomains_) { | 
| 310 spec += "*"; | 354 spec += "*"; | 
| 311 if (!host_.empty()) | 355 if (!host_.empty()) | 
| 312 spec += "."; | 356 spec += "."; | 
| 313 } | 357 } | 
| 314 | 358 | 
| 315 if (!host_.empty()) | 359 if (!host_.empty()) | 
| 316 spec += host_; | 360 spec += host_; | 
| 361 | |
| 362 if (port_ != "*") { | |
| 363 spec += ":"; | |
| 364 spec += port_; | |
| 365 } | |
| 317 } | 366 } | 
| 318 | 367 | 
| 319 if (!path_.empty()) | 368 if (!path_.empty()) | 
| 320 spec += path_; | 369 spec += path_; | 
| 321 | 370 | 
| 322 return spec; | 371 return spec; | 
| 323 } | 372 } | 
| 324 | 373 | 
| 325 bool URLPattern::OverlapsWith(const URLPattern& other) const { | 374 bool URLPattern::OverlapsWith(const URLPattern& other) const { | 
| 326 if (!MatchesAnyScheme(other.GetExplicitSchemes()) && | 375 if (!MatchesAnyScheme(other.GetExplicitSchemes()) && | 
| 327 !other.MatchesAnyScheme(GetExplicitSchemes())) { | 376 !other.MatchesAnyScheme(GetExplicitSchemes())) { | 
| 328 return false; | 377 return false; | 
| 329 } | 378 } | 
| 330 | 379 | 
| 331 if (!MatchesHost(other.host()) && !other.MatchesHost(host_)) | 380 if (!MatchesHost(other.host()) && !other.MatchesHost(host_)) | 
| 332 return false; | 381 return false; | 
| 333 | 382 | 
| 383 if (port_ != "*" && other.port() != "*" && port_ != other.port()) | |
| 384 return false; | |
| 385 | |
| 334 // We currently only use OverlapsWith() for the patterns inside | 386 // We currently only use OverlapsWith() for the patterns inside | 
| 335 // URLPatternSet. In those cases, we know that the path will have only a | 387 // URLPatternSet. In those cases, we know that the path will have only a | 
| 336 // single wildcard at the end. This makes figuring out overlap much easier. It | 388 // single wildcard at the end. This makes figuring out overlap much easier. It | 
| 337 // seems like there is probably a computer-sciency way to solve the general | 389 // seems like there is probably a computer-sciency way to solve the general | 
| 338 // case, but we don't need that yet. | 390 // case, but we don't need that yet. | 
| 339 DCHECK(path_.find('*') == path_.size() - 1); | 391 DCHECK(path_.find('*') == path_.size() - 1); | 
| 340 DCHECK(other.path().find('*') == other.path().size() - 1); | 392 DCHECK(other.path().find('*') == other.path().size() - 1); | 
| 341 | 393 | 
| 342 if (!MatchesPath(other.path().substr(0, other.path().size() - 1)) && | 394 if (!MatchesPath(other.path().substr(0, other.path().size() - 1)) && | 
| 343 !other.MatchesPath(path_.substr(0, path_.size() - 1))) | 395 !other.MatchesPath(path_.substr(0, path_.size() - 1))) | 
| (...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 387 } | 439 } | 
| 388 | 440 | 
| 389 return result; | 441 return result; | 
| 390 } | 442 } | 
| 391 | 443 | 
| 392 // static | 444 // static | 
| 393 const char* URLPattern::GetParseResultString( | 445 const char* URLPattern::GetParseResultString( | 
| 394 URLPattern::ParseResult parse_result) { | 446 URLPattern::ParseResult parse_result) { | 
| 395 return kParseResultMessages[parse_result]; | 447 return kParseResultMessages[parse_result]; | 
| 396 } | 448 } | 
| OLD | NEW |