OLD | NEW |
---|---|
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/common/extensions/url_pattern.h" | 5 #include "chrome/common/extensions/url_pattern.h" |
6 | 6 |
7 #include "base/string_number_conversions.h" | |
7 #include "base/string_piece.h" | 8 #include "base/string_piece.h" |
8 #include "base/string_split.h" | 9 #include "base/string_split.h" |
9 #include "base/string_util.h" | 10 #include "base/string_util.h" |
10 #include "chrome/common/url_constants.h" | 11 #include "chrome/common/url_constants.h" |
11 #include "googleurl/src/gurl.h" | 12 #include "googleurl/src/gurl.h" |
12 #include "googleurl/src/url_util.h" | 13 #include "googleurl/src/url_util.h" |
13 | 14 |
14 const char URLPattern::kAllUrlsPattern[] = "<all_urls>"; | 15 const char URLPattern::kAllUrlsPattern[] = "<all_urls>"; |
15 | 16 |
16 namespace { | 17 namespace { |
(...skipping 24 matching lines...) Expand all Loading... | |
41 | 42 |
42 const char* kParseSuccess = "Success."; | 43 const char* kParseSuccess = "Success."; |
43 const char* kParseErrorMissingSchemeSeparator = "Missing scheme separator."; | 44 const char* kParseErrorMissingSchemeSeparator = "Missing scheme separator."; |
44 const char* kParseErrorInvalidScheme = "Invalid scheme."; | 45 const char* kParseErrorInvalidScheme = "Invalid scheme."; |
45 const char* kParseErrorWrongSchemeType = "Wrong scheme type."; | 46 const char* kParseErrorWrongSchemeType = "Wrong scheme type."; |
46 const char* kParseErrorEmptyHost = "Host can not be empty."; | 47 const char* kParseErrorEmptyHost = "Host can not be empty."; |
47 const char* kParseErrorInvalidHostWildcard = "Invalid host wildcard."; | 48 const char* kParseErrorInvalidHostWildcard = "Invalid host wildcard."; |
48 const char* kParseErrorEmptyPath = "Empty path."; | 49 const char* kParseErrorEmptyPath = "Empty path."; |
49 const char* kParseErrorHasColon = | 50 const char* kParseErrorHasColon = |
50 "Ports are not supported in URL patterns. ':' may not be used in a host."; | 51 "Ports are not supported in URL patterns. ':' may not be used in a host."; |
52 const char* kParseErrorInvalidPort = | |
53 "Invalid port."; | |
51 | 54 |
52 // Message explaining each URLPattern::ParseResult. | 55 // Message explaining each URLPattern::ParseResult. |
53 const char* kParseResultMessages[] = { | 56 const char* kParseResultMessages[] = { |
54 kParseSuccess, | 57 kParseSuccess, |
55 kParseErrorMissingSchemeSeparator, | 58 kParseErrorMissingSchemeSeparator, |
56 kParseErrorInvalidScheme, | 59 kParseErrorInvalidScheme, |
57 kParseErrorWrongSchemeType, | 60 kParseErrorWrongSchemeType, |
58 kParseErrorEmptyHost, | 61 kParseErrorEmptyHost, |
59 kParseErrorInvalidHostWildcard, | 62 kParseErrorInvalidHostWildcard, |
60 kParseErrorEmptyPath, | 63 kParseErrorEmptyPath, |
61 kParseErrorHasColon | 64 kParseErrorHasColon, |
65 kParseErrorInvalidPort, | |
62 }; | 66 }; |
63 | 67 |
64 COMPILE_ASSERT(URLPattern::NUM_PARSE_RESULTS == arraysize(kParseResultMessages), | 68 COMPILE_ASSERT(URLPattern::NUM_PARSE_RESULTS == arraysize(kParseResultMessages), |
65 must_add_message_for_each_parse_result); | 69 must_add_message_for_each_parse_result); |
66 | 70 |
67 const char kPathSeparator[] = "/"; | 71 const char kPathSeparator[] = "/"; |
68 | 72 |
69 bool IsStandardScheme(const std::string& scheme) { | 73 bool IsStandardScheme(const std::string& scheme) { |
70 // "*" gets the same treatment as a standard scheme. | 74 // "*" gets the same treatment as a standard scheme. |
71 if (scheme == "*") | 75 if (scheme == "*") |
72 return true; | 76 return true; |
73 | 77 |
74 return url_util::IsStandard(scheme.c_str(), | 78 return url_util::IsStandard(scheme.c_str(), |
75 url_parse::Component(0, static_cast<int>(scheme.length()))); | 79 url_parse::Component(0, static_cast<int>(scheme.length()))); |
76 } | 80 } |
77 | 81 |
82 bool IsValidPort(const std::string& port) { | |
83 if (port.empty() || port == "*") | |
84 return true; | |
85 int parsed_port; | |
Sam Kerner (Chrome)
2011/06/28 18:12:39
I think you need to set an initial value here (int
bauerb at google
2011/06/28 22:36:52
No, StringToInt always sets its argument (to zero
Sam Kerner (Chrome)
2011/06/29 13:36:34
I am not saying your code is wrong. I am saying t
Bernhard Bauer
2011/06/29 13:53:18
Hm, fair enough.
| |
86 if (!base::StringToInt(port, &parsed_port)) | |
87 return false; | |
88 return (parsed_port >= 0) && (parsed_port < 65536); | |
89 } | |
90 | |
78 } // namespace | 91 } // namespace |
79 | 92 |
80 URLPattern::URLPattern() | 93 URLPattern::URLPattern() |
81 : valid_schemes_(SCHEME_NONE), | 94 : valid_schemes_(SCHEME_NONE), |
82 match_all_urls_(false), | 95 match_all_urls_(false), |
83 match_subdomains_(false) {} | 96 match_subdomains_(false), |
97 port_("*") {} | |
84 | 98 |
85 URLPattern::URLPattern(int valid_schemes) | 99 URLPattern::URLPattern(int valid_schemes) |
86 : valid_schemes_(valid_schemes), match_all_urls_(false), | 100 : valid_schemes_(valid_schemes), |
87 match_subdomains_(false) {} | 101 match_all_urls_(false), |
102 match_subdomains_(false), | |
103 port_("*") {} | |
88 | 104 |
89 URLPattern::URLPattern(int valid_schemes, const std::string& pattern) | 105 URLPattern::URLPattern(int valid_schemes, const std::string& pattern) |
90 : valid_schemes_(valid_schemes), match_all_urls_(false), | 106 : valid_schemes_(valid_schemes), |
91 match_subdomains_(false) { | 107 match_all_urls_(false), |
108 match_subdomains_(false), | |
109 port_("*") { | |
92 | 110 |
93 // Strict error checking is used, because this constructor is only | 111 // Strict error checking is used, because this constructor is only |
94 // appropriate when we know |pattern| is valid. | 112 // appropriate when we know |pattern| is valid. |
95 if (PARSE_SUCCESS != Parse(pattern, PARSE_STRICT)) | 113 if (PARSE_SUCCESS != Parse(pattern, ERROR_ON_PORTS)) |
96 NOTREACHED() << "URLPattern is invalid: " << pattern; | 114 NOTREACHED() << "URLPattern is invalid: " << pattern; |
97 } | 115 } |
98 | 116 |
99 URLPattern::~URLPattern() { | 117 URLPattern::~URLPattern() { |
100 } | 118 } |
101 | 119 |
102 URLPattern::ParseResult URLPattern::Parse(const std::string& pattern, | 120 URLPattern::ParseResult URLPattern::Parse(const std::string& pattern, |
103 ParseOption strictness) { | 121 ParseOption strictness) { |
104 CHECK(strictness == PARSE_LENIENT || | |
105 strictness == PARSE_STRICT); | |
106 | |
107 // Special case pattern to match every valid URL. | 122 // Special case pattern to match every valid URL. |
108 if (pattern == kAllUrlsPattern) { | 123 if (pattern == kAllUrlsPattern) { |
109 match_all_urls_ = true; | 124 match_all_urls_ = true; |
110 match_subdomains_ = true; | 125 match_subdomains_ = true; |
111 scheme_ = "*"; | 126 scheme_ = "*"; |
112 host_.clear(); | 127 host_.clear(); |
113 SetPath("/*"); | 128 SetPath("/*"); |
114 return PARSE_SUCCESS; | 129 return PARSE_SUCCESS; |
115 } | 130 } |
116 | 131 |
(...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
173 // The first component can optionally be '*' to match all subdomains. | 188 // The first component can optionally be '*' to match all subdomains. |
174 std::vector<std::string> host_components; | 189 std::vector<std::string> host_components; |
175 base::SplitString(host_, '.', &host_components); | 190 base::SplitString(host_, '.', &host_components); |
176 if (host_components[0] == "*") { | 191 if (host_components[0] == "*") { |
177 match_subdomains_ = true; | 192 match_subdomains_ = true; |
178 host_components.erase(host_components.begin(), | 193 host_components.erase(host_components.begin(), |
179 host_components.begin() + 1); | 194 host_components.begin() + 1); |
180 } | 195 } |
181 host_ = JoinString(host_components, '.'); | 196 host_ = JoinString(host_components, '.'); |
182 | 197 |
183 // No other '*' can occur in the host, though. This isn't necessary, but is | |
184 // done as a convenience to developers who might otherwise be confused and | |
185 // think '*' works as a glob in the host. | |
186 if (host_.find('*') != std::string::npos) | |
187 return PARSE_ERROR_INVALID_HOST_WILDCARD; | |
188 | |
189 path_start_pos = host_end_pos; | 198 path_start_pos = host_end_pos; |
190 } | 199 } |
191 | 200 |
192 SetPath(pattern.substr(path_start_pos)); | 201 SetPath(pattern.substr(path_start_pos)); |
193 | 202 |
194 if (strictness == PARSE_STRICT && host_.find(':') != std::string::npos) | 203 size_t port_pos = host_.find(':'); |
Sam Kerner (Chrome)
2011/06/28 18:12:39
Pattern http://foo.com:123/* has a host of "foo.co
Matt Perry
2011/06/28 21:37:07
I agree. It seems unlikely to matter, but to be on
Bernhard Bauer
2011/06/29 13:53:18
Done.
| |
195 return PARSE_ERROR_HAS_COLON; | 204 if (port_pos != std::string::npos) { |
205 if (strictness == ERROR_ON_PORTS) | |
206 return PARSE_ERROR_HAS_COLON; | |
207 | |
208 if (strictness == USE_PORTS) { | |
209 std::string port = host_.substr(port_pos + 1); | |
210 if (!SetPort(port)) | |
211 return PARSE_ERROR_INVALID_PORT; | |
212 } | |
213 | |
214 host_ = host_.substr(0, port_pos); | |
215 } | |
216 | |
217 // No other '*' can occur in the host, though. This isn't necessary, but is | |
218 // done as a convenience to developers who might otherwise be confused and | |
219 // think '*' works as a glob in the host. | |
220 if (host_.find('*') != std::string::npos) | |
221 return PARSE_ERROR_INVALID_HOST_WILDCARD; | |
196 | 222 |
197 return PARSE_SUCCESS; | 223 return PARSE_SUCCESS; |
198 } | 224 } |
199 | 225 |
200 bool URLPattern::SetScheme(const std::string& scheme) { | 226 bool URLPattern::SetScheme(const std::string& scheme) { |
201 scheme_ = scheme; | 227 scheme_ = scheme; |
202 if (scheme_ == "*") { | 228 if (scheme_ == "*") { |
203 valid_schemes_ &= (SCHEME_HTTP | SCHEME_HTTPS); | 229 valid_schemes_ &= (SCHEME_HTTP | SCHEME_HTTPS); |
204 } else if (!IsValidScheme(scheme_)) { | 230 } else if (!IsValidScheme(scheme_)) { |
205 return false; | 231 return false; |
(...skipping 13 matching lines...) Expand all Loading... | |
219 return false; | 245 return false; |
220 } | 246 } |
221 | 247 |
222 void URLPattern::SetPath(const std::string& path) { | 248 void URLPattern::SetPath(const std::string& path) { |
223 path_ = path; | 249 path_ = path; |
224 path_escaped_ = path_; | 250 path_escaped_ = path_; |
225 ReplaceSubstringsAfterOffset(&path_escaped_, 0, "\\", "\\\\"); | 251 ReplaceSubstringsAfterOffset(&path_escaped_, 0, "\\", "\\\\"); |
226 ReplaceSubstringsAfterOffset(&path_escaped_, 0, "?", "\\?"); | 252 ReplaceSubstringsAfterOffset(&path_escaped_, 0, "?", "\\?"); |
227 } | 253 } |
228 | 254 |
255 bool URLPattern::SetPort(const std::string& port) { | |
256 if (IsValidPort(port)) { | |
257 port_ = port; | |
258 return true; | |
259 } | |
260 return false; | |
261 } | |
262 | |
229 bool URLPattern::MatchesURL(const GURL &test) const { | 263 bool URLPattern::MatchesURL(const GURL &test) const { |
230 if (!MatchesScheme(test.scheme())) | 264 if (!MatchesScheme(test.scheme())) |
231 return false; | 265 return false; |
232 | 266 |
233 if (match_all_urls_) | 267 if (match_all_urls_) |
234 return true; | 268 return true; |
235 | 269 |
236 // Ignore hostname if scheme is file://. | 270 // Ignore hostname if scheme is file://. |
237 if (scheme_ != chrome::kFileScheme && !MatchesHost(test)) | 271 if (scheme_ != chrome::kFileScheme && !MatchesHost(test)) |
238 return false; | 272 return false; |
239 | 273 |
240 if (!MatchesPath(test.PathForRequest())) | 274 if (!MatchesPath(test.PathForRequest())) |
241 return false; | 275 return false; |
242 | 276 |
277 if (!MatchesPort(test.port())) | |
Matt Perry
2011/06/28 21:37:07
do you want to handle matching a "default port"? l
bauerb at google
2011/06/28 22:36:52
Ooh! Yes, I want that, thanks!
| |
278 return false; | |
279 | |
243 return true; | 280 return true; |
244 } | 281 } |
245 | 282 |
246 bool URLPattern::MatchesScheme(const std::string& test) const { | 283 bool URLPattern::MatchesScheme(const std::string& test) const { |
247 if (!IsValidScheme(test)) | 284 if (!IsValidScheme(test)) |
248 return false; | 285 return false; |
249 | 286 |
250 return scheme_ == "*" || test == scheme_; | 287 return scheme_ == "*" || test == scheme_; |
251 } | 288 } |
252 | 289 |
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
289 return test.host()[test.host().length() - host_.length() - 1] == '.'; | 326 return test.host()[test.host().length() - host_.length() - 1] == '.'; |
290 } | 327 } |
291 | 328 |
292 bool URLPattern::MatchesPath(const std::string& test) const { | 329 bool URLPattern::MatchesPath(const std::string& test) const { |
293 if (!MatchPattern(test, path_escaped_)) | 330 if (!MatchPattern(test, path_escaped_)) |
294 return false; | 331 return false; |
295 | 332 |
296 return true; | 333 return true; |
297 } | 334 } |
298 | 335 |
336 bool URLPattern::MatchesPort(const std::string& test) const { | |
337 if (!IsValidPort(test)) | |
Matt Perry
2011/06/28 21:37:07
this test seems unnecessary. port_ must be a valid
Bernhard Bauer
2011/06/29 13:53:18
But we don't want a port wildcard to match an inva
Matt Perry
2011/06/29 17:18:47
Oh OK, fair enough.
| |
338 return false; | |
339 | |
340 return port_ == "*" || port_ == test; | |
341 } | |
342 | |
299 std::string URLPattern::GetAsString() const { | 343 std::string URLPattern::GetAsString() const { |
300 if (match_all_urls_) | 344 if (match_all_urls_) |
301 return kAllUrlsPattern; | 345 return kAllUrlsPattern; |
302 | 346 |
303 bool standard_scheme = IsStandardScheme(scheme_); | 347 bool standard_scheme = IsStandardScheme(scheme_); |
304 | 348 |
305 std::string spec = scheme_ + | 349 std::string spec = scheme_ + |
306 (standard_scheme ? chrome::kStandardSchemeSeparator : ":"); | 350 (standard_scheme ? chrome::kStandardSchemeSeparator : ":"); |
307 | 351 |
308 if (scheme_ != chrome::kFileScheme && standard_scheme) { | 352 if (scheme_ != chrome::kFileScheme && standard_scheme) { |
309 if (match_subdomains_) { | 353 if (match_subdomains_) { |
310 spec += "*"; | 354 spec += "*"; |
311 if (!host_.empty()) | 355 if (!host_.empty()) |
312 spec += "."; | 356 spec += "."; |
313 } | 357 } |
314 | 358 |
315 if (!host_.empty()) | 359 if (!host_.empty()) |
316 spec += host_; | 360 spec += host_; |
361 | |
362 if (port_ != "*") { | |
363 spec += ":"; | |
364 spec += port_; | |
365 } | |
317 } | 366 } |
318 | 367 |
319 if (!path_.empty()) | 368 if (!path_.empty()) |
320 spec += path_; | 369 spec += path_; |
321 | 370 |
322 return spec; | 371 return spec; |
323 } | 372 } |
324 | 373 |
325 bool URLPattern::OverlapsWith(const URLPattern& other) const { | 374 bool URLPattern::OverlapsWith(const URLPattern& other) const { |
326 if (!MatchesAnyScheme(other.GetExplicitSchemes()) && | 375 if (!MatchesAnyScheme(other.GetExplicitSchemes()) && |
327 !other.MatchesAnyScheme(GetExplicitSchemes())) { | 376 !other.MatchesAnyScheme(GetExplicitSchemes())) { |
328 return false; | 377 return false; |
329 } | 378 } |
330 | 379 |
331 if (!MatchesHost(other.host()) && !other.MatchesHost(host_)) | 380 if (!MatchesHost(other.host()) && !other.MatchesHost(host_)) |
332 return false; | 381 return false; |
333 | 382 |
383 if (port_ != "*" && other.port() != "*" && port_ != other.port()) | |
384 return false; | |
385 | |
334 // We currently only use OverlapsWith() for the patterns inside | 386 // We currently only use OverlapsWith() for the patterns inside |
335 // URLPatternSet. In those cases, we know that the path will have only a | 387 // URLPatternSet. In those cases, we know that the path will have only a |
336 // single wildcard at the end. This makes figuring out overlap much easier. It | 388 // single wildcard at the end. This makes figuring out overlap much easier. It |
337 // seems like there is probably a computer-sciency way to solve the general | 389 // seems like there is probably a computer-sciency way to solve the general |
338 // case, but we don't need that yet. | 390 // case, but we don't need that yet. |
339 DCHECK(path_.find('*') == path_.size() - 1); | 391 DCHECK(path_.find('*') == path_.size() - 1); |
340 DCHECK(other.path().find('*') == other.path().size() - 1); | 392 DCHECK(other.path().find('*') == other.path().size() - 1); |
341 | 393 |
342 if (!MatchesPath(other.path().substr(0, other.path().size() - 1)) && | 394 if (!MatchesPath(other.path().substr(0, other.path().size() - 1)) && |
343 !other.MatchesPath(path_.substr(0, path_.size() - 1))) | 395 !other.MatchesPath(path_.substr(0, path_.size() - 1))) |
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
387 } | 439 } |
388 | 440 |
389 return result; | 441 return result; |
390 } | 442 } |
391 | 443 |
392 // static | 444 // static |
393 const char* URLPattern::GetParseResultString( | 445 const char* URLPattern::GetParseResultString( |
394 URLPattern::ParseResult parse_result) { | 446 URLPattern::ParseResult parse_result) { |
395 return kParseResultMessages[parse_result]; | 447 return kParseResultMessages[parse_result]; |
396 } | 448 } |
OLD | NEW |