OLD | NEW |
1 // Copyright (c) 2006-2009 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2006-2009 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 #ifndef CHROME_COMMON_EXTENSIONS_URL_PATTERN_H_ | 4 #ifndef CHROME_COMMON_EXTENSIONS_URL_PATTERN_H_ |
5 #define CHROME_COMMON_EXTENSIONS_URL_PATTERN_H_ | 5 #define CHROME_COMMON_EXTENSIONS_URL_PATTERN_H_ |
6 | 6 |
7 #include <string> | 7 #include <string> |
8 | 8 |
9 #include "googleurl/src/gurl.h" | 9 #include "googleurl/src/gurl.h" |
10 | 10 |
11 // A pattern that can be used to match URLs. A URLPattern is a very restricted | 11 // A pattern that can be used to match URLs. A URLPattern is a very restricted |
12 // subset of URL syntax: | 12 // subset of URL syntax: |
13 // | 13 // |
14 // <url-pattern> := <scheme>://<host><path> | 14 // <url-pattern> := <scheme>://<host><path> |
15 // <scheme> := 'http' | 'https' | 'file' | 'ftp' | 'chrome' | 15 // <scheme> := 'http' | 'https' | 'file' | 'ftp' |
16 // <host> := '*' | '*.' <anychar except '/' and '*'>+ | 16 // <host> := '*' | '*.' <anychar except '/' and '*'>+ | <anychar except '/' and '*'>+ |
17 // <path> := '/' <any chars> | 17 // <path> := '/' <any chars> |
18 // | 18 // |
19 // * Host is not used when the scheme is 'file'. | 19 // * Host is not used when the scheme is 'file'. |
20 // * The path can have embedded '*' characters which act as glob wildcards. | 20 // * The path can have embedded '*' characters which act as glob wildcards. |
21 // | 21 // |
22 // Examples of valid patterns: | 22 // Examples of valid patterns: |
23 // - http://*/* | 23 // - http://*/* |
24 // - http://*/foo* | 24 // - http://*/foo* |
25 // - https://*.google.com/foo*bar | 25 // - https://*.google.com/foo*bar |
26 // - chrome://foo/bar | |
27 // - file://monkey* | 26 // - file://monkey* |
28 // - http://127.0.0.1/* | 27 // - http://127.0.0.1/* |
29 // | 28 // |
30 // Examples of invalid patterns: | 29 // Examples of invalid patterns: |
31 // - http://* -- path not specified | 30 // - http://* -- path not specified |
32 // - http://*foo/bar -- * not allowed as substring of host component | 31 // - http://*foo/bar -- * not allowed as substring of host component |
33 // - http://foo.*.bar/baz -- * must be first component | 32 // - http://foo.*.bar/baz -- * must be first component |
34 // - http:/bar -- scheme separator not found | 33 // - http:/bar -- scheme separator not found |
35 // - foo://* -- invalid scheme | 34 // - foo://* -- invalid scheme |
| 35 // - chrome:// -- we don't support chrome internal URLs |
36 // | 36 // |
37 // Design rationale: | 37 // Design rationale: |
38 // * We need to be able to tell users what 'sites' a given URLPattern will | 38 // * We need to be able to tell users what 'sites' a given URLPattern will |
39 // affect. For example "This extension will interact with the site | 39 // affect. For example "This extension will interact with the site |
40 // 'www.google.com'. | 40 // 'www.google.com'." |
41 // * We'd like to be able to convert as many existing Greasemonkey @include | 41 // * We'd like to be able to convert as many existing Greasemonkey @include |
42 // patterns to URLPatterns as possible. Greasemonkey @include patterns are | 42 // patterns to URLPatterns as possible. Greasemonkey @include patterns are |
43 // simple globs, so this won't be perfect. | 43 // simple globs, so this won't be perfect. |
44 // * Although we would like to support any scheme, it isn't clear what to tell | 44 // * Although we would like to support any scheme, it isn't clear what to tell |
45 // users about URLPatterns that affect data or javascript URLs, and saying | 45 // users about URLPatterns that affect data or javascript URLs, so those are |
46 // something useful about chrome-extension URLs is more work, so those are | |
47 // left out for now. | 46 // left out for now. |
48 // | 47 // |
49 // From a 2008-ish crawl of userscripts.org, the following patterns were found | 48 // From a 2008-ish crawl of userscripts.org, the following patterns were found |
50 // in @include lines: | 49 // in @include lines: |
51 // - total lines : 24471 | 50 // - total lines : 24471 |
52 // - @include * : 919 | 51 // - @include * : 919 |
53 // - @include http://[^\*]+?/ : 11128 (no star in host) | 52 // - @include http://[^\*]+?/ : 11128 (no star in host) |
54 // - @include http://\*\.[^\*]+?/ : 2325 (host prefixed by *.) | 53 // - @include http://\*\.[^\*]+?/ : 2325 (host prefixed by *.) |
55 // - @include http://\*[^\.][^\*]+?/: 1524 (host prefixed by *, no dot -- many | 54 // - @include http://\*[^\.][^\*]+?/: 1524 (host prefixed by *, no dot -- many |
56 // appear to only need subdomain | 55 // appear to only need subdomain |
57 // matching, not real prefix matching) | 56 // matching, not real prefix matching) |
58 // - @include http://[^\*/]+\*/ : 320 (host suffixed by *) | 57 // - @include http://[^\*/]+\*/ : 320 (host suffixed by *) |
59 // - @include contains .tld : 297 (host suffixed by .tld -- a special | 58 // - @include contains .tld : 297 (host suffixed by .tld -- a special |
60 // Greasemonkey domain component that | 59 // Greasemonkey domain component that |
61 // tries to match all valid registry- | 60 // tries to match all valid registry- |
62 // controlled suffixes) | 61 // controlled suffixes) |
63 // - @include http://\*/ : 228 (host is * exactly, but there is | 62 // - @include http://\*/ : 228 (host is * exactly, but there is |
64 // more to the pattern) | 63 // more to the pattern) |
65 // | 64 // |
66 // So, we can support at least half of current @include lines without supporting | 65 // So, we can support at least half of current @include lines without supporting |
67 // subdomain matching. We can pick up at least another 10% by supporting | 66 // subdomain matching. We can pick up at least another 10% by supporting |
68 // subdomain matching. It is probably possible to coerce more of the existing | 67 // subdomain matching. It is probably possible to coerce more of the existing |
69 // patterns to URLPattern, but the resulting pattern will be more restrictive | 68 // patterns to URLPattern, but the resulting pattern will be more restrictive |
70 // than the original glob, which is probably better than nothing. | 69 // than the original glob, which is probably better than nothing. |
71 class URLPattern { | 70 class URLPattern { |
72 public: | 71 public: |
| 72 // Returns true if the specified scheme can be used in URL patterns, and false |
| 73 // otherwise. |
| 74 static bool IsValidScheme(const std::string& scheme); |
| 75 |
73 URLPattern() : match_subdomains_(false) {} | 76 URLPattern() : match_subdomains_(false) {} |
74 | 77 |
75 // Initializes this instance by parsing the provided string. On failure, the | 78 // Initializes this instance by parsing the provided string. On failure, the |
76 // instance will have some intermediate values and is in an invalid state. | 79 // instance will have some intermediate values and is in an invalid state. |
77 bool Parse(const std::string& pattern_str); | 80 bool Parse(const std::string& pattern_str); |
78 | 81 |
79 // Returns true if this instance matches the specified URL. | 82 // Returns true if this instance matches the specified URL. |
80 bool MatchesUrl(const GURL& url) const; | 83 bool MatchesUrl(const GURL& url) const; |
81 | 84 |
82 std::string GetAsString() const; | 85 std::string GetAsString() const; |
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
117 // everything after the scheme in the case of file:// URLs. | 120 // everything after the scheme in the case of file:// URLs. |
118 std::string path_; | 121 std::string path_; |
119 | 122 |
120 // The path with "?" and "\" characters escaped for use with the | 123 // The path with "?" and "\" characters escaped for use with the |
121 // MatchPattern() function. This is populated lazily, the first time it is | 124 // MatchPattern() function. This is populated lazily, the first time it is |
122 // needed. | 125 // needed. |
123 mutable std::string path_escaped_; | 126 mutable std::string path_escaped_; |
124 }; | 127 }; |
125 | 128 |
126 #endif // CHROME_COMMON_EXTENSIONS_URL_PATTERN_H_ | 129 #endif // CHROME_COMMON_EXTENSIONS_URL_PATTERN_H_ |
OLD | NEW |