Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(57)

Side by Side Diff: chrome/common/extensions/url_pattern.h

Issue 149619: Various minor extension fixes (Closed)
Patch Set: One more test Created 11 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright (c) 2006-2009 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2006-2009 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 #ifndef CHROME_COMMON_EXTENSIONS_URL_PATTERN_H_ 4 #ifndef CHROME_COMMON_EXTENSIONS_URL_PATTERN_H_
5 #define CHROME_COMMON_EXTENSIONS_URL_PATTERN_H_ 5 #define CHROME_COMMON_EXTENSIONS_URL_PATTERN_H_
6 6
7 #include <string> 7 #include <string>
8 8
9 #include "googleurl/src/gurl.h" 9 #include "googleurl/src/gurl.h"
10 10
11 // A pattern that can be used to match URLs. A URLPattern is a very restricted 11 // A pattern that can be used to match URLs. A URLPattern is a very restricted
12 // subset of URL syntax: 12 // subset of URL syntax:
13 // 13 //
14 // <url-pattern> := <scheme>://<host><path> 14 // <url-pattern> := <scheme>://<host><path>
15 // <scheme> := 'http' | 'https' | 'file' | 'ftp' | 'chrome' 15 // <scheme> := 'http' | 'https' | 'file' | 'ftp' | 'chrome'
16 // <host> := '*' | '*.' <anychar except '/' and '*'>+ | <anychar except '/' and '*'>+ 16 // <host> := '*' | '*.' <anychar except '/' and '*'>+ | <anychar except '/' and '*'>+
17 // <path> := '/' <any chars> 17 // <path> := '/' <any chars>
18 // 18 //
19 // * Host is not used when the scheme is 'file'. 19 // * Host is not used when the scheme is 'file'.
20 // * The path can have embedded '*' characters which act as glob wildcards. 20 // * The path can have embedded '*' characters which act as glob wildcards.
21 // 21 //
22 // Examples of valid patterns: 22 // Examples of valid patterns:
23 // - http://*/* 23 // - http://*/*
24 // - http://*/foo* 24 // - http://*/foo*
25 // - https://*.google.com/foo*bar 25 // - https://*.google.com/foo*bar
26 // - chrome://foo/bar
27 // - file://monkey* 26 // - file://monkey*
28 // - http://127.0.0.1/* 27 // - http://127.0.0.1/*
29 // 28 //
30 // Examples of invalid patterns: 29 // Examples of invalid patterns:
31 // - http://* -- path not specified 30 // - http://* -- path not specified
32 // - http://*foo/bar -- * not allowed as substring of host component 31 // - http://*foo/bar -- * not allowed as substring of host component
33 // - http://foo.*.bar/baz -- * must be first component 32 // - http://foo.*.bar/baz -- * must be first component
34 // - http:/bar -- scheme separator not found 33 // - http:/bar -- scheme separator not found
35 // - foo://* -- invalid scheme 34 // - foo://* -- invalid scheme
35 // - chrome:// -- we don't support chrome internal URLs
36 // 36 //
37 // Design rationale: 37 // Design rationale:
38 // * We need to be able to tell users what 'sites' a given URLPattern will 38 // * We need to be able to tell users what 'sites' a given URLPattern will
39 // affect. For example "This extension will interact with the site 39 // affect. For example "This extension will interact with the site
40 // 'www.google.com'." 40 // 'www.google.com'."
41 // * We'd like to be able to convert as many existing Greasemonkey @include 41 // * We'd like to be able to convert as many existing Greasemonkey @include
42 // patterns to URLPatterns as possible. Greasemonkey @include patterns are 42 // patterns to URLPatterns as possible. Greasemonkey @include patterns are
43 // simple globs, so this won't be perfect. 43 // simple globs, so this won't be perfect.
44 // * Although we would like to support any scheme, it isn't clear what to tell 44 // * Although we would like to support any scheme, it isn't clear what to tell
45 // users about URLPatterns that affect data or javascript URLs, and saying 45 // users about URLPatterns that affect data or javascript URLs, so those are
46 // something useful about chrome-extension URLs is more work, so those are
47 // left out for now. 46 // left out for now.
48 // 47 //
49 // From a 2008-ish crawl of userscripts.org, the following patterns were found 48 // From a 2008-ish crawl of userscripts.org, the following patterns were found
50 // in @include lines: 49 // in @include lines:
51 // - total lines : 24471 50 // - total lines : 24471
52 // - @include * : 919 51 // - @include * : 919
53 // - @include http://[^\*]+?/ : 11128 (no star in host) 52 // - @include http://[^\*]+?/ : 11128 (no star in host)
54 // - @include http://\*\.[^\*]+?/ : 2325 (host prefixed by *.) 53 // - @include http://\*\.[^\*]+?/ : 2325 (host prefixed by *.)
55 // - @include http://\*[^\.][^\*]+?/: 1524 (host prefixed by *, no dot -- many 54 // - @include http://\*[^\.][^\*]+?/: 1524 (host prefixed by *, no dot -- many
56 // appear to only need subdomain 55 // appear to only need subdomain
57 // matching, not real prefix matching) 56 // matching, not real prefix matching)
58 // - @include http://[^\*/]+\*/ : 320 (host suffixed by *) 57 // - @include http://[^\*/]+\*/ : 320 (host suffixed by *)
59 // - @include contains .tld : 297 (host suffixed by .tld -- a special 58 // - @include contains .tld : 297 (host suffixed by .tld -- a special
60 // Greasemonkey domain component that 59 // Greasemonkey domain component that
61 // tries to match all valid registry- 60 // tries to match all valid registry-
62 // controlled suffixes) 61 // controlled suffixes)
63 // - @include http://\*/ : 228 (host is * exactly, but there is 62 // - @include http://\*/ : 228 (host is * exactly, but there is
64 // more to the pattern) 63 // more to the pattern)
65 // 64 //
66 // So, we can support at least half of current @include lines without supporting 65 // So, we can support at least half of current @include lines without supporting
67 // subdomain matching. We can pick up at least another 10% by supporting 66 // subdomain matching. We can pick up at least another 10% by supporting
68 // subdomain matching. It is probably possible to coerce more of the existing 67 // subdomain matching. It is probably possible to coerce more of the existing
69 // patterns to URLPattern, but the resulting pattern will be more restrictive 68 // patterns to URLPattern, but the resulting pattern will be more restrictive
70 // than the original glob, which is probably better than nothing. 69 // than the original glob, which is probably better than nothing.
71 class URLPattern { 70 class URLPattern {
72 public: 71 public:
72 // Returns true if the specified scheme can be used in URL patterns, and false
73 // otherwise.
74 static bool IsValidScheme(const std::string& scheme);
75
73 URLPattern() : match_subdomains_(false) {} 76 URLPattern() : match_subdomains_(false) {}
74 77
75 // Initializes this instance by parsing the provided string. On failure, the 78 // Initializes this instance by parsing the provided string. On failure, the
76 // instance will have some intermediate values and is in an invalid state. 79 // instance will have some intermediate values and is in an invalid state.
77 bool Parse(const std::string& pattern_str); 80 bool Parse(const std::string& pattern_str);
78 81
79 // Returns true if this instance matches the specified URL. 82 // Returns true if this instance matches the specified URL.
80 bool MatchesUrl(const GURL& url) const; 83 bool MatchesUrl(const GURL& url) const;
81 84
82 std::string GetAsString() const; 85 std::string GetAsString() const;
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after
117 // everything after the scheme in the case of file:// URLs. 120 // everything after the scheme in the case of file:// URLs.
118 std::string path_; 121 std::string path_;
119 122
120 // The path with "?" and "\" characters escaped for use with the 123 // The path with "?" and "\" characters escaped for use with the
121 // MatchPattern() function. This is populated lazily, the first time it is 124 // MatchPattern() function. This is populated lazily, the first time it is
122 // needed. 125 // needed.
123 mutable std::string path_escaped_; 126 mutable std::string path_escaped_;
124 }; 127 };
125 128
126 #endif // CHROME_COMMON_EXTENSIONS_URL_PATTERN_H_ 129 #endif // CHROME_COMMON_EXTENSIONS_URL_PATTERN_H_
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698