| Index: chrome/common/extensions/url_pattern.h
|
| diff --git a/chrome/common/extensions/url_pattern.h b/chrome/common/extensions/url_pattern.h
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..40b1b5d20c410bd6905fa9a15265c015bc145bed
|
| --- /dev/null
|
| +++ b/chrome/common/extensions/url_pattern.h
|
| @@ -0,0 +1,122 @@
|
| +// Copyright (c) 2006-2009 The Chromium Authors. All rights reserved.
|
| +// Use of this source code is governed by a BSD-style license that can be
|
| +// found in the LICENSE file.
|
| +#ifndef CHROME_BROWSER_EXTENSIONS_MATCH_PATTERN_H_
|
| +#define CHROME_BROWSER_EXTENSIONS_MATCH_PATTERN_H_
|
| +
|
| +#include "googleurl/src/gurl.h"
|
| +
|
| +// A pattern that can be used to match URLs. A URLPattern is a very restricted
|
| +// subset of URL syntax:
|
| +//
|
| +// <url-pattern> := <scheme>://<host><path>
|
| +// <scheme> := 'http' | 'https' | 'file' | 'ftp' | 'chrome-ui'
|
| +// <host> := '*' | ['*.'] <anychar except '/' and '*'>+
|
| +// <path> := '/' <any chars>
|
| +//
|
| +// * Host is not used when the scheme is 'file'.
|
| +// * The path can have embedded '*' characters which act as glob wildcards.
|
| +//
|
| +// Examples of valid patterns:
|
| +// - http://*/*
|
| +// - http://*/foo*
|
| +// - https://*.google.com/foo*bar
|
| +// - chrome-ui://foo/bar
|
| +// - file://monkey*
|
| +// - http://127.0.0.1/*
|
| +//
|
| +// Examples of invalid patterns:
|
| +// - http://* -- path not specified
|
| +// - http://*foo/bar -- * not allowed as substring of host component
|
| +// - http://foo.*.bar/baz -- * must be first component
|
| +// - http:/bar -- scheme separator not found
|
| +// - foo://* -- invalid scheme
|
| +//
|
| +// Design rationale:
|
| +// * We need to be able to tell users what 'sites' a given URLPattern will
|
| +// affect. For example "This extension will interact with the site
|
| +// 'www.google.com'."
|
| +// * We'd like to be able to convert as many existing Greasemonkey @include
|
| +// patterns to URLPatterns as possible. Greasemonkey @include patterns are
|
| +// simple globs, so this won't be perfect.
|
| +// * Although we would like to support any scheme, it isn't clear what to tell
|
| +// users about URLPatterns that affect data or javascript URLs, and saying
|
| +// something useful about chrome-extension URLs is more work, so those are
|
| +// left out for now.
|
| +//
|
| +// From a 2008-ish crawl of userscripts.org, the following patterns were found
|
| +// in @include lines:
|
| +// - total lines : 24271
|
| +// - @include * : 919
|
| +// - @include http://[^\*]+?/ : 11128 (no star in host)
|
| +// - @include http://\*\.[^\*]+?/ : 2325 (host prefixed by *.)
|
| +// - @include http://\*[^\.][^\*]+?/: 1524 (host prefixed by *, no dot -- many
|
| +// appear to only need subdomain
|
| +// matching, not real prefix matching)
|
| +// - @include http://[^\*/]+\*/ : 320 (host suffixed by *)
|
| +// - @include contains .tld : 297 (host suffixed by .tld -- a special
|
| +// Greasemonkey domain component that
|
| +// tries to match all valid registry-
|
| +// controlled suffixes)
|
| +// - @include http://\*/ : 228 (host is * exactly, but there is
|
| +// more to the pattern)
|
| +//
|
| +// So, we can support at least half of current @include lines without supporting
|
| +// subdomain matching. We can pick up at least another 10% by supporting
|
| +// subdomain matching. It is probably possible to coerce more of the existing
|
| +// patterns to URLPattern, but the resulting pattern will be more restrictive
|
| +// than the original glob, which is probably better than nothing.
|
| +class URLPattern {
|
| + public:
|
| + URLPattern() : match_subdomains_(false) {}
|
| +
|
| + // Initializes this instance by parsing |pattern_str|. On failure, the
|
| + // instance will have some intermediate values and is in an invalid state.
|
| + bool Parse(const std::string& pattern_str);
|
| +
|
| + // Returns true if this instance matches the specified URL.
|
| + bool MatchesUrl(const GURL& url);
|
| +
|
| + // Gets the scheme the pattern matches. This will always return a valid scheme
|
| + // if the pattern was successfully initialized by Parse().
|
| + std::string scheme() const { return scheme_; }
|
| +
|
| + // Gets the host the pattern matches. This can be an empty string if the
|
| + // pattern matches all hosts (the input was <scheme>://*/<whatever>).
|
| + std::string host() const { return host_; }
|
| +
|
| + // Gets whether to match subdomains of host().
|
| + bool match_subdomains() const { return match_subdomains_; }
|
| +
|
| + // Gets the path the pattern matches with the leading slash. This can have
|
| + // embedded asterisks which are interpreted using glob rules.
|
| + std::string path() const { return path_; }
|
| +
|
| + private:
|
| + // Returns true if |test| matches our host.
|
| + bool MatchesHost(const GURL& test);
|
| +
|
| + // Returns true if |test| matches our path.
|
| + bool MatchesPath(const GURL& test);
|
| +
|
| + // The scheme for the pattern.
|
| + std::string scheme_;
|
| +
|
| + // The host without any leading "*" components.
|
| + std::string host_;
|
| +
|
| + // Whether we should match subdomains of the host. This is true if the first
|
| + // component of the pattern's host was "*".
|
| + bool match_subdomains_;
|
| +
|
| + // The path to match. This is everything after the host of the URL, or
|
| + // everything after the scheme in the case of file:// URLs.
|
| + std::string path_;
|
| +
|
| + // The path with "?" and "\" characters escaped for use with the
|
| + // MatchPattern() function. This is populated lazily, the first time it is
|
| + // needed.
|
| + std::string path_escaped_;
|
| +};
|
| +
|
| +#endif  // CHROME_BROWSER_EXTENSIONS_MATCH_PATTERN_H_
|
|
|