Chromium Code Reviews| Index: chrome/browser/extensions/api/declarative/url_matcher.h |
| diff --git a/chrome/browser/extensions/api/declarative/url_matcher.h b/chrome/browser/extensions/api/declarative/url_matcher.h |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..a8ce570b0fcbf2aa93d527a6c4205e06582bd972 |
| --- /dev/null |
| +++ b/chrome/browser/extensions/api/declarative/url_matcher.h |
| @@ -0,0 +1,249 @@ |
| +// Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| +// Use of this source code is governed by a BSD-style license that can be |
| +// found in the LICENSE file. |
| + |
| +#ifndef CHROME_BROWSER_EXTENSIONS_API_DECLARATIVE_URL_MATCHER_H_ |
| +#define CHROME_BROWSER_EXTENSIONS_API_DECLARATIVE_URL_MATCHER_H_ |
| +#pragma once |
| + |
| +#include <set> |
| +#include <vector> |
| + |
| +#include "base/memory/linked_ptr.h" |
| +#include "base/memory/scoped_ptr.h" |
| +#include "base/memory/scoped_vector.h" |
| +#include "chrome/browser/extensions/api/declarative/substring_set_matcher.h" |
| +#include "googleurl/src/gurl.h" |
| + |
| +namespace base { |
| +class DictionaryValue; |
| +} |
| + |
| +namespace extensions { |
| + |
| +// This class represents a single URL matching condition, e.g. a match on the |
| +// host suffix or the containment of a string in the query component of a GURL. |
| +// |
| +// The difference from a SubstringPattern created by URLMatcherConditionFactory |
| +// is that this also supports matches to check whether {Host, Path, Query} of a |
| +// URL contains a string. |
| +class URLMatcherCondition { |
| + public: |
| + enum Criterion { |
| + HOST_PREFIX, |
| + HOST_SUFFIX, |
| + HOST_CONTAINS, |
| + HOST_EQUALS, |
| + PATH_PREFIX, |
| + PATH_SUFFIX, |
| + PATH_CONTAINS, |
| + PATH_EQUALS, |
| + QUERY_PREFIX, |
| + QUERY_SUFFIX, |
| + QUERY_CONTAINS, |
| + QUERY_EQUALS, |
| + HOST_SUFFIX_PATH_PREFIX, |
| + URL_PREFIX, |
| + URL_SUFFIX, |
| + URL_CONTAINS, |
| + URL_EQUALS, |
| + }; |
| + |
| + URLMatcherCondition(Criterion criterion, |
| + const SubstringPattern* substring_pattern); |
| + |
| + Criterion criterion() const { return criterion_; } |
| + const SubstringPattern* substring_pattern() const { |
| + return substring_pattern_; |
| + } |
| + |
| + // Returns whether this URLMatcherCondition needs to be executed on a |
| + // full URL rather than on the individual URL components (see |
| + // URLMatcherConditionFactory). |
| + bool IsFullUrlCondition() const; |
| + |
| + // Returns whether this condition is fulfilled for |url|, given the IDs in |
| + // |matching_substring_patterns| (presumably SubstringSetMatcher hits; confirm). |
| + bool IsMatch( |
| + const std::set<SubstringPattern::ID>& matching_substring_patterns, |
| + const GURL& url) const; |
| + |
| + private: |
| + // |criterion_| and |substring_pattern_| together describe what property a URL |
| + // needs to fulfill to be considered a match. |
| + Criterion criterion_; |
| + |
| + // The SubstringPattern used in a SubstringSetMatcher for this condition. |
| + // Not owned. NOTE(review): "valid after BuildSubstringPattern" - no such method is declared here; confirm. |
| + const SubstringPattern* substring_pattern_; |
| +}; |
| + |
| +// Class to map the problem of finding {host, path, query} {prefixes, suffixes, |
| +// containments, and equality} in GURLs to the substring matching problem. |
| +// |
| +// Say, you want to check whether the path of a URL starts with "/index.html". |
| +// This class preprocesses a URL like "www.google.com/index.html" into something |
| +// like "www.google.com|/index.html". After preprocessing, you can search for |
| +// "|/index.html" in the string and see that this candidate URL actually has |
| +// a path that starts with "/index.html". On the contrary, |
| +// "www.google.com/images/index.html" would be normalized to |
| +// "www.google.com|/images/index.html". It is easy to see that it contains |
| +// "/index.html" but the path of the URL does not start with "/index.html". |
| +// |
| +// This preprocessing is important if you want to match a URL against many |
| +// patterns because it reduces the matching to a "discover all substrings |
| +// of a dictionary in a text" problem, which can be solved very efficiently |
| +// by the Aho-Corasick algorithm. |
| +// |
| +// IMPORTANT: The URLMatcherConditionFactory owns the SubstringPatterns |
| +// referenced by created URLMatcherConditions. Therefore, it must outlive |
| +// all created URLMatcherConditions and the SubstringSetMatcher. |
| +class URLMatcherConditionFactory { |
| + public: |
| + URLMatcherConditionFactory(); |
| + |
| + // Canonicalizes |url| for "Create{Host,Path,Query}*Condition" searches. NOTE(review): the method name misspells "Canonicalize". |
| + std::string CanonlicalizeURLForComponentSearches(const GURL& url); |
| + |
| + // Factory methods for the individual URLMatcherCondition::Criterion types. |
| + scoped_ptr<URLMatcherCondition> CreateHostPrefixCondition( |

Matt Perry
2012/02/14 19:56:16
An URLMatcherCondition is now just a Criterion and
battre
2012/02/14 21:56:42
Done.
|
| + const std::string& prefix); |
| + scoped_ptr<URLMatcherCondition> CreateHostSuffixCondition( |
| + const std::string& suffix); |
| + scoped_ptr<URLMatcherCondition> CreateHostContainsCondition( |
| + const std::string& str); |
| + scoped_ptr<URLMatcherCondition> CreateHostEqualsCondition( |
| + const std::string& str); |
| + |
| + scoped_ptr<URLMatcherCondition> CreatePathPrefixCondition( |
| + const std::string& prefix); |
| + scoped_ptr<URLMatcherCondition> CreatePathSuffixCondition( |
| + const std::string& suffix); |
| + scoped_ptr<URLMatcherCondition> CreatePathContainsCondition( |
| + const std::string& str); |
| + scoped_ptr<URLMatcherCondition> CreatePathEqualsCondition( |
| + const std::string& str); |
| + |
| + scoped_ptr<URLMatcherCondition> CreateQueryPrefixCondition( |
| + const std::string& prefix); |
| + scoped_ptr<URLMatcherCondition> CreateQuerySuffixCondition( |
| + const std::string& suffix); |
| + scoped_ptr<URLMatcherCondition> CreateQueryContainsCondition( |
| + const std::string& str); |
| + scoped_ptr<URLMatcherCondition> CreateQueryEqualsCondition( |
| + const std::string& str); |
| + |
| + // This covers the common case, where you don't care whether a domain |
| + // "foobar.com" is expressed as "foobar.com" or "www.foobar.com", and it |
| + // should be followed by a given |path_prefix|. |
| + scoped_ptr<URLMatcherCondition> CreateHostSuffixPathPrefixCondition( |
| + const std::string& host_suffix, |
| + const std::string& path_prefix); |
| + |
| + // Canonicalizes |url| for "CreateURL*Condition" searches. NOTE(review): the method name misspells "Canonicalize". |
| + std::string CanonlicalizeURLForFullSearches(const GURL& url); |
| + |
| + scoped_ptr<URLMatcherCondition> CreateURLPrefixCondition( |
| + const std::string& prefix); |
| + scoped_ptr<URLMatcherCondition> CreateURLSuffixCondition( |
| + const std::string& suffix); |
| + scoped_ptr<URLMatcherCondition> CreateURLContainsCondition( |
| + const std::string& str); |
| + scoped_ptr<URLMatcherCondition> CreateURLEqualsCondition( |
| + const std::string& str); |
| + |
| + // Removes all patterns from |pattern_singletons_| that are not listed in |
| + // |used_patterns|. These patterns are not referenced any more and may be |
| + // freed. |
| + void ForgetUnusedPatterns( |
| + const std::set<SubstringPattern::ID>& used_patterns); |
| + |
| + private: |
| + // Creates a URLMatcherCondition according to the parameters passed. |
| + // The URLMatcherCondition will refer to a SubstringPattern that is |
| + // owned by |pattern_singletons_|. |
| + scoped_ptr<URLMatcherCondition> CreateCondition( |
| + URLMatcherCondition::Criterion criterion, |
| + const std::string& pattern); |
| + |
| + // Prepends a "." to |hostname| if it does not already start with one. |
| + std::string CanonicalizeHostname(const std::string& hostname) const; |
| + |
| + // Counter that ensures that all created SubstringPatterns have unique IDs. |
| + int id_counter_; |
| + |
| + // Maps the pattern() value of a SubstringPattern to the instance. NOTE(review): std::map and std::string are used without a direct <map>/<string> include. |
| + typedef std::map<std::string, linked_ptr<const SubstringPattern> > |
| + PatternSingletons; |
| + PatternSingletons pattern_singletons_; |

Matt Perry
2012/02/14 20:47:47
Also, this should use a set, or a hash_set if you
battre
2012/02/14 21:56:42
Done.
|
| +}; |
| + |
| +// This class represents a set of URLMatcherConditions, identified by an ID, |
| +// that all need to match on a given URL in order to be considered a match. |
| +class URLMatcherConditionSet { |
| + public: |
| + typedef int ID; |
| + typedef ScopedVector<const URLMatcherCondition> Conditions; |

Matt Perry
2012/02/14 19:56:16
Likewise, this is overkill. Just make it a std::se
battre
2012/02/14 21:56:42
Done.
|
| + |
| + URLMatcherConditionSet(ID id, scoped_ptr<Conditions> conditions); |
| + |
| + ID id() const { return id_; } |
| + const Conditions& conditions() const { return conditions_; } |
| + |
| + bool IsMatch( |
| + const std::set<SubstringPattern::ID>& matching_substring_patterns, |
| + const GURL& url) const; |
| + |
| + private: |
| + ID id_; |
| + Conditions conditions_; |
| +}; |
| + |
| +// This class allows matching one URL against a large set of |
| +// URLMatcherConditionSets at the same time. |
| +class URLMatcher { |
| + public: |
| + URLMatcher(); |
| + |
| + void AddConditionSets( |
| + scoped_ptr<ScopedVector<const URLMatcherConditionSet> > condition_sets); |

Matt Perry
2012/02/14 19:56:16
Ditto, overkill. Pass by const ref.
battre
2012/02/14 21:56:42
Done.
|
| + void RemoveConditionSets( |
| + const std::vector<URLMatcherConditionSet::ID>& condition_ids); |
| + |
| + std::set<URLMatcherConditionSet::ID> MatchUrl(const GURL& url); |

Matt Perry
2012/02/14 23:52:25
MatchURL
|
| + |
| + URLMatcherConditionFactory* condition_factory() { |
| + return &condition_factory_; |
| + } |
| + |
| + private: |
| + void UpdateSubstringSetMatcher(bool full_url_conditions); |
| + void UpdateTriggers(); |
| + void UpdateConditionFactory(); |
| + void UpdateInternalDatastructures(); |
| + |
| + URLMatcherConditionFactory condition_factory_; |
| + |
| + // Maps a condition set ID (as passed to AddConditionSets()) to the respective |
| + // URLMatcherConditionSet. |
| + typedef std::map<URLMatcherConditionSet::ID, |
| + linked_ptr<const URLMatcherConditionSet> > |
| + URLMatcherConditionSets; |
| + URLMatcherConditionSets url_matcher_condition_sets_; |
| + |
| + // Maps a SubstringPattern ID to the IDs of the URLMatcherConditionSets that |
| + // need to be triggered in case of a SubstringPattern match. |
| + std::map<SubstringPattern::ID, std::set<URLMatcherConditionSet::ID> > |
| + substring_match_triggers_; |
| + |
| + SubstringSetMatcher full_url_matcher_; |
| + SubstringSetMatcher url_component_matcher_; |
| + std::set<const SubstringPattern*> registered_full_url_patterns_; |
| + std::set<const SubstringPattern*> registered_url_component_patterns_; |
| + |
| + DISALLOW_COPY_AND_ASSIGN(URLMatcher); |
| +}; |
| + |
| +} // namespace extensions |
| + |
| +#endif // CHROME_BROWSER_EXTENSIONS_API_DECLARATIVE_URL_MATCHER_H_ |