Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1150)

Unified Diff: chrome/browser/extensions/api/declarative/url_matcher.h

Issue 9390018: Implementation of a Matching strategy for URLs in the Declarative WebRequest API. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Refactored for memory improvements Created 8 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: chrome/browser/extensions/api/declarative/url_matcher.h
diff --git a/chrome/browser/extensions/api/declarative/url_matcher.h b/chrome/browser/extensions/api/declarative/url_matcher.h
new file mode 100644
index 0000000000000000000000000000000000000000..a8ce570b0fcbf2aa93d527a6c4205e06582bd972
--- /dev/null
+++ b/chrome/browser/extensions/api/declarative/url_matcher.h
@@ -0,0 +1,249 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CHROME_BROWSER_EXTENSIONS_API_DECLARATIVE_URL_MATCHER_H_
+#define CHROME_BROWSER_EXTENSIONS_API_DECLARATIVE_URL_MATCHER_H_
+#pragma once
+
+#include <set>
+#include <vector>
+
+#include "base/memory/linked_ptr.h"
+#include "base/memory/scoped_ptr.h"
+#include "base/memory/scoped_vector.h"
+#include "chrome/browser/extensions/api/declarative/substring_set_matcher.h"
+#include "googleurl/src/gurl.h"
+
+namespace base {
+class DictionaryValue;
+}
+
+namespace extensions {
+
+// This class represents a single URL matching condition, e.g. a match on the
+// host suffix or the containment of a string in the query component of a GURL.
+//
+// The difference from a SubstringPattern created by URLMatcherConditionFactory
+// is that this also supports matches to check whether {Host, Path, Query} of a
+// URL contains a string.
+class URLMatcherCondition {
+ public:
+ enum Criterion {
+ HOST_PREFIX,
+ HOST_SUFFIX,
+ HOST_CONTAINS,
+ HOST_EQUALS,
+ PATH_PREFIX,
+ PATH_SUFFIX,
+ PATH_CONTAINS,
+ PATH_EQUALS,
+ QUERY_PREFIX,
+ QUERY_SUFFIX,
+ QUERY_CONTAINS,
+ QUERY_EQUALS,
+ HOST_SUFFIX_PATH_PREFIX,
+ URL_PREFIX,
+ URL_SUFFIX,
+ URL_CONTAINS,
+ URL_EQUALS,
+ };
+
+ URLMatcherCondition(Criterion criterion,
+ const SubstringPattern* substring_pattern);
+
+ Criterion criterion() const { return criterion_; }
+ const SubstringPattern* substring_pattern() const {
+ return substring_pattern_;
+ }
+
+ // Returns whether this URLMatcherCondition needs to be executed on a
+ // full URL rather than the individual components (see
+ // URLMatcherConditionFactory).
+ bool IsFullUrlCondition() const;
+
+ // Returns whether this condition is fulfilled according to
+ // |matching_substring_patterns| and |url|.
+ bool IsMatch(
+ const std::set<SubstringPattern::ID>& matching_substring_patterns,
+ const GURL& url) const;
+
+ private:
+ // |criterion_| and |substring_pattern_| describe together what property a URL
+ // needs to fulfill to be considered a match.
+ Criterion criterion_;
+
+ // This is the SubstringPattern that is used in a SubstringSetMatcher.
+ // It becomes valid after BuildSubstringPattern has been called.
+ const SubstringPattern* substring_pattern_;
+};
+
+// Class to map the problem of finding {host, path, query} {prefixes, suffixes,
+// containments, and equality} in GURLs to the substring matching problem.
+//
+// Say, you want to check whether the path of a URL starts with "/index.html".
+// This class preprocesses a URL like "www.google.com/index.html" into something
+// like "www.google.com|/index.html". After preprocessing, you can search for
+// "|/index.html" in the string and see that this candidate URL actually has
+// a path that starts with "/index.html". In contrast,
+// "www.google.com/images/index.html" would be normalized to
+// "www.google.com|/images/index.html". It is easy to see that it contains
+// "/index.html" but the path of the URL does not start with "/index.html".
+//
+// This preprocessing is important if you want to match a URL against many
+// patterns because it reduces the matching to a "discover all substrings
+// of a dictionary in a text" problem, which can be solved very efficiently
+// by the Aho-Corasick algorithm.
+//
+// IMPORTANT: The URLMatcherConditionFactory owns the SubstringPatterns
+// referenced by the URLMatcherConditions it creates. Therefore, it must
+// outlive all created URLMatcherConditions and the SubstringSetMatcher.
+class URLMatcherConditionFactory {
+ public:
+ URLMatcherConditionFactory();
+
+ // Canonicalizes a URL for "Create{Host,Path,Query}*Condition" searches.
+ std::string CanonlicalizeURLForComponentSearches(const GURL& url);
+
+ // Factory methods for various condition types.
+ scoped_ptr<URLMatcherCondition> CreateHostPrefixCondition(
Matt Perry 2012/02/14 19:56:16 An URLMatcherCondition is now just a Criterion and
battre 2012/02/14 21:56:42 Done.
+ const std::string& prefix);
+ scoped_ptr<URLMatcherCondition> CreateHostSuffixCondition(
+ const std::string& suffix);
+ scoped_ptr<URLMatcherCondition> CreateHostContainsCondition(
+ const std::string& str);
+ scoped_ptr<URLMatcherCondition> CreateHostEqualsCondition(
+ const std::string& str);
+
+ scoped_ptr<URLMatcherCondition> CreatePathPrefixCondition(
+ const std::string& prefix);
+ scoped_ptr<URLMatcherCondition> CreatePathSuffixCondition(
+ const std::string& suffix);
+ scoped_ptr<URLMatcherCondition> CreatePathContainsCondition(
+ const std::string& str);
+ scoped_ptr<URLMatcherCondition> CreatePathEqualsCondition(
+ const std::string& str);
+
+ scoped_ptr<URLMatcherCondition> CreateQueryPrefixCondition(
+ const std::string& prefix);
+ scoped_ptr<URLMatcherCondition> CreateQuerySuffixCondition(
+ const std::string& suffix);
+ scoped_ptr<URLMatcherCondition> CreateQueryContainsCondition(
+ const std::string& str);
+ scoped_ptr<URLMatcherCondition> CreateQueryEqualsCondition(
+ const std::string& str);
+
+ // This covers the common case, where you don't care whether a domain
+ // "foobar.com" is expressed as "foobar.com" or "www.foobar.com", and it
+ // should be followed by a given |path_prefix|.
+ scoped_ptr<URLMatcherCondition> CreateHostSuffixPathPrefixCondition(
+ const std::string& host_suffix,
+ const std::string& path_prefix);
+
+ // Canonicalizes a URL for "CreateURL*Condition" searches.
+ std::string CanonlicalizeURLForFullSearches(const GURL& url);
+
+ scoped_ptr<URLMatcherCondition> CreateURLPrefixCondition(
+ const std::string& prefix);
+ scoped_ptr<URLMatcherCondition> CreateURLSuffixCondition(
+ const std::string& suffix);
+ scoped_ptr<URLMatcherCondition> CreateURLContainsCondition(
+ const std::string& str);
+ scoped_ptr<URLMatcherCondition> CreateURLEqualsCondition(
+ const std::string& str);
+
+ // Removes all patterns from |pattern_singletons_| that are not listed in
+ // |used_patterns|. These patterns are not referenced any more and may be
+ // freed.
+ void ForgetUnusedPatterns(
+ const std::set<SubstringPattern::ID>& used_patterns);
+
+ private:
+ // Creates a URLMatcherCondition according to the parameters passed.
+ // The URLMatcherCondition will refer to a SubstringPattern that is
+ // owned by |pattern_singletons_|.
+ scoped_ptr<URLMatcherCondition> CreateCondition(
+ URLMatcherCondition::Criterion criterion,
+ const std::string& pattern);
+
+ // Prepends a "." to the hostname if it does not start with one.
+ std::string CanonicalizeHostname(const std::string& hostname) const;
+
+ // Counter that ensures that all created SubstringPatterns have unique IDs.
+ int id_counter_;
+
+ // Maps the pattern() value of a SubstringPattern to the instance.
+ typedef std::map<std::string, linked_ptr<const SubstringPattern> >
+ PatternSingletons;
+ PatternSingletons pattern_singletons_;
Matt Perry 2012/02/14 20:47:47 Also, this should use a set, or a hash_set if you
battre 2012/02/14 21:56:42 Done.
+};
+
+// This class represents a set of conditions that all need to match on a
+// given URL in order to be considered a match.
+class URLMatcherConditionSet {
+ public:
+ typedef int ID;
+ typedef ScopedVector<const URLMatcherCondition> Conditions;
Matt Perry 2012/02/14 19:56:16 Likewise, this is overkill. Just make it a std::se
battre 2012/02/14 21:56:42 Done.
+
+ URLMatcherConditionSet(ID id, scoped_ptr<Conditions> conditions);
+
+ ID id() const { return id_; }
+ const Conditions& conditions() const { return conditions_; }
+
+ bool IsMatch(
+ const std::set<SubstringPattern::ID>& matching_substring_patterns,
+ const GURL& url) const;
+
+ private:
+ ID id_;
+ Conditions conditions_;
+};
+
+// This class allows matching one URL against a large set of
+// URLMatcherConditionSets at the same time.
+class URLMatcher {
+ public:
+ URLMatcher();
+
+ void AddConditionSets(
+ scoped_ptr<ScopedVector<const URLMatcherConditionSet> > condition_sets);
Matt Perry 2012/02/14 19:56:16 Ditto, overkill. Pass by const ref.
battre 2012/02/14 21:56:42 Done.
+ void RemoveConditionSets(
+ const std::vector<URLMatcherConditionSet::ID>& condition_ids);
+
+ std::set<URLMatcherConditionSet::ID> MatchUrl(const GURL& url);
Matt Perry 2012/02/14 23:52:25 MatchURL
+
+ URLMatcherConditionFactory* condition_factory() {
+ return &condition_factory_;
+ }
+
+ private:
+ void UpdateSubstringSetMatcher(bool full_url_conditions);
+ void UpdateTriggers();
+ void UpdateConditionFactory();
+ void UpdateInternalDatastructures();
+
+ URLMatcherConditionFactory condition_factory_;
+
+ // Maps a condition set ID (as passed to AddConditionSets()) to the
+ // respective URLMatcherConditionSet.
+ typedef std::map<URLMatcherConditionSet::ID,
+ linked_ptr<const URLMatcherConditionSet> >
+ URLMatcherConditionSets;
+ URLMatcherConditionSets url_matcher_condition_sets_;
+
+ // Maps a SubstringPattern ID to the IDs of the URLMatcherConditionSets that
+ // need to be triggered in case of a SubstringPattern match.
+ std::map<SubstringPattern::ID, std::set<URLMatcherConditionSet::ID> >
+ substring_match_triggers_;
+
+ SubstringSetMatcher full_url_matcher_;
+ SubstringSetMatcher url_component_matcher_;
+ std::set<const SubstringPattern*> registered_full_url_patterns_;
+ std::set<const SubstringPattern*> registered_url_component_patterns_;
+
+ DISALLOW_COPY_AND_ASSIGN(URLMatcher);
+};
+
+} // namespace extensions
+
+#endif // CHROME_BROWSER_EXTENSIONS_API_DECLARATIVE_URL_MATCHER_H_

Powered by Google App Engine
This is Rietveld 408576698