Index: components/subresource_filter/core/common/url_pattern_index.h |
diff --git a/components/subresource_filter/core/common/url_pattern_index.h b/components/subresource_filter/core/common/url_pattern_index.h |
new file mode 100644 |
index 0000000000000000000000000000000000000000..adeade650483f96224942368a577f567ea4f307e |
--- /dev/null |
+++ b/components/subresource_filter/core/common/url_pattern_index.h |
@@ -0,0 +1,135 @@ |
+// Copyright 2017 The Chromium Authors. All rights reserved. |
+// Use of this source code is governed by a BSD-style license that can be |
+// found in the LICENSE file. |
+ |
+#ifndef COMPONENTS_SUBRESOURCE_FILTER_CORE_COMMON_URL_PATTERN_INDEX_H_ |
+#define COMPONENTS_SUBRESOURCE_FILTER_CORE_COMMON_URL_PATTERN_INDEX_H_ |
+ |
+#include <stddef.h> |
+#include <stdint.h> |
+ |
+#include <vector> |
+ |
+#include "base/macros.h" |
+#include "base/strings/string_piece.h" |
+#include "components/subresource_filter/core/common/closed_hash_map.h" |
+#include "components/subresource_filter/core/common/flat/url_pattern_index_generated.h" |
+#include "components/subresource_filter/core/common/proto/rules.pb.h" |
+#include "components/subresource_filter/core/common/uint64_hasher.h" |
+#include "third_party/flatbuffers/src/include/flatbuffers/flatbuffers.h" |
+ |
+class GURL; |
+ |
+namespace url { |
+class Origin; |
+} |
+ |
+namespace subresource_filter { |
+ |
+// The integer type used to represent N-grams. |
+using NGram = uint64_t; |
+// The hasher used for hashing N-grams. |
+using NGramHasher = Uint64Hasher; |
+// The hash table probe sequence used both by UrlPatternIndex and its builder. |
+using NGramHashTableProber = DefaultProber<NGram, NGramHasher>; |
+ |
+// FlatBuffer offset aliases. |
+using UrlRuleOffset = flatbuffers::Offset<flat::UrlRule>; |
+using UrlPatternIndexOffset = flatbuffers::Offset<flat::UrlPatternIndex>; |
+ |
+constexpr size_t kNGramSize = 5; |
+static_assert(kNGramSize <= sizeof(NGram), "NGram type is too narrow."); |
+ |
+// Serializes the |rule| to the FlatBuffer |builder|, and returns an offset to |
+// it in the resulting buffer. Returns null offset iff the |rule| could not be |
+// serialized because of unsupported options or it is otherwise invalid. |
+UrlRuleOffset SerializeUrlRule(const proto::UrlRule& rule, |
+ flatbuffers::FlatBufferBuilder* builder); |
+ |
+// The class used to construct an index over the URL patterns of a set of URL |
+// rules. The rules themselves need to be converted to FlatBuffers format by the |
+// client of this class, as well as persisted into the |flat_builder| that is |
+// supplied in the constructor. |
+class UrlPatternIndexBuilder { |
+ public: |
+ explicit UrlPatternIndexBuilder(flatbuffers::FlatBufferBuilder* flat_builder); |
+ ~UrlPatternIndexBuilder(); |
+ |
+ // Adds a UrlRule to the index. The caller should have already persisted the |
+ // rule into the same |flat_builder| by a call to SerializeUrlRule returning a |
+ // non-null |offset|, and should pass in the resulting |offset| here. |
+ void IndexUrlRule(UrlRuleOffset offset); |
+ |
+ // Finalizes construction of the index, serializes it using |flat_builder|, |
+ // and returns an offset to it in the resulting FlatBuffer. |
+ UrlPatternIndexOffset Finish(); |
+ |
+ private: |
+ using MutableUrlRuleList = std::vector<UrlRuleOffset>; |
+ using MutableNGramIndex = |
+ ClosedHashMap<NGram, MutableUrlRuleList, NGramHashTableProber>; |
+ |
+ // Returns an N-gram of the |pattern| encoded into the NGram integer type. The |
+ // N-gram is picked using a greedy heuristic, i.e. the one is chosen which |
+ // corresponds to the shortest list of rules within the index. If there are no |
+ // valid N-grams in the |pattern|, the return value is 0. |
+ NGram GetMostDistinctiveNGram(base::StringPiece pattern); |
+ |
+ // This index contains all non-REGEXP rules that have at least one acceptable |
+ // N-gram. For each given rule, the N-gram used as an index key is picked |
+ // greedily (see GetMostDistinctiveNGram). |
+ MutableNGramIndex ngram_index_; |
+ |
+ // A fallback list that contains all the rules with no acceptable N-gram. |
+ MutableUrlRuleList fallback_rules_; |
+ |
+ // Must outlive this instance. |
+ flatbuffers::FlatBufferBuilder* flat_builder_; |
+ |
+ DISALLOW_COPY_AND_ASSIGN(UrlPatternIndexBuilder); |
+}; |
+ |
+// Encapsulates a read-only index built over the URL patterns of a set of URL |
+// rules, and provides fast matching of network requests against these rules. |
+class UrlPatternIndexMatcher { |
+ public: |
+ // Creates an instance to access the given |flat_index|. If |flat_index| is |
+ // nullptr, then all requests return no match. |
+ explicit UrlPatternIndexMatcher(const flat::UrlPatternIndex* flat_index); |
+ ~UrlPatternIndexMatcher(); |
+ |
+ // If the index contains one or more UrlRules that match the request, returns |
+ // one of them (it is undefined which one). Otherwise, returns nullptr. |
+ // |
+ // Notes on parameters: |
+ // - |url| should be valid, otherwise the return value is nullptr. |
+ // - Exactly one of |element_type| and |activation_type| should be specified, |
+ // i.e., not equal to *_UNSPECIFIED, otherwise the return value is nullptr. |
+ // - |is_third_party| should be pre-computed by the caller, e.g. using the |
+ // registry_controlled_domains library, to reflect the relation between |
+ // |url| and |first_party_origin|. |
+ // |
+ // A rule is deemed to match the request iff all of the following applies: |
+ // - The |url| matches the rule's UrlPattern (see url_pattern.h). |
+ // - The |first_party_origin| matches the rule's targeted domains list. |
+ // - |element_type| or |activation_type| is among the rule's targeted types. |
+ // - The |is_third_party| bit matches the rule's requirement on the requested |
+ // |url| being first-/third-party w.r.t. its |first_party_origin|. |
+ // - The rule is not generic if |disable_generic_rules| is true. |
+ const flat::UrlRule* FindMatch(const GURL& url, |
+ const url::Origin& first_party_origin, |
+ proto::ElementType element_type, |
+ proto::ActivationType activation_type, |
+ bool is_third_party, |
+ bool disable_generic_rules) const; |
+ |
+ private: |
+ // Must outlive this instance. |
+ const flat::UrlPatternIndex* flat_index_; |
+ |
+ DISALLOW_COPY_AND_ASSIGN(UrlPatternIndexMatcher); |
+}; |
+ |
+} // namespace subresource_filter |
+ |
+#endif // COMPONENTS_SUBRESOURCE_FILTER_CORE_COMMON_URL_PATTERN_INDEX_H_ |