Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(712)

Unified Diff: components/subresource_filter/core/common/indexed_ruleset.h

Issue 2844293003: Factor out UrlPatternIndex from IndexedRuleset. (Closed)
Patch Set: Address final nits. Created 3 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: components/subresource_filter/core/common/indexed_ruleset.h
diff --git a/components/subresource_filter/core/common/indexed_ruleset.h b/components/subresource_filter/core/common/indexed_ruleset.h
index e5206d783fae93e12a56d9ad643647cbc1e228a5..00588f8a0a76514dd8699da5e167fb5ff4c1a881 100644
--- a/components/subresource_filter/core/common/indexed_ruleset.h
+++ b/components/subresource_filter/core/common/indexed_ruleset.h
@@ -5,33 +5,28 @@
#ifndef COMPONENTS_SUBRESOURCE_FILTER_CORE_COMMON_INDEXED_RULESET_H_
#define COMPONENTS_SUBRESOURCE_FILTER_CORE_COMMON_INDEXED_RULESET_H_
+#include <stddef.h>
#include <stdint.h>
-#include <vector>
-
#include "base/macros.h"
#include "base/numerics/safe_conversions.h"
-#include "base/strings/string_piece.h"
-#include "components/subresource_filter/core/common/closed_hash_map.h"
-#include "components/subresource_filter/core/common/flat/rules_generated.h"
-#include "components/subresource_filter/core/common/proto/rules.pb.h"
-#include "components/subresource_filter/core/common/uint64_hasher.h"
-#include "url/gurl.h"
-#include "url/origin.h"
+#include "components/subresource_filter/core/common/flat/indexed_ruleset_generated.h"
+#include "components/subresource_filter/core/common/url_pattern_index.h"
+#include "third_party/flatbuffers/src/include/flatbuffers/flatbuffers.h"
+
+class GURL;
+
+namespace url {
+class Origin;
+}
namespace subresource_filter {
class FirstPartyOrigin;
-// The integer type used to represent N-grams.
-using NGram = uint64_t;
-// The hasher used for hashing N-grams.
-using NGramHasher = Uint64Hasher;
-// The hash table probe sequence used both by the ruleset builder and matcher.
-using NGramHashTableProber = DefaultProber<NGram, NGramHasher>;
-
-constexpr size_t kNGramSize = 5;
-static_assert(kNGramSize <= sizeof(NGram), "NGram type is too narrow.");
+namespace proto {
+class UrlRule;
+}
// The class used to construct flat data structures representing the set of URL
// filtering rules, as well as the index of those. Internally owns a
@@ -66,45 +61,12 @@ class RulesetIndexer {
size_t size() const { return base::strict_cast<size_t>(builder_.GetSize()); }
private:
- using MutableUrlRuleList = std::vector<flatbuffers::Offset<flat::UrlRule>>;
- using MutableNGramIndex =
- ClosedHashMap<NGram, MutableUrlRuleList, NGramHashTableProber>;
-
- // Encapsulates a subset of the rules, and an index built on the URL patterns
- // in these rules. The ruleset is divided into parts according to metadata of
- // the rules. Currently there are two parts: blacklist and whitelist.
- struct MutableUrlPatternIndex {
- // This index contains all non-REGEXP rules that have at least one
- // acceptable N-gram. For a single rule the N-gram used as an index key is
- // picked greedily (see GetMostDistinctiveNGram).
- MutableNGramIndex ngram_index;
-
- // A fallback list that contains all the rules with no acceptable N-gram,
- // and all the REGEXP rules.
- MutableUrlRuleList fallback_rules;
-
- MutableUrlPatternIndex();
- ~MutableUrlPatternIndex();
- };
-
- // Returns an N-gram of the |pattern| encoded into the NGram integer type. The
- // N-gram is picked using a greedy heuristic, i.e. the one is chosen which
- // corresponds to the shortest list of rules within the |index|. If there are
- // no valid N-grams in the |pattern|, the return value is 0.
- static NGram GetMostDistinctiveNGram(const MutableNGramIndex& index,
- base::StringPiece pattern);
-
- // Serialized an |index| built over a part of the ruleset, and returns its
- // offset in the FlatBuffer.
- flatbuffers::Offset<flat::UrlPatternIndex> SerializeUrlPatternIndex(
- const MutableUrlPatternIndex& index);
-
- MutableUrlPatternIndex blacklist_;
- MutableUrlPatternIndex whitelist_;
- MutableUrlPatternIndex activation_;
-
flatbuffers::FlatBufferBuilder builder_;
+ UrlPatternIndexBuilder blacklist_;
+ UrlPatternIndexBuilder whitelist_;
+ UrlPatternIndexBuilder deactivation_;
+
DISALLOW_COPY_AND_ASSIGN(RulesetIndexer);
};
@@ -142,6 +104,10 @@ class IndexedRulesetMatcher {
private:
const flat::IndexedRuleset* root_;
+ UrlPatternIndexMatcher blacklist_;
+ UrlPatternIndexMatcher whitelist_;
+ UrlPatternIndexMatcher deactivation_;
+
DISALLOW_COPY_AND_ASSIGN(IndexedRulesetMatcher);
};

Powered by Google App Engine
This is Rietveld 408576698