Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(8)

Unified Diff: components/subresource_filter/core/common/url_pattern_index.cc

Issue 2844293003: Factor out UrlPatternIndex from IndexedRuleset. (Closed)
Patch Set: Address final nits. Created 3 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « components/subresource_filter/core/common/url_pattern_index.h ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: components/subresource_filter/core/common/url_pattern_index.cc
diff --git a/components/subresource_filter/core/common/indexed_ruleset.cc b/components/subresource_filter/core/common/url_pattern_index.cc
similarity index 70%
copy from components/subresource_filter/core/common/indexed_ruleset.cc
copy to components/subresource_filter/core/common/url_pattern_index.cc
index 66d08f4c4a41cfa0bfbd1487484fa45eed71c2e0..49c3cb919e1e31f82276f9b38513b83ef4a0afcf 100644
--- a/components/subresource_filter/core/common/indexed_ruleset.cc
+++ b/components/subresource_filter/core/common/url_pattern_index.cc
@@ -1,8 +1,8 @@
-// Copyright 2016 The Chromium Authors. All rights reserved.
+// Copyright 2017 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
-#include "components/subresource_filter/core/common/indexed_ruleset.h"
+#include "components/subresource_filter/core/common/url_pattern_index.h"
#include <algorithm>
#include <limits>
@@ -11,10 +11,10 @@
#include "base/logging.h"
#include "base/numerics/safe_conversions.h"
#include "base/strings/string_util.h"
-#include "components/subresource_filter/core/common/first_party_origin.h"
#include "components/subresource_filter/core/common/ngram_extractor.h"
#include "components/subresource_filter/core/common/url_pattern.h"
-#include "third_party/flatbuffers/src/include/flatbuffers/flatbuffers.h"
+#include "url/gurl.h"
+#include "url/origin.h"
namespace subresource_filter {
@@ -49,7 +49,8 @@ class UrlRuleFlatBufferConverter {
// Creates the converter, and initializes |is_convertible| bit. If
// |is_convertible| == true, then all the fields, needed for serializing the
// |rule| to FlatBuffer, are initialized (|options|, |anchor_right|, etc.).
- UrlRuleFlatBufferConverter(const proto::UrlRule& rule) : rule_(rule) {
+ explicit UrlRuleFlatBufferConverter(const proto::UrlRule& rule)
+ : rule_(rule) {
is_convertible_ = InitializeOptions() && InitializeElementTypes() &&
InitializeActivationTypes() && InitializeUrlPattern() &&
IsMeaningful();
@@ -60,12 +61,9 @@ class UrlRuleFlatBufferConverter {
// this client version.
bool is_convertible() const { return is_convertible_; }
- bool has_element_types() const { return !!element_types_; }
- bool has_activation_types() const { return !!activation_types_; }
-
// Writes the URL |rule| to the FlatBuffer using the |builder|, and returns
// the offset to the serialized rule.
- flatbuffers::Offset<flat::UrlRule> SerializeConvertedRule(
+ UrlRuleOffset SerializeConvertedRule(
flatbuffers::FlatBufferBuilder* builder) const {
DCHECK(is_convertible());
@@ -195,11 +193,9 @@ class UrlRuleFlatBufferConverter {
"Activation types can not be stored in uint8_t.");
activation_types_ = static_cast<uint8_t>(rule_.activation_types());
- // Ignore unknown activation types.
- activation_types_ &= proto::ACTIVATION_TYPE_ALL;
- // No need in CSS activation, because the CSS rules are not supported.
+ // Only the following activation types are supported, ignore the others.
activation_types_ &=
- ~(proto::ACTIVATION_TYPE_ELEMHIDE | proto::ACTIVATION_TYPE_GENERICHIDE);
+ proto::ACTIVATION_TYPE_DOCUMENT | proto::ACTIVATION_TYPE_GENERICBLOCK;
return true;
}
@@ -246,60 +242,71 @@ class UrlRuleFlatBufferConverter {
} // namespace
-// RulesetIndexer --------------------------------------------------------------
-
-// static
-const int RulesetIndexer::kIndexedFormatVersion = 17;
-
-RulesetIndexer::MutableUrlPatternIndex::MutableUrlPatternIndex() = default;
-RulesetIndexer::MutableUrlPatternIndex::~MutableUrlPatternIndex() = default;
-
-RulesetIndexer::RulesetIndexer() = default;
-RulesetIndexer::~RulesetIndexer() = default;
+// Helpers. --------------------------------------------------------------------
-bool RulesetIndexer::AddUrlRule(const proto::UrlRule& rule) {
+UrlRuleOffset SerializeUrlRule(const proto::UrlRule& rule,
+ flatbuffers::FlatBufferBuilder* builder) {
+ DCHECK(builder);
UrlRuleFlatBufferConverter converter(rule);
if (!converter.is_convertible())
- return false;
+ return UrlRuleOffset();
DCHECK_NE(rule.url_pattern_type(), proto::URL_PATTERN_TYPE_REGEXP);
- auto rule_offset = converter.SerializeConvertedRule(&builder_);
-
- auto add_rule_to_index = [&rule, rule_offset](MutableUrlPatternIndex* index) {
- NGram ngram =
- GetMostDistinctiveNGram(index->ngram_index, rule.url_pattern());
- if (ngram) {
- index->ngram_index[ngram].push_back(rule_offset);
- } else {
- // TODO(pkalinnikov): Index fallback rules as well.
- index->fallback_rules.push_back(rule_offset);
- }
- };
+ return converter.SerializeConvertedRule(builder);
+}
- if (rule.semantics() == proto::RULE_SEMANTICS_BLACKLIST) {
- add_rule_to_index(&blacklist_);
+// UrlPatternIndexBuilder ------------------------------------------------------
+
+UrlPatternIndexBuilder::UrlPatternIndexBuilder(
+ flatbuffers::FlatBufferBuilder* flat_builder)
+ : flat_builder_(flat_builder) {
+ DCHECK(flat_builder_);
+}
+
+UrlPatternIndexBuilder::~UrlPatternIndexBuilder() = default;
+
+void UrlPatternIndexBuilder::IndexUrlRule(UrlRuleOffset offset) {
+ DCHECK(offset.o);
+
+ const auto* rule = flatbuffers::GetTemporaryPointer(*flat_builder_, offset);
+ DCHECK(rule);
+ NGram ngram = GetMostDistinctiveNGram(ToStringPiece(rule->url_pattern()));
+
+ if (ngram) {
+ ngram_index_[ngram].push_back(offset);
} else {
- if (converter.has_element_types())
- add_rule_to_index(&whitelist_);
- if (converter.has_activation_types())
- add_rule_to_index(&activation_);
+ // TODO(pkalinnikov): Index fallback rules as well.
+ fallback_rules_.push_back(offset);
}
-
- return true;
}
-void RulesetIndexer::Finish() {
- auto blacklist_offset = SerializeUrlPatternIndex(blacklist_);
- auto whitelist_offset = SerializeUrlPatternIndex(whitelist_);
- auto activation_offset = SerializeUrlPatternIndex(activation_);
+UrlPatternIndexOffset UrlPatternIndexBuilder::Finish() {
+ std::vector<flatbuffers::Offset<flat::NGramToRules>> flat_hash_table(
+ ngram_index_.table_size());
- auto url_rules_index_offset = flat::CreateIndexedRuleset(
- builder_, blacklist_offset, whitelist_offset, activation_offset);
- builder_.Finish(url_rules_index_offset);
+ flatbuffers::Offset<flat::NGramToRules> empty_slot_offset =
+ flat::CreateNGramToRules(*flat_builder_);
+ for (size_t i = 0, size = ngram_index_.table_size(); i != size; ++i) {
+ const uint32_t entry_index = ngram_index_.hash_table()[i];
+ if (entry_index >= ngram_index_.size()) {
+ flat_hash_table[i] = empty_slot_offset;
+ continue;
+ }
+ const MutableNGramIndex::EntryType& entry =
+ ngram_index_.entries()[entry_index];
+ auto rules_offset = flat_builder_->CreateVector(entry.second);
+ flat_hash_table[i] =
+ flat::CreateNGramToRules(*flat_builder_, entry.first, rules_offset);
+ }
+ auto ngram_index_offset = flat_builder_->CreateVector(flat_hash_table);
+
+ auto fallback_rules_offset = flat_builder_->CreateVector(fallback_rules_);
+
+ return flat::CreateUrlPatternIndex(*flat_builder_, kNGramSize,
+ ngram_index_offset, empty_slot_offset,
+ fallback_rules_offset);
}
-// static
-NGram RulesetIndexer::GetMostDistinctiveNGram(
- const MutableNGramIndex& ngram_index,
+NGram UrlPatternIndexBuilder::GetMostDistinctiveNGram(
base::StringPiece pattern) {
size_t min_list_size = std::numeric_limits<size_t>::max();
NGram best_ngram = 0;
@@ -308,7 +315,7 @@ NGram RulesetIndexer::GetMostDistinctiveNGram(
pattern, [](char c) { return c == '*' || c == '^'; });
for (uint64_t ngram : ngrams) {
- const MutableUrlRuleList* rules = ngram_index.Get(ngram);
+ const MutableUrlRuleList* rules = ngram_index_.Get(ngram);
const size_t list_size = rules ? rules->size() : 0;
if (list_size < min_list_size) {
// TODO(pkalinnikov): Pick random of the same-sized lists.
@@ -322,36 +329,7 @@ NGram RulesetIndexer::GetMostDistinctiveNGram(
return best_ngram;
}
-flatbuffers::Offset<flat::UrlPatternIndex>
-RulesetIndexer::SerializeUrlPatternIndex(const MutableUrlPatternIndex& index) {
- const MutableNGramIndex& ngram_index = index.ngram_index;
-
- std::vector<flatbuffers::Offset<flat::NGramToRules>> flat_hash_table(
- ngram_index.table_size());
-
- flatbuffers::Offset<flat::NGramToRules> empty_slot_offset =
- flat::CreateNGramToRules(builder_);
- for (size_t i = 0, size = ngram_index.table_size(); i != size; ++i) {
- const uint32_t entry_index = ngram_index.hash_table()[i];
- if (entry_index >= ngram_index.size()) {
- flat_hash_table[i] = empty_slot_offset;
- continue;
- }
- const MutableNGramIndex::EntryType& entry =
- ngram_index.entries()[entry_index];
- auto rules_offset = builder_.CreateVector(entry.second);
- flat_hash_table[i] =
- flat::CreateNGramToRules(builder_, entry.first, rules_offset);
- }
- auto ngram_index_offset = builder_.CreateVector(flat_hash_table);
-
- auto fallback_rules_offset = builder_.CreateVector(index.fallback_rules);
-
- return flat::CreateUrlPatternIndex(builder_, kNGramSize, ngram_index_offset,
- empty_slot_offset, fallback_rules_offset);
-}
-
-// IndexedRulesetMatcher -------------------------------------------------------
+// UrlPatternIndex -------------------------------------------------------------
namespace {
@@ -458,8 +436,8 @@ bool DoesRuleFlagsMatch(const flat::UrlRule& rule,
proto::ElementType element_type,
proto::ActivationType activation_type,
bool is_third_party) {
- DCHECK(element_type == proto::ELEMENT_TYPE_UNSPECIFIED ||
- activation_type == proto::ACTIVATION_TYPE_UNSPECIFIED);
+ DCHECK((element_type == proto::ELEMENT_TYPE_UNSPECIFIED) !=
+ (activation_type == proto::ACTIVATION_TYPE_UNSPECIFIED));
if (element_type != proto::ELEMENT_TYPE_UNSPECIFIED &&
!(rule.element_types() & element_type)) {
@@ -482,16 +460,17 @@ bool DoesRuleFlagsMatch(const flat::UrlRule& rule,
return true;
}
-bool MatchesAny(const FlatUrlRuleList* rules,
- const GURL& url,
- const url::Origin& document_origin,
- proto::ElementType element_type,
- proto::ActivationType activation_type,
- bool is_third_party,
- bool disable_generic_rules) {
- if (!rules)
- return false;
- for (const flat::UrlRule* rule : *rules) {
+const flat::UrlRule* FindMatchAmongCandidates(
+ const FlatUrlRuleList* candidates,
+ const GURL& url,
+ const url::Origin& document_origin,
+ proto::ElementType element_type,
+ proto::ActivationType activation_type,
+ bool is_third_party,
+ bool disable_generic_rules) {
+ if (!candidates)
+ return nullptr;
+ for (const flat::UrlRule* rule : *candidates) {
DCHECK_NE(rule, nullptr);
DCHECK_NE(rule->url_pattern_type(), flat::UrlPatternType_REGEXP);
if (!DoesRuleFlagsMatch(*rule, element_type, activation_type,
@@ -503,27 +482,26 @@ bool MatchesAny(const FlatUrlRuleList* rules,
if (DoesOriginMatchDomainList(document_origin, *rule,
disable_generic_rules)) {
- return true;
+ return rule;
}
}
- return false;
+ return nullptr;
}
-// Returns whether the network request matches a particular part of the index.
-// |is_third_party| should reflect the relation between |url| and
-// |document_origin|.
-bool IsMatch(const flat::UrlPatternIndex* index,
- const GURL& url,
- const url::Origin& document_origin,
- proto::ElementType element_type,
- proto::ActivationType activation_type,
- bool is_third_party,
- bool disable_generic_rules) {
- if (!index)
- return false;
- const FlatNGramIndex* hash_table = index->ngram_index();
- const flat::NGramToRules* empty_slot = index->ngram_index_empty_slot();
+// Returns a rule from the FlatBuffers UrlPattern |index| that matches the
+// network request, or nullptr if none does. |is_third_party| should reflect
+// the relation between |url| and |document_origin|.
+const flat::UrlRule* FindMatchInFlatUrlPatternIndex(
+ const flat::UrlPatternIndex& index,
+ const GURL& url,
+ const url::Origin& document_origin,
+ proto::ElementType element_type,
+ proto::ActivationType activation_type,
+ bool is_third_party,
+ bool disable_generic_rules) {
+ const FlatNGramIndex* hash_table = index.ngram_index();
+ const flat::NGramToRules* empty_slot = index.ngram_index_empty_slot();
DCHECK_NE(hash_table, nullptr);
NGramHashTableProber prober;
@@ -543,62 +521,46 @@ bool IsMatch(const flat::UrlPatternIndex* index,
const flat::NGramToRules* entry = hash_table->Get(slot_index);
if (entry == empty_slot)
continue;
- if (MatchesAny(entry->rule_list(), url, document_origin, element_type,
- activation_type, is_third_party, disable_generic_rules)) {
- return true;
- }
+ const flat::UrlRule* rule = FindMatchAmongCandidates(
+ entry->rule_list(), url, document_origin, element_type, activation_type,
+ is_third_party, disable_generic_rules);
+ if (rule)
+ return rule;
}
- const FlatUrlRuleList* rules = index->fallback_rules();
- return MatchesAny(rules, url, document_origin, element_type, activation_type,
- is_third_party, disable_generic_rules);
+ const FlatUrlRuleList* rules = index.fallback_rules();
+ return FindMatchAmongCandidates(rules, url, document_origin, element_type,
+ activation_type, is_third_party,
+ disable_generic_rules);
}
} // namespace
-// static
-bool IndexedRulesetMatcher::Verify(const uint8_t* buffer, size_t size) {
- flatbuffers::Verifier verifier(buffer, size);
- return flat::VerifyIndexedRulesetBuffer(verifier);
+UrlPatternIndexMatcher::UrlPatternIndexMatcher(
+ const flat::UrlPatternIndex* flat_index)
+ : flat_index_(flat_index) {
+ DCHECK(!flat_index || flat_index->n() == kNGramSize);
}
-IndexedRulesetMatcher::IndexedRulesetMatcher(const uint8_t* buffer, size_t size)
- : root_(flat::GetIndexedRuleset(buffer)) {
- const flat::UrlPatternIndex* index = root_->blacklist_index();
- DCHECK(!index || index->n() == kNGramSize);
- index = root_->whitelist_index();
- DCHECK(!index || index->n() == kNGramSize);
-}
+UrlPatternIndexMatcher::~UrlPatternIndexMatcher() = default;
-bool IndexedRulesetMatcher::ShouldDisableFilteringForDocument(
- const GURL& document_url,
- const url::Origin& parent_document_origin,
- proto::ActivationType activation_type) const {
- if (!document_url.is_valid() ||
- activation_type == proto::ACTIVATION_TYPE_UNSPECIFIED) {
- return false;
- }
- return IsMatch(
- root_->activation_index(), document_url, parent_document_origin,
- proto::ELEMENT_TYPE_UNSPECIFIED, activation_type,
- FirstPartyOrigin::IsThirdParty(document_url, parent_document_origin),
- false);
-}
-
-bool IndexedRulesetMatcher::ShouldDisallowResourceLoad(
+const flat::UrlRule* UrlPatternIndexMatcher::FindMatch(
const GURL& url,
- const FirstPartyOrigin& first_party,
+ const url::Origin& first_party_origin,
proto::ElementType element_type,
+ proto::ActivationType activation_type,
+ bool is_third_party,
bool disable_generic_rules) const {
- if (!url.is_valid() || element_type == proto::ELEMENT_TYPE_UNSPECIFIED)
- return false;
- const bool is_third_party = first_party.IsThirdParty(url);
- return IsMatch(root_->blacklist_index(), url, first_party.origin(),
- element_type, proto::ACTIVATION_TYPE_UNSPECIFIED,
- is_third_party, disable_generic_rules) &&
- !IsMatch(root_->whitelist_index(), url, first_party.origin(),
- element_type, proto::ACTIVATION_TYPE_UNSPECIFIED,
- is_third_party, disable_generic_rules);
+ if (!flat_index_ || !url.is_valid())
+ return nullptr;
+ if ((element_type == proto::ELEMENT_TYPE_UNSPECIFIED) ==
+ (activation_type == proto::ACTIVATION_TYPE_UNSPECIFIED)) {
+ return nullptr;
+ }
+
+ return FindMatchInFlatUrlPatternIndex(*flat_index_, url, first_party_origin,
+ element_type, activation_type,
+ is_third_party, disable_generic_rules);
}
} // namespace subresource_filter
« no previous file with comments | « components/subresource_filter/core/common/url_pattern_index.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698