Index: components/subresource_filter/core/common/url_pattern_index.cc |
diff --git a/components/subresource_filter/core/common/indexed_ruleset.cc b/components/subresource_filter/core/common/url_pattern_index.cc |
similarity index 70% |
copy from components/subresource_filter/core/common/indexed_ruleset.cc |
copy to components/subresource_filter/core/common/url_pattern_index.cc |
index 66d08f4c4a41cfa0bfbd1487484fa45eed71c2e0..49c3cb919e1e31f82276f9b38513b83ef4a0afcf 100644 |
--- a/components/subresource_filter/core/common/indexed_ruleset.cc |
+++ b/components/subresource_filter/core/common/url_pattern_index.cc |
@@ -1,8 +1,8 @@ |
-// Copyright 2016 The Chromium Authors. All rights reserved. |
+// Copyright 2017 The Chromium Authors. All rights reserved. |
// Use of this source code is governed by a BSD-style license that can be |
// found in the LICENSE file. |
-#include "components/subresource_filter/core/common/indexed_ruleset.h" |
+#include "components/subresource_filter/core/common/url_pattern_index.h" |
#include <algorithm> |
#include <limits> |
@@ -11,10 +11,10 @@ |
#include "base/logging.h" |
#include "base/numerics/safe_conversions.h" |
#include "base/strings/string_util.h" |
-#include "components/subresource_filter/core/common/first_party_origin.h" |
#include "components/subresource_filter/core/common/ngram_extractor.h" |
#include "components/subresource_filter/core/common/url_pattern.h" |
-#include "third_party/flatbuffers/src/include/flatbuffers/flatbuffers.h" |
+#include "url/gurl.h" |
+#include "url/origin.h" |
namespace subresource_filter { |
@@ -49,7 +49,8 @@ class UrlRuleFlatBufferConverter { |
// Creates the converter, and initializes |is_convertible| bit. If |
// |is_convertible| == true, then all the fields, needed for serializing the |
// |rule| to FlatBuffer, are initialized (|options|, |anchor_right|, etc.). |
- UrlRuleFlatBufferConverter(const proto::UrlRule& rule) : rule_(rule) { |
+ explicit UrlRuleFlatBufferConverter(const proto::UrlRule& rule) |
+ : rule_(rule) { |
is_convertible_ = InitializeOptions() && InitializeElementTypes() && |
InitializeActivationTypes() && InitializeUrlPattern() && |
IsMeaningful(); |
@@ -60,12 +61,9 @@ class UrlRuleFlatBufferConverter { |
// this client version. |
bool is_convertible() const { return is_convertible_; } |
- bool has_element_types() const { return !!element_types_; } |
- bool has_activation_types() const { return !!activation_types_; } |
- |
// Writes the URL |rule| to the FlatBuffer using the |builder|, and returns |
// the offset to the serialized rule. |
- flatbuffers::Offset<flat::UrlRule> SerializeConvertedRule( |
+ UrlRuleOffset SerializeConvertedRule( |
flatbuffers::FlatBufferBuilder* builder) const { |
DCHECK(is_convertible()); |
@@ -195,11 +193,9 @@ class UrlRuleFlatBufferConverter { |
"Activation types can not be stored in uint8_t."); |
activation_types_ = static_cast<uint8_t>(rule_.activation_types()); |
- // Ignore unknown activation types. |
- activation_types_ &= proto::ACTIVATION_TYPE_ALL; |
- // No need in CSS activation, because the CSS rules are not supported. |
+ // Only the following activation types are supported; ignore the others. |
activation_types_ &= |
- ~(proto::ACTIVATION_TYPE_ELEMHIDE | proto::ACTIVATION_TYPE_GENERICHIDE); |
+ proto::ACTIVATION_TYPE_DOCUMENT | proto::ACTIVATION_TYPE_GENERICBLOCK; |
return true; |
} |
@@ -246,60 +242,71 @@ class UrlRuleFlatBufferConverter { |
} // namespace |
-// RulesetIndexer -------------------------------------------------------------- |
- |
-// static |
-const int RulesetIndexer::kIndexedFormatVersion = 17; |
- |
-RulesetIndexer::MutableUrlPatternIndex::MutableUrlPatternIndex() = default; |
-RulesetIndexer::MutableUrlPatternIndex::~MutableUrlPatternIndex() = default; |
- |
-RulesetIndexer::RulesetIndexer() = default; |
-RulesetIndexer::~RulesetIndexer() = default; |
+// Helpers. -------------------------------------------------------------------- |
-bool RulesetIndexer::AddUrlRule(const proto::UrlRule& rule) { |
+UrlRuleOffset SerializeUrlRule(const proto::UrlRule& rule, |
+ flatbuffers::FlatBufferBuilder* builder) { |
+ DCHECK(builder); |
UrlRuleFlatBufferConverter converter(rule); |
if (!converter.is_convertible()) |
- return false; |
+ return UrlRuleOffset(); |
DCHECK_NE(rule.url_pattern_type(), proto::URL_PATTERN_TYPE_REGEXP); |
- auto rule_offset = converter.SerializeConvertedRule(&builder_); |
- |
- auto add_rule_to_index = [&rule, rule_offset](MutableUrlPatternIndex* index) { |
- NGram ngram = |
- GetMostDistinctiveNGram(index->ngram_index, rule.url_pattern()); |
- if (ngram) { |
- index->ngram_index[ngram].push_back(rule_offset); |
- } else { |
- // TODO(pkalinnikov): Index fallback rules as well. |
- index->fallback_rules.push_back(rule_offset); |
- } |
- }; |
+ return converter.SerializeConvertedRule(builder); |
+} |
- if (rule.semantics() == proto::RULE_SEMANTICS_BLACKLIST) { |
- add_rule_to_index(&blacklist_); |
+// UrlPatternIndexBuilder ------------------------------------------------------ |
+ |
+UrlPatternIndexBuilder::UrlPatternIndexBuilder( |
+ flatbuffers::FlatBufferBuilder* flat_builder) |
+ : flat_builder_(flat_builder) { |
+ DCHECK(flat_builder_); |
+} |
+ |
+UrlPatternIndexBuilder::~UrlPatternIndexBuilder() = default; |
+ |
+void UrlPatternIndexBuilder::IndexUrlRule(UrlRuleOffset offset) { |
+ DCHECK(offset.o); |
+ |
+ const auto* rule = flatbuffers::GetTemporaryPointer(*flat_builder_, offset); |
+ DCHECK(rule); |
+ NGram ngram = GetMostDistinctiveNGram(ToStringPiece(rule->url_pattern())); |
+ |
+ if (ngram) { |
+ ngram_index_[ngram].push_back(offset); |
} else { |
- if (converter.has_element_types()) |
- add_rule_to_index(&whitelist_); |
- if (converter.has_activation_types()) |
- add_rule_to_index(&activation_); |
+ // TODO(pkalinnikov): Index fallback rules as well. |
+ fallback_rules_.push_back(offset); |
} |
- |
- return true; |
} |
-void RulesetIndexer::Finish() { |
- auto blacklist_offset = SerializeUrlPatternIndex(blacklist_); |
- auto whitelist_offset = SerializeUrlPatternIndex(whitelist_); |
- auto activation_offset = SerializeUrlPatternIndex(activation_); |
+UrlPatternIndexOffset UrlPatternIndexBuilder::Finish() { |
+ std::vector<flatbuffers::Offset<flat::NGramToRules>> flat_hash_table( |
+ ngram_index_.table_size()); |
- auto url_rules_index_offset = flat::CreateIndexedRuleset( |
- builder_, blacklist_offset, whitelist_offset, activation_offset); |
- builder_.Finish(url_rules_index_offset); |
+ flatbuffers::Offset<flat::NGramToRules> empty_slot_offset = |
+ flat::CreateNGramToRules(*flat_builder_); |
+ for (size_t i = 0, size = ngram_index_.table_size(); i != size; ++i) { |
+ const uint32_t entry_index = ngram_index_.hash_table()[i]; |
+ if (entry_index >= ngram_index_.size()) { |
+ flat_hash_table[i] = empty_slot_offset; |
+ continue; |
+ } |
+ const MutableNGramIndex::EntryType& entry = |
+ ngram_index_.entries()[entry_index]; |
+ auto rules_offset = flat_builder_->CreateVector(entry.second); |
+ flat_hash_table[i] = |
+ flat::CreateNGramToRules(*flat_builder_, entry.first, rules_offset); |
+ } |
+ auto ngram_index_offset = flat_builder_->CreateVector(flat_hash_table); |
+ |
+ auto fallback_rules_offset = flat_builder_->CreateVector(fallback_rules_); |
+ |
+ return flat::CreateUrlPatternIndex(*flat_builder_, kNGramSize, |
+ ngram_index_offset, empty_slot_offset, |
+ fallback_rules_offset); |
} |
-// static |
-NGram RulesetIndexer::GetMostDistinctiveNGram( |
- const MutableNGramIndex& ngram_index, |
+NGram UrlPatternIndexBuilder::GetMostDistinctiveNGram( |
base::StringPiece pattern) { |
size_t min_list_size = std::numeric_limits<size_t>::max(); |
NGram best_ngram = 0; |
@@ -308,7 +315,7 @@ NGram RulesetIndexer::GetMostDistinctiveNGram( |
pattern, [](char c) { return c == '*' || c == '^'; }); |
for (uint64_t ngram : ngrams) { |
- const MutableUrlRuleList* rules = ngram_index.Get(ngram); |
+ const MutableUrlRuleList* rules = ngram_index_.Get(ngram); |
const size_t list_size = rules ? rules->size() : 0; |
if (list_size < min_list_size) { |
// TODO(pkalinnikov): Pick random of the same-sized lists. |
@@ -322,36 +329,7 @@ NGram RulesetIndexer::GetMostDistinctiveNGram( |
return best_ngram; |
} |
-flatbuffers::Offset<flat::UrlPatternIndex> |
-RulesetIndexer::SerializeUrlPatternIndex(const MutableUrlPatternIndex& index) { |
- const MutableNGramIndex& ngram_index = index.ngram_index; |
- |
- std::vector<flatbuffers::Offset<flat::NGramToRules>> flat_hash_table( |
- ngram_index.table_size()); |
- |
- flatbuffers::Offset<flat::NGramToRules> empty_slot_offset = |
- flat::CreateNGramToRules(builder_); |
- for (size_t i = 0, size = ngram_index.table_size(); i != size; ++i) { |
- const uint32_t entry_index = ngram_index.hash_table()[i]; |
- if (entry_index >= ngram_index.size()) { |
- flat_hash_table[i] = empty_slot_offset; |
- continue; |
- } |
- const MutableNGramIndex::EntryType& entry = |
- ngram_index.entries()[entry_index]; |
- auto rules_offset = builder_.CreateVector(entry.second); |
- flat_hash_table[i] = |
- flat::CreateNGramToRules(builder_, entry.first, rules_offset); |
- } |
- auto ngram_index_offset = builder_.CreateVector(flat_hash_table); |
- |
- auto fallback_rules_offset = builder_.CreateVector(index.fallback_rules); |
- |
- return flat::CreateUrlPatternIndex(builder_, kNGramSize, ngram_index_offset, |
- empty_slot_offset, fallback_rules_offset); |
-} |
- |
-// IndexedRulesetMatcher ------------------------------------------------------- |
+// UrlPatternIndex ------------------------------------------------------------- |
namespace { |
@@ -458,8 +436,8 @@ bool DoesRuleFlagsMatch(const flat::UrlRule& rule, |
proto::ElementType element_type, |
proto::ActivationType activation_type, |
bool is_third_party) { |
- DCHECK(element_type == proto::ELEMENT_TYPE_UNSPECIFIED || |
- activation_type == proto::ACTIVATION_TYPE_UNSPECIFIED); |
+ DCHECK((element_type == proto::ELEMENT_TYPE_UNSPECIFIED) != |
+ (activation_type == proto::ACTIVATION_TYPE_UNSPECIFIED)); |
if (element_type != proto::ELEMENT_TYPE_UNSPECIFIED && |
!(rule.element_types() & element_type)) { |
@@ -482,16 +460,17 @@ bool DoesRuleFlagsMatch(const flat::UrlRule& rule, |
return true; |
} |
-bool MatchesAny(const FlatUrlRuleList* rules, |
- const GURL& url, |
- const url::Origin& document_origin, |
- proto::ElementType element_type, |
- proto::ActivationType activation_type, |
- bool is_third_party, |
- bool disable_generic_rules) { |
- if (!rules) |
- return false; |
- for (const flat::UrlRule* rule : *rules) { |
+const flat::UrlRule* FindMatchAmongCandidates( |
+ const FlatUrlRuleList* candidates, |
+ const GURL& url, |
+ const url::Origin& document_origin, |
+ proto::ElementType element_type, |
+ proto::ActivationType activation_type, |
+ bool is_third_party, |
+ bool disable_generic_rules) { |
+ if (!candidates) |
+ return nullptr; |
+ for (const flat::UrlRule* rule : *candidates) { |
DCHECK_NE(rule, nullptr); |
DCHECK_NE(rule->url_pattern_type(), flat::UrlPatternType_REGEXP); |
if (!DoesRuleFlagsMatch(*rule, element_type, activation_type, |
@@ -503,27 +482,26 @@ bool MatchesAny(const FlatUrlRuleList* rules, |
if (DoesOriginMatchDomainList(document_origin, *rule, |
disable_generic_rules)) { |
- return true; |
+ return rule; |
} |
} |
- return false; |
+ return nullptr; |
} |
-// Returns whether the network request matches a particular part of the index. |
-// |is_third_party| should reflect the relation between |url| and |
-// |document_origin|. |
-bool IsMatch(const flat::UrlPatternIndex* index, |
- const GURL& url, |
- const url::Origin& document_origin, |
- proto::ElementType element_type, |
- proto::ActivationType activation_type, |
- bool is_third_party, |
- bool disable_generic_rules) { |
- if (!index) |
- return false; |
- const FlatNGramIndex* hash_table = index->ngram_index(); |
- const flat::NGramToRules* empty_slot = index->ngram_index_empty_slot(); |
+// Returns a rule from the FlatBuffers-format |index| that matches the network |
+// request, or nullptr if none does. |is_third_party| should reflect the |
+// relation between |url| and |document_origin|. |
+const flat::UrlRule* FindMatchInFlatUrlPatternIndex( |
+ const flat::UrlPatternIndex& index, |
+ const GURL& url, |
+ const url::Origin& document_origin, |
+ proto::ElementType element_type, |
+ proto::ActivationType activation_type, |
+ bool is_third_party, |
+ bool disable_generic_rules) { |
+ const FlatNGramIndex* hash_table = index.ngram_index(); |
+ const flat::NGramToRules* empty_slot = index.ngram_index_empty_slot(); |
DCHECK_NE(hash_table, nullptr); |
NGramHashTableProber prober; |
@@ -543,62 +521,46 @@ bool IsMatch(const flat::UrlPatternIndex* index, |
const flat::NGramToRules* entry = hash_table->Get(slot_index); |
if (entry == empty_slot) |
continue; |
- if (MatchesAny(entry->rule_list(), url, document_origin, element_type, |
- activation_type, is_third_party, disable_generic_rules)) { |
- return true; |
- } |
+ const flat::UrlRule* rule = FindMatchAmongCandidates( |
+ entry->rule_list(), url, document_origin, element_type, activation_type, |
+ is_third_party, disable_generic_rules); |
+ if (rule) |
+ return rule; |
} |
- const FlatUrlRuleList* rules = index->fallback_rules(); |
- return MatchesAny(rules, url, document_origin, element_type, activation_type, |
- is_third_party, disable_generic_rules); |
+ const FlatUrlRuleList* rules = index.fallback_rules(); |
+ return FindMatchAmongCandidates(rules, url, document_origin, element_type, |
+ activation_type, is_third_party, |
+ disable_generic_rules); |
} |
} // namespace |
-// static |
-bool IndexedRulesetMatcher::Verify(const uint8_t* buffer, size_t size) { |
- flatbuffers::Verifier verifier(buffer, size); |
- return flat::VerifyIndexedRulesetBuffer(verifier); |
+UrlPatternIndexMatcher::UrlPatternIndexMatcher( |
+ const flat::UrlPatternIndex* flat_index) |
+ : flat_index_(flat_index) { |
+ DCHECK(!flat_index || flat_index->n() == kNGramSize); |
} |
-IndexedRulesetMatcher::IndexedRulesetMatcher(const uint8_t* buffer, size_t size) |
- : root_(flat::GetIndexedRuleset(buffer)) { |
- const flat::UrlPatternIndex* index = root_->blacklist_index(); |
- DCHECK(!index || index->n() == kNGramSize); |
- index = root_->whitelist_index(); |
- DCHECK(!index || index->n() == kNGramSize); |
-} |
+UrlPatternIndexMatcher::~UrlPatternIndexMatcher() = default; |
-bool IndexedRulesetMatcher::ShouldDisableFilteringForDocument( |
- const GURL& document_url, |
- const url::Origin& parent_document_origin, |
- proto::ActivationType activation_type) const { |
- if (!document_url.is_valid() || |
- activation_type == proto::ACTIVATION_TYPE_UNSPECIFIED) { |
- return false; |
- } |
- return IsMatch( |
- root_->activation_index(), document_url, parent_document_origin, |
- proto::ELEMENT_TYPE_UNSPECIFIED, activation_type, |
- FirstPartyOrigin::IsThirdParty(document_url, parent_document_origin), |
- false); |
-} |
- |
-bool IndexedRulesetMatcher::ShouldDisallowResourceLoad( |
+const flat::UrlRule* UrlPatternIndexMatcher::FindMatch( |
const GURL& url, |
- const FirstPartyOrigin& first_party, |
+ const url::Origin& first_party_origin, |
proto::ElementType element_type, |
+ proto::ActivationType activation_type, |
+ bool is_third_party, |
bool disable_generic_rules) const { |
- if (!url.is_valid() || element_type == proto::ELEMENT_TYPE_UNSPECIFIED) |
- return false; |
- const bool is_third_party = first_party.IsThirdParty(url); |
- return IsMatch(root_->blacklist_index(), url, first_party.origin(), |
- element_type, proto::ACTIVATION_TYPE_UNSPECIFIED, |
- is_third_party, disable_generic_rules) && |
- !IsMatch(root_->whitelist_index(), url, first_party.origin(), |
- element_type, proto::ACTIVATION_TYPE_UNSPECIFIED, |
- is_third_party, disable_generic_rules); |
+ if (!flat_index_ || !url.is_valid()) |
+ return nullptr; |
+ if ((element_type == proto::ELEMENT_TYPE_UNSPECIFIED) == |
+ (activation_type == proto::ACTIVATION_TYPE_UNSPECIFIED)) { |
+ return nullptr; |
+ } |
+ |
+ return FindMatchInFlatUrlPatternIndex(*flat_index_, url, first_party_origin, |
+ element_type, activation_type, |
+ is_third_party, disable_generic_rules); |
} |
} // namespace subresource_filter |