| Index: components/subresource_filter/core/common/url_pattern_index.cc
|
| diff --git a/components/subresource_filter/core/common/indexed_ruleset.cc b/components/subresource_filter/core/common/url_pattern_index.cc
|
| similarity index 70%
|
| copy from components/subresource_filter/core/common/indexed_ruleset.cc
|
| copy to components/subresource_filter/core/common/url_pattern_index.cc
|
| index 66d08f4c4a41cfa0bfbd1487484fa45eed71c2e0..49c3cb919e1e31f82276f9b38513b83ef4a0afcf 100644
|
| --- a/components/subresource_filter/core/common/indexed_ruleset.cc
|
| +++ b/components/subresource_filter/core/common/url_pattern_index.cc
|
| @@ -1,8 +1,8 @@
|
| -// Copyright 2016 The Chromium Authors. All rights reserved.
|
| +// Copyright 2017 The Chromium Authors. All rights reserved.
|
| // Use of this source code is governed by a BSD-style license that can be
|
| // found in the LICENSE file.
|
|
|
| -#include "components/subresource_filter/core/common/indexed_ruleset.h"
|
| +#include "components/subresource_filter/core/common/url_pattern_index.h"
|
|
|
| #include <algorithm>
|
| #include <limits>
|
| @@ -11,10 +11,10 @@
|
| #include "base/logging.h"
|
| #include "base/numerics/safe_conversions.h"
|
| #include "base/strings/string_util.h"
|
| -#include "components/subresource_filter/core/common/first_party_origin.h"
|
| #include "components/subresource_filter/core/common/ngram_extractor.h"
|
| #include "components/subresource_filter/core/common/url_pattern.h"
|
| -#include "third_party/flatbuffers/src/include/flatbuffers/flatbuffers.h"
|
| +#include "url/gurl.h"
|
| +#include "url/origin.h"
|
|
|
| namespace subresource_filter {
|
|
|
| @@ -49,7 +49,8 @@ class UrlRuleFlatBufferConverter {
|
| // Creates the converter, and initializes |is_convertible| bit. If
|
| // |is_convertible| == true, then all the fields, needed for serializing the
|
| // |rule| to FlatBuffer, are initialized (|options|, |anchor_right|, etc.).
|
| - UrlRuleFlatBufferConverter(const proto::UrlRule& rule) : rule_(rule) {
|
| + explicit UrlRuleFlatBufferConverter(const proto::UrlRule& rule)
|
| + : rule_(rule) {
|
| is_convertible_ = InitializeOptions() && InitializeElementTypes() &&
|
| InitializeActivationTypes() && InitializeUrlPattern() &&
|
| IsMeaningful();
|
| @@ -60,12 +61,9 @@ class UrlRuleFlatBufferConverter {
|
| // this client version.
|
| bool is_convertible() const { return is_convertible_; }
|
|
|
| - bool has_element_types() const { return !!element_types_; }
|
| - bool has_activation_types() const { return !!activation_types_; }
|
| -
|
| // Writes the URL |rule| to the FlatBuffer using the |builder|, and returns
|
| // the offset to the serialized rule.
|
| - flatbuffers::Offset<flat::UrlRule> SerializeConvertedRule(
|
| + UrlRuleOffset SerializeConvertedRule(
|
| flatbuffers::FlatBufferBuilder* builder) const {
|
| DCHECK(is_convertible());
|
|
|
| @@ -195,11 +193,9 @@ class UrlRuleFlatBufferConverter {
|
| "Activation types can not be stored in uint8_t.");
|
| activation_types_ = static_cast<uint8_t>(rule_.activation_types());
|
|
|
| - // Ignore unknown activation types.
|
| - activation_types_ &= proto::ACTIVATION_TYPE_ALL;
|
| - // No need in CSS activation, because the CSS rules are not supported.
|
| + // Only the following activation types are supported; ignore the others.
|
| activation_types_ &=
|
| - ~(proto::ACTIVATION_TYPE_ELEMHIDE | proto::ACTIVATION_TYPE_GENERICHIDE);
|
| + proto::ACTIVATION_TYPE_DOCUMENT | proto::ACTIVATION_TYPE_GENERICBLOCK;
|
|
|
| return true;
|
| }
|
| @@ -246,60 +242,71 @@ class UrlRuleFlatBufferConverter {
|
|
|
| } // namespace
|
|
|
| -// RulesetIndexer --------------------------------------------------------------
|
| -
|
| -// static
|
| -const int RulesetIndexer::kIndexedFormatVersion = 17;
|
| -
|
| -RulesetIndexer::MutableUrlPatternIndex::MutableUrlPatternIndex() = default;
|
| -RulesetIndexer::MutableUrlPatternIndex::~MutableUrlPatternIndex() = default;
|
| -
|
| -RulesetIndexer::RulesetIndexer() = default;
|
| -RulesetIndexer::~RulesetIndexer() = default;
|
| +// Helpers. --------------------------------------------------------------------
|
|
|
| -bool RulesetIndexer::AddUrlRule(const proto::UrlRule& rule) {
|
| +UrlRuleOffset SerializeUrlRule(const proto::UrlRule& rule,
|
| + flatbuffers::FlatBufferBuilder* builder) {
|
| + DCHECK(builder);
|
| UrlRuleFlatBufferConverter converter(rule);
|
| if (!converter.is_convertible())
|
| - return false;
|
| + return UrlRuleOffset();
|
| DCHECK_NE(rule.url_pattern_type(), proto::URL_PATTERN_TYPE_REGEXP);
|
| - auto rule_offset = converter.SerializeConvertedRule(&builder_);
|
| -
|
| - auto add_rule_to_index = [&rule, rule_offset](MutableUrlPatternIndex* index) {
|
| - NGram ngram =
|
| - GetMostDistinctiveNGram(index->ngram_index, rule.url_pattern());
|
| - if (ngram) {
|
| - index->ngram_index[ngram].push_back(rule_offset);
|
| - } else {
|
| - // TODO(pkalinnikov): Index fallback rules as well.
|
| - index->fallback_rules.push_back(rule_offset);
|
| - }
|
| - };
|
| + return converter.SerializeConvertedRule(builder);
|
| +}
|
|
|
| - if (rule.semantics() == proto::RULE_SEMANTICS_BLACKLIST) {
|
| - add_rule_to_index(&blacklist_);
|
| +// UrlPatternIndexBuilder ------------------------------------------------------
|
| +
|
| +UrlPatternIndexBuilder::UrlPatternIndexBuilder(
|
| + flatbuffers::FlatBufferBuilder* flat_builder)
|
| + : flat_builder_(flat_builder) {
|
| + DCHECK(flat_builder_);
|
| +}
|
| +
|
| +UrlPatternIndexBuilder::~UrlPatternIndexBuilder() = default;
|
| +
|
| +void UrlPatternIndexBuilder::IndexUrlRule(UrlRuleOffset offset) {
|
| + DCHECK(offset.o);
|
| +
|
| + const auto* rule = flatbuffers::GetTemporaryPointer(*flat_builder_, offset);
|
| + DCHECK(rule);
|
| + NGram ngram = GetMostDistinctiveNGram(ToStringPiece(rule->url_pattern()));
|
| +
|
| + if (ngram) {
|
| + ngram_index_[ngram].push_back(offset);
|
| } else {
|
| - if (converter.has_element_types())
|
| - add_rule_to_index(&whitelist_);
|
| - if (converter.has_activation_types())
|
| - add_rule_to_index(&activation_);
|
| + // TODO(pkalinnikov): Index fallback rules as well.
|
| + fallback_rules_.push_back(offset);
|
| }
|
| -
|
| - return true;
|
| }
|
|
|
| -void RulesetIndexer::Finish() {
|
| - auto blacklist_offset = SerializeUrlPatternIndex(blacklist_);
|
| - auto whitelist_offset = SerializeUrlPatternIndex(whitelist_);
|
| - auto activation_offset = SerializeUrlPatternIndex(activation_);
|
| +UrlPatternIndexOffset UrlPatternIndexBuilder::Finish() {
|
| + std::vector<flatbuffers::Offset<flat::NGramToRules>> flat_hash_table(
|
| + ngram_index_.table_size());
|
|
|
| - auto url_rules_index_offset = flat::CreateIndexedRuleset(
|
| - builder_, blacklist_offset, whitelist_offset, activation_offset);
|
| - builder_.Finish(url_rules_index_offset);
|
| + flatbuffers::Offset<flat::NGramToRules> empty_slot_offset =
|
| + flat::CreateNGramToRules(*flat_builder_);
|
| + for (size_t i = 0, size = ngram_index_.table_size(); i != size; ++i) {
|
| + const uint32_t entry_index = ngram_index_.hash_table()[i];
|
| + if (entry_index >= ngram_index_.size()) {
|
| + flat_hash_table[i] = empty_slot_offset;
|
| + continue;
|
| + }
|
| + const MutableNGramIndex::EntryType& entry =
|
| + ngram_index_.entries()[entry_index];
|
| + auto rules_offset = flat_builder_->CreateVector(entry.second);
|
| + flat_hash_table[i] =
|
| + flat::CreateNGramToRules(*flat_builder_, entry.first, rules_offset);
|
| + }
|
| + auto ngram_index_offset = flat_builder_->CreateVector(flat_hash_table);
|
| +
|
| + auto fallback_rules_offset = flat_builder_->CreateVector(fallback_rules_);
|
| +
|
| + return flat::CreateUrlPatternIndex(*flat_builder_, kNGramSize,
|
| + ngram_index_offset, empty_slot_offset,
|
| + fallback_rules_offset);
|
| }
|
|
|
| -// static
|
| -NGram RulesetIndexer::GetMostDistinctiveNGram(
|
| - const MutableNGramIndex& ngram_index,
|
| +NGram UrlPatternIndexBuilder::GetMostDistinctiveNGram(
|
| base::StringPiece pattern) {
|
| size_t min_list_size = std::numeric_limits<size_t>::max();
|
| NGram best_ngram = 0;
|
| @@ -308,7 +315,7 @@ NGram RulesetIndexer::GetMostDistinctiveNGram(
|
| pattern, [](char c) { return c == '*' || c == '^'; });
|
|
|
| for (uint64_t ngram : ngrams) {
|
| - const MutableUrlRuleList* rules = ngram_index.Get(ngram);
|
| + const MutableUrlRuleList* rules = ngram_index_.Get(ngram);
|
| const size_t list_size = rules ? rules->size() : 0;
|
| if (list_size < min_list_size) {
|
| // TODO(pkalinnikov): Pick random of the same-sized lists.
|
| @@ -322,36 +329,7 @@ NGram RulesetIndexer::GetMostDistinctiveNGram(
|
| return best_ngram;
|
| }
|
|
|
| -flatbuffers::Offset<flat::UrlPatternIndex>
|
| -RulesetIndexer::SerializeUrlPatternIndex(const MutableUrlPatternIndex& index) {
|
| - const MutableNGramIndex& ngram_index = index.ngram_index;
|
| -
|
| - std::vector<flatbuffers::Offset<flat::NGramToRules>> flat_hash_table(
|
| - ngram_index.table_size());
|
| -
|
| - flatbuffers::Offset<flat::NGramToRules> empty_slot_offset =
|
| - flat::CreateNGramToRules(builder_);
|
| - for (size_t i = 0, size = ngram_index.table_size(); i != size; ++i) {
|
| - const uint32_t entry_index = ngram_index.hash_table()[i];
|
| - if (entry_index >= ngram_index.size()) {
|
| - flat_hash_table[i] = empty_slot_offset;
|
| - continue;
|
| - }
|
| - const MutableNGramIndex::EntryType& entry =
|
| - ngram_index.entries()[entry_index];
|
| - auto rules_offset = builder_.CreateVector(entry.second);
|
| - flat_hash_table[i] =
|
| - flat::CreateNGramToRules(builder_, entry.first, rules_offset);
|
| - }
|
| - auto ngram_index_offset = builder_.CreateVector(flat_hash_table);
|
| -
|
| - auto fallback_rules_offset = builder_.CreateVector(index.fallback_rules);
|
| -
|
| - return flat::CreateUrlPatternIndex(builder_, kNGramSize, ngram_index_offset,
|
| - empty_slot_offset, fallback_rules_offset);
|
| -}
|
| -
|
| -// IndexedRulesetMatcher -------------------------------------------------------
|
| +// UrlPatternIndex -------------------------------------------------------------
|
|
|
| namespace {
|
|
|
| @@ -458,8 +436,8 @@ bool DoesRuleFlagsMatch(const flat::UrlRule& rule,
|
| proto::ElementType element_type,
|
| proto::ActivationType activation_type,
|
| bool is_third_party) {
|
| - DCHECK(element_type == proto::ELEMENT_TYPE_UNSPECIFIED ||
|
| - activation_type == proto::ACTIVATION_TYPE_UNSPECIFIED);
|
| + DCHECK((element_type == proto::ELEMENT_TYPE_UNSPECIFIED) !=
|
| + (activation_type == proto::ACTIVATION_TYPE_UNSPECIFIED));
|
|
|
| if (element_type != proto::ELEMENT_TYPE_UNSPECIFIED &&
|
| !(rule.element_types() & element_type)) {
|
| @@ -482,16 +460,17 @@ bool DoesRuleFlagsMatch(const flat::UrlRule& rule,
|
| return true;
|
| }
|
|
|
| -bool MatchesAny(const FlatUrlRuleList* rules,
|
| - const GURL& url,
|
| - const url::Origin& document_origin,
|
| - proto::ElementType element_type,
|
| - proto::ActivationType activation_type,
|
| - bool is_third_party,
|
| - bool disable_generic_rules) {
|
| - if (!rules)
|
| - return false;
|
| - for (const flat::UrlRule* rule : *rules) {
|
| +const flat::UrlRule* FindMatchAmongCandidates(
|
| + const FlatUrlRuleList* candidates,
|
| + const GURL& url,
|
| + const url::Origin& document_origin,
|
| + proto::ElementType element_type,
|
| + proto::ActivationType activation_type,
|
| + bool is_third_party,
|
| + bool disable_generic_rules) {
|
| + if (!candidates)
|
| + return nullptr;
|
| + for (const flat::UrlRule* rule : *candidates) {
|
| DCHECK_NE(rule, nullptr);
|
| DCHECK_NE(rule->url_pattern_type(), flat::UrlPatternType_REGEXP);
|
| if (!DoesRuleFlagsMatch(*rule, element_type, activation_type,
|
| @@ -503,27 +482,26 @@ bool MatchesAny(const FlatUrlRuleList* rules,
|
|
|
| if (DoesOriginMatchDomainList(document_origin, *rule,
|
| disable_generic_rules)) {
|
| - return true;
|
| + return rule;
|
| }
|
| }
|
|
|
| - return false;
|
| + return nullptr;
|
| }
|
|
|
| -// Returns whether the network request matches a particular part of the index.
|
| -// |is_third_party| should reflect the relation between |url| and
|
| -// |document_origin|.
|
| -bool IsMatch(const flat::UrlPatternIndex* index,
|
| - const GURL& url,
|
| - const url::Origin& document_origin,
|
| - proto::ElementType element_type,
|
| - proto::ActivationType activation_type,
|
| - bool is_third_party,
|
| - bool disable_generic_rules) {
|
| - if (!index)
|
| - return false;
|
| - const FlatNGramIndex* hash_table = index->ngram_index();
|
| - const flat::NGramToRules* empty_slot = index->ngram_index_empty_slot();
|
| +// Returns a rule from the UrlPattern |index| (in its FlatBuffers format) that
|
| +// matches the network request, or nullptr if no rule matches. |is_third_party|
|
| +// should reflect the relation between |url| and |document_origin|.
|
| +const flat::UrlRule* FindMatchInFlatUrlPatternIndex(
|
| + const flat::UrlPatternIndex& index,
|
| + const GURL& url,
|
| + const url::Origin& document_origin,
|
| + proto::ElementType element_type,
|
| + proto::ActivationType activation_type,
|
| + bool is_third_party,
|
| + bool disable_generic_rules) {
|
| + const FlatNGramIndex* hash_table = index.ngram_index();
|
| + const flat::NGramToRules* empty_slot = index.ngram_index_empty_slot();
|
| DCHECK_NE(hash_table, nullptr);
|
|
|
| NGramHashTableProber prober;
|
| @@ -543,62 +521,46 @@ bool IsMatch(const flat::UrlPatternIndex* index,
|
| const flat::NGramToRules* entry = hash_table->Get(slot_index);
|
| if (entry == empty_slot)
|
| continue;
|
| - if (MatchesAny(entry->rule_list(), url, document_origin, element_type,
|
| - activation_type, is_third_party, disable_generic_rules)) {
|
| - return true;
|
| - }
|
| + const flat::UrlRule* rule = FindMatchAmongCandidates(
|
| + entry->rule_list(), url, document_origin, element_type, activation_type,
|
| + is_third_party, disable_generic_rules);
|
| + if (rule)
|
| + return rule;
|
| }
|
|
|
| - const FlatUrlRuleList* rules = index->fallback_rules();
|
| - return MatchesAny(rules, url, document_origin, element_type, activation_type,
|
| - is_third_party, disable_generic_rules);
|
| + const FlatUrlRuleList* rules = index.fallback_rules();
|
| + return FindMatchAmongCandidates(rules, url, document_origin, element_type,
|
| + activation_type, is_third_party,
|
| + disable_generic_rules);
|
| }
|
|
|
| } // namespace
|
|
|
| -// static
|
| -bool IndexedRulesetMatcher::Verify(const uint8_t* buffer, size_t size) {
|
| - flatbuffers::Verifier verifier(buffer, size);
|
| - return flat::VerifyIndexedRulesetBuffer(verifier);
|
| +UrlPatternIndexMatcher::UrlPatternIndexMatcher(
|
| + const flat::UrlPatternIndex* flat_index)
|
| + : flat_index_(flat_index) {
|
| + DCHECK(!flat_index || flat_index->n() == kNGramSize);
|
| }
|
|
|
| -IndexedRulesetMatcher::IndexedRulesetMatcher(const uint8_t* buffer, size_t size)
|
| - : root_(flat::GetIndexedRuleset(buffer)) {
|
| - const flat::UrlPatternIndex* index = root_->blacklist_index();
|
| - DCHECK(!index || index->n() == kNGramSize);
|
| - index = root_->whitelist_index();
|
| - DCHECK(!index || index->n() == kNGramSize);
|
| -}
|
| +UrlPatternIndexMatcher::~UrlPatternIndexMatcher() = default;
|
|
|
| -bool IndexedRulesetMatcher::ShouldDisableFilteringForDocument(
|
| - const GURL& document_url,
|
| - const url::Origin& parent_document_origin,
|
| - proto::ActivationType activation_type) const {
|
| - if (!document_url.is_valid() ||
|
| - activation_type == proto::ACTIVATION_TYPE_UNSPECIFIED) {
|
| - return false;
|
| - }
|
| - return IsMatch(
|
| - root_->activation_index(), document_url, parent_document_origin,
|
| - proto::ELEMENT_TYPE_UNSPECIFIED, activation_type,
|
| - FirstPartyOrigin::IsThirdParty(document_url, parent_document_origin),
|
| - false);
|
| -}
|
| -
|
| -bool IndexedRulesetMatcher::ShouldDisallowResourceLoad(
|
| +const flat::UrlRule* UrlPatternIndexMatcher::FindMatch(
|
| const GURL& url,
|
| - const FirstPartyOrigin& first_party,
|
| + const url::Origin& first_party_origin,
|
| proto::ElementType element_type,
|
| + proto::ActivationType activation_type,
|
| + bool is_third_party,
|
| bool disable_generic_rules) const {
|
| - if (!url.is_valid() || element_type == proto::ELEMENT_TYPE_UNSPECIFIED)
|
| - return false;
|
| - const bool is_third_party = first_party.IsThirdParty(url);
|
| - return IsMatch(root_->blacklist_index(), url, first_party.origin(),
|
| - element_type, proto::ACTIVATION_TYPE_UNSPECIFIED,
|
| - is_third_party, disable_generic_rules) &&
|
| - !IsMatch(root_->whitelist_index(), url, first_party.origin(),
|
| - element_type, proto::ACTIVATION_TYPE_UNSPECIFIED,
|
| - is_third_party, disable_generic_rules);
|
| + if (!flat_index_ || !url.is_valid())
|
| + return nullptr;
|
| + if ((element_type == proto::ELEMENT_TYPE_UNSPECIFIED) ==
|
| + (activation_type == proto::ACTIVATION_TYPE_UNSPECIFIED)) {
|
| + return nullptr;
|
| + }
|
| +
|
| + return FindMatchInFlatUrlPatternIndex(*flat_index_, url, first_party_origin,
|
| + element_type, activation_type,
|
| + is_third_party, disable_generic_rules);
|
| }
|
|
|
| } // namespace subresource_filter
|
|
|