| Index: components/subresource_filter/core/common/flat/rules.fbs
|
| diff --git a/components/subresource_filter/core/common/flat/rules.fbs b/components/subresource_filter/core/common/flat/rules.fbs
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..c75502838e49f43e899ce8f4ff09e60655d1bccc
|
| --- /dev/null
|
| +++ b/components/subresource_filter/core/common/flat/rules.fbs
|
| @@ -0,0 +1,117 @@
|
| +namespace subresource_filter.flat;
|
| +
|
| +// URL pattern format. Corresponds to subresource_filter::proto::UrlPatternType.
|
| +enum UrlPatternType : ubyte {
|
| + SUBSTRING,
|
| + WILDCARDED,
|
| + REGEXP,
|
| +}
|
| +
|
| +// Pattern anchor kind. Corresponds to subresource_filter::proto::AnchorType.
|
| +enum AnchorType : ubyte {
|
| + NONE,
|
| + BOUNDARY,
|
| + SUBDOMAIN,
|
| +}
|
| +
|
| +// URL rule matching options, stored in UrlRule.options. They correspond to
|
| +// several fields of subresource_filter::proto::UrlRule, but here they are
|
| +// represented as flags of a single bitmask to allow for compact storage.
|
| +enum OptionFlag : ubyte (bit_flags) {
|
| + IS_WHITELIST,
|
| + APPLIES_TO_FIRST_PARTY,
|
| + APPLIES_TO_THIRD_PARTY,
|
| + IS_MATCH_CASE,
|
| +}
|
| +
|
| +// The flat representation of a single URL rule. For more details regarding the
|
| +// fields please see the comments on subresource_filter::proto::UrlRule.
|
| +table UrlRule {
|
| + // Rule matching options, a bitmask consisting of OptionFlags.
|
| + options : ubyte;
|
| +
|
| + // A bitmask of element types, same as proto::UrlRule::element_types. The
|
| + // default of 2047 (bits 0-10 set) enables all element types except POPUP.
|
| + element_types : ushort = 2047;
|
| +
|
| + // A bitmask of activation types, same as proto::UrlRule::activation_types.
|
| + // Disables all activation types by default.
|
| + activation_types : ubyte = 0;
|
| +
|
| + // Use SUBSTRING as default, since it's the most used pattern type. Same as
|
| + // the corresponding proto::UrlRule::url_pattern_type.
|
| + url_pattern_type : UrlPatternType = SUBSTRING;
|
| +
|
| + // Use NONE as default, since most of the rules are not anchored.
|
| + anchor_left : AnchorType = NONE;
|
| + anchor_right : AnchorType = NONE;
|
| +
|
| + // The list of domains to be included/excluded from the filter's affected set.
|
| + // If a particular string in the list starts with '~' then the respective
|
| + // domain is excluded, otherwise included.
|
| + domains : [string];
|
| +
|
| + // A URL pattern in the format defined by |url_pattern_type|.
|
| + url_pattern : string;
|
| +
|
| + // The compound Knuth-Morris-Pratt failure function corresponding to
|
| + // |url_pattern|. Used for SUBSTRING and WILDCARDED URL patterns only.
|
| + //
|
| + // The |url_pattern| is split into subpatterns separated by a '*' wildcard.
|
| + // Then for each subpattern a failure function of the KMP algorithm is built,
|
| + // with the caveat that if some subpattern contains at least one '^'
|
| + // placeholder, all the separator characters in this subpattern are
|
| + // considered equivalent, and the failure function subarray is prefixed with
|
| + // the value 1.
|
| + //
|
| + // The failure functions of subpatterns are stored sequentially in the
|
| + // |failure_function| array. Some subpatterns, however, will not have a
|
| + // corresponding failure function, e.g. the first subpattern if the rule's
|
| + // |anchor_left| is BOUNDARY.
|
| + failure_function : [ubyte];
|
| +}
|
| +
|
| +// Contains an N-gram (acting as a key in a hash table) and a list of URL rules
|
| +// associated with that N-gram.
|
| +table NGramToRules {
|
| + // A string consisting of N (up to 8) non-special characters, stored in the
|
| + // lowest N (non-zero) bytes, with lower bytes holding later characters.
|
| + ngram : ulong;
|
| +
|
| + // The list of rules containing |ngram| as a substring of their URL pattern.
|
| + rule_list : [UrlRule];
|
| +}
|
| +
|
| +// A data structure used to select only a handful of URL rule candidates that
|
| +// need to be matched against a certain resource URL.
|
| +table UrlPatternIndex {
|
| + // The N of an N-gram index. Note: |n| should be between 1 and 8.
|
| + n : uint;
|
| +
|
| + // A hash table with open addressing. The keys of the table are N-grams.
|
| + ngram_index : [NGramToRules];
|
| +
|
| + // The slot that all empty slots of the |ngram_index| hash table point to.
|
| + // Note: This is a workaround needed because null offsets are not allowed as
|
| + // elements of FlatBuffer arrays.
|
| + ngram_index_empty_slot : NGramToRules;
|
| +
|
| + // A list storing the rules that don't contain any valid N-grams in their
|
| + // URL patterns. Contains all the REGEXP rules as well.
|
| + // TODO(pkalinnikov): Think about better implementation for the fallback
|
| + // index. Possibly make it a hash map and maybe merge it with the N-gram
|
| + // index, since we can treat any sequence of characters shorter than N as an
|
| + // N-gram with zero bytes used for padding.
|
| + fallback_rules : [UrlRule];
|
| +}
|
| +
|
| +// The top-level data structure (the schema's root type) storing URL rules.
|
| +table IndexedRuleset {
|
| + // The index of all blacklist URL rules.
|
| + blacklist_index : UrlPatternIndex;
|
| +
|
| + // The index of all whitelist URL rules.
|
| + whitelist_index : UrlPatternIndex;
|
| +}
|
| +
|
| +root_type IndexedRuleset;
|
|
|