| Index: components/subresource_filter/core/common/url_pattern_matching.h
|
| diff --git a/components/subresource_filter/core/common/url_pattern_matching.h b/components/subresource_filter/core/common/url_pattern_matching.h
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..74c6a3a0ece196334f9f08cfdcf5186fd28ad7fa
|
| --- /dev/null
|
| +++ b/components/subresource_filter/core/common/url_pattern_matching.h
|
| @@ -0,0 +1,61 @@
|
| +// Copyright 2016 The Chromium Authors. All rights reserved.
|
| +// Use of this source code is governed by a BSD-style license that can be
|
| +// found in the LICENSE file.
|
| +
|
| +// The matching logic distinguishes between the terms URL pattern and
|
| +// subpattern. A URL pattern usually stands for the full thing, e.g.
|
| +// "example.com^*path*par=val^", whereas subpattern denotes a maximal substring
|
| +// of a pattern not containing the wildcard '*' character. For the example above
|
| +// the subpatterns are: "example.com^", "path" and "par=val^".
|
| +//
|
| +// The separator placeholder '^' symbol is used in subpatterns to match any
|
| +// separator character, which is any ASCII symbol except letters, digits, and
|
| +// the following: '_', '-', '.', '%'. Note that the separator placeholder
|
| +// character '^' is itself a separator, as well as '\0'.
|
| +
|
| +#ifndef COMPONENTS_SUBRESOURCE_FILTER_CORE_COMMON_URL_PATTERN_MATCHING_H_
|
| +#define COMPONENTS_SUBRESOURCE_FILTER_CORE_COMMON_URL_PATTERN_MATCHING_H_
|
| +
|
| +#include <stddef.h>
|
| +
|
| +#include <vector>
|
| +
|
| +#include "base/strings/string_piece.h"
|
| +#include "url/gurl.h"
|
| +
|
| +namespace subresource_filter {
|
| +
|
| +struct UrlPattern;
|
| +
|
| +// Builds a compound Knuth-Morris-Pratt failure function used to match URLs
|
| +// against the |pattern|.
|
| +//
|
| +// The |pattern| is split on the '*' wildcard symbol and then a failure function
|
| +// is built for each subpattern by BuildFailureFunctionFuzzy or
|
| +// BuildFailureFunction (depending on whether the subpattern contains separator
|
| +// placeholders), and appended to the returned vector. Some of the subpatterns
|
| +// can be exempted from being indexed. E.g., if the |pattern| has a BOUNDARY
|
| +// left anchor, the first subpattern can be matched by checking if it's a prefix
|
| +// of a URL.
|
| +//
|
| +// Each subpattern indexed with BuildFailureFunctionFuzzy is prepended with a
|
| +// value 1 (to distinguish them from the subpatterns indexed with
|
| +// BuildFailureFunction, their failure functions always start with 0).
|
| +//
|
| +// The URL |pattern| must be normalized. Namely, it must not have the following
|
| +// substrings: **, *<END>, <BEGIN>*. If the the |pattern| has a BOUNDARY anchor,
|
| +// the corresponding side of its string must not end with a '*' wildcard.
|
| +void BuildFailureFunction(const UrlPattern& pattern,
|
| + std::vector<size_t>* failure);
|
| +
|
| +// Returns whether the |url| matches the URL |pattern|. The |failure| function
|
| +// must be the output of BuildFailureFunction() called with the same |pattern|.
|
| +//
|
| +// TODO(pkalinnikov): Outline algorithms implemented in this function.
|
| +bool IsMatch(const GURL& url,
|
| + const UrlPattern& pattern,
|
| + const std::vector<size_t>& failure);
|
| +
|
| +} // namespace subresource_filter
|
| +
|
| +#endif // COMPONENTS_SUBRESOURCE_FILTER_CORE_COMMON_URL_PATTERN_MATCHING_H_
|
|
|