Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(252)

Unified Diff: components/subresource_filter/core/common/url_pattern_matching.h

Issue 2153743002: Save failure function to FlatBuffer. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Add newline. Created 4 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: components/subresource_filter/core/common/url_pattern_matching.h
diff --git a/components/subresource_filter/core/common/url_pattern_matching.h b/components/subresource_filter/core/common/url_pattern_matching.h
index 74c6a3a0ece196334f9f08cfdcf5186fd28ad7fa..fe6441910f65f090b1e5f385bc94499e186db511 100644
--- a/components/subresource_filter/core/common/url_pattern_matching.h
+++ b/components/subresource_filter/core/common/url_pattern_matching.h
@@ -18,14 +18,20 @@
#include <stddef.h>
+#include <iterator>
#include <vector>
+#include "base/logging.h"
#include "base/strings/string_piece.h"
+#include "components/subresource_filter/core/common/fuzzy_pattern_matching.h"
+#include "components/subresource_filter/core/common/knuth_morris_pratt.h"
+#include "components/subresource_filter/core/common/string_splitter.h"
+#include "components/subresource_filter/core/common/url_pattern.h"
#include "url/gurl.h"
namespace subresource_filter {
-struct UrlPattern;
+// Public interface declaration ------------------------------------------------
// Builds a compound Knuth-Morris-Pratt failure function used to match URLs
// against the |pattern|.
@@ -45,16 +51,138 @@ struct UrlPattern;
// The URL |pattern| must be normalized. Namely, it must not have the following
// substrings: **, *<END>, <BEGIN>*. If the the |pattern| has a BOUNDARY anchor,
// the corresponding side of its string must not end with a '*' wildcard.
+template <typename IntType>
void BuildFailureFunction(const UrlPattern& pattern,
- std::vector<size_t>* failure);
+ std::vector<IntType>* failure);
// Returns whether the |url| matches the URL |pattern|. The |failure| function
-// must be the output of BuildFailureFunction() called with the same |pattern|.
+// between |failure_begin| and |failure_end| must be the output of
+// BuildFailureFunction() called with the same |pattern|.
//
// TODO(pkalinnikov): Outline algorithms implemented in this function.
+template <typename FailureIter>
bool IsMatch(const GURL& url,
const UrlPattern& pattern,
- const std::vector<size_t>& failure);
+ FailureIter failure_begin,
+ FailureIter failure_end);
+
+// Implementation --------------------------------------------------------------
+
+namespace impl {
+
+inline bool IsWildcard(char c) {
+ return c == '*';
+}
+
+template <typename IntType>
+inline size_t FindFirstOccurrenceFuzzy(base::StringPiece text,
+ base::StringPiece subpattern,
+ const IntType* failure) {
+ return *AllOccurrencesFuzzy<IntType>(text, subpattern, failure).begin();
+}
+
+} // namespace impl
+
+template <typename IntType>
+void BuildFailureFunction(const UrlPattern& pattern,
+ std::vector<IntType>* failure) {
+ auto subpatterns =
+ CreateStringSplitter(pattern.url_pattern, impl::IsWildcard);
+ auto subpattern_it = subpatterns.begin();
+ auto subpattern_end = subpatterns.end();
+
+ if (subpattern_it == subpattern_end)
+ return;
+ if (pattern.anchor_left == proto::ANCHOR_TYPE_BOUNDARY)
+ ++subpattern_it;
+
+ while (subpattern_it != subpattern_end) {
+ const base::StringPiece part = *subpattern_it++;
+ DCHECK(!part.empty());
+ if (pattern.anchor_right == proto::ANCHOR_TYPE_BOUNDARY &&
+ subpattern_it == subpattern_end) {
+ break;
+ }
+
+ if (part.find(kSeparatorPlaceholder) == base::StringPiece::npos) {
+ BuildFailureFunction(part, failure);
+ } else {
+ // Prepend the value '1' to the failure function to let matchers
+ // distinguish between the subpatterns with separator placeholders and
+ // those without.
+ failure->push_back(1);
+ BuildFailureFunctionFuzzy(part, failure);
+ }
+ }
+}
+
+// TODO(pkalinnikov): Support SUBDOMAIN anchors.
+template <typename FailureIter>
+bool IsMatch(const GURL& url,
+ const UrlPattern& pattern,
+ FailureIter failure_begin,
+ FailureIter failure_end) {
+ auto subpatterns =
+ CreateStringSplitter(pattern.url_pattern, impl::IsWildcard);
+ auto subpattern_it = subpatterns.begin();
+ auto subpattern_end = subpatterns.end();
+
+ if (subpattern_it == subpattern_end) {
+ return pattern.anchor_left != proto::ANCHOR_TYPE_BOUNDARY ||
+ pattern.anchor_right != proto::ANCHOR_TYPE_BOUNDARY ||
+ url.is_empty();
+ }
+
+ base::StringPiece spec = url.spec();
+
+ if (pattern.anchor_left == proto::ANCHOR_TYPE_BOUNDARY) {
+ const base::StringPiece subpattern = *subpattern_it++;
+ if (!StartsWithFuzzy(spec, subpattern))
+ return false;
+ if (subpattern_it == subpattern_end) {
+ return pattern.anchor_right != proto::ANCHOR_TYPE_BOUNDARY ||
+ spec.size() == subpattern.size();
+ }
+ spec.remove_prefix(subpattern.size());
+ }
+
+ base::StringPiece subpattern;
+ while (subpattern_it != subpattern_end) {
+ subpattern = *subpattern_it++;
+ DCHECK(!subpattern.empty());
+
+ if (subpattern_it == subpattern_end &&
+ pattern.anchor_right == proto::ANCHOR_TYPE_BOUNDARY) {
+ break;
+ }
+
+ // The subpatterns with separator placeholders were indexed differently and
+ // should be matched accordingly. Their failure functions were prepended by
+ // a non-zero value, and we need to skip it. If the value turned to be zero,
+ // it is the initial value of a failure function of a regular substring.
+ CHECK(failure_begin < failure_end);
+ const bool has_separator_placeholders = (*failure_begin != 0);
+ if (has_separator_placeholders)
+ ++failure_begin;
+ CHECK(static_cast<size_t>(std::distance(failure_begin, failure_end)) >=
+ subpattern.size());
+
+ // If the subpattern has separator placeholders, it should be matched with
+ // FindFirstOccurrenceOfSubpattern, otherwise it can be found as a regular
+ // substring.
+ const size_t match_end =
+ (has_separator_placeholders
+ ? impl::FindFirstOccurrenceFuzzy(spec, subpattern, &*failure_begin)
+ : FindFirstOccurrence(spec, subpattern, &*failure_begin));
+ if (match_end == base::StringPiece::npos)
+ return false;
+ spec.remove_prefix(match_end);
+ failure_begin += subpattern.size();
+ }
+
+ return pattern.anchor_right != proto::ANCHOR_TYPE_BOUNDARY ||
+ EndsWithFuzzy(spec, subpattern);
+}
} // namespace subresource_filter

Powered by Google App Engine
This is Rietveld 408576698