Index: components/subresource_filter/core/common/url_pattern_matching.h |
diff --git a/components/subresource_filter/core/common/url_pattern_matching.h b/components/subresource_filter/core/common/url_pattern_matching.h |
index 74c6a3a0ece196334f9f08cfdcf5186fd28ad7fa..fe6441910f65f090b1e5f385bc94499e186db511 100644 |
--- a/components/subresource_filter/core/common/url_pattern_matching.h |
+++ b/components/subresource_filter/core/common/url_pattern_matching.h |
@@ -18,14 +18,20 @@ |
#include <stddef.h> |
+#include <iterator> |
#include <vector> |
+#include "base/logging.h" |
#include "base/strings/string_piece.h" |
+#include "components/subresource_filter/core/common/fuzzy_pattern_matching.h" |
+#include "components/subresource_filter/core/common/knuth_morris_pratt.h" |
+#include "components/subresource_filter/core/common/string_splitter.h" |
+#include "components/subresource_filter/core/common/url_pattern.h" |
#include "url/gurl.h" |
namespace subresource_filter { |
-struct UrlPattern; |
+// Public interface declaration ------------------------------------------------ |
// Builds a compound Knuth-Morris-Pratt failure function used to match URLs |
// against the |pattern|. |
@@ -45,16 +51,138 @@ struct UrlPattern; |
// The URL |pattern| must be normalized. Namely, it must not have the following |
// substrings: **, *<END>, <BEGIN>*. If the the |pattern| has a BOUNDARY anchor, |
// the corresponding side of its string must not end with a '*' wildcard. |
+template <typename IntType> |
void BuildFailureFunction(const UrlPattern& pattern, |
- std::vector<size_t>* failure); |
+ std::vector<IntType>* failure); |
// Returns whether the |url| matches the URL |pattern|. The |failure| function |
-// must be the output of BuildFailureFunction() called with the same |pattern|. |
+// between |failure_begin| and |failure_end| must be the output of |
+// BuildFailureFunction() called with the same |pattern|. |
// |
// TODO(pkalinnikov): Outline algorithms implemented in this function. |
+template <typename FailureIter> |
bool IsMatch(const GURL& url, |
const UrlPattern& pattern, |
- const std::vector<size_t>& failure); |
+ FailureIter failure_begin, |
+ FailureIter failure_end); |
+ |
+// Implementation -------------------------------------------------------------- |
+ |
+namespace impl { |
+ |
+inline bool IsWildcard(char c) { |
+ return c == '*'; |
+} |
+ |
+template <typename IntType> |
+inline size_t FindFirstOccurrenceFuzzy(base::StringPiece text, |
+ base::StringPiece subpattern, |
+ const IntType* failure) { |
+ return *AllOccurrencesFuzzy<IntType>(text, subpattern, failure).begin(); |
+} |
+ |
+} // namespace impl |
+ |
+template <typename IntType> |
+void BuildFailureFunction(const UrlPattern& pattern, |
+ std::vector<IntType>* failure) { |
+ auto subpatterns = |
+ CreateStringSplitter(pattern.url_pattern, impl::IsWildcard); |
+ auto subpattern_it = subpatterns.begin(); |
+ auto subpattern_end = subpatterns.end(); |
+ |
+ if (subpattern_it == subpattern_end) |
+ return; |
+ if (pattern.anchor_left == proto::ANCHOR_TYPE_BOUNDARY) |
+ ++subpattern_it; |
+ |
+ while (subpattern_it != subpattern_end) { |
+ const base::StringPiece part = *subpattern_it++; |
+ DCHECK(!part.empty()); |
+ if (pattern.anchor_right == proto::ANCHOR_TYPE_BOUNDARY && |
+ subpattern_it == subpattern_end) { |
+ break; |
+ } |
+ |
+ if (part.find(kSeparatorPlaceholder) == base::StringPiece::npos) { |
+ BuildFailureFunction(part, failure); |
+ } else { |
+ // Prepend the value '1' to the failure function to let matchers |
+ // distinguish between the subpatterns with separator placeholders and |
+ // those without. |
+ failure->push_back(1); |
+ BuildFailureFunctionFuzzy(part, failure); |
+ } |
+ } |
+} |
+ |
+// TODO(pkalinnikov): Support SUBDOMAIN anchors. |
+template <typename FailureIter> |
+bool IsMatch(const GURL& url, |
+ const UrlPattern& pattern, |
+ FailureIter failure_begin, |
+ FailureIter failure_end) { |
+ auto subpatterns = |
+ CreateStringSplitter(pattern.url_pattern, impl::IsWildcard); |
+ auto subpattern_it = subpatterns.begin(); |
+ auto subpattern_end = subpatterns.end(); |
+ |
+ if (subpattern_it == subpattern_end) { |
+ return pattern.anchor_left != proto::ANCHOR_TYPE_BOUNDARY || |
+ pattern.anchor_right != proto::ANCHOR_TYPE_BOUNDARY || |
+ url.is_empty(); |
+ } |
+ |
+ base::StringPiece spec = url.spec(); |
+ |
+ if (pattern.anchor_left == proto::ANCHOR_TYPE_BOUNDARY) { |
+ const base::StringPiece subpattern = *subpattern_it++; |
+ if (!StartsWithFuzzy(spec, subpattern)) |
+ return false; |
+ if (subpattern_it == subpattern_end) { |
+ return pattern.anchor_right != proto::ANCHOR_TYPE_BOUNDARY || |
+ spec.size() == subpattern.size(); |
+ } |
+ spec.remove_prefix(subpattern.size()); |
+ } |
+ |
+ base::StringPiece subpattern; |
+ while (subpattern_it != subpattern_end) { |
+ subpattern = *subpattern_it++; |
+ DCHECK(!subpattern.empty()); |
+ |
+ if (subpattern_it == subpattern_end && |
+ pattern.anchor_right == proto::ANCHOR_TYPE_BOUNDARY) { |
+ break; |
+ } |
+ |
+ // The subpatterns with separator placeholders were indexed differently and |
+ // should be matched accordingly. Their failure functions were prepended by |
+ // a non-zero value, and we need to skip it. If the value turned to be zero, |
+ // it is the initial value of a failure function of a regular substring. |
+ CHECK(failure_begin < failure_end); |
+ const bool has_separator_placeholders = (*failure_begin != 0); |
+ if (has_separator_placeholders) |
+ ++failure_begin; |
+ CHECK(static_cast<size_t>(std::distance(failure_begin, failure_end)) >= |
+ subpattern.size()); |
+ |
+ // If the subpattern has separator placeholders, it should be matched with |
+ // FindFirstOccurrenceOfSubpattern, otherwise it can be found as a regular |
+ // substring. |
+ const size_t match_end = |
+ (has_separator_placeholders |
+ ? impl::FindFirstOccurrenceFuzzy(spec, subpattern, &*failure_begin) |
+ : FindFirstOccurrence(spec, subpattern, &*failure_begin)); |
+ if (match_end == base::StringPiece::npos) |
+ return false; |
+ spec.remove_prefix(match_end); |
+ failure_begin += subpattern.size(); |
+ } |
+ |
+ return pattern.anchor_right != proto::ANCHOR_TYPE_BOUNDARY || |
+ EndsWithFuzzy(spec, subpattern); |
+} |
} // namespace subresource_filter |