Index: components/subresource_filter/core/common/url_pattern_matching.h |
diff --git a/components/subresource_filter/core/common/url_pattern_matching.h b/components/subresource_filter/core/common/url_pattern_matching.h |
index fe6441910f65f090b1e5f385bc94499e186db511..24608b89b7e04fbfaa74b037d82e06ab79e5d5be 100644 |
--- a/components/subresource_filter/core/common/url_pattern_matching.h |
+++ b/components/subresource_filter/core/common/url_pattern_matching.h |
@@ -18,6 +18,7 @@ |
#include <stddef.h> |
+#include <algorithm> |
#include <iterator> |
#include <vector> |
@@ -28,6 +29,7 @@ |
#include "components/subresource_filter/core/common/string_splitter.h" |
#include "components/subresource_filter/core/common/url_pattern.h" |
#include "url/gurl.h" |
+#include "url/third_party/mozilla/url_parse.h" |
namespace subresource_filter { |
@@ -70,10 +72,28 @@ bool IsMatch(const GURL& url, |
namespace impl { |
-inline bool IsWildcard(char c) { |
- return c == '*'; |
+class IsWildcard { |
+ public: |
+ bool operator()(char c) const { return c == '*'; } |
+}; |
+ |
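+// Returns whether |position| within the |url| corresponds to the beginning of |
+// a (sub-)domain of its |host| component, i.e. |position| is either the first |
+// character of the host, or lies within the host (or immediately past its |
+// end) and directly follows a '.' character. |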
+inline bool IsSubdomainAnchored(base::StringPiece url, |
+ url::Component host, |
+ size_t position) { |
+ DCHECK_LE(position, url.size()); |
+ const size_t host_begin = static_cast<size_t>(host.begin); |
+ const size_t host_end = static_cast<size_t>(host.end()); |
+ DCHECK_LE(host_end, url.size()); |
+ |
+ return position == host_begin || |
+ (position > host_begin && position <= host_end && |
+ url[position - 1] == '.'); |
} |
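+// Returns the position just beyond the leftmost fuzzy occurrence of |
+// |subpattern| in the |text|. Callers compare the result against |
+// base::StringPiece::npos to detect that there is no such occurrence. |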
template <typename IntType> |
inline size_t FindFirstOccurrenceFuzzy(base::StringPiece text, |
base::StringPiece subpattern, |
@@ -81,13 +101,44 @@ inline size_t FindFirstOccurrenceFuzzy(base::StringPiece text, |
return *AllOccurrencesFuzzy<IntType>(text, subpattern, failure).begin(); |
} |
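+// Returns the position just beyond the leftmost occurrence of |subpattern| in |
+// the |url| whose starting position satisfies a SUBDOMAIN anchor, i.e. is the |
+// beginning of a (sub-)domain of the |host|. Callers compare the result |
+// against base::StringPiece::npos to detect that there is no such occurrence. |
+// NOTE(review): the std::find_if() result is dereferenced unconditionally, so |
+// this presumably relies on *occurrences.end() being npos - confirm. |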
+template <typename IntType> |
+inline size_t FindSubdomainAnchored(base::StringPiece url, |
+ url::Component host, |
+ base::StringPiece subpattern, |
+ const IntType* failure) { |
+ auto occurrences = AllOccurrences<IntType>(url, subpattern, failure); |
+ return *std::find_if(occurrences.begin(), occurrences.end(), |
+ [url, host, subpattern](size_t match_end_position) { |
+ DCHECK_GE(match_end_position, subpattern.size()); |
+ return IsSubdomainAnchored( |
+ url, host, match_end_position - subpattern.size()); |
+ }); |
+} |
+ |
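+// Returns the position just beyond the leftmost fuzzy occurrence of |
+// |subpattern| in the |url| whose starting position satisfies a SUBDOMAIN |
+// anchor, i.e. is the beginning of a (sub-)domain of the |host|. Callers |
+// compare the result against base::StringPiece::npos to detect that there is |
+// no such occurrence. |
+// NOTE(review): the std::find_if() result is dereferenced unconditionally, so |
+// this presumably relies on *occurrences.end() being npos - confirm. |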
+template <typename IntType> |
+inline size_t FindSubdomainAnchoredFuzzy(base::StringPiece url, |
+ url::Component host, |
+ base::StringPiece subpattern, |
+ const IntType* failure) { |
+ auto occurrences = AllOccurrencesFuzzy<IntType>(url, subpattern, failure); |
+ return *std::find_if(occurrences.begin(), occurrences.end(), |
+ [url, host, subpattern](size_t match_end_position) { |
+ DCHECK_GE(match_end_position, subpattern.size()); |
+ return IsSubdomainAnchored( |
+ url, host, match_end_position - subpattern.size()); |
+ }); |
+} |
+ |
} // namespace impl |
template <typename IntType> |
void BuildFailureFunction(const UrlPattern& pattern, |
std::vector<IntType>* failure) { |
- auto subpatterns = |
- CreateStringSplitter(pattern.url_pattern, impl::IsWildcard); |
+ StringSplitter<impl::IsWildcard> subpatterns(pattern.url_pattern); |
auto subpattern_it = subpatterns.begin(); |
auto subpattern_end = subpatterns.end(); |
@@ -116,14 +167,14 @@ void BuildFailureFunction(const UrlPattern& pattern, |
} |
} |
-// TODO(pkalinnikov): Support SUBDOMAIN anchors. |
template <typename FailureIter> |
bool IsMatch(const GURL& url, |
const UrlPattern& pattern, |
FailureIter failure_begin, |
FailureIter failure_end) { |
- auto subpatterns = |
- CreateStringSplitter(pattern.url_pattern, impl::IsWildcard); |
+ DCHECK(url.is_valid()); |
+ |
+ StringSplitter<impl::IsWildcard> subpatterns(pattern.url_pattern); |
auto subpattern_it = subpatterns.begin(); |
auto subpattern_end = subpatterns.end(); |
@@ -133,20 +184,57 @@ bool IsMatch(const GURL& url, |
url.is_empty(); |
} |
- base::StringPiece spec = url.spec(); |
+ const base::StringPiece spec = url.possibly_invalid_spec(); |
+ const url::Component host_part = url.parsed_for_possibly_invalid_spec().host; |
+ base::StringPiece subpattern = *subpattern_it++; |
+ if (subpattern_it == subpattern_end && |
+ pattern.anchor_right == proto::ANCHOR_TYPE_BOUNDARY) { |
+ if (!EndsWithFuzzy(spec, subpattern)) |
+ return false; |
+ if (pattern.anchor_left == proto::ANCHOR_TYPE_BOUNDARY) |
+ return spec.size() == subpattern.size(); |
+ if (pattern.anchor_left == proto::ANCHOR_TYPE_SUBDOMAIN) { |
+ DCHECK_LE(subpattern.size(), spec.size()); |
+ return url.has_host() && |
+ impl::IsSubdomainAnchored(spec, host_part, |
+ spec.size() - subpattern.size()); |
+ } |
+ return true; |
+ } |
+ |
+ base::StringPiece text = spec; |
if (pattern.anchor_left == proto::ANCHOR_TYPE_BOUNDARY) { |
- const base::StringPiece subpattern = *subpattern_it++; |
if (!StartsWithFuzzy(spec, subpattern)) |
return false; |
- if (subpattern_it == subpattern_end) { |
- return pattern.anchor_right != proto::ANCHOR_TYPE_BOUNDARY || |
- spec.size() == subpattern.size(); |
- } |
- spec.remove_prefix(subpattern.size()); |
+ if (subpattern_it == subpattern_end) |
+ return true; |
+ text.remove_prefix(subpattern.size()); |
+ } else if (pattern.anchor_left == proto::ANCHOR_TYPE_SUBDOMAIN) { |
+ if (!url.has_host()) |
+ return false; |
+ |
+ const bool has_separator_placeholders = (*failure_begin != 0); |
+ if (has_separator_placeholders) |
+ ++failure_begin; |
+ |
+ const size_t position = |
+ has_separator_placeholders |
+ ? impl::FindSubdomainAnchoredFuzzy(spec, host_part, subpattern, |
+ &*failure_begin) |
+ : impl::FindSubdomainAnchored(spec, host_part, subpattern, |
+ &*failure_begin); |
+ if (position == base::StringPiece::npos) |
+ return false; |
+ if (subpattern_it == subpattern_end) |
+ return true; |
+ text.remove_prefix(position); |
+ } else { |
+ DCHECK_EQ(pattern.anchor_left, proto::ANCHOR_TYPE_NONE); |
+ // Rewind to the initial subpattern (it was consumed above), so that it gets |
+ // processed in the loop below. |
+ subpattern_it = subpatterns.begin(); |
} |
- base::StringPiece subpattern; |
while (subpattern_it != subpattern_end) { |
subpattern = *subpattern_it++; |
DCHECK(!subpattern.empty()); |
@@ -172,16 +260,16 @@ bool IsMatch(const GURL& url, |
// substring. |
const size_t match_end = |
(has_separator_placeholders |
- ? impl::FindFirstOccurrenceFuzzy(spec, subpattern, &*failure_begin) |
- : FindFirstOccurrence(spec, subpattern, &*failure_begin)); |
+ ? impl::FindFirstOccurrenceFuzzy(text, subpattern, &*failure_begin) |
+ : FindFirstOccurrence(text, subpattern, &*failure_begin)); |
if (match_end == base::StringPiece::npos) |
return false; |
- spec.remove_prefix(match_end); |
+ text.remove_prefix(match_end); |
failure_begin += subpattern.size(); |
} |
return pattern.anchor_right != proto::ANCHOR_TYPE_BOUNDARY || |
- EndsWithFuzzy(spec, subpattern); |
+ EndsWithFuzzy(text, subpattern); |
} |
} // namespace subresource_filter |