Index: components/subresource_filter/core/common/knuth_morris_pratt.h |
diff --git a/components/subresource_filter/core/common/knuth_morris_pratt.h b/components/subresource_filter/core/common/knuth_morris_pratt.h |
deleted file mode 100644 |
index 29e50e7307bdf4d5409c0e67d3685680aca5ea91..0000000000000000000000000000000000000000 |
--- a/components/subresource_filter/core/common/knuth_morris_pratt.h |
+++ /dev/null |
@@ -1,249 +0,0 @@ |
-// Copyright 2016 The Chromium Authors. All rights reserved. |
-// Use of this source code is governed by a BSD-style license that can be |
-// found in the LICENSE file. |
- |
-// This file implements the "failure function" notion used by the |
-// Knuth-Morris-Pratt algorithm for solving the string matching problem, i.e. |
-// finding all occurences of some pattern |P| in arbitrary text. It also |
-// provides tools for traversing these occurences. |
-// |
-// Consider a character string |P| of length |n|. The failure function of this |
-// string is an integer array |F| of length |n|, defined in the following way: |
-// F[i] = max{0 <= j <= i : P[0..j-1] == P[i-j+1..i]}, |
-// i.e. F[i] is the length of the longest proper suffix of P[0..i] that is also |
-// a prefix of |P|. |
-// |
-// For example, the "abacaba" string has the following failure function: |
-// [0, 0, 1, 0, 1, 2, 3]. |
-// For further examples see the "knuth_morris_pratt_unittest.cc" file. |
- |
-#ifndef COMPONENTS_SUBRESOURCE_FILTER_CORE_COMMON_KNUTH_MORRIS_PRATT_H_ |
-#define COMPONENTS_SUBRESOURCE_FILTER_CORE_COMMON_KNUTH_MORRIS_PRATT_H_ |
- |
-#include <stddef.h> |
- |
-#include <functional> |
-#include <iterator> |
-#include <limits> |
-#include <type_traits> |
-#include <vector> |
- |
-#include "base/logging.h" |
-#include "base/numerics/safe_conversions.h" |
-#include "base/strings/string_piece.h" |
- |
-namespace subresource_filter { |
- |
-// Appends elements of the Knuth-Morris-Pratt failure function corresponding to |
-// |pattern| at the end of the |failure| vector. The EquivalentTo should be a |
-// bool(char, char) functor returning true iff the two characters are |
-// equivalent, and is required to be reflexive, symmetric and transitive. |
-template <typename IntType, typename EquivalentTo = std::equal_to<char>> |
-void BuildFailureFunction(base::StringPiece pattern, |
- std::vector<IntType>* failure, |
- EquivalentTo equivalent_to = EquivalentTo()) { |
- static_assert(std::is_unsigned<IntType>::value, "IntType is not unsigned."); |
- DCHECK_LE(pattern.size(), std::numeric_limits<IntType>::max()); |
- |
- if (pattern.empty()) |
- return; |
- |
- const size_t base_index = failure->size(); |
- failure->push_back(0); |
- for (size_t i = 1; i != pattern.size(); ++i) { |
- const char c = pattern[i]; |
- |
- IntType prefix_match_size = failure->back(); |
- DCHECK_LT(prefix_match_size, i); |
- while (prefix_match_size && !equivalent_to(pattern[prefix_match_size], c)) |
- prefix_match_size = (*failure)[base_index + prefix_match_size - 1]; |
- |
- if (prefix_match_size || equivalent_to(pattern[0], c)) |
- ++prefix_match_size; |
- failure->push_back(prefix_match_size); |
- } |
-} |
- |
-// Returns the position of the leftmost occurrence of the |pattern| in the |
-// |text|, i.e. the index of the character coming *after* the occurrence, within |
-// the [0, text.size()] range. If the pattern does not occur in the |text|, the |
-// returned value is base::StringPiece::npos. |
-// |
-// The method supports the use-case when the |text| is assumed to be the |
-// continuation of some imaginary string matching the initial |
-// |prefix_match_size| characters of the |pattern|. Hence, the |pattern| can be |
-// partially matched by this imaginary string and partially by the actual prefix |
-// of the |text|. |
-// |
-// The |failure| array is the failure function created by BuildFailureFunction |
-// with the same EquivalentTo and |pattern|, and containing at least |
-// pattern.size() values. |
-template <typename IntType, typename EquivalentTo = std::equal_to<char>> |
-size_t FindOccurrence(base::StringPiece text, |
- base::StringPiece pattern, |
- const IntType* failure, |
- IntType prefix_match_size, |
- EquivalentTo equivalent_to = EquivalentTo()) { |
- static_assert(std::is_unsigned<IntType>::value, "IntType is not unsigned."); |
- DCHECK_LE(prefix_match_size, pattern.size()); |
- DCHECK_LE(pattern.size(), std::numeric_limits<IntType>::max()); |
- |
- if (prefix_match_size == pattern.size()) |
- return 0; |
- |
- for (size_t i = 0; i != text.size(); ++i) { |
- const char c = text[i]; |
- while (prefix_match_size && !equivalent_to(pattern[prefix_match_size], c)) |
- prefix_match_size = failure[prefix_match_size - 1]; |
- if (prefix_match_size || equivalent_to(pattern[0], c)) |
- ++prefix_match_size; |
- if (prefix_match_size == pattern.size()) |
- return i + 1; |
- } |
- return base::StringPiece::npos; |
-} |
- |
-// Same as FindOccurrence, but starts the search from scratch, i.e. with |
-// prefix_match_size == 0. |
-template <typename IntType, typename EquivalentTo = std::equal_to<char>> |
-size_t FindFirstOccurrence(base::StringPiece text, |
- base::StringPiece pattern, |
- const IntType* failure, |
- EquivalentTo equivalent_to = EquivalentTo()) { |
- return FindOccurrence(text, pattern, failure, static_cast<IntType>(0), |
- equivalent_to); |
-} |
- |
-// Same as FindOccurrence, but preforms the search assuming the |text| starts |
-// right after a full match of the |pattern|. The returned value is either |
-// base::StringPiece::npos if no more occurrences found, or is within the range |
-// [1, text.size()] if there are occurrences apart from the virtual prefix. |
-template <typename IntType, typename EquivalentTo = std::equal_to<char>> |
-size_t FindNextOccurrence(base::StringPiece text, |
- base::StringPiece pattern, |
- const IntType* failure, |
- EquivalentTo equivalent_to = EquivalentTo()) { |
- if (pattern.empty()) |
- return text.empty() ? base::StringPiece::npos : 1; |
- |
- const IntType prefix_match_size = failure[pattern.size() - 1]; |
- return FindOccurrence(text, pattern, failure, prefix_match_size, |
- equivalent_to); |
-} |
- |
-// AllOccurrences can be used to find all occurrences of a |pattern| in a |text| |
-// with respect to EquivalentTo relation between characters, and iterate over |
-// them using an input iterator. The values pointed to by an iterator indicate |
-// positions of occurrences, i.e. indices of |text| characters coming *after* |
-// the occurrences. Out-of-range iterators are dereferenced to |
-// base::StringPiece::npos. |
-template <typename IntType, typename EquivalentTo = std::equal_to<char>> |
-class AllOccurrences { |
- public: |
- static_assert(std::is_unsigned<IntType>::value, "IntType is not unsigned."); |
- |
- class Iterator : public std::iterator<std::input_iterator_tag, size_t> { |
- public: |
- bool operator==(const Iterator& rhs) const { |
- return match_end_ == rhs.match_end_; |
- } |
- |
- bool operator!=(const Iterator& rhs) const { return !operator==(rhs); } |
- |
- size_t operator*() const { return match_end_; } |
- const size_t* operator->() const { return &match_end_; } |
- |
- Iterator& operator++() { |
- DCHECK(match_end_ == base::StringPiece::npos || |
- match_end_ <= owner_->text_.size()); |
- |
- const size_t relative_match_end = owner_->FindNextOccurrence(match_end_); |
- if (relative_match_end == base::StringPiece::npos) |
- match_end_ = base::StringPiece::npos; |
- else |
- match_end_ += relative_match_end; |
- |
- return *this; |
- } |
- |
- Iterator operator++(int) { |
- Iterator copy(*this); |
- operator++(); |
- return copy; |
- } |
- |
- protected: |
- friend class AllOccurrences; |
- |
- // Creates an iterator, which points to the occurrence ending just before |
- // the position denoted by |match_end|. If |match_end| == |
- // base::StringPiece::npos, the iterator is assumed to be at the end. |
- Iterator(const AllOccurrences* owner, size_t match_end) |
- : owner_(owner), match_end_(match_end) {} |
- |
- const AllOccurrences* owner_; |
- size_t match_end_; |
- }; |
- |
- AllOccurrences(base::StringPiece text, |
- base::StringPiece pattern, |
- const IntType* failure, |
- EquivalentTo equivalent_to = EquivalentTo()) |
- : text_(text), |
- pattern_(pattern), |
- failure_(failure), |
- equivalent_to_(equivalent_to) {} |
- |
- Iterator begin() const { |
- const size_t match_end = |
- FindFirstOccurrence(text_, pattern_, failure_, equivalent_to_); |
- return Iterator(this, match_end); |
- } |
- |
- Iterator end() const { return Iterator(this, base::StringPiece::npos); } |
- |
- protected: |
- // Returns the position of the next occurrence assuming the current occurrence |
- // ends right before the |match_end| position. |
- size_t FindNextOccurrence(size_t match_end) const { |
- return subresource_filter::FindNextOccurrence( |
- text_.substr(match_end), pattern_, failure_, equivalent_to_); |
- } |
- |
- base::StringPiece text_; |
- base::StringPiece pattern_; |
- const IntType* failure_; |
- |
- EquivalentTo equivalent_to_; |
-}; |
- |
-// A helper function used to create an AllOccurrences instance without knowing |
-// the direct type of the |equivalent_to| functor. |
-// |
-// Typical usage: |
-// auto occurrences = CreateAllOccurrences( |
-// "word, word: word-word", |
-// "word?", |
-// <failure function for "word?">, |
-// [](char c1, char c2) { |
-// return c1 == c2 || (ispunct(c1) && ispunct(c2)); |
-// }); |
-// |
-// for (size_t match_end : occurrences) { |
-// // The |pattern| matches the following substring of the |text|: |
-// // text[|match_end|-pattern.size()..|match_end|-1] |
-// ... process the |match_end| ... |
-// } |
-template <typename IntType, typename EquivalentTo = std::equal_to<char>> |
-AllOccurrences<IntType, EquivalentTo> CreateAllOccurrences( |
- base::StringPiece text, |
- base::StringPiece pattern, |
- const IntType* failure, |
- EquivalentTo equivalent_to = EquivalentTo()) { |
- return AllOccurrences<IntType, EquivalentTo>(text, pattern, failure, |
- equivalent_to); |
-} |
- |
-} // namespace subresource_filter |
- |
-#endif // COMPONENTS_SUBRESOURCE_FILTER_CORE_COMMON_KNUTH_MORRIS_PRATT_H_ |