Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(587)

Unified Diff: components/subresource_filter/core/common/knuth_morris_pratt.h

Issue 2793993002: [subresource_filter] Replace KMP by std::search. (Closed)
Patch Set: Fix DCHECK. Created 3 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: components/subresource_filter/core/common/knuth_morris_pratt.h
diff --git a/components/subresource_filter/core/common/knuth_morris_pratt.h b/components/subresource_filter/core/common/knuth_morris_pratt.h
deleted file mode 100644
index 29e50e7307bdf4d5409c0e67d3685680aca5ea91..0000000000000000000000000000000000000000
--- a/components/subresource_filter/core/common/knuth_morris_pratt.h
+++ /dev/null
@@ -1,249 +0,0 @@
-// Copyright 2016 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-// This file implements the "failure function" notion used by the
-// Knuth-Morris-Pratt algorithm for solving the string matching problem, i.e.
-// finding all occurences of some pattern |P| in arbitrary text. It also
-// provides tools for traversing these occurences.
-//
-// Consider a character string |P| of length |n|. The failure function of this
-// string is an integer array |F| of length |n|, defined in the following way:
-// F[i] = max{0 <= j <= i : P[0..j-1] == P[i-j+1..i]},
-// i.e. F[i] is the length of the longest proper suffix of P[0..i] that is also
-// a prefix of |P|.
-//
-// For example, the "abacaba" string has the following failure function:
-// [0, 0, 1, 0, 1, 2, 3].
-// For further examples see the "knuth_morris_pratt_unittest.cc" file.
-
-#ifndef COMPONENTS_SUBRESOURCE_FILTER_CORE_COMMON_KNUTH_MORRIS_PRATT_H_
-#define COMPONENTS_SUBRESOURCE_FILTER_CORE_COMMON_KNUTH_MORRIS_PRATT_H_
-
-#include <stddef.h>
-
-#include <functional>
-#include <iterator>
-#include <limits>
-#include <type_traits>
-#include <vector>
-
-#include "base/logging.h"
-#include "base/numerics/safe_conversions.h"
-#include "base/strings/string_piece.h"
-
-namespace subresource_filter {
-
-// Appends elements of the Knuth-Morris-Pratt failure function corresponding to
-// |pattern| at the end of the |failure| vector. The EquivalentTo should be a
-// bool(char, char) functor returning true iff the two characters are
-// equivalent, and is required to be reflexive, symmetric and transitive.
-template <typename IntType, typename EquivalentTo = std::equal_to<char>>
-void BuildFailureFunction(base::StringPiece pattern,
- std::vector<IntType>* failure,
- EquivalentTo equivalent_to = EquivalentTo()) {
- static_assert(std::is_unsigned<IntType>::value, "IntType is not unsigned.");
- DCHECK_LE(pattern.size(), std::numeric_limits<IntType>::max());
-
- if (pattern.empty())
- return;
-
- const size_t base_index = failure->size();
- failure->push_back(0);
- for (size_t i = 1; i != pattern.size(); ++i) {
- const char c = pattern[i];
-
- IntType prefix_match_size = failure->back();
- DCHECK_LT(prefix_match_size, i);
- while (prefix_match_size && !equivalent_to(pattern[prefix_match_size], c))
- prefix_match_size = (*failure)[base_index + prefix_match_size - 1];
-
- if (prefix_match_size || equivalent_to(pattern[0], c))
- ++prefix_match_size;
- failure->push_back(prefix_match_size);
- }
-}
-
-// Returns the position of the leftmost occurrence of the |pattern| in the
-// |text|, i.e. the index of the character coming *after* the occurrence, within
-// the [0, text.size()] range. If the pattern does not occur in the |text|, the
-// returned value is base::StringPiece::npos.
-//
-// The method supports the use-case when the |text| is assumed to be the
-// continuation of some imaginary string matching the initial
-// |prefix_match_size| characters of the |pattern|. Hence, the |pattern| can be
-// partially matched by this imaginary string and partially by the actual prefix
-// of the |text|.
-//
-// The |failure| array is the failure function created by BuildFailureFunction
-// with the same EquivalentTo and |pattern|, and containing at least
-// pattern.size() values.
-template <typename IntType, typename EquivalentTo = std::equal_to<char>>
-size_t FindOccurrence(base::StringPiece text,
- base::StringPiece pattern,
- const IntType* failure,
- IntType prefix_match_size,
- EquivalentTo equivalent_to = EquivalentTo()) {
- static_assert(std::is_unsigned<IntType>::value, "IntType is not unsigned.");
- DCHECK_LE(prefix_match_size, pattern.size());
- DCHECK_LE(pattern.size(), std::numeric_limits<IntType>::max());
-
- if (prefix_match_size == pattern.size())
- return 0;
-
- for (size_t i = 0; i != text.size(); ++i) {
- const char c = text[i];
- while (prefix_match_size && !equivalent_to(pattern[prefix_match_size], c))
- prefix_match_size = failure[prefix_match_size - 1];
- if (prefix_match_size || equivalent_to(pattern[0], c))
- ++prefix_match_size;
- if (prefix_match_size == pattern.size())
- return i + 1;
- }
- return base::StringPiece::npos;
-}
-
-// Same as FindOccurrence, but starts the search from scratch, i.e. with
-// prefix_match_size == 0.
-template <typename IntType, typename EquivalentTo = std::equal_to<char>>
-size_t FindFirstOccurrence(base::StringPiece text,
- base::StringPiece pattern,
- const IntType* failure,
- EquivalentTo equivalent_to = EquivalentTo()) {
- return FindOccurrence(text, pattern, failure, static_cast<IntType>(0),
- equivalent_to);
-}
-
-// Same as FindOccurrence, but preforms the search assuming the |text| starts
-// right after a full match of the |pattern|. The returned value is either
-// base::StringPiece::npos if no more occurrences found, or is within the range
-// [1, text.size()] if there are occurrences apart from the virtual prefix.
-template <typename IntType, typename EquivalentTo = std::equal_to<char>>
-size_t FindNextOccurrence(base::StringPiece text,
- base::StringPiece pattern,
- const IntType* failure,
- EquivalentTo equivalent_to = EquivalentTo()) {
- if (pattern.empty())
- return text.empty() ? base::StringPiece::npos : 1;
-
- const IntType prefix_match_size = failure[pattern.size() - 1];
- return FindOccurrence(text, pattern, failure, prefix_match_size,
- equivalent_to);
-}
-
-// AllOccurrences can be used to find all occurrences of a |pattern| in a |text|
-// with respect to EquivalentTo relation between characters, and iterate over
-// them using an input iterator. The values pointed to by an iterator indicate
-// positions of occurrences, i.e. indices of |text| characters coming *after*
-// the occurrences. Out-of-range iterators are dereferenced to
-// base::StringPiece::npos.
-template <typename IntType, typename EquivalentTo = std::equal_to<char>>
-class AllOccurrences {
- public:
- static_assert(std::is_unsigned<IntType>::value, "IntType is not unsigned.");
-
- class Iterator : public std::iterator<std::input_iterator_tag, size_t> {
- public:
- bool operator==(const Iterator& rhs) const {
- return match_end_ == rhs.match_end_;
- }
-
- bool operator!=(const Iterator& rhs) const { return !operator==(rhs); }
-
- size_t operator*() const { return match_end_; }
- const size_t* operator->() const { return &match_end_; }
-
- Iterator& operator++() {
- DCHECK(match_end_ == base::StringPiece::npos ||
- match_end_ <= owner_->text_.size());
-
- const size_t relative_match_end = owner_->FindNextOccurrence(match_end_);
- if (relative_match_end == base::StringPiece::npos)
- match_end_ = base::StringPiece::npos;
- else
- match_end_ += relative_match_end;
-
- return *this;
- }
-
- Iterator operator++(int) {
- Iterator copy(*this);
- operator++();
- return copy;
- }
-
- protected:
- friend class AllOccurrences;
-
- // Creates an iterator, which points to the occurrence ending just before
- // the position denoted by |match_end|. If |match_end| ==
- // base::StringPiece::npos, the iterator is assumed to be at the end.
- Iterator(const AllOccurrences* owner, size_t match_end)
- : owner_(owner), match_end_(match_end) {}
-
- const AllOccurrences* owner_;
- size_t match_end_;
- };
-
- AllOccurrences(base::StringPiece text,
- base::StringPiece pattern,
- const IntType* failure,
- EquivalentTo equivalent_to = EquivalentTo())
- : text_(text),
- pattern_(pattern),
- failure_(failure),
- equivalent_to_(equivalent_to) {}
-
- Iterator begin() const {
- const size_t match_end =
- FindFirstOccurrence(text_, pattern_, failure_, equivalent_to_);
- return Iterator(this, match_end);
- }
-
- Iterator end() const { return Iterator(this, base::StringPiece::npos); }
-
- protected:
- // Returns the position of the next occurrence assuming the current occurrence
- // ends right before the |match_end| position.
- size_t FindNextOccurrence(size_t match_end) const {
- return subresource_filter::FindNextOccurrence(
- text_.substr(match_end), pattern_, failure_, equivalent_to_);
- }
-
- base::StringPiece text_;
- base::StringPiece pattern_;
- const IntType* failure_;
-
- EquivalentTo equivalent_to_;
-};
-
-// A helper function used to create an AllOccurrences instance without knowing
-// the direct type of the |equivalent_to| functor.
-//
-// Typical usage:
-// auto occurrences = CreateAllOccurrences(
-// "word, word: word-word",
-// "word?",
-// <failure function for "word?">,
-// [](char c1, char c2) {
-// return c1 == c2 || (ispunct(c1) && ispunct(c2));
-// });
-//
-// for (size_t match_end : occurrences) {
-// // The |pattern| matches the following substring of the |text|:
-// // text[|match_end|-pattern.size()..|match_end|-1]
-// ... process the |match_end| ...
-// }
-template <typename IntType, typename EquivalentTo = std::equal_to<char>>
-AllOccurrences<IntType, EquivalentTo> CreateAllOccurrences(
- base::StringPiece text,
- base::StringPiece pattern,
- const IntType* failure,
- EquivalentTo equivalent_to = EquivalentTo()) {
- return AllOccurrences<IntType, EquivalentTo>(text, pattern, failure,
- equivalent_to);
-}
-
-} // namespace subresource_filter
-
-#endif // COMPONENTS_SUBRESOURCE_FILTER_CORE_COMMON_KNUTH_MORRIS_PRATT_H_

Powered by Google App Engine
This is Rietveld 408576698