Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(138)

Side by Side Diff: components/subresource_filter/core/common/fuzzy_pattern_matching.h

Issue 2793993002: [subresource_filter] Replace KMP by std::search. (Closed)
Patch Set: Fix DCHECK. Created 3 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2016 The Chromium Authors. All rights reserved. 1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // The separator placeholder '^' symbol is used in subpatterns to match any 5 // The separator placeholder '^' symbol is used in subpatterns to match any
6 // separator character, which is any ASCII symbol except letters, digits, and 6 // separator character, which is any ASCII symbol except letters, digits, and
7 // the following: '_', '-', '.', '%'. Note that the separator placeholder 7 // the following: '_', '-', '.', '%'. Note that the separator placeholder
8 // character '^' is itself a separator, as well as '\0'. 8 // character '^' is itself a separator, as well as '\0'.
9 // TODO(pkalinnikov): In addition, a separator placeholder at the end of the 9 // TODO(pkalinnikov): In addition, a separator placeholder at the end of the
10 // pattern can be matched by the end of |text|. 10 // pattern can be matched by the end of |text|.
11 // 11 //
12 // We define a fuzzy occurrence as an occurrence of a |subpattern| in |text| 12 // We define a fuzzy occurrence as an occurrence of a |subpattern| in |text|
13 // such that all its non-placeholder characters are equal to the corresponding 13 // such that all its non-placeholder characters are equal to the corresponding
14 // characters of the |text|, whereas each '^' placeholder can correspond to any 14 // characters of the |text|, whereas each '^' placeholder can correspond to any
15 // type of separator in |text|. 15 // type of separator in |text|.
16 16
17 #ifndef COMPONENTS_SUBRESOURCE_FILTER_CORE_COMMON_FUZZY_PATTERN_MATCHING_H_ 17 #ifndef COMPONENTS_SUBRESOURCE_FILTER_CORE_COMMON_FUZZY_PATTERN_MATCHING_H_
18 #define COMPONENTS_SUBRESOURCE_FILTER_CORE_COMMON_FUZZY_PATTERN_MATCHING_H_ 18 #define COMPONENTS_SUBRESOURCE_FILTER_CORE_COMMON_FUZZY_PATTERN_MATCHING_H_
19 19
20 #include <stddef.h> 20 #include <stddef.h>
21 21
22 #include <type_traits>
23 #include <vector>
24
25 #include "base/logging.h"
26 #include "base/strings/string_piece.h" 22 #include "base/strings/string_piece.h"
27 #include "components/subresource_filter/core/common/knuth_morris_pratt.h"
28 23
29 namespace subresource_filter { 24 namespace subresource_filter {
30 25
31 constexpr char kSeparatorPlaceholder = '^'; 26 constexpr char kSeparatorPlaceholder = '^';
32 27
33 inline bool IsAscii(char c) { 28 inline bool IsAscii(char c) {
34 return !(c & ~0x7F); 29 return !(c & ~0x7F);
35 } 30 }
36 31
37 inline bool IsAlphaNumericAscii(char c) { 32 inline bool IsAlphaNumericAscii(char c) {
(...skipping 11 matching lines...) Expand all
49 case '.': 44 case '.':
50 case '%': 45 case '%':
51 return false; 46 return false;
52 case kSeparatorPlaceholder: 47 case kSeparatorPlaceholder:
53 return true; 48 return true;
54 default: 49 default:
55 return !IsAlphaNumericAscii(c) && IsAscii(c); 50 return !IsAlphaNumericAscii(c) && IsAscii(c);
56 } 51 }
57 } 52 }
58 53
59 class IsEqualOrBothSeparators {
60 public:
61 bool operator()(char first, char second) const {
62 return first == second || (IsSeparator(first) && IsSeparator(second));
63 }
64 };
65
66 // Returns whether |text| starts with a fuzzy occurrence of |subpattern|. 54 // Returns whether |text| starts with a fuzzy occurrence of |subpattern|.
67 bool StartsWithFuzzy(base::StringPiece text, base::StringPiece subpattern); 55 bool StartsWithFuzzy(base::StringPiece text, base::StringPiece subpattern);
68 56
69 // Returns whether |text| ends with a fuzzy occurrence of |subpattern|. 57 // Returns whether |text| ends with a fuzzy occurrence of |subpattern|.
70 bool EndsWithFuzzy(base::StringPiece text, base::StringPiece subpattern); 58 bool EndsWithFuzzy(base::StringPiece text, base::StringPiece subpattern);
71 59
72 // Appends elements of the Knuth-Morris-Pratt failure function corresponding to 60 // Returns the position of the leftmost fuzzy occurrence of a |subpattern| in
73 // |subpattern| at the end of the |failure| vector. All separator characters, 61 // the |text| starting no earlier than |from| the specified position.
74 // including the separator placeholder, are considered equivalent. This is 62 size_t FindFuzzy(base::StringPiece text,
75 // necessary in order to support fuzzy separator matching while also keeping the 63 base::StringPiece subpattern,
76 // equality predicate transitive and symmetric which is needed for KMP. However, 64 size_t from = 0);
77 // the separators will need to be checked in a second pass for KMP match
78 // candidates, which is implemented by AllOccurrencesFuzzy.
79 template <typename FailureFunctionElement>
80 void BuildFailureFunctionFuzzy(base::StringPiece subpattern,
81 std::vector<FailureFunctionElement>* failure) {
82 static_assert(std::is_unsigned<FailureFunctionElement>::value,
83 "The type is not unsigned.");
84 BuildFailureFunction<FailureFunctionElement, IsEqualOrBothSeparators>(
85 subpattern, failure);
86 }
87
88 // Iterator range that yields all fuzzy occurrences of |subpattern| in |text|.
89 // The values in the range are the positions just beyond each occurrence.
90 // Out-of-range iterators are dereferenced to base::StringPiece::npos.
91 //
92 // The |failure| array is the failure function created by
93 // BuildFailureFunctionFuzzy for the same |subpattern|, and containing at least
94 // subpattern.size() values.
95 template <typename IntType>
96 class AllOccurrencesFuzzy
97 : private AllOccurrences<IntType, IsEqualOrBothSeparators> {
98 public:
99 using Base = AllOccurrences<IntType, IsEqualOrBothSeparators>;
100 using BaseIterator = typename Base::Iterator;
101
102 class Iterator : public BaseIterator {
103 public:
104 Iterator& operator++() {
105 BaseIterator::operator++();
106 AdvanceWhileFalsePositive();
107 return *this;
108 }
109
110 Iterator operator++(int) {
111 Iterator copy(*this);
112 operator++();
113 return copy;
114 }
115
116 private:
117 friend class AllOccurrencesFuzzy;
118
119 explicit Iterator(const BaseIterator& base_iterator)
120 : BaseIterator(base_iterator) {
121 AdvanceWhileFalsePositive();
122 }
123
124 const AllOccurrencesFuzzy& owner() const {
125 return *static_cast<const AllOccurrencesFuzzy*>(BaseIterator::owner_);
126 }
127
128 // Moves the iterator forward until it either reaches the end, or meets an
129 // occurrence exactly matching all non-placeholder separators.
130 void AdvanceWhileFalsePositive() {
131 while (BaseIterator::match_end_ != base::StringPiece::npos && !IsMatch())
132 BaseIterator::operator++();
133 }
134
135 // Returns whether the currenct match meets all separators.
136 bool IsMatch() const {
137 DCHECK_LE(BaseIterator::match_end_, owner().text_.size());
138 DCHECK_GE(BaseIterator::match_end_, owner().pattern_.size());
139
140 // TODO(pkalinnikov): Store a vector of separator positions externally and
141 // check if they are equal to the corresponding characters of the |text|.
142 return StartsWithFuzzy(owner().text_.substr(BaseIterator::match_end_ -
143 owner().pattern_.size()),
144 owner().pattern_);
145 }
146 };
147
148 AllOccurrencesFuzzy(base::StringPiece text,
149 base::StringPiece subpattern,
150 const IntType* failure)
151 : Base(text, subpattern, failure) {}
152
153 Iterator begin() const { return Iterator(Base::begin()); }
154 Iterator end() const { return Iterator(Base::end()); }
155 };
156 65
157 } // namespace subresource_filter 66 } // namespace subresource_filter
158 67
159 #endif // COMPONENTS_SUBRESOURCE_FILTER_CORE_COMMON_FUZZY_PATTERN_MATCHING_H_ 68 #endif // COMPONENTS_SUBRESOURCE_FILTER_CORE_COMMON_FUZZY_PATTERN_MATCHING_H_
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698