Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(82)

Side by Side Diff: components/url_matcher/substring_set_matcher.cc

Issue 1549993003: Switch to standard integer types in components/, part 4 of 4. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 4 years, 12 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2013 The Chromium Authors. All rights reserved. 1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/url_matcher/substring_set_matcher.h" 5 #include "components/url_matcher/substring_set_matcher.h"
6 6
7 #include <stddef.h>
8
7 #include <algorithm> 9 #include <algorithm>
8 #include <queue> 10 #include <queue>
9 11
10 #include "base/logging.h" 12 #include "base/logging.h"
11 #include "base/stl_util.h" 13 #include "base/stl_util.h"
12 14
13 namespace url_matcher { 15 namespace url_matcher {
14 16
15 namespace { 17 namespace {
16 18
17 // Compare StringPattern instances based on their string patterns. 19 // Compare StringPattern instances based on their string patterns.
18 bool ComparePatterns(const StringPattern* a, const StringPattern* b) { 20 bool ComparePatterns(const StringPattern* a, const StringPattern* b) {
19 return a->pattern() < b->pattern(); 21 return a->pattern() < b->pattern();
20 } 22 }
21 23
22 // Given the set of patterns, compute how many nodes will the corresponding 24 // Given the set of patterns, compute how many nodes will the corresponding
23 // Aho-Corasick tree have. Note that |patterns| need to be sorted. 25 // Aho-Corasick tree have. Note that |patterns| need to be sorted.
24 uint32 TreeSize(const std::vector<const StringPattern*>& patterns) { 26 uint32_t TreeSize(const std::vector<const StringPattern*>& patterns) {
25 uint32 result = 1u; // 1 for the root node. 27 uint32_t result = 1u; // 1 for the root node.
26 if (patterns.empty()) 28 if (patterns.empty())
27 return result; 29 return result;
28 30
29 std::vector<const StringPattern*>::const_iterator last = patterns.begin(); 31 std::vector<const StringPattern*>::const_iterator last = patterns.begin();
30 std::vector<const StringPattern*>::const_iterator current = last + 1; 32 std::vector<const StringPattern*>::const_iterator current = last + 1;
31 // For the first pattern, each letter is a label of an edge to a new node. 33 // For the first pattern, each letter is a label of an edge to a new node.
32 result += (*last)->pattern().size(); 34 result += (*last)->pattern().size();
33 35
34 // For the subsequent patterns, only count the edges which were not counted 36 // For the subsequent patterns, only count the edges which were not counted
35 // yet. For this it suffices to test against the previous pattern, because the 37 // yet. For this it suffices to test against the previous pattern, because the
36 // patterns are sorted. 38 // patterns are sorted.
37 for (; current != patterns.end(); ++last, ++current) { 39 for (; current != patterns.end(); ++last, ++current) {
38 const std::string& last_pattern = (*last)->pattern(); 40 const std::string& last_pattern = (*last)->pattern();
39 const std::string& current_pattern = (*current)->pattern(); 41 const std::string& current_pattern = (*current)->pattern();
40 const uint32 prefix_bound = 42 const uint32_t prefix_bound =
41 std::min(last_pattern.size(), current_pattern.size()); 43 std::min(last_pattern.size(), current_pattern.size());
42 44
43 uint32 common_prefix = 0; 45 uint32_t common_prefix = 0;
44 while (common_prefix < prefix_bound && 46 while (common_prefix < prefix_bound &&
45 last_pattern[common_prefix] == current_pattern[common_prefix]) 47 last_pattern[common_prefix] == current_pattern[common_prefix])
46 ++common_prefix; 48 ++common_prefix;
47 result += current_pattern.size() - common_prefix; 49 result += current_pattern.size() - common_prefix;
48 } 50 }
49 return result; 51 return result;
50 } 52 }
51 53
52 } // namespace 54 } // namespace
53 55
(...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after
106 RebuildAhoCorasickTree(sorted_patterns); 108 RebuildAhoCorasickTree(sorted_patterns);
107 } 109 }
108 110
109 bool SubstringSetMatcher::Match(const std::string& text, 111 bool SubstringSetMatcher::Match(const std::string& text,
110 std::set<StringPattern::ID>* matches) const { 112 std::set<StringPattern::ID>* matches) const {
111 const size_t old_number_of_matches = matches->size(); 113 const size_t old_number_of_matches = matches->size();
112 114
113 // Handle patterns matching the empty string. 115 // Handle patterns matching the empty string.
114 matches->insert(tree_[0].matches().begin(), tree_[0].matches().end()); 116 matches->insert(tree_[0].matches().begin(), tree_[0].matches().end());
115 117
116 uint32 current_node = 0; 118 uint32_t current_node = 0;
117 for (std::string::const_iterator i = text.begin(); i != text.end(); ++i) { 119 for (std::string::const_iterator i = text.begin(); i != text.end(); ++i) {
118 uint32 edge_from_current = tree_[current_node].GetEdge(*i); 120 uint32_t edge_from_current = tree_[current_node].GetEdge(*i);
119 while (edge_from_current == AhoCorasickNode::kNoSuchEdge && 121 while (edge_from_current == AhoCorasickNode::kNoSuchEdge &&
120 current_node != 0) { 122 current_node != 0) {
121 current_node = tree_[current_node].failure(); 123 current_node = tree_[current_node].failure();
122 edge_from_current = tree_[current_node].GetEdge(*i); 124 edge_from_current = tree_[current_node].GetEdge(*i);
123 } 125 }
124 if (edge_from_current != AhoCorasickNode::kNoSuchEdge) { 126 if (edge_from_current != AhoCorasickNode::kNoSuchEdge) {
125 current_node = edge_from_current; 127 current_node = edge_from_current;
126 matches->insert(tree_[current_node].matches().begin(), 128 matches->insert(tree_[current_node].matches().begin(),
127 tree_[current_node].matches().end()); 129 tree_[current_node].matches().end());
128 } else { 130 } else {
(...skipping 27 matching lines...) Expand all
156 158
157 CreateFailureEdges(); 159 CreateFailureEdges();
158 } 160 }
159 161
160 void SubstringSetMatcher::InsertPatternIntoAhoCorasickTree( 162 void SubstringSetMatcher::InsertPatternIntoAhoCorasickTree(
161 const StringPattern* pattern) { 163 const StringPattern* pattern) {
162 const std::string& text = pattern->pattern(); 164 const std::string& text = pattern->pattern();
163 const std::string::const_iterator text_end = text.end(); 165 const std::string::const_iterator text_end = text.end();
164 166
165 // Iterators on the tree and the text. 167 // Iterators on the tree and the text.
166 uint32 current_node = 0; 168 uint32_t current_node = 0;
167 std::string::const_iterator i = text.begin(); 169 std::string::const_iterator i = text.begin();
168 170
169 // Follow existing paths for as long as possible. 171 // Follow existing paths for as long as possible.
170 while (i != text_end) { 172 while (i != text_end) {
171 uint32 edge_from_current = tree_[current_node].GetEdge(*i); 173 uint32_t edge_from_current = tree_[current_node].GetEdge(*i);
172 if (edge_from_current == AhoCorasickNode::kNoSuchEdge) 174 if (edge_from_current == AhoCorasickNode::kNoSuchEdge)
173 break; 175 break;
174 current_node = edge_from_current; 176 current_node = edge_from_current;
175 ++i; 177 ++i;
176 } 178 }
177 179
178 // Create new nodes if necessary. 180 // Create new nodes if necessary.
179 while (i != text_end) { 181 while (i != text_end) {
180 tree_.push_back(AhoCorasickNode()); 182 tree_.push_back(AhoCorasickNode());
181 tree_[current_node].SetEdge(*i, tree_.size() - 1); 183 tree_[current_node].SetEdge(*i, tree_.size() - 1);
182 current_node = tree_.size() - 1; 184 current_node = tree_.size() - 1;
183 ++i; 185 ++i;
184 } 186 }
185 187
186 // Register match. 188 // Register match.
187 tree_[current_node].AddMatch(pattern->id()); 189 tree_[current_node].AddMatch(pattern->id());
188 } 190 }
189 191
190 void SubstringSetMatcher::CreateFailureEdges() { 192 void SubstringSetMatcher::CreateFailureEdges() {
191 typedef AhoCorasickNode::Edges Edges; 193 typedef AhoCorasickNode::Edges Edges;
192 194
193 std::queue<uint32> queue; 195 std::queue<uint32_t> queue;
194 196
195 AhoCorasickNode& root = tree_[0]; 197 AhoCorasickNode& root = tree_[0];
196 root.set_failure(0); 198 root.set_failure(0);
197 const Edges& root_edges = root.edges(); 199 const Edges& root_edges = root.edges();
198 for (Edges::const_iterator e = root_edges.begin(); e != root_edges.end(); 200 for (Edges::const_iterator e = root_edges.begin(); e != root_edges.end();
199 ++e) { 201 ++e) {
200 const uint32& leads_to = e->second; 202 const uint32_t& leads_to = e->second;
201 tree_[leads_to].set_failure(0); 203 tree_[leads_to].set_failure(0);
202 queue.push(leads_to); 204 queue.push(leads_to);
203 } 205 }
204 206
205 while (!queue.empty()) { 207 while (!queue.empty()) {
206 AhoCorasickNode& current_node = tree_[queue.front()]; 208 AhoCorasickNode& current_node = tree_[queue.front()];
207 queue.pop(); 209 queue.pop();
208 for (Edges::const_iterator e = current_node.edges().begin(); 210 for (Edges::const_iterator e = current_node.edges().begin();
209 e != current_node.edges().end(); ++e) { 211 e != current_node.edges().end(); ++e) {
210 const char& edge_label = e->first; 212 const char& edge_label = e->first;
211 const uint32& leads_to = e->second; 213 const uint32_t& leads_to = e->second;
212 queue.push(leads_to); 214 queue.push(leads_to);
213 215
214 uint32 failure = current_node.failure(); 216 uint32_t failure = current_node.failure();
215 uint32 edge_from_failure = tree_[failure].GetEdge(edge_label); 217 uint32_t edge_from_failure = tree_[failure].GetEdge(edge_label);
216 while (edge_from_failure == AhoCorasickNode::kNoSuchEdge && 218 while (edge_from_failure == AhoCorasickNode::kNoSuchEdge &&
217 failure != 0) { 219 failure != 0) {
218 failure = tree_[failure].failure(); 220 failure = tree_[failure].failure();
219 edge_from_failure = tree_[failure].GetEdge(edge_label); 221 edge_from_failure = tree_[failure].GetEdge(edge_label);
220 } 222 }
221 223
222 const uint32 follow_in_case_of_failure = 224 const uint32_t follow_in_case_of_failure =
223 edge_from_failure != AhoCorasickNode::kNoSuchEdge 225 edge_from_failure != AhoCorasickNode::kNoSuchEdge ? edge_from_failure
224 ? edge_from_failure 226 : 0;
225 : 0;
226 tree_[leads_to].set_failure(follow_in_case_of_failure); 227 tree_[leads_to].set_failure(follow_in_case_of_failure);
227 tree_[leads_to].AddMatches(tree_[follow_in_case_of_failure].matches()); 228 tree_[leads_to].AddMatches(tree_[follow_in_case_of_failure].matches());
228 } 229 }
229 } 230 }
230 } 231 }
231 232
232 const uint32 SubstringSetMatcher::AhoCorasickNode::kNoSuchEdge = 0xFFFFFFFF; 233 const uint32_t SubstringSetMatcher::AhoCorasickNode::kNoSuchEdge = 0xFFFFFFFF;
233 234
234 SubstringSetMatcher::AhoCorasickNode::AhoCorasickNode() 235 SubstringSetMatcher::AhoCorasickNode::AhoCorasickNode()
235 : failure_(kNoSuchEdge) {} 236 : failure_(kNoSuchEdge) {}
236 237
237 SubstringSetMatcher::AhoCorasickNode::~AhoCorasickNode() {} 238 SubstringSetMatcher::AhoCorasickNode::~AhoCorasickNode() {}
238 239
239 SubstringSetMatcher::AhoCorasickNode::AhoCorasickNode( 240 SubstringSetMatcher::AhoCorasickNode::AhoCorasickNode(
240 const SubstringSetMatcher::AhoCorasickNode& other) 241 const SubstringSetMatcher::AhoCorasickNode& other)
241 : edges_(other.edges_), 242 : edges_(other.edges_),
242 failure_(other.failure_), 243 failure_(other.failure_),
243 matches_(other.matches_) {} 244 matches_(other.matches_) {}
244 245
245 SubstringSetMatcher::AhoCorasickNode& 246 SubstringSetMatcher::AhoCorasickNode&
246 SubstringSetMatcher::AhoCorasickNode::operator=( 247 SubstringSetMatcher::AhoCorasickNode::operator=(
247 const SubstringSetMatcher::AhoCorasickNode& other) { 248 const SubstringSetMatcher::AhoCorasickNode& other) {
248 edges_ = other.edges_; 249 edges_ = other.edges_;
249 failure_ = other.failure_; 250 failure_ = other.failure_;
250 matches_ = other.matches_; 251 matches_ = other.matches_;
251 return *this; 252 return *this;
252 } 253 }
253 254
254 uint32 SubstringSetMatcher::AhoCorasickNode::GetEdge(char c) const { 255 uint32_t SubstringSetMatcher::AhoCorasickNode::GetEdge(char c) const {
255 Edges::const_iterator i = edges_.find(c); 256 Edges::const_iterator i = edges_.find(c);
256 return i == edges_.end() ? kNoSuchEdge : i->second; 257 return i == edges_.end() ? kNoSuchEdge : i->second;
257 } 258 }
258 259
259 void SubstringSetMatcher::AhoCorasickNode::SetEdge(char c, uint32 node) { 260 void SubstringSetMatcher::AhoCorasickNode::SetEdge(char c, uint32_t node) {
260 edges_[c] = node; 261 edges_[c] = node;
261 } 262 }
262 263
263 void SubstringSetMatcher::AhoCorasickNode::AddMatch(StringPattern::ID id) { 264 void SubstringSetMatcher::AhoCorasickNode::AddMatch(StringPattern::ID id) {
264 matches_.insert(id); 265 matches_.insert(id);
265 } 266 }
266 267
267 void SubstringSetMatcher::AhoCorasickNode::AddMatches( 268 void SubstringSetMatcher::AhoCorasickNode::AddMatches(
268 const SubstringSetMatcher::AhoCorasickNode::Matches& matches) { 269 const SubstringSetMatcher::AhoCorasickNode::Matches& matches) {
269 matches_.insert(matches.begin(), matches.end()); 270 matches_.insert(matches.begin(), matches.end());
270 } 271 }
271 272
272 } // namespace url_matcher 273 } // namespace url_matcher
OLDNEW
« no previous file with comments | « components/url_matcher/substring_set_matcher.h ('k') | components/url_matcher/substring_set_matcher_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698