Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(504)

Side by Side Diff: trunk/src/extensions/common/matcher/substring_set_matcher.h

Issue 15027007: Revert 198871 "Speed improvements to SubstringSetMatcher" (Closed) Base URL: svn://svn.chromium.org/chrome/
Patch Set: Created 7 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | trunk/src/extensions/common/matcher/substring_set_matcher.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #ifndef EXTENSIONS_COMMON_MATCHER_SUBSTRING_SET_MATCHER_H_ 5 #ifndef EXTENSIONS_COMMON_MATCHER_SUBSTRING_SET_MATCHER_H_
6 #define EXTENSIONS_COMMON_MATCHER_SUBSTRING_SET_MATCHER_H_ 6 #define EXTENSIONS_COMMON_MATCHER_SUBSTRING_SET_MATCHER_H_
7 7
8 #include <limits>
9 #include <map> 8 #include <map>
10 #include <set> 9 #include <set>
11 #include <string> 10 #include <string>
12 #include <vector> 11 #include <vector>
13 12
14 #include "base/basictypes.h" 13 #include "base/basictypes.h"
15 #include "extensions/common/matcher/string_pattern.h" 14 #include "extensions/common/matcher/string_pattern.h"
16 15
17 namespace extensions { 16 namespace extensions {
18 17
(...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after
74 // text[i + k] at depth k + 1, we follow a failure edge. This edge 73 // text[i + k] at depth k + 1, we follow a failure edge. This edge
75 // corresponds to the longest proper suffix of text[i, ..., i + k - 1] that 74 // corresponds to the longest proper suffix of text[i, ..., i + k - 1] that
76 // is a prefix of any registered pattern. 75 // is a prefix of any registered pattern.
77 // 76 //
78 // If your brain thinks "Forget it, let's go shopping.", don't worry. 77 // If your brain thinks "Forget it, let's go shopping.", don't worry.
79 // Take a nap and read an introductory text on the Aho Corasick algorithm. 78 // Take a nap and read an introductory text on the Aho Corasick algorithm.
80 // It will make sense. Eventually. 79 // It will make sense. Eventually.
81 class AhoCorasickNode { 80 class AhoCorasickNode {
82 public: 81 public:
83 // Key: label of the edge, value: node index in |tree_| of parent class. 82 // Key: label of the edge, value: node index in |tree_| of parent class.
84 typedef std::map<char, uint32> Edges; 83 typedef std::map<char, int> Edges;
85 typedef std::set<StringPattern::ID> Matches; 84 typedef std::set<StringPattern::ID> Matches;
86 85
87 static const uint32 kNoSuchEdge; // Represents an invalid node index.
88
89 AhoCorasickNode(); 86 AhoCorasickNode();
90 ~AhoCorasickNode(); 87 ~AhoCorasickNode();
91 AhoCorasickNode(const AhoCorasickNode& other); 88 AhoCorasickNode(const AhoCorasickNode& other);
92 AhoCorasickNode& operator=(const AhoCorasickNode& other); 89 AhoCorasickNode& operator=(const AhoCorasickNode& other);
93 90
94 uint32 GetEdge(char c) const; 91 bool HasEdge(char c) const;
95 void SetEdge(char c, uint32 node); 92 int GetEdge(char c) const;
93 void SetEdge(char c, int node);
96 const Edges& edges() const { return edges_; } 94 const Edges& edges() const { return edges_; }
97 95
98 uint32 failure() const { return failure_; } 96 int failure() const { return failure_; }
99 void set_failure(uint32 failure) { failure_ = failure; } 97 void set_failure(int failure) { failure_ = failure; }
100 98
101 void AddMatch(StringPattern::ID id); 99 void AddMatch(StringPattern::ID id);
102 void AddMatches(const Matches& matches); 100 void AddMatches(const Matches& matches);
103 const Matches& matches() const { return matches_; } 101 const Matches& matches() const { return matches_; }
104 102
105 private: 103 private:
106 // Outgoing edges of current node. 104 // Outgoing edges of current node.
107 Edges edges_; 105 Edges edges_;
108 106
109 // Node index that failure edge leads to. 107 // Node index that failure edge leads to.
110 uint32 failure_; 108 int failure_;
111 109
112 // Identifiers of matches. 110 // Identifiers of matches.
113 Matches matches_; 111 Matches matches_;
114 }; 112 };
115 113
116 typedef std::map<StringPattern::ID, const StringPattern*> SubstringPatternMap; 114 void RebuildAhoCorasickTree();
117 typedef std::vector<const StringPattern*> SubstringPatternVector;
118
119 // |sorted_patterns| is a copy of |patterns_| sorted by the pattern string.
120 void RebuildAhoCorasickTree(const SubstringPatternVector& sorted_patterns);
121 115
122 // Inserts a path for |pattern->pattern()| into the tree and adds 116 // Inserts a path for |pattern->pattern()| into the tree and adds
123 // |pattern->id()| to the set of matches. Ownership of |pattern| remains with 117 // |pattern->id()| to the set of matches. Ownership of |pattern| remains with
124 // the caller. 118 // the caller.
125 void InsertPatternIntoAhoCorasickTree(const StringPattern* pattern); 119 void InsertPatternIntoAhoCorasickTree(const StringPattern* pattern);
126 void CreateFailureEdges(); 120 void CreateFailureEdges();
127 121
128 // Set of all registered StringPatterns. Used to regenerate the 122 // Set of all registered StringPatterns. Used to regenerate the
129 // Aho-Corasick tree in case patterns are registered or unregistered. 123 // Aho-Corasick tree in case patterns are registered or unregistered.
130 SubstringPatternMap patterns_; 124 typedef std::map<StringPattern::ID, const StringPattern*>
125 SubstringPatternSet;
126 SubstringPatternSet patterns_;
131 127
132 // The nodes of a Aho-Corasick tree. 128 // The nodes of a Aho-Corasick tree.
133 std::vector<AhoCorasickNode> tree_; 129 std::vector<AhoCorasickNode> tree_;
134 130
135 DISALLOW_COPY_AND_ASSIGN(SubstringSetMatcher); 131 DISALLOW_COPY_AND_ASSIGN(SubstringSetMatcher);
136 }; 132 };
137 133
138 } // namespace extensions 134 } // namespace extensions
139 135
140 #endif // EXTENSIONS_COMMON_MATCHER_SUBSTRING_SET_MATCHER_H_ 136 #endif // EXTENSIONS_COMMON_MATCHER_SUBSTRING_SET_MATCHER_H_
OLDNEW
« no previous file with comments | « no previous file | trunk/src/extensions/common/matcher/substring_set_matcher.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698