Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(788)

Side by Side Diff: third_party/re2/re2/prefilter_tree.h

Issue 1544433002: Replace RE2 import with a dependency (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Re-Added LICENSE and OWNERS file Created 4 years, 12 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « third_party/re2/re2/prefilter.cc ('k') | third_party/re2/re2/prefilter_tree.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright 2009 The RE2 Authors. All Rights Reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 // The PrefilterTree class is used to form an AND-OR tree of strings
6 // that would trigger each regexp. The 'prefilter' of each regexp is
7 // added tp PrefilterTree, and then PrefilterTree is used to find all
8 // the unique strings across the prefilters. During search, by using
9 // matches from a string matching engine, PrefilterTree deduces the
10 // set of regexps that are to be triggered. The 'string matching
11 // engine' itself is outside of this class, and the caller can use any
12 // favorite engine. PrefilterTree provides a set of strings (called
13 // atoms) that the user of this class should use to do the string
14 // matching.
15 //
16 #ifndef RE2_PREFILTER_TREE_H_
17 #define RE2_PREFILTER_TREE_H_
18
19 #include "util/util.h"
20 #include "util/sparse_array.h"
21
22 namespace re2 {
23
24 typedef SparseArray<int> IntMap;
25 typedef map<int, int> StdIntMap;
26
27 class Prefilter;
28
29 class PrefilterTree {
30 public:
31 PrefilterTree();
32 ~PrefilterTree();
33
34 // Adds the prefilter for the next regexp. Note that we assume that
35 // Add called sequentially for all regexps. All Add calls
36 // must precede Compile.
37 void Add(Prefilter* prefilter);
38
39 // The Compile returns a vector of string in atom_vec.
40 // Call this after all the prefilters are added through Add.
41 // No calls to Add after Compile are allowed.
42 // The caller should use the returned set of strings to do string matching.
43 // Each time a string matches, the corresponding index then has to be
44 // and passed to RegexpsGivenStrings below.
45 void Compile(vector<string>* atom_vec);
46
47 // Given the indices of the atoms that matched, returns the indexes
48 // of regexps that should be searched. The matched_atoms should
49 // contain all the ids of string atoms that were found to match the
50 // content. The caller can use any string match engine to perform
51 // this function. This function is thread safe.
52 void RegexpsGivenStrings(const vector<int>& matched_atoms,
53 vector<int>* regexps) const;
54
55 // Print debug prefilter. Also prints unique ids associated with
56 // nodes of the prefilter of the regexp.
57 void PrintPrefilter(int regexpid);
58
59
60 // Each unique node has a corresponding Entry that helps in
61 // passing the matching trigger information along the tree.
62 struct Entry {
63 public:
64 // How many children should match before this node triggers the
65 // parent. For an atom and an OR node, this is 1 and for an AND
66 // node, it is the number of unique children.
67 int propagate_up_at_count;
68
69 // When this node is ready to trigger the parent, what are the indices
70 // of the parent nodes to trigger. The reason there may be more than
71 // one is because of sharing. For example (abc | def) and (xyz | def)
72 // are two different nodes, but they share the atom 'def'. So when
73 // 'def' matches, it triggers two parents, corresponding to the two
74 // different OR nodes.
75 StdIntMap* parents;
76
77 // When this node is ready to trigger the parent, what are the
78 // regexps that are triggered.
79 vector<int> regexps;
80 };
81
82 private:
83 // This function assigns unique ids to various parts of the
84 // prefilter, by looking at if these nodes are already in the
85 // PrefilterTree.
86 void AssignUniqueIds(vector<string>* atom_vec);
87
88 // Given the matching atoms, find the regexps to be triggered.
89 void PropagateMatch(const vector<int>& atom_ids,
90 IntMap* regexps) const;
91
92 // Returns the prefilter node that has the same NodeString as this
93 // node. For the canonical node, returns node.
94 Prefilter* CanonicalNode(Prefilter* node);
95
96 // A string that uniquely identifies the node. Assumes that the
97 // children of node has already been assigned unique ids.
98 string NodeString(Prefilter* node) const;
99
100 // Recursively constructs a readable prefilter string.
101 string DebugNodeString(Prefilter* node) const;
102
103 // Used for debugging.
104 void PrintDebugInfo();
105
106 // These are all the nodes formed by Compile. Essentially, there is
107 // one node for each unique atom and each unique AND/OR node.
108 vector<Entry> entries_;
109
110 // Map node string to canonical Prefilter node.
111 map<string, Prefilter*> node_map_;
112
113 // indices of regexps that always pass through the filter (since we
114 // found no required literals in these regexps).
115 vector<int> unfiltered_;
116
117 // vector of Prefilter for all regexps.
118 vector<Prefilter*> prefilter_vec_;
119
120 // Atom index in returned strings to entry id mapping.
121 vector<int> atom_index_to_id_;
122
123 // Has the prefilter tree been compiled.
124 bool compiled_;
125
126 DISALLOW_COPY_AND_ASSIGN(PrefilterTree);
127 };
128
129 } // namespace
130
131 #endif // RE2_PREFILTER_TREE_H_
OLDNEW
« no previous file with comments | « third_party/re2/re2/prefilter.cc ('k') | third_party/re2/re2/prefilter_tree.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698