| OLD | NEW |
| (Empty) |
| 1 // Copyright 2006 The RE2 Authors. All Rights Reserved. | |
| 2 // Use of this source code is governed by a BSD-style | |
| 3 // license that can be found in the LICENSE file. | |
| 4 | |
| 5 // Helper class for traversing Regexps without recursion. | |
| 6 // Clients should declare their own subclasses that override | |
| 7 // the PreVisit and PostVisit methods, which are called before | |
| 8 // and after visiting the subexpressions. | |
| 9 | |
| 10 // Not quite the Visitor pattern, because (among other things) | |
| 11 // the Visitor pattern is recursive. | |
| 12 | |
| 13 #ifndef RE2_WALKER_INL_H__ | |
| 14 #define RE2_WALKER_INL_H__ | |
| 15 | |
| 16 #include "re2/regexp.h" | |
| 17 | |
| 18 namespace re2 { | |
| 19 | |
| 20 template<typename T> struct WalkState; | |
| 21 | |
| 22 template<typename T> class Regexp::Walker { | |
| 23 public: | |
| 24 Walker(); | |
| 25 virtual ~Walker(); | |
| 26 | |
| 27 // Virtual method called before visiting re's children. | |
| 28 // PreVisit passes ownership of its return value to its caller. | |
| 29 // The Arg* that PreVisit returns will be passed to PostVisit as pre_arg | |
| 30 // and passed to the child PreVisits and PostVisits as parent_arg. | |
| 31 // At the top-most Regexp, parent_arg is arg passed to walk. | |
| 32 // If PreVisit sets *stop to true, the walk does not recurse | |
| 33 // into the children. Instead it behaves as though the return | |
| 34 // value from PreVisit is the return value from PostVisit. | |
| 35 // The default PreVisit returns parent_arg. | |
| 36 virtual T PreVisit(Regexp* re, T parent_arg, bool* stop); | |
| 37 | |
| 38 // Virtual method called after visiting re's children. | |
| 39 // The pre_arg is the T that PreVisit returned. | |
| 40 // The child_args is a vector of the T that the child PostVisits returned. | |
| 41 // PostVisit takes ownership of pre_arg. | |
| 42 // PostVisit takes ownership of the Ts | |
| 43 // in *child_args, but not the vector itself. | |
| 44 // PostVisit passes ownership of its return value | |
| 45 // to its caller. | |
| 46 // The default PostVisit simply returns pre_arg. | |
| 47 virtual T PostVisit(Regexp* re, T parent_arg, T pre_arg, | |
| 48 T* child_args, int nchild_args); | |
| 49 | |
| 50 // Virtual method called to copy a T, | |
| 51 // when Walk notices that more than one child is the same re. | |
| 52 virtual T Copy(T arg); | |
| 53 | |
| 54 // Virtual method called to do a "quick visit" of the re, | |
| 55 // but not its children. Only called once the visit budget | |
| 56 // has been used up and we're trying to abort the walk | |
| 57 // as quickly as possible. Should return a value that | |
| 58 // makes sense for the parent PostVisits still to be run. | |
| 59 // This function is (hopefully) only called by | |
| 60 // WalkExponential, but must be implemented by all clients, | |
| 61 // just in case. | |
| 62 virtual T ShortVisit(Regexp* re, T parent_arg) = 0; | |
| 63 | |
| 64 // Walks over a regular expression. | |
| 65 // Top_arg is passed as parent_arg to PreVisit and PostVisit of re. | |
| 66 // Returns the T returned by PostVisit on re. | |
| 67 T Walk(Regexp* re, T top_arg); | |
| 68 | |
| 69 // Like Walk, but doesn't use Copy. This can lead to | |
| 70 // exponential runtimes on cross-linked Regexps like the | |
| 71 // ones generated by Simplify. To help limit this, | |
| 72 // at most max_visits nodes will be visited and then | |
| 73 // the walk will be cut off early. | |
| 74 // If the walk *is* cut off early, ShortVisit(re) | |
| 75 // will be called on regexps that cannot be fully | |
| 76 // visited rather than calling PreVisit/PostVisit. | |
| 77 T WalkExponential(Regexp* re, T top_arg, int max_visits); | |
| 78 | |
| 79 // Clears the stack. Should never be necessary, since | |
| 80 // Walk always enters and exits with an empty stack. | |
| 81 // Logs DFATAL if stack is not already clear. | |
| 82 void Reset(); | |
| 83 | |
| 84 // Returns whether walk was cut off. | |
| 85 bool stopped_early() { return stopped_early_; } | |
| 86 | |
| 87 private: | |
| 88 // Walk state for the entire traversal. | |
| 89 stack<WalkState<T> >* stack_; | |
| 90 bool stopped_early_; | |
| 91 int max_visits_; | |
| 92 | |
| 93 T WalkInternal(Regexp* re, T top_arg, bool use_copy); | |
| 94 | |
| 95 DISALLOW_COPY_AND_ASSIGN(Walker); | |
| 96 }; | |
| 97 | |
| 98 template<typename T> T Regexp::Walker<T>::PreVisit(Regexp* re, | |
| 99 T parent_arg, | |
| 100 bool* stop) { | |
| 101 return parent_arg; | |
| 102 } | |
| 103 | |
| 104 template<typename T> T Regexp::Walker<T>::PostVisit(Regexp* re, | |
| 105 T parent_arg, | |
| 106 T pre_arg, | |
| 107 T* child_args, | |
| 108 int nchild_args) { | |
| 109 return pre_arg; | |
| 110 } | |
| 111 | |
| 112 template<typename T> T Regexp::Walker<T>::Copy(T arg) { | |
| 113 return arg; | |
| 114 } | |
| 115 | |
| 116 // State about a single level in the traversal. | |
| 117 template<typename T> struct WalkState { | |
| 118 WalkState<T>(Regexp* re, T parent) | |
| 119 : re(re), | |
| 120 n(-1), | |
| 121 parent_arg(parent), | |
| 122 child_args(NULL) { } | |
| 123 | |
| 124 Regexp* re; // The regexp | |
| 125 int n; // The index of the next child to process; -1 means need to PreVisit | |
| 126 T parent_arg; // Accumulated arguments. | |
| 127 T pre_arg; | |
| 128 T child_arg; // One-element buffer for child_args. | |
| 129 T* child_args; | |
| 130 }; | |
| 131 | |
| 132 template<typename T> Regexp::Walker<T>::Walker() { | |
| 133 stack_ = new stack<WalkState<T> >; | |
| 134 stopped_early_ = false; | |
| 135 } | |
| 136 | |
| 137 template<typename T> Regexp::Walker<T>::~Walker() { | |
| 138 Reset(); | |
| 139 delete stack_; | |
| 140 } | |
| 141 | |
| 142 // Clears the stack. Should never be necessary, since | |
| 143 // Walk always enters and exits with an empty stack. | |
| 144 // Logs DFATAL if stack is not already clear. | |
| 145 template<typename T> void Regexp::Walker<T>::Reset() { | |
| 146 if (stack_ && stack_->size() > 0) { | |
| 147 LOG(DFATAL) << "Stack not empty."; | |
| 148 while (stack_->size() > 0) { | |
| 149 delete stack_->top().child_args; | |
| 150 stack_->pop(); | |
| 151 } | |
| 152 } | |
| 153 } | |
| 154 | |
| 155 template<typename T> T Regexp::Walker<T>::WalkInternal(Regexp* re, T top_arg, | |
| 156 bool use_copy) { | |
| 157 Reset(); | |
| 158 | |
| 159 if (re == NULL) { | |
| 160 LOG(DFATAL) << "Walk NULL"; | |
| 161 return top_arg; | |
| 162 } | |
| 163 | |
| 164 stack_->push(WalkState<T>(re, top_arg)); | |
| 165 | |
| 166 WalkState<T>* s; | |
| 167 for (;;) { | |
| 168 T t; | |
| 169 s = &stack_->top(); | |
| 170 Regexp* re = s->re; | |
| 171 switch (s->n) { | |
| 172 case -1: { | |
| 173 if (--max_visits_ < 0) { | |
| 174 stopped_early_ = true; | |
| 175 t = ShortVisit(re, s->parent_arg); | |
| 176 break; | |
| 177 } | |
| 178 bool stop = false; | |
| 179 s->pre_arg = PreVisit(re, s->parent_arg, &stop); | |
| 180 if (stop) { | |
| 181 t = s->pre_arg; | |
| 182 break; | |
| 183 } | |
| 184 s->n = 0; | |
| 185 s->child_args = NULL; | |
| 186 if (re->nsub_ == 1) | |
| 187 s->child_args = &s->child_arg; | |
| 188 else if (re->nsub_ > 1) | |
| 189 s->child_args = new T[re->nsub_]; | |
| 190 // Fall through. | |
| 191 } | |
| 192 default: { | |
| 193 if (re->nsub_ > 0) { | |
| 194 Regexp** sub = re->sub(); | |
| 195 if (s->n < re->nsub_) { | |
| 196 if (use_copy && s->n > 0 && sub[s->n - 1] == sub[s->n]) { | |
| 197 s->child_args[s->n] = Copy(s->child_args[s->n - 1]); | |
| 198 s->n++; | |
| 199 } else { | |
| 200 stack_->push(WalkState<T>(sub[s->n], s->pre_arg)); | |
| 201 } | |
| 202 continue; | |
| 203 } | |
| 204 } | |
| 205 | |
| 206 t = PostVisit(re, s->parent_arg, s->pre_arg, s->child_args, s->n); | |
| 207 if (re->nsub_ > 1) | |
| 208 delete[] s->child_args; | |
| 209 break; | |
| 210 } | |
| 211 } | |
| 212 | |
| 213 // We've finished stack_->top(). | |
| 214 // Update next guy down. | |
| 215 stack_->pop(); | |
| 216 if (stack_->size() == 0) | |
| 217 return t; | |
| 218 s = &stack_->top(); | |
| 219 if (s->child_args != NULL) | |
| 220 s->child_args[s->n] = t; | |
| 221 else | |
| 222 s->child_arg = t; | |
| 223 s->n++; | |
| 224 } | |
| 225 } | |
| 226 | |
| 227 template<typename T> T Regexp::Walker<T>::Walk(Regexp* re, T top_arg) { | |
| 228 // Without the exponential walking behavior, | |
| 229 // this budget should be more than enough for any | |
| 230 // regexp, and yet not enough to get us in trouble | |
| 231 // as far as CPU time. | |
| 232 max_visits_ = 1000000; | |
| 233 return WalkInternal(re, top_arg, true); | |
| 234 } | |
| 235 | |
| 236 template<typename T> T Regexp::Walker<T>::WalkExponential(Regexp* re, T top_arg, | |
| 237 int max_visits) { | |
| 238 max_visits_ = max_visits; | |
| 239 return WalkInternal(re, top_arg, false); | |
| 240 } | |
| 241 | |
| 242 } // namespace re2 | |
| 243 | |
| 244 #endif // RE2_WALKER_INL_H__ | |
| OLD | NEW |