| OLD | NEW | 
|---|
| 1 // Copyright 2006 The RE2 Authors.  All Rights Reserved. | 1 // Copyright 2006 The RE2 Authors.  All Rights Reserved. | 
| 2 // Use of this source code is governed by a BSD-style | 2 // Use of this source code is governed by a BSD-style | 
| 3 // license that can be found in the LICENSE file. | 3 // license that can be found in the LICENSE file. | 
| 4 | 4 | 
| 5 // Format a regular expression structure as a string. | 5 // Format a regular expression structure as a string. | 
| 6 // Tested by parse_test.cc | 6 // Tested by parse_test.cc | 
| 7 | 7 | 
| 8 #include "util/util.h" | 8 #include "util/util.h" | 
| 9 #include "re2/regexp.h" | 9 #include "re2/regexp.h" | 
| 10 #include "re2/walker-inl.h" | 10 #include "re2/walker-inl.h" | 
| (...skipping 24 matching lines...) Expand all  Loading... | 
| 35   virtual int PreVisit(Regexp* re, int parent_arg, bool* stop); | 35   virtual int PreVisit(Regexp* re, int parent_arg, bool* stop); | 
| 36   virtual int PostVisit(Regexp* re, int parent_arg, int pre_arg, | 36   virtual int PostVisit(Regexp* re, int parent_arg, int pre_arg, | 
| 37                         int* child_args, int nchild_args); | 37                         int* child_args, int nchild_args); | 
| 38   virtual int ShortVisit(Regexp* re, int parent_arg) { | 38   virtual int ShortVisit(Regexp* re, int parent_arg) { | 
| 39     return 0; | 39     return 0; | 
| 40   } | 40   } | 
| 41 | 41 | 
| 42  private: | 42  private: | 
| 43   string* t_;  // The string the walker appends to. | 43   string* t_;  // The string the walker appends to. | 
| 44 | 44 | 
| 45   DISALLOW_COPY_AND_ASSIGN(ToStringWalker); | 45   DISALLOW_EVIL_CONSTRUCTORS(ToStringWalker); | 
| 46 }; | 46 }; | 
| 47 | 47 | 
| 48 string Regexp::ToString() { | 48 string Regexp::ToString() { | 
| 49   string t; | 49   string t; | 
| 50   ToStringWalker w(&t); | 50   ToStringWalker w(&t); | 
| 51   w.WalkExponential(this, PrecToplevel, 100000); | 51   w.WalkExponential(this, PrecToplevel, 100000); | 
| 52   if (w.stopped_early()) | 52   if (w.stopped_early()) | 
| 53     t += " [truncated]"; | 53     t += " [truncated]"; | 
| 54   return t; | 54   return t; | 
| 55 } | 55 } | 
| (...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 87       break; | 87       break; | 
| 88 | 88 | 
| 89     case kRegexpAlternate: | 89     case kRegexpAlternate: | 
| 90       if (prec < PrecAlternate) | 90       if (prec < PrecAlternate) | 
| 91         t_->append("(?:"); | 91         t_->append("(?:"); | 
| 92       nprec = PrecAlternate; | 92       nprec = PrecAlternate; | 
| 93       break; | 93       break; | 
| 94 | 94 | 
| 95     case kRegexpCapture: | 95     case kRegexpCapture: | 
| 96       t_->append("("); | 96       t_->append("("); | 
| 97       if (re->cap() == 0) |  | 
| 98         LOG(DFATAL) << "kRegexpCapture cap() == 0"; |  | 
| 99       if (re->name()) { | 97       if (re->name()) { | 
| 100         t_->append("?P<"); | 98         t_->append("?P<"); | 
| 101         t_->append(*re->name()); | 99         t_->append(*re->name()); | 
| 102         t_->append(">"); | 100         t_->append(">"); | 
| 103       } | 101       } | 
| 104       nprec = PrecParen; | 102       nprec = PrecParen; | 
| 105       break; | 103       break; | 
| 106 | 104 | 
| 107     case kRegexpStar: | 105     case kRegexpStar: | 
| 108     case kRegexpPlus: | 106     case kRegexpPlus: | 
| 109     case kRegexpQuest: | 107     case kRegexpQuest: | 
| 110     case kRegexpRepeat: | 108     case kRegexpRepeat: | 
| 111       if (prec < PrecUnary) | 109       if (prec < PrecUnary) | 
| 112         t_->append("(?:"); | 110         t_->append("(?:"); | 
| 113       // The subprecedence here is PrecAtom instead of PrecUnary | 111       // The subprecedence here is PrecAtom instead of PrecUnary | 
| 114       // because PCRE treats two unary ops in a row as a parse error. | 112       // because PCRE treats two unary ops in a row as a parse error. | 
| 115       nprec = PrecAtom; | 113       nprec = PrecAtom; | 
| 116       break; | 114       break; | 
| 117   } | 115   } | 
| 118 | 116 | 
| 119   return nprec; | 117   return nprec; | 
| 120 } | 118 } | 
| 121 | 119 | 
| 122 static void AppendLiteral(string *t, Rune r, bool foldcase) { | 120 static void AppendLiteral(string *t, Rune r, bool foldcase) { | 
| 123   if (r != 0 && r < 0x80 && strchr("(){}[]*+?|.^$\\", r)) { | 121   if (r != 0 && r < 0x80 && strchr("(){}[]*+?|.^$\\", r)) { | 
| 124     t->append(1, '\\'); | 122     t->append(1, '\\'); | 
| 125     t->append(1, static_cast<char>(r)); | 123     t->append(1, r); | 
| 126   } else if (foldcase && 'a' <= r && r <= 'z') { | 124   } else if (foldcase && 'a' <= r && r <= 'z') { | 
| 127     if ('a' <= r && r <= 'z') | 125     if ('a' <= r && r <= 'z') | 
| 128       r += 'A' - 'a'; | 126       r += 'A' - 'a'; | 
| 129     t->append(1, '['); | 127     t->append(1, '['); | 
| 130     t->append(1, static_cast<char>(r)); | 128     t->append(1, r); | 
| 131     t->append(1, static_cast<char>(r) + 'a' - 'A'); | 129     t->append(1, r + 'a' - 'A'); | 
| 132     t->append(1, ']'); | 130     t->append(1, ']'); | 
| 133   } else { | 131   } else { | 
| 134     AppendCCRange(t, r, r); | 132     AppendCCRange(t, r, r); | 
| 135   } | 133   } | 
| 136 } | 134 } | 
| 137 | 135 | 
| 138 // Visits re after children are processed. | 136 // Visits re after children are processed. | 
| 139 // For childless regexps, all the work is done here. | 137 // For childless regexps, all the work is done here. | 
| 140 // For regexps with children, append any unary suffixes or ). | 138 // For regexps with children, append any unary suffixes or ). | 
| 141 int ToStringWalker::PostVisit(Regexp* re, int parent_arg, int pre_arg, | 139 int ToStringWalker::PostVisit(Regexp* re, int parent_arg, int pre_arg, | 
| 142                               int* child_args, int nchild_args) { | 140                               int* child_args, int nchild_args) { | 
| 143   int prec = parent_arg; | 141   int prec = parent_arg; | 
| 144   switch (re->op()) { | 142   switch (re->op()) { | 
| 145     case kRegexpNoMatch: | 143     case kRegexpNoMatch: | 
| 146       // There's no simple symbol for "no match", but | 144       // There's no simple symbol for "no match", but | 
| 147       // [^0-Runemax] excludes everything. | 145       // [^0-Runemax] excludes everything. | 
| 148       t_->append("[^\\x00-\\x{10ffff}]"); | 146       t_->append("[^\\x00-\\x{10ffff}]"); | 
| 149       break; | 147       break; | 
| 150 | 148 | 
| 151     case kRegexpEmptyMatch: | 149     case kRegexpEmptyMatch: | 
| 152       // Append (?:) to make empty string visible, | 150       // Append (?:) to make empty string visible, | 
| 153       // unless this is already being parenthesized. | 151       // unless this is already being parenthesized. | 
| 154       if (prec < PrecEmpty) | 152       if (prec < PrecEmpty) | 
| 155         t_->append("(?:)"); | 153         t_->append("(?:)"); | 
| 156       break; | 154       break; | 
| 157 | 155 | 
| 158     case kRegexpLiteral: | 156     case kRegexpLiteral: | 
| 159       AppendLiteral(t_, re->rune(), | 157       AppendLiteral(t_, re->rune(), re->parse_flags() & Regexp::FoldCase); | 
| 160                     (re->parse_flags() & Regexp::FoldCase) != 0); |  | 
| 161       break; | 158       break; | 
| 162 | 159 | 
| 163     case kRegexpLiteralString: | 160     case kRegexpLiteralString: | 
| 164       for (int i = 0; i < re->nrunes(); i++) | 161       for (int i = 0; i < re->nrunes(); i++) | 
| 165         AppendLiteral(t_, re->runes()[i], | 162         AppendLiteral(t_, re->runes()[i], re->parse_flags() & Regexp::FoldCase); | 
| 166                       (re->parse_flags() & Regexp::FoldCase) != 0); |  | 
| 167       if (prec < PrecConcat) | 163       if (prec < PrecConcat) | 
| 168         t_->append(")"); | 164         t_->append(")"); | 
| 169       break; | 165       break; | 
| 170 | 166 | 
| 171     case kRegexpConcat: | 167     case kRegexpConcat: | 
| 172       if (prec < PrecConcat) | 168       if (prec < PrecConcat) | 
| 173         t_->append(")"); | 169         t_->append(")"); | 
| 174       break; | 170       break; | 
| 175 | 171 | 
| 176     case kRegexpAlternate: | 172     case kRegexpAlternate: | 
| (...skipping 117 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 294     t_->append("|"); | 290     t_->append("|"); | 
| 295 | 291 | 
| 296   return 0; | 292   return 0; | 
| 297 } | 293 } | 
| 298 | 294 | 
| 299 // Appends a rune for use in a character class to the string t. | 295 // Appends a rune for use in a character class to the string t. | 
| 300 static void AppendCCChar(string* t, Rune r) { | 296 static void AppendCCChar(string* t, Rune r) { | 
| 301   if (0x20 <= r && r <= 0x7E) { | 297   if (0x20 <= r && r <= 0x7E) { | 
| 302     if (strchr("[]^-\\", r)) | 298     if (strchr("[]^-\\", r)) | 
| 303       t->append("\\"); | 299       t->append("\\"); | 
| 304     t->append(1, static_cast<char>(r)); | 300     t->append(1, r); | 
| 305     return; | 301     return; | 
| 306   } | 302   } | 
| 307   switch (r) { | 303   switch (r) { | 
| 308     default: | 304     default: | 
| 309       break; | 305       break; | 
| 310 | 306 | 
| 311     case '\r': | 307     case '\r': | 
| 312       t->append("\\r"); | 308       t->append("\\r"); | 
| 313       return; | 309       return; | 
| 314 | 310 | 
| (...skipping 21 matching lines...) Expand all  Loading... | 
| 336   if (lo > hi) | 332   if (lo > hi) | 
| 337     return; | 333     return; | 
| 338   AppendCCChar(t, lo); | 334   AppendCCChar(t, lo); | 
| 339   if (lo < hi) { | 335   if (lo < hi) { | 
| 340     t->append("-"); | 336     t->append("-"); | 
| 341     AppendCCChar(t, hi); | 337     AppendCCChar(t, hi); | 
| 342   } | 338   } | 
| 343 } | 339 } | 
| 344 | 340 | 
| 345 }  // namespace re2 | 341 }  // namespace re2 | 
| OLD | NEW | 
|---|