| OLD | NEW |
| 1 // Copyright 2006 The RE2 Authors. All Rights Reserved. | 1 // Copyright 2006 The RE2 Authors. All Rights Reserved. |
| 2 // Use of this source code is governed by a BSD-style | 2 // Use of this source code is governed by a BSD-style |
| 3 // license that can be found in the LICENSE file. | 3 // license that can be found in the LICENSE file. |
| 4 | 4 |
| 5 // Format a regular expression structure as a string. | 5 // Format a regular expression structure as a string. |
| 6 // Tested by parse_test.cc | 6 // Tested by parse_test.cc |
| 7 | 7 |
| 8 #include "util/util.h" | 8 #include "util/util.h" |
| 9 #include "re2/regexp.h" | 9 #include "re2/regexp.h" |
| 10 #include "re2/walker-inl.h" | 10 #include "re2/walker-inl.h" |
| (...skipping 24 matching lines...) Expand all Loading... |
| 35 virtual int PreVisit(Regexp* re, int parent_arg, bool* stop); | 35 virtual int PreVisit(Regexp* re, int parent_arg, bool* stop); |
| 36 virtual int PostVisit(Regexp* re, int parent_arg, int pre_arg, | 36 virtual int PostVisit(Regexp* re, int parent_arg, int pre_arg, |
| 37 int* child_args, int nchild_args); | 37 int* child_args, int nchild_args); |
| 38 virtual int ShortVisit(Regexp* re, int parent_arg) { | 38 virtual int ShortVisit(Regexp* re, int parent_arg) { |
| 39 return 0; | 39 return 0; |
| 40 } | 40 } |
| 41 | 41 |
| 42 private: | 42 private: |
| 43 string* t_; // The string the walker appends to. | 43 string* t_; // The string the walker appends to. |
| 44 | 44 |
| 45 DISALLOW_COPY_AND_ASSIGN(ToStringWalker); | 45 DISALLOW_EVIL_CONSTRUCTORS(ToStringWalker); |
| 46 }; | 46 }; |
| 47 | 47 |
| 48 string Regexp::ToString() { | 48 string Regexp::ToString() { |
| 49 string t; | 49 string t; |
| 50 ToStringWalker w(&t); | 50 ToStringWalker w(&t); |
| 51 w.WalkExponential(this, PrecToplevel, 100000); | 51 w.WalkExponential(this, PrecToplevel, 100000); |
| 52 if (w.stopped_early()) | 52 if (w.stopped_early()) |
| 53 t += " [truncated]"; | 53 t += " [truncated]"; |
| 54 return t; | 54 return t; |
| 55 } | 55 } |
| (...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 87 break; | 87 break; |
| 88 | 88 |
| 89 case kRegexpAlternate: | 89 case kRegexpAlternate: |
| 90 if (prec < PrecAlternate) | 90 if (prec < PrecAlternate) |
| 91 t_->append("(?:"); | 91 t_->append("(?:"); |
| 92 nprec = PrecAlternate; | 92 nprec = PrecAlternate; |
| 93 break; | 93 break; |
| 94 | 94 |
| 95 case kRegexpCapture: | 95 case kRegexpCapture: |
| 96 t_->append("("); | 96 t_->append("("); |
| 97 if (re->cap() == 0) | |
| 98 LOG(DFATAL) << "kRegexpCapture cap() == 0"; | |
| 99 if (re->name()) { | 97 if (re->name()) { |
| 100 t_->append("?P<"); | 98 t_->append("?P<"); |
| 101 t_->append(*re->name()); | 99 t_->append(*re->name()); |
| 102 t_->append(">"); | 100 t_->append(">"); |
| 103 } | 101 } |
| 104 nprec = PrecParen; | 102 nprec = PrecParen; |
| 105 break; | 103 break; |
| 106 | 104 |
| 107 case kRegexpStar: | 105 case kRegexpStar: |
| 108 case kRegexpPlus: | 106 case kRegexpPlus: |
| 109 case kRegexpQuest: | 107 case kRegexpQuest: |
| 110 case kRegexpRepeat: | 108 case kRegexpRepeat: |
| 111 if (prec < PrecUnary) | 109 if (prec < PrecUnary) |
| 112 t_->append("(?:"); | 110 t_->append("(?:"); |
| 113 // The subprecedence here is PrecAtom instead of PrecUnary | 111 // The subprecedence here is PrecAtom instead of PrecUnary |
| 114 // because PCRE treats two unary ops in a row as a parse error. | 112 // because PCRE treats two unary ops in a row as a parse error. |
| 115 nprec = PrecAtom; | 113 nprec = PrecAtom; |
| 116 break; | 114 break; |
| 117 } | 115 } |
| 118 | 116 |
| 119 return nprec; | 117 return nprec; |
| 120 } | 118 } |
| 121 | 119 |
| 122 static void AppendLiteral(string *t, Rune r, bool foldcase) { | 120 static void AppendLiteral(string *t, Rune r, bool foldcase) { |
| 123 if (r != 0 && r < 0x80 && strchr("(){}[]*+?|.^$\\", r)) { | 121 if (r != 0 && r < 0x80 && strchr("(){}[]*+?|.^$\\", r)) { |
| 124 t->append(1, '\\'); | 122 t->append(1, '\\'); |
| 125 t->append(1, static_cast<char>(r)); | 123 t->append(1, r); |
| 126 } else if (foldcase && 'a' <= r && r <= 'z') { | 124 } else if (foldcase && 'a' <= r && r <= 'z') { |
| 127 if ('a' <= r && r <= 'z') | 125 if ('a' <= r && r <= 'z') |
| 128 r += 'A' - 'a'; | 126 r += 'A' - 'a'; |
| 129 t->append(1, '['); | 127 t->append(1, '['); |
| 130 t->append(1, static_cast<char>(r)); | 128 t->append(1, r); |
| 131 t->append(1, static_cast<char>(r) + 'a' - 'A'); | 129 t->append(1, r + 'a' - 'A'); |
| 132 t->append(1, ']'); | 130 t->append(1, ']'); |
| 133 } else { | 131 } else { |
| 134 AppendCCRange(t, r, r); | 132 AppendCCRange(t, r, r); |
| 135 } | 133 } |
| 136 } | 134 } |
| 137 | 135 |
| 138 // Visits re after children are processed. | 136 // Visits re after children are processed. |
| 139 // For childless regexps, all the work is done here. | 137 // For childless regexps, all the work is done here. |
| 140 // For regexps with children, append any unary suffixes or ). | 138 // For regexps with children, append any unary suffixes or ). |
| 141 int ToStringWalker::PostVisit(Regexp* re, int parent_arg, int pre_arg, | 139 int ToStringWalker::PostVisit(Regexp* re, int parent_arg, int pre_arg, |
| 142 int* child_args, int nchild_args) { | 140 int* child_args, int nchild_args) { |
| 143 int prec = parent_arg; | 141 int prec = parent_arg; |
| 144 switch (re->op()) { | 142 switch (re->op()) { |
| 145 case kRegexpNoMatch: | 143 case kRegexpNoMatch: |
| 146 // There's no simple symbol for "no match", but | 144 // There's no simple symbol for "no match", but |
| 147 // [^0-Runemax] excludes everything. | 145 // [^0-Runemax] excludes everything. |
| 148 t_->append("[^\\x00-\\x{10ffff}]"); | 146 t_->append("[^\\x00-\\x{10ffff}]"); |
| 149 break; | 147 break; |
| 150 | 148 |
| 151 case kRegexpEmptyMatch: | 149 case kRegexpEmptyMatch: |
| 152 // Append (?:) to make empty string visible, | 150 // Append (?:) to make empty string visible, |
| 153 // unless this is already being parenthesized. | 151 // unless this is already being parenthesized. |
| 154 if (prec < PrecEmpty) | 152 if (prec < PrecEmpty) |
| 155 t_->append("(?:)"); | 153 t_->append("(?:)"); |
| 156 break; | 154 break; |
| 157 | 155 |
| 158 case kRegexpLiteral: | 156 case kRegexpLiteral: |
| 159 AppendLiteral(t_, re->rune(), | 157 AppendLiteral(t_, re->rune(), re->parse_flags() & Regexp::FoldCase); |
| 160 (re->parse_flags() & Regexp::FoldCase) != 0); | |
| 161 break; | 158 break; |
| 162 | 159 |
| 163 case kRegexpLiteralString: | 160 case kRegexpLiteralString: |
| 164 for (int i = 0; i < re->nrunes(); i++) | 161 for (int i = 0; i < re->nrunes(); i++) |
| 165 AppendLiteral(t_, re->runes()[i], | 162 AppendLiteral(t_, re->runes()[i], re->parse_flags() & Regexp::FoldCase); |
| 166 (re->parse_flags() & Regexp::FoldCase) != 0); | |
| 167 if (prec < PrecConcat) | 163 if (prec < PrecConcat) |
| 168 t_->append(")"); | 164 t_->append(")"); |
| 169 break; | 165 break; |
| 170 | 166 |
| 171 case kRegexpConcat: | 167 case kRegexpConcat: |
| 172 if (prec < PrecConcat) | 168 if (prec < PrecConcat) |
| 173 t_->append(")"); | 169 t_->append(")"); |
| 174 break; | 170 break; |
| 175 | 171 |
| 176 case kRegexpAlternate: | 172 case kRegexpAlternate: |
| (...skipping 117 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 294 t_->append("|"); | 290 t_->append("|"); |
| 295 | 291 |
| 296 return 0; | 292 return 0; |
| 297 } | 293 } |
| 298 | 294 |
| 299 // Appends a rune for use in a character class to the string t. | 295 // Appends a rune for use in a character class to the string t. |
| 300 static void AppendCCChar(string* t, Rune r) { | 296 static void AppendCCChar(string* t, Rune r) { |
| 301 if (0x20 <= r && r <= 0x7E) { | 297 if (0x20 <= r && r <= 0x7E) { |
| 302 if (strchr("[]^-\\", r)) | 298 if (strchr("[]^-\\", r)) |
| 303 t->append("\\"); | 299 t->append("\\"); |
| 304 t->append(1, static_cast<char>(r)); | 300 t->append(1, r); |
| 305 return; | 301 return; |
| 306 } | 302 } |
| 307 switch (r) { | 303 switch (r) { |
| 308 default: | 304 default: |
| 309 break; | 305 break; |
| 310 | 306 |
| 311 case '\r': | 307 case '\r': |
| 312 t->append("\\r"); | 308 t->append("\\r"); |
| 313 return; | 309 return; |
| 314 | 310 |
| (...skipping 21 matching lines...) Expand all Loading... |
| 336 if (lo > hi) | 332 if (lo > hi) |
| 337 return; | 333 return; |
| 338 AppendCCChar(t, lo); | 334 AppendCCChar(t, lo); |
| 339 if (lo < hi) { | 335 if (lo < hi) { |
| 340 t->append("-"); | 336 t->append("-"); |
| 341 AppendCCChar(t, hi); | 337 AppendCCChar(t, hi); |
| 342 } | 338 } |
| 343 } | 339 } |
| 344 | 340 |
| 345 } // namespace re2 | 341 } // namespace re2 |
| OLD | NEW |