OLD | NEW |
1 // Copyright 2006 The RE2 Authors. All Rights Reserved. | 1 // Copyright 2006 The RE2 Authors. All Rights Reserved. |
2 // Use of this source code is governed by a BSD-style | 2 // Use of this source code is governed by a BSD-style |
3 // license that can be found in the LICENSE file. | 3 // license that can be found in the LICENSE file. |
4 | 4 |
5 // Format a regular expression structure as a string. | 5 // Format a regular expression structure as a string. |
6 // Tested by parse_test.cc | 6 // Tested by parse_test.cc |
7 | 7 |
8 #include "util/util.h" | 8 #include "util/util.h" |
9 #include "re2/regexp.h" | 9 #include "re2/regexp.h" |
10 #include "re2/walker-inl.h" | 10 #include "re2/walker-inl.h" |
(...skipping 24 matching lines...) Expand all Loading... |
35 virtual int PreVisit(Regexp* re, int parent_arg, bool* stop); | 35 virtual int PreVisit(Regexp* re, int parent_arg, bool* stop); |
36 virtual int PostVisit(Regexp* re, int parent_arg, int pre_arg, | 36 virtual int PostVisit(Regexp* re, int parent_arg, int pre_arg, |
37 int* child_args, int nchild_args); | 37 int* child_args, int nchild_args); |
38 virtual int ShortVisit(Regexp* re, int parent_arg) { | 38 virtual int ShortVisit(Regexp* re, int parent_arg) { |
39 return 0; | 39 return 0; |
40 } | 40 } |
41 | 41 |
42 private: | 42 private: |
43 string* t_; // The string the walker appends to. | 43 string* t_; // The string the walker appends to. |
44 | 44 |
45 DISALLOW_COPY_AND_ASSIGN(ToStringWalker); | 45 DISALLOW_EVIL_CONSTRUCTORS(ToStringWalker); |
46 }; | 46 }; |
47 | 47 |
48 string Regexp::ToString() { | 48 string Regexp::ToString() { |
49 string t; | 49 string t; |
50 ToStringWalker w(&t); | 50 ToStringWalker w(&t); |
51 w.WalkExponential(this, PrecToplevel, 100000); | 51 w.WalkExponential(this, PrecToplevel, 100000); |
52 if (w.stopped_early()) | 52 if (w.stopped_early()) |
53 t += " [truncated]"; | 53 t += " [truncated]"; |
54 return t; | 54 return t; |
55 } | 55 } |
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
87 break; | 87 break; |
88 | 88 |
89 case kRegexpAlternate: | 89 case kRegexpAlternate: |
90 if (prec < PrecAlternate) | 90 if (prec < PrecAlternate) |
91 t_->append("(?:"); | 91 t_->append("(?:"); |
92 nprec = PrecAlternate; | 92 nprec = PrecAlternate; |
93 break; | 93 break; |
94 | 94 |
95 case kRegexpCapture: | 95 case kRegexpCapture: |
96 t_->append("("); | 96 t_->append("("); |
97 if (re->cap() == 0) | |
98 LOG(DFATAL) << "kRegexpCapture cap() == 0"; | |
99 if (re->name()) { | 97 if (re->name()) { |
100 t_->append("?P<"); | 98 t_->append("?P<"); |
101 t_->append(*re->name()); | 99 t_->append(*re->name()); |
102 t_->append(">"); | 100 t_->append(">"); |
103 } | 101 } |
104 nprec = PrecParen; | 102 nprec = PrecParen; |
105 break; | 103 break; |
106 | 104 |
107 case kRegexpStar: | 105 case kRegexpStar: |
108 case kRegexpPlus: | 106 case kRegexpPlus: |
109 case kRegexpQuest: | 107 case kRegexpQuest: |
110 case kRegexpRepeat: | 108 case kRegexpRepeat: |
111 if (prec < PrecUnary) | 109 if (prec < PrecUnary) |
112 t_->append("(?:"); | 110 t_->append("(?:"); |
113 // The subprecedence here is PrecAtom instead of PrecUnary | 111 // The subprecedence here is PrecAtom instead of PrecUnary |
114 // because PCRE treats two unary ops in a row as a parse error. | 112 // because PCRE treats two unary ops in a row as a parse error. |
115 nprec = PrecAtom; | 113 nprec = PrecAtom; |
116 break; | 114 break; |
117 } | 115 } |
118 | 116 |
119 return nprec; | 117 return nprec; |
120 } | 118 } |
121 | 119 |
122 static void AppendLiteral(string *t, Rune r, bool foldcase) { | 120 static void AppendLiteral(string *t, Rune r, bool foldcase) { |
123 if (r != 0 && r < 0x80 && strchr("(){}[]*+?|.^$\\", r)) { | 121 if (r != 0 && r < 0x80 && strchr("(){}[]*+?|.^$\\", r)) { |
124 t->append(1, '\\'); | 122 t->append(1, '\\'); |
125 t->append(1, static_cast<char>(r)); | 123 t->append(1, r); |
126 } else if (foldcase && 'a' <= r && r <= 'z') { | 124 } else if (foldcase && 'a' <= r && r <= 'z') { |
127 if ('a' <= r && r <= 'z') | 125 if ('a' <= r && r <= 'z') |
128 r += 'A' - 'a'; | 126 r += 'A' - 'a'; |
129 t->append(1, '['); | 127 t->append(1, '['); |
130 t->append(1, static_cast<char>(r)); | 128 t->append(1, r); |
131 t->append(1, static_cast<char>(r) + 'a' - 'A'); | 129 t->append(1, r + 'a' - 'A'); |
132 t->append(1, ']'); | 130 t->append(1, ']'); |
133 } else { | 131 } else { |
134 AppendCCRange(t, r, r); | 132 AppendCCRange(t, r, r); |
135 } | 133 } |
136 } | 134 } |
137 | 135 |
138 // Visits re after children are processed. | 136 // Visits re after children are processed. |
139 // For childless regexps, all the work is done here. | 137 // For childless regexps, all the work is done here. |
140 // For regexps with children, append any unary suffixes or ). | 138 // For regexps with children, append any unary suffixes or ). |
141 int ToStringWalker::PostVisit(Regexp* re, int parent_arg, int pre_arg, | 139 int ToStringWalker::PostVisit(Regexp* re, int parent_arg, int pre_arg, |
142 int* child_args, int nchild_args) { | 140 int* child_args, int nchild_args) { |
143 int prec = parent_arg; | 141 int prec = parent_arg; |
144 switch (re->op()) { | 142 switch (re->op()) { |
145 case kRegexpNoMatch: | 143 case kRegexpNoMatch: |
146 // There's no simple symbol for "no match", but | 144 // There's no simple symbol for "no match", but |
147 // [^0-Runemax] excludes everything. | 145 // [^0-Runemax] excludes everything. |
148 t_->append("[^\\x00-\\x{10ffff}]"); | 146 t_->append("[^\\x00-\\x{10ffff}]"); |
149 break; | 147 break; |
150 | 148 |
151 case kRegexpEmptyMatch: | 149 case kRegexpEmptyMatch: |
152 // Append (?:) to make empty string visible, | 150 // Append (?:) to make empty string visible, |
153 // unless this is already being parenthesized. | 151 // unless this is already being parenthesized. |
154 if (prec < PrecEmpty) | 152 if (prec < PrecEmpty) |
155 t_->append("(?:)"); | 153 t_->append("(?:)"); |
156 break; | 154 break; |
157 | 155 |
158 case kRegexpLiteral: | 156 case kRegexpLiteral: |
159 AppendLiteral(t_, re->rune(), | 157 AppendLiteral(t_, re->rune(), re->parse_flags() & Regexp::FoldCase); |
160 (re->parse_flags() & Regexp::FoldCase) != 0); | |
161 break; | 158 break; |
162 | 159 |
163 case kRegexpLiteralString: | 160 case kRegexpLiteralString: |
164 for (int i = 0; i < re->nrunes(); i++) | 161 for (int i = 0; i < re->nrunes(); i++) |
165 AppendLiteral(t_, re->runes()[i], | 162 AppendLiteral(t_, re->runes()[i], re->parse_flags() & Regexp::FoldCase); |
166 (re->parse_flags() & Regexp::FoldCase) != 0); | |
167 if (prec < PrecConcat) | 163 if (prec < PrecConcat) |
168 t_->append(")"); | 164 t_->append(")"); |
169 break; | 165 break; |
170 | 166 |
171 case kRegexpConcat: | 167 case kRegexpConcat: |
172 if (prec < PrecConcat) | 168 if (prec < PrecConcat) |
173 t_->append(")"); | 169 t_->append(")"); |
174 break; | 170 break; |
175 | 171 |
176 case kRegexpAlternate: | 172 case kRegexpAlternate: |
(...skipping 117 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
294 t_->append("|"); | 290 t_->append("|"); |
295 | 291 |
296 return 0; | 292 return 0; |
297 } | 293 } |
298 | 294 |
299 // Appends a rune for use in a character class to the string t. | 295 // Appends a rune for use in a character class to the string t. |
300 static void AppendCCChar(string* t, Rune r) { | 296 static void AppendCCChar(string* t, Rune r) { |
301 if (0x20 <= r && r <= 0x7E) { | 297 if (0x20 <= r && r <= 0x7E) { |
302 if (strchr("[]^-\\", r)) | 298 if (strchr("[]^-\\", r)) |
303 t->append("\\"); | 299 t->append("\\"); |
304 t->append(1, static_cast<char>(r)); | 300 t->append(1, r); |
305 return; | 301 return; |
306 } | 302 } |
307 switch (r) { | 303 switch (r) { |
308 default: | 304 default: |
309 break; | 305 break; |
310 | 306 |
311 case '\r': | 307 case '\r': |
312 t->append("\\r"); | 308 t->append("\\r"); |
313 return; | 309 return; |
314 | 310 |
(...skipping 21 matching lines...) Expand all Loading... |
336 if (lo > hi) | 332 if (lo > hi) |
337 return; | 333 return; |
338 AppendCCChar(t, lo); | 334 AppendCCChar(t, lo); |
339 if (lo < hi) { | 335 if (lo < hi) { |
340 t->append("-"); | 336 t->append("-"); |
341 AppendCCChar(t, hi); | 337 AppendCCChar(t, hi); |
342 } | 338 } |
343 } | 339 } |
344 | 340 |
345 } // namespace re2 | 341 } // namespace re2 |
OLD | NEW |