Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(245)

Side by Side Diff: third_party/re2/re2/tostring.cc

Issue 1544433002: Replace RE2 import with a dependency (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Re-Added LICENSE and OWNERS file Created 5 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « third_party/re2/re2/testing/tester.cc ('k') | third_party/re2/re2/unicode.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright 2006 The RE2 Authors. All Rights Reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 // Format a regular expression structure as a string.
6 // Tested by parse_test.cc
7
8 #include "util/util.h"
9 #include "re2/regexp.h"
10 #include "re2/walker-inl.h"
11
12 namespace re2 {
13
14 enum {
15 PrecAtom,
16 PrecUnary,
17 PrecConcat,
18 PrecAlternate,
19 PrecEmpty,
20 PrecParen,
21 PrecToplevel,
22 };
23
24 // Helper function. See description below.
25 static void AppendCCRange(string* t, Rune lo, Rune hi);
26
27 // Walker to generate string in s_.
28 // The arg pointers are actually integers giving the
29 // context precedence.
30 // The child_args are always NULL.
31 class ToStringWalker : public Regexp::Walker<int> {
32 public:
33 explicit ToStringWalker(string* t) : t_(t) {}
34
35 virtual int PreVisit(Regexp* re, int parent_arg, bool* stop);
36 virtual int PostVisit(Regexp* re, int parent_arg, int pre_arg,
37 int* child_args, int nchild_args);
38 virtual int ShortVisit(Regexp* re, int parent_arg) {
39 return 0;
40 }
41
42 private:
43 string* t_; // The string the walker appends to.
44
45 DISALLOW_COPY_AND_ASSIGN(ToStringWalker);
46 };
47
48 string Regexp::ToString() {
49 string t;
50 ToStringWalker w(&t);
51 w.WalkExponential(this, PrecToplevel, 100000);
52 if (w.stopped_early())
53 t += " [truncated]";
54 return t;
55 }
56
57 #define ToString DontCallToString // Avoid accidental recursion.
58
59 // Visits re before children are processed.
60 // Appends ( if needed and passes new precedence to children.
61 int ToStringWalker::PreVisit(Regexp* re, int parent_arg, bool* stop) {
62 int prec = parent_arg;
63 int nprec = PrecAtom;
64
65 switch (re->op()) {
66 case kRegexpNoMatch:
67 case kRegexpEmptyMatch:
68 case kRegexpLiteral:
69 case kRegexpAnyChar:
70 case kRegexpAnyByte:
71 case kRegexpBeginLine:
72 case kRegexpEndLine:
73 case kRegexpBeginText:
74 case kRegexpEndText:
75 case kRegexpWordBoundary:
76 case kRegexpNoWordBoundary:
77 case kRegexpCharClass:
78 case kRegexpHaveMatch:
79 nprec = PrecAtom;
80 break;
81
82 case kRegexpConcat:
83 case kRegexpLiteralString:
84 if (prec < PrecConcat)
85 t_->append("(?:");
86 nprec = PrecConcat;
87 break;
88
89 case kRegexpAlternate:
90 if (prec < PrecAlternate)
91 t_->append("(?:");
92 nprec = PrecAlternate;
93 break;
94
95 case kRegexpCapture:
96 t_->append("(");
97 if (re->cap() == 0)
98 LOG(DFATAL) << "kRegexpCapture cap() == 0";
99 if (re->name()) {
100 t_->append("?P<");
101 t_->append(*re->name());
102 t_->append(">");
103 }
104 nprec = PrecParen;
105 break;
106
107 case kRegexpStar:
108 case kRegexpPlus:
109 case kRegexpQuest:
110 case kRegexpRepeat:
111 if (prec < PrecUnary)
112 t_->append("(?:");
113 // The subprecedence here is PrecAtom instead of PrecUnary
114 // because PCRE treats two unary ops in a row as a parse error.
115 nprec = PrecAtom;
116 break;
117 }
118
119 return nprec;
120 }
121
122 static void AppendLiteral(string *t, Rune r, bool foldcase) {
123 if (r != 0 && r < 0x80 && strchr("(){}[]*+?|.^$\\", r)) {
124 t->append(1, '\\');
125 t->append(1, static_cast<char>(r));
126 } else if (foldcase && 'a' <= r && r <= 'z') {
127 if ('a' <= r && r <= 'z')
128 r += 'A' - 'a';
129 t->append(1, '[');
130 t->append(1, static_cast<char>(r));
131 t->append(1, static_cast<char>(r) + 'a' - 'A');
132 t->append(1, ']');
133 } else {
134 AppendCCRange(t, r, r);
135 }
136 }
137
138 // Visits re after children are processed.
139 // For childless regexps, all the work is done here.
140 // For regexps with children, append any unary suffixes or ).
141 int ToStringWalker::PostVisit(Regexp* re, int parent_arg, int pre_arg,
142 int* child_args, int nchild_args) {
143 int prec = parent_arg;
144 switch (re->op()) {
145 case kRegexpNoMatch:
146 // There's no simple symbol for "no match", but
147 // [^0-Runemax] excludes everything.
148 t_->append("[^\\x00-\\x{10ffff}]");
149 break;
150
151 case kRegexpEmptyMatch:
152 // Append (?:) to make empty string visible,
153 // unless this is already being parenthesized.
154 if (prec < PrecEmpty)
155 t_->append("(?:)");
156 break;
157
158 case kRegexpLiteral:
159 AppendLiteral(t_, re->rune(),
160 (re->parse_flags() & Regexp::FoldCase) != 0);
161 break;
162
163 case kRegexpLiteralString:
164 for (int i = 0; i < re->nrunes(); i++)
165 AppendLiteral(t_, re->runes()[i],
166 (re->parse_flags() & Regexp::FoldCase) != 0);
167 if (prec < PrecConcat)
168 t_->append(")");
169 break;
170
171 case kRegexpConcat:
172 if (prec < PrecConcat)
173 t_->append(")");
174 break;
175
176 case kRegexpAlternate:
177 // Clumsy but workable: the children all appended |
178 // at the end of their strings, so just remove the last one.
179 if ((*t_)[t_->size()-1] == '|')
180 t_->erase(t_->size()-1);
181 else
182 LOG(DFATAL) << "Bad final char: " << t_;
183 if (prec < PrecAlternate)
184 t_->append(")");
185 break;
186
187 case kRegexpStar:
188 t_->append("*");
189 if (re->parse_flags() & Regexp::NonGreedy)
190 t_->append("?");
191 if (prec < PrecUnary)
192 t_->append(")");
193 break;
194
195 case kRegexpPlus:
196 t_->append("+");
197 if (re->parse_flags() & Regexp::NonGreedy)
198 t_->append("?");
199 if (prec < PrecUnary)
200 t_->append(")");
201 break;
202
203 case kRegexpQuest:
204 t_->append("?");
205 if (re->parse_flags() & Regexp::NonGreedy)
206 t_->append("?");
207 if (prec < PrecUnary)
208 t_->append(")");
209 break;
210
211 case kRegexpRepeat:
212 if (re->max() == -1)
213 t_->append(StringPrintf("{%d,}", re->min()));
214 else if (re->min() == re->max())
215 t_->append(StringPrintf("{%d}", re->min()));
216 else
217 t_->append(StringPrintf("{%d,%d}", re->min(), re->max()));
218 if (re->parse_flags() & Regexp::NonGreedy)
219 t_->append("?");
220 if (prec < PrecUnary)
221 t_->append(")");
222 break;
223
224 case kRegexpAnyChar:
225 t_->append(".");
226 break;
227
228 case kRegexpAnyByte:
229 t_->append("\\C");
230 break;
231
232 case kRegexpBeginLine:
233 t_->append("^");
234 break;
235
236 case kRegexpEndLine:
237 t_->append("$");
238 break;
239
240 case kRegexpBeginText:
241 t_->append("(?-m:^)");
242 break;
243
244 case kRegexpEndText:
245 if (re->parse_flags() & Regexp::WasDollar)
246 t_->append("(?-m:$)");
247 else
248 t_->append("\\z");
249 break;
250
251 case kRegexpWordBoundary:
252 t_->append("\\b");
253 break;
254
255 case kRegexpNoWordBoundary:
256 t_->append("\\B");
257 break;
258
259 case kRegexpCharClass: {
260 if (re->cc()->size() == 0) {
261 t_->append("[^\\x00-\\x{10ffff}]");
262 break;
263 }
264 t_->append("[");
265 // Heuristic: show class as negated if it contains the
266 // non-character 0xFFFE.
267 CharClass* cc = re->cc();
268 if (cc->Contains(0xFFFE)) {
269 cc = cc->Negate();
270 t_->append("^");
271 }
272 for (CharClass::iterator i = cc->begin(); i != cc->end(); ++i)
273 AppendCCRange(t_, i->lo, i->hi);
274 if (cc != re->cc())
275 cc->Delete();
276 t_->append("]");
277 break;
278 }
279
280 case kRegexpCapture:
281 t_->append(")");
282 break;
283
284 case kRegexpHaveMatch:
285 // There's no syntax accepted by the parser to generate
286 // this node (it is generated by RE2::Set) so make something
287 // up that is readable but won't compile.
288 t_->append("(?HaveMatch:%d)", re->match_id());
289 break;
290 }
291
292 // If the parent is an alternation, append the | for it.
293 if (prec == PrecAlternate)
294 t_->append("|");
295
296 return 0;
297 }
298
299 // Appends a rune for use in a character class to the string t.
300 static void AppendCCChar(string* t, Rune r) {
301 if (0x20 <= r && r <= 0x7E) {
302 if (strchr("[]^-\\", r))
303 t->append("\\");
304 t->append(1, static_cast<char>(r));
305 return;
306 }
307 switch (r) {
308 default:
309 break;
310
311 case '\r':
312 t->append("\\r");
313 return;
314
315 case '\t':
316 t->append("\\t");
317 return;
318
319 case '\n':
320 t->append("\\n");
321 return;
322
323 case '\f':
324 t->append("\\f");
325 return;
326 }
327
328 if (r < 0x100) {
329 StringAppendF(t, "\\x%02x", static_cast<int>(r));
330 return;
331 }
332 StringAppendF(t, "\\x{%x}", static_cast<int>(r));
333 }
334
335 static void AppendCCRange(string* t, Rune lo, Rune hi) {
336 if (lo > hi)
337 return;
338 AppendCCChar(t, lo);
339 if (lo < hi) {
340 t->append("-");
341 AppendCCChar(t, hi);
342 }
343 }
344
345 } // namespace re2
OLDNEW
« no previous file with comments | « third_party/re2/re2/testing/tester.cc ('k') | third_party/re2/re2/unicode.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698