Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(2)

Side by Side Diff: runtime/vm/regexp_ast.h

Issue 678193004: Copy irregexp related code from V8. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: rebase Created 6 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « runtime/vm/regexp_assembler.cc ('k') | runtime/vm/regexp_ast.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file.
4
5 #ifndef VM_REGEXP_AST_H_
6 #define VM_REGEXP_AST_H_
7
8 // SNIP
9
10 namespace dart {
11
12 // SNIP
13
14 class RegExpAlternative;  // Forward declarations of types referenced below before their definitions.
15 class RegExpAssertion;
16 class RegExpAtom;
17 class RegExpBackReference;
18 class RegExpCapture;
19 class RegExpCharacterClass;
20 class RegExpCompiler;  // Not defined in this excerpt; appears below only as a pointer parameter of ToNode().
21 class RegExpDisjunction;
22 class RegExpEmpty;
23 class RegExpLookahead;
24 class RegExpQuantifier;
25 class RegExpText;
26
27 // SNIP
28
29 class RegExpVisitor BASE_EMBEDDED {  // Abstract visitor interface over RegExp tree node types.
30 public:
31 virtual ~RegExpVisitor() { }
32 #define MAKE_CASE(Name) \
33 virtual void* Visit##Name(RegExp##Name*, void* data) = 0;
34 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_CASE)  // One pure virtual Visit<Name>(RegExp<Name>*, void*) per type the list macro names (macro defined outside this excerpt).
35 #undef MAKE_CASE
36 };
37
38
39 class RegExpTree : public ZoneObject {  // Abstract base class of every RegExp tree node class declared in this excerpt.
40 public:
41 static const int kInfinity = kMaxInt;  // Upper cap used by RegExpQuantifier when computing max_match().
42 virtual ~RegExpTree() {}
43 virtual void* Accept(RegExpVisitor* visitor, void* data) = 0;  // Visitor entry point; presumably forwards `data` to the matching Visit* method -- implemented out of line.
44 virtual RegExpNode* ToNode(RegExpCompiler* compiler,
45 RegExpNode* on_success) = 0;  // Pure virtual; every concrete node class below overrides it.
46 virtual bool IsTextElement() { return false; }  // Overridden to return true by RegExpCharacterClass, RegExpAtom and RegExpText.
47 virtual bool IsAnchoredAtStart() { return false; }  // Default: not anchored; several subclasses override.
48 virtual bool IsAnchoredAtEnd() { return false; }  // Default: not anchored; several subclasses override.
49 virtual int min_match() = 0;  // Lower bound reported by the node (e.g. 0 for assertions, 1 for a character class).
50 virtual int max_match() = 0;  // Upper bound reported by the node; RegExpQuantifier caps it at kInfinity.
51 // Returns the interval of registers used for captures within this
52 // expression.
53 virtual Interval CaptureRegisters() { return Interval::Empty(); }  // Default: empty interval.
54 virtual void AppendToText(RegExpText* text, Zone* zone);  // Default body is out of line; the three text-element classes override it.
55 OStream& Print(OStream& os, Zone* zone); // NOLINT
56 #define MAKE_ASTYPE(Name) \
57 virtual RegExp##Name* As##Name(); \
58 virtual bool Is##Name();
59 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_ASTYPE)  // Declares an As<Name>()/Is<Name>() pair per listed type; each subclass overrides its own pair.
60 #undef MAKE_ASTYPE
61 };
62
63
64 class RegExpDisjunction FINAL : public RegExpTree {  // Tree node that holds a list of alternative sub-trees.
65 public:
66 explicit RegExpDisjunction(ZoneList<RegExpTree*>* alternatives);  // Defined out of line; presumably also fills min_match_/max_match_ -- confirm in regexp_ast.cc.
67 virtual void* Accept(RegExpVisitor* visitor, void* data) OVERRIDE;
68 virtual RegExpNode* ToNode(RegExpCompiler* compiler,
69 RegExpNode* on_success) OVERRIDE;
70 virtual RegExpDisjunction* AsDisjunction() OVERRIDE;
71 virtual Interval CaptureRegisters() OVERRIDE;
72 virtual bool IsDisjunction() OVERRIDE;
73 virtual bool IsAnchoredAtStart() OVERRIDE;
74 virtual bool IsAnchoredAtEnd() OVERRIDE;
75 virtual int min_match() OVERRIDE { return min_match_; }  // Returns the stored value; nothing is recomputed here.
76 virtual int max_match() OVERRIDE { return max_match_; }  // Returns the stored value; nothing is recomputed here.
77 ZoneList<RegExpTree*>* alternatives() { return alternatives_; }  // Exposes the list pointer itself, not a copy.
78 private:
79 ZoneList<RegExpTree*>* alternatives_;
80 int min_match_;
81 int max_match_;
82 };
83
84
85 class RegExpAlternative FINAL : public RegExpTree {  // Tree node that holds a list of child nodes (presumably matched in sequence -- confirm in regexp_ast.cc).
86 public:
87 explicit RegExpAlternative(ZoneList<RegExpTree*>* nodes);  // Defined out of line; presumably also fills min_match_/max_match_.
88 virtual void* Accept(RegExpVisitor* visitor, void* data) OVERRIDE;
89 virtual RegExpNode* ToNode(RegExpCompiler* compiler,
90 RegExpNode* on_success) OVERRIDE;
91 virtual RegExpAlternative* AsAlternative() OVERRIDE;
92 virtual Interval CaptureRegisters() OVERRIDE;
93 virtual bool IsAlternative() OVERRIDE;
94 virtual bool IsAnchoredAtStart() OVERRIDE;
95 virtual bool IsAnchoredAtEnd() OVERRIDE;
96 virtual int min_match() OVERRIDE { return min_match_; }  // Returns the stored value; nothing is recomputed here.
97 virtual int max_match() OVERRIDE { return max_match_; }  // Returns the stored value; nothing is recomputed here.
98 ZoneList<RegExpTree*>* nodes() { return nodes_; }  // Exposes the list pointer itself, not a copy.
99 private:
100 ZoneList<RegExpTree*>* nodes_;
101 int min_match_;
102 int max_match_;
103 };
104
105
106 class RegExpAssertion FINAL : public RegExpTree {  // Assertion node; reports a match length of exactly 0.
107 public:
108 enum AssertionType {  // Kind of assertion this node represents.
109 START_OF_LINE,
110 START_OF_INPUT,
111 END_OF_LINE,
112 END_OF_INPUT,
113 BOUNDARY,
114 NON_BOUNDARY
115 };
116 explicit RegExpAssertion(AssertionType type) : assertion_type_(type) { }
117 virtual void* Accept(RegExpVisitor* visitor, void* data) OVERRIDE;
118 virtual RegExpNode* ToNode(RegExpCompiler* compiler,
119 RegExpNode* on_success) OVERRIDE;
120 virtual RegExpAssertion* AsAssertion() OVERRIDE;
121 virtual bool IsAssertion() OVERRIDE;
122 virtual bool IsAnchoredAtStart() OVERRIDE;  // Out of line; presumably depends on assertion_type_ -- confirm.
123 virtual bool IsAnchoredAtEnd() OVERRIDE;  // Out of line; presumably depends on assertion_type_ -- confirm.
124 virtual int min_match() OVERRIDE { return 0; }  // Always 0.
125 virtual int max_match() OVERRIDE { return 0; }  // Always 0.
126 AssertionType assertion_type() { return assertion_type_; }
127 private:
128 AssertionType assertion_type_;
129 };
130
131
132 class CharacterSet FINAL BASE_EMBEDDED {  // A character set given either as a standard-set code or as an explicit range list.
133 public:
134 explicit CharacterSet(uc16 standard_set_type)  // Standard-set form: starts with no range list.
135 : ranges_(NULL),
136 standard_set_type_(standard_set_type) {}
137 explicit CharacterSet(ZoneList<CharacterRange>* ranges)  // Explicit-range form: standard_set_type_ is 0.
138 : ranges_(ranges),
139 standard_set_type_(0) {}
140 ZoneList<CharacterRange>* ranges(Zone* zone);  // Out of line; presumably builds the list on demand when ranges_ is NULL -- confirm.
141 uc16 standard_set_type() { return standard_set_type_; }
142 void set_standard_set_type(uc16 special_set_type) {
143 standard_set_type_ = special_set_type;
144 }
145 bool is_standard() { return standard_set_type_ != 0; }  // True iff a non-zero standard-set code is stored.
146 void Canonicalize();  // Defined out of line.
147 private:
148 ZoneList<CharacterRange>* ranges_;
149 // If non-zero, the value represents a standard set (e.g., all whitespace
150 // characters) without having to expand the ranges.
151 uc16 standard_set_type_;
152 };
153
154
155 class RegExpCharacterClass FINAL : public RegExpTree {  // Character-class node wrapping a CharacterSet plus a negation flag.
156 public:
157 RegExpCharacterClass(ZoneList<CharacterRange>* ranges, bool is_negated)
158 : set_(ranges),
159 is_negated_(is_negated) { }
160 explicit RegExpCharacterClass(uc16 type)  // Standard-set form; is_negated_ is always false here.
161 : set_(type),
162 is_negated_(false) { }
163 virtual void* Accept(RegExpVisitor* visitor, void* data) OVERRIDE;
164 virtual RegExpNode* ToNode(RegExpCompiler* compiler,
165 RegExpNode* on_success) OVERRIDE;
166 virtual RegExpCharacterClass* AsCharacterClass() OVERRIDE;
167 virtual bool IsCharacterClass() OVERRIDE;
168 virtual bool IsTextElement() OVERRIDE { return true; }
169 virtual int min_match() OVERRIDE { return 1; }  // Always 1.
170 virtual int max_match() OVERRIDE { return 1; }  // Always 1.
171 virtual void AppendToText(RegExpText* text, Zone* zone) OVERRIDE;
172 CharacterSet character_set() { return set_; }  // Returns a copy of the set (by value).
173 // TODO(lrn): Remove need for complex version of is_standard that
174 // recognizes a mangled standard set and just do { return set_.is_standard(); }
175 bool is_standard(Zone* zone);
176 // Returns a value representing the standard character set if is_standard()
177 // returns true.
178 // Currently used values are:
179 // s : unicode whitespace
180 // S : unicode non-whitespace
181 // w : ASCII word character (digit, letter, underscore)
182 // W : any character that is not an ASCII word character
183 // d : ASCII digit
184 // D : any character that is not an ASCII digit
185 // . : non-unicode non-newline
186 // * : All characters
187 uc16 standard_type() { return set_.standard_set_type(); }
188 ZoneList<CharacterRange>* ranges(Zone* zone) { return set_.ranges(zone); }
189 bool is_negated() { return is_negated_; }
190
191 private:
192 CharacterSet set_;
193 bool is_negated_;
194 };
195
196
197 class RegExpAtom FINAL : public RegExpTree {  // Node wrapping a vector of uc16 values; min and max match length both equal its length.
198 public:
199 explicit RegExpAtom(Vector<const uc16> data) : data_(data) { }
200 virtual void* Accept(RegExpVisitor* visitor, void* data) OVERRIDE;
201 virtual RegExpNode* ToNode(RegExpCompiler* compiler,
202 RegExpNode* on_success) OVERRIDE;
203 virtual RegExpAtom* AsAtom() OVERRIDE;
204 virtual bool IsAtom() OVERRIDE;
205 virtual bool IsTextElement() OVERRIDE { return true; }
206 virtual int min_match() OVERRIDE { return data_.length(); }  // Same value as max_match().
207 virtual int max_match() OVERRIDE { return data_.length(); }  // Same value as min_match().
208 virtual void AppendToText(RegExpText* text, Zone* zone) OVERRIDE;
209 Vector<const uc16> data() { return data_; }  // Returns the Vector by value.
210 int length() { return data_.length(); }
211 private:
212 Vector<const uc16> data_;
213 };
214
215
216 class RegExpText FINAL : public RegExpTree {  // Node holding a list of TextElements and the running sum of their lengths.
217 public:
218 explicit RegExpText(Zone* zone) : elements_(2, zone), length_(0) {}  // The 2 is presumably the list's initial capacity -- confirm against ZoneList.
219 virtual void* Accept(RegExpVisitor* visitor, void* data) OVERRIDE;
220 virtual RegExpNode* ToNode(RegExpCompiler* compiler,
221 RegExpNode* on_success) OVERRIDE;
222 virtual RegExpText* AsText() OVERRIDE;
223 virtual bool IsText() OVERRIDE;
224 virtual bool IsTextElement() OVERRIDE { return true; }
225 virtual int min_match() OVERRIDE { return length_; }  // Same value as max_match().
226 virtual int max_match() OVERRIDE { return length_; }  // Same value as min_match().
227 virtual void AppendToText(RegExpText* text, Zone* zone) OVERRIDE;
228 void AddElement(TextElement elm, Zone* zone) {  // Appends elm and adds elm.length() to length_.
229 elements_.Add(elm, zone);
230 length_ += elm.length();
231 }
232 ZoneList<TextElement>* elements() { return &elements_; }  // Pointer to the embedded list, not a copy.
233 private:
234 ZoneList<TextElement> elements_;
235 int length_;
236 };
237
238
239 class RegExpQuantifier FINAL : public RegExpTree {  // Node that repeats `body` between min and max times.
240 public:
241 enum QuantifierType { GREEDY, NON_GREEDY, POSSESSIVE };
242 RegExpQuantifier(int min, int max, QuantifierType type, RegExpTree* body)
243 : body_(body),
244 min_(min),
245 max_(max),
246 quantifier_type_(type) {
247 // Both products saturate at kInfinity. An unchecked `min * body->min_match()`
248 // is signed overflow (undefined behaviour), e.g. for (?:a{100000}){100000}.
249 min_match_ = (min > 0 && body->min_match() > kInfinity / min)
250 ? kInfinity : min * body->min_match();
251 max_match_ = (max > 0 && body->max_match() > kInfinity / max)
252 ? kInfinity : max * body->max_match();
253 }
254 virtual void* Accept(RegExpVisitor* visitor, void* data) OVERRIDE;
255 virtual RegExpNode* ToNode(RegExpCompiler* compiler,
256 RegExpNode* on_success) OVERRIDE;
257 static RegExpNode* ToNode(int min,
258 int max,
259 bool is_greedy,
260 RegExpTree* body,
261 RegExpCompiler* compiler,
262 RegExpNode* on_success,
263 bool not_at_start = false);  // Static overload that takes the quantifier parameters explicitly; defined out of line.
264 virtual RegExpQuantifier* AsQuantifier() OVERRIDE;
265 virtual Interval CaptureRegisters() OVERRIDE;
266 virtual bool IsQuantifier() OVERRIDE;
267 virtual int min_match() OVERRIDE { return min_match_; }  // Value computed in the constructor (at most kInfinity).
268 virtual int max_match() OVERRIDE { return max_match_; }  // Value computed in the constructor (at most kInfinity).
269 int min() { return min_; }  // Repetition lower bound as passed to the constructor.
270 int max() { return max_; }  // Repetition upper bound as passed to the constructor.
271 bool is_possessive() { return quantifier_type_ == POSSESSIVE; }
272 bool is_non_greedy() { return quantifier_type_ == NON_GREEDY; }
273 bool is_greedy() { return quantifier_type_ == GREEDY; }
274 RegExpTree* body() { return body_; }
275
276 private:
277 RegExpTree* body_;
278 int min_;
279 int max_;
280 int min_match_;
281 int max_match_;
282 QuantifierType quantifier_type_;
283 };
284
285
286 class RegExpCapture FINAL : public RegExpTree {  // Node pairing a body sub-tree with a capture index.
287 public:
288 explicit RegExpCapture(RegExpTree* body, int index)
289 : body_(body), index_(index) { }
290 virtual void* Accept(RegExpVisitor* visitor, void* data) OVERRIDE;
291 virtual RegExpNode* ToNode(RegExpCompiler* compiler,
292 RegExpNode* on_success) OVERRIDE;
293 static RegExpNode* ToNode(RegExpTree* body,
294 int index,
295 RegExpCompiler* compiler,
296 RegExpNode* on_success);  // Static overload that takes body and index explicitly; defined out of line.
297 virtual RegExpCapture* AsCapture() OVERRIDE;
298 virtual bool IsAnchoredAtStart() OVERRIDE;
299 virtual bool IsAnchoredAtEnd() OVERRIDE;
300 virtual Interval CaptureRegisters() OVERRIDE;
301 virtual bool IsCapture() OVERRIDE;
302 virtual int min_match() OVERRIDE { return body_->min_match(); }  // Delegates to the body.
303 virtual int max_match() OVERRIDE { return body_->max_match(); }  // Delegates to the body.
304 RegExpTree* body() { return body_; }
305 int index() { return index_; }
306 static int StartRegister(int index) { return index * 2; }  // Each capture index maps to the register pair (2*index, 2*index + 1).
307 static int EndRegister(int index) { return index * 2 + 1; }  // Second register of the pair described above.
308
309 private:
310 RegExpTree* body_;
311 int index_;
312 };
313
314
315 class RegExpLookahead FINAL : public RegExpTree {  // Lookahead node; reports a match length of exactly 0.
316 public:
317 RegExpLookahead(RegExpTree* body,
318 bool is_positive,
319 int capture_count,
320 int capture_from)
321 : body_(body),
322 is_positive_(is_positive),
323 capture_count_(capture_count),
324 capture_from_(capture_from) { }
325
326 virtual void* Accept(RegExpVisitor* visitor, void* data) OVERRIDE;
327 virtual RegExpNode* ToNode(RegExpCompiler* compiler,
328 RegExpNode* on_success) OVERRIDE;
329 virtual RegExpLookahead* AsLookahead() OVERRIDE;
330 virtual Interval CaptureRegisters() OVERRIDE;
331 virtual bool IsLookahead() OVERRIDE;
332 virtual bool IsAnchoredAtStart() OVERRIDE;  // IsAnchoredAtEnd() is not overridden, so the base default (false) applies.
333 virtual int min_match() OVERRIDE { return 0; }  // Always 0, independent of the body.
334 virtual int max_match() OVERRIDE { return 0; }  // Always 0, independent of the body.
335 RegExpTree* body() { return body_; }
336 bool is_positive() { return is_positive_; }
337 int capture_count() { return capture_count_; }  // Presumably the number of captures inside the body -- confirm with the parser.
338 int capture_from() { return capture_from_; }  // Presumably the index of the first capture inside the body -- confirm with the parser.
339
340 private:
341 RegExpTree* body_;
342 bool is_positive_;
343 int capture_count_;
344 int capture_from_;
345 };
346
347
348 class RegExpBackReference FINAL : public RegExpTree {  // Node that refers back to an existing RegExpCapture.
349 public:
350 explicit RegExpBackReference(RegExpCapture* capture)
351 : capture_(capture) { }
352 virtual void* Accept(RegExpVisitor* visitor, void* data) OVERRIDE;
353 virtual RegExpNode* ToNode(RegExpCompiler* compiler,
354 RegExpNode* on_success) OVERRIDE;
355 virtual RegExpBackReference* AsBackReference() OVERRIDE;
356 virtual bool IsBackReference() OVERRIDE;
357 virtual int min_match() OVERRIDE { return 0; }  // Always 0, regardless of the referenced capture.
358 virtual int max_match() OVERRIDE { return capture_->max_match(); }  // Bounded by the referenced capture's own maximum.
359 int index() { return capture_->index(); }  // Index of the referenced capture.
360 RegExpCapture* capture() { return capture_; }
361 private:
362 RegExpCapture* capture_;
363 };
364
365
366 class RegExpEmpty FINAL : public RegExpTree {  // Stateless node that reports a match length of exactly 0.
367 public:
368 RegExpEmpty() { }
369 virtual void* Accept(RegExpVisitor* visitor, void* data) OVERRIDE;
370 virtual RegExpNode* ToNode(RegExpCompiler* compiler,
371 RegExpNode* on_success) OVERRIDE;
372 virtual RegExpEmpty* AsEmpty() OVERRIDE;
373 virtual bool IsEmpty() OVERRIDE;
374 virtual int min_match() OVERRIDE { return 0; }  // Always 0.
375 virtual int max_match() OVERRIDE { return 0; }  // Always 0.
376 static RegExpEmpty* GetInstance() {  // Returns one shared instance, created on first call and never deleted here.
377 static RegExpEmpty* instance = ::new RegExpEmpty();  // `::new` selects the global operator new; presumably to bypass ZoneObject's allocator -- confirm.
378 return instance;
379 }
380 };
381
382 // SNIP
383
384 } // namespace dart
385
386 #endif // VM_REGEXP_AST_H_
OLDNEW
« no previous file with comments | « runtime/vm/regexp_assembler.cc ('k') | runtime/vm/regexp_ast.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698