Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(23)

Side by Side Diff: runtime/vm/regexp_ast.h

Issue 683433003: Integrate the Irregexp Regular Expression Engine. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: rebase, enable tests, remove *CodeUnitsAt Created 6 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file.
4
5 #ifndef VM_REGEXP_AST_H_
6 #define VM_REGEXP_AST_H_
7
8 #include "platform/globals.h"
9 #include "platform/utils.h"
10 #include "vm/allocation.h"
11 #include "vm/regexp.h"
12
13 namespace dart {
14
15 class RegExpAlternative;
16 class RegExpAssertion;
17 class RegExpAtom;
18 class RegExpBackReference;
19 class RegExpCapture;
20 class RegExpCharacterClass;
21 class RegExpCompiler;
22 class RegExpDisjunction;
23 class RegExpEmpty;
24 class RegExpLookahead;
25 class RegExpQuantifier;
26 class RegExpText;
27
28
29 class RegExpVisitor : public ValueObject {
30 public:
31 virtual ~RegExpVisitor() { }
32 #define MAKE_CASE(Name) \
33 virtual void* Visit##Name(RegExp##Name*, void* data) = 0;
34 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_CASE)
35 #undef MAKE_CASE
36 };
37
38
39 class RegExpTree : public ZoneAllocated {
40 public:
41 static const intptr_t kInfinity = kMaxInt32;
42 virtual ~RegExpTree() {}
43 virtual void* Accept(RegExpVisitor* visitor, void* data) = 0;
44 virtual RegExpNode* ToNode(RegExpCompiler* compiler,
45 RegExpNode* on_success) = 0;
46 virtual bool IsTextElement() const { return false; }
47 virtual bool IsAnchoredAtStart() const { return false; }
48 virtual bool IsAnchoredAtEnd() const { return false; }
49 virtual intptr_t min_match() const = 0;
50 virtual intptr_t max_match() const = 0;
51 // Returns the interval of registers used for captures within this
52 // expression.
53 virtual Interval CaptureRegisters() const { return Interval::Empty(); }
54 virtual void AppendToText(RegExpText* text);
55 void Print();
56 #define MAKE_ASTYPE(Name) \
57 virtual RegExp##Name* As##Name(); \
58 virtual bool Is##Name() const;
59 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_ASTYPE)
60 #undef MAKE_ASTYPE
61 };
62
63
64 class RegExpDisjunction : public RegExpTree {
65 public:
66 explicit RegExpDisjunction(ZoneGrowableArray<RegExpTree*>* alternatives);
67 virtual void* Accept(RegExpVisitor* visitor, void* data);
68 virtual RegExpNode* ToNode(RegExpCompiler* compiler,
69 RegExpNode* on_success);
70 virtual RegExpDisjunction* AsDisjunction();
71 virtual Interval CaptureRegisters() const;
72 virtual bool IsDisjunction() const;
73 virtual bool IsAnchoredAtStart() const;
74 virtual bool IsAnchoredAtEnd() const;
75 virtual intptr_t min_match() const { return min_match_; }
76 virtual intptr_t max_match() const { return max_match_; }
77 ZoneGrowableArray<RegExpTree*>* alternatives() const { return alternatives_; }
78 private:
79 ZoneGrowableArray<RegExpTree*>* alternatives_;
80 intptr_t min_match_;
81 intptr_t max_match_;
82 };
83
84
85 class RegExpAlternative : public RegExpTree {
86 public:
87 explicit RegExpAlternative(ZoneGrowableArray<RegExpTree*>* nodes);
88 virtual void* Accept(RegExpVisitor* visitor, void* data);
89 virtual RegExpNode* ToNode(RegExpCompiler* compiler,
90 RegExpNode* on_success);
91 virtual RegExpAlternative* AsAlternative();
92 virtual Interval CaptureRegisters() const;
93 virtual bool IsAlternative() const;
94 virtual bool IsAnchoredAtStart() const;
95 virtual bool IsAnchoredAtEnd() const;
96 virtual intptr_t min_match() const { return min_match_; }
97 virtual intptr_t max_match() const { return max_match_; }
98 ZoneGrowableArray<RegExpTree*>* nodes() const { return nodes_; }
99 private:
100 ZoneGrowableArray<RegExpTree*>* nodes_;
101 intptr_t min_match_;
102 intptr_t max_match_;
103 };
104
105
106 class RegExpAssertion : public RegExpTree {
107 public:
108 enum AssertionType {
109 START_OF_LINE,
110 START_OF_INPUT,
111 END_OF_LINE,
112 END_OF_INPUT,
113 BOUNDARY,
114 NON_BOUNDARY
115 };
116 explicit RegExpAssertion(AssertionType type) : assertion_type_(type) { }
117 virtual void* Accept(RegExpVisitor* visitor, void* data);
118 virtual RegExpNode* ToNode(RegExpCompiler* compiler,
119 RegExpNode* on_success);
120 virtual RegExpAssertion* AsAssertion();
121 virtual bool IsAssertion() const;
122 virtual bool IsAnchoredAtStart() const;
123 virtual bool IsAnchoredAtEnd() const;
124 virtual intptr_t min_match() const { return 0; }
125 virtual intptr_t max_match() const { return 0; }
126 AssertionType assertion_type() const { return assertion_type_; }
127 private:
128 AssertionType assertion_type_;
129 };
130
131
132 class CharacterSet : public ValueObject {
133 public:
134 explicit CharacterSet(uint16_t standard_set_type)
135 : ranges_(NULL),
136 standard_set_type_(standard_set_type) {}
137 explicit CharacterSet(ZoneGrowableArray<CharacterRange>* ranges)
138 : ranges_(ranges),
139 standard_set_type_(0) {}
140 CharacterSet(const CharacterSet& that)
141 : ValueObject(),
142 ranges_(that.ranges_),
143 standard_set_type_(that.standard_set_type_) {}
144 ZoneGrowableArray<CharacterRange>* ranges();
145 uint16_t standard_set_type() const { return standard_set_type_; }
146 void set_standard_set_type(uint16_t special_set_type) {
147 standard_set_type_ = special_set_type;
148 }
149 bool is_standard() { return standard_set_type_ != 0; }
150 void Canonicalize();
151 private:
152 ZoneGrowableArray<CharacterRange>* ranges_;
153 // If non-zero, the value represents a standard set (e.g., all whitespace
154 // characters) without having to expand the ranges.
155 uint16_t standard_set_type_;
156 };
157
158
159 class RegExpCharacterClass : public RegExpTree {
160 public:
161 RegExpCharacterClass(ZoneGrowableArray<CharacterRange>* ranges,
162 bool is_negated)
163 : set_(ranges),
164 is_negated_(is_negated) { }
165 explicit RegExpCharacterClass(uint16_t type)
166 : set_(type),
167 is_negated_(false) { }
168 virtual void* Accept(RegExpVisitor* visitor, void* data);
169 virtual RegExpNode* ToNode(RegExpCompiler* compiler,
170 RegExpNode* on_success);
171 virtual RegExpCharacterClass* AsCharacterClass();
172 virtual bool IsCharacterClass() const;
173 virtual bool IsTextElement() const { return true; }
174 virtual intptr_t min_match() const { return 1; }
175 virtual intptr_t max_match() const { return 1; }
176 virtual void AppendToText(RegExpText* text);
177 CharacterSet character_set() const { return set_; }
178 // TODO(lrn): Remove need for complex version if is_standard that
179 // recognizes a mangled standard set and just do { return set_.is_special(); }
180 bool is_standard();
181 // Returns a value representing the standard character set if is_standard()
182 // returns true.
183 // Currently used values are:
184 // s : unicode whitespace
185 // S : unicode non-whitespace
186 // w : ASCII word character (digit, letter, underscore)
187 // W : non-ASCII word character
188 // d : ASCII digit
189 // D : non-ASCII digit
190 // . : non-unicode non-newline
191 // * : All characters
192 uint16_t standard_type() const { return set_.standard_set_type(); }
193 ZoneGrowableArray<CharacterRange>* ranges() {
194 return set_.ranges();
195 }
196 bool is_negated() const { return is_negated_; }
197
198 private:
199 CharacterSet set_;
200 bool is_negated_;
201 };
202
203
204 class RegExpAtom : public RegExpTree {
205 public:
206 explicit RegExpAtom(ZoneGrowableArray<uint16_t>* data) : data_(data) { }
207 virtual void* Accept(RegExpVisitor* visitor, void* data);
208 virtual RegExpNode* ToNode(RegExpCompiler* compiler,
209 RegExpNode* on_success);
210 virtual RegExpAtom* AsAtom();
211 virtual bool IsAtom() const;
212 virtual bool IsTextElement() const { return true; }
213 virtual intptr_t min_match() const { return data_->length(); }
214 virtual intptr_t max_match() const { return data_->length(); }
215 virtual void AppendToText(RegExpText* text);
216 ZoneGrowableArray<uint16_t>* data() const { return data_; }
217 intptr_t length() const { return data_->length(); }
218 private:
219 ZoneGrowableArray<uint16_t>* data_;
220 };
221
222
223 class RegExpText : public RegExpTree {
224 public:
225 RegExpText() : elements_(2), length_(0) {}
226 virtual void* Accept(RegExpVisitor* visitor, void* data);
227 virtual RegExpNode* ToNode(RegExpCompiler* compiler,
228 RegExpNode* on_success);
229 virtual RegExpText* AsText();
230 virtual bool IsText() const;
231 virtual bool IsTextElement() const { return true; }
232 virtual intptr_t min_match() const { return length_; }
233 virtual intptr_t max_match() const { return length_; }
234 virtual void AppendToText(RegExpText* text);
235 void AddElement(TextElement elm) {
236 elements_.Add(elm);
237 length_ += elm.length();
238 }
239 GrowableArray<TextElement>* elements() { return &elements_; }
240 private:
241 GrowableArray<TextElement> elements_;
242 intptr_t length_;
243 };
244
245
246 class RegExpQuantifier : public RegExpTree {
247 public:
248 enum QuantifierType { GREEDY, NON_GREEDY, POSSESSIVE };
249 RegExpQuantifier(intptr_t min, intptr_t max,
250 QuantifierType type, RegExpTree* body)
251 : body_(body),
252 min_(min),
253 max_(max),
254 min_match_(min * body->min_match()),
255 quantifier_type_(type) {
256 if (max > 0 && body->max_match() > kInfinity / max) {
257 max_match_ = kInfinity;
258 } else {
259 max_match_ = max * body->max_match();
260 }
261 }
262 virtual void* Accept(RegExpVisitor* visitor, void* data);
263 virtual RegExpNode* ToNode(RegExpCompiler* compiler,
264 RegExpNode* on_success);
265 static RegExpNode* ToNode(intptr_t min,
266 intptr_t max,
267 bool is_greedy,
268 RegExpTree* body,
269 RegExpCompiler* compiler,
270 RegExpNode* on_success,
271 bool not_at_start = false);
272 virtual RegExpQuantifier* AsQuantifier();
273 virtual Interval CaptureRegisters() const;
274 virtual bool IsQuantifier() const;
275 virtual intptr_t min_match() const { return min_match_; }
276 virtual intptr_t max_match() const { return max_match_; }
277 intptr_t min() const { return min_; }
278 intptr_t max() const { return max_; }
279 bool is_possessive() const { return quantifier_type_ == POSSESSIVE; }
280 bool is_non_greedy() const { return quantifier_type_ == NON_GREEDY; }
281 bool is_greedy() const { return quantifier_type_ == GREEDY; }
282 RegExpTree* body() const { return body_; }
283
284 private:
285 RegExpTree* body_;
286 intptr_t min_;
287 intptr_t max_;
288 intptr_t min_match_;
289 intptr_t max_match_;
290 QuantifierType quantifier_type_;
291 };
292
293
294 class RegExpCapture : public RegExpTree {
295 public:
296 explicit RegExpCapture(RegExpTree* body, intptr_t index)
297 : body_(body), index_(index) { }
298 virtual void* Accept(RegExpVisitor* visitor, void* data);
299 virtual RegExpNode* ToNode(RegExpCompiler* compiler,
300 RegExpNode* on_success);
301 static RegExpNode* ToNode(RegExpTree* body,
302 intptr_t index,
303 RegExpCompiler* compiler,
304 RegExpNode* on_success);
305 virtual RegExpCapture* AsCapture();
306 virtual bool IsAnchoredAtStart() const;
307 virtual bool IsAnchoredAtEnd() const;
308 virtual Interval CaptureRegisters() const;
309 virtual bool IsCapture() const;
310 virtual intptr_t min_match() const { return body_->min_match(); }
311 virtual intptr_t max_match() const { return body_->max_match(); }
312 RegExpTree* body() const { return body_; }
313 intptr_t index() const { return index_; }
314 static intptr_t StartRegister(intptr_t index) { return index * 2; }
315 static intptr_t EndRegister(intptr_t index) { return index * 2 + 1; }
316
317 private:
318 RegExpTree* body_;
319 intptr_t index_;
320 };
321
322
323 class RegExpLookahead : public RegExpTree {
324 public:
325 RegExpLookahead(RegExpTree* body,
326 bool is_positive,
327 intptr_t capture_count,
328 intptr_t capture_from)
329 : body_(body),
330 is_positive_(is_positive),
331 capture_count_(capture_count),
332 capture_from_(capture_from) { }
333
334 virtual void* Accept(RegExpVisitor* visitor, void* data);
335 virtual RegExpNode* ToNode(RegExpCompiler* compiler,
336 RegExpNode* on_success);
337 virtual RegExpLookahead* AsLookahead();
338 virtual Interval CaptureRegisters() const;
339 virtual bool IsLookahead() const;
340 virtual bool IsAnchoredAtStart() const;
341 virtual intptr_t min_match() const { return 0; }
342 virtual intptr_t max_match() const { return 0; }
343 RegExpTree* body() const { return body_; }
344 bool is_positive() const { return is_positive_; }
345 intptr_t capture_count() const { return capture_count_; }
346 intptr_t capture_from() const { return capture_from_; }
347
348 private:
349 RegExpTree* body_;
350 bool is_positive_;
351 intptr_t capture_count_;
352 intptr_t capture_from_;
353 };
354
355
356 class RegExpBackReference : public RegExpTree {
357 public:
358 explicit RegExpBackReference(RegExpCapture* capture)
359 : capture_(capture) { }
360 virtual void* Accept(RegExpVisitor* visitor, void* data);
361 virtual RegExpNode* ToNode(RegExpCompiler* compiler,
362 RegExpNode* on_success);
363 virtual RegExpBackReference* AsBackReference();
364 virtual bool IsBackReference() const;
365 virtual intptr_t min_match() const { return 0; }
366 virtual intptr_t max_match() const { return capture_->max_match(); }
367 intptr_t index() const { return capture_->index(); }
368 RegExpCapture* capture() const { return capture_; }
369 private:
370 RegExpCapture* capture_;
371 };
372
373
374 class RegExpEmpty : public RegExpTree {
375 public:
376 RegExpEmpty() { }
377 virtual void* Accept(RegExpVisitor* visitor, void* data);
378 virtual RegExpNode* ToNode(RegExpCompiler* compiler,
379 RegExpNode* on_success);
380 virtual RegExpEmpty* AsEmpty();
381 virtual bool IsEmpty() const;
382 virtual intptr_t min_match() const { return 0; }
383 virtual intptr_t max_match() const { return 0; }
384 static RegExpEmpty* GetInstance() {
385 static RegExpEmpty* instance = ::new RegExpEmpty();
386 return instance;
387 }
388 };
389
390 } // namespace dart
391
392 #endif // VM_REGEXP_AST_H_
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698