Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(24)

Side by Side Diff: runtime/vm/regexp_ast.h

Issue 539153002: Port and integrate the irregexp engine from V8 (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Addressed Ivan's comments. Created 6 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « runtime/vm/regexp_assembler.cc ('k') | runtime/vm/regexp_ast.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file.
4
5 #ifndef VM_REGEXP_AST_H_
6 #define VM_REGEXP_AST_H_
7
8 #include "platform/globals.h"
9 #include "platform/utils.h"
10 #include "vm/allocation.h"
11 #include "vm/regexp.h"
12
13 namespace dart {
14
15 // Forward declarations.
16 class RegExpAlternative;
17 class RegExpAssertion;
18 class RegExpAtom;
19 class RegExpBackReference;
20 class RegExpCapture;
21 class RegExpCharacterClass;
22 class RegExpCompiler;
23 class RegExpDisjunction;
24 class RegExpEmpty;
25 class RegExpLookahead;
26 class RegExpQuantifier;
27 class RegExpText;
28
29
30 class RegExpVisitor : public ValueObject {
31 public:
32 virtual ~RegExpVisitor() { }
33 #define MAKE_CASE(Name) \
34 virtual void* Visit##Name(RegExp##Name*, void* data) = 0;
35 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_CASE)
36 #undef MAKE_CASE
37 };
38
39
40 class RegExpTree : public ZoneAllocated {
41 public:
42 static const intptr_t kInfinity = kMaxInt32;
43 virtual ~RegExpTree() {}
44 virtual void* Accept(RegExpVisitor* visitor, void* data) = 0;
45 virtual RegExpNode* ToNode(RegExpCompiler* compiler,
46 RegExpNode* on_success) = 0;
47 virtual bool IsTextElement() const { return false; }
48 virtual bool IsAnchoredAtStart() const { return false; }
49 virtual bool IsAnchoredAtEnd() const { return false; }
50 virtual intptr_t min_match() const = 0;
51 virtual intptr_t max_match() const = 0;
52 // Returns the interval of registers used for captures within this
53 // expression.
54 virtual Interval CaptureRegisters() const { return Interval::Empty(); }
55 virtual void AppendToText(RegExpText* text);
56 void Print();
57 #define MAKE_ASTYPE(Name) \
58 virtual RegExp##Name* As##Name(); \
59 virtual bool Is##Name() const;
60 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_ASTYPE)
61 #undef MAKE_ASTYPE
62 };
63
64
65 class RegExpText : public RegExpTree {
66 public:
67 RegExpText() : elements_(2), length_(0) {}
68 virtual void* Accept(RegExpVisitor* visitor, void* data);
69 virtual RegExpNode* ToNode(RegExpCompiler* compiler,
70 RegExpNode* on_success);
71 virtual RegExpText* AsText();
72 virtual bool IsText() const;
73 virtual bool IsTextElement() const { return true; }
74 virtual intptr_t min_match() const { return length_; }
75 virtual intptr_t max_match() const { return length_; }
76 virtual void AppendToText(RegExpText* text);
77 void AddElement(TextElement elm) {
78 elements_.Add(elm);
79 length_ += elm.length();
80 }
81 GrowableArray<TextElement>* elements() { return &elements_; }
82 private:
83 GrowableArray<TextElement> elements_;
84 intptr_t length_;
85 };
86
87
88 class RegExpQuantifier : public RegExpTree {
89 public:
90 enum QuantifierType { GREEDY, NON_GREEDY, POSSESSIVE };
91 RegExpQuantifier(intptr_t min, intptr_t max,
92 QuantifierType type, RegExpTree* body)
93 : body_(body),
94 min_(min),
95 max_(max),
96 min_match_(min * body->min_match()),
97 quantifier_type_(type) {
98 if (max > 0 && body->max_match() > kInfinity / max) {
99 max_match_ = kInfinity;
100 } else {
101 max_match_ = max * body->max_match();
102 }
103 }
104 virtual void* Accept(RegExpVisitor* visitor, void* data);
105 virtual RegExpNode* ToNode(RegExpCompiler* compiler,
106 RegExpNode* on_success);
107 static RegExpNode* ToNode(intptr_t min,
108 intptr_t max,
109 bool is_greedy,
110 RegExpTree* body,
111 RegExpCompiler* compiler,
112 RegExpNode* on_success,
113 bool not_at_start = false);
114 virtual RegExpQuantifier* AsQuantifier();
115 virtual Interval CaptureRegisters() const;
116 virtual bool IsQuantifier() const;
117 virtual intptr_t min_match() const { return min_match_; }
118 virtual intptr_t max_match() const { return max_match_; }
119 intptr_t min() const { return min_; }
120 intptr_t max() const { return max_; }
121 bool is_possessive() const { return quantifier_type_ == POSSESSIVE; }
122 bool is_non_greedy() const { return quantifier_type_ == NON_GREEDY; }
123 bool is_greedy() const { return quantifier_type_ == GREEDY; }
124 RegExpTree* body() const { return body_; }
125
126 private:
127 RegExpTree* body_;
128 intptr_t min_;
129 intptr_t max_;
130 intptr_t min_match_;
131 intptr_t max_match_;
132 QuantifierType quantifier_type_;
133 };
134
135
136 class RegExpCapture : public RegExpTree {
137 public:
138 explicit RegExpCapture(RegExpTree* body, intptr_t index)
139 : body_(body), index_(index) { }
140 virtual void* Accept(RegExpVisitor* visitor, void* data);
141 virtual RegExpNode* ToNode(RegExpCompiler* compiler,
142 RegExpNode* on_success);
143 static RegExpNode* ToNode(RegExpTree* body,
144 intptr_t index,
145 RegExpCompiler* compiler,
146 RegExpNode* on_success);
147 virtual RegExpCapture* AsCapture();
148 virtual bool IsAnchoredAtStart() const;
149 virtual bool IsAnchoredAtEnd() const;
150 virtual Interval CaptureRegisters() const;
151 virtual bool IsCapture() const;
152 virtual intptr_t min_match() const { return body_->min_match(); }
153 virtual intptr_t max_match() const { return body_->max_match(); }
154 RegExpTree* body() const { return body_; }
155 intptr_t index() const { return index_; }
156 static intptr_t StartRegister(intptr_t index) { return index * 2; }
157 static intptr_t EndRegister(intptr_t index) { return index * 2 + 1; }
158
159 private:
160 RegExpTree* body_;
161 intptr_t index_;
162 };
163
164
165 class RegExpDisjunction : public RegExpTree {
166 public:
167 explicit RegExpDisjunction(ZoneGrowableArray<RegExpTree*>* alternatives);
168 virtual void* Accept(RegExpVisitor* visitor, void* data);
169 virtual RegExpNode* ToNode(RegExpCompiler* compiler,
170 RegExpNode* on_success);
171 virtual RegExpDisjunction* AsDisjunction();
172 virtual Interval CaptureRegisters() const;
173 virtual bool IsDisjunction() const;
174 virtual bool IsAnchoredAtStart() const;
175 virtual bool IsAnchoredAtEnd() const;
176 virtual intptr_t min_match() const { return min_match_; }
177 virtual intptr_t max_match() const { return max_match_; }
178 ZoneGrowableArray<RegExpTree*>* alternatives() const { return alternatives_; }
179 private:
180 ZoneGrowableArray<RegExpTree*>* alternatives_;
181 intptr_t min_match_;
182 intptr_t max_match_;
183 };
184
185
186 class RegExpAlternative : public RegExpTree {
187 public:
188 explicit RegExpAlternative(ZoneGrowableArray<RegExpTree*>* nodes);
189 virtual void* Accept(RegExpVisitor* visitor, void* data);
190 virtual RegExpNode* ToNode(RegExpCompiler* compiler,
191 RegExpNode* on_success);
192 virtual RegExpAlternative* AsAlternative();
193 virtual Interval CaptureRegisters() const;
194 virtual bool IsAlternative() const;
195 virtual bool IsAnchoredAtStart() const;
196 virtual bool IsAnchoredAtEnd() const;
197 virtual intptr_t min_match() const { return min_match_; }
198 virtual intptr_t max_match() const { return max_match_; }
199 ZoneGrowableArray<RegExpTree*>* nodes() const { return nodes_; }
200 private:
201 ZoneGrowableArray<RegExpTree*>* nodes_;
202 intptr_t min_match_;
203 intptr_t max_match_;
204 };
205
206
207 class RegExpAssertion : public RegExpTree {
208 public:
209 enum AssertionType {
210 START_OF_LINE,
211 START_OF_INPUT,
212 END_OF_LINE,
213 END_OF_INPUT,
214 BOUNDARY,
215 NON_BOUNDARY
216 };
217 explicit RegExpAssertion(AssertionType type) : assertion_type_(type) { }
218 virtual void* Accept(RegExpVisitor* visitor, void* data);
219 virtual RegExpNode* ToNode(RegExpCompiler* compiler,
220 RegExpNode* on_success);
221 virtual RegExpAssertion* AsAssertion();
222 virtual bool IsAssertion() const;
223 virtual bool IsAnchoredAtStart() const;
224 virtual bool IsAnchoredAtEnd() const;
225 virtual intptr_t min_match() const { return 0; }
226 virtual intptr_t max_match() const { return 0; }
227 AssertionType assertion_type() const { return assertion_type_; }
228 private:
229 AssertionType assertion_type_;
230 };
231
232
233 class CharacterSet : public ValueObject {
234 public:
235 explicit CharacterSet(uint16_t standard_set_type)
236 : ranges_(NULL),
237 standard_set_type_(standard_set_type) {}
238 explicit CharacterSet(ZoneGrowableArray<CharacterRange>* ranges)
239 : ranges_(ranges),
240 standard_set_type_(0) {}
241 CharacterSet(const CharacterSet& that)
242 : ValueObject(),
243 ranges_(that.ranges_),
244 standard_set_type_(that.standard_set_type_) {}
245 ZoneGrowableArray<CharacterRange>* ranges();
246 uint16_t standard_set_type() const { return standard_set_type_; }
247 void set_standard_set_type(uint16_t special_set_type) {
248 standard_set_type_ = special_set_type;
249 }
250 bool is_standard() { return standard_set_type_ != 0; }
251 void Canonicalize();
252 private:
253 ZoneGrowableArray<CharacterRange>* ranges_;
254 // If non-zero, the value represents a standard set (e.g., all whitespace
255 // characters) without having to expand the ranges.
256 uint16_t standard_set_type_;
257 };
258
259
260 class RegExpCharacterClass : public RegExpTree {
261 public:
262 RegExpCharacterClass(ZoneGrowableArray<CharacterRange>* ranges,
263 bool is_negated)
264 : set_(ranges),
265 is_negated_(is_negated) { }
266 explicit RegExpCharacterClass(uint16_t type)
267 : set_(type),
268 is_negated_(false) { }
269 virtual void* Accept(RegExpVisitor* visitor, void* data);
270 virtual RegExpNode* ToNode(RegExpCompiler* compiler,
271 RegExpNode* on_success);
272 virtual RegExpCharacterClass* AsCharacterClass();
273 virtual bool IsCharacterClass() const;
274 virtual bool IsTextElement() const { return true; }
275 virtual intptr_t min_match() const { return 1; }
276 virtual intptr_t max_match() const { return 1; }
277 virtual void AppendToText(RegExpText* text);
278 CharacterSet character_set() const { return set_; }
279 // TODO(lrn): Remove need for complex version if is_standard that
280 // recognizes a mangled standard set and just do { return set_.is_special(); }
281 bool is_standard();
282 // Returns a value representing the standard character set if is_standard()
283 // returns true.
284 // Currently used values are:
285 // s : unicode whitespace
286 // S : unicode non-whitespace
287 // w : ASCII word character (digit, letter, underscore)
288 // W : non-ASCII word character
289 // d : ASCII digit
290 // D : non-ASCII digit
291 // . : non-unicode non-newline
292 // * : All characters
293 uint16_t standard_type() const { return set_.standard_set_type(); }
294 ZoneGrowableArray<CharacterRange>* ranges() {
295 return set_.ranges();
296 }
297 bool is_negated() const { return is_negated_; }
298
299 private:
300 CharacterSet set_;
301 bool is_negated_;
302 };
303
304
305 class RegExpAtom : public RegExpTree {
306 public:
307 explicit RegExpAtom(ZoneGrowableArray<uint16_t>* data) : data_(data) { }
308 virtual void* Accept(RegExpVisitor* visitor, void* data);
309 virtual RegExpNode* ToNode(RegExpCompiler* compiler,
310 RegExpNode* on_success);
311 virtual RegExpAtom* AsAtom();
312 virtual bool IsAtom() const;
313 virtual bool IsTextElement() const { return true; }
314 virtual intptr_t min_match() const { return data_->length(); }
315 virtual intptr_t max_match() const { return data_->length(); }
316 virtual void AppendToText(RegExpText* text);
317 ZoneGrowableArray<uint16_t>* data() const { return data_; }
318 intptr_t length() const { return data_->length(); }
319 private:
320 ZoneGrowableArray<uint16_t>* data_;
321 };
322
323
324 class RegExpLookahead : public RegExpTree {
325 public:
326 RegExpLookahead(RegExpTree* body,
327 bool is_positive,
328 intptr_t capture_count,
329 intptr_t capture_from)
330 : body_(body),
331 is_positive_(is_positive),
332 capture_count_(capture_count),
333 capture_from_(capture_from) { }
334
335 virtual void* Accept(RegExpVisitor* visitor, void* data);
336 virtual RegExpNode* ToNode(RegExpCompiler* compiler,
337 RegExpNode* on_success);
338 virtual RegExpLookahead* AsLookahead();
339 virtual Interval CaptureRegisters() const;
340 virtual bool IsLookahead() const;
341 virtual bool IsAnchoredAtStart() const;
342 virtual intptr_t min_match() const { return 0; }
343 virtual intptr_t max_match() const { return 0; }
344 RegExpTree* body() const { return body_; }
345 bool is_positive() const { return is_positive_; }
346 intptr_t capture_count() const { return capture_count_; }
347 intptr_t capture_from() const { return capture_from_; }
348
349 private:
350 RegExpTree* body_;
351 bool is_positive_;
352 intptr_t capture_count_;
353 intptr_t capture_from_;
354 };
355
356
357 class RegExpBackReference : public RegExpTree {
358 public:
359 explicit RegExpBackReference(RegExpCapture* capture)
360 : capture_(capture) { }
361 virtual void* Accept(RegExpVisitor* visitor, void* data);
362 virtual RegExpNode* ToNode(RegExpCompiler* compiler,
363 RegExpNode* on_success);
364 virtual RegExpBackReference* AsBackReference();
365 virtual bool IsBackReference() const;
366 virtual intptr_t min_match() const { return 0; }
367 virtual intptr_t max_match() const { return capture_->max_match(); }
368 intptr_t index() const { return capture_->index(); }
369 RegExpCapture* capture() const { return capture_; }
370 private:
371 RegExpCapture* capture_;
372 };
373
374
375 class RegExpEmpty : public RegExpTree {
376 public:
377 RegExpEmpty() { }
378 virtual void* Accept(RegExpVisitor* visitor, void* data);
379 virtual RegExpNode* ToNode(RegExpCompiler* compiler,
380 RegExpNode* on_success);
381 virtual RegExpEmpty* AsEmpty();
382 virtual bool IsEmpty() const;
383 virtual intptr_t min_match() const { return 0; }
384 virtual intptr_t max_match() const { return 0; }
385 static RegExpEmpty* GetInstance() {
386 static RegExpEmpty* instance = ::new RegExpEmpty();
387 return instance;
388 }
389 };
390
391 } // namespace dart
392
393 #endif // VM_REGEXP_AST_H_
OLDNEW
« no previous file with comments | « runtime/vm/regexp_assembler.cc ('k') | runtime/vm/regexp_ast.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698