Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(505)

Side by Side Diff: runtime/vm/regexp_ast.cc

Issue 683433003: Integrate the Irregexp Regular Expression Engine. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: rebase, enable tests, remove *CodeUnitsAt Created 6 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file.
4
5 #include "vm/regexp_ast.h"
6
7 #include "platform/utils.h"
8 #include "vm/os.h"
9
10 namespace dart {
11
12 #define MAKE_ACCEPT(Name) \
13 void* RegExp##Name::Accept(RegExpVisitor* visitor, void* data) { \
14 return visitor->Visit##Name(this, data); \
15 }
16 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_ACCEPT)
17 #undef MAKE_ACCEPT
18
19 #define MAKE_TYPE_CASE(Name) \
20 RegExp##Name* RegExpTree::As##Name() { \
21 return NULL; \
22 } \
23 bool RegExpTree::Is##Name() const { return false; }
24 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_TYPE_CASE)
25 #undef MAKE_TYPE_CASE
26
27 #define MAKE_TYPE_CASE(Name) \
28 RegExp##Name* RegExp##Name::As##Name() { \
29 return this; \
30 } \
31 bool RegExp##Name::Is##Name() const { return true; }
32 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_TYPE_CASE)
33 #undef MAKE_TYPE_CASE
34
35
36 static Interval ListCaptureRegisters(ZoneGrowableArray<RegExpTree*>* children) {
37 Interval result = Interval::Empty();
38 for (intptr_t i = 0; i < children->length(); i++)
39 result = result.Union(children->At(i)->CaptureRegisters());
40 return result;
41 }
42
43
44 Interval RegExpAlternative::CaptureRegisters() const {
45 return ListCaptureRegisters(nodes());
46 }
47
48
49 Interval RegExpDisjunction::CaptureRegisters() const {
50 return ListCaptureRegisters(alternatives());
51 }
52
53
54 Interval RegExpLookahead::CaptureRegisters() const {
55 return body()->CaptureRegisters();
56 }
57
58
59 Interval RegExpCapture::CaptureRegisters() const {
60 Interval self(StartRegister(index()), EndRegister(index()));
61 return self.Union(body()->CaptureRegisters());
62 }
63
64
65 Interval RegExpQuantifier::CaptureRegisters() const {
66 return body()->CaptureRegisters();
67 }
68
69
70 bool RegExpAssertion::IsAnchoredAtStart() const {
71 return assertion_type() == RegExpAssertion::START_OF_INPUT;
72 }
73
74
75 bool RegExpAssertion::IsAnchoredAtEnd() const {
76 return assertion_type() == RegExpAssertion::END_OF_INPUT;
77 }
78
79
80 bool RegExpAlternative::IsAnchoredAtStart() const {
81 ZoneGrowableArray<RegExpTree*>* nodes = this->nodes();
82 for (intptr_t i = 0; i < nodes->length(); i++) {
83 RegExpTree* node = nodes->At(i);
84 if (node->IsAnchoredAtStart()) { return true; }
85 if (node->max_match() > 0) { return false; }
86 }
87 return false;
88 }
89
90
91 bool RegExpAlternative::IsAnchoredAtEnd() const {
92 ZoneGrowableArray<RegExpTree*>* nodes = this->nodes();
93 for (intptr_t i = nodes->length() - 1; i >= 0; i--) {
94 RegExpTree* node = nodes->At(i);
95 if (node->IsAnchoredAtEnd()) { return true; }
96 if (node->max_match() > 0) { return false; }
97 }
98 return false;
99 }
100
101
102 bool RegExpDisjunction::IsAnchoredAtStart() const {
103 ZoneGrowableArray<RegExpTree*>* alternatives = this->alternatives();
104 for (intptr_t i = 0; i < alternatives->length(); i++) {
105 if (!alternatives->At(i)->IsAnchoredAtStart())
106 return false;
107 }
108 return true;
109 }
110
111
112 bool RegExpDisjunction::IsAnchoredAtEnd() const {
113 ZoneGrowableArray<RegExpTree*>* alternatives = this->alternatives();
114 for (intptr_t i = 0; i < alternatives->length(); i++) {
115 if (!alternatives->At(i)->IsAnchoredAtEnd())
116 return false;
117 }
118 return true;
119 }
120
121
122 bool RegExpLookahead::IsAnchoredAtStart() const {
123 return is_positive() && body()->IsAnchoredAtStart();
124 }
125
126
127 bool RegExpCapture::IsAnchoredAtStart() const {
128 return body()->IsAnchoredAtStart();
129 }
130
131
132 bool RegExpCapture::IsAnchoredAtEnd() const {
133 return body()->IsAnchoredAtEnd();
134 }
135
136
137 // Convert regular expression trees to a simple sexp representation.
138 // This representation should be different from the input grammar
139 // in as many cases as possible, to make it more difficult for incorrect
140 // parses to look as correct ones which is likely if the input and
141 // output formats are alike.
142 class RegExpUnparser : public RegExpVisitor {
143 public:
144 void VisitCharacterRange(CharacterRange that);
145 #define MAKE_CASE(Name) virtual void* Visit##Name(RegExp##Name*, \
146 void* data);
147 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_CASE)
148 #undef MAKE_CASE
149 };
150
151
152 void* RegExpUnparser::VisitDisjunction(RegExpDisjunction* that, void* data) {
153 OS::Print("(|");
154 for (intptr_t i = 0; i < that->alternatives()->length(); i++) {
155 OS::Print(" ");
156 (*that->alternatives())[i]->Accept(this, data);
157 }
158 OS::Print(")");
159 return NULL;
160 }
161
162
163 void* RegExpUnparser::VisitAlternative(RegExpAlternative* that, void* data) {
164 OS::Print("(:");
165 for (intptr_t i = 0; i < that->nodes()->length(); i++) {
166 OS::Print(" ");
167 (*that->nodes())[i]->Accept(this, data);
168 }
169 OS::Print(")");
170 return NULL;
171 }
172
173
174 void RegExpUnparser::VisitCharacterRange(CharacterRange that) {
175 PrintUtf16(that.from());
176 if (!that.IsSingleton()) {
177 OS::Print("-");
178 PrintUtf16(that.to());
179 }
180 }
181
182
183
184 void* RegExpUnparser::VisitCharacterClass(RegExpCharacterClass* that,
185 void* data) {
186 if (that->is_negated()) OS::Print("^");
187 OS::Print("[");
188 for (intptr_t i = 0; i < that->ranges()->length(); i++) {
189 if (i > 0) OS::Print(" ");
190 VisitCharacterRange((*that->ranges())[i]);
191 }
192 OS::Print("]");
193 return NULL;
194 }
195
196
197 void* RegExpUnparser::VisitAssertion(RegExpAssertion* that, void* data) {
198 switch (that->assertion_type()) {
199 case RegExpAssertion::START_OF_INPUT:
200 OS::Print("@^i");
201 break;
202 case RegExpAssertion::END_OF_INPUT:
203 OS::Print("@$i");
204 break;
205 case RegExpAssertion::START_OF_LINE:
206 OS::Print("@^l");
207 break;
208 case RegExpAssertion::END_OF_LINE:
209 OS::Print("@$l");
210 break;
211 case RegExpAssertion::BOUNDARY:
212 OS::Print("@b");
213 break;
214 case RegExpAssertion::NON_BOUNDARY:
215 OS::Print("@B");
216 break;
217 }
218 return NULL;
219 }
220
221
222 void* RegExpUnparser::VisitAtom(RegExpAtom* that, void* data) {
223 OS::Print("'");
224 ZoneGrowableArray<uint16_t>* chardata = that->data();
225 for (intptr_t i = 0; i < chardata->length(); i++) {
226 PrintUtf16(chardata->At(i));
227 }
228 OS::Print("'");
229 return NULL;
230 }
231
232
233 void* RegExpUnparser::VisitText(RegExpText* that, void* data) {
234 if (that->elements()->length() == 1) {
235 (*that->elements())[0].tree()->Accept(this, data);
236 } else {
237 OS::Print("(!");
238 for (intptr_t i = 0; i < that->elements()->length(); i++) {
239 OS::Print(" ");
240 (*that->elements())[i].tree()->Accept(this, data);
241 }
242 OS::Print(")");
243 }
244 return NULL;
245 }
246
247
248 void* RegExpUnparser::VisitQuantifier(RegExpQuantifier* that, void* data) {
249 OS::Print("(# %" Pd " ", that->min());
250 if (that->max() == RegExpTree::kInfinity) {
251 OS::Print("- ");
252 } else {
253 OS::Print("%" Pd " ", that->max());
254 }
255 OS::Print(that->is_greedy() ? "g " : that->is_possessive() ? "p " : "n ");
256 that->body()->Accept(this, data);
257 OS::Print(")");
258 return NULL;
259 }
260
261
262 void* RegExpUnparser::VisitCapture(RegExpCapture* that, void* data) {
263 OS::Print("(^ ");
264 that->body()->Accept(this, data);
265 OS::Print(")");
266 return NULL;
267 }
268
269
270 void* RegExpUnparser::VisitLookahead(RegExpLookahead* that, void* data) {
271 OS::Print("(-> %s", (that->is_positive() ? "+ " : "- "));
272 that->body()->Accept(this, data);
273 OS::Print(")");
274 return NULL;
275 }
276
277
278 void* RegExpUnparser::VisitBackReference(RegExpBackReference* that,
279 void*) {
280 OS::Print("(<- %" Pd ")", that->index());
281 return NULL;
282 }
283
284
285 void* RegExpUnparser::VisitEmpty(RegExpEmpty*, void*) {
286 OS::Print("%%");
287 return NULL;
288 }
289
290
291 void RegExpTree::Print() {
292 RegExpUnparser unparser;
293 Accept(&unparser, NULL);
294 }
295
296
297 RegExpDisjunction::RegExpDisjunction(
298 ZoneGrowableArray<RegExpTree*>* alternatives)
299 : alternatives_(alternatives) {
300 ASSERT(alternatives->length() > 1);
301 RegExpTree* first_alternative = alternatives->At(0);
302 min_match_ = first_alternative->min_match();
303 max_match_ = first_alternative->max_match();
304 for (intptr_t i = 1; i < alternatives->length(); i++) {
305 RegExpTree* alternative = alternatives->At(i);
306 min_match_ = Utils::Minimum(min_match_, alternative->min_match());
307 max_match_ = Utils::Maximum(max_match_, alternative->max_match());
308 }
309 }
310
311
312 static intptr_t IncreaseBy(intptr_t previous, intptr_t increase) {
313 if (RegExpTree::kInfinity - previous < increase) {
314 return RegExpTree::kInfinity;
315 } else {
316 return previous + increase;
317 }
318 }
319
320 RegExpAlternative::RegExpAlternative(ZoneGrowableArray<RegExpTree*>* nodes)
321 : nodes_(nodes) {
322 ASSERT(nodes->length() > 1);
323 min_match_ = 0;
324 max_match_ = 0;
325 for (intptr_t i = 0; i < nodes->length(); i++) {
326 RegExpTree* node = nodes->At(i);
327 intptr_t node_min_match = node->min_match();
328 min_match_ = IncreaseBy(min_match_, node_min_match);
329 intptr_t node_max_match = node->max_match();
330 max_match_ = IncreaseBy(max_match_, node_max_match);
331 }
332 }
333
334 } // namespace dart
OLDNEW
« runtime/vm/object.cc ('K') | « runtime/vm/regexp_ast.h ('k') | runtime/vm/regexp_parser.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698