Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(102)

Side by Side Diff: runtime/vm/regexp_ast.cc

Issue 539153002: Port and integrate the irregexp engine from V8 (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Addressed Ivan's comments. Created 6 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « runtime/vm/regexp_ast.h ('k') | runtime/vm/regexp_parser.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file.
4
5 #include "vm/regexp_ast.h"
6
7 #include "platform/utils.h"
8 #include "vm/os.h"
9
10 namespace dart {
11
12
13 // Convert regular expression trees to a simple sexp representation.
14 // This representation should be different from the input grammar
15 // in as many cases as possible, to make it more difficult for incorrect
16 // parses to look as correct ones which is likely if the input and
17 // output formats are alike.
18 class RegExpUnparser : public RegExpVisitor {
19 public:
20 void VisitCharacterRange(CharacterRange that);
21 #define MAKE_CASE(Name) virtual void* Visit##Name(RegExp##Name*, \
22 void* data);
23 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_CASE)
24 #undef MAKE_CASE
25 };
26
27
28 static inline void PrintUtf16(uint16_t c) {
29 const char* format = (0x20 <= c && c <= 0x7F)
30 ? "%c"
31 : (c <= 0xff) ? "\\x%02x" : "\\u%04x";
32 OS::Print(format, c);
33 }
34
35
36 void* RegExpUnparser::VisitDisjunction(RegExpDisjunction* that, void* data) {
37 OS::Print("(|");
38 for (intptr_t i = 0; i < that->alternatives()->length(); i++) {
39 OS::Print(" ");
40 (*that->alternatives())[i]->Accept(this, data);
41 }
42 OS::Print(")");
43 return NULL;
44 }
45
46
47 void* RegExpUnparser::VisitAlternative(RegExpAlternative* that, void* data) {
48 OS::Print("(:");
49 for (intptr_t i = 0; i < that->nodes()->length(); i++) {
50 OS::Print(" ");
51 (*that->nodes())[i]->Accept(this, data);
52 }
53 OS::Print(")");
54 return NULL;
55 }
56
57
58 void RegExpUnparser::VisitCharacterRange(CharacterRange that) {
59 PrintUtf16(that.from());
60 if (!that.IsSingleton()) {
61 OS::Print("-");
62 PrintUtf16(that.to());
63 }
64 }
65
66
67
68 void* RegExpUnparser::VisitCharacterClass(RegExpCharacterClass* that,
69 void* data) {
70 if (that->is_negated()) OS::Print("^");
71 OS::Print("[");
72 for (intptr_t i = 0; i < that->ranges()->length(); i++) {
73 if (i > 0) OS::Print(" ");
74 VisitCharacterRange((*that->ranges())[i]);
75 }
76 OS::Print("]");
77 return NULL;
78 }
79
80
81 void* RegExpUnparser::VisitAssertion(RegExpAssertion* that, void* data) {
82 switch (that->assertion_type()) {
83 case RegExpAssertion::START_OF_INPUT:
84 OS::Print("@^i");
85 break;
86 case RegExpAssertion::END_OF_INPUT:
87 OS::Print("@$i");
88 break;
89 case RegExpAssertion::START_OF_LINE:
90 OS::Print("@^l");
91 break;
92 case RegExpAssertion::END_OF_LINE:
93 OS::Print("@$l");
94 break;
95 case RegExpAssertion::BOUNDARY:
96 OS::Print("@b");
97 break;
98 case RegExpAssertion::NON_BOUNDARY:
99 OS::Print("@B");
100 break;
101 }
102 return NULL;
103 }
104
105
106 void* RegExpUnparser::VisitAtom(RegExpAtom* that, void* data) {
107 OS::Print("'");
108 ZoneGrowableArray<uint16_t>* chardata = that->data();
109 for (intptr_t i = 0; i < chardata->length(); i++) {
110 PrintUtf16(chardata->At(i));
111 }
112 OS::Print("'");
113 return NULL;
114 }
115
116
117 void* RegExpUnparser::VisitText(RegExpText* that, void* data) {
118 if (that->elements()->length() == 1) {
119 (*that->elements())[0].tree()->Accept(this, data);
120 } else {
121 OS::Print("(!");
122 for (intptr_t i = 0; i < that->elements()->length(); i++) {
123 OS::Print(" ");
124 (*that->elements())[i].tree()->Accept(this, data);
125 }
126 OS::Print(")");
127 }
128 return NULL;
129 }
130
131
132 void* RegExpUnparser::VisitQuantifier(RegExpQuantifier* that, void* data) {
133 OS::Print("(# %" Pd " ", that->min());
134 if (that->max() == RegExpTree::kInfinity) {
135 OS::Print("- ");
136 } else {
137 OS::Print("%" Pd " ", that->max());
138 }
139 OS::Print(that->is_greedy() ? "g " : that->is_possessive() ? "p " : "n ");
140 that->body()->Accept(this, data);
141 OS::Print(")");
142 return NULL;
143 }
144
145
146 void* RegExpUnparser::VisitCapture(RegExpCapture* that, void* data) {
147 OS::Print("(^ ");
148 that->body()->Accept(this, data);
149 OS::Print(")");
150 return NULL;
151 }
152
153
154 void* RegExpUnparser::VisitLookahead(RegExpLookahead* that, void* data) {
155 OS::Print("(-> %s", (that->is_positive() ? "+ " : "- "));
156 that->body()->Accept(this, data);
157 OS::Print(")");
158 return NULL;
159 }
160
161
162 void* RegExpUnparser::VisitBackReference(RegExpBackReference* that,
163 void*) {
164 OS::Print("(<- %" Pd ")", that->index());
165 return NULL;
166 }
167
168
169 void* RegExpUnparser::VisitEmpty(RegExpEmpty*, void*) {
170 OS::Print("%%");
171 return NULL;
172 }
173
174
175 #define MAKE_ACCEPT(Name) \
176 void* RegExp##Name::Accept(RegExpVisitor* visitor, void* data) { \
177 return visitor->Visit##Name(this, data); \
178 }
179 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_ACCEPT)
180 #undef MAKE_ACCEPT
181
182 #define MAKE_TYPE_CASE(Name) \
183 RegExp##Name* RegExpTree::As##Name() { \
184 return NULL; \
185 } \
186 bool RegExpTree::Is##Name() const { return false; }
187 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_TYPE_CASE)
188 #undef MAKE_TYPE_CASE
189
190 #define MAKE_TYPE_CASE(Name) \
191 RegExp##Name* RegExp##Name::As##Name() { \
192 return this; \
193 } \
194 bool RegExp##Name::Is##Name() const { return true; }
195 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_TYPE_CASE)
196 #undef MAKE_TYPE_CASE
197
198 void RegExpTree::Print() {
199 RegExpUnparser unparser;
200 Accept(&unparser, NULL);
201 }
202
203
204 static Interval ListCaptureRegisters(ZoneGrowableArray<RegExpTree*>* children) {
205 Interval result = Interval::Empty();
206 for (intptr_t i = 0; i < children->length(); i++)
207 result = result.Union(children->At(i)->CaptureRegisters());
208 return result;
209 }
210
211
212 Interval RegExpQuantifier::CaptureRegisters() const {
213 return body()->CaptureRegisters();
214 }
215
216
217 bool RegExpCapture::IsAnchoredAtStart() const {
218 return body()->IsAnchoredAtStart();
219 }
220
221
222 bool RegExpCapture::IsAnchoredAtEnd() const {
223 return body()->IsAnchoredAtEnd();
224 }
225
226
227 Interval RegExpCapture::CaptureRegisters() const {
228 Interval self(StartRegister(index()), EndRegister(index()));
229 return self.Union(body()->CaptureRegisters());
230 }
231
232
233 RegExpDisjunction::RegExpDisjunction(
234 ZoneGrowableArray<RegExpTree*>* alternatives)
235 : alternatives_(alternatives) {
236 ASSERT(alternatives->length() > 1);
237 RegExpTree* first_alternative = alternatives->At(0);
238 min_match_ = first_alternative->min_match();
239 max_match_ = first_alternative->max_match();
240 for (intptr_t i = 1; i < alternatives->length(); i++) {
241 RegExpTree* alternative = alternatives->At(i);
242 min_match_ = Utils::Minimum(min_match_, alternative->min_match());
243 max_match_ = Utils::Maximum(max_match_, alternative->max_match());
244 }
245 }
246
247
248 Interval RegExpDisjunction::CaptureRegisters() const {
249 return ListCaptureRegisters(alternatives());
250 }
251
252
253 bool RegExpDisjunction::IsAnchoredAtStart() const {
254 ZoneGrowableArray<RegExpTree*>* alternatives = this->alternatives();
255 for (intptr_t i = 0; i < alternatives->length(); i++) {
256 if (!alternatives->At(i)->IsAnchoredAtStart())
257 return false;
258 }
259 return true;
260 }
261
262
263 bool RegExpDisjunction::IsAnchoredAtEnd() const {
264 ZoneGrowableArray<RegExpTree*>* alternatives = this->alternatives();
265 for (intptr_t i = 0; i < alternatives->length(); i++) {
266 if (!alternatives->At(i)->IsAnchoredAtEnd())
267 return false;
268 }
269 return true;
270 }
271
272
273 static intptr_t IncreaseBy(intptr_t previous, intptr_t increase) {
274 if (RegExpTree::kInfinity - previous < increase) {
275 return RegExpTree::kInfinity;
276 } else {
277 return previous + increase;
278 }
279 }
280
281 RegExpAlternative::RegExpAlternative(ZoneGrowableArray<RegExpTree*>* nodes)
282 : nodes_(nodes) {
283 ASSERT(nodes->length() > 1);
284 min_match_ = 0;
285 max_match_ = 0;
286 for (intptr_t i = 0; i < nodes->length(); i++) {
287 RegExpTree* node = nodes->At(i);
288 intptr_t node_min_match = node->min_match();
289 min_match_ = IncreaseBy(min_match_, node_min_match);
290 intptr_t node_max_match = node->max_match();
291 max_match_ = IncreaseBy(max_match_, node_max_match);
292 }
293 }
294
295
296 Interval RegExpAlternative::CaptureRegisters() const {
297 return ListCaptureRegisters(nodes());
298 }
299
300
301 bool RegExpAlternative::IsAnchoredAtStart() const {
302 ZoneGrowableArray<RegExpTree*>* nodes = this->nodes();
303 for (intptr_t i = 0; i < nodes->length(); i++) {
304 RegExpTree* node = nodes->At(i);
305 if (node->IsAnchoredAtStart()) { return true; }
306 if (node->max_match() > 0) { return false; }
307 }
308 return false;
309 }
310
311
312 bool RegExpAlternative::IsAnchoredAtEnd() const {
313 ZoneGrowableArray<RegExpTree*>* nodes = this->nodes();
314 for (intptr_t i = nodes->length() - 1; i >= 0; i--) {
315 RegExpTree* node = nodes->At(i);
316 if (node->IsAnchoredAtEnd()) { return true; }
317 if (node->max_match() > 0) { return false; }
318 }
319 return false;
320 }
321
322
323 bool RegExpAssertion::IsAnchoredAtStart() const {
324 return assertion_type() == RegExpAssertion::START_OF_INPUT;
325 }
326
327
328 bool RegExpAssertion::IsAnchoredAtEnd() const {
329 return assertion_type() == RegExpAssertion::END_OF_INPUT;
330 }
331
332
333 void RegExpTree::AppendToText(RegExpText* text) {
334 UNREACHABLE();
335 }
336
337
338 void RegExpAtom::AppendToText(RegExpText* text) {
339 text->AddElement(TextElement::Atom(this));
340 }
341
342
343 void RegExpCharacterClass::AppendToText(RegExpText* text) {
344 text->AddElement(TextElement::CharClass(this));
345 }
346
347
348 void RegExpText::AppendToText(RegExpText* text) {
349 for (intptr_t i = 0; i < elements()->length(); i++)
350 text->AddElement((*elements())[i]);
351 }
352
353
354 Interval RegExpLookahead::CaptureRegisters() const {
355 return body()->CaptureRegisters();
356 }
357
358
359 bool RegExpLookahead::IsAnchoredAtStart() const {
360 return is_positive() && body()->IsAnchoredAtStart();
361 }
362
363 } // namespace dart
OLDNEW
« no previous file with comments | « runtime/vm/regexp_ast.h ('k') | runtime/vm/regexp_parser.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698