Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(91)

Side by Side Diff: runtime/vm/regexp_ast.cc

Issue 683433003: Integrate the Irregexp Regular Expression Engine. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: more comments Created 6 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « runtime/vm/regexp_ast.h ('k') | runtime/vm/regexp_parser.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 #include "vm/regexp_ast.h" 5 #include "vm/regexp_ast.h"
6 6
7 // SNIP 7 #include "platform/utils.h"
8 #include "vm/os.h"
8 9
9 namespace dart { 10 namespace dart {
10 11
11 #define MAKE_ACCEPT(Name) \ 12 #define MAKE_ACCEPT(Name) \
12 void* RegExp##Name::Accept(RegExpVisitor* visitor, void* data) { \ 13 void* RegExp##Name::Accept(RegExpVisitor* visitor, void* data) { \
13 return visitor->Visit##Name(this, data); \ 14 return visitor->Visit##Name(this, data); \
14 } 15 }
15 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_ACCEPT) 16 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_ACCEPT)
16 #undef MAKE_ACCEPT 17 #undef MAKE_ACCEPT
17 18
18 #define MAKE_TYPE_CASE(Name) \ 19 #define MAKE_TYPE_CASE(Name) \
19 RegExp##Name* RegExpTree::As##Name() { \ 20 RegExp##Name* RegExpTree::As##Name() { \
20 return NULL; \ 21 return NULL; \
21 } \ 22 } \
22 bool RegExpTree::Is##Name() { return false; } 23 bool RegExpTree::Is##Name() const { return false; }
23 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_TYPE_CASE) 24 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_TYPE_CASE)
24 #undef MAKE_TYPE_CASE 25 #undef MAKE_TYPE_CASE
25 26
26 #define MAKE_TYPE_CASE(Name) \ 27 #define MAKE_TYPE_CASE(Name) \
27 RegExp##Name* RegExp##Name::As##Name() { \ 28 RegExp##Name* RegExp##Name::As##Name() { \
28 return this; \ 29 return this; \
29 } \ 30 } \
30 bool RegExp##Name::Is##Name() { return true; } 31 bool RegExp##Name::Is##Name() const { return true; }
31 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_TYPE_CASE) 32 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_TYPE_CASE)
32 #undef MAKE_TYPE_CASE 33 #undef MAKE_TYPE_CASE
33 34
34 35
35 static Interval ListCaptureRegisters(ZoneList<RegExpTree*>* children) { 36 static Interval ListCaptureRegisters(ZoneGrowableArray<RegExpTree*>* children) {
36 Interval result = Interval::Empty(); 37 Interval result = Interval::Empty();
37 for (int i = 0; i < children->length(); i++) 38 for (intptr_t i = 0; i < children->length(); i++)
38 result = result.Union(children->at(i)->CaptureRegisters()); 39 result = result.Union(children->At(i)->CaptureRegisters());
39 return result; 40 return result;
40 } 41 }
41 42
42 43
43 Interval RegExpAlternative::CaptureRegisters() { 44 Interval RegExpAlternative::CaptureRegisters() const {
44 return ListCaptureRegisters(nodes()); 45 return ListCaptureRegisters(nodes());
45 } 46 }
46 47
47 48
48 Interval RegExpDisjunction::CaptureRegisters() { 49 Interval RegExpDisjunction::CaptureRegisters() const {
49 return ListCaptureRegisters(alternatives()); 50 return ListCaptureRegisters(alternatives());
50 } 51 }
51 52
52 53
53 Interval RegExpLookahead::CaptureRegisters() { 54 Interval RegExpLookahead::CaptureRegisters() const {
54 return body()->CaptureRegisters(); 55 return body()->CaptureRegisters();
55 } 56 }
56 57
57 58
58 Interval RegExpCapture::CaptureRegisters() { 59 Interval RegExpCapture::CaptureRegisters() const {
59 Interval self(StartRegister(index()), EndRegister(index())); 60 Interval self(StartRegister(index()), EndRegister(index()));
60 return self.Union(body()->CaptureRegisters()); 61 return self.Union(body()->CaptureRegisters());
61 } 62 }
62 63
63 64
64 Interval RegExpQuantifier::CaptureRegisters() { 65 Interval RegExpQuantifier::CaptureRegisters() const {
65 return body()->CaptureRegisters(); 66 return body()->CaptureRegisters();
66 } 67 }
67 68
68 69
69 bool RegExpAssertion::IsAnchoredAtStart() { 70 bool RegExpAssertion::IsAnchoredAtStart() const {
70 return assertion_type() == RegExpAssertion::START_OF_INPUT; 71 return assertion_type() == RegExpAssertion::START_OF_INPUT;
71 } 72 }
72 73
73 74
74 bool RegExpAssertion::IsAnchoredAtEnd() { 75 bool RegExpAssertion::IsAnchoredAtEnd() const {
75 return assertion_type() == RegExpAssertion::END_OF_INPUT; 76 return assertion_type() == RegExpAssertion::END_OF_INPUT;
76 } 77 }
77 78
78 79
79 bool RegExpAlternative::IsAnchoredAtStart() { 80 bool RegExpAlternative::IsAnchoredAtStart() const {
80 ZoneList<RegExpTree*>* nodes = this->nodes(); 81 ZoneGrowableArray<RegExpTree*>* nodes = this->nodes();
81 for (int i = 0; i < nodes->length(); i++) { 82 for (intptr_t i = 0; i < nodes->length(); i++) {
82 RegExpTree* node = nodes->at(i); 83 RegExpTree* node = nodes->At(i);
83 if (node->IsAnchoredAtStart()) { return true; } 84 if (node->IsAnchoredAtStart()) { return true; }
84 if (node->max_match() > 0) { return false; } 85 if (node->max_match() > 0) { return false; }
85 } 86 }
86 return false; 87 return false;
87 } 88 }
88 89
89 90
90 bool RegExpAlternative::IsAnchoredAtEnd() { 91 bool RegExpAlternative::IsAnchoredAtEnd() const {
91 ZoneList<RegExpTree*>* nodes = this->nodes(); 92 ZoneGrowableArray<RegExpTree*>* nodes = this->nodes();
92 for (int i = nodes->length() - 1; i >= 0; i--) { 93 for (intptr_t i = nodes->length() - 1; i >= 0; i--) {
93 RegExpTree* node = nodes->at(i); 94 RegExpTree* node = nodes->At(i);
94 if (node->IsAnchoredAtEnd()) { return true; } 95 if (node->IsAnchoredAtEnd()) { return true; }
95 if (node->max_match() > 0) { return false; } 96 if (node->max_match() > 0) { return false; }
96 } 97 }
97 return false; 98 return false;
98 } 99 }
99 100
100 101
101 bool RegExpDisjunction::IsAnchoredAtStart() { 102 bool RegExpDisjunction::IsAnchoredAtStart() const {
102 ZoneList<RegExpTree*>* alternatives = this->alternatives(); 103 ZoneGrowableArray<RegExpTree*>* alternatives = this->alternatives();
103 for (int i = 0; i < alternatives->length(); i++) { 104 for (intptr_t i = 0; i < alternatives->length(); i++) {
104 if (!alternatives->at(i)->IsAnchoredAtStart()) 105 if (!alternatives->At(i)->IsAnchoredAtStart())
105 return false; 106 return false;
106 } 107 }
107 return true; 108 return true;
108 } 109 }
109 110
110 111
111 bool RegExpDisjunction::IsAnchoredAtEnd() { 112 bool RegExpDisjunction::IsAnchoredAtEnd() const {
112 ZoneList<RegExpTree*>* alternatives = this->alternatives(); 113 ZoneGrowableArray<RegExpTree*>* alternatives = this->alternatives();
113 for (int i = 0; i < alternatives->length(); i++) { 114 for (intptr_t i = 0; i < alternatives->length(); i++) {
114 if (!alternatives->at(i)->IsAnchoredAtEnd()) 115 if (!alternatives->At(i)->IsAnchoredAtEnd())
115 return false; 116 return false;
116 } 117 }
117 return true; 118 return true;
118 } 119 }
119 120
120 121
121 bool RegExpLookahead::IsAnchoredAtStart() { 122 bool RegExpLookahead::IsAnchoredAtStart() const {
122 return is_positive() && body()->IsAnchoredAtStart(); 123 return is_positive() && body()->IsAnchoredAtStart();
123 } 124 }
124 125
125 126
126 bool RegExpCapture::IsAnchoredAtStart() { 127 bool RegExpCapture::IsAnchoredAtStart() const {
127 return body()->IsAnchoredAtStart(); 128 return body()->IsAnchoredAtStart();
128 } 129 }
129 130
130 131
131 bool RegExpCapture::IsAnchoredAtEnd() { 132 bool RegExpCapture::IsAnchoredAtEnd() const {
132 return body()->IsAnchoredAtEnd(); 133 return body()->IsAnchoredAtEnd();
133 } 134 }
134 135
135 136
136 // Convert regular expression trees to a simple sexp representation. 137 // Convert regular expression trees to a simple sexp representation.
137 // This representation should be different from the input grammar 138 // This representation should be different from the input grammar
138 // in as many cases as possible, to make it more difficult for incorrect 139 // in as many cases as possible, to make it more difficult for incorrect
139 // parses to look as correct ones which is likely if the input and 140 // parses to look as correct ones which is likely if the input and
140 // output formats are alike. 141 // output formats are alike.
141 class RegExpUnparser FINAL : public RegExpVisitor { 142 class RegExpUnparser : public RegExpVisitor {
142 public: 143 public:
143 RegExpUnparser(OStream& os, Zone* zone) : os_(os), zone_(zone) {}
144 void VisitCharacterRange(CharacterRange that); 144 void VisitCharacterRange(CharacterRange that);
145 #define MAKE_CASE(Name) virtual void* Visit##Name(RegExp##Name*, \ 145 #define MAKE_CASE(Name) virtual void* Visit##Name(RegExp##Name*, \
146 void* data) OVERRIDE; 146 void* data);
147 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_CASE) 147 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_CASE)
148 #undef MAKE_CASE 148 #undef MAKE_CASE
149 private:
150 OStream& os_;
151 Zone* zone_;
152 }; 149 };
153 150
154 151
155 void* RegExpUnparser::VisitDisjunction(RegExpDisjunction* that, void* data) { 152 void* RegExpUnparser::VisitDisjunction(RegExpDisjunction* that, void* data) {
156 os_ << "(|"; 153 OS::Print("(|");
157 for (int i = 0; i < that->alternatives()->length(); i++) { 154 for (intptr_t i = 0; i < that->alternatives()->length(); i++) {
158 os_ << " "; 155 OS::Print(" ");
159 that->alternatives()->at(i)->Accept(this, data); 156 (*that->alternatives())[i]->Accept(this, data);
160 } 157 }
161 os_ << ")"; 158 OS::Print(")");
162 return NULL; 159 return NULL;
163 } 160 }
164 161
165 162
166 void* RegExpUnparser::VisitAlternative(RegExpAlternative* that, void* data) { 163 void* RegExpUnparser::VisitAlternative(RegExpAlternative* that, void* data) {
167 os_ << "(:"; 164 OS::Print("(:");
168 for (int i = 0; i < that->nodes()->length(); i++) { 165 for (intptr_t i = 0; i < that->nodes()->length(); i++) {
169 os_ << " "; 166 OS::Print(" ");
170 that->nodes()->at(i)->Accept(this, data); 167 (*that->nodes())[i]->Accept(this, data);
171 } 168 }
172 os_ << ")"; 169 OS::Print(")");
173 return NULL; 170 return NULL;
174 } 171 }
175 172
176 173
177 void RegExpUnparser::VisitCharacterRange(CharacterRange that) { 174 void RegExpUnparser::VisitCharacterRange(CharacterRange that) {
178 os_ << AsUC16(that.from()); 175 PrintUtf16(that.from());
179 if (!that.IsSingleton()) { 176 if (!that.IsSingleton()) {
180 os_ << "-" << AsUC16(that.to()); 177 OS::Print("-");
178 PrintUtf16(that.to());
181 } 179 }
182 } 180 }
183 181
184 182
185 183
186 void* RegExpUnparser::VisitCharacterClass(RegExpCharacterClass* that, 184 void* RegExpUnparser::VisitCharacterClass(RegExpCharacterClass* that,
187 void* data) { 185 void* data) {
188 if (that->is_negated()) os_ << "^"; 186 if (that->is_negated()) OS::Print("^");
189 os_ << "["; 187 OS::Print("[");
190 for (int i = 0; i < that->ranges(zone_)->length(); i++) { 188 for (intptr_t i = 0; i < that->ranges()->length(); i++) {
191 if (i > 0) os_ << " "; 189 if (i > 0) OS::Print(" ");
192 VisitCharacterRange(that->ranges(zone_)->at(i)); 190 VisitCharacterRange((*that->ranges())[i]);
193 } 191 }
194 os_ << "]"; 192 OS::Print("]");
195 return NULL; 193 return NULL;
196 } 194 }
197 195
198 196
199 void* RegExpUnparser::VisitAssertion(RegExpAssertion* that, void* data) { 197 void* RegExpUnparser::VisitAssertion(RegExpAssertion* that, void* data) {
200 switch (that->assertion_type()) { 198 switch (that->assertion_type()) {
201 case RegExpAssertion::START_OF_INPUT: 199 case RegExpAssertion::START_OF_INPUT:
202 os_ << "@^i"; 200 OS::Print("@^i");
203 break; 201 break;
204 case RegExpAssertion::END_OF_INPUT: 202 case RegExpAssertion::END_OF_INPUT:
205 os_ << "@$i"; 203 OS::Print("@$i");
206 break; 204 break;
207 case RegExpAssertion::START_OF_LINE: 205 case RegExpAssertion::START_OF_LINE:
208 os_ << "@^l"; 206 OS::Print("@^l");
209 break; 207 break;
210 case RegExpAssertion::END_OF_LINE: 208 case RegExpAssertion::END_OF_LINE:
211 os_ << "@$l"; 209 OS::Print("@$l");
212 break; 210 break;
213 case RegExpAssertion::BOUNDARY: 211 case RegExpAssertion::BOUNDARY:
214 os_ << "@b"; 212 OS::Print("@b");
215 break; 213 break;
216 case RegExpAssertion::NON_BOUNDARY: 214 case RegExpAssertion::NON_BOUNDARY:
217 os_ << "@B"; 215 OS::Print("@B");
218 break; 216 break;
219 } 217 }
220 return NULL; 218 return NULL;
221 } 219 }
222 220
223 221
224 void* RegExpUnparser::VisitAtom(RegExpAtom* that, void* data) { 222 void* RegExpUnparser::VisitAtom(RegExpAtom* that, void* data) {
225 os_ << "'"; 223 OS::Print("'");
226 Vector<const uc16> chardata = that->data(); 224 ZoneGrowableArray<uint16_t>* chardata = that->data();
227 for (int i = 0; i < chardata.length(); i++) { 225 for (intptr_t i = 0; i < chardata->length(); i++) {
228 os_ << AsUC16(chardata[i]); 226 PrintUtf16(chardata->At(i));
229 } 227 }
230 os_ << "'"; 228 OS::Print("'");
231 return NULL; 229 return NULL;
232 } 230 }
233 231
234 232
235 void* RegExpUnparser::VisitText(RegExpText* that, void* data) { 233 void* RegExpUnparser::VisitText(RegExpText* that, void* data) {
236 if (that->elements()->length() == 1) { 234 if (that->elements()->length() == 1) {
237 that->elements()->at(0).tree()->Accept(this, data); 235 (*that->elements())[0].tree()->Accept(this, data);
238 } else { 236 } else {
239 os_ << "(!"; 237 OS::Print("(!");
240 for (int i = 0; i < that->elements()->length(); i++) { 238 for (intptr_t i = 0; i < that->elements()->length(); i++) {
241 os_ << " "; 239 OS::Print(" ");
242 that->elements()->at(i).tree()->Accept(this, data); 240 (*that->elements())[i].tree()->Accept(this, data);
243 } 241 }
244 os_ << ")"; 242 OS::Print(")");
245 } 243 }
246 return NULL; 244 return NULL;
247 } 245 }
248 246
249 247
250 void* RegExpUnparser::VisitQuantifier(RegExpQuantifier* that, void* data) { 248 void* RegExpUnparser::VisitQuantifier(RegExpQuantifier* that, void* data) {
251 os_ << "(# " << that->min() << " "; 249 OS::Print("(# %" Pd " ", that->min());
252 if (that->max() == RegExpTree::kInfinity) { 250 if (that->max() == RegExpTree::kInfinity) {
253 os_ << "- "; 251 OS::Print("- ");
254 } else { 252 } else {
255 os_ << that->max() << " "; 253 OS::Print("%" Pd " ", that->max());
256 } 254 }
257 os_ << (that->is_greedy() ? "g " : that->is_possessive() ? "p " : "n "); 255 OS::Print(that->is_greedy() ? "g " : that->is_possessive() ? "p " : "n ");
258 that->body()->Accept(this, data); 256 that->body()->Accept(this, data);
259 os_ << ")"; 257 OS::Print(")");
260 return NULL; 258 return NULL;
261 } 259 }
262 260
263 261
264 void* RegExpUnparser::VisitCapture(RegExpCapture* that, void* data) { 262 void* RegExpUnparser::VisitCapture(RegExpCapture* that, void* data) {
265 os_ << "(^ "; 263 OS::Print("(^ ");
266 that->body()->Accept(this, data); 264 that->body()->Accept(this, data);
267 os_ << ")"; 265 OS::Print(")");
268 return NULL; 266 return NULL;
269 } 267 }
270 268
271 269
272 void* RegExpUnparser::VisitLookahead(RegExpLookahead* that, void* data) { 270 void* RegExpUnparser::VisitLookahead(RegExpLookahead* that, void* data) {
273 os_ << "(-> " << (that->is_positive() ? "+ " : "- "); 271 OS::Print("(-> %s", (that->is_positive() ? "+ " : "- "));
274 that->body()->Accept(this, data); 272 that->body()->Accept(this, data);
275 os_ << ")"; 273 OS::Print(")");
276 return NULL; 274 return NULL;
277 } 275 }
278 276
279 277
280 void* RegExpUnparser::VisitBackReference(RegExpBackReference* that, 278 void* RegExpUnparser::VisitBackReference(RegExpBackReference* that,
281 void* data) { 279 void*) {
282 os_ << "(<- " << that->index() << ")"; 280 OS::Print("(<- %" Pd ")", that->index());
283 return NULL; 281 return NULL;
284 } 282 }
285 283
286 284
287 void* RegExpUnparser::VisitEmpty(RegExpEmpty* that, void* data) { 285 void* RegExpUnparser::VisitEmpty(RegExpEmpty*, void*) {
288 os_ << '%'; 286 OS::Print("%%");
289 return NULL; 287 return NULL;
290 } 288 }
291 289
292 290
293 OStream& RegExpTree::Print(OStream& os, Zone* zone) { // NOLINT 291 void RegExpTree::Print() {
294 RegExpUnparser unparser(os, zone); 292 RegExpUnparser unparser;
295 Accept(&unparser, NULL); 293 Accept(&unparser, NULL);
296 return os;
297 } 294 }
298 295
299 296
300 RegExpDisjunction::RegExpDisjunction(ZoneList<RegExpTree*>* alternatives) 297 RegExpDisjunction::RegExpDisjunction(
298 ZoneGrowableArray<RegExpTree*>* alternatives)
301 : alternatives_(alternatives) { 299 : alternatives_(alternatives) {
302 DCHECK(alternatives->length() > 1); 300 ASSERT(alternatives->length() > 1);
303 RegExpTree* first_alternative = alternatives->at(0); 301 RegExpTree* first_alternative = alternatives->At(0);
304 min_match_ = first_alternative->min_match(); 302 min_match_ = first_alternative->min_match();
305 max_match_ = first_alternative->max_match(); 303 max_match_ = first_alternative->max_match();
306 for (int i = 1; i < alternatives->length(); i++) { 304 for (intptr_t i = 1; i < alternatives->length(); i++) {
307 RegExpTree* alternative = alternatives->at(i); 305 RegExpTree* alternative = alternatives->At(i);
308 min_match_ = Min(min_match_, alternative->min_match()); 306 min_match_ = Utils::Minimum(min_match_, alternative->min_match());
309 max_match_ = Max(max_match_, alternative->max_match()); 307 max_match_ = Utils::Maximum(max_match_, alternative->max_match());
310 } 308 }
311 } 309 }
312 310
313 311
314 static int IncreaseBy(int previous, int increase) { 312 static intptr_t IncreaseBy(intptr_t previous, intptr_t increase) {
315 if (RegExpTree::kInfinity - previous < increase) { 313 if (RegExpTree::kInfinity - previous < increase) {
316 return RegExpTree::kInfinity; 314 return RegExpTree::kInfinity;
317 } else { 315 } else {
318 return previous + increase; 316 return previous + increase;
319 } 317 }
320 } 318 }
321 319
322 RegExpAlternative::RegExpAlternative(ZoneList<RegExpTree*>* nodes) 320 RegExpAlternative::RegExpAlternative(ZoneGrowableArray<RegExpTree*>* nodes)
323 : nodes_(nodes) { 321 : nodes_(nodes) {
324 DCHECK(nodes->length() > 1); 322 ASSERT(nodes->length() > 1);
325 min_match_ = 0; 323 min_match_ = 0;
326 max_match_ = 0; 324 max_match_ = 0;
327 for (int i = 0; i < nodes->length(); i++) { 325 for (intptr_t i = 0; i < nodes->length(); i++) {
328 RegExpTree* node = nodes->at(i); 326 RegExpTree* node = nodes->At(i);
329 int node_min_match = node->min_match(); 327 intptr_t node_min_match = node->min_match();
330 min_match_ = IncreaseBy(min_match_, node_min_match); 328 min_match_ = IncreaseBy(min_match_, node_min_match);
331 int node_max_match = node->max_match(); 329 intptr_t node_max_match = node->max_match();
332 max_match_ = IncreaseBy(max_match_, node_max_match); 330 max_match_ = IncreaseBy(max_match_, node_max_match);
333 } 331 }
334 } 332 }
335 333
336 } // namespace dart 334 } // namespace dart
OLDNEW
« no previous file with comments | « runtime/vm/regexp_ast.h ('k') | runtime/vm/regexp_parser.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698