OLD | NEW |
1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file |
2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
4 | 4 |
5 #include "vm/regexp_ast.h" | 5 #include "vm/regexp_ast.h" |
6 | 6 |
7 #include "platform/utils.h" | 7 #include "platform/utils.h" |
8 #include "vm/os.h" | 8 #include "vm/os.h" |
9 | 9 |
10 namespace dart { | 10 namespace dart { |
(...skipping 10 matching lines...) Expand all Loading... |
21 bool RegExpTree::Is##Name() const { return false; } | 21 bool RegExpTree::Is##Name() const { return false; } |
22 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_TYPE_CASE) | 22 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_TYPE_CASE) |
23 #undef MAKE_TYPE_CASE | 23 #undef MAKE_TYPE_CASE |
24 | 24 |
25 #define MAKE_TYPE_CASE(Name) \ | 25 #define MAKE_TYPE_CASE(Name) \ |
26 RegExp##Name* RegExp##Name::As##Name() { return this; } \ | 26 RegExp##Name* RegExp##Name::As##Name() { return this; } \ |
27 bool RegExp##Name::Is##Name() const { return true; } | 27 bool RegExp##Name::Is##Name() const { return true; } |
28 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_TYPE_CASE) | 28 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_TYPE_CASE) |
29 #undef MAKE_TYPE_CASE | 29 #undef MAKE_TYPE_CASE |
30 | 30 |
31 | |
32 static Interval ListCaptureRegisters(ZoneGrowableArray<RegExpTree*>* children) { | 31 static Interval ListCaptureRegisters(ZoneGrowableArray<RegExpTree*>* children) { |
33 Interval result = Interval::Empty(); | 32 Interval result = Interval::Empty(); |
34 for (intptr_t i = 0; i < children->length(); i++) | 33 for (intptr_t i = 0; i < children->length(); i++) |
35 result = result.Union(children->At(i)->CaptureRegisters()); | 34 result = result.Union(children->At(i)->CaptureRegisters()); |
36 return result; | 35 return result; |
37 } | 36 } |
38 | 37 |
39 | |
40 Interval RegExpAlternative::CaptureRegisters() const { | 38 Interval RegExpAlternative::CaptureRegisters() const { |
41 return ListCaptureRegisters(nodes()); | 39 return ListCaptureRegisters(nodes()); |
42 } | 40 } |
43 | 41 |
44 | |
45 Interval RegExpDisjunction::CaptureRegisters() const { | 42 Interval RegExpDisjunction::CaptureRegisters() const { |
46 return ListCaptureRegisters(alternatives()); | 43 return ListCaptureRegisters(alternatives()); |
47 } | 44 } |
48 | 45 |
49 | |
50 Interval RegExpLookahead::CaptureRegisters() const { | 46 Interval RegExpLookahead::CaptureRegisters() const { |
51 return body()->CaptureRegisters(); | 47 return body()->CaptureRegisters(); |
52 } | 48 } |
53 | 49 |
54 | |
55 Interval RegExpCapture::CaptureRegisters() const { | 50 Interval RegExpCapture::CaptureRegisters() const { |
56 Interval self(StartRegister(index()), EndRegister(index())); | 51 Interval self(StartRegister(index()), EndRegister(index())); |
57 return self.Union(body()->CaptureRegisters()); | 52 return self.Union(body()->CaptureRegisters()); |
58 } | 53 } |
59 | 54 |
60 | |
61 Interval RegExpQuantifier::CaptureRegisters() const { | 55 Interval RegExpQuantifier::CaptureRegisters() const { |
62 return body()->CaptureRegisters(); | 56 return body()->CaptureRegisters(); |
63 } | 57 } |
64 | 58 |
65 | |
66 bool RegExpAssertion::IsAnchoredAtStart() const { | 59 bool RegExpAssertion::IsAnchoredAtStart() const { |
67 return assertion_type() == RegExpAssertion::START_OF_INPUT; | 60 return assertion_type() == RegExpAssertion::START_OF_INPUT; |
68 } | 61 } |
69 | 62 |
70 | |
71 bool RegExpAssertion::IsAnchoredAtEnd() const { | 63 bool RegExpAssertion::IsAnchoredAtEnd() const { |
72 return assertion_type() == RegExpAssertion::END_OF_INPUT; | 64 return assertion_type() == RegExpAssertion::END_OF_INPUT; |
73 } | 65 } |
74 | 66 |
75 | |
76 bool RegExpAlternative::IsAnchoredAtStart() const { | 67 bool RegExpAlternative::IsAnchoredAtStart() const { |
77 ZoneGrowableArray<RegExpTree*>* nodes = this->nodes(); | 68 ZoneGrowableArray<RegExpTree*>* nodes = this->nodes(); |
78 for (intptr_t i = 0; i < nodes->length(); i++) { | 69 for (intptr_t i = 0; i < nodes->length(); i++) { |
79 RegExpTree* node = nodes->At(i); | 70 RegExpTree* node = nodes->At(i); |
80 if (node->IsAnchoredAtStart()) { | 71 if (node->IsAnchoredAtStart()) { |
81 return true; | 72 return true; |
82 } | 73 } |
83 if (node->max_match() > 0) { | 74 if (node->max_match() > 0) { |
84 return false; | 75 return false; |
85 } | 76 } |
86 } | 77 } |
87 return false; | 78 return false; |
88 } | 79 } |
89 | 80 |
90 | |
91 bool RegExpAlternative::IsAnchoredAtEnd() const { | 81 bool RegExpAlternative::IsAnchoredAtEnd() const { |
92 ZoneGrowableArray<RegExpTree*>* nodes = this->nodes(); | 82 ZoneGrowableArray<RegExpTree*>* nodes = this->nodes(); |
93 for (intptr_t i = nodes->length() - 1; i >= 0; i--) { | 83 for (intptr_t i = nodes->length() - 1; i >= 0; i--) { |
94 RegExpTree* node = nodes->At(i); | 84 RegExpTree* node = nodes->At(i); |
95 if (node->IsAnchoredAtEnd()) { | 85 if (node->IsAnchoredAtEnd()) { |
96 return true; | 86 return true; |
97 } | 87 } |
98 if (node->max_match() > 0) { | 88 if (node->max_match() > 0) { |
99 return false; | 89 return false; |
100 } | 90 } |
101 } | 91 } |
102 return false; | 92 return false; |
103 } | 93 } |
104 | 94 |
105 | |
106 bool RegExpDisjunction::IsAnchoredAtStart() const { | 95 bool RegExpDisjunction::IsAnchoredAtStart() const { |
107 ZoneGrowableArray<RegExpTree*>* alternatives = this->alternatives(); | 96 ZoneGrowableArray<RegExpTree*>* alternatives = this->alternatives(); |
108 for (intptr_t i = 0; i < alternatives->length(); i++) { | 97 for (intptr_t i = 0; i < alternatives->length(); i++) { |
109 if (!alternatives->At(i)->IsAnchoredAtStart()) return false; | 98 if (!alternatives->At(i)->IsAnchoredAtStart()) return false; |
110 } | 99 } |
111 return true; | 100 return true; |
112 } | 101 } |
113 | 102 |
114 | |
115 bool RegExpDisjunction::IsAnchoredAtEnd() const { | 103 bool RegExpDisjunction::IsAnchoredAtEnd() const { |
116 ZoneGrowableArray<RegExpTree*>* alternatives = this->alternatives(); | 104 ZoneGrowableArray<RegExpTree*>* alternatives = this->alternatives(); |
117 for (intptr_t i = 0; i < alternatives->length(); i++) { | 105 for (intptr_t i = 0; i < alternatives->length(); i++) { |
118 if (!alternatives->At(i)->IsAnchoredAtEnd()) return false; | 106 if (!alternatives->At(i)->IsAnchoredAtEnd()) return false; |
119 } | 107 } |
120 return true; | 108 return true; |
121 } | 109 } |
122 | 110 |
123 | |
124 bool RegExpLookahead::IsAnchoredAtStart() const { | 111 bool RegExpLookahead::IsAnchoredAtStart() const { |
125 return is_positive() && body()->IsAnchoredAtStart(); | 112 return is_positive() && body()->IsAnchoredAtStart(); |
126 } | 113 } |
127 | 114 |
128 | |
129 bool RegExpCapture::IsAnchoredAtStart() const { | 115 bool RegExpCapture::IsAnchoredAtStart() const { |
130 return body()->IsAnchoredAtStart(); | 116 return body()->IsAnchoredAtStart(); |
131 } | 117 } |
132 | 118 |
133 | |
134 bool RegExpCapture::IsAnchoredAtEnd() const { | 119 bool RegExpCapture::IsAnchoredAtEnd() const { |
135 return body()->IsAnchoredAtEnd(); | 120 return body()->IsAnchoredAtEnd(); |
136 } | 121 } |
137 | 122 |
138 | |
139 // Convert regular expression trees to a simple sexp representation. | 123 // Convert regular expression trees to a simple sexp representation. |
140 // This representation should be different from the input grammar | 124 // This representation should be different from the input grammar |
141 // in as many cases as possible, to make it more difficult for incorrect | 125 // in as many cases as possible, to make it more difficult for incorrect |
142 // parses to look as correct ones which is likely if the input and | 126 // parses to look as correct ones which is likely if the input and |
143 // output formats are alike. | 127 // output formats are alike. |
144 class RegExpUnparser : public RegExpVisitor { | 128 class RegExpUnparser : public RegExpVisitor { |
145 public: | 129 public: |
146 void VisitCharacterRange(CharacterRange that); | 130 void VisitCharacterRange(CharacterRange that); |
147 #define MAKE_CASE(Name) virtual void* Visit##Name(RegExp##Name*, void* data); | 131 #define MAKE_CASE(Name) virtual void* Visit##Name(RegExp##Name*, void* data); |
148 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_CASE) | 132 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_CASE) |
149 #undef MAKE_CASE | 133 #undef MAKE_CASE |
150 }; | 134 }; |
151 | 135 |
152 | |
153 void* RegExpUnparser::VisitDisjunction(RegExpDisjunction* that, void* data) { | 136 void* RegExpUnparser::VisitDisjunction(RegExpDisjunction* that, void* data) { |
154 OS::Print("(|"); | 137 OS::Print("(|"); |
155 for (intptr_t i = 0; i < that->alternatives()->length(); i++) { | 138 for (intptr_t i = 0; i < that->alternatives()->length(); i++) { |
156 OS::Print(" "); | 139 OS::Print(" "); |
157 (*that->alternatives())[i]->Accept(this, data); | 140 (*that->alternatives())[i]->Accept(this, data); |
158 } | 141 } |
159 OS::Print(")"); | 142 OS::Print(")"); |
160 return NULL; | 143 return NULL; |
161 } | 144 } |
162 | 145 |
163 | |
164 void* RegExpUnparser::VisitAlternative(RegExpAlternative* that, void* data) { | 146 void* RegExpUnparser::VisitAlternative(RegExpAlternative* that, void* data) { |
165 OS::Print("(:"); | 147 OS::Print("(:"); |
166 for (intptr_t i = 0; i < that->nodes()->length(); i++) { | 148 for (intptr_t i = 0; i < that->nodes()->length(); i++) { |
167 OS::Print(" "); | 149 OS::Print(" "); |
168 (*that->nodes())[i]->Accept(this, data); | 150 (*that->nodes())[i]->Accept(this, data); |
169 } | 151 } |
170 OS::Print(")"); | 152 OS::Print(")"); |
171 return NULL; | 153 return NULL; |
172 } | 154 } |
173 | 155 |
174 | |
175 void RegExpUnparser::VisitCharacterRange(CharacterRange that) { | 156 void RegExpUnparser::VisitCharacterRange(CharacterRange that) { |
176 PrintUtf16(that.from()); | 157 PrintUtf16(that.from()); |
177 if (!that.IsSingleton()) { | 158 if (!that.IsSingleton()) { |
178 OS::Print("-"); | 159 OS::Print("-"); |
179 PrintUtf16(that.to()); | 160 PrintUtf16(that.to()); |
180 } | 161 } |
181 } | 162 } |
182 | 163 |
183 | |
184 void* RegExpUnparser::VisitCharacterClass(RegExpCharacterClass* that, | 164 void* RegExpUnparser::VisitCharacterClass(RegExpCharacterClass* that, |
185 void* data) { | 165 void* data) { |
186 if (that->is_negated()) OS::Print("^"); | 166 if (that->is_negated()) OS::Print("^"); |
187 OS::Print("["); | 167 OS::Print("["); |
188 for (intptr_t i = 0; i < that->ranges()->length(); i++) { | 168 for (intptr_t i = 0; i < that->ranges()->length(); i++) { |
189 if (i > 0) OS::Print(" "); | 169 if (i > 0) OS::Print(" "); |
190 VisitCharacterRange((*that->ranges())[i]); | 170 VisitCharacterRange((*that->ranges())[i]); |
191 } | 171 } |
192 OS::Print("]"); | 172 OS::Print("]"); |
193 return NULL; | 173 return NULL; |
194 } | 174 } |
195 | 175 |
196 | |
197 void* RegExpUnparser::VisitAssertion(RegExpAssertion* that, void* data) { | 176 void* RegExpUnparser::VisitAssertion(RegExpAssertion* that, void* data) { |
198 switch (that->assertion_type()) { | 177 switch (that->assertion_type()) { |
199 case RegExpAssertion::START_OF_INPUT: | 178 case RegExpAssertion::START_OF_INPUT: |
200 OS::Print("@^i"); | 179 OS::Print("@^i"); |
201 break; | 180 break; |
202 case RegExpAssertion::END_OF_INPUT: | 181 case RegExpAssertion::END_OF_INPUT: |
203 OS::Print("@$i"); | 182 OS::Print("@$i"); |
204 break; | 183 break; |
205 case RegExpAssertion::START_OF_LINE: | 184 case RegExpAssertion::START_OF_LINE: |
206 OS::Print("@^l"); | 185 OS::Print("@^l"); |
207 break; | 186 break; |
208 case RegExpAssertion::END_OF_LINE: | 187 case RegExpAssertion::END_OF_LINE: |
209 OS::Print("@$l"); | 188 OS::Print("@$l"); |
210 break; | 189 break; |
211 case RegExpAssertion::BOUNDARY: | 190 case RegExpAssertion::BOUNDARY: |
212 OS::Print("@b"); | 191 OS::Print("@b"); |
213 break; | 192 break; |
214 case RegExpAssertion::NON_BOUNDARY: | 193 case RegExpAssertion::NON_BOUNDARY: |
215 OS::Print("@B"); | 194 OS::Print("@B"); |
216 break; | 195 break; |
217 } | 196 } |
218 return NULL; | 197 return NULL; |
219 } | 198 } |
220 | 199 |
221 | |
222 void* RegExpUnparser::VisitAtom(RegExpAtom* that, void* data) { | 200 void* RegExpUnparser::VisitAtom(RegExpAtom* that, void* data) { |
223 OS::Print("'"); | 201 OS::Print("'"); |
224 ZoneGrowableArray<uint16_t>* chardata = that->data(); | 202 ZoneGrowableArray<uint16_t>* chardata = that->data(); |
225 for (intptr_t i = 0; i < chardata->length(); i++) { | 203 for (intptr_t i = 0; i < chardata->length(); i++) { |
226 PrintUtf16(chardata->At(i)); | 204 PrintUtf16(chardata->At(i)); |
227 } | 205 } |
228 OS::Print("'"); | 206 OS::Print("'"); |
229 return NULL; | 207 return NULL; |
230 } | 208 } |
231 | 209 |
232 | |
233 void* RegExpUnparser::VisitText(RegExpText* that, void* data) { | 210 void* RegExpUnparser::VisitText(RegExpText* that, void* data) { |
234 if (that->elements()->length() == 1) { | 211 if (that->elements()->length() == 1) { |
235 (*that->elements())[0].tree()->Accept(this, data); | 212 (*that->elements())[0].tree()->Accept(this, data); |
236 } else { | 213 } else { |
237 OS::Print("(!"); | 214 OS::Print("(!"); |
238 for (intptr_t i = 0; i < that->elements()->length(); i++) { | 215 for (intptr_t i = 0; i < that->elements()->length(); i++) { |
239 OS::Print(" "); | 216 OS::Print(" "); |
240 (*that->elements())[i].tree()->Accept(this, data); | 217 (*that->elements())[i].tree()->Accept(this, data); |
241 } | 218 } |
242 OS::Print(")"); | 219 OS::Print(")"); |
243 } | 220 } |
244 return NULL; | 221 return NULL; |
245 } | 222 } |
246 | 223 |
247 | |
248 void* RegExpUnparser::VisitQuantifier(RegExpQuantifier* that, void* data) { | 224 void* RegExpUnparser::VisitQuantifier(RegExpQuantifier* that, void* data) { |
249 OS::Print("(# %" Pd " ", that->min()); | 225 OS::Print("(# %" Pd " ", that->min()); |
250 if (that->max() == RegExpTree::kInfinity) { | 226 if (that->max() == RegExpTree::kInfinity) { |
251 OS::Print("- "); | 227 OS::Print("- "); |
252 } else { | 228 } else { |
253 OS::Print("%" Pd " ", that->max()); | 229 OS::Print("%" Pd " ", that->max()); |
254 } | 230 } |
255 OS::Print(that->is_greedy() ? "g " : that->is_possessive() ? "p " : "n "); | 231 OS::Print(that->is_greedy() ? "g " : that->is_possessive() ? "p " : "n "); |
256 that->body()->Accept(this, data); | 232 that->body()->Accept(this, data); |
257 OS::Print(")"); | 233 OS::Print(")"); |
258 return NULL; | 234 return NULL; |
259 } | 235 } |
260 | 236 |
261 | |
262 void* RegExpUnparser::VisitCapture(RegExpCapture* that, void* data) { | 237 void* RegExpUnparser::VisitCapture(RegExpCapture* that, void* data) { |
263 OS::Print("(^ "); | 238 OS::Print("(^ "); |
264 that->body()->Accept(this, data); | 239 that->body()->Accept(this, data); |
265 OS::Print(")"); | 240 OS::Print(")"); |
266 return NULL; | 241 return NULL; |
267 } | 242 } |
268 | 243 |
269 | |
270 void* RegExpUnparser::VisitLookahead(RegExpLookahead* that, void* data) { | 244 void* RegExpUnparser::VisitLookahead(RegExpLookahead* that, void* data) { |
271 OS::Print("(-> %s", (that->is_positive() ? "+ " : "- ")); | 245 OS::Print("(-> %s", (that->is_positive() ? "+ " : "- ")); |
272 that->body()->Accept(this, data); | 246 that->body()->Accept(this, data); |
273 OS::Print(")"); | 247 OS::Print(")"); |
274 return NULL; | 248 return NULL; |
275 } | 249 } |
276 | 250 |
277 | |
278 void* RegExpUnparser::VisitBackReference(RegExpBackReference* that, void*) { | 251 void* RegExpUnparser::VisitBackReference(RegExpBackReference* that, void*) { |
279 OS::Print("(<- %" Pd ")", that->index()); | 252 OS::Print("(<- %" Pd ")", that->index()); |
280 return NULL; | 253 return NULL; |
281 } | 254 } |
282 | 255 |
283 | |
284 void* RegExpUnparser::VisitEmpty(RegExpEmpty*, void*) { | 256 void* RegExpUnparser::VisitEmpty(RegExpEmpty*, void*) { |
285 OS::Print("%%"); | 257 OS::Print("%%"); |
286 return NULL; | 258 return NULL; |
287 } | 259 } |
288 | 260 |
289 | |
290 void RegExpTree::Print() { | 261 void RegExpTree::Print() { |
291 RegExpUnparser unparser; | 262 RegExpUnparser unparser; |
292 Accept(&unparser, NULL); | 263 Accept(&unparser, NULL); |
293 } | 264 } |
294 | 265 |
295 | |
296 RegExpDisjunction::RegExpDisjunction( | 266 RegExpDisjunction::RegExpDisjunction( |
297 ZoneGrowableArray<RegExpTree*>* alternatives) | 267 ZoneGrowableArray<RegExpTree*>* alternatives) |
298 : alternatives_(alternatives) { | 268 : alternatives_(alternatives) { |
299 ASSERT(alternatives->length() > 1); | 269 ASSERT(alternatives->length() > 1); |
300 RegExpTree* first_alternative = alternatives->At(0); | 270 RegExpTree* first_alternative = alternatives->At(0); |
301 min_match_ = first_alternative->min_match(); | 271 min_match_ = first_alternative->min_match(); |
302 max_match_ = first_alternative->max_match(); | 272 max_match_ = first_alternative->max_match(); |
303 for (intptr_t i = 1; i < alternatives->length(); i++) { | 273 for (intptr_t i = 1; i < alternatives->length(); i++) { |
304 RegExpTree* alternative = alternatives->At(i); | 274 RegExpTree* alternative = alternatives->At(i); |
305 min_match_ = Utils::Minimum(min_match_, alternative->min_match()); | 275 min_match_ = Utils::Minimum(min_match_, alternative->min_match()); |
306 max_match_ = Utils::Maximum(max_match_, alternative->max_match()); | 276 max_match_ = Utils::Maximum(max_match_, alternative->max_match()); |
307 } | 277 } |
308 } | 278 } |
309 | 279 |
310 | |
311 static intptr_t IncreaseBy(intptr_t previous, intptr_t increase) { | 280 static intptr_t IncreaseBy(intptr_t previous, intptr_t increase) { |
312 if (RegExpTree::kInfinity - previous < increase) { | 281 if (RegExpTree::kInfinity - previous < increase) { |
313 return RegExpTree::kInfinity; | 282 return RegExpTree::kInfinity; |
314 } else { | 283 } else { |
315 return previous + increase; | 284 return previous + increase; |
316 } | 285 } |
317 } | 286 } |
318 | 287 |
319 RegExpAlternative::RegExpAlternative(ZoneGrowableArray<RegExpTree*>* nodes) | 288 RegExpAlternative::RegExpAlternative(ZoneGrowableArray<RegExpTree*>* nodes) |
320 : nodes_(nodes) { | 289 : nodes_(nodes) { |
321 ASSERT(nodes->length() > 1); | 290 ASSERT(nodes->length() > 1); |
322 min_match_ = 0; | 291 min_match_ = 0; |
323 max_match_ = 0; | 292 max_match_ = 0; |
324 for (intptr_t i = 0; i < nodes->length(); i++) { | 293 for (intptr_t i = 0; i < nodes->length(); i++) { |
325 RegExpTree* node = nodes->At(i); | 294 RegExpTree* node = nodes->At(i); |
326 intptr_t node_min_match = node->min_match(); | 295 intptr_t node_min_match = node->min_match(); |
327 min_match_ = IncreaseBy(min_match_, node_min_match); | 296 min_match_ = IncreaseBy(min_match_, node_min_match); |
328 intptr_t node_max_match = node->max_match(); | 297 intptr_t node_max_match = node->max_match(); |
329 max_match_ = IncreaseBy(max_match_, node_max_match); | 298 max_match_ = IncreaseBy(max_match_, node_max_match); |
330 } | 299 } |
331 } | 300 } |
332 | 301 |
333 } // namespace dart | 302 } // namespace dart |
OLD | NEW |