OLD | NEW |
1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file |
2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
4 | 4 |
5 #ifndef RUNTIME_VM_REGEXP_H_ | 5 #ifndef RUNTIME_VM_REGEXP_H_ |
6 #define RUNTIME_VM_REGEXP_H_ | 6 #define RUNTIME_VM_REGEXP_H_ |
7 | 7 |
8 #include "vm/assembler.h" | 8 #include "vm/assembler.h" |
9 #include "vm/intermediate_language.h" | 9 #include "vm/intermediate_language.h" |
10 #include "vm/flow_graph_compiler.h" | 10 #include "vm/flow_graph_compiler.h" |
11 #include "vm/object.h" | 11 #include "vm/object.h" |
12 #include "vm/regexp_assembler.h" | 12 #include "vm/regexp_assembler.h" |
13 | 13 |
14 namespace dart { | 14 namespace dart { |
15 | 15 |
16 class NodeVisitor; | 16 class NodeVisitor; |
17 class RegExpCompiler; | 17 class RegExpCompiler; |
18 class RegExpMacroAssembler; | 18 class RegExpMacroAssembler; |
19 class RegExpNode; | 19 class RegExpNode; |
20 class RegExpTree; | 20 class RegExpTree; |
21 class BoyerMooreLookahead; | 21 class BoyerMooreLookahead; |
22 | 22 |
23 | 23 |
24 // Represents code units in the range from from_ to to_, both ends are | 24 // Represents code units in the range from from_ to to_, both ends are |
25 // inclusive. | 25 // inclusive. |
26 class CharacterRange { | 26 class CharacterRange { |
27 public: | 27 public: |
28 CharacterRange() : from_(0), to_(0) { } | 28 CharacterRange() : from_(0), to_(0) {} |
29 CharacterRange(uint16_t from, uint16_t to) : from_(from), to_(to) { } | 29 CharacterRange(uint16_t from, uint16_t to) : from_(from), to_(to) {} |
30 | 30 |
31 static void AddClassEscape(uint16_t type, | 31 static void AddClassEscape(uint16_t type, |
32 ZoneGrowableArray<CharacterRange>* ranges); | 32 ZoneGrowableArray<CharacterRange>* ranges); |
33 static GrowableArray<const intptr_t> GetWordBounds(); | 33 static GrowableArray<const intptr_t> GetWordBounds(); |
34 static inline CharacterRange Singleton(uint16_t value) { | 34 static inline CharacterRange Singleton(uint16_t value) { |
35 return CharacterRange(value, value); | 35 return CharacterRange(value, value); |
36 } | 36 } |
37 static inline CharacterRange Range(uint16_t from, uint16_t to) { | 37 static inline CharacterRange Range(uint16_t from, uint16_t to) { |
38 ASSERT(from <= to); | 38 ASSERT(from <= to); |
39 return CharacterRange(from, to); | 39 return CharacterRange(from, to); |
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
74 private: | 74 private: |
75 uint16_t from_; | 75 uint16_t from_; |
76 uint16_t to_; | 76 uint16_t to_; |
77 | 77 |
78 DISALLOW_ALLOCATION(); | 78 DISALLOW_ALLOCATION(); |
79 }; | 79 }; |
80 | 80 |
81 | 81 |
82 // A set of unsigned integers that behaves especially well on small | 82 // A set of unsigned integers that behaves especially well on small |
83 // integers (< 32). May do zone-allocation. | 83 // integers (< 32). May do zone-allocation. |
84 class OutSet: public ZoneAllocated { | 84 class OutSet : public ZoneAllocated { |
85 public: | 85 public: |
86 OutSet() : first_(0), remaining_(NULL), successors_(NULL) { } | 86 OutSet() : first_(0), remaining_(NULL), successors_(NULL) {} |
87 OutSet* Extend(unsigned value, Zone* zone); | 87 OutSet* Extend(unsigned value, Zone* zone); |
88 bool Get(unsigned value) const; | 88 bool Get(unsigned value) const; |
89 static const unsigned kFirstLimit = 32; | 89 static const unsigned kFirstLimit = 32; |
90 | 90 |
91 private: | 91 private: |
92 // Destructively set a value in this set. In most cases you want | 92 // Destructively set a value in this set. In most cases you want |
93 // to use Extend instead to ensure that only one instance exists | 93 // to use Extend instead to ensure that only one instance exists |
94 // that contains the same values. | 94 // that contains the same values. |
95 void Set(unsigned value, Zone* zone); | 95 void Set(unsigned value, Zone* zone); |
96 | 96 |
97 // The successors are a list of sets that contain the same values | 97 // The successors are a list of sets that contain the same values |
98 // as this set and the one more value that is not present in this | 98 // as this set and the one more value that is not present in this |
99 // set. | 99 // set. |
100 ZoneGrowableArray<OutSet*>* successors() { return successors_; } | 100 ZoneGrowableArray<OutSet*>* successors() { return successors_; } |
101 | 101 |
102 OutSet(uint32_t first, ZoneGrowableArray<unsigned>* remaining) | 102 OutSet(uint32_t first, ZoneGrowableArray<unsigned>* remaining) |
103 : first_(first), remaining_(remaining), successors_(NULL) { } | 103 : first_(first), remaining_(remaining), successors_(NULL) {} |
104 uint32_t first_; | 104 uint32_t first_; |
105 ZoneGrowableArray<unsigned>* remaining_; | 105 ZoneGrowableArray<unsigned>* remaining_; |
106 ZoneGrowableArray<OutSet*>* successors_; | 106 ZoneGrowableArray<OutSet*>* successors_; |
107 friend class Trace; | 107 friend class Trace; |
108 }; | 108 }; |
109 | 109 |
110 | 110 |
111 #define FOR_EACH_NODE_TYPE(VISIT) \ | 111 #define FOR_EACH_NODE_TYPE(VISIT) \ |
112 VISIT(End) \ | 112 VISIT(End) \ |
113 VISIT(Action) \ | 113 VISIT(Action) \ |
114 VISIT(Choice) \ | 114 VISIT(Choice) \ |
115 VISIT(BackReference) \ | 115 VISIT(BackReference) \ |
116 VISIT(Assertion) \ | 116 VISIT(Assertion) \ |
117 VISIT(Text) | 117 VISIT(Text) |
118 | 118 |
119 | 119 |
120 #define FOR_EACH_REG_EXP_TREE_TYPE(VISIT) \ | 120 #define FOR_EACH_REG_EXP_TREE_TYPE(VISIT) \ |
121 VISIT(Disjunction) \ | 121 VISIT(Disjunction) \ |
122 VISIT(Alternative) \ | 122 VISIT(Alternative) \ |
123 VISIT(Assertion) \ | 123 VISIT(Assertion) \ |
124 VISIT(CharacterClass) \ | 124 VISIT(CharacterClass) \ |
125 VISIT(Atom) \ | 125 VISIT(Atom) \ |
126 VISIT(Quantifier) \ | 126 VISIT(Quantifier) \ |
127 VISIT(Capture) \ | 127 VISIT(Capture) \ |
128 VISIT(Lookahead) \ | 128 VISIT(Lookahead) \ |
129 VISIT(BackReference) \ | 129 VISIT(BackReference) \ |
130 VISIT(Empty) \ | 130 VISIT(Empty) \ |
131 VISIT(Text) | 131 VISIT(Text) |
132 | 132 |
133 | 133 |
134 #define FORWARD_DECLARE(Name) class RegExp##Name; | 134 #define FORWARD_DECLARE(Name) class RegExp##Name; |
135 FOR_EACH_REG_EXP_TREE_TYPE(FORWARD_DECLARE) | 135 FOR_EACH_REG_EXP_TREE_TYPE(FORWARD_DECLARE) |
136 #undef FORWARD_DECLARE | 136 #undef FORWARD_DECLARE |
137 | 137 |
138 | 138 |
139 class TextElement { | 139 class TextElement { |
140 public: | 140 public: |
141 enum TextType { | 141 enum TextType { ATOM, CHAR_CLASS }; |
142 ATOM, | |
143 CHAR_CLASS | |
144 }; | |
145 | 142 |
146 static TextElement Atom(RegExpAtom* atom); | 143 static TextElement Atom(RegExpAtom* atom); |
147 static TextElement CharClass(RegExpCharacterClass* char_class); | 144 static TextElement CharClass(RegExpCharacterClass* char_class); |
148 | 145 |
149 intptr_t cp_offset() const { return cp_offset_; } | 146 intptr_t cp_offset() const { return cp_offset_; } |
150 void set_cp_offset(intptr_t cp_offset) { cp_offset_ = cp_offset; } | 147 void set_cp_offset(intptr_t cp_offset) { cp_offset_ = cp_offset; } |
151 intptr_t length() const; | 148 intptr_t length() const; |
152 | 149 |
153 TextType text_type() const { return text_type_; } | 150 TextType text_type() const { return text_type_; } |
154 | 151 |
(...skipping 28 matching lines...) Expand all Loading... |
183 | 180 |
184 struct NodeInfo { | 181 struct NodeInfo { |
185 NodeInfo() | 182 NodeInfo() |
186 : being_analyzed(false), | 183 : being_analyzed(false), |
187 been_analyzed(false), | 184 been_analyzed(false), |
188 follows_word_interest(false), | 185 follows_word_interest(false), |
189 follows_newline_interest(false), | 186 follows_newline_interest(false), |
190 follows_start_interest(false), | 187 follows_start_interest(false), |
191 at_end(false), | 188 at_end(false), |
192 visited(false), | 189 visited(false), |
193 replacement_calculated(false) { } | 190 replacement_calculated(false) {} |
194 | 191 |
195 // Returns true if the interests and assumptions of this node | 192 // Returns true if the interests and assumptions of this node |
196 // matches the given one. | 193 // matches the given one. |
197 bool Matches(NodeInfo* that) { | 194 bool Matches(NodeInfo* that) { |
198 return (at_end == that->at_end) && | 195 return (at_end == that->at_end) && |
199 (follows_word_interest == that->follows_word_interest) && | 196 (follows_word_interest == that->follows_word_interest) && |
200 (follows_newline_interest == that->follows_newline_interest) && | 197 (follows_newline_interest == that->follows_newline_interest) && |
201 (follows_start_interest == that->follows_start_interest); | 198 (follows_start_interest == that->follows_start_interest); |
202 } | 199 } |
203 | 200 |
204 // Updates the interests of this node given the interests of the | 201 // Updates the interests of this node given the interests of the |
205 // node preceding it. | 202 // node preceding it. |
206 void AddFromPreceding(NodeInfo* that) { | 203 void AddFromPreceding(NodeInfo* that) { |
207 at_end |= that->at_end; | 204 at_end |= that->at_end; |
208 follows_word_interest |= that->follows_word_interest; | 205 follows_word_interest |= that->follows_word_interest; |
209 follows_newline_interest |= that->follows_newline_interest; | 206 follows_newline_interest |= that->follows_newline_interest; |
210 follows_start_interest |= that->follows_start_interest; | 207 follows_start_interest |= that->follows_start_interest; |
211 } | 208 } |
212 | 209 |
213 bool HasLookbehind() { | 210 bool HasLookbehind() { |
214 return follows_word_interest || | 211 return follows_word_interest || follows_newline_interest || |
215 follows_newline_interest || | |
216 follows_start_interest; | 212 follows_start_interest; |
217 } | 213 } |
218 | 214 |
219 // Sets the interests of this node to include the interests of the | 215 // Sets the interests of this node to include the interests of the |
220 // following node. | 216 // following node. |
221 void AddFromFollowing(NodeInfo* that) { | 217 void AddFromFollowing(NodeInfo* that) { |
222 follows_word_interest |= that->follows_word_interest; | 218 follows_word_interest |= that->follows_word_interest; |
223 follows_newline_interest |= that->follows_newline_interest; | 219 follows_newline_interest |= that->follows_newline_interest; |
224 follows_start_interest |= that->follows_start_interest; | 220 follows_start_interest |= that->follows_start_interest; |
225 } | 221 } |
226 | 222 |
227 void ResetCompilationState() { | 223 void ResetCompilationState() { |
228 being_analyzed = false; | 224 being_analyzed = false; |
229 been_analyzed = false; | 225 been_analyzed = false; |
230 } | 226 } |
231 | 227 |
232 bool being_analyzed: 1; | 228 bool being_analyzed : 1; |
233 bool been_analyzed: 1; | 229 bool been_analyzed : 1; |
234 | 230 |
235 // These bits are set of this node has to know what the preceding | 231 // These bits are set of this node has to know what the preceding |
236 // character was. | 232 // character was. |
237 bool follows_word_interest: 1; | 233 bool follows_word_interest : 1; |
238 bool follows_newline_interest: 1; | 234 bool follows_newline_interest : 1; |
239 bool follows_start_interest: 1; | 235 bool follows_start_interest : 1; |
240 | 236 |
241 bool at_end: 1; | 237 bool at_end : 1; |
242 bool visited: 1; | 238 bool visited : 1; |
243 bool replacement_calculated: 1; | 239 bool replacement_calculated : 1; |
244 }; | 240 }; |
245 | 241 |
246 | 242 |
247 // Details of a quick mask-compare check that can look ahead in the | 243 // Details of a quick mask-compare check that can look ahead in the |
248 // input stream. | 244 // input stream. |
249 class QuickCheckDetails { | 245 class QuickCheckDetails { |
250 public: | 246 public: |
251 QuickCheckDetails() | 247 QuickCheckDetails() |
252 : characters_(0), | 248 : characters_(0), mask_(0), value_(0), cannot_match_(false) {} |
253 mask_(0), | |
254 value_(0), | |
255 cannot_match_(false) { } | |
256 explicit QuickCheckDetails(intptr_t characters) | 249 explicit QuickCheckDetails(intptr_t characters) |
257 : characters_(characters), | 250 : characters_(characters), mask_(0), value_(0), cannot_match_(false) {} |
258 mask_(0), | |
259 value_(0), | |
260 cannot_match_(false) { } | |
261 bool Rationalize(bool one_byte); | 251 bool Rationalize(bool one_byte); |
262 // Merge in the information from another branch of an alternation. | 252 // Merge in the information from another branch of an alternation. |
263 void Merge(QuickCheckDetails* other, intptr_t from_index); | 253 void Merge(QuickCheckDetails* other, intptr_t from_index); |
264 // Advance the current position by some amount. | 254 // Advance the current position by some amount. |
265 void Advance(intptr_t by, bool one_byte); | 255 void Advance(intptr_t by, bool one_byte); |
266 void Clear(); | 256 void Clear(); |
267 bool cannot_match() { return cannot_match_; } | 257 bool cannot_match() { return cannot_match_; } |
268 void set_cannot_match() { cannot_match_ = true; } | 258 void set_cannot_match() { cannot_match_ = true; } |
269 struct Position { | 259 struct Position { |
270 Position() : mask(0), value(0), determines_perfectly(false) { } | 260 Position() : mask(0), value(0), determines_perfectly(false) {} |
271 uint16_t mask; | 261 uint16_t mask; |
272 uint16_t value; | 262 uint16_t value; |
273 bool determines_perfectly; | 263 bool determines_perfectly; |
274 }; | 264 }; |
275 intptr_t characters() { return characters_; } | 265 intptr_t characters() { return characters_; } |
276 void set_characters(intptr_t characters) { characters_ = characters; } | 266 void set_characters(intptr_t characters) { characters_ = characters; } |
277 Position* positions(intptr_t index) { | 267 Position* positions(intptr_t index) { |
278 ASSERT(index >= 0); | 268 ASSERT(index >= 0); |
279 ASSERT(index < characters_); | 269 ASSERT(index < characters_); |
280 return positions_ + index; | 270 return positions_ + index; |
(...skipping 10 matching lines...) Expand all Loading... |
291 uint32_t mask_; | 281 uint32_t mask_; |
292 uint32_t value_; | 282 uint32_t value_; |
293 // If set to true, there is no way this quick check can match at all. | 283 // If set to true, there is no way this quick check can match at all. |
294 // E.g., if it requires to be at the start of the input, and isn't. | 284 // E.g., if it requires to be at the start of the input, and isn't. |
295 bool cannot_match_; | 285 bool cannot_match_; |
296 | 286 |
297 DISALLOW_ALLOCATION(); | 287 DISALLOW_ALLOCATION(); |
298 }; | 288 }; |
299 | 289 |
300 | 290 |
301 class RegExpNode: public ZoneAllocated { | 291 class RegExpNode : public ZoneAllocated { |
302 public: | 292 public: |
303 explicit RegExpNode(Zone* zone) | 293 explicit RegExpNode(Zone* zone) |
304 : replacement_(NULL), trace_count_(0), zone_(zone) { | 294 : replacement_(NULL), trace_count_(0), zone_(zone) { |
305 bm_info_[0] = bm_info_[1] = NULL; | 295 bm_info_[0] = bm_info_[1] = NULL; |
306 } | 296 } |
307 virtual ~RegExpNode(); | 297 virtual ~RegExpNode(); |
308 virtual void Accept(NodeVisitor* visitor) = 0; | 298 virtual void Accept(NodeVisitor* visitor) = 0; |
309 // Generates a goto to this node or actually generates the code at this point. | 299 // Generates a goto to this node or actually generates the code at this point. |
310 virtual void Emit(RegExpCompiler* compiler, Trace* trace) = 0; | 300 virtual void Emit(RegExpCompiler* compiler, Trace* trace) = 0; |
311 // How many characters must this node consume at a minimum in order to | 301 // How many characters must this node consume at a minimum in order to |
312 // succeed. If we have found at least 'still_to_find' characters that | 302 // succeed. If we have found at least 'still_to_find' characters that |
313 // must be consumed there is no need to ask any following nodes whether | 303 // must be consumed there is no need to ask any following nodes whether |
314 // they are sure to eat any more characters. The not_at_start argument is | 304 // they are sure to eat any more characters. The not_at_start argument is |
315 // used to indicate that we know we are not at the start of the input. In | 305 // used to indicate that we know we are not at the start of the input. In |
316 // this case anchored branches will always fail and can be ignored when | 306 // this case anchored branches will always fail and can be ignored when |
317 // determining how many characters are consumed on success. | 307 // determining how many characters are consumed on success. |
318 virtual intptr_t EatsAtLeast(intptr_t still_to_find, intptr_t budget, | 308 virtual intptr_t EatsAtLeast(intptr_t still_to_find, |
| 309 intptr_t budget, |
319 bool not_at_start) = 0; | 310 bool not_at_start) = 0; |
320 // Emits some quick code that checks whether the preloaded characters match. | 311 // Emits some quick code that checks whether the preloaded characters match. |
321 // Falls through on certain failure, jumps to the label on possible success. | 312 // Falls through on certain failure, jumps to the label on possible success. |
322 // If the node cannot make a quick check it does nothing and returns false. | 313 // If the node cannot make a quick check it does nothing and returns false. |
323 bool EmitQuickCheck(RegExpCompiler* compiler, | 314 bool EmitQuickCheck(RegExpCompiler* compiler, |
324 Trace* bounds_check_trace, | 315 Trace* bounds_check_trace, |
325 Trace* trace, | 316 Trace* trace, |
326 bool preload_has_checked_bounds, | 317 bool preload_has_checked_bounds, |
327 BlockLabel* on_possible_success, | 318 BlockLabel* on_possible_success, |
328 QuickCheckDetails* details_return, | 319 QuickCheckDetails* details_return, |
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
365 virtual RegExpNode* FilterOneByte(intptr_t depth, bool ignore_case) { | 356 virtual RegExpNode* FilterOneByte(intptr_t depth, bool ignore_case) { |
366 return this; | 357 return this; |
367 } | 358 } |
368 // Helper for FilterOneByte. | 359 // Helper for FilterOneByte. |
369 RegExpNode* replacement() { | 360 RegExpNode* replacement() { |
370 ASSERT(info()->replacement_calculated); | 361 ASSERT(info()->replacement_calculated); |
371 return replacement_; | 362 return replacement_; |
372 } | 363 } |
373 RegExpNode* set_replacement(RegExpNode* replacement) { | 364 RegExpNode* set_replacement(RegExpNode* replacement) { |
374 info()->replacement_calculated = true; | 365 info()->replacement_calculated = true; |
375 replacement_ = replacement; | 366 replacement_ = replacement; |
376 return replacement; // For convenience. | 367 return replacement; // For convenience. |
377 } | 368 } |
378 | 369 |
379 // We want to avoid recalculating the lookahead info, so we store it on the | 370 // We want to avoid recalculating the lookahead info, so we store it on the |
380 // node. Only info that is for this node is stored. We can tell that the | 371 // node. Only info that is for this node is stored. We can tell that the |
381 // info is for this node when offset == 0, so the information is calculated | 372 // info is for this node when offset == 0, so the information is calculated |
382 // relative to this node. | 373 // relative to this node. |
383 void SaveBMInfo(BoyerMooreLookahead* bm, bool not_at_start, intptr_t offset) { | 374 void SaveBMInfo(BoyerMooreLookahead* bm, bool not_at_start, intptr_t offset) { |
384 if (offset == 0) set_bm_info(not_at_start, bm); | 375 if (offset == 0) set_bm_info(not_at_start, bm); |
385 } | 376 } |
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
421 // deferred operations in the current trace and generating a goto. | 412 // deferred operations in the current trace and generating a goto. |
422 intptr_t trace_count_; | 413 intptr_t trace_count_; |
423 BoyerMooreLookahead* bm_info_[2]; | 414 BoyerMooreLookahead* bm_info_[2]; |
424 Zone* zone_; | 415 Zone* zone_; |
425 }; | 416 }; |
426 | 417 |
427 | 418 |
428 // A simple closed interval. | 419 // A simple closed interval. |
429 class Interval { | 420 class Interval { |
430 public: | 421 public: |
431 Interval() : from_(kNone), to_(kNone) { } | 422 Interval() : from_(kNone), to_(kNone) {} |
432 Interval(intptr_t from, intptr_t to) : from_(from), to_(to) { } | 423 Interval(intptr_t from, intptr_t to) : from_(from), to_(to) {} |
433 | 424 |
434 Interval Union(Interval that) { | 425 Interval Union(Interval that) { |
435 if (that.from_ == kNone) | 426 if (that.from_ == kNone) |
436 return *this; | 427 return *this; |
437 else if (from_ == kNone) | 428 else if (from_ == kNone) |
438 return that; | 429 return that; |
439 else | 430 else |
440 return Interval(Utils::Minimum(from_, that.from_), | 431 return Interval(Utils::Minimum(from_, that.from_), |
441 Utils::Maximum(to_, that.to_)); | 432 Utils::Maximum(to_, that.to_)); |
442 } | 433 } |
443 bool Contains(intptr_t value) const { | 434 bool Contains(intptr_t value) const { |
444 return (from_ <= value) && (value <= to_); | 435 return (from_ <= value) && (value <= to_); |
445 } | 436 } |
446 bool is_empty() const { return from_ == kNone; } | 437 bool is_empty() const { return from_ == kNone; } |
447 intptr_t from() const { return from_; } | 438 intptr_t from() const { return from_; } |
448 intptr_t to() const { return to_; } | 439 intptr_t to() const { return to_; } |
449 static Interval Empty() { return Interval(); } | 440 static Interval Empty() { return Interval(); } |
450 static const intptr_t kNone = -1; | 441 static const intptr_t kNone = -1; |
451 | 442 |
452 private: | 443 private: |
453 intptr_t from_; | 444 intptr_t from_; |
454 intptr_t to_; | 445 intptr_t to_; |
455 | 446 |
456 DISALLOW_ALLOCATION(); | 447 DISALLOW_ALLOCATION(); |
457 }; | 448 }; |
458 | 449 |
459 | 450 |
460 class SeqRegExpNode: public RegExpNode { | 451 class SeqRegExpNode : public RegExpNode { |
461 public: | 452 public: |
462 explicit SeqRegExpNode(RegExpNode* on_success) | 453 explicit SeqRegExpNode(RegExpNode* on_success) |
463 : RegExpNode(on_success->zone()), on_success_(on_success) { } | 454 : RegExpNode(on_success->zone()), on_success_(on_success) {} |
464 RegExpNode* on_success() { return on_success_; } | 455 RegExpNode* on_success() { return on_success_; } |
465 void set_on_success(RegExpNode* node) { on_success_ = node; } | 456 void set_on_success(RegExpNode* node) { on_success_ = node; } |
466 virtual RegExpNode* FilterOneByte(intptr_t depth, bool ignore_case); | 457 virtual RegExpNode* FilterOneByte(intptr_t depth, bool ignore_case); |
467 virtual void FillInBMInfo(intptr_t offset, | 458 virtual void FillInBMInfo(intptr_t offset, |
468 intptr_t budget, | 459 intptr_t budget, |
469 BoyerMooreLookahead* bm, | 460 BoyerMooreLookahead* bm, |
470 bool not_at_start) { | 461 bool not_at_start) { |
471 on_success_->FillInBMInfo(offset, budget - 1, bm, not_at_start); | 462 on_success_->FillInBMInfo(offset, budget - 1, bm, not_at_start); |
472 if (offset == 0) set_bm_info(not_at_start, bm); | 463 if (offset == 0) set_bm_info(not_at_start, bm); |
473 } | 464 } |
474 | 465 |
475 protected: | 466 protected: |
476 RegExpNode* FilterSuccessor(intptr_t depth, bool ignore_case); | 467 RegExpNode* FilterSuccessor(intptr_t depth, bool ignore_case); |
477 | 468 |
478 private: | 469 private: |
479 RegExpNode* on_success_; | 470 RegExpNode* on_success_; |
480 }; | 471 }; |
481 | 472 |
482 | 473 |
483 class ActionNode: public SeqRegExpNode { | 474 class ActionNode : public SeqRegExpNode { |
484 public: | 475 public: |
485 enum ActionType { | 476 enum ActionType { |
486 SET_REGISTER, | 477 SET_REGISTER, |
487 INCREMENT_REGISTER, | 478 INCREMENT_REGISTER, |
488 STORE_POSITION, | 479 STORE_POSITION, |
489 BEGIN_SUBMATCH, | 480 BEGIN_SUBMATCH, |
490 POSITIVE_SUBMATCH_SUCCESS, | 481 POSITIVE_SUBMATCH_SUCCESS, |
491 EMPTY_MATCH_CHECK, | 482 EMPTY_MATCH_CHECK, |
492 CLEAR_CAPTURES | 483 CLEAR_CAPTURES |
493 }; | 484 }; |
494 static ActionNode* SetRegister(intptr_t reg, intptr_t val, | 485 static ActionNode* SetRegister(intptr_t reg, |
| 486 intptr_t val, |
495 RegExpNode* on_success); | 487 RegExpNode* on_success); |
496 static ActionNode* IncrementRegister(intptr_t reg, RegExpNode* on_success); | 488 static ActionNode* IncrementRegister(intptr_t reg, RegExpNode* on_success); |
497 static ActionNode* StorePosition(intptr_t reg, | 489 static ActionNode* StorePosition(intptr_t reg, |
498 bool is_capture, | 490 bool is_capture, |
499 RegExpNode* on_success); | 491 RegExpNode* on_success); |
500 static ActionNode* ClearCaptures(Interval range, RegExpNode* on_success); | 492 static ActionNode* ClearCaptures(Interval range, RegExpNode* on_success); |
501 static ActionNode* BeginSubmatch(intptr_t stack_pointer_reg, | 493 static ActionNode* BeginSubmatch(intptr_t stack_pointer_reg, |
502 intptr_t position_reg, | 494 intptr_t position_reg, |
503 RegExpNode* on_success); | 495 RegExpNode* on_success); |
504 static ActionNode* PositiveSubmatchSuccess(intptr_t stack_pointer_reg, | 496 static ActionNode* PositiveSubmatchSuccess(intptr_t stack_pointer_reg, |
505 intptr_t restore_reg, | 497 intptr_t restore_reg, |
506 intptr_t clear_capture_count, | 498 intptr_t clear_capture_count, |
507 intptr_t clear_capture_from, | 499 intptr_t clear_capture_from, |
508 RegExpNode* on_success); | 500 RegExpNode* on_success); |
509 static ActionNode* EmptyMatchCheck(intptr_t start_register, | 501 static ActionNode* EmptyMatchCheck(intptr_t start_register, |
510 intptr_t repetition_register, | 502 intptr_t repetition_register, |
511 intptr_t repetition_limit, | 503 intptr_t repetition_limit, |
512 RegExpNode* on_success); | 504 RegExpNode* on_success); |
513 virtual void Accept(NodeVisitor* visitor); | 505 virtual void Accept(NodeVisitor* visitor); |
514 virtual void Emit(RegExpCompiler* compiler, Trace* trace); | 506 virtual void Emit(RegExpCompiler* compiler, Trace* trace); |
515 virtual intptr_t EatsAtLeast(intptr_t still_to_find, intptr_t budget, | 507 virtual intptr_t EatsAtLeast(intptr_t still_to_find, |
| 508 intptr_t budget, |
516 bool not_at_start); | 509 bool not_at_start); |
517 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 510 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
518 RegExpCompiler* compiler, | 511 RegExpCompiler* compiler, |
519 intptr_t filled_in, | 512 intptr_t filled_in, |
520 bool not_at_start) { | 513 bool not_at_start) { |
521 return on_success()->GetQuickCheckDetails( | 514 return on_success()->GetQuickCheckDetails(details, compiler, filled_in, |
522 details, compiler, filled_in, not_at_start); | 515 not_at_start); |
523 } | 516 } |
524 virtual void FillInBMInfo(intptr_t offset, | 517 virtual void FillInBMInfo(intptr_t offset, |
525 intptr_t budget, | 518 intptr_t budget, |
526 BoyerMooreLookahead* bm, | 519 BoyerMooreLookahead* bm, |
527 bool not_at_start); | 520 bool not_at_start); |
528 ActionType action_type() { return action_type_; } | 521 ActionType action_type() { return action_type_; } |
529 // TODO(erikcorry): We should allow some action nodes in greedy loops. | 522 // TODO(erikcorry): We should allow some action nodes in greedy loops. |
530 virtual intptr_t GreedyLoopTextLength() { | 523 virtual intptr_t GreedyLoopTextLength() { |
531 return kNodeIsTooComplexForGreedyLoops; | 524 return kNodeIsTooComplexForGreedyLoops; |
532 } | 525 } |
(...skipping 21 matching lines...) Expand all Loading... |
554 intptr_t start_register; | 547 intptr_t start_register; |
555 intptr_t repetition_register; | 548 intptr_t repetition_register; |
556 intptr_t repetition_limit; | 549 intptr_t repetition_limit; |
557 } u_empty_match_check; | 550 } u_empty_match_check; |
558 struct { | 551 struct { |
559 intptr_t range_from; | 552 intptr_t range_from; |
560 intptr_t range_to; | 553 intptr_t range_to; |
561 } u_clear_captures; | 554 } u_clear_captures; |
562 } data_; | 555 } data_; |
563 ActionNode(ActionType action_type, RegExpNode* on_success) | 556 ActionNode(ActionType action_type, RegExpNode* on_success) |
564 : SeqRegExpNode(on_success), | 557 : SeqRegExpNode(on_success), action_type_(action_type) {} |
565 action_type_(action_type) { } | |
566 ActionType action_type_; | 558 ActionType action_type_; |
567 friend class DotPrinter; | 559 friend class DotPrinter; |
568 }; | 560 }; |
569 | 561 |
570 | 562 |
571 class TextNode: public SeqRegExpNode { | 563 class TextNode : public SeqRegExpNode { |
572 public: | 564 public: |
573 TextNode(ZoneGrowableArray<TextElement>* elms, | 565 TextNode(ZoneGrowableArray<TextElement>* elms, RegExpNode* on_success) |
574 RegExpNode* on_success) | 566 : SeqRegExpNode(on_success), elms_(elms) {} |
| 567 TextNode(RegExpCharacterClass* that, RegExpNode* on_success) |
575 : SeqRegExpNode(on_success), | 568 : SeqRegExpNode(on_success), |
576 elms_(elms) { } | 569 elms_(new (zone()) ZoneGrowableArray<TextElement>(1)) { |
577 TextNode(RegExpCharacterClass* that, | |
578 RegExpNode* on_success) | |
579 : SeqRegExpNode(on_success), | |
580 elms_(new(zone()) ZoneGrowableArray<TextElement>(1)) { | |
581 elms_->Add(TextElement::CharClass(that)); | 570 elms_->Add(TextElement::CharClass(that)); |
582 } | 571 } |
583 virtual void Accept(NodeVisitor* visitor); | 572 virtual void Accept(NodeVisitor* visitor); |
584 virtual void Emit(RegExpCompiler* compiler, Trace* trace); | 573 virtual void Emit(RegExpCompiler* compiler, Trace* trace); |
585 virtual intptr_t EatsAtLeast(intptr_t still_to_find, intptr_t budget, | 574 virtual intptr_t EatsAtLeast(intptr_t still_to_find, |
| 575 intptr_t budget, |
586 bool not_at_start); | 576 bool not_at_start); |
587 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 577 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
588 RegExpCompiler* compiler, | 578 RegExpCompiler* compiler, |
589 intptr_t characters_filled_in, | 579 intptr_t characters_filled_in, |
590 bool not_at_start); | 580 bool not_at_start); |
591 ZoneGrowableArray<TextElement>* elements() { return elms_; } | 581 ZoneGrowableArray<TextElement>* elements() { return elms_; } |
592 void MakeCaseIndependent(bool is_one_byte); | 582 void MakeCaseIndependent(bool is_one_byte); |
593 virtual intptr_t GreedyLoopTextLength(); | 583 virtual intptr_t GreedyLoopTextLength(); |
594 virtual RegExpNode* GetSuccessorOfOmnivorousTextNode( | 584 virtual RegExpNode* GetSuccessorOfOmnivorousTextNode( |
595 RegExpCompiler* compiler); | 585 RegExpCompiler* compiler); |
(...skipping 19 matching lines...) Expand all Loading... |
615 TextEmitPassType pass, | 605 TextEmitPassType pass, |
616 bool preloaded, | 606 bool preloaded, |
617 Trace* trace, | 607 Trace* trace, |
618 bool first_element_checked, | 608 bool first_element_checked, |
619 intptr_t* checked_up_to); | 609 intptr_t* checked_up_to); |
620 intptr_t Length(); | 610 intptr_t Length(); |
621 ZoneGrowableArray<TextElement>* elms_; | 611 ZoneGrowableArray<TextElement>* elms_; |
622 }; | 612 }; |
623 | 613 |
624 | 614 |
625 class AssertionNode: public SeqRegExpNode { | 615 class AssertionNode : public SeqRegExpNode { |
626 public: | 616 public: |
627 enum AssertionType { | 617 enum AssertionType { |
628 AT_END, | 618 AT_END, |
629 AT_START, | 619 AT_START, |
630 AT_BOUNDARY, | 620 AT_BOUNDARY, |
631 AT_NON_BOUNDARY, | 621 AT_NON_BOUNDARY, |
632 AFTER_NEWLINE | 622 AFTER_NEWLINE |
633 }; | 623 }; |
634 static AssertionNode* AtEnd(RegExpNode* on_success) { | 624 static AssertionNode* AtEnd(RegExpNode* on_success) { |
635 return new(on_success->zone()) AssertionNode(AT_END, on_success); | 625 return new (on_success->zone()) AssertionNode(AT_END, on_success); |
636 } | 626 } |
637 static AssertionNode* AtStart(RegExpNode* on_success) { | 627 static AssertionNode* AtStart(RegExpNode* on_success) { |
638 return new(on_success->zone()) AssertionNode(AT_START, on_success); | 628 return new (on_success->zone()) AssertionNode(AT_START, on_success); |
639 } | 629 } |
640 static AssertionNode* AtBoundary(RegExpNode* on_success) { | 630 static AssertionNode* AtBoundary(RegExpNode* on_success) { |
641 return new(on_success->zone()) AssertionNode(AT_BOUNDARY, on_success); | 631 return new (on_success->zone()) AssertionNode(AT_BOUNDARY, on_success); |
642 } | 632 } |
643 static AssertionNode* AtNonBoundary(RegExpNode* on_success) { | 633 static AssertionNode* AtNonBoundary(RegExpNode* on_success) { |
644 return new(on_success->zone()) AssertionNode(AT_NON_BOUNDARY, | 634 return new (on_success->zone()) AssertionNode(AT_NON_BOUNDARY, on_success); |
645 on_success); | |
646 } | 635 } |
647 static AssertionNode* AfterNewline(RegExpNode* on_success) { | 636 static AssertionNode* AfterNewline(RegExpNode* on_success) { |
648 return new(on_success->zone()) AssertionNode(AFTER_NEWLINE, on_success); | 637 return new (on_success->zone()) AssertionNode(AFTER_NEWLINE, on_success); |
649 } | 638 } |
650 virtual void Accept(NodeVisitor* visitor); | 639 virtual void Accept(NodeVisitor* visitor); |
651 virtual void Emit(RegExpCompiler* compiler, Trace* trace); | 640 virtual void Emit(RegExpCompiler* compiler, Trace* trace); |
652 virtual intptr_t EatsAtLeast(intptr_t still_to_find, intptr_t budget, | 641 virtual intptr_t EatsAtLeast(intptr_t still_to_find, |
| 642 intptr_t budget, |
653 bool not_at_start); | 643 bool not_at_start); |
654 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 644 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
655 RegExpCompiler* compiler, | 645 RegExpCompiler* compiler, |
656 intptr_t filled_in, | 646 intptr_t filled_in, |
657 bool not_at_start); | 647 bool not_at_start); |
658 virtual void FillInBMInfo(intptr_t offset, | 648 virtual void FillInBMInfo(intptr_t offset, |
659 intptr_t budget, | 649 intptr_t budget, |
660 BoyerMooreLookahead* bm, | 650 BoyerMooreLookahead* bm, |
661 bool not_at_start); | 651 bool not_at_start); |
662 AssertionType assertion_type() { return assertion_type_; } | 652 AssertionType assertion_type() { return assertion_type_; } |
663 | 653 |
664 private: | 654 private: |
665 void EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace); | 655 void EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace); |
666 enum IfPrevious { kIsNonWord, kIsWord }; | 656 enum IfPrevious { kIsNonWord, kIsWord }; |
667 void BacktrackIfPrevious(RegExpCompiler* compiler, | 657 void BacktrackIfPrevious(RegExpCompiler* compiler, |
668 Trace* trace, | 658 Trace* trace, |
669 IfPrevious backtrack_if_previous); | 659 IfPrevious backtrack_if_previous); |
670 AssertionNode(AssertionType t, RegExpNode* on_success) | 660 AssertionNode(AssertionType t, RegExpNode* on_success) |
671 : SeqRegExpNode(on_success), assertion_type_(t) { } | 661 : SeqRegExpNode(on_success), assertion_type_(t) {} |
672 AssertionType assertion_type_; | 662 AssertionType assertion_type_; |
673 }; | 663 }; |
674 | 664 |
675 | 665 |
676 class BackReferenceNode: public SeqRegExpNode { | 666 class BackReferenceNode : public SeqRegExpNode { |
677 public: | 667 public: |
678 BackReferenceNode(intptr_t start_reg, | 668 BackReferenceNode(intptr_t start_reg, |
679 intptr_t end_reg, | 669 intptr_t end_reg, |
680 RegExpNode* on_success) | 670 RegExpNode* on_success) |
681 : SeqRegExpNode(on_success), | 671 : SeqRegExpNode(on_success), start_reg_(start_reg), end_reg_(end_reg) {} |
682 start_reg_(start_reg), | |
683 end_reg_(end_reg) { } | |
684 virtual void Accept(NodeVisitor* visitor); | 672 virtual void Accept(NodeVisitor* visitor); |
685 intptr_t start_register() { return start_reg_; } | 673 intptr_t start_register() { return start_reg_; } |
686 intptr_t end_register() { return end_reg_; } | 674 intptr_t end_register() { return end_reg_; } |
687 virtual void Emit(RegExpCompiler* compiler, Trace* trace); | 675 virtual void Emit(RegExpCompiler* compiler, Trace* trace); |
688 virtual intptr_t EatsAtLeast(intptr_t still_to_find, | 676 virtual intptr_t EatsAtLeast(intptr_t still_to_find, |
689 intptr_t recursion_depth, | 677 intptr_t recursion_depth, |
690 bool not_at_start); | 678 bool not_at_start); |
691 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 679 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
692 RegExpCompiler* compiler, | 680 RegExpCompiler* compiler, |
693 intptr_t characters_filled_in, | 681 intptr_t characters_filled_in, |
694 bool not_at_start) { | 682 bool not_at_start) { |
695 return; | 683 return; |
696 } | 684 } |
697 virtual void FillInBMInfo(intptr_t offset, | 685 virtual void FillInBMInfo(intptr_t offset, |
698 intptr_t budget, | 686 intptr_t budget, |
699 BoyerMooreLookahead* bm, | 687 BoyerMooreLookahead* bm, |
700 bool not_at_start); | 688 bool not_at_start); |
701 | 689 |
702 private: | 690 private: |
703 intptr_t start_reg_; | 691 intptr_t start_reg_; |
704 intptr_t end_reg_; | 692 intptr_t end_reg_; |
705 }; | 693 }; |
706 | 694 |
707 | 695 |
708 class EndNode: public RegExpNode { | 696 class EndNode : public RegExpNode { |
709 public: | 697 public: |
710 enum Action { ACCEPT, BACKTRACK, NEGATIVE_SUBMATCH_SUCCESS }; | 698 enum Action { ACCEPT, BACKTRACK, NEGATIVE_SUBMATCH_SUCCESS }; |
711 explicit EndNode(Action action, Zone* zone) | 699 explicit EndNode(Action action, Zone* zone) |
712 : RegExpNode(zone), action_(action) { } | 700 : RegExpNode(zone), action_(action) {} |
713 virtual void Accept(NodeVisitor* visitor); | 701 virtual void Accept(NodeVisitor* visitor); |
714 virtual void Emit(RegExpCompiler* compiler, Trace* trace); | 702 virtual void Emit(RegExpCompiler* compiler, Trace* trace); |
715 virtual intptr_t EatsAtLeast(intptr_t still_to_find, | 703 virtual intptr_t EatsAtLeast(intptr_t still_to_find, |
716 intptr_t recursion_depth, | 704 intptr_t recursion_depth, |
717 bool not_at_start) { return 0; } | 705 bool not_at_start) { |
| 706 return 0; |
| 707 } |
718 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 708 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
719 RegExpCompiler* compiler, | 709 RegExpCompiler* compiler, |
720 intptr_t characters_filled_in, | 710 intptr_t characters_filled_in, |
721 bool not_at_start) { | 711 bool not_at_start) { |
722 // Returning 0 from EatsAtLeast should ensure we never get here. | 712 // Returning 0 from EatsAtLeast should ensure we never get here. |
723 UNREACHABLE(); | 713 UNREACHABLE(); |
724 } | 714 } |
725 virtual void FillInBMInfo(intptr_t offset, | 715 virtual void FillInBMInfo(intptr_t offset, |
726 intptr_t budget, | 716 intptr_t budget, |
727 BoyerMooreLookahead* bm, | 717 BoyerMooreLookahead* bm, |
728 bool not_at_start) { | 718 bool not_at_start) { |
729 // Returning 0 from EatsAtLeast should ensure we never get here. | 719 // Returning 0 from EatsAtLeast should ensure we never get here. |
730 UNREACHABLE(); | 720 UNREACHABLE(); |
731 } | 721 } |
732 | 722 |
733 private: | 723 private: |
734 Action action_; | 724 Action action_; |
735 }; | 725 }; |
736 | 726 |
737 | 727 |
738 class NegativeSubmatchSuccess: public EndNode { | 728 class NegativeSubmatchSuccess : public EndNode { |
739 public: | 729 public: |
740 NegativeSubmatchSuccess(intptr_t stack_pointer_reg, | 730 NegativeSubmatchSuccess(intptr_t stack_pointer_reg, |
741 intptr_t position_reg, | 731 intptr_t position_reg, |
742 intptr_t clear_capture_count, | 732 intptr_t clear_capture_count, |
743 intptr_t clear_capture_start, | 733 intptr_t clear_capture_start, |
744 Zone* zone) | 734 Zone* zone) |
745 : EndNode(NEGATIVE_SUBMATCH_SUCCESS, zone), | 735 : EndNode(NEGATIVE_SUBMATCH_SUCCESS, zone), |
746 stack_pointer_register_(stack_pointer_reg), | 736 stack_pointer_register_(stack_pointer_reg), |
747 current_position_register_(position_reg), | 737 current_position_register_(position_reg), |
748 clear_capture_count_(clear_capture_count), | 738 clear_capture_count_(clear_capture_count), |
749 clear_capture_start_(clear_capture_start) { } | 739 clear_capture_start_(clear_capture_start) {} |
750 virtual void Emit(RegExpCompiler* compiler, Trace* trace); | 740 virtual void Emit(RegExpCompiler* compiler, Trace* trace); |
751 | 741 |
752 private: | 742 private: |
753 intptr_t stack_pointer_register_; | 743 intptr_t stack_pointer_register_; |
754 intptr_t current_position_register_; | 744 intptr_t current_position_register_; |
755 intptr_t clear_capture_count_; | 745 intptr_t clear_capture_count_; |
756 intptr_t clear_capture_start_; | 746 intptr_t clear_capture_start_; |
757 }; | 747 }; |
758 | 748 |
759 | 749 |
760 class Guard: public ZoneAllocated { | 750 class Guard : public ZoneAllocated { |
761 public: | 751 public: |
762 enum Relation { LT, GEQ }; | 752 enum Relation { LT, GEQ }; |
763 Guard(intptr_t reg, Relation op, intptr_t value) | 753 Guard(intptr_t reg, Relation op, intptr_t value) |
764 : reg_(reg), | 754 : reg_(reg), op_(op), value_(value) {} |
765 op_(op), | |
766 value_(value) { } | |
767 intptr_t reg() { return reg_; } | 755 intptr_t reg() { return reg_; } |
768 Relation op() { return op_; } | 756 Relation op() { return op_; } |
769 intptr_t value() { return value_; } | 757 intptr_t value() { return value_; } |
770 | 758 |
771 private: | 759 private: |
772 intptr_t reg_; | 760 intptr_t reg_; |
773 Relation op_; | 761 Relation op_; |
774 intptr_t value_; | 762 intptr_t value_; |
775 }; | 763 }; |
776 | 764 |
777 | 765 |
778 class GuardedAlternative { | 766 class GuardedAlternative { |
779 public: | 767 public: |
780 explicit GuardedAlternative(RegExpNode* node) : node_(node), guards_(NULL) { } | 768 explicit GuardedAlternative(RegExpNode* node) : node_(node), guards_(NULL) {} |
781 void AddGuard(Guard* guard, Zone* zone); | 769 void AddGuard(Guard* guard, Zone* zone); |
782 RegExpNode* node() { return node_; } | 770 RegExpNode* node() { return node_; } |
783 void set_node(RegExpNode* node) { node_ = node; } | 771 void set_node(RegExpNode* node) { node_ = node; } |
784 ZoneGrowableArray<Guard*>* guards() { return guards_; } | 772 ZoneGrowableArray<Guard*>* guards() { return guards_; } |
785 | 773 |
786 private: | 774 private: |
787 RegExpNode* node_; | 775 RegExpNode* node_; |
788 ZoneGrowableArray<Guard*>* guards_; | 776 ZoneGrowableArray<Guard*>* guards_; |
789 | 777 |
790 DISALLOW_ALLOCATION(); | 778 DISALLOW_ALLOCATION(); |
791 }; | 779 }; |
792 | 780 |
793 | 781 |
794 struct AlternativeGeneration; | 782 struct AlternativeGeneration; |
795 | 783 |
796 | 784 |
797 class ChoiceNode: public RegExpNode { | 785 class ChoiceNode : public RegExpNode { |
798 public: | 786 public: |
799 explicit ChoiceNode(intptr_t expected_size, Zone* zone) | 787 explicit ChoiceNode(intptr_t expected_size, Zone* zone) |
800 : RegExpNode(zone), | 788 : RegExpNode(zone), |
801 alternatives_(new(zone) | 789 alternatives_(new (zone) |
802 ZoneGrowableArray<GuardedAlternative>(expected_size)), | 790 ZoneGrowableArray<GuardedAlternative>(expected_size)), |
803 not_at_start_(false), | 791 not_at_start_(false), |
804 being_calculated_(false) { } | 792 being_calculated_(false) {} |
805 virtual void Accept(NodeVisitor* visitor); | 793 virtual void Accept(NodeVisitor* visitor); |
806 void AddAlternative(GuardedAlternative node) { | 794 void AddAlternative(GuardedAlternative node) { alternatives()->Add(node); } |
807 alternatives()->Add(node); | |
808 } | |
809 ZoneGrowableArray<GuardedAlternative>* alternatives() { | 795 ZoneGrowableArray<GuardedAlternative>* alternatives() { |
810 return alternatives_; | 796 return alternatives_; |
811 } | 797 } |
812 virtual void Emit(RegExpCompiler* compiler, Trace* trace); | 798 virtual void Emit(RegExpCompiler* compiler, Trace* trace); |
813 virtual intptr_t EatsAtLeast(intptr_t still_to_find, intptr_t budget, | 799 virtual intptr_t EatsAtLeast(intptr_t still_to_find, |
| 800 intptr_t budget, |
814 bool not_at_start); | 801 bool not_at_start); |
815 intptr_t EatsAtLeastHelper(intptr_t still_to_find, | 802 intptr_t EatsAtLeastHelper(intptr_t still_to_find, |
816 intptr_t budget, | 803 intptr_t budget, |
817 RegExpNode* ignore_this_node, | 804 RegExpNode* ignore_this_node, |
818 bool not_at_start); | 805 bool not_at_start); |
819 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 806 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
820 RegExpCompiler* compiler, | 807 RegExpCompiler* compiler, |
821 intptr_t characters_filled_in, | 808 intptr_t characters_filled_in, |
822 bool not_at_start); | 809 bool not_at_start); |
823 virtual void FillInBMInfo(intptr_t offset, | 810 virtual void FillInBMInfo(intptr_t offset, |
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
868 intptr_t first_choice, | 855 intptr_t first_choice, |
869 Trace* trace, | 856 Trace* trace, |
870 PreloadState* preloads); | 857 PreloadState* preloads); |
871 // If true, this node is never checked at the start of the input. | 858 // If true, this node is never checked at the start of the input. |
872 // Allows a new trace to start with at_start() set to false. | 859 // Allows a new trace to start with at_start() set to false. |
873 bool not_at_start_; | 860 bool not_at_start_; |
874 bool being_calculated_; | 861 bool being_calculated_; |
875 }; | 862 }; |
876 | 863 |
877 | 864 |
878 class NegativeLookaheadChoiceNode: public ChoiceNode { | 865 class NegativeLookaheadChoiceNode : public ChoiceNode { |
879 public: | 866 public: |
880 explicit NegativeLookaheadChoiceNode(GuardedAlternative this_must_fail, | 867 explicit NegativeLookaheadChoiceNode(GuardedAlternative this_must_fail, |
881 GuardedAlternative then_do_this, | 868 GuardedAlternative then_do_this, |
882 Zone* zone) | 869 Zone* zone) |
883 : ChoiceNode(2, zone) { | 870 : ChoiceNode(2, zone) { |
884 AddAlternative(this_must_fail); | 871 AddAlternative(this_must_fail); |
885 AddAlternative(then_do_this); | 872 AddAlternative(then_do_this); |
886 } | 873 } |
887 virtual intptr_t EatsAtLeast(intptr_t still_to_find, intptr_t budget, | 874 virtual intptr_t EatsAtLeast(intptr_t still_to_find, |
| 875 intptr_t budget, |
888 bool not_at_start); | 876 bool not_at_start); |
889 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 877 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
890 RegExpCompiler* compiler, | 878 RegExpCompiler* compiler, |
891 intptr_t characters_filled_in, | 879 intptr_t characters_filled_in, |
892 bool not_at_start); | 880 bool not_at_start); |
893 virtual void FillInBMInfo(intptr_t offset, | 881 virtual void FillInBMInfo(intptr_t offset, |
894 intptr_t budget, | 882 intptr_t budget, |
895 BoyerMooreLookahead* bm, | 883 BoyerMooreLookahead* bm, |
896 bool not_at_start) { | 884 bool not_at_start) { |
897 (*alternatives_)[1].node()->FillInBMInfo( | 885 (*alternatives_)[1].node()->FillInBMInfo(offset, budget - 1, bm, |
898 offset, budget - 1, bm, not_at_start); | 886 not_at_start); |
899 if (offset == 0) set_bm_info(not_at_start, bm); | 887 if (offset == 0) set_bm_info(not_at_start, bm); |
900 } | 888 } |
901 // For a negative lookahead we don't emit the quick check for the | 889 // For a negative lookahead we don't emit the quick check for the |
902 // alternative that is expected to fail. This is because quick check code | 890 // alternative that is expected to fail. This is because quick check code |
903 // starts by loading enough characters for the alternative that takes fewest | 891 // starts by loading enough characters for the alternative that takes fewest |
904 // characters, but on a negative lookahead the negative branch did not take | 892 // characters, but on a negative lookahead the negative branch did not take |
905 // part in that calculation (EatsAtLeast) so the assumptions don't hold. | 893 // part in that calculation (EatsAtLeast) so the assumptions don't hold. |
906 virtual bool try_to_emit_quick_check_for_alternative(bool is_first) { | 894 virtual bool try_to_emit_quick_check_for_alternative(bool is_first) { |
907 return !is_first; | 895 return !is_first; |
908 } | 896 } |
909 virtual RegExpNode* FilterOneByte(intptr_t depth, bool ignore_case); | 897 virtual RegExpNode* FilterOneByte(intptr_t depth, bool ignore_case); |
910 }; | 898 }; |
911 | 899 |
912 | 900 |
913 class LoopChoiceNode: public ChoiceNode { | 901 class LoopChoiceNode : public ChoiceNode { |
914 public: | 902 public: |
915 explicit LoopChoiceNode(bool body_can_be_zero_length, Zone* zone) | 903 explicit LoopChoiceNode(bool body_can_be_zero_length, Zone* zone) |
916 : ChoiceNode(2, zone), | 904 : ChoiceNode(2, zone), |
917 loop_node_(NULL), | 905 loop_node_(NULL), |
918 continue_node_(NULL), | 906 continue_node_(NULL), |
919 body_can_be_zero_length_(body_can_be_zero_length) { } | 907 body_can_be_zero_length_(body_can_be_zero_length) {} |
920 void AddLoopAlternative(GuardedAlternative alt); | 908 void AddLoopAlternative(GuardedAlternative alt); |
921 void AddContinueAlternative(GuardedAlternative alt); | 909 void AddContinueAlternative(GuardedAlternative alt); |
922 virtual void Emit(RegExpCompiler* compiler, Trace* trace); | 910 virtual void Emit(RegExpCompiler* compiler, Trace* trace); |
923 virtual intptr_t EatsAtLeast(intptr_t still_to_find, intptr_t budget, | 911 virtual intptr_t EatsAtLeast(intptr_t still_to_find, |
| 912 intptr_t budget, |
924 bool not_at_start); | 913 bool not_at_start); |
925 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 914 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
926 RegExpCompiler* compiler, | 915 RegExpCompiler* compiler, |
927 intptr_t characters_filled_in, | 916 intptr_t characters_filled_in, |
928 bool not_at_start); | 917 bool not_at_start); |
929 virtual void FillInBMInfo(intptr_t offset, | 918 virtual void FillInBMInfo(intptr_t offset, |
930 intptr_t budget, | 919 intptr_t budget, |
931 BoyerMooreLookahead* bm, | 920 BoyerMooreLookahead* bm, |
932 bool not_at_start); | 921 bool not_at_start); |
933 RegExpNode* loop_node() { return loop_node_; } | 922 RegExpNode* loop_node() { return loop_node_; } |
(...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
990 | 979 |
991 ContainedInLattice AddRange(ContainedInLattice a, | 980 ContainedInLattice AddRange(ContainedInLattice a, |
992 const intptr_t* ranges, | 981 const intptr_t* ranges, |
993 intptr_t ranges_size, | 982 intptr_t ranges_size, |
994 Interval new_range); | 983 Interval new_range); |
995 | 984 |
996 | 985 |
997 class BoyerMoorePositionInfo : public ZoneAllocated { | 986 class BoyerMoorePositionInfo : public ZoneAllocated { |
998 public: | 987 public: |
999 explicit BoyerMoorePositionInfo(Zone* zone) | 988 explicit BoyerMoorePositionInfo(Zone* zone) |
1000 : map_(new(zone) ZoneGrowableArray<bool>(kMapSize)), | 989 : map_(new (zone) ZoneGrowableArray<bool>(kMapSize)), |
1001 map_count_(0), | 990 map_count_(0), |
1002 w_(kNotYet), | 991 w_(kNotYet), |
1003 s_(kNotYet), | 992 s_(kNotYet), |
1004 d_(kNotYet), | 993 d_(kNotYet), |
1005 surrogate_(kNotYet) { | 994 surrogate_(kNotYet) { |
1006 for (intptr_t i = 0; i < kMapSize; i++) { | 995 for (intptr_t i = 0; i < kMapSize; i++) { |
1007 map_->Add(false); | 996 map_->Add(false); |
1008 } | 997 } |
1009 } | 998 } |
1010 | 999 |
1011 bool& at(intptr_t i) { return (*map_)[i]; } | 1000 bool& at(intptr_t i) { return (*map_)[i]; } |
1012 | 1001 |
1013 static const intptr_t kMapSize = 128; | 1002 static const intptr_t kMapSize = 128; |
1014 static const intptr_t kMask = kMapSize - 1; | 1003 static const intptr_t kMask = kMapSize - 1; |
1015 | 1004 |
1016 intptr_t map_count() const { return map_count_; } | 1005 intptr_t map_count() const { return map_count_; } |
1017 | 1006 |
1018 void Set(intptr_t character); | 1007 void Set(intptr_t character); |
1019 void SetInterval(const Interval& interval); | 1008 void SetInterval(const Interval& interval); |
1020 void SetAll(); | 1009 void SetAll(); |
1021 bool is_non_word() { return w_ == kLatticeOut; } | 1010 bool is_non_word() { return w_ == kLatticeOut; } |
1022 bool is_word() { return w_ == kLatticeIn; } | 1011 bool is_word() { return w_ == kLatticeIn; } |
1023 | 1012 |
1024 private: | 1013 private: |
1025 ZoneGrowableArray<bool>* map_; | 1014 ZoneGrowableArray<bool>* map_; |
1026 intptr_t map_count_; // Number of set bits in the map. | 1015 intptr_t map_count_; // Number of set bits in the map. |
1027 ContainedInLattice w_; // The \w character class. | 1016 ContainedInLattice w_; // The \w character class. |
1028 ContainedInLattice s_; // The \s character class. | 1017 ContainedInLattice s_; // The \s character class. |
1029 ContainedInLattice d_; // The \d character class. | 1018 ContainedInLattice d_; // The \d character class. |
1030 ContainedInLattice surrogate_; // Surrogate UTF-16 code units. | 1019 ContainedInLattice surrogate_; // Surrogate UTF-16 code units. |
1031 }; | 1020 }; |
1032 | 1021 |
1033 | 1022 |
1034 class BoyerMooreLookahead : public ZoneAllocated{ | 1023 class BoyerMooreLookahead : public ZoneAllocated { |
1035 public: | 1024 public: |
1036 BoyerMooreLookahead(intptr_t length, RegExpCompiler* compiler, | 1025 BoyerMooreLookahead(intptr_t length, RegExpCompiler* compiler, Zone* Zone); |
1037 Zone* Zone); | |
1038 | 1026 |
1039 intptr_t length() { return length_; } | 1027 intptr_t length() { return length_; } |
1040 intptr_t max_char() { return max_char_; } | 1028 intptr_t max_char() { return max_char_; } |
1041 RegExpCompiler* compiler() { return compiler_; } | 1029 RegExpCompiler* compiler() { return compiler_; } |
1042 | 1030 |
1043 intptr_t Count(intptr_t map_number) { | 1031 intptr_t Count(intptr_t map_number) { |
1044 return bitmaps_->At(map_number)->map_count(); | 1032 return bitmaps_->At(map_number)->map_count(); |
1045 } | 1033 } |
1046 | 1034 |
1047 BoyerMoorePositionInfo* at(intptr_t i) { return bitmaps_->At(i); } | 1035 BoyerMoorePositionInfo* at(intptr_t i) { return bitmaps_->At(i); } |
1048 | 1036 |
1049 void Set(intptr_t map_number, intptr_t character) { | 1037 void Set(intptr_t map_number, intptr_t character) { |
1050 if (character > max_char_) return; | 1038 if (character > max_char_) return; |
1051 BoyerMoorePositionInfo* info = bitmaps_->At(map_number); | 1039 BoyerMoorePositionInfo* info = bitmaps_->At(map_number); |
1052 info->Set(character); | 1040 info->Set(character); |
1053 } | 1041 } |
1054 | 1042 |
1055 void SetInterval(intptr_t map_number, const Interval& interval) { | 1043 void SetInterval(intptr_t map_number, const Interval& interval) { |
1056 if (interval.from() > max_char_) return; | 1044 if (interval.from() > max_char_) return; |
1057 BoyerMoorePositionInfo* info = bitmaps_->At(map_number); | 1045 BoyerMoorePositionInfo* info = bitmaps_->At(map_number); |
1058 if (interval.to() > max_char_) { | 1046 if (interval.to() > max_char_) { |
1059 info->SetInterval(Interval(interval.from(), max_char_)); | 1047 info->SetInterval(Interval(interval.from(), max_char_)); |
1060 } else { | 1048 } else { |
1061 info->SetInterval(interval); | 1049 info->SetInterval(interval); |
1062 } | 1050 } |
1063 } | 1051 } |
1064 | 1052 |
1065 void SetAll(intptr_t map_number) { | 1053 void SetAll(intptr_t map_number) { bitmaps_->At(map_number)->SetAll(); } |
1066 bitmaps_->At(map_number)->SetAll(); | |
1067 } | |
1068 | 1054 |
1069 void SetRest(intptr_t from_map) { | 1055 void SetRest(intptr_t from_map) { |
1070 for (intptr_t i = from_map; i < length_; i++) SetAll(i); | 1056 for (intptr_t i = from_map; i < length_; i++) |
| 1057 SetAll(i); |
1071 } | 1058 } |
1072 void EmitSkipInstructions(RegExpMacroAssembler* masm); | 1059 void EmitSkipInstructions(RegExpMacroAssembler* masm); |
1073 | 1060 |
1074 private: | 1061 private: |
1075 // This is the value obtained by EatsAtLeast. If we do not have at least this | 1062 // This is the value obtained by EatsAtLeast. If we do not have at least this |
1076 // many characters left in the sample string then the match is bound to fail. | 1063 // many characters left in the sample string then the match is bound to fail. |
1077 // Therefore it is OK to read a character this far ahead of the current match | 1064 // Therefore it is OK to read a character this far ahead of the current match |
1078 // point. | 1065 // point. |
1079 intptr_t length_; | 1066 intptr_t length_; |
1080 RegExpCompiler* compiler_; | 1067 RegExpCompiler* compiler_; |
1081 // 0xff for Latin1, 0xffff for UTF-16. | 1068 // 0xff for Latin1, 0xffff for UTF-16. |
1082 intptr_t max_char_; | 1069 intptr_t max_char_; |
1083 ZoneGrowableArray<BoyerMoorePositionInfo*>* bitmaps_; | 1070 ZoneGrowableArray<BoyerMoorePositionInfo*>* bitmaps_; |
1084 | 1071 |
1085 intptr_t GetSkipTable(intptr_t min_lookahead, | 1072 intptr_t GetSkipTable(intptr_t min_lookahead, |
1086 intptr_t max_lookahead, | 1073 intptr_t max_lookahead, |
1087 const TypedData& boolean_skip_table); | 1074 const TypedData& boolean_skip_table); |
1088 bool FindWorthwhileInterval(intptr_t* from, intptr_t* to); | 1075 bool FindWorthwhileInterval(intptr_t* from, intptr_t* to); |
1089 intptr_t FindBestInterval( | 1076 intptr_t FindBestInterval(intptr_t max_number_of_chars, |
1090 intptr_t max_number_of_chars, | 1077 intptr_t old_biggest_points, |
1091 intptr_t old_biggest_points, | 1078 intptr_t* from, |
1092 intptr_t* from, intptr_t* to); | 1079 intptr_t* to); |
1093 }; | 1080 }; |
1094 | 1081 |
1095 | 1082 |
1096 // There are many ways to generate code for a node. This class encapsulates | 1083 // There are many ways to generate code for a node. This class encapsulates |
1097 // the current way we should be generating. In other words it encapsulates | 1084 // the current way we should be generating. In other words it encapsulates |
1098 // the current state of the code generator. The effect of this is that we | 1085 // the current state of the code generator. The effect of this is that we |
1099 // generate code for paths that the matcher can take through the regular | 1086 // generate code for paths that the matcher can take through the regular |
1100 // expression. A given node in the regexp can be code-generated several times | 1087 // expression. A given node in the regexp can be code-generated several times |
1101 // as it can be part of several traces. For example for the regexp: | 1088 // as it can be part of several traces. For example for the regexp: |
1102 // /foo(bar|ip)baz/ the code to match baz will be generated twice, once as part | 1089 // /foo(bar|ip)baz/ the code to match baz will be generated twice, once as part |
1103 // of the foo-bar-baz trace and once as part of the foo-ip-baz trace. The code | 1090 // of the foo-bar-baz trace and once as part of the foo-ip-baz trace. The code |
1104 // to match foo is generated only once (the traces have a common prefix). The | 1091 // to match foo is generated only once (the traces have a common prefix). The |
1105 // code to store the capture is deferred and generated (twice) after the places | 1092 // code to store the capture is deferred and generated (twice) after the places |
1106 // where baz has been matched. | 1093 // where baz has been matched. |
1107 class Trace { | 1094 class Trace { |
1108 public: | 1095 public: |
1109 // A value for a property that is either known to be true, know to be false, | 1096 // A value for a property that is either known to be true, know to be false, |
1110 // or not known. | 1097 // or not known. |
1111 enum TriBool { | 1098 enum TriBool { UNKNOWN = -1, FALSE_VALUE = 0, TRUE_VALUE = 1 }; |
1112 UNKNOWN = -1, FALSE_VALUE = 0, TRUE_VALUE = 1 | |
1113 }; | |
1114 | 1099 |
1115 class DeferredAction { | 1100 class DeferredAction { |
1116 public: | 1101 public: |
1117 DeferredAction(ActionNode::ActionType action_type, intptr_t reg) | 1102 DeferredAction(ActionNode::ActionType action_type, intptr_t reg) |
1118 : action_type_(action_type), reg_(reg), next_(NULL) { } | 1103 : action_type_(action_type), reg_(reg), next_(NULL) {} |
1119 DeferredAction* next() { return next_; } | 1104 DeferredAction* next() { return next_; } |
1120 bool Mentions(intptr_t reg); | 1105 bool Mentions(intptr_t reg); |
1121 intptr_t reg() { return reg_; } | 1106 intptr_t reg() { return reg_; } |
1122 ActionNode::ActionType action_type() { return action_type_; } | 1107 ActionNode::ActionType action_type() { return action_type_; } |
| 1108 |
1123 private: | 1109 private: |
1124 ActionNode::ActionType action_type_; | 1110 ActionNode::ActionType action_type_; |
1125 intptr_t reg_; | 1111 intptr_t reg_; |
1126 DeferredAction* next_; | 1112 DeferredAction* next_; |
1127 friend class Trace; | 1113 friend class Trace; |
1128 | 1114 |
1129 DISALLOW_ALLOCATION(); | 1115 DISALLOW_ALLOCATION(); |
1130 }; | 1116 }; |
1131 | 1117 |
1132 class DeferredCapture : public DeferredAction { | 1118 class DeferredCapture : public DeferredAction { |
1133 public: | 1119 public: |
1134 DeferredCapture(intptr_t reg, bool is_capture, Trace* trace) | 1120 DeferredCapture(intptr_t reg, bool is_capture, Trace* trace) |
1135 : DeferredAction(ActionNode::STORE_POSITION, reg), | 1121 : DeferredAction(ActionNode::STORE_POSITION, reg), |
1136 cp_offset_(trace->cp_offset()), | 1122 cp_offset_(trace->cp_offset()), |
1137 is_capture_(is_capture) { } | 1123 is_capture_(is_capture) {} |
1138 intptr_t cp_offset() { return cp_offset_; } | 1124 intptr_t cp_offset() { return cp_offset_; } |
1139 bool is_capture() { return is_capture_; } | 1125 bool is_capture() { return is_capture_; } |
| 1126 |
1140 private: | 1127 private: |
1141 intptr_t cp_offset_; | 1128 intptr_t cp_offset_; |
1142 bool is_capture_; | 1129 bool is_capture_; |
1143 void set_cp_offset(intptr_t cp_offset) { cp_offset_ = cp_offset; } | 1130 void set_cp_offset(intptr_t cp_offset) { cp_offset_ = cp_offset; } |
1144 }; | 1131 }; |
1145 | 1132 |
1146 class DeferredSetRegister : public DeferredAction { | 1133 class DeferredSetRegister : public DeferredAction { |
1147 public: | 1134 public: |
1148 DeferredSetRegister(intptr_t reg, intptr_t value) | 1135 DeferredSetRegister(intptr_t reg, intptr_t value) |
1149 : DeferredAction(ActionNode::SET_REGISTER, reg), | 1136 : DeferredAction(ActionNode::SET_REGISTER, reg), value_(value) {} |
1150 value_(value) { } | |
1151 intptr_t value() { return value_; } | 1137 intptr_t value() { return value_; } |
| 1138 |
1152 private: | 1139 private: |
1153 intptr_t value_; | 1140 intptr_t value_; |
1154 }; | 1141 }; |
1155 | 1142 |
1156 class DeferredClearCaptures : public DeferredAction { | 1143 class DeferredClearCaptures : public DeferredAction { |
1157 public: | 1144 public: |
1158 explicit DeferredClearCaptures(Interval range) | 1145 explicit DeferredClearCaptures(Interval range) |
1159 : DeferredAction(ActionNode::CLEAR_CAPTURES, -1), | 1146 : DeferredAction(ActionNode::CLEAR_CAPTURES, -1), range_(range) {} |
1160 range_(range) { } | |
1161 Interval range() { return range_; } | 1147 Interval range() { return range_; } |
| 1148 |
1162 private: | 1149 private: |
1163 Interval range_; | 1150 Interval range_; |
1164 }; | 1151 }; |
1165 | 1152 |
1166 class DeferredIncrementRegister : public DeferredAction { | 1153 class DeferredIncrementRegister : public DeferredAction { |
1167 public: | 1154 public: |
1168 explicit DeferredIncrementRegister(intptr_t reg) | 1155 explicit DeferredIncrementRegister(intptr_t reg) |
1169 : DeferredAction(ActionNode::INCREMENT_REGISTER, reg) { } | 1156 : DeferredAction(ActionNode::INCREMENT_REGISTER, reg) {} |
1170 }; | 1157 }; |
1171 | 1158 |
1172 Trace() | 1159 Trace() |
1173 : cp_offset_(0), | 1160 : cp_offset_(0), |
1174 actions_(NULL), | 1161 actions_(NULL), |
1175 backtrack_(NULL), | 1162 backtrack_(NULL), |
1176 stop_node_(NULL), | 1163 stop_node_(NULL), |
1177 loop_label_(NULL), | 1164 loop_label_(NULL), |
1178 characters_preloaded_(0), | 1165 characters_preloaded_(0), |
1179 bound_checked_up_to_(0), | 1166 bound_checked_up_to_(0), |
1180 flush_budget_(100), | 1167 flush_budget_(100), |
1181 at_start_(UNKNOWN) { } | 1168 at_start_(UNKNOWN) {} |
1182 | 1169 |
1183 // End the trace. This involves flushing the deferred actions in the trace | 1170 // End the trace. This involves flushing the deferred actions in the trace |
1184 // and pushing a backtrack location onto the backtrack stack. Once this is | 1171 // and pushing a backtrack location onto the backtrack stack. Once this is |
1185 // done we can start a new trace or go to one that has already been | 1172 // done we can start a new trace or go to one that has already been |
1186 // generated. | 1173 // generated. |
1187 void Flush(RegExpCompiler* compiler, RegExpNode* successor); | 1174 void Flush(RegExpCompiler* compiler, RegExpNode* successor); |
1188 intptr_t cp_offset() { return cp_offset_; } | 1175 intptr_t cp_offset() { return cp_offset_; } |
1189 DeferredAction* actions() { return actions_; } | 1176 DeferredAction* actions() { return actions_; } |
1190 // A trivial trace is one that has no deferred actions or other state that | 1177 // A trivial trace is one that has no deferred actions or other state that |
1191 // affects the assumptions used when generating code. There is no recorded | 1178 // affects the assumptions used when generating code. There is no recorded |
1192 // backtrack location in a trivial trace, so with a trivial trace we will | 1179 // backtrack location in a trivial trace, so with a trivial trace we will |
1193 // generate code that, on a failure to match, gets the backtrack location | 1180 // generate code that, on a failure to match, gets the backtrack location |
1194 // from the backtrack stack rather than using a direct jump instruction. We | 1181 // from the backtrack stack rather than using a direct jump instruction. We |
1195 // always start code generation with a trivial trace and non-trivial traces | 1182 // always start code generation with a trivial trace and non-trivial traces |
1196 // are created as we emit code for nodes or add to the list of deferred | 1183 // are created as we emit code for nodes or add to the list of deferred |
1197 // actions in the trace. The location of the code generated for a node using | 1184 // actions in the trace. The location of the code generated for a node using |
1198 // a trivial trace is recorded in a label in the node so that gotos can be | 1185 // a trivial trace is recorded in a label in the node so that gotos can be |
1199 // generated to that code. | 1186 // generated to that code. |
1200 bool is_trivial() { | 1187 bool is_trivial() { |
1201 return backtrack_ == NULL && | 1188 return backtrack_ == NULL && actions_ == NULL && cp_offset_ == 0 && |
1202 actions_ == NULL && | 1189 characters_preloaded_ == 0 && bound_checked_up_to_ == 0 && |
1203 cp_offset_ == 0 && | 1190 quick_check_performed_.characters() == 0 && at_start_ == UNKNOWN; |
1204 characters_preloaded_ == 0 && | |
1205 bound_checked_up_to_ == 0 && | |
1206 quick_check_performed_.characters() == 0 && | |
1207 at_start_ == UNKNOWN; | |
1208 } | 1191 } |
1209 TriBool at_start() { return at_start_; } | 1192 TriBool at_start() { return at_start_; } |
1210 void set_at_start(bool at_start) { | 1193 void set_at_start(bool at_start) { |
1211 at_start_ = at_start ? TRUE_VALUE : FALSE_VALUE; | 1194 at_start_ = at_start ? TRUE_VALUE : FALSE_VALUE; |
1212 } | 1195 } |
1213 BlockLabel* backtrack() { return backtrack_; } | 1196 BlockLabel* backtrack() { return backtrack_; } |
1214 BlockLabel* loop_label() { return loop_label_; } | 1197 BlockLabel* loop_label() { return loop_label_; } |
1215 RegExpNode* stop_node() { return stop_node_; } | 1198 RegExpNode* stop_node() { return stop_node_; } |
1216 intptr_t characters_preloaded() { return characters_preloaded_; } | 1199 intptr_t characters_preloaded() { return characters_preloaded_; } |
1217 intptr_t bound_checked_up_to() { return bound_checked_up_to_; } | 1200 intptr_t bound_checked_up_to() { return bound_checked_up_to_; } |
(...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1282 Trace counter_backtrack_trace_; | 1265 Trace counter_backtrack_trace_; |
1283 }; | 1266 }; |
1284 | 1267 |
1285 | 1268 |
1286 struct PreloadState { | 1269 struct PreloadState { |
1287 static const intptr_t kEatsAtLeastNotYetInitialized = -1; | 1270 static const intptr_t kEatsAtLeastNotYetInitialized = -1; |
1288 bool preload_is_current_; | 1271 bool preload_is_current_; |
1289 bool preload_has_checked_bounds_; | 1272 bool preload_has_checked_bounds_; |
1290 intptr_t preload_characters_; | 1273 intptr_t preload_characters_; |
1291 intptr_t eats_at_least_; | 1274 intptr_t eats_at_least_; |
1292 void init() { | 1275 void init() { eats_at_least_ = kEatsAtLeastNotYetInitialized; } |
1293 eats_at_least_ = kEatsAtLeastNotYetInitialized; | |
1294 } | |
1295 | 1276 |
1296 DISALLOW_ALLOCATION(); | 1277 DISALLOW_ALLOCATION(); |
1297 }; | 1278 }; |
1298 | 1279 |
1299 | 1280 |
1300 class NodeVisitor : public ValueObject { | 1281 class NodeVisitor : public ValueObject { |
1301 public: | 1282 public: |
1302 virtual ~NodeVisitor() { } | 1283 virtual ~NodeVisitor() {} |
1303 #define DECLARE_VISIT(Type) \ | 1284 #define DECLARE_VISIT(Type) virtual void Visit##Type(Type##Node* that) = 0; |
1304 virtual void Visit##Type(Type##Node* that) = 0; | 1285 FOR_EACH_NODE_TYPE(DECLARE_VISIT) |
1305 FOR_EACH_NODE_TYPE(DECLARE_VISIT) | |
1306 #undef DECLARE_VISIT | 1286 #undef DECLARE_VISIT |
1307 virtual void VisitLoopChoice(LoopChoiceNode* that) { VisitChoice(that); } | 1287 virtual void VisitLoopChoice(LoopChoiceNode* that) { VisitChoice(that); } |
1308 }; | 1288 }; |
1309 | 1289 |
1310 | 1290 |
1311 // Assertion propagation moves information about assertions such as | 1291 // Assertion propagation moves information about assertions such as |
1312 // \b to the affected nodes. For instance, in /.\b./ information must | 1292 // \b to the affected nodes. For instance, in /.\b./ information must |
1313 // be propagated to the first '.' that whatever follows needs to know | 1293 // be propagated to the first '.' that whatever follows needs to know |
1314 // if it matched a word or a non-word, and to the second '.' that it | 1294 // if it matched a word or a non-word, and to the second '.' that it |
1315 // has to check if it succeeds a word or non-word. In this case the | 1295 // has to check if it succeeds a word or non-word. In this case the |
1316 // result will be something like: | 1296 // result will be something like: |
1317 // | 1297 // |
1318 // +-------+ +------------+ | 1298 // +-------+ +------------+ |
1319 // | . | | . | | 1299 // | . | | . | |
1320 // +-------+ ---> +------------+ | 1300 // +-------+ ---> +------------+ |
1321 // | word? | | check word | | 1301 // | word? | | check word | |
1322 // +-------+ +------------+ | 1302 // +-------+ +------------+ |
1323 class Analysis: public NodeVisitor { | 1303 class Analysis : public NodeVisitor { |
1324 public: | 1304 public: |
1325 Analysis(bool ignore_case, bool is_one_byte) | 1305 Analysis(bool ignore_case, bool is_one_byte) |
1326 : ignore_case_(ignore_case), | 1306 : ignore_case_(ignore_case), |
1327 is_one_byte_(is_one_byte), | 1307 is_one_byte_(is_one_byte), |
1328 error_message_(NULL) { } | 1308 error_message_(NULL) {} |
1329 void EnsureAnalyzed(RegExpNode* node); | 1309 void EnsureAnalyzed(RegExpNode* node); |
1330 | 1310 |
1331 #define DECLARE_VISIT(Type) \ | 1311 #define DECLARE_VISIT(Type) virtual void Visit##Type(Type##Node* that); |
1332 virtual void Visit##Type(Type##Node* that); | 1312 FOR_EACH_NODE_TYPE(DECLARE_VISIT) |
1333 FOR_EACH_NODE_TYPE(DECLARE_VISIT) | |
1334 #undef DECLARE_VISIT | 1313 #undef DECLARE_VISIT |
1335 virtual void VisitLoopChoice(LoopChoiceNode* that); | 1314 virtual void VisitLoopChoice(LoopChoiceNode* that); |
1336 | 1315 |
1337 bool has_failed() { return error_message_ != NULL; } | 1316 bool has_failed() { return error_message_ != NULL; } |
1338 const char* error_message() { | 1317 const char* error_message() { |
1339 ASSERT(error_message_ != NULL); | 1318 ASSERT(error_message_ != NULL); |
1340 return error_message_; | 1319 return error_message_; |
1341 } | 1320 } |
1342 void fail(const char* error_message) { | 1321 void fail(const char* error_message) { error_message_ = error_message; } |
1343 error_message_ = error_message; | |
1344 } | |
1345 | 1322 |
1346 private: | 1323 private: |
1347 bool ignore_case_; | 1324 bool ignore_case_; |
1348 bool is_one_byte_; | 1325 bool is_one_byte_; |
1349 const char* error_message_; | 1326 const char* error_message_; |
1350 | 1327 |
1351 DISALLOW_IMPLICIT_CONSTRUCTORS(Analysis); | 1328 DISALLOW_IMPLICIT_CONSTRUCTORS(Analysis); |
1352 }; | 1329 }; |
1353 | 1330 |
1354 | 1331 |
1355 struct RegExpCompileData : public ZoneAllocated { | 1332 struct RegExpCompileData : public ZoneAllocated { |
1356 RegExpCompileData() | 1333 RegExpCompileData() |
1357 : tree(NULL), | 1334 : tree(NULL), |
1358 node(NULL), | 1335 node(NULL), |
1359 simple(true), | 1336 simple(true), |
1360 contains_anchor(false), | 1337 contains_anchor(false), |
1361 error(String::Handle(String::null())), | 1338 error(String::Handle(String::null())), |
1362 capture_count(0) { } | 1339 capture_count(0) {} |
1363 RegExpTree* tree; | 1340 RegExpTree* tree; |
1364 RegExpNode* node; | 1341 RegExpNode* node; |
1365 bool simple; | 1342 bool simple; |
1366 bool contains_anchor; | 1343 bool contains_anchor; |
1367 String& error; | 1344 String& error; |
1368 intptr_t capture_count; | 1345 intptr_t capture_count; |
1369 }; | 1346 }; |
1370 | 1347 |
1371 | 1348 |
1372 class RegExpEngine: public AllStatic { | 1349 class RegExpEngine : public AllStatic { |
1373 public: | 1350 public: |
1374 struct CompilationResult { | 1351 struct CompilationResult { |
1375 explicit CompilationResult(const char* error_message) | 1352 explicit CompilationResult(const char* error_message) |
1376 : backtrack_goto(NULL), | 1353 : backtrack_goto(NULL), |
1377 graph_entry(NULL), | 1354 graph_entry(NULL), |
1378 num_blocks(-1), | 1355 num_blocks(-1), |
1379 num_stack_locals(-1), | 1356 num_stack_locals(-1), |
1380 error_message(error_message), | 1357 error_message(error_message), |
1381 bytecode(NULL), | 1358 bytecode(NULL), |
1382 num_registers(-1) {} | 1359 num_registers(-1) {} |
1383 | 1360 |
1384 CompilationResult(TypedData* bytecode, intptr_t num_registers) | 1361 CompilationResult(TypedData* bytecode, intptr_t num_registers) |
1385 : backtrack_goto(NULL), | 1362 : backtrack_goto(NULL), |
1386 graph_entry(NULL), | 1363 graph_entry(NULL), |
1387 num_blocks(-1), | 1364 num_blocks(-1), |
1388 num_stack_locals(-1), | 1365 num_stack_locals(-1), |
1389 error_message(NULL), | 1366 error_message(NULL), |
1390 bytecode(bytecode), | 1367 bytecode(bytecode), |
1391 num_registers(num_registers) {} | 1368 num_registers(num_registers) {} |
1392 | 1369 |
1393 CompilationResult(IndirectGotoInstr* backtrack_goto, | 1370 CompilationResult(IndirectGotoInstr* backtrack_goto, |
1394 GraphEntryInstr* graph_entry, | 1371 GraphEntryInstr* graph_entry, |
1395 intptr_t num_blocks, | 1372 intptr_t num_blocks, |
1396 intptr_t num_stack_locals, | 1373 intptr_t num_stack_locals, |
1397 intptr_t num_registers) | 1374 intptr_t num_registers) |
1398 : backtrack_goto(backtrack_goto), | 1375 : backtrack_goto(backtrack_goto), |
1399 graph_entry(graph_entry), | 1376 graph_entry(graph_entry), |
1400 num_blocks(num_blocks), | 1377 num_blocks(num_blocks), |
1401 num_stack_locals(num_stack_locals), | 1378 num_stack_locals(num_stack_locals), |
1402 error_message(NULL), | 1379 error_message(NULL), |
1403 bytecode(NULL) {} | 1380 bytecode(NULL) {} |
1404 | 1381 |
1405 IndirectGotoInstr* backtrack_goto; | 1382 IndirectGotoInstr* backtrack_goto; |
1406 GraphEntryInstr* graph_entry; | 1383 GraphEntryInstr* graph_entry; |
1407 const intptr_t num_blocks; | 1384 const intptr_t num_blocks; |
1408 const intptr_t num_stack_locals; | 1385 const intptr_t num_stack_locals; |
1409 | 1386 |
1410 const char* error_message; | 1387 const char* error_message; |
1411 | 1388 |
1412 TypedData* bytecode; | 1389 TypedData* bytecode; |
1413 intptr_t num_registers; | 1390 intptr_t num_registers; |
1414 }; | 1391 }; |
1415 | 1392 |
1416 static CompilationResult CompileIR( | 1393 static CompilationResult CompileIR( |
1417 RegExpCompileData* input, | 1394 RegExpCompileData* input, |
1418 const ParsedFunction* parsed_function, | 1395 const ParsedFunction* parsed_function, |
1419 const ZoneGrowableArray<const ICData*>& ic_data_array); | 1396 const ZoneGrowableArray<const ICData*>& ic_data_array); |
1420 | 1397 |
1421 static CompilationResult CompileBytecode( | 1398 static CompilationResult CompileBytecode(RegExpCompileData* data, |
1422 RegExpCompileData* data, | 1399 const RegExp& regexp, |
1423 const RegExp& regexp, | 1400 bool is_one_byte, |
1424 bool is_one_byte, | 1401 Zone* zone); |
1425 Zone* zone); | |
1426 | 1402 |
1427 static RawRegExp* CreateRegExp( | 1403 static RawRegExp* CreateRegExp(Thread* thread, |
1428 Thread* thread, | 1404 const String& pattern, |
1429 const String& pattern, | 1405 bool multi_line, |
1430 bool multi_line, | 1406 bool ignore_case); |
1431 bool ignore_case); | |
1432 | 1407 |
1433 static void DotPrint(const char* label, RegExpNode* node, bool ignore_case); | 1408 static void DotPrint(const char* label, RegExpNode* node, bool ignore_case); |
1434 }; | 1409 }; |
1435 | 1410 |
1436 } // namespace dart | 1411 } // namespace dart |
1437 | 1412 |
1438 #endif // RUNTIME_VM_REGEXP_H_ | 1413 #endif // RUNTIME_VM_REGEXP_H_ |
OLD | NEW |