| OLD | NEW |
| 1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
| 4 | 4 |
| 5 #ifndef RUNTIME_VM_REGEXP_H_ | 5 #ifndef RUNTIME_VM_REGEXP_H_ |
| 6 #define RUNTIME_VM_REGEXP_H_ | 6 #define RUNTIME_VM_REGEXP_H_ |
| 7 | 7 |
| 8 #include "vm/assembler.h" | 8 #include "vm/assembler.h" |
| 9 #include "vm/intermediate_language.h" | 9 #include "vm/intermediate_language.h" |
| 10 #include "vm/flow_graph_compiler.h" | 10 #include "vm/flow_graph_compiler.h" |
| 11 #include "vm/object.h" | 11 #include "vm/object.h" |
| 12 #include "vm/regexp_assembler.h" | 12 #include "vm/regexp_assembler.h" |
| 13 | 13 |
| 14 namespace dart { | 14 namespace dart { |
| 15 | 15 |
| 16 class NodeVisitor; | 16 class NodeVisitor; |
| 17 class RegExpCompiler; | 17 class RegExpCompiler; |
| 18 class RegExpMacroAssembler; | 18 class RegExpMacroAssembler; |
| 19 class RegExpNode; | 19 class RegExpNode; |
| 20 class RegExpTree; | 20 class RegExpTree; |
| 21 class BoyerMooreLookahead; | 21 class BoyerMooreLookahead; |
| 22 | 22 |
| 23 | 23 |
| 24 // Represents code units in the range from from_ to to_, both ends are | 24 // Represents code units in the range from from_ to to_, both ends are |
| 25 // inclusive. | 25 // inclusive. |
| 26 class CharacterRange { | 26 class CharacterRange { |
| 27 public: | 27 public: |
| 28 CharacterRange() : from_(0), to_(0) { } | 28 CharacterRange() : from_(0), to_(0) {} |
| 29 CharacterRange(uint16_t from, uint16_t to) : from_(from), to_(to) { } | 29 CharacterRange(uint16_t from, uint16_t to) : from_(from), to_(to) {} |
| 30 | 30 |
| 31 static void AddClassEscape(uint16_t type, | 31 static void AddClassEscape(uint16_t type, |
| 32 ZoneGrowableArray<CharacterRange>* ranges); | 32 ZoneGrowableArray<CharacterRange>* ranges); |
| 33 static GrowableArray<const intptr_t> GetWordBounds(); | 33 static GrowableArray<const intptr_t> GetWordBounds(); |
| 34 static inline CharacterRange Singleton(uint16_t value) { | 34 static inline CharacterRange Singleton(uint16_t value) { |
| 35 return CharacterRange(value, value); | 35 return CharacterRange(value, value); |
| 36 } | 36 } |
| 37 static inline CharacterRange Range(uint16_t from, uint16_t to) { | 37 static inline CharacterRange Range(uint16_t from, uint16_t to) { |
| 38 ASSERT(from <= to); | 38 ASSERT(from <= to); |
| 39 return CharacterRange(from, to); | 39 return CharacterRange(from, to); |
| (...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 74 private: | 74 private: |
| 75 uint16_t from_; | 75 uint16_t from_; |
| 76 uint16_t to_; | 76 uint16_t to_; |
| 77 | 77 |
| 78 DISALLOW_ALLOCATION(); | 78 DISALLOW_ALLOCATION(); |
| 79 }; | 79 }; |
| 80 | 80 |
| 81 | 81 |
| 82 // A set of unsigned integers that behaves especially well on small | 82 // A set of unsigned integers that behaves especially well on small |
| 83 // integers (< 32). May do zone-allocation. | 83 // integers (< 32). May do zone-allocation. |
| 84 class OutSet: public ZoneAllocated { | 84 class OutSet : public ZoneAllocated { |
| 85 public: | 85 public: |
| 86 OutSet() : first_(0), remaining_(NULL), successors_(NULL) { } | 86 OutSet() : first_(0), remaining_(NULL), successors_(NULL) {} |
| 87 OutSet* Extend(unsigned value, Zone* zone); | 87 OutSet* Extend(unsigned value, Zone* zone); |
| 88 bool Get(unsigned value) const; | 88 bool Get(unsigned value) const; |
| 89 static const unsigned kFirstLimit = 32; | 89 static const unsigned kFirstLimit = 32; |
| 90 | 90 |
| 91 private: | 91 private: |
| 92 // Destructively set a value in this set. In most cases you want | 92 // Destructively set a value in this set. In most cases you want |
| 93 // to use Extend instead to ensure that only one instance exists | 93 // to use Extend instead to ensure that only one instance exists |
| 94 // that contains the same values. | 94 // that contains the same values. |
| 95 void Set(unsigned value, Zone* zone); | 95 void Set(unsigned value, Zone* zone); |
| 96 | 96 |
| 97 // The successors are a list of sets that contain the same values | 97 // The successors are a list of sets that contain the same values |
| 98 // as this set and the one more value that is not present in this | 98 // as this set and the one more value that is not present in this |
| 99 // set. | 99 // set. |
| 100 ZoneGrowableArray<OutSet*>* successors() { return successors_; } | 100 ZoneGrowableArray<OutSet*>* successors() { return successors_; } |
| 101 | 101 |
| 102 OutSet(uint32_t first, ZoneGrowableArray<unsigned>* remaining) | 102 OutSet(uint32_t first, ZoneGrowableArray<unsigned>* remaining) |
| 103 : first_(first), remaining_(remaining), successors_(NULL) { } | 103 : first_(first), remaining_(remaining), successors_(NULL) {} |
| 104 uint32_t first_; | 104 uint32_t first_; |
| 105 ZoneGrowableArray<unsigned>* remaining_; | 105 ZoneGrowableArray<unsigned>* remaining_; |
| 106 ZoneGrowableArray<OutSet*>* successors_; | 106 ZoneGrowableArray<OutSet*>* successors_; |
| 107 friend class Trace; | 107 friend class Trace; |
| 108 }; | 108 }; |
| 109 | 109 |
| 110 | 110 |
| 111 #define FOR_EACH_NODE_TYPE(VISIT) \ | 111 #define FOR_EACH_NODE_TYPE(VISIT) \ |
| 112 VISIT(End) \ | 112 VISIT(End) \ |
| 113 VISIT(Action) \ | 113 VISIT(Action) \ |
| 114 VISIT(Choice) \ | 114 VISIT(Choice) \ |
| 115 VISIT(BackReference) \ | 115 VISIT(BackReference) \ |
| 116 VISIT(Assertion) \ | 116 VISIT(Assertion) \ |
| 117 VISIT(Text) | 117 VISIT(Text) |
| 118 | 118 |
| 119 | 119 |
| 120 #define FOR_EACH_REG_EXP_TREE_TYPE(VISIT) \ | 120 #define FOR_EACH_REG_EXP_TREE_TYPE(VISIT) \ |
| 121 VISIT(Disjunction) \ | 121 VISIT(Disjunction) \ |
| 122 VISIT(Alternative) \ | 122 VISIT(Alternative) \ |
| 123 VISIT(Assertion) \ | 123 VISIT(Assertion) \ |
| 124 VISIT(CharacterClass) \ | 124 VISIT(CharacterClass) \ |
| 125 VISIT(Atom) \ | 125 VISIT(Atom) \ |
| 126 VISIT(Quantifier) \ | 126 VISIT(Quantifier) \ |
| 127 VISIT(Capture) \ | 127 VISIT(Capture) \ |
| 128 VISIT(Lookahead) \ | 128 VISIT(Lookahead) \ |
| 129 VISIT(BackReference) \ | 129 VISIT(BackReference) \ |
| 130 VISIT(Empty) \ | 130 VISIT(Empty) \ |
| 131 VISIT(Text) | 131 VISIT(Text) |
| 132 | 132 |
| 133 | 133 |
| 134 #define FORWARD_DECLARE(Name) class RegExp##Name; | 134 #define FORWARD_DECLARE(Name) class RegExp##Name; |
| 135 FOR_EACH_REG_EXP_TREE_TYPE(FORWARD_DECLARE) | 135 FOR_EACH_REG_EXP_TREE_TYPE(FORWARD_DECLARE) |
| 136 #undef FORWARD_DECLARE | 136 #undef FORWARD_DECLARE |
| 137 | 137 |
| 138 | 138 |
| 139 class TextElement { | 139 class TextElement { |
| 140 public: | 140 public: |
| 141 enum TextType { | 141 enum TextType { ATOM, CHAR_CLASS }; |
| 142 ATOM, | |
| 143 CHAR_CLASS | |
| 144 }; | |
| 145 | 142 |
| 146 static TextElement Atom(RegExpAtom* atom); | 143 static TextElement Atom(RegExpAtom* atom); |
| 147 static TextElement CharClass(RegExpCharacterClass* char_class); | 144 static TextElement CharClass(RegExpCharacterClass* char_class); |
| 148 | 145 |
| 149 intptr_t cp_offset() const { return cp_offset_; } | 146 intptr_t cp_offset() const { return cp_offset_; } |
| 150 void set_cp_offset(intptr_t cp_offset) { cp_offset_ = cp_offset; } | 147 void set_cp_offset(intptr_t cp_offset) { cp_offset_ = cp_offset; } |
| 151 intptr_t length() const; | 148 intptr_t length() const; |
| 152 | 149 |
| 153 TextType text_type() const { return text_type_; } | 150 TextType text_type() const { return text_type_; } |
| 154 | 151 |
| (...skipping 28 matching lines...) Expand all Loading... |
| 183 | 180 |
| 184 struct NodeInfo { | 181 struct NodeInfo { |
| 185 NodeInfo() | 182 NodeInfo() |
| 186 : being_analyzed(false), | 183 : being_analyzed(false), |
| 187 been_analyzed(false), | 184 been_analyzed(false), |
| 188 follows_word_interest(false), | 185 follows_word_interest(false), |
| 189 follows_newline_interest(false), | 186 follows_newline_interest(false), |
| 190 follows_start_interest(false), | 187 follows_start_interest(false), |
| 191 at_end(false), | 188 at_end(false), |
| 192 visited(false), | 189 visited(false), |
| 193 replacement_calculated(false) { } | 190 replacement_calculated(false) {} |
| 194 | 191 |
| 195 // Returns true if the interests and assumptions of this node | 192 // Returns true if the interests and assumptions of this node |
| 196 // matches the given one. | 193 // matches the given one. |
| 197 bool Matches(NodeInfo* that) { | 194 bool Matches(NodeInfo* that) { |
| 198 return (at_end == that->at_end) && | 195 return (at_end == that->at_end) && |
| 199 (follows_word_interest == that->follows_word_interest) && | 196 (follows_word_interest == that->follows_word_interest) && |
| 200 (follows_newline_interest == that->follows_newline_interest) && | 197 (follows_newline_interest == that->follows_newline_interest) && |
| 201 (follows_start_interest == that->follows_start_interest); | 198 (follows_start_interest == that->follows_start_interest); |
| 202 } | 199 } |
| 203 | 200 |
| 204 // Updates the interests of this node given the interests of the | 201 // Updates the interests of this node given the interests of the |
| 205 // node preceding it. | 202 // node preceding it. |
| 206 void AddFromPreceding(NodeInfo* that) { | 203 void AddFromPreceding(NodeInfo* that) { |
| 207 at_end |= that->at_end; | 204 at_end |= that->at_end; |
| 208 follows_word_interest |= that->follows_word_interest; | 205 follows_word_interest |= that->follows_word_interest; |
| 209 follows_newline_interest |= that->follows_newline_interest; | 206 follows_newline_interest |= that->follows_newline_interest; |
| 210 follows_start_interest |= that->follows_start_interest; | 207 follows_start_interest |= that->follows_start_interest; |
| 211 } | 208 } |
| 212 | 209 |
| 213 bool HasLookbehind() { | 210 bool HasLookbehind() { |
| 214 return follows_word_interest || | 211 return follows_word_interest || follows_newline_interest || |
| 215 follows_newline_interest || | |
| 216 follows_start_interest; | 212 follows_start_interest; |
| 217 } | 213 } |
| 218 | 214 |
| 219 // Sets the interests of this node to include the interests of the | 215 // Sets the interests of this node to include the interests of the |
| 220 // following node. | 216 // following node. |
| 221 void AddFromFollowing(NodeInfo* that) { | 217 void AddFromFollowing(NodeInfo* that) { |
| 222 follows_word_interest |= that->follows_word_interest; | 218 follows_word_interest |= that->follows_word_interest; |
| 223 follows_newline_interest |= that->follows_newline_interest; | 219 follows_newline_interest |= that->follows_newline_interest; |
| 224 follows_start_interest |= that->follows_start_interest; | 220 follows_start_interest |= that->follows_start_interest; |
| 225 } | 221 } |
| 226 | 222 |
| 227 void ResetCompilationState() { | 223 void ResetCompilationState() { |
| 228 being_analyzed = false; | 224 being_analyzed = false; |
| 229 been_analyzed = false; | 225 been_analyzed = false; |
| 230 } | 226 } |
| 231 | 227 |
| 232 bool being_analyzed: 1; | 228 bool being_analyzed : 1; |
| 233 bool been_analyzed: 1; | 229 bool been_analyzed : 1; |
| 234 | 230 |
| 235 // These bits are set of this node has to know what the preceding | 231 // These bits are set of this node has to know what the preceding |
| 236 // character was. | 232 // character was. |
| 237 bool follows_word_interest: 1; | 233 bool follows_word_interest : 1; |
| 238 bool follows_newline_interest: 1; | 234 bool follows_newline_interest : 1; |
| 239 bool follows_start_interest: 1; | 235 bool follows_start_interest : 1; |
| 240 | 236 |
| 241 bool at_end: 1; | 237 bool at_end : 1; |
| 242 bool visited: 1; | 238 bool visited : 1; |
| 243 bool replacement_calculated: 1; | 239 bool replacement_calculated : 1; |
| 244 }; | 240 }; |
| 245 | 241 |
| 246 | 242 |
| 247 // Details of a quick mask-compare check that can look ahead in the | 243 // Details of a quick mask-compare check that can look ahead in the |
| 248 // input stream. | 244 // input stream. |
| 249 class QuickCheckDetails { | 245 class QuickCheckDetails { |
| 250 public: | 246 public: |
| 251 QuickCheckDetails() | 247 QuickCheckDetails() |
| 252 : characters_(0), | 248 : characters_(0), mask_(0), value_(0), cannot_match_(false) {} |
| 253 mask_(0), | |
| 254 value_(0), | |
| 255 cannot_match_(false) { } | |
| 256 explicit QuickCheckDetails(intptr_t characters) | 249 explicit QuickCheckDetails(intptr_t characters) |
| 257 : characters_(characters), | 250 : characters_(characters), mask_(0), value_(0), cannot_match_(false) {} |
| 258 mask_(0), | |
| 259 value_(0), | |
| 260 cannot_match_(false) { } | |
| 261 bool Rationalize(bool one_byte); | 251 bool Rationalize(bool one_byte); |
| 262 // Merge in the information from another branch of an alternation. | 252 // Merge in the information from another branch of an alternation. |
| 263 void Merge(QuickCheckDetails* other, intptr_t from_index); | 253 void Merge(QuickCheckDetails* other, intptr_t from_index); |
| 264 // Advance the current position by some amount. | 254 // Advance the current position by some amount. |
| 265 void Advance(intptr_t by, bool one_byte); | 255 void Advance(intptr_t by, bool one_byte); |
| 266 void Clear(); | 256 void Clear(); |
| 267 bool cannot_match() { return cannot_match_; } | 257 bool cannot_match() { return cannot_match_; } |
| 268 void set_cannot_match() { cannot_match_ = true; } | 258 void set_cannot_match() { cannot_match_ = true; } |
| 269 struct Position { | 259 struct Position { |
| 270 Position() : mask(0), value(0), determines_perfectly(false) { } | 260 Position() : mask(0), value(0), determines_perfectly(false) {} |
| 271 uint16_t mask; | 261 uint16_t mask; |
| 272 uint16_t value; | 262 uint16_t value; |
| 273 bool determines_perfectly; | 263 bool determines_perfectly; |
| 274 }; | 264 }; |
| 275 intptr_t characters() { return characters_; } | 265 intptr_t characters() { return characters_; } |
| 276 void set_characters(intptr_t characters) { characters_ = characters; } | 266 void set_characters(intptr_t characters) { characters_ = characters; } |
| 277 Position* positions(intptr_t index) { | 267 Position* positions(intptr_t index) { |
| 278 ASSERT(index >= 0); | 268 ASSERT(index >= 0); |
| 279 ASSERT(index < characters_); | 269 ASSERT(index < characters_); |
| 280 return positions_ + index; | 270 return positions_ + index; |
| (...skipping 10 matching lines...) Expand all Loading... |
| 291 uint32_t mask_; | 281 uint32_t mask_; |
| 292 uint32_t value_; | 282 uint32_t value_; |
| 293 // If set to true, there is no way this quick check can match at all. | 283 // If set to true, there is no way this quick check can match at all. |
| 294 // E.g., if it requires to be at the start of the input, and isn't. | 284 // E.g., if it requires to be at the start of the input, and isn't. |
| 295 bool cannot_match_; | 285 bool cannot_match_; |
| 296 | 286 |
| 297 DISALLOW_ALLOCATION(); | 287 DISALLOW_ALLOCATION(); |
| 298 }; | 288 }; |
| 299 | 289 |
| 300 | 290 |
| 301 class RegExpNode: public ZoneAllocated { | 291 class RegExpNode : public ZoneAllocated { |
| 302 public: | 292 public: |
| 303 explicit RegExpNode(Zone* zone) | 293 explicit RegExpNode(Zone* zone) |
| 304 : replacement_(NULL), trace_count_(0), zone_(zone) { | 294 : replacement_(NULL), trace_count_(0), zone_(zone) { |
| 305 bm_info_[0] = bm_info_[1] = NULL; | 295 bm_info_[0] = bm_info_[1] = NULL; |
| 306 } | 296 } |
| 307 virtual ~RegExpNode(); | 297 virtual ~RegExpNode(); |
| 308 virtual void Accept(NodeVisitor* visitor) = 0; | 298 virtual void Accept(NodeVisitor* visitor) = 0; |
| 309 // Generates a goto to this node or actually generates the code at this point. | 299 // Generates a goto to this node or actually generates the code at this point. |
| 310 virtual void Emit(RegExpCompiler* compiler, Trace* trace) = 0; | 300 virtual void Emit(RegExpCompiler* compiler, Trace* trace) = 0; |
| 311 // How many characters must this node consume at a minimum in order to | 301 // How many characters must this node consume at a minimum in order to |
| 312 // succeed. If we have found at least 'still_to_find' characters that | 302 // succeed. If we have found at least 'still_to_find' characters that |
| 313 // must be consumed there is no need to ask any following nodes whether | 303 // must be consumed there is no need to ask any following nodes whether |
| 314 // they are sure to eat any more characters. The not_at_start argument is | 304 // they are sure to eat any more characters. The not_at_start argument is |
| 315 // used to indicate that we know we are not at the start of the input. In | 305 // used to indicate that we know we are not at the start of the input. In |
| 316 // this case anchored branches will always fail and can be ignored when | 306 // this case anchored branches will always fail and can be ignored when |
| 317 // determining how many characters are consumed on success. | 307 // determining how many characters are consumed on success. |
| 318 virtual intptr_t EatsAtLeast(intptr_t still_to_find, intptr_t budget, | 308 virtual intptr_t EatsAtLeast(intptr_t still_to_find, |
| 309 intptr_t budget, |
| 319 bool not_at_start) = 0; | 310 bool not_at_start) = 0; |
| 320 // Emits some quick code that checks whether the preloaded characters match. | 311 // Emits some quick code that checks whether the preloaded characters match. |
| 321 // Falls through on certain failure, jumps to the label on possible success. | 312 // Falls through on certain failure, jumps to the label on possible success. |
| 322 // If the node cannot make a quick check it does nothing and returns false. | 313 // If the node cannot make a quick check it does nothing and returns false. |
| 323 bool EmitQuickCheck(RegExpCompiler* compiler, | 314 bool EmitQuickCheck(RegExpCompiler* compiler, |
| 324 Trace* bounds_check_trace, | 315 Trace* bounds_check_trace, |
| 325 Trace* trace, | 316 Trace* trace, |
| 326 bool preload_has_checked_bounds, | 317 bool preload_has_checked_bounds, |
| 327 BlockLabel* on_possible_success, | 318 BlockLabel* on_possible_success, |
| 328 QuickCheckDetails* details_return, | 319 QuickCheckDetails* details_return, |
| (...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 365 virtual RegExpNode* FilterOneByte(intptr_t depth, bool ignore_case) { | 356 virtual RegExpNode* FilterOneByte(intptr_t depth, bool ignore_case) { |
| 366 return this; | 357 return this; |
| 367 } | 358 } |
| 368 // Helper for FilterOneByte. | 359 // Helper for FilterOneByte. |
| 369 RegExpNode* replacement() { | 360 RegExpNode* replacement() { |
| 370 ASSERT(info()->replacement_calculated); | 361 ASSERT(info()->replacement_calculated); |
| 371 return replacement_; | 362 return replacement_; |
| 372 } | 363 } |
| 373 RegExpNode* set_replacement(RegExpNode* replacement) { | 364 RegExpNode* set_replacement(RegExpNode* replacement) { |
| 374 info()->replacement_calculated = true; | 365 info()->replacement_calculated = true; |
| 375 replacement_ = replacement; | 366 replacement_ = replacement; |
| 376 return replacement; // For convenience. | 367 return replacement; // For convenience. |
| 377 } | 368 } |
| 378 | 369 |
| 379 // We want to avoid recalculating the lookahead info, so we store it on the | 370 // We want to avoid recalculating the lookahead info, so we store it on the |
| 380 // node. Only info that is for this node is stored. We can tell that the | 371 // node. Only info that is for this node is stored. We can tell that the |
| 381 // info is for this node when offset == 0, so the information is calculated | 372 // info is for this node when offset == 0, so the information is calculated |
| 382 // relative to this node. | 373 // relative to this node. |
| 383 void SaveBMInfo(BoyerMooreLookahead* bm, bool not_at_start, intptr_t offset) { | 374 void SaveBMInfo(BoyerMooreLookahead* bm, bool not_at_start, intptr_t offset) { |
| 384 if (offset == 0) set_bm_info(not_at_start, bm); | 375 if (offset == 0) set_bm_info(not_at_start, bm); |
| 385 } | 376 } |
| (...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 421 // deferred operations in the current trace and generating a goto. | 412 // deferred operations in the current trace and generating a goto. |
| 422 intptr_t trace_count_; | 413 intptr_t trace_count_; |
| 423 BoyerMooreLookahead* bm_info_[2]; | 414 BoyerMooreLookahead* bm_info_[2]; |
| 424 Zone* zone_; | 415 Zone* zone_; |
| 425 }; | 416 }; |
| 426 | 417 |
| 427 | 418 |
| 428 // A simple closed interval. | 419 // A simple closed interval. |
| 429 class Interval { | 420 class Interval { |
| 430 public: | 421 public: |
| 431 Interval() : from_(kNone), to_(kNone) { } | 422 Interval() : from_(kNone), to_(kNone) {} |
| 432 Interval(intptr_t from, intptr_t to) : from_(from), to_(to) { } | 423 Interval(intptr_t from, intptr_t to) : from_(from), to_(to) {} |
| 433 | 424 |
| 434 Interval Union(Interval that) { | 425 Interval Union(Interval that) { |
| 435 if (that.from_ == kNone) | 426 if (that.from_ == kNone) |
| 436 return *this; | 427 return *this; |
| 437 else if (from_ == kNone) | 428 else if (from_ == kNone) |
| 438 return that; | 429 return that; |
| 439 else | 430 else |
| 440 return Interval(Utils::Minimum(from_, that.from_), | 431 return Interval(Utils::Minimum(from_, that.from_), |
| 441 Utils::Maximum(to_, that.to_)); | 432 Utils::Maximum(to_, that.to_)); |
| 442 } | 433 } |
| 443 bool Contains(intptr_t value) const { | 434 bool Contains(intptr_t value) const { |
| 444 return (from_ <= value) && (value <= to_); | 435 return (from_ <= value) && (value <= to_); |
| 445 } | 436 } |
| 446 bool is_empty() const { return from_ == kNone; } | 437 bool is_empty() const { return from_ == kNone; } |
| 447 intptr_t from() const { return from_; } | 438 intptr_t from() const { return from_; } |
| 448 intptr_t to() const { return to_; } | 439 intptr_t to() const { return to_; } |
| 449 static Interval Empty() { return Interval(); } | 440 static Interval Empty() { return Interval(); } |
| 450 static const intptr_t kNone = -1; | 441 static const intptr_t kNone = -1; |
| 451 | 442 |
| 452 private: | 443 private: |
| 453 intptr_t from_; | 444 intptr_t from_; |
| 454 intptr_t to_; | 445 intptr_t to_; |
| 455 | 446 |
| 456 DISALLOW_ALLOCATION(); | 447 DISALLOW_ALLOCATION(); |
| 457 }; | 448 }; |
| 458 | 449 |
| 459 | 450 |
| 460 class SeqRegExpNode: public RegExpNode { | 451 class SeqRegExpNode : public RegExpNode { |
| 461 public: | 452 public: |
| 462 explicit SeqRegExpNode(RegExpNode* on_success) | 453 explicit SeqRegExpNode(RegExpNode* on_success) |
| 463 : RegExpNode(on_success->zone()), on_success_(on_success) { } | 454 : RegExpNode(on_success->zone()), on_success_(on_success) {} |
| 464 RegExpNode* on_success() { return on_success_; } | 455 RegExpNode* on_success() { return on_success_; } |
| 465 void set_on_success(RegExpNode* node) { on_success_ = node; } | 456 void set_on_success(RegExpNode* node) { on_success_ = node; } |
| 466 virtual RegExpNode* FilterOneByte(intptr_t depth, bool ignore_case); | 457 virtual RegExpNode* FilterOneByte(intptr_t depth, bool ignore_case); |
| 467 virtual void FillInBMInfo(intptr_t offset, | 458 virtual void FillInBMInfo(intptr_t offset, |
| 468 intptr_t budget, | 459 intptr_t budget, |
| 469 BoyerMooreLookahead* bm, | 460 BoyerMooreLookahead* bm, |
| 470 bool not_at_start) { | 461 bool not_at_start) { |
| 471 on_success_->FillInBMInfo(offset, budget - 1, bm, not_at_start); | 462 on_success_->FillInBMInfo(offset, budget - 1, bm, not_at_start); |
| 472 if (offset == 0) set_bm_info(not_at_start, bm); | 463 if (offset == 0) set_bm_info(not_at_start, bm); |
| 473 } | 464 } |
| 474 | 465 |
| 475 protected: | 466 protected: |
| 476 RegExpNode* FilterSuccessor(intptr_t depth, bool ignore_case); | 467 RegExpNode* FilterSuccessor(intptr_t depth, bool ignore_case); |
| 477 | 468 |
| 478 private: | 469 private: |
| 479 RegExpNode* on_success_; | 470 RegExpNode* on_success_; |
| 480 }; | 471 }; |
| 481 | 472 |
| 482 | 473 |
| 483 class ActionNode: public SeqRegExpNode { | 474 class ActionNode : public SeqRegExpNode { |
| 484 public: | 475 public: |
| 485 enum ActionType { | 476 enum ActionType { |
| 486 SET_REGISTER, | 477 SET_REGISTER, |
| 487 INCREMENT_REGISTER, | 478 INCREMENT_REGISTER, |
| 488 STORE_POSITION, | 479 STORE_POSITION, |
| 489 BEGIN_SUBMATCH, | 480 BEGIN_SUBMATCH, |
| 490 POSITIVE_SUBMATCH_SUCCESS, | 481 POSITIVE_SUBMATCH_SUCCESS, |
| 491 EMPTY_MATCH_CHECK, | 482 EMPTY_MATCH_CHECK, |
| 492 CLEAR_CAPTURES | 483 CLEAR_CAPTURES |
| 493 }; | 484 }; |
| 494 static ActionNode* SetRegister(intptr_t reg, intptr_t val, | 485 static ActionNode* SetRegister(intptr_t reg, |
| 486 intptr_t val, |
| 495 RegExpNode* on_success); | 487 RegExpNode* on_success); |
| 496 static ActionNode* IncrementRegister(intptr_t reg, RegExpNode* on_success); | 488 static ActionNode* IncrementRegister(intptr_t reg, RegExpNode* on_success); |
| 497 static ActionNode* StorePosition(intptr_t reg, | 489 static ActionNode* StorePosition(intptr_t reg, |
| 498 bool is_capture, | 490 bool is_capture, |
| 499 RegExpNode* on_success); | 491 RegExpNode* on_success); |
| 500 static ActionNode* ClearCaptures(Interval range, RegExpNode* on_success); | 492 static ActionNode* ClearCaptures(Interval range, RegExpNode* on_success); |
| 501 static ActionNode* BeginSubmatch(intptr_t stack_pointer_reg, | 493 static ActionNode* BeginSubmatch(intptr_t stack_pointer_reg, |
| 502 intptr_t position_reg, | 494 intptr_t position_reg, |
| 503 RegExpNode* on_success); | 495 RegExpNode* on_success); |
| 504 static ActionNode* PositiveSubmatchSuccess(intptr_t stack_pointer_reg, | 496 static ActionNode* PositiveSubmatchSuccess(intptr_t stack_pointer_reg, |
| 505 intptr_t restore_reg, | 497 intptr_t restore_reg, |
| 506 intptr_t clear_capture_count, | 498 intptr_t clear_capture_count, |
| 507 intptr_t clear_capture_from, | 499 intptr_t clear_capture_from, |
| 508 RegExpNode* on_success); | 500 RegExpNode* on_success); |
| 509 static ActionNode* EmptyMatchCheck(intptr_t start_register, | 501 static ActionNode* EmptyMatchCheck(intptr_t start_register, |
| 510 intptr_t repetition_register, | 502 intptr_t repetition_register, |
| 511 intptr_t repetition_limit, | 503 intptr_t repetition_limit, |
| 512 RegExpNode* on_success); | 504 RegExpNode* on_success); |
| 513 virtual void Accept(NodeVisitor* visitor); | 505 virtual void Accept(NodeVisitor* visitor); |
| 514 virtual void Emit(RegExpCompiler* compiler, Trace* trace); | 506 virtual void Emit(RegExpCompiler* compiler, Trace* trace); |
| 515 virtual intptr_t EatsAtLeast(intptr_t still_to_find, intptr_t budget, | 507 virtual intptr_t EatsAtLeast(intptr_t still_to_find, |
| 508 intptr_t budget, |
| 516 bool not_at_start); | 509 bool not_at_start); |
| 517 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 510 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
| 518 RegExpCompiler* compiler, | 511 RegExpCompiler* compiler, |
| 519 intptr_t filled_in, | 512 intptr_t filled_in, |
| 520 bool not_at_start) { | 513 bool not_at_start) { |
| 521 return on_success()->GetQuickCheckDetails( | 514 return on_success()->GetQuickCheckDetails(details, compiler, filled_in, |
| 522 details, compiler, filled_in, not_at_start); | 515 not_at_start); |
| 523 } | 516 } |
| 524 virtual void FillInBMInfo(intptr_t offset, | 517 virtual void FillInBMInfo(intptr_t offset, |
| 525 intptr_t budget, | 518 intptr_t budget, |
| 526 BoyerMooreLookahead* bm, | 519 BoyerMooreLookahead* bm, |
| 527 bool not_at_start); | 520 bool not_at_start); |
| 528 ActionType action_type() { return action_type_; } | 521 ActionType action_type() { return action_type_; } |
| 529 // TODO(erikcorry): We should allow some action nodes in greedy loops. | 522 // TODO(erikcorry): We should allow some action nodes in greedy loops. |
| 530 virtual intptr_t GreedyLoopTextLength() { | 523 virtual intptr_t GreedyLoopTextLength() { |
| 531 return kNodeIsTooComplexForGreedyLoops; | 524 return kNodeIsTooComplexForGreedyLoops; |
| 532 } | 525 } |
| (...skipping 21 matching lines...) Expand all Loading... |
| 554 intptr_t start_register; | 547 intptr_t start_register; |
| 555 intptr_t repetition_register; | 548 intptr_t repetition_register; |
| 556 intptr_t repetition_limit; | 549 intptr_t repetition_limit; |
| 557 } u_empty_match_check; | 550 } u_empty_match_check; |
| 558 struct { | 551 struct { |
| 559 intptr_t range_from; | 552 intptr_t range_from; |
| 560 intptr_t range_to; | 553 intptr_t range_to; |
| 561 } u_clear_captures; | 554 } u_clear_captures; |
| 562 } data_; | 555 } data_; |
| 563 ActionNode(ActionType action_type, RegExpNode* on_success) | 556 ActionNode(ActionType action_type, RegExpNode* on_success) |
| 564 : SeqRegExpNode(on_success), | 557 : SeqRegExpNode(on_success), action_type_(action_type) {} |
| 565 action_type_(action_type) { } | |
| 566 ActionType action_type_; | 558 ActionType action_type_; |
| 567 friend class DotPrinter; | 559 friend class DotPrinter; |
| 568 }; | 560 }; |
| 569 | 561 |
| 570 | 562 |
| 571 class TextNode: public SeqRegExpNode { | 563 class TextNode : public SeqRegExpNode { |
| 572 public: | 564 public: |
| 573 TextNode(ZoneGrowableArray<TextElement>* elms, | 565 TextNode(ZoneGrowableArray<TextElement>* elms, RegExpNode* on_success) |
| 574 RegExpNode* on_success) | 566 : SeqRegExpNode(on_success), elms_(elms) {} |
| 567 TextNode(RegExpCharacterClass* that, RegExpNode* on_success) |
| 575 : SeqRegExpNode(on_success), | 568 : SeqRegExpNode(on_success), |
| 576 elms_(elms) { } | 569 elms_(new (zone()) ZoneGrowableArray<TextElement>(1)) { |
| 577 TextNode(RegExpCharacterClass* that, | |
| 578 RegExpNode* on_success) | |
| 579 : SeqRegExpNode(on_success), | |
| 580 elms_(new(zone()) ZoneGrowableArray<TextElement>(1)) { | |
| 581 elms_->Add(TextElement::CharClass(that)); | 570 elms_->Add(TextElement::CharClass(that)); |
| 582 } | 571 } |
| 583 virtual void Accept(NodeVisitor* visitor); | 572 virtual void Accept(NodeVisitor* visitor); |
| 584 virtual void Emit(RegExpCompiler* compiler, Trace* trace); | 573 virtual void Emit(RegExpCompiler* compiler, Trace* trace); |
| 585 virtual intptr_t EatsAtLeast(intptr_t still_to_find, intptr_t budget, | 574 virtual intptr_t EatsAtLeast(intptr_t still_to_find, |
| 575 intptr_t budget, |
| 586 bool not_at_start); | 576 bool not_at_start); |
| 587 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 577 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
| 588 RegExpCompiler* compiler, | 578 RegExpCompiler* compiler, |
| 589 intptr_t characters_filled_in, | 579 intptr_t characters_filled_in, |
| 590 bool not_at_start); | 580 bool not_at_start); |
| 591 ZoneGrowableArray<TextElement>* elements() { return elms_; } | 581 ZoneGrowableArray<TextElement>* elements() { return elms_; } |
| 592 void MakeCaseIndependent(bool is_one_byte); | 582 void MakeCaseIndependent(bool is_one_byte); |
| 593 virtual intptr_t GreedyLoopTextLength(); | 583 virtual intptr_t GreedyLoopTextLength(); |
| 594 virtual RegExpNode* GetSuccessorOfOmnivorousTextNode( | 584 virtual RegExpNode* GetSuccessorOfOmnivorousTextNode( |
| 595 RegExpCompiler* compiler); | 585 RegExpCompiler* compiler); |
| (...skipping 19 matching lines...) Expand all Loading... |
| 615 TextEmitPassType pass, | 605 TextEmitPassType pass, |
| 616 bool preloaded, | 606 bool preloaded, |
| 617 Trace* trace, | 607 Trace* trace, |
| 618 bool first_element_checked, | 608 bool first_element_checked, |
| 619 intptr_t* checked_up_to); | 609 intptr_t* checked_up_to); |
| 620 intptr_t Length(); | 610 intptr_t Length(); |
| 621 ZoneGrowableArray<TextElement>* elms_; | 611 ZoneGrowableArray<TextElement>* elms_; |
| 622 }; | 612 }; |
| 623 | 613 |
| 624 | 614 |
| 625 class AssertionNode: public SeqRegExpNode { | 615 class AssertionNode : public SeqRegExpNode { |
| 626 public: | 616 public: |
| 627 enum AssertionType { | 617 enum AssertionType { |
| 628 AT_END, | 618 AT_END, |
| 629 AT_START, | 619 AT_START, |
| 630 AT_BOUNDARY, | 620 AT_BOUNDARY, |
| 631 AT_NON_BOUNDARY, | 621 AT_NON_BOUNDARY, |
| 632 AFTER_NEWLINE | 622 AFTER_NEWLINE |
| 633 }; | 623 }; |
| 634 static AssertionNode* AtEnd(RegExpNode* on_success) { | 624 static AssertionNode* AtEnd(RegExpNode* on_success) { |
| 635 return new(on_success->zone()) AssertionNode(AT_END, on_success); | 625 return new (on_success->zone()) AssertionNode(AT_END, on_success); |
| 636 } | 626 } |
| 637 static AssertionNode* AtStart(RegExpNode* on_success) { | 627 static AssertionNode* AtStart(RegExpNode* on_success) { |
| 638 return new(on_success->zone()) AssertionNode(AT_START, on_success); | 628 return new (on_success->zone()) AssertionNode(AT_START, on_success); |
| 639 } | 629 } |
| 640 static AssertionNode* AtBoundary(RegExpNode* on_success) { | 630 static AssertionNode* AtBoundary(RegExpNode* on_success) { |
| 641 return new(on_success->zone()) AssertionNode(AT_BOUNDARY, on_success); | 631 return new (on_success->zone()) AssertionNode(AT_BOUNDARY, on_success); |
| 642 } | 632 } |
| 643 static AssertionNode* AtNonBoundary(RegExpNode* on_success) { | 633 static AssertionNode* AtNonBoundary(RegExpNode* on_success) { |
| 644 return new(on_success->zone()) AssertionNode(AT_NON_BOUNDARY, | 634 return new (on_success->zone()) AssertionNode(AT_NON_BOUNDARY, on_success); |
| 645 on_success); | |
| 646 } | 635 } |
| 647 static AssertionNode* AfterNewline(RegExpNode* on_success) { | 636 static AssertionNode* AfterNewline(RegExpNode* on_success) { |
| 648 return new(on_success->zone()) AssertionNode(AFTER_NEWLINE, on_success); | 637 return new (on_success->zone()) AssertionNode(AFTER_NEWLINE, on_success); |
| 649 } | 638 } |
| 650 virtual void Accept(NodeVisitor* visitor); | 639 virtual void Accept(NodeVisitor* visitor); |
| 651 virtual void Emit(RegExpCompiler* compiler, Trace* trace); | 640 virtual void Emit(RegExpCompiler* compiler, Trace* trace); |
| 652 virtual intptr_t EatsAtLeast(intptr_t still_to_find, intptr_t budget, | 641 virtual intptr_t EatsAtLeast(intptr_t still_to_find, |
| 642 intptr_t budget, |
| 653 bool not_at_start); | 643 bool not_at_start); |
| 654 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 644 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
| 655 RegExpCompiler* compiler, | 645 RegExpCompiler* compiler, |
| 656 intptr_t filled_in, | 646 intptr_t filled_in, |
| 657 bool not_at_start); | 647 bool not_at_start); |
| 658 virtual void FillInBMInfo(intptr_t offset, | 648 virtual void FillInBMInfo(intptr_t offset, |
| 659 intptr_t budget, | 649 intptr_t budget, |
| 660 BoyerMooreLookahead* bm, | 650 BoyerMooreLookahead* bm, |
| 661 bool not_at_start); | 651 bool not_at_start); |
| 662 AssertionType assertion_type() { return assertion_type_; } | 652 AssertionType assertion_type() { return assertion_type_; } |
| 663 | 653 |
| 664 private: | 654 private: |
| 665 void EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace); | 655 void EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace); |
| 666 enum IfPrevious { kIsNonWord, kIsWord }; | 656 enum IfPrevious { kIsNonWord, kIsWord }; |
| 667 void BacktrackIfPrevious(RegExpCompiler* compiler, | 657 void BacktrackIfPrevious(RegExpCompiler* compiler, |
| 668 Trace* trace, | 658 Trace* trace, |
| 669 IfPrevious backtrack_if_previous); | 659 IfPrevious backtrack_if_previous); |
| 670 AssertionNode(AssertionType t, RegExpNode* on_success) | 660 AssertionNode(AssertionType t, RegExpNode* on_success) |
| 671 : SeqRegExpNode(on_success), assertion_type_(t) { } | 661 : SeqRegExpNode(on_success), assertion_type_(t) {} |
| 672 AssertionType assertion_type_; | 662 AssertionType assertion_type_; |
| 673 }; | 663 }; |
| 674 | 664 |
| 675 | 665 |
| 676 class BackReferenceNode: public SeqRegExpNode { | 666 class BackReferenceNode : public SeqRegExpNode { |
| 677 public: | 667 public: |
| 678 BackReferenceNode(intptr_t start_reg, | 668 BackReferenceNode(intptr_t start_reg, |
| 679 intptr_t end_reg, | 669 intptr_t end_reg, |
| 680 RegExpNode* on_success) | 670 RegExpNode* on_success) |
| 681 : SeqRegExpNode(on_success), | 671 : SeqRegExpNode(on_success), start_reg_(start_reg), end_reg_(end_reg) {} |
| 682 start_reg_(start_reg), | |
| 683 end_reg_(end_reg) { } | |
| 684 virtual void Accept(NodeVisitor* visitor); | 672 virtual void Accept(NodeVisitor* visitor); |
| 685 intptr_t start_register() { return start_reg_; } | 673 intptr_t start_register() { return start_reg_; } |
| 686 intptr_t end_register() { return end_reg_; } | 674 intptr_t end_register() { return end_reg_; } |
| 687 virtual void Emit(RegExpCompiler* compiler, Trace* trace); | 675 virtual void Emit(RegExpCompiler* compiler, Trace* trace); |
| 688 virtual intptr_t EatsAtLeast(intptr_t still_to_find, | 676 virtual intptr_t EatsAtLeast(intptr_t still_to_find, |
| 689 intptr_t recursion_depth, | 677 intptr_t recursion_depth, |
| 690 bool not_at_start); | 678 bool not_at_start); |
| 691 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 679 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
| 692 RegExpCompiler* compiler, | 680 RegExpCompiler* compiler, |
| 693 intptr_t characters_filled_in, | 681 intptr_t characters_filled_in, |
| 694 bool not_at_start) { | 682 bool not_at_start) { |
| 695 return; | 683 return; |
| 696 } | 684 } |
| 697 virtual void FillInBMInfo(intptr_t offset, | 685 virtual void FillInBMInfo(intptr_t offset, |
| 698 intptr_t budget, | 686 intptr_t budget, |
| 699 BoyerMooreLookahead* bm, | 687 BoyerMooreLookahead* bm, |
| 700 bool not_at_start); | 688 bool not_at_start); |
| 701 | 689 |
| 702 private: | 690 private: |
| 703 intptr_t start_reg_; | 691 intptr_t start_reg_; |
| 704 intptr_t end_reg_; | 692 intptr_t end_reg_; |
| 705 }; | 693 }; |
| 706 | 694 |
| 707 | 695 |
| 708 class EndNode: public RegExpNode { | 696 class EndNode : public RegExpNode { |
| 709 public: | 697 public: |
| 710 enum Action { ACCEPT, BACKTRACK, NEGATIVE_SUBMATCH_SUCCESS }; | 698 enum Action { ACCEPT, BACKTRACK, NEGATIVE_SUBMATCH_SUCCESS }; |
| 711 explicit EndNode(Action action, Zone* zone) | 699 explicit EndNode(Action action, Zone* zone) |
| 712 : RegExpNode(zone), action_(action) { } | 700 : RegExpNode(zone), action_(action) {} |
| 713 virtual void Accept(NodeVisitor* visitor); | 701 virtual void Accept(NodeVisitor* visitor); |
| 714 virtual void Emit(RegExpCompiler* compiler, Trace* trace); | 702 virtual void Emit(RegExpCompiler* compiler, Trace* trace); |
| 715 virtual intptr_t EatsAtLeast(intptr_t still_to_find, | 703 virtual intptr_t EatsAtLeast(intptr_t still_to_find, |
| 716 intptr_t recursion_depth, | 704 intptr_t recursion_depth, |
| 717 bool not_at_start) { return 0; } | 705 bool not_at_start) { |
| 706 return 0; |
| 707 } |
| 718 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 708 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
| 719 RegExpCompiler* compiler, | 709 RegExpCompiler* compiler, |
| 720 intptr_t characters_filled_in, | 710 intptr_t characters_filled_in, |
| 721 bool not_at_start) { | 711 bool not_at_start) { |
| 722 // Returning 0 from EatsAtLeast should ensure we never get here. | 712 // Returning 0 from EatsAtLeast should ensure we never get here. |
| 723 UNREACHABLE(); | 713 UNREACHABLE(); |
| 724 } | 714 } |
| 725 virtual void FillInBMInfo(intptr_t offset, | 715 virtual void FillInBMInfo(intptr_t offset, |
| 726 intptr_t budget, | 716 intptr_t budget, |
| 727 BoyerMooreLookahead* bm, | 717 BoyerMooreLookahead* bm, |
| 728 bool not_at_start) { | 718 bool not_at_start) { |
| 729 // Returning 0 from EatsAtLeast should ensure we never get here. | 719 // Returning 0 from EatsAtLeast should ensure we never get here. |
| 730 UNREACHABLE(); | 720 UNREACHABLE(); |
| 731 } | 721 } |
| 732 | 722 |
| 733 private: | 723 private: |
| 734 Action action_; | 724 Action action_; |
| 735 }; | 725 }; |
| 736 | 726 |
| 737 | 727 |
| 738 class NegativeSubmatchSuccess: public EndNode { | 728 class NegativeSubmatchSuccess : public EndNode { |
| 739 public: | 729 public: |
| 740 NegativeSubmatchSuccess(intptr_t stack_pointer_reg, | 730 NegativeSubmatchSuccess(intptr_t stack_pointer_reg, |
| 741 intptr_t position_reg, | 731 intptr_t position_reg, |
| 742 intptr_t clear_capture_count, | 732 intptr_t clear_capture_count, |
| 743 intptr_t clear_capture_start, | 733 intptr_t clear_capture_start, |
| 744 Zone* zone) | 734 Zone* zone) |
| 745 : EndNode(NEGATIVE_SUBMATCH_SUCCESS, zone), | 735 : EndNode(NEGATIVE_SUBMATCH_SUCCESS, zone), |
| 746 stack_pointer_register_(stack_pointer_reg), | 736 stack_pointer_register_(stack_pointer_reg), |
| 747 current_position_register_(position_reg), | 737 current_position_register_(position_reg), |
| 748 clear_capture_count_(clear_capture_count), | 738 clear_capture_count_(clear_capture_count), |
| 749 clear_capture_start_(clear_capture_start) { } | 739 clear_capture_start_(clear_capture_start) {} |
| 750 virtual void Emit(RegExpCompiler* compiler, Trace* trace); | 740 virtual void Emit(RegExpCompiler* compiler, Trace* trace); |
| 751 | 741 |
| 752 private: | 742 private: |
| 753 intptr_t stack_pointer_register_; | 743 intptr_t stack_pointer_register_; |
| 754 intptr_t current_position_register_; | 744 intptr_t current_position_register_; |
| 755 intptr_t clear_capture_count_; | 745 intptr_t clear_capture_count_; |
| 756 intptr_t clear_capture_start_; | 746 intptr_t clear_capture_start_; |
| 757 }; | 747 }; |
| 758 | 748 |
| 759 | 749 |
| 760 class Guard: public ZoneAllocated { | 750 class Guard : public ZoneAllocated { |
| 761 public: | 751 public: |
| 762 enum Relation { LT, GEQ }; | 752 enum Relation { LT, GEQ }; |
| 763 Guard(intptr_t reg, Relation op, intptr_t value) | 753 Guard(intptr_t reg, Relation op, intptr_t value) |
| 764 : reg_(reg), | 754 : reg_(reg), op_(op), value_(value) {} |
| 765 op_(op), | |
| 766 value_(value) { } | |
| 767 intptr_t reg() { return reg_; } | 755 intptr_t reg() { return reg_; } |
| 768 Relation op() { return op_; } | 756 Relation op() { return op_; } |
| 769 intptr_t value() { return value_; } | 757 intptr_t value() { return value_; } |
| 770 | 758 |
| 771 private: | 759 private: |
| 772 intptr_t reg_; | 760 intptr_t reg_; |
| 773 Relation op_; | 761 Relation op_; |
| 774 intptr_t value_; | 762 intptr_t value_; |
| 775 }; | 763 }; |
| 776 | 764 |
| 777 | 765 |
| 778 class GuardedAlternative { | 766 class GuardedAlternative { |
| 779 public: | 767 public: |
| 780 explicit GuardedAlternative(RegExpNode* node) : node_(node), guards_(NULL) { } | 768 explicit GuardedAlternative(RegExpNode* node) : node_(node), guards_(NULL) {} |
| 781 void AddGuard(Guard* guard, Zone* zone); | 769 void AddGuard(Guard* guard, Zone* zone); |
| 782 RegExpNode* node() { return node_; } | 770 RegExpNode* node() { return node_; } |
| 783 void set_node(RegExpNode* node) { node_ = node; } | 771 void set_node(RegExpNode* node) { node_ = node; } |
| 784 ZoneGrowableArray<Guard*>* guards() { return guards_; } | 772 ZoneGrowableArray<Guard*>* guards() { return guards_; } |
| 785 | 773 |
| 786 private: | 774 private: |
| 787 RegExpNode* node_; | 775 RegExpNode* node_; |
| 788 ZoneGrowableArray<Guard*>* guards_; | 776 ZoneGrowableArray<Guard*>* guards_; |
| 789 | 777 |
| 790 DISALLOW_ALLOCATION(); | 778 DISALLOW_ALLOCATION(); |
| 791 }; | 779 }; |
| 792 | 780 |
| 793 | 781 |
| 794 struct AlternativeGeneration; | 782 struct AlternativeGeneration; |
| 795 | 783 |
| 796 | 784 |
| 797 class ChoiceNode: public RegExpNode { | 785 class ChoiceNode : public RegExpNode { |
| 798 public: | 786 public: |
| 799 explicit ChoiceNode(intptr_t expected_size, Zone* zone) | 787 explicit ChoiceNode(intptr_t expected_size, Zone* zone) |
| 800 : RegExpNode(zone), | 788 : RegExpNode(zone), |
| 801 alternatives_(new(zone) | 789 alternatives_(new (zone) |
| 802 ZoneGrowableArray<GuardedAlternative>(expected_size)), | 790 ZoneGrowableArray<GuardedAlternative>(expected_size)), |
| 803 not_at_start_(false), | 791 not_at_start_(false), |
| 804 being_calculated_(false) { } | 792 being_calculated_(false) {} |
| 805 virtual void Accept(NodeVisitor* visitor); | 793 virtual void Accept(NodeVisitor* visitor); |
| 806 void AddAlternative(GuardedAlternative node) { | 794 void AddAlternative(GuardedAlternative node) { alternatives()->Add(node); } |
| 807 alternatives()->Add(node); | |
| 808 } | |
| 809 ZoneGrowableArray<GuardedAlternative>* alternatives() { | 795 ZoneGrowableArray<GuardedAlternative>* alternatives() { |
| 810 return alternatives_; | 796 return alternatives_; |
| 811 } | 797 } |
| 812 virtual void Emit(RegExpCompiler* compiler, Trace* trace); | 798 virtual void Emit(RegExpCompiler* compiler, Trace* trace); |
| 813 virtual intptr_t EatsAtLeast(intptr_t still_to_find, intptr_t budget, | 799 virtual intptr_t EatsAtLeast(intptr_t still_to_find, |
| 800 intptr_t budget, |
| 814 bool not_at_start); | 801 bool not_at_start); |
| 815 intptr_t EatsAtLeastHelper(intptr_t still_to_find, | 802 intptr_t EatsAtLeastHelper(intptr_t still_to_find, |
| 816 intptr_t budget, | 803 intptr_t budget, |
| 817 RegExpNode* ignore_this_node, | 804 RegExpNode* ignore_this_node, |
| 818 bool not_at_start); | 805 bool not_at_start); |
| 819 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 806 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
| 820 RegExpCompiler* compiler, | 807 RegExpCompiler* compiler, |
| 821 intptr_t characters_filled_in, | 808 intptr_t characters_filled_in, |
| 822 bool not_at_start); | 809 bool not_at_start); |
| 823 virtual void FillInBMInfo(intptr_t offset, | 810 virtual void FillInBMInfo(intptr_t offset, |
| (...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 868 intptr_t first_choice, | 855 intptr_t first_choice, |
| 869 Trace* trace, | 856 Trace* trace, |
| 870 PreloadState* preloads); | 857 PreloadState* preloads); |
| 871 // If true, this node is never checked at the start of the input. | 858 // If true, this node is never checked at the start of the input. |
| 872 // Allows a new trace to start with at_start() set to false. | 859 // Allows a new trace to start with at_start() set to false. |
| 873 bool not_at_start_; | 860 bool not_at_start_; |
| 874 bool being_calculated_; | 861 bool being_calculated_; |
| 875 }; | 862 }; |
| 876 | 863 |
| 877 | 864 |
| 878 class NegativeLookaheadChoiceNode: public ChoiceNode { | 865 class NegativeLookaheadChoiceNode : public ChoiceNode { |
| 879 public: | 866 public: |
| 880 explicit NegativeLookaheadChoiceNode(GuardedAlternative this_must_fail, | 867 explicit NegativeLookaheadChoiceNode(GuardedAlternative this_must_fail, |
| 881 GuardedAlternative then_do_this, | 868 GuardedAlternative then_do_this, |
| 882 Zone* zone) | 869 Zone* zone) |
| 883 : ChoiceNode(2, zone) { | 870 : ChoiceNode(2, zone) { |
| 884 AddAlternative(this_must_fail); | 871 AddAlternative(this_must_fail); |
| 885 AddAlternative(then_do_this); | 872 AddAlternative(then_do_this); |
| 886 } | 873 } |
| 887 virtual intptr_t EatsAtLeast(intptr_t still_to_find, intptr_t budget, | 874 virtual intptr_t EatsAtLeast(intptr_t still_to_find, |
| 875 intptr_t budget, |
| 888 bool not_at_start); | 876 bool not_at_start); |
| 889 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 877 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
| 890 RegExpCompiler* compiler, | 878 RegExpCompiler* compiler, |
| 891 intptr_t characters_filled_in, | 879 intptr_t characters_filled_in, |
| 892 bool not_at_start); | 880 bool not_at_start); |
| 893 virtual void FillInBMInfo(intptr_t offset, | 881 virtual void FillInBMInfo(intptr_t offset, |
| 894 intptr_t budget, | 882 intptr_t budget, |
| 895 BoyerMooreLookahead* bm, | 883 BoyerMooreLookahead* bm, |
| 896 bool not_at_start) { | 884 bool not_at_start) { |
| 897 (*alternatives_)[1].node()->FillInBMInfo( | 885 (*alternatives_)[1].node()->FillInBMInfo(offset, budget - 1, bm, |
| 898 offset, budget - 1, bm, not_at_start); | 886 not_at_start); |
| 899 if (offset == 0) set_bm_info(not_at_start, bm); | 887 if (offset == 0) set_bm_info(not_at_start, bm); |
| 900 } | 888 } |
| 901 // For a negative lookahead we don't emit the quick check for the | 889 // For a negative lookahead we don't emit the quick check for the |
| 902 // alternative that is expected to fail. This is because quick check code | 890 // alternative that is expected to fail. This is because quick check code |
| 903 // starts by loading enough characters for the alternative that takes fewest | 891 // starts by loading enough characters for the alternative that takes fewest |
| 904 // characters, but on a negative lookahead the negative branch did not take | 892 // characters, but on a negative lookahead the negative branch did not take |
| 905 // part in that calculation (EatsAtLeast) so the assumptions don't hold. | 893 // part in that calculation (EatsAtLeast) so the assumptions don't hold. |
| 906 virtual bool try_to_emit_quick_check_for_alternative(bool is_first) { | 894 virtual bool try_to_emit_quick_check_for_alternative(bool is_first) { |
| 907 return !is_first; | 895 return !is_first; |
| 908 } | 896 } |
| 909 virtual RegExpNode* FilterOneByte(intptr_t depth, bool ignore_case); | 897 virtual RegExpNode* FilterOneByte(intptr_t depth, bool ignore_case); |
| 910 }; | 898 }; |
| 911 | 899 |
| 912 | 900 |
| 913 class LoopChoiceNode: public ChoiceNode { | 901 class LoopChoiceNode : public ChoiceNode { |
| 914 public: | 902 public: |
| 915 explicit LoopChoiceNode(bool body_can_be_zero_length, Zone* zone) | 903 explicit LoopChoiceNode(bool body_can_be_zero_length, Zone* zone) |
| 916 : ChoiceNode(2, zone), | 904 : ChoiceNode(2, zone), |
| 917 loop_node_(NULL), | 905 loop_node_(NULL), |
| 918 continue_node_(NULL), | 906 continue_node_(NULL), |
| 919 body_can_be_zero_length_(body_can_be_zero_length) { } | 907 body_can_be_zero_length_(body_can_be_zero_length) {} |
| 920 void AddLoopAlternative(GuardedAlternative alt); | 908 void AddLoopAlternative(GuardedAlternative alt); |
| 921 void AddContinueAlternative(GuardedAlternative alt); | 909 void AddContinueAlternative(GuardedAlternative alt); |
| 922 virtual void Emit(RegExpCompiler* compiler, Trace* trace); | 910 virtual void Emit(RegExpCompiler* compiler, Trace* trace); |
| 923 virtual intptr_t EatsAtLeast(intptr_t still_to_find, intptr_t budget, | 911 virtual intptr_t EatsAtLeast(intptr_t still_to_find, |
| 912 intptr_t budget, |
| 924 bool not_at_start); | 913 bool not_at_start); |
| 925 virtual void GetQuickCheckDetails(QuickCheckDetails* details, | 914 virtual void GetQuickCheckDetails(QuickCheckDetails* details, |
| 926 RegExpCompiler* compiler, | 915 RegExpCompiler* compiler, |
| 927 intptr_t characters_filled_in, | 916 intptr_t characters_filled_in, |
| 928 bool not_at_start); | 917 bool not_at_start); |
| 929 virtual void FillInBMInfo(intptr_t offset, | 918 virtual void FillInBMInfo(intptr_t offset, |
| 930 intptr_t budget, | 919 intptr_t budget, |
| 931 BoyerMooreLookahead* bm, | 920 BoyerMooreLookahead* bm, |
| 932 bool not_at_start); | 921 bool not_at_start); |
| 933 RegExpNode* loop_node() { return loop_node_; } | 922 RegExpNode* loop_node() { return loop_node_; } |
| (...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 990 | 979 |
| 991 ContainedInLattice AddRange(ContainedInLattice a, | 980 ContainedInLattice AddRange(ContainedInLattice a, |
| 992 const intptr_t* ranges, | 981 const intptr_t* ranges, |
| 993 intptr_t ranges_size, | 982 intptr_t ranges_size, |
| 994 Interval new_range); | 983 Interval new_range); |
| 995 | 984 |
| 996 | 985 |
| 997 class BoyerMoorePositionInfo : public ZoneAllocated { | 986 class BoyerMoorePositionInfo : public ZoneAllocated { |
| 998 public: | 987 public: |
| 999 explicit BoyerMoorePositionInfo(Zone* zone) | 988 explicit BoyerMoorePositionInfo(Zone* zone) |
| 1000 : map_(new(zone) ZoneGrowableArray<bool>(kMapSize)), | 989 : map_(new (zone) ZoneGrowableArray<bool>(kMapSize)), |
| 1001 map_count_(0), | 990 map_count_(0), |
| 1002 w_(kNotYet), | 991 w_(kNotYet), |
| 1003 s_(kNotYet), | 992 s_(kNotYet), |
| 1004 d_(kNotYet), | 993 d_(kNotYet), |
| 1005 surrogate_(kNotYet) { | 994 surrogate_(kNotYet) { |
| 1006 for (intptr_t i = 0; i < kMapSize; i++) { | 995 for (intptr_t i = 0; i < kMapSize; i++) { |
| 1007 map_->Add(false); | 996 map_->Add(false); |
| 1008 } | 997 } |
| 1009 } | 998 } |
| 1010 | 999 |
| 1011 bool& at(intptr_t i) { return (*map_)[i]; } | 1000 bool& at(intptr_t i) { return (*map_)[i]; } |
| 1012 | 1001 |
| 1013 static const intptr_t kMapSize = 128; | 1002 static const intptr_t kMapSize = 128; |
| 1014 static const intptr_t kMask = kMapSize - 1; | 1003 static const intptr_t kMask = kMapSize - 1; |
| 1015 | 1004 |
| 1016 intptr_t map_count() const { return map_count_; } | 1005 intptr_t map_count() const { return map_count_; } |
| 1017 | 1006 |
| 1018 void Set(intptr_t character); | 1007 void Set(intptr_t character); |
| 1019 void SetInterval(const Interval& interval); | 1008 void SetInterval(const Interval& interval); |
| 1020 void SetAll(); | 1009 void SetAll(); |
| 1021 bool is_non_word() { return w_ == kLatticeOut; } | 1010 bool is_non_word() { return w_ == kLatticeOut; } |
| 1022 bool is_word() { return w_ == kLatticeIn; } | 1011 bool is_word() { return w_ == kLatticeIn; } |
| 1023 | 1012 |
| 1024 private: | 1013 private: |
| 1025 ZoneGrowableArray<bool>* map_; | 1014 ZoneGrowableArray<bool>* map_; |
| 1026 intptr_t map_count_; // Number of set bits in the map. | 1015 intptr_t map_count_; // Number of set bits in the map. |
| 1027 ContainedInLattice w_; // The \w character class. | 1016 ContainedInLattice w_; // The \w character class. |
| 1028 ContainedInLattice s_; // The \s character class. | 1017 ContainedInLattice s_; // The \s character class. |
| 1029 ContainedInLattice d_; // The \d character class. | 1018 ContainedInLattice d_; // The \d character class. |
| 1030 ContainedInLattice surrogate_; // Surrogate UTF-16 code units. | 1019 ContainedInLattice surrogate_; // Surrogate UTF-16 code units. |
| 1031 }; | 1020 }; |
| 1032 | 1021 |
| 1033 | 1022 |
| 1034 class BoyerMooreLookahead : public ZoneAllocated{ | 1023 class BoyerMooreLookahead : public ZoneAllocated { |
| 1035 public: | 1024 public: |
| 1036 BoyerMooreLookahead(intptr_t length, RegExpCompiler* compiler, | 1025 BoyerMooreLookahead(intptr_t length, RegExpCompiler* compiler, Zone* Zone); |
| 1037 Zone* Zone); | |
| 1038 | 1026 |
| 1039 intptr_t length() { return length_; } | 1027 intptr_t length() { return length_; } |
| 1040 intptr_t max_char() { return max_char_; } | 1028 intptr_t max_char() { return max_char_; } |
| 1041 RegExpCompiler* compiler() { return compiler_; } | 1029 RegExpCompiler* compiler() { return compiler_; } |
| 1042 | 1030 |
| 1043 intptr_t Count(intptr_t map_number) { | 1031 intptr_t Count(intptr_t map_number) { |
| 1044 return bitmaps_->At(map_number)->map_count(); | 1032 return bitmaps_->At(map_number)->map_count(); |
| 1045 } | 1033 } |
| 1046 | 1034 |
| 1047 BoyerMoorePositionInfo* at(intptr_t i) { return bitmaps_->At(i); } | 1035 BoyerMoorePositionInfo* at(intptr_t i) { return bitmaps_->At(i); } |
| 1048 | 1036 |
| 1049 void Set(intptr_t map_number, intptr_t character) { | 1037 void Set(intptr_t map_number, intptr_t character) { |
| 1050 if (character > max_char_) return; | 1038 if (character > max_char_) return; |
| 1051 BoyerMoorePositionInfo* info = bitmaps_->At(map_number); | 1039 BoyerMoorePositionInfo* info = bitmaps_->At(map_number); |
| 1052 info->Set(character); | 1040 info->Set(character); |
| 1053 } | 1041 } |
| 1054 | 1042 |
| 1055 void SetInterval(intptr_t map_number, const Interval& interval) { | 1043 void SetInterval(intptr_t map_number, const Interval& interval) { |
| 1056 if (interval.from() > max_char_) return; | 1044 if (interval.from() > max_char_) return; |
| 1057 BoyerMoorePositionInfo* info = bitmaps_->At(map_number); | 1045 BoyerMoorePositionInfo* info = bitmaps_->At(map_number); |
| 1058 if (interval.to() > max_char_) { | 1046 if (interval.to() > max_char_) { |
| 1059 info->SetInterval(Interval(interval.from(), max_char_)); | 1047 info->SetInterval(Interval(interval.from(), max_char_)); |
| 1060 } else { | 1048 } else { |
| 1061 info->SetInterval(interval); | 1049 info->SetInterval(interval); |
| 1062 } | 1050 } |
| 1063 } | 1051 } |
| 1064 | 1052 |
| 1065 void SetAll(intptr_t map_number) { | 1053 void SetAll(intptr_t map_number) { bitmaps_->At(map_number)->SetAll(); } |
| 1066 bitmaps_->At(map_number)->SetAll(); | |
| 1067 } | |
| 1068 | 1054 |
| 1069 void SetRest(intptr_t from_map) { | 1055 void SetRest(intptr_t from_map) { |
| 1070 for (intptr_t i = from_map; i < length_; i++) SetAll(i); | 1056 for (intptr_t i = from_map; i < length_; i++) |
| 1057 SetAll(i); |
| 1071 } | 1058 } |
| 1072 void EmitSkipInstructions(RegExpMacroAssembler* masm); | 1059 void EmitSkipInstructions(RegExpMacroAssembler* masm); |
| 1073 | 1060 |
| 1074 private: | 1061 private: |
| 1075 // This is the value obtained by EatsAtLeast. If we do not have at least this | 1062 // This is the value obtained by EatsAtLeast. If we do not have at least this |
| 1076 // many characters left in the sample string then the match is bound to fail. | 1063 // many characters left in the sample string then the match is bound to fail. |
| 1077 // Therefore it is OK to read a character this far ahead of the current match | 1064 // Therefore it is OK to read a character this far ahead of the current match |
| 1078 // point. | 1065 // point. |
| 1079 intptr_t length_; | 1066 intptr_t length_; |
| 1080 RegExpCompiler* compiler_; | 1067 RegExpCompiler* compiler_; |
| 1081 // 0xff for Latin1, 0xffff for UTF-16. | 1068 // 0xff for Latin1, 0xffff for UTF-16. |
| 1082 intptr_t max_char_; | 1069 intptr_t max_char_; |
| 1083 ZoneGrowableArray<BoyerMoorePositionInfo*>* bitmaps_; | 1070 ZoneGrowableArray<BoyerMoorePositionInfo*>* bitmaps_; |
| 1084 | 1071 |
| 1085 intptr_t GetSkipTable(intptr_t min_lookahead, | 1072 intptr_t GetSkipTable(intptr_t min_lookahead, |
| 1086 intptr_t max_lookahead, | 1073 intptr_t max_lookahead, |
| 1087 const TypedData& boolean_skip_table); | 1074 const TypedData& boolean_skip_table); |
| 1088 bool FindWorthwhileInterval(intptr_t* from, intptr_t* to); | 1075 bool FindWorthwhileInterval(intptr_t* from, intptr_t* to); |
| 1089 intptr_t FindBestInterval( | 1076 intptr_t FindBestInterval(intptr_t max_number_of_chars, |
| 1090 intptr_t max_number_of_chars, | 1077 intptr_t old_biggest_points, |
| 1091 intptr_t old_biggest_points, | 1078 intptr_t* from, |
| 1092 intptr_t* from, intptr_t* to); | 1079 intptr_t* to); |
| 1093 }; | 1080 }; |
| 1094 | 1081 |
| 1095 | 1082 |
| 1096 // There are many ways to generate code for a node. This class encapsulates | 1083 // There are many ways to generate code for a node. This class encapsulates |
| 1097 // the current way we should be generating. In other words it encapsulates | 1084 // the current way we should be generating. In other words it encapsulates |
| 1098 // the current state of the code generator. The effect of this is that we | 1085 // the current state of the code generator. The effect of this is that we |
| 1099 // generate code for paths that the matcher can take through the regular | 1086 // generate code for paths that the matcher can take through the regular |
| 1100 // expression. A given node in the regexp can be code-generated several times | 1087 // expression. A given node in the regexp can be code-generated several times |
| 1101 // as it can be part of several traces. For example for the regexp: | 1088 // as it can be part of several traces. For example for the regexp: |
| 1102 // /foo(bar|ip)baz/ the code to match baz will be generated twice, once as part | 1089 // /foo(bar|ip)baz/ the code to match baz will be generated twice, once as part |
| 1103 // of the foo-bar-baz trace and once as part of the foo-ip-baz trace. The code | 1090 // of the foo-bar-baz trace and once as part of the foo-ip-baz trace. The code |
| 1104 // to match foo is generated only once (the traces have a common prefix). The | 1091 // to match foo is generated only once (the traces have a common prefix). The |
| 1105 // code to store the capture is deferred and generated (twice) after the places | 1092 // code to store the capture is deferred and generated (twice) after the places |
| 1106 // where baz has been matched. | 1093 // where baz has been matched. |
| 1107 class Trace { | 1094 class Trace { |
| 1108 public: | 1095 public: |
| 1109 // A value for a property that is either known to be true, know to be false, | 1096 // A value for a property that is either known to be true, know to be false, |
| 1110 // or not known. | 1097 // or not known. |
| 1111 enum TriBool { | 1098 enum TriBool { UNKNOWN = -1, FALSE_VALUE = 0, TRUE_VALUE = 1 }; |
| 1112 UNKNOWN = -1, FALSE_VALUE = 0, TRUE_VALUE = 1 | |
| 1113 }; | |
| 1114 | 1099 |
| 1115 class DeferredAction { | 1100 class DeferredAction { |
| 1116 public: | 1101 public: |
| 1117 DeferredAction(ActionNode::ActionType action_type, intptr_t reg) | 1102 DeferredAction(ActionNode::ActionType action_type, intptr_t reg) |
| 1118 : action_type_(action_type), reg_(reg), next_(NULL) { } | 1103 : action_type_(action_type), reg_(reg), next_(NULL) {} |
| 1119 DeferredAction* next() { return next_; } | 1104 DeferredAction* next() { return next_; } |
| 1120 bool Mentions(intptr_t reg); | 1105 bool Mentions(intptr_t reg); |
| 1121 intptr_t reg() { return reg_; } | 1106 intptr_t reg() { return reg_; } |
| 1122 ActionNode::ActionType action_type() { return action_type_; } | 1107 ActionNode::ActionType action_type() { return action_type_; } |
| 1108 |
| 1123 private: | 1109 private: |
| 1124 ActionNode::ActionType action_type_; | 1110 ActionNode::ActionType action_type_; |
| 1125 intptr_t reg_; | 1111 intptr_t reg_; |
| 1126 DeferredAction* next_; | 1112 DeferredAction* next_; |
| 1127 friend class Trace; | 1113 friend class Trace; |
| 1128 | 1114 |
| 1129 DISALLOW_ALLOCATION(); | 1115 DISALLOW_ALLOCATION(); |
| 1130 }; | 1116 }; |
| 1131 | 1117 |
| 1132 class DeferredCapture : public DeferredAction { | 1118 class DeferredCapture : public DeferredAction { |
| 1133 public: | 1119 public: |
| 1134 DeferredCapture(intptr_t reg, bool is_capture, Trace* trace) | 1120 DeferredCapture(intptr_t reg, bool is_capture, Trace* trace) |
| 1135 : DeferredAction(ActionNode::STORE_POSITION, reg), | 1121 : DeferredAction(ActionNode::STORE_POSITION, reg), |
| 1136 cp_offset_(trace->cp_offset()), | 1122 cp_offset_(trace->cp_offset()), |
| 1137 is_capture_(is_capture) { } | 1123 is_capture_(is_capture) {} |
| 1138 intptr_t cp_offset() { return cp_offset_; } | 1124 intptr_t cp_offset() { return cp_offset_; } |
| 1139 bool is_capture() { return is_capture_; } | 1125 bool is_capture() { return is_capture_; } |
| 1126 |
| 1140 private: | 1127 private: |
| 1141 intptr_t cp_offset_; | 1128 intptr_t cp_offset_; |
| 1142 bool is_capture_; | 1129 bool is_capture_; |
| 1143 void set_cp_offset(intptr_t cp_offset) { cp_offset_ = cp_offset; } | 1130 void set_cp_offset(intptr_t cp_offset) { cp_offset_ = cp_offset; } |
| 1144 }; | 1131 }; |
| 1145 | 1132 |
| 1146 class DeferredSetRegister : public DeferredAction { | 1133 class DeferredSetRegister : public DeferredAction { |
| 1147 public: | 1134 public: |
| 1148 DeferredSetRegister(intptr_t reg, intptr_t value) | 1135 DeferredSetRegister(intptr_t reg, intptr_t value) |
| 1149 : DeferredAction(ActionNode::SET_REGISTER, reg), | 1136 : DeferredAction(ActionNode::SET_REGISTER, reg), value_(value) {} |
| 1150 value_(value) { } | |
| 1151 intptr_t value() { return value_; } | 1137 intptr_t value() { return value_; } |
| 1138 |
| 1152 private: | 1139 private: |
| 1153 intptr_t value_; | 1140 intptr_t value_; |
| 1154 }; | 1141 }; |
| 1155 | 1142 |
| 1156 class DeferredClearCaptures : public DeferredAction { | 1143 class DeferredClearCaptures : public DeferredAction { |
| 1157 public: | 1144 public: |
| 1158 explicit DeferredClearCaptures(Interval range) | 1145 explicit DeferredClearCaptures(Interval range) |
| 1159 : DeferredAction(ActionNode::CLEAR_CAPTURES, -1), | 1146 : DeferredAction(ActionNode::CLEAR_CAPTURES, -1), range_(range) {} |
| 1160 range_(range) { } | |
| 1161 Interval range() { return range_; } | 1147 Interval range() { return range_; } |
| 1148 |
| 1162 private: | 1149 private: |
| 1163 Interval range_; | 1150 Interval range_; |
| 1164 }; | 1151 }; |
| 1165 | 1152 |
| 1166 class DeferredIncrementRegister : public DeferredAction { | 1153 class DeferredIncrementRegister : public DeferredAction { |
| 1167 public: | 1154 public: |
| 1168 explicit DeferredIncrementRegister(intptr_t reg) | 1155 explicit DeferredIncrementRegister(intptr_t reg) |
| 1169 : DeferredAction(ActionNode::INCREMENT_REGISTER, reg) { } | 1156 : DeferredAction(ActionNode::INCREMENT_REGISTER, reg) {} |
| 1170 }; | 1157 }; |
| 1171 | 1158 |
| 1172 Trace() | 1159 Trace() |
| 1173 : cp_offset_(0), | 1160 : cp_offset_(0), |
| 1174 actions_(NULL), | 1161 actions_(NULL), |
| 1175 backtrack_(NULL), | 1162 backtrack_(NULL), |
| 1176 stop_node_(NULL), | 1163 stop_node_(NULL), |
| 1177 loop_label_(NULL), | 1164 loop_label_(NULL), |
| 1178 characters_preloaded_(0), | 1165 characters_preloaded_(0), |
| 1179 bound_checked_up_to_(0), | 1166 bound_checked_up_to_(0), |
| 1180 flush_budget_(100), | 1167 flush_budget_(100), |
| 1181 at_start_(UNKNOWN) { } | 1168 at_start_(UNKNOWN) {} |
| 1182 | 1169 |
| 1183 // End the trace. This involves flushing the deferred actions in the trace | 1170 // End the trace. This involves flushing the deferred actions in the trace |
| 1184 // and pushing a backtrack location onto the backtrack stack. Once this is | 1171 // and pushing a backtrack location onto the backtrack stack. Once this is |
| 1185 // done we can start a new trace or go to one that has already been | 1172 // done we can start a new trace or go to one that has already been |
| 1186 // generated. | 1173 // generated. |
| 1187 void Flush(RegExpCompiler* compiler, RegExpNode* successor); | 1174 void Flush(RegExpCompiler* compiler, RegExpNode* successor); |
| 1188 intptr_t cp_offset() { return cp_offset_; } | 1175 intptr_t cp_offset() { return cp_offset_; } |
| 1189 DeferredAction* actions() { return actions_; } | 1176 DeferredAction* actions() { return actions_; } |
| 1190 // A trivial trace is one that has no deferred actions or other state that | 1177 // A trivial trace is one that has no deferred actions or other state that |
| 1191 // affects the assumptions used when generating code. There is no recorded | 1178 // affects the assumptions used when generating code. There is no recorded |
| 1192 // backtrack location in a trivial trace, so with a trivial trace we will | 1179 // backtrack location in a trivial trace, so with a trivial trace we will |
| 1193 // generate code that, on a failure to match, gets the backtrack location | 1180 // generate code that, on a failure to match, gets the backtrack location |
| 1194 // from the backtrack stack rather than using a direct jump instruction. We | 1181 // from the backtrack stack rather than using a direct jump instruction. We |
| 1195 // always start code generation with a trivial trace and non-trivial traces | 1182 // always start code generation with a trivial trace and non-trivial traces |
| 1196 // are created as we emit code for nodes or add to the list of deferred | 1183 // are created as we emit code for nodes or add to the list of deferred |
| 1197 // actions in the trace. The location of the code generated for a node using | 1184 // actions in the trace. The location of the code generated for a node using |
| 1198 // a trivial trace is recorded in a label in the node so that gotos can be | 1185 // a trivial trace is recorded in a label in the node so that gotos can be |
| 1199 // generated to that code. | 1186 // generated to that code. |
| 1200 bool is_trivial() { | 1187 bool is_trivial() { |
| 1201 return backtrack_ == NULL && | 1188 return backtrack_ == NULL && actions_ == NULL && cp_offset_ == 0 && |
| 1202 actions_ == NULL && | 1189 characters_preloaded_ == 0 && bound_checked_up_to_ == 0 && |
| 1203 cp_offset_ == 0 && | 1190 quick_check_performed_.characters() == 0 && at_start_ == UNKNOWN; |
| 1204 characters_preloaded_ == 0 && | |
| 1205 bound_checked_up_to_ == 0 && | |
| 1206 quick_check_performed_.characters() == 0 && | |
| 1207 at_start_ == UNKNOWN; | |
| 1208 } | 1191 } |
| 1209 TriBool at_start() { return at_start_; } | 1192 TriBool at_start() { return at_start_; } |
| 1210 void set_at_start(bool at_start) { | 1193 void set_at_start(bool at_start) { |
| 1211 at_start_ = at_start ? TRUE_VALUE : FALSE_VALUE; | 1194 at_start_ = at_start ? TRUE_VALUE : FALSE_VALUE; |
| 1212 } | 1195 } |
| 1213 BlockLabel* backtrack() { return backtrack_; } | 1196 BlockLabel* backtrack() { return backtrack_; } |
| 1214 BlockLabel* loop_label() { return loop_label_; } | 1197 BlockLabel* loop_label() { return loop_label_; } |
| 1215 RegExpNode* stop_node() { return stop_node_; } | 1198 RegExpNode* stop_node() { return stop_node_; } |
| 1216 intptr_t characters_preloaded() { return characters_preloaded_; } | 1199 intptr_t characters_preloaded() { return characters_preloaded_; } |
| 1217 intptr_t bound_checked_up_to() { return bound_checked_up_to_; } | 1200 intptr_t bound_checked_up_to() { return bound_checked_up_to_; } |
| (...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1282 Trace counter_backtrack_trace_; | 1265 Trace counter_backtrack_trace_; |
| 1283 }; | 1266 }; |
| 1284 | 1267 |
| 1285 | 1268 |
| 1286 struct PreloadState { | 1269 struct PreloadState { |
| 1287 static const intptr_t kEatsAtLeastNotYetInitialized = -1; | 1270 static const intptr_t kEatsAtLeastNotYetInitialized = -1; |
| 1288 bool preload_is_current_; | 1271 bool preload_is_current_; |
| 1289 bool preload_has_checked_bounds_; | 1272 bool preload_has_checked_bounds_; |
| 1290 intptr_t preload_characters_; | 1273 intptr_t preload_characters_; |
| 1291 intptr_t eats_at_least_; | 1274 intptr_t eats_at_least_; |
| 1292 void init() { | 1275 void init() { eats_at_least_ = kEatsAtLeastNotYetInitialized; } |
| 1293 eats_at_least_ = kEatsAtLeastNotYetInitialized; | |
| 1294 } | |
| 1295 | 1276 |
| 1296 DISALLOW_ALLOCATION(); | 1277 DISALLOW_ALLOCATION(); |
| 1297 }; | 1278 }; |
| 1298 | 1279 |
| 1299 | 1280 |
| 1300 class NodeVisitor : public ValueObject { | 1281 class NodeVisitor : public ValueObject { |
| 1301 public: | 1282 public: |
| 1302 virtual ~NodeVisitor() { } | 1283 virtual ~NodeVisitor() {} |
| 1303 #define DECLARE_VISIT(Type) \ | 1284 #define DECLARE_VISIT(Type) virtual void Visit##Type(Type##Node* that) = 0; |
| 1304 virtual void Visit##Type(Type##Node* that) = 0; | 1285 FOR_EACH_NODE_TYPE(DECLARE_VISIT) |
| 1305 FOR_EACH_NODE_TYPE(DECLARE_VISIT) | |
| 1306 #undef DECLARE_VISIT | 1286 #undef DECLARE_VISIT |
| 1307 virtual void VisitLoopChoice(LoopChoiceNode* that) { VisitChoice(that); } | 1287 virtual void VisitLoopChoice(LoopChoiceNode* that) { VisitChoice(that); } |
| 1308 }; | 1288 }; |
| 1309 | 1289 |
| 1310 | 1290 |
| 1311 // Assertion propagation moves information about assertions such as | 1291 // Assertion propagation moves information about assertions such as |
| 1312 // \b to the affected nodes. For instance, in /.\b./ information must | 1292 // \b to the affected nodes. For instance, in /.\b./ information must |
| 1313 // be propagated to the first '.' that whatever follows needs to know | 1293 // be propagated to the first '.' that whatever follows needs to know |
| 1314 // if it matched a word or a non-word, and to the second '.' that it | 1294 // if it matched a word or a non-word, and to the second '.' that it |
| 1315 // has to check if it succeeds a word or non-word. In this case the | 1295 // has to check if it succeeds a word or non-word. In this case the |
| 1316 // result will be something like: | 1296 // result will be something like: |
| 1317 // | 1297 // |
| 1318 // +-------+ +------------+ | 1298 // +-------+ +------------+ |
| 1319 // | . | | . | | 1299 // | . | | . | |
| 1320 // +-------+ ---> +------------+ | 1300 // +-------+ ---> +------------+ |
| 1321 // | word? | | check word | | 1301 // | word? | | check word | |
| 1322 // +-------+ +------------+ | 1302 // +-------+ +------------+ |
| 1323 class Analysis: public NodeVisitor { | 1303 class Analysis : public NodeVisitor { |
| 1324 public: | 1304 public: |
| 1325 Analysis(bool ignore_case, bool is_one_byte) | 1305 Analysis(bool ignore_case, bool is_one_byte) |
| 1326 : ignore_case_(ignore_case), | 1306 : ignore_case_(ignore_case), |
| 1327 is_one_byte_(is_one_byte), | 1307 is_one_byte_(is_one_byte), |
| 1328 error_message_(NULL) { } | 1308 error_message_(NULL) {} |
| 1329 void EnsureAnalyzed(RegExpNode* node); | 1309 void EnsureAnalyzed(RegExpNode* node); |
| 1330 | 1310 |
| 1331 #define DECLARE_VISIT(Type) \ | 1311 #define DECLARE_VISIT(Type) virtual void Visit##Type(Type##Node* that); |
| 1332 virtual void Visit##Type(Type##Node* that); | 1312 FOR_EACH_NODE_TYPE(DECLARE_VISIT) |
| 1333 FOR_EACH_NODE_TYPE(DECLARE_VISIT) | |
| 1334 #undef DECLARE_VISIT | 1313 #undef DECLARE_VISIT |
| 1335 virtual void VisitLoopChoice(LoopChoiceNode* that); | 1314 virtual void VisitLoopChoice(LoopChoiceNode* that); |
| 1336 | 1315 |
| 1337 bool has_failed() { return error_message_ != NULL; } | 1316 bool has_failed() { return error_message_ != NULL; } |
| 1338 const char* error_message() { | 1317 const char* error_message() { |
| 1339 ASSERT(error_message_ != NULL); | 1318 ASSERT(error_message_ != NULL); |
| 1340 return error_message_; | 1319 return error_message_; |
| 1341 } | 1320 } |
| 1342 void fail(const char* error_message) { | 1321 void fail(const char* error_message) { error_message_ = error_message; } |
| 1343 error_message_ = error_message; | |
| 1344 } | |
| 1345 | 1322 |
| 1346 private: | 1323 private: |
| 1347 bool ignore_case_; | 1324 bool ignore_case_; |
| 1348 bool is_one_byte_; | 1325 bool is_one_byte_; |
| 1349 const char* error_message_; | 1326 const char* error_message_; |
| 1350 | 1327 |
| 1351 DISALLOW_IMPLICIT_CONSTRUCTORS(Analysis); | 1328 DISALLOW_IMPLICIT_CONSTRUCTORS(Analysis); |
| 1352 }; | 1329 }; |
| 1353 | 1330 |
| 1354 | 1331 |
| 1355 struct RegExpCompileData : public ZoneAllocated { | 1332 struct RegExpCompileData : public ZoneAllocated { |
| 1356 RegExpCompileData() | 1333 RegExpCompileData() |
| 1357 : tree(NULL), | 1334 : tree(NULL), |
| 1358 node(NULL), | 1335 node(NULL), |
| 1359 simple(true), | 1336 simple(true), |
| 1360 contains_anchor(false), | 1337 contains_anchor(false), |
| 1361 error(String::Handle(String::null())), | 1338 error(String::Handle(String::null())), |
| 1362 capture_count(0) { } | 1339 capture_count(0) {} |
| 1363 RegExpTree* tree; | 1340 RegExpTree* tree; |
| 1364 RegExpNode* node; | 1341 RegExpNode* node; |
| 1365 bool simple; | 1342 bool simple; |
| 1366 bool contains_anchor; | 1343 bool contains_anchor; |
| 1367 String& error; | 1344 String& error; |
| 1368 intptr_t capture_count; | 1345 intptr_t capture_count; |
| 1369 }; | 1346 }; |
| 1370 | 1347 |
| 1371 | 1348 |
| 1372 class RegExpEngine: public AllStatic { | 1349 class RegExpEngine : public AllStatic { |
| 1373 public: | 1350 public: |
| 1374 struct CompilationResult { | 1351 struct CompilationResult { |
| 1375 explicit CompilationResult(const char* error_message) | 1352 explicit CompilationResult(const char* error_message) |
| 1376 : backtrack_goto(NULL), | 1353 : backtrack_goto(NULL), |
| 1377 graph_entry(NULL), | 1354 graph_entry(NULL), |
| 1378 num_blocks(-1), | 1355 num_blocks(-1), |
| 1379 num_stack_locals(-1), | 1356 num_stack_locals(-1), |
| 1380 error_message(error_message), | 1357 error_message(error_message), |
| 1381 bytecode(NULL), | 1358 bytecode(NULL), |
| 1382 num_registers(-1) {} | 1359 num_registers(-1) {} |
| 1383 | 1360 |
| 1384 CompilationResult(TypedData* bytecode, intptr_t num_registers) | 1361 CompilationResult(TypedData* bytecode, intptr_t num_registers) |
| 1385 : backtrack_goto(NULL), | 1362 : backtrack_goto(NULL), |
| 1386 graph_entry(NULL), | 1363 graph_entry(NULL), |
| 1387 num_blocks(-1), | 1364 num_blocks(-1), |
| 1388 num_stack_locals(-1), | 1365 num_stack_locals(-1), |
| 1389 error_message(NULL), | 1366 error_message(NULL), |
| 1390 bytecode(bytecode), | 1367 bytecode(bytecode), |
| 1391 num_registers(num_registers) {} | 1368 num_registers(num_registers) {} |
| 1392 | 1369 |
| 1393 CompilationResult(IndirectGotoInstr* backtrack_goto, | 1370 CompilationResult(IndirectGotoInstr* backtrack_goto, |
| 1394 GraphEntryInstr* graph_entry, | 1371 GraphEntryInstr* graph_entry, |
| 1395 intptr_t num_blocks, | 1372 intptr_t num_blocks, |
| 1396 intptr_t num_stack_locals, | 1373 intptr_t num_stack_locals, |
| 1397 intptr_t num_registers) | 1374 intptr_t num_registers) |
| 1398 : backtrack_goto(backtrack_goto), | 1375 : backtrack_goto(backtrack_goto), |
| 1399 graph_entry(graph_entry), | 1376 graph_entry(graph_entry), |
| 1400 num_blocks(num_blocks), | 1377 num_blocks(num_blocks), |
| 1401 num_stack_locals(num_stack_locals), | 1378 num_stack_locals(num_stack_locals), |
| 1402 error_message(NULL), | 1379 error_message(NULL), |
| 1403 bytecode(NULL) {} | 1380 bytecode(NULL) {} |
| 1404 | 1381 |
| 1405 IndirectGotoInstr* backtrack_goto; | 1382 IndirectGotoInstr* backtrack_goto; |
| 1406 GraphEntryInstr* graph_entry; | 1383 GraphEntryInstr* graph_entry; |
| 1407 const intptr_t num_blocks; | 1384 const intptr_t num_blocks; |
| 1408 const intptr_t num_stack_locals; | 1385 const intptr_t num_stack_locals; |
| 1409 | 1386 |
| 1410 const char* error_message; | 1387 const char* error_message; |
| 1411 | 1388 |
| 1412 TypedData* bytecode; | 1389 TypedData* bytecode; |
| 1413 intptr_t num_registers; | 1390 intptr_t num_registers; |
| 1414 }; | 1391 }; |
| 1415 | 1392 |
| 1416 static CompilationResult CompileIR( | 1393 static CompilationResult CompileIR( |
| 1417 RegExpCompileData* input, | 1394 RegExpCompileData* input, |
| 1418 const ParsedFunction* parsed_function, | 1395 const ParsedFunction* parsed_function, |
| 1419 const ZoneGrowableArray<const ICData*>& ic_data_array); | 1396 const ZoneGrowableArray<const ICData*>& ic_data_array); |
| 1420 | 1397 |
| 1421 static CompilationResult CompileBytecode( | 1398 static CompilationResult CompileBytecode(RegExpCompileData* data, |
| 1422 RegExpCompileData* data, | 1399 const RegExp& regexp, |
| 1423 const RegExp& regexp, | 1400 bool is_one_byte, |
| 1424 bool is_one_byte, | 1401 Zone* zone); |
| 1425 Zone* zone); | |
| 1426 | 1402 |
| 1427 static RawRegExp* CreateRegExp( | 1403 static RawRegExp* CreateRegExp(Thread* thread, |
| 1428 Thread* thread, | 1404 const String& pattern, |
| 1429 const String& pattern, | 1405 bool multi_line, |
| 1430 bool multi_line, | 1406 bool ignore_case); |
| 1431 bool ignore_case); | |
| 1432 | 1407 |
| 1433 static void DotPrint(const char* label, RegExpNode* node, bool ignore_case); | 1408 static void DotPrint(const char* label, RegExpNode* node, bool ignore_case); |
| 1434 }; | 1409 }; |
| 1435 | 1410 |
| 1436 } // namespace dart | 1411 } // namespace dart |
| 1437 | 1412 |
| 1438 #endif // RUNTIME_VM_REGEXP_H_ | 1413 #endif // RUNTIME_VM_REGEXP_H_ |
| OLD | NEW |