| OLD | NEW |
| 1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
| 4 | 4 |
| 5 #ifndef VM_REGEXP_ASSEMBLER_H_ | 5 #ifndef VM_REGEXP_ASSEMBLER_IR_H_ |
| 6 #define VM_REGEXP_ASSEMBLER_H_ | 6 #define VM_REGEXP_ASSEMBLER_IR_H_ |
| 7 | 7 |
| 8 #include "vm/assembler.h" | 8 #include "vm/assembler.h" |
| 9 #include "vm/intermediate_language.h" | 9 #include "vm/intermediate_language.h" |
| 10 #include "vm/object.h" | 10 #include "vm/object.h" |
| 11 #include "vm/regexp_assembler.h" |
| 11 | 12 |
| 12 namespace dart { | 13 namespace dart { |
| 13 | 14 |
| 14 // Utility function for the DotPrinter | |
| 15 void PrintUtf16(uint16_t c); | |
| 16 | |
| 17 | |
| 18 /// Convenience wrapper around a BlockEntryInstr pointer. | |
| 19 class BlockLabel : public ValueObject { | |
| 20 public: | |
| 21 BlockLabel() | |
| 22 : block_(new JoinEntryInstr(-1, -1)), | |
| 23 is_bound_(false), | |
| 24 is_linked_(false) { } | |
| 25 | |
| 26 BlockLabel(const BlockLabel& that) | |
| 27 : ValueObject(), | |
| 28 block_(that.block_), | |
| 29 is_bound_(that.is_bound_), | |
| 30 is_linked_(that.is_linked_) { } | |
| 31 | |
| 32 BlockLabel& operator=(const BlockLabel& that) { | |
| 33 block_ = that.block_; | |
| 34 is_bound_ = that.is_bound_; | |
| 35 is_linked_ = that.is_linked_; | |
| 36 return *this; | |
| 37 } | |
| 38 | |
| 39 JoinEntryInstr* block() const { return block_; } | |
| 40 | |
| 41 bool IsBound() const { return is_bound_; } | |
| 42 void SetBound(intptr_t block_id) { | |
| 43 ASSERT(!is_bound_); | |
| 44 block_->set_block_id(block_id); | |
| 45 is_bound_ = true; | |
| 46 } | |
| 47 | |
| 48 bool IsLinked() const { return !is_bound_ && is_linked_; } | |
| 49 void SetLinked() { | |
| 50 is_linked_ = true; | |
| 51 } | |
| 52 | |
| 53 intptr_t Position() const { | |
| 54 ASSERT(IsBound()); | |
| 55 return block_->block_id(); | |
| 56 } | |
| 57 | |
| 58 private: | |
| 59 JoinEntryInstr* block_; | |
| 60 | |
| 61 bool is_bound_; | |
| 62 bool is_linked_; | |
| 63 }; | |
| 64 | |
| 65 | |
| 66 class RegExpMacroAssembler : public ZoneAllocated { | |
| 67 public: | |
| 68 // The implementation must be able to handle at least: | |
| 69 static const intptr_t kMaxRegister = (1 << 16) - 1; | |
| 70 static const intptr_t kMaxCPOffset = (1 << 15) - 1; | |
| 71 static const intptr_t kMinCPOffset = -(1 << 15); | |
| 72 | |
| 73 static const intptr_t kTableSizeBits = 7; | |
| 74 static const intptr_t kTableSize = 1 << kTableSizeBits; | |
| 75 static const intptr_t kTableMask = kTableSize - 1; | |
| 76 | |
| 77 enum { | |
| 78 kParamStringIndex = 0, | |
| 79 kParamStartOffsetIndex, | |
| 80 kParamCount | |
| 81 }; | |
| 82 | |
| 83 enum IrregexpImplementation { | |
| 84 kIRImplementation | |
| 85 }; | |
| 86 | |
| 87 explicit RegExpMacroAssembler(Zone* zone); | |
| 88 virtual ~RegExpMacroAssembler(); | |
| 89 // The maximal number of pushes between stack checks. Users must supply | |
| 90 // kCheckStackLimit flag to push operations (instead of kNoStackLimitCheck) | |
| 91 // at least once for every stack_limit() pushes that are executed. | |
| 92 virtual intptr_t stack_limit_slack() = 0; | |
| 93 virtual bool CanReadUnaligned() = 0; | |
| 94 virtual void AdvanceCurrentPosition(intptr_t by) = 0; // Signed cp change. | |
| 95 virtual void AdvanceRegister(intptr_t reg, intptr_t by) = 0; // r[reg] += by. | |
| 96 // Continues execution from the position pushed on the top of the backtrack | |
| 97 // stack by an earlier PushBacktrack(BlockLabel*). | |
| 98 virtual void Backtrack() = 0; | |
| 99 virtual void BindBlock(BlockLabel* label) = 0; | |
| 100 virtual void CheckAtStart(BlockLabel* on_at_start) = 0; | |
| 101 // Dispatch after looking the current character up in a 2-bits-per-entry | |
| 102 // map. The destinations vector has up to 4 labels. | |
| 103 virtual void CheckCharacter(unsigned c, BlockLabel* on_equal) = 0; | |
| 104 // Bitwise and the current character with the given constant and then | |
| 105 // check for a match with c. | |
| 106 virtual void CheckCharacterAfterAnd(unsigned c, | |
| 107 unsigned and_with, | |
| 108 BlockLabel* on_equal) = 0; | |
| 109 virtual void CheckCharacterGT(uint16_t limit, BlockLabel* on_greater) = 0; | |
| 110 virtual void CheckCharacterLT(uint16_t limit, BlockLabel* on_less) = 0; | |
| 111 virtual void CheckGreedyLoop(BlockLabel* on_tos_equals_current_position) = 0; | |
| 112 virtual void CheckNotAtStart(BlockLabel* on_not_at_start) = 0; | |
| 113 virtual void CheckNotBackReference( | |
| 114 intptr_t start_reg, BlockLabel* on_no_match) = 0; | |
| 115 virtual void CheckNotBackReferenceIgnoreCase(intptr_t start_reg, | |
| 116 BlockLabel* on_no_match) = 0; | |
| 117 // Check the current character for a match with a literal character. If we | |
| 118 // fail to match then goto the on_failure label. End of input always | |
| 119 // matches. If the label is NULL then we should pop a backtrack address off | |
| 120 // the stack and go to that. | |
| 121 virtual void CheckNotCharacter(unsigned c, BlockLabel* on_not_equal) = 0; | |
| 122 virtual void CheckNotCharacterAfterAnd(unsigned c, | |
| 123 unsigned and_with, | |
| 124 BlockLabel* on_not_equal) = 0; | |
| 125 // Subtract a constant from the current character, then and with the given | |
| 126 // constant and then check for a match with c. | |
| 127 virtual void CheckNotCharacterAfterMinusAnd(uint16_t c, | |
| 128 uint16_t minus, | |
| 129 uint16_t and_with, | |
| 130 BlockLabel* on_not_equal) = 0; | |
| 131 virtual void CheckCharacterInRange(uint16_t from, | |
| 132 uint16_t to, // Both inclusive. | |
| 133 BlockLabel* on_in_range) = 0; | |
| 134 virtual void CheckCharacterNotInRange(uint16_t from, | |
| 135 uint16_t to, // Both inclusive. | |
| 136 BlockLabel* on_not_in_range) = 0; | |
| 137 | |
| 138 // The current character (modulus the kTableSize) is looked up in the byte | |
| 139 // array, and if the found byte is non-zero, we jump to the on_bit_set label. | |
| 140 virtual void CheckBitInTable(const TypedData& table, | |
| 141 BlockLabel* on_bit_set) = 0; | |
| 142 | |
| 143 // Checks whether the given offset from the current position is before | |
| 144 // the end of the string. May overwrite the current character. | |
| 145 virtual void CheckPosition(intptr_t cp_offset, BlockLabel* on_outside_input) { | |
| 146 LoadCurrentCharacter(cp_offset, on_outside_input, true); | |
| 147 } | |
| 148 // Check whether a standard/default character class matches the current | |
| 149 // character. Returns false if the type of special character class does | |
| 150 // not have custom support. | |
| 151 // May clobber the current loaded character. | |
| 152 virtual bool CheckSpecialCharacterClass(uint16_t type, | |
| 153 BlockLabel* on_no_match) { | |
| 154 return false; | |
| 155 } | |
| 156 virtual void Fail() = 0; | |
| 157 // Check whether a register is >= a given constant and go to a label if it | |
| 158 // is. Backtracks instead if the label is NULL. | |
| 159 virtual void IfRegisterGE( | |
| 160 intptr_t reg, intptr_t comparand, BlockLabel* if_ge) = 0; | |
| 161 // Check whether a register is < a given constant and go to a label if it is. | |
| 162 // Backtracks instead if the label is NULL. | |
| 163 virtual void IfRegisterLT( | |
| 164 intptr_t reg, intptr_t comparand, BlockLabel* if_lt) = 0; | |
| 165 // Check whether a register is == to the current position and go to a | |
| 166 // label if it is. | |
| 167 virtual void IfRegisterEqPos(intptr_t reg, BlockLabel* if_eq) = 0; | |
| 168 virtual IrregexpImplementation Implementation() = 0; | |
| 169 // The assembler is closed, iff there is no current instruction assigned. | |
| 170 virtual bool IsClosed() const = 0; | |
| 171 // Jump to the target label without setting it as the current instruction. | |
| 172 virtual void GoTo(BlockLabel* to) = 0; | |
| 173 virtual void LoadCurrentCharacter(intptr_t cp_offset, | |
| 174 BlockLabel* on_end_of_input, | |
| 175 bool check_bounds = true, | |
| 176 intptr_t characters = 1) = 0; | |
| 177 virtual void PopCurrentPosition() = 0; | |
| 178 virtual void PopRegister(intptr_t register_index) = 0; | |
| 179 // Prints string within the generated code. Used for debugging. | |
| 180 virtual void Print(const char* str) = 0; | |
| 181 // Prints all emitted blocks. | |
| 182 virtual void PrintBlocks() = 0; | |
| 183 // Pushes the label on the backtrack stack, so that a following Backtrack | |
| 184 // will go to this label. Always checks the backtrack stack limit. | |
| 185 virtual void PushBacktrack(BlockLabel* label) = 0; | |
| 186 virtual void PushCurrentPosition() = 0; | |
| 187 virtual void PushRegister(intptr_t register_index) = 0; | |
| 188 virtual void ReadCurrentPositionFromRegister(intptr_t reg) = 0; | |
| 189 virtual void ReadStackPointerFromRegister(intptr_t reg) = 0; | |
| 190 virtual void SetCurrentPositionFromEnd(intptr_t by) = 0; | |
| 191 virtual void SetRegister(intptr_t register_index, intptr_t to) = 0; | |
| 192 // Return whether the matching (with a global regexp) will be restarted. | |
| 193 virtual bool Succeed() = 0; | |
| 194 virtual void WriteCurrentPositionToRegister( | |
| 195 intptr_t reg, intptr_t cp_offset) = 0; | |
| 196 virtual void ClearRegisters(intptr_t reg_from, intptr_t reg_to) = 0; | |
| 197 virtual void WriteStackPointerToRegister(intptr_t reg) = 0; | |
| 198 | |
| 199 // Controls the generation of large inlined constants in the code. | |
| 200 void set_slow_safe(bool ssc) { slow_safe_compiler_ = ssc; } | |
| 201 bool slow_safe() { return slow_safe_compiler_; } | |
| 202 | |
| 203 enum GlobalMode { NOT_GLOBAL, GLOBAL, GLOBAL_NO_ZERO_LENGTH_CHECK }; | |
| 204 // Set whether the regular expression has the global flag. Exiting due to | |
| 205 // a failure in a global regexp may still mean success overall. | |
| 206 inline void set_global_mode(GlobalMode mode) { global_mode_ = mode; } | |
| 207 inline bool global() { return global_mode_ != NOT_GLOBAL; } | |
| 208 inline bool global_with_zero_length_check() { | |
| 209 return global_mode_ == GLOBAL; | |
| 210 } | |
| 211 | |
| 212 Zone* zone() const { return zone_; } | |
| 213 | |
| 214 private: | |
| 215 bool slow_safe_compiler_; | |
| 216 bool global_mode_; | |
| 217 Zone* zone_; | |
| 218 }; | |
| 219 | |
| 220 | |
| 221 class IRRegExpMacroAssembler : public RegExpMacroAssembler { | 15 class IRRegExpMacroAssembler : public RegExpMacroAssembler { |
| 222 public: | 16 public: |
| 223 // Type of input string to generate code for. | 17 // Type of input string to generate code for. |
| 224 enum Mode { ASCII = 1, UC16 = 2 }; | 18 enum Mode { ASCII = 1, UC16 = 2 }; |
| 225 | 19 |
| 226 // Result of calling generated native RegExp code. | 20 // Result of calling generated native RegExp code. |
| 227 // RETRY: Something significant changed during execution, and the matching | 21 // RETRY: Something significant changed during execution, and the matching |
| 228 // should be retried from scratch. | 22 // should be retried from scratch. |
| 229 // EXCEPTION: Something failed during execution. If no exception has been | 23 // EXCEPTION: Something failed during execution. If no exception has been |
| 230 // thrown, it's an internal out-of-memory, and the caller should | 24 // thrown, it's an internal out-of-memory, and the caller should |
| (...skipping 408 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 639 IdAllocator block_id_; | 433 IdAllocator block_id_; |
| 640 IdAllocator temp_id_; | 434 IdAllocator temp_id_; |
| 641 IdAllocator arg_id_; | 435 IdAllocator arg_id_; |
| 642 IdAllocator local_id_; | 436 IdAllocator local_id_; |
| 643 IdAllocator indirect_id_; | 437 IdAllocator indirect_id_; |
| 644 }; | 438 }; |
| 645 | 439 |
| 646 | 440 |
| 647 } // namespace dart | 441 } // namespace dart |
| 648 | 442 |
| 649 #endif // VM_REGEXP_ASSEMBLER_H_ | 443 #endif // VM_REGEXP_ASSEMBLER_IR_H_ |
| OLD | NEW |