OLD | NEW |
1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file |
2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
4 | 4 |
5 #ifndef VM_REGEXP_ASSEMBLER_H_ | 5 #ifndef VM_REGEXP_ASSEMBLER_IR_H_ |
6 #define VM_REGEXP_ASSEMBLER_H_ | 6 #define VM_REGEXP_ASSEMBLER_IR_H_ |
7 | 7 |
8 #include "vm/assembler.h" | 8 #include "vm/assembler.h" |
9 #include "vm/intermediate_language.h" | 9 #include "vm/intermediate_language.h" |
10 #include "vm/object.h" | 10 #include "vm/object.h" |
| 11 #include "vm/regexp_assembler.h" |
11 | 12 |
12 namespace dart { | 13 namespace dart { |
13 | 14 |
14 // Utility function for the DotPrinter | |
15 void PrintUtf16(uint16_t c); | |
16 | |
17 | |
18 /// Convenience wrapper around a BlockEntryInstr pointer. | |
19 class BlockLabel : public ValueObject { | |
20 public: | |
21 BlockLabel() | |
22 : block_(new JoinEntryInstr(-1, -1)), | |
23 is_bound_(false), | |
24 is_linked_(false) { } | |
25 | |
26 BlockLabel(const BlockLabel& that) | |
27 : ValueObject(), | |
28 block_(that.block_), | |
29 is_bound_(that.is_bound_), | |
30 is_linked_(that.is_linked_) { } | |
31 | |
32 BlockLabel& operator=(const BlockLabel& that) { | |
33 block_ = that.block_; | |
34 is_bound_ = that.is_bound_; | |
35 is_linked_ = that.is_linked_; | |
36 return *this; | |
37 } | |
38 | |
39 JoinEntryInstr* block() const { return block_; } | |
40 | |
41 bool IsBound() const { return is_bound_; } | |
42 void SetBound(intptr_t block_id) { | |
43 ASSERT(!is_bound_); | |
44 block_->set_block_id(block_id); | |
45 is_bound_ = true; | |
46 } | |
47 | |
48 bool IsLinked() const { return !is_bound_ && is_linked_; } | |
49 void SetLinked() { | |
50 is_linked_ = true; | |
51 } | |
52 | |
53 intptr_t Position() const { | |
54 ASSERT(IsBound()); | |
55 return block_->block_id(); | |
56 } | |
57 | |
58 private: | |
59 JoinEntryInstr* block_; | |
60 | |
61 bool is_bound_; | |
62 bool is_linked_; | |
63 }; | |
64 | |
65 | |
66 class RegExpMacroAssembler : public ZoneAllocated { | |
67 public: | |
68 // The implementation must be able to handle at least: | |
69 static const intptr_t kMaxRegister = (1 << 16) - 1; | |
70 static const intptr_t kMaxCPOffset = (1 << 15) - 1; | |
71 static const intptr_t kMinCPOffset = -(1 << 15); | |
72 | |
73 static const intptr_t kTableSizeBits = 7; | |
74 static const intptr_t kTableSize = 1 << kTableSizeBits; | |
75 static const intptr_t kTableMask = kTableSize - 1; | |
76 | |
77 enum { | |
78 kParamStringIndex = 0, | |
79 kParamStartOffsetIndex, | |
80 kParamCount | |
81 }; | |
82 | |
83 enum IrregexpImplementation { | |
84 kIRImplementation | |
85 }; | |
86 | |
87 explicit RegExpMacroAssembler(Zone* zone); | |
88 virtual ~RegExpMacroAssembler(); | |
89 // The maximal number of pushes between stack checks. Users must supply | |
90 // kCheckStackLimit flag to push operations (instead of kNoStackLimitCheck) | |
91 // at least once for every stack_limit() pushes that are executed. | |
92 virtual intptr_t stack_limit_slack() = 0; | |
93 virtual bool CanReadUnaligned() = 0; | |
94 virtual void AdvanceCurrentPosition(intptr_t by) = 0; // Signed cp change. | |
95 virtual void AdvanceRegister(intptr_t reg, intptr_t by) = 0; // r[reg] += by. | |
96 // Continues execution from the position pushed on the top of the backtrack | |
97 // stack by an earlier PushBacktrack(BlockLabel*). | |
98 virtual void Backtrack() = 0; | |
99 virtual void BindBlock(BlockLabel* label) = 0; | |
100 virtual void CheckAtStart(BlockLabel* on_at_start) = 0; | |
101 // Dispatch after looking the current character up in a 2-bits-per-entry | |
102 // map. The destinations vector has up to 4 labels. | |
103 virtual void CheckCharacter(unsigned c, BlockLabel* on_equal) = 0; | |
104 // Bitwise and the current character with the given constant and then | |
105 // check for a match with c. | |
106 virtual void CheckCharacterAfterAnd(unsigned c, | |
107 unsigned and_with, | |
108 BlockLabel* on_equal) = 0; | |
109 virtual void CheckCharacterGT(uint16_t limit, BlockLabel* on_greater) = 0; | |
110 virtual void CheckCharacterLT(uint16_t limit, BlockLabel* on_less) = 0; | |
111 virtual void CheckGreedyLoop(BlockLabel* on_tos_equals_current_position) = 0; | |
112 virtual void CheckNotAtStart(BlockLabel* on_not_at_start) = 0; | |
113 virtual void CheckNotBackReference( | |
114 intptr_t start_reg, BlockLabel* on_no_match) = 0; | |
115 virtual void CheckNotBackReferenceIgnoreCase(intptr_t start_reg, | |
116 BlockLabel* on_no_match) = 0; | |
117 // Check the current character for a match with a literal character. If we | |
118 // fail to match then goto the on_failure label. End of input always | |
119 // matches. If the label is NULL then we should pop a backtrack address off | |
120 // the stack and go to that. | |
121 virtual void CheckNotCharacter(unsigned c, BlockLabel* on_not_equal) = 0; | |
122 virtual void CheckNotCharacterAfterAnd(unsigned c, | |
123 unsigned and_with, | |
124 BlockLabel* on_not_equal) = 0; | |
125 // Subtract a constant from the current character, then and with the given | |
126 // constant and then check for a match with c. | |
127 virtual void CheckNotCharacterAfterMinusAnd(uint16_t c, | |
128 uint16_t minus, | |
129 uint16_t and_with, | |
130 BlockLabel* on_not_equal) = 0; | |
131 virtual void CheckCharacterInRange(uint16_t from, | |
132 uint16_t to, // Both inclusive. | |
133 BlockLabel* on_in_range) = 0; | |
134 virtual void CheckCharacterNotInRange(uint16_t from, | |
135 uint16_t to, // Both inclusive. | |
136 BlockLabel* on_not_in_range) = 0; | |
137 | |
138 // The current character (modulus the kTableSize) is looked up in the byte | |
139 // array, and if the found byte is non-zero, we jump to the on_bit_set label. | |
140 virtual void CheckBitInTable(const TypedData& table, | |
141 BlockLabel* on_bit_set) = 0; | |
142 | |
143 // Checks whether the given offset from the current position is before | |
144 // the end of the string. May overwrite the current character. | |
145 virtual void CheckPosition(intptr_t cp_offset, BlockLabel* on_outside_input) { | |
146 LoadCurrentCharacter(cp_offset, on_outside_input, true); | |
147 } | |
148 // Check whether a standard/default character class matches the current | |
149 // character. Returns false if the type of special character class does | |
150 // not have custom support. | |
151 // May clobber the current loaded character. | |
152 virtual bool CheckSpecialCharacterClass(uint16_t type, | |
153 BlockLabel* on_no_match) { | |
154 return false; | |
155 } | |
156 virtual void Fail() = 0; | |
157 // Check whether a register is >= a given constant and go to a label if it | |
158 // is. Backtracks instead if the label is NULL. | |
159 virtual void IfRegisterGE( | |
160 intptr_t reg, intptr_t comparand, BlockLabel* if_ge) = 0; | |
161 // Check whether a register is < a given constant and go to a label if it is. | |
162 // Backtracks instead if the label is NULL. | |
163 virtual void IfRegisterLT( | |
164 intptr_t reg, intptr_t comparand, BlockLabel* if_lt) = 0; | |
165 // Check whether a register is == to the current position and go to a | |
166 // label if it is. | |
167 virtual void IfRegisterEqPos(intptr_t reg, BlockLabel* if_eq) = 0; | |
168 virtual IrregexpImplementation Implementation() = 0; | |
169 // The assembler is closed, iff there is no current instruction assigned. | |
170 virtual bool IsClosed() const = 0; | |
171 // Jump to the target label without setting it as the current instruction. | |
172 virtual void GoTo(BlockLabel* to) = 0; | |
173 virtual void LoadCurrentCharacter(intptr_t cp_offset, | |
174 BlockLabel* on_end_of_input, | |
175 bool check_bounds = true, | |
176 intptr_t characters = 1) = 0; | |
177 virtual void PopCurrentPosition() = 0; | |
178 virtual void PopRegister(intptr_t register_index) = 0; | |
179 // Prints string within the generated code. Used for debugging. | |
180 virtual void Print(const char* str) = 0; | |
181 // Prints all emitted blocks. | |
182 virtual void PrintBlocks() = 0; | |
183 // Pushes the label on the backtrack stack, so that a following Backtrack | |
184 // will go to this label. Always checks the backtrack stack limit. | |
185 virtual void PushBacktrack(BlockLabel* label) = 0; | |
186 virtual void PushCurrentPosition() = 0; | |
187 virtual void PushRegister(intptr_t register_index) = 0; | |
188 virtual void ReadCurrentPositionFromRegister(intptr_t reg) = 0; | |
189 virtual void ReadStackPointerFromRegister(intptr_t reg) = 0; | |
190 virtual void SetCurrentPositionFromEnd(intptr_t by) = 0; | |
191 virtual void SetRegister(intptr_t register_index, intptr_t to) = 0; | |
192 // Return whether the matching (with a global regexp) will be restarted. | |
193 virtual bool Succeed() = 0; | |
194 virtual void WriteCurrentPositionToRegister( | |
195 intptr_t reg, intptr_t cp_offset) = 0; | |
196 virtual void ClearRegisters(intptr_t reg_from, intptr_t reg_to) = 0; | |
197 virtual void WriteStackPointerToRegister(intptr_t reg) = 0; | |
198 | |
199 // Controls the generation of large inlined constants in the code. | |
200 void set_slow_safe(bool ssc) { slow_safe_compiler_ = ssc; } | |
201 bool slow_safe() { return slow_safe_compiler_; } | |
202 | |
203 enum GlobalMode { NOT_GLOBAL, GLOBAL, GLOBAL_NO_ZERO_LENGTH_CHECK }; | |
204 // Set whether the regular expression has the global flag. Exiting due to | |
205 // a failure in a global regexp may still mean success overall. | |
206 inline void set_global_mode(GlobalMode mode) { global_mode_ = mode; } | |
207 inline bool global() { return global_mode_ != NOT_GLOBAL; } | |
208 inline bool global_with_zero_length_check() { | |
209 return global_mode_ == GLOBAL; | |
210 } | |
211 | |
212 Zone* zone() const { return zone_; } | |
213 | |
214 private: | |
215 bool slow_safe_compiler_; | |
216 bool global_mode_; | |
217 Zone* zone_; | |
218 }; | |
219 | |
220 | |
221 class IRRegExpMacroAssembler : public RegExpMacroAssembler { | 15 class IRRegExpMacroAssembler : public RegExpMacroAssembler { |
222 public: | 16 public: |
223 // Type of input string to generate code for. | 17 // Type of input string to generate code for. |
224 enum Mode { ASCII = 1, UC16 = 2 }; | 18 enum Mode { ASCII = 1, UC16 = 2 }; |
225 | 19 |
226 // Result of calling generated native RegExp code. | 20 // Result of calling generated native RegExp code. |
227 // RETRY: Something significant changed during execution, and the matching | 21 // RETRY: Something significant changed during execution, and the matching |
228 // should be retried from scratch. | 22 // should be retried from scratch. |
229 // EXCEPTION: Something failed during execution. If no exception has been | 23 // EXCEPTION: Something failed during execution. If no exception has been |
230 // thrown, it's an internal out-of-memory, and the caller should | 24 // thrown, it's an internal out-of-memory, and the caller should |
(...skipping 408 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
639 IdAllocator block_id_; | 433 IdAllocator block_id_; |
640 IdAllocator temp_id_; | 434 IdAllocator temp_id_; |
641 IdAllocator arg_id_; | 435 IdAllocator arg_id_; |
642 IdAllocator local_id_; | 436 IdAllocator local_id_; |
643 IdAllocator indirect_id_; | 437 IdAllocator indirect_id_; |
644 }; | 438 }; |
645 | 439 |
646 | 440 |
647 } // namespace dart | 441 } // namespace dart |
648 | 442 |
649 #endif // VM_REGEXP_ASSEMBLER_H_ | 443 #endif // VM_REGEXP_ASSEMBLER_IR_H_ |
OLD | NEW |