| OLD | NEW |
| (Empty) |
| 1 // Copyright 2012 the V8 project authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #ifndef V8_REGEXP_MACRO_ASSEMBLER_H_ | |
| 6 #define V8_REGEXP_MACRO_ASSEMBLER_H_ | |
| 7 | |
| 8 #include "src/ast.h" | |
| 9 | |
| 10 namespace v8 { | |
| 11 namespace internal { | |
| 12 | |
| 13 struct DisjunctDecisionRow { | |
| 14 RegExpCharacterClass cc; | |
| 15 Label* on_match; | |
| 16 }; | |
| 17 | |
| 18 | |
| 19 class RegExpMacroAssembler { | |
| 20 public: | |
| 21 // The implementation must be able to handle at least: | |
| 22 static const int kMaxRegister = (1 << 16) - 1; | |
| 23 static const int kMaxCPOffset = (1 << 15) - 1; | |
| 24 static const int kMinCPOffset = -(1 << 15); | |
| 25 | |
| 26 static const int kTableSizeBits = 7; | |
| 27 static const int kTableSize = 1 << kTableSizeBits; | |
| 28 static const int kTableMask = kTableSize - 1; | |
| 29 | |
| 30 enum IrregexpImplementation { | |
| 31 kIA32Implementation, | |
| 32 kARMImplementation, | |
| 33 kARM64Implementation, | |
| 34 kMIPSImplementation, | |
| 35 kPPCImplementation, | |
| 36 kX64Implementation, | |
| 37 kX87Implementation, | |
| 38 kBytecodeImplementation | |
| 39 }; | |
| 40 | |
| 41 enum StackCheckFlag { | |
| 42 kNoStackLimitCheck = false, | |
| 43 kCheckStackLimit = true | |
| 44 }; | |
| 45 | |
| 46 RegExpMacroAssembler(Isolate* isolate, Zone* zone); | |
| 47 virtual ~RegExpMacroAssembler(); | |
| 48 // This function is called when code generation is aborted, so that | |
| 49 // the assembler could clean up internal data structures. | |
| 50 virtual void AbortedCodeGeneration() {} | |
| 51 // The maximal number of pushes between stack checks. Users must supply | |
| 52 // kCheckStackLimit flag to push operations (instead of kNoStackLimitCheck) | |
| 53 // at least once for every stack_limit() pushes that are executed. | |
| 54 virtual int stack_limit_slack() = 0; | |
| 55 virtual bool CanReadUnaligned() = 0; | |
| 56 virtual void AdvanceCurrentPosition(int by) = 0; // Signed cp change. | |
| 57 virtual void AdvanceRegister(int reg, int by) = 0; // r[reg] += by. | |
| 58 // Continues execution from the position pushed on the top of the backtrack | |
| 59 // stack by an earlier PushBacktrack(Label*). | |
| 60 virtual void Backtrack() = 0; | |
| 61 virtual void Bind(Label* label) = 0; | |
| 62 virtual void CheckAtStart(Label* on_at_start) = 0; | |
| 63 // Dispatch after looking the current character up in a 2-bits-per-entry | |
| 64 // map. The destinations vector has up to 4 labels. | |
| 65 virtual void CheckCharacter(unsigned c, Label* on_equal) = 0; | |
| 66 // Bitwise and the current character with the given constant and then | |
| 67 // check for a match with c. | |
| 68 virtual void CheckCharacterAfterAnd(unsigned c, | |
| 69 unsigned and_with, | |
| 70 Label* on_equal) = 0; | |
| 71 virtual void CheckCharacterGT(uc16 limit, Label* on_greater) = 0; | |
| 72 virtual void CheckCharacterLT(uc16 limit, Label* on_less) = 0; | |
| 73 virtual void CheckGreedyLoop(Label* on_tos_equals_current_position) = 0; | |
| 74 virtual void CheckNotAtStart(Label* on_not_at_start) = 0; | |
| 75 virtual void CheckNotBackReference(int start_reg, Label* on_no_match) = 0; | |
| 76 virtual void CheckNotBackReferenceIgnoreCase(int start_reg, | |
| 77 Label* on_no_match) = 0; | |
| 78 // Check the current character for a match with a literal character. If we | |
| 79 // fail to match then goto the on_failure label. End of input always | |
| 80 // matches. If the label is NULL then we should pop a backtrack address off | |
| 81 // the stack and go to that. | |
| 82 virtual void CheckNotCharacter(unsigned c, Label* on_not_equal) = 0; | |
| 83 virtual void CheckNotCharacterAfterAnd(unsigned c, | |
| 84 unsigned and_with, | |
| 85 Label* on_not_equal) = 0; | |
| 86 // Subtract a constant from the current character, then and with the given | |
| 87 // constant and then check for a match with c. | |
| 88 virtual void CheckNotCharacterAfterMinusAnd(uc16 c, | |
| 89 uc16 minus, | |
| 90 uc16 and_with, | |
| 91 Label* on_not_equal) = 0; | |
| 92 virtual void CheckCharacterInRange(uc16 from, | |
| 93 uc16 to, // Both inclusive. | |
| 94 Label* on_in_range) = 0; | |
| 95 virtual void CheckCharacterNotInRange(uc16 from, | |
| 96 uc16 to, // Both inclusive. | |
| 97 Label* on_not_in_range) = 0; | |
| 98 | |
| 99 // The current character (modulus the kTableSize) is looked up in the byte | |
| 100 // array, and if the found byte is non-zero, we jump to the on_bit_set label. | |
| 101 virtual void CheckBitInTable(Handle<ByteArray> table, Label* on_bit_set) = 0; | |
| 102 | |
| 103 // Checks whether the given offset from the current position is before | |
| 104 // the end of the string. May overwrite the current character. | |
| 105 virtual void CheckPosition(int cp_offset, Label* on_outside_input) { | |
| 106 LoadCurrentCharacter(cp_offset, on_outside_input, true); | |
| 107 } | |
| 108 // Check whether a standard/default character class matches the current | |
| 109 // character. Returns false if the type of special character class does | |
| 110 // not have custom support. | |
| 111 // May clobber the current loaded character. | |
| 112 virtual bool CheckSpecialCharacterClass(uc16 type, | |
| 113 Label* on_no_match) { | |
| 114 return false; | |
| 115 } | |
| 116 virtual void Fail() = 0; | |
| 117 virtual Handle<HeapObject> GetCode(Handle<String> source) = 0; | |
| 118 virtual void GoTo(Label* label) = 0; | |
| 119 // Check whether a register is >= a given constant and go to a label if it | |
| 120 // is. Backtracks instead if the label is NULL. | |
| 121 virtual void IfRegisterGE(int reg, int comparand, Label* if_ge) = 0; | |
| 122 // Check whether a register is < a given constant and go to a label if it is. | |
| 123 // Backtracks instead if the label is NULL. | |
| 124 virtual void IfRegisterLT(int reg, int comparand, Label* if_lt) = 0; | |
| 125 // Check whether a register is == to the current position and go to a | |
| 126 // label if it is. | |
| 127 virtual void IfRegisterEqPos(int reg, Label* if_eq) = 0; | |
| 128 virtual IrregexpImplementation Implementation() = 0; | |
| 129 virtual void LoadCurrentCharacter(int cp_offset, | |
| 130 Label* on_end_of_input, | |
| 131 bool check_bounds = true, | |
| 132 int characters = 1) = 0; | |
| 133 virtual void PopCurrentPosition() = 0; | |
| 134 virtual void PopRegister(int register_index) = 0; | |
| 135 // Pushes the label on the backtrack stack, so that a following Backtrack | |
| 136 // will go to this label. Always checks the backtrack stack limit. | |
| 137 virtual void PushBacktrack(Label* label) = 0; | |
| 138 virtual void PushCurrentPosition() = 0; | |
| 139 virtual void PushRegister(int register_index, | |
| 140 StackCheckFlag check_stack_limit) = 0; | |
| 141 virtual void ReadCurrentPositionFromRegister(int reg) = 0; | |
| 142 virtual void ReadStackPointerFromRegister(int reg) = 0; | |
| 143 virtual void SetCurrentPositionFromEnd(int by) = 0; | |
| 144 virtual void SetRegister(int register_index, int to) = 0; | |
| 145 // Return whether the matching (with a global regexp) will be restarted. | |
| 146 virtual bool Succeed() = 0; | |
| 147 virtual void WriteCurrentPositionToRegister(int reg, int cp_offset) = 0; | |
| 148 virtual void ClearRegisters(int reg_from, int reg_to) = 0; | |
| 149 virtual void WriteStackPointerToRegister(int reg) = 0; | |
| 150 | |
| 151 // Controls the generation of large inlined constants in the code. | |
| 152 void set_slow_safe(bool ssc) { slow_safe_compiler_ = ssc; } | |
| 153 bool slow_safe() { return slow_safe_compiler_; } | |
| 154 | |
| 155 enum GlobalMode { NOT_GLOBAL, GLOBAL, GLOBAL_NO_ZERO_LENGTH_CHECK }; | |
| 156 // Set whether the regular expression has the global flag. Exiting due to | |
| 157 // a failure in a global regexp may still mean success overall. | |
| 158 inline void set_global_mode(GlobalMode mode) { global_mode_ = mode; } | |
| 159 inline bool global() { return global_mode_ != NOT_GLOBAL; } | |
| 160 inline bool global_with_zero_length_check() { | |
| 161 return global_mode_ == GLOBAL; | |
| 162 } | |
| 163 | |
| 164 Isolate* isolate() const { return isolate_; } | |
| 165 Zone* zone() const { return zone_; } | |
| 166 | |
| 167 private: | |
| 168 bool slow_safe_compiler_; | |
| 169 bool global_mode_; | |
| 170 Isolate* isolate_; | |
| 171 Zone* zone_; | |
| 172 }; | |
| 173 | |
| 174 | |
| 175 #ifndef V8_INTERPRETED_REGEXP // Avoid compiling unused code. | |
| 176 | |
| 177 class NativeRegExpMacroAssembler: public RegExpMacroAssembler { | |
| 178 public: | |
| 179 // Type of input string to generate code for. | |
| 180 enum Mode { LATIN1 = 1, UC16 = 2 }; | |
| 181 | |
| 182 // Result of calling generated native RegExp code. | |
| 183 // RETRY: Something significant changed during execution, and the matching | |
| 184 // should be retried from scratch. | |
| 185 // EXCEPTION: Something failed during execution. If no exception has been | |
| 186 // thrown, it's an internal out-of-memory, and the caller should | |
| 187 // throw the exception. | |
| 188 // FAILURE: Matching failed. | |
| 189 // SUCCESS: Matching succeeded, and the output array has been filled with | |
| 190 // capture positions. | |
| 191 enum Result { RETRY = -2, EXCEPTION = -1, FAILURE = 0, SUCCESS = 1 }; | |
| 192 | |
| 193 NativeRegExpMacroAssembler(Isolate* isolate, Zone* zone); | |
| 194 virtual ~NativeRegExpMacroAssembler(); | |
| 195 virtual bool CanReadUnaligned(); | |
| 196 | |
| 197 static Result Match(Handle<Code> regexp, | |
| 198 Handle<String> subject, | |
| 199 int* offsets_vector, | |
| 200 int offsets_vector_length, | |
| 201 int previous_index, | |
| 202 Isolate* isolate); | |
| 203 | |
| 204 // Compares two-byte strings case insensitively. | |
| 205 // Called from generated RegExp code. | |
| 206 static int CaseInsensitiveCompareUC16(Address byte_offset1, | |
| 207 Address byte_offset2, | |
| 208 size_t byte_length, | |
| 209 Isolate* isolate); | |
| 210 | |
| 211 // Called from RegExp if the backtrack stack limit is hit. | |
| 212 // Tries to expand the stack. Returns the new stack-pointer if | |
| 213 // successful, and updates the stack_top address, or returns 0 if unable | |
| 214 // to grow the stack. | |
| 215 // This function must not trigger a garbage collection. | |
| 216 static Address GrowStack(Address stack_pointer, Address* stack_top, | |
| 217 Isolate* isolate); | |
| 218 | |
| 219 static const byte* StringCharacterPosition(String* subject, int start_index); | |
| 220 | |
| 221 static int CheckStackGuardState(Isolate* isolate, int start_index, | |
| 222 bool is_direct_call, Address* return_address, | |
| 223 Code* re_code, String** subject, | |
| 224 const byte** input_start, | |
| 225 const byte** input_end); | |
| 226 | |
| 227 // Byte map of one byte characters with a 0xff if the character is a word | |
| 228 // character (digit, letter or underscore) and 0x00 otherwise. | |
| 229 // Used by generated RegExp code. | |
| 230 static const byte word_character_map[256]; | |
| 231 | |
| 232 static Address word_character_map_address() { | |
| 233 return const_cast<Address>(&word_character_map[0]); | |
| 234 } | |
| 235 | |
| 236 static Result Execute(Code* code, | |
| 237 String* input, | |
| 238 int start_offset, | |
| 239 const byte* input_start, | |
| 240 const byte* input_end, | |
| 241 int* output, | |
| 242 int output_size, | |
| 243 Isolate* isolate); | |
| 244 }; | |
| 245 | |
| 246 #endif // V8_INTERPRETED_REGEXP | |
| 247 | |
| 248 } } // namespace v8::internal | |
| 249 | |
| 250 #endif // V8_REGEXP_MACRO_ASSEMBLER_H_ | |
| OLD | NEW |