| OLD | NEW |
| 1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
| 4 | 4 |
| 5 #include "vm/regexp_assembler.h" | 5 #include "vm/regexp_assembler.h" |
| 6 | 6 |
| 7 #include "vm/bit_vector.h" | |
| 8 #include "vm/compiler.h" | |
| 9 #include "vm/dart_entry.h" | |
| 10 #include "vm/flow_graph_builder.h" | |
| 11 #include "vm/il_printer.h" | |
| 12 #include "vm/object_store.h" | |
| 13 #include "vm/regexp.h" | 7 #include "vm/regexp.h" |
| 14 #include "vm/resolver.h" | |
| 15 #include "vm/stack_frame.h" | |
| 16 #include "vm/unibrow-inl.h" | |
| 17 #include "vm/unicode.h" | |
| 18 | |
| 19 #define Z zone() | |
| 20 | |
| 21 // Debugging output macros. TAG() is called at the head of each interesting | |
| 22 // function and prints its name during execution if irregexp tracing is enabled. | |
| 23 #define TAG() if (FLAG_trace_irregexp) { TAG_(); } | |
| 24 #define TAG_() \ | |
| 25 Print(PushArgument( \ | |
| 26 Bind(new(Z) ConstantInstr(String::ZoneHandle(Z, String::Concat( \ | |
| 27 String::Handle(String::New("TAG: ")), \ | |
| 28 String::Handle(String::New(__FUNCTION__)), Heap::kOld)))))); | |
| 29 | |
| 30 #define PRINT(arg) if (FLAG_trace_irregexp) { Print(arg); } | |
| 31 | 8 |
| 32 namespace dart { | 9 namespace dart { |
| 33 | 10 |
| 34 DEFINE_FLAG(bool, trace_irregexp, false, "Trace irregexps"); | |
| 35 | |
| 36 | |
| 37 static const intptr_t kInvalidTryIndex = CatchClauseNode::kInvalidTryIndex; | |
| 38 static const intptr_t kNoSourcePos = Scanner::kNoSourcePos; | |
| 39 static const intptr_t kMinStackSize = 512; | |
| 40 | |
| 41 | |
| 42 void PrintUtf16(uint16_t c) { | |
| 43 const char* format = (0x20 <= c && c <= 0x7F) ? | |
| 44 "%c" : (c <= 0xff) ? "\\x%02x" : "\\u%04x"; | |
| 45 OS::Print(format, c); | |
| 46 } | |
| 47 | |
| 48 | |
| 49 /* | |
| 50 * This assembler uses the following main local variables: | |
| 51 * - stack_: A pointer to a growable list which we use as an all-purpose stack | |
| 52 * storing backtracking offsets, positions & stored register values. | |
| 53 * - current_character_: Stores the currently loaded characters (possibly more | |
| 54 * than one). | |
| 55 * - current_position_: The current position within the string, stored as a | |
| 56 * negative offset from the end of the string (i.e. the | |
| 57 * position corresponding to str[0] is -str.length). | |
| 58 * Note that current_position_ is *not* byte-based, unlike | |
| 59 * original V8 code. | |
| 60 * | |
| 61 * Results are returned though an array of capture indices, stored at | |
| 62 * matches_param_. A null array specifies a failure to match. The match indices | |
| 63 * [start_inclusive, end_exclusive] for capture group i are stored at positions | |
| 64 * matches_param_[i * 2] and matches_param_[i * 2 + 1], respectively. Match | |
| 65 * indices of -1 denote non-matched groups. Note that we store these indices | |
| 66 * as a negative offset from the end of the string in registers_array_ | |
| 67 * during processing, and convert them to standard indexes when copying them | |
| 68 * to matches_param_ on successful match. | |
| 69 */ | |
| 70 | |
| 71 RegExpMacroAssembler::RegExpMacroAssembler(Zone* zone) | 11 RegExpMacroAssembler::RegExpMacroAssembler(Zone* zone) |
| 72 : slow_safe_compiler_(false), | 12 : slow_safe_compiler_(false), |
| 73 global_mode_(NOT_GLOBAL), | 13 global_mode_(NOT_GLOBAL), |
| 74 zone_(zone) { | 14 zone_(zone) { |
| 75 } | 15 } |
| 76 | 16 |
| 77 | 17 |
| 78 RegExpMacroAssembler::~RegExpMacroAssembler() { | 18 RegExpMacroAssembler::~RegExpMacroAssembler() { |
| 79 } | 19 } |
| 80 | 20 |
| 81 | |
| 82 IRRegExpMacroAssembler::IRRegExpMacroAssembler( | |
| 83 intptr_t specialization_cid, | |
| 84 intptr_t capture_count, | |
| 85 const ParsedFunction* parsed_function, | |
| 86 const ZoneGrowableArray<const ICData*>& ic_data_array, | |
| 87 Zone* zone) | |
| 88 : RegExpMacroAssembler(zone), | |
| 89 specialization_cid_(specialization_cid), | |
| 90 parsed_function_(parsed_function), | |
| 91 ic_data_array_(ic_data_array), | |
| 92 current_instruction_(NULL), | |
| 93 stack_(NULL), | |
| 94 stack_pointer_(NULL), | |
| 95 current_character_(NULL), | |
| 96 current_position_(NULL), | |
| 97 string_param_(NULL), | |
| 98 string_param_length_(NULL), | |
| 99 start_index_param_(NULL), | |
| 100 registers_count_(0), | |
| 101 saved_registers_count_((capture_count + 1) * 2), | |
| 102 stack_array_cell_(Array::ZoneHandle(zone, Array::New(1, Heap::kOld))), | |
| 103 // The registers array is allocated at a fixed size after assembly. | |
| 104 registers_array_(TypedData::ZoneHandle(zone, TypedData::null())) { | |
| 105 switch (specialization_cid) { | |
| 106 case kOneByteStringCid: | |
| 107 case kExternalOneByteStringCid: mode_ = ASCII; break; | |
| 108 case kTwoByteStringCid: | |
| 109 case kExternalTwoByteStringCid: mode_ = UC16; break; | |
| 110 default: UNREACHABLE(); | |
| 111 } | |
| 112 | |
| 113 InitializeLocals(); | |
| 114 | |
| 115 // Allocate an initial stack backing of the minimum stack size. The stack | |
| 116 // backing is indirectly referred to so we can reuse it on subsequent matches | |
| 117 // even in the case where the backing has been enlarged and thus reallocated. | |
| 118 stack_array_cell_.SetAt(0, TypedData::Handle(zone, | |
| 119 TypedData::New(kTypedDataInt32ArrayCid, kMinStackSize / 4, Heap::kOld))); | |
| 120 | |
| 121 // Create and generate all preset blocks. | |
| 122 entry_block_ = | |
| 123 new(zone) GraphEntryInstr( | |
| 124 *parsed_function_, | |
| 125 new(zone) TargetEntryInstr(block_id_.Alloc(), kInvalidTryIndex), | |
| 126 Isolate::kNoDeoptId); | |
| 127 start_block_ = | |
| 128 new(zone) JoinEntryInstr(block_id_.Alloc(), kInvalidTryIndex); | |
| 129 success_block_ = | |
| 130 new(zone) JoinEntryInstr(block_id_.Alloc(), kInvalidTryIndex); | |
| 131 backtrack_block_ = | |
| 132 new(zone) JoinEntryInstr(block_id_.Alloc(), kInvalidTryIndex); | |
| 133 exit_block_ = | |
| 134 new(zone) JoinEntryInstr(block_id_.Alloc(), kInvalidTryIndex); | |
| 135 | |
| 136 GenerateEntryBlock(); | |
| 137 GenerateSuccessBlock(); | |
| 138 GenerateExitBlock(); | |
| 139 | |
| 140 blocks_.Add(entry_block_); | |
| 141 blocks_.Add(entry_block_->normal_entry()); | |
| 142 blocks_.Add(start_block_); | |
| 143 blocks_.Add(success_block_); | |
| 144 blocks_.Add(backtrack_block_); | |
| 145 blocks_.Add(exit_block_); | |
| 146 | |
| 147 // Begin emission at the start_block_. | |
| 148 set_current_instruction(start_block_); | |
| 149 } | |
| 150 | |
| 151 | |
| 152 IRRegExpMacroAssembler::~IRRegExpMacroAssembler() { } | |
| 153 | |
| 154 | |
| 155 void IRRegExpMacroAssembler::InitializeLocals() { | |
| 156 // All generated functions are expected to have a current-context variable. | |
| 157 // This variable is unused in irregexp functions. | |
| 158 parsed_function_->current_context_var()->set_index(GetNextLocalIndex()); | |
| 159 | |
| 160 // Create local variables and parameters. | |
| 161 stack_ = Local(Symbols::stack()); | |
| 162 stack_pointer_ = Local(Symbols::stack_pointer()); | |
| 163 registers_ = Local(Symbols::position_registers()); | |
| 164 current_character_ = Local(Symbols::current_character()); | |
| 165 current_position_ = Local(Symbols::current_position()); | |
| 166 string_param_length_ = Local(Symbols::string_param_length()); | |
| 167 capture_length_ = Local(Symbols::capture_length()); | |
| 168 match_start_index_ = Local(Symbols::match_start_index()); | |
| 169 capture_start_index_ = Local(Symbols::capture_start_index()); | |
| 170 match_end_index_ = Local(Symbols::match_end_index()); | |
| 171 char_in_capture_ = Local(Symbols::char_in_capture()); | |
| 172 char_in_match_ = Local(Symbols::char_in_match()); | |
| 173 index_temp_ = Local(Symbols::index_temp()); | |
| 174 result_ = Local(Symbols::result()); | |
| 175 | |
| 176 string_param_ = Parameter(Symbols::string_param(), 0); | |
| 177 start_index_param_ = Parameter(Symbols::start_index_param(), 1); | |
| 178 } | |
| 179 | |
| 180 | |
| 181 void IRRegExpMacroAssembler::GenerateEntryBlock() { | |
| 182 set_current_instruction(entry_block_->normal_entry()); | |
| 183 TAG(); | |
| 184 | |
| 185 // Store string.length. | |
| 186 PushArgumentInstr* string_push = PushLocal(string_param_); | |
| 187 | |
| 188 StoreLocal( | |
| 189 string_param_length_, | |
| 190 Bind(InstanceCall( | |
| 191 InstanceCallDescriptor( | |
| 192 String::ZoneHandle(Field::GetterSymbol(Symbols::Length()))), | |
| 193 string_push))); | |
| 194 | |
| 195 // Store (start_index - string.length) as the current position (since it's a | |
| 196 // negative offset from the end of the string). | |
| 197 PushArgumentInstr* start_index_push = PushLocal(start_index_param_); | |
| 198 PushArgumentInstr* length_push = PushLocal(string_param_length_); | |
| 199 | |
| 200 StoreLocal(current_position_, Bind(Sub(start_index_push, length_push))); | |
| 201 | |
| 202 // Generate a local list variable to represent "registers" and | |
| 203 // initialize capture registers (others remain garbage). | |
| 204 StoreLocal(registers_, Bind(new(Z) ConstantInstr(registers_array_))); | |
| 205 ClearRegisters(0, saved_registers_count_ - 1); | |
| 206 | |
| 207 // Generate a local list variable to represent the backtracking stack. | |
| 208 PushArgumentInstr* stack_cell_push = | |
| 209 PushArgument(Bind(new(Z) ConstantInstr(stack_array_cell_))); | |
| 210 StoreLocal(stack_, Bind(InstanceCall( | |
| 211 InstanceCallDescriptor::FromToken(Token::kINDEX), | |
| 212 stack_cell_push, | |
| 213 PushArgument(Bind(Uint64Constant(0)))))); | |
| 214 StoreLocal(stack_pointer_, Bind(Int64Constant(-1))); | |
| 215 | |
| 216 // Jump to the start block. | |
| 217 current_instruction_->Goto(start_block_); | |
| 218 } | |
| 219 | |
| 220 | |
| 221 void IRRegExpMacroAssembler::GenerateBacktrackBlock() { | |
| 222 set_current_instruction(backtrack_block_); | |
| 223 TAG(); | |
| 224 CheckPreemption(); | |
| 225 | |
| 226 const intptr_t entries_count = entry_block_->indirect_entries().length(); | |
| 227 | |
| 228 TypedData& offsets = TypedData::ZoneHandle(Z, | |
| 229 TypedData::New(kTypedDataInt32ArrayCid, entries_count, Heap::kOld)); | |
| 230 | |
| 231 PushArgumentInstr* block_offsets_push = | |
| 232 PushArgument(Bind(new(Z) ConstantInstr(offsets))); | |
| 233 PushArgumentInstr* block_id_push = PushArgument(Bind(PopStack())); | |
| 234 | |
| 235 Value* offset_value = | |
| 236 Bind(InstanceCall(InstanceCallDescriptor::FromToken(Token::kINDEX), | |
| 237 block_offsets_push, | |
| 238 block_id_push)); | |
| 239 | |
| 240 backtrack_goto_ = new(Z) IndirectGotoInstr(&offsets, offset_value); | |
| 241 CloseBlockWith(backtrack_goto_); | |
| 242 | |
| 243 // Add an edge from the "indirect" goto to each of the targets. | |
| 244 for (intptr_t j = 0; j < entries_count; j++) { | |
| 245 backtrack_goto_->AddSuccessor( | |
| 246 TargetWithJoinGoto(entry_block_->indirect_entries().At(j))); | |
| 247 } | |
| 248 } | |
| 249 | |
| 250 | |
| 251 void IRRegExpMacroAssembler::GenerateSuccessBlock() { | |
| 252 set_current_instruction(success_block_); | |
| 253 TAG(); | |
| 254 | |
| 255 Value* type = Bind(new(Z) ConstantInstr( | |
| 256 TypeArguments::ZoneHandle(Z, TypeArguments::null()))); | |
| 257 Value* length = Bind(Uint64Constant(saved_registers_count_)); | |
| 258 Value* array = Bind(new(Z) CreateArrayInstr(kNoSourcePos, type, length)); | |
| 259 StoreLocal(result_, array); | |
| 260 | |
| 261 // Store captured offsets in the `matches` parameter. | |
| 262 for (intptr_t i = 0; i < saved_registers_count_; i++) { | |
| 263 PushArgumentInstr* matches_push = PushLocal(result_); | |
| 264 PushArgumentInstr* index_push = PushArgument(Bind(Uint64Constant(i))); | |
| 265 | |
| 266 // Convert negative offsets from the end of the string to string indices. | |
| 267 // TODO(zerny): use positive offsets from the get-go. | |
| 268 PushArgumentInstr* offset_push = PushArgument(LoadRegister(i)); | |
| 269 PushArgumentInstr* len_push = PushLocal(string_param_length_); | |
| 270 PushArgumentInstr* value_push = | |
| 271 PushArgument(Bind(Add(offset_push, len_push))); | |
| 272 | |
| 273 Do(InstanceCall(InstanceCallDescriptor::FromToken(Token::kASSIGN_INDEX), | |
| 274 matches_push, | |
| 275 index_push, | |
| 276 value_push)); | |
| 277 } | |
| 278 | |
| 279 // Print the result if tracing. | |
| 280 PRINT(PushLocal(result_)); | |
| 281 | |
| 282 // Return true on success. | |
| 283 AppendInstruction(new(Z) ReturnInstr(kNoSourcePos, Bind(LoadLocal(result_)))); | |
| 284 } | |
| 285 | |
| 286 | |
| 287 void IRRegExpMacroAssembler::GenerateExitBlock() { | |
| 288 set_current_instruction(exit_block_); | |
| 289 TAG(); | |
| 290 | |
| 291 // Return false on failure. | |
| 292 AppendInstruction(new(Z) ReturnInstr(kNoSourcePos, Bind(LoadLocal(result_)))); | |
| 293 } | |
| 294 | |
| 295 | |
| 296 void IRRegExpMacroAssembler::FinalizeRegistersArray() { | |
| 297 ASSERT(registers_count_ >= saved_registers_count_); | |
| 298 registers_array_ = | |
| 299 TypedData::New(kTypedDataInt32ArrayCid, registers_count_, Heap::kOld); | |
| 300 } | |
| 301 | |
| 302 | |
| 303 #if defined(TARGET_ARCH_ARM64) || \ | |
| 304 defined(TARGET_ARCH_ARM) || \ | |
| 305 defined(TARGET_ARCH_MIPS) | |
| 306 // Disabling unaligned accesses forces the regexp engine to load characters one | |
| 307 // by one instead of up to 4 at once, along with the associated performance hit. | |
| 308 // TODO(zerny): Be less conservative about disabling unaligned accesses. | |
| 309 // For instance, ARMv6 supports unaligned accesses. Once it is enabled here, | |
| 310 // update LoadCodeUnitsInstr methods for the appropriate architectures. | |
| 311 static const bool kEnableUnalignedAccesses = false; | |
| 312 #else | |
| 313 static const bool kEnableUnalignedAccesses = true; | |
| 314 #endif | |
| 315 bool IRRegExpMacroAssembler::CanReadUnaligned() { | |
| 316 return kEnableUnalignedAccesses && !slow_safe(); | |
| 317 } | |
| 318 | |
| 319 | |
| 320 RawArray* IRRegExpMacroAssembler::Execute( | |
| 321 const Function& function, | |
| 322 const String& input, | |
| 323 const Smi& start_offset, | |
| 324 Zone* zone) { | |
| 325 // Create the argument list. | |
| 326 const Array& args = Array::Handle(Array::New(2)); | |
| 327 args.SetAt(0, input); | |
| 328 args.SetAt(1, start_offset); | |
| 329 | |
| 330 // And finally call the generated code. | |
| 331 | |
| 332 const Object& retval = | |
| 333 Object::Handle(zone, DartEntry::InvokeFunction(function, args)); | |
| 334 if (retval.IsError()) { | |
| 335 const Error& error = Error::Cast(retval); | |
| 336 OS::Print("%s\n", error.ToErrorCString()); | |
| 337 // Should never happen. | |
| 338 UNREACHABLE(); | |
| 339 } | |
| 340 | |
| 341 if (retval.IsNull()) { | |
| 342 return Array::null(); | |
| 343 } | |
| 344 | |
| 345 ASSERT(retval.IsArray()); | |
| 346 return Array::Cast(retval).raw(); | |
| 347 } | |
| 348 | |
| 349 | |
| 350 RawBool* IRRegExpMacroAssembler::CaseInsensitiveCompareUC16( | |
| 351 RawString* str_raw, | |
| 352 RawSmi* lhs_index_raw, | |
| 353 RawSmi* rhs_index_raw, | |
| 354 RawSmi* length_raw) { | |
| 355 const String& str = String::Handle(str_raw); | |
| 356 const Smi& lhs_index = Smi::Handle(lhs_index_raw); | |
| 357 const Smi& rhs_index = Smi::Handle(rhs_index_raw); | |
| 358 const Smi& length = Smi::Handle(length_raw); | |
| 359 | |
| 360 // TODO(zerny): Optimize as single instance. V8 has this as an | |
| 361 // isolate member. | |
| 362 unibrow::Mapping<unibrow::Ecma262Canonicalize> canonicalize; | |
| 363 | |
| 364 for (intptr_t i = 0; i < length.Value(); i++) { | |
| 365 int32_t c1 = str.CharAt(lhs_index.Value() + i); | |
| 366 int32_t c2 = str.CharAt(rhs_index.Value() + i); | |
| 367 if (c1 != c2) { | |
| 368 int32_t s1[1] = { c1 }; | |
| 369 canonicalize.get(c1, '\0', s1); | |
| 370 if (s1[0] != c2) { | |
| 371 int32_t s2[1] = { c2 }; | |
| 372 canonicalize.get(c2, '\0', s2); | |
| 373 if (s1[0] != s2[0]) { | |
| 374 return Bool::False().raw(); | |
| 375 } | |
| 376 } | |
| 377 } | |
| 378 } | |
| 379 return Bool::True().raw(); | |
| 380 } | |
| 381 | |
| 382 | |
| 383 LocalVariable* IRRegExpMacroAssembler::Parameter(const String& name, | |
| 384 intptr_t index) const { | |
| 385 const Type& local_type = Type::ZoneHandle(Z, Type::DynamicType()); | |
| 386 LocalVariable* local = | |
| 387 new(Z) LocalVariable(kNoSourcePos, name, local_type); | |
| 388 | |
| 389 intptr_t param_frame_index = kParamEndSlotFromFp + kParamCount - index; | |
| 390 local->set_index(param_frame_index); | |
| 391 | |
| 392 return local; | |
| 393 } | |
| 394 | |
| 395 | |
| 396 LocalVariable* IRRegExpMacroAssembler::Local(const String& name) { | |
| 397 const Type& local_type = Type::ZoneHandle(Z, Type::DynamicType()); | |
| 398 LocalVariable* local = | |
| 399 new(Z) LocalVariable(kNoSourcePos, name, local_type); | |
| 400 local->set_index(GetNextLocalIndex()); | |
| 401 | |
| 402 return local; | |
| 403 } | |
| 404 | |
| 405 | |
| 406 ConstantInstr* IRRegExpMacroAssembler::Int64Constant(int64_t value) const { | |
| 407 return new(Z) ConstantInstr( | |
| 408 Integer::ZoneHandle(Z, Integer::New(value, Heap::kOld))); | |
| 409 } | |
| 410 | |
| 411 | |
| 412 ConstantInstr* IRRegExpMacroAssembler::Uint64Constant(uint64_t value) const { | |
| 413 return new(Z) ConstantInstr( | |
| 414 Integer::ZoneHandle(Z, Integer::NewFromUint64(value, Heap::kOld))); | |
| 415 } | |
| 416 | |
| 417 | |
| 418 ConstantInstr* IRRegExpMacroAssembler::BoolConstant(bool value) const { | |
| 419 return new(Z) ConstantInstr(value ? Bool::True() : Bool::False()); | |
| 420 } | |
| 421 | |
| 422 | |
| 423 ConstantInstr* IRRegExpMacroAssembler::StringConstant(const char* value) const { | |
| 424 return new(Z) ConstantInstr( | |
| 425 String::ZoneHandle(Z, String::New(value, Heap::kOld))); | |
| 426 } | |
| 427 | |
| 428 | |
| 429 ConstantInstr* IRRegExpMacroAssembler::WordCharacterMapConstant() const { | |
| 430 const Library& lib = Library::Handle(Z, Library::CoreLibrary()); | |
| 431 const Class& regexp_class = Class::Handle(Z, | |
| 432 lib.LookupClassAllowPrivate(Symbols::JSSyntaxRegExp())); | |
| 433 const Field& word_character_field = Field::ZoneHandle(Z, | |
| 434 regexp_class.LookupStaticField(Symbols::_wordCharacterMap())); | |
| 435 ASSERT(!word_character_field.IsNull()); | |
| 436 | |
| 437 if (word_character_field.IsUninitialized()) { | |
| 438 word_character_field.EvaluateInitializer(); | |
| 439 } | |
| 440 ASSERT(!word_character_field.IsUninitialized()); | |
| 441 | |
| 442 return new(Z) ConstantInstr( | |
| 443 Instance::ZoneHandle(Z, word_character_field.value())); | |
| 444 } | |
| 445 | |
| 446 | |
| 447 ComparisonInstr* IRRegExpMacroAssembler::Comparison( | |
| 448 ComparisonKind kind, PushArgumentInstr* lhs, PushArgumentInstr* rhs) { | |
| 449 Token::Kind strict_comparison = Token::kEQ_STRICT; | |
| 450 Token::Kind intermediate_operator = Token::kILLEGAL; | |
| 451 switch (kind) { | |
| 452 case kEQ: | |
| 453 intermediate_operator = Token::kEQ; | |
| 454 break; | |
| 455 case kNE: | |
| 456 intermediate_operator = Token::kEQ; | |
| 457 strict_comparison = Token::kNE_STRICT; | |
| 458 break; | |
| 459 case kLT: | |
| 460 intermediate_operator = Token::kLT; | |
| 461 break; | |
| 462 case kGT: | |
| 463 intermediate_operator = Token::kGT; | |
| 464 break; | |
| 465 case kLTE: | |
| 466 intermediate_operator = Token::kLTE; | |
| 467 break; | |
| 468 case kGTE: | |
| 469 intermediate_operator = Token::kGTE; | |
| 470 break; | |
| 471 default: | |
| 472 UNREACHABLE(); | |
| 473 } | |
| 474 | |
| 475 ASSERT(intermediate_operator != Token::kILLEGAL); | |
| 476 | |
| 477 Value* lhs_value = | |
| 478 Bind(InstanceCall( | |
| 479 InstanceCallDescriptor::FromToken(intermediate_operator), | |
| 480 lhs, | |
| 481 rhs)); | |
| 482 Value* rhs_value = Bind(BoolConstant(true)); | |
| 483 | |
| 484 return new(Z) StrictCompareInstr( | |
| 485 kNoSourcePos, strict_comparison, lhs_value, rhs_value, true); | |
| 486 } | |
| 487 | |
| 488 ComparisonInstr* IRRegExpMacroAssembler::Comparison( | |
| 489 ComparisonKind kind, Definition* lhs, Definition* rhs) { | |
| 490 PushArgumentInstr* lhs_push = PushArgument(Bind(lhs)); | |
| 491 PushArgumentInstr* rhs_push = PushArgument(Bind(rhs)); | |
| 492 return Comparison(kind, lhs_push, rhs_push); | |
| 493 } | |
| 494 | |
| 495 | |
| 496 StaticCallInstr* IRRegExpMacroAssembler::StaticCall( | |
| 497 const Function& function) const { | |
| 498 ZoneGrowableArray<PushArgumentInstr*>* arguments = | |
| 499 new(Z) ZoneGrowableArray<PushArgumentInstr*>(0); | |
| 500 return StaticCall(function, arguments); | |
| 501 } | |
| 502 | |
| 503 | |
| 504 StaticCallInstr* IRRegExpMacroAssembler::StaticCall( | |
| 505 const Function& function, | |
| 506 PushArgumentInstr* arg1) const { | |
| 507 ZoneGrowableArray<PushArgumentInstr*>* arguments = | |
| 508 new(Z) ZoneGrowableArray<PushArgumentInstr*>(1); | |
| 509 arguments->Add(arg1); | |
| 510 | |
| 511 return StaticCall(function, arguments); | |
| 512 } | |
| 513 | |
| 514 | |
| 515 StaticCallInstr* IRRegExpMacroAssembler::StaticCall( | |
| 516 const Function& function, | |
| 517 PushArgumentInstr* arg1, | |
| 518 PushArgumentInstr* arg2) const { | |
| 519 ZoneGrowableArray<PushArgumentInstr*>* arguments = | |
| 520 new(Z) ZoneGrowableArray<PushArgumentInstr*>(2); | |
| 521 arguments->Add(arg1); | |
| 522 arguments->Add(arg2); | |
| 523 | |
| 524 return StaticCall(function, arguments); | |
| 525 } | |
| 526 | |
| 527 | |
| 528 StaticCallInstr* IRRegExpMacroAssembler::StaticCall( | |
| 529 const Function& function, | |
| 530 ZoneGrowableArray<PushArgumentInstr*>* arguments) const { | |
| 531 return new(Z) StaticCallInstr(kNoSourcePos, | |
| 532 function, | |
| 533 Object::null_array(), | |
| 534 arguments, | |
| 535 ic_data_array_); | |
| 536 } | |
| 537 | |
| 538 | |
| 539 InstanceCallInstr* IRRegExpMacroAssembler::InstanceCall( | |
| 540 const InstanceCallDescriptor& desc, | |
| 541 PushArgumentInstr* arg1) const { | |
| 542 ZoneGrowableArray<PushArgumentInstr*>* arguments = | |
| 543 new(Z) ZoneGrowableArray<PushArgumentInstr*>(1); | |
| 544 arguments->Add(arg1); | |
| 545 | |
| 546 return InstanceCall(desc, arguments); | |
| 547 } | |
| 548 | |
| 549 | |
| 550 InstanceCallInstr* IRRegExpMacroAssembler::InstanceCall( | |
| 551 const InstanceCallDescriptor& desc, | |
| 552 PushArgumentInstr* arg1, | |
| 553 PushArgumentInstr* arg2) const { | |
| 554 ZoneGrowableArray<PushArgumentInstr*>* arguments = | |
| 555 new(Z) ZoneGrowableArray<PushArgumentInstr*>(2); | |
| 556 arguments->Add(arg1); | |
| 557 arguments->Add(arg2); | |
| 558 | |
| 559 return InstanceCall(desc, arguments); | |
| 560 } | |
| 561 | |
| 562 | |
| 563 InstanceCallInstr* IRRegExpMacroAssembler::InstanceCall( | |
| 564 const InstanceCallDescriptor& desc, | |
| 565 PushArgumentInstr* arg1, | |
| 566 PushArgumentInstr* arg2, | |
| 567 PushArgumentInstr* arg3) const { | |
| 568 ZoneGrowableArray<PushArgumentInstr*>* arguments = | |
| 569 new(Z) ZoneGrowableArray<PushArgumentInstr*>(3); | |
| 570 arguments->Add(arg1); | |
| 571 arguments->Add(arg2); | |
| 572 arguments->Add(arg3); | |
| 573 | |
| 574 return InstanceCall(desc, arguments); | |
| 575 } | |
| 576 | |
| 577 | |
| 578 InstanceCallInstr* IRRegExpMacroAssembler::InstanceCall( | |
| 579 const InstanceCallDescriptor& desc, | |
| 580 ZoneGrowableArray<PushArgumentInstr*> *arguments) const { | |
| 581 return | |
| 582 new(Z) InstanceCallInstr(kNoSourcePos, | |
| 583 desc.name, | |
| 584 desc.token_kind, | |
| 585 arguments, | |
| 586 Object::null_array(), | |
| 587 desc.checked_argument_count, | |
| 588 ic_data_array_); | |
| 589 } | |
| 590 | |
| 591 | |
| 592 LoadLocalInstr* IRRegExpMacroAssembler::LoadLocal(LocalVariable* local) const { | |
| 593 return new(Z) LoadLocalInstr(*local); | |
| 594 } | |
| 595 | |
| 596 | |
| 597 void IRRegExpMacroAssembler::StoreLocal(LocalVariable* local, | |
| 598 Value* value) { | |
| 599 Do(new(Z) StoreLocalInstr(*local, value)); | |
| 600 } | |
| 601 | |
| 602 | |
| 603 void IRRegExpMacroAssembler::set_current_instruction(Instruction* instruction) { | |
| 604 current_instruction_ = instruction; | |
| 605 } | |
| 606 | |
| 607 | |
| 608 Value* IRRegExpMacroAssembler::Bind(Definition* definition) { | |
| 609 AppendInstruction(definition); | |
| 610 definition->set_temp_index(temp_id_.Alloc()); | |
| 611 | |
| 612 return new(Z) Value(definition); | |
| 613 } | |
| 614 | |
| 615 | |
| 616 void IRRegExpMacroAssembler::Do(Definition* definition) { | |
| 617 AppendInstruction(definition); | |
| 618 } | |
| 619 | |
| 620 | |
| 621 Value* IRRegExpMacroAssembler::BindLoadLocal(const LocalVariable& local) { | |
| 622 if (local.IsConst()) { | |
| 623 return Bind(new(Z) ConstantInstr(*local.ConstValue())); | |
| 624 } | |
| 625 ASSERT(!local.is_captured()); | |
| 626 return Bind(new(Z) LoadLocalInstr(local)); | |
| 627 } | |
| 628 | |
| 629 | |
| 630 // In some cases, the V8 irregexp engine generates unreachable code by emitting | |
| 631 // a jmp not followed by a bind. We cannot do the same, since it is impossible | |
| 632 // to append to a block following a jmp. In such cases, assume that we are doing | |
| 633 // the correct thing, but output a warning when tracing. | |
| 634 #define HANDLE_DEAD_CODE_EMISSION() \ | |
| 635 if (current_instruction_ == NULL) { \ | |
| 636 if (FLAG_trace_irregexp) { \ | |
| 637 OS::Print("WARNING: Attempting to append to a closed assembler. " \ | |
| 638 "This could be either a bug or generation of dead code " \ | |
| 639 "inherited from V8.\n"); \ | |
| 640 } \ | |
| 641 BlockLabel dummy; \ | |
| 642 BindBlock(&dummy); \ | |
| 643 } | |
| 644 | |
| 645 void IRRegExpMacroAssembler::AppendInstruction(Instruction* instruction) { | |
| 646 HANDLE_DEAD_CODE_EMISSION(); | |
| 647 | |
| 648 ASSERT(current_instruction_ != NULL); | |
| 649 ASSERT(current_instruction_->next() == NULL); | |
| 650 | |
| 651 temp_id_.Dealloc(instruction->InputCount()); | |
| 652 arg_id_.Dealloc(instruction->ArgumentCount()); | |
| 653 | |
| 654 current_instruction_->LinkTo(instruction); | |
| 655 set_current_instruction(instruction); | |
| 656 } | |
| 657 | |
| 658 | |
| 659 void IRRegExpMacroAssembler::CloseBlockWith(Instruction* instruction) { | |
| 660 HANDLE_DEAD_CODE_EMISSION(); | |
| 661 | |
| 662 ASSERT(current_instruction_ != NULL); | |
| 663 ASSERT(current_instruction_->next() == NULL); | |
| 664 | |
| 665 temp_id_.Dealloc(instruction->InputCount()); | |
| 666 arg_id_.Dealloc(instruction->ArgumentCount()); | |
| 667 | |
| 668 current_instruction_->LinkTo(instruction); | |
| 669 set_current_instruction(NULL); | |
| 670 } | |
| 671 | |
| 672 | |
| 673 void IRRegExpMacroAssembler::GoTo(BlockLabel* to) { | |
| 674 if (to == NULL) { | |
| 675 Backtrack(); | |
| 676 } else { | |
| 677 to->SetLinked(); | |
| 678 GoTo(to->block()); | |
| 679 } | |
| 680 } | |
| 681 | |
| 682 | |
| 683 // Closes the current block with a goto, and unsets current_instruction_. | |
| 684 // BindBlock() must be called before emission can continue. | |
| 685 void IRRegExpMacroAssembler::GoTo(JoinEntryInstr* to) { | |
| 686 HANDLE_DEAD_CODE_EMISSION(); | |
| 687 | |
| 688 ASSERT(current_instruction_ != NULL); | |
| 689 ASSERT(current_instruction_->next() == NULL); | |
| 690 current_instruction_->Goto(to); | |
| 691 set_current_instruction(NULL); | |
| 692 } | |
| 693 | |
| 694 | |
| 695 PushArgumentInstr* IRRegExpMacroAssembler::PushArgument(Value* value) { | |
| 696 arg_id_.Alloc(); | |
| 697 PushArgumentInstr* push = new(Z) PushArgumentInstr(value); | |
| 698 // Do *not* use Do() for push argument instructions. | |
| 699 AppendInstruction(push); | |
| 700 return push; | |
| 701 } | |
| 702 | |
| 703 | |
| 704 PushArgumentInstr* IRRegExpMacroAssembler::PushLocal(LocalVariable* local) { | |
| 705 return PushArgument(Bind(LoadLocal(local))); | |
| 706 } | |
| 707 | |
| 708 | |
| 709 void IRRegExpMacroAssembler::Print(const char* str) { | |
| 710 Print(PushArgument( | |
| 711 Bind(new(Z) ConstantInstr( | |
| 712 String::ZoneHandle(Z, String::New(str, Heap::kOld)))))); | |
| 713 } | |
| 714 | |
| 715 | |
| 716 void IRRegExpMacroAssembler::Print(PushArgumentInstr* argument) { | |
| 717 const Library& lib = Library::Handle(Library::CoreLibrary()); | |
| 718 const Function& print_fn = Function::ZoneHandle( | |
| 719 Z, lib.LookupFunctionAllowPrivate(Symbols::print())); | |
| 720 Do(StaticCall(print_fn, argument)); | |
| 721 } | |
| 722 | |
| 723 | |
| 724 void IRRegExpMacroAssembler::PrintBlocks() { | |
| 725 for (intptr_t i = 0; i < blocks_.length(); i++) { | |
| 726 FlowGraphPrinter::PrintBlock(blocks_[i], false); | |
| 727 } | |
| 728 } | |
| 729 | |
| 730 | |
| 731 intptr_t IRRegExpMacroAssembler::stack_limit_slack() { | |
| 732 return 32; | |
| 733 } | |
| 734 | |
| 735 | |
| 736 void IRRegExpMacroAssembler::AdvanceCurrentPosition(intptr_t by) { | |
| 737 TAG(); | |
| 738 if (by != 0) { | |
| 739 PushArgumentInstr* cur_pos_push = PushLocal(current_position_); | |
| 740 PushArgumentInstr* by_push = PushArgument(Bind(Int64Constant(by))); | |
| 741 | |
| 742 Value* new_pos_value = Bind(Add(cur_pos_push, by_push)); | |
| 743 StoreLocal(current_position_, new_pos_value); | |
| 744 } | |
| 745 } | |
| 746 | |
| 747 | |
| 748 void IRRegExpMacroAssembler::AdvanceRegister(intptr_t reg, intptr_t by) { | |
| 749 TAG(); | |
| 750 ASSERT(reg >= 0); | |
| 751 ASSERT(reg < registers_count_); | |
| 752 | |
| 753 if (by != 0) { | |
| 754 PushArgumentInstr* registers_push = PushLocal(registers_); | |
| 755 PushArgumentInstr* index_push = PushRegisterIndex(reg); | |
| 756 PushArgumentInstr* reg_push = PushArgument(LoadRegister(reg)); | |
| 757 PushArgumentInstr* by_push = PushArgument(Bind(Int64Constant(by))); | |
| 758 PushArgumentInstr* value_push = PushArgument(Bind(Add(reg_push, by_push))); | |
| 759 StoreRegister(registers_push, index_push, value_push); | |
| 760 } | |
| 761 } | |
| 762 | |
| 763 | |
| 764 void IRRegExpMacroAssembler::Backtrack() { | |
| 765 TAG(); | |
| 766 GoTo(backtrack_block_); | |
| 767 } | |
| 768 | |
| 769 | |
| 770 // A BindBlock is analogous to assigning a label to a basic block. | |
| 771 // If the BlockLabel does not yet contain a block, it is created. | |
| 772 // If there is a current instruction, append a goto to the bound block. | |
| 773 void IRRegExpMacroAssembler::BindBlock(BlockLabel* label) { | |
| 774 ASSERT(!label->IsBound()); | |
| 775 ASSERT(label->block()->next() == NULL); | |
| 776 | |
| 777 label->SetBound(block_id_.Alloc()); | |
| 778 blocks_.Add(label->block()); | |
| 779 | |
| 780 if (current_instruction_ != NULL) { | |
| 781 GoTo(label); | |
| 782 } | |
| 783 set_current_instruction(label->block()); | |
| 784 | |
| 785 // Print the id of the current block if tracing. | |
| 786 PRINT(PushArgument(Bind(Uint64Constant(label->block()->block_id())))); | |
| 787 } | |
| 788 | |
| 789 | |
| 790 intptr_t IRRegExpMacroAssembler::GetNextLocalIndex() { | |
| 791 intptr_t id = local_id_.Alloc(); | |
| 792 return kFirstLocalSlotFromFp - id; | |
| 793 } | |
| 794 | |
| 795 | |
| 796 Value* IRRegExpMacroAssembler::LoadRegister(intptr_t index) { | |
| 797 PushArgumentInstr* registers_push = PushLocal(registers_); | |
| 798 PushArgumentInstr* index_push = PushRegisterIndex(index); | |
| 799 return Bind(InstanceCall(InstanceCallDescriptor::FromToken(Token::kINDEX), | |
| 800 registers_push, | |
| 801 index_push)); | |
| 802 } | |
| 803 | |
| 804 void IRRegExpMacroAssembler::StoreRegister(intptr_t index, intptr_t value) { | |
| 805 PushArgumentInstr* registers_push = PushLocal(registers_); | |
| 806 PushArgumentInstr* index_push = PushRegisterIndex(index); | |
| 807 PushArgumentInstr* value_push = PushArgument(Bind(Uint64Constant(value))); | |
| 808 StoreRegister(registers_push, index_push, value_push); | |
| 809 } | |
| 810 | |
| 811 | |
| 812 void IRRegExpMacroAssembler::StoreRegister(PushArgumentInstr* registers, | |
| 813 PushArgumentInstr* index, | |
| 814 PushArgumentInstr* value) { | |
| 815 TAG(); | |
| 816 Do(InstanceCall(InstanceCallDescriptor::FromToken(Token::kASSIGN_INDEX), | |
| 817 registers, | |
| 818 index, | |
| 819 value)); | |
| 820 } | |
| 821 | |
| 822 PushArgumentInstr* IRRegExpMacroAssembler::PushRegisterIndex(intptr_t index) { | |
| 823 if (registers_count_ <= index) { | |
| 824 registers_count_ = index + 1; | |
| 825 } | |
| 826 return PushArgument(Bind(Uint64Constant(index))); | |
| 827 } | |
| 828 | |
| 829 | |
| 830 void IRRegExpMacroAssembler::CheckCharacter(uint32_t c, BlockLabel* on_equal) { | |
| 831 TAG(); | |
| 832 Definition* cur_char_def = LoadLocal(current_character_); | |
| 833 Definition* char_def = Uint64Constant(c); | |
| 834 | |
| 835 BranchOrBacktrack(Comparison(kEQ, cur_char_def, char_def), on_equal); | |
| 836 } | |
| 837 | |
| 838 | |
| 839 void IRRegExpMacroAssembler::CheckCharacterGT(uint16_t limit, | |
| 840 BlockLabel* on_greater) { | |
| 841 TAG(); | |
| 842 BranchOrBacktrack(Comparison(kGT, | |
| 843 LoadLocal(current_character_), | |
| 844 Uint64Constant(limit)), | |
| 845 on_greater); | |
| 846 } | |
| 847 | |
| 848 | |
| 849 void IRRegExpMacroAssembler::CheckAtStart(BlockLabel* on_at_start) { | |
| 850 TAG(); | |
| 851 | |
| 852 BlockLabel not_at_start; | |
| 853 | |
| 854 // Did we start the match at the start of the string at all? | |
| 855 BranchOrBacktrack(Comparison(kNE, | |
| 856 LoadLocal(start_index_param_), | |
| 857 Uint64Constant(0)), | |
| 858 ¬_at_start); | |
| 859 | |
| 860 // If we did, are we still at the start of the input, i.e. is | |
| 861 // (offset == string_length * -1)? | |
| 862 Definition* neg_len_def = | |
| 863 InstanceCall(InstanceCallDescriptor::FromToken(Token::kNEGATE), | |
| 864 PushLocal(string_param_length_)); | |
| 865 Definition* offset_def = LoadLocal(current_position_); | |
| 866 BranchOrBacktrack(Comparison(kEQ, neg_len_def, offset_def), | |
| 867 on_at_start); | |
| 868 | |
| 869 BindBlock(¬_at_start); | |
| 870 } | |
| 871 | |
| 872 | |
| 873 void IRRegExpMacroAssembler::CheckNotAtStart(BlockLabel* on_not_at_start) { | |
| 874 TAG(); | |
| 875 | |
| 876 // Did we start the match at the start of the string at all? | |
| 877 BranchOrBacktrack(Comparison(kNE, | |
| 878 LoadLocal(start_index_param_), | |
| 879 Uint64Constant(0)), | |
| 880 on_not_at_start); | |
| 881 | |
| 882 // If we did, are we still at the start of the input, i.e. is | |
| 883 // (offset == string_length * -1)? | |
| 884 Definition* neg_len_def = | |
| 885 InstanceCall(InstanceCallDescriptor::FromToken(Token::kNEGATE), | |
| 886 PushLocal(string_param_length_)); | |
| 887 Definition* offset_def = LoadLocal(current_position_); | |
| 888 BranchOrBacktrack(Comparison(kNE, neg_len_def, offset_def), | |
| 889 on_not_at_start); | |
| 890 } | |
| 891 | |
| 892 | |
| 893 void IRRegExpMacroAssembler::CheckCharacterLT(uint16_t limit, | |
| 894 BlockLabel* on_less) { | |
| 895 TAG(); | |
| 896 BranchOrBacktrack(Comparison(kLT, | |
| 897 LoadLocal(current_character_), | |
| 898 Uint64Constant(limit)), | |
| 899 on_less); | |
| 900 } | |
| 901 | |
| 902 | |
| 903 void IRRegExpMacroAssembler::CheckGreedyLoop(BlockLabel* on_equal) { | |
| 904 TAG(); | |
| 905 | |
| 906 BlockLabel fallthrough; | |
| 907 | |
| 908 Definition* head = PeekStack(); | |
| 909 Definition* cur_pos_def = LoadLocal(current_position_); | |
| 910 BranchOrBacktrack(Comparison(kNE, head, cur_pos_def), | |
| 911 &fallthrough); | |
| 912 | |
| 913 // Pop, throwing away the value. | |
| 914 Do(PopStack()); | |
| 915 | |
| 916 BranchOrBacktrack(NULL, on_equal); | |
| 917 | |
| 918 BindBlock(&fallthrough); | |
| 919 } | |
| 920 | |
| 921 | |
| 922 void IRRegExpMacroAssembler::CheckNotBackReferenceIgnoreCase( | |
| 923 intptr_t start_reg, | |
| 924 BlockLabel* on_no_match) { | |
| 925 TAG(); | |
| 926 ASSERT(start_reg + 1 <= registers_count_); | |
| 927 | |
| 928 BlockLabel fallthrough; | |
| 929 | |
| 930 PushArgumentInstr* end_push = PushArgument(LoadRegister(start_reg + 1)); | |
| 931 PushArgumentInstr* start_push = PushArgument(LoadRegister(start_reg)); | |
| 932 StoreLocal(capture_length_, Bind(Sub(end_push, start_push))); | |
| 933 | |
| 934 // The length of a capture should not be negative. This can only happen | |
| 935 // if the end of the capture is unrecorded, or at a point earlier than | |
| 936 // the start of the capture. | |
| 937 // BranchOrBacktrack(less, on_no_match); | |
| 938 | |
| 939 BranchOrBacktrack(Comparison(kLT, | |
| 940 LoadLocal(capture_length_), | |
| 941 Uint64Constant(0)), | |
| 942 on_no_match); | |
| 943 | |
| 944 // If length is zero, either the capture is empty or it is completely | |
| 945 // uncaptured. In either case succeed immediately. | |
| 946 BranchOrBacktrack(Comparison(kEQ, | |
| 947 LoadLocal(capture_length_), | |
| 948 Uint64Constant(0)), | |
| 949 &fallthrough); | |
| 950 | |
| 951 | |
| 952 // Check that there are sufficient characters left in the input. | |
| 953 PushArgumentInstr* pos_push = PushLocal(current_position_); | |
| 954 PushArgumentInstr* len_push = PushLocal(capture_length_); | |
| 955 BranchOrBacktrack( | |
| 956 Comparison(kGT, | |
| 957 InstanceCall(InstanceCallDescriptor::FromToken(Token::kADD), | |
| 958 pos_push, | |
| 959 len_push), | |
| 960 Uint64Constant(0)), | |
| 961 on_no_match); | |
| 962 | |
| 963 pos_push = PushLocal(current_position_); | |
| 964 len_push = PushLocal(string_param_length_); | |
| 965 StoreLocal(match_start_index_, Bind(Add(pos_push, len_push))); | |
| 966 | |
| 967 pos_push = PushArgument(LoadRegister(start_reg)); | |
| 968 len_push = PushLocal(string_param_length_); | |
| 969 StoreLocal(capture_start_index_, Bind(Add(pos_push, len_push))); | |
| 970 | |
| 971 pos_push = PushLocal(match_start_index_); | |
| 972 len_push = PushLocal(capture_length_); | |
| 973 StoreLocal(match_end_index_, Bind(Add(pos_push, len_push))); | |
| 974 | |
| 975 BlockLabel success; | |
| 976 if (mode_ == ASCII) { | |
| 977 BlockLabel loop_increment; | |
| 978 BlockLabel loop; | |
| 979 BindBlock(&loop); | |
| 980 | |
| 981 StoreLocal(char_in_capture_, CharacterAt(capture_start_index_)); | |
| 982 StoreLocal(char_in_match_, CharacterAt(match_start_index_)); | |
| 983 | |
| 984 BranchOrBacktrack(Comparison(kEQ, | |
| 985 LoadLocal(char_in_capture_), | |
| 986 LoadLocal(char_in_match_)), | |
| 987 &loop_increment); | |
| 988 | |
| 989 // Mismatch, try case-insensitive match (converting letters to lower-case). | |
| 990 PushArgumentInstr* match_char_push = PushLocal(char_in_match_); | |
| 991 PushArgumentInstr* mask_push = PushArgument(Bind(Uint64Constant(0x20))); | |
| 992 StoreLocal(char_in_match_, | |
| 993 Bind(InstanceCall( | |
| 994 InstanceCallDescriptor::FromToken(Token::kBIT_OR), | |
| 995 match_char_push, | |
| 996 mask_push))); | |
| 997 | |
| 998 BlockLabel convert_capture; | |
| 999 BlockLabel on_not_in_range; | |
| 1000 BranchOrBacktrack(Comparison(kLT, | |
| 1001 LoadLocal(char_in_match_), | |
| 1002 Uint64Constant('a')), | |
| 1003 &on_not_in_range); | |
| 1004 BranchOrBacktrack(Comparison(kGT, | |
| 1005 LoadLocal(char_in_match_), | |
| 1006 Uint64Constant('z')), | |
| 1007 &on_not_in_range); | |
| 1008 GoTo(&convert_capture); | |
| 1009 BindBlock(&on_not_in_range); | |
| 1010 | |
| 1011 // Latin-1: Check for values in range [224,254] but not 247. | |
| 1012 BranchOrBacktrack(Comparison(kLT, | |
| 1013 LoadLocal(char_in_match_), | |
| 1014 Uint64Constant(224)), | |
| 1015 on_no_match); | |
| 1016 BranchOrBacktrack(Comparison(kGT, | |
| 1017 LoadLocal(char_in_match_), | |
| 1018 Uint64Constant(254)), | |
| 1019 on_no_match); | |
| 1020 | |
| 1021 BranchOrBacktrack(Comparison(kEQ, | |
| 1022 LoadLocal(char_in_match_), | |
| 1023 Uint64Constant(247)), | |
| 1024 on_no_match); | |
| 1025 | |
| 1026 // Also convert capture character. | |
| 1027 BindBlock(&convert_capture); | |
| 1028 | |
| 1029 PushArgumentInstr* capture_char_push = PushLocal(char_in_capture_); | |
| 1030 mask_push = PushArgument(Bind(Uint64Constant(0x20))); | |
| 1031 StoreLocal(char_in_capture_, | |
| 1032 Bind(InstanceCall( | |
| 1033 InstanceCallDescriptor::FromToken(Token::kBIT_OR), | |
| 1034 capture_char_push, | |
| 1035 mask_push))); | |
| 1036 | |
| 1037 BranchOrBacktrack(Comparison(kNE, | |
| 1038 LoadLocal(char_in_match_), | |
| 1039 LoadLocal(char_in_capture_)), | |
| 1040 on_no_match); | |
| 1041 | |
| 1042 BindBlock(&loop_increment); | |
| 1043 | |
| 1044 // Increment indexes into capture and match strings. | |
| 1045 PushArgumentInstr* index_push = PushLocal(capture_start_index_); | |
| 1046 PushArgumentInstr* inc_push = PushArgument(Bind(Uint64Constant(1))); | |
| 1047 StoreLocal(capture_start_index_, Bind(Add(index_push, inc_push))); | |
| 1048 | |
| 1049 index_push = PushLocal(match_start_index_); | |
| 1050 inc_push = PushArgument(Bind(Uint64Constant(1))); | |
| 1051 StoreLocal(match_start_index_, Bind(Add(index_push, inc_push))); | |
| 1052 | |
| 1053 // Compare to end of match, and loop if not done. | |
| 1054 BranchOrBacktrack(Comparison(kLT, | |
| 1055 LoadLocal(match_start_index_), | |
| 1056 LoadLocal(match_end_index_)), | |
| 1057 &loop); | |
| 1058 } else { | |
| 1059 ASSERT(mode_ == UC16); | |
| 1060 | |
| 1061 Value* string_value = Bind(LoadLocal(string_param_)); | |
| 1062 Value* lhs_index_value = Bind(LoadLocal(match_start_index_)); | |
| 1063 Value* rhs_index_value = Bind(LoadLocal(capture_start_index_)); | |
| 1064 Value* length_value = Bind(LoadLocal(capture_length_)); | |
| 1065 | |
| 1066 Definition* is_match_def = | |
| 1067 new(Z) CaseInsensitiveCompareUC16Instr( | |
| 1068 string_value, | |
| 1069 lhs_index_value, | |
| 1070 rhs_index_value, | |
| 1071 length_value, | |
| 1072 specialization_cid_); | |
| 1073 | |
| 1074 BranchOrBacktrack(Comparison(kNE, is_match_def, BoolConstant(true)), | |
| 1075 on_no_match); | |
| 1076 } | |
| 1077 | |
| 1078 BindBlock(&success); | |
| 1079 | |
| 1080 // Move current character position to position after match. | |
| 1081 PushArgumentInstr* match_end_push = PushLocal(match_end_index_); | |
| 1082 len_push = PushLocal(string_param_length_); | |
| 1083 StoreLocal(current_position_, Bind(Sub(match_end_push, len_push))); | |
| 1084 | |
| 1085 BindBlock(&fallthrough); | |
| 1086 } | |
| 1087 | |
| 1088 | |
| 1089 void IRRegExpMacroAssembler::CheckNotBackReference( | |
| 1090 intptr_t start_reg, | |
| 1091 BlockLabel* on_no_match) { | |
| 1092 TAG(); | |
| 1093 ASSERT(start_reg + 1 <= registers_count_); | |
| 1094 | |
| 1095 BlockLabel fallthrough; | |
| 1096 BlockLabel success; | |
| 1097 | |
| 1098 // Find length of back-referenced capture. | |
| 1099 PushArgumentInstr* end_push = PushArgument(LoadRegister(start_reg + 1)); | |
| 1100 PushArgumentInstr* start_push = PushArgument(LoadRegister(start_reg)); | |
| 1101 StoreLocal(capture_length_, Bind(Sub(end_push, start_push))); | |
| 1102 | |
| 1103 // Fail on partial or illegal capture (start of capture after end of capture). | |
| 1104 BranchOrBacktrack(Comparison(kLT, | |
| 1105 LoadLocal(capture_length_), | |
| 1106 Uint64Constant(0)), | |
| 1107 on_no_match); | |
| 1108 | |
| 1109 // Succeed on empty capture (including no capture) | |
| 1110 BranchOrBacktrack(Comparison(kEQ, | |
| 1111 LoadLocal(capture_length_), | |
| 1112 Uint64Constant(0)), | |
| 1113 &fallthrough); | |
| 1114 | |
| 1115 // Check that there are sufficient characters left in the input. | |
| 1116 PushArgumentInstr* pos_push = PushLocal(current_position_); | |
| 1117 PushArgumentInstr* len_push = PushLocal(capture_length_); | |
| 1118 BranchOrBacktrack( | |
| 1119 Comparison(kGT, | |
| 1120 InstanceCall(InstanceCallDescriptor::FromToken(Token::kADD), | |
| 1121 pos_push, | |
| 1122 len_push), | |
| 1123 Uint64Constant(0)), | |
| 1124 on_no_match); | |
| 1125 | |
| 1126 // Compute pointers to match string and capture string. | |
| 1127 pos_push = PushLocal(current_position_); | |
| 1128 len_push = PushLocal(string_param_length_); | |
| 1129 StoreLocal(match_start_index_, Bind(Add(pos_push, len_push))); | |
| 1130 | |
| 1131 pos_push = PushArgument(LoadRegister(start_reg)); | |
| 1132 len_push = PushLocal(string_param_length_); | |
| 1133 StoreLocal(capture_start_index_, Bind(Add(pos_push, len_push))); | |
| 1134 | |
| 1135 pos_push = PushLocal(match_start_index_); | |
| 1136 len_push = PushLocal(capture_length_); | |
| 1137 StoreLocal(match_end_index_, Bind(Add(pos_push, len_push))); | |
| 1138 | |
| 1139 BlockLabel loop; | |
| 1140 BindBlock(&loop); | |
| 1141 | |
| 1142 StoreLocal(char_in_capture_, CharacterAt(capture_start_index_)); | |
| 1143 StoreLocal(char_in_match_, CharacterAt(match_start_index_)); | |
| 1144 | |
| 1145 BranchOrBacktrack(Comparison(kNE, | |
| 1146 LoadLocal(char_in_capture_), | |
| 1147 LoadLocal(char_in_match_)), | |
| 1148 on_no_match); | |
| 1149 | |
| 1150 // Increment indexes into capture and match strings. | |
| 1151 PushArgumentInstr* index_push = PushLocal(capture_start_index_); | |
| 1152 PushArgumentInstr* inc_push = PushArgument(Bind(Uint64Constant(1))); | |
| 1153 StoreLocal(capture_start_index_, Bind(Add(index_push, inc_push))); | |
| 1154 | |
| 1155 index_push = PushLocal(match_start_index_); | |
| 1156 inc_push = PushArgument(Bind(Uint64Constant(1))); | |
| 1157 StoreLocal(match_start_index_, Bind(Add(index_push, inc_push))); | |
| 1158 | |
| 1159 // Check if we have reached end of match area. | |
| 1160 BranchOrBacktrack(Comparison(kLT, | |
| 1161 LoadLocal(match_start_index_), | |
| 1162 LoadLocal(match_end_index_)), | |
| 1163 &loop); | |
| 1164 | |
| 1165 BindBlock(&success); | |
| 1166 | |
| 1167 // Move current character position to position after match. | |
| 1168 PushArgumentInstr* match_end_push = PushLocal(match_end_index_); | |
| 1169 len_push = PushLocal(string_param_length_); | |
| 1170 StoreLocal(current_position_, Bind(Sub(match_end_push, len_push))); | |
| 1171 | |
| 1172 BindBlock(&fallthrough); | |
| 1173 } | |
| 1174 | |
| 1175 | |
| 1176 void IRRegExpMacroAssembler::CheckNotCharacter(uint32_t c, | |
| 1177 BlockLabel* on_not_equal) { | |
| 1178 TAG(); | |
| 1179 BranchOrBacktrack(Comparison(kNE, | |
| 1180 LoadLocal(current_character_), | |
| 1181 Uint64Constant(c)), | |
| 1182 on_not_equal); | |
| 1183 } | |
| 1184 | |
| 1185 | |
| 1186 void IRRegExpMacroAssembler::CheckCharacterAfterAnd(uint32_t c, | |
| 1187 uint32_t mask, | |
| 1188 BlockLabel* on_equal) { | |
| 1189 TAG(); | |
| 1190 | |
| 1191 Definition* actual_def = LoadLocal(current_character_); | |
| 1192 Definition* expected_def = Uint64Constant(c); | |
| 1193 | |
| 1194 PushArgumentInstr* actual_push = PushArgument(Bind(actual_def)); | |
| 1195 PushArgumentInstr* mask_push = PushArgument(Bind(Uint64Constant(mask))); | |
| 1196 actual_def = InstanceCall(InstanceCallDescriptor::FromToken(Token::kBIT_AND), | |
| 1197 actual_push, | |
| 1198 mask_push); | |
| 1199 | |
| 1200 BranchOrBacktrack(Comparison(kEQ, actual_def, expected_def), on_equal); | |
| 1201 } | |
| 1202 | |
| 1203 | |
| 1204 void IRRegExpMacroAssembler::CheckNotCharacterAfterAnd( | |
| 1205 uint32_t c, | |
| 1206 uint32_t mask, | |
| 1207 BlockLabel* on_not_equal) { | |
| 1208 TAG(); | |
| 1209 | |
| 1210 Definition* actual_def = LoadLocal(current_character_); | |
| 1211 Definition* expected_def = Uint64Constant(c); | |
| 1212 | |
| 1213 PushArgumentInstr* actual_push = PushArgument(Bind(actual_def)); | |
| 1214 PushArgumentInstr* mask_push = PushArgument(Bind(Uint64Constant(mask))); | |
| 1215 actual_def = InstanceCall(InstanceCallDescriptor::FromToken(Token::kBIT_AND), | |
| 1216 actual_push, | |
| 1217 mask_push); | |
| 1218 | |
| 1219 BranchOrBacktrack(Comparison(kNE, actual_def, expected_def), on_not_equal); | |
| 1220 } | |
| 1221 | |
| 1222 | |
| 1223 void IRRegExpMacroAssembler::CheckNotCharacterAfterMinusAnd( | |
| 1224 uint16_t c, | |
| 1225 uint16_t minus, | |
| 1226 uint16_t mask, | |
| 1227 BlockLabel* on_not_equal) { | |
| 1228 TAG(); | |
| 1229 ASSERT(minus < Utf16::kMaxCodeUnit); // NOLINT | |
| 1230 | |
| 1231 Definition* actual_def = LoadLocal(current_character_); | |
| 1232 Definition* expected_def = Uint64Constant(c); | |
| 1233 | |
| 1234 PushArgumentInstr* actual_push = PushArgument(Bind(actual_def)); | |
| 1235 PushArgumentInstr* minus_push = PushArgument(Bind(Uint64Constant(minus))); | |
| 1236 | |
| 1237 actual_push = PushArgument(Bind(Sub(actual_push, minus_push))); | |
| 1238 PushArgumentInstr* mask_push = PushArgument(Bind(Uint64Constant(mask))); | |
| 1239 actual_def = InstanceCall(InstanceCallDescriptor::FromToken(Token::kBIT_AND), | |
| 1240 actual_push, | |
| 1241 mask_push); | |
| 1242 | |
| 1243 BranchOrBacktrack(Comparison(kNE, actual_def, expected_def), on_not_equal); | |
| 1244 } | |
| 1245 | |
| 1246 | |
| 1247 void IRRegExpMacroAssembler::CheckCharacterInRange( | |
| 1248 uint16_t from, | |
| 1249 uint16_t to, | |
| 1250 BlockLabel* on_in_range) { | |
| 1251 TAG(); | |
| 1252 ASSERT(from <= to); | |
| 1253 | |
| 1254 // TODO(zerny): All range comparisons could be done cheaper with unsigned | |
| 1255 // compares. This pattern repeats in various places. | |
| 1256 | |
| 1257 BlockLabel on_not_in_range; | |
| 1258 BranchOrBacktrack(Comparison(kLT, | |
| 1259 LoadLocal(current_character_), | |
| 1260 Uint64Constant(from)), | |
| 1261 &on_not_in_range); | |
| 1262 BranchOrBacktrack(Comparison(kGT, | |
| 1263 LoadLocal(current_character_), | |
| 1264 Uint64Constant(to)), | |
| 1265 &on_not_in_range); | |
| 1266 BranchOrBacktrack(NULL, on_in_range); | |
| 1267 | |
| 1268 BindBlock(&on_not_in_range); | |
| 1269 } | |
| 1270 | |
| 1271 | |
| 1272 void IRRegExpMacroAssembler::CheckCharacterNotInRange( | |
| 1273 uint16_t from, | |
| 1274 uint16_t to, | |
| 1275 BlockLabel* on_not_in_range) { | |
| 1276 TAG(); | |
| 1277 ASSERT(from <= to); | |
| 1278 | |
| 1279 BranchOrBacktrack(Comparison(kLT, | |
| 1280 LoadLocal(current_character_), | |
| 1281 Uint64Constant(from)), | |
| 1282 on_not_in_range); | |
| 1283 | |
| 1284 BranchOrBacktrack(Comparison(kGT, | |
| 1285 LoadLocal(current_character_), | |
| 1286 Uint64Constant(to)), | |
| 1287 on_not_in_range); | |
| 1288 } | |
| 1289 | |
| 1290 | |
| 1291 void IRRegExpMacroAssembler::CheckBitInTable( | |
| 1292 const TypedData& table, | |
| 1293 BlockLabel* on_bit_set) { | |
| 1294 TAG(); | |
| 1295 | |
| 1296 PushArgumentInstr* table_push = | |
| 1297 PushArgument(Bind(new(Z) ConstantInstr(table))); | |
| 1298 PushArgumentInstr* index_push = PushLocal(current_character_); | |
| 1299 | |
| 1300 if (mode_ != ASCII || kTableMask != Symbols::kMaxOneCharCodeSymbol) { | |
| 1301 PushArgumentInstr* mask_push = | |
| 1302 PushArgument(Bind(Uint64Constant(kTableSize - 1))); | |
| 1303 index_push = PushArgument( | |
| 1304 Bind(InstanceCall(InstanceCallDescriptor::FromToken(Token::kBIT_AND), | |
| 1305 index_push, | |
| 1306 mask_push))); | |
| 1307 } | |
| 1308 | |
| 1309 Definition* byte_def = | |
| 1310 InstanceCall(InstanceCallDescriptor::FromToken(Token::kINDEX), | |
| 1311 table_push, | |
| 1312 index_push); | |
| 1313 Definition* zero_def = Int64Constant(0); | |
| 1314 | |
| 1315 BranchOrBacktrack(Comparison(kNE, byte_def, zero_def), on_bit_set); | |
| 1316 } | |
| 1317 | |
| 1318 | |
| 1319 bool IRRegExpMacroAssembler::CheckSpecialCharacterClass( | |
| 1320 uint16_t type, | |
| 1321 BlockLabel* on_no_match) { | |
| 1322 TAG(); | |
| 1323 | |
| 1324 // Range checks (c in min..max) are generally implemented by an unsigned | |
| 1325 // (c - min) <= (max - min) check | |
| 1326 switch (type) { | |
| 1327 case 's': | |
| 1328 // Match space-characters | |
| 1329 if (mode_ == ASCII) { | |
| 1330 // One byte space characters are '\t'..'\r', ' ' and \u00a0. | |
| 1331 BlockLabel success; | |
| 1332 // Space (' '). | |
| 1333 BranchOrBacktrack(Comparison(kEQ, | |
| 1334 LoadLocal(current_character_), | |
| 1335 Uint64Constant(' ')), | |
| 1336 &success); | |
| 1337 // Check range 0x09..0x0d. | |
| 1338 CheckCharacterInRange('\t', '\r', &success); | |
| 1339 // \u00a0 (NBSP). | |
| 1340 BranchOrBacktrack(Comparison(kNE, | |
| 1341 LoadLocal(current_character_), | |
| 1342 Uint64Constant(0x00a0)), | |
| 1343 on_no_match); | |
| 1344 BindBlock(&success); | |
| 1345 return true; | |
| 1346 } | |
| 1347 return false; | |
| 1348 case 'S': | |
| 1349 // The emitted code for generic character classes is good enough. | |
| 1350 return false; | |
| 1351 case 'd': | |
| 1352 // Match ASCII digits ('0'..'9') | |
| 1353 CheckCharacterNotInRange('0', '9', on_no_match); | |
| 1354 return true; | |
| 1355 case 'D': | |
| 1356 // Match non ASCII-digits | |
| 1357 CheckCharacterInRange('0', '9', on_no_match); | |
| 1358 return true; | |
| 1359 case '.': { | |
| 1360 // Match non-newlines (not 0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029) | |
| 1361 BranchOrBacktrack(Comparison(kEQ, | |
| 1362 LoadLocal(current_character_), | |
| 1363 Uint64Constant('\n')), | |
| 1364 on_no_match); | |
| 1365 BranchOrBacktrack(Comparison(kEQ, | |
| 1366 LoadLocal(current_character_), | |
| 1367 Uint64Constant('\r')), | |
| 1368 on_no_match); | |
| 1369 if (mode_ == UC16) { | |
| 1370 BranchOrBacktrack(Comparison(kEQ, | |
| 1371 LoadLocal(current_character_), | |
| 1372 Uint64Constant(0x2028)), | |
| 1373 on_no_match); | |
| 1374 BranchOrBacktrack(Comparison(kEQ, | |
| 1375 LoadLocal(current_character_), | |
| 1376 Uint64Constant(0x2029)), | |
| 1377 on_no_match); | |
| 1378 } | |
| 1379 return true; | |
| 1380 } | |
| 1381 case 'w': { | |
| 1382 if (mode_ != ASCII) { | |
| 1383 // Table is 128 entries, so all ASCII characters can be tested. | |
| 1384 BranchOrBacktrack(Comparison(kGT, | |
| 1385 LoadLocal(current_character_), | |
| 1386 Uint64Constant('z')), | |
| 1387 on_no_match); | |
| 1388 } | |
| 1389 | |
| 1390 PushArgumentInstr* table_push = | |
| 1391 PushArgument(Bind(WordCharacterMapConstant())); | |
| 1392 PushArgumentInstr* index_push = PushLocal(current_character_); | |
| 1393 | |
| 1394 Definition* byte_def = | |
| 1395 InstanceCall(InstanceCallDescriptor::FromToken(Token::kINDEX), | |
| 1396 table_push, | |
| 1397 index_push); | |
| 1398 Definition* zero_def = Int64Constant(0); | |
| 1399 | |
| 1400 BranchOrBacktrack(Comparison(kEQ, byte_def, zero_def), on_no_match); | |
| 1401 | |
| 1402 return true; | |
| 1403 } | |
| 1404 case 'W': { | |
| 1405 BlockLabel done; | |
| 1406 if (mode_ != ASCII) { | |
| 1407 // Table is 128 entries, so all ASCII characters can be tested. | |
| 1408 BranchOrBacktrack(Comparison(kGT, | |
| 1409 LoadLocal(current_character_), | |
| 1410 Uint64Constant('z')), | |
| 1411 &done); | |
| 1412 } | |
| 1413 | |
| 1414 // TODO(zerny): Refactor to use CheckBitInTable if possible. | |
| 1415 | |
| 1416 PushArgumentInstr* table_push = | |
| 1417 PushArgument(Bind(WordCharacterMapConstant())); | |
| 1418 PushArgumentInstr* index_push = PushLocal(current_character_); | |
| 1419 | |
| 1420 Definition* byte_def = | |
| 1421 InstanceCall(InstanceCallDescriptor::FromToken(Token::kINDEX), | |
| 1422 table_push, | |
| 1423 index_push); | |
| 1424 Definition* zero_def = Int64Constant(0); | |
| 1425 | |
| 1426 BranchOrBacktrack(Comparison(kNE, byte_def, zero_def), on_no_match); | |
| 1427 | |
| 1428 if (mode_ != ASCII) { | |
| 1429 BindBlock(&done); | |
| 1430 } | |
| 1431 return true; | |
| 1432 } | |
| 1433 // Non-standard classes (with no syntactic shorthand) used internally. | |
| 1434 case '*': | |
| 1435 // Match any character. | |
| 1436 return true; | |
| 1437 case 'n': { | |
| 1438 // Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 or 0x2029). | |
| 1439 // The opposite of '.'. | |
| 1440 BlockLabel success; | |
| 1441 BranchOrBacktrack(Comparison(kEQ, | |
| 1442 LoadLocal(current_character_), | |
| 1443 Uint64Constant('\n')), | |
| 1444 &success); | |
| 1445 BranchOrBacktrack(Comparison(kEQ, | |
| 1446 LoadLocal(current_character_), | |
| 1447 Uint64Constant('\r')), | |
| 1448 &success); | |
| 1449 if (mode_ == UC16) { | |
| 1450 BranchOrBacktrack(Comparison(kEQ, | |
| 1451 LoadLocal(current_character_), | |
| 1452 Uint64Constant(0x2028)), | |
| 1453 &success); | |
| 1454 BranchOrBacktrack(Comparison(kEQ, | |
| 1455 LoadLocal(current_character_), | |
| 1456 Uint64Constant(0x2029)), | |
| 1457 &success); | |
| 1458 } | |
| 1459 BranchOrBacktrack(NULL, on_no_match); | |
| 1460 BindBlock(&success); | |
| 1461 return true; | |
| 1462 } | |
| 1463 // No custom implementation (yet): s(uint16_t), S(uint16_t). | |
| 1464 default: | |
| 1465 return false; | |
| 1466 } | |
| 1467 } | |
| 1468 | |
| 1469 | |
| 1470 void IRRegExpMacroAssembler::Fail() { | |
| 1471 TAG(); | |
| 1472 ASSERT(FAILURE == 0); // Return value for failure is zero. | |
| 1473 if (!global()) { | |
| 1474 UNREACHABLE(); // Dart regexps are always global. | |
| 1475 } | |
| 1476 GoTo(exit_block_); | |
| 1477 } | |
| 1478 | |
| 1479 | |
| 1480 void IRRegExpMacroAssembler::IfRegisterGE(intptr_t reg, | |
| 1481 intptr_t comparand, | |
| 1482 BlockLabel* if_ge) { | |
| 1483 TAG(); | |
| 1484 PushArgumentInstr* reg_push = PushArgument(LoadRegister(reg)); | |
| 1485 PushArgumentInstr* pos = PushArgument(Bind(Int64Constant(comparand))); | |
| 1486 BranchOrBacktrack(Comparison(kGTE, reg_push, pos), if_ge); | |
| 1487 } | |
| 1488 | |
| 1489 | |
| 1490 void IRRegExpMacroAssembler::IfRegisterLT(intptr_t reg, | |
| 1491 intptr_t comparand, | |
| 1492 BlockLabel* if_lt) { | |
| 1493 TAG(); | |
| 1494 PushArgumentInstr* reg_push = PushArgument(LoadRegister(reg)); | |
| 1495 PushArgumentInstr* pos = PushArgument(Bind(Int64Constant(comparand))); | |
| 1496 BranchOrBacktrack(Comparison(kLT, reg_push, pos), if_lt); | |
| 1497 } | |
| 1498 | |
| 1499 | |
| 1500 void IRRegExpMacroAssembler::IfRegisterEqPos(intptr_t reg, | |
| 1501 BlockLabel* if_eq) { | |
| 1502 TAG(); | |
| 1503 PushArgumentInstr* reg_push = PushArgument(LoadRegister(reg)); | |
| 1504 PushArgumentInstr* pos = PushArgument(Bind(LoadLocal(current_position_))); | |
| 1505 BranchOrBacktrack(Comparison(kEQ, reg_push, pos), if_eq); | |
| 1506 } | |
| 1507 | |
| 1508 | |
| 1509 RegExpMacroAssembler::IrregexpImplementation | |
| 1510 IRRegExpMacroAssembler::Implementation() { | |
| 1511 return kIRImplementation; | |
| 1512 } | |
| 1513 | |
| 1514 | |
| 1515 void IRRegExpMacroAssembler::LoadCurrentCharacter(intptr_t cp_offset, | |
| 1516 BlockLabel* on_end_of_input, | |
| 1517 bool check_bounds, | |
| 1518 intptr_t characters) { | |
| 1519 TAG(); | |
| 1520 ASSERT(cp_offset >= -1); // ^ and \b can look behind one character. | |
| 1521 ASSERT(cp_offset < (1<<30)); // Be sane! (And ensure negation works) | |
| 1522 if (check_bounds) { | |
| 1523 CheckPosition(cp_offset + characters - 1, on_end_of_input); | |
| 1524 } | |
| 1525 LoadCurrentCharacterUnchecked(cp_offset, characters); | |
| 1526 } | |
| 1527 | |
| 1528 | |
| 1529 void IRRegExpMacroAssembler::PopCurrentPosition() { | |
| 1530 TAG(); | |
| 1531 StoreLocal(current_position_, Bind(PopStack())); | |
| 1532 } | |
| 1533 | |
| 1534 | |
| 1535 void IRRegExpMacroAssembler::PopRegister(intptr_t reg) { | |
| 1536 TAG(); | |
| 1537 ASSERT(reg < registers_count_); | |
| 1538 PushArgumentInstr* registers_push = PushLocal(registers_); | |
| 1539 PushArgumentInstr* index_push = PushRegisterIndex(reg); | |
| 1540 PushArgumentInstr* pop_push = PushArgument(Bind(PopStack())); | |
| 1541 StoreRegister(registers_push, index_push, pop_push); | |
| 1542 } | |
| 1543 | |
| 1544 | |
| 1545 void IRRegExpMacroAssembler::PushStack(Definition *definition) { | |
| 1546 PushArgumentInstr* stack_push = PushLocal(stack_); | |
| 1547 PushArgumentInstr* stack_pointer_push = PushLocal(stack_pointer_); | |
| 1548 StoreLocal(stack_pointer_, | |
| 1549 Bind(Add(stack_pointer_push, | |
| 1550 PushArgument(Bind(Uint64Constant(1)))))); | |
| 1551 stack_pointer_push = PushLocal(stack_pointer_); | |
| 1552 // TODO(zerny): bind value and push could break stack discipline. | |
| 1553 PushArgumentInstr* value_push = PushArgument(Bind(definition)); | |
| 1554 Do(InstanceCall(InstanceCallDescriptor::FromToken(Token::kASSIGN_INDEX), | |
| 1555 stack_push, | |
| 1556 stack_pointer_push, | |
| 1557 value_push)); | |
| 1558 } | |
| 1559 | |
| 1560 | |
| 1561 Definition* IRRegExpMacroAssembler::PopStack() { | |
| 1562 PushArgumentInstr* stack_push = PushLocal(stack_); | |
| 1563 PushArgumentInstr* stack_pointer_push1 = PushLocal(stack_pointer_); | |
| 1564 PushArgumentInstr* stack_pointer_push2 = PushLocal(stack_pointer_); | |
| 1565 StoreLocal(stack_pointer_, | |
| 1566 Bind(Sub(stack_pointer_push2, | |
| 1567 PushArgument(Bind(Uint64Constant(1)))))); | |
| 1568 return InstanceCall(InstanceCallDescriptor::FromToken(Token::kINDEX), | |
| 1569 stack_push, | |
| 1570 stack_pointer_push1); | |
| 1571 } | |
| 1572 | |
| 1573 | |
| 1574 Definition* IRRegExpMacroAssembler::PeekStack() { | |
| 1575 PushArgumentInstr* stack_push = PushLocal(stack_); | |
| 1576 PushArgumentInstr* stack_pointer_push = PushLocal(stack_pointer_); | |
| 1577 return InstanceCall(InstanceCallDescriptor::FromToken(Token::kINDEX), | |
| 1578 stack_push, | |
| 1579 stack_pointer_push); | |
| 1580 } | |
| 1581 | |
| 1582 | |
| 1583 // Pushes the location corresponding to label to the backtracking stack. | |
| 1584 void IRRegExpMacroAssembler::PushBacktrack(BlockLabel* label) { | |
| 1585 TAG(); | |
| 1586 | |
| 1587 // Ensure that targets of indirect jumps are never accessed through a | |
| 1588 // normal control flow instructions by creating a new block for each backtrack | |
| 1589 // target. | |
| 1590 IndirectEntryInstr* indirect_target = IndirectWithJoinGoto(label->block()); | |
| 1591 | |
| 1592 // Add a fake edge from the graph entry for data flow analysis. | |
| 1593 entry_block_->AddIndirectEntry(indirect_target); | |
| 1594 | |
| 1595 ConstantInstr* offset = Uint64Constant(indirect_target->indirect_id()); | |
| 1596 PushStack(offset); | |
| 1597 CheckStackLimit(); | |
| 1598 } | |
| 1599 | |
| 1600 | |
| 1601 void IRRegExpMacroAssembler::PushCurrentPosition() { | |
| 1602 TAG(); | |
| 1603 PushStack(LoadLocal(current_position_)); | |
| 1604 } | |
| 1605 | |
| 1606 | |
| 1607 void IRRegExpMacroAssembler::PushRegister(intptr_t reg) { | |
| 1608 TAG(); | |
| 1609 // TODO(zerny): Refactor PushStack so it can be reused here. | |
| 1610 PushArgumentInstr* stack_push = PushLocal(stack_); | |
| 1611 PushArgumentInstr* stack_pointer_push = PushLocal(stack_pointer_); | |
| 1612 StoreLocal(stack_pointer_, | |
| 1613 Bind(Add(stack_pointer_push, | |
| 1614 PushArgument(Bind(Uint64Constant(1)))))); | |
| 1615 stack_pointer_push = PushLocal(stack_pointer_); | |
| 1616 // TODO(zerny): bind value and push could break stack discipline. | |
| 1617 PushArgumentInstr* value_push = PushArgument(LoadRegister(reg)); | |
| 1618 Do(InstanceCall(InstanceCallDescriptor::FromToken(Token::kASSIGN_INDEX), | |
| 1619 stack_push, | |
| 1620 stack_pointer_push, | |
| 1621 value_push)); | |
| 1622 CheckStackLimit(); | |
| 1623 } | |
| 1624 | |
| 1625 | |
| 1626 // Checks that (stack.capacity - stack_limit_slack) > stack_pointer. | |
| 1627 // This ensures that up to stack_limit_slack stack pushes can be | |
| 1628 // done without exhausting the stack space. If the check fails the | |
| 1629 // stack will be grown. | |
| 1630 void IRRegExpMacroAssembler::CheckStackLimit() { | |
| 1631 TAG(); | |
| 1632 PushArgumentInstr* stack_push = PushLocal(stack_); | |
| 1633 PushArgumentInstr* length_push = PushArgument(Bind(InstanceCall( | |
| 1634 InstanceCallDescriptor( | |
| 1635 String::ZoneHandle(Field::GetterSymbol(Symbols::Length()))), | |
| 1636 stack_push))); | |
| 1637 PushArgumentInstr* capacity_push = PushArgument(Bind(Sub( | |
| 1638 length_push, | |
| 1639 PushArgument(Bind(Uint64Constant(stack_limit_slack())))))); | |
| 1640 PushArgumentInstr* stack_pointer_push = PushLocal(stack_pointer_); | |
| 1641 BranchInstr* branch = new(Z) BranchInstr( | |
| 1642 Comparison(kGT, capacity_push, stack_pointer_push)); | |
| 1643 CloseBlockWith(branch); | |
| 1644 | |
| 1645 BlockLabel grow_stack; | |
| 1646 BlockLabel fallthrough; | |
| 1647 *branch->true_successor_address() = | |
| 1648 TargetWithJoinGoto(fallthrough.block()); | |
| 1649 *branch->false_successor_address() = | |
| 1650 TargetWithJoinGoto(grow_stack.block()); | |
| 1651 | |
| 1652 BindBlock(&grow_stack); | |
| 1653 GrowStack(); | |
| 1654 | |
| 1655 BindBlock(&fallthrough); | |
| 1656 } | |
| 1657 | |
| 1658 | |
| 1659 void IRRegExpMacroAssembler::GrowStack() { | |
| 1660 TAG(); | |
| 1661 Value* cell = Bind(new(Z) ConstantInstr(stack_array_cell_)); | |
| 1662 StoreLocal(stack_, Bind(new(Z) GrowRegExpStackInstr(cell))); | |
| 1663 } | |
| 1664 | |
| 1665 | |
| 1666 void IRRegExpMacroAssembler::ReadCurrentPositionFromRegister(intptr_t reg) { | |
| 1667 TAG(); | |
| 1668 StoreLocal(current_position_, LoadRegister(reg)); | |
| 1669 } | |
| 1670 | |
| 1671 // Resets the tip of the stack to the value stored in reg. | |
| 1672 void IRRegExpMacroAssembler::ReadStackPointerFromRegister(intptr_t reg) { | |
| 1673 TAG(); | |
| 1674 ASSERT(reg < registers_count_); | |
| 1675 StoreLocal(stack_pointer_, LoadRegister(reg)); | |
| 1676 } | |
| 1677 | |
| 1678 void IRRegExpMacroAssembler::SetCurrentPositionFromEnd(intptr_t by) { | |
| 1679 TAG(); | |
| 1680 | |
| 1681 BlockLabel after_position; | |
| 1682 | |
| 1683 Definition* cur_pos_def = LoadLocal(current_position_); | |
| 1684 Definition* by_value_def = Int64Constant(-by); | |
| 1685 | |
| 1686 BranchOrBacktrack(Comparison(kGTE, cur_pos_def, by_value_def), | |
| 1687 &after_position); | |
| 1688 | |
| 1689 StoreLocal(current_position_, Bind(Int64Constant(-by))); | |
| 1690 | |
| 1691 // On RegExp code entry (where this operation is used), the character before | |
| 1692 // the current position is expected to be already loaded. | |
| 1693 // We have advanced the position, so it's safe to read backwards. | |
| 1694 LoadCurrentCharacterUnchecked(-1, 1); | |
| 1695 | |
| 1696 BindBlock(&after_position); | |
| 1697 } | |
| 1698 | |
| 1699 | |
| 1700 void IRRegExpMacroAssembler::SetRegister(intptr_t reg, intptr_t to) { | |
| 1701 TAG(); | |
| 1702 // Reserved for positions! | |
| 1703 ASSERT(reg >= saved_registers_count_); | |
| 1704 StoreRegister(reg, to); | |
| 1705 } | |
| 1706 | |
| 1707 | |
| 1708 bool IRRegExpMacroAssembler::Succeed() { | |
| 1709 TAG(); | |
| 1710 GoTo(success_block_); | |
| 1711 return global(); | |
| 1712 } | |
| 1713 | |
| 1714 | |
| 1715 void IRRegExpMacroAssembler::WriteCurrentPositionToRegister( | |
| 1716 intptr_t reg, intptr_t cp_offset) { | |
| 1717 TAG(); | |
| 1718 | |
| 1719 PushArgumentInstr* registers_push = PushLocal(registers_); | |
| 1720 PushArgumentInstr* index_push = PushRegisterIndex(reg); | |
| 1721 PushArgumentInstr* pos_push = PushLocal(current_position_); | |
| 1722 PushArgumentInstr* off_push = PushArgument(Bind(Int64Constant(cp_offset))); | |
| 1723 PushArgumentInstr* neg_off_push = PushArgument(Bind(Add(pos_push, off_push))); | |
| 1724 // Push the negative offset; these are converted to positive string positions | |
| 1725 // within the success block. | |
| 1726 StoreRegister(registers_push, index_push, neg_off_push); | |
| 1727 } | |
| 1728 | |
| 1729 | |
| 1730 void IRRegExpMacroAssembler::ClearRegisters( | |
| 1731 intptr_t reg_from, intptr_t reg_to) { | |
| 1732 TAG(); | |
| 1733 | |
| 1734 ASSERT(reg_from <= reg_to); | |
| 1735 | |
| 1736 // In order to clear registers to a final result value of -1, set them to | |
| 1737 // (-1 - string length), the offset of -1 from the end of the string. | |
| 1738 | |
| 1739 for (intptr_t reg = reg_from; reg <= reg_to; reg++) { | |
| 1740 PushArgumentInstr* registers_push = PushLocal(registers_); | |
| 1741 PushArgumentInstr* index_push = PushRegisterIndex(reg); | |
| 1742 PushArgumentInstr* minus_one_push = | |
| 1743 PushArgument(Bind(Int64Constant(-1))); | |
| 1744 PushArgumentInstr* length_push = PushLocal(string_param_length_); | |
| 1745 PushArgumentInstr* value_push = | |
| 1746 PushArgument(Bind(Sub(minus_one_push, length_push))); | |
| 1747 StoreRegister(registers_push, index_push, value_push); | |
| 1748 } | |
| 1749 } | |
| 1750 | |
| 1751 | |
| 1752 void IRRegExpMacroAssembler::WriteStackPointerToRegister(intptr_t reg) { | |
| 1753 TAG(); | |
| 1754 | |
| 1755 PushArgumentInstr* registers_push = PushLocal(registers_); | |
| 1756 PushArgumentInstr* index_push = PushRegisterIndex(reg); | |
| 1757 PushArgumentInstr* tip_push = PushLocal(stack_pointer_); | |
| 1758 StoreRegister(registers_push, index_push, tip_push); | |
| 1759 } | |
| 1760 | |
| 1761 | |
| 1762 // Private methods: | |
| 1763 | |
| 1764 | |
| 1765 void IRRegExpMacroAssembler::CheckPosition(intptr_t cp_offset, | |
| 1766 BlockLabel* on_outside_input) { | |
| 1767 TAG(); | |
| 1768 Definition* curpos_def = LoadLocal(current_position_); | |
| 1769 Definition* cp_off_def = Int64Constant(-cp_offset); | |
| 1770 | |
| 1771 // If (current_position_ < -cp_offset), we are in bounds. | |
| 1772 // Remember, current_position_ is a negative offset from the string end. | |
| 1773 | |
| 1774 BranchOrBacktrack(Comparison(kGTE, curpos_def, cp_off_def), | |
| 1775 on_outside_input); | |
| 1776 } | |
| 1777 | |
| 1778 | |
| 1779 void IRRegExpMacroAssembler::BranchOrBacktrack( | |
| 1780 ComparisonInstr* comparison, | |
| 1781 BlockLabel* true_successor) { | |
| 1782 if (comparison == NULL) { // No condition | |
| 1783 if (true_successor == NULL) { | |
| 1784 Backtrack(); | |
| 1785 return; | |
| 1786 } | |
| 1787 GoTo(true_successor); | |
| 1788 return; | |
| 1789 } | |
| 1790 | |
| 1791 // If no successor block has been passed in, backtrack. | |
| 1792 JoinEntryInstr* true_successor_block = backtrack_block_; | |
| 1793 if (true_successor != NULL) { | |
| 1794 true_successor->SetLinked(); | |
| 1795 true_successor_block = true_successor->block(); | |
| 1796 } | |
| 1797 ASSERT(true_successor_block != NULL); | |
| 1798 | |
| 1799 // If the condition is not true, fall through to a new block. | |
| 1800 BlockLabel fallthrough; | |
| 1801 | |
| 1802 BranchInstr* branch = new(Z) BranchInstr(comparison); | |
| 1803 *branch->true_successor_address() = | |
| 1804 TargetWithJoinGoto(true_successor_block); | |
| 1805 *branch->false_successor_address() = | |
| 1806 TargetWithJoinGoto(fallthrough.block()); | |
| 1807 | |
| 1808 CloseBlockWith(branch); | |
| 1809 BindBlock(&fallthrough); | |
| 1810 } | |
| 1811 | |
| 1812 | |
| 1813 TargetEntryInstr* IRRegExpMacroAssembler::TargetWithJoinGoto( | |
| 1814 JoinEntryInstr* dst) { | |
| 1815 TargetEntryInstr* target = new(Z) TargetEntryInstr( | |
| 1816 block_id_.Alloc(), kInvalidTryIndex); | |
| 1817 blocks_.Add(target); | |
| 1818 | |
| 1819 target->AppendInstruction(new(Z) GotoInstr(dst)); | |
| 1820 | |
| 1821 return target; | |
| 1822 } | |
| 1823 | |
| 1824 | |
| 1825 IndirectEntryInstr* IRRegExpMacroAssembler::IndirectWithJoinGoto( | |
| 1826 JoinEntryInstr* dst) { | |
| 1827 IndirectEntryInstr* target = new(Z) IndirectEntryInstr( | |
| 1828 block_id_.Alloc(), indirect_id_.Alloc(), kInvalidTryIndex); | |
| 1829 blocks_.Add(target); | |
| 1830 | |
| 1831 target->AppendInstruction(new(Z) GotoInstr(dst)); | |
| 1832 | |
| 1833 return target; | |
| 1834 } | |
| 1835 | |
| 1836 | |
| 1837 void IRRegExpMacroAssembler::CheckPreemption() { | |
| 1838 TAG(); | |
| 1839 AppendInstruction(new(Z) CheckStackOverflowInstr(kNoSourcePos, 0)); | |
| 1840 } | |
| 1841 | |
| 1842 | |
| 1843 Definition* IRRegExpMacroAssembler::Add( | |
| 1844 PushArgumentInstr* lhs, | |
| 1845 PushArgumentInstr* rhs) { | |
| 1846 return InstanceCall(InstanceCallDescriptor::FromToken(Token::kADD), lhs, rhs); | |
| 1847 } | |
| 1848 | |
| 1849 | |
| 1850 Definition* IRRegExpMacroAssembler::Sub( | |
| 1851 PushArgumentInstr* lhs, | |
| 1852 PushArgumentInstr* rhs) { | |
| 1853 return InstanceCall(InstanceCallDescriptor::FromToken(Token::kSUB), lhs, rhs); | |
| 1854 } | |
| 1855 | |
| 1856 | |
| 1857 void IRRegExpMacroAssembler::LoadCurrentCharacterUnchecked( | |
| 1858 intptr_t cp_offset, intptr_t characters) { | |
| 1859 TAG(); | |
| 1860 | |
| 1861 ASSERT(characters == 1 || CanReadUnaligned()); | |
| 1862 if (mode_ == ASCII) { | |
| 1863 ASSERT(characters == 1 || characters == 2 || characters == 4); | |
| 1864 } else { | |
| 1865 ASSERT(mode_ == UC16); | |
| 1866 ASSERT(characters == 1 || characters == 2); | |
| 1867 } | |
| 1868 | |
| 1869 // Calculate the addressed string index as: | |
| 1870 // cp_offset + current_position_ + string_param_length_ | |
| 1871 // TODO(zerny): Avoid generating 'add' instance-calls here. | |
| 1872 PushArgumentInstr* off_arg = | |
| 1873 PushArgument(Bind(Int64Constant(cp_offset))); | |
| 1874 PushArgumentInstr* pos_arg = | |
| 1875 PushArgument(BindLoadLocal(*current_position_)); | |
| 1876 PushArgumentInstr* off_pos_arg = | |
| 1877 PushArgument(Bind(Add(off_arg, pos_arg))); | |
| 1878 PushArgumentInstr* len_arg = | |
| 1879 PushArgument(BindLoadLocal(*string_param_length_)); | |
| 1880 // Index is stored in a temporary local so that we can later load it safely. | |
| 1881 StoreLocal(index_temp_, Bind(Add(off_pos_arg, len_arg))); | |
| 1882 | |
| 1883 // Load and store the code units. | |
| 1884 Value* code_unit_value = LoadCodeUnitsAt(index_temp_, characters); | |
| 1885 StoreLocal(current_character_, code_unit_value); | |
| 1886 PRINT(PushLocal(current_character_)); | |
| 1887 } | |
| 1888 | |
| 1889 | |
| 1890 Value* IRRegExpMacroAssembler::CharacterAt(LocalVariable* index) { | |
| 1891 return LoadCodeUnitsAt(index, 1); | |
| 1892 } | |
| 1893 | |
| 1894 | |
| 1895 Value* IRRegExpMacroAssembler::LoadCodeUnitsAt(LocalVariable* index, | |
| 1896 intptr_t characters) { | |
| 1897 // Bind the pattern as the load receiver. | |
| 1898 Value* pattern_val = BindLoadLocal(*string_param_); | |
| 1899 if (RawObject::IsExternalStringClassId(specialization_cid_)) { | |
| 1900 // The data of an external string is stored through two indirections. | |
| 1901 intptr_t external_offset = 0; | |
| 1902 intptr_t data_offset = 0; | |
| 1903 if (specialization_cid_ == kExternalOneByteStringCid) { | |
| 1904 external_offset = ExternalOneByteString::external_data_offset(); | |
| 1905 data_offset = RawExternalOneByteString::ExternalData::data_offset(); | |
| 1906 } else if (specialization_cid_ == kExternalTwoByteStringCid) { | |
| 1907 external_offset = ExternalTwoByteString::external_data_offset(); | |
| 1908 data_offset = RawExternalTwoByteString::ExternalData::data_offset(); | |
| 1909 } else { | |
| 1910 UNREACHABLE(); | |
| 1911 } | |
| 1912 // This pushes untagged values on the stack which are immediately consumed: | |
| 1913 // the first value is consumed to obtain the second value which is consumed | |
| 1914 // by LoadCodeUnitsAtInstr below. | |
| 1915 Value* external_val = | |
| 1916 Bind(new(Z) LoadUntaggedInstr(pattern_val, external_offset)); | |
| 1917 pattern_val = | |
| 1918 Bind(new(Z) LoadUntaggedInstr(external_val, data_offset)); | |
| 1919 } | |
| 1920 | |
| 1921 // Here pattern_val might be untagged so this must not trigger a GC. | |
| 1922 Value* index_val = BindLoadLocal(*index); | |
| 1923 | |
| 1924 return Bind(new(Z) LoadCodeUnitsInstr( | |
| 1925 pattern_val, | |
| 1926 index_val, | |
| 1927 characters, | |
| 1928 specialization_cid_, | |
| 1929 Scanner::kNoSourcePos)); | |
| 1930 } | |
| 1931 | |
| 1932 | |
| 1933 #undef __ | |
| 1934 | |
| 1935 } // namespace dart | 21 } // namespace dart |
| OLD | NEW |