| OLD | NEW |
| 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// | 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 /// | 9 /// |
| 10 /// \file | 10 /// \file |
| (...skipping 390 matching lines...) |
| 401 Func->reorderNodes(); | 401 Func->reorderNodes(); |
| 402 | 402 |
| 403 // Branch optimization. This needs to be done just before code emission. In | 403 // Branch optimization. This needs to be done just before code emission. In |
| 404 // particular, no transformations that insert or reorder CfgNodes should be | 404 // particular, no transformations that insert or reorder CfgNodes should be |
| 405 // done after branch optimization. We go ahead and do it before nop insertion | 405 // done after branch optimization. We go ahead and do it before nop insertion |
| 406 // to reduce the amount of work needed for searching for opportunities. | 406 // to reduce the amount of work needed for searching for opportunities. |
| 407 Func->doBranchOpt(); | 407 Func->doBranchOpt(); |
| 408 Func->dump("After branch optimization"); | 408 Func->dump("After branch optimization"); |
| 409 | 409 |
| 410 // Nop insertion | 410 // Nop insertion |
| 411 if (Ctx->getFlags().shouldDoNopInsertion()) { | 411 if (Ctx->getFlags().shouldDoNopInsertion()) |
| 412 Func->doNopInsertion(); | 412 Func->doNopInsertion(); |
| 413 } | 413 |
| 414 // Mark nodes that require sandbox alignment |
| 415 if (Ctx->getFlags().getUseSandboxing()) |
| 416 Func->markNodesForSandboxing(); |
| 414 } | 417 } |
| 415 | 418 |
| 416 template <class Machine> void TargetX86Base<Machine>::translateOm1() { | 419 template <class Machine> void TargetX86Base<Machine>::translateOm1() { |
| 417 TimerMarker T(TimerStack::TT_Om1, Func); | 420 TimerMarker T(TimerStack::TT_Om1, Func); |
| 418 | 421 |
| 419 Func->placePhiLoads(); | 422 Func->placePhiLoads(); |
| 420 if (Func->hasError()) | 423 if (Func->hasError()) |
| 421 return; | 424 return; |
| 422 Func->placePhiStores(); | 425 Func->placePhiStores(); |
| 423 if (Func->hasError()) | 426 if (Func->hasError()) |
| (...skipping 16 matching lines...) |
| 440 | 443 |
| 441 Func->genFrame(); | 444 Func->genFrame(); |
| 442 if (Func->hasError()) | 445 if (Func->hasError()) |
| 443 return; | 446 return; |
| 444 Func->dump("After stack frame mapping"); | 447 Func->dump("After stack frame mapping"); |
| 445 | 448 |
| 446 // Nop insertion | 449 // Nop insertion |
| 447 if (Ctx->getFlags().shouldDoNopInsertion()) { | 450 if (Ctx->getFlags().shouldDoNopInsertion()) { |
| 448 Func->doNopInsertion(); | 451 Func->doNopInsertion(); |
| 449 } | 452 } |
| 453 |
| 454 // Mark nodes that require sandbox alignment |
| 455 if (Ctx->getFlags().getUseSandboxing()) |
| 456 Func->markNodesForSandboxing(); |
| 450 } | 457 } |
| 451 | 458 |
| 452 inline bool canRMW(const InstArithmetic *Arith) { | 459 inline bool canRMW(const InstArithmetic *Arith) { |
| 453 Type Ty = Arith->getDest()->getType(); | 460 Type Ty = Arith->getDest()->getType(); |
| 454 // X86 vector instructions write to a register and have no RMW option. | 461 // X86 vector instructions write to a register and have no RMW option. |
| 455 if (isVectorType(Ty)) | 462 if (isVectorType(Ty)) |
| 456 return false; | 463 return false; |
| 457 bool isI64 = Ty == IceType_i64; | 464 bool isI64 = Ty == IceType_i64; |
| 458 | 465 |
| 459 switch (Arith->getOp()) { | 466 switch (Arith->getOp()) { |
| (...skipping 642 matching lines...) |
| 1102 | 1109 |
| 1103 if (!Ctx->getFlags().getUseSandboxing()) | 1110 if (!Ctx->getFlags().getUseSandboxing()) |
| 1104 return; | 1111 return; |
| 1105 // Change the original ret instruction into a sandboxed return sequence. | 1112 // Change the original ret instruction into a sandboxed return sequence. |
| 1106 // t:ecx = pop | 1113 // t:ecx = pop |
| 1107 // bundle_lock | 1114 // bundle_lock |
| 1108 // and t, ~31 | 1115 // and t, ~31 |
| 1109 // jmp *t | 1116 // jmp *t |
| 1110 // bundle_unlock | 1117 // bundle_unlock |
| 1111 // FakeUse <original_ret_operand> | 1118 // FakeUse <original_ret_operand> |
| 1112 const SizeT BundleSize = 1 | |
| 1113 << Func->getAssembler<>()->getBundleAlignLog2Bytes(); | |
| 1114 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx); | 1119 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx); |
| 1115 _pop(T_ecx); | 1120 _pop(T_ecx); |
| 1116 _bundle_lock(); | 1121 lowerIndirectJump(T_ecx); |
| 1117 _and(T_ecx, Ctx->getConstantInt32(~(BundleSize - 1))); | |
| 1118 _jmp(T_ecx); | |
| 1119 _bundle_unlock(); | |
| 1120 if (RI->getSrcSize()) { | 1122 if (RI->getSrcSize()) { |
| 1121 Variable *RetValue = llvm::cast<Variable>(RI->getSrc(0)); | 1123 Variable *RetValue = llvm::cast<Variable>(RI->getSrc(0)); |
| 1122 Context.insert(InstFakeUse::create(Func, RetValue)); | 1124 Context.insert(InstFakeUse::create(Func, RetValue)); |
| 1123 } | 1125 } |
| 1124 RI->setDeleted(); | 1126 RI->setDeleted(); |
| 1125 } | 1127 } |
| 1126 | 1128 |
| 1127 template <class Machine> void TargetX86Base<Machine>::split64(Variable *Var) { | 1129 template <class Machine> void TargetX86Base<Machine>::split64(Variable *Var) { |
| 1128 switch (Var->getType()) { | 1130 switch (Var->getType()) { |
| 1129 default: | 1131 default: |
| (...skipping 2843 matching lines...) |
| 3973 } else { | 3975 } else { |
| 3974 _bsr(T_Dest2, SecondVar); | 3976 _bsr(T_Dest2, SecondVar); |
| 3975 _xor(T_Dest2, ThirtyOne); | 3977 _xor(T_Dest2, ThirtyOne); |
| 3976 } | 3978 } |
| 3977 _test(SecondVar, SecondVar); | 3979 _test(SecondVar, SecondVar); |
| 3978 _cmov(T_Dest2, T_Dest, Traits::Cond::Br_e); | 3980 _cmov(T_Dest2, T_Dest, Traits::Cond::Br_e); |
| 3979 _mov(DestLo, T_Dest2); | 3981 _mov(DestLo, T_Dest2); |
| 3980 _mov(DestHi, Ctx->getConstantZero(IceType_i32)); | 3982 _mov(DestHi, Ctx->getConstantZero(IceType_i32)); |
| 3981 } | 3983 } |
| 3982 | 3984 |
| 3985 template <class Machine> |
| 3986 void TargetX86Base<Machine>::lowerIndirectJump(Variable *Target) { |
| 3987 const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing(); |
| 3988 if (NeedSandboxing) { |
| 3989 _bundle_lock(); |
| 3990 const SizeT BundleSize = |
| 3991 1 << Func->getAssembler<>()->getBundleAlignLog2Bytes(); |
| 3992 _and(Target, Ctx->getConstantInt32(~(BundleSize - 1))); |
| 3993 } |
| 3994 _jmp(Target); |
| 3995 if (NeedSandboxing) |
| 3996 _bundle_unlock(); |
| 3997 } |
| 3998 |
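The masking in lowerIndirectJump is the standard NaCl bundle-alignment trick: clearing the low getBundleAlignLog2Bytes() bits of the jump target guarantees the jump lands on a bundle boundary, so an attacker cannot jump into the middle of an instruction bundle. A minimal standalone sketch of just the mask arithmetic, assuming the 32-byte bundle size NaCl uses on x86 (the target value is made up for illustration):

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    int main() {
      // Assumed values: 32-byte bundles, i.e. bundle align log2 = 5.
      const uint32_t BundleAlignLog2Bytes = 5;
      const uint32_t BundleSize = uint32_t(1) << BundleAlignLog2Bytes;
      // Same expression the lowering emits: and target, ~(BundleSize - 1).
      const uint32_t Mask = ~(BundleSize - 1);   // 0xFFFFFFE0
      const uint32_t Target = 0x0040123Du;       // some unaligned jump target
      const uint32_t Sandboxed = Target & Mask;
      assert(Sandboxed % BundleSize == 0);       // always lands on a bundle boundary
      std::printf("mask=0x%08X  0x%08X -> 0x%08X\n", Mask, Target, Sandboxed);
      return 0;
    }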
| 3983 inline bool isAdd(const Inst *Inst) { | 3999 inline bool isAdd(const Inst *Inst) { |
| 3984 if (const InstArithmetic *Arith = | 4000 if (const InstArithmetic *Arith = |
| 3985 llvm::dyn_cast_or_null<const InstArithmetic>(Inst)) { | 4001 llvm::dyn_cast_or_null<const InstArithmetic>(Inst)) { |
| 3986 return (Arith->getOp() == InstArithmetic::Add); | 4002 return (Arith->getOp() == InstArithmetic::Add); |
| 3987 } | 4003 } |
| 3988 return false; | 4004 return false; |
| 3989 } | 4005 } |
| 3990 | 4006 |
| 3991 inline void dumpAddressOpt(const Cfg *Func, const Variable *Base, | 4007 inline void dumpAddressOpt(const Cfg *Func, const Variable *Base, |
| 3992 const Variable *Index, uint16_t Shift, | 4008 const Variable *Index, uint16_t Shift, |
| (...skipping 525 matching lines...) |
| 4518 } | 4534 } |
| 4519 | 4535 |
| 4520 _cmp(Comparison, Ctx->getConstantInt32(Max - Min)); | 4536 _cmp(Comparison, Ctx->getConstantInt32(Max - Min)); |
| 4521 | 4537 |
| 4522 return Comparison; | 4538 return Comparison; |
| 4523 } | 4539 } |
| 4524 | 4540 |
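The single _cmp against (Max - Min) at the end of lowerCmpRange works because the helper (elided above) first subtracts the cluster's low bound from the comparison value, turning the two-sided range test into one unsigned comparison; the callers then branch on "above" (out of range) or "below or equal" (in range). A hedged standalone sketch of the equivalent check, with an illustrative helper name and values that are not from the patch:

    #include <cassert>
    #include <cstdint>

    // Models the lowered sequence: sub comparison, Min; cmp comparison, Max - Min;
    // callers branch Br_a (out of range) or Br_be (in range).
    static bool inCaseRange(uint32_t Value, uint32_t Min, uint32_t Max) {
      // Unsigned wrap-around sends values below Min far above Max - Min.
      return (Value - Min) <= (Max - Min);
    }

    int main() {
      assert(inCaseRange(5, 3, 9));
      assert(!inCaseRange(2, 3, 9));  // below the range wraps to a huge unsigned value
      assert(!inCaseRange(10, 3, 9)); // above the range
      return 0;
    }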
| 4525 template <class Machine> | 4541 template <class Machine> |
| 4526 void TargetX86Base<Machine>::lowerCaseCluster(const CaseCluster &Case, | 4542 void TargetX86Base<Machine>::lowerCaseCluster(const CaseCluster &Case, |
| 4527 Operand *Comparison, bool DoneCmp, | 4543 Operand *Comparison, bool DoneCmp, |
| 4528 CfgNode *DefaultLabel) { | 4544 CfgNode *DefaultTarget) { |
| 4529 switch (Case.getKind()) { | 4545 switch (Case.getKind()) { |
| 4530 case CaseCluster::JumpTable: { | 4546 case CaseCluster::JumpTable: { |
| 4531 typename Traits::Insts::Label *SkipJumpTable; | 4547 typename Traits::Insts::Label *SkipJumpTable; |
| 4532 | 4548 |
| 4533 Operand *RangeIndex = | 4549 Operand *RangeIndex = |
| 4534 lowerCmpRange(Comparison, Case.getLow(), Case.getHigh()); | 4550 lowerCmpRange(Comparison, Case.getLow(), Case.getHigh()); |
| 4535 if (DefaultLabel != nullptr) { | 4551 if (DefaultTarget == nullptr) { |
| 4536 _br(Traits::Cond::Br_a, DefaultLabel); | |
| 4537 } else { | |
| 4538 // Skip over jump table logic if comparison not in range and no default | 4552 // Skip over jump table logic if comparison not in range and no default |
| 4539 SkipJumpTable = Traits::Insts::Label::create(Func, this); | 4553 SkipJumpTable = Traits::Insts::Label::create(Func, this); |
| 4540 _br(Traits::Cond::Br_a, SkipJumpTable); | 4554 _br(Traits::Cond::Br_a, SkipJumpTable); |
| 4555 } else { |
| 4556 _br(Traits::Cond::Br_a, DefaultTarget); |
| 4541 } | 4557 } |
| 4542 | 4558 |
| 4543 InstJumpTable *JumpTable = Case.getJumpTable(); | 4559 InstJumpTable *JumpTable = Case.getJumpTable(); |
| 4544 Context.insert(JumpTable); | 4560 Context.insert(JumpTable); |
| 4545 | 4561 |
| 4546 // Make sure the index is a register of the same width as the base | 4562 // Make sure the index is a register of the same width as the base |
| 4547 Variable *Index; | 4563 Variable *Index; |
| 4548 if (RangeIndex->getType() != getPointerType()) { | 4564 if (RangeIndex->getType() != getPointerType()) { |
| 4549 Index = makeReg(getPointerType()); | 4565 Index = makeReg(getPointerType()); |
| 4550 _movzx(Index, RangeIndex); | 4566 _movzx(Index, RangeIndex); |
| 4551 } else { | 4567 } else { |
| 4552 Index = legalizeToReg(RangeIndex); | 4568 Index = legalizeToReg(RangeIndex); |
| 4553 } | 4569 } |
| 4554 | 4570 |
| 4555 constexpr RelocOffsetT RelocOffset = 0; | 4571 constexpr RelocOffsetT RelocOffset = 0; |
| 4556 constexpr bool SuppressMangling = true; | 4572 constexpr bool SuppressMangling = true; |
| 4557 Constant *Base = Ctx->getConstantSym(RelocOffset, JumpTable->getName(Func), | 4573 IceString MangledName = Ctx->mangleName(Func->getFunctionName()); |
| 4558 SuppressMangling); | 4574 Constant *Base = Ctx->getConstantSym( |
| 4575 RelocOffset, InstJumpTable::makeName(MangledName, JumpTable->getId()), |
| 4576 SuppressMangling); |
| 4559 Constant *Offset = nullptr; | 4577 Constant *Offset = nullptr; |
| 4560 uint16_t Shift = typeWidthInBytesLog2(getPointerType()); | 4578 uint16_t Shift = typeWidthInBytesLog2(getPointerType()); |
| 4561 // TODO(ascull): remove need for legalize by allowing null base in memop | 4579 // TODO(ascull): remove need for legalize by allowing null base in memop |
| 4562 auto *MemTarget = Traits::X86OperandMem::create( | 4580 auto *TargetInMemory = Traits::X86OperandMem::create( |
| 4563 Func, getPointerType(), legalizeToReg(Base), Offset, Index, Shift); | 4581 Func, getPointerType(), legalizeToReg(Base), Offset, Index, Shift); |
| 4564 Variable *Target = nullptr; | 4582 Variable *Target = nullptr; |
| 4565 _mov(Target, MemTarget); | 4583 _mov(Target, TargetInMemory); |
| 4566 _jmp(Target); | 4584 lowerIndirectJump(Target); |
| 4567 // TODO(ascull): sandboxing for indirect jump | |
| 4568 | 4585 |
| 4569 if (DefaultLabel == nullptr) | 4586 if (DefaultTarget == nullptr) |
| 4570 Context.insert(SkipJumpTable); | 4587 Context.insert(SkipJumpTable); |
| 4571 return; | 4588 return; |
| 4572 } | 4589 } |
| 4573 case CaseCluster::Range: { | 4590 case CaseCluster::Range: { |
| 4574 if (Case.getHigh() == Case.getLow()) { | 4591 if (Case.isUnitRange()) { |
| 4575 // Single item | 4592 // Single item |
| 4576 Constant *Value = Ctx->getConstantInt32(Case.getLow()); | 4593 if (!DoneCmp) { |
| 4577 if (!DoneCmp) | 4594 Constant *Value = Ctx->getConstantInt32(Case.getLow()); |
| 4578 _cmp(Comparison, Value); | 4595 _cmp(Comparison, Value); |
| 4579 _br(Traits::Cond::Br_e, Case.getLabel()); | 4596 } |
| 4580 if (DefaultLabel != nullptr) | 4597 _br(Traits::Cond::Br_e, Case.getTarget()); |
| 4581 _br(DefaultLabel); | 4598 } else if (DoneCmp && Case.isPairRange()) { |
| 4599 // Range of two items with the first item already compared against |
| 4600 _br(Traits::Cond::Br_e, Case.getTarget()); |
| 4601 Constant *Value = Ctx->getConstantInt32(Case.getHigh()); |
| 4602 _cmp(Comparison, Value); |
| 4603 _br(Traits::Cond::Br_e, Case.getTarget()); |
| 4582 } else { | 4604 } else { |
| 4583 // Range | 4605 // Range |
| 4584 lowerCmpRange(Comparison, Case.getLow(), Case.getHigh()); | 4606 lowerCmpRange(Comparison, Case.getLow(), Case.getHigh()); |
| 4585 _br(Traits::Cond::Br_be, Case.getLabel()); | 4607 _br(Traits::Cond::Br_be, Case.getTarget()); |
| 4586 if (DefaultLabel != nullptr) | |
| 4587 _br(DefaultLabel); | |
| 4588 } | 4608 } |
| 4609 if (DefaultTarget != nullptr) |
| 4610 _br(DefaultTarget); |
| 4589 return; | 4611 return; |
| 4590 } | 4612 } |
| 4591 } | 4613 } |
| 4592 } | 4614 } |
| 4593 | 4615 |
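For the JumpTable case above, the lowering computes target = table_base + index * sizeof(void*) (the movzx widens the range index to pointer width and Shift supplies the scale), loads the code address, and jumps through it via lowerIndirectJump. A minimal C++ model of that dispatch, assuming made-up handler names purely for illustration:

    #include <cstdint>
    #include <cstdio>

    // Hypothetical handlers standing in for the CfgNode targets of each case.
    static void case0() { std::puts("case 0"); }
    static void case1() { std::puts("case 1"); }
    static void case2() { std::puts("case 2"); }
    using Handler = void (*)();

    int main() {
      // The emitted code indexes a table of code addresses; function pointers model that.
      static const Handler JumpTable[] = {case0, case1, case2};
      uint32_t RangeIndex = 1;                // lowerCmpRange guaranteed RangeIndex <= High - Low
      Handler Target = JumpTable[RangeIndex]; // mov Target, [Base + Index << Shift]
      Target();                               // jmp *Target (masked by lowerIndirectJump when sandboxing)
      return 0;
    }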
| 4594 template <class Machine> | 4616 template <class Machine> |
| 4595 void TargetX86Base<Machine>::lowerSwitch(const InstSwitch *Inst) { | 4617 void TargetX86Base<Machine>::lowerSwitch(const InstSwitch *Inst) { |
| 4596 // Do it the old fashioned way unless asked for the advanced method | |
| 4597 if (!Ctx->getFlags().getUseAdvancedSwitchLowering()) { | |
| 4598 // This implements the most naive possible lowering. | |
| 4599 // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default | |
| 4600 Operand *Src0 = Inst->getComparison(); | |
| 4601 SizeT NumCases = Inst->getNumCases(); | |
| 4602 if (Src0->getType() == IceType_i64) { | |
| 4603 Src0 = legalize(Src0); // get Base/Index into physical registers | |
| 4604 Operand *Src0Lo = loOperand(Src0); | |
| 4605 Operand *Src0Hi = hiOperand(Src0); | |
| 4606 if (NumCases >= 2) { | |
| 4607 Src0Lo = legalizeToReg(Src0Lo); | |
| 4608 Src0Hi = legalizeToReg(Src0Hi); | |
| 4609 } else { | |
| 4610 Src0Lo = legalize(Src0Lo, Legal_Reg | Legal_Mem); | |
| 4611 Src0Hi = legalize(Src0Hi, Legal_Reg | Legal_Mem); | |
| 4612 } | |
| 4613 for (SizeT I = 0; I < NumCases; ++I) { | |
| 4614 Constant *ValueLo = Ctx->getConstantInt32(Inst->getValue(I)); | |
| 4615 Constant *ValueHi = Ctx->getConstantInt32(Inst->getValue(I) >> 32); | |
| 4616 typename Traits::Insts::Label *Label = | |
| 4617 Traits::Insts::Label::create(Func, this); | |
| 4618 _cmp(Src0Lo, ValueLo); | |
| 4619 _br(Traits::Cond::Br_ne, Label); | |
| 4620 _cmp(Src0Hi, ValueHi); | |
| 4621 _br(Traits::Cond::Br_e, Inst->getLabel(I)); | |
| 4622 Context.insert(Label); | |
| 4623 } | |
| 4624 _br(Inst->getLabelDefault()); | |
| 4625 return; | |
| 4626 } | |
| 4627 // OK, we'll be slightly less naive by forcing Src into a physical | |
| 4628 // register if there are 2 or more uses. | |
| 4629 if (NumCases >= 2) | |
| 4630 Src0 = legalizeToReg(Src0); | |
| 4631 else | |
| 4632 Src0 = legalize(Src0, Legal_Reg | Legal_Mem); | |
| 4633 for (SizeT I = 0; I < NumCases; ++I) { | |
| 4634 Constant *Value = Ctx->getConstantInt32(Inst->getValue(I)); | |
| 4635 _cmp(Src0, Value); | |
| 4636 _br(Traits::Cond::Br_e, Inst->getLabel(I)); | |
| 4637 } | |
| 4638 | |
| 4639 _br(Inst->getLabelDefault()); | |
| 4640 return; | |
| 4641 } | |
| 4642 | |
| 4643 // Group cases together and navigate through them with a binary search | 4618 // Group cases together and navigate through them with a binary search |
| 4644 CaseClusterArray CaseClusters = CaseCluster::clusterizeSwitch(Func, Inst); | 4619 CaseClusterArray CaseClusters = CaseCluster::clusterizeSwitch(Func, Inst); |
| 4645 Operand *Src0 = Inst->getComparison(); | 4620 Operand *Src0 = Inst->getComparison(); |
| 4646 CfgNode *DefaultLabel = Inst->getLabelDefault(); | 4621 CfgNode *DefaultTarget = Inst->getLabelDefault(); |
| 4647 | 4622 |
| 4648 assert(CaseClusters.size() != 0); // Should always be at least one | 4623 assert(CaseClusters.size() != 0); // Should always be at least one |
| 4649 | 4624 |
| 4650 if (Src0->getType() == IceType_i64) { | 4625 if (Src0->getType() == IceType_i64) { |
| 4651 Src0 = legalize(Src0); // get Base/Index into physical registers | 4626 Src0 = legalize(Src0); // get Base/Index into physical registers |
| 4652 Operand *Src0Lo = loOperand(Src0); | 4627 Operand *Src0Lo = loOperand(Src0); |
| 4653 Operand *Src0Hi = hiOperand(Src0); | 4628 Operand *Src0Hi = hiOperand(Src0); |
| 4654 if (CaseClusters.back().getHigh() > UINT32_MAX) { | 4629 if (CaseClusters.back().getHigh() > UINT32_MAX) { |
| 4655 // TODO(ascull): handle 64-bit case properly (currently naive version) | 4630 // TODO(ascull): handle 64-bit case properly (currently naive version) |
| 4656 // This might be handled by a higher level lowering of switches. | 4631 // This might be handled by a higher level lowering of switches. |
| (...skipping 17 matching lines...) |
| 4674 Context.insert(Label); | 4649 Context.insert(Label); |
| 4675 } | 4650 } |
| 4676 _br(Inst->getLabelDefault()); | 4651 _br(Inst->getLabelDefault()); |
| 4677 return; | 4652 return; |
| 4678 } else { | 4653 } else { |
| 4679 // All the values are 32-bit so just check the operand is too and then | 4654 // All the values are 32-bit so just check the operand is too and then |
| 4680 // fall through to the 32-bit implementation. This is a common case. | 4655 // fall through to the 32-bit implementation. This is a common case. |
| 4681 Src0Hi = legalize(Src0Hi, Legal_Reg | Legal_Mem); | 4656 Src0Hi = legalize(Src0Hi, Legal_Reg | Legal_Mem); |
| 4682 Constant *Zero = Ctx->getConstantInt32(0); | 4657 Constant *Zero = Ctx->getConstantInt32(0); |
| 4683 _cmp(Src0Hi, Zero); | 4658 _cmp(Src0Hi, Zero); |
| 4684 _br(Traits::Cond::Br_ne, DefaultLabel); | 4659 _br(Traits::Cond::Br_ne, DefaultTarget); |
| 4685 Src0 = Src0Lo; | 4660 Src0 = Src0Lo; |
| 4686 } | 4661 } |
| 4687 } | 4662 } |
| 4688 | 4663 |
| 4689 // 32-bit lowering | 4664 // 32-bit lowering |
| 4690 | 4665 |
| 4691 if (CaseClusters.size() == 1) { | 4666 if (CaseClusters.size() == 1) { |
| 4692 // Jump straight to default if needed. Currently a common case as jump | 4667 // Jump straight to default if needed. Currently a common case as jump |
| 4693 // tables occur on their own. | 4668 // tables occur on their own. |
| 4694 constexpr bool DoneCmp = false; | 4669 constexpr bool DoneCmp = false; |
| 4695 lowerCaseCluster(CaseClusters.front(), Src0, DoneCmp, DefaultLabel); | 4670 lowerCaseCluster(CaseClusters.front(), Src0, DoneCmp, DefaultTarget); |
| 4696 return; | 4671 return; |
| 4697 } | 4672 } |
| 4698 | 4673 |
| 4699 // Going to be using multiple times so get it in a register early | 4674 // Going to be using multiple times so get it in a register early |
| 4700 Variable *Comparison = legalizeToReg(Src0); | 4675 Variable *Comparison = legalizeToReg(Src0); |
| 4701 | 4676 |
| 4702 // A span is over the clusters | 4677 // A span is over the clusters |
| 4703 struct SearchSpan { | 4678 struct SearchSpan { |
| 4704 SearchSpan(SizeT Begin, SizeT Size, typename Traits::Insts::Label *Label) | 4679 SearchSpan(SizeT Begin, SizeT Size, typename Traits::Insts::Label *Label) |
| 4705 : Begin(Begin), Size(Size), Label(Label) {} | 4680 : Begin(Begin), Size(Size), Label(Label) {} |
| (...skipping 14 matching lines...) |
| 4720 if (Span.Label != nullptr) | 4695 if (Span.Label != nullptr) |
| 4721 Context.insert(Span.Label); | 4696 Context.insert(Span.Label); |
| 4722 | 4697 |
| 4723 switch (Span.Size) { | 4698 switch (Span.Size) { |
| 4724 case 0: | 4699 case 0: |
| 4725 llvm::report_fatal_error("Invalid SearchSpan size"); | 4700 llvm::report_fatal_error("Invalid SearchSpan size"); |
| 4726 break; | 4701 break; |
| 4727 | 4702 |
| 4728 case 1: | 4703 case 1: |
| 4729 lowerCaseCluster(CaseClusters[Span.Begin], Comparison, DoneCmp, | 4704 lowerCaseCluster(CaseClusters[Span.Begin], Comparison, DoneCmp, |
| 4730 SearchSpanStack.empty() ? nullptr : DefaultLabel); | 4705 SearchSpanStack.empty() ? nullptr : DefaultTarget); |
| 4731 DoneCmp = false; | 4706 DoneCmp = false; |
| 4732 break; | 4707 break; |
| 4733 | 4708 |
| 4734 case 2: | 4709 case 2: { |
| 4735 lowerCaseCluster(CaseClusters[Span.Begin], Comparison, DoneCmp); | 4710 const CaseCluster *CaseA = &CaseClusters[Span.Begin]; |
| 4711 const CaseCluster *CaseB = &CaseClusters[Span.Begin + 1]; |
| 4712 |
| 4713 // Placing a range last may allow register clobbering during the range |
| 4714 // test. That means there is no need to clone the register. If it is a |
| 4715 // unit range the comparison may have already been done in the binary |
| 4716 // search (DoneCmp) and so it should be placed first. If this is a range |
| 4717 // of two items and the comparison with the low value has already been |
| 4718 // done, comparing with the other element is cheaper than a range test. |
| 4719 // If the low end of the range is zero then there is no subtraction and |
| 4720 // nothing to be gained. |
| 4721 if (!CaseA->isUnitRange() && |
| 4722 !(CaseA->getLow() == 0 || (DoneCmp && CaseA->isPairRange()))) { |
| 4723 std::swap(CaseA, CaseB); |
| 4724 DoneCmp = false; |
| 4725 } |
| 4726 |
| 4727 lowerCaseCluster(*CaseA, Comparison, DoneCmp); |
| 4736 DoneCmp = false; | 4728 DoneCmp = false; |
| 4737 lowerCaseCluster(CaseClusters[Span.Begin + 1], Comparison, DoneCmp, | 4729 lowerCaseCluster(*CaseB, Comparison, DoneCmp, |
| 4738 SearchSpanStack.empty() ? nullptr : DefaultLabel); | 4730 SearchSpanStack.empty() ? nullptr : DefaultTarget); |
| 4739 break; | 4731 } break; |
| 4740 | 4732 |
| 4741 default: | 4733 default: |
| 4742 // Pick the middle item and branch b or ae | 4734 // Pick the middle item and branch b or ae |
| 4743 SizeT PivotIndex = Span.Begin + (Span.Size / 2); | 4735 SizeT PivotIndex = Span.Begin + (Span.Size / 2); |
| 4744 const CaseCluster &Pivot = CaseClusters[PivotIndex]; | 4736 const CaseCluster &Pivot = CaseClusters[PivotIndex]; |
| 4745 Constant *Value = Ctx->getConstantInt32(Pivot.getLow()); | 4737 Constant *Value = Ctx->getConstantInt32(Pivot.getLow()); |
| 4746 // TODO(ascull): what if this jump is too big? | |
| 4747 typename Traits::Insts::Label *Label = | 4738 typename Traits::Insts::Label *Label = |
| 4748 Traits::Insts::Label::create(Func, this); | 4739 Traits::Insts::Label::create(Func, this); |
| 4749 _cmp(Comparison, Value); | 4740 _cmp(Comparison, Value); |
| 4750 _br(Traits::Cond::Br_b, Label); | 4741 // TODO(ascull): does it always have to be far? |
| 4742 _br(Traits::Cond::Br_b, Label, Traits::Insts::Br::Far); |
| 4751 // Lower the left and (pivot+right) sides, falling through to the right | 4743 // Lower the left and (pivot+right) sides, falling through to the right |
| 4752 SearchSpanStack.emplace(Span.Begin, Span.Size / 2, Label); | 4744 SearchSpanStack.emplace(Span.Begin, Span.Size / 2, Label); |
| 4753 SearchSpanStack.emplace(PivotIndex, Span.Size - (Span.Size / 2), nullptr); | 4745 SearchSpanStack.emplace(PivotIndex, Span.Size - (Span.Size / 2), nullptr); |
| 4754 DoneCmp = true; | 4746 DoneCmp = true; |
| 4755 break; | 4747 break; |
| 4756 } | 4748 } |
| 4757 } | 4749 } |
| 4758 | 4750 |
| 4759 _br(DefaultLabel); | 4751 _br(DefaultTarget); |
| 4760 } | 4752 } |
| 4761 | 4753 |
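For intuition on the SearchSpan loop in lowerSwitch: it is a binary search over the sorted clusters driven by an explicit work stack, emitting one cmp/jb per pivot, so a switch with N clusters reaches a leaf cluster after O(log N) comparisons. A rough recursive sketch of the same partitioning and emission order (the cluster low bounds are illustrative, and leaf handling stands in for lowerCaseCluster):

    #include <cstdio>
    #include <vector>

    // Prints the pivot comparisons the lowering would emit for sorted cluster
    // low bounds; a recursive stand-in for the SearchSpan work-stack above.
    static void emitSearch(const std::vector<int> &Lows, size_t Begin, size_t Size) {
      if (Size <= 2) { // leaves are handled by lowerCaseCluster in the real code
        for (size_t I = 0; I < Size; ++I)
          std::printf("  leaf: cluster starting at %d\n", Lows[Begin + I]);
        return;
      }
      const size_t Half = Size / 2;
      std::printf("cmp comparison, %d ; jb <left half>\n", Lows[Begin + Half]);
      emitSearch(Lows, Begin + Half, Size - Half); // fall through: pivot and right half
      emitSearch(Lows, Begin, Half);               // branch target: left half
    }

    int main() {
      emitSearch({0, 10, 20, 30, 40, 50, 60}, 0, 7);
      return 0;
    }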
| 4762 template <class Machine> | 4754 template <class Machine> |
| 4763 void TargetX86Base<Machine>::scalarizeArithmetic(InstArithmetic::OpKind Kind, | 4755 void TargetX86Base<Machine>::scalarizeArithmetic(InstArithmetic::OpKind Kind, |
| 4764 Variable *Dest, Operand *Src0, | 4756 Variable *Dest, Operand *Src0, |
| 4765 Operand *Src1) { | 4757 Operand *Src1) { |
| 4766 assert(isVectorType(Dest->getType())); | 4758 assert(isVectorType(Dest->getType())); |
| 4767 Type Ty = Dest->getType(); | 4759 Type Ty = Dest->getType(); |
| 4768 Type ElementTy = typeElementType(Ty); | 4760 Type ElementTy = typeElementType(Ty); |
| 4769 SizeT NumElements = typeNumElements(Ty); | 4761 SizeT NumElements = typeNumElements(Ty); |
| (...skipping 698 matching lines...) |
| 5468 } | 5460 } |
| 5469 // the offset is not eligible for blinding or pooling, return the original | 5461 // the offset is not eligible for blinding or pooling, return the original |
| 5470 // mem operand | 5462 // mem operand |
| 5471 return MemOperand; | 5463 return MemOperand; |
| 5472 } | 5464 } |
| 5473 | 5465 |
| 5474 } // end of namespace X86Internal | 5466 } // end of namespace X86Internal |
| 5475 } // end of namespace Ice | 5467 } // end of namespace Ice |
| 5476 | 5468 |
| 5477 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H | 5469 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H |