Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// | 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 /// | 9 /// |
| 10 /// \file | 10 /// \file |
| (...skipping 396 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 407 Func->reorderNodes(); | 407 Func->reorderNodes(); |
| 408 | 408 |
| 409 // Branch optimization. This needs to be done just before code emission. In | 409 // Branch optimization. This needs to be done just before code emission. In |
| 410 // particular, no transformations that insert or reorder CfgNodes should be | 410 // particular, no transformations that insert or reorder CfgNodes should be |
| 411 // done after branch optimization. We go ahead and do it before nop insertion | 411 // done after branch optimization. We go ahead and do it before nop insertion |
| 412 // to reduce the amount of work needed for searching for opportunities. | 412 // to reduce the amount of work needed for searching for opportunities. |
| 413 Func->doBranchOpt(); | 413 Func->doBranchOpt(); |
| 414 Func->dump("After branch optimization"); | 414 Func->dump("After branch optimization"); |
| 415 | 415 |
| 416 // Nop insertion | 416 // Nop insertion |
| 417 if (Ctx->getFlags().shouldDoNopInsertion()) { | 417 if (Ctx->getFlags().shouldDoNopInsertion()) |
| 418 Func->doNopInsertion(); | 418 Func->doNopInsertion(); |
| 419 } | 419 |
| 420 // Mark nodes that require sandbox alignment | |
| 421 if (Ctx->getFlags().getUseSandboxing()) | |
| 422 Func->markNodesForSandboxing(); | |
| 420 } | 423 } |
| 421 | 424 |
| 422 template <class Machine> void TargetX86Base<Machine>::translateOm1() { | 425 template <class Machine> void TargetX86Base<Machine>::translateOm1() { |
| 423 TimerMarker T(TimerStack::TT_Om1, Func); | 426 TimerMarker T(TimerStack::TT_Om1, Func); |
| 424 | 427 |
| 425 Func->placePhiLoads(); | 428 Func->placePhiLoads(); |
| 426 if (Func->hasError()) | 429 if (Func->hasError()) |
| 427 return; | 430 return; |
| 428 Func->placePhiStores(); | 431 Func->placePhiStores(); |
| 429 if (Func->hasError()) | 432 if (Func->hasError()) |
| (...skipping 15 matching lines...) Expand all Loading... | |
| 445 Func->dump("After regalloc of infinite-weight variables"); | 448 Func->dump("After regalloc of infinite-weight variables"); |
| 446 | 449 |
| 447 Func->genFrame(); | 450 Func->genFrame(); |
| 448 if (Func->hasError()) | 451 if (Func->hasError()) |
| 449 return; | 452 return; |
| 450 Func->dump("After stack frame mapping"); | 453 Func->dump("After stack frame mapping"); |
| 451 | 454 |
| 452 // Nop insertion | 455 // Nop insertion |
| 453 if (Ctx->getFlags().shouldDoNopInsertion()) { | 456 if (Ctx->getFlags().shouldDoNopInsertion()) { |
| 454 Func->doNopInsertion(); | 457 Func->doNopInsertion(); |
| 455 } | 458 } |
|
jvoung (off chromium)
2015/07/29 21:31:17
// Mark nodes that require sandbox alignment
if
ascull
2015/07/29 22:43:05
Done.
| |
| 456 } | 459 } |
| 457 | 460 |
| 458 inline bool canRMW(const InstArithmetic *Arith) { | 461 inline bool canRMW(const InstArithmetic *Arith) { |
| 459 Type Ty = Arith->getDest()->getType(); | 462 Type Ty = Arith->getDest()->getType(); |
| 460 // X86 vector instructions write to a register and have no RMW option. | 463 // X86 vector instructions write to a register and have no RMW option. |
| 461 if (isVectorType(Ty)) | 464 if (isVectorType(Ty)) |
| 462 return false; | 465 return false; |
| 463 bool isI64 = Ty == IceType_i64; | 466 bool isI64 = Ty == IceType_i64; |
| 464 | 467 |
| 465 switch (Arith->getOp()) { | 468 switch (Arith->getOp()) { |
| (...skipping 642 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1108 | 1111 |
| 1109 if (!Ctx->getFlags().getUseSandboxing()) | 1112 if (!Ctx->getFlags().getUseSandboxing()) |
| 1110 return; | 1113 return; |
| 1111 // Change the original ret instruction into a sandboxed return sequence. | 1114 // Change the original ret instruction into a sandboxed return sequence. |
| 1112 // t:ecx = pop | 1115 // t:ecx = pop |
| 1113 // bundle_lock | 1116 // bundle_lock |
| 1114 // and t, ~31 | 1117 // and t, ~31 |
| 1115 // jmp *t | 1118 // jmp *t |
| 1116 // bundle_unlock | 1119 // bundle_unlock |
| 1117 // FakeUse <original_ret_operand> | 1120 // FakeUse <original_ret_operand> |
| 1118 const SizeT BundleSize = 1 | |
| 1119 << Func->getAssembler<>()->getBundleAlignLog2Bytes(); | |
| 1120 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx); | 1121 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx); |
| 1121 _pop(T_ecx); | 1122 _pop(T_ecx); |
| 1122 _bundle_lock(); | 1123 lowerIndirectJump(T_ecx); |
| 1123 _and(T_ecx, Ctx->getConstantInt32(~(BundleSize - 1))); | |
| 1124 _jmp(T_ecx); | |
| 1125 _bundle_unlock(); | |
| 1126 if (RI->getSrcSize()) { | 1124 if (RI->getSrcSize()) { |
| 1127 Variable *RetValue = llvm::cast<Variable>(RI->getSrc(0)); | 1125 Variable *RetValue = llvm::cast<Variable>(RI->getSrc(0)); |
| 1128 Context.insert(InstFakeUse::create(Func, RetValue)); | 1126 Context.insert(InstFakeUse::create(Func, RetValue)); |
| 1129 } | 1127 } |
| 1130 RI->setDeleted(); | 1128 RI->setDeleted(); |
| 1131 } | 1129 } |
| 1132 | 1130 |
| 1133 template <class Machine> void TargetX86Base<Machine>::split64(Variable *Var) { | 1131 template <class Machine> void TargetX86Base<Machine>::split64(Variable *Var) { |
| 1134 switch (Var->getType()) { | 1132 switch (Var->getType()) { |
| 1135 default: | 1133 default: |
| (...skipping 2827 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3963 } else { | 3961 } else { |
| 3964 _bsr(T_Dest2, SecondVar); | 3962 _bsr(T_Dest2, SecondVar); |
| 3965 _xor(T_Dest2, ThirtyOne); | 3963 _xor(T_Dest2, ThirtyOne); |
| 3966 } | 3964 } |
| 3967 _test(SecondVar, SecondVar); | 3965 _test(SecondVar, SecondVar); |
| 3968 _cmov(T_Dest2, T_Dest, Traits::Cond::Br_e); | 3966 _cmov(T_Dest2, T_Dest, Traits::Cond::Br_e); |
| 3969 _mov(DestLo, T_Dest2); | 3967 _mov(DestLo, T_Dest2); |
| 3970 _mov(DestHi, Ctx->getConstantZero(IceType_i32)); | 3968 _mov(DestHi, Ctx->getConstantZero(IceType_i32)); |
| 3971 } | 3969 } |
| 3972 | 3970 |
| 3971 template <class Machine> | |
| 3972 void TargetX86Base<Machine>::lowerIndirectJump(Variable *Target) { | |
| 3973 const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing(); | |
| 3974 if (NeedSandboxing) { | |
| 3975 _bundle_lock(); | |
| 3976 const SizeT BundleSize = | |
| 3977 1 << Func->getAssembler<>()->getBundleAlignLog2Bytes(); | |
| 3978 _and(Target, Ctx->getConstantInt32(~(BundleSize - 1))); | |
| 3979 } | |
| 3980 _jmp(Target); | |
| 3981 if (NeedSandboxing) | |
| 3982 _bundle_unlock(); | |
| 3983 } | |
| 3984 | |
| 3973 inline bool isAdd(const Inst *Inst) { | 3985 inline bool isAdd(const Inst *Inst) { |
| 3974 if (const InstArithmetic *Arith = | 3986 if (const InstArithmetic *Arith = |
| 3975 llvm::dyn_cast_or_null<const InstArithmetic>(Inst)) { | 3987 llvm::dyn_cast_or_null<const InstArithmetic>(Inst)) { |
| 3976 return (Arith->getOp() == InstArithmetic::Add); | 3988 return (Arith->getOp() == InstArithmetic::Add); |
| 3977 } | 3989 } |
| 3978 return false; | 3990 return false; |
| 3979 } | 3991 } |
| 3980 | 3992 |
| 3981 inline void dumpAddressOpt(const Cfg *Func, const Variable *Base, | 3993 inline void dumpAddressOpt(const Cfg *Func, const Variable *Base, |
| 3982 const Variable *Index, uint16_t Shift, | 3994 const Variable *Index, uint16_t Shift, |
| (...skipping 525 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 4508 } | 4520 } |
| 4509 | 4521 |
| 4510 _cmp(Comparison, Ctx->getConstantInt32(Max - Min)); | 4522 _cmp(Comparison, Ctx->getConstantInt32(Max - Min)); |
| 4511 | 4523 |
| 4512 return Comparison; | 4524 return Comparison; |
| 4513 } | 4525 } |
| 4514 | 4526 |
| 4515 template <class Machine> | 4527 template <class Machine> |
| 4516 void TargetX86Base<Machine>::lowerCaseCluster(const CaseCluster &Case, | 4528 void TargetX86Base<Machine>::lowerCaseCluster(const CaseCluster &Case, |
| 4517 Operand *Comparison, bool DoneCmp, | 4529 Operand *Comparison, bool DoneCmp, |
| 4518 CfgNode *DefaultLabel) { | 4530 CfgNode *DefaultTarget) { |
| 4519 switch (Case.getKind()) { | 4531 switch (Case.getKind()) { |
| 4520 case CaseCluster::JumpTable: { | 4532 case CaseCluster::JumpTable: { |
| 4521 typename Traits::Insts::Label *SkipJumpTable; | 4533 typename Traits::Insts::Label *SkipJumpTable; |
| 4522 | 4534 |
| 4523 Operand *RangeIndex = | 4535 Operand *RangeIndex = |
| 4524 lowerCmpRange(Comparison, Case.getLow(), Case.getHigh()); | 4536 lowerCmpRange(Comparison, Case.getLow(), Case.getHigh()); |
| 4525 if (DefaultLabel != nullptr) { | 4537 if (DefaultTarget == nullptr) { |
| 4526 _br(Traits::Cond::Br_a, DefaultLabel); | |
| 4527 } else { | |
| 4528 // Skip over jump table logic if comparison not in range and no default | 4538 // Skip over jump table logic if comparison not in range and no default |
| 4529 SkipJumpTable = Traits::Insts::Label::create(Func, this); | 4539 SkipJumpTable = Traits::Insts::Label::create(Func, this); |
| 4530 _br(Traits::Cond::Br_a, SkipJumpTable); | 4540 _br(Traits::Cond::Br_a, SkipJumpTable); |
| 4541 } else { | |
| 4542 _br(Traits::Cond::Br_a, DefaultTarget); | |
| 4531 } | 4543 } |
| 4532 | 4544 |
| 4533 InstJumpTable *JumpTable = Case.getJumpTable(); | 4545 InstJumpTable *JumpTable = Case.getJumpTable(); |
| 4534 Context.insert(JumpTable); | 4546 Context.insert(JumpTable); |
| 4535 | 4547 |
| 4536 // Make sure the index is a register of the same width as the base | 4548 // Make sure the index is a register of the same width as the base |
| 4537 Variable *Index; | 4549 Variable *Index; |
| 4538 if (RangeIndex->getType() != getPointerType()) { | 4550 if (RangeIndex->getType() != getPointerType()) { |
| 4539 Index = makeReg(getPointerType()); | 4551 Index = makeReg(getPointerType()); |
| 4540 _movzx(Index, RangeIndex); | 4552 _movzx(Index, RangeIndex); |
| 4541 } else { | 4553 } else { |
| 4542 Index = legalizeToReg(RangeIndex); | 4554 Index = legalizeToReg(RangeIndex); |
| 4543 } | 4555 } |
| 4544 | 4556 |
| 4545 constexpr RelocOffsetT RelocOffset = 0; | 4557 constexpr RelocOffsetT RelocOffset = 0; |
| 4546 constexpr bool SuppressMangling = true; | 4558 constexpr bool SuppressMangling = true; |
| 4547 Constant *Base = Ctx->getConstantSym(RelocOffset, JumpTable->getName(Func), | 4559 IceString MangledName = Ctx->mangleName(Func->getFunctionName()); |
| 4548 SuppressMangling); | 4560 Constant *Base = Ctx->getConstantSym( |
| 4561 RelocOffset, InstJumpTable::makeName(MangledName, JumpTable->getId()), | |
| 4562 SuppressMangling); | |
| 4549 Constant *Offset = nullptr; | 4563 Constant *Offset = nullptr; |
| 4550 uint16_t Shift = typeWidthInBytesLog2(getPointerType()); | 4564 uint16_t Shift = typeWidthInBytesLog2(getPointerType()); |
| 4551 // TODO(ascull): remove need for legalize by allowing null base in memop | 4565 // TODO(ascull): remove need for legalize by allowing null base in memop |
| 4552 auto *MemTarget = Traits::X86OperandMem::create( | 4566 auto *TargetInMemory = Traits::X86OperandMem::create( |
| 4553 Func, getPointerType(), legalizeToReg(Base), Offset, Index, Shift); | 4567 Func, getPointerType(), legalizeToReg(Base), Offset, Index, Shift); |
| 4554 Variable *Target = nullptr; | 4568 Variable *Target = nullptr; |
| 4555 _mov(Target, MemTarget); | 4569 _mov(Target, TargetInMemory); |
| 4556 _jmp(Target); | 4570 lowerIndirectJump(Target); |
| 4557 // TODO(ascull): sandboxing for indirect jump | |
| 4558 | 4571 |
| 4559 if (DefaultLabel == nullptr) | 4572 if (DefaultTarget == nullptr) |
| 4560 Context.insert(SkipJumpTable); | 4573 Context.insert(SkipJumpTable); |
| 4561 return; | 4574 return; |
| 4562 } | 4575 } |
| 4563 case CaseCluster::Range: { | 4576 case CaseCluster::Range: { |
| 4564 if (Case.getHigh() == Case.getLow()) { | 4577 if (Case.isUnitRange()) { |
| 4565 // Single item | 4578 // Single item |
| 4566 Constant *Value = Ctx->getConstantInt32(Case.getLow()); | 4579 if (!DoneCmp) { |
| 4567 if (!DoneCmp) | 4580 Constant *Value = Ctx->getConstantInt32(Case.getLow()); |
| 4568 _cmp(Comparison, Value); | 4581 _cmp(Comparison, Value); |
| 4569 _br(Traits::Cond::Br_e, Case.getLabel()); | 4582 } |
| 4570 if (DefaultLabel != nullptr) | 4583 _br(Traits::Cond::Br_e, Case.getTarget()); |
| 4571 _br(DefaultLabel); | 4584 } else if (DoneCmp && Case.isPairRange()) { |
| 4585 // Range of two items with first item already compared against | |
| 4586 _br(Traits::Cond::Br_e, Case.getTarget()); | |
| 4587 Constant *Value = Ctx->getConstantInt32(Case.getHigh()); | |
| 4588 _cmp(Comparison, Value); | |
| 4589 _br(Traits::Cond::Br_e, Case.getTarget()); | |
| 4572 } else { | 4590 } else { |
| 4573 // Range | 4591 // Range |
| 4574 lowerCmpRange(Comparison, Case.getLow(), Case.getHigh()); | 4592 lowerCmpRange(Comparison, Case.getLow(), Case.getHigh()); |
| 4575 _br(Traits::Cond::Br_be, Case.getLabel()); | 4593 _br(Traits::Cond::Br_be, Case.getTarget()); |
| 4576 if (DefaultLabel != nullptr) | |
| 4577 _br(DefaultLabel); | |
| 4578 } | 4594 } |
| 4595 if (DefaultTarget != nullptr) | |
| 4596 _br(DefaultTarget); | |
| 4579 return; | 4597 return; |
| 4580 } | 4598 } |
| 4581 } | 4599 } |
| 4582 } | 4600 } |
| 4583 | 4601 |
| 4584 template <class Machine> | 4602 template <class Machine> |
| 4585 void TargetX86Base<Machine>::lowerSwitch(const InstSwitch *Inst) { | 4603 void TargetX86Base<Machine>::lowerSwitch(const InstSwitch *Inst) { |
| 4586 // Do it the old fashioned way unless asked for the advanced method | |
| 4587 if (!Ctx->getFlags().getUseAdvancedSwitchLowering()) { | |
| 4588 // This implements the most naive possible lowering. | |
| 4589 // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default | |
| 4590 Operand *Src0 = Inst->getComparison(); | |
| 4591 SizeT NumCases = Inst->getNumCases(); | |
| 4592 if (Src0->getType() == IceType_i64) { | |
| 4593 Src0 = legalize(Src0); // get Base/Index into physical registers | |
| 4594 Operand *Src0Lo = loOperand(Src0); | |
| 4595 Operand *Src0Hi = hiOperand(Src0); | |
| 4596 if (NumCases >= 2) { | |
| 4597 Src0Lo = legalizeToReg(Src0Lo); | |
| 4598 Src0Hi = legalizeToReg(Src0Hi); | |
| 4599 } else { | |
| 4600 Src0Lo = legalize(Src0Lo, Legal_Reg | Legal_Mem); | |
| 4601 Src0Hi = legalize(Src0Hi, Legal_Reg | Legal_Mem); | |
| 4602 } | |
| 4603 for (SizeT I = 0; I < NumCases; ++I) { | |
| 4604 Constant *ValueLo = Ctx->getConstantInt32(Inst->getValue(I)); | |
| 4605 Constant *ValueHi = Ctx->getConstantInt32(Inst->getValue(I) >> 32); | |
| 4606 typename Traits::Insts::Label *Label = | |
| 4607 Traits::Insts::Label::create(Func, this); | |
| 4608 _cmp(Src0Lo, ValueLo); | |
| 4609 _br(Traits::Cond::Br_ne, Label); | |
| 4610 _cmp(Src0Hi, ValueHi); | |
| 4611 _br(Traits::Cond::Br_e, Inst->getLabel(I)); | |
| 4612 Context.insert(Label); | |
| 4613 } | |
| 4614 _br(Inst->getLabelDefault()); | |
| 4615 return; | |
| 4616 } | |
| 4617 // OK, we'll be slightly less naive by forcing Src into a physical | |
| 4618 // register if there are 2 or more uses. | |
| 4619 if (NumCases >= 2) | |
| 4620 Src0 = legalizeToReg(Src0); | |
| 4621 else | |
| 4622 Src0 = legalize(Src0, Legal_Reg | Legal_Mem); | |
| 4623 for (SizeT I = 0; I < NumCases; ++I) { | |
| 4624 Constant *Value = Ctx->getConstantInt32(Inst->getValue(I)); | |
| 4625 _cmp(Src0, Value); | |
| 4626 _br(Traits::Cond::Br_e, Inst->getLabel(I)); | |
| 4627 } | |
| 4628 | |
| 4629 _br(Inst->getLabelDefault()); | |
| 4630 return; | |
| 4631 } | |
| 4632 | |
| 4633 // Group cases together and navigate through them with a binary search | 4604 // Group cases together and navigate through them with a binary search |
| 4634 CaseClusterArray CaseClusters = CaseCluster::clusterizeSwitch(Func, Inst); | 4605 CaseClusterArray CaseClusters = CaseCluster::clusterizeSwitch(Func, Inst); |
| 4635 Operand *Src0 = Inst->getComparison(); | 4606 Operand *Src0 = Inst->getComparison(); |
| 4636 CfgNode *DefaultLabel = Inst->getLabelDefault(); | 4607 CfgNode *DefaultTarget = Inst->getLabelDefault(); |
| 4637 | 4608 |
| 4638 assert(CaseClusters.size() != 0); // Should always be at least one | 4609 assert(CaseClusters.size() != 0); // Should always be at least one |
| 4639 | 4610 |
| 4640 if (Src0->getType() == IceType_i64) { | 4611 if (Src0->getType() == IceType_i64) { |
| 4641 Src0 = legalize(Src0); // get Base/Index into physical registers | 4612 Src0 = legalize(Src0); // get Base/Index into physical registers |
| 4642 Operand *Src0Lo = loOperand(Src0); | 4613 Operand *Src0Lo = loOperand(Src0); |
| 4643 Operand *Src0Hi = hiOperand(Src0); | 4614 Operand *Src0Hi = hiOperand(Src0); |
| 4644 if (CaseClusters.back().getHigh() > UINT32_MAX) { | 4615 if (CaseClusters.back().getHigh() > UINT32_MAX) { |
| 4645 // TODO(ascull): handle 64-bit case properly (currently naive version) | 4616 // TODO(ascull): handle 64-bit case properly (currently naive version) |
| 4646 // This might be handled by a higher level lowering of switches. | 4617 // This might be handled by a higher level lowering of switches. |
| (...skipping 17 matching lines...) Expand all Loading... | |
| 4664 Context.insert(Label); | 4635 Context.insert(Label); |
| 4665 } | 4636 } |
| 4666 _br(Inst->getLabelDefault()); | 4637 _br(Inst->getLabelDefault()); |
| 4667 return; | 4638 return; |
| 4668 } else { | 4639 } else { |
| 4669 // All the values are 32-bit so just check the operand is too and then | 4640 // All the values are 32-bit so just check the operand is too and then |
| 4670 // fall through to the 32-bit implementation. This is a common case. | 4641 // fall through to the 32-bit implementation. This is a common case. |
| 4671 Src0Hi = legalize(Src0Hi, Legal_Reg | Legal_Mem); | 4642 Src0Hi = legalize(Src0Hi, Legal_Reg | Legal_Mem); |
| 4672 Constant *Zero = Ctx->getConstantInt32(0); | 4643 Constant *Zero = Ctx->getConstantInt32(0); |
| 4673 _cmp(Src0Hi, Zero); | 4644 _cmp(Src0Hi, Zero); |
| 4674 _br(Traits::Cond::Br_ne, DefaultLabel); | 4645 _br(Traits::Cond::Br_ne, DefaultTarget); |
| 4675 Src0 = Src0Lo; | 4646 Src0 = Src0Lo; |
| 4676 } | 4647 } |
| 4677 } | 4648 } |
| 4678 | 4649 |
| 4679 // 32-bit lowering | 4650 // 32-bit lowering |
| 4680 | 4651 |
| 4681 if (CaseClusters.size() == 1) { | 4652 if (CaseClusters.size() == 1) { |
| 4682 // Jump straight to default if needed. Currently a common case as jump | 4653 // Jump straight to default if needed. Currently a common case as jump |
| 4683 // tables occur on their own. | 4654 // tables occur on their own. |
| 4684 constexpr bool DoneCmp = false; | 4655 constexpr bool DoneCmp = false; |
| 4685 lowerCaseCluster(CaseClusters.front(), Src0, DoneCmp, DefaultLabel); | 4656 lowerCaseCluster(CaseClusters.front(), Src0, DoneCmp, DefaultTarget); |
| 4686 return; | 4657 return; |
| 4687 } | 4658 } |
| 4688 | 4659 |
| 4689 // Going to be using multiple times so get it in a register early | 4660 // Going to be using multiple times so get it in a register early |
| 4690 Variable *Comparison = legalizeToReg(Src0); | 4661 Variable *Comparison = legalizeToReg(Src0); |
| 4691 | 4662 |
| 4692 // A span is over the clusters | 4663 // A span is over the clusters |
| 4693 struct SearchSpan { | 4664 struct SearchSpan { |
| 4694 SearchSpan(SizeT Begin, SizeT Size, typename Traits::Insts::Label *Label) | 4665 SearchSpan(SizeT Begin, SizeT Size, typename Traits::Insts::Label *Label) |
| 4695 : Begin(Begin), Size(Size), Label(Label) {} | 4666 : Begin(Begin), Size(Size), Label(Label) {} |
| (...skipping 14 matching lines...) Expand all Loading... | |
| 4710 if (Span.Label != nullptr) | 4681 if (Span.Label != nullptr) |
| 4711 Context.insert(Span.Label); | 4682 Context.insert(Span.Label); |
| 4712 | 4683 |
| 4713 switch (Span.Size) { | 4684 switch (Span.Size) { |
| 4714 case 0: | 4685 case 0: |
| 4715 llvm::report_fatal_error("Invalid SearchSpan size"); | 4686 llvm::report_fatal_error("Invalid SearchSpan size"); |
| 4716 break; | 4687 break; |
| 4717 | 4688 |
| 4718 case 1: | 4689 case 1: |
| 4719 lowerCaseCluster(CaseClusters[Span.Begin], Comparison, DoneCmp, | 4690 lowerCaseCluster(CaseClusters[Span.Begin], Comparison, DoneCmp, |
| 4720 SearchSpanStack.empty() ? nullptr : DefaultLabel); | 4691 SearchSpanStack.empty() ? nullptr : DefaultTarget); |
| 4721 DoneCmp = false; | 4692 DoneCmp = false; |
| 4722 break; | 4693 break; |
| 4723 | 4694 |
| 4724 case 2: | 4695 case 2: { |
| 4725 lowerCaseCluster(CaseClusters[Span.Begin], Comparison, DoneCmp); | 4696 const CaseCluster *CaseA = &CaseClusters[Span.Begin]; |
| 4697 const CaseCluster *CaseB = &CaseClusters[Span.Begin + 1]; | |
| 4698 | |
| 4699 // Placing a range last may allow register clobbering during the range | |
| 4700 // test. That means there is no need to clone the register. If it is a | |
| 4701 // unit range the comparison may have already been done in the binary | |
| 4702 // search (DoneCmp) and so it should be placed first. If this is a range | |
| 4703 // of two items and the comparison with the low value has already been | |
| 4704 // done, comparing with the other element is cheaper than a range test. | |
| 4705 // If the low end of the range is zero then there is no subtraction and | |
| 4706 // nothing to be gained. | |
| 4707 if (!CaseA->isUnitRange() && | |
| 4708 !(CaseA->getLow() == 0 || (DoneCmp && CaseA->isPairRange()))) { | |
| 4709 std::swap(CaseA, CaseB); | |
| 4710 DoneCmp = false; | |
| 4711 } | |
| 4712 | |
| 4713 lowerCaseCluster(*CaseA, Comparison, DoneCmp); | |
| 4726 DoneCmp = false; | 4714 DoneCmp = false; |
| 4727 lowerCaseCluster(CaseClusters[Span.Begin + 1], Comparison, DoneCmp, | 4715 lowerCaseCluster(*CaseB, Comparison, DoneCmp, |
| 4728 SearchSpanStack.empty() ? nullptr : DefaultLabel); | 4716 SearchSpanStack.empty() ? nullptr : DefaultTarget); |
| 4729 break; | 4717 } break; |
| 4730 | 4718 |
| 4731 default: | 4719 default: |
| 4732 // Pick the middle item and branch b or ae | 4720 // Pick the middle item and branch b or ae |
| 4733 SizeT PivotIndex = Span.Begin + (Span.Size / 2); | 4721 SizeT PivotIndex = Span.Begin + (Span.Size / 2); |
| 4734 const CaseCluster &Pivot = CaseClusters[PivotIndex]; | 4722 const CaseCluster &Pivot = CaseClusters[PivotIndex]; |
| 4735 Constant *Value = Ctx->getConstantInt32(Pivot.getLow()); | 4723 Constant *Value = Ctx->getConstantInt32(Pivot.getLow()); |
| 4736 // TODO(ascull): what if this jump is too big? | |
| 4737 typename Traits::Insts::Label *Label = | 4724 typename Traits::Insts::Label *Label = |
| 4738 Traits::Insts::Label::create(Func, this); | 4725 Traits::Insts::Label::create(Func, this); |
| 4739 _cmp(Comparison, Value); | 4726 _cmp(Comparison, Value); |
| 4740 _br(Traits::Cond::Br_b, Label); | 4727 // TODO(ascull): does it always have to be far? |
| 4728 _br(Traits::Cond::Br_b, Label, Traits::Insts::Br::Far); | |
| 4741 // Lower the left and (pivot+right) sides, falling through to the right | 4729 // Lower the left and (pivot+right) sides, falling through to the right |
| 4742 SearchSpanStack.emplace(Span.Begin, Span.Size / 2, Label); | 4730 SearchSpanStack.emplace(Span.Begin, Span.Size / 2, Label); |
| 4743 SearchSpanStack.emplace(PivotIndex, Span.Size - (Span.Size / 2), nullptr); | 4731 SearchSpanStack.emplace(PivotIndex, Span.Size - (Span.Size / 2), nullptr); |
| 4744 DoneCmp = true; | 4732 DoneCmp = true; |
| 4745 break; | 4733 break; |
| 4746 } | 4734 } |
| 4747 } | 4735 } |
| 4748 | 4736 |
| 4749 _br(DefaultLabel); | 4737 _br(DefaultTarget); |
| 4750 } | 4738 } |
| 4751 | 4739 |
| 4752 template <class Machine> | 4740 template <class Machine> |
| 4753 void TargetX86Base<Machine>::scalarizeArithmetic(InstArithmetic::OpKind Kind, | 4741 void TargetX86Base<Machine>::scalarizeArithmetic(InstArithmetic::OpKind Kind, |
| 4754 Variable *Dest, Operand *Src0, | 4742 Variable *Dest, Operand *Src0, |
| 4755 Operand *Src1) { | 4743 Operand *Src1) { |
| 4756 assert(isVectorType(Dest->getType())); | 4744 assert(isVectorType(Dest->getType())); |
| 4757 Type Ty = Dest->getType(); | 4745 Type Ty = Dest->getType(); |
| 4758 Type ElementTy = typeElementType(Ty); | 4746 Type ElementTy = typeElementType(Ty); |
| 4759 SizeT NumElements = typeNumElements(Ty); | 4747 SizeT NumElements = typeNumElements(Ty); |
| (...skipping 856 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 5616 } | 5604 } |
| 5617 // the offset is not eligible for blinding or pooling, return the original | 5605 // the offset is not eligible for blinding or pooling, return the original |
| 5618 // mem operand | 5606 // mem operand |
| 5619 return MemOperand; | 5607 return MemOperand; |
| 5620 } | 5608 } |
| 5621 | 5609 |
| 5622 } // end of namespace X86Internal | 5610 } // end of namespace X86Internal |
| 5623 } // end of namespace Ice | 5611 } // end of namespace Ice |
| 5624 | 5612 |
| 5625 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H | 5613 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H |
| OLD | NEW |