Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// | 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 /// | 9 /// |
| 10 /// \file | 10 /// \file |
| (...skipping 396 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 407 Func->reorderNodes(); | 407 Func->reorderNodes(); |
| 408 | 408 |
| 409 // Branch optimization. This needs to be done just before code emission. In | 409 // Branch optimization. This needs to be done just before code emission. In |
| 410 // particular, no transformations that insert or reorder CfgNodes should be | 410 // particular, no transformations that insert or reorder CfgNodes should be |
| 411 // done after branch optimization. We go ahead and do it before nop insertion | 411 // done after branch optimization. We go ahead and do it before nop insertion |
| 412 // to reduce the amount of work needed for searching for opportunities. | 412 // to reduce the amount of work needed for searching for opportunities. |
| 413 Func->doBranchOpt(); | 413 Func->doBranchOpt(); |
| 414 Func->dump("After branch optimization"); | 414 Func->dump("After branch optimization"); |
| 415 | 415 |
| 416 // Nop insertion | 416 // Nop insertion |
| 417 if (Ctx->getFlags().shouldDoNopInsertion()) { | 417 if (Ctx->getFlags().shouldDoNopInsertion()) |
| 418 Func->doNopInsertion(); | 418 Func->doNopInsertion(); |
| 419 } | 419 |
| 420 // Mark nodes that require sandbox alignment | |
| 421 if (Ctx->getFlags().getUseSandboxing()) | |
| 422 Func->markNodesForSandboxing(); | |
| 420 } | 423 } |
| 421 | 424 |
| 422 template <class Machine> void TargetX86Base<Machine>::translateOm1() { | 425 template <class Machine> void TargetX86Base<Machine>::translateOm1() { |
| 423 TimerMarker T(TimerStack::TT_Om1, Func); | 426 TimerMarker T(TimerStack::TT_Om1, Func); |
| 424 | 427 |
| 425 Func->placePhiLoads(); | 428 Func->placePhiLoads(); |
| 426 if (Func->hasError()) | 429 if (Func->hasError()) |
| 427 return; | 430 return; |
| 428 Func->placePhiStores(); | 431 Func->placePhiStores(); |
| 429 if (Func->hasError()) | 432 if (Func->hasError()) |
| (...skipping 16 matching lines...) Expand all Loading... | |
| 446 | 449 |
| 447 Func->genFrame(); | 450 Func->genFrame(); |
| 448 if (Func->hasError()) | 451 if (Func->hasError()) |
| 449 return; | 452 return; |
| 450 Func->dump("After stack frame mapping"); | 453 Func->dump("After stack frame mapping"); |
| 451 | 454 |
| 452 // Nop insertion | 455 // Nop insertion |
| 453 if (Ctx->getFlags().shouldDoNopInsertion()) { | 456 if (Ctx->getFlags().shouldDoNopInsertion()) { |
| 454 Func->doNopInsertion(); | 457 Func->doNopInsertion(); |
| 455 } | 458 } |
| 459 | |
| 460 // Mark nodes that require sandbox alignment | |
| 461 if (Ctx->getFlags().getUseSandboxing()) | |
| 462 Func->markNodesForSandboxing(); | |
| 456 } | 463 } |
| 457 | 464 |
| 458 inline bool canRMW(const InstArithmetic *Arith) { | 465 inline bool canRMW(const InstArithmetic *Arith) { |
| 459 Type Ty = Arith->getDest()->getType(); | 466 Type Ty = Arith->getDest()->getType(); |
| 460 // X86 vector instructions write to a register and have no RMW option. | 467 // X86 vector instructions write to a register and have no RMW option. |
| 461 if (isVectorType(Ty)) | 468 if (isVectorType(Ty)) |
| 462 return false; | 469 return false; |
| 463 bool isI64 = Ty == IceType_i64; | 470 bool isI64 = Ty == IceType_i64; |
| 464 | 471 |
| 465 switch (Arith->getOp()) { | 472 switch (Arith->getOp()) { |
| (...skipping 642 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1108 | 1115 |
| 1109 if (!Ctx->getFlags().getUseSandboxing()) | 1116 if (!Ctx->getFlags().getUseSandboxing()) |
| 1110 return; | 1117 return; |
| 1111 // Change the original ret instruction into a sandboxed return sequence. | 1118 // Change the original ret instruction into a sandboxed return sequence. |
| 1112 // t:ecx = pop | 1119 // t:ecx = pop |
| 1113 // bundle_lock | 1120 // bundle_lock |
| 1114 // and t, ~31 | 1121 // and t, ~31 |
| 1115 // jmp *t | 1122 // jmp *t |
| 1116 // bundle_unlock | 1123 // bundle_unlock |
| 1117 // FakeUse <original_ret_operand> | 1124 // FakeUse <original_ret_operand> |
| 1118 const SizeT BundleSize = 1 | |
| 1119 << Func->getAssembler<>()->getBundleAlignLog2Bytes(); | |
| 1120 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx); | 1125 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx); |
| 1121 _pop(T_ecx); | 1126 _pop(T_ecx); |
| 1122 _bundle_lock(); | 1127 lowerIndirectJump(T_ecx); |
| 1123 _and(T_ecx, Ctx->getConstantInt32(~(BundleSize - 1))); | |
| 1124 _jmp(T_ecx); | |
| 1125 _bundle_unlock(); | |
| 1126 if (RI->getSrcSize()) { | 1128 if (RI->getSrcSize()) { |
| 1127 Variable *RetValue = llvm::cast<Variable>(RI->getSrc(0)); | 1129 Variable *RetValue = llvm::cast<Variable>(RI->getSrc(0)); |
| 1128 Context.insert(InstFakeUse::create(Func, RetValue)); | 1130 Context.insert(InstFakeUse::create(Func, RetValue)); |
| 1129 } | 1131 } |
| 1130 RI->setDeleted(); | 1132 RI->setDeleted(); |
| 1131 } | 1133 } |
| 1132 | 1134 |
| 1133 template <class Machine> void TargetX86Base<Machine>::split64(Variable *Var) { | 1135 template <class Machine> void TargetX86Base<Machine>::split64(Variable *Var) { |
| 1134 switch (Var->getType()) { | 1136 switch (Var->getType()) { |
| 1135 default: | 1137 default: |
| (...skipping 2827 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3963 } else { | 3965 } else { |
| 3964 _bsr(T_Dest2, SecondVar); | 3966 _bsr(T_Dest2, SecondVar); |
| 3965 _xor(T_Dest2, ThirtyOne); | 3967 _xor(T_Dest2, ThirtyOne); |
| 3966 } | 3968 } |
| 3967 _test(SecondVar, SecondVar); | 3969 _test(SecondVar, SecondVar); |
| 3968 _cmov(T_Dest2, T_Dest, Traits::Cond::Br_e); | 3970 _cmov(T_Dest2, T_Dest, Traits::Cond::Br_e); |
| 3969 _mov(DestLo, T_Dest2); | 3971 _mov(DestLo, T_Dest2); |
| 3970 _mov(DestHi, Ctx->getConstantZero(IceType_i32)); | 3972 _mov(DestHi, Ctx->getConstantZero(IceType_i32)); |
| 3971 } | 3973 } |
| 3972 | 3974 |
| 3975 template <class Machine> | |
| 3976 void TargetX86Base<Machine>::lowerIndirectJump(Variable *Target) { | |
| 3977 const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing(); | |
| 3978 if (NeedSandboxing) { | |
| 3979 _bundle_lock(); | |
| 3980 const SizeT BundleSize = | |
| 3981 1 << Func->getAssembler<>()->getBundleAlignLog2Bytes(); | |
| 3982 _and(Target, Ctx->getConstantInt32(~(BundleSize - 1))); | |
| 3983 } | |
| 3984 _jmp(Target); | |
| 3985 if (NeedSandboxing) | |
| 3986 _bundle_unlock(); | |
| 3987 } | |
| 3988 | |
| 3973 inline bool isAdd(const Inst *Inst) { | 3989 inline bool isAdd(const Inst *Inst) { |
| 3974 if (const InstArithmetic *Arith = | 3990 if (const InstArithmetic *Arith = |
| 3975 llvm::dyn_cast_or_null<const InstArithmetic>(Inst)) { | 3991 llvm::dyn_cast_or_null<const InstArithmetic>(Inst)) { |
| 3976 return (Arith->getOp() == InstArithmetic::Add); | 3992 return (Arith->getOp() == InstArithmetic::Add); |
| 3977 } | 3993 } |
| 3978 return false; | 3994 return false; |
| 3979 } | 3995 } |
| 3980 | 3996 |
| 3981 inline void dumpAddressOpt(const Cfg *Func, const Variable *Base, | 3997 inline void dumpAddressOpt(const Cfg *Func, const Variable *Base, |
| 3982 const Variable *Index, uint16_t Shift, | 3998 const Variable *Index, uint16_t Shift, |
| (...skipping 525 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 4508 } | 4524 } |
| 4509 | 4525 |
| 4510 _cmp(Comparison, Ctx->getConstantInt32(Max - Min)); | 4526 _cmp(Comparison, Ctx->getConstantInt32(Max - Min)); |
| 4511 | 4527 |
| 4512 return Comparison; | 4528 return Comparison; |
| 4513 } | 4529 } |
| 4514 | 4530 |
| 4515 template <class Machine> | 4531 template <class Machine> |
| 4516 void TargetX86Base<Machine>::lowerCaseCluster(const CaseCluster &Case, | 4532 void TargetX86Base<Machine>::lowerCaseCluster(const CaseCluster &Case, |
| 4517 Operand *Comparison, bool DoneCmp, | 4533 Operand *Comparison, bool DoneCmp, |
| 4518 CfgNode *DefaultLabel) { | 4534 CfgNode *DefaultTarget) { |
| 4519 switch (Case.getKind()) { | 4535 switch (Case.getKind()) { |
| 4520 case CaseCluster::JumpTable: { | 4536 case CaseCluster::JumpTable: { |
| 4521 typename Traits::Insts::Label *SkipJumpTable; | 4537 typename Traits::Insts::Label *SkipJumpTable; |
| 4522 | 4538 |
| 4523 Operand *RangeIndex = | 4539 Operand *RangeIndex = |
| 4524 lowerCmpRange(Comparison, Case.getLow(), Case.getHigh()); | 4540 lowerCmpRange(Comparison, Case.getLow(), Case.getHigh()); |
| 4525 if (DefaultLabel != nullptr) { | 4541 if (DefaultTarget == nullptr) { |
|
Jim Stichnoth
2015/07/30 15:20:24
How about something like
if (DefaultTarget) {
ascull
2015/07/30 17:30:00
I have this way to match the `(DefaultTarget == nullptr)` check below.
Jim Stichnoth
2015/07/30 17:40:44
I see.
| |
| 4526 _br(Traits::Cond::Br_a, DefaultLabel); | |
| 4527 } else { | |
| 4528 // Skip over jump table logic if comparison not in range and no default | 4542 // Skip over jump table logic if comparison not in range and no default |
| 4529 SkipJumpTable = Traits::Insts::Label::create(Func, this); | 4543 SkipJumpTable = Traits::Insts::Label::create(Func, this); |
| 4530 _br(Traits::Cond::Br_a, SkipJumpTable); | 4544 _br(Traits::Cond::Br_a, SkipJumpTable); |
| 4545 } else { | |
| 4546 _br(Traits::Cond::Br_a, DefaultTarget); | |
| 4531 } | 4547 } |
| 4532 | 4548 |
| 4533 InstJumpTable *JumpTable = Case.getJumpTable(); | 4549 InstJumpTable *JumpTable = Case.getJumpTable(); |
| 4534 Context.insert(JumpTable); | 4550 Context.insert(JumpTable); |
| 4535 | 4551 |
| 4536 // Make sure the index is a register of the same width as the base | 4552 // Make sure the index is a register of the same width as the base |
| 4537 Variable *Index; | 4553 Variable *Index; |
| 4538 if (RangeIndex->getType() != getPointerType()) { | 4554 if (RangeIndex->getType() != getPointerType()) { |
| 4539 Index = makeReg(getPointerType()); | 4555 Index = makeReg(getPointerType()); |
| 4540 _movzx(Index, RangeIndex); | 4556 _movzx(Index, RangeIndex); |
| 4541 } else { | 4557 } else { |
| 4542 Index = legalizeToReg(RangeIndex); | 4558 Index = legalizeToReg(RangeIndex); |
| 4543 } | 4559 } |
| 4544 | 4560 |
| 4545 constexpr RelocOffsetT RelocOffset = 0; | 4561 constexpr RelocOffsetT RelocOffset = 0; |
| 4546 constexpr bool SuppressMangling = true; | 4562 constexpr bool SuppressMangling = true; |
| 4547 Constant *Base = Ctx->getConstantSym(RelocOffset, JumpTable->getName(Func), | 4563 IceString MangledName = Ctx->mangleName(Func->getFunctionName()); |
| 4548 SuppressMangling); | 4564 Constant *Base = Ctx->getConstantSym( |
| 4565 RelocOffset, InstJumpTable::makeName(MangledName, JumpTable->getId()), | |
| 4566 SuppressMangling); | |
| 4549 Constant *Offset = nullptr; | 4567 Constant *Offset = nullptr; |
| 4550 uint16_t Shift = typeWidthInBytesLog2(getPointerType()); | 4568 uint16_t Shift = typeWidthInBytesLog2(getPointerType()); |
| 4551 // TODO(ascull): remove need for legalize by allowing null base in memop | 4569 // TODO(ascull): remove need for legalize by allowing null base in memop |
| 4552 auto *MemTarget = Traits::X86OperandMem::create( | 4570 auto *TargetInMemory = Traits::X86OperandMem::create( |
| 4553 Func, getPointerType(), legalizeToReg(Base), Offset, Index, Shift); | 4571 Func, getPointerType(), legalizeToReg(Base), Offset, Index, Shift); |
| 4554 Variable *Target = nullptr; | 4572 Variable *Target = nullptr; |
| 4555 _mov(Target, MemTarget); | 4573 _mov(Target, TargetInMemory); |
| 4556 _jmp(Target); | 4574 lowerIndirectJump(Target); |
| 4557 // TODO(ascull): sandboxing for indirect jump | |
| 4558 | 4575 |
| 4559 if (DefaultLabel == nullptr) | 4576 if (DefaultTarget == nullptr) |
| 4560 Context.insert(SkipJumpTable); | 4577 Context.insert(SkipJumpTable); |
| 4561 return; | 4578 return; |
| 4562 } | 4579 } |
| 4563 case CaseCluster::Range: { | 4580 case CaseCluster::Range: { |
| 4564 if (Case.getHigh() == Case.getLow()) { | 4581 if (Case.isUnitRange()) { |
| 4565 // Single item | 4582 // Single item |
| 4566 Constant *Value = Ctx->getConstantInt32(Case.getLow()); | 4583 if (!DoneCmp) { |
| 4567 if (!DoneCmp) | 4584 Constant *Value = Ctx->getConstantInt32(Case.getLow()); |
| 4568 _cmp(Comparison, Value); | 4585 _cmp(Comparison, Value); |
| 4569 _br(Traits::Cond::Br_e, Case.getLabel()); | 4586 } |
| 4570 if (DefaultLabel != nullptr) | 4587 _br(Traits::Cond::Br_e, Case.getTarget()); |
| 4571 _br(DefaultLabel); | 4588 } else if (DoneCmp && Case.isPairRange()) { |
| 4589 // Range of two items with first item already compared against | |
| 4590 _br(Traits::Cond::Br_e, Case.getTarget()); | |
| 4591 Constant *Value = Ctx->getConstantInt32(Case.getHigh()); | |
| 4592 _cmp(Comparison, Value); | |
| 4593 _br(Traits::Cond::Br_e, Case.getTarget()); | |
| 4572 } else { | 4594 } else { |
| 4573 // Range | 4595 // Range |
| 4574 lowerCmpRange(Comparison, Case.getLow(), Case.getHigh()); | 4596 lowerCmpRange(Comparison, Case.getLow(), Case.getHigh()); |
| 4575 _br(Traits::Cond::Br_be, Case.getLabel()); | 4597 _br(Traits::Cond::Br_be, Case.getTarget()); |
| 4576 if (DefaultLabel != nullptr) | |
| 4577 _br(DefaultLabel); | |
| 4578 } | 4598 } |
| 4599 if (DefaultTarget != nullptr) | |
| 4600 _br(DefaultTarget); | |
| 4579 return; | 4601 return; |
| 4580 } | 4602 } |
| 4581 } | 4603 } |
| 4582 } | 4604 } |
| 4583 | 4605 |
| 4584 template <class Machine> | 4606 template <class Machine> |
| 4585 void TargetX86Base<Machine>::lowerSwitch(const InstSwitch *Inst) { | 4607 void TargetX86Base<Machine>::lowerSwitch(const InstSwitch *Inst) { |
| 4586 // Do it the old fashioned way unless asked for the advanced method | |
| 4587 if (!Ctx->getFlags().getUseAdvancedSwitchLowering()) { | |
| 4588 // This implements the most naive possible lowering. | |
| 4589 // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default | |
| 4590 Operand *Src0 = Inst->getComparison(); | |
| 4591 SizeT NumCases = Inst->getNumCases(); | |
| 4592 if (Src0->getType() == IceType_i64) { | |
| 4593 Src0 = legalize(Src0); // get Base/Index into physical registers | |
| 4594 Operand *Src0Lo = loOperand(Src0); | |
| 4595 Operand *Src0Hi = hiOperand(Src0); | |
| 4596 if (NumCases >= 2) { | |
| 4597 Src0Lo = legalizeToReg(Src0Lo); | |
| 4598 Src0Hi = legalizeToReg(Src0Hi); | |
| 4599 } else { | |
| 4600 Src0Lo = legalize(Src0Lo, Legal_Reg | Legal_Mem); | |
| 4601 Src0Hi = legalize(Src0Hi, Legal_Reg | Legal_Mem); | |
| 4602 } | |
| 4603 for (SizeT I = 0; I < NumCases; ++I) { | |
| 4604 Constant *ValueLo = Ctx->getConstantInt32(Inst->getValue(I)); | |
| 4605 Constant *ValueHi = Ctx->getConstantInt32(Inst->getValue(I) >> 32); | |
| 4606 typename Traits::Insts::Label *Label = | |
| 4607 Traits::Insts::Label::create(Func, this); | |
| 4608 _cmp(Src0Lo, ValueLo); | |
| 4609 _br(Traits::Cond::Br_ne, Label); | |
| 4610 _cmp(Src0Hi, ValueHi); | |
| 4611 _br(Traits::Cond::Br_e, Inst->getLabel(I)); | |
| 4612 Context.insert(Label); | |
| 4613 } | |
| 4614 _br(Inst->getLabelDefault()); | |
| 4615 return; | |
| 4616 } | |
| 4617 // OK, we'll be slightly less naive by forcing Src into a physical | |
| 4618 // register if there are 2 or more uses. | |
| 4619 if (NumCases >= 2) | |
| 4620 Src0 = legalizeToReg(Src0); | |
| 4621 else | |
| 4622 Src0 = legalize(Src0, Legal_Reg | Legal_Mem); | |
| 4623 for (SizeT I = 0; I < NumCases; ++I) { | |
| 4624 Constant *Value = Ctx->getConstantInt32(Inst->getValue(I)); | |
| 4625 _cmp(Src0, Value); | |
| 4626 _br(Traits::Cond::Br_e, Inst->getLabel(I)); | |
| 4627 } | |
| 4628 | |
| 4629 _br(Inst->getLabelDefault()); | |
| 4630 return; | |
| 4631 } | |
| 4632 | |
| 4633 // Group cases together and navigate through them with a binary search | 4608 // Group cases together and navigate through them with a binary search |
| 4634 CaseClusterArray CaseClusters = CaseCluster::clusterizeSwitch(Func, Inst); | 4609 CaseClusterArray CaseClusters = CaseCluster::clusterizeSwitch(Func, Inst); |
| 4635 Operand *Src0 = Inst->getComparison(); | 4610 Operand *Src0 = Inst->getComparison(); |
| 4636 CfgNode *DefaultLabel = Inst->getLabelDefault(); | 4611 CfgNode *DefaultTarget = Inst->getLabelDefault(); |
| 4637 | 4612 |
| 4638 assert(CaseClusters.size() != 0); // Should always be at least one | 4613 assert(CaseClusters.size() != 0); // Should always be at least one |
| 4639 | 4614 |
| 4640 if (Src0->getType() == IceType_i64) { | 4615 if (Src0->getType() == IceType_i64) { |
| 4641 Src0 = legalize(Src0); // get Base/Index into physical registers | 4616 Src0 = legalize(Src0); // get Base/Index into physical registers |
| 4642 Operand *Src0Lo = loOperand(Src0); | 4617 Operand *Src0Lo = loOperand(Src0); |
| 4643 Operand *Src0Hi = hiOperand(Src0); | 4618 Operand *Src0Hi = hiOperand(Src0); |
| 4644 if (CaseClusters.back().getHigh() > UINT32_MAX) { | 4619 if (CaseClusters.back().getHigh() > UINT32_MAX) { |
| 4645 // TODO(ascull): handle 64-bit case properly (currently naive version) | 4620 // TODO(ascull): handle 64-bit case properly (currently naive version) |
| 4646 // This might be handled by a higher level lowering of switches. | 4621 // This might be handled by a higher level lowering of switches. |
| (...skipping 17 matching lines...) Expand all Loading... | |
| 4664 Context.insert(Label); | 4639 Context.insert(Label); |
| 4665 } | 4640 } |
| 4666 _br(Inst->getLabelDefault()); | 4641 _br(Inst->getLabelDefault()); |
| 4667 return; | 4642 return; |
| 4668 } else { | 4643 } else { |
| 4669 // All the values are 32-bit so just check the operand is too and then | 4644 // All the values are 32-bit so just check the operand is too and then |
| 4670 // fall through to the 32-bit implementation. This is a common case. | 4645 // fall through to the 32-bit implementation. This is a common case. |
| 4671 Src0Hi = legalize(Src0Hi, Legal_Reg | Legal_Mem); | 4646 Src0Hi = legalize(Src0Hi, Legal_Reg | Legal_Mem); |
| 4672 Constant *Zero = Ctx->getConstantInt32(0); | 4647 Constant *Zero = Ctx->getConstantInt32(0); |
| 4673 _cmp(Src0Hi, Zero); | 4648 _cmp(Src0Hi, Zero); |
| 4674 _br(Traits::Cond::Br_ne, DefaultLabel); | 4649 _br(Traits::Cond::Br_ne, DefaultTarget); |
| 4675 Src0 = Src0Lo; | 4650 Src0 = Src0Lo; |
| 4676 } | 4651 } |
| 4677 } | 4652 } |
| 4678 | 4653 |
| 4679 // 32-bit lowering | 4654 // 32-bit lowering |
| 4680 | 4655 |
| 4681 if (CaseClusters.size() == 1) { | 4656 if (CaseClusters.size() == 1) { |
| 4682 // Jump straight to default if needed. Currently a common case as jump | 4657 // Jump straight to default if needed. Currently a common case as jump |
| 4683 // tables occur on their own. | 4658 // tables occur on their own. |
| 4684 constexpr bool DoneCmp = false; | 4659 constexpr bool DoneCmp = false; |
| 4685 lowerCaseCluster(CaseClusters.front(), Src0, DoneCmp, DefaultLabel); | 4660 lowerCaseCluster(CaseClusters.front(), Src0, DoneCmp, DefaultTarget); |
| 4686 return; | 4661 return; |
| 4687 } | 4662 } |
| 4688 | 4663 |
| 4689 // Going to be using multiple times so get it in a register early | 4664 // Going to be using multiple times so get it in a register early |
| 4690 Variable *Comparison = legalizeToReg(Src0); | 4665 Variable *Comparison = legalizeToReg(Src0); |
| 4691 | 4666 |
| 4692 // A span is over the clusters | 4667 // A span is over the clusters |
| 4693 struct SearchSpan { | 4668 struct SearchSpan { |
| 4694 SearchSpan(SizeT Begin, SizeT Size, typename Traits::Insts::Label *Label) | 4669 SearchSpan(SizeT Begin, SizeT Size, typename Traits::Insts::Label *Label) |
| 4695 : Begin(Begin), Size(Size), Label(Label) {} | 4670 : Begin(Begin), Size(Size), Label(Label) {} |
| (...skipping 14 matching lines...) Expand all Loading... | |
| 4710 if (Span.Label != nullptr) | 4685 if (Span.Label != nullptr) |
| 4711 Context.insert(Span.Label); | 4686 Context.insert(Span.Label); |
| 4712 | 4687 |
| 4713 switch (Span.Size) { | 4688 switch (Span.Size) { |
| 4714 case 0: | 4689 case 0: |
| 4715 llvm::report_fatal_error("Invalid SearchSpan size"); | 4690 llvm::report_fatal_error("Invalid SearchSpan size"); |
| 4716 break; | 4691 break; |
| 4717 | 4692 |
| 4718 case 1: | 4693 case 1: |
| 4719 lowerCaseCluster(CaseClusters[Span.Begin], Comparison, DoneCmp, | 4694 lowerCaseCluster(CaseClusters[Span.Begin], Comparison, DoneCmp, |
| 4720 SearchSpanStack.empty() ? nullptr : DefaultLabel); | 4695 SearchSpanStack.empty() ? nullptr : DefaultTarget); |
| 4721 DoneCmp = false; | 4696 DoneCmp = false; |
| 4722 break; | 4697 break; |
| 4723 | 4698 |
| 4724 case 2: | 4699 case 2: { |
| 4725 lowerCaseCluster(CaseClusters[Span.Begin], Comparison, DoneCmp); | 4700 const CaseCluster *CaseA = &CaseClusters[Span.Begin]; |
| 4701 const CaseCluster *CaseB = &CaseClusters[Span.Begin + 1]; | |
| 4702 | |
| 4703 // Placing a range last may allow register clobbering during the range | |
| 4704 // test. That means there is no need to clone the register. If it is a | |
| 4705 // unit range the comparison may have already been done in the binary | |
| 4706 // search (DoneCmp) and so it should be placed first. If this is a range | |
| 4707 // of two items and the comparison with the low value has already been | |
| 4708 // done, comparing with the other element is cheaper than a range test. | |
| 4709 // If the low end of the range is zero then there is no subtraction and | |
| 4710 // nothing to be gained. | |
| 4711 if (!CaseA->isUnitRange() && | |
| 4712 !(CaseA->getLow() == 0 || (DoneCmp && CaseA->isPairRange()))) { | |
| 4713 std::swap(CaseA, CaseB); | |
| 4714 DoneCmp = false; | |
| 4715 } | |
| 4716 | |
| 4717 lowerCaseCluster(*CaseA, Comparison, DoneCmp); | |
| 4726 DoneCmp = false; | 4718 DoneCmp = false; |
| 4727 lowerCaseCluster(CaseClusters[Span.Begin + 1], Comparison, DoneCmp, | 4719 lowerCaseCluster(*CaseB, Comparison, DoneCmp, |
| 4728 SearchSpanStack.empty() ? nullptr : DefaultLabel); | 4720 SearchSpanStack.empty() ? nullptr : DefaultTarget); |
| 4729 break; | 4721 } break; |
| 4730 | 4722 |
| 4731 default: | 4723 default: |
| 4732 // Pick the middle item and branch b or ae | 4724 // Pick the middle item and branch b or ae |
| 4733 SizeT PivotIndex = Span.Begin + (Span.Size / 2); | 4725 SizeT PivotIndex = Span.Begin + (Span.Size / 2); |
| 4734 const CaseCluster &Pivot = CaseClusters[PivotIndex]; | 4726 const CaseCluster &Pivot = CaseClusters[PivotIndex]; |
| 4735 Constant *Value = Ctx->getConstantInt32(Pivot.getLow()); | 4727 Constant *Value = Ctx->getConstantInt32(Pivot.getLow()); |
| 4736 // TODO(ascull): what if this jump is too big? | |
| 4737 typename Traits::Insts::Label *Label = | 4728 typename Traits::Insts::Label *Label = |
| 4738 Traits::Insts::Label::create(Func, this); | 4729 Traits::Insts::Label::create(Func, this); |
| 4739 _cmp(Comparison, Value); | 4730 _cmp(Comparison, Value); |
| 4740 _br(Traits::Cond::Br_b, Label); | 4731 // TODO(ascull): does it always have to be far? |
| 4732 _br(Traits::Cond::Br_b, Label, Traits::Insts::Br::Far); | |
| 4741 // Lower the left and (pivot+right) sides, falling through to the right | 4733 // Lower the left and (pivot+right) sides, falling through to the right |
| 4742 SearchSpanStack.emplace(Span.Begin, Span.Size / 2, Label); | 4734 SearchSpanStack.emplace(Span.Begin, Span.Size / 2, Label); |
| 4743 SearchSpanStack.emplace(PivotIndex, Span.Size - (Span.Size / 2), nullptr); | 4735 SearchSpanStack.emplace(PivotIndex, Span.Size - (Span.Size / 2), nullptr); |
| 4744 DoneCmp = true; | 4736 DoneCmp = true; |
| 4745 break; | 4737 break; |
| 4746 } | 4738 } |
| 4747 } | 4739 } |
| 4748 | 4740 |
| 4749 _br(DefaultLabel); | 4741 _br(DefaultTarget); |
| 4750 } | 4742 } |
| 4751 | 4743 |
| 4752 template <class Machine> | 4744 template <class Machine> |
| 4753 void TargetX86Base<Machine>::scalarizeArithmetic(InstArithmetic::OpKind Kind, | 4745 void TargetX86Base<Machine>::scalarizeArithmetic(InstArithmetic::OpKind Kind, |
| 4754 Variable *Dest, Operand *Src0, | 4746 Variable *Dest, Operand *Src0, |
| 4755 Operand *Src1) { | 4747 Operand *Src1) { |
| 4756 assert(isVectorType(Dest->getType())); | 4748 assert(isVectorType(Dest->getType())); |
| 4757 Type Ty = Dest->getType(); | 4749 Type Ty = Dest->getType(); |
| 4758 Type ElementTy = typeElementType(Ty); | 4750 Type ElementTy = typeElementType(Ty); |
| 4759 SizeT NumElements = typeNumElements(Ty); | 4751 SizeT NumElements = typeNumElements(Ty); |
| (...skipping 856 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 5616 } | 5608 } |
| 5617 // the offset is not eligible for blinding or pooling, return the original | 5609 // the offset is not eligible for blinding or pooling, return the original |
| 5618 // mem operand | 5610 // mem operand |
| 5619 return MemOperand; | 5611 return MemOperand; |
| 5620 } | 5612 } |
| 5621 | 5613 |
| 5622 } // end of namespace X86Internal | 5614 } // end of namespace X86Internal |
| 5623 } // end of namespace Ice | 5615 } // end of namespace Ice |
| 5624 | 5616 |
| 5625 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H | 5617 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H |
| OLD | NEW |