| OLD | NEW |
| 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// | 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 /// | 9 /// |
| 10 /// \file | 10 /// \file |
| (...skipping 169 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 180 /// This generally means that its lowering sequence requires more than one | 180 /// This generally means that its lowering sequence requires more than one |
| 181 /// conditional branch, namely 64-bit integer compares and some floating-point | 181 /// conditional branch, namely 64-bit integer compares and some floating-point |
| 182 /// compares. When this is true, and there is more than one consumer, we prefer | 182 /// compares. When this is true, and there is more than one consumer, we prefer |
| 183 /// to disable the folding optimization because it minimizes branches. | 183 /// to disable the folding optimization because it minimizes branches. |
| 184 template <class MachineTraits> | 184 template <class MachineTraits> |
| 185 bool BoolFolding<MachineTraits>::hasComplexLowering(const Inst *Instr) { | 185 bool BoolFolding<MachineTraits>::hasComplexLowering(const Inst *Instr) { |
| 186 switch (getProducerKind(Instr)) { | 186 switch (getProducerKind(Instr)) { |
| 187 default: | 187 default: |
| 188 return false; | 188 return false; |
| 189 case PK_Icmp64: | 189 case PK_Icmp64: |
| 190 return true; | 190 return !MachineTraits::Is64Bit; |
| 191 case PK_Fcmp: | 191 case PK_Fcmp: |
| 192 return MachineTraits::TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()] | 192 return MachineTraits::TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()] |
| 193 .C2 != MachineTraits::Cond::Br_None; | 193 .C2 != MachineTraits::Cond::Br_None; |
| 194 } | 194 } |
| 195 } | 195 } |
| 196 | 196 |
| 197 template <class MachineTraits> | 197 template <class MachineTraits> |
| 198 bool BoolFolding<MachineTraits>::isValidFolding( | 198 bool BoolFolding<MachineTraits>::isValidFolding( |
| 199 typename BoolFolding<MachineTraits>::BoolFoldingProducerKind ProducerKind, | 199 typename BoolFolding<MachineTraits>::BoolFoldingProducerKind ProducerKind, |
| 200 typename BoolFolding<MachineTraits>::BoolFoldingConsumerKind ConsumerKind) { | 200 typename BoolFolding<MachineTraits>::BoolFoldingConsumerKind ConsumerKind) { |
| (...skipping 557 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 758 template <class Machine> | 758 template <class Machine> |
| 759 bool TargetX86Base<Machine>::doBranchOpt(Inst *I, const CfgNode *NextNode) { | 759 bool TargetX86Base<Machine>::doBranchOpt(Inst *I, const CfgNode *NextNode) { |
| 760 if (auto *Br = llvm::dyn_cast<typename Traits::Insts::Br>(I)) { | 760 if (auto *Br = llvm::dyn_cast<typename Traits::Insts::Br>(I)) { |
| 761 return Br->optimizeBranch(NextNode); | 761 return Br->optimizeBranch(NextNode); |
| 762 } | 762 } |
| 763 return false; | 763 return false; |
| 764 } | 764 } |
| 765 | 765 |
| 766 template <class Machine> | 766 template <class Machine> |
| 767 Variable *TargetX86Base<Machine>::getPhysicalRegister(SizeT RegNum, Type Ty) { | 767 Variable *TargetX86Base<Machine>::getPhysicalRegister(SizeT RegNum, Type Ty) { |
| 768 // Special case: never allow partial reads/writes to/from %rBP and %rSP. | |
| 769 if (RegNum == Traits::RegisterSet::Reg_esp || | |
| 770 RegNum == Traits::RegisterSet::Reg_ebp) | |
| 771 Ty = Traits::WordType; | |
| 772 if (Ty == IceType_void) | 768 if (Ty == IceType_void) |
| 773 Ty = IceType_i32; | 769 Ty = IceType_i32; |
| 774 if (PhysicalRegisters[Ty].empty()) | 770 if (PhysicalRegisters[Ty].empty()) |
| 775 PhysicalRegisters[Ty].resize(Traits::RegisterSet::Reg_NUM); | 771 PhysicalRegisters[Ty].resize(Traits::RegisterSet::Reg_NUM); |
| 776 assert(RegNum < PhysicalRegisters[Ty].size()); | 772 assert(RegNum < PhysicalRegisters[Ty].size()); |
| 777 Variable *Reg = PhysicalRegisters[Ty][RegNum]; | 773 Variable *Reg = PhysicalRegisters[Ty][RegNum]; |
| 778 if (Reg == nullptr) { | 774 if (Reg == nullptr) { |
| 779 Reg = Func->makeVariable(Ty); | 775 Reg = Func->makeVariable(Ty); |
| 780 Reg->setRegNum(RegNum); | 776 Reg->setRegNum(RegNum); |
| 781 PhysicalRegisters[Ty][RegNum] = Reg; | 777 PhysicalRegisters[Ty][RegNum] = Reg; |
| (...skipping 209 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 991 std::max(AlignmentParam, Traits::X86_STACK_ALIGNMENT_BYTES); | 987 std::max(AlignmentParam, Traits::X86_STACK_ALIGNMENT_BYTES); |
| 992 const bool OverAligned = Alignment > Traits::X86_STACK_ALIGNMENT_BYTES; | 988 const bool OverAligned = Alignment > Traits::X86_STACK_ALIGNMENT_BYTES; |
| 993 const bool OptM1 = Ctx->getFlags().getOptLevel() == Opt_m1; | 989 const bool OptM1 = Ctx->getFlags().getOptLevel() == Opt_m1; |
| 994 const bool AllocaWithKnownOffset = Inst->getKnownFrameOffset(); | 990 const bool AllocaWithKnownOffset = Inst->getKnownFrameOffset(); |
| 995 const bool UseFramePointer = | 991 const bool UseFramePointer = |
| 996 hasFramePointer() || OverAligned || !AllocaWithKnownOffset || OptM1; | 992 hasFramePointer() || OverAligned || !AllocaWithKnownOffset || OptM1; |
| 997 | 993 |
| 998 if (UseFramePointer) | 994 if (UseFramePointer) |
| 999 setHasFramePointer(); | 995 setHasFramePointer(); |
| 1000 | 996 |
| 1001 Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp); | 997 Variable *esp = getPhysicalRegister(getStackReg()); |
| 1002 if (OverAligned) { | 998 if (OverAligned) { |
| 1003 _and(esp, Ctx->getConstantInt32(-Alignment)); | 999 _and(esp, Ctx->getConstantInt32(-Alignment)); |
| 1004 } | 1000 } |
| 1005 | 1001 |
| 1006 Variable *Dest = Inst->getDest(); | 1002 Variable *Dest = Inst->getDest(); |
| 1007 Operand *TotalSize = legalize(Inst->getSizeInBytes()); | 1003 Operand *TotalSize = legalize(Inst->getSizeInBytes()); |
| 1008 | 1004 |
| 1009 if (const auto *ConstantTotalSize = | 1005 if (const auto *ConstantTotalSize = |
| 1010 llvm::dyn_cast<ConstantInteger32>(TotalSize)) { | 1006 llvm::dyn_cast<ConstantInteger32>(TotalSize)) { |
| 1011 const uint32_t Value = | 1007 const uint32_t Value = |
| (...skipping 694 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1706 _mov(T, Src0); | 1702 _mov(T, Src0); |
| 1707 if (!llvm::isa<ConstantInteger32>(Src1)) | 1703 if (!llvm::isa<ConstantInteger32>(Src1)) |
| 1708 Src1 = copyToReg8(Src1, Traits::RegisterSet::Reg_cl); | 1704 Src1 = copyToReg8(Src1, Traits::RegisterSet::Reg_cl); |
| 1709 _sar(T, Src1); | 1705 _sar(T, Src1); |
| 1710 _mov(Dest, T); | 1706 _mov(Dest, T); |
| 1711 break; | 1707 break; |
| 1712 case InstArithmetic::Udiv: { | 1708 case InstArithmetic::Udiv: { |
| 1713 // div and idiv are the few arithmetic operators that do not allow | 1709 // div and idiv are the few arithmetic operators that do not allow |
| 1714 // immediates as the operand. | 1710 // immediates as the operand. |
| 1715 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); | 1711 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); |
| 1716 uint32_t Eax = Traits::RegisterSet::Reg_eax; | 1712 uint32_t Eax; |
| 1717 uint32_t Edx = Traits::RegisterSet::Reg_edx; | 1713 uint32_t Edx; |
| 1718 switch (Ty) { | 1714 switch (Ty) { |
| 1719 default: | 1715 default: |
| 1720 llvm_unreachable("Bad type for udiv"); | 1716 llvm::report_fatal_error("Bad type for udiv"); |
| 1721 // fallthrough | 1717 case IceType_i64: |
| 1718 Eax = Traits::getRaxOrDie(); |
| 1719 Edx = Traits::getRdxOrDie(); |
| 1722 case IceType_i32: | 1720 case IceType_i32: |
| 1721 Eax = Traits::RegisterSet::Reg_eax; |
| 1722 Edx = Traits::RegisterSet::Reg_edx; |
| 1723 break; | 1723 break; |
| 1724 case IceType_i16: | 1724 case IceType_i16: |
| 1725 Eax = Traits::RegisterSet::Reg_ax; | 1725 Eax = Traits::RegisterSet::Reg_ax; |
| 1726 Edx = Traits::RegisterSet::Reg_dx; | 1726 Edx = Traits::RegisterSet::Reg_dx; |
| 1727 break; | 1727 break; |
| 1728 case IceType_i8: | 1728 case IceType_i8: |
| 1729 Eax = Traits::RegisterSet::Reg_al; | 1729 Eax = Traits::RegisterSet::Reg_al; |
| 1730 Edx = Traits::RegisterSet::Reg_ah; | 1730 Edx = Traits::RegisterSet::Reg_ah; |
| 1731 break; | 1731 break; |
| 1732 } | 1732 } |
| (...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1766 _sar(T, Ctx->getConstantInt(Ty, LogDiv)); | 1766 _sar(T, Ctx->getConstantInt(Ty, LogDiv)); |
| 1767 } | 1767 } |
| 1768 _mov(Dest, T); | 1768 _mov(Dest, T); |
| 1769 return; | 1769 return; |
| 1770 } | 1770 } |
| 1771 } | 1771 } |
| 1772 } | 1772 } |
| 1773 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); | 1773 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); |
| 1774 switch (Ty) { | 1774 switch (Ty) { |
| 1775 default: | 1775 default: |
| 1776 llvm_unreachable("Bad type for sdiv"); | 1776 llvm::report_fatal_error("Bad type for sdiv"); |
| 1777 // fallthrough | 1777 case IceType_i64: |
| 1778 T_edx = makeReg(Ty, Traits::getRdxOrDie()); |
| 1779 _mov(T, Src0, Traits::getRaxOrDie()); |
| 1780 break; |
| 1778 case IceType_i32: | 1781 case IceType_i32: |
| 1779 T_edx = makeReg(Ty, Traits::RegisterSet::Reg_edx); | 1782 T_edx = makeReg(Ty, Traits::RegisterSet::Reg_edx); |
| 1780 _mov(T, Src0, Traits::RegisterSet::Reg_eax); | 1783 _mov(T, Src0, Traits::RegisterSet::Reg_eax); |
| 1781 break; | 1784 break; |
| 1782 case IceType_i16: | 1785 case IceType_i16: |
| 1783 T_edx = makeReg(Ty, Traits::RegisterSet::Reg_dx); | 1786 T_edx = makeReg(Ty, Traits::RegisterSet::Reg_dx); |
| 1784 _mov(T, Src0, Traits::RegisterSet::Reg_ax); | 1787 _mov(T, Src0, Traits::RegisterSet::Reg_ax); |
| 1785 break; | 1788 break; |
| 1786 case IceType_i8: | 1789 case IceType_i8: |
| 1787 T_edx = makeReg(IceType_i16, Traits::RegisterSet::Reg_ax); | 1790 T_edx = makeReg(IceType_i16, Traits::RegisterSet::Reg_ax); |
| 1788 _mov(T, Src0, Traits::RegisterSet::Reg_al); | 1791 _mov(T, Src0, Traits::RegisterSet::Reg_al); |
| 1789 break; | 1792 break; |
| 1790 } | 1793 } |
| 1791 _cbwdq(T_edx, T); | 1794 _cbwdq(T_edx, T); |
| 1792 _idiv(T, Src1, T_edx); | 1795 _idiv(T, Src1, T_edx); |
| 1793 _mov(Dest, T); | 1796 _mov(Dest, T); |
| 1794 break; | 1797 break; |
| 1795 case InstArithmetic::Urem: { | 1798 case InstArithmetic::Urem: { |
| 1796 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); | 1799 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); |
| 1797 uint32_t Eax = Traits::RegisterSet::Reg_eax; | 1800 uint32_t Eax; |
| 1798 uint32_t Edx = Traits::RegisterSet::Reg_edx; | 1801 uint32_t Edx; |
| 1799 switch (Ty) { | 1802 switch (Ty) { |
| 1800 default: | 1803 default: |
| 1801 llvm_unreachable("Bad type for urem"); | 1804 llvm::report_fatal_error("Bad type for urem"); |
| 1802 // fallthrough | 1805 case IceType_i64: |
| 1806 Eax = Traits::getRaxOrDie(); |
| 1807 Edx = Traits::getRdxOrDie(); |
| 1808 break; |
| 1803 case IceType_i32: | 1809 case IceType_i32: |
| 1810 Eax = Traits::RegisterSet::Reg_eax; |
| 1811 Edx = Traits::RegisterSet::Reg_edx; |
| 1804 break; | 1812 break; |
| 1805 case IceType_i16: | 1813 case IceType_i16: |
| 1806 Eax = Traits::RegisterSet::Reg_ax; | 1814 Eax = Traits::RegisterSet::Reg_ax; |
| 1807 Edx = Traits::RegisterSet::Reg_dx; | 1815 Edx = Traits::RegisterSet::Reg_dx; |
| 1808 break; | 1816 break; |
| 1809 case IceType_i8: | 1817 case IceType_i8: |
| 1810 Eax = Traits::RegisterSet::Reg_al; | 1818 Eax = Traits::RegisterSet::Reg_al; |
| 1811 Edx = Traits::RegisterSet::Reg_ah; | 1819 Edx = Traits::RegisterSet::Reg_ah; |
| 1812 break; | 1820 break; |
| 1813 } | 1821 } |
| (...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1851 _add(T, Src0); | 1859 _add(T, Src0); |
| 1852 _and(T, Ctx->getConstantInt(Ty, -(1 << LogDiv))); | 1860 _and(T, Ctx->getConstantInt(Ty, -(1 << LogDiv))); |
| 1853 _sub(T, Src0); | 1861 _sub(T, Src0); |
| 1854 _neg(T); | 1862 _neg(T); |
| 1855 _mov(Dest, T); | 1863 _mov(Dest, T); |
| 1856 return; | 1864 return; |
| 1857 } | 1865 } |
| 1858 } | 1866 } |
| 1859 } | 1867 } |
| 1860 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); | 1868 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); |
| 1861 uint32_t Eax = Traits::RegisterSet::Reg_eax; | 1869 uint32_t Eax; |
| 1862 uint32_t Edx = Traits::RegisterSet::Reg_edx; | 1870 uint32_t Edx; |
| 1863 switch (Ty) { | 1871 switch (Ty) { |
| 1864 default: | 1872 default: |
| 1865 llvm_unreachable("Bad type for srem"); | 1873 llvm::report_fatal_error("Bad type for srem"); |
| 1866 // fallthrough | 1874 case IceType_i64: |
| 1875 Eax = Traits::getRaxOrDie(); |
| 1876 Edx = Traits::getRdxOrDie(); |
| 1877 break; |
| 1867 case IceType_i32: | 1878 case IceType_i32: |
| 1879 Eax = Traits::RegisterSet::Reg_eax; |
| 1880 Edx = Traits::RegisterSet::Reg_edx; |
| 1868 break; | 1881 break; |
| 1869 case IceType_i16: | 1882 case IceType_i16: |
| 1870 Eax = Traits::RegisterSet::Reg_ax; | 1883 Eax = Traits::RegisterSet::Reg_ax; |
| 1871 Edx = Traits::RegisterSet::Reg_dx; | 1884 Edx = Traits::RegisterSet::Reg_dx; |
| 1872 break; | 1885 break; |
| 1873 case IceType_i8: | 1886 case IceType_i8: |
| 1874 Eax = Traits::RegisterSet::Reg_al; | 1887 Eax = Traits::RegisterSet::Reg_al; |
| 1875 Edx = Traits::RegisterSet::Reg_ah; | 1888 Edx = Traits::RegisterSet::Reg_ah; |
| 1876 break; | 1889 break; |
| 1877 } | 1890 } |
| (...skipping 1653 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3531 } | 3544 } |
| 3532 case Intrinsics::Sqrt: { | 3545 case Intrinsics::Sqrt: { |
| 3533 Operand *Src = legalize(Instr->getArg(0)); | 3546 Operand *Src = legalize(Instr->getArg(0)); |
| 3534 Variable *Dest = Instr->getDest(); | 3547 Variable *Dest = Instr->getDest(); |
| 3535 Variable *T = makeReg(Dest->getType()); | 3548 Variable *T = makeReg(Dest->getType()); |
| 3536 _sqrtss(T, Src); | 3549 _sqrtss(T, Src); |
| 3537 _mov(Dest, T); | 3550 _mov(Dest, T); |
| 3538 return; | 3551 return; |
| 3539 } | 3552 } |
| 3540 case Intrinsics::Stacksave: { | 3553 case Intrinsics::Stacksave: { |
| 3541 Variable *esp = | 3554 Variable *esp = Func->getTarget()->getPhysicalRegister(getStackReg()); |
| 3542 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp); | |
| 3543 Variable *Dest = Instr->getDest(); | 3555 Variable *Dest = Instr->getDest(); |
| 3544 _mov(Dest, esp); | 3556 _mov(Dest, esp); |
| 3545 return; | 3557 return; |
| 3546 } | 3558 } |
| 3547 case Intrinsics::Stackrestore: { | 3559 case Intrinsics::Stackrestore: { |
| 3548 Variable *esp = | 3560 Variable *esp = Func->getTarget()->getPhysicalRegister(getStackReg()); |
| 3549 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp); | |
| 3550 _redefined(_mov(esp, Instr->getArg(0))); | 3561 _redefined(_mov(esp, Instr->getArg(0))); |
| 3551 return; | 3562 return; |
| 3552 } | 3563 } |
| 3553 case Intrinsics::Trap: | 3564 case Intrinsics::Trap: |
| 3554 _ud2(); | 3565 _ud2(); |
| 3555 return; | 3566 return; |
| 3556 case Intrinsics::UnknownIntrinsic: | 3567 case Intrinsics::UnknownIntrinsic: |
| 3557 Func->setError("Should not be lowering UnknownIntrinsic"); | 3568 Func->setError("Should not be lowering UnknownIntrinsic"); |
| 3558 return; | 3569 return; |
| 3559 } | 3570 } |
| (...skipping 21 matching lines...) Expand all Loading... |
| 3581 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked); | 3592 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked); |
| 3582 auto *DestLo = llvm::cast<Variable>(loOperand(DestPrev)); | 3593 auto *DestLo = llvm::cast<Variable>(loOperand(DestPrev)); |
| 3583 auto *DestHi = llvm::cast<Variable>(hiOperand(DestPrev)); | 3594 auto *DestHi = llvm::cast<Variable>(hiOperand(DestPrev)); |
| 3584 _mov(DestLo, T_eax); | 3595 _mov(DestLo, T_eax); |
| 3585 _mov(DestHi, T_edx); | 3596 _mov(DestHi, T_edx); |
| 3586 return; | 3597 return; |
| 3587 } | 3598 } |
| 3588 int32_t Eax; | 3599 int32_t Eax; |
| 3589 switch (Ty) { | 3600 switch (Ty) { |
| 3590 default: | 3601 default: |
| 3591 llvm_unreachable("Bad type for cmpxchg"); | 3602 llvm::report_fatal_error("Bad type for cmpxchg"); |
| 3592 // fallthrough | 3603 case IceType_i64: |
| 3604 Eax = Traits::getRaxOrDie(); |
| 3605 break; |
| 3593 case IceType_i32: | 3606 case IceType_i32: |
| 3594 Eax = Traits::RegisterSet::Reg_eax; | 3607 Eax = Traits::RegisterSet::Reg_eax; |
| 3595 break; | 3608 break; |
| 3596 case IceType_i16: | 3609 case IceType_i16: |
| 3597 Eax = Traits::RegisterSet::Reg_ax; | 3610 Eax = Traits::RegisterSet::Reg_ax; |
| 3598 break; | 3611 break; |
| 3599 case IceType_i8: | 3612 case IceType_i8: |
| 3600 Eax = Traits::RegisterSet::Reg_al; | 3613 Eax = Traits::RegisterSet::Reg_al; |
| 3601 break; | 3614 break; |
| 3602 } | 3615 } |
| (...skipping 250 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3853 auto *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 3866 auto *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
| 3854 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 3867 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
| 3855 _mov(DestLo, T_eax); | 3868 _mov(DestLo, T_eax); |
| 3856 _mov(DestHi, T_edx); | 3869 _mov(DestHi, T_edx); |
| 3857 return; | 3870 return; |
| 3858 } | 3871 } |
| 3859 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty); | 3872 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty); |
| 3860 int32_t Eax; | 3873 int32_t Eax; |
| 3861 switch (Ty) { | 3874 switch (Ty) { |
| 3862 default: | 3875 default: |
| 3863 llvm_unreachable("Bad type for atomicRMW"); | 3876 llvm::report_fatal_error("Bad type for atomicRMW"); |
| 3864 // fallthrough | 3877 case IceType_i64: |
| 3878 Eax = Traits::getRaxOrDie(); |
| 3879 break; |
| 3865 case IceType_i32: | 3880 case IceType_i32: |
| 3866 Eax = Traits::RegisterSet::Reg_eax; | 3881 Eax = Traits::RegisterSet::Reg_eax; |
| 3867 break; | 3882 break; |
| 3868 case IceType_i16: | 3883 case IceType_i16: |
| 3869 Eax = Traits::RegisterSet::Reg_ax; | 3884 Eax = Traits::RegisterSet::Reg_ax; |
| 3870 break; | 3885 break; |
| 3871 case IceType_i8: | 3886 case IceType_i8: |
| 3872 Eax = Traits::RegisterSet::Reg_al; | 3887 Eax = Traits::RegisterSet::Reg_al; |
| 3873 break; | 3888 break; |
| 3874 } | 3889 } |
| (...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3923 // | 3938 // |
| 3924 // Similar for 64-bit, but start w/ speculating that the upper 32 bits | 3939 // Similar for 64-bit, but start w/ speculating that the upper 32 bits |
| 3925 // are all zero, and compute the result for that case (checking the lower | 3940 // are all zero, and compute the result for that case (checking the lower |
| 3926 // 32 bits). Then actually compute the result for the upper bits and | 3941 // 32 bits). Then actually compute the result for the upper bits and |
| 3927 // cmov in the result from the lower computation if the earlier speculation | 3942 // cmov in the result from the lower computation if the earlier speculation |
| 3928 // was correct. | 3943 // was correct. |
| 3929 // | 3944 // |
| 3930 // Cttz, is similar, but uses bsf instead, and doesn't require the xor | 3945 // Cttz, is similar, but uses bsf instead, and doesn't require the xor |
| 3931 // bit position conversion, and the speculation is reversed. | 3946 // bit position conversion, and the speculation is reversed. |
| 3932 assert(Ty == IceType_i32 || Ty == IceType_i64); | 3947 assert(Ty == IceType_i32 || Ty == IceType_i64); |
| 3933 Variable *T = makeReg(IceType_i32); | 3948 const Type DestTy = Traits::Is64Bit ? Dest->getType() : IceType_i32; |
| 3949 Variable *T = makeReg(DestTy); |
| 3934 Operand *FirstValRM = legalize(FirstVal, Legal_Mem | Legal_Reg); | 3950 Operand *FirstValRM = legalize(FirstVal, Legal_Mem | Legal_Reg); |
| 3935 if (Cttz) { | 3951 if (Cttz) { |
| 3936 _bsf(T, FirstValRM); | 3952 _bsf(T, FirstValRM); |
| 3937 } else { | 3953 } else { |
| 3938 _bsr(T, FirstValRM); | 3954 _bsr(T, FirstValRM); |
| 3939 } | 3955 } |
| 3940 Variable *T_Dest = makeReg(IceType_i32); | 3956 Variable *T_Dest = makeReg(DestTy); |
| 3941 Constant *ThirtyTwo = Ctx->getConstantInt32(32); | 3957 Constant *_31 = Ctx->getConstantInt32(31); |
| 3942 Constant *ThirtyOne = Ctx->getConstantInt32(31); | 3958 Constant *_32 = Ctx->getConstantInt(DestTy, 32); |
| 3943 if (Cttz) { | 3959 if (Cttz) { |
| 3944 _mov(T_Dest, ThirtyTwo); | 3960 _mov(T_Dest, _32); |
| 3945 } else { | 3961 } else { |
| 3946 Constant *SixtyThree = Ctx->getConstantInt32(63); | 3962 Constant *_63 = Ctx->getConstantInt(DestTy, 63); |
| 3947 _mov(T_Dest, SixtyThree); | 3963 _mov(T_Dest, _63); |
| 3948 } | 3964 } |
| 3949 _cmov(T_Dest, T, Traits::Cond::Br_ne); | 3965 _cmov(T_Dest, T, Traits::Cond::Br_ne); |
| 3950 if (!Cttz) { | 3966 if (!Cttz) { |
| 3951 _xor(T_Dest, ThirtyOne); | 3967 _xor(T_Dest, _31); |
| 3952 } | 3968 } |
| 3953 if (Traits::Is64Bit || Ty == IceType_i32) { | 3969 if (Traits::Is64Bit || Ty == IceType_i32) { |
| 3954 _mov(Dest, T_Dest); | 3970 _mov(Dest, T_Dest); |
| 3955 return; | 3971 return; |
| 3956 } | 3972 } |
| 3957 _add(T_Dest, ThirtyTwo); | 3973 _add(T_Dest, _32); |
| 3958 auto *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 3974 auto *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
| 3959 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 3975 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
| 3960 // Will be using "test" on this, so we need a registerized variable. | 3976 // Will be using "test" on this, so we need a registerized variable. |
| 3961 Variable *SecondVar = legalizeToReg(SecondVal); | 3977 Variable *SecondVar = legalizeToReg(SecondVal); |
| 3962 Variable *T_Dest2 = makeReg(IceType_i32); | 3978 Variable *T_Dest2 = makeReg(IceType_i32); |
| 3963 if (Cttz) { | 3979 if (Cttz) { |
| 3964 _bsf(T_Dest2, SecondVar); | 3980 _bsf(T_Dest2, SecondVar); |
| 3965 } else { | 3981 } else { |
| 3966 _bsr(T_Dest2, SecondVar); | 3982 _bsr(T_Dest2, SecondVar); |
| 3967 _xor(T_Dest2, ThirtyOne); | 3983 _xor(T_Dest2, _31); |
| 3968 } | 3984 } |
| 3969 _test(SecondVar, SecondVar); | 3985 _test(SecondVar, SecondVar); |
| 3970 _cmov(T_Dest2, T_Dest, Traits::Cond::Br_e); | 3986 _cmov(T_Dest2, T_Dest, Traits::Cond::Br_e); |
| 3971 _mov(DestLo, T_Dest2); | 3987 _mov(DestLo, T_Dest2); |
| 3972 _mov(DestHi, Ctx->getConstantZero(IceType_i32)); | 3988 _mov(DestHi, Ctx->getConstantZero(IceType_i32)); |
| 3973 } | 3989 } |
| 3974 | 3990 |
| 3975 template <class Machine> | 3991 template <class Machine> |
| 3976 void TargetX86Base<Machine>::typedLoad(Type Ty, Variable *Dest, Variable *Base, | 3992 void TargetX86Base<Machine>::typedLoad(Type Ty, Variable *Dest, Variable *Base, |
| 3977 Constant *Offset) { | 3993 Constant *Offset) { |
| (...skipping 193 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4171 | 4187 |
| 4172 // TODO(ascull): is 64-bit better with vector or scalar movq? | 4188 // TODO(ascull): is 64-bit better with vector or scalar movq? |
| 4173 auto *Mem = Traits::X86OperandMem::create(Func, Ty, Base, Offset); | 4189 auto *Mem = Traits::X86OperandMem::create(Func, Ty, Base, Offset); |
| 4174 if (isVectorType(Ty)) { | 4190 if (isVectorType(Ty)) { |
| 4175 assert(VecReg != nullptr); | 4191 assert(VecReg != nullptr); |
| 4176 _storep(VecReg, Mem); | 4192 _storep(VecReg, Mem); |
| 4177 } else if (Ty == IceType_f64) { | 4193 } else if (Ty == IceType_f64) { |
| 4178 assert(VecReg != nullptr); | 4194 assert(VecReg != nullptr); |
| 4179 _storeq(VecReg, Mem); | 4195 _storeq(VecReg, Mem); |
| 4180 } else { | 4196 } else { |
| 4197 assert(Ty != IceType_i64); |
| 4181 _store(Ctx->getConstantInt(Ty, SpreadValue), Mem); | 4198 _store(Ctx->getConstantInt(Ty, SpreadValue), Mem); |
| 4182 } | 4199 } |
| 4183 }; | 4200 }; |
| 4184 | 4201 |
| 4185 // Find the largest type that can be used and use it as much as possible in | 4202 // Find the largest type that can be used and use it as much as possible in |
| 4186 // reverse order. Then handle any remainder with overlapping copies. Since | 4203 // reverse order. Then handle any remainder with overlapping copies. Since |
| 4187 // the remainder will be at the end, there will be reduces pressure on the | 4204 // the remainder will be at the end, there will be reduces pressure on the |
| 4188 // memory unit as the access to the same memory are far apart. | 4205 // memory unit as the access to the same memory are far apart. |
| 4189 Type Ty; | 4206 Type Ty; |
| 4190 if (ValValue == 0 && CountValue >= BytesPerStoreq && | 4207 if (ValValue == 0 && CountValue >= BytesPerStoreq && |
| (...skipping 1937 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 6128 Variable *Reg = Func->makeVariable(Type); | 6145 Variable *Reg = Func->makeVariable(Type); |
| 6129 if (RegNum == Variable::NoRegister) | 6146 if (RegNum == Variable::NoRegister) |
| 6130 Reg->setMustHaveReg(); | 6147 Reg->setMustHaveReg(); |
| 6131 else | 6148 else |
| 6132 Reg->setRegNum(RegNum); | 6149 Reg->setRegNum(RegNum); |
| 6133 return Reg; | 6150 return Reg; |
| 6134 } | 6151 } |
| 6135 | 6152 |
| 6136 template <class Machine> | 6153 template <class Machine> |
| 6137 const Type TargetX86Base<Machine>::TypeForSize[] = { | 6154 const Type TargetX86Base<Machine>::TypeForSize[] = { |
| 6138 IceType_i8, IceType_i16, IceType_i32, | 6155 IceType_i8, IceType_i16, IceType_i32, IceType_f64, IceType_v16i8}; |
| 6139 (Traits::Is64Bit ? IceType_i64 : IceType_f64), IceType_v16i8}; | |
| 6140 template <class Machine> | 6156 template <class Machine> |
| 6141 Type TargetX86Base<Machine>::largestTypeInSize(uint32_t Size, | 6157 Type TargetX86Base<Machine>::largestTypeInSize(uint32_t Size, |
| 6142 uint32_t MaxSize) { | 6158 uint32_t MaxSize) { |
| 6143 assert(Size != 0); | 6159 assert(Size != 0); |
| 6144 uint32_t TyIndex = llvm::findLastSet(Size, llvm::ZB_Undefined); | 6160 uint32_t TyIndex = llvm::findLastSet(Size, llvm::ZB_Undefined); |
| 6145 uint32_t MaxIndex = MaxSize == NoSizeLimit | 6161 uint32_t MaxIndex = MaxSize == NoSizeLimit |
| 6146 ? llvm::array_lengthof(TypeForSize) - 1 | 6162 ? llvm::array_lengthof(TypeForSize) - 1 |
| 6147 : llvm::findLastSet(MaxSize, llvm::ZB_Undefined); | 6163 : llvm::findLastSet(MaxSize, llvm::ZB_Undefined); |
| 6148 return TypeForSize[std::min(TyIndex, MaxIndex)]; | 6164 return TypeForSize[std::min(TyIndex, MaxIndex)]; |
| 6149 } | 6165 } |
| (...skipping 253 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 6403 } | 6419 } |
| 6404 // the offset is not eligible for blinding or pooling, return the original | 6420 // the offset is not eligible for blinding or pooling, return the original |
| 6405 // mem operand | 6421 // mem operand |
| 6406 return MemOperand; | 6422 return MemOperand; |
| 6407 } | 6423 } |
| 6408 | 6424 |
| 6409 } // end of namespace X86Internal | 6425 } // end of namespace X86Internal |
| 6410 } // end of namespace Ice | 6426 } // end of namespace Ice |
| 6411 | 6427 |
| 6412 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H | 6428 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H |
| OLD | NEW |