OLD | NEW |
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// | 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 /// | 9 /// |
10 /// \file | 10 /// \file |
(...skipping 169 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
180 /// This generally means that its lowering sequence requires more than one | 180 /// This generally means that its lowering sequence requires more than one |
181 /// conditional branch, namely 64-bit integer compares and some floating-point | 181 /// conditional branch, namely 64-bit integer compares and some floating-point |
182 /// compares. When this is true, and there is more than one consumer, we prefer | 182 /// compares. When this is true, and there is more than one consumer, we prefer |
183 /// to disable the folding optimization because it minimizes branches. | 183 /// to disable the folding optimization because it minimizes branches. |
184 template <class MachineTraits> | 184 template <class MachineTraits> |
185 bool BoolFolding<MachineTraits>::hasComplexLowering(const Inst *Instr) { | 185 bool BoolFolding<MachineTraits>::hasComplexLowering(const Inst *Instr) { |
186 switch (getProducerKind(Instr)) { | 186 switch (getProducerKind(Instr)) { |
187 default: | 187 default: |
188 return false; | 188 return false; |
189 case PK_Icmp64: | 189 case PK_Icmp64: |
190 return true; | 190 return !MachineTraits::Is64Bit; |
191 case PK_Fcmp: | 191 case PK_Fcmp: |
192 return MachineTraits::TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()] | 192 return MachineTraits::TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()] |
193 .C2 != MachineTraits::Cond::Br_None; | 193 .C2 != MachineTraits::Cond::Br_None; |
194 } | 194 } |
195 } | 195 } |
196 | 196 |
197 template <class MachineTraits> | 197 template <class MachineTraits> |
198 bool BoolFolding<MachineTraits>::isValidFolding( | 198 bool BoolFolding<MachineTraits>::isValidFolding( |
199 typename BoolFolding<MachineTraits>::BoolFoldingProducerKind ProducerKind, | 199 typename BoolFolding<MachineTraits>::BoolFoldingProducerKind ProducerKind, |
200 typename BoolFolding<MachineTraits>::BoolFoldingConsumerKind ConsumerKind) { | 200 typename BoolFolding<MachineTraits>::BoolFoldingConsumerKind ConsumerKind) { |
(...skipping 557 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
758 template <class Machine> | 758 template <class Machine> |
759 bool TargetX86Base<Machine>::doBranchOpt(Inst *I, const CfgNode *NextNode) { | 759 bool TargetX86Base<Machine>::doBranchOpt(Inst *I, const CfgNode *NextNode) { |
760 if (auto *Br = llvm::dyn_cast<typename Traits::Insts::Br>(I)) { | 760 if (auto *Br = llvm::dyn_cast<typename Traits::Insts::Br>(I)) { |
761 return Br->optimizeBranch(NextNode); | 761 return Br->optimizeBranch(NextNode); |
762 } | 762 } |
763 return false; | 763 return false; |
764 } | 764 } |
765 | 765 |
766 template <class Machine> | 766 template <class Machine> |
767 Variable *TargetX86Base<Machine>::getPhysicalRegister(SizeT RegNum, Type Ty) { | 767 Variable *TargetX86Base<Machine>::getPhysicalRegister(SizeT RegNum, Type Ty) { |
768 // Special case: never allow partial reads/writes to/from %rBP and %rSP. | |
769 if (RegNum == Traits::RegisterSet::Reg_esp || | |
770 RegNum == Traits::RegisterSet::Reg_ebp) | |
771 Ty = Traits::WordType; | |
772 if (Ty == IceType_void) | 768 if (Ty == IceType_void) |
773 Ty = IceType_i32; | 769 Ty = IceType_i32; |
774 if (PhysicalRegisters[Ty].empty()) | 770 if (PhysicalRegisters[Ty].empty()) |
775 PhysicalRegisters[Ty].resize(Traits::RegisterSet::Reg_NUM); | 771 PhysicalRegisters[Ty].resize(Traits::RegisterSet::Reg_NUM); |
776 assert(RegNum < PhysicalRegisters[Ty].size()); | 772 assert(RegNum < PhysicalRegisters[Ty].size()); |
777 Variable *Reg = PhysicalRegisters[Ty][RegNum]; | 773 Variable *Reg = PhysicalRegisters[Ty][RegNum]; |
778 if (Reg == nullptr) { | 774 if (Reg == nullptr) { |
779 Reg = Func->makeVariable(Ty); | 775 Reg = Func->makeVariable(Ty); |
780 Reg->setRegNum(RegNum); | 776 Reg->setRegNum(RegNum); |
781 PhysicalRegisters[Ty][RegNum] = Reg; | 777 PhysicalRegisters[Ty][RegNum] = Reg; |
(...skipping 209 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
991 std::max(AlignmentParam, Traits::X86_STACK_ALIGNMENT_BYTES); | 987 std::max(AlignmentParam, Traits::X86_STACK_ALIGNMENT_BYTES); |
992 const bool OverAligned = Alignment > Traits::X86_STACK_ALIGNMENT_BYTES; | 988 const bool OverAligned = Alignment > Traits::X86_STACK_ALIGNMENT_BYTES; |
993 const bool OptM1 = Ctx->getFlags().getOptLevel() == Opt_m1; | 989 const bool OptM1 = Ctx->getFlags().getOptLevel() == Opt_m1; |
994 const bool AllocaWithKnownOffset = Inst->getKnownFrameOffset(); | 990 const bool AllocaWithKnownOffset = Inst->getKnownFrameOffset(); |
995 const bool UseFramePointer = | 991 const bool UseFramePointer = |
996 hasFramePointer() || OverAligned || !AllocaWithKnownOffset || OptM1; | 992 hasFramePointer() || OverAligned || !AllocaWithKnownOffset || OptM1; |
997 | 993 |
998 if (UseFramePointer) | 994 if (UseFramePointer) |
999 setHasFramePointer(); | 995 setHasFramePointer(); |
1000 | 996 |
1001 Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp); | 997 Variable *esp = getPhysicalRegister(getStackReg()); |
1002 if (OverAligned) { | 998 if (OverAligned) { |
1003 _and(esp, Ctx->getConstantInt32(-Alignment)); | 999 _and(esp, Ctx->getConstantInt32(-Alignment)); |
1004 } | 1000 } |
1005 | 1001 |
1006 Variable *Dest = Inst->getDest(); | 1002 Variable *Dest = Inst->getDest(); |
1007 Operand *TotalSize = legalize(Inst->getSizeInBytes()); | 1003 Operand *TotalSize = legalize(Inst->getSizeInBytes()); |
1008 | 1004 |
1009 if (const auto *ConstantTotalSize = | 1005 if (const auto *ConstantTotalSize = |
1010 llvm::dyn_cast<ConstantInteger32>(TotalSize)) { | 1006 llvm::dyn_cast<ConstantInteger32>(TotalSize)) { |
1011 const uint32_t Value = | 1007 const uint32_t Value = |
(...skipping 694 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1706 _mov(T, Src0); | 1702 _mov(T, Src0); |
1707 if (!llvm::isa<ConstantInteger32>(Src1)) | 1703 if (!llvm::isa<ConstantInteger32>(Src1)) |
1708 Src1 = copyToReg8(Src1, Traits::RegisterSet::Reg_cl); | 1704 Src1 = copyToReg8(Src1, Traits::RegisterSet::Reg_cl); |
1709 _sar(T, Src1); | 1705 _sar(T, Src1); |
1710 _mov(Dest, T); | 1706 _mov(Dest, T); |
1711 break; | 1707 break; |
1712 case InstArithmetic::Udiv: { | 1708 case InstArithmetic::Udiv: { |
1713 // div and idiv are the few arithmetic operators that do not allow | 1709 // div and idiv are the few arithmetic operators that do not allow |
1714 // immediates as the operand. | 1710 // immediates as the operand. |
1715 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); | 1711 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); |
1716 uint32_t Eax = Traits::RegisterSet::Reg_eax; | 1712 uint32_t Eax; |
1717 uint32_t Edx = Traits::RegisterSet::Reg_edx; | 1713 uint32_t Edx; |
1718 switch (Ty) { | 1714 switch (Ty) { |
1719 default: | 1715 default: |
1720 llvm_unreachable("Bad type for udiv"); | 1716 llvm::report_fatal_error("Bad type for udiv"); |
1721 // fallthrough | 1717 case IceType_i64: |
| 1718 Eax = Traits::getRaxOrDie(); |
| 1719 Edx = Traits::getRdxOrDie(); |
1722 case IceType_i32: | 1720 case IceType_i32: |
| 1721 Eax = Traits::RegisterSet::Reg_eax; |
| 1722 Edx = Traits::RegisterSet::Reg_edx; |
1723 break; | 1723 break; |
1724 case IceType_i16: | 1724 case IceType_i16: |
1725 Eax = Traits::RegisterSet::Reg_ax; | 1725 Eax = Traits::RegisterSet::Reg_ax; |
1726 Edx = Traits::RegisterSet::Reg_dx; | 1726 Edx = Traits::RegisterSet::Reg_dx; |
1727 break; | 1727 break; |
1728 case IceType_i8: | 1728 case IceType_i8: |
1729 Eax = Traits::RegisterSet::Reg_al; | 1729 Eax = Traits::RegisterSet::Reg_al; |
1730 Edx = Traits::RegisterSet::Reg_ah; | 1730 Edx = Traits::RegisterSet::Reg_ah; |
1731 break; | 1731 break; |
1732 } | 1732 } |
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1766 _sar(T, Ctx->getConstantInt(Ty, LogDiv)); | 1766 _sar(T, Ctx->getConstantInt(Ty, LogDiv)); |
1767 } | 1767 } |
1768 _mov(Dest, T); | 1768 _mov(Dest, T); |
1769 return; | 1769 return; |
1770 } | 1770 } |
1771 } | 1771 } |
1772 } | 1772 } |
1773 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); | 1773 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); |
1774 switch (Ty) { | 1774 switch (Ty) { |
1775 default: | 1775 default: |
1776 llvm_unreachable("Bad type for sdiv"); | 1776 llvm::report_fatal_error("Bad type for sdiv"); |
1777 // fallthrough | 1777 case IceType_i64: |
| 1778 T_edx = makeReg(Ty, Traits::getRdxOrDie()); |
| 1779 _mov(T, Src0, Traits::getRaxOrDie()); |
| 1780 break; |
1778 case IceType_i32: | 1781 case IceType_i32: |
1779 T_edx = makeReg(Ty, Traits::RegisterSet::Reg_edx); | 1782 T_edx = makeReg(Ty, Traits::RegisterSet::Reg_edx); |
1780 _mov(T, Src0, Traits::RegisterSet::Reg_eax); | 1783 _mov(T, Src0, Traits::RegisterSet::Reg_eax); |
1781 break; | 1784 break; |
1782 case IceType_i16: | 1785 case IceType_i16: |
1783 T_edx = makeReg(Ty, Traits::RegisterSet::Reg_dx); | 1786 T_edx = makeReg(Ty, Traits::RegisterSet::Reg_dx); |
1784 _mov(T, Src0, Traits::RegisterSet::Reg_ax); | 1787 _mov(T, Src0, Traits::RegisterSet::Reg_ax); |
1785 break; | 1788 break; |
1786 case IceType_i8: | 1789 case IceType_i8: |
1787 T_edx = makeReg(IceType_i16, Traits::RegisterSet::Reg_ax); | 1790 T_edx = makeReg(IceType_i16, Traits::RegisterSet::Reg_ax); |
1788 _mov(T, Src0, Traits::RegisterSet::Reg_al); | 1791 _mov(T, Src0, Traits::RegisterSet::Reg_al); |
1789 break; | 1792 break; |
1790 } | 1793 } |
1791 _cbwdq(T_edx, T); | 1794 _cbwdq(T_edx, T); |
1792 _idiv(T, Src1, T_edx); | 1795 _idiv(T, Src1, T_edx); |
1793 _mov(Dest, T); | 1796 _mov(Dest, T); |
1794 break; | 1797 break; |
1795 case InstArithmetic::Urem: { | 1798 case InstArithmetic::Urem: { |
1796 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); | 1799 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); |
1797 uint32_t Eax = Traits::RegisterSet::Reg_eax; | 1800 uint32_t Eax; |
1798 uint32_t Edx = Traits::RegisterSet::Reg_edx; | 1801 uint32_t Edx; |
1799 switch (Ty) { | 1802 switch (Ty) { |
1800 default: | 1803 default: |
1801 llvm_unreachable("Bad type for urem"); | 1804 llvm::report_fatal_error("Bad type for urem"); |
1802 // fallthrough | 1805 case IceType_i64: |
| 1806 Eax = Traits::getRaxOrDie(); |
| 1807 Edx = Traits::getRdxOrDie(); |
| 1808 break; |
1803 case IceType_i32: | 1809 case IceType_i32: |
| 1810 Eax = Traits::RegisterSet::Reg_eax; |
| 1811 Edx = Traits::RegisterSet::Reg_edx; |
1804 break; | 1812 break; |
1805 case IceType_i16: | 1813 case IceType_i16: |
1806 Eax = Traits::RegisterSet::Reg_ax; | 1814 Eax = Traits::RegisterSet::Reg_ax; |
1807 Edx = Traits::RegisterSet::Reg_dx; | 1815 Edx = Traits::RegisterSet::Reg_dx; |
1808 break; | 1816 break; |
1809 case IceType_i8: | 1817 case IceType_i8: |
1810 Eax = Traits::RegisterSet::Reg_al; | 1818 Eax = Traits::RegisterSet::Reg_al; |
1811 Edx = Traits::RegisterSet::Reg_ah; | 1819 Edx = Traits::RegisterSet::Reg_ah; |
1812 break; | 1820 break; |
1813 } | 1821 } |
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1851 _add(T, Src0); | 1859 _add(T, Src0); |
1852 _and(T, Ctx->getConstantInt(Ty, -(1 << LogDiv))); | 1860 _and(T, Ctx->getConstantInt(Ty, -(1 << LogDiv))); |
1853 _sub(T, Src0); | 1861 _sub(T, Src0); |
1854 _neg(T); | 1862 _neg(T); |
1855 _mov(Dest, T); | 1863 _mov(Dest, T); |
1856 return; | 1864 return; |
1857 } | 1865 } |
1858 } | 1866 } |
1859 } | 1867 } |
1860 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); | 1868 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); |
1861 uint32_t Eax = Traits::RegisterSet::Reg_eax; | 1869 uint32_t Eax; |
1862 uint32_t Edx = Traits::RegisterSet::Reg_edx; | 1870 uint32_t Edx; |
1863 switch (Ty) { | 1871 switch (Ty) { |
1864 default: | 1872 default: |
1865 llvm_unreachable("Bad type for srem"); | 1873 llvm::report_fatal_error("Bad type for srem"); |
1866 // fallthrough | 1874 case IceType_i64: |
| 1875 Eax = Traits::getRaxOrDie(); |
| 1876 Edx = Traits::getRdxOrDie(); |
| 1877 break; |
1867 case IceType_i32: | 1878 case IceType_i32: |
| 1879 Eax = Traits::RegisterSet::Reg_eax; |
| 1880 Edx = Traits::RegisterSet::Reg_edx; |
1868 break; | 1881 break; |
1869 case IceType_i16: | 1882 case IceType_i16: |
1870 Eax = Traits::RegisterSet::Reg_ax; | 1883 Eax = Traits::RegisterSet::Reg_ax; |
1871 Edx = Traits::RegisterSet::Reg_dx; | 1884 Edx = Traits::RegisterSet::Reg_dx; |
1872 break; | 1885 break; |
1873 case IceType_i8: | 1886 case IceType_i8: |
1874 Eax = Traits::RegisterSet::Reg_al; | 1887 Eax = Traits::RegisterSet::Reg_al; |
1875 Edx = Traits::RegisterSet::Reg_ah; | 1888 Edx = Traits::RegisterSet::Reg_ah; |
1876 break; | 1889 break; |
1877 } | 1890 } |
(...skipping 1653 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3531 } | 3544 } |
3532 case Intrinsics::Sqrt: { | 3545 case Intrinsics::Sqrt: { |
3533 Operand *Src = legalize(Instr->getArg(0)); | 3546 Operand *Src = legalize(Instr->getArg(0)); |
3534 Variable *Dest = Instr->getDest(); | 3547 Variable *Dest = Instr->getDest(); |
3535 Variable *T = makeReg(Dest->getType()); | 3548 Variable *T = makeReg(Dest->getType()); |
3536 _sqrtss(T, Src); | 3549 _sqrtss(T, Src); |
3537 _mov(Dest, T); | 3550 _mov(Dest, T); |
3538 return; | 3551 return; |
3539 } | 3552 } |
3540 case Intrinsics::Stacksave: { | 3553 case Intrinsics::Stacksave: { |
3541 Variable *esp = | 3554 Variable *esp = Func->getTarget()->getPhysicalRegister(getStackReg()); |
3542 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp); | |
3543 Variable *Dest = Instr->getDest(); | 3555 Variable *Dest = Instr->getDest(); |
3544 _mov(Dest, esp); | 3556 _mov(Dest, esp); |
3545 return; | 3557 return; |
3546 } | 3558 } |
3547 case Intrinsics::Stackrestore: { | 3559 case Intrinsics::Stackrestore: { |
3548 Variable *esp = | 3560 Variable *esp = Func->getTarget()->getPhysicalRegister(getStackReg()); |
3549 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp); | |
3550 _redefined(_mov(esp, Instr->getArg(0))); | 3561 _redefined(_mov(esp, Instr->getArg(0))); |
3551 return; | 3562 return; |
3552 } | 3563 } |
3553 case Intrinsics::Trap: | 3564 case Intrinsics::Trap: |
3554 _ud2(); | 3565 _ud2(); |
3555 return; | 3566 return; |
3556 case Intrinsics::UnknownIntrinsic: | 3567 case Intrinsics::UnknownIntrinsic: |
3557 Func->setError("Should not be lowering UnknownIntrinsic"); | 3568 Func->setError("Should not be lowering UnknownIntrinsic"); |
3558 return; | 3569 return; |
3559 } | 3570 } |
(...skipping 21 matching lines...) Expand all Loading... |
3581 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked); | 3592 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked); |
3582 auto *DestLo = llvm::cast<Variable>(loOperand(DestPrev)); | 3593 auto *DestLo = llvm::cast<Variable>(loOperand(DestPrev)); |
3583 auto *DestHi = llvm::cast<Variable>(hiOperand(DestPrev)); | 3594 auto *DestHi = llvm::cast<Variable>(hiOperand(DestPrev)); |
3584 _mov(DestLo, T_eax); | 3595 _mov(DestLo, T_eax); |
3585 _mov(DestHi, T_edx); | 3596 _mov(DestHi, T_edx); |
3586 return; | 3597 return; |
3587 } | 3598 } |
3588 int32_t Eax; | 3599 int32_t Eax; |
3589 switch (Ty) { | 3600 switch (Ty) { |
3590 default: | 3601 default: |
3591 llvm_unreachable("Bad type for cmpxchg"); | 3602 llvm::report_fatal_error("Bad type for cmpxchg"); |
3592 // fallthrough | 3603 case IceType_i64: |
| 3604 Eax = Traits::getRaxOrDie(); |
| 3605 break; |
3593 case IceType_i32: | 3606 case IceType_i32: |
3594 Eax = Traits::RegisterSet::Reg_eax; | 3607 Eax = Traits::RegisterSet::Reg_eax; |
3595 break; | 3608 break; |
3596 case IceType_i16: | 3609 case IceType_i16: |
3597 Eax = Traits::RegisterSet::Reg_ax; | 3610 Eax = Traits::RegisterSet::Reg_ax; |
3598 break; | 3611 break; |
3599 case IceType_i8: | 3612 case IceType_i8: |
3600 Eax = Traits::RegisterSet::Reg_al; | 3613 Eax = Traits::RegisterSet::Reg_al; |
3601 break; | 3614 break; |
3602 } | 3615 } |
(...skipping 250 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3853 auto *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 3866 auto *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
3854 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 3867 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
3855 _mov(DestLo, T_eax); | 3868 _mov(DestLo, T_eax); |
3856 _mov(DestHi, T_edx); | 3869 _mov(DestHi, T_edx); |
3857 return; | 3870 return; |
3858 } | 3871 } |
3859 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty); | 3872 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty); |
3860 int32_t Eax; | 3873 int32_t Eax; |
3861 switch (Ty) { | 3874 switch (Ty) { |
3862 default: | 3875 default: |
3863 llvm_unreachable("Bad type for atomicRMW"); | 3876 llvm::report_fatal_error("Bad type for atomicRMW"); |
3864 // fallthrough | 3877 case IceType_i64: |
| 3878 Eax = Traits::getRaxOrDie(); |
| 3879 break; |
3865 case IceType_i32: | 3880 case IceType_i32: |
3866 Eax = Traits::RegisterSet::Reg_eax; | 3881 Eax = Traits::RegisterSet::Reg_eax; |
3867 break; | 3882 break; |
3868 case IceType_i16: | 3883 case IceType_i16: |
3869 Eax = Traits::RegisterSet::Reg_ax; | 3884 Eax = Traits::RegisterSet::Reg_ax; |
3870 break; | 3885 break; |
3871 case IceType_i8: | 3886 case IceType_i8: |
3872 Eax = Traits::RegisterSet::Reg_al; | 3887 Eax = Traits::RegisterSet::Reg_al; |
3873 break; | 3888 break; |
3874 } | 3889 } |
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3923 // | 3938 // |
3924 // Similar for 64-bit, but start w/ speculating that the upper 32 bits | 3939 // Similar for 64-bit, but start w/ speculating that the upper 32 bits |
3925 // are all zero, and compute the result for that case (checking the lower | 3940 // are all zero, and compute the result for that case (checking the lower |
3926 // 32 bits). Then actually compute the result for the upper bits and | 3941 // 32 bits). Then actually compute the result for the upper bits and |
3927 // cmov in the result from the lower computation if the earlier speculation | 3942 // cmov in the result from the lower computation if the earlier speculation |
3928 // was correct. | 3943 // was correct. |
3929 // | 3944 // |
3930 // Cttz is similar, but uses bsf instead, and doesn't require the xor | 3945 // Cttz is similar, but uses bsf instead, and doesn't require the xor |
3931 // bit position conversion, and the speculation is reversed. | 3946 // bit position conversion, and the speculation is reversed. |
3932 assert(Ty == IceType_i32 || Ty == IceType_i64); | 3947 assert(Ty == IceType_i32 || Ty == IceType_i64); |
3933 Variable *T = makeReg(IceType_i32); | 3948 const Type DestTy = Traits::Is64Bit ? Dest->getType() : IceType_i32; |
| 3949 Variable *T = makeReg(DestTy); |
3934 Operand *FirstValRM = legalize(FirstVal, Legal_Mem | Legal_Reg); | 3950 Operand *FirstValRM = legalize(FirstVal, Legal_Mem | Legal_Reg); |
3935 if (Cttz) { | 3951 if (Cttz) { |
3936 _bsf(T, FirstValRM); | 3952 _bsf(T, FirstValRM); |
3937 } else { | 3953 } else { |
3938 _bsr(T, FirstValRM); | 3954 _bsr(T, FirstValRM); |
3939 } | 3955 } |
3940 Variable *T_Dest = makeReg(IceType_i32); | 3956 Variable *T_Dest = makeReg(DestTy); |
3941 Constant *ThirtyTwo = Ctx->getConstantInt32(32); | 3957 Constant *_31 = Ctx->getConstantInt32(31); |
3942 Constant *ThirtyOne = Ctx->getConstantInt32(31); | 3958 Constant *_32 = Ctx->getConstantInt(DestTy, 32); |
3943 if (Cttz) { | 3959 if (Cttz) { |
3944 _mov(T_Dest, ThirtyTwo); | 3960 _mov(T_Dest, _32); |
3945 } else { | 3961 } else { |
3946 Constant *SixtyThree = Ctx->getConstantInt32(63); | 3962 Constant *_63 = Ctx->getConstantInt(DestTy, 63); |
3947 _mov(T_Dest, SixtyThree); | 3963 _mov(T_Dest, _63); |
3948 } | 3964 } |
3949 _cmov(T_Dest, T, Traits::Cond::Br_ne); | 3965 _cmov(T_Dest, T, Traits::Cond::Br_ne); |
3950 if (!Cttz) { | 3966 if (!Cttz) { |
3951 _xor(T_Dest, ThirtyOne); | 3967 _xor(T_Dest, _31); |
3952 } | 3968 } |
3953 if (Traits::Is64Bit || Ty == IceType_i32) { | 3969 if (Traits::Is64Bit || Ty == IceType_i32) { |
3954 _mov(Dest, T_Dest); | 3970 _mov(Dest, T_Dest); |
3955 return; | 3971 return; |
3956 } | 3972 } |
3957 _add(T_Dest, ThirtyTwo); | 3973 _add(T_Dest, _32); |
3958 auto *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 3974 auto *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
3959 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 3975 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
3960 // Will be using "test" on this, so we need a registerized variable. | 3976 // Will be using "test" on this, so we need a registerized variable. |
3961 Variable *SecondVar = legalizeToReg(SecondVal); | 3977 Variable *SecondVar = legalizeToReg(SecondVal); |
3962 Variable *T_Dest2 = makeReg(IceType_i32); | 3978 Variable *T_Dest2 = makeReg(IceType_i32); |
3963 if (Cttz) { | 3979 if (Cttz) { |
3964 _bsf(T_Dest2, SecondVar); | 3980 _bsf(T_Dest2, SecondVar); |
3965 } else { | 3981 } else { |
3966 _bsr(T_Dest2, SecondVar); | 3982 _bsr(T_Dest2, SecondVar); |
3967 _xor(T_Dest2, ThirtyOne); | 3983 _xor(T_Dest2, _31); |
3968 } | 3984 } |
3969 _test(SecondVar, SecondVar); | 3985 _test(SecondVar, SecondVar); |
3970 _cmov(T_Dest2, T_Dest, Traits::Cond::Br_e); | 3986 _cmov(T_Dest2, T_Dest, Traits::Cond::Br_e); |
3971 _mov(DestLo, T_Dest2); | 3987 _mov(DestLo, T_Dest2); |
3972 _mov(DestHi, Ctx->getConstantZero(IceType_i32)); | 3988 _mov(DestHi, Ctx->getConstantZero(IceType_i32)); |
3973 } | 3989 } |
3974 | 3990 |
3975 template <class Machine> | 3991 template <class Machine> |
3976 void TargetX86Base<Machine>::typedLoad(Type Ty, Variable *Dest, Variable *Base, | 3992 void TargetX86Base<Machine>::typedLoad(Type Ty, Variable *Dest, Variable *Base, |
3977 Constant *Offset) { | 3993 Constant *Offset) { |
(...skipping 193 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4171 | 4187 |
4172 // TODO(ascull): is 64-bit better with vector or scalar movq? | 4188 // TODO(ascull): is 64-bit better with vector or scalar movq? |
4173 auto *Mem = Traits::X86OperandMem::create(Func, Ty, Base, Offset); | 4189 auto *Mem = Traits::X86OperandMem::create(Func, Ty, Base, Offset); |
4174 if (isVectorType(Ty)) { | 4190 if (isVectorType(Ty)) { |
4175 assert(VecReg != nullptr); | 4191 assert(VecReg != nullptr); |
4176 _storep(VecReg, Mem); | 4192 _storep(VecReg, Mem); |
4177 } else if (Ty == IceType_f64) { | 4193 } else if (Ty == IceType_f64) { |
4178 assert(VecReg != nullptr); | 4194 assert(VecReg != nullptr); |
4179 _storeq(VecReg, Mem); | 4195 _storeq(VecReg, Mem); |
4180 } else { | 4196 } else { |
| 4197 assert(Ty != IceType_i64); |
4181 _store(Ctx->getConstantInt(Ty, SpreadValue), Mem); | 4198 _store(Ctx->getConstantInt(Ty, SpreadValue), Mem); |
4182 } | 4199 } |
4183 }; | 4200 }; |
4184 | 4201 |
4185 // Find the largest type that can be used and use it as much as possible in | 4202 // Find the largest type that can be used and use it as much as possible in |
4186 // reverse order. Then handle any remainder with overlapping copies. Since | 4203 // reverse order. Then handle any remainder with overlapping copies. Since |
4187 // the remainder will be at the end, there will be reduced pressure on the | 4204 // the remainder will be at the end, there will be reduced pressure on the |
4188 // memory unit as the accesses to the same memory are far apart. | 4205 // memory unit as the accesses to the same memory are far apart. |
4189 Type Ty; | 4206 Type Ty; |
4190 if (ValValue == 0 && CountValue >= BytesPerStoreq && | 4207 if (ValValue == 0 && CountValue >= BytesPerStoreq && |
(...skipping 1937 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
6128 Variable *Reg = Func->makeVariable(Type); | 6145 Variable *Reg = Func->makeVariable(Type); |
6129 if (RegNum == Variable::NoRegister) | 6146 if (RegNum == Variable::NoRegister) |
6130 Reg->setMustHaveReg(); | 6147 Reg->setMustHaveReg(); |
6131 else | 6148 else |
6132 Reg->setRegNum(RegNum); | 6149 Reg->setRegNum(RegNum); |
6133 return Reg; | 6150 return Reg; |
6134 } | 6151 } |
6135 | 6152 |
6136 template <class Machine> | 6153 template <class Machine> |
6137 const Type TargetX86Base<Machine>::TypeForSize[] = { | 6154 const Type TargetX86Base<Machine>::TypeForSize[] = { |
6138 IceType_i8, IceType_i16, IceType_i32, | 6155 IceType_i8, IceType_i16, IceType_i32, IceType_f64, IceType_v16i8}; |
6139 (Traits::Is64Bit ? IceType_i64 : IceType_f64), IceType_v16i8}; | |
6140 template <class Machine> | 6156 template <class Machine> |
6141 Type TargetX86Base<Machine>::largestTypeInSize(uint32_t Size, | 6157 Type TargetX86Base<Machine>::largestTypeInSize(uint32_t Size, |
6142 uint32_t MaxSize) { | 6158 uint32_t MaxSize) { |
6143 assert(Size != 0); | 6159 assert(Size != 0); |
6144 uint32_t TyIndex = llvm::findLastSet(Size, llvm::ZB_Undefined); | 6160 uint32_t TyIndex = llvm::findLastSet(Size, llvm::ZB_Undefined); |
6145 uint32_t MaxIndex = MaxSize == NoSizeLimit | 6161 uint32_t MaxIndex = MaxSize == NoSizeLimit |
6146 ? llvm::array_lengthof(TypeForSize) - 1 | 6162 ? llvm::array_lengthof(TypeForSize) - 1 |
6147 : llvm::findLastSet(MaxSize, llvm::ZB_Undefined); | 6163 : llvm::findLastSet(MaxSize, llvm::ZB_Undefined); |
6148 return TypeForSize[std::min(TyIndex, MaxIndex)]; | 6164 return TypeForSize[std::min(TyIndex, MaxIndex)]; |
6149 } | 6165 } |
(...skipping 253 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
6403 } | 6419 } |
6404 // the offset is not eligible for blinding or pooling, return the original | 6420 // the offset is not eligible for blinding or pooling, return the original |
6405 // mem operand | 6421 // mem operand |
6406 return MemOperand; | 6422 return MemOperand; |
6407 } | 6423 } |
6408 | 6424 |
6409 } // end of namespace X86Internal | 6425 } // end of namespace X86Internal |
6410 } // end of namespace Ice | 6426 } // end of namespace Ice |
6411 | 6427 |
6412 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H | 6428 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H |
OLD | NEW |