OLD | NEW |
---|---|
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// | 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 /// | 9 /// |
10 /// \file | 10 /// \file |
(...skipping 169 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
180 /// This generally means that its lowering sequence requires more than one | 180 /// This generally means that its lowering sequence requires more than one |
181 /// conditional branch, namely 64-bit integer compares and some floating-point | 181 /// conditional branch, namely 64-bit integer compares and some floating-point |
182 /// compares. When this is true, and there is more than one consumer, we prefer | 182 /// compares. When this is true, and there is more than one consumer, we prefer |
183 /// to disable the folding optimization because it minimizes branches. | 183 /// to disable the folding optimization because it minimizes branches. |
// Annotation: hasComplexLowering() classifies Instr by getProducerKind(Instr).
// Producer kinds other than PK_Icmp64 and PK_Fcmp take the default arm and are
// reported as not having a complex lowering.
184 template <class MachineTraits> | 184 template <class MachineTraits> |
185 bool BoolFolding<MachineTraits>::hasComplexLowering(const Inst *Instr) { | 185 bool BoolFolding<MachineTraits>::hasComplexLowering(const Inst *Instr) { |
186 switch (getProducerKind(Instr)) { | 186 switch (getProducerKind(Instr)) { |
187 default: | 187 default: |
188 return false; | 188 return false; |
189 case PK_Icmp64: | 189 case PK_Icmp64: |
// OLD: a 64-bit integer compare is always reported as complex.
// NEW: it is reported as complex only when MachineTraits::Is64Bit is false.
190 return true; | 190 return !MachineTraits::Is64Bit; |
191 case PK_Fcmp: | 191 case PK_Fcmp: |
// An fcmp is complex when the TableFcmp entry for its condition has a C2 field
// other than Br_None (presumably C2 is a second branch condition -- confirm
// against the TableFcmp definition, which is not visible here).
192 return MachineTraits::TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()] | 192 return MachineTraits::TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()] |
193 .C2 != MachineTraits::Cond::Br_None; | 193 .C2 != MachineTraits::Cond::Br_None; |
194 } | 194 } |
195 } | 195 } |
196 | 196 |
197 template <class MachineTraits> | 197 template <class MachineTraits> |
198 bool BoolFolding<MachineTraits>::isValidFolding( | 198 bool BoolFolding<MachineTraits>::isValidFolding( |
199 typename BoolFolding<MachineTraits>::BoolFoldingProducerKind ProducerKind, | 199 typename BoolFolding<MachineTraits>::BoolFoldingProducerKind ProducerKind, |
200 typename BoolFolding<MachineTraits>::BoolFoldingConsumerKind ConsumerKind) { | 200 typename BoolFolding<MachineTraits>::BoolFoldingConsumerKind ConsumerKind) { |
(...skipping 558 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
759 bool TargetX86Base<Machine>::doBranchOpt(Inst *I, const CfgNode *NextNode) { | 759 bool TargetX86Base<Machine>::doBranchOpt(Inst *I, const CfgNode *NextNode) { |
760 if (auto *Br = llvm::dyn_cast<typename Traits::Insts::Br>(I)) { | 760 if (auto *Br = llvm::dyn_cast<typename Traits::Insts::Br>(I)) { |
761 return Br->optimizeBranch(NextNode); | 761 return Br->optimizeBranch(NextNode); |
762 } | 762 } |
763 return false; | 763 return false; |
764 } | 764 } |
765 | 765 |
766 template <class Machine> | 766 template <class Machine> |
767 Variable *TargetX86Base<Machine>::getPhysicalRegister(SizeT RegNum, Type Ty) { | 767 Variable *TargetX86Base<Machine>::getPhysicalRegister(SizeT RegNum, Type Ty) { |
768 // Special case: never allow partial reads/writes to/from %rBP and %rSP. | 768 // Special case: never allow partial reads/writes to/from %rBP and %rSP. |
769 if (RegNum == Traits::RegisterSet::Reg_esp || | 769 if (RegNum == getStackReg() || RegNum == getFrameReg()) |
770 RegNum == Traits::RegisterSet::Reg_ebp) | |
771 Ty = Traits::WordType; | 770 Ty = Traits::WordType; |
772 if (Ty == IceType_void) | 771 if (Ty == IceType_void) |
773 Ty = IceType_i32; | 772 Ty = IceType_i32; |
774 if (PhysicalRegisters[Ty].empty()) | 773 if (PhysicalRegisters[Ty].empty()) |
775 PhysicalRegisters[Ty].resize(Traits::RegisterSet::Reg_NUM); | 774 PhysicalRegisters[Ty].resize(Traits::RegisterSet::Reg_NUM); |
776 assert(RegNum < PhysicalRegisters[Ty].size()); | 775 assert(RegNum < PhysicalRegisters[Ty].size()); |
777 Variable *Reg = PhysicalRegisters[Ty][RegNum]; | 776 Variable *Reg = PhysicalRegisters[Ty][RegNum]; |
778 if (Reg == nullptr) { | 777 if (Reg == nullptr) { |
779 Reg = Func->makeVariable(Ty); | 778 Reg = Func->makeVariable(Ty); |
780 Reg->setRegNum(RegNum); | 779 Reg->setRegNum(RegNum); |
(...skipping 210 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
991 std::max(AlignmentParam, Traits::X86_STACK_ALIGNMENT_BYTES); | 990 std::max(AlignmentParam, Traits::X86_STACK_ALIGNMENT_BYTES); |
992 const bool OverAligned = Alignment > Traits::X86_STACK_ALIGNMENT_BYTES; | 991 const bool OverAligned = Alignment > Traits::X86_STACK_ALIGNMENT_BYTES; |
993 const bool OptM1 = Ctx->getFlags().getOptLevel() == Opt_m1; | 992 const bool OptM1 = Ctx->getFlags().getOptLevel() == Opt_m1; |
994 const bool AllocaWithKnownOffset = Inst->getKnownFrameOffset(); | 993 const bool AllocaWithKnownOffset = Inst->getKnownFrameOffset(); |
995 const bool UseFramePointer = | 994 const bool UseFramePointer = |
996 hasFramePointer() || OverAligned || !AllocaWithKnownOffset || OptM1; | 995 hasFramePointer() || OverAligned || !AllocaWithKnownOffset || OptM1; |
997 | 996 |
998 if (UseFramePointer) | 997 if (UseFramePointer) |
999 setHasFramePointer(); | 998 setHasFramePointer(); |
1000 | 999 |
1001 Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp); | 1000 Variable *esp = getPhysicalRegister(getStackReg()); |
1002 if (OverAligned) { | 1001 if (OverAligned) { |
1003 _and(esp, Ctx->getConstantInt32(-Alignment)); | 1002 _and(esp, Ctx->getConstantInt32(-Alignment)); |
1004 } | 1003 } |
1005 | 1004 |
1006 Variable *Dest = Inst->getDest(); | 1005 Variable *Dest = Inst->getDest(); |
1007 Operand *TotalSize = legalize(Inst->getSizeInBytes()); | 1006 Operand *TotalSize = legalize(Inst->getSizeInBytes()); |
1008 | 1007 |
1009 if (const auto *ConstantTotalSize = | 1008 if (const auto *ConstantTotalSize = |
1010 llvm::dyn_cast<ConstantInteger32>(TotalSize)) { | 1009 llvm::dyn_cast<ConstantInteger32>(TotalSize)) { |
1011 const uint32_t Value = | 1010 const uint32_t Value = |
(...skipping 694 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1706 _mov(T, Src0); | 1705 _mov(T, Src0); |
1707 if (!llvm::isa<ConstantInteger32>(Src1)) | 1706 if (!llvm::isa<ConstantInteger32>(Src1)) |
1708 Src1 = copyToReg8(Src1, Traits::RegisterSet::Reg_cl); | 1707 Src1 = copyToReg8(Src1, Traits::RegisterSet::Reg_cl); |
1709 _sar(T, Src1); | 1708 _sar(T, Src1); |
1710 _mov(Dest, T); | 1709 _mov(Dest, T); |
1711 break; | 1710 break; |
1712 case InstArithmetic::Udiv: { | 1711 case InstArithmetic::Udiv: { |
1713 // div and idiv are the few arithmetic operators that do not allow | 1712 // div and idiv are the few arithmetic operators that do not allow |
1714 // immediates as the operand. | 1713 // immediates as the operand. |
1715 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); | 1714 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); |
1716 uint32_t Eax = Traits::RegisterSet::Reg_eax; | 1715 uint32_t Eax; |
1717 uint32_t Edx = Traits::RegisterSet::Reg_edx; | 1716 uint32_t Edx; |
1718 switch (Ty) { | 1717 switch (Ty) { |
1719 default: | 1718 default: |
1720 llvm_unreachable("Bad type for udiv"); | 1719 llvm::report_fatal_error("Bad type for udiv"); |
1721 // fallthrough | 1720 case IceType_i64: |
1721 Eax = Traits::getRaxOrDie(); | |
1722 Edx = Traits::getRdxOrDie(); | |
1722 case IceType_i32: | 1723 case IceType_i32: |
1724 Eax = Traits::RegisterSet::Reg_eax; | |
1725 Edx = Traits::RegisterSet::Reg_edx; | |
1723 break; | 1726 break; |
1724 case IceType_i16: | 1727 case IceType_i16: |
1725 Eax = Traits::RegisterSet::Reg_ax; | 1728 Eax = Traits::RegisterSet::Reg_ax; |
1726 Edx = Traits::RegisterSet::Reg_dx; | 1729 Edx = Traits::RegisterSet::Reg_dx; |
1727 break; | 1730 break; |
1728 case IceType_i8: | 1731 case IceType_i8: |
1729 Eax = Traits::RegisterSet::Reg_al; | 1732 Eax = Traits::RegisterSet::Reg_al; |
1730 Edx = Traits::RegisterSet::Reg_ah; | 1733 Edx = Traits::RegisterSet::Reg_ah; |
1731 break; | 1734 break; |
1732 } | 1735 } |
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1766 _sar(T, Ctx->getConstantInt(Ty, LogDiv)); | 1769 _sar(T, Ctx->getConstantInt(Ty, LogDiv)); |
1767 } | 1770 } |
1768 _mov(Dest, T); | 1771 _mov(Dest, T); |
1769 return; | 1772 return; |
1770 } | 1773 } |
1771 } | 1774 } |
1772 } | 1775 } |
1773 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); | 1776 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); |
1774 switch (Ty) { | 1777 switch (Ty) { |
1775 default: | 1778 default: |
1776 llvm_unreachable("Bad type for sdiv"); | 1779 llvm::report_fatal_error("Bad type for sdiv"); |
1777 // fallthrough | 1780 case IceType_i64: |
1781 T_edx = makeReg(Ty, Traits::getRdxOrDie()); | |
1782 _mov(T, Src0, Traits::getRaxOrDie()); | |
1783 break; | |
1778 case IceType_i32: | 1784 case IceType_i32: |
1779 T_edx = makeReg(Ty, Traits::RegisterSet::Reg_edx); | 1785 T_edx = makeReg(Ty, Traits::RegisterSet::Reg_edx); |
1780 _mov(T, Src0, Traits::RegisterSet::Reg_eax); | 1786 _mov(T, Src0, Traits::RegisterSet::Reg_eax); |
1781 break; | 1787 break; |
1782 case IceType_i16: | 1788 case IceType_i16: |
1783 T_edx = makeReg(Ty, Traits::RegisterSet::Reg_dx); | 1789 T_edx = makeReg(Ty, Traits::RegisterSet::Reg_dx); |
1784 _mov(T, Src0, Traits::RegisterSet::Reg_ax); | 1790 _mov(T, Src0, Traits::RegisterSet::Reg_ax); |
1785 break; | 1791 break; |
1786 case IceType_i8: | 1792 case IceType_i8: |
1787 T_edx = makeReg(IceType_i16, Traits::RegisterSet::Reg_ax); | 1793 T_edx = makeReg(IceType_i16, Traits::RegisterSet::Reg_ax); |
1788 _mov(T, Src0, Traits::RegisterSet::Reg_al); | 1794 _mov(T, Src0, Traits::RegisterSet::Reg_al); |
1789 break; | 1795 break; |
1790 } | 1796 } |
1791 _cbwdq(T_edx, T); | 1797 _cbwdq(T_edx, T); |
1792 _idiv(T, Src1, T_edx); | 1798 _idiv(T, Src1, T_edx); |
1793 _mov(Dest, T); | 1799 _mov(Dest, T); |
1794 break; | 1800 break; |
1795 case InstArithmetic::Urem: { | 1801 case InstArithmetic::Urem: { |
1796 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); | 1802 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); |
1797 uint32_t Eax = Traits::RegisterSet::Reg_eax; | 1803 uint32_t Eax; |
1798 uint32_t Edx = Traits::RegisterSet::Reg_edx; | 1804 uint32_t Edx; |
1799 switch (Ty) { | 1805 switch (Ty) { |
1800 default: | 1806 default: |
1801 llvm_unreachable("Bad type for urem"); | 1807 llvm::report_fatal_error("Bad type for urem"); |
1802 // fallthrough | 1808 case IceType_i64: |
1809 Eax = Traits::getRaxOrDie(); | |
1810 Edx = Traits::getRdxOrDie(); | |
1811 break; | |
1803 case IceType_i32: | 1812 case IceType_i32: |
1813 Eax = Traits::RegisterSet::Reg_eax; | |
1814 Edx = Traits::RegisterSet::Reg_edx; | |
1804 break; | 1815 break; |
1805 case IceType_i16: | 1816 case IceType_i16: |
1806 Eax = Traits::RegisterSet::Reg_ax; | 1817 Eax = Traits::RegisterSet::Reg_ax; |
1807 Edx = Traits::RegisterSet::Reg_dx; | 1818 Edx = Traits::RegisterSet::Reg_dx; |
1808 break; | 1819 break; |
1809 case IceType_i8: | 1820 case IceType_i8: |
1810 Eax = Traits::RegisterSet::Reg_al; | 1821 Eax = Traits::RegisterSet::Reg_al; |
1811 Edx = Traits::RegisterSet::Reg_ah; | 1822 Edx = Traits::RegisterSet::Reg_ah; |
1812 break; | 1823 break; |
1813 } | 1824 } |
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1851 _add(T, Src0); | 1862 _add(T, Src0); |
1852 _and(T, Ctx->getConstantInt(Ty, -(1 << LogDiv))); | 1863 _and(T, Ctx->getConstantInt(Ty, -(1 << LogDiv))); |
1853 _sub(T, Src0); | 1864 _sub(T, Src0); |
1854 _neg(T); | 1865 _neg(T); |
1855 _mov(Dest, T); | 1866 _mov(Dest, T); |
1856 return; | 1867 return; |
1857 } | 1868 } |
1858 } | 1869 } |
1859 } | 1870 } |
1860 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); | 1871 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); |
1861 uint32_t Eax = Traits::RegisterSet::Reg_eax; | 1872 uint32_t Eax; |
1862 uint32_t Edx = Traits::RegisterSet::Reg_edx; | 1873 uint32_t Edx; |
1863 switch (Ty) { | 1874 switch (Ty) { |
1864 default: | 1875 default: |
1865 llvm_unreachable("Bad type for srem"); | 1876 llvm::report_fatal_error("Bad type for srem"); |
1866 // fallthrough | 1877 case IceType_i64: |
1878 Eax = Traits::getRaxOrDie(); | |
1879 Edx = Traits::getRdxOrDie(); | |
1880 break; | |
1867 case IceType_i32: | 1881 case IceType_i32: |
1882 Eax = Traits::RegisterSet::Reg_eax; | |
1883 Edx = Traits::RegisterSet::Reg_edx; | |
1868 break; | 1884 break; |
1869 case IceType_i16: | 1885 case IceType_i16: |
1870 Eax = Traits::RegisterSet::Reg_ax; | 1886 Eax = Traits::RegisterSet::Reg_ax; |
1871 Edx = Traits::RegisterSet::Reg_dx; | 1887 Edx = Traits::RegisterSet::Reg_dx; |
1872 break; | 1888 break; |
1873 case IceType_i8: | 1889 case IceType_i8: |
1874 Eax = Traits::RegisterSet::Reg_al; | 1890 Eax = Traits::RegisterSet::Reg_al; |
1875 Edx = Traits::RegisterSet::Reg_ah; | 1891 Edx = Traits::RegisterSet::Reg_ah; |
1876 break; | 1892 break; |
1877 } | 1893 } |
(...skipping 945 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
2823 | 2839 |
2824 _movp(Dest, T); | 2840 _movp(Dest, T); |
2825 eliminateNextVectorSextInstruction(Dest); | 2841 eliminateNextVectorSextInstruction(Dest); |
2826 } | 2842 } |
2827 | 2843 |
2828 template <typename Machine> | 2844 template <typename Machine> |
2829 template <typename T> | 2845 template <typename T> |
2830 typename std::enable_if<!T::Is64Bit, void>::type | 2846 typename std::enable_if<!T::Is64Bit, void>::type |
2831 TargetX86Base<Machine>::lowerIcmp64(const InstIcmp *Icmp, | 2847 TargetX86Base<Machine>::lowerIcmp64(const InstIcmp *Icmp, |
2832 const Inst *Consumer) { | 2848 const Inst *Consumer) { |
2849 assert(!T::Is64Bit); | |
Jim Stichnoth
2015/12/20 19:27:38
This assert seems completely unnecessary given the
John
2015/12/21 13:41:32
Done.
| |
2833 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1: | 2850 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1: |
2834 Operand *Src0 = legalize(Icmp->getSrc(0)); | 2851 Operand *Src0 = legalize(Icmp->getSrc(0)); |
2835 Operand *Src1 = legalize(Icmp->getSrc(1)); | 2852 Operand *Src1 = legalize(Icmp->getSrc(1)); |
2836 Variable *Dest = Icmp->getDest(); | 2853 Variable *Dest = Icmp->getDest(); |
2837 InstIcmp::ICond Condition = Icmp->getCondition(); | 2854 InstIcmp::ICond Condition = Icmp->getCondition(); |
2838 size_t Index = static_cast<size_t>(Condition); | 2855 size_t Index = static_cast<size_t>(Condition); |
2839 assert(Index < Traits::TableIcmp64Size); | 2856 assert(Index < Traits::TableIcmp64Size); |
2840 Operand *Src0LoRM = nullptr; | 2857 Operand *Src0LoRM = nullptr; |
2841 Operand *Src0HiRM = nullptr; | 2858 Operand *Src0HiRM = nullptr; |
2842 // Legalize the portions of Src0 that are going to be needed. | 2859 // Legalize the portions of Src0 that are going to be needed. |
(...skipping 688 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
3531 } | 3548 } |
3532 case Intrinsics::Sqrt: { | 3549 case Intrinsics::Sqrt: { |
3533 Operand *Src = legalize(Instr->getArg(0)); | 3550 Operand *Src = legalize(Instr->getArg(0)); |
3534 Variable *Dest = Instr->getDest(); | 3551 Variable *Dest = Instr->getDest(); |
3535 Variable *T = makeReg(Dest->getType()); | 3552 Variable *T = makeReg(Dest->getType()); |
3536 _sqrtss(T, Src); | 3553 _sqrtss(T, Src); |
3537 _mov(Dest, T); | 3554 _mov(Dest, T); |
3538 return; | 3555 return; |
3539 } | 3556 } |
3540 case Intrinsics::Stacksave: { | 3557 case Intrinsics::Stacksave: { |
3541 Variable *esp = | 3558 Variable *esp = Func->getTarget()->getPhysicalRegister(getStackReg()); |
3542 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp); | |
3543 Variable *Dest = Instr->getDest(); | 3559 Variable *Dest = Instr->getDest(); |
3544 _mov(Dest, esp); | 3560 _mov(Dest, esp); |
3545 return; | 3561 return; |
3546 } | 3562 } |
3547 case Intrinsics::Stackrestore: { | 3563 case Intrinsics::Stackrestore: { |
3548 Variable *esp = | 3564 Variable *esp = Func->getTarget()->getPhysicalRegister(getStackReg()); |
3549 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp); | |
3550 _redefined(_mov(esp, Instr->getArg(0))); | 3565 _redefined(_mov(esp, Instr->getArg(0))); |
3551 return; | 3566 return; |
3552 } | 3567 } |
3553 case Intrinsics::Trap: | 3568 case Intrinsics::Trap: |
3554 _ud2(); | 3569 _ud2(); |
3555 return; | 3570 return; |
3556 case Intrinsics::UnknownIntrinsic: | 3571 case Intrinsics::UnknownIntrinsic: |
3557 Func->setError("Should not be lowering UnknownIntrinsic"); | 3572 Func->setError("Should not be lowering UnknownIntrinsic"); |
3558 return; | 3573 return; |
3559 } | 3574 } |
(...skipping 21 matching lines...) Expand all Loading... | |
3581 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked); | 3596 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked); |
3582 auto *DestLo = llvm::cast<Variable>(loOperand(DestPrev)); | 3597 auto *DestLo = llvm::cast<Variable>(loOperand(DestPrev)); |
3583 auto *DestHi = llvm::cast<Variable>(hiOperand(DestPrev)); | 3598 auto *DestHi = llvm::cast<Variable>(hiOperand(DestPrev)); |
3584 _mov(DestLo, T_eax); | 3599 _mov(DestLo, T_eax); |
3585 _mov(DestHi, T_edx); | 3600 _mov(DestHi, T_edx); |
3586 return; | 3601 return; |
3587 } | 3602 } |
3588 int32_t Eax; | 3603 int32_t Eax; |
3589 switch (Ty) { | 3604 switch (Ty) { |
3590 default: | 3605 default: |
3591 llvm_unreachable("Bad type for cmpxchg"); | 3606 llvm::report_fatal_error("Bad type for cmpxchg"); |
3592 // fallthrough | 3607 case IceType_i64: |
3608 Eax = Traits::getRaxOrDie(); | |
3609 break; | |
3593 case IceType_i32: | 3610 case IceType_i32: |
3594 Eax = Traits::RegisterSet::Reg_eax; | 3611 Eax = Traits::RegisterSet::Reg_eax; |
3595 break; | 3612 break; |
3596 case IceType_i16: | 3613 case IceType_i16: |
3597 Eax = Traits::RegisterSet::Reg_ax; | 3614 Eax = Traits::RegisterSet::Reg_ax; |
3598 break; | 3615 break; |
3599 case IceType_i8: | 3616 case IceType_i8: |
3600 Eax = Traits::RegisterSet::Reg_al; | 3617 Eax = Traits::RegisterSet::Reg_al; |
3601 break; | 3618 break; |
3602 } | 3619 } |
(...skipping 250 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
3853 auto *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 3870 auto *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
3854 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 3871 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
3855 _mov(DestLo, T_eax); | 3872 _mov(DestLo, T_eax); |
3856 _mov(DestHi, T_edx); | 3873 _mov(DestHi, T_edx); |
3857 return; | 3874 return; |
3858 } | 3875 } |
3859 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty); | 3876 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty); |
3860 int32_t Eax; | 3877 int32_t Eax; |
3861 switch (Ty) { | 3878 switch (Ty) { |
3862 default: | 3879 default: |
3863 llvm_unreachable("Bad type for atomicRMW"); | 3880 llvm::report_fatal_error("Bad type for atomicRMW"); |
3864 // fallthrough | 3881 case IceType_i64: |
3882 Eax = Traits::getRaxOrDie(); | |
3883 break; | |
3865 case IceType_i32: | 3884 case IceType_i32: |
3866 Eax = Traits::RegisterSet::Reg_eax; | 3885 Eax = Traits::RegisterSet::Reg_eax; |
3867 break; | 3886 break; |
3868 case IceType_i16: | 3887 case IceType_i16: |
3869 Eax = Traits::RegisterSet::Reg_ax; | 3888 Eax = Traits::RegisterSet::Reg_ax; |
3870 break; | 3889 break; |
3871 case IceType_i8: | 3890 case IceType_i8: |
3872 Eax = Traits::RegisterSet::Reg_al; | 3891 Eax = Traits::RegisterSet::Reg_al; |
3873 break; | 3892 break; |
3874 } | 3893 } |
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
3923 // | 3942 // |
3924 // Similar for 64-bit, but start w/ speculating that the upper 32 bits | 3943 // Similar for 64-bit, but start w/ speculating that the upper 32 bits |
3925 // are all zero, and compute the result for that case (checking the lower | 3944 // are all zero, and compute the result for that case (checking the lower |
3926 // 32 bits). Then actually compute the result for the upper bits and | 3945 // 32 bits). Then actually compute the result for the upper bits and |
3927 // cmov in the result from the lower computation if the earlier speculation | 3946 // cmov in the result from the lower computation if the earlier speculation |
3928 // was correct. | 3947 // was correct. |
3929 // | 3948 // |
3930 // Cttz is similar, but uses bsf instead, and doesn't require the xor | 3949 // Cttz is similar, but uses bsf instead, and doesn't require the xor |
3931 // bit position conversion, and the speculation is reversed. | 3950 // bit position conversion, and the speculation is reversed. |
3932 assert(Ty == IceType_i32 || Ty == IceType_i64); | 3951 assert(Ty == IceType_i32 || Ty == IceType_i64); |
3933 Variable *T = makeReg(IceType_i32); | 3952 const Type DestTy = Traits::Is64Bit ? Dest->getType() : IceType_i32; |
3953 Variable *T = makeReg(DestTy); | |
3934 Operand *FirstValRM = legalize(FirstVal, Legal_Mem | Legal_Reg); | 3954 Operand *FirstValRM = legalize(FirstVal, Legal_Mem | Legal_Reg); |
3935 if (Cttz) { | 3955 if (Cttz) { |
3936 _bsf(T, FirstValRM); | 3956 _bsf(T, FirstValRM); |
3937 } else { | 3957 } else { |
3938 _bsr(T, FirstValRM); | 3958 _bsr(T, FirstValRM); |
3939 } | 3959 } |
3940 Variable *T_Dest = makeReg(IceType_i32); | 3960 Variable *T_Dest = makeReg(DestTy); |
3941 Constant *ThirtyTwo = Ctx->getConstantInt32(32); | 3961 Constant *_31 = Ctx->getConstantInt32(31); |
3942 Constant *ThirtyOne = Ctx->getConstantInt32(31); | 3962 Constant *_32 = Ctx->getConstantInt(DestTy, 32); |
3943 if (Cttz) { | 3963 if (Cttz) { |
3944 _mov(T_Dest, ThirtyTwo); | 3964 _mov(T_Dest, _32); |
3945 } else { | 3965 } else { |
3946 Constant *SixtyThree = Ctx->getConstantInt32(63); | 3966 Constant *_63 = Ctx->getConstantInt(DestTy, 63); |
3947 _mov(T_Dest, SixtyThree); | 3967 _mov(T_Dest, _63); |
3948 } | 3968 } |
3949 _cmov(T_Dest, T, Traits::Cond::Br_ne); | 3969 _cmov(T_Dest, T, Traits::Cond::Br_ne); |
3950 if (!Cttz) { | 3970 if (!Cttz) { |
3951 _xor(T_Dest, ThirtyOne); | 3971 _xor(T_Dest, _31); |
3952 } | 3972 } |
3953 if (Traits::Is64Bit || Ty == IceType_i32) { | 3973 if (Traits::Is64Bit || Ty == IceType_i32) { |
3954 _mov(Dest, T_Dest); | 3974 _mov(Dest, T_Dest); |
3955 return; | 3975 return; |
3956 } | 3976 } |
3957 _add(T_Dest, ThirtyTwo); | 3977 _add(T_Dest, _32); |
3958 auto *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 3978 auto *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
3959 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 3979 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
3960 // Will be using "test" on this, so we need a registerized variable. | 3980 // Will be using "test" on this, so we need a registerized variable. |
3961 Variable *SecondVar = legalizeToReg(SecondVal); | 3981 Variable *SecondVar = legalizeToReg(SecondVal); |
3962 Variable *T_Dest2 = makeReg(IceType_i32); | 3982 Variable *T_Dest2 = makeReg(IceType_i32); |
3963 if (Cttz) { | 3983 if (Cttz) { |
3964 _bsf(T_Dest2, SecondVar); | 3984 _bsf(T_Dest2, SecondVar); |
3965 } else { | 3985 } else { |
3966 _bsr(T_Dest2, SecondVar); | 3986 _bsr(T_Dest2, SecondVar); |
3967 _xor(T_Dest2, ThirtyOne); | 3987 _xor(T_Dest2, _31); |
3968 } | 3988 } |
3969 _test(SecondVar, SecondVar); | 3989 _test(SecondVar, SecondVar); |
3970 _cmov(T_Dest2, T_Dest, Traits::Cond::Br_e); | 3990 _cmov(T_Dest2, T_Dest, Traits::Cond::Br_e); |
3971 _mov(DestLo, T_Dest2); | 3991 _mov(DestLo, T_Dest2); |
3972 _mov(DestHi, Ctx->getConstantZero(IceType_i32)); | 3992 _mov(DestHi, Ctx->getConstantZero(IceType_i32)); |
3973 } | 3993 } |
3974 | 3994 |
3975 template <class Machine> | 3995 template <class Machine> |
3976 void TargetX86Base<Machine>::typedLoad(Type Ty, Variable *Dest, Variable *Base, | 3996 void TargetX86Base<Machine>::typedLoad(Type Ty, Variable *Dest, Variable *Base, |
3977 Constant *Offset) { | 3997 Constant *Offset) { |
(...skipping 193 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
4171 | 4191 |
4172 // TODO(ascull): is 64-bit better with vector or scalar movq? | 4192 // TODO(ascull): is 64-bit better with vector or scalar movq? |
4173 auto *Mem = Traits::X86OperandMem::create(Func, Ty, Base, Offset); | 4193 auto *Mem = Traits::X86OperandMem::create(Func, Ty, Base, Offset); |
4174 if (isVectorType(Ty)) { | 4194 if (isVectorType(Ty)) { |
4175 assert(VecReg != nullptr); | 4195 assert(VecReg != nullptr); |
4176 _storep(VecReg, Mem); | 4196 _storep(VecReg, Mem); |
4177 } else if (Ty == IceType_f64) { | 4197 } else if (Ty == IceType_f64) { |
4178 assert(VecReg != nullptr); | 4198 assert(VecReg != nullptr); |
4179 _storeq(VecReg, Mem); | 4199 _storeq(VecReg, Mem); |
4180 } else { | 4200 } else { |
4201 assert(Ty != IceType_i64); | |
4181 _store(Ctx->getConstantInt(Ty, SpreadValue), Mem); | 4202 _store(Ctx->getConstantInt(Ty, SpreadValue), Mem); |
4182 } | 4203 } |
4183 }; | 4204 }; |
4184 | 4205 |
4185 // Find the largest type that can be used and use it as much as possible in | 4206 // Find the largest type that can be used and use it as much as possible in |
4186 // reverse order. Then handle any remainder with overlapping copies. Since | 4207 // reverse order. Then handle any remainder with overlapping copies. Since |
4187 // the remainder will be at the end, there will be reduced pressure on the | 4208 // the remainder will be at the end, there will be reduced pressure on the |
4188 // memory unit as the accesses to the same memory are far apart. | 4209 // memory unit as the accesses to the same memory are far apart. |
4189 Type Ty; | 4210 Type Ty; |
4190 if (ValValue == 0 && CountValue >= BytesPerStoreq && | 4211 if (ValValue == 0 && CountValue >= BytesPerStoreq && |
(...skipping 1937 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
6128 Variable *Reg = Func->makeVariable(Type); | 6149 Variable *Reg = Func->makeVariable(Type); |
6129 if (RegNum == Variable::NoRegister) | 6150 if (RegNum == Variable::NoRegister) |
6130 Reg->setMustHaveReg(); | 6151 Reg->setMustHaveReg(); |
6131 else | 6152 else |
6132 Reg->setRegNum(RegNum); | 6153 Reg->setRegNum(RegNum); |
6133 return Reg; | 6154 return Reg; |
6134 } | 6155 } |
6135 | 6156 |
// Annotation: TypeForSize is the lookup table read by largestTypeInSize(),
// which indexes it with llvm::findLastSet() of a byte count. The entries are
// listed in the order i8, i16, i32, <fourth entry>, v16i8.
// OLD: the fourth entry is IceType_i64 when Traits::Is64Bit, else IceType_f64.
// NEW: the fourth entry is always IceType_f64.
6136 template <class Machine> | 6157 template <class Machine> |
6137 const Type TargetX86Base<Machine>::TypeForSize[] = { | 6158 const Type TargetX86Base<Machine>::TypeForSize[] = { |
6138 IceType_i8, IceType_i16, IceType_i32, | 6159 IceType_i8, IceType_i16, IceType_i32, IceType_f64, IceType_v16i8}; |
6139 (Traits::Is64Bit ? IceType_i64 : IceType_f64), IceType_v16i8}; | |
// Annotation: largestTypeInSize() returns the TypeForSize entry selected by
// Size, optionally capped by MaxSize.
//   Size    - must be non-zero (asserted); findLastSet() is called with
//             ZB_Undefined, so a zero input has no defined result.
//   MaxSize - NoSizeLimit selects the last table entry as the cap; otherwise
//             the cap is findLastSet(MaxSize).
// The returned entry is TypeForSize[min(TyIndex, MaxIndex)].
6140 template <class Machine> | 6160 template <class Machine> |
6141 Type TargetX86Base<Machine>::largestTypeInSize(uint32_t Size, | 6161 Type TargetX86Base<Machine>::largestTypeInSize(uint32_t Size, |
6142 uint32_t MaxSize) { | 6162 uint32_t MaxSize) { |
6143 assert(Size != 0); | 6163 assert(Size != 0); |
// Index of the highest set bit of Size.
6144 uint32_t TyIndex = llvm::findLastSet(Size, llvm::ZB_Undefined); | 6164 uint32_t TyIndex = llvm::findLastSet(Size, llvm::ZB_Undefined); |
6145 uint32_t MaxIndex = MaxSize == NoSizeLimit | 6165 uint32_t MaxIndex = MaxSize == NoSizeLimit |
6146 ? llvm::array_lengthof(TypeForSize) - 1 | 6166 ? llvm::array_lengthof(TypeForSize) - 1 |
6147 : llvm::findLastSet(MaxSize, llvm::ZB_Undefined); | 6167 : llvm::findLastSet(MaxSize, llvm::ZB_Undefined); |
6148 return TypeForSize[std::min(TyIndex, MaxIndex)]; | 6168 return TypeForSize[std::min(TyIndex, MaxIndex)]; |
6149 } | 6169 } |
(...skipping 253 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
6403 } | 6423 } |
6404 // The offset is not eligible for blinding or pooling; return the original | 6424 // The offset is not eligible for blinding or pooling; return the original |
6405 // mem operand. | 6425 // mem operand. |
6406 return MemOperand; | 6426 return MemOperand; |
6407 } | 6427 } |
6408 | 6428 |
6409 } // end of namespace X86Internal | 6429 } // end of namespace X86Internal |
6410 } // end of namespace Ice | 6430 } // end of namespace Ice |
6411 | 6431 |
6412 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H | 6432 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H |
OLD | NEW |