src/IceTargetLoweringX86BaseImpl.h - Issue 1537703002: Subzero. x8664. Resurrects the Target.

Side by Side Diff: src/IceTargetLoweringX86BaseImpl.h

Issue 1537703002: Subzero. x8664. Resurrects the Target. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master

Patch Set: Beautifies the assembler. Created 5 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -- C++ --==//	1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -- C++ --==//

2 //	2 //

3 // The Subzero Code Generator	3 // The Subzero Code Generator

4 //	4 //

5 // This file is distributed under the University of Illinois Open Source	5 // This file is distributed under the University of Illinois Open Source

6 // License. See LICENSE.TXT for details.	6 // License. See LICENSE.TXT for details.

7 //	7 //

8 //===----------------------------------------------------------------------===//	8 //===----------------------------------------------------------------------===//

9 ///	9 ///

10 /// \file	10 /// \file

(...skipping 169 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
180 /// This generally means that its lowering sequence requires more than one	180 /// This generally means that its lowering sequence requires more than one

181 /// conditional branch, namely 64-bit integer compares and some floating-point	181 /// conditional branch, namely 64-bit integer compares and some floating-point

182 /// compares. When this is true, and there is more than one consumer, we prefer	182 /// compares. When this is true, and there is more than one consumer, we prefer

183 /// to disable the folding optimization because it minimizes branches.	183 /// to disable the folding optimization because it minimizes branches.

184 template <class MachineTraits>	184 template <class MachineTraits>

185 bool BoolFolding<MachineTraits>::hasComplexLowering(const Inst *Instr) {	185 bool BoolFolding<MachineTraits>::hasComplexLowering(const Inst *Instr) {

186 switch (getProducerKind(Instr)) {	186 switch (getProducerKind(Instr)) {

187 default:	187 default:

188 return false;	188 return false;

189 case PK_Icmp64:	189 case PK_Icmp64:

190 return true;	190 return !MachineTraits::Is64Bit;

191 case PK_Fcmp:	191 case PK_Fcmp:

192 return MachineTraits::TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()]	192 return MachineTraits::TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()]

193 .C2 != MachineTraits::Cond::Br_None;	193 .C2 != MachineTraits::Cond::Br_None;

194 }	194 }

195 }	195 }

196	196

197 template <class MachineTraits>	197 template <class MachineTraits>

198 bool BoolFolding<MachineTraits>::isValidFolding(	198 bool BoolFolding<MachineTraits>::isValidFolding(

199 typename BoolFolding<MachineTraits>::BoolFoldingProducerKind ProducerKind,	199 typename BoolFolding<MachineTraits>::BoolFoldingProducerKind ProducerKind,

200 typename BoolFolding<MachineTraits>::BoolFoldingConsumerKind ConsumerKind) {	200 typename BoolFolding<MachineTraits>::BoolFoldingConsumerKind ConsumerKind) {

(...skipping 558 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
759 bool TargetX86Base<Machine>::doBranchOpt(Inst *I, const CfgNode *NextNode) {	759 bool TargetX86Base<Machine>::doBranchOpt(Inst *I, const CfgNode *NextNode) {

760 if (auto *Br = llvm::dyn_cast<typename Traits::Insts::Br>(I)) {	760 if (auto *Br = llvm::dyn_cast<typename Traits::Insts::Br>(I)) {

761 return Br->optimizeBranch(NextNode);	761 return Br->optimizeBranch(NextNode);

762 }	762 }

763 return false;	763 return false;

764 }	764 }

765	765

766 template <class Machine>	766 template <class Machine>

767 Variable *TargetX86Base<Machine>::getPhysicalRegister(SizeT RegNum, Type Ty) {	767 Variable *TargetX86Base<Machine>::getPhysicalRegister(SizeT RegNum, Type Ty) {

768 // Special case: never allow partial reads/writes to/from %rBP and %rSP.	768 // Special case: never allow partial reads/writes to/from %rBP and %rSP.

769 if (RegNum == Traits::RegisterSet::Reg_esp ||	769 if (RegNum == getStackReg() || RegNum == getFrameReg())

770 RegNum == Traits::RegisterSet::Reg_ebp)

771 Ty = Traits::WordType;	770 Ty = Traits::WordType;

772 if (Ty == IceType_void)	771 if (Ty == IceType_void)

773 Ty = IceType_i32;	772 Ty = IceType_i32;

774 if (PhysicalRegisters[Ty].empty())	773 if (PhysicalRegisters[Ty].empty())

775 PhysicalRegisters[Ty].resize(Traits::RegisterSet::Reg_NUM);	774 PhysicalRegisters[Ty].resize(Traits::RegisterSet::Reg_NUM);

776 assert(RegNum < PhysicalRegisters[Ty].size());	775 assert(RegNum < PhysicalRegisters[Ty].size());

777 Variable *Reg = PhysicalRegisters[Ty][RegNum];	776 Variable *Reg = PhysicalRegisters[Ty][RegNum];

778 if (Reg == nullptr) {	777 if (Reg == nullptr) {

779 Reg = Func->makeVariable(Ty);	778 Reg = Func->makeVariable(Ty);

780 Reg->setRegNum(RegNum);	779 Reg->setRegNum(RegNum);

(...skipping 210 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
991 std::max(AlignmentParam, Traits::X86_STACK_ALIGNMENT_BYTES);	990 std::max(AlignmentParam, Traits::X86_STACK_ALIGNMENT_BYTES);

992 const bool OverAligned = Alignment > Traits::X86_STACK_ALIGNMENT_BYTES;	991 const bool OverAligned = Alignment > Traits::X86_STACK_ALIGNMENT_BYTES;

993 const bool OptM1 = Ctx->getFlags().getOptLevel() == Opt_m1;	992 const bool OptM1 = Ctx->getFlags().getOptLevel() == Opt_m1;

994 const bool AllocaWithKnownOffset = Inst->getKnownFrameOffset();	993 const bool AllocaWithKnownOffset = Inst->getKnownFrameOffset();

995 const bool UseFramePointer =	994 const bool UseFramePointer =

996 hasFramePointer() || OverAligned || !AllocaWithKnownOffset || OptM1;	995 hasFramePointer() || OverAligned || !AllocaWithKnownOffset || OptM1;

997	996

998 if (UseFramePointer)	997 if (UseFramePointer)

999 setHasFramePointer();	998 setHasFramePointer();

1000	999

1001 Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);	1000 Variable *esp = getPhysicalRegister(getStackReg());

1002 if (OverAligned) {	1001 if (OverAligned) {

1003 _and(esp, Ctx->getConstantInt32(-Alignment));	1002 _and(esp, Ctx->getConstantInt32(-Alignment));

1004 }	1003 }

1005	1004

1006 Variable *Dest = Inst->getDest();	1005 Variable *Dest = Inst->getDest();

1007 Operand *TotalSize = legalize(Inst->getSizeInBytes());	1006 Operand *TotalSize = legalize(Inst->getSizeInBytes());

1008	1007

1009 if (const auto *ConstantTotalSize =	1008 if (const auto *ConstantTotalSize =

1010 llvm::dyn_cast<ConstantInteger32>(TotalSize)) {	1009 llvm::dyn_cast<ConstantInteger32>(TotalSize)) {

1011 const uint32_t Value =	1010 const uint32_t Value =

(...skipping 694 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1706 _mov(T, Src0);	1705 _mov(T, Src0);

1707 if (!llvm::isa<ConstantInteger32>(Src1))	1706 if (!llvm::isa<ConstantInteger32>(Src1))

1708 Src1 = copyToReg8(Src1, Traits::RegisterSet::Reg_cl);	1707 Src1 = copyToReg8(Src1, Traits::RegisterSet::Reg_cl);

1709 _sar(T, Src1);	1708 _sar(T, Src1);

1710 _mov(Dest, T);	1709 _mov(Dest, T);

1711 break;	1710 break;

1712 case InstArithmetic::Udiv: {	1711 case InstArithmetic::Udiv: {

1713 // div and idiv are the few arithmetic operators that do not allow	1712 // div and idiv are the few arithmetic operators that do not allow

1714 // immediates as the operand.	1713 // immediates as the operand.

1715 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);	1714 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);

1716 uint32_t Eax = Traits::RegisterSet::Reg_eax;	1715 uint32_t Eax;

1717 uint32_t Edx = Traits::RegisterSet::Reg_edx;	1716 uint32_t Edx;

1718 switch (Ty) {	1717 switch (Ty) {

1719 default:	1718 default:

1720 llvm_unreachable("Bad type for udiv");	1719 llvm::report_fatal_error("Bad type for udiv");

1721 // fallthrough	1720 case IceType_i64:

	1721 Eax = Traits::getRaxOrDie();

	1722 Edx = Traits::getRdxOrDie();

1722 case IceType_i32:	1723 case IceType_i32:

	1724 Eax = Traits::RegisterSet::Reg_eax;

	1725 Edx = Traits::RegisterSet::Reg_edx;

1723 break;	1726 break;

1724 case IceType_i16:	1727 case IceType_i16:

1725 Eax = Traits::RegisterSet::Reg_ax;	1728 Eax = Traits::RegisterSet::Reg_ax;

1726 Edx = Traits::RegisterSet::Reg_dx;	1729 Edx = Traits::RegisterSet::Reg_dx;

1727 break;	1730 break;

1728 case IceType_i8:	1731 case IceType_i8:

1729 Eax = Traits::RegisterSet::Reg_al;	1732 Eax = Traits::RegisterSet::Reg_al;

1730 Edx = Traits::RegisterSet::Reg_ah;	1733 Edx = Traits::RegisterSet::Reg_ah;

1731 break;	1734 break;

1732 }	1735 }

(...skipping 33 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1766 _sar(T, Ctx->getConstantInt(Ty, LogDiv));	1769 _sar(T, Ctx->getConstantInt(Ty, LogDiv));

1767 }	1770 }

1768 _mov(Dest, T);	1771 _mov(Dest, T);

1769 return;	1772 return;

1770 }	1773 }

1771 }	1774 }

1772 }	1775 }

1773 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);	1776 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);

1774 switch (Ty) {	1777 switch (Ty) {

1775 default:	1778 default:

1776 llvm_unreachable("Bad type for sdiv");	1779 llvm::report_fatal_error("Bad type for sdiv");

1777 // fallthrough	1780 case IceType_i64:

	1781 T_edx = makeReg(Ty, Traits::getRdxOrDie());

	1782 _mov(T, Src0, Traits::getRaxOrDie());

	1783 break;

1778 case IceType_i32:	1784 case IceType_i32:

1779 T_edx = makeReg(Ty, Traits::RegisterSet::Reg_edx);	1785 T_edx = makeReg(Ty, Traits::RegisterSet::Reg_edx);

1780 _mov(T, Src0, Traits::RegisterSet::Reg_eax);	1786 _mov(T, Src0, Traits::RegisterSet::Reg_eax);

1781 break;	1787 break;

1782 case IceType_i16:	1788 case IceType_i16:

1783 T_edx = makeReg(Ty, Traits::RegisterSet::Reg_dx);	1789 T_edx = makeReg(Ty, Traits::RegisterSet::Reg_dx);

1784 _mov(T, Src0, Traits::RegisterSet::Reg_ax);	1790 _mov(T, Src0, Traits::RegisterSet::Reg_ax);

1785 break;	1791 break;

1786 case IceType_i8:	1792 case IceType_i8:

1787 T_edx = makeReg(IceType_i16, Traits::RegisterSet::Reg_ax);	1793 T_edx = makeReg(IceType_i16, Traits::RegisterSet::Reg_ax);

1788 _mov(T, Src0, Traits::RegisterSet::Reg_al);	1794 _mov(T, Src0, Traits::RegisterSet::Reg_al);

1789 break;	1795 break;

1790 }	1796 }

1791 _cbwdq(T_edx, T);	1797 _cbwdq(T_edx, T);

1792 _idiv(T, Src1, T_edx);	1798 _idiv(T, Src1, T_edx);

1793 _mov(Dest, T);	1799 _mov(Dest, T);

1794 break;	1800 break;

1795 case InstArithmetic::Urem: {	1801 case InstArithmetic::Urem: {

1796 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);	1802 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);

1797 uint32_t Eax = Traits::RegisterSet::Reg_eax;	1803 uint32_t Eax;

1798 uint32_t Edx = Traits::RegisterSet::Reg_edx;	1804 uint32_t Edx;

1799 switch (Ty) {	1805 switch (Ty) {

1800 default:	1806 default:

1801 llvm_unreachable("Bad type for urem");	1807 llvm::report_fatal_error("Bad type for urem");

1802 // fallthrough	1808 case IceType_i64:

	1809 Eax = Traits::getRaxOrDie();

	1810 Edx = Traits::getRdxOrDie();

	1811 break;

1803 case IceType_i32:	1812 case IceType_i32:

	1813 Eax = Traits::RegisterSet::Reg_eax;

	1814 Edx = Traits::RegisterSet::Reg_edx;

1804 break;	1815 break;

1805 case IceType_i16:	1816 case IceType_i16:

1806 Eax = Traits::RegisterSet::Reg_ax;	1817 Eax = Traits::RegisterSet::Reg_ax;

1807 Edx = Traits::RegisterSet::Reg_dx;	1818 Edx = Traits::RegisterSet::Reg_dx;

1808 break;	1819 break;

1809 case IceType_i8:	1820 case IceType_i8:

1810 Eax = Traits::RegisterSet::Reg_al;	1821 Eax = Traits::RegisterSet::Reg_al;

1811 Edx = Traits::RegisterSet::Reg_ah;	1822 Edx = Traits::RegisterSet::Reg_ah;

1812 break;	1823 break;

1813 }	1824 }

(...skipping 37 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1851 _add(T, Src0);	1862 _add(T, Src0);

1852 _and(T, Ctx->getConstantInt(Ty, -(1 << LogDiv)));	1863 _and(T, Ctx->getConstantInt(Ty, -(1 << LogDiv)));

1853 _sub(T, Src0);	1864 _sub(T, Src0);

1854 _neg(T);	1865 _neg(T);

1855 _mov(Dest, T);	1866 _mov(Dest, T);

1856 return;	1867 return;

1857 }	1868 }

1858 }	1869 }

1859 }	1870 }

1860 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);	1871 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);

1861 uint32_t Eax = Traits::RegisterSet::Reg_eax;	1872 uint32_t Eax;

1862 uint32_t Edx = Traits::RegisterSet::Reg_edx;	1873 uint32_t Edx;

1863 switch (Ty) {	1874 switch (Ty) {

1864 default:	1875 default:

1865 llvm_unreachable("Bad type for srem");	1876 llvm::report_fatal_error("Bad type for srem");

1866 // fallthrough	1877 case IceType_i64:

	1878 Eax = Traits::getRaxOrDie();

	1879 Edx = Traits::getRdxOrDie();

	1880 break;

1867 case IceType_i32:	1881 case IceType_i32:

	1882 Eax = Traits::RegisterSet::Reg_eax;

	1883 Edx = Traits::RegisterSet::Reg_edx;

1868 break;	1884 break;

1869 case IceType_i16:	1885 case IceType_i16:

1870 Eax = Traits::RegisterSet::Reg_ax;	1886 Eax = Traits::RegisterSet::Reg_ax;

1871 Edx = Traits::RegisterSet::Reg_dx;	1887 Edx = Traits::RegisterSet::Reg_dx;

1872 break;	1888 break;

1873 case IceType_i8:	1889 case IceType_i8:

1874 Eax = Traits::RegisterSet::Reg_al;	1890 Eax = Traits::RegisterSet::Reg_al;

1875 Edx = Traits::RegisterSet::Reg_ah;	1891 Edx = Traits::RegisterSet::Reg_ah;

1876 break;	1892 break;

1877 }	1893 }

(...skipping 945 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2823	2839

2824 _movp(Dest, T);	2840 _movp(Dest, T);

2825 eliminateNextVectorSextInstruction(Dest);	2841 eliminateNextVectorSextInstruction(Dest);

2826 }	2842 }

2827	2843

2828 template <typename Machine>	2844 template <typename Machine>

2829 template <typename T>	2845 template <typename T>

2830 typename std::enable_if<!T::Is64Bit, void>::type	2846 typename std::enable_if<!T::Is64Bit, void>::type

2831 TargetX86Base<Machine>::lowerIcmp64(const InstIcmp *Icmp,	2847 TargetX86Base<Machine>::lowerIcmp64(const InstIcmp *Icmp,

2832 const Inst *Consumer) {	2848 const Inst *Consumer) {

	2849 assert(!T::Is64Bit);
	Jim Stichnoth (2015/12/20 19:27:38): This assert seems completely unnecessary given the std::enable_if above. John (2015/12/21 13:41:32): On 2015/12/20 19:27:38, stichnot wrote: > This assert seems completely unnecessary given the std::enable_if above. Done.
2833 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1:	2850 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1:

2834 Operand *Src0 = legalize(Icmp->getSrc(0));	2851 Operand *Src0 = legalize(Icmp->getSrc(0));

2835 Operand *Src1 = legalize(Icmp->getSrc(1));	2852 Operand *Src1 = legalize(Icmp->getSrc(1));

2836 Variable *Dest = Icmp->getDest();	2853 Variable *Dest = Icmp->getDest();

2837 InstIcmp::ICond Condition = Icmp->getCondition();	2854 InstIcmp::ICond Condition = Icmp->getCondition();

2838 size_t Index = static_cast<size_t>(Condition);	2855 size_t Index = static_cast<size_t>(Condition);

2839 assert(Index < Traits::TableIcmp64Size);	2856 assert(Index < Traits::TableIcmp64Size);

2840 Operand *Src0LoRM = nullptr;	2857 Operand *Src0LoRM = nullptr;

2841 Operand *Src0HiRM = nullptr;	2858 Operand *Src0HiRM = nullptr;

2842 // Legalize the portions of Src0 that are going to be needed.	2859 // Legalize the portions of Src0 that are going to be needed.

(...skipping 688 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3531 }	3548 }

3532 case Intrinsics::Sqrt: {	3549 case Intrinsics::Sqrt: {

3533 Operand *Src = legalize(Instr->getArg(0));	3550 Operand *Src = legalize(Instr->getArg(0));

3534 Variable *Dest = Instr->getDest();	3551 Variable *Dest = Instr->getDest();

3535 Variable *T = makeReg(Dest->getType());	3552 Variable *T = makeReg(Dest->getType());

3536 _sqrtss(T, Src);	3553 _sqrtss(T, Src);

3537 _mov(Dest, T);	3554 _mov(Dest, T);

3538 return;	3555 return;

3539 }	3556 }

3540 case Intrinsics::Stacksave: {	3557 case Intrinsics::Stacksave: {

3541 Variable *esp =	3558 Variable *esp = Func->getTarget()->getPhysicalRegister(getStackReg());

3542 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);

3543 Variable *Dest = Instr->getDest();	3559 Variable *Dest = Instr->getDest();

3544 _mov(Dest, esp);	3560 _mov(Dest, esp);

3545 return;	3561 return;

3546 }	3562 }

3547 case Intrinsics::Stackrestore: {	3563 case Intrinsics::Stackrestore: {

3548 Variable *esp =	3564 Variable *esp = Func->getTarget()->getPhysicalRegister(getStackReg());

3549 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);

3550 _redefined(_mov(esp, Instr->getArg(0)));	3565 _redefined(_mov(esp, Instr->getArg(0)));

3551 return;	3566 return;

3552 }	3567 }

3553 case Intrinsics::Trap:	3568 case Intrinsics::Trap:

3554 _ud2();	3569 _ud2();

3555 return;	3570 return;

3556 case Intrinsics::UnknownIntrinsic:	3571 case Intrinsics::UnknownIntrinsic:

3557 Func->setError("Should not be lowering UnknownIntrinsic");	3572 Func->setError("Should not be lowering UnknownIntrinsic");

3558 return;	3573 return;

3559 }	3574 }

(...skipping 21 matching lines...) Expand all Loading...
3581 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);	3596 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);

3582 auto *DestLo = llvm::cast<Variable>(loOperand(DestPrev));	3597 auto *DestLo = llvm::cast<Variable>(loOperand(DestPrev));

3583 auto *DestHi = llvm::cast<Variable>(hiOperand(DestPrev));	3598 auto *DestHi = llvm::cast<Variable>(hiOperand(DestPrev));

3584 _mov(DestLo, T_eax);	3599 _mov(DestLo, T_eax);

3585 _mov(DestHi, T_edx);	3600 _mov(DestHi, T_edx);

3586 return;	3601 return;

3587 }	3602 }

3588 int32_t Eax;	3603 int32_t Eax;

3589 switch (Ty) {	3604 switch (Ty) {

3590 default:	3605 default:

3591 llvm_unreachable("Bad type for cmpxchg");	3606 llvm::report_fatal_error("Bad type for cmpxchg");

3592 // fallthrough	3607 case IceType_i64:

	3608 Eax = Traits::getRaxOrDie();

	3609 break;

3593 case IceType_i32:	3610 case IceType_i32:

3594 Eax = Traits::RegisterSet::Reg_eax;	3611 Eax = Traits::RegisterSet::Reg_eax;

3595 break;	3612 break;

3596 case IceType_i16:	3613 case IceType_i16:

3597 Eax = Traits::RegisterSet::Reg_ax;	3614 Eax = Traits::RegisterSet::Reg_ax;

3598 break;	3615 break;

3599 case IceType_i8:	3616 case IceType_i8:

3600 Eax = Traits::RegisterSet::Reg_al;	3617 Eax = Traits::RegisterSet::Reg_al;

3601 break;	3618 break;

3602 }	3619 }

(...skipping 250 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3853 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));	3870 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));

3854 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));	3871 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));

3855 _mov(DestLo, T_eax);	3872 _mov(DestLo, T_eax);

3856 _mov(DestHi, T_edx);	3873 _mov(DestHi, T_edx);

3857 return;	3874 return;

3858 }	3875 }

3859 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty);	3876 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty);

3860 int32_t Eax;	3877 int32_t Eax;

3861 switch (Ty) {	3878 switch (Ty) {

3862 default:	3879 default:

3863 llvm_unreachable("Bad type for atomicRMW");	3880 llvm::report_fatal_error("Bad type for atomicRMW");

3864 // fallthrough	3881 case IceType_i64:

	3882 Eax = Traits::getRaxOrDie();

	3883 break;

3865 case IceType_i32:	3884 case IceType_i32:

3866 Eax = Traits::RegisterSet::Reg_eax;	3885 Eax = Traits::RegisterSet::Reg_eax;

3867 break;	3886 break;

3868 case IceType_i16:	3887 case IceType_i16:

3869 Eax = Traits::RegisterSet::Reg_ax;	3888 Eax = Traits::RegisterSet::Reg_ax;

3870 break;	3889 break;

3871 case IceType_i8:	3890 case IceType_i8:

3872 Eax = Traits::RegisterSet::Reg_al;	3891 Eax = Traits::RegisterSet::Reg_al;

3873 break;	3892 break;

3874 }	3893 }

(...skipping 48 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3923 //	3942 //

3924 // Similar for 64-bit, but start w/ speculating that the upper 32 bits	3943 // Similar for 64-bit, but start w/ speculating that the upper 32 bits

3925 // are all zero, and compute the result for that case (checking the lower	3944 // are all zero, and compute the result for that case (checking the lower

3926 // 32 bits). Then actually compute the result for the upper bits and	3945 // 32 bits). Then actually compute the result for the upper bits and

3927 // cmov in the result from the lower computation if the earlier speculation	3946 // cmov in the result from the lower computation if the earlier speculation

3928 // was correct.	3947 // was correct.

3929 //	3948 //

3930 // Cttz, is similar, but uses bsf instead, and doesn't require the xor	3949 // Cttz, is similar, but uses bsf instead, and doesn't require the xor

3931 // bit position conversion, and the speculation is reversed.	3950 // bit position conversion, and the speculation is reversed.

3932 assert(Ty == IceType_i32 || Ty == IceType_i64);	3951 assert(Ty == IceType_i32 || Ty == IceType_i64);

3933 Variable *T = makeReg(IceType_i32);	3952 const Type DestTy = Traits::Is64Bit ? Dest->getType() : IceType_i32;

	3953 Variable *T = makeReg(DestTy);

3934 Operand *FirstValRM = legalize(FirstVal, Legal_Mem | Legal_Reg);	3954 Operand *FirstValRM = legalize(FirstVal, Legal_Mem | Legal_Reg);

3935 if (Cttz) {	3955 if (Cttz) {

3936 _bsf(T, FirstValRM);	3956 _bsf(T, FirstValRM);

3937 } else {	3957 } else {

3938 _bsr(T, FirstValRM);	3958 _bsr(T, FirstValRM);

3939 }	3959 }

3940 Variable *T_Dest = makeReg(IceType_i32);	3960 Variable *T_Dest = makeReg(DestTy);

3941 Constant *ThirtyTwo = Ctx->getConstantInt32(32);	3961 Constant *_31 = Ctx->getConstantInt32(31);

3942 Constant *ThirtyOne = Ctx->getConstantInt32(31);	3962 Constant *_32 = Ctx->getConstantInt(DestTy, 32);

3943 if (Cttz) {	3963 if (Cttz) {

3944 _mov(T_Dest, ThirtyTwo);	3964 _mov(T_Dest, _32);

3945 } else {	3965 } else {

3946 Constant *SixtyThree = Ctx->getConstantInt32(63);	3966 Constant *_63 = Ctx->getConstantInt(DestTy, 63);

3947 _mov(T_Dest, SixtyThree);	3967 _mov(T_Dest, _63);

3948 }	3968 }

3949 _cmov(T_Dest, T, Traits::Cond::Br_ne);	3969 _cmov(T_Dest, T, Traits::Cond::Br_ne);

3950 if (!Cttz) {	3970 if (!Cttz) {

3951 _xor(T_Dest, ThirtyOne);	3971 _xor(T_Dest, _31);

3952 }	3972 }

3953 if (Traits::Is64Bit || Ty == IceType_i32) {	3973 if (Traits::Is64Bit || Ty == IceType_i32) {

3954 _mov(Dest, T_Dest);	3974 _mov(Dest, T_Dest);

3955 return;	3975 return;

3956 }	3976 }

3957 _add(T_Dest, ThirtyTwo);	3977 _add(T_Dest, _32);

3958 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));	3978 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));

3959 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));	3979 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));

3960 // Will be using "test" on this, so we need a registerized variable.	3980 // Will be using "test" on this, so we need a registerized variable.

3961 Variable *SecondVar = legalizeToReg(SecondVal);	3981 Variable *SecondVar = legalizeToReg(SecondVal);

3962 Variable *T_Dest2 = makeReg(IceType_i32);	3982 Variable *T_Dest2 = makeReg(IceType_i32);

3963 if (Cttz) {	3983 if (Cttz) {

3964 _bsf(T_Dest2, SecondVar);	3984 _bsf(T_Dest2, SecondVar);

3965 } else {	3985 } else {

3966 _bsr(T_Dest2, SecondVar);	3986 _bsr(T_Dest2, SecondVar);

3967 _xor(T_Dest2, ThirtyOne);	3987 _xor(T_Dest2, _31);

3968 }	3988 }

3969 _test(SecondVar, SecondVar);	3989 _test(SecondVar, SecondVar);

3970 _cmov(T_Dest2, T_Dest, Traits::Cond::Br_e);	3990 _cmov(T_Dest2, T_Dest, Traits::Cond::Br_e);

3971 _mov(DestLo, T_Dest2);	3991 _mov(DestLo, T_Dest2);

3972 _mov(DestHi, Ctx->getConstantZero(IceType_i32));	3992 _mov(DestHi, Ctx->getConstantZero(IceType_i32));

3973 }	3993 }

3974	3994

3975 template <class Machine>	3995 template <class Machine>

3976 void TargetX86Base<Machine>::typedLoad(Type Ty, Variable *Dest, Variable *Base,	3996 void TargetX86Base<Machine>::typedLoad(Type Ty, Variable *Dest, Variable *Base,

3977 Constant *Offset) {	3997 Constant *Offset) {

(...skipping 193 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
4171	4191

4172 // TODO(ascull): is 64-bit better with vector or scalar movq?	4192 // TODO(ascull): is 64-bit better with vector or scalar movq?

4173 auto *Mem = Traits::X86OperandMem::create(Func, Ty, Base, Offset);	4193 auto *Mem = Traits::X86OperandMem::create(Func, Ty, Base, Offset);

4174 if (isVectorType(Ty)) {	4194 if (isVectorType(Ty)) {

4175 assert(VecReg != nullptr);	4195 assert(VecReg != nullptr);

4176 _storep(VecReg, Mem);	4196 _storep(VecReg, Mem);

4177 } else if (Ty == IceType_f64) {	4197 } else if (Ty == IceType_f64) {

4178 assert(VecReg != nullptr);	4198 assert(VecReg != nullptr);

4179 _storeq(VecReg, Mem);	4199 _storeq(VecReg, Mem);

4180 } else {	4200 } else {

	4201 assert(Ty != IceType_i64);

4181 _store(Ctx->getConstantInt(Ty, SpreadValue), Mem);	4202 _store(Ctx->getConstantInt(Ty, SpreadValue), Mem);

4182 }	4203 }

4183 };	4204 };

4184	4205

4185 // Find the largest type that can be used and use it as much as possible in	4206 // Find the largest type that can be used and use it as much as possible in

4186 // reverse order. Then handle any remainder with overlapping copies. Since	4207 // reverse order. Then handle any remainder with overlapping copies. Since

4187 // the remainder will be at the end, there will be reduced pressure on the	4208 // the remainder will be at the end, there will be reduced pressure on the

4188 // memory unit as the accesses to the same memory are far apart.	4209 // memory unit as the accesses to the same memory are far apart.

4189 Type Ty;	4210 Type Ty;

4190 if (ValValue == 0 && CountValue >= BytesPerStoreq &&	4211 if (ValValue == 0 && CountValue >= BytesPerStoreq &&

(...skipping 1937 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
6128 Variable *Reg = Func->makeVariable(Type);	6149 Variable *Reg = Func->makeVariable(Type);

6129 if (RegNum == Variable::NoRegister)	6150 if (RegNum == Variable::NoRegister)

6130 Reg->setMustHaveReg();	6151 Reg->setMustHaveReg();

6131 else	6152 else

6132 Reg->setRegNum(RegNum);	6153 Reg->setRegNum(RegNum);

6133 return Reg;	6154 return Reg;

6134 }	6155 }

6135	6156

6136 template <class Machine>	6157 template <class Machine>

6137 const Type TargetX86Base<Machine>::TypeForSize[] = {	6158 const Type TargetX86Base<Machine>::TypeForSize[] = {

6138 IceType_i8, IceType_i16, IceType_i32,	6159 IceType_i8, IceType_i16, IceType_i32, IceType_f64, IceType_v16i8};

6139 (Traits::Is64Bit ? IceType_i64 : IceType_f64), IceType_v16i8};

6140 template <class Machine>	6160 template <class Machine>

6141 Type TargetX86Base<Machine>::largestTypeInSize(uint32_t Size,	6161 Type TargetX86Base<Machine>::largestTypeInSize(uint32_t Size,

6142 uint32_t MaxSize) {	6162 uint32_t MaxSize) {

6143 assert(Size != 0);	6163 assert(Size != 0);

6144 uint32_t TyIndex = llvm::findLastSet(Size, llvm::ZB_Undefined);	6164 uint32_t TyIndex = llvm::findLastSet(Size, llvm::ZB_Undefined);

6145 uint32_t MaxIndex = MaxSize == NoSizeLimit	6165 uint32_t MaxIndex = MaxSize == NoSizeLimit

6146 ? llvm::array_lengthof(TypeForSize) - 1	6166 ? llvm::array_lengthof(TypeForSize) - 1

6147 : llvm::findLastSet(MaxSize, llvm::ZB_Undefined);	6167 : llvm::findLastSet(MaxSize, llvm::ZB_Undefined);

6148 return TypeForSize[std::min(TyIndex, MaxIndex)];	6168 return TypeForSize[std::min(TyIndex, MaxIndex)];

6149 }	6169 }

(...skipping 253 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
6403 }	6423 }

6404 // the offset is not eligible for blinding or pooling, return the original	6424 // the offset is not eligible for blinding or pooling, return the original

6405 // mem operand	6425 // mem operand

6406 return MemOperand;	6426 return MemOperand;

6407 }	6427 }

6408	6428

6409 } // end of namespace X86Internal	6429 } // end of namespace X86Internal

6410 } // end of namespace Ice	6430 } // end of namespace Ice

6411	6431

6412 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H	6432 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H

OLD	NEW

« src/IceTargetLoweringX8664.cpp ('K') | « src/IceTargetLoweringX86Base.h ('k') | src/IceTypes.def » ('j') | src/IceTypes.def » ('J')