Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(68)

Side by Side Diff: src/IceTargetLoweringX86BaseImpl.h

Issue 1537703002: Subzero. x8664. Resurrects the Target. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Addresses comments. Created 5 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
(...skipping 169 matching lines...) Expand 10 before | Expand all | Expand 10 after
180 /// This generally means that its lowering sequence requires more than one 180 /// This generally means that its lowering sequence requires more than one
181 /// conditional branch, namely 64-bit integer compares and some floating-point 181 /// conditional branch, namely 64-bit integer compares and some floating-point
182 /// compares. When this is true, and there is more than one consumer, we prefer 182 /// compares. When this is true, and there is more than one consumer, we prefer
183 /// to disable the folding optimization because it minimizes branches. 183 /// to disable the folding optimization because it minimizes branches.
// Side-by-side diff rows: left is the old revision, right is the new one.
// Reports whether the producer instruction Instr is classified as having a
// "complex" lowering, keyed on the kind returned by getProducerKind(Instr).
184 template <class MachineTraits> 184 template <class MachineTraits>
185 bool BoolFolding<MachineTraits>::hasComplexLowering(const Inst *Instr) { 185 bool BoolFolding<MachineTraits>::hasComplexLowering(const Inst *Instr) {
186 switch (getProducerKind(Instr)) { 186 switch (getProducerKind(Instr)) {
// Any producer kind not listed below is reported as not complex.
187 default: 187 default:
188 return false; 188 return false;
189 case PK_Icmp64: 189 case PK_Icmp64:
// Changed row: the old column always answers true for PK_Icmp64; the new
// column answers true only when MachineTraits::Is64Bit is false.
190 return true; 190 return !MachineTraits::Is64Bit;
// For PK_Fcmp the answer is whether the TableFcmp entry selected by the
// instruction's condition has a C2 field different from Cond::Br_None.
191 case PK_Fcmp: 191 case PK_Fcmp:
192 return MachineTraits::TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()] 192 return MachineTraits::TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()]
193 .C2 != MachineTraits::Cond::Br_None; 193 .C2 != MachineTraits::Cond::Br_None;
194 } 194 }
195 } 195 }
196 196
197 template <class MachineTraits> 197 template <class MachineTraits>
198 bool BoolFolding<MachineTraits>::isValidFolding( 198 bool BoolFolding<MachineTraits>::isValidFolding(
199 typename BoolFolding<MachineTraits>::BoolFoldingProducerKind ProducerKind, 199 typename BoolFolding<MachineTraits>::BoolFoldingProducerKind ProducerKind,
200 typename BoolFolding<MachineTraits>::BoolFoldingConsumerKind ConsumerKind) { 200 typename BoolFolding<MachineTraits>::BoolFoldingConsumerKind ConsumerKind) {
(...skipping 557 matching lines...) Expand 10 before | Expand all | Expand 10 after
// Unchanged between the two revisions (both columns are identical).
// If I is a Traits::Insts::Br instruction, forwards to
// Br->optimizeBranch(NextNode) and returns its result; for any other
// instruction it returns false.
758 template <class Machine> 758 template <class Machine>
759 bool TargetX86Base<Machine>::doBranchOpt(Inst *I, const CfgNode *NextNode) { 759 bool TargetX86Base<Machine>::doBranchOpt(Inst *I, const CfgNode *NextNode) {
// dyn_cast yields nullptr for non-branch instructions, which skips the body.
760 if (auto *Br = llvm::dyn_cast<typename Traits::Insts::Br>(I)) { 760 if (auto *Br = llvm::dyn_cast<typename Traits::Insts::Br>(I)) {
761 return Br->optimizeBranch(NextNode); 761 return Br->optimizeBranch(NextNode);
762 } 762 }
763 return false; 763 return false;
764 } 764 }
765 765
766 template <class Machine> 766 template <class Machine>
767 Variable *TargetX86Base<Machine>::getPhysicalRegister(SizeT RegNum, Type Ty) { 767 Variable *TargetX86Base<Machine>::getPhysicalRegister(SizeT RegNum, Type Ty) {
768 // Special case: never allow partial reads/writes to/from %rBP and %rSP.
769 if (RegNum == Traits::RegisterSet::Reg_esp ||
770 RegNum == Traits::RegisterSet::Reg_ebp)
771 Ty = Traits::WordType;
772 if (Ty == IceType_void) 768 if (Ty == IceType_void)
773 Ty = IceType_i32; 769 Ty = IceType_i32;
774 if (PhysicalRegisters[Ty].empty()) 770 if (PhysicalRegisters[Ty].empty())
775 PhysicalRegisters[Ty].resize(Traits::RegisterSet::Reg_NUM); 771 PhysicalRegisters[Ty].resize(Traits::RegisterSet::Reg_NUM);
776 assert(RegNum < PhysicalRegisters[Ty].size()); 772 assert(RegNum < PhysicalRegisters[Ty].size());
777 Variable *Reg = PhysicalRegisters[Ty][RegNum]; 773 Variable *Reg = PhysicalRegisters[Ty][RegNum];
778 if (Reg == nullptr) { 774 if (Reg == nullptr) {
779 Reg = Func->makeVariable(Ty); 775 Reg = Func->makeVariable(Ty);
780 Reg->setRegNum(RegNum); 776 Reg->setRegNum(RegNum);
781 PhysicalRegisters[Ty][RegNum] = Reg; 777 PhysicalRegisters[Ty][RegNum] = Reg;
(...skipping 209 matching lines...) Expand 10 before | Expand all | Expand 10 after
991 std::max(AlignmentParam, Traits::X86_STACK_ALIGNMENT_BYTES); 987 std::max(AlignmentParam, Traits::X86_STACK_ALIGNMENT_BYTES);
992 const bool OverAligned = Alignment > Traits::X86_STACK_ALIGNMENT_BYTES; 988 const bool OverAligned = Alignment > Traits::X86_STACK_ALIGNMENT_BYTES;
993 const bool OptM1 = Ctx->getFlags().getOptLevel() == Opt_m1; 989 const bool OptM1 = Ctx->getFlags().getOptLevel() == Opt_m1;
994 const bool AllocaWithKnownOffset = Inst->getKnownFrameOffset(); 990 const bool AllocaWithKnownOffset = Inst->getKnownFrameOffset();
995 const bool UseFramePointer = 991 const bool UseFramePointer =
996 hasFramePointer() || OverAligned || !AllocaWithKnownOffset || OptM1; 992 hasFramePointer() || OverAligned || !AllocaWithKnownOffset || OptM1;
997 993
998 if (UseFramePointer) 994 if (UseFramePointer)
999 setHasFramePointer(); 995 setHasFramePointer();
1000 996
1001 Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp); 997 Variable *esp = getPhysicalRegister(getStackReg());
1002 if (OverAligned) { 998 if (OverAligned) {
1003 _and(esp, Ctx->getConstantInt32(-Alignment)); 999 _and(esp, Ctx->getConstantInt32(-Alignment));
1004 } 1000 }
1005 1001
1006 Variable *Dest = Inst->getDest(); 1002 Variable *Dest = Inst->getDest();
1007 Operand *TotalSize = legalize(Inst->getSizeInBytes()); 1003 Operand *TotalSize = legalize(Inst->getSizeInBytes());
1008 1004
1009 if (const auto *ConstantTotalSize = 1005 if (const auto *ConstantTotalSize =
1010 llvm::dyn_cast<ConstantInteger32>(TotalSize)) { 1006 llvm::dyn_cast<ConstantInteger32>(TotalSize)) {
1011 const uint32_t Value = 1007 const uint32_t Value =
(...skipping 694 matching lines...) Expand 10 before | Expand all | Expand 10 after
1706 _mov(T, Src0); 1702 _mov(T, Src0);
1707 if (!llvm::isa<ConstantInteger32>(Src1)) 1703 if (!llvm::isa<ConstantInteger32>(Src1))
1708 Src1 = copyToReg8(Src1, Traits::RegisterSet::Reg_cl); 1704 Src1 = copyToReg8(Src1, Traits::RegisterSet::Reg_cl);
1709 _sar(T, Src1); 1705 _sar(T, Src1);
1710 _mov(Dest, T); 1706 _mov(Dest, T);
1711 break; 1707 break;
1712 case InstArithmetic::Udiv: { 1708 case InstArithmetic::Udiv: {
1713 // div and idiv are the few arithmetic operators that do not allow 1709 // div and idiv are the few arithmetic operators that do not allow
1714 // immediates as the operand. 1710 // immediates as the operand.
1715 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); 1711 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
1716 uint32_t Eax = Traits::RegisterSet::Reg_eax; 1712 uint32_t Eax;
1717 uint32_t Edx = Traits::RegisterSet::Reg_edx; 1713 uint32_t Edx;
// Selects the register pair used for the Udiv lowering based on the operand
// type Ty. Left column is the old revision, right column is the new one.
1718 switch (Ty) { 1714 switch (Ty) {
1719 default: 1715 default:
1720 llvm_unreachable("Bad type for udiv"); 1716 llvm::report_fatal_error("Bad type for udiv");
1721 // fallthrough 1717 case IceType_i64:
1718 Eax = Traits::getRaxOrDie();
1719 Edx = Traits::getRdxOrDie();
// Reviewer fix (new column only; row left unnumbered so the page's existing
// line numbers stay valid): without this break the i64 case falls through
// into IceType_i32 and Eax/Edx are overwritten with Reg_eax/Reg_edx. The
// i64 cases of the Urem and Srem switches in this same patch do break.
break;
1722 case IceType_i32: 1720 case IceType_i32:
1721 Eax = Traits::RegisterSet::Reg_eax;
1722 Edx = Traits::RegisterSet::Reg_edx;
1723 break; 1723 break;
1724 case IceType_i16: 1724 case IceType_i16:
1725 Eax = Traits::RegisterSet::Reg_ax; 1725 Eax = Traits::RegisterSet::Reg_ax;
1726 Edx = Traits::RegisterSet::Reg_dx; 1726 Edx = Traits::RegisterSet::Reg_dx;
1727 break; 1727 break;
1728 case IceType_i8: 1728 case IceType_i8:
1729 Eax = Traits::RegisterSet::Reg_al; 1729 Eax = Traits::RegisterSet::Reg_al;
1730 Edx = Traits::RegisterSet::Reg_ah; 1730 Edx = Traits::RegisterSet::Reg_ah;
1731 break; 1731 break;
1732 } 1732 }
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after
1766 _sar(T, Ctx->getConstantInt(Ty, LogDiv)); 1766 _sar(T, Ctx->getConstantInt(Ty, LogDiv));
1767 } 1767 }
1768 _mov(Dest, T); 1768 _mov(Dest, T);
1769 return; 1769 return;
1770 } 1770 }
1771 } 1771 }
1772 } 1772 }
1773 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); 1773 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
1774 switch (Ty) { 1774 switch (Ty) {
1775 default: 1775 default:
1776 llvm_unreachable("Bad type for sdiv"); 1776 llvm::report_fatal_error("Bad type for sdiv");
1777 // fallthrough 1777 case IceType_i64:
1778 T_edx = makeReg(Ty, Traits::getRdxOrDie());
1779 _mov(T, Src0, Traits::getRaxOrDie());
1780 break;
1778 case IceType_i32: 1781 case IceType_i32:
1779 T_edx = makeReg(Ty, Traits::RegisterSet::Reg_edx); 1782 T_edx = makeReg(Ty, Traits::RegisterSet::Reg_edx);
1780 _mov(T, Src0, Traits::RegisterSet::Reg_eax); 1783 _mov(T, Src0, Traits::RegisterSet::Reg_eax);
1781 break; 1784 break;
1782 case IceType_i16: 1785 case IceType_i16:
1783 T_edx = makeReg(Ty, Traits::RegisterSet::Reg_dx); 1786 T_edx = makeReg(Ty, Traits::RegisterSet::Reg_dx);
1784 _mov(T, Src0, Traits::RegisterSet::Reg_ax); 1787 _mov(T, Src0, Traits::RegisterSet::Reg_ax);
1785 break; 1788 break;
1786 case IceType_i8: 1789 case IceType_i8:
1787 T_edx = makeReg(IceType_i16, Traits::RegisterSet::Reg_ax); 1790 T_edx = makeReg(IceType_i16, Traits::RegisterSet::Reg_ax);
1788 _mov(T, Src0, Traits::RegisterSet::Reg_al); 1791 _mov(T, Src0, Traits::RegisterSet::Reg_al);
1789 break; 1792 break;
1790 } 1793 }
1791 _cbwdq(T_edx, T); 1794 _cbwdq(T_edx, T);
1792 _idiv(T, Src1, T_edx); 1795 _idiv(T, Src1, T_edx);
1793 _mov(Dest, T); 1796 _mov(Dest, T);
1794 break; 1797 break;
1795 case InstArithmetic::Urem: { 1798 case InstArithmetic::Urem: {
1796 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); 1799 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
1797 uint32_t Eax = Traits::RegisterSet::Reg_eax; 1800 uint32_t Eax;
1798 uint32_t Edx = Traits::RegisterSet::Reg_edx; 1801 uint32_t Edx;
1799 switch (Ty) { 1802 switch (Ty) {
1800 default: 1803 default:
1801 llvm_unreachable("Bad type for urem"); 1804 llvm::report_fatal_error("Bad type for urem");
1802 // fallthrough 1805 case IceType_i64:
1806 Eax = Traits::getRaxOrDie();
1807 Edx = Traits::getRdxOrDie();
1808 break;
1803 case IceType_i32: 1809 case IceType_i32:
1810 Eax = Traits::RegisterSet::Reg_eax;
1811 Edx = Traits::RegisterSet::Reg_edx;
1804 break; 1812 break;
1805 case IceType_i16: 1813 case IceType_i16:
1806 Eax = Traits::RegisterSet::Reg_ax; 1814 Eax = Traits::RegisterSet::Reg_ax;
1807 Edx = Traits::RegisterSet::Reg_dx; 1815 Edx = Traits::RegisterSet::Reg_dx;
1808 break; 1816 break;
1809 case IceType_i8: 1817 case IceType_i8:
1810 Eax = Traits::RegisterSet::Reg_al; 1818 Eax = Traits::RegisterSet::Reg_al;
1811 Edx = Traits::RegisterSet::Reg_ah; 1819 Edx = Traits::RegisterSet::Reg_ah;
1812 break; 1820 break;
1813 } 1821 }
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after
1851 _add(T, Src0); 1859 _add(T, Src0);
1852 _and(T, Ctx->getConstantInt(Ty, -(1 << LogDiv))); 1860 _and(T, Ctx->getConstantInt(Ty, -(1 << LogDiv)));
1853 _sub(T, Src0); 1861 _sub(T, Src0);
1854 _neg(T); 1862 _neg(T);
1855 _mov(Dest, T); 1863 _mov(Dest, T);
1856 return; 1864 return;
1857 } 1865 }
1858 } 1866 }
1859 } 1867 }
1860 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); 1868 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
1861 uint32_t Eax = Traits::RegisterSet::Reg_eax; 1869 uint32_t Eax;
1862 uint32_t Edx = Traits::RegisterSet::Reg_edx; 1870 uint32_t Edx;
1863 switch (Ty) { 1871 switch (Ty) {
1864 default: 1872 default:
1865 llvm_unreachable("Bad type for srem"); 1873 llvm::report_fatal_error("Bad type for srem");
1866 // fallthrough 1874 case IceType_i64:
1875 Eax = Traits::getRaxOrDie();
1876 Edx = Traits::getRdxOrDie();
1877 break;
1867 case IceType_i32: 1878 case IceType_i32:
1879 Eax = Traits::RegisterSet::Reg_eax;
1880 Edx = Traits::RegisterSet::Reg_edx;
1868 break; 1881 break;
1869 case IceType_i16: 1882 case IceType_i16:
1870 Eax = Traits::RegisterSet::Reg_ax; 1883 Eax = Traits::RegisterSet::Reg_ax;
1871 Edx = Traits::RegisterSet::Reg_dx; 1884 Edx = Traits::RegisterSet::Reg_dx;
1872 break; 1885 break;
1873 case IceType_i8: 1886 case IceType_i8:
1874 Eax = Traits::RegisterSet::Reg_al; 1887 Eax = Traits::RegisterSet::Reg_al;
1875 Edx = Traits::RegisterSet::Reg_ah; 1888 Edx = Traits::RegisterSet::Reg_ah;
1876 break; 1889 break;
1877 } 1890 }
(...skipping 1653 matching lines...) Expand 10 before | Expand all | Expand 10 after
3531 } 3544 }
3532 case Intrinsics::Sqrt: { 3545 case Intrinsics::Sqrt: {
3533 Operand *Src = legalize(Instr->getArg(0)); 3546 Operand *Src = legalize(Instr->getArg(0));
3534 Variable *Dest = Instr->getDest(); 3547 Variable *Dest = Instr->getDest();
3535 Variable *T = makeReg(Dest->getType()); 3548 Variable *T = makeReg(Dest->getType());
3536 _sqrtss(T, Src); 3549 _sqrtss(T, Src);
3537 _mov(Dest, T); 3550 _mov(Dest, T);
3538 return; 3551 return;
3539 } 3552 }
3540 case Intrinsics::Stacksave: { 3553 case Intrinsics::Stacksave: {
3541 Variable *esp = 3554 Variable *esp = Func->getTarget()->getPhysicalRegister(getStackReg());
3542 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
3543 Variable *Dest = Instr->getDest(); 3555 Variable *Dest = Instr->getDest();
3544 _mov(Dest, esp); 3556 _mov(Dest, esp);
3545 return; 3557 return;
3546 } 3558 }
3547 case Intrinsics::Stackrestore: { 3559 case Intrinsics::Stackrestore: {
3548 Variable *esp = 3560 Variable *esp = Func->getTarget()->getPhysicalRegister(getStackReg());
3549 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
3550 _redefined(_mov(esp, Instr->getArg(0))); 3561 _redefined(_mov(esp, Instr->getArg(0)));
3551 return; 3562 return;
3552 } 3563 }
3553 case Intrinsics::Trap: 3564 case Intrinsics::Trap:
3554 _ud2(); 3565 _ud2();
3555 return; 3566 return;
3556 case Intrinsics::UnknownIntrinsic: 3567 case Intrinsics::UnknownIntrinsic:
3557 Func->setError("Should not be lowering UnknownIntrinsic"); 3568 Func->setError("Should not be lowering UnknownIntrinsic");
3558 return; 3569 return;
3559 } 3570 }
(...skipping 21 matching lines...) Expand all
3581 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked); 3592 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);
3582 auto *DestLo = llvm::cast<Variable>(loOperand(DestPrev)); 3593 auto *DestLo = llvm::cast<Variable>(loOperand(DestPrev));
3583 auto *DestHi = llvm::cast<Variable>(hiOperand(DestPrev)); 3594 auto *DestHi = llvm::cast<Variable>(hiOperand(DestPrev));
3584 _mov(DestLo, T_eax); 3595 _mov(DestLo, T_eax);
3585 _mov(DestHi, T_edx); 3596 _mov(DestHi, T_edx);
3586 return; 3597 return;
3587 } 3598 }
3588 int32_t Eax; 3599 int32_t Eax;
3589 switch (Ty) { 3600 switch (Ty) {
3590 default: 3601 default:
3591 llvm_unreachable("Bad type for cmpxchg"); 3602 llvm::report_fatal_error("Bad type for cmpxchg");
3592 // fallthrough 3603 case IceType_i64:
3604 Eax = Traits::getRaxOrDie();
3605 break;
3593 case IceType_i32: 3606 case IceType_i32:
3594 Eax = Traits::RegisterSet::Reg_eax; 3607 Eax = Traits::RegisterSet::Reg_eax;
3595 break; 3608 break;
3596 case IceType_i16: 3609 case IceType_i16:
3597 Eax = Traits::RegisterSet::Reg_ax; 3610 Eax = Traits::RegisterSet::Reg_ax;
3598 break; 3611 break;
3599 case IceType_i8: 3612 case IceType_i8:
3600 Eax = Traits::RegisterSet::Reg_al; 3613 Eax = Traits::RegisterSet::Reg_al;
3601 break; 3614 break;
3602 } 3615 }
(...skipping 250 matching lines...) Expand 10 before | Expand all | Expand 10 after
3853 auto *DestLo = llvm::cast<Variable>(loOperand(Dest)); 3866 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
3854 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 3867 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3855 _mov(DestLo, T_eax); 3868 _mov(DestLo, T_eax);
3856 _mov(DestHi, T_edx); 3869 _mov(DestHi, T_edx);
3857 return; 3870 return;
3858 } 3871 }
3859 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty); 3872 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty);
3860 int32_t Eax; 3873 int32_t Eax;
3861 switch (Ty) { 3874 switch (Ty) {
3862 default: 3875 default:
3863 llvm_unreachable("Bad type for atomicRMW"); 3876 llvm::report_fatal_error("Bad type for atomicRMW");
3864 // fallthrough 3877 case IceType_i64:
3878 Eax = Traits::getRaxOrDie();
3879 break;
3865 case IceType_i32: 3880 case IceType_i32:
3866 Eax = Traits::RegisterSet::Reg_eax; 3881 Eax = Traits::RegisterSet::Reg_eax;
3867 break; 3882 break;
3868 case IceType_i16: 3883 case IceType_i16:
3869 Eax = Traits::RegisterSet::Reg_ax; 3884 Eax = Traits::RegisterSet::Reg_ax;
3870 break; 3885 break;
3871 case IceType_i8: 3886 case IceType_i8:
3872 Eax = Traits::RegisterSet::Reg_al; 3887 Eax = Traits::RegisterSet::Reg_al;
3873 break; 3888 break;
3874 } 3889 }
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after
3923 // 3938 //
3924 // Similar for 64-bit, but start w/ speculating that the upper 32 bits 3939 // Similar for 64-bit, but start w/ speculating that the upper 32 bits
3925 // are all zero, and compute the result for that case (checking the lower 3940 // are all zero, and compute the result for that case (checking the lower
3926 // 32 bits). Then actually compute the result for the upper bits and 3941 // 32 bits). Then actually compute the result for the upper bits and
3927 // cmov in the result from the lower computation if the earlier speculation 3942 // cmov in the result from the lower computation if the earlier speculation
3928 // was correct. 3943 // was correct.
3929 // 3944 //
3930 // Cttz is similar, but uses bsf instead, and doesn't require the xor 3945 // Cttz is similar, but uses bsf instead, and doesn't require the xor
3931 // bit position conversion, and the speculation is reversed. 3946 // bit position conversion, and the speculation is reversed.
3932 assert(Ty == IceType_i32 || Ty == IceType_i64); 3947 assert(Ty == IceType_i32 || Ty == IceType_i64);
3933 Variable *T = makeReg(IceType_i32); 3948 const Type DestTy = Traits::Is64Bit ? Dest->getType() : IceType_i32;
3949 Variable *T = makeReg(DestTy);
3934 Operand *FirstValRM = legalize(FirstVal, Legal_Mem | Legal_Reg); 3950 Operand *FirstValRM = legalize(FirstVal, Legal_Mem | Legal_Reg);
3935 if (Cttz) { 3951 if (Cttz) {
3936 _bsf(T, FirstValRM); 3952 _bsf(T, FirstValRM);
3937 } else { 3953 } else {
3938 _bsr(T, FirstValRM); 3954 _bsr(T, FirstValRM);
3939 } 3955 }
3940 Variable *T_Dest = makeReg(IceType_i32); 3956 Variable *T_Dest = makeReg(DestTy);
3941 Constant *ThirtyTwo = Ctx->getConstantInt32(32); 3957 Constant *_31 = Ctx->getConstantInt32(31);
3942 Constant *ThirtyOne = Ctx->getConstantInt32(31); 3958 Constant *_32 = Ctx->getConstantInt(DestTy, 32);
3943 if (Cttz) { 3959 if (Cttz) {
3944 _mov(T_Dest, ThirtyTwo); 3960 _mov(T_Dest, _32);
3945 } else { 3961 } else {
3946 Constant *SixtyThree = Ctx->getConstantInt32(63); 3962 Constant *_63 = Ctx->getConstantInt(DestTy, 63);
3947 _mov(T_Dest, SixtyThree); 3963 _mov(T_Dest, _63);
3948 } 3964 }
3949 _cmov(T_Dest, T, Traits::Cond::Br_ne); 3965 _cmov(T_Dest, T, Traits::Cond::Br_ne);
3950 if (!Cttz) { 3966 if (!Cttz) {
3951 _xor(T_Dest, ThirtyOne); 3967 _xor(T_Dest, _31);
3952 } 3968 }
3953 if (Traits::Is64Bit || Ty == IceType_i32) { 3969 if (Traits::Is64Bit || Ty == IceType_i32) {
3954 _mov(Dest, T_Dest); 3970 _mov(Dest, T_Dest);
3955 return; 3971 return;
3956 } 3972 }
3957 _add(T_Dest, ThirtyTwo); 3973 _add(T_Dest, _32);
3958 auto *DestLo = llvm::cast<Variable>(loOperand(Dest)); 3974 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
3959 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 3975 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3960 // Will be using "test" on this, so we need a registerized variable. 3976 // Will be using "test" on this, so we need a registerized variable.
3961 Variable *SecondVar = legalizeToReg(SecondVal); 3977 Variable *SecondVar = legalizeToReg(SecondVal);
3962 Variable *T_Dest2 = makeReg(IceType_i32); 3978 Variable *T_Dest2 = makeReg(IceType_i32);
3963 if (Cttz) { 3979 if (Cttz) {
3964 _bsf(T_Dest2, SecondVar); 3980 _bsf(T_Dest2, SecondVar);
3965 } else { 3981 } else {
3966 _bsr(T_Dest2, SecondVar); 3982 _bsr(T_Dest2, SecondVar);
3967 _xor(T_Dest2, ThirtyOne); 3983 _xor(T_Dest2, _31);
3968 } 3984 }
3969 _test(SecondVar, SecondVar); 3985 _test(SecondVar, SecondVar);
3970 _cmov(T_Dest2, T_Dest, Traits::Cond::Br_e); 3986 _cmov(T_Dest2, T_Dest, Traits::Cond::Br_e);
3971 _mov(DestLo, T_Dest2); 3987 _mov(DestLo, T_Dest2);
3972 _mov(DestHi, Ctx->getConstantZero(IceType_i32)); 3988 _mov(DestHi, Ctx->getConstantZero(IceType_i32));
3973 } 3989 }
3974 3990
3975 template <class Machine> 3991 template <class Machine>
3976 void TargetX86Base<Machine>::typedLoad(Type Ty, Variable *Dest, Variable *Base, 3992 void TargetX86Base<Machine>::typedLoad(Type Ty, Variable *Dest, Variable *Base,
3977 Constant *Offset) { 3993 Constant *Offset) {
(...skipping 193 matching lines...) Expand 10 before | Expand all | Expand 10 after
4171 4187
4172 // TODO(ascull): is 64-bit better with vector or scalar movq? 4188 // TODO(ascull): is 64-bit better with vector or scalar movq?
4173 auto *Mem = Traits::X86OperandMem::create(Func, Ty, Base, Offset); 4189 auto *Mem = Traits::X86OperandMem::create(Func, Ty, Base, Offset);
4174 if (isVectorType(Ty)) { 4190 if (isVectorType(Ty)) {
4175 assert(VecReg != nullptr); 4191 assert(VecReg != nullptr);
4176 _storep(VecReg, Mem); 4192 _storep(VecReg, Mem);
4177 } else if (Ty == IceType_f64) { 4193 } else if (Ty == IceType_f64) {
4178 assert(VecReg != nullptr); 4194 assert(VecReg != nullptr);
4179 _storeq(VecReg, Mem); 4195 _storeq(VecReg, Mem);
4180 } else { 4196 } else {
4197 assert(Ty != IceType_i64);
4181 _store(Ctx->getConstantInt(Ty, SpreadValue), Mem); 4198 _store(Ctx->getConstantInt(Ty, SpreadValue), Mem);
4182 } 4199 }
4183 }; 4200 };
4184 4201
4185 // Find the largest type that can be used and use it as much as possible in 4202 // Find the largest type that can be used and use it as much as possible in
4186 // reverse order. Then handle any remainder with overlapping copies. Since 4203 // reverse order. Then handle any remainder with overlapping copies. Since
4187 // the remainder will be at the end, there will be reduced pressure on the 4204 // the remainder will be at the end, there will be reduced pressure on the
4188 // memory unit as the accesses to the same memory are far apart. 4205 // memory unit as the accesses to the same memory are far apart.
4189 Type Ty; 4206 Type Ty;
4190 if (ValValue == 0 && CountValue >= BytesPerStoreq && 4207 if (ValValue == 0 && CountValue >= BytesPerStoreq &&
(...skipping 1937 matching lines...) Expand 10 before | Expand all | Expand 10 after
6128 Variable *Reg = Func->makeVariable(Type); 6145 Variable *Reg = Func->makeVariable(Type);
6129 if (RegNum == Variable::NoRegister) 6146 if (RegNum == Variable::NoRegister)
6130 Reg->setMustHaveReg(); 6147 Reg->setMustHaveReg();
6131 else 6148 else
6132 Reg->setRegNum(RegNum); 6149 Reg->setRegNum(RegNum);
6133 return Reg; 6150 return Reg;
6134 } 6151 }
6135 6152
// Table of types consumed by largestTypeInSize(), which indexes it with the
// result of llvm::findLastSet() on a byte count.
// Changed rows: in the old column the fourth entry is IceType_i64 when
// Traits::Is64Bit is set and IceType_f64 otherwise; in the new column the
// fourth entry is always IceType_f64.
6136 template <class Machine> 6153 template <class Machine>
6137 const Type TargetX86Base<Machine>::TypeForSize[] = { 6154 const Type TargetX86Base<Machine>::TypeForSize[] = {
6138 IceType_i8, IceType_i16, IceType_i32, 6155 IceType_i8, IceType_i16, IceType_i32, IceType_f64, IceType_v16i8};
6139 (Traits::Is64Bit ? IceType_i64 : IceType_f64), IceType_v16i8};
// Unchanged between the two revisions apart from line numbering.
// Returns the TypeForSize entry selected by Size, capped by MaxSize.
// Size must be non-zero (asserted).
6140 template <class Machine> 6156 template <class Machine>
6141 Type TargetX86Base<Machine>::largestTypeInSize(uint32_t Size, 6157 Type TargetX86Base<Machine>::largestTypeInSize(uint32_t Size,
6142 uint32_t MaxSize) { 6158 uint32_t MaxSize) {
6143 assert(Size != 0); 6159 assert(Size != 0);
// Presumably findLastSet yields the index of the highest set bit, i.e.
// floor(log2(Size)) - confirm against LLVM's MathExtras.h.
6144 uint32_t TyIndex = llvm::findLastSet(Size, llvm::ZB_Undefined); 6160 uint32_t TyIndex = llvm::findLastSet(Size, llvm::ZB_Undefined);
// With NoSizeLimit the cap is the last table index; otherwise the cap is
// derived from MaxSize the same way TyIndex is derived from Size.
// NOTE(review): in the MaxSize branch nothing visible here bounds the index
// by the table length - confirm callers only pass MaxSize values that map
// inside TypeForSize.
6145 uint32_t MaxIndex = MaxSize == NoSizeLimit 6161 uint32_t MaxIndex = MaxSize == NoSizeLimit
6146 ? llvm::array_lengthof(TypeForSize) - 1 6162 ? llvm::array_lengthof(TypeForSize) - 1
6147 : llvm::findLastSet(MaxSize, llvm::ZB_Undefined); 6163 : llvm::findLastSet(MaxSize, llvm::ZB_Undefined);
// The smaller of the two indices selects the returned type.
6148 return TypeForSize[std::min(TyIndex, MaxIndex)]; 6164 return TypeForSize[std::min(TyIndex, MaxIndex)];
6149 } 6165 }
(...skipping 253 matching lines...) Expand 10 before | Expand all | Expand 10 after
6403 } 6419 }
6404 // the offset is not eligible for blinding or pooling, return the original 6420 // the offset is not eligible for blinding or pooling, return the original
6405 // mem operand 6421 // mem operand
6406 return MemOperand; 6422 return MemOperand;
6407 } 6423 }
6408 6424
6409 } // end of namespace X86Internal 6425 } // end of namespace X86Internal
6410 } // end of namespace Ice 6426 } // end of namespace Ice
6411 6427
6412 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H 6428 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698