Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(96)

Side by Side Diff: src/IceTargetLoweringX86BaseImpl.h

Issue 1537703002: Subzero. x8664. Resurrects the Target. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Beautifies the assembler. Created 5 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
(...skipping 169 matching lines...) Expand 10 before | Expand all | Expand 10 after
180 /// This generally means that its lowering sequence requires more than one 180 /// This generally means that its lowering sequence requires more than one
181 /// conditional branch, namely 64-bit integer compares and some floating-point 181 /// conditional branch, namely 64-bit integer compares and some floating-point
182 /// compares. When this is true, and there is more than one consumer, we prefer 182 /// compares. When this is true, and there is more than one consumer, we prefer
183 /// to disable the folding optimization because it minimizes branches. 183 /// to disable the folding optimization because it minimizes branches.
184 template <class MachineTraits> 184 template <class MachineTraits>
185 bool BoolFolding<MachineTraits>::hasComplexLowering(const Inst *Instr) { 185 bool BoolFolding<MachineTraits>::hasComplexLowering(const Inst *Instr) {
186 switch (getProducerKind(Instr)) { 186 switch (getProducerKind(Instr)) {
187 default: 187 default:
188 return false; 188 return false;
189 case PK_Icmp64: 189 case PK_Icmp64:
190 return true; 190 return !MachineTraits::Is64Bit;
191 case PK_Fcmp: 191 case PK_Fcmp:
192 return MachineTraits::TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()] 192 return MachineTraits::TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()]
193 .C2 != MachineTraits::Cond::Br_None; 193 .C2 != MachineTraits::Cond::Br_None;
194 } 194 }
195 } 195 }
196 196
197 template <class MachineTraits> 197 template <class MachineTraits>
198 bool BoolFolding<MachineTraits>::isValidFolding( 198 bool BoolFolding<MachineTraits>::isValidFolding(
199 typename BoolFolding<MachineTraits>::BoolFoldingProducerKind ProducerKind, 199 typename BoolFolding<MachineTraits>::BoolFoldingProducerKind ProducerKind,
200 typename BoolFolding<MachineTraits>::BoolFoldingConsumerKind ConsumerKind) { 200 typename BoolFolding<MachineTraits>::BoolFoldingConsumerKind ConsumerKind) {
(...skipping 558 matching lines...) Expand 10 before | Expand all | Expand 10 after
759 bool TargetX86Base<Machine>::doBranchOpt(Inst *I, const CfgNode *NextNode) { 759 bool TargetX86Base<Machine>::doBranchOpt(Inst *I, const CfgNode *NextNode) {
760 if (auto *Br = llvm::dyn_cast<typename Traits::Insts::Br>(I)) { 760 if (auto *Br = llvm::dyn_cast<typename Traits::Insts::Br>(I)) {
761 return Br->optimizeBranch(NextNode); 761 return Br->optimizeBranch(NextNode);
762 } 762 }
763 return false; 763 return false;
764 } 764 }
765 765
766 template <class Machine> 766 template <class Machine>
767 Variable *TargetX86Base<Machine>::getPhysicalRegister(SizeT RegNum, Type Ty) { 767 Variable *TargetX86Base<Machine>::getPhysicalRegister(SizeT RegNum, Type Ty) {
768 // Special case: never allow partial reads/writes to/from %rBP and %rSP. 768 // Special case: never allow partial reads/writes to/from %rBP and %rSP.
769 if (RegNum == Traits::RegisterSet::Reg_esp || 769 if (RegNum == getStackReg() || RegNum == getFrameReg())
770 RegNum == Traits::RegisterSet::Reg_ebp)
771 Ty = Traits::WordType; 770 Ty = Traits::WordType;
772 if (Ty == IceType_void) 771 if (Ty == IceType_void)
773 Ty = IceType_i32; 772 Ty = IceType_i32;
774 if (PhysicalRegisters[Ty].empty()) 773 if (PhysicalRegisters[Ty].empty())
775 PhysicalRegisters[Ty].resize(Traits::RegisterSet::Reg_NUM); 774 PhysicalRegisters[Ty].resize(Traits::RegisterSet::Reg_NUM);
776 assert(RegNum < PhysicalRegisters[Ty].size()); 775 assert(RegNum < PhysicalRegisters[Ty].size());
777 Variable *Reg = PhysicalRegisters[Ty][RegNum]; 776 Variable *Reg = PhysicalRegisters[Ty][RegNum];
778 if (Reg == nullptr) { 777 if (Reg == nullptr) {
779 Reg = Func->makeVariable(Ty); 778 Reg = Func->makeVariable(Ty);
780 Reg->setRegNum(RegNum); 779 Reg->setRegNum(RegNum);
(...skipping 210 matching lines...) Expand 10 before | Expand all | Expand 10 after
991 std::max(AlignmentParam, Traits::X86_STACK_ALIGNMENT_BYTES); 990 std::max(AlignmentParam, Traits::X86_STACK_ALIGNMENT_BYTES);
992 const bool OverAligned = Alignment > Traits::X86_STACK_ALIGNMENT_BYTES; 991 const bool OverAligned = Alignment > Traits::X86_STACK_ALIGNMENT_BYTES;
993 const bool OptM1 = Ctx->getFlags().getOptLevel() == Opt_m1; 992 const bool OptM1 = Ctx->getFlags().getOptLevel() == Opt_m1;
994 const bool AllocaWithKnownOffset = Inst->getKnownFrameOffset(); 993 const bool AllocaWithKnownOffset = Inst->getKnownFrameOffset();
995 const bool UseFramePointer = 994 const bool UseFramePointer =
996 hasFramePointer() || OverAligned || !AllocaWithKnownOffset || OptM1; 995 hasFramePointer() || OverAligned || !AllocaWithKnownOffset || OptM1;
997 996
998 if (UseFramePointer) 997 if (UseFramePointer)
999 setHasFramePointer(); 998 setHasFramePointer();
1000 999
1001 Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp); 1000 Variable *esp = getPhysicalRegister(getStackReg());
1002 if (OverAligned) { 1001 if (OverAligned) {
1003 _and(esp, Ctx->getConstantInt32(-Alignment)); 1002 _and(esp, Ctx->getConstantInt32(-Alignment));
1004 } 1003 }
1005 1004
1006 Variable *Dest = Inst->getDest(); 1005 Variable *Dest = Inst->getDest();
1007 Operand *TotalSize = legalize(Inst->getSizeInBytes()); 1006 Operand *TotalSize = legalize(Inst->getSizeInBytes());
1008 1007
1009 if (const auto *ConstantTotalSize = 1008 if (const auto *ConstantTotalSize =
1010 llvm::dyn_cast<ConstantInteger32>(TotalSize)) { 1009 llvm::dyn_cast<ConstantInteger32>(TotalSize)) {
1011 const uint32_t Value = 1010 const uint32_t Value =
(...skipping 694 matching lines...) Expand 10 before | Expand all | Expand 10 after
1706 _mov(T, Src0); 1705 _mov(T, Src0);
1707 if (!llvm::isa<ConstantInteger32>(Src1)) 1706 if (!llvm::isa<ConstantInteger32>(Src1))
1708 Src1 = copyToReg8(Src1, Traits::RegisterSet::Reg_cl); 1707 Src1 = copyToReg8(Src1, Traits::RegisterSet::Reg_cl);
1709 _sar(T, Src1); 1708 _sar(T, Src1);
1710 _mov(Dest, T); 1709 _mov(Dest, T);
1711 break; 1710 break;
1712 case InstArithmetic::Udiv: { 1711 case InstArithmetic::Udiv: {
1713 // div and idiv are the few arithmetic operators that do not allow 1712 // div and idiv are the few arithmetic operators that do not allow
1714 // immediates as the operand. 1713 // immediates as the operand.
1715 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); 1714 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
1716 uint32_t Eax = Traits::RegisterSet::Reg_eax; 1715 uint32_t Eax;
1717 uint32_t Edx = Traits::RegisterSet::Reg_edx; 1716 uint32_t Edx;
1718 switch (Ty) { 1717 switch (Ty) {
1719 default: 1718 default:
1720 llvm_unreachable("Bad type for udiv"); 1719 llvm::report_fatal_error("Bad type for udiv");
1721 // fallthrough 1720 case IceType_i64:
1721 Eax = Traits::getRaxOrDie();
1722 Edx = Traits::getRdxOrDie();
1722 case IceType_i32: 1723 case IceType_i32:
1724 Eax = Traits::RegisterSet::Reg_eax;
1725 Edx = Traits::RegisterSet::Reg_edx;
1723 break; 1726 break;
1724 case IceType_i16: 1727 case IceType_i16:
1725 Eax = Traits::RegisterSet::Reg_ax; 1728 Eax = Traits::RegisterSet::Reg_ax;
1726 Edx = Traits::RegisterSet::Reg_dx; 1729 Edx = Traits::RegisterSet::Reg_dx;
1727 break; 1730 break;
1728 case IceType_i8: 1731 case IceType_i8:
1729 Eax = Traits::RegisterSet::Reg_al; 1732 Eax = Traits::RegisterSet::Reg_al;
1730 Edx = Traits::RegisterSet::Reg_ah; 1733 Edx = Traits::RegisterSet::Reg_ah;
1731 break; 1734 break;
1732 } 1735 }
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after
1766 _sar(T, Ctx->getConstantInt(Ty, LogDiv)); 1769 _sar(T, Ctx->getConstantInt(Ty, LogDiv));
1767 } 1770 }
1768 _mov(Dest, T); 1771 _mov(Dest, T);
1769 return; 1772 return;
1770 } 1773 }
1771 } 1774 }
1772 } 1775 }
1773 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); 1776 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
1774 switch (Ty) { 1777 switch (Ty) {
1775 default: 1778 default:
1776 llvm_unreachable("Bad type for sdiv"); 1779 llvm::report_fatal_error("Bad type for sdiv");
1777 // fallthrough 1780 case IceType_i64:
1781 T_edx = makeReg(Ty, Traits::getRdxOrDie());
1782 _mov(T, Src0, Traits::getRaxOrDie());
1783 break;
1778 case IceType_i32: 1784 case IceType_i32:
1779 T_edx = makeReg(Ty, Traits::RegisterSet::Reg_edx); 1785 T_edx = makeReg(Ty, Traits::RegisterSet::Reg_edx);
1780 _mov(T, Src0, Traits::RegisterSet::Reg_eax); 1786 _mov(T, Src0, Traits::RegisterSet::Reg_eax);
1781 break; 1787 break;
1782 case IceType_i16: 1788 case IceType_i16:
1783 T_edx = makeReg(Ty, Traits::RegisterSet::Reg_dx); 1789 T_edx = makeReg(Ty, Traits::RegisterSet::Reg_dx);
1784 _mov(T, Src0, Traits::RegisterSet::Reg_ax); 1790 _mov(T, Src0, Traits::RegisterSet::Reg_ax);
1785 break; 1791 break;
1786 case IceType_i8: 1792 case IceType_i8:
1787 T_edx = makeReg(IceType_i16, Traits::RegisterSet::Reg_ax); 1793 T_edx = makeReg(IceType_i16, Traits::RegisterSet::Reg_ax);
1788 _mov(T, Src0, Traits::RegisterSet::Reg_al); 1794 _mov(T, Src0, Traits::RegisterSet::Reg_al);
1789 break; 1795 break;
1790 } 1796 }
1791 _cbwdq(T_edx, T); 1797 _cbwdq(T_edx, T);
1792 _idiv(T, Src1, T_edx); 1798 _idiv(T, Src1, T_edx);
1793 _mov(Dest, T); 1799 _mov(Dest, T);
1794 break; 1800 break;
1795 case InstArithmetic::Urem: { 1801 case InstArithmetic::Urem: {
1796 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); 1802 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
1797 uint32_t Eax = Traits::RegisterSet::Reg_eax; 1803 uint32_t Eax;
1798 uint32_t Edx = Traits::RegisterSet::Reg_edx; 1804 uint32_t Edx;
1799 switch (Ty) { 1805 switch (Ty) {
1800 default: 1806 default:
1801 llvm_unreachable("Bad type for urem"); 1807 llvm::report_fatal_error("Bad type for urem");
1802 // fallthrough 1808 case IceType_i64:
1809 Eax = Traits::getRaxOrDie();
1810 Edx = Traits::getRdxOrDie();
1811 break;
1803 case IceType_i32: 1812 case IceType_i32:
1813 Eax = Traits::RegisterSet::Reg_eax;
1814 Edx = Traits::RegisterSet::Reg_edx;
1804 break; 1815 break;
1805 case IceType_i16: 1816 case IceType_i16:
1806 Eax = Traits::RegisterSet::Reg_ax; 1817 Eax = Traits::RegisterSet::Reg_ax;
1807 Edx = Traits::RegisterSet::Reg_dx; 1818 Edx = Traits::RegisterSet::Reg_dx;
1808 break; 1819 break;
1809 case IceType_i8: 1820 case IceType_i8:
1810 Eax = Traits::RegisterSet::Reg_al; 1821 Eax = Traits::RegisterSet::Reg_al;
1811 Edx = Traits::RegisterSet::Reg_ah; 1822 Edx = Traits::RegisterSet::Reg_ah;
1812 break; 1823 break;
1813 } 1824 }
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after
1851 _add(T, Src0); 1862 _add(T, Src0);
1852 _and(T, Ctx->getConstantInt(Ty, -(1 << LogDiv))); 1863 _and(T, Ctx->getConstantInt(Ty, -(1 << LogDiv)));
1853 _sub(T, Src0); 1864 _sub(T, Src0);
1854 _neg(T); 1865 _neg(T);
1855 _mov(Dest, T); 1866 _mov(Dest, T);
1856 return; 1867 return;
1857 } 1868 }
1858 } 1869 }
1859 } 1870 }
1860 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); 1871 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
1861 uint32_t Eax = Traits::RegisterSet::Reg_eax; 1872 uint32_t Eax;
1862 uint32_t Edx = Traits::RegisterSet::Reg_edx; 1873 uint32_t Edx;
1863 switch (Ty) { 1874 switch (Ty) {
1864 default: 1875 default:
1865 llvm_unreachable("Bad type for srem"); 1876 llvm::report_fatal_error("Bad type for srem");
1866 // fallthrough 1877 case IceType_i64:
1878 Eax = Traits::getRaxOrDie();
1879 Edx = Traits::getRdxOrDie();
1880 break;
1867 case IceType_i32: 1881 case IceType_i32:
1882 Eax = Traits::RegisterSet::Reg_eax;
1883 Edx = Traits::RegisterSet::Reg_edx;
1868 break; 1884 break;
1869 case IceType_i16: 1885 case IceType_i16:
1870 Eax = Traits::RegisterSet::Reg_ax; 1886 Eax = Traits::RegisterSet::Reg_ax;
1871 Edx = Traits::RegisterSet::Reg_dx; 1887 Edx = Traits::RegisterSet::Reg_dx;
1872 break; 1888 break;
1873 case IceType_i8: 1889 case IceType_i8:
1874 Eax = Traits::RegisterSet::Reg_al; 1890 Eax = Traits::RegisterSet::Reg_al;
1875 Edx = Traits::RegisterSet::Reg_ah; 1891 Edx = Traits::RegisterSet::Reg_ah;
1876 break; 1892 break;
1877 } 1893 }
(...skipping 945 matching lines...) Expand 10 before | Expand all | Expand 10 after
2823 2839
2824 _movp(Dest, T); 2840 _movp(Dest, T);
2825 eliminateNextVectorSextInstruction(Dest); 2841 eliminateNextVectorSextInstruction(Dest);
2826 } 2842 }
2827 2843
2828 template <typename Machine> 2844 template <typename Machine>
2829 template <typename T> 2845 template <typename T>
2830 typename std::enable_if<!T::Is64Bit, void>::type 2846 typename std::enable_if<!T::Is64Bit, void>::type
2831 TargetX86Base<Machine>::lowerIcmp64(const InstIcmp *Icmp, 2847 TargetX86Base<Machine>::lowerIcmp64(const InstIcmp *Icmp,
2832 const Inst *Consumer) { 2848 const Inst *Consumer) {
2849 assert(!T::Is64Bit);
Jim Stichnoth 2015/12/20 19:27:38 This assert seems completely unnecessary given the
John 2015/12/21 13:41:32 Done.
2833 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1: 2850 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1:
2834 Operand *Src0 = legalize(Icmp->getSrc(0)); 2851 Operand *Src0 = legalize(Icmp->getSrc(0));
2835 Operand *Src1 = legalize(Icmp->getSrc(1)); 2852 Operand *Src1 = legalize(Icmp->getSrc(1));
2836 Variable *Dest = Icmp->getDest(); 2853 Variable *Dest = Icmp->getDest();
2837 InstIcmp::ICond Condition = Icmp->getCondition(); 2854 InstIcmp::ICond Condition = Icmp->getCondition();
2838 size_t Index = static_cast<size_t>(Condition); 2855 size_t Index = static_cast<size_t>(Condition);
2839 assert(Index < Traits::TableIcmp64Size); 2856 assert(Index < Traits::TableIcmp64Size);
2840 Operand *Src0LoRM = nullptr; 2857 Operand *Src0LoRM = nullptr;
2841 Operand *Src0HiRM = nullptr; 2858 Operand *Src0HiRM = nullptr;
2842 // Legalize the portions of Src0 that are going to be needed. 2859 // Legalize the portions of Src0 that are going to be needed.
(...skipping 688 matching lines...) Expand 10 before | Expand all | Expand 10 after
3531 } 3548 }
3532 case Intrinsics::Sqrt: { 3549 case Intrinsics::Sqrt: {
3533 Operand *Src = legalize(Instr->getArg(0)); 3550 Operand *Src = legalize(Instr->getArg(0));
3534 Variable *Dest = Instr->getDest(); 3551 Variable *Dest = Instr->getDest();
3535 Variable *T = makeReg(Dest->getType()); 3552 Variable *T = makeReg(Dest->getType());
3536 _sqrtss(T, Src); 3553 _sqrtss(T, Src);
3537 _mov(Dest, T); 3554 _mov(Dest, T);
3538 return; 3555 return;
3539 } 3556 }
3540 case Intrinsics::Stacksave: { 3557 case Intrinsics::Stacksave: {
3541 Variable *esp = 3558 Variable *esp = Func->getTarget()->getPhysicalRegister(getStackReg());
3542 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
3543 Variable *Dest = Instr->getDest(); 3559 Variable *Dest = Instr->getDest();
3544 _mov(Dest, esp); 3560 _mov(Dest, esp);
3545 return; 3561 return;
3546 } 3562 }
3547 case Intrinsics::Stackrestore: { 3563 case Intrinsics::Stackrestore: {
3548 Variable *esp = 3564 Variable *esp = Func->getTarget()->getPhysicalRegister(getStackReg());
3549 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
3550 _redefined(_mov(esp, Instr->getArg(0))); 3565 _redefined(_mov(esp, Instr->getArg(0)));
3551 return; 3566 return;
3552 } 3567 }
3553 case Intrinsics::Trap: 3568 case Intrinsics::Trap:
3554 _ud2(); 3569 _ud2();
3555 return; 3570 return;
3556 case Intrinsics::UnknownIntrinsic: 3571 case Intrinsics::UnknownIntrinsic:
3557 Func->setError("Should not be lowering UnknownIntrinsic"); 3572 Func->setError("Should not be lowering UnknownIntrinsic");
3558 return; 3573 return;
3559 } 3574 }
(...skipping 21 matching lines...) Expand all
3581 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked); 3596 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);
3582 auto *DestLo = llvm::cast<Variable>(loOperand(DestPrev)); 3597 auto *DestLo = llvm::cast<Variable>(loOperand(DestPrev));
3583 auto *DestHi = llvm::cast<Variable>(hiOperand(DestPrev)); 3598 auto *DestHi = llvm::cast<Variable>(hiOperand(DestPrev));
3584 _mov(DestLo, T_eax); 3599 _mov(DestLo, T_eax);
3585 _mov(DestHi, T_edx); 3600 _mov(DestHi, T_edx);
3586 return; 3601 return;
3587 } 3602 }
3588 int32_t Eax; 3603 int32_t Eax;
3589 switch (Ty) { 3604 switch (Ty) {
3590 default: 3605 default:
3591 llvm_unreachable("Bad type for cmpxchg"); 3606 llvm::report_fatal_error("Bad type for cmpxchg");
3592 // fallthrough 3607 case IceType_i64:
3608 Eax = Traits::getRaxOrDie();
3609 break;
3593 case IceType_i32: 3610 case IceType_i32:
3594 Eax = Traits::RegisterSet::Reg_eax; 3611 Eax = Traits::RegisterSet::Reg_eax;
3595 break; 3612 break;
3596 case IceType_i16: 3613 case IceType_i16:
3597 Eax = Traits::RegisterSet::Reg_ax; 3614 Eax = Traits::RegisterSet::Reg_ax;
3598 break; 3615 break;
3599 case IceType_i8: 3616 case IceType_i8:
3600 Eax = Traits::RegisterSet::Reg_al; 3617 Eax = Traits::RegisterSet::Reg_al;
3601 break; 3618 break;
3602 } 3619 }
(...skipping 250 matching lines...) Expand 10 before | Expand all | Expand 10 after
3853 auto *DestLo = llvm::cast<Variable>(loOperand(Dest)); 3870 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
3854 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 3871 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3855 _mov(DestLo, T_eax); 3872 _mov(DestLo, T_eax);
3856 _mov(DestHi, T_edx); 3873 _mov(DestHi, T_edx);
3857 return; 3874 return;
3858 } 3875 }
3859 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty); 3876 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty);
3860 int32_t Eax; 3877 int32_t Eax;
3861 switch (Ty) { 3878 switch (Ty) {
3862 default: 3879 default:
3863 llvm_unreachable("Bad type for atomicRMW"); 3880 llvm::report_fatal_error("Bad type for atomicRMW");
3864 // fallthrough 3881 case IceType_i64:
3882 Eax = Traits::getRaxOrDie();
3883 break;
3865 case IceType_i32: 3884 case IceType_i32:
3866 Eax = Traits::RegisterSet::Reg_eax; 3885 Eax = Traits::RegisterSet::Reg_eax;
3867 break; 3886 break;
3868 case IceType_i16: 3887 case IceType_i16:
3869 Eax = Traits::RegisterSet::Reg_ax; 3888 Eax = Traits::RegisterSet::Reg_ax;
3870 break; 3889 break;
3871 case IceType_i8: 3890 case IceType_i8:
3872 Eax = Traits::RegisterSet::Reg_al; 3891 Eax = Traits::RegisterSet::Reg_al;
3873 break; 3892 break;
3874 } 3893 }
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after
3923 // 3942 //
3924 // Similar for 64-bit, but start w/ speculating that the upper 32 bits 3943 // Similar for 64-bit, but start w/ speculating that the upper 32 bits
3925 // are all zero, and compute the result for that case (checking the lower 3944 // are all zero, and compute the result for that case (checking the lower
3926 // 32 bits). Then actually compute the result for the upper bits and 3945 // 32 bits). Then actually compute the result for the upper bits and
3927 // cmov in the result from the lower computation if the earlier speculation 3946 // cmov in the result from the lower computation if the earlier speculation
3928 // was correct. 3947 // was correct.
3929 // 3948 //
3930 // Cttz is similar, but uses bsf instead, and doesn't require the xor 3949 // Cttz is similar, but uses bsf instead, and doesn't require the xor
3931 // bit position conversion, and the speculation is reversed. 3950 // bit position conversion, and the speculation is reversed.
3932 assert(Ty == IceType_i32 || Ty == IceType_i64); 3951 assert(Ty == IceType_i32 || Ty == IceType_i64);
3933 Variable *T = makeReg(IceType_i32); 3952 const Type DestTy = Traits::Is64Bit ? Dest->getType() : IceType_i32;
3953 Variable *T = makeReg(DestTy);
3934 Operand *FirstValRM = legalize(FirstVal, Legal_Mem | Legal_Reg); 3954 Operand *FirstValRM = legalize(FirstVal, Legal_Mem | Legal_Reg);
3935 if (Cttz) { 3955 if (Cttz) {
3936 _bsf(T, FirstValRM); 3956 _bsf(T, FirstValRM);
3937 } else { 3957 } else {
3938 _bsr(T, FirstValRM); 3958 _bsr(T, FirstValRM);
3939 } 3959 }
3940 Variable *T_Dest = makeReg(IceType_i32); 3960 Variable *T_Dest = makeReg(DestTy);
3941 Constant *ThirtyTwo = Ctx->getConstantInt32(32); 3961 Constant *_31 = Ctx->getConstantInt32(31);
3942 Constant *ThirtyOne = Ctx->getConstantInt32(31); 3962 Constant *_32 = Ctx->getConstantInt(DestTy, 32);
3943 if (Cttz) { 3963 if (Cttz) {
3944 _mov(T_Dest, ThirtyTwo); 3964 _mov(T_Dest, _32);
3945 } else { 3965 } else {
3946 Constant *SixtyThree = Ctx->getConstantInt32(63); 3966 Constant *_63 = Ctx->getConstantInt(DestTy, 63);
3947 _mov(T_Dest, SixtyThree); 3967 _mov(T_Dest, _63);
3948 } 3968 }
3949 _cmov(T_Dest, T, Traits::Cond::Br_ne); 3969 _cmov(T_Dest, T, Traits::Cond::Br_ne);
3950 if (!Cttz) { 3970 if (!Cttz) {
3951 _xor(T_Dest, ThirtyOne); 3971 _xor(T_Dest, _31);
3952 } 3972 }
3953 if (Traits::Is64Bit || Ty == IceType_i32) { 3973 if (Traits::Is64Bit || Ty == IceType_i32) {
3954 _mov(Dest, T_Dest); 3974 _mov(Dest, T_Dest);
3955 return; 3975 return;
3956 } 3976 }
3957 _add(T_Dest, ThirtyTwo); 3977 _add(T_Dest, _32);
3958 auto *DestLo = llvm::cast<Variable>(loOperand(Dest)); 3978 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
3959 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 3979 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3960 // Will be using "test" on this, so we need a registerized variable. 3980 // Will be using "test" on this, so we need a registerized variable.
3961 Variable *SecondVar = legalizeToReg(SecondVal); 3981 Variable *SecondVar = legalizeToReg(SecondVal);
3962 Variable *T_Dest2 = makeReg(IceType_i32); 3982 Variable *T_Dest2 = makeReg(IceType_i32);
3963 if (Cttz) { 3983 if (Cttz) {
3964 _bsf(T_Dest2, SecondVar); 3984 _bsf(T_Dest2, SecondVar);
3965 } else { 3985 } else {
3966 _bsr(T_Dest2, SecondVar); 3986 _bsr(T_Dest2, SecondVar);
3967 _xor(T_Dest2, ThirtyOne); 3987 _xor(T_Dest2, _31);
3968 } 3988 }
3969 _test(SecondVar, SecondVar); 3989 _test(SecondVar, SecondVar);
3970 _cmov(T_Dest2, T_Dest, Traits::Cond::Br_e); 3990 _cmov(T_Dest2, T_Dest, Traits::Cond::Br_e);
3971 _mov(DestLo, T_Dest2); 3991 _mov(DestLo, T_Dest2);
3972 _mov(DestHi, Ctx->getConstantZero(IceType_i32)); 3992 _mov(DestHi, Ctx->getConstantZero(IceType_i32));
3973 } 3993 }
3974 3994
3975 template <class Machine> 3995 template <class Machine>
3976 void TargetX86Base<Machine>::typedLoad(Type Ty, Variable *Dest, Variable *Base, 3996 void TargetX86Base<Machine>::typedLoad(Type Ty, Variable *Dest, Variable *Base,
3977 Constant *Offset) { 3997 Constant *Offset) {
(...skipping 193 matching lines...) Expand 10 before | Expand all | Expand 10 after
4171 4191
4172 // TODO(ascull): is 64-bit better with vector or scalar movq? 4192 // TODO(ascull): is 64-bit better with vector or scalar movq?
4173 auto *Mem = Traits::X86OperandMem::create(Func, Ty, Base, Offset); 4193 auto *Mem = Traits::X86OperandMem::create(Func, Ty, Base, Offset);
4174 if (isVectorType(Ty)) { 4194 if (isVectorType(Ty)) {
4175 assert(VecReg != nullptr); 4195 assert(VecReg != nullptr);
4176 _storep(VecReg, Mem); 4196 _storep(VecReg, Mem);
4177 } else if (Ty == IceType_f64) { 4197 } else if (Ty == IceType_f64) {
4178 assert(VecReg != nullptr); 4198 assert(VecReg != nullptr);
4179 _storeq(VecReg, Mem); 4199 _storeq(VecReg, Mem);
4180 } else { 4200 } else {
4201 assert(Ty != IceType_i64);
4181 _store(Ctx->getConstantInt(Ty, SpreadValue), Mem); 4202 _store(Ctx->getConstantInt(Ty, SpreadValue), Mem);
4182 } 4203 }
4183 }; 4204 };
4184 4205
4185 // Find the largest type that can be used and use it as much as possible in 4206 // Find the largest type that can be used and use it as much as possible in
4186 // reverse order. Then handle any remainder with overlapping copies. Since 4207 // reverse order. Then handle any remainder with overlapping copies. Since
4187 // the remainder will be at the end, there will be reduced pressure on the 4208 // the remainder will be at the end, there will be reduced pressure on the
4188 // memory unit as the accesses to the same memory are far apart. 4209 // memory unit as the accesses to the same memory are far apart.
4189 Type Ty; 4210 Type Ty;
4190 if (ValValue == 0 && CountValue >= BytesPerStoreq && 4211 if (ValValue == 0 && CountValue >= BytesPerStoreq &&
(...skipping 1937 matching lines...) Expand 10 before | Expand all | Expand 10 after
6128 Variable *Reg = Func->makeVariable(Type); 6149 Variable *Reg = Func->makeVariable(Type);
6129 if (RegNum == Variable::NoRegister) 6150 if (RegNum == Variable::NoRegister)
6130 Reg->setMustHaveReg(); 6151 Reg->setMustHaveReg();
6131 else 6152 else
6132 Reg->setRegNum(RegNum); 6153 Reg->setRegNum(RegNum);
6133 return Reg; 6154 return Reg;
6134 } 6155 }
6135 6156
6136 template <class Machine> 6157 template <class Machine>
6137 const Type TargetX86Base<Machine>::TypeForSize[] = { 6158 const Type TargetX86Base<Machine>::TypeForSize[] = {
6138 IceType_i8, IceType_i16, IceType_i32, 6159 IceType_i8, IceType_i16, IceType_i32, IceType_f64, IceType_v16i8};
6139 (Traits::Is64Bit ? IceType_i64 : IceType_f64), IceType_v16i8};
6140 template <class Machine> 6160 template <class Machine>
6141 Type TargetX86Base<Machine>::largestTypeInSize(uint32_t Size, 6161 Type TargetX86Base<Machine>::largestTypeInSize(uint32_t Size,
6142 uint32_t MaxSize) { 6162 uint32_t MaxSize) {
6143 assert(Size != 0); 6163 assert(Size != 0);
6144 uint32_t TyIndex = llvm::findLastSet(Size, llvm::ZB_Undefined); 6164 uint32_t TyIndex = llvm::findLastSet(Size, llvm::ZB_Undefined);
6145 uint32_t MaxIndex = MaxSize == NoSizeLimit 6165 uint32_t MaxIndex = MaxSize == NoSizeLimit
6146 ? llvm::array_lengthof(TypeForSize) - 1 6166 ? llvm::array_lengthof(TypeForSize) - 1
6147 : llvm::findLastSet(MaxSize, llvm::ZB_Undefined); 6167 : llvm::findLastSet(MaxSize, llvm::ZB_Undefined);
6148 return TypeForSize[std::min(TyIndex, MaxIndex)]; 6168 return TypeForSize[std::min(TyIndex, MaxIndex)];
6149 } 6169 }
(...skipping 253 matching lines...) Expand 10 before | Expand all | Expand 10 after
6403 } 6423 }
6404 // the offset is not eligible for blinding or pooling, return the original 6424 // the offset is not eligible for blinding or pooling, return the original
6405 // mem operand 6425 // mem operand
6406 return MemOperand; 6426 return MemOperand;
6407 } 6427 }
6408 6428
6409 } // end of namespace X86Internal 6429 } // end of namespace X86Internal
6410 } // end of namespace Ice 6430 } // end of namespace Ice
6411 6431
6412 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H 6432 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698