| OLD | NEW |
| 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// | 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 /// | 9 /// |
| 10 /// \file | 10 /// \file |
| (...skipping 774 matching lines...) |
| 785 /// offset for Arg and updates InArgsSizeBytes according to Arg's width. For an | 785 /// offset for Arg and updates InArgsSizeBytes according to Arg's width. For an |
| 786 /// I64 arg that has been split into Lo and Hi components, it calls itself | 786 /// I64 arg that has been split into Lo and Hi components, it calls itself |
| 787 /// recursively on the components, taking care to handle Lo first because of the | 787 /// recursively on the components, taking care to handle Lo first because of the |
| 788 /// little-endian architecture. Lastly, this function generates an instruction | 788 /// little-endian architecture. Lastly, this function generates an instruction |
| 789 /// to copy Arg into its assigned register if applicable. | 789 /// to copy Arg into its assigned register if applicable. |
| 790 template <class Machine> | 790 template <class Machine> |
| 791 void TargetX86Base<Machine>::finishArgumentLowering(Variable *Arg, | 791 void TargetX86Base<Machine>::finishArgumentLowering(Variable *Arg, |
| 792 Variable *FramePtr, | 792 Variable *FramePtr, |
| 793 size_t BasicFrameOffset, | 793 size_t BasicFrameOffset, |
| 794 size_t &InArgsSizeBytes) { | 794 size_t &InArgsSizeBytes) { |
| 795 Variable *Lo = Arg->getLo(); | 795 if (!Traits::Is64Bit) { |
| 796 Variable *Hi = Arg->getHi(); | 796 if (auto *Arg64On32 = llvm::dyn_cast<Variable64On32>(Arg)) { |
| | 797 Variable *Lo = Arg64On32->getLo(); |
| | 798 Variable *Hi = Arg64On32->getHi(); |
| | 799 finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes); |
| | 800 finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes); |
| | 801 return; |
| | 802 } |
| | 803 } |
| 797 Type Ty = Arg->getType(); | 804 Type Ty = Arg->getType(); |
| 798 if (!Traits::Is64Bit && Lo && Hi && Ty == IceType_i64) { | |
| 799 assert(Lo->getType() != IceType_i64); // don't want infinite recursion | |
| 800 assert(Hi->getType() != IceType_i64); // don't want infinite recursion | |
| 801 finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes); | |
| 802 finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes); | |
| 803 return; | |
| 804 } | |
| 805 if (isVectorType(Ty)) { | 805 if (isVectorType(Ty)) { |
| 806 InArgsSizeBytes = Traits::applyStackAlignment(InArgsSizeBytes); | 806 InArgsSizeBytes = Traits::applyStackAlignment(InArgsSizeBytes); |
| 807 } | 807 } |
| 808 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes); | 808 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes); |
| 809 InArgsSizeBytes += typeWidthInBytesOnStack(Ty); | 809 InArgsSizeBytes += typeWidthInBytesOnStack(Ty); |
| 810 if (Arg->hasReg()) { | 810 if (Arg->hasReg()) { |
| 811 assert(Ty != IceType_i64 || Traits::Is64Bit); | 811 assert(Ty != IceType_i64 || Traits::Is64Bit); |
| 812 typename Traits::X86OperandMem *Mem = Traits::X86OperandMem::create( | 812 typename Traits::X86OperandMem *Mem = Traits::X86OperandMem::create( |
| 813 Func, Ty, FramePtr, Ctx->getConstantInt32(Arg->getStackOffset())); | 813 Func, Ty, FramePtr, Ctx->getConstantInt32(Arg->getStackOffset())); |
| 814 if (isVectorType(Arg->getType())) { | 814 if (isVectorType(Arg->getType())) { |
| 815 _movp(Arg, Mem); | 815 _movp(Arg, Mem); |
| 816 } else { | 816 } else { |
| 817 _mov(Arg, Mem); | 817 _mov(Arg, Mem); |
| 818 } | 818 } |
| 819 // This argument-copying instruction uses an explicit Traits::X86OperandMem | 819 // This argument-copying instruction uses an explicit Traits::X86OperandMem |
| 820 // operand instead of a Variable, so its fill-from-stack operation has to | 820 // operand instead of a Variable, so its fill-from-stack operation has to |
| 821 // be tracked separately for statistics. | 821 // be tracked separately for statistics. |
| 822 Ctx->statsUpdateFills(); | 822 Ctx->statsUpdateFills(); |
| 823 } | 823 } |
| 824 } | 824 } |
| 825 | 825 |
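To make the little-endian note in the comment above concrete: because the low half is processed first, it receives the smaller in-arg offset and the high half gets the next slot, so a split i64 argument occupies two consecutive 4-byte stack slots. Below is a minimal standalone sketch of that offset bookkeeping, using simplified hypothetical types rather than Subzero's Variable/Variable64On32, and omitting the register-copy and vector-alignment paths.

```cpp
#include <cassert>
#include <cstddef>

// Hypothetical stand-in for a lowered argument; the real code uses
// Variable / Variable64On32.
struct Arg {
  size_t WidthBytes;  // 4 for an i32 half, 8 for an unsplit i64
  size_t StackOffset;
  Arg *Lo;            // non-null only for a split 64-bit argument
  Arg *Hi;
};

// Mirrors the offset bookkeeping of finishArgumentLowering() for stack
// arguments: recurse on Lo before Hi so the low half gets the lower address.
void assignInArgOffset(Arg &A, size_t BasicFrameOffset,
                       size_t &InArgsSizeBytes) {
  if (A.Lo && A.Hi) {
    assignInArgOffset(*A.Lo, BasicFrameOffset, InArgsSizeBytes);
    assignInArgOffset(*A.Hi, BasicFrameOffset, InArgsSizeBytes);
    return;
  }
  A.StackOffset = BasicFrameOffset + InArgsSizeBytes;
  InArgsSizeBytes += A.WidthBytes;
}

int main() {
  // An i64 argument split into two i32 halves lands in consecutive slots.
  Arg Lo = {4, 0, nullptr, nullptr};
  Arg Hi = {4, 0, nullptr, nullptr};
  Arg I64 = {8, 0, &Lo, &Hi};
  size_t InArgsSizeBytes = 0;
  assignInArgOffset(I64, /*BasicFrameOffset=*/16, InArgsSizeBytes);
  assert(Lo.StackOffset == 16 && Hi.StackOffset == 20);
  return 0;
}
```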
| 826 template <class Machine> Type TargetX86Base<Machine>::stackSlotType() { | 826 template <class Machine> Type TargetX86Base<Machine>::stackSlotType() { |
| 827 return Traits::WordType; | 827 return Traits::WordType; |
| 828 } | 828 } |
| 829 | 829 |
| 830 template <class Machine> | 830 template <class Machine> |
| 831 template <typename T> | 831 template <typename T> |
| 832 typename std::enable_if<!T::Is64Bit, void>::type | |
| 833 TargetX86Base<Machine>::split64(Variable *Var) { | |
| 834 switch (Var->getType()) { | |
| 835 default: | |
| 836 return; | |
| 837 case IceType_i64: | |
| 838 // TODO: Only consider F64 if we need to push each half when passing as an | |
| 839 // argument to a function call. Note that each half is still typed as I32. | |
| 840 case IceType_f64: | |
| 841 break; | |
| 842 } | |
| 843 Variable *Lo = Var->getLo(); | |
| 844 Variable *Hi = Var->getHi(); | |
| 845 if (Lo) { | |
| 846 assert(Hi); | |
| 847 return; | |
| 848 } | |
| 849 assert(Hi == nullptr); | |
| 850 Lo = Func->makeVariable(IceType_i32); | |
| 851 Hi = Func->makeVariable(IceType_i32); | |
| 852 if (BuildDefs::dump()) { | |
| 853 Lo->setName(Func, Var->getName(Func) + "__lo"); | |
| 854 Hi->setName(Func, Var->getName(Func) + "__hi"); | |
| 855 } | |
| 856 Var->setLoHi(Lo, Hi); | |
| 857 if (Var->getIsArg()) { | |
| 858 Lo->setIsArg(); | |
| 859 Hi->setIsArg(); | |
| 860 } | |
| 861 } | |
| 862 | |
| 863 template <class Machine> | |
| 864 template <typename T> | |
| 865 typename std::enable_if<!T::Is64Bit, Operand>::type * | 832 typename std::enable_if<!T::Is64Bit, Operand>::type * |
| 866 TargetX86Base<Machine>::loOperand(Operand *Operand) { | 833 TargetX86Base<Machine>::loOperand(Operand *Operand) { |
| 867 assert(Operand->getType() == IceType_i64 || | 834 assert(Operand->getType() == IceType_i64 || |
| 868 Operand->getType() == IceType_f64); | 835 Operand->getType() == IceType_f64); |
| 869 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64) | 836 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64) |
| 870 return Operand; | 837 return Operand; |
| 871 if (auto *Var = llvm::dyn_cast<Variable>(Operand)) { | 838 if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Operand)) |
| 872 split64(Var); | 839 return Var64On32->getLo(); |
| 873 return Var->getLo(); | |
| 874 } | |
| 875 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) { | 840 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) { |
| 876 auto *ConstInt = llvm::dyn_cast<ConstantInteger32>( | 841 auto *ConstInt = llvm::dyn_cast<ConstantInteger32>( |
| 877 Ctx->getConstantInt32(static_cast<int32_t>(Const->getValue()))); | 842 Ctx->getConstantInt32(static_cast<int32_t>(Const->getValue()))); |
| 878 // Check if we need to blind/pool the constant. | 843 // Check if we need to blind/pool the constant. |
| 879 return legalize(ConstInt); | 844 return legalize(ConstInt); |
| 880 } | 845 } |
| 881 if (auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(Operand)) { | 846 if (auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(Operand)) { |
| 882 auto *MemOperand = Traits::X86OperandMem::create( | 847 auto *MemOperand = Traits::X86OperandMem::create( |
| 883 Func, IceType_i32, Mem->getBase(), Mem->getOffset(), Mem->getIndex(), | 848 Func, IceType_i32, Mem->getBase(), Mem->getOffset(), Mem->getIndex(), |
| 884 Mem->getShift(), Mem->getSegmentRegister()); | 849 Mem->getShift(), Mem->getSegmentRegister()); |
| 885 // Test if we should randomize or pool the offset, if so randomize it or | 850 // Test if we should randomize or pool the offset, if so randomize it or |
| 886 // pool it then create mem operand with the blinded/pooled constant. | 851 // pool it then create mem operand with the blinded/pooled constant. |
| 887 // Otherwise, return the mem operand as ordinary mem operand. | 852 // Otherwise, return the mem operand as ordinary mem operand. |
| 888 return legalize(MemOperand); | 853 return legalize(MemOperand); |
| 889 } | 854 } |
| 890 llvm_unreachable("Unsupported operand type"); | 855 llvm_unreachable("Unsupported operand type"); |
| 891 return nullptr; | 856 return nullptr; |
| 892 } | 857 } |
| 893 | 858 |
| 894 template <class Machine> | 859 template <class Machine> |
| 895 template <typename T> | 860 template <typename T> |
| 896 typename std::enable_if<!T::Is64Bit, Operand>::type * | 861 typename std::enable_if<!T::Is64Bit, Operand>::type * |
| 897 TargetX86Base<Machine>::hiOperand(Operand *Operand) { | 862 TargetX86Base<Machine>::hiOperand(Operand *Operand) { |
| 898 assert(Operand->getType() == IceType_i64 || | 863 assert(Operand->getType() == IceType_i64 || |
| 899 Operand->getType() == IceType_f64); | 864 Operand->getType() == IceType_f64); |
| 900 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64) | 865 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64) |
| 901 return Operand; | 866 return Operand; |
| 902 if (auto *Var = llvm::dyn_cast<Variable>(Operand)) { | 867 if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Operand)) |
| 903 split64(Var); | 868 return Var64On32->getHi(); |
| 904 return Var->getHi(); | |
| 905 } | |
| 906 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) { | 869 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) { |
| 907 auto *ConstInt = llvm::dyn_cast<ConstantInteger32>( | 870 auto *ConstInt = llvm::dyn_cast<ConstantInteger32>( |
| 908 Ctx->getConstantInt32(static_cast<int32_t>(Const->getValue() >> 32))); | 871 Ctx->getConstantInt32(static_cast<int32_t>(Const->getValue() >> 32))); |
| 909 // Check if we need to blind/pool the constant. | 872 // Check if we need to blind/pool the constant. |
| 910 return legalize(ConstInt); | 873 return legalize(ConstInt); |
| 911 } | 874 } |
| 912 if (auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(Operand)) { | 875 if (auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(Operand)) { |
| 913 Constant *Offset = Mem->getOffset(); | 876 Constant *Offset = Mem->getOffset(); |
| 914 if (Offset == nullptr) { | 877 if (Offset == nullptr) { |
| 915 Offset = Ctx->getConstantInt32(4); | 878 Offset = Ctx->getConstantInt32(4); |
| (...skipping 1083 matching lines...) |
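As context for the constant cases in loOperand()/hiOperand() above: the low half of a 64-bit constant is its truncated value, the high half is the value arithmetically shifted right by 32, and for a memory operand the high half simply reads 4 bytes past the low half's offset. A small standalone illustration of the constant split, with illustrative helper names that are not Subzero APIs:

```cpp
#include <cassert>
#include <cstdint>

// Illustrative helpers showing how a 64-bit constant decomposes into the
// 32-bit halves produced by loOperand() and hiOperand() on a 32-bit target.
static int32_t loHalf(int64_t V) { return static_cast<int32_t>(V); }
static int32_t hiHalf(int64_t V) { return static_cast<int32_t>(V >> 32); }

int main() {
  const int64_t V = 0x0123456789ABCDEFLL;
  assert(static_cast<uint32_t>(loHalf(V)) == 0x89ABCDEFu);
  assert(hiHalf(V) == 0x01234567);
  // Reassembling the halves (zero-extend the low half before combining).
  const int64_t Rebuilt = (static_cast<int64_t>(hiHalf(V)) << 32) |
                          static_cast<uint32_t>(loHalf(V));
  assert(Rebuilt == V);
  return 0;
}
```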
| 1999 if (isVectorType(Dest->getType())) { | 1962 if (isVectorType(Dest->getType())) { |
| 2000 assert(Dest->getType() == IceType_v4i32 && | 1963 assert(Dest->getType() == IceType_v4i32 && |
| 2001 Inst->getSrc(0)->getType() == IceType_v4f32); | 1964 Inst->getSrc(0)->getType() == IceType_v4f32); |
| 2002 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); | 1965 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); |
| 2003 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) | 1966 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) |
| 2004 Src0RM = legalizeToReg(Src0RM); | 1967 Src0RM = legalizeToReg(Src0RM); |
| 2005 Variable *T = makeReg(Dest->getType()); | 1968 Variable *T = makeReg(Dest->getType()); |
| 2006 _cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq); | 1969 _cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq); |
| 2007 _movp(Dest, T); | 1970 _movp(Dest, T); |
| 2008 } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { | 1971 } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { |
| 2009 // Use a helper for converting floating-point values to 64-bit integers. | |
| 2010 // SSE2 appears to have no way to convert from xmm registers to something | |
| 2011 // like the edx:eax register pair, and gcc and clang both want to use x87 | |
| 2012 // instructions complete with temporary manipulation of the status word. | |
| 2013 // This helper is not needed for x86-64. | |
| 2014 split64(Dest); | |
| 2015 const SizeT MaxSrcs = 1; | 1972 const SizeT MaxSrcs = 1; |
| 2016 Type SrcType = Inst->getSrc(0)->getType(); | 1973 Type SrcType = Inst->getSrc(0)->getType(); |
| 2017 InstCall *Call = | 1974 InstCall *Call = |
| 2018 makeHelperCall(isFloat32Asserting32Or64(SrcType) ? H_fptosi_f32_i64 | 1975 makeHelperCall(isFloat32Asserting32Or64(SrcType) ? H_fptosi_f32_i64 |
| 2019 : H_fptosi_f64_i64, | 1976 : H_fptosi_f64_i64, |
| 2020 Dest, MaxSrcs); | 1977 Dest, MaxSrcs); |
| 2021 Call->addArg(Inst->getSrc(0)); | 1978 Call->addArg(Inst->getSrc(0)); |
| 2022 lowerCall(Call); | 1979 lowerCall(Call); |
| 2023 } else { | 1980 } else { |
| 2024 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); | 1981 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); |
| (...skipping 19 matching lines...) |
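The helper-call path retained above reflects an x86-32 limitation: SSE2 offers no conversion from an XMM register to a 64-bit integer destination (such as the edx:eax pair), so fp-to-i64 conversions are delegated to runtime routines. Semantically those routines are plain truncating conversions; the sketch below only illustrates that behavior and is not the actual runtime implementation behind H_fptosi_f32_i64 / H_fptosi_f64_i64.

```cpp
#include <cstdint>

// Illustrative semantics of the fp-to-signed-i64 helpers: truncation toward
// zero, with out-of-range and NaN inputs undefined, as in C++ itself.
extern "C" int64_t fptosi_f32_i64(float X) { return static_cast<int64_t>(X); }
extern "C" int64_t fptosi_f64_i64(double X) { return static_cast<int64_t>(X); }
```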
| 2044 if (isVectorType(Dest->getType())) { | 2001 if (isVectorType(Dest->getType())) { |
| 2045 assert(Dest->getType() == IceType_v4i32 && | 2002 assert(Dest->getType() == IceType_v4i32 && |
| 2046 Inst->getSrc(0)->getType() == IceType_v4f32); | 2003 Inst->getSrc(0)->getType() == IceType_v4f32); |
| 2047 const SizeT MaxSrcs = 1; | 2004 const SizeT MaxSrcs = 1; |
| 2048 InstCall *Call = makeHelperCall(H_fptoui_4xi32_f32, Dest, MaxSrcs); | 2005 InstCall *Call = makeHelperCall(H_fptoui_4xi32_f32, Dest, MaxSrcs); |
| 2049 Call->addArg(Inst->getSrc(0)); | 2006 Call->addArg(Inst->getSrc(0)); |
| 2050 lowerCall(Call); | 2007 lowerCall(Call); |
| 2051 } else if (Dest->getType() == IceType_i64 || | 2008 } else if (Dest->getType() == IceType_i64 || |
| 2052 (!Traits::Is64Bit && Dest->getType() == IceType_i32)) { | 2009 (!Traits::Is64Bit && Dest->getType() == IceType_i32)) { |
| 2053 // Use a helper for both x86-32 and x86-64. | 2010 // Use a helper for both x86-32 and x86-64. |
| 2054 if (!Traits::Is64Bit) | |
| 2055 split64(Dest); | |
| 2056 const SizeT MaxSrcs = 1; | 2011 const SizeT MaxSrcs = 1; |
| 2057 Type DestType = Dest->getType(); | 2012 Type DestType = Dest->getType(); |
| 2058 Type SrcType = Inst->getSrc(0)->getType(); | 2013 Type SrcType = Inst->getSrc(0)->getType(); |
| 2059 IceString TargetString; | 2014 IceString TargetString; |
| 2060 if (Traits::Is64Bit) { | 2015 if (Traits::Is64Bit) { |
| 2061 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64 | 2016 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64 |
| 2062 : H_fptoui_f64_i64; | 2017 : H_fptoui_f64_i64; |
| 2063 } else if (isInt32Asserting32Or64(DestType)) { | 2018 } else if (isInt32Asserting32Or64(DestType)) { |
| 2064 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i32 | 2019 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i32 |
| 2065 : H_fptoui_f64_i32; | 2020 : H_fptoui_f64_i32; |
| (...skipping 828 matching lines...) |
| 2894 } | 2849 } |
| 2895 case Intrinsics::AtomicLoad: { | 2850 case Intrinsics::AtomicLoad: { |
| 2896 // We require the memory address to be naturally aligned. Given that is the | 2851 // We require the memory address to be naturally aligned. Given that is the |
| 2897 // case, then normal loads are atomic. | 2852 // case, then normal loads are atomic. |
| 2898 if (!Intrinsics::isMemoryOrderValid( | 2853 if (!Intrinsics::isMemoryOrderValid( |
| 2899 ID, getConstantMemoryOrder(Instr->getArg(1)))) { | 2854 ID, getConstantMemoryOrder(Instr->getArg(1)))) { |
| 2900 Func->setError("Unexpected memory ordering for AtomicLoad"); | 2855 Func->setError("Unexpected memory ordering for AtomicLoad"); |
| 2901 return; | 2856 return; |
| 2902 } | 2857 } |
| 2903 Variable *Dest = Instr->getDest(); | 2858 Variable *Dest = Instr->getDest(); |
| 2904 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { | 2859 if (!Traits::Is64Bit) { |
| 2905 // Follow what GCC does and use a movq instead of what lowerLoad() | 2860 if (auto *Dest64On32 = llvm::dyn_cast<Variable64On32>(Dest)) { |
| 2906 // normally does (split the load into two). Thus, this skips | 2861 // Follow what GCC does and use a movq instead of what lowerLoad() |
| 2907 // load/arithmetic op folding. Load/arithmetic folding can't happen | 2862 // normally does (split the load into two). Thus, this skips |
| 2908 // anyway, since this is x86-32 and integer arithmetic only happens on | 2863 // load/arithmetic op folding. Load/arithmetic folding can't happen |
| 2909 // 32-bit quantities. | 2864 // anyway, since this is x86-32 and integer arithmetic only happens on |
| 2910 Variable *T = makeReg(IceType_f64); | 2865 // 32-bit quantities. |
| 2911 typename Traits::X86OperandMem *Addr = | 2866 Variable *T = makeReg(IceType_f64); |
| 2912 formMemoryOperand(Instr->getArg(0), IceType_f64); | 2867 typename Traits::X86OperandMem *Addr = |
| 2913 _movq(T, Addr); | 2868 formMemoryOperand(Instr->getArg(0), IceType_f64); |
| 2914 // Then cast the bits back out of the XMM register to the i64 Dest. | 2869 _movq(T, Addr); |
| 2915 InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, Dest, T); | 2870 // Then cast the bits back out of the XMM register to the i64 Dest. |
| 2916 lowerCast(Cast); | 2871 InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, Dest, T); |
| 2917 // Make sure that the atomic load isn't elided when unused. | 2872 lowerCast(Cast); |
| 2918 Context.insert(InstFakeUse::create(Func, Dest->getLo())); | 2873 // Make sure that the atomic load isn't elided when unused. |
| 2919 Context.insert(InstFakeUse::create(Func, Dest->getHi())); | 2874 Context.insert(InstFakeUse::create(Func, Dest64On32->getLo())); |
| 2920 return; | 2875 Context.insert(InstFakeUse::create(Func, Dest64On32->getHi())); |
| | 2876 return; |
| | 2877 } |
| 2921 } | 2878 } |
| 2922 InstLoad *Load = InstLoad::create(Func, Dest, Instr->getArg(0)); | 2879 InstLoad *Load = InstLoad::create(Func, Dest, Instr->getArg(0)); |
| 2923 lowerLoad(Load); | 2880 lowerLoad(Load); |
| 2924 // Make sure the atomic load isn't elided when unused, by adding a FakeUse. | 2881 // Make sure the atomic load isn't elided when unused, by adding a FakeUse. |
| 2925 // Since lowerLoad may fuse the load w/ an arithmetic instruction, insert | 2882 // Since lowerLoad may fuse the load w/ an arithmetic instruction, insert |
| 2926 // the FakeUse on the last-inserted instruction's dest. | 2883 // the FakeUse on the last-inserted instruction's dest. |
| 2927 Context.insert( | 2884 Context.insert( |
| 2928 InstFakeUse::create(Func, Context.getLastInserted()->getDest())); | 2885 InstFakeUse::create(Func, Context.getLastInserted()->getDest())); |
| 2929 return; | 2886 return; |
| 2930 } | 2887 } |
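For reference, the movq path above works because an 8-byte SSE2 load from a naturally aligned address is atomic on x86: the value lands in an XMM register and is then bitcast out to the i64 destination pair, with FakeUse instructions keeping the otherwise-unused load from being eliminated. A rough C++ analogue of the intended semantics, using intrinsics to stand in for the emitted movq (a sketch, not the generated code):

```cpp
#include <cstdint>
#include <emmintrin.h>  // SSE2

// Atomic 64-bit load on a 32-bit x86 target: a single movq from an 8-byte
// aligned address is atomic; the bits are then read back out of the XMM
// register, standing in for the bitcast to the i64 lo/hi pair.
static int64_t atomicLoad64(const int64_t *Addr) {
  __m128i T = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(Addr));
  int64_t Result;
  _mm_storel_epi64(reinterpret_cast<__m128i *>(&Result), T);
  return Result;
}
```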
| (...skipping 2463 matching lines...) |
| 5394 } | 5351 } |
| 5395 // the offset is not eligible for blinding or pooling, return the original | 5352 // the offset is not eligible for blinding or pooling, return the original |
| 5396 // mem operand | 5353 // mem operand |
| 5397 return MemOperand; | 5354 return MemOperand; |
| 5398 } | 5355 } |
| 5399 | 5356 |
| 5400 } // end of namespace X86Internal | 5357 } // end of namespace X86Internal |
| 5401 } // end of namespace Ice | 5358 } // end of namespace Ice |
| 5402 | 5359 |
| 5403 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H | 5360 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H |