OLD | NEW |
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// | 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 /// | 9 /// |
10 /// \file | 10 /// \file |
(...skipping 774 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
785 /// offset for Arg and updates InArgsSizeBytes according to Arg's width. For an | 785 /// offset for Arg and updates InArgsSizeBytes according to Arg's width. For an |
786 /// I64 arg that has been split into Lo and Hi components, it calls itself | 786 /// I64 arg that has been split into Lo and Hi components, it calls itself |
787 /// recursively on the components, taking care to handle Lo first because of the | 787 /// recursively on the components, taking care to handle Lo first because of the |
788 /// little-endian architecture. Lastly, this function generates an instruction | 788 /// little-endian architecture. Lastly, this function generates an instruction |
789 /// to copy Arg into its assigned register if applicable. | 789 /// to copy Arg into its assigned register if applicable. |
790 template <class Machine> | 790 template <class Machine> |
791 void TargetX86Base<Machine>::finishArgumentLowering(Variable *Arg, | 791 void TargetX86Base<Machine>::finishArgumentLowering(Variable *Arg, |
792 Variable *FramePtr, | 792 Variable *FramePtr, |
793 size_t BasicFrameOffset, | 793 size_t BasicFrameOffset, |
794 size_t &InArgsSizeBytes) { | 794 size_t &InArgsSizeBytes) { |
795 Variable *Lo = Arg->getLo(); | 795 if (!Traits::Is64Bit) { |
796 Variable *Hi = Arg->getHi(); | 796 if (auto *Arg64On32 = llvm::dyn_cast<Variable64On32>(Arg)) { |
| 797 Variable *Lo = Arg64On32->getLo(); |
| 798 Variable *Hi = Arg64On32->getHi(); |
| 799 finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes); |
| 800 finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes); |
| 801 return; |
| 802 } |
| 803 } |
797 Type Ty = Arg->getType(); | 804 Type Ty = Arg->getType(); |
798 if (!Traits::Is64Bit && Lo && Hi && Ty == IceType_i64) { | |
799 assert(Lo->getType() != IceType_i64); // don't want infinite recursion | |
800 assert(Hi->getType() != IceType_i64); // don't want infinite recursion | |
801 finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes); | |
802 finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes); | |
803 return; | |
804 } | |
805 if (isVectorType(Ty)) { | 805 if (isVectorType(Ty)) { |
806 InArgsSizeBytes = Traits::applyStackAlignment(InArgsSizeBytes); | 806 InArgsSizeBytes = Traits::applyStackAlignment(InArgsSizeBytes); |
807 } | 807 } |
808 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes); | 808 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes); |
809 InArgsSizeBytes += typeWidthInBytesOnStack(Ty); | 809 InArgsSizeBytes += typeWidthInBytesOnStack(Ty); |
810 if (Arg->hasReg()) { | 810 if (Arg->hasReg()) { |
811 assert(Ty != IceType_i64 || Traits::Is64Bit); | 811 assert(Ty != IceType_i64 || Traits::Is64Bit); |
812 typename Traits::X86OperandMem *Mem = Traits::X86OperandMem::create( | 812 typename Traits::X86OperandMem *Mem = Traits::X86OperandMem::create( |
813 Func, Ty, FramePtr, Ctx->getConstantInt32(Arg->getStackOffset())); | 813 Func, Ty, FramePtr, Ctx->getConstantInt32(Arg->getStackOffset())); |
814 if (isVectorType(Arg->getType())) { | 814 if (isVectorType(Arg->getType())) { |
815 _movp(Arg, Mem); | 815 _movp(Arg, Mem); |
816 } else { | 816 } else { |
817 _mov(Arg, Mem); | 817 _mov(Arg, Mem); |
818 } | 818 } |
819 // This argument-copying instruction uses an explicit Traits::X86OperandMem | 819 // This argument-copying instruction uses an explicit Traits::X86OperandMem |
820 // operand instead of a Variable, so its fill-from-stack operation has to | 820 // operand instead of a Variable, so its fill-from-stack operation has to |
821 // be tracked separately for statistics. | 821 // be tracked separately for statistics. |
822 Ctx->statsUpdateFills(); | 822 Ctx->statsUpdateFills(); |
823 } | 823 } |
824 } | 824 } |
825 | 825 |
826 template <class Machine> Type TargetX86Base<Machine>::stackSlotType() { | 826 template <class Machine> Type TargetX86Base<Machine>::stackSlotType() { |
827 return Traits::WordType; | 827 return Traits::WordType; |
828 } | 828 } |
829 | 829 |
830 template <class Machine> | 830 template <class Machine> |
831 template <typename T> | 831 template <typename T> |
832 typename std::enable_if<!T::Is64Bit, void>::type | |
833 TargetX86Base<Machine>::split64(Variable *Var) { | |
834 switch (Var->getType()) { | |
835 default: | |
836 return; | |
837 case IceType_i64: | |
838 // TODO: Only consider F64 if we need to push each half when passing as an | |
839 // argument to a function call. Note that each half is still typed as I32. | |
840 case IceType_f64: | |
841 break; | |
842 } | |
843 Variable *Lo = Var->getLo(); | |
844 Variable *Hi = Var->getHi(); | |
845 if (Lo) { | |
846 assert(Hi); | |
847 return; | |
848 } | |
849 assert(Hi == nullptr); | |
850 Lo = Func->makeVariable(IceType_i32); | |
851 Hi = Func->makeVariable(IceType_i32); | |
852 if (BuildDefs::dump()) { | |
853 Lo->setName(Func, Var->getName(Func) + "__lo"); | |
854 Hi->setName(Func, Var->getName(Func) + "__hi"); | |
855 } | |
856 Var->setLoHi(Lo, Hi); | |
857 if (Var->getIsArg()) { | |
858 Lo->setIsArg(); | |
859 Hi->setIsArg(); | |
860 } | |
861 } | |
862 | |
863 template <class Machine> | |
864 template <typename T> | |
865 typename std::enable_if<!T::Is64Bit, Operand>::type * | 832 typename std::enable_if<!T::Is64Bit, Operand>::type * |
866 TargetX86Base<Machine>::loOperand(Operand *Operand) { | 833 TargetX86Base<Machine>::loOperand(Operand *Operand) { |
867 assert(Operand->getType() == IceType_i64 || | 834 assert(Operand->getType() == IceType_i64 || |
868 Operand->getType() == IceType_f64); | 835 Operand->getType() == IceType_f64); |
869 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64) | 836 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64) |
870 return Operand; | 837 return Operand; |
871 if (auto *Var = llvm::dyn_cast<Variable>(Operand)) { | 838 if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Operand)) |
872 split64(Var); | 839 return Var64On32->getLo(); |
873 return Var->getLo(); | |
874 } | |
875 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) { | 840 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) { |
876 auto *ConstInt = llvm::dyn_cast<ConstantInteger32>( | 841 auto *ConstInt = llvm::dyn_cast<ConstantInteger32>( |
877 Ctx->getConstantInt32(static_cast<int32_t>(Const->getValue()))); | 842 Ctx->getConstantInt32(static_cast<int32_t>(Const->getValue()))); |
878 // Check if we need to blind/pool the constant. | 843 // Check if we need to blind/pool the constant. |
879 return legalize(ConstInt); | 844 return legalize(ConstInt); |
880 } | 845 } |
881 if (auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(Operand)) { | 846 if (auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(Operand)) { |
882 auto *MemOperand = Traits::X86OperandMem::create( | 847 auto *MemOperand = Traits::X86OperandMem::create( |
883 Func, IceType_i32, Mem->getBase(), Mem->getOffset(), Mem->getIndex(), | 848 Func, IceType_i32, Mem->getBase(), Mem->getOffset(), Mem->getIndex(), |
884 Mem->getShift(), Mem->getSegmentRegister()); | 849 Mem->getShift(), Mem->getSegmentRegister()); |
885 // Test if we should randomize or pool the offset, if so randomize it or | 850 // Test if we should randomize or pool the offset, if so randomize it or |
886 // pool it then create mem operand with the blinded/pooled constant. | 851 // pool it then create mem operand with the blinded/pooled constant. |
887 // Otherwise, return the mem operand as ordinary mem operand. | 852 // Otherwise, return the mem operand as ordinary mem operand. |
888 return legalize(MemOperand); | 853 return legalize(MemOperand); |
889 } | 854 } |
890 llvm_unreachable("Unsupported operand type"); | 855 llvm_unreachable("Unsupported operand type"); |
891 return nullptr; | 856 return nullptr; |
892 } | 857 } |
893 | 858 |
894 template <class Machine> | 859 template <class Machine> |
895 template <typename T> | 860 template <typename T> |
896 typename std::enable_if<!T::Is64Bit, Operand>::type * | 861 typename std::enable_if<!T::Is64Bit, Operand>::type * |
897 TargetX86Base<Machine>::hiOperand(Operand *Operand) { | 862 TargetX86Base<Machine>::hiOperand(Operand *Operand) { |
898 assert(Operand->getType() == IceType_i64 || | 863 assert(Operand->getType() == IceType_i64 || |
899 Operand->getType() == IceType_f64); | 864 Operand->getType() == IceType_f64); |
900 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64) | 865 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64) |
901 return Operand; | 866 return Operand; |
902 if (auto *Var = llvm::dyn_cast<Variable>(Operand)) { | 867 if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Operand)) |
903 split64(Var); | 868 return Var64On32->getHi(); |
904 return Var->getHi(); | |
905 } | |
906 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) { | 869 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) { |
907 auto *ConstInt = llvm::dyn_cast<ConstantInteger32>( | 870 auto *ConstInt = llvm::dyn_cast<ConstantInteger32>( |
908 Ctx->getConstantInt32(static_cast<int32_t>(Const->getValue() >> 32))); | 871 Ctx->getConstantInt32(static_cast<int32_t>(Const->getValue() >> 32))); |
909 // Check if we need to blind/pool the constant. | 872 // Check if we need to blind/pool the constant. |
910 return legalize(ConstInt); | 873 return legalize(ConstInt); |
911 } | 874 } |
912 if (auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(Operand)) { | 875 if (auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(Operand)) { |
913 Constant *Offset = Mem->getOffset(); | 876 Constant *Offset = Mem->getOffset(); |
914 if (Offset == nullptr) { | 877 if (Offset == nullptr) { |
915 Offset = Ctx->getConstantInt32(4); | 878 Offset = Ctx->getConstantInt32(4); |
(...skipping 1083 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1999 if (isVectorType(Dest->getType())) { | 1962 if (isVectorType(Dest->getType())) { |
2000 assert(Dest->getType() == IceType_v4i32 && | 1963 assert(Dest->getType() == IceType_v4i32 && |
2001 Inst->getSrc(0)->getType() == IceType_v4f32); | 1964 Inst->getSrc(0)->getType() == IceType_v4f32); |
2002 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); | 1965 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); |
2003 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) | 1966 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) |
2004 Src0RM = legalizeToReg(Src0RM); | 1967 Src0RM = legalizeToReg(Src0RM); |
2005 Variable *T = makeReg(Dest->getType()); | 1968 Variable *T = makeReg(Dest->getType()); |
2006 _cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq); | 1969 _cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq); |
2007 _movp(Dest, T); | 1970 _movp(Dest, T); |
2008 } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { | 1971 } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { |
2009 // Use a helper for converting floating-point values to 64-bit integers. | |
2010 // SSE2 appears to have no way to convert from xmm registers to something | |
2011 // like the edx:eax register pair, and gcc and clang both want to use x87 | |
2012 // instructions complete with temporary manipulation of the status word. | |
2013 // This helper is not needed for x86-64. | |
2014 split64(Dest); | |
2015 const SizeT MaxSrcs = 1; | 1972 const SizeT MaxSrcs = 1; |
2016 Type SrcType = Inst->getSrc(0)->getType(); | 1973 Type SrcType = Inst->getSrc(0)->getType(); |
2017 InstCall *Call = | 1974 InstCall *Call = |
2018 makeHelperCall(isFloat32Asserting32Or64(SrcType) ? H_fptosi_f32_i64 | 1975 makeHelperCall(isFloat32Asserting32Or64(SrcType) ? H_fptosi_f32_i64 |
2019 : H_fptosi_f64_i64, | 1976 : H_fptosi_f64_i64, |
2020 Dest, MaxSrcs); | 1977 Dest, MaxSrcs); |
2021 Call->addArg(Inst->getSrc(0)); | 1978 Call->addArg(Inst->getSrc(0)); |
2022 lowerCall(Call); | 1979 lowerCall(Call); |
2023 } else { | 1980 } else { |
2024 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); | 1981 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); |
(...skipping 19 matching lines...) Expand all Loading... |
2044 if (isVectorType(Dest->getType())) { | 2001 if (isVectorType(Dest->getType())) { |
2045 assert(Dest->getType() == IceType_v4i32 && | 2002 assert(Dest->getType() == IceType_v4i32 && |
2046 Inst->getSrc(0)->getType() == IceType_v4f32); | 2003 Inst->getSrc(0)->getType() == IceType_v4f32); |
2047 const SizeT MaxSrcs = 1; | 2004 const SizeT MaxSrcs = 1; |
2048 InstCall *Call = makeHelperCall(H_fptoui_4xi32_f32, Dest, MaxSrcs); | 2005 InstCall *Call = makeHelperCall(H_fptoui_4xi32_f32, Dest, MaxSrcs); |
2049 Call->addArg(Inst->getSrc(0)); | 2006 Call->addArg(Inst->getSrc(0)); |
2050 lowerCall(Call); | 2007 lowerCall(Call); |
2051 } else if (Dest->getType() == IceType_i64 || | 2008 } else if (Dest->getType() == IceType_i64 || |
2052 (!Traits::Is64Bit && Dest->getType() == IceType_i32)) { | 2009 (!Traits::Is64Bit && Dest->getType() == IceType_i32)) { |
2053 // Use a helper for both x86-32 and x86-64. | 2010 // Use a helper for both x86-32 and x86-64. |
2054 if (!Traits::Is64Bit) | |
2055 split64(Dest); | |
2056 const SizeT MaxSrcs = 1; | 2011 const SizeT MaxSrcs = 1; |
2057 Type DestType = Dest->getType(); | 2012 Type DestType = Dest->getType(); |
2058 Type SrcType = Inst->getSrc(0)->getType(); | 2013 Type SrcType = Inst->getSrc(0)->getType(); |
2059 IceString TargetString; | 2014 IceString TargetString; |
2060 if (Traits::Is64Bit) { | 2015 if (Traits::Is64Bit) { |
2061 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64 | 2016 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64 |
2062 : H_fptoui_f64_i64; | 2017 : H_fptoui_f64_i64; |
2063 } else if (isInt32Asserting32Or64(DestType)) { | 2018 } else if (isInt32Asserting32Or64(DestType)) { |
2064 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i32 | 2019 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i32 |
2065 : H_fptoui_f64_i32; | 2020 : H_fptoui_f64_i32; |
(...skipping 828 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2894 } | 2849 } |
2895 case Intrinsics::AtomicLoad: { | 2850 case Intrinsics::AtomicLoad: { |
2896 // We require the memory address to be naturally aligned. Given that is the | 2851 // We require the memory address to be naturally aligned. Given that is the |
2897 // case, then normal loads are atomic. | 2852 // case, then normal loads are atomic. |
2898 if (!Intrinsics::isMemoryOrderValid( | 2853 if (!Intrinsics::isMemoryOrderValid( |
2899 ID, getConstantMemoryOrder(Instr->getArg(1)))) { | 2854 ID, getConstantMemoryOrder(Instr->getArg(1)))) { |
2900 Func->setError("Unexpected memory ordering for AtomicLoad"); | 2855 Func->setError("Unexpected memory ordering for AtomicLoad"); |
2901 return; | 2856 return; |
2902 } | 2857 } |
2903 Variable *Dest = Instr->getDest(); | 2858 Variable *Dest = Instr->getDest(); |
2904 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { | 2859 if (!Traits::Is64Bit) { |
2905 // Follow what GCC does and use a movq instead of what lowerLoad() | 2860 if (auto *Dest64On32 = llvm::dyn_cast<Variable64On32>(Dest)) { |
2906 // normally does (split the load into two). Thus, this skips | 2861 // Follow what GCC does and use a movq instead of what lowerLoad() |
2907 // load/arithmetic op folding. Load/arithmetic folding can't happen | 2862 // normally does (split the load into two). Thus, this skips |
2908 // anyway, since this is x86-32 and integer arithmetic only happens on | 2863 // load/arithmetic op folding. Load/arithmetic folding can't happen |
2909 // 32-bit quantities. | 2864 // anyway, since this is x86-32 and integer arithmetic only happens on |
2910 Variable *T = makeReg(IceType_f64); | 2865 // 32-bit quantities. |
2911 typename Traits::X86OperandMem *Addr = | 2866 Variable *T = makeReg(IceType_f64); |
2912 formMemoryOperand(Instr->getArg(0), IceType_f64); | 2867 typename Traits::X86OperandMem *Addr = |
2913 _movq(T, Addr); | 2868 formMemoryOperand(Instr->getArg(0), IceType_f64); |
2914 // Then cast the bits back out of the XMM register to the i64 Dest. | 2869 _movq(T, Addr); |
2915 InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, Dest, T); | 2870 // Then cast the bits back out of the XMM register to the i64 Dest. |
2916 lowerCast(Cast); | 2871 InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, Dest, T); |
2917 // Make sure that the atomic load isn't elided when unused. | 2872 lowerCast(Cast); |
2918 Context.insert(InstFakeUse::create(Func, Dest->getLo())); | 2873 // Make sure that the atomic load isn't elided when unused. |
2919 Context.insert(InstFakeUse::create(Func, Dest->getHi())); | 2874 Context.insert(InstFakeUse::create(Func, Dest64On32->getLo())); |
2920 return; | 2875 Context.insert(InstFakeUse::create(Func, Dest64On32->getHi())); |
| 2876 return; |
| 2877 } |
2921 } | 2878 } |
2922 InstLoad *Load = InstLoad::create(Func, Dest, Instr->getArg(0)); | 2879 InstLoad *Load = InstLoad::create(Func, Dest, Instr->getArg(0)); |
2923 lowerLoad(Load); | 2880 lowerLoad(Load); |
2924 // Make sure the atomic load isn't elided when unused, by adding a FakeUse. | 2881 // Make sure the atomic load isn't elided when unused, by adding a FakeUse. |
2925 // Since lowerLoad may fuse the load w/ an arithmetic instruction, insert | 2882 // Since lowerLoad may fuse the load w/ an arithmetic instruction, insert |
2926 // the FakeUse on the last-inserted instruction's dest. | 2883 // the FakeUse on the last-inserted instruction's dest. |
2927 Context.insert( | 2884 Context.insert( |
2928 InstFakeUse::create(Func, Context.getLastInserted()->getDest())); | 2885 InstFakeUse::create(Func, Context.getLastInserted()->getDest())); |
2929 return; | 2886 return; |
2930 } | 2887 } |
(...skipping 2463 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5394 } | 5351 } |
5395 // the offset is not eligible for blinding or pooling, return the original | 5352 // the offset is not eligible for blinding or pooling, return the original |
5396 // mem operand | 5353 // mem operand |
5397 return MemOperand; | 5354 return MemOperand; |
5398 } | 5355 } |
5399 | 5356 |
5400 } // end of namespace X86Internal | 5357 } // end of namespace X86Internal |
5401 } // end of namespace Ice | 5358 } // end of namespace Ice |
5402 | 5359 |
5403 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H | 5360 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H |
OLD | NEW |