| OLD | NEW |
| 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// | 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 /// | 9 /// |
| 10 /// \file | 10 /// \file |
| (...skipping 775 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 786 int32_t BaseRegNum = Var->getBaseRegNum(); | 786 int32_t BaseRegNum = Var->getBaseRegNum(); |
| 787 if (Var->getBaseRegNum() == Variable::NoRegister) { | 787 if (Var->getBaseRegNum() == Variable::NoRegister) { |
| 788 BaseRegNum = getFrameOrStackReg(); | 788 BaseRegNum = getFrameOrStackReg(); |
| 789 if (!hasFramePointer()) | 789 if (!hasFramePointer()) |
| 790 Offset += getStackAdjustment(); | 790 Offset += getStackAdjustment(); |
| 791 } | 791 } |
| 792 return typename Traits::Address( | 792 return typename Traits::Address( |
| 793 Traits::RegisterSet::getEncodedGPR(BaseRegNum), Offset); | 793 Traits::RegisterSet::getEncodedGPR(BaseRegNum), Offset); |
| 794 } | 794 } |
| 795 | 795 |
| 796 template <class Machine> void TargetX86Base<Machine>::lowerArguments() { | |
| 797 VarList &Args = Func->getArgs(); | |
| 798 // The first four arguments of vector type, regardless of their | |
| 799 // position relative to the other arguments in the argument list, are | |
| 800 // passed in registers xmm0 - xmm3. | |
| 801 unsigned NumXmmArgs = 0; | |
| 802 | |
| 803 Context.init(Func->getEntryNode()); | |
| 804 Context.setInsertPoint(Context.getCur()); | |
| 805 | |
| 806 for (SizeT I = 0, E = Args.size(); | |
| 807 I < E && NumXmmArgs < Traits::X86_MAX_XMM_ARGS; ++I) { | |
| 808 Variable *Arg = Args[I]; | |
| 809 Type Ty = Arg->getType(); | |
| 810 if (!isVectorType(Ty)) | |
| 811 continue; | |
| 812 // Replace Arg in the argument list with the home register. Then | |
| 813 // generate an instruction in the prolog to copy the home register | |
| 814 // to the assigned location of Arg. | |
| 815 int32_t RegNum = Traits::RegisterSet::Reg_xmm0 + NumXmmArgs; | |
| 816 ++NumXmmArgs; | |
| 817 Variable *RegisterArg = Func->makeVariable(Ty); | |
| 818 if (BuildDefs::dump()) | |
| 819 RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func)); | |
| 820 RegisterArg->setRegNum(RegNum); | |
| 821 RegisterArg->setIsArg(); | |
| 822 Arg->setIsArg(false); | |
| 823 | |
| 824 Args[I] = RegisterArg; | |
| 825 Context.insert(InstAssign::create(Func, Arg, RegisterArg)); | |
| 826 } | |
| 827 } | |
| 828 | |
/// Helper function for addProlog().
///
/// This assumes Arg is an argument passed on the stack. This sets the
/// frame offset for Arg and updates InArgsSizeBytes according to Arg's
/// width. For an I64 arg that has been split into Lo and Hi components,
/// it calls itself recursively on the components, taking care to handle
/// Lo first because of the little-endian architecture. Lastly, this
/// function generates an instruction to copy Arg into its assigned
/// register if applicable.
///
/// \param Arg              argument variable to assign a frame slot to.
/// \param FramePtr         physical frame/stack pointer register used as the
///                         base for the fill-from-stack load.
/// \param BasicFrameOffset byte offset from FramePtr to the first in-arg.
/// \param InArgsSizeBytes  running total of in-arg bytes consumed so far;
///                         updated in place by this call.
template <class Machine>
void TargetX86Base<Machine>::finishArgumentLowering(Variable *Arg,
                                                    Variable *FramePtr,
                                                    size_t BasicFrameOffset,
                                                    size_t &InArgsSizeBytes) {
  Variable *Lo = Arg->getLo();
  Variable *Hi = Arg->getHi();
  Type Ty = Arg->getType();
  if (Lo && Hi && Ty == IceType_i64) {
    // TODO(jpp): This special case is not needed for x86-64.
    assert(Lo->getType() != IceType_i64); // don't want infinite recursion
    assert(Hi->getType() != IceType_i64); // don't want infinite recursion
    // Lo is processed first so it lands at the lower in-args offset
    // (little-endian layout).
    finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes);
    finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes);
    return;
  }
  // Vector arguments are located at a stack-aligned offset.
  if (isVectorType(Ty)) {
    InArgsSizeBytes = Traits::applyStackAlignment(InArgsSizeBytes);
  }
  Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);
  InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
  if (Arg->hasReg()) {
    // The argument was register-allocated: load it from its home stack slot
    // into the assigned register.
    assert(Ty != IceType_i64);
    typename Traits::X86OperandMem *Mem = Traits::X86OperandMem::create(
        Func, Ty, FramePtr, Ctx->getConstantInt32(Arg->getStackOffset()));
    if (isVectorType(Arg->getType())) {
      _movp(Arg, Mem);
    } else {
      _mov(Arg, Mem);
    }
    // This argument-copying instruction uses an explicit Traits::X86OperandMem
    // operand instead of a Variable, so its fill-from-stack operation has to be
    // tracked separately for statistics.
    Ctx->statsUpdateFills();
  }
}
| 873 | 841 |
/// Returns the type used for stack spill slots (currently always i32).
template <class Machine> Type TargetX86Base<Machine>::stackSlotType() {
  // TODO(jpp): this is wrong for x86-64.
  return IceType_i32;
}
| 877 | 846 |
/// Emits the function prolog into Node: pushes preserved registers, sets up
/// the (optional) ebp-based frame, reserves and aligns the spill areas, and
/// assigns stack offsets to arguments and spilled variables.
template <class Machine> void TargetX86Base<Machine>::addProlog(CfgNode *Node) {
  // Stack frame layout:
  //
  // +------------------------+
  // | 1. return address      |
  // +------------------------+
  // | 2. preserved registers |
  // +------------------------+
  // | 3. padding             |
  // +------------------------+
  // | 4. global spill area   |
  // +------------------------+
  // | 5. padding             |
  // +------------------------+
  // | 6. local spill area    |
  // +------------------------+
  // | 7. padding             |
  // +------------------------+
  // | 8. allocas             |
  // +------------------------+
  //
  // The following variables record the size in bytes of the given areas:
  //  * X86_RET_IP_SIZE_BYTES:  area 1
  //  * PreservedRegsSizeBytes: area 2
  //  * SpillAreaPaddingBytes:  area 3
  //  * GlobalsSize:            area 4
  //  * GlobalsAndSubsequentPaddingSize: areas 4 - 5
  //  * LocalsSpillAreaSize:    area 6
  //  * SpillAreaSizeBytes:     areas 3 - 7

  // Determine stack frame offsets for each Variable without a
  // register assignment. This can be done as one variable per stack
  // slot. Or, do coalescing by running the register allocator again
  // with an infinite set of registers (as a side effect, this gives
  // variables a second chance at physical register assignment).
  //
  // A middle ground approach is to leverage sparsity and allocate one
  // block of space on the frame for globals (variables with
  // multi-block lifetime), and one block to share for locals
  // (single-block lifetime).

  Context.init(Node);
  Context.setInsertPoint(Context.getCur());

  llvm::SmallBitVector CalleeSaves =
      getRegisterSet(RegSet_CalleeSave, RegSet_None);
  RegsUsed = llvm::SmallBitVector(CalleeSaves.size());
  VarList SortedSpilledVariables, VariablesLinkedToSpillSlots;
  size_t GlobalsSize = 0;
  // If there is a separate locals area, this represents that area.
  // Otherwise it counts any variable not counted by GlobalsSize.
  SpillAreaSizeBytes = 0;
  // If there is a separate locals area, this specifies the alignment
  // for it.
  uint32_t LocalsSlotsAlignmentBytes = 0;
  // The entire spill locations area gets aligned to largest natural
  // alignment of the variables that have a spill slot.
  uint32_t SpillAreaAlignmentBytes = 0;
  // A spill slot linked to a variable with a stack slot should reuse
  // that stack slot.
  std::function<bool(Variable *)> TargetVarHook =
      [&VariablesLinkedToSpillSlots](Variable *Var) {
        if (auto *SpillVar =
                llvm::dyn_cast<typename Traits::SpillVariable>(Var)) {
          assert(Var->getWeight().isZero());
          if (SpillVar->getLinkedTo() && !SpillVar->getLinkedTo()->hasReg()) {
            VariablesLinkedToSpillSlots.push_back(Var);
            return true;
          }
        }
        return false;
      };

  // Compute the list of spilled variables and bounds for GlobalsSize, etc.
  getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize,
                        &SpillAreaSizeBytes, &SpillAreaAlignmentBytes,
                        &LocalsSlotsAlignmentBytes, TargetVarHook);
  uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes;
  SpillAreaSizeBytes += GlobalsSize;

  // Add push instructions for preserved registers.
  // NOTE(review): the += 4 assumes 4-byte GPRs, i.e. 32-bit x86 — TODO
  // confirm this is revisited for an x86-64 target.
  uint32_t NumCallee = 0;
  size_t PreservedRegsSizeBytes = 0;
  for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
    if (CalleeSaves[i] && RegsUsed[i]) {
      ++NumCallee;
      PreservedRegsSizeBytes += 4;
      _push(getPhysicalRegister(i));
    }
  }
  Ctx->statsUpdateRegistersSaved(NumCallee);

  // Generate "push ebp; mov ebp, esp"
  if (IsEbpBasedFrame) {
    assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None))
               .count() == 0);
    PreservedRegsSizeBytes += 4;
    Variable *ebp = getPhysicalRegister(Traits::RegisterSet::Reg_ebp);
    Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
    _push(ebp);
    _mov(ebp, esp);
    // Keep ebp live for late-stage liveness analysis
    // (e.g. asm-verbose mode).
    Context.insert(InstFakeUse::create(Func, ebp));
  }

  // Align the variables area. SpillAreaPaddingBytes is the size of
  // the region after the preserved registers and before the spill areas.
  // LocalsSlotsPaddingBytes is the amount of padding between the globals
  // and locals area if they are separate.
  assert(SpillAreaAlignmentBytes <= Traits::X86_STACK_ALIGNMENT_BYTES);
  assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
  uint32_t SpillAreaPaddingBytes = 0;
  uint32_t LocalsSlotsPaddingBytes = 0;
  alignStackSpillAreas(Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes,
                       SpillAreaAlignmentBytes, GlobalsSize,
                       LocalsSlotsAlignmentBytes, &SpillAreaPaddingBytes,
                       &LocalsSlotsPaddingBytes);
  SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes;
  uint32_t GlobalsAndSubsequentPaddingSize =
      GlobalsSize + LocalsSlotsPaddingBytes;

  // Align esp if necessary.
  if (NeedsStackAlignment) {
    uint32_t StackOffset =
        Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes;
    uint32_t StackSize =
        Traits::applyStackAlignment(StackOffset + SpillAreaSizeBytes);
    SpillAreaSizeBytes = StackSize - StackOffset;
  }

  // Generate "sub esp, SpillAreaSizeBytes"
  if (SpillAreaSizeBytes)
    _sub(getPhysicalRegister(Traits::RegisterSet::Reg_esp),
         Ctx->getConstantInt32(SpillAreaSizeBytes));
  Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);

  resetStackAdjustment();

  // Fill in stack offsets for stack args, and copy args into registers
  // for those that were register-allocated. Args are pushed right to
  // left, so Arg[0] is closest to the stack/frame pointer.
  Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
  size_t BasicFrameOffset =
      PreservedRegsSizeBytes + Traits::X86_RET_IP_SIZE_BYTES;
  if (!IsEbpBasedFrame)
    BasicFrameOffset += SpillAreaSizeBytes;

  const VarList &Args = Func->getArgs();
  size_t InArgsSizeBytes = 0;
  unsigned NumXmmArgs = 0;
  for (Variable *Arg : Args) {
    // Skip arguments passed in registers.
    if (isVectorType(Arg->getType()) && NumXmmArgs < Traits::X86_MAX_XMM_ARGS) {
      ++NumXmmArgs;
      continue;
    }
    finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes);
  }

  // Fill in stack offsets for locals.
  assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes,
                      SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize,
                      IsEbpBasedFrame);
  // Assign stack offsets to variables that have been linked to spilled
  // variables.
  for (Variable *Var : VariablesLinkedToSpillSlots) {
    Variable *Linked =
        (llvm::cast<typename Traits::SpillVariable>(Var))->getLinkedTo();
    Var->setStackOffset(Linked->getStackOffset());
  }
  this->HasComputedFrame = true;

  // Optionally dump the computed frame layout for debugging.
  if (BuildDefs::dump() && Func->isVerbose(IceV_Frame)) {
    OstreamLocker L(Func->getContext());
    Ostream &Str = Func->getContext()->getStrDump();

    Str << "Stack layout:\n";
    uint32_t EspAdjustmentPaddingSize =
        SpillAreaSizeBytes - LocalsSpillAreaSize -
        GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes;
    Str << " in-args = " << InArgsSizeBytes << " bytes\n"
        << " return address = " << Traits::X86_RET_IP_SIZE_BYTES << " bytes\n"
        << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"
        << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"
        << " globals spill area = " << GlobalsSize << " bytes\n"
        << " globals-locals spill areas intermediate padding = "
        << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n"
        << " locals spill area = " << LocalsSpillAreaSize << " bytes\n"
        << " esp alignment padding = " << EspAdjustmentPaddingSize
        << " bytes\n";

    Str << "Stack details:\n"
        << " esp adjustment = " << SpillAreaSizeBytes << " bytes\n"
        << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"
        << " locals spill area alignment = " << LocalsSlotsAlignmentBytes
        << " bytes\n"
        << " is ebp based = " << IsEbpBasedFrame << "\n";
  }
}
| 1078 | |
/// Emits the function epilog into Node just before its ret instruction:
/// tears down the frame, pops preserved registers, and (under sandboxing)
/// rewrites the ret into a sandboxed return sequence.
template <class Machine> void TargetX86Base<Machine>::addEpilog(CfgNode *Node) {
  InstList &Insts = Node->getInsts();
  InstList::reverse_iterator RI, E;
  // Search backwards for the ret instruction; it marks where the frame
  // removal code must be inserted.
  for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
    if (llvm::isa<typename Traits::Insts::Ret>(*RI))
      break;
  }
  // No ret in this node (e.g. it doesn't end the function) — nothing to do.
  if (RI == E)
    return;

  // Convert the reverse_iterator position into its corresponding
  // (forward) iterator position.
  InstList::iterator InsertPoint = RI.base();
  --InsertPoint;
  Context.init(Node);
  Context.setInsertPoint(InsertPoint);

  Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
  if (IsEbpBasedFrame) {
    Variable *ebp = getPhysicalRegister(Traits::RegisterSet::Reg_ebp);
    // For late-stage liveness analysis (e.g. asm-verbose mode),
    // adding a fake use of esp before the assignment of esp=ebp keeps
    // previous esp adjustments from being dead-code eliminated.
    Context.insert(InstFakeUse::create(Func, esp));
    _mov(esp, ebp);
    _pop(ebp);
  } else {
    // add esp, SpillAreaSizeBytes
    if (SpillAreaSizeBytes)
      _add(esp, Ctx->getConstantInt32(SpillAreaSizeBytes));
  }

  // Add pop instructions for preserved registers, iterating in reverse
  // order relative to the pushes emitted in the prolog.
  llvm::SmallBitVector CalleeSaves =
      getRegisterSet(RegSet_CalleeSave, RegSet_None);
  for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
    SizeT j = CalleeSaves.size() - i - 1;
    // ebp was already restored above when the frame is ebp-based.
    if (j == Traits::RegisterSet::Reg_ebp && IsEbpBasedFrame)
      continue;
    if (CalleeSaves[j] && RegsUsed[j]) {
      _pop(getPhysicalRegister(j));
    }
  }

  if (!Ctx->getFlags().getUseSandboxing())
    return;
  // Change the original ret instruction into a sandboxed return sequence.
  // t:ecx = pop
  // bundle_lock
  // and t, ~31
  // jmp *t
  // bundle_unlock
  // FakeUse <original_ret_operand>
  Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx);
  _pop(T_ecx);
  lowerIndirectJump(T_ecx);
  if (RI->getSrcSize()) {
    // Keep the original return value alive across the jump sequence.
    Variable *RetValue = llvm::cast<Variable>(RI->getSrc(0));
    Context.insert(InstFakeUse::create(Func, RetValue));
  }
  RI->setDeleted();
}
| 1141 | |
| 1142 template <class Machine> void TargetX86Base<Machine>::split64(Variable *Var) { | 847 template <class Machine> void TargetX86Base<Machine>::split64(Variable *Var) { |
| 1143 switch (Var->getType()) { | 848 switch (Var->getType()) { |
| 1144 default: | 849 default: |
| 1145 return; | 850 return; |
| 1146 case IceType_i64: | 851 case IceType_i64: |
| 1147 // TODO: Only consider F64 if we need to push each half when | 852 // TODO: Only consider F64 if we need to push each half when |
| 1148 // passing as an argument to a function call. Note that each half | 853 // passing as an argument to a function call. Note that each half |
| 1149 // is still typed as I32. | 854 // is still typed as I32. |
| 1150 case IceType_f64: | 855 case IceType_f64: |
| 1151 break; | 856 break; |
| (...skipping 3073 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4225 _nop(RNG(Traits::X86_NUM_NOP_VARIANTS)); | 3930 _nop(RNG(Traits::X86_NUM_NOP_VARIANTS)); |
| 4226 } | 3931 } |
| 4227 } | 3932 } |
| 4228 | 3933 |
| 4229 template <class Machine> | 3934 template <class Machine> |
| 4230 void TargetX86Base<Machine>::lowerPhi(const InstPhi * /*Inst*/) { | 3935 void TargetX86Base<Machine>::lowerPhi(const InstPhi * /*Inst*/) { |
| 4231 Func->setError("Phi found in regular instruction list"); | 3936 Func->setError("Phi found in regular instruction list"); |
| 4232 } | 3937 } |
| 4233 | 3938 |
| 4234 template <class Machine> | 3939 template <class Machine> |
/// Lowers a ret instruction, moving the return value (if any) into its
/// ABI-assigned location and emitting the target ret.
template <class Machine>
void TargetX86Base<Machine>::lowerRet(const InstRet *Inst) {
  Variable *Reg = nullptr;
  if (Inst->hasRetValue()) {
    Operand *Src0 = legalize(Inst->getRetValue());
    if (Src0->getType() == IceType_i64) {
      // i64 is returned in the eax (lo) / edx (hi) pair; fake-use edx so the
      // edx assignment isn't dead-code eliminated.
      Variable *eax =
          legalizeToReg(loOperand(Src0), Traits::RegisterSet::Reg_eax);
      Variable *edx =
          legalizeToReg(hiOperand(Src0), Traits::RegisterSet::Reg_edx);
      Reg = eax;
      Context.insert(InstFakeUse::create(Func, edx));
    } else if (isScalarFloatingType(Src0->getType())) {
      // Scalar FP return values go through an x87 fld.
      _fld(Src0);
    } else if (isVectorType(Src0->getType())) {
      // Vector return values go in xmm0.
      Reg = legalizeToReg(Src0, Traits::RegisterSet::Reg_xmm0);
    } else {
      // All other scalar values are returned in eax.
      _mov(Reg, Src0, Traits::RegisterSet::Reg_eax);
    }
  }
  // Add a ret instruction even if sandboxing is enabled, because
  // addEpilog explicitly looks for a ret instruction as a marker for
  // where to insert the frame removal instructions.
  _ret(Reg);
  // Add a fake use of esp to make sure esp stays alive for the entire
  // function. Otherwise post-call esp adjustments get dead-code
  // eliminated. TODO: Are there more places where the fake use
  // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not
  // have a ret instruction.
  Variable *esp =
      Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
  Context.insert(InstFakeUse::create(Func, esp));
}
| 4267 | |
| 4268 template <class Machine> | |
| 4269 void TargetX86Base<Machine>::lowerSelect(const InstSelect *Inst) { | 3940 void TargetX86Base<Machine>::lowerSelect(const InstSelect *Inst) { |
| 4270 Variable *Dest = Inst->getDest(); | 3941 Variable *Dest = Inst->getDest(); |
| 4271 Type DestTy = Dest->getType(); | 3942 Type DestTy = Dest->getType(); |
| 4272 Operand *SrcT = Inst->getTrueOperand(); | 3943 Operand *SrcT = Inst->getTrueOperand(); |
| 4273 Operand *SrcF = Inst->getFalseOperand(); | 3944 Operand *SrcF = Inst->getFalseOperand(); |
| 4274 Operand *Condition = Inst->getCondition(); | 3945 Operand *Condition = Inst->getCondition(); |
| 4275 | 3946 |
| 4276 if (isVectorType(DestTy)) { | 3947 if (isVectorType(DestTy)) { |
| 4277 Type SrcTy = SrcT->getType(); | 3948 Type SrcTy = SrcT->getType(); |
| 4278 Variable *T = makeReg(SrcTy); | 3949 Variable *T = makeReg(SrcTy); |
| (...skipping 1123 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5402 } | 5073 } |
| 5403 // the offset is not eligible for blinding or pooling, return the original | 5074 // the offset is not eligible for blinding or pooling, return the original |
| 5404 // mem operand | 5075 // mem operand |
| 5405 return MemOperand; | 5076 return MemOperand; |
| 5406 } | 5077 } |
| 5407 | 5078 |
| 5408 } // end of namespace X86Internal | 5079 } // end of namespace X86Internal |
| 5409 } // end of namespace Ice | 5080 } // end of namespace Ice |
| 5410 | 5081 |
| 5411 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H | 5082 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H |
| OLD | NEW |