OLD | NEW |
---|---|
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// | 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 /// | 9 /// |
10 /// \file | 10 /// \file |
(...skipping 1078 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1089 Variable *T = makeReg(IceType_i32); | 1089 Variable *T = makeReg(IceType_i32); |
1090 auto *CalculateOperand = X86OperandMem::create( | 1090 auto *CalculateOperand = X86OperandMem::create( |
1091 Func, IceType_void, esp, Ctx->getConstantInt(IceType_i32, OutArgsSize)); | 1091 Func, IceType_void, esp, Ctx->getConstantInt(IceType_i32, OutArgsSize)); |
1092 _lea(T, CalculateOperand); | 1092 _lea(T, CalculateOperand); |
1093 _mov(Dest, T); | 1093 _mov(Dest, T); |
1094 } else { | 1094 } else { |
1095 _mov(Dest, esp); | 1095 _mov(Dest, esp); |
1096 } | 1096 } |
1097 } | 1097 } |
1098 | 1098 |
template <typename TraitsType>
void TargetX86Base<TraitsType>::lowerArguments() {
  // Rewrites incoming arguments that arrive in registers: each such argument
  // is replaced in Func's argument list by a register-pinned "home register"
  // variable, and a copy from the home register to the original argument
  // variable is inserted at the top of the entry node.
  VarList &Args = Func->getArgs();
  // Running counts of XMM/GPR argument slots consumed so far.
  unsigned NumXmmArgs = 0;
  bool XmmSlotsRemain = true;
  unsigned NumGprArgs = 0;
  bool GprSlotsRemain = true;

  // Insert the copies at the beginning of the entry node.
  Context.init(Func->getEntryNode());
  Context.setInsertPoint(Context.getCur());

  // Stop early once both register classes are exhausted; any remaining
  // arguments stay in their stack locations.
  for (SizeT i = 0, End = Args.size();
       i < End && (XmmSlotsRemain || GprSlotsRemain); ++i) {
    Variable *Arg = Args[i];
    Type Ty = Arg->getType();
    Variable *RegisterArg = nullptr;
    int32_t RegNum = Variable::NoRegister;
    if (isVectorType(Ty)) {
      // Vector arguments are passed in XMM registers (when available).
      RegNum = Traits::getRegisterForXmmArgNum(NumXmmArgs);
      if (RegNum == Variable::NoRegister) {
        XmmSlotsRemain = false;
        continue;
      }
      ++NumXmmArgs;
      RegisterArg = Func->makeVariable(Ty);
    } else if (isScalarFloatingType(Ty)) {
      // Scalar FP arguments use XMM registers only on targets whose calling
      // convention passes scalar FP in XMM (e.g. x86-64); otherwise they are
      // left on the stack.
      if (!Traits::X86_PASS_SCALAR_FP_IN_XMM) {
        continue;
      }
      RegNum = Traits::getRegisterForXmmArgNum(NumXmmArgs);
      if (RegNum == Variable::NoRegister) {
        XmmSlotsRemain = false;
        continue;
      }
      ++NumXmmArgs;
      RegisterArg = Func->makeVariable(Ty);
    } else if (isScalarIntegerType(Ty)) {
      // Integer arguments use GPRs; the register choice depends on the type
      // (e.g. 64-bit types need a 64-bit GPR).
      RegNum = Traits::getRegisterForGprArgNum(Ty, NumGprArgs);
      if (RegNum == Variable::NoRegister) {
        GprSlotsRemain = false;
        continue;
      }
      ++NumGprArgs;
      RegisterArg = Func->makeVariable(Ty);
    }
    // Every type that reaches this point must have been assigned a register
    // by one of the branches above.
    assert(RegNum != Variable::NoRegister);
    assert(RegisterArg != nullptr);
    // Replace Arg in the argument list with the home register. Then generate
    // an instruction in the prolog to copy the home register to the assigned
    // location of Arg.
    if (BuildDefs::dump())
      RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func));
    RegisterArg->setRegNum(RegNum);
    RegisterArg->setIsArg();
    Arg->setIsArg(false);

    Args[i] = RegisterArg;
    Context.insert<InstAssign>(Arg, RegisterArg);
  }
}
1159 | |
1099 /// Strength-reduce scalar integer multiplication by a constant (for i32 or | 1160 /// Strength-reduce scalar integer multiplication by a constant (for i32 or |
1100 /// narrower) for certain constants. The lea instruction can be used to multiply | 1161 /// narrower) for certain constants. The lea instruction can be used to multiply |
1101 /// by 3, 5, or 9, and the lsh instruction can be used to multiply by powers of | 1162 /// by 3, 5, or 9, and the lsh instruction can be used to multiply by powers of |
1102 /// 2. These can be combined such that e.g. multiplying by 100 can be done as 2 | 1163 /// 2. These can be combined such that e.g. multiplying by 100 can be done as 2 |
1103 /// lea-based multiplies by 5, combined with left-shifting by 2. | 1164 /// lea-based multiplies by 5, combined with left-shifting by 2. |
1104 template <typename TraitsType> | 1165 template <typename TraitsType> |
1105 bool TargetX86Base<TraitsType>::optimizeScalarMul(Variable *Dest, Operand *Src0, | 1166 bool TargetX86Base<TraitsType>::optimizeScalarMul(Variable *Dest, Operand *Src0, |
1106 int32_t Src1) { | 1167 int32_t Src1) { |
1107 // Disable this optimization for Om1 and O0, just to keep things simple | 1168 // Disable this optimization for Om1 and O0, just to keep things simple |
1108 // there. | 1169 // there. |
(...skipping 912 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
2021 return; | 2082 return; |
2022 } | 2083 } |
2023 } | 2084 } |
2024 } | 2085 } |
2025 Operand *Src0 = legalize(Cond, Legal_Reg | Legal_Mem); | 2086 Operand *Src0 = legalize(Cond, Legal_Reg | Legal_Mem); |
2026 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 2087 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
2027 _cmp(Src0, Zero); | 2088 _cmp(Src0, Zero); |
2028 _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse()); | 2089 _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse()); |
2029 } | 2090 } |
2030 | 2091 |
2092 namespace { | |
2093 // constexprMax returns a (constexpr) max(S0, S1), and it is used for defining | |
2094 // OperandList in lowerCall. std::max() is supposed to work, but it doesn't. | |
2095 constexpr SizeT constexprMax(SizeT S0, SizeT S1) { return S0 < S1 ? S1 : S0; } | |
2096 // | |
2097 } // end of anonymous namespace | |
2098 | |
2099 template <typename TraitsType> | |
2100 void TargetX86Base<TraitsType>::lowerCall(const InstCall *Instr) { | |
2101 // Common x86 calling convention lowering: | |
2102 // | |
2103 // * At the point before the call, the stack must be aligned to 16 bytes. | |
2104 // | |
2105 // * Non-register arguments are pushed onto the stack in right-to-left order, | |
2106 // such that the left-most argument ends up on the top of the stack at the | |
2107 // lowest memory address. | |
2108 // | |
2109 // * Stack arguments of vector type are aligned to start at the next highest | |
2110 // multiple of 16 bytes. Other stack arguments are aligned to the next word | |
2111 // size boundary (4 or 8 bytes, respectively). | |
2112 NeedsStackAlignment = true; | |
2113 | |
2114 using OperandList = | |
2115 llvm::SmallVector<Operand *, constexprMax(Traits::X86_MAX_XMM_ARGS, | |
2116 Traits::X86_MAX_GPR_ARGS)>; | |
2117 OperandList XmmArgs; | |
2118 CfgVector<std::pair<const Type, Operand *>> GprArgs; | |
2119 OperandList StackArgs, StackArgLocations; | |
2120 int32_t ParameterAreaSizeBytes = 0; | |
Jim Stichnoth
2016/01/19 20:54:06
Make this uint32_t instead.
sehr
2016/01/19 21:47:35
Done.
| |
2121 | |
2122 // Classify each argument operand according to the location where the | |
Jim Stichnoth
2016/01/19 20:54:05
reflow to 80-col
sehr
2016/01/19 21:47:35
Done.
| |
2123 // argument is passed. | |
2124 for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) { | |
2125 Operand *Arg = Instr->getArg(i); | |
2126 Type Ty = Arg->getType(); | |
Jim Stichnoth
2016/01/19 20:54:06
const
sehr
2016/01/19 21:47:35
Done.
| |
2127 // The PNaCl ABI requires the width of arguments to be at least 32 bits. | |
2128 assert(typeWidthInBytes(Ty) >= 4); | |
2129 if (isVectorType(Ty) && (Traits::getRegisterForXmmArgNum(XmmArgs.size()) != | |
2130 Variable::NoRegister)) { | |
2131 XmmArgs.push_back(Arg); | |
2132 } else if (isScalarFloatingType(Ty) && Traits::X86_PASS_SCALAR_FP_IN_XMM && | |
2133 (Traits::getRegisterForXmmArgNum(0) != Variable::NoRegister)) { | |
2134 XmmArgs.push_back(Arg); | |
2135 } else if (isScalarIntegerType(Ty) && | |
2136 (Traits::getRegisterForGprArgNum(Ty, GprArgs.size()) != | |
2137 Variable::NoRegister)) { | |
2138 GprArgs.emplace_back(Ty, Arg); | |
2139 } else { | |
2140 // Place on stack. | |
2141 StackArgs.push_back(Arg); | |
2142 if (isVectorType(Arg->getType())) { | |
2143 ParameterAreaSizeBytes = | |
2144 Traits::applyStackAlignment(ParameterAreaSizeBytes); | |
2145 } | |
2146 Variable *esp = getPhysicalRegister(getStackReg(), Traits::WordType); | |
2147 Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes); | |
2148 StackArgLocations.push_back( | |
2149 Traits::X86OperandMem::create(Func, Ty, esp, Loc)); | |
2150 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType()); | |
2151 } | |
2152 } | |
2153 // Ensure there is enough space for the fstp/movs for floating returns. | |
2154 Variable *Dest = Instr->getDest(); | |
Jim Stichnoth
2016/01/19 20:54:06
Add something like:
const Type DestTy = Dest ? D
sehr
2016/01/19 21:47:35
Done.
| |
2155 if (Traits::X86_PASS_SCALAR_FP_IN_XMM) { | |
2156 if (Dest != nullptr && isScalarFloatingType(Dest->getType())) { | |
2157 ParameterAreaSizeBytes = | |
2158 std::max(static_cast<size_t>(ParameterAreaSizeBytes), | |
2159 typeWidthInBytesOnStack(Dest->getType())); | |
2160 } | |
2161 } | |
2162 // Adjust the parameter area so that the stack is aligned. It is assumed that | |
2163 // the stack is already aligned at the start of the calling sequence. | |
2164 ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes); | |
2165 assert(static_cast<uint32_t>(ParameterAreaSizeBytes) <= | |
Jim Stichnoth
2016/01/19 20:54:06
Remove this static_cast, assuming ParameterAreaSiz
sehr
2016/01/19 21:47:35
Done.
| |
2166 maxOutArgsSizeBytes()); | |
2167 // Copy arguments that are passed on the stack to the appropriate stack | |
2168 // locations. | |
2169 for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) { | |
Jim Stichnoth
2016/01/19 20:54:06
Use NumStackArgs instead of "e", for consistency w
sehr
2016/01/19 21:47:35
Done.
| |
2170 lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i])); | |
2171 } | |
2172 // Copy arguments to be passed in registers to the appropriate registers. | |
2173 // TODO: Investigate the impact of lowering arguments passed in registers | |
Jim Stichnoth
2016/01/19 20:54:06
I think this TODO should just be removed entirely.
sehr
2016/01/19 21:47:35
Done.
| |
2174 // after lowering stack arguments as opposed to the other way around. | |
2175 // Lowering register arguments after stack arguments may reduce register | |
2176 // pressure. On the other hand, lowering register arguments first (before | |
2177 // stack arguments) may result in more compact code, as the memory operand | |
2178 // displacements may end up being smaller before any stack adjustment is | |
2179 // done. | |
2180 for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) { | |
2181 Variable *Reg = | |
2182 legalizeToReg(XmmArgs[i], Traits::getRegisterForXmmArgNum(i)); | |
2183 // Generate a FakeUse of register arguments so that they do not get dead | |
2184 // code eliminated as a result of the FakeKill of scratch registers after | |
2185 // the call. | |
2186 Context.insert<InstFakeUse>(Reg); | |
2187 } | |
2188 // Materialize moves for arguments passed in GPRs. | |
2189 for (SizeT i = 0, NumGprArgs = GprArgs.size(); i < NumGprArgs; ++i) { | |
2190 const Type SignatureTy = GprArgs[i].first; | |
2191 Operand *Arg = GprArgs[i].second; | |
2192 Variable *Reg = | |
2193 legalizeToReg(Arg, Traits::getRegisterForGprArgNum(Arg->getType(), i)); | |
2194 assert(SignatureTy == IceType_i64 || SignatureTy == IceType_i32); | |
2195 assert(SignatureTy == Arg->getType()); | |
2196 (void)SignatureTy; | |
2197 Context.insert<InstFakeUse>(Reg); | |
2198 } | |
2199 // Generate the call instruction. Assign its result to a temporary with high | |
2200 // register allocation weight. | |
2201 // ReturnReg doubles as ReturnRegLo as necessary. | |
2202 Variable *ReturnReg = nullptr; | |
2203 Variable *ReturnRegHi = nullptr; | |
2204 if (Dest) { | |
2205 const Type DestTy = Dest->getType(); | |
2206 switch (DestTy) { | |
2207 case IceType_NUM: | |
2208 case IceType_void: | |
2209 case IceType_i1: | |
2210 case IceType_i8: | |
2211 case IceType_i16: | |
2212 llvm::report_fatal_error("Invalid Call dest type"); | |
2213 break; | |
2214 case IceType_i32: | |
2215 ReturnReg = makeReg(DestTy, Traits::RegisterSet::Reg_eax); | |
2216 break; | |
2217 case IceType_i64: | |
2218 if (Traits::Is64Bit) { | |
2219 ReturnReg = makeReg( | |
2220 IceType_i64, | |
2221 Traits::getGprForType(IceType_i64, Traits::RegisterSet::Reg_eax)); | |
2222 } else { | |
2223 ReturnReg = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); | |
2224 ReturnRegHi = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); | |
2225 } | |
2226 break; | |
2227 case IceType_f32: | |
2228 case IceType_f64: | |
2229 if (!Traits::X86_PASS_SCALAR_FP_IN_XMM) { | |
2230 // Leave ReturnReg==ReturnRegHi==nullptr, and capture the result with | |
2231 // the fstp instruction. | |
2232 break; | |
2233 } | |
2234 // Fallthrough intended. | |
2235 case IceType_v4i1: | |
2236 case IceType_v8i1: | |
2237 case IceType_v16i1: | |
2238 case IceType_v16i8: | |
2239 case IceType_v8i16: | |
2240 case IceType_v4i32: | |
2241 case IceType_v4f32: | |
2242 ReturnReg = makeReg(DestTy, Traits::RegisterSet::Reg_xmm0); | |
2243 break; | |
2244 } | |
2245 } | |
2246 // Emit the call to the function. | |
2247 Operand *CallTarget = | |
2248 legalize(Instr->getCallTarget(), Legal_Reg | Legal_Imm | Legal_AddrAbs); | |
2249 Inst *NewCall = emitCallToTarget(CallTarget, ReturnReg); | |
2250 // Keep the upper return register live on 32-bit platform. | |
2251 if (ReturnRegHi) | |
2252 Context.insert<InstFakeDef>(ReturnRegHi); | |
2253 // Mark the call as killing all the callee-saves registers. | |
Jim Stichnoth
2016/01/19 20:54:06
caller-save
sehr
2016/01/19 21:47:35
Duh. Fixed.
| |
2254 Context.insert<InstFakeKill>(NewCall); | |
2255 // Handle x86-32 floating point returns. | |
2256 if (Dest != nullptr && isScalarFloatingType(Dest->getType()) && | |
2257 !Traits::X86_PASS_SCALAR_FP_IN_XMM) { | |
2258 // Special treatment for an FP function which returns its result in st(0). | |
2259 // If Dest ends up being a physical xmm register, the fstp emit code will | |
2260 // route st(0) through the space reserved in the function argument area | |
2261 // we allocated. | |
2262 _fstp(Dest); | |
2263 // Create a fake use of Dest in case it actually isn't used, because st(0) | |
2264 // still needs to be popped. | |
2265 Context.insert<InstFakeUse>(Dest); | |
2266 } | |
2267 // Generate a FakeUse to keep the call live if necessary. | |
2268 if (Instr->hasSideEffects() && ReturnReg) { | |
2269 Context.insert<InstFakeUse>(ReturnReg); | |
2270 } | |
2271 // Process the return value, if any. | |
2272 if (!Dest) | |
Jim Stichnoth
2016/01/19 20:54:06
Dest == nullptr
sehr
2016/01/19 21:47:36
Done.
| |
2273 return; | |
2274 // Assign the result of the call to Dest. | |
2275 const Type DestTy = Dest->getType(); | |
2276 if (isVectorType(DestTy)) { | |
2277 assert(ReturnReg && "Vector type requires a return register"); | |
2278 _movp(Dest, ReturnReg); | |
2279 } else if (isScalarFloatingType(DestTy)) { | |
2280 if (Traits::X86_PASS_SCALAR_FP_IN_XMM) { | |
2281 assert(ReturnReg && "FP type requires a return register"); | |
2282 _mov(Dest, ReturnReg); | |
2283 } | |
2284 } else { | |
2285 assert(isScalarIntegerType(DestTy)); | |
2286 assert(ReturnReg && "Integer type requires a return register"); | |
2287 if (DestTy == IceType_i64 && !Traits::Is64Bit) { | |
2288 assert(ReturnRegHi && "64-bit type requires two return registers"); | |
2289 auto *Dest64On32 = llvm::cast<Variable64On32>(Dest); | |
2290 Variable *DestLo = Dest64On32->getLo(); | |
2291 Variable *DestHi = Dest64On32->getHi(); | |
2292 _mov(DestLo, ReturnReg); | |
2293 _mov(DestHi, ReturnRegHi); | |
2294 } else { | |
2295 _mov(Dest, ReturnReg); | |
2296 } | |
2297 } | |
2298 } | |
2299 | |
2031 template <typename TraitsType> | 2300 template <typename TraitsType> |
2032 void TargetX86Base<TraitsType>::lowerCast(const InstCast *Inst) { | 2301 void TargetX86Base<TraitsType>::lowerCast(const InstCast *Inst) { |
2033 // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap) | 2302 // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap) |
2034 InstCast::OpKind CastKind = Inst->getCastKind(); | 2303 InstCast::OpKind CastKind = Inst->getCastKind(); |
2035 Variable *Dest = Inst->getDest(); | 2304 Variable *Dest = Inst->getDest(); |
2036 Type DestTy = Dest->getType(); | 2305 Type DestTy = Dest->getType(); |
2037 switch (CastKind) { | 2306 switch (CastKind) { |
2038 default: | 2307 default: |
2039 Func->setError("Cast type not supported"); | 2308 Func->setError("Cast type not supported"); |
2040 return; | 2309 return; |
(...skipping 2773 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
4814 _nop(RNGW(Traits::X86_NUM_NOP_VARIANTS)); | 5083 _nop(RNGW(Traits::X86_NUM_NOP_VARIANTS)); |
4815 } | 5084 } |
4816 } | 5085 } |
4817 | 5086 |
template <typename TraitsType>
void TargetX86Base<TraitsType>::lowerPhi(const InstPhi * /*Inst*/) {
  // Phi instructions are not expected to survive into the regular instruction
  // stream by the time target lowering runs, so encountering one is flagged
  // as a Cfg error rather than lowered.
  Func->setError("Phi found in regular instruction list");
}
4822 | 5091 |
template <typename TraitsType>
void TargetX86Base<TraitsType>::lowerRet(const InstRet *Inst) {
  // Lowers a return: moves the return value (if any) into the appropriate
  // return register, then emits the ret instruction.
  Variable *Reg = nullptr;
  if (Inst->hasRetValue()) {
    Operand *RetValue = legalize(Inst->getRetValue());
    const Type ReturnType = RetValue->getType();
    // Only i32/i64, scalar FP, and vector return types are expected here;
    // narrower integer returns should have been widened earlier.
    assert(isVectorType(ReturnType) || isScalarFloatingType(ReturnType) ||
           (ReturnType == IceType_i32) || (ReturnType == IceType_i64));
    Reg = moveReturnValueToRegister(RetValue, ReturnType);
  }
  // Add a ret instruction even if sandboxing is enabled, because addEpilog
  // explicitly looks for a ret instruction as a marker for where to insert the
  // frame removal instructions.
  _ret(Reg);
  // Add a fake use of esp to make sure esp stays alive for the entire
  // function. Otherwise post-call esp adjustments get dead-code eliminated.
  keepEspLiveAtExit();
}
5110 | |
5111 template <typename TraitsType> | |
4824 void TargetX86Base<TraitsType>::lowerSelect(const InstSelect *Select) { | 5112 void TargetX86Base<TraitsType>::lowerSelect(const InstSelect *Select) { |
4825 Variable *Dest = Select->getDest(); | 5113 Variable *Dest = Select->getDest(); |
4826 | 5114 |
4827 if (isVectorType(Dest->getType())) { | 5115 if (isVectorType(Dest->getType())) { |
4828 lowerSelectVector(Select); | 5116 lowerSelectVector(Select); |
4829 return; | 5117 return; |
4830 } | 5118 } |
4831 | 5119 |
4832 Operand *Condition = Select->getCondition(); | 5120 Operand *Condition = Select->getCondition(); |
4833 // Handle folding opportunities. | 5121 // Handle folding opportunities. |
(...skipping 1786 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
6620 Func, MemOperand->getType(), RegTemp, nullptr, MemOperand->getIndex(), | 6908 Func, MemOperand->getType(), RegTemp, nullptr, MemOperand->getIndex(), |
6621 MemOperand->getShift(), MemOperand->getSegmentRegister()); | 6909 MemOperand->getShift(), MemOperand->getSegmentRegister()); |
6622 return NewMemOperand; | 6910 return NewMemOperand; |
6623 } | 6911 } |
6624 } | 6912 } |
6625 } | 6913 } |
6626 } // end of namespace X86NAMESPACE | 6914 } // end of namespace X86NAMESPACE |
6627 } // end of namespace Ice | 6915 } // end of namespace Ice |
6628 | 6916 |
6629 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H | 6917 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H |
OLD | NEW |