OLD | NEW |
---|---|
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// | 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 /// | 9 /// |
10 /// \file | 10 /// \file |
(...skipping 1078 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1089 Variable *T = makeReg(IceType_i32); | 1089 Variable *T = makeReg(IceType_i32); |
1090 auto *CalculateOperand = X86OperandMem::create( | 1090 auto *CalculateOperand = X86OperandMem::create( |
1091 Func, IceType_void, esp, Ctx->getConstantInt(IceType_i32, OutArgsSize)); | 1091 Func, IceType_void, esp, Ctx->getConstantInt(IceType_i32, OutArgsSize)); |
1092 _lea(T, CalculateOperand); | 1092 _lea(T, CalculateOperand); |
1093 _mov(Dest, T); | 1093 _mov(Dest, T); |
1094 } else { | 1094 } else { |
1095 _mov(Dest, esp); | 1095 _mov(Dest, esp); |
1096 } | 1096 } |
1097 } | 1097 } |
1098 | 1098 |
1099 template <typename TraitsType> | |
1100 void TargetX86Base<TraitsType>::lowerArguments() { | |
1101 VarList &Args = Func->getArgs(); | |
1102 unsigned NumXmmArgs = 0; | |
1103 bool XmmSlotsRemain = true; | |
1104 unsigned NumGprArgs = 0; | |
1105 bool GprSlotsRemain = true; | |
1106 | |
1107 Context.init(Func->getEntryNode()); | |
1108 Context.setInsertPoint(Context.getCur()); | |
1109 | |
1110 for (SizeT i = 0, End = Args.size(); | |
1111 i < End && (XmmSlotsRemain || GprSlotsRemain); ++i) { | |
1112 Variable *Arg = Args[i]; | |
1113 Type Ty = Arg->getType(); | |
1114 Variable *RegisterArg = nullptr; | |
1115 int32_t RegNum = Variable::NoRegister; | |
1116 if (isVectorType(Ty)) { | |
1117 RegNum = Traits::getRegisterForXmmArgNum(NumXmmArgs); | |
1118 if (RegNum == Variable::NoRegister) { | |
1119 XmmSlotsRemain = false; | |
1120 continue; | |
1121 } | |
1122 ++NumXmmArgs; | |
1123 RegisterArg = Func->makeVariable(Ty); | |
1124 } else if (isScalarFloatingType(Ty)) { | |
1125 if (!Traits::X86_PASS_SCALAR_FP_IN_XMM) { | |
1126 continue; | |
1127 } | |
1128 RegNum = Traits::getRegisterForXmmArgNum(NumXmmArgs); | |
1129 if (RegNum == Variable::NoRegister) { | |
1130 XmmSlotsRemain = false; | |
1131 continue; | |
1132 } | |
1133 ++NumXmmArgs; | |
1134 RegisterArg = Func->makeVariable(Ty); | |
1135 } else if (isScalarIntegerType(Ty)) { | |
1136 RegNum = Traits::getRegisterForGprArgNum(Ty, NumGprArgs); | |
1137 if (RegNum == Variable::NoRegister) { | |
1138 GprSlotsRemain = false; | |
1139 continue; | |
1140 } | |
1141 ++NumGprArgs; | |
1142 RegisterArg = Func->makeVariable(Ty); | |
1143 } | |
1144 assert(RegNum != Variable::NoRegister); | |
1145 assert(RegisterArg != nullptr); | |
1146 // Replace Arg in the argument list with the home register. Then generate | |
1147 // an instruction in the prolog to copy the home register to the assigned | |
1148 // location of Arg. | |
1149 if (BuildDefs::dump()) | |
1150 RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func)); | |
1151 RegisterArg->setRegNum(RegNum); | |
1152 RegisterArg->setIsArg(); | |
1153 Arg->setIsArg(false); | |
1154 | |
1155 Args[i] = RegisterArg; | |
1156 Context.insert<InstAssign>(Arg, RegisterArg); | |
1157 } | |
1158 } | |
1159 | |
1099 /// Strength-reduce scalar integer multiplication by a constant (for i32 or | 1160 /// Strength-reduce scalar integer multiplication by a constant (for i32 or |
1100 /// narrower) for certain constants. The lea instruction can be used to multiply | 1161 /// narrower) for certain constants. The lea instruction can be used to multiply |
1101 /// by 3, 5, or 9, and the lsh instruction can be used to multiply by powers of | 1162 /// by 3, 5, or 9, and the lsh instruction can be used to multiply by powers of |
1102 /// 2. These can be combined such that e.g. multiplying by 100 can be done as 2 | 1163 /// 2. These can be combined such that e.g. multiplying by 100 can be done as 2 |
1103 /// lea-based multiplies by 5, combined with left-shifting by 2. | 1164 /// lea-based multiplies by 5, combined with left-shifting by 2. |
1104 template <typename TraitsType> | 1165 template <typename TraitsType> |
1105 bool TargetX86Base<TraitsType>::optimizeScalarMul(Variable *Dest, Operand *Src0, | 1166 bool TargetX86Base<TraitsType>::optimizeScalarMul(Variable *Dest, Operand *Src0, |
1106 int32_t Src1) { | 1167 int32_t Src1) { |
1107 // Disable this optimization for Om1 and O0, just to keep things simple | 1168 // Disable this optimization for Om1 and O0, just to keep things simple |
1108 // there. | 1169 // there. |
(...skipping 912 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
2021 return; | 2082 return; |
2022 } | 2083 } |
2023 } | 2084 } |
2024 } | 2085 } |
2025 Operand *Src0 = legalize(Cond, Legal_Reg | Legal_Mem); | 2086 Operand *Src0 = legalize(Cond, Legal_Reg | Legal_Mem); |
2026 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 2087 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
2027 _cmp(Src0, Zero); | 2088 _cmp(Src0, Zero); |
2028 _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse()); | 2089 _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse()); |
2029 } | 2090 } |
2030 | 2091 |
2092 namespace { | |
John
2016/01/20 00:04:34
Anonymous namespaces are dangerous in .h files. Ma
sehr
2016/01/20 16:07:10
Done.
| |
2093 // constexprMax returns a (constexpr) max(S0, S1), and it is used for defining | |
2094 // OperandList in lowerCall. std::max() is supposed to work, but it doesn't. | |
2095 constexpr SizeT constexprMax(SizeT S0, SizeT S1) { return S0 < S1 ? S1 : S0; } | |
2096 // | |
2097 } // end of anonymous namespace | |
2098 | |
2099 template <typename TraitsType> | |
2100 void TargetX86Base<TraitsType>::lowerCall(const InstCall *Instr) { | |
2101 // Common x86 calling convention lowering: | |
2102 // | |
2103 // * At the point before the call, the stack must be aligned to 16 bytes. | |
2104 // | |
2105 // * Non-register arguments are pushed onto the stack in right-to-left order, | |
2106 // such that the left-most argument ends up on the top of the stack at the | |
2107 // lowest memory address. | |
2108 // | |
2109 // * Stack arguments of vector type are aligned to start at the next highest | |
2110 // multiple of 16 bytes. Other stack arguments are aligned to the next word | |
2111 // size boundary (4 or 8 bytes, respectively). | |
2112 NeedsStackAlignment = true; | |
2113 | |
2114 using OperandList = | |
2115 llvm::SmallVector<Operand *, constexprMax(Traits::X86_MAX_XMM_ARGS, | |
2116 Traits::X86_MAX_GPR_ARGS)>; | |
2117 OperandList XmmArgs; | |
2118 CfgVector<std::pair<const Type, Operand *>> GprArgs; | |
2119 OperandList StackArgs, StackArgLocations; | |
2120 uint32_t ParameterAreaSizeBytes = 0; | |
2121 | |
2122 // Classify each argument operand according to the location where the argument | |
2123 // is passed. | |
2124 for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) { | |
2125 Operand *Arg = Instr->getArg(i); | |
2126 const Type Ty = Arg->getType(); | |
2127 // The PNaCl ABI requires the width of arguments to be at least 32 bits. | |
2128 assert(typeWidthInBytes(Ty) >= 4); | |
2129 if (isVectorType(Ty) && (Traits::getRegisterForXmmArgNum(XmmArgs.size()) != | |
2130 Variable::NoRegister)) { | |
2131 XmmArgs.push_back(Arg); | |
2132 } else if (isScalarFloatingType(Ty) && Traits::X86_PASS_SCALAR_FP_IN_XMM && | |
2133 (Traits::getRegisterForXmmArgNum(0) != Variable::NoRegister)) { | |
2134 XmmArgs.push_back(Arg); | |
2135 } else if (isScalarIntegerType(Ty) && | |
2136 (Traits::getRegisterForGprArgNum(Ty, GprArgs.size()) != | |
2137 Variable::NoRegister)) { | |
2138 GprArgs.emplace_back(Ty, Arg); | |
2139 } else { | |
2140 // Place on stack. | |
2141 StackArgs.push_back(Arg); | |
2142 if (isVectorType(Arg->getType())) { | |
2143 ParameterAreaSizeBytes = | |
2144 Traits::applyStackAlignment(ParameterAreaSizeBytes); | |
2145 } | |
2146 Variable *esp = getPhysicalRegister(getStackReg(), Traits::WordType); | |
2147 Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes); | |
2148 StackArgLocations.push_back( | |
2149 Traits::X86OperandMem::create(Func, Ty, esp, Loc)); | |
2150 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType()); | |
2151 } | |
2152 } | |
2153 // Ensure there is enough space for the fstp/movs for floating returns. | |
2154 Variable *Dest = Instr->getDest(); | |
2155 const Type DestTy = Dest ? Dest->getType() : IceType_void; | |
2156 if (Traits::X86_PASS_SCALAR_FP_IN_XMM) { | |
2157 if (isScalarFloatingType(DestTy)) { | |
2158 ParameterAreaSizeBytes = | |
2159 std::max(static_cast<size_t>(ParameterAreaSizeBytes), | |
2160 typeWidthInBytesOnStack(DestTy)); | |
2161 } | |
2162 } | |
2163 // Adjust the parameter area so that the stack is aligned. It is assumed that | |
2164 // the stack is already aligned at the start of the calling sequence. | |
2165 ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes); | |
2166 assert(ParameterAreaSizeBytes <= maxOutArgsSizeBytes()); | |
2167 // Copy arguments that are passed on the stack to the appropriate stack | |
2168 // locations. | |
2169 for (SizeT i = 0, NumStackArgs = StackArgs.size(); i < NumStackArgs; ++i) { | |
2170 lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i])); | |
2171 } | |
2172 // Copy arguments to be passed in registers to the appropriate registers. | |
2173 for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) { | |
2174 Variable *Reg = | |
2175 legalizeToReg(XmmArgs[i], Traits::getRegisterForXmmArgNum(i)); | |
2176 // Generate a FakeUse of register arguments so that they do not get dead | |
2177 // code eliminated as a result of the FakeKill of scratch registers after | |
2178 // the call. | |
2179 Context.insert<InstFakeUse>(Reg); | |
2180 } | |
2181 // Materialize moves for arguments passed in GPRs. | |
2182 for (SizeT i = 0, NumGprArgs = GprArgs.size(); i < NumGprArgs; ++i) { | |
2183 const Type SignatureTy = GprArgs[i].first; | |
2184 Operand *Arg = GprArgs[i].second; | |
2185 Variable *Reg = | |
2186 legalizeToReg(Arg, Traits::getRegisterForGprArgNum(Arg->getType(), i)); | |
2187 assert(SignatureTy == IceType_i64 || SignatureTy == IceType_i32); | |
2188 assert(SignatureTy == Arg->getType()); | |
2189 (void)SignatureTy; | |
2190 Context.insert<InstFakeUse>(Reg); | |
2191 } | |
2192 // Generate the call instruction. Assign its result to a temporary with high | |
2193 // register allocation weight. | |
2194 // ReturnReg doubles as ReturnRegLo as necessary. | |
2195 Variable *ReturnReg = nullptr; | |
2196 Variable *ReturnRegHi = nullptr; | |
2197 if (Dest) { | |
2198 switch (DestTy) { | |
2199 case IceType_NUM: | |
2200 case IceType_void: | |
2201 case IceType_i1: | |
2202 case IceType_i8: | |
2203 case IceType_i16: | |
2204 llvm::report_fatal_error("Invalid Call dest type"); | |
2205 break; | |
2206 case IceType_i32: | |
2207 ReturnReg = makeReg(DestTy, Traits::RegisterSet::Reg_eax); | |
2208 break; | |
2209 case IceType_i64: | |
2210 if (Traits::Is64Bit) { | |
2211 ReturnReg = makeReg( | |
2212 IceType_i64, | |
2213 Traits::getGprForType(IceType_i64, Traits::RegisterSet::Reg_eax)); | |
2214 } else { | |
2215 ReturnReg = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); | |
2216 ReturnRegHi = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); | |
2217 } | |
2218 break; | |
2219 case IceType_f32: | |
2220 case IceType_f64: | |
2221 if (!Traits::X86_PASS_SCALAR_FP_IN_XMM) { | |
2222 // Leave ReturnReg==ReturnRegHi==nullptr, and capture the result with | |
2223 // the fstp instruction. | |
2224 break; | |
2225 } | |
2226 // Fallthrough intended. | |
2227 case IceType_v4i1: | |
2228 case IceType_v8i1: | |
2229 case IceType_v16i1: | |
2230 case IceType_v16i8: | |
2231 case IceType_v8i16: | |
2232 case IceType_v4i32: | |
2233 case IceType_v4f32: | |
2234 ReturnReg = makeReg(DestTy, Traits::RegisterSet::Reg_xmm0); | |
2235 break; | |
2236 } | |
2237 } | |
2238 // Emit the call to the function. | |
2239 Operand *CallTarget = | |
2240 legalize(Instr->getCallTarget(), Legal_Reg | Legal_Imm | Legal_AddrAbs); | |
2241 Inst *NewCall = emitCallToTarget(CallTarget, ReturnReg); | |
2242 // Keep the upper return register live on 32-bit platform. | |
2243 if (ReturnRegHi) | |
2244 Context.insert<InstFakeDef>(ReturnRegHi); | |
2245 // Mark the call as killing all the caller-saves registers. | |
Jim Stichnoth
2016/01/19 23:55:45
I would write "caller-save" for consistency with o
sehr
2016/01/20 16:07:10
Done.
| |
2246 Context.insert<InstFakeKill>(NewCall); | |
2247 // Handle x86-32 floating point returns. | |
2248 if (Dest != nullptr && isScalarFloatingType(Dest->getType()) && | |
2249 !Traits::X86_PASS_SCALAR_FP_IN_XMM) { | |
2250 // Special treatment for an FP function which returns its result in st(0). | |
2251 // If Dest ends up being a physical xmm register, the fstp emit code will | |
2252 // route st(0) through the space reserved in the function argument area | |
2253 // we allocated. | |
2254 _fstp(Dest); | |
2255 // Create a fake use of Dest in case it actually isn't used, because st(0) | |
2256 // still needs to be popped. | |
2257 Context.insert<InstFakeUse>(Dest); | |
2258 } | |
2259 // Generate a FakeUse to keep the call live if necessary. | |
2260 if (Instr->hasSideEffects() && ReturnReg) { | |
2261 Context.insert<InstFakeUse>(ReturnReg); | |
2262 } | |
2263 // Process the return value, if any. | |
2264 if (Dest == nullptr) | |
2265 return; | |
2266 // Assign the result of the call to Dest. | |
2267 if (isVectorType(DestTy)) { | |
2268 assert(ReturnReg && "Vector type requires a return register"); | |
2269 _movp(Dest, ReturnReg); | |
2270 } else if (isScalarFloatingType(DestTy)) { | |
2271 if (Traits::X86_PASS_SCALAR_FP_IN_XMM) { | |
2272 assert(ReturnReg && "FP type requires a return register"); | |
2273 _mov(Dest, ReturnReg); | |
2274 } | |
2275 } else { | |
2276 assert(isScalarIntegerType(DestTy)); | |
2277 assert(ReturnReg && "Integer type requires a return register"); | |
2278 if (DestTy == IceType_i64 && !Traits::Is64Bit) { | |
2279 assert(ReturnRegHi && "64-bit type requires two return registers"); | |
2280 auto *Dest64On32 = llvm::cast<Variable64On32>(Dest); | |
2281 Variable *DestLo = Dest64On32->getLo(); | |
2282 Variable *DestHi = Dest64On32->getHi(); | |
2283 _mov(DestLo, ReturnReg); | |
2284 _mov(DestHi, ReturnRegHi); | |
2285 } else { | |
2286 _mov(Dest, ReturnReg); | |
2287 } | |
2288 } | |
2289 } | |
2290 | |
2031 template <typename TraitsType> | 2291 template <typename TraitsType> |
2032 void TargetX86Base<TraitsType>::lowerCast(const InstCast *Inst) { | 2292 void TargetX86Base<TraitsType>::lowerCast(const InstCast *Inst) { |
2033 // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap) | 2293 // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap) |
2034 InstCast::OpKind CastKind = Inst->getCastKind(); | 2294 InstCast::OpKind CastKind = Inst->getCastKind(); |
2035 Variable *Dest = Inst->getDest(); | 2295 Variable *Dest = Inst->getDest(); |
2036 Type DestTy = Dest->getType(); | 2296 Type DestTy = Dest->getType(); |
2037 switch (CastKind) { | 2297 switch (CastKind) { |
2038 default: | 2298 default: |
2039 Func->setError("Cast type not supported"); | 2299 Func->setError("Cast type not supported"); |
2040 return; | 2300 return; |
(...skipping 2773 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
4814 _nop(RNGW(Traits::X86_NUM_NOP_VARIANTS)); | 5074 _nop(RNGW(Traits::X86_NUM_NOP_VARIANTS)); |
4815 } | 5075 } |
4816 } | 5076 } |
4817 | 5077 |
template <typename TraitsType>
void TargetX86Base<TraitsType>::lowerPhi(const InstPhi * /*Inst*/) {
  // Phi instructions are expected to have been eliminated by an earlier phi
  // lowering pass, so encountering one in the regular instruction stream is a
  // pass-ordering/front-end error rather than something to lower here.
  Func->setError("Phi found in regular instruction list");
}
4822 | 5082 |
4823 template <typename TraitsType> | 5083 template <typename TraitsType> |
5084 void TargetX86Base<TraitsType>::lowerRet(const InstRet *Inst) { | |
5085 Variable *Reg = nullptr; | |
5086 if (Inst->hasRetValue()) { | |
5087 Operand *RetValue = legalize(Inst->getRetValue()); | |
5088 const Type ReturnType = RetValue->getType(); | |
5089 assert(isVectorType(ReturnType) || isScalarFloatingType(ReturnType) || | |
5090 (ReturnType == IceType_i32) || (ReturnType == IceType_i64)); | |
5091 Reg = moveReturnValueToRegister(RetValue, ReturnType); | |
5092 } | |
5093 // Add a ret instruction even if sandboxing is enabled, because addEpilog | |
5094 // explicitly looks for a ret instruction as a marker for where to insert the | |
5095 // frame removal instructions. | |
5096 _ret(Reg); | |
5097 // Add a fake use of esp to make sure esp stays alive for the entire | |
5098 // function. Otherwise post-call esp adjustments get dead-code eliminated. | |
5099 keepEspLiveAtExit(); | |
5100 } | |
5101 | |
5102 template <typename TraitsType> | |
4824 void TargetX86Base<TraitsType>::lowerSelect(const InstSelect *Select) { | 5103 void TargetX86Base<TraitsType>::lowerSelect(const InstSelect *Select) { |
4825 Variable *Dest = Select->getDest(); | 5104 Variable *Dest = Select->getDest(); |
4826 | 5105 |
4827 if (isVectorType(Dest->getType())) { | 5106 if (isVectorType(Dest->getType())) { |
4828 lowerSelectVector(Select); | 5107 lowerSelectVector(Select); |
4829 return; | 5108 return; |
4830 } | 5109 } |
4831 | 5110 |
4832 Operand *Condition = Select->getCondition(); | 5111 Operand *Condition = Select->getCondition(); |
4833 // Handle folding opportunities. | 5112 // Handle folding opportunities. |
(...skipping 1786 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
6620 Func, MemOperand->getType(), RegTemp, nullptr, MemOperand->getIndex(), | 6899 Func, MemOperand->getType(), RegTemp, nullptr, MemOperand->getIndex(), |
6621 MemOperand->getShift(), MemOperand->getSegmentRegister()); | 6900 MemOperand->getShift(), MemOperand->getSegmentRegister()); |
6622 return NewMemOperand; | 6901 return NewMemOperand; |
6623 } | 6902 } |
6624 } | 6903 } |
6625 } | 6904 } |
6626 } // end of namespace X86NAMESPACE | 6905 } // end of namespace X86NAMESPACE |
6627 } // end of namespace Ice | 6906 } // end of namespace Ice |
6628 | 6907 |
6629 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H | 6908 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H |
OLD | NEW |