| OLD | NEW |
| 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// | 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 /// | 9 /// |
| 10 /// \file | 10 /// \file |
| (...skipping 2108 matching lines...) |
| 2119 } | 2119 } |
| 2120 } | 2120 } |
| 2121 | 2121 |
| 2122 Operand *Src0 = legalize(Cond, Legal_Reg | Legal_Mem); | 2122 Operand *Src0 = legalize(Cond, Legal_Reg | Legal_Mem); |
| 2123 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 2123 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
| 2124 _cmp(Src0, Zero); | 2124 _cmp(Src0, Zero); |
| 2125 _br(Traits::Cond::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse()); | 2125 _br(Traits::Cond::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse()); |
| 2126 } | 2126 } |
| 2127 | 2127 |
| 2128 template <class Machine> | 2128 template <class Machine> |
| 2129 void TargetX86Base<Machine>::lowerCall(const InstCall *Instr) { | |
| 2130 // x86-32 calling convention: | |
| 2131 // | |
| 2132 // * At the point before the call, the stack must be aligned to 16 | |
| 2133 // bytes. | |
| 2134 // | |
| 2135 // * The first four arguments of vector type, regardless of their | |
| 2136 // position relative to the other arguments in the argument list, are | |
| 2137 // placed in registers xmm0 - xmm3. | |
| 2138 // | |
| 2139 // * Other arguments are pushed onto the stack in right-to-left order, | |
| 2140 // such that the left-most argument ends up on the top of the stack at | |
| 2141 // the lowest memory address. | |
| 2142 // | |
| 2143 // * Stack arguments of vector type are aligned to start at the next | |
| 2144 // highest multiple of 16 bytes. Other stack arguments are aligned to | |
| 2145 // 4 bytes. | |
| 2146 // | |
| 2147 // This intends to match the section "IA-32 Function Calling | |
| 2148 // Convention" of the document "OS X ABI Function Call Guide" by | |
| 2149 // Apple. | |
| 2150 NeedsStackAlignment = true; | |
| 2151 | |
| 2152 typedef std::vector<Operand *> OperandList; | |
| 2153 OperandList XmmArgs; | |
| 2154 OperandList StackArgs, StackArgLocations; | |
| 2155 uint32_t ParameterAreaSizeBytes = 0; | |
| 2156 | |
| 2157 // Classify each argument operand according to the location where the | |
| 2158 // argument is passed. | |
| 2159 for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) { | |
| 2160 Operand *Arg = Instr->getArg(i); | |
| 2161 Type Ty = Arg->getType(); | |
| 2162 // The PNaCl ABI requires the width of arguments to be at least 32 bits. | |
| 2163 assert(typeWidthInBytes(Ty) >= 4); | |
| 2164 if (isVectorType(Ty) && XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) { | |
| 2165 XmmArgs.push_back(Arg); | |
| 2166 } else { | |
| 2167 StackArgs.push_back(Arg); | |
| 2168 if (isVectorType(Arg->getType())) { | |
| 2169 ParameterAreaSizeBytes = | |
| 2170 Traits::applyStackAlignment(ParameterAreaSizeBytes); | |
| 2171 } | |
| 2172 Variable *esp = | |
| 2173 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp); | |
| 2174 Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes); | |
| 2175 StackArgLocations.push_back( | |
| 2176 Traits::X86OperandMem::create(Func, Ty, esp, Loc)); | |
| 2177 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType()); | |
| 2178 } | |
| 2179 } | |
| 2180 | |
| 2181 // Adjust the parameter area so that the stack is aligned. It is | |
| 2182 // assumed that the stack is already aligned at the start of the | |
| 2183 // calling sequence. | |
| 2184 ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes); | |
| 2185 | |
| 2186 // Subtract the appropriate amount for the argument area. This also | |
| 2187 // takes care of setting the stack adjustment during emission. | |
| 2188 // | |
| 2189 // TODO: If for some reason the call instruction gets dead-code | |
| 2190 // eliminated after lowering, we would need to ensure that the | |
| 2191 // pre-call and the post-call esp adjustment get eliminated as well. | |
| 2192 if (ParameterAreaSizeBytes) { | |
| 2193 _adjust_stack(ParameterAreaSizeBytes); | |
| 2194 } | |
| 2195 | |
| 2196 // Copy arguments that are passed on the stack to the appropriate | |
| 2197 // stack locations. | |
| 2198 for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) { | |
| 2199 lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i])); | |
| 2200 } | |
| 2201 | |
| 2202 // Copy arguments to be passed in registers to the appropriate | |
| 2203 // registers. | |
| 2204 // TODO: Investigate the impact of lowering arguments passed in | |
| 2205 // registers after lowering stack arguments as opposed to the other | |
| 2206 // way around. Lowering register arguments after stack arguments may | |
| 2207 // reduce register pressure. On the other hand, lowering register | |
| 2208 // arguments first (before stack arguments) may result in more compact | |
| 2209 // code, as the memory operand displacements may end up being smaller | |
| 2210 // before any stack adjustment is done. | |
| 2211 for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) { | |
| 2212 Variable *Reg = | |
| 2213 legalizeToReg(XmmArgs[i], Traits::RegisterSet::Reg_xmm0 + i); | |
| 2214 // Generate a FakeUse of register arguments so that they do not get | |
| 2215 // dead code eliminated as a result of the FakeKill of scratch | |
| 2216 // registers after the call. | |
| 2217 Context.insert(InstFakeUse::create(Func, Reg)); | |
| 2218 } | |
| 2219 // Generate the call instruction. Assign its result to a temporary | |
| 2220 // with high register allocation weight. | |
| 2221 Variable *Dest = Instr->getDest(); | |
| 2222 // ReturnReg doubles as ReturnRegLo as necessary. | |
| 2223 Variable *ReturnReg = nullptr; | |
| 2224 Variable *ReturnRegHi = nullptr; | |
| 2225 if (Dest) { | |
| 2226 switch (Dest->getType()) { | |
| 2227 case IceType_NUM: | |
| 2228 llvm_unreachable("Invalid Call dest type"); | |
| 2229 break; | |
| 2230 case IceType_void: | |
| 2231 break; | |
| 2232 case IceType_i1: | |
| 2233 case IceType_i8: | |
| 2234 case IceType_i16: | |
| 2235 case IceType_i32: | |
| 2236 ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_eax); | |
| 2237 break; | |
| 2238 case IceType_i64: | |
| 2239 ReturnReg = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); | |
| 2240 ReturnRegHi = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); | |
| 2241 break; | |
| 2242 case IceType_f32: | |
| 2243 case IceType_f64: | |
| 2244 // Leave ReturnReg==ReturnRegHi==nullptr, and capture the result with | |
| 2245 // the fstp instruction. | |
| 2246 break; | |
| 2247 case IceType_v4i1: | |
| 2248 case IceType_v8i1: | |
| 2249 case IceType_v16i1: | |
| 2250 case IceType_v16i8: | |
| 2251 case IceType_v8i16: | |
| 2252 case IceType_v4i32: | |
| 2253 case IceType_v4f32: | |
| 2254 ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_xmm0); | |
| 2255 break; | |
| 2256 } | |
| 2257 } | |
| 2258 Operand *CallTarget = legalize(Instr->getCallTarget()); | |
| 2259 const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing(); | |
| 2260 if (NeedSandboxing) { | |
| 2261 if (llvm::isa<Constant>(CallTarget)) { | |
| 2262 _bundle_lock(InstBundleLock::Opt_AlignToEnd); | |
| 2263 } else { | |
| 2264 Variable *CallTargetVar = nullptr; | |
| 2265 _mov(CallTargetVar, CallTarget); | |
| 2266 _bundle_lock(InstBundleLock::Opt_AlignToEnd); | |
| 2267 const SizeT BundleSize = | |
| 2268 1 << Func->getAssembler<>()->getBundleAlignLog2Bytes(); | |
| 2269 _and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1))); | |
| 2270 CallTarget = CallTargetVar; | |
| 2271 } | |
| 2272 } | |
| 2273 Inst *NewCall = Traits::Insts::Call::create(Func, ReturnReg, CallTarget); | |
| 2274 Context.insert(NewCall); | |
| 2275 if (NeedSandboxing) | |
| 2276 _bundle_unlock(); | |
| 2277 if (ReturnRegHi) | |
| 2278 Context.insert(InstFakeDef::create(Func, ReturnRegHi)); | |
| 2279 | |
| 2280 // Add the appropriate offset to esp. The call instruction takes care | |
| 2281 // of resetting the stack offset during emission. | |
| 2282 if (ParameterAreaSizeBytes) { | |
| 2283 Variable *esp = | |
| 2284 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp); | |
| 2285 _add(esp, Ctx->getConstantInt32(ParameterAreaSizeBytes)); | |
| 2286 } | |
| 2287 | |
| 2288 // Insert a register-kill pseudo instruction. | |
| 2289 Context.insert(InstFakeKill::create(Func, NewCall)); | |
| 2290 | |
| 2291 // Generate a FakeUse to keep the call live if necessary. | |
| 2292 if (Instr->hasSideEffects() && ReturnReg) { | |
| 2293 Inst *FakeUse = InstFakeUse::create(Func, ReturnReg); | |
| 2294 Context.insert(FakeUse); | |
| 2295 } | |
| 2296 | |
| 2297 if (!Dest) | |
| 2298 return; | |
| 2299 | |
| 2300 // Assign the result of the call to Dest. | |
| 2301 if (ReturnReg) { | |
| 2302 if (ReturnRegHi) { | |
| 2303 assert(Dest->getType() == IceType_i64); | |
| 2304 split64(Dest); | |
| 2305 Variable *DestLo = Dest->getLo(); | |
| 2306 Variable *DestHi = Dest->getHi(); | |
| 2307 _mov(DestLo, ReturnReg); | |
| 2308 _mov(DestHi, ReturnRegHi); | |
| 2309 } else { | |
| 2310 assert(Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 || | |
| 2311 Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 || | |
| 2312 isVectorType(Dest->getType())); | |
| 2313 if (isVectorType(Dest->getType())) { | |
| 2314 _movp(Dest, ReturnReg); | |
| 2315 } else { | |
| 2316 _mov(Dest, ReturnReg); | |
| 2317 } | |
| 2318 } | |
| 2319 } else if (isScalarFloatingType(Dest->getType())) { | |
| 2320 // Special treatment for an FP function which returns its result in | |
| 2321 // st(0). | |
| 2322 // If Dest ends up being a physical xmm register, the fstp emit code | |
| 2323 // will route st(0) through a temporary stack slot. | |
| 2324 _fstp(Dest); | |
| 2325 // Create a fake use of Dest in case it actually isn't used, | |
| 2326 // because st(0) still needs to be popped. | |
| 2327 Context.insert(InstFakeUse::create(Func, Dest)); | |
| 2328 } | |
| 2329 } | |
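(Aside: the deleted lowerCall above classifies each argument as either an xmm register argument (the first four vector-typed arguments go in xmm0-xmm3) or a stack argument placed in a 16-byte-aligned parameter area. A minimal standalone sketch of that layout computation, with illustrative stand-ins (kMaxXmmArgs, kStackAlignment, Arg) for Subzero's actual Traits definitions:

    #include <cstdint>
    #include <vector>

    constexpr uint32_t kMaxXmmArgs = 4;      // cf. Traits::X86_MAX_XMM_ARGS
    constexpr uint32_t kStackAlignment = 16; // cf. Traits::applyStackAlignment

    struct Arg {
      bool IsVector;      // vector arguments prefer xmm registers
      uint32_t SizeBytes; // width on the stack (>= 4 under the PNaCl ABI)
    };

    // Return each stack-passed argument's byte offset from esp after the
    // pre-call stack adjustment, mirroring lowerCall's classification loop.
    inline std::vector<uint32_t> layoutStackArgs(const std::vector<Arg> &Args) {
      auto alignUp = [](uint32_t N) {
        return (N + kStackAlignment - 1) & ~(kStackAlignment - 1);
      };
      std::vector<uint32_t> Offsets;
      uint32_t XmmCount = 0;
      uint32_t ParameterAreaSizeBytes = 0;
      for (const Arg &A : Args) {
        if (A.IsVector && XmmCount < kMaxXmmArgs) {
          ++XmmCount; // passed in xmm0..xmm3; no stack slot
          continue;
        }
        if (A.IsVector) // stack-passed vectors start at a 16-byte boundary
          ParameterAreaSizeBytes = alignUp(ParameterAreaSizeBytes);
        Offsets.push_back(ParameterAreaSizeBytes);
        ParameterAreaSizeBytes += A.SizeBytes;
      }
      // lowerCall additionally rounds the total area up to 16 bytes so the
      // stack stays aligned at the call itself.
      return Offsets;
    }

For example, with four vector arguments already in xmm registers followed by an i32, a fifth vector, and another i32, the first i32 lands at offset 0, the vector at offset 16 (aligned up from 4), and the final i32 at offset 32.)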
| 2330 | |
| 2331 template <class Machine> | |
| 2332 void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) { | 2129 void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) { |
| 2333 // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap) | 2130 // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap) |
| 2334 InstCast::OpKind CastKind = Inst->getCastKind(); | 2131 InstCast::OpKind CastKind = Inst->getCastKind(); |
| 2335 Variable *Dest = Inst->getDest(); | 2132 Variable *Dest = Inst->getDest(); |
| 2336 switch (CastKind) { | 2133 switch (CastKind) { |
| 2337 default: | 2134 default: |
| 2338 Func->setError("Cast type not supported"); | 2135 Func->setError("Cast type not supported"); |
| 2339 return; | 2136 return; |
| 2340 case InstCast::Sext: { | 2137 case InstCast::Sext: { |
| 2341 // Src0RM is the source operand legalized to physical register or memory, | 2138 // Src0RM is the source operand legalized to physical register or memory, |
| (...skipping 3263 matching lines...) |
| 5605 } | 5402 } |
| 5606 // the offset is not eligible for blinding or pooling, return the original | 5403 // the offset is not eligible for blinding or pooling, return the original |
| 5607 // mem operand | 5404 // mem operand |
| 5608 return MemOperand; | 5405 return MemOperand; |
| 5609 } | 5406 } |
| 5610 | 5407 |
| 5611 } // end of namespace X86Internal | 5408 } // end of namespace X86Internal |
| 5612 } // end of namespace Ice | 5409 } // end of namespace Ice |
| 5613 | 5410 |
| 5614 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H | 5411 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H |