OLD | NEW |
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// | 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 /// | 9 /// |
10 /// \file | 10 /// \file |
(...skipping 139 matching lines...)
150 | 150 |
151 if (NeedSandboxing) { | 151 if (NeedSandboxing) { |
152 const SizeT BundleSize = | 152 const SizeT BundleSize = |
153 1 << Func->getAssembler<>()->getBundleAlignLog2Bytes(); | 153 1 << Func->getAssembler<>()->getBundleAlignLog2Bytes(); |
154 _and(JumpTarget, Ctx->getConstantInt32(~(BundleSize - 1))); | 154 _and(JumpTarget, Ctx->getConstantInt32(~(BundleSize - 1))); |
155 } | 155 } |
156 | 156 |
157 _jmp(JumpTarget); | 157 _jmp(JumpTarget); |
158 } | 158 } |
159 | 159 |
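
The _and with ~(BundleSize - 1) above is the NaCl sandboxing mask: it clears the low bits of the indirect jump target so control can only land on a bundle boundary. A minimal standalone sketch of the arithmetic, assuming the usual 32-byte bundles (getBundleAlignLog2Bytes() == 5); the helper name is hypothetical:

```cpp
#include <cassert>
#include <cstdint>

// Clearing the low log2(BundleSize) bits pins an indirect jump target to
// a bundle boundary, mirroring the _and emitted above.
uint32_t maskToBundleBoundary(uint32_t Target, uint32_t BundleAlignLog2) {
  const uint32_t BundleSize = 1u << BundleAlignLog2; // 32 when log2 == 5
  return Target & ~(BundleSize - 1);                 // i.e. & 0xFFFFFFE0
}

int main() {
  assert(maskToBundleBoundary(0x1234567Bu, 5) == 0x12345660u);
  return 0;
}
```
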
160 void TargetX8632::lowerCall(const InstCall *Instr) { | 160 Inst *TargetX8632::emitCallToTarget(Operand *CallTarget, Variable *ReturnReg) { |
161 // x86-32 calling convention: | 161 std::unique_ptr<AutoBundle> Bundle; |
162 // | 162 if (NeedSandboxing) { |
163 // * At the point before the call, the stack must be aligned to 16 bytes. | 163 if (llvm::isa<Constant>(CallTarget)) { |
164 // | 164 Bundle = makeUnique<AutoBundle>(this, InstBundleLock::Opt_AlignToEnd); |
165 // * The first four arguments of vector type, regardless of their position | |
166 // relative to the other arguments in the argument list, are placed in | |
167 // registers xmm0 - xmm3. | |
168 // | |
169 // * Other arguments are pushed onto the stack in right-to-left order, such | |
170 // that the left-most argument ends up on the top of the stack at the lowest | |
171 // memory address. | |
172 // | |
173 // * Stack arguments of vector type are aligned to start at the next highest | |
174 // multiple of 16 bytes. Other stack arguments are aligned to 4 bytes. | |
175 // | |
176 // This is intended to match the section "IA-32 Function Calling Convention" of | 
177 // the document "OS X ABI Function Call Guide" by Apple. | |
178 NeedsStackAlignment = true; | |
179 | |
180 OperandList XmmArgs; | |
181 OperandList StackArgs, StackArgLocations; | |
182 int32_t ParameterAreaSizeBytes = 0; | |
183 | |
184 // Classify each argument operand according to the location where the | |
185 // argument is passed. | |
186 for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) { | |
187 Operand *Arg = Instr->getArg(i); | |
188 Type Ty = Arg->getType(); | |
189 // The PNaCl ABI requires the width of arguments to be at least 32 bits. | |
190 assert(typeWidthInBytes(Ty) >= 4); | |
191 if (isVectorType(Ty) && XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) { | |
192 XmmArgs.push_back(Arg); | |
193 } else { | 165 } else { |
194 StackArgs.push_back(Arg); | 166 Variable *CallTargetVar = nullptr; |
195 if (isVectorType(Arg->getType())) { | 167 _mov(CallTargetVar, CallTarget); |
196 ParameterAreaSizeBytes = | 168 Bundle = makeUnique<AutoBundle>(this, InstBundleLock::Opt_AlignToEnd); |
197 Traits::applyStackAlignment(ParameterAreaSizeBytes); | 169 const SizeT BundleSize = |
198 } | 170 1 << Func->getAssembler<>()->getBundleAlignLog2Bytes(); |
199 Variable *esp = | 171 _and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1))); |
200 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp); | 172 CallTarget = CallTargetVar; |
201 Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes); | |
202 auto *Mem = Traits::X86OperandMem::create(Func, Ty, esp, Loc); | |
203 StackArgLocations.push_back(Mem); | |
204 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType()); | |
205 } | 173 } |
206 } | 174 } |
207 // Ensure there is enough space for the fstp/movs for floating returns. | 175 return Context.insert<Traits::Insts::Call>(ReturnReg, CallTarget); |
208 Variable *Dest = Instr->getDest(); | 176 } |
209 if (Dest != nullptr && isScalarFloatingType(Dest->getType())) { | |
210 ParameterAreaSizeBytes = | |
211 std::max(static_cast<size_t>(ParameterAreaSizeBytes), | |
212 typeWidthInBytesOnStack(Dest->getType())); | |
213 } | |
214 | 177 |
215 // Adjust the parameter area so that the stack is aligned. It is assumed that | 178 Variable *TargetX8632::moveReturnValueToRegister(Operand *Value, |
216 // the stack is already aligned at the start of the calling sequence. | 179 const Type ReturnType) { |
217 ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes); | 180 if (isVectorType(ReturnType)) { |
218 assert(static_cast<uint32_t>(ParameterAreaSizeBytes) <= | 181 return legalizeToReg(Value, Traits::RegisterSet::Reg_xmm0); |
219 maxOutArgsSizeBytes()); | 182 } else if (isScalarFloatingType(ReturnType)) { |
220 | 183 _fld(Value); |
221 // Copy arguments that are passed on the stack to the appropriate stack | 184 return nullptr; |
222 // locations. | 185 } else { |
223 for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) { | 186 assert(ReturnType == IceType_i32 || ReturnType == IceType_i64); |
224 lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i])); | 187 if (ReturnType == IceType_i64) { |
225 } | 188 Variable *eax = |
226 | 189 legalizeToReg(loOperand(Value), Traits::RegisterSet::Reg_eax); |
227 // Copy arguments to be passed in registers to the appropriate registers. | 190 Variable *edx = |
228 // TODO: Investigate the impact of lowering arguments passed in registers | 191 legalizeToReg(hiOperand(Value), Traits::RegisterSet::Reg_edx); |
229 // after lowering stack arguments as opposed to the other way around. | 192 Context.insert<InstFakeUse>(edx); |
230 // Lowering register arguments after stack arguments may reduce register | 193 return eax; |
231 // pressure. On the other hand, lowering register arguments first (before | |
232 // stack arguments) may result in more compact code, as the memory operand | |
233 // displacements may end up being smaller before any stack adjustment is | |
234 // done. | |
235 for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) { | |
236 Variable *Reg = | |
237 legalizeToReg(XmmArgs[i], Traits::RegisterSet::Reg_xmm0 + i); | |
238 // Generate a FakeUse of register arguments so that they do not get dead | |
239 // code eliminated as a result of the FakeKill of scratch registers after | |
240 // the call. | |
241 Context.insert<InstFakeUse>(Reg); | |
242 } | |
243 // Generate the call instruction. Assign its result to a temporary with high | |
244 // register allocation weight. | |
245 // ReturnReg doubles as ReturnRegLo as necessary. | |
246 Variable *ReturnReg = nullptr; | |
247 Variable *ReturnRegHi = nullptr; | |
248 if (Dest) { | |
249 const Type DestTy = Dest->getType(); | |
250 switch (DestTy) { | |
251 case IceType_NUM: | |
252 case IceType_void: | |
253 case IceType_i1: | |
254 case IceType_i8: | |
255 case IceType_i16: | |
256 llvm::report_fatal_error("Invalid Call dest type"); | |
257 break; | |
258 case IceType_i32: | |
259 ReturnReg = makeReg(DestTy, Traits::RegisterSet::Reg_eax); | |
260 break; | |
261 case IceType_i64: | |
262 ReturnReg = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); | |
263 ReturnRegHi = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); | |
264 break; | |
265 case IceType_f32: | |
266 case IceType_f64: | |
267 // Leave ReturnReg==ReturnRegHi==nullptr, and capture the result with the | |
268 // fstp instruction. | |
269 break; | |
270 case IceType_v4i1: | |
271 case IceType_v8i1: | |
272 case IceType_v16i1: | |
273 case IceType_v16i8: | |
274 case IceType_v8i16: | |
275 case IceType_v4i32: | |
276 case IceType_v4f32: | |
277 ReturnReg = makeReg(DestTy, Traits::RegisterSet::Reg_xmm0); | |
278 break; | |
279 } | |
280 } | |
281 | |
282 Operand *CallTarget = | |
283 legalize(Instr->getCallTarget(), Legal_Reg | Legal_Imm | Legal_AddrAbs); | |
284 | |
285 Traits::Insts::Call *NewCall; | |
286 /* AutoBundle scoping */ { | |
287 std::unique_ptr<AutoBundle> Bundle; | |
288 if (NeedSandboxing) { | |
289 if (llvm::isa<Constant>(CallTarget)) { | |
290 Bundle = makeUnique<AutoBundle>(this, InstBundleLock::Opt_AlignToEnd); | |
291 } else { | |
292 Variable *CallTargetVar = nullptr; | |
293 _mov(CallTargetVar, CallTarget); | |
294 Bundle = makeUnique<AutoBundle>(this, InstBundleLock::Opt_AlignToEnd); | |
295 const SizeT BundleSize = | |
296 1 << Func->getAssembler<>()->getBundleAlignLog2Bytes(); | |
297 _and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1))); | |
298 CallTarget = CallTargetVar; | |
299 } | |
300 } | |
301 NewCall = Context.insert<Traits::Insts::Call>(ReturnReg, CallTarget); | |
302 } | |
303 | |
304 if (ReturnRegHi) | |
305 Context.insert<InstFakeDef>(ReturnRegHi); | |
306 | |
307 // Insert a register-kill pseudo instruction. | |
308 Context.insert<InstFakeKill>(NewCall); | |
309 | |
310 if (Dest != nullptr && isScalarFloatingType(Dest->getType())) { | |
311 // Special treatment for an FP function which returns its result in st(0). | |
312 // If Dest ends up being a physical xmm register, the fstp emit code will | |
313 // route st(0) through the space reserved in the function argument area | |
314 // we allocated. | |
315 _fstp(Dest); | |
316 // Create a fake use of Dest in case it actually isn't used, because st(0) | |
317 // still needs to be popped. | |
318 Context.insert<InstFakeUse>(Dest); | |
319 } | |
320 | |
321 // Generate a FakeUse to keep the call live if necessary. | |
322 if (Instr->hasSideEffects() && ReturnReg) { | |
323 Context.insert<InstFakeUse>(ReturnReg); | |
324 } | |
325 | |
326 if (!Dest) | |
327 return; | |
328 | |
329 // Assign the result of the call to Dest. | |
330 if (ReturnReg) { | |
331 if (ReturnRegHi) { | |
332 auto *Dest64On32 = llvm::cast<Variable64On32>(Dest); | |
333 Variable *DestLo = Dest64On32->getLo(); | |
334 Variable *DestHi = Dest64On32->getHi(); | |
335 _mov(DestLo, ReturnReg); | |
336 _mov(DestHi, ReturnRegHi); | |
337 } else { | 194 } else { |
338 const Type DestTy = Dest->getType(); | 195 Variable *Reg = nullptr; |
339 assert(DestTy == IceType_i32 || DestTy == IceType_i16 || | 196 _mov(Reg, Value, Traits::RegisterSet::Reg_eax); |
340 DestTy == IceType_i8 || DestTy == IceType_i1 || | 197 return Reg; |
341 isVectorType(DestTy)); | |
342 if (isVectorType(DestTy)) { | |
343 _movp(Dest, ReturnReg); | |
344 } else { | |
345 _mov(Dest, ReturnReg); | |
346 } | |
347 } | 198 } |
348 } | 199 } |
349 } | 200 } |
350 | 201 |
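
As a reviewer aid, here is a self-contained sketch of the argument classification that lowerCall performs (old lines 184-219): the first four vector arguments are assigned to xmm0-xmm3, everything else gets an esp-relative stack slot, and vector stack slots are rounded up to the next 16-byte boundary. The data structures are hypothetical, not Subzero's:

```cpp
#include <cassert>
#include <cstdint>
#include <vector>

struct Arg { bool IsVector; uint32_t SizeBytes; };

// Returns the 16-byte-aligned parameter area size; fills XmmRegs with the
// xmm indices used and StackOffs with esp-relative slot offsets.
uint32_t classifyArgs(const std::vector<Arg> &Args, std::vector<int> &XmmRegs,
                      std::vector<uint32_t> &StackOffs) {
  uint32_t ParamBytes = 0;
  for (const Arg &A : Args) {
    if (A.IsVector && XmmRegs.size() < 4) {
      XmmRegs.push_back(static_cast<int>(XmmRegs.size())); // xmm0..xmm3
      continue;
    }
    if (A.IsVector)                        // vector stack slots align to 16
      ParamBytes = (ParamBytes + 15) & ~15u;
    StackOffs.push_back(ParamBytes);       // offset from esp at call time
    ParamBytes += A.SizeBytes;
  }
  return (ParamBytes + 15) & ~15u;         // whole area stays 16-aligned
}

int main() {
  // (i32, v4f32, i32): the vector lands in xmm0; the i32s at esp+0, esp+4.
  std::vector<Arg> Args = {{false, 4}, {true, 16}, {false, 4}};
  std::vector<int> Xmm;
  std::vector<uint32_t> Offs;
  assert(classifyArgs(Args, Xmm, Offs) == 16 && Xmm.size() == 1);
  assert(Offs[0] == 0 && Offs[1] == 4);
  return 0;
}
```
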
351 void TargetX8632::lowerArguments() { | |
352 VarList &Args = Func->getArgs(); | |
353 // The first four arguments of vector type, regardless of their position | |
354 // relative to the other arguments in the argument list, are passed in | |
355 // registers xmm0 - xmm3. | |
356 unsigned NumXmmArgs = 0; | |
357 | |
358 Context.init(Func->getEntryNode()); | |
359 Context.setInsertPoint(Context.getCur()); | |
360 | |
361 for (SizeT I = 0, E = Args.size(); | |
362 I < E && NumXmmArgs < Traits::X86_MAX_XMM_ARGS; ++I) { | |
363 Variable *Arg = Args[I]; | |
364 Type Ty = Arg->getType(); | |
365 if (!isVectorType(Ty)) | |
366 continue; | |
367 // Replace Arg in the argument list with the home register. Then generate | |
368 // an instruction in the prolog to copy the home register to the assigned | |
369 // location of Arg. | |
370 int32_t RegNum = Traits::RegisterSet::Reg_xmm0 + NumXmmArgs; | |
371 ++NumXmmArgs; | |
372 Variable *RegisterArg = Func->makeVariable(Ty); | |
373 if (BuildDefs::dump()) | |
374 RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func)); | |
375 RegisterArg->setRegNum(RegNum); | |
376 RegisterArg->setIsArg(); | |
377 Arg->setIsArg(false); | |
378 | |
379 Args[I] = RegisterArg; | |
380 Context.insert<InstAssign>(Arg, RegisterArg); | |
381 } | |
382 } | |
383 | |
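
The rewrite lowerArguments performs (old lines 361-381) can be modeled in isolation: each of the first four vector arguments is replaced in the argument list by a fresh "home register" variable pinned to xmm0-xmm3, and a prolog assignment copies that register into the original variable. A hedged sketch with hypothetical types:

```cpp
#include <memory>
#include <string>
#include <vector>

struct Var { std::string Name; bool IsVector; int RegNum; bool IsArg; };
struct Assign { Var *Dest, *Src; }; // prolog copy: Dest = Src

// Pin the first four vector args to xmm0..xmm3; Pool owns the new vars.
void lowerArgsSketch(std::vector<Var *> &Args, std::vector<Assign> &Prolog,
                     std::vector<std::unique_ptr<Var>> &Pool) {
  int NumXmm = 0;
  for (Var *&Slot : Args) {
    if (NumXmm >= 4) break;        // only xmm0-xmm3 carry vector args
    if (!Slot->IsVector) continue; // non-vector args are left alone
    Var *Arg = Slot;
    Pool.push_back(std::make_unique<Var>(
        Var{"home_reg:" + Arg->Name, true, NumXmm++, true}));
    Arg->IsArg = false;            // Arg is no longer the incoming value
    Slot = Pool.back().get();      // the arg list now names the register
    Prolog.push_back({Arg, Slot}); // prolog: Arg = home register
  }
}
```
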
384 void TargetX8632::lowerRet(const InstRet *Inst) { | |
385 Variable *Reg = nullptr; | |
386 if (Inst->hasRetValue()) { | |
387 Operand *Src0 = legalize(Inst->getRetValue()); | |
388 const Type Src0Ty = Src0->getType(); | |
389 // TODO(jpp): this is not needed. | |
390 if (Src0Ty == IceType_i64) { | |
391 Variable *eax = | |
392 legalizeToReg(loOperand(Src0), Traits::RegisterSet::Reg_eax); | |
393 Variable *edx = | |
394 legalizeToReg(hiOperand(Src0), Traits::RegisterSet::Reg_edx); | |
395 Reg = eax; | |
396 Context.insert<InstFakeUse>(edx); | |
397 } else if (isScalarFloatingType(Src0Ty)) { | |
398 _fld(Src0); | |
399 } else if (isVectorType(Src0Ty)) { | |
400 Reg = legalizeToReg(Src0, Traits::RegisterSet::Reg_xmm0); | |
401 } else { | |
402 assert(Src0Ty == IceType_i32); | |
403 _mov(Reg, Src0, Traits::RegisterSet::Reg_eax); | |
404 } | |
405 } | |
406 // Add a ret instruction even if sandboxing is enabled, because addEpilog | |
407 // explicitly looks for a ret instruction as a marker for where to insert the | |
408 // frame removal instructions. | |
409 _ret(Reg); | |
410 // Add a fake use of esp to make sure esp stays alive for the entire | |
411 // function. Otherwise post-call esp adjustments get dead-code eliminated. | |
412 keepEspLiveAtExit(); | |
413 } | |
414 | |
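
Both lowerRet above and the new moveReturnValueToRegister handle i64 returns by splitting the value across edx:eax (high half in edx, low half in eax). A minimal sketch of that split; the struct and function names are hypothetical:

```cpp
#include <cassert>
#include <cstdint>

struct RegPair { uint32_t Eax, Edx; };

// Model of the edx:eax convention: loOperand() -> eax, hiOperand() -> edx.
RegPair splitI64(uint64_t V) {
  return {static_cast<uint32_t>(V),         // low 32 bits go in eax
          static_cast<uint32_t>(V >> 32)};  // high 32 bits go in edx
}

int main() {
  RegPair R = splitI64(0x0123456789ABCDEFull);
  assert(R.Eax == 0x89ABCDEFu && R.Edx == 0x01234567u);
  return 0;
}
```
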
415 void TargetX8632::addProlog(CfgNode *Node) { | 202 void TargetX8632::addProlog(CfgNode *Node) { |
416 // Stack frame layout: | 203 // Stack frame layout: |
417 // | 204 // |
418 // +------------------------+ | 205 // +------------------------+ |
419 // | 1. return address | | 206 // | 1. return address | |
420 // +------------------------+ | 207 // +------------------------+ |
421 // | 2. preserved registers | | 208 // | 2. preserved registers | |
422 // +------------------------+ | 209 // +------------------------+ |
423 // | 3. padding | | 210 // | 3. padding | |
424 // +------------------------+ | 211 // +------------------------+ |
(...skipping 671 matching lines...)
1096 #define X(tag, sizeLog2, align, elts, elty, str) \ | 883 #define X(tag, sizeLog2, align, elts, elty, str) \ |
1097 static_assert(_table1_##tag == _table2_##tag, \ | 884 static_assert(_table1_##tag == _table2_##tag, \ |
1098 "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE"); | 885 "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE"); |
1099 ICETYPE_TABLE | 886 ICETYPE_TABLE |
1100 #undef X | 887 #undef X |
1101 } // end of namespace dummy3 | 888 } // end of namespace dummy3 |
1102 } // end of anonymous namespace | 889 } // end of anonymous namespace |
1103 | 890 |
1104 } // end of namespace X8632 | 891 } // end of namespace X8632 |
1105 } // end of namespace Ice | 892 } // end of namespace Ice |
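
The static_asserts at old lines 1096-1100 are the tail of an X-macro consistency check between ICETYPEX8632_TABLE and ICETYPE_TABLE: each table is expanded into per-tag constants, and a final expansion asserts that every shared tag agrees. The same idiom in a self-contained form, with hypothetical tables:

```cpp
// Two X-macro tables that must stay in sync (contents hypothetical).
#define COLOR_TABLE_A                                                        \
  X(red, 1)                                                                  \
  X(green, 2)

#define COLOR_TABLE_B                                                        \
  X(red, 1)                                                                  \
  X(green, 2)

// Expand each table into distinctly named per-tag constants.
#define X(tag, val) constexpr int _tableA_##tag = val;
COLOR_TABLE_A
#undef X
#define X(tag, val) constexpr int _tableB_##tag = val;
COLOR_TABLE_B
#undef X

// A third expansion cross-checks the tables at compile time.
#define X(tag, val)                                                          \
  static_assert(_tableA_##tag == _tableB_##tag, "table mismatch: " #tag);
COLOR_TABLE_A
#undef X
```
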