OLD | NEW |
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// | 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 /// | 9 /// |
10 /// \file | 10 /// \file |
(...skipping 135 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
146 | 146 |
147 if (NeedSandboxing) { | 147 if (NeedSandboxing) { |
148 const SizeT BundleSize = | 148 const SizeT BundleSize = |
149 1 << Func->getAssembler<>()->getBundleAlignLog2Bytes(); | 149 1 << Func->getAssembler<>()->getBundleAlignLog2Bytes(); |
150 _and(JumpTarget, Ctx->getConstantInt32(~(BundleSize - 1))); | 150 _and(JumpTarget, Ctx->getConstantInt32(~(BundleSize - 1))); |
151 } | 151 } |
152 | 152 |
153 _jmp(JumpTarget); | 153 _jmp(JumpTarget); |
154 } | 154 } |
155 | 155 |
156 void TargetX8632::lowerCall(const InstCall *Instr) { | 156 Inst *TargetX8632::emitCallToTarget(Operand *CallTarget, Variable *ReturnReg) { |
157 // x86-32 calling convention: | 157 std::unique_ptr<AutoBundle> Bundle; |
158 // | 158 if (NeedSandboxing) { |
159 // * At the point before the call, the stack must be aligned to 16 bytes. | 159 if (llvm::isa<Constant>(CallTarget)) { |
160 // | 160 Bundle = makeUnique<AutoBundle>(this, InstBundleLock::Opt_AlignToEnd); |
161 // * The first four arguments of vector type, regardless of their position | |
162 // relative to the other arguments in the argument list, are placed in | |
163 // registers xmm0 - xmm3. | |
164 // | |
165 // * Other arguments are pushed onto the stack in right-to-left order, such | |
166 // that the left-most argument ends up on the top of the stack at the lowest | |
167 // memory address. | |
168 // | |
169 // * Stack arguments of vector type are aligned to start at the next highest | |
170 // multiple of 16 bytes. Other stack arguments are aligned to 4 bytes. | |
171 // | |
172 // This intends to match the section "IA-32 Function Calling Convention" of | |
173 // the document "OS X ABI Function Call Guide" by Apple. | |
174 NeedsStackAlignment = true; | |
175 | |
176 OperandList XmmArgs; | |
177 OperandList StackArgs, StackArgLocations; | |
178 int32_t ParameterAreaSizeBytes = 0; | |
179 | |
180 // Classify each argument operand according to the location where the | |
181 // argument is passed. | |
182 for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) { | |
183 Operand *Arg = Instr->getArg(i); | |
184 Type Ty = Arg->getType(); | |
185 // The PNaCl ABI requires the width of arguments to be at least 32 bits. | |
186 assert(typeWidthInBytes(Ty) >= 4); | |
187 if (isVectorType(Ty) && XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) { | |
188 XmmArgs.push_back(Arg); | |
189 } else { | 161 } else { |
190 StackArgs.push_back(Arg); | 162 Variable *CallTargetVar = nullptr; |
191 if (isVectorType(Arg->getType())) { | 163 _mov(CallTargetVar, CallTarget); |
192 ParameterAreaSizeBytes = | 164 Bundle = makeUnique<AutoBundle>(this, InstBundleLock::Opt_AlignToEnd); |
193 Traits::applyStackAlignment(ParameterAreaSizeBytes); | 165 const SizeT BundleSize = |
194 } | 166 1 << Func->getAssembler<>()->getBundleAlignLog2Bytes(); |
195 Variable *esp = | 167 _and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1))); |
196 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp); | 168 CallTarget = CallTargetVar; |
197 Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes); | |
198 auto *Mem = Traits::X86OperandMem::create(Func, Ty, esp, Loc); | |
199 StackArgLocations.push_back(Mem); | |
200 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType()); | |
201 } | 169 } |
202 } | 170 } |
203 // Ensure there is enough space for the fstp/movs for floating returns. | 171 return Context.insert<Traits::Insts::Call>(ReturnReg, CallTarget); |
204 Variable *Dest = Instr->getDest(); | 172 } |
205 if (Dest != nullptr && isScalarFloatingType(Dest->getType())) { | |
206 ParameterAreaSizeBytes = | |
207 std::max(static_cast<size_t>(ParameterAreaSizeBytes), | |
208 typeWidthInBytesOnStack(Dest->getType())); | |
209 } | |
210 | 173 |
211 // Adjust the parameter area so that the stack is aligned. It is assumed that | 174 Variable *TargetX8632::moveReturnValueToRegister(Operand *Value, |
212 // the stack is already aligned at the start of the calling sequence. | 175 Type ReturnType) { |
213 ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes); | 176 if (isVectorType(ReturnType)) { |
214 assert(static_cast<uint32_t>(ParameterAreaSizeBytes) <= | 177 return legalizeToReg(Value, Traits::RegisterSet::Reg_xmm0); |
215 maxOutArgsSizeBytes()); | 178 } else if (isScalarFloatingType(ReturnType)) { |
216 | 179 _fld(Value); |
217 // Copy arguments that are passed on the stack to the appropriate stack | 180 return nullptr; |
218 // locations. | 181 } else { |
219 for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) { | 182 assert(ReturnType == IceType_i32 || ReturnType == IceType_i64); |
220 lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i])); | 183 if (ReturnType == IceType_i64) { |
221 } | 184 Variable *eax = |
222 | 185 legalizeToReg(loOperand(Value), Traits::RegisterSet::Reg_eax); |
223 // Copy arguments to be passed in registers to the appropriate registers. | 186 Variable *edx = |
224 // TODO: Investigate the impact of lowering arguments passed in registers | 187 legalizeToReg(hiOperand(Value), Traits::RegisterSet::Reg_edx); |
225 // after lowering stack arguments as opposed to the other way around. | 188 Context.insert<InstFakeUse>(edx); |
226 // Lowering register arguments after stack arguments may reduce register | 189 return eax; |
227 // pressure. On the other hand, lowering register arguments first (before | |
228 // stack arguments) may result in more compact code, as the memory operand | |
229 // displacements may end up being smaller before any stack adjustment is | |
230 // done. | |
231 for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) { | |
232 Variable *Reg = | |
233 legalizeToReg(XmmArgs[i], Traits::RegisterSet::Reg_xmm0 + i); | |
234 // Generate a FakeUse of register arguments so that they do not get dead | |
235 // code eliminated as a result of the FakeKill of scratch registers after | |
236 // the call. | |
237 Context.insert<InstFakeUse>(Reg); | |
238 } | |
239 // Generate the call instruction. Assign its result to a temporary with high | |
240 // register allocation weight. | |
241 // ReturnReg doubles as ReturnRegLo as necessary. | |
242 Variable *ReturnReg = nullptr; | |
243 Variable *ReturnRegHi = nullptr; | |
244 if (Dest) { | |
245 const Type DestTy = Dest->getType(); | |
246 switch (DestTy) { | |
247 case IceType_NUM: | |
248 case IceType_void: | |
249 case IceType_i1: | |
250 case IceType_i8: | |
251 case IceType_i16: | |
252 llvm::report_fatal_error("Invalid Call dest type"); | |
253 break; | |
254 case IceType_i32: | |
255 ReturnReg = makeReg(DestTy, Traits::RegisterSet::Reg_eax); | |
256 break; | |
257 case IceType_i64: | |
258 ReturnReg = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); | |
259 ReturnRegHi = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); | |
260 break; | |
261 case IceType_f32: | |
262 case IceType_f64: | |
263 // Leave ReturnReg==ReturnRegHi==nullptr, and capture the result with the | |
264 // fstp instruction. | |
265 break; | |
266 case IceType_v4i1: | |
267 case IceType_v8i1: | |
268 case IceType_v16i1: | |
269 case IceType_v16i8: | |
270 case IceType_v8i16: | |
271 case IceType_v4i32: | |
272 case IceType_v4f32: | |
273 ReturnReg = makeReg(DestTy, Traits::RegisterSet::Reg_xmm0); | |
274 break; | |
275 } | |
276 } | |
277 | |
278 Operand *CallTarget = | |
279 legalize(Instr->getCallTarget(), Legal_Reg | Legal_Imm | Legal_AddrAbs); | |
280 | |
281 Traits::Insts::Call *NewCall; | |
282 /* AutoBundle scoping */ { | |
283 std::unique_ptr<AutoBundle> Bundle; | |
284 if (NeedSandboxing) { | |
285 if (llvm::isa<Constant>(CallTarget)) { | |
286 Bundle = makeUnique<AutoBundle>(this, InstBundleLock::Opt_AlignToEnd); | |
287 } else { | |
288 Variable *CallTargetVar = nullptr; | |
289 _mov(CallTargetVar, CallTarget); | |
290 Bundle = makeUnique<AutoBundle>(this, InstBundleLock::Opt_AlignToEnd); | |
291 const SizeT BundleSize = | |
292 1 << Func->getAssembler<>()->getBundleAlignLog2Bytes(); | |
293 _and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1))); | |
294 CallTarget = CallTargetVar; | |
295 } | |
296 } | |
297 NewCall = Context.insert<Traits::Insts::Call>(ReturnReg, CallTarget); | |
298 } | |
299 | |
300 if (ReturnRegHi) | |
301 Context.insert<InstFakeDef>(ReturnRegHi); | |
302 | |
303 // Insert a register-kill pseudo instruction. | |
304 Context.insert<InstFakeKill>(NewCall); | |
305 | |
306 if (Dest != nullptr && isScalarFloatingType(Dest->getType())) { | |
307 // Special treatment for an FP function which returns its result in st(0). | |
308 // If Dest ends up being a physical xmm register, the fstp emit code will | |
309 // route st(0) through the space reserved in the function argument area | |
310 // we allocated. | |
311 _fstp(Dest); | |
312 // Create a fake use of Dest in case it actually isn't used, because st(0) | |
313 // still needs to be popped. | |
314 Context.insert<InstFakeUse>(Dest); | |
315 } | |
316 | |
317 // Generate a FakeUse to keep the call live if necessary. | |
318 if (Instr->hasSideEffects() && ReturnReg) { | |
319 Context.insert<InstFakeUse>(ReturnReg); | |
320 } | |
321 | |
322 if (!Dest) | |
323 return; | |
324 | |
325 // Assign the result of the call to Dest. | |
326 if (ReturnReg) { | |
327 if (ReturnRegHi) { | |
328 auto *Dest64On32 = llvm::cast<Variable64On32>(Dest); | |
329 Variable *DestLo = Dest64On32->getLo(); | |
330 Variable *DestHi = Dest64On32->getHi(); | |
331 _mov(DestLo, ReturnReg); | |
332 _mov(DestHi, ReturnRegHi); | |
333 } else { | 190 } else { |
334 const Type DestTy = Dest->getType(); | 191 Variable *Reg = nullptr; |
335 assert(DestTy == IceType_i32 || DestTy == IceType_i16 || | 192 _mov(Reg, Value, Traits::RegisterSet::Reg_eax); |
336 DestTy == IceType_i8 || DestTy == IceType_i1 || | 193 return Reg; |
337 isVectorType(DestTy)); | |
338 if (isVectorType(DestTy)) { | |
339 _movp(Dest, ReturnReg); | |
340 } else { | |
341 _mov(Dest, ReturnReg); | |
342 } | |
343 } | 194 } |
344 } | 195 } |
345 } | 196 } |
346 | 197 |
347 void TargetX8632::lowerArguments() { | |
348 VarList &Args = Func->getArgs(); | |
349 // The first four arguments of vector type, regardless of their position | |
350 // relative to the other arguments in the argument list, are passed in | |
351 // registers xmm0 - xmm3. | |
352 unsigned NumXmmArgs = 0; | |
353 | |
354 Context.init(Func->getEntryNode()); | |
355 Context.setInsertPoint(Context.getCur()); | |
356 | |
357 for (SizeT I = 0, E = Args.size(); | |
358 I < E && NumXmmArgs < Traits::X86_MAX_XMM_ARGS; ++I) { | |
359 Variable *Arg = Args[I]; | |
360 Type Ty = Arg->getType(); | |
361 if (!isVectorType(Ty)) | |
362 continue; | |
363 // Replace Arg in the argument list with the home register. Then generate | |
364 // an instruction in the prolog to copy the home register to the assigned | |
365 // location of Arg. | |
366 int32_t RegNum = Traits::RegisterSet::Reg_xmm0 + NumXmmArgs; | |
367 ++NumXmmArgs; | |
368 Variable *RegisterArg = Func->makeVariable(Ty); | |
369 if (BuildDefs::dump()) | |
370 RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func)); | |
371 RegisterArg->setRegNum(RegNum); | |
372 RegisterArg->setIsArg(); | |
373 Arg->setIsArg(false); | |
374 | |
375 Args[I] = RegisterArg; | |
376 Context.insert<InstAssign>(Arg, RegisterArg); | |
377 } | |
378 } | |
379 | |
380 void TargetX8632::lowerRet(const InstRet *Inst) { | |
381 Variable *Reg = nullptr; | |
382 if (Inst->hasRetValue()) { | |
383 Operand *Src0 = legalize(Inst->getRetValue()); | |
384 const Type Src0Ty = Src0->getType(); | |
385 // TODO(jpp): this is not needed. | |
386 if (Src0Ty == IceType_i64) { | |
387 Variable *eax = | |
388 legalizeToReg(loOperand(Src0), Traits::RegisterSet::Reg_eax); | |
389 Variable *edx = | |
390 legalizeToReg(hiOperand(Src0), Traits::RegisterSet::Reg_edx); | |
391 Reg = eax; | |
392 Context.insert<InstFakeUse>(edx); | |
393 } else if (isScalarFloatingType(Src0Ty)) { | |
394 _fld(Src0); | |
395 } else if (isVectorType(Src0Ty)) { | |
396 Reg = legalizeToReg(Src0, Traits::RegisterSet::Reg_xmm0); | |
397 } else { | |
398 assert(Src0Ty == IceType_i32); | |
399 _mov(Reg, Src0, Traits::RegisterSet::Reg_eax); | |
400 } | |
401 } | |
402 // Add a ret instruction even if sandboxing is enabled, because addEpilog | |
403 // explicitly looks for a ret instruction as a marker for where to insert the | |
404 // frame removal instructions. | |
405 _ret(Reg); | |
406 // Add a fake use of esp to make sure esp stays alive for the entire | |
407 // function. Otherwise post-call esp adjustments get dead-code eliminated. | |
408 keepEspLiveAtExit(); | |
409 } | |
410 | |
411 void TargetX8632::addProlog(CfgNode *Node) { | 198 void TargetX8632::addProlog(CfgNode *Node) { |
412 // Stack frame layout: | 199 // Stack frame layout: |
413 // | 200 // |
414 // +------------------------+ | 201 // +------------------------+ |
415 // | 1. return address | | 202 // | 1. return address | |
416 // +------------------------+ | 203 // +------------------------+ |
417 // | 2. preserved registers | | 204 // | 2. preserved registers | |
418 // +------------------------+ | 205 // +------------------------+ |
419 // | 3. padding | | 206 // | 3. padding | |
420 // +------------------------+ | 207 // +------------------------+ |
(...skipping 671 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1092 #define X(tag, sizeLog2, align, elts, elty, str) \ | 879 #define X(tag, sizeLog2, align, elts, elty, str) \ |
1093 static_assert(_table1_##tag == _table2_##tag, \ | 880 static_assert(_table1_##tag == _table2_##tag, \ |
1094 "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE"); | 881 "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE"); |
1095 ICETYPE_TABLE | 882 ICETYPE_TABLE |
1096 #undef X | 883 #undef X |
1097 } // end of namespace dummy3 | 884 } // end of namespace dummy3 |
1098 } // end of anonymous namespace | 885 } // end of anonymous namespace |
1099 | 886 |
1100 } // end of namespace X8632 | 887 } // end of namespace X8632 |
1101 } // end of namespace Ice | 888 } // end of namespace Ice |
OLD | NEW |