OLD | NEW |
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// | 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 /// | 9 /// |
10 /// \file | 10 /// \file |
(...skipping 113 matching lines...) |
124 // | 124 // |
125 // * Stack arguments of vector type are aligned to start at the next highest | 125 // * Stack arguments of vector type are aligned to start at the next highest |
126 // multiple of 16 bytes. Other stack arguments are aligned to 4 bytes. | 126 // multiple of 16 bytes. Other stack arguments are aligned to 4 bytes. |
127 // | 127 // |
128 // This intends to match the section "IA-32 Function Calling Convention" of | 128 // This intends to match the section "IA-32 Function Calling Convention" of |
129 // the document "OS X ABI Function Call Guide" by Apple. | 129 // the document "OS X ABI Function Call Guide" by Apple. |
130 NeedsStackAlignment = true; | 130 NeedsStackAlignment = true; |
131 | 131 |
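Traits::applyStackAlignment, used below for vector stack arguments, presumably rounds a byte count up to the 16-byte stack alignment. A minimal sketch of such a helper, assuming the 16-byte constant (the real definition lives in the Traits header and is not part of this diff):

    #include <cstdint>

    // Assumed alignment constant; x86-32 vector stack args align to 16 bytes.
    constexpr uint32_t StackAlignmentBytes = 16;

    // Round Size up to the next multiple of StackAlignmentBytes.
    inline uint32_t applyStackAlignment(uint32_t Size) {
      return (Size + StackAlignmentBytes - 1) & ~(StackAlignmentBytes - 1);
    }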
132 OperandList XmmArgs; | 132 OperandList XmmArgs; |
133 OperandList StackArgs, StackArgLocations; | 133 OperandList StackArgs, StackArgLocations; |
134 uint32_t ParameterAreaSizeBytes = 0; | 134 int32_t ParameterAreaSizeBytes = 0; |
135 | 135 |
136 // Classify each argument operand according to the location where the | 136 // Classify each argument operand according to the location where the |
137 // argument is passed. | 137 // argument is passed. |
138 for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) { | 138 for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) { |
139 Operand *Arg = Instr->getArg(i); | 139 Operand *Arg = Instr->getArg(i); |
140 Type Ty = Arg->getType(); | 140 Type Ty = Arg->getType(); |
141 // The PNaCl ABI requires the width of arguments to be at least 32 bits. | 141 // The PNaCl ABI requires the width of arguments to be at least 32 bits. |
142 assert(typeWidthInBytes(Ty) >= 4); | 142 assert(typeWidthInBytes(Ty) >= 4); |
143 if (isVectorType(Ty) && XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) { | 143 if (isVectorType(Ty) && XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) { |
144 XmmArgs.push_back(Arg); | 144 XmmArgs.push_back(Arg); |
145 } else { | 145 } else { |
146 StackArgs.push_back(Arg); | 146 StackArgs.push_back(Arg); |
147 if (isVectorType(Arg->getType())) { | 147 if (isVectorType(Arg->getType())) { |
148 ParameterAreaSizeBytes = | 148 ParameterAreaSizeBytes = |
149 Traits::applyStackAlignment(ParameterAreaSizeBytes); | 149 Traits::applyStackAlignment(ParameterAreaSizeBytes); |
150 } | 150 } |
151 Variable *esp = | 151 Variable *esp = |
152 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp); | 152 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp); |
153 Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes); | 153 Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes); |
154 auto *Mem = Traits::X86OperandMem::create(Func, Ty, esp, Loc); | 154 auto *Mem = Traits::X86OperandMem::create(Func, Ty, esp, Loc); |
155 // Stack stores for arguments are fixed to esp. | 155 // Stack stores for arguments are fixed to esp. |
156 Mem->setIgnoreStackAdjust(true); | 156 Mem->setIgnoreStackAdjust(true); |
157 StackArgLocations.push_back(Mem); | 157 StackArgLocations.push_back(Mem); |
158 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType()); | 158 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType()); |
159 } | 159 } |
160 } | 160 } |
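A worked example of the loop above, with a hypothetical stack-argument list of i32, v4i32, i32 (vectors are bumped to the next 16-byte boundary before their slot is assigned; sizes are the on-stack widths):

    #include <cstdint>
    #include <cstdio>

    int main() {
      // On-stack byte widths and vector-ness of three hypothetical args.
      struct { uint32_t Bytes; bool IsVector; } Args[] = {
          {4, false}, {16, true}, {4, false}};
      uint32_t Offset = 0;
      for (const auto &A : Args) {
        if (A.IsVector) // vectors start at the next 16-byte boundary
          Offset = (Offset + 15) & ~15u;
        std::printf("store at [esp+%u]\n", Offset); // 0, 16, 32
        Offset += A.Bytes;
      }
      // 36 here; the final applyStackAlignment below pads it to 48.
      std::printf("ParameterAreaSizeBytes = %u\n", Offset);
      return 0;
    }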
| 161 // Ensure there is enough space for the fstp/movs for floating returns. |
| 162 Variable *Dest = Instr->getDest(); |
| 163 if (Dest != nullptr && isScalarFloatingType(Dest->getType())) { |
| 164 ParameterAreaSizeBytes = |
| 165 std::max(static_cast<size_t>(ParameterAreaSizeBytes), |
| 166 typeWidthInBytesOnStack(Dest->getType())); |
| 167 } |
161 | 168 |
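Note that the new reservation uses std::max rather than adding to the area: by the time st(0) is spilled, the outgoing arguments are dead, so the return slot can overlap them. Two illustrative cases (sizes assumed):

    // 0 bytes of stack args, f64 return:  max(0, 8)  = 8  -> reserve 8 bytes
    // 20 bytes of stack args, f32 return: max(20, 4) = 20 -> no extra space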
162 // Adjust the parameter area so that the stack is aligned. It is assumed that | 169 // Adjust the parameter area so that the stack is aligned. It is assumed that |
163 // the stack is already aligned at the start of the calling sequence. | 170 // the stack is already aligned at the start of the calling sequence. |
164 ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes); | 171 ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes); |
165 | 172 |
166 // Subtract the appropriate amount for the argument area. This also takes | 173 // Subtract the appropriate amount for the argument area. This also takes |
167 // care of setting the stack adjustment during emission. | 174 // care of setting the stack adjustment during emission. |
168 // | 175 // |
169 // TODO: If for some reason the call instruction gets dead-code eliminated | 176 // TODO: If for some reason the call instruction gets dead-code eliminated |
170 // after lowering, we would need to ensure that the pre-call and the | 177 // after lowering, we would need to ensure that the pre-call and the |
(...skipping 19 matching lines...) |
190 for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) { | 197 for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) { |
191 Variable *Reg = | 198 Variable *Reg = |
192 legalizeToReg(XmmArgs[i], Traits::RegisterSet::Reg_xmm0 + i); | 199 legalizeToReg(XmmArgs[i], Traits::RegisterSet::Reg_xmm0 + i); |
193 // Generate a FakeUse of register arguments so that they do not get dead | 200 // Generate a FakeUse of register arguments so that they do not get dead |
194 // code eliminated as a result of the FakeKill of scratch registers after | 201 // code eliminated as a result of the FakeKill of scratch registers after |
195 // the call. | 202 // the call. |
196 Context.insert(InstFakeUse::create(Func, Reg)); | 203 Context.insert(InstFakeUse::create(Func, Reg)); |
197 } | 204 } |
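The FakeUse pairs with the FakeKill inserted after the call below: without a use, the copy into the fixed xmm register would appear dead once the kill clobbers it. The intended instruction stream is roughly (a sketch, not actual Subzero output):

    movups xmm0, <arg0>   ; legalizeToReg(XmmArgs[0], Reg_xmm0)
    FakeUse(xmm0)         ; pins the copy so DCE keeps it
    call <target>
    FakeKill(...)         ; models the call clobbering scratch registers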
198 // Generate the call instruction. Assign its result to a temporary with high | 205 // Generate the call instruction. Assign its result to a temporary with high |
199 // register allocation weight. | 206 // register allocation weight. |
200 Variable *Dest = Instr->getDest(); | |
201 // ReturnReg doubles as ReturnRegLo as necessary. | 207 // ReturnReg doubles as ReturnRegLo as necessary. |
202 Variable *ReturnReg = nullptr; | 208 Variable *ReturnReg = nullptr; |
203 Variable *ReturnRegHi = nullptr; | 209 Variable *ReturnRegHi = nullptr; |
204 if (Dest) { | 210 if (Dest) { |
205 switch (Dest->getType()) { | 211 switch (Dest->getType()) { |
206 case IceType_NUM: | 212 case IceType_NUM: |
207 case IceType_void: | 213 case IceType_void: |
208 llvm::report_fatal_error("Invalid Call dest type"); | 214 llvm::report_fatal_error("Invalid Call dest type"); |
209 break; | 215 break; |
210 case IceType_i1: | 216 case IceType_i1: |
(...skipping 37 matching lines...) |
248 CallTarget = CallTargetVar; | 254 CallTarget = CallTargetVar; |
249 } | 255 } |
250 } | 256 } |
251 Inst *NewCall = Traits::Insts::Call::create(Func, ReturnReg, CallTarget); | 257 Inst *NewCall = Traits::Insts::Call::create(Func, ReturnReg, CallTarget); |
252 Context.insert(NewCall); | 258 Context.insert(NewCall); |
253 if (NeedSandboxing) | 259 if (NeedSandboxing) |
254 _bundle_unlock(); | 260 _bundle_unlock(); |
255 if (ReturnRegHi) | 261 if (ReturnRegHi) |
256 Context.insert(InstFakeDef::create(Func, ReturnRegHi)); | 262 Context.insert(InstFakeDef::create(Func, ReturnRegHi)); |
257 | 263 |
258 // Add the appropriate offset to esp. The call instruction takes care of | |
259 // resetting the stack offset during emission. | |
260 if (ParameterAreaSizeBytes) { | |
261 Variable *esp = | |
262 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp); | |
263 _add(esp, Ctx->getConstantInt32(ParameterAreaSizeBytes)); | |
264 } | |
265 | |
266 // Insert a register-kill pseudo instruction. | 264 // Insert a register-kill pseudo instruction. |
267 Context.insert(InstFakeKill::create(Func, NewCall)); | 265 Context.insert(InstFakeKill::create(Func, NewCall)); |
268 | 266 |
| 267 if (Dest != nullptr && isScalarFloatingType(Dest->getType())) { |
| 268 // Special treatment for an FP function which returns its result in st(0). |
| 269 // If Dest ends up being a physical xmm register, the fstp emit code will |
| 270 // route st(0) through the space reserved in the function argument area |
| 271 // we allocated. |
| 272 _fstp(Dest); |
| 273 // Create a fake use of Dest in case it actually isn't used, because st(0) |
| 274 // still needs to be popped. |
| 275 Context.insert(InstFakeUse::create(Func, Dest)); |
| 276 } |
| 277 |
| 278 // Add the appropriate offset to esp. |
| 279 if (ParameterAreaSizeBytes) |
| 280 _adjust_stack(-ParameterAreaSizeBytes); |
| 281 |
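The ordering here is what makes the earlier reservation work: _fstp is emitted while the parameter area is still allocated, and only afterwards is esp readjusted (note that _adjust_stack(-N) grows esp back, hence the switch to a signed ParameterAreaSizeBytes). For a double-returning call the sequence is roughly:

    call  <target>                      ; FP result left in st(0)
    fstp  qword ptr [esp]               ; spill into the reserved slot
    movsd <Dest xmm>, qword ptr [esp]   ; reload if Dest is an xmm register
    add   esp, ParameterAreaSizeBytes   ; release args and spill slot together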
269 // Generate a FakeUse to keep the call live if necessary. | 282 // Generate a FakeUse to keep the call live if necessary. |
270 if (Instr->hasSideEffects() && ReturnReg) { | 283 if (Instr->hasSideEffects() && ReturnReg) { |
271 Inst *FakeUse = InstFakeUse::create(Func, ReturnReg); | 284 Inst *FakeUse = InstFakeUse::create(Func, ReturnReg); |
272 Context.insert(FakeUse); | 285 Context.insert(FakeUse); |
273 } | 286 } |
274 | 287 |
275 if (!Dest) | 288 if (!Dest) |
276 return; | 289 return; |
277 | 290 |
278 // Assign the result of the call to Dest. | 291 // Assign the result of the call to Dest. |
279 if (ReturnReg) { | 292 if (ReturnReg) { |
280 if (ReturnRegHi) { | 293 if (ReturnRegHi) { |
281 auto *Dest64On32 = llvm::cast<Variable64On32>(Dest); | 294 auto *Dest64On32 = llvm::cast<Variable64On32>(Dest); |
282 Variable *DestLo = Dest64On32->getLo(); | 295 Variable *DestLo = Dest64On32->getLo(); |
283 Variable *DestHi = Dest64On32->getHi(); | 296 Variable *DestHi = Dest64On32->getHi(); |
284 _mov(DestLo, ReturnReg); | 297 _mov(DestLo, ReturnReg); |
285 _mov(DestHi, ReturnRegHi); | 298 _mov(DestHi, ReturnRegHi); |
286 } else { | 299 } else { |
287 assert(Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 || | 300 assert(Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 || |
288 Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 || | 301 Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 || |
289 isVectorType(Dest->getType())); | 302 isVectorType(Dest->getType())); |
290 if (isVectorType(Dest->getType())) { | 303 if (isVectorType(Dest->getType())) { |
291 _movp(Dest, ReturnReg); | 304 _movp(Dest, ReturnReg); |
292 } else { | 305 } else { |
293 _mov(Dest, ReturnReg); | 306 _mov(Dest, ReturnReg); |
294 } | 307 } |
295 } | 308 } |
296 } else if (isScalarFloatingType(Dest->getType())) { | |
297 // Special treatment for an FP function which returns its result in st(0). | |
298 // If Dest ends up being a physical xmm register, the fstp emit code will | |
299 // route st(0) through a temporary stack slot. | |
300 _fstp(Dest); | |
301 // Create a fake use of Dest in case it actually isn't used, because st(0) | |
302 // still needs to be popped. | |
303 Context.insert(InstFakeUse::create(Func, Dest)); | |
304 } | 309 } |
305 } | 310 } |
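For an i64 Dest, ReturnReg/ReturnRegHi model the standard x86-32 eax/edx return pair, so the 64-on-32 branch above lowers to two plain moves (sketch):

    mov <DestLo>, eax   ; low 32 bits of the i64 result
    mov <DestHi>, edx   ; high 32 bits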
306 | 311 |
307 void TargetX8632::lowerArguments() { | 312 void TargetX8632::lowerArguments() { |
308 VarList &Args = Func->getArgs(); | 313 VarList &Args = Func->getArgs(); |
309 // The first four arguments of vector type, regardless of their position | 314 // The first four arguments of vector type, regardless of their position |
310 // relative to the other arguments in the argument list, are passed in | 315 // relative to the other arguments in the argument list, are passed in |
311 // registers xmm0 - xmm3. | 316 // registers xmm0 - xmm3. |
312 unsigned NumXmmArgs = 0; | 317 unsigned NumXmmArgs = 0; |
313 | 318 |
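Concretely, for a hypothetical f(i32 a, <4 x i32> b, i32 c, <4 x i32> d): b lands in xmm0 and d in xmm1 regardless of the scalars between them, while a and c are read from their stack slots.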
(...skipping 42 matching lines...) |
356 } else { | 361 } else { |
357 _mov(Reg, Src0, Traits::RegisterSet::Reg_eax); | 362 _mov(Reg, Src0, Traits::RegisterSet::Reg_eax); |
358 } | 363 } |
359 } | 364 } |
360 // Add a ret instruction even if sandboxing is enabled, because addEpilog | 365 // Add a ret instruction even if sandboxing is enabled, because addEpilog |
361 // explicitly looks for a ret instruction as a marker for where to insert the | 366 // explicitly looks for a ret instruction as a marker for where to insert the |
362 // frame removal instructions. | 367 // frame removal instructions. |
363 _ret(Reg); | 368 _ret(Reg); |
364 // Add a fake use of esp to make sure esp stays alive for the entire | 369 // Add a fake use of esp to make sure esp stays alive for the entire |
365 // function. Otherwise post-call esp adjustments get dead-code eliminated. | 370 // function. Otherwise post-call esp adjustments get dead-code eliminated. |
366 // TODO: Are there more places where the fake use should be inserted? E.g. | 371 keepEspLiveAtExit(); |
367 // "void f(int n){while(1) g(n);}" may not have a ret instruction. | |
368 Variable *esp = | |
369 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp); | |
370 Context.insert(InstFakeUse::create(Func, esp)); | |
371 } | 372 } |
372 | 373 |
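keepEspLiveAtExit() is new in this change and its body is not shown in the diff; judging by the inline code it replaces, a plausible definition is simply the old FakeUse of esp, factored into a helper (an assumption, sketched for context):

    void TargetX8632::keepEspLiveAtExit() {
      // Fake use of esp so post-call esp adjustments are not dead-code
      // eliminated, even in functions whose exits are unusual.
      Variable *esp =
          Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
      Context.insert(InstFakeUse::create(Func, esp));
    }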
373 void TargetX8632::addProlog(CfgNode *Node) { | 374 void TargetX8632::addProlog(CfgNode *Node) { |
374 // Stack frame layout: | 375 // Stack frame layout: |
375 // | 376 // |
376 // +------------------------+ | 377 // +------------------------+ |
377 // | 1. return address | | 378 // | 1. return address | |
378 // +------------------------+ | 379 // +------------------------+ |
379 // | 2. preserved registers | | 380 // | 2. preserved registers | |
380 // +------------------------+ | 381 // +------------------------+ |
(...skipping 593 matching lines...) |
974 // case the high-level table has extra entries. | 975 // case the high-level table has extra entries. |
975 #define X(tag, sizeLog2, align, elts, elty, str) \ | 976 #define X(tag, sizeLog2, align, elts, elty, str) \ |
976 static_assert(_table1_##tag == _table2_##tag, \ | 977 static_assert(_table1_##tag == _table2_##tag, \ |
977 "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE"); | 978 "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE"); |
978 ICETYPE_TABLE | 979 ICETYPE_TABLE |
979 #undef X | 980 #undef X |
980 } // end of namespace dummy3 | 981 } // end of namespace dummy3 |
981 } // end of anonymous namespace | 982 } // end of anonymous namespace |
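The check above is the classic X-macro cross-validation idiom: each table is expanded into its own set of enumerators, then a third expansion asserts pairwise equality at compile time. A self-contained illustration with invented table names:

    #define TABLE_A X(Foo, 1) X(Bar, 2)
    #define TABLE_B X(Foo, 1) X(Bar, 2)

    namespace {
    enum {
    #define X(tag, val) _a_##tag = (val),
      TABLE_A
    #undef X
    };
    enum {
    #define X(tag, val) _b_##tag = (val),
      TABLE_B
    #undef X
    };
    // A mismatch between the tables fails to compile here.
    #define X(tag, val) \
      static_assert(_a_##tag == _b_##tag, "TABLE_A and TABLE_B disagree");
    TABLE_A
    #undef X
    } // end of anonymous namespace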
982 | 983 |
983 } // end of namespace Ice | 984 } // end of namespace Ice |