| OLD | NEW |
| 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// | 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 /// | 9 /// |
| 10 /// \file | 10 /// \file |
| (...skipping 139 matching lines...) |
| 150 | 150 |
| 151 if (NeedSandboxing) { | 151 if (NeedSandboxing) { |
| 152 const SizeT BundleSize = | 152 const SizeT BundleSize = |
| 153 1 << Func->getAssembler<>()->getBundleAlignLog2Bytes(); | 153 1 << Func->getAssembler<>()->getBundleAlignLog2Bytes(); |
| 154 _and(JumpTarget, Ctx->getConstantInt32(~(BundleSize - 1))); | 154 _and(JumpTarget, Ctx->getConstantInt32(~(BundleSize - 1))); |
| 155 } | 155 } |
| 156 | 156 |
| 157 _jmp(JumpTarget); | 157 _jmp(JumpTarget); |
| 158 } | 158 } |
| 159 | 159 |
| 160 void TargetX8632::lowerCall(const InstCall *Instr) { | 160 Inst *TargetX8632::emitCallToTarget(Operand *CallTarget, Variable *ReturnReg) { |
| 161 // x86-32 calling convention: | 161 std::unique_ptr<AutoBundle> Bundle; |
| 162 // | 162 if (NeedSandboxing) { |
| 163 // * At the point before the call, the stack must be aligned to 16 bytes. | 163 if (llvm::isa<Constant>(CallTarget)) { |
| 164 // | 164 Bundle = makeUnique<AutoBundle>(this, InstBundleLock::Opt_AlignToEnd); |
| 165 // * The first four arguments of vector type, regardless of their position | |
| 166 // relative to the other arguments in the argument list, are placed in | |
| 167 // registers xmm0 - xmm3. | |
| 168 // | |
| 169 // * Other arguments are pushed onto the stack in right-to-left order, such | |
| 170 // that the left-most argument ends up on the top of the stack at the lowest | |
| 171 // memory address. | |
| 172 // | |
| 173 // * Stack arguments of vector type are aligned to start at the next highest | |
| 174 // multiple of 16 bytes. Other stack arguments are aligned to 4 bytes. | |
| 175 // | |
| 176 // This intends to match the section "IA-32 Function Calling Convention" of | |
| 177 // the document "OS X ABI Function Call Guide" by Apple. | |
| 178 NeedsStackAlignment = true; | |
| 179 | |
| 180 OperandList XmmArgs; | |
| 181 OperandList StackArgs, StackArgLocations; | |
| 182 int32_t ParameterAreaSizeBytes = 0; | |
| 183 | |
| 184 // Classify each argument operand according to the location where the | |
| 185 // argument is passed. | |
| 186 for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) { | |
| 187 Operand *Arg = Instr->getArg(i); | |
| 188 Type Ty = Arg->getType(); | |
| 189 // The PNaCl ABI requires the width of arguments to be at least 32 bits. | |
| 190 assert(typeWidthInBytes(Ty) >= 4); | |
| 191 if (isVectorType(Ty) && XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) { | |
| 192 XmmArgs.push_back(Arg); | |
| 193 } else { | 165 } else { |
| 194 StackArgs.push_back(Arg); | 166 Variable *CallTargetVar = nullptr; |
| 195 if (isVectorType(Arg->getType())) { | 167 _mov(CallTargetVar, CallTarget); |
| 196 ParameterAreaSizeBytes = | 168 Bundle = makeUnique<AutoBundle>(this, InstBundleLock::Opt_AlignToEnd); |
| 197 Traits::applyStackAlignment(ParameterAreaSizeBytes); | 169 const SizeT BundleSize = |
| 198 } | 170 1 << Func->getAssembler<>()->getBundleAlignLog2Bytes(); |
| 199 Variable *esp = | 171 _and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1))); |
| 200 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp); | 172 CallTarget = CallTargetVar; |
| 201 Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes); | |
| 202 auto *Mem = Traits::X86OperandMem::create(Func, Ty, esp, Loc); | |
| 203 StackArgLocations.push_back(Mem); | |
| 204 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType()); | |
| 205 } | 173 } |
| 206 } | 174 } |
| 207 // Ensure there is enough space for the fstp/movs for floating returns. | 175 return Context.insert<Traits::Insts::Call>(ReturnReg, CallTarget); |
| 208 Variable *Dest = Instr->getDest(); | 176 } |
| 209 if (Dest != nullptr && isScalarFloatingType(Dest->getType())) { | |
| 210 ParameterAreaSizeBytes = | |
| 211 std::max(static_cast<size_t>(ParameterAreaSizeBytes), | |
| 212 typeWidthInBytesOnStack(Dest->getType())); | |
| 213 } | |
| 214 | 177 |
| 215 // Adjust the parameter area so that the stack is aligned. It is assumed that | 178 Variable *TargetX8632::moveReturnValueToRegister(Operand *Value, |
| 216 // the stack is already aligned at the start of the calling sequence. | 179 const Type ReturnType) { |
| 217 ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes); | 180 if (isVectorType(ReturnType)) { |
| 218 assert(static_cast<uint32_t>(ParameterAreaSizeBytes) <= | 181 return legalizeToReg(Value, Traits::RegisterSet::Reg_xmm0); |
| 219 maxOutArgsSizeBytes()); | 182 } else if (isScalarFloatingType(ReturnType)) { |
| 220 | 183 _fld(Value); |
| 221 // Copy arguments that are passed on the stack to the appropriate stack | 184 return nullptr; |
| 222 // locations. | 185 } else { |
| 223 for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) { | 186 assert(ReturnType == IceType_i32 || ReturnType == IceType_i64); |
| 224 lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i])); | 187 if (ReturnType == IceType_i64) { |
| 225 } | 188 Variable *eax = |
| 226 | 189 legalizeToReg(loOperand(Value), Traits::RegisterSet::Reg_eax); |
| 227 // Copy arguments to be passed in registers to the appropriate registers. | 190 Variable *edx = |
| 228 // TODO: Investigate the impact of lowering arguments passed in registers | 191 legalizeToReg(hiOperand(Value), Traits::RegisterSet::Reg_edx); |
| 229 // after lowering stack arguments as opposed to the other way around. | 192 Context.insert<InstFakeUse>(edx); |
| 230 // Lowering register arguments after stack arguments may reduce register | 193 return eax; |
| 231 // pressure. On the other hand, lowering register arguments first (before | |
| 232 // stack arguments) may result in more compact code, as the memory operand | |
| 233 // displacements may end up being smaller before any stack adjustment is | |
| 234 // done. | |
| 235 for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) { | |
| 236 Variable *Reg = | |
| 237 legalizeToReg(XmmArgs[i], Traits::RegisterSet::Reg_xmm0 + i); | |
| 238 // Generate a FakeUse of register arguments so that they do not get dead | |
| 239 // code eliminated as a result of the FakeKill of scratch registers after | |
| 240 // the call. | |
| 241 Context.insert<InstFakeUse>(Reg); | |
| 242 } | |
| 243 // Generate the call instruction. Assign its result to a temporary with high | |
| 244 // register allocation weight. | |
| 245 // ReturnReg doubles as ReturnRegLo as necessary. | |
| 246 Variable *ReturnReg = nullptr; | |
| 247 Variable *ReturnRegHi = nullptr; | |
| 248 if (Dest) { | |
| 249 const Type DestTy = Dest->getType(); | |
| 250 switch (DestTy) { | |
| 251 case IceType_NUM: | |
| 252 case IceType_void: | |
| 253 case IceType_i1: | |
| 254 case IceType_i8: | |
| 255 case IceType_i16: | |
| 256 llvm::report_fatal_error("Invalid Call dest type"); | |
| 257 break; | |
| 258 case IceType_i32: | |
| 259 ReturnReg = makeReg(DestTy, Traits::RegisterSet::Reg_eax); | |
| 260 break; | |
| 261 case IceType_i64: | |
| 262 ReturnReg = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); | |
| 263 ReturnRegHi = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); | |
| 264 break; | |
| 265 case IceType_f32: | |
| 266 case IceType_f64: | |
| 267 // Leave ReturnReg==ReturnRegHi==nullptr, and capture the result with the | |
| 268 // fstp instruction. | |
| 269 break; | |
| 270 case IceType_v4i1: | |
| 271 case IceType_v8i1: | |
| 272 case IceType_v16i1: | |
| 273 case IceType_v16i8: | |
| 274 case IceType_v8i16: | |
| 275 case IceType_v4i32: | |
| 276 case IceType_v4f32: | |
| 277 ReturnReg = makeReg(DestTy, Traits::RegisterSet::Reg_xmm0); | |
| 278 break; | |
| 279 } | |
| 280 } | |
| 281 | |
| 282 Operand *CallTarget = | |
| 283 legalize(Instr->getCallTarget(), Legal_Reg | Legal_Imm | Legal_AddrAbs); | |
| 284 | |
| 285 Traits::Insts::Call *NewCall; | |
| 286 /* AutoBundle scoping */ { | |
| 287 std::unique_ptr<AutoBundle> Bundle; | |
| 288 if (NeedSandboxing) { | |
| 289 if (llvm::isa<Constant>(CallTarget)) { | |
| 290 Bundle = makeUnique<AutoBundle>(this, InstBundleLock::Opt_AlignToEnd); | |
| 291 } else { | |
| 292 Variable *CallTargetVar = nullptr; | |
| 293 _mov(CallTargetVar, CallTarget); | |
| 294 Bundle = makeUnique<AutoBundle>(this, InstBundleLock::Opt_AlignToEnd); | |
| 295 const SizeT BundleSize = | |
| 296 1 << Func->getAssembler<>()->getBundleAlignLog2Bytes(); | |
| 297 _and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1))); | |
| 298 CallTarget = CallTargetVar; | |
| 299 } | |
| 300 } | |
| 301 NewCall = Context.insert<Traits::Insts::Call>(ReturnReg, CallTarget); | |
| 302 } | |
| 303 | |
| 304 if (ReturnRegHi) | |
| 305 Context.insert<InstFakeDef>(ReturnRegHi); | |
| 306 | |
| 307 // Insert a register-kill pseudo instruction. | |
| 308 Context.insert<InstFakeKill>(NewCall); | |
| 309 | |
| 310 if (Dest != nullptr && isScalarFloatingType(Dest->getType())) { | |
| 311 // Special treatment for an FP function which returns its result in st(0). | |
| 312 // If Dest ends up being a physical xmm register, the fstp emit code will | |
| 313 // route st(0) through the space reserved in the function argument area | |
| 314 // we allocated. | |
| 315 _fstp(Dest); | |
| 316 // Create a fake use of Dest in case it actually isn't used, because st(0) | |
| 317 // still needs to be popped. | |
| 318 Context.insert<InstFakeUse>(Dest); | |
| 319 } | |
| 320 | |
| 321 // Generate a FakeUse to keep the call live if necessary. | |
| 322 if (Instr->hasSideEffects() && ReturnReg) { | |
| 323 Context.insert<InstFakeUse>(ReturnReg); | |
| 324 } | |
| 325 | |
| 326 if (!Dest) | |
| 327 return; | |
| 328 | |
| 329 // Assign the result of the call to Dest. | |
| 330 if (ReturnReg) { | |
| 331 if (ReturnRegHi) { | |
| 332 auto *Dest64On32 = llvm::cast<Variable64On32>(Dest); | |
| 333 Variable *DestLo = Dest64On32->getLo(); | |
| 334 Variable *DestHi = Dest64On32->getHi(); | |
| 335 _mov(DestLo, ReturnReg); | |
| 336 _mov(DestHi, ReturnRegHi); | |
| 337 } else { | 194 } else { |
| 338 const Type DestTy = Dest->getType(); | 195 Variable *Reg = nullptr; |
| 339 assert(DestTy == IceType_i32 || DestTy == IceType_i16 || | 196 _mov(Reg, Value, Traits::RegisterSet::Reg_eax); |
| 340 DestTy == IceType_i8 || DestTy == IceType_i1 || | 197 return Reg; |
| 341 isVectorType(DestTy)); | |
| 342 if (isVectorType(DestTy)) { | |
| 343 _movp(Dest, ReturnReg); | |
| 344 } else { | |
| 345 _mov(Dest, ReturnReg); | |
| 346 } | |
| 347 } | 198 } |
| 348 } | 199 } |
| 349 } | 200 } |
| 350 | 201 |
| 351 void TargetX8632::lowerArguments() { | |
| 352 VarList &Args = Func->getArgs(); | |
| 353 // The first four arguments of vector type, regardless of their position | |
| 354 // relative to the other arguments in the argument list, are passed in | |
| 355 // registers xmm0 - xmm3. | |
| 356 unsigned NumXmmArgs = 0; | |
| 357 | |
| 358 Context.init(Func->getEntryNode()); | |
| 359 Context.setInsertPoint(Context.getCur()); | |
| 360 | |
| 361 for (SizeT I = 0, E = Args.size(); | |
| 362 I < E && NumXmmArgs < Traits::X86_MAX_XMM_ARGS; ++I) { | |
| 363 Variable *Arg = Args[I]; | |
| 364 Type Ty = Arg->getType(); | |
| 365 if (!isVectorType(Ty)) | |
| 366 continue; | |
| 367 // Replace Arg in the argument list with the home register. Then generate | |
| 368 // an instruction in the prolog to copy the home register to the assigned | |
| 369 // location of Arg. | |
| 370 int32_t RegNum = Traits::RegisterSet::Reg_xmm0 + NumXmmArgs; | |
| 371 ++NumXmmArgs; | |
| 372 Variable *RegisterArg = Func->makeVariable(Ty); | |
| 373 if (BuildDefs::dump()) | |
| 374 RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func)); | |
| 375 RegisterArg->setRegNum(RegNum); | |
| 376 RegisterArg->setIsArg(); | |
| 377 Arg->setIsArg(false); | |
| 378 | |
| 379 Args[I] = RegisterArg; | |
| 380 Context.insert<InstAssign>(Arg, RegisterArg); | |
| 381 } | |
| 382 } | |
| 383 | |
| 384 void TargetX8632::lowerRet(const InstRet *Inst) { | |
| 385 Variable *Reg = nullptr; | |
| 386 if (Inst->hasRetValue()) { | |
| 387 Operand *Src0 = legalize(Inst->getRetValue()); | |
| 388 const Type Src0Ty = Src0->getType(); | |
| 389 // TODO(jpp): this is not needed. | |
| 390 if (Src0Ty == IceType_i64) { | |
| 391 Variable *eax = | |
| 392 legalizeToReg(loOperand(Src0), Traits::RegisterSet::Reg_eax); | |
| 393 Variable *edx = | |
| 394 legalizeToReg(hiOperand(Src0), Traits::RegisterSet::Reg_edx); | |
| 395 Reg = eax; | |
| 396 Context.insert<InstFakeUse>(edx); | |
| 397 } else if (isScalarFloatingType(Src0Ty)) { | |
| 398 _fld(Src0); | |
| 399 } else if (isVectorType(Src0Ty)) { | |
| 400 Reg = legalizeToReg(Src0, Traits::RegisterSet::Reg_xmm0); | |
| 401 } else { | |
| 402 assert(Src0Ty == IceType_i32); | |
| 403 _mov(Reg, Src0, Traits::RegisterSet::Reg_eax); | |
| 404 } | |
| 405 } | |
| 406 // Add a ret instruction even if sandboxing is enabled, because addEpilog | |
| 407 // explicitly looks for a ret instruction as a marker for where to insert the | |
| 408 // frame removal instructions. | |
| 409 _ret(Reg); | |
| 410 // Add a fake use of esp to make sure esp stays alive for the entire | |
| 411 // function. Otherwise post-call esp adjustments get dead-code eliminated. | |
| 412 keepEspLiveAtExit(); | |
| 413 } | |
| 414 | |
| 415 void TargetX8632::addProlog(CfgNode *Node) { | 202 void TargetX8632::addProlog(CfgNode *Node) { |
| 416 // Stack frame layout: | 203 // Stack frame layout: |
| 417 // | 204 // |
| 418 // +------------------------+ | 205 // +------------------------+ |
| 419 // | 1. return address | | 206 // | 1. return address | |
| 420 // +------------------------+ | 207 // +------------------------+ |
| 421 // | 2. preserved registers | | 208 // | 2. preserved registers | |
| 422 // +------------------------+ | 209 // +------------------------+ |
| 423 // | 3. padding | | 210 // | 3. padding | |
| 424 // +------------------------+ | 211 // +------------------------+ |
| (...skipping 671 matching lines...) |
| 1096 #define X(tag, sizeLog2, align, elts, elty, str) \ | 883 #define X(tag, sizeLog2, align, elts, elty, str) \ |
| 1097 static_assert(_table1_##tag == _table2_##tag, \ | 884 static_assert(_table1_##tag == _table2_##tag, \ |
| 1098 "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE"); | 885 "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE"); |
| 1099 ICETYPE_TABLE | 886 ICETYPE_TABLE |
| 1100 #undef X | 887 #undef X |
| 1101 } // end of namespace dummy3 | 888 } // end of namespace dummy3 |
| 1102 } // end of anonymous namespace | 889 } // end of anonymous namespace |
| 1103 | 890 |
| 1104 } // end of namespace X8632 | 891 } // end of namespace X8632 |
| 1105 } // end of namespace Ice | 892 } // end of namespace Ice |
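
A few notes on the lowering shown above. The comment block deleted from lowerCall (old lines 161-177) describes the x86-32 convention this backend follows: the first four vector arguments travel in xmm0-xmm3 regardless of their position in the argument list, every other argument gets a stack slot with the left-most stack argument at the lowest address, vector stack slots start on a 16-byte boundary, and the whole out-args area is padded so the stack stays 16-byte aligned at the call. The sketch below reproduces that classification in isolation; the `Ty` enum, `widthOnStack`, `applyStackAlignment16`, and `classifyArgs` are made-up stand-ins for Subzero's own helpers, not its API.

```cpp
#include <cassert>
#include <cstdint>
#include <cstdio>
#include <utility>
#include <vector>

// Illustrative stand-ins for Subzero's type queries (hypothetical names).
enum class Ty { I32, I64, F32, F64, V4I32, V4F32 };
static bool isVectorType(Ty T) { return T == Ty::V4I32 || T == Ty::V4F32; }
static uint32_t widthOnStack(Ty T) {
  switch (T) {
  case Ty::I32: case Ty::F32: return 4;
  case Ty::I64: case Ty::F64: return 8;
  default:                    return 16; // vectors
  }
}
// Round Offset up to the next multiple of 16 (the stack alignment).
static uint32_t applyStackAlignment16(uint32_t Offset) {
  return (Offset + 15) & ~uint32_t(15);
}

constexpr unsigned MaxXmmArgs = 4; // xmm0..xmm3

// Result of classifying a call's arguments the way lowerCall's first loop
// does: up to four vector arguments ride in xmm registers, everything else
// gets an esp-relative slot that respects the per-type alignment rules.
struct CallLayout {
  std::vector<unsigned> XmmArgIndexes;                    // args in xmm0..xmm3
  std::vector<std::pair<unsigned, uint32_t>> StackSlots;  // (arg index, esp offset)
  uint32_t ParameterAreaSizeBytes = 0;
};

static CallLayout classifyArgs(const std::vector<Ty> &Args) {
  CallLayout L;
  for (unsigned i = 0; i < Args.size(); ++i) {
    Ty T = Args[i];
    if (isVectorType(T) && L.XmmArgIndexes.size() < MaxXmmArgs) {
      L.XmmArgIndexes.push_back(i);
      continue;
    }
    // Vector args that don't get an xmm register start on a 16-byte boundary.
    if (isVectorType(T))
      L.ParameterAreaSizeBytes = applyStackAlignment16(L.ParameterAreaSizeBytes);
    L.StackSlots.emplace_back(i, L.ParameterAreaSizeBytes);
    L.ParameterAreaSizeBytes += widthOnStack(T);
  }
  // The whole out-args area is padded so the stack stays 16-byte aligned.
  L.ParameterAreaSizeBytes = applyStackAlignment16(L.ParameterAreaSizeBytes);
  return L;
}

int main() {
  // f(i32, v4f32, i64, v4i32): both vectors ride in xmm registers, the i32
  // lands at [esp+0] and the i64 at [esp+4]; the area is padded to 16 bytes.
  CallLayout L = classifyArgs({Ty::I32, Ty::V4F32, Ty::I64, Ty::V4I32});
  assert(L.XmmArgIndexes.size() == 2 && L.StackSlots.size() == 2);
  assert(L.ParameterAreaSizeBytes == 16);
  for (auto &S : L.StackSlots)
    std::printf("arg %u at [esp+%u]\n", S.first, S.second);
}
```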
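
Both the indirect-jump lowering near the top of the diff and the new emitCallToTarget guard sandboxed indirect branches by masking the target register with `~(BundleSize - 1)` inside an AutoBundle. The arithmetic is worth seeing once in isolation: assuming the usual NaCl bundle size of 32 bytes (an assumption here; Subzero reads the log2 from the assembler via getBundleAlignLog2Bytes), the mask is 0xFFFFFFE0, which clears the low five bits and forces the branch to land on a bundle boundary.

```cpp
#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  // Assumed bundle size: NaCl on x86 uses 32-byte bundles, i.e.
  // getBundleAlignLog2Bytes() == 5; Subzero queries this from the assembler.
  const uint32_t BundleAlignLog2Bytes = 5;
  const uint32_t BundleSize = 1u << BundleAlignLog2Bytes;
  const uint32_t Mask = ~(BundleSize - 1); // 0xFFFFFFE0

  // Clearing the low bits of an untrusted target means the indirect branch
  // can only land at the start of a 32-byte bundle, never in the middle of
  // an instruction, which is the invariant the sandbox relies on.
  const uint32_t Target = 0x00401234;
  const uint32_t Sandboxed = Target & Mask;
  assert(Sandboxed % BundleSize == 0);
  std::printf("0x%08X & 0x%08X = 0x%08X\n", Target, Mask, Sandboxed);
  return 0;
}
```

Emitting the mask and the branch inside one AlignToEnd bundle is what keeps a verifier from ever observing the unmasked target between the two instructions.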
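
moveReturnValueToRegister and the ReturnReg selection in the old lowerCall encode the same x86-32 return conventions: i32 comes back in eax, i64 as the edx:eax pair, vectors in xmm0, and scalar floats on the x87 stack in st(0), which is why lowerCall reserves argument-area space and pops the value with fstp plus a FakeUse. A minimal lookup of those locations, with invented enum and function names rather than Subzero's types:

```cpp
#include <cstdio>
#include <initializer_list>

// Where an x86-32 call's result lives, mirroring the switch over DestTy in
// lowerCall and the branches of moveReturnValueToRegister (illustrative
// names only; RetTy and returnLocation are not Subzero types).
enum class RetTy { Void, I32, I64, F32, F64, Vector };

static const char *returnLocation(RetTy T) {
  switch (T) {
  case RetTy::Void:   return "no register";
  case RetTy::I32:    return "eax";
  case RetTy::I64:    return "edx:eax (high half in edx, low half in eax)";
  case RetTy::F32:
  case RetTy::F64:    return "x87 st(0), captured with fstp";
  case RetTy::Vector: return "xmm0";
  }
  return "unreachable";
}

int main() {
  for (RetTy T : {RetTy::I32, RetTy::I64, RetTy::F64, RetTy::Vector})
    std::printf("%s\n", returnLocation(T));
}
```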
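
The tail of the file (old lines 1096-1101) expands ICETYPE_TABLE one more time purely to static_assert that it stays consistent with ICETYPEX8632_TABLE. A toy version of that X-macro cross-check, with invented table and constant names, shows how any drift between the two tables fails at compile time rather than at run time:

```cpp
// Each table is an object-like macro expanding to X(...) rows; expanding it
// with one definition of X generates per-tag constants, and a final
// expansion compares the constants from the two tables pairwise.
// (TOY_TABLE_A / TOY_TABLE_B and the _table*_ names are invented.)
#define TOY_TABLE_A                                                            \
  X(i32, 4)                                                                    \
  X(i64, 8)

#define TOY_TABLE_B                                                            \
  X(i32, 4)                                                                    \
  X(i64, 8)

namespace table_a {
#define X(tag, width) constexpr int _table1_##tag = width;
TOY_TABLE_A
#undef X
} // end of namespace table_a

namespace table_b {
#define X(tag, width) constexpr int _table2_##tag = width;
TOY_TABLE_B
#undef X
} // end of namespace table_b

// Walk one table again, asserting agreement with the other's constants.
#define X(tag, width)                                                          \
  static_assert(table_a::_table1_##tag == table_b::_table2_##tag,              \
                "Inconsistency between TOY_TABLE_A and TOY_TABLE_B");
TOY_TABLE_A
#undef X

int main() {} // nothing to execute; the checks happen at compile time
```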