Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// | 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 /// | 9 /// |
| 10 /// \file | 10 /// \file |
| (...skipping 71 matching lines...) | |
| 82 }; | 82 }; |
| 83 | 83 |
| 84 const size_t MachineTraits<TargetX8632>::TableTypeX8632AttributesSize = | 84 const size_t MachineTraits<TargetX8632>::TableTypeX8632AttributesSize = |
| 85 llvm::array_lengthof(TableTypeX8632Attributes); | 85 llvm::array_lengthof(TableTypeX8632Attributes); |
| 86 | 86 |
| 87 const uint32_t MachineTraits<TargetX8632>::X86_STACK_ALIGNMENT_BYTES = 16; | 87 const uint32_t MachineTraits<TargetX8632>::X86_STACK_ALIGNMENT_BYTES = 16; |
| 88 const char *MachineTraits<TargetX8632>::TargetName = "X8632"; | 88 const char *MachineTraits<TargetX8632>::TargetName = "X8632"; |
| 89 | 89 |
| 90 } // end of namespace X86Internal | 90 } // end of namespace X86Internal |
| 91 | 91 |
| 92 //------------------------------------------------------------------------------ | |
| 93 // __ ______ __ __ ______ ______ __ __ __ ______ | |
| 94 // /\ \ /\ __ \/\ \ _ \ \/\ ___\/\ == \/\ \/\ "-.\ \/\ ___\ | |
| 95 // \ \ \___\ \ \/\ \ \ \/ ".\ \ \ __\\ \ __<\ \ \ \ \-. \ \ \__ \ | |
| 96 // \ \_____\ \_____\ \__/".~\_\ \_____\ \_\ \_\ \_\ \_\\"\_\ \_____\ | |
| 97 // \/_____/\/_____/\/_/ \/_/\/_____/\/_/ /_/\/_/\/_/ \/_/\/_____/ | |
| 98 // | |
| 99 //------------------------------------------------------------------------------ | |
| 100 void TargetX8632::lowerCall(const InstCall *Instr) { | |
| 101 // x86-32 calling convention: | |
| 102 // | |
| 103 // * At the point before the call, the stack must be aligned to 16 | |
| 104 // bytes. | |
| 105 // | |
| 106 // * The first four arguments of vector type, regardless of their | |
| 107 // position relative to the other arguments in the argument list, are | |
| 108 // placed in registers xmm0 - xmm3. | |
| 109 // | |
| 110 // * Other arguments are pushed onto the stack in right-to-left order, | |
| 111 // such that the left-most argument ends up on the top of the stack at | |
| 112 // the lowest memory address. | |
| 113 // | |
| 114 // * Stack arguments of vector type are aligned to start at the next | |
| 115 // highest multiple of 16 bytes. Other stack arguments are aligned to | |
| 116 // 4 bytes. | |
| 117 // | |
| 118 // This intends to match the section "IA-32 Function Calling | |
| 119 // Convention" of the document "OS X ABI Function Call Guide" by | |
| 120 // Apple. | |
| 121 NeedsStackAlignment = true; | |
| 122 | |
| 123 typedef std::vector<Operand *> OperandList; | |
| 124 OperandList XmmArgs; | |
| 125 OperandList StackArgs, StackArgLocations; | |
| 126 uint32_t ParameterAreaSizeBytes = 0; | |
| 127 | |
| 128 // Classify each argument operand according to the location where the | |
| 129 // argument is passed. | |
| 130 for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) { | |
| 131 Operand *Arg = Instr->getArg(i); | |
| 132 Type Ty = Arg->getType(); | |
| 133 // The PNaCl ABI requires the width of arguments to be at least 32 bits. | |
| 134 assert(typeWidthInBytes(Ty) >= 4); | |
| 135 if (isVectorType(Ty) && XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) { | |
| 136 XmmArgs.push_back(Arg); | |
| 137 } else { | |
| 138 StackArgs.push_back(Arg); | |
| 139 if (isVectorType(Arg->getType())) { | |
| 140 ParameterAreaSizeBytes = | |
| 141 Traits::applyStackAlignment(ParameterAreaSizeBytes); | |
| 142 } | |
| 143 Variable *esp = | |
| 144 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp); | |
| 145 Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes); | |
| 146 StackArgLocations.push_back( | |
| 147 Traits::X86OperandMem::create(Func, Ty, esp, Loc)); | |
| 148 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType()); | |
| 149 } | |
| 150 } | |
| 151 | |
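
As a concrete walk-through of the classification loop above, here is a standalone C++ sketch. The six-argument signature, the four-register limit, and the 4/16-byte stack sizes are assumptions for illustration, mirroring `Traits::X86_MAX_XMM_ARGS` and `typeWidthInBytesOnStack` rather than quoting them:

```cpp
#include <cstdint>
#include <cstdio>
#include <vector>

// Assumed analogues of Traits::X86_MAX_XMM_ARGS and the 16-byte
// stack alignment applied to vector stack arguments.
constexpr uint32_t MaxXmmArgs = 4;
constexpr uint32_t StackAlign = 16;

struct Arg {
  bool IsVector;
  uint32_t StackBytes; // typeWidthInBytesOnStack analogue
};

int main() {
  // Hypothetical call: f(v4f32, i32, v4f32, v4f32, v4f32, v4f32)
  std::vector<Arg> Args = {{true, 16}, {false, 4}, {true, 16},
                           {true, 16}, {true, 16}, {true, 16}};
  uint32_t NumXmm = 0, AreaBytes = 0;
  for (const Arg &A : Args) {
    if (A.IsVector && NumXmm < MaxXmmArgs) {
      std::printf("arg -> xmm%u\n", NumXmm++);
    } else {
      if (A.IsVector) // vector stack args start at a 16-byte boundary
        AreaBytes = (AreaBytes + StackAlign - 1) & ~(StackAlign - 1);
      std::printf("arg -> [esp+%u]\n", AreaBytes);
      AreaBytes += A.StackBytes;
    }
  }
  // Final round-up, as lowerCall does after the loop.
  AreaBytes = (AreaBytes + StackAlign - 1) & ~(StackAlign - 1);
  std::printf("parameter area = %u bytes\n", AreaBytes); // 32 here
}
```
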
| 152 // Adjust the parameter area so that the stack is aligned. It is | |
| 153 // assumed that the stack is already aligned at the start of the | |
| 154 // calling sequence. | |
| 155 ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes); | |
| 156 | |
| 157 // Subtract the appropriate amount for the argument area. This also | |
| 158 // takes care of setting the stack adjustment during emission. | |
| 159 // | |
| 160 // TODO: If for some reason the call instruction gets dead-code | |
| 161 // eliminated after lowering, we would need to ensure that the | |
| 162 // pre-call and the post-call esp adjustment get eliminated as well. | |
| 163 if (ParameterAreaSizeBytes) { | |
| 164 _adjust_stack(ParameterAreaSizeBytes); | |
| 165 } | |
| 166 | |
| 167 // Copy arguments that are passed on the stack to the appropriate | |
| 168 // stack locations. | |
| 169 for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) { | |
| 170 lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i])); | |
| 171 } | |
| 172 | |
| 173 // Copy arguments to be passed in registers to the appropriate | |
| 174 // registers. | |
| 175 // TODO: Investigate the impact of lowering arguments passed in | |
| 176 // registers after lowering stack arguments as opposed to the other | |
| 177 // way around. Lowering register arguments after stack arguments may | |
| 178 // reduce register pressure. On the other hand, lowering register | |
| 179 // arguments first (before stack arguments) may result in more compact | |
| 180 // code, as the memory operand displacements may end up being smaller | |
| 181 // before any stack adjustment is done. | |
| 182 for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) { | |
| 183 Variable *Reg = | |
| 184 legalizeToReg(XmmArgs[i], Traits::RegisterSet::Reg_xmm0 + i); | |
| 185 // Generate a FakeUse of register arguments so that they do not get | |
| 186 // dead code eliminated as a result of the FakeKill of scratch | |
| 187 // registers after the call. | |
| 188 Context.insert(InstFakeUse::create(Func, Reg)); | |
| 189 } | |
| 190 // Generate the call instruction. Assign its result to a temporary | |
| 191 // with high register allocation weight. | |
| 192 Variable *Dest = Instr->getDest(); | |
| 193 // ReturnReg doubles as ReturnRegLo as necessary. | |
| 194 Variable *ReturnReg = nullptr; | |
| 195 Variable *ReturnRegHi = nullptr; | |
| 196 if (Dest) { | |
| 197 switch (Dest->getType()) { | |
| 198 case IceType_NUM: | |
| 199 llvm_unreachable("Invalid Call dest type"); | |
| 200 break; | |
| 201 case IceType_void: | |
|
Jim Stichnoth (2015/08/06 13:49:19): Wasn't there discussion on another CL that IceType
John (2015/08/06 14:44:08): There was a comment about it, but I did not unders

| 202 break; | |
| 203 case IceType_i1: | |
| 204 case IceType_i8: | |
| 205 case IceType_i16: | |
| 206 case IceType_i32: | |
| 207 ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_eax); | |
| 208 break; | |
| 209 case IceType_i64: | |
| 210 ReturnReg = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); | |
| 211 ReturnRegHi = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); | |
| 212 break; | |
| 213 case IceType_f32: | |
| 214 case IceType_f64: | |
| 215 // Leave ReturnReg==ReturnRegHi==nullptr, and capture the result with | |
| 216 // the fstp instruction. | |
| 217 break; | |
| 218 case IceType_v4i1: | |
| 219 case IceType_v8i1: | |
| 220 case IceType_v16i1: | |
| 221 case IceType_v16i8: | |
| 222 case IceType_v8i16: | |
| 223 case IceType_v4i32: | |
| 224 case IceType_v4f32: | |
| 225 ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_xmm0); | |
| 226 break; | |
| 227 } | |
| 228 } | |
| 229 Operand *CallTarget = legalize(Instr->getCallTarget()); | |
| 230 const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing(); | |
| 231 if (NeedSandboxing) { | |
| 232 if (llvm::isa<Constant>(CallTarget)) { | |
| 233 _bundle_lock(InstBundleLock::Opt_AlignToEnd); | |
| 234 } else { | |
| 235 Variable *CallTargetVar = nullptr; | |
| 236 _mov(CallTargetVar, CallTarget); | |
| 237 _bundle_lock(InstBundleLock::Opt_AlignToEnd); | |
| 238 const SizeT BundleSize = | |
| 239 1 << Func->getAssembler<>()->getBundleAlignLog2Bytes(); | |
| 240 _and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1))); | |
| 241 CallTarget = CallTargetVar; | |
| 242 } | |
| 243 } | |
| 244 Inst *NewCall = Traits::Insts::Call::create(Func, ReturnReg, CallTarget); | |
| 245 Context.insert(NewCall); | |
| 246 if (NeedSandboxing) | |
| 247 _bundle_unlock(); | |
| 248 if (ReturnRegHi) | |
| 249 Context.insert(InstFakeDef::create(Func, ReturnRegHi)); | |
| 250 | |
| 251 // Add the appropriate offset to esp. The call instruction takes care | |
| 252 // of resetting the stack offset during emission. | |
| 253 if (ParameterAreaSizeBytes) { | |
| 254 Variable *esp = | |
| 255 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp); | |
| 256 _add(esp, Ctx->getConstantInt32(ParameterAreaSizeBytes)); | |
| 257 } | |
| 258 | |
| 259 // Insert a register-kill pseudo instruction. | |
| 260 Context.insert(InstFakeKill::create(Func, NewCall)); | |
| 261 | |
| 262 // Generate a FakeUse to keep the call live if necessary. | |
| 263 if (Instr->hasSideEffects() && ReturnReg) { | |
| 264 Inst *FakeUse = InstFakeUse::create(Func, ReturnReg); | |
| 265 Context.insert(FakeUse); | |
| 266 } | |
| 267 | |
| 268 if (!Dest) | |
| 269 return; | |
| 270 | |
| 271 // Assign the result of the call to Dest. | |
| 272 if (ReturnReg) { | |
| 273 if (ReturnRegHi) { | |
| 274 assert(Dest->getType() == IceType_i64); | |
| 275 split64(Dest); | |
| 276 Variable *DestLo = Dest->getLo(); | |
| 277 Variable *DestHi = Dest->getHi(); | |
| 278 _mov(DestLo, ReturnReg); | |
| 279 _mov(DestHi, ReturnRegHi); | |
| 280 } else { | |
| 281 assert(Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 || | |
| 282 Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 || | |
| 283 isVectorType(Dest->getType())); | |
| 284 if (isVectorType(Dest->getType())) { | |
| 285 _movp(Dest, ReturnReg); | |
| 286 } else { | |
| 287 _mov(Dest, ReturnReg); | |
| 288 } | |
| 289 } | |
| 290 } else if (isScalarFloatingType(Dest->getType())) { | |
| 291 // Special treatment for an FP function which returns its result in | |
| 292 // st(0). | |
| 293 // If Dest ends up being a physical xmm register, the fstp emit code | |
| 294 // will route st(0) through a temporary stack slot. | |
| 295 _fstp(Dest); | |
| 296 // Create a fake use of Dest in case it actually isn't used, | |
| 297 // because st(0) still needs to be popped. | |
| 298 Context.insert(InstFakeUse::create(Func, Dest)); | |
| 299 } | |
| 300 } | |
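
The sandboxing path in lowerCall masks an indirect call target down to a bundle boundary before the call. A minimal sketch of that masking arithmetic, assuming 32-byte NaCl bundles (the real size comes from getBundleAlignLog2Bytes()):

```cpp
#include <cassert>
#include <cstdint>

int main() {
  const uint32_t BundleSize = 1u << 5;     // assumed BundleAlignLog2Bytes == 5
  const uint32_t Mask = ~(BundleSize - 1); // 0xFFFFFFE0, i.e. "and t, ~31"
  uint32_t Target = 0x08049234;            // hypothetical call target
  uint32_t Clamped = Target & Mask;
  assert(Clamped == 0x08049220);           // low 5 bits cleared
  assert(Clamped % BundleSize == 0);       // always a bundle start
}
```
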
| 301 | |
| 302 void TargetX8632::lowerArguments() { | |
| 303 VarList &Args = Func->getArgs(); | |
| 304 // The first four arguments of vector type, regardless of their | |
| 305 // position relative to the other arguments in the argument list, are | |
| 306 // passed in registers xmm0 - xmm3. | |
| 307 unsigned NumXmmArgs = 0; | |
| 308 | |
| 309 Context.init(Func->getEntryNode()); | |
| 310 Context.setInsertPoint(Context.getCur()); | |
| 311 | |
| 312 for (SizeT I = 0, E = Args.size(); | |
| 313 I < E && NumXmmArgs < Traits::X86_MAX_XMM_ARGS; ++I) { | |
| 314 Variable *Arg = Args[I]; | |
| 315 Type Ty = Arg->getType(); | |
| 316 if (!isVectorType(Ty)) | |
| 317 continue; | |
| 318 // Replace Arg in the argument list with the home register. Then | |
| 319 // generate an instruction in the prolog to copy the home register | |
| 320 // to the assigned location of Arg. | |
| 321 int32_t RegNum = Traits::RegisterSet::Reg_xmm0 + NumXmmArgs; | |
| 322 ++NumXmmArgs; | |
| 323 Variable *RegisterArg = Func->makeVariable(Ty); | |
| 324 if (BuildDefs::dump()) | |
| 325 RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func)); | |
| 326 RegisterArg->setRegNum(RegNum); | |
| 327 RegisterArg->setIsArg(); | |
| 328 Arg->setIsArg(false); | |
| 329 | |
| 330 Args[I] = RegisterArg; | |
| 331 Context.insert(InstAssign::create(Func, Arg, RegisterArg)); | |
| 332 } | |
| 333 } | |
| 334 | |
| 335 void TargetX8632::lowerRet(const InstRet *Inst) { | |
| 336 Variable *Reg = nullptr; | |
| 337 if (Inst->hasRetValue()) { | |
| 338 Operand *Src0 = legalize(Inst->getRetValue()); | |
| 339 // TODO(jpp): this is not needed. | |
| 340 if (Src0->getType() == IceType_i64) { | |
| 341 Variable *eax = | |
| 342 legalizeToReg(loOperand(Src0), Traits::RegisterSet::Reg_eax); | |
| 343 Variable *edx = | |
| 344 legalizeToReg(hiOperand(Src0), Traits::RegisterSet::Reg_edx); | |
| 345 Reg = eax; | |
| 346 Context.insert(InstFakeUse::create(Func, edx)); | |
| 347 } else if (isScalarFloatingType(Src0->getType())) { | |
| 348 _fld(Src0); | |
| 349 } else if (isVectorType(Src0->getType())) { | |
| 350 Reg = legalizeToReg(Src0, Traits::RegisterSet::Reg_xmm0); | |
| 351 } else { | |
| 352 _mov(Reg, Src0, Traits::RegisterSet::Reg_eax); | |
| 353 } | |
| 354 } | |
| 355 // Add a ret instruction even if sandboxing is enabled, because | |
| 356 // addEpilog explicitly looks for a ret instruction as a marker for | |
| 357 // where to insert the frame removal instructions. | |
| 358 _ret(Reg); | |
| 359 // Add a fake use of esp to make sure esp stays alive for the entire | |
| 360 // function. Otherwise post-call esp adjustments get dead-code | |
| 361 // eliminated. TODO: Are there more places where the fake use | |
| 362 // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not | |
| 363 // have a ret instruction. | |
| 364 Variable *esp = | |
| 365 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp); | |
| 366 Context.insert(InstFakeUse::create(Func, esp)); | |
| 367 } | |
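
lowerRet returns i64 values in the eax:edx pair. A small self-contained sketch of the lo/hi split that loOperand and hiOperand conceptually perform (the value is invented):

```cpp
#include <cassert>
#include <cstdint>

int main() {
  uint64_t RetVal = 0x1122334455667788ull;
  uint32_t Lo = static_cast<uint32_t>(RetVal);       // returned in eax
  uint32_t Hi = static_cast<uint32_t>(RetVal >> 32); // returned in edx
  assert(Lo == 0x55667788u && Hi == 0x11223344u);
  // The caller reassembles the pair: (edx << 32) | eax.
  assert(((uint64_t)Hi << 32 | Lo) == RetVal);
}
```
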
| 368 | |
| 369 void TargetX8632::addProlog(CfgNode *Node) { | |
| 370 // Stack frame layout: | |
| 371 // | |
| 372 // +------------------------+ | |
| 373 // | 1. return address | | |
| 374 // +------------------------+ | |
| 375 // | 2. preserved registers | | |
| 376 // +------------------------+ | |
| 377 // | 3. padding | | |
| 378 // +------------------------+ | |
| 379 // | 4. global spill area | | |
| 380 // +------------------------+ | |
| 381 // | 5. padding | | |
| 382 // +------------------------+ | |
| 383 // | 6. local spill area | | |
| 384 // +------------------------+ | |
| 385 // | 7. padding | | |
| 386 // +------------------------+ | |
| 387 // | 8. allocas | | |
| 388 // +------------------------+ | |
| 389 // | |
| 390 // The following variables record the size in bytes of the given areas: | |
| 391 // * X86_RET_IP_SIZE_BYTES: area 1 | |
| 392 // * PreservedRegsSizeBytes: area 2 | |
| 393 // * SpillAreaPaddingBytes: area 3 | |
| 394 // * GlobalsSize: area 4 | |
| 395 // * GlobalsAndSubsequentPaddingSize: areas 4 - 5 | |
| 396 // * LocalsSpillAreaSize: area 6 | |
| 397 // * SpillAreaSizeBytes: areas 3 - 7 | |
| 398 | |
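
A hedged sketch of how these aggregate sizes compose, with all per-area numbers invented for illustration:

```cpp
#include <cassert>
#include <cstdint>

int main() {
  const uint32_t RetIp = 4;       // area 1: return address
  const uint32_t Preserved = 8;   // area 2: two pushed registers
  const uint32_t SpillPad = 4;    // area 3
  const uint32_t Globals = 12;    // area 4
  const uint32_t GlobalsPad = 4;  // area 5
  const uint32_t Locals = 20;     // area 6
  const uint32_t EspPad = 12;     // area 7
  // The aggregates the prolog code below works with:
  uint32_t GlobalsAndSubsequentPaddingSize = Globals + GlobalsPad;   // 4-5
  uint32_t SpillAreaSizeBytes =
      SpillPad + Globals + GlobalsPad + Locals + EspPad;             // 3-7
  assert(GlobalsAndSubsequentPaddingSize == 16);
  assert(SpillAreaSizeBytes == 52);
  // Distance from post-prolog esp back to the return address:
  assert(RetIp + Preserved + SpillAreaSizeBytes == 64); // 16-byte aligned
}
```
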
| 399 // Determine stack frame offsets for each Variable without a | |
| 400 // register assignment. This can be done as one variable per stack | |
| 401 // slot. Or, do coalescing by running the register allocator again | |
| 402 // with an infinite set of registers (as a side effect, this gives | |
| 403 // variables a second chance at physical register assignment). | |
| 404 // | |
| 405 // A middle ground approach is to leverage sparsity and allocate one | |
| 406 // block of space on the frame for globals (variables with | |
| 407 // multi-block lifetime), and one block to share for locals | |
| 408 // (single-block lifetime). | |
| 409 | |
| 410 Context.init(Node); | |
| 411 Context.setInsertPoint(Context.getCur()); | |
| 412 | |
| 413 llvm::SmallBitVector CalleeSaves = | |
| 414 getRegisterSet(RegSet_CalleeSave, RegSet_None); | |
| 415 RegsUsed = llvm::SmallBitVector(CalleeSaves.size()); | |
| 416 VarList SortedSpilledVariables, VariablesLinkedToSpillSlots; | |
| 417 size_t GlobalsSize = 0; | |
| 418 // If there is a separate locals area, this represents that area. | |
| 419 // Otherwise it counts any variable not counted by GlobalsSize. | |
| 420 SpillAreaSizeBytes = 0; | |
| 421 // If there is a separate locals area, this specifies the alignment | |
| 422 // for it. | |
| 423 uint32_t LocalsSlotsAlignmentBytes = 0; | |
| 424 // The entire spill locations area gets aligned to largest natural | |
| 425 // alignment of the variables that have a spill slot. | |
| 426 uint32_t SpillAreaAlignmentBytes = 0; | |
| 427 // A spill slot linked to a variable with a stack slot should reuse | |
| 428 // that stack slot. | |
| 429 std::function<bool(Variable *)> TargetVarHook = | |
| 430 [&VariablesLinkedToSpillSlots](Variable *Var) { | |
| 431 if (auto *SpillVar = | |
| 432 llvm::dyn_cast<typename Traits::SpillVariable>(Var)) { | |
| 433 assert(Var->getWeight().isZero()); | |
| 434 if (SpillVar->getLinkedTo() && !SpillVar->getLinkedTo()->hasReg()) { | |
| 435 VariablesLinkedToSpillSlots.push_back(Var); | |
| 436 return true; | |
| 437 } | |
| 438 } | |
| 439 return false; | |
| 440 }; | |
| 441 | |
| 442 // Compute the list of spilled variables and bounds for GlobalsSize, etc. | |
| 443 getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize, | |
| 444 &SpillAreaSizeBytes, &SpillAreaAlignmentBytes, | |
| 445 &LocalsSlotsAlignmentBytes, TargetVarHook); | |
| 446 uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes; | |
| 447 SpillAreaSizeBytes += GlobalsSize; | |
| 448 | |
| 449 // Add push instructions for preserved registers. | |
| 450 uint32_t NumCallee = 0; | |
| 451 size_t PreservedRegsSizeBytes = 0; | |
| 452 for (SizeT i = 0; i < CalleeSaves.size(); ++i) { | |
| 453 if (CalleeSaves[i] && RegsUsed[i]) { | |
| 454 ++NumCallee; | |
| 455 PreservedRegsSizeBytes += typeWidthInBytes(IceType_i32); | |
| 456 _push(getPhysicalRegister(i)); | |
| 457 } | |
| 458 } | |
| 459 Ctx->statsUpdateRegistersSaved(NumCallee); | |
| 460 | |
| 461 // Generate "push ebp; mov ebp, esp" | |
| 462 if (IsEbpBasedFrame) { | |
| 463 assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None)) | |
| 464 .count() == 0); | |
| 465 PreservedRegsSizeBytes += typeWidthInBytes(IceType_i32); | |
| 466 Variable *ebp = getPhysicalRegister(Traits::RegisterSet::Reg_ebp); | |
| 467 Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp); | |
| 468 _push(ebp); | |
| 469 _mov(ebp, esp); | |
| 470 // Keep ebp live for late-stage liveness analysis | |
| 471 // (e.g. asm-verbose mode). | |
| 472 Context.insert(InstFakeUse::create(Func, ebp)); | |
| 473 } | |
| 474 | |
| 475 // Align the variables area. SpillAreaPaddingBytes is the size of | |
| 476 // the region after the preserved registers and before the spill areas. | |
| 477 // LocalsSlotsPaddingBytes is the amount of padding between the globals | |
| 478 // and locals area if they are separate. | |
| 479 assert(SpillAreaAlignmentBytes <= Traits::X86_STACK_ALIGNMENT_BYTES); | |
| 480 assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes); | |
| 481 uint32_t SpillAreaPaddingBytes = 0; | |
| 482 uint32_t LocalsSlotsPaddingBytes = 0; | |
| 483 alignStackSpillAreas(Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes, | |
| 484 SpillAreaAlignmentBytes, GlobalsSize, | |
| 485 LocalsSlotsAlignmentBytes, &SpillAreaPaddingBytes, | |
| 486 &LocalsSlotsPaddingBytes); | |
| 487 SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes; | |
| 488 uint32_t GlobalsAndSubsequentPaddingSize = | |
| 489 GlobalsSize + LocalsSlotsPaddingBytes; | |
| 490 | |
| 491 // Align esp if necessary. | |
| 492 if (NeedsStackAlignment) { | |
| 493 uint32_t StackOffset = | |
| 494 Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes; | |
| 495 uint32_t StackSize = | |
| 496 Traits::applyStackAlignment(StackOffset + SpillAreaSizeBytes); | |
| 497 SpillAreaSizeBytes = StackSize - StackOffset; | |
| 498 } | |
| 499 | |
| 500 // Generate "sub esp, SpillAreaSizeBytes" | |
| 501 if (SpillAreaSizeBytes) | |
| 502 _sub(getPhysicalRegister(Traits::RegisterSet::Reg_esp), | |
| 503 Ctx->getConstantInt32(SpillAreaSizeBytes)); | |
| 504 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes); | |
| 505 | |
| 506 resetStackAdjustment(); | |
| 507 | |
| 508 // Fill in stack offsets for stack args, and copy args into registers | |
| 509 // for those that were register-allocated. Args are pushed right to | |
| 510 // left, so Arg[0] is closest to the stack/frame pointer. | |
| 511 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); | |
| 512 size_t BasicFrameOffset = | |
| 513 PreservedRegsSizeBytes + Traits::X86_RET_IP_SIZE_BYTES; | |
| 514 if (!IsEbpBasedFrame) | |
| 515 BasicFrameOffset += SpillAreaSizeBytes; | |
| 516 | |
| 517 const VarList &Args = Func->getArgs(); | |
| 518 size_t InArgsSizeBytes = 0; | |
| 519 unsigned NumXmmArgs = 0; | |
| 520 for (Variable *Arg : Args) { | |
| 521 // Skip arguments passed in registers. | |
| 522 if (isVectorType(Arg->getType()) && NumXmmArgs < Traits::X86_MAX_XMM_ARGS) { | |
| 523 ++NumXmmArgs; | |
| 524 continue; | |
| 525 } | |
| 526 finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes); | |
| 527 } | |
| 528 | |
| 529 // Fill in stack offsets for locals. | |
| 530 assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes, | |
| 531 SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize, | |
| 532 IsEbpBasedFrame); | |
| 533 // Assign stack offsets to variables that have been linked to spilled | |
| 534 // variables. | |
| 535 for (Variable *Var : VariablesLinkedToSpillSlots) { | |
| 536 Variable *Linked = | |
| 537 (llvm::cast<typename Traits::SpillVariable>(Var))->getLinkedTo(); | |
| 538 Var->setStackOffset(Linked->getStackOffset()); | |
| 539 } | |
| 540 this->HasComputedFrame = true; | |
| 541 | |
| 542 if (BuildDefs::dump() && Func->isVerbose(IceV_Frame)) { | |
| 543 OstreamLocker L(Func->getContext()); | |
| 544 Ostream &Str = Func->getContext()->getStrDump(); | |
| 545 | |
| 546 Str << "Stack layout:\n"; | |
| 547 uint32_t EspAdjustmentPaddingSize = | |
| 548 SpillAreaSizeBytes - LocalsSpillAreaSize - | |
| 549 GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes; | |
| 550 Str << " in-args = " << InArgsSizeBytes << " bytes\n" | |
| 551 << " return address = " << Traits::X86_RET_IP_SIZE_BYTES << " bytes\n" | |
| 552 << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n" | |
| 553 << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n" | |
| 554 << " globals spill area = " << GlobalsSize << " bytes\n" | |
| 555 << " globals-locals spill areas intermediate padding = " | |
| 556 << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n" | |
| 557 << " locals spill area = " << LocalsSpillAreaSize << " bytes\n" | |
| 558 << " esp alignment padding = " << EspAdjustmentPaddingSize | |
| 559 << " bytes\n"; | |
| 560 | |
| 561 Str << "Stack details:\n" | |
| 562 << " esp adjustment = " << SpillAreaSizeBytes << " bytes\n" | |
| 563 << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n" | |
| 564 << " locals spill area alignment = " << LocalsSlotsAlignmentBytes | |
| 565 << " bytes\n" | |
| 566 << " is ebp based = " << IsEbpBasedFrame << "\n"; | |
| 567 } | |
| 568 } | |
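
A worked example of the "Align esp if necessary" step above, with assumed inputs (4-byte return IP, two preserved registers, 30 bytes of raw spill area):

```cpp
#include <cassert>
#include <cstdint>

uint32_t applyStackAlignment(uint32_t V) { // assumed 16-byte round-up
  return (V + 15u) & ~15u;
}

int main() {
  uint32_t StackOffset = 4 + 8;                       // ret IP + pushes = 12
  uint32_t SpillAreaSizeBytes = 30;
  uint32_t StackSize =
      applyStackAlignment(StackOffset + SpillAreaSizeBytes); // 42 -> 48
  SpillAreaSizeBytes = StackSize - StackOffset;              // 36
  assert(SpillAreaSizeBytes == 36);
  // After "sub esp, 36", esp is 16-byte aligned again:
  assert((StackOffset + SpillAreaSizeBytes) % 16 == 0);
}
```
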
| 569 | |
| 570 void TargetX8632::addEpilog(CfgNode *Node) { | |
| 571 InstList &Insts = Node->getInsts(); | |
| 572 InstList::reverse_iterator RI, E; | |
| 573 for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) { | |
| 574 if (llvm::isa<typename Traits::Insts::Ret>(*RI)) | |
| 575 break; | |
| 576 } | |
| 577 if (RI == E) | |
| 578 return; | |
| 579 | |
| 580 // Convert the reverse_iterator position into its corresponding | |
| 581 // (forward) iterator position. | |
| 582 InstList::iterator InsertPoint = RI.base(); | |
| 583 --InsertPoint; | |
| 584 Context.init(Node); | |
| 585 Context.setInsertPoint(InsertPoint); | |
| 586 | |
| 587 Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp); | |
| 588 if (IsEbpBasedFrame) { | |
| 589 Variable *ebp = getPhysicalRegister(Traits::RegisterSet::Reg_ebp); | |
| 590 // For late-stage liveness analysis (e.g. asm-verbose mode), | |
| 591 // adding a fake use of esp before the assignment of esp=ebp keeps | |
| 592 // previous esp adjustments from being dead-code eliminated. | |
| 593 Context.insert(InstFakeUse::create(Func, esp)); | |
| 594 _mov(esp, ebp); | |
| 595 _pop(ebp); | |
| 596 } else { | |
| 597 // add esp, SpillAreaSizeBytes | |
| 598 if (SpillAreaSizeBytes) | |
| 599 _add(esp, Ctx->getConstantInt32(SpillAreaSizeBytes)); | |
| 600 } | |
| 601 | |
| 602 // Add pop instructions for preserved registers. | |
| 603 llvm::SmallBitVector CalleeSaves = | |
| 604 getRegisterSet(RegSet_CalleeSave, RegSet_None); | |
| 605 for (SizeT i = 0; i < CalleeSaves.size(); ++i) { | |
| 606 SizeT j = CalleeSaves.size() - i - 1; | |
| 607 if (j == Traits::RegisterSet::Reg_ebp && IsEbpBasedFrame) | |
| 608 continue; | |
| 609 if (CalleeSaves[j] && RegsUsed[j]) { | |
| 610 _pop(getPhysicalRegister(j)); | |
| 611 } | |
| 612 } | |
| 613 | |
| 614 if (!Ctx->getFlags().getUseSandboxing()) | |
| 615 return; | |
| 616 // Change the original ret instruction into a sandboxed return sequence. | |
| 617 // t:ecx = pop | |
| 618 // bundle_lock | |
| 619 // and t, ~31 | |
| 620 // jmp *t | |
| 621 // bundle_unlock | |
| 622 // FakeUse <original_ret_operand> | |
| 623 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx); | |
| 624 _pop(T_ecx); | |
| 625 lowerIndirectJump(T_ecx); | |
| 626 if (RI->getSrcSize()) { | |
| 627 Variable *RetValue = llvm::cast<Variable>(RI->getSrc(0)); | |
| 628 Context.insert(InstFakeUse::create(Func, RetValue)); | |
| 629 } | |
| 630 RI->setDeleted(); | |
| 631 } | |
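
The reverse-to-forward iterator conversion in addEpilog is easy to get wrong: base() refers to the element after the one the reverse iterator names, hence the extra decrement. A standalone illustration:

```cpp
#include <cassert>
#include <list>

int main() {
  std::list<int> L = {10, 20, 30, 40};
  // Scan backwards for 30, as addEpilog scans for the ret instruction.
  auto RI = L.rbegin();
  while (RI != L.rend() && *RI != 30)
    ++RI;
  auto Fwd = RI.base(); // points one element past RI's referent...
  assert(*Fwd == 40);
  --Fwd;                // ...so step back once to reach 30 itself.
  assert(*Fwd == 30);
}
```
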
| 632 | |
| 633 void TargetX8632::emitJumpTable(const Cfg *Func, | |
| 634 const InstJumpTable *JumpTable) const { | |
| 635 if (!BuildDefs::dump()) | |
| 636 return; | |
| 637 Ostream &Str = Ctx->getStrEmit(); | |
| 638 IceString MangledName = Ctx->mangleName(Func->getFunctionName()); | |
| 639 Str << "\t.section\t.rodata." << MangledName | |
| 640 << "$jumptable,\"a\",@progbits\n"; | |
| 641 Str << "\t.align\t" << typeWidthInBytes(getPointerType()) << "\n"; | |
| 642 Str << InstJumpTable::makeName(MangledName, JumpTable->getId()) << ":"; | |
| 643 | |
| 644 // On X8632 pointers are 32-bit hence the use of .long | |
| 645 for (SizeT I = 0; I < JumpTable->getNumTargets(); ++I) | |
| 646 Str << "\n\t.long\t" << JumpTable->getTarget(I)->getAsmName(); | |
| 647 Str << "\n"; | |
| 648 } | |
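
For reference, a standalone sketch of the text emitJumpTable produces for a three-target table on x86-32. The mangled name, the label scheme of InstJumpTable::makeName, and the target labels are all invented for illustration:

```cpp
#include <cstdio>

int main() {
  const char *Mangled = "my_func";                     // invented
  const int JumpTableId = 0;                           // invented
  const char *Targets[] = {".Lbb1", ".Lbb2", ".Lbb3"}; // invented labels
  std::printf("\t.section\t.rodata.%s$jumptable,\"a\",@progbits\n", Mangled);
  std::printf("\t.align\t%d\n", 4); // pointers are 4 bytes on x86-32
  std::printf("%s$jumptable$%d:", Mangled, JumpTableId); // label format assumed
  for (const char *T : Targets)
    std::printf("\n\t.long\t%s", T); // one 32-bit entry per target
  std::printf("\n");
}
```
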
| 649 | |
| 92 TargetDataX8632::TargetDataX8632(GlobalContext *Ctx) | 650 TargetDataX8632::TargetDataX8632(GlobalContext *Ctx) |
| 93 : TargetDataLowering(Ctx) {} | 651 : TargetDataLowering(Ctx) {} |
| 94 | 652 |
| 95 namespace { | 653 namespace { |
| 96 template <typename T> struct PoolTypeConverter {}; | 654 template <typename T> struct PoolTypeConverter {}; |
| 97 | 655 |
| 98 template <> struct PoolTypeConverter<float> { | 656 template <> struct PoolTypeConverter<float> { |
| 99 typedef uint32_t PrimitiveIntType; | 657 typedef uint32_t PrimitiveIntType; |
| 100 typedef ConstantFloat IceType; | 658 typedef ConstantFloat IceType; |
| 101 static const Type Ty = IceType_f32; | 659 static const Type Ty = IceType_f32; |
| (...skipping 50 matching lines...) | |
| 152 static const Type Ty = IceType_i8; | 710 static const Type Ty = IceType_i8; |
| 153 static const char *TypeName; | 711 static const char *TypeName; |
| 154 static const char *AsmTag; | 712 static const char *AsmTag; |
| 155 static const char *PrintfString; | 713 static const char *PrintfString; |
| 156 }; | 714 }; |
| 157 const char *PoolTypeConverter<uint8_t>::TypeName = "i8"; | 715 const char *PoolTypeConverter<uint8_t>::TypeName = "i8"; |
| 158 const char *PoolTypeConverter<uint8_t>::AsmTag = ".byte"; | 716 const char *PoolTypeConverter<uint8_t>::AsmTag = ".byte"; |
| 159 const char *PoolTypeConverter<uint8_t>::PrintfString = "0x%x"; | 717 const char *PoolTypeConverter<uint8_t>::PrintfString = "0x%x"; |
| 160 } // end of anonymous namespace | 718 } // end of anonymous namespace |
| 161 | 719 |
| 162 void TargetX8632::emitJumpTable(const Cfg *Func, | |
| 163 const InstJumpTable *JumpTable) const { | |
| 164 if (!BuildDefs::dump()) | |
| 165 return; | |
| 166 Ostream &Str = Ctx->getStrEmit(); | |
| 167 IceString MangledName = Ctx->mangleName(Func->getFunctionName()); | |
| 168 Str << "\t.section\t.rodata." << MangledName | |
| 169 << "$jumptable,\"a\",@progbits\n"; | |
| 170 Str << "\t.align\t" << typeWidthInBytes(getPointerType()) << "\n"; | |
| 171 Str << InstJumpTable::makeName(MangledName, JumpTable->getId()) << ":"; | |
| 172 | |
| 173 // On X8632 pointers are 32-bit hence the use of .long | |
| 174 for (SizeT I = 0; I < JumpTable->getNumTargets(); ++I) | |
| 175 Str << "\n\t.long\t" << JumpTable->getTarget(I)->getAsmName(); | |
| 176 Str << "\n"; | |
| 177 } | |
| 178 | |
| 179 template <typename T> | 720 template <typename T> |
| 180 void TargetDataX8632::emitConstantPool(GlobalContext *Ctx) { | 721 void TargetDataX8632::emitConstantPool(GlobalContext *Ctx) { |
| 181 if (!BuildDefs::dump()) | 722 if (!BuildDefs::dump()) |
| 182 return; | 723 return; |
| 183 Ostream &Str = Ctx->getStrEmit(); | 724 Ostream &Str = Ctx->getStrEmit(); |
| 184 Type Ty = T::Ty; | 725 Type Ty = T::Ty; |
| 185 SizeT Align = typeAlignInBytes(Ty); | 726 SizeT Align = typeAlignInBytes(Ty); |
| 186 ConstantList Pool = Ctx->getConstantPool(Ty); | 727 ConstantList Pool = Ctx->getConstantPool(Ty); |
| 187 | 728 |
| 188 Str << "\t.section\t.rodata.cst" << Align << ",\"aM\",@progbits," << Align | 729 Str << "\t.section\t.rodata.cst" << Align << ",\"aM\",@progbits," << Align |
| (...skipping 211 matching lines...) | |
| 400 // Repeat the static asserts with respect to the high-level table | 941 // Repeat the static asserts with respect to the high-level table |
| 401 // entries in case the high-level table has extra entries. | 942 // entries in case the high-level table has extra entries. |
| 402 #define X(tag, sizeLog2, align, elts, elty, str) \ | 943 #define X(tag, sizeLog2, align, elts, elty, str) \ |
| 403 static_assert(_table1_##tag == _table2_##tag, \ | 944 static_assert(_table1_##tag == _table2_##tag, \ |
| 404 "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE"); | 945 "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE"); |
| 405 ICETYPE_TABLE | 946 ICETYPE_TABLE |
| 406 #undef X | 947 #undef X |
| 407 } // end of namespace dummy3 | 948 } // end of namespace dummy3 |
| 408 } // end of anonymous namespace | 949 } // end of anonymous namespace |
| 409 | 950 |
| 410 //------------------------------------------------------------------------------ | |
| 411 // __ ______ __ __ ______ ______ __ __ __ ______ | |
| 412 // /\ \ /\ __ \/\ \ _ \ \/\ ___\/\ == \/\ \/\ "-.\ \/\ ___\ | |
| 413 // \ \ \___\ \ \/\ \ \ \/ ".\ \ \ __\\ \ __<\ \ \ \ \-. \ \ \__ \ | |
| 414 // \ \_____\ \_____\ \__/".~\_\ \_____\ \_\ \_\ \_\ \_\\"\_\ \_____\ | |
| 415 // \/_____/\/_____/\/_/ \/_/\/_____/\/_/ /_/\/_/\/_/ \/_/\/_____/ | |
| 416 // | |
| 417 //------------------------------------------------------------------------------ | |
| 418 void TargetX8632::lowerCall(const InstCall *Instr) { | |
| 419 // x86-32 calling convention: | |
| 420 // | |
| 421 // * At the point before the call, the stack must be aligned to 16 | |
| 422 // bytes. | |
| 423 // | |
| 424 // * The first four arguments of vector type, regardless of their | |
| 425 // position relative to the other arguments in the argument list, are | |
| 426 // placed in registers xmm0 - xmm3. | |
| 427 // | |
| 428 // * Other arguments are pushed onto the stack in right-to-left order, | |
| 429 // such that the left-most argument ends up on the top of the stack at | |
| 430 // the lowest memory address. | |
| 431 // | |
| 432 // * Stack arguments of vector type are aligned to start at the next | |
| 433 // highest multiple of 16 bytes. Other stack arguments are aligned to | |
| 434 // 4 bytes. | |
| 435 // | |
| 436 // This intends to match the section "IA-32 Function Calling | |
| 437 // Convention" of the document "OS X ABI Function Call Guide" by | |
| 438 // Apple. | |
| 439 NeedsStackAlignment = true; | |
| 440 | |
| 441 typedef std::vector<Operand *> OperandList; | |
| 442 OperandList XmmArgs; | |
| 443 OperandList StackArgs, StackArgLocations; | |
| 444 uint32_t ParameterAreaSizeBytes = 0; | |
| 445 | |
| 446 // Classify each argument operand according to the location where the | |
| 447 // argument is passed. | |
| 448 for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) { | |
| 449 Operand *Arg = Instr->getArg(i); | |
| 450 Type Ty = Arg->getType(); | |
| 451 // The PNaCl ABI requires the width of arguments to be at least 32 bits. | |
| 452 assert(typeWidthInBytes(Ty) >= 4); | |
| 453 if (isVectorType(Ty) && XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) { | |
| 454 XmmArgs.push_back(Arg); | |
| 455 } else { | |
| 456 StackArgs.push_back(Arg); | |
| 457 if (isVectorType(Arg->getType())) { | |
| 458 ParameterAreaSizeBytes = | |
| 459 Traits::applyStackAlignment(ParameterAreaSizeBytes); | |
| 460 } | |
| 461 Variable *esp = | |
| 462 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp); | |
| 463 Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes); | |
| 464 StackArgLocations.push_back( | |
| 465 Traits::X86OperandMem::create(Func, Ty, esp, Loc)); | |
| 466 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType()); | |
| 467 } | |
| 468 } | |
| 469 | |
| 470 // Adjust the parameter area so that the stack is aligned. It is | |
| 471 // assumed that the stack is already aligned at the start of the | |
| 472 // calling sequence. | |
| 473 ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes); | |
| 474 | |
| 475 // Subtract the appropriate amount for the argument area. This also | |
| 476 // takes care of setting the stack adjustment during emission. | |
| 477 // | |
| 478 // TODO: If for some reason the call instruction gets dead-code | |
| 479 // eliminated after lowering, we would need to ensure that the | |
| 480 // pre-call and the post-call esp adjustment get eliminated as well. | |
| 481 if (ParameterAreaSizeBytes) { | |
| 482 _adjust_stack(ParameterAreaSizeBytes); | |
| 483 } | |
| 484 | |
| 485 // Copy arguments that are passed on the stack to the appropriate | |
| 486 // stack locations. | |
| 487 for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) { | |
| 488 lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i])); | |
| 489 } | |
| 490 | |
| 491 // Copy arguments to be passed in registers to the appropriate | |
| 492 // registers. | |
| 493 // TODO: Investigate the impact of lowering arguments passed in | |
| 494 // registers after lowering stack arguments as opposed to the other | |
| 495 // way around. Lowering register arguments after stack arguments may | |
| 496 // reduce register pressure. On the other hand, lowering register | |
| 497 // arguments first (before stack arguments) may result in more compact | |
| 498 // code, as the memory operand displacements may end up being smaller | |
| 499 // before any stack adjustment is done. | |
| 500 for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) { | |
| 501 Variable *Reg = | |
| 502 legalizeToReg(XmmArgs[i], Traits::RegisterSet::Reg_xmm0 + i); | |
| 503 // Generate a FakeUse of register arguments so that they do not get | |
| 504 // dead code eliminated as a result of the FakeKill of scratch | |
| 505 // registers after the call. | |
| 506 Context.insert(InstFakeUse::create(Func, Reg)); | |
| 507 } | |
| 508 // Generate the call instruction. Assign its result to a temporary | |
| 509 // with high register allocation weight. | |
| 510 Variable *Dest = Instr->getDest(); | |
| 511 // ReturnReg doubles as ReturnRegLo as necessary. | |
| 512 Variable *ReturnReg = nullptr; | |
| 513 Variable *ReturnRegHi = nullptr; | |
| 514 if (Dest) { | |
| 515 switch (Dest->getType()) { | |
| 516 case IceType_NUM: | |
| 517 llvm_unreachable("Invalid Call dest type"); | |
| 518 break; | |
| 519 case IceType_void: | |
| 520 break; | |
| 521 case IceType_i1: | |
| 522 case IceType_i8: | |
| 523 case IceType_i16: | |
| 524 case IceType_i32: | |
| 525 ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_eax); | |
| 526 break; | |
| 527 case IceType_i64: | |
| 528 ReturnReg = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); | |
| 529 ReturnRegHi = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); | |
| 530 break; | |
| 531 case IceType_f32: | |
| 532 case IceType_f64: | |
| 533 // Leave ReturnReg==ReturnRegHi==nullptr, and capture the result with | |
| 534 // the fstp instruction. | |
| 535 break; | |
| 536 case IceType_v4i1: | |
| 537 case IceType_v8i1: | |
| 538 case IceType_v16i1: | |
| 539 case IceType_v16i8: | |
| 540 case IceType_v8i16: | |
| 541 case IceType_v4i32: | |
| 542 case IceType_v4f32: | |
| 543 ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_xmm0); | |
| 544 break; | |
| 545 } | |
| 546 } | |
| 547 Operand *CallTarget = legalize(Instr->getCallTarget()); | |
| 548 const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing(); | |
| 549 if (NeedSandboxing) { | |
| 550 if (llvm::isa<Constant>(CallTarget)) { | |
| 551 _bundle_lock(InstBundleLock::Opt_AlignToEnd); | |
| 552 } else { | |
| 553 Variable *CallTargetVar = nullptr; | |
| 554 _mov(CallTargetVar, CallTarget); | |
| 555 _bundle_lock(InstBundleLock::Opt_AlignToEnd); | |
| 556 const SizeT BundleSize = | |
| 557 1 << Func->getAssembler<>()->getBundleAlignLog2Bytes(); | |
| 558 _and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1))); | |
| 559 CallTarget = CallTargetVar; | |
| 560 } | |
| 561 } | |
| 562 Inst *NewCall = Traits::Insts::Call::create(Func, ReturnReg, CallTarget); | |
| 563 Context.insert(NewCall); | |
| 564 if (NeedSandboxing) | |
| 565 _bundle_unlock(); | |
| 566 if (ReturnRegHi) | |
| 567 Context.insert(InstFakeDef::create(Func, ReturnRegHi)); | |
| 568 | |
| 569 // Add the appropriate offset to esp. The call instruction takes care | |
| 570 // of resetting the stack offset during emission. | |
| 571 if (ParameterAreaSizeBytes) { | |
| 572 Variable *esp = | |
| 573 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp); | |
| 574 _add(esp, Ctx->getConstantInt32(ParameterAreaSizeBytes)); | |
| 575 } | |
| 576 | |
| 577 // Insert a register-kill pseudo instruction. | |
| 578 Context.insert(InstFakeKill::create(Func, NewCall)); | |
| 579 | |
| 580 // Generate a FakeUse to keep the call live if necessary. | |
| 581 if (Instr->hasSideEffects() && ReturnReg) { | |
| 582 Inst *FakeUse = InstFakeUse::create(Func, ReturnReg); | |
| 583 Context.insert(FakeUse); | |
| 584 } | |
| 585 | |
| 586 if (!Dest) | |
| 587 return; | |
| 588 | |
| 589 // Assign the result of the call to Dest. | |
| 590 if (ReturnReg) { | |
| 591 if (ReturnRegHi) { | |
| 592 assert(Dest->getType() == IceType_i64); | |
| 593 split64(Dest); | |
| 594 Variable *DestLo = Dest->getLo(); | |
| 595 Variable *DestHi = Dest->getHi(); | |
| 596 _mov(DestLo, ReturnReg); | |
| 597 _mov(DestHi, ReturnRegHi); | |
| 598 } else { | |
| 599 assert(Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 || | |
| 600 Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 || | |
| 601 isVectorType(Dest->getType())); | |
| 602 if (isVectorType(Dest->getType())) { | |
| 603 _movp(Dest, ReturnReg); | |
| 604 } else { | |
| 605 _mov(Dest, ReturnReg); | |
| 606 } | |
| 607 } | |
| 608 } else if (isScalarFloatingType(Dest->getType())) { | |
| 609 // Special treatment for an FP function which returns its result in | |
| 610 // st(0). | |
| 611 // If Dest ends up being a physical xmm register, the fstp emit code | |
| 612 // will route st(0) through a temporary stack slot. | |
| 613 _fstp(Dest); | |
| 614 // Create a fake use of Dest in case it actually isn't used, | |
| 615 // because st(0) still needs to be popped. | |
| 616 Context.insert(InstFakeUse::create(Func, Dest)); | |
| 617 } | |
| 618 } | |
| 619 | |
| 620 } // end of namespace Ice | 951 } // end of namespace Ice |