| OLD | NEW |
| 1 //===- subzero/src/IceTargetLoweringX8664.cpp - x86-64 lowering -----------===// | 1 //===- subzero/src/IceTargetLoweringX8664.cpp - x86-64 lowering -----------===// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 /// | 9 /// |
| 10 /// \file | 10 /// \file |
| (...skipping 366 matching lines...) |
| 377 1 << Func->getAssembler<>()->getBundleAlignLog2Bytes(); | 377 1 << Func->getAssembler<>()->getBundleAlignLog2Bytes(); |
| 378 _and(T, Ctx->getConstantInt32(~(BundleSize - 1))); | 378 _and(T, Ctx->getConstantInt32(~(BundleSize - 1))); |
| 379 _movzx(T64, T); | 379 _movzx(T64, T); |
| 380 _add(T64, r15); | 380 _add(T64, r15); |
| 381 JumpTarget = T64; | 381 JumpTarget = T64; |
| 382 } | 382 } |
| 383 | 383 |
| 384 _jmp(JumpTarget); | 384 _jmp(JumpTarget); |
| 385 } | 385 } |
| 386 | 386 |
| 387 namespace { | 387 Inst *TargetX8664::emitCallToTarget(Operand *CallTarget, Variable *ReturnReg) { |
| 388 static inline TargetX8664::Traits::RegisterSet::AllRegisters | 388 Inst *NewCall = nullptr; |
| 389 getRegisterForXmmArgNum(uint32_t ArgNum) { | |
| 390 assert(ArgNum < TargetX8664::Traits::X86_MAX_XMM_ARGS); | |
| 391 return static_cast<TargetX8664::Traits::RegisterSet::AllRegisters>( | |
| 392 TargetX8664::Traits::RegisterSet::Reg_xmm0 + ArgNum); | |
| 393 } | |
| 394 | |
| 395 static inline TargetX8664::Traits::RegisterSet::AllRegisters | |
| 396 getRegisterForGprArgNum(Type Ty, uint32_t ArgNum) { | |
| 397 assert(ArgNum < TargetX8664::Traits::X86_MAX_GPR_ARGS); | |
| 398 static const TargetX8664::Traits::RegisterSet::AllRegisters GprForArgNum[] = { | |
| 399 TargetX8664::Traits::RegisterSet::Reg_rdi, | |
| 400 TargetX8664::Traits::RegisterSet::Reg_rsi, | |
| 401 TargetX8664::Traits::RegisterSet::Reg_rdx, | |
| 402 TargetX8664::Traits::RegisterSet::Reg_rcx, | |
| 403 TargetX8664::Traits::RegisterSet::Reg_r8, | |
| 404 TargetX8664::Traits::RegisterSet::Reg_r9, | |
| 405 }; | |
| 406 static_assert(llvm::array_lengthof(GprForArgNum) == | |
| 407 TargetX8664::TargetX8664::Traits::X86_MAX_GPR_ARGS, | |
| 408 "Mismatch between MAX_GPR_ARGS and GprForArgNum."); | |
| 409 assert(Ty == IceType_i64 || Ty == IceType_i32); | |
| 410 return static_cast<TargetX8664::Traits::RegisterSet::AllRegisters>( | |
| 411 TargetX8664::Traits::getGprForType(Ty, GprForArgNum[ArgNum])); | |
| 412 } | |
| 413 | |
| 414 // constexprMax returns a (constexpr) max(S0, S1), and it is used for defining | |
| 415 // OperandList in lowerCall. std::max() is supposed to work, but it doesn't. | |
| 416 constexpr SizeT constexprMax(SizeT S0, SizeT S1) { return S0 < S1 ? S1 : S0; } | |
| 417 | |
| 418 } // end of anonymous namespace | |
| 419 | |
| 420 void TargetX8664::lowerCall(const InstCall *Instr) { | |
| 421 // x86-64 calling convention: | |
| 422 // | |
| 423 // * At the point before the call, the stack must be aligned to 16 bytes. | |
| 424 // | |
| 425 // * The first eight arguments of vector/fp type, regardless of their | |
| 426 // position relative to the other arguments in the argument list, are placed | |
| 427 // in registers %xmm0 - %xmm7. | |
| 428 // | |
| 429 // * The first six arguments of integer types, regardless of their position | |
| 430 // relative to the other arguments in the argument list, are placed in | |
| 431 // registers %rdi, %rsi, %rdx, %rcx, %r8, and %r9. | |
| 432 // | |
| 433 // * Other arguments are pushed onto the stack in right-to-left order, such | |
| 434 // that the left-most argument ends up on the top of the stack at the lowest | |
| 435 // memory address. | |
| 436 // | |
| 437 // * Stack arguments of vector type are aligned to start at the next highest | |
| 438 // multiple of 16 bytes. Other stack arguments are aligned to 8 bytes. | |
| 439 // | |
| 440 // This intends to match the section "Function Calling Sequence" of the | |
| 441 // document "System V Application Binary Interface." | |
| 442 NeedsStackAlignment = true; | |
| 443 | |
| 444 using OperandList = | |
| 445 llvm::SmallVector<Operand *, constexprMax(Traits::X86_MAX_XMM_ARGS, | |
| 446 Traits::X86_MAX_GPR_ARGS)>; | |
| 447 OperandList XmmArgs; | |
| 448 CfgVector<std::pair<const Type, Operand *>> GprArgs; | |
| 449 OperandList StackArgs, StackArgLocations; | |
| 450 int32_t ParameterAreaSizeBytes = 0; | |
| 451 | |
| 452 // Classify each argument operand according to the location where the | |
| 453 // argument is passed. | |
| 454 for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) { | |
| 455 Operand *Arg = Instr->getArg(i); | |
| 456 Type Ty = Arg->getType(); | |
| 457 // The PNaCl ABI requires the width of arguments to be at least 32 bits. | |
| 458 assert(typeWidthInBytes(Ty) >= 4); | |
| 459 if (isVectorType(Ty) && XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) { | |
| 460 XmmArgs.push_back(Arg); | |
| 461 } else if (isScalarFloatingType(Ty) && | |
| 462 XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) { | |
| 463 XmmArgs.push_back(Arg); | |
| 464 } else if (isScalarIntegerType(Ty) && | |
| 465 GprArgs.size() < Traits::X86_MAX_GPR_ARGS) { | |
| 466 GprArgs.emplace_back(Ty, Arg); | |
| 467 } else { | |
| 468 StackArgs.push_back(Arg); | |
| 469 if (isVectorType(Arg->getType())) { | |
| 470 ParameterAreaSizeBytes = | |
| 471 Traits::applyStackAlignment(ParameterAreaSizeBytes); | |
| 472 } | |
| 473 Variable *esp = | |
| 474 getPhysicalRegister(Traits::RegisterSet::Reg_rsp, IceType_i64); | |
| 475 Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes); | |
| 476 StackArgLocations.push_back( | |
| 477 Traits::X86OperandMem::create(Func, Ty, esp, Loc)); | |
| 478 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType()); | |
| 479 } | |
| 480 } | |
| 481 | |
| 482 // Adjust the parameter area so that the stack is aligned. It is assumed that | |
| 483 // the stack is already aligned at the start of the calling sequence. | |
| 484 ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes); | |
| 485 assert(static_cast<uint32_t>(ParameterAreaSizeBytes) <= | |
| 486 maxOutArgsSizeBytes()); | |
| 487 | |
| 488 // Copy arguments that are passed on the stack to the appropriate stack | |
| 489 // locations. | |
| 490 for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) { | |
| 491 lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i])); | |
| 492 } | |
| 493 | |
| 494 // Copy arguments to be passed in registers to the appropriate registers. | |
| 495 // TODO: Investigate the impact of lowering arguments passed in registers | |
| 496 // after lowering stack arguments as opposed to the other way around. | |
| 497 // Lowering register arguments after stack arguments may reduce register | |
| 498 // pressure. On the other hand, lowering register arguments first (before | |
| 499 // stack arguments) may result in more compact code, as the memory operand | |
| 500 // displacements may end up being smaller before any stack adjustment is | |
| 501 // done. | |
| 502 for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) { | |
| 503 Variable *Reg = legalizeToReg(XmmArgs[i], getRegisterForXmmArgNum(i)); | |
| 504 // Generate a FakeUse of register arguments so that they do not get dead | |
| 505 // code eliminated as a result of the FakeKill of scratch registers after | |
| 506 // the call. | |
| 507 Context.insert<InstFakeUse>(Reg); | |
| 508 } | |
| 509 | |
| 510 for (SizeT i = 0, NumGprArgs = GprArgs.size(); i < NumGprArgs; ++i) { | |
| 511 const Type SignatureTy = GprArgs[i].first; | |
| 512 Operand *Arg = GprArgs[i].second; | |
| 513 Variable *Reg = | |
| 514 legalizeToReg(Arg, getRegisterForGprArgNum(Arg->getType(), i)); | |
| 515 assert(SignatureTy == IceType_i64 || SignatureTy == IceType_i32); | |
| 516 if (SignatureTy != Arg->getType()) { | |
| 517 if (SignatureTy == IceType_i32) { | |
| 518 assert(Arg->getType() == IceType_i64); | |
| 519 Variable *T = makeReg( | |
| 520 IceType_i32, Traits::getGprForType(IceType_i32, Reg->getRegNum())); | |
| 521 _mov(T, Reg); | |
| 522 Reg = T; | |
| 523 } else { | |
| 524 // This branch has never been reached, so we leave the assert(false) | |
| 525 // here until we figure out how to exercise it. | |
| 526 assert(false); | |
| 527 assert(Arg->getType() == IceType_i32); | |
| 528 Variable *T = makeReg( | |
| 529 IceType_i64, Traits::getGprForType(IceType_i64, Reg->getRegNum())); | |
| 530 _movzx(T, Reg); | |
| 531 Reg = T; | |
| 532 } | |
| 533 } | |
| 534 Context.insert<InstFakeUse>(Reg); | |
| 535 } | |
| 536 | |
| 537 // Generate the call instruction. Assign its result to a temporary with high | |
| 538 // register allocation weight. | |
| 539 Variable *Dest = Instr->getDest(); | |
| 540 // ReturnReg doubles as ReturnRegLo as necessary. | |
| 541 Variable *ReturnReg = nullptr; | |
| 542 if (Dest) { | |
| 543 switch (Dest->getType()) { | |
| 544 case IceType_NUM: | |
| 545 case IceType_void: | |
| 546 llvm::report_fatal_error("Invalid Call dest type"); | |
| 547 break; | |
| 548 case IceType_i1: | |
| 549 case IceType_i8: | |
| 550 case IceType_i16: | |
| 551 // The bitcode should never return an i1, i8, or i16. | |
| 552 assert(false); | |
| 553 // Fallthrough intended. | |
| 554 case IceType_i32: | |
| 555 ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_eax); | |
| 556 break; | |
| 557 case IceType_i64: | |
| 558 ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_rax); | |
| 559 break; | |
| 560 case IceType_f32: | |
| 561 case IceType_f64: | |
| 562 case IceType_v4i1: | |
| 563 case IceType_v8i1: | |
| 564 case IceType_v16i1: | |
| 565 case IceType_v16i8: | |
| 566 case IceType_v8i16: | |
| 567 case IceType_v4i32: | |
| 568 case IceType_v4f32: | |
| 569 ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_xmm0); | |
| 570 break; | |
| 571 } | |
| 572 } | |
| 573 | |
| 574 InstX86Label *ReturnAddress = nullptr; | |
| 575 Operand *CallTarget = | |
| 576 legalize(Instr->getCallTarget(), Legal_Reg | Legal_Imm | Legal_AddrAbs); | |
| 577 auto *CallTargetR = llvm::dyn_cast<Variable>(CallTarget); | 389 auto *CallTargetR = llvm::dyn_cast<Variable>(CallTarget); |
| 578 Inst *NewCall = nullptr; | 390 if (NeedSandboxing) { |
| 579 if (!NeedSandboxing) { | 391 InstX86Label *ReturnAddress = InstX86Label::create(Func, this); |
| 580 if (CallTargetR != nullptr) { | |
| 581 // x86-64 in Subzero is ILP32. Therefore, CallTarget is i32, but the | |
| 582 // emitted call needs a i64 register (for textual asm.) | |
| 583 Variable *T = makeReg(IceType_i64); | |
| 584 _movzx(T, CallTargetR); | |
| 585 CallTarget = T; | |
| 586 } | |
| 587 NewCall = Context.insert<Traits::Insts::Call>(ReturnReg, CallTarget); | |
| 588 } else { | |
| 589 ReturnAddress = InstX86Label::create(Func, this); | |
| 590 ReturnAddress->setIsReturnLocation(true); | 392 ReturnAddress->setIsReturnLocation(true); |
| 591 constexpr bool SuppressMangling = true; | 393 constexpr bool SuppressMangling = true; |
| 592 /* AutoBundle scoping */ { | 394 /* AutoBundle scoping */ { |
| 593 std::unique_ptr<AutoBundle> Bundler; | 395 std::unique_ptr<AutoBundle> Bundler; |
| 594 if (CallTargetR == nullptr) { | 396 if (CallTargetR == nullptr) { |
| 595 Bundler = makeUnique<AutoBundle>(this, InstBundleLock::Opt_PadToEnd); | 397 Bundler = makeUnique<AutoBundle>(this, InstBundleLock::Opt_PadToEnd); |
| 596 _push(Ctx->getConstantSym(0, ReturnAddress->getName(Func), | 398 _push(Ctx->getConstantSym(0, ReturnAddress->getName(Func), |
| 597 SuppressMangling)); | 399 SuppressMangling)); |
| 598 } else { | 400 } else { |
| 599 Variable *T = makeReg(IceType_i32); | 401 Variable *T = makeReg(IceType_i32); |
| (...skipping 13 matching lines...) |
| 613 CallTarget = T64; | 415 CallTarget = T64; |
| 614 } | 416 } |
| 615 | 417 |
| 616 NewCall = Context.insert<Traits::Insts::Jmp>(CallTarget); | 418 NewCall = Context.insert<Traits::Insts::Jmp>(CallTarget); |
| 617 } | 419 } |
| 618 if (ReturnReg != nullptr) { | 420 if (ReturnReg != nullptr) { |
| 619 Context.insert<InstFakeDef>(ReturnReg); | 421 Context.insert<InstFakeDef>(ReturnReg); |
| 620 } | 422 } |
| 621 | 423 |
| 622 Context.insert(ReturnAddress); | 424 Context.insert(ReturnAddress); |
| | 425 } else { |
| | 426 if (CallTargetR != nullptr) { |
| | 427 // x86-64 in Subzero is ILP32. Therefore, CallTarget is i32, but the |
| | 428 // emitted call needs a i64 register (for textual asm.) |
| | 429 Variable *T = makeReg(IceType_i64); |
| | 430 _movzx(T, CallTargetR); |
| | 431 CallTarget = T; |
| | 432 } |
| | 433 NewCall = Context.insert<Traits::Insts::Call>(ReturnReg, CallTarget); |
| 623 } | 434 } |
| | 435 return NewCall; |
| | 436 } |
| 624 | 437 |
| 625 // Insert a register-kill pseudo instruction. | 438 Variable *TargetX8664::moveReturnValueToRegister(Operand *Value, |
| 626 Context.insert<InstFakeKill>(NewCall); | 439 Type ReturnType) { |
| 627 | 440 if (isVectorType(ReturnType) || isScalarFloatingType(ReturnType)) { |
| 628 // Generate a FakeUse to keep the call live if necessary. | 441 return legalizeToReg(Value, Traits::RegisterSet::Reg_xmm0); |
| 629 if (Instr->hasSideEffects() && ReturnReg) { | |
| 630 Context.insert<InstFakeUse>(ReturnReg); | |
| 631 } | |
| 632 | |
| 633 if (!Dest) | |
| 634 return; | |
| 635 | |
| 636 assert(ReturnReg && "x86-64 always returns value on registers."); | |
| 637 | |
| 638 if (isVectorType(Dest->getType())) { | |
| 639 _movp(Dest, ReturnReg); | |
| 640 } else { | 442 } else { |
| 641 assert(isScalarFloatingType(Dest->getType()) || | 443 assert(ReturnType == IceType_i32 || ReturnType == IceType_i64); |
| 642 isScalarIntegerType(Dest->getType())); | 444 Variable *Reg = nullptr; |
| 643 _mov(Dest, ReturnReg); | 445 _mov(Reg, Value, |
| | 446 Traits::getGprForType(ReturnType, Traits::RegisterSet::Reg_rax)); |
| | 447 return Reg; |
| 644 } | 448 } |
| 645 } | 449 } |
| 646 | 450 |
| 647 void TargetX8664::lowerArguments() { | |
| 648 VarList &Args = Func->getArgs(); | |
| 649 // The first eight vector typed arguments (as well as fp arguments) are | |
| 650 // passed in %xmm0 through %xmm7 regardless of their position in the argument | |
| 651 // list. | |
| 652 unsigned NumXmmArgs = 0; | |
| 653 // The first six integer typed arguments are passed in %rdi, %rsi, %rdx, | |
| 654 // %rcx, %r8, and %r9 regardless of their position in the argument list. | |
| 655 unsigned NumGprArgs = 0; | |
| 656 | |
| 657 Context.init(Func->getEntryNode()); | |
| 658 Context.setInsertPoint(Context.getCur()); | |
| 659 | |
| 660 for (SizeT i = 0, End = Args.size(); | |
| 661 i < End && (NumXmmArgs < Traits::X86_MAX_XMM_ARGS || | |
| 662 NumGprArgs < Traits::X86_MAX_XMM_ARGS); | |
| 663 ++i) { | |
| 664 Variable *Arg = Args[i]; | |
| 665 Type Ty = Arg->getType(); | |
| 666 Variable *RegisterArg = nullptr; | |
| 667 int32_t RegNum = Variable::NoRegister; | |
| 668 if ((isVectorType(Ty) || isScalarFloatingType(Ty))) { | |
| 669 if (NumXmmArgs >= Traits::X86_MAX_XMM_ARGS) { | |
| 670 continue; | |
| 671 } | |
| 672 RegNum = getRegisterForXmmArgNum(NumXmmArgs); | |
| 673 ++NumXmmArgs; | |
| 674 RegisterArg = Func->makeVariable(Ty); | |
| 675 } else if (isScalarIntegerType(Ty)) { | |
| 676 if (NumGprArgs >= Traits::X86_MAX_GPR_ARGS) { | |
| 677 continue; | |
| 678 } | |
| 679 RegNum = getRegisterForGprArgNum(Ty, NumGprArgs); | |
| 680 ++NumGprArgs; | |
| 681 RegisterArg = Func->makeVariable(Ty); | |
| 682 } | |
| 683 assert(RegNum != Variable::NoRegister); | |
| 684 assert(RegisterArg != nullptr); | |
| 685 // Replace Arg in the argument list with the home register. Then generate | |
| 686 // an instruction in the prolog to copy the home register to the assigned | |
| 687 // location of Arg. | |
| 688 if (BuildDefs::dump()) | |
| 689 RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func)); | |
| 690 RegisterArg->setRegNum(RegNum); | |
| 691 RegisterArg->setIsArg(); | |
| 692 Arg->setIsArg(false); | |
| 693 | |
| 694 Args[i] = RegisterArg; | |
| 695 Context.insert<InstAssign>(Arg, RegisterArg); | |
| 696 } | |
| 697 } | |
| 698 | |
| 699 void TargetX8664::lowerRet(const InstRet *Inst) { | |
| 700 Variable *Reg = nullptr; | |
| 701 if (Inst->hasRetValue()) { | |
| 702 Operand *Src0 = legalize(Inst->getRetValue()); | |
| 703 const Type Src0Ty = Src0->getType(); | |
| 704 if (isVectorType(Src0Ty) || isScalarFloatingType(Src0Ty)) { | |
| 705 Reg = legalizeToReg(Src0, Traits::RegisterSet::Reg_xmm0); | |
| 706 } else { | |
| 707 assert(Src0Ty == IceType_i32 || Src0Ty == IceType_i64); | |
| 708 _mov(Reg, Src0, | |
| 709 Traits::getGprForType(Src0Ty, Traits::RegisterSet::Reg_rax)); | |
| 710 } | |
| 711 } | |
| 712 // Add a ret instruction even if sandboxing is enabled, because addEpilog | |
| 713 // explicitly looks for a ret instruction as a marker for where to insert the | |
| 714 // frame removal instructions. | |
| 715 _ret(Reg); | |
| 716 // Add a fake use of esp to make sure esp stays alive for the entire | |
| 717 // function. Otherwise post-call esp adjustments get dead-code eliminated. | |
| 718 keepEspLiveAtExit(); | |
| 719 } | |
| 720 | |
| 721 void TargetX8664::addProlog(CfgNode *Node) { | 451 void TargetX8664::addProlog(CfgNode *Node) { |
| 722 // Stack frame layout: | 452 // Stack frame layout: |
| 723 // | 453 // |
| 724 // +------------------------+ | 454 // +------------------------+ |
| 725 // | 1. return address | | 455 // | 1. return address | |
| 726 // +------------------------+ | 456 // +------------------------+ |
| 727 // | 2. preserved registers | | 457 // | 2. preserved registers | |
| 728 // +------------------------+ | 458 // +------------------------+ |
| 729 // | 3. padding | | 459 // | 3. padding | |
| 730 // +------------------------+ | 460 // +------------------------+ |
| (...skipping 685 matching lines...) |
| 1416 #define X(tag, sizeLog2, align, elts, elty, str) \ | 1146 #define X(tag, sizeLog2, align, elts, elty, str) \ |
| 1417 static_assert(_table1_##tag == _table2_##tag, \ | 1147 static_assert(_table1_##tag == _table2_##tag, \ |
| 1418 "Inconsistency between ICETYPEX8664_TABLE and ICETYPE_TABLE"); | 1148 "Inconsistency between ICETYPEX8664_TABLE and ICETYPE_TABLE"); |
| 1419 ICETYPE_TABLE | 1149 ICETYPE_TABLE |
| 1420 #undef X | 1150 #undef X |
| 1421 } // end of namespace dummy3 | 1151 } // end of namespace dummy3 |
| 1422 } // end of anonymous namespace | 1152 } // end of anonymous namespace |
| 1423 | 1153 |
| 1424 } // end of namespace X8664 | 1154 } // end of namespace X8664 |
| 1425 } // end of namespace Ice | 1155 } // end of namespace Ice |
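The lowerCall code deleted above classifies call arguments according to the System V x86-64 convention spelled out in its comments: the first eight vector or floating-point arguments are passed in %xmm0-%xmm7, the first six integer arguments in %rdi, %rsi, %rdx, %rcx, %r8, and %r9, and the remaining arguments are stored to the outgoing-argument area on the stack. The following is a minimal standalone sketch of that classification, not Subzero code: ArgKind, ArgSlot, and classifyArgs are hypothetical names, and the 16-byte alignment of vector stack arguments is omitted for brevity.

#include <cstddef>
#include <string>
#include <vector>

// Hypothetical stand-ins for the argument kinds lowerCall distinguishes.
enum class ArgKind { Integer, FloatOrVector };

struct ArgSlot {
  bool InRegister;    // passed in a register?
  std::string Reg;    // register name when InRegister is true
  std::size_t Offset; // byte offset from %rsp at the call site otherwise
};

// Classify arguments the way the deleted lowerCall comments describe:
// first 8 xmm args, first 6 integer args, the rest in 8-byte stack slots.
std::vector<ArgSlot> classifyArgs(const std::vector<ArgKind> &Args) {
  static const char *GprNames[] = {"rdi", "rsi", "rdx", "rcx", "r8", "r9"};
  std::vector<ArgSlot> Slots;
  std::size_t NumXmm = 0, NumGpr = 0, StackBytes = 0;
  for (ArgKind Kind : Args) {
    if (Kind == ArgKind::FloatOrVector && NumXmm < 8) {
      Slots.push_back({true, "xmm" + std::to_string(NumXmm++), 0});
    } else if (Kind == ArgKind::Integer && NumGpr < 6) {
      Slots.push_back({true, GprNames[NumGpr++], 0});
    } else {
      Slots.push_back({false, "", StackBytes});
      StackBytes += 8; // each stack argument occupies at least 8 bytes
    }
  }
  return Slots;
}

For example, a call with seven integer arguments would place the first six in %rdi through %r9 and the seventh at offset 0 from %rsp, which corresponds to the StackArgLocations entries that lowerCall builds before copying stack arguments into place.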