OLD | NEW |
1 //===- subzero/src/IceTargetLoweringX8664.cpp - x86-64 lowering -----------===// | 1 //===- subzero/src/IceTargetLoweringX8664.cpp - x86-64 lowering -----------===// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 /// | 9 /// |
10 /// \file | 10 /// \file |
(...skipping 370 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
381 1 << Func->getAssembler<>()->getBundleAlignLog2Bytes(); | 381 1 << Func->getAssembler<>()->getBundleAlignLog2Bytes(); |
382 _and(T, Ctx->getConstantInt32(~(BundleSize - 1))); | 382 _and(T, Ctx->getConstantInt32(~(BundleSize - 1))); |
383 _movzx(T64, T); | 383 _movzx(T64, T); |
384 _add(T64, r15); | 384 _add(T64, r15); |
385 JumpTarget = T64; | 385 JumpTarget = T64; |
386 } | 386 } |
387 | 387 |
388 _jmp(JumpTarget); | 388 _jmp(JumpTarget); |
389 } | 389 } |
390 | 390 |
391 namespace { | 391 Inst *TargetX8664::emitCallToTarget(Operand *CallTarget, Variable *ReturnReg) { |
// OLD column: getRegisterForXmmArgNum(ArgNum) maps the zero-based index of an
// argument passed in an XMM register to that register. It asserts that ArgNum
// is below TargetX8664::Traits::X86_MAX_XMM_ARGS and returns Reg_xmm0 + ArgNum
// cast to RegisterSet::AllRegisters.
// NOTE(review): the NEW column of the first line below holds an unrelated
// statement that belongs to TargetX8664::emitCallToTarget.
392 static inline TargetX8664::Traits::RegisterSet::AllRegisters | 392 Inst *NewCall = nullptr; |
393 getRegisterForXmmArgNum(uint32_t ArgNum) { | |
394 assert(ArgNum < TargetX8664::Traits::X86_MAX_XMM_ARGS); | |
395 return static_cast<TargetX8664::Traits::RegisterSet::AllRegisters>( | |
396 TargetX8664::Traits::RegisterSet::Reg_xmm0 + ArgNum); | |
397 } | |
398 | |
399 static inline TargetX8664::Traits::RegisterSet::AllRegisters | |
400 getRegisterForGprArgNum(Type Ty, uint32_t ArgNum) { | |
401 assert(ArgNum < TargetX8664::Traits::X86_MAX_GPR_ARGS); | |
402 static const TargetX8664::Traits::RegisterSet::AllRegisters GprForArgNum[] = { | |
403 TargetX8664::Traits::RegisterSet::Reg_rdi, | |
404 TargetX8664::Traits::RegisterSet::Reg_rsi, | |
405 TargetX8664::Traits::RegisterSet::Reg_rdx, | |
406 TargetX8664::Traits::RegisterSet::Reg_rcx, | |
407 TargetX8664::Traits::RegisterSet::Reg_r8, | |
408 TargetX8664::Traits::RegisterSet::Reg_r9, | |
409 }; | |
410 static_assert(llvm::array_lengthof(GprForArgNum) == | |
411 TargetX8664::TargetX8664::Traits::X86_MAX_GPR_ARGS, | |
412 "Mismatch between MAX_GPR_ARGS and GprForArgNum."); | |
413 assert(Ty == IceType_i64 || Ty == IceType_i32); | |
414 return static_cast<TargetX8664::Traits::RegisterSet::AllRegisters>( | |
415 TargetX8664::Traits::getGprForType(Ty, GprForArgNum[ArgNum])); | |
416 } | |
417 | |
418 // constexprMax returns a (constexpr) max(S0, S1), and it is used for defining | |
419 // OperandList in lowerCall. std::max() is supposed to work, but it doesn't. | |
420 constexpr SizeT constexprMax(SizeT S0, SizeT S1) { return S0 < S1 ? S1 : S0; } | |
421 | |
422 } // end of anonymous namespace | |
423 | |
424 void TargetX8664::lowerCall(const InstCall *Instr) { | |
425 // x86-64 calling convention: | |
426 // | |
427 // * At the point before the call, the stack must be aligned to 16 bytes. | |
428 // | |
429 // * The first eight arguments of vector/fp type, regardless of their | |
430 // position relative to the other arguments in the argument list, are placed | |
431 // in registers %xmm0 - %xmm7. | |
432 // | |
433 // * The first six arguments of integer types, regardless of their position | |
434 // relative to the other arguments in the argument list, are placed in | |
435 // registers %rdi, %rsi, %rdx, %rcx, %r8, and %r9. | |
436 // | |
437 // * Other arguments are pushed onto the stack in right-to-left order, such | |
438 // that the left-most argument ends up on the top of the stack at the lowest | |
439 // memory address. | |
440 // | |
441 // * Stack arguments of vector type are aligned to start at the next highest | |
442 // multiple of 16 bytes. Other stack arguments are aligned to 8 bytes. | |
443 // | |
444 // This intends to match the section "Function Calling Sequence" of the | |
445 // document "System V Application Binary Interface." | |
446 NeedsStackAlignment = true; | |
447 | |
448 using OperandList = | |
449 llvm::SmallVector<Operand *, constexprMax(Traits::X86_MAX_XMM_ARGS, | |
450 Traits::X86_MAX_GPR_ARGS)>; | |
451 OperandList XmmArgs; | |
452 CfgVector<std::pair<const Type, Operand *>> GprArgs; | |
453 OperandList StackArgs, StackArgLocations; | |
454 int32_t ParameterAreaSizeBytes = 0; | |
455 | |
456 // Classify each argument operand according to the location where the | |
457 // argument is passed. | |
458 for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) { | |
459 Operand *Arg = Instr->getArg(i); | |
460 Type Ty = Arg->getType(); | |
461 // The PNaCl ABI requires the width of arguments to be at least 32 bits. | |
462 assert(typeWidthInBytes(Ty) >= 4); | |
463 if (isVectorType(Ty) && XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) { | |
464 XmmArgs.push_back(Arg); | |
465 } else if (isScalarFloatingType(Ty) && | |
466 XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) { | |
467 XmmArgs.push_back(Arg); | |
468 } else if (isScalarIntegerType(Ty) && | |
469 GprArgs.size() < Traits::X86_MAX_GPR_ARGS) { | |
470 GprArgs.emplace_back(Ty, Arg); | |
471 } else { | |
472 StackArgs.push_back(Arg); | |
473 if (isVectorType(Arg->getType())) { | |
474 ParameterAreaSizeBytes = | |
475 Traits::applyStackAlignment(ParameterAreaSizeBytes); | |
476 } | |
477 Variable *esp = | |
478 getPhysicalRegister(Traits::RegisterSet::Reg_rsp, IceType_i64); | |
479 Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes); | |
480 StackArgLocations.push_back( | |
481 Traits::X86OperandMem::create(Func, Ty, esp, Loc)); | |
482 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType()); | |
483 } | |
484 } | |
485 | |
486 // Adjust the parameter area so that the stack is aligned. It is assumed that | |
487 // the stack is already aligned at the start of the calling sequence. | |
488 ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes); | |
489 assert(static_cast<uint32_t>(ParameterAreaSizeBytes) <= | |
490 maxOutArgsSizeBytes()); | |
491 | |
492 // Copy arguments that are passed on the stack to the appropriate stack | |
493 // locations. | |
494 for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) { | |
495 lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i])); | |
496 } | |
497 | |
498 // Copy arguments to be passed in registers to the appropriate registers. | |
499 // TODO: Investigate the impact of lowering arguments passed in registers | |
500 // after lowering stack arguments as opposed to the other way around. | |
501 // Lowering register arguments after stack arguments may reduce register | |
502 // pressure. On the other hand, lowering register arguments first (before | |
503 // stack arguments) may result in more compact code, as the memory operand | |
504 // displacements may end up being smaller before any stack adjustment is | |
505 // done. | |
506 for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) { | |
507 Variable *Reg = legalizeToReg(XmmArgs[i], getRegisterForXmmArgNum(i)); | |
508 // Generate a FakeUse of register arguments so that they do not get dead | |
509 // code eliminated as a result of the FakeKill of scratch registers after | |
510 // the call. | |
511 Context.insert<InstFakeUse>(Reg); | |
512 } | |
513 | |
514 for (SizeT i = 0, NumGprArgs = GprArgs.size(); i < NumGprArgs; ++i) { | |
515 const Type SignatureTy = GprArgs[i].first; | |
516 Operand *Arg = GprArgs[i].second; | |
517 Variable *Reg = | |
518 legalizeToReg(Arg, getRegisterForGprArgNum(Arg->getType(), i)); | |
519 assert(SignatureTy == IceType_i64 || SignatureTy == IceType_i32); | |
520 if (SignatureTy != Arg->getType()) { | |
521 if (SignatureTy == IceType_i32) { | |
522 assert(Arg->getType() == IceType_i64); | |
523 Variable *T = makeReg( | |
524 IceType_i32, Traits::getGprForType(IceType_i32, Reg->getRegNum())); | |
525 _mov(T, Reg); | |
526 Reg = T; | |
527 } else { | |
528 // This branch has never been reached, so we leave the assert(false) | |
529 // here until we figure out how to exercise it. | |
530 assert(false); | |
531 assert(Arg->getType() == IceType_i32); | |
532 Variable *T = makeReg( | |
533 IceType_i64, Traits::getGprForType(IceType_i64, Reg->getRegNum())); | |
534 _movzx(T, Reg); | |
535 Reg = T; | |
536 } | |
537 } | |
538 Context.insert<InstFakeUse>(Reg); | |
539 } | |
540 | |
541 // Generate the call instruction. Assign its result to a temporary with high | |
542 // register allocation weight. | |
543 Variable *Dest = Instr->getDest(); | |
544 // ReturnReg doubles as ReturnRegLo as necessary. | |
545 Variable *ReturnReg = nullptr; | |
546 if (Dest) { | |
547 switch (Dest->getType()) { | |
548 case IceType_NUM: | |
549 case IceType_void: | |
550 llvm::report_fatal_error("Invalid Call dest type"); | |
551 break; | |
552 case IceType_i1: | |
553 case IceType_i8: | |
554 case IceType_i16: | |
555 // The bitcode should never return an i1, i8, or i16. | |
556 assert(false); | |
557 // Fallthrough intended. | |
558 case IceType_i32: | |
559 ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_eax); | |
560 break; | |
561 case IceType_i64: | |
562 ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_rax); | |
563 break; | |
564 case IceType_f32: | |
565 case IceType_f64: | |
566 case IceType_v4i1: | |
567 case IceType_v8i1: | |
568 case IceType_v16i1: | |
569 case IceType_v16i8: | |
570 case IceType_v8i16: | |
571 case IceType_v4i32: | |
572 case IceType_v4f32: | |
573 ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_xmm0); | |
574 break; | |
575 } | |
576 } | |
577 | |
578 InstX86Label *ReturnAddress = nullptr; | |
579 Operand *CallTarget = | |
580 legalize(Instr->getCallTarget(), Legal_Reg | Legal_Imm | Legal_AddrAbs); | |
581 auto *CallTargetR = llvm::dyn_cast<Variable>(CallTarget); | 393 auto *CallTargetR = llvm::dyn_cast<Variable>(CallTarget); |
582 Inst *NewCall = nullptr; | 394 if (NeedSandboxing) { |
583 if (!NeedSandboxing) { | 395 InstX86Label *ReturnAddress = InstX86Label::create(Func, this); |
584 if (CallTargetR != nullptr) { | |
585 // x86-64 in Subzero is ILP32. Therefore, CallTarget is i32, but the | |
586 // emitted call needs a i64 register (for textual asm.) | |
587 Variable *T = makeReg(IceType_i64); | |
588 _movzx(T, CallTargetR); | |
589 CallTarget = T; | |
590 } | |
591 NewCall = Context.insert<Traits::Insts::Call>(ReturnReg, CallTarget); | |
592 } else { | |
593 ReturnAddress = InstX86Label::create(Func, this); | |
594 ReturnAddress->setIsReturnLocation(true); | 396 ReturnAddress->setIsReturnLocation(true); |
595 constexpr bool SuppressMangling = true; | 397 constexpr bool SuppressMangling = true; |
596 /* AutoBundle scoping */ { | 398 /* AutoBundle scoping */ { |
597 std::unique_ptr<AutoBundle> Bundler; | 399 std::unique_ptr<AutoBundle> Bundler; |
598 if (CallTargetR == nullptr) { | 400 if (CallTargetR == nullptr) { |
599 Bundler = makeUnique<AutoBundle>(this, InstBundleLock::Opt_PadToEnd); | 401 Bundler = makeUnique<AutoBundle>(this, InstBundleLock::Opt_PadToEnd); |
600 _push(Ctx->getConstantSym(0, ReturnAddress->getName(Func), | 402 _push(Ctx->getConstantSym(0, ReturnAddress->getName(Func), |
601 SuppressMangling)); | 403 SuppressMangling)); |
602 } else { | 404 } else { |
603 Variable *T = makeReg(IceType_i32); | 405 Variable *T = makeReg(IceType_i32); |
(...skipping 13 matching lines...) Expand all Loading... |
617 CallTarget = T64; | 419 CallTarget = T64; |
618 } | 420 } |
619 | 421 |
620 NewCall = Context.insert<Traits::Insts::Jmp>(CallTarget); | 422 NewCall = Context.insert<Traits::Insts::Jmp>(CallTarget); |
621 } | 423 } |
622 if (ReturnReg != nullptr) { | 424 if (ReturnReg != nullptr) { |
623 Context.insert<InstFakeDef>(ReturnReg); | 425 Context.insert<InstFakeDef>(ReturnReg); |
624 } | 426 } |
625 | 427 |
626 Context.insert(ReturnAddress); | 428 Context.insert(ReturnAddress); |
| 429 } else { |
| 430 if (CallTargetR != nullptr) { |
| 431 // x86-64 in Subzero is ILP32. Therefore, CallTarget is i32, but the |
| 432 // emitted call needs a i64 register (for textual asm.) |
| 433 Variable *T = makeReg(IceType_i64); |
| 434 _movzx(T, CallTargetR); |
| 435 CallTarget = T; |
| 436 } |
| 437 NewCall = Context.insert<Traits::Insts::Call>(ReturnReg, CallTarget); |
627 } | 438 } |
| 439 return NewCall; |
| 440 } |
628 | 441 |
// NEW column: TargetX8664::moveReturnValueToRegister(Value, ReturnType) places
// a return value in a register and returns that register variable. For vector
// and scalar floating-point ReturnTypes it legalizes Value into Reg_xmm0.
// Otherwise it asserts that ReturnType is IceType_i32 or IceType_i64 and moves
// Value into the register that Traits::getGprForType yields for ReturnType and
// Reg_rax.
// NOTE(review): Reg is nullptr when handed to _mov; presumably _mov creates
// the destination variable in that case -- confirm against _mov's definition.
// OLD column: the closing statements of TargetX8664::lowerCall.
629 // Insert a register-kill pseudo instruction. | 442 Variable *TargetX8664::moveReturnValueToRegister(Operand *Value, |
630 Context.insert<InstFakeKill>(NewCall); | 443 const Type ReturnType) { |
631 | 444 if (isVectorType(ReturnType) || isScalarFloatingType(ReturnType)) { |
632 // Generate a FakeUse to keep the call live if necessary. | 445 return legalizeToReg(Value, Traits::RegisterSet::Reg_xmm0); |
633 if (Instr->hasSideEffects() && ReturnReg) { | |
634 Context.insert<InstFakeUse>(ReturnReg); | |
635 } | |
636 | |
637 if (!Dest) | |
638 return; | |
639 | |
640 assert(ReturnReg && "x86-64 always returns value on registers."); | |
641 | |
642 if (isVectorType(Dest->getType())) { | |
643 _movp(Dest, ReturnReg); | |
644 } else { | 446 } else { |
645 assert(isScalarFloatingType(Dest->getType()) || | 447 assert(ReturnType == IceType_i32 || ReturnType == IceType_i64); |
646 isScalarIntegerType(Dest->getType())); | 448 Variable *Reg = nullptr; |
647 _mov(Dest, ReturnReg); | 449 _mov(Reg, Value, |
| 450 Traits::getGprForType(ReturnType, Traits::RegisterSet::Reg_rax)); |
| 451 return Reg; |
648 } | 452 } |
649 } | 453 } |
650 | 454 |
651 void TargetX8664::lowerArguments() { | |
652 VarList &Args = Func->getArgs(); | |
653 // The first eight vector typed arguments (as well as fp arguments) are | |
654 // passed in %xmm0 through %xmm7 regardless of their position in the argument | |
655 // list. | |
656 unsigned NumXmmArgs = 0; | |
657 // The first six integer typed arguments are passed in %rdi, %rsi, %rdx, | |
658 // %rcx, %r8, and %r9 regardless of their position in the argument list. | |
659 unsigned NumGprArgs = 0; | |
660 | |
661 Context.init(Func->getEntryNode()); | |
662 Context.setInsertPoint(Context.getCur()); | |
663 | |
664 for (SizeT i = 0, End = Args.size(); | |
665 i < End && (NumXmmArgs < Traits::X86_MAX_XMM_ARGS || | |
666 NumGprArgs < Traits::X86_MAX_XMM_ARGS); | |
667 ++i) { | |
668 Variable *Arg = Args[i]; | |
669 Type Ty = Arg->getType(); | |
670 Variable *RegisterArg = nullptr; | |
671 int32_t RegNum = Variable::NoRegister; | |
672 if ((isVectorType(Ty) || isScalarFloatingType(Ty))) { | |
673 if (NumXmmArgs >= Traits::X86_MAX_XMM_ARGS) { | |
674 continue; | |
675 } | |
676 RegNum = getRegisterForXmmArgNum(NumXmmArgs); | |
677 ++NumXmmArgs; | |
678 RegisterArg = Func->makeVariable(Ty); | |
679 } else if (isScalarIntegerType(Ty)) { | |
680 if (NumGprArgs >= Traits::X86_MAX_GPR_ARGS) { | |
681 continue; | |
682 } | |
683 RegNum = getRegisterForGprArgNum(Ty, NumGprArgs); | |
684 ++NumGprArgs; | |
685 RegisterArg = Func->makeVariable(Ty); | |
686 } | |
687 assert(RegNum != Variable::NoRegister); | |
688 assert(RegisterArg != nullptr); | |
689 // Replace Arg in the argument list with the home register. Then generate | |
690 // an instruction in the prolog to copy the home register to the assigned | |
691 // location of Arg. | |
692 if (BuildDefs::dump()) | |
693 RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func)); | |
694 RegisterArg->setRegNum(RegNum); | |
695 RegisterArg->setIsArg(); | |
696 Arg->setIsArg(false); | |
697 | |
698 Args[i] = RegisterArg; | |
699 Context.insert<InstAssign>(Arg, RegisterArg); | |
700 } | |
701 } | |
702 | |
703 void TargetX8664::lowerRet(const InstRet *Inst) { | |
704 Variable *Reg = nullptr; | |
705 if (Inst->hasRetValue()) { | |
706 Operand *Src0 = legalize(Inst->getRetValue()); | |
707 const Type Src0Ty = Src0->getType(); | |
708 if (isVectorType(Src0Ty) || isScalarFloatingType(Src0Ty)) { | |
709 Reg = legalizeToReg(Src0, Traits::RegisterSet::Reg_xmm0); | |
710 } else { | |
711 assert(Src0Ty == IceType_i32 || Src0Ty == IceType_i64); | |
712 _mov(Reg, Src0, | |
713 Traits::getGprForType(Src0Ty, Traits::RegisterSet::Reg_rax)); | |
714 } | |
715 } | |
716 // Add a ret instruction even if sandboxing is enabled, because addEpilog | |
717 // explicitly looks for a ret instruction as a marker for where to insert the | |
718 // frame removal instructions. | |
719 _ret(Reg); | |
720 // Add a fake use of esp to make sure esp stays alive for the entire | |
721 // function. Otherwise post-call esp adjustments get dead-code eliminated. | |
722 keepEspLiveAtExit(); | |
723 } | |
724 | |
725 void TargetX8664::addProlog(CfgNode *Node) { | 455 void TargetX8664::addProlog(CfgNode *Node) { |
726 // Stack frame layout: | 456 // Stack frame layout: |
727 // | 457 // |
728 // +------------------------+ | 458 // +------------------------+ |
729 // | 1. return address | | 459 // | 1. return address | |
730 // +------------------------+ | 460 // +------------------------+ |
731 // | 2. preserved registers | | 461 // | 2. preserved registers | |
732 // +------------------------+ | 462 // +------------------------+ |
733 // | 3. padding | | 463 // | 3. padding | |
734 // +------------------------+ | 464 // +------------------------+ |
(...skipping 685 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1420 #define X(tag, sizeLog2, align, elts, elty, str) \ | 1150 #define X(tag, sizeLog2, align, elts, elty, str) \ |
1421 static_assert(_table1_##tag == _table2_##tag, \ | 1151 static_assert(_table1_##tag == _table2_##tag, \ |
1422 "Inconsistency between ICETYPEX8664_TABLE and ICETYPE_TABLE"); | 1152 "Inconsistency between ICETYPEX8664_TABLE and ICETYPE_TABLE"); |
1423 ICETYPE_TABLE | 1153 ICETYPE_TABLE |
1424 #undef X | 1154 #undef X |
1425 } // end of namespace dummy3 | 1155 } // end of namespace dummy3 |
1426 } // end of anonymous namespace | 1156 } // end of anonymous namespace |
1427 | 1157 |
1428 } // end of namespace X8664 | 1158 } // end of namespace X8664 |
1429 } // end of namespace Ice | 1159 } // end of namespace Ice |
OLD | NEW |