Chromium Code Reviews

Side by Side Diff: src/IceTargetLoweringX8664.cpp

Issue 1592033002: Merge lowerCall and lowerRet between x86 and x64 (Closed)
Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Changed to use Variable::NoRegister | Created 4 years, 11 months ago
1 //===- subzero/src/IceTargetLoweringX8664.cpp - x86-64 lowering -----------===// 1 //===- subzero/src/IceTargetLoweringX8664.cpp - x86-64 lowering -----------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
(...skipping 370 matching lines...)
381 1 << Func->getAssembler<>()->getBundleAlignLog2Bytes(); 381 1 << Func->getAssembler<>()->getBundleAlignLog2Bytes();
382 _and(T, Ctx->getConstantInt32(~(BundleSize - 1))); 382 _and(T, Ctx->getConstantInt32(~(BundleSize - 1)));
383 _movzx(T64, T); 383 _movzx(T64, T);
384 _add(T64, r15); 384 _add(T64, r15);
385 JumpTarget = T64; 385 JumpTarget = T64;
386 } 386 }
387 387
388 _jmp(JumpTarget); 388 _jmp(JumpTarget);
389 } 389 }
390 390
391 namespace { 391 Inst *TargetX8664::emitCallToTarget(Operand *CallTarget, Variable *ReturnReg) {
392 static inline TargetX8664::Traits::RegisterSet::AllRegisters 392 Inst *NewCall = nullptr;
393 getRegisterForXmmArgNum(uint32_t ArgNum) {
394 assert(ArgNum < TargetX8664::Traits::X86_MAX_XMM_ARGS);
395 return static_cast<TargetX8664::Traits::RegisterSet::AllRegisters>(
396 TargetX8664::Traits::RegisterSet::Reg_xmm0 + ArgNum);
397 }
398
399 static inline TargetX8664::Traits::RegisterSet::AllRegisters
400 getRegisterForGprArgNum(Type Ty, uint32_t ArgNum) {
401 assert(ArgNum < TargetX8664::Traits::X86_MAX_GPR_ARGS);
402 static const TargetX8664::Traits::RegisterSet::AllRegisters GprForArgNum[] = {
403 TargetX8664::Traits::RegisterSet::Reg_rdi,
404 TargetX8664::Traits::RegisterSet::Reg_rsi,
405 TargetX8664::Traits::RegisterSet::Reg_rdx,
406 TargetX8664::Traits::RegisterSet::Reg_rcx,
407 TargetX8664::Traits::RegisterSet::Reg_r8,
408 TargetX8664::Traits::RegisterSet::Reg_r9,
409 };
410 static_assert(llvm::array_lengthof(GprForArgNum) ==
411 TargetX8664::Traits::X86_MAX_GPR_ARGS,
412 "Mismatch between MAX_GPR_ARGS and GprForArgNum.");
413 assert(Ty == IceType_i64 || Ty == IceType_i32);
414 return static_cast<TargetX8664::Traits::RegisterSet::AllRegisters>(
415 TargetX8664::Traits::getGprForType(Ty, GprForArgNum[ArgNum]));
416 }
417
418 // constexprMax returns a (constexpr) max(S0, S1); it is used to define
419 // OperandList in lowerCall. std::max() is not constexpr until C++14.
420 constexpr SizeT constexprMax(SizeT S0, SizeT S1) { return S0 < S1 ? S1 : S0; }
421
422 } // end of anonymous namespace
423
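For intuition, the two helpers above encode the fixed System V position-to-register mapping: the XMM argument registers are numbered contiguously, so simple addition suffices, while the integer argument registers need a lookup table. A standalone sketch with illustrative enum values (not Subzero's actual register numbering):

  #include <cassert>
  #include <cstdint>

  // Illustrative stand-ins for Subzero's register enums; the values are made up.
  enum Reg { Xmm0 = 0, Rdi = 16, Rsi, Rdx, Rcx, R8, R9 };
  constexpr uint32_t MaxXmmArgs = 8; // %xmm0-%xmm7
  constexpr uint32_t MaxGprArgs = 6; // %rdi, %rsi, %rdx, %rcx, %r8, %r9

  // xmm0..xmm7 are contiguous, mirroring getRegisterForXmmArgNum.
  Reg regForXmmArg(uint32_t ArgNum) {
    assert(ArgNum < MaxXmmArgs);
    return static_cast<Reg>(Xmm0 + ArgNum);
  }

  // The integer argument registers are not contiguous, hence the table in
  // getRegisterForGprArgNum.
  Reg regForGprArg(uint32_t ArgNum) {
    assert(ArgNum < MaxGprArgs);
    static const Reg GprForArgNum[] = {Rdi, Rsi, Rdx, Rcx, R8, R9};
    return GprForArgNum[ArgNum];
  }

  int main() {
    assert(regForGprArg(2) == Rdx);
    assert(regForXmmArg(3) == static_cast<Reg>(Xmm0 + 3));
    return 0;
  }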
424 void TargetX8664::lowerCall(const InstCall *Instr) {
425 // x86-64 calling convention:
426 //
427 // * At the point before the call, the stack must be aligned to 16 bytes.
428 //
429 // * The first eight arguments of vector/fp type, regardless of their
430 // position relative to the other arguments in the argument list, are placed
431 // in registers %xmm0 - %xmm7.
432 //
433 // * The first six arguments of integer types, regardless of their position
434 // relative to the other arguments in the argument list, are placed in
435 // registers %rdi, %rsi, %rdx, %rcx, %r8, and %r9.
436 //
437 // * Other arguments are pushed onto the stack in right-to-left order, such
438 // that the left-most argument ends up on the top of the stack at the lowest
439 // memory address.
440 //
441 // * Stack arguments of vector type are aligned to start at the next highest
442 // multiple of 16 bytes. Other stack arguments are aligned to 8 bytes.
443 //
444 // This is intended to match the section "Function Calling Sequence" of the
445 // document "System V Application Binary Interface."
446 NeedsStackAlignment = true;
447
448 using OperandList =
449 llvm::SmallVector<Operand *, constexprMax(Traits::X86_MAX_XMM_ARGS,
450 Traits::X86_MAX_GPR_ARGS)>;
451 OperandList XmmArgs;
452 CfgVector<std::pair<const Type, Operand *>> GprArgs;
453 OperandList StackArgs, StackArgLocations;
454 int32_t ParameterAreaSizeBytes = 0;
455
456 // Classify each argument operand according to the location where the
457 // argument is passed.
458 for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
459 Operand *Arg = Instr->getArg(i);
460 Type Ty = Arg->getType();
461 // The PNaCl ABI requires the width of arguments to be at least 32 bits.
462 assert(typeWidthInBytes(Ty) >= 4);
463 if (isVectorType(Ty) && XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) {
464 XmmArgs.push_back(Arg);
465 } else if (isScalarFloatingType(Ty) &&
466 XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) {
467 XmmArgs.push_back(Arg);
468 } else if (isScalarIntegerType(Ty) &&
469 GprArgs.size() < Traits::X86_MAX_GPR_ARGS) {
470 GprArgs.emplace_back(Ty, Arg);
471 } else {
472 StackArgs.push_back(Arg);
473 if (isVectorType(Arg->getType())) {
474 ParameterAreaSizeBytes =
475 Traits::applyStackAlignment(ParameterAreaSizeBytes);
476 }
477 Variable *esp =
478 getPhysicalRegister(Traits::RegisterSet::Reg_rsp, IceType_i64);
479 Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes);
480 StackArgLocations.push_back(
481 Traits::X86OperandMem::create(Func, Ty, esp, Loc));
482 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
483 }
484 }
485
486 // Adjust the parameter area so that the stack is aligned. It is assumed that
487 // the stack is already aligned at the start of the calling sequence.
488 ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes);
489 assert(static_cast<uint32_t>(ParameterAreaSizeBytes) <=
490 maxOutArgsSizeBytes());
491
492 // Copy arguments that are passed on the stack to the appropriate stack
493 // locations.
494 for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) {
495 lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i]));
496 }
497
498 // Copy arguments to be passed in registers to the appropriate registers.
499 // TODO: Investigate the impact of lowering arguments passed in registers
500 // after lowering stack arguments as opposed to the other way around.
501 // Lowering register arguments after stack arguments may reduce register
502 // pressure. On the other hand, lowering register arguments first (before
503 // stack arguments) may result in more compact code, as the memory operand
504 // displacements may end up being smaller before any stack adjustment is
505 // done.
506 for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) {
507 Variable *Reg = legalizeToReg(XmmArgs[i], getRegisterForXmmArgNum(i));
508 // Generate a FakeUse of register arguments so that they do not get dead
509 // code eliminated as a result of the FakeKill of scratch registers after
510 // the call.
511 Context.insert<InstFakeUse>(Reg);
512 }
513
514 for (SizeT i = 0, NumGprArgs = GprArgs.size(); i < NumGprArgs; ++i) {
515 const Type SignatureTy = GprArgs[i].first;
516 Operand *Arg = GprArgs[i].second;
517 Variable *Reg =
518 legalizeToReg(Arg, getRegisterForGprArgNum(Arg->getType(), i));
519 assert(SignatureTy == IceType_i64 || SignatureTy == IceType_i32);
520 if (SignatureTy != Arg->getType()) {
521 if (SignatureTy == IceType_i32) {
522 assert(Arg->getType() == IceType_i64);
523 Variable *T = makeReg(
524 IceType_i32, Traits::getGprForType(IceType_i32, Reg->getRegNum()));
525 _mov(T, Reg);
526 Reg = T;
527 } else {
528 // This branch has never been reached, so we leave the assert(false)
529 // here until we figure out how to exercise it.
530 assert(false);
531 assert(Arg->getType() == IceType_i32);
532 Variable *T = makeReg(
533 IceType_i64, Traits::getGprForType(IceType_i64, Reg->getRegNum()));
534 _movzx(T, Reg);
535 Reg = T;
536 }
537 }
538 Context.insert<InstFakeUse>(Reg);
539 }
540
541 // Generate the call instruction. Assign its result to a temporary with high
542 // register allocation weight.
543 Variable *Dest = Instr->getDest();
544 // ReturnReg doubles as ReturnRegLo as necessary.
545 Variable *ReturnReg = nullptr;
546 if (Dest) {
547 switch (Dest->getType()) {
548 case IceType_NUM:
549 case IceType_void:
550 llvm::report_fatal_error("Invalid Call dest type");
551 break;
552 case IceType_i1:
553 case IceType_i8:
554 case IceType_i16:
555 // The bitcode should never return an i1, i8, or i16.
556 assert(false);
557 // Fallthrough intended.
558 case IceType_i32:
559 ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_eax);
560 break;
561 case IceType_i64:
562 ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_rax);
563 break;
564 case IceType_f32:
565 case IceType_f64:
566 case IceType_v4i1:
567 case IceType_v8i1:
568 case IceType_v16i1:
569 case IceType_v16i8:
570 case IceType_v8i16:
571 case IceType_v4i32:
572 case IceType_v4f32:
573 ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_xmm0);
574 break;
575 }
576 }
577
578 InstX86Label *ReturnAddress = nullptr;
579 Operand *CallTarget =
580 legalize(Instr->getCallTarget(), Legal_Reg | Legal_Imm | Legal_AddrAbs);
581 auto *CallTargetR = llvm::dyn_cast<Variable>(CallTarget); 393 auto *CallTargetR = llvm::dyn_cast<Variable>(CallTarget);
582 Inst *NewCall = nullptr; 394 if (NeedSandboxing) {
583 if (!NeedSandboxing) { 395 InstX86Label *ReturnAddress = InstX86Label::create(Func, this);
584 if (CallTargetR != nullptr) {
585 // x86-64 in Subzero is ILP32. Therefore, CallTarget is i32, but the
586 // emitted call needs an i64 register (for textual asm).
587 Variable *T = makeReg(IceType_i64);
588 _movzx(T, CallTargetR);
589 CallTarget = T;
590 }
591 NewCall = Context.insert<Traits::Insts::Call>(ReturnReg, CallTarget);
592 } else {
593 ReturnAddress = InstX86Label::create(Func, this);
594 ReturnAddress->setIsReturnLocation(true); 396 ReturnAddress->setIsReturnLocation(true);
595 constexpr bool SuppressMangling = true; 397 constexpr bool SuppressMangling = true;
596 /* AutoBundle scoping */ { 398 /* AutoBundle scoping */ {
597 std::unique_ptr<AutoBundle> Bundler; 399 std::unique_ptr<AutoBundle> Bundler;
598 if (CallTargetR == nullptr) { 400 if (CallTargetR == nullptr) {
599 Bundler = makeUnique<AutoBundle>(this, InstBundleLock::Opt_PadToEnd); 401 Bundler = makeUnique<AutoBundle>(this, InstBundleLock::Opt_PadToEnd);
600 _push(Ctx->getConstantSym(0, ReturnAddress->getName(Func), 402 _push(Ctx->getConstantSym(0, ReturnAddress->getName(Func),
601 SuppressMangling)); 403 SuppressMangling));
602 } else { 404 } else {
603 Variable *T = makeReg(IceType_i32); 405 Variable *T = makeReg(IceType_i32);
(...skipping 13 matching lines...)
617 CallTarget = T64; 419 CallTarget = T64;
618 } 420 }
619 421
620 NewCall = Context.insert<Traits::Insts::Jmp>(CallTarget); 422 NewCall = Context.insert<Traits::Insts::Jmp>(CallTarget);
621 } 423 }
622 if (ReturnReg != nullptr) { 424 if (ReturnReg != nullptr) {
623 Context.insert<InstFakeDef>(ReturnReg); 425 Context.insert<InstFakeDef>(ReturnReg);
624 } 426 }
625 427
626 Context.insert(ReturnAddress); 428 Context.insert(ReturnAddress);
429 } else {
430 if (CallTargetR != nullptr) {
431 // x86-64 in Subzero is ILP32. Therefore, CallTarget is i32, but the
432 // emitted call needs an i64 register (for textual asm).
433 Variable *T = makeReg(IceType_i64);
434 _movzx(T, CallTargetR);
435 CallTarget = T;
436 }
437 NewCall = Context.insert<Traits::Insts::Call>(ReturnReg, CallTarget);
627 } 438 }
439 return NewCall;
440 }
628 441
629 // Insert a register-kill pseudo instruction. 442 Variable *TargetX8664::moveReturnValueToRegister(Operand *Value,
630 Context.insert<InstFakeKill>(NewCall); 443 const Type ReturnType) {
631 444 if (isVectorType(ReturnType) || isScalarFloatingType(ReturnType)) {
632 // Generate a FakeUse to keep the call live if necessary. 445 return legalizeToReg(Value, Traits::RegisterSet::Reg_xmm0);
633 if (Instr->hasSideEffects() && ReturnReg) {
634 Context.insert<InstFakeUse>(ReturnReg);
635 }
636
637 if (!Dest)
638 return;
639
640 assert(ReturnReg && "x86-64 always returns values in registers.");
641
642 if (isVectorType(Dest->getType())) {
643 _movp(Dest, ReturnReg);
644 } else { 446 } else {
645 assert(isScalarFloatingType(Dest->getType()) || 447 assert(ReturnType == IceType_i32 || ReturnType == IceType_i64);
646 isScalarIntegerType(Dest->getType())); 448 Variable *Reg = nullptr;
647 _mov(Dest, ReturnReg); 449 _mov(Reg, Value,
450 Traits::getGprForType(ReturnType, Traits::RegisterSet::Reg_rax));
451 return Reg;
648 } 452 }
649 } 453 }
650 454
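The classification loop at the top of lowerCall implements the calling-convention rules quoted in its leading comment. A self-contained sketch of the same policy, with invented argument kinds and none of Subzero's IR plumbing:

  #include <cstdio>
  #include <vector>

  enum class Kind { Int, Float, Vector };

  int main() {
    const char *GprNames[] = {"rdi", "rsi", "rdx", "rcx", "r8", "r9"};
    const std::vector<Kind> Args = {Kind::Int, Kind::Float, Kind::Vector, Kind::Int};
    unsigned NumXmm = 0, NumGpr = 0, StackOffset = 0;
    for (unsigned i = 0; i < Args.size(); ++i) {
      if ((Args[i] == Kind::Float || Args[i] == Kind::Vector) && NumXmm < 8) {
        std::printf("arg %u -> xmm%u\n", i, NumXmm++);
      } else if (Args[i] == Kind::Int && NumGpr < 6) {
        std::printf("arg %u -> %s\n", i, GprNames[NumGpr++]);
      } else {
        std::printf("arg %u -> [rsp+%u]\n", i, StackOffset);
        StackOffset += 8; // scalar slots; a vector rounds the offset up to 16 first
      }
    }
    return 0;
  }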
651 void TargetX8664::lowerArguments() {
652 VarList &Args = Func->getArgs();
653 // The first eight vector-typed arguments (as well as fp arguments) are
654 // passed in %xmm0 through %xmm7 regardless of their position in the argument
655 // list.
656 unsigned NumXmmArgs = 0;
657 // The first six integer-typed arguments are passed in %rdi, %rsi, %rdx,
658 // %rcx, %r8, and %r9 regardless of their position in the argument list.
659 unsigned NumGprArgs = 0;
660
661 Context.init(Func->getEntryNode());
662 Context.setInsertPoint(Context.getCur());
663
664 for (SizeT i = 0, End = Args.size();
665 i < End && (NumXmmArgs < Traits::X86_MAX_XMM_ARGS ||
666 NumGprArgs < Traits::X86_MAX_GPR_ARGS);
667 ++i) {
668 Variable *Arg = Args[i];
669 Type Ty = Arg->getType();
670 Variable *RegisterArg = nullptr;
671 int32_t RegNum = Variable::NoRegister;
672 if ((isVectorType(Ty) || isScalarFloatingType(Ty))) {
673 if (NumXmmArgs >= Traits::X86_MAX_XMM_ARGS) {
674 continue;
675 }
676 RegNum = getRegisterForXmmArgNum(NumXmmArgs);
677 ++NumXmmArgs;
678 RegisterArg = Func->makeVariable(Ty);
679 } else if (isScalarIntegerType(Ty)) {
680 if (NumGprArgs >= Traits::X86_MAX_GPR_ARGS) {
681 continue;
682 }
683 RegNum = getRegisterForGprArgNum(Ty, NumGprArgs);
684 ++NumGprArgs;
685 RegisterArg = Func->makeVariable(Ty);
686 }
687 assert(RegNum != Variable::NoRegister);
688 assert(RegisterArg != nullptr);
689 // Replace Arg in the argument list with the home register. Then generate
690 // an instruction in the prolog to copy the home register to the assigned
691 // location of Arg.
692 if (BuildDefs::dump())
693 RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func));
694 RegisterArg->setRegNum(RegNum);
695 RegisterArg->setIsArg();
696 Arg->setIsArg(false);
697
698 Args[i] = RegisterArg;
699 Context.insert<InstAssign>(Arg, RegisterArg);
700 }
701 }
702
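A toy model of what lowerArguments does, using hypothetical names: each in-register argument is renamed to a register-pinned home_reg temporary, and a prolog copy (the InstAssign above) moves it back into the original, freely allocatable variable:

  #include <cstdio>
  #include <string>
  #include <vector>

  int main() {
    const char *Gprs[] = {"rdi", "rsi", "rdx", "rcx", "r8", "r9"};
    std::vector<std::string> Args = {"a", "b", "c"};
    for (unsigned i = 0; i < Args.size() && i < 6; ++i) {
      const std::string Home = "home_reg:" + Args[i];
      // The prolog copy that lowerArguments inserts as an InstAssign:
      std::printf("%s = %s  ; pinned to %%%s\n", Args[i].c_str(), Home.c_str(),
                  Gprs[i]);
      Args[i] = Home; // the argument list now names the home register
    }
    return 0;
  }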
703 void TargetX8664::lowerRet(const InstRet *Inst) {
704 Variable *Reg = nullptr;
705 if (Inst->hasRetValue()) {
706 Operand *Src0 = legalize(Inst->getRetValue());
707 const Type Src0Ty = Src0->getType();
708 if (isVectorType(Src0Ty) || isScalarFloatingType(Src0Ty)) {
709 Reg = legalizeToReg(Src0, Traits::RegisterSet::Reg_xmm0);
710 } else {
711 assert(Src0Ty == IceType_i32 || Src0Ty == IceType_i64);
712 _mov(Reg, Src0,
713 Traits::getGprForType(Src0Ty, Traits::RegisterSet::Reg_rax));
714 }
715 }
716 // Add a ret instruction even if sandboxing is enabled, because addEpilog
717 // explicitly looks for a ret instruction as a marker for where to insert the
718 // frame removal instructions.
719 _ret(Reg);
720 // Add a fake use of esp to make sure esp stays alive for the entire
721 // function. Otherwise post-call esp adjustments get dead-code eliminated.
722 keepEspLiveAtExit();
723 }
724
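lowerRet (and moveReturnValueToRegister in the new column) selects %xmm0 for floating-point and vector returns, and the width-appropriate alias of %rax for integer returns. A minimal sketch of that selection rule; the string-based type spellings are illustrative shorthand, not Subzero's Type enum:

  #include <cassert>
  #include <cstdio>
  #include <cstring>

  const char *returnRegisterFor(const char *Ty) {
    if (Ty[0] == 'f' || Ty[0] == 'v') // f32/f64 and vectors such as v4i32
      return "xmm0";
    if (std::strcmp(Ty, "i32") == 0)
      return "eax"; // the 32-bit alias of rax, as getGprForType would pick
    assert(std::strcmp(Ty, "i64") == 0);
    return "rax";
  }

  int main() {
    std::printf("%s %s %s\n", returnRegisterFor("f64"), returnRegisterFor("i32"),
                returnRegisterFor("v4i32"));
    return 0;
  }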
725 void TargetX8664::addProlog(CfgNode *Node) { 455 void TargetX8664::addProlog(CfgNode *Node) {
726 // Stack frame layout: 456 // Stack frame layout:
727 // 457 //
728 // +------------------------+ 458 // +------------------------+
729 // | 1. return address | 459 // | 1. return address |
730 // +------------------------+ 460 // +------------------------+
731 // | 2. preserved registers | 461 // | 2. preserved registers |
732 // +------------------------+ 462 // +------------------------+
733 // | 3. padding | 463 // | 3. padding |
734 // +------------------------+ 464 // +------------------------+
(...skipping 685 matching lines...)
1420 #define X(tag, sizeLog2, align, elts, elty, str) \ 1150 #define X(tag, sizeLog2, align, elts, elty, str) \
1421 static_assert(_table1_##tag == _table2_##tag, \ 1151 static_assert(_table1_##tag == _table2_##tag, \
1422 "Inconsistency between ICETYPEX8664_TABLE and ICETYPE_TABLE"); 1152 "Inconsistency between ICETYPEX8664_TABLE and ICETYPE_TABLE");
1423 ICETYPE_TABLE 1153 ICETYPE_TABLE
1424 #undef X 1154 #undef X
1425 } // end of namespace dummy3 1155 } // end of namespace dummy3
1426 } // end of anonymous namespace 1156 } // end of anonymous namespace
1427 1157
1428 } // end of namespace X8664 1158 } // end of namespace X8664
1429 } // end of namespace Ice 1159 } // end of namespace Ice
