Chromium Code Reviews

Unified Diff: src/IceTargetLoweringX8664.cpp

Issue 1592033002: Merge lowerCall and lowerRet between x86 and x64 (Closed)
Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Code review comments (created 4 years, 11 months ago)
//===- subzero/src/IceTargetLoweringX8664.cpp - x86-64 lowering -----------===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
(...skipping 366 matching lines...)
        1 << Func->getAssembler<>()->getBundleAlignLog2Bytes();
    _and(T, Ctx->getConstantInt32(~(BundleSize - 1)));
    _movzx(T64, T);
    _add(T64, r15);
    JumpTarget = T64;
  }

  _jmp(JumpTarget);
}

-namespace {
-static inline TargetX8664::Traits::RegisterSet::AllRegisters
-getRegisterForXmmArgNum(uint32_t ArgNum) {
-  assert(ArgNum < TargetX8664::Traits::X86_MAX_XMM_ARGS);
-  return static_cast<TargetX8664::Traits::RegisterSet::AllRegisters>(
-      TargetX8664::Traits::RegisterSet::Reg_xmm0 + ArgNum);
-}
-
-static inline TargetX8664::Traits::RegisterSet::AllRegisters
-getRegisterForGprArgNum(Type Ty, uint32_t ArgNum) {
-  assert(ArgNum < TargetX8664::Traits::X86_MAX_GPR_ARGS);
-  static const TargetX8664::Traits::RegisterSet::AllRegisters GprForArgNum[] = {
-      TargetX8664::Traits::RegisterSet::Reg_rdi,
-      TargetX8664::Traits::RegisterSet::Reg_rsi,
-      TargetX8664::Traits::RegisterSet::Reg_rdx,
-      TargetX8664::Traits::RegisterSet::Reg_rcx,
-      TargetX8664::Traits::RegisterSet::Reg_r8,
-      TargetX8664::Traits::RegisterSet::Reg_r9,
-  };
-  static_assert(llvm::array_lengthof(GprForArgNum) ==
-                    TargetX8664::Traits::X86_MAX_GPR_ARGS,
-                "Mismatch between MAX_GPR_ARGS and GprForArgNum.");
-  assert(Ty == IceType_i64 || Ty == IceType_i32);
-  return static_cast<TargetX8664::Traits::RegisterSet::AllRegisters>(
-      TargetX8664::Traits::getGprForType(Ty, GprForArgNum[ArgNum]));
-}
-
-// constexprMax returns a (constexpr) max(S0, S1), and it is used for defining
-// OperandList in lowerCall. std::max() is not constexpr until C++14, so it
-// cannot be used in a constant expression here.
-constexpr SizeT constexprMax(SizeT S0, SizeT S1) { return S0 < S1 ? S1 : S0; }
-
-} // end of anonymous namespace
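A quick aside on constexprMax: a small self-contained illustration of why std::max() cannot be used in its place under C++11, where std::max() is not yet constexpr (the Buffer template here is hypothetical, for demonstration only):

    #include <cstddef>

    constexpr size_t constexprMax(size_t S0, size_t S1) { return S0 < S1 ? S1 : S0; }

    template <size_t N> struct Buffer { char Data[N]; };

    Buffer<constexprMax(8, 6)> B;  // OK: template arguments need constant expressions
    // Buffer<std::max<size_t>(8, 6)> B2;  // ill-formed in C++11: std::max is not constexpr
    static_assert(sizeof(B) == 8, "constexprMax selects the larger value");

    int main() { return 0; }
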
-
-void TargetX8664::lowerCall(const InstCall *Instr) {
-  // x86-64 calling convention:
-  //
-  // * At the point before the call, the stack must be aligned to 16 bytes.
-  //
-  // * The first eight arguments of vector/fp type, regardless of their
-  // position relative to the other arguments in the argument list, are placed
-  // in registers %xmm0 - %xmm7.
-  //
-  // * The first six arguments of integer types, regardless of their position
-  // relative to the other arguments in the argument list, are placed in
-  // registers %rdi, %rsi, %rdx, %rcx, %r8, and %r9.
-  //
-  // * Other arguments are pushed onto the stack in right-to-left order, such
-  // that the left-most argument ends up at the top of the stack, at the
-  // lowest memory address.
-  //
-  // * Stack arguments of vector type are aligned to start at the next highest
-  // multiple of 16 bytes. Other stack arguments are aligned to 8 bytes.
-  //
-  // This is intended to match the section "Function Calling Sequence" of the
-  // document "System V Application Binary Interface."
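A worked reading of those rules (hypothetical signature, not taken from this CL):

    // f(i32 a, float b, <4 x f32> c, i32 d, i32 e, i32 f, i32 g, i32 h, i32 i)
    // would be classified by the loop below as:
    //   b -> %xmm0, c -> %xmm1                         (fp/vector, first 8)
    //   a -> %edi, d -> %esi, e -> %edx, f -> %ecx, g -> %r8d, h -> %r9d
    //   i -> the stack, in an 8-byte slot in the parameter area
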
-  NeedsStackAlignment = true;
-
-  using OperandList =
-      llvm::SmallVector<Operand *, constexprMax(Traits::X86_MAX_XMM_ARGS,
-                                                Traits::X86_MAX_GPR_ARGS)>;
-  OperandList XmmArgs;
-  CfgVector<std::pair<const Type, Operand *>> GprArgs;
-  OperandList StackArgs, StackArgLocations;
-  int32_t ParameterAreaSizeBytes = 0;
-
-  // Classify each argument operand according to the location where the
-  // argument is passed.
-  for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
-    Operand *Arg = Instr->getArg(i);
-    Type Ty = Arg->getType();
-    // The PNaCl ABI requires the width of arguments to be at least 32 bits.
-    assert(typeWidthInBytes(Ty) >= 4);
-    if (isVectorType(Ty) && XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) {
-      XmmArgs.push_back(Arg);
-    } else if (isScalarFloatingType(Ty) &&
-               XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) {
-      XmmArgs.push_back(Arg);
-    } else if (isScalarIntegerType(Ty) &&
-               GprArgs.size() < Traits::X86_MAX_GPR_ARGS) {
-      GprArgs.emplace_back(Ty, Arg);
-    } else {
-      StackArgs.push_back(Arg);
-      if (isVectorType(Arg->getType())) {
-        ParameterAreaSizeBytes =
-            Traits::applyStackAlignment(ParameterAreaSizeBytes);
-      }
-      Variable *esp =
-          getPhysicalRegister(Traits::RegisterSet::Reg_rsp, IceType_i64);
-      Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes);
-      StackArgLocations.push_back(
-          Traits::X86OperandMem::create(Func, Ty, esp, Loc));
-      ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
-    }
-  }
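To make the offset bookkeeping concrete (hypothetical overflow arguments; Traits::applyStackAlignment is assumed to align to 16 bytes):

    // Overflow args in order: i64 x, <4 x i32> v, i64 y.
    //   x: stored at [rsp + 0];  ParameterAreaSizeBytes 0 -> 8
    //   v: vector, so 8 is first aligned up to 16; stored at [rsp + 16]; -> 32
    //   y: stored at [rsp + 32]; -> 40, which the applyStackAlignment() call
    //      below rounds up to 48 before the call is emitted.
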
-
-  // Adjust the parameter area so that the stack is aligned. It is assumed that
-  // the stack is already aligned at the start of the calling sequence.
-  ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes);
-  assert(static_cast<uint32_t>(ParameterAreaSizeBytes) <=
-         maxOutArgsSizeBytes());
-
-  // Copy arguments that are passed on the stack to the appropriate stack
-  // locations.
-  for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) {
-    lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i]));
-  }
-
-  // Copy arguments to be passed in registers to the appropriate registers.
-  // TODO: Investigate the impact of lowering arguments passed in registers
-  // after lowering stack arguments as opposed to the other way around.
-  // Lowering register arguments after stack arguments may reduce register
-  // pressure. On the other hand, lowering register arguments first (before
-  // stack arguments) may result in more compact code, as the memory operand
-  // displacements may end up being smaller before any stack adjustment is
-  // done.
-  for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) {
-    Variable *Reg = legalizeToReg(XmmArgs[i], getRegisterForXmmArgNum(i));
-    // Generate a FakeUse of register arguments so that they do not get dead
-    // code eliminated as a result of the FakeKill of scratch registers after
-    // the call.
-    Context.insert<InstFakeUse>(Reg);
-  }
-
-  for (SizeT i = 0, NumGprArgs = GprArgs.size(); i < NumGprArgs; ++i) {
-    const Type SignatureTy = GprArgs[i].first;
-    Operand *Arg = GprArgs[i].second;
-    Variable *Reg =
-        legalizeToReg(Arg, getRegisterForGprArgNum(Arg->getType(), i));
-    assert(SignatureTy == IceType_i64 || SignatureTy == IceType_i32);
-    if (SignatureTy != Arg->getType()) {
-      if (SignatureTy == IceType_i32) {
-        assert(Arg->getType() == IceType_i64);
-        Variable *T = makeReg(
-            IceType_i32, Traits::getGprForType(IceType_i32, Reg->getRegNum()));
-        _mov(T, Reg);
-        Reg = T;
-      } else {
-        // This branch has never been reached, so we leave the assert(false)
-        // here until we figure out how to exercise it.
-        assert(false);
-        assert(Arg->getType() == IceType_i32);
-        Variable *T = makeReg(
-            IceType_i64, Traits::getGprForType(IceType_i64, Reg->getRegNum()));
-        _movzx(T, Reg);
-        Reg = T;
-      }
-    }
-    Context.insert<InstFakeUse>(Reg);
-  }
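The SignatureTy mismatch handled above deserves an illustration (assumed scenario, for the narrowing branch that is actually exercised):

    // Signature says i32, operand was materialized as i64: Reg holds the value
    // in, say, %rdi. T is then the 32-bit view of that same register
    // (getGprForType(IceType_i32, Reg_rdi) would yield Reg_edi), so the
    // _mov(T, Reg) hands the callee only the low 32 bits.
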
-
-  // Generate the call instruction. Assign its result to a temporary with high
-  // register allocation weight.
-  Variable *Dest = Instr->getDest();
-  // ReturnReg doubles as ReturnRegLo as necessary.
-  Variable *ReturnReg = nullptr;
-  if (Dest) {
-    switch (Dest->getType()) {
-    case IceType_NUM:
-    case IceType_void:
-      llvm::report_fatal_error("Invalid Call dest type");
-      break;
-    case IceType_i1:
-    case IceType_i8:
-    case IceType_i16:
-      // The bitcode should never return an i1, i8, or i16.
-      assert(false);
-      // Fallthrough intended.
-    case IceType_i32:
-      ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_eax);
-      break;
-    case IceType_i64:
-      ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_rax);
-      break;
-    case IceType_f32:
-    case IceType_f64:
-    case IceType_v4i1:
-    case IceType_v8i1:
-    case IceType_v16i1:
-    case IceType_v16i8:
-    case IceType_v8i16:
-    case IceType_v4i32:
-    case IceType_v4f32:
-      ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_xmm0);
-      break;
-    }
-  }
-
-  InstX86Label *ReturnAddress = nullptr;
-  Operand *CallTarget =
-      legalize(Instr->getCallTarget(), Legal_Reg | Legal_Imm | Legal_AddrAbs);
+Inst *TargetX8664::emitCallToTarget(Operand *CallTarget, Variable *ReturnReg) {
+  Inst *NewCall = nullptr;
  auto *CallTargetR = llvm::dyn_cast<Variable>(CallTarget);
-  Inst *NewCall = nullptr;
-  if (!NeedSandboxing) {
-    if (CallTargetR != nullptr) {
-      // x86-64 in Subzero is ILP32. Therefore, CallTarget is i32, but the
-      // emitted call needs an i64 register (for textual asm).
-      Variable *T = makeReg(IceType_i64);
-      _movzx(T, CallTargetR);
-      CallTarget = T;
-    }
-    NewCall = Context.insert<Traits::Insts::Call>(ReturnReg, CallTarget);
-  } else {
-    ReturnAddress = InstX86Label::create(Func, this);
+  if (NeedSandboxing) {
+    InstX86Label *ReturnAddress = InstX86Label::create(Func, this);
    ReturnAddress->setIsReturnLocation(true);
    constexpr bool SuppressMangling = true;
    /* AutoBundle scoping */ {
      std::unique_ptr<AutoBundle> Bundler;
      if (CallTargetR == nullptr) {
        Bundler = makeUnique<AutoBundle>(this, InstBundleLock::Opt_PadToEnd);
        _push(Ctx->getConstantSym(0, ReturnAddress->getName(Func),
                                  SuppressMangling));
      } else {
        Variable *T = makeReg(IceType_i32);
(...skipping 13 matching lines...)
        CallTarget = T64;
      }

      NewCall = Context.insert<Traits::Insts::Jmp>(CallTarget);
    }
    if (ReturnReg != nullptr) {
      Context.insert<InstFakeDef>(ReturnReg);
    }

    Context.insert(ReturnAddress);
+  } else {
+    if (CallTargetR != nullptr) {
+      // x86-64 in Subzero is ILP32. Therefore, CallTarget is i32, but the
+      // emitted call needs an i64 register (for textual asm).
+      Variable *T = makeReg(IceType_i64);
+      _movzx(T, CallTargetR);
+      CallTarget = T;
+    }
+    NewCall = Context.insert<Traits::Insts::Call>(ReturnReg, CallTarget);
  }
+  return NewCall;
+}
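For orientation, the sandboxed branch above emits a NaCl-style pseudo-call; an illustrative (not verbatim) sequence for the direct-target case:

    //   .bundle_lock align_to_end
    //   pushq $ReturnAddress      // manually pushed return location
    //   jmp   <target>            // ends the bundle, so execution returns
    //   .bundle_unlock            // to a bundle-aligned ReturnAddress
    //   ReturnAddress:
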

-  // Insert a register-kill pseudo instruction.
-  Context.insert<InstFakeKill>(NewCall);
-
-  // Generate a FakeUse to keep the call live if necessary.
-  if (Instr->hasSideEffects() && ReturnReg) {
-    Context.insert<InstFakeUse>(ReturnReg);
-  }
-
-  if (!Dest)
-    return;
-
-  assert(ReturnReg && "x86-64 always returns value on registers.");
-
-  if (isVectorType(Dest->getType())) {
-    _movp(Dest, ReturnReg);
+Variable *TargetX8664::moveReturnValueToRegister(Operand *Value,
+                                                 Type ReturnType) {
+  if (isVectorType(ReturnType) || isScalarFloatingType(ReturnType)) {
+    return legalizeToReg(Value, Traits::RegisterSet::Reg_xmm0);
  } else {
-    assert(isScalarFloatingType(Dest->getType()) ||
-           isScalarIntegerType(Dest->getType()));
-    _mov(Dest, ReturnReg);
+    assert(ReturnType == IceType_i32 || ReturnType == IceType_i64);
+    Variable *Reg = nullptr;
+    _mov(Reg, Value,
+         Traits::getGprForType(ReturnType, Traits::RegisterSet::Reg_rax));
+    return Reg;
  }
}
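Since this CL merges lowerRet between x86 and x64, moveReturnValueToRegister() is presumably invoked from the now-shared lowering; a sketch of the assumed call shape (not shown in this diff):

    //   if (Inst->hasRetValue()) {
    //     Operand *RetValue = legalize(Inst->getRetValue());
    //     Reg = moveReturnValueToRegister(RetValue, RetValue->getType());
    //   }
    //   _ret(Reg);
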

-void TargetX8664::lowerArguments() {
-  VarList &Args = Func->getArgs();
-  // The first eight vector typed arguments (as well as fp arguments) are
-  // passed in %xmm0 through %xmm7 regardless of their position in the argument
-  // list.
-  unsigned NumXmmArgs = 0;
-  // The first six integer typed arguments are passed in %rdi, %rsi, %rdx,
-  // %rcx, %r8, and %r9 regardless of their position in the argument list.
-  unsigned NumGprArgs = 0;
-
-  Context.init(Func->getEntryNode());
-  Context.setInsertPoint(Context.getCur());
-
-  for (SizeT i = 0, End = Args.size();
-       i < End && (NumXmmArgs < Traits::X86_MAX_XMM_ARGS ||
-                   NumGprArgs < Traits::X86_MAX_GPR_ARGS);
-       ++i) {
-    Variable *Arg = Args[i];
-    Type Ty = Arg->getType();
-    Variable *RegisterArg = nullptr;
-    int32_t RegNum = Variable::NoRegister;
-    if ((isVectorType(Ty) || isScalarFloatingType(Ty))) {
-      if (NumXmmArgs >= Traits::X86_MAX_XMM_ARGS) {
-        continue;
-      }
-      RegNum = getRegisterForXmmArgNum(NumXmmArgs);
-      ++NumXmmArgs;
-      RegisterArg = Func->makeVariable(Ty);
-    } else if (isScalarIntegerType(Ty)) {
-      if (NumGprArgs >= Traits::X86_MAX_GPR_ARGS) {
-        continue;
-      }
-      RegNum = getRegisterForGprArgNum(Ty, NumGprArgs);
-      ++NumGprArgs;
-      RegisterArg = Func->makeVariable(Ty);
-    }
-    assert(RegNum != Variable::NoRegister);
-    assert(RegisterArg != nullptr);
-    // Replace Arg in the argument list with the home register. Then generate
-    // an instruction in the prolog to copy the home register to the assigned
-    // location of Arg.
-    if (BuildDefs::dump())
-      RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func));
-    RegisterArg->setRegNum(RegNum);
-    RegisterArg->setIsArg();
-    Arg->setIsArg(false);
-
-    Args[i] = RegisterArg;
-    Context.insert<InstAssign>(Arg, RegisterArg);
-  }
-}
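The effect of the rewrite above, for a hypothetical f(i32 a, float b): Args becomes {home_reg:a (in %edi), home_reg:b (in %xmm0)}, and the entry node receives

    //   a = home_reg:a
    //   b = home_reg:b
    // so later passes only ever see the incoming registers at function entry.
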
-
-void TargetX8664::lowerRet(const InstRet *Inst) {
-  Variable *Reg = nullptr;
-  if (Inst->hasRetValue()) {
-    Operand *Src0 = legalize(Inst->getRetValue());
-    const Type Src0Ty = Src0->getType();
-    if (isVectorType(Src0Ty) || isScalarFloatingType(Src0Ty)) {
-      Reg = legalizeToReg(Src0, Traits::RegisterSet::Reg_xmm0);
-    } else {
-      assert(Src0Ty == IceType_i32 || Src0Ty == IceType_i64);
-      _mov(Reg, Src0,
-           Traits::getGprForType(Src0Ty, Traits::RegisterSet::Reg_rax));
-    }
-  }
-  // Add a ret instruction even if sandboxing is enabled, because addEpilog
-  // explicitly looks for a ret instruction as a marker for where to insert the
-  // frame removal instructions.
-  _ret(Reg);
-  // Add a fake use of esp to make sure esp stays alive for the entire
-  // function. Otherwise post-call esp adjustments get dead-code eliminated.
-  keepEspLiveAtExit();
-}
-
void TargetX8664::addProlog(CfgNode *Node) {
  // Stack frame layout:
  //
  // +------------------------+
  // | 1. return address      |
  // +------------------------+
  // | 2. preserved registers |
  // +------------------------+
  // | 3. padding             |
  // +------------------------+
(...skipping 685 matching lines...)
#define X(tag, sizeLog2, align, elts, elty, str)                               \
  static_assert(_table1_##tag == _table2_##tag,                               \
                "Inconsistency between ICETYPEX8664_TABLE and ICETYPE_TABLE");
ICETYPE_TABLE
#undef X
} // end of namespace dummy3
} // end of anonymous namespace

} // end of namespace X8664
} // end of namespace Ice
