| OLD | NEW |
| 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// | 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 /// | 9 /// |
| 10 /// \file | 10 /// \file |
| 11 /// This file implements the TargetLoweringX8632 class, which | 11 /// This file implements the TargetLoweringX8632 class, which |
| 12 /// consists almost entirely of the lowering sequence for each | 12 /// consists almost entirely of the lowering sequence for each |
| 13 /// high-level instruction. | 13 /// high-level instruction. |
| 14 /// | 14 /// |
| 15 //===----------------------------------------------------------------------===// | 15 //===----------------------------------------------------------------------===// |
| 16 | 16 |
| 17 #include "IceTargetLoweringX8632.h" | 17 #include "IceTargetLoweringX8632.h" |
| 18 | 18 |
| 19 #include "IceTargetLoweringX8632Traits.h" | 19 #include "IceTargetLoweringX8632Traits.h" |
| 20 #include "IceTargetLoweringX86Base.h" | 20 #include "IceTargetLoweringX86Base.h" |
| 21 | 21 |
| 22 namespace Ice { | 22 namespace Ice { |
| 23 | 23 |
| 24 //------------------------------------------------------------------------------ |
| 25 // ______ ______ ______ __ ______ ______ |
| 26 // /\__ _\ /\ == \ /\ __ \ /\ \ /\__ _\ /\ ___\ |
| 27 // \/_/\ \/ \ \ __< \ \ __ \ \ \ \ \/_/\ \/ \ \___ \ |
| 28 // \ \_\ \ \_\ \_\ \ \_\ \_\ \ \_\ \ \_\ \/\_____\ |
| 29 // \/_/ \/_/ /_/ \/_/\/_/ \/_/ \/_/ \/_____/ |
| 30 // |
| 31 //------------------------------------------------------------------------------ |
| 24 namespace X86Internal { | 32 namespace X86Internal { |
| 25 const MachineTraits<TargetX8632>::TableFcmpType | 33 const MachineTraits<TargetX8632>::TableFcmpType |
| 26 MachineTraits<TargetX8632>::TableFcmp[] = { | 34 MachineTraits<TargetX8632>::TableFcmp[] = { |
| 27 #define X(val, dflt, swapS, C1, C2, swapV, pred) \ | 35 #define X(val, dflt, swapS, C1, C2, swapV, pred) \ |
| 28 { \ | 36 { \ |
| 29 dflt, swapS, X8632::Traits::Cond::C1, X8632::Traits::Cond::C2, swapV, \ | 37 dflt, swapS, X8632::Traits::Cond::C1, X8632::Traits::Cond::C2, swapV, \ |
| 30 X8632::Traits::Cond::pred \ | 38 X8632::Traits::Cond::pred \ |
| 31 } \ | 39 } \ |
| 32 , | 40 , |
| 33 FCMPX8632_TABLE | 41 FCMPX8632_TABLE |
| (...skipping 358 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 392 // Repeat the static asserts with respect to the high-level table | 400 // Repeat the static asserts with respect to the high-level table |
| 393 // entries in case the high-level table has extra entries. | 401 // entries in case the high-level table has extra entries. |
| 394 #define X(tag, sizeLog2, align, elts, elty, str) \ | 402 #define X(tag, sizeLog2, align, elts, elty, str) \ |
| 395 static_assert(_table1_##tag == _table2_##tag, \ | 403 static_assert(_table1_##tag == _table2_##tag, \ |
| 396 "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE"); | 404 "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE"); |
| 397 ICETYPE_TABLE | 405 ICETYPE_TABLE |
| 398 #undef X | 406 #undef X |
| 399 } // end of namespace dummy3 | 407 } // end of namespace dummy3 |
| 400 } // end of anonymous namespace | 408 } // end of anonymous namespace |
| 401 | 409 |
| 410 //------------------------------------------------------------------------------ |
| 411 // __ ______ __ __ ______ ______ __ __ __ ______ |
| 412 // /\ \ /\ __ \/\ \ _ \ \/\ ___\/\ == \/\ \/\ "-.\ \/\ ___\ |
| 413 // \ \ \___\ \ \/\ \ \ \/ ".\ \ \ __\\ \ __<\ \ \ \ \-. \ \ \__ \ |
| 414 // \ \_____\ \_____\ \__/".~\_\ \_____\ \_\ \_\ \_\ \_\\"\_\ \_____\ |
| 415 // \/_____/\/_____/\/_/ \/_/\/_____/\/_/ /_/\/_/\/_/ \/_/\/_____/ |
| 416 // |
| 417 //------------------------------------------------------------------------------ |
| 418 void TargetX8632::lowerCall(const InstCall *Instr) { |
| 419 // Lowers a call: places the arguments, emits the call, re-adds the |
| 420 // argument area size to esp, and copies the returned value into Dest. |
| 421 // When sandboxing is enabled the call is emitted inside a bundle lock, |
| 422 // and a non-constant call target is masked to a bundle-aligned address. |
| 423 // |
| 424 // x86-32 calling convention (intended to match the section "IA-32 |
| 425 // Function Calling Convention" of the document "OS X ABI Function Call |
| 426 // Guide" by Apple): |
| 427 // |
| 428 // * At the point before the call, the stack must be aligned to 16 |
| 429 // bytes. |
| 430 // * The first four arguments of vector type, regardless of their |
| 431 // position relative to the other arguments in the argument list, are |
| 432 // placed in registers xmm0 - xmm3. |
| 433 // * Other arguments are stored on the stack as if pushed in |
| 434 // right-to-left order, so the left-most argument ends up on the top |
| 435 // of the stack at the lowest memory address. |
| 436 // * Stack arguments of vector type are aligned to start at the next |
| 437 // highest multiple of 16 bytes. Other stack arguments are aligned to |
| 438 // 4 bytes. |
| 439 NeedsStackAlignment = true; |
| 440 |
| 441 typedef std::vector<Operand *> OperandList; |
| 442 OperandList XmmArgs; |
| 443 OperandList StackArgs, StackArgLocations; |
| 444 uint32_t ParameterAreaSizeBytes = 0; |
| 445 |
| 446 // Classify each argument as register-passed (XmmArgs) or stack-passed |
| 447 // (StackArgs), recording an esp-relative memory slot per stack argument. |
| 448 for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) { |
| 449 Operand *Arg = Instr->getArg(i); |
| 450 Type Ty = Arg->getType(); |
| 451 // The PNaCl ABI requires the width of arguments to be at least 32 bits. |
| 452 assert(typeWidthInBytes(Ty) >= 4); |
| 453 if (isVectorType(Ty) && XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) { |
| 454 XmmArgs.push_back(Arg); |
| 455 } else { |
| 456 StackArgs.push_back(Arg); |
| 457 if (isVectorType(Arg->getType())) { |
| 458 ParameterAreaSizeBytes = |
| 459 Traits::applyStackAlignment(ParameterAreaSizeBytes); |
| 460 } |
| 461 Variable *esp = |
| 462 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp); |
| 463 Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes); |
| 464 StackArgLocations.push_back( |
| 465 Traits::X86OperandMem::create(Func, Ty, esp, Loc)); |
| 466 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType()); |
| 467 } |
| 468 } |
| 469 |
| 470 // Adjust the parameter area so that the stack is aligned. It is |
| 471 // assumed that the stack is already aligned at the start of the |
| 472 // calling sequence. |
| 473 ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes); |
| 474 |
| 475 // Subtract the appropriate amount for the argument area. This also |
| 476 // takes care of setting the stack adjustment during emission. |
| 477 // |
| 478 // TODO: If for some reason the call instruction gets dead-code |
| 479 // eliminated after lowering, we would need to ensure that the |
| 480 // pre-call and the post-call esp adjustment get eliminated as well. |
| 481 if (ParameterAreaSizeBytes) { |
| 482 _adjust_stack(ParameterAreaSizeBytes); |
| 483 } |
| 484 |
| 485 // Copy arguments that are passed on the stack to their recorded stack |
| 486 // locations by lowering one store per stack argument. |
| 487 for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) { |
| 488 lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i])); |
| 489 } |
| 490 |
| 491 // Copy arguments to be passed in registers to the appropriate |
| 492 // registers. |
| 493 // TODO: Investigate the impact of lowering arguments passed in |
| 494 // registers after lowering stack arguments as opposed to the other |
| 495 // way around. Lowering register arguments after stack arguments may |
| 496 // reduce register pressure. On the other hand, lowering register |
| 497 // arguments first (before stack arguments) may result in more compact |
| 498 // code, as the memory operand displacements may end up being smaller |
| 499 // before any stack adjustment is done. |
| 500 for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) { |
| 501 Variable *Reg = |
| 502 legalizeToReg(XmmArgs[i], Traits::RegisterSet::Reg_xmm0 + i); |
| 503 // Generate a FakeUse of register arguments so that they do not get |
| 504 // dead code eliminated as a result of the FakeKill of scratch |
| 505 // registers after the call. |
| 506 Context.insert(InstFakeUse::create(Func, Reg)); |
| 507 } |
| 508 // Generate the call instruction. Assign its result to a temporary |
| 509 // with high register allocation weight. |
| 510 Variable *Dest = Instr->getDest(); |
| 511 // ReturnReg doubles as ReturnRegLo as necessary. |
| 512 Variable *ReturnReg = nullptr; |
| 513 Variable *ReturnRegHi = nullptr; |
| 514 if (Dest) { |
| 515 switch (Dest->getType()) { |
| 516 case IceType_NUM: |
| 517 llvm_unreachable("Invalid Call dest type"); |
| 518 break; |
| 519 case IceType_void: |
| 520 break; |
| 521 case IceType_i1: |
| 522 case IceType_i8: |
| 523 case IceType_i16: |
| 524 case IceType_i32: |
| 525 ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_eax); |
| 526 break; |
| 527 case IceType_i64: |
| 528 ReturnReg = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); |
| 529 ReturnRegHi = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); |
| 530 break; |
| 531 case IceType_f32: |
| 532 case IceType_f64: |
| 533 // Leave ReturnReg==ReturnRegHi==nullptr, and capture the result with |
| 534 // the fstp instruction. |
| 535 break; |
| 536 case IceType_v4i1: |
| 537 case IceType_v8i1: |
| 538 case IceType_v16i1: |
| 539 case IceType_v16i8: |
| 540 case IceType_v8i16: |
| 541 case IceType_v4i32: |
| 542 case IceType_v4f32: |
| 543 ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_xmm0); |
| 544 break; |
| 545 } |
| 546 } |
| 547 Operand *CallTarget = legalize(Instr->getCallTarget()); |
| 548 const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing(); |
| 549 if (NeedSandboxing) { |
| 550 if (llvm::isa<Constant>(CallTarget)) { |
| 551 _bundle_lock(InstBundleLock::Opt_AlignToEnd); |
| 552 } else { |
| 553 Variable *CallTargetVar = nullptr; |
| 554 _mov(CallTargetVar, CallTarget); |
| 555 _bundle_lock(InstBundleLock::Opt_AlignToEnd); |
| 556 const SizeT BundleSize = |
| 557 1 << Func->getAssembler<>()->getBundleAlignLog2Bytes(); |
| 558 _and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1))); |
| 559 CallTarget = CallTargetVar; |
| 560 } |
| 561 } |
| 562 Inst *NewCall = Traits::Insts::Call::create(Func, ReturnReg, CallTarget); |
| 563 Context.insert(NewCall); |
| 564 if (NeedSandboxing) |
| 565 _bundle_unlock(); |
| 566 if (ReturnRegHi) |
| 567 Context.insert(InstFakeDef::create(Func, ReturnRegHi)); |
| 568 |
| 569 // Add the appropriate offset to esp. The call instruction takes care |
| 570 // of resetting the stack offset during emission. |
| 571 if (ParameterAreaSizeBytes) { |
| 572 Variable *esp = |
| 573 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp); |
| 574 _add(esp, Ctx->getConstantInt32(ParameterAreaSizeBytes)); |
| 575 } |
| 576 |
| 577 // Insert a register-kill pseudo instruction. |
| 578 Context.insert(InstFakeKill::create(Func, NewCall)); |
| 579 |
| 580 // Generate a FakeUse to keep the call live if necessary. |
| 581 if (Instr->hasSideEffects() && ReturnReg) { |
| 582 Inst *FakeUse = InstFakeUse::create(Func, ReturnReg); |
| 583 Context.insert(FakeUse); |
| 584 } |
| 585 |
| 586 if (!Dest) |
| 587 return; |
| 588 |
| 589 // Assign the result of the call to Dest. |
| 590 if (ReturnReg) { |
| 591 if (ReturnRegHi) { |
| 592 assert(Dest->getType() == IceType_i64); |
| 593 split64(Dest); |
| 594 Variable *DestLo = Dest->getLo(); |
| 595 Variable *DestHi = Dest->getHi(); |
| 596 _mov(DestLo, ReturnReg); |
| 597 _mov(DestHi, ReturnRegHi); |
| 598 } else { |
| 599 assert(Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 || |
| 600 Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 || |
| 601 isVectorType(Dest->getType())); |
| 602 if (isVectorType(Dest->getType())) { |
| 603 _movp(Dest, ReturnReg); |
| 604 } else { |
| 605 _mov(Dest, ReturnReg); |
| 606 } |
| 607 } |
| 608 } else if (isScalarFloatingType(Dest->getType())) { |
| 609 // Special treatment for an FP function which returns its result in |
| 610 // st(0). |
| 611 // If Dest ends up being a physical xmm register, the fstp emit code |
| 612 // will route st(0) through a temporary stack slot. |
| 613 _fstp(Dest); |
| 614 // Create a fake use of Dest in case it actually isn't used, |
| 615 // because st(0) still needs to be popped. |
| 616 Context.insert(InstFakeUse::create(Func, Dest)); |
| 617 } |
| 618 } |
| 619 |
| 402 } // end of namespace Ice | 620 } // end of namespace Ice |
| OLD | NEW |