Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// | 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 // | 9 // |
| 10 // This file implements the TargetLoweringX8632 class, which | 10 // This file implements the TargetLoweringX8632 class, which |
| 11 // consists almost entirely of the lowering sequence for each | 11 // consists almost entirely of the lowering sequence for each |
| 12 // high-level instruction. It also implements | 12 // high-level instruction. It also implements |
| 13 // TargetX8632Fast::postLower() which does the simplest possible | 13 // TargetX8632Fast::postLower() which does the simplest possible |
| 14 // register allocation for the "fast" target. | 14 // register allocation for the "fast" target. |
| 15 // | 15 // |
| 16 //===----------------------------------------------------------------------===// | 16 //===----------------------------------------------------------------------===// |
| 17 | 17 |
| 18 #include "IceDefs.h" | 18 #include "IceDefs.h" |
| 19 #include "IceCfg.h" | 19 #include "IceCfg.h" |
| 20 #include "IceCfgNode.h" | 20 #include "IceCfgNode.h" |
| 21 #include "IceInstX8632.h" | 21 #include "IceInstX8632.h" |
| 22 #include "IceOperand.h" | 22 #include "IceOperand.h" |
| 23 #include "IceTargetLoweringX8632.def" | 23 #include "IceTargetLoweringX8632.def" |
| 24 #include "IceTargetLoweringX8632.h" | 24 #include "IceTargetLoweringX8632.h" |
| 25 #include "llvm/Support/CommandLine.h" | 25 #include "llvm/Support/CommandLine.h" |
| 26 | 26 |
| 27 #include <strings.h> | |
|
Jim Stichnoth
2014/08/14 18:21:19
Use MathExtras.h and llvm::findFirstSet(), since J… [reviewer comment truncated in page extraction]
| |
| 28 | |
| 27 namespace Ice { | 29 namespace Ice { |
| 28 | 30 |
| 29 namespace { | 31 namespace { |
| 30 | 32 |
| 31 // The following table summarizes the logic for lowering the fcmp | 33 // The following table summarizes the logic for lowering the fcmp |
| 32 // instruction. There is one table entry for each of the 16 conditions. | 34 // instruction. There is one table entry for each of the 16 conditions. |
| 33 // | 35 // |
| 34 // The first four columns describe the case when the operands are | 36 // The first four columns describe the case when the operands are |
| 35 // floating point scalar values. A comment in lowerFcmp() describes the | 37 // floating point scalar values. A comment in lowerFcmp() describes the |
| 36 // lowering template. In the most general case, there is a compare | 38 // lowering template. In the most general case, there is a compare |
| (...skipping 84 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 121 } | 123 } |
| 122 | 124 |
| 123 // The maximum number of arguments to pass in XMM registers | 125 // The maximum number of arguments to pass in XMM registers |
| 124 const uint32_t X86_MAX_XMM_ARGS = 4; | 126 const uint32_t X86_MAX_XMM_ARGS = 4; |
| 125 // The number of bits in a byte | 127 // The number of bits in a byte |
| 126 const uint32_t X86_CHAR_BIT = 8; | 128 const uint32_t X86_CHAR_BIT = 8; |
| 127 // Stack alignment | 129 // Stack alignment |
| 128 const uint32_t X86_STACK_ALIGNMENT_BYTES = 16; | 130 const uint32_t X86_STACK_ALIGNMENT_BYTES = 16; |
| 129 // Size of the return address on the stack | 131 // Size of the return address on the stack |
| 130 const uint32_t X86_RET_IP_SIZE_BYTES = 4; | 132 const uint32_t X86_RET_IP_SIZE_BYTES = 4; |
| 133 // The base 2 logarithm of the width in bytes of the largest supported type | |
| 134 const uint32_t X86_LOG2_OF_MAX_TYPE_SIZE = 4; | |
| 131 | 135 |
| 132 // Value is a size in bytes. Return Value adjusted to the next highest | 136 // Value and Alignment are in bytes. Return Value adjusted to the next |
| 133 // multiple of the stack alignment. | 137 // highest multiple of Alignment. |
| 138 uint32_t applyAlignment(uint32_t Value, uint32_t Alignment) { | |
| 139 // power of 2 | |
| 140 assert((Alignment & (Alignment - 1)) == 0); | |
| 141 return (Value + Alignment - 1) & -Alignment; | |
| 142 } | |
| 143 | |
| 144 // Value is in bytes. Return Value adjusted to the next highest multiple | |
| 145 // of the stack alignment. | |
| 134 uint32_t applyStackAlignment(uint32_t Value) { | 146 uint32_t applyStackAlignment(uint32_t Value) { |
| 135 // power of 2 | 147 return applyAlignment(Value, X86_STACK_ALIGNMENT_BYTES); |
| 136 assert((X86_STACK_ALIGNMENT_BYTES & (X86_STACK_ALIGNMENT_BYTES - 1)) == 0); | |
| 137 return (Value + X86_STACK_ALIGNMENT_BYTES - 1) & -X86_STACK_ALIGNMENT_BYTES; | |
| 138 } | 148 } |
| 139 | 149 |
| 140 // Instruction set options | 150 // Instruction set options |
| 141 namespace cl = ::llvm::cl; | 151 namespace cl = ::llvm::cl; |
| 142 cl::opt<TargetX8632::X86InstructionSet> CLInstructionSet( | 152 cl::opt<TargetX8632::X86InstructionSet> CLInstructionSet( |
| 143 "mattr", cl::desc("X86 target attributes"), | 153 "mattr", cl::desc("X86 target attributes"), |
| 144 cl::init(TargetX8632::SSE2), | 154 cl::init(TargetX8632::SSE2), |
| 145 cl::values( | 155 cl::values( |
| 146 clEnumValN(TargetX8632::SSE2, "sse2", | 156 clEnumValN(TargetX8632::SSE2, "sse2", |
| 147 "Enable SSE2 instructions (default)"), | 157 "Enable SSE2 instructions (default)"), |
| (...skipping 365 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 513 Variable *RegisterArg = Func->makeVariable(Ty, DefNode, Name); | 523 Variable *RegisterArg = Func->makeVariable(Ty, DefNode, Name); |
| 514 RegisterArg->setRegNum(RegNum); | 524 RegisterArg->setRegNum(RegNum); |
| 515 RegisterArg->setIsArg(Func); | 525 RegisterArg->setIsArg(Func); |
| 516 Arg->setIsArg(Func, false); | 526 Arg->setIsArg(Func, false); |
| 517 | 527 |
| 518 Args[I] = RegisterArg; | 528 Args[I] = RegisterArg; |
| 519 Context.insert(InstAssign::create(Func, Arg, RegisterArg)); | 529 Context.insert(InstAssign::create(Func, Arg, RegisterArg)); |
| 520 } | 530 } |
| 521 } | 531 } |
| 522 | 532 |
| 533 void TargetX8632::sortByAlignment(VarList &Dest, const VarList &Source) const { | |
| 534 const SizeT NumBuckets = X86_LOG2_OF_MAX_TYPE_SIZE + 1; | |
| 535 VarList Buckets[NumBuckets]; | |
| 536 | |
| 537 for (VarList::const_iterator I = Source.begin(), E = Source.end(); I != E; | |
| 538 ++I) { | |
| 539 Variable *Var = *I; | |
| 540 uint32_t NaturalAlignment = typeWidthInBytesOnStack(Var->getType()); | |
| 541 SizeT BucketIndex = ffs(NaturalAlignment) - 1; | |
| 542 assert(BucketIndex < NumBuckets); | |
| 543 Buckets[BucketIndex].push_back(Var); | |
| 544 } | |
| 545 | |
| 546 for (SizeT I = 0, E = NumBuckets; I < E; ++I) { | |
| 547 VarList &List = Buckets[NumBuckets - I - 1]; | |
| 548 Dest.insert(Dest.end(), List.begin(), List.end()); | |
| 549 } | |
| 550 } | |
| 551 | |
| 523 // Helper function for addProlog(). | 552 // Helper function for addProlog(). |
| 524 // | 553 // |
| 525 // This assumes Arg is an argument passed on the stack. This sets the | 554 // This assumes Arg is an argument passed on the stack. This sets the |
| 526 // frame offset for Arg and updates InArgsSizeBytes according to Arg's | 555 // frame offset for Arg and updates InArgsSizeBytes according to Arg's |
| 527 // width. For an I64 arg that has been split into Lo and Hi components, | 556 // width. For an I64 arg that has been split into Lo and Hi components, |
| 528 // it calls itself recursively on the components, taking care to handle | 557 // it calls itself recursively on the components, taking care to handle |
| 529 // Lo first because of the little-endian architecture. Lastly, this | 558 // Lo first because of the little-endian architecture. Lastly, this |
| 530 // function generates an instruction to copy Arg into its assigned | 559 // function generates an instruction to copy Arg into its assigned |
| 531 // register if applicable. | 560 // register if applicable. |
| 532 void TargetX8632::finishArgumentLowering(Variable *Arg, Variable *FramePtr, | 561 void TargetX8632::finishArgumentLowering(Variable *Arg, Variable *FramePtr, |
| (...skipping 23 matching lines...) Expand all Loading... | |
| 556 _movp(Arg, Mem); | 585 _movp(Arg, Mem); |
| 557 } else { | 586 } else { |
| 558 _mov(Arg, Mem); | 587 _mov(Arg, Mem); |
| 559 } | 588 } |
| 560 } | 589 } |
| 561 } | 590 } |
| 562 | 591 |
| 563 Type TargetX8632::stackSlotType() { return IceType_i32; } | 592 Type TargetX8632::stackSlotType() { return IceType_i32; } |
| 564 | 593 |
| 565 void TargetX8632::addProlog(CfgNode *Node) { | 594 void TargetX8632::addProlog(CfgNode *Node) { |
| 595 // Stack frame layout: | |
| 596 // | |
| 597 // +------------------------+ | |
| 598 // | 1. return address | | |
| 599 // +------------------------+ | |
| 600 // | 2. preserved registers | | |
| 601 // +------------------------+ | |
| 602 // | 3. padding | | |
| 603 // +------------------------+ | |
| 604 // | 4. global spill area | | |
| 605 // +------------------------+ | |
| 606 // | 5. padding | | |
| 607 // +------------------------+ | |
| 608 // | 6. local spill area | | |
| 609 // +------------------------+ | |
| 610 // | 7. padding | | |
| 611 // +------------------------+ | |
| 612 // | 8. local variables | | |
| 613 // +------------------------+ | |
| 614 // | |
| 615 // The following variables record the size in bytes of the given areas: | |
| 616 // * X86_RET_IP_SIZE_BYTES: area 1 | |
| 617 // * PreservedRegsSizeBytes: area 2 | |
| 618 // * SpillAreaPaddingBytes: area 3 | |
| 619 // * GlobalsSize: area 4 | |
| 620 // * GlobalsAndSubsequentPaddingSize: areas 4 - 5 | |
| 621 // * LocalsSpillAreaSize: area 6 | |
| 622 // * LocalsSizeBytes: areas 3 - 7 | |
| 623 | |
| 566 // If SimpleCoalescing is false, each variable without a register | 624 // If SimpleCoalescing is false, each variable without a register |
| 567 // gets its own unique stack slot, which leads to large stack | 625 // gets its own unique stack slot, which leads to large stack |
| 568 // frames. If SimpleCoalescing is true, then each "global" variable | 626 // frames. If SimpleCoalescing is true, then each "global" variable |
| 569 // without a register gets its own slot, but "local" variable slots | 627 // without a register gets its own slot, but "local" variable slots |
| 570 // are reused across basic blocks. E.g., if A and B are local to | 628 // are reused across basic blocks. E.g., if A and B are local to |
| 571 // block 1 and C is local to block 2, then C may share a slot with A | 629 // block 1 and C is local to block 2, then C may share a slot with A |
| 572 // or B. | 630 // or B. |
| 573 const bool SimpleCoalescing = true; | 631 const bool SimpleCoalescing = true; |
| 574 size_t InArgsSizeBytes = 0; | 632 size_t InArgsSizeBytes = 0; |
| 575 size_t PreservedRegsSizeBytes = 0; | 633 size_t PreservedRegsSizeBytes = 0; |
| (...skipping 16 matching lines...) Expand all Loading... | |
| 592 getRegisterSet(RegSet_CalleeSave, RegSet_None); | 650 getRegisterSet(RegSet_CalleeSave, RegSet_None); |
| 593 | 651 |
| 594 size_t GlobalsSize = 0; | 652 size_t GlobalsSize = 0; |
| 595 std::vector<size_t> LocalsSize(Func->getNumNodes()); | 653 std::vector<size_t> LocalsSize(Func->getNumNodes()); |
| 596 | 654 |
| 597 // Prepass. Compute RegsUsed, PreservedRegsSizeBytes, and | 655 // Prepass. Compute RegsUsed, PreservedRegsSizeBytes, and |
| 598 // LocalsSizeBytes. | 656 // LocalsSizeBytes. |
| 599 RegsUsed = llvm::SmallBitVector(CalleeSaves.size()); | 657 RegsUsed = llvm::SmallBitVector(CalleeSaves.size()); |
| 600 const VarList &Variables = Func->getVariables(); | 658 const VarList &Variables = Func->getVariables(); |
| 601 const VarList &Args = Func->getArgs(); | 659 const VarList &Args = Func->getArgs(); |
| 660 VarList SpilledVariables, SortedSpilledVariables, | |
| 661 VariablesLinkedToSpillSplots; | |
| 662 | |
| 663 // If there is a separate locals area, this specifies the alignment | |
| 664 // for it. | |
| 665 uint32_t LocalsSlotsAlignmentBytes = 0; | |
| 666 // The entire spill locations area gets aligned to largest natural | |
| 667 // alignment of the variables that have a spill slot. | |
| 668 uint32_t SpillAreaAlignmentBytes = 0; | |
| 602 for (VarList::const_iterator I = Variables.begin(), E = Variables.end(); | 669 for (VarList::const_iterator I = Variables.begin(), E = Variables.end(); |
| 603 I != E; ++I) { | 670 I != E; ++I) { |
| 604 Variable *Var = *I; | 671 Variable *Var = *I; |
| 605 if (Var->hasReg()) { | 672 if (Var->hasReg()) { |
| 606 RegsUsed[Var->getRegNum()] = true; | 673 RegsUsed[Var->getRegNum()] = true; |
| 607 continue; | 674 continue; |
| 608 } | 675 } |
| 609 // An argument either does not need a stack slot (if passed in a | 676 // An argument either does not need a stack slot (if passed in a |
| 610 // register) or already has one (if passed on the stack). | 677 // register) or already has one (if passed on the stack). |
| 611 if (Var->getIsArg()) | 678 if (Var->getIsArg()) |
| 612 continue; | 679 continue; |
| 613 // An unreferenced variable doesn't need a stack slot. | 680 // An unreferenced variable doesn't need a stack slot. |
| 614 if (ComputedLiveRanges && Var->getLiveRange().isEmpty()) | 681 if (ComputedLiveRanges && Var->getLiveRange().isEmpty()) |
| 615 continue; | 682 continue; |
| 616 // A spill slot linked to a variable with a stack slot should reuse | 683 // A spill slot linked to a variable with a stack slot should reuse |
| 617 // that stack slot. | 684 // that stack slot. |
| 618 if (Var->getWeight() == RegWeight::Zero && Var->getRegisterOverlap()) { | 685 if (Var->getWeight() == RegWeight::Zero && Var->getRegisterOverlap()) { |
| 619 if (Variable *Linked = Var->getPreferredRegister()) { | 686 if (Variable *Linked = Var->getPreferredRegister()) { |
| 620 if (!Linked->hasReg()) | 687 if (!Linked->hasReg()) { |
| 688 VariablesLinkedToSpillSplots.push_back(Var); | |
| 621 continue; | 689 continue; |
| 690 } | |
| 622 } | 691 } |
| 623 } | 692 } |
| 693 SpilledVariables.push_back(Var); | |
| 694 } | |
| 695 | |
| 696 sortByAlignment(SortedSpilledVariables, SpilledVariables); | |
| 697 for (VarList::const_iterator I = SortedSpilledVariables.begin(), | |
| 698 E = SortedSpilledVariables.end(); | |
| 699 I != E; ++I) { | |
| 700 Variable *Var = *I; | |
| 624 size_t Increment = typeWidthInBytesOnStack(Var->getType()); | 701 size_t Increment = typeWidthInBytesOnStack(Var->getType()); |
| 702 if (!SpillAreaAlignmentBytes) | |
| 703 SpillAreaAlignmentBytes = Increment; | |
| 625 if (SimpleCoalescing) { | 704 if (SimpleCoalescing) { |
| 626 if (Var->isMultiblockLife()) { | 705 if (Var->isMultiblockLife()) { |
| 627 GlobalsSize += Increment; | 706 GlobalsSize += Increment; |
| 628 } else { | 707 } else { |
| 629 SizeT NodeIndex = Var->getLocalUseNode()->getIndex(); | 708 SizeT NodeIndex = Var->getLocalUseNode()->getIndex(); |
| 630 LocalsSize[NodeIndex] += Increment; | 709 LocalsSize[NodeIndex] += Increment; |
| 631 if (LocalsSize[NodeIndex] > LocalsSizeBytes) | 710 if (LocalsSize[NodeIndex] > LocalsSizeBytes) |
| 632 LocalsSizeBytes = LocalsSize[NodeIndex]; | 711 LocalsSizeBytes = LocalsSize[NodeIndex]; |
| 712 if (!LocalsSlotsAlignmentBytes) | |
| 713 LocalsSlotsAlignmentBytes = Increment; | |
| 633 } | 714 } |
| 634 } else { | 715 } else { |
| 635 LocalsSizeBytes += Increment; | 716 LocalsSizeBytes += Increment; |
| 636 } | 717 } |
| 637 } | 718 } |
| 719 uint32_t LocalsSpillAreaSize = LocalsSizeBytes; | |
| 720 | |
| 638 LocalsSizeBytes += GlobalsSize; | 721 LocalsSizeBytes += GlobalsSize; |
| 639 | 722 |
| 640 // Add push instructions for preserved registers. | 723 // Add push instructions for preserved registers. |
| 641 for (SizeT i = 0; i < CalleeSaves.size(); ++i) { | 724 for (SizeT i = 0; i < CalleeSaves.size(); ++i) { |
| 642 if (CalleeSaves[i] && RegsUsed[i]) { | 725 if (CalleeSaves[i] && RegsUsed[i]) { |
| 643 PreservedRegsSizeBytes += 4; | 726 PreservedRegsSizeBytes += 4; |
| 644 const bool SuppressStackAdjustment = true; | 727 const bool SuppressStackAdjustment = true; |
| 645 _push(getPhysicalRegister(i), SuppressStackAdjustment); | 728 _push(getPhysicalRegister(i), SuppressStackAdjustment); |
| 646 } | 729 } |
| 647 } | 730 } |
| 648 | 731 |
| 649 // Generate "push ebp; mov ebp, esp" | 732 // Generate "push ebp; mov ebp, esp" |
| 650 if (IsEbpBasedFrame) { | 733 if (IsEbpBasedFrame) { |
| 651 assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None)) | 734 assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None)) |
| 652 .count() == 0); | 735 .count() == 0); |
| 653 PreservedRegsSizeBytes += 4; | 736 PreservedRegsSizeBytes += 4; |
| 654 Variable *ebp = getPhysicalRegister(Reg_ebp); | 737 Variable *ebp = getPhysicalRegister(Reg_ebp); |
| 655 Variable *esp = getPhysicalRegister(Reg_esp); | 738 Variable *esp = getPhysicalRegister(Reg_esp); |
| 656 const bool SuppressStackAdjustment = true; | 739 const bool SuppressStackAdjustment = true; |
| 657 _push(ebp, SuppressStackAdjustment); | 740 _push(ebp, SuppressStackAdjustment); |
| 658 _mov(ebp, esp); | 741 _mov(ebp, esp); |
| 659 } | 742 } |
| 660 | 743 |
| 744 // Align the variables area. SpillAreaPaddingBytes is the size of | |
| 745 // the region after the preserved registers and before the spill | |
| 746 // areas. | |
| 747 uint32_t SpillAreaPaddingBytes = 0; | |
| 748 if (SpillAreaAlignmentBytes) { | |
| 749 assert(SpillAreaAlignmentBytes <= X86_STACK_ALIGNMENT_BYTES); | |
| 750 uint32_t PaddingStart = X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes; | |
| 751 uint32_t SpillAreaStart = | |
| 752 applyAlignment(PaddingStart, SpillAreaAlignmentBytes); | |
| 753 SpillAreaPaddingBytes = SpillAreaStart - PaddingStart; | |
| 754 LocalsSizeBytes += SpillAreaPaddingBytes; | |
| 755 } | |
| 756 | |
| 757 // If there are separate globals and locals areas, make sure the | |
| 758 // locals area is aligned by padding the end of the globals area. | |
| 759 uint32_t GlobalsAndSubsequentPaddingSize = GlobalsSize; | |
| 760 if (LocalsSlotsAlignmentBytes) { | |
| 761 assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes); | |
| 762 GlobalsAndSubsequentPaddingSize = | |
| 763 applyAlignment(GlobalsSize, LocalsSlotsAlignmentBytes); | |
| 764 LocalsSizeBytes += GlobalsAndSubsequentPaddingSize - GlobalsSize; | |
| 765 } | |
| 766 | |
| 767 // Align esp if necessary. | |
| 661 if (NeedsStackAlignment) { | 768 if (NeedsStackAlignment) { |
| 662 uint32_t StackSize = applyStackAlignment( | 769 uint32_t StackOffset = X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes; |
| 663 X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes + LocalsSizeBytes); | 770 uint32_t StackSize = applyStackAlignment(StackOffset + LocalsSizeBytes); |
| 664 LocalsSizeBytes = | 771 LocalsSizeBytes = StackSize - StackOffset; |
| 665 StackSize - X86_RET_IP_SIZE_BYTES - PreservedRegsSizeBytes; | |
| 666 } | 772 } |
| 667 | 773 |
| 668 // Generate "sub esp, LocalsSizeBytes" | 774 // Generate "sub esp, LocalsSizeBytes" |
| 669 if (LocalsSizeBytes) | 775 if (LocalsSizeBytes) |
| 670 _sub(getPhysicalRegister(Reg_esp), | 776 _sub(getPhysicalRegister(Reg_esp), |
| 671 Ctx->getConstantInt(IceType_i32, LocalsSizeBytes)); | 777 Ctx->getConstantInt(IceType_i32, LocalsSizeBytes)); |
| 672 | 778 |
| 673 resetStackAdjustment(); | 779 resetStackAdjustment(); |
| 674 | 780 |
| 675 // Fill in stack offsets for stack args, and copy args into registers | 781 // Fill in stack offsets for stack args, and copy args into registers |
| 676 // for those that were register-allocated. Args are pushed right to | 782 // for those that were register-allocated. Args are pushed right to |
| 677 // left, so Arg[0] is closest to the stack/frame pointer. | 783 // left, so Arg[0] is closest to the stack/frame pointer. |
| 678 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); | 784 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); |
| 679 size_t BasicFrameOffset = PreservedRegsSizeBytes + X86_RET_IP_SIZE_BYTES; | 785 size_t BasicFrameOffset = PreservedRegsSizeBytes + X86_RET_IP_SIZE_BYTES; |
| 680 if (!IsEbpBasedFrame) | 786 if (!IsEbpBasedFrame) |
| 681 BasicFrameOffset += LocalsSizeBytes; | 787 BasicFrameOffset += LocalsSizeBytes; |
| 682 | 788 |
| 683 unsigned NumXmmArgs = 0; | 789 unsigned NumXmmArgs = 0; |
| 684 for (SizeT i = 0; i < Args.size(); ++i) { | 790 for (SizeT i = 0; i < Args.size(); ++i) { |
| 685 Variable *Arg = Args[i]; | 791 Variable *Arg = Args[i]; |
| 686 // Skip arguments passed in registers. | 792 // Skip arguments passed in registers. |
| 687 if (isVectorType(Arg->getType()) && NumXmmArgs < X86_MAX_XMM_ARGS) { | 793 if (isVectorType(Arg->getType()) && NumXmmArgs < X86_MAX_XMM_ARGS) { |
| 688 ++NumXmmArgs; | 794 ++NumXmmArgs; |
| 689 continue; | 795 continue; |
| 690 } | 796 } |
| 691 finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes); | 797 finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes); |
| 692 } | 798 } |
| 693 | 799 |
| 694 // Fill in stack offsets for locals. | 800 // Fill in stack offsets for locals. |
| 695 size_t TotalGlobalsSize = GlobalsSize; | 801 size_t GlobalsSpaceUsed = SpillAreaPaddingBytes; |
| 696 GlobalsSize = 0; | |
| 697 LocalsSize.assign(LocalsSize.size(), 0); | 802 LocalsSize.assign(LocalsSize.size(), 0); |
| 698 size_t NextStackOffset = 0; | 803 size_t NextStackOffset = GlobalsSpaceUsed; |
| 699 for (VarList::const_iterator I = Variables.begin(), E = Variables.end(); | 804 for (VarList::const_iterator I = SortedSpilledVariables.begin(), |
| 805 E = SortedSpilledVariables.end(); | |
| 700 I != E; ++I) { | 806 I != E; ++I) { |
| 701 Variable *Var = *I; | 807 Variable *Var = *I; |
| 702 if (Var->hasReg()) { | |
| 703 RegsUsed[Var->getRegNum()] = true; | |
| 704 continue; | |
| 705 } | |
| 706 if (Var->getIsArg()) | |
| 707 continue; | |
| 708 if (ComputedLiveRanges && Var->getLiveRange().isEmpty()) | |
| 709 continue; | |
| 710 if (Var->getWeight() == RegWeight::Zero && Var->getRegisterOverlap()) { | |
| 711 if (Variable *Linked = Var->getPreferredRegister()) { | |
| 712 if (!Linked->hasReg()) { | |
| 713 // TODO: Make sure Linked has already been assigned a stack | |
| 714 // slot. | |
| 715 Var->setStackOffset(Linked->getStackOffset()); | |
| 716 continue; | |
| 717 } | |
| 718 } | |
| 719 } | |
| 720 size_t Increment = typeWidthInBytesOnStack(Var->getType()); | 808 size_t Increment = typeWidthInBytesOnStack(Var->getType()); |
| 721 if (SimpleCoalescing) { | 809 if (SimpleCoalescing) { |
| 722 if (Var->isMultiblockLife()) { | 810 if (Var->isMultiblockLife()) { |
| 723 GlobalsSize += Increment; | 811 GlobalsSpaceUsed += Increment; |
| 724 NextStackOffset = GlobalsSize; | 812 NextStackOffset = GlobalsSpaceUsed; |
| 725 } else { | 813 } else { |
| 726 SizeT NodeIndex = Var->getLocalUseNode()->getIndex(); | 814 SizeT NodeIndex = Var->getLocalUseNode()->getIndex(); |
| 727 LocalsSize[NodeIndex] += Increment; | 815 LocalsSize[NodeIndex] += Increment; |
| 728 NextStackOffset = TotalGlobalsSize + LocalsSize[NodeIndex]; | 816 NextStackOffset = SpillAreaPaddingBytes + |
| 817 GlobalsAndSubsequentPaddingSize + | |
| 818 LocalsSize[NodeIndex]; | |
| 729 } | 819 } |
| 730 } else { | 820 } else { |
| 731 NextStackOffset += Increment; | 821 NextStackOffset += Increment; |
| 732 } | 822 } |
| 733 if (IsEbpBasedFrame) | 823 if (IsEbpBasedFrame) |
| 734 Var->setStackOffset(-NextStackOffset); | 824 Var->setStackOffset(-NextStackOffset); |
| 735 else | 825 else |
| 736 Var->setStackOffset(LocalsSizeBytes - NextStackOffset); | 826 Var->setStackOffset(LocalsSizeBytes - NextStackOffset); |
| 737 } | 827 } |
| 738 this->FrameSizeLocals = NextStackOffset; | 828 this->FrameSizeLocals = NextStackOffset - SpillAreaPaddingBytes; |
| 739 this->HasComputedFrame = true; | 829 this->HasComputedFrame = true; |
| 740 | 830 |
| 831 // Assign stack offsets to variables that have been linked to spilled | |
| 832 // variables. | |
| 833 for (VarList::const_iterator I = VariablesLinkedToSpillSplots.begin(), | |
| 834 E = VariablesLinkedToSpillSplots.end(); | |
| 835 I != E; ++I) { | |
| 836 Variable *Var = *I; | |
| 837 Variable *Linked = Var->getPreferredRegister(); | |
| 838 Var->setStackOffset(Linked->getStackOffset()); | |
| 839 } | |
| 840 | |
| 741 if (Func->getContext()->isVerbose(IceV_Frame)) { | 841 if (Func->getContext()->isVerbose(IceV_Frame)) { |
| 742 Func->getContext()->getStrDump() << "LocalsSizeBytes=" << LocalsSizeBytes | 842 Ostream &Str = Func->getContext()->getStrDump(); |
| 743 << "\n" | 843 |
| 744 << "InArgsSizeBytes=" << InArgsSizeBytes | 844 Str << "Stack layout:\n"; |
| 745 << "\n" | 845 uint32_t EspAdjustmentPaddingSize = |
| 746 << "PreservedRegsSizeBytes=" | 846 LocalsSizeBytes - GlobalsAndSubsequentPaddingSize - |
| 747 << PreservedRegsSizeBytes << "\n"; | 847 LocalsSpillAreaSize - PreservedRegsSizeBytes - X86_RET_IP_SIZE_BYTES; |
| 848 Str << " in-args = " << InArgsSizeBytes << " bytes\n" | |
| 849 << " return address = " << X86_RET_IP_SIZE_BYTES << " bytes\n" | |
| 850 << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n" | |
| 851 << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n" | |
| 852 << " globals spill area = " << GlobalsSize << " bytes\n" | |
| 853 << " globals-locals spill areas intermediate padding = " | |
| 854 << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n" | |
| 855 << " locals spill area = " << LocalsSpillAreaSize << " bytes\n" | |
| 856 << " esp alignment padding = " << EspAdjustmentPaddingSize | |
| 857 << " bytes\n"; | |
| 858 | |
| 859 Str << "Stack details:\n" | |
| 860 << " esp adjustment = " << LocalsSizeBytes << " bytes\n" | |
| 861 << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n" | |
| 862 << " locals spill area alignment = " << LocalsSlotsAlignmentBytes | |
| 863 << " bytes\n" | |
| 864 << " is ebp based = " << IsEbpBasedFrame << "\n"; | |
| 748 } | 865 } |
| 749 } | 866 } |
| 750 | 867 |
| 751 void TargetX8632::addEpilog(CfgNode *Node) { | 868 void TargetX8632::addEpilog(CfgNode *Node) { |
| 752 InstList &Insts = Node->getInsts(); | 869 InstList &Insts = Node->getInsts(); |
| 753 InstList::reverse_iterator RI, E; | 870 InstList::reverse_iterator RI, E; |
| 754 for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) { | 871 for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) { |
| 755 if (llvm::isa<InstX8632Ret>(*RI)) | 872 if (llvm::isa<InstX8632Ret>(*RI)) |
| 756 break; | 873 break; |
| 757 } | 874 } |
| (...skipping 226 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 984 assert((AlignmentParam & (AlignmentParam - 1)) == 0); | 1101 assert((AlignmentParam & (AlignmentParam - 1)) == 0); |
| 985 assert((X86_STACK_ALIGNMENT_BYTES & (X86_STACK_ALIGNMENT_BYTES - 1)) == 0); | 1102 assert((X86_STACK_ALIGNMENT_BYTES & (X86_STACK_ALIGNMENT_BYTES - 1)) == 0); |
| 986 | 1103 |
| 987 uint32_t Alignment = std::max(AlignmentParam, X86_STACK_ALIGNMENT_BYTES); | 1104 uint32_t Alignment = std::max(AlignmentParam, X86_STACK_ALIGNMENT_BYTES); |
| 988 if (Alignment > X86_STACK_ALIGNMENT_BYTES) { | 1105 if (Alignment > X86_STACK_ALIGNMENT_BYTES) { |
| 989 _and(esp, Ctx->getConstantInt(IceType_i32, -Alignment)); | 1106 _and(esp, Ctx->getConstantInt(IceType_i32, -Alignment)); |
| 990 } | 1107 } |
| 991 if (ConstantInteger *ConstantTotalSize = | 1108 if (ConstantInteger *ConstantTotalSize = |
| 992 llvm::dyn_cast<ConstantInteger>(TotalSize)) { | 1109 llvm::dyn_cast<ConstantInteger>(TotalSize)) { |
| 993 uint32_t Value = ConstantTotalSize->getValue(); | 1110 uint32_t Value = ConstantTotalSize->getValue(); |
| 994 // Round Value up to the next highest multiple of the alignment. | 1111 Value = applyAlignment(Value, Alignment); |
| 995 Value = (Value + Alignment - 1) & -Alignment; | |
| 996 _sub(esp, Ctx->getConstantInt(IceType_i32, Value)); | 1112 _sub(esp, Ctx->getConstantInt(IceType_i32, Value)); |
| 997 } else { | 1113 } else { |
| 998 // Non-constant sizes need to be adjusted to the next highest | 1114 // Non-constant sizes need to be adjusted to the next highest |
| 999 // multiple of the required alignment at runtime. | 1115 // multiple of the required alignment at runtime. |
| 1000 Variable *T = makeReg(IceType_i32); | 1116 Variable *T = makeReg(IceType_i32); |
| 1001 _mov(T, TotalSize); | 1117 _mov(T, TotalSize); |
| 1002 _add(T, Ctx->getConstantInt(IceType_i32, Alignment - 1)); | 1118 _add(T, Ctx->getConstantInt(IceType_i32, Alignment - 1)); |
| 1003 _and(T, Ctx->getConstantInt(IceType_i32, -Alignment)); | 1119 _and(T, Ctx->getConstantInt(IceType_i32, -Alignment)); |
| 1004 _sub(esp, T); | 1120 _sub(esp, T); |
| 1005 } | 1121 } |
| (...skipping 226 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1232 case InstArithmetic::Fsub: | 1348 case InstArithmetic::Fsub: |
| 1233 case InstArithmetic::Fmul: | 1349 case InstArithmetic::Fmul: |
| 1234 case InstArithmetic::Fdiv: | 1350 case InstArithmetic::Fdiv: |
| 1235 case InstArithmetic::Frem: | 1351 case InstArithmetic::Frem: |
| 1236 llvm_unreachable("FP instruction with i64 type"); | 1352 llvm_unreachable("FP instruction with i64 type"); |
| 1237 break; | 1353 break; |
| 1238 } | 1354 } |
| 1239 } else if (isVectorType(Dest->getType())) { | 1355 } else if (isVectorType(Dest->getType())) { |
| 1240 // TODO: Trap on integer divide and integer modulo by zero. | 1356 // TODO: Trap on integer divide and integer modulo by zero. |
| 1241 // See: https://code.google.com/p/nativeclient/issues/detail?id=3899 | 1357 // See: https://code.google.com/p/nativeclient/issues/detail?id=3899 |
| 1242 // | |
| 1243 // TODO(wala): ALIGNHACK: All vector arithmetic is currently done in | |
| 1244 // registers. This is a workaround of the fact that there is no | |
| 1245 // support for aligning stack operands. Once there is support, | |
| 1246 // remove LEGAL_HACK. | |
| 1247 #define LEGAL_HACK(s) legalizeToVar((s)) | |
| 1248 switch (Inst->getOp()) { | 1358 switch (Inst->getOp()) { |
| 1249 case InstArithmetic::_num: | 1359 case InstArithmetic::_num: |
| 1250 llvm_unreachable("Unknown arithmetic operator"); | 1360 llvm_unreachable("Unknown arithmetic operator"); |
| 1251 break; | 1361 break; |
| 1252 case InstArithmetic::Add: { | 1362 case InstArithmetic::Add: { |
| 1253 Variable *T = makeReg(Dest->getType()); | 1363 Variable *T = makeReg(Dest->getType()); |
| 1254 _movp(T, Src0); | 1364 _movp(T, Src0); |
| 1255 _padd(T, LEGAL_HACK(Src1)); | 1365 _padd(T, Src1); |
| 1256 _movp(Dest, T); | 1366 _movp(Dest, T); |
| 1257 } break; | 1367 } break; |
| 1258 case InstArithmetic::And: { | 1368 case InstArithmetic::And: { |
| 1259 Variable *T = makeReg(Dest->getType()); | 1369 Variable *T = makeReg(Dest->getType()); |
| 1260 _movp(T, Src0); | 1370 _movp(T, Src0); |
| 1261 _pand(T, LEGAL_HACK(Src1)); | 1371 _pand(T, Src1); |
| 1262 _movp(Dest, T); | 1372 _movp(Dest, T); |
| 1263 } break; | 1373 } break; |
| 1264 case InstArithmetic::Or: { | 1374 case InstArithmetic::Or: { |
| 1265 Variable *T = makeReg(Dest->getType()); | 1375 Variable *T = makeReg(Dest->getType()); |
| 1266 _movp(T, Src0); | 1376 _movp(T, Src0); |
| 1267 _por(T, LEGAL_HACK(Src1)); | 1377 _por(T, Src1); |
| 1268 _movp(Dest, T); | 1378 _movp(Dest, T); |
| 1269 } break; | 1379 } break; |
| 1270 case InstArithmetic::Xor: { | 1380 case InstArithmetic::Xor: { |
| 1271 Variable *T = makeReg(Dest->getType()); | 1381 Variable *T = makeReg(Dest->getType()); |
| 1272 _movp(T, Src0); | 1382 _movp(T, Src0); |
| 1273 _pxor(T, LEGAL_HACK(Src1)); | 1383 _pxor(T, Src1); |
| 1274 _movp(Dest, T); | 1384 _movp(Dest, T); |
| 1275 } break; | 1385 } break; |
| 1276 case InstArithmetic::Sub: { | 1386 case InstArithmetic::Sub: { |
| 1277 Variable *T = makeReg(Dest->getType()); | 1387 Variable *T = makeReg(Dest->getType()); |
| 1278 _movp(T, Src0); | 1388 _movp(T, Src0); |
| 1279 _psub(T, LEGAL_HACK(Src1)); | 1389 _psub(T, Src1); |
| 1280 _movp(Dest, T); | 1390 _movp(Dest, T); |
| 1281 } break; | 1391 } break; |
| 1282 case InstArithmetic::Mul: { | 1392 case InstArithmetic::Mul: { |
| 1283 bool TypesAreValidForPmull = | 1393 bool TypesAreValidForPmull = |
| 1284 Dest->getType() == IceType_v4i32 || Dest->getType() == IceType_v8i16; | 1394 Dest->getType() == IceType_v4i32 || Dest->getType() == IceType_v8i16; |
| 1285 bool InstructionSetIsValidForPmull = | 1395 bool InstructionSetIsValidForPmull = |
| 1286 Dest->getType() == IceType_v8i16 || InstructionSet >= SSE4_1; | 1396 Dest->getType() == IceType_v8i16 || InstructionSet >= SSE4_1; |
| 1287 if (TypesAreValidForPmull && InstructionSetIsValidForPmull) { | 1397 if (TypesAreValidForPmull && InstructionSetIsValidForPmull) { |
| 1288 Variable *T = makeReg(Dest->getType()); | 1398 Variable *T = makeReg(Dest->getType()); |
| 1289 _movp(T, Src0); | 1399 _movp(T, Src0); |
| 1290 _pmull(T, LEGAL_HACK(Src1)); | 1400 _pmull(T, Src1); |
| 1291 _movp(Dest, T); | 1401 _movp(Dest, T); |
| 1292 } else if (Dest->getType() == IceType_v4i32) { | 1402 } else if (Dest->getType() == IceType_v4i32) { |
| 1293 // Lowering sequence: | 1403 // Lowering sequence: |
| 1294 // Note: The mask arguments have index 0 on the left. | 1404 // Note: The mask arguments have index 0 on the left. |
| 1295 // | 1405 // |
| 1296 // movups T1, Src0 | 1406 // movups T1, Src0 |
| 1297 // pshufd T2, Src0, {1,0,3,0} | 1407 // pshufd T2, Src0, {1,0,3,0} |
| 1298 // pshufd T3, Src1, {1,0,3,0} | 1408 // pshufd T3, Src1, {1,0,3,0} |
| 1299 // # T1 = {Src0[0] * Src1[0], Src0[2] * Src1[2]} | 1409 // # T1 = {Src0[0] * Src1[0], Src0[2] * Src1[2]} |
| 1300 // pmuludq T1, Src1 | 1410 // pmuludq T1, Src1 |
| (...skipping 12 matching lines...) Expand all Loading... | |
| 1313 // Dest[0, 2], Src[0, 2] | 1423 // Dest[0, 2], Src[0, 2] |
| 1314 const unsigned Mask0202 = 0x88; | 1424 const unsigned Mask0202 = 0x88; |
| 1315 // Mask that directs pshufd to create a vector with entries | 1425 // Mask that directs pshufd to create a vector with entries |
| 1316 // Src[0, 2, 1, 3] | 1426 // Src[0, 2, 1, 3] |
| 1317 const unsigned Mask0213 = 0xd8; | 1427 const unsigned Mask0213 = 0xd8; |
| 1318 Variable *T1 = makeReg(IceType_v4i32); | 1428 Variable *T1 = makeReg(IceType_v4i32); |
| 1319 Variable *T2 = makeReg(IceType_v4i32); | 1429 Variable *T2 = makeReg(IceType_v4i32); |
| 1320 Variable *T3 = makeReg(IceType_v4i32); | 1430 Variable *T3 = makeReg(IceType_v4i32); |
| 1321 Variable *T4 = makeReg(IceType_v4i32); | 1431 Variable *T4 = makeReg(IceType_v4i32); |
| 1322 _movp(T1, Src0); | 1432 _movp(T1, Src0); |
| 1323 // TODO(wala): ALIGHNHACK: Replace Src0R with Src0 and Src1R | 1433 _pshufd(T2, Src0, Mask1030); |
| 1324 // with Src1 after stack operand alignment support is | 1434 _pshufd(T3, Src1, Mask1030); |
| 1325 // implemented. | 1435 _pmuludq(T1, Src1); |
| 1326 Variable *Src0R = LEGAL_HACK(Src0); | |
| 1327 Variable *Src1R = LEGAL_HACK(Src1); | |
| 1328 _pshufd(T2, Src0R, Mask1030); | |
| 1329 _pshufd(T3, Src1R, Mask1030); | |
| 1330 _pmuludq(T1, Src1R); | |
| 1331 _pmuludq(T2, T3); | 1436 _pmuludq(T2, T3); |
| 1332 _shufps(T1, T2, Ctx->getConstantInt(IceType_i8, Mask0202)); | 1437 _shufps(T1, T2, Ctx->getConstantInt(IceType_i8, Mask0202)); |
| 1333 _pshufd(T4, T1, Ctx->getConstantInt(IceType_i8, Mask0213)); | 1438 _pshufd(T4, T1, Ctx->getConstantInt(IceType_i8, Mask0213)); |
| 1334 _movp(Dest, T4); | 1439 _movp(Dest, T4); |
| 1335 } else { | 1440 } else { |
| 1336 assert(Dest->getType() == IceType_v16i8); | 1441 assert(Dest->getType() == IceType_v16i8); |
| 1337 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1); | 1442 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1); |
| 1338 } | 1443 } |
| 1339 } break; | 1444 } break; |
| 1340 case InstArithmetic::Shl: | 1445 case InstArithmetic::Shl: |
| 1341 case InstArithmetic::Lshr: | 1446 case InstArithmetic::Lshr: |
| 1342 case InstArithmetic::Ashr: | 1447 case InstArithmetic::Ashr: |
| 1343 case InstArithmetic::Udiv: | 1448 case InstArithmetic::Udiv: |
| 1344 case InstArithmetic::Urem: | 1449 case InstArithmetic::Urem: |
| 1345 case InstArithmetic::Sdiv: | 1450 case InstArithmetic::Sdiv: |
| 1346 case InstArithmetic::Srem: | 1451 case InstArithmetic::Srem: |
| 1347 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1); | 1452 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1); |
| 1348 break; | 1453 break; |
| 1349 case InstArithmetic::Fadd: { | 1454 case InstArithmetic::Fadd: { |
| 1350 Variable *T = makeReg(Dest->getType()); | 1455 Variable *T = makeReg(Dest->getType()); |
| 1351 _movp(T, Src0); | 1456 _movp(T, Src0); |
| 1352 _addps(T, LEGAL_HACK(Src1)); | 1457 _addps(T, Src1); |
| 1353 _movp(Dest, T); | 1458 _movp(Dest, T); |
| 1354 } break; | 1459 } break; |
| 1355 case InstArithmetic::Fsub: { | 1460 case InstArithmetic::Fsub: { |
| 1356 Variable *T = makeReg(Dest->getType()); | 1461 Variable *T = makeReg(Dest->getType()); |
| 1357 _movp(T, Src0); | 1462 _movp(T, Src0); |
| 1358 _subps(T, LEGAL_HACK(Src1)); | 1463 _subps(T, Src1); |
| 1359 _movp(Dest, T); | 1464 _movp(Dest, T); |
| 1360 } break; | 1465 } break; |
| 1361 case InstArithmetic::Fmul: { | 1466 case InstArithmetic::Fmul: { |
| 1362 Variable *T = makeReg(Dest->getType()); | 1467 Variable *T = makeReg(Dest->getType()); |
| 1363 _movp(T, Src0); | 1468 _movp(T, Src0); |
| 1364 _mulps(T, LEGAL_HACK(Src1)); | 1469 _mulps(T, Src1); |
| 1365 _movp(Dest, T); | 1470 _movp(Dest, T); |
| 1366 } break; | 1471 } break; |
| 1367 case InstArithmetic::Fdiv: { | 1472 case InstArithmetic::Fdiv: { |
| 1368 Variable *T = makeReg(Dest->getType()); | 1473 Variable *T = makeReg(Dest->getType()); |
| 1369 _movp(T, Src0); | 1474 _movp(T, Src0); |
| 1370 _divps(T, LEGAL_HACK(Src1)); | 1475 _divps(T, Src1); |
| 1371 _movp(Dest, T); | 1476 _movp(Dest, T); |
| 1372 } break; | 1477 } break; |
| 1373 case InstArithmetic::Frem: | 1478 case InstArithmetic::Frem: |
| 1374 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1); | 1479 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1); |
| 1375 break; | 1480 break; |
| 1376 } | 1481 } |
| 1377 #undef LEGAL_HACK | |
| 1378 } else { // Dest->getType() is non-i64 scalar | 1482 } else { // Dest->getType() is non-i64 scalar |
| 1379 Variable *T_edx = NULL; | 1483 Variable *T_edx = NULL; |
| 1380 Variable *T = NULL; | 1484 Variable *T = NULL; |
| 1381 switch (Inst->getOp()) { | 1485 switch (Inst->getOp()) { |
| 1382 case InstArithmetic::_num: | 1486 case InstArithmetic::_num: |
| 1383 llvm_unreachable("Unknown arithmetic operator"); | 1487 llvm_unreachable("Unknown arithmetic operator"); |
| 1384 break; | 1488 break; |
| 1385 case InstArithmetic::Add: | 1489 case InstArithmetic::Add: |
| 1386 _mov(T, Src0); | 1490 _mov(T, Src0); |
| 1387 _add(T, Src1); | 1491 _add(T, Src1); |
| (...skipping 804 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2192 // TODO(wala): Determine the best lowering sequences for each type. | 2296 // TODO(wala): Determine the best lowering sequences for each type. |
| 2193 bool CanUsePextr = | 2297 bool CanUsePextr = |
| 2194 Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1; | 2298 Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1; |
| 2195 if (CanUsePextr && Ty != IceType_v4f32) { | 2299 if (CanUsePextr && Ty != IceType_v4f32) { |
| 2196 // Use pextrb, pextrw, or pextrd. | 2300 // Use pextrb, pextrw, or pextrd. |
| 2197 Constant *Mask = Ctx->getConstantInt(IceType_i8, Index); | 2301 Constant *Mask = Ctx->getConstantInt(IceType_i8, Index); |
| 2198 Variable *SourceVectR = legalizeToVar(SourceVectNotLegalized); | 2302 Variable *SourceVectR = legalizeToVar(SourceVectNotLegalized); |
| 2199 _pextr(ExtractedElementR, SourceVectR, Mask); | 2303 _pextr(ExtractedElementR, SourceVectR, Mask); |
| 2200 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { | 2304 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { |
| 2201 // Use pshufd and movd/movss. | 2305 // Use pshufd and movd/movss. |
| 2202 // | |
| 2203 // ALIGNHACK: Force vector operands to registers in instructions | |
| 2204 // that require aligned memory operands until support for data | |
| 2205 // alignment is implemented. | |
| 2206 #define ALIGN_HACK(Vect) legalizeToVar((Vect)) | |
| 2207 Operand *SourceVectRM = | |
| 2208 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem); | |
| 2209 Variable *T = NULL; | 2306 Variable *T = NULL; |
| 2210 if (Index) { | 2307 if (Index) { |
| 2211 // The shuffle only needs to occur if the element to be extracted | 2308 // The shuffle only needs to occur if the element to be extracted |
| 2212 // is not at the lowest index. | 2309 // is not at the lowest index. |
| 2213 Constant *Mask = Ctx->getConstantInt(IceType_i8, Index); | 2310 Constant *Mask = Ctx->getConstantInt(IceType_i8, Index); |
| 2214 T = makeReg(Ty); | 2311 T = makeReg(Ty); |
| 2215 _pshufd(T, ALIGN_HACK(SourceVectRM), Mask); | 2312 _pshufd(T, legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem), Mask); |
| 2216 } else { | 2313 } else { |
| 2217 T = ALIGN_HACK(SourceVectRM); | 2314 T = legalizeToVar(SourceVectNotLegalized); |
| 2218 } | 2315 } |
| 2219 | 2316 |
| 2220 if (InVectorElementTy == IceType_i32) { | 2317 if (InVectorElementTy == IceType_i32) { |
| 2221 _movd(ExtractedElementR, T); | 2318 _movd(ExtractedElementR, T); |
| 2222 } else { // Ty == Icetype_f32 | 2319 } else { // Ty == Icetype_f32 |
| 2223 // TODO(wala): _movss is only used here because _mov does not | 2320 // TODO(wala): _movss is only used here because _mov does not |
| 2224 // allow a vector source and a scalar destination. _mov should be | 2321 // allow a vector source and a scalar destination. _mov should be |
| 2225 // able to be used here. | 2322 // able to be used here. |
| 2226 // _movss is a binary instruction, so the FakeDef is needed to | 2323 // _movss is a binary instruction, so the FakeDef is needed to |
| 2227 // keep the live range analysis consistent. | 2324 // keep the live range analysis consistent. |
| 2228 Context.insert(InstFakeDef::create(Func, ExtractedElementR)); | 2325 Context.insert(InstFakeDef::create(Func, ExtractedElementR)); |
| 2229 _movss(ExtractedElementR, T); | 2326 _movss(ExtractedElementR, T); |
| 2230 } | 2327 } |
| 2231 #undef ALIGN_HACK | |
| 2232 } else { | 2328 } else { |
| 2233 assert(Ty == IceType_v16i8 || Ty == IceType_v16i1); | 2329 assert(Ty == IceType_v16i8 || Ty == IceType_v16i1); |
| 2234 // Spill the value to a stack slot and do the extraction in memory. | 2330 // Spill the value to a stack slot and do the extraction in memory. |
| 2235 // | 2331 // |
| 2236 // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when | 2332 // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when |
| 2237 // support for legalizing to mem is implemented. | 2333 // support for legalizing to mem is implemented. |
| 2238 Variable *Slot = Func->makeVariable(Ty, Context.getNode()); | 2334 Variable *Slot = Func->makeVariable(Ty, Context.getNode()); |
| 2239 Slot->setWeight(RegWeight::Zero); | 2335 Slot->setWeight(RegWeight::Zero); |
| 2240 _movp(Slot, legalizeToVar(SourceVectNotLegalized)); | 2336 _movp(Slot, legalizeToVar(SourceVectNotLegalized)); |
| 2241 | 2337 |
| (...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2280 | 2376 |
| 2281 if (Condition == InstFcmp::True) { | 2377 if (Condition == InstFcmp::True) { |
| 2282 // makeVectorOfOnes() requires an integer vector type. | 2378 // makeVectorOfOnes() requires an integer vector type. |
| 2283 T = makeVectorOfMinusOnes(IceType_v4i32); | 2379 T = makeVectorOfMinusOnes(IceType_v4i32); |
| 2284 } else if (Condition == InstFcmp::False) { | 2380 } else if (Condition == InstFcmp::False) { |
| 2285 T = makeVectorOfZeros(Dest->getType()); | 2381 T = makeVectorOfZeros(Dest->getType()); |
| 2286 } else { | 2382 } else { |
| 2287 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); | 2383 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); |
| 2288 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); | 2384 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); |
| 2289 | 2385 |
| 2290 // ALIGNHACK: Without support for data alignment, both operands to | |
| 2291 // cmpps need to be forced into registers. Once support for data | |
| 2292 // alignment is implemented, remove LEGAL_HACK. | |
| 2293 #define LEGAL_HACK(Vect) legalizeToVar((Vect)) | |
| 2294 switch (Condition) { | 2386 switch (Condition) { |
| 2295 default: { | 2387 default: { |
| 2296 InstX8632Cmpps::CmppsCond Predicate = TableFcmp[Index].Predicate; | 2388 InstX8632Cmpps::CmppsCond Predicate = TableFcmp[Index].Predicate; |
| 2297 assert(Predicate != InstX8632Cmpps::Cmpps_Invalid); | 2389 assert(Predicate != InstX8632Cmpps::Cmpps_Invalid); |
| 2298 T = makeReg(Src0RM->getType()); | 2390 T = makeReg(Src0RM->getType()); |
| 2299 _movp(T, Src0RM); | 2391 _movp(T, Src0RM); |
| 2300 _cmpps(T, LEGAL_HACK(Src1RM), Predicate); | 2392 _cmpps(T, Src1RM, Predicate); |
| 2301 } break; | 2393 } break; |
| 2302 case InstFcmp::One: { | 2394 case InstFcmp::One: { |
| 2303 // Check both unequal and ordered. | 2395 // Check both unequal and ordered. |
| 2304 T = makeReg(Src0RM->getType()); | 2396 T = makeReg(Src0RM->getType()); |
| 2305 Variable *T2 = makeReg(Src0RM->getType()); | 2397 Variable *T2 = makeReg(Src0RM->getType()); |
| 2306 Src1RM = LEGAL_HACK(Src1RM); | |
| 2307 _movp(T, Src0RM); | 2398 _movp(T, Src0RM); |
| 2308 _cmpps(T, Src1RM, InstX8632Cmpps::Cmpps_neq); | 2399 _cmpps(T, Src1RM, InstX8632Cmpps::Cmpps_neq); |
| 2309 _movp(T2, Src0RM); | 2400 _movp(T2, Src0RM); |
| 2310 _cmpps(T2, Src1RM, InstX8632Cmpps::Cmpps_ord); | 2401 _cmpps(T2, Src1RM, InstX8632Cmpps::Cmpps_ord); |
| 2311 _pand(T, T2); | 2402 _pand(T, T2); |
| 2312 } break; | 2403 } break; |
| 2313 case InstFcmp::Ueq: { | 2404 case InstFcmp::Ueq: { |
| 2314 // Check both equal or unordered. | 2405 // Check both equal or unordered. |
| 2315 T = makeReg(Src0RM->getType()); | 2406 T = makeReg(Src0RM->getType()); |
| 2316 Variable *T2 = makeReg(Src0RM->getType()); | 2407 Variable *T2 = makeReg(Src0RM->getType()); |
| 2317 Src1RM = LEGAL_HACK(Src1RM); | |
| 2318 _movp(T, Src0RM); | 2408 _movp(T, Src0RM); |
| 2319 _cmpps(T, Src1RM, InstX8632Cmpps::Cmpps_eq); | 2409 _cmpps(T, Src1RM, InstX8632Cmpps::Cmpps_eq); |
| 2320 _movp(T2, Src0RM); | 2410 _movp(T2, Src0RM); |
| 2321 _cmpps(T2, Src1RM, InstX8632Cmpps::Cmpps_unord); | 2411 _cmpps(T2, Src1RM, InstX8632Cmpps::Cmpps_unord); |
| 2322 _por(T, T2); | 2412 _por(T, T2); |
| 2323 } break; | 2413 } break; |
| 2324 } | 2414 } |
| 2325 #undef LEGAL_HACK | |
| 2326 } | 2415 } |
| 2327 | 2416 |
| 2328 _movp(Dest, T); | 2417 _movp(Dest, T); |
| 2329 eliminateNextVectorSextInstruction(Dest); | 2418 eliminateNextVectorSextInstruction(Dest); |
| 2330 return; | 2419 return; |
| 2331 } | 2420 } |
| 2332 | 2421 |
| 2333 // Lowering a = fcmp cond, b, c | 2422 // Lowering a = fcmp cond, b, c |
| 2334 // ucomiss b, c /* only if C1 != Br_None */ | 2423 // ucomiss b, c /* only if C1 != Br_None */ |
| 2335 // /* but swap b,c order if SwapOperands==true */ | 2424 // /* but swap b,c order if SwapOperands==true */ |
| (...skipping 84 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2420 Variable *T1 = makeReg(Ty); | 2509 Variable *T1 = makeReg(Ty); |
| 2421 Variable *HighOrderBits = makeVectorOfHighOrderBits(Ty); | 2510 Variable *HighOrderBits = makeVectorOfHighOrderBits(Ty); |
| 2422 _movp(T0, Src0RM); | 2511 _movp(T0, Src0RM); |
| 2423 _pxor(T0, HighOrderBits); | 2512 _pxor(T0, HighOrderBits); |
| 2424 _movp(T1, Src1RM); | 2513 _movp(T1, Src1RM); |
| 2425 _pxor(T1, HighOrderBits); | 2514 _pxor(T1, HighOrderBits); |
| 2426 Src0RM = T0; | 2515 Src0RM = T0; |
| 2427 Src1RM = T1; | 2516 Src1RM = T1; |
| 2428 } | 2517 } |
| 2429 | 2518 |
| 2430 // TODO: ALIGNHACK: Both operands to compare instructions need to be | |
| 2431 // in registers until data alignment support is implemented. Once | |
| 2432 // there is support for data alignment, LEGAL_HACK can be removed. | |
| 2433 #define LEGAL_HACK(Vect) legalizeToVar((Vect)) | |
| 2434 Variable *T = makeReg(Ty); | 2519 Variable *T = makeReg(Ty); |
| 2435 switch (Condition) { | 2520 switch (Condition) { |
| 2436 default: | 2521 default: |
| 2437 llvm_unreachable("unexpected condition"); | 2522 llvm_unreachable("unexpected condition"); |
| 2438 break; | 2523 break; |
| 2439 case InstIcmp::Eq: { | 2524 case InstIcmp::Eq: { |
| 2440 _movp(T, Src0RM); | 2525 _movp(T, Src0RM); |
| 2441 _pcmpeq(T, LEGAL_HACK(Src1RM)); | 2526 _pcmpeq(T, Src1RM); |
| 2442 } break; | 2527 } break; |
| 2443 case InstIcmp::Ne: { | 2528 case InstIcmp::Ne: { |
| 2444 _movp(T, Src0RM); | 2529 _movp(T, Src0RM); |
| 2445 _pcmpeq(T, LEGAL_HACK(Src1RM)); | 2530 _pcmpeq(T, Src1RM); |
| 2446 Variable *MinusOne = makeVectorOfMinusOnes(Ty); | 2531 Variable *MinusOne = makeVectorOfMinusOnes(Ty); |
| 2447 _pxor(T, MinusOne); | 2532 _pxor(T, MinusOne); |
| 2448 } break; | 2533 } break; |
| 2449 case InstIcmp::Ugt: | 2534 case InstIcmp::Ugt: |
| 2450 case InstIcmp::Sgt: { | 2535 case InstIcmp::Sgt: { |
| 2451 _movp(T, Src0RM); | 2536 _movp(T, Src0RM); |
| 2452 _pcmpgt(T, LEGAL_HACK(Src1RM)); | 2537 _pcmpgt(T, Src1RM); |
| 2453 } break; | 2538 } break; |
| 2454 case InstIcmp::Uge: | 2539 case InstIcmp::Uge: |
| 2455 case InstIcmp::Sge: { | 2540 case InstIcmp::Sge: { |
| 2456 // !(Src1RM > Src0RM) | 2541 // !(Src1RM > Src0RM) |
| 2457 _movp(T, Src1RM); | 2542 _movp(T, Src1RM); |
| 2458 _pcmpgt(T, LEGAL_HACK(Src0RM)); | 2543 _pcmpgt(T, Src0RM); |
| 2459 Variable *MinusOne = makeVectorOfMinusOnes(Ty); | 2544 Variable *MinusOne = makeVectorOfMinusOnes(Ty); |
| 2460 _pxor(T, MinusOne); | 2545 _pxor(T, MinusOne); |
| 2461 } break; | 2546 } break; |
| 2462 case InstIcmp::Ult: | 2547 case InstIcmp::Ult: |
| 2463 case InstIcmp::Slt: { | 2548 case InstIcmp::Slt: { |
| 2464 _movp(T, Src1RM); | 2549 _movp(T, Src1RM); |
| 2465 _pcmpgt(T, LEGAL_HACK(Src0RM)); | 2550 _pcmpgt(T, Src0RM); |
| 2466 } break; | 2551 } break; |
| 2467 case InstIcmp::Ule: | 2552 case InstIcmp::Ule: |
| 2468 case InstIcmp::Sle: { | 2553 case InstIcmp::Sle: { |
| 2469 // !(Src0RM > Src1RM) | 2554 // !(Src0RM > Src1RM) |
| 2470 _movp(T, Src0RM); | 2555 _movp(T, Src0RM); |
| 2471 _pcmpgt(T, LEGAL_HACK(Src1RM)); | 2556 _pcmpgt(T, Src1RM); |
| 2472 Variable *MinusOne = makeVectorOfMinusOnes(Ty); | 2557 Variable *MinusOne = makeVectorOfMinusOnes(Ty); |
| 2473 _pxor(T, MinusOne); | 2558 _pxor(T, MinusOne); |
| 2474 } break; | 2559 } break; |
| 2475 } | 2560 } |
| 2476 #undef LEGAL_HACK | |
| 2477 | 2561 |
| 2478 _movp(Dest, T); | 2562 _movp(Dest, T); |
| 2479 eliminateNextVectorSextInstruction(Dest); | 2563 eliminateNextVectorSextInstruction(Dest); |
| 2480 return; | 2564 return; |
| 2481 } | 2565 } |
| 2482 | 2566 |
| 2483 // If Src1 is an immediate, or known to be a physical register, we can | 2567 // If Src1 is an immediate, or known to be a physical register, we can |
| 2484 // allow Src0 to be a memory operand. Otherwise, Src0 must be copied into | 2568 // allow Src0 to be a memory operand. Otherwise, Src0 must be copied into |
| 2485 // a physical register. (Actually, either Src0 or Src1 can be chosen for | 2569 // a physical register. (Actually, either Src0 or Src1 can be chosen for |
| 2486 // the physical register, but unfortunately we have to commit to one or | 2570 // the physical register, but unfortunately we have to commit to one or |
| (...skipping 155 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2642 // insertelement into index 3 (result is stored in T): | 2726 // insertelement into index 3 (result is stored in T): |
| 2643 // T := SourceVectRM | 2727 // T := SourceVectRM |
| 2644 // ElementR := ElementR[0, 0] T[0, 2] | 2728 // ElementR := ElementR[0, 0] T[0, 2] |
| 2645 // T := T[0, 1] ElementR[3, 0] | 2729 // T := T[0, 1] ElementR[3, 0] |
| 2646 const unsigned char Mask1[3] = {0, 192, 128}; | 2730 const unsigned char Mask1[3] = {0, 192, 128}; |
| 2647 const unsigned char Mask2[3] = {227, 196, 52}; | 2731 const unsigned char Mask2[3] = {227, 196, 52}; |
| 2648 | 2732 |
| 2649 Constant *Mask1Constant = Ctx->getConstantInt(IceType_i8, Mask1[Index - 1]); | 2733 Constant *Mask1Constant = Ctx->getConstantInt(IceType_i8, Mask1[Index - 1]); |
| 2650 Constant *Mask2Constant = Ctx->getConstantInt(IceType_i8, Mask2[Index - 1]); | 2734 Constant *Mask2Constant = Ctx->getConstantInt(IceType_i8, Mask2[Index - 1]); |
| 2651 | 2735 |
| 2652 // ALIGNHACK: Force vector operands to registers in instructions | |
| 2653 // that require aligned memory operands until support for data | |
| 2654 // alignment is implemented. | |
| 2655 #define ALIGN_HACK(Vect) legalizeToVar((Vect)) | |
| 2656 if (Index == 1) { | 2736 if (Index == 1) { |
| 2657 SourceVectRM = ALIGN_HACK(SourceVectRM); | |
| 2658 _shufps(ElementR, SourceVectRM, Mask1Constant); | 2737 _shufps(ElementR, SourceVectRM, Mask1Constant); |
| 2659 _shufps(ElementR, SourceVectRM, Mask2Constant); | 2738 _shufps(ElementR, SourceVectRM, Mask2Constant); |
| 2660 _movp(Inst->getDest(), ElementR); | 2739 _movp(Inst->getDest(), ElementR); |
| 2661 } else { | 2740 } else { |
| 2662 Variable *T = makeReg(Ty); | 2741 Variable *T = makeReg(Ty); |
| 2663 _movp(T, SourceVectRM); | 2742 _movp(T, SourceVectRM); |
| 2664 _shufps(ElementR, T, Mask1Constant); | 2743 _shufps(ElementR, T, Mask1Constant); |
| 2665 _shufps(T, ElementR, Mask2Constant); | 2744 _shufps(T, ElementR, Mask2Constant); |
| 2666 _movp(Inst->getDest(), T); | 2745 _movp(Inst->getDest(), T); |
| 2667 } | 2746 } |
| 2668 #undef ALIGN_HACK | |
| 2669 } else { | 2747 } else { |
| 2670 assert(Ty == IceType_v16i8 || Ty == IceType_v16i1); | 2748 assert(Ty == IceType_v16i8 || Ty == IceType_v16i1); |
| 2671 // Spill the value to a stack slot and perform the insertion in | 2749 // Spill the value to a stack slot and perform the insertion in |
| 2672 // memory. | 2750 // memory. |
| 2673 // | 2751 // |
| 2674 // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when | 2752 // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when |
| 2675 // support for legalizing to mem is implemented. | 2753 // support for legalizing to mem is implemented. |
| 2676 Variable *Slot = Func->makeVariable(Ty, Context.getNode()); | 2754 Variable *Slot = Func->makeVariable(Ty, Context.getNode()); |
| 2677 Slot->setWeight(RegWeight::Zero); | 2755 Slot->setWeight(RegWeight::Zero); |
| 2678 _movp(Slot, legalizeToVar(SourceVectNotLegalized)); | 2756 _movp(Slot, legalizeToVar(SourceVectNotLegalized)); |
| (...skipping 941 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3620 Variable *Dest = Inst->getDest(); | 3698 Variable *Dest = Inst->getDest(); |
| 3621 Operand *SrcT = Inst->getTrueOperand(); | 3699 Operand *SrcT = Inst->getTrueOperand(); |
| 3622 Operand *SrcF = Inst->getFalseOperand(); | 3700 Operand *SrcF = Inst->getFalseOperand(); |
| 3623 Operand *Condition = Inst->getCondition(); | 3701 Operand *Condition = Inst->getCondition(); |
| 3624 | 3702 |
| 3625 if (isVectorType(Dest->getType())) { | 3703 if (isVectorType(Dest->getType())) { |
| 3626 Type SrcTy = SrcT->getType(); | 3704 Type SrcTy = SrcT->getType(); |
| 3627 Variable *T = makeReg(SrcTy); | 3705 Variable *T = makeReg(SrcTy); |
| 3628 Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem); | 3706 Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem); |
| 3629 Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem); | 3707 Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem); |
| 3630 // ALIGNHACK: Until data alignment support is implemented, vector | |
| 3631 // instructions need to have vector operands in registers. Once | |
| 3632 // there is support for data alignment, LEGAL_HACK can be removed. | |
| 3633 #define LEGAL_HACK(Vect) legalizeToVar((Vect)) | |
| 3634 if (InstructionSet >= SSE4_1) { | 3708 if (InstructionSet >= SSE4_1) { |
| 3635 // TODO(wala): If the condition operand is a constant, use blendps | 3709 // TODO(wala): If the condition operand is a constant, use blendps |
| 3636 // or pblendw. | 3710 // or pblendw. |
| 3637 // | 3711 // |
| 3638 // Use blendvps or pblendvb to implement select. | 3712 // Use blendvps or pblendvb to implement select. |
| 3639 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 || | 3713 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 || |
| 3640 SrcTy == IceType_v4f32) { | 3714 SrcTy == IceType_v4f32) { |
| 3641 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem); | 3715 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem); |
| 3642 Variable *xmm0 = makeReg(IceType_v4i32, Reg_xmm0); | 3716 Variable *xmm0 = makeReg(IceType_v4i32, Reg_xmm0); |
| 3643 _movp(xmm0, ConditionRM); | 3717 _movp(xmm0, ConditionRM); |
| 3644 _psll(xmm0, Ctx->getConstantInt(IceType_i8, 31)); | 3718 _psll(xmm0, Ctx->getConstantInt(IceType_i8, 31)); |
| 3645 _movp(T, SrcFRM); | 3719 _movp(T, SrcFRM); |
| 3646 _blendvps(T, LEGAL_HACK(SrcTRM), xmm0); | 3720 _blendvps(T, SrcTRM, xmm0); |
| 3647 _movp(Dest, T); | 3721 _movp(Dest, T); |
| 3648 } else { | 3722 } else { |
| 3649 assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16); | 3723 assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16); |
| 3650 Type SignExtTy = Condition->getType() == IceType_v8i1 ? IceType_v8i16 | 3724 Type SignExtTy = Condition->getType() == IceType_v8i1 ? IceType_v8i16 |
| 3651 : IceType_v16i8; | 3725 : IceType_v16i8; |
| 3652 Variable *xmm0 = makeReg(SignExtTy, Reg_xmm0); | 3726 Variable *xmm0 = makeReg(SignExtTy, Reg_xmm0); |
| 3653 lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition)); | 3727 lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition)); |
| 3654 _movp(T, SrcFRM); | 3728 _movp(T, SrcFRM); |
| 3655 _pblendvb(T, LEGAL_HACK(SrcTRM), xmm0); | 3729 _pblendvb(T, SrcTRM, xmm0); |
| 3656 _movp(Dest, T); | 3730 _movp(Dest, T); |
| 3657 } | 3731 } |
| 3658 return; | 3732 return; |
| 3659 } | 3733 } |
| 3660 // Lower select without SSE4.1: | 3734 // Lower select without SSE4.1: |
| 3661 // a=d?b:c ==> | 3735 // a=d?b:c ==> |
| 3662 // if elementtype(d) != i1: | 3736 // if elementtype(d) != i1: |
| 3663 // d=sext(d); | 3737 // d=sext(d); |
| 3664 // a=(b&d)|(c&~d); | 3738 // a=(b&d)|(c&~d); |
| 3665 Variable *T2 = makeReg(SrcTy); | 3739 Variable *T2 = makeReg(SrcTy); |
| 3666 // Sign extend the condition operand if applicable. | 3740 // Sign extend the condition operand if applicable. |
| 3667 if (SrcTy == IceType_v4f32) { | 3741 if (SrcTy == IceType_v4f32) { |
| 3668 // The sext operation takes only integer arguments. | 3742 // The sext operation takes only integer arguments. |
| 3669 Variable *T3 = Func->makeVariable(IceType_v4i32, Context.getNode()); | 3743 Variable *T3 = Func->makeVariable(IceType_v4i32, Context.getNode()); |
| 3670 lowerCast(InstCast::create(Func, InstCast::Sext, T3, Condition)); | 3744 lowerCast(InstCast::create(Func, InstCast::Sext, T3, Condition)); |
| 3671 _movp(T, T3); | 3745 _movp(T, T3); |
| 3672 } else if (typeElementType(SrcTy) != IceType_i1) { | 3746 } else if (typeElementType(SrcTy) != IceType_i1) { |
| 3673 lowerCast(InstCast::create(Func, InstCast::Sext, T, Condition)); | 3747 lowerCast(InstCast::create(Func, InstCast::Sext, T, Condition)); |
| 3674 } else { | 3748 } else { |
| 3675 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem); | 3749 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem); |
| 3676 _movp(T, ConditionRM); | 3750 _movp(T, ConditionRM); |
| 3677 } | 3751 } |
| 3678 _movp(T2, T); | 3752 _movp(T2, T); |
| 3679 _pand(T, LEGAL_HACK(SrcTRM)); | 3753 _pand(T, SrcTRM); |
| 3680 _pandn(T2, LEGAL_HACK(SrcFRM)); | 3754 _pandn(T2, SrcFRM); |
| 3681 _por(T, T2); | 3755 _por(T, T2); |
| 3682 _movp(Dest, T); | 3756 _movp(Dest, T); |
| 3683 #undef LEGAL_HACK | |
| 3684 | 3757 |
| 3685 return; | 3758 return; |
| 3686 } | 3759 } |
| 3687 | 3760 |
| 3688 // a=d?b:c ==> cmp d,0; a=b; jne L1; FakeUse(a); a=c; L1: | 3761 // a=d?b:c ==> cmp d,0; a=b; jne L1; FakeUse(a); a=c; L1: |
| 3689 Operand *ConditionRMI = legalize(Condition); | 3762 Operand *ConditionRMI = legalize(Condition); |
| 3690 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 3763 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
| 3691 InstX8632Label *Label = InstX8632Label::create(Func, this); | 3764 InstX8632Label *Label = InstX8632Label::create(Func, this); |
| 3692 | 3765 |
| 3693 if (Dest->getType() == IceType_i64) { | 3766 if (Dest->getType() == IceType_i64) { |
| (...skipping 542 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 4236 for (SizeT i = 0; i < Size; ++i) { | 4309 for (SizeT i = 0; i < Size; ++i) { |
| 4237 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; | 4310 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; |
| 4238 } | 4311 } |
| 4239 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; | 4312 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; |
| 4240 } | 4313 } |
| 4241 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName | 4314 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName |
| 4242 << "\n"; | 4315 << "\n"; |
| 4243 } | 4316 } |
| 4244 | 4317 |
| 4245 } // end of namespace Ice | 4318 } // end of namespace Ice |
| OLD | NEW |