src/IceTargetLoweringX8632.cpp - Issue 465413003: Subzero: Align spill locations to natural alignment.

Side by Side Diff: src/IceTargetLoweringX8632.cpp

Issue 465413003: Subzero: Align spill locations to natural alignment. (Closed) Base URL: https://gerrit.chromium.org/gerrit/p/native_client/pnacl-subzero.git@master

Patch Set: Comments, round 1 Created 6 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//	1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//

2 //	2 //

3 // The Subzero Code Generator	3 // The Subzero Code Generator

4 //	4 //

5 // This file is distributed under the University of Illinois Open Source	5 // This file is distributed under the University of Illinois Open Source

6 // License. See LICENSE.TXT for details.	6 // License. See LICENSE.TXT for details.

7 //	7 //

8 //===----------------------------------------------------------------------===//	8 //===----------------------------------------------------------------------===//

9 //	9 //

10 // This file implements the TargetLoweringX8632 class, which	10 // This file implements the TargetLoweringX8632 class, which

11 // consists almost entirely of the lowering sequence for each	11 // consists almost entirely of the lowering sequence for each

12 // high-level instruction. It also implements	12 // high-level instruction. It also implements

13 // TargetX8632Fast::postLower() which does the simplest possible	13 // TargetX8632Fast::postLower() which does the simplest possible

14 // register allocation for the "fast" target.	14 // register allocation for the "fast" target.

15 //	15 //

16 //===----------------------------------------------------------------------===//	16 //===----------------------------------------------------------------------===//

17	17

18 #include "IceDefs.h"	18 #include "IceDefs.h"

19 #include "IceCfg.h"	19 #include "IceCfg.h"

20 #include "IceCfgNode.h"	20 #include "IceCfgNode.h"

21 #include "IceInstX8632.h"	21 #include "IceInstX8632.h"

22 #include "IceOperand.h"	22 #include "IceOperand.h"

23 #include "IceTargetLoweringX8632.def"	23 #include "IceTargetLoweringX8632.def"

24 #include "IceTargetLoweringX8632.h"	24 #include "IceTargetLoweringX8632.h"

25 #include "llvm/Support/CommandLine.h"	25 #include "llvm/Support/CommandLine.h"

26	26

	27 #include <strings.h>
	Jim Stichnoth 2014/08/14 18:21:19 Use MathExtras.h and llvm::findFirstSet(), since JF says it's awesomer.
	28

27 namespace Ice {	29 namespace Ice {

28	30

29 namespace {	31 namespace {

30	32

31 // The following table summarizes the logic for lowering the fcmp	33 // The following table summarizes the logic for lowering the fcmp

32 // instruction. There is one table entry for each of the 16 conditions.	34 // instruction. There is one table entry for each of the 16 conditions.

33 //	35 //

34 // The first four columns describe the case when the operands are	36 // The first four columns describe the case when the operands are

35 // floating point scalar values. A comment in lowerFcmp() describes the	37 // floating point scalar values. A comment in lowerFcmp() describes the

36 // lowering template. In the most general case, there is a compare	38 // lowering template. In the most general case, there is a compare

(...skipping 84 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
121 }	123 }

122	124

123 // The maximum number of arguments to pass in XMM registers	125 // The maximum number of arguments to pass in XMM registers

124 const uint32_t X86_MAX_XMM_ARGS = 4;	126 const uint32_t X86_MAX_XMM_ARGS = 4;

125 // The number of bits in a byte	127 // The number of bits in a byte

126 const uint32_t X86_CHAR_BIT = 8;	128 const uint32_t X86_CHAR_BIT = 8;

127 // Stack alignment	129 // Stack alignment

128 const uint32_t X86_STACK_ALIGNMENT_BYTES = 16;	130 const uint32_t X86_STACK_ALIGNMENT_BYTES = 16;

129 // Size of the return address on the stack	131 // Size of the return address on the stack

130 const uint32_t X86_RET_IP_SIZE_BYTES = 4;	132 const uint32_t X86_RET_IP_SIZE_BYTES = 4;

	133 // The base 2 logarithm of the width in bytes of the largest supported type

	134 const uint32_t X86_LOG2_OF_MAX_TYPE_SIZE = 4;

131	135

132 // Value is a size in bytes. Return Value adjusted to the next highest	136 // Value and Alignment are in bytes. Return Value adjusted to the next

133 // multiple of the stack alignment.	137 // highest multiple of Alignment.

	138 uint32_t applyAlignment(uint32_t Value, uint32_t Alignment) {

	139 // power of 2

	140 assert((Alignment & (Alignment - 1)) == 0);

	141 return (Value + Alignment - 1) & -Alignment;

	142 }

	143

	144 // Value is in bytes. Return Value adjusted to the next highest multiple

	145 // of the stack alignment.

134 uint32_t applyStackAlignment(uint32_t Value) {	146 uint32_t applyStackAlignment(uint32_t Value) {

135 // power of 2	147 return applyAlignment(Value, X86_STACK_ALIGNMENT_BYTES);

136 assert((X86_STACK_ALIGNMENT_BYTES & (X86_STACK_ALIGNMENT_BYTES - 1)) == 0);

137 return (Value + X86_STACK_ALIGNMENT_BYTES - 1) & -X86_STACK_ALIGNMENT_BYTES;

138 }	148 }

139	149

140 // Instruction set options	150 // Instruction set options

141 namespace cl = ::llvm::cl;	151 namespace cl = ::llvm::cl;

142 cl::opt<TargetX8632::X86InstructionSet> CLInstructionSet(	152 cl::opt<TargetX8632::X86InstructionSet> CLInstructionSet(

143 "mattr", cl::desc("X86 target attributes"),	153 "mattr", cl::desc("X86 target attributes"),

144 cl::init(TargetX8632::SSE2),	154 cl::init(TargetX8632::SSE2),

145 cl::values(	155 cl::values(

146 clEnumValN(TargetX8632::SSE2, "sse2",	156 clEnumValN(TargetX8632::SSE2, "sse2",

147 "Enable SSE2 instructions (default)"),	157 "Enable SSE2 instructions (default)"),

(...skipping 365 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
513 Variable *RegisterArg = Func->makeVariable(Ty, DefNode, Name);	523 Variable *RegisterArg = Func->makeVariable(Ty, DefNode, Name);

514 RegisterArg->setRegNum(RegNum);	524 RegisterArg->setRegNum(RegNum);

515 RegisterArg->setIsArg(Func);	525 RegisterArg->setIsArg(Func);

516 Arg->setIsArg(Func, false);	526 Arg->setIsArg(Func, false);

517	527

518 Args[I] = RegisterArg;	528 Args[I] = RegisterArg;

519 Context.insert(InstAssign::create(Func, Arg, RegisterArg));	529 Context.insert(InstAssign::create(Func, Arg, RegisterArg));

520 }	530 }

521 }	531 }

522	532

	533 void TargetX8632::sortByAlignment(VarList &Dest, const VarList &Source) const {

	534 const SizeT NumBuckets = X86_LOG2_OF_MAX_TYPE_SIZE + 1;

	535 VarList Buckets[NumBuckets];

	536

	537 for (VarList::const_iterator I = Source.begin(), E = Source.end(); I != E;

	538 ++I) {

	539 Variable *Var = *I;

	540 uint32_t NaturalAlignment = typeWidthInBytesOnStack(Var->getType());

	541 SizeT BucketIndex = ffs(NaturalAlignment) - 1;

	542 assert(BucketIndex < NumBuckets);

	543 Buckets[BucketIndex].push_back(Var);

	544 }

	545

	546 for (SizeT I = 0, E = NumBuckets; I < E; ++I) {

	547 VarList &List = Buckets[NumBuckets - I - 1];

	548 Dest.insert(Dest.end(), List.begin(), List.end());

	549 }

	550 }

	551

523 // Helper function for addProlog().	552 // Helper function for addProlog().

524 //	553 //

525 // This assumes Arg is an argument passed on the stack. This sets the	554 // This assumes Arg is an argument passed on the stack. This sets the

526 // frame offset for Arg and updates InArgsSizeBytes according to Arg's	555 // frame offset for Arg and updates InArgsSizeBytes according to Arg's

527 // width. For an I64 arg that has been split into Lo and Hi components,	556 // width. For an I64 arg that has been split into Lo and Hi components,

528 // it calls itself recursively on the components, taking care to handle	557 // it calls itself recursively on the components, taking care to handle

529 // Lo first because of the little-endian architecture. Lastly, this	558 // Lo first because of the little-endian architecture. Lastly, this

530 // function generates an instruction to copy Arg into its assigned	559 // function generates an instruction to copy Arg into its assigned

531 // register if applicable.	560 // register if applicable.

532 void TargetX8632::finishArgumentLowering(Variable *Arg, Variable *FramePtr,	561 void TargetX8632::finishArgumentLowering(Variable *Arg, Variable *FramePtr,

(...skipping 23 matching lines...) Expand all Loading...
556 _movp(Arg, Mem);	585 _movp(Arg, Mem);

557 } else {	586 } else {

558 _mov(Arg, Mem);	587 _mov(Arg, Mem);

559 }	588 }

560 }	589 }

561 }	590 }

562	591

563 Type TargetX8632::stackSlotType() { return IceType_i32; }	592 Type TargetX8632::stackSlotType() { return IceType_i32; }

564	593

565 void TargetX8632::addProlog(CfgNode *Node) {	594 void TargetX8632::addProlog(CfgNode *Node) {

	595 // Stack frame layout:

	596 //

	597 // +------------------------+

	598 // | 1. return address      |

	599 // +------------------------+

	600 // | 2. preserved registers |

	601 // +------------------------+

	602 // | 3. padding             |

	603 // +------------------------+

	604 // | 4. global spill area   |

	605 // +------------------------+

	606 // | 5. padding             |

	607 // +------------------------+

	608 // | 6. local spill area    |

	609 // +------------------------+

	610 // | 7. padding             |

	611 // +------------------------+

	612 // | 8. local variables     |

	613 // +------------------------+

	614 //

	615 // The following variables record the size in bytes of the given areas:

	616 // * X86_RET_IP_SIZE_BYTES: area 1

	617 // * PreservedRegsSizeBytes: area 2

	618 // * SpillAreaPaddingBytes: area 3

	619 // * GlobalsSize: area 4

	620 // * GlobalsAndSubsequentPaddingSize: areas 4 - 5

	621 // * LocalsSpillAreaSize: area 6

	622 // * LocalsSizeBytes: areas 3 - 7

	623

566 // If SimpleCoalescing is false, each variable without a register	624 // If SimpleCoalescing is false, each variable without a register

567 // gets its own unique stack slot, which leads to large stack	625 // gets its own unique stack slot, which leads to large stack

568 // frames. If SimpleCoalescing is true, then each "global" variable	626 // frames. If SimpleCoalescing is true, then each "global" variable

569 // without a register gets its own slot, but "local" variable slots	627 // without a register gets its own slot, but "local" variable slots

570 // are reused across basic blocks. E.g., if A and B are local to	628 // are reused across basic blocks. E.g., if A and B are local to

571 // block 1 and C is local to block 2, then C may share a slot with A	629 // block 1 and C is local to block 2, then C may share a slot with A

572 // or B.	630 // or B.

573 const bool SimpleCoalescing = true;	631 const bool SimpleCoalescing = true;

574 size_t InArgsSizeBytes = 0;	632 size_t InArgsSizeBytes = 0;

575 size_t PreservedRegsSizeBytes = 0;	633 size_t PreservedRegsSizeBytes = 0;

(...skipping 16 matching lines...) Expand all Loading...
592 getRegisterSet(RegSet_CalleeSave, RegSet_None);	650 getRegisterSet(RegSet_CalleeSave, RegSet_None);

593	651

594 size_t GlobalsSize = 0;	652 size_t GlobalsSize = 0;

595 std::vector<size_t> LocalsSize(Func->getNumNodes());	653 std::vector<size_t> LocalsSize(Func->getNumNodes());

596	654

597 // Prepass. Compute RegsUsed, PreservedRegsSizeBytes, and	655 // Prepass. Compute RegsUsed, PreservedRegsSizeBytes, and

598 // LocalsSizeBytes.	656 // LocalsSizeBytes.

599 RegsUsed = llvm::SmallBitVector(CalleeSaves.size());	657 RegsUsed = llvm::SmallBitVector(CalleeSaves.size());

600 const VarList &Variables = Func->getVariables();	658 const VarList &Variables = Func->getVariables();

601 const VarList &Args = Func->getArgs();	659 const VarList &Args = Func->getArgs();

	660 VarList SpilledVariables, SortedSpilledVariables,

	661 VariablesLinkedToSpillSplots;

	662

	663 // If there is a separate locals area, this specifies the alignment

	664 // for it.

	665 uint32_t LocalsSlotsAlignmentBytes = 0;

	666 // The entire spill locations area gets aligned to largest natural

	667 // alignment of the variables that have a spill slot.

	668 uint32_t SpillAreaAlignmentBytes = 0;

602 for (VarList::const_iterator I = Variables.begin(), E = Variables.end();	669 for (VarList::const_iterator I = Variables.begin(), E = Variables.end();

603 I != E; ++I) {	670 I != E; ++I) {

604 Variable *Var = *I;	671 Variable *Var = *I;

605 if (Var->hasReg()) {	672 if (Var->hasReg()) {

606 RegsUsed[Var->getRegNum()] = true;	673 RegsUsed[Var->getRegNum()] = true;

607 continue;	674 continue;

608 }	675 }

609 // An argument either does not need a stack slot (if passed in a	676 // An argument either does not need a stack slot (if passed in a

610 // register) or already has one (if passed on the stack).	677 // register) or already has one (if passed on the stack).

611 if (Var->getIsArg())	678 if (Var->getIsArg())

612 continue;	679 continue;

613 // An unreferenced variable doesn't need a stack slot.	680 // An unreferenced variable doesn't need a stack slot.

614 if (ComputedLiveRanges && Var->getLiveRange().isEmpty())	681 if (ComputedLiveRanges && Var->getLiveRange().isEmpty())

615 continue;	682 continue;

616 // A spill slot linked to a variable with a stack slot should reuse	683 // A spill slot linked to a variable with a stack slot should reuse

617 // that stack slot.	684 // that stack slot.

618 if (Var->getWeight() == RegWeight::Zero && Var->getRegisterOverlap()) {	685 if (Var->getWeight() == RegWeight::Zero && Var->getRegisterOverlap()) {

619 if (Variable *Linked = Var->getPreferredRegister()) {	686 if (Variable *Linked = Var->getPreferredRegister()) {

620 if (!Linked->hasReg())	687 if (!Linked->hasReg()) {

	688 VariablesLinkedToSpillSplots.push_back(Var);

621 continue;	689 continue;

	690 }

622 }	691 }

623 }	692 }

	693 SpilledVariables.push_back(Var);

	694 }

	695

	696 sortByAlignment(SortedSpilledVariables, SpilledVariables);

	697 for (VarList::const_iterator I = SortedSpilledVariables.begin(),

	698 E = SortedSpilledVariables.end();

	699 I != E; ++I) {

	700 Variable *Var = *I;

624 size_t Increment = typeWidthInBytesOnStack(Var->getType());	701 size_t Increment = typeWidthInBytesOnStack(Var->getType());

	702 if (!SpillAreaAlignmentBytes)

	703 SpillAreaAlignmentBytes = Increment;

625 if (SimpleCoalescing) {	704 if (SimpleCoalescing) {

626 if (Var->isMultiblockLife()) {	705 if (Var->isMultiblockLife()) {

627 GlobalsSize += Increment;	706 GlobalsSize += Increment;

628 } else {	707 } else {

629 SizeT NodeIndex = Var->getLocalUseNode()->getIndex();	708 SizeT NodeIndex = Var->getLocalUseNode()->getIndex();

630 LocalsSize[NodeIndex] += Increment;	709 LocalsSize[NodeIndex] += Increment;

631 if (LocalsSize[NodeIndex] > LocalsSizeBytes)	710 if (LocalsSize[NodeIndex] > LocalsSizeBytes)

632 LocalsSizeBytes = LocalsSize[NodeIndex];	711 LocalsSizeBytes = LocalsSize[NodeIndex];

	712 if (!LocalsSlotsAlignmentBytes)

	713 LocalsSlotsAlignmentBytes = Increment;

633 }	714 }

634 } else {	715 } else {

635 LocalsSizeBytes += Increment;	716 LocalsSizeBytes += Increment;

636 }	717 }

637 }	718 }

	719 uint32_t LocalsSpillAreaSize = LocalsSizeBytes;

	720

638 LocalsSizeBytes += GlobalsSize;	721 LocalsSizeBytes += GlobalsSize;

639	722

640 // Add push instructions for preserved registers.	723 // Add push instructions for preserved registers.

641 for (SizeT i = 0; i < CalleeSaves.size(); ++i) {	724 for (SizeT i = 0; i < CalleeSaves.size(); ++i) {

642 if (CalleeSaves[i] && RegsUsed[i]) {	725 if (CalleeSaves[i] && RegsUsed[i]) {

643 PreservedRegsSizeBytes += 4;	726 PreservedRegsSizeBytes += 4;

644 const bool SuppressStackAdjustment = true;	727 const bool SuppressStackAdjustment = true;

645 _push(getPhysicalRegister(i), SuppressStackAdjustment);	728 _push(getPhysicalRegister(i), SuppressStackAdjustment);

646 }	729 }

647 }	730 }

648	731

649 // Generate "push ebp; mov ebp, esp"	732 // Generate "push ebp; mov ebp, esp"

650 if (IsEbpBasedFrame) {	733 if (IsEbpBasedFrame) {

651 assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None))	734 assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None))

652 .count() == 0);	735 .count() == 0);

653 PreservedRegsSizeBytes += 4;	736 PreservedRegsSizeBytes += 4;

654 Variable *ebp = getPhysicalRegister(Reg_ebp);	737 Variable *ebp = getPhysicalRegister(Reg_ebp);

655 Variable *esp = getPhysicalRegister(Reg_esp);	738 Variable *esp = getPhysicalRegister(Reg_esp);

656 const bool SuppressStackAdjustment = true;	739 const bool SuppressStackAdjustment = true;

657 _push(ebp, SuppressStackAdjustment);	740 _push(ebp, SuppressStackAdjustment);

658 _mov(ebp, esp);	741 _mov(ebp, esp);

659 }	742 }

660	743

	744 // Align the variables area. SpillAreaPaddingBytes is the size of

	745 // the region after the preserved registers and before the spill

	746 // areas.

	747 uint32_t SpillAreaPaddingBytes = 0;

	748 if (SpillAreaAlignmentBytes) {

	749 assert(SpillAreaAlignmentBytes <= X86_STACK_ALIGNMENT_BYTES);

	750 uint32_t PaddingStart = X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes;

	751 uint32_t SpillAreaStart =

	752 applyAlignment(PaddingStart, SpillAreaAlignmentBytes);

	753 SpillAreaPaddingBytes = SpillAreaStart - PaddingStart;

	754 LocalsSizeBytes += SpillAreaPaddingBytes;

	755 }

	756

	757 // If there are separate globals and locals areas, make sure the

	758 // locals area is aligned by padding the end of the globals area.

	759 uint32_t GlobalsAndSubsequentPaddingSize = GlobalsSize;

	760 if (LocalsSlotsAlignmentBytes) {

	761 assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);

	762 GlobalsAndSubsequentPaddingSize =

	763 applyAlignment(GlobalsSize, LocalsSlotsAlignmentBytes);

	764 LocalsSizeBytes += GlobalsAndSubsequentPaddingSize - GlobalsSize;

	765 }

	766

	767 // Align esp if necessary.

661 if (NeedsStackAlignment) {	768 if (NeedsStackAlignment) {

662 uint32_t StackSize = applyStackAlignment(	769 uint32_t StackOffset = X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes;

663 X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes + LocalsSizeBytes);	770 uint32_t StackSize = applyStackAlignment(StackOffset + LocalsSizeBytes);

664 LocalsSizeBytes =	771 LocalsSizeBytes = StackSize - StackOffset;

665 StackSize - X86_RET_IP_SIZE_BYTES - PreservedRegsSizeBytes;

666 }	772 }

667	773

668 // Generate "sub esp, LocalsSizeBytes"	774 // Generate "sub esp, LocalsSizeBytes"

669 if (LocalsSizeBytes)	775 if (LocalsSizeBytes)

670 _sub(getPhysicalRegister(Reg_esp),	776 _sub(getPhysicalRegister(Reg_esp),

671 Ctx->getConstantInt(IceType_i32, LocalsSizeBytes));	777 Ctx->getConstantInt(IceType_i32, LocalsSizeBytes));

672	778

673 resetStackAdjustment();	779 resetStackAdjustment();

674	780

675 // Fill in stack offsets for stack args, and copy args into registers	781 // Fill in stack offsets for stack args, and copy args into registers

676 // for those that were register-allocated. Args are pushed right to	782 // for those that were register-allocated. Args are pushed right to

677 // left, so Arg[0] is closest to the stack/frame pointer.	783 // left, so Arg[0] is closest to the stack/frame pointer.

678 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());	784 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());

679 size_t BasicFrameOffset = PreservedRegsSizeBytes + X86_RET_IP_SIZE_BYTES;	785 size_t BasicFrameOffset = PreservedRegsSizeBytes + X86_RET_IP_SIZE_BYTES;

680 if (!IsEbpBasedFrame)	786 if (!IsEbpBasedFrame)

681 BasicFrameOffset += LocalsSizeBytes;	787 BasicFrameOffset += LocalsSizeBytes;

682	788

683 unsigned NumXmmArgs = 0;	789 unsigned NumXmmArgs = 0;

684 for (SizeT i = 0; i < Args.size(); ++i) {	790 for (SizeT i = 0; i < Args.size(); ++i) {

685 Variable *Arg = Args[i];	791 Variable *Arg = Args[i];

686 // Skip arguments passed in registers.	792 // Skip arguments passed in registers.

687 if (isVectorType(Arg->getType()) && NumXmmArgs < X86_MAX_XMM_ARGS) {	793 if (isVectorType(Arg->getType()) && NumXmmArgs < X86_MAX_XMM_ARGS) {

688 ++NumXmmArgs;	794 ++NumXmmArgs;

689 continue;	795 continue;

690 }	796 }

691 finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes);	797 finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes);

692 }	798 }

693	799

694 // Fill in stack offsets for locals.	800 // Fill in stack offsets for locals.

695 size_t TotalGlobalsSize = GlobalsSize;	801 size_t GlobalsSpaceUsed = SpillAreaPaddingBytes;

696 GlobalsSize = 0;

697 LocalsSize.assign(LocalsSize.size(), 0);	802 LocalsSize.assign(LocalsSize.size(), 0);

698 size_t NextStackOffset = 0;	803 size_t NextStackOffset = GlobalsSpaceUsed;

699 for (VarList::const_iterator I = Variables.begin(), E = Variables.end();	804 for (VarList::const_iterator I = SortedSpilledVariables.begin(),

	805 E = SortedSpilledVariables.end();

700 I != E; ++I) {	806 I != E; ++I) {

701 Variable *Var = *I;	807 Variable *Var = *I;

702 if (Var->hasReg()) {

703 RegsUsed[Var->getRegNum()] = true;

704 continue;

705 }

706 if (Var->getIsArg())

707 continue;

708 if (ComputedLiveRanges && Var->getLiveRange().isEmpty())

709 continue;

710 if (Var->getWeight() == RegWeight::Zero && Var->getRegisterOverlap()) {

711 if (Variable *Linked = Var->getPreferredRegister()) {

712 if (!Linked->hasReg()) {

713 // TODO: Make sure Linked has already been assigned a stack

714 // slot.

715 Var->setStackOffset(Linked->getStackOffset());

716 continue;

717 }

718 }

719 }

720 size_t Increment = typeWidthInBytesOnStack(Var->getType());	808 size_t Increment = typeWidthInBytesOnStack(Var->getType());

721 if (SimpleCoalescing) {	809 if (SimpleCoalescing) {

722 if (Var->isMultiblockLife()) {	810 if (Var->isMultiblockLife()) {

723 GlobalsSize += Increment;	811 GlobalsSpaceUsed += Increment;

724 NextStackOffset = GlobalsSize;	812 NextStackOffset = GlobalsSpaceUsed;

725 } else {	813 } else {

726 SizeT NodeIndex = Var->getLocalUseNode()->getIndex();	814 SizeT NodeIndex = Var->getLocalUseNode()->getIndex();

727 LocalsSize[NodeIndex] += Increment;	815 LocalsSize[NodeIndex] += Increment;

728 NextStackOffset = TotalGlobalsSize + LocalsSize[NodeIndex];	816 NextStackOffset = SpillAreaPaddingBytes +

	817 GlobalsAndSubsequentPaddingSize +

	818 LocalsSize[NodeIndex];

729 }	819 }

730 } else {	820 } else {

731 NextStackOffset += Increment;	821 NextStackOffset += Increment;

732 }	822 }

733 if (IsEbpBasedFrame)	823 if (IsEbpBasedFrame)

734 Var->setStackOffset(-NextStackOffset);	824 Var->setStackOffset(-NextStackOffset);

735 else	825 else

736 Var->setStackOffset(LocalsSizeBytes - NextStackOffset);	826 Var->setStackOffset(LocalsSizeBytes - NextStackOffset);

737 }	827 }

738 this->FrameSizeLocals = NextStackOffset;	828 this->FrameSizeLocals = NextStackOffset - SpillAreaPaddingBytes;

739 this->HasComputedFrame = true;	829 this->HasComputedFrame = true;

740	830

	831 // Assign stack offsets to variables that have been linked to spilled

	832 // variables.

	833 for (VarList::const_iterator I = VariablesLinkedToSpillSplots.begin(),

	834 E = VariablesLinkedToSpillSplots.end();

	835 I != E; ++I) {

	836 Variable *Var = *I;

	837 Variable *Linked = Var->getPreferredRegister();

	838 Var->setStackOffset(Linked->getStackOffset());

	839 }

	840

741 if (Func->getContext()->isVerbose(IceV_Frame)) {	841 if (Func->getContext()->isVerbose(IceV_Frame)) {

742 Func->getContext()->getStrDump() << "LocalsSizeBytes=" << LocalsSizeBytes	842 Ostream &Str = Func->getContext()->getStrDump();

743 << "\n"	843

744 << "InArgsSizeBytes=" << InArgsSizeBytes	844 Str << "Stack layout:\n";

745 << "\n"	845 uint32_t EspAdjustmentPaddingSize =

746 << "PreservedRegsSizeBytes="	846 LocalsSizeBytes - GlobalsAndSubsequentPaddingSize -

747 << PreservedRegsSizeBytes << "\n";	847 LocalsSpillAreaSize - PreservedRegsSizeBytes - X86_RET_IP_SIZE_BYTES;

	848 Str << " in-args = " << InArgsSizeBytes << " bytes\n"

	849 << " return address = " << X86_RET_IP_SIZE_BYTES << " bytes\n"

	850 << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"

	851 << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"

	852 << " globals spill area = " << GlobalsSize << " bytes\n"

	853 << " globals-locals spill areas intermediate padding = "

	854 << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n"

	855 << " locals spill area = " << LocalsSpillAreaSize << " bytes\n"

	856 << " esp alignment padding = " << EspAdjustmentPaddingSize

	857 << " bytes\n";

	858

	859 Str << "Stack details:\n"

	860 << " esp adjustment = " << LocalsSizeBytes << " bytes\n"

	861 << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"

	862 << " locals spill area alignment = " << LocalsSlotsAlignmentBytes

	863 << " bytes\n"

	864 << " is ebp based = " << IsEbpBasedFrame << "\n";

748 }	865 }

749 }	866 }

750	867

751 void TargetX8632::addEpilog(CfgNode *Node) {	868 void TargetX8632::addEpilog(CfgNode *Node) {

752 InstList &Insts = Node->getInsts();	869 InstList &Insts = Node->getInsts();

753 InstList::reverse_iterator RI, E;	870 InstList::reverse_iterator RI, E;

754 for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {	871 for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {

755 if (llvm::isa<InstX8632Ret>(*RI))	872 if (llvm::isa<InstX8632Ret>(*RI))

756 break;	873 break;

757 }	874 }

(...skipping 226 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
984 assert((AlignmentParam & (AlignmentParam - 1)) == 0);	1101 assert((AlignmentParam & (AlignmentParam - 1)) == 0);

985 assert((X86_STACK_ALIGNMENT_BYTES & (X86_STACK_ALIGNMENT_BYTES - 1)) == 0);	1102 assert((X86_STACK_ALIGNMENT_BYTES & (X86_STACK_ALIGNMENT_BYTES - 1)) == 0);

986	1103

987 uint32_t Alignment = std::max(AlignmentParam, X86_STACK_ALIGNMENT_BYTES);	1104 uint32_t Alignment = std::max(AlignmentParam, X86_STACK_ALIGNMENT_BYTES);

988 if (Alignment > X86_STACK_ALIGNMENT_BYTES) {	1105 if (Alignment > X86_STACK_ALIGNMENT_BYTES) {

989 _and(esp, Ctx->getConstantInt(IceType_i32, -Alignment));	1106 _and(esp, Ctx->getConstantInt(IceType_i32, -Alignment));

990 }	1107 }

991 if (ConstantInteger *ConstantTotalSize =	1108 if (ConstantInteger *ConstantTotalSize =

992 llvm::dyn_cast<ConstantInteger>(TotalSize)) {	1109 llvm::dyn_cast<ConstantInteger>(TotalSize)) {

993 uint32_t Value = ConstantTotalSize->getValue();	1110 uint32_t Value = ConstantTotalSize->getValue();

994 // Round Value up to the next highest multiple of the alignment.	1111 Value = applyAlignment(Value, Alignment);

995 Value = (Value + Alignment - 1) & -Alignment;

996 _sub(esp, Ctx->getConstantInt(IceType_i32, Value));	1112 _sub(esp, Ctx->getConstantInt(IceType_i32, Value));

997 } else {	1113 } else {

998 // Non-constant sizes need to be adjusted to the next highest	1114 // Non-constant sizes need to be adjusted to the next highest

999 // multiple of the required alignment at runtime.	1115 // multiple of the required alignment at runtime.

1000 Variable *T = makeReg(IceType_i32);	1116 Variable *T = makeReg(IceType_i32);

1001 _mov(T, TotalSize);	1117 _mov(T, TotalSize);

1002 _add(T, Ctx->getConstantInt(IceType_i32, Alignment - 1));	1118 _add(T, Ctx->getConstantInt(IceType_i32, Alignment - 1));

1003 _and(T, Ctx->getConstantInt(IceType_i32, -Alignment));	1119 _and(T, Ctx->getConstantInt(IceType_i32, -Alignment));

1004 _sub(esp, T);	1120 _sub(esp, T);

1005 }	1121 }

(...skipping 226 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
1232 case InstArithmetic::Fsub:	1348 case InstArithmetic::Fsub:

1233 case InstArithmetic::Fmul:	1349 case InstArithmetic::Fmul:

1234 case InstArithmetic::Fdiv:	1350 case InstArithmetic::Fdiv:

1235 case InstArithmetic::Frem:	1351 case InstArithmetic::Frem:

1236 llvm_unreachable("FP instruction with i64 type");	1352 llvm_unreachable("FP instruction with i64 type");

1237 break;	1353 break;

1238 }	1354 }

1239 } else if (isVectorType(Dest->getType())) {	1355 } else if (isVectorType(Dest->getType())) {

1240 // TODO: Trap on integer divide and integer modulo by zero.	1356 // TODO: Trap on integer divide and integer modulo by zero.

1241 // See: https://code.google.com/p/nativeclient/issues/detail?id=3899	1357 // See: https://code.google.com/p/nativeclient/issues/detail?id=3899

1242 //

1243 // TODO(wala): ALIGNHACK: All vector arithmetic is currently done in

1244 // registers. This is a workaround of the fact that there is no

1245 // support for aligning stack operands. Once there is support,

1246 // remove LEGAL_HACK.

1247 #define LEGAL_HACK(s) legalizeToVar((s))

1248 switch (Inst->getOp()) {	1358 switch (Inst->getOp()) {

1249 case InstArithmetic::_num:	1359 case InstArithmetic::_num:

1250 llvm_unreachable("Unknown arithmetic operator");	1360 llvm_unreachable("Unknown arithmetic operator");

1251 break;	1361 break;

1252 case InstArithmetic::Add: {	1362 case InstArithmetic::Add: {

1253 Variable *T = makeReg(Dest->getType());	1363 Variable *T = makeReg(Dest->getType());

1254 _movp(T, Src0);	1364 _movp(T, Src0);

1255 _padd(T, LEGAL_HACK(Src1));	1365 _padd(T, Src1);

1256 _movp(Dest, T);	1366 _movp(Dest, T);

1257 } break;	1367 } break;

1258 case InstArithmetic::And: {	1368 case InstArithmetic::And: {

1259 Variable *T = makeReg(Dest->getType());	1369 Variable *T = makeReg(Dest->getType());

1260 _movp(T, Src0);	1370 _movp(T, Src0);

1261 _pand(T, LEGAL_HACK(Src1));	1371 _pand(T, Src1);

1262 _movp(Dest, T);	1372 _movp(Dest, T);

1263 } break;	1373 } break;

1264 case InstArithmetic::Or: {	1374 case InstArithmetic::Or: {

1265 Variable *T = makeReg(Dest->getType());	1375 Variable *T = makeReg(Dest->getType());

1266 _movp(T, Src0);	1376 _movp(T, Src0);

1267 _por(T, LEGAL_HACK(Src1));	1377 _por(T, Src1);

1268 _movp(Dest, T);	1378 _movp(Dest, T);

1269 } break;	1379 } break;

1270 case InstArithmetic::Xor: {	1380 case InstArithmetic::Xor: {

1271 Variable *T = makeReg(Dest->getType());	1381 Variable *T = makeReg(Dest->getType());

1272 _movp(T, Src0);	1382 _movp(T, Src0);

1273 _pxor(T, LEGAL_HACK(Src1));	1383 _pxor(T, Src1);

1274 _movp(Dest, T);	1384 _movp(Dest, T);

1275 } break;	1385 } break;

1276 case InstArithmetic::Sub: {	1386 case InstArithmetic::Sub: {

1277 Variable *T = makeReg(Dest->getType());	1387 Variable *T = makeReg(Dest->getType());

1278 _movp(T, Src0);	1388 _movp(T, Src0);

1279 _psub(T, LEGAL_HACK(Src1));	1389 _psub(T, Src1);

1280 _movp(Dest, T);	1390 _movp(Dest, T);

1281 } break;	1391 } break;

1282 case InstArithmetic::Mul: {	1392 case InstArithmetic::Mul: {

1283 bool TypesAreValidForPmull =	1393 bool TypesAreValidForPmull =

1284 Dest->getType() == IceType_v4i32 || Dest->getType() == IceType_v8i16;	1394 Dest->getType() == IceType_v4i32 || Dest->getType() == IceType_v8i16;

1285 bool InstructionSetIsValidForPmull =	1395 bool InstructionSetIsValidForPmull =

1286 Dest->getType() == IceType_v8i16 || InstructionSet >= SSE4_1;	1396 Dest->getType() == IceType_v8i16 || InstructionSet >= SSE4_1;

1287 if (TypesAreValidForPmull && InstructionSetIsValidForPmull) {	1397 if (TypesAreValidForPmull && InstructionSetIsValidForPmull) {

1288 Variable *T = makeReg(Dest->getType());	1398 Variable *T = makeReg(Dest->getType());

1289 _movp(T, Src0);	1399 _movp(T, Src0);

1290 _pmull(T, LEGAL_HACK(Src1));	1400 _pmull(T, Src1);

1291 _movp(Dest, T);	1401 _movp(Dest, T);

1292 } else if (Dest->getType() == IceType_v4i32) {	1402 } else if (Dest->getType() == IceType_v4i32) {

1293 // Lowering sequence:	1403 // Lowering sequence:

1294 // Note: The mask arguments have index 0 on the left.	1404 // Note: The mask arguments have index 0 on the left.

1295 //	1405 //

1296 // movups T1, Src0	1406 // movups T1, Src0

1297 // pshufd T2, Src0, {1,0,3,0}	1407 // pshufd T2, Src0, {1,0,3,0}

1298 // pshufd T3, Src1, {1,0,3,0}	1408 // pshufd T3, Src1, {1,0,3,0}

1299 // # T1 = {Src0[0] * Src1[0], Src0[2] * Src1[2]}	1409 // # T1 = {Src0[0] * Src1[0], Src0[2] * Src1[2]}

1300 // pmuludq T1, Src1	1410 // pmuludq T1, Src1

(...skipping 12 matching lines...) Expand all Loading...
1313 // Dest[0, 2], Src[0, 2]	1423 // Dest[0, 2], Src[0, 2]

1314 const unsigned Mask0202 = 0x88;	1424 const unsigned Mask0202 = 0x88;

1315 // Mask that directs pshufd to create a vector with entries	1425 // Mask that directs pshufd to create a vector with entries

1316 // Src[0, 2, 1, 3]	1426 // Src[0, 2, 1, 3]

1317 const unsigned Mask0213 = 0xd8;	1427 const unsigned Mask0213 = 0xd8;

1318 Variable *T1 = makeReg(IceType_v4i32);	1428 Variable *T1 = makeReg(IceType_v4i32);

1319 Variable *T2 = makeReg(IceType_v4i32);	1429 Variable *T2 = makeReg(IceType_v4i32);

1320 Variable *T3 = makeReg(IceType_v4i32);	1430 Variable *T3 = makeReg(IceType_v4i32);

1321 Variable *T4 = makeReg(IceType_v4i32);	1431 Variable *T4 = makeReg(IceType_v4i32);

1322 _movp(T1, Src0);	1432 _movp(T1, Src0);

1323 // TODO(wala): ALIGNHACK: Replace Src0R with Src0 and Src1R	1433 _pshufd(T2, Src0, Mask1030);

1324 // with Src1 after stack operand alignment support is	1434 _pshufd(T3, Src1, Mask1030);

1325 // implemented.	1435 _pmuludq(T1, Src1);

1326 Variable *Src0R = LEGAL_HACK(Src0);

1327 Variable *Src1R = LEGAL_HACK(Src1);

1328 _pshufd(T2, Src0R, Mask1030);

1329 _pshufd(T3, Src1R, Mask1030);

1330 _pmuludq(T1, Src1R);

1331 _pmuludq(T2, T3);	1436 _pmuludq(T2, T3);

1332 _shufps(T1, T2, Ctx->getConstantInt(IceType_i8, Mask0202));	1437 _shufps(T1, T2, Ctx->getConstantInt(IceType_i8, Mask0202));

1333 _pshufd(T4, T1, Ctx->getConstantInt(IceType_i8, Mask0213));	1438 _pshufd(T4, T1, Ctx->getConstantInt(IceType_i8, Mask0213));

1334 _movp(Dest, T4);	1439 _movp(Dest, T4);

1335 } else {	1440 } else {

1336 assert(Dest->getType() == IceType_v16i8);	1441 assert(Dest->getType() == IceType_v16i8);

1337 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);	1442 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);

1338 }	1443 }

1339 } break;	1444 } break;

1340 case InstArithmetic::Shl:	1445 case InstArithmetic::Shl:

1341 case InstArithmetic::Lshr:	1446 case InstArithmetic::Lshr:

1342 case InstArithmetic::Ashr:	1447 case InstArithmetic::Ashr:

1343 case InstArithmetic::Udiv:	1448 case InstArithmetic::Udiv:

1344 case InstArithmetic::Urem:	1449 case InstArithmetic::Urem:

1345 case InstArithmetic::Sdiv:	1450 case InstArithmetic::Sdiv:

1346 case InstArithmetic::Srem:	1451 case InstArithmetic::Srem:

1347 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);	1452 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);

1348 break;	1453 break;

1349 case InstArithmetic::Fadd: {	1454 case InstArithmetic::Fadd: {

1350 Variable *T = makeReg(Dest->getType());	1455 Variable *T = makeReg(Dest->getType());

1351 _movp(T, Src0);	1456 _movp(T, Src0);

1352 _addps(T, LEGAL_HACK(Src1));	1457 _addps(T, Src1);

1353 _movp(Dest, T);	1458 _movp(Dest, T);

1354 } break;	1459 } break;

1355 case InstArithmetic::Fsub: {	1460 case InstArithmetic::Fsub: {

1356 Variable *T = makeReg(Dest->getType());	1461 Variable *T = makeReg(Dest->getType());

1357 _movp(T, Src0);	1462 _movp(T, Src0);

1358 _subps(T, LEGAL_HACK(Src1));	1463 _subps(T, Src1);

1359 _movp(Dest, T);	1464 _movp(Dest, T);

1360 } break;	1465 } break;

1361 case InstArithmetic::Fmul: {	1466 case InstArithmetic::Fmul: {

1362 Variable *T = makeReg(Dest->getType());	1467 Variable *T = makeReg(Dest->getType());

1363 _movp(T, Src0);	1468 _movp(T, Src0);

1364 _mulps(T, LEGAL_HACK(Src1));	1469 _mulps(T, Src1);

1365 _movp(Dest, T);	1470 _movp(Dest, T);

1366 } break;	1471 } break;

1367 case InstArithmetic::Fdiv: {	1472 case InstArithmetic::Fdiv: {

1368 Variable *T = makeReg(Dest->getType());	1473 Variable *T = makeReg(Dest->getType());

1369 _movp(T, Src0);	1474 _movp(T, Src0);

1370 _divps(T, LEGAL_HACK(Src1));	1475 _divps(T, Src1);

1371 _movp(Dest, T);	1476 _movp(Dest, T);

1372 } break;	1477 } break;

1373 case InstArithmetic::Frem:	1478 case InstArithmetic::Frem:

1374 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);	1479 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);

1375 break;	1480 break;

1376 }	1481 }

1377 #undef LEGAL_HACK

1378 } else { // Dest->getType() is non-i64 scalar	1482 } else { // Dest->getType() is non-i64 scalar

1379 Variable *T_edx = NULL;	1483 Variable *T_edx = NULL;

1380 Variable *T = NULL;	1484 Variable *T = NULL;

1381 switch (Inst->getOp()) {	1485 switch (Inst->getOp()) {

1382 case InstArithmetic::_num:	1486 case InstArithmetic::_num:

1383 llvm_unreachable("Unknown arithmetic operator");	1487 llvm_unreachable("Unknown arithmetic operator");

1384 break;	1488 break;

1385 case InstArithmetic::Add:	1489 case InstArithmetic::Add:

1386 _mov(T, Src0);	1490 _mov(T, Src0);

1387 _add(T, Src1);	1491 _add(T, Src1);

(...skipping 804 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2192 // TODO(wala): Determine the best lowering sequences for each type.	2296 // TODO(wala): Determine the best lowering sequences for each type.

2193 bool CanUsePextr =	2297 bool CanUsePextr =

2194 Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1;	2298 Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1;

2195 if (CanUsePextr && Ty != IceType_v4f32) {	2299 if (CanUsePextr && Ty != IceType_v4f32) {

2196 // Use pextrb, pextrw, or pextrd.	2300 // Use pextrb, pextrw, or pextrd.

2197 Constant *Mask = Ctx->getConstantInt(IceType_i8, Index);	2301 Constant *Mask = Ctx->getConstantInt(IceType_i8, Index);

2198 Variable *SourceVectR = legalizeToVar(SourceVectNotLegalized);	2302 Variable *SourceVectR = legalizeToVar(SourceVectNotLegalized);

2199 _pextr(ExtractedElementR, SourceVectR, Mask);	2303 _pextr(ExtractedElementR, SourceVectR, Mask);

2200 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {	2304 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {

2201 // Use pshufd and movd/movss.	2305 // Use pshufd and movd/movss.

2202 //

2203 // ALIGNHACK: Force vector operands to registers in instructions

2204 // that require aligned memory operands until support for data

2205 // alignment is implemented.

2206 #define ALIGN_HACK(Vect) legalizeToVar((Vect))

2207 Operand *SourceVectRM =

2208 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem);

2209 Variable *T = NULL;	2306 Variable *T = NULL;

2210 if (Index) {	2307 if (Index) {

2211 // The shuffle only needs to occur if the element to be extracted	2308 // The shuffle only needs to occur if the element to be extracted

2212 // is not at the lowest index.	2309 // is not at the lowest index.

2213 Constant *Mask = Ctx->getConstantInt(IceType_i8, Index);	2310 Constant *Mask = Ctx->getConstantInt(IceType_i8, Index);

2214 T = makeReg(Ty);	2311 T = makeReg(Ty);

2215 _pshufd(T, ALIGN_HACK(SourceVectRM), Mask);	2312 _pshufd(T, legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem), Mask);

2216 } else {	2313 } else {

2217 T = ALIGN_HACK(SourceVectRM);	2314 T = legalizeToVar(SourceVectNotLegalized);

2218 }	2315 }

2219	2316

2220 if (InVectorElementTy == IceType_i32) {	2317 if (InVectorElementTy == IceType_i32) {

2221 _movd(ExtractedElementR, T);	2318 _movd(ExtractedElementR, T);

2222 } else { // InVectorElementTy == IceType_f32	2319 } else { // InVectorElementTy == IceType_f32

2223 // TODO(wala): _movss is only used here because _mov does not	2320 // TODO(wala): _movss is only used here because _mov does not

2224 // allow a vector source and a scalar destination. _mov should be	2321 // allow a vector source and a scalar destination. _mov should be

2225 // able to be used here.	2322 // able to be used here.

2226 // _movss is a binary instruction, so the FakeDef is needed to	2323 // _movss is a binary instruction, so the FakeDef is needed to

2227 // keep the live range analysis consistent.	2324 // keep the live range analysis consistent.

2228 Context.insert(InstFakeDef::create(Func, ExtractedElementR));	2325 Context.insert(InstFakeDef::create(Func, ExtractedElementR));

2229 _movss(ExtractedElementR, T);	2326 _movss(ExtractedElementR, T);

2230 }	2327 }

2231 #undef ALIGN_HACK

2232 } else {	2328 } else {

2233 assert(Ty == IceType_v16i8 || Ty == IceType_v16i1);	2329 assert(Ty == IceType_v16i8 || Ty == IceType_v16i1);

2234 // Spill the value to a stack slot and do the extraction in memory.	2330 // Spill the value to a stack slot and do the extraction in memory.

2235 //	2331 //

2236 // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when	2332 // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when

2237 // support for legalizing to mem is implemented.	2333 // support for legalizing to mem is implemented.

2238 Variable *Slot = Func->makeVariable(Ty, Context.getNode());	2334 Variable *Slot = Func->makeVariable(Ty, Context.getNode());

2239 Slot->setWeight(RegWeight::Zero);	2335 Slot->setWeight(RegWeight::Zero);

2240 _movp(Slot, legalizeToVar(SourceVectNotLegalized));	2336 _movp(Slot, legalizeToVar(SourceVectNotLegalized));

2241	2337

(...skipping 38 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2280	2376

2281 if (Condition == InstFcmp::True) {	2377 if (Condition == InstFcmp::True) {

2282 // makeVectorOfMinusOnes() requires an integer vector type.	2378 // makeVectorOfMinusOnes() requires an integer vector type.

2283 T = makeVectorOfMinusOnes(IceType_v4i32);	2379 T = makeVectorOfMinusOnes(IceType_v4i32);

2284 } else if (Condition == InstFcmp::False) {	2380 } else if (Condition == InstFcmp::False) {

2285 T = makeVectorOfZeros(Dest->getType());	2381 T = makeVectorOfZeros(Dest->getType());

2286 } else {	2382 } else {

2287 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);	2383 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);

2288 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);	2384 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);

2289	2385

2290 // ALIGNHACK: Without support for data alignment, both operands to

2291 // cmpps need to be forced into registers. Once support for data

2292 // alignment is implemented, remove LEGAL_HACK.

2293 #define LEGAL_HACK(Vect) legalizeToVar((Vect))

2294 switch (Condition) {	2386 switch (Condition) {

2295 default: {	2387 default: {

2296 InstX8632Cmpps::CmppsCond Predicate = TableFcmp[Index].Predicate;	2388 InstX8632Cmpps::CmppsCond Predicate = TableFcmp[Index].Predicate;

2297 assert(Predicate != InstX8632Cmpps::Cmpps_Invalid);	2389 assert(Predicate != InstX8632Cmpps::Cmpps_Invalid);

2298 T = makeReg(Src0RM->getType());	2390 T = makeReg(Src0RM->getType());

2299 _movp(T, Src0RM);	2391 _movp(T, Src0RM);

2300 _cmpps(T, LEGAL_HACK(Src1RM), Predicate);	2392 _cmpps(T, Src1RM, Predicate);

2301 } break;	2393 } break;

2302 case InstFcmp::One: {	2394 case InstFcmp::One: {

2303 // Check both unequal and ordered.	2395 // Check both unequal and ordered.

2304 T = makeReg(Src0RM->getType());	2396 T = makeReg(Src0RM->getType());

2305 Variable *T2 = makeReg(Src0RM->getType());	2397 Variable *T2 = makeReg(Src0RM->getType());

2306 Src1RM = LEGAL_HACK(Src1RM);

2307 _movp(T, Src0RM);	2398 _movp(T, Src0RM);

2308 _cmpps(T, Src1RM, InstX8632Cmpps::Cmpps_neq);	2399 _cmpps(T, Src1RM, InstX8632Cmpps::Cmpps_neq);

2309 _movp(T2, Src0RM);	2400 _movp(T2, Src0RM);

2310 _cmpps(T2, Src1RM, InstX8632Cmpps::Cmpps_ord);	2401 _cmpps(T2, Src1RM, InstX8632Cmpps::Cmpps_ord);

2311 _pand(T, T2);	2402 _pand(T, T2);

2312 } break;	2403 } break;

2313 case InstFcmp::Ueq: {	2404 case InstFcmp::Ueq: {

2314 // Check both equal or unordered.	2405 // Check both equal or unordered.

2315 T = makeReg(Src0RM->getType());	2406 T = makeReg(Src0RM->getType());

2316 Variable *T2 = makeReg(Src0RM->getType());	2407 Variable *T2 = makeReg(Src0RM->getType());

2317 Src1RM = LEGAL_HACK(Src1RM);

2318 _movp(T, Src0RM);	2408 _movp(T, Src0RM);

2319 _cmpps(T, Src1RM, InstX8632Cmpps::Cmpps_eq);	2409 _cmpps(T, Src1RM, InstX8632Cmpps::Cmpps_eq);

2320 _movp(T2, Src0RM);	2410 _movp(T2, Src0RM);

2321 _cmpps(T2, Src1RM, InstX8632Cmpps::Cmpps_unord);	2411 _cmpps(T2, Src1RM, InstX8632Cmpps::Cmpps_unord);

2322 _por(T, T2);	2412 _por(T, T2);

2323 } break;	2413 } break;

2324 }	2414 }

2325 #undef LEGAL_HACK

2326 }	2415 }

2327	2416

2328 _movp(Dest, T);	2417 _movp(Dest, T);

2329 eliminateNextVectorSextInstruction(Dest);	2418 eliminateNextVectorSextInstruction(Dest);

2330 return;	2419 return;

2331 }	2420 }

2332	2421

2333 // Lowering a = fcmp cond, b, c	2422 // Lowering a = fcmp cond, b, c

2334 // ucomiss b, c /* only if C1 != Br_None */	2423 // ucomiss b, c /* only if C1 != Br_None */

2335 // /* but swap b,c order if SwapOperands==true */	2424 // /* but swap b,c order if SwapOperands==true */

(...skipping 84 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2420 Variable *T1 = makeReg(Ty);	2509 Variable *T1 = makeReg(Ty);

2421 Variable *HighOrderBits = makeVectorOfHighOrderBits(Ty);	2510 Variable *HighOrderBits = makeVectorOfHighOrderBits(Ty);

2422 _movp(T0, Src0RM);	2511 _movp(T0, Src0RM);

2423 _pxor(T0, HighOrderBits);	2512 _pxor(T0, HighOrderBits);

2424 _movp(T1, Src1RM);	2513 _movp(T1, Src1RM);

2425 _pxor(T1, HighOrderBits);	2514 _pxor(T1, HighOrderBits);

2426 Src0RM = T0;	2515 Src0RM = T0;

2427 Src1RM = T1;	2516 Src1RM = T1;

2428 }	2517 }

2429	2518

2430 // TODO: ALIGNHACK: Both operands to compare instructions need to be

2431 // in registers until data alignment support is implemented. Once

2432 // there is support for data alignment, LEGAL_HACK can be removed.

2433 #define LEGAL_HACK(Vect) legalizeToVar((Vect))

2434 Variable *T = makeReg(Ty);	2519 Variable *T = makeReg(Ty);

2435 switch (Condition) {	2520 switch (Condition) {

2436 default:	2521 default:

2437 llvm_unreachable("unexpected condition");	2522 llvm_unreachable("unexpected condition");

2438 break;	2523 break;

2439 case InstIcmp::Eq: {	2524 case InstIcmp::Eq: {

2440 _movp(T, Src0RM);	2525 _movp(T, Src0RM);

2441 _pcmpeq(T, LEGAL_HACK(Src1RM));	2526 _pcmpeq(T, Src1RM);

2442 } break;	2527 } break;

2443 case InstIcmp::Ne: {	2528 case InstIcmp::Ne: {

2444 _movp(T, Src0RM);	2529 _movp(T, Src0RM);

2445 _pcmpeq(T, LEGAL_HACK(Src1RM));	2530 _pcmpeq(T, Src1RM);

2446 Variable *MinusOne = makeVectorOfMinusOnes(Ty);	2531 Variable *MinusOne = makeVectorOfMinusOnes(Ty);

2447 _pxor(T, MinusOne);	2532 _pxor(T, MinusOne);

2448 } break;	2533 } break;

2449 case InstIcmp::Ugt:	2534 case InstIcmp::Ugt:

2450 case InstIcmp::Sgt: {	2535 case InstIcmp::Sgt: {

2451 _movp(T, Src0RM);	2536 _movp(T, Src0RM);

2452 _pcmpgt(T, LEGAL_HACK(Src1RM));	2537 _pcmpgt(T, Src1RM);

2453 } break;	2538 } break;

2454 case InstIcmp::Uge:	2539 case InstIcmp::Uge:

2455 case InstIcmp::Sge: {	2540 case InstIcmp::Sge: {

2456 // !(Src1RM > Src0RM)	2541 // !(Src1RM > Src0RM)

2457 _movp(T, Src1RM);	2542 _movp(T, Src1RM);

2458 _pcmpgt(T, LEGAL_HACK(Src0RM));	2543 _pcmpgt(T, Src0RM);

2459 Variable *MinusOne = makeVectorOfMinusOnes(Ty);	2544 Variable *MinusOne = makeVectorOfMinusOnes(Ty);

2460 _pxor(T, MinusOne);	2545 _pxor(T, MinusOne);

2461 } break;	2546 } break;

2462 case InstIcmp::Ult:	2547 case InstIcmp::Ult:

2463 case InstIcmp::Slt: {	2548 case InstIcmp::Slt: {

2464 _movp(T, Src1RM);	2549 _movp(T, Src1RM);

2465 _pcmpgt(T, LEGAL_HACK(Src0RM));	2550 _pcmpgt(T, Src0RM);

2466 } break;	2551 } break;

2467 case InstIcmp::Ule:	2552 case InstIcmp::Ule:

2468 case InstIcmp::Sle: {	2553 case InstIcmp::Sle: {

2469 // !(Src0RM > Src1RM)	2554 // !(Src0RM > Src1RM)

2470 _movp(T, Src0RM);	2555 _movp(T, Src0RM);

2471 _pcmpgt(T, LEGAL_HACK(Src1RM));	2556 _pcmpgt(T, Src1RM);

2472 Variable *MinusOne = makeVectorOfMinusOnes(Ty);	2557 Variable *MinusOne = makeVectorOfMinusOnes(Ty);

2473 _pxor(T, MinusOne);	2558 _pxor(T, MinusOne);

2474 } break;	2559 } break;

2475 }	2560 }

2476 #undef LEGAL_HACK

2477	2561

2478 _movp(Dest, T);	2562 _movp(Dest, T);

2479 eliminateNextVectorSextInstruction(Dest);	2563 eliminateNextVectorSextInstruction(Dest);

2480 return;	2564 return;

2481 }	2565 }

2482	2566

2483 // If Src1 is an immediate, or known to be a physical register, we can	2567 // If Src1 is an immediate, or known to be a physical register, we can

2484 // allow Src0 to be a memory operand. Otherwise, Src0 must be copied into	2568 // allow Src0 to be a memory operand. Otherwise, Src0 must be copied into

2485 // a physical register. (Actually, either Src0 or Src1 can be chosen for	2569 // a physical register. (Actually, either Src0 or Src1 can be chosen for

2486 // the physical register, but unfortunately we have to commit to one or	2570 // the physical register, but unfortunately we have to commit to one or

(...skipping 155 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2642 // insertelement into index 3 (result is stored in T):	2726 // insertelement into index 3 (result is stored in T):

2643 // T := SourceVectRM	2727 // T := SourceVectRM

2644 // ElementR := ElementR[0, 0] T[0, 2]	2728 // ElementR := ElementR[0, 0] T[0, 2]

2645 // T := T[0, 1] ElementR[3, 0]	2729 // T := T[0, 1] ElementR[3, 0]

2646 const unsigned char Mask1[3] = {0, 192, 128};	2730 const unsigned char Mask1[3] = {0, 192, 128};

2647 const unsigned char Mask2[3] = {227, 196, 52};	2731 const unsigned char Mask2[3] = {227, 196, 52};

2648	2732

2649 Constant *Mask1Constant = Ctx->getConstantInt(IceType_i8, Mask1[Index - 1]);	2733 Constant *Mask1Constant = Ctx->getConstantInt(IceType_i8, Mask1[Index - 1]);

2650 Constant *Mask2Constant = Ctx->getConstantInt(IceType_i8, Mask2[Index - 1]);	2734 Constant *Mask2Constant = Ctx->getConstantInt(IceType_i8, Mask2[Index - 1]);

2651	2735

2652 // ALIGNHACK: Force vector operands to registers in instructions

2653 // that require aligned memory operands until support for data

2654 // alignment is implemented.

2655 #define ALIGN_HACK(Vect) legalizeToVar((Vect))

2656 if (Index == 1) {	2736 if (Index == 1) {

2657 SourceVectRM = ALIGN_HACK(SourceVectRM);

2658 _shufps(ElementR, SourceVectRM, Mask1Constant);	2737 _shufps(ElementR, SourceVectRM, Mask1Constant);

2659 _shufps(ElementR, SourceVectRM, Mask2Constant);	2738 _shufps(ElementR, SourceVectRM, Mask2Constant);

2660 _movp(Inst->getDest(), ElementR);	2739 _movp(Inst->getDest(), ElementR);

2661 } else {	2740 } else {

2662 Variable *T = makeReg(Ty);	2741 Variable *T = makeReg(Ty);

2663 _movp(T, SourceVectRM);	2742 _movp(T, SourceVectRM);

2664 _shufps(ElementR, T, Mask1Constant);	2743 _shufps(ElementR, T, Mask1Constant);

2665 _shufps(T, ElementR, Mask2Constant);	2744 _shufps(T, ElementR, Mask2Constant);

2666 _movp(Inst->getDest(), T);	2745 _movp(Inst->getDest(), T);

2667 }	2746 }

2668 #undef ALIGN_HACK

2669 } else {	2747 } else {

2670 assert(Ty == IceType_v16i8 || Ty == IceType_v16i1);	2748 assert(Ty == IceType_v16i8 || Ty == IceType_v16i1);

2671 // Spill the value to a stack slot and perform the insertion in	2749 // Spill the value to a stack slot and perform the insertion in

2672 // memory.	2750 // memory.

2673 //	2751 //

2674 // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when	2752 // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when

2675 // support for legalizing to mem is implemented.	2753 // support for legalizing to mem is implemented.

2676 Variable *Slot = Func->makeVariable(Ty, Context.getNode());	2754 Variable *Slot = Func->makeVariable(Ty, Context.getNode());

2677 Slot->setWeight(RegWeight::Zero);	2755 Slot->setWeight(RegWeight::Zero);

2678 _movp(Slot, legalizeToVar(SourceVectNotLegalized));	2756 _movp(Slot, legalizeToVar(SourceVectNotLegalized));

(...skipping 941 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3620 Variable *Dest = Inst->getDest();	3698 Variable *Dest = Inst->getDest();

3621 Operand *SrcT = Inst->getTrueOperand();	3699 Operand *SrcT = Inst->getTrueOperand();

3622 Operand *SrcF = Inst->getFalseOperand();	3700 Operand *SrcF = Inst->getFalseOperand();

3623 Operand *Condition = Inst->getCondition();	3701 Operand *Condition = Inst->getCondition();

3624	3702

3625 if (isVectorType(Dest->getType())) {	3703 if (isVectorType(Dest->getType())) {

3626 Type SrcTy = SrcT->getType();	3704 Type SrcTy = SrcT->getType();

3627 Variable *T = makeReg(SrcTy);	3705 Variable *T = makeReg(SrcTy);

3628 Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem);	3706 Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem);

3629 Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem);	3707 Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem);

3630 // ALIGNHACK: Until data alignment support is implemented, vector

3631 // instructions need to have vector operands in registers. Once

3632 // there is support for data alignment, LEGAL_HACK can be removed.

3633 #define LEGAL_HACK(Vect) legalizeToVar((Vect))

3634 if (InstructionSet >= SSE4_1) {	3708 if (InstructionSet >= SSE4_1) {

3635 // TODO(wala): If the condition operand is a constant, use blendps	3709 // TODO(wala): If the condition operand is a constant, use blendps

3636 // or pblendw.	3710 // or pblendw.

3637 //	3711 //

3638 // Use blendvps or pblendvb to implement select.	3712 // Use blendvps or pblendvb to implement select.

3639 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 ||	3713 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 ||

3640 SrcTy == IceType_v4f32) {	3714 SrcTy == IceType_v4f32) {

3641 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);	3715 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);

3642 Variable *xmm0 = makeReg(IceType_v4i32, Reg_xmm0);	3716 Variable *xmm0 = makeReg(IceType_v4i32, Reg_xmm0);

3643 _movp(xmm0, ConditionRM);	3717 _movp(xmm0, ConditionRM);

3644 _psll(xmm0, Ctx->getConstantInt(IceType_i8, 31));	3718 _psll(xmm0, Ctx->getConstantInt(IceType_i8, 31));

3645 _movp(T, SrcFRM);	3719 _movp(T, SrcFRM);

3646 _blendvps(T, LEGAL_HACK(SrcTRM), xmm0);	3720 _blendvps(T, SrcTRM, xmm0);

3647 _movp(Dest, T);	3721 _movp(Dest, T);

3648 } else {	3722 } else {

3649 assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16);	3723 assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16);

3650 Type SignExtTy = Condition->getType() == IceType_v8i1 ? IceType_v8i16	3724 Type SignExtTy = Condition->getType() == IceType_v8i1 ? IceType_v8i16

3651 : IceType_v16i8;	3725 : IceType_v16i8;

3652 Variable *xmm0 = makeReg(SignExtTy, Reg_xmm0);	3726 Variable *xmm0 = makeReg(SignExtTy, Reg_xmm0);

3653 lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition));	3727 lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition));

3654 _movp(T, SrcFRM);	3728 _movp(T, SrcFRM);

3655 _pblendvb(T, LEGAL_HACK(SrcTRM), xmm0);	3729 _pblendvb(T, SrcTRM, xmm0);

3656 _movp(Dest, T);	3730 _movp(Dest, T);

3657 }	3731 }

3658 return;	3732 return;

3659 }	3733 }

3660 // Lower select without SSE4.1:	3734 // Lower select without SSE4.1:

3661 // a=d?b:c ==>	3735 // a=d?b:c ==>

3662 // if elementtype(d) != i1:	3736 // if elementtype(d) != i1:

3663 // d=sext(d);	3737 // d=sext(d);

3664 // a=(b&d)|(c&~d);	3738 // a=(b&d)|(c&~d);

3665 Variable *T2 = makeReg(SrcTy);	3739 Variable *T2 = makeReg(SrcTy);

3666 // Sign extend the condition operand if applicable.	3740 // Sign extend the condition operand if applicable.

3667 if (SrcTy == IceType_v4f32) {	3741 if (SrcTy == IceType_v4f32) {

3668 // The sext operation takes only integer arguments.	3742 // The sext operation takes only integer arguments.

3669 Variable *T3 = Func->makeVariable(IceType_v4i32, Context.getNode());	3743 Variable *T3 = Func->makeVariable(IceType_v4i32, Context.getNode());

3670 lowerCast(InstCast::create(Func, InstCast::Sext, T3, Condition));	3744 lowerCast(InstCast::create(Func, InstCast::Sext, T3, Condition));

3671 _movp(T, T3);	3745 _movp(T, T3);

3672 } else if (typeElementType(SrcTy) != IceType_i1) {	3746 } else if (typeElementType(SrcTy) != IceType_i1) {

3673 lowerCast(InstCast::create(Func, InstCast::Sext, T, Condition));	3747 lowerCast(InstCast::create(Func, InstCast::Sext, T, Condition));

3674 } else {	3748 } else {

3675 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);	3749 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);

3676 _movp(T, ConditionRM);	3750 _movp(T, ConditionRM);

3677 }	3751 }

3678 _movp(T2, T);	3752 _movp(T2, T);

3679 _pand(T, LEGAL_HACK(SrcTRM));	3753 _pand(T, SrcTRM);

3680 _pandn(T2, LEGAL_HACK(SrcFRM));	3754 _pandn(T2, SrcFRM);

3681 _por(T, T2);	3755 _por(T, T2);

3682 _movp(Dest, T);	3756 _movp(Dest, T);

3683 #undef LEGAL_HACK

3684	3757

3685 return;	3758 return;

3686 }	3759 }

3687	3760

3688 // a=d?b:c ==> cmp d,0; a=b; jne L1; FakeUse(a); a=c; L1:	3761 // a=d?b:c ==> cmp d,0; a=b; jne L1; FakeUse(a); a=c; L1:

3689 Operand *ConditionRMI = legalize(Condition);	3762 Operand *ConditionRMI = legalize(Condition);

3690 Constant *Zero = Ctx->getConstantZero(IceType_i32);	3763 Constant *Zero = Ctx->getConstantZero(IceType_i32);

3691 InstX8632Label *Label = InstX8632Label::create(Func, this);	3764 InstX8632Label *Label = InstX8632Label::create(Func, this);

3692	3765

3693 if (Dest->getType() == IceType_i64) {	3766 if (Dest->getType() == IceType_i64) {

(...skipping 542 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
4236 for (SizeT i = 0; i < Size; ++i) {	4309 for (SizeT i = 0; i < Size; ++i) {

4237 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n";	4310 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n";

4238 }	4311 }

4239 Str << "\t.size\t" << MangledName << ", " << Size << "\n";	4312 Str << "\t.size\t" << MangledName << ", " << Size << "\n";

4240 }	4313 }

4241 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName	4314 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName

4242 << "\n";	4315 << "\n";

4243 }	4316 }

4244	4317

4245 } // end of namespace Ice	4318 } // end of namespace Ice

OLD	NEW

« no previous file with comments | « src/IceTargetLoweringX8632.h ('k') | tests_lit/llvm2ice_tests/align-spill-locations.ll » ('j') | no next file with comments »