OLD | NEW |
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// | 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 // | 9 // |
10 // This file implements the TargetLoweringX8632 class, which | 10 // This file implements the TargetLoweringX8632 class, which |
11 // consists almost entirely of the lowering sequence for each | 11 // consists almost entirely of the lowering sequence for each |
12 // high-level instruction. It also implements | 12 // high-level instruction. It also implements |
13 // TargetX8632Fast::postLower() which does the simplest possible | 13 // TargetX8632Fast::postLower() which does the simplest possible |
14 // register allocation for the "fast" target. | 14 // register allocation for the "fast" target. |
15 // | 15 // |
16 //===----------------------------------------------------------------------===// | 16 //===----------------------------------------------------------------------===// |
17 | 17 |
18 #include "IceDefs.h" | 18 #include "IceDefs.h" |
19 #include "IceCfg.h" | 19 #include "IceCfg.h" |
20 #include "IceCfgNode.h" | 20 #include "IceCfgNode.h" |
21 #include "IceInstX8632.h" | 21 #include "IceInstX8632.h" |
22 #include "IceOperand.h" | 22 #include "IceOperand.h" |
23 #include "IceTargetLoweringX8632.def" | 23 #include "IceTargetLoweringX8632.def" |
24 #include "IceTargetLoweringX8632.h" | 24 #include "IceTargetLoweringX8632.h" |
25 #include "llvm/Support/CommandLine.h" | 25 #include "llvm/Support/CommandLine.h" |
26 | 26 |
| 27 #include <strings.h> |
| 28 |
27 namespace Ice { | 29 namespace Ice { |
28 | 30 |
29 namespace { | 31 namespace { |
30 | 32 |
31 // The following table summarizes the logic for lowering the fcmp | 33 // The following table summarizes the logic for lowering the fcmp |
32 // instruction. There is one table entry for each of the 16 conditions. | 34 // instruction. There is one table entry for each of the 16 conditions. |
33 // | 35 // |
34 // The first four columns describe the case when the operands are | 36 // The first four columns describe the case when the operands are |
35 // floating point scalar values. A comment in lowerFcmp() describes the | 37 // floating point scalar values. A comment in lowerFcmp() describes the |
36 // lowering template. In the most general case, there is a compare | 38 // lowering template. In the most general case, there is a compare |
(...skipping 84 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
121 } | 123 } |
122 | 124 |
123 // The maximum number of arguments to pass in XMM registers | 125 // The maximum number of arguments to pass in XMM registers |
124 const uint32_t X86_MAX_XMM_ARGS = 4; | 126 const uint32_t X86_MAX_XMM_ARGS = 4; |
125 // The number of bits in a byte | 127 // The number of bits in a byte |
126 const uint32_t X86_CHAR_BIT = 8; | 128 const uint32_t X86_CHAR_BIT = 8; |
127 // Stack alignment | 129 // Stack alignment |
128 const uint32_t X86_STACK_ALIGNMENT_BYTES = 16; | 130 const uint32_t X86_STACK_ALIGNMENT_BYTES = 16; |
129 // Size of the return address on the stack | 131 // Size of the return address on the stack |
130 const uint32_t X86_RET_IP_SIZE_BYTES = 4; | 132 const uint32_t X86_RET_IP_SIZE_BYTES = 4; |
| 133 // The base 2 logarithm of the width in bytes of the smallest stack slot |
| 134 const uint32_t X86_LOG2_OF_MIN_STACK_SLOT_SIZE = 2; |
| 135 // The base 2 logarithm of the width in bytes of the largest stack slot |
| 136 const uint32_t X86_LOG2_OF_MAX_STACK_SLOT_SIZE = 4; |
131 | 137 |
132 // Value is a size in bytes. Return Value adjusted to the next highest | 138 // Value and Alignment are in bytes. Return Value adjusted to the next |
133 // multiple of the stack alignment. | 139 // highest multiple of Alignment. |
| 140 uint32_t applyAlignment(uint32_t Value, uint32_t Alignment) { |
| 141 // Alignment must be a power of 2. |
| 142 assert((Alignment & (Alignment - 1)) == 0); |
| 143 return (Value + Alignment - 1) & -Alignment; |
| 144 } |
| 145 |
| 146 // Value is in bytes. Return Value adjusted to the next highest multiple |
| 147 // of the stack alignment. |
134 uint32_t applyStackAlignment(uint32_t Value) { | 148 uint32_t applyStackAlignment(uint32_t Value) { |
135 // power of 2 | 149 return applyAlignment(Value, X86_STACK_ALIGNMENT_BYTES); |
136 assert((X86_STACK_ALIGNMENT_BYTES & (X86_STACK_ALIGNMENT_BYTES - 1)) == 0); | |
137 return (Value + X86_STACK_ALIGNMENT_BYTES - 1) & -X86_STACK_ALIGNMENT_BYTES; | |
138 } | 150 } |
139 | 151 |
140 // Instruction set options | 152 // Instruction set options |
141 namespace cl = ::llvm::cl; | 153 namespace cl = ::llvm::cl; |
142 cl::opt<TargetX8632::X86InstructionSet> CLInstructionSet( | 154 cl::opt<TargetX8632::X86InstructionSet> CLInstructionSet( |
143 "mattr", cl::desc("X86 target attributes"), | 155 "mattr", cl::desc("X86 target attributes"), |
144 cl::init(TargetX8632::SSE2), | 156 cl::init(TargetX8632::SSE2), |
145 cl::values( | 157 cl::values( |
146 clEnumValN(TargetX8632::SSE2, "sse2", | 158 clEnumValN(TargetX8632::SSE2, "sse2", |
147 "Enable SSE2 instructions (default)"), | 159 "Enable SSE2 instructions (default)"), |
(...skipping 93 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
241 ICETYPE_TABLE; | 253 ICETYPE_TABLE; |
242 #undef X | 254 #undef X |
243 } | 255 } |
244 } | 256 } |
245 | 257 |
246 } // end of anonymous namespace | 258 } // end of anonymous namespace |
247 | 259 |
248 TargetX8632::TargetX8632(Cfg *Func) | 260 TargetX8632::TargetX8632(Cfg *Func) |
249 : TargetLowering(Func), InstructionSet(CLInstructionSet), | 261 : TargetLowering(Func), InstructionSet(CLInstructionSet), |
250 IsEbpBasedFrame(false), NeedsStackAlignment(false), FrameSizeLocals(0), | 262 IsEbpBasedFrame(false), NeedsStackAlignment(false), FrameSizeLocals(0), |
251 LocalsSizeBytes(0), NextLabelNumber(0), ComputedLiveRanges(false), | 263 SpillAreaSizeBytes(0), NextLabelNumber(0), ComputedLiveRanges(false), |
252 PhysicalRegisters(VarList(Reg_NUM)) { | 264 PhysicalRegisters(VarList(Reg_NUM)) { |
253 // TODO: Don't initialize IntegerRegisters and friends every time. | 265 // TODO: Don't initialize IntegerRegisters and friends every time. |
254 // Instead, initialize in some sort of static initializer for the | 266 // Instead, initialize in some sort of static initializer for the |
255 // class. | 267 // class. |
256 llvm::SmallBitVector IntegerRegisters(Reg_NUM); | 268 llvm::SmallBitVector IntegerRegisters(Reg_NUM); |
257 llvm::SmallBitVector IntegerRegistersI8(Reg_NUM); | 269 llvm::SmallBitVector IntegerRegistersI8(Reg_NUM); |
258 llvm::SmallBitVector FloatRegisters(Reg_NUM); | 270 llvm::SmallBitVector FloatRegisters(Reg_NUM); |
259 llvm::SmallBitVector VectorRegisters(Reg_NUM); | 271 llvm::SmallBitVector VectorRegisters(Reg_NUM); |
260 llvm::SmallBitVector InvalidRegisters(Reg_NUM); | 272 llvm::SmallBitVector InvalidRegisters(Reg_NUM); |
261 ScratchRegs.resize(Reg_NUM); | 273 ScratchRegs.resize(Reg_NUM); |
(...skipping 251 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
513 Variable *RegisterArg = Func->makeVariable(Ty, DefNode, Name); | 525 Variable *RegisterArg = Func->makeVariable(Ty, DefNode, Name); |
514 RegisterArg->setRegNum(RegNum); | 526 RegisterArg->setRegNum(RegNum); |
515 RegisterArg->setIsArg(Func); | 527 RegisterArg->setIsArg(Func); |
516 Arg->setIsArg(Func, false); | 528 Arg->setIsArg(Func, false); |
517 | 529 |
518 Args[I] = RegisterArg; | 530 Args[I] = RegisterArg; |
519 Context.insert(InstAssign::create(Func, Arg, RegisterArg)); | 531 Context.insert(InstAssign::create(Func, Arg, RegisterArg)); |
520 } | 532 } |
521 } | 533 } |
522 | 534 |
| 535 void TargetX8632::sortByAlignment(VarList &Dest, const VarList &Source) const { |
| 536 // Partition the variables into buckets by the base-2 log of their |
| 537 // width in bytes, then emit the buckets from largest to smallest. |
| 538 const SizeT NumBuckets = |
| 539 X86_LOG2_OF_MAX_STACK_SLOT_SIZE - X86_LOG2_OF_MIN_STACK_SLOT_SIZE + 1; |
| 540 VarList Buckets[NumBuckets]; |
| 541 |
| 542 for (VarList::const_iterator I = Source.begin(), E = Source.end(); I != E; |
| 543 ++I) { |
| 544 Variable *Var = *I; |
| 545 uint32_t NaturalAlignment = typeWidthInBytesOnStack(Var->getType()); |
| 546 SizeT LogNaturalAlignment = ffs(NaturalAlignment) - 1; |
| 547 assert(LogNaturalAlignment >= X86_LOG2_OF_MIN_STACK_SLOT_SIZE); |
| 548 assert(LogNaturalAlignment <= X86_LOG2_OF_MAX_STACK_SLOT_SIZE); |
| 549 SizeT BucketIndex = LogNaturalAlignment - X86_LOG2_OF_MIN_STACK_SLOT_SIZE; |
| 550 Buckets[BucketIndex].push_back(Var); |
| 551 } |
| 552 |
| 553 for (SizeT I = 0, E = NumBuckets; I < E; ++I) { |
| 554 VarList &List = Buckets[NumBuckets - I - 1]; |
| 555 Dest.insert(Dest.end(), List.begin(), List.end()); |
| 556 } |
| 557 } |
| 558 |
523 // Helper function for addProlog(). | 559 // Helper function for addProlog(). |
524 // | 560 // |
525 // This assumes Arg is an argument passed on the stack. This sets the | 561 // This assumes Arg is an argument passed on the stack. This sets the |
526 // frame offset for Arg and updates InArgsSizeBytes according to Arg's | 562 // frame offset for Arg and updates InArgsSizeBytes according to Arg's |
527 // width. For an I64 arg that has been split into Lo and Hi components, | 563 // width. For an I64 arg that has been split into Lo and Hi components, |
528 // it calls itself recursively on the components, taking care to handle | 564 // it calls itself recursively on the components, taking care to handle |
529 // Lo first because of the little-endian architecture. Lastly, this | 565 // Lo first because of the little-endian architecture. Lastly, this |
530 // function generates an instruction to copy Arg into its assigned | 566 // function generates an instruction to copy Arg into its assigned |
531 // register if applicable. | 567 // register if applicable. |
532 void TargetX8632::finishArgumentLowering(Variable *Arg, Variable *FramePtr, | 568 void TargetX8632::finishArgumentLowering(Variable *Arg, Variable *FramePtr, |
(...skipping 23 matching lines...) Expand all Loading... |
556 _movp(Arg, Mem); | 592 _movp(Arg, Mem); |
557 } else { | 593 } else { |
558 _mov(Arg, Mem); | 594 _mov(Arg, Mem); |
559 } | 595 } |
560 } | 596 } |
561 } | 597 } |
562 | 598 |
563 Type TargetX8632::stackSlotType() { return IceType_i32; } | 599 Type TargetX8632::stackSlotType() { return IceType_i32; } |
564 | 600 |
565 void TargetX8632::addProlog(CfgNode *Node) { | 601 void TargetX8632::addProlog(CfgNode *Node) { |
| 602 // Stack frame layout: |
| 603 // |
| 604 // +------------------------+ |
| 605 // | 1. return address | |
| 606 // +------------------------+ |
| 607 // | 2. preserved registers | |
| 608 // +------------------------+ |
| 609 // | 3. padding | |
| 610 // +------------------------+ |
| 611 // | 4. global spill area | |
| 612 // +------------------------+ |
| 613 // | 5. padding | |
| 614 // +------------------------+ |
| 615 // | 6. local spill area | |
| 616 // +------------------------+ |
| 617 // | 7. padding | |
| 618 // +------------------------+ |
| 619 // | 8. allocas | |
| 620 // +------------------------+ |
| 621 // |
| 622 // The following variables record the size in bytes of the given areas: |
| 623 // * X86_RET_IP_SIZE_BYTES: area 1 |
| 624 // * PreservedRegsSizeBytes: area 2 |
| 625 // * SpillAreaPaddingBytes: area 3 |
| 626 // * GlobalsSize: area 4 |
| 627 // * GlobalsAndSubsequentPaddingSize: areas 4 - 5 |
| 628 // * LocalsSpillAreaSize: area 6 |
| 629 // * SpillAreaSizeBytes: areas 3 - 7 |
| 630 |
566 // If SimpleCoalescing is false, each variable without a register | 631 // If SimpleCoalescing is false, each variable without a register |
567 // gets its own unique stack slot, which leads to large stack | 632 // gets its own unique stack slot, which leads to large stack |
568 // frames. If SimpleCoalescing is true, then each "global" variable | 633 // frames. If SimpleCoalescing is true, then each "global" variable |
569 // without a register gets its own slot, but "local" variable slots | 634 // without a register gets its own slot, but "local" variable slots |
570 // are reused across basic blocks. E.g., if A and B are local to | 635 // are reused across basic blocks. E.g., if A and B are local to |
571 // block 1 and C is local to block 2, then C may share a slot with A | 636 // block 1 and C is local to block 2, then C may share a slot with A |
572 // or B. | 637 // or B. |
573 const bool SimpleCoalescing = true; | 638 const bool SimpleCoalescing = true; |
574 size_t InArgsSizeBytes = 0; | 639 size_t InArgsSizeBytes = 0; |
575 size_t PreservedRegsSizeBytes = 0; | 640 size_t PreservedRegsSizeBytes = 0; |
576 LocalsSizeBytes = 0; | 641 SpillAreaSizeBytes = 0; |
577 Context.init(Node); | 642 Context.init(Node); |
578 Context.setInsertPoint(Context.getCur()); | 643 Context.setInsertPoint(Context.getCur()); |
579 | 644 |
580 // Determine stack frame offsets for each Variable without a | 645 // Determine stack frame offsets for each Variable without a |
581 // register assignment. This can be done as one variable per stack | 646 // register assignment. This can be done as one variable per stack |
582 // slot. Or, do coalescing by running the register allocator again | 647 // slot. Or, do coalescing by running the register allocator again |
583 // with an infinite set of registers (as a side effect, this gives | 648 // with an infinite set of registers (as a side effect, this gives |
584 // variables a second chance at physical register assignment). | 649 // variables a second chance at physical register assignment). |
585 // | 650 // |
586 // A middle ground approach is to leverage sparsity and allocate one | 651 // A middle ground approach is to leverage sparsity and allocate one |
587 // block of space on the frame for globals (variables with | 652 // block of space on the frame for globals (variables with |
588 // multi-block lifetime), and one block to share for locals | 653 // multi-block lifetime), and one block to share for locals |
589 // (single-block lifetime). | 654 // (single-block lifetime). |
590 | 655 |
591 llvm::SmallBitVector CalleeSaves = | 656 llvm::SmallBitVector CalleeSaves = |
592 getRegisterSet(RegSet_CalleeSave, RegSet_None); | 657 getRegisterSet(RegSet_CalleeSave, RegSet_None); |
593 | 658 |
594 size_t GlobalsSize = 0; | 659 size_t GlobalsSize = 0; |
595 std::vector<size_t> LocalsSize(Func->getNumNodes()); | 660 std::vector<size_t> LocalsSize(Func->getNumNodes()); |
596 | 661 |
597 // Prepass. Compute RegsUsed, PreservedRegsSizeBytes, and | 662 // Prepass. Compute RegsUsed, PreservedRegsSizeBytes, and |
598 // LocalsSizeBytes. | 663 // SpillAreaSizeBytes. |
599 RegsUsed = llvm::SmallBitVector(CalleeSaves.size()); | 664 RegsUsed = llvm::SmallBitVector(CalleeSaves.size()); |
600 const VarList &Variables = Func->getVariables(); | 665 const VarList &Variables = Func->getVariables(); |
601 const VarList &Args = Func->getArgs(); | 666 const VarList &Args = Func->getArgs(); |
| 667 VarList SpilledVariables, SortedSpilledVariables, |
| 668 VariablesLinkedToSpillSplots; |
| 669 |
| 670 // If there is a separate locals area, this specifies the alignment |
| 671 // for it. |
| 672 uint32_t LocalsSlotsAlignmentBytes = 0; |
| 673 // The entire spill locations area gets aligned to the largest natural |
| 674 // alignment of the variables that have a spill slot. |
| 675 uint32_t SpillAreaAlignmentBytes = 0; |
602 for (VarList::const_iterator I = Variables.begin(), E = Variables.end(); | 676 for (VarList::const_iterator I = Variables.begin(), E = Variables.end(); |
603 I != E; ++I) { | 677 I != E; ++I) { |
604 Variable *Var = *I; | 678 Variable *Var = *I; |
605 if (Var->hasReg()) { | 679 if (Var->hasReg()) { |
606 RegsUsed[Var->getRegNum()] = true; | 680 RegsUsed[Var->getRegNum()] = true; |
607 continue; | 681 continue; |
608 } | 682 } |
609 // An argument either does not need a stack slot (if passed in a | 683 // An argument either does not need a stack slot (if passed in a |
610 // register) or already has one (if passed on the stack). | 684 // register) or already has one (if passed on the stack). |
611 if (Var->getIsArg()) | 685 if (Var->getIsArg()) |
612 continue; | 686 continue; |
613 // An unreferenced variable doesn't need a stack slot. | 687 // An unreferenced variable doesn't need a stack slot. |
614 if (ComputedLiveRanges && Var->getLiveRange().isEmpty()) | 688 if (ComputedLiveRanges && Var->getLiveRange().isEmpty()) |
615 continue; | 689 continue; |
616 // A spill slot linked to a variable with a stack slot should reuse | 690 // A spill slot linked to a variable with a stack slot should reuse |
617 // that stack slot. | 691 // that stack slot. |
618 if (Var->getWeight() == RegWeight::Zero && Var->getRegisterOverlap()) { | 692 if (Var->getWeight() == RegWeight::Zero && Var->getRegisterOverlap()) { |
619 if (Variable *Linked = Var->getPreferredRegister()) { | 693 if (Variable *Linked = Var->getPreferredRegister()) { |
620 if (!Linked->hasReg()) | 694 if (!Linked->hasReg()) { |
| 695 VariablesLinkedToSpillSplots.push_back(Var); |
621 continue; | 696 continue; |
| 697 } |
622 } | 698 } |
623 } | 699 } |
| 700 SpilledVariables.push_back(Var); |
| 701 } |
| 702 |
| 703 SortedSpilledVariables.reserve(SpilledVariables.size()); |
| 704 sortByAlignment(SortedSpilledVariables, SpilledVariables); |
| 705 for (VarList::const_iterator I = SortedSpilledVariables.begin(), |
| 706 E = SortedSpilledVariables.end(); |
| 707 I != E; ++I) { |
| 708 Variable *Var = *I; |
624 size_t Increment = typeWidthInBytesOnStack(Var->getType()); | 709 size_t Increment = typeWidthInBytesOnStack(Var->getType()); |
| 710 if (!SpillAreaAlignmentBytes) |
| 711 SpillAreaAlignmentBytes = Increment; |
625 if (SimpleCoalescing) { | 712 if (SimpleCoalescing) { |
626 if (Var->isMultiblockLife()) { | 713 if (Var->isMultiblockLife()) { |
627 GlobalsSize += Increment; | 714 GlobalsSize += Increment; |
628 } else { | 715 } else { |
629 SizeT NodeIndex = Var->getLocalUseNode()->getIndex(); | 716 SizeT NodeIndex = Var->getLocalUseNode()->getIndex(); |
630 LocalsSize[NodeIndex] += Increment; | 717 LocalsSize[NodeIndex] += Increment; |
631 if (LocalsSize[NodeIndex] > LocalsSizeBytes) | 718 if (LocalsSize[NodeIndex] > SpillAreaSizeBytes) |
632 LocalsSizeBytes = LocalsSize[NodeIndex]; | 719 SpillAreaSizeBytes = LocalsSize[NodeIndex]; |
| 720 if (!LocalsSlotsAlignmentBytes) |
| 721 LocalsSlotsAlignmentBytes = Increment; |
633 } | 722 } |
634 } else { | 723 } else { |
635 LocalsSizeBytes += Increment; | 724 SpillAreaSizeBytes += Increment; |
636 } | 725 } |
637 } | 726 } |
638 LocalsSizeBytes += GlobalsSize; | 727 uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes; |
| 728 |
| 729 SpillAreaSizeBytes += GlobalsSize; |
639 | 730 |
640 // Add push instructions for preserved registers. | 731 // Add push instructions for preserved registers. |
641 for (SizeT i = 0; i < CalleeSaves.size(); ++i) { | 732 for (SizeT i = 0; i < CalleeSaves.size(); ++i) { |
642 if (CalleeSaves[i] && RegsUsed[i]) { | 733 if (CalleeSaves[i] && RegsUsed[i]) { |
643 PreservedRegsSizeBytes += 4; | 734 PreservedRegsSizeBytes += 4; |
644 const bool SuppressStackAdjustment = true; | 735 const bool SuppressStackAdjustment = true; |
645 _push(getPhysicalRegister(i), SuppressStackAdjustment); | 736 _push(getPhysicalRegister(i), SuppressStackAdjustment); |
646 } | 737 } |
647 } | 738 } |
648 | 739 |
649 // Generate "push ebp; mov ebp, esp" | 740 // Generate "push ebp; mov ebp, esp" |
650 if (IsEbpBasedFrame) { | 741 if (IsEbpBasedFrame) { |
651 assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None)) | 742 assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None)) |
652 .count() == 0); | 743 .count() == 0); |
653 PreservedRegsSizeBytes += 4; | 744 PreservedRegsSizeBytes += 4; |
654 Variable *ebp = getPhysicalRegister(Reg_ebp); | 745 Variable *ebp = getPhysicalRegister(Reg_ebp); |
655 Variable *esp = getPhysicalRegister(Reg_esp); | 746 Variable *esp = getPhysicalRegister(Reg_esp); |
656 const bool SuppressStackAdjustment = true; | 747 const bool SuppressStackAdjustment = true; |
657 _push(ebp, SuppressStackAdjustment); | 748 _push(ebp, SuppressStackAdjustment); |
658 _mov(ebp, esp); | 749 _mov(ebp, esp); |
659 } | 750 } |
660 | 751 |
661 if (NeedsStackAlignment) { | 752 // Align the variables area. SpillAreaPaddingBytes is the size of |
662 uint32_t StackSize = applyStackAlignment( | 753 // the region after the preserved registers and before the spill |
663 X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes + LocalsSizeBytes); | 754 // areas. |
664 LocalsSizeBytes = | 755 uint32_t SpillAreaPaddingBytes = 0; |
665 StackSize - X86_RET_IP_SIZE_BYTES - PreservedRegsSizeBytes; | 756 if (SpillAreaAlignmentBytes) { |
| 757 assert(SpillAreaAlignmentBytes <= X86_STACK_ALIGNMENT_BYTES); |
| 758 uint32_t PaddingStart = X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes; |
| 759 uint32_t SpillAreaStart = |
| 760 applyAlignment(PaddingStart, SpillAreaAlignmentBytes); |
| 761 SpillAreaPaddingBytes = SpillAreaStart - PaddingStart; |
| 762 SpillAreaSizeBytes += SpillAreaPaddingBytes; |
666 } | 763 } |
667 | 764 |
668 // Generate "sub esp, LocalsSizeBytes" | 765 // If there are separate globals and locals areas, make sure the |
669 if (LocalsSizeBytes) | 766 // locals area is aligned by padding the end of the globals area. |
| 767 uint32_t GlobalsAndSubsequentPaddingSize = GlobalsSize; |
| 768 if (LocalsSlotsAlignmentBytes) { |
| 769 assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes); |
| 770 GlobalsAndSubsequentPaddingSize = |
| 771 applyAlignment(GlobalsSize, LocalsSlotsAlignmentBytes); |
| 772 SpillAreaSizeBytes += GlobalsAndSubsequentPaddingSize - GlobalsSize; |
| 773 } |
| 774 |
| 775 // Align esp if necessary. |
| 776 if (NeedsStackAlignment) { |
| 777 uint32_t StackOffset = X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes; |
| 778 uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes); |
| 779 SpillAreaSizeBytes = StackSize - StackOffset; |
| 780 } |
| 781 |
| 782 // Generate "sub esp, SpillAreaSizeBytes" |
| 783 if (SpillAreaSizeBytes) |
670 _sub(getPhysicalRegister(Reg_esp), | 784 _sub(getPhysicalRegister(Reg_esp), |
671 Ctx->getConstantInt(IceType_i32, LocalsSizeBytes)); | 785 Ctx->getConstantInt(IceType_i32, SpillAreaSizeBytes)); |
672 | 786 |
673 resetStackAdjustment(); | 787 resetStackAdjustment(); |
674 | 788 |
675 // Fill in stack offsets for stack args, and copy args into registers | 789 // Fill in stack offsets for stack args, and copy args into registers |
676 // for those that were register-allocated. Args are pushed right to | 790 // for those that were register-allocated. Args are pushed right to |
677 // left, so Arg[0] is closest to the stack/frame pointer. | 791 // left, so Arg[0] is closest to the stack/frame pointer. |
678 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); | 792 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); |
679 size_t BasicFrameOffset = PreservedRegsSizeBytes + X86_RET_IP_SIZE_BYTES; | 793 size_t BasicFrameOffset = PreservedRegsSizeBytes + X86_RET_IP_SIZE_BYTES; |
680 if (!IsEbpBasedFrame) | 794 if (!IsEbpBasedFrame) |
681 BasicFrameOffset += LocalsSizeBytes; | 795 BasicFrameOffset += SpillAreaSizeBytes; |
682 | 796 |
683 unsigned NumXmmArgs = 0; | 797 unsigned NumXmmArgs = 0; |
684 for (SizeT i = 0; i < Args.size(); ++i) { | 798 for (SizeT i = 0; i < Args.size(); ++i) { |
685 Variable *Arg = Args[i]; | 799 Variable *Arg = Args[i]; |
686 // Skip arguments passed in registers. | 800 // Skip arguments passed in registers. |
687 if (isVectorType(Arg->getType()) && NumXmmArgs < X86_MAX_XMM_ARGS) { | 801 if (isVectorType(Arg->getType()) && NumXmmArgs < X86_MAX_XMM_ARGS) { |
688 ++NumXmmArgs; | 802 ++NumXmmArgs; |
689 continue; | 803 continue; |
690 } | 804 } |
691 finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes); | 805 finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes); |
692 } | 806 } |
693 | 807 |
694 // Fill in stack offsets for locals. | 808 // Fill in stack offsets for locals. |
695 size_t TotalGlobalsSize = GlobalsSize; | 809 size_t GlobalsSpaceUsed = SpillAreaPaddingBytes; |
696 GlobalsSize = 0; | |
697 LocalsSize.assign(LocalsSize.size(), 0); | 810 LocalsSize.assign(LocalsSize.size(), 0); |
698 size_t NextStackOffset = 0; | 811 size_t NextStackOffset = GlobalsSpaceUsed; |
699 for (VarList::const_iterator I = Variables.begin(), E = Variables.end(); | 812 for (VarList::const_iterator I = SortedSpilledVariables.begin(), |
| 813 E = SortedSpilledVariables.end(); |
700 I != E; ++I) { | 814 I != E; ++I) { |
701 Variable *Var = *I; | 815 Variable *Var = *I; |
702 if (Var->hasReg()) { | |
703 RegsUsed[Var->getRegNum()] = true; | |
704 continue; | |
705 } | |
706 if (Var->getIsArg()) | |
707 continue; | |
708 if (ComputedLiveRanges && Var->getLiveRange().isEmpty()) | |
709 continue; | |
710 if (Var->getWeight() == RegWeight::Zero && Var->getRegisterOverlap()) { | |
711 if (Variable *Linked = Var->getPreferredRegister()) { | |
712 if (!Linked->hasReg()) { | |
713 // TODO: Make sure Linked has already been assigned a stack | |
714 // slot. | |
715 Var->setStackOffset(Linked->getStackOffset()); | |
716 continue; | |
717 } | |
718 } | |
719 } | |
720 size_t Increment = typeWidthInBytesOnStack(Var->getType()); | 816 size_t Increment = typeWidthInBytesOnStack(Var->getType()); |
721 if (SimpleCoalescing) { | 817 if (SimpleCoalescing) { |
722 if (Var->isMultiblockLife()) { | 818 if (Var->isMultiblockLife()) { |
723 GlobalsSize += Increment; | 819 GlobalsSpaceUsed += Increment; |
724 NextStackOffset = GlobalsSize; | 820 NextStackOffset = GlobalsSpaceUsed; |
725 } else { | 821 } else { |
726 SizeT NodeIndex = Var->getLocalUseNode()->getIndex(); | 822 SizeT NodeIndex = Var->getLocalUseNode()->getIndex(); |
727 LocalsSize[NodeIndex] += Increment; | 823 LocalsSize[NodeIndex] += Increment; |
728 NextStackOffset = TotalGlobalsSize + LocalsSize[NodeIndex]; | 824 NextStackOffset = SpillAreaPaddingBytes + |
| 825 GlobalsAndSubsequentPaddingSize + |
| 826 LocalsSize[NodeIndex]; |
729 } | 827 } |
730 } else { | 828 } else { |
731 NextStackOffset += Increment; | 829 NextStackOffset += Increment; |
732 } | 830 } |
733 if (IsEbpBasedFrame) | 831 if (IsEbpBasedFrame) |
734 Var->setStackOffset(-NextStackOffset); | 832 Var->setStackOffset(-NextStackOffset); |
735 else | 833 else |
736 Var->setStackOffset(LocalsSizeBytes - NextStackOffset); | 834 Var->setStackOffset(SpillAreaSizeBytes - NextStackOffset); |
737 } | 835 } |
738 this->FrameSizeLocals = NextStackOffset; | 836 this->FrameSizeLocals = NextStackOffset - SpillAreaPaddingBytes; |
739 this->HasComputedFrame = true; | 837 this->HasComputedFrame = true; |
740 | 838 |
| 839 // Assign stack offsets to variables that have been linked to spilled |
| 840 // variables. |
| 841 for (VarList::const_iterator I = VariablesLinkedToSpillSplots.begin(), |
| 842 E = VariablesLinkedToSpillSplots.end(); |
| 843 I != E; ++I) { |
| 844 Variable *Var = *I; |
| 845 Variable *Linked = Var->getPreferredRegister(); |
| 846 Var->setStackOffset(Linked->getStackOffset()); |
| 847 } |
| 848 |
741 if (Func->getContext()->isVerbose(IceV_Frame)) { | 849 if (Func->getContext()->isVerbose(IceV_Frame)) { |
742 Func->getContext()->getStrDump() << "LocalsSizeBytes=" << LocalsSizeBytes | 850 Ostream &Str = Func->getContext()->getStrDump(); |
743 << "\n" | 851 |
744 << "InArgsSizeBytes=" << InArgsSizeBytes | 852 Str << "Stack layout:\n"; |
745 << "\n" | 853 uint32_t EspAdjustmentPaddingSize = |
746 << "PreservedRegsSizeBytes=" | 854 SpillAreaSizeBytes - LocalsSpillAreaSize - |
747 << PreservedRegsSizeBytes << "\n"; | 855 GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes; |
| 856 Str << " in-args = " << InArgsSizeBytes << " bytes\n" |
| 857 << " return address = " << X86_RET_IP_SIZE_BYTES << " bytes\n" |
| 858 << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n" |
| 859 << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n" |
| 860 << " globals spill area = " << GlobalsSize << " bytes\n" |
| 861 << " globals-locals spill areas intermediate padding = " |
| 862 << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n" |
| 863 << " locals spill area = " << LocalsSpillAreaSize << " bytes\n" |
| 864 << " esp alignment padding = " << EspAdjustmentPaddingSize |
| 865 << " bytes\n"; |
| 866 |
| 867 Str << "Stack details:\n" |
| 868 << " esp adjustment = " << SpillAreaSizeBytes << " bytes\n" |
| 869 << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n" |
| 870 << " locals spill area alignment = " << LocalsSlotsAlignmentBytes |
| 871 << " bytes\n" |
| 872 << " is ebp based = " << IsEbpBasedFrame << "\n"; |
748 } | 873 } |
749 } | 874 } |
750 | 875 |
751 void TargetX8632::addEpilog(CfgNode *Node) { | 876 void TargetX8632::addEpilog(CfgNode *Node) { |
752 InstList &Insts = Node->getInsts(); | 877 InstList &Insts = Node->getInsts(); |
753 InstList::reverse_iterator RI, E; | 878 InstList::reverse_iterator RI, E; |
754 for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) { | 879 for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) { |
755 if (llvm::isa<InstX8632Ret>(*RI)) | 880 if (llvm::isa<InstX8632Ret>(*RI)) |
756 break; | 881 break; |
757 } | 882 } |
758 if (RI == E) | 883 if (RI == E) |
759 return; | 884 return; |
760 | 885 |
761 // Convert the reverse_iterator position into its corresponding | 886 // Convert the reverse_iterator position into its corresponding |
762 // (forward) iterator position. | 887 // (forward) iterator position. |
763 InstList::iterator InsertPoint = RI.base(); | 888 InstList::iterator InsertPoint = RI.base(); |
764 --InsertPoint; | 889 --InsertPoint; |
765 Context.init(Node); | 890 Context.init(Node); |
766 Context.setInsertPoint(InsertPoint); | 891 Context.setInsertPoint(InsertPoint); |
767 | 892 |
768 Variable *esp = getPhysicalRegister(Reg_esp); | 893 Variable *esp = getPhysicalRegister(Reg_esp); |
769 if (IsEbpBasedFrame) { | 894 if (IsEbpBasedFrame) { |
770 Variable *ebp = getPhysicalRegister(Reg_ebp); | 895 Variable *ebp = getPhysicalRegister(Reg_ebp); |
771 _mov(esp, ebp); | 896 _mov(esp, ebp); |
772 _pop(ebp); | 897 _pop(ebp); |
773 } else { | 898 } else { |
774 // add esp, LocalsSizeBytes | 899 // add esp, SpillAreaSizeBytes |
775 if (LocalsSizeBytes) | 900 if (SpillAreaSizeBytes) |
776 _add(esp, Ctx->getConstantInt(IceType_i32, LocalsSizeBytes)); | 901 _add(esp, Ctx->getConstantInt(IceType_i32, SpillAreaSizeBytes)); |
777 } | 902 } |
778 | 903 |
779 // Add pop instructions for preserved registers. | 904 // Add pop instructions for preserved registers. |
780 llvm::SmallBitVector CalleeSaves = | 905 llvm::SmallBitVector CalleeSaves = |
781 getRegisterSet(RegSet_CalleeSave, RegSet_None); | 906 getRegisterSet(RegSet_CalleeSave, RegSet_None); |
782 for (SizeT i = 0; i < CalleeSaves.size(); ++i) { | 907 for (SizeT i = 0; i < CalleeSaves.size(); ++i) { |
783 SizeT j = CalleeSaves.size() - i - 1; | 908 SizeT j = CalleeSaves.size() - i - 1; |
784 if (j == Reg_ebp && IsEbpBasedFrame) | 909 if (j == Reg_ebp && IsEbpBasedFrame) |
785 continue; | 910 continue; |
786 if (CalleeSaves[j] && RegsUsed[j]) { | 911 if (CalleeSaves[j] && RegsUsed[j]) { |
(...skipping 197 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
984 assert((AlignmentParam & (AlignmentParam - 1)) == 0); | 1109 assert((AlignmentParam & (AlignmentParam - 1)) == 0); |
985 assert((X86_STACK_ALIGNMENT_BYTES & (X86_STACK_ALIGNMENT_BYTES - 1)) == 0); | 1110 assert((X86_STACK_ALIGNMENT_BYTES & (X86_STACK_ALIGNMENT_BYTES - 1)) == 0); |
986 | 1111 |
987 uint32_t Alignment = std::max(AlignmentParam, X86_STACK_ALIGNMENT_BYTES); | 1112 uint32_t Alignment = std::max(AlignmentParam, X86_STACK_ALIGNMENT_BYTES); |
988 if (Alignment > X86_STACK_ALIGNMENT_BYTES) { | 1113 if (Alignment > X86_STACK_ALIGNMENT_BYTES) { |
989 _and(esp, Ctx->getConstantInt(IceType_i32, -Alignment)); | 1114 _and(esp, Ctx->getConstantInt(IceType_i32, -Alignment)); |
990 } | 1115 } |
991 if (ConstantInteger *ConstantTotalSize = | 1116 if (ConstantInteger *ConstantTotalSize = |
992 llvm::dyn_cast<ConstantInteger>(TotalSize)) { | 1117 llvm::dyn_cast<ConstantInteger>(TotalSize)) { |
993 uint32_t Value = ConstantTotalSize->getValue(); | 1118 uint32_t Value = ConstantTotalSize->getValue(); |
994 // Round Value up to the next highest multiple of the alignment. | 1119 Value = applyAlignment(Value, Alignment); |
995 Value = (Value + Alignment - 1) & -Alignment; | |
996 _sub(esp, Ctx->getConstantInt(IceType_i32, Value)); | 1120 _sub(esp, Ctx->getConstantInt(IceType_i32, Value)); |
997 } else { | 1121 } else { |
998 // Non-constant sizes need to be adjusted to the next highest | 1122 // Non-constant sizes need to be adjusted to the next highest |
999 // multiple of the required alignment at runtime. | 1123 // multiple of the required alignment at runtime. |
1000 Variable *T = makeReg(IceType_i32); | 1124 Variable *T = makeReg(IceType_i32); |
1001 _mov(T, TotalSize); | 1125 _mov(T, TotalSize); |
1002 _add(T, Ctx->getConstantInt(IceType_i32, Alignment - 1)); | 1126 _add(T, Ctx->getConstantInt(IceType_i32, Alignment - 1)); |
1003 _and(T, Ctx->getConstantInt(IceType_i32, -Alignment)); | 1127 _and(T, Ctx->getConstantInt(IceType_i32, -Alignment)); |
1004 _sub(esp, T); | 1128 _sub(esp, T); |
1005 } | 1129 } |
(...skipping 226 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1232 case InstArithmetic::Fsub: | 1356 case InstArithmetic::Fsub: |
1233 case InstArithmetic::Fmul: | 1357 case InstArithmetic::Fmul: |
1234 case InstArithmetic::Fdiv: | 1358 case InstArithmetic::Fdiv: |
1235 case InstArithmetic::Frem: | 1359 case InstArithmetic::Frem: |
1236 llvm_unreachable("FP instruction with i64 type"); | 1360 llvm_unreachable("FP instruction with i64 type"); |
1237 break; | 1361 break; |
1238 } | 1362 } |
1239 } else if (isVectorType(Dest->getType())) { | 1363 } else if (isVectorType(Dest->getType())) { |
1240 // TODO: Trap on integer divide and integer modulo by zero. | 1364 // TODO: Trap on integer divide and integer modulo by zero. |
1241 // See: https://code.google.com/p/nativeclient/issues/detail?id=3899 | 1365 // See: https://code.google.com/p/nativeclient/issues/detail?id=3899 |
1242 // | |
1243 // TODO(wala): ALIGNHACK: All vector arithmetic is currently done in | |
1244 // registers. This is a workaround of the fact that there is no | |
1245 // support for aligning stack operands. Once there is support, | |
1246 // remove LEGAL_HACK. | |
1247 #define LEGAL_HACK(s) legalizeToVar((s)) | |
1248 switch (Inst->getOp()) { | 1366 switch (Inst->getOp()) { |
1249 case InstArithmetic::_num: | 1367 case InstArithmetic::_num: |
1250 llvm_unreachable("Unknown arithmetic operator"); | 1368 llvm_unreachable("Unknown arithmetic operator"); |
1251 break; | 1369 break; |
1252 case InstArithmetic::Add: { | 1370 case InstArithmetic::Add: { |
1253 Variable *T = makeReg(Dest->getType()); | 1371 Variable *T = makeReg(Dest->getType()); |
1254 _movp(T, Src0); | 1372 _movp(T, Src0); |
1255 _padd(T, LEGAL_HACK(Src1)); | 1373 _padd(T, Src1); |
1256 _movp(Dest, T); | 1374 _movp(Dest, T); |
1257 } break; | 1375 } break; |
1258 case InstArithmetic::And: { | 1376 case InstArithmetic::And: { |
1259 Variable *T = makeReg(Dest->getType()); | 1377 Variable *T = makeReg(Dest->getType()); |
1260 _movp(T, Src0); | 1378 _movp(T, Src0); |
1261 _pand(T, LEGAL_HACK(Src1)); | 1379 _pand(T, Src1); |
1262 _movp(Dest, T); | 1380 _movp(Dest, T); |
1263 } break; | 1381 } break; |
1264 case InstArithmetic::Or: { | 1382 case InstArithmetic::Or: { |
1265 Variable *T = makeReg(Dest->getType()); | 1383 Variable *T = makeReg(Dest->getType()); |
1266 _movp(T, Src0); | 1384 _movp(T, Src0); |
1267 _por(T, LEGAL_HACK(Src1)); | 1385 _por(T, Src1); |
1268 _movp(Dest, T); | 1386 _movp(Dest, T); |
1269 } break; | 1387 } break; |
1270 case InstArithmetic::Xor: { | 1388 case InstArithmetic::Xor: { |
1271 Variable *T = makeReg(Dest->getType()); | 1389 Variable *T = makeReg(Dest->getType()); |
1272 _movp(T, Src0); | 1390 _movp(T, Src0); |
1273 _pxor(T, LEGAL_HACK(Src1)); | 1391 _pxor(T, Src1); |
1274 _movp(Dest, T); | 1392 _movp(Dest, T); |
1275 } break; | 1393 } break; |
1276 case InstArithmetic::Sub: { | 1394 case InstArithmetic::Sub: { |
1277 Variable *T = makeReg(Dest->getType()); | 1395 Variable *T = makeReg(Dest->getType()); |
1278 _movp(T, Src0); | 1396 _movp(T, Src0); |
1279 _psub(T, LEGAL_HACK(Src1)); | 1397 _psub(T, Src1); |
1280 _movp(Dest, T); | 1398 _movp(Dest, T); |
1281 } break; | 1399 } break; |
1282 case InstArithmetic::Mul: { | 1400 case InstArithmetic::Mul: { |
1283 bool TypesAreValidForPmull = | 1401 bool TypesAreValidForPmull = |
1284 Dest->getType() == IceType_v4i32 || Dest->getType() == IceType_v8i16; | 1402 Dest->getType() == IceType_v4i32 || Dest->getType() == IceType_v8i16; |
1285 bool InstructionSetIsValidForPmull = | 1403 bool InstructionSetIsValidForPmull = |
1286 Dest->getType() == IceType_v8i16 || InstructionSet >= SSE4_1; | 1404 Dest->getType() == IceType_v8i16 || InstructionSet >= SSE4_1; |
1287 if (TypesAreValidForPmull && InstructionSetIsValidForPmull) { | 1405 if (TypesAreValidForPmull && InstructionSetIsValidForPmull) { |
1288 Variable *T = makeReg(Dest->getType()); | 1406 Variable *T = makeReg(Dest->getType()); |
1289 _movp(T, Src0); | 1407 _movp(T, Src0); |
1290 _pmull(T, LEGAL_HACK(Src1)); | 1408 _pmull(T, Src1); |
1291 _movp(Dest, T); | 1409 _movp(Dest, T); |
1292 } else if (Dest->getType() == IceType_v4i32) { | 1410 } else if (Dest->getType() == IceType_v4i32) { |
1293 // Lowering sequence: | 1411 // Lowering sequence: |
1294 // Note: The mask arguments have index 0 on the left. | 1412 // Note: The mask arguments have index 0 on the left. |
1295 // | 1413 // |
1296 // movups T1, Src0 | 1414 // movups T1, Src0 |
1297 // pshufd T2, Src0, {1,0,3,0} | 1415 // pshufd T2, Src0, {1,0,3,0} |
1298 // pshufd T3, Src1, {1,0,3,0} | 1416 // pshufd T3, Src1, {1,0,3,0} |
1299 // # T1 = {Src0[0] * Src1[0], Src0[2] * Src1[2]} | 1417 // # T1 = {Src0[0] * Src1[0], Src0[2] * Src1[2]} |
1300 // pmuludq T1, Src1 | 1418 // pmuludq T1, Src1 |
(...skipping 12 matching lines...) Expand all Loading... |
1313 // Dest[0, 2], Src[0, 2] | 1431 // Dest[0, 2], Src[0, 2] |
1314 const unsigned Mask0202 = 0x88; | 1432 const unsigned Mask0202 = 0x88; |
1315 // Mask that directs pshufd to create a vector with entries | 1433 // Mask that directs pshufd to create a vector with entries |
1316 // Src[0, 2, 1, 3] | 1434 // Src[0, 2, 1, 3] |
1317 const unsigned Mask0213 = 0xd8; | 1435 const unsigned Mask0213 = 0xd8; |
1318 Variable *T1 = makeReg(IceType_v4i32); | 1436 Variable *T1 = makeReg(IceType_v4i32); |
1319 Variable *T2 = makeReg(IceType_v4i32); | 1437 Variable *T2 = makeReg(IceType_v4i32); |
1320 Variable *T3 = makeReg(IceType_v4i32); | 1438 Variable *T3 = makeReg(IceType_v4i32); |
1321 Variable *T4 = makeReg(IceType_v4i32); | 1439 Variable *T4 = makeReg(IceType_v4i32); |
1322 _movp(T1, Src0); | 1440 _movp(T1, Src0); |
1323 // TODO(wala): ALIGHNHACK: Replace Src0R with Src0 and Src1R | 1441 _pshufd(T2, Src0, Mask1030); |
1324 // with Src1 after stack operand alignment support is | 1442 _pshufd(T3, Src1, Mask1030); |
1325 // implemented. | 1443 _pmuludq(T1, Src1); |
1326 Variable *Src0R = LEGAL_HACK(Src0); | |
1327 Variable *Src1R = LEGAL_HACK(Src1); | |
1328 _pshufd(T2, Src0R, Mask1030); | |
1329 _pshufd(T3, Src1R, Mask1030); | |
1330 _pmuludq(T1, Src1R); | |
1331 _pmuludq(T2, T3); | 1444 _pmuludq(T2, T3); |
1332 _shufps(T1, T2, Ctx->getConstantInt(IceType_i8, Mask0202)); | 1445 _shufps(T1, T2, Ctx->getConstantInt(IceType_i8, Mask0202)); |
1333 _pshufd(T4, T1, Ctx->getConstantInt(IceType_i8, Mask0213)); | 1446 _pshufd(T4, T1, Ctx->getConstantInt(IceType_i8, Mask0213)); |
1334 _movp(Dest, T4); | 1447 _movp(Dest, T4); |
1335 } else { | 1448 } else { |
1336 assert(Dest->getType() == IceType_v16i8); | 1449 assert(Dest->getType() == IceType_v16i8); |
1337 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1); | 1450 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1); |
1338 } | 1451 } |
1339 } break; | 1452 } break; |
1340 case InstArithmetic::Shl: | 1453 case InstArithmetic::Shl: |
1341 case InstArithmetic::Lshr: | 1454 case InstArithmetic::Lshr: |
1342 case InstArithmetic::Ashr: | 1455 case InstArithmetic::Ashr: |
1343 case InstArithmetic::Udiv: | 1456 case InstArithmetic::Udiv: |
1344 case InstArithmetic::Urem: | 1457 case InstArithmetic::Urem: |
1345 case InstArithmetic::Sdiv: | 1458 case InstArithmetic::Sdiv: |
1346 case InstArithmetic::Srem: | 1459 case InstArithmetic::Srem: |
1347 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1); | 1460 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1); |
1348 break; | 1461 break; |
1349 case InstArithmetic::Fadd: { | 1462 case InstArithmetic::Fadd: { |
1350 Variable *T = makeReg(Dest->getType()); | 1463 Variable *T = makeReg(Dest->getType()); |
1351 _movp(T, Src0); | 1464 _movp(T, Src0); |
1352 _addps(T, LEGAL_HACK(Src1)); | 1465 _addps(T, Src1); |
1353 _movp(Dest, T); | 1466 _movp(Dest, T); |
1354 } break; | 1467 } break; |
1355 case InstArithmetic::Fsub: { | 1468 case InstArithmetic::Fsub: { |
1356 Variable *T = makeReg(Dest->getType()); | 1469 Variable *T = makeReg(Dest->getType()); |
1357 _movp(T, Src0); | 1470 _movp(T, Src0); |
1358 _subps(T, LEGAL_HACK(Src1)); | 1471 _subps(T, Src1); |
1359 _movp(Dest, T); | 1472 _movp(Dest, T); |
1360 } break; | 1473 } break; |
1361 case InstArithmetic::Fmul: { | 1474 case InstArithmetic::Fmul: { |
1362 Variable *T = makeReg(Dest->getType()); | 1475 Variable *T = makeReg(Dest->getType()); |
1363 _movp(T, Src0); | 1476 _movp(T, Src0); |
1364 _mulps(T, LEGAL_HACK(Src1)); | 1477 _mulps(T, Src1); |
1365 _movp(Dest, T); | 1478 _movp(Dest, T); |
1366 } break; | 1479 } break; |
1367 case InstArithmetic::Fdiv: { | 1480 case InstArithmetic::Fdiv: { |
1368 Variable *T = makeReg(Dest->getType()); | 1481 Variable *T = makeReg(Dest->getType()); |
1369 _movp(T, Src0); | 1482 _movp(T, Src0); |
1370 _divps(T, LEGAL_HACK(Src1)); | 1483 _divps(T, Src1); |
1371 _movp(Dest, T); | 1484 _movp(Dest, T); |
1372 } break; | 1485 } break; |
1373 case InstArithmetic::Frem: | 1486 case InstArithmetic::Frem: |
1374 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1); | 1487 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1); |
1375 break; | 1488 break; |
1376 } | 1489 } |
1377 #undef LEGAL_HACK | |
1378 } else { // Dest->getType() is non-i64 scalar | 1490 } else { // Dest->getType() is non-i64 scalar |
1379 Variable *T_edx = NULL; | 1491 Variable *T_edx = NULL; |
1380 Variable *T = NULL; | 1492 Variable *T = NULL; |
1381 switch (Inst->getOp()) { | 1493 switch (Inst->getOp()) { |
1382 case InstArithmetic::_num: | 1494 case InstArithmetic::_num: |
1383 llvm_unreachable("Unknown arithmetic operator"); | 1495 llvm_unreachable("Unknown arithmetic operator"); |
1384 break; | 1496 break; |
1385 case InstArithmetic::Add: | 1497 case InstArithmetic::Add: |
1386 _mov(T, Src0); | 1498 _mov(T, Src0); |
1387 _add(T, Src1); | 1499 _add(T, Src1); |
(...skipping 804 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2192 // TODO(wala): Determine the best lowering sequences for each type. | 2304 // TODO(wala): Determine the best lowering sequences for each type. |
2193 bool CanUsePextr = | 2305 bool CanUsePextr = |
2194 Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1; | 2306 Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1; |
2195 if (CanUsePextr && Ty != IceType_v4f32) { | 2307 if (CanUsePextr && Ty != IceType_v4f32) { |
2196 // Use pextrb, pextrw, or pextrd. | 2308 // Use pextrb, pextrw, or pextrd. |
2197 Constant *Mask = Ctx->getConstantInt(IceType_i8, Index); | 2309 Constant *Mask = Ctx->getConstantInt(IceType_i8, Index); |
2198 Variable *SourceVectR = legalizeToVar(SourceVectNotLegalized); | 2310 Variable *SourceVectR = legalizeToVar(SourceVectNotLegalized); |
2199 _pextr(ExtractedElementR, SourceVectR, Mask); | 2311 _pextr(ExtractedElementR, SourceVectR, Mask); |
2200 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { | 2312 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { |
2201 // Use pshufd and movd/movss. | 2313 // Use pshufd and movd/movss. |
2202 // | |
2203 // ALIGNHACK: Force vector operands to registers in instructions | |
2204 // that require aligned memory operands until support for data | |
2205 // alignment is implemented. | |
2206 #define ALIGN_HACK(Vect) legalizeToVar((Vect)) | |
2207 Operand *SourceVectRM = | |
2208 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem); | |
2209 Variable *T = NULL; | 2314 Variable *T = NULL; |
2210 if (Index) { | 2315 if (Index) { |
2211 // The shuffle only needs to occur if the element to be extracted | 2316 // The shuffle only needs to occur if the element to be extracted |
2212 // is not at the lowest index. | 2317 // is not at the lowest index. |
2213 Constant *Mask = Ctx->getConstantInt(IceType_i8, Index); | 2318 Constant *Mask = Ctx->getConstantInt(IceType_i8, Index); |
2214 T = makeReg(Ty); | 2319 T = makeReg(Ty); |
2215 _pshufd(T, ALIGN_HACK(SourceVectRM), Mask); | 2320 _pshufd(T, legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem), Mask); |
2216 } else { | 2321 } else { |
2217 T = ALIGN_HACK(SourceVectRM); | 2322 T = legalizeToVar(SourceVectNotLegalized); |
2218 } | 2323 } |
2219 | 2324 |
2220 if (InVectorElementTy == IceType_i32) { | 2325 if (InVectorElementTy == IceType_i32) { |
2221 _movd(ExtractedElementR, T); | 2326 _movd(ExtractedElementR, T); |
2222 } else { // Ty == Icetype_f32 | 2327 } else { // Ty == Icetype_f32 |
2223 // TODO(wala): _movss is only used here because _mov does not | 2328 // TODO(wala): _movss is only used here because _mov does not |
2224 // allow a vector source and a scalar destination. _mov should be | 2329 // allow a vector source and a scalar destination. _mov should be |
2225 // able to be used here. | 2330 // able to be used here. |
2226 // _movss is a binary instruction, so the FakeDef is needed to | 2331 // _movss is a binary instruction, so the FakeDef is needed to |
2227 // keep the live range analysis consistent. | 2332 // keep the live range analysis consistent. |
2228 Context.insert(InstFakeDef::create(Func, ExtractedElementR)); | 2333 Context.insert(InstFakeDef::create(Func, ExtractedElementR)); |
2229 _movss(ExtractedElementR, T); | 2334 _movss(ExtractedElementR, T); |
2230 } | 2335 } |
2231 #undef ALIGN_HACK | |
2232 } else { | 2336 } else { |
2233 assert(Ty == IceType_v16i8 || Ty == IceType_v16i1); | 2337 assert(Ty == IceType_v16i8 || Ty == IceType_v16i1); |
2234 // Spill the value to a stack slot and do the extraction in memory. | 2338 // Spill the value to a stack slot and do the extraction in memory. |
2235 // | 2339 // |
2236 // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when | 2340 // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when |
2237 // support for legalizing to mem is implemented. | 2341 // support for legalizing to mem is implemented. |
2238 Variable *Slot = Func->makeVariable(Ty, Context.getNode()); | 2342 Variable *Slot = Func->makeVariable(Ty, Context.getNode()); |
2239 Slot->setWeight(RegWeight::Zero); | 2343 Slot->setWeight(RegWeight::Zero); |
2240 _movp(Slot, legalizeToVar(SourceVectNotLegalized)); | 2344 _movp(Slot, legalizeToVar(SourceVectNotLegalized)); |
2241 | 2345 |
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2280 | 2384 |
2281 if (Condition == InstFcmp::True) { | 2385 if (Condition == InstFcmp::True) { |
2282 // makeVectorOfOnes() requires an integer vector type. | 2386 // makeVectorOfOnes() requires an integer vector type. |
2283 T = makeVectorOfMinusOnes(IceType_v4i32); | 2387 T = makeVectorOfMinusOnes(IceType_v4i32); |
2284 } else if (Condition == InstFcmp::False) { | 2388 } else if (Condition == InstFcmp::False) { |
2285 T = makeVectorOfZeros(Dest->getType()); | 2389 T = makeVectorOfZeros(Dest->getType()); |
2286 } else { | 2390 } else { |
2287 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); | 2391 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); |
2288 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); | 2392 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); |
2289 | 2393 |
2290 // ALIGNHACK: Without support for data alignment, both operands to | |
2291 // cmpps need to be forced into registers. Once support for data | |
2292 // alignment is implemented, remove LEGAL_HACK. | |
2293 #define LEGAL_HACK(Vect) legalizeToVar((Vect)) | |
2294 switch (Condition) { | 2394 switch (Condition) { |
2295 default: { | 2395 default: { |
2296 InstX8632Cmpps::CmppsCond Predicate = TableFcmp[Index].Predicate; | 2396 InstX8632Cmpps::CmppsCond Predicate = TableFcmp[Index].Predicate; |
2297 assert(Predicate != InstX8632Cmpps::Cmpps_Invalid); | 2397 assert(Predicate != InstX8632Cmpps::Cmpps_Invalid); |
2298 T = makeReg(Src0RM->getType()); | 2398 T = makeReg(Src0RM->getType()); |
2299 _movp(T, Src0RM); | 2399 _movp(T, Src0RM); |
2300 _cmpps(T, LEGAL_HACK(Src1RM), Predicate); | 2400 _cmpps(T, Src1RM, Predicate); |
2301 } break; | 2401 } break; |
2302 case InstFcmp::One: { | 2402 case InstFcmp::One: { |
2303 // Check both unequal and ordered. | 2403 // Check both unequal and ordered. |
2304 T = makeReg(Src0RM->getType()); | 2404 T = makeReg(Src0RM->getType()); |
2305 Variable *T2 = makeReg(Src0RM->getType()); | 2405 Variable *T2 = makeReg(Src0RM->getType()); |
2306 Src1RM = LEGAL_HACK(Src1RM); | |
2307 _movp(T, Src0RM); | 2406 _movp(T, Src0RM); |
2308 _cmpps(T, Src1RM, InstX8632Cmpps::Cmpps_neq); | 2407 _cmpps(T, Src1RM, InstX8632Cmpps::Cmpps_neq); |
2309 _movp(T2, Src0RM); | 2408 _movp(T2, Src0RM); |
2310 _cmpps(T2, Src1RM, InstX8632Cmpps::Cmpps_ord); | 2409 _cmpps(T2, Src1RM, InstX8632Cmpps::Cmpps_ord); |
2311 _pand(T, T2); | 2410 _pand(T, T2); |
2312 } break; | 2411 } break; |
2313 case InstFcmp::Ueq: { | 2412 case InstFcmp::Ueq: { |
2314 // Check both equal or unordered. | 2413 // Check both equal or unordered. |
2315 T = makeReg(Src0RM->getType()); | 2414 T = makeReg(Src0RM->getType()); |
2316 Variable *T2 = makeReg(Src0RM->getType()); | 2415 Variable *T2 = makeReg(Src0RM->getType()); |
2317 Src1RM = LEGAL_HACK(Src1RM); | |
2318 _movp(T, Src0RM); | 2416 _movp(T, Src0RM); |
2319 _cmpps(T, Src1RM, InstX8632Cmpps::Cmpps_eq); | 2417 _cmpps(T, Src1RM, InstX8632Cmpps::Cmpps_eq); |
2320 _movp(T2, Src0RM); | 2418 _movp(T2, Src0RM); |
2321 _cmpps(T2, Src1RM, InstX8632Cmpps::Cmpps_unord); | 2419 _cmpps(T2, Src1RM, InstX8632Cmpps::Cmpps_unord); |
2322 _por(T, T2); | 2420 _por(T, T2); |
2323 } break; | 2421 } break; |
2324 } | 2422 } |
2325 #undef LEGAL_HACK | |
2326 } | 2423 } |
2327 | 2424 |
2328 _movp(Dest, T); | 2425 _movp(Dest, T); |
2329 eliminateNextVectorSextInstruction(Dest); | 2426 eliminateNextVectorSextInstruction(Dest); |
2330 return; | 2427 return; |
2331 } | 2428 } |
2332 | 2429 |
2333 // Lowering a = fcmp cond, b, c | 2430 // Lowering a = fcmp cond, b, c |
2334 // ucomiss b, c /* only if C1 != Br_None */ | 2431 // ucomiss b, c /* only if C1 != Br_None */ |
2335 // /* but swap b,c order if SwapOperands==true */ | 2432 // /* but swap b,c order if SwapOperands==true */ |
(...skipping 84 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2420 Variable *T1 = makeReg(Ty); | 2517 Variable *T1 = makeReg(Ty); |
2421 Variable *HighOrderBits = makeVectorOfHighOrderBits(Ty); | 2518 Variable *HighOrderBits = makeVectorOfHighOrderBits(Ty); |
2422 _movp(T0, Src0RM); | 2519 _movp(T0, Src0RM); |
2423 _pxor(T0, HighOrderBits); | 2520 _pxor(T0, HighOrderBits); |
2424 _movp(T1, Src1RM); | 2521 _movp(T1, Src1RM); |
2425 _pxor(T1, HighOrderBits); | 2522 _pxor(T1, HighOrderBits); |
2426 Src0RM = T0; | 2523 Src0RM = T0; |
2427 Src1RM = T1; | 2524 Src1RM = T1; |
2428 } | 2525 } |
2429 | 2526 |
2430 // TODO: ALIGNHACK: Both operands to compare instructions need to be | |
2431 // in registers until data alignment support is implemented. Once | |
2432 // there is support for data alignment, LEGAL_HACK can be removed. | |
2433 #define LEGAL_HACK(Vect) legalizeToVar((Vect)) | |
2434 Variable *T = makeReg(Ty); | 2527 Variable *T = makeReg(Ty); |
2435 switch (Condition) { | 2528 switch (Condition) { |
2436 default: | 2529 default: |
2437 llvm_unreachable("unexpected condition"); | 2530 llvm_unreachable("unexpected condition"); |
2438 break; | 2531 break; |
2439 case InstIcmp::Eq: { | 2532 case InstIcmp::Eq: { |
2440 _movp(T, Src0RM); | 2533 _movp(T, Src0RM); |
2441 _pcmpeq(T, LEGAL_HACK(Src1RM)); | 2534 _pcmpeq(T, Src1RM); |
2442 } break; | 2535 } break; |
2443 case InstIcmp::Ne: { | 2536 case InstIcmp::Ne: { |
2444 _movp(T, Src0RM); | 2537 _movp(T, Src0RM); |
2445 _pcmpeq(T, LEGAL_HACK(Src1RM)); | 2538 _pcmpeq(T, Src1RM); |
2446 Variable *MinusOne = makeVectorOfMinusOnes(Ty); | 2539 Variable *MinusOne = makeVectorOfMinusOnes(Ty); |
2447 _pxor(T, MinusOne); | 2540 _pxor(T, MinusOne); |
2448 } break; | 2541 } break; |
2449 case InstIcmp::Ugt: | 2542 case InstIcmp::Ugt: |
2450 case InstIcmp::Sgt: { | 2543 case InstIcmp::Sgt: { |
2451 _movp(T, Src0RM); | 2544 _movp(T, Src0RM); |
2452 _pcmpgt(T, LEGAL_HACK(Src1RM)); | 2545 _pcmpgt(T, Src1RM); |
2453 } break; | 2546 } break; |
2454 case InstIcmp::Uge: | 2547 case InstIcmp::Uge: |
2455 case InstIcmp::Sge: { | 2548 case InstIcmp::Sge: { |
2456 // !(Src1RM > Src0RM) | 2549 // !(Src1RM > Src0RM) |
2457 _movp(T, Src1RM); | 2550 _movp(T, Src1RM); |
2458 _pcmpgt(T, LEGAL_HACK(Src0RM)); | 2551 _pcmpgt(T, Src0RM); |
2459 Variable *MinusOne = makeVectorOfMinusOnes(Ty); | 2552 Variable *MinusOne = makeVectorOfMinusOnes(Ty); |
2460 _pxor(T, MinusOne); | 2553 _pxor(T, MinusOne); |
2461 } break; | 2554 } break; |
2462 case InstIcmp::Ult: | 2555 case InstIcmp::Ult: |
2463 case InstIcmp::Slt: { | 2556 case InstIcmp::Slt: { |
2464 _movp(T, Src1RM); | 2557 _movp(T, Src1RM); |
2465 _pcmpgt(T, LEGAL_HACK(Src0RM)); | 2558 _pcmpgt(T, Src0RM); |
2466 } break; | 2559 } break; |
2467 case InstIcmp::Ule: | 2560 case InstIcmp::Ule: |
2468 case InstIcmp::Sle: { | 2561 case InstIcmp::Sle: { |
2469 // !(Src0RM > Src1RM) | 2562 // !(Src0RM > Src1RM) |
2470 _movp(T, Src0RM); | 2563 _movp(T, Src0RM); |
2471 _pcmpgt(T, LEGAL_HACK(Src1RM)); | 2564 _pcmpgt(T, Src1RM); |
2472 Variable *MinusOne = makeVectorOfMinusOnes(Ty); | 2565 Variable *MinusOne = makeVectorOfMinusOnes(Ty); |
2473 _pxor(T, MinusOne); | 2566 _pxor(T, MinusOne); |
2474 } break; | 2567 } break; |
2475 } | 2568 } |
2476 #undef LEGAL_HACK | |
2477 | 2569 |
2478 _movp(Dest, T); | 2570 _movp(Dest, T); |
2479 eliminateNextVectorSextInstruction(Dest); | 2571 eliminateNextVectorSextInstruction(Dest); |
2480 return; | 2572 return; |
2481 } | 2573 } |
2482 | 2574 |
2483 // If Src1 is an immediate, or known to be a physical register, we can | 2575 // If Src1 is an immediate, or known to be a physical register, we can |
2484 // allow Src0 to be a memory operand. Otherwise, Src0 must be copied into | 2576 // allow Src0 to be a memory operand. Otherwise, Src0 must be copied into |
2485 // a physical register. (Actually, either Src0 or Src1 can be chosen for | 2577 // a physical register. (Actually, either Src0 or Src1 can be chosen for |
2486 // the physical register, but unfortunately we have to commit to one or | 2578 // the physical register, but unfortunately we have to commit to one or |
(...skipping 155 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2642 // insertelement into index 3 (result is stored in T): | 2734 // insertelement into index 3 (result is stored in T): |
2643 // T := SourceVectRM | 2735 // T := SourceVectRM |
2644 // ElementR := ElementR[0, 0] T[0, 2] | 2736 // ElementR := ElementR[0, 0] T[0, 2] |
2645 // T := T[0, 1] ElementR[3, 0] | 2737 // T := T[0, 1] ElementR[3, 0] |
2646 const unsigned char Mask1[3] = {0, 192, 128}; | 2738 const unsigned char Mask1[3] = {0, 192, 128}; |
2647 const unsigned char Mask2[3] = {227, 196, 52}; | 2739 const unsigned char Mask2[3] = {227, 196, 52}; |
2648 | 2740 |
2649 Constant *Mask1Constant = Ctx->getConstantInt(IceType_i8, Mask1[Index - 1]); | 2741 Constant *Mask1Constant = Ctx->getConstantInt(IceType_i8, Mask1[Index - 1]); |
2650 Constant *Mask2Constant = Ctx->getConstantInt(IceType_i8, Mask2[Index - 1]); | 2742 Constant *Mask2Constant = Ctx->getConstantInt(IceType_i8, Mask2[Index - 1]); |
2651 | 2743 |
2652 // ALIGNHACK: Force vector operands to registers in instructions | |
2653 // that require aligned memory operands until support for data | |
2654 // alignment is implemented. | |
2655 #define ALIGN_HACK(Vect) legalizeToVar((Vect)) | |
2656 if (Index == 1) { | 2744 if (Index == 1) { |
2657 SourceVectRM = ALIGN_HACK(SourceVectRM); | |
2658 _shufps(ElementR, SourceVectRM, Mask1Constant); | 2745 _shufps(ElementR, SourceVectRM, Mask1Constant); |
2659 _shufps(ElementR, SourceVectRM, Mask2Constant); | 2746 _shufps(ElementR, SourceVectRM, Mask2Constant); |
2660 _movp(Inst->getDest(), ElementR); | 2747 _movp(Inst->getDest(), ElementR); |
2661 } else { | 2748 } else { |
2662 Variable *T = makeReg(Ty); | 2749 Variable *T = makeReg(Ty); |
2663 _movp(T, SourceVectRM); | 2750 _movp(T, SourceVectRM); |
2664 _shufps(ElementR, T, Mask1Constant); | 2751 _shufps(ElementR, T, Mask1Constant); |
2665 _shufps(T, ElementR, Mask2Constant); | 2752 _shufps(T, ElementR, Mask2Constant); |
2666 _movp(Inst->getDest(), T); | 2753 _movp(Inst->getDest(), T); |
2667 } | 2754 } |
2668 #undef ALIGN_HACK | |
2669 } else { | 2755 } else { |
2670 assert(Ty == IceType_v16i8 || Ty == IceType_v16i1); | 2756 assert(Ty == IceType_v16i8 || Ty == IceType_v16i1); |
2671 // Spill the value to a stack slot and perform the insertion in | 2757 // Spill the value to a stack slot and perform the insertion in |
2672 // memory. | 2758 // memory. |
2673 // | 2759 // |
2674 // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when | 2760 // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when |
2675 // support for legalizing to mem is implemented. | 2761 // support for legalizing to mem is implemented. |
2676 Variable *Slot = Func->makeVariable(Ty, Context.getNode()); | 2762 Variable *Slot = Func->makeVariable(Ty, Context.getNode()); |
2677 Slot->setWeight(RegWeight::Zero); | 2763 Slot->setWeight(RegWeight::Zero); |
2678 _movp(Slot, legalizeToVar(SourceVectNotLegalized)); | 2764 _movp(Slot, legalizeToVar(SourceVectNotLegalized)); |
(...skipping 941 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3620 Variable *Dest = Inst->getDest(); | 3706 Variable *Dest = Inst->getDest(); |
3621 Operand *SrcT = Inst->getTrueOperand(); | 3707 Operand *SrcT = Inst->getTrueOperand(); |
3622 Operand *SrcF = Inst->getFalseOperand(); | 3708 Operand *SrcF = Inst->getFalseOperand(); |
3623 Operand *Condition = Inst->getCondition(); | 3709 Operand *Condition = Inst->getCondition(); |
3624 | 3710 |
3625 if (isVectorType(Dest->getType())) { | 3711 if (isVectorType(Dest->getType())) { |
3626 Type SrcTy = SrcT->getType(); | 3712 Type SrcTy = SrcT->getType(); |
3627 Variable *T = makeReg(SrcTy); | 3713 Variable *T = makeReg(SrcTy); |
3628 Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem); | 3714 Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem); |
3629 Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem); | 3715 Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem); |
3630 // ALIGNHACK: Until data alignment support is implemented, vector | |
3631 // instructions need to have vector operands in registers. Once | |
3632 // there is support for data alignment, LEGAL_HACK can be removed. | |
3633 #define LEGAL_HACK(Vect) legalizeToVar((Vect)) | |
3634 if (InstructionSet >= SSE4_1) { | 3716 if (InstructionSet >= SSE4_1) { |
3635 // TODO(wala): If the condition operand is a constant, use blendps | 3717 // TODO(wala): If the condition operand is a constant, use blendps |
3636 // or pblendw. | 3718 // or pblendw. |
3637 // | 3719 // |
3638 // Use blendvps or pblendvb to implement select. | 3720 // Use blendvps or pblendvb to implement select. |
3639 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 || | 3721 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 || |
3640 SrcTy == IceType_v4f32) { | 3722 SrcTy == IceType_v4f32) { |
3641 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem); | 3723 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem); |
3642 Variable *xmm0 = makeReg(IceType_v4i32, Reg_xmm0); | 3724 Variable *xmm0 = makeReg(IceType_v4i32, Reg_xmm0); |
3643 _movp(xmm0, ConditionRM); | 3725 _movp(xmm0, ConditionRM); |
3644 _psll(xmm0, Ctx->getConstantInt(IceType_i8, 31)); | 3726 _psll(xmm0, Ctx->getConstantInt(IceType_i8, 31)); |
3645 _movp(T, SrcFRM); | 3727 _movp(T, SrcFRM); |
3646 _blendvps(T, LEGAL_HACK(SrcTRM), xmm0); | 3728 _blendvps(T, SrcTRM, xmm0); |
3647 _movp(Dest, T); | 3729 _movp(Dest, T); |
3648 } else { | 3730 } else { |
3649 assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16); | 3731 assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16); |
3650 Type SignExtTy = Condition->getType() == IceType_v8i1 ? IceType_v8i16 | 3732 Type SignExtTy = Condition->getType() == IceType_v8i1 ? IceType_v8i16 |
3651 : IceType_v16i8; | 3733 : IceType_v16i8; |
3652 Variable *xmm0 = makeReg(SignExtTy, Reg_xmm0); | 3734 Variable *xmm0 = makeReg(SignExtTy, Reg_xmm0); |
3653 lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition)); | 3735 lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition)); |
3654 _movp(T, SrcFRM); | 3736 _movp(T, SrcFRM); |
3655 _pblendvb(T, LEGAL_HACK(SrcTRM), xmm0); | 3737 _pblendvb(T, SrcTRM, xmm0); |
3656 _movp(Dest, T); | 3738 _movp(Dest, T); |
3657 } | 3739 } |
3658 return; | 3740 return; |
3659 } | 3741 } |
3660 // Lower select without SSE4.1: | 3742 // Lower select without SSE4.1: |
3661 // a=d?b:c ==> | 3743 // a=d?b:c ==> |
3662 // if elementtype(d) != i1: | 3744 // if elementtype(d) != i1: |
3663 // d=sext(d); | 3745 // d=sext(d); |
3664 // a=(b&d)|(c&~d); | 3746 // a=(b&d)|(c&~d); |
3665 Variable *T2 = makeReg(SrcTy); | 3747 Variable *T2 = makeReg(SrcTy); |
3666 // Sign extend the condition operand if applicable. | 3748 // Sign extend the condition operand if applicable. |
3667 if (SrcTy == IceType_v4f32) { | 3749 if (SrcTy == IceType_v4f32) { |
3668 // The sext operation takes only integer arguments. | 3750 // The sext operation takes only integer arguments. |
3669 Variable *T3 = Func->makeVariable(IceType_v4i32, Context.getNode()); | 3751 Variable *T3 = Func->makeVariable(IceType_v4i32, Context.getNode()); |
3670 lowerCast(InstCast::create(Func, InstCast::Sext, T3, Condition)); | 3752 lowerCast(InstCast::create(Func, InstCast::Sext, T3, Condition)); |
3671 _movp(T, T3); | 3753 _movp(T, T3); |
3672 } else if (typeElementType(SrcTy) != IceType_i1) { | 3754 } else if (typeElementType(SrcTy) != IceType_i1) { |
3673 lowerCast(InstCast::create(Func, InstCast::Sext, T, Condition)); | 3755 lowerCast(InstCast::create(Func, InstCast::Sext, T, Condition)); |
3674 } else { | 3756 } else { |
3675 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem); | 3757 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem); |
3676 _movp(T, ConditionRM); | 3758 _movp(T, ConditionRM); |
3677 } | 3759 } |
3678 _movp(T2, T); | 3760 _movp(T2, T); |
3679 _pand(T, LEGAL_HACK(SrcTRM)); | 3761 _pand(T, SrcTRM); |
3680 _pandn(T2, LEGAL_HACK(SrcFRM)); | 3762 _pandn(T2, SrcFRM); |
3681 _por(T, T2); | 3763 _por(T, T2); |
3682 _movp(Dest, T); | 3764 _movp(Dest, T); |
3683 #undef LEGAL_HACK | |
3684 | 3765 |
3685 return; | 3766 return; |
3686 } | 3767 } |
3687 | 3768 |
3688 // a=d?b:c ==> cmp d,0; a=b; jne L1; FakeUse(a); a=c; L1: | 3769 // a=d?b:c ==> cmp d,0; a=b; jne L1; FakeUse(a); a=c; L1: |
3689 Operand *ConditionRMI = legalize(Condition); | 3770 Operand *ConditionRMI = legalize(Condition); |
3690 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 3771 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
3691 InstX8632Label *Label = InstX8632Label::create(Func, this); | 3772 InstX8632Label *Label = InstX8632Label::create(Func, this); |
3692 | 3773 |
3693 if (Dest->getType() == IceType_i64) { | 3774 if (Dest->getType() == IceType_i64) { |
(...skipping 542 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4236 for (SizeT i = 0; i < Size; ++i) { | 4317 for (SizeT i = 0; i < Size; ++i) { |
4237 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; | 4318 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; |
4238 } | 4319 } |
4239 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; | 4320 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; |
4240 } | 4321 } |
4241 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName | 4322 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName |
4242 << "\n"; | 4323 << "\n"; |
4243 } | 4324 } |
4244 | 4325 |
4245 } // end of namespace Ice | 4326 } // end of namespace Ice |
OLD | NEW |