src/IceTargetLoweringX8632.cpp - Issue 465413003: Subzero: Align spill locations to natural alignment.

Side by Side Diff: src/IceTargetLoweringX8632.cpp

Issue 465413003: Subzero: Align spill locations to natural alignment. (Closed) Base URL: https://gerrit.chromium.org/gerrit/p/native_client/pnacl-subzero.git@master

Patch Set: Don't sort everything. Created 6 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//	1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//

2 //	2 //

3 // The Subzero Code Generator	3 // The Subzero Code Generator

4 //	4 //

5 // This file is distributed under the University of Illinois Open Source	5 // This file is distributed under the University of Illinois Open Source

6 // License. See LICENSE.TXT for details.	6 // License. See LICENSE.TXT for details.

7 //	7 //

8 //===----------------------------------------------------------------------===//	8 //===----------------------------------------------------------------------===//

9 //	9 //

10 // This file implements the TargetLoweringX8632 class, which	10 // This file implements the TargetLoweringX8632 class, which

(...skipping 111 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
122	122

123 // The maximum number of arguments to pass in XMM registers	123 // The maximum number of arguments to pass in XMM registers

124 const uint32_t X86_MAX_XMM_ARGS = 4;	124 const uint32_t X86_MAX_XMM_ARGS = 4;

125 // The number of bits in a byte	125 // The number of bits in a byte

126 const uint32_t X86_CHAR_BIT = 8;	126 const uint32_t X86_CHAR_BIT = 8;

127 // Stack alignment	127 // Stack alignment

128 const uint32_t X86_STACK_ALIGNMENT_BYTES = 16;	128 const uint32_t X86_STACK_ALIGNMENT_BYTES = 16;

129 // Size of the return address on the stack	129 // Size of the return address on the stack

130 const uint32_t X86_RET_IP_SIZE_BYTES = 4;	130 const uint32_t X86_RET_IP_SIZE_BYTES = 4;

131	131

132 // Value is a size in bytes. Return Value adjusted to the next highest	132 // Value and Alignment are in bytes. Return Value adjusted to the next

133 // multiple of the stack alignment.	133 // highest multiple of Alignment.

	134 uint32_t applyAlignment(uint32_t Value, uint32_t Alignment) {

	135 // power of 2

	136 assert((Alignment & (Alignment - 1)) == 0);

	137 return (Value + Alignment - 1) & -Alignment;

	138 }

	139

	140 // Value is in bytes. Return Value adjusted to the next highest multiple

	141 // of the stack alignment.

134 uint32_t applyStackAlignment(uint32_t Value) {	142 uint32_t applyStackAlignment(uint32_t Value) {

135 // power of 2	143 return applyAlignment(Value, X86_STACK_ALIGNMENT_BYTES);

136 assert((X86_STACK_ALIGNMENT_BYTES & (X86_STACK_ALIGNMENT_BYTES - 1)) == 0);

137 return (Value + X86_STACK_ALIGNMENT_BYTES - 1) & -X86_STACK_ALIGNMENT_BYTES;

138 }	144 }

139	145

140 // Instruction set options	146 // Instruction set options

141 namespace cl = ::llvm::cl;	147 namespace cl = ::llvm::cl;

142 cl::opt<TargetX8632::X86InstructionSet> CLInstructionSet(	148 cl::opt<TargetX8632::X86InstructionSet> CLInstructionSet(

143 "mattr", cl::desc("X86 target attributes"),	149 "mattr", cl::desc("X86 target attributes"),

144 cl::init(TargetX8632::SSE2),	150 cl::init(TargetX8632::SSE2),

145 cl::values(	151 cl::values(

146 clEnumValN(TargetX8632::SSE2, "sse2",	152 clEnumValN(TargetX8632::SSE2, "sse2",

147 "Enable SSE2 instructions (default)"),	153 "Enable SSE2 instructions (default)"),

(...skipping 365 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
513 Variable *RegisterArg = Func->makeVariable(Ty, DefNode, Name);	519 Variable *RegisterArg = Func->makeVariable(Ty, DefNode, Name);

514 RegisterArg->setRegNum(RegNum);	520 RegisterArg->setRegNum(RegNum);

515 RegisterArg->setIsArg(Func);	521 RegisterArg->setIsArg(Func);

516 Arg->setIsArg(Func, false);	522 Arg->setIsArg(Func, false);

517	523

518 Args[I] = RegisterArg;	524 Args[I] = RegisterArg;

519 Context.insert(InstAssign::create(Func, Arg, RegisterArg));	525 Context.insert(InstAssign::create(Func, Arg, RegisterArg));

520 }	526 }

521 }	527 }

522	528

	529 void TargetX8632::sortByAlignment(VarList &Dest, const VarList &Source) const {

	530 typedef std::map<uint32_t, VarList> BucketMap;
	Jim Stichnoth 2014/08/13 23:59:34: Use a list<> or vector<> instead of map<>?
	Jim Stichnoth 2014/08/14 16:48:29 (in reply to stichnot, 2014/08/13 23:59:34): Never mind this comment, I misread the code.
	wala 2014/08/14 17:31:24 (in reply to stichnot, 2014/08/13 23:59:34): There are only 4 possible buckets (corresponding to the type sizes). I ended up using an array.
	wala 2014/08/14 17:51:59 (in reply to wala, 2014/08/14 17:31:24): Actually, there are 3 usable buckets (4 bytes, 8 bytes, 16 bytes).
	531 BucketMap Buckets;

	532

	533 for (VarList::const_iterator I = Source.begin(), E = Source.end(); I != E;

	534 ++I) {

	535 uint32_t NaturalAlignment = typeWidthInBytesOnStack((*I)->getType());

	536 Buckets[NaturalAlignment].push_back(*I);

	537 }

	538

	539 for (BucketMap::reverse_iterator I = Buckets.rbegin(), E = Buckets.rend();

	540 I != E; ++I) {

	541 VarList &List = I->second;

	542 Dest.insert(Dest.end(), List.begin(), List.end());

	543 }

	544 }

	545

523 // Helper function for addProlog().	546 // Helper function for addProlog().

524 //	547 //

525 // This assumes Arg is an argument passed on the stack. This sets the	548 // This assumes Arg is an argument passed on the stack. This sets the

526 // frame offset for Arg and updates InArgsSizeBytes according to Arg's	549 // frame offset for Arg and updates InArgsSizeBytes according to Arg's

527 // width. For an I64 arg that has been split into Lo and Hi components,	550 // width. For an I64 arg that has been split into Lo and Hi components,

528 // it calls itself recursively on the components, taking care to handle	551 // it calls itself recursively on the components, taking care to handle

529 // Lo first because of the little-endian architecture. Lastly, this	552 // Lo first because of the little-endian architecture. Lastly, this

530 // function generates an instruction to copy Arg into its assigned	553 // function generates an instruction to copy Arg into its assigned

531 // register if applicable.	554 // register if applicable.

532 void TargetX8632::finishArgumentLowering(Variable Arg, Variable FramePtr,	555 void TargetX8632::finishArgumentLowering(Variable Arg, Variable FramePtr,

(...skipping 59 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
592 getRegisterSet(RegSet_CalleeSave, RegSet_None);	615 getRegisterSet(RegSet_CalleeSave, RegSet_None);

593	616

594 size_t GlobalsSize = 0;	617 size_t GlobalsSize = 0;

595 std::vector<size_t> LocalsSize(Func->getNumNodes());	618 std::vector<size_t> LocalsSize(Func->getNumNodes());

596	619

597 // Prepass. Compute RegsUsed, PreservedRegsSizeBytes, and	620 // Prepass. Compute RegsUsed, PreservedRegsSizeBytes, and

598 // LocalsSizeBytes.	621 // LocalsSizeBytes.

599 RegsUsed = llvm::SmallBitVector(CalleeSaves.size());	622 RegsUsed = llvm::SmallBitVector(CalleeSaves.size());

600 const VarList &Variables = Func->getVariables();	623 const VarList &Variables = Func->getVariables();

601 const VarList &Args = Func->getArgs();	624 const VarList &Args = Func->getArgs();

	625 VarList SpilledVariables, SortedSpilledVariables,

	626 VariablesLinkedToSpillSplots;

	627

	628 // If there is a separate locals area, this specifies the alignment

	629 // for it.

	630 uint32_t LocalsSlotsAlignmentBytes = 0;
	Jim Stichnoth 2014/08/13 23:59:34: At this point, the stack frame layout is starting to get complex enough that it would be nice to have a beautiful ascii art diagram of the frame like in this CL description, labeled with the local variables that contain the size of each section.
	wala 2014/08/14 17:31:24 (in reply to stichnot, 2014/08/13 23:59:34): Done.
	631 // The entire spill locations area gets aligned to largest natural

	632 // alignment of the variables that have a spill slot.

	633 uint32_t SpillAreaAlignmentBytes = 0;

602 for (VarList::const_iterator I = Variables.begin(), E = Variables.end();	634 for (VarList::const_iterator I = Variables.begin(), E = Variables.end();

603 I != E; ++I) {	635 I != E; ++I) {

604 Variable Var = I;	636 Variable Var = I;

605 if (Var->hasReg()) {	637 if (Var->hasReg()) {

606 RegsUsed[Var->getRegNum()] = true;	638 RegsUsed[Var->getRegNum()] = true;

607 continue;	639 continue;

608 }	640 }

609 // An argument either does not need a stack slot (if passed in a	641 // An argument either does not need a stack slot (if passed in a

610 // register) or already has one (if passed on the stack).	642 // register) or already has one (if passed on the stack).

611 if (Var->getIsArg())	643 if (Var->getIsArg())

612 continue;	644 continue;

613 // An unreferenced variable doesn't need a stack slot.	645 // An unreferenced variable doesn't need a stack slot.

614 if (ComputedLiveRanges && Var->getLiveRange().isEmpty())	646 if (ComputedLiveRanges && Var->getLiveRange().isEmpty())

615 continue;	647 continue;

616 // A spill slot linked to a variable with a stack slot should reuse	648 // A spill slot linked to a variable with a stack slot should reuse

617 // that stack slot.	649 // that stack slot.

618 if (Var->getWeight() == RegWeight::Zero && Var->getRegisterOverlap()) {	650 if (Var->getWeight() == RegWeight::Zero && Var->getRegisterOverlap()) {

619 if (Variable *Linked = Var->getPreferredRegister()) {	651 if (Variable *Linked = Var->getPreferredRegister()) {

620 if (!Linked->hasReg())	652 if (!Linked->hasReg()) {

	653 VariablesLinkedToSpillSplots.push_back(Var);

621 continue;	654 continue;

	655 }

622 }	656 }

623 }	657 }

	658 SpilledVariables.push_back(Var);

	659 }

	660

	661 sortByAlignment(SortedSpilledVariables, SpilledVariables);

	662 for (VarList::const_iterator I = SortedSpilledVariables.begin(),

	663 E = SortedSpilledVariables.end();

	664 I != E; ++I) {

	665 Variable Var = I;

624 size_t Increment = typeWidthInBytesOnStack(Var->getType());	666 size_t Increment = typeWidthInBytesOnStack(Var->getType());

	667 if (!SpillAreaAlignmentBytes)

	668 SpillAreaAlignmentBytes = Increment;

625 if (SimpleCoalescing) {	669 if (SimpleCoalescing) {

626 if (Var->isMultiblockLife()) {	670 if (Var->isMultiblockLife()) {

627 GlobalsSize += Increment;	671 GlobalsSize += Increment;

628 } else {	672 } else {

629 SizeT NodeIndex = Var->getLocalUseNode()->getIndex();	673 SizeT NodeIndex = Var->getLocalUseNode()->getIndex();

630 LocalsSize[NodeIndex] += Increment;	674 LocalsSize[NodeIndex] += Increment;

631 if (LocalsSize[NodeIndex] > LocalsSizeBytes)	675 if (LocalsSize[NodeIndex] > LocalsSizeBytes)

632 LocalsSizeBytes = LocalsSize[NodeIndex];	676 LocalsSizeBytes = LocalsSize[NodeIndex];

	677 if (!LocalsSlotsAlignmentBytes)

	678 LocalsSlotsAlignmentBytes = Increment;

633 }	679 }

634 } else {	680 } else {

635 LocalsSizeBytes += Increment;	681 LocalsSizeBytes += Increment;

636 }	682 }

637 }	683 }

638 LocalsSizeBytes += GlobalsSize;	684 LocalsSizeBytes += GlobalsSize;

639	685

640 // Add push instructions for preserved registers.	686 // Add push instructions for preserved registers.

641 for (SizeT i = 0; i < CalleeSaves.size(); ++i) {	687 for (SizeT i = 0; i < CalleeSaves.size(); ++i) {

642 if (CalleeSaves[i] && RegsUsed[i]) {	688 if (CalleeSaves[i] && RegsUsed[i]) {

643 PreservedRegsSizeBytes += 4;	689 PreservedRegsSizeBytes += 4;

644 const bool SuppressStackAdjustment = true;	690 const bool SuppressStackAdjustment = true;

645 _push(getPhysicalRegister(i), SuppressStackAdjustment);	691 _push(getPhysicalRegister(i), SuppressStackAdjustment);

646 }	692 }

647 }	693 }

648	694

649 // Generate "push ebp; mov ebp, esp"	695 // Generate "push ebp; mov ebp, esp"

650 if (IsEbpBasedFrame) {	696 if (IsEbpBasedFrame) {

651 assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None))	697 assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None))

652 .count() == 0);	698 .count() == 0);

653 PreservedRegsSizeBytes += 4;	699 PreservedRegsSizeBytes += 4;

654 Variable *ebp = getPhysicalRegister(Reg_ebp);	700 Variable *ebp = getPhysicalRegister(Reg_ebp);

655 Variable *esp = getPhysicalRegister(Reg_esp);	701 Variable *esp = getPhysicalRegister(Reg_esp);

656 const bool SuppressStackAdjustment = true;	702 const bool SuppressStackAdjustment = true;

657 _push(ebp, SuppressStackAdjustment);	703 _push(ebp, SuppressStackAdjustment);

658 _mov(ebp, esp);	704 _mov(ebp, esp);

659 }	705 }

660	706

	707 // Align the variables area.

	708 uint32_t SpillAreaPaddingBytes = 0;

	709 if (SpillAreaAlignmentBytes) {

	710 assert(SpillAreaAlignmentBytes <= X86_STACK_ALIGNMENT_BYTES);

	711 uint32_t SpillAreaOffset = X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes;

	712 uint32_t SpillAreaStart =

	713 applyAlignment(SpillAreaOffset, SpillAreaAlignmentBytes);

	714 SpillAreaPaddingBytes = SpillAreaStart - SpillAreaOffset;

	715 LocalsSizeBytes += SpillAreaPaddingBytes;

	716 }

	717

	718 // If there are separate globals and locals areas, make sure the

	719 // locals area is aligned by padding the end of the globals area.

	720 if (LocalsSlotsAlignmentBytes) {

	721 assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);

	722 uint32_t NewGlobalsSize =

	723 applyAlignment(GlobalsSize, LocalsSlotsAlignmentBytes);

	724 GlobalsSize = NewGlobalsSize;

	725 LocalsSizeBytes += NewGlobalsSize - GlobalsSize;
	jvoung (off chromium) 2014/08/14 00:48:49: Isn't this difference always going to be zero? Do you want to move the assignment of GlobalsSize = NewGlobalsSize to below this += ?
	wala 2014/08/14 17:31:24 (in reply to jvoung, 2014/08/14 00:48:49): Ouch, I'm surprised all the tests still passed. Done: The logic has been changed so that the padding size is properly added to LocalsSizeBytes. Also, the variables have been renamed for clarity. I've also added a unit test to exercise this case.
	726 }

	727

	728 // Align esp if necessary.

661 if (NeedsStackAlignment) {	729 if (NeedsStackAlignment) {

662 uint32_t StackSize = applyStackAlignment(	730 uint32_t StackOffset = X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes;

663 X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes + LocalsSizeBytes);	731 uint32_t StackSize = applyStackAlignment(StackOffset + LocalsSizeBytes);

664 LocalsSizeBytes =	732 LocalsSizeBytes = StackSize - StackOffset;

665 StackSize - X86_RET_IP_SIZE_BYTES - PreservedRegsSizeBytes;

666 }	733 }

667	734

668 // Generate "sub esp, LocalsSizeBytes"	735 // Generate "sub esp, LocalsSizeBytes"

669 if (LocalsSizeBytes)	736 if (LocalsSizeBytes)

670 _sub(getPhysicalRegister(Reg_esp),	737 _sub(getPhysicalRegister(Reg_esp),

671 Ctx->getConstantInt(IceType_i32, LocalsSizeBytes));	738 Ctx->getConstantInt(IceType_i32, LocalsSizeBytes));

672	739

673 resetStackAdjustment();	740 resetStackAdjustment();

674	741

675 // Fill in stack offsets for stack args, and copy args into registers	742 // Fill in stack offsets for stack args, and copy args into registers

(...skipping 13 matching lines...) Expand all Loading...
689 continue;	756 continue;

690 }	757 }

691 finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes);	758 finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes);

692 }	759 }

693	760

694 // Fill in stack offsets for locals.	761 // Fill in stack offsets for locals.

695 size_t TotalGlobalsSize = GlobalsSize;	762 size_t TotalGlobalsSize = GlobalsSize;

696 GlobalsSize = 0;	763 GlobalsSize = 0;

697 LocalsSize.assign(LocalsSize.size(), 0);	764 LocalsSize.assign(LocalsSize.size(), 0);

698 size_t NextStackOffset = 0;	765 size_t NextStackOffset = 0;

699 for (VarList::const_iterator I = Variables.begin(), E = Variables.end();	766 for (VarList::const_iterator I = SortedSpilledVariables.begin(),

	767 E = SortedSpilledVariables.end();

700 I != E; ++I) {	768 I != E; ++I) {

701 Variable Var = I;	769 Variable Var = I;

702 if (Var->hasReg()) {

703 RegsUsed[Var->getRegNum()] = true;

704 continue;

705 }

706 if (Var->getIsArg())

707 continue;

708 if (ComputedLiveRanges && Var->getLiveRange().isEmpty())

709 continue;

710 if (Var->getWeight() == RegWeight::Zero && Var->getRegisterOverlap()) {	770 if (Var->getWeight() == RegWeight::Zero && Var->getRegisterOverlap()) {
	wala 2014/08/14 17:31:24: Since I've separated out the list of variables linked to a variable with a spill slot from the list of spilled variables, I don't think this statement is necessary, so I've removed it.
711 if (Variable *Linked = Var->getPreferredRegister()) {	771 if (Variable *Linked = Var->getPreferredRegister()) {

712 if (!Linked->hasReg()) {	772 if (!Linked->hasReg()) {

713 // TODO: Make sure Linked has already been assigned a stack	773 // TODO: Make sure Linked has already been assigned a stack

714 // slot.	774 // slot.

715 Var->setStackOffset(Linked->getStackOffset());	775 Var->setStackOffset(Linked->getStackOffset());

716 continue;	776 continue;

717 }	777 }

718 }	778 }

719 }	779 }

720 size_t Increment = typeWidthInBytesOnStack(Var->getType());	780 size_t Increment = typeWidthInBytesOnStack(Var->getType());

721 if (SimpleCoalescing) {	781 if (SimpleCoalescing) {

722 if (Var->isMultiblockLife()) {	782 if (Var->isMultiblockLife()) {

723 GlobalsSize += Increment;	783 GlobalsSize += Increment;

724 NextStackOffset = GlobalsSize;	784 NextStackOffset = GlobalsSize;

725 } else {	785 } else {

726 SizeT NodeIndex = Var->getLocalUseNode()->getIndex();	786 SizeT NodeIndex = Var->getLocalUseNode()->getIndex();

727 LocalsSize[NodeIndex] += Increment;	787 LocalsSize[NodeIndex] += Increment;

728 NextStackOffset = TotalGlobalsSize + LocalsSize[NodeIndex];	788 NextStackOffset = TotalGlobalsSize + LocalsSize[NodeIndex];

729 }	789 }

730 } else {	790 } else {

731 NextStackOffset += Increment;	791 NextStackOffset += Increment;

732 }	792 }

733 if (IsEbpBasedFrame)	793 if (IsEbpBasedFrame)

734 Var->setStackOffset(-NextStackOffset);	794 Var->setStackOffset(-NextStackOffset - SpillAreaPaddingBytes);

735 else	795 else {
	Jim Stichnoth 2014/08/13 23:59:34: Probably best to have braces around both clauses or neither clause.
	wala 2014/08/14 17:31:24 (in reply to stichnot, 2014/08/13 23:59:34): Done.
736 Var->setStackOffset(LocalsSizeBytes - NextStackOffset);	796 Var->setStackOffset(LocalsSizeBytes - NextStackOffset -

	797 SpillAreaPaddingBytes);

	798 }

737 }	799 }

738 this->FrameSizeLocals = NextStackOffset;	800 this->FrameSizeLocals = NextStackOffset;

739 this->HasComputedFrame = true;	801 this->HasComputedFrame = true;

740	802

	803 for (VarList::const_iterator I = VariablesLinkedToSpillSplots.begin(),

	804 E = VariablesLinkedToSpillSplots.end();

	805 I != E; ++I) {

	806 Variable Var = I;

	807 Variable *Linked = Var->getPreferredRegister();

	808 Var->setStackOffset(Linked->getStackOffset());

	809 }

	810

741 if (Func->getContext()->isVerbose(IceV_Frame)) {	811 if (Func->getContext()->isVerbose(IceV_Frame)) {

742 Func->getContext()->getStrDump() << "LocalsSizeBytes=" << LocalsSizeBytes	812 Func->getContext()->getStrDump() << "LocalsSizeBytes=" << LocalsSizeBytes

743 << "\n"	813 << "\n"

744 << "InArgsSizeBytes=" << InArgsSizeBytes	814 << "InArgsSizeBytes=" << InArgsSizeBytes

745 << "\n"	815 << "\n"

746 << "PreservedRegsSizeBytes="	816 << "PreservedRegsSizeBytes="

747 << PreservedRegsSizeBytes << "\n";	817 << PreservedRegsSizeBytes << "\n";

748 }	818 }

749 }	819 }

750	820

(...skipping 233 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
984 assert((AlignmentParam & (AlignmentParam - 1)) == 0);	1054 assert((AlignmentParam & (AlignmentParam - 1)) == 0);

985 assert((X86_STACK_ALIGNMENT_BYTES & (X86_STACK_ALIGNMENT_BYTES - 1)) == 0);	1055 assert((X86_STACK_ALIGNMENT_BYTES & (X86_STACK_ALIGNMENT_BYTES - 1)) == 0);

986	1056

987 uint32_t Alignment = std::max(AlignmentParam, X86_STACK_ALIGNMENT_BYTES);	1057 uint32_t Alignment = std::max(AlignmentParam, X86_STACK_ALIGNMENT_BYTES);

988 if (Alignment > X86_STACK_ALIGNMENT_BYTES) {	1058 if (Alignment > X86_STACK_ALIGNMENT_BYTES) {

989 _and(esp, Ctx->getConstantInt(IceType_i32, -Alignment));	1059 _and(esp, Ctx->getConstantInt(IceType_i32, -Alignment));

990 }	1060 }

991 if (ConstantInteger *ConstantTotalSize =	1061 if (ConstantInteger *ConstantTotalSize =

992 llvm::dyn_cast<ConstantInteger>(TotalSize)) {	1062 llvm::dyn_cast<ConstantInteger>(TotalSize)) {

993 uint32_t Value = ConstantTotalSize->getValue();	1063 uint32_t Value = ConstantTotalSize->getValue();

994 // Round Value up to the next highest multiple of the alignment.	1064 Value = applyAlignment(Value, Alignment);

995 Value = (Value + Alignment - 1) & -Alignment;

996 _sub(esp, Ctx->getConstantInt(IceType_i32, Value));	1065 _sub(esp, Ctx->getConstantInt(IceType_i32, Value));

997 } else {	1066 } else {

998 // Non-constant sizes need to be adjusted to the next highest	1067 // Non-constant sizes need to be adjusted to the next highest

999 // multiple of the required alignment at runtime.	1068 // multiple of the required alignment at runtime.

1000 Variable *T = makeReg(IceType_i32);	1069 Variable *T = makeReg(IceType_i32);

1001 _mov(T, TotalSize);	1070 _mov(T, TotalSize);

1002 _add(T, Ctx->getConstantInt(IceType_i32, Alignment - 1));	1071 _add(T, Ctx->getConstantInt(IceType_i32, Alignment - 1));

1003 _and(T, Ctx->getConstantInt(IceType_i32, -Alignment));	1072 _and(T, Ctx->getConstantInt(IceType_i32, -Alignment));

1004 _sub(esp, T);	1073 _sub(esp, T);

1005 }	1074 }

(...skipping 226 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1232 case InstArithmetic::Fsub:	1301 case InstArithmetic::Fsub:

1233 case InstArithmetic::Fmul:	1302 case InstArithmetic::Fmul:

1234 case InstArithmetic::Fdiv:	1303 case InstArithmetic::Fdiv:

1235 case InstArithmetic::Frem:	1304 case InstArithmetic::Frem:

1236 llvm_unreachable("FP instruction with i64 type");	1305 llvm_unreachable("FP instruction with i64 type");

1237 break;	1306 break;

1238 }	1307 }

1239 } else if (isVectorType(Dest->getType())) {	1308 } else if (isVectorType(Dest->getType())) {

1240 // TODO: Trap on integer divide and integer modulo by zero.	1309 // TODO: Trap on integer divide and integer modulo by zero.

1241 // See: https://code.google.com/p/nativeclient/issues/detail?id=3899	1310 // See: https://code.google.com/p/nativeclient/issues/detail?id=3899

1242 //

1243 // TODO(wala): ALIGNHACK: All vector arithmetic is currently done in

1244 // registers. This is a workaround of the fact that there is no

1245 // support for aligning stack operands. Once there is support,

1246 // remove LEGAL_HACK.

1247 #define LEGAL_HACK(s) legalizeToVar((s))

1248 switch (Inst->getOp()) {	1311 switch (Inst->getOp()) {

1249 case InstArithmetic::_num:	1312 case InstArithmetic::_num:

1250 llvm_unreachable("Unknown arithmetic operator");	1313 llvm_unreachable("Unknown arithmetic operator");

1251 break;	1314 break;

1252 case InstArithmetic::Add: {	1315 case InstArithmetic::Add: {

1253 Variable *T = makeReg(Dest->getType());	1316 Variable *T = makeReg(Dest->getType());

1254 _movp(T, Src0);	1317 _movp(T, Src0);

1255 _padd(T, LEGAL_HACK(Src1));	1318 _padd(T, Src1);

1256 _movp(Dest, T);	1319 _movp(Dest, T);

1257 } break;	1320 } break;

1258 case InstArithmetic::And: {	1321 case InstArithmetic::And: {

1259 Variable *T = makeReg(Dest->getType());	1322 Variable *T = makeReg(Dest->getType());

1260 _movp(T, Src0);	1323 _movp(T, Src0);

1261 _pand(T, LEGAL_HACK(Src1));	1324 _pand(T, Src1);

1262 _movp(Dest, T);	1325 _movp(Dest, T);

1263 } break;	1326 } break;

1264 case InstArithmetic::Or: {	1327 case InstArithmetic::Or: {

1265 Variable *T = makeReg(Dest->getType());	1328 Variable *T = makeReg(Dest->getType());

1266 _movp(T, Src0);	1329 _movp(T, Src0);

1267 _por(T, LEGAL_HACK(Src1));	1330 _por(T, Src1);

1268 _movp(Dest, T);	1331 _movp(Dest, T);

1269 } break;	1332 } break;

1270 case InstArithmetic::Xor: {	1333 case InstArithmetic::Xor: {

1271 Variable *T = makeReg(Dest->getType());	1334 Variable *T = makeReg(Dest->getType());

1272 _movp(T, Src0);	1335 _movp(T, Src0);

1273 _pxor(T, LEGAL_HACK(Src1));	1336 _pxor(T, Src1);

1274 _movp(Dest, T);	1337 _movp(Dest, T);

1275 } break;	1338 } break;

1276 case InstArithmetic::Sub: {	1339 case InstArithmetic::Sub: {

1277 Variable *T = makeReg(Dest->getType());	1340 Variable *T = makeReg(Dest->getType());

1278 _movp(T, Src0);	1341 _movp(T, Src0);

1279 _psub(T, LEGAL_HACK(Src1));	1342 _psub(T, Src1);

1280 _movp(Dest, T);	1343 _movp(Dest, T);

1281 } break;	1344 } break;

1282 case InstArithmetic::Mul: {	1345 case InstArithmetic::Mul: {

1283 bool TypesAreValidForPmull =	1346 bool TypesAreValidForPmull =

1284 Dest->getType() == IceType_v4i32 \|\| Dest->getType() == IceType_v8i16;	1347 Dest->getType() == IceType_v4i32 \|\| Dest->getType() == IceType_v8i16;

1285 bool InstructionSetIsValidForPmull =	1348 bool InstructionSetIsValidForPmull =

1286 Dest->getType() == IceType_v8i16 \|\| InstructionSet >= SSE4_1;	1349 Dest->getType() == IceType_v8i16 \|\| InstructionSet >= SSE4_1;

1287 if (TypesAreValidForPmull && InstructionSetIsValidForPmull) {	1350 if (TypesAreValidForPmull && InstructionSetIsValidForPmull) {

1288 Variable *T = makeReg(Dest->getType());	1351 Variable *T = makeReg(Dest->getType());

1289 _movp(T, Src0);	1352 _movp(T, Src0);

1290 _pmull(T, LEGAL_HACK(Src1));	1353 _pmull(T, Src1);

1291 _movp(Dest, T);	1354 _movp(Dest, T);

1292 } else if (Dest->getType() == IceType_v4i32) {	1355 } else if (Dest->getType() == IceType_v4i32) {

1293 // Lowering sequence:	1356 // Lowering sequence:

1294 // Note: The mask arguments have index 0 on the left.	1357 // Note: The mask arguments have index 0 on the left.

1295 //	1358 //

1296 // movups T1, Src0	1359 // movups T1, Src0

1297 // pshufd T2, Src0, {1,0,3,0}	1360 // pshufd T2, Src0, {1,0,3,0}

1298 // pshufd T3, Src1, {1,0,3,0}	1361 // pshufd T3, Src1, {1,0,3,0}

1299 // # T1 = {Src0[0] * Src1[0], Src0[2] * Src1[2]}	1362 // # T1 = {Src0[0] * Src1[0], Src0[2] * Src1[2]}

1300 // pmuludq T1, Src1	1363 // pmuludq T1, Src1

(...skipping 12 matching lines...) Expand all Loading...
1313 // Dest[0, 2], Src[0, 2]	1376 // Dest[0, 2], Src[0, 2]

1314 const unsigned Mask0202 = 0x88;	1377 const unsigned Mask0202 = 0x88;

1315 // Mask that directs pshufd to create a vector with entries	1378 // Mask that directs pshufd to create a vector with entries

1316 // Src[0, 2, 1, 3]	1379 // Src[0, 2, 1, 3]

1317 const unsigned Mask0213 = 0xd8;	1380 const unsigned Mask0213 = 0xd8;

1318 Variable *T1 = makeReg(IceType_v4i32);	1381 Variable *T1 = makeReg(IceType_v4i32);

1319 Variable *T2 = makeReg(IceType_v4i32);	1382 Variable *T2 = makeReg(IceType_v4i32);

1320 Variable *T3 = makeReg(IceType_v4i32);	1383 Variable *T3 = makeReg(IceType_v4i32);

1321 Variable *T4 = makeReg(IceType_v4i32);	1384 Variable *T4 = makeReg(IceType_v4i32);

1322 _movp(T1, Src0);	1385 _movp(T1, Src0);

1323 // TODO(wala): ALIGHNHACK: Replace Src0R with Src0 and Src1R	1386 _pshufd(T2, Src0, Mask1030);

1324 // with Src1 after stack operand alignment support is	1387 _pshufd(T3, Src1, Mask1030);

1325 // implemented.	1388 _pmuludq(T1, Src1);

1326 Variable *Src0R = LEGAL_HACK(Src0);

1327 Variable *Src1R = LEGAL_HACK(Src1);

1328 _pshufd(T2, Src0R, Mask1030);

1329 _pshufd(T3, Src1R, Mask1030);

1330 _pmuludq(T1, Src1R);

1331 _pmuludq(T2, T3);	1389 _pmuludq(T2, T3);

1332 _shufps(T1, T2, Ctx->getConstantInt(IceType_i8, Mask0202));	1390 _shufps(T1, T2, Ctx->getConstantInt(IceType_i8, Mask0202));

1333 _pshufd(T4, T1, Ctx->getConstantInt(IceType_i8, Mask0213));	1391 _pshufd(T4, T1, Ctx->getConstantInt(IceType_i8, Mask0213));

1334 _movp(Dest, T4);	1392 _movp(Dest, T4);

1335 } else {	1393 } else {

1336 assert(Dest->getType() == IceType_v16i8);	1394 assert(Dest->getType() == IceType_v16i8);

1337 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);	1395 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);

1338 }	1396 }

1339 } break;	1397 } break;

1340 case InstArithmetic::Shl:	1398 case InstArithmetic::Shl:

1341 case InstArithmetic::Lshr:	1399 case InstArithmetic::Lshr:

1342 case InstArithmetic::Ashr:	1400 case InstArithmetic::Ashr:

1343 case InstArithmetic::Udiv:	1401 case InstArithmetic::Udiv:

1344 case InstArithmetic::Urem:	1402 case InstArithmetic::Urem:

1345 case InstArithmetic::Sdiv:	1403 case InstArithmetic::Sdiv:

1346 case InstArithmetic::Srem:	1404 case InstArithmetic::Srem:

1347 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);	1405 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);

1348 break;	1406 break;

1349 case InstArithmetic::Fadd: {	1407 case InstArithmetic::Fadd: {

1350 Variable *T = makeReg(Dest->getType());	1408 Variable *T = makeReg(Dest->getType());

1351 _movp(T, Src0);	1409 _movp(T, Src0);

1352 _addps(T, LEGAL_HACK(Src1));	1410 _addps(T, Src1);

1353 _movp(Dest, T);	1411 _movp(Dest, T);

1354 } break;	1412 } break;

1355 case InstArithmetic::Fsub: {	1413 case InstArithmetic::Fsub: {

1356 Variable *T = makeReg(Dest->getType());	1414 Variable *T = makeReg(Dest->getType());

1357 _movp(T, Src0);	1415 _movp(T, Src0);

1358 _subps(T, LEGAL_HACK(Src1));	1416 _subps(T, Src1);

1359 _movp(Dest, T);	1417 _movp(Dest, T);

1360 } break;	1418 } break;

1361 case InstArithmetic::Fmul: {	1419 case InstArithmetic::Fmul: {

1362 Variable *T = makeReg(Dest->getType());	1420 Variable *T = makeReg(Dest->getType());

1363 _movp(T, Src0);	1421 _movp(T, Src0);

1364 _mulps(T, LEGAL_HACK(Src1));	1422 _mulps(T, Src1);

1365 _movp(Dest, T);	1423 _movp(Dest, T);

1366 } break;	1424 } break;

1367 case InstArithmetic::Fdiv: {	1425 case InstArithmetic::Fdiv: {

1368 Variable *T = makeReg(Dest->getType());	1426 Variable *T = makeReg(Dest->getType());

1369 _movp(T, Src0);	1427 _movp(T, Src0);

1370 _divps(T, LEGAL_HACK(Src1));	1428 _divps(T, Src1);

1371 _movp(Dest, T);	1429 _movp(Dest, T);

1372 } break;	1430 } break;

1373 case InstArithmetic::Frem:	1431 case InstArithmetic::Frem:

1374 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);	1432 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);

1375 break;	1433 break;

1376 }	1434 }

1377 #undef LEGAL_HACK

1378 } else { // Dest->getType() is non-i64 scalar	1435 } else { // Dest->getType() is non-i64 scalar

1379 Variable *T_edx = NULL;	1436 Variable *T_edx = NULL;

1380 Variable *T = NULL;	1437 Variable *T = NULL;

1381 switch (Inst->getOp()) {	1438 switch (Inst->getOp()) {

1382 case InstArithmetic::_num:	1439 case InstArithmetic::_num:

1383 llvm_unreachable("Unknown arithmetic operator");	1440 llvm_unreachable("Unknown arithmetic operator");

1384 break;	1441 break;

1385 case InstArithmetic::Add:	1442 case InstArithmetic::Add:

1386 _mov(T, Src0);	1443 _mov(T, Src0);

1387 _add(T, Src1);	1444 _add(T, Src1);

(...skipping 804 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2192 // TODO(wala): Determine the best lowering sequences for each type.	2249 // TODO(wala): Determine the best lowering sequences for each type.

2193 bool CanUsePextr =	2250 bool CanUsePextr =

2194 Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1;	2251 Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1;

2195 if (CanUsePextr && Ty != IceType_v4f32) {	2252 if (CanUsePextr && Ty != IceType_v4f32) {

2196 // Use pextrb, pextrw, or pextrd.	2253 // Use pextrb, pextrw, or pextrd.

2197 Constant *Mask = Ctx->getConstantInt(IceType_i8, Index);	2254 Constant *Mask = Ctx->getConstantInt(IceType_i8, Index);

2198 Variable *SourceVectR = legalizeToVar(SourceVectNotLegalized);	2255 Variable *SourceVectR = legalizeToVar(SourceVectNotLegalized);

2199 _pextr(ExtractedElementR, SourceVectR, Mask);	2256 _pextr(ExtractedElementR, SourceVectR, Mask);

2200 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {	2257 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {

2201 // Use pshufd and movd/movss.	2258 // Use pshufd and movd/movss.

2202 //

2203 // ALIGNHACK: Force vector operands to registers in instructions

2204 // that require aligned memory operands until support for data

2205 // alignment is implemented.

2206 #define ALIGN_HACK(Vect) legalizeToVar((Vect))

2207 Operand *SourceVectRM =

2208 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem);

2209 Variable *T = NULL;	2259 Variable *T = NULL;

2210 if (Index) {	2260 if (Index) {

2211 // The shuffle only needs to occur if the element to be extracted	2261 // The shuffle only needs to occur if the element to be extracted

2212 // is not at the lowest index.	2262 // is not at the lowest index.

2213 Constant *Mask = Ctx->getConstantInt(IceType_i8, Index);	2263 Constant *Mask = Ctx->getConstantInt(IceType_i8, Index);

2214 T = makeReg(Ty);	2264 T = makeReg(Ty);

2215 _pshufd(T, ALIGN_HACK(SourceVectRM), Mask);	2265 _pshufd(T, legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem), Mask);

2216 } else {	2266 } else {

2217 T = ALIGN_HACK(SourceVectRM);	2267 T = legalizeToVar(SourceVectNotLegalized);

2218 }	2268 }

2219	2269

2220 if (InVectorElementTy == IceType_i32) {	2270 if (InVectorElementTy == IceType_i32) {

2221 _movd(ExtractedElementR, T);	2271 _movd(ExtractedElementR, T);

2222 } else { // Ty == Icetype_f32	2272 } else { // Ty == Icetype_f32

2223 // TODO(wala): _movss is only used here because _mov does not	2273 // TODO(wala): _movss is only used here because _mov does not

2224 // allow a vector source and a scalar destination. _mov should be	2274 // allow a vector source and a scalar destination. _mov should be

2225 // able to be used here.	2275 // able to be used here.

2226 // _movss is a binary instruction, so the FakeDef is needed to	2276 // _movss is a binary instruction, so the FakeDef is needed to

2227 // keep the live range analysis consistent.	2277 // keep the live range analysis consistent.

2228 Context.insert(InstFakeDef::create(Func, ExtractedElementR));	2278 Context.insert(InstFakeDef::create(Func, ExtractedElementR));

2229 _movss(ExtractedElementR, T);	2279 _movss(ExtractedElementR, T);

2230 }	2280 }

2231 #undef ALIGN_HACK

2232 } else {	2281 } else {

2233 assert(Ty == IceType_v16i8 || Ty == IceType_v16i1);	2282 assert(Ty == IceType_v16i8 || Ty == IceType_v16i1);

2234 // Spill the value to a stack slot and do the extraction in memory.	2283 // Spill the value to a stack slot and do the extraction in memory.

2235 //	2284 //

2236 // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when	2285 // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when

2237 // support for legalizing to mem is implemented.	2286 // support for legalizing to mem is implemented.

2238 Variable *Slot = Func->makeVariable(Ty, Context.getNode());	2287 Variable *Slot = Func->makeVariable(Ty, Context.getNode());

2239 Slot->setWeight(RegWeight::Zero);	2288 Slot->setWeight(RegWeight::Zero);

2240 _movp(Slot, legalizeToVar(SourceVectNotLegalized));	2289 _movp(Slot, legalizeToVar(SourceVectNotLegalized));

2241	2290

(...skipping 38 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2280	2329

2281 if (Condition == InstFcmp::True) {	2330 if (Condition == InstFcmp::True) {

2282 // makeVectorOfOnes() requires an integer vector type.	2331 // makeVectorOfOnes() requires an integer vector type.

2283 T = makeVectorOfMinusOnes(IceType_v4i32);	2332 T = makeVectorOfMinusOnes(IceType_v4i32);

2284 } else if (Condition == InstFcmp::False) {	2333 } else if (Condition == InstFcmp::False) {

2285 T = makeVectorOfZeros(Dest->getType());	2334 T = makeVectorOfZeros(Dest->getType());

2286 } else {	2335 } else {

2287 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);	2336 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);

2288 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);	2337 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);

2289	2338

2290 // ALIGNHACK: Without support for data alignment, both operands to

2291 // cmpps need to be forced into registers. Once support for data

2292 // alignment is implemented, remove LEGAL_HACK.

2293 #define LEGAL_HACK(Vect) legalizeToVar((Vect))

2294 switch (Condition) {	2339 switch (Condition) {

2295 default: {	2340 default: {

2296 InstX8632Cmpps::CmppsCond Predicate = TableFcmp[Index].Predicate;	2341 InstX8632Cmpps::CmppsCond Predicate = TableFcmp[Index].Predicate;

2297 assert(Predicate != InstX8632Cmpps::Cmpps_Invalid);	2342 assert(Predicate != InstX8632Cmpps::Cmpps_Invalid);

2298 T = makeReg(Src0RM->getType());	2343 T = makeReg(Src0RM->getType());

2299 _movp(T, Src0RM);	2344 _movp(T, Src0RM);

2300 _cmpps(T, LEGAL_HACK(Src1RM), Predicate);	2345 _cmpps(T, Src1RM, Predicate);

2301 } break;	2346 } break;

2302 case InstFcmp::One: {	2347 case InstFcmp::One: {

2303 // Check both unequal and ordered.	2348 // Check both unequal and ordered.

2304 T = makeReg(Src0RM->getType());	2349 T = makeReg(Src0RM->getType());

2305 Variable *T2 = makeReg(Src0RM->getType());	2350 Variable *T2 = makeReg(Src0RM->getType());

2306 Src1RM = LEGAL_HACK(Src1RM);

2307 _movp(T, Src0RM);	2351 _movp(T, Src0RM);

2308 _cmpps(T, Src1RM, InstX8632Cmpps::Cmpps_neq);	2352 _cmpps(T, Src1RM, InstX8632Cmpps::Cmpps_neq);

2309 _movp(T2, Src0RM);	2353 _movp(T2, Src0RM);

2310 _cmpps(T2, Src1RM, InstX8632Cmpps::Cmpps_ord);	2354 _cmpps(T2, Src1RM, InstX8632Cmpps::Cmpps_ord);

2311 _pand(T, T2);	2355 _pand(T, T2);

2312 } break;	2356 } break;

2313 case InstFcmp::Ueq: {	2357 case InstFcmp::Ueq: {

2314 // Check both equal or unordered.	2358 // Check both equal or unordered.

2315 T = makeReg(Src0RM->getType());	2359 T = makeReg(Src0RM->getType());

2316 Variable *T2 = makeReg(Src0RM->getType());	2360 Variable *T2 = makeReg(Src0RM->getType());

2317 Src1RM = LEGAL_HACK(Src1RM);

2318 _movp(T, Src0RM);	2361 _movp(T, Src0RM);

2319 _cmpps(T, Src1RM, InstX8632Cmpps::Cmpps_eq);	2362 _cmpps(T, Src1RM, InstX8632Cmpps::Cmpps_eq);

2320 _movp(T2, Src0RM);	2363 _movp(T2, Src0RM);

2321 _cmpps(T2, Src1RM, InstX8632Cmpps::Cmpps_unord);	2364 _cmpps(T2, Src1RM, InstX8632Cmpps::Cmpps_unord);

2322 _por(T, T2);	2365 _por(T, T2);

2323 } break;	2366 } break;

2324 }	2367 }

2325 #undef LEGAL_HACK

2326 }	2368 }

2327	2369

2328 _movp(Dest, T);	2370 _movp(Dest, T);

2329 eliminateNextVectorSextInstruction(Dest);	2371 eliminateNextVectorSextInstruction(Dest);

2330 return;	2372 return;

2331 }	2373 }

2332	2374

2333 // Lowering a = fcmp cond, b, c	2375 // Lowering a = fcmp cond, b, c

2334 // ucomiss b, c /* only if C1 != Br_None */	2376 // ucomiss b, c /* only if C1 != Br_None */

2335 // /* but swap b,c order if SwapOperands==true */	2377 // /* but swap b,c order if SwapOperands==true */

(...skipping 84 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2420 Variable *T1 = makeReg(Ty);	2462 Variable *T1 = makeReg(Ty);

2421 Variable *HighOrderBits = makeVectorOfHighOrderBits(Ty);	2463 Variable *HighOrderBits = makeVectorOfHighOrderBits(Ty);

2422 _movp(T0, Src0RM);	2464 _movp(T0, Src0RM);

2423 _pxor(T0, HighOrderBits);	2465 _pxor(T0, HighOrderBits);

2424 _movp(T1, Src1RM);	2466 _movp(T1, Src1RM);

2425 _pxor(T1, HighOrderBits);	2467 _pxor(T1, HighOrderBits);

2426 Src0RM = T0;	2468 Src0RM = T0;

2427 Src1RM = T1;	2469 Src1RM = T1;

2428 }	2470 }

2429	2471

2430 // TODO: ALIGNHACK: Both operands to compare instructions need to be

2431 // in registers until data alignment support is implemented. Once

2432 // there is support for data alignment, LEGAL_HACK can be removed.

2433 #define LEGAL_HACK(Vect) legalizeToVar((Vect))

2434 Variable *T = makeReg(Ty);	2472 Variable *T = makeReg(Ty);

2435 switch (Condition) {	2473 switch (Condition) {

2436 default:	2474 default:

2437 llvm_unreachable("unexpected condition");	2475 llvm_unreachable("unexpected condition");

2438 break;	2476 break;

2439 case InstIcmp::Eq: {	2477 case InstIcmp::Eq: {

2440 _movp(T, Src0RM);	2478 _movp(T, Src0RM);

2441 _pcmpeq(T, LEGAL_HACK(Src1RM));	2479 _pcmpeq(T, Src1RM);

2442 } break;	2480 } break;

2443 case InstIcmp::Ne: {	2481 case InstIcmp::Ne: {

2444 _movp(T, Src0RM);	2482 _movp(T, Src0RM);

2445 _pcmpeq(T, LEGAL_HACK(Src1RM));	2483 _pcmpeq(T, Src1RM);

2446 Variable *MinusOne = makeVectorOfMinusOnes(Ty);	2484 Variable *MinusOne = makeVectorOfMinusOnes(Ty);

2447 _pxor(T, MinusOne);	2485 _pxor(T, MinusOne);

2448 } break;	2486 } break;

2449 case InstIcmp::Ugt:	2487 case InstIcmp::Ugt:

2450 case InstIcmp::Sgt: {	2488 case InstIcmp::Sgt: {

2451 _movp(T, Src0RM);	2489 _movp(T, Src0RM);

2452 _pcmpgt(T, LEGAL_HACK(Src1RM));	2490 _pcmpgt(T, Src1RM);

2453 } break;	2491 } break;

2454 case InstIcmp::Uge:	2492 case InstIcmp::Uge:

2455 case InstIcmp::Sge: {	2493 case InstIcmp::Sge: {

2456 // !(Src1RM > Src0RM)	2494 // !(Src1RM > Src0RM)

2457 _movp(T, Src1RM);	2495 _movp(T, Src1RM);

2458 _pcmpgt(T, LEGAL_HACK(Src0RM));	2496 _pcmpgt(T, Src0RM);

2459 Variable *MinusOne = makeVectorOfMinusOnes(Ty);	2497 Variable *MinusOne = makeVectorOfMinusOnes(Ty);

2460 _pxor(T, MinusOne);	2498 _pxor(T, MinusOne);

2461 } break;	2499 } break;

2462 case InstIcmp::Ult:	2500 case InstIcmp::Ult:

2463 case InstIcmp::Slt: {	2501 case InstIcmp::Slt: {

2464 _movp(T, Src1RM);	2502 _movp(T, Src1RM);

2465 _pcmpgt(T, LEGAL_HACK(Src0RM));	2503 _pcmpgt(T, Src0RM);

2466 } break;	2504 } break;

2467 case InstIcmp::Ule:	2505 case InstIcmp::Ule:

2468 case InstIcmp::Sle: {	2506 case InstIcmp::Sle: {

2469 // !(Src0RM > Src1RM)	2507 // !(Src0RM > Src1RM)

2470 _movp(T, Src0RM);	2508 _movp(T, Src0RM);

2471 _pcmpgt(T, LEGAL_HACK(Src1RM));	2509 _pcmpgt(T, Src1RM);

2472 Variable *MinusOne = makeVectorOfMinusOnes(Ty);	2510 Variable *MinusOne = makeVectorOfMinusOnes(Ty);

2473 _pxor(T, MinusOne);	2511 _pxor(T, MinusOne);

2474 } break;	2512 } break;

2475 }	2513 }

2476 #undef LEGAL_HACK

2477	2514

2478 _movp(Dest, T);	2515 _movp(Dest, T);

2479 eliminateNextVectorSextInstruction(Dest);	2516 eliminateNextVectorSextInstruction(Dest);

2480 return;	2517 return;

2481 }	2518 }

2482	2519

2483 // If Src1 is an immediate, or known to be a physical register, we can	2520 // If Src1 is an immediate, or known to be a physical register, we can

2484 // allow Src0 to be a memory operand. Otherwise, Src0 must be copied into	2521 // allow Src0 to be a memory operand. Otherwise, Src0 must be copied into

2485 // a physical register. (Actually, either Src0 or Src1 can be chosen for	2522 // a physical register. (Actually, either Src0 or Src1 can be chosen for

2486 // the physical register, but unfortunately we have to commit to one or	2523 // the physical register, but unfortunately we have to commit to one or

(...skipping 155 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2642 // insertelement into index 3 (result is stored in T):	2679 // insertelement into index 3 (result is stored in T):

2643 // T := SourceVectRM	2680 // T := SourceVectRM

2644 // ElementR := ElementR[0, 0] T[0, 2]	2681 // ElementR := ElementR[0, 0] T[0, 2]

2645 // T := T[0, 1] ElementR[3, 0]	2682 // T := T[0, 1] ElementR[3, 0]

2646 const unsigned char Mask1[3] = {0, 192, 128};	2683 const unsigned char Mask1[3] = {0, 192, 128};

2647 const unsigned char Mask2[3] = {227, 196, 52};	2684 const unsigned char Mask2[3] = {227, 196, 52};

2648	2685

2649 Constant *Mask1Constant = Ctx->getConstantInt(IceType_i8, Mask1[Index - 1]);	2686 Constant *Mask1Constant = Ctx->getConstantInt(IceType_i8, Mask1[Index - 1]);

2650 Constant *Mask2Constant = Ctx->getConstantInt(IceType_i8, Mask2[Index - 1]);	2687 Constant *Mask2Constant = Ctx->getConstantInt(IceType_i8, Mask2[Index - 1]);

2651	2688

2652 // ALIGNHACK: Force vector operands to registers in instructions

2653 // that require aligned memory operands until support for data

2654 // alignment is implemented.

2655 #define ALIGN_HACK(Vect) legalizeToVar((Vect))

2656 if (Index == 1) {	2689 if (Index == 1) {

2657 SourceVectRM = ALIGN_HACK(SourceVectRM);

2658 _shufps(ElementR, SourceVectRM, Mask1Constant);	2690 _shufps(ElementR, SourceVectRM, Mask1Constant);

2659 _shufps(ElementR, SourceVectRM, Mask2Constant);	2691 _shufps(ElementR, SourceVectRM, Mask2Constant);

2660 _movp(Inst->getDest(), ElementR);	2692 _movp(Inst->getDest(), ElementR);

2661 } else {	2693 } else {

2662 Variable *T = makeReg(Ty);	2694 Variable *T = makeReg(Ty);

2663 _movp(T, SourceVectRM);	2695 _movp(T, SourceVectRM);

2664 _shufps(ElementR, T, Mask1Constant);	2696 _shufps(ElementR, T, Mask1Constant);

2665 _shufps(T, ElementR, Mask2Constant);	2697 _shufps(T, ElementR, Mask2Constant);

2666 _movp(Inst->getDest(), T);	2698 _movp(Inst->getDest(), T);

2667 }	2699 }

2668 #undef ALIGN_HACK

2669 } else {	2700 } else {

2670 assert(Ty == IceType_v16i8 || Ty == IceType_v16i1);	2701 assert(Ty == IceType_v16i8 || Ty == IceType_v16i1);

2671 // Spill the value to a stack slot and perform the insertion in	2702 // Spill the value to a stack slot and perform the insertion in

2672 // memory.	2703 // memory.

2673 //	2704 //

2674 // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when	2705 // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when

2675 // support for legalizing to mem is implemented.	2706 // support for legalizing to mem is implemented.

2676 Variable *Slot = Func->makeVariable(Ty, Context.getNode());	2707 Variable *Slot = Func->makeVariable(Ty, Context.getNode());

2677 Slot->setWeight(RegWeight::Zero);	2708 Slot->setWeight(RegWeight::Zero);

2678 _movp(Slot, legalizeToVar(SourceVectNotLegalized));	2709 _movp(Slot, legalizeToVar(SourceVectNotLegalized));

(...skipping 941 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3620 Variable *Dest = Inst->getDest();	3651 Variable *Dest = Inst->getDest();

3621 Operand *SrcT = Inst->getTrueOperand();	3652 Operand *SrcT = Inst->getTrueOperand();

3622 Operand *SrcF = Inst->getFalseOperand();	3653 Operand *SrcF = Inst->getFalseOperand();

3623 Operand *Condition = Inst->getCondition();	3654 Operand *Condition = Inst->getCondition();

3624	3655

3625 if (isVectorType(Dest->getType())) {	3656 if (isVectorType(Dest->getType())) {

3626 Type SrcTy = SrcT->getType();	3657 Type SrcTy = SrcT->getType();

3627 Variable *T = makeReg(SrcTy);	3658 Variable *T = makeReg(SrcTy);

3628 Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem);	3659 Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem);

3629 Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem);	3660 Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem);

3630 // ALIGNHACK: Until data alignment support is implemented, vector

3631 // instructions need to have vector operands in registers. Once

3632 // there is support for data alignment, LEGAL_HACK can be removed.

3633 #define LEGAL_HACK(Vect) legalizeToVar((Vect))

3634 if (InstructionSet >= SSE4_1) {	3661 if (InstructionSet >= SSE4_1) {

3635 // TODO(wala): If the condition operand is a constant, use blendps	3662 // TODO(wala): If the condition operand is a constant, use blendps

3636 // or pblendw.	3663 // or pblendw.

3637 //	3664 //

3638 // Use blendvps or pblendvb to implement select.	3665 // Use blendvps or pblendvb to implement select.

3639 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 ||	3666 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 ||

3640 SrcTy == IceType_v4f32) {	3667 SrcTy == IceType_v4f32) {

3641 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);	3668 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);

3642 Variable *xmm0 = makeReg(IceType_v4i32, Reg_xmm0);	3669 Variable *xmm0 = makeReg(IceType_v4i32, Reg_xmm0);

3643 _movp(xmm0, ConditionRM);	3670 _movp(xmm0, ConditionRM);

3644 _psll(xmm0, Ctx->getConstantInt(IceType_i8, 31));	3671 _psll(xmm0, Ctx->getConstantInt(IceType_i8, 31));

3645 _movp(T, SrcFRM);	3672 _movp(T, SrcFRM);

3646 _blendvps(T, LEGAL_HACK(SrcTRM), xmm0);	3673 _blendvps(T, SrcTRM, xmm0);

3647 _movp(Dest, T);	3674 _movp(Dest, T);

3648 } else {	3675 } else {

3649 assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16);	3676 assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16);

3650 Type SignExtTy = Condition->getType() == IceType_v8i1 ? IceType_v8i16	3677 Type SignExtTy = Condition->getType() == IceType_v8i1 ? IceType_v8i16

3651 : IceType_v16i8;	3678 : IceType_v16i8;

3652 Variable *xmm0 = makeReg(SignExtTy, Reg_xmm0);	3679 Variable *xmm0 = makeReg(SignExtTy, Reg_xmm0);

3653 lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition));	3680 lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition));

3654 _movp(T, SrcFRM);	3681 _movp(T, SrcFRM);

3655 _pblendvb(T, LEGAL_HACK(SrcTRM), xmm0);	3682 _pblendvb(T, SrcTRM, xmm0);

3656 _movp(Dest, T);	3683 _movp(Dest, T);

3657 }	3684 }

3658 return;	3685 return;

3659 }	3686 }

3660 // Lower select without SSE4.1:	3687 // Lower select without SSE4.1:

3661 // a=d?b:c ==>	3688 // a=d?b:c ==>

3662 // if elementtype(d) != i1:	3689 // if elementtype(d) != i1:

3663 // d=sext(d);	3690 // d=sext(d);

3664 // a=(b&d)|(c&~d);	3691 // a=(b&d)|(c&~d);

3665 Variable *T2 = makeReg(SrcTy);	3692 Variable *T2 = makeReg(SrcTy);

3666 // Sign extend the condition operand if applicable.	3693 // Sign extend the condition operand if applicable.

3667 if (SrcTy == IceType_v4f32) {	3694 if (SrcTy == IceType_v4f32) {

3668 // The sext operation takes only integer arguments.	3695 // The sext operation takes only integer arguments.

3669 Variable *T3 = Func->makeVariable(IceType_v4i32, Context.getNode());	3696 Variable *T3 = Func->makeVariable(IceType_v4i32, Context.getNode());

3670 lowerCast(InstCast::create(Func, InstCast::Sext, T3, Condition));	3697 lowerCast(InstCast::create(Func, InstCast::Sext, T3, Condition));

3671 _movp(T, T3);	3698 _movp(T, T3);

3672 } else if (typeElementType(SrcTy) != IceType_i1) {	3699 } else if (typeElementType(SrcTy) != IceType_i1) {

3673 lowerCast(InstCast::create(Func, InstCast::Sext, T, Condition));	3700 lowerCast(InstCast::create(Func, InstCast::Sext, T, Condition));

3674 } else {	3701 } else {

3675 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);	3702 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);

3676 _movp(T, ConditionRM);	3703 _movp(T, ConditionRM);

3677 }	3704 }

3678 _movp(T2, T);	3705 _movp(T2, T);

3679 _pand(T, LEGAL_HACK(SrcTRM));	3706 _pand(T, SrcTRM);

3680 _pandn(T2, LEGAL_HACK(SrcFRM));	3707 _pandn(T2, SrcFRM);

3681 _por(T, T2);	3708 _por(T, T2);

3682 _movp(Dest, T);	3709 _movp(Dest, T);

3683 #undef LEGAL_HACK

3684	3710

3685 return;	3711 return;

3686 }	3712 }

3687	3713

3688 // a=d?b:c ==> cmp d,0; a=b; jne L1; FakeUse(a); a=c; L1:	3714 // a=d?b:c ==> cmp d,0; a=b; jne L1; FakeUse(a); a=c; L1:

3689 Operand *ConditionRMI = legalize(Condition);	3715 Operand *ConditionRMI = legalize(Condition);

3690 Constant *Zero = Ctx->getConstantZero(IceType_i32);	3716 Constant *Zero = Ctx->getConstantZero(IceType_i32);

3691 InstX8632Label *Label = InstX8632Label::create(Func, this);	3717 InstX8632Label *Label = InstX8632Label::create(Func, this);

3692	3718

3693 if (Dest->getType() == IceType_i64) {	3719 if (Dest->getType() == IceType_i64) {

(...skipping 542 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
4236 for (SizeT i = 0; i < Size; ++i) {	4262 for (SizeT i = 0; i < Size; ++i) {

4237 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n";	4263 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n";

4238 }	4264 }

4239 Str << "\t.size\t" << MangledName << ", " << Size << "\n";	4265 Str << "\t.size\t" << MangledName << ", " << Size << "\n";

4240 }	4266 }

4241 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName	4267 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName

4242 << "\n";	4268 << "\n";

4243 }	4269 }

4244	4270

4245 } // end of namespace Ice	4271 } // end of namespace Ice

OLD	NEW

« no previous file with comments | « src/IceTargetLoweringX8632.h ('k') | tests_lit/llvm2ice_tests/align-spill-locations.ll » ('j') | tests_lit/llvm2ice_tests/align-spill-locations.ll » ('J')