Chromium Code Reviews

Side by Side Diff: src/IceTargetLoweringX8632.cpp

Issue 465413003: Subzero: Align spill locations to natural alignment. (Closed) Base URL: https://gerrit.chromium.org/gerrit/p/native_client/pnacl-subzero.git@master
Patch Set: LocalsSizeBytes -> SpillAreaSizeBytes, local variables -> allocas. Created 6 years, 4 months ago
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 // 9 //
10 // This file implements the TargetLoweringX8632 class, which 10 // This file implements the TargetLoweringX8632 class, which
11 // consists almost entirely of the lowering sequence for each 11 // consists almost entirely of the lowering sequence for each
12 // high-level instruction. It also implements 12 // high-level instruction. It also implements
13 // TargetX8632Fast::postLower() which does the simplest possible 13 // TargetX8632Fast::postLower() which does the simplest possible
14 // register allocation for the "fast" target. 14 // register allocation for the "fast" target.
15 // 15 //
16 //===----------------------------------------------------------------------===// 16 //===----------------------------------------------------------------------===//
17 17
18 #include "IceDefs.h" 18 #include "IceDefs.h"
19 #include "IceCfg.h" 19 #include "IceCfg.h"
20 #include "IceCfgNode.h" 20 #include "IceCfgNode.h"
21 #include "IceInstX8632.h" 21 #include "IceInstX8632.h"
22 #include "IceOperand.h" 22 #include "IceOperand.h"
23 #include "IceTargetLoweringX8632.def" 23 #include "IceTargetLoweringX8632.def"
24 #include "IceTargetLoweringX8632.h" 24 #include "IceTargetLoweringX8632.h"
25 #include "llvm/Support/CommandLine.h" 25 #include "llvm/Support/CommandLine.h"
26 26
27 #include <strings.h>
28
27 namespace Ice { 29 namespace Ice {
28 30
29 namespace { 31 namespace {
30 32
31 // The following table summarizes the logic for lowering the fcmp 33 // The following table summarizes the logic for lowering the fcmp
32 // instruction. There is one table entry for each of the 16 conditions. 34 // instruction. There is one table entry for each of the 16 conditions.
33 // 35 //
34 // The first four columns describe the case when the operands are 36 // The first four columns describe the case when the operands are
35 // floating point scalar values. A comment in lowerFcmp() describes the 37 // floating point scalar values. A comment in lowerFcmp() describes the
36 // lowering template. In the most general case, there is a compare 38 // lowering template. In the most general case, there is a compare
(...skipping 84 matching lines...)
121 } 123 }
122 124
123 // The maximum number of arguments to pass in XMM registers 125 // The maximum number of arguments to pass in XMM registers
124 const uint32_t X86_MAX_XMM_ARGS = 4; 126 const uint32_t X86_MAX_XMM_ARGS = 4;
125 // The number of bits in a byte 127 // The number of bits in a byte
126 const uint32_t X86_CHAR_BIT = 8; 128 const uint32_t X86_CHAR_BIT = 8;
127 // Stack alignment 129 // Stack alignment
128 const uint32_t X86_STACK_ALIGNMENT_BYTES = 16; 130 const uint32_t X86_STACK_ALIGNMENT_BYTES = 16;
129 // Size of the return address on the stack 131 // Size of the return address on the stack
130 const uint32_t X86_RET_IP_SIZE_BYTES = 4; 132 const uint32_t X86_RET_IP_SIZE_BYTES = 4;
133 // The base 2 logarithm of the width in bytes of the smallest stack slot
134 const uint32_t X86_LOG2_OF_MIN_STACK_SLOT_SIZE = 2;
135 // The base 2 logarithm of the width in bytes of the largest stack slot
136 const uint32_t X86_LOG2_OF_MAX_STACK_SLOT_SIZE = 4;
131 137
132 // Value is a size in bytes. Return Value adjusted to the next highest 138 // Value and Alignment are in bytes. Return Value adjusted to the next
133 // multiple of the stack alignment. 139 // highest multiple of Alignment.
140 uint32_t applyAlignment(uint32_t Value, uint32_t Alignment) {
141 // power of 2
142 assert((Alignment & (Alignment - 1)) == 0);
143 return (Value + Alignment - 1) & -Alignment;
144 }
145
146 // Value is in bytes. Return Value adjusted to the next highest multiple
147 // of the stack alignment.
134 uint32_t applyStackAlignment(uint32_t Value) { 148 uint32_t applyStackAlignment(uint32_t Value) {
135 // power of 2 149 return applyAlignment(Value, X86_STACK_ALIGNMENT_BYTES);
136 assert((X86_STACK_ALIGNMENT_BYTES & (X86_STACK_ALIGNMENT_BYTES - 1)) == 0);
137 return (Value + X86_STACK_ALIGNMENT_BYTES - 1) & -X86_STACK_ALIGNMENT_BYTES;
138 } 150 }
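
The rounding in applyAlignment only works for power-of-2 alignments: adding Alignment - 1 carries Value past the next boundary unless it is already aligned, and masking with -Alignment (all ones except the low log2(Alignment) bits) clears the remainder. A minimal standalone sketch of the same computation, using a hypothetical alignUp helper that is not part of this patch:

#include <cassert>
#include <cstdint>

uint32_t alignUp(uint32_t Value, uint32_t Alignment) {
  assert((Alignment & (Alignment - 1)) == 0); // power of 2 only
  // For a power-of-2 Alignment, -Alignment wraps to the mask with the
  // low log2(Alignment) bits clear.
  return (Value + Alignment - 1) & -Alignment;
}

int main() {
  assert(alignUp(0, 16) == 0);   // already aligned: unchanged
  assert(alignUp(1, 16) == 16);  // rounds up to the next multiple
  assert(alignUp(16, 16) == 16);
  assert(alignUp(17, 16) == 32);
  return 0;
}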
139 151
140 // Instruction set options 152 // Instruction set options
141 namespace cl = ::llvm::cl; 153 namespace cl = ::llvm::cl;
142 cl::opt<TargetX8632::X86InstructionSet> CLInstructionSet( 154 cl::opt<TargetX8632::X86InstructionSet> CLInstructionSet(
143 "mattr", cl::desc("X86 target attributes"), 155 "mattr", cl::desc("X86 target attributes"),
144 cl::init(TargetX8632::SSE2), 156 cl::init(TargetX8632::SSE2),
145 cl::values( 157 cl::values(
146 clEnumValN(TargetX8632::SSE2, "sse2", 158 clEnumValN(TargetX8632::SSE2, "sse2",
147 "Enable SSE2 instructions (default)"), 159 "Enable SSE2 instructions (default)"),
(...skipping 93 matching lines...)
241 ICETYPE_TABLE; 253 ICETYPE_TABLE;
242 #undef X 254 #undef X
243 } 255 }
244 } 256 }
245 257
246 } // end of anonymous namespace 258 } // end of anonymous namespace
247 259
248 TargetX8632::TargetX8632(Cfg *Func) 260 TargetX8632::TargetX8632(Cfg *Func)
249 : TargetLowering(Func), InstructionSet(CLInstructionSet), 261 : TargetLowering(Func), InstructionSet(CLInstructionSet),
250 IsEbpBasedFrame(false), NeedsStackAlignment(false), FrameSizeLocals(0), 262 IsEbpBasedFrame(false), NeedsStackAlignment(false), FrameSizeLocals(0),
251 LocalsSizeBytes(0), NextLabelNumber(0), ComputedLiveRanges(false), 263 SpillAreaSizeBytes(0), NextLabelNumber(0), ComputedLiveRanges(false),
252 PhysicalRegisters(VarList(Reg_NUM)) { 264 PhysicalRegisters(VarList(Reg_NUM)) {
253 // TODO: Don't initialize IntegerRegisters and friends every time. 265 // TODO: Don't initialize IntegerRegisters and friends every time.
254 // Instead, initialize in some sort of static initializer for the 266 // Instead, initialize in some sort of static initializer for the
255 // class. 267 // class.
256 llvm::SmallBitVector IntegerRegisters(Reg_NUM); 268 llvm::SmallBitVector IntegerRegisters(Reg_NUM);
257 llvm::SmallBitVector IntegerRegistersI8(Reg_NUM); 269 llvm::SmallBitVector IntegerRegistersI8(Reg_NUM);
258 llvm::SmallBitVector FloatRegisters(Reg_NUM); 270 llvm::SmallBitVector FloatRegisters(Reg_NUM);
259 llvm::SmallBitVector VectorRegisters(Reg_NUM); 271 llvm::SmallBitVector VectorRegisters(Reg_NUM);
260 llvm::SmallBitVector InvalidRegisters(Reg_NUM); 272 llvm::SmallBitVector InvalidRegisters(Reg_NUM);
261 ScratchRegs.resize(Reg_NUM); 273 ScratchRegs.resize(Reg_NUM);
(...skipping 251 matching lines...)
513 Variable *RegisterArg = Func->makeVariable(Ty, DefNode, Name); 525 Variable *RegisterArg = Func->makeVariable(Ty, DefNode, Name);
514 RegisterArg->setRegNum(RegNum); 526 RegisterArg->setRegNum(RegNum);
515 RegisterArg->setIsArg(Func); 527 RegisterArg->setIsArg(Func);
516 Arg->setIsArg(Func, false); 528 Arg->setIsArg(Func, false);
517 529
518 Args[I] = RegisterArg; 530 Args[I] = RegisterArg;
519 Context.insert(InstAssign::create(Func, Arg, RegisterArg)); 531 Context.insert(InstAssign::create(Func, Arg, RegisterArg));
520 } 532 }
521 } 533 }
522 534
535 void TargetX8632::sortByAlignment(VarList &Dest, const VarList &Source) const {
536 // Sort the variables into buckets according to the log of their width
537 // in bytes.
538 const SizeT NumBuckets =
539 X86_LOG2_OF_MAX_STACK_SLOT_SIZE - X86_LOG2_OF_MIN_STACK_SLOT_SIZE + 1;
540 VarList Buckets[NumBuckets];
541
542 for (VarList::const_iterator I = Source.begin(), E = Source.end(); I != E;
543 ++I) {
544 Variable *Var = *I;
545 uint32_t NaturalAlignment = typeWidthInBytesOnStack(Var->getType());
546 SizeT LogNaturalAlignment = ffs(NaturalAlignment) - 1;
547 assert(LogNaturalAlignment >= X86_LOG2_OF_MIN_STACK_SLOT_SIZE);
548 assert(LogNaturalAlignment <= X86_LOG2_OF_MAX_STACK_SLOT_SIZE);
549 SizeT BucketIndex = LogNaturalAlignment - X86_LOG2_OF_MIN_STACK_SLOT_SIZE;
550 Buckets[BucketIndex].push_back(Var);
551 }
552
553 for (SizeT I = 0, E = NumBuckets; I < E; ++I) {
554 VarList &List = Buckets[NumBuckets - I - 1];
555 Dest.insert(Dest.end(), List.begin(), List.end());
556 }
557 }
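
sortByAlignment is a stable bucket sort: each spilled variable lands in a bucket indexed by the log2 of its on-stack width (4, 8, or 16 bytes on this target), and the buckets are emitted largest-first, so 16-byte vector slots precede 8-byte and 4-byte slots while the original order inside each bucket is kept. A simplified sketch of the same idea over bare widths (hypothetical helper, outside Subzero's VarList machinery):

#include <strings.h> // ffs(), the same helper the patch uses
#include <vector>

std::vector<int> sortWidthsByAlignment(const std::vector<int> &Widths) {
  const int MinLog = 2, MaxLog = 4; // 4-byte through 16-byte slots
  std::vector<int> Buckets[MaxLog - MinLog + 1];
  for (size_t I = 0; I < Widths.size(); ++I) {
    int Log = ffs(Widths[I]) - 1; // log2 of a power-of-2 width
    Buckets[Log - MinLog].push_back(Widths[I]);
  }
  std::vector<int> Out;
  for (int B = MaxLog - MinLog; B >= 0; --B) // largest alignment first
    Out.insert(Out.end(), Buckets[B].begin(), Buckets[B].end());
  return Out; // e.g. {4, 16, 8, 4, 16} -> {16, 16, 8, 4, 4}
}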
558
523 // Helper function for addProlog(). 559 // Helper function for addProlog().
524 // 560 //
525 // This assumes Arg is an argument passed on the stack. This sets the 561 // This assumes Arg is an argument passed on the stack. This sets the
526 // frame offset for Arg and updates InArgsSizeBytes according to Arg's 562 // frame offset for Arg and updates InArgsSizeBytes according to Arg's
527 // width. For an I64 arg that has been split into Lo and Hi components, 563 // width. For an I64 arg that has been split into Lo and Hi components,
528 // it calls itself recursively on the components, taking care to handle 564 // it calls itself recursively on the components, taking care to handle
529 // Lo first because of the little-endian architecture. Lastly, this 565 // Lo first because of the little-endian architecture. Lastly, this
530 // function generates an instruction to copy Arg into its assigned 566 // function generates an instruction to copy Arg into its assigned
531 // register if applicable. 567 // register if applicable.
532 void TargetX8632::finishArgumentLowering(Variable *Arg, Variable *FramePtr, 568 void TargetX8632::finishArgumentLowering(Variable *Arg, Variable *FramePtr,
(...skipping 23 matching lines...)
556 _movp(Arg, Mem); 592 _movp(Arg, Mem);
557 } else { 593 } else {
558 _mov(Arg, Mem); 594 _mov(Arg, Mem);
559 } 595 }
560 } 596 }
561 } 597 }
562 598
563 Type TargetX8632::stackSlotType() { return IceType_i32; } 599 Type TargetX8632::stackSlotType() { return IceType_i32; }
564 600
565 void TargetX8632::addProlog(CfgNode *Node) { 601 void TargetX8632::addProlog(CfgNode *Node) {
602 // Stack frame layout:
603 //
604 // +------------------------+
605 // | 1. return address |
606 // +------------------------+
607 // | 2. preserved registers |
608 // +------------------------+
609 // | 3. padding |
610 // +------------------------+
611 // | 4. global spill area |
612 // +------------------------+
613 // | 5. padding |
614 // +------------------------+
615 // | 6. local spill area |
616 // +------------------------+
617 // | 7. padding |
618 // +------------------------+
619 // | 8. allocas |
620 // +------------------------+
621 //
622 // The following variables record the size in bytes of the given areas:
623 // * X86_RET_IP_SIZE_BYTES: area 1
624 // * PreservedRegsSizeBytes: area 2
625 // * SpillAreaPaddingBytes: area 3
626 // * GlobalsSize: area 4
627 // * GlobalsAndSubsequentPaddingSize: areas 4 - 5
628 // * LocalsSpillAreaSize: area 6
629 // * SpillAreaSizeBytes: areas 3 - 7
630
566 // If SimpleCoalescing is false, each variable without a register 631 // If SimpleCoalescing is false, each variable without a register
567 // gets its own unique stack slot, which leads to large stack 632 // gets its own unique stack slot, which leads to large stack
568 // frames. If SimpleCoalescing is true, then each "global" variable 633 // frames. If SimpleCoalescing is true, then each "global" variable
569 // without a register gets its own slot, but "local" variable slots 634 // without a register gets its own slot, but "local" variable slots
570 // are reused across basic blocks. E.g., if A and B are local to 635 // are reused across basic blocks. E.g., if A and B are local to
571 // block 1 and C is local to block 2, then C may share a slot with A 636 // block 1 and C is local to block 2, then C may share a slot with A
572 // or B. 637 // or B.
573 const bool SimpleCoalescing = true; 638 const bool SimpleCoalescing = true;
574 size_t InArgsSizeBytes = 0; 639 size_t InArgsSizeBytes = 0;
575 size_t PreservedRegsSizeBytes = 0; 640 size_t PreservedRegsSizeBytes = 0;
576 LocalsSizeBytes = 0; 641 SpillAreaSizeBytes = 0;
577 Context.init(Node); 642 Context.init(Node);
578 Context.setInsertPoint(Context.getCur()); 643 Context.setInsertPoint(Context.getCur());
579 644
580 // Determine stack frame offsets for each Variable without a 645 // Determine stack frame offsets for each Variable without a
581 // register assignment. This can be done as one variable per stack 646 // register assignment. This can be done as one variable per stack
582 // slot. Or, do coalescing by running the register allocator again 647 // slot. Or, do coalescing by running the register allocator again
583 // with an infinite set of registers (as a side effect, this gives 648 // with an infinite set of registers (as a side effect, this gives
584 // variables a second chance at physical register assignment). 649 // variables a second chance at physical register assignment).
585 // 650 //
586 // A middle ground approach is to leverage sparsity and allocate one 651 // A middle ground approach is to leverage sparsity and allocate one
587 // block of space on the frame for globals (variables with 652 // block of space on the frame for globals (variables with
588 // multi-block lifetime), and one block to share for locals 653 // multi-block lifetime), and one block to share for locals
589 // (single-block lifetime). 654 // (single-block lifetime).
590 655
591 llvm::SmallBitVector CalleeSaves = 656 llvm::SmallBitVector CalleeSaves =
592 getRegisterSet(RegSet_CalleeSave, RegSet_None); 657 getRegisterSet(RegSet_CalleeSave, RegSet_None);
593 658
594 size_t GlobalsSize = 0; 659 size_t GlobalsSize = 0;
595 std::vector<size_t> LocalsSize(Func->getNumNodes()); 660 std::vector<size_t> LocalsSize(Func->getNumNodes());
596 661
597 // Prepass. Compute RegsUsed, PreservedRegsSizeBytes, and 662 // Prepass. Compute RegsUsed, PreservedRegsSizeBytes, and
598 // LocalsSizeBytes. 663 // SpillAreaSizeBytes.
599 RegsUsed = llvm::SmallBitVector(CalleeSaves.size()); 664 RegsUsed = llvm::SmallBitVector(CalleeSaves.size());
600 const VarList &Variables = Func->getVariables(); 665 const VarList &Variables = Func->getVariables();
601 const VarList &Args = Func->getArgs(); 666 const VarList &Args = Func->getArgs();
667 VarList SpilledVariables, SortedSpilledVariables,
668 VariablesLinkedToSpillSlots;
669
670 // If there is a separate locals area, this specifies the alignment
671 // for it.
672 uint32_t LocalsSlotsAlignmentBytes = 0;
673 // The entire spill locations area gets aligned to the largest natural
674 // alignment of the variables that have a spill slot.
675 uint32_t SpillAreaAlignmentBytes = 0;
602 for (VarList::const_iterator I = Variables.begin(), E = Variables.end(); 676 for (VarList::const_iterator I = Variables.begin(), E = Variables.end();
603 I != E; ++I) { 677 I != E; ++I) {
604 Variable *Var = *I; 678 Variable *Var = *I;
605 if (Var->hasReg()) { 679 if (Var->hasReg()) {
606 RegsUsed[Var->getRegNum()] = true; 680 RegsUsed[Var->getRegNum()] = true;
607 continue; 681 continue;
608 } 682 }
609 // An argument either does not need a stack slot (if passed in a 683 // An argument either does not need a stack slot (if passed in a
610 // register) or already has one (if passed on the stack). 684 // register) or already has one (if passed on the stack).
611 if (Var->getIsArg()) 685 if (Var->getIsArg())
612 continue; 686 continue;
613 // An unreferenced variable doesn't need a stack slot. 687 // An unreferenced variable doesn't need a stack slot.
614 if (ComputedLiveRanges && Var->getLiveRange().isEmpty()) 688 if (ComputedLiveRanges && Var->getLiveRange().isEmpty())
615 continue; 689 continue;
616 // A spill slot linked to a variable with a stack slot should reuse 690 // A spill slot linked to a variable with a stack slot should reuse
617 // that stack slot. 691 // that stack slot.
618 if (Var->getWeight() == RegWeight::Zero && Var->getRegisterOverlap()) { 692 if (Var->getWeight() == RegWeight::Zero && Var->getRegisterOverlap()) {
619 if (Variable *Linked = Var->getPreferredRegister()) { 693 if (Variable *Linked = Var->getPreferredRegister()) {
620 if (!Linked->hasReg()) 694 if (!Linked->hasReg()) {
695 VariablesLinkedToSpillSlots.push_back(Var);
621 continue; 696 continue;
697 }
622 } 698 }
623 } 699 }
700 SpilledVariables.push_back(Var);
701 }
702
703 SortedSpilledVariables.reserve(SpilledVariables.size());
704 sortByAlignment(SortedSpilledVariables, SpilledVariables);
705 for (VarList::const_iterator I = SortedSpilledVariables.begin(),
706 E = SortedSpilledVariables.end();
707 I != E; ++I) {
708 Variable *Var = *I;
624 size_t Increment = typeWidthInBytesOnStack(Var->getType()); 709 size_t Increment = typeWidthInBytesOnStack(Var->getType());
710 if (!SpillAreaAlignmentBytes)
711 SpillAreaAlignmentBytes = Increment;
625 if (SimpleCoalescing) { 712 if (SimpleCoalescing) {
626 if (Var->isMultiblockLife()) { 713 if (Var->isMultiblockLife()) {
627 GlobalsSize += Increment; 714 GlobalsSize += Increment;
628 } else { 715 } else {
629 SizeT NodeIndex = Var->getLocalUseNode()->getIndex(); 716 SizeT NodeIndex = Var->getLocalUseNode()->getIndex();
630 LocalsSize[NodeIndex] += Increment; 717 LocalsSize[NodeIndex] += Increment;
631 if (LocalsSize[NodeIndex] > LocalsSizeBytes) 718 if (LocalsSize[NodeIndex] > SpillAreaSizeBytes)
632 LocalsSizeBytes = LocalsSize[NodeIndex]; 719 SpillAreaSizeBytes = LocalsSize[NodeIndex];
720 if (!LocalsSlotsAlignmentBytes)
721 LocalsSlotsAlignmentBytes = Increment;
633 } 722 }
634 } else { 723 } else {
635 LocalsSizeBytes += Increment; 724 SpillAreaSizeBytes += Increment;
636 } 725 }
637 } 726 }
638 LocalsSizeBytes += GlobalsSize; 727 uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes;
728
729 SpillAreaSizeBytes += GlobalsSize;
639 730
640 // Add push instructions for preserved registers. 731 // Add push instructions for preserved registers.
641 for (SizeT i = 0; i < CalleeSaves.size(); ++i) { 732 for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
642 if (CalleeSaves[i] && RegsUsed[i]) { 733 if (CalleeSaves[i] && RegsUsed[i]) {
643 PreservedRegsSizeBytes += 4; 734 PreservedRegsSizeBytes += 4;
644 const bool SuppressStackAdjustment = true; 735 const bool SuppressStackAdjustment = true;
645 _push(getPhysicalRegister(i), SuppressStackAdjustment); 736 _push(getPhysicalRegister(i), SuppressStackAdjustment);
646 } 737 }
647 } 738 }
648 739
649 // Generate "push ebp; mov ebp, esp" 740 // Generate "push ebp; mov ebp, esp"
650 if (IsEbpBasedFrame) { 741 if (IsEbpBasedFrame) {
651 assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None)) 742 assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None))
652 .count() == 0); 743 .count() == 0);
653 PreservedRegsSizeBytes += 4; 744 PreservedRegsSizeBytes += 4;
654 Variable *ebp = getPhysicalRegister(Reg_ebp); 745 Variable *ebp = getPhysicalRegister(Reg_ebp);
655 Variable *esp = getPhysicalRegister(Reg_esp); 746 Variable *esp = getPhysicalRegister(Reg_esp);
656 const bool SuppressStackAdjustment = true; 747 const bool SuppressStackAdjustment = true;
657 _push(ebp, SuppressStackAdjustment); 748 _push(ebp, SuppressStackAdjustment);
658 _mov(ebp, esp); 749 _mov(ebp, esp);
659 } 750 }
660 751
661 if (NeedsStackAlignment) { 752 // Align the variables area. SpillAreaPaddingBytes is the size of
662 uint32_t StackSize = applyStackAlignment( 753 // the region after the preserved registers and before the spill
663 X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes + LocalsSizeBytes); 754 // areas.
664 LocalsSizeBytes = 755 uint32_t SpillAreaPaddingBytes = 0;
665 StackSize - X86_RET_IP_SIZE_BYTES - PreservedRegsSizeBytes; 756 if (SpillAreaAlignmentBytes) {
757 assert(SpillAreaAlignmentBytes <= X86_STACK_ALIGNMENT_BYTES);
758 uint32_t PaddingStart = X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes;
759 uint32_t SpillAreaStart =
760 applyAlignment(PaddingStart, SpillAreaAlignmentBytes);
761 SpillAreaPaddingBytes = SpillAreaStart - PaddingStart;
762 SpillAreaSizeBytes += SpillAreaPaddingBytes;
666 } 763 }
667 764
668 // Generate "sub esp, LocalsSizeBytes" 765 // If there are separate globals and locals areas, make sure the
669 if (LocalsSizeBytes) 766 // locals area is aligned by padding the end of the globals area.
767 uint32_t GlobalsAndSubsequentPaddingSize = GlobalsSize;
768 if (LocalsSlotsAlignmentBytes) {
769 assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
770 GlobalsAndSubsequentPaddingSize =
771 applyAlignment(GlobalsSize, LocalsSlotsAlignmentBytes);
772 SpillAreaSizeBytes += GlobalsAndSubsequentPaddingSize - GlobalsSize;
773 }
774
775 // Align esp if necessary.
776 if (NeedsStackAlignment) {
777 uint32_t StackOffset = X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes;
778 uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes);
779 SpillAreaSizeBytes = StackSize - StackOffset;
780 }
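
Worked example with assumed numbers (not taken from this CL): the return address plus one pushed callee-save register gives PaddingStart = 8; if the widest spill slot is 16 bytes, SpillAreaStart rounds up to 16, so SpillAreaPaddingBytes = 8. With GlobalsSize = 12 and 8-byte locals alignment, GlobalsAndSubsequentPaddingSize = applyAlignment(12, 8) = 16, adding 4 more bytes. If LocalsSpillAreaSize = 24, SpillAreaSizeBytes is now 24 + 12 + 8 + 4 = 48, and the final esp alignment rounds 8 + 48 = 56 up to 64, leaving SpillAreaSizeBytes = 56 as the amount subtracted from esp below.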
781
782 // Generate "sub esp, SpillAreaSizeBytes"
783 if (SpillAreaSizeBytes)
670 _sub(getPhysicalRegister(Reg_esp), 784 _sub(getPhysicalRegister(Reg_esp),
671 Ctx->getConstantInt(IceType_i32, LocalsSizeBytes)); 785 Ctx->getConstantInt(IceType_i32, SpillAreaSizeBytes));
672 786
673 resetStackAdjustment(); 787 resetStackAdjustment();
674 788
675 // Fill in stack offsets for stack args, and copy args into registers 789 // Fill in stack offsets for stack args, and copy args into registers
676 // for those that were register-allocated. Args are pushed right to 790 // for those that were register-allocated. Args are pushed right to
677 // left, so Arg[0] is closest to the stack/frame pointer. 791 // left, so Arg[0] is closest to the stack/frame pointer.
678 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); 792 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
679 size_t BasicFrameOffset = PreservedRegsSizeBytes + X86_RET_IP_SIZE_BYTES; 793 size_t BasicFrameOffset = PreservedRegsSizeBytes + X86_RET_IP_SIZE_BYTES;
680 if (!IsEbpBasedFrame) 794 if (!IsEbpBasedFrame)
681 BasicFrameOffset += LocalsSizeBytes; 795 BasicFrameOffset += SpillAreaSizeBytes;
682 796
683 unsigned NumXmmArgs = 0; 797 unsigned NumXmmArgs = 0;
684 for (SizeT i = 0; i < Args.size(); ++i) { 798 for (SizeT i = 0; i < Args.size(); ++i) {
685 Variable *Arg = Args[i]; 799 Variable *Arg = Args[i];
686 // Skip arguments passed in registers. 800 // Skip arguments passed in registers.
687 if (isVectorType(Arg->getType()) && NumXmmArgs < X86_MAX_XMM_ARGS) { 801 if (isVectorType(Arg->getType()) && NumXmmArgs < X86_MAX_XMM_ARGS) {
688 ++NumXmmArgs; 802 ++NumXmmArgs;
689 continue; 803 continue;
690 } 804 }
691 finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes); 805 finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes);
692 } 806 }
693 807
694 // Fill in stack offsets for locals. 808 // Fill in stack offsets for locals.
695 size_t TotalGlobalsSize = GlobalsSize; 809 size_t GlobalsSpaceUsed = SpillAreaPaddingBytes;
696 GlobalsSize = 0;
697 LocalsSize.assign(LocalsSize.size(), 0); 810 LocalsSize.assign(LocalsSize.size(), 0);
698 size_t NextStackOffset = 0; 811 size_t NextStackOffset = GlobalsSpaceUsed;
699 for (VarList::const_iterator I = Variables.begin(), E = Variables.end(); 812 for (VarList::const_iterator I = SortedSpilledVariables.begin(),
813 E = SortedSpilledVariables.end();
700 I != E; ++I) { 814 I != E; ++I) {
701 Variable *Var = *I; 815 Variable *Var = *I;
702 if (Var->hasReg()) {
703 RegsUsed[Var->getRegNum()] = true;
704 continue;
705 }
706 if (Var->getIsArg())
707 continue;
708 if (ComputedLiveRanges && Var->getLiveRange().isEmpty())
709 continue;
710 if (Var->getWeight() == RegWeight::Zero && Var->getRegisterOverlap()) {
711 if (Variable *Linked = Var->getPreferredRegister()) {
712 if (!Linked->hasReg()) {
713 // TODO: Make sure Linked has already been assigned a stack
714 // slot.
715 Var->setStackOffset(Linked->getStackOffset());
716 continue;
717 }
718 }
719 }
720 size_t Increment = typeWidthInBytesOnStack(Var->getType()); 816 size_t Increment = typeWidthInBytesOnStack(Var->getType());
721 if (SimpleCoalescing) { 817 if (SimpleCoalescing) {
722 if (Var->isMultiblockLife()) { 818 if (Var->isMultiblockLife()) {
723 GlobalsSize += Increment; 819 GlobalsSpaceUsed += Increment;
724 NextStackOffset = GlobalsSize; 820 NextStackOffset = GlobalsSpaceUsed;
725 } else { 821 } else {
726 SizeT NodeIndex = Var->getLocalUseNode()->getIndex(); 822 SizeT NodeIndex = Var->getLocalUseNode()->getIndex();
727 LocalsSize[NodeIndex] += Increment; 823 LocalsSize[NodeIndex] += Increment;
728 NextStackOffset = TotalGlobalsSize + LocalsSize[NodeIndex]; 824 NextStackOffset = SpillAreaPaddingBytes +
825 GlobalsAndSubsequentPaddingSize +
826 LocalsSize[NodeIndex];
729 } 827 }
730 } else { 828 } else {
731 NextStackOffset += Increment; 829 NextStackOffset += Increment;
732 } 830 }
733 if (IsEbpBasedFrame) 831 if (IsEbpBasedFrame)
734 Var->setStackOffset(-NextStackOffset); 832 Var->setStackOffset(-NextStackOffset);
735 else 833 else
736 Var->setStackOffset(LocalsSizeBytes - NextStackOffset); 834 Var->setStackOffset(SpillAreaSizeBytes - NextStackOffset);
737 } 835 }
738 this->FrameSizeLocals = NextStackOffset; 836 this->FrameSizeLocals = NextStackOffset - SpillAreaPaddingBytes;
739 this->HasComputedFrame = true; 837 this->HasComputedFrame = true;
740 838
839 // Assign stack offsets to variables that have been linked to spilled
840 // variables.
841 for (VarList::const_iterator I = VariablesLinkedToSpillSlots.begin(),
842 E = VariablesLinkedToSpillSlots.end();
843 I != E; ++I) {
844 Variable *Var = *I;
845 Variable *Linked = Var->getPreferredRegister();
846 Var->setStackOffset(Linked->getStackOffset());
847 }
848
741 if (Func->getContext()->isVerbose(IceV_Frame)) { 849 if (Func->getContext()->isVerbose(IceV_Frame)) {
742 Func->getContext()->getStrDump() << "LocalsSizeBytes=" << LocalsSizeBytes 850 Ostream &Str = Func->getContext()->getStrDump();
743 << "\n" 851
744 << "InArgsSizeBytes=" << InArgsSizeBytes 852 Str << "Stack layout:\n";
745 << "\n" 853 uint32_t EspAdjustmentPaddingSize =
746 << "PreservedRegsSizeBytes=" 854 SpillAreaSizeBytes - LocalsSpillAreaSize -
747 << PreservedRegsSizeBytes << "\n"; 855 GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes;
856 Str << " in-args = " << InArgsSizeBytes << " bytes\n"
857 << " return address = " << X86_RET_IP_SIZE_BYTES << " bytes\n"
858 << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"
859 << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"
860 << " globals spill area = " << GlobalsSize << " bytes\n"
861 << " globals-locals spill areas intermediate padding = "
862 << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n"
863 << " locals spill area = " << LocalsSpillAreaSize << " bytes\n"
864 << " esp alignment padding = " << EspAdjustmentPaddingSize
865 << " bytes\n";
866
867 Str << "Stack details:\n"
868 << " esp adjustment = " << SpillAreaSizeBytes << " bytes\n"
869 << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"
870 << " locals spill area alignment = " << LocalsSlotsAlignmentBytes
871 << " bytes\n"
872 << " is ebp based = " << IsEbpBasedFrame << "\n";
748 } 873 }
749 } 874 }
750 875
751 void TargetX8632::addEpilog(CfgNode *Node) { 876 void TargetX8632::addEpilog(CfgNode *Node) {
752 InstList &Insts = Node->getInsts(); 877 InstList &Insts = Node->getInsts();
753 InstList::reverse_iterator RI, E; 878 InstList::reverse_iterator RI, E;
754 for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) { 879 for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
755 if (llvm::isa<InstX8632Ret>(*RI)) 880 if (llvm::isa<InstX8632Ret>(*RI))
756 break; 881 break;
757 } 882 }
758 if (RI == E) 883 if (RI == E)
759 return; 884 return;
760 885
761 // Convert the reverse_iterator position into its corresponding 886 // Convert the reverse_iterator position into its corresponding
762 // (forward) iterator position. 887 // (forward) iterator position.
763 InstList::iterator InsertPoint = RI.base(); 888 InstList::iterator InsertPoint = RI.base();
764 --InsertPoint; 889 --InsertPoint;
765 Context.init(Node); 890 Context.init(Node);
766 Context.setInsertPoint(InsertPoint); 891 Context.setInsertPoint(InsertPoint);
767 892
768 Variable *esp = getPhysicalRegister(Reg_esp); 893 Variable *esp = getPhysicalRegister(Reg_esp);
769 if (IsEbpBasedFrame) { 894 if (IsEbpBasedFrame) {
770 Variable *ebp = getPhysicalRegister(Reg_ebp); 895 Variable *ebp = getPhysicalRegister(Reg_ebp);
771 _mov(esp, ebp); 896 _mov(esp, ebp);
772 _pop(ebp); 897 _pop(ebp);
773 } else { 898 } else {
774 // add esp, LocalsSizeBytes 899 // add esp, SpillAreaSizeBytes
775 if (LocalsSizeBytes) 900 if (SpillAreaSizeBytes)
776 _add(esp, Ctx->getConstantInt(IceType_i32, LocalsSizeBytes)); 901 _add(esp, Ctx->getConstantInt(IceType_i32, SpillAreaSizeBytes));
777 } 902 }
778 903
779 // Add pop instructions for preserved registers. 904 // Add pop instructions for preserved registers.
780 llvm::SmallBitVector CalleeSaves = 905 llvm::SmallBitVector CalleeSaves =
781 getRegisterSet(RegSet_CalleeSave, RegSet_None); 906 getRegisterSet(RegSet_CalleeSave, RegSet_None);
782 for (SizeT i = 0; i < CalleeSaves.size(); ++i) { 907 for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
783 SizeT j = CalleeSaves.size() - i - 1; 908 SizeT j = CalleeSaves.size() - i - 1;
784 if (j == Reg_ebp && IsEbpBasedFrame) 909 if (j == Reg_ebp && IsEbpBasedFrame)
785 continue; 910 continue;
786 if (CalleeSaves[j] && RegsUsed[j]) { 911 if (CalleeSaves[j] && RegsUsed[j]) {
(...skipping 197 matching lines...)
984 assert((AlignmentParam & (AlignmentParam - 1)) == 0); 1109 assert((AlignmentParam & (AlignmentParam - 1)) == 0);
985 assert((X86_STACK_ALIGNMENT_BYTES & (X86_STACK_ALIGNMENT_BYTES - 1)) == 0); 1110 assert((X86_STACK_ALIGNMENT_BYTES & (X86_STACK_ALIGNMENT_BYTES - 1)) == 0);
986 1111
987 uint32_t Alignment = std::max(AlignmentParam, X86_STACK_ALIGNMENT_BYTES); 1112 uint32_t Alignment = std::max(AlignmentParam, X86_STACK_ALIGNMENT_BYTES);
988 if (Alignment > X86_STACK_ALIGNMENT_BYTES) { 1113 if (Alignment > X86_STACK_ALIGNMENT_BYTES) {
989 _and(esp, Ctx->getConstantInt(IceType_i32, -Alignment)); 1114 _and(esp, Ctx->getConstantInt(IceType_i32, -Alignment));
990 } 1115 }
991 if (ConstantInteger *ConstantTotalSize = 1116 if (ConstantInteger *ConstantTotalSize =
992 llvm::dyn_cast<ConstantInteger>(TotalSize)) { 1117 llvm::dyn_cast<ConstantInteger>(TotalSize)) {
993 uint32_t Value = ConstantTotalSize->getValue(); 1118 uint32_t Value = ConstantTotalSize->getValue();
994 // Round Value up to the next highest multiple of the alignment. 1119 Value = applyAlignment(Value, Alignment);
995 Value = (Value + Alignment - 1) & -Alignment;
996 _sub(esp, Ctx->getConstantInt(IceType_i32, Value)); 1120 _sub(esp, Ctx->getConstantInt(IceType_i32, Value));
997 } else { 1121 } else {
998 // Non-constant sizes need to be adjusted to the next highest 1122 // Non-constant sizes need to be adjusted to the next highest
999 // multiple of the required alignment at runtime. 1123 // multiple of the required alignment at runtime.
1000 Variable *T = makeReg(IceType_i32); 1124 Variable *T = makeReg(IceType_i32);
1001 _mov(T, TotalSize); 1125 _mov(T, TotalSize);
1002 _add(T, Ctx->getConstantInt(IceType_i32, Alignment - 1)); 1126 _add(T, Ctx->getConstantInt(IceType_i32, Alignment - 1));
1003 _and(T, Ctx->getConstantInt(IceType_i32, -Alignment)); 1127 _and(T, Ctx->getConstantInt(IceType_i32, -Alignment));
1004 _sub(esp, T); 1128 _sub(esp, T);
1005 } 1129 }
(...skipping 226 matching lines...)
1232 case InstArithmetic::Fsub: 1356 case InstArithmetic::Fsub:
1233 case InstArithmetic::Fmul: 1357 case InstArithmetic::Fmul:
1234 case InstArithmetic::Fdiv: 1358 case InstArithmetic::Fdiv:
1235 case InstArithmetic::Frem: 1359 case InstArithmetic::Frem:
1236 llvm_unreachable("FP instruction with i64 type"); 1360 llvm_unreachable("FP instruction with i64 type");
1237 break; 1361 break;
1238 } 1362 }
1239 } else if (isVectorType(Dest->getType())) { 1363 } else if (isVectorType(Dest->getType())) {
1240 // TODO: Trap on integer divide and integer modulo by zero. 1364 // TODO: Trap on integer divide and integer modulo by zero.
1241 // See: https://code.google.com/p/nativeclient/issues/detail?id=3899 1365 // See: https://code.google.com/p/nativeclient/issues/detail?id=3899
1242 //
1243 // TODO(wala): ALIGNHACK: All vector arithmetic is currently done in
1244 // registers. This is a workaround of the fact that there is no
1245 // support for aligning stack operands. Once there is support,
1246 // remove LEGAL_HACK.
1247 #define LEGAL_HACK(s) legalizeToVar((s))
1248 switch (Inst->getOp()) { 1366 switch (Inst->getOp()) {
1249 case InstArithmetic::_num: 1367 case InstArithmetic::_num:
1250 llvm_unreachable("Unknown arithmetic operator"); 1368 llvm_unreachable("Unknown arithmetic operator");
1251 break; 1369 break;
1252 case InstArithmetic::Add: { 1370 case InstArithmetic::Add: {
1253 Variable *T = makeReg(Dest->getType()); 1371 Variable *T = makeReg(Dest->getType());
1254 _movp(T, Src0); 1372 _movp(T, Src0);
1255 _padd(T, LEGAL_HACK(Src1)); 1373 _padd(T, Src1);
1256 _movp(Dest, T); 1374 _movp(Dest, T);
1257 } break; 1375 } break;
1258 case InstArithmetic::And: { 1376 case InstArithmetic::And: {
1259 Variable *T = makeReg(Dest->getType()); 1377 Variable *T = makeReg(Dest->getType());
1260 _movp(T, Src0); 1378 _movp(T, Src0);
1261 _pand(T, LEGAL_HACK(Src1)); 1379 _pand(T, Src1);
1262 _movp(Dest, T); 1380 _movp(Dest, T);
1263 } break; 1381 } break;
1264 case InstArithmetic::Or: { 1382 case InstArithmetic::Or: {
1265 Variable *T = makeReg(Dest->getType()); 1383 Variable *T = makeReg(Dest->getType());
1266 _movp(T, Src0); 1384 _movp(T, Src0);
1267 _por(T, LEGAL_HACK(Src1)); 1385 _por(T, Src1);
1268 _movp(Dest, T); 1386 _movp(Dest, T);
1269 } break; 1387 } break;
1270 case InstArithmetic::Xor: { 1388 case InstArithmetic::Xor: {
1271 Variable *T = makeReg(Dest->getType()); 1389 Variable *T = makeReg(Dest->getType());
1272 _movp(T, Src0); 1390 _movp(T, Src0);
1273 _pxor(T, LEGAL_HACK(Src1)); 1391 _pxor(T, Src1);
1274 _movp(Dest, T); 1392 _movp(Dest, T);
1275 } break; 1393 } break;
1276 case InstArithmetic::Sub: { 1394 case InstArithmetic::Sub: {
1277 Variable *T = makeReg(Dest->getType()); 1395 Variable *T = makeReg(Dest->getType());
1278 _movp(T, Src0); 1396 _movp(T, Src0);
1279 _psub(T, LEGAL_HACK(Src1)); 1397 _psub(T, Src1);
1280 _movp(Dest, T); 1398 _movp(Dest, T);
1281 } break; 1399 } break;
1282 case InstArithmetic::Mul: { 1400 case InstArithmetic::Mul: {
1283 bool TypesAreValidForPmull = 1401 bool TypesAreValidForPmull =
1284 Dest->getType() == IceType_v4i32 || Dest->getType() == IceType_v8i16; 1402 Dest->getType() == IceType_v4i32 || Dest->getType() == IceType_v8i16;
1285 bool InstructionSetIsValidForPmull = 1403 bool InstructionSetIsValidForPmull =
1286 Dest->getType() == IceType_v8i16 || InstructionSet >= SSE4_1; 1404 Dest->getType() == IceType_v8i16 || InstructionSet >= SSE4_1;
1287 if (TypesAreValidForPmull && InstructionSetIsValidForPmull) { 1405 if (TypesAreValidForPmull && InstructionSetIsValidForPmull) {
1288 Variable *T = makeReg(Dest->getType()); 1406 Variable *T = makeReg(Dest->getType());
1289 _movp(T, Src0); 1407 _movp(T, Src0);
1290 _pmull(T, LEGAL_HACK(Src1)); 1408 _pmull(T, Src1);
1291 _movp(Dest, T); 1409 _movp(Dest, T);
1292 } else if (Dest->getType() == IceType_v4i32) { 1410 } else if (Dest->getType() == IceType_v4i32) {
1293 // Lowering sequence: 1411 // Lowering sequence:
1294 // Note: The mask arguments have index 0 on the left. 1412 // Note: The mask arguments have index 0 on the left.
1295 // 1413 //
1296 // movups T1, Src0 1414 // movups T1, Src0
1297 // pshufd T2, Src0, {1,0,3,0} 1415 // pshufd T2, Src0, {1,0,3,0}
1298 // pshufd T3, Src1, {1,0,3,0} 1416 // pshufd T3, Src1, {1,0,3,0}
1299 // # T1 = {Src0[0] * Src1[0], Src0[2] * Src1[2]} 1417 // # T1 = {Src0[0] * Src1[0], Src0[2] * Src1[2]}
1300 // pmuludq T1, Src1 1418 // pmuludq T1, Src1
(...skipping 12 matching lines...)
1313 // Dest[0, 2], Src[0, 2] 1431 // Dest[0, 2], Src[0, 2]
1314 const unsigned Mask0202 = 0x88; 1432 const unsigned Mask0202 = 0x88;
1315 // Mask that directs pshufd to create a vector with entries 1433 // Mask that directs pshufd to create a vector with entries
1316 // Src[0, 2, 1, 3] 1434 // Src[0, 2, 1, 3]
1317 const unsigned Mask0213 = 0xd8; 1435 const unsigned Mask0213 = 0xd8;
1318 Variable *T1 = makeReg(IceType_v4i32); 1436 Variable *T1 = makeReg(IceType_v4i32);
1319 Variable *T2 = makeReg(IceType_v4i32); 1437 Variable *T2 = makeReg(IceType_v4i32);
1320 Variable *T3 = makeReg(IceType_v4i32); 1438 Variable *T3 = makeReg(IceType_v4i32);
1321 Variable *T4 = makeReg(IceType_v4i32); 1439 Variable *T4 = makeReg(IceType_v4i32);
1322 _movp(T1, Src0); 1440 _movp(T1, Src0);
1323 // TODO(wala): ALIGHNHACK: Replace Src0R with Src0 and Src1R 1441 _pshufd(T2, Src0, Mask1030);
1324 // with Src1 after stack operand alignment support is 1442 _pshufd(T3, Src1, Mask1030);
1325 // implemented. 1443 _pmuludq(T1, Src1);
1326 Variable *Src0R = LEGAL_HACK(Src0);
1327 Variable *Src1R = LEGAL_HACK(Src1);
1328 _pshufd(T2, Src0R, Mask1030);
1329 _pshufd(T3, Src1R, Mask1030);
1330 _pmuludq(T1, Src1R);
1331 _pmuludq(T2, T3); 1444 _pmuludq(T2, T3);
1332 _shufps(T1, T2, Ctx->getConstantInt(IceType_i8, Mask0202)); 1445 _shufps(T1, T2, Ctx->getConstantInt(IceType_i8, Mask0202));
1333 _pshufd(T4, T1, Ctx->getConstantInt(IceType_i8, Mask0213)); 1446 _pshufd(T4, T1, Ctx->getConstantInt(IceType_i8, Mask0213));
1334 _movp(Dest, T4); 1447 _movp(Dest, T4);
1335 } else { 1448 } else {
1336 assert(Dest->getType() == IceType_v16i8); 1449 assert(Dest->getType() == IceType_v16i8);
1337 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1); 1450 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);
1338 } 1451 }
1339 } break; 1452 } break;
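
What the sequence above relies on: SSE2's pmuludq only multiplies the even lanes (32 x 32 -> 64 bits), so the odd lanes are shuffled into even positions for a second pmuludq, and the final shuffles regather the four low 32-bit halves in lane order. A scalar model of the value left in T4, as a sketch for intuition rather than Subzero code:

#include <array>
#include <cstdint>

std::array<uint32_t, 4> mulV4I32(const std::array<uint32_t, 4> &Src0,
                                 const std::array<uint32_t, 4> &Src1) {
  // pmuludq T1, Src1: full 64-bit products of lanes 0 and 2.
  uint64_t P0 = (uint64_t)Src0[0] * Src1[0];
  uint64_t P2 = (uint64_t)Src0[2] * Src1[2];
  // pshufd {1,0,3,0} on both operands, then pmuludq T2, T3: lanes 1 and 3.
  uint64_t P1 = (uint64_t)Src0[1] * Src1[1];
  uint64_t P3 = (uint64_t)Src0[3] * Src1[3];
  // shufps T1, T2, {0,2,0,2} keeps the low halves as {P0, P2, P1, P3};
  // pshufd T4, T1, {0,2,1,3} restores lane order.
  std::array<uint32_t, 4> R = {
      {(uint32_t)P0, (uint32_t)P1, (uint32_t)P2, (uint32_t)P3}};
  return R;
}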
1340 case InstArithmetic::Shl: 1453 case InstArithmetic::Shl:
1341 case InstArithmetic::Lshr: 1454 case InstArithmetic::Lshr:
1342 case InstArithmetic::Ashr: 1455 case InstArithmetic::Ashr:
1343 case InstArithmetic::Udiv: 1456 case InstArithmetic::Udiv:
1344 case InstArithmetic::Urem: 1457 case InstArithmetic::Urem:
1345 case InstArithmetic::Sdiv: 1458 case InstArithmetic::Sdiv:
1346 case InstArithmetic::Srem: 1459 case InstArithmetic::Srem:
1347 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1); 1460 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);
1348 break; 1461 break;
1349 case InstArithmetic::Fadd: { 1462 case InstArithmetic::Fadd: {
1350 Variable *T = makeReg(Dest->getType()); 1463 Variable *T = makeReg(Dest->getType());
1351 _movp(T, Src0); 1464 _movp(T, Src0);
1352 _addps(T, LEGAL_HACK(Src1)); 1465 _addps(T, Src1);
1353 _movp(Dest, T); 1466 _movp(Dest, T);
1354 } break; 1467 } break;
1355 case InstArithmetic::Fsub: { 1468 case InstArithmetic::Fsub: {
1356 Variable *T = makeReg(Dest->getType()); 1469 Variable *T = makeReg(Dest->getType());
1357 _movp(T, Src0); 1470 _movp(T, Src0);
1358 _subps(T, LEGAL_HACK(Src1)); 1471 _subps(T, Src1);
1359 _movp(Dest, T); 1472 _movp(Dest, T);
1360 } break; 1473 } break;
1361 case InstArithmetic::Fmul: { 1474 case InstArithmetic::Fmul: {
1362 Variable *T = makeReg(Dest->getType()); 1475 Variable *T = makeReg(Dest->getType());
1363 _movp(T, Src0); 1476 _movp(T, Src0);
1364 _mulps(T, LEGAL_HACK(Src1)); 1477 _mulps(T, Src1);
1365 _movp(Dest, T); 1478 _movp(Dest, T);
1366 } break; 1479 } break;
1367 case InstArithmetic::Fdiv: { 1480 case InstArithmetic::Fdiv: {
1368 Variable *T = makeReg(Dest->getType()); 1481 Variable *T = makeReg(Dest->getType());
1369 _movp(T, Src0); 1482 _movp(T, Src0);
1370 _divps(T, LEGAL_HACK(Src1)); 1483 _divps(T, Src1);
1371 _movp(Dest, T); 1484 _movp(Dest, T);
1372 } break; 1485 } break;
1373 case InstArithmetic::Frem: 1486 case InstArithmetic::Frem:
1374 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1); 1487 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);
1375 break; 1488 break;
1376 } 1489 }
1377 #undef LEGAL_HACK
1378 } else { // Dest->getType() is non-i64 scalar 1490 } else { // Dest->getType() is non-i64 scalar
1379 Variable *T_edx = NULL; 1491 Variable *T_edx = NULL;
1380 Variable *T = NULL; 1492 Variable *T = NULL;
1381 switch (Inst->getOp()) { 1493 switch (Inst->getOp()) {
1382 case InstArithmetic::_num: 1494 case InstArithmetic::_num:
1383 llvm_unreachable("Unknown arithmetic operator"); 1495 llvm_unreachable("Unknown arithmetic operator");
1384 break; 1496 break;
1385 case InstArithmetic::Add: 1497 case InstArithmetic::Add:
1386 _mov(T, Src0); 1498 _mov(T, Src0);
1387 _add(T, Src1); 1499 _add(T, Src1);
(...skipping 804 matching lines...)
2192 // TODO(wala): Determine the best lowering sequences for each type. 2304 // TODO(wala): Determine the best lowering sequences for each type.
2193 bool CanUsePextr = 2305 bool CanUsePextr =
2194 Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1; 2306 Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1;
2195 if (CanUsePextr && Ty != IceType_v4f32) { 2307 if (CanUsePextr && Ty != IceType_v4f32) {
2196 // Use pextrb, pextrw, or pextrd. 2308 // Use pextrb, pextrw, or pextrd.
2197 Constant *Mask = Ctx->getConstantInt(IceType_i8, Index); 2309 Constant *Mask = Ctx->getConstantInt(IceType_i8, Index);
2198 Variable *SourceVectR = legalizeToVar(SourceVectNotLegalized); 2310 Variable *SourceVectR = legalizeToVar(SourceVectNotLegalized);
2199 _pextr(ExtractedElementR, SourceVectR, Mask); 2311 _pextr(ExtractedElementR, SourceVectR, Mask);
2200 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { 2312 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
2201 // Use pshufd and movd/movss. 2313 // Use pshufd and movd/movss.
2202 //
2203 // ALIGNHACK: Force vector operands to registers in instructions
2204 // that require aligned memory operands until support for data
2205 // alignment is implemented.
2206 #define ALIGN_HACK(Vect) legalizeToVar((Vect))
2207 Operand *SourceVectRM =
2208 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem);
2209 Variable *T = NULL; 2314 Variable *T = NULL;
2210 if (Index) { 2315 if (Index) {
2211 // The shuffle only needs to occur if the element to be extracted 2316 // The shuffle only needs to occur if the element to be extracted
2212 // is not at the lowest index. 2317 // is not at the lowest index.
2213 Constant *Mask = Ctx->getConstantInt(IceType_i8, Index); 2318 Constant *Mask = Ctx->getConstantInt(IceType_i8, Index);
2214 T = makeReg(Ty); 2319 T = makeReg(Ty);
2215 _pshufd(T, ALIGN_HACK(SourceVectRM), Mask); 2320 _pshufd(T, legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem), Mask);
2216 } else { 2321 } else {
2217 T = ALIGN_HACK(SourceVectRM); 2322 T = legalizeToVar(SourceVectNotLegalized);
2218 } 2323 }
2219 2324
2220 if (InVectorElementTy == IceType_i32) { 2325 if (InVectorElementTy == IceType_i32) {
2221 _movd(ExtractedElementR, T); 2326 _movd(ExtractedElementR, T);
2222 } else { // Ty == IceType_f32 2327 } else { // Ty == IceType_f32
2223 // TODO(wala): _movss is only used here because _mov does not 2328 // TODO(wala): _movss is only used here because _mov does not
2224 // allow a vector source and a scalar destination. _mov should be 2329 // allow a vector source and a scalar destination. _mov should be
2225 // able to be used here. 2330 // able to be used here.
2226 // _movss is a binary instruction, so the FakeDef is needed to 2331 // _movss is a binary instruction, so the FakeDef is needed to
2227 // keep the live range analysis consistent. 2332 // keep the live range analysis consistent.
2228 Context.insert(InstFakeDef::create(Func, ExtractedElementR)); 2333 Context.insert(InstFakeDef::create(Func, ExtractedElementR));
2229 _movss(ExtractedElementR, T); 2334 _movss(ExtractedElementR, T);
2230 } 2335 }
2231 #undef ALIGN_HACK
2232 } else { 2336 } else {
2233 assert(Ty == IceType_v16i8 || Ty == IceType_v16i1); 2337 assert(Ty == IceType_v16i8 || Ty == IceType_v16i1);
2234 // Spill the value to a stack slot and do the extraction in memory. 2338 // Spill the value to a stack slot and do the extraction in memory.
2235 // 2339 //
2236 // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when 2340 // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when
2237 // support for legalizing to mem is implemented. 2341 // support for legalizing to mem is implemented.
2238 Variable *Slot = Func->makeVariable(Ty, Context.getNode()); 2342 Variable *Slot = Func->makeVariable(Ty, Context.getNode());
2239 Slot->setWeight(RegWeight::Zero); 2343 Slot->setWeight(RegWeight::Zero);
2240 _movp(Slot, legalizeToVar(SourceVectNotLegalized)); 2344 _movp(Slot, legalizeToVar(SourceVectNotLegalized));
2241 2345
(...skipping 38 matching lines...)
2280 2384
2281 if (Condition == InstFcmp::True) { 2385 if (Condition == InstFcmp::True) {
2282 // makeVectorOfOnes() requires an integer vector type. 2386 // makeVectorOfOnes() requires an integer vector type.
2283 T = makeVectorOfMinusOnes(IceType_v4i32); 2387 T = makeVectorOfMinusOnes(IceType_v4i32);
2284 } else if (Condition == InstFcmp::False) { 2388 } else if (Condition == InstFcmp::False) {
2285 T = makeVectorOfZeros(Dest->getType()); 2389 T = makeVectorOfZeros(Dest->getType());
2286 } else { 2390 } else {
2287 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); 2391 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
2288 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); 2392 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
2289 2393
2290 // ALIGNHACK: Without support for data alignment, both operands to
2291 // cmpps need to be forced into registers. Once support for data
2292 // alignment is implemented, remove LEGAL_HACK.
2293 #define LEGAL_HACK(Vect) legalizeToVar((Vect))
2294 switch (Condition) { 2394 switch (Condition) {
2295 default: { 2395 default: {
2296 InstX8632Cmpps::CmppsCond Predicate = TableFcmp[Index].Predicate; 2396 InstX8632Cmpps::CmppsCond Predicate = TableFcmp[Index].Predicate;
2297 assert(Predicate != InstX8632Cmpps::Cmpps_Invalid); 2397 assert(Predicate != InstX8632Cmpps::Cmpps_Invalid);
2298 T = makeReg(Src0RM->getType()); 2398 T = makeReg(Src0RM->getType());
2299 _movp(T, Src0RM); 2399 _movp(T, Src0RM);
2300 _cmpps(T, LEGAL_HACK(Src1RM), Predicate); 2400 _cmpps(T, Src1RM, Predicate);
2301 } break; 2401 } break;
2302 case InstFcmp::One: { 2402 case InstFcmp::One: {
2303 // Check both unequal and ordered. 2403 // Check both unequal and ordered.
2304 T = makeReg(Src0RM->getType()); 2404 T = makeReg(Src0RM->getType());
2305 Variable *T2 = makeReg(Src0RM->getType()); 2405 Variable *T2 = makeReg(Src0RM->getType());
2306 Src1RM = LEGAL_HACK(Src1RM);
2307 _movp(T, Src0RM); 2406 _movp(T, Src0RM);
2308 _cmpps(T, Src1RM, InstX8632Cmpps::Cmpps_neq); 2407 _cmpps(T, Src1RM, InstX8632Cmpps::Cmpps_neq);
2309 _movp(T2, Src0RM); 2408 _movp(T2, Src0RM);
2310 _cmpps(T2, Src1RM, InstX8632Cmpps::Cmpps_ord); 2409 _cmpps(T2, Src1RM, InstX8632Cmpps::Cmpps_ord);
2311 _pand(T, T2); 2410 _pand(T, T2);
2312 } break; 2411 } break;
2313 case InstFcmp::Ueq: { 2412 case InstFcmp::Ueq: {
2314 // Check both equal or unordered. 2413 // Check both equal or unordered.
2315 T = makeReg(Src0RM->getType()); 2414 T = makeReg(Src0RM->getType());
2316 Variable *T2 = makeReg(Src0RM->getType()); 2415 Variable *T2 = makeReg(Src0RM->getType());
2317 Src1RM = LEGAL_HACK(Src1RM);
2318 _movp(T, Src0RM); 2416 _movp(T, Src0RM);
2319 _cmpps(T, Src1RM, InstX8632Cmpps::Cmpps_eq); 2417 _cmpps(T, Src1RM, InstX8632Cmpps::Cmpps_eq);
2320 _movp(T2, Src0RM); 2418 _movp(T2, Src0RM);
2321 _cmpps(T2, Src1RM, InstX8632Cmpps::Cmpps_unord); 2419 _cmpps(T2, Src1RM, InstX8632Cmpps::Cmpps_unord);
2322 _por(T, T2); 2420 _por(T, T2);
2323 } break; 2421 } break;
2324 } 2422 }
2325 #undef LEGAL_HACK
2326 } 2423 }
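
The One and Ueq cases need two cmpps each because SSE has no single predicate for them: cmpps with the neq predicate is true when the operands differ or are unordered, so and-ing it with ord (both operands non-NaN) isolates ordered-and-unequal; dually, eq or-ed with unord gives equal-or-unordered. A scalar restatement (a sketch, not Subzero code):

#include <cmath>

bool fcmpOne(float A, float B) { // ordered and unequal
  bool Neq = !(A == B); // like cmpps(neq): also true when a NaN is involved
  bool Ord = !std::isnan(A) && !std::isnan(B); // like cmpps(ord)
  return Neq && Ord;
}

bool fcmpUeq(float A, float B) { // equal or unordered
  return (A == B) || std::isnan(A) || std::isnan(B);
}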
2327 2424
2328 _movp(Dest, T); 2425 _movp(Dest, T);
2329 eliminateNextVectorSextInstruction(Dest); 2426 eliminateNextVectorSextInstruction(Dest);
2330 return; 2427 return;
2331 } 2428 }
2332 2429
2333 // Lowering a = fcmp cond, b, c 2430 // Lowering a = fcmp cond, b, c
2334 // ucomiss b, c /* only if C1 != Br_None */ 2431 // ucomiss b, c /* only if C1 != Br_None */
2335 // /* but swap b,c order if SwapOperands==true */ 2432 // /* but swap b,c order if SwapOperands==true */
(...skipping 84 matching lines...)
2420 Variable *T1 = makeReg(Ty); 2517 Variable *T1 = makeReg(Ty);
2421 Variable *HighOrderBits = makeVectorOfHighOrderBits(Ty); 2518 Variable *HighOrderBits = makeVectorOfHighOrderBits(Ty);
2422 _movp(T0, Src0RM); 2519 _movp(T0, Src0RM);
2423 _pxor(T0, HighOrderBits); 2520 _pxor(T0, HighOrderBits);
2424 _movp(T1, Src1RM); 2521 _movp(T1, Src1RM);
2425 _pxor(T1, HighOrderBits); 2522 _pxor(T1, HighOrderBits);
2426 Src0RM = T0; 2523 Src0RM = T0;
2427 Src1RM = T1; 2524 Src1RM = T1;
2428 } 2525 }
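
The pxor with makeVectorOfHighOrderBits reduces unsigned compares to the signed pcmpgt: flipping the sign bit of both operands is an order-preserving map from the unsigned range onto the signed range. The identity, stated per lane (sketch):

#include <cassert>
#include <cstdint>

// a <u b  <=>  (int32_t)(a ^ 0x80000000) < (int32_t)(b ^ 0x80000000)
bool ultViaSignedLt(uint32_t A, uint32_t B) {
  return (int32_t)(A ^ 0x80000000u) < (int32_t)(B ^ 0x80000000u);
}

int main() {
  assert(ultViaSignedLt(1, 2));
  assert(ultViaSignedLt(2, 0x80000000u));  // false under a signed compare
  assert(!ultViaSignedLt(0xffffffffu, 1)); // true under a signed compare
  return 0;
}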
2429 2526
2430 // TODO: ALIGNHACK: Both operands to compare instructions need to be
2431 // in registers until data alignment support is implemented. Once
2432 // there is support for data alignment, LEGAL_HACK can be removed.
2433 #define LEGAL_HACK(Vect) legalizeToVar((Vect))
2434 Variable *T = makeReg(Ty); 2527 Variable *T = makeReg(Ty);
2435 switch (Condition) { 2528 switch (Condition) {
2436 default: 2529 default:
2437 llvm_unreachable("unexpected condition"); 2530 llvm_unreachable("unexpected condition");
2438 break; 2531 break;
2439 case InstIcmp::Eq: { 2532 case InstIcmp::Eq: {
2440 _movp(T, Src0RM); 2533 _movp(T, Src0RM);
2441 _pcmpeq(T, LEGAL_HACK(Src1RM)); 2534 _pcmpeq(T, Src1RM);
2442 } break; 2535 } break;
2443 case InstIcmp::Ne: { 2536 case InstIcmp::Ne: {
2444 _movp(T, Src0RM); 2537 _movp(T, Src0RM);
2445 _pcmpeq(T, LEGAL_HACK(Src1RM)); 2538 _pcmpeq(T, Src1RM);
2446 Variable *MinusOne = makeVectorOfMinusOnes(Ty); 2539 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
2447 _pxor(T, MinusOne); 2540 _pxor(T, MinusOne);
2448 } break; 2541 } break;
2449 case InstIcmp::Ugt: 2542 case InstIcmp::Ugt:
2450 case InstIcmp::Sgt: { 2543 case InstIcmp::Sgt: {
2451 _movp(T, Src0RM); 2544 _movp(T, Src0RM);
2452 _pcmpgt(T, LEGAL_HACK(Src1RM)); 2545 _pcmpgt(T, Src1RM);
2453 } break; 2546 } break;
2454 case InstIcmp::Uge: 2547 case InstIcmp::Uge:
2455 case InstIcmp::Sge: { 2548 case InstIcmp::Sge: {
2456 // !(Src1RM > Src0RM) 2549 // !(Src1RM > Src0RM)
2457 _movp(T, Src1RM); 2550 _movp(T, Src1RM);
2458 _pcmpgt(T, LEGAL_HACK(Src0RM)); 2551 _pcmpgt(T, Src0RM);
2459 Variable *MinusOne = makeVectorOfMinusOnes(Ty); 2552 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
2460 _pxor(T, MinusOne); 2553 _pxor(T, MinusOne);
2461 } break; 2554 } break;
2462 case InstIcmp::Ult: 2555 case InstIcmp::Ult:
2463 case InstIcmp::Slt: { 2556 case InstIcmp::Slt: {
2464 _movp(T, Src1RM); 2557 _movp(T, Src1RM);
2465 _pcmpgt(T, LEGAL_HACK(Src0RM)); 2558 _pcmpgt(T, Src0RM);
2466 } break; 2559 } break;
2467 case InstIcmp::Ule: 2560 case InstIcmp::Ule:
2468 case InstIcmp::Sle: { 2561 case InstIcmp::Sle: {
2469 // !(Src0RM > Src1RM) 2562 // !(Src0RM > Src1RM)
2470 _movp(T, Src0RM); 2563 _movp(T, Src0RM);
2471 _pcmpgt(T, LEGAL_HACK(Src1RM)); 2564 _pcmpgt(T, Src1RM);
2472 Variable *MinusOne = makeVectorOfMinusOnes(Ty); 2565 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
2473 _pxor(T, MinusOne); 2566 _pxor(T, MinusOne);
2474 } break; 2567 } break;
2475 } 2568 }
2476 #undef LEGAL_HACK
2477 2569
2478 _movp(Dest, T); 2570 _movp(Dest, T);
2479 eliminateNextVectorSextInstruction(Dest); 2571 eliminateNextVectorSextInstruction(Dest);
2480 return; 2572 return;
2481 } 2573 }
2482 2574
2483 // If Src1 is an immediate, or known to be a physical register, we can 2575 // If Src1 is an immediate, or known to be a physical register, we can
2484 // allow Src0 to be a memory operand. Otherwise, Src0 must be copied into 2576 // allow Src0 to be a memory operand. Otherwise, Src0 must be copied into
2485 // a physical register. (Actually, either Src0 or Src1 can be chosen for 2577 // a physical register. (Actually, either Src0 or Src1 can be chosen for
2486 // the physical register, but unfortunately we have to commit to one or 2578 // the physical register, but unfortunately we have to commit to one or
(...skipping 155 matching lines...)
2642 // insertelement into index 3 (result is stored in T): 2734 // insertelement into index 3 (result is stored in T):
2643 // T := SourceVectRM 2735 // T := SourceVectRM
2644 // ElementR := ElementR[0, 0] T[0, 2] 2736 // ElementR := ElementR[0, 0] T[0, 2]
2645 // T := T[0, 1] ElementR[3, 0] 2737 // T := T[0, 1] ElementR[3, 0]
2646 const unsigned char Mask1[3] = {0, 192, 128}; 2738 const unsigned char Mask1[3] = {0, 192, 128};
2647 const unsigned char Mask2[3] = {227, 196, 52}; 2739 const unsigned char Mask2[3] = {227, 196, 52};
2648 2740
2649 Constant *Mask1Constant = Ctx->getConstantInt(IceType_i8, Mask1[Index - 1]); 2741 Constant *Mask1Constant = Ctx->getConstantInt(IceType_i8, Mask1[Index - 1]);
2650 Constant *Mask2Constant = Ctx->getConstantInt(IceType_i8, Mask2[Index - 1]); 2742 Constant *Mask2Constant = Ctx->getConstantInt(IceType_i8, Mask2[Index - 1]);
2651 2743
2652 // ALIGNHACK: Force vector operands to registers in instructions
2653 // that require aligned memory operands until support for data
2654 // alignment is implemented.
2655 #define ALIGN_HACK(Vect) legalizeToVar((Vect))
2656 if (Index == 1) { 2744 if (Index == 1) {
2657 SourceVectRM = ALIGN_HACK(SourceVectRM);
2658 _shufps(ElementR, SourceVectRM, Mask1Constant); 2745 _shufps(ElementR, SourceVectRM, Mask1Constant);
2659 _shufps(ElementR, SourceVectRM, Mask2Constant); 2746 _shufps(ElementR, SourceVectRM, Mask2Constant);
2660 _movp(Inst->getDest(), ElementR); 2747 _movp(Inst->getDest(), ElementR);
2661 } else { 2748 } else {
2662 Variable *T = makeReg(Ty); 2749 Variable *T = makeReg(Ty);
2663 _movp(T, SourceVectRM); 2750 _movp(T, SourceVectRM);
2664 _shufps(ElementR, T, Mask1Constant); 2751 _shufps(ElementR, T, Mask1Constant);
2665 _shufps(T, ElementR, Mask2Constant); 2752 _shufps(T, ElementR, Mask2Constant);
2666 _movp(Inst->getDest(), T); 2753 _movp(Inst->getDest(), T);
2667 } 2754 }
2668 #undef ALIGN_HACK
2669 } else { 2755 } else {
2670 assert(Ty == IceType_v16i8 || Ty == IceType_v16i1); 2756 assert(Ty == IceType_v16i8 || Ty == IceType_v16i1);
2671 // Spill the value to a stack slot and perform the insertion in 2757 // Spill the value to a stack slot and perform the insertion in
2672 // memory. 2758 // memory.
2673 // 2759 //
2674 // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when 2760 // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when
2675 // support for legalizing to mem is implemented. 2761 // support for legalizing to mem is implemented.
2676 Variable *Slot = Func->makeVariable(Ty, Context.getNode()); 2762 Variable *Slot = Func->makeVariable(Ty, Context.getNode());
2677 Slot->setWeight(RegWeight::Zero); 2763 Slot->setWeight(RegWeight::Zero);
2678 _movp(Slot, legalizeToVar(SourceVectNotLegalized)); 2764 _movp(Slot, legalizeToVar(SourceVectNotLegalized));
(...skipping 941 matching lines...)
3620 Variable *Dest = Inst->getDest(); 3706 Variable *Dest = Inst->getDest();
3621 Operand *SrcT = Inst->getTrueOperand(); 3707 Operand *SrcT = Inst->getTrueOperand();
3622 Operand *SrcF = Inst->getFalseOperand(); 3708 Operand *SrcF = Inst->getFalseOperand();
3623 Operand *Condition = Inst->getCondition(); 3709 Operand *Condition = Inst->getCondition();
3624 3710
3625 if (isVectorType(Dest->getType())) { 3711 if (isVectorType(Dest->getType())) {
3626 Type SrcTy = SrcT->getType(); 3712 Type SrcTy = SrcT->getType();
3627 Variable *T = makeReg(SrcTy); 3713 Variable *T = makeReg(SrcTy);
3628 Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem); 3714 Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem);
3629 Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem); 3715 Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem);
3630 // ALIGNHACK: Until data alignment support is implemented, vector
3631 // instructions need to have vector operands in registers. Once
3632 // there is support for data alignment, LEGAL_HACK can be removed.
3633 #define LEGAL_HACK(Vect) legalizeToVar((Vect))
3634 if (InstructionSet >= SSE4_1) { 3716 if (InstructionSet >= SSE4_1) {
3635 // TODO(wala): If the condition operand is a constant, use blendps 3717 // TODO(wala): If the condition operand is a constant, use blendps
3636 // or pblendw. 3718 // or pblendw.
3637 // 3719 //
3638 // Use blendvps or pblendvb to implement select. 3720 // Use blendvps or pblendvb to implement select.
3639 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 || 3721 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 ||
3640 SrcTy == IceType_v4f32) { 3722 SrcTy == IceType_v4f32) {
3641 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem); 3723 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);
3642 Variable *xmm0 = makeReg(IceType_v4i32, Reg_xmm0); 3724 Variable *xmm0 = makeReg(IceType_v4i32, Reg_xmm0);
3643 _movp(xmm0, ConditionRM); 3725 _movp(xmm0, ConditionRM);
3644 _psll(xmm0, Ctx->getConstantInt(IceType_i8, 31)); 3726 _psll(xmm0, Ctx->getConstantInt(IceType_i8, 31));
3645 _movp(T, SrcFRM); 3727 _movp(T, SrcFRM);
3646 _blendvps(T, LEGAL_HACK(SrcTRM), xmm0); 3728 _blendvps(T, SrcTRM, xmm0);
3647 _movp(Dest, T); 3729 _movp(Dest, T);
3648 } else { 3730 } else {
3649 assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16); 3731 assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16);
3650 Type SignExtTy = Condition->getType() == IceType_v8i1 ? IceType_v8i16 3732 Type SignExtTy = Condition->getType() == IceType_v8i1 ? IceType_v8i16
3651 : IceType_v16i8; 3733 : IceType_v16i8;
3652 Variable *xmm0 = makeReg(SignExtTy, Reg_xmm0); 3734 Variable *xmm0 = makeReg(SignExtTy, Reg_xmm0);
3653 lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition)); 3735 lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition));
3654 _movp(T, SrcFRM); 3736 _movp(T, SrcFRM);
3655 _pblendvb(T, LEGAL_HACK(SrcTRM), xmm0); 3737 _pblendvb(T, SrcTRM, xmm0);
3656 _movp(Dest, T); 3738 _movp(Dest, T);
3657 } 3739 }
3658 return; 3740 return;
3659 } 3741 }
3660 // Lower select without SSE4.1: 3742 // Lower select without SSE4.1:
3661 // a=d?b:c ==> 3743 // a=d?b:c ==>
3662 // if elementtype(d) != i1: 3744 // if elementtype(d) != i1:
3663 // d=sext(d); 3745 // d=sext(d);
3664 // a=(b&d)|(c&~d); 3746 // a=(b&d)|(c&~d);
3665 Variable *T2 = makeReg(SrcTy); 3747 Variable *T2 = makeReg(SrcTy);
3666 // Sign extend the condition operand if applicable. 3748 // Sign extend the condition operand if applicable.
3667 if (SrcTy == IceType_v4f32) { 3749 if (SrcTy == IceType_v4f32) {
3668 // The sext operation takes only integer arguments. 3750 // The sext operation takes only integer arguments.
3669 Variable *T3 = Func->makeVariable(IceType_v4i32, Context.getNode()); 3751 Variable *T3 = Func->makeVariable(IceType_v4i32, Context.getNode());
3670 lowerCast(InstCast::create(Func, InstCast::Sext, T3, Condition)); 3752 lowerCast(InstCast::create(Func, InstCast::Sext, T3, Condition));
3671 _movp(T, T3); 3753 _movp(T, T3);
3672 } else if (typeElementType(SrcTy) != IceType_i1) { 3754 } else if (typeElementType(SrcTy) != IceType_i1) {
3673 lowerCast(InstCast::create(Func, InstCast::Sext, T, Condition)); 3755 lowerCast(InstCast::create(Func, InstCast::Sext, T, Condition));
3674 } else { 3756 } else {
3675 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem); 3757 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);
3676 _movp(T, ConditionRM); 3758 _movp(T, ConditionRM);
3677 } 3759 }
3678 _movp(T2, T); 3760 _movp(T2, T);
3679 _pand(T, LEGAL_HACK(SrcTRM)); 3761 _pand(T, SrcTRM);
3680 _pandn(T2, LEGAL_HACK(SrcFRM)); 3762 _pandn(T2, SrcFRM);
3681 _por(T, T2); 3763 _por(T, T2);
3682 _movp(Dest, T); 3764 _movp(Dest, T);
3683 #undef LEGAL_HACK
3684 3765
3685 return; 3766 return;
3686 } 3767 }
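
The fallback select is a bitwise blend: after the sign extension, every condition lane is all-ones or all-zeros, so (b & d) | (c & ~d) produces b where the condition holds and c elsewhere. Per-lane scalar sketch with a hypothetical blendLane helper:

#include <cassert>
#include <cstdint>

// D is all-ones (take B) or all-zeros (take C), as the sext guarantees.
uint32_t blendLane(uint32_t B, uint32_t C, uint32_t D) {
  return (B & D) | (C & ~D);
}

int main() {
  assert(blendLane(7, 9, 0xffffffffu) == 7); // condition true -> b
  assert(blendLane(7, 9, 0x00000000u) == 9); // condition false -> c
  return 0;
}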
3687 3768
3688 // a=d?b:c ==> cmp d,0; a=b; jne L1; FakeUse(a); a=c; L1: 3769 // a=d?b:c ==> cmp d,0; a=b; jne L1; FakeUse(a); a=c; L1:
3689 Operand *ConditionRMI = legalize(Condition); 3770 Operand *ConditionRMI = legalize(Condition);
3690 Constant *Zero = Ctx->getConstantZero(IceType_i32); 3771 Constant *Zero = Ctx->getConstantZero(IceType_i32);
3691 InstX8632Label *Label = InstX8632Label::create(Func, this); 3772 InstX8632Label *Label = InstX8632Label::create(Func, this);
3692 3773
3693 if (Dest->getType() == IceType_i64) { 3774 if (Dest->getType() == IceType_i64) {
(...skipping 542 matching lines...)
4236 for (SizeT i = 0; i < Size; ++i) { 4317 for (SizeT i = 0; i < Size; ++i) {
4237 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; 4318 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n";
4238 } 4319 }
4239 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; 4320 Str << "\t.size\t" << MangledName << ", " << Size << "\n";
4240 } 4321 }
4241 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName 4322 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName
4242 << "\n"; 4323 << "\n";
4243 } 4324 }
4244 4325
4245 } // end of namespace Ice 4326 } // end of namespace Ice