Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(327)

Side by Side Diff: src/IceTargetLoweringX8632.cpp

Issue 465413003: Subzero: Align spill locations to natural alignment. (Closed) Base URL: https://gerrit.chromium.org/gerrit/p/native_client/pnacl-subzero.git@master
Patch Set: Don't sort everything. Created 6 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD NEW
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 // 9 //
10 // This file implements the TargetLoweringX8632 class, which 10 // This file implements the TargetLoweringX8632 class, which
(...skipping 111 matching lines...) Expand 10 before | Expand all | Expand 10 after
122 122
123 // The maximum number of arguments to pass in XMM registers 123 // The maximum number of arguments to pass in XMM registers
124 const uint32_t X86_MAX_XMM_ARGS = 4; 124 const uint32_t X86_MAX_XMM_ARGS = 4;
125 // The number of bits in a byte 125 // The number of bits in a byte
126 const uint32_t X86_CHAR_BIT = 8; 126 const uint32_t X86_CHAR_BIT = 8;
127 // Stack alignment 127 // Stack alignment
128 const uint32_t X86_STACK_ALIGNMENT_BYTES = 16; 128 const uint32_t X86_STACK_ALIGNMENT_BYTES = 16;
129 // Size of the return address on the stack 129 // Size of the return address on the stack
130 const uint32_t X86_RET_IP_SIZE_BYTES = 4; 130 const uint32_t X86_RET_IP_SIZE_BYTES = 4;
131 131
132 // Value is a size in bytes. Return Value adjusted to the next highest 132 // Value and Alignment are in bytes. Return Value adjusted to the next
133 // multiple of the stack alignment. 133 // highest multiple of Alignment.
134 uint32_t applyAlignment(uint32_t Value, uint32_t Alignment) {
135 // power of 2
136 assert((Alignment & (Alignment - 1)) == 0);
137 return (Value + Alignment - 1) & -Alignment;
138 }
139
140 // Value is in bytes. Return Value adjusted to the next highest multiple
141 // of the stack alignment.
134 uint32_t applyStackAlignment(uint32_t Value) { 142 uint32_t applyStackAlignment(uint32_t Value) {
135 // power of 2 143 return applyAlignment(Value, X86_STACK_ALIGNMENT_BYTES);
136 assert((X86_STACK_ALIGNMENT_BYTES & (X86_STACK_ALIGNMENT_BYTES - 1)) == 0);
137 return (Value + X86_STACK_ALIGNMENT_BYTES - 1) & -X86_STACK_ALIGNMENT_BYTES;
138 } 144 }
139 145
140 // Instruction set options 146 // Instruction set options
141 namespace cl = ::llvm::cl; 147 namespace cl = ::llvm::cl;
142 cl::opt<TargetX8632::X86InstructionSet> CLInstructionSet( 148 cl::opt<TargetX8632::X86InstructionSet> CLInstructionSet(
143 "mattr", cl::desc("X86 target attributes"), 149 "mattr", cl::desc("X86 target attributes"),
144 cl::init(TargetX8632::SSE2), 150 cl::init(TargetX8632::SSE2),
145 cl::values( 151 cl::values(
146 clEnumValN(TargetX8632::SSE2, "sse2", 152 clEnumValN(TargetX8632::SSE2, "sse2",
147 "Enable SSE2 instructions (default)"), 153 "Enable SSE2 instructions (default)"),
(...skipping 365 matching lines...) Expand 10 before | Expand all | Expand 10 after
513 Variable *RegisterArg = Func->makeVariable(Ty, DefNode, Name); 519 Variable *RegisterArg = Func->makeVariable(Ty, DefNode, Name);
514 RegisterArg->setRegNum(RegNum); 520 RegisterArg->setRegNum(RegNum);
515 RegisterArg->setIsArg(Func); 521 RegisterArg->setIsArg(Func);
516 Arg->setIsArg(Func, false); 522 Arg->setIsArg(Func, false);
517 523
518 Args[I] = RegisterArg; 524 Args[I] = RegisterArg;
519 Context.insert(InstAssign::create(Func, Arg, RegisterArg)); 525 Context.insert(InstAssign::create(Func, Arg, RegisterArg));
520 } 526 }
521 } 527 }
522 528
529 void TargetX8632::sortByAlignment(VarList &Dest, const VarList &Source) const {
530 typedef std::map<uint32_t, VarList> BucketMap;
Jim Stichnoth 2014/08/13 23:59:34 Use a list<> or vector<> instead of map<> ?
Jim Stichnoth 2014/08/14 16:48:29 Never mind this comment, I misread the code.
wala 2014/08/14 17:31:24 There are only 4 possible buckets (corresponding t
wala 2014/08/14 17:51:59 Actually, there are 3 usable buckets (4 bytes, 8 b
531 BucketMap Buckets;
532
533 for (VarList::const_iterator I = Source.begin(), E = Source.end(); I != E;
534 ++I) {
535 uint32_t NaturalAlignment = typeWidthInBytesOnStack((*I)->getType());
536 Buckets[NaturalAlignment].push_back(*I);
537 }
538
539 for (BucketMap::reverse_iterator I = Buckets.rbegin(), E = Buckets.rend();
540 I != E; ++I) {
541 VarList &List = I->second;
542 Dest.insert(Dest.end(), List.begin(), List.end());
543 }
544 }
545
523 // Helper function for addProlog(). 546 // Helper function for addProlog().
524 // 547 //
525 // This assumes Arg is an argument passed on the stack. This sets the 548 // This assumes Arg is an argument passed on the stack. This sets the
526 // frame offset for Arg and updates InArgsSizeBytes according to Arg's 549 // frame offset for Arg and updates InArgsSizeBytes according to Arg's
527 // width. For an I64 arg that has been split into Lo and Hi components, 550 // width. For an I64 arg that has been split into Lo and Hi components,
528 // it calls itself recursively on the components, taking care to handle 551 // it calls itself recursively on the components, taking care to handle
529 // Lo first because of the little-endian architecture. Lastly, this 552 // Lo first because of the little-endian architecture. Lastly, this
530 // function generates an instruction to copy Arg into its assigned 553 // function generates an instruction to copy Arg into its assigned
531 // register if applicable. 554 // register if applicable.
532 void TargetX8632::finishArgumentLowering(Variable *Arg, Variable *FramePtr, 555 void TargetX8632::finishArgumentLowering(Variable *Arg, Variable *FramePtr,
(...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after
592 getRegisterSet(RegSet_CalleeSave, RegSet_None); 615 getRegisterSet(RegSet_CalleeSave, RegSet_None);
593 616
594 size_t GlobalsSize = 0; 617 size_t GlobalsSize = 0;
595 std::vector<size_t> LocalsSize(Func->getNumNodes()); 618 std::vector<size_t> LocalsSize(Func->getNumNodes());
596 619
597 // Prepass. Compute RegsUsed, PreservedRegsSizeBytes, and 620 // Prepass. Compute RegsUsed, PreservedRegsSizeBytes, and
598 // LocalsSizeBytes. 621 // LocalsSizeBytes.
599 RegsUsed = llvm::SmallBitVector(CalleeSaves.size()); 622 RegsUsed = llvm::SmallBitVector(CalleeSaves.size());
600 const VarList &Variables = Func->getVariables(); 623 const VarList &Variables = Func->getVariables();
601 const VarList &Args = Func->getArgs(); 624 const VarList &Args = Func->getArgs();
625 VarList SpilledVariables, SortedSpilledVariables,
626 VariablesLinkedToSpillSplots;
627
628 // If there is a separate locals area, this specifies the alignment
629 // for it.
630 uint32_t LocalsSlotsAlignmentBytes = 0;
Jim Stichnoth 2014/08/13 23:59:34 At this point, the stack frame layout is starting
wala 2014/08/14 17:31:24 Done.
631 // The entire spill locations area gets aligned to the largest natural
632 // alignment of the variables that have a spill slot.
633 uint32_t SpillAreaAlignmentBytes = 0;
602 for (VarList::const_iterator I = Variables.begin(), E = Variables.end(); 634 for (VarList::const_iterator I = Variables.begin(), E = Variables.end();
603 I != E; ++I) { 635 I != E; ++I) {
604 Variable *Var = *I; 636 Variable *Var = *I;
605 if (Var->hasReg()) { 637 if (Var->hasReg()) {
606 RegsUsed[Var->getRegNum()] = true; 638 RegsUsed[Var->getRegNum()] = true;
607 continue; 639 continue;
608 } 640 }
609 // An argument either does not need a stack slot (if passed in a 641 // An argument either does not need a stack slot (if passed in a
610 // register) or already has one (if passed on the stack). 642 // register) or already has one (if passed on the stack).
611 if (Var->getIsArg()) 643 if (Var->getIsArg())
612 continue; 644 continue;
613 // An unreferenced variable doesn't need a stack slot. 645 // An unreferenced variable doesn't need a stack slot.
614 if (ComputedLiveRanges && Var->getLiveRange().isEmpty()) 646 if (ComputedLiveRanges && Var->getLiveRange().isEmpty())
615 continue; 647 continue;
616 // A spill slot linked to a variable with a stack slot should reuse 648 // A spill slot linked to a variable with a stack slot should reuse
617 // that stack slot. 649 // that stack slot.
618 if (Var->getWeight() == RegWeight::Zero && Var->getRegisterOverlap()) { 650 if (Var->getWeight() == RegWeight::Zero && Var->getRegisterOverlap()) {
619 if (Variable *Linked = Var->getPreferredRegister()) { 651 if (Variable *Linked = Var->getPreferredRegister()) {
620 if (!Linked->hasReg()) 652 if (!Linked->hasReg()) {
653 VariablesLinkedToSpillSplots.push_back(Var);
621 continue; 654 continue;
655 }
622 } 656 }
623 } 657 }
658 SpilledVariables.push_back(Var);
659 }
660
661 sortByAlignment(SortedSpilledVariables, SpilledVariables);
662 for (VarList::const_iterator I = SortedSpilledVariables.begin(),
663 E = SortedSpilledVariables.end();
664 I != E; ++I) {
665 Variable *Var = *I;
624 size_t Increment = typeWidthInBytesOnStack(Var->getType()); 666 size_t Increment = typeWidthInBytesOnStack(Var->getType());
667 if (!SpillAreaAlignmentBytes)
668 SpillAreaAlignmentBytes = Increment;
625 if (SimpleCoalescing) { 669 if (SimpleCoalescing) {
626 if (Var->isMultiblockLife()) { 670 if (Var->isMultiblockLife()) {
627 GlobalsSize += Increment; 671 GlobalsSize += Increment;
628 } else { 672 } else {
629 SizeT NodeIndex = Var->getLocalUseNode()->getIndex(); 673 SizeT NodeIndex = Var->getLocalUseNode()->getIndex();
630 LocalsSize[NodeIndex] += Increment; 674 LocalsSize[NodeIndex] += Increment;
631 if (LocalsSize[NodeIndex] > LocalsSizeBytes) 675 if (LocalsSize[NodeIndex] > LocalsSizeBytes)
632 LocalsSizeBytes = LocalsSize[NodeIndex]; 676 LocalsSizeBytes = LocalsSize[NodeIndex];
677 if (!LocalsSlotsAlignmentBytes)
678 LocalsSlotsAlignmentBytes = Increment;
633 } 679 }
634 } else { 680 } else {
635 LocalsSizeBytes += Increment; 681 LocalsSizeBytes += Increment;
636 } 682 }
637 } 683 }
638 LocalsSizeBytes += GlobalsSize; 684 LocalsSizeBytes += GlobalsSize;
639 685
640 // Add push instructions for preserved registers. 686 // Add push instructions for preserved registers.
641 for (SizeT i = 0; i < CalleeSaves.size(); ++i) { 687 for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
642 if (CalleeSaves[i] && RegsUsed[i]) { 688 if (CalleeSaves[i] && RegsUsed[i]) {
643 PreservedRegsSizeBytes += 4; 689 PreservedRegsSizeBytes += 4;
644 const bool SuppressStackAdjustment = true; 690 const bool SuppressStackAdjustment = true;
645 _push(getPhysicalRegister(i), SuppressStackAdjustment); 691 _push(getPhysicalRegister(i), SuppressStackAdjustment);
646 } 692 }
647 } 693 }
648 694
649 // Generate "push ebp; mov ebp, esp" 695 // Generate "push ebp; mov ebp, esp"
650 if (IsEbpBasedFrame) { 696 if (IsEbpBasedFrame) {
651 assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None)) 697 assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None))
652 .count() == 0); 698 .count() == 0);
653 PreservedRegsSizeBytes += 4; 699 PreservedRegsSizeBytes += 4;
654 Variable *ebp = getPhysicalRegister(Reg_ebp); 700 Variable *ebp = getPhysicalRegister(Reg_ebp);
655 Variable *esp = getPhysicalRegister(Reg_esp); 701 Variable *esp = getPhysicalRegister(Reg_esp);
656 const bool SuppressStackAdjustment = true; 702 const bool SuppressStackAdjustment = true;
657 _push(ebp, SuppressStackAdjustment); 703 _push(ebp, SuppressStackAdjustment);
658 _mov(ebp, esp); 704 _mov(ebp, esp);
659 } 705 }
660 706
707 // Align the variables area.
708 uint32_t SpillAreaPaddingBytes = 0;
709 if (SpillAreaAlignmentBytes) {
710 assert(SpillAreaAlignmentBytes <= X86_STACK_ALIGNMENT_BYTES);
711 uint32_t SpillAreaOffset = X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes;
712 uint32_t SpillAreaStart =
713 applyAlignment(SpillAreaOffset, SpillAreaAlignmentBytes);
714 SpillAreaPaddingBytes = SpillAreaStart - SpillAreaOffset;
715 LocalsSizeBytes += SpillAreaPaddingBytes;
716 }
717
718 // If there are separate globals and locals areas, make sure the
719 // locals area is aligned by padding the end of the globals area.
720 if (LocalsSlotsAlignmentBytes) {
721 assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
722 uint32_t NewGlobalsSize =
723 applyAlignment(GlobalsSize, LocalsSlotsAlignmentBytes);
724 GlobalsSize = NewGlobalsSize;
725 LocalsSizeBytes += NewGlobalsSize - GlobalsSize;
jvoung (off chromium) 2014/08/14 00:48:49 Isn't this difference always going to be zero? Do
wala 2014/08/14 17:31:24 Ouch, I'm surprised all the tests still passed. D
726 }
727
728 // Align esp if necessary.
661 if (NeedsStackAlignment) { 729 if (NeedsStackAlignment) {
662 uint32_t StackSize = applyStackAlignment( 730 uint32_t StackOffset = X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes;
663 X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes + LocalsSizeBytes); 731 uint32_t StackSize = applyStackAlignment(StackOffset + LocalsSizeBytes);
664 LocalsSizeBytes = 732 LocalsSizeBytes = StackSize - StackOffset;
665 StackSize - X86_RET_IP_SIZE_BYTES - PreservedRegsSizeBytes;
666 } 733 }
667 734
668 // Generate "sub esp, LocalsSizeBytes" 735 // Generate "sub esp, LocalsSizeBytes"
669 if (LocalsSizeBytes) 736 if (LocalsSizeBytes)
670 _sub(getPhysicalRegister(Reg_esp), 737 _sub(getPhysicalRegister(Reg_esp),
671 Ctx->getConstantInt(IceType_i32, LocalsSizeBytes)); 738 Ctx->getConstantInt(IceType_i32, LocalsSizeBytes));
672 739
673 resetStackAdjustment(); 740 resetStackAdjustment();
674 741
675 // Fill in stack offsets for stack args, and copy args into registers 742 // Fill in stack offsets for stack args, and copy args into registers
(...skipping 13 matching lines...) Expand all
689 continue; 756 continue;
690 } 757 }
691 finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes); 758 finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes);
692 } 759 }
693 760
694 // Fill in stack offsets for locals. 761 // Fill in stack offsets for locals.
695 size_t TotalGlobalsSize = GlobalsSize; 762 size_t TotalGlobalsSize = GlobalsSize;
696 GlobalsSize = 0; 763 GlobalsSize = 0;
697 LocalsSize.assign(LocalsSize.size(), 0); 764 LocalsSize.assign(LocalsSize.size(), 0);
698 size_t NextStackOffset = 0; 765 size_t NextStackOffset = 0;
699 for (VarList::const_iterator I = Variables.begin(), E = Variables.end(); 766 for (VarList::const_iterator I = SortedSpilledVariables.begin(),
767 E = SortedSpilledVariables.end();
700 I != E; ++I) { 768 I != E; ++I) {
701 Variable *Var = *I; 769 Variable *Var = *I;
702 if (Var->hasReg()) {
703 RegsUsed[Var->getRegNum()] = true;
704 continue;
705 }
706 if (Var->getIsArg())
707 continue;
708 if (ComputedLiveRanges && Var->getLiveRange().isEmpty())
709 continue;
710 if (Var->getWeight() == RegWeight::Zero && Var->getRegisterOverlap()) { 770 if (Var->getWeight() == RegWeight::Zero && Var->getRegisterOverlap()) {
wala 2014/08/14 17:31:24 Since I've separated out the list of variables lin
711 if (Variable *Linked = Var->getPreferredRegister()) { 771 if (Variable *Linked = Var->getPreferredRegister()) {
712 if (!Linked->hasReg()) { 772 if (!Linked->hasReg()) {
713 // TODO: Make sure Linked has already been assigned a stack 773 // TODO: Make sure Linked has already been assigned a stack
714 // slot. 774 // slot.
715 Var->setStackOffset(Linked->getStackOffset()); 775 Var->setStackOffset(Linked->getStackOffset());
716 continue; 776 continue;
717 } 777 }
718 } 778 }
719 } 779 }
720 size_t Increment = typeWidthInBytesOnStack(Var->getType()); 780 size_t Increment = typeWidthInBytesOnStack(Var->getType());
721 if (SimpleCoalescing) { 781 if (SimpleCoalescing) {
722 if (Var->isMultiblockLife()) { 782 if (Var->isMultiblockLife()) {
723 GlobalsSize += Increment; 783 GlobalsSize += Increment;
724 NextStackOffset = GlobalsSize; 784 NextStackOffset = GlobalsSize;
725 } else { 785 } else {
726 SizeT NodeIndex = Var->getLocalUseNode()->getIndex(); 786 SizeT NodeIndex = Var->getLocalUseNode()->getIndex();
727 LocalsSize[NodeIndex] += Increment; 787 LocalsSize[NodeIndex] += Increment;
728 NextStackOffset = TotalGlobalsSize + LocalsSize[NodeIndex]; 788 NextStackOffset = TotalGlobalsSize + LocalsSize[NodeIndex];
729 } 789 }
730 } else { 790 } else {
731 NextStackOffset += Increment; 791 NextStackOffset += Increment;
732 } 792 }
733 if (IsEbpBasedFrame) 793 if (IsEbpBasedFrame)
734 Var->setStackOffset(-NextStackOffset); 794 Var->setStackOffset(-NextStackOffset - SpillAreaPaddingBytes);
735 else 795 else {
Jim Stichnoth 2014/08/13 23:59:34 Probably best to have braces around both clauses o
wala 2014/08/14 17:31:24 Done.
736 Var->setStackOffset(LocalsSizeBytes - NextStackOffset); 796 Var->setStackOffset(LocalsSizeBytes - NextStackOffset -
797 SpillAreaPaddingBytes);
798 }
737 } 799 }
738 this->FrameSizeLocals = NextStackOffset; 800 this->FrameSizeLocals = NextStackOffset;
739 this->HasComputedFrame = true; 801 this->HasComputedFrame = true;
740 802
803 for (VarList::const_iterator I = VariablesLinkedToSpillSplots.begin(),
804 E = VariablesLinkedToSpillSplots.end();
805 I != E; ++I) {
806 Variable *Var = *I;
807 Variable *Linked = Var->getPreferredRegister();
808 Var->setStackOffset(Linked->getStackOffset());
809 }
810
741 if (Func->getContext()->isVerbose(IceV_Frame)) { 811 if (Func->getContext()->isVerbose(IceV_Frame)) {
742 Func->getContext()->getStrDump() << "LocalsSizeBytes=" << LocalsSizeBytes 812 Func->getContext()->getStrDump() << "LocalsSizeBytes=" << LocalsSizeBytes
743 << "\n" 813 << "\n"
744 << "InArgsSizeBytes=" << InArgsSizeBytes 814 << "InArgsSizeBytes=" << InArgsSizeBytes
745 << "\n" 815 << "\n"
746 << "PreservedRegsSizeBytes=" 816 << "PreservedRegsSizeBytes="
747 << PreservedRegsSizeBytes << "\n"; 817 << PreservedRegsSizeBytes << "\n";
748 } 818 }
749 } 819 }
750 820
(...skipping 233 matching lines...) Expand 10 before | Expand all | Expand 10 after
984 assert((AlignmentParam & (AlignmentParam - 1)) == 0); 1054 assert((AlignmentParam & (AlignmentParam - 1)) == 0);
985 assert((X86_STACK_ALIGNMENT_BYTES & (X86_STACK_ALIGNMENT_BYTES - 1)) == 0); 1055 assert((X86_STACK_ALIGNMENT_BYTES & (X86_STACK_ALIGNMENT_BYTES - 1)) == 0);
986 1056
987 uint32_t Alignment = std::max(AlignmentParam, X86_STACK_ALIGNMENT_BYTES); 1057 uint32_t Alignment = std::max(AlignmentParam, X86_STACK_ALIGNMENT_BYTES);
988 if (Alignment > X86_STACK_ALIGNMENT_BYTES) { 1058 if (Alignment > X86_STACK_ALIGNMENT_BYTES) {
989 _and(esp, Ctx->getConstantInt(IceType_i32, -Alignment)); 1059 _and(esp, Ctx->getConstantInt(IceType_i32, -Alignment));
990 } 1060 }
991 if (ConstantInteger *ConstantTotalSize = 1061 if (ConstantInteger *ConstantTotalSize =
992 llvm::dyn_cast<ConstantInteger>(TotalSize)) { 1062 llvm::dyn_cast<ConstantInteger>(TotalSize)) {
993 uint32_t Value = ConstantTotalSize->getValue(); 1063 uint32_t Value = ConstantTotalSize->getValue();
994 // Round Value up to the next highest multiple of the alignment. 1064 Value = applyAlignment(Value, Alignment);
995 Value = (Value + Alignment - 1) & -Alignment;
996 _sub(esp, Ctx->getConstantInt(IceType_i32, Value)); 1065 _sub(esp, Ctx->getConstantInt(IceType_i32, Value));
997 } else { 1066 } else {
998 // Non-constant sizes need to be adjusted to the next highest 1067 // Non-constant sizes need to be adjusted to the next highest
999 // multiple of the required alignment at runtime. 1068 // multiple of the required alignment at runtime.
1000 Variable *T = makeReg(IceType_i32); 1069 Variable *T = makeReg(IceType_i32);
1001 _mov(T, TotalSize); 1070 _mov(T, TotalSize);
1002 _add(T, Ctx->getConstantInt(IceType_i32, Alignment - 1)); 1071 _add(T, Ctx->getConstantInt(IceType_i32, Alignment - 1));
1003 _and(T, Ctx->getConstantInt(IceType_i32, -Alignment)); 1072 _and(T, Ctx->getConstantInt(IceType_i32, -Alignment));
1004 _sub(esp, T); 1073 _sub(esp, T);
1005 } 1074 }
(...skipping 226 matching lines...) Expand 10 before | Expand all | Expand 10 after
1232 case InstArithmetic::Fsub: 1301 case InstArithmetic::Fsub:
1233 case InstArithmetic::Fmul: 1302 case InstArithmetic::Fmul:
1234 case InstArithmetic::Fdiv: 1303 case InstArithmetic::Fdiv:
1235 case InstArithmetic::Frem: 1304 case InstArithmetic::Frem:
1236 llvm_unreachable("FP instruction with i64 type"); 1305 llvm_unreachable("FP instruction with i64 type");
1237 break; 1306 break;
1238 } 1307 }
1239 } else if (isVectorType(Dest->getType())) { 1308 } else if (isVectorType(Dest->getType())) {
1240 // TODO: Trap on integer divide and integer modulo by zero. 1309 // TODO: Trap on integer divide and integer modulo by zero.
1241 // See: https://code.google.com/p/nativeclient/issues/detail?id=3899 1310 // See: https://code.google.com/p/nativeclient/issues/detail?id=3899
1242 //
1243 // TODO(wala): ALIGNHACK: All vector arithmetic is currently done in
1244 // registers. This is a workaround of the fact that there is no
1245 // support for aligning stack operands. Once there is support,
1246 // remove LEGAL_HACK.
1247 #define LEGAL_HACK(s) legalizeToVar((s))
1248 switch (Inst->getOp()) { 1311 switch (Inst->getOp()) {
1249 case InstArithmetic::_num: 1312 case InstArithmetic::_num:
1250 llvm_unreachable("Unknown arithmetic operator"); 1313 llvm_unreachable("Unknown arithmetic operator");
1251 break; 1314 break;
1252 case InstArithmetic::Add: { 1315 case InstArithmetic::Add: {
1253 Variable *T = makeReg(Dest->getType()); 1316 Variable *T = makeReg(Dest->getType());
1254 _movp(T, Src0); 1317 _movp(T, Src0);
1255 _padd(T, LEGAL_HACK(Src1)); 1318 _padd(T, Src1);
1256 _movp(Dest, T); 1319 _movp(Dest, T);
1257 } break; 1320 } break;
1258 case InstArithmetic::And: { 1321 case InstArithmetic::And: {
1259 Variable *T = makeReg(Dest->getType()); 1322 Variable *T = makeReg(Dest->getType());
1260 _movp(T, Src0); 1323 _movp(T, Src0);
1261 _pand(T, LEGAL_HACK(Src1)); 1324 _pand(T, Src1);
1262 _movp(Dest, T); 1325 _movp(Dest, T);
1263 } break; 1326 } break;
1264 case InstArithmetic::Or: { 1327 case InstArithmetic::Or: {
1265 Variable *T = makeReg(Dest->getType()); 1328 Variable *T = makeReg(Dest->getType());
1266 _movp(T, Src0); 1329 _movp(T, Src0);
1267 _por(T, LEGAL_HACK(Src1)); 1330 _por(T, Src1);
1268 _movp(Dest, T); 1331 _movp(Dest, T);
1269 } break; 1332 } break;
1270 case InstArithmetic::Xor: { 1333 case InstArithmetic::Xor: {
1271 Variable *T = makeReg(Dest->getType()); 1334 Variable *T = makeReg(Dest->getType());
1272 _movp(T, Src0); 1335 _movp(T, Src0);
1273 _pxor(T, LEGAL_HACK(Src1)); 1336 _pxor(T, Src1);
1274 _movp(Dest, T); 1337 _movp(Dest, T);
1275 } break; 1338 } break;
1276 case InstArithmetic::Sub: { 1339 case InstArithmetic::Sub: {
1277 Variable *T = makeReg(Dest->getType()); 1340 Variable *T = makeReg(Dest->getType());
1278 _movp(T, Src0); 1341 _movp(T, Src0);
1279 _psub(T, LEGAL_HACK(Src1)); 1342 _psub(T, Src1);
1280 _movp(Dest, T); 1343 _movp(Dest, T);
1281 } break; 1344 } break;
1282 case InstArithmetic::Mul: { 1345 case InstArithmetic::Mul: {
1283 bool TypesAreValidForPmull = 1346 bool TypesAreValidForPmull =
1284 Dest->getType() == IceType_v4i32 || Dest->getType() == IceType_v8i16; 1347 Dest->getType() == IceType_v4i32 || Dest->getType() == IceType_v8i16;
1285 bool InstructionSetIsValidForPmull = 1348 bool InstructionSetIsValidForPmull =
1286 Dest->getType() == IceType_v8i16 || InstructionSet >= SSE4_1; 1349 Dest->getType() == IceType_v8i16 || InstructionSet >= SSE4_1;
1287 if (TypesAreValidForPmull && InstructionSetIsValidForPmull) { 1350 if (TypesAreValidForPmull && InstructionSetIsValidForPmull) {
1288 Variable *T = makeReg(Dest->getType()); 1351 Variable *T = makeReg(Dest->getType());
1289 _movp(T, Src0); 1352 _movp(T, Src0);
1290 _pmull(T, LEGAL_HACK(Src1)); 1353 _pmull(T, Src1);
1291 _movp(Dest, T); 1354 _movp(Dest, T);
1292 } else if (Dest->getType() == IceType_v4i32) { 1355 } else if (Dest->getType() == IceType_v4i32) {
1293 // Lowering sequence: 1356 // Lowering sequence:
1294 // Note: The mask arguments have index 0 on the left. 1357 // Note: The mask arguments have index 0 on the left.
1295 // 1358 //
1296 // movups T1, Src0 1359 // movups T1, Src0
1297 // pshufd T2, Src0, {1,0,3,0} 1360 // pshufd T2, Src0, {1,0,3,0}
1298 // pshufd T3, Src1, {1,0,3,0} 1361 // pshufd T3, Src1, {1,0,3,0}
1299 // # T1 = {Src0[0] * Src1[0], Src0[2] * Src1[2]} 1362 // # T1 = {Src0[0] * Src1[0], Src0[2] * Src1[2]}
1300 // pmuludq T1, Src1 1363 // pmuludq T1, Src1
(...skipping 12 matching lines...) Expand all
1313 // Dest[0, 2], Src[0, 2] 1376 // Dest[0, 2], Src[0, 2]
1314 const unsigned Mask0202 = 0x88; 1377 const unsigned Mask0202 = 0x88;
1315 // Mask that directs pshufd to create a vector with entries 1378 // Mask that directs pshufd to create a vector with entries
1316 // Src[0, 2, 1, 3] 1379 // Src[0, 2, 1, 3]
1317 const unsigned Mask0213 = 0xd8; 1380 const unsigned Mask0213 = 0xd8;
1318 Variable *T1 = makeReg(IceType_v4i32); 1381 Variable *T1 = makeReg(IceType_v4i32);
1319 Variable *T2 = makeReg(IceType_v4i32); 1382 Variable *T2 = makeReg(IceType_v4i32);
1320 Variable *T3 = makeReg(IceType_v4i32); 1383 Variable *T3 = makeReg(IceType_v4i32);
1321 Variable *T4 = makeReg(IceType_v4i32); 1384 Variable *T4 = makeReg(IceType_v4i32);
1322 _movp(T1, Src0); 1385 _movp(T1, Src0);
1323 // TODO(wala): ALIGNHACK: Replace Src0R with Src0 and Src1R 1386 _pshufd(T2, Src0, Mask1030);
1324 // with Src1 after stack operand alignment support is 1387 _pshufd(T3, Src1, Mask1030);
1325 // implemented. 1388 _pmuludq(T1, Src1);
1326 Variable *Src0R = LEGAL_HACK(Src0);
1327 Variable *Src1R = LEGAL_HACK(Src1);
1328 _pshufd(T2, Src0R, Mask1030);
1329 _pshufd(T3, Src1R, Mask1030);
1330 _pmuludq(T1, Src1R);
1331 _pmuludq(T2, T3); 1389 _pmuludq(T2, T3);
1332 _shufps(T1, T2, Ctx->getConstantInt(IceType_i8, Mask0202)); 1390 _shufps(T1, T2, Ctx->getConstantInt(IceType_i8, Mask0202));
1333 _pshufd(T4, T1, Ctx->getConstantInt(IceType_i8, Mask0213)); 1391 _pshufd(T4, T1, Ctx->getConstantInt(IceType_i8, Mask0213));
1334 _movp(Dest, T4); 1392 _movp(Dest, T4);
1335 } else { 1393 } else {
1336 assert(Dest->getType() == IceType_v16i8); 1394 assert(Dest->getType() == IceType_v16i8);
1337 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1); 1395 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);
1338 } 1396 }
1339 } break; 1397 } break;
1340 case InstArithmetic::Shl: 1398 case InstArithmetic::Shl:
1341 case InstArithmetic::Lshr: 1399 case InstArithmetic::Lshr:
1342 case InstArithmetic::Ashr: 1400 case InstArithmetic::Ashr:
1343 case InstArithmetic::Udiv: 1401 case InstArithmetic::Udiv:
1344 case InstArithmetic::Urem: 1402 case InstArithmetic::Urem:
1345 case InstArithmetic::Sdiv: 1403 case InstArithmetic::Sdiv:
1346 case InstArithmetic::Srem: 1404 case InstArithmetic::Srem:
1347 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1); 1405 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);
1348 break; 1406 break;
1349 case InstArithmetic::Fadd: { 1407 case InstArithmetic::Fadd: {
1350 Variable *T = makeReg(Dest->getType()); 1408 Variable *T = makeReg(Dest->getType());
1351 _movp(T, Src0); 1409 _movp(T, Src0);
1352 _addps(T, LEGAL_HACK(Src1)); 1410 _addps(T, Src1);
1353 _movp(Dest, T); 1411 _movp(Dest, T);
1354 } break; 1412 } break;
1355 case InstArithmetic::Fsub: { 1413 case InstArithmetic::Fsub: {
1356 Variable *T = makeReg(Dest->getType()); 1414 Variable *T = makeReg(Dest->getType());
1357 _movp(T, Src0); 1415 _movp(T, Src0);
1358 _subps(T, LEGAL_HACK(Src1)); 1416 _subps(T, Src1);
1359 _movp(Dest, T); 1417 _movp(Dest, T);
1360 } break; 1418 } break;
1361 case InstArithmetic::Fmul: { 1419 case InstArithmetic::Fmul: {
1362 Variable *T = makeReg(Dest->getType()); 1420 Variable *T = makeReg(Dest->getType());
1363 _movp(T, Src0); 1421 _movp(T, Src0);
1364 _mulps(T, LEGAL_HACK(Src1)); 1422 _mulps(T, Src1);
1365 _movp(Dest, T); 1423 _movp(Dest, T);
1366 } break; 1424 } break;
1367 case InstArithmetic::Fdiv: { 1425 case InstArithmetic::Fdiv: {
1368 Variable *T = makeReg(Dest->getType()); 1426 Variable *T = makeReg(Dest->getType());
1369 _movp(T, Src0); 1427 _movp(T, Src0);
1370 _divps(T, LEGAL_HACK(Src1)); 1428 _divps(T, Src1);
1371 _movp(Dest, T); 1429 _movp(Dest, T);
1372 } break; 1430 } break;
1373 case InstArithmetic::Frem: 1431 case InstArithmetic::Frem:
1374 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1); 1432 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);
1375 break; 1433 break;
1376 } 1434 }
1377 #undef LEGAL_HACK
1378 } else { // Dest->getType() is non-i64 scalar 1435 } else { // Dest->getType() is non-i64 scalar
1379 Variable *T_edx = NULL; 1436 Variable *T_edx = NULL;
1380 Variable *T = NULL; 1437 Variable *T = NULL;
1381 switch (Inst->getOp()) { 1438 switch (Inst->getOp()) {
1382 case InstArithmetic::_num: 1439 case InstArithmetic::_num:
1383 llvm_unreachable("Unknown arithmetic operator"); 1440 llvm_unreachable("Unknown arithmetic operator");
1384 break; 1441 break;
1385 case InstArithmetic::Add: 1442 case InstArithmetic::Add:
1386 _mov(T, Src0); 1443 _mov(T, Src0);
1387 _add(T, Src1); 1444 _add(T, Src1);
(...skipping 804 matching lines...) Expand 10 before | Expand all | Expand 10 after
2192 // TODO(wala): Determine the best lowering sequences for each type. 2249 // TODO(wala): Determine the best lowering sequences for each type.
2193 bool CanUsePextr = 2250 bool CanUsePextr =
2194 Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1; 2251 Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1;
2195 if (CanUsePextr && Ty != IceType_v4f32) { 2252 if (CanUsePextr && Ty != IceType_v4f32) {
2196 // Use pextrb, pextrw, or pextrd. 2253 // Use pextrb, pextrw, or pextrd.
2197 Constant *Mask = Ctx->getConstantInt(IceType_i8, Index); 2254 Constant *Mask = Ctx->getConstantInt(IceType_i8, Index);
2198 Variable *SourceVectR = legalizeToVar(SourceVectNotLegalized); 2255 Variable *SourceVectR = legalizeToVar(SourceVectNotLegalized);
2199 _pextr(ExtractedElementR, SourceVectR, Mask); 2256 _pextr(ExtractedElementR, SourceVectR, Mask);
2200 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { 2257 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
2201 // Use pshufd and movd/movss. 2258 // Use pshufd and movd/movss.
2202 //
2203 // ALIGNHACK: Force vector operands to registers in instructions
2204 // that require aligned memory operands until support for data
2205 // alignment is implemented.
2206 #define ALIGN_HACK(Vect) legalizeToVar((Vect))
2207 Operand *SourceVectRM =
2208 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem);
2209 Variable *T = NULL; 2259 Variable *T = NULL;
2210 if (Index) { 2260 if (Index) {
2211 // The shuffle only needs to occur if the element to be extracted 2261 // The shuffle only needs to occur if the element to be extracted
2212 // is not at the lowest index. 2262 // is not at the lowest index.
2213 Constant *Mask = Ctx->getConstantInt(IceType_i8, Index); 2263 Constant *Mask = Ctx->getConstantInt(IceType_i8, Index);
2214 T = makeReg(Ty); 2264 T = makeReg(Ty);
2215 _pshufd(T, ALIGN_HACK(SourceVectRM), Mask); 2265 _pshufd(T, legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem), Mask);
2216 } else { 2266 } else {
2217 T = ALIGN_HACK(SourceVectRM); 2267 T = legalizeToVar(SourceVectNotLegalized);
2218 } 2268 }
2219 2269
2220 if (InVectorElementTy == IceType_i32) { 2270 if (InVectorElementTy == IceType_i32) {
2221 _movd(ExtractedElementR, T); 2271 _movd(ExtractedElementR, T);
2222 } else { // InVectorElementTy == IceType_f32 2272 } else { // InVectorElementTy == IceType_f32
2223 // TODO(wala): _movss is only used here because _mov does not 2273 // TODO(wala): _movss is only used here because _mov does not
2224 // allow a vector source and a scalar destination. _mov should be 2274 // allow a vector source and a scalar destination. _mov should be
2225 // able to be used here. 2275 // able to be used here.
2226 // _movss is a binary instruction, so the FakeDef is needed to 2276 // _movss is a binary instruction, so the FakeDef is needed to
2227 // keep the live range analysis consistent. 2277 // keep the live range analysis consistent.
2228 Context.insert(InstFakeDef::create(Func, ExtractedElementR)); 2278 Context.insert(InstFakeDef::create(Func, ExtractedElementR));
2229 _movss(ExtractedElementR, T); 2279 _movss(ExtractedElementR, T);
2230 } 2280 }
2231 #undef ALIGN_HACK
2232 } else { 2281 } else {
2233 assert(Ty == IceType_v16i8 || Ty == IceType_v16i1); 2282 assert(Ty == IceType_v16i8 || Ty == IceType_v16i1);
2234 // Spill the value to a stack slot and do the extraction in memory. 2283 // Spill the value to a stack slot and do the extraction in memory.
2235 // 2284 //
2236 // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when 2285 // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when
2237 // support for legalizing to mem is implemented. 2286 // support for legalizing to mem is implemented.
2238 Variable *Slot = Func->makeVariable(Ty, Context.getNode()); 2287 Variable *Slot = Func->makeVariable(Ty, Context.getNode());
2239 Slot->setWeight(RegWeight::Zero); 2288 Slot->setWeight(RegWeight::Zero);
2240 _movp(Slot, legalizeToVar(SourceVectNotLegalized)); 2289 _movp(Slot, legalizeToVar(SourceVectNotLegalized));
2241 2290
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after
2280 2329
2281 if (Condition == InstFcmp::True) { 2330 if (Condition == InstFcmp::True) {
2282 // makeVectorOfMinusOnes() requires an integer vector type. 2331 // makeVectorOfMinusOnes() requires an integer vector type.
2283 T = makeVectorOfMinusOnes(IceType_v4i32); 2332 T = makeVectorOfMinusOnes(IceType_v4i32);
2284 } else if (Condition == InstFcmp::False) { 2333 } else if (Condition == InstFcmp::False) {
2285 T = makeVectorOfZeros(Dest->getType()); 2334 T = makeVectorOfZeros(Dest->getType());
2286 } else { 2335 } else {
2287 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); 2336 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
2288 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); 2337 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
2289 2338
2290 // ALIGNHACK: Without support for data alignment, both operands to
2291 // cmpps need to be forced into registers. Once support for data
2292 // alignment is implemented, remove LEGAL_HACK.
2293 #define LEGAL_HACK(Vect) legalizeToVar((Vect))
2294 switch (Condition) { 2339 switch (Condition) {
2295 default: { 2340 default: {
2296 InstX8632Cmpps::CmppsCond Predicate = TableFcmp[Index].Predicate; 2341 InstX8632Cmpps::CmppsCond Predicate = TableFcmp[Index].Predicate;
2297 assert(Predicate != InstX8632Cmpps::Cmpps_Invalid); 2342 assert(Predicate != InstX8632Cmpps::Cmpps_Invalid);
2298 T = makeReg(Src0RM->getType()); 2343 T = makeReg(Src0RM->getType());
2299 _movp(T, Src0RM); 2344 _movp(T, Src0RM);
2300 _cmpps(T, LEGAL_HACK(Src1RM), Predicate); 2345 _cmpps(T, Src1RM, Predicate);
2301 } break; 2346 } break;
2302 case InstFcmp::One: { 2347 case InstFcmp::One: {
2303 // Check both unequal and ordered. 2348 // Check both unequal and ordered.
2304 T = makeReg(Src0RM->getType()); 2349 T = makeReg(Src0RM->getType());
2305 Variable *T2 = makeReg(Src0RM->getType()); 2350 Variable *T2 = makeReg(Src0RM->getType());
2306 Src1RM = LEGAL_HACK(Src1RM);
2307 _movp(T, Src0RM); 2351 _movp(T, Src0RM);
2308 _cmpps(T, Src1RM, InstX8632Cmpps::Cmpps_neq); 2352 _cmpps(T, Src1RM, InstX8632Cmpps::Cmpps_neq);
2309 _movp(T2, Src0RM); 2353 _movp(T2, Src0RM);
2310 _cmpps(T2, Src1RM, InstX8632Cmpps::Cmpps_ord); 2354 _cmpps(T2, Src1RM, InstX8632Cmpps::Cmpps_ord);
2311 _pand(T, T2); 2355 _pand(T, T2);
2312 } break; 2356 } break;
2313 case InstFcmp::Ueq: { 2357 case InstFcmp::Ueq: {
2314 // Check for either equal or unordered. 2358 // Check for either equal or unordered.
2315 T = makeReg(Src0RM->getType()); 2359 T = makeReg(Src0RM->getType());
2316 Variable *T2 = makeReg(Src0RM->getType()); 2360 Variable *T2 = makeReg(Src0RM->getType());
2317 Src1RM = LEGAL_HACK(Src1RM);
2318 _movp(T, Src0RM); 2361 _movp(T, Src0RM);
2319 _cmpps(T, Src1RM, InstX8632Cmpps::Cmpps_eq); 2362 _cmpps(T, Src1RM, InstX8632Cmpps::Cmpps_eq);
2320 _movp(T2, Src0RM); 2363 _movp(T2, Src0RM);
2321 _cmpps(T2, Src1RM, InstX8632Cmpps::Cmpps_unord); 2364 _cmpps(T2, Src1RM, InstX8632Cmpps::Cmpps_unord);
2322 _por(T, T2); 2365 _por(T, T2);
2323 } break; 2366 } break;
2324 } 2367 }
2325 #undef LEGAL_HACK
2326 } 2368 }
2327 2369
2328 _movp(Dest, T); 2370 _movp(Dest, T);
2329 eliminateNextVectorSextInstruction(Dest); 2371 eliminateNextVectorSextInstruction(Dest);
2330 return; 2372 return;
2331 } 2373 }
2332 2374
2333 // Lowering a = fcmp cond, b, c 2375 // Lowering a = fcmp cond, b, c
2334 // ucomiss b, c /* only if C1 != Br_None */ 2376 // ucomiss b, c /* only if C1 != Br_None */
2335 // /* but swap b,c order if SwapOperands==true */ 2377 // /* but swap b,c order if SwapOperands==true */
(...skipping 84 matching lines...) Expand 10 before | Expand all | Expand 10 after
2420 Variable *T1 = makeReg(Ty); 2462 Variable *T1 = makeReg(Ty);
2421 Variable *HighOrderBits = makeVectorOfHighOrderBits(Ty); 2463 Variable *HighOrderBits = makeVectorOfHighOrderBits(Ty);
2422 _movp(T0, Src0RM); 2464 _movp(T0, Src0RM);
2423 _pxor(T0, HighOrderBits); 2465 _pxor(T0, HighOrderBits);
2424 _movp(T1, Src1RM); 2466 _movp(T1, Src1RM);
2425 _pxor(T1, HighOrderBits); 2467 _pxor(T1, HighOrderBits);
2426 Src0RM = T0; 2468 Src0RM = T0;
2427 Src1RM = T1; 2469 Src1RM = T1;
2428 } 2470 }
2429 2471
2430 // TODO: ALIGNHACK: Both operands to compare instructions need to be
2431 // in registers until data alignment support is implemented. Once
2432 // there is support for data alignment, LEGAL_HACK can be removed.
2433 #define LEGAL_HACK(Vect) legalizeToVar((Vect))
2434 Variable *T = makeReg(Ty); 2472 Variable *T = makeReg(Ty);
2435 switch (Condition) { 2473 switch (Condition) {
2436 default: 2474 default:
2437 llvm_unreachable("unexpected condition"); 2475 llvm_unreachable("unexpected condition");
2438 break; 2476 break;
2439 case InstIcmp::Eq: { 2477 case InstIcmp::Eq: {
2440 _movp(T, Src0RM); 2478 _movp(T, Src0RM);
2441 _pcmpeq(T, LEGAL_HACK(Src1RM)); 2479 _pcmpeq(T, Src1RM);
2442 } break; 2480 } break;
2443 case InstIcmp::Ne: { 2481 case InstIcmp::Ne: {
2444 _movp(T, Src0RM); 2482 _movp(T, Src0RM);
2445 _pcmpeq(T, LEGAL_HACK(Src1RM)); 2483 _pcmpeq(T, Src1RM);
2446 Variable *MinusOne = makeVectorOfMinusOnes(Ty); 2484 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
2447 _pxor(T, MinusOne); 2485 _pxor(T, MinusOne);
2448 } break; 2486 } break;
2449 case InstIcmp::Ugt: 2487 case InstIcmp::Ugt:
2450 case InstIcmp::Sgt: { 2488 case InstIcmp::Sgt: {
2451 _movp(T, Src0RM); 2489 _movp(T, Src0RM);
2452 _pcmpgt(T, LEGAL_HACK(Src1RM)); 2490 _pcmpgt(T, Src1RM);
2453 } break; 2491 } break;
2454 case InstIcmp::Uge: 2492 case InstIcmp::Uge:
2455 case InstIcmp::Sge: { 2493 case InstIcmp::Sge: {
2456 // !(Src1RM > Src0RM) 2494 // !(Src1RM > Src0RM)
2457 _movp(T, Src1RM); 2495 _movp(T, Src1RM);
2458 _pcmpgt(T, LEGAL_HACK(Src0RM)); 2496 _pcmpgt(T, Src0RM);
2459 Variable *MinusOne = makeVectorOfMinusOnes(Ty); 2497 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
2460 _pxor(T, MinusOne); 2498 _pxor(T, MinusOne);
2461 } break; 2499 } break;
2462 case InstIcmp::Ult: 2500 case InstIcmp::Ult:
2463 case InstIcmp::Slt: { 2501 case InstIcmp::Slt: {
2464 _movp(T, Src1RM); 2502 _movp(T, Src1RM);
2465 _pcmpgt(T, LEGAL_HACK(Src0RM)); 2503 _pcmpgt(T, Src0RM);
2466 } break; 2504 } break;
2467 case InstIcmp::Ule: 2505 case InstIcmp::Ule:
2468 case InstIcmp::Sle: { 2506 case InstIcmp::Sle: {
2469 // !(Src0RM > Src1RM) 2507 // !(Src0RM > Src1RM)
2470 _movp(T, Src0RM); 2508 _movp(T, Src0RM);
2471 _pcmpgt(T, LEGAL_HACK(Src1RM)); 2509 _pcmpgt(T, Src1RM);
2472 Variable *MinusOne = makeVectorOfMinusOnes(Ty); 2510 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
2473 _pxor(T, MinusOne); 2511 _pxor(T, MinusOne);
2474 } break; 2512 } break;
2475 } 2513 }
2476 #undef LEGAL_HACK
2477 2514
2478 _movp(Dest, T); 2515 _movp(Dest, T);
2479 eliminateNextVectorSextInstruction(Dest); 2516 eliminateNextVectorSextInstruction(Dest);
2480 return; 2517 return;
2481 } 2518 }
2482 2519
2483 // If Src1 is an immediate, or known to be a physical register, we can 2520 // If Src1 is an immediate, or known to be a physical register, we can
2484 // allow Src0 to be a memory operand. Otherwise, Src0 must be copied into 2521 // allow Src0 to be a memory operand. Otherwise, Src0 must be copied into
2485 // a physical register. (Actually, either Src0 or Src1 can be chosen for 2522 // a physical register. (Actually, either Src0 or Src1 can be chosen for
2486 // the physical register, but unfortunately we have to commit to one or 2523 // the physical register, but unfortunately we have to commit to one or
(...skipping 155 matching lines...) Expand 10 before | Expand all | Expand 10 after
2642 // insertelement into index 3 (result is stored in T): 2679 // insertelement into index 3 (result is stored in T):
2643 // T := SourceVectRM 2680 // T := SourceVectRM
2644 // ElementR := ElementR[0, 0] T[0, 2] 2681 // ElementR := ElementR[0, 0] T[0, 2]
2645 // T := T[0, 1] ElementR[3, 0] 2682 // T := T[0, 1] ElementR[3, 0]
2646 const unsigned char Mask1[3] = {0, 192, 128}; 2683 const unsigned char Mask1[3] = {0, 192, 128};
2647 const unsigned char Mask2[3] = {227, 196, 52}; 2684 const unsigned char Mask2[3] = {227, 196, 52};
2648 2685
2649 Constant *Mask1Constant = Ctx->getConstantInt(IceType_i8, Mask1[Index - 1]); 2686 Constant *Mask1Constant = Ctx->getConstantInt(IceType_i8, Mask1[Index - 1]);
2650 Constant *Mask2Constant = Ctx->getConstantInt(IceType_i8, Mask2[Index - 1]); 2687 Constant *Mask2Constant = Ctx->getConstantInt(IceType_i8, Mask2[Index - 1]);
2651 2688
2652 // ALIGNHACK: Force vector operands to registers in instructions
2653 // that require aligned memory operands until support for data
2654 // alignment is implemented.
2655 #define ALIGN_HACK(Vect) legalizeToVar((Vect))
2656 if (Index == 1) { 2689 if (Index == 1) {
2657 SourceVectRM = ALIGN_HACK(SourceVectRM);
2658 _shufps(ElementR, SourceVectRM, Mask1Constant); 2690 _shufps(ElementR, SourceVectRM, Mask1Constant);
2659 _shufps(ElementR, SourceVectRM, Mask2Constant); 2691 _shufps(ElementR, SourceVectRM, Mask2Constant);
2660 _movp(Inst->getDest(), ElementR); 2692 _movp(Inst->getDest(), ElementR);
2661 } else { 2693 } else {
2662 Variable *T = makeReg(Ty); 2694 Variable *T = makeReg(Ty);
2663 _movp(T, SourceVectRM); 2695 _movp(T, SourceVectRM);
2664 _shufps(ElementR, T, Mask1Constant); 2696 _shufps(ElementR, T, Mask1Constant);
2665 _shufps(T, ElementR, Mask2Constant); 2697 _shufps(T, ElementR, Mask2Constant);
2666 _movp(Inst->getDest(), T); 2698 _movp(Inst->getDest(), T);
2667 } 2699 }
2668 #undef ALIGN_HACK
2669 } else { 2700 } else {
2670 assert(Ty == IceType_v16i8 || Ty == IceType_v16i1); 2701 assert(Ty == IceType_v16i8 || Ty == IceType_v16i1);
2671 // Spill the value to a stack slot and perform the insertion in 2702 // Spill the value to a stack slot and perform the insertion in
2672 // memory. 2703 // memory.
2673 // 2704 //
2674 // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when 2705 // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when
2675 // support for legalizing to mem is implemented. 2706 // support for legalizing to mem is implemented.
2676 Variable *Slot = Func->makeVariable(Ty, Context.getNode()); 2707 Variable *Slot = Func->makeVariable(Ty, Context.getNode());
2677 Slot->setWeight(RegWeight::Zero); 2708 Slot->setWeight(RegWeight::Zero);
2678 _movp(Slot, legalizeToVar(SourceVectNotLegalized)); 2709 _movp(Slot, legalizeToVar(SourceVectNotLegalized));
(...skipping 941 matching lines...) Expand 10 before | Expand all | Expand 10 after
3620 Variable *Dest = Inst->getDest(); 3651 Variable *Dest = Inst->getDest();
3621 Operand *SrcT = Inst->getTrueOperand(); 3652 Operand *SrcT = Inst->getTrueOperand();
3622 Operand *SrcF = Inst->getFalseOperand(); 3653 Operand *SrcF = Inst->getFalseOperand();
3623 Operand *Condition = Inst->getCondition(); 3654 Operand *Condition = Inst->getCondition();
3624 3655
3625 if (isVectorType(Dest->getType())) { 3656 if (isVectorType(Dest->getType())) {
3626 Type SrcTy = SrcT->getType(); 3657 Type SrcTy = SrcT->getType();
3627 Variable *T = makeReg(SrcTy); 3658 Variable *T = makeReg(SrcTy);
3628 Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem); 3659 Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem);
3629 Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem); 3660 Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem);
3630 // ALIGNHACK: Until data alignment support is implemented, vector
3631 // instructions need to have vector operands in registers. Once
3632 // there is support for data alignment, LEGAL_HACK can be removed.
3633 #define LEGAL_HACK(Vect) legalizeToVar((Vect))
3634 if (InstructionSet >= SSE4_1) { 3661 if (InstructionSet >= SSE4_1) {
3635 // TODO(wala): If the condition operand is a constant, use blendps 3662 // TODO(wala): If the condition operand is a constant, use blendps
3636 // or pblendw. 3663 // or pblendw.
3637 // 3664 //
3638 // Use blendvps or pblendvb to implement select. 3665 // Use blendvps or pblendvb to implement select.
3639 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 || 3666 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 ||
3640 SrcTy == IceType_v4f32) { 3667 SrcTy == IceType_v4f32) {
3641 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem); 3668 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);
3642 Variable *xmm0 = makeReg(IceType_v4i32, Reg_xmm0); 3669 Variable *xmm0 = makeReg(IceType_v4i32, Reg_xmm0);
3643 _movp(xmm0, ConditionRM); 3670 _movp(xmm0, ConditionRM);
3644 _psll(xmm0, Ctx->getConstantInt(IceType_i8, 31)); 3671 _psll(xmm0, Ctx->getConstantInt(IceType_i8, 31));
3645 _movp(T, SrcFRM); 3672 _movp(T, SrcFRM);
3646 _blendvps(T, LEGAL_HACK(SrcTRM), xmm0); 3673 _blendvps(T, SrcTRM, xmm0);
3647 _movp(Dest, T); 3674 _movp(Dest, T);
3648 } else { 3675 } else {
3649 assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16); 3676 assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16);
3650 Type SignExtTy = Condition->getType() == IceType_v8i1 ? IceType_v8i16 3677 Type SignExtTy = Condition->getType() == IceType_v8i1 ? IceType_v8i16
3651 : IceType_v16i8; 3678 : IceType_v16i8;
3652 Variable *xmm0 = makeReg(SignExtTy, Reg_xmm0); 3679 Variable *xmm0 = makeReg(SignExtTy, Reg_xmm0);
3653 lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition)); 3680 lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition));
3654 _movp(T, SrcFRM); 3681 _movp(T, SrcFRM);
3655 _pblendvb(T, LEGAL_HACK(SrcTRM), xmm0); 3682 _pblendvb(T, SrcTRM, xmm0);
3656 _movp(Dest, T); 3683 _movp(Dest, T);
3657 } 3684 }
3658 return; 3685 return;
3659 } 3686 }
3660 // Lower select without SSE4.1: 3687 // Lower select without SSE4.1:
3661 // a=d?b:c ==> 3688 // a=d?b:c ==>
3662 // if elementtype(d) != i1: 3689 // if elementtype(d) != i1:
3663 // d=sext(d); 3690 // d=sext(d);
3664 // a=(b&d)|(c&~d); 3691 // a=(b&d)|(c&~d);
3665 Variable *T2 = makeReg(SrcTy); 3692 Variable *T2 = makeReg(SrcTy);
3666 // Sign extend the condition operand if applicable. 3693 // Sign extend the condition operand if applicable.
3667 if (SrcTy == IceType_v4f32) { 3694 if (SrcTy == IceType_v4f32) {
3668 // The sext operation takes only integer arguments. 3695 // The sext operation takes only integer arguments.
3669 Variable *T3 = Func->makeVariable(IceType_v4i32, Context.getNode()); 3696 Variable *T3 = Func->makeVariable(IceType_v4i32, Context.getNode());
3670 lowerCast(InstCast::create(Func, InstCast::Sext, T3, Condition)); 3697 lowerCast(InstCast::create(Func, InstCast::Sext, T3, Condition));
3671 _movp(T, T3); 3698 _movp(T, T3);
3672 } else if (typeElementType(SrcTy) != IceType_i1) { 3699 } else if (typeElementType(SrcTy) != IceType_i1) {
3673 lowerCast(InstCast::create(Func, InstCast::Sext, T, Condition)); 3700 lowerCast(InstCast::create(Func, InstCast::Sext, T, Condition));
3674 } else { 3701 } else {
3675 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem); 3702 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);
3676 _movp(T, ConditionRM); 3703 _movp(T, ConditionRM);
3677 } 3704 }
3678 _movp(T2, T); 3705 _movp(T2, T);
3679 _pand(T, LEGAL_HACK(SrcTRM)); 3706 _pand(T, SrcTRM);
3680 _pandn(T2, LEGAL_HACK(SrcFRM)); 3707 _pandn(T2, SrcFRM);
3681 _por(T, T2); 3708 _por(T, T2);
3682 _movp(Dest, T); 3709 _movp(Dest, T);
3683 #undef LEGAL_HACK
3684 3710
3685 return; 3711 return;
3686 } 3712 }
3687 3713
3688 // a=d?b:c ==> cmp d,0; a=b; jne L1; FakeUse(a); a=c; L1: 3714 // a=d?b:c ==> cmp d,0; a=b; jne L1; FakeUse(a); a=c; L1:
3689 Operand *ConditionRMI = legalize(Condition); 3715 Operand *ConditionRMI = legalize(Condition);
3690 Constant *Zero = Ctx->getConstantZero(IceType_i32); 3716 Constant *Zero = Ctx->getConstantZero(IceType_i32);
3691 InstX8632Label *Label = InstX8632Label::create(Func, this); 3717 InstX8632Label *Label = InstX8632Label::create(Func, this);
3692 3718
3693 if (Dest->getType() == IceType_i64) { 3719 if (Dest->getType() == IceType_i64) {
(...skipping 542 matching lines...) Expand 10 before | Expand all | Expand 10 after
4236 for (SizeT i = 0; i < Size; ++i) { 4262 for (SizeT i = 0; i < Size; ++i) {
4237 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; 4263 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n";
4238 } 4264 }
4239 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; 4265 Str << "\t.size\t" << MangledName << ", " << Size << "\n";
4240 } 4266 }
4241 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName 4267 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName
4242 << "\n"; 4268 << "\n";
4243 } 4269 }
4244 4270
4245 } // end of namespace Ice 4271 } // end of namespace Ice
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698