Chromium Code Reviews

Side by Side Diff: src/IceTargetLoweringX8632.cpp

Issue 1169493002: Subzero: Improve/refactor folding loads into the next instruction. (Closed)
Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Code review changes (created 5 years, 6 months ago)
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 // 9 //
10 // This file implements the TargetLoweringX8632 class, which 10 // This file implements the TargetLoweringX8632 class, which
(...skipping 464 matching lines...)
475 return; 475 return;
476 476
477 // TODO: It should be sufficient to use the fastest liveness 477 // TODO: It should be sufficient to use the fastest liveness
478 // calculation, i.e. livenessLightweight(). However, for some 478 // calculation, i.e. livenessLightweight(). However, for some
479 // reason that slows down the rest of the translation. Investigate. 479 // reason that slows down the rest of the translation. Investigate.
480 Func->liveness(Liveness_Basic); 480 Func->liveness(Liveness_Basic);
481 if (Func->hasError()) 481 if (Func->hasError())
482 return; 482 return;
483 Func->dump("After x86 address mode opt"); 483 Func->dump("After x86 address mode opt");
484 484
485 doLoadOpt();
485 Func->genCode(); 486 Func->genCode();
486 if (Func->hasError()) 487 if (Func->hasError())
487 return; 488 return;
488 Func->dump("After x86 codegen"); 489 Func->dump("After x86 codegen");
489 490
490 // Register allocation. This requires instruction renumbering and 491 // Register allocation. This requires instruction renumbering and
491 // full liveness analysis. 492 // full liveness analysis.
492 Func->renumberInstructions(); 493 Func->renumberInstructions();
493 if (Func->hasError()) 494 if (Func->hasError())
494 return; 495 return;
(...skipping 70 matching lines...)
565 if (Func->hasError()) 566 if (Func->hasError())
566 return; 567 return;
567 Func->dump("After stack frame mapping"); 568 Func->dump("After stack frame mapping");
568 569
569 // Nop insertion 570 // Nop insertion
570 if (Ctx->getFlags().shouldDoNopInsertion()) { 571 if (Ctx->getFlags().shouldDoNopInsertion()) {
571 Func->doNopInsertion(); 572 Func->doNopInsertion();
572 } 573 }
573 } 574 }
574 575
576 namespace {
577
578 // Converts a ConstantInteger32 operand into its constant value, or
579 // MemoryOrderInvalid if the operand is not a ConstantInteger32.
580 uint64_t getConstantMemoryOrder(Operand *Opnd) {
581 if (auto Integer = llvm::dyn_cast<ConstantInteger32>(Opnd))
582 return Integer->getValue();
583 return Intrinsics::MemoryOrderInvalid;
584 }
585
586 // Determines whether the dest of a Load instruction can be folded
587 // into one of the src operands of a 2-operand instruction. This is
588 // true as long as the load dest matches exactly one of the binary
589 // instruction's src operands. Replaces Src0 or Src1 with LoadSrc if
590 // the answer is true.
591 bool canFoldLoadIntoBinaryInst(Operand *LoadSrc, Variable *LoadDest,
592 Operand *&Src0, Operand *&Src1) {
593 if (Src0 == LoadDest && Src1 != LoadDest) {
594 Src0 = LoadSrc;
595 return true;
596 }
597 if (Src0 != LoadDest && Src1 == LoadDest) {
598 Src1 = LoadSrc;
599 return true;
600 }
601 return false;
602 }
603
604 } // end of anonymous namespace
605
606 void TargetX8632::doLoadOpt() {
607 for (CfgNode *Node : Func->getNodes()) {
608 Context.init(Node);
609 while (!Context.atEnd()) {
610 Variable *LoadDest = nullptr;
611 Operand *LoadSrc = nullptr;
612 Inst *CurInst = Context.getCur();
613 Inst *Next = Context.getNextInst();
614 // Determine whether the current instruction is a Load
615 // instruction or equivalent.
616 if (auto *Load = llvm::dyn_cast<InstLoad>(CurInst)) {
617 // An InstLoad always qualifies.
618 LoadDest = Load->getDest();
619 const bool DoLegalize = false;
620 LoadSrc = formMemoryOperand(Load->getSourceAddress(),
621 LoadDest->getType(), DoLegalize);
622 } else if (auto *Intrin = llvm::dyn_cast<InstIntrinsicCall>(CurInst)) {
623 // An AtomicLoad intrinsic qualifies as long as it has a valid
624 // memory ordering, and can be implemented in a single
625 // instruction (i.e., not i64).
626 Intrinsics::IntrinsicID ID = Intrin->getIntrinsicInfo().ID;
627 if (ID == Intrinsics::AtomicLoad &&
628 Intrin->getDest()->getType() != IceType_i64 &&
629 Intrinsics::isMemoryOrderValid(
630 ID, getConstantMemoryOrder(Intrin->getArg(1)))) {
631 LoadDest = Intrin->getDest();
632 const bool DoLegalize = false;
633 LoadSrc = formMemoryOperand(Intrin->getArg(0), LoadDest->getType(),
634 DoLegalize);
635 }
636 }
637 // A Load instruction can be folded into the following
638 // instruction only if the following instruction ends the Load's
639 // Dest variable's live range.
640 if (LoadDest && Next && Next->isLastUse(LoadDest)) {
641 assert(LoadSrc);
642 Inst *NewInst = nullptr;
643 if (auto *Arith = llvm::dyn_cast<InstArithmetic>(Next)) {
644 Operand *Src0 = Arith->getSrc(0);
645 Operand *Src1 = Arith->getSrc(1);
646 if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) {
647 NewInst = InstArithmetic::create(Func, Arith->getOp(),
648 Arith->getDest(), Src0, Src1);
649 }
650 } else if (auto *Icmp = llvm::dyn_cast<InstIcmp>(Next)) {
651 Operand *Src0 = Icmp->getSrc(0);
652 Operand *Src1 = Icmp->getSrc(1);
653 if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) {
654 NewInst = InstIcmp::create(Func, Icmp->getCondition(),
655 Icmp->getDest(), Src0, Src1);
656 }
657 } else if (auto *Fcmp = llvm::dyn_cast<InstFcmp>(Next)) {
658 Operand *Src0 = Fcmp->getSrc(0);
659 Operand *Src1 = Fcmp->getSrc(1);
660 if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) {
661 NewInst = InstFcmp::create(Func, Fcmp->getCondition(),
662 Fcmp->getDest(), Src0, Src1);
663 }
664 } else if (auto *Select = llvm::dyn_cast<InstSelect>(Next)) {
665 Operand *Src0 = Select->getTrueOperand();
666 Operand *Src1 = Select->getFalseOperand();
667 if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) {
668 NewInst = InstSelect::create(Func, Select->getDest(),
669 Select->getCondition(), Src0, Src1);
670 }
671 } else if (auto *Cast = llvm::dyn_cast<InstCast>(Next)) {
672 // The load dest can always be folded into a Cast
673 // instruction.
674 Variable *Src0 = llvm::dyn_cast<Variable>(Cast->getSrc(0));
675 if (Src0 == LoadDest) {
676 NewInst = InstCast::create(Func, Cast->getCastKind(),
677 Cast->getDest(), LoadSrc);
678 }
679 }
680 if (NewInst) {
681 CurInst->setDeleted();
682 Next->setDeleted();
683 Context.insert(NewInst);
684 // Update NewInst->LiveRangesEnded so that target lowering
685 // may benefit. Also update NewInst->HasSideEffects.
686 NewInst->spliceLivenessInfo(Next, CurInst);
687 }
688 }
689 Context.advanceCur();
690 Context.advanceNext();
691 }
692 }
693 Func->dump("After load optimization");
694 }
695
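
The folding rule that the new doLoadOpt() pass relies on is the one canFoldLoadIntoBinaryInst() encodes: the load's dest may be substituted into the following instruction only when it matches exactly one of that instruction's two source operands (and, per the check in doLoadOpt(), only when that instruction ends the dest's live range). A minimal standalone sketch of the substitution rule, using hypothetical toy types rather than Subzero's Operand/Variable classes:

#include <cassert>

// Toy stand-ins for Subzero's Operand/Variable hierarchy (illustration only).
struct Operand { virtual ~Operand() = default; };
struct Variable : Operand {};
struct MemOperand : Operand {};

// Same decision as canFoldLoadIntoBinaryInst: replace the single source
// operand that matches the load's dest; refuse if the dest appears in
// neither operand or in both.
bool canFold(Operand *LoadSrc, Variable *LoadDest, Operand *&Src0,
             Operand *&Src1) {
  if (Src0 == LoadDest && Src1 != LoadDest) { Src0 = LoadSrc; return true; }
  if (Src0 != LoadDest && Src1 == LoadDest) { Src1 = LoadSrc; return true; }
  return false;
}

int main() {
  Variable a, b;   // models: a = load [mem]; c = b + a   (last use of a)
  MemOperand mem;  // stands in for the operand formMemoryOperand() builds
  Operand *Src0 = &b, *Src1 = &a;
  assert(canFold(&mem, &a, Src0, Src1)); // a's use is replaced by [mem]
  assert(Src0 == &b && Src1 == &mem);    // i.e. c = b + [mem]
  return 0;
}

The real pass additionally deletes both original instructions and calls spliceLivenessInfo() so the fused instruction inherits LiveRangesEnded and HasSideEffects, as shown in doLoadOpt() above.
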
575 bool TargetX8632::doBranchOpt(Inst *I, const CfgNode *NextNode) { 696 bool TargetX8632::doBranchOpt(Inst *I, const CfgNode *NextNode) {
576 if (InstX8632Br *Br = llvm::dyn_cast<InstX8632Br>(I)) { 697 if (InstX8632Br *Br = llvm::dyn_cast<InstX8632Br>(I)) {
577 return Br->optimizeBranch(NextNode); 698 return Br->optimizeBranch(NextNode);
578 } 699 }
579 return false; 700 return false;
580 } 701 }
581 702
582 IceString TargetX8632::RegNames[] = { 703 IceString TargetX8632::RegNames[] = {
583 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \ 704 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \
584 frameptr, isI8, isInt, isFP) \ 705 frameptr, isI8, isInt, isFP) \
(...skipping 212 matching lines...)
797 // If there is a separate locals area, this specifies the alignment 918 // If there is a separate locals area, this specifies the alignment
798 // for it. 919 // for it.
799 uint32_t LocalsSlotsAlignmentBytes = 0; 920 uint32_t LocalsSlotsAlignmentBytes = 0;
800 // The entire spill locations area gets aligned to largest natural 921 // The entire spill locations area gets aligned to largest natural
801 // alignment of the variables that have a spill slot. 922 // alignment of the variables that have a spill slot.
802 uint32_t SpillAreaAlignmentBytes = 0; 923 uint32_t SpillAreaAlignmentBytes = 0;
803 // A spill slot linked to a variable with a stack slot should reuse 924 // A spill slot linked to a variable with a stack slot should reuse
804 // that stack slot. 925 // that stack slot.
805 std::function<bool(Variable *)> TargetVarHook = 926 std::function<bool(Variable *)> TargetVarHook =
806 [&VariablesLinkedToSpillSlots](Variable *Var) { 927 [&VariablesLinkedToSpillSlots](Variable *Var) {
807 if (SpillVariable *SpillVar = llvm::dyn_cast<SpillVariable>(Var)) { 928 if (SpillVariable *SpillVar = llvm::dyn_cast<SpillVariable>(Var)) {
808 assert(Var->getWeight().isZero()); 929 assert(Var->getWeight().isZero());
809 if (SpillVar->getLinkedTo() && !SpillVar->getLinkedTo()->hasReg()) { 930 if (SpillVar->getLinkedTo() && !SpillVar->getLinkedTo()->hasReg()) {
810 VariablesLinkedToSpillSlots.push_back(Var); 931 VariablesLinkedToSpillSlots.push_back(Var);
811 return true; 932 return true;
812 } 933 }
813 } 934 }
814 return false; 935 return false;
815 }; 936 };
816 937
817 // Compute the list of spilled variables and bounds for GlobalsSize, etc. 938 // Compute the list of spilled variables and bounds for GlobalsSize, etc.
818 getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize, 939 getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize,
819 &SpillAreaSizeBytes, &SpillAreaAlignmentBytes, 940 &SpillAreaSizeBytes, &SpillAreaAlignmentBytes,
820 &LocalsSlotsAlignmentBytes, TargetVarHook); 941 &LocalsSlotsAlignmentBytes, TargetVarHook);
821 uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes; 942 uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes;
822 SpillAreaSizeBytes += GlobalsSize; 943 SpillAreaSizeBytes += GlobalsSize;
823 944
824 // Add push instructions for preserved registers. 945 // Add push instructions for preserved registers.
825 uint32_t NumCallee = 0; 946 uint32_t NumCallee = 0;
(...skipping 337 matching lines...)
1163 _and(T, Ctx->getConstantInt32(-Alignment)); 1284 _and(T, Ctx->getConstantInt32(-Alignment));
1164 _sub(esp, T); 1285 _sub(esp, T);
1165 } 1286 }
1166 _mov(Dest, esp); 1287 _mov(Dest, esp);
1167 } 1288 }
1168 1289
1169 void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) { 1290 void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) {
1170 Variable *Dest = Inst->getDest(); 1291 Variable *Dest = Inst->getDest();
1171 Operand *Src0 = legalize(Inst->getSrc(0)); 1292 Operand *Src0 = legalize(Inst->getSrc(0));
1172 Operand *Src1 = legalize(Inst->getSrc(1)); 1293 Operand *Src1 = legalize(Inst->getSrc(1));
1294 if (Inst->isCommutative()) {
1295 if (!llvm::isa<Variable>(Src0) && llvm::isa<Variable>(Src1))
1296 std::swap(Src0, Src1);
1297 }
1173 if (Dest->getType() == IceType_i64) { 1298 if (Dest->getType() == IceType_i64) {
1174 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); 1299 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1175 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 1300 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1176 Operand *Src0Lo = loOperand(Src0); 1301 Operand *Src0Lo = loOperand(Src0);
1177 Operand *Src0Hi = hiOperand(Src0); 1302 Operand *Src0Hi = hiOperand(Src0);
1178 Operand *Src1Lo = loOperand(Src1); 1303 Operand *Src1Lo = loOperand(Src1);
1179 Operand *Src1Hi = hiOperand(Src1); 1304 Operand *Src1Hi = hiOperand(Src1);
1180 Variable *T_Lo = nullptr, *T_Hi = nullptr; 1305 Variable *T_Lo = nullptr, *T_Hi = nullptr;
1181 switch (Inst->getOp()) { 1306 switch (Inst->getOp()) {
1182 case InstArithmetic::_num: 1307 case InstArithmetic::_num:
(...skipping 1701 matching lines...)
2884 OperandX8632Mem *Loc = 3009 OperandX8632Mem *Loc =
2885 getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset); 3010 getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset);
2886 _store(legalizeToVar(ElementToInsertNotLegalized), Loc); 3011 _store(legalizeToVar(ElementToInsertNotLegalized), Loc);
2887 3012
2888 Variable *T = makeReg(Ty); 3013 Variable *T = makeReg(Ty);
2889 _movp(T, Slot); 3014 _movp(T, Slot);
2890 _movp(Inst->getDest(), T); 3015 _movp(Inst->getDest(), T);
2891 } 3016 }
2892 } 3017 }
2893 3018
2894 namespace {
2895
2896 // Converts a ConstantInteger32 operand into its constant value, or
2897 // MemoryOrderInvalid if the operand is not a ConstantInteger32.
2898 uint64_t getConstantMemoryOrder(Operand *Opnd) {
2899 if (auto Integer = llvm::dyn_cast<ConstantInteger32>(Opnd))
2900 return Integer->getValue();
2901 return Intrinsics::MemoryOrderInvalid;
2902 }
2903
2904 } // end of anonymous namespace
2905
2906 void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { 3019 void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
2907 switch (Intrinsics::IntrinsicID ID = Instr->getIntrinsicInfo().ID) { 3020 switch (Intrinsics::IntrinsicID ID = Instr->getIntrinsicInfo().ID) {
2908 case Intrinsics::AtomicCmpxchg: { 3021 case Intrinsics::AtomicCmpxchg: {
2909 if (!Intrinsics::isMemoryOrderValid( 3022 if (!Intrinsics::isMemoryOrderValid(
2910 ID, getConstantMemoryOrder(Instr->getArg(3)), 3023 ID, getConstantMemoryOrder(Instr->getArg(3)),
2911 getConstantMemoryOrder(Instr->getArg(4)))) { 3024 getConstantMemoryOrder(Instr->getArg(4)))) {
2912 Func->setError("Unexpected memory ordering for AtomicCmpxchg"); 3025 Func->setError("Unexpected memory ordering for AtomicCmpxchg");
2913 return; 3026 return;
2914 } 3027 }
2915 Variable *DestPrev = Instr->getDest(); 3028 Variable *DestPrev = Instr->getDest();
(...skipping 68 matching lines...)
2984 OperandX8632Mem *Addr = formMemoryOperand(Instr->getArg(0), IceType_f64); 3097 OperandX8632Mem *Addr = formMemoryOperand(Instr->getArg(0), IceType_f64);
2985 _movq(T, Addr); 3098 _movq(T, Addr);
2986 // Then cast the bits back out of the XMM register to the i64 Dest. 3099 // Then cast the bits back out of the XMM register to the i64 Dest.
2987 InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, Dest, T); 3100 InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, Dest, T);
2988 lowerCast(Cast); 3101 lowerCast(Cast);
2989 // Make sure that the atomic load isn't elided when unused. 3102 // Make sure that the atomic load isn't elided when unused.
2990 Context.insert(InstFakeUse::create(Func, Dest->getLo())); 3103 Context.insert(InstFakeUse::create(Func, Dest->getLo()));
2991 Context.insert(InstFakeUse::create(Func, Dest->getHi())); 3104 Context.insert(InstFakeUse::create(Func, Dest->getHi()));
2992 return; 3105 return;
2993 } 3106 }
2994 InstLoad *Load = InstLoad::create(Func, Dest, Instr->getArg(0)); 3107 InstLoad *Load = InstLoad::create(Func, Dest, Instr->getArg(0));
jvoung (off chromium) 2015/06/03 21:14:05 Maybe at some point the load can just be tagged wi…
Jim Stichnoth 2015/06/03 22:51:36 This brings up a really good point. The HasSideEf…
2995 lowerLoad(Load); 3108 lowerLoad(Load);
2996 // Make sure the atomic load isn't elided when unused, by adding a FakeUse. 3109 // Make sure the atomic load isn't elided when unused, by adding a FakeUse.
2997 // Since lowerLoad may fuse the load w/ an arithmetic instruction, 3110 // Since lowerLoad may fuse the load w/ an arithmetic instruction,
2998 // insert the FakeUse on the last-inserted instruction's dest. 3111 // insert the FakeUse on the last-inserted instruction's dest.
2999 Context.insert( 3112 Context.insert(
3000 InstFakeUse::create(Func, Context.getLastInserted()->getDest())); 3113 InstFakeUse::create(Func, Context.getLastInserted()->getDest()));
3001 return; 3114 return;
3002 } 3115 }
3003 case Intrinsics::AtomicRMW: 3116 case Intrinsics::AtomicRMW:
3004 if (!Intrinsics::isMemoryOrderValid( 3117 if (!Intrinsics::isMemoryOrderValid(
3005 ID, getConstantMemoryOrder(Instr->getArg(3)))) { 3118 ID, getConstantMemoryOrder(Instr->getArg(3)))) {
3006 Func->setError("Unexpected memory ordering for AtomicRMW"); 3119 Func->setError("Unexpected memory ordering for AtomicRMW");
3007 return; 3120 return;
3008 } 3121 }
3009 lowerAtomicRMW(Instr->getDest(), 3122 lowerAtomicRMW(
3010 static_cast<uint32_t>(llvm::cast<ConstantInteger32>( 3123 Instr->getDest(),
3011 Instr->getArg(0))->getValue()), 3124 static_cast<uint32_t>(
3012 Instr->getArg(1), Instr->getArg(2)); 3125 llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue()),
3126 Instr->getArg(1), Instr->getArg(2));
3013 return; 3127 return;
3014 case Intrinsics::AtomicStore: { 3128 case Intrinsics::AtomicStore: {
3015 if (!Intrinsics::isMemoryOrderValid( 3129 if (!Intrinsics::isMemoryOrderValid(
3016 ID, getConstantMemoryOrder(Instr->getArg(2)))) { 3130 ID, getConstantMemoryOrder(Instr->getArg(2)))) {
3017 Func->setError("Unexpected memory ordering for AtomicStore"); 3131 Func->setError("Unexpected memory ordering for AtomicStore");
3018 return; 3132 return;
3019 } 3133 }
3020 // We require the memory address to be naturally aligned. 3134 // We require the memory address to be naturally aligned.
3021 // Given that is the case, then normal stores are atomic. 3135 // Given that is the case, then normal stores are atomic.
3022 // Add a fence after the store to make it visible. 3136 // Add a fence after the store to make it visible.
(...skipping 822 matching lines...)
3845 } 3959 }
3846 3960
3847 } // anonymous namespace 3961 } // anonymous namespace
3848 3962
3849 void TargetX8632::lowerLoad(const InstLoad *Load) { 3963 void TargetX8632::lowerLoad(const InstLoad *Load) {
3850 // A Load instruction can be treated the same as an Assign 3964 // A Load instruction can be treated the same as an Assign
3851 // instruction, after the source operand is transformed into an 3965 // instruction, after the source operand is transformed into an
3852 // OperandX8632Mem operand. Note that the address mode 3966 // OperandX8632Mem operand. Note that the address mode
3853 // optimization already creates an OperandX8632Mem operand, so it 3967 // optimization already creates an OperandX8632Mem operand, so it
3854 // doesn't need another level of transformation. 3968 // doesn't need another level of transformation.
3855 Type Ty = Load->getDest()->getType(); 3969 Variable *DestLoad = Load->getDest();
3970 Type Ty = DestLoad->getType();
3856 Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty); 3971 Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty);
3857
3858 // Fuse this load with a subsequent Arithmetic instruction in the
3859 // following situations:
3860 // a=[mem]; c=b+a ==> c=b+[mem] if last use of a and a not in b
3861 // a=[mem]; c=a+b ==> c=b+[mem] if commutative and above is true
3862 //
3863 // Fuse this load with a subsequent Cast instruction:
3864 // a=[mem]; b=cast(a) ==> b=cast([mem]) if last use of a
3865 //
3866 // TODO: Clean up and test thoroughly.
3867 // (E.g., if there is an mfence-all make sure the load ends up on the
3868 // same side of the fence).
3869 //
3870 // TODO: Why limit to Arithmetic instructions? This could probably be
3871 // applied to most any instruction type. Look at all source operands
3872 // in the following instruction, and if there is one instance of the
3873 // load instruction's dest variable, and that instruction ends that
3874 // variable's live range, then make the substitution. Deal with
3875 // commutativity optimization in the arithmetic instruction lowering.
3876 //
3877 // TODO(stichnot): Do load fusing as a separate pass. Run it before
3878 // the bool folding pass. Modify Ice::Inst to allow src operands to
3879 // be replaced, including updating Inst::LiveRangesEnded, to avoid
3880 // having to manually mostly clone each instruction type.
3881 Inst *NextInst = Context.getNextInst();
3882 Variable *DestLoad = Load->getDest();
3883 if (NextInst && NextInst->isLastUse(DestLoad)) {
3884 if (auto *Arith = llvm::dyn_cast<InstArithmetic>(NextInst)) {
3885 InstArithmetic *NewArith = nullptr;
3886 Variable *Src0Arith = llvm::dyn_cast<Variable>(Arith->getSrc(0));
3887 Variable *Src1Arith = llvm::dyn_cast<Variable>(Arith->getSrc(1));
3888 if (Src1Arith == DestLoad && DestLoad != Src0Arith) {
3889 NewArith = InstArithmetic::create(
3890 Func, Arith->getOp(), Arith->getDest(), Arith->getSrc(0), Src0);
3891 } else if (Src0Arith == DestLoad && Arith->isCommutative() &&
3892 DestLoad != Src1Arith) {
3893 NewArith = InstArithmetic::create(
3894 Func, Arith->getOp(), Arith->getDest(), Arith->getSrc(1), Src0);
3895 }
3896 if (NewArith) {
3897 Arith->setDeleted();
3898 Context.advanceNext();
3899 lowerArithmetic(NewArith);
3900 return;
3901 }
3902 } else if (auto *Cast = llvm::dyn_cast<InstCast>(NextInst)) {
3903 Variable *Src0Cast = llvm::dyn_cast<Variable>(Cast->getSrc(0));
3904 if (Src0Cast == DestLoad) {
3905 InstCast *NewCast =
3906 InstCast::create(Func, Cast->getCastKind(), Cast->getDest(), Src0);
3907 Cast->setDeleted();
3908 Context.advanceNext();
3909 lowerCast(NewCast);
3910 return;
3911 }
3912 }
3913 }
3914
3915 InstAssign *Assign = InstAssign::create(Func, DestLoad, Src0); 3972 InstAssign *Assign = InstAssign::create(Func, DestLoad, Src0);
3916 lowerAssign(Assign); 3973 lowerAssign(Assign);
3917 } 3974 }
3918 3975
3919 void TargetX8632::doAddressOptLoad() { 3976 void TargetX8632::doAddressOptLoad() {
3920 Inst *Inst = Context.getCur(); 3977 Inst *Inst = Context.getCur();
3921 Variable *Dest = Inst->getDest(); 3978 Variable *Dest = Inst->getDest();
3922 Operand *Addr = Inst->getSrc(0); 3979 Operand *Addr = Inst->getSrc(0);
3923 Variable *Index = nullptr; 3980 Variable *Index = nullptr;
3924 uint16_t Shift = 0; 3981 uint16_t Shift = 0;
(...skipping 707 matching lines...)
4632 bool IsSrc1ImmOrReg = false; 4689 bool IsSrc1ImmOrReg = false;
4633 if (llvm::isa<Constant>(Src1)) { 4690 if (llvm::isa<Constant>(Src1)) {
4634 IsSrc1ImmOrReg = true; 4691 IsSrc1ImmOrReg = true;
4635 } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) { 4692 } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) {
4636 if (Var->hasReg()) 4693 if (Var->hasReg())
4637 IsSrc1ImmOrReg = true; 4694 IsSrc1ImmOrReg = true;
4638 } 4695 }
4639 return legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg); 4696 return legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg);
4640 } 4697 }
4641 4698
4642 OperandX8632Mem *TargetX8632::formMemoryOperand(Operand *Operand, Type Ty) { 4699 OperandX8632Mem *TargetX8632::formMemoryOperand(Operand *Operand, Type Ty,
4700 bool DoLegalize) {
4643 OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand); 4701 OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand);
4644 // It may be the case that address mode optimization already creates 4702 // It may be the case that address mode optimization already creates
4645 // an OperandX8632Mem, so in that case it wouldn't need another level 4703 // an OperandX8632Mem, so in that case it wouldn't need another level
4646 // of transformation. 4704 // of transformation.
4647 if (!Mem) { 4705 if (!Mem) {
4648 Variable *Base = llvm::dyn_cast<Variable>(Operand); 4706 Variable *Base = llvm::dyn_cast<Variable>(Operand);
4649 Constant *Offset = llvm::dyn_cast<Constant>(Operand); 4707 Constant *Offset = llvm::dyn_cast<Constant>(Operand);
4650 assert(Base || Offset); 4708 assert(Base || Offset);
4651 if (Offset) { 4709 if (Offset) {
4652 // Make sure Offset is not undef. 4710 // Make sure Offset is not undef.
4653 Offset = llvm::cast<Constant>(legalize(Offset)); 4711 Offset = llvm::cast<Constant>(legalize(Offset));
4654 assert(llvm::isa<ConstantInteger32>(Offset) || 4712 assert(llvm::isa<ConstantInteger32>(Offset) ||
4655 llvm::isa<ConstantRelocatable>(Offset)); 4713 llvm::isa<ConstantRelocatable>(Offset));
4656 } 4714 }
4657 Mem = OperandX8632Mem::create(Func, Ty, Base, Offset); 4715 Mem = OperandX8632Mem::create(Func, Ty, Base, Offset);
4658 } 4716 }
4659 return llvm::cast<OperandX8632Mem>(legalize(Mem)); 4717 return llvm::cast<OperandX8632Mem>(DoLegalize ? legalize(Mem) : Mem);
4660 } 4718 }
4661 4719
4662 Variable *TargetX8632::makeReg(Type Type, int32_t RegNum) { 4720 Variable *TargetX8632::makeReg(Type Type, int32_t RegNum) {
4663 // There aren't any 64-bit integer registers for x86-32. 4721 // There aren't any 64-bit integer registers for x86-32.
4664 assert(Type != IceType_i64); 4722 assert(Type != IceType_i64);
4665 Variable *Reg = Func->makeVariable(Type); 4723 Variable *Reg = Func->makeVariable(Type);
4666 if (RegNum == Variable::NoRegister) 4724 if (RegNum == Variable::NoRegister)
4667 Reg->setWeightInfinite(); 4725 Reg->setWeightInfinite();
4668 else 4726 else
4669 Reg->setRegNum(RegNum); 4727 Reg->setRegNum(RegNum);
(...skipping 272 matching lines...)
4942 case FT_Asm: 5000 case FT_Asm:
4943 case FT_Iasm: { 5001 case FT_Iasm: {
4944 OstreamLocker L(Ctx); 5002 OstreamLocker L(Ctx);
4945 emitConstantPool<PoolTypeConverter<float>>(Ctx); 5003 emitConstantPool<PoolTypeConverter<float>>(Ctx);
4946 emitConstantPool<PoolTypeConverter<double>>(Ctx); 5004 emitConstantPool<PoolTypeConverter<double>>(Ctx);
4947 } break; 5005 } break;
4948 } 5006 }
4949 } 5007 }
4950 5008
4951 } // end of namespace Ice 5009 } // end of namespace Ice