OLD | NEW |
---|---|
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// | 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 // | 9 // |
10 // This file implements the TargetLoweringX8632 class, which | 10 // This file implements the TargetLoweringX8632 class, which |
(...skipping 464 matching lines...) | |
475 return; | 475 return; |
476 | 476 |
477 // TODO: It should be sufficient to use the fastest liveness | 477 // TODO: It should be sufficient to use the fastest liveness |
478 // calculation, i.e. livenessLightweight(). However, for some | 478 // calculation, i.e. livenessLightweight(). However, for some |
479 // reason that slows down the rest of the translation. Investigate. | 479 // reason that slows down the rest of the translation. Investigate. |
480 Func->liveness(Liveness_Basic); | 480 Func->liveness(Liveness_Basic); |
481 if (Func->hasError()) | 481 if (Func->hasError()) |
482 return; | 482 return; |
483 Func->dump("After x86 address mode opt"); | 483 Func->dump("After x86 address mode opt"); |
484 | 484 |
485 doLoadOpt(); | |
485 Func->genCode(); | 486 Func->genCode(); |
486 if (Func->hasError()) | 487 if (Func->hasError()) |
487 return; | 488 return; |
488 Func->dump("After x86 codegen"); | 489 Func->dump("After x86 codegen"); |
489 | 490 |
490 // Register allocation. This requires instruction renumbering and | 491 // Register allocation. This requires instruction renumbering and |
491 // full liveness analysis. | 492 // full liveness analysis. |
492 Func->renumberInstructions(); | 493 Func->renumberInstructions(); |
493 if (Func->hasError()) | 494 if (Func->hasError()) |
494 return; | 495 return; |
(...skipping 70 matching lines...) | |
565 if (Func->hasError()) | 566 if (Func->hasError()) |
566 return; | 567 return; |
567 Func->dump("After stack frame mapping"); | 568 Func->dump("After stack frame mapping"); |
568 | 569 |
569 // Nop insertion | 570 // Nop insertion |
570 if (Ctx->getFlags().shouldDoNopInsertion()) { | 571 if (Ctx->getFlags().shouldDoNopInsertion()) { |
571 Func->doNopInsertion(); | 572 Func->doNopInsertion(); |
572 } | 573 } |
573 } | 574 } |
574 | 575 |
576 namespace { | |
577 | |
578 // Converts a ConstantInteger32 operand into its constant value, or | |
579 // MemoryOrderInvalid if the operand is not a ConstantInteger32. | |
580 uint64_t getConstantMemoryOrder(Operand *Opnd) { | |
581 if (auto Integer = llvm::dyn_cast<ConstantInteger32>(Opnd)) | |
582 return Integer->getValue(); | |
583 return Intrinsics::MemoryOrderInvalid; | |
584 } | |
585 | |
586 // Determines whether the dest of a Load instruction can be folded | |
587 // into one of the src operands of a 2-operand instruction. This is | |
588 // true as long as the load dest matches exactly one of the binary | |
589 // instruction's src operands. Replaces Src0 or Src1 with LoadSrc if | |
590 // the answer is true. | |
591 bool canFoldLoadIntoBinaryInst(Operand *LoadSrc, Variable *LoadDest, | |
592 Operand *&Src0, Operand *&Src1) { | |
593 if (Src0 == LoadDest && Src1 != LoadDest) { | |
594 Src0 = LoadSrc; | |
595 return true; | |
596 } | |
597 if (Src0 != LoadDest && Src1 == LoadDest) { | |
598 Src1 = LoadSrc; | |
599 return true; | |
600 } | |
601 return false; | |
602 } | |
603 | |
604 } // end of anonymous namespace | |
605 | |
606 void TargetX8632::doLoadOpt() { | |
607 for (CfgNode *Node : Func->getNodes()) { | |
608 Context.init(Node); | |
609 while (!Context.atEnd()) { | |
610 Variable *LoadDest = nullptr; | |
611 Operand *LoadSrc = nullptr; | |
612 Inst *CurInst = Context.getCur(); | |
613 Inst *Next = Context.getNextInst(); | |
614 // Determine whether the current instruction is a Load | |
615 // instruction or equivalent. | |
616 if (auto *Load = llvm::dyn_cast<InstLoad>(CurInst)) { | |
617 // An InstLoad always qualifies. | |
618 LoadDest = Load->getDest(); | |
619 const bool DoLegalize = false; | |
620 LoadSrc = formMemoryOperand(Load->getSourceAddress(), | |
621 LoadDest->getType(), DoLegalize); | |
622 } else if (auto *Intrin = llvm::dyn_cast<InstIntrinsicCall>(CurInst)) { | |
623 // An AtomicLoad intrinsic qualifies as long as it has a valid | |
624 // memory ordering, and can be implemented in a single | |
625 // instruction (i.e., not i64). | |
626 Intrinsics::IntrinsicID ID = Intrin->getIntrinsicInfo().ID; | |
627 if (ID == Intrinsics::AtomicLoad && | |
628 Intrin->getDest()->getType() != IceType_i64 && | |
629 Intrinsics::isMemoryOrderValid( | |
630 ID, getConstantMemoryOrder(Intrin->getArg(1)))) { | |
631 LoadDest = Intrin->getDest(); | |
632 const bool DoLegalize = false; | |
633 LoadSrc = formMemoryOperand(Intrin->getArg(0), LoadDest->getType(), | |
634 DoLegalize); | |
635 } | |
636 } | |
637 // A Load instruction can be folded into the following | |
638 // instruction only if the following instruction ends the Load's | |
639 // Dest variable's live range. | |
640 if (LoadDest && Next && Next->isLastUse(LoadDest)) { | |
641 assert(LoadSrc); | |
642 Inst *NewInst = nullptr; | |
643 if (auto *Arith = llvm::dyn_cast<InstArithmetic>(Next)) { | |
644 Operand *Src0 = Arith->getSrc(0); | |
645 Operand *Src1 = Arith->getSrc(1); | |
646 if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) { | |
647 NewInst = InstArithmetic::create(Func, Arith->getOp(), | |
648 Arith->getDest(), Src0, Src1); | |
649 } | |
650 } else if (auto *Icmp = llvm::dyn_cast<InstIcmp>(Next)) { | |
651 Operand *Src0 = Icmp->getSrc(0); | |
652 Operand *Src1 = Icmp->getSrc(1); | |
653 if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) { | |
654 NewInst = InstIcmp::create(Func, Icmp->getCondition(), | |
655 Icmp->getDest(), Src0, Src1); | |
656 } | |
657 } else if (auto *Fcmp = llvm::dyn_cast<InstFcmp>(Next)) { | |
658 Operand *Src0 = Fcmp->getSrc(0); | |
659 Operand *Src1 = Fcmp->getSrc(1); | |
660 if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) { | |
661 NewInst = InstFcmp::create(Func, Fcmp->getCondition(), | |
662 Fcmp->getDest(), Src0, Src1); | |
663 } | |
664 } else if (auto *Select = llvm::dyn_cast<InstSelect>(Next)) { | |
665 Operand *Src0 = Select->getTrueOperand(); | |
666 Operand *Src1 = Select->getFalseOperand(); | |
667 if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) { | |
668 NewInst = InstSelect::create(Func, Select->getDest(), | |
669 Select->getCondition(), Src0, Src1); | |
670 } | |
671 } else if (auto *Cast = llvm::dyn_cast<InstCast>(Next)) { | |
672 // The load dest can always be folded into a Cast | |
673 // instruction. | |
674 Variable *Src0 = llvm::dyn_cast<Variable>(Cast->getSrc(0)); | |
675 if (Src0 == LoadDest) { | |
676 NewInst = InstCast::create(Func, Cast->getCastKind(), | |
677 Cast->getDest(), LoadSrc); | |
678 } | |
679 } | |
680 if (NewInst) { | |
681 CurInst->setDeleted(); | |
682 Next->setDeleted(); | |
683 Context.insert(NewInst); | |
684 // Update NewInst->LiveRangesEnded so that target lowering | |
685 // may benefit. Also update NewInst->HasSideEffects. | |
686 NewInst->spliceLivenessInfo(Next, CurInst); | |
687 } | |
688 } | |
689 Context.advanceCur(); | |
690 Context.advanceNext(); | |
691 } | |
692 } | |
693 Func->dump("After load optimization"); | |
694 } | |
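Aside for readers of the new pass: the fold in doLoadOpt() happens only when the following instruction ends the load destination's live range and the destination appears as exactly one of that instruction's two sources; otherwise the substitution would change (or ambiguously duplicate) the semantics. Below is a minimal, self-contained sketch of that decision. SimpleOperand, SimpleBinaryInst, and foldLoadIntoBinary are hypothetical stand-ins, not the Subzero classes used in this CL.

// Illustrative sketch (not part of the CL) of the doLoadOpt() folding decision.
#include <cassert>

struct SimpleOperand {
  const char *Name;
};

struct SimpleBinaryInst {
  SimpleOperand *Src0;
  SimpleOperand *Src1;
  SimpleOperand *LastUse; // stand-in for the liveness info behind isLastUse()
};

// Mirrors canFoldLoadIntoBinaryInst(): substitute the memory operand only when
// the load dest appears as exactly one of the two sources, and only at the
// dest's last use.
bool foldLoadIntoBinary(SimpleOperand *LoadSrc, SimpleOperand *LoadDest,
                        SimpleBinaryInst &Inst) {
  if (Inst.LastUse != LoadDest)
    return false; // folding is only legal when this is the dest's last use
  if (Inst.Src0 == LoadDest && Inst.Src1 != LoadDest) {
    Inst.Src0 = LoadSrc;
    return true;
  }
  if (Inst.Src0 != LoadDest && Inst.Src1 == LoadDest) {
    Inst.Src1 = LoadSrc;
    return true;
  }
  return false; // dest appears zero or two times (e.g. c = a + a): keep the load
}

int main() {
  SimpleOperand A{"a"}, B{"b"}, Mem{"[mem]"};
  SimpleBinaryInst Add{&B, &A, /*LastUse=*/&A};
  // a = load [mem]; c = b + a  ==>  c = b + [mem]
  bool Folded = foldLoadIntoBinary(&Mem, &A, Add);
  assert(Folded && Add.Src1 == &Mem);
  (void)Folded;
  return 0;
}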
695 | |
575 bool TargetX8632::doBranchOpt(Inst *I, const CfgNode *NextNode) { | 696 bool TargetX8632::doBranchOpt(Inst *I, const CfgNode *NextNode) { |
576 if (InstX8632Br *Br = llvm::dyn_cast<InstX8632Br>(I)) { | 697 if (InstX8632Br *Br = llvm::dyn_cast<InstX8632Br>(I)) { |
577 return Br->optimizeBranch(NextNode); | 698 return Br->optimizeBranch(NextNode); |
578 } | 699 } |
579 return false; | 700 return false; |
580 } | 701 } |
581 | 702 |
582 IceString TargetX8632::RegNames[] = { | 703 IceString TargetX8632::RegNames[] = { |
583 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \ | 704 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \ |
584 frameptr, isI8, isInt, isFP) \ | 705 frameptr, isI8, isInt, isFP) \ |
(...skipping 212 matching lines...) | |
797 // If there is a separate locals area, this specifies the alignment | 918 // If there is a separate locals area, this specifies the alignment |
798 // for it. | 919 // for it. |
799 uint32_t LocalsSlotsAlignmentBytes = 0; | 920 uint32_t LocalsSlotsAlignmentBytes = 0; |
800 // The entire spill locations area gets aligned to the largest natural | 921 // The entire spill locations area gets aligned to the largest natural |
801 // alignment of the variables that have a spill slot. | 922 // alignment of the variables that have a spill slot. |
802 uint32_t SpillAreaAlignmentBytes = 0; | 923 uint32_t SpillAreaAlignmentBytes = 0; |
803 // A spill slot linked to a variable with a stack slot should reuse | 924 // A spill slot linked to a variable with a stack slot should reuse |
804 // that stack slot. | 925 // that stack slot. |
805 std::function<bool(Variable *)> TargetVarHook = | 926 std::function<bool(Variable *)> TargetVarHook = |
806 [&VariablesLinkedToSpillSlots](Variable *Var) { | 927 [&VariablesLinkedToSpillSlots](Variable *Var) { |
807 if (SpillVariable *SpillVar = llvm::dyn_cast<SpillVariable>(Var)) { | 928 if (SpillVariable *SpillVar = llvm::dyn_cast<SpillVariable>(Var)) { |
808 assert(Var->getWeight().isZero()); | 929 assert(Var->getWeight().isZero()); |
809 if (SpillVar->getLinkedTo() && !SpillVar->getLinkedTo()->hasReg()) { | 930 if (SpillVar->getLinkedTo() && !SpillVar->getLinkedTo()->hasReg()) { |
810 VariablesLinkedToSpillSlots.push_back(Var); | 931 VariablesLinkedToSpillSlots.push_back(Var); |
811 return true; | 932 return true; |
812 } | 933 } |
813 } | 934 } |
814 return false; | 935 return false; |
815 }; | 936 }; |
816 | 937 |
817 // Compute the list of spilled variables and bounds for GlobalsSize, etc. | 938 // Compute the list of spilled variables and bounds for GlobalsSize, etc. |
818 getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize, | 939 getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize, |
819 &SpillAreaSizeBytes, &SpillAreaAlignmentBytes, | 940 &SpillAreaSizeBytes, &SpillAreaAlignmentBytes, |
820 &LocalsSlotsAlignmentBytes, TargetVarHook); | 941 &LocalsSlotsAlignmentBytes, TargetVarHook); |
821 uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes; | 942 uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes; |
822 SpillAreaSizeBytes += GlobalsSize; | 943 SpillAreaSizeBytes += GlobalsSize; |
823 | 944 |
824 // Add push instructions for preserved registers. | 945 // Add push instructions for preserved registers. |
825 uint32_t NumCallee = 0; | 946 uint32_t NumCallee = 0; |
(...skipping 337 matching lines...) | |
1163 _and(T, Ctx->getConstantInt32(-Alignment)); | 1284 _and(T, Ctx->getConstantInt32(-Alignment)); |
1164 _sub(esp, T); | 1285 _sub(esp, T); |
1165 } | 1286 } |
1166 _mov(Dest, esp); | 1287 _mov(Dest, esp); |
1167 } | 1288 } |
1168 | 1289 |
1169 void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) { | 1290 void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) { |
1170 Variable *Dest = Inst->getDest(); | 1291 Variable *Dest = Inst->getDest(); |
1171 Operand *Src0 = legalize(Inst->getSrc(0)); | 1292 Operand *Src0 = legalize(Inst->getSrc(0)); |
1172 Operand *Src1 = legalize(Inst->getSrc(1)); | 1293 Operand *Src1 = legalize(Inst->getSrc(1)); |
1294 if (Inst->isCommutative()) { | |
1295 if (!llvm::isa<Variable>(Src0) && llvm::isa<Variable>(Src1)) | |
1296 std::swap(Src0, Src1); | |
1297 } | |
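Note on the hunk above: for commutative operations the CL now swaps Src0 and Src1 when only Src1 is a Variable, presumably so the variable lands in the slot that typically gets copied into the temporary register while the constant or memory operand stays in the position x86's two-operand forms can consume directly. A minimal sketch of that canonicalization follows; Opnd and canonicalizeCommutative are hypothetical stand-ins, not the Subzero Operand and Variable classes.

// Illustrative sketch (not part of the CL) of the commutative-operand swap.
#include <utility>

struct Opnd {
  const char *Name;
  bool IsVariable; // registers/stack variables vs. immediates and memory
};

void canonicalizeCommutative(Opnd *&Src0, Opnd *&Src1) {
  if (!Src0->IsVariable && Src1->IsVariable)
    std::swap(Src0, Src1); // e.g. 42 + b  ==>  b + 42
}

int main() {
  Opnd Imm{"42", false}, Var{"b", true};
  Opnd *Src0 = &Imm, *Src1 = &Var;
  canonicalizeCommutative(Src0, Src1);
  // Src0 now names "b", Src1 names "42"; lowering can emit mov T,b / add T,42.
  return 0;
}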
1173 if (Dest->getType() == IceType_i64) { | 1298 if (Dest->getType() == IceType_i64) { |
1174 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 1299 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
1175 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 1300 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
1176 Operand *Src0Lo = loOperand(Src0); | 1301 Operand *Src0Lo = loOperand(Src0); |
1177 Operand *Src0Hi = hiOperand(Src0); | 1302 Operand *Src0Hi = hiOperand(Src0); |
1178 Operand *Src1Lo = loOperand(Src1); | 1303 Operand *Src1Lo = loOperand(Src1); |
1179 Operand *Src1Hi = hiOperand(Src1); | 1304 Operand *Src1Hi = hiOperand(Src1); |
1180 Variable *T_Lo = nullptr, *T_Hi = nullptr; | 1305 Variable *T_Lo = nullptr, *T_Hi = nullptr; |
1181 switch (Inst->getOp()) { | 1306 switch (Inst->getOp()) { |
1182 case InstArithmetic::_num: | 1307 case InstArithmetic::_num: |
(...skipping 1701 matching lines...) | |
2884 OperandX8632Mem *Loc = | 3009 OperandX8632Mem *Loc = |
2885 getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset); | 3010 getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset); |
2886 _store(legalizeToVar(ElementToInsertNotLegalized), Loc); | 3011 _store(legalizeToVar(ElementToInsertNotLegalized), Loc); |
2887 | 3012 |
2888 Variable *T = makeReg(Ty); | 3013 Variable *T = makeReg(Ty); |
2889 _movp(T, Slot); | 3014 _movp(T, Slot); |
2890 _movp(Inst->getDest(), T); | 3015 _movp(Inst->getDest(), T); |
2891 } | 3016 } |
2892 } | 3017 } |
2893 | 3018 |
2894 namespace { | |
2895 | |
2896 // Converts a ConstantInteger32 operand into its constant value, or | |
2897 // MemoryOrderInvalid if the operand is not a ConstantInteger32. | |
2898 uint64_t getConstantMemoryOrder(Operand *Opnd) { | |
2899 if (auto Integer = llvm::dyn_cast<ConstantInteger32>(Opnd)) | |
2900 return Integer->getValue(); | |
2901 return Intrinsics::MemoryOrderInvalid; | |
2902 } | |
2903 | |
2904 } // end of anonymous namespace | |
2905 | |
2906 void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { | 3019 void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { |
2907 switch (Intrinsics::IntrinsicID ID = Instr->getIntrinsicInfo().ID) { | 3020 switch (Intrinsics::IntrinsicID ID = Instr->getIntrinsicInfo().ID) { |
2908 case Intrinsics::AtomicCmpxchg: { | 3021 case Intrinsics::AtomicCmpxchg: { |
2909 if (!Intrinsics::isMemoryOrderValid( | 3022 if (!Intrinsics::isMemoryOrderValid( |
2910 ID, getConstantMemoryOrder(Instr->getArg(3)), | 3023 ID, getConstantMemoryOrder(Instr->getArg(3)), |
2911 getConstantMemoryOrder(Instr->getArg(4)))) { | 3024 getConstantMemoryOrder(Instr->getArg(4)))) { |
2912 Func->setError("Unexpected memory ordering for AtomicCmpxchg"); | 3025 Func->setError("Unexpected memory ordering for AtomicCmpxchg"); |
2913 return; | 3026 return; |
2914 } | 3027 } |
2915 Variable *DestPrev = Instr->getDest(); | 3028 Variable *DestPrev = Instr->getDest(); |
(...skipping 68 matching lines...) | |
2984 OperandX8632Mem *Addr = formMemoryOperand(Instr->getArg(0), IceType_f64); | 3097 OperandX8632Mem *Addr = formMemoryOperand(Instr->getArg(0), IceType_f64); |
2985 _movq(T, Addr); | 3098 _movq(T, Addr); |
2986 // Then cast the bits back out of the XMM register to the i64 Dest. | 3099 // Then cast the bits back out of the XMM register to the i64 Dest. |
2987 InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, Dest, T); | 3100 InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, Dest, T); |
2988 lowerCast(Cast); | 3101 lowerCast(Cast); |
2989 // Make sure that the atomic load isn't elided when unused. | 3102 // Make sure that the atomic load isn't elided when unused. |
2990 Context.insert(InstFakeUse::create(Func, Dest->getLo())); | 3103 Context.insert(InstFakeUse::create(Func, Dest->getLo())); |
2991 Context.insert(InstFakeUse::create(Func, Dest->getHi())); | 3104 Context.insert(InstFakeUse::create(Func, Dest->getHi())); |
2992 return; | 3105 return; |
2993 } | 3106 } |
2994 InstLoad *Load = InstLoad::create(Func, Dest, Instr->getArg(0)); | 3107 InstLoad *Load = InstLoad::create(Func, Dest, Instr->getArg(0)); |
jvoung (off chromium), 2015/06/03 21:14:05: Maybe at some point the load can just be tagged wi...
Jim Stichnoth, 2015/06/03 22:51:36: This brings up a really good point. The HasSideEf...
| |
2995 lowerLoad(Load); | 3108 lowerLoad(Load); |
2996 // Make sure the atomic load isn't elided when unused, by adding a FakeUse. | 3109 // Make sure the atomic load isn't elided when unused, by adding a FakeUse. |
2997 // Since lowerLoad may fuse the load w/ an arithmetic instruction, | 3110 // Since lowerLoad may fuse the load w/ an arithmetic instruction, |
2998 // insert the FakeUse on the last-inserted instruction's dest. | 3111 // insert the FakeUse on the last-inserted instruction's dest. |
2999 Context.insert( | 3112 Context.insert( |
3000 InstFakeUse::create(Func, Context.getLastInserted()->getDest())); | 3113 InstFakeUse::create(Func, Context.getLastInserted()->getDest())); |
3001 return; | 3114 return; |
3002 } | 3115 } |
3003 case Intrinsics::AtomicRMW: | 3116 case Intrinsics::AtomicRMW: |
3004 if (!Intrinsics::isMemoryOrderValid( | 3117 if (!Intrinsics::isMemoryOrderValid( |
3005 ID, getConstantMemoryOrder(Instr->getArg(3)))) { | 3118 ID, getConstantMemoryOrder(Instr->getArg(3)))) { |
3006 Func->setError("Unexpected memory ordering for AtomicRMW"); | 3119 Func->setError("Unexpected memory ordering for AtomicRMW"); |
3007 return; | 3120 return; |
3008 } | 3121 } |
3009 lowerAtomicRMW(Instr->getDest(), | 3122 lowerAtomicRMW( |
3010 static_cast<uint32_t>(llvm::cast<ConstantInteger32>( | 3123 Instr->getDest(), |
3011 Instr->getArg(0))->getValue()), | 3124 static_cast<uint32_t>( |
3012 Instr->getArg(1), Instr->getArg(2)); | 3125 llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue()), |
3126 Instr->getArg(1), Instr->getArg(2)); | |
3013 return; | 3127 return; |
3014 case Intrinsics::AtomicStore: { | 3128 case Intrinsics::AtomicStore: { |
3015 if (!Intrinsics::isMemoryOrderValid( | 3129 if (!Intrinsics::isMemoryOrderValid( |
3016 ID, getConstantMemoryOrder(Instr->getArg(2)))) { | 3130 ID, getConstantMemoryOrder(Instr->getArg(2)))) { |
3017 Func->setError("Unexpected memory ordering for AtomicStore"); | 3131 Func->setError("Unexpected memory ordering for AtomicStore"); |
3018 return; | 3132 return; |
3019 } | 3133 } |
3020 // We require the memory address to be naturally aligned. | 3134 // We require the memory address to be naturally aligned. |
3021 // Given that is the case, normal stores are atomic. | 3135 // Given that is the case, normal stores are atomic. |
3022 // Add a fence after the store to make it visible. | 3136 // Add a fence after the store to make it visible. |
(...skipping 822 matching lines...) | |
3845 } | 3959 } |
3846 | 3960 |
3847 } // anonymous namespace | 3961 } // anonymous namespace |
3848 | 3962 |
3849 void TargetX8632::lowerLoad(const InstLoad *Load) { | 3963 void TargetX8632::lowerLoad(const InstLoad *Load) { |
3850 // A Load instruction can be treated the same as an Assign | 3964 // A Load instruction can be treated the same as an Assign |
3851 // instruction, after the source operand is transformed into an | 3965 // instruction, after the source operand is transformed into an |
3852 // OperandX8632Mem operand. Note that the address mode | 3966 // OperandX8632Mem operand. Note that the address mode |
3853 // optimization already creates an OperandX8632Mem operand, so it | 3967 // optimization already creates an OperandX8632Mem operand, so it |
3854 // doesn't need another level of transformation. | 3968 // doesn't need another level of transformation. |
3855 Type Ty = Load->getDest()->getType(); | 3969 Variable *DestLoad = Load->getDest(); |
3970 Type Ty = DestLoad->getType(); | |
3856 Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty); | 3971 Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty); |
3857 | |
3858 // Fuse this load with a subsequent Arithmetic instruction in the | |
3859 // following situations: | |
3860 // a=[mem]; c=b+a ==> c=b+[mem] if last use of a and a not in b | |
3861 // a=[mem]; c=a+b ==> c=b+[mem] if commutative and above is true | |
3862 // | |
3863 // Fuse this load with a subsequent Cast instruction: | |
3864 // a=[mem]; b=cast(a) ==> b=cast([mem]) if last use of a | |
3865 // | |
3866 // TODO: Clean up and test thoroughly. | |
3867 // (E.g., if there is an mfence-all make sure the load ends up on the | |
3868 // same side of the fence). | |
3869 // | |
3870 // TODO: Why limit to Arithmetic instructions? This could probably be | |
3871 // applied to most any instruction type. Look at all source operands | |
3872 // in the following instruction, and if there is one instance of the | |
3873 // load instruction's dest variable, and that instruction ends that | |
3874 // variable's live range, then make the substitution. Deal with | |
3875 // commutativity optimization in the arithmetic instruction lowering. | |
3876 // | |
3877 // TODO(stichnot): Do load fusing as a separate pass. Run it before | |
3878 // the bool folding pass. Modify Ice::Inst to allow src operands to | |
3879 // be replaced, including updating Inst::LiveRangesEnded, to avoid | |
3880 // having to manually mostly clone each instruction type. | |
3881 Inst *NextInst = Context.getNextInst(); | |
3882 Variable *DestLoad = Load->getDest(); | |
3883 if (NextInst && NextInst->isLastUse(DestLoad)) { | |
3884 if (auto *Arith = llvm::dyn_cast<InstArithmetic>(NextInst)) { | |
3885 InstArithmetic *NewArith = nullptr; | |
3886 Variable *Src0Arith = llvm::dyn_cast<Variable>(Arith->getSrc(0)); | |
3887 Variable *Src1Arith = llvm::dyn_cast<Variable>(Arith->getSrc(1)); | |
3888 if (Src1Arith == DestLoad && DestLoad != Src0Arith) { | |
3889 NewArith = InstArithmetic::create( | |
3890 Func, Arith->getOp(), Arith->getDest(), Arith->getSrc(0), Src0); | |
3891 } else if (Src0Arith == DestLoad && Arith->isCommutative() && | |
3892 DestLoad != Src1Arith) { | |
3893 NewArith = InstArithmetic::create( | |
3894 Func, Arith->getOp(), Arith->getDest(), Arith->getSrc(1), Src0); | |
3895 } | |
3896 if (NewArith) { | |
3897 Arith->setDeleted(); | |
3898 Context.advanceNext(); | |
3899 lowerArithmetic(NewArith); | |
3900 return; | |
3901 } | |
3902 } else if (auto *Cast = llvm::dyn_cast<InstCast>(NextInst)) { | |
3903 Variable *Src0Cast = llvm::dyn_cast<Variable>(Cast->getSrc(0)); | |
3904 if (Src0Cast == DestLoad) { | |
3905 InstCast *NewCast = | |
3906 InstCast::create(Func, Cast->getCastKind(), Cast->getDest(), Src0); | |
3907 Cast->setDeleted(); | |
3908 Context.advanceNext(); | |
3909 lowerCast(NewCast); | |
3910 return; | |
3911 } | |
3912 } | |
3913 } | |
3914 | |
3915 InstAssign *Assign = InstAssign::create(Func, DestLoad, Src0); | 3972 InstAssign *Assign = InstAssign::create(Func, DestLoad, Src0); |
3916 lowerAssign(Assign); | 3973 lowerAssign(Assign); |
3917 } | 3974 } |
3918 | 3975 |
3919 void TargetX8632::doAddressOptLoad() { | 3976 void TargetX8632::doAddressOptLoad() { |
3920 Inst *Inst = Context.getCur(); | 3977 Inst *Inst = Context.getCur(); |
3921 Variable *Dest = Inst->getDest(); | 3978 Variable *Dest = Inst->getDest(); |
3922 Operand *Addr = Inst->getSrc(0); | 3979 Operand *Addr = Inst->getSrc(0); |
3923 Variable *Index = nullptr; | 3980 Variable *Index = nullptr; |
3924 uint16_t Shift = 0; | 3981 uint16_t Shift = 0; |
(...skipping 707 matching lines...) | |
4632 bool IsSrc1ImmOrReg = false; | 4689 bool IsSrc1ImmOrReg = false; |
4633 if (llvm::isa<Constant>(Src1)) { | 4690 if (llvm::isa<Constant>(Src1)) { |
4634 IsSrc1ImmOrReg = true; | 4691 IsSrc1ImmOrReg = true; |
4635 } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) { | 4692 } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) { |
4636 if (Var->hasReg()) | 4693 if (Var->hasReg()) |
4637 IsSrc1ImmOrReg = true; | 4694 IsSrc1ImmOrReg = true; |
4638 } | 4695 } |
4639 return legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg); | 4696 return legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg); |
4640 } | 4697 } |
4641 | 4698 |
4642 OperandX8632Mem *TargetX8632::formMemoryOperand(Operand *Operand, Type Ty) { | 4699 OperandX8632Mem *TargetX8632::formMemoryOperand(Operand *Operand, Type Ty, |
4700 bool DoLegalize) { | |
4643 OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand); | 4701 OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand); |
4644 // It may be the case that address mode optimization already creates | 4702 // It may be the case that address mode optimization already creates |
4645 // an OperandX8632Mem, so in that case it wouldn't need another level | 4703 // an OperandX8632Mem, so in that case it wouldn't need another level |
4646 // of transformation. | 4704 // of transformation. |
4647 if (!Mem) { | 4705 if (!Mem) { |
4648 Variable *Base = llvm::dyn_cast<Variable>(Operand); | 4706 Variable *Base = llvm::dyn_cast<Variable>(Operand); |
4649 Constant *Offset = llvm::dyn_cast<Constant>(Operand); | 4707 Constant *Offset = llvm::dyn_cast<Constant>(Operand); |
4650 assert(Base || Offset); | 4708 assert(Base || Offset); |
4651 if (Offset) { | 4709 if (Offset) { |
4652 // Make sure Offset is not undef. | 4710 // Make sure Offset is not undef. |
4653 Offset = llvm::cast<Constant>(legalize(Offset)); | 4711 Offset = llvm::cast<Constant>(legalize(Offset)); |
4654 assert(llvm::isa<ConstantInteger32>(Offset) || | 4712 assert(llvm::isa<ConstantInteger32>(Offset) || |
4655 llvm::isa<ConstantRelocatable>(Offset)); | 4713 llvm::isa<ConstantRelocatable>(Offset)); |
4656 } | 4714 } |
4657 Mem = OperandX8632Mem::create(Func, Ty, Base, Offset); | 4715 Mem = OperandX8632Mem::create(Func, Ty, Base, Offset); |
4658 } | 4716 } |
4659 return llvm::cast<OperandX8632Mem>(legalize(Mem)); | 4717 return llvm::cast<OperandX8632Mem>(DoLegalize ? legalize(Mem) : Mem); |
4660 } | 4718 } |
4661 | 4719 |
4662 Variable *TargetX8632::makeReg(Type Type, int32_t RegNum) { | 4720 Variable *TargetX8632::makeReg(Type Type, int32_t RegNum) { |
4663 // There aren't any 64-bit integer registers for x86-32. | 4721 // There aren't any 64-bit integer registers for x86-32. |
4664 assert(Type != IceType_i64); | 4722 assert(Type != IceType_i64); |
4665 Variable *Reg = Func->makeVariable(Type); | 4723 Variable *Reg = Func->makeVariable(Type); |
4666 if (RegNum == Variable::NoRegister) | 4724 if (RegNum == Variable::NoRegister) |
4667 Reg->setWeightInfinite(); | 4725 Reg->setWeightInfinite(); |
4668 else | 4726 else |
4669 Reg->setRegNum(RegNum); | 4727 Reg->setRegNum(RegNum); |
(...skipping 272 matching lines...) | |
4942 case FT_Asm: | 5000 case FT_Asm: |
4943 case FT_Iasm: { | 5001 case FT_Iasm: { |
4944 OstreamLocker L(Ctx); | 5002 OstreamLocker L(Ctx); |
4945 emitConstantPool<PoolTypeConverter<float>>(Ctx); | 5003 emitConstantPool<PoolTypeConverter<float>>(Ctx); |
4946 emitConstantPool<PoolTypeConverter<double>>(Ctx); | 5004 emitConstantPool<PoolTypeConverter<double>>(Ctx); |
4947 } break; | 5005 } break; |
4948 } | 5006 } |
4949 } | 5007 } |
4950 | 5008 |
4951 } // end of namespace Ice | 5009 } // end of namespace Ice |