| OLD | NEW |
|---|---|
| 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// | 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 // | 9 // |
| 10 // This file implements the TargetLoweringX8632 class, which | 10 // This file implements the TargetLoweringX8632 class, which |
| (...skipping 464 matching lines...) | |
| 475 return; | 475 return; |
| 476 | 476 |
| 477 // TODO: It should be sufficient to use the fastest liveness | 477 // TODO: It should be sufficient to use the fastest liveness |
| 478 // calculation, i.e. livenessLightweight(). However, for some | 478 // calculation, i.e. livenessLightweight(). However, for some |
| 479 // reason that slows down the rest of the translation. Investigate. | 479 // reason that slows down the rest of the translation. Investigate. |
| 480 Func->liveness(Liveness_Basic); | 480 Func->liveness(Liveness_Basic); |
| 481 if (Func->hasError()) | 481 if (Func->hasError()) |
| 482 return; | 482 return; |
| 483 Func->dump("After x86 address mode opt"); | 483 Func->dump("After x86 address mode opt"); |
| 484 | 484 |
| 485 doLoadOpt(); | |
| 485 Func->genCode(); | 486 Func->genCode(); |
| 486 if (Func->hasError()) | 487 if (Func->hasError()) |
| 487 return; | 488 return; |
| 488 Func->dump("After x86 codegen"); | 489 Func->dump("After x86 codegen"); |
| 489 | 490 |
| 490 // Register allocation. This requires instruction renumbering and | 491 // Register allocation. This requires instruction renumbering and |
| 491 // full liveness analysis. | 492 // full liveness analysis. |
| 492 Func->renumberInstructions(); | 493 Func->renumberInstructions(); |
| 493 if (Func->hasError()) | 494 if (Func->hasError()) |
| 494 return; | 495 return; |
| (...skipping 70 matching lines...) | |
| 565 if (Func->hasError()) | 566 if (Func->hasError()) |
| 566 return; | 567 return; |
| 567 Func->dump("After stack frame mapping"); | 568 Func->dump("After stack frame mapping"); |
| 568 | 569 |
| 569 // Nop insertion | 570 // Nop insertion |
| 570 if (Ctx->getFlags().shouldDoNopInsertion()) { | 571 if (Ctx->getFlags().shouldDoNopInsertion()) { |
| 571 Func->doNopInsertion(); | 572 Func->doNopInsertion(); |
| 572 } | 573 } |
| 573 } | 574 } |
| 574 | 575 |
| 576 namespace { | |
| 577 | |
| 578 // Converts a ConstantInteger32 operand into its constant value, or | |
| 579 // MemoryOrderInvalid if the operand is not a ConstantInteger32. | |
| 580 uint64_t getConstantMemoryOrder(Operand *Opnd) { | |
| 581 if (auto Integer = llvm::dyn_cast<ConstantInteger32>(Opnd)) | |
| 582 return Integer->getValue(); | |
| 583 return Intrinsics::MemoryOrderInvalid; | |
| 584 } | |
| 585 | |
| 586 // Determines whether the dest of a Load instruction can be folded | |
| 587 // into one of the src operands of a 2-operand instruction. This is | |
| 588 // true as long as the load dest matches exactly one of the binary | |
| 589 // instruction's src operands. Replaces Src0 or Src1 with LoadSrc if | |
| 590 // the answer is true. | |
| 591 bool canFoldLoadIntoBinaryInst(Operand *LoadSrc, Variable *LoadDest, | |
| 592 Operand *&Src0, Operand *&Src1) { | |
| 593 if (Src0 == LoadDest && Src1 != LoadDest) { | |
| 594 Src0 = LoadSrc; | |
| 595 return true; | |
| 596 } | |
| 597 if (Src0 != LoadDest && Src1 == LoadDest) { | |
| 598 Src1 = LoadSrc; | |
| 599 return true; | |
| 600 } | |
| 601 return false; | |
| 602 } | |
| 603 | |
| 604 } // end of anonymous namespace | |
| 605 | |
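For readers skimming the new pass, a minimal standalone sketch of the rule `canFoldLoadIntoBinaryInst` encodes may help: the load's dest may replace exactly one of the two source operands, so `a=[mem]; c=b+a` can fold into `c=b+[mem]`, while `a=[mem]; c=a+a` is left alone. The sketch is illustrative only; strings stand in for Subzero operands and none of these names are from the patch.

```cpp
// Illustrative sketch of the "exactly one operand matches" rule.
#include <cassert>
#include <string>

static bool foldIfExactlyOneMatch(const std::string &LoadSrc,
                                  const std::string &LoadDest,
                                  std::string &Src0, std::string &Src1) {
  if (Src0 == LoadDest && Src1 != LoadDest) { Src0 = LoadSrc; return true; }
  if (Src0 != LoadDest && Src1 == LoadDest) { Src1 = LoadSrc; return true; }
  return false; // no match, or both operands would be replaced
}

int main() {
  std::string S0 = "b", S1 = "a";
  assert(foldIfExactlyOneMatch("[mem]", "a", S0, S1) && S1 == "[mem]");
  std::string T0 = "a", T1 = "a"; // load dest used in both operands: no fold
  assert(!foldIfExactlyOneMatch("[mem]", "a", T0, T1));
  return 0;
}
```

As the comments in doLoadOpt below note, the fold is only applied when the next instruction ends the load dest's live range, so the substituted memory operand is never read a second time.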
| 606 void TargetX8632::doLoadOpt() { | |
| 607 for (CfgNode *Node : Func->getNodes()) { | |
| 608 Context.init(Node); | |
| 609 while (!Context.atEnd()) { | |
| 610 Variable *LoadDest = nullptr; | |
| 611 Operand *LoadSrc = nullptr; | |
| 612 Inst *CurInst = Context.getCur(); | |
| 613 Inst *Next = Context.getNextInst(); | |
| 614 // Determine whether the current instruction is a Load | |
| 615 // instruction or equivalent. | |
| 616 if (auto *Load = llvm::dyn_cast<InstLoad>(CurInst)) { | |
| 617 // An InstLoad always qualifies. | |
| 618 LoadDest = Load->getDest(); | |
| 619 const bool DoLegalize = false; | |
| 620 LoadSrc = formMemoryOperand(Load->getSourceAddress(), | |
| 621 LoadDest->getType(), DoLegalize); | |
| 622 } else if (auto *Intrin = llvm::dyn_cast<InstIntrinsicCall>(CurInst)) { | |
| 623 // An AtomicLoad intrinsic qualifies as long as it has a valid | |
| 624 // memory ordering, and can be implemented in a single | |
| 625 // instruction (i.e., not i64). | |
| 626 Intrinsics::IntrinsicID ID = Intrin->getIntrinsicInfo().ID; | |
| 627 if (ID == Intrinsics::AtomicLoad && | |
| 628 Intrin->getDest()->getType() != IceType_i64 && | |
| 629 Intrinsics::isMemoryOrderValid( | |
| 630 ID, getConstantMemoryOrder(Intrin->getArg(1)))) { | |
| 631 LoadDest = Intrin->getDest(); | |
| 632 const bool DoLegalize = false; | |
| 633 LoadSrc = formMemoryOperand(Intrin->getArg(0), LoadDest->getType(), | |
| 634 DoLegalize); | |
| 635 } | |
| 636 } | |
| 637 // A Load instruction can be folded into the following | |
| 638 // instruction only if the following instruction ends the Load's | |
| 639 // Dest variable's live range. | |
| 640 if (LoadDest && Next && Next->isLastUse(LoadDest)) { | |
| 641 assert(LoadSrc); | |
| 642 Inst *NewInst = nullptr; | |
| 643 if (auto *Arith = llvm::dyn_cast<InstArithmetic>(Next)) { | |
| 644 Operand *Src0 = Arith->getSrc(0); | |
| 645 Operand *Src1 = Arith->getSrc(1); | |
| 646 if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) { | |
| 647 NewInst = InstArithmetic::create(Func, Arith->getOp(), | |
| 648 Arith->getDest(), Src0, Src1); | |
| 649 } | |
| 650 } else if (auto *Icmp = llvm::dyn_cast<InstIcmp>(Next)) { | |
| 651 Operand *Src0 = Icmp->getSrc(0); | |
| 652 Operand *Src1 = Icmp->getSrc(1); | |
| 653 if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) { | |
| 654 NewInst = InstIcmp::create(Func, Icmp->getCondition(), | |
| 655 Icmp->getDest(), Src0, Src1); | |
| 656 } | |
| 657 } else if (auto *Fcmp = llvm::dyn_cast<InstFcmp>(Next)) { | |
| 658 Operand *Src0 = Fcmp->getSrc(0); | |
| 659 Operand *Src1 = Fcmp->getSrc(1); | |
| 660 if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) { | |
| 661 NewInst = InstFcmp::create(Func, Fcmp->getCondition(), | |
| 662 Fcmp->getDest(), Src0, Src1); | |
| 663 } | |
| 664 } else if (auto *Select = llvm::dyn_cast<InstSelect>(Next)) { | |
| 665 Operand *Src0 = Select->getTrueOperand(); | |
| 666 Operand *Src1 = Select->getFalseOperand(); | |
| 667 if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) { | |
| 668 NewInst = InstSelect::create(Func, Select->getDest(), | |
| 669 Select->getCondition(), Src0, Src1); | |
| 670 } | |
| 671 } else if (auto *Cast = llvm::dyn_cast<InstCast>(Next)) { | |
| 672 // The load dest can always be folded into a Cast | |
| 673 // instruction. | |
| 674 Variable *Src0 = llvm::dyn_cast<Variable>(Cast->getSrc(0)); | |
| 675 if (Src0 == LoadDest) { | |
| 676 NewInst = InstCast::create(Func, Cast->getCastKind(), | |
| 677 Cast->getDest(), LoadSrc); | |
| 678 } | |
| 679 } | |
| 680 if (NewInst) { | |
| 681 CurInst->setDeleted(); | |
| 682 Next->setDeleted(); | |
| 683 Context.insert(NewInst); | |
| 684 // Update NewInst->LiveRangesEnded so that target lowering | |
| 685 // may benefit. Also update NewInst->HasSideEffects. | |
| 686 NewInst->spliceLivenessInfo(Next, CurInst); | |
| 687 } | |
| 688 } | |
| 689 Context.advanceCur(); | |
| 690 Context.advanceNext(); | |
| 691 } | |
| 692 } | |
| 693 Func->dump("After load optimization"); | |
| 694 } | |
| 695 | |
| 575 bool TargetX8632::doBranchOpt(Inst *I, const CfgNode *NextNode) { | 696 bool TargetX8632::doBranchOpt(Inst *I, const CfgNode *NextNode) { |
| 576 if (InstX8632Br *Br = llvm::dyn_cast<InstX8632Br>(I)) { | 697 if (InstX8632Br *Br = llvm::dyn_cast<InstX8632Br>(I)) { |
| 577 return Br->optimizeBranch(NextNode); | 698 return Br->optimizeBranch(NextNode); |
| 578 } | 699 } |
| 579 return false; | 700 return false; |
| 580 } | 701 } |
| 581 | 702 |
| 582 IceString TargetX8632::RegNames[] = { | 703 IceString TargetX8632::RegNames[] = { |
| 583 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \ | 704 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \ |
| 584 frameptr, isI8, isInt, isFP) \ | 705 frameptr, isI8, isInt, isFP) \ |
| (...skipping 212 matching lines...) | |
| 797 // If there is a separate locals area, this specifies the alignment | 918 // If there is a separate locals area, this specifies the alignment |
| 798 // for it. | 919 // for it. |
| 799 uint32_t LocalsSlotsAlignmentBytes = 0; | 920 uint32_t LocalsSlotsAlignmentBytes = 0; |
| 800 // The entire spill locations area gets aligned to largest natural | 921 // The entire spill locations area gets aligned to largest natural |
| 801 // alignment of the variables that have a spill slot. | 922 // alignment of the variables that have a spill slot. |
| 802 uint32_t SpillAreaAlignmentBytes = 0; | 923 uint32_t SpillAreaAlignmentBytes = 0; |
| 803 // A spill slot linked to a variable with a stack slot should reuse | 924 // A spill slot linked to a variable with a stack slot should reuse |
| 804 // that stack slot. | 925 // that stack slot. |
| 805 std::function<bool(Variable *)> TargetVarHook = | 926 std::function<bool(Variable *)> TargetVarHook = |
| 806 [&VariablesLinkedToSpillSlots](Variable *Var) { | 927 [&VariablesLinkedToSpillSlots](Variable *Var) { |
| 807 if (SpillVariable *SpillVar = llvm::dyn_cast<SpillVariable>(Var)) { | 928 if (SpillVariable *SpillVar = llvm::dyn_cast<SpillVariable>(Var)) { |
| 808 assert(Var->getWeight().isZero()); | 929 assert(Var->getWeight().isZero()); |
| 809 if (SpillVar->getLinkedTo() && !SpillVar->getLinkedTo()->hasReg()) { | 930 if (SpillVar->getLinkedTo() && !SpillVar->getLinkedTo()->hasReg()) { |
| 810 VariablesLinkedToSpillSlots.push_back(Var); | 931 VariablesLinkedToSpillSlots.push_back(Var); |
| 811 return true; | 932 return true; |
| 812 } | 933 } |
| 813 } | 934 } |
| 814 return false; | 935 return false; |
| 815 }; | 936 }; |
| 816 | 937 |
| 817 // Compute the list of spilled variables and bounds for GlobalsSize, etc. | 938 // Compute the list of spilled variables and bounds for GlobalsSize, etc. |
| 818 getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize, | 939 getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize, |
| 819 &SpillAreaSizeBytes, &SpillAreaAlignmentBytes, | 940 &SpillAreaSizeBytes, &SpillAreaAlignmentBytes, |
| 820 &LocalsSlotsAlignmentBytes, TargetVarHook); | 941 &LocalsSlotsAlignmentBytes, TargetVarHook); |
| 821 uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes; | 942 uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes; |
| 822 SpillAreaSizeBytes += GlobalsSize; | 943 SpillAreaSizeBytes += GlobalsSize; |
| 823 | 944 |
| 824 // Add push instructions for preserved registers. | 945 // Add push instructions for preserved registers. |
| 825 uint32_t NumCallee = 0; | 946 uint32_t NumCallee = 0; |
| (...skipping 337 matching lines...) | |
| 1163 _and(T, Ctx->getConstantInt32(-Alignment)); | 1284 _and(T, Ctx->getConstantInt32(-Alignment)); |
| 1164 _sub(esp, T); | 1285 _sub(esp, T); |
| 1165 } | 1286 } |
| 1166 _mov(Dest, esp); | 1287 _mov(Dest, esp); |
| 1167 } | 1288 } |
| 1168 | 1289 |
| 1169 void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) { | 1290 void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) { |
| 1170 Variable *Dest = Inst->getDest(); | 1291 Variable *Dest = Inst->getDest(); |
| 1171 Operand *Src0 = legalize(Inst->getSrc(0)); | 1292 Operand *Src0 = legalize(Inst->getSrc(0)); |
| 1172 Operand *Src1 = legalize(Inst->getSrc(1)); | 1293 Operand *Src1 = legalize(Inst->getSrc(1)); |
| 1294 if (Inst->isCommutative()) { | |
| 1295 if (!llvm::isa<Variable>(Src0) && llvm::isa<Variable>(Src1)) | |
| 1296 std::swap(Src0, Src1); | |
| 1297 } | |
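A note on the swap just added: when the operation is commutative and only Src1 is a plain Variable, swapping puts the variable first, so a constant or a folded memory operand consistently ends up as the second source. The snippet below is an illustrative sketch under that assumption, not Subzero code.

```cpp
// Illustrative sketch of operand canonicalization for commutative ops.
#include <algorithm>
#include <cassert>
#include <string>

struct Opnd {
  std::string Text;
  bool IsVariable;
};

// If only the second source is a plain variable, swap so the variable
// comes first and the non-variable operand becomes Src1.
static void canonicalizeCommutative(Opnd &Src0, Opnd &Src1) {
  if (!Src0.IsVariable && Src1.IsVariable)
    std::swap(Src0, Src1);
}

int main() {
  Opnd A{"[mem]", false}, B{"b", true};
  canonicalizeCommutative(A, B); // yields b + [mem] rather than [mem] + b
  assert(A.IsVariable && B.Text == "[mem]");
  return 0;
}
```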
| 1173 if (Dest->getType() == IceType_i64) { | 1298 if (Dest->getType() == IceType_i64) { |
| 1174 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 1299 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
| 1175 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 1300 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
| 1176 Operand *Src0Lo = loOperand(Src0); | 1301 Operand *Src0Lo = loOperand(Src0); |
| 1177 Operand *Src0Hi = hiOperand(Src0); | 1302 Operand *Src0Hi = hiOperand(Src0); |
| 1178 Operand *Src1Lo = loOperand(Src1); | 1303 Operand *Src1Lo = loOperand(Src1); |
| 1179 Operand *Src1Hi = hiOperand(Src1); | 1304 Operand *Src1Hi = hiOperand(Src1); |
| 1180 Variable *T_Lo = nullptr, *T_Hi = nullptr; | 1305 Variable *T_Lo = nullptr, *T_Hi = nullptr; |
| 1181 switch (Inst->getOp()) { | 1306 switch (Inst->getOp()) { |
| 1182 case InstArithmetic::_num: | 1307 case InstArithmetic::_num: |
| (...skipping 1701 matching lines...) | |
| 2884 OperandX8632Mem *Loc = | 3009 OperandX8632Mem *Loc = |
| 2885 getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset); | 3010 getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset); |
| 2886 _store(legalizeToVar(ElementToInsertNotLegalized), Loc); | 3011 _store(legalizeToVar(ElementToInsertNotLegalized), Loc); |
| 2887 | 3012 |
| 2888 Variable *T = makeReg(Ty); | 3013 Variable *T = makeReg(Ty); |
| 2889 _movp(T, Slot); | 3014 _movp(T, Slot); |
| 2890 _movp(Inst->getDest(), T); | 3015 _movp(Inst->getDest(), T); |
| 2891 } | 3016 } |
| 2892 } | 3017 } |
| 2893 | 3018 |
| 2894 namespace { | |
| 2895 | |
| 2896 // Converts a ConstantInteger32 operand into its constant value, or | |
| 2897 // MemoryOrderInvalid if the operand is not a ConstantInteger32. | |
| 2898 uint64_t getConstantMemoryOrder(Operand *Opnd) { | |
| 2899 if (auto Integer = llvm::dyn_cast<ConstantInteger32>(Opnd)) | |
| 2900 return Integer->getValue(); | |
| 2901 return Intrinsics::MemoryOrderInvalid; | |
| 2902 } | |
| 2903 | |
| 2904 } // end of anonymous namespace | |
| 2905 | |
| 2906 void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { | 3019 void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { |
| 2907 switch (Intrinsics::IntrinsicID ID = Instr->getIntrinsicInfo().ID) { | 3020 switch (Intrinsics::IntrinsicID ID = Instr->getIntrinsicInfo().ID) { |
| 2908 case Intrinsics::AtomicCmpxchg: { | 3021 case Intrinsics::AtomicCmpxchg: { |
| 2909 if (!Intrinsics::isMemoryOrderValid( | 3022 if (!Intrinsics::isMemoryOrderValid( |
| 2910 ID, getConstantMemoryOrder(Instr->getArg(3)), | 3023 ID, getConstantMemoryOrder(Instr->getArg(3)), |
| 2911 getConstantMemoryOrder(Instr->getArg(4)))) { | 3024 getConstantMemoryOrder(Instr->getArg(4)))) { |
| 2912 Func->setError("Unexpected memory ordering for AtomicCmpxchg"); | 3025 Func->setError("Unexpected memory ordering for AtomicCmpxchg"); |
| 2913 return; | 3026 return; |
| 2914 } | 3027 } |
| 2915 Variable *DestPrev = Instr->getDest(); | 3028 Variable *DestPrev = Instr->getDest(); |
| (...skipping 68 matching lines...) | |
| 2984 OperandX8632Mem *Addr = formMemoryOperand(Instr->getArg(0), IceType_f64); | 3097 OperandX8632Mem *Addr = formMemoryOperand(Instr->getArg(0), IceType_f64); |
| 2985 _movq(T, Addr); | 3098 _movq(T, Addr); |
| 2986 // Then cast the bits back out of the XMM register to the i64 Dest. | 3099 // Then cast the bits back out of the XMM register to the i64 Dest. |
| 2987 InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, Dest, T); | 3100 InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, Dest, T); |
| 2988 lowerCast(Cast); | 3101 lowerCast(Cast); |
| 2989 // Make sure that the atomic load isn't elided when unused. | 3102 // Make sure that the atomic load isn't elided when unused. |
| 2990 Context.insert(InstFakeUse::create(Func, Dest->getLo())); | 3103 Context.insert(InstFakeUse::create(Func, Dest->getLo())); |
| 2991 Context.insert(InstFakeUse::create(Func, Dest->getHi())); | 3104 Context.insert(InstFakeUse::create(Func, Dest->getHi())); |
| 2992 return; | 3105 return; |
| 2993 } | 3106 } |
| 2994 InstLoad *Load = InstLoad::create(Func, Dest, Instr->getArg(0)); | 3107 InstLoad *Load = InstLoad::create(Func, Dest, Instr->getArg(0)); |
jvoung (off chromium), 2015/06/03 21:14:05:
Maybe at some point the load can just be tagged wi…
Jim Stichnoth, 2015/06/03 22:51:36:
This brings up a really good point. The HasSideEf…
| 2995 lowerLoad(Load); | 3108 lowerLoad(Load); |
| 2996 // Make sure the atomic load isn't elided when unused, by adding a FakeUse. | 3109 // Make sure the atomic load isn't elided when unused, by adding a FakeUse. |
| 2997 // Since lowerLoad may fuse the load w/ an arithmetic instruction, | 3110 // Since lowerLoad may fuse the load w/ an arithmetic instruction, |
| 2998 // insert the FakeUse on the last-inserted instruction's dest. | 3111 // insert the FakeUse on the last-inserted instruction's dest. |
| 2999 Context.insert( | 3112 Context.insert( |
| 3000 InstFakeUse::create(Func, Context.getLastInserted()->getDest())); | 3113 InstFakeUse::create(Func, Context.getLastInserted()->getDest())); |
| 3001 return; | 3114 return; |
| 3002 } | 3115 } |
| 3003 case Intrinsics::AtomicRMW: | 3116 case Intrinsics::AtomicRMW: |
| 3004 if (!Intrinsics::isMemoryOrderValid( | 3117 if (!Intrinsics::isMemoryOrderValid( |
| 3005 ID, getConstantMemoryOrder(Instr->getArg(3)))) { | 3118 ID, getConstantMemoryOrder(Instr->getArg(3)))) { |
| 3006 Func->setError("Unexpected memory ordering for AtomicRMW"); | 3119 Func->setError("Unexpected memory ordering for AtomicRMW"); |
| 3007 return; | 3120 return; |
| 3008 } | 3121 } |
| 3009 lowerAtomicRMW(Instr->getDest(), | 3122 lowerAtomicRMW( |
| 3010 static_cast<uint32_t>(llvm::cast<ConstantInteger32>( | 3123 Instr->getDest(), |
| 3011 Instr->getArg(0))->getValue()), | 3124 static_cast<uint32_t>( |
| 3012 Instr->getArg(1), Instr->getArg(2)); | 3125 llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue()), |
| 3126 Instr->getArg(1), Instr->getArg(2)); | |
| 3013 return; | 3127 return; |
| 3014 case Intrinsics::AtomicStore: { | 3128 case Intrinsics::AtomicStore: { |
| 3015 if (!Intrinsics::isMemoryOrderValid( | 3129 if (!Intrinsics::isMemoryOrderValid( |
| 3016 ID, getConstantMemoryOrder(Instr->getArg(2)))) { | 3130 ID, getConstantMemoryOrder(Instr->getArg(2)))) { |
| 3017 Func->setError("Unexpected memory ordering for AtomicStore"); | 3131 Func->setError("Unexpected memory ordering for AtomicStore"); |
| 3018 return; | 3132 return; |
| 3019 } | 3133 } |
| 3020 // We require the memory address to be naturally aligned. | 3134 // We require the memory address to be naturally aligned. |
| 3021 // Given that is the case, then normal stores are atomic. | 3135 // Given that is the case, then normal stores are atomic. |
| 3022 // Add a fence after the store to make it visible. | 3136 // Add a fence after the store to make it visible. |
| (...skipping 822 matching lines...) | |
| 3845 } | 3959 } |
| 3846 | 3960 |
| 3847 } // anonymous namespace | 3961 } // anonymous namespace |
| 3848 | 3962 |
| 3849 void TargetX8632::lowerLoad(const InstLoad *Load) { | 3963 void TargetX8632::lowerLoad(const InstLoad *Load) { |
| 3850 // A Load instruction can be treated the same as an Assign | 3964 // A Load instruction can be treated the same as an Assign |
| 3851 // instruction, after the source operand is transformed into an | 3965 // instruction, after the source operand is transformed into an |
| 3852 // OperandX8632Mem operand. Note that the address mode | 3966 // OperandX8632Mem operand. Note that the address mode |
| 3853 // optimization already creates an OperandX8632Mem operand, so it | 3967 // optimization already creates an OperandX8632Mem operand, so it |
| 3854 // doesn't need another level of transformation. | 3968 // doesn't need another level of transformation. |
| 3855 Type Ty = Load->getDest()->getType(); | 3969 Variable *DestLoad = Load->getDest(); |
| 3970 Type Ty = DestLoad->getType(); | |
| 3856 Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty); | 3971 Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty); |
| 3857 | |
| 3858 // Fuse this load with a subsequent Arithmetic instruction in the | |
| 3859 // following situations: | |
| 3860 // a=[mem]; c=b+a ==> c=b+[mem] if last use of a and a not in b | |
| 3861 // a=[mem]; c=a+b ==> c=b+[mem] if commutative and above is true | |
| 3862 // | |
| 3863 // Fuse this load with a subsequent Cast instruction: | |
| 3864 // a=[mem]; b=cast(a) ==> b=cast([mem]) if last use of a | |
| 3865 // | |
| 3866 // TODO: Clean up and test thoroughly. | |
| 3867 // (E.g., if there is an mfence-all make sure the load ends up on the | |
| 3868 // same side of the fence). | |
| 3869 // | |
| 3870 // TODO: Why limit to Arithmetic instructions? This could probably be | |
| 3871 // applied to most any instruction type. Look at all source operands | |
| 3872 // in the following instruction, and if there is one instance of the | |
| 3873 // load instruction's dest variable, and that instruction ends that | |
| 3874 // variable's live range, then make the substitution. Deal with | |
| 3875 // commutativity optimization in the arithmetic instruction lowering. | |
| 3876 // | |
| 3877 // TODO(stichnot): Do load fusing as a separate pass. Run it before | |
| 3878 // the bool folding pass. Modify Ice::Inst to allow src operands to | |
| 3879 // be replaced, including updating Inst::LiveRangesEnded, to avoid | |
| 3880 // having to manually mostly clone each instruction type. | |
| 3881 Inst *NextInst = Context.getNextInst(); | |
| 3882 Variable *DestLoad = Load->getDest(); | |
| 3883 if (NextInst && NextInst->isLastUse(DestLoad)) { | |
| 3884 if (auto *Arith = llvm::dyn_cast<InstArithmetic>(NextInst)) { | |
| 3885 InstArithmetic *NewArith = nullptr; | |
| 3886 Variable *Src0Arith = llvm::dyn_cast<Variable>(Arith->getSrc(0)); | |
| 3887 Variable *Src1Arith = llvm::dyn_cast<Variable>(Arith->getSrc(1)); | |
| 3888 if (Src1Arith == DestLoad && DestLoad != Src0Arith) { | |
| 3889 NewArith = InstArithmetic::create( | |
| 3890 Func, Arith->getOp(), Arith->getDest(), Arith->getSrc(0), Src0); | |
| 3891 } else if (Src0Arith == DestLoad && Arith->isCommutative() && | |
| 3892 DestLoad != Src1Arith) { | |
| 3893 NewArith = InstArithmetic::create( | |
| 3894 Func, Arith->getOp(), Arith->getDest(), Arith->getSrc(1), Src0); | |
| 3895 } | |
| 3896 if (NewArith) { | |
| 3897 Arith->setDeleted(); | |
| 3898 Context.advanceNext(); | |
| 3899 lowerArithmetic(NewArith); | |
| 3900 return; | |
| 3901 } | |
| 3902 } else if (auto *Cast = llvm::dyn_cast<InstCast>(NextInst)) { | |
| 3903 Variable *Src0Cast = llvm::dyn_cast<Variable>(Cast->getSrc(0)); | |
| 3904 if (Src0Cast == DestLoad) { | |
| 3905 InstCast *NewCast = | |
| 3906 InstCast::create(Func, Cast->getCastKind(), Cast->getDest(), Src0); | |
| 3907 Cast->setDeleted(); | |
| 3908 Context.advanceNext(); | |
| 3909 lowerCast(NewCast); | |
| 3910 return; | |
| 3911 } | |
| 3912 } | |
| 3913 } | |
| 3914 | |
| 3915 InstAssign *Assign = InstAssign::create(Func, DestLoad, Src0); | 3972 InstAssign *Assign = InstAssign::create(Func, DestLoad, Src0); |
| 3916 lowerAssign(Assign); | 3973 lowerAssign(Assign); |
| 3917 } | 3974 } |
| 3918 | 3975 |
| 3919 void TargetX8632::doAddressOptLoad() { | 3976 void TargetX8632::doAddressOptLoad() { |
| 3920 Inst *Inst = Context.getCur(); | 3977 Inst *Inst = Context.getCur(); |
| 3921 Variable *Dest = Inst->getDest(); | 3978 Variable *Dest = Inst->getDest(); |
| 3922 Operand *Addr = Inst->getSrc(0); | 3979 Operand *Addr = Inst->getSrc(0); |
| 3923 Variable *Index = nullptr; | 3980 Variable *Index = nullptr; |
| 3924 uint16_t Shift = 0; | 3981 uint16_t Shift = 0; |
| (...skipping 707 matching lines...) | |
| 4632 bool IsSrc1ImmOrReg = false; | 4689 bool IsSrc1ImmOrReg = false; |
| 4633 if (llvm::isa<Constant>(Src1)) { | 4690 if (llvm::isa<Constant>(Src1)) { |
| 4634 IsSrc1ImmOrReg = true; | 4691 IsSrc1ImmOrReg = true; |
| 4635 } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) { | 4692 } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) { |
| 4636 if (Var->hasReg()) | 4693 if (Var->hasReg()) |
| 4637 IsSrc1ImmOrReg = true; | 4694 IsSrc1ImmOrReg = true; |
| 4638 } | 4695 } |
| 4639 return legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg); | 4696 return legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg); |
| 4640 } | 4697 } |
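The return statement above reflects the x86 rule that a two-operand instruction may reference memory through at most one operand: if Src1 is already an immediate or a register, Src0 is allowed to stay in memory, otherwise it must be forced into a register. A tiny sketch of just that decision follows; the enum values and function name are hypothetical stand-ins, not the Subzero API.

```cpp
#include <cassert>

// Hypothetical stand-ins for Subzero's legalization flags.
enum LegalFlags { Legal_Reg = 1, Legal_Mem = 2 };

// If Src1 is already an immediate or a register, Src0 may remain in memory;
// otherwise force Src0 into a register so that at most one of the two
// operands of the eventual x86 instruction references memory.
static int allowedSrc0Forms(bool Src1IsImmOrReg) {
  return Src1IsImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg;
}

int main() {
  assert(allowedSrc0Forms(true) == (Legal_Reg | Legal_Mem));
  assert(allowedSrc0Forms(false) == Legal_Reg);
  return 0;
}
```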
| 4641 | 4698 |
| 4642 OperandX8632Mem *TargetX8632::formMemoryOperand(Operand *Operand, Type Ty) { | 4699 OperandX8632Mem *TargetX8632::formMemoryOperand(Operand *Operand, Type Ty, |
| 4700 bool DoLegalize) { | |
| 4643 OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand); | 4701 OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand); |
| 4644 // It may be the case that address mode optimization already creates | 4702 // It may be the case that address mode optimization already creates |
| 4645 // an OperandX8632Mem, so in that case it wouldn't need another level | 4703 // an OperandX8632Mem, so in that case it wouldn't need another level |
| 4646 // of transformation. | 4704 // of transformation. |
| 4647 if (!Mem) { | 4705 if (!Mem) { |
| 4648 Variable *Base = llvm::dyn_cast<Variable>(Operand); | 4706 Variable *Base = llvm::dyn_cast<Variable>(Operand); |
| 4649 Constant *Offset = llvm::dyn_cast<Constant>(Operand); | 4707 Constant *Offset = llvm::dyn_cast<Constant>(Operand); |
| 4650 assert(Base || Offset); | 4708 assert(Base || Offset); |
| 4651 if (Offset) { | 4709 if (Offset) { |
| 4652 // Make sure Offset is not undef. | 4710 // Make sure Offset is not undef. |
| 4653 Offset = llvm::cast<Constant>(legalize(Offset)); | 4711 Offset = llvm::cast<Constant>(legalize(Offset)); |
| 4654 assert(llvm::isa<ConstantInteger32>(Offset) || | 4712 assert(llvm::isa<ConstantInteger32>(Offset) || |
| 4655 llvm::isa<ConstantRelocatable>(Offset)); | 4713 llvm::isa<ConstantRelocatable>(Offset)); |
| 4656 } | 4714 } |
| 4657 Mem = OperandX8632Mem::create(Func, Ty, Base, Offset); | 4715 Mem = OperandX8632Mem::create(Func, Ty, Base, Offset); |
| 4658 } | 4716 } |
| 4659 return llvm::cast<OperandX8632Mem>(legalize(Mem)); | 4717 return llvm::cast<OperandX8632Mem>(DoLegalize ? legalize(Mem) : Mem); |
| 4660 } | 4718 } |
| 4661 | 4719 |
| 4662 Variable *TargetX8632::makeReg(Type Type, int32_t RegNum) { | 4720 Variable *TargetX8632::makeReg(Type Type, int32_t RegNum) { |
| 4663 // There aren't any 64-bit integer registers for x86-32. | 4721 // There aren't any 64-bit integer registers for x86-32. |
| 4664 assert(Type != IceType_i64); | 4722 assert(Type != IceType_i64); |
| 4665 Variable *Reg = Func->makeVariable(Type); | 4723 Variable *Reg = Func->makeVariable(Type); |
| 4666 if (RegNum == Variable::NoRegister) | 4724 if (RegNum == Variable::NoRegister) |
| 4667 Reg->setWeightInfinite(); | 4725 Reg->setWeightInfinite(); |
| 4668 else | 4726 else |
| 4669 Reg->setRegNum(RegNum); | 4727 Reg->setRegNum(RegNum); |
| (...skipping 272 matching lines...) | |
| 4942 case FT_Asm: | 5000 case FT_Asm: |
| 4943 case FT_Iasm: { | 5001 case FT_Iasm: { |
| 4944 OstreamLocker L(Ctx); | 5002 OstreamLocker L(Ctx); |
| 4945 emitConstantPool<PoolTypeConverter<float>>(Ctx); | 5003 emitConstantPool<PoolTypeConverter<float>>(Ctx); |
| 4946 emitConstantPool<PoolTypeConverter<double>>(Ctx); | 5004 emitConstantPool<PoolTypeConverter<double>>(Ctx); |
| 4947 } break; | 5005 } break; |
| 4948 } | 5006 } |
| 4949 } | 5007 } |
| 4950 | 5008 |
| 4951 } // end of namespace Ice | 5009 } // end of namespace Ice |