| OLD | NEW |
|---|---|
| 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// | 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 // | 9 // |
| 10 // This file implements the TargetLoweringX8632 class, which | 10 // This file implements the TargetLoweringX8632 class, which |
| (...skipping 464 matching lines...) | |
| 475 return; | 475 return; |
| 476 | 476 |
| 477 // TODO: It should be sufficient to use the fastest liveness | 477 // TODO: It should be sufficient to use the fastest liveness |
| 478 // calculation, i.e. livenessLightweight(). However, for some | 478 // calculation, i.e. livenessLightweight(). However, for some |
| 479 // reason that slows down the rest of the translation. Investigate. | 479 // reason that slows down the rest of the translation. Investigate. |
| 480 Func->liveness(Liveness_Basic); | 480 Func->liveness(Liveness_Basic); |
| 481 if (Func->hasError()) | 481 if (Func->hasError()) |
| 482 return; | 482 return; |
| 483 Func->dump("After x86 address mode opt"); | 483 Func->dump("After x86 address mode opt"); |
| 484 | 484 |
| 485 doLoadOpt(); | |
| 485 Func->genCode(); | 486 Func->genCode(); |
| 486 if (Func->hasError()) | 487 if (Func->hasError()) |
| 487 return; | 488 return; |
| 488 Func->dump("After x86 codegen"); | 489 Func->dump("After x86 codegen"); |
| 489 | 490 |
| 490 // Register allocation. This requires instruction renumbering and | 491 // Register allocation. This requires instruction renumbering and |
| 491 // full liveness analysis. | 492 // full liveness analysis. |
| 492 Func->renumberInstructions(); | 493 Func->renumberInstructions(); |
| 493 if (Func->hasError()) | 494 if (Func->hasError()) |
| 494 return; | 495 return; |
| (...skipping 70 matching lines...) | |
| 565 if (Func->hasError()) | 566 if (Func->hasError()) |
| 566 return; | 567 return; |
| 567 Func->dump("After stack frame mapping"); | 568 Func->dump("After stack frame mapping"); |
| 568 | 569 |
| 569 // Nop insertion | 570 // Nop insertion |
| 570 if (Ctx->getFlags().shouldDoNopInsertion()) { | 571 if (Ctx->getFlags().shouldDoNopInsertion()) { |
| 571 Func->doNopInsertion(); | 572 Func->doNopInsertion(); |
| 572 } | 573 } |
| 573 } | 574 } |
| 574 | 575 |
| 576 namespace { | |
| 577 | |
| 578 // Converts a ConstantInteger32 operand into its constant value, or | |
| 579 // MemoryOrderInvalid if the operand is not a ConstantInteger32. | |
| 580 uint64_t getConstantMemoryOrder(Operand *Opnd) { | |
| 581 if (auto Integer = llvm::dyn_cast<ConstantInteger32>(Opnd)) | |
| 582 return Integer->getValue(); | |
| 583 return Intrinsics::MemoryOrderInvalid; | |
| 584 } | |
| 585 | |
| 586 // Determines whether the dest of a Load instruction can be folded | |
| 587 // into one of the src operands of a 2-operand instruction. This is | |
| 588 // true as long as the load dest matches exactly one of the binary | |
| 589 // instruction's src operands. Replaces Src0 or Src1 with LoadSrc if | |
| 590 // the answer is true. | |
| 591 bool canFoldLoadIntoBinaryInst(Operand *LoadSrc, Variable *LoadDest, | |
| 592 Operand *&Src0, Operand *&Src1) { | |
| 593 if (Src0 == LoadDest && Src1 != LoadDest) { | |
| 594 Src0 = LoadSrc; | |
| 595 return true; | |
| 596 } | |
| 597 if (Src0 != LoadDest && Src1 == LoadDest) { | |
| 598 Src1 = LoadSrc; | |
| 599 return true; | |
| 600 } | |
| 601 return false; | |
| 602 } | |
| 603 | |
| 604 } // end of anonymous namespace | |
| 605 | |
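For readers skimming the new pass, a minimal standalone sketch of the rule `canFoldLoadIntoBinaryInst` encodes may help: the load's dest may replace exactly one of the two source operands, so `a=[mem]; c=b+a` can fold into `c=b+[mem]`, while `a=[mem]; c=a+a` is left alone. The sketch is illustrative only; strings stand in for Subzero operands and none of these names are from the patch.

```cpp
// Illustrative sketch of the "exactly one operand matches" rule.
#include <cassert>
#include <string>

static bool foldIfExactlyOneMatch(const std::string &LoadSrc,
                                  const std::string &LoadDest,
                                  std::string &Src0, std::string &Src1) {
  if (Src0 == LoadDest && Src1 != LoadDest) { Src0 = LoadSrc; return true; }
  if (Src0 != LoadDest && Src1 == LoadDest) { Src1 = LoadSrc; return true; }
  return false; // no match, or both operands would be replaced
}

int main() {
  std::string S0 = "b", S1 = "a";
  assert(foldIfExactlyOneMatch("[mem]", "a", S0, S1) && S1 == "[mem]");
  std::string T0 = "a", T1 = "a"; // load dest used in both operands: no fold
  assert(!foldIfExactlyOneMatch("[mem]", "a", T0, T1));
  return 0;
}
```

As the comments in doLoadOpt below note, the fold is only applied when the next instruction ends the load dest's live range, so the substituted memory operand is never read a second time.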
| 606 void TargetX8632::doLoadOpt() { | |
| 607 for (CfgNode *Node : Func->getNodes()) { | |
| 608 Context.init(Node); | |
| 609 while (!Context.atEnd()) { | |
| 610 Variable *LoadDest = nullptr; | |
| 611 Operand *LoadSrc = nullptr; | |
| 612 Inst *CurInst = Context.getCur(); | |
| 613 Inst *Next = Context.getNextInst(); | |
| 614 // Determine whether the current instruction is a Load | |
| 615 // instruction or equivalent. | |
| 616 if (auto *Load = llvm::dyn_cast<InstLoad>(CurInst)) { | |
| 617 // An InstLoad always qualifies. | |
| 618 LoadDest = Load->getDest(); | |
| 619 const bool DoLegalize = false; | |
| 620 LoadSrc = formMemoryOperand(Load->getSourceAddress(), | |
| 621 LoadDest->getType(), DoLegalize); | |
| 622 } else if (auto *Intrin = llvm::dyn_cast<InstIntrinsicCall>(CurInst)) { | |
| 623 // An AtomicLoad intrinsic qualifies as long as it has a valid | |
| 624 // memory ordering, and can be implemented in a single | |
| 625 // instruction (i.e., not i64). | |
| 626 Intrinsics::IntrinsicID ID = Intrin->getIntrinsicInfo().ID; | |
| 627 if (ID == Intrinsics::AtomicLoad && | |
| 628 Intrin->getDest()->getType() != IceType_i64 && | |
| 629 Intrinsics::isMemoryOrderValid( | |
| 630 ID, getConstantMemoryOrder(Intrin->getArg(1)))) { | |
| 631 LoadDest = Intrin->getDest(); | |
| 632 const bool DoLegalize = false; | |
| 633 LoadSrc = formMemoryOperand(Intrin->getArg(0), LoadDest->getType(), | |
| 634 DoLegalize); | |
| 635 } | |
| 636 } | |
| 637 // A Load instruction can be folded into the following | |
| 638 // instruction only if the following instruction ends the Load's | |
| 639 // Dest variable's live range. | |
| 640 if (LoadDest && Next && Next->isLastUse(LoadDest)) { | |
| 641 assert(LoadSrc); | |
| 642 Inst *NewInst = nullptr; | |
| 643 if (auto *Arith = llvm::dyn_cast<InstArithmetic>(Next)) { | |
| 644 Operand *Src0 = Arith->getSrc(0); | |
| 645 Operand *Src1 = Arith->getSrc(1); | |
| 646 if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) { | |
| 647 NewInst = InstArithmetic::create(Func, Arith->getOp(), | |
| 648 Arith->getDest(), Src0, Src1); | |
| 649 } | |
| 650 } else if (auto *Icmp = llvm::dyn_cast<InstIcmp>(Next)) { | |
| 651 Operand *Src0 = Icmp->getSrc(0); | |
| 652 Operand *Src1 = Icmp->getSrc(1); | |
| 653 if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) { | |
| 654 NewInst = InstIcmp::create(Func, Icmp->getCondition(), | |
| 655 Icmp->getDest(), Src0, Src1); | |
| 656 } | |
| 657 } else if (auto *Fcmp = llvm::dyn_cast<InstFcmp>(Next)) { | |
| 658 Operand *Src0 = Fcmp->getSrc(0); | |
| 659 Operand *Src1 = Fcmp->getSrc(1); | |
| 660 if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) { | |
| 661 NewInst = InstFcmp::create(Func, Fcmp->getCondition(), | |
| 662 Fcmp->getDest(), Src0, Src1); | |
| 663 } | |
| 664 } else if (auto *Select = llvm::dyn_cast<InstSelect>(Next)) { | |
| 665 Operand *Src0 = Select->getTrueOperand(); | |
| 666 Operand *Src1 = Select->getFalseOperand(); | |
| 667 if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) { | |
| 668 NewInst = InstSelect::create(Func, Select->getDest(), | |
| 669 Select->getCondition(), Src0, Src1); | |
| 670 } | |
| 671 } else if (auto *Cast = llvm::dyn_cast<InstCast>(Next)) { | |
| 672 // The load dest can always be folded into a Cast | |
| 673 // instruction. | |
| 674 Variable *Src0 = llvm::dyn_cast<Variable>(Cast->getSrc(0)); | |
| 675 if (Src0 == LoadDest) { | |
| 676 NewInst = InstCast::create(Func, Cast->getCastKind(), | |
| 677 Cast->getDest(), LoadSrc); | |
| 678 } | |
| 679 } | |
| 680 if (NewInst) { | |
| 681 CurInst->setDeleted(); | |
| 682 Next->setDeleted(); | |
| 683 Context.insert(NewInst); | |
| 684 // Update NewInst->LiveRangesEnded so that target lowering | |
| 685 // may benefit. Also update NewInst->HasSideEffects. | |
| 686 NewInst->spliceLivenessInfo(Next, CurInst); | |
| 687 } | |
| 688 } | |
| 689 Context.advanceCur(); | |
| 690 Context.advanceNext(); | |
| 691 } | |
| 692 } | |
| 693 Func->dump("After load optimization"); | |
| 694 } | |
| 695 | |
| 575 bool TargetX8632::doBranchOpt(Inst *I, const CfgNode *NextNode) { | 696 bool TargetX8632::doBranchOpt(Inst *I, const CfgNode *NextNode) { |
| 576 if (InstX8632Br *Br = llvm::dyn_cast<InstX8632Br>(I)) { | 697 if (InstX8632Br *Br = llvm::dyn_cast<InstX8632Br>(I)) { |
| 577 return Br->optimizeBranch(NextNode); | 698 return Br->optimizeBranch(NextNode); |
| 578 } | 699 } |
| 579 return false; | 700 return false; |
| 580 } | 701 } |
| 581 | 702 |
| 582 IceString TargetX8632::RegNames[] = { | 703 IceString TargetX8632::RegNames[] = { |
| 583 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \ | 704 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \ |
| 584 frameptr, isI8, isInt, isFP) \ | 705 frameptr, isI8, isInt, isFP) \ |
| (...skipping 212 matching lines...) | |
| 797 // If there is a separate locals area, this specifies the alignment | 918 // If there is a separate locals area, this specifies the alignment |
| 798 // for it. | 919 // for it. |
| 799 uint32_t LocalsSlotsAlignmentBytes = 0; | 920 uint32_t LocalsSlotsAlignmentBytes = 0; |
| 800 // The entire spill locations area gets aligned to largest natural | 921 // The entire spill locations area gets aligned to largest natural |
| 801 // alignment of the variables that have a spill slot. | 922 // alignment of the variables that have a spill slot. |
| 802 uint32_t SpillAreaAlignmentBytes = 0; | 923 uint32_t SpillAreaAlignmentBytes = 0; |
| 803 // A spill slot linked to a variable with a stack slot should reuse | 924 // A spill slot linked to a variable with a stack slot should reuse |
| 804 // that stack slot. | 925 // that stack slot. |
| 805 std::function<bool(Variable *)> TargetVarHook = | 926 std::function<bool(Variable *)> TargetVarHook = |
| 806 [&VariablesLinkedToSpillSlots](Variable *Var) { | 927 [&VariablesLinkedToSpillSlots](Variable *Var) { |
| 807 if (SpillVariable *SpillVar = llvm::dyn_cast<SpillVariable>(Var)) { | 928 if (SpillVariable *SpillVar = llvm::dyn_cast<SpillVariable>(Var)) { |
| 808 assert(Var->getWeight().isZero()); | 929 assert(Var->getWeight().isZero()); |
| 809 if (SpillVar->getLinkedTo() && !SpillVar->getLinkedTo()->hasReg()) { | 930 if (SpillVar->getLinkedTo() && !SpillVar->getLinkedTo()->hasReg()) { |
| 810 VariablesLinkedToSpillSlots.push_back(Var); | 931 VariablesLinkedToSpillSlots.push_back(Var); |
| 811 return true; | 932 return true; |
| 812 } | 933 } |
| 813 } | 934 } |
| 814 return false; | 935 return false; |
| 815 }; | 936 }; |
| 816 | 937 |
| 817 // Compute the list of spilled variables and bounds for GlobalsSize, etc. | 938 // Compute the list of spilled variables and bounds for GlobalsSize, etc. |
| 818 getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize, | 939 getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize, |
| 819 &SpillAreaSizeBytes, &SpillAreaAlignmentBytes, | 940 &SpillAreaSizeBytes, &SpillAreaAlignmentBytes, |
| 820 &LocalsSlotsAlignmentBytes, TargetVarHook); | 941 &LocalsSlotsAlignmentBytes, TargetVarHook); |
| 821 uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes; | 942 uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes; |
| 822 SpillAreaSizeBytes += GlobalsSize; | 943 SpillAreaSizeBytes += GlobalsSize; |
| 823 | 944 |
| 824 // Add push instructions for preserved registers. | 945 // Add push instructions for preserved registers. |
| 825 uint32_t NumCallee = 0; | 946 uint32_t NumCallee = 0; |
| (...skipping 337 matching lines...) | |
| 1163 _and(T, Ctx->getConstantInt32(-Alignment)); | 1284 _and(T, Ctx->getConstantInt32(-Alignment)); |
| 1164 _sub(esp, T); | 1285 _sub(esp, T); |
| 1165 } | 1286 } |
| 1166 _mov(Dest, esp); | 1287 _mov(Dest, esp); |
| 1167 } | 1288 } |
| 1168 | 1289 |
| 1169 void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) { | 1290 void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) { |
| 1170 Variable *Dest = Inst->getDest(); | 1291 Variable *Dest = Inst->getDest(); |
| 1171 Operand *Src0 = legalize(Inst->getSrc(0)); | 1292 Operand *Src0 = legalize(Inst->getSrc(0)); |
| 1172 Operand *Src1 = legalize(Inst->getSrc(1)); | 1293 Operand *Src1 = legalize(Inst->getSrc(1)); |
| 1294 if (Inst->isCommutative()) { | |
| 1295 if (!llvm::isa<Variable>(Src0) && llvm::isa<Variable>(Src1)) | |
| 1296 std::swap(Src0, Src1); | |
| 1297 } | |
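A note on the swap just added: when the operation is commutative and only Src1 is a plain Variable, swapping puts the variable first, so a constant or a folded memory operand consistently ends up as the second source. The snippet below is an illustrative sketch under that assumption, not Subzero code.

```cpp
// Illustrative sketch of operand canonicalization for commutative ops.
#include <algorithm>
#include <cassert>
#include <string>

struct Opnd {
  std::string Text;
  bool IsVariable;
};

// If only the second source is a plain variable, swap so the variable
// comes first and the non-variable operand becomes Src1.
static void canonicalizeCommutative(Opnd &Src0, Opnd &Src1) {
  if (!Src0.IsVariable && Src1.IsVariable)
    std::swap(Src0, Src1);
}

int main() {
  Opnd A{"[mem]", false}, B{"b", true};
  canonicalizeCommutative(A, B); // yields b + [mem] rather than [mem] + b
  assert(A.IsVariable && B.Text == "[mem]");
  return 0;
}
```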
| 1173 if (Dest->getType() == IceType_i64) { | 1298 if (Dest->getType() == IceType_i64) { |
| 1174 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 1299 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
| 1175 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 1300 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
| 1176 Operand *Src0Lo = loOperand(Src0); | 1301 Operand *Src0Lo = loOperand(Src0); |
| 1177 Operand *Src0Hi = hiOperand(Src0); | 1302 Operand *Src0Hi = hiOperand(Src0); |
| 1178 Operand *Src1Lo = loOperand(Src1); | 1303 Operand *Src1Lo = loOperand(Src1); |
| 1179 Operand *Src1Hi = hiOperand(Src1); | 1304 Operand *Src1Hi = hiOperand(Src1); |
| 1180 Variable *T_Lo = nullptr, *T_Hi = nullptr; | 1305 Variable *T_Lo = nullptr, *T_Hi = nullptr; |
| 1181 switch (Inst->getOp()) { | 1306 switch (Inst->getOp()) { |
| 1182 case InstArithmetic::_num: | 1307 case InstArithmetic::_num: |
| (...skipping 1701 matching lines...) | |
| 2884 OperandX8632Mem *Loc = | 3009 OperandX8632Mem *Loc = |
| 2885 getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset); | 3010 getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset); |
| 2886 _store(legalizeToVar(ElementToInsertNotLegalized), Loc); | 3011 _store(legalizeToVar(ElementToInsertNotLegalized), Loc); |
| 2887 | 3012 |
| 2888 Variable *T = makeReg(Ty); | 3013 Variable *T = makeReg(Ty); |
| 2889 _movp(T, Slot); | 3014 _movp(T, Slot); |
| 2890 _movp(Inst->getDest(), T); | 3015 _movp(Inst->getDest(), T); |
| 2891 } | 3016 } |
| 2892 } | 3017 } |
| 2893 | 3018 |
| 2894 namespace { | |
| 2895 | |
| 2896 // Converts a ConstantInteger32 operand into its constant value, or | |
| 2897 // MemoryOrderInvalid if the operand is not a ConstantInteger32. | |
| 2898 uint64_t getConstantMemoryOrder(Operand *Opnd) { | |
| 2899 if (auto Integer = llvm::dyn_cast<ConstantInteger32>(Opnd)) | |
| 2900 return Integer->getValue(); | |
| 2901 return Intrinsics::MemoryOrderInvalid; | |
| 2902 } | |
| 2903 | |
| 2904 } // end of anonymous namespace | |
| 2905 | |
| 2906 void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { | 3019 void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { |
| 2907 switch (Intrinsics::IntrinsicID ID = Instr->getIntrinsicInfo().ID) { | 3020 switch (Intrinsics::IntrinsicID ID = Instr->getIntrinsicInfo().ID) { |
| 2908 case Intrinsics::AtomicCmpxchg: { | 3021 case Intrinsics::AtomicCmpxchg: { |
| 2909 if (!Intrinsics::isMemoryOrderValid( | 3022 if (!Intrinsics::isMemoryOrderValid( |
| 2910 ID, getConstantMemoryOrder(Instr->getArg(3)), | 3023 ID, getConstantMemoryOrder(Instr->getArg(3)), |
| 2911 getConstantMemoryOrder(Instr->getArg(4)))) { | 3024 getConstantMemoryOrder(Instr->getArg(4)))) { |
| 2912 Func->setError("Unexpected memory ordering for AtomicCmpxchg"); | 3025 Func->setError("Unexpected memory ordering for AtomicCmpxchg"); |
| 2913 return; | 3026 return; |
| 2914 } | 3027 } |
| 2915 Variable *DestPrev = Instr->getDest(); | 3028 Variable *DestPrev = Instr->getDest(); |
| (...skipping 68 matching lines...) | |
| 2984 OperandX8632Mem *Addr = formMemoryOperand(Instr->getArg(0), IceType_f64); | 3097 OperandX8632Mem *Addr = formMemoryOperand(Instr->getArg(0), IceType_f64); |
| 2985 _movq(T, Addr); | 3098 _movq(T, Addr); |
| 2986 // Then cast the bits back out of the XMM register to the i64 Dest. | 3099 // Then cast the bits back out of the XMM register to the i64 Dest. |
| 2987 InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, Dest, T); | 3100 InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, Dest, T); |
| 2988 lowerCast(Cast); | 3101 lowerCast(Cast); |
| 2989 // Make sure that the atomic load isn't elided when unused. | 3102 // Make sure that the atomic load isn't elided when unused. |
| 2990 Context.insert(InstFakeUse::create(Func, Dest->getLo())); | 3103 Context.insert(InstFakeUse::create(Func, Dest->getLo())); |
| 2991 Context.insert(InstFakeUse::create(Func, Dest->getHi())); | 3104 Context.insert(InstFakeUse::create(Func, Dest->getHi())); |
| 2992 return; | 3105 return; |
| 2993 } | 3106 } |
| 2994 InstLoad *Load = InstLoad::create(Func, Dest, Instr->getArg(0)); | 3107 InstLoad *Load = InstLoad::create(Func, Dest, Instr->getArg(0)); |
jvoung (off chromium), 2015/06/03 21:14:05:
Maybe at some point the load can just be tagged wi…
Jim Stichnoth, 2015/06/03 22:51:36:
This brings up a really good point. The HasSideEf…
| 2995 lowerLoad(Load); | 3108 lowerLoad(Load); |
| 2996 // Make sure the atomic load isn't elided when unused, by adding a FakeUse. | 3109 // Make sure the atomic load isn't elided when unused, by adding a FakeUse. |
| 2997 // Since lowerLoad may fuse the load w/ an arithmetic instruction, | 3110 // Since lowerLoad may fuse the load w/ an arithmetic instruction, |
| 2998 // insert the FakeUse on the last-inserted instruction's dest. | 3111 // insert the FakeUse on the last-inserted instruction's dest. |
| 2999 Context.insert( | 3112 Context.insert( |
| 3000 InstFakeUse::create(Func, Context.getLastInserted()->getDest())); | 3113 InstFakeUse::create(Func, Context.getLastInserted()->getDest())); |
| 3001 return; | 3114 return; |
| 3002 } | 3115 } |
| 3003 case Intrinsics::AtomicRMW: | 3116 case Intrinsics::AtomicRMW: |
| 3004 if (!Intrinsics::isMemoryOrderValid( | 3117 if (!Intrinsics::isMemoryOrderValid( |
| 3005 ID, getConstantMemoryOrder(Instr->getArg(3)))) { | 3118 ID, getConstantMemoryOrder(Instr->getArg(3)))) { |
| 3006 Func->setError("Unexpected memory ordering for AtomicRMW"); | 3119 Func->setError("Unexpected memory ordering for AtomicRMW"); |
| 3007 return; | 3120 return; |
| 3008 } | 3121 } |
| 3009 lowerAtomicRMW(Instr->getDest(), | 3122 lowerAtomicRMW( |
| 3010 static_cast<uint32_t>(llvm::cast<ConstantInteger32>( | 3123 Instr->getDest(), |
| 3011 Instr->getArg(0))->getValue()), | 3124 static_cast<uint32_t>( |
| 3012 Instr->getArg(1), Instr->getArg(2)); | 3125 llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue()), |
| 3126 Instr->getArg(1), Instr->getArg(2)); | |
| 3013 return; | 3127 return; |
| 3014 case Intrinsics::AtomicStore: { | 3128 case Intrinsics::AtomicStore: { |
| 3015 if (!Intrinsics::isMemoryOrderValid( | 3129 if (!Intrinsics::isMemoryOrderValid( |
| 3016 ID, getConstantMemoryOrder(Instr->getArg(2)))) { | 3130 ID, getConstantMemoryOrder(Instr->getArg(2)))) { |
| 3017 Func->setError("Unexpected memory ordering for AtomicStore"); | 3131 Func->setError("Unexpected memory ordering for AtomicStore"); |
| 3018 return; | 3132 return; |
| 3019 } | 3133 } |
| 3020 // We require the memory address to be naturally aligned. | 3134 // We require the memory address to be naturally aligned. |
| 3021 // Given that is the case, then normal stores are atomic. | 3135 // Given that is the case, then normal stores are atomic. |
| 3022 // Add a fence after the store to make it visible. | 3136 // Add a fence after the store to make it visible. |
| (...skipping 822 matching lines...) | |
| 3845 } | 3959 } |
| 3846 | 3960 |
| 3847 } // anonymous namespace | 3961 } // anonymous namespace |
| 3848 | 3962 |
| 3849 void TargetX8632::lowerLoad(const InstLoad *Load) { | 3963 void TargetX8632::lowerLoad(const InstLoad *Load) { |
| 3850 // A Load instruction can be treated the same as an Assign | 3964 // A Load instruction can be treated the same as an Assign |
| 3851 // instruction, after the source operand is transformed into an | 3965 // instruction, after the source operand is transformed into an |
| 3852 // OperandX8632Mem operand. Note that the address mode | 3966 // OperandX8632Mem operand. Note that the address mode |
| 3853 // optimization already creates an OperandX8632Mem operand, so it | 3967 // optimization already creates an OperandX8632Mem operand, so it |
| 3854 // doesn't need another level of transformation. | 3968 // doesn't need another level of transformation. |
| 3855 Type Ty = Load->getDest()->getType(); | 3969 Variable *DestLoad = Load->getDest(); |
| 3970 Type Ty = DestLoad->getType(); | |
| 3856 Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty); | 3971 Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty); |
| 3857 | |
| 3858 // Fuse this load with a subsequent Arithmetic instruction in the | |
| 3859 // following situations: | |
| 3860 // a=[mem]; c=b+a ==> c=b+[mem] if last use of a and a not in b | |
| 3861 // a=[mem]; c=a+b ==> c=b+[mem] if commutative and above is true | |
| 3862 // | |
| 3863 // Fuse this load with a subsequent Cast instruction: | |
| 3864 // a=[mem]; b=cast(a) ==> b=cast([mem]) if last use of a | |
| 3865 // | |
| 3866 // TODO: Clean up and test thoroughly. | |
| 3867 // (E.g., if there is an mfence-all make sure the load ends up on the | |
| 3868 // same side of the fence). | |
| 3869 // | |
| 3870 // TODO: Why limit to Arithmetic instructions? This could probably be | |
| 3871 // applied to most any instruction type. Look at all source operands | |
| 3872 // in the following instruction, and if there is one instance of the | |
| 3873 // load instruction's dest variable, and that instruction ends that | |
| 3874 // variable's live range, then make the substitution. Deal with | |
| 3875 // commutativity optimization in the arithmetic instruction lowering. | |
| 3876 // | |
| 3877 // TODO(stichnot): Do load fusing as a separate pass. Run it before | |
| 3878 // the bool folding pass. Modify Ice::Inst to allow src operands to | |
| 3879 // be replaced, including updating Inst::LiveRangesEnded, to avoid | |
| 3880 // having to manually mostly clone each instruction type. | |
| 3881 Inst *NextInst = Context.getNextInst(); | |
| 3882 Variable *DestLoad = Load->getDest(); | |
| 3883 if (NextInst && NextInst->isLastUse(DestLoad)) { | |
| 3884 if (auto *Arith = llvm::dyn_cast<InstArithmetic>(NextInst)) { | |
| 3885 InstArithmetic *NewArith = nullptr; | |
| 3886 Variable *Src0Arith = llvm::dyn_cast<Variable>(Arith->getSrc(0)); | |
| 3887 Variable *Src1Arith = llvm::dyn_cast<Variable>(Arith->getSrc(1)); | |
| 3888 if (Src1Arith == DestLoad && DestLoad != Src0Arith) { | |
| 3889 NewArith = InstArithmetic::create( | |
| 3890 Func, Arith->getOp(), Arith->getDest(), Arith->getSrc(0), Src0); | |
| 3891 } else if (Src0Arith == DestLoad && Arith->isCommutative() && | |
| 3892 DestLoad != Src1Arith) { | |
| 3893 NewArith = InstArithmetic::create( | |
| 3894 Func, Arith->getOp(), Arith->getDest(), Arith->getSrc(1), Src0); | |
| 3895 } | |
| 3896 if (NewArith) { | |
| 3897 Arith->setDeleted(); | |
| 3898 Context.advanceNext(); | |
| 3899 lowerArithmetic(NewArith); | |
| 3900 return; | |
| 3901 } | |
| 3902 } else if (auto *Cast = llvm::dyn_cast<InstCast>(NextInst)) { | |
| 3903 Variable *Src0Cast = llvm::dyn_cast<Variable>(Cast->getSrc(0)); | |
| 3904 if (Src0Cast == DestLoad) { | |
| 3905 InstCast *NewCast = | |
| 3906 InstCast::create(Func, Cast->getCastKind(), Cast->getDest(), Src0); | |
| 3907 Cast->setDeleted(); | |
| 3908 Context.advanceNext(); | |
| 3909 lowerCast(NewCast); | |
| 3910 return; | |
| 3911 } | |
| 3912 } | |
| 3913 } | |
| 3914 | |
| 3915 InstAssign *Assign = InstAssign::create(Func, DestLoad, Src0); | 3972 InstAssign *Assign = InstAssign::create(Func, DestLoad, Src0); |
| 3916 lowerAssign(Assign); | 3973 lowerAssign(Assign); |
| 3917 } | 3974 } |
| 3918 | 3975 |
| 3919 void TargetX8632::doAddressOptLoad() { | 3976 void TargetX8632::doAddressOptLoad() { |
| 3920 Inst *Inst = Context.getCur(); | 3977 Inst *Inst = Context.getCur(); |
| 3921 Variable *Dest = Inst->getDest(); | 3978 Variable *Dest = Inst->getDest(); |
| 3922 Operand *Addr = Inst->getSrc(0); | 3979 Operand *Addr = Inst->getSrc(0); |
| 3923 Variable *Index = nullptr; | 3980 Variable *Index = nullptr; |
| 3924 uint16_t Shift = 0; | 3981 uint16_t Shift = 0; |
| (...skipping 707 matching lines...) | |
| 4632 bool IsSrc1ImmOrReg = false; | 4689 bool IsSrc1ImmOrReg = false; |
| 4633 if (llvm::isa<Constant>(Src1)) { | 4690 if (llvm::isa<Constant>(Src1)) { |
| 4634 IsSrc1ImmOrReg = true; | 4691 IsSrc1ImmOrReg = true; |
| 4635 } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) { | 4692 } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) { |
| 4636 if (Var->hasReg()) | 4693 if (Var->hasReg()) |
| 4637 IsSrc1ImmOrReg = true; | 4694 IsSrc1ImmOrReg = true; |
| 4638 } | 4695 } |
| 4639 return legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg); | 4696 return legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg); |
| 4640 } | 4697 } |
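The return statement above reflects the x86 rule that a two-operand instruction may reference memory through at most one operand: if Src1 is already an immediate or a register, Src0 is allowed to stay in memory, otherwise it must be forced into a register. A tiny sketch of just that decision follows; the enum values and function name are hypothetical stand-ins, not the Subzero API.

```cpp
#include <cassert>

// Hypothetical stand-ins for Subzero's legalization flags.
enum LegalFlags { Legal_Reg = 1, Legal_Mem = 2 };

// If Src1 is already an immediate or a register, Src0 may remain in memory;
// otherwise force Src0 into a register so that at most one of the two
// operands of the eventual x86 instruction references memory.
static int allowedSrc0Forms(bool Src1IsImmOrReg) {
  return Src1IsImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg;
}

int main() {
  assert(allowedSrc0Forms(true) == (Legal_Reg | Legal_Mem));
  assert(allowedSrc0Forms(false) == Legal_Reg);
  return 0;
}
```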
| 4641 | 4698 |
| 4642 OperandX8632Mem *TargetX8632::formMemoryOperand(Operand *Operand, Type Ty) { | 4699 OperandX8632Mem *TargetX8632::formMemoryOperand(Operand *Operand, Type Ty, |
| 4700 bool DoLegalize) { | |
| 4643 OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand); | 4701 OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand); |
| 4644 // It may be the case that address mode optimization already creates | 4702 // It may be the case that address mode optimization already creates |
| 4645 // an OperandX8632Mem, so in that case it wouldn't need another level | 4703 // an OperandX8632Mem, so in that case it wouldn't need another level |
| 4646 // of transformation. | 4704 // of transformation. |
| 4647 if (!Mem) { | 4705 if (!Mem) { |
| 4648 Variable *Base = llvm::dyn_cast<Variable>(Operand); | 4706 Variable *Base = llvm::dyn_cast<Variable>(Operand); |
| 4649 Constant *Offset = llvm::dyn_cast<Constant>(Operand); | 4707 Constant *Offset = llvm::dyn_cast<Constant>(Operand); |
| 4650 assert(Base || Offset); | 4708 assert(Base || Offset); |
| 4651 if (Offset) { | 4709 if (Offset) { |
| 4652 // Make sure Offset is not undef. | 4710 // Make sure Offset is not undef. |
| 4653 Offset = llvm::cast<Constant>(legalize(Offset)); | 4711 Offset = llvm::cast<Constant>(legalize(Offset)); |
| 4654 assert(llvm::isa<ConstantInteger32>(Offset) || | 4712 assert(llvm::isa<ConstantInteger32>(Offset) || |
| 4655 llvm::isa<ConstantRelocatable>(Offset)); | 4713 llvm::isa<ConstantRelocatable>(Offset)); |
| 4656 } | 4714 } |
| 4657 Mem = OperandX8632Mem::create(Func, Ty, Base, Offset); | 4715 Mem = OperandX8632Mem::create(Func, Ty, Base, Offset); |
| 4658 } | 4716 } |
| 4659 return llvm::cast<OperandX8632Mem>(legalize(Mem)); | 4717 return llvm::cast<OperandX8632Mem>(DoLegalize ? legalize(Mem) : Mem); |
| 4660 } | 4718 } |
| 4661 | 4719 |
| 4662 Variable *TargetX8632::makeReg(Type Type, int32_t RegNum) { | 4720 Variable *TargetX8632::makeReg(Type Type, int32_t RegNum) { |
| 4663 // There aren't any 64-bit integer registers for x86-32. | 4721 // There aren't any 64-bit integer registers for x86-32. |
| 4664 assert(Type != IceType_i64); | 4722 assert(Type != IceType_i64); |
| 4665 Variable *Reg = Func->makeVariable(Type); | 4723 Variable *Reg = Func->makeVariable(Type); |
| 4666 if (RegNum == Variable::NoRegister) | 4724 if (RegNum == Variable::NoRegister) |
| 4667 Reg->setWeightInfinite(); | 4725 Reg->setWeightInfinite(); |
| 4668 else | 4726 else |
| 4669 Reg->setRegNum(RegNum); | 4727 Reg->setRegNum(RegNum); |
| (...skipping 272 matching lines...) | |
| 4942 case FT_Asm: | 5000 case FT_Asm: |
| 4943 case FT_Iasm: { | 5001 case FT_Iasm: { |
| 4944 OstreamLocker L(Ctx); | 5002 OstreamLocker L(Ctx); |
| 4945 emitConstantPool<PoolTypeConverter<float>>(Ctx); | 5003 emitConstantPool<PoolTypeConverter<float>>(Ctx); |
| 4946 emitConstantPool<PoolTypeConverter<double>>(Ctx); | 5004 emitConstantPool<PoolTypeConverter<double>>(Ctx); |
| 4947 } break; | 5005 } break; |
| 4948 } | 5006 } |
| 4949 } | 5007 } |
| 4950 | 5008 |
| 4951 } // end of namespace Ice | 5009 } // end of namespace Ice |