OLD | NEW |
---|---|
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// | 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 /// | 9 /// |
10 /// \file | 10 /// \file |
(...skipping 63 matching lines...) | |
74 }; | 74 }; |
75 | 75 |
76 template <class MachineTraits> class BoolFolding { | 76 template <class MachineTraits> class BoolFolding { |
77 public: | 77 public: |
78 enum BoolFoldingProducerKind { | 78 enum BoolFoldingProducerKind { |
79 PK_None, | 79 PK_None, |
80 // TODO(jpp): PK_Icmp32 is no longer meaningful. Rename to PK_IcmpNative. | 80 // TODO(jpp): PK_Icmp32 is no longer meaningful. Rename to PK_IcmpNative. |
81 PK_Icmp32, | 81 PK_Icmp32, |
82 PK_Icmp64, | 82 PK_Icmp64, |
83 PK_Fcmp, | 83 PK_Fcmp, |
84 PK_Trunc | 84 PK_Trunc, |
85 PK_Arith // A flag-setting arithmetic instruction. | |
85 }; | 86 }; |
86 | 87 |
87 /// Currently the actual enum values are not used (other than CK_None), but we | 88 /// Currently the actual enum values are not used (other than CK_None), but we |
88 /// go ahead and produce them anyway for symmetry with the | 89 /// go ahead and produce them anyway for symmetry with the |
89 /// BoolFoldingProducerKind. | 90 /// BoolFoldingProducerKind. |
90 enum BoolFoldingConsumerKind { CK_None, CK_Br, CK_Select, CK_Sext, CK_Zext }; | 91 enum BoolFoldingConsumerKind { CK_None, CK_Br, CK_Select, CK_Sext, CK_Zext }; |
91 | 92 |
92 private: | 93 private: |
93 BoolFolding(const BoolFolding &) = delete; | 94 BoolFolding(const BoolFolding &) = delete; |
94 BoolFolding &operator=(const BoolFolding &) = delete; | 95 BoolFolding &operator=(const BoolFolding &) = delete; |
(...skipping 23 matching lines...) | |
118 : Instr(I), IsComplex(BoolFolding<MachineTraits>::hasComplexLowering(I)) {} | 119 : Instr(I), IsComplex(BoolFolding<MachineTraits>::hasComplexLowering(I)) {} |
119 | 120 |
120 template <class MachineTraits> | 121 template <class MachineTraits> |
121 typename BoolFolding<MachineTraits>::BoolFoldingProducerKind | 122 typename BoolFolding<MachineTraits>::BoolFoldingProducerKind |
122 BoolFolding<MachineTraits>::getProducerKind(const Inst *Instr) { | 123 BoolFolding<MachineTraits>::getProducerKind(const Inst *Instr) { |
123 if (llvm::isa<InstIcmp>(Instr)) { | 124 if (llvm::isa<InstIcmp>(Instr)) { |
124 if (MachineTraits::Is64Bit || Instr->getSrc(0)->getType() != IceType_i64) | 125 if (MachineTraits::Is64Bit || Instr->getSrc(0)->getType() != IceType_i64) |
125 return PK_Icmp32; | 126 return PK_Icmp32; |
126 return PK_Icmp64; | 127 return PK_Icmp64; |
127 } | 128 } |
129 if (llvm::isa<InstFcmp>(Instr)) | |
130 return PK_Fcmp; | |
131 if (auto *Arith = llvm::dyn_cast<InstArithmetic>(Instr)) { | |
132 if (MachineTraits::Is64Bit || Arith->getSrc(0)->getType() != IceType_i64) { | |
133 switch (Arith->getOp()) { | |
134 default: | |
135 break; | |
Jim Stichnoth
2015/11/11 14:05:20
I think you should just "return PK_None", unless y
sehr
2015/11/13 06:00:52
Done.
| |
136 case InstArithmetic::And: | |
137 case InstArithmetic::Or: | |
138 return PK_Arith; | |
139 } | |
140 } | |
141 } | |
128 return PK_None; // TODO(stichnot): remove this | 142 return PK_None; // TODO(stichnot): remove this |
129 | 143 |
130 if (llvm::isa<InstFcmp>(Instr)) | |
131 return PK_Fcmp; | |
132 if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) { | 144 if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) { |
133 switch (Cast->getCastKind()) { | 145 switch (Cast->getCastKind()) { |
134 default: | 146 default: |
135 return PK_None; | 147 return PK_None; |
136 case InstCast::Trunc: | 148 case InstCast::Trunc: |
137 return PK_Trunc; | 149 return PK_Trunc; |
138 } | 150 } |
139 } | 151 } |
140 return PK_None; | 152 return PK_None; |
141 } | 153 } |
(...skipping 1776 matching lines...) | |
1918 if (const class Inst *Producer = FoldingInfo.getProducerFor(Cond)) { | 1930 if (const class Inst *Producer = FoldingInfo.getProducerFor(Cond)) { |
1919 assert(Producer->isDeleted()); | 1931 assert(Producer->isDeleted()); |
1920 switch (BoolFolding::getProducerKind(Producer)) { | 1932 switch (BoolFolding::getProducerKind(Producer)) { |
1921 default: | 1933 default: |
1922 break; | 1934 break; |
1923 case BoolFolding::PK_Icmp32: | 1935 case BoolFolding::PK_Icmp32: |
1924 case BoolFolding::PK_Icmp64: { | 1936 case BoolFolding::PK_Icmp64: { |
1925 lowerIcmpAndBr(llvm::dyn_cast<InstIcmp>(Producer), Inst); | 1937 lowerIcmpAndBr(llvm::dyn_cast<InstIcmp>(Producer), Inst); |
1926 return; | 1938 return; |
1927 } | 1939 } |
1940 case BoolFolding::PK_Fcmp: { | |
1941 lowerFcmpAndBr(llvm::dyn_cast<InstFcmp>(Producer), Inst); | |
1942 return; | |
1943 } | |
1944 case BoolFolding::PK_Arith: { | |
1945 lowerArithAndBr(llvm::dyn_cast<InstArithmetic>(Producer), Inst); | |
1946 return; | |
1947 } | |
1928 } | 1948 } |
1929 } | 1949 } |
1930 | |
1931 Operand *Src0 = legalize(Cond, Legal_Reg | Legal_Mem); | 1950 Operand *Src0 = legalize(Cond, Legal_Reg | Legal_Mem); |
1932 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 1951 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
1933 _cmp(Src0, Zero); | 1952 _cmp(Src0, Zero); |
1934 _br(Traits::Cond::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse()); | 1953 _br(Traits::Cond::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse()); |
1935 } | 1954 } |
1936 | 1955 |
1937 template <class Machine> | 1956 template <class Machine> |
1938 void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) { | 1957 void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) { |
1939 // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap) | 1958 // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap) |
1940 InstCast::OpKind CastKind = Inst->getCastKind(); | 1959 InstCast::OpKind CastKind = Inst->getCastKind(); |
(...skipping 592 matching lines...) | |
2533 ExtractedElementR = T; | 2552 ExtractedElementR = T; |
2534 } | 2553 } |
2535 | 2554 |
2536 // Copy the element to the destination. | 2555 // Copy the element to the destination. |
2537 Variable *Dest = Inst->getDest(); | 2556 Variable *Dest = Inst->getDest(); |
2538 _mov(Dest, ExtractedElementR); | 2557 _mov(Dest, ExtractedElementR); |
2539 } | 2558 } |
2540 | 2559 |
2541 template <class Machine> | 2560 template <class Machine> |
2542 void TargetX86Base<Machine>::lowerFcmp(const InstFcmp *Inst) { | 2561 void TargetX86Base<Machine>::lowerFcmp(const InstFcmp *Inst) { |
2562 constexpr InstBr *Br = nullptr; | |
2563 lowerFcmpAndBr(Inst, Br); | |
2564 } | |
2565 | |
2566 template <class Machine> | |
2567 void TargetX86Base<Machine>::lowerFcmpAndBr(const InstFcmp *Inst, | |
2568 const InstBr *Br) { | |
2543 Operand *Src0 = Inst->getSrc(0); | 2569 Operand *Src0 = Inst->getSrc(0); |
2544 Operand *Src1 = Inst->getSrc(1); | 2570 Operand *Src1 = Inst->getSrc(1); |
2545 Variable *Dest = Inst->getDest(); | 2571 Variable *Dest = Inst->getDest(); |
2546 | 2572 |
2547 if (isVectorType(Dest->getType())) { | 2573 if (isVectorType(Dest->getType())) { |
2574 if (Br) | |
2575 llvm::report_fatal_error("vector compare/branch cannot be folded"); | |
2548 InstFcmp::FCond Condition = Inst->getCondition(); | 2576 InstFcmp::FCond Condition = Inst->getCondition(); |
2549 size_t Index = static_cast<size_t>(Condition); | 2577 size_t Index = static_cast<size_t>(Condition); |
2550 assert(Index < Traits::TableFcmpSize); | 2578 assert(Index < Traits::TableFcmpSize); |
2551 | 2579 |
2552 if (Traits::TableFcmp[Index].SwapVectorOperands) { | 2580 if (Traits::TableFcmp[Index].SwapVectorOperands) { |
2553 Operand *T = Src0; | 2581 Operand *T = Src0; |
Jim Stichnoth
2015/11/11 14:05:20
std::swap(Src0, Src1);
sehr
2015/11/13 06:00:52
Done.
| |
2554 Src0 = Src1; | 2582 Src0 = Src1; |
2555 Src1 = T; | 2583 Src1 = T; |
2556 } | 2584 } |
2557 | 2585 |
2558 Variable *T = nullptr; | 2586 Variable *T = nullptr; |
2559 | 2587 |
2560 if (Condition == InstFcmp::True) { | 2588 if (Condition == InstFcmp::True) { |
2561 // makeVectorOfOnes() requires an integer vector type. | 2589 // makeVectorOfOnes() requires an integer vector type. |
2562 T = makeVectorOfMinusOnes(IceType_v4i32); | 2590 T = makeVectorOfMinusOnes(IceType_v4i32); |
2563 } else if (Condition == InstFcmp::False) { | 2591 } else if (Condition == InstFcmp::False) { |
(...skipping 62 matching lines...) | |
2626 bool HasC1 = (Traits::TableFcmp[Index].C1 != Traits::Cond::Br_None); | 2654 bool HasC1 = (Traits::TableFcmp[Index].C1 != Traits::Cond::Br_None); |
2627 bool HasC2 = (Traits::TableFcmp[Index].C2 != Traits::Cond::Br_None); | 2655 bool HasC2 = (Traits::TableFcmp[Index].C2 != Traits::Cond::Br_None); |
2628 if (HasC1) { | 2656 if (HasC1) { |
2629 Src0 = legalize(Src0); | 2657 Src0 = legalize(Src0); |
2630 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); | 2658 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); |
2631 Variable *T = nullptr; | 2659 Variable *T = nullptr; |
2632 _mov(T, Src0); | 2660 _mov(T, Src0); |
2633 _ucomiss(T, Src1RM); | 2661 _ucomiss(T, Src1RM); |
2634 if (!HasC2) { | 2662 if (!HasC2) { |
2635 assert(Traits::TableFcmp[Index].Default); | 2663 assert(Traits::TableFcmp[Index].Default); |
2636 _setcc(Dest, Traits::TableFcmp[Index].C1); | 2664 setccOrBr(Traits::TableFcmp[Index].C1, Dest, Br); |
2637 return; | 2665 return; |
2638 } | 2666 } |
2639 } | 2667 } |
2640 Constant *Default = | 2668 int32_t IntDefault = Traits::TableFcmp[Index].Default; |
2641 Ctx->getConstantInt(Dest->getType(), Traits::TableFcmp[Index].Default); | 2669 if (Br == nullptr) { |
2642 _mov(Dest, Default); | 2670 Constant *Default = Ctx->getConstantInt(Dest->getType(), IntDefault); |
2643 if (HasC1) { | 2671 _mov(Dest, Default); |
2644 typename Traits::Insts::Label *Label = | 2672 if (HasC1) { |
2645 Traits::Insts::Label::create(Func, this); | 2673 typename Traits::Insts::Label *Label = |
2646 _br(Traits::TableFcmp[Index].C1, Label); | 2674 Traits::Insts::Label::create(Func, this); |
2647 if (HasC2) { | 2675 _br(Traits::TableFcmp[Index].C1, Label); |
2648 _br(Traits::TableFcmp[Index].C2, Label); | 2676 if (HasC2) { |
2677 _br(Traits::TableFcmp[Index].C2, Label); | |
2678 } | |
2679 Constant *NonDefault = Ctx->getConstantInt(Dest->getType(), !IntDefault); | |
2680 _mov_redefined(Dest, NonDefault); | |
2681 Context.insert(Label); | |
2649 } | 2682 } |
2650 Constant *NonDefault = | 2683 } else { |
Jim Stichnoth
2015/11/11 14:05:20
Can this be "else if"?
} else if (IntDefault ==
sehr
2015/11/13 06:00:52
I used std::swap and removed the duplication, as w
| |
2651 Ctx->getConstantInt(Dest->getType(), !Traits::TableFcmp[Index].Default); | 2684 if (IntDefault == 0) { |
2652 _mov_redefined(Dest, NonDefault); | 2685 if (HasC1) { |
2653 Context.insert(Label); | 2686 _br(Traits::TableFcmp[Index].C1, Br->getTargetFalse()); |
2687 if (HasC2) { | |
2688 _br(Traits::TableFcmp[Index].C2, Br->getTargetFalse()); | |
2689 } | |
2690 _br(Br->getTargetTrue()); | |
2691 return; | |
2692 } | |
2693 _br(Br->getTargetFalse()); | |
2694 } else { | |
2695 if (HasC1) { | |
2696 _br(Traits::TableFcmp[Index].C1, Br->getTargetTrue()); | |
2697 if (HasC2) { | |
2698 _br(Traits::TableFcmp[Index].C2, Br->getTargetTrue()); | |
2699 } | |
2700 _br(Br->getTargetFalse()); | |
2701 return; | |
2702 } | |
2703 _br(Br->getTargetTrue()); | |
2704 } | |
2654 } | 2705 } |
2655 } | 2706 } |
2656 | 2707 |
2657 inline bool isZero(const Operand *Opnd) { | 2708 inline bool isZero(const Operand *Opnd) { |
2658 if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Opnd)) | 2709 if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Opnd)) |
2659 return C64->getValue() == 0; | 2710 return C64->getValue() == 0; |
2660 if (auto *C32 = llvm::dyn_cast<ConstantInteger32>(Opnd)) | 2711 if (auto *C32 = llvm::dyn_cast<ConstantInteger32>(Opnd)) |
2661 return C32->getValue() == 0; | 2712 return C32->getValue() == 0; |
2662 return false; | 2713 return false; |
2663 } | 2714 } |
(...skipping 289 matching lines...) | |
2953 // TODO(sehr,stichnot): This could be done with a single unconditional | 3004 // TODO(sehr,stichnot): This could be done with a single unconditional |
2954 // branch instruction, but subzero doesn't know how to handle the resulting | 3005 // branch instruction, but subzero doesn't know how to handle the resulting |
2955 // control flow graph changes now. Make it do so to eliminate mov and cmp. | 3006 // control flow graph changes now. Make it do so to eliminate mov and cmp. |
2956 _mov(Dest, Ctx->getConstantInt(Dest->getType(), (IcmpResult ? 1 : 0))); | 3007 _mov(Dest, Ctx->getConstantInt(Dest->getType(), (IcmpResult ? 1 : 0))); |
2957 _cmp(Dest, Ctx->getConstantInt(Dest->getType(), 0)); | 3008 _cmp(Dest, Ctx->getConstantInt(Dest->getType(), 0)); |
2958 _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse()); | 3009 _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse()); |
2959 } | 3010 } |
2960 } | 3011 } |
2961 | 3012 |
2962 template <class Machine> | 3013 template <class Machine> |
3014 void TargetX86Base<Machine>::lowerArithAndBr(const InstArithmetic *Arith, | |
3015 const InstBr *Br) { | |
3016 Variable *T = nullptr; | |
3017 Operand *Src0 = legalize(Arith->getSrc(0)); | |
3018 Operand *Src1 = legalize(Arith->getSrc(1)); | |
3019 Variable *Dest = Arith->getDest(); | |
3020 switch (Arith->getOp()) { | |
3021 default: | |
3022 llvm_unreachable("arithmetic operator not AND or OR"); | |
3023 break; | |
3024 case InstArithmetic::And: | |
3025 _mov(T, Src0); | |
3026 _and(T, Src1); | |
John
2015/11/11 02:08:10
would _test be better here? just curious.
Jim Stichnoth
2015/11/11 14:05:20
Yes it would be better, since T would be able to s
sehr
2015/11/13 06:00:52
Done.
sehr
2015/11/13 06:00:52
Done.
| |
3027 break; | |
3028 case InstArithmetic::Or: | |
3029 _mov(T, Src0); | |
3030 _or(T, Src1); | |
3031 break; | |
3032 } | |
3033 Context.insert(InstFakeUse::create(Func, T)); | |
3034 Context.insert(InstFakeDef::create(Func, Dest)); | |
3035 _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse()); | |
3036 } | |
3037 | |
3038 template <class Machine> | |
2963 void TargetX86Base<Machine>::lowerInsertElement(const InstInsertElement *Inst) { | 3039 void TargetX86Base<Machine>::lowerInsertElement(const InstInsertElement *Inst) { |
2964 Operand *SourceVectNotLegalized = Inst->getSrc(0); | 3040 Operand *SourceVectNotLegalized = Inst->getSrc(0); |
2965 Operand *ElementToInsertNotLegalized = Inst->getSrc(1); | 3041 Operand *ElementToInsertNotLegalized = Inst->getSrc(1); |
2966 ConstantInteger32 *ElementIndex = | 3042 ConstantInteger32 *ElementIndex = |
2967 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(2)); | 3043 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(2)); |
2968 // Only constant indices are allowed in PNaCl IR. | 3044 // Only constant indices are allowed in PNaCl IR. |
2969 assert(ElementIndex); | 3045 assert(ElementIndex); |
2970 unsigned Index = ElementIndex->getValue(); | 3046 unsigned Index = ElementIndex->getValue(); |
2971 assert(Index < typeNumElements(SourceVectNotLegalized->getType())); | 3047 assert(Index < typeNumElements(SourceVectNotLegalized->getType())); |
2972 | 3048 |
(...skipping 2908 matching lines...) | |
5881 } | 5957 } |
5882 // the offset is not eligible for blinding or pooling, return the original | 5958 // the offset is not eligible for blinding or pooling, return the original |
5883 // mem operand | 5959 // mem operand |
5884 return MemOperand; | 5960 return MemOperand; |
5885 } | 5961 } |
5886 | 5962 |
5887 } // end of namespace X86Internal | 5963 } // end of namespace X86Internal |
5888 } // end of namespace Ice | 5964 } // end of namespace Ice |
5889 | 5965 |
5890 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H | 5966 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H |
OLD | NEW |