Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(2)

Side by Side Diff: src/IceTargetLoweringX86BaseImpl.h

Issue 1436623002: Improve bool folding (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Enabled fcmp folding and test Created 5 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
(...skipping 63 matching lines...) Expand 10 before | Expand all | Expand 10 after
74 }; 74 };
75 75
76 template <class MachineTraits> class BoolFolding { 76 template <class MachineTraits> class BoolFolding {
77 public: 77 public:
78 enum BoolFoldingProducerKind { 78 enum BoolFoldingProducerKind {
79 PK_None, 79 PK_None,
80 // TODO(jpp): PK_Icmp32 is no longer meaningful. Rename to PK_IcmpNative. 80 // TODO(jpp): PK_Icmp32 is no longer meaningful. Rename to PK_IcmpNative.
81 PK_Icmp32, 81 PK_Icmp32,
82 PK_Icmp64, 82 PK_Icmp64,
83 PK_Fcmp, 83 PK_Fcmp,
84 PK_Trunc 84 PK_Trunc,
85 PK_Arith // A flag-setting arithmetic instruction.
85 }; 86 };
86 87
87 /// Currently the actual enum values are not used (other than CK_None), but we 88 /// Currently the actual enum values are not used (other than CK_None), but we
88 /// go ahead and produce them anyway for symmetry with the 89 /// go ahead and produce them anyway for symmetry with the
89 /// BoolFoldingProducerKind. 90 /// BoolFoldingProducerKind.
90 enum BoolFoldingConsumerKind { CK_None, CK_Br, CK_Select, CK_Sext, CK_Zext }; 91 enum BoolFoldingConsumerKind { CK_None, CK_Br, CK_Select, CK_Sext, CK_Zext };
91 92
92 private: 93 private:
93 BoolFolding(const BoolFolding &) = delete; 94 BoolFolding(const BoolFolding &) = delete;
94 BoolFolding &operator=(const BoolFolding &) = delete; 95 BoolFolding &operator=(const BoolFolding &) = delete;
(...skipping 23 matching lines...) Expand all
118 : Instr(I), IsComplex(BoolFolding<MachineTraits>::hasComplexLowering(I)) {} 119 : Instr(I), IsComplex(BoolFolding<MachineTraits>::hasComplexLowering(I)) {}
119 120
120 template <class MachineTraits> 121 template <class MachineTraits>
121 typename BoolFolding<MachineTraits>::BoolFoldingProducerKind 122 typename BoolFolding<MachineTraits>::BoolFoldingProducerKind
122 BoolFolding<MachineTraits>::getProducerKind(const Inst *Instr) { 123 BoolFolding<MachineTraits>::getProducerKind(const Inst *Instr) {
123 if (llvm::isa<InstIcmp>(Instr)) { 124 if (llvm::isa<InstIcmp>(Instr)) {
124 if (MachineTraits::Is64Bit || Instr->getSrc(0)->getType() != IceType_i64) 125 if (MachineTraits::Is64Bit || Instr->getSrc(0)->getType() != IceType_i64)
125 return PK_Icmp32; 126 return PK_Icmp32;
126 return PK_Icmp64; 127 return PK_Icmp64;
127 } 128 }
129 if (llvm::isa<InstFcmp>(Instr))
130 return PK_Fcmp;
131 if (auto *Arith = llvm::dyn_cast<InstArithmetic>(Instr)) {
132 if (MachineTraits::Is64Bit || Arith->getSrc(0)->getType() != IceType_i64) {
133 switch (Arith->getOp()) {
134 default:
135 break;
Jim Stichnoth 2015/11/11 14:05:20 I think you should just "return PK_None", unless y
sehr 2015/11/13 06:00:52 Done.
136 case InstArithmetic::And:
137 case InstArithmetic::Or:
138 return PK_Arith;
139 }
140 }
141 }
128 return PK_None; // TODO(stichnot): remove this 142 return PK_None; // TODO(stichnot): remove this
129 143
130 if (llvm::isa<InstFcmp>(Instr))
131 return PK_Fcmp;
132 if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) { 144 if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) {
133 switch (Cast->getCastKind()) { 145 switch (Cast->getCastKind()) {
134 default: 146 default:
135 return PK_None; 147 return PK_None;
136 case InstCast::Trunc: 148 case InstCast::Trunc:
137 return PK_Trunc; 149 return PK_Trunc;
138 } 150 }
139 } 151 }
140 return PK_None; 152 return PK_None;
141 } 153 }
(...skipping 1776 matching lines...) Expand 10 before | Expand all | Expand 10 after
1918 if (const class Inst *Producer = FoldingInfo.getProducerFor(Cond)) { 1930 if (const class Inst *Producer = FoldingInfo.getProducerFor(Cond)) {
1919 assert(Producer->isDeleted()); 1931 assert(Producer->isDeleted());
1920 switch (BoolFolding::getProducerKind(Producer)) { 1932 switch (BoolFolding::getProducerKind(Producer)) {
1921 default: 1933 default:
1922 break; 1934 break;
1923 case BoolFolding::PK_Icmp32: 1935 case BoolFolding::PK_Icmp32:
1924 case BoolFolding::PK_Icmp64: { 1936 case BoolFolding::PK_Icmp64: {
1925 lowerIcmpAndBr(llvm::dyn_cast<InstIcmp>(Producer), Inst); 1937 lowerIcmpAndBr(llvm::dyn_cast<InstIcmp>(Producer), Inst);
1926 return; 1938 return;
1927 } 1939 }
1940 case BoolFolding::PK_Fcmp: {
1941 lowerFcmpAndBr(llvm::dyn_cast<InstFcmp>(Producer), Inst);
1942 return;
1943 }
1944 case BoolFolding::PK_Arith: {
1945 lowerArithAndBr(llvm::dyn_cast<InstArithmetic>(Producer), Inst);
1946 return;
1947 }
1928 } 1948 }
1929 } 1949 }
1930
1931 Operand *Src0 = legalize(Cond, Legal_Reg | Legal_Mem); 1950 Operand *Src0 = legalize(Cond, Legal_Reg | Legal_Mem);
1932 Constant *Zero = Ctx->getConstantZero(IceType_i32); 1951 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1933 _cmp(Src0, Zero); 1952 _cmp(Src0, Zero);
1934 _br(Traits::Cond::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse()); 1953 _br(Traits::Cond::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse());
1935 } 1954 }
1936 1955
1937 template <class Machine> 1956 template <class Machine>
1938 void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) { 1957 void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) {
1939 // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap) 1958 // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap)
1940 InstCast::OpKind CastKind = Inst->getCastKind(); 1959 InstCast::OpKind CastKind = Inst->getCastKind();
(...skipping 592 matching lines...) Expand 10 before | Expand all | Expand 10 after
2533 ExtractedElementR = T; 2552 ExtractedElementR = T;
2534 } 2553 }
2535 2554
2536 // Copy the element to the destination. 2555 // Copy the element to the destination.
2537 Variable *Dest = Inst->getDest(); 2556 Variable *Dest = Inst->getDest();
2538 _mov(Dest, ExtractedElementR); 2557 _mov(Dest, ExtractedElementR);
2539 } 2558 }
2540 2559
2541 template <class Machine> 2560 template <class Machine>
2542 void TargetX86Base<Machine>::lowerFcmp(const InstFcmp *Inst) { 2561 void TargetX86Base<Machine>::lowerFcmp(const InstFcmp *Inst) {
2562 constexpr InstBr *Br = nullptr;
2563 lowerFcmpAndBr(Inst, Br);
2564 }
2565
2566 template <class Machine>
2567 void TargetX86Base<Machine>::lowerFcmpAndBr(const InstFcmp *Inst,
2568 const InstBr *Br) {
2543 Operand *Src0 = Inst->getSrc(0); 2569 Operand *Src0 = Inst->getSrc(0);
2544 Operand *Src1 = Inst->getSrc(1); 2570 Operand *Src1 = Inst->getSrc(1);
2545 Variable *Dest = Inst->getDest(); 2571 Variable *Dest = Inst->getDest();
2546 2572
2547 if (isVectorType(Dest->getType())) { 2573 if (isVectorType(Dest->getType())) {
2574 if (Br)
2575 llvm::report_fatal_error("vector compare/branch cannot be folded");
2548 InstFcmp::FCond Condition = Inst->getCondition(); 2576 InstFcmp::FCond Condition = Inst->getCondition();
2549 size_t Index = static_cast<size_t>(Condition); 2577 size_t Index = static_cast<size_t>(Condition);
2550 assert(Index < Traits::TableFcmpSize); 2578 assert(Index < Traits::TableFcmpSize);
2551 2579
2552 if (Traits::TableFcmp[Index].SwapVectorOperands) { 2580 if (Traits::TableFcmp[Index].SwapVectorOperands) {
2553 Operand *T = Src0; 2581 Operand *T = Src0;
Jim Stichnoth 2015/11/11 14:05:20 std::swap(Src0, Src1);
sehr 2015/11/13 06:00:52 Done.
2554 Src0 = Src1; 2582 Src0 = Src1;
2555 Src1 = T; 2583 Src1 = T;
2556 } 2584 }
2557 2585
2558 Variable *T = nullptr; 2586 Variable *T = nullptr;
2559 2587
2560 if (Condition == InstFcmp::True) { 2588 if (Condition == InstFcmp::True) {
2561 // makeVectorOfOnes() requires an integer vector type. 2589 // makeVectorOfOnes() requires an integer vector type.
2562 T = makeVectorOfMinusOnes(IceType_v4i32); 2590 T = makeVectorOfMinusOnes(IceType_v4i32);
2563 } else if (Condition == InstFcmp::False) { 2591 } else if (Condition == InstFcmp::False) {
(...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after
2626 bool HasC1 = (Traits::TableFcmp[Index].C1 != Traits::Cond::Br_None); 2654 bool HasC1 = (Traits::TableFcmp[Index].C1 != Traits::Cond::Br_None);
2627 bool HasC2 = (Traits::TableFcmp[Index].C2 != Traits::Cond::Br_None); 2655 bool HasC2 = (Traits::TableFcmp[Index].C2 != Traits::Cond::Br_None);
2628 if (HasC1) { 2656 if (HasC1) {
2629 Src0 = legalize(Src0); 2657 Src0 = legalize(Src0);
2630 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); 2658 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
2631 Variable *T = nullptr; 2659 Variable *T = nullptr;
2632 _mov(T, Src0); 2660 _mov(T, Src0);
2633 _ucomiss(T, Src1RM); 2661 _ucomiss(T, Src1RM);
2634 if (!HasC2) { 2662 if (!HasC2) {
2635 assert(Traits::TableFcmp[Index].Default); 2663 assert(Traits::TableFcmp[Index].Default);
2636 _setcc(Dest, Traits::TableFcmp[Index].C1); 2664 setccOrBr(Traits::TableFcmp[Index].C1, Dest, Br);
2637 return; 2665 return;
2638 } 2666 }
2639 } 2667 }
2640 Constant *Default = 2668 int32_t IntDefault = Traits::TableFcmp[Index].Default;
2641 Ctx->getConstantInt(Dest->getType(), Traits::TableFcmp[Index].Default); 2669 if (Br == nullptr) {
2642 _mov(Dest, Default); 2670 Constant *Default = Ctx->getConstantInt(Dest->getType(), IntDefault);
2643 if (HasC1) { 2671 _mov(Dest, Default);
2644 typename Traits::Insts::Label *Label = 2672 if (HasC1) {
2645 Traits::Insts::Label::create(Func, this); 2673 typename Traits::Insts::Label *Label =
2646 _br(Traits::TableFcmp[Index].C1, Label); 2674 Traits::Insts::Label::create(Func, this);
2647 if (HasC2) { 2675 _br(Traits::TableFcmp[Index].C1, Label);
2648 _br(Traits::TableFcmp[Index].C2, Label); 2676 if (HasC2) {
2677 _br(Traits::TableFcmp[Index].C2, Label);
2678 }
2679 Constant *NonDefault = Ctx->getConstantInt(Dest->getType(), !IntDefault);
2680 _mov_redefined(Dest, NonDefault);
2681 Context.insert(Label);
2649 } 2682 }
2650 Constant *NonDefault = 2683 } else {
Jim Stichnoth 2015/11/11 14:05:20 Can this be "else if"? } else if (IntDefault ==
sehr 2015/11/13 06:00:52 I used std::swap and removed the duplication, as w
2651 Ctx->getConstantInt(Dest->getType(), !Traits::TableFcmp[Index].Default); 2684 if (IntDefault == 0) {
2652 _mov_redefined(Dest, NonDefault); 2685 if (HasC1) {
2653 Context.insert(Label); 2686 _br(Traits::TableFcmp[Index].C1, Br->getTargetFalse());
2687 if (HasC2) {
2688 _br(Traits::TableFcmp[Index].C2, Br->getTargetFalse());
2689 }
2690 _br(Br->getTargetTrue());
2691 return;
2692 }
2693 _br(Br->getTargetFalse());
2694 } else {
2695 if (HasC1) {
2696 _br(Traits::TableFcmp[Index].C1, Br->getTargetTrue());
2697 if (HasC2) {
2698 _br(Traits::TableFcmp[Index].C2, Br->getTargetTrue());
2699 }
2700 _br(Br->getTargetFalse());
2701 return;
2702 }
2703 _br(Br->getTargetTrue());
2704 }
2654 } 2705 }
2655 } 2706 }
2656 2707
2657 inline bool isZero(const Operand *Opnd) { 2708 inline bool isZero(const Operand *Opnd) {
2658 if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Opnd)) 2709 if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Opnd))
2659 return C64->getValue() == 0; 2710 return C64->getValue() == 0;
2660 if (auto *C32 = llvm::dyn_cast<ConstantInteger32>(Opnd)) 2711 if (auto *C32 = llvm::dyn_cast<ConstantInteger32>(Opnd))
2661 return C32->getValue() == 0; 2712 return C32->getValue() == 0;
2662 return false; 2713 return false;
2663 } 2714 }
(...skipping 289 matching lines...) Expand 10 before | Expand all | Expand 10 after
2953 // TODO(sehr,stichnot): This could be done with a single unconditional 3004 // TODO(sehr,stichnot): This could be done with a single unconditional
2954 // branch instruction, but subzero doesn't know how to handle the resulting 3005 // branch instruction, but subzero doesn't know how to handle the resulting
2955 // control flow graph changes now. Make it do so to eliminate mov and cmp. 3006 // control flow graph changes now. Make it do so to eliminate mov and cmp.
2956 _mov(Dest, Ctx->getConstantInt(Dest->getType(), (IcmpResult ? 1 : 0))); 3007 _mov(Dest, Ctx->getConstantInt(Dest->getType(), (IcmpResult ? 1 : 0)));
2957 _cmp(Dest, Ctx->getConstantInt(Dest->getType(), 0)); 3008 _cmp(Dest, Ctx->getConstantInt(Dest->getType(), 0));
2958 _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse()); 3009 _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse());
2959 } 3010 }
2960 } 3011 }
2961 3012
2962 template <class Machine> 3013 template <class Machine>
3014 void TargetX86Base<Machine>::lowerArithAndBr(const InstArithmetic *Arith,
3015 const InstBr *Br) {
3016 Variable *T = nullptr;
3017 Operand *Src0 = legalize(Arith->getSrc(0));
3018 Operand *Src1 = legalize(Arith->getSrc(1));
3019 Variable *Dest = Arith->getDest();
3020 switch (Arith->getOp()) {
3021 default:
3022 llvm_unreachable("arithmetic operator not AND or OR");
3023 break;
3024 case InstArithmetic::And:
3025 _mov(T, Src0);
3026 _and(T, Src1);
John 2015/11/11 02:08:10 would _test be better here? just curious.
Jim Stichnoth 2015/11/11 14:05:20 Yes it would be better, since T would be able to s
sehr 2015/11/13 06:00:52 Done.
sehr 2015/11/13 06:00:52 Done.
3027 break;
3028 case InstArithmetic::Or:
3029 _mov(T, Src0);
3030 _or(T, Src1);
3031 break;
3032 }
3033 Context.insert(InstFakeUse::create(Func, T));
3034 Context.insert(InstFakeDef::create(Func, Dest));
3035 _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse());
3036 }
3037
3038 template <class Machine>
2963 void TargetX86Base<Machine>::lowerInsertElement(const InstInsertElement *Inst) { 3039 void TargetX86Base<Machine>::lowerInsertElement(const InstInsertElement *Inst) {
2964 Operand *SourceVectNotLegalized = Inst->getSrc(0); 3040 Operand *SourceVectNotLegalized = Inst->getSrc(0);
2965 Operand *ElementToInsertNotLegalized = Inst->getSrc(1); 3041 Operand *ElementToInsertNotLegalized = Inst->getSrc(1);
2966 ConstantInteger32 *ElementIndex = 3042 ConstantInteger32 *ElementIndex =
2967 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(2)); 3043 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(2));
2968 // Only constant indices are allowed in PNaCl IR. 3044 // Only constant indices are allowed in PNaCl IR.
2969 assert(ElementIndex); 3045 assert(ElementIndex);
2970 unsigned Index = ElementIndex->getValue(); 3046 unsigned Index = ElementIndex->getValue();
2971 assert(Index < typeNumElements(SourceVectNotLegalized->getType())); 3047 assert(Index < typeNumElements(SourceVectNotLegalized->getType()));
2972 3048
(...skipping 2908 matching lines...) Expand 10 before | Expand all | Expand 10 after
5881 } 5957 }
5882 // the offset is not eligible for blinding or pooling, return the original 5958 // the offset is not eligible for blinding or pooling, return the original
5883 // mem operand 5959 // mem operand
5884 return MemOperand; 5960 return MemOperand;
5885 } 5961 }
5886 5962
5887 } // end of namespace X86Internal 5963 } // end of namespace X86Internal
5888 } // end of namespace Ice 5964 } // end of namespace Ice
5889 5965
5890 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H 5966 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698