OLD | NEW |
---|---|
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// | 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 // | 9 // |
10 // This file implements the TargetLoweringX8632 class, which | 10 // This file implements the TargetLoweringX8632 class, which |
(...skipping 2574 matching lines...)
2585 } | 2585 } |
2586 if (!Intrinsics::VerifyMemoryOrder( | 2586 if (!Intrinsics::VerifyMemoryOrder( |
2587 llvm::cast<ConstantInteger>(Instr->getArg(4))->getValue())) { | 2587 llvm::cast<ConstantInteger>(Instr->getArg(4))->getValue())) { |
2588 Func->setError("Unexpected memory ordering (failure) for AtomicCmpxchg"); | 2588 Func->setError("Unexpected memory ordering (failure) for AtomicCmpxchg"); |
2589 return; | 2589 return; |
2590 } | 2590 } |
2591 Variable *DestPrev = Instr->getDest(); | 2591 Variable *DestPrev = Instr->getDest(); |
2592 Operand *PtrToMem = Instr->getArg(0); | 2592 Operand *PtrToMem = Instr->getArg(0); |
2593 Operand *Expected = Instr->getArg(1); | 2593 Operand *Expected = Instr->getArg(1); |
2594 Operand *Desired = Instr->getArg(2); | 2594 Operand *Desired = Instr->getArg(2); |
2595 if (tryOptimizedCmpxchgCmpBr(DestPrev, PtrToMem, Expected, Desired)) | |
2596 return; | |
2595 lowerAtomicCmpxchg(DestPrev, PtrToMem, Expected, Desired); | 2597 lowerAtomicCmpxchg(DestPrev, PtrToMem, Expected, Desired); |
2596 // TODO(jvoung): If we peek ahead a few instructions and see how | |
2597 // DestPrev is used (typically via another compare and branch), | |
2598 // we may be able to optimize. If the result truly is used by a | |
2599 // compare + branch, and the comparison is for equality, then we can | |
2600 // optimize out the later compare, and fuse with the later branch. | |
2601 return; | 2598 return; |
2602 } | 2599 } |
2603 case Intrinsics::AtomicFence: | 2600 case Intrinsics::AtomicFence: |
2604 if (!Intrinsics::VerifyMemoryOrder( | 2601 if (!Intrinsics::VerifyMemoryOrder( |
2605 llvm::cast<ConstantInteger>(Instr->getArg(0))->getValue())) { | 2602 llvm::cast<ConstantInteger>(Instr->getArg(0))->getValue())) { |
2606 Func->setError("Unexpected memory ordering for AtomicFence"); | 2603 Func->setError("Unexpected memory ordering for AtomicFence"); |
2607 return; | 2604 return; |
2608 } | 2605 } |
2609 _mfence(); | 2606 _mfence(); |
2610 return; | 2607 return; |
(...skipping 295 matching lines...)
2906 } | 2903 } |
2907 Variable *T_eax = makeReg(Expected->getType(), Reg_eax); | 2904 Variable *T_eax = makeReg(Expected->getType(), Reg_eax); |
2908 _mov(T_eax, Expected); | 2905 _mov(T_eax, Expected); |
2909 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Expected->getType()); | 2906 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Expected->getType()); |
2910 Variable *DesiredReg = legalizeToVar(Desired); | 2907 Variable *DesiredReg = legalizeToVar(Desired); |
2911 const bool Locked = true; | 2908 const bool Locked = true; |
2912 _cmpxchg(Addr, T_eax, DesiredReg, Locked); | 2909 _cmpxchg(Addr, T_eax, DesiredReg, Locked); |
2913 _mov(DestPrev, T_eax); | 2910 _mov(DestPrev, T_eax); |
2914 } | 2911 } |
2915 | 2912 |
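Note: as a rough sketch (placeholder operand names, Intel-style operand order), the tail of lowerAtomicCmpxchg shown above maps to a 32-bit sequence along the lines of:

  mov  eax, <Expected>              # T_eax = Expected
  lock cmpxchg [<Addr>], <Desired>  # if eax == [Addr]: [Addr] = Desired; else eax = [Addr]
  mov  <DestPrev>, eax              # either way, eax now holds the previous memory value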
2913 bool TargetX8632::tryOptimizedCmpxchgCmpBr(Variable *Dest, Operand *PtrToMem, | |
2914 Operand *Expected, | |
2915 Operand *Desired) { | |
2916 if (Ctx->getOptLevel() == Opt_m1) | |
2917 return false; | |
2918 // Peek ahead a few instructions and see how Dest is used. | |
2919 // It's very common to have: | |
2920 // | |
2921 // %x = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* ptr, i32 %expected, ...) | |
2922 // [%y_phi = ...] // list of phi stores | |
2923 // %p = icmp eq i32 %x, %expected | |
2924 // br i1 %p, label %l1, label %l2 | |
2925 // | |
2926 // which we can optimize into: | |
2927 // | |
2928 // %x = <cmpxchg code> | |
2929 // [%y_phi = ...] // list of phi stores | |
2930 // br eq, %l1, %l2 | |
2931 InstList::iterator I = Context.getCur(); | |
Jim Stichnoth, 2014/07/25 17:27:47: Try to use LoweringContext instead of directly usi…
jvoung (off chromium), 2014/07/28 22:55:26: Good point about the skipping deleted instructions…
2932 // I is currently the InstIntrinsicCall. Peek past that. | |
2933 // This assumes that the atomic cmpxchg has not been lowered yet, | |
2934 // so that the instructions seen in the scan from "Cur" is simple. | |
2935 assert(llvm::isa<InstIntrinsicCall>(*I)); | |
2936 if (++I == Context.getEnd()) | |
2937 return false; | |
2938 // There might be phi assignments right before the compare+branch, since this | |
Jim Stichnoth, 2014/07/25 17:27:46: I suggest also adding a note in the Phi lowering p…
jvoung (off chromium), 2014/07/28 22:55:26: Done (added to the existing comment). The splittin…
Jim Stichnoth, 2014/07/29 18:08:44: I was thinking more about possible optimizations.
2939 // could be a backward branch for a loop. This placement of assignments is | |
2940 // determined by placePhiStores(). | |
2941 std::vector<InstAssign *> PhiAssigns; | |
2942 while (InstAssign *PhiAssign = llvm::dyn_cast<InstAssign>(*I)) { | |
2943 if (PhiAssign->getDest() == Dest) | |
2944 return false; | |
2945 PhiAssigns.push_back(PhiAssign); | |
2946 if (++I == Context.getEnd()) | |
2947 return false; | |
2948 } | |
2949 if (InstIcmp *NextCmp = llvm::dyn_cast<InstIcmp>(*I)) { | |
2950 if (!(NextCmp->getCondition() == InstIcmp::Eq && | |
2951 ((NextCmp->getSrc(0) == Dest && NextCmp->getSrc(1) == Expected) || | |
2952 (NextCmp->getSrc(1) == Dest && NextCmp->getSrc(0) == Expected)))) { | |
2953 return false; | |
2954 } | |
2955 if (++I == Context.getEnd()) | |
2956 return false; | |
2957 if (InstBr *NextBr = llvm::dyn_cast<InstBr>(*I)) { | |
2958 if (!NextBr->isUnconditional() && | |
2959 NextCmp->getDest() == NextBr->getCondition() && | |
2960 NextBr->isLastUse(NextCmp->getDest())) { | |
2961 lowerAtomicCmpxchg(Dest, PtrToMem, Expected, Desired); | |
2962 for (size_t i = 0; i < PhiAssigns.size(); ++i) { | |
2963 // Lower the phi assignments now, before the branch (same placement | |
2964 // as before). | |
2965 InstAssign *PhiAssign = PhiAssigns[i]; | |
2966 lowerAssign(PhiAssign); | |
2967 PhiAssign->setDeleted(); | |
2968 Context.advanceNext(); | |
2969 } | |
2970 _br(InstX8632::Br_e, NextBr->getTargetTrue(), NextBr->getTargetFalse()); | |
2971 // Skip over the old compare and branch, by deleting them. | |
2972 NextCmp->setDeleted(); | |
2973 NextBr->setDeleted(); | |
2974 Context.advanceNext(); | |
2975 Context.advanceNext(); | |
2976 return true; | |
2977 } | |
2978 } | |
2979 } | |
2980 return false; | |
2981 } | |
2982 | |
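The fusion is sound because lock cmpxchg already sets ZF according to whether eax (holding %expected) matched the memory value, so the separate equality icmp adds nothing. As a rough sketch with placeholder labels and operand names, and assuming the lowered phi assignments are plain movs (which leave EFLAGS untouched), the fused tail looks roughly like:

  lock cmpxchg [<Addr>], <Desired>  # ZF=1 iff the old value equaled eax (%expected)
  mov  <Dest>, eax                  # still materialize %x for any later uses
  # ...lowered phi assignments, if any...
  je   <l1>                         # Br_e reuses the flags left by cmpxchg
  jmp  <l2>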
2916 void TargetX8632::lowerAtomicRMW(Variable *Dest, uint32_t Operation, | 2983 void TargetX8632::lowerAtomicRMW(Variable *Dest, uint32_t Operation, |
2917 Operand *Ptr, Operand *Val) { | 2984 Operand *Ptr, Operand *Val) { |
2918 bool NeedsCmpxchg = false; | 2985 bool NeedsCmpxchg = false; |
2919 LowerBinOp Op_Lo = NULL; | 2986 LowerBinOp Op_Lo = NULL; |
2920 LowerBinOp Op_Hi = NULL; | 2987 LowerBinOp Op_Hi = NULL; |
2921 switch (Operation) { | 2988 switch (Operation) { |
2922 default: | 2989 default: |
2923 Func->setError("Unknown AtomicRMW operation"); | 2990 Func->setError("Unknown AtomicRMW operation"); |
2924 return; | 2991 return; |
2925 case Intrinsics::AtomicAdd: { | 2992 case Intrinsics::AtomicAdd: { |
(...skipping 1007 matching lines...)
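For the AtomicAdd case that opens above (its body falls in the elided lines), a 32-bit fetch-add on x86 can be expressed with lock xadd, which leaves the old value in the source register; a rough sketch with placeholder names, not necessarily what the elided code emits:

  mov  <T>, <Val>
  lock xadd [<Addr>], <T>  # atomically: old = [Addr]; [Addr] = old + T; T = old
  mov  <Dest>, <T>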
3933 for (SizeT i = 0; i < Size; ++i) { | 4000 for (SizeT i = 0; i < Size; ++i) { |
3934 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; | 4001 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; |
3935 } | 4002 } |
3936 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; | 4003 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; |
3937 } | 4004 } |
3938 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName | 4005 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName |
3939 << "\n"; | 4006 << "\n"; |
3940 } | 4007 } |
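For a hypothetical 4-byte internal global (the name foo is a placeholder), the emission code above prints one .byte directive per byte of initializer data, then the .size directive, then the visibility marker:

  .byte 7
  .byte 0
  .byte 0
  .byte 0
  .size foo, 4
  .local foo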
3941 | 4008 |
3942 } // end of namespace Ice | 4009 } // end of namespace Ice |