OLD | NEW |
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// | 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 // | 9 // |
10 // This file implements the TargetLoweringX8632 class, which | 10 // This file implements the TargetLoweringX8632 class, which |
(...skipping 2636 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2647 } | 2647 } |
2648 if (!Intrinsics::VerifyMemoryOrder( | 2648 if (!Intrinsics::VerifyMemoryOrder( |
2649 llvm::cast<ConstantInteger>(Instr->getArg(4))->getValue())) { | 2649 llvm::cast<ConstantInteger>(Instr->getArg(4))->getValue())) { |
2650 Func->setError("Unexpected memory ordering (failure) for AtomicCmpxchg"); | 2650 Func->setError("Unexpected memory ordering (failure) for AtomicCmpxchg"); |
2651 return; | 2651 return; |
2652 } | 2652 } |
2653 Variable *DestPrev = Instr->getDest(); | 2653 Variable *DestPrev = Instr->getDest(); |
2654 Operand *PtrToMem = Instr->getArg(0); | 2654 Operand *PtrToMem = Instr->getArg(0); |
2655 Operand *Expected = Instr->getArg(1); | 2655 Operand *Expected = Instr->getArg(1); |
2656 Operand *Desired = Instr->getArg(2); | 2656 Operand *Desired = Instr->getArg(2); |
| 2657 if (tryOptimizedCmpxchgCmpBr(DestPrev, PtrToMem, Expected, Desired)) |
| 2658 return; |
2657 lowerAtomicCmpxchg(DestPrev, PtrToMem, Expected, Desired); | 2659 lowerAtomicCmpxchg(DestPrev, PtrToMem, Expected, Desired); |
2658 // TODO(jvoung): If we peek ahead a few instructions and see how | |
2659 // DestPrev is used (typically via another compare and branch), | |
2660 // we may be able to optimize. If the result truly is used by a | |
2661 // compare + branch, and the comparison is for equality, then we can | |
2662 // optimize out the later compare, and fuse with the later branch. | |
2663 return; | 2660 return; |
2664 } | 2661 } |
2665 case Intrinsics::AtomicFence: | 2662 case Intrinsics::AtomicFence: |
2666 if (!Intrinsics::VerifyMemoryOrder( | 2663 if (!Intrinsics::VerifyMemoryOrder( |
2667 llvm::cast<ConstantInteger>(Instr->getArg(0))->getValue())) { | 2664 llvm::cast<ConstantInteger>(Instr->getArg(0))->getValue())) { |
2668 Func->setError("Unexpected memory ordering for AtomicFence"); | 2665 Func->setError("Unexpected memory ordering for AtomicFence"); |
2669 return; | 2666 return; |
2670 } | 2667 } |
2671 _mfence(); | 2668 _mfence(); |
2672 return; | 2669 return; |
(...skipping 295 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2968 } | 2965 } |
2969 Variable *T_eax = makeReg(Expected->getType(), Reg_eax); | 2966 Variable *T_eax = makeReg(Expected->getType(), Reg_eax); |
2970 _mov(T_eax, Expected); | 2967 _mov(T_eax, Expected); |
2971 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Expected->getType()); | 2968 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Expected->getType()); |
2972 Variable *DesiredReg = legalizeToVar(Desired); | 2969 Variable *DesiredReg = legalizeToVar(Desired); |
2973 const bool Locked = true; | 2970 const bool Locked = true; |
2974 _cmpxchg(Addr, T_eax, DesiredReg, Locked); | 2971 _cmpxchg(Addr, T_eax, DesiredReg, Locked); |
2975 _mov(DestPrev, T_eax); | 2972 _mov(DestPrev, T_eax); |
2976 } | 2973 } |
2977 | 2974 |
// Attempts to fuse an atomic cmpxchg intrinsic with an immediately following
// icmp-eq + conditional branch pair, so the flags produced by the locked
// cmpxchg drive the branch directly and the separate compare is elided.
//
// Dest     - destination of the intrinsic (the "previous value" result).
// PtrToMem - pointer operand of the cmpxchg.
// Expected - value compared against memory.
// Desired  - value stored on a successful exchange.
//
// Returns true if the fused form was emitted (the cmpxchg, any intervening
// phi-store assignments, and a fused branch have all been lowered, and the
// original icmp/br instructions deleted); returns false if the pattern does
// not match, in which case nothing has been lowered and the caller must
// lower the cmpxchg itself.
bool TargetX8632::tryOptimizedCmpxchgCmpBr(Variable *Dest, Operand *PtrToMem,
                                           Operand *Expected,
                                           Operand *Desired) {
  // At the lowest optimization level, skip the peephole entirely.
  if (Ctx->getOptLevel() == Opt_m1)
    return false;
  // Peek ahead a few instructions and see how Dest is used.
  // It's very common to have:
  //
  // %x = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* ptr, i32 %expected, ...)
  // [%y_phi = ...] // list of phi stores
  // %p = icmp eq i32 %x, %expected
  // br i1 %p, label %l1, label %l2
  //
  // which we can optimize into:
  //
  // %x = <cmpxchg code>
  // [%y_phi = ...] // list of phi stores
  // br eq, %l1, %l2
  InstList::iterator I = Context.getCur();
  // I is currently the InstIntrinsicCall. Peek past that.
  // This assumes that the atomic cmpxchg has not been lowered yet,
  // so that the instructions seen in the scan from "Cur" is simple.
  assert(llvm::isa<InstIntrinsicCall>(*I));
  // NOTE(review): the loop below only terminates if getNextInst(I) advances
  // the iterator I it is given (i.e. takes it by reference) -- confirm
  // against LoweringContext's declaration.
  Inst *NextInst = Context.getNextInst(I);
  if (!NextInst)
    return false;
  // There might be phi assignments right before the compare+branch, since this
  // could be a backward branch for a loop. This placement of assignments is
  // determined by placePhiStores().
  std::vector<InstAssign *> PhiAssigns;
  while (InstAssign *PhiAssign = llvm::dyn_cast<InstAssign>(NextInst)) {
    // A phi store that overwrites Dest would clobber the value the compare
    // reads, so the pattern cannot be fused.
    if (PhiAssign->getDest() == Dest)
      return false;
    PhiAssigns.push_back(PhiAssign);
    NextInst = Context.getNextInst(I);
    if (!NextInst)
      return false;
  }
  if (InstIcmp *NextCmp = llvm::dyn_cast<InstIcmp>(NextInst)) {
    // Only an equality compare of Dest against Expected (in either operand
    // order) matches the cmpxchg success condition.
    if (!(NextCmp->getCondition() == InstIcmp::Eq &&
          ((NextCmp->getSrc(0) == Dest && NextCmp->getSrc(1) == Expected) ||
           (NextCmp->getSrc(1) == Dest && NextCmp->getSrc(0) == Expected)))) {
      return false;
    }
    NextInst = Context.getNextInst(I);
    if (!NextInst)
      return false;
    if (InstBr *NextBr = llvm::dyn_cast<InstBr>(NextInst)) {
      // The branch must be conditional on exactly the compare's result, and
      // that result must have no other later use, or the compare cannot be
      // deleted.
      if (!NextBr->isUnconditional() &&
          NextCmp->getDest() == NextBr->getCondition() &&
          NextBr->isLastUse(NextCmp->getDest())) {
        lowerAtomicCmpxchg(Dest, PtrToMem, Expected, Desired);
        for (size_t i = 0; i < PhiAssigns.size(); ++i) {
          // Lower the phi assignments now, before the branch (same placement
          // as before).
          // NOTE(review): assumes lowerAssign here emits no instruction that
          // clobbers the flags the fused branch consumes -- confirm.
          InstAssign *PhiAssign = PhiAssigns[i];
          lowerAssign(PhiAssign);
          PhiAssign->setDeleted();
          Context.advanceNext();
        }
        // Branch on the equal condition left by the locked cmpxchg.
        _br(InstX8632::Br_e, NextBr->getTargetTrue(), NextBr->getTargetFalse());
        // Skip over the old compare and branch, by deleting them.
        NextCmp->setDeleted();
        NextBr->setDeleted();
        Context.advanceNext();
        Context.advanceNext();
        return true;
      }
    }
  }
  return false;
}
| 3047 |
2978 void TargetX8632::lowerAtomicRMW(Variable *Dest, uint32_t Operation, | 3048 void TargetX8632::lowerAtomicRMW(Variable *Dest, uint32_t Operation, |
2979 Operand *Ptr, Operand *Val) { | 3049 Operand *Ptr, Operand *Val) { |
2980 bool NeedsCmpxchg = false; | 3050 bool NeedsCmpxchg = false; |
2981 LowerBinOp Op_Lo = NULL; | 3051 LowerBinOp Op_Lo = NULL; |
2982 LowerBinOp Op_Hi = NULL; | 3052 LowerBinOp Op_Hi = NULL; |
2983 switch (Operation) { | 3053 switch (Operation) { |
2984 default: | 3054 default: |
2985 Func->setError("Unknown AtomicRMW operation"); | 3055 Func->setError("Unknown AtomicRMW operation"); |
2986 return; | 3056 return; |
2987 case Intrinsics::AtomicAdd: { | 3057 case Intrinsics::AtomicAdd: { |
(...skipping 1029 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4017 for (SizeT i = 0; i < Size; ++i) { | 4087 for (SizeT i = 0; i < Size; ++i) { |
4018 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; | 4088 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; |
4019 } | 4089 } |
4020 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; | 4090 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; |
4021 } | 4091 } |
4022 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName | 4092 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName |
4023 << "\n"; | 4093 << "\n"; |
4024 } | 4094 } |
4025 | 4095 |
4026 } // end of namespace Ice | 4096 } // end of namespace Ice |
OLD | NEW |