Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(543)

Side by Side Diff: src/IceTargetLoweringX8632.cpp

Issue 413903002: Subzero: Add a peephole to fuse cmpxchg w/ later cmp+branch. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: blank Created 6 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 // 9 //
10 // This file implements the TargetLoweringX8632 class, which 10 // This file implements the TargetLoweringX8632 class, which
(...skipping 2636 matching lines...) Expand 10 before | Expand all | Expand 10 after
2647 } 2647 }
2648 if (!Intrinsics::VerifyMemoryOrder( 2648 if (!Intrinsics::VerifyMemoryOrder(
2649 llvm::cast<ConstantInteger>(Instr->getArg(4))->getValue())) { 2649 llvm::cast<ConstantInteger>(Instr->getArg(4))->getValue())) {
2650 Func->setError("Unexpected memory ordering (failure) for AtomicCmpxchg"); 2650 Func->setError("Unexpected memory ordering (failure) for AtomicCmpxchg");
2651 return; 2651 return;
2652 } 2652 }
2653 Variable *DestPrev = Instr->getDest(); 2653 Variable *DestPrev = Instr->getDest();
2654 Operand *PtrToMem = Instr->getArg(0); 2654 Operand *PtrToMem = Instr->getArg(0);
2655 Operand *Expected = Instr->getArg(1); 2655 Operand *Expected = Instr->getArg(1);
2656 Operand *Desired = Instr->getArg(2); 2656 Operand *Desired = Instr->getArg(2);
2657 if (tryOptimizedCmpxchgCmpBr(DestPrev, PtrToMem, Expected, Desired))
2658 return;
2657 lowerAtomicCmpxchg(DestPrev, PtrToMem, Expected, Desired); 2659 lowerAtomicCmpxchg(DestPrev, PtrToMem, Expected, Desired);
2658 // TODO(jvoung): If we peek ahead a few instructions and see how
2659 // DestPrev is used (typically via another compare and branch),
2660 // we may be able to optimize. If the result truly is used by a
2661 // compare + branch, and the comparison is for equality, then we can
2662 // optimize out the later compare, and fuse with the later branch.
2663 return; 2660 return;
2664 } 2661 }
2665 case Intrinsics::AtomicFence: 2662 case Intrinsics::AtomicFence:
2666 if (!Intrinsics::VerifyMemoryOrder( 2663 if (!Intrinsics::VerifyMemoryOrder(
2667 llvm::cast<ConstantInteger>(Instr->getArg(0))->getValue())) { 2664 llvm::cast<ConstantInteger>(Instr->getArg(0))->getValue())) {
2668 Func->setError("Unexpected memory ordering for AtomicFence"); 2665 Func->setError("Unexpected memory ordering for AtomicFence");
2669 return; 2666 return;
2670 } 2667 }
2671 _mfence(); 2668 _mfence();
2672 return; 2669 return;
(...skipping 295 matching lines...) Expand 10 before | Expand all | Expand 10 after
2968 } 2965 }
2969 Variable *T_eax = makeReg(Expected->getType(), Reg_eax); 2966 Variable *T_eax = makeReg(Expected->getType(), Reg_eax);
2970 _mov(T_eax, Expected); 2967 _mov(T_eax, Expected);
2971 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Expected->getType()); 2968 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Expected->getType());
2972 Variable *DesiredReg = legalizeToVar(Desired); 2969 Variable *DesiredReg = legalizeToVar(Desired);
2973 const bool Locked = true; 2970 const bool Locked = true;
2974 _cmpxchg(Addr, T_eax, DesiredReg, Locked); 2971 _cmpxchg(Addr, T_eax, DesiredReg, Locked);
2975 _mov(DestPrev, T_eax); 2972 _mov(DestPrev, T_eax);
2976 } 2973 }
2977 2974
// Peephole: tries to fuse an atomic cmpxchg with a following "icmp eq" on
// its result plus a conditional branch on that compare. Dest, PtrToMem,
// Expected and Desired are forwarded unchanged to lowerAtomicCmpxchg().
// Returns true if the fused cmpxchg + branch sequence was emitted, in which
// case the matched phi assignments, compare and branch are marked deleted and
// the lowering context is advanced past them. Returns false, before emitting
// anything, if the pattern does not match.
2975 bool TargetX8632::tryOptimizedCmpxchgCmpBr(Variable *Dest, Operand *PtrToMem,
2976 Operand *Expected,
2977 Operand *Desired) {
// The peephole is not attempted at the Opt_m1 optimization level.
2978 if (Ctx->getOptLevel() == Opt_m1)
2979 return false;
2980 // Peek ahead a few instructions and see how Dest is used.
2981 // It's very common to have:
2982 //
2983 // %x = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* ptr, i32 %expected, ...)
2984 // [%y_phi = ...] // list of phi stores
2985 // %p = icmp eq i32 %x, %expected
2986 // br i1 %p, label %l1, label %l2
2987 //
2988 // which we can optimize into:
2989 //
2990 // %x = <cmpxchg code>
2991 // [%y_phi = ...] // list of phi stores
2992 // br eq, %l1, %l2
2993 InstList::iterator I = Context.getCur();
2994 // I is currently the InstIntrinsicCall. Peek past that.
2995 // This assumes that the atomic cmpxchg has not been lowered yet,
2996 // so that the instructions seen in the scan from "Cur" are simple.
2997 assert(llvm::isa<InstIntrinsicCall>(*I));
// NOTE(review): getNextInst(I) is called repeatedly with the same I below, so
// it presumably advances I as a side effect and returns NULL at the end of
// the instruction list -- confirm against its declaration.
2998 Inst *NextInst = Context.getNextInst(I);
2999 if (!NextInst)
3000 return false;
3001 // There might be phi assignments right before the compare+branch, since this
3002 // could be a backward branch for a loop. This placement of assignments is
3003 // determined by placePhiStores().
3004 std::vector<InstAssign *> PhiAssigns;
3005 while (InstAssign *PhiAssign = llvm::dyn_cast<InstAssign>(NextInst)) {
// Give up if an intervening assignment overwrites Dest, since the later
// compare would then no longer be testing the cmpxchg result.
3006 if (PhiAssign->getDest() == Dest)
3007 return false;
3008 PhiAssigns.push_back(PhiAssign);
3009 NextInst = Context.getNextInst(I);
3010 if (!NextInst)
3011 return false;
3012 }
3013 if (InstIcmp *NextCmp = llvm::dyn_cast<InstIcmp>(NextInst)) {
// Only an equality compare between Dest and Expected (in either operand
// order) qualifies.
3014 if (!(NextCmp->getCondition() == InstIcmp::Eq &&
3015 ((NextCmp->getSrc(0) == Dest && NextCmp->getSrc(1) == Expected) ||
3016 (NextCmp->getSrc(1) == Dest && NextCmp->getSrc(0) == Expected)))) {
3017 return false;
3018 }
3019 NextInst = Context.getNextInst(I);
3020 if (!NextInst)
3021 return false;
3022 if (InstBr *NextBr = llvm::dyn_cast<InstBr>(NextInst)) {
// The branch must be conditional on the compare result, and must be the
// last use of that result, because the compare itself is deleted below.
3023 if (!NextBr->isUnconditional() &&
3024 NextCmp->getDest() == NextBr->getCondition() &&
3025 NextBr->isLastUse(NextCmp->getDest())) {
3026 lowerAtomicCmpxchg(Dest, PtrToMem, Expected, Desired);
3027 for (size_t i = 0; i < PhiAssigns.size(); ++i) {
3028 // Lower the phi assignments now, before the branch (same placement
3029 // as before).
3030 InstAssign *PhiAssign = PhiAssigns[i];
3031 lowerAssign(PhiAssign);
3032 PhiAssign->setDeleted();
3033 Context.advanceNext();
3034 }
// NOTE(review): the Br_e branch presumably consumes the flags left by the
// cmpxchg emitted above, so this relies on lowerAssign() emitting nothing
// that alters them -- confirm.
3035 _br(InstX8632::Br_e, NextBr->getTargetTrue(), NextBr->getTargetFalse());
3036 // Skip over the old compare and branch, by deleting them.
3037 NextCmp->setDeleted();
3038 NextBr->setDeleted();
3039 Context.advanceNext();
3040 Context.advanceNext();
3041 return true;
3042 }
3043 }
3044 }
3045 return false;
3046 }
3047
2978 void TargetX8632::lowerAtomicRMW(Variable *Dest, uint32_t Operation, 3048 void TargetX8632::lowerAtomicRMW(Variable *Dest, uint32_t Operation,
2979 Operand *Ptr, Operand *Val) { 3049 Operand *Ptr, Operand *Val) {
2980 bool NeedsCmpxchg = false; 3050 bool NeedsCmpxchg = false;
2981 LowerBinOp Op_Lo = NULL; 3051 LowerBinOp Op_Lo = NULL;
2982 LowerBinOp Op_Hi = NULL; 3052 LowerBinOp Op_Hi = NULL;
2983 switch (Operation) { 3053 switch (Operation) {
2984 default: 3054 default:
2985 Func->setError("Unknown AtomicRMW operation"); 3055 Func->setError("Unknown AtomicRMW operation");
2986 return; 3056 return;
2987 case Intrinsics::AtomicAdd: { 3057 case Intrinsics::AtomicAdd: {
(...skipping 1029 matching lines...) Expand 10 before | Expand all | Expand 10 after
4017 for (SizeT i = 0; i < Size; ++i) { 4087 for (SizeT i = 0; i < Size; ++i) {
4018 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; 4088 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n";
4019 } 4089 }
4020 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; 4090 Str << "\t.size\t" << MangledName << ", " << Size << "\n";
4021 } 4091 }
4022 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName 4092 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName
4023 << "\n"; 4093 << "\n";
4024 } 4094 }
4025 4095
4026 } // end of namespace Ice 4096 } // end of namespace Ice
OLDNEW
« no previous file with comments | « src/IceTargetLoweringX8632.h ('k') | tests_lit/llvm2ice_tests/nacl-atomic-cmpxchg-optimization.ll » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698