Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(88)

Side by Side Diff: src/IceTargetLoweringX8632.cpp

Issue 413903002: Subzero: Add a peephole to fuse cmpxchg w/ later cmp+branch. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: bail out of om1 more quickly Created 6 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 // 9 //
10 // This file implements the TargetLoweringX8632 class, which 10 // This file implements the TargetLoweringX8632 class, which
(...skipping 2574 matching lines...) Expand 10 before | Expand all | Expand 10 after
2585 } 2585 }
2586 if (!Intrinsics::VerifyMemoryOrder( 2586 if (!Intrinsics::VerifyMemoryOrder(
2587 llvm::cast<ConstantInteger>(Instr->getArg(4))->getValue())) { 2587 llvm::cast<ConstantInteger>(Instr->getArg(4))->getValue())) {
2588 Func->setError("Unexpected memory ordering (failure) for AtomicCmpxchg"); 2588 Func->setError("Unexpected memory ordering (failure) for AtomicCmpxchg");
2589 return; 2589 return;
2590 } 2590 }
2591 Variable *DestPrev = Instr->getDest(); 2591 Variable *DestPrev = Instr->getDest();
2592 Operand *PtrToMem = Instr->getArg(0); 2592 Operand *PtrToMem = Instr->getArg(0);
2593 Operand *Expected = Instr->getArg(1); 2593 Operand *Expected = Instr->getArg(1);
2594 Operand *Desired = Instr->getArg(2); 2594 Operand *Desired = Instr->getArg(2);
2595 if (tryOptimizedCmpxchgCmpBr(DestPrev, PtrToMem, Expected, Desired))
2596 return;
2595 lowerAtomicCmpxchg(DestPrev, PtrToMem, Expected, Desired); 2597 lowerAtomicCmpxchg(DestPrev, PtrToMem, Expected, Desired);
2596 // TODO(jvoung): If we peek ahead a few instructions and see how
2597 // DestPrev is used (typically via another compare and branch),
2598 // we may be able to optimize. If the result truly is used by a
2599 // compare + branch, and the comparison is for equality, then we can
2600 // optimize out the later compare, and fuse with the later branch.
2601 return; 2598 return;
2602 } 2599 }
2603 case Intrinsics::AtomicFence: 2600 case Intrinsics::AtomicFence:
2604 if (!Intrinsics::VerifyMemoryOrder( 2601 if (!Intrinsics::VerifyMemoryOrder(
2605 llvm::cast<ConstantInteger>(Instr->getArg(0))->getValue())) { 2602 llvm::cast<ConstantInteger>(Instr->getArg(0))->getValue())) {
2606 Func->setError("Unexpected memory ordering for AtomicFence"); 2603 Func->setError("Unexpected memory ordering for AtomicFence");
2607 return; 2604 return;
2608 } 2605 }
2609 _mfence(); 2606 _mfence();
2610 return; 2607 return;
(...skipping 295 matching lines...) Expand 10 before | Expand all | Expand 10 after
2906 } 2903 }
2907 Variable *T_eax = makeReg(Expected->getType(), Reg_eax); 2904 Variable *T_eax = makeReg(Expected->getType(), Reg_eax);
2908 _mov(T_eax, Expected); 2905 _mov(T_eax, Expected);
2909 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Expected->getType()); 2906 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Expected->getType());
2910 Variable *DesiredReg = legalizeToVar(Desired); 2907 Variable *DesiredReg = legalizeToVar(Desired);
2911 const bool Locked = true; 2908 const bool Locked = true;
2912 _cmpxchg(Addr, T_eax, DesiredReg, Locked); 2909 _cmpxchg(Addr, T_eax, DesiredReg, Locked);
2913 _mov(DestPrev, T_eax); 2910 _mov(DestPrev, T_eax);
2914 } 2911 }
2915 2912
// Peephole: tries to fuse an atomic cmpxchg with a following
// "icmp eq" + conditional branch so that the separate compare is dropped.
//
// Dest      - variable receiving the previous memory value from the cmpxchg.
// PtrToMem  - address operand of the cmpxchg.
// Expected  - value the cmpxchg compares against; must also be the other
//             operand of the later icmp for the pattern to match.
// Desired   - value to store if the comparison succeeds.
//
// Returns true if the pattern matched; in that case the cmpxchg, any
// intervening phi assignments, and the fused branch have all been lowered
// here, and the original assign/icmp/br instructions are marked deleted and
// skipped in the lowering context. Returns false otherwise; every false
// return happens before anything is emitted, so the caller can fall back to
// the plain lowerAtomicCmpxchg() path.
2913 bool TargetX8632::tryOptimizedCmpxchgCmpBr(Variable *Dest, Operand *PtrToMem,
2914 Operand *Expected,
2915 Operand *Desired) {
// Skip the peephole entirely at the Opt_m1 optimization level.
2916 if (Ctx->getOptLevel() == Opt_m1)
2917 return false;
2918 // Peek ahead a few instructions and see how Dest is used.
2919 // It's very common to have:
2920 //
2921 // %x = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* ptr, i32 %expected, ...)
2922 // [%y_phi = ...] // list of phi stores
2923 // %p = icmp eq i32 %x, %expected
2924 // br i1 %p, label %l1, label %l2
2925 //
2926 // which we can optimize into:
2927 //
2928 // %x = <cmpxchg code>
2929 // [%y_phi = ...] // list of phi stores
2930 // br eq, %l1, %l2
2931 InstList::iterator I = Context.getCur();
Jim Stichnoth 2014/07/25 17:27:47 Try to use LoweringContext instead of directly usi
jvoung (off chromium) 2014/07/28 22:55:26 Good point about the skipping deleted instructions
2932 // I is currently the InstIntrinsicCall. Peek past that.
2933 // This assumes that the atomic cmpxchg has not been lowered yet,
2934 // so that the instructions seen in the scan from "Cur" are simple.
2935 assert(llvm::isa<InstIntrinsicCall>(*I));
2936 if (++I == Context.getEnd())
2937 return false;
2938 // There might be phi assignments right before the compare+branch, since this
Jim Stichnoth 2014/07/25 17:27:46 I suggest also adding a note in the Phi lowering p
jvoung (off chromium) 2014/07/28 22:55:26 Done (added to the existing comment). The splittin
Jim Stichnoth 2014/07/29 18:08:44 I was thinking more about possible optimizations.
2939 // could be a backward branch for a loop. This placement of assignments is
2940 // determined by placePhiStores().
// Collect the run of assignments so they can be lowered between the cmpxchg
// and the fused branch, preserving their original position.
2941 std::vector<InstAssign *> PhiAssigns;
2942 while (InstAssign *PhiAssign = llvm::dyn_cast<InstAssign>(*I)) {
// An assignment that overwrites Dest would change the value the later icmp
// sees, so the pattern no longer applies.
2943 if (PhiAssign->getDest() == Dest)
2944 return false;
2945 PhiAssigns.push_back(PhiAssign);
2946 if (++I == Context.getEnd())
2947 return false;
2948 }
// The next instruction must be an equality compare of Dest against Expected,
// with the two operands accepted in either order.
2949 if (InstIcmp *NextCmp = llvm::dyn_cast<InstIcmp>(*I)) {
2950 if (!(NextCmp->getCondition() == InstIcmp::Eq &&
2951 ((NextCmp->getSrc(0) == Dest && NextCmp->getSrc(1) == Expected) ||
2952 (NextCmp->getSrc(1) == Dest && NextCmp->getSrc(0) == Expected)))) {
2953 return false;
2954 }
2955 if (++I == Context.getEnd())
2956 return false;
// ...immediately followed by a conditional branch on the compare's result,
// where that branch is the last use of the result (so the icmp can be
// deleted without leaving a dangling use).
2957 if (InstBr *NextBr = llvm::dyn_cast<InstBr>(*I)) {
2958 if (!NextBr->isUnconditional() &&
2959 NextCmp->getDest() == NextBr->getCondition() &&
2960 NextBr->isLastUse(NextCmp->getDest())) {
// Pattern matched: from here on code is emitted, so there is no bail-out.
2961 lowerAtomicCmpxchg(Dest, PtrToMem, Expected, Desired);
2962 for (size_t i = 0; i < PhiAssigns.size(); ++i) {
2963 // Lower the phi assignments now, before the branch (same placement
2964 // as before).
2965 InstAssign *PhiAssign = PhiAssigns[i];
2966 lowerAssign(PhiAssign);
2967 PhiAssign->setDeleted();
2968 Context.advanceNext();
2969 }
// Branch on "equal" using the flags left by the cmpxchg emitted above
// (x86 cmpxchg sets ZF when the comparison succeeds).
// NOTE(review): this presumes nothing emitted after the cmpxchg -- the
// trailing mov in lowerAtomicCmpxchg and whatever lowerAssign produces for
// the phi assignments -- modifies the flags; confirm against lowerAssign.
2970 _br(InstX8632::Br_e, NextBr->getTargetTrue(), NextBr->getTargetFalse());
2971 // Skip over the old compare and branch, by deleting them.
2972 NextCmp->setDeleted();
2973 NextBr->setDeleted();
// One advance for the icmp and one for the br.
2974 Context.advanceNext();
2975 Context.advanceNext();
2976 return true;
2977 }
2978 }
2979 }
2980 return false;
2981 }
2982
2916 void TargetX8632::lowerAtomicRMW(Variable *Dest, uint32_t Operation, 2983 void TargetX8632::lowerAtomicRMW(Variable *Dest, uint32_t Operation,
2917 Operand *Ptr, Operand *Val) { 2984 Operand *Ptr, Operand *Val) {
2918 bool NeedsCmpxchg = false; 2985 bool NeedsCmpxchg = false;
2919 LowerBinOp Op_Lo = NULL; 2986 LowerBinOp Op_Lo = NULL;
2920 LowerBinOp Op_Hi = NULL; 2987 LowerBinOp Op_Hi = NULL;
2921 switch (Operation) { 2988 switch (Operation) {
2922 default: 2989 default:
2923 Func->setError("Unknown AtomicRMW operation"); 2990 Func->setError("Unknown AtomicRMW operation");
2924 return; 2991 return;
2925 case Intrinsics::AtomicAdd: { 2992 case Intrinsics::AtomicAdd: {
(...skipping 1007 matching lines...) Expand 10 before | Expand all | Expand 10 after
3933 for (SizeT i = 0; i < Size; ++i) { 4000 for (SizeT i = 0; i < Size; ++i) {
3934 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; 4001 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n";
3935 } 4002 }
3936 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; 4003 Str << "\t.size\t" << MangledName << ", " << Size << "\n";
3937 } 4004 }
3938 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName 4005 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName
3939 << "\n"; 4006 << "\n";
3940 } 4007 }
3941 4008
3942 } // end of namespace Ice 4009 } // end of namespace Ice
OLDNEW
« no previous file with comments | « src/IceTargetLoweringX8632.h ('k') | tests_lit/llvm2ice_tests/nacl-atomic-cmpxchg-optimization.ll » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698