Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// | 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 // | 9 // |
| 10 // This file implements the TargetLoweringX8632 class, which | 10 // This file implements the TargetLoweringX8632 class, which |
| (...skipping 2574 matching lines...) | |
| 2585 } | 2585 } |
| 2586 if (!Intrinsics::VerifyMemoryOrder( | 2586 if (!Intrinsics::VerifyMemoryOrder( |
| 2587 llvm::cast<ConstantInteger>(Instr->getArg(4))->getValue())) { | 2587 llvm::cast<ConstantInteger>(Instr->getArg(4))->getValue())) { |
| 2588 Func->setError("Unexpected memory ordering (failure) for AtomicCmpxchg"); | 2588 Func->setError("Unexpected memory ordering (failure) for AtomicCmpxchg"); |
| 2589 return; | 2589 return; |
| 2590 } | 2590 } |
| 2591 Variable *DestPrev = Instr->getDest(); | 2591 Variable *DestPrev = Instr->getDest(); |
| 2592 Operand *PtrToMem = Instr->getArg(0); | 2592 Operand *PtrToMem = Instr->getArg(0); |
| 2593 Operand *Expected = Instr->getArg(1); | 2593 Operand *Expected = Instr->getArg(1); |
| 2594 Operand *Desired = Instr->getArg(2); | 2594 Operand *Desired = Instr->getArg(2); |
| 2595 if (tryOptimizedCmpxchgCmpBr(DestPrev, PtrToMem, Expected, Desired)) | |
| 2596 return; | |
| 2595 lowerAtomicCmpxchg(DestPrev, PtrToMem, Expected, Desired); | 2597 lowerAtomicCmpxchg(DestPrev, PtrToMem, Expected, Desired); |
| 2596 // TODO(jvoung): If we peek ahead a few instructions and see how | |
| 2597 // DestPrev is used (typically via another compare and branch), | |
| 2598 // we may be able to optimize. If the result truly is used by a | |
| 2599 // compare + branch, and the comparison is for equality, then we can | |
| 2600 // optimize out the later compare, and fuse with the later branch. | |
| 2601 return; | 2598 return; |
| 2602 } | 2599 } |
| 2603 case Intrinsics::AtomicFence: | 2600 case Intrinsics::AtomicFence: |
| 2604 if (!Intrinsics::VerifyMemoryOrder( | 2601 if (!Intrinsics::VerifyMemoryOrder( |
| 2605 llvm::cast<ConstantInteger>(Instr->getArg(0))->getValue())) { | 2602 llvm::cast<ConstantInteger>(Instr->getArg(0))->getValue())) { |
| 2606 Func->setError("Unexpected memory ordering for AtomicFence"); | 2603 Func->setError("Unexpected memory ordering for AtomicFence"); |
| 2607 return; | 2604 return; |
| 2608 } | 2605 } |
| 2609 _mfence(); | 2606 _mfence(); |
| 2610 return; | 2607 return; |
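For context: the AtomicFence intrinsic lowers to a single mfence via _mfence(). In portable C++ terms this corresponds to a sequentially consistent fence; a minimal sketch (the wrapper function name is illustrative, not part of this CL):

    #include <atomic>

    void full_fence() {
      // Subzero's x86-32 lowering emits one "mfence" instruction for this.
      std::atomic_thread_fence(std::memory_order_seq_cst);
    }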
| (...skipping 295 matching lines...) | |
| 2906 } | 2903 } |
| 2907 Variable *T_eax = makeReg(Expected->getType(), Reg_eax); | 2904 Variable *T_eax = makeReg(Expected->getType(), Reg_eax); |
| 2908 _mov(T_eax, Expected); | 2905 _mov(T_eax, Expected); |
| 2909 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Expected->getType()); | 2906 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Expected->getType()); |
| 2910 Variable *DesiredReg = legalizeToVar(Desired); | 2907 Variable *DesiredReg = legalizeToVar(Desired); |
| 2911 const bool Locked = true; | 2908 const bool Locked = true; |
| 2912 _cmpxchg(Addr, T_eax, DesiredReg, Locked); | 2909 _cmpxchg(Addr, T_eax, DesiredReg, Locked); |
| 2913 _mov(DestPrev, T_eax); | 2910 _mov(DestPrev, T_eax); |
| 2914 } | 2911 } |
| 2915 | 2912 |
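The tail of lowerAtomicCmpxchg() above pins Expected into eax, issues a locked cmpxchg against the memory operand, and copies eax, which then holds the value previously in memory, into DestPrev. A minimal C++ sketch of the semantics being implemented, assuming a 32-bit operand (function and parameter names are illustrative):

    #include <atomic>
    #include <cstdint>

    uint32_t cmpxchg_prev(std::atomic<uint32_t> *Ptr, uint32_t Expected,
                          uint32_t Desired) {
      // compare_exchange_strong writes the observed memory value back into
      // Expected on failure; on success the observed value was Expected
      // itself. Either way, Expected ends up holding the prior contents,
      // mirroring the value "lock cmpxchg" leaves in %eax.
      Ptr->compare_exchange_strong(Expected, Desired);
      return Expected; // what _mov(DestPrev, T_eax) copies out
    }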
| 2913 bool TargetX8632::tryOptimizedCmpxchgCmpBr(Variable *Dest, Operand *PtrToMem, | |
| 2914 Operand *Expected, | |
| 2915 Operand *Desired) { | |
| 2916 if (Ctx->getOptLevel() == Opt_m1) | |
| 2917 return false; | |
| 2918 // Peek ahead a few instructions and see how Dest is used. | |
| 2919 // It's very common to have: | |
| 2920 // | |
| 2921 // %x = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* ptr, i32 %expected, ...) | |
| 2922 // [%y_phi = ...] // list of phi stores | |
| 2923 // %p = icmp eq i32 %x, %expected | |
| 2924 // br i1 %p, label %l1, label %l2 | |
| 2925 // | |
| 2926 // which we can optimize into: | |
| 2927 // | |
| 2928 // %x = <cmpxchg code> | |
| 2929 // [%y_phi = ...] // list of phi stores | |
| 2930 // br eq, %l1, %l2 | |
| 2931 InstList::iterator I = Context.getCur(); | |
|
Jim Stichnoth (2014/07/25 17:27:47):
Try to use LoweringContext instead of directly using …
jvoung (off chromium) (2014/07/28 22:55:26):
Good point about the skipping deleted instructions …
| |
| 2932 // I is currently the InstIntrinsicCall. Peek past that. | |
| 2933 // This assumes that the atomic cmpxchg has not been lowered yet, | |
| 2934 // so that the instructions seen in the scan from "Cur" are simple. | |
| 2935 assert(llvm::isa<InstIntrinsicCall>(*I)); | |
| 2936 if (++I == Context.getEnd()) | |
| 2937 return false; | |
| 2938 // There might be phi assignments right before the compare+branch, since this | |
|
Jim Stichnoth (2014/07/25 17:27:46):
I suggest also adding a note in the Phi lowering p…
jvoung (off chromium) (2014/07/28 22:55:26):
Done (added to the existing comment). The splitting …
Jim Stichnoth (2014/07/29 18:08:44):
I was thinking more about possible optimizations.
| |
| 2939 // could be a backward branch for a loop. This placement of assignments is | |
| 2940 // determined by placePhiStores(). | |
| 2941 std::vector<InstAssign *> PhiAssigns; | |
| 2942 while (InstAssign *PhiAssign = llvm::dyn_cast<InstAssign>(*I)) { | |
| 2943 if (PhiAssign->getDest() == Dest) | |
| 2944 return false; | |
| 2945 PhiAssigns.push_back(PhiAssign); | |
| 2946 if (++I == Context.getEnd()) | |
| 2947 return false; | |
| 2948 } | |
| 2949 if (InstIcmp *NextCmp = llvm::dyn_cast<InstIcmp>(*I)) { | |
| 2950 if (!(NextCmp->getCondition() == InstIcmp::Eq && | |
| 2951 ((NextCmp->getSrc(0) == Dest && NextCmp->getSrc(1) == Expected) || | |
| 2952 (NextCmp->getSrc(1) == Dest && NextCmp->getSrc(0) == Expected)))) { | |
| 2953 return false; | |
| 2954 } | |
| 2955 if (++I == Context.getEnd()) | |
| 2956 return false; | |
| 2957 if (InstBr *NextBr = llvm::dyn_cast<InstBr>(*I)) { | |
| 2958 if (!NextBr->isUnconditional() && | |
| 2959 NextCmp->getDest() == NextBr->getCondition() && | |
| 2960 NextBr->isLastUse(NextCmp->getDest())) { | |
| 2961 lowerAtomicCmpxchg(Dest, PtrToMem, Expected, Desired); | |
| 2962 for (size_t i = 0; i < PhiAssigns.size(); ++i) { | |
| 2963 // Lower the phi assignments now, before the branch (same placement | |
| 2964 // as before). | |
| 2965 InstAssign *PhiAssign = PhiAssigns[i]; | |
| 2966 lowerAssign(PhiAssign); | |
| 2967 PhiAssign->setDeleted(); | |
| 2968 Context.advanceNext(); | |
| 2969 } | |
| 2970 _br(InstX8632::Br_e, NextBr->getTargetTrue(), NextBr->getTargetFalse()); | |
| 2971 // Skip over the old compare and branch, by deleting them. | |
| 2972 NextCmp->setDeleted(); | |
| 2973 NextBr->setDeleted(); | |
| 2974 Context.advanceNext(); | |
| 2975 Context.advanceNext(); | |
| 2976 return true; | |
| 2977 } | |
| 2978 } | |
| 2979 } | |
| 2980 return false; | |
| 2981 } | |
| 2982 | |
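The shape this peephole matches is typical of compare-and-swap loops. A hypothetical C++ source fragment that would produce the icmp eq + br pattern (illustrative only, not taken from the CL):

    #include <atomic>

    // The cmpxchg result is immediately compared for equality against the
    // expected value and branched on: exactly the
    //   %p = icmp eq %x, %expected ; br %p
    // shape that tryOptimizedCmpxchgCmpBr() fuses.
    void spin_lock(std::atomic<int> *Lock) {
      int Expected = 0;
      while (!Lock->compare_exchange_strong(Expected, 1)) {
        Expected = 0; // compare_exchange updated Expected; reset and retry
      }
    }

When the fusion fires, the separate compare and conditional branch are deleted and the emitted branch keys directly off the ZF flag that cmpxchg sets on success.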
| 2916 void TargetX8632::lowerAtomicRMW(Variable *Dest, uint32_t Operation, | 2983 void TargetX8632::lowerAtomicRMW(Variable *Dest, uint32_t Operation, |
| 2917 Operand *Ptr, Operand *Val) { | 2984 Operand *Ptr, Operand *Val) { |
| 2918 bool NeedsCmpxchg = false; | 2985 bool NeedsCmpxchg = false; |
| 2919 LowerBinOp Op_Lo = NULL; | 2986 LowerBinOp Op_Lo = NULL; |
| 2920 LowerBinOp Op_Hi = NULL; | 2987 LowerBinOp Op_Hi = NULL; |
| 2921 switch (Operation) { | 2988 switch (Operation) { |
| 2922 default: | 2989 default: |
| 2923 Func->setError("Unknown AtomicRMW operation"); | 2990 Func->setError("Unknown AtomicRMW operation"); |
| 2924 return; | 2991 return; |
| 2925 case Intrinsics::AtomicAdd: { | 2992 case Intrinsics::AtomicAdd: { |
| (...skipping 1007 matching lines...) | |
| 3933 for (SizeT i = 0; i < Size; ++i) { | 4000 for (SizeT i = 0; i < Size; ++i) { |
| 3934 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; | 4001 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; |
| 3935 } | 4002 } |
| 3936 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; | 4003 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; |
| 3937 } | 4004 } |
| 3938 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName | 4005 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName |
| 3939 << "\n"; | 4006 << "\n"; |
| 3940 } | 4007 } |
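For a small global, the directives emitted by the loop above would look roughly like the following sample output (the symbol name and byte values are made up for illustration; the label and any alignment output come from code outside this excerpt):

    	.byte	1
    	.byte	0
    	.byte	0
    	.byte	0
    	.size	example_var, 4
    	.local	example_var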
| 3941 | 4008 |
| 3942 } // end of namespace Ice | 4009 } // end of namespace Ice |