Chromium Code Reviews

Side by Side Diff: src/IceTargetLoweringX8632.cpp

Issue 362463002: Subzero: lower the rest of the atomic operations. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: comment cleanup Created 6 years, 5 months ago
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 // 9 //
10 // This file implements the TargetLoweringX8632 class, which 10 // This file implements the TargetLoweringX8632 class, which
(...skipping 1782 matching lines...)
1793 _cmp(Src0New, Src1); 1793 _cmp(Src0New, Src1);
1794 _mov(Dest, One); 1794 _mov(Dest, One);
1795 _br(getIcmp32Mapping(Inst->getCondition()), Label); 1795 _br(getIcmp32Mapping(Inst->getCondition()), Label);
1796 Context.insert(InstFakeUse::create(Func, Dest)); 1796 Context.insert(InstFakeUse::create(Func, Dest));
1797 _mov(Dest, Zero); 1797 _mov(Dest, Zero);
1798 Context.insert(Label); 1798 Context.insert(Label);
1799 } 1799 }
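A sketch of the pattern the snippet above lowers an i32 icmp into; the angle-bracketed operands are illustrative placeholders, not identifiers from the patch:

    cmp <src0>, <src1>
    mov <dest>, 1
    j<cond> .LBL      ; branch taken when the condition holds, keeping the 1
    mov <dest>, 0     ; fall-through path clears the result
  .LBL:

The InstFakeUse between the two movs presumably keeps the earlier mov of Dest from looking dead, since the second mov would otherwise appear to fully redefine it.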
1800 1800
1801 void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { 1801 void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
1802 switch (Instr->getIntrinsicInfo().ID) { 1802 switch (Instr->getIntrinsicInfo().ID) {
1803 case Intrinsics::AtomicCmpxchg: 1803 case Intrinsics::AtomicCmpxchg: {
1804 if (!Intrinsics::VerifyMemoryOrder( 1804 if (!Intrinsics::VerifyMemoryOrder(
1805 llvm::cast<ConstantInteger>(Instr->getArg(3))->getValue())) { 1805 llvm::cast<ConstantInteger>(Instr->getArg(3))->getValue())) {
1806 Func->setError("Unexpected memory ordering (success) for AtomicCmpxchg"); 1806 Func->setError("Unexpected memory ordering (success) for AtomicCmpxchg");
1807 return; 1807 return;
1808 } 1808 }
1809 if (!Intrinsics::VerifyMemoryOrder( 1809 if (!Intrinsics::VerifyMemoryOrder(
1810 llvm::cast<ConstantInteger>(Instr->getArg(4))->getValue())) { 1810 llvm::cast<ConstantInteger>(Instr->getArg(4))->getValue())) {
1811 Func->setError("Unexpected memory ordering (failure) for AtomicCmpxchg"); 1811 Func->setError("Unexpected memory ordering (failure) for AtomicCmpxchg");
1812 return; 1812 return;
1813 } 1813 }
1814 // TODO(jvoung): fill it in. 1814 Variable *DestPrev = Instr->getDest();
1815 Func->setError("Unhandled intrinsic"); 1815 Operand *PtrToMem = Instr->getArg(0);
1816 Operand *Expected = Instr->getArg(1);
1817 Operand *Desired = Instr->getArg(2);
1818 lowerAtomicCmpxchg(DestPrev, PtrToMem, Expected, Desired);
1819 // TODO(jvoung): If we peek ahead a few instructions and see how
1820 // DestPrev is used (typically via another compare and branch),
1821 // we may be able to optimize. If the result truly is used by a
1822 // compare + branch, and the comparison is for equality, then we can
1823 // optimized out the later compare, and fuse with the later branch.
Jim Stichnoth 2014/07/08 04:50:19 optimized --> optimize
jvoung (off chromium) 2014/07/09 17:07:55 Done.
1816 return; 1824 return;
1825 }
1817 case Intrinsics::AtomicFence: 1826 case Intrinsics::AtomicFence:
1818 if (!Intrinsics::VerifyMemoryOrder( 1827 if (!Intrinsics::VerifyMemoryOrder(
1819 llvm::cast<ConstantInteger>(Instr->getArg(0))->getValue())) { 1828 llvm::cast<ConstantInteger>(Instr->getArg(0))->getValue())) {
1820 Func->setError("Unexpected memory ordering for AtomicFence"); 1829 Func->setError("Unexpected memory ordering for AtomicFence");
1821 return; 1830 return;
1822 } 1831 }
1823 _mfence(); 1832 _mfence();
1824 return; 1833 return;
1825 case Intrinsics::AtomicFenceAll: 1834 case Intrinsics::AtomicFenceAll:
1826 // NOTE: FenceAll should prevent any load/store from being moved 1835 // NOTE: FenceAll should prevent any load/store from being moved
(...skipping 174 matching lines...)
2001 case Intrinsics::Trap: 2010 case Intrinsics::Trap:
2002 _ud2(); 2011 _ud2();
2003 return; 2012 return;
2004 case Intrinsics::UnknownIntrinsic: 2013 case Intrinsics::UnknownIntrinsic:
2005 Func->setError("Should not be lowering UnknownIntrinsic"); 2014 Func->setError("Should not be lowering UnknownIntrinsic");
2006 return; 2015 return;
2007 } 2016 }
2008 return; 2017 return;
2009 } 2018 }
2010 2019
2020 void TargetX8632::lowerAtomicCmpxchg(Variable *DestPrev, Operand *Ptr,
2021 Operand *Expected, Operand *Desired) {
2022 if (Expected->getType() == IceType_i64) {
2023 // Reserve the pre-colored registers first, before adding any more
2024 // infinite-weight variables from FormMemoryOperand's legalization.
2025 Variable *T_edx = makeReg(IceType_i32, Reg_edx);
2026 Variable *T_eax = makeReg(IceType_i32, Reg_eax);
2027 Variable *T_ecx = makeReg(IceType_i32, Reg_ecx);
2028 Variable *T_ebx = makeReg(IceType_i32, Reg_ebx);
2029 _mov(T_eax, loOperand(Expected));
2030 _mov(T_edx, hiOperand(Expected));
2031 _mov(T_ebx, loOperand(Desired));
2032 _mov(T_ecx, hiOperand(Desired));
2033 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Expected->getType());
2034 const bool Locked = true;
2035 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);
2036 Variable *DestLo = llvm::cast<Variable>(loOperand(DestPrev));
2037 Variable *DestHi = llvm::cast<Variable>(hiOperand(DestPrev));
2038 _mov(DestLo, T_eax);
2039 _mov(DestHi, T_edx);
2040 return;
2041 }
2042 Variable *T_eax = makeReg(Expected->getType(), Reg_eax);
2043 _mov(T_eax, Expected);
2044 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Expected->getType());
2045 Variable *DesiredReg = legalizeToVar(Desired);
2046 const bool Locked = true;
2047 _cmpxchg(Addr, T_eax, DesiredReg, Locked);
2048 _mov(DestPrev, T_eax);
2049 }
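As a rough sketch, the two paths above should boil down to the following machine code; bracketed operands are placeholders rather than names from the patch:

    ; 64-bit operands: expected in edx:eax, desired in ecx:ebx
    mov eax, <expected_lo>
    mov edx, <expected_hi>
    mov ebx, <desired_lo>
    mov ecx, <desired_hi>
    lock cmpxchg8b [<addr>]   ; edx:eax receives the prior memory contents
    mov <dest_lo>, eax
    mov <dest_hi>, edx

    ; 8/16/32-bit operands: expected in eax
    mov eax, <expected>
    lock cmpxchg [<addr>], <desired_reg>
    mov <dest_prev>, eax      ; eax holds the prior memory contents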
2050
2011 void TargetX8632::lowerAtomicRMW(Variable *Dest, uint32_t Operation, 2051 void TargetX8632::lowerAtomicRMW(Variable *Dest, uint32_t Operation,
2012 Operand *Ptr, Operand *Val) { 2052 Operand *Ptr, Operand *Val) {
2053 bool NeedsCmpxchg = false;
2054 LowerBinOp Op_Lo = NULL;
2055 LowerBinOp Op_Hi = NULL;
2013 switch (Operation) { 2056 switch (Operation) {
2014 default: 2057 default:
2015 Func->setError("Unknown AtomicRMW operation"); 2058 Func->setError("Unknown AtomicRMW operation");
2016 return; 2059 return;
2017 case Intrinsics::AtomicAdd: { 2060 case Intrinsics::AtomicAdd: {
2018 if (Dest->getType() == IceType_i64) { 2061 if (Dest->getType() == IceType_i64) {
2019 // Do a nasty cmpxchg8b loop. Factor this into a function. 2062 // All the fall-through paths must set this to true, but use this
2020 // TODO(jvoung): fill it in. 2063 // for asserting.
2021 Func->setError("Unhandled AtomicRMW operation"); 2064 NeedsCmpxchg = true;
2022 return; 2065 Op_Lo = &TargetX8632::_add;
2066 Op_Hi = &TargetX8632::_adc;
2067 break;
2023 } 2068 }
2024 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Dest->getType()); 2069 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Dest->getType());
2025 const bool Locked = true; 2070 const bool Locked = true;
2026 Variable *T = NULL; 2071 Variable *T = NULL;
2027 _mov(T, Val); 2072 _mov(T, Val);
2028 _xadd(Addr, T, Locked); 2073 _xadd(Addr, T, Locked);
2029 _mov(Dest, T); 2074 _mov(Dest, T);
2030 return; 2075 return;
2031 } 2076 }
2032 case Intrinsics::AtomicSub: { 2077 case Intrinsics::AtomicSub: {
2033 if (Dest->getType() == IceType_i64) { 2078 if (Dest->getType() == IceType_i64) {
2034 // Do a nasty cmpxchg8b loop. 2079 NeedsCmpxchg = true;
2035 // TODO(jvoung): fill it in. 2080 Op_Lo = &TargetX8632::_sub;
2036 Func->setError("Unhandled AtomicRMW operation"); 2081 Op_Hi = &TargetX8632::_sbb;
2037 return; 2082 break;
2038 } 2083 }
2039 // Generate a memory operand from Ptr. 2084 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Dest->getType());
2040 // neg... 2085 const bool Locked = true;
2041 // Then do the same as AtomicAdd. 2086 Variable *T = NULL;
2042 // TODO(jvoung): fill it in. 2087 _mov(T, Val);
2043 Func->setError("Unhandled AtomicRMW operation"); 2088 _neg(T);
2089 _xadd(Addr, T, Locked);
2090 _mov(Dest, T);
2044 return; 2091 return;
2045 } 2092 }
2046 case Intrinsics::AtomicOr: 2093 case Intrinsics::AtomicOr:
2094 // TODO(jvoung): If Dest is null or dead, then some of these
2095 // operations do not need an "exchange", but just a locked op.
2096 // That appears to be "worth" it for sub, or, and, and xor.
2097 // xadd is probably fine vs lock add for add, and xchg is fine
2098 // vs an atomic store.
2099 NeedsCmpxchg = true;
2100 Op_Lo = &TargetX8632::_or;
2101 Op_Hi = &TargetX8632::_or;
2102 break;
2047 case Intrinsics::AtomicAnd: 2103 case Intrinsics::AtomicAnd:
2104 NeedsCmpxchg = true;
2105 Op_Lo = &TargetX8632::_and;
2106 Op_Hi = &TargetX8632::_and;
2107 break;
2048 case Intrinsics::AtomicXor: 2108 case Intrinsics::AtomicXor:
2109 NeedsCmpxchg = true;
2110 Op_Lo = &TargetX8632::_xor;
2111 Op_Hi = &TargetX8632::_xor;
2112 break;
2049 case Intrinsics::AtomicExchange: 2113 case Intrinsics::AtomicExchange:
2050 // TODO(jvoung): fill it in. 2114 if (Dest->getType() == IceType_i64) {
2051 Func->setError("Unhandled AtomicRMW operation"); 2115 NeedsCmpxchg = true;
2116 // NeedsCmpxchg, but no real Op_Lo/Op_Hi need to be done. The values
2117 // just need to be moved to the ecx and ebx registers.
2118 Op_Lo = NULL;
2119 Op_Hi = NULL;
2120 break;
2121 }
2122 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Dest->getType());
2123 Variable *T = NULL;
2124 _mov(T, Val);
2125 _xchg(Addr, T);
2126 _mov(Dest, T);
2052 return; 2127 return;
2053 } 2128 }
2129 // Otherwise, we need a cmpxchg loop.
2130 assert(NeedsCmpxchg);
2131 expandAtomicRMWAsCmpxchg(Op_Lo, Op_Hi, Dest, Ptr, Val);
2132 }
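For the 32-bit AtomicAdd and AtomicSub fast paths above, no cmpxchg loop is needed; a sketch of the expected emission, again with placeholder operands:

    mov <t>, <val>
    neg <t>                   ; AtomicSub only: xadd of the negation subtracts
    lock xadd [<addr>], <t>   ; <t> receives the value previously in memory
    mov <dest>, <t>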
2133
2134 void TargetX8632::expandAtomicRMWAsCmpxchg(LowerBinOp Op_Lo, LowerBinOp Op_Hi,
2135 Variable *Dest, Operand *Ptr,
2136 Operand *Val) {
2137 // Expand a more complex RMW operation as a cmpxchg loop:
2138 // For 64-bit:
2139 // mov eax, [ptr]
2140 // mov edx, [ptr + 4]
2141 // .LABEL:
2142 // mov ebx, eax
2143 // <Op_Lo> ebx, <desired_adj_lo>
2144 // mov ecx, edx
2145 // <Op_Hi> ecx, <desired_adj_hi>
2146 // lock cmpxchg8b [ptr]
2147 // jne .LABEL
2148 // mov <dest_lo>, eax
2149 // mov <dest_hi>, edx
2150 //
2151 // For 32-bit:
2152 // mov eax, [ptr]
2153 // .LABEL:
2154 // mov <reg>, eax
2155 // op <reg>, [desired_adj]
2156 // lock cmpxchg [ptr], <reg>
2157 // jne .LABEL
2158 // mov <dest>, eax
2159 //
2160 // If Op_{Lo,Hi} are NULL, then just copy the value.
2161 Val = legalize(Val);
2162 Type Ty = Val->getType();
2163 if (Ty == IceType_i64) {
2164 Variable *T_edx = makeReg(IceType_i32, Reg_edx);
2165 Variable *T_eax = makeReg(IceType_i32, Reg_eax);
2166 // FormMemoryOperand legalizes the Ptr to any reg. It usually picks eax,
2167 // but that conflicts with the later requirement that we use eax for
2168 // cmpxchg. The two infinite weight choices conflict and we end up
2169 // choosing eax for both. Work around this for now, by forcing Addr
Jim Stichnoth 2014/07/08 04:50:19 I'd like to understand more about this. It sounds
jvoung (off chromium) 2014/07/08 18:14:07 The problem was actually with O2 register allocati
Jim Stichnoth 2014/07/09 18:14:28 OK, then this is almost certainly a regalloc bug w
jvoung (off chromium) 2014/07/10 23:14:49 Done.
2170 // to use a different register. Arbitrarily picking edi instead.
2171 bool AllowOverlap = false;
2172 Variable *LegalPtr = legalizeToVar(Ptr, AllowOverlap, Reg_edi);
2173 OperandX8632Mem *Addr = FormMemoryOperand(LegalPtr, Ty);
2174 _mov(T_eax, loOperand(Addr));
2175 _mov(T_edx, hiOperand(Addr));
2176 Variable *T_ecx = makeReg(IceType_i32, Reg_ecx);
2177 Variable *T_ebx = makeReg(IceType_i32, Reg_ebx);
2178 InstX8632Label *Label = InstX8632Label::create(Func, this);
2179 const bool IsXchg8b = Op_Lo == NULL && Op_Hi == NULL;
2180 if (!IsXchg8b) {
2181 Context.insert(Label);
2182 _mov(T_ebx, T_eax);
2183 (this->*Op_Lo)(T_ebx, loOperand(Val));
2184 _mov(T_ecx, T_edx);
2185 (this->*Op_Hi)(T_ecx, hiOperand(Val));
2186 } else {
2187 // This is for xchg, which doesn't need an actual Op_Lo/Op_Hi.
2188 // It just needs the Val loaded into ebx and ecx.
2189 // That can also be done before the loop.
2190 _mov(T_ebx, loOperand(Val));
2191 _mov(T_ecx, hiOperand(Val));
2192 Context.insert(Label);
2193 }
2194 const bool Locked = true;
2195 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);
2196 _br(InstX8632Br::Br_ne, Label);
2197 if (!IsXchg8b) {
2198 // If Val is a variable, model the extended live range of Val through
2199 // the end of the loop, since it will be re-used by the loop.
2200 // Same with the address.
2201 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) {
2202 Variable *ValLo = llvm::cast<Variable>(loOperand(ValVar));
2203 Variable *ValHi = llvm::cast<Variable>(hiOperand(ValVar));
2204 Context.insert(InstFakeUse::create(Func, ValLo));
2205 Context.insert(InstFakeUse::create(Func, ValHi));
2206 }
2207 Context.insert(InstFakeUse::create(Func, LegalPtr));
2208 } else {
2209 // For xchg, just need to extend the live range of ebx/ecx.
2210 Context.insert(InstFakeUse::create(Func, T_ebx));
2211 Context.insert(InstFakeUse::create(Func, T_ecx));
2212 }
2213 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
2214 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2215 _mov(DestLo, T_eax);
2216 _mov(DestHi, T_edx);
2217 return;
2218 }
2219 // FormMemoryOperand legalizes the Ptr to any reg. It usually picks eax,
2220 // but that conflicts with the later requirement that we use eax for
2221 // cmpxchg. The two infinite weight choices conflict and we end up
2222 // choosing eax for both. Work around this for now, by forcing Addr
2223 // to use a different register. Arbitrarily picking ecx instead.
2224 const bool AllowOverlap = false;
2225 Variable *LegalPtr = legalizeToVar(Ptr, AllowOverlap, Reg_ecx);
2226 OperandX8632Mem *Addr = FormMemoryOperand(LegalPtr, Ty);
2227 Variable *T_eax = makeReg(Ty, Reg_eax);
2228 _mov(T_eax, Addr);
2229 InstX8632Label *Label = InstX8632Label::create(Func, this);
2230 Context.insert(Label);
2231 Variable *T = NULL;
2232 // We want to pick a different register for T than eax,
2233 // and we also already used ecx for the LegalPtr.
2234 _mov(T, T_eax, Reg_edx);
2235 (this->*Op_Lo)(T, Val);
2236 const bool Locked = true;
2237 _cmpxchg(Addr, T_eax, T, Locked);
2238 _br(InstX8632Br::Br_ne, Label);
2239 // If Val is a variable, model the extended live range of Val through
2240 // the end of the loop, since it will be re-used by the loop.
2241 // Same with the address.
2242 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) {
2243 Context.insert(InstFakeUse::create(Func, ValVar));
2244 }
2245 Context.insert(InstFakeUse::create(Func, LegalPtr));
2246 _mov(Dest, T_eax);
2054 } 2247 }
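Taking the Op_Lo == Op_Hi == NULL case as a concrete instance, a 64-bit AtomicExchange should expand to roughly the following loop (placeholder operands); ebx and ecx are loaded once, before the label, since the stored value does not depend on the loaded one:

    mov eax, [<addr>]
    mov edx, [<addr> + 4]
    mov ebx, <val_lo>
    mov ecx, <val_hi>
  .LABEL:
    lock cmpxchg8b [<addr>]   ; retries until edx:eax still matches memory
    jne .LABEL
    mov <dest_lo>, eax
    mov <dest_hi>, edx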
2055 2248
2056 namespace { 2249 namespace {
2057 2250
2058 bool isAdd(const Inst *Inst) { 2251 bool isAdd(const Inst *Inst) {
2059 if (const InstArithmetic *Arith = 2252 if (const InstArithmetic *Arith =
2060 llvm::dyn_cast_or_null<const InstArithmetic>(Inst)) { 2253 llvm::dyn_cast_or_null<const InstArithmetic>(Inst)) {
2061 return (Arith->getOp() == InstArithmetic::Add); 2254 return (Arith->getOp() == InstArithmetic::Add);
2062 } 2255 }
2063 return false; 2256 return false;
(...skipping 450 matching lines...)
2514 Reg->setWeightInfinite(); 2707 Reg->setWeightInfinite();
2515 else 2708 else
2516 Reg->setRegNum(RegNum); 2709 Reg->setRegNum(RegNum);
2517 return Reg; 2710 return Reg;
2518 } 2711 }
2519 2712
2520 void TargetX8632::postLower() { 2713 void TargetX8632::postLower() {
2521 if (Ctx->getOptLevel() != Opt_m1) 2714 if (Ctx->getOptLevel() != Opt_m1)
2522 return; 2715 return;
2523 // TODO: Avoid recomputing WhiteList every instruction. 2716 // TODO: Avoid recomputing WhiteList every instruction.
2524 llvm::SmallBitVector WhiteList = getRegisterSet(RegSet_All, RegSet_None); 2717 RegSetMask RegInclude = RegSet_All;
2718 RegSetMask RegExclude = RegSet_None | RegSet_StackPointer;
2719 if (hasFramePointer())
jvoung (off chromium) 2014/07/07 17:31:16 split out -- but currently here so that my tests p
jvoung (off chromium) 2014/07/09 17:07:55 Done.
2720 RegExclude |= RegSet_FramePointer;
2721 llvm::SmallBitVector WhiteList = getRegisterSet(RegInclude, RegExclude);
2525 // Make one pass to black-list pre-colored registers. TODO: If 2722 // Make one pass to black-list pre-colored registers. TODO: If
2526 // there was some prior register allocation pass that made register 2723 // there was some prior register allocation pass that made register
2527 // assignments, those registers need to be black-listed here as 2724 // assignments, those registers need to be black-listed here as
2528 // well. 2725 // well.
2529 for (InstList::iterator I = Context.getCur(), E = Context.getEnd(); I != E; 2726 for (InstList::iterator I = Context.getCur(), E = Context.getEnd(); I != E;
2530 ++I) { 2727 ++I) {
2531 const Inst *Inst = *I; 2728 const Inst *Inst = *I;
2532 if (Inst->isDeleted()) 2729 if (Inst->isDeleted())
2533 continue; 2730 continue;
2534 if (llvm::isa<InstFakeKill>(Inst)) 2731 if (llvm::isa<InstFakeKill>(Inst))
(...skipping 146 matching lines...)
2681 for (SizeT i = 0; i < Size; ++i) { 2878 for (SizeT i = 0; i < Size; ++i) {
2682 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; 2879 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n";
2683 } 2880 }
2684 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; 2881 Str << "\t.size\t" << MangledName << ", " << Size << "\n";
2685 } 2882 }
2686 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName 2883 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName
2687 << "\n"; 2884 << "\n";
2688 } 2885 }
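For a hypothetical 4-byte internal global named foo initialized to 1 (bytes shown little-endian), the emission loop above would produce:

    .byte 1
    .byte 0
    .byte 0
    .byte 0
    .size foo, 4
    .local foo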
2689 2886
2690 } // end of namespace Ice 2887 } // end of namespace Ice