Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(511)

Side by Side Diff: src/IceTargetLoweringX8632.cpp

Issue 362463002: Subzero: lower the rest of the atomic operations. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: revert register alloc workarounds Created 6 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 // 9 //
10 // This file implements the TargetLoweringX8632 class, which 10 // This file implements the TargetLoweringX8632 class, which
(...skipping 1898 matching lines...) Expand 10 before | Expand all | Expand 10 after
1909 _cmp(Src0New, Src1); 1909 _cmp(Src0New, Src1);
1910 _mov(Dest, One); 1910 _mov(Dest, One);
1911 _br(getIcmp32Mapping(Inst->getCondition()), Label); 1911 _br(getIcmp32Mapping(Inst->getCondition()), Label);
1912 Context.insert(InstFakeUse::create(Func, Dest)); 1912 Context.insert(InstFakeUse::create(Func, Dest));
1913 _mov(Dest, Zero); 1913 _mov(Dest, Zero);
1914 Context.insert(Label); 1914 Context.insert(Label);
1915 } 1915 }
1916 1916
1917 void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { 1917 void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
1918 switch (Instr->getIntrinsicInfo().ID) { 1918 switch (Instr->getIntrinsicInfo().ID) {
1919 case Intrinsics::AtomicCmpxchg: 1919 case Intrinsics::AtomicCmpxchg: {
1920 if (!Intrinsics::VerifyMemoryOrder( 1920 if (!Intrinsics::VerifyMemoryOrder(
1921 llvm::cast<ConstantInteger>(Instr->getArg(3))->getValue())) { 1921 llvm::cast<ConstantInteger>(Instr->getArg(3))->getValue())) {
1922 Func->setError("Unexpected memory ordering (success) for AtomicCmpxchg"); 1922 Func->setError("Unexpected memory ordering (success) for AtomicCmpxchg");
1923 return; 1923 return;
1924 } 1924 }
1925 if (!Intrinsics::VerifyMemoryOrder( 1925 if (!Intrinsics::VerifyMemoryOrder(
1926 llvm::cast<ConstantInteger>(Instr->getArg(4))->getValue())) { 1926 llvm::cast<ConstantInteger>(Instr->getArg(4))->getValue())) {
1927 Func->setError("Unexpected memory ordering (failure) for AtomicCmpxchg"); 1927 Func->setError("Unexpected memory ordering (failure) for AtomicCmpxchg");
1928 return; 1928 return;
1929 } 1929 }
1930 // TODO(jvoung): fill it in. 1930 Variable *DestPrev = Instr->getDest();
1931 Func->setError("Unhandled intrinsic"); 1931 Operand *PtrToMem = Instr->getArg(0);
1932 Operand *Expected = Instr->getArg(1);
1933 Operand *Desired = Instr->getArg(2);
1934 lowerAtomicCmpxchg(DestPrev, PtrToMem, Expected, Desired);
1935 // TODO(jvoung): If we peek ahead a few instructions and see how
1936 // DestPrev is used (typically via another compare and branch),
1937 // we may be able to optimize. If the result truly is used by a
1938 // compare + branch, and the comparison is for equality, then we can
1939 // optimize out the later compare, and fuse with the later branch.
1932 return; 1940 return;
1941 }
1933 case Intrinsics::AtomicFence: 1942 case Intrinsics::AtomicFence:
1934 if (!Intrinsics::VerifyMemoryOrder( 1943 if (!Intrinsics::VerifyMemoryOrder(
1935 llvm::cast<ConstantInteger>(Instr->getArg(0))->getValue())) { 1944 llvm::cast<ConstantInteger>(Instr->getArg(0))->getValue())) {
1936 Func->setError("Unexpected memory ordering for AtomicFence"); 1945 Func->setError("Unexpected memory ordering for AtomicFence");
1937 return; 1946 return;
1938 } 1947 }
1939 _mfence(); 1948 _mfence();
1940 return; 1949 return;
1941 case Intrinsics::AtomicFenceAll: 1950 case Intrinsics::AtomicFenceAll:
 1942 // NOTE: FenceAll should prevent any load/store from being moved 1951 // NOTE: FenceAll should prevent any load/store from being moved
(...skipping 181 matching lines...) Expand 10 before | Expand all | Expand 10 after
2124 case Intrinsics::Trap: 2133 case Intrinsics::Trap:
2125 _ud2(); 2134 _ud2();
2126 return; 2135 return;
2127 case Intrinsics::UnknownIntrinsic: 2136 case Intrinsics::UnknownIntrinsic:
2128 Func->setError("Should not be lowering UnknownIntrinsic"); 2137 Func->setError("Should not be lowering UnknownIntrinsic");
2129 return; 2138 return;
2130 } 2139 }
2131 return; 2140 return;
2132 } 2141 }
2133 2142
2143 void TargetX8632::lowerAtomicCmpxchg(Variable *DestPrev, Operand *Ptr,
2144 Operand *Expected, Operand *Desired) {
2145 if (Expected->getType() == IceType_i64) {
2146 // Reserve the pre-colored registers first, before adding any more
2147 // infinite-weight variables from FormMemoryOperand's legalization.
2148 Variable *T_edx = makeReg(IceType_i32, Reg_edx);
2149 Variable *T_eax = makeReg(IceType_i32, Reg_eax);
2150 Variable *T_ecx = makeReg(IceType_i32, Reg_ecx);
2151 Variable *T_ebx = makeReg(IceType_i32, Reg_ebx);
2152 _mov(T_eax, loOperand(Expected));
2153 _mov(T_edx, hiOperand(Expected));
2154 _mov(T_ebx, loOperand(Desired));
2155 _mov(T_ecx, hiOperand(Desired));
2156 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Expected->getType());
2157 const bool Locked = true;
2158 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);
2159 Variable *DestLo = llvm::cast<Variable>(loOperand(DestPrev));
2160 Variable *DestHi = llvm::cast<Variable>(hiOperand(DestPrev));
2161 _mov(DestLo, T_eax);
2162 _mov(DestHi, T_edx);
2163 return;
2164 }
2165 Variable *T_eax = makeReg(Expected->getType(), Reg_eax);
2166 _mov(T_eax, Expected);
2167 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Expected->getType());
2168 Variable *DesiredReg = legalizeToVar(Desired);
2169 const bool Locked = true;
2170 _cmpxchg(Addr, T_eax, DesiredReg, Locked);
2171 _mov(DestPrev, T_eax);
2172 }
2173
2134 void TargetX8632::lowerAtomicRMW(Variable *Dest, uint32_t Operation, 2174 void TargetX8632::lowerAtomicRMW(Variable *Dest, uint32_t Operation,
2135 Operand *Ptr, Operand *Val) { 2175 Operand *Ptr, Operand *Val) {
2176 bool NeedsCmpxchg = false;
2177 LowerBinOp Op_Lo = NULL;
2178 LowerBinOp Op_Hi = NULL;
2136 switch (Operation) { 2179 switch (Operation) {
2137 default: 2180 default:
2138 Func->setError("Unknown AtomicRMW operation"); 2181 Func->setError("Unknown AtomicRMW operation");
2139 return; 2182 return;
2140 case Intrinsics::AtomicAdd: { 2183 case Intrinsics::AtomicAdd: {
2141 if (Dest->getType() == IceType_i64) { 2184 if (Dest->getType() == IceType_i64) {
2142 // Do a nasty cmpxchg8b loop. Factor this into a function. 2185 // All the fall-through paths must set this to true, but use this
2143 // TODO(jvoung): fill it in. 2186 // for asserting.
2144 Func->setError("Unhandled AtomicRMW operation"); 2187 NeedsCmpxchg = true;
2145 return; 2188 Op_Lo = &TargetX8632::_add;
2189 Op_Hi = &TargetX8632::_adc;
2190 break;
2146 } 2191 }
2147 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Dest->getType()); 2192 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Dest->getType());
2148 const bool Locked = true; 2193 const bool Locked = true;
2149 Variable *T = NULL; 2194 Variable *T = NULL;
2150 _mov(T, Val); 2195 _mov(T, Val);
2151 _xadd(Addr, T, Locked); 2196 _xadd(Addr, T, Locked);
2152 _mov(Dest, T); 2197 _mov(Dest, T);
2153 return; 2198 return;
2154 } 2199 }
2155 case Intrinsics::AtomicSub: { 2200 case Intrinsics::AtomicSub: {
2156 if (Dest->getType() == IceType_i64) { 2201 if (Dest->getType() == IceType_i64) {
2157 // Do a nasty cmpxchg8b loop. 2202 NeedsCmpxchg = true;
2158 // TODO(jvoung): fill it in. 2203 Op_Lo = &TargetX8632::_sub;
2159 Func->setError("Unhandled AtomicRMW operation"); 2204 Op_Hi = &TargetX8632::_sbb;
2160 return; 2205 break;
2161 } 2206 }
2162 // Generate a memory operand from Ptr. 2207 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Dest->getType());
2163 // neg... 2208 const bool Locked = true;
2164 // Then do the same as AtomicAdd. 2209 Variable *T = NULL;
2165 // TODO(jvoung): fill it in. 2210 _mov(T, Val);
2166 Func->setError("Unhandled AtomicRMW operation"); 2211 _neg(T);
2212 _xadd(Addr, T, Locked);
2213 _mov(Dest, T);
2167 return; 2214 return;
2168 } 2215 }
2169 case Intrinsics::AtomicOr: 2216 case Intrinsics::AtomicOr:
2217 // TODO(jvoung): If Dest is null or dead, then some of these
2218 // operations do not need an "exchange", but just a locked op.
2219 // That appears to be "worth" it for sub, or, and, and xor.
2220 // xadd is probably fine vs lock add for add, and xchg is fine
2221 // vs an atomic store.
2222 NeedsCmpxchg = true;
2223 Op_Lo = &TargetX8632::_or;
2224 Op_Hi = &TargetX8632::_or;
2225 break;
2170 case Intrinsics::AtomicAnd: 2226 case Intrinsics::AtomicAnd:
2227 NeedsCmpxchg = true;
2228 Op_Lo = &TargetX8632::_and;
2229 Op_Hi = &TargetX8632::_and;
2230 break;
2171 case Intrinsics::AtomicXor: 2231 case Intrinsics::AtomicXor:
2232 NeedsCmpxchg = true;
2233 Op_Lo = &TargetX8632::_xor;
2234 Op_Hi = &TargetX8632::_xor;
2235 break;
2172 case Intrinsics::AtomicExchange: 2236 case Intrinsics::AtomicExchange:
2173 // TODO(jvoung): fill it in. 2237 if (Dest->getType() == IceType_i64) {
2174 Func->setError("Unhandled AtomicRMW operation"); 2238 NeedsCmpxchg = true;
2239 // NeedsCmpxchg, but no real Op_Lo/Op_Hi need to be done. The values
2240 // just need to be moved to the ecx and ebx registers.
2241 Op_Lo = NULL;
2242 Op_Hi = NULL;
2243 break;
2244 }
2245 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Dest->getType());
2246 Variable *T = NULL;
2247 _mov(T, Val);
2248 _xchg(Addr, T);
2249 _mov(Dest, T);
2175 return; 2250 return;
2176 } 2251 }
2252 // Otherwise, we need a cmpxchg loop.
2253 assert(NeedsCmpxchg);
2254 expandAtomicRMWAsCmpxchg(Op_Lo, Op_Hi, Dest, Ptr, Val);
2255 }
2256
2257 void TargetX8632::expandAtomicRMWAsCmpxchg(LowerBinOp Op_Lo, LowerBinOp Op_Hi,
2258 Variable *Dest, Operand *Ptr,
2259 Operand *Val) {
2260 // Expand a more complex RMW operation as a cmpxchg loop:
2261 // For 64-bit:
2262 // mov eax, [ptr]
2263 // mov edx, [ptr + 4]
2264 // .LABEL:
2265 // mov ebx, eax
2266 // <Op_Lo> ebx, <desired_adj_lo>
2267 // mov ecx, edx
2268 // <Op_Hi> ecx, <desired_adj_hi>
2269 // lock cmpxchg8b [ptr]
2270 // jne .LABEL
2271 // mov <dest_lo>, eax
2272 // mov <dest_lo>, edx
2273 //
2274 // For 32-bit:
2275 // mov eax, [ptr]
2276 // .LABEL:
2277 // mov <reg>, eax
2278 // op <reg>, [desired_adj]
2279 // lock cmpxchg [ptr], <reg>
2280 // jne .LABEL
2281 // mov <dest>, eax
2282 //
2283 // If Op_{Lo,Hi} are NULL, then just copy the value.
2284 Val = legalize(Val);
2285 Type Ty = Val->getType();
2286 if (Ty == IceType_i64) {
2287 Variable *T_edx = makeReg(IceType_i32, Reg_edx);
2288 Variable *T_eax = makeReg(IceType_i32, Reg_eax);
2289 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Ty);
2290 _mov(T_eax, loOperand(Addr));
2291 _mov(T_edx, hiOperand(Addr));
2292 Variable *T_ecx = makeReg(IceType_i32, Reg_ecx);
2293 Variable *T_ebx = makeReg(IceType_i32, Reg_ebx);
2294 InstX8632Label *Label = InstX8632Label::create(Func, this);
2295 const bool IsXchg8b = Op_Lo == NULL && Op_Hi == NULL;
2296 if (!IsXchg8b) {
2297 Context.insert(Label);
2298 _mov(T_ebx, T_eax);
2299 (this->*Op_Lo)(T_ebx, loOperand(Val));
2300 _mov(T_ecx, T_edx);
2301 (this->*Op_Hi)(T_ecx, hiOperand(Val));
2302 } else {
2303 // This is for xchg, which doesn't need an actual Op_Lo/Op_Hi.
2304 // It just needs the Val loaded into ebx and ecx.
2305 // That can also be done before the loop.
2306 _mov(T_ebx, loOperand(Val));
2307 _mov(T_ecx, hiOperand(Val));
2308 Context.insert(Label);
2309 }
2310 const bool Locked = true;
2311 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);
2312 _br(InstX8632Br::Br_ne, Label);
2313 if (!IsXchg8b) {
2314 // If Val is a variable, model the extended live range of Val through
2315 // the end of the loop, since it will be re-used by the loop.
2316 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) {
2317 Variable *ValLo = llvm::cast<Variable>(loOperand(ValVar));
2318 Variable *ValHi = llvm::cast<Variable>(hiOperand(ValVar));
2319 Context.insert(InstFakeUse::create(Func, ValLo));
2320 Context.insert(InstFakeUse::create(Func, ValHi));
2321 }
2322 } else {
2323 // For xchg, the loop is slightly smaller and ebx/ecx are used.
2324 Context.insert(InstFakeUse::create(Func, T_ebx));
2325 Context.insert(InstFakeUse::create(Func, T_ecx));
2326 }
2327 // The address base is also reused in the loop.
2328 Context.insert(InstFakeUse::create(Func, Addr->getBase()));
2329 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
2330 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2331 _mov(DestLo, T_eax);
2332 _mov(DestHi, T_edx);
2333 return;
2334 }
2335 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Ty);
2336 Variable *T_eax = makeReg(Ty, Reg_eax);
2337 _mov(T_eax, Addr);
2338 InstX8632Label *Label = InstX8632Label::create(Func, this);
2339 Context.insert(Label);
2340 // We want to pick a different register for T than Eax, so don't use
2341 // _mov(T == NULL, T_eax).
2342 Variable *T = makeReg(Ty);
2343 _mov(T, T_eax);
2344 (this->*Op_Lo)(T, Val);
2345 const bool Locked = true;
2346 _cmpxchg(Addr, T_eax, T, Locked);
2347 _br(InstX8632Br::Br_ne, Label);
2348 // If Val is a variable, model the extended live range of Val through
2349 // the end of the loop, since it will be re-used by the loop.
2350 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) {
2351 Context.insert(InstFakeUse::create(Func, ValVar));
2352 }
2353 // The address base is also reused in the loop.
2354 Context.insert(InstFakeUse::create(Func, Addr->getBase()));
2355 _mov(Dest, T_eax);
2177 } 2356 }
2178 2357
2179 namespace { 2358 namespace {
2180 2359
2181 bool isAdd(const Inst *Inst) { 2360 bool isAdd(const Inst *Inst) {
2182 if (const InstArithmetic *Arith = 2361 if (const InstArithmetic *Arith =
2183 llvm::dyn_cast_or_null<const InstArithmetic>(Inst)) { 2362 llvm::dyn_cast_or_null<const InstArithmetic>(Inst)) {
2184 return (Arith->getOp() == InstArithmetic::Add); 2363 return (Arith->getOp() == InstArithmetic::Add);
2185 } 2364 }
2186 return false; 2365 return false;
(...skipping 651 matching lines...) Expand 10 before | Expand all | Expand 10 after
2838 for (SizeT i = 0; i < Size; ++i) { 3017 for (SizeT i = 0; i < Size; ++i) {
2839 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; 3018 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n";
2840 } 3019 }
2841 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; 3020 Str << "\t.size\t" << MangledName << ", " << Size << "\n";
2842 } 3021 }
2843 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName 3022 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName
2844 << "\n"; 3023 << "\n";
2845 } 3024 }
2846 3025
2847 } // end of namespace Ice 3026 } // end of namespace Ice
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698