Chromium Code Reviews

Unified Diff: src/IceTargetLoweringX8632.cpp

Issue 362463002: Subzero: lower the rest of the atomic operations. (Closed)
Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: rebase (created 6 years, 5 months ago)
 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//
 //
 // The Subzero Code Generator
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
 //
 // This file implements the TargetLoweringX8632 class, which
(...skipping 1950 matching lines...)
   _cmp(Src0New, Src1);
   _mov(Dest, One);
   _br(getIcmp32Mapping(Inst->getCondition()), Label);
   Context.insert(InstFakeUse::create(Func, Dest));
   _mov(Dest, Zero);
   Context.insert(Label);
 }
 
 void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
   switch (Instr->getIntrinsicInfo().ID) {
-  case Intrinsics::AtomicCmpxchg:
+  case Intrinsics::AtomicCmpxchg: {
     if (!Intrinsics::VerifyMemoryOrder(
             llvm::cast<ConstantInteger>(Instr->getArg(3))->getValue())) {
       Func->setError("Unexpected memory ordering (success) for AtomicCmpxchg");
       return;
     }
     if (!Intrinsics::VerifyMemoryOrder(
             llvm::cast<ConstantInteger>(Instr->getArg(4))->getValue())) {
       Func->setError("Unexpected memory ordering (failure) for AtomicCmpxchg");
       return;
     }
-    // TODO(jvoung): fill it in.
-    Func->setError("Unhandled intrinsic");
+    Variable *DestPrev = Instr->getDest();
+    Operand *PtrToMem = Instr->getArg(0);
+    Operand *Expected = Instr->getArg(1);
+    Operand *Desired = Instr->getArg(2);
+    lowerAtomicCmpxchg(DestPrev, PtrToMem, Expected, Desired);
+    // TODO(jvoung): If we peek ahead a few instructions and see how
+    // DestPrev is used (typically via another compare and branch),
+    // we may be able to optimize. If the result truly is used by a
+    // compare + branch, and the comparison is for equality, then we can
+    // optimize out the later compare, and fuse with the later branch.
     return;
+  }
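The fusing opportunity described in the TODO above exists because the result of a compare-and-swap is almost always fed straight into an equality test and a branch. A minimal sketch of that consumer pattern, written with a GCC/Clang builtin purely for illustration (the function and variable names are hypothetical, not from this patch):

    #include <stdint.h>

    // lock cmpxchg already sets ZF according to this same comparison, so a
    // peephole that recognizes the shape could drop the later cmp and branch
    // directly on the flags the cmpxchg left behind.
    bool TryPublish(uint32_t *Ptr, uint32_t Expected, uint32_t Desired) {
      uint32_t Prev = __sync_val_compare_and_swap(Ptr, Expected, Desired);
      if (Prev == Expected)
        return true;  // success path
      return false;   // failure path: another thread won the race
    }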
   case Intrinsics::AtomicFence:
     if (!Intrinsics::VerifyMemoryOrder(
             llvm::cast<ConstantInteger>(Instr->getArg(0))->getValue())) {
       Func->setError("Unexpected memory ordering for AtomicFence");
       return;
     }
     _mfence();
     return;
   case Intrinsics::AtomicFenceAll:
     // NOTE: FenceAll should prevent and load/store from being moved
(...skipping 181 matching lines...)
   case Intrinsics::Trap:
     _ud2();
     return;
   case Intrinsics::UnknownIntrinsic:
     Func->setError("Should not be lowering UnknownIntrinsic");
     return;
   }
   return;
 }
 
+void TargetX8632::lowerAtomicCmpxchg(Variable *DestPrev, Operand *Ptr,
+                                     Operand *Expected, Operand *Desired) {
+  if (Expected->getType() == IceType_i64) {
+    // Reserve the pre-colored registers first, before adding any more
+    // infinite-weight variables from FormMemoryOperand's legalization.
+    Variable *T_edx = makeReg(IceType_i32, Reg_edx);
+    Variable *T_eax = makeReg(IceType_i32, Reg_eax);
+    Variable *T_ecx = makeReg(IceType_i32, Reg_ecx);
+    Variable *T_ebx = makeReg(IceType_i32, Reg_ebx);
+    _mov(T_eax, loOperand(Expected));
+    _mov(T_edx, hiOperand(Expected));
+    _mov(T_ebx, loOperand(Desired));
+    _mov(T_ecx, hiOperand(Desired));
+    OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Expected->getType());
+    const bool Locked = true;
+    _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);
+    Variable *DestLo = llvm::cast<Variable>(loOperand(DestPrev));
+    Variable *DestHi = llvm::cast<Variable>(hiOperand(DestPrev));
+    _mov(DestLo, T_eax);
+    _mov(DestHi, T_edx);
+    return;
+  }
+  Variable *T_eax = makeReg(Expected->getType(), Reg_eax);
+  _mov(T_eax, Expected);
+  OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Expected->getType());
+  Variable *DesiredReg = legalizeToVar(Desired);
+  const bool Locked = true;
+  _cmpxchg(Addr, T_eax, DesiredReg, Locked);
+  _mov(DestPrev, T_eax);
+}
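For context, the 64-bit path above pre-colors edx:eax with the expected value and ecx:ebx with the desired value because cmpxchg8b hard-codes those register pairs. Roughly the same operation written as GCC-style inline assembly, as an illustrative sketch only (the function and variable names are made up, and this only assembles for 32-bit x86):

    #include <stdint.h>

    // cmpxchg8b compares edx:eax against the 8-byte memory operand; on a match
    // it stores ecx:ebx, otherwise it reloads edx:eax from memory. Either way
    // the previous memory value ends up in edx:eax, which is what DestPrev
    // receives in the lowering above.
    static uint64_t AtomicCmpxchg64(uint64_t *Ptr, uint64_t Expected,
                                    uint64_t Desired) {
      uint64_t Prev = Expected; // the "A" constraint ties this to edx:eax
      asm volatile("lock; cmpxchg8b %1"
                   : "+A"(Prev), "+m"(*Ptr)
                   : "b"((uint32_t)Desired), "c"((uint32_t)(Desired >> 32))
                   : "cc", "memory");
      return Prev;
    }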
+
 void TargetX8632::lowerAtomicRMW(Variable *Dest, uint32_t Operation,
                                  Operand *Ptr, Operand *Val) {
+  bool NeedsCmpxchg = false;
+  LowerBinOp Op_Lo = NULL;
+  LowerBinOp Op_Hi = NULL;
   switch (Operation) {
   default:
     Func->setError("Unknown AtomicRMW operation");
     return;
   case Intrinsics::AtomicAdd: {
     if (Dest->getType() == IceType_i64) {
-      // Do a nasty cmpxchg8b loop. Factor this into a function.
-      // TODO(jvoung): fill it in.
-      Func->setError("Unhandled AtomicRMW operation");
-      return;
+      // All the fall-through paths must set this to true, but use this
+      // for asserting.
+      NeedsCmpxchg = true;
+      Op_Lo = &TargetX8632::_add;
+      Op_Hi = &TargetX8632::_adc;
+      break;
     }
     OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Dest->getType());
     const bool Locked = true;
     Variable *T = NULL;
     _mov(T, Val);
     _xadd(Addr, T, Locked);
     _mov(Dest, T);
     return;
   }
   case Intrinsics::AtomicSub: {
     if (Dest->getType() == IceType_i64) {
-      // Do a nasty cmpxchg8b loop.
-      // TODO(jvoung): fill it in.
-      Func->setError("Unhandled AtomicRMW operation");
-      return;
+      NeedsCmpxchg = true;
+      Op_Lo = &TargetX8632::_sub;
+      Op_Hi = &TargetX8632::_sbb;
+      break;
     }
-    // Generate a memory operand from Ptr.
-    // neg...
-    // Then do the same as AtomicAdd.
-    // TODO(jvoung): fill it in.
-    Func->setError("Unhandled AtomicRMW operation");
+    OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Dest->getType());
+    const bool Locked = true;
+    Variable *T = NULL;
+    _mov(T, Val);
+    _neg(T);
+    _xadd(Addr, T, Locked);
+    _mov(Dest, T);
     return;
   }
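The 32-bit AtomicSub path works because xadd leaves the previous memory contents in its source register, and x86 has no locked "xsub", so the addend is simply negated first. The same idea expressed with a compiler builtin, as an illustrative sketch only (the names are hypothetical):

    #include <stdint.h>

    // Fetch-and-sub via lock xadd: negate the operand, then add it atomically;
    // xadd hands back the previous memory contents, which is the RMW result.
    uint32_t FetchAndSub(uint32_t *Ptr, uint32_t Val) {
      return __sync_fetch_and_add(Ptr, (uint32_t)0 - Val);
      // equivalently: return __sync_fetch_and_sub(Ptr, Val);
    }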
   case Intrinsics::AtomicOr:
+    // TODO(jvoung): If Dest is null or dead, then some of these
+    // operations do not need an "exchange", but just a locked op.
+    // That appears to be "worth" it for sub, or, and, and xor.
+    // xadd is probably fine vs lock add for add, and xchg is fine
+    // vs an atomic store.
+    NeedsCmpxchg = true;
+    Op_Lo = &TargetX8632::_or;
+    Op_Hi = &TargetX8632::_or;
+    break;
   case Intrinsics::AtomicAnd:
+    NeedsCmpxchg = true;
+    Op_Lo = &TargetX8632::_and;
+    Op_Hi = &TargetX8632::_and;
+    break;
   case Intrinsics::AtomicXor:
+    NeedsCmpxchg = true;
+    Op_Lo = &TargetX8632::_xor;
+    Op_Hi = &TargetX8632::_xor;
+    break;
   case Intrinsics::AtomicExchange:
-    // TODO(jvoung): fill it in.
-    Func->setError("Unhandled AtomicRMW operation");
+    if (Dest->getType() == IceType_i64) {
+      NeedsCmpxchg = true;
+      // NeedsCmpxchg, but no real Op_Lo/Op_Hi need to be done. The values
+      // just need to be moved to the ecx and ebx registers.
+      Op_Lo = NULL;
+      Op_Hi = NULL;
+      break;
+    }
+    OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Dest->getType());
+    Variable *T = NULL;
+    _mov(T, Val);
+    _xchg(Addr, T);
+    _mov(Dest, T);
     return;
   }
+  // Otherwise, we need a cmpxchg loop.
+  assert(NeedsCmpxchg);
+  expandAtomicRMWAsCmpxchg(Op_Lo, Op_Hi, Dest, Ptr, Val);
+}
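Note that the 32-bit AtomicExchange path needs neither a lock prefix nor a retry loop: xchg with a memory operand is implicitly locked on x86, so a single instruction both stores the new value and returns the old one. A sketch of the equivalent builtin, for illustration only (names are made up):

    #include <stdint.h>

    // One implicitly locked instruction does the whole read-modify-write.
    uint32_t AtomicSwap(uint32_t *Ptr, uint32_t NewVal) {
      return __atomic_exchange_n(Ptr, NewVal, __ATOMIC_SEQ_CST);
    }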
+
+void TargetX8632::expandAtomicRMWAsCmpxchg(LowerBinOp Op_Lo, LowerBinOp Op_Hi,
+                                           Variable *Dest, Operand *Ptr,
+                                           Operand *Val) {
+  // Expand a more complex RMW operation as a cmpxchg loop:
+  // For 64-bit:
+  //   mov     eax, [ptr]
+  //   mov     edx, [ptr + 4]
+  // .LABEL:
+  //   mov     ebx, eax
+  //   <Op_Lo> ebx, <desired_adj_lo>
+  //   mov     ecx, edx
+  //   <Op_Hi> ecx, <desired_adj_hi>
+  //   lock cmpxchg8b [ptr]
+  //   jne     .LABEL
+  //   mov     <dest_lo>, eax
+  //   mov     <dest_hi>, edx
+  //
+  // For 32-bit:
+  //   mov     eax, [ptr]
+  // .LABEL:
+  //   mov     <reg>, eax
+  //   op      <reg>, [desired_adj]
+  //   lock cmpxchg [ptr], <reg>
+  //   jne     .LABEL
+  //   mov     <dest>, eax
+  //
+  // If Op_{Lo,Hi} are NULL, then just copy the value.
+  Val = legalize(Val);
+  Type Ty = Val->getType();
+  if (Ty == IceType_i64) {
+    Variable *T_edx = makeReg(IceType_i32, Reg_edx);
+    Variable *T_eax = makeReg(IceType_i32, Reg_eax);
+    OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Ty);
+    _mov(T_eax, loOperand(Addr));
+    _mov(T_edx, hiOperand(Addr));
+    Variable *T_ecx = makeReg(IceType_i32, Reg_ecx);
+    Variable *T_ebx = makeReg(IceType_i32, Reg_ebx);
+    InstX8632Label *Label = InstX8632Label::create(Func, this);
+    const bool IsXchg8b = Op_Lo == NULL && Op_Hi == NULL;
+    if (!IsXchg8b) {
+      Context.insert(Label);
+      _mov(T_ebx, T_eax);
+      (this->*Op_Lo)(T_ebx, loOperand(Val));
+      _mov(T_ecx, T_edx);
+      (this->*Op_Hi)(T_ecx, hiOperand(Val));
+    } else {
+      // This is for xchg, which doesn't need an actual Op_Lo/Op_Hi.
+      // It just needs the Val loaded into ebx and ecx.
+      // That can also be done before the loop.
+      _mov(T_ebx, loOperand(Val));
+      _mov(T_ecx, hiOperand(Val));
+      Context.insert(Label);
+    }
+    const bool Locked = true;
+    _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);
+    _br(InstX8632Br::Br_ne, Label);
+    if (!IsXchg8b) {
+      // If Val is a variable, model the extended live range of Val through
+      // the end of the loop, since it will be re-used by the loop.
+      if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) {
+        Variable *ValLo = llvm::cast<Variable>(loOperand(ValVar));
+        Variable *ValHi = llvm::cast<Variable>(hiOperand(ValVar));
+        Context.insert(InstFakeUse::create(Func, ValLo));
+        Context.insert(InstFakeUse::create(Func, ValHi));
+      }
+    } else {
+      // For xchg, the loop is slightly smaller and ebx/ecx are used.
+      Context.insert(InstFakeUse::create(Func, T_ebx));
+      Context.insert(InstFakeUse::create(Func, T_ecx));
+    }
+    // The address base is also reused in the loop.
+    Context.insert(InstFakeUse::create(Func, Addr->getBase()));
+    Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
+    Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
+    _mov(DestLo, T_eax);
+    _mov(DestHi, T_edx);
+    return;
+  }
+  OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Ty);
+  Variable *T_eax = makeReg(Ty, Reg_eax);
+  _mov(T_eax, Addr);
+  InstX8632Label *Label = InstX8632Label::create(Func, this);
+  Context.insert(Label);
+  // We want to pick a different register for T than Eax, so don't use
+  // _mov(T == NULL, T_eax).
+  Variable *T = makeReg(Ty);
+  _mov(T, T_eax);
+  (this->*Op_Lo)(T, Val);
+  const bool Locked = true;
+  _cmpxchg(Addr, T_eax, T, Locked);
+  _br(InstX8632Br::Br_ne, Label);
+  // If Val is a variable, model the extended live range of Val through
+  // the end of the loop, since it will be re-used by the loop.
+  if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) {
+    Context.insert(InstFakeUse::create(Func, ValVar));
+  }
+  // The address base is also reused in the loop.
+  Context.insert(InstFakeUse::create(Func, Addr->getBase()));
+  _mov(Dest, T_eax);
 }
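The 32-bit portion of expandAtomicRMWAsCmpxchg is the standard compare-and-swap retry loop. Sketched with a GCC/Clang builtin, for illustration only (the function name, Ptr, Adj, and the use of addition stand in for whatever Op_Lo computes; none of these names come from the patch):

    #include <stdint.h>

    // Take a snapshot (eax in the lowering), compute the new value from it,
    // then try lock cmpxchg; on failure the builtin refreshes Old from memory,
    // mirroring how cmpxchg reloads eax before the jne branches back.
    uint32_t FetchAndAdd(uint32_t *Ptr, uint32_t Adj) {
      uint32_t Old = *Ptr;
      uint32_t New;
      do {
        New = Old + Adj;  // stands in for whatever Op_Lo computes
      } while (!__atomic_compare_exchange_n(Ptr, &Old, New, /*weak=*/false,
                                            __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST));
      return Old;  // the value the memory held before the successful swap
    }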
 
 namespace {
 
 bool isAdd(const Inst *Inst) {
   if (const InstArithmetic *Arith =
           llvm::dyn_cast_or_null<const InstArithmetic>(Inst)) {
     return (Arith->getOp() == InstArithmetic::Add);
   }
   return false;
(...skipping 651 matching lines...)
     for (SizeT i = 0; i < Size; ++i) {
       Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n";
     }
     Str << "\t.size\t" << MangledName << ", " << Size << "\n";
   }
   Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName
       << "\n";
 }
 
 } // end of namespace Ice