OLD | NEW |
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// | 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 // | 9 // |
10 // This file implements the TargetLoweringX8632 class, which | 10 // This file implements the TargetLoweringX8632 class, which |
(...skipping 1950 matching lines...)
1961 _cmp(Src0New, Src1); | 1961 _cmp(Src0New, Src1); |
1962 _mov(Dest, One); | 1962 _mov(Dest, One); |
1963 _br(getIcmp32Mapping(Inst->getCondition()), Label); | 1963 _br(getIcmp32Mapping(Inst->getCondition()), Label); |
1964 Context.insert(InstFakeUse::create(Func, Dest)); | 1964 Context.insert(InstFakeUse::create(Func, Dest)); |
1965 _mov(Dest, Zero); | 1965 _mov(Dest, Zero); |
1966 Context.insert(Label); | 1966 Context.insert(Label); |
1967 } | 1967 } |
1968 | 1968 |
1969 void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { | 1969 void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { |
1970 switch (Instr->getIntrinsicInfo().ID) { | 1970 switch (Instr->getIntrinsicInfo().ID) { |
1971 case Intrinsics::AtomicCmpxchg: | 1971 case Intrinsics::AtomicCmpxchg: { |
1972 if (!Intrinsics::VerifyMemoryOrder( | 1972 if (!Intrinsics::VerifyMemoryOrder( |
1973 llvm::cast<ConstantInteger>(Instr->getArg(3))->getValue())) { | 1973 llvm::cast<ConstantInteger>(Instr->getArg(3))->getValue())) { |
1974 Func->setError("Unexpected memory ordering (success) for AtomicCmpxchg"); | 1974 Func->setError("Unexpected memory ordering (success) for AtomicCmpxchg"); |
1975 return; | 1975 return; |
1976 } | 1976 } |
1977 if (!Intrinsics::VerifyMemoryOrder( | 1977 if (!Intrinsics::VerifyMemoryOrder( |
1978 llvm::cast<ConstantInteger>(Instr->getArg(4))->getValue())) { | 1978 llvm::cast<ConstantInteger>(Instr->getArg(4))->getValue())) { |
1979 Func->setError("Unexpected memory ordering (failure) for AtomicCmpxchg"); | 1979 Func->setError("Unexpected memory ordering (failure) for AtomicCmpxchg"); |
1980 return; | 1980 return; |
1981 } | 1981 } |
1982 // TODO(jvoung): fill it in. | 1982 Variable *DestPrev = Instr->getDest(); |
1983 Func->setError("Unhandled intrinsic"); | 1983 Operand *PtrToMem = Instr->getArg(0); |
| 1984 Operand *Expected = Instr->getArg(1); |
| 1985 Operand *Desired = Instr->getArg(2); |
| 1986 lowerAtomicCmpxchg(DestPrev, PtrToMem, Expected, Desired); |
| 1987 // TODO(jvoung): If we peek ahead a few instructions and see how |
| 1988 // DestPrev is used (typically via another compare and branch), |
| 1989 // we may be able to optimize. If the result truly is used by a |
| 1990 // compare + branch, and the comparison is for equality, then we can |
| 1991 // optimize out the later compare, and fuse with the later branch. |
1984 return; | 1992 return; |
| 1993 } |
1985 case Intrinsics::AtomicFence: | 1994 case Intrinsics::AtomicFence: |
1986 if (!Intrinsics::VerifyMemoryOrder( | 1995 if (!Intrinsics::VerifyMemoryOrder( |
1987 llvm::cast<ConstantInteger>(Instr->getArg(0))->getValue())) { | 1996 llvm::cast<ConstantInteger>(Instr->getArg(0))->getValue())) { |
1988 Func->setError("Unexpected memory ordering for AtomicFence"); | 1997 Func->setError("Unexpected memory ordering for AtomicFence"); |
1989 return; | 1998 return; |
1990 } | 1999 } |
1991 _mfence(); | 2000 _mfence(); |
1992 return; | 2001 return; |
1993 case Intrinsics::AtomicFenceAll: | 2002 case Intrinsics::AtomicFenceAll: |
1994 // NOTE: FenceAll should prevent any load/store from being moved | 2003 // NOTE: FenceAll should prevent any load/store from being moved |
(...skipping 181 matching lines...)
2176 case Intrinsics::Trap: | 2185 case Intrinsics::Trap: |
2177 _ud2(); | 2186 _ud2(); |
2178 return; | 2187 return; |
2179 case Intrinsics::UnknownIntrinsic: | 2188 case Intrinsics::UnknownIntrinsic: |
2180 Func->setError("Should not be lowering UnknownIntrinsic"); | 2189 Func->setError("Should not be lowering UnknownIntrinsic"); |
2181 return; | 2190 return; |
2182 } | 2191 } |
2183 return; | 2192 return; |
2184 } | 2193 } |
2185 | 2194 |
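As context for the AtomicFence case above: a sequentially consistent fence is exactly what _mfence() provides, and on x86 compilers typically emit a seq_cst fence as mfence. A C++11 analogue (illustrative only, not Subzero code):

    #include <atomic>

    // The semantics the AtomicFence intrinsic lowers to: a full barrier
    // that no load or store may be moved across.
    void atomicFence() {
      std::atomic_thread_fence(std::memory_order_seq_cst);
    }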
| 2195 void TargetX8632::lowerAtomicCmpxchg(Variable *DestPrev, Operand *Ptr, |
| 2196 Operand *Expected, Operand *Desired) { |
| 2197 if (Expected->getType() == IceType_i64) { |
| 2198 // Reserve the pre-colored registers first, before adding any more |
| 2199 // infinite-weight variables from FormMemoryOperand's legalization. |
| 2200 Variable *T_edx = makeReg(IceType_i32, Reg_edx); |
| 2201 Variable *T_eax = makeReg(IceType_i32, Reg_eax); |
| 2202 Variable *T_ecx = makeReg(IceType_i32, Reg_ecx); |
| 2203 Variable *T_ebx = makeReg(IceType_i32, Reg_ebx); |
| 2204 _mov(T_eax, loOperand(Expected)); |
| 2205 _mov(T_edx, hiOperand(Expected)); |
| 2206 _mov(T_ebx, loOperand(Desired)); |
| 2207 _mov(T_ecx, hiOperand(Desired)); |
| 2208 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Expected->getType()); |
| 2209 const bool Locked = true; |
| 2210 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked); |
| 2211 Variable *DestLo = llvm::cast<Variable>(loOperand(DestPrev)); |
| 2212 Variable *DestHi = llvm::cast<Variable>(hiOperand(DestPrev)); |
| 2213 _mov(DestLo, T_eax); |
| 2214 _mov(DestHi, T_edx); |
| 2215 return; |
| 2216 } |
| 2217 Variable *T_eax = makeReg(Expected->getType(), Reg_eax); |
| 2218 _mov(T_eax, Expected); |
| 2219 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Expected->getType()); |
| 2220 Variable *DesiredReg = legalizeToVar(Desired); |
| 2221 const bool Locked = true; |
| 2222 _cmpxchg(Addr, T_eax, DesiredReg, Locked); |
| 2223 _mov(DestPrev, T_eax); |
| 2224 } |
| 2225 |
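For reference, the observable semantics of lowerAtomicCmpxchg as a C++11 sketch (illustrative only, not Subzero code): DestPrev always receives the value that was in memory, whether or not the swap happened, which is what lock cmpxchg leaves in eax (or edx:eax for the cmpxchg8b path).

    #include <atomic>
    #include <cstdint>

    uint32_t atomicCmpxchg32(std::atomic<uint32_t> *Ptr, uint32_t Expected,
                             uint32_t Desired) {
      // On failure, compare_exchange_strong writes the value it found in
      // memory back into Expected; on success, Expected already equals it.
      Ptr->compare_exchange_strong(Expected, Desired);
      return Expected; // The "previous" value, i.e. DestPrev.
    }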
2186 void TargetX8632::lowerAtomicRMW(Variable *Dest, uint32_t Operation, | 2226 void TargetX8632::lowerAtomicRMW(Variable *Dest, uint32_t Operation, |
2187 Operand *Ptr, Operand *Val) { | 2227 Operand *Ptr, Operand *Val) { |
| 2228 bool NeedsCmpxchg = false; |
| 2229 LowerBinOp Op_Lo = NULL; |
| 2230 LowerBinOp Op_Hi = NULL; |
2188 switch (Operation) { | 2231 switch (Operation) { |
2189 default: | 2232 default: |
2190 Func->setError("Unknown AtomicRMW operation"); | 2233 Func->setError("Unknown AtomicRMW operation"); |
2191 return; | 2234 return; |
2192 case Intrinsics::AtomicAdd: { | 2235 case Intrinsics::AtomicAdd: { |
2193 if (Dest->getType() == IceType_i64) { | 2236 if (Dest->getType() == IceType_i64) { |
2194 // Do a nasty cmpxchg8b loop. Factor this into a function. | 2237 // All the fall-through paths must set this to true; it is used |
2195 // TODO(jvoung): fill it in. | 2238 // only for asserting. |
2196 Func->setError("Unhandled AtomicRMW operation"); | 2239 NeedsCmpxchg = true; |
2197 return; | 2240 Op_Lo = &TargetX8632::_add; |
| 2241 Op_Hi = &TargetX8632::_adc; |
| 2242 break; |
2198 } | 2243 } |
2199 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Dest->getType()); | 2244 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Dest->getType()); |
2200 const bool Locked = true; | 2245 const bool Locked = true; |
2201 Variable *T = NULL; | 2246 Variable *T = NULL; |
2202 _mov(T, Val); | 2247 _mov(T, Val); |
2203 _xadd(Addr, T, Locked); | 2248 _xadd(Addr, T, Locked); |
2204 _mov(Dest, T); | 2249 _mov(Dest, T); |
2205 return; | 2250 return; |
2206 } | 2251 } |
2207 case Intrinsics::AtomicSub: { | 2252 case Intrinsics::AtomicSub: { |
2208 if (Dest->getType() == IceType_i64) { | 2253 if (Dest->getType() == IceType_i64) { |
2209 // Do a nasty cmpxchg8b loop. | 2254 NeedsCmpxchg = true; |
2210 // TODO(jvoung): fill it in. | 2255 Op_Lo = &TargetX8632::_sub; |
2211 Func->setError("Unhandled AtomicRMW operation"); | 2256 Op_Hi = &TargetX8632::_sbb; |
2212 return; | 2257 break; |
2213 } | 2258 } |
2214 // Generate a memory operand from Ptr. | 2259 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Dest->getType()); |
2215 // neg... | 2260 const bool Locked = true; |
2216 // Then do the same as AtomicAdd. | 2261 Variable *T = NULL; |
2217 // TODO(jvoung): fill it in. | 2262 _mov(T, Val); |
2218 Func->setError("Unhandled AtomicRMW operation"); | 2263 _neg(T); |
| 2264 _xadd(Addr, T, Locked); |
| 2265 _mov(Dest, T); |
2219 return; | 2266 return; |
2220 } | 2267 } |
2221 case Intrinsics::AtomicOr: | 2268 case Intrinsics::AtomicOr: |
| 2269 // TODO(jvoung): If Dest is null or dead, then some of these |
| 2270 // operations do not need an "exchange", but just a locked op. |
| 2271 // That appears to be "worth" it for sub, or, and, and xor. |
| 2272 // xadd is probably fine vs lock add for add, and xchg is fine |
| 2273 // vs an atomic store. |
| 2274 NeedsCmpxchg = true; |
| 2275 Op_Lo = &TargetX8632::_or; |
| 2276 Op_Hi = &TargetX8632::_or; |
| 2277 break; |
2222 case Intrinsics::AtomicAnd: | 2278 case Intrinsics::AtomicAnd: |
| 2279 NeedsCmpxchg = true; |
| 2280 Op_Lo = &TargetX8632::_and; |
| 2281 Op_Hi = &TargetX8632::_and; |
| 2282 break; |
2223 case Intrinsics::AtomicXor: | 2283 case Intrinsics::AtomicXor: |
| 2284 NeedsCmpxchg = true; |
| 2285 Op_Lo = &TargetX8632::_xor; |
| 2286 Op_Hi = &TargetX8632::_xor; |
| 2287 break; |
2224 case Intrinsics::AtomicExchange: | 2288 case Intrinsics::AtomicExchange: |
2225 // TODO(jvoung): fill it in. | 2289 if (Dest->getType() == IceType_i64) { |
2226 Func->setError("Unhandled AtomicRMW operation"); | 2290 NeedsCmpxchg = true; |
| 2291 // NeedsCmpxchg is set, but no real Op_Lo/Op_Hi operation is needed. |
| 2292 // The values just need to be moved to the ecx and ebx registers. |
| 2293 Op_Lo = NULL; |
| 2294 Op_Hi = NULL; |
| 2295 break; |
| 2296 } |
| 2297 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Dest->getType()); |
| 2298 Variable *T = NULL; |
| 2299 _mov(T, Val); |
| 2300 _xchg(Addr, T); |
| 2301 _mov(Dest, T); |
2227 return; | 2302 return; |
2228 } | 2303 } |
| 2304 // Otherwise, we need a cmpxchg loop. |
| 2305 assert(NeedsCmpxchg); |
| 2306 expandAtomicRMWAsCmpxchg(Op_Lo, Op_Hi, Dest, Ptr, Val); |
| 2307 } |
| 2308 |
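The 32-bit fast paths above avoid a cmpxchg loop entirely: AtomicAdd and AtomicSub use lock xadd (x86 has no locked "xsub", hence the _neg first), and AtomicExchange uses xchg, whose memory form asserts the lock prefix implicitly. A C++11 sketch of the same semantics (illustrative only, not Subzero code):

    #include <atomic>
    #include <cstdint>

    uint32_t atomicSub32(std::atomic<uint32_t> *Ptr, uint32_t Val) {
      return Ptr->fetch_sub(Val); // lowered as: neg Val; lock xadd [Ptr], Val
    }

    uint32_t atomicExchange32(std::atomic<uint32_t> *Ptr, uint32_t Val) {
      return Ptr->exchange(Val); // lowered as: xchg [Ptr], Val
    }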
| 2309 void TargetX8632::expandAtomicRMWAsCmpxchg(LowerBinOp Op_Lo, LowerBinOp Op_Hi, |
| 2310 Variable *Dest, Operand *Ptr, |
| 2311 Operand *Val) { |
| 2312 // Expand a more complex RMW operation as a cmpxchg loop: |
| 2313 // For 64-bit: |
| 2314 // mov eax, [ptr] |
| 2315 // mov edx, [ptr + 4] |
| 2316 // .LABEL: |
| 2317 // mov ebx, eax |
| 2318 // <Op_Lo> ebx, <desired_adj_lo> |
| 2319 // mov ecx, edx |
| 2320 // <Op_Hi> ecx, <desired_adj_hi> |
| 2321 // lock cmpxchg8b [ptr] |
| 2322 // jne .LABEL |
| 2323 // mov <dest_lo>, eax |
| 2324 // mov <dest_hi>, edx |
| 2325 // |
| 2326 // For 32-bit: |
| 2327 // mov eax, [ptr] |
| 2328 // .LABEL: |
| 2329 // mov <reg>, eax |
| 2330 // op <reg>, [desired_adj] |
| 2331 // lock cmpxchg [ptr], <reg> |
| 2332 // jne .LABEL |
| 2333 // mov <dest>, eax |
| 2334 // |
| 2335 // If Op_{Lo,Hi} are NULL, then just copy the value. |
| 2336 Val = legalize(Val); |
| 2337 Type Ty = Val->getType(); |
| 2338 if (Ty == IceType_i64) { |
| 2339 Variable *T_edx = makeReg(IceType_i32, Reg_edx); |
| 2340 Variable *T_eax = makeReg(IceType_i32, Reg_eax); |
| 2341 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Ty); |
| 2342 _mov(T_eax, loOperand(Addr)); |
| 2343 _mov(T_edx, hiOperand(Addr)); |
| 2344 Variable *T_ecx = makeReg(IceType_i32, Reg_ecx); |
| 2345 Variable *T_ebx = makeReg(IceType_i32, Reg_ebx); |
| 2346 InstX8632Label *Label = InstX8632Label::create(Func, this); |
| 2347 const bool IsXchg8b = Op_Lo == NULL && Op_Hi == NULL; |
| 2348 if (!IsXchg8b) { |
| 2349 Context.insert(Label); |
| 2350 _mov(T_ebx, T_eax); |
| 2351 (this->*Op_Lo)(T_ebx, loOperand(Val)); |
| 2352 _mov(T_ecx, T_edx); |
| 2353 (this->*Op_Hi)(T_ecx, hiOperand(Val)); |
| 2354 } else { |
| 2355 // This is for xchg, which doesn't need an actual Op_Lo/Op_Hi. |
| 2356 // It just needs the Val loaded into ebx and ecx. |
| 2357 // That can also be done before the loop. |
| 2358 _mov(T_ebx, loOperand(Val)); |
| 2359 _mov(T_ecx, hiOperand(Val)); |
| 2360 Context.insert(Label); |
| 2361 } |
| 2362 const bool Locked = true; |
| 2363 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked); |
| 2364 _br(InstX8632Br::Br_ne, Label); |
| 2365 if (!IsXchg8b) { |
| 2366 // If Val is a variable, model the extended live range of Val through |
| 2367 // the end of the loop, since it will be re-used by the loop. |
| 2368 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) { |
| 2369 Variable *ValLo = llvm::cast<Variable>(loOperand(ValVar)); |
| 2370 Variable *ValHi = llvm::cast<Variable>(hiOperand(ValVar)); |
| 2371 Context.insert(InstFakeUse::create(Func, ValLo)); |
| 2372 Context.insert(InstFakeUse::create(Func, ValHi)); |
| 2373 } |
| 2374 } else { |
| 2375 // For xchg, the loop is slightly smaller and ebx/ecx are used. |
| 2376 Context.insert(InstFakeUse::create(Func, T_ebx)); |
| 2377 Context.insert(InstFakeUse::create(Func, T_ecx)); |
| 2378 } |
| 2379 // The address base is also reused in the loop. |
| 2380 Context.insert(InstFakeUse::create(Func, Addr->getBase())); |
| 2381 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
| 2382 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
| 2383 _mov(DestLo, T_eax); |
| 2384 _mov(DestHi, T_edx); |
| 2385 return; |
| 2386 } |
| 2387 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Ty); |
| 2388 Variable *T_eax = makeReg(Ty, Reg_eax); |
| 2389 _mov(T_eax, Addr); |
| 2390 InstX8632Label *Label = InstX8632Label::create(Func, this); |
| 2391 Context.insert(Label); |
| 2392 // We want to pick a register for T other than eax, so don't use |
| 2393 // _mov(T == NULL, T_eax). |
| 2394 Variable *T = makeReg(Ty); |
| 2395 _mov(T, T_eax); |
| 2396 (this->*Op_Lo)(T, Val); |
| 2397 const bool Locked = true; |
| 2398 _cmpxchg(Addr, T_eax, T, Locked); |
| 2399 _br(InstX8632Br::Br_ne, Label); |
| 2400 // If Val is a variable, model the extended live range of Val through |
| 2401 // the end of the loop, since it will be re-used by the loop. |
| 2402 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) { |
| 2403 Context.insert(InstFakeUse::create(Func, ValVar)); |
| 2404 } |
| 2405 // The address base is also reused in the loop. |
| 2406 Context.insert(InstFakeUse::create(Func, Addr->getBase())); |
| 2407 _mov(Dest, T_eax); |
2229 } | 2408 } |
2230 | 2409 |
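The retry structure that expandAtomicRMWAsCmpxchg emits corresponds to the standard compare-exchange loop; a C++11 sketch for the 32-bit "or" case (illustrative only, not Subzero code):

    #include <atomic>
    #include <cstdint>

    uint32_t atomicOr32(std::atomic<uint32_t> *Ptr, uint32_t Val) {
      uint32_t Old = Ptr->load();
      // On failure, compare_exchange_weak reloads Old with the value another
      // thread wrote, mirroring how lock cmpxchg refreshes eax before the
      // jne takes the loop back to .LABEL.
      while (!Ptr->compare_exchange_weak(Old, Old | Val)) {
      }
      return Old;
    }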
2231 namespace { | 2410 namespace { |
2232 | 2411 |
2233 bool isAdd(const Inst *Inst) { | 2412 bool isAdd(const Inst *Inst) { |
2234 if (const InstArithmetic *Arith = | 2413 if (const InstArithmetic *Arith = |
2235 llvm::dyn_cast_or_null<const InstArithmetic>(Inst)) { | 2414 llvm::dyn_cast_or_null<const InstArithmetic>(Inst)) { |
2236 return (Arith->getOp() == InstArithmetic::Add); | 2415 return (Arith->getOp() == InstArithmetic::Add); |
2237 } | 2416 } |
2238 return false; | 2417 return false; |
(...skipping 651 matching lines...)
2890 for (SizeT i = 0; i < Size; ++i) { | 3069 for (SizeT i = 0; i < Size; ++i) { |
2891 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; | 3070 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; |
2892 } | 3071 } |
2893 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; | 3072 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; |
2894 } | 3073 } |
2895 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName | 3074 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName |
2896 << "\n"; | 3075 << "\n"; |
2897 } | 3076 } |
2898 | 3077 |
2899 } // end of namespace Ice | 3078 } // end of namespace Ice |