Index: src/IceTargetLoweringARM32.cpp |
diff --git a/src/IceTargetLoweringARM32.cpp b/src/IceTargetLoweringARM32.cpp |
index fbcc1ad0becd63bb43b99bee2d57590d3e8da3a5..26741908834a9ef905da62880e2cdd5ee66a0727 100644 |
--- a/src/IceTargetLoweringARM32.cpp |
+++ b/src/IceTargetLoweringARM32.cpp |
@@ -195,7 +195,6 @@ TargetARM32::TargetARM32(Cfg *Func) |
"Duplicate alias for " #val); \ |
RegisterAliases[RegARM32::val].set(RegAlias); \ |
} \ |
- RegisterAliases[RegARM32::val].resize(RegARM32::Reg_NUM); \ |
assert(RegisterAliases[RegARM32::val][RegARM32::val]); \ |
ScratchRegs[RegARM32::val] = scratch; |
REGARM32_TABLE; |
@@ -217,6 +216,34 @@ TargetARM32::TargetARM32(Cfg *Func) |
TypeToRegisterSet[IceType_v4f32] = VectorRegisters; |
} |
+namespace { |
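+// The register allocator assigns a whole register pair (e.g., r0 and r1) to
+// an infinite-weight Variable64On32. This helper copies that assignment down
+// to the variable's Lo and Hi halves so that later passes see concrete 32-bit
+// GPRs.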
+void copyRegAllocFromInfWeightVariable64On32(const VarList &Vars) { |
+ for (Variable *Var : Vars) { |
+ auto *Var64 = llvm::dyn_cast<Variable64On32>(Var); |
+ if (!Var64) { |
+ // This is not the variable we are looking for. |
+ continue; |
+ } |
+ assert(Var64->hasReg() || !Var64->mustHaveReg()); |
+ if (!Var64->hasReg()) { |
+ continue; |
+ } |
+ SizeT FirstReg = RegARM32::getI64PairFirstGPRNum(Var->getRegNum()); |
+ // This assumes little endian. |
+ Variable *Lo = Var64->getLo(); |
+ Variable *Hi = Var64->getHi(); |
+ assert(Lo->hasReg() == Hi->hasReg()); |
+ if (Lo->hasReg()) { |
+ continue; |
+ } |
+ Lo->setRegNum(FirstReg); |
+ Lo->setMustHaveReg(); |
+ Hi->setRegNum(FirstReg + 1); |
+ Hi->setMustHaveReg(); |
+ } |
+} |
+} // end of anonymous namespace |
+ |
void TargetARM32::translateO2() { |
TimerMarker T(TimerStack::TT_O2, Func); |
@@ -284,6 +311,7 @@ void TargetARM32::translateO2() { |
regAlloc(RAK_Global); |
if (Func->hasError()) |
return; |
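+ // Propagate register-pair assignments from 64-bit variables down to their
+ // 32-bit halves.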
+ copyRegAllocFromInfWeightVariable64On32(Func->getVariables()); |
Func->dump("After linear scan regalloc"); |
if (Ctx->getFlags().getPhiEdgeSplit()) { |
@@ -344,6 +372,7 @@ void TargetARM32::translateOm1() { |
regAlloc(RAK_InfOnly); |
if (Func->hasError()) |
return; |
+ copyRegAllocFromInfWeightVariable64On32(Func->getVariables()); |
Func->dump("After regalloc of infinite-weight variables"); |
Func->genFrame(); |
@@ -616,7 +645,7 @@ void TargetARM32::finishArgumentLowering(Variable *Arg, Variable *FramePtr, |
auto *Mem = OperandARM32Mem::create( |
Func, Ty, FramePtr, llvm::cast<ConstantInteger32>( |
Ctx->getConstantInt32(Arg->getStackOffset()))); |
- legalizeToReg(Mem, Arg->getRegNum()); |
+ _mov(Arg, legalizeToReg(Mem, Arg->getRegNum())); |
// This argument-copying instruction uses an explicit OperandARM32Mem |
// operand instead of a Variable, so its fill-from-stack operation has to |
// be tracked separately for statistics. |
@@ -716,6 +745,11 @@ void TargetARM32::addProlog(CfgNode *Node) { |
RegsUsed[RegARM32::Reg_lr] = true; |
} |
for (SizeT i = 0; i < CalleeSaves.size(); ++i) { |
+ if (RegARM32::isI64RegisterPair(i)) { |
+ // We don't save register pairs explicitly. Instead, we rely on the code |
+ // fake-defining/fake-using each register in the pair.
+ continue; |
+ } |
if (CalleeSaves[i] && RegsUsed[i]) { |
// TODO(jvoung): do separate vpush for each floating point register |
// segment and += 4, or 8 depending on type. |
@@ -884,6 +918,10 @@ void TargetARM32::addEpilog(CfgNode *Node) { |
// Pop registers in ascending order just like push (instead of in reverse |
// order). |
for (SizeT i = 0; i < CalleeSaves.size(); ++i) { |
+ if (RegARM32::isI64RegisterPair(i)) { |
+ continue; |
+ } |
+ |
if (CalleeSaves[i] && RegsUsed[i]) { |
GPRsToRestore.push_back(getPhysicalRegister(i)); |
} |
@@ -1739,6 +1777,7 @@ void TargetARM32::lowerAssign(const InstAssign *Inst) { |
Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
Variable *T_Lo = makeReg(IceType_i32); |
Variable *T_Hi = makeReg(IceType_i32); |
+ |
_mov(T_Lo, Src0Lo); |
_mov(DestLo, T_Lo); |
_mov(T_Hi, Src0Hi); |
@@ -2271,9 +2310,7 @@ void TargetARM32::lowerCast(const InstCast *Inst) { |
configureBitcastTemporary(T); |
Variable *Src0R = legalizeToReg(Src0); |
_mov(T, Src0R); |
- auto *Dest64On32 = llvm::cast<Variable64On32>(Dest); |
- lowerAssign(InstAssign::create(Func, Dest64On32->getLo(), T->getLo())); |
- lowerAssign(InstAssign::create(Func, Dest64On32->getHi(), T->getHi())); |
+ lowerAssign(InstAssign::create(Func, Dest, T)); |
break; |
} |
case IceType_f64: { |
@@ -2282,11 +2319,11 @@ void TargetARM32::lowerCast(const InstCast *Inst) { |
// vmov T2, T0, T1 |
// Dest <- T2 |
assert(Src0->getType() == IceType_i64); |
+ Variable *T = makeReg(DestType); |
auto *Src64 = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64)); |
Src64->initHiLo(Func); |
configureBitcastTemporary(Src64); |
lowerAssign(InstAssign::create(Func, Src64, Src0)); |
- Variable *T = makeReg(IceType_f64); |
_mov(T, Src64); |
lowerAssign(InstAssign::create(Func, Dest, T)); |
break; |
@@ -2537,38 +2574,460 @@ void TargetARM32::lowerInsertElement(const InstInsertElement *Inst) { |
UnimplementedError(Func->getContext()->getFlags()); |
} |
-void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { |
- switch (Instr->getIntrinsicInfo().ID) { |
- case Intrinsics::AtomicCmpxchg: { |
- UnimplementedError(Func->getContext()->getFlags()); |
+namespace { |
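+// Returns the memory order operand of an atomic intrinsic as a constant, or
+// Intrinsics::MemoryOrderInvalid if it is not a compile-time constant.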
+inline uint64_t getConstantMemoryOrder(Operand *Opnd) { |
+ if (auto Integer = llvm::dyn_cast<ConstantInteger32>(Opnd)) |
+ return Integer->getValue(); |
+ return Intrinsics::MemoryOrderInvalid; |
+} |
+} // end of anonymous namespace |
+ |
+void TargetARM32::lowerAtomicRMW(Variable *Dest, uint32_t Operation, |
+ Operand *Ptr, Operand *Val) { |
+ // retry: |
+ // ldrex contents, [addr] |
+ // op tmp, contents, operand |
+ // strex success, tmp, [addr] |
+ // cmp success, #0
+ // bne retry
+ // fake-use(addr, operand) @ prevents undesirable clobbering. |
+ // mov dest, contents |
+ assert(Dest != nullptr); |
+ Type DestTy = Dest->getType(); |
+ (void)Ptr; |
+ (void)Val; |
+ |
+ OperandARM32Mem *Mem; |
+ Variable *PtrContentsReg; |
+ Variable *PtrContentsHiReg; |
+ Variable *PtrContentsLoReg; |
+ Variable *Value = Func->makeVariable(DestTy); |
+ Variable *ValueReg; |
+ Variable *ValueHiReg; |
+ Variable *ValueLoReg; |
+ Variable *Success = makeReg(IceType_i32); |
+ Variable *TmpReg; |
+ Variable *TmpHiReg; |
+ Variable *TmpLoReg; |
+ Operand *_0 = Ctx->getConstantZero(IceType_i32); |
+ InstARM32Label *Retry = InstARM32Label::create(Func, this); |
+ |
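+ // For i64, the exclusive access has to use ldrexd/strexd, which operate on a
+ // consecutive register pair (e.g., r0/r1); makeI64RegPair() creates 64-bit
+ // variables that the register allocator will place in such a pair.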
+ if (DestTy == IceType_i64) { |
+ Variable64On32 *PtrContentsReg64 = makeI64RegPair(); |
+ PtrContentsHiReg = PtrContentsReg64->getHi(); |
+ PtrContentsLoReg = PtrContentsReg64->getLo(); |
+ PtrContentsReg = PtrContentsReg64; |
+ |
+ llvm::cast<Variable64On32>(Value)->initHiLo(Func); |
+ Variable64On32 *ValueReg64 = makeI64RegPair(); |
+ ValueHiReg = ValueReg64->getHi(); |
+ ValueLoReg = ValueReg64->getLo(); |
+ ValueReg = ValueReg64; |
+ |
+ Variable64On32 *TmpReg64 = makeI64RegPair(); |
+ TmpHiReg = TmpReg64->getHi(); |
+ TmpLoReg = TmpReg64->getLo(); |
+ TmpReg = TmpReg64; |
+ } else { |
+ PtrContentsReg = makeReg(DestTy); |
+ PtrContentsHiReg = nullptr; |
+ PtrContentsLoReg = PtrContentsReg; |
+ |
+ ValueReg = makeReg(DestTy); |
+ ValueHiReg = nullptr; |
+ ValueLoReg = ValueReg; |
+ |
+ TmpReg = makeReg(DestTy); |
+ TmpHiReg = nullptr; |
+ TmpLoReg = TmpReg; |
+ } |
+ |
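+ // lowerAssign() on an i64 only defines the Lo and Hi halves, so fake-def the
+ // 64-bit variable itself first to keep liveness consistent.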
+ if (DestTy == IceType_i64) { |
+ Context.insert(InstFakeDef::create(Func, Value)); |
+ } |
+ lowerAssign(InstAssign::create(Func, Value, Val)); |
+ |
+ Variable *PtrVar = Func->makeVariable(IceType_i32); |
+ lowerAssign(InstAssign::create(Func, PtrVar, Ptr)); |
+ |
+ _dmb(); |
+ Context.insert(Retry); |
+ Mem = formMemoryOperand(PtrVar, DestTy); |
+ if (DestTy == IceType_i64) { |
+ Context.insert(InstFakeDef::create(Func, ValueReg, Value)); |
+ } |
+ lowerAssign(InstAssign::create(Func, ValueReg, Value)); |
+ if (DestTy == IceType_i8 || DestTy == IceType_i16) { |
+ _uxt(ValueReg, ValueReg); |
+ } |
+ _ldrex(PtrContentsReg, Mem); |
+ |
+ if (DestTy == IceType_i64) { |
+ Context.insert(InstFakeDef::create(Func, TmpReg, ValueReg)); |
+ } |
+ switch (Operation) { |
+ default: |
+ Func->setError("Unknown AtomicRMW operation"); |
return; |
+ case Intrinsics::AtomicAdd: |
+ if (DestTy == IceType_i64) { |
+ _adds(TmpLoReg, PtrContentsLoReg, ValueLoReg); |
+ _adc(TmpHiReg, PtrContentsHiReg, ValueHiReg); |
+ } else { |
+ _add(TmpLoReg, PtrContentsLoReg, ValueLoReg); |
+ } |
+ break; |
+ case Intrinsics::AtomicSub: |
+ if (DestTy == IceType_i64) { |
+ _subs(TmpLoReg, PtrContentsLoReg, ValueLoReg); |
+ _sbc(TmpHiReg, PtrContentsHiReg, ValueHiReg); |
+ } else { |
+ _sub(TmpLoReg, PtrContentsLoReg, ValueLoReg); |
+ } |
+ break; |
+ case Intrinsics::AtomicOr: |
+ _orr(TmpLoReg, PtrContentsLoReg, ValueLoReg); |
+ if (DestTy == IceType_i64) { |
+ _orr(TmpHiReg, PtrContentsHiReg, ValueHiReg); |
+ } |
+ break; |
+ case Intrinsics::AtomicAnd: |
+ _and(TmpLoReg, PtrContentsLoReg, ValueLoReg); |
+ if (DestTy == IceType_i64) { |
+ _and(TmpHiReg, PtrContentsHiReg, ValueHiReg); |
+ } |
+ break; |
+ case Intrinsics::AtomicXor: |
+ _eor(TmpLoReg, PtrContentsLoReg, ValueLoReg); |
+ if (DestTy == IceType_i64) { |
+ _eor(TmpHiReg, PtrContentsHiReg, ValueHiReg); |
+ } |
+ break; |
+ case Intrinsics::AtomicExchange: |
+ _mov(TmpLoReg, ValueLoReg); |
+ if (DestTy == IceType_i64) { |
+ _mov(TmpHiReg, ValueHiReg); |
+ } |
+ break; |
} |
+ _strex(Success, TmpReg, Mem); |
+ _cmp(Success, _0); |
+ _br(Retry, CondARM32::NE); |
+ |
+ // The following fake-uses ensure that Subzero will not clobber them in the |
+ // load-linked/store-conditional loop above. We might have to spill them, but |
+ // spilling is preferable to incorrect behavior.
+ Context.insert(InstFakeUse::create(Func, PtrVar)); |
+ if (auto *Value64 = llvm::dyn_cast<Variable64On32>(Value)) { |
+ Context.insert(InstFakeUse::create(Func, Value64->getHi())); |
+ Context.insert(InstFakeUse::create(Func, Value64->getLo())); |
+ } else { |
+ Context.insert(InstFakeUse::create(Func, Value)); |
+ } |
+ _dmb(); |
+ if (DestTy == IceType_i8 || DestTy == IceType_i16) { |
+ _uxt(PtrContentsReg, PtrContentsReg); |
+ } |
+ |
+ if (DestTy == IceType_i64) { |
+ Context.insert(InstFakeUse::create(Func, PtrContentsReg)); |
+ } |
+ lowerAssign(InstAssign::create(Func, Dest, PtrContentsReg)); |
+ if (auto *Dest64 = llvm::dyn_cast<Variable64On32>(Dest)) { |
+ Context.insert(InstFakeUse::create(Func, Dest64->getLo())); |
+ Context.insert(InstFakeUse::create(Func, Dest64->getHi())); |
+ } else { |
+ Context.insert(InstFakeUse::create(Func, Dest)); |
+ } |
+} |
+ |
+void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { |
+ Variable *Dest = Instr->getDest(); |
+ Type DestTy = (Dest != nullptr) ? Dest->getType() : IceType_void; |
+ Intrinsics::IntrinsicID ID = Instr->getIntrinsicInfo().ID; |
+ switch (ID) { |
case Intrinsics::AtomicFence: |
- UnimplementedError(Func->getContext()->getFlags()); |
- return; |
case Intrinsics::AtomicFenceAll: |
- // NOTE: FenceAll should prevent and load/store from being moved across the |
- // fence (both atomic and non-atomic). The InstARM32Mfence instruction is |
- // currently marked coarsely as "HasSideEffects". |
- UnimplementedError(Func->getContext()->getFlags()); |
+ assert(Dest == nullptr); |
+ _dmb(); |
return; |
case Intrinsics::AtomicIsLockFree: { |
- UnimplementedError(Func->getContext()->getFlags()); |
+ Operand *ByteSize = Instr->getArg(0); |
+ auto *CI = llvm::dyn_cast<ConstantInteger32>(ByteSize); |
+ if (CI == nullptr) { |
+ // The PNaCl ABI requires the byte size to be a compile-time constant. |
+ Func->setError("AtomicIsLockFree byte size should be compile-time const"); |
+ return; |
+ } |
+ static constexpr int32_t NotLockFree = 0; |
+ static constexpr int32_t LockFree = 1; |
+ int32_t Result = NotLockFree; |
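+ // On ARM32 all of these sizes can be handled without a lock: 1-, 2- and
+ // 4-byte accesses use plain loads/stores or ldrex/strex, and 8-byte accesses
+ // use ldrexd/strexd.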
+ switch (CI->getValue()) { |
+ case 1: |
+ case 2: |
+ case 4: |
+ case 8: |
+ Result = LockFree; |
+ break; |
+ } |
+ _mov(Dest, legalizeToReg(Ctx->getConstantInt32(Result))); |
return; |
} |
case Intrinsics::AtomicLoad: { |
- UnimplementedError(Func->getContext()->getFlags()); |
+ assert(isScalarIntegerType(DestTy)); |
+ // We require the memory address to be naturally aligned. Given that is the
+ // case, normal loads are atomic.
+ if (!Intrinsics::isMemoryOrderValid( |
+ ID, getConstantMemoryOrder(Instr->getArg(1)))) { |
+ Func->setError("Unexpected memory ordering for AtomicLoad"); |
+ return; |
+ } |
+ Variable *T; |
+ |
+ if (DestTy == IceType_i64) { |
+ // ldrexd is the only ARM instruction that is guaranteed to load a 64-bit
+ // integer atomically. Everything else works with a regular ldr. |
+ T = makeI64RegPair(); |
+ _ldrex(T, formMemoryOperand(Instr->getArg(0), IceType_i64)); |
+ } else { |
+ T = makeReg(DestTy); |
+ _ldr(T, formMemoryOperand(Instr->getArg(0), DestTy)); |
+ } |
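+ // The barrier after the load provides the acquire half of the usual ARMv7
+ // seq_cst mapping (ldr; dmb for loads, dmb; str; dmb for stores).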
+ _dmb(); |
+ lowerAssign(InstAssign::create(Func, Dest, T)); |
+ // Make sure the atomic load isn't elided when unused, by adding a FakeUse. |
+ // Since lowerLoad may fuse the load w/ an arithmetic instruction, insert |
+ // the FakeUse on the last-inserted instruction's dest. |
+ Context.insert( |
+ InstFakeUse::create(Func, Context.getLastInserted()->getDest())); |
return; |
} |
- case Intrinsics::AtomicRMW: |
- UnimplementedError(Func->getContext()->getFlags()); |
- return; |
case Intrinsics::AtomicStore: { |
- UnimplementedError(Func->getContext()->getFlags()); |
+ // We require the memory address to be naturally aligned. Given that is the
+ // case, normal stores are atomic.
+ if (!Intrinsics::isMemoryOrderValid( |
+ ID, getConstantMemoryOrder(Instr->getArg(2)))) { |
+ Func->setError("Unexpected memory ordering for AtomicStore"); |
+ return; |
+ } |
+ Operand *Value = Instr->getArg(0); |
+ Type ValueTy = Value->getType(); |
+ assert(isScalarIntegerType(ValueTy)); |
+ Operand *Addr = Instr->getArg(1); |
+ |
+ if (ValueTy == IceType_i64) { |
+ // Atomic 64-bit stores require a load-linked/store-conditional loop using
+ // ldrexd and strexd. The lowered code is:
+ // |
+ // retry: |
+ // ldrexd t.lo, t.hi, [addr] |
+ // strexd success, value.lo, value.hi, [addr] |
+ // cmp success, #0 |
+ // bne retry |
+ // fake-use(addr, value.lo, value.hi) |
+ // |
+ // The fake-uses are needed to prevent those variables from being clobbered
+ // in the loop (which will happen under register pressure).
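+ // The ldrexd is only there to gain exclusive access to [addr] (a strexd can
+ // only succeed after a matching ldrexd); its loaded value is otherwise
+ // unused.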
+ Variable64On32 *Tmp = makeI64RegPair(); |
+ Variable64On32 *ValueVar = |
+ llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64)); |
+ Variable *AddrVar = makeReg(IceType_i32); |
+ Variable *Success = makeReg(IceType_i32); |
+ OperandARM32Mem *Mem; |
+ Operand *_0 = Ctx->getConstantZero(IceType_i32); |
+ InstARM32Label *Retry = InstARM32Label::create(Func, this); |
+ Variable64On32 *NewReg = makeI64RegPair(); |
+ ValueVar->initHiLo(Func); |
+ ValueVar->mustNotHaveReg(); |
+ |
+ _dmb(); |
+ lowerAssign(InstAssign::create(Func, ValueVar, Value)); |
+ lowerAssign(InstAssign::create(Func, AddrVar, Addr)); |
+ |
+ Context.insert(Retry); |
+ Context.insert(InstFakeDef::create(Func, NewReg)); |
+ lowerAssign(InstAssign::create(Func, NewReg, ValueVar)); |
+ Mem = formMemoryOperand(AddrVar, IceType_i64); |
+ _ldrex(Tmp, Mem); |
+ // This fake-use prevents the ldrex from being dead-code eliminated, while
+ // also keeping liveness happy about all defs being used.
+ Context.insert( |
+ InstFakeUse::create(Func, Context.getLastInserted()->getDest())); |
+ _strex(Success, NewReg, Mem); |
+ _cmp(Success, _0); |
+ _br(Retry, CondARM32::NE); |
+ |
+ Context.insert(InstFakeUse::create(Func, ValueVar->getLo())); |
+ Context.insert(InstFakeUse::create(Func, ValueVar->getHi())); |
+ Context.insert(InstFakeUse::create(Func, AddrVar)); |
+ _dmb(); |
+ return; |
+ } |
+ // Non-64-bit stores are atomic as long as the address is aligned. This is
+ // PNaCl, so addresses are aligned.
+ Variable *T = makeReg(ValueTy); |
+ |
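+ // dmb; str; dmb is the usual seq_cst store mapping on ARMv7: the leading
+ // barrier orders earlier accesses before the store, and the trailing one
+ // keeps the store from being reordered past later accesses.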
+ _dmb(); |
+ lowerAssign(InstAssign::create(Func, T, Value)); |
+ _str(T, formMemoryOperand(Addr, ValueTy)); |
+ _dmb(); |
+ return; |
+ } |
+ case Intrinsics::AtomicCmpxchg: { |
+ // The initial lowering for cmpxchg was: |
+ // |
+ // retry: |
+ // ldrex tmp, [addr] |
+ // cmp tmp, expected |
+ // mov expected, tmp |
+ // bne retry
+ // strex success, new, [addr] |
+ // cmp success, #0 |
+ // bne retry |
+ // mov dest, expected |
+ // |
+ // Besides requiring two branches, that lowering could also potentially |
+ // write to memory (in mov expected, tmp) unless we were OK with increasing |
+ // the register pressure and requiring expected to be an infinite-weight |
+ // variable (spoiler alert: that was a problem for i64 cmpxchg). Through
+ // careful rewriting, and thanks to predication, we now implement the
+ // lowering as: |
+ // |
+ // retry: |
+ // ldrex tmp, [addr] |
+ // cmp tmp, expected |
+ // strexeq success, new, [addr] |
+ // movne expected, tmp |
+ // cmpeq success, #0 |
+ // bne retry |
+ // mov dest, expected |
+ // |
+ // Predication lets us move the strex ahead of the mov expected, tmp, which |
+ // allows tmp to be a non-infinite-weight temporary. We wanted to avoid
+ // writing to memory between the ldrex and the strex because, even though it
+ // would usually cause no issues, an intervening memory write that aliased
+ // [addr] would leave us with undefined behavior. Undefined behavior isn't
+ // cool, so we try to avoid it. See the "Synchronization and semaphores" |
+ // section of the "ARM Architecture Reference Manual." |
+ |
+ assert(isScalarIntegerType(DestTy)); |
+ // We require the memory address to be naturally aligned. Given that is the
+ // case, normal loads are atomic.
+ if (!Intrinsics::isMemoryOrderValid( |
+ ID, getConstantMemoryOrder(Instr->getArg(3)), |
+ getConstantMemoryOrder(Instr->getArg(4)))) { |
+ Func->setError("Unexpected memory ordering for AtomicCmpxchg"); |
+ return; |
+ } |
+ |
+ OperandARM32Mem *Mem; |
+ Variable *TmpReg; |
+ Variable *Expected, *ExpectedReg; |
+ Variable *New, *NewReg; |
+ Variable *Success = makeReg(IceType_i32); |
+ Operand *_0 = Ctx->getConstantZero(IceType_i32); |
+ InstARM32Label *Retry = InstARM32Label::create(Func, this); |
+ |
+ if (DestTy == IceType_i64) { |
+ Variable64On32 *TmpReg64 = makeI64RegPair(); |
+ Variable64On32 *New64 = |
+ llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64)); |
+ Variable64On32 *NewReg64 = makeI64RegPair(); |
+ Variable64On32 *Expected64 = |
+ llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64)); |
+ Variable64On32 *ExpectedReg64 = makeI64RegPair(); |
+ |
+ New64->initHiLo(Func); |
+ New64->mustNotHaveReg(); |
+ Expected64->initHiLo(Func); |
+ Expected64->mustNotHaveReg(); |
+ |
+ TmpReg = TmpReg64; |
+ New = New64; |
+ NewReg = NewReg64; |
+ Expected = Expected64; |
+ ExpectedReg = ExpectedReg64; |
+ } else { |
+ TmpReg = makeReg(DestTy); |
+ New = Func->makeVariable(DestTy); |
+ NewReg = makeReg(DestTy); |
+ Expected = Func->makeVariable(DestTy); |
+ ExpectedReg = makeReg(DestTy); |
+ } |
+ |
+ Mem = formMemoryOperand(Instr->getArg(0), DestTy); |
+ if (DestTy == IceType_i64) { |
+ Context.insert(InstFakeDef::create(Func, Expected)); |
+ } |
+ lowerAssign(InstAssign::create(Func, Expected, Instr->getArg(1))); |
+ if (DestTy == IceType_i64) { |
+ Context.insert(InstFakeDef::create(Func, New)); |
+ } |
+ lowerAssign(InstAssign::create(Func, New, Instr->getArg(2))); |
+ _dmb(); |
+ |
+ Context.insert(Retry); |
+ if (DestTy == IceType_i64) { |
+ Context.insert(InstFakeDef::create(Func, ExpectedReg, Expected)); |
+ } |
+ lowerAssign(InstAssign::create(Func, ExpectedReg, Expected)); |
+ if (DestTy == IceType_i64) { |
+ Context.insert(InstFakeDef::create(Func, NewReg, New)); |
+ } |
+ lowerAssign(InstAssign::create(Func, NewReg, New)); |
+ |
+ _ldrex(TmpReg, Mem); |
+ Context.insert( |
+ InstFakeUse::create(Func, Context.getLastInserted()->getDest())); |
+ if (DestTy == IceType_i64) { |
+ auto *TmpReg64 = llvm::cast<Variable64On32>(TmpReg); |
+ auto *ExpectedReg64 = llvm::cast<Variable64On32>(ExpectedReg); |
+ // lowerAssign above has added fake-defs for TmpReg and ExpectedReg. Let's |
+ // keep liveness happy, shall we? |
+ Context.insert(InstFakeUse::create(Func, TmpReg)); |
+ Context.insert(InstFakeUse::create(Func, ExpectedReg)); |
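+ // The predicated cmp below gives 64-bit equality: the low words are only
+ // compared when the high words match, so EQ is set iff both halves are
+ // equal.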
+ _cmp(TmpReg64->getHi(), ExpectedReg64->getHi()); |
+ _cmp(TmpReg64->getLo(), ExpectedReg64->getLo(), CondARM32::EQ); |
+ } else { |
+ _cmp(TmpReg, ExpectedReg); |
+ } |
+ _strex(Success, NewReg, Mem, CondARM32::EQ); |
+ if (DestTy == IceType_i64) { |
+ auto *TmpReg64 = llvm::cast<Variable64On32>(TmpReg); |
+ auto *Expected64 = llvm::cast<Variable64On32>(Expected); |
+ _mov_redefined(Expected64->getHi(), TmpReg64->getHi(), CondARM32::NE); |
+ _mov_redefined(Expected64->getLo(), TmpReg64->getLo(), CondARM32::NE); |
+ auto *FakeDef = InstFakeDef::create(Func, Expected, TmpReg); |
+ Context.insert(FakeDef); |
+ FakeDef->setDestRedefined(); |
+ } else { |
+ _mov_redefined(Expected, TmpReg, CondARM32::NE); |
+ } |
+ _cmp(Success, _0, CondARM32::EQ); |
+ _br(Retry, CondARM32::NE); |
+ _dmb(); |
+ lowerAssign(InstAssign::create(Func, Dest, Expected)); |
+ Context.insert(InstFakeUse::create(Func, Expected)); |
+ if (auto *New64 = llvm::dyn_cast<Variable64On32>(New)) { |
+ Context.insert(InstFakeUse::create(Func, New64->getLo())); |
+ Context.insert(InstFakeUse::create(Func, New64->getHi())); |
+ } else { |
+ Context.insert(InstFakeUse::create(Func, New)); |
+ } |
+ return; |
+ } |
+ case Intrinsics::AtomicRMW: { |
+ if (!Intrinsics::isMemoryOrderValid( |
+ ID, getConstantMemoryOrder(Instr->getArg(3)))) { |
+ Func->setError("Unexpected memory ordering for AtomicRMW"); |
+ return; |
+ } |
+ lowerAtomicRMW( |
+ Dest, static_cast<uint32_t>( |
+ llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue()), |
+ Instr->getArg(1), Instr->getArg(2)); |
return; |
} |
case Intrinsics::Bswap: { |
- Variable *Dest = Instr->getDest(); |
Operand *Val = Instr->getArg(0); |
Type Ty = Val->getType(); |
if (Ty == IceType_i64) { |
@@ -2598,7 +3057,6 @@ void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { |
return; |
} |
case Intrinsics::Ctpop: { |
- Variable *Dest = Instr->getDest(); |
Operand *Val = Instr->getArg(0); |
InstCall *Call = makeHelperCall(isInt32Asserting32Or64(Val->getType()) |
? H_call_ctpop_i32 |
@@ -2633,7 +3091,7 @@ void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { |
} else { |
ValLoR = legalizeToReg(Val); |
} |
- lowerCLZ(Instr->getDest(), ValLoR, ValHiR); |
+ lowerCLZ(Dest, ValLoR, ValHiR); |
return; |
} |
case Intrinsics::Cttz: { |
@@ -2657,17 +3115,16 @@ void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { |
_rbit(T, ValLoR); |
ValLoR = T; |
} |
- lowerCLZ(Instr->getDest(), ValLoR, ValHiR); |
+ lowerCLZ(Dest, ValLoR, ValHiR); |
return; |
} |
case Intrinsics::Fabs: { |
- Variable *Dest = Instr->getDest(); |
Type DestTy = Dest->getType(); |
Variable *T = makeReg(DestTy); |
if (isVectorType(DestTy)) { |
// Add a fake def to keep liveness consistent in the meantime. |
Context.insert(InstFakeDef::create(Func, T)); |
- _mov(Instr->getDest(), T); |
+ _mov(Dest, T); |
UnimplementedError(Func->getContext()->getFlags()); |
return; |
} |
@@ -2721,20 +3178,19 @@ void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { |
if (Ctx->getFlags().getUseSandboxing()) { |
UnimplementedError(Func->getContext()->getFlags()); |
} else { |
- InstCall *Call = makeHelperCall(H_call_read_tp, Instr->getDest(), 0); |
+ InstCall *Call = makeHelperCall(H_call_read_tp, Dest, 0); |
lowerCall(Call); |
} |
return; |
} |
case Intrinsics::Setjmp: { |
- InstCall *Call = makeHelperCall(H_call_setjmp, Instr->getDest(), 1); |
+ InstCall *Call = makeHelperCall(H_call_setjmp, Dest, 1); |
Call->addArg(Instr->getArg(0)); |
lowerCall(Call); |
return; |
} |
case Intrinsics::Sqrt: { |
Variable *Src = legalizeToReg(Instr->getArg(0)); |
- Variable *Dest = Instr->getDest(); |
Variable *T = makeReg(Dest->getType()); |
_vsqrt(T, Src); |
_mov(Dest, T); |
@@ -2742,7 +3198,6 @@ void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { |
} |
case Intrinsics::Stacksave: { |
Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); |
- Variable *Dest = Instr->getDest(); |
_mov(Dest, SP); |
return; |
} |
@@ -3224,6 +3679,16 @@ OperandARM32Mem *TargetARM32::formMemoryOperand(Operand *Operand, Type Ty) { |
llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32))); |
} |
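+// Creates an infinite-weight i64 variable that must be allocated to one of
+// the GPR pairs (as needed by ldrexd/strexd). Its Lo/Hi halves are not
+// register allocated directly; copyRegAllocFromInfWeightVariable64On32()
+// later copies the pair's allocation down to them.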
+Variable64On32 *TargetARM32::makeI64RegPair() { |
+ Variable64On32 *Reg = |
+ llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64)); |
+ Reg->setMustHaveReg(); |
+ Reg->initHiLo(Func); |
+ Reg->getLo()->setMustNotHaveReg(); |
+ Reg->getHi()->setMustNotHaveReg(); |
+ return Reg; |
+} |
+ |
Variable *TargetARM32::makeReg(Type Type, int32_t RegNum) { |
// There aren't any 64-bit integer registers for ARM32. |
assert(Type != IceType_i64); |