Chromium Code Reviews

Unified Diff: src/IceTargetLoweringARM32.cpp

Issue 1369333003: Subzero. Enable Atomics in ARM. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Addresses comments; make format; git pull; merge. Created 5 years, 2 months ago
Index: src/IceTargetLoweringARM32.cpp
diff --git a/src/IceTargetLoweringARM32.cpp b/src/IceTargetLoweringARM32.cpp
index fbcc1ad0becd63bb43b99bee2d57590d3e8da3a5..26741908834a9ef905da62880e2cdd5ee66a0727 100644
--- a/src/IceTargetLoweringARM32.cpp
+++ b/src/IceTargetLoweringARM32.cpp
@@ -195,7 +195,6 @@ TargetARM32::TargetARM32(Cfg *Func)
"Duplicate alias for " #val); \
RegisterAliases[RegARM32::val].set(RegAlias); \
} \
- RegisterAliases[RegARM32::val].resize(RegARM32::Reg_NUM); \
assert(RegisterAliases[RegARM32::val][RegARM32::val]); \
ScratchRegs[RegARM32::val] = scratch;
REGARM32_TABLE;
@@ -217,6 +216,34 @@ TargetARM32::TargetARM32(Cfg *Func)
TypeToRegisterSet[IceType_v4f32] = VectorRegisters;
}
+namespace {
+void copyRegAllocFromInfWeightVariable64On32(const VarList &Vars) {
+ for (Variable *Var : Vars) {
+ auto *Var64 = llvm::dyn_cast<Variable64On32>(Var);
+ if (!Var64) {
+ // This is not the variable we are looking for.
+ continue;
+ }
+ assert(Var64->hasReg() || !Var64->mustHaveReg());
+ if (!Var64->hasReg()) {
+ continue;
+ }
+ SizeT FirstReg = RegARM32::getI64PairFirstGPRNum(Var->getRegNum());
+ // This assumes little endian.
+ Variable *Lo = Var64->getLo();
+ Variable *Hi = Var64->getHi();
+ assert(Lo->hasReg() == Hi->hasReg());
+ if (Lo->hasReg()) {
+ continue;
+ }
+ Lo->setRegNum(FirstReg);
+ Lo->setMustHaveReg();
+ Hi->setRegNum(FirstReg + 1);
+ Hi->setMustHaveReg();
+ }
+}
+} // end of anonymous namespace
+
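A note on the pairing above: on little-endian ARM, the low word of an i64 occupies the first GPR of the pair (FirstReg) and the high word the next one (FirstReg + 1). A minimal standalone sketch of that split, in plain C++ rather than Subzero's classes:

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint64_t V = 0x1122334455667788ull;
      uint32_t Lo = static_cast<uint32_t>(V);       // would live in FirstReg
      uint32_t Hi = static_cast<uint32_t>(V >> 32); // would live in FirstReg + 1
      std::printf("Lo=0x%08x Hi=0x%08x\n", Lo, Hi);
      return 0;
    }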
void TargetARM32::translateO2() {
TimerMarker T(TimerStack::TT_O2, Func);
@@ -284,6 +311,7 @@ void TargetARM32::translateO2() {
regAlloc(RAK_Global);
if (Func->hasError())
return;
+ copyRegAllocFromInfWeightVariable64On32(Func->getVariables());
Func->dump("After linear scan regalloc");
if (Ctx->getFlags().getPhiEdgeSplit()) {
@@ -344,6 +372,7 @@ void TargetARM32::translateOm1() {
regAlloc(RAK_InfOnly);
if (Func->hasError())
return;
+ copyRegAllocFromInfWeightVariable64On32(Func->getVariables());
Func->dump("After regalloc of infinite-weight variables");
Func->genFrame();
@@ -616,7 +645,7 @@ void TargetARM32::finishArgumentLowering(Variable *Arg, Variable *FramePtr,
auto *Mem = OperandARM32Mem::create(
Func, Ty, FramePtr, llvm::cast<ConstantInteger32>(
Ctx->getConstantInt32(Arg->getStackOffset())));
- legalizeToReg(Mem, Arg->getRegNum());
+ _mov(Arg, legalizeToReg(Mem, Arg->getRegNum()));
// This argument-copying instruction uses an explicit OperandARM32Mem
// operand instead of a Variable, so its fill-from-stack operation has to
// be tracked separately for statistics.
@@ -716,6 +745,11 @@ void TargetARM32::addProlog(CfgNode *Node) {
RegsUsed[RegARM32::Reg_lr] = true;
}
for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
+ if (RegARM32::isI64RegisterPair(i)) {
+ // We don't save register pairs explicitly. Instead, we rely on the code
+ // fake-defining/fake-using each register in the pair.
+ continue;
+ }
if (CalleeSaves[i] && RegsUsed[i]) {
// TODO(jvoung): do separate vpush for each floating point register
// segment and += 4, or 8 depending on type.
@@ -884,6 +918,10 @@ void TargetARM32::addEpilog(CfgNode *Node) {
// Pop registers in ascending order just like push (instead of in reverse
// order).
for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
+ if (RegARM32::isI64RegisterPair(i)) {
+ continue;
+ }
+
if (CalleeSaves[i] && RegsUsed[i]) {
GPRsToRestore.push_back(getPhysicalRegister(i));
}
@@ -1739,6 +1777,7 @@ void TargetARM32::lowerAssign(const InstAssign *Inst) {
Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
Variable *T_Lo = makeReg(IceType_i32);
Variable *T_Hi = makeReg(IceType_i32);
+
_mov(T_Lo, Src0Lo);
_mov(DestLo, T_Lo);
_mov(T_Hi, Src0Hi);
@@ -2271,9 +2310,7 @@ void TargetARM32::lowerCast(const InstCast *Inst) {
configureBitcastTemporary(T);
Variable *Src0R = legalizeToReg(Src0);
_mov(T, Src0R);
- auto *Dest64On32 = llvm::cast<Variable64On32>(Dest);
- lowerAssign(InstAssign::create(Func, Dest64On32->getLo(), T->getLo()));
- lowerAssign(InstAssign::create(Func, Dest64On32->getHi(), T->getHi()));
+ lowerAssign(InstAssign::create(Func, Dest, T));
break;
}
case IceType_f64: {
@@ -2282,11 +2319,11 @@ void TargetARM32::lowerCast(const InstCast *Inst) {
// vmov T2, T0, T1
// Dest <- T2
assert(Src0->getType() == IceType_i64);
+ Variable *T = makeReg(DestType);
auto *Src64 = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
Src64->initHiLo(Func);
configureBitcastTemporary(Src64);
lowerAssign(InstAssign::create(Func, Src64, Src0));
- Variable *T = makeReg(IceType_f64);
_mov(T, Src64);
lowerAssign(InstAssign::create(Func, Dest, T));
break;
@@ -2537,38 +2574,460 @@ void TargetARM32::lowerInsertElement(const InstInsertElement *Inst) {
UnimplementedError(Func->getContext()->getFlags());
}
-void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
- switch (Instr->getIntrinsicInfo().ID) {
- case Intrinsics::AtomicCmpxchg: {
- UnimplementedError(Func->getContext()->getFlags());
+namespace {
+inline uint64_t getConstantMemoryOrder(Operand *Opnd) {
+ if (auto *Integer = llvm::dyn_cast<ConstantInteger32>(Opnd))
+ return Integer->getValue();
+ return Intrinsics::MemoryOrderInvalid;
+}
+} // end of anonymous namespace
+
+void TargetARM32::lowerAtomicRMW(Variable *Dest, uint32_t Operation,
+ Operand *Ptr, Operand *Val) {
+ // retry:
+ // ldrex contents, [addr]
+ // op tmp, contents, operand
+ // strex success, tmp, [addr]
+ // cmp success, #0
+ // bne retry
+ // fake-use(addr, operand) @ prevents undesirable clobbering.
+ // mov dest, contents
+ assert(Dest != nullptr);
+ Type DestTy = Dest->getType();
+ (void)Ptr;
+ (void)Val;
+
+ OperandARM32Mem *Mem;
+ Variable *PtrContentsReg;
+ Variable *PtrContentsHiReg;
+ Variable *PtrContentsLoReg;
+ Variable *Value = Func->makeVariable(DestTy);
+ Variable *ValueReg;
+ Variable *ValueHiReg;
+ Variable *ValueLoReg;
+ Variable *Success = makeReg(IceType_i32);
+ Variable *TmpReg;
+ Variable *TmpHiReg;
+ Variable *TmpLoReg;
+ Operand *_0 = Ctx->getConstantZero(IceType_i32);
+ InstARM32Label *Retry = InstARM32Label::create(Func, this);
+
+ if (DestTy == IceType_i64) {
+ Variable64On32 *PtrContentsReg64 = makeI64RegPair();
+ PtrContentsHiReg = PtrContentsReg64->getHi();
+ PtrContentsLoReg = PtrContentsReg64->getLo();
+ PtrContentsReg = PtrContentsReg64;
+
+ llvm::cast<Variable64On32>(Value)->initHiLo(Func);
+ Variable64On32 *ValueReg64 = makeI64RegPair();
+ ValueHiReg = ValueReg64->getHi();
+ ValueLoReg = ValueReg64->getLo();
+ ValueReg = ValueReg64;
+
+ Variable64On32 *TmpReg64 = makeI64RegPair();
+ TmpHiReg = TmpReg64->getHi();
+ TmpLoReg = TmpReg64->getLo();
+ TmpReg = TmpReg64;
+ } else {
+ PtrContentsReg = makeReg(DestTy);
+ PtrContentsHiReg = nullptr;
+ PtrContentsLoReg = PtrContentsReg;
+
+ ValueReg = makeReg(DestTy);
+ ValueHiReg = nullptr;
+ ValueLoReg = ValueReg;
+
+ TmpReg = makeReg(DestTy);
+ TmpHiReg = nullptr;
+ TmpLoReg = TmpReg;
+ }
+
+ if (DestTy == IceType_i64) {
+ Context.insert(InstFakeDef::create(Func, Value));
+ }
+ lowerAssign(InstAssign::create(Func, Value, Val));
+
+ Variable *PtrVar = Func->makeVariable(IceType_i32);
+ lowerAssign(InstAssign::create(Func, PtrVar, Ptr));
+
+ _dmb();
+ Context.insert(Retry);
+ Mem = formMemoryOperand(PtrVar, DestTy);
+ if (DestTy == IceType_i64) {
+ Context.insert(InstFakeDef::create(Func, ValueReg, Value));
+ }
+ lowerAssign(InstAssign::create(Func, ValueReg, Value));
+ if (DestTy == IceType_i8 || DestTy == IceType_i16) {
+ _uxt(ValueReg, ValueReg);
+ }
+ _ldrex(PtrContentsReg, Mem);
+
+ if (DestTy == IceType_i64) {
+ Context.insert(InstFakeDef::create(Func, TmpReg, ValueReg));
+ }
+ switch (Operation) {
+ default:
+ Func->setError("Unknown AtomicRMW operation");
return;
+ case Intrinsics::AtomicAdd:
+ if (DestTy == IceType_i64) {
+ _adds(TmpLoReg, PtrContentsLoReg, ValueLoReg);
+ _adc(TmpHiReg, PtrContentsHiReg, ValueHiReg);
+ } else {
+ _add(TmpLoReg, PtrContentsLoReg, ValueLoReg);
+ }
+ break;
+ case Intrinsics::AtomicSub:
+ if (DestTy == IceType_i64) {
+ _subs(TmpLoReg, PtrContentsLoReg, ValueLoReg);
+ _sbc(TmpHiReg, PtrContentsHiReg, ValueHiReg);
+ } else {
+ _sub(TmpLoReg, PtrContentsLoReg, ValueLoReg);
+ }
+ break;
+ case Intrinsics::AtomicOr:
+ _orr(TmpLoReg, PtrContentsLoReg, ValueLoReg);
+ if (DestTy == IceType_i64) {
+ _orr(TmpHiReg, PtrContentsHiReg, ValueHiReg);
+ }
+ break;
+ case Intrinsics::AtomicAnd:
+ _and(TmpLoReg, PtrContentsLoReg, ValueLoReg);
+ if (DestTy == IceType_i64) {
+ _and(TmpHiReg, PtrContentsHiReg, ValueHiReg);
+ }
+ break;
+ case Intrinsics::AtomicXor:
+ _eor(TmpLoReg, PtrContentsLoReg, ValueLoReg);
+ if (DestTy == IceType_i64) {
+ _eor(TmpHiReg, PtrContentsHiReg, ValueHiReg);
+ }
+ break;
+ case Intrinsics::AtomicExchange:
+ _mov(TmpLoReg, ValueLoReg);
+ if (DestTy == IceType_i64) {
+ _mov(TmpHiReg, ValueHiReg);
+ }
+ break;
}
+ _strex(Success, TmpReg, Mem);
+ _cmp(Success, _0);
+ _br(Retry, CondARM32::NE);
+
+ // The following fake-uses ensure that Subzero will not clobber them in the
+ // load-linked/store-conditional loop above. We might have to spill them, but
+ // spilling is preferable to incorrect behavior.
+ Context.insert(InstFakeUse::create(Func, PtrVar));
+ if (auto *Value64 = llvm::dyn_cast<Variable64On32>(Value)) {
+ Context.insert(InstFakeUse::create(Func, Value64->getHi()));
+ Context.insert(InstFakeUse::create(Func, Value64->getLo()));
+ } else {
+ Context.insert(InstFakeUse::create(Func, Value));
+ }
+ _dmb();
+ if (DestTy == IceType_i8 || DestTy == IceType_i16) {
+ _uxt(PtrContentsReg, PtrContentsReg);
+ }
+
+ if (DestTy == IceType_i64) {
+ Context.insert(InstFakeUse::create(Func, PtrContentsReg));
+ }
+ lowerAssign(InstAssign::create(Func, Dest, PtrContentsReg));
+ if (auto *Dest64 = llvm::dyn_cast<Variable64On32>(Dest)) {
+ Context.insert(InstFakeUse::create(Func, Dest64->getLo()));
+ Context.insert(InstFakeUse::create(Func, Dest64->getHi()));
+ } else {
+ Context.insert(InstFakeUse::create(Func, Dest));
+ }
+}
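For a concrete picture of the loop this function emits, here is a hand-written C-level analogue of the 32-bit AtomicAdd case, using GCC/Clang extended asm and assuming an ARMv7 target. This is a sketch (the name atomicAdd32 is illustrative), not what Subzero generates verbatim:

    #include <cstdint>

    // Compile for ARMv7, e.g. -march=armv7-a.
    static int32_t atomicAdd32(volatile int32_t *Addr, int32_t Val) {
      int32_t Contents, Tmp, Success;
      asm volatile("dmb ish\n"
                   "1:\n"
                   "ldrex %[contents], [%[addr]]\n"
                   "add   %[tmp], %[contents], %[val]\n"
                   "strex %[success], %[tmp], [%[addr]]\n"
                   "cmp   %[success], #0\n"
                   "bne   1b\n"
                   "dmb ish\n"
                   : [contents] "=&r"(Contents), [tmp] "=&r"(Tmp),
                     [success] "=&r"(Success)
                   : [addr] "r"(Addr), [val] "r"(Val)
                   : "memory", "cc");
      return Contents; // the value observed before the add, like Dest above
    }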
+
+void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
+ Variable *Dest = Instr->getDest();
+ Type DestTy = (Dest != nullptr) ? Dest->getType() : IceType_void;
+ Intrinsics::IntrinsicID ID = Instr->getIntrinsicInfo().ID;
+ switch (ID) {
case Intrinsics::AtomicFence:
- UnimplementedError(Func->getContext()->getFlags());
- return;
case Intrinsics::AtomicFenceAll:
- // NOTE: FenceAll should prevent and load/store from being moved across the
- // fence (both atomic and non-atomic). The InstARM32Mfence instruction is
- // currently marked coarsely as "HasSideEffects".
- UnimplementedError(Func->getContext()->getFlags());
+ assert(Dest == nullptr);
+ _dmb();
return;
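Both fence intrinsics lower to a full data memory barrier. In C++ terms the equivalent is roughly the following sketch, which on ARMv7 compiles to dmb ish:

    #include <atomic>

    void fenceAll() { std::atomic_thread_fence(std::memory_order_seq_cst); }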
case Intrinsics::AtomicIsLockFree: {
- UnimplementedError(Func->getContext()->getFlags());
+ Operand *ByteSize = Instr->getArg(0);
+ auto *CI = llvm::dyn_cast<ConstantInteger32>(ByteSize);
+ if (CI == nullptr) {
+ // The PNaCl ABI requires the byte size to be a compile-time constant.
+ Func->setError("AtomicIsLockFree byte size should be compile-time const");
+ return;
+ }
+ static constexpr int32_t NotLockFree = 0;
+ static constexpr int32_t LockFree = 1;
+ int32_t Result = NotLockFree;
+ switch (CI->getValue()) {
+ case 1:
+ case 2:
+ case 4:
+ case 8:
+ Result = LockFree;
+ break;
+ }
+ _mov(Dest, legalizeToReg(Ctx->getConstantInt32(Result)));
return;
}
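The answer is constant-folded because ARMv7's exclusive-access instructions (ldrexb/ldrexh/ldrex/ldrexd) cover 1-, 2-, 4-, and 8-byte objects. A quick check of the same fact through the C++ runtime (a sketch; the printed value depends on the target, and should be 1 on ARMv7):

    #include <atomic>
    #include <cstdint>
    #include <cstdio>

    int main() {
      std::atomic<uint64_t> A{0};
      // 8-byte atomics are lock-free on ARMv7 via ldrexd/strexd.
      std::printf("lock-free: %d\n", A.is_lock_free() ? 1 : 0);
      return 0;
    }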
case Intrinsics::AtomicLoad: {
- UnimplementedError(Func->getContext()->getFlags());
+ assert(isScalarIntegerType(DestTy));
+ // We require the memory address to be naturally aligned. Given that it is,
+ // normal loads are atomic.
+ if (!Intrinsics::isMemoryOrderValid(
+ ID, getConstantMemoryOrder(Instr->getArg(1)))) {
+ Func->setError("Unexpected memory ordering for AtomicLoad");
+ return;
+ }
+ Variable *T;
+
+ if (DestTy == IceType_i64) {
+ // ldrexd is the only ARM instruction that is guaranteed to load a 64-bit
+ // integer atomically. Everything else works with a regular ldr.
+ T = makeI64RegPair();
+ _ldrex(T, formMemoryOperand(Instr->getArg(0), IceType_i64));
+ } else {
+ T = makeReg(DestTy);
+ _ldr(T, formMemoryOperand(Instr->getArg(0), DestTy));
+ }
+ _dmb();
+ lowerAssign(InstAssign::create(Func, Dest, T));
+ // Make sure the atomic load isn't elided when unused, by adding a FakeUse.
+ // Since lowerLoad may fuse the load w/ an arithmetic instruction, insert
+ // the FakeUse on the last-inserted instruction's dest.
+ Context.insert(
+ InstFakeUse::create(Func, Context.getLastInserted()->getDest()));
return;
}
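In C++11 terms the load side is simply the following; on ARMv7 a seq_cst 64-bit atomic load compiles to ldrexd plus a dmb, matching the lowering above (a sketch, not Subzero code):

    #include <atomic>
    #include <cstdint>

    uint64_t atomicLoad64(const std::atomic<uint64_t> &A) {
      return A.load(std::memory_order_seq_cst); // ldrexd; dmb
    }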
- case Intrinsics::AtomicRMW:
- UnimplementedError(Func->getContext()->getFlags());
- return;
case Intrinsics::AtomicStore: {
- UnimplementedError(Func->getContext()->getFlags());
+ // We require the memory address to be naturally aligned. Given that it is,
+ // normal stores are atomic.
+ if (!Intrinsics::isMemoryOrderValid(
+ ID, getConstantMemoryOrder(Instr->getArg(2)))) {
+ Func->setError("Unexpected memory ordering for AtomicStore");
+ return;
+ }
+ Operand *Value = Instr->getArg(0);
+ Type ValueTy = Value->getType();
+ assert(isScalarIntegerType(ValueTy));
+ Operand *Addr = Instr->getArg(1);
+
+ if (ValueTy == IceType_i64) {
+ // Atomic 64-bit stores require a load-linked/store-conditional loop using
+ // ldrexd and strexd. The lowered code is:
+ //
+ // retry:
+ // ldrexd t.lo, t.hi, [addr]
+ // strexd success, value.lo, value.hi, [addr]
+ // cmp success, #0
+ // bne retry
+ // fake-use(addr, value.lo, value.hi)
+ //
+ // The fake-use is needed to prevent those variables from being clobbered
+ // in the loop (which will happen under register pressure).
+ Variable64On32 *Tmp = makeI64RegPair();
+ Variable64On32 *ValueVar =
+ llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
+ Variable *AddrVar = makeReg(IceType_i32);
+ Variable *Success = makeReg(IceType_i32);
+ OperandARM32Mem *Mem;
+ Operand *_0 = Ctx->getConstantZero(IceType_i32);
+ InstARM32Label *Retry = InstARM32Label::create(Func, this);
+ Variable64On32 *NewReg = makeI64RegPair();
+ ValueVar->initHiLo(Func);
+ ValueVar->mustNotHaveReg();
+
+ _dmb();
+ lowerAssign(InstAssign::create(Func, ValueVar, Value));
+ lowerAssign(InstAssign::create(Func, AddrVar, Addr));
+
+ Context.insert(Retry);
+ Context.insert(InstFakeDef::create(Func, NewReg));
+ lowerAssign(InstAssign::create(Func, NewReg, ValueVar));
+ Mem = formMemoryOperand(AddrVar, IceType_i64);
+ _ldrex(Tmp, Mem);
+ // This fake-use prevents the ldrex from being dead-code eliminated, while
+ // also keeping liveness happy about all defs being used.
+ Context.insert(
+ InstFakeUse::create(Func, Context.getLastInserted()->getDest()));
+ _strex(Success, NewReg, Mem);
+ _cmp(Success, _0);
+ _br(Retry, CondARM32::NE);
+
+ Context.insert(InstFakeUse::create(Func, ValueVar->getLo()));
+ Context.insert(InstFakeUse::create(Func, ValueVar->getHi()));
+ Context.insert(InstFakeUse::create(Func, AddrVar));
+ _dmb();
+ return;
+ }
+ // Non-64-bit stores are atomic as long as the address is naturally
+ // aligned. This is PNaCl, so addresses are aligned.
+ Variable *T = makeReg(ValueTy);
+
+ _dmb();
+ lowerAssign(InstAssign::create(Func, T, Value));
+ _str(T, formMemoryOperand(Addr, ValueTy));
+ _dmb();
+ return;
+ }
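The store side in C++11 terms: because a plain strd is not guaranteed to be single-copy atomic on ARMv7, compilers emit the same ldrexd/strexd retry loop shown above for the 64-bit case, while 32-bit and smaller stores are a plain str between barriers (a sketch, not Subzero code):

    #include <atomic>
    #include <cstdint>

    void atomicStore64(std::atomic<uint64_t> &A, uint64_t V) {
      A.store(V, std::memory_order_seq_cst); // dmb; ldrexd/strexd loop; dmb
    }

    void atomicStore32(std::atomic<uint32_t> &A, uint32_t V) {
      A.store(V, std::memory_order_seq_cst); // dmb; str; dmb
    }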
+ case Intrinsics::AtomicCmpxchg: {
+ // The initial lowering for cmpxchg was:
+ //
+ // retry:
+ // ldrex tmp, [addr]
+ // cmp tmp, expected
+ // mov expected, tmp
+ // bne retry
+ // strex success, new, [addr]
+ // cmp success, #0
+ // bne retry
+ // mov dest, expected
+ //
+ // Besides requiring two branches, that lowering could also potentially
+ // write to memory (in mov expected, tmp) unless we were OK with increasing
+ // the register pressure and requiring expected to be an infinite-weight
+ // variable (spoiler alert: that was a problem for i64 cmpxchg). Through
+ // careful rewriting, and thanks to predication, we now implement the
+ // lowering as:
+ //
+ // retry:
+ // ldrex tmp, [addr]
+ // cmp tmp, expected
+ // strexeq success, new, [addr]
+ // movne expected, tmp
+ // cmpeq success, #0
+ // bne retry
+ // mov dest, expected
+ //
+ // Predication lets us move the strex ahead of the mov expected, tmp, which
+ // allows tmp to be a non-infinite-weight temporary. We wanted to avoid
+ // writing to memory between ldrex and strex because, even though most times
+ // that would cause no issues, if any intervening memory write aliased
+ // [addr] then we would have undefined behavior. Undefined behavior isn't
+ // cool, so we try to avoid it. See the "Synchronization and semaphores"
+ // section of the "ARM Architecture Reference Manual."
+
+ assert(isScalarIntegerType(DestTy));
+ // We require the memory address to be naturally aligned. Given that it is,
+ // normal loads are atomic.
+ if (!Intrinsics::isMemoryOrderValid(
+ ID, getConstantMemoryOrder(Instr->getArg(3)),
+ getConstantMemoryOrder(Instr->getArg(4)))) {
+ Func->setError("Unexpected memory ordering for AtomicCmpxchg");
+ return;
+ }
+
+ OperandARM32Mem *Mem;
+ Variable *TmpReg;
+ Variable *Expected, *ExpectedReg;
+ Variable *New, *NewReg;
+ Variable *Success = makeReg(IceType_i32);
+ Operand *_0 = Ctx->getConstantZero(IceType_i32);
+ InstARM32Label *Retry = InstARM32Label::create(Func, this);
+
+ if (DestTy == IceType_i64) {
+ Variable64On32 *TmpReg64 = makeI64RegPair();
+ Variable64On32 *New64 =
+ llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
+ Variable64On32 *NewReg64 = makeI64RegPair();
+ Variable64On32 *Expected64 =
+ llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
+ Variable64On32 *ExpectedReg64 = makeI64RegPair();
+
+ New64->initHiLo(Func);
+ New64->mustNotHaveReg();
+ Expected64->initHiLo(Func);
+ Expected64->mustNotHaveReg();
+
+ TmpReg = TmpReg64;
+ New = New64;
+ NewReg = NewReg64;
+ Expected = Expected64;
+ ExpectedReg = ExpectedReg64;
+ } else {
+ TmpReg = makeReg(DestTy);
+ New = Func->makeVariable(DestTy);
+ NewReg = makeReg(DestTy);
+ Expected = Func->makeVariable(DestTy);
+ ExpectedReg = makeReg(DestTy);
+ }
+
+ Mem = formMemoryOperand(Instr->getArg(0), DestTy);
+ if (DestTy == IceType_i64) {
+ Context.insert(InstFakeDef::create(Func, Expected));
+ }
+ lowerAssign(InstAssign::create(Func, Expected, Instr->getArg(1)));
+ if (DestTy == IceType_i64) {
+ Context.insert(InstFakeDef::create(Func, New));
+ }
+ lowerAssign(InstAssign::create(Func, New, Instr->getArg(2)));
+ _dmb();
+
+ Context.insert(Retry);
+ if (DestTy == IceType_i64) {
+ Context.insert(InstFakeDef::create(Func, ExpectedReg, Expected));
+ }
+ lowerAssign(InstAssign::create(Func, ExpectedReg, Expected));
+ if (DestTy == IceType_i64) {
+ Context.insert(InstFakeDef::create(Func, NewReg, New));
+ }
+ lowerAssign(InstAssign::create(Func, NewReg, New));
+
+ _ldrex(TmpReg, Mem);
+ Context.insert(
+ InstFakeUse::create(Func, Context.getLastInserted()->getDest()));
+ if (DestTy == IceType_i64) {
+ auto *TmpReg64 = llvm::cast<Variable64On32>(TmpReg);
+ auto *ExpectedReg64 = llvm::cast<Variable64On32>(ExpectedReg);
+ // lowerAssign above has added fake-defs for TmpReg and ExpectedReg. Let's
+ // keep liveness happy, shall we?
+ Context.insert(InstFakeUse::create(Func, TmpReg));
+ Context.insert(InstFakeUse::create(Func, ExpectedReg));
+ _cmp(TmpReg64->getHi(), ExpectedReg64->getHi());
+ _cmp(TmpReg64->getLo(), ExpectedReg64->getLo(), CondARM32::EQ);
+ } else {
+ _cmp(TmpReg, ExpectedReg);
+ }
+ _strex(Success, NewReg, Mem, CondARM32::EQ);
+ if (DestTy == IceType_i64) {
+ auto *TmpReg64 = llvm::cast<Variable64On32>(TmpReg);
+ auto *Expected64 = llvm::cast<Variable64On32>(Expected);
+ _mov_redefined(Expected64->getHi(), TmpReg64->getHi(), CondARM32::NE);
+ _mov_redefined(Expected64->getLo(), TmpReg64->getLo(), CondARM32::NE);
+ auto *FakeDef = InstFakeDef::create(Func, Expected, TmpReg);
+ Context.insert(FakeDef);
+ FakeDef->setDestRedefined();
+ } else {
+ _mov_redefined(Expected, TmpReg, CondARM32::NE);
+ }
+ _cmp(Success, _0, CondARM32::EQ);
+ _br(Retry, CondARM32::NE);
+ _dmb();
+ lowerAssign(InstAssign::create(Func, Dest, Expected));
+ Context.insert(InstFakeUse::create(Func, Expected));
+ if (auto *New64 = llvm::dyn_cast<Variable64On32>(New)) {
+ Context.insert(InstFakeUse::create(Func, New64->getLo()));
+ Context.insert(InstFakeUse::create(Func, New64->getHi()));
+ } else {
+ Context.insert(InstFakeUse::create(Func, New));
+ }
+ return;
+ }
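The intended observable behavior corresponds to C++11's strong compare-exchange, which likewise writes the observed value back into expected on failure (the movne expected, tmp above) and hands it to the caller (mov dest, expected). A sketch of the 32-bit case, with cmpxchg32 as an illustrative name:

    #include <atomic>
    #include <cstdint>

    uint32_t cmpxchg32(std::atomic<uint32_t> &A, uint32_t Expected,
                       uint32_t New) {
      A.compare_exchange_strong(Expected, New, std::memory_order_seq_cst);
      return Expected; // like Dest: the value observed in memory
    }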
+ case Intrinsics::AtomicRMW: {
+ if (!Intrinsics::isMemoryOrderValid(
+ ID, getConstantMemoryOrder(Instr->getArg(3)))) {
+ Func->setError("Unexpected memory ordering for AtomicRMW");
+ return;
+ }
+ lowerAtomicRMW(
+ Dest, static_cast<uint32_t>(
+ llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue()),
+ Instr->getArg(1), Instr->getArg(2));
return;
}
case Intrinsics::Bswap: {
- Variable *Dest = Instr->getDest();
Operand *Val = Instr->getArg(0);
Type Ty = Val->getType();
if (Ty == IceType_i64) {
@@ -2598,7 +3057,6 @@ void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
return;
}
case Intrinsics::Ctpop: {
- Variable *Dest = Instr->getDest();
Operand *Val = Instr->getArg(0);
InstCall *Call = makeHelperCall(isInt32Asserting32Or64(Val->getType())
? H_call_ctpop_i32
@@ -2633,7 +3091,7 @@ void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
} else {
ValLoR = legalizeToReg(Val);
}
- lowerCLZ(Instr->getDest(), ValLoR, ValHiR);
+ lowerCLZ(Dest, ValLoR, ValHiR);
return;
}
case Intrinsics::Cttz: {
@@ -2657,17 +3115,16 @@ void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
_rbit(T, ValLoR);
ValLoR = T;
}
- lowerCLZ(Instr->getDest(), ValLoR, ValHiR);
+ lowerCLZ(Dest, ValLoR, ValHiR);
return;
}
case Intrinsics::Fabs: {
- Variable *Dest = Instr->getDest();
Type DestTy = Dest->getType();
Variable *T = makeReg(DestTy);
if (isVectorType(DestTy)) {
// Add a fake def to keep liveness consistent in the meantime.
Context.insert(InstFakeDef::create(Func, T));
- _mov(Instr->getDest(), T);
+ _mov(Dest, T);
UnimplementedError(Func->getContext()->getFlags());
return;
}
@@ -2721,20 +3178,19 @@ void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
if (Ctx->getFlags().getUseSandboxing()) {
UnimplementedError(Func->getContext()->getFlags());
} else {
- InstCall *Call = makeHelperCall(H_call_read_tp, Instr->getDest(), 0);
+ InstCall *Call = makeHelperCall(H_call_read_tp, Dest, 0);
lowerCall(Call);
}
return;
}
case Intrinsics::Setjmp: {
- InstCall *Call = makeHelperCall(H_call_setjmp, Instr->getDest(), 1);
+ InstCall *Call = makeHelperCall(H_call_setjmp, Dest, 1);
Call->addArg(Instr->getArg(0));
lowerCall(Call);
return;
}
case Intrinsics::Sqrt: {
Variable *Src = legalizeToReg(Instr->getArg(0));
- Variable *Dest = Instr->getDest();
Variable *T = makeReg(Dest->getType());
_vsqrt(T, Src);
_mov(Dest, T);
@@ -2742,7 +3198,6 @@ void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
}
case Intrinsics::Stacksave: {
Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
- Variable *Dest = Instr->getDest();
_mov(Dest, SP);
return;
}
@@ -3224,6 +3679,16 @@ OperandARM32Mem *TargetARM32::formMemoryOperand(Operand *Operand, Type Ty) {
llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32)));
}
+Variable64On32 *TargetARM32::makeI64RegPair() {
+ Variable64On32 *Reg =
+ llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
+ Reg->setMustHaveReg();
+ Reg->initHiLo(Func);
+ Reg->getLo()->setMustNotHaveReg();
+ Reg->getHi()->setMustNotHaveReg();
+ return Reg;
+}
+
Variable *TargetARM32::makeReg(Type Type, int32_t RegNum) {
// There aren't any 64-bit integer registers for ARM32.
assert(Type != IceType_i64);