| Index: src/IceTargetLoweringARM32.cpp
|
| diff --git a/src/IceTargetLoweringARM32.cpp b/src/IceTargetLoweringARM32.cpp
|
| index fbcc1ad0becd63bb43b99bee2d57590d3e8da3a5..26741908834a9ef905da62880e2cdd5ee66a0727 100644
|
| --- a/src/IceTargetLoweringARM32.cpp
|
| +++ b/src/IceTargetLoweringARM32.cpp
|
| @@ -195,7 +195,6 @@ TargetARM32::TargetARM32(Cfg *Func)
|
| "Duplicate alias for " #val); \
|
| RegisterAliases[RegARM32::val].set(RegAlias); \
|
| } \
|
| - RegisterAliases[RegARM32::val].resize(RegARM32::Reg_NUM); \
|
| assert(RegisterAliases[RegARM32::val][RegARM32::val]); \
|
| ScratchRegs[RegARM32::val] = scratch;
|
| REGARM32_TABLE;
|
| @@ -217,6 +216,34 @@ TargetARM32::TargetARM32(Cfg *Func)
|
| TypeToRegisterSet[IceType_v4f32] = VectorRegisters;
|
| }
|
|
|
| +namespace {
|
| +void copyRegAllocFromInfWeightVariable64On32(const VarList &Vars) {
|
| + for (Variable *Var : Vars) {
|
| + auto *Var64 = llvm::dyn_cast<Variable64On32>(Var);
|
| + if (!Var64) {
|
| + // This is not the variable we are looking for.
|
| + continue;
|
| + }
|
| + assert(Var64->hasReg() || !Var64->mustHaveReg());
|
| + if (!Var64->hasReg()) {
|
| + continue;
|
| + }
|
| + SizeT FirstReg = RegARM32::getI64PairFirstGPRNum(Var->getRegNum());
|
| + // This assumes little endian.
|
| + Variable *Lo = Var64->getLo();
|
| + Variable *Hi = Var64->getHi();
|
| + assert(Lo->hasReg() == Hi->hasReg());
|
| + if (Lo->hasReg()) {
|
| + continue;
|
| + }
|
| + Lo->setRegNum(FirstReg);
|
| + Lo->setMustHaveReg();
|
| + Hi->setRegNum(FirstReg + 1);
|
| + Hi->setMustHaveReg();
|
| + }
|
| +}
|
| +} // end of anonymous namespace
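|
| As an aside, here is a minimal sketch of the pairing rule the helper above
| assumes (little endian: the pair's first GPR holds the low word, the next
| GPR the high word). The function name and raw integer register numbers are
| illustrative assumptions, not part of this patch:
|
|   #include <cstdint>
|   #include <utility>
|
|   // Toy model: a pair whose first GPR is N fans out to Lo = N, Hi = N + 1.
|   std::pair<int32_t, int32_t> splitPairSketch(int32_t FirstGPRNum) {
|     const int32_t LoRegNum = FirstGPRNum;     // low 32 bits of the i64
|     const int32_t HiRegNum = FirstGPRNum + 1; // high 32 bits of the i64
|     return {LoRegNum, HiRegNum};
|   }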
|
| +
|
| void TargetARM32::translateO2() {
|
| TimerMarker T(TimerStack::TT_O2, Func);
|
|
|
| @@ -284,6 +311,7 @@ void TargetARM32::translateO2() {
|
| regAlloc(RAK_Global);
|
| if (Func->hasError())
|
| return;
|
| + copyRegAllocFromInfWeightVariable64On32(Func->getVariables());
|
| Func->dump("After linear scan regalloc");
|
|
|
| if (Ctx->getFlags().getPhiEdgeSplit()) {
|
| @@ -344,6 +372,7 @@ void TargetARM32::translateOm1() {
|
| regAlloc(RAK_InfOnly);
|
| if (Func->hasError())
|
| return;
|
| + copyRegAllocFromInfWeightVariable64On32(Func->getVariables());
|
| Func->dump("After regalloc of infinite-weight variables");
|
|
|
| Func->genFrame();
|
| @@ -616,7 +645,7 @@ void TargetARM32::finishArgumentLowering(Variable *Arg, Variable *FramePtr,
|
| auto *Mem = OperandARM32Mem::create(
|
| Func, Ty, FramePtr, llvm::cast<ConstantInteger32>(
|
| Ctx->getConstantInt32(Arg->getStackOffset())));
|
| - legalizeToReg(Mem, Arg->getRegNum());
|
| + _mov(Arg, legalizeToReg(Mem, Arg->getRegNum()));
|
| // This argument-copying instruction uses an explicit OperandARM32Mem
|
| // operand instead of a Variable, so its fill-from-stack operation has to
|
| // be tracked separately for statistics.
|
| @@ -716,6 +745,11 @@ void TargetARM32::addProlog(CfgNode *Node) {
|
| RegsUsed[RegARM32::Reg_lr] = true;
|
| }
|
| for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
|
| + if (RegARM32::isI64RegisterPair(i)) {
|
| + // We don't save register pairs explicitly. Instead, we rely on the code
|
| + // fake-defing/fake-using each register in the pair.
|
| + continue;
|
| + }
|
| if (CalleeSaves[i] && RegsUsed[i]) {
|
| // TODO(jvoung): do separate vpush for each floating point register
|
| // segment and += 4, or 8 depending on type.
|
| @@ -884,6 +918,10 @@ void TargetARM32::addEpilog(CfgNode *Node) {
|
| // Pop registers in ascending order just like push (instead of in reverse
|
| // order).
|
| for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
|
| + if (RegARM32::isI64RegisterPair(i)) {
|
| + continue;
|
| + }
|
| +
|
| if (CalleeSaves[i] && RegsUsed[i]) {
|
| GPRsToRestore.push_back(getPhysicalRegister(i));
|
| }
|
| @@ -1739,6 +1777,7 @@ void TargetARM32::lowerAssign(const InstAssign *Inst) {
|
| Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
|
| Variable *T_Lo = makeReg(IceType_i32);
|
| Variable *T_Hi = makeReg(IceType_i32);
|
| +
|
| _mov(T_Lo, Src0Lo);
|
| _mov(DestLo, T_Lo);
|
| _mov(T_Hi, Src0Hi);
|
| @@ -2271,9 +2310,7 @@ void TargetARM32::lowerCast(const InstCast *Inst) {
|
| configureBitcastTemporary(T);
|
| Variable *Src0R = legalizeToReg(Src0);
|
| _mov(T, Src0R);
|
| - auto *Dest64On32 = llvm::cast<Variable64On32>(Dest);
|
| - lowerAssign(InstAssign::create(Func, Dest64On32->getLo(), T->getLo()));
|
| - lowerAssign(InstAssign::create(Func, Dest64On32->getHi(), T->getHi()));
|
| + lowerAssign(InstAssign::create(Func, Dest, T));
|
| break;
|
| }
|
| case IceType_f64: {
|
| @@ -2282,11 +2319,11 @@ void TargetARM32::lowerCast(const InstCast *Inst) {
|
| // vmov T2, T0, T1
|
| // Dest <- T2
|
| assert(Src0->getType() == IceType_i64);
|
| + Variable *T = makeReg(DestType);
|
| auto *Src64 = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
|
| Src64->initHiLo(Func);
|
| configureBitcastTemporary(Src64);
|
| lowerAssign(InstAssign::create(Func, Src64, Src0));
|
| - Variable *T = makeReg(IceType_f64);
|
| _mov(T, Src64);
|
| lowerAssign(InstAssign::create(Func, Dest, T));
|
| break;
|
| @@ -2537,38 +2574,460 @@ void TargetARM32::lowerInsertElement(const InstInsertElement *Inst) {
|
| UnimplementedError(Func->getContext()->getFlags());
|
| }
|
|
|
| -void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
|
| - switch (Instr->getIntrinsicInfo().ID) {
|
| - case Intrinsics::AtomicCmpxchg: {
|
| - UnimplementedError(Func->getContext()->getFlags());
|
| +namespace {
|
| +inline uint64_t getConstantMemoryOrder(Operand *Opnd) {
|
| + if (auto Integer = llvm::dyn_cast<ConstantInteger32>(Opnd))
|
| + return Integer->getValue();
|
| + return Intrinsics::MemoryOrderInvalid;
|
| +}
|
| +} // end of anonymous namespace
|
| +
|
| +void TargetARM32::lowerAtomicRMW(Variable *Dest, uint32_t Operation,
|
| + Operand *Ptr, Operand *Val) {
|
| + // retry:
|
| + // ldrex contents, [addr]
|
| + // op tmp, contents, operand
|
| + // strex success, tmp, [addr]
|
| + // jne retry
|
| + // fake-use(addr, operand) @ prevents undesirable clobbering.
|
| + // mov dest, contents
|
| + assert(Dest != nullptr);
|
| + Type DestTy = Dest->getType();
|
| + (void)Ptr;
|
| + (void)Val;
|
| +
|
| + OperandARM32Mem *Mem;
|
| + Variable *PtrContentsReg;
|
| + Variable *PtrContentsHiReg;
|
| + Variable *PtrContentsLoReg;
|
| + Variable *Value = Func->makeVariable(DestTy);
|
| + Variable *ValueReg;
|
| + Variable *ValueHiReg;
|
| + Variable *ValueLoReg;
|
| + Variable *Success = makeReg(IceType_i32);
|
| + Variable *TmpReg;
|
| + Variable *TmpHiReg;
|
| + Variable *TmpLoReg;
|
| + Operand *_0 = Ctx->getConstantZero(IceType_i32);
|
| + InstARM32Label *Retry = InstARM32Label::create(Func, this);
|
| +
|
| + if (DestTy == IceType_i64) {
|
| + Variable64On32 *PtrContentsReg64 = makeI64RegPair();
|
| + PtrContentsHiReg = PtrContentsReg64->getHi();
|
| + PtrContentsLoReg = PtrContentsReg64->getLo();
|
| + PtrContentsReg = PtrContentsReg64;
|
| +
|
| + llvm::cast<Variable64On32>(Value)->initHiLo(Func);
|
| + Variable64On32 *ValueReg64 = makeI64RegPair();
|
| + ValueHiReg = ValueReg64->getHi();
|
| + ValueLoReg = ValueReg64->getLo();
|
| + ValueReg = ValueReg64;
|
| +
|
| + Variable64On32 *TmpReg64 = makeI64RegPair();
|
| + TmpHiReg = TmpReg64->getHi();
|
| + TmpLoReg = TmpReg64->getLo();
|
| + TmpReg = TmpReg64;
|
| + } else {
|
| + PtrContentsReg = makeReg(DestTy);
|
| + PtrContentsHiReg = nullptr;
|
| + PtrContentsLoReg = PtrContentsReg;
|
| +
|
| + ValueReg = makeReg(DestTy);
|
| + ValueHiReg = nullptr;
|
| + ValueLoReg = ValueReg;
|
| +
|
| + TmpReg = makeReg(DestTy);
|
| + TmpHiReg = nullptr;
|
| + TmpLoReg = TmpReg;
|
| + }
|
| +
|
| + if (DestTy == IceType_i64) {
|
| + Context.insert(InstFakeDef::create(Func, Value));
|
| + }
|
| + lowerAssign(InstAssign::create(Func, Value, Val));
|
| +
|
| + Variable *PtrVar = Func->makeVariable(IceType_i32);
|
| + lowerAssign(InstAssign::create(Func, PtrVar, Ptr));
|
| +
|
| + _dmb();
|
| + Context.insert(Retry);
|
| + Mem = formMemoryOperand(PtrVar, DestTy);
|
| + if (DestTy == IceType_i64) {
|
| + Context.insert(InstFakeDef::create(Func, ValueReg, Value));
|
| + }
|
| + lowerAssign(InstAssign::create(Func, ValueReg, Value));
|
| + if (DestTy == IceType_i8 || DestTy == IceType_i16) {
|
| + _uxt(ValueReg, ValueReg);
|
| + }
|
| + _ldrex(PtrContentsReg, Mem);
|
| +
|
| + if (DestTy == IceType_i64) {
|
| + Context.insert(InstFakeDef::create(Func, TmpReg, ValueReg));
|
| + }
|
| + switch (Operation) {
|
| + default:
|
| + Func->setError("Unknown AtomicRMW operation");
|
| return;
|
| + case Intrinsics::AtomicAdd:
|
| + if (DestTy == IceType_i64) {
|
| + _adds(TmpLoReg, PtrContentsLoReg, ValueLoReg);
|
| + _adc(TmpHiReg, PtrContentsHiReg, ValueHiReg);
|
| + } else {
|
| + _add(TmpLoReg, PtrContentsLoReg, ValueLoReg);
|
| + }
|
| + break;
|
| + case Intrinsics::AtomicSub:
|
| + if (DestTy == IceType_i64) {
|
| + _subs(TmpLoReg, PtrContentsLoReg, ValueLoReg);
|
| + _sbc(TmpHiReg, PtrContentsHiReg, ValueHiReg);
|
| + } else {
|
| + _sub(TmpLoReg, PtrContentsLoReg, ValueLoReg);
|
| + }
|
| + break;
|
| + case Intrinsics::AtomicOr:
|
| + _orr(TmpLoReg, PtrContentsLoReg, ValueLoReg);
|
| + if (DestTy == IceType_i64) {
|
| + _orr(TmpHiReg, PtrContentsHiReg, ValueHiReg);
|
| + }
|
| + break;
|
| + case Intrinsics::AtomicAnd:
|
| + _and(TmpLoReg, PtrContentsLoReg, ValueLoReg);
|
| + if (DestTy == IceType_i64) {
|
| + _and(TmpHiReg, PtrContentsHiReg, ValueHiReg);
|
| + }
|
| + break;
|
| + case Intrinsics::AtomicXor:
|
| + _eor(TmpLoReg, PtrContentsLoReg, ValueLoReg);
|
| + if (DestTy == IceType_i64) {
|
| + _eor(TmpHiReg, PtrContentsHiReg, ValueHiReg);
|
| + }
|
| + break;
|
| + case Intrinsics::AtomicExchange:
|
| + _mov(TmpLoReg, ValueLoReg);
|
| + if (DestTy == IceType_i64) {
|
| + _mov(TmpHiReg, ValueHiReg);
|
| + }
|
| + break;
|
| }
|
| + _strex(Success, TmpReg, Mem);
|
| + _cmp(Success, _0);
|
| + _br(Retry, CondARM32::NE);
|
| +
|
| + // The following fake-uses ensure that Subzero will not clobber them in the
|
| + // load-linked/store-conditional loop above. We might have to spill them, but
|
| + // spilling is preferable to incorrect behavior.
|
| + Context.insert(InstFakeUse::create(Func, PtrVar));
|
| + if (auto *Value64 = llvm::dyn_cast<Variable64On32>(Value)) {
|
| + Context.insert(InstFakeUse::create(Func, Value64->getHi()));
|
| + Context.insert(InstFakeUse::create(Func, Value64->getLo()));
|
| + } else {
|
| + Context.insert(InstFakeUse::create(Func, Value));
|
| + }
|
| + _dmb();
|
| + if (DestTy == IceType_i8 || DestTy == IceType_i16) {
|
| + _uxt(PtrContentsReg, PtrContentsReg);
|
| + }
|
| +
|
| + if (DestTy == IceType_i64) {
|
| + Context.insert(InstFakeUse::create(Func, PtrContentsReg));
|
| + }
|
| + lowerAssign(InstAssign::create(Func, Dest, PtrContentsReg));
|
| + if (auto *Dest64 = llvm::dyn_cast<Variable64On32>(Dest)) {
|
| + Context.insert(InstFakeUse::create(Func, Dest64->getLo()));
|
| + Context.insert(InstFakeUse::create(Func, Dest64->getHi()));
|
| + } else {
|
| + Context.insert(InstFakeUse::create(Func, Dest));
|
| + }
|
| +}
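|
| For context, a behavioral sketch (not the emitted code) of what the
| AtomicRMW lowering above implements for AtomicAdd: read the old contents,
| try to publish old + value, and retry when the conditional store fails,
| which is the same shape as the ldrex/strex loop. compare_exchange_weak
| stands in for strex, which may fail spuriously; names are illustrative.
|
|   #include <atomic>
|   #include <cstdint>
|
|   uint64_t atomicAddSketch(std::atomic<uint64_t> &Mem, uint64_t Val) {
|     uint64_t Old = Mem.load(std::memory_order_relaxed);
|     while (!Mem.compare_exchange_weak(Old, Old + Val,
|                                       std::memory_order_seq_cst,
|                                       std::memory_order_relaxed)) {
|       // On failure, Old is refreshed with the current contents; retry.
|     }
|     return Old; // Dest receives the pre-update contents.
|   }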
|
| +
|
| +void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
|
| + Variable *Dest = Instr->getDest();
|
| + Type DestTy = (Dest != nullptr) ? Dest->getType() : IceType_void;
|
| + Intrinsics::IntrinsicID ID = Instr->getIntrinsicInfo().ID;
|
| + switch (ID) {
|
| case Intrinsics::AtomicFence:
|
| - UnimplementedError(Func->getContext()->getFlags());
|
| - return;
|
| case Intrinsics::AtomicFenceAll:
|
| - // NOTE: FenceAll should prevent and load/store from being moved across the
|
| - // fence (both atomic and non-atomic). The InstARM32Mfence instruction is
|
| - // currently marked coarsely as "HasSideEffects".
|
| - UnimplementedError(Func->getContext()->getFlags());
|
| + assert(Dest == nullptr);
|
| + _dmb();
|
| return;
|
| case Intrinsics::AtomicIsLockFree: {
|
| - UnimplementedError(Func->getContext()->getFlags());
|
| + Operand *ByteSize = Instr->getArg(0);
|
| + auto *CI = llvm::dyn_cast<ConstantInteger32>(ByteSize);
|
| + if (CI == nullptr) {
|
| + // The PNaCl ABI requires the byte size to be a compile-time constant.
|
| + Func->setError("AtomicIsLockFree byte size should be compile-time const");
|
| + return;
|
| + }
|
| + static constexpr int32_t NotLockFree = 0;
|
| + static constexpr int32_t LockFree = 1;
|
| + int32_t Result = NotLockFree;
|
| + switch (CI->getValue()) {
|
| + case 1:
|
| + case 2:
|
| + case 4:
|
| + case 8:
|
| + Result = LockFree;
|
| + break;
|
| + }
|
| + _mov(Dest, legalizeToReg(Ctx->getConstantInt32(Result)));
|
| return;
|
| }
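|
| Restated as a sketch, the compile-time decision above: every power-of-two
| size up to 8 bytes has a matching exclusive-access instruction on ARM32
| (byte, halfword, word, doubleword), so those sizes report lock-free and
| every other size does not. The helper name is an illustrative assumption:
|
|   #include <cstdint>
|
|   constexpr bool isLockFreeSketch(uint32_t ByteSize) {
|     return ByteSize == 1 || ByteSize == 2 || ByteSize == 4 || ByteSize == 8;
|   }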
|
| case Intrinsics::AtomicLoad: {
|
| - UnimplementedError(Func->getContext()->getFlags());
|
| + assert(isScalarIntegerType(DestTy));
|
| + // We require the memory address to be naturally aligned. Given that is the
|
| + // case, then normal loads are atomic.
|
| + if (!Intrinsics::isMemoryOrderValid(
|
| + ID, getConstantMemoryOrder(Instr->getArg(1)))) {
|
| + Func->setError("Unexpected memory ordering for AtomicLoad");
|
| + return;
|
| + }
|
| + Variable *T;
|
| +
|
| + if (DestTy == IceType_i64) {
|
| + // ldrex is the only ARM instruction that is guaranteed to load a 64-bit
|
| + // integer atomically. Everything else works with a regular ldr.
|
| + T = makeI64RegPair();
|
| + _ldrex(T, formMemoryOperand(Instr->getArg(0), IceType_i64));
|
| + } else {
|
| + T = makeReg(DestTy);
|
| + _ldr(T, formMemoryOperand(Instr->getArg(0), DestTy));
|
| + }
|
| + _dmb();
|
| + lowerAssign(InstAssign::create(Func, Dest, T));
|
| + // Make sure the atomic load isn't elided when unused, by adding a FakeUse.
|
| + // Since lowerLoad may fuse the load w/ an arithmetic instruction, insert
|
| + // the FakeUse on the last-inserted instruction's dest.
|
| + Context.insert(
|
| + InstFakeUse::create(Func, Context.getLastInserted()->getDest()));
|
| return;
|
| }
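|
| The i64 path above matches what C++ compilers typically generate for a
| sequentially consistent 64-bit atomic load on ARM32: an exclusive
| doubleword load followed by a barrier. A hedged sketch of that
| correspondence (typical codegen, not something this patch guarantees):
|
|   #include <atomic>
|   #include <cstdint>
|
|   uint64_t atomicLoad64Sketch(const std::atomic<uint64_t> &Mem) {
|     // Usually lowers to ldrexd followed by dmb, mirroring the lowering.
|     return Mem.load(std::memory_order_seq_cst);
|   }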
|
| - case Intrinsics::AtomicRMW:
|
| - UnimplementedError(Func->getContext()->getFlags());
|
| - return;
|
| case Intrinsics::AtomicStore: {
|
| - UnimplementedError(Func->getContext()->getFlags());
|
| + // We require the memory address to be naturally aligned. Given that is the
|
| + // case, then normal stores are atomic.
|
| + if (!Intrinsics::isMemoryOrderValid(
|
| + ID, getConstantMemoryOrder(Instr->getArg(2)))) {
|
| + Func->setError("Unexpected memory ordering for AtomicStore");
|
| + return;
|
| + }
|
| + Operand *Value = Instr->getArg(0);
|
| + Type ValueTy = Value->getType();
|
| + assert(isScalarIntegerType(ValueTy));
|
| + Operand *Addr = Instr->getArg(1);
|
| +
|
| + if (ValueTy == IceType_i64) {
|
| + // Atomic 64-bit stores require a load-linked/store-conditional loop using
|
| + // ldrexd and strexd. The lowered code is:
|
| + //
|
| + // retry:
|
| + // ldrexd t.lo, t.hi, [addr]
|
| + // strexd success, value.lo, value.hi, [addr]
|
| + // cmp success, #0
|
| + // bne retry
|
| + // fake-use(addr, value.lo, value.hi)
|
| + //
|
| + // The fake-use is needed to prevent those variables from being clobbered
|
| + // in the loop (which will happen under register pressure).
|
| + Variable64On32 *Tmp = makeI64RegPair();
|
| + Variable64On32 *ValueVar =
|
| + llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
|
| + Variable *AddrVar = makeReg(IceType_i32);
|
| + Variable *Success = makeReg(IceType_i32);
|
| + OperandARM32Mem *Mem;
|
| + Operand *_0 = Ctx->getConstantZero(IceType_i32);
|
| + InstARM32Label *Retry = InstARM32Label::create(Func, this);
|
| + Variable64On32 *NewReg = makeI64RegPair();
|
| + ValueVar->initHiLo(Func);
|
| + ValueVar->mustNotHaveReg();
|
| +
|
| + _dmb();
|
| + lowerAssign(InstAssign::create(Func, ValueVar, Value));
|
| + lowerAssign(InstAssign::create(Func, AddrVar, Addr));
|
| +
|
| + Context.insert(Retry);
|
| + Context.insert(InstFakeDef::create(Func, NewReg));
|
| + lowerAssign(InstAssign::create(Func, NewReg, ValueVar));
|
| + Mem = formMemoryOperand(AddrVar, IceType_i64);
|
| + _ldrex(Tmp, Mem);
|
| + // This fake-use both prevents the ldrex from being dead-code eliminated
|
| + // and keeps liveness happy about all defs being used.
|
| + Context.insert(
|
| + InstFakeUse::create(Func, Context.getLastInserted()->getDest()));
|
| + _strex(Success, NewReg, Mem);
|
| + _cmp(Success, _0);
|
| + _br(Retry, CondARM32::NE);
|
| +
|
| + Context.insert(InstFakeUse::create(Func, ValueVar->getLo()));
|
| + Context.insert(InstFakeUse::create(Func, ValueVar->getHi()));
|
| + Context.insert(InstFakeUse::create(Func, AddrVar));
|
| + _dmb();
|
| + return;
|
| + }
|
| + // Non-64-bit stores are atomic as long as the address is aligned. This
|
| + // is PNaCl, so addresses are aligned.
|
| + Variable *T = makeReg(ValueTy);
|
| +
|
| + _dmb();
|
| + lowerAssign(InstAssign::create(Func, T, Value));
|
| + _str(T, formMemoryOperand(Addr, ValueTy));
|
| + _dmb();
|
| + return;
|
| + }
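|
| For the 64-bit store path, a behavioral sketch of why the loop exists: a
| plain 64-bit store is not guaranteed to be single-copy atomic on ARM32, so
| the ldrexd/strexd loop in effect performs an exchange whose loaded value is
| discarded. Illustrative only; the function name is assumed:
|
|   #include <atomic>
|   #include <cstdint>
|
|   void atomicStore64Sketch(std::atomic<uint64_t> &Mem, uint64_t Value) {
|     (void)Mem.exchange(Value, std::memory_order_seq_cst); // result ignored
|   }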
|
| + case Intrinsics::AtomicCmpxchg: {
|
| + // The initial lowering for cmpxchg was:
|
| + //
|
| + // retry:
|
| + // ldrex tmp, [addr]
|
| + // cmp tmp, expected
|
| + // mov expected, tmp
|
| + // jne retry
|
| + // strex success, new, [addr]
|
| + // cmp success, #0
|
| + // bne retry
|
| + // mov dest, expected
|
| + //
|
| + // Besides requiring two branches, that lowering could also potentially
|
| + // write to memory (in mov expected, tmp) unless we were OK with increasing
|
| + // the register pressure and requiring expected to be an infinite-weight
|
| + // variable (spoiler alert: that was a problem for i64 cmpxchg). Through
|
| + // careful rewriting, and thanks to predication, we now implement the
|
| + // lowering as:
|
| + //
|
| + // retry:
|
| + // ldrex tmp, [addr]
|
| + // cmp tmp, expected
|
| + // strexeq success, new, [addr]
|
| + // movne expected, tmp
|
| + // cmpeq success, #0
|
| + // bne retry
|
| + // mov dest, expected
|
| + //
|
| + // Predication lets us move the strex ahead of the mov expected, tmp, which
|
| + // allows tmp to be a non-infinite weight temporary. We wanted to avoid
|
| + // writing to memory between ldrex and strex because, even though most of the
|
| + // time that would cause no issues, if any interleaving memory write aliased
|
| + // [addr] then we would have undefined behavior. Undefined behavior isn't
|
| + // cool, so we try to avoid it. See the "Synchronization and semaphores"
|
| + // section of the "ARM Architecture Reference Manual."
|
| +
|
| + assert(isScalarIntegerType(DestTy));
|
| + // We require the memory address to be naturally aligned. Given that is the
|
| + // case, then normal loads are atomic.
|
| + if (!Intrinsics::isMemoryOrderValid(
|
| + ID, getConstantMemoryOrder(Instr->getArg(3)),
|
| + getConstantMemoryOrder(Instr->getArg(4)))) {
|
| + Func->setError("Unexpected memory ordering for AtomicCmpxchg");
|
| + return;
|
| + }
|
| +
|
| + OperandARM32Mem *Mem;
|
| + Variable *TmpReg;
|
| + Variable *Expected, *ExpectedReg;
|
| + Variable *New, *NewReg;
|
| + Variable *Success = makeReg(IceType_i32);
|
| + Operand *_0 = Ctx->getConstantZero(IceType_i32);
|
| + InstARM32Label *Retry = InstARM32Label::create(Func, this);
|
| +
|
| + if (DestTy == IceType_i64) {
|
| + Variable64On32 *TmpReg64 = makeI64RegPair();
|
| + Variable64On32 *New64 =
|
| + llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
|
| + Variable64On32 *NewReg64 = makeI64RegPair();
|
| + Variable64On32 *Expected64 =
|
| + llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
|
| + Variable64On32 *ExpectedReg64 = makeI64RegPair();
|
| +
|
| + New64->initHiLo(Func);
|
| + New64->mustNotHaveReg();
|
| + Expected64->initHiLo(Func);
|
| + Expected64->mustNotHaveReg();
|
| +
|
| + TmpReg = TmpReg64;
|
| + New = New64;
|
| + NewReg = NewReg64;
|
| + Expected = Expected64;
|
| + ExpectedReg = ExpectedReg64;
|
| + } else {
|
| + TmpReg = makeReg(DestTy);
|
| + New = Func->makeVariable(DestTy);
|
| + NewReg = makeReg(DestTy);
|
| + Expected = Func->makeVariable(DestTy);
|
| + ExpectedReg = makeReg(DestTy);
|
| + }
|
| +
|
| + Mem = formMemoryOperand(Instr->getArg(0), DestTy);
|
| + if (DestTy == IceType_i64) {
|
| + Context.insert(InstFakeDef::create(Func, Expected));
|
| + }
|
| + lowerAssign(InstAssign::create(Func, Expected, Instr->getArg(1)));
|
| + if (DestTy == IceType_i64) {
|
| + Context.insert(InstFakeDef::create(Func, New));
|
| + }
|
| + lowerAssign(InstAssign::create(Func, New, Instr->getArg(2)));
|
| + _dmb();
|
| +
|
| + Context.insert(Retry);
|
| + if (DestTy == IceType_i64) {
|
| + Context.insert(InstFakeDef::create(Func, ExpectedReg, Expected));
|
| + }
|
| + lowerAssign(InstAssign::create(Func, ExpectedReg, Expected));
|
| + if (DestTy == IceType_i64) {
|
| + Context.insert(InstFakeDef::create(Func, NewReg, New));
|
| + }
|
| + lowerAssign(InstAssign::create(Func, NewReg, New));
|
| +
|
| + _ldrex(TmpReg, Mem);
|
| + Context.insert(
|
| + InstFakeUse::create(Func, Context.getLastInserted()->getDest()));
|
| + if (DestTy == IceType_i64) {
|
| + auto *TmpReg64 = llvm::cast<Variable64On32>(TmpReg);
|
| + auto *ExpectedReg64 = llvm::cast<Variable64On32>(ExpectedReg);
|
| + // lowerAssign above has added fake-defs for TmpReg and ExpectedReg. Let's
|
| + // keep liveness happy, shall we?
|
| + Context.insert(InstFakeUse::create(Func, TmpReg));
|
| + Context.insert(InstFakeUse::create(Func, ExpectedReg));
|
| + _cmp(TmpReg64->getHi(), ExpectedReg64->getHi());
|
| + _cmp(TmpReg64->getLo(), ExpectedReg64->getLo(), CondARM32::EQ);
|
| + } else {
|
| + _cmp(TmpReg, ExpectedReg);
|
| + }
|
| + _strex(Success, NewReg, Mem, CondARM32::EQ);
|
| + if (DestTy == IceType_i64) {
|
| + auto *TmpReg64 = llvm::cast<Variable64On32>(TmpReg);
|
| + auto *Expected64 = llvm::cast<Variable64On32>(Expected);
|
| + _mov_redefined(Expected64->getHi(), TmpReg64->getHi(), CondARM32::NE);
|
| + _mov_redefined(Expected64->getLo(), TmpReg64->getLo(), CondARM32::NE);
|
| + auto *FakeDef = InstFakeDef::create(Func, Expected, TmpReg);
|
| + Context.insert(FakeDef);
|
| + FakeDef->setDestRedefined();
|
| + } else {
|
| + _mov_redefined(Expected, TmpReg, CondARM32::NE);
|
| + }
|
| + _cmp(Success, _0, CondARM32::EQ);
|
| + _br(Retry, CondARM32::NE);
|
| + _dmb();
|
| + lowerAssign(InstAssign::create(Func, Dest, Expected));
|
| + Context.insert(InstFakeUse::create(Func, Expected));
|
| + if (auto *New64 = llvm::dyn_cast<Variable64On32>(New)) {
|
| + Context.insert(InstFakeUse::create(Func, New64->getLo()));
|
| + Context.insert(InstFakeUse::create(Func, New64->getHi()));
|
| + } else {
|
| + Context.insert(InstFakeUse::create(Func, New));
|
| + }
|
| + return;
|
| + }
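|
| A behavioral sketch of the contract the cmpxchg lowering above provides
| (the real code emits the predicated ldrex/strex loop from the comment):
| Dest receives the observed memory contents, and on failure that value also
| replaces Expected, which is the job of the "movne expected, tmp". Names
| are illustrative:
|
|   #include <atomic>
|   #include <cstdint>
|
|   uint64_t cmpxchgSketch(std::atomic<uint64_t> &Mem, uint64_t Expected,
|                          uint64_t New) {
|     Mem.compare_exchange_strong(Expected, New, std::memory_order_seq_cst);
|     return Expected; // unchanged on success; the observed value on failure
|   }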
|
| + case Intrinsics::AtomicRMW: {
|
| + if (!Intrinsics::isMemoryOrderValid(
|
| + ID, getConstantMemoryOrder(Instr->getArg(3)))) {
|
| + Func->setError("Unexpected memory ordering for AtomicRMW");
|
| + return;
|
| + }
|
| + lowerAtomicRMW(
|
| + Dest, static_cast<uint32_t>(
|
| + llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue()),
|
| + Instr->getArg(1), Instr->getArg(2));
|
| return;
|
| }
|
| case Intrinsics::Bswap: {
|
| - Variable *Dest = Instr->getDest();
|
| Operand *Val = Instr->getArg(0);
|
| Type Ty = Val->getType();
|
| if (Ty == IceType_i64) {
|
| @@ -2598,7 +3057,6 @@ void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
|
| return;
|
| }
|
| case Intrinsics::Ctpop: {
|
| - Variable *Dest = Instr->getDest();
|
| Operand *Val = Instr->getArg(0);
|
| InstCall *Call = makeHelperCall(isInt32Asserting32Or64(Val->getType())
|
| ? H_call_ctpop_i32
|
| @@ -2633,7 +3091,7 @@ void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
|
| } else {
|
| ValLoR = legalizeToReg(Val);
|
| }
|
| - lowerCLZ(Instr->getDest(), ValLoR, ValHiR);
|
| + lowerCLZ(Dest, ValLoR, ValHiR);
|
| return;
|
| }
|
| case Intrinsics::Cttz: {
|
| @@ -2657,17 +3115,16 @@ void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
|
| _rbit(T, ValLoR);
|
| ValLoR = T;
|
| }
|
| - lowerCLZ(Instr->getDest(), ValLoR, ValHiR);
|
| + lowerCLZ(Dest, ValLoR, ValHiR);
|
| return;
|
| }
|
| case Intrinsics::Fabs: {
|
| - Variable *Dest = Instr->getDest();
|
| Type DestTy = Dest->getType();
|
| Variable *T = makeReg(DestTy);
|
| if (isVectorType(DestTy)) {
|
| // Add a fake def to keep liveness consistent in the meantime.
|
| Context.insert(InstFakeDef::create(Func, T));
|
| - _mov(Instr->getDest(), T);
|
| + _mov(Dest, T);
|
| UnimplementedError(Func->getContext()->getFlags());
|
| return;
|
| }
|
| @@ -2721,20 +3178,19 @@ void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
|
| if (Ctx->getFlags().getUseSandboxing()) {
|
| UnimplementedError(Func->getContext()->getFlags());
|
| } else {
|
| - InstCall *Call = makeHelperCall(H_call_read_tp, Instr->getDest(), 0);
|
| + InstCall *Call = makeHelperCall(H_call_read_tp, Dest, 0);
|
| lowerCall(Call);
|
| }
|
| return;
|
| }
|
| case Intrinsics::Setjmp: {
|
| - InstCall *Call = makeHelperCall(H_call_setjmp, Instr->getDest(), 1);
|
| + InstCall *Call = makeHelperCall(H_call_setjmp, Dest, 1);
|
| Call->addArg(Instr->getArg(0));
|
| lowerCall(Call);
|
| return;
|
| }
|
| case Intrinsics::Sqrt: {
|
| Variable *Src = legalizeToReg(Instr->getArg(0));
|
| - Variable *Dest = Instr->getDest();
|
| Variable *T = makeReg(Dest->getType());
|
| _vsqrt(T, Src);
|
| _mov(Dest, T);
|
| @@ -2742,7 +3198,6 @@ void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
|
| }
|
| case Intrinsics::Stacksave: {
|
| Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
|
| - Variable *Dest = Instr->getDest();
|
| _mov(Dest, SP);
|
| return;
|
| }
|
| @@ -3224,6 +3679,16 @@ OperandARM32Mem *TargetARM32::formMemoryOperand(Operand *Operand, Type Ty) {
|
| llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32)));
|
| }
|
|
|
| +Variable64On32 *TargetARM32::makeI64RegPair() {
|
| + Variable64On32 *Reg =
|
| + llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
|
| + Reg->setMustHaveReg();
|
| + Reg->initHiLo(Func);
|
| + Reg->getLo()->setMustNotHaveReg();
|
| + Reg->getHi()->setMustNotHaveReg();
|
| + return Reg;
|
| +}
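|
| The intended usage pattern, as in the atomic lowerings earlier in this
| patch: the register allocator only ever sees the pair (which must receive a
| register), and the halves are pinned afterwards by
| copyRegAllocFromInfWeightVariable64On32. A sketch drawn from the
| AtomicStore case (assumes the surrounding lowering context):
|
|   Variable64On32 *Tmp = makeI64RegPair();
|   OperandARM32Mem *Mem = formMemoryOperand(AddrVar, IceType_i64);
|   _ldrex(Tmp, Mem); // the exclusive 64-bit load consumes the whole pair
|   Context.insert(
|       InstFakeUse::create(Func, Context.getLastInserted()->getDest()));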
|
| +
|
| Variable *TargetARM32::makeReg(Type Type, int32_t RegNum) {
|
| // There aren't any 64-bit integer registers for ARM32.
|
| assert(Type != IceType_i64);
|
|
|