src/IceTargetLoweringX8632.cpp - Issue 362463002: Subzero: lower the rest of the atomic operations.

Unified Diff: src/IceTargetLoweringX8632.cpp

Issue 362463002: Subzero: lower the rest of the atomic operations. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master

Patch Set: comment cleanup Created 6 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« src/IceTargetLoweringX8632.h ('K') | « src/IceTargetLoweringX8632.h ('k') | tests_lit/llvm2ice_tests/nacl-atomic-intrinsics.ll » ('j') | tests_lit/llvm2ice_tests/nacl-atomic-intrinsics.ll » ('J')
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: src/IceTargetLoweringX8632.cpp

diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp

index fec2d625f544eff323a05acacd436868da414501..71773d70f069a04e45e44a93cc4775b80102d2c1 100644

--- a/src/IceTargetLoweringX8632.cpp

+++ b/src/IceTargetLoweringX8632.cpp

@@ -1800,7 +1800,7 @@ void TargetX8632::lowerIcmp(const InstIcmp *Inst) {

void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {

switch (Instr->getIntrinsicInfo().ID) {

- case Intrinsics::AtomicCmpxchg:

+ case Intrinsics::AtomicCmpxchg: {

if (!Intrinsics::VerifyMemoryOrder(

llvm::cast<ConstantInteger>(Instr->getArg(3))->getValue())) {

Func->setError("Unexpected memory ordering (success) for AtomicCmpxchg");

@@ -1811,9 +1811,18 @@ void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {

Func->setError("Unexpected memory ordering (failure) for AtomicCmpxchg");

return;

}

- // TODO(jvoung): fill it in.

- Func->setError("Unhandled intrinsic");

+ Variable *DestPrev = Instr->getDest();

+ Operand *PtrToMem = Instr->getArg(0);

+ Operand *Expected = Instr->getArg(1);

+ Operand *Desired = Instr->getArg(2);

+ lowerAtomicCmpxchg(DestPrev, PtrToMem, Expected, Desired);

+ // TODO(jvoung): If we peek ahead a few instructions and see how

+ // DestPrev is used (typically via another compare and branch),

+ // we may be able to optimize. If the result truly is used by a

+ // compare + branch, and the comparison is for equality, then we can

+ // optimized out the later compare, and fuse with the later branch.

Jim Stichnoth 2014/07/08 04:50:19 optimized --> optimize

jvoung (off chromium) 2014/07/09 17:07:55 Done.

return;

+ }

case Intrinsics::AtomicFence:

if (!Intrinsics::VerifyMemoryOrder(

llvm::cast<ConstantInteger>(Instr->getArg(0))->getValue())) {

@@ -2008,18 +2017,54 @@ void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {

return;

}

+void TargetX8632::lowerAtomicCmpxchg(Variable *DestPrev, Operand *Ptr,

+ Operand *Expected, Operand *Desired) {

+ if (Expected->getType() == IceType_i64) {

+ // Reserve the pre-colored registers first, before adding any more

+ // infinite-weight variables from FormMemoryOperand's legalization.

+ Variable *T_edx = makeReg(IceType_i32, Reg_edx);

+ Variable *T_eax = makeReg(IceType_i32, Reg_eax);

+ Variable *T_ecx = makeReg(IceType_i32, Reg_ecx);

+ Variable *T_ebx = makeReg(IceType_i32, Reg_ebx);

+ _mov(T_eax, loOperand(Expected));

+ _mov(T_edx, hiOperand(Expected));

+ _mov(T_ebx, loOperand(Desired));

+ _mov(T_ecx, hiOperand(Desired));

+ OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Expected->getType());

+ const bool Locked = true;

+ _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);

+ Variable *DestLo = llvm::cast<Variable>(loOperand(DestPrev));

+ Variable *DestHi = llvm::cast<Variable>(hiOperand(DestPrev));

+ _mov(DestLo, T_eax);

+ _mov(DestHi, T_edx);

+ return;

+ }

+ Variable *T_eax = makeReg(Expected->getType(), Reg_eax);

+ _mov(T_eax, Expected);

+ OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Expected->getType());

+ Variable *DesiredReg = legalizeToVar(Desired);

+ const bool Locked = true;

+ _cmpxchg(Addr, T_eax, DesiredReg, Locked);

+ _mov(DestPrev, T_eax);

void TargetX8632::lowerAtomicRMW(Variable *Dest, uint32_t Operation,

Operand *Ptr, Operand *Val) {

+ bool NeedsCmpxchg = false;

+ LowerBinOp Op_Lo = NULL;

+ LowerBinOp Op_Hi = NULL;

switch (Operation) {

default:

Func->setError("Unknown AtomicRMW operation");

return;

case Intrinsics::AtomicAdd: {

if (Dest->getType() == IceType_i64) {

- // Do a nasty cmpxchg8b loop. Factor this into a function.

- // TODO(jvoung): fill it in.

- Func->setError("Unhandled AtomicRMW operation");

- return;

+ // All the fall-through paths must set this to true, but use this

+ // for asserting.

+ NeedsCmpxchg = true;

+ Op_Lo = &TargetX8632::_add;

+ Op_Hi = &TargetX8632::_adc;

+ break;

}

OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Dest->getType());

const bool Locked = true;

@@ -2031,26 +2076,174 @@ void TargetX8632::lowerAtomicRMW(Variable *Dest, uint32_t Operation,

}

case Intrinsics::AtomicSub: {

if (Dest->getType() == IceType_i64) {

- // Do a nasty cmpxchg8b loop.

- // TODO(jvoung): fill it in.

- Func->setError("Unhandled AtomicRMW operation");

- return;

+ NeedsCmpxchg = true;

+ Op_Lo = &TargetX8632::_sub;

+ Op_Hi = &TargetX8632::_sbb;

+ break;

}

- // Generate a memory operand from Ptr.

- // neg...

- // Then do the same as AtomicAdd.

- // TODO(jvoung): fill it in.

- Func->setError("Unhandled AtomicRMW operation");

+ OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Dest->getType());

+ const bool Locked = true;

+ Variable *T = NULL;

+ _mov(T, Val);

+ _neg(T);

+ _xadd(Addr, T, Locked);

+ _mov(Dest, T);

return;

}

case Intrinsics::AtomicOr:

+ // TODO(jvoung): If Dest is null or dead, then some of these

+ // operations do not need an "exchange", but just a locked op.

+ // That appears to be "worth" it for sub, or, and, and xor.

+ // xadd is probably fine vs lock add for add, and xchg is fine

+ // vs an atomic store.

+ NeedsCmpxchg = true;

+ Op_Lo = &TargetX8632::_or;

+ Op_Hi = &TargetX8632::_or;

+ break;

case Intrinsics::AtomicAnd:

+ NeedsCmpxchg = true;

+ Op_Lo = &TargetX8632::_and;

+ Op_Hi = &TargetX8632::_and;

+ break;

case Intrinsics::AtomicXor:

+ NeedsCmpxchg = true;

+ Op_Lo = &TargetX8632::_xor;

+ Op_Hi = &TargetX8632::_xor;

+ break;

case Intrinsics::AtomicExchange:

- // TODO(jvoung): fill it in.

- Func->setError("Unhandled AtomicRMW operation");

+ if (Dest->getType() == IceType_i64) {

+ NeedsCmpxchg = true;

+ // NeedsCmpxchg, but no real Op_Lo/Op_Hi need to be done. The values

+ // just need to be moved to the ecx and ebx registers.

+ Op_Lo = NULL;

+ Op_Hi = NULL;

+ break;

+ }

+ OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Dest->getType());

+ Variable *T = NULL;

+ _mov(T, Val);

+ _xchg(Addr, T);

+ _mov(Dest, T);

return;

}

+ // Otherwise, we need a cmpxchg loop.

+ assert(NeedsCmpxchg);

+ expandAtomicRMWAsCmpxchg(Op_Lo, Op_Hi, Dest, Ptr, Val);

+void TargetX8632::expandAtomicRMWAsCmpxchg(LowerBinOp Op_Lo, LowerBinOp Op_Hi,

+ Variable *Dest, Operand *Ptr,

+ Operand *Val) {

+ // Expand a more complex RMW operation as a cmpxchg loop:

+ // For 64-bit:

+ // mov eax, [ptr]

+ // mov edx, [ptr + 4]

+ // .LABEL:

+ // mov ebx, eax

+ // <Op_Lo> ebx, <desired_adj_lo>

+ // mov ecx, edx

+ // <Op_Hi> ecx, <desired_adj_hi>

+ // lock cmpxchg8b [ptr]

+ // jne .LABEL

+ // mov <dest_lo>, eax

+ // mov <dest_lo>, edx

+ //

+ // For 32-bit:

+ // mov eax, [ptr]

+ // .LABEL:

+ // mov <reg>, eax

+ // op <reg>, [desired_adj]

+ // lock cmpxchg [ptr], <reg>

+ // jne .LABEL

+ // mov <dest>, eax

+ //

+ // If Op_{Lo,Hi} are NULL, then just copy the value.

+ Val = legalize(Val);

+ Type Ty = Val->getType();

+ if (Ty == IceType_i64) {

+ Variable *T_edx = makeReg(IceType_i32, Reg_edx);

+ Variable *T_eax = makeReg(IceType_i32, Reg_eax);

+ // FormMemoryOperand legalizes the Ptr to any reg. It usually picks eax,

+ // but that conflicts with the later requirement that we use eax for

+ // cmpxchg. The two infinite weight choices conflict and we end up

+ // choosing eax for both. Work around this for now, but forcing Addr

Jim Stichnoth 2014/07/08 04:50:19 I'd like to understand more about this. It sounds

jvoung (off chromium) 2014/07/08 18:14:07 The problem was actually with O2 register allocati

Jim Stichnoth 2014/07/09 18:14:28 OK, then this is almost certainly a regalloc bug w

jvoung (off chromium) 2014/07/10 23:14:49 Done.

+ // to use a different register. Arbitrarily picking edi instead.

+ bool AllowOverlap = false;

+ Variable *LegalPtr = legalizeToVar(Ptr, AllowOverlap, Reg_edi);

+ OperandX8632Mem *Addr = FormMemoryOperand(LegalPtr, Ty);

+ _mov(T_eax, loOperand(Addr));

+ _mov(T_edx, hiOperand(Addr));

+ Variable *T_ecx = makeReg(IceType_i32, Reg_ecx);

+ Variable *T_ebx = makeReg(IceType_i32, Reg_ebx);

+ InstX8632Label *Label = InstX8632Label::create(Func, this);

+ const bool IsXchg8b = Op_Lo == NULL && Op_Hi == NULL;

+ if (!IsXchg8b) {

+ Context.insert(Label);

+ _mov(T_ebx, T_eax);

+ (this->*Op_Lo)(T_ebx, loOperand(Val));

+ _mov(T_ecx, T_edx);

+ (this->*Op_Hi)(T_ecx, hiOperand(Val));

+ } else {

+ // This is for xchg, which doesn't need an actual Op_Lo/Op_Hi.

+ // It just needs the Val loaded into ebx and ecx.

+ // That can also be done before the loop.

+ _mov(T_ebx, loOperand(Val));

+ _mov(T_ecx, hiOperand(Val));

+ Context.insert(Label);

+ }

+ const bool Locked = true;

+ _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);

+ _br(InstX8632Br::Br_ne, Label);

+ if (!IsXchg8b) {

+ // If Val is a variable, model the extended live range of Val through

+ // the end of the loop, since it will be re-used by the loop.

+ // Same with the address.

+ if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) {

+ Variable *ValLo = llvm::cast<Variable>(loOperand(ValVar));

+ Variable *ValHi = llvm::cast<Variable>(hiOperand(ValVar));

+ Context.insert(InstFakeUse::create(Func, ValLo));

+ Context.insert(InstFakeUse::create(Func, ValHi));

+ }

+ Context.insert(InstFakeUse::create(Func, LegalPtr));

+ } else {

+ // For xchg, just need to extend the live range of ebx/ecx.

+ Context.insert(InstFakeUse::create(Func, T_ebx));

+ Context.insert(InstFakeUse::create(Func, T_ecx));

+ }

+ Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));

+ Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));

+ _mov(DestLo, T_eax);

+ _mov(DestHi, T_edx);

+ return;

+ }

+ // FormMemoryOperand legalizes the Ptr to any reg. It usually picks eax,

+ // but that conflicts with the later requirement that we use eax for

+ // cmpxchg. The two infinite weight choices conflict and we end up

+ // choosing eax for both. Work around this for now, but forcing Addr

+ // to use a different register. Arbitrarily picking ecx instead.

+ const bool AllowOverlap = false;

+ Variable *LegalPtr = legalizeToVar(Ptr, AllowOverlap, Reg_ecx);

+ OperandX8632Mem *Addr = FormMemoryOperand(LegalPtr, Ty);

+ Variable *T_eax = makeReg(Ty, Reg_eax);

+ _mov(T_eax, Addr);

+ InstX8632Label *Label = InstX8632Label::create(Func, this);

+ Context.insert(Label);

+ Variable *T = NULL;

+ // We want to pick a different register for T than Eax,

+ // and we also already used ecx for the LegalPtr.

+ _mov(T, T_eax, Reg_edx);

+ (this->*Op_Lo)(T, Val);

+ const bool Locked = true;

+ _cmpxchg(Addr, T_eax, T, Locked);

+ _br(InstX8632Br::Br_ne, Label);

+ // If Val is a variable, model the extended live range of Val through

+ // the end of the loop, since it will be re-used by the loop.

+ // Same with the address.

+ if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) {

+ Context.insert(InstFakeUse::create(Func, ValVar));

+ }

+ Context.insert(InstFakeUse::create(Func, LegalPtr));

+ _mov(Dest, T_eax);

}

namespace {

@@ -2521,7 +2714,11 @@ void TargetX8632::postLower() {

if (Ctx->getOptLevel() != Opt_m1)

return;

// TODO: Avoid recomputing WhiteList every instruction.

- llvm::SmallBitVector WhiteList = getRegisterSet(RegSet_All, RegSet_None);

+ RegSetMask RegInclude = RegSet_All;

+ RegSetMask RegExclude = RegSet_None | RegSet_StackPointer;

+ if (hasFramePointer())

jvoung (off chromium) 2014/07/07 17:31:16 split out -- but currently here so that my tests p

jvoung (off chromium) 2014/07/09 17:07:55 Done.

+ RegExclude |= RegSet_FramePointer;

+ llvm::SmallBitVector WhiteList = getRegisterSet(RegInclude, RegExclude);

// Make one pass to black-list pre-colored registers. TODO: If

// there was some prior register allocation pass that made register

// assignments, those registers need to be black-listed here as