Index: src/IceTargetLoweringX8632.cpp
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index af7b866d3696aa7a8a6c501cb3de738f208f4805..ef9bc22f525087d8ac85bd66b4824871ba58fbab 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -431,7 +431,6 @@ void TargetX8632::setArgOffsetAndCopy(Variable *Arg, Variable *FramePtr,
   InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
 }
-// static
 Type TargetX8632::stackSlotType() { return IceType_i32; }
 void TargetX8632::addProlog(CfgNode *Node) {
@@ -1615,7 +1614,7 @@ void TargetX8632::lowerCast(const InstCast *Inst) {
       Variable *Spill = Func->makeVariable(IceType_f64, Context.getNode());
       Spill->setWeight(RegWeight::Zero);
       Spill->setPreferredRegister(llvm::dyn_cast<Variable>(Src0RM), true);
-      _mov(Spill, Src0RM);
+      _movq(Spill, Src0RM);
       Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
       Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
@@ -1658,7 +1657,7 @@ void TargetX8632::lowerCast(const InstCast *Inst) {
       _store(T_Lo, SpillLo);
       _mov(T_Hi, hiOperand(Src0));
       _store(T_Hi, SpillHi);
-      _mov(Dest, Spill);
+      _movq(Dest, Spill);
     } break;
     }
     break;
@@ -1800,16 +1799,140 @@ void TargetX8632::lowerIcmp(const InstIcmp *Inst) {
 void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
   switch (Instr->getIntrinsicInfo().ID) {
   case Intrinsics::AtomicCmpxchg:
+    if (!Intrinsics::VerifyMemoryOrder(
+            llvm::cast<ConstantInteger>(Instr->getArg(3))->getValue())) {
+      Func->setError("Unexpected memory ordering (success) for AtomicCmpxchg");
+      return;
+    }
+    if (!Intrinsics::VerifyMemoryOrder(
+            llvm::cast<ConstantInteger>(Instr->getArg(4))->getValue())) {
+      Func->setError("Unexpected memory ordering (failure) for AtomicCmpxchg");
+      return;
+    }
+    // TODO(jvoung): fill it in.
+    Func->setError("Unhandled intrinsic");
+    return;
   case Intrinsics::AtomicFence:
+    if (!Intrinsics::VerifyMemoryOrder(
+            llvm::cast<ConstantInteger>(Instr->getArg(0))->getValue())) {
+      Func->setError("Unexpected memory ordering for AtomicFence");
+      return;
+    }
+    _mfence();
+    return;
   case Intrinsics::AtomicFenceAll:
-  case Intrinsics::AtomicIsLockFree:
-  case Intrinsics::AtomicLoad:
+    // NOTE: FenceAll should prevent any load/store from being moved
+    // across the fence (both atomic and non-atomic). The InstX8632Mfence
+    // instruction is currently marked coarsely as "HasSideEffects".
+    _mfence();
+    return;
+  case Intrinsics::AtomicIsLockFree: {
+    // X86 is always lock-free for 8/16/32/64-bit accesses.
+    // TODO(jvoung): Since the result is constant when given a constant
+    // byte size, this opens up DCE opportunities.
+    Operand *ByteSize = Instr->getArg(0);
+    Variable *Dest = Instr->getDest();
+    if (ConstantInteger *CI = llvm::dyn_cast<ConstantInteger>(ByteSize)) {
+      Constant *Result;
+      switch (CI->getValue()) {
+      default:
+        // Some x86-64 processors support the cmpxchg16b instruction, which
+        // can make 16-byte operations lock-free (when used with the LOCK
+        // prefix). However, that's not supported in 32-bit mode, so just
+        // return 0 even for large sizes.
+        Result = Ctx->getConstantZero(IceType_i32);
+        break;
+      case 1:
+      case 2:
+      case 4:
+      case 8:
+        Result = Ctx->getConstantInt(IceType_i32, 1);
+        break;
+      }
+      _mov(Dest, Result);
+      return;
+    }
+    // The PNaCl ABI requires the byte size to be a compile-time constant.
+    Func->setError("AtomicIsLockFree byte size should be compile-time const");
+    return;
+  }
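// [Editor's note, not part of this CL: with a constant byte size, the whole
// call folds to a single constant move. E.g., assuming the PNaCl intrinsic
// name llvm.nacl.atomic.is.lock.free, a call
//   %r = call i32 @llvm.nacl.atomic.is.lock.free(i32 4, i8* %p)
// lowers to just "mov r, 1", which later passes can propagate and DCE.]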
+  case Intrinsics::AtomicLoad: {
+    // We require the memory address to be naturally aligned.
+    // Given that, normal loads are atomic.
+    if (!Intrinsics::VerifyMemoryOrder(
+            llvm::cast<ConstantInteger>(Instr->getArg(1))->getValue())) {
+      Func->setError("Unexpected memory ordering for AtomicLoad");
+      return;
+    }
+    Variable *Dest = Instr->getDest();
+    if (Dest->getType() == IceType_i64) {
+      // Follow what GCC does and use a movq instead of what lowerLoad()
+      // normally does (split the load into two).
+      // Thus, this skips load/arithmetic op folding. Load/arithmetic folding
+      // can't happen anyway, since this is x86-32 and integer arithmetic only
+      // happens on 32-bit quantities.
+      Variable *T = makeReg(IceType_f64);
+      OperandX8632Mem *Addr = FormMemoryOperand(Instr->getArg(0), IceType_f64);
+      _movq(T, Addr);
+      // Then cast the bits back out of the XMM register to the i64 Dest.
+      InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, Dest, T);
+      lowerCast(Cast);
+      // Make sure that the atomic load isn't elided.
+      Context.insert(InstFakeUse::create(Func, Dest->getLo()));
+      Context.insert(InstFakeUse::create(Func, Dest->getHi()));
+      return;
+    }
+    InstLoad *Load = InstLoad::create(Func, Dest, Instr->getArg(0));
+    lowerLoad(Load);
+    // Make sure the atomic load isn't elided.
+    Context.insert(InstFakeUse::create(Func, Dest));
+    return;
+  }
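// [Editor's sketch, not part of this CL: for the i64 path above, the
// expected x86-32 output, assuming lowerCast() keeps the bitcast-through-
// stack-slot strategy shown in the earlier hunks, is roughly:
//   movq xmm0, qword ptr [addr]         ; one atomic 64-bit load
//   movq qword ptr [SpillSlot], xmm0    ; spill the bits
//   mov  eax, dword ptr [SpillSlot]     ; Dest.lo
//   mov  edx, dword ptr [SpillSlot+4]   ; Dest.hi
// Register and slot names are illustrative only.]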
   case Intrinsics::AtomicRMW:
-  case Intrinsics::AtomicStore:
+    if (!Intrinsics::VerifyMemoryOrder(
+            llvm::cast<ConstantInteger>(Instr->getArg(3))->getValue())) {
+      Func->setError("Unexpected memory ordering for AtomicRMW");
+      return;
+    }
+    lowerAtomicRMW(Instr->getDest(),
+                   static_cast<uint32_t>(llvm::cast<ConstantInteger>(
+                       Instr->getArg(0))->getValue()),
+                   Instr->getArg(1), Instr->getArg(2));
+    return;
+  case Intrinsics::AtomicStore: {
+    if (!Intrinsics::VerifyMemoryOrder(
+            llvm::cast<ConstantInteger>(Instr->getArg(2))->getValue())) {
+      Func->setError("Unexpected memory ordering for AtomicStore");
+      return;
+    }
+    // We require the memory address to be naturally aligned.
+    // Given that, normal stores are atomic.
+    // Add a fence after the store to make it visible.
+    Operand *Value = Instr->getArg(0);
+    Operand *Ptr = Instr->getArg(1);
+    if (Value->getType() == IceType_i64) {
+      // Use a movq instead of what lowerStore() normally does
+      // (split the store into two), following what GCC does.
+      // Cast the bits from the i64 into an XMM register first.
+      Variable *T = makeReg(IceType_f64);
+      InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, T, Value);
+      lowerCast(Cast);
+      // Then store the XMM register with a movq.
+      OperandX8632Mem *Addr = FormMemoryOperand(Ptr, IceType_f64);
+      _storeq(T, Addr);
+      _mfence();
+      return;
+    }
+    InstStore *Store = InstStore::create(Func, Value, Ptr);
+    lowerStore(Store);
+    _mfence();
+    return;
+  }
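// [Editor's sketch, not part of this CL: the corresponding expected output
// for the 64-bit store path, under the same spill-slot bitcast assumption:
//   mov  dword ptr [SpillSlot], eax     ; Value.lo
//   mov  dword ptr [SpillSlot+4], edx   ; Value.hi
//   movq xmm0, qword ptr [SpillSlot]
//   movq qword ptr [addr], xmm0         ; one atomic 64-bit store
//   mfence
// Register and slot names are illustrative only.]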
   case Intrinsics::Bswap:
   case Intrinsics::Ctlz:
   case Intrinsics::Ctpop:
   case Intrinsics::Cttz:
+    // TODO(jvoung): fill it in.
     Func->setError("Unhandled intrinsic");
     return;
   case Intrinsics::Longjmp: {
@@ -1817,7 +1940,7 @@ void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
     Call->addArg(Instr->getArg(0));
     Call->addArg(Instr->getArg(1));
     lowerCall(Call);
-    break;
+    return;
   }
   case Intrinsics::Memcpy: {
     // In the future, we could potentially emit an inline memcpy/memset, etc.
@@ -1827,7 +1950,7 @@ void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
     Call->addArg(Instr->getArg(1));
     Call->addArg(Instr->getArg(2));
     lowerCall(Call);
-    break;
+    return;
   }
   case Intrinsics::Memmove: {
     InstCall *Call = makeHelperCall("memmove", NULL, 3);
@@ -1835,7 +1958,7 @@ void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
     Call->addArg(Instr->getArg(1));
     Call->addArg(Instr->getArg(2));
     lowerCall(Call);
-    break;
+    return;
   }
   case Intrinsics::Memset: {
     // The value operand needs to be extended to a stack slot size
@@ -1849,32 +1972,33 @@ void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
     Call->addArg(ValExt);
     Call->addArg(Instr->getArg(2));
     lowerCall(Call);
-    break;
+    return;
   }
   case Intrinsics::NaClReadTP: {
-    Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);
+    Constant *Zero = Ctx->getConstantZero(IceType_i32);
     Operand *Src = OperandX8632Mem::create(Func, IceType_i32, NULL, Zero, NULL,
                                            0, OperandX8632Mem::SegReg_GS);
     Variable *Dest = Instr->getDest();
     Variable *T = NULL;
     _mov(T, Src);
     _mov(Dest, T);
-    break;
+    return;
   }
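// [Editor's note, not part of this CL: NaClReadTP reads the NaCl thread
// pointer from offset 0 of the GS segment, so the pair of moves above
// amounts to:
//   mov eax, dword ptr gs:[0]
//   mov Dest, eax
// with eax standing in for whatever register the allocator picks.]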
   case Intrinsics::Setjmp: {
     InstCall *Call = makeHelperCall("setjmp", Instr->getDest(), 1);
     Call->addArg(Instr->getArg(0));
     lowerCall(Call);
-    break;
+    return;
   }
   case Intrinsics::Sqrt:
   case Intrinsics::Stacksave:
   case Intrinsics::Stackrestore:
+    // TODO(jvoung): fill it in.
     Func->setError("Unhandled intrinsic");
     return;
   case Intrinsics::Trap:
     _ud2();
-    break;
+    return;
   case Intrinsics::UnknownIntrinsic:
     Func->setError("Should not be lowering UnknownIntrinsic");
     return;
@@ -1882,6 +2006,51 @@ void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
   return;
 }
+void TargetX8632::lowerAtomicRMW(Variable *Dest, uint32_t Operation,
+                                 Operand *Ptr, Operand *Val) {
+  switch (Operation) {
+  default:
+    Func->setError("Unknown AtomicRMW operation");
+    return;
+  case Intrinsics::AtomicAdd: {
+    if (Dest->getType() == IceType_i64) {
+      // Do a nasty cmpxchg8b loop. Factor this into a function.
+      // TODO(jvoung): fill it in.
+      Func->setError("Unhandled AtomicRMW operation");
+      return;
+    }
+    OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Dest->getType());
+    const bool Locked = true;
+    Variable *T = NULL;
+    _mov(T, Val);
+    _xadd(Addr, T, Locked);
+    _mov(Dest, T);
+    return;
+  }
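// [Editor's sketch, not part of this CL: for 32-bit operands the AtomicAdd
// case above is the classic fetch-and-add idiom, roughly:
//   mov  T, Val
//   lock xadd dword ptr [addr], T   ; atomically [addr] += T; T = old value
//   mov  Dest, T
// The LOCK prefix is what the Locked flag passed to _xadd() requests.]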
+  case Intrinsics::AtomicSub: {
+    if (Dest->getType() == IceType_i64) {
+      // Do a nasty cmpxchg8b loop.
+      // TODO(jvoung): fill it in.
+      Func->setError("Unhandled AtomicRMW operation");
+      return;
+    }
+    // Generate a memory operand from Ptr.
+    // neg...
+    // Then do the same as AtomicAdd.
+    // TODO(jvoung): fill it in.
+    Func->setError("Unhandled AtomicRMW operation");
+    return;
+  }
+  case Intrinsics::AtomicOr:
+  case Intrinsics::AtomicAnd:
+  case Intrinsics::AtomicXor:
+  case Intrinsics::AtomicExchange:
+    // TODO(jvoung): fill it in.
+    Func->setError("Unhandled AtomicRMW operation");
+    return;
+  }
+}
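// [Editor's sketch, not part of this CL: the "nasty cmpxchg8b loop" the
// TODOs above refer to is the standard x86-32 pattern for 64-bit RMW.
// For AtomicAdd it would look roughly like (names illustrative):
//   mov  eax, dword ptr [addr]       ; old.lo
//   mov  edx, dword ptr [addr+4]     ; old.hi
// retry:
//   mov  ebx, eax
//   mov  ecx, edx
//   add  ebx, Val.lo                 ; new = old + Val
//   adc  ecx, Val.hi
//   lock cmpxchg8b qword ptr [addr]  ; if [addr] == edx:eax, store ecx:ebx
//   jne  retry                       ; else edx:eax = fresh value; retry
// leaving the old value in edx:eax for Dest.]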
+
 namespace {
 bool isAdd(const Inst *Inst) {
@@ -2018,15 +2187,7 @@ void TargetX8632::lowerLoad(const InstLoad *Inst) {
   // optimization already creates an OperandX8632Mem operand, so it
   // doesn't need another level of transformation.
   Type Ty = Inst->getDest()->getType();
-  Operand *Src0 = Inst->getSourceAddress();
-  // Address mode optimization already creates an OperandX8632Mem
-  // operand, so it doesn't need another level of transformation.
-  if (!llvm::isa<OperandX8632Mem>(Src0)) {
-    Variable *Base = llvm::dyn_cast<Variable>(Src0);
-    Constant *Offset = llvm::dyn_cast<Constant>(Src0);
-    assert(Base || Offset);
-    Src0 = OperandX8632Mem::create(Func, Ty, Base, Offset);
-  }
+  Operand *Src0 = FormMemoryOperand(Inst->getSourceAddress(), Ty);
   // Fuse this load with a subsequent Arithmetic instruction in the
   // following situations:
@@ -2034,6 +2195,8 @@ void TargetX8632::lowerLoad(const InstLoad *Inst) {
   // a=[mem]; c=a+b ==> c=b+[mem] if commutative and above is true
   //
   // TODO: Clean up and test thoroughly.
+  // (E.g., if there is an mfence-all, make sure the load ends up on the
+  // same side of the fence.)
   //
   // TODO: Why limit to Arithmetic instructions? This could probably be
   // applied to most any instruction type. Look at all source operands
@@ -2164,19 +2327,7 @@ void TargetX8632::lowerSelect(const InstSelect *Inst) {
 void TargetX8632::lowerStore(const InstStore *Inst) {
   Operand *Value = Inst->getData();
   Operand *Addr = Inst->getAddr();
-  OperandX8632Mem *NewAddr = llvm::dyn_cast<OperandX8632Mem>(Addr);
-  // Address mode optimization already creates an OperandX8632Mem
-  // operand, so it doesn't need another level of transformation.
-  if (!NewAddr) {
-    // The address will be either a constant (which represents a global
-    // variable) or a variable, so either the Base or Offset component
-    // of the OperandX8632Mem will be set.
-    Variable *Base = llvm::dyn_cast<Variable>(Addr);
-    Constant *Offset = llvm::dyn_cast<Constant>(Addr);
-    assert(Base || Offset);
-    NewAddr = OperandX8632Mem::create(Func, Value->getType(), Base, Offset);
-  }
-  NewAddr = llvm::cast<OperandX8632Mem>(legalize(NewAddr));
+  OperandX8632Mem *NewAddr = FormMemoryOperand(Addr, Value->getType());
   if (NewAddr->getType() == IceType_i64) {
     Value = legalize(Value);
@@ -2294,10 +2445,11 @@ Operand *TargetX8632::legalize(Operand *From, LegalMask Allowed,
     // need to go in uninitialized registers.
     From = Ctx->getConstantZero(From->getType());
   }
-  bool NeedsReg = !(Allowed & Legal_Imm) ||
+  bool NeedsReg =
+      !(Allowed & Legal_Imm) ||
       // ConstantFloat and ConstantDouble are actually memory operands.
-      (!(Allowed & Legal_Mem) && (From->getType() == IceType_f32 ||
-                                  From->getType() == IceType_f64));
+      (!(Allowed & Legal_Mem) &&
+       (From->getType() == IceType_f32 || From->getType() == IceType_f64));
   if (NeedsReg) {
     Variable *Reg = makeReg(From->getType(), RegNum);
     _mov(Reg, From);
@@ -2330,6 +2482,20 @@ Variable *TargetX8632::legalizeToVar(Operand *From, bool AllowOverlap,
   return llvm::cast<Variable>(legalize(From, Legal_Reg, AllowOverlap, RegNum));
 }
+OperandX8632Mem *TargetX8632::FormMemoryOperand(Operand *Operand, Type Ty) {
+  OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand);
+  // The address mode optimization may have already created an
+  // OperandX8632Mem, in which case it doesn't need another level of
+  // transformation.
+  if (!Mem) {
+    Variable *Base = llvm::dyn_cast<Variable>(Operand);
+    Constant *Offset = llvm::dyn_cast<Constant>(Operand);
+    assert(Base || Offset);
+    Mem = OperandX8632Mem::create(Func, Ty, Base, Offset);
+  }
+  return llvm::cast<OperandX8632Mem>(legalize(Mem));
+}
+
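// [Editor's note, not part of this CL: FormMemoryOperand accepts three
// shapes of operand. An existing OperandX8632Mem passes through (modulo
// legalization); a Variable becomes the base register of a new memory
// operand; and a Constant (a global's address) becomes the displacement.
// E.g., given a Variable *Ptr holding an address:
//   OperandX8632Mem *Addr = FormMemoryOperand(Ptr, IceType_i32);
//   // roughly "dword ptr [Ptr]" once Ptr is legalized into a register.]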
 Variable *TargetX8632::makeReg(Type Type, int32_t RegNum) {
   // There aren't any 64-bit integer registers for x86-32.
   assert(Type != IceType_i64);