Chromium Code Reviews | Index: src/IceTargetLoweringX86BaseImpl.h |
| diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h |
| index 77048b089c1a4c9934568f8dd973429138e36f9c..2d6163b51b3ce1a411f73cbcaed7c2cae4b255a1 100644 |
| --- a/src/IceTargetLoweringX86BaseImpl.h |
| +++ b/src/IceTargetLoweringX86BaseImpl.h |
| @@ -3522,17 +3522,7 @@ void TargetX86Base<Machine>::lowerIntrinsicCall( |
| return; |
| } |
| case Intrinsics::Memset: { |
| - // The value operand needs to be extended to a stack slot size because the |
| - // PNaCl ABI requires arguments to be at least 32 bits wide. |
| - Operand *ValOp = Instr->getArg(1); |
| - assert(ValOp->getType() == IceType_i8); |
| - Variable *ValExt = Func->makeVariable(stackSlotType()); |
| - lowerCast(InstCast::create(Func, InstCast::Zext, ValExt, ValOp)); |
| - InstCall *Call = makeHelperCall(H_call_memset, nullptr, 3); |
| - Call->addArg(Instr->getArg(0)); |
| - Call->addArg(ValExt); |
| - Call->addArg(Instr->getArg(2)); |
| - lowerCall(Call); |
| + lowerMemset(Instr->getArg(0), Instr->getArg(1), Instr->getArg(2)); |
| return; |
| } |
| case Intrinsics::NaClReadTP: { |
| @@ -3970,6 +3960,148 @@ void TargetX86Base<Machine>::lowerCountZeros(bool Cttz, Type Ty, Variable *Dest, |
| _mov(DestHi, Ctx->getConstantZero(IceType_i32)); |
| } |
| +template <class Machine> |
| +void TargetX86Base<Machine>::lowerMemset(Operand *Dest, Operand *Val, |
| + Operand *Count) { |
| + constexpr uint32_t UNROLL_LIMIT = 16; |
| + assert(Val->getType() == IceType_i8); |
| + |
| + // Check if the operands are constants |
| + const auto *CountConst = llvm::dyn_cast<const ConstantInteger32>(Count); |
| + const auto *ValConst = llvm::dyn_cast<const ConstantInteger32>(Val); |
| + const bool IsCountConst = CountConst != nullptr; |
| + const bool IsValConst = ValConst != nullptr; |
| + const uint32_t CountValue = IsCountConst ? CountConst->getValue() : 0; |
| + const uint32_t ValValue = IsValConst ? ValConst->getValue() : 0; |
| + |
| + // Unlikely, but nothing to do if it does happen |
| + if (IsCountConst && CountValue == 0) |
| + return; |
| + |
| + // TODO(ascull): if const count but reg val could inline too with subregs |
|
jvoung (off chromium)
2015/08/04 16:50:16
Could you expand on this comment more, in case it'…
ascull
2015/08/04 18:01:01
Done.
|
| + if (IsCountConst && IsValConst) { |
| + constexpr Variable *Index = nullptr; |
| + constexpr uint16_t Shift = 0; |
| + Variable *Base = legalizeToReg(Dest); |
| + |
| + // 3 is the awkward size as it is too small for the vector or 32-bit |
|
jvoung (off chromium)
2015/08/04 16:50:16
lowerleftOvers -> lowerLeftOvers
ascull
2015/08/04 18:01:01
Done.
|
| + // operations and will not work with lowerleftOvers as there is no valid |
| + // overlap. |
| + if (CountValue == 3) { |
| + Constant *Offset = nullptr; |
| + auto *Mem = Traits::X86OperandMem::create(Func, IceType_i16, Base, Offset, |
| + Index, Shift); |
| + _store(Ctx->getConstantInt16((ValValue << 8) | ValValue), Mem); |
| + |
| + Offset = Ctx->getConstantInt(Base->getType(), 2); |
|
jvoung (off chromium)
2015/08/04 16:50:16
Hmm, getConstantInt() vs getConstantInt32 is to ma…
ascull
2015/08/04 18:01:01
I thought I remembered that the size of Base and Offset…
|
| + Mem = Traits::X86OperandMem::create(Func, IceType_i8, Base, Offset, Index, |
| + Shift); |
| + _store(Ctx->getConstantInt8(ValValue), Mem); |
| + return; |
| + } |
| + |
| + // Lowers the assignment to the remaining bytes. Assumes the original size |
| + // was large enough to allow for overlaps. |
| + auto lowerLeftOvers = [this, Base, CountValue]( |
| + uint32_t Value, uint32_t Size, Variable *VecReg) { |
| + auto lowerStoreZero = [this, Base, CountValue, Value](Type Ty) { |
|
jvoung (off chromium)
2015/08/04 16:50:16
Is this more general than lowerStoreZero now? lowe…
ascull
2015/08/04 18:01:01
It is more general but I forgot to update the name…
|
| + Constant *Offset = Ctx->getConstantInt( |
| + Base->getType(), CountValue - typeWidthInBytes(Ty)); |
| + auto *Mem = |
| + Traits::X86OperandMem::create(Func, Ty, Base, Offset, Index, Shift); |
| + _store(Ctx->getConstantInt(Ty, Value), Mem); |
| + }; |
| + |
| + if (Size > 8) { |
| + assert(VecReg != nullptr); |
| + Constant *Offset = |
| + Ctx->getConstantInt(Base->getType(), CountValue - 16); |
| + auto *Mem = Traits::X86OperandMem::create(Func, VecReg->getType(), Base, |
| + Offset, Index, Shift); |
| + _storep(VecReg, Mem); |
| + } else if (Size > 4) { |
| + assert(VecReg != nullptr); |
| + Constant *Offset = Ctx->getConstantInt(Base->getType(), CountValue - 8); |
| + auto *Mem = Traits::X86OperandMem::create(Func, VecReg->getType(), Base, |
| + Offset, Index, Shift); |
| + _storeq(VecReg, Mem); |
| + } else if (Size > 2) { |
| + lowerStoreZero(IceType_i32); |
| + } else if (Size > 1) { |
| + lowerStoreZero(IceType_i16); |
| + } else if (Size == 1) { |
| + lowerStoreZero(IceType_i8); |
| + } |
| + }; |
| + |
| + // When the value is zero it can be loaded into a register cheaply using |
| + // the xor trick. |
| + if (ValValue == 0 && CountValue >= 8 && CountValue <= 16 * UNROLL_LIMIT) { |
| + Variable *Zero = makeVectorOfZeros(IceType_v16i8); |
| + |
| + // Too small to use large vector operations so use small ones instead |
| + if (CountValue < 16) { |
| + Constant *Offset = nullptr; |
| + auto *Mem = Traits::X86OperandMem::create(Func, Zero->getType(), Base, |
|
jvoung (off chromium)
2015/08/04 16:50:16
Hmm, maybe use IceType_i64 instead of Zero->getType()…
ascull
2015/08/04 18:01:01
Done.
|
| + Offset, Index, Shift); |
| + _storeq(Zero, Mem); |
| + lowerLeftOvers(0, CountValue - 8, Zero); |
| + return; |
| + } |
| + |
| + assert(CountValue >= 16); |
| + // Use large vector operations |
| + for (uint32_t N = CountValue & 0xFFFFFFF0; N != 0;) { |
| + N -= 16; |
| + Constant *Offset = Ctx->getConstantInt(Base->getType(), N); |
| + auto *Mem = Traits::X86OperandMem::create(Func, Zero->getType(), Base, |
| + Offset, Index, Shift); |
| + _storep(Zero, Mem); |
| + } |
| + uint32_t LeftOver = CountValue & 0xF; |
| + lowerLeftOvers(0, LeftOver, Zero); |
| + return; |
| + } |
| + |
| + // TODO(ascull): load val into reg and select eax, ax, al? |
| + if (CountValue <= 4 * UNROLL_LIMIT) { |
| + // TODO(ascull); 64-bit can do better with 64-bit mov |
| + uint32_t SpreadValue = |
| + (ValValue << 24) | (ValValue << 16) | (ValValue << 8) | ValValue; |
| + if (CountValue >= 4) { |
| + Constant *ValueConst = Ctx->getConstantInt32(SpreadValue); |
| + for (uint32_t N = CountValue & 0xFFFFFFFC; N != 0;) { |
| + N -= 4; |
| + Constant *Offset = Ctx->getConstantInt(Base->getType(), N); |
| + auto *Mem = Traits::X86OperandMem::create(Func, IceType_i32, Base, |
| + Offset, Index, Shift); |
| + _store(ValueConst, Mem); |
| + } |
| + } |
| + uint32_t LeftOver = CountValue & 0x3; |
| + lowerLeftOvers(SpreadValue, LeftOver, nullptr); |
| + return; |
| + } |
| + } |
| + |
| + // Fall back on calling the memset function. The value operand needs to be |
| + // extended to a stack slot size because the PNaCl ABI requires arguments to |
| + // be at least 32 bits wide. |
| + Operand *ValExt; |
| + if (IsValConst) { |
| + ValExt = Ctx->getConstantInt(stackSlotType(), ValValue); |
| + } else { |
| + Variable *ValExtVar = Func->makeVariable(stackSlotType()); |
| + lowerCast(InstCast::create(Func, InstCast::Zext, ValExtVar, Val)); |
| + ValExt = ValExtVar; |
| + } |
| + InstCall *Call = makeHelperCall(H_call_memset, nullptr, 3); |
| + Call->addArg(Dest); |
| + Call->addArg(ValExt); |
| + Call->addArg(Count); |
| + lowerCall(Call); |
| +} |
| + |
| inline bool isAdd(const Inst *Inst) { |
| if (const InstArithmetic *Arith = |
| llvm::dyn_cast_or_null<const InstArithmetic>(Inst)) { |