Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(262)

Side by Side Diff: src/IceTargetLoweringX86BaseImpl.h

Issue 1255053008: Inline memset when there is a constant value and count. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Created 5 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/IceTargetLoweringX86Base.h ('k') | tests_lit/llvm2ice_tests/nacl-mem-intrinsics.ll » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
(...skipping 3504 matching lines...) Expand 10 before | Expand all | Expand 10 after
3515 } 3515 }
3516 case Intrinsics::Memmove: { 3516 case Intrinsics::Memmove: {
3517 InstCall *Call = makeHelperCall(H_call_memmove, nullptr, 3); 3517 InstCall *Call = makeHelperCall(H_call_memmove, nullptr, 3);
3518 Call->addArg(Instr->getArg(0)); 3518 Call->addArg(Instr->getArg(0));
3519 Call->addArg(Instr->getArg(1)); 3519 Call->addArg(Instr->getArg(1));
3520 Call->addArg(Instr->getArg(2)); 3520 Call->addArg(Instr->getArg(2));
3521 lowerCall(Call); 3521 lowerCall(Call);
3522 return; 3522 return;
3523 } 3523 }
3524 case Intrinsics::Memset: { 3524 case Intrinsics::Memset: {
3525 // The value operand needs to be extended to a stack slot size because the 3525 lowerMemset(Instr->getArg(0), Instr->getArg(1), Instr->getArg(2));
3526 // PNaCl ABI requires arguments to be at least 32 bits wide.
3527 Operand *ValOp = Instr->getArg(1);
3528 assert(ValOp->getType() == IceType_i8);
3529 Variable *ValExt = Func->makeVariable(stackSlotType());
3530 lowerCast(InstCast::create(Func, InstCast::Zext, ValExt, ValOp));
3531 InstCall *Call = makeHelperCall(H_call_memset, nullptr, 3);
3532 Call->addArg(Instr->getArg(0));
3533 Call->addArg(ValExt);
3534 Call->addArg(Instr->getArg(2));
3535 lowerCall(Call);
3536 return; 3526 return;
3537 } 3527 }
3538 case Intrinsics::NaClReadTP: { 3528 case Intrinsics::NaClReadTP: {
3539 if (Ctx->getFlags().getUseSandboxing()) { 3529 if (Ctx->getFlags().getUseSandboxing()) {
3540 Operand *Src = dispatchToConcrete(&Machine::createNaClReadTPSrcOperand); 3530 Operand *Src = dispatchToConcrete(&Machine::createNaClReadTPSrcOperand);
3541 Variable *Dest = Instr->getDest(); 3531 Variable *Dest = Instr->getDest();
3542 Variable *T = nullptr; 3532 Variable *T = nullptr;
3543 _mov(T, Src); 3533 _mov(T, Src);
3544 _mov(Dest, T); 3534 _mov(Dest, T);
3545 } else { 3535 } else {
(...skipping 417 matching lines...) Expand 10 before | Expand all | Expand 10 after
3963 } else { 3953 } else {
3964 _bsr(T_Dest2, SecondVar); 3954 _bsr(T_Dest2, SecondVar);
3965 _xor(T_Dest2, ThirtyOne); 3955 _xor(T_Dest2, ThirtyOne);
3966 } 3956 }
3967 _test(SecondVar, SecondVar); 3957 _test(SecondVar, SecondVar);
3968 _cmov(T_Dest2, T_Dest, Traits::Cond::Br_e); 3958 _cmov(T_Dest2, T_Dest, Traits::Cond::Br_e);
3969 _mov(DestLo, T_Dest2); 3959 _mov(DestLo, T_Dest2);
3970 _mov(DestHi, Ctx->getConstantZero(IceType_i32)); 3960 _mov(DestHi, Ctx->getConstantZero(IceType_i32));
3971 } 3961 }
3972 3962
3963 template <class Machine>
3964 void TargetX86Base<Machine>::lowerMemset(Operand *Dest, Operand *Val,
3965 Operand *Count) {
3966 constexpr uint32_t UNROLL_LIMIT = 16;
3967 assert(Val->getType() == IceType_i8);
3968
3969 // Check if the operands are constants
3970 const auto *CountConst = llvm::dyn_cast<const ConstantInteger32>(Count);
3971 const auto *ValConst = llvm::dyn_cast<const ConstantInteger32>(Val);
3972 const bool IsCountConst = CountConst != nullptr;
3973 const bool IsValConst = ValConst != nullptr;
3974 const uint32_t CountValue = IsCountConst ? CountConst->getValue() : 0;
3975 const uint32_t ValValue = IsValConst ? ValConst->getValue() : 0;
3976
3977 // Unlikely, but nothing to do if it does happen
3978 if (IsCountConst && CountValue == 0)
3979 return;
3980
3981 // TODO(ascull): if the count is constant but the value is in a register, this could also be inlined by storing through subregisters (e.g. eax, ax, al)
jvoung (off chromium) 2015/08/04 16:50:16 Could you expand on this comment more, in case it'
ascull 2015/08/04 18:01:01 Done.
3982 if (IsCountConst && IsValConst) {
3983 constexpr Variable *Index = nullptr;
3984 constexpr uint16_t Shift = 0;
3985 Variable *Base = legalizeToReg(Dest);
3986
3987 // 3 is the awkward size as it is too small for the vector or 32-bit
jvoung (off chromium) 2015/08/04 16:50:16 lowerleftOvers -> lowerLeftOvers
ascull 2015/08/04 18:01:01 Done.
3988 // operations and will not work with lowerLeftOvers as there is no valid
3989 // overlap.
3990 if (CountValue == 3) {
3991 Constant *Offset = nullptr;
3992 auto *Mem = Traits::X86OperandMem::create(Func, IceType_i16, Base, Offset,
3993 Index, Shift);
3994 _store(Ctx->getConstantInt16((ValValue << 8) | ValValue), Mem);
3995
3996 Offset = Ctx->getConstantInt(Base->getType(), 2);
jvoung (off chromium) 2015/08/04 16:50:16 Hmm, getConstantInt() vs getConstantInt32 is to ma
ascull 2015/08/04 18:01:01 I thought I remembered that the size of Base and O
3997 Mem = Traits::X86OperandMem::create(Func, IceType_i8, Base, Offset, Index,
3998 Shift);
3999 _store(Ctx->getConstantInt8(ValValue), Mem);
4000 return;
4001 }
4002
4003 // Lowers the assignment to the remaining bytes. Assumes the original size
4004 // was large enough to allow for overlaps.
4005 auto lowerLeftOvers = [this, Base, CountValue](
4006 uint32_t Value, uint32_t Size, Variable *VecReg) {
4007 auto lowerStoreZero = [this, Base, CountValue, Value](Type Ty) {
jvoung (off chromium) 2015/08/04 16:50:16 Is this more general than lowerStoreZero now? lowe
ascull 2015/08/04 18:01:01 It is more general but I forgot to update the name
4008 Constant *Offset = Ctx->getConstantInt(
4009 Base->getType(), CountValue - typeWidthInBytes(Ty));
4010 auto *Mem =
4011 Traits::X86OperandMem::create(Func, Ty, Base, Offset, Index, Shift);
4012 _store(Ctx->getConstantInt(Ty, Value), Mem);
4013 };
4014
4015 if (Size > 8) {
4016 assert(VecReg != nullptr);
4017 Constant *Offset =
4018 Ctx->getConstantInt(Base->getType(), CountValue - 16);
4019 auto *Mem = Traits::X86OperandMem::create(Func, VecReg->getType(), Base,
4020 Offset, Index, Shift);
4021 _storep(VecReg, Mem);
4022 } else if (Size > 4) {
4023 assert(VecReg != nullptr);
4024 Constant *Offset = Ctx->getConstantInt(Base->getType(), CountValue - 8);
4025 auto *Mem = Traits::X86OperandMem::create(Func, VecReg->getType(), Base,
4026 Offset, Index, Shift);
4027 _storeq(VecReg, Mem);
4028 } else if (Size > 2) {
4029 lowerStoreZero(IceType_i32);
4030 } else if (Size > 1) {
4031 lowerStoreZero(IceType_i16);
4032 } else if (Size == 1) {
4033 lowerStoreZero(IceType_i8);
4034 }
4035 };
4036
4037 // When the value is zero it can be loaded into a register cheaply using
4038 // the xor trick.
4039 if (ValValue == 0 && CountValue >= 8 && CountValue <= 16 * UNROLL_LIMIT) {
4040 Variable *Zero = makeVectorOfZeros(IceType_v16i8);
4041
4042 // Too small to use large vector operations so use small ones instead
4043 if (CountValue < 16) {
4044 Constant *Offset = nullptr;
4045 auto *Mem = Traits::X86OperandMem::create(Func, Zero->getType(), Base,
jvoung (off chromium) 2015/08/04 16:50:16 Hmm, maybe use IceType_i64 instead of Zero->getTyp
ascull 2015/08/04 18:01:01 Done.
4046 Offset, Index, Shift);
4047 _storeq(Zero, Mem);
4048 lowerLeftOvers(0, CountValue - 8, Zero);
4049 return;
4050 }
4051
4052 assert(CountValue >= 16);
4053 // Use large vector operations
4054 for (uint32_t N = CountValue & 0xFFFFFFF0; N != 0;) {
4055 N -= 16;
4056 Constant *Offset = Ctx->getConstantInt(Base->getType(), N);
4057 auto *Mem = Traits::X86OperandMem::create(Func, Zero->getType(), Base,
4058 Offset, Index, Shift);
4059 _storep(Zero, Mem);
4060 }
4061 uint32_t LeftOver = CountValue & 0xF;
4062 lowerLeftOvers(0, LeftOver, Zero);
4063 return;
4064 }
4065
4066 // TODO(ascull): load val into reg and select eax, ax, al?
4067 if (CountValue <= 4 * UNROLL_LIMIT) {
4068 // TODO(ascull): 64-bit can do better with 64-bit mov
4069 uint32_t SpreadValue =
4070 (ValValue << 24) | (ValValue << 16) | (ValValue << 8) | ValValue;
4071 if (CountValue >= 4) {
4072 Constant *ValueConst = Ctx->getConstantInt32(SpreadValue);
4073 for (uint32_t N = CountValue & 0xFFFFFFFC; N != 0;) {
4074 N -= 4;
4075 Constant *Offset = Ctx->getConstantInt(Base->getType(), N);
4076 auto *Mem = Traits::X86OperandMem::create(Func, IceType_i32, Base,
4077 Offset, Index, Shift);
4078 _store(ValueConst, Mem);
4079 }
4080 }
4081 uint32_t LeftOver = CountValue & 0x3;
4082 lowerLeftOvers(SpreadValue, LeftOver, nullptr);
4083 return;
4084 }
4085 }
4086
4087 // Fall back on calling the memset function. The value operand needs to be
4088 // extended to a stack slot size because the PNaCl ABI requires arguments to
4089 // be at least 32 bits wide.
4090 Operand *ValExt;
4091 if (IsValConst) {
4092 ValExt = Ctx->getConstantInt(stackSlotType(), ValValue);
4093 } else {
4094 Variable *ValExtVar = Func->makeVariable(stackSlotType());
4095 lowerCast(InstCast::create(Func, InstCast::Zext, ValExtVar, Val));
4096 ValExt = ValExtVar;
4097 }
4098 InstCall *Call = makeHelperCall(H_call_memset, nullptr, 3);
4099 Call->addArg(Dest);
4100 Call->addArg(ValExt);
4101 Call->addArg(Count);
4102 lowerCall(Call);
4103 }
4104
3973 inline bool isAdd(const Inst *Inst) { 4105 inline bool isAdd(const Inst *Inst) {
3974 if (const InstArithmetic *Arith = 4106 if (const InstArithmetic *Arith =
3975 llvm::dyn_cast_or_null<const InstArithmetic>(Inst)) { 4107 llvm::dyn_cast_or_null<const InstArithmetic>(Inst)) {
3976 return (Arith->getOp() == InstArithmetic::Add); 4108 return (Arith->getOp() == InstArithmetic::Add);
3977 } 4109 }
3978 return false; 4110 return false;
3979 } 4111 }
3980 4112
3981 inline void dumpAddressOpt(const Cfg *Func, const Variable *Base, 4113 inline void dumpAddressOpt(const Cfg *Func, const Variable *Base,
3982 const Variable *Index, uint16_t Shift, 4114 const Variable *Index, uint16_t Shift,
(...skipping 1633 matching lines...) Expand 10 before | Expand all | Expand 10 after
5616 } 5748 }
5617 // the offset is not eligible for blinding or pooling, return the original 5749 // the offset is not eligible for blinding or pooling, return the original
5618 // mem operand 5750 // mem operand
5619 return MemOperand; 5751 return MemOperand;
5620 } 5752 }
5621 5753
5622 } // end of namespace X86Internal 5754 } // end of namespace X86Internal
5623 } // end of namespace Ice 5755 } // end of namespace Ice
5624 5756
5625 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H 5757 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H
OLD | NEW
« no previous file with comments | « src/IceTargetLoweringX86Base.h ('k') | tests_lit/llvm2ice_tests/nacl-mem-intrinsics.ll » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698