Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// | 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 /// | 9 /// |
| 10 /// \file | 10 /// \file |
| (...skipping 3504 matching lines...) | |
| 3515 } | 3515 } |
| 3516 case Intrinsics::Memmove: { | 3516 case Intrinsics::Memmove: { |
| 3517 InstCall *Call = makeHelperCall(H_call_memmove, nullptr, 3); | 3517 InstCall *Call = makeHelperCall(H_call_memmove, nullptr, 3); |
| 3518 Call->addArg(Instr->getArg(0)); | 3518 Call->addArg(Instr->getArg(0)); |
| 3519 Call->addArg(Instr->getArg(1)); | 3519 Call->addArg(Instr->getArg(1)); |
| 3520 Call->addArg(Instr->getArg(2)); | 3520 Call->addArg(Instr->getArg(2)); |
| 3521 lowerCall(Call); | 3521 lowerCall(Call); |
| 3522 return; | 3522 return; |
| 3523 } | 3523 } |
| 3524 case Intrinsics::Memset: { | 3524 case Intrinsics::Memset: { |
| 3525 // The value operand needs to be extended to a stack slot size because the | 3525 lowerMemset(Instr->getArg(0), Instr->getArg(1), Instr->getArg(2)); |
| 3526 // PNaCl ABI requires arguments to be at least 32 bits wide. | |
| 3527 Operand *ValOp = Instr->getArg(1); | |
| 3528 assert(ValOp->getType() == IceType_i8); | |
| 3529 Variable *ValExt = Func->makeVariable(stackSlotType()); | |
| 3530 lowerCast(InstCast::create(Func, InstCast::Zext, ValExt, ValOp)); | |
| 3531 InstCall *Call = makeHelperCall(H_call_memset, nullptr, 3); | |
| 3532 Call->addArg(Instr->getArg(0)); | |
| 3533 Call->addArg(ValExt); | |
| 3534 Call->addArg(Instr->getArg(2)); | |
| 3535 lowerCall(Call); | |
| 3536 return; | 3526 return; |
| 3537 } | 3527 } |
| 3538 case Intrinsics::NaClReadTP: { | 3528 case Intrinsics::NaClReadTP: { |
| 3539 if (Ctx->getFlags().getUseSandboxing()) { | 3529 if (Ctx->getFlags().getUseSandboxing()) { |
| 3540 Operand *Src = dispatchToConcrete(&Machine::createNaClReadTPSrcOperand); | 3530 Operand *Src = dispatchToConcrete(&Machine::createNaClReadTPSrcOperand); |
| 3541 Variable *Dest = Instr->getDest(); | 3531 Variable *Dest = Instr->getDest(); |
| 3542 Variable *T = nullptr; | 3532 Variable *T = nullptr; |
| 3543 _mov(T, Src); | 3533 _mov(T, Src); |
| 3544 _mov(Dest, T); | 3534 _mov(Dest, T); |
| 3545 } else { | 3535 } else { |
| (...skipping 417 matching lines...) | |
| 3963 } else { | 3953 } else { |
| 3964 _bsr(T_Dest2, SecondVar); | 3954 _bsr(T_Dest2, SecondVar); |
| 3965 _xor(T_Dest2, ThirtyOne); | 3955 _xor(T_Dest2, ThirtyOne); |
| 3966 } | 3956 } |
| 3967 _test(SecondVar, SecondVar); | 3957 _test(SecondVar, SecondVar); |
| 3968 _cmov(T_Dest2, T_Dest, Traits::Cond::Br_e); | 3958 _cmov(T_Dest2, T_Dest, Traits::Cond::Br_e); |
| 3969 _mov(DestLo, T_Dest2); | 3959 _mov(DestLo, T_Dest2); |
| 3970 _mov(DestHi, Ctx->getConstantZero(IceType_i32)); | 3960 _mov(DestHi, Ctx->getConstantZero(IceType_i32)); |
| 3971 } | 3961 } |
| 3972 | 3962 |
| 3963 template <class Machine> | |
| 3964 void TargetX86Base<Machine>::lowerMemset(Operand *Dest, Operand *Val, | |
| 3965 Operand *Count) { | |
| 3966 constexpr uint32_t UNROLL_LIMIT = 16; | |
| 3967 assert(Val->getType() == IceType_i8); | |
| 3968 | |
| 3969 // Check if the operands are constants | |
| 3970 const auto *CountConst = llvm::dyn_cast<const ConstantInteger32>(Count); | |
| 3971 const auto *ValConst = llvm::dyn_cast<const ConstantInteger32>(Val); | |
| 3972 const bool IsCountConst = CountConst != nullptr; | |
| 3973 const bool IsValConst = ValConst != nullptr; | |
| 3974 const uint32_t CountValue = IsCountConst ? CountConst->getValue() : 0; | |
| 3975 const uint32_t ValValue = IsValConst ? ValConst->getValue() : 0; | |
| 3976 | |
| 3977 // Unlikely, but nothing to do if it does happen | |
| 3978 if (IsCountConst && CountValue == 0) | |
| 3979 return; | |
| 3980 | |
| 3981 // TODO(ascull): if const count but reg val could inline too with subregs | |
|
jvoung (off chromium)
2015/08/04 16:50:16
Could you expand on this comment more, in case it'
ascull
2015/08/04 18:01:01
Done.
| |
| 3982 if (IsCountConst && IsValConst) { | |
| 3983 constexpr Variable *Index = nullptr; | |
| 3984 constexpr uint16_t Shift = 0; | |
| 3985 Variable *Base = legalizeToReg(Dest); | |
| 3986 | |
| 3987 // 3 is the awkward size as it is too small for the vector or 32-bit | |
|
jvoung (off chromium)
2015/08/04 16:50:16
lowerleftOvers -> lowerLeftOvers
ascull
2015/08/04 18:01:01
Done.
| |
| 3988 // operations and will not work with lowerleftOvers as there is no valid | |
| 3989 // overlap. | |
| 3990 if (CountValue == 3) { | |
| 3991 Constant *Offset = nullptr; | |
| 3992 auto *Mem = Traits::X86OperandMem::create(Func, IceType_i16, Base, Offset, | |
| 3993 Index, Shift); | |
| 3994 _store(Ctx->getConstantInt16((ValValue << 8) | ValValue), Mem); | |
| 3995 | |
| 3996 Offset = Ctx->getConstantInt(Base->getType(), 2); | |
|
jvoung (off chromium)
2015/08/04 16:50:16
Hmm, getConstantInt() vs getConstantInt32 is to ma
ascull
2015/08/04 18:01:01
I thought I remembered that the size of Base and O
| |
| 3997 Mem = Traits::X86OperandMem::create(Func, IceType_i8, Base, Offset, Index, | |
| 3998 Shift); | |
| 3999 _store(Ctx->getConstantInt8(ValValue), Mem); | |
| 4000 return; | |
| 4001 } | |
| 4002 | |
| 4003 // Lowers the assignment to the remaining bytes. Assumes the original size | |
| 4004 // was large enough to allow for overlaps. | |
| 4005 auto lowerLeftOvers = [this, Base, CountValue]( | |
| 4006 uint32_t Value, uint32_t Size, Variable *VecReg) { | |
| 4007 auto lowerStoreZero = [this, Base, CountValue, Value](Type Ty) { | |
|
jvoung (off chromium)
2015/08/04 16:50:16
Is this more general than lowerStoreZero now? lowe
ascull
2015/08/04 18:01:01
It is more general but I forgot to update the name
| |
| 4008 Constant *Offset = Ctx->getConstantInt( | |
| 4009 Base->getType(), CountValue - typeWidthInBytes(Ty)); | |
| 4010 auto *Mem = | |
| 4011 Traits::X86OperandMem::create(Func, Ty, Base, Offset, Index, Shift); | |
| 4012 _store(Ctx->getConstantInt(Ty, Value), Mem); | |
| 4013 }; | |
| 4014 | |
| 4015 if (Size > 8) { | |
| 4016 assert(VecReg != nullptr); | |
| 4017 Constant *Offset = | |
| 4018 Ctx->getConstantInt(Base->getType(), CountValue - 16); | |
| 4019 auto *Mem = Traits::X86OperandMem::create(Func, VecReg->getType(), Base, | |
| 4020 Offset, Index, Shift); | |
| 4021 _storep(VecReg, Mem); | |
| 4022 } else if (Size > 4) { | |
| 4023 assert(VecReg != nullptr); | |
| 4024 Constant *Offset = Ctx->getConstantInt(Base->getType(), CountValue - 8); | |
| 4025 auto *Mem = Traits::X86OperandMem::create(Func, VecReg->getType(), Base, | |
| 4026 Offset, Index, Shift); | |
| 4027 _storeq(VecReg, Mem); | |
| 4028 } else if (Size > 2) { | |
| 4029 lowerStoreZero(IceType_i32); | |
| 4030 } else if (Size > 1) { | |
| 4031 lowerStoreZero(IceType_i16); | |
| 4032 } else if (Size == 1) { | |
| 4033 lowerStoreZero(IceType_i8); | |
| 4034 } | |
| 4035 }; | |
| 4036 | |
| 4037 // When the value is zero it can be loaded into a register cheaply using | |
| 4038 // the xor trick. | |
| 4039 if (ValValue == 0 && CountValue >= 8 && CountValue <= 16 * UNROLL_LIMIT) { | |
| 4040 Variable *Zero = makeVectorOfZeros(IceType_v16i8); | |
| 4041 | |
| 4042 // Too small to use large vector operations so use small ones instead | |
| 4043 if (CountValue < 16) { | |
| 4044 Constant *Offset = nullptr; | |
| 4045 auto *Mem = Traits::X86OperandMem::create(Func, Zero->getType(), Base, | |
|
jvoung (off chromium)
2015/08/04 16:50:16
Hmm, maybe use IceType_i64 instead of Zero->getTyp
ascull
2015/08/04 18:01:01
Done.
| |
| 4046 Offset, Index, Shift); | |
| 4047 _storeq(Zero, Mem); | |
| 4048 lowerLeftOvers(0, CountValue - 8, Zero); | |
| 4049 return; | |
| 4050 } | |
| 4051 | |
| 4052 assert(CountValue >= 16); | |
| 4053 // Use large vector operations | |
| 4054 for (uint32_t N = CountValue & 0xFFFFFFF0; N != 0;) { | |
| 4055 N -= 16; | |
| 4056 Constant *Offset = Ctx->getConstantInt(Base->getType(), N); | |
| 4057 auto *Mem = Traits::X86OperandMem::create(Func, Zero->getType(), Base, | |
| 4058 Offset, Index, Shift); | |
| 4059 _storep(Zero, Mem); | |
| 4060 } | |
| 4061 uint32_t LeftOver = CountValue & 0xF; | |
| 4062 lowerLeftOvers(0, LeftOver, Zero); | |
| 4063 return; | |
| 4064 } | |
| 4065 | |
| 4066 // TODO(ascull): load val into reg and select eax, ax, al? | |
| 4067 if (CountValue <= 4 * UNROLL_LIMIT) { | |
| 4068 // TODO(ascull); 64-bit can do better with 64-bit mov | |
| 4069 uint32_t SpreadValue = | |
| 4070 (ValValue << 24) | (ValValue << 16) | (ValValue << 8) | ValValue; | |
| 4071 if (CountValue >= 4) { | |
| 4072 Constant *ValueConst = Ctx->getConstantInt32(SpreadValue); | |
| 4073 for (uint32_t N = CountValue & 0xFFFFFFFC; N != 0;) { | |
| 4074 N -= 4; | |
| 4075 Constant *Offset = Ctx->getConstantInt(Base->getType(), N); | |
| 4076 auto *Mem = Traits::X86OperandMem::create(Func, IceType_i32, Base, | |
| 4077 Offset, Index, Shift); | |
| 4078 _store(ValueConst, Mem); | |
| 4079 } | |
| 4080 } | |
| 4081 uint32_t LeftOver = CountValue & 0x3; | |
| 4082 lowerLeftOvers(SpreadValue, LeftOver, nullptr); | |
| 4083 return; | |
| 4084 } | |
| 4085 } | |
| 4086 | |
| 4087 // Fall back on calling the memset function. The value operand needs to be | |
| 4088 // extended to a stack slot size because the PNaCl ABI requires arguments to | |
| 4089 // be at least 32 bits wide. | |
| 4090 Operand *ValExt; | |
| 4091 if (IsValConst) { | |
| 4092 ValExt = Ctx->getConstantInt(stackSlotType(), ValValue); | |
| 4093 } else { | |
| 4094 Variable *ValExtVar = Func->makeVariable(stackSlotType()); | |
| 4095 lowerCast(InstCast::create(Func, InstCast::Zext, ValExtVar, Val)); | |
| 4096 ValExt = ValExtVar; | |
| 4097 } | |
| 4098 InstCall *Call = makeHelperCall(H_call_memset, nullptr, 3); | |
| 4099 Call->addArg(Dest); | |
| 4100 Call->addArg(ValExt); | |
| 4101 Call->addArg(Count); | |
| 4102 lowerCall(Call); | |
| 4103 } | |
| 4104 | |
| 3973 inline bool isAdd(const Inst *Inst) { | 4105 inline bool isAdd(const Inst *Inst) { |
| 3974 if (const InstArithmetic *Arith = | 4106 if (const InstArithmetic *Arith = |
| 3975 llvm::dyn_cast_or_null<const InstArithmetic>(Inst)) { | 4107 llvm::dyn_cast_or_null<const InstArithmetic>(Inst)) { |
| 3976 return (Arith->getOp() == InstArithmetic::Add); | 4108 return (Arith->getOp() == InstArithmetic::Add); |
| 3977 } | 4109 } |
| 3978 return false; | 4110 return false; |
| 3979 } | 4111 } |
| 3980 | 4112 |
| 3981 inline void dumpAddressOpt(const Cfg *Func, const Variable *Base, | 4113 inline void dumpAddressOpt(const Cfg *Func, const Variable *Base, |
| 3982 const Variable *Index, uint16_t Shift, | 4114 const Variable *Index, uint16_t Shift, |
| (...skipping 1633 matching lines...) | |
| 5616 } | 5748 } |
| 5617 // the offset is not eligible for blinding or pooling, return the original | 5749 // the offset is not eligible for blinding or pooling, return the original |
| 5618 // mem operand | 5750 // mem operand |
| 5619 return MemOperand; | 5751 return MemOperand; |
| 5620 } | 5752 } |
| 5621 | 5753 |
| 5622 } // end of namespace X86Internal | 5754 } // end of namespace X86Internal |
| 5623 } // end of namespace Ice | 5755 } // end of namespace Ice |
| 5624 | 5756 |
| 5625 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H | 5757 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H |
| OLD | NEW |