| OLD | NEW |
| 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// | 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 /// | 9 /// |
| 10 /// \file | 10 /// \file |
| (...skipping 3528 matching lines...) |
| 3539 } | 3539 } |
| 3540 case Intrinsics::Memmove: { | 3540 case Intrinsics::Memmove: { |
| 3541 InstCall *Call = makeHelperCall(H_call_memmove, nullptr, 3); | 3541 InstCall *Call = makeHelperCall(H_call_memmove, nullptr, 3); |
| 3542 Call->addArg(Instr->getArg(0)); | 3542 Call->addArg(Instr->getArg(0)); |
| 3543 Call->addArg(Instr->getArg(1)); | 3543 Call->addArg(Instr->getArg(1)); |
| 3544 Call->addArg(Instr->getArg(2)); | 3544 Call->addArg(Instr->getArg(2)); |
| 3545 lowerCall(Call); | 3545 lowerCall(Call); |
| 3546 return; | 3546 return; |
| 3547 } | 3547 } |
| 3548 case Intrinsics::Memset: { | 3548 case Intrinsics::Memset: { |
| 3549 // The value operand needs to be extended to a stack slot size because the | 3549 lowerMemset(Instr->getArg(0), Instr->getArg(1), Instr->getArg(2)); |
| 3550 // PNaCl ABI requires arguments to be at least 32 bits wide. | |
| 3551 Operand *ValOp = Instr->getArg(1); | |
| 3552 assert(ValOp->getType() == IceType_i8); | |
| 3553 Variable *ValExt = Func->makeVariable(stackSlotType()); | |
| 3554 lowerCast(InstCast::create(Func, InstCast::Zext, ValExt, ValOp)); | |
| 3555 InstCall *Call = makeHelperCall(H_call_memset, nullptr, 3); | |
| 3556 Call->addArg(Instr->getArg(0)); | |
| 3557 Call->addArg(ValExt); | |
| 3558 Call->addArg(Instr->getArg(2)); | |
| 3559 lowerCall(Call); | |
| 3560 return; | 3550 return; |
| 3561 } | 3551 } |
| 3562 case Intrinsics::NaClReadTP: { | 3552 case Intrinsics::NaClReadTP: { |
| 3563 if (Ctx->getFlags().getUseSandboxing()) { | 3553 if (Ctx->getFlags().getUseSandboxing()) { |
| 3564 Operand *Src = dispatchToConcrete(&Machine::createNaClReadTPSrcOperand); | 3554 Operand *Src = dispatchToConcrete(&Machine::createNaClReadTPSrcOperand); |
| 3565 Variable *Dest = Instr->getDest(); | 3555 Variable *Dest = Instr->getDest(); |
| 3566 Variable *T = nullptr; | 3556 Variable *T = nullptr; |
| 3567 _mov(T, Src); | 3557 _mov(T, Src); |
| 3568 _mov(Dest, T); | 3558 _mov(Dest, T); |
| 3569 } else { | 3559 } else { |
| (...skipping 418 matching lines...) |
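| // bsr computes the bit index of the most significant set bit; xor-ing an
| // index in [0, 31] with 31 turns it into the leading-zero count (31 - index).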
| 3988 _bsr(T_Dest2, SecondVar); | 3978 _bsr(T_Dest2, SecondVar); |
| 3989 _xor(T_Dest2, ThirtyOne); | 3979 _xor(T_Dest2, ThirtyOne); |
| 3990 } | 3980 } |
| 3991 _test(SecondVar, SecondVar); | 3981 _test(SecondVar, SecondVar); |
| 3992 _cmov(T_Dest2, T_Dest, Traits::Cond::Br_e); | 3982 _cmov(T_Dest2, T_Dest, Traits::Cond::Br_e); |
| 3993 _mov(DestLo, T_Dest2); | 3983 _mov(DestLo, T_Dest2); |
| 3994 _mov(DestHi, Ctx->getConstantZero(IceType_i32)); | 3984 _mov(DestHi, Ctx->getConstantZero(IceType_i32)); |
| 3995 } | 3985 } |
| 3996 | 3986 |
| 3997 template <class Machine> | 3987 template <class Machine> |
| 3988 void TargetX86Base<Machine>::lowerMemset(Operand *Dest, Operand *Val, |
| 3989 Operand *Count) { |
| 3990 constexpr uint32_t UNROLL_LIMIT = 16; |
| 3991 assert(Val->getType() == IceType_i8); |
| 3992 |
| 3993 // Check if the operands are constants |
| 3994 const auto *CountConst = llvm::dyn_cast<const ConstantInteger32>(Count); |
| 3995 const auto *ValConst = llvm::dyn_cast<const ConstantInteger32>(Val); |
| 3996 const bool IsCountConst = CountConst != nullptr; |
| 3997 const bool IsValConst = ValConst != nullptr; |
| 3998 const uint32_t CountValue = IsCountConst ? CountConst->getValue() : 0; |
| 3999 const uint32_t ValValue = IsValConst ? (ValConst->getValue() & 0xff) : 0;
| 4000 |
| 4001 // A zero-length memset is unlikely, but there is nothing to do if it happens.
| 4002 if (IsCountConst && CountValue == 0) |
| 4003 return; |
| 4004 |
| 4005 // TODO(ascull): if the count is constant but the value is not, it would still
| 4006 // be possible to inline by spreading the value across 4 bytes and accessing
| 4007 // subregs, e.g. eax, ax and al.
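| // (For a count of 7, say, that would be a 4-byte store through eax at offset
| // 0, a 2-byte store through ax at offset 4 and a 1-byte store through al at
| // offset 6.)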
| 4008 if (IsCountConst && IsValConst) { |
| 4009 Variable *Base = legalizeToReg(Dest); |
| 4010 |
| 4011 // A count of 3 is the awkward size: it is too small for the vector or 32-bit
| 4012 // operations, and lowerLeftOvers cannot handle it because there is no valid
| 4013 // overlap.
| 4014 if (CountValue == 3) { |
| 4015 Constant *Offset = nullptr; |
| 4016 auto *Mem = |
| 4017 Traits::X86OperandMem::create(Func, IceType_i16, Base, Offset); |
| 4018 _store(Ctx->getConstantInt16((ValValue << 8) | ValValue), Mem); |
| 4019 |
| 4020 Offset = Ctx->getConstantInt8(2); |
| 4021 Mem = Traits::X86OperandMem::create(Func, IceType_i8, Base, Offset); |
| 4022 _store(Ctx->getConstantInt8(ValValue), Mem); |
| 4023 return; |
| 4024 } |
| 4025 |
| 4026 // Lowers the assignment to the remaining bytes. Assumes the original size |
| 4027 // was large enough to allow for overlaps. |
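| // For example, with CountValue == 13 the zero path below stores 8 bytes at
| // offset 0 and lowerLeftOvers stores 8 more at offset 5 (CountValue - 8),
| // writing bytes 5..7 twice but covering all 13 bytes.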
| 4028 auto lowerLeftOvers = [this, Base, CountValue]( |
| 4029 uint32_t SpreadValue, uint32_t Size, Variable *VecReg) { |
| 4030 auto lowerStoreSpreadValue = |
| 4031 [this, Base, CountValue, SpreadValue](Type Ty) { |
| 4032 Constant *Offset = |
| 4033 Ctx->getConstantInt32(CountValue - typeWidthInBytes(Ty)); |
| 4034 auto *Mem = Traits::X86OperandMem::create(Func, Ty, Base, Offset); |
| 4035 _store(Ctx->getConstantInt(Ty, SpreadValue), Mem); |
| 4036 }; |
| 4037 |
| 4038 if (Size > 8) { |
| 4039 assert(VecReg != nullptr); |
| 4040 Constant *Offset = Ctx->getConstantInt32(CountValue - 16); |
| 4041 auto *Mem = Traits::X86OperandMem::create(Func, VecReg->getType(), Base, |
| 4042 Offset); |
| 4043 _storep(VecReg, Mem); |
| 4044 } else if (Size > 4) { |
| 4045 assert(VecReg != nullptr); |
| 4046 Constant *Offset = Ctx->getConstantInt32(CountValue - 8); |
| 4047 auto *Mem = |
| 4048 Traits::X86OperandMem::create(Func, IceType_i64, Base, Offset); |
| 4049 _storeq(VecReg, Mem); |
| 4050 } else if (Size > 2) { |
| 4051 lowerStoreSpreadValue(IceType_i32); |
| 4052 } else if (Size > 1) { |
| 4053 lowerStoreSpreadValue(IceType_i16); |
| 4054 } else if (Size == 1) { |
| 4055 lowerStoreSpreadValue(IceType_i8); |
| 4056 } |
| 4057 }; |
| 4058 |
| 4059 // When the value is zero, it can be materialized in a register cheaply using
| 4060 // the xor trick.
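| // (xor-ing a register with itself, e.g. pxor xmm0, xmm0, is a standard
| // zeroing idiom that avoids loading the constant from memory.)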
| 4061 constexpr uint32_t BytesPerStorep = 16; |
| 4062 if (ValValue == 0 && CountValue >= 8 && |
| 4063 CountValue <= BytesPerStorep * UNROLL_LIMIT) { |
| 4064 Variable *Zero = makeVectorOfZeros(IceType_v16i8); |
| 4065 |
| 4066 // Too small to use the large vector operations, so use small ones instead.
| 4067 if (CountValue < 16) { |
| 4068 Constant *Offset = nullptr; |
| 4069 auto *Mem = |
| 4070 Traits::X86OperandMem::create(Func, IceType_i64, Base, Offset); |
| 4071 _storeq(Zero, Mem); |
| 4072 lowerLeftOvers(0, CountValue - 8, Zero); |
| 4073 return; |
| 4074 } |
| 4075 |
| 4076 assert(CountValue >= 16); |
| 4077 // Use large vector operations |
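| // For example, CountValue == 40 emits 16-byte stores at offsets 16 and 0
| // here, and lowerLeftOvers then covers the final 8 bytes at offset 32.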
| 4078 for (uint32_t N = CountValue & 0xFFFFFFF0; N != 0;) { |
| 4079 N -= 16; |
| 4080 Constant *Offset = Ctx->getConstantInt32(N); |
| 4081 auto *Mem = |
| 4082 Traits::X86OperandMem::create(Func, Zero->getType(), Base, Offset); |
| 4083 _storep(Zero, Mem); |
| 4084 } |
| 4085 uint32_t LeftOver = CountValue & 0xF; |
| 4086 lowerLeftOvers(0, LeftOver, Zero); |
| 4087 return; |
| 4088 } |
| 4089 |
| 4090 // TODO(ascull): load val into reg and select subregs e.g. eax, ax, al? |
| 4091 constexpr uint32_t BytesPerStore = 4; |
| 4092 if (CountValue <= BytesPerStore * UNROLL_LIMIT) { |
| 4093 // TODO(ascull): 64-bit targets can do better with a 64-bit mov.
| 4094 uint32_t SpreadValue = |
| 4095 (ValValue << 24) | (ValValue << 16) | (ValValue << 8) | ValValue; |
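| // (e.g. a ValValue of 0xAB yields a SpreadValue of 0xABABABAB)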
| 4096 if (CountValue >= 4) { |
| 4097 Constant *ValueConst = Ctx->getConstantInt32(SpreadValue); |
| 4098 for (uint32_t N = CountValue & 0xFFFFFFFC; N != 0;) { |
| 4099 N -= 4; |
| 4100 Constant *Offset = Ctx->getConstantInt32(N); |
| 4101 auto *Mem = |
| 4102 Traits::X86OperandMem::create(Func, IceType_i32, Base, Offset); |
| 4103 _store(ValueConst, Mem); |
| 4104 } |
| 4105 } |
| 4106 uint32_t LeftOver = CountValue & 0x3; |
| 4107 lowerLeftOvers(SpreadValue, LeftOver, nullptr); |
| 4108 return; |
| 4109 } |
| 4110 } |
| 4111 |
| 4112 // Fall back on calling the memset function. The value operand needs to be |
| 4113 // extended to a stack slot size because the PNaCl ABI requires arguments to |
| 4114 // be at least 32 bits wide. |
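| // (e.g. an i8 value of 0xFF is passed to memset as the i32 value 0x000000FF)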
| 4115 Operand *ValExt; |
| 4116 if (IsValConst) { |
| 4117 ValExt = Ctx->getConstantInt(stackSlotType(), ValValue); |
| 4118 } else { |
| 4119 Variable *ValExtVar = Func->makeVariable(stackSlotType()); |
| 4120 lowerCast(InstCast::create(Func, InstCast::Zext, ValExtVar, Val)); |
| 4121 ValExt = ValExtVar; |
| 4122 } |
| 4123 InstCall *Call = makeHelperCall(H_call_memset, nullptr, 3); |
| 4124 Call->addArg(Dest); |
| 4125 Call->addArg(ValExt); |
| 4126 Call->addArg(Count); |
| 4127 lowerCall(Call); |
| 4128 } |
| 4129 |
| 4130 template <class Machine> |
| 3998 void TargetX86Base<Machine>::lowerIndirectJump(Variable *Target) { | 4131 void TargetX86Base<Machine>::lowerIndirectJump(Variable *Target) { |
| 3999 const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing(); | 4132 const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing(); |
| 4000 if (NeedSandboxing) { | 4133 if (NeedSandboxing) { |
| 4001 _bundle_lock(); | 4134 _bundle_lock(); |
| 4002 const SizeT BundleSize = | 4135 const SizeT BundleSize = |
| 4003 1 << Func->getAssembler<>()->getBundleAlignLog2Bytes(); | 4136 1 << Func->getAssembler<>()->getBundleAlignLog2Bytes(); |
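| // e.g. with 32-byte bundles the mask below is ~31, clearing the low five
| // bits so the jump target is bundle-aligned.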
| 4004 _and(Target, Ctx->getConstantInt32(~(BundleSize - 1))); | 4137 _and(Target, Ctx->getConstantInt32(~(BundleSize - 1))); |
| 4005 } | 4138 } |
| 4006 _jmp(Target); | 4139 _jmp(Target); |
| 4007 if (NeedSandboxing) | 4140 if (NeedSandboxing) |
| (...skipping 1464 matching lines...) |
| 5472 } | 5605 } |
| 5473 // The offset is not eligible for blinding or pooling; return the original | 5606 // The offset is not eligible for blinding or pooling; return the original |
| 5474 // mem operand. | 5607 // mem operand. |
| 5475 return MemOperand; | 5608 return MemOperand; |
| 5476 } | 5609 } |
| 5477 | 5610 |
| 5478 } // end of namespace X86Internal | 5611 } // end of namespace X86Internal |
| 5479 } // end of namespace Ice | 5612 } // end of namespace Ice |
| 5480 | 5613 |
| 5481 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H | 5614 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H |