Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// | 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 /// | 9 /// |
| 10 /// \file | 10 /// \file |
| (...skipping 3528 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3539 } | 3539 } |
| 3540 case Intrinsics::Memmove: { | 3540 case Intrinsics::Memmove: { |
| 3541 InstCall *Call = makeHelperCall(H_call_memmove, nullptr, 3); | 3541 InstCall *Call = makeHelperCall(H_call_memmove, nullptr, 3); |
| 3542 Call->addArg(Instr->getArg(0)); | 3542 Call->addArg(Instr->getArg(0)); |
| 3543 Call->addArg(Instr->getArg(1)); | 3543 Call->addArg(Instr->getArg(1)); |
| 3544 Call->addArg(Instr->getArg(2)); | 3544 Call->addArg(Instr->getArg(2)); |
| 3545 lowerCall(Call); | 3545 lowerCall(Call); |
| 3546 return; | 3546 return; |
| 3547 } | 3547 } |
| 3548 case Intrinsics::Memset: { | 3548 case Intrinsics::Memset: { |
| 3549 // The value operand needs to be extended to a stack slot size because the | 3549 lowerMemset(Instr->getArg(0), Instr->getArg(1), Instr->getArg(2)); |
| 3550 // PNaCl ABI requires arguments to be at least 32 bits wide. | |
| 3551 Operand *ValOp = Instr->getArg(1); | |
| 3552 assert(ValOp->getType() == IceType_i8); | |
| 3553 Variable *ValExt = Func->makeVariable(stackSlotType()); | |
| 3554 lowerCast(InstCast::create(Func, InstCast::Zext, ValExt, ValOp)); | |
| 3555 InstCall *Call = makeHelperCall(H_call_memset, nullptr, 3); | |
| 3556 Call->addArg(Instr->getArg(0)); | |
| 3557 Call->addArg(ValExt); | |
| 3558 Call->addArg(Instr->getArg(2)); | |
| 3559 lowerCall(Call); | |
| 3560 return; | 3550 return; |
| 3561 } | 3551 } |
| 3562 case Intrinsics::NaClReadTP: { | 3552 case Intrinsics::NaClReadTP: { |
| 3563 if (Ctx->getFlags().getUseSandboxing()) { | 3553 if (Ctx->getFlags().getUseSandboxing()) { |
| 3564 Operand *Src = dispatchToConcrete(&Machine::createNaClReadTPSrcOperand); | 3554 Operand *Src = dispatchToConcrete(&Machine::createNaClReadTPSrcOperand); |
| 3565 Variable *Dest = Instr->getDest(); | 3555 Variable *Dest = Instr->getDest(); |
| 3566 Variable *T = nullptr; | 3556 Variable *T = nullptr; |
| 3567 _mov(T, Src); | 3557 _mov(T, Src); |
| 3568 _mov(Dest, T); | 3558 _mov(Dest, T); |
| 3569 } else { | 3559 } else { |
| (...skipping 418 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3988 _bsr(T_Dest2, SecondVar); | 3978 _bsr(T_Dest2, SecondVar); |
| 3989 _xor(T_Dest2, ThirtyOne); | 3979 _xor(T_Dest2, ThirtyOne); |
| 3990 } | 3980 } |
| 3991 _test(SecondVar, SecondVar); | 3981 _test(SecondVar, SecondVar); |
| 3992 _cmov(T_Dest2, T_Dest, Traits::Cond::Br_e); | 3982 _cmov(T_Dest2, T_Dest, Traits::Cond::Br_e); |
| 3993 _mov(DestLo, T_Dest2); | 3983 _mov(DestLo, T_Dest2); |
| 3994 _mov(DestHi, Ctx->getConstantZero(IceType_i32)); | 3984 _mov(DestHi, Ctx->getConstantZero(IceType_i32)); |
| 3995 } | 3985 } |
| 3996 | 3986 |
template <class Machine>
void TargetX86Base<Machine>::lowerMemset(Operand *Dest, Operand *Val,
                                         Operand *Count) {
  // Lowers a memset intrinsic. When both Val and Count are compile-time
  // constants the fill is expanded inline with stores; otherwise this falls
  // through to a call to the memset helper (see the end of this function).
  //
  // UNROLL_LIMIT caps how many inline stores may be emitted: the inline
  // paths below accept at most 16 * UNROLL_LIMIT bytes via 16-byte vector
  // stores (zero fill) or 4 * UNROLL_LIMIT bytes via 4-byte stores (general
  // constant fill).
  constexpr uint32_t UNROLL_LIMIT = 16;
  assert(Val->getType() == IceType_i8);

  // Check if the operands are constants
  const auto *CountConst = llvm::dyn_cast<const ConstantInteger32>(Count);
  const auto *ValConst = llvm::dyn_cast<const ConstantInteger32>(Val);
  const bool IsCountConst = CountConst != nullptr;
  const bool IsValConst = ValConst != nullptr;
  const uint32_t CountValue = IsCountConst ? CountConst->getValue() : 0;
  // NOTE(review): Val is asserted to be i8, so ValValue is assumed to occupy
  // only the low 8 bits; the byte-spreading shifts below rely on that.
  // Confirm ConstantInteger32::getValue() does not sign-extend i8 constants.
  const uint32_t ValValue = IsValConst ? ValConst->getValue() : 0;

  // Unlikely, but nothing to do if it does happen
  if (IsCountConst && CountValue == 0)
    return;

  // TODO(ascull): if the count is constant but val is not it would be possible
  // to inline by spreading the value across 4 bytes and accessing subregs e.g.
  // eax, ax and al.
  if (IsCountConst && IsValConst) {
    // Destination base address, forced into a register for use in the memory
    // operands built below.
    Variable *Base = legalizeToReg(Dest);

    // 3 is the awkward size as it is too small for the vector or 32-bit
    // operations and will not work with lowerLeftOvers as there is no valid
    // overlap. Handle it with one 2-byte store plus one 1-byte store.
    if (CountValue == 3) {
      Constant *Offset = nullptr;
      auto *Mem =
          Traits::X86OperandMem::create(Func, IceType_i16, Base, Offset);
      _store(Ctx->getConstantInt16((ValValue << 8) | ValValue), Mem);

      Offset = Ctx->getConstantInt8(2);
      Mem = Traits::X86OperandMem::create(Func, IceType_i8, Base, Offset);
      _store(Ctx->getConstantInt8(ValValue), Mem);
      return;
    }

    // Lowers the assignment to the remaining bytes. Assumes the original size
    // was large enough to allow for overlaps: the closing store is placed at
    // offset CountValue - width, so it may rewrite bytes already written by
    // the main loop instead of needing several narrower stores.
    //
    // SpreadValue is the fill byte replicated across 32 bits, Size is the
    // number of leftover bytes, and VecReg (when non-null) is a 16-byte
    // register holding the fill pattern for the wide-store cases.
    auto lowerLeftOvers = [this, Base, CountValue](
        uint32_t SpreadValue, uint32_t Size, Variable *VecReg) {
      // Emits a single store of the spread value covering the last
      // typeWidthInBytes(Ty) bytes of the region.
      auto lowerStoreSpreadValue =
          [this, Base, CountValue, SpreadValue](Type Ty) {
            Constant *Offset =
                Ctx->getConstantInt32(CountValue - typeWidthInBytes(Ty));
            auto *Mem = Traits::X86OperandMem::create(Func, Ty, Base, Offset);
            _store(Ctx->getConstantInt(Ty, SpreadValue), Mem);
          };

      if (Size > 8) {
        // 9..15 leftover bytes: one (possibly overlapping) 16-byte vector
        // store ending exactly at CountValue.
        assert(VecReg != nullptr);
        Constant *Offset = Ctx->getConstantInt32(CountValue - 16);
        auto *Mem = Traits::X86OperandMem::create(Func, VecReg->getType(), Base,
                                                  Offset);
        _storep(VecReg, Mem);
      } else if (Size > 4) {
        // 5..8 leftover bytes: one (possibly overlapping) 8-byte store from
        // the low half of the vector register.
        assert(VecReg != nullptr);
        Constant *Offset = Ctx->getConstantInt32(CountValue - 8);
        auto *Mem =
            Traits::X86OperandMem::create(Func, IceType_i64, Base, Offset);
        _storeq(VecReg, Mem);
      } else if (Size > 2) {
        lowerStoreSpreadValue(IceType_i32);
      } else if (Size > 1) {
        lowerStoreSpreadValue(IceType_i16);
      } else if (Size == 1) {
        lowerStoreSpreadValue(IceType_i8);
      }
    };

    // When the value is zero it can be loaded into a register cheaply using
    // the xor trick. The 16s in this branch are the xmm vector width in
    // bytes; the upper bound thus allows at most UNROLL_LIMIT vector stores.
    if (ValValue == 0 && CountValue >= 8 && CountValue <= 16 * UNROLL_LIMIT) {
      Variable *Zero = makeVectorOfZeros(IceType_v16i8);

      // Too small to use large vector operations so use small ones instead:
      // one 8-byte store plus a leftover store for the remaining 0..7 bytes.
      if (CountValue < 16) {
        Constant *Offset = nullptr;
        auto *Mem =
            Traits::X86OperandMem::create(Func, IceType_i64, Base, Offset);
        _storeq(Zero, Mem);
        lowerLeftOvers(0, CountValue - 8, Zero);
        return;
      }

      assert(CountValue >= 16);
      // Use large vector operations, walking backwards over every complete
      // 16-byte chunk (CountValue & 0xFFFFFFF0 rounds down to a multiple of
      // 16).
      for (uint32_t N = CountValue & 0xFFFFFFF0; N != 0;) {
        N -= 16;
        Constant *Offset = Ctx->getConstantInt32(N);
        auto *Mem =
            Traits::X86OperandMem::create(Func, Zero->getType(), Base, Offset);
        _storep(Zero, Mem);
      }
      uint32_t LeftOver = CountValue & 0xF;
      lowerLeftOvers(0, LeftOver, Zero);
      return;
    }

    // TODO(ascull): load val into reg and select subregs e.g. eax, ax, al?
    if (CountValue <= 4 * UNROLL_LIMIT) {
      // TODO(ascull); 64-bit can do better with 64-bit mov
      // Replicate the fill byte into all four bytes of a 32-bit immediate.
      uint32_t SpreadValue =
          (ValValue << 24) | (ValValue << 16) | (ValValue << 8) | ValValue;
      if (CountValue >= 4) {
        // Store every complete 4-byte chunk, walking backwards from the end
        // (CountValue & 0xFFFFFFFC rounds down to a multiple of 4).
        Constant *ValueConst = Ctx->getConstantInt32(SpreadValue);
        for (uint32_t N = CountValue & 0xFFFFFFFC; N != 0;) {
          N -= 4;
          Constant *Offset = Ctx->getConstantInt32(N);
          auto *Mem =
              Traits::X86OperandMem::create(Func, IceType_i32, Base, Offset);
          _store(ValueConst, Mem);
        }
      }
      // At most 3 bytes remain here, so lowerLeftOvers never needs VecReg.
      uint32_t LeftOver = CountValue & 0x3;
      lowerLeftOvers(SpreadValue, LeftOver, nullptr);
      return;
    }
  }

  // Fall back on calling the memset function. The value operand needs to be
  // extended to a stack slot size because the PNaCl ABI requires arguments to
  // be at least 32 bits wide.
  Operand *ValExt;
  if (IsValConst) {
    // Constant value: materialize the widened constant directly instead of
    // emitting a zero-extending cast.
    ValExt = Ctx->getConstantInt(stackSlotType(), ValValue);
  } else {
    Variable *ValExtVar = Func->makeVariable(stackSlotType());
    lowerCast(InstCast::create(Func, InstCast::Zext, ValExtVar, Val));
    ValExt = ValExtVar;
  }
  InstCall *Call = makeHelperCall(H_call_memset, nullptr, 3);
  Call->addArg(Dest);
  Call->addArg(ValExt);
  Call->addArg(Count);
  lowerCall(Call);
}
| 4126 | |
| 4127 template <class Machine> | |
| 3998 void TargetX86Base<Machine>::lowerIndirectJump(Variable *Target) { | 4128 void TargetX86Base<Machine>::lowerIndirectJump(Variable *Target) { |
| 3999 const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing(); | 4129 const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing(); |
| 4000 if (NeedSandboxing) { | 4130 if (NeedSandboxing) { |
| 4001 _bundle_lock(); | 4131 _bundle_lock(); |
| 4002 const SizeT BundleSize = | 4132 const SizeT BundleSize = |
| 4003 1 << Func->getAssembler<>()->getBundleAlignLog2Bytes(); | 4133 1 << Func->getAssembler<>()->getBundleAlignLog2Bytes(); |
| 4004 _and(Target, Ctx->getConstantInt32(~(BundleSize - 1))); | 4134 _and(Target, Ctx->getConstantInt32(~(BundleSize - 1))); |
| 4005 } | 4135 } |
| 4006 _jmp(Target); | 4136 _jmp(Target); |
| 4007 if (NeedSandboxing) | 4137 if (NeedSandboxing) |
| (...skipping 1464 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 5472 } | 5602 } |
| 5473 // the offset is not eligible for blinding or pooling, return the original | 5603 // the offset is not eligible for blinding or pooling, return the original |
| 5474 // mem operand | 5604 // mem operand |
| 5475 return MemOperand; | 5605 return MemOperand; |
| 5476 } | 5606 } |
| 5477 | 5607 |
| 5478 } // end of namespace X86Internal | 5608 } // end of namespace X86Internal |
| 5479 } // end of namespace Ice | 5609 } // end of namespace Ice |
| 5480 | 5610 |
| 5481 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H | 5611 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H |
| OLD | NEW |