OLD | NEW |
---|---|
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// | 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 /// | 9 /// |
10 /// \file | 10 /// \file |
(...skipping 1078 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1089 Variable *T = makeReg(IceType_i32); | 1089 Variable *T = makeReg(IceType_i32); |
1090 auto *CalculateOperand = X86OperandMem::create( | 1090 auto *CalculateOperand = X86OperandMem::create( |
1091 Func, IceType_void, esp, Ctx->getConstantInt(IceType_i32, OutArgsSize)); | 1091 Func, IceType_void, esp, Ctx->getConstantInt(IceType_i32, OutArgsSize)); |
1092 _lea(T, CalculateOperand); | 1092 _lea(T, CalculateOperand); |
1093 _mov(Dest, T); | 1093 _mov(Dest, T); |
1094 } else { | 1094 } else { |
1095 _mov(Dest, esp); | 1095 _mov(Dest, esp); |
1096 } | 1096 } |
1097 } | 1097 } |
1098 | 1098 |
1099 template <typename TraitsType> | |
1100 void TargetX86Base<TraitsType>::lowerArguments() { | |
1101 VarList &Args = Func->getArgs(); | |
1102 unsigned NumXmmArgs = 0; | |
1103 bool XmmSlotsRemain = true; | |
1104 unsigned NumGprArgs = 0; | |
1105 bool GprSlotsRemain = true; | |
1106 | |
1107 Context.init(Func->getEntryNode()); | |
1108 Context.setInsertPoint(Context.getCur()); | |
1109 | |
1110 for (SizeT i = 0, End = Args.size(); | |
1111 i < End && (XmmSlotsRemain || GprSlotsRemain); ++i) { | |
1112 Variable *Arg = Args[i]; | |
1113 Type Ty = Arg->getType(); | |
1114 Variable *RegisterArg = nullptr; | |
1115 int32_t RegNum = Variable::NoRegister; | |
1116 if (isVectorType(Ty)) { | |
1117 RegNum = Traits::getRegisterForXmmArgNum(NumXmmArgs); | |
1118 if (RegNum == Variable::NoRegister) { | |
1119 XmmSlotsRemain = false; | |
1120 continue; | |
1121 } | |
1122 ++NumXmmArgs; | |
1123 RegisterArg = Func->makeVariable(Ty); | |
1124 } else if (isScalarFloatingType(Ty)) { | |
1125 if (!Traits::X86_PASS_SCALAR_FP_IN_XMM) { | |
1126 continue; | |
1127 } | |
1128 RegNum = Traits::getRegisterForXmmArgNum(NumXmmArgs); | |
1129 if (RegNum == Variable::NoRegister) { | |
1130 XmmSlotsRemain = false; | |
1131 continue; | |
1132 } | |
1133 ++NumXmmArgs; | |
1134 RegisterArg = Func->makeVariable(Ty); | |
1135 } else if (isScalarIntegerType(Ty)) { | |
1136 RegNum = Traits::getRegisterForGprArgNum(Ty, NumGprArgs); | |
1137 if (RegNum == Variable::NoRegister) { | |
1138 GprSlotsRemain = false; | |
1139 continue; | |
1140 } | |
1141 ++NumGprArgs; | |
1142 RegisterArg = Func->makeVariable(Ty); | |
1143 } | |
1144 assert(RegNum != Variable::NoRegister); | |
1145 assert(RegisterArg != nullptr); | |
1146 // Replace Arg in the argument list with the home register. Then generate | |
1147 // an instruction in the prolog to copy the home register to the assigned | |
1148 // location of Arg. | |
1149 if (BuildDefs::dump()) | |
1150 RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func)); | |
1151 RegisterArg->setRegNum(RegNum); | |
1152 RegisterArg->setIsArg(); | |
1153 Arg->setIsArg(false); | |
1154 | |
1155 Args[i] = RegisterArg; | |
1156 Context.insert<InstAssign>(Arg, RegisterArg); | |
1157 } | |
1158 } | |
1159 | |
1099 /// Strength-reduce scalar integer multiplication by a constant (for i32 or | 1160 /// Strength-reduce scalar integer multiplication by a constant (for i32 or |
1100 /// narrower) for certain constants. The lea instruction can be used to multiply | 1161 /// narrower) for certain constants. The lea instruction can be used to multiply |
1101 /// by 3, 5, or 9, and the lsh instruction can be used to multiply by powers of | 1162 /// by 3, 5, or 9, and the lsh instruction can be used to multiply by powers of |
1102 /// 2. These can be combined such that e.g. multiplying by 100 can be done as 2 | 1163 /// 2. These can be combined such that e.g. multiplying by 100 can be done as 2 |
1103 /// lea-based multiplies by 5, combined with left-shifting by 2. | 1164 /// lea-based multiplies by 5, combined with left-shifting by 2. |
1104 template <typename TraitsType> | 1165 template <typename TraitsType> |
1105 bool TargetX86Base<TraitsType>::optimizeScalarMul(Variable *Dest, Operand *Src0, | 1166 bool TargetX86Base<TraitsType>::optimizeScalarMul(Variable *Dest, Operand *Src0, |
1106 int32_t Src1) { | 1167 int32_t Src1) { |
1107 // Disable this optimization for Om1 and O0, just to keep things simple | 1168 // Disable this optimization for Om1 and O0, just to keep things simple |
1108 // there. | 1169 // there. |
(...skipping 912 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
2021 return; | 2082 return; |
2022 } | 2083 } |
2023 } | 2084 } |
2024 } | 2085 } |
2025 Operand *Src0 = legalize(Cond, Legal_Reg | Legal_Mem); | 2086 Operand *Src0 = legalize(Cond, Legal_Reg | Legal_Mem); |
2026 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 2087 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
2027 _cmp(Src0, Zero); | 2088 _cmp(Src0, Zero); |
2028 _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse()); | 2089 _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse()); |
2029 } | 2090 } |
2030 | 2091 |
2092 namespace { | |
John
2016/01/20 00:04:34
Anonymous namespaces are dangerous in .h files. Ma
sehr
2016/01/20 16:07:10
Done.
| |
2093 // constexprMax returns a (constexpr) max(S0, S1), and it is used for defining | |
2094 // OperandList in lowerCall. std::max() is supposed to work, but it doesn't. | |
2095 constexpr SizeT constexprMax(SizeT S0, SizeT S1) { return S0 < S1 ? S1 : S0; } | |
2096 // | |
2097 } // end of anonymous namespace | |
2098 | |
2099 template <typename TraitsType> | |
2100 void TargetX86Base<TraitsType>::lowerCall(const InstCall *Instr) { | |
2101 // Common x86 calling convention lowering: | |
2102 // | |
2103 // * At the point before the call, the stack must be aligned to 16 bytes. | |
2104 // | |
2105 // * Non-register arguments are pushed onto the stack in right-to-left order, | |
2106 // such that the left-most argument ends up on the top of the stack at the | |
2107 // lowest memory address. | |
2108 // | |
2109 // * Stack arguments of vector type are aligned to start at the next highest | |
2110 // multiple of 16 bytes. Other stack arguments are aligned to the next word | |
2111 // size boundary (4 or 8 bytes, respectively). | |
2112 NeedsStackAlignment = true; | |
2113 | |
2114 using OperandList = | |
2115 llvm::SmallVector<Operand *, constexprMax(Traits::X86_MAX_XMM_ARGS, | |
2116 Traits::X86_MAX_GPR_ARGS)>; | |
2117 OperandList XmmArgs; | |
2118 CfgVector<std::pair<const Type, Operand *>> GprArgs; | |
2119 OperandList StackArgs, StackArgLocations; | |
2120 uint32_t ParameterAreaSizeBytes = 0; | |
2121 | |
2122 // Classify each argument operand according to the location where the argument | |
2123 // is passed. | |
2124 for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) { | |
2125 Operand *Arg = Instr->getArg(i); | |
2126 const Type Ty = Arg->getType(); | |
2127 // The PNaCl ABI requires the width of arguments to be at least 32 bits. | |
2128 assert(typeWidthInBytes(Ty) >= 4); | |
2129 if (isVectorType(Ty) && (Traits::getRegisterForXmmArgNum(XmmArgs.size()) != | |
2130 Variable::NoRegister)) { | |
2131 XmmArgs.push_back(Arg); | |
2132 } else if (isScalarFloatingType(Ty) && Traits::X86_PASS_SCALAR_FP_IN_XMM && | |
2133 (Traits::getRegisterForXmmArgNum(0) != Variable::NoRegister)) { | |
2134 XmmArgs.push_back(Arg); | |
2135 } else if (isScalarIntegerType(Ty) && | |
2136 (Traits::getRegisterForGprArgNum(Ty, GprArgs.size()) != | |
2137 Variable::NoRegister)) { | |
2138 GprArgs.emplace_back(Ty, Arg); | |
2139 } else { | |
2140 // Place on stack. | |
2141 StackArgs.push_back(Arg); | |
2142 if (isVectorType(Arg->getType())) { | |
2143 ParameterAreaSizeBytes = | |
2144 Traits::applyStackAlignment(ParameterAreaSizeBytes); | |
2145 } | |
2146 Variable *esp = getPhysicalRegister(getStackReg(), Traits::WordType); | |
2147 Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes); | |
2148 StackArgLocations.push_back( | |
2149 Traits::X86OperandMem::create(Func, Ty, esp, Loc)); | |
2150 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType()); | |
2151 } | |
2152 } | |
2153 // Ensure there is enough space for the fstp/movs for floating returns. | |
2154 Variable *Dest = Instr->getDest(); | |
2155 const Type DestTy = Dest ? Dest->getType() : IceType_void; | |
2156 if (Traits::X86_PASS_SCALAR_FP_IN_XMM) { | |
2157 if (isScalarFloatingType(DestTy)) { | |
2158 ParameterAreaSizeBytes = | |
2159 std::max(static_cast<size_t>(ParameterAreaSizeBytes), | |
2160 typeWidthInBytesOnStack(DestTy)); | |
2161 } | |
2162 } | |
2163 // Adjust the parameter area so that the stack is aligned. It is assumed that | |
2164 // the stack is already aligned at the start of the calling sequence. | |
2165 ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes); | |
2166 assert(ParameterAreaSizeBytes <= maxOutArgsSizeBytes()); | |
2167 // Copy arguments that are passed on the stack to the appropriate stack | |
2168 // locations. | |
2169 for (SizeT i = 0, NumStackArgs = StackArgs.size(); i < NumStackArgs; ++i) { | |
2170 lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i])); | |
2171 } | |
2172 // Copy arguments to be passed in registers to the appropriate registers. | |
2173 for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) { | |
2174 Variable *Reg = | |
2175 legalizeToReg(XmmArgs[i], Traits::getRegisterForXmmArgNum(i)); | |
2176 // Generate a FakeUse of register arguments so that they do not get dead | |
2177 // code eliminated as a result of the FakeKill of scratch registers after | |
2178 // the call. | |
2179 Context.insert<InstFakeUse>(Reg); | |
2180 } | |
2181 // Materialize moves for arguments passed in GPRs. | |
2182 for (SizeT i = 0, NumGprArgs = GprArgs.size(); i < NumGprArgs; ++i) { | |
2183 const Type SignatureTy = GprArgs[i].first; | |
2184 Operand *Arg = GprArgs[i].second; | |
2185 Variable *Reg = | |
2186 legalizeToReg(Arg, Traits::getRegisterForGprArgNum(Arg->getType(), i)); | |
2187 assert(SignatureTy == IceType_i64 || SignatureTy == IceType_i32); | |
2188 assert(SignatureTy == Arg->getType()); | |
2189 (void)SignatureTy; | |
2190 Context.insert<InstFakeUse>(Reg); | |
2191 } | |
2192 // Generate the call instruction. Assign its result to a temporary with high | |
2193 // register allocation weight. | |
2194 // ReturnReg doubles as ReturnRegLo as necessary. | |
2195 Variable *ReturnReg = nullptr; | |
2196 Variable *ReturnRegHi = nullptr; | |
2197 if (Dest) { | |
2198 switch (DestTy) { | |
2199 case IceType_NUM: | |
2200 case IceType_void: | |
2201 case IceType_i1: | |
2202 case IceType_i8: | |
2203 case IceType_i16: | |
2204 llvm::report_fatal_error("Invalid Call dest type"); | |
2205 break; | |
2206 case IceType_i32: | |
2207 ReturnReg = makeReg(DestTy, Traits::RegisterSet::Reg_eax); | |
2208 break; | |
2209 case IceType_i64: | |
2210 if (Traits::Is64Bit) { | |
2211 ReturnReg = makeReg( | |
2212 IceType_i64, | |
2213 Traits::getGprForType(IceType_i64, Traits::RegisterSet::Reg_eax)); | |
2214 } else { | |
2215 ReturnReg = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); | |
2216 ReturnRegHi = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); | |
2217 } | |
2218 break; | |
2219 case IceType_f32: | |
2220 case IceType_f64: | |
2221 if (!Traits::X86_PASS_SCALAR_FP_IN_XMM) { | |
2222 // Leave ReturnReg==ReturnRegHi==nullptr, and capture the result with | |
2223 // the fstp instruction. | |
2224 break; | |
2225 } | |
2226 // Fallthrough intended. | |
2227 case IceType_v4i1: | |
2228 case IceType_v8i1: | |
2229 case IceType_v16i1: | |
2230 case IceType_v16i8: | |
2231 case IceType_v8i16: | |
2232 case IceType_v4i32: | |
2233 case IceType_v4f32: | |
2234 ReturnReg = makeReg(DestTy, Traits::RegisterSet::Reg_xmm0); | |
2235 break; | |
2236 } | |
2237 } | |
2238 // Emit the call to the function. | |
2239 Operand *CallTarget = | |
2240 legalize(Instr->getCallTarget(), Legal_Reg | Legal_Imm | Legal_AddrAbs); | |
2241 Inst *NewCall = emitCallToTarget(CallTarget, ReturnReg); | |
2242 // Keep the upper return register live on 32-bit platform. | |
2243 if (ReturnRegHi) | |
2244 Context.insert<InstFakeDef>(ReturnRegHi); | |
2245 // Mark the call as killing all the caller-saves registers. | |
Jim Stichnoth
2016/01/19 23:55:45
I would write "caller-save" for consistency with o
sehr
2016/01/20 16:07:10
Done.
| |
2246 Context.insert<InstFakeKill>(NewCall); | |
2247 // Handle x86-32 floating point returns. | |
2248 if (Dest != nullptr && isScalarFloatingType(Dest->getType()) && | |
2249 !Traits::X86_PASS_SCALAR_FP_IN_XMM) { | |
2250 // Special treatment for an FP function which returns its result in st(0). | |
2251 // If Dest ends up being a physical xmm register, the fstp emit code will | |
2252 // route st(0) through the space reserved in the function argument area | |
2253 // we allocated. | |
2254 _fstp(Dest); | |
2255 // Create a fake use of Dest in case it actually isn't used, because st(0) | |
2256 // still needs to be popped. | |
2257 Context.insert<InstFakeUse>(Dest); | |
2258 } | |
2259 // Generate a FakeUse to keep the call live if necessary. | |
2260 if (Instr->hasSideEffects() && ReturnReg) { | |
2261 Context.insert<InstFakeUse>(ReturnReg); | |
2262 } | |
2263 // Process the return value, if any. | |
2264 if (Dest == nullptr) | |
2265 return; | |
2266 // Assign the result of the call to Dest. | |
2267 if (isVectorType(DestTy)) { | |
2268 assert(ReturnReg && "Vector type requires a return register"); | |
2269 _movp(Dest, ReturnReg); | |
2270 } else if (isScalarFloatingType(DestTy)) { | |
2271 if (Traits::X86_PASS_SCALAR_FP_IN_XMM) { | |
2272 assert(ReturnReg && "FP type requires a return register"); | |
2273 _mov(Dest, ReturnReg); | |
2274 } | |
2275 } else { | |
2276 assert(isScalarIntegerType(DestTy)); | |
2277 assert(ReturnReg && "Integer type requires a return register"); | |
2278 if (DestTy == IceType_i64 && !Traits::Is64Bit) { | |
2279 assert(ReturnRegHi && "64-bit type requires two return registers"); | |
2280 auto *Dest64On32 = llvm::cast<Variable64On32>(Dest); | |
2281 Variable *DestLo = Dest64On32->getLo(); | |
2282 Variable *DestHi = Dest64On32->getHi(); | |
2283 _mov(DestLo, ReturnReg); | |
2284 _mov(DestHi, ReturnRegHi); | |
2285 } else { | |
2286 _mov(Dest, ReturnReg); | |
2287 } | |
2288 } | |
2289 } | |
2290 | |
2031 template <typename TraitsType> | 2291 template <typename TraitsType> |
2032 void TargetX86Base<TraitsType>::lowerCast(const InstCast *Inst) { | 2292 void TargetX86Base<TraitsType>::lowerCast(const InstCast *Inst) { |
2033 // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap) | 2293 // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap) |
2034 InstCast::OpKind CastKind = Inst->getCastKind(); | 2294 InstCast::OpKind CastKind = Inst->getCastKind(); |
2035 Variable *Dest = Inst->getDest(); | 2295 Variable *Dest = Inst->getDest(); |
2036 Type DestTy = Dest->getType(); | 2296 Type DestTy = Dest->getType(); |
2037 switch (CastKind) { | 2297 switch (CastKind) { |
2038 default: | 2298 default: |
2039 Func->setError("Cast type not supported"); | 2299 Func->setError("Cast type not supported"); |
2040 return; | 2300 return; |
(...skipping 2773 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
4814 _nop(RNGW(Traits::X86_NUM_NOP_VARIANTS)); | 5074 _nop(RNGW(Traits::X86_NUM_NOP_VARIANTS)); |
4815 } | 5075 } |
4816 } | 5076 } |
4817 | 5077 |
template <typename TraitsType>
void TargetX86Base<TraitsType>::lowerPhi(const InstPhi * /*Inst*/) {
  // Phi instructions are expected to have been eliminated by an earlier phi
  // lowering pass, so encountering one in the regular instruction stream is a
  // pass-ordering/front-end error rather than something to lower here.
  Func->setError("Phi found in regular instruction list");
}
4822 | 5082 |
4823 template <typename TraitsType> | 5083 template <typename TraitsType> |
5084 void TargetX86Base<TraitsType>::lowerRet(const InstRet *Inst) { | |
5085 Variable *Reg = nullptr; | |
5086 if (Inst->hasRetValue()) { | |
5087 Operand *RetValue = legalize(Inst->getRetValue()); | |
5088 const Type ReturnType = RetValue->getType(); | |
5089 assert(isVectorType(ReturnType) || isScalarFloatingType(ReturnType) || | |
5090 (ReturnType == IceType_i32) || (ReturnType == IceType_i64)); | |
5091 Reg = moveReturnValueToRegister(RetValue, ReturnType); | |
5092 } | |
5093 // Add a ret instruction even if sandboxing is enabled, because addEpilog | |
5094 // explicitly looks for a ret instruction as a marker for where to insert the | |
5095 // frame removal instructions. | |
5096 _ret(Reg); | |
5097 // Add a fake use of esp to make sure esp stays alive for the entire | |
5098 // function. Otherwise post-call esp adjustments get dead-code eliminated. | |
5099 keepEspLiveAtExit(); | |
5100 } | |
5101 | |
5102 template <typename TraitsType> | |
4824 void TargetX86Base<TraitsType>::lowerSelect(const InstSelect *Select) { | 5103 void TargetX86Base<TraitsType>::lowerSelect(const InstSelect *Select) { |
4825 Variable *Dest = Select->getDest(); | 5104 Variable *Dest = Select->getDest(); |
4826 | 5105 |
4827 if (isVectorType(Dest->getType())) { | 5106 if (isVectorType(Dest->getType())) { |
4828 lowerSelectVector(Select); | 5107 lowerSelectVector(Select); |
4829 return; | 5108 return; |
4830 } | 5109 } |
4831 | 5110 |
4832 Operand *Condition = Select->getCondition(); | 5111 Operand *Condition = Select->getCondition(); |
4833 // Handle folding opportunities. | 5112 // Handle folding opportunities. |
(...skipping 1786 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
6620 Func, MemOperand->getType(), RegTemp, nullptr, MemOperand->getIndex(), | 6899 Func, MemOperand->getType(), RegTemp, nullptr, MemOperand->getIndex(), |
6621 MemOperand->getShift(), MemOperand->getSegmentRegister()); | 6900 MemOperand->getShift(), MemOperand->getSegmentRegister()); |
6622 return NewMemOperand; | 6901 return NewMemOperand; |
6623 } | 6902 } |
6624 } | 6903 } |
6625 } | 6904 } |
6626 } // end of namespace X86NAMESPACE | 6905 } // end of namespace X86NAMESPACE |
6627 } // end of namespace Ice | 6906 } // end of namespace Ice |
6628 | 6907 |
6629 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H | 6908 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H |
OLD | NEW |