Chromium Code Reviews

Side by Side Diff: src/IceTargetLoweringX86BaseImpl.h

Issue 1592033002: Merge lowerCall and lowerRet between x86 and x64 (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Changed to use Variable::NoRegister Created 4 years, 11 months ago
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
(...skipping 1078 matching lines...)
1089 Variable *T = makeReg(IceType_i32); 1089 Variable *T = makeReg(IceType_i32);
1090 auto *CalculateOperand = X86OperandMem::create( 1090 auto *CalculateOperand = X86OperandMem::create(
1091 Func, IceType_void, esp, Ctx->getConstantInt(IceType_i32, OutArgsSize)); 1091 Func, IceType_void, esp, Ctx->getConstantInt(IceType_i32, OutArgsSize));
1092 _lea(T, CalculateOperand); 1092 _lea(T, CalculateOperand);
1093 _mov(Dest, T); 1093 _mov(Dest, T);
1094 } else { 1094 } else {
1095 _mov(Dest, esp); 1095 _mov(Dest, esp);
1096 } 1096 }
1097 } 1097 }
1098 1098
1099 template <typename TraitsType>
1100 void TargetX86Base<TraitsType>::lowerArguments() {
1101 VarList &Args = Func->getArgs();
1102 unsigned NumXmmArgs = 0;
1103 bool XmmSlotsRemain = true;
1104 unsigned NumGprArgs = 0;
1105 bool GprSlotsRemain = true;
1106
1107 Context.init(Func->getEntryNode());
1108 Context.setInsertPoint(Context.getCur());
1109
1110 for (SizeT i = 0, End = Args.size();
1111 i < End && (XmmSlotsRemain || GprSlotsRemain); ++i) {
1112 Variable *Arg = Args[i];
1113 Type Ty = Arg->getType();
1114 Variable *RegisterArg = nullptr;
1115 int32_t RegNum = Variable::NoRegister;
1116 if (isVectorType(Ty)) {
1117 RegNum = Traits::getRegisterForXmmArgNum(NumXmmArgs);
1118 if (RegNum == Variable::NoRegister) {
1119 XmmSlotsRemain = false;
1120 continue;
1121 }
1122 ++NumXmmArgs;
1123 RegisterArg = Func->makeVariable(Ty);
1124 } else if (isScalarFloatingType(Ty)) {
1125 if (!Traits::X86_PASS_SCALAR_FP_IN_XMM) {
1126 continue;
1127 }
1128 RegNum = Traits::getRegisterForXmmArgNum(NumXmmArgs);
1129 if (RegNum == Variable::NoRegister) {
1130 XmmSlotsRemain = false;
1131 continue;
1132 }
1133 ++NumXmmArgs;
1134 RegisterArg = Func->makeVariable(Ty);
1135 } else if (isScalarIntegerType(Ty)) {
1136 RegNum = Traits::getRegisterForGprArgNum(Ty, NumGprArgs);
1137 if (RegNum == Variable::NoRegister) {
1138 GprSlotsRemain = false;
1139 continue;
1140 }
1141 ++NumGprArgs;
1142 RegisterArg = Func->makeVariable(Ty);
1143 }
1144 assert(RegNum != Variable::NoRegister);
1145 assert(RegisterArg != nullptr);
1146 // Replace Arg in the argument list with the home register. Then generate
1147 // an instruction in the prolog to copy the home register to the assigned
1148 // location of Arg.
1149 if (BuildDefs::dump())
1150 RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func));
1151 RegisterArg->setRegNum(RegNum);
1152 RegisterArg->setIsArg();
1153 Arg->setIsArg(false);
1154
1155 Args[i] = RegisterArg;
1156 Context.insert<InstAssign>(Arg, RegisterArg);
1157 }
1158 }
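
For orientation, here is a minimal self-contained sketch (hypothetical types and names, not the Subzero API) of the register-assignment policy the loop above implements: vectors and, on targets that pass scalar FP in XMM registers, scalar floats compete for XMM slots; scalar integers compete for GPR slots; anything that misses a register keeps its stack home.

    #include <vector>

    enum class ArgClass { Xmm, Gpr, Stack };

    struct ArgInfo {
      bool IsVector;
      bool IsScalarFloat;
    };

    std::vector<ArgClass> classifyArgs(const std::vector<ArgInfo> &Args,
                                       unsigned MaxXmm, unsigned MaxGpr) {
      std::vector<ArgClass> Classes;
      unsigned NumXmm = 0, NumGpr = 0;
      for (const ArgInfo &Arg : Args) {
        if ((Arg.IsVector || Arg.IsScalarFloat) && NumXmm < MaxXmm) {
          ++NumXmm;
          Classes.push_back(ArgClass::Xmm);
        } else if (!Arg.IsVector && !Arg.IsScalarFloat && NumGpr < MaxGpr) {
          ++NumGpr;
          Classes.push_back(ArgClass::Gpr);
        } else {
          Classes.push_back(ArgClass::Stack);
        }
      }
      return Classes;
    }

In the real code the cutoffs come from Traits::getRegisterForXmmArgNum / getRegisterForGprArgNum returning Variable::NoRegister rather than explicit Max counts.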
1159
1099 /// Strength-reduce scalar integer multiplication by a constant (for i32 or 1160 /// Strength-reduce scalar integer multiplication by a constant (for i32 or
1100 /// narrower) for certain constants. The lea instruction can be used to multiply 1161 /// narrower) for certain constants. The lea instruction can be used to multiply
1101 /// by 3, 5, or 9, and the shl instruction can be used to multiply by powers of 1162 /// by 3, 5, or 9, and the shl instruction can be used to multiply by powers of
1102 /// 2. These can be combined such that e.g. multiplying by 100 can be done as 2 1163 /// 2. These can be combined such that e.g. multiplying by 100 can be done as 2
1103 /// lea-based multiplies by 5, combined with left-shifting by 2. 1164 /// lea-based multiplies by 5, combined with left-shifting by 2.
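
A worked example of the decomposition the comment above describes, as straight-line C++ (hypothetical helper, not part of the patch): since 100 = 5 * 5 * 4, a multiply by 100 becomes two lea-style multiplies by 5 followed by a 2-bit left shift.

    #include <cassert>
    #include <cstdint>

    uint32_t mulBy100(uint32_t X) {
      X = X + X * 4; // lea T, [X + 4*X]  -> X * 5
      X = X + X * 4; // lea T, [T + 4*T]  -> X * 25
      return X << 2; // shl T, 2          -> X * 100
    }

    int main() {
      assert(mulBy100(7) == 700);
      return 0;
    }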
1104 template <typename TraitsType> 1165 template <typename TraitsType>
1105 bool TargetX86Base<TraitsType>::optimizeScalarMul(Variable *Dest, Operand *Src0, 1166 bool TargetX86Base<TraitsType>::optimizeScalarMul(Variable *Dest, Operand *Src0,
1106 int32_t Src1) { 1167 int32_t Src1) {
1107 // Disable this optimization for Om1 and O0, just to keep things simple 1168 // Disable this optimization for Om1 and O0, just to keep things simple
1108 // there. 1169 // there.
(...skipping 912 matching lines...)
2021 return; 2082 return;
2022 } 2083 }
2023 } 2084 }
2024 } 2085 }
2025 Operand *Src0 = legalize(Cond, Legal_Reg | Legal_Mem); 2086 Operand *Src0 = legalize(Cond, Legal_Reg | Legal_Mem);
2026 Constant *Zero = Ctx->getConstantZero(IceType_i32); 2087 Constant *Zero = Ctx->getConstantZero(IceType_i32);
2027 _cmp(Src0, Zero); 2088 _cmp(Src0, Zero);
2028 _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse()); 2089 _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse());
2029 } 2090 }
2030 2091
2092 namespace {
2093 // constexprMax returns a (constexpr) max(S0, S1), and it is used for defining
2094 // OperandList in lowerCall. std::max() is not constexpr until C++14.
2095 constexpr SizeT constexprMax(SizeT S0, SizeT S1) { return S0 < S1 ? S1 : S0; }
2096 //
2097 } // end of anonymous namespace
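
A standalone usage sketch of the same idiom (restated here so it compiles on its own; not the patch's code): because the function is constexpr, its result is usable wherever a compile-time constant is required, which is what the SmallVector size argument in lowerCall needs and what std::max (constexpr only since C++14) cannot provide under C++11.

    #include <cstddef>

    constexpr std::size_t constexprMax(std::size_t S0, std::size_t S1) {
      return S0 < S1 ? S1 : S0;
    }

    // Usable in constant expressions, e.g. as an array bound:
    static_assert(constexprMax(3, 5) == 5, "picks the larger value");
    int Buffer[constexprMax(4, 8)]; // array of 8 ints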
2098
2099 template <typename TraitsType>
2100 void TargetX86Base<TraitsType>::lowerCall(const InstCall *Instr) {
2101 // Common x86 calling convention lowering:
2102 //
2103 // * At the point before the call, the stack must be aligned to 16 bytes.
2104 //
2105 // * Non-register arguments are pushed onto the stack in right-to-left order,
2106 // such that the left-most argument ends up on the top of the stack at the
2107 // lowest memory address.
2108 //
2109 // * Stack arguments of vector type are aligned to start at the next highest
2110 // multiple of 16 bytes. Other stack arguments are aligned to the next word
2111 // size boundary (4 or 8 bytes, respectively).
2112 NeedsStackAlignment = true;
2113
2114 using OperandList =
2115 llvm::SmallVector<Operand *, constexprMax(Traits::X86_MAX_XMM_ARGS,
2116 Traits::X86_MAX_GPR_ARGS)>;
2117 OperandList XmmArgs;
2118 CfgVector<std::pair<const Type, Operand *>> GprArgs;
2119 OperandList StackArgs, StackArgLocations;
2120 int32_t ParameterAreaSizeBytes = 0;
Jim Stichnoth 2016/01/19 20:54:06 Make this uint32_t instead.
sehr 2016/01/19 21:47:35 Done.
2121
2122 // Classify each argument operand according to the location where the
Jim Stichnoth 2016/01/19 20:54:05 reflow to 80-col
sehr 2016/01/19 21:47:35 Done.
2123 // argument is passed.
2124 for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
2125 Operand *Arg = Instr->getArg(i);
2126 Type Ty = Arg->getType();
Jim Stichnoth 2016/01/19 20:54:06 const
sehr 2016/01/19 21:47:35 Done.
2127 // The PNaCl ABI requires the width of arguments to be at least 32 bits.
2128 assert(typeWidthInBytes(Ty) >= 4);
2129 if (isVectorType(Ty) && (Traits::getRegisterForXmmArgNum(XmmArgs.size()) !=
2130 Variable::NoRegister)) {
2131 XmmArgs.push_back(Arg);
2132 } else if (isScalarFloatingType(Ty) && Traits::X86_PASS_SCALAR_FP_IN_XMM &&
2133 (Traits::getRegisterForXmmArgNum(0) != Variable::NoRegister)) {
2134 XmmArgs.push_back(Arg);
2135 } else if (isScalarIntegerType(Ty) &&
2136 (Traits::getRegisterForGprArgNum(Ty, GprArgs.size()) !=
2137 Variable::NoRegister)) {
2138 GprArgs.emplace_back(Ty, Arg);
2139 } else {
2140 // Place on stack.
2141 StackArgs.push_back(Arg);
2142 if (isVectorType(Arg->getType())) {
2143 ParameterAreaSizeBytes =
2144 Traits::applyStackAlignment(ParameterAreaSizeBytes);
2145 }
2146 Variable *esp = getPhysicalRegister(getStackReg(), Traits::WordType);
2147 Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes);
2148 StackArgLocations.push_back(
2149 Traits::X86OperandMem::create(Func, Ty, esp, Loc));
2150 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
2151 }
2152 }
2153 // Ensure there is enough space for the fstp/movs for floating returns.
2154 Variable *Dest = Instr->getDest();
Jim Stichnoth 2016/01/19 20:54:06 Add something like: const Type DestTy = Dest ? Dest->getType() : IceType_void;
sehr 2016/01/19 21:47:35 Done.
2155 if (Traits::X86_PASS_SCALAR_FP_IN_XMM) {
2156 if (Dest != nullptr && isScalarFloatingType(Dest->getType())) {
2157 ParameterAreaSizeBytes =
2158 std::max(static_cast<size_t>(ParameterAreaSizeBytes),
2159 typeWidthInBytesOnStack(Dest->getType()));
2160 }
2161 }
2162 // Adjust the parameter area so that the stack is aligned. It is assumed that
2163 // the stack is already aligned at the start of the calling sequence.
2164 ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes);
2165 assert(static_cast<uint32_t>(ParameterAreaSizeBytes) <=
Jim Stichnoth 2016/01/19 20:54:06 Remove this static_cast, assuming ParameterAreaSizeBytes is changed to uint32_t.
sehr 2016/01/19 21:47:35 Done.
2166 maxOutArgsSizeBytes());
2167 // Copy arguments that are passed on the stack to the appropriate stack
2168 // locations.
2169 for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) {
Jim Stichnoth 2016/01/19 20:54:06 Use NumStackArgs instead of "e", for consistency with NumXmmArgs and NumGprArgs below.
sehr 2016/01/19 21:47:35 Done.
2170 lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i]));
2171 }
2172 // Copy arguments to be passed in registers to the appropriate registers.
2173 // TODO: Investigate the impact of lowering arguments passed in registers
Jim Stichnoth 2016/01/19 20:54:06 I think this TODO should just be removed entirely.
sehr 2016/01/19 21:47:35 Done.
2174 // after lowering stack arguments as opposed to the other way around.
2175 // Lowering register arguments after stack arguments may reduce register
2176 // pressure. On the other hand, lowering register arguments first (before
2177 // stack arguments) may result in more compact code, as the memory operand
2178 // displacements may end up being smaller before any stack adjustment is
2179 // done.
2180 for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) {
2181 Variable *Reg =
2182 legalizeToReg(XmmArgs[i], Traits::getRegisterForXmmArgNum(i));
2183 // Generate a FakeUse of register arguments so that they do not get dead
2184 // code eliminated as a result of the FakeKill of scratch registers after
2185 // the call.
2186 Context.insert<InstFakeUse>(Reg);
2187 }
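
A minimal sketch of why the FakeUse matters (toy data structures, not the Subzero pass): a liveness-based dead-code pass removes a definition with no later use, and the FakeUse supplies that use so the copy into the argument register survives up to the call.

    #include <cassert>
    #include <cstddef>
    #include <vector>

    struct Inst {
      int Def;               // register this instruction writes (-1: none)
      std::vector<int> Uses; // registers this instruction reads
    };

    // Returns true if any instruction after index I reads register R.
    static bool usedLater(const std::vector<Inst> &L, std::size_t I, int R) {
      for (std::size_t J = I + 1; J < L.size(); ++J)
        for (int U : L[J].Uses)
          if (U == R)
            return true;
      return false;
    }

    int main() {
      constexpr int Xmm0 = 0;
      // A copy of an argument into xmm0, without and with a trailing FakeUse.
      std::vector<Inst> NoFakeUse = {{Xmm0, {}}};
      std::vector<Inst> WithFakeUse = {{Xmm0, {}}, {-1, {Xmm0}}};
      assert(!usedLater(NoFakeUse, 0, Xmm0));  // looks dead: DCE would drop it
      assert(usedLater(WithFakeUse, 0, Xmm0)); // FakeUse keeps the copy alive
      return 0;
    }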
2188 // Materialize moves for arguments passed in GPRs.
2189 for (SizeT i = 0, NumGprArgs = GprArgs.size(); i < NumGprArgs; ++i) {
2190 const Type SignatureTy = GprArgs[i].first;
2191 Operand *Arg = GprArgs[i].second;
2192 Variable *Reg =
2193 legalizeToReg(Arg, Traits::getRegisterForGprArgNum(Arg->getType(), i));
2194 assert(SignatureTy == IceType_i64 || SignatureTy == IceType_i32);
2195 assert(SignatureTy == Arg->getType());
2196 (void)SignatureTy;
2197 Context.insert<InstFakeUse>(Reg);
2198 }
2199 // Generate the call instruction. Assign its result to a temporary with high
2200 // register allocation weight.
2201 // ReturnReg doubles as ReturnRegLo as necessary.
2202 Variable *ReturnReg = nullptr;
2203 Variable *ReturnRegHi = nullptr;
2204 if (Dest) {
2205 const Type DestTy = Dest->getType();
2206 switch (DestTy) {
2207 case IceType_NUM:
2208 case IceType_void:
2209 case IceType_i1:
2210 case IceType_i8:
2211 case IceType_i16:
2212 llvm::report_fatal_error("Invalid Call dest type");
2213 break;
2214 case IceType_i32:
2215 ReturnReg = makeReg(DestTy, Traits::RegisterSet::Reg_eax);
2216 break;
2217 case IceType_i64:
2218 if (Traits::Is64Bit) {
2219 ReturnReg = makeReg(
2220 IceType_i64,
2221 Traits::getGprForType(IceType_i64, Traits::RegisterSet::Reg_eax));
2222 } else {
2223 ReturnReg = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
2224 ReturnRegHi = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
2225 }
2226 break;
2227 case IceType_f32:
2228 case IceType_f64:
2229 if (!Traits::X86_PASS_SCALAR_FP_IN_XMM) {
2230 // Leave ReturnReg==ReturnRegHi==nullptr, and capture the result with
2231 // the fstp instruction.
2232 break;
2233 }
2234 // Fallthrough intended.
2235 case IceType_v4i1:
2236 case IceType_v8i1:
2237 case IceType_v16i1:
2238 case IceType_v16i8:
2239 case IceType_v8i16:
2240 case IceType_v4i32:
2241 case IceType_v4f32:
2242 ReturnReg = makeReg(DestTy, Traits::RegisterSet::Reg_xmm0);
2243 break;
2244 }
2245 }
2246 // Emit the call to the function.
2247 Operand *CallTarget =
2248 legalize(Instr->getCallTarget(), Legal_Reg | Legal_Imm | Legal_AddrAbs);
2249 Inst *NewCall = emitCallToTarget(CallTarget, ReturnReg);
2250 // Keep the upper return register live on 32-bit platforms.
2251 if (ReturnRegHi)
2252 Context.insert<InstFakeDef>(ReturnRegHi);
2253 // Mark the call as killing all the callee-saves registers.
Jim Stichnoth 2016/01/19 20:54:06 caller-save
sehr 2016/01/19 21:47:35 Duh. Fixed.
2254 Context.insert<InstFakeKill>(NewCall);
2255 // Handle x86-32 floating point returns.
2256 if (Dest != nullptr && isScalarFloatingType(Dest->getType()) &&
2257 !Traits::X86_PASS_SCALAR_FP_IN_XMM) {
2258 // Special treatment for an FP function which returns its result in st(0).
2259 // If Dest ends up being a physical xmm register, the fstp emit code will
2260 // route st(0) through the space reserved in the function argument area
2261 // we allocated.
2262 _fstp(Dest);
2263 // Create a fake use of Dest in case it actually isn't used, because st(0)
2264 // still needs to be popped.
2265 Context.insert<InstFakeUse>(Dest);
2266 }
2267 // Generate a FakeUse to keep the call live if necessary.
2268 if (Instr->hasSideEffects() && ReturnReg) {
2269 Context.insert<InstFakeUse>(ReturnReg);
2270 }
2271 // Process the return value, if any.
2272 if (!Dest)
Jim Stichnoth 2016/01/19 20:54:06 Dest == nullptr
sehr 2016/01/19 21:47:36 Done.
2273 return;
2274 // Assign the result of the call to Dest.
2275 const Type DestTy = Dest->getType();
2276 if (isVectorType(DestTy)) {
2277 assert(ReturnReg && "Vector type requires a return register");
2278 _movp(Dest, ReturnReg);
2279 } else if (isScalarFloatingType(DestTy)) {
2280 if (Traits::X86_PASS_SCALAR_FP_IN_XMM) {
2281 assert(ReturnReg && "FP type requires a return register");
2282 _mov(Dest, ReturnReg);
2283 }
2284 } else {
2285 assert(isScalarIntegerType(DestTy));
2286 assert(ReturnReg && "Integer type requires a return register");
2287 if (DestTy == IceType_i64 && !Traits::Is64Bit) {
2288 assert(ReturnRegHi && "64-bit type requires two return registers");
2289 auto *Dest64On32 = llvm::cast<Variable64On32>(Dest);
2290 Variable *DestLo = Dest64On32->getLo();
2291 Variable *DestHi = Dest64On32->getHi();
2292 _mov(DestLo, ReturnReg);
2293 _mov(DestHi, ReturnRegHi);
2294 } else {
2295 _mov(Dest, ReturnReg);
2296 }
2297 }
2298 }
2299
2031 template <typename TraitsType> 2300 template <typename TraitsType>
2032 void TargetX86Base<TraitsType>::lowerCast(const InstCast *Inst) { 2301 void TargetX86Base<TraitsType>::lowerCast(const InstCast *Inst) {
2033 // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap) 2302 // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap)
2034 InstCast::OpKind CastKind = Inst->getCastKind(); 2303 InstCast::OpKind CastKind = Inst->getCastKind();
2035 Variable *Dest = Inst->getDest(); 2304 Variable *Dest = Inst->getDest();
2036 Type DestTy = Dest->getType(); 2305 Type DestTy = Dest->getType();
2037 switch (CastKind) { 2306 switch (CastKind) {
2038 default: 2307 default:
2039 Func->setError("Cast type not supported"); 2308 Func->setError("Cast type not supported");
2040 return; 2309 return;
(...skipping 2773 matching lines...)
4814 _nop(RNGW(Traits::X86_NUM_NOP_VARIANTS)); 5083 _nop(RNGW(Traits::X86_NUM_NOP_VARIANTS));
4815 } 5084 }
4816 } 5085 }
4817 5086
4818 template <typename TraitsType> 5087 template <typename TraitsType>
4819 void TargetX86Base<TraitsType>::lowerPhi(const InstPhi * /*Inst*/) { 5088 void TargetX86Base<TraitsType>::lowerPhi(const InstPhi * /*Inst*/) {
4820 Func->setError("Phi found in regular instruction list"); 5089 Func->setError("Phi found in regular instruction list");
4821 } 5090 }
4822 5091
4823 template <typename TraitsType> 5092 template <typename TraitsType>
5093 void TargetX86Base<TraitsType>::lowerRet(const InstRet *Inst) {
5094 Variable *Reg = nullptr;
5095 if (Inst->hasRetValue()) {
5096 Operand *RetValue = legalize(Inst->getRetValue());
5097 const Type ReturnType = RetValue->getType();
5098 assert(isVectorType(ReturnType) || isScalarFloatingType(ReturnType) ||
5099 (ReturnType == IceType_i32) || (ReturnType == IceType_i64));
5100 Reg = moveReturnValueToRegister(RetValue, ReturnType);
5101 }
5102 // Add a ret instruction even if sandboxing is enabled, because addEpilog
5103 // explicitly looks for a ret instruction as a marker for where to insert the
5104 // frame removal instructions.
5105 _ret(Reg);
5106 // Add a fake use of esp to make sure esp stays alive for the entire
5107 // function. Otherwise post-call esp adjustments get dead-code eliminated.
5108 keepEspLiveAtExit();
5109 }
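
For orientation, a hedged summary helper (hypothetical, not the Subzero API) of the x86-32 return locations that lowerRet and lowerCall agree on above; with X86_PASS_SCALAR_FP_IN_XMM the scalar FP case would report xmm0 instead of st(0):

    #include <cstdint>

    enum class RetLoc { Eax, EaxEdxPair, X87St0, Xmm0 };

    RetLoc x86_32ReturnLocation(bool IsVector, bool IsScalarFloat,
                                uint32_t BitWidth) {
      if (IsVector)
        return RetLoc::Xmm0;       // vectors come back in xmm0
      if (IsScalarFloat)
        return RetLoc::X87St0;     // captured with fstp after the call
      if (BitWidth == 64)
        return RetLoc::EaxEdxPair; // lo half in eax, hi half in edx
      return RetLoc::Eax;          // i32
    }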
5110
5111 template <typename TraitsType>
4824 void TargetX86Base<TraitsType>::lowerSelect(const InstSelect *Select) { 5112 void TargetX86Base<TraitsType>::lowerSelect(const InstSelect *Select) {
4825 Variable *Dest = Select->getDest(); 5113 Variable *Dest = Select->getDest();
4826 5114
4827 if (isVectorType(Dest->getType())) { 5115 if (isVectorType(Dest->getType())) {
4828 lowerSelectVector(Select); 5116 lowerSelectVector(Select);
4829 return; 5117 return;
4830 } 5118 }
4831 5119
4832 Operand *Condition = Select->getCondition(); 5120 Operand *Condition = Select->getCondition();
4833 // Handle folding opportunities. 5121 // Handle folding opportunities.
(...skipping 1786 matching lines...)
6620 Func, MemOperand->getType(), RegTemp, nullptr, MemOperand->getIndex(), 6908 Func, MemOperand->getType(), RegTemp, nullptr, MemOperand->getIndex(),
6621 MemOperand->getShift(), MemOperand->getSegmentRegister()); 6909 MemOperand->getShift(), MemOperand->getSegmentRegister());
6622 return NewMemOperand; 6910 return NewMemOperand;
6623 } 6911 }
6624 } 6912 }
6625 } 6913 }
6626 } // end of namespace X86NAMESPACE 6914 } // end of namespace X86NAMESPACE
6627 } // end of namespace Ice 6915 } // end of namespace Ice
6628 6916
6629 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H 6917 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H