Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(244)

Side by Side Diff: src/IceTargetLoweringX86BaseImpl.h

Issue 1266673003: Subzero. Implements x86-64 lowerCall. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: git pull && addresses comments && git format Created 5 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/IceTargetLoweringX86Base.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
(...skipping 2108 matching lines...) Expand 10 before | Expand all | Expand 10 after
2119 } 2119 }
2120 } 2120 }
2121 2121
2122 Operand *Src0 = legalize(Cond, Legal_Reg | Legal_Mem); 2122 Operand *Src0 = legalize(Cond, Legal_Reg | Legal_Mem);
2123 Constant *Zero = Ctx->getConstantZero(IceType_i32); 2123 Constant *Zero = Ctx->getConstantZero(IceType_i32);
2124 _cmp(Src0, Zero); 2124 _cmp(Src0, Zero);
2125 _br(Traits::Cond::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse()); 2125 _br(Traits::Cond::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse());
2126 } 2126 }
2127 2127
2128 template <class Machine> 2128 template <class Machine>
2129 void TargetX86Base<Machine>::lowerCall(const InstCall *Instr) {
2130 // x86-32 calling convention:
2131 //
2132 // * At the point before the call, the stack must be aligned to 16
2133 // bytes.
2134 //
2135 // * The first four arguments of vector type, regardless of their
2136 // position relative to the other arguments in the argument list, are
2137 // placed in registers xmm0 - xmm3.
2138 //
2139 // * Other arguments are pushed onto the stack in right-to-left order,
2140 // such that the left-most argument ends up on the top of the stack at
2141 // the lowest memory address.
2142 //
2143 // * Stack arguments of vector type are aligned to start at the next
2144 // highest multiple of 16 bytes. Other stack arguments are aligned to
2145 // 4 bytes.
2146 //
2147 // This intends to match the section "IA-32 Function Calling
2148 // Convention" of the document "OS X ABI Function Call Guide" by
2149 // Apple.
     //
     // NOTE(review): this lives in the shared TargetX86Base template even
     // though the comment describes the 32-bit convention; presumably the
     // x86-64 target specializes register/stack details through Traits
     // (Reg_esp, X86_MAX_XMM_ARGS, applyStackAlignment) -- confirm.
2150 NeedsStackAlignment = true;
2151
2152 typedef std::vector<Operand *> OperandList;
2153 OperandList XmmArgs;
2154 OperandList StackArgs, StackArgLocations;
     // Running size of the on-stack argument area; doubles as the esp-relative
     // offset of the next stack argument slot.
2155 uint32_t ParameterAreaSizeBytes = 0;
2156
2157 // Classify each argument operand according to the location where the
2158 // argument is passed.
2159 for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
2160 Operand *Arg = Instr->getArg(i);
2161 Type Ty = Arg->getType();
2162 // The PNaCl ABI requires the width of arguments to be at least 32 bits.
2163 assert(typeWidthInBytes(Ty) >= 4);
2164 if (isVectorType(Ty) && XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) {
2165 XmmArgs.push_back(Arg);
2166 } else {
2167 StackArgs.push_back(Arg);
       // Vector arguments that overflow to the stack start at the next
       // stack-alignment (16-byte) boundary.
2168 if (isVectorType(Arg->getType())) {
2169 ParameterAreaSizeBytes =
2170 Traits::applyStackAlignment(ParameterAreaSizeBytes);
2171 }
       // Record the esp-relative slot [esp + offset] where this argument must
       // be stored once the stack has been adjusted.
2172 Variable *esp =
2173 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
2174 Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes);
2175 StackArgLocations.push_back(
2176 Traits::X86OperandMem::create(Func, Ty, esp, Loc));
2177 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
2178 }
2179 }
2180
2181 // Adjust the parameter area so that the stack is aligned. It is
2182 // assumed that the stack is already aligned at the start of the
2183 // calling sequence.
2184 ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes);
2185
2186 // Subtract the appropriate amount for the argument area. This also
2187 // takes care of setting the stack adjustment during emission.
2188 //
2189 // TODO: If for some reason the call instruction gets dead-code
2190 // eliminated after lowering, we would need to ensure that the
2191 // pre-call and the post-call esp adjustment get eliminated as well.
2192 if (ParameterAreaSizeBytes) {
2193 _adjust_stack(ParameterAreaSizeBytes);
2194 }
2195
2196 // Copy arguments that are passed on the stack to the appropriate
2197 // stack locations.
     // NOTE(review): StackArgs[i] and StackArgLocations[i] are parallel
     // vectors populated together in the classification loop above.
2198 for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) {
2199 lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i]));
2200 }
2201
2202 // Copy arguments to be passed in registers to the appropriate
2203 // registers.
2204 // TODO: Investigate the impact of lowering arguments passed in
2205 // registers after lowering stack arguments as opposed to the other
2206 // way around. Lowering register arguments after stack arguments may
2207 // reduce register pressure. On the other hand, lowering register
2208 // arguments first (before stack arguments) may result in more compact
2209 // code, as the memory operand displacements may end up being smaller
2210 // before any stack adjustment is done.
2211 for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) {
2212 Variable *Reg =
2213 legalizeToReg(XmmArgs[i], Traits::RegisterSet::Reg_xmm0 + i);
2214 // Generate a FakeUse of register arguments so that they do not get
2215 // dead code eliminated as a result of the FakeKill of scratch
2216 // registers after the call.
2217 Context.insert(InstFakeUse::create(Func, Reg));
2218 }
2219 // Generate the call instruction. Assign its result to a temporary
2220 // with high register allocation weight.
2221 Variable *Dest = Instr->getDest();
2222 // ReturnReg doubles as ReturnRegLo as necessary.
2223 Variable *ReturnReg = nullptr;
2224 Variable *ReturnRegHi = nullptr;
2225 if (Dest) {
2226 switch (Dest->getType()) {
2227 case IceType_NUM:
2228 llvm_unreachable("Invalid Call dest type");
2229 break;
2230 case IceType_void:
2231 break;
2232 case IceType_i1:
2233 case IceType_i8:
2234 case IceType_i16:
2235 case IceType_i32:
2236 ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_eax);
2237 break;
2238 case IceType_i64:
       // 64-bit results come back split across the eax:edx register pair.
2239 ReturnReg = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
2240 ReturnRegHi = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
2241 break;
2242 case IceType_f32:
2243 case IceType_f64:
2244 // Leave ReturnReg==ReturnRegHi==nullptr, and capture the result with
2245 // the fstp instruction.
2246 break;
2247 case IceType_v4i1:
2248 case IceType_v8i1:
2249 case IceType_v16i1:
2250 case IceType_v16i8:
2251 case IceType_v8i16:
2252 case IceType_v4i32:
2253 case IceType_v4f32:
2254 ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_xmm0);
2255 break;
2256 }
2257 }
2258 Operand *CallTarget = legalize(Instr->getCallTarget());
2259 const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing();
     // NaCl sandboxing: a direct (constant) call only needs the bundle lock
     // with AlignToEnd so the instruction ends on a bundle boundary; an
     // indirect call target must additionally be masked with
     // ~(BundleSize - 1) to clear the low address bits, forcing the target
     // onto a bundle boundary.
2260 if (NeedSandboxing) {
2261 if (llvm::isa<Constant>(CallTarget)) {
2262 _bundle_lock(InstBundleLock::Opt_AlignToEnd);
2263 } else {
2264 Variable *CallTargetVar = nullptr;
2265 _mov(CallTargetVar, CallTarget);
2266 _bundle_lock(InstBundleLock::Opt_AlignToEnd);
2267 const SizeT BundleSize =
2268 1 << Func->getAssembler<>()->getBundleAlignLog2Bytes();
2269 _and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1)));
2270 CallTarget = CallTargetVar;
2271 }
2272 }
2273 Inst *NewCall = Traits::Insts::Call::create(Func, ReturnReg, CallTarget);
2274 Context.insert(NewCall);
2275 if (NeedSandboxing)
2276 _bundle_unlock();
     // The call instruction itself only defines ReturnReg (the low half);
     // a FakeDef makes the edx (high-half) definition visible to liveness
     // analysis and the register allocator.
2277 if (ReturnRegHi)
2278 Context.insert(InstFakeDef::create(Func, ReturnRegHi));
2279
2280 // Add the appropriate offset to esp. The call instruction takes care
2281 // of resetting the stack offset during emission.
2282 if (ParameterAreaSizeBytes) {
2283 Variable *esp =
2284 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
2285 _add(esp, Ctx->getConstantInt32(ParameterAreaSizeBytes));
2286 }
2287
2288 // Insert a register-kill pseudo instruction.
     // NOTE(review): presumably this marks all caller-save (scratch)
     // registers as clobbered across NewCall -- confirm against
     // InstFakeKill's definition.
2289 Context.insert(InstFakeKill::create(Func, NewCall));
2290
2291 // Generate a FakeUse to keep the call live if necessary.
2292 if (Instr->hasSideEffects() && ReturnReg) {
2293 Inst *FakeUse = InstFakeUse::create(Func, ReturnReg);
2294 Context.insert(FakeUse);
2295 }
2296
2297 if (!Dest)
2298 return;
2299
2300 // Assign the result of the call to Dest.
2301 if (ReturnReg) {
2302 if (ReturnRegHi) {
2303 assert(Dest->getType() == IceType_i64);
       // Split the i64 Dest into lo/hi i32 halves and move eax/edx into them.
2304 split64(Dest);
2305 Variable *DestLo = Dest->getLo();
2306 Variable *DestHi = Dest->getHi();
2307 _mov(DestLo, ReturnReg);
2308 _mov(DestHi, ReturnRegHi);
2309 } else {
2310 assert(Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 ||
2311 Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 ||
2312 isVectorType(Dest->getType()));
2313 if (isVectorType(Dest->getType())) {
2314 _movp(Dest, ReturnReg);
2315 } else {
2316 _mov(Dest, ReturnReg);
2317 }
2318 }
2319 } else if (isScalarFloatingType(Dest->getType())) {
2320 // Special treatment for an FP function which returns its result in
2321 // st(0).
2322 // If Dest ends up being a physical xmm register, the fstp emit code
2323 // will route st(0) through a temporary stack slot.
2324 _fstp(Dest);
2325 // Create a fake use of Dest in case it actually isn't used,
2326 // because st(0) still needs to be popped.
2327 Context.insert(InstFakeUse::create(Func, Dest));
2328 }
2329 }
2330
2331 template <class Machine>
2332 void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) { 2129 void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) {
2333 // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap) 2130 // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap)
2334 InstCast::OpKind CastKind = Inst->getCastKind(); 2131 InstCast::OpKind CastKind = Inst->getCastKind();
2335 Variable *Dest = Inst->getDest(); 2132 Variable *Dest = Inst->getDest();
2336 switch (CastKind) { 2133 switch (CastKind) {
2337 default: 2134 default:
2338 Func->setError("Cast type not supported"); 2135 Func->setError("Cast type not supported");
2339 return; 2136 return;
2340 case InstCast::Sext: { 2137 case InstCast::Sext: {
2341 // Src0RM is the source operand legalized to physical register or memory, 2138 // Src0RM is the source operand legalized to physical register or memory,
(...skipping 3263 matching lines...) Expand 10 before | Expand all | Expand 10 after
5605 } 5402 }
5606 // the offset is not eligible for blinding or pooling, return the original 5403 // the offset is not eligible for blinding or pooling, return the original
5607 // mem operand 5404 // mem operand
5608 return MemOperand; 5405 return MemOperand;
5609 } 5406 }
5610 5407
5611 } // end of namespace X86Internal 5408 } // end of namespace X86Internal
5612 } // end of namespace Ice 5409 } // end of namespace Ice
5613 5410
5614 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H 5411 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H
OLDNEW
« no previous file with comments | « src/IceTargetLoweringX86Base.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698