Chromium Code Reviews

Unified Diff: src/IceTargetLoweringX8632.cpp

Issue 1449523002: Eliminate stack adjustment for float-returning functions (Closed)
Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Code review comments addressed. Created 5 years, 1 month ago
 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//
 //
 // The Subzero Code Generator
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
 ///
 /// \file
(...skipping 113 matching lines...)
   //
   // * Stack arguments of vector type are aligned to start at the next highest
   //   multiple of 16 bytes. Other stack arguments are aligned to 4 bytes.
   //
   // This intends to match the section "IA-32 Function Calling Convention" of
   // the document "OS X ABI Function Call Guide" by Apple.
   NeedsStackAlignment = true;

   OperandList XmmArgs;
   OperandList StackArgs, StackArgLocations;
-  uint32_t ParameterAreaSizeBytes = 0;
+  int32_t ParameterAreaSizeBytes = 0;

   // Classify each argument operand according to the location where the
   // argument is passed.
   for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
     Operand *Arg = Instr->getArg(i);
     Type Ty = Arg->getType();
     // The PNaCl ABI requires the width of arguments to be at least 32 bits.
     assert(typeWidthInBytes(Ty) >= 4);
     if (isVectorType(Ty) && XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) {
       XmmArgs.push_back(Arg);
     } else {
       StackArgs.push_back(Arg);
       if (isVectorType(Arg->getType())) {
         ParameterAreaSizeBytes =
             Traits::applyStackAlignment(ParameterAreaSizeBytes);
       }
       Variable *esp =
           Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
       Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes);
       auto *Mem = Traits::X86OperandMem::create(Func, Ty, esp, Loc);
       // Stack stores for arguments are fixed to esp.
       Mem->setIgnoreStackAdjust(true);
       StackArgLocations.push_back(Mem);
       ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
     }
   }
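The layout rule this loop implements can be sketched in isolation: each stack argument lands at the current end of the parameter area, except that vector arguments are first bumped up to a 16-byte boundary. A minimal sketch, with simplified stand-ins for Subzero's type queries (ArgInfo, alignUp, and layoutStackArgs are hypothetical names, not Subzero API):

    #include <cstdint>
    #include <vector>

    // Simplified stand-in for what Subzero derives from an argument's Type.
    struct ArgInfo {
      uint32_t SizeOnStack; // typeWidthInBytesOnStack(Ty); always >= 4 here
      bool IsVector;        // isVectorType(Ty)
    };

    // Round Offset up to the next multiple of Align (a power of two).
    uint32_t alignUp(uint32_t Offset, uint32_t Align) {
      return (Offset + Align - 1) & ~(Align - 1);
    }

    // Compute each stack argument's esp-relative offset and the total
    // parameter-area size, mirroring the classification loop above.
    uint32_t layoutStackArgs(const std::vector<ArgInfo> &StackArgs,
                             std::vector<uint32_t> &Offsets) {
      uint32_t ParameterAreaSizeBytes = 0;
      for (const ArgInfo &Arg : StackArgs) {
        if (Arg.IsVector) // vectors start at the next 16-byte boundary
          ParameterAreaSizeBytes = alignUp(ParameterAreaSizeBytes, 16);
        Offsets.push_back(ParameterAreaSizeBytes);
        ParameterAreaSizeBytes += Arg.SizeOnStack;
      }
      return ParameterAreaSizeBytes;
    }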
+  // Ensure there is enough space for the fstp/movs for floating returns.
+  Variable *Dest = Instr->getDest();
+  if (Dest != nullptr && isScalarFloatingType(Dest->getType())) {
+    ParameterAreaSizeBytes =
+        std::max(static_cast<size_t>(ParameterAreaSizeBytes),
+                 typeWidthInBytesOnStack(Dest->getType()));
+  }
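This added block is the heart of the change: rather than adjusting esp separately for the return temporary, the parameter area itself is made large enough for the fstp that follows the call. It grows by max rather than addition because the slot can reuse the outgoing-argument space, which is dead once the callee has consumed it. A minimal sketch of the reservation rule (reserveReturnSlot is a hypothetical name):

    #include <algorithm>
    #include <cstdint>

    // Grow the parameter area just enough to hold the callee's x87 result.
    int32_t reserveReturnSlot(int32_t ParameterAreaSizeBytes,
                              int32_t ReturnWidthOnStack) {
      // max, not +=: the slot may overlap the outgoing-argument space,
      // which is no longer live after the call returns.
      return std::max(ParameterAreaSizeBytes, ReturnWidthOnStack);
    }

    // e.g. no stack args, f64 return:   reserveReturnSlot(0, 8) == 8
    //      one i32 stack arg, f32 ret:  reserveReturnSlot(4, 4) == 4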

   // Adjust the parameter area so that the stack is aligned. It is assumed that
   // the stack is already aligned at the start of the calling sequence.
   ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes);

   // Subtract the appropriate amount for the argument area. This also takes
   // care of setting the stack adjustment during emission.
   //
   // TODO: If for some reason the call instruction gets dead-code eliminated
   // after lowering, we would need to ensure that the pre-call and the
(...skipping 19 matching lines...)
   for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) {
     Variable *Reg =
         legalizeToReg(XmmArgs[i], Traits::RegisterSet::Reg_xmm0 + i);
     // Generate a FakeUse of register arguments so that they do not get dead
     // code eliminated as a result of the FakeKill of scratch registers after
     // the call.
     Context.insert(InstFakeUse::create(Func, Reg));
   }
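The FakeUse/FakeKill interplay deserves a note: the call's FakeKill marks every scratch register as clobbered, so without a reader between the argument move and the call, liveness analysis would treat the move as dead. An illustrative pseudo-instruction stream, written as C++ comments (a sketch of the concept, not actual Subzero output):

    // Why the FakeUse is needed (illustrative pseudo-IR):
    //
    //   xmm0 = arg0       ; def of a scratch register
    //   FakeUse(xmm0)     ; pseudo-only, emits no code, but gives the def
    //                     ; a reader so liveness analysis keeps it
    //   call f            ; FakeKill clobbers the scratch set
    //                     ; (e.g. eax, ecx, edx, xmm0-xmm7)
    //
    // Without the FakeUse there is no use of xmm0 before the FakeKill,
    // and the argument move could be dead-code eliminated.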
   // Generate the call instruction. Assign its result to a temporary with high
   // register allocation weight.
-  Variable *Dest = Instr->getDest();
   // ReturnReg doubles as ReturnRegLo as necessary.
   Variable *ReturnReg = nullptr;
   Variable *ReturnRegHi = nullptr;
   if (Dest) {
     switch (Dest->getType()) {
     case IceType_NUM:
     case IceType_void:
       llvm::report_fatal_error("Invalid Call dest type");
       break;
     case IceType_i1:
(...skipping 37 matching lines...)
       CallTarget = CallTargetVar;
     }
   }
   Inst *NewCall = Traits::Insts::Call::create(Func, ReturnReg, CallTarget);
   Context.insert(NewCall);
   if (NeedSandboxing)
     _bundle_unlock();
   if (ReturnRegHi)
     Context.insert(InstFakeDef::create(Func, ReturnRegHi));

-  // Add the appropriate offset to esp. The call instruction takes care of
-  // resetting the stack offset during emission.
-  if (ParameterAreaSizeBytes) {
-    Variable *esp =
-        Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
-    _add(esp, Ctx->getConstantInt32(ParameterAreaSizeBytes));
-  }
-
   // Insert a register-kill pseudo instruction.
   Context.insert(InstFakeKill::create(Func, NewCall));

+  if (Dest != nullptr && isScalarFloatingType(Dest->getType())) {
+    // Special treatment for an FP function which returns its result in st(0).
+    // If Dest ends up being a physical xmm register, the fstp emit code will
+    // route st(0) through the space reserved in the function argument area
+    // we allocated.
+    _fstp(Dest);
+    // Create a fake use of Dest in case it actually isn't used, because st(0)
+    // still needs to be popped.
+    Context.insert(InstFakeUse::create(Func, Dest));
+  }
+
+  // Add the appropriate offset to esp.
+  if (ParameterAreaSizeBytes)
+    _adjust_stack(-ParameterAreaSizeBytes);
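Taken together with the reservation above, a float-returning call now needs only one post-call esp adjustment, emitted after the fstp so the store lands inside the still-allocated area. An illustrative emission for "double x = g();", written as C++ comments (hypothetical output assuming 16-byte stack alignment; exact instructions depend on operands and register allocation):

    // Illustrative lowering of "double x = g();" under this scheme:
    //
    //   sub   esp, 16           ; parameter area; also covers the fstp slot
    //   call  g                 ; x87 convention leaves the result in st(0)
    //   fstp  QWORD PTR [esp]   ; pop st(0) into the reserved slot
    //   movsd xmm0, [esp]       ; reload into Dest's xmm register
    //   add   esp, 16           ; single post-call adjustment, after the fstp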
+
   // Generate a FakeUse to keep the call live if necessary.
   if (Instr->hasSideEffects() && ReturnReg) {
     Inst *FakeUse = InstFakeUse::create(Func, ReturnReg);
     Context.insert(FakeUse);
   }

   if (!Dest)
     return;

   // Assign the result of the call to Dest.
   if (ReturnReg) {
     if (ReturnRegHi) {
       auto *Dest64On32 = llvm::cast<Variable64On32>(Dest);
       Variable *DestLo = Dest64On32->getLo();
       Variable *DestHi = Dest64On32->getHi();
       _mov(DestLo, ReturnReg);
       _mov(DestHi, ReturnRegHi);
     } else {
       assert(Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 ||
              Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 ||
              isVectorType(Dest->getType()));
       if (isVectorType(Dest->getType())) {
         _movp(Dest, ReturnReg);
       } else {
         _mov(Dest, ReturnReg);
       }
     }
-  } else if (isScalarFloatingType(Dest->getType())) {
-    // Special treatment for an FP function which returns its result in st(0).
-    // If Dest ends up being a physical xmm register, the fstp emit code will
-    // route st(0) through a temporary stack slot.
-    _fstp(Dest);
-    // Create a fake use of Dest in case it actually isn't used, because st(0)
-    // still needs to be popped.
-    Context.insert(InstFakeUse::create(Func, Dest));
   }
 }

 void TargetX8632::lowerArguments() {
   VarList &Args = Func->getArgs();
   // The first four arguments of vector type, regardless of their position
   // relative to the other arguments in the argument list, are passed in
   // registers xmm0 - xmm3.
   unsigned NumXmmArgs = 0;

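For illustration, a hypothetical signature and the assignment this rule produces (the example is not from the source):

    // Hypothetical example of the rule above:
    //   void f(<4 x i32> a, i32 b, <4 x float> c, <16 x i8> d, i32 e,
    //          <8 x i16> g, <4 x i32> h);
    //
    //   a -> xmm0, c -> xmm1, d -> xmm2, g -> xmm3  (first four vector args)
    //   b, e -> stack (4-byte slots)
    //   h    -> stack as well (xmm0-xmm3 are taken), starting at the next
    //           16-byte-aligned offset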
(...skipping 42 matching lines...)
     } else {
       _mov(Reg, Src0, Traits::RegisterSet::Reg_eax);
     }
   }
   // Add a ret instruction even if sandboxing is enabled, because addEpilog
   // explicitly looks for a ret instruction as a marker for where to insert the
   // frame removal instructions.
   _ret(Reg);
   // Add a fake use of esp to make sure esp stays alive for the entire
   // function. Otherwise post-call esp adjustments get dead-code eliminated.
-  // TODO: Are there more places where the fake use should be inserted? E.g.
-  // "void f(int n){while(1) g(n);}" may not have a ret instruction.
-  Variable *esp =
-      Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
-  Context.insert(InstFakeUse::create(Func, esp));
+  keepEspLiveAtExit();
 }
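The new helper consolidates what was previously open-coded here. Judging from the removed lines, a plausible shape is the following (a sketch inferred from this diff, not the helper's actual definition, which lives elsewhere in the target lowering code):

    // Inferred sketch: the removed lines suggest keepEspLiveAtExit() simply
    // wraps the esp FakeUse that previously appeared inline.
    void TargetX8632::keepEspLiveAtExit() {
      Variable *esp =
          Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
      Context.insert(InstFakeUse::create(Func, esp));
    }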

 void TargetX8632::addProlog(CfgNode *Node) {
   // Stack frame layout:
   //
   // +------------------------+
   // | 1. return address      |
   // +------------------------+
   // | 2. preserved registers |
   // +------------------------+
(...skipping 593 matching lines...)
 // case the high-level table has extra entries.
 #define X(tag, sizeLog2, align, elts, elty, str)                              \
   static_assert(_table1_##tag == _table2_##tag,                               \
                 "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE");
 ICETYPE_TABLE
 #undef X
 } // end of namespace dummy3
 } // end of anonymous namespace
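The static_assert block above is an instance of the X-macro cross-check pattern: expand each table through one definition of X to declare constants, then expand again to assert the two sets agree. A minimal self-contained illustration with hypothetical tables (not Subzero's):

    // Two tables that must stay in sync on each entry's size-log2 field.
    #define TABLE_A X(i8, 0) X(i16, 1) X(i32, 2)
    #define TABLE_B X(i8, 0) X(i16, 1) X(i32, 2)

    #define X(tag, sizeLog2) constexpr int _table1_##tag = sizeLog2;
    TABLE_A
    #undef X

    #define X(tag, sizeLog2) constexpr int _table2_##tag = sizeLog2;
    TABLE_B
    #undef X

    // Expanding one table again cross-checks the two constant sets; any
    // disagreement (or a tag missing from either table) fails to compile.
    #define X(tag, sizeLog2)                                                   \
      static_assert(_table1_##tag == _table2_##tag, "size mismatch for " #tag);
    TABLE_A
    #undef X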

 } // end of namespace Ice
