| OLD | NEW |
| 1 //===- subzero/src/IceTargetLoweringX8664.cpp - x86-64 lowering -----------===// | 1 //===- subzero/src/IceTargetLoweringX8664.cpp - x86-64 lowering -----------===// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 /// | 9 /// |
| 10 /// \file | 10 /// \file |
| 11 /// This file implements the TargetLoweringX8664 class, which | 11 /// This file implements the TargetLoweringX8664 class, which consists almost |
| 12 /// consists almost entirely of the lowering sequence for each | 12 /// entirely of the lowering sequence for each high-level instruction. |
| 13 /// high-level instruction. | |
| 14 /// | 13 /// |
| 15 //===----------------------------------------------------------------------===// | 14 //===----------------------------------------------------------------------===// |
| 16 | 15 |
| 17 #include "IceTargetLoweringX8664.h" | 16 #include "IceTargetLoweringX8664.h" |
| 18 | 17 |
| 19 #include "IceTargetLoweringX8664Traits.h" | 18 #include "IceTargetLoweringX8664Traits.h" |
| 20 #include "IceTargetLoweringX86Base.h" | 19 #include "IceTargetLoweringX86Base.h" |
| 21 | 20 |
| 22 namespace Ice { | 21 namespace Ice { |
| 23 | 22 |
| (...skipping 100 matching lines...) |
| 124 | 123 |
| 125 // constexprMax returns a (constexpr) max(S0, S1), and it is used for defining | 124 // constexprMax returns a (constexpr) max(S0, S1), and it is used for defining |
| 126 // OperandList in lowerCall. std::max() is supposed to work, but it doesn't. | 125 // OperandList in lowerCall. std::max() is supposed to work, but it doesn't. |
| 127 constexpr SizeT constexprMax(SizeT S0, SizeT S1) { return S0 < S1 ? S1 : S0; } | 126 constexpr SizeT constexprMax(SizeT S0, SizeT S1) { return S0 < S1 ? S1 : S0; } |
| 128 | 127 |
| 129 } // end of anonymous namespace | 128 } // end of anonymous namespace |
| 130 | 129 |
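
Why constexprMax instead of std::max: std::max only became constexpr in C++14, so under a C++11 build (assumed here) it cannot be used where a constant expression is required, e.g. the llvm::SmallVector size argument in lowerCall below. A minimal sketch of the difference:

    #include <cstddef>

    constexpr std::size_t constexprMax(std::size_t S0, std::size_t S1) {
      return S0 < S1 ? S1 : S0;
    }

    // In C++11, std::max is not constexpr, so this is ill-formed:
    //   char Buf[std::max(sizeof(int), sizeof(long))];
    // The hand-rolled version is fine in a constant expression:
    char Buf[constexprMax(sizeof(int), sizeof(long))];
    static_assert(constexprMax(6, 8) == 8, "sanity check");
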
| 131 void TargetX8664::lowerCall(const InstCall *Instr) { | 130 void TargetX8664::lowerCall(const InstCall *Instr) { |
| 132 // x86-64 calling convention: | 131 // x86-64 calling convention: |
| 133 // | 132 // |
| 134 // * At the point before the call, the stack must be aligned to 16 | 133 // * At the point before the call, the stack must be aligned to 16 bytes. |
| 135 // bytes. | |
| 136 // | 134 // |
| 137 // * The first eight arguments of vector/fp type, regardless of their | 135 // * The first eight arguments of vector/fp type, regardless of their |
| 138 // position relative to the other arguments in the argument list, are | 136 // position relative to the other arguments in the argument list, are placed |
| 139 // placed in registers %xmm0 - %xmm7. | 137 // in registers %xmm0 - %xmm7. |
| 140 // | 138 // |
| 141 // * The first six arguments of integer types, regardless of their | 139 // * The first six arguments of integer types, regardless of their position |
| 142 // position relative to the other arguments in the argument list, are | 140 // relative to the other arguments in the argument list, are placed in |
| 143 // placed in registers %rdi, %rsi, %rdx, %rcx, %r8, and %r9. | 141 // registers %rdi, %rsi, %rdx, %rcx, %r8, and %r9. |
| 144 // | 142 // |
| 145 // * Other arguments are pushed onto the stack in right-to-left order, | 143 // * Other arguments are pushed onto the stack in right-to-left order, such |
| 146 // such that the left-most argument ends up on the top of the stack at | 144 // that the left-most argument ends up on the top of the stack at the lowest |
| 147 // the lowest memory address. | 145 // memory address. |
| 148 // | 146 // |
| 149 // * Stack arguments of vector type are aligned to start at the next | 147 // * Stack arguments of vector type are aligned to start at the next highest |
| 150 // highest multiple of 16 bytes. Other stack arguments are aligned to | 148 // multiple of 16 bytes. Other stack arguments are aligned to 8 bytes. |
| 151 // 8 bytes. | |
| 152 // | 149 // |
| 153 // This intends to match the section "Function Calling Sequence" of the | 150 // This intends to match the section "Function Calling Sequence" of the |
| 154 // document "System V Application Binary Interface." | 151 // document "System V Application Binary Interface." |
| 155 NeedsStackAlignment = true; | 152 NeedsStackAlignment = true; |
| 156 | 153 |
| 157 using OperandList = | 154 using OperandList = |
| 158 llvm::SmallVector<Operand *, constexprMax(Traits::X86_MAX_XMM_ARGS, | 155 llvm::SmallVector<Operand *, constexprMax(Traits::X86_MAX_XMM_ARGS, |
| 159 Traits::X86_MAX_GPR_ARGS)>; | 156 Traits::X86_MAX_GPR_ARGS)>; |
| 160 OperandList XmmArgs; | 157 OperandList XmmArgs; |
| 161 OperandList GprArgs; | 158 OperandList GprArgs; |
| (...skipping 22 matching lines...) |
| 184 Traits::applyStackAlignment(ParameterAreaSizeBytes); | 181 Traits::applyStackAlignment(ParameterAreaSizeBytes); |
| 185 } | 182 } |
| 186 Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp); | 183 Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp); |
| 187 Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes); | 184 Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes); |
| 188 StackArgLocations.push_back( | 185 StackArgLocations.push_back( |
| 189 Traits::X86OperandMem::create(Func, Ty, esp, Loc)); | 186 Traits::X86OperandMem::create(Func, Ty, esp, Loc)); |
| 190 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType()); | 187 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType()); |
| 191 } | 188 } |
| 192 } | 189 } |
| 193 | 190 |
| 194 // Adjust the parameter area so that the stack is aligned. It is | 191 // Adjust the parameter area so that the stack is aligned. It is assumed that |
| 195 // assumed that the stack is already aligned at the start of the | 192 // the stack is already aligned at the start of the calling sequence. |
| 196 // calling sequence. | |
| 197 ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes); | 193 ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes); |
| 198 | 194 |
| 199 // Subtract the appropriate amount for the argument area. This also | 195 // Subtract the appropriate amount for the argument area. This also takes |
| 200 // takes care of setting the stack adjustment during emission. | 196 // care of setting the stack adjustment during emission. |
| 201 // | 197 // |
| 202 // TODO: If for some reason the call instruction gets dead-code | 198 // TODO: If for some reason the call instruction gets dead-code eliminated |
| 203 // eliminated after lowering, we would need to ensure that the | 199 // after lowering, we would need to ensure that the pre-call and the |
| 204 // pre-call and the post-call esp adjustment get eliminated as well. | 200 // post-call esp adjustment get eliminated as well. |
| 205 if (ParameterAreaSizeBytes) { | 201 if (ParameterAreaSizeBytes) { |
| 206 _adjust_stack(ParameterAreaSizeBytes); | 202 _adjust_stack(ParameterAreaSizeBytes); |
| 207 } | 203 } |
| 208 | 204 |
| 209 // Copy arguments that are passed on the stack to the appropriate | 205 // Copy arguments that are passed on the stack to the appropriate stack |
| 210 // stack locations. | 206 // locations. |
| 211 for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) { | 207 for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) { |
| 212 lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i])); | 208 lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i])); |
| 213 } | 209 } |
| 214 | 210 |
| 215 // Copy arguments to be passed in registers to the appropriate | 211 // Copy arguments to be passed in registers to the appropriate registers. |
| 216 // registers. | 212 // TODO: Investigate the impact of lowering arguments passed in registers |
| 217 // TODO: Investigate the impact of lowering arguments passed in | 213 // after lowering stack arguments as opposed to the other way around. |
| 218 // registers after lowering stack arguments as opposed to the other | 214 // Lowering register arguments after stack arguments may reduce register |
| 219 // way around. Lowering register arguments after stack arguments may | 215 // pressure. On the other hand, lowering register arguments first (before |
| 220 // reduce register pressure. On the other hand, lowering register | 216 // stack arguments) may result in more compact code, as the memory operand |
| 221 // arguments first (before stack arguments) may result in more compact | 217 // displacements may end up being smaller before any stack adjustment is |
| 222 // code, as the memory operand displacements may end up being smaller | 218 // done. |
| 223 // before any stack adjustment is done. | |
| 224 for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) { | 219 for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) { |
| 225 Variable *Reg = legalizeToReg(XmmArgs[i], getRegisterForXmmArgNum(i)); | 220 Variable *Reg = legalizeToReg(XmmArgs[i], getRegisterForXmmArgNum(i)); |
| 226 // Generate a FakeUse of register arguments so that they do not get | 221 // Generate a FakeUse of register arguments so that they do not get dead |
| 227 // dead code eliminated as a result of the FakeKill of scratch | 222 // code eliminated as a result of the FakeKill of scratch registers after |
| 228 // registers after the call. | 223 // the call. |
| 229 Context.insert(InstFakeUse::create(Func, Reg)); | 224 Context.insert(InstFakeUse::create(Func, Reg)); |
| 230 } | 225 } |
| 231 | 226 |
| 232 for (SizeT i = 0, NumGprArgs = GprArgs.size(); i < NumGprArgs; ++i) { | 227 for (SizeT i = 0, NumGprArgs = GprArgs.size(); i < NumGprArgs; ++i) { |
| 233 Variable *Reg = legalizeToReg(GprArgs[i], getRegisterForGprArgNum(i)); | 228 Variable *Reg = legalizeToReg(GprArgs[i], getRegisterForGprArgNum(i)); |
| 234 Context.insert(InstFakeUse::create(Func, Reg)); | 229 Context.insert(InstFakeUse::create(Func, Reg)); |
| 235 } | 230 } |
| 236 | 231 |
| 237 // Generate the call instruction. Assign its result to a temporary | 232 // Generate the call instruction. Assign its result to a temporary with high |
| 238 // with high register allocation weight. | 233 // register allocation weight. |
| 239 Variable *Dest = Instr->getDest(); | 234 Variable *Dest = Instr->getDest(); |
| 240 // ReturnReg doubles as ReturnRegLo as necessary. | 235 // ReturnReg doubles as ReturnRegLo as necessary. |
| 241 Variable *ReturnReg = nullptr; | 236 Variable *ReturnReg = nullptr; |
| 242 if (Dest) { | 237 if (Dest) { |
| 243 switch (Dest->getType()) { | 238 switch (Dest->getType()) { |
| 244 case IceType_NUM: | 239 case IceType_NUM: |
| 245 case IceType_void: | 240 case IceType_void: |
| 246 llvm::report_fatal_error("Invalid Call dest type"); | 241 llvm::report_fatal_error("Invalid Call dest type"); |
| 247 break; | 242 break; |
| 248 case IceType_i1: | 243 case IceType_i1: |
| (...skipping 21 matching lines...) |
| 270 const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing(); | 265 const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing(); |
| 271 if (NeedSandboxing) { | 266 if (NeedSandboxing) { |
| 272 llvm_unreachable("X86-64 Sandboxing codegen not implemented."); | 267 llvm_unreachable("X86-64 Sandboxing codegen not implemented."); |
| 273 } | 268 } |
| 274 Inst *NewCall = Traits::Insts::Call::create(Func, ReturnReg, CallTarget); | 269 Inst *NewCall = Traits::Insts::Call::create(Func, ReturnReg, CallTarget); |
| 275 Context.insert(NewCall); | 270 Context.insert(NewCall); |
| 276 if (NeedSandboxing) { | 271 if (NeedSandboxing) { |
| 277 llvm_unreachable("X86-64 Sandboxing codegen not implemented."); | 272 llvm_unreachable("X86-64 Sandboxing codegen not implemented."); |
| 278 } | 273 } |
| 279 | 274 |
| 280 // Add the appropriate offset to esp. The call instruction takes care | 275 // Add the appropriate offset to esp. The call instruction takes care of |
| 281 // of resetting the stack offset during emission. | 276 // resetting the stack offset during emission. |
| 282 if (ParameterAreaSizeBytes) { | 277 if (ParameterAreaSizeBytes) { |
| 283 Variable *Esp = | 278 Variable *Esp = |
| 284 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp); | 279 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp); |
| 285 _add(Esp, Ctx->getConstantInt32(ParameterAreaSizeBytes)); | 280 _add(Esp, Ctx->getConstantInt32(ParameterAreaSizeBytes)); |
| 286 } | 281 } |
| 287 | 282 |
| 288 // Insert a register-kill pseudo instruction. | 283 // Insert a register-kill pseudo instruction. |
| 289 Context.insert(InstFakeKill::create(Func, NewCall)); | 284 Context.insert(InstFakeKill::create(Func, NewCall)); |
| 290 | 285 |
| 291 // Generate a FakeUse to keep the call live if necessary. | 286 // Generate a FakeUse to keep the call live if necessary. |
| (...skipping 11 matching lines...) |
| 303 _movp(Dest, ReturnReg); | 298 _movp(Dest, ReturnReg); |
| 304 } else { | 299 } else { |
| 305 assert(isScalarFloatingType(Dest->getType()) || | 300 assert(isScalarFloatingType(Dest->getType()) || |
| 306 isScalarIntegerType(Dest->getType())); | 301 isScalarIntegerType(Dest->getType())); |
| 307 _mov(Dest, ReturnReg); | 302 _mov(Dest, ReturnReg); |
| 308 } | 303 } |
| 309 } | 304 } |
| 310 | 305 |
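
For reference, a standalone sketch of the argument classification that the calling-convention comment in lowerCall describes: the first eight vector/fp arguments go to %xmm0 - %xmm7, the first six integer arguments to %rdi, %rsi, %rdx, %rcx, %r8, %r9, and everything else to the stack. The names here (ArgKind, classifyArgs, the Max constants) are illustrative stand-ins, not Subzero APIs:

    #include <cstdio>
    #include <vector>

    // Hypothetical stand-ins for Subzero's operand kinds and limits.
    enum class ArgKind { VectorOrFp, Integer };
    constexpr unsigned MaxXmmArgs = 8; // %xmm0 - %xmm7
    constexpr unsigned MaxGprArgs = 6; // %rdi, %rsi, %rdx, %rcx, %r8, %r9

    struct Classified {
      std::vector<int> XmmArgs, GprArgs, StackArgs; // argument indices
    };

    // Classify arguments by type, regardless of position in the list.
    Classified classifyArgs(const std::vector<ArgKind> &Args) {
      Classified C;
      for (unsigned i = 0; i < Args.size(); ++i) {
        if (Args[i] == ArgKind::VectorOrFp && C.XmmArgs.size() < MaxXmmArgs)
          C.XmmArgs.push_back(i);
        else if (Args[i] == ArgKind::Integer && C.GprArgs.size() < MaxGprArgs)
          C.GprArgs.push_back(i);
        else
          C.StackArgs.push_back(i); // lowered right to left in memory
      }
      return C;
    }

    int main() {
      // f(int, double, int, float): indices 0 and 2 go to GPRs, 1 and 3 to XMMs.
      Classified C = classifyArgs({ArgKind::Integer, ArgKind::VectorOrFp,
                                   ArgKind::Integer, ArgKind::VectorOrFp});
      std::printf("xmm=%zu gpr=%zu stack=%zu\n", C.XmmArgs.size(),
                  C.GprArgs.size(), C.StackArgs.size());
    }
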
| 311 void TargetX8664::lowerArguments() { | 306 void TargetX8664::lowerArguments() { |
| 312 VarList &Args = Func->getArgs(); | 307 VarList &Args = Func->getArgs(); |
| 313 // The first eight vetcor typed arguments (as well as fp arguments) are passed | 308 // The first eight vector typed arguments (as well as fp arguments) are |
| 314 // in %xmm0 through %xmm7 regardless of their position in the argument list. | 309 // passed in %xmm0 through %xmm7 regardless of their position in the argument |
| 310 // list. |
| 315 unsigned NumXmmArgs = 0; | 311 unsigned NumXmmArgs = 0; |
| 316 // The first six integer typed arguments are passed in %rdi, %rsi, %rdx, %rcx, | 312 // The first six integer typed arguments are passed in %rdi, %rsi, %rdx, |
| 317 // %r8, and %r9 regardless of their position in the argument list. | 313 // %rcx, %r8, and %r9 regardless of their position in the argument list. |
| 318 unsigned NumGprArgs = 0; | 314 unsigned NumGprArgs = 0; |
| 319 | 315 |
| 320 Context.init(Func->getEntryNode()); | 316 Context.init(Func->getEntryNode()); |
| 321 Context.setInsertPoint(Context.getCur()); | 317 Context.setInsertPoint(Context.getCur()); |
| 322 | 318 |
| 323 for (SizeT i = 0, End = Args.size(); | 319 for (SizeT i = 0, End = Args.size(); |
| 324 i < End && (NumXmmArgs < Traits::X86_MAX_XMM_ARGS || | 320 i < End && (NumXmmArgs < Traits::X86_MAX_XMM_ARGS || |
| 325 NumGprArgs < Traits::X86_MAX_XMM_ARGS); | 321 NumGprArgs < Traits::X86_MAX_GPR_ARGS); |
| 326 ++i) { | 322 ++i) { |
| 327 Variable *Arg = Args[i]; | 323 Variable *Arg = Args[i]; |
| (...skipping 10 matching lines...) |
| 338 } else if (isScalarIntegerType(Ty)) { | 334 } else if (isScalarIntegerType(Ty)) { |
| 339 if (NumGprArgs >= Traits::X86_MAX_GPR_ARGS) { | 335 if (NumGprArgs >= Traits::X86_MAX_GPR_ARGS) { |
| 340 continue; | 336 continue; |
| 341 } | 337 } |
| 342 RegNum = getRegisterForGprArgNum(NumGprArgs); | 338 RegNum = getRegisterForGprArgNum(NumGprArgs); |
| 343 ++NumGprArgs; | 339 ++NumGprArgs; |
| 344 RegisterArg = Func->makeVariable(Ty); | 340 RegisterArg = Func->makeVariable(Ty); |
| 345 } | 341 } |
| 346 assert(RegNum != Variable::NoRegister); | 342 assert(RegNum != Variable::NoRegister); |
| 347 assert(RegisterArg != nullptr); | 343 assert(RegisterArg != nullptr); |
| 348 // Replace Arg in the argument list with the home register. Then | 344 // Replace Arg in the argument list with the home register. Then generate |
| 349 // generate an instruction in the prolog to copy the home register | 345 // an instruction in the prolog to copy the home register to the assigned |
| 350 // to the assigned location of Arg. | 346 // location of Arg. |
| 351 if (BuildDefs::dump()) | 347 if (BuildDefs::dump()) |
| 352 RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func)); | 348 RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func)); |
| 353 RegisterArg->setRegNum(RegNum); | 349 RegisterArg->setRegNum(RegNum); |
| 354 RegisterArg->setIsArg(); | 350 RegisterArg->setIsArg(); |
| 355 Arg->setIsArg(false); | 351 Arg->setIsArg(false); |
| 356 | 352 |
| 357 Args[i] = RegisterArg; | 353 Args[i] = RegisterArg; |
| 358 Context.insert(InstAssign::create(Func, Arg, RegisterArg)); | 354 Context.insert(InstAssign::create(Func, Arg, RegisterArg)); |
| 359 } | 355 } |
| 360 } | 356 } |
| 361 | 357 |
| 362 void TargetX8664::lowerRet(const InstRet *Inst) { | 358 void TargetX8664::lowerRet(const InstRet *Inst) { |
| 363 Variable *Reg = nullptr; | 359 Variable *Reg = nullptr; |
| 364 if (Inst->hasRetValue()) { | 360 if (Inst->hasRetValue()) { |
| 365 Operand *Src0 = legalize(Inst->getRetValue()); | 361 Operand *Src0 = legalize(Inst->getRetValue()); |
| 366 if (isVectorType(Src0->getType()) || | 362 if (isVectorType(Src0->getType()) || |
| 367 isScalarFloatingType(Src0->getType())) { | 363 isScalarFloatingType(Src0->getType())) { |
| 368 Reg = legalizeToReg(Src0, Traits::RegisterSet::Reg_xmm0); | 364 Reg = legalizeToReg(Src0, Traits::RegisterSet::Reg_xmm0); |
| 369 } else { | 365 } else { |
| 370 assert(isScalarIntegerType(Src0->getType())); | 366 assert(isScalarIntegerType(Src0->getType())); |
| 371 _mov(Reg, Src0, Traits::RegisterSet::Reg_eax); | 367 _mov(Reg, Src0, Traits::RegisterSet::Reg_eax); |
| 372 } | 368 } |
| 373 } | 369 } |
| 374 // Add a ret instruction even if sandboxing is enabled, because | 370 // Add a ret instruction even if sandboxing is enabled, because addEpilog |
| 375 // addEpilog explicitly looks for a ret instruction as a marker for | 371 // explicitly looks for a ret instruction as a marker for where to insert the |
| 376 // where to insert the frame removal instructions. | 372 // frame removal instructions. |
| 377 _ret(Reg); | 373 _ret(Reg); |
| 378 // Add a fake use of esp to make sure esp stays alive for the entire | 374 // Add a fake use of esp to make sure esp stays alive for the entire |
| 379 // function. Otherwise post-call esp adjustments get dead-code | 375 // function. Otherwise post-call esp adjustments get dead-code eliminated. |
| 380 // eliminated. TODO: Are there more places where the fake use | 376 // TODO: Are there more places where the fake use should be inserted? E.g. |
| 381 // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not | 377 // "void f(int n){while(1) g(n);}" may not have a ret instruction. |
| 382 // have a ret instruction. | |
| 383 Variable *esp = | 378 Variable *esp = |
| 384 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp); | 379 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp); |
| 385 Context.insert(InstFakeUse::create(Func, esp)); | 380 Context.insert(InstFakeUse::create(Func, esp)); |
| 386 } | 381 } |
| 387 | 382 |
| 388 void TargetX8664::addProlog(CfgNode *Node) { | 383 void TargetX8664::addProlog(CfgNode *Node) { |
| 389 // Stack frame layout: | 384 // Stack frame layout: |
| 390 // | 385 // |
| 391 // +------------------------+ | 386 // +------------------------+ |
| 392 // | 1. return address | | 387 // | 1. return address | |
| (...skipping 15 matching lines...) |
| 408 // | 403 // |
| 409 // The following variables record the size in bytes of the given areas: | 404 // The following variables record the size in bytes of the given areas: |
| 410 // * X86_RET_IP_SIZE_BYTES: area 1 | 405 // * X86_RET_IP_SIZE_BYTES: area 1 |
| 411 // * PreservedRegsSizeBytes: area 2 | 406 // * PreservedRegsSizeBytes: area 2 |
| 412 // * SpillAreaPaddingBytes: area 3 | 407 // * SpillAreaPaddingBytes: area 3 |
| 413 // * GlobalsSize: area 4 | 408 // * GlobalsSize: area 4 |
| 414 // * GlobalsAndSubsequentPaddingSize: areas 4 - 5 | 409 // * GlobalsAndSubsequentPaddingSize: areas 4 - 5 |
| 415 // * LocalsSpillAreaSize: area 6 | 410 // * LocalsSpillAreaSize: area 6 |
| 416 // * SpillAreaSizeBytes: areas 3 - 7 | 411 // * SpillAreaSizeBytes: areas 3 - 7 |
| 417 | 412 |
| 418 // Determine stack frame offsets for each Variable without a | 413 // Determine stack frame offsets for each Variable without a register |
| 419 // register assignment. This can be done as one variable per stack | 414 // assignment. This can be done as one variable per stack slot. Or, do |
| 420 // slot. Or, do coalescing by running the register allocator again | 415 // coalescing by running the register allocator again with an infinite set of |
| 421 // with an infinite set of registers (as a side effect, this gives | 416 // registers (as a side effect, this gives variables a second chance at |
| 422 // variables a second chance at physical register assignment). | 417 // physical register assignment). |
| 423 // | 418 // |
| 424 // A middle ground approach is to leverage sparsity and allocate one | 419 // A middle ground approach is to leverage sparsity and allocate one block of |
| 425 // block of space on the frame for globals (variables with | 420 // space on the frame for globals (variables with multi-block lifetime), and |
| 426 // multi-block lifetime), and one block to share for locals | 421 // one block to share for locals (single-block lifetime). |
| 427 // (single-block lifetime). | |
| 428 | 422 |
| 429 Context.init(Node); | 423 Context.init(Node); |
| 430 Context.setInsertPoint(Context.getCur()); | 424 Context.setInsertPoint(Context.getCur()); |
| 431 | 425 |
| 432 llvm::SmallBitVector CalleeSaves = | 426 llvm::SmallBitVector CalleeSaves = |
| 433 getRegisterSet(RegSet_CalleeSave, RegSet_None); | 427 getRegisterSet(RegSet_CalleeSave, RegSet_None); |
| 434 RegsUsed = llvm::SmallBitVector(CalleeSaves.size()); | 428 RegsUsed = llvm::SmallBitVector(CalleeSaves.size()); |
| 435 VarList SortedSpilledVariables, VariablesLinkedToSpillSlots; | 429 VarList SortedSpilledVariables, VariablesLinkedToSpillSlots; |
| 436 size_t GlobalsSize = 0; | 430 size_t GlobalsSize = 0; |
| 437 // If there is a separate locals area, this represents that area. | 431 // If there is a separate locals area, this represents that area. Otherwise |
| 438 // Otherwise it counts any variable not counted by GlobalsSize. | 432 // it counts any variable not counted by GlobalsSize. |
| 439 SpillAreaSizeBytes = 0; | 433 SpillAreaSizeBytes = 0; |
| 440 // If there is a separate locals area, this specifies the alignment | 434 // If there is a separate locals area, this specifies the alignment for it. |
| 441 // for it. | |
| 442 uint32_t LocalsSlotsAlignmentBytes = 0; | 435 uint32_t LocalsSlotsAlignmentBytes = 0; |
| 443 // The entire spill locations area gets aligned to largest natural | 436 // The entire spill locations area gets aligned to largest natural alignment |
| 444 // alignment of the variables that have a spill slot. | 437 // of the variables that have a spill slot. |
| 445 uint32_t SpillAreaAlignmentBytes = 0; | 438 uint32_t SpillAreaAlignmentBytes = 0; |
| 446 // A spill slot linked to a variable with a stack slot should reuse | 439 // A spill slot linked to a variable with a stack slot should reuse that |
| 447 // that stack slot. | 440 // stack slot. |
| 448 std::function<bool(Variable *)> TargetVarHook = | 441 std::function<bool(Variable *)> TargetVarHook = |
| 449 [&VariablesLinkedToSpillSlots](Variable *Var) { | 442 [&VariablesLinkedToSpillSlots](Variable *Var) { |
| 450 if (auto *SpillVar = | 443 if (auto *SpillVar = |
| 451 llvm::dyn_cast<typename Traits::SpillVariable>(Var)) { | 444 llvm::dyn_cast<typename Traits::SpillVariable>(Var)) { |
| 452 assert(Var->mustNotHaveReg()); | 445 assert(Var->mustNotHaveReg()); |
| 453 if (SpillVar->getLinkedTo() && !SpillVar->getLinkedTo()->hasReg()) { | 446 if (SpillVar->getLinkedTo() && !SpillVar->getLinkedTo()->hasReg()) { |
| 454 VariablesLinkedToSpillSlots.push_back(Var); | 447 VariablesLinkedToSpillSlots.push_back(Var); |
| 455 return true; | 448 return true; |
| 456 } | 449 } |
| 457 } | 450 } |
| (...skipping 21 matching lines...) |
| 479 | 472 |
| 480 // Generate "push ebp; mov ebp, esp" | 473 // Generate "push ebp; mov ebp, esp" |
| 481 if (IsEbpBasedFrame) { | 474 if (IsEbpBasedFrame) { |
| 482 assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None)) | 475 assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None)) |
| 483 .count() == 0); | 476 .count() == 0); |
| 484 PreservedRegsSizeBytes += typeWidthInBytes(IceType_i64); | 477 PreservedRegsSizeBytes += typeWidthInBytes(IceType_i64); |
| 485 Variable *ebp = getPhysicalRegister(Traits::RegisterSet::Reg_ebp); | 478 Variable *ebp = getPhysicalRegister(Traits::RegisterSet::Reg_ebp); |
| 486 Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp); | 479 Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp); |
| 487 _push(ebp); | 480 _push(ebp); |
| 488 _mov(ebp, esp); | 481 _mov(ebp, esp); |
| 489 // Keep ebp live for late-stage liveness analysis | 482 // Keep ebp live for late-stage liveness analysis (e.g. asm-verbose mode). |
| 490 // (e.g. asm-verbose mode). | |
| 491 Context.insert(InstFakeUse::create(Func, ebp)); | 483 Context.insert(InstFakeUse::create(Func, ebp)); |
| 492 } | 484 } |
| 493 | 485 |
| 494 // Align the variables area. SpillAreaPaddingBytes is the size of | 486 // Align the variables area. SpillAreaPaddingBytes is the size of the region |
| 495 // the region after the preserved registers and before the spill areas. | 487 // after the preserved registers and before the spill areas. |
| 496 // LocalsSlotsPaddingBytes is the amount of padding between the globals | 488 // LocalsSlotsPaddingBytes is the amount of padding between the globals and |
| 497 // and locals area if they are separate. | 489 // locals area if they are separate. |
| 498 assert(SpillAreaAlignmentBytes <= Traits::X86_STACK_ALIGNMENT_BYTES); | 490 assert(SpillAreaAlignmentBytes <= Traits::X86_STACK_ALIGNMENT_BYTES); |
| 499 assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes); | 491 assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes); |
| 500 uint32_t SpillAreaPaddingBytes = 0; | 492 uint32_t SpillAreaPaddingBytes = 0; |
| 501 uint32_t LocalsSlotsPaddingBytes = 0; | 493 uint32_t LocalsSlotsPaddingBytes = 0; |
| 502 alignStackSpillAreas(Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes, | 494 alignStackSpillAreas(Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes, |
| 503 SpillAreaAlignmentBytes, GlobalsSize, | 495 SpillAreaAlignmentBytes, GlobalsSize, |
| 504 LocalsSlotsAlignmentBytes, &SpillAreaPaddingBytes, | 496 LocalsSlotsAlignmentBytes, &SpillAreaPaddingBytes, |
| 505 &LocalsSlotsPaddingBytes); | 497 &LocalsSlotsPaddingBytes); |
| 506 SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes; | 498 SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes; |
| 507 uint32_t GlobalsAndSubsequentPaddingSize = | 499 uint32_t GlobalsAndSubsequentPaddingSize = |
| 508 GlobalsSize + LocalsSlotsPaddingBytes; | 500 GlobalsSize + LocalsSlotsPaddingBytes; |
| 509 | 501 |
| 510 // Align esp if necessary. | 502 // Align esp if necessary. |
| 511 if (NeedsStackAlignment) { | 503 if (NeedsStackAlignment) { |
| 512 uint32_t StackOffset = | 504 uint32_t StackOffset = |
| 513 Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes; | 505 Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes; |
| 514 uint32_t StackSize = | 506 uint32_t StackSize = |
| 515 Traits::applyStackAlignment(StackOffset + SpillAreaSizeBytes); | 507 Traits::applyStackAlignment(StackOffset + SpillAreaSizeBytes); |
| 516 SpillAreaSizeBytes = StackSize - StackOffset; | 508 SpillAreaSizeBytes = StackSize - StackOffset; |
| 517 } | 509 } |
| 518 | 510 |
| 519 // Generate "sub esp, SpillAreaSizeBytes" | 511 // Generate "sub esp, SpillAreaSizeBytes" |
| 520 if (SpillAreaSizeBytes) | 512 if (SpillAreaSizeBytes) |
| 521 _sub(getPhysicalRegister(Traits::RegisterSet::Reg_esp), | 513 _sub(getPhysicalRegister(Traits::RegisterSet::Reg_esp), |
| 522 Ctx->getConstantInt32(SpillAreaSizeBytes)); | 514 Ctx->getConstantInt32(SpillAreaSizeBytes)); |
| 523 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes); | 515 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes); |
| 524 | 516 |
| 525 resetStackAdjustment(); | 517 resetStackAdjustment(); |
| 526 | 518 |
| 527 // Fill in stack offsets for stack args, and copy args into registers | 519 // Fill in stack offsets for stack args, and copy args into registers for |
| 528 // for those that were register-allocated. Args are pushed right to | 520 // those that were register-allocated. Args are pushed right to left, so |
| 529 // left, so Arg[0] is closest to the stack/frame pointer. | 521 // Arg[0] is closest to the stack/frame pointer. |
| 530 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); | 522 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); |
| 531 size_t BasicFrameOffset = | 523 size_t BasicFrameOffset = |
| 532 PreservedRegsSizeBytes + Traits::X86_RET_IP_SIZE_BYTES; | 524 PreservedRegsSizeBytes + Traits::X86_RET_IP_SIZE_BYTES; |
| 533 if (!IsEbpBasedFrame) | 525 if (!IsEbpBasedFrame) |
| 534 BasicFrameOffset += SpillAreaSizeBytes; | 526 BasicFrameOffset += SpillAreaSizeBytes; |
| 535 | 527 |
| 536 const VarList &Args = Func->getArgs(); | 528 const VarList &Args = Func->getArgs(); |
| 537 size_t InArgsSizeBytes = 0; | 529 size_t InArgsSizeBytes = 0; |
| 538 unsigned NumXmmArgs = 0; | 530 unsigned NumXmmArgs = 0; |
| 539 unsigned NumGPRArgs = 0; | 531 unsigned NumGPRArgs = 0; |
| (...skipping 58 matching lines...) |
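
The esp-alignment step in addProlog rounds the frame up so that StackOffset + SpillAreaSizeBytes is a multiple of the 16-byte stack alignment. A worked sketch, assuming Traits::applyStackAlignment is the usual round-up-to-multiple helper (the byte counts are illustrative):

    #include <cassert>
    #include <cstdint>

    constexpr uint32_t StackAlignmentBytes = 16;

    // Round Size up to the next multiple of the stack alignment.
    constexpr uint32_t applyStackAlignment(uint32_t Size) {
      return (Size + StackAlignmentBytes - 1) & ~(StackAlignmentBytes - 1);
    }

    int main() {
      // E.g. 8 bytes of return address, 8 bytes of pushed frame pointer,
      // and 20 bytes of spill slots:
      uint32_t StackOffset = 8 + 8;                 // 16
      uint32_t SpillAreaSizeBytes = 20;
      uint32_t StackSize =
          applyStackAlignment(StackOffset + SpillAreaSizeBytes);
      assert(StackSize == 48);                      // 36 rounded up to 48
      SpillAreaSizeBytes = StackSize - StackOffset; // padded to 32
      assert(SpillAreaSizeBytes == 32);
    }
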
| 598 void TargetX8664::addEpilog(CfgNode *Node) { | 590 void TargetX8664::addEpilog(CfgNode *Node) { |
| 599 InstList &Insts = Node->getInsts(); | 591 InstList &Insts = Node->getInsts(); |
| 600 InstList::reverse_iterator RI, E; | 592 InstList::reverse_iterator RI, E; |
| 601 for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) { | 593 for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) { |
| 602 if (llvm::isa<typename Traits::Insts::Ret>(*RI)) | 594 if (llvm::isa<typename Traits::Insts::Ret>(*RI)) |
| 603 break; | 595 break; |
| 604 } | 596 } |
| 605 if (RI == E) | 597 if (RI == E) |
| 606 return; | 598 return; |
| 607 | 599 |
| 608 // Convert the reverse_iterator position into its corresponding | 600 // Convert the reverse_iterator position into its corresponding (forward) |
| 609 // (forward) iterator position. | 601 // iterator position. |
| 610 InstList::iterator InsertPoint = RI.base(); | 602 InstList::iterator InsertPoint = RI.base(); |
| 611 --InsertPoint; | 603 --InsertPoint; |
| 612 Context.init(Node); | 604 Context.init(Node); |
| 613 Context.setInsertPoint(InsertPoint); | 605 Context.setInsertPoint(InsertPoint); |
| 614 | 606 |
| 615 Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp); | 607 Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp); |
| 616 if (IsEbpBasedFrame) { | 608 if (IsEbpBasedFrame) { |
| 617 Variable *ebp = getPhysicalRegister(Traits::RegisterSet::Reg_ebp); | 609 Variable *ebp = getPhysicalRegister(Traits::RegisterSet::Reg_ebp); |
| 618 // For late-stage liveness analysis (e.g. asm-verbose mode), | 610 // For late-stage liveness analysis (e.g. asm-verbose mode), adding a fake |
| 619 // adding a fake use of esp before the assignment of esp=ebp keeps | 611 // use of esp before the assignment of esp=ebp keeps previous esp |
| 620 // previous esp adjustments from being dead-code eliminated. | 612 // adjustments from being dead-code eliminated. |
| 621 Context.insert(InstFakeUse::create(Func, esp)); | 613 Context.insert(InstFakeUse::create(Func, esp)); |
| 622 _mov(esp, ebp); | 614 _mov(esp, ebp); |
| 623 _pop(ebp); | 615 _pop(ebp); |
| 624 } else { | 616 } else { |
| 625 // add esp, SpillAreaSizeBytes | 617 // add esp, SpillAreaSizeBytes |
| 626 if (SpillAreaSizeBytes) | 618 if (SpillAreaSizeBytes) |
| 627 _add(esp, Ctx->getConstantInt32(SpillAreaSizeBytes)); | 619 _add(esp, Ctx->getConstantInt32(SpillAreaSizeBytes)); |
| 628 } | 620 } |
| 629 | 621 |
| 630 // Add pop instructions for preserved registers. | 622 // Add pop instructions for preserved registers. |
| (...skipping 120 matching lines...) |
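
The reverse-to-forward iterator conversion in addEpilog above (InsertPoint = RI.base(); --InsertPoint) uses the standard relationship between a reverse_iterator and its base(): base() designates the element one past the one the reverse iterator refers to, so decrementing it lands on the ret itself. A minimal illustration with ints standing in for instructions:

    #include <cassert>
    #include <list>

    int main() {
      std::list<int> Insts = {1, 2, 3, 4};
      // Search backwards for the element 3 (standing in for the ret).
      std::list<int>::reverse_iterator RI = Insts.rbegin();
      while (RI != Insts.rend() && *RI != 3)
        ++RI;
      // RI.base() points one element *after* the one RI refers to;
      // decrementing it gives a forward iterator to the same element.
      std::list<int>::iterator InsertPoint = RI.base();
      --InsertPoint;
      assert(*InsertPoint == 3);
      Insts.insert(InsertPoint, 99); // inserts before 3, as the epilog does
      // Insts is now {1, 2, 99, 3, 4}.
    }
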
| 751 RPE_PooledConstantReordering, K); | 743 RPE_PooledConstantReordering, K); |
| 752 RandomShuffle(Pool.begin(), Pool.end(), | 744 RandomShuffle(Pool.begin(), Pool.end(), |
| 753 [&RNG](uint64_t N) { return (uint32_t)RNG.next(N); }); | 745 [&RNG](uint64_t N) { return (uint32_t)RNG.next(N); }); |
| 754 } | 746 } |
| 755 | 747 |
| 756 for (Constant *C : Pool) { | 748 for (Constant *C : Pool) { |
| 757 if (!C->getShouldBePooled()) | 749 if (!C->getShouldBePooled()) |
| 758 continue; | 750 continue; |
| 759 typename T::IceType *Const = llvm::cast<typename T::IceType>(C); | 751 typename T::IceType *Const = llvm::cast<typename T::IceType>(C); |
| 760 typename T::IceType::PrimType Value = Const->getValue(); | 752 typename T::IceType::PrimType Value = Const->getValue(); |
| 761 // Use memcpy() to copy bits from Value into RawValue in a way | 753 // Use memcpy() to copy bits from Value into RawValue in a way that avoids |
| 762 // that avoids breaking strict-aliasing rules. | 754 // breaking strict-aliasing rules. |
| 763 typename T::PrimitiveIntType RawValue; | 755 typename T::PrimitiveIntType RawValue; |
| 764 memcpy(&RawValue, &Value, sizeof(Value)); | 756 memcpy(&RawValue, &Value, sizeof(Value)); |
| 765 char buf[30]; | 757 char buf[30]; |
| 766 int CharsPrinted = | 758 int CharsPrinted = |
| 767 snprintf(buf, llvm::array_lengthof(buf), T::PrintfString, RawValue); | 759 snprintf(buf, llvm::array_lengthof(buf), T::PrintfString, RawValue); |
| 768 assert(CharsPrinted >= 0 && | 760 assert(CharsPrinted >= 0 && |
| 769 (size_t)CharsPrinted < llvm::array_lengthof(buf)); | 761 (size_t)CharsPrinted < llvm::array_lengthof(buf)); |
| 770 (void)CharsPrinted; // avoid warnings if asserts are disabled | 762 (void)CharsPrinted; // avoid warnings if asserts are disabled |
| 771 Const->emitPoolLabel(Str); | 763 Const->emitPoolLabel(Str); |
| 772 Str << ":\n\t" << T::AsmTag << "\t" << buf << "\t# " << T::TypeName << " " | 764 Str << ":\n\t" << T::AsmTag << "\t" << buf << "\t# " << T::TypeName << " " |
| 773 << Value << "\n"; | 765 << Value << "\n"; |
| 774 } | 766 } |
| 775 } | 767 } |
| 776 | 768 |
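
The memcpy in emitConstantPool is the standard strict-aliasing-safe way to reinterpret a constant's bits as an integer for printing; casting the pointer instead would be undefined behavior. A minimal sketch of the same bit-copy:

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    // Reinterpret the bits of a float as a uint32_t without violating
    // strict-aliasing rules.
    uint32_t bitsOf(float Value) {
      static_assert(sizeof(uint32_t) == sizeof(float), "size mismatch");
      uint32_t Raw;
      std::memcpy(&Raw, &Value, sizeof(Value));
      return Raw;
    }

    int main() {
      assert(bitsOf(1.0f) == 0x3f800000u); // IEEE-754 encoding of 1.0f
      // By contrast, *reinterpret_cast<uint32_t *>(&SomeFloat) breaks the
      // strict-aliasing rule and is undefined behavior.
    }
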
| 777 void TargetDataX8664::lowerConstants() { | 769 void TargetDataX8664::lowerConstants() { |
| 778 if (Ctx->getFlags().getDisableTranslation()) | 770 if (Ctx->getFlags().getDisableTranslation()) |
| 779 return; | 771 return; |
| 780 // No need to emit constants from the int pool since (for x86) they | 772 // No need to emit constants from the int pool since (for x86) they are |
| 781 // are embedded as immediates in the instructions, just emit float/double. | 773 // embedded as immediates in the instructions, just emit float/double. |
| 782 switch (Ctx->getFlags().getOutFileType()) { | 774 switch (Ctx->getFlags().getOutFileType()) { |
| 783 case FT_Elf: { | 775 case FT_Elf: { |
| 784 ELFObjectWriter *Writer = Ctx->getObjectWriter(); | 776 ELFObjectWriter *Writer = Ctx->getObjectWriter(); |
| 785 | 777 |
| 786 Writer->writeConstantPool<ConstantInteger32>(IceType_i8); | 778 Writer->writeConstantPool<ConstantInteger32>(IceType_i8); |
| 787 Writer->writeConstantPool<ConstantInteger32>(IceType_i16); | 779 Writer->writeConstantPool<ConstantInteger32>(IceType_i16); |
| 788 Writer->writeConstantPool<ConstantInteger32>(IceType_i32); | 780 Writer->writeConstantPool<ConstantInteger32>(IceType_i32); |
| 789 | 781 |
| 790 Writer->writeConstantPool<ConstantFloat>(IceType_f32); | 782 Writer->writeConstantPool<ConstantFloat>(IceType_f32); |
| 791 Writer->writeConstantPool<ConstantDouble>(IceType_f64); | 783 Writer->writeConstantPool<ConstantDouble>(IceType_f64); |
| (...skipping 55 matching lines...) |
| 847 OstreamLocker L(Ctx); | 839 OstreamLocker L(Ctx); |
| 848 for (const VariableDeclaration *Var : Vars) { | 840 for (const VariableDeclaration *Var : Vars) { |
| 849 if (GlobalContext::matchSymbolName(Var->getName(), TranslateOnly)) { | 841 if (GlobalContext::matchSymbolName(Var->getName(), TranslateOnly)) { |
| 850 emitGlobal(*Var, SectionSuffix); | 842 emitGlobal(*Var, SectionSuffix); |
| 851 } | 843 } |
| 852 } | 844 } |
| 853 } break; | 845 } break; |
| 854 } | 846 } |
| 855 } | 847 } |
| 856 | 848 |
| 857 // In some cases, there are x-macros tables for both high-level and | 849 // In some cases, there are x-macros tables for both high-level and low-level |
| 858 // low-level instructions/operands that use the same enum key value. | 850 // instructions/operands that use the same enum key value. The tables are kept |
| 859 // The tables are kept separate to maintain a proper separation | 851 // separate to maintain a proper separation between abstraction layers. There |
| 860 // between abstraction layers. There is a risk that the tables could | 852 // is a risk that the tables could get out of sync if enum values are reordered |
| 861 // get out of sync if enum values are reordered or if entries are | 853 // or if entries are added or deleted. The following dummy namespaces use |
| 862 // added or deleted. The following dummy namespaces use | |
| 863 // static_asserts to ensure everything is kept in sync. | 854 // static_asserts to ensure everything is kept in sync. |
| 864 | 855 |
| 865 namespace { | 856 namespace { |
| 866 // Validate the enum values in FCMPX8664_TABLE. | 857 // Validate the enum values in FCMPX8664_TABLE. |
| 867 namespace dummy1 { | 858 namespace dummy1 { |
| 868 // Define a temporary set of enum values based on low-level table | 859 // Define a temporary set of enum values based on low-level table entries. |
| 869 // entries. | |
| 870 enum _tmp_enum { | 860 enum _tmp_enum { |
| 871 #define X(val, dflt, swapS, C1, C2, swapV, pred) _tmp_##val, | 861 #define X(val, dflt, swapS, C1, C2, swapV, pred) _tmp_##val, |
| 872 FCMPX8664_TABLE | 862 FCMPX8664_TABLE |
| 873 #undef X | 863 #undef X |
| 874 _num | 864 _num |
| 875 }; | 865 }; |
| 876 // Define a set of constants based on high-level table entries. | 866 // Define a set of constants based on high-level table entries. |
| 877 #define X(tag, str) static const int _table1_##tag = InstFcmp::tag; | 867 #define X(tag, str) static const int _table1_##tag = InstFcmp::tag; |
| 878 ICEINSTFCMP_TABLE | 868 ICEINSTFCMP_TABLE |
| 879 #undef X | 869 #undef X |
| 880 // Define a set of constants based on low-level table entries, and | 870 // Define a set of constants based on low-level table entries, and ensure the |
| 881 // ensure the table entry keys are consistent. | 871 // table entry keys are consistent. |
| 882 #define X(val, dflt, swapS, C1, C2, swapV, pred) \ | 872 #define X(val, dflt, swapS, C1, C2, swapV, pred) \ |
| 883 static const int _table2_##val = _tmp_##val; \ | 873 static const int _table2_##val = _tmp_##val; \ |
| 884 static_assert( \ | 874 static_assert( \ |
| 885 _table1_##val == _table2_##val, \ | 875 _table1_##val == _table2_##val, \ |
| 886 "Inconsistency between FCMPX8664_TABLE and ICEINSTFCMP_TABLE"); | 876 "Inconsistency between FCMPX8664_TABLE and ICEINSTFCMP_TABLE"); |
| 887 FCMPX8664_TABLE | 877 FCMPX8664_TABLE |
| 888 #undef X | 878 #undef X |
| 889 // Repeat the static asserts with respect to the high-level table | 879 // Repeat the static asserts with respect to the high-level table entries in |
| 890 // entries in case the high-level table has extra entries. | 880 // case the high-level table has extra entries. |
| 891 #define X(tag, str) \ | 881 #define X(tag, str) \ |
| 892 static_assert( \ | 882 static_assert( \ |
| 893 _table1_##tag == _table2_##tag, \ | 883 _table1_##tag == _table2_##tag, \ |
| 894 "Inconsistency between FCMPX8664_TABLE and ICEINSTFCMP_TABLE"); | 884 "Inconsistency between FCMPX8664_TABLE and ICEINSTFCMP_TABLE"); |
| 895 ICEINSTFCMP_TABLE | 885 ICEINSTFCMP_TABLE |
| 896 #undef X | 886 #undef X |
| 897 } // end of namespace dummy1 | 887 } // end of namespace dummy1 |
| 898 | 888 |
| 899 // Validate the enum values in ICMPX8664_TABLE. | 889 // Validate the enum values in ICMPX8664_TABLE. |
| 900 namespace dummy2 { | 890 namespace dummy2 { |
| 901 // Define a temporary set of enum values based on low-level table | 891 // Define a temporary set of enum values based on low-level table entries. |
| 902 // entries. | |
| 903 enum _tmp_enum { | 892 enum _tmp_enum { |
| 904 #define X(val, C_32, C1_64, C2_64, C3_64) _tmp_##val, | 893 #define X(val, C_32, C1_64, C2_64, C3_64) _tmp_##val, |
| 905 ICMPX8664_TABLE | 894 ICMPX8664_TABLE |
| 906 #undef X | 895 #undef X |
| 907 _num | 896 _num |
| 908 }; | 897 }; |
| 909 // Define a set of constants based on high-level table entries. | 898 // Define a set of constants based on high-level table entries. |
| 910 #define X(tag, str) static const int _table1_##tag = InstIcmp::tag; | 899 #define X(tag, str) static const int _table1_##tag = InstIcmp::tag; |
| 911 ICEINSTICMP_TABLE | 900 ICEINSTICMP_TABLE |
| 912 #undef X | 901 #undef X |
| 913 // Define a set of constants based on low-level table entries, and | 902 // Define a set of constants based on low-level table entries, and ensure the |
| 914 // ensure the table entry keys are consistent. | 903 // table entry keys are consistent. |
| 915 #define X(val, C_32, C1_64, C2_64, C3_64) \ | 904 #define X(val, C_32, C1_64, C2_64, C3_64) \ |
| 916 static const int _table2_##val = _tmp_##val; \ | 905 static const int _table2_##val = _tmp_##val; \ |
| 917 static_assert( \ | 906 static_assert( \ |
| 918 _table1_##val == _table2_##val, \ | 907 _table1_##val == _table2_##val, \ |
| 919 "Inconsistency between ICMPX8664_TABLE and ICEINSTICMP_TABLE"); | 908 "Inconsistency between ICMPX8664_TABLE and ICEINSTICMP_TABLE"); |
| 920 ICMPX8664_TABLE | 909 ICMPX8664_TABLE |
| 921 #undef X | 910 #undef X |
| 922 // Repeat the static asserts with respect to the high-level table | 911 // Repeat the static asserts with respect to the high-level table entries in |
| 923 // entries in case the high-level table has extra entries. | 912 // case the high-level table has extra entries. |
| 924 #define X(tag, str) \ | 913 #define X(tag, str) \ |
| 925 static_assert( \ | 914 static_assert( \ |
| 926 _table1_##tag == _table2_##tag, \ | 915 _table1_##tag == _table2_##tag, \ |
| 927 "Inconsistency between ICMPX8664_TABLE and ICEINSTICMP_TABLE"); | 916 "Inconsistency between ICMPX8664_TABLE and ICEINSTICMP_TABLE"); |
| 928 ICEINSTICMP_TABLE | 917 ICEINSTICMP_TABLE |
| 929 #undef X | 918 #undef X |
| 930 } // end of namespace dummy2 | 919 } // end of namespace dummy2 |
| 931 | 920 |
| 932 // Validate the enum values in ICETYPEX8664_TABLE. | 921 // Validate the enum values in ICETYPEX8664_TABLE. |
| 933 namespace dummy3 { | 922 namespace dummy3 { |
| 934 // Define a temporary set of enum values based on low-level table | 923 // Define a temporary set of enum values based on low-level table entries. |
| 935 // entries. | |
| 936 enum _tmp_enum { | 924 enum _tmp_enum { |
| 937 #define X(tag, elementty, cvt, sdss, pack, width, fld) _tmp_##tag, | 925 #define X(tag, elementty, cvt, sdss, pack, width, fld) _tmp_##tag, |
| 938 ICETYPEX8664_TABLE | 926 ICETYPEX8664_TABLE |
| 939 #undef X | 927 #undef X |
| 940 _num | 928 _num |
| 941 }; | 929 }; |
| 942 // Define a set of constants based on high-level table entries. | 930 // Define a set of constants based on high-level table entries. |
| 943 #define X(tag, sizeLog2, align, elts, elty, str) \ | 931 #define X(tag, sizeLog2, align, elts, elty, str) \ |
| 944 static const int _table1_##tag = tag; | 932 static const int _table1_##tag = tag; |
| 945 ICETYPE_TABLE | 933 ICETYPE_TABLE |
| 946 #undef X | 934 #undef X |
| 947 // Define a set of constants based on low-level table entries, and | 935 // Define a set of constants based on low-level table entries, and ensure the |
| 948 // ensure the table entry keys are consistent. | 936 // table entry keys are consistent. |
| 949 #define X(tag, elementty, cvt, sdss, pack, width, fld) \ | 937 #define X(tag, elementty, cvt, sdss, pack, width, fld) \ |
| 950 static const int _table2_##tag = _tmp_##tag; \ | 938 static const int _table2_##tag = _tmp_##tag; \ |
| 951 static_assert(_table1_##tag == _table2_##tag, \ | 939 static_assert(_table1_##tag == _table2_##tag, \ |
| 952 "Inconsistency between ICETYPEX8664_TABLE and ICETYPE_TABLE"); | 940 "Inconsistency between ICETYPEX8664_TABLE and ICETYPE_TABLE"); |
| 953 ICETYPEX8664_TABLE | 941 ICETYPEX8664_TABLE |
| 954 #undef X | 942 #undef X |
| 955 // Repeat the static asserts with respect to the high-level table | 943 // Repeat the static asserts with respect to the high-level table entries in |
| 956 // entries in case the high-level table has extra entries. | 944 // case the high-level table has extra entries. |
| 957 #define X(tag, sizeLog2, align, elts, elty, str) \ | 945 #define X(tag, sizeLog2, align, elts, elty, str) \ |
| 958 static_assert(_table1_##tag == _table2_##tag, \ | 946 static_assert(_table1_##tag == _table2_##tag, \ |
| 959 "Inconsistency between ICETYPEX8664_TABLE and ICETYPE_TABLE"); | 947 "Inconsistency between ICETYPEX8664_TABLE and ICETYPE_TABLE"); |
| 960 ICETYPE_TABLE | 948 ICETYPE_TABLE |
| 961 #undef X | 949 #undef X |
| 962 } // end of namespace dummy3 | 950 } // end of namespace dummy3 |
| 963 } // end of anonymous namespace | 951 } // end of anonymous namespace |
| 964 | 952 |
| 965 } // end of namespace Ice | 953 } // end of namespace Ice |
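
The dummy-namespace pattern above generalizes to any pair of x-macro tables that share tag keys: expand each table into integer constants keyed by the tag, then static_assert pairwise equality so that reordering either table breaks the build. A minimal self-contained version with two toy tables standing in for the high- and low-level ones:

    // Two x-macro tables that must stay in the same order.
    #define HIGH_LEVEL_TABLE X(Add) X(Sub) X(Mul)
    #define LOW_LEVEL_TABLE  X(Add) X(Sub) X(Mul)

    // High-level enum, defined from the first table.
    enum class HighOp {
    #define X(tag) tag,
      HIGH_LEVEL_TABLE
    #undef X
    };

    namespace dummy {
    // Temporary enum keyed by the low-level table's order.
    enum _tmp_enum {
    #define X(tag) _tmp_##tag,
      LOW_LEVEL_TABLE
    #undef X
      _num
    };
    // Constants from the high-level table...
    #define X(tag) constexpr int _table1_##tag = static_cast<int>(HighOp::tag);
    HIGH_LEVEL_TABLE
    #undef X
    // ...and from the low-level table; any mismatch is a compile error.
    #define X(tag)                                                            \
      constexpr int _table2_##tag = _tmp_##tag;                               \
      static_assert(_table1_##tag == _table2_##tag,                           \
                    "HIGH_LEVEL_TABLE and LOW_LEVEL_TABLE are out of sync");
    LOW_LEVEL_TABLE
    #undef X
    } // end of namespace dummy

    int main() {}
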