src/IceTargetLoweringX8632.cpp - Issue 1024203002: Move some flag-like props from GlobalContext and TargetLowering to ClFlags.

Side by Side Diff: src/IceTargetLoweringX8632.cpp

Issue 1024203002: Move some flag-like props from GlobalContext and TargetLowering to ClFlags. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master

Patch Set: review / clean up formatting Created 5 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//	1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//

2 //	2 //

3 // The Subzero Code Generator	3 // The Subzero Code Generator

4 //	4 //

5 // This file is distributed under the University of Illinois Open Source	5 // This file is distributed under the University of Illinois Open Source

6 // License. See LICENSE.TXT for details.	6 // License. See LICENSE.TXT for details.

7 //	7 //

8 //===----------------------------------------------------------------------===//	8 //===----------------------------------------------------------------------===//

9 //	9 //

10 // This file implements the TargetLoweringX8632 class, which	10 // This file implements the TargetLoweringX8632 class, which

11 // consists almost entirely of the lowering sequence for each	11 // consists almost entirely of the lowering sequence for each

12 // high-level instruction.	12 // high-level instruction.

13 //	13 //

14 //===----------------------------------------------------------------------===//	14 //===----------------------------------------------------------------------===//

15	15

16 #include "llvm/Support/CommandLine.h"

17 #include "llvm/Support/MathExtras.h"	16 #include "llvm/Support/MathExtras.h"

18	17

19 #include "IceCfg.h"	18 #include "IceCfg.h"

20 #include "IceCfgNode.h"	19 #include "IceCfgNode.h"

21 #include "IceClFlags.h"	20 #include "IceClFlags.h"

22 #include "IceDefs.h"	21 #include "IceDefs.h"

23 #include "IceELFObjectWriter.h"	22 #include "IceELFObjectWriter.h"

24 #include "IceGlobalInits.h"	23 #include "IceGlobalInits.h"

25 #include "IceInstX8632.h"	24 #include "IceInstX8632.h"

26 #include "IceLiveness.h"	25 #include "IceLiveness.h"

(...skipping 119 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
146 assert((Alignment & (Alignment - 1)) == 0);	145 assert((Alignment & (Alignment - 1)) == 0);

147 return (Value + Alignment - 1) & -Alignment;	146 return (Value + Alignment - 1) & -Alignment;

148 }	147 }

149	148

150 // Value is in bytes. Return Value adjusted to the next highest multiple	149 // Value is in bytes. Return Value adjusted to the next highest multiple

151 // of the stack alignment.	150 // of the stack alignment.

152 uint32_t applyStackAlignment(uint32_t Value) {	151 uint32_t applyStackAlignment(uint32_t Value) {

153 return applyAlignment(Value, X86_STACK_ALIGNMENT_BYTES);	152 return applyAlignment(Value, X86_STACK_ALIGNMENT_BYTES);

154 }	153 }

155	154

156 // Instruction set options

157 namespace cl = ::llvm::cl;

158 cl::opt<TargetX8632::X86InstructionSet> CLInstructionSet(

159 "mattr", cl::desc("X86 target attributes"), cl::init(TargetX8632::SSE2),

160 cl::values(clEnumValN(TargetX8632::SSE2, "sse2",

161 "Enable SSE2 instructions (default)"),

162 clEnumValN(TargetX8632::SSE4_1, "sse4.1",

163 "Enable SSE 4.1 instructions"),

164 clEnumValEnd));

165

166 // In some cases, there are x-macros tables for both high-level and	155 // In some cases, there are x-macros tables for both high-level and

167 // low-level instructions/operands that use the same enum key value.	156 // low-level instructions/operands that use the same enum key value.

168 // The tables are kept separate to maintain a proper separation	157 // The tables are kept separate to maintain a proper separation

169 // between abstraction layers. There is a risk that the tables could	158 // between abstraction layers. There is a risk that the tables could

170 // get out of sync if enum values are reordered or if entries are	159 // get out of sync if enum values are reordered or if entries are

171 // added or deleted. The following dummy namespaces use	160 // added or deleted. The following dummy namespaces use

172 // static_asserts to ensure everything is kept in sync.	161 // static_asserts to ensure everything is kept in sync.

173	162

174 // Validate the enum values in FCMPX8632_TABLE.	163 // Validate the enum values in FCMPX8632_TABLE.

175 namespace dummy1 {	164 namespace dummy1 {

(...skipping 89 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
265 #define X(tag, size, align, elts, elty, str) \	254 #define X(tag, size, align, elts, elty, str) \

266 static_assert(_table1_##tag == _table2_##tag, \	255 static_assert(_table1_##tag == _table2_##tag, \

267 "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE");	256 "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE");

268 ICETYPE_TABLE	257 ICETYPE_TABLE

269 #undef X	258 #undef X

270 } // end of namespace dummy3	259 } // end of namespace dummy3

271	260

272 } // end of anonymous namespace	261 } // end of anonymous namespace

273	262

274 TargetX8632::TargetX8632(Cfg *Func)	263 TargetX8632::TargetX8632(Cfg *Func)

275 : TargetLowering(Func), InstructionSet(CLInstructionSet),	264 : TargetLowering(Func),

	265 InstructionSet(static_cast<X86InstructionSet>(

	266 Func->getContext()->getFlags().getTargetInstructionSet() -

	267 TargetInstructionSet::X86InstructionSet_Begin)),

276 IsEbpBasedFrame(false), NeedsStackAlignment(false), FrameSizeLocals(0),	268 IsEbpBasedFrame(false), NeedsStackAlignment(false), FrameSizeLocals(0),

277 SpillAreaSizeBytes(0), NextLabelNumber(0) {	269 SpillAreaSizeBytes(0), NextLabelNumber(0) {

	270 static_assert((X86InstructionSet::End - X86InstructionSet::Begin) ==

	271 (TargetInstructionSet::X86InstructionSet_End -

	272 TargetInstructionSet::X86InstructionSet_Begin),

	273 "X86InstructionSet range different from TargetInstructionSet");

278 // TODO: Don't initialize IntegerRegisters and friends every time.	274 // TODO: Don't initialize IntegerRegisters and friends every time.

279 // Instead, initialize in some sort of static initializer for the	275 // Instead, initialize in some sort of static initializer for the

280 // class.	276 // class.

281 llvm::SmallBitVector IntegerRegisters(RegX8632::Reg_NUM);	277 llvm::SmallBitVector IntegerRegisters(RegX8632::Reg_NUM);

282 llvm::SmallBitVector IntegerRegistersI8(RegX8632::Reg_NUM);	278 llvm::SmallBitVector IntegerRegistersI8(RegX8632::Reg_NUM);

283 llvm::SmallBitVector FloatRegisters(RegX8632::Reg_NUM);	279 llvm::SmallBitVector FloatRegisters(RegX8632::Reg_NUM);

284 llvm::SmallBitVector VectorRegisters(RegX8632::Reg_NUM);	280 llvm::SmallBitVector VectorRegisters(RegX8632::Reg_NUM);

285 llvm::SmallBitVector InvalidRegisters(RegX8632::Reg_NUM);	281 llvm::SmallBitVector InvalidRegisters(RegX8632::Reg_NUM);

286 ScratchRegs.resize(RegX8632::Reg_NUM);	282 ScratchRegs.resize(RegX8632::Reg_NUM);

287 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \	283 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \

(...skipping 105 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
393	389

394 // Branch optimization. This needs to be done just before code	390 // Branch optimization. This needs to be done just before code

395 // emission. In particular, no transformations that insert or	391 // emission. In particular, no transformations that insert or

396 // reorder CfgNodes should be done after branch optimization. We go	392 // reorder CfgNodes should be done after branch optimization. We go

397 // ahead and do it before nop insertion to reduce the amount of work	393 // ahead and do it before nop insertion to reduce the amount of work

398 // needed for searching for opportunities.	394 // needed for searching for opportunities.

399 Func->doBranchOpt();	395 Func->doBranchOpt();

400 Func->dump("After branch optimization");	396 Func->dump("After branch optimization");

401	397

402 // Nop insertion	398 // Nop insertion

403 if (shouldDoNopInsertion()) {	399 if (Ctx->getFlags().shouldDoNopInsertion()) {

404 Func->doNopInsertion();	400 Func->doNopInsertion();

405 }	401 }

406 }	402 }

407	403

408 void TargetX8632::translateOm1() {	404 void TargetX8632::translateOm1() {

409 TimerMarker T(TimerStack::TT_Om1, Func);	405 TimerMarker T(TimerStack::TT_Om1, Func);

410	406

411 Func->placePhiLoads();	407 Func->placePhiLoads();

412 if (Func->hasError())	408 if (Func->hasError())

413 return;	409 return;

(...skipping 16 matching lines...) Expand all Loading...
430 if (Func->hasError())	426 if (Func->hasError())

431 return;	427 return;

432 Func->dump("After regalloc of infinite-weight variables");	428 Func->dump("After regalloc of infinite-weight variables");

433	429

434 Func->genFrame();	430 Func->genFrame();

435 if (Func->hasError())	431 if (Func->hasError())

436 return;	432 return;

437 Func->dump("After stack frame mapping");	433 Func->dump("After stack frame mapping");

438	434

439 // Nop insertion	435 // Nop insertion

440 if (shouldDoNopInsertion()) {	436 if (Ctx->getFlags().shouldDoNopInsertion()) {

441 Func->doNopInsertion();	437 Func->doNopInsertion();

442 }	438 }

443 }	439 }

444	440

445 bool TargetX8632::doBranchOpt(Inst *I, const CfgNode *NextNode) {	441 bool TargetX8632::doBranchOpt(Inst *I, const CfgNode *NextNode) {

446 if (InstX8632Br *Br = llvm::dyn_cast<InstX8632Br>(I)) {	442 if (InstX8632Br *Br = llvm::dyn_cast<InstX8632Br>(I)) {

447 return Br->optimizeBranch(NextNode);	443 return Br->optimizeBranch(NextNode);

448 }	444 }

449 return false;	445 return false;

450 }	446 }

(...skipping 2768 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3219 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Expected->getType());	3215 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Expected->getType());

3220 Variable *DesiredReg = legalizeToVar(Desired);	3216 Variable *DesiredReg = legalizeToVar(Desired);

3221 const bool Locked = true;	3217 const bool Locked = true;

3222 _cmpxchg(Addr, T_eax, DesiredReg, Locked);	3218 _cmpxchg(Addr, T_eax, DesiredReg, Locked);

3223 _mov(DestPrev, T_eax);	3219 _mov(DestPrev, T_eax);

3224 }	3220 }

3225	3221

3226 bool TargetX8632::tryOptimizedCmpxchgCmpBr(Variable *Dest, Operand *PtrToMem,	3222 bool TargetX8632::tryOptimizedCmpxchgCmpBr(Variable *Dest, Operand *PtrToMem,

3227 Operand *Expected,	3223 Operand *Expected,

3228 Operand *Desired) {	3224 Operand *Desired) {

3229 if (Ctx->getOptLevel() == Opt_m1)	3225 if (Ctx->getFlags().getOptLevel() == Opt_m1)

3230 return false;	3226 return false;

3231 // Peek ahead a few instructions and see how Dest is used.	3227 // Peek ahead a few instructions and see how Dest is used.

3232 // It's very common to have:	3228 // It's very common to have:

3233 //	3229 //

3234 // %x = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* ptr, i32 %expected, ...)	3230 // %x = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* ptr, i32 %expected, ...)

3235 // [%y_phi = ...] // list of phi stores	3231 // [%y_phi = ...] // list of phi stores

3236 // %p = icmp eq i32 %x, %expected	3232 // %p = icmp eq i32 %x, %expected

3237 // br i1 %p, label %l1, label %l2	3233 // br i1 %p, label %l1, label %l2

3238 //	3234 //

3239 // which we can optimize into:	3235 // which we can optimize into:

(...skipping 1292 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
4532 assert(Type != IceType_i64);	4528 assert(Type != IceType_i64);

4533 Variable *Reg = Func->makeVariable(Type);	4529 Variable *Reg = Func->makeVariable(Type);

4534 if (RegNum == Variable::NoRegister)	4530 if (RegNum == Variable::NoRegister)

4535 Reg->setWeightInfinite();	4531 Reg->setWeightInfinite();

4536 else	4532 else

4537 Reg->setRegNum(RegNum);	4533 Reg->setRegNum(RegNum);

4538 return Reg;	4534 return Reg;

4539 }	4535 }

4540	4536

4541 void TargetX8632::postLower() {	4537 void TargetX8632::postLower() {

4542 if (Ctx->getOptLevel() == Opt_m1)	4538 if (Ctx->getFlags().getOptLevel() == Opt_m1)

4543 return;	4539 return;

4544 // Find two-address non-SSA instructions where Dest==Src0, and set	4540 // Find two-address non-SSA instructions where Dest==Src0, and set

4545 // the DestNonKillable flag to keep liveness analysis consistent.	4541 // the DestNonKillable flag to keep liveness analysis consistent.

4546 for (auto Inst = Context.getCur(), E = Context.getNext(); Inst != E; ++Inst) {	4542 for (auto Inst = Context.getCur(), E = Context.getNext(); Inst != E; ++Inst) {

4547 if (Inst->isDeleted())	4543 if (Inst->isDeleted())

4548 continue;	4544 continue;

4549 if (Variable *Dest = Inst->getDest()) {	4545 if (Variable *Dest = Inst->getDest()) {

4550 // TODO(stichnot): We may need to consider all source	4546 // TODO(stichnot): We may need to consider all source

4551 // operands, not just the first one, if using 3-address	4547 // operands, not just the first one, if using 3-address

4552 // instructions.	4548 // instructions.

(...skipping 294 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
4847 case FT_Asm:	4843 case FT_Asm:

4848 case FT_Iasm: {	4844 case FT_Iasm: {

4849 OstreamLocker L(Ctx);	4845 OstreamLocker L(Ctx);

4850 emitConstantPool<PoolTypeConverter<float>>(Ctx);	4846 emitConstantPool<PoolTypeConverter<float>>(Ctx);

4851 emitConstantPool<PoolTypeConverter<double>>(Ctx);	4847 emitConstantPool<PoolTypeConverter<double>>(Ctx);

4852 } break;	4848 } break;

4853 }	4849 }

4854 }	4850 }

4855	4851

4856 } // end of namespace Ice	4852 } // end of namespace Ice

OLD	NEW

« no previous file with comments | « src/IceTargetLoweringX8632.h ('k') | src/IceTypes.h » ('j') | no next file with comments »