OLD | NEW |
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// | 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 // | 9 // |
10 // This file implements the TargetLoweringX8632 class, which | 10 // This file implements the TargetLoweringX8632 class, which |
11 // consists almost entirely of the lowering sequence for each | 11 // consists almost entirely of the lowering sequence for each |
12 // high-level instruction. | 12 // high-level instruction. |
13 // | 13 // |
14 //===----------------------------------------------------------------------===// | 14 //===----------------------------------------------------------------------===// |
15 | 15 |
16 #include "llvm/Support/CommandLine.h" | |
17 #include "llvm/Support/MathExtras.h" | 16 #include "llvm/Support/MathExtras.h" |
18 | 17 |
19 #include "IceCfg.h" | 18 #include "IceCfg.h" |
20 #include "IceCfgNode.h" | 19 #include "IceCfgNode.h" |
21 #include "IceClFlags.h" | 20 #include "IceClFlags.h" |
22 #include "IceDefs.h" | 21 #include "IceDefs.h" |
23 #include "IceELFObjectWriter.h" | 22 #include "IceELFObjectWriter.h" |
24 #include "IceGlobalInits.h" | 23 #include "IceGlobalInits.h" |
25 #include "IceInstX8632.h" | 24 #include "IceInstX8632.h" |
26 #include "IceLiveness.h" | 25 #include "IceLiveness.h" |
(...skipping 119 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
146 assert((Alignment & (Alignment - 1)) == 0); | 145 assert((Alignment & (Alignment - 1)) == 0); |
147 return (Value + Alignment - 1) & -Alignment; | 146 return (Value + Alignment - 1) & -Alignment; |
148 } | 147 } |
149 | 148 |
150 // Value is in bytes. Return Value adjusted to the next highest multiple | 149 // Value is in bytes. Return Value adjusted to the next highest multiple |
151 // of the stack alignment. | 150 // of the stack alignment. |
152 uint32_t applyStackAlignment(uint32_t Value) { | 151 uint32_t applyStackAlignment(uint32_t Value) { |
153 return applyAlignment(Value, X86_STACK_ALIGNMENT_BYTES); | 152 return applyAlignment(Value, X86_STACK_ALIGNMENT_BYTES); |
154 } | 153 } |
155 | 154 |
156 // Instruction set options | |
157 namespace cl = ::llvm::cl; | |
158 cl::opt<TargetX8632::X86InstructionSet> CLInstructionSet( | |
159 "mattr", cl::desc("X86 target attributes"), cl::init(TargetX8632::SSE2), | |
160 cl::values(clEnumValN(TargetX8632::SSE2, "sse2", | |
161 "Enable SSE2 instructions (default)"), | |
162 clEnumValN(TargetX8632::SSE4_1, "sse4.1", | |
163 "Enable SSE 4.1 instructions"), | |
164 clEnumValEnd)); | |
165 | |
166 // In some cases, there are x-macros tables for both high-level and | 155 // In some cases, there are x-macros tables for both high-level and |
167 // low-level instructions/operands that use the same enum key value. | 156 // low-level instructions/operands that use the same enum key value. |
168 // The tables are kept separate to maintain a proper separation | 157 // The tables are kept separate to maintain a proper separation |
169 // between abstraction layers. There is a risk that the tables could | 158 // between abstraction layers. There is a risk that the tables could |
170 // get out of sync if enum values are reordered or if entries are | 159 // get out of sync if enum values are reordered or if entries are |
171 // added or deleted. The following dummy namespaces use | 160 // added or deleted. The following dummy namespaces use |
172 // static_asserts to ensure everything is kept in sync. | 161 // static_asserts to ensure everything is kept in sync. |
173 | 162 |
174 // Validate the enum values in FCMPX8632_TABLE. | 163 // Validate the enum values in FCMPX8632_TABLE. |
175 namespace dummy1 { | 164 namespace dummy1 { |
(...skipping 89 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
265 #define X(tag, size, align, elts, elty, str) \ | 254 #define X(tag, size, align, elts, elty, str) \ |
266 static_assert(_table1_##tag == _table2_##tag, \ | 255 static_assert(_table1_##tag == _table2_##tag, \ |
267 "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE"); | 256 "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE"); |
268 ICETYPE_TABLE | 257 ICETYPE_TABLE |
269 #undef X | 258 #undef X |
270 } // end of namespace dummy3 | 259 } // end of namespace dummy3 |
271 | 260 |
272 } // end of anonymous namespace | 261 } // end of anonymous namespace |
273 | 262 |
274 TargetX8632::TargetX8632(Cfg *Func) | 263 TargetX8632::TargetX8632(Cfg *Func) |
275 : TargetLowering(Func), InstructionSet(CLInstructionSet), | 264 : TargetLowering(Func), |
| 265 InstructionSet(static_cast<X86InstructionSet>( |
| 266 Func->getContext()->getFlags().getTargetInstructionSet() - |
| 267 TargetInstructionSet::X86InstructionSet_Begin)), |
276 IsEbpBasedFrame(false), NeedsStackAlignment(false), FrameSizeLocals(0), | 268 IsEbpBasedFrame(false), NeedsStackAlignment(false), FrameSizeLocals(0), |
277 SpillAreaSizeBytes(0), NextLabelNumber(0) { | 269 SpillAreaSizeBytes(0), NextLabelNumber(0) { |
| 270 static_assert((X86InstructionSet::End - X86InstructionSet::Begin) == |
| 271 (TargetInstructionSet::X86InstructionSet_End - |
| 272 TargetInstructionSet::X86InstructionSet_Begin), |
| 273 "X86InstructionSet range different from TargetInstructionSet"); |
278 // TODO: Don't initialize IntegerRegisters and friends every time. | 274 // TODO: Don't initialize IntegerRegisters and friends every time. |
279 // Instead, initialize in some sort of static initializer for the | 275 // Instead, initialize in some sort of static initializer for the |
280 // class. | 276 // class. |
281 llvm::SmallBitVector IntegerRegisters(RegX8632::Reg_NUM); | 277 llvm::SmallBitVector IntegerRegisters(RegX8632::Reg_NUM); |
282 llvm::SmallBitVector IntegerRegistersI8(RegX8632::Reg_NUM); | 278 llvm::SmallBitVector IntegerRegistersI8(RegX8632::Reg_NUM); |
283 llvm::SmallBitVector FloatRegisters(RegX8632::Reg_NUM); | 279 llvm::SmallBitVector FloatRegisters(RegX8632::Reg_NUM); |
284 llvm::SmallBitVector VectorRegisters(RegX8632::Reg_NUM); | 280 llvm::SmallBitVector VectorRegisters(RegX8632::Reg_NUM); |
285 llvm::SmallBitVector InvalidRegisters(RegX8632::Reg_NUM); | 281 llvm::SmallBitVector InvalidRegisters(RegX8632::Reg_NUM); |
286 ScratchRegs.resize(RegX8632::Reg_NUM); | 282 ScratchRegs.resize(RegX8632::Reg_NUM); |
287 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \ | 283 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \ |
(...skipping 105 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
393 | 389 |
394 // Branch optimization. This needs to be done just before code | 390 // Branch optimization. This needs to be done just before code |
395 // emission. In particular, no transformations that insert or | 391 // emission. In particular, no transformations that insert or |
396 // reorder CfgNodes should be done after branch optimization. We go | 392 // reorder CfgNodes should be done after branch optimization. We go |
397 // ahead and do it before nop insertion to reduce the amount of work | 393 // ahead and do it before nop insertion to reduce the amount of work |
398 // needed for searching for opportunities. | 394 // needed for searching for opportunities. |
399 Func->doBranchOpt(); | 395 Func->doBranchOpt(); |
400 Func->dump("After branch optimization"); | 396 Func->dump("After branch optimization"); |
401 | 397 |
402 // Nop insertion | 398 // Nop insertion |
403 if (shouldDoNopInsertion()) { | 399 if (Ctx->getFlags().shouldDoNopInsertion()) { |
404 Func->doNopInsertion(); | 400 Func->doNopInsertion(); |
405 } | 401 } |
406 } | 402 } |
407 | 403 |
408 void TargetX8632::translateOm1() { | 404 void TargetX8632::translateOm1() { |
409 TimerMarker T(TimerStack::TT_Om1, Func); | 405 TimerMarker T(TimerStack::TT_Om1, Func); |
410 | 406 |
411 Func->placePhiLoads(); | 407 Func->placePhiLoads(); |
412 if (Func->hasError()) | 408 if (Func->hasError()) |
413 return; | 409 return; |
(...skipping 16 matching lines...) Expand all Loading... |
430 if (Func->hasError()) | 426 if (Func->hasError()) |
431 return; | 427 return; |
432 Func->dump("After regalloc of infinite-weight variables"); | 428 Func->dump("After regalloc of infinite-weight variables"); |
433 | 429 |
434 Func->genFrame(); | 430 Func->genFrame(); |
435 if (Func->hasError()) | 431 if (Func->hasError()) |
436 return; | 432 return; |
437 Func->dump("After stack frame mapping"); | 433 Func->dump("After stack frame mapping"); |
438 | 434 |
439 // Nop insertion | 435 // Nop insertion |
440 if (shouldDoNopInsertion()) { | 436 if (Ctx->getFlags().shouldDoNopInsertion()) { |
441 Func->doNopInsertion(); | 437 Func->doNopInsertion(); |
442 } | 438 } |
443 } | 439 } |
444 | 440 |
445 bool TargetX8632::doBranchOpt(Inst *I, const CfgNode *NextNode) { | 441 bool TargetX8632::doBranchOpt(Inst *I, const CfgNode *NextNode) { |
446 if (InstX8632Br *Br = llvm::dyn_cast<InstX8632Br>(I)) { | 442 if (InstX8632Br *Br = llvm::dyn_cast<InstX8632Br>(I)) { |
447 return Br->optimizeBranch(NextNode); | 443 return Br->optimizeBranch(NextNode); |
448 } | 444 } |
449 return false; | 445 return false; |
450 } | 446 } |
(...skipping 2768 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3219 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Expected->getType()); | 3215 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Expected->getType()); |
3220 Variable *DesiredReg = legalizeToVar(Desired); | 3216 Variable *DesiredReg = legalizeToVar(Desired); |
3221 const bool Locked = true; | 3217 const bool Locked = true; |
3222 _cmpxchg(Addr, T_eax, DesiredReg, Locked); | 3218 _cmpxchg(Addr, T_eax, DesiredReg, Locked); |
3223 _mov(DestPrev, T_eax); | 3219 _mov(DestPrev, T_eax); |
3224 } | 3220 } |
3225 | 3221 |
3226 bool TargetX8632::tryOptimizedCmpxchgCmpBr(Variable *Dest, Operand *PtrToMem, | 3222 bool TargetX8632::tryOptimizedCmpxchgCmpBr(Variable *Dest, Operand *PtrToMem, |
3227 Operand *Expected, | 3223 Operand *Expected, |
3228 Operand *Desired) { | 3224 Operand *Desired) { |
3229 if (Ctx->getOptLevel() == Opt_m1) | 3225 if (Ctx->getFlags().getOptLevel() == Opt_m1) |
3230 return false; | 3226 return false; |
3231 // Peek ahead a few instructions and see how Dest is used. | 3227 // Peek ahead a few instructions and see how Dest is used. |
3232 // It's very common to have: | 3228 // It's very common to have: |
3233 // | 3229 // |
3234 // %x = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* ptr, i32 %expected, ...) | 3230 // %x = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* ptr, i32 %expected, ...) |
3235 // [%y_phi = ...] // list of phi stores | 3231 // [%y_phi = ...] // list of phi stores |
3236 // %p = icmp eq i32 %x, %expected | 3232 // %p = icmp eq i32 %x, %expected |
3237 // br i1 %p, label %l1, label %l2 | 3233 // br i1 %p, label %l1, label %l2 |
3238 // | 3234 // |
3239 // which we can optimize into: | 3235 // which we can optimize into: |
(...skipping 1292 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4532 assert(Type != IceType_i64); | 4528 assert(Type != IceType_i64); |
4533 Variable *Reg = Func->makeVariable(Type); | 4529 Variable *Reg = Func->makeVariable(Type); |
4534 if (RegNum == Variable::NoRegister) | 4530 if (RegNum == Variable::NoRegister) |
4535 Reg->setWeightInfinite(); | 4531 Reg->setWeightInfinite(); |
4536 else | 4532 else |
4537 Reg->setRegNum(RegNum); | 4533 Reg->setRegNum(RegNum); |
4538 return Reg; | 4534 return Reg; |
4539 } | 4535 } |
4540 | 4536 |
4541 void TargetX8632::postLower() { | 4537 void TargetX8632::postLower() { |
4542 if (Ctx->getOptLevel() == Opt_m1) | 4538 if (Ctx->getFlags().getOptLevel() == Opt_m1) |
4543 return; | 4539 return; |
4544 // Find two-address non-SSA instructions where Dest==Src0, and set | 4540 // Find two-address non-SSA instructions where Dest==Src0, and set |
4545 // the DestNonKillable flag to keep liveness analysis consistent. | 4541 // the DestNonKillable flag to keep liveness analysis consistent. |
4546 for (auto Inst = Context.getCur(), E = Context.getNext(); Inst != E; ++Inst) { | 4542 for (auto Inst = Context.getCur(), E = Context.getNext(); Inst != E; ++Inst) { |
4547 if (Inst->isDeleted()) | 4543 if (Inst->isDeleted()) |
4548 continue; | 4544 continue; |
4549 if (Variable *Dest = Inst->getDest()) { | 4545 if (Variable *Dest = Inst->getDest()) { |
4550 // TODO(stichnot): We may need to consider all source | 4546 // TODO(stichnot): We may need to consider all source |
4551 // operands, not just the first one, if using 3-address | 4547 // operands, not just the first one, if using 3-address |
4552 // instructions. | 4548 // instructions. |
(...skipping 294 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4847 case FT_Asm: | 4843 case FT_Asm: |
4848 case FT_Iasm: { | 4844 case FT_Iasm: { |
4849 OstreamLocker L(Ctx); | 4845 OstreamLocker L(Ctx); |
4850 emitConstantPool<PoolTypeConverter<float>>(Ctx); | 4846 emitConstantPool<PoolTypeConverter<float>>(Ctx); |
4851 emitConstantPool<PoolTypeConverter<double>>(Ctx); | 4847 emitConstantPool<PoolTypeConverter<double>>(Ctx); |
4852 } break; | 4848 } break; |
4853 } | 4849 } |
4854 } | 4850 } |
4855 | 4851 |
4856 } // end of namespace Ice | 4852 } // end of namespace Ice |
OLD | NEW |