| OLD | NEW |
| 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// | 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 // | 9 // |
| 10 // This file implements the TargetLoweringX8632 class, which | 10 // This file implements the TargetLoweringX8632 class, which |
| 11 // consists almost entirely of the lowering sequence for each | 11 // consists almost entirely of the lowering sequence for each |
| 12 // high-level instruction. | 12 // high-level instruction. |
| 13 // | 13 // |
| 14 //===----------------------------------------------------------------------===// | 14 //===----------------------------------------------------------------------===// |
| 15 | 15 |
| 16 #include "llvm/Support/CommandLine.h" | |
| 17 #include "llvm/Support/MathExtras.h" | 16 #include "llvm/Support/MathExtras.h" |
| 18 | 17 |
| 19 #include "IceCfg.h" | 18 #include "IceCfg.h" |
| 20 #include "IceCfgNode.h" | 19 #include "IceCfgNode.h" |
| 21 #include "IceClFlags.h" | 20 #include "IceClFlags.h" |
| 22 #include "IceDefs.h" | 21 #include "IceDefs.h" |
| 23 #include "IceELFObjectWriter.h" | 22 #include "IceELFObjectWriter.h" |
| 24 #include "IceGlobalInits.h" | 23 #include "IceGlobalInits.h" |
| 25 #include "IceInstX8632.h" | 24 #include "IceInstX8632.h" |
| 26 #include "IceLiveness.h" | 25 #include "IceLiveness.h" |
| (...skipping 119 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 146 assert((Alignment & (Alignment - 1)) == 0); | 145 assert((Alignment & (Alignment - 1)) == 0); |
| 147 return (Value + Alignment - 1) & -Alignment; | 146 return (Value + Alignment - 1) & -Alignment; |
| 148 } | 147 } |
| 149 | 148 |
| 150 // Value is in bytes. Return Value adjusted to the next highest multiple | 149 // Value is in bytes. Return Value adjusted to the next highest multiple |
| 151 // of the stack alignment. | 150 // of the stack alignment. |
| 152 uint32_t applyStackAlignment(uint32_t Value) { | 151 uint32_t applyStackAlignment(uint32_t Value) { |
| 153 return applyAlignment(Value, X86_STACK_ALIGNMENT_BYTES); | 152 return applyAlignment(Value, X86_STACK_ALIGNMENT_BYTES); |
| 154 } | 153 } |
| 155 | 154 |
| 156 // Instruction set options | |
| 157 namespace cl = ::llvm::cl; | |
| 158 cl::opt<TargetX8632::X86InstructionSet> CLInstructionSet( | |
| 159 "mattr", cl::desc("X86 target attributes"), cl::init(TargetX8632::SSE2), | |
| 160 cl::values(clEnumValN(TargetX8632::SSE2, "sse2", | |
| 161 "Enable SSE2 instructions (default)"), | |
| 162 clEnumValN(TargetX8632::SSE4_1, "sse4.1", | |
| 163 "Enable SSE 4.1 instructions"), | |
| 164 clEnumValEnd)); | |
| 165 | |
| 166 // In some cases, there are x-macros tables for both high-level and | 155 // In some cases, there are x-macros tables for both high-level and |
| 167 // low-level instructions/operands that use the same enum key value. | 156 // low-level instructions/operands that use the same enum key value. |
| 168 // The tables are kept separate to maintain a proper separation | 157 // The tables are kept separate to maintain a proper separation |
| 169 // between abstraction layers. There is a risk that the tables could | 158 // between abstraction layers. There is a risk that the tables could |
| 170 // get out of sync if enum values are reordered or if entries are | 159 // get out of sync if enum values are reordered or if entries are |
| 171 // added or deleted. The following dummy namespaces use | 160 // added or deleted. The following dummy namespaces use |
| 172 // static_asserts to ensure everything is kept in sync. | 161 // static_asserts to ensure everything is kept in sync. |
| 173 | 162 |
| 174 // Validate the enum values in FCMPX8632_TABLE. | 163 // Validate the enum values in FCMPX8632_TABLE. |
| 175 namespace dummy1 { | 164 namespace dummy1 { |
| (...skipping 89 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 265 #define X(tag, size, align, elts, elty, str) \ | 254 #define X(tag, size, align, elts, elty, str) \ |
| 266 static_assert(_table1_##tag == _table2_##tag, \ | 255 static_assert(_table1_##tag == _table2_##tag, \ |
| 267 "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE"); | 256 "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE"); |
| 268 ICETYPE_TABLE | 257 ICETYPE_TABLE |
| 269 #undef X | 258 #undef X |
| 270 } // end of namespace dummy3 | 259 } // end of namespace dummy3 |
| 271 | 260 |
| 272 } // end of anonymous namespace | 261 } // end of anonymous namespace |
| 273 | 262 |
| 274 TargetX8632::TargetX8632(Cfg *Func) | 263 TargetX8632::TargetX8632(Cfg *Func) |
| 275 : TargetLowering(Func), InstructionSet(CLInstructionSet), | 264 : TargetLowering(Func), |
| 265 InstructionSet(static_cast<X86InstructionSet>( |
| 266 Func->getContext()->getFlags().getTargetInstructionSet() - |
| 267 TargetInstructionSet::X86InstructionSet_Begin)), |
| 276 IsEbpBasedFrame(false), NeedsStackAlignment(false), FrameSizeLocals(0), | 268 IsEbpBasedFrame(false), NeedsStackAlignment(false), FrameSizeLocals(0), |
| 277 SpillAreaSizeBytes(0), NextLabelNumber(0) { | 269 SpillAreaSizeBytes(0), NextLabelNumber(0) { |
| 270 static_assert((X86InstructionSet::End - X86InstructionSet::Begin) == |
| 271 (TargetInstructionSet::X86InstructionSet_End - |
| 272 TargetInstructionSet::X86InstructionSet_Begin), |
| 273 "X86InstructionSet range different from TargetInstructionSet"); |
| 278 // TODO: Don't initialize IntegerRegisters and friends every time. | 274 // TODO: Don't initialize IntegerRegisters and friends every time. |
| 279 // Instead, initialize in some sort of static initializer for the | 275 // Instead, initialize in some sort of static initializer for the |
| 280 // class. | 276 // class. |
| 281 llvm::SmallBitVector IntegerRegisters(RegX8632::Reg_NUM); | 277 llvm::SmallBitVector IntegerRegisters(RegX8632::Reg_NUM); |
| 282 llvm::SmallBitVector IntegerRegistersI8(RegX8632::Reg_NUM); | 278 llvm::SmallBitVector IntegerRegistersI8(RegX8632::Reg_NUM); |
| 283 llvm::SmallBitVector FloatRegisters(RegX8632::Reg_NUM); | 279 llvm::SmallBitVector FloatRegisters(RegX8632::Reg_NUM); |
| 284 llvm::SmallBitVector VectorRegisters(RegX8632::Reg_NUM); | 280 llvm::SmallBitVector VectorRegisters(RegX8632::Reg_NUM); |
| 285 llvm::SmallBitVector InvalidRegisters(RegX8632::Reg_NUM); | 281 llvm::SmallBitVector InvalidRegisters(RegX8632::Reg_NUM); |
| 286 ScratchRegs.resize(RegX8632::Reg_NUM); | 282 ScratchRegs.resize(RegX8632::Reg_NUM); |
| 287 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \ | 283 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \ |
| (...skipping 105 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 393 | 389 |
| 394 // Branch optimization. This needs to be done just before code | 390 // Branch optimization. This needs to be done just before code |
| 395 // emission. In particular, no transformations that insert or | 391 // emission. In particular, no transformations that insert or |
| 396 // reorder CfgNodes should be done after branch optimization. We go | 392 // reorder CfgNodes should be done after branch optimization. We go |
| 397 // ahead and do it before nop insertion to reduce the amount of work | 393 // ahead and do it before nop insertion to reduce the amount of work |
| 398 // needed for searching for opportunities. | 394 // needed for searching for opportunities. |
| 399 Func->doBranchOpt(); | 395 Func->doBranchOpt(); |
| 400 Func->dump("After branch optimization"); | 396 Func->dump("After branch optimization"); |
| 401 | 397 |
| 402 // Nop insertion | 398 // Nop insertion |
| 403 if (shouldDoNopInsertion()) { | 399 if (Ctx->getFlags().shouldDoNopInsertion()) { |
| 404 Func->doNopInsertion(); | 400 Func->doNopInsertion(); |
| 405 } | 401 } |
| 406 } | 402 } |
| 407 | 403 |
| 408 void TargetX8632::translateOm1() { | 404 void TargetX8632::translateOm1() { |
| 409 TimerMarker T(TimerStack::TT_Om1, Func); | 405 TimerMarker T(TimerStack::TT_Om1, Func); |
| 410 | 406 |
| 411 Func->placePhiLoads(); | 407 Func->placePhiLoads(); |
| 412 if (Func->hasError()) | 408 if (Func->hasError()) |
| 413 return; | 409 return; |
| (...skipping 16 matching lines...) Expand all Loading... |
| 430 if (Func->hasError()) | 426 if (Func->hasError()) |
| 431 return; | 427 return; |
| 432 Func->dump("After regalloc of infinite-weight variables"); | 428 Func->dump("After regalloc of infinite-weight variables"); |
| 433 | 429 |
| 434 Func->genFrame(); | 430 Func->genFrame(); |
| 435 if (Func->hasError()) | 431 if (Func->hasError()) |
| 436 return; | 432 return; |
| 437 Func->dump("After stack frame mapping"); | 433 Func->dump("After stack frame mapping"); |
| 438 | 434 |
| 439 // Nop insertion | 435 // Nop insertion |
| 440 if (shouldDoNopInsertion()) { | 436 if (Ctx->getFlags().shouldDoNopInsertion()) { |
| 441 Func->doNopInsertion(); | 437 Func->doNopInsertion(); |
| 442 } | 438 } |
| 443 } | 439 } |
| 444 | 440 |
| 445 bool TargetX8632::doBranchOpt(Inst *I, const CfgNode *NextNode) { | 441 bool TargetX8632::doBranchOpt(Inst *I, const CfgNode *NextNode) { |
| 446 if (InstX8632Br *Br = llvm::dyn_cast<InstX8632Br>(I)) { | 442 if (InstX8632Br *Br = llvm::dyn_cast<InstX8632Br>(I)) { |
| 447 return Br->optimizeBranch(NextNode); | 443 return Br->optimizeBranch(NextNode); |
| 448 } | 444 } |
| 449 return false; | 445 return false; |
| 450 } | 446 } |
| (...skipping 2768 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3219 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Expected->getType()); | 3215 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Expected->getType()); |
| 3220 Variable *DesiredReg = legalizeToVar(Desired); | 3216 Variable *DesiredReg = legalizeToVar(Desired); |
| 3221 const bool Locked = true; | 3217 const bool Locked = true; |
| 3222 _cmpxchg(Addr, T_eax, DesiredReg, Locked); | 3218 _cmpxchg(Addr, T_eax, DesiredReg, Locked); |
| 3223 _mov(DestPrev, T_eax); | 3219 _mov(DestPrev, T_eax); |
| 3224 } | 3220 } |
| 3225 | 3221 |
| 3226 bool TargetX8632::tryOptimizedCmpxchgCmpBr(Variable *Dest, Operand *PtrToMem, | 3222 bool TargetX8632::tryOptimizedCmpxchgCmpBr(Variable *Dest, Operand *PtrToMem, |
| 3227 Operand *Expected, | 3223 Operand *Expected, |
| 3228 Operand *Desired) { | 3224 Operand *Desired) { |
| 3229 if (Ctx->getOptLevel() == Opt_m1) | 3225 if (Ctx->getFlags().getOptLevel() == Opt_m1) |
| 3230 return false; | 3226 return false; |
| 3231 // Peek ahead a few instructions and see how Dest is used. | 3227 // Peek ahead a few instructions and see how Dest is used. |
| 3232 // It's very common to have: | 3228 // It's very common to have: |
| 3233 // | 3229 // |
| 3234 // %x = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* ptr, i32 %expected, ...) | 3230 // %x = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* ptr, i32 %expected, ...) |
| 3235 // [%y_phi = ...] // list of phi stores | 3231 // [%y_phi = ...] // list of phi stores |
| 3236 // %p = icmp eq i32 %x, %expected | 3232 // %p = icmp eq i32 %x, %expected |
| 3237 // br i1 %p, label %l1, label %l2 | 3233 // br i1 %p, label %l1, label %l2 |
| 3238 // | 3234 // |
| 3239 // which we can optimize into: | 3235 // which we can optimize into: |
| (...skipping 1292 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4532 assert(Type != IceType_i64); | 4528 assert(Type != IceType_i64); |
| 4533 Variable *Reg = Func->makeVariable(Type); | 4529 Variable *Reg = Func->makeVariable(Type); |
| 4534 if (RegNum == Variable::NoRegister) | 4530 if (RegNum == Variable::NoRegister) |
| 4535 Reg->setWeightInfinite(); | 4531 Reg->setWeightInfinite(); |
| 4536 else | 4532 else |
| 4537 Reg->setRegNum(RegNum); | 4533 Reg->setRegNum(RegNum); |
| 4538 return Reg; | 4534 return Reg; |
| 4539 } | 4535 } |
| 4540 | 4536 |
| 4541 void TargetX8632::postLower() { | 4537 void TargetX8632::postLower() { |
| 4542 if (Ctx->getOptLevel() == Opt_m1) | 4538 if (Ctx->getFlags().getOptLevel() == Opt_m1) |
| 4543 return; | 4539 return; |
| 4544 // Find two-address non-SSA instructions where Dest==Src0, and set | 4540 // Find two-address non-SSA instructions where Dest==Src0, and set |
| 4545 // the DestNonKillable flag to keep liveness analysis consistent. | 4541 // the DestNonKillable flag to keep liveness analysis consistent. |
| 4546 for (auto Inst = Context.getCur(), E = Context.getNext(); Inst != E; ++Inst) { | 4542 for (auto Inst = Context.getCur(), E = Context.getNext(); Inst != E; ++Inst) { |
| 4547 if (Inst->isDeleted()) | 4543 if (Inst->isDeleted()) |
| 4548 continue; | 4544 continue; |
| 4549 if (Variable *Dest = Inst->getDest()) { | 4545 if (Variable *Dest = Inst->getDest()) { |
| 4550 // TODO(stichnot): We may need to consider all source | 4546 // TODO(stichnot): We may need to consider all source |
| 4551 // operands, not just the first one, if using 3-address | 4547 // operands, not just the first one, if using 3-address |
| 4552 // instructions. | 4548 // instructions. |
| (...skipping 294 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4847 case FT_Asm: | 4843 case FT_Asm: |
| 4848 case FT_Iasm: { | 4844 case FT_Iasm: { |
| 4849 OstreamLocker L(Ctx); | 4845 OstreamLocker L(Ctx); |
| 4850 emitConstantPool<PoolTypeConverter<float>>(Ctx); | 4846 emitConstantPool<PoolTypeConverter<float>>(Ctx); |
| 4851 emitConstantPool<PoolTypeConverter<double>>(Ctx); | 4847 emitConstantPool<PoolTypeConverter<double>>(Ctx); |
| 4852 } break; | 4848 } break; |
| 4853 } | 4849 } |
| 4854 } | 4850 } |
| 4855 | 4851 |
| 4856 } // end of namespace Ice | 4852 } // end of namespace Ice |
| OLD | NEW |