OLD | NEW |
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// | 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 // | 9 // |
10 // This file implements the TargetLoweringX8632 class, which | 10 // This file implements the TargetLoweringX8632 class, which |
11 // consists almost entirely of the lowering sequence for each | 11 // consists almost entirely of the lowering sequence for each |
12 // high-level instruction. It also implements | 12 // high-level instruction. It also implements |
13 // TargetX8632Fast::postLower() which does the simplest possible | 13 // TargetX8632Fast::postLower() which does the simplest possible |
14 // register allocation for the "fast" target. | 14 // register allocation for the "fast" target. |
15 // | 15 // |
16 //===----------------------------------------------------------------------===// | 16 //===----------------------------------------------------------------------===// |
17 | 17 |
18 #include "IceDefs.h" | 18 #include "IceDefs.h" |
19 #include "IceCfg.h" | 19 #include "IceCfg.h" |
20 #include "IceCfgNode.h" | 20 #include "IceCfgNode.h" |
21 #include "IceInstX8632.h" | 21 #include "IceInstX8632.h" |
22 #include "IceOperand.h" | 22 #include "IceOperand.h" |
23 #include "IceTargetLoweringX8632.def" | 23 #include "IceTargetLoweringX8632.def" |
24 #include "IceTargetLoweringX8632.h" | 24 #include "IceTargetLoweringX8632.h" |
| 25 #include "llvm/ADT/DenseMap.h" |
25 #include "llvm/Support/CommandLine.h" | 26 #include "llvm/Support/CommandLine.h" |
26 | 27 |
27 #include <strings.h> | 28 #include <strings.h> |
28 | 29 |
29 namespace Ice { | 30 namespace Ice { |
30 | 31 |
31 namespace { | 32 namespace { |
32 | 33 |
33 // The following table summarizes the logic for lowering the fcmp | 34 // The following table summarizes the logic for lowering the fcmp |
34 // instruction. There is one table entry for each of the 16 conditions. | 35 // instruction. There is one table entry for each of the 16 conditions. |
(...skipping 4122 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4157 else | 4158 else |
4158 Reg->setRegNum(RegNum); | 4159 Reg->setRegNum(RegNum); |
4159 return Reg; | 4160 return Reg; |
4160 } | 4161 } |
4161 | 4162 |
// NOTE(review): TargetX8632::postLower(), shown as a side-by-side diff (OLD
// column on the left, NEW column on the right). The function returns
// immediately unless the opt level is Opt_m1. Otherwise it walks the
// instructions from Context.getCur() to Context.getEnd() twice: the first
// pass black-lists the registers of source variables that already have a
// register (and, in NEW, records the last instruction that uses each source
// variable); the second pass assigns a register to every infinite-weight
// variable that does not have one yet.
4162 void TargetX8632::postLower() { | 4163 void TargetX8632::postLower() { |
4163 if (Ctx->getOptLevel() != Opt_m1) | 4164 if (Ctx->getOptLevel() != Opt_m1) |
4164 return; | 4165 return; |
4165 // TODO: Avoid recomputing WhiteList every instruction. | 4166 // TODO: Avoid recomputing WhiteList every instruction. |
4166 RegSetMask RegInclude = RegSet_All; | 4167 RegSetMask RegInclude = RegSet_All; |
// NOTE(review): NEW starts the exclusion mask at RegSet_StackPointer instead
// of RegSet_None; the frame pointer is still added only if hasFramePointer().
4167 RegSetMask RegExclude = RegSet_None; | 4168 RegSetMask RegExclude = RegSet_StackPointer; |
4168 if (hasFramePointer()) | 4169 if (hasFramePointer()) |
4169 RegExclude |= RegSet_FramePointer; | 4170 RegExclude |= RegSet_FramePointer; |
4170 llvm::SmallBitVector WhiteList = getRegisterSet(RegInclude, RegExclude); | 4171 llvm::SmallBitVector WhiteList = getRegisterSet(RegInclude, RegExclude); |
4171 // Make one pass to black-list pre-colored registers. TODO: If | 4172 // Make one pass to black-list pre-colored registers. TODO: If |
4172 // there was some prior register allocation pass that made register | 4173 // there was some prior register allocation pass that made register |
4173 // assignments, those registers need to be black-listed here as | 4174 // assignments, those registers need to be black-listed here as |
4174 // well. | 4175 // well. |
| 4176 llvm::DenseMap<const Variable *, const Inst *> LastUses; |
| 4177 // The first pass also keeps track of which instruction is the last |
| 4178 // use for each infinite-weight variable. After the last use, the |
| 4179 // variable is released to the free list. |
4175 for (InstList::iterator I = Context.getCur(), E = Context.getEnd(); I != E; | 4180 for (InstList::iterator I = Context.getCur(), E = Context.getEnd(); I != E; |
4176 ++I) { | 4181 ++I) { |
4177 const Inst *Inst = *I; | 4182 const Inst *Inst = *I; |
4178 if (Inst->isDeleted()) | 4183 if (Inst->isDeleted()) |
4179 continue; | 4184 continue; |
| 4185 // Don't consider a FakeKill instruction, because (currently) it |
| 4186 // is only used to kill all scratch registers at a call site, and |
| 4187 // we don't want to black-list all scratch registers during the |
| 4188 // call lowering. This could become a problem since it relies on |
| 4189 // the lowering sequence not keeping any infinite-weight variables |
| 4190 // live across a call. TODO(stichnot): Consider replacing this |
| 4191 // whole postLower() implementation with a robust local register |
| 4192 // allocator, for example compute live ranges only for pre-colored |
| 4193 // and infinite-weight variables and run the existing linear-scan |
| 4194 // allocator. |
4180 if (llvm::isa<InstFakeKill>(Inst)) | 4195 if (llvm::isa<InstFakeKill>(Inst)) |
4181 continue; | 4196 continue; |
4182 for (SizeT SrcNum = 0; SrcNum < Inst->getSrcSize(); ++SrcNum) { | 4197 for (SizeT SrcNum = 0; SrcNum < Inst->getSrcSize(); ++SrcNum) { |
4183 Operand *Src = Inst->getSrc(SrcNum); | 4198 Operand *Src = Inst->getSrc(SrcNum); |
4184 SizeT NumVars = Src->getNumVars(); | 4199 SizeT NumVars = Src->getNumVars(); |
4185 for (SizeT J = 0; J < NumVars; ++J) { | 4200 for (SizeT J = 0; J < NumVars; ++J) { |
4186 const Variable *Var = Src->getVar(J); | 4201 const Variable *Var = Src->getVar(J); |
| 4202 // Track last uses of all variables, regardless of whether |
| 4203 // they are pre-colored or infinite-weight. |
| 4204 LastUses[Var] = Inst; |
4187 if (!Var->hasReg()) | 4205 if (!Var->hasReg()) |
4188 continue; | 4206 continue; |
4189 WhiteList[Var->getRegNum()] = false; | 4207 WhiteList[Var->getRegNum()] = false; |
4190 } | 4208 } |
4191 } | 4209 } |
4192 } | 4210 } |
4193 // The second pass colors infinite-weight variables. | 4211 // The second pass colors infinite-weight variables. |
4194 llvm::SmallBitVector AvailableRegisters = WhiteList; | 4212 llvm::SmallBitVector AvailableRegisters = WhiteList; |
| 4213 llvm::SmallBitVector FreedRegisters(WhiteList.size()); |
4195 for (InstList::iterator I = Context.getCur(), E = Context.getEnd(); I != E; | 4214 for (InstList::iterator I = Context.getCur(), E = Context.getEnd(); I != E; |
4196 ++I) { | 4215 ++I) { |
| 4216 FreedRegisters.reset(); |
4197 const Inst *Inst = *I; | 4217 const Inst *Inst = *I; |
4198 if (Inst->isDeleted()) | 4218 if (Inst->isDeleted()) |
4199 continue; | 4219 continue; |
4200 for (SizeT SrcNum = 0; SrcNum < Inst->getSrcSize(); ++SrcNum) { | 4220 // Skip FakeKill instructions like above. |
4201 Operand *Src = Inst->getSrc(SrcNum); | 4221 if (llvm::isa<InstFakeKill>(Inst)) |
| 4222 continue; |
| 4223 // Iterate over all variables referenced in the instruction, |
| 4224 // including the Dest variable (if any). If the variable is |
| 4225 // marked as infinite-weight, find it a register. If this |
| 4226 // instruction is the last use of the variable in the lowered |
| 4227 // sequence, release the register to the free list after this |
| 4228 // instruction is completely processed. Note that the first pass |
| 4229 // ignores the Dest operand, under the assumption that a |
| 4230 // pre-colored Dest will appear as a source operand in some |
| 4231 // subsequent instruction in the lowered sequence. |
| 4232 Variable *Dest = Inst->getDest(); |
| 4233 SizeT NumSrcs = Inst->getSrcSize(); |
| 4234 if (Dest) |
| 4235 ++NumSrcs; |
| 4236 OperandList Srcs(NumSrcs); |
| 4237 for (SizeT i = 0; i < Inst->getSrcSize(); ++i) |
| 4238 Srcs[i] = Inst->getSrc(i); |
| 4239 if (Dest) |
| 4240 Srcs[NumSrcs - 1] = Dest; |
| 4241 for (SizeT SrcNum = 0; SrcNum < NumSrcs; ++SrcNum) { |
| 4242 Operand *Src = Srcs[SrcNum]; |
4202 SizeT NumVars = Src->getNumVars(); | 4243 SizeT NumVars = Src->getNumVars(); |
4203 for (SizeT J = 0; J < NumVars; ++J) { | 4244 for (SizeT J = 0; J < NumVars; ++J) { |
4204 Variable *Var = Src->getVar(J); | 4245 Variable *Var = Src->getVar(J); |
4205 if (Var->hasReg()) | 4246 if (!Var->hasReg() && Var->getWeight().isInf()) { |
4206 continue; | 4247 llvm::SmallBitVector AvailableTypedRegisters = |
4207 if (!Var->getWeight().isInf()) | |
4208 continue; | |
4209 llvm::SmallBitVector AvailableTypedRegisters = | |
4210 AvailableRegisters & getRegisterSetForType(Var->getType()); | |
4211 if (!AvailableTypedRegisters.any()) { | |
4212 // This is a hack in case we run out of physical registers due | |
4213 // to an excessively long code sequence, as might happen when | |
4214 // lowering arguments in lowerCall(). | |
4215 AvailableRegisters = WhiteList; | |
4216 AvailableTypedRegisters = | |
4217 AvailableRegisters & getRegisterSetForType(Var->getType()); | 4248 AvailableRegisters & getRegisterSetForType(Var->getType()); |
// NOTE(review): NEW drops the OLD fallback that reset AvailableRegisters to
// WhiteList when no register of the right type was left, so running out of
// registers now trips this assert; NEW relies on last-use release instead.
| 4249 assert(AvailableTypedRegisters.any()); |
| 4250 int32_t RegNum = AvailableTypedRegisters.find_first(); |
| 4251 Var->setRegNum(RegNum); |
| 4252 AvailableRegisters[RegNum] = false; |
4218 } | 4253 } |
4219 assert(AvailableTypedRegisters.any()); | 4254 if (Var->hasReg()) { |
4220 int32_t RegNum = AvailableTypedRegisters.find_first(); | 4255 int32_t RegNum = Var->getRegNum(); |
4221 Var->setRegNum(RegNum); | 4256 assert(!AvailableRegisters[RegNum]); |
// NOTE(review): LastUses is filled only from source operands in the first
// pass. For a variable that never appears as a source (e.g. a Dest-only
// variable), operator[] default-inserts a null entry here, which never equals
// Inst, so that variable's register is never added to FreedRegisters.
// Only registers still set in WhiteList are released.
4222 AvailableRegisters[RegNum] = false; | 4257 if (LastUses[Var] == Inst) { |
| 4258 if (WhiteList[RegNum]) |
| 4259 FreedRegisters[RegNum] = true; |
| 4260 } |
| 4261 } |
4223 } | 4262 } |
4224 } | 4263 } |
// NOTE(review): registers are released only after every operand of the
// instruction has been processed, so they are not reused within it.
| 4264 AvailableRegisters |= FreedRegisters; |
4225 } | 4265 } |
4226 } | 4266 } |
4227 | 4267 |
// NOTE(review): ConstantInteger::emit() specialization (unchanged between OLD
// and NEW). Writes the constant's value, cast to int64_t, to the stream
// returned by Ctx->getStrEmit().
4228 template <> void ConstantInteger::emit(GlobalContext *Ctx) const { | 4268 template <> void ConstantInteger::emit(GlobalContext *Ctx) const { |
4229 Ostream &Str = Ctx->getStrEmit(); | 4269 Ostream &Str = Ctx->getStrEmit(); |
4230 Str << (int64_t) getValue(); | 4270 Str << (int64_t) getValue(); |
4231 } | 4271 } |
4232 | 4272 |
4233 template <> void ConstantFloat::emit(GlobalContext *Ctx) const { | 4273 template <> void ConstantFloat::emit(GlobalContext *Ctx) const { |
4234 Ostream &Str = Ctx->getStrEmit(); | 4274 Ostream &Str = Ctx->getStrEmit(); |
(...skipping 101 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4336 Str << "\t.align\t" << Align << "\n"; | 4376 Str << "\t.align\t" << Align << "\n"; |
4337 Str << MangledName << ":\n"; | 4377 Str << MangledName << ":\n"; |
4338 for (SizeT i = 0; i < Size; ++i) { | 4378 for (SizeT i = 0; i < Size; ++i) { |
4339 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; | 4379 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; |
4340 } | 4380 } |
4341 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; | 4381 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; |
4342 } | 4382 } |
4343 } | 4383 } |
4344 | 4384 |
4345 } // end of namespace Ice | 4385 } // end of namespace Ice |
OLD | NEW |