| OLD | NEW |
| 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// | 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 /// | 9 /// |
| 10 /// \file | 10 /// \file |
| (...skipping 113 matching lines...) |
| 124 // | 124 // |
| 125 // * Stack arguments of vector type are aligned to start at the next highest | 125 // * Stack arguments of vector type are aligned to start at the next highest |
| 126 // multiple of 16 bytes. Other stack arguments are aligned to 4 bytes. | 126 // multiple of 16 bytes. Other stack arguments are aligned to 4 bytes. |
| 127 // | 127 // |
| 128 // This intends to match the section "IA-32 Function Calling Convention" of | 128 // This intends to match the section "IA-32 Function Calling Convention" of |
| 129 // the document "OS X ABI Function Call Guide" by Apple. | 129 // the document "OS X ABI Function Call Guide" by Apple. |
| 130 NeedsStackAlignment = true; | 130 NeedsStackAlignment = true; |
| 131 | 131 |
| 132 OperandList XmmArgs; | 132 OperandList XmmArgs; |
| 133 OperandList StackArgs, StackArgLocations; | 133 OperandList StackArgs, StackArgLocations; |
| 134 uint32_t ParameterAreaSizeBytes = 0; | 134 int32_t ParameterAreaSizeBytes = 0; |
| 135 | 135 |
| 136 // Classify each argument operand according to the location where the | 136 // Classify each argument operand according to the location where the |
| 137 // argument is passed. | 137 // argument is passed. |
| 138 for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) { | 138 for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) { |
| 139 Operand *Arg = Instr->getArg(i); | 139 Operand *Arg = Instr->getArg(i); |
| 140 Type Ty = Arg->getType(); | 140 Type Ty = Arg->getType(); |
| 141 // The PNaCl ABI requires the width of arguments to be at least 32 bits. | 141 // The PNaCl ABI requires the width of arguments to be at least 32 bits. |
| 142 assert(typeWidthInBytes(Ty) >= 4); | 142 assert(typeWidthInBytes(Ty) >= 4); |
| 143 if (isVectorType(Ty) && XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) { | 143 if (isVectorType(Ty) && XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) { |
| 144 XmmArgs.push_back(Arg); | 144 XmmArgs.push_back(Arg); |
| 145 } else { | 145 } else { |
| 146 StackArgs.push_back(Arg); | 146 StackArgs.push_back(Arg); |
| 147 if (isVectorType(Arg->getType())) { | 147 if (isVectorType(Arg->getType())) { |
| 148 ParameterAreaSizeBytes = | 148 ParameterAreaSizeBytes = |
| 149 Traits::applyStackAlignment(ParameterAreaSizeBytes); | 149 Traits::applyStackAlignment(ParameterAreaSizeBytes); |
| 150 } | 150 } |
| 151 Variable *esp = | 151 Variable *esp = |
| 152 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp); | 152 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp); |
| 153 Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes); | 153 Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes); |
| 154 auto *Mem = Traits::X86OperandMem::create(Func, Ty, esp, Loc); | 154 auto *Mem = Traits::X86OperandMem::create(Func, Ty, esp, Loc); |
| 155 // Stack stores for arguments are fixed to esp. | 155 // Stack stores for arguments are fixed to esp. |
| 156 Mem->setIgnoreStackAdjust(true); | 156 Mem->setIgnoreStackAdjust(true); |
| 157 StackArgLocations.push_back(Mem); | 157 StackArgLocations.push_back(Mem); |
| 158 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType()); | 158 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType()); |
| 159 } | 159 } |
| 160 } | 160 } |
| 161 // Ensure there is enough space for the fstp/movs for floating returns. |
| 162 Variable *Dest = Instr->getDest(); |
| 163 if (Dest != nullptr && isScalarFloatingType(Dest->getType())) { |
| 164 ParameterAreaSizeBytes = |
| 165 std::max(static_cast<size_t>(ParameterAreaSizeBytes), |
| 166 typeWidthInBytesOnStack(Dest->getType())); |
| 167 } |
| 161 | 168 |
| 162 // Adjust the parameter area so that the stack is aligned. It is assumed that | 169 // Adjust the parameter area so that the stack is aligned. It is assumed that |
| 163 // the stack is already aligned at the start of the calling sequence. | 170 // the stack is already aligned at the start of the calling sequence. |
| 164 ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes); | 171 ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes); |
| 165 | 172 |
| 166 // Subtract the appropriate amount for the argument area. This also takes | 173 // Subtract the appropriate amount for the argument area. This also takes |
| 167 // care of setting the stack adjustment during emission. | 174 // care of setting the stack adjustment during emission. |
| 168 // | 175 // |
| 169 // TODO: If for some reason the call instruction gets dead-code eliminated | 176 // TODO: If for some reason the call instruction gets dead-code eliminated |
| 170 // after lowering, we would need to ensure that the pre-call and the | 177 // after lowering, we would need to ensure that the pre-call and the |
| (...skipping 19 matching lines...) |
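
A minimal standalone sketch of the offset computation in the hunk above, with hypothetical helper names (`alignUp`, `layoutStackArgs` are illustrative; `WidthOnStack` stands in for `typeWidthInBytesOnStack`): vector arguments are aligned up to the next 16-byte boundary before their esp-relative slot is recorded, and the running size advances by each argument's on-stack width.

```cpp
#include <cstddef>
#include <cstdint>
#include <vector>

// Round Offset up to the next multiple of Align (a power of two), mirroring
// the 16-byte case of Traits::applyStackAlignment.
static int32_t alignUp(int32_t Offset, int32_t Align) {
  return (Offset + Align - 1) & ~(Align - 1);
}

// Compute esp-relative offsets for the stack arguments. IsVector[i] marks
// vector-typed arguments; WidthOnStack[i] stands in for
// typeWidthInBytesOnStack(Ty). Vector arguments start at the next 16-byte
// boundary; everything else packs at its natural on-stack width.
static std::vector<int32_t>
layoutStackArgs(const std::vector<bool> &IsVector,
                const std::vector<int32_t> &WidthOnStack) {
  std::vector<int32_t> Offsets;
  int32_t ParameterAreaSizeBytes = 0;
  for (std::size_t i = 0; i < IsVector.size(); ++i) {
    if (IsVector[i])
      ParameterAreaSizeBytes = alignUp(ParameterAreaSizeBytes, 16);
    Offsets.push_back(ParameterAreaSizeBytes); // esp + this offset holds arg i
    ParameterAreaSizeBytes += WidthOnStack[i];
  }
  return Offsets;
}
```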
| 190 for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) { | 197 for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) { |
| 191 Variable *Reg = | 198 Variable *Reg = |
| 192 legalizeToReg(XmmArgs[i], Traits::RegisterSet::Reg_xmm0 + i); | 199 legalizeToReg(XmmArgs[i], Traits::RegisterSet::Reg_xmm0 + i); |
| 193 // Generate a FakeUse of register arguments so that they do not get dead | 200 // Generate a FakeUse of register arguments so that they do not get dead |
| 194 // code eliminated as a result of the FakeKill of scratch registers after | 201 // code eliminated as a result of the FakeKill of scratch registers after |
| 195 // the call. | 202 // the call. |
| 196 Context.insert(InstFakeUse::create(Func, Reg)); | 203 Context.insert(InstFakeUse::create(Func, Reg)); |
| 197 } | 204 } |
| 198 // Generate the call instruction. Assign its result to a temporary with high | 205 // Generate the call instruction. Assign its result to a temporary with high |
| 199 // register allocation weight. | 206 // register allocation weight. |
| 200 Variable *Dest = Instr->getDest(); | |
| 201 // ReturnReg doubles as ReturnRegLo as necessary. | 207 // ReturnReg doubles as ReturnRegLo as necessary. |
| 202 Variable *ReturnReg = nullptr; | 208 Variable *ReturnReg = nullptr; |
| 203 Variable *ReturnRegHi = nullptr; | 209 Variable *ReturnRegHi = nullptr; |
| 204 if (Dest) { | 210 if (Dest) { |
| 205 switch (Dest->getType()) { | 211 switch (Dest->getType()) { |
| 206 case IceType_NUM: | 212 case IceType_NUM: |
| 207 case IceType_void: | 213 case IceType_void: |
| 208 llvm::report_fatal_error("Invalid Call dest type"); | 214 llvm::report_fatal_error("Invalid Call dest type"); |
| 209 break; | 215 break; |
| 210 case IceType_i1: | 216 case IceType_i1: |
| (...skipping 37 matching lines...) |
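
For context on the ReturnReg / ReturnRegHi pair introduced above: on x86-32 a 64-bit result is returned in the eax (low half) / edx (high half) register pair, which is why "ReturnReg doubles as ReturnRegLo", and the lowering later moves the pair into Dest's lo/hi halves (see the DestLo/DestHi assignments below). A trivial sketch of that reassembly, expressed on plain values:

```cpp
#include <cstdint>

// Reassemble a 64-bit result from the eax (Lo) / edx (Hi) register pair,
// the value-level analogue of _mov(DestLo, ReturnReg) / _mov(DestHi,
// ReturnRegHi) in the lowering.
static uint64_t combineHalves(uint32_t Lo, uint32_t Hi) {
  return (static_cast<uint64_t>(Hi) << 32) | Lo;
}
```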
| 248 CallTarget = CallTargetVar; | 254 CallTarget = CallTargetVar; |
| 249 } | 255 } |
| 250 } | 256 } |
| 251 Inst *NewCall = Traits::Insts::Call::create(Func, ReturnReg, CallTarget); | 257 Inst *NewCall = Traits::Insts::Call::create(Func, ReturnReg, CallTarget); |
| 252 Context.insert(NewCall); | 258 Context.insert(NewCall); |
| 253 if (NeedSandboxing) | 259 if (NeedSandboxing) |
| 254 _bundle_unlock(); | 260 _bundle_unlock(); |
| 255 if (ReturnRegHi) | 261 if (ReturnRegHi) |
| 256 Context.insert(InstFakeDef::create(Func, ReturnRegHi)); | 262 Context.insert(InstFakeDef::create(Func, ReturnRegHi)); |
| 257 | 263 |
| 258 // Add the appropriate offset to esp. The call instruction takes care of | |
| 259 // resetting the stack offset during emission. | |
| 260 if (ParameterAreaSizeBytes) { | |
| 261 Variable *esp = | |
| 262 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp); | |
| 263 _add(esp, Ctx->getConstantInt32(ParameterAreaSizeBytes)); | |
| 264 } | |
| 265 | |
| 266 // Insert a register-kill pseudo instruction. | 264 // Insert a register-kill pseudo instruction. |
| 267 Context.insert(InstFakeKill::create(Func, NewCall)); | 265 Context.insert(InstFakeKill::create(Func, NewCall)); |
| 268 | 266 |
| 267 if (Dest != nullptr && isScalarFloatingType(Dest->getType())) { |
| 268 // Special treatment for an FP function which returns its result in st(0). |
| 269 // If Dest ends up being a physical xmm register, the fstp emit code will |
| 270 // route st(0) through the space reserved in the function argument area |
| 271 // we allocated. |
| 272 _fstp(Dest); |
| 273 // Create a fake use of Dest in case it actually isn't used, because st(0) |
| 274 // still needs to be popped. |
| 275 Context.insert(InstFakeUse::create(Func, Dest)); |
| 276 } |
| 277 |
| 278 // Add the appropriate offset to esp. |
| 279 if (ParameterAreaSizeBytes) |
| 280 _adjust_stack(-ParameterAreaSizeBytes); |
| 281 |
| 269 // Generate a FakeUse to keep the call live if necessary. | 282 // Generate a FakeUse to keep the call live if necessary. |
| 270 if (Instr->hasSideEffects() && ReturnReg) { | 283 if (Instr->hasSideEffects() && ReturnReg) { |
| 271 Inst *FakeUse = InstFakeUse::create(Func, ReturnReg); | 284 Inst *FakeUse = InstFakeUse::create(Func, ReturnReg); |
| 272 Context.insert(FakeUse); | 285 Context.insert(FakeUse); |
| 273 } | 286 } |
| 274 | 287 |
| 275 if (!Dest) | 288 if (!Dest) |
| 276 return; | 289 return; |
| 277 | 290 |
| 278 // Assign the result of the call to Dest. | 291 // Assign the result of the call to Dest. |
| 279 if (ReturnReg) { | 292 if (ReturnReg) { |
| 280 if (ReturnRegHi) { | 293 if (ReturnRegHi) { |
| 281 auto *Dest64On32 = llvm::cast<Variable64On32>(Dest); | 294 auto *Dest64On32 = llvm::cast<Variable64On32>(Dest); |
| 282 Variable *DestLo = Dest64On32->getLo(); | 295 Variable *DestLo = Dest64On32->getLo(); |
| 283 Variable *DestHi = Dest64On32->getHi(); | 296 Variable *DestHi = Dest64On32->getHi(); |
| 284 _mov(DestLo, ReturnReg); | 297 _mov(DestLo, ReturnReg); |
| 285 _mov(DestHi, ReturnRegHi); | 298 _mov(DestHi, ReturnRegHi); |
| 286 } else { | 299 } else { |
| 287 assert(Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 || | 300 assert(Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 || |
| 288 Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 || | 301 Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 || |
| 289 isVectorType(Dest->getType())); | 302 isVectorType(Dest->getType())); |
| 290 if (isVectorType(Dest->getType())) { | 303 if (isVectorType(Dest->getType())) { |
| 291 _movp(Dest, ReturnReg); | 304 _movp(Dest, ReturnReg); |
| 292 } else { | 305 } else { |
| 293 _mov(Dest, ReturnReg); | 306 _mov(Dest, ReturnReg); |
| 294 } | 307 } |
| 295 } | 308 } |
| 296 } else if (isScalarFloatingType(Dest->getType())) { | |
| 297 // Special treatment for an FP function which returns its result in st(0). | |
| 298 // If Dest ends up being a physical xmm register, the fstp emit code will | |
| 299 // route st(0) through a temporary stack slot. | |
| 300 _fstp(Dest); | |
| 301 // Create a fake use of Dest in case it actually isn't used, because st(0) | |
| 302 // still needs to be popped. | |
| 303 Context.insert(InstFakeUse::create(Func, Dest)); | |
| 304 } | 309 } |
| 305 } | 310 } |
| 306 | 311 |
| 307 void TargetX8632::lowerArguments() { | 312 void TargetX8632::lowerArguments() { |
| 308 VarList &Args = Func->getArgs(); | 313 VarList &Args = Func->getArgs(); |
| 309 // The first four arguments of vector type, regardless of their position | 314 // The first four arguments of vector type, regardless of their position |
| 310 // relative to the other arguments in the argument list, are passed in | 315 // relative to the other arguments in the argument list, are passed in |
| 311 // registers xmm0 - xmm3. | 316 // registers xmm0 - xmm3. |
| 312 unsigned NumXmmArgs = 0; | 317 unsigned NumXmmArgs = 0; |
| 313 | 318 |
| (...skipping 42 matching lines...) |
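
The hunk above moves the esp readjustment to after the fstp, so the slot reserved in the parameter area is still addressable when st(0) is popped into it. That is also why the earlier sizing code takes a `std::max` against the FP return type's on-stack width. A sketch of the sizing rule this requires (hypothetical function and parameter names, not Subzero's API):

```cpp
#include <algorithm>
#include <cstdint>

// The parameter area doubles as the fstp spill slot for scalar FP returns:
// st(0) is popped into it and reloaded into an xmm register before esp is
// readjusted, so the area must be at least as large as the return type.
static int32_t sizeParameterArea(int32_t ArgBytes, bool HasFpDest,
                                 int32_t FpDestWidthOnStack) {
  int32_t Size = ArgBytes;
  if (HasFpDest)
    Size = std::max(Size, FpDestWidthOnStack); // room for the fstp slot
  return (Size + 15) & ~15; // final 16-byte stack alignment
}
```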
| 356 } else { | 361 } else { |
| 357 _mov(Reg, Src0, Traits::RegisterSet::Reg_eax); | 362 _mov(Reg, Src0, Traits::RegisterSet::Reg_eax); |
| 358 } | 363 } |
| 359 } | 364 } |
| 360 // Add a ret instruction even if sandboxing is enabled, because addEpilog | 365 // Add a ret instruction even if sandboxing is enabled, because addEpilog |
| 361 // explicitly looks for a ret instruction as a marker for where to insert the | 366 // explicitly looks for a ret instruction as a marker for where to insert the |
| 362 // frame removal instructions. | 367 // frame removal instructions. |
| 363 _ret(Reg); | 368 _ret(Reg); |
| 364 // Add a fake use of esp to make sure esp stays alive for the entire | 369 // Add a fake use of esp to make sure esp stays alive for the entire |
| 365 // function. Otherwise post-call esp adjustments get dead-code eliminated. | 370 // function. Otherwise post-call esp adjustments get dead-code eliminated. |
| 366 // TODO: Are there more places where the fake use should be inserted? E.g. | 371 keepEspLiveAtExit(); |
| 367 // "void f(int n){while(1) g(n);}" may not have a ret instruction. | |
| 368 Variable *esp = | |
| 369 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp); | |
| 370 Context.insert(InstFakeUse::create(Func, esp)); | |
| 371 } | 372 } |
| 372 | 373 |
| 373 void TargetX8632::addProlog(CfgNode *Node) { | 374 void TargetX8632::addProlog(CfgNode *Node) { |
| 374 // Stack frame layout: | 375 // Stack frame layout: |
| 375 // | 376 // |
| 376 // +------------------------+ | 377 // +------------------------+ |
| 377 // | 1. return address | | 378 // | 1. return address | |
| 378 // +------------------------+ | 379 // +------------------------+ |
| 379 // | 2. preserved registers | | 380 // | 2. preserved registers | |
| 380 // +------------------------+ | 381 // +------------------------+ |
| (...skipping 593 matching lines...) |
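
The NEW column above replaces the inline fake-use of esp with a `keepEspLiveAtExit()` call. A sketch of what that helper presumably factors out, reconstructed from the deleted lines (an assumption based on the OLD column, not the verified implementation):

```cpp
// Presumed body of keepEspLiveAtExit(), reconstructed from the lines the
// NEW column removes: a FakeUse of esp at function exit keeps post-call esp
// adjustments from being dead-code eliminated.
void TargetX8632::keepEspLiveAtExit() {
  Variable *esp =
      Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
  Context.insert(InstFakeUse::create(Func, esp));
}
```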
| 974 // case the high-level table has extra entries. | 975 // case the high-level table has extra entries. |
| 975 #define X(tag, sizeLog2, align, elts, elty, str) \ | 976 #define X(tag, sizeLog2, align, elts, elty, str) \ |
| 976 static_assert(_table1_##tag == _table2_##tag, \ | 977 static_assert(_table1_##tag == _table2_##tag, \ |
| 977 "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE"); | 978 "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE"); |
| 978 ICETYPE_TABLE | 979 ICETYPE_TABLE |
| 979 #undef X | 980 #undef X |
| 980 } // end of namespace dummy3 | 981 } // end of namespace dummy3 |
| 981 } // end of anonymous namespace | 982 } // end of anonymous namespace |
| 982 | 983 |
| 983 } // end of namespace Ice | 984 } // end of namespace Ice |
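
The static_asserts in the final hunk implement an X-macro cross-check: the same tag list is expanded into two enums (one per table), and the per-tag constants are compared at compile time, so a mismatch in either table's order or contents fails the build. A minimal self-contained sketch of the pattern (table and tag names here are illustrative, not Subzero's):

```cpp
// Two tables that must list the same tags in the same order.
#define MY_TABLE_A \
  X(Alpha)         \
  X(Beta)
#define MY_TABLE_B \
  X(Alpha)         \
  X(Beta)

namespace {
// Expand each table into per-tag enum constants encoding its order.
enum OrderA {
#define X(tag) _a_##tag,
  MY_TABLE_A
#undef X
};
enum OrderB {
#define X(tag) _b_##tag,
  MY_TABLE_B
#undef X
};
// Compare the constants pairwise; any divergence is a compile-time error.
#define X(tag)                                                                 \
  static_assert(static_cast<int>(_a_##tag) == static_cast<int>(_b_##tag),      \
                "Inconsistency between MY_TABLE_A and MY_TABLE_B");
MY_TABLE_A
#undef X
} // end of anonymous namespace
```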