OLD | NEW |
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// | 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 /// | 9 /// |
10 /// \file | 10 /// \file |
(...skipping 1488 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1499 Func, IceType_void, esp, Ctx->getConstantInt(IceType_i32, OutArgsSize)); | 1499 Func, IceType_void, esp, Ctx->getConstantInt(IceType_i32, OutArgsSize)); |
1500 _lea(T, CalculateOperand); | 1500 _lea(T, CalculateOperand); |
1501 _mov(Dest, T); | 1501 _mov(Dest, T); |
1502 } else { | 1502 } else { |
1503 _mov(Dest, esp); | 1503 _mov(Dest, esp); |
1504 } | 1504 } |
1505 } | 1505 } |
1506 | 1506 |
1507 template <typename TraitsType> | 1507 template <typename TraitsType> |
1508 void TargetX86Base<TraitsType>::lowerArguments() { | 1508 void TargetX86Base<TraitsType>::lowerArguments() { |
| 1509 const bool OptM1 = Func->getOptLevel() == Opt_m1; |
1509 VarList &Args = Func->getArgs(); | 1510 VarList &Args = Func->getArgs(); |
1510 unsigned NumXmmArgs = 0; | 1511 unsigned NumXmmArgs = 0; |
1511 bool XmmSlotsRemain = true; | 1512 bool XmmSlotsRemain = true; |
1512 unsigned NumGprArgs = 0; | 1513 unsigned NumGprArgs = 0; |
1513 bool GprSlotsRemain = true; | 1514 bool GprSlotsRemain = true; |
1514 | 1515 |
1515 Context.init(Func->getEntryNode()); | 1516 Context.init(Func->getEntryNode()); |
1516 Context.setInsertPoint(Context.getCur()); | 1517 Context.setInsertPoint(Context.getCur()); |
1517 | 1518 |
1518 for (SizeT i = 0, End = Args.size(); | 1519 for (SizeT i = 0, End = Args.size(); |
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1554 // Replace Arg in the argument list with the home register. Then generate | 1555 // Replace Arg in the argument list with the home register. Then generate |
1555 // an instruction in the prolog to copy the home register to the assigned | 1556 // an instruction in the prolog to copy the home register to the assigned |
1556 // location of Arg. | 1557 // location of Arg. |
1557 if (BuildDefs::dump()) | 1558 if (BuildDefs::dump()) |
1558 RegisterArg->setName(Func, "home_reg:" + Arg->getName()); | 1559 RegisterArg->setName(Func, "home_reg:" + Arg->getName()); |
1559 RegisterArg->setRegNum(RegNum); | 1560 RegisterArg->setRegNum(RegNum); |
1560 RegisterArg->setIsArg(); | 1561 RegisterArg->setIsArg(); |
1561 Arg->setIsArg(false); | 1562 Arg->setIsArg(false); |
1562 | 1563 |
1563 Args[i] = RegisterArg; | 1564 Args[i] = RegisterArg; |
1564 Context.insert<InstAssign>(Arg, RegisterArg); | 1565 // When not Om1, do the assignment through a temporary, instead of directly |
| 1566 // from the pre-colored variable, so that a subsequent availabilityGet() |
| 1567 // call has a chance to work. (In Om1, don't bother creating extra |
| 1568 // instructions with extra variables to register-allocate.) |
| 1569 if (OptM1) { |
| 1570 Context.insert<InstAssign>(Arg, RegisterArg); |
| 1571 } else { |
| 1572 Variable *Tmp = makeReg(RegisterArg->getType()); |
| 1573 Context.insert<InstAssign>(Tmp, RegisterArg); |
| 1574 Context.insert<InstAssign>(Arg, Tmp); |
| 1575 } |
1565 } | 1576 } |
| 1577 if (!OptM1) |
| 1578 Context.availabilityUpdate(); |
1566 } | 1579 } |
1567 | 1580 |
1568 /// Strength-reduce scalar integer multiplication by a constant (for i32 or | 1581 /// Strength-reduce scalar integer multiplication by a constant (for i32 or |
1569 /// narrower) for certain constants. The lea instruction can be used to multiply | 1582 /// narrower) for certain constants. The lea instruction can be used to multiply |
1570 /// by 3, 5, or 9, and the shl instruction can be used to multiply by powers of | 1583 /// by 3, 5, or 9, and the shl instruction can be used to multiply by powers of |
1571 /// 2. These can be combined such that e.g. multiplying by 100 can be done as 2 | 1584 /// 2. These can be combined such that e.g. multiplying by 100 can be done as 2 |
1572 /// lea-based multiplies by 5, combined with left-shifting by 2. | 1585 /// lea-based multiplies by 5, combined with left-shifting by 2. |
1573 template <typename TraitsType> | 1586 template <typename TraitsType> |
1574 bool TargetX86Base<TraitsType>::optimizeScalarMul(Variable *Dest, Operand *Src0, | 1587 bool TargetX86Base<TraitsType>::optimizeScalarMul(Variable *Dest, Operand *Src0, |
1575 int32_t Src1) { | 1588 int32_t Src1) { |
(...skipping 1005 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2581 ParameterAreaSizeBytes = | 2594 ParameterAreaSizeBytes = |
2582 std::max(static_cast<size_t>(ParameterAreaSizeBytes), | 2595 std::max(static_cast<size_t>(ParameterAreaSizeBytes), |
2583 typeWidthInBytesOnStack(DestTy)); | 2596 typeWidthInBytesOnStack(DestTy)); |
2584 } | 2597 } |
2585 } | 2598 } |
2586 // Adjust the parameter area so that the stack is aligned. It is assumed that | 2599 // Adjust the parameter area so that the stack is aligned. It is assumed that |
2587 // the stack is already aligned at the start of the calling sequence. | 2600 // the stack is already aligned at the start of the calling sequence. |
2588 ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes); | 2601 ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes); |
2589 assert(ParameterAreaSizeBytes <= maxOutArgsSizeBytes()); | 2602 assert(ParameterAreaSizeBytes <= maxOutArgsSizeBytes()); |
2590 // Copy arguments that are passed on the stack to the appropriate stack | 2603 // Copy arguments that are passed on the stack to the appropriate stack |
2591 // locations. | 2604 // locations. We make sure legalize() is called on each argument at this |
| 2605 // point, to allow availabilityGet() to work. |
2592 for (SizeT i = 0, NumStackArgs = StackArgs.size(); i < NumStackArgs; ++i) { | 2606 for (SizeT i = 0, NumStackArgs = StackArgs.size(); i < NumStackArgs; ++i) { |
2593 lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i])); | 2607 lowerStore( |
| 2608 InstStore::create(Func, legalize(StackArgs[i]), StackArgLocations[i])); |
2594 } | 2609 } |
2595 // Copy arguments to be passed in registers to the appropriate registers. | 2610 // Copy arguments to be passed in registers to the appropriate registers. |
2596 for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) { | 2611 for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) { |
2597 Variable *Reg = | 2612 XmmArgs[i] = |
2598 legalizeToReg(XmmArgs[i], Traits::getRegisterForXmmArgNum(i)); | 2613 legalizeToReg(legalize(XmmArgs[i]), Traits::getRegisterForXmmArgNum(i)); |
2599 // Generate a FakeUse of register arguments so that they do not get dead | |
2600 // code eliminated as a result of the FakeKill of scratch registers after | |
2601 // the call. | |
2602 Context.insert<InstFakeUse>(Reg); | |
2603 } | 2614 } |
2604 // Materialize moves for arguments passed in GPRs. | 2615 // Materialize moves for arguments passed in GPRs. |
2605 for (SizeT i = 0, NumGprArgs = GprArgs.size(); i < NumGprArgs; ++i) { | 2616 for (SizeT i = 0, NumGprArgs = GprArgs.size(); i < NumGprArgs; ++i) { |
2606 const Type SignatureTy = GprArgs[i].first; | 2617 const Type SignatureTy = GprArgs[i].first; |
2607 Operand *Arg = GprArgs[i].second; | 2618 Operand *Arg = legalize(GprArgs[i].second); |
2608 Variable *Reg = | 2619 GprArgs[i].second = |
2609 legalizeToReg(Arg, Traits::getRegisterForGprArgNum(Arg->getType(), i)); | 2620 legalizeToReg(Arg, Traits::getRegisterForGprArgNum(Arg->getType(), i)); |
2610 assert(SignatureTy == IceType_i64 || SignatureTy == IceType_i32); | 2621 assert(SignatureTy == IceType_i64 || SignatureTy == IceType_i32); |
2611 assert(SignatureTy == Arg->getType()); | 2622 assert(SignatureTy == Arg->getType()); |
2612 (void)SignatureTy; | 2623 (void)SignatureTy; |
2613 Context.insert<InstFakeUse>(Reg); | 2624 } |
| 2625 // Generate a FakeUse of register arguments so that they do not get dead code |
| 2626 // eliminated as a result of the FakeKill of scratch registers after the call. |
| 2627 // These need to be right before the call instruction. |
| 2628 for (auto *Arg : XmmArgs) { |
| 2629 Context.insert<InstFakeUse>(llvm::cast<Variable>(Arg)); |
| 2630 } |
| 2631 for (auto &ArgPair : GprArgs) { |
| 2632 Context.insert<InstFakeUse>(llvm::cast<Variable>(ArgPair.second)); |
2614 } | 2633 } |
2615 // Generate the call instruction. Assign its result to a temporary with high | 2634 // Generate the call instruction. Assign its result to a temporary with high |
2616 // register allocation weight. | 2635 // register allocation weight. |
2617 // ReturnReg doubles as ReturnRegLo as necessary. | 2636 // ReturnReg doubles as ReturnRegLo as necessary. |
2618 Variable *ReturnReg = nullptr; | 2637 Variable *ReturnReg = nullptr; |
2619 Variable *ReturnRegHi = nullptr; | 2638 Variable *ReturnRegHi = nullptr; |
2620 if (Dest) { | 2639 if (Dest) { |
2621 switch (DestTy) { | 2640 switch (DestTy) { |
2622 case IceType_NUM: | 2641 case IceType_NUM: |
2623 case IceType_void: | 2642 case IceType_void: |
(...skipping 5323 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
7947 emitGlobal(*Var, SectionSuffix); | 7966 emitGlobal(*Var, SectionSuffix); |
7948 } | 7967 } |
7949 } | 7968 } |
7950 } break; | 7969 } break; |
7951 } | 7970 } |
7952 } | 7971 } |
7953 } // end of namespace X86NAMESPACE | 7972 } // end of namespace X86NAMESPACE |
7954 } // end of namespace Ice | 7973 } // end of namespace Ice |
7955 | 7974 |
7956 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H | 7975 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H |
OLD | NEW |