| OLD | NEW |
| 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// | 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 /// | 9 /// |
| 10 /// \file | 10 /// \file |
| (...skipping 1488 matching lines...) | (...skipping 1488 matching lines...) |
| 1499 Func, IceType_void, esp, Ctx->getConstantInt(IceType_i32, OutArgsSize)); | 1499 Func, IceType_void, esp, Ctx->getConstantInt(IceType_i32, OutArgsSize)); |
| 1500 _lea(T, CalculateOperand); | 1500 _lea(T, CalculateOperand); |
| 1501 _mov(Dest, T); | 1501 _mov(Dest, T); |
| 1502 } else { | 1502 } else { |
| 1503 _mov(Dest, esp); | 1503 _mov(Dest, esp); |
| 1504 } | 1504 } |
| 1505 } | 1505 } |
| 1506 | 1506 |
| 1507 template <typename TraitsType> | 1507 template <typename TraitsType> |
| 1508 void TargetX86Base<TraitsType>::lowerArguments() { | 1508 void TargetX86Base<TraitsType>::lowerArguments() { |
| 1509 const bool OptM1 = Func->getOptLevel() == Opt_m1; |
| 1509 VarList &Args = Func->getArgs(); | 1510 VarList &Args = Func->getArgs(); |
| 1510 unsigned NumXmmArgs = 0; | 1511 unsigned NumXmmArgs = 0; |
| 1511 bool XmmSlotsRemain = true; | 1512 bool XmmSlotsRemain = true; |
| 1512 unsigned NumGprArgs = 0; | 1513 unsigned NumGprArgs = 0; |
| 1513 bool GprSlotsRemain = true; | 1514 bool GprSlotsRemain = true; |
| 1514 | 1515 |
| 1515 Context.init(Func->getEntryNode()); | 1516 Context.init(Func->getEntryNode()); |
| 1516 Context.setInsertPoint(Context.getCur()); | 1517 Context.setInsertPoint(Context.getCur()); |
| 1517 | 1518 |
| 1518 for (SizeT i = 0, End = Args.size(); | 1519 for (SizeT i = 0, End = Args.size(); |
| (...skipping 35 matching lines...) | (...skipping 35 matching lines...) |
| 1554 // Replace Arg in the argument list with the home register. Then generate | 1555 // Replace Arg in the argument list with the home register. Then generate |
| 1555 // an instruction in the prolog to copy the home register to the assigned | 1556 // an instruction in the prolog to copy the home register to the assigned |
| 1556 // location of Arg. | 1557 // location of Arg. |
| 1557 if (BuildDefs::dump()) | 1558 if (BuildDefs::dump()) |
| 1558 RegisterArg->setName(Func, "home_reg:" + Arg->getName()); | 1559 RegisterArg->setName(Func, "home_reg:" + Arg->getName()); |
| 1559 RegisterArg->setRegNum(RegNum); | 1560 RegisterArg->setRegNum(RegNum); |
| 1560 RegisterArg->setIsArg(); | 1561 RegisterArg->setIsArg(); |
| 1561 Arg->setIsArg(false); | 1562 Arg->setIsArg(false); |
| 1562 | 1563 |
| 1563 Args[i] = RegisterArg; | 1564 Args[i] = RegisterArg; |
| 1564 Context.insert<InstAssign>(Arg, RegisterArg); | 1565 // When not Om1, do the assignment through a temporary, instead of directly |
| 1566 // from the pre-colored variable, so that a subsequent availabilityGet() |
| 1567 // call has a chance to work. (In Om1, don't bother creating extra |
| 1568 // instructions with extra variables to register-allocate.) |
| 1569 if (OptM1) { |
| 1570 Context.insert<InstAssign>(Arg, RegisterArg); |
| 1571 } else { |
| 1572 Variable *Tmp = makeReg(RegisterArg->getType()); |
| 1573 Context.insert<InstAssign>(Tmp, RegisterArg); |
| 1574 Context.insert<InstAssign>(Arg, Tmp); |
| 1575 } |
| 1565 } | 1576 } |
| 1577 if (!OptM1) |
| 1578 Context.availabilityUpdate(); |
| 1566 } | 1579 } |
| 1567 | 1580 |
| 1568 /// Strength-reduce scalar integer multiplication by a constant (for i32 or | 1581 /// Strength-reduce scalar integer multiplication by a constant (for i32 or |
| 1569 /// narrower) for certain constants. The lea instruction can be used to multiply | 1582 /// narrower) for certain constants. The lea instruction can be used to multiply |
| 1570 /// by 3, 5, or 9, and the shl instruction can be used to multiply by powers of | 1583 /// by 3, 5, or 9, and the shl instruction can be used to multiply by powers of |
| 1571 /// 2. These can be combined such that e.g. multiplying by 100 can be done as 2 | 1584 /// 2. These can be combined such that e.g. multiplying by 100 can be done as 2 |
| 1572 /// lea-based multiplies by 5, combined with left-shifting by 2. | 1585 /// lea-based multiplies by 5, combined with left-shifting by 2. |
| 1573 template <typename TraitsType> | 1586 template <typename TraitsType> |
| 1574 bool TargetX86Base<TraitsType>::optimizeScalarMul(Variable *Dest, Operand *Src0, | 1587 bool TargetX86Base<TraitsType>::optimizeScalarMul(Variable *Dest, Operand *Src0, |
| 1575 int32_t Src1) { | 1588 int32_t Src1) { |
| (...skipping 1005 matching lines...) | (...skipping 1005 matching lines...) |
| 2581 ParameterAreaSizeBytes = | 2594 ParameterAreaSizeBytes = |
| 2582 std::max(static_cast<size_t>(ParameterAreaSizeBytes), | 2595 std::max(static_cast<size_t>(ParameterAreaSizeBytes), |
| 2583 typeWidthInBytesOnStack(DestTy)); | 2596 typeWidthInBytesOnStack(DestTy)); |
| 2584 } | 2597 } |
| 2585 } | 2598 } |
| 2586 // Adjust the parameter area so that the stack is aligned. It is assumed that | 2599 // Adjust the parameter area so that the stack is aligned. It is assumed that |
| 2587 // the stack is already aligned at the start of the calling sequence. | 2600 // the stack is already aligned at the start of the calling sequence. |
| 2588 ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes); | 2601 ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes); |
| 2589 assert(ParameterAreaSizeBytes <= maxOutArgsSizeBytes()); | 2602 assert(ParameterAreaSizeBytes <= maxOutArgsSizeBytes()); |
| 2590 // Copy arguments that are passed on the stack to the appropriate stack | 2603 // Copy arguments that are passed on the stack to the appropriate stack |
| 2591 // locations. | 2604 // locations. We make sure legalize() is called on each argument at this |
| 2605 // point, to allow availabilityGet() to work. |
| 2592 for (SizeT i = 0, NumStackArgs = StackArgs.size(); i < NumStackArgs; ++i) { | 2606 for (SizeT i = 0, NumStackArgs = StackArgs.size(); i < NumStackArgs; ++i) { |
| 2593 lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i])); | 2607 lowerStore( |
| 2608 InstStore::create(Func, legalize(StackArgs[i]), StackArgLocations[i])); |
| 2594 } | 2609 } |
| 2595 // Copy arguments to be passed in registers to the appropriate registers. | 2610 // Copy arguments to be passed in registers to the appropriate registers. |
| 2596 for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) { | 2611 for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) { |
| 2597 Variable *Reg = | 2612 XmmArgs[i] = |
| 2598 legalizeToReg(XmmArgs[i], Traits::getRegisterForXmmArgNum(i)); | 2613 legalizeToReg(legalize(XmmArgs[i]), Traits::getRegisterForXmmArgNum(i)); |
| 2599 // Generate a FakeUse of register arguments so that they do not get dead | |
| 2600 // code eliminated as a result of the FakeKill of scratch registers after | |
| 2601 // the call. | |
| 2602 Context.insert<InstFakeUse>(Reg); | |
| 2603 } | 2614 } |
| 2604 // Materialize moves for arguments passed in GPRs. | 2615 // Materialize moves for arguments passed in GPRs. |
| 2605 for (SizeT i = 0, NumGprArgs = GprArgs.size(); i < NumGprArgs; ++i) { | 2616 for (SizeT i = 0, NumGprArgs = GprArgs.size(); i < NumGprArgs; ++i) { |
| 2606 const Type SignatureTy = GprArgs[i].first; | 2617 const Type SignatureTy = GprArgs[i].first; |
| 2607 Operand *Arg = GprArgs[i].second; | 2618 Operand *Arg = legalize(GprArgs[i].second); |
| 2608 Variable *Reg = | 2619 GprArgs[i].second = |
| 2609 legalizeToReg(Arg, Traits::getRegisterForGprArgNum(Arg->getType(), i)); | 2620 legalizeToReg(Arg, Traits::getRegisterForGprArgNum(Arg->getType(), i)); |
| 2610 assert(SignatureTy == IceType_i64 || SignatureTy == IceType_i32); | 2621 assert(SignatureTy == IceType_i64 || SignatureTy == IceType_i32); |
| 2611 assert(SignatureTy == Arg->getType()); | 2622 assert(SignatureTy == Arg->getType()); |
| 2612 (void)SignatureTy; | 2623 (void)SignatureTy; |
| 2613 Context.insert<InstFakeUse>(Reg); | 2624 } |
| 2625 // Generate a FakeUse of register arguments so that they do not get dead code |
| 2626 // eliminated as a result of the FakeKill of scratch registers after the call. |
| 2627 // These need to be right before the call instruction. |
| 2628 for (auto *Arg : XmmArgs) { |
| 2629 Context.insert<InstFakeUse>(llvm::cast<Variable>(Arg)); |
| 2630 } |
| 2631 for (auto &ArgPair : GprArgs) { |
| 2632 Context.insert<InstFakeUse>(llvm::cast<Variable>(ArgPair.second)); |
| 2614 } | 2633 } |
| 2615 // Generate the call instruction. Assign its result to a temporary with high | 2634 // Generate the call instruction. Assign its result to a temporary with high |
| 2616 // register allocation weight. | 2635 // register allocation weight. |
| 2617 // ReturnReg doubles as ReturnRegLo as necessary. | 2636 // ReturnReg doubles as ReturnRegLo as necessary. |
| 2618 Variable *ReturnReg = nullptr; | 2637 Variable *ReturnReg = nullptr; |
| 2619 Variable *ReturnRegHi = nullptr; | 2638 Variable *ReturnRegHi = nullptr; |
| 2620 if (Dest) { | 2639 if (Dest) { |
| 2621 switch (DestTy) { | 2640 switch (DestTy) { |
| 2622 case IceType_NUM: | 2641 case IceType_NUM: |
| 2623 case IceType_void: | 2642 case IceType_void: |
| (...skipping 5323 matching lines...) | (...skipping 5323 matching lines...) |
| 7947 emitGlobal(*Var, SectionSuffix); | 7966 emitGlobal(*Var, SectionSuffix); |
| 7948 } | 7967 } |
| 7949 } | 7968 } |
| 7950 } break; | 7969 } break; |
| 7951 } | 7970 } |
| 7952 } | 7971 } |
| 7953 } // end of namespace X86NAMESPACE | 7972 } // end of namespace X86NAMESPACE |
| 7954 } // end of namespace Ice | 7973 } // end of namespace Ice |
| 7955 | 7974 |
| 7956 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H | 7975 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H |
| OLD | NEW |