Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(691)

Side by Side Diff: src/IceTargetLoweringX86BaseImpl.h

Issue 2052683003: Subzero: Improve effectiveness of local register availability peephole. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Fix comments. Implement for x86-32. Created 4 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/IceTargetLoweringX8664.cpp ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
(...skipping 1488 matching lines...) Expand 10 before | Expand all | Expand 10 after
1499 Func, IceType_void, esp, Ctx->getConstantInt(IceType_i32, OutArgsSize)); 1499 Func, IceType_void, esp, Ctx->getConstantInt(IceType_i32, OutArgsSize));
1500 _lea(T, CalculateOperand); 1500 _lea(T, CalculateOperand);
1501 _mov(Dest, T); 1501 _mov(Dest, T);
1502 } else { 1502 } else {
1503 _mov(Dest, esp); 1503 _mov(Dest, esp);
1504 } 1504 }
1505 } 1505 }
1506 1506
1507 template <typename TraitsType> 1507 template <typename TraitsType>
1508 void TargetX86Base<TraitsType>::lowerArguments() { 1508 void TargetX86Base<TraitsType>::lowerArguments() {
1509 const bool OptM1 = Func->getOptLevel() == Opt_m1;
1509 VarList &Args = Func->getArgs(); 1510 VarList &Args = Func->getArgs();
1510 unsigned NumXmmArgs = 0; 1511 unsigned NumXmmArgs = 0;
1511 bool XmmSlotsRemain = true; 1512 bool XmmSlotsRemain = true;
1512 unsigned NumGprArgs = 0; 1513 unsigned NumGprArgs = 0;
1513 bool GprSlotsRemain = true; 1514 bool GprSlotsRemain = true;
1514 1515
1515 Context.init(Func->getEntryNode()); 1516 Context.init(Func->getEntryNode());
1516 Context.setInsertPoint(Context.getCur()); 1517 Context.setInsertPoint(Context.getCur());
1517 1518
1518 for (SizeT i = 0, End = Args.size(); 1519 for (SizeT i = 0, End = Args.size();
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after
1554 // Replace Arg in the argument list with the home register. Then generate 1555 // Replace Arg in the argument list with the home register. Then generate
1555 // an instruction in the prolog to copy the home register to the assigned 1556 // an instruction in the prolog to copy the home register to the assigned
1556 // location of Arg. 1557 // location of Arg.
1557 if (BuildDefs::dump()) 1558 if (BuildDefs::dump())
1558 RegisterArg->setName(Func, "home_reg:" + Arg->getName()); 1559 RegisterArg->setName(Func, "home_reg:" + Arg->getName());
1559 RegisterArg->setRegNum(RegNum); 1560 RegisterArg->setRegNum(RegNum);
1560 RegisterArg->setIsArg(); 1561 RegisterArg->setIsArg();
1561 Arg->setIsArg(false); 1562 Arg->setIsArg(false);
1562 1563
1563 Args[i] = RegisterArg; 1564 Args[i] = RegisterArg;
1564 Context.insert<InstAssign>(Arg, RegisterArg); 1565 // When not Om1, do the assignment through a temporary, instead of directly
1566 // from the pre-colored variable, so that a subsequent availabilityGet()
1567 // call has a chance to work. (In Om1, don't bother creating extra
1568 // instructions with extra variables to register-allocate.)
1569 if (OptM1) {
1570 Context.insert<InstAssign>(Arg, RegisterArg);
1571 } else {
1572 Variable *Tmp = makeReg(RegisterArg->getType());
1573 Context.insert<InstAssign>(Tmp, RegisterArg);
1574 Context.insert<InstAssign>(Arg, Tmp);
1575 }
1565 } 1576 }
1577 if (!OptM1)
1578 Context.availabilityUpdate();
1566 } 1579 }
1567 1580
1568 /// Strength-reduce scalar integer multiplication by a constant (for i32 or 1581 /// Strength-reduce scalar integer multiplication by a constant (for i32 or
1569 /// narrower) for certain constants. The lea instruction can be used to multiply 1582 /// narrower) for certain constants. The lea instruction can be used to multiply
1570 /// by 3, 5, or 9, and the shl instruction can be used to multiply by powers of 1583 /// by 3, 5, or 9, and the shl instruction can be used to multiply by powers of
1571 /// 2. These can be combined such that e.g. multiplying by 100 can be done as 2 1584 /// 2. These can be combined such that e.g. multiplying by 100 can be done as 2
1572 /// lea-based multiplies by 5, combined with left-shifting by 2. 1585 /// lea-based multiplies by 5, combined with left-shifting by 2.
1573 template <typename TraitsType> 1586 template <typename TraitsType>
1574 bool TargetX86Base<TraitsType>::optimizeScalarMul(Variable *Dest, Operand *Src0, 1587 bool TargetX86Base<TraitsType>::optimizeScalarMul(Variable *Dest, Operand *Src0,
1575 int32_t Src1) { 1588 int32_t Src1) {
(...skipping 1005 matching lines...) Expand 10 before | Expand all | Expand 10 after
2581 ParameterAreaSizeBytes = 2594 ParameterAreaSizeBytes =
2582 std::max(static_cast<size_t>(ParameterAreaSizeBytes), 2595 std::max(static_cast<size_t>(ParameterAreaSizeBytes),
2583 typeWidthInBytesOnStack(DestTy)); 2596 typeWidthInBytesOnStack(DestTy));
2584 } 2597 }
2585 } 2598 }
2586 // Adjust the parameter area so that the stack is aligned. It is assumed that 2599 // Adjust the parameter area so that the stack is aligned. It is assumed that
2587 // the stack is already aligned at the start of the calling sequence. 2600 // the stack is already aligned at the start of the calling sequence.
2588 ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes); 2601 ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes);
2589 assert(ParameterAreaSizeBytes <= maxOutArgsSizeBytes()); 2602 assert(ParameterAreaSizeBytes <= maxOutArgsSizeBytes());
2590 // Copy arguments that are passed on the stack to the appropriate stack 2603 // Copy arguments that are passed on the stack to the appropriate stack
2591 // locations. 2604 // locations. We make sure legalize() is called on each argument at this
2605 // point, to allow availabilityGet() to work.
2592 for (SizeT i = 0, NumStackArgs = StackArgs.size(); i < NumStackArgs; ++i) { 2606 for (SizeT i = 0, NumStackArgs = StackArgs.size(); i < NumStackArgs; ++i) {
2593 lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i])); 2607 lowerStore(
2608 InstStore::create(Func, legalize(StackArgs[i]), StackArgLocations[i]));
2594 } 2609 }
2595 // Copy arguments to be passed in registers to the appropriate registers. 2610 // Copy arguments to be passed in registers to the appropriate registers.
2596 for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) { 2611 for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) {
2597 Variable *Reg = 2612 XmmArgs[i] =
2598 legalizeToReg(XmmArgs[i], Traits::getRegisterForXmmArgNum(i)); 2613 legalizeToReg(legalize(XmmArgs[i]), Traits::getRegisterForXmmArgNum(i));
2599 // Generate a FakeUse of register arguments so that they do not get dead
2600 // code eliminated as a result of the FakeKill of scratch registers after
2601 // the call.
2602 Context.insert<InstFakeUse>(Reg);
2603 } 2614 }
2604 // Materialize moves for arguments passed in GPRs. 2615 // Materialize moves for arguments passed in GPRs.
2605 for (SizeT i = 0, NumGprArgs = GprArgs.size(); i < NumGprArgs; ++i) { 2616 for (SizeT i = 0, NumGprArgs = GprArgs.size(); i < NumGprArgs; ++i) {
2606 const Type SignatureTy = GprArgs[i].first; 2617 const Type SignatureTy = GprArgs[i].first;
2607 Operand *Arg = GprArgs[i].second; 2618 Operand *Arg = legalize(GprArgs[i].second);
2608 Variable *Reg = 2619 GprArgs[i].second =
2609 legalizeToReg(Arg, Traits::getRegisterForGprArgNum(Arg->getType(), i)); 2620 legalizeToReg(Arg, Traits::getRegisterForGprArgNum(Arg->getType(), i));
2610 assert(SignatureTy == IceType_i64 || SignatureTy == IceType_i32); 2621 assert(SignatureTy == IceType_i64 || SignatureTy == IceType_i32);
2611 assert(SignatureTy == Arg->getType()); 2622 assert(SignatureTy == Arg->getType());
2612 (void)SignatureTy; 2623 (void)SignatureTy;
2613 Context.insert<InstFakeUse>(Reg); 2624 }
2625 // Generate a FakeUse of register arguments so that they do not get dead code
2626 // eliminated as a result of the FakeKill of scratch registers after the call.
2627 // These need to be right before the call instruction.
2628 for (auto *Arg : XmmArgs) {
2629 Context.insert<InstFakeUse>(llvm::cast<Variable>(Arg));
2630 }
2631 for (auto &ArgPair : GprArgs) {
2632 Context.insert<InstFakeUse>(llvm::cast<Variable>(ArgPair.second));
2614 } 2633 }
2615 // Generate the call instruction. Assign its result to a temporary with high 2634 // Generate the call instruction. Assign its result to a temporary with high
2616 // register allocation weight. 2635 // register allocation weight.
2617 // ReturnReg doubles as ReturnRegLo as necessary. 2636 // ReturnReg doubles as ReturnRegLo as necessary.
2618 Variable *ReturnReg = nullptr; 2637 Variable *ReturnReg = nullptr;
2619 Variable *ReturnRegHi = nullptr; 2638 Variable *ReturnRegHi = nullptr;
2620 if (Dest) { 2639 if (Dest) {
2621 switch (DestTy) { 2640 switch (DestTy) {
2622 case IceType_NUM: 2641 case IceType_NUM:
2623 case IceType_void: 2642 case IceType_void:
(...skipping 5323 matching lines...) Expand 10 before | Expand all | Expand 10 after
7947 emitGlobal(*Var, SectionSuffix); 7966 emitGlobal(*Var, SectionSuffix);
7948 } 7967 }
7949 } 7968 }
7950 } break; 7969 } break;
7951 } 7970 }
7952 } 7971 }
7953 } // end of namespace X86NAMESPACE 7972 } // end of namespace X86NAMESPACE
7954 } // end of namespace Ice 7973 } // end of namespace Ice
7955 7974
7956 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H 7975 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H
OLDNEW
« no previous file with comments | « src/IceTargetLoweringX8664.cpp ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698